From f02316bb827e5beda6db3a8be7387bfbff569190 Mon Sep 17 00:00:00 2001
From: Andrew
Date: Fri, 8 Nov 2024 20:58:59 -0600
Subject: [PATCH] cargo vendor

---
 .cargo/cargo.toml | 5 +
 vendor/aho-corasick/.cargo-checksum.json | 2 +-
 vendor/aho-corasick/Cargo.toml | 42 +-
 vendor/aho-corasick/DESIGN.md | 126 +-
 vendor/aho-corasick/README.md | 63 +-
 vendor/aho-corasick/src/ahocorasick.rs | 3504 ++-----
 vendor/aho-corasick/src/automaton.rs | 2033 +++--
 vendor/aho-corasick/src/classes.rs | 238 -
 vendor/aho-corasick/src/dfa.rs | 1348 +++--
 vendor/aho-corasick/src/error.rs | 101 -
 vendor/aho-corasick/src/lib.rs | 321 +-
 vendor/aho-corasick/src/macros.rs | 18 +
 vendor/aho-corasick/src/nfa.rs | 1214 ----
 vendor/aho-corasick/src/nfa/contiguous.rs | 1141 ++++
 vendor/aho-corasick/src/nfa/mod.rs | 40 +
 vendor/aho-corasick/src/nfa/noncontiguous.rs | 1762 ++++++
 vendor/aho-corasick/src/packed/api.rs | 258 +-
 vendor/aho-corasick/src/packed/ext.rs | 39 +
 vendor/aho-corasick/src/packed/mod.rs | 81 +-
 vendor/aho-corasick/src/packed/pattern.rs | 374 +-
 vendor/aho-corasick/src/packed/rabinkarp.rs | 45 +-
 .../aho-corasick/src/packed/teddy/README.md | 16 +-
 .../aho-corasick/src/packed/teddy/builder.rs | 780 +++
 .../aho-corasick/src/packed/teddy/compile.rs | 414 --
 .../aho-corasick/src/packed/teddy/generic.rs | 1382 +++++
 vendor/aho-corasick/src/packed/teddy/mod.rs | 67 +-
 .../aho-corasick/src/packed/teddy/runtime.rs | 1204 ----
 vendor/aho-corasick/src/packed/tests.rs | 127 +-
 vendor/aho-corasick/src/packed/vector.rs | 1891 +++++-
 vendor/aho-corasick/src/prefilter.rs | 1057 ----
 vendor/aho-corasick/src/state_id.rs | 192 -
 vendor/aho-corasick/src/tests.rs | 724 ++-
 vendor/aho-corasick/src/transducer.rs | 270 +
 vendor/aho-corasick/src/util/alphabet.rs | 409 ++
 vendor/aho-corasick/src/{ => util}/buffer.rs | 60 +-
 .../src/{ => util}/byte_frequencies.rs | 0
 vendor/aho-corasick/src/util/debug.rs | 26 +
 vendor/aho-corasick/src/util/error.rs | 259 +
 vendor/aho-corasick/src/util/int.rs | 284 +
 vendor/aho-corasick/src/util/mod.rs | 12 +
 vendor/aho-corasick/src/util/prefilter.rs | 924 +++
 vendor/aho-corasick/src/util/primitives.rs | 759 +++
 vendor/aho-corasick/src/util/remapper.rs | 214 +
 vendor/aho-corasick/src/util/search.rs | 1148 ++++
 vendor/aho-corasick/src/util/special.rs | 42 +
 vendor/autocfg/.cargo-checksum.json | 1 -
 vendor/autocfg/README.md | 95 -
 vendor/autocfg/examples/integers.rs | 9 -
 vendor/autocfg/examples/paths.rs | 22 -
 vendor/autocfg/examples/traits.rs | 26 -
 vendor/autocfg/examples/versions.rs | 9 -
 vendor/autocfg/src/error.rs | 69 -
 vendor/autocfg/src/lib.rs | 453 --
 vendor/autocfg/src/tests.rs | 174 -
 vendor/autocfg/src/version.rs | 60 -
 vendor/autocfg/tests/rustflags.rs | 33 -
 vendor/equivalent/.cargo-checksum.json | 1 +
 vendor/{autocfg => equivalent}/Cargo.toml | 23 +-
 vendor/{autocfg => equivalent}/LICENSE-APACHE | 0
 vendor/{autocfg => equivalent}/LICENSE-MIT | 2 +-
 vendor/equivalent/README.md | 25 +
 vendor/equivalent/src/lib.rs | 113 +
 vendor/hashbrown/.cargo-checksum.json | 2 +-
 vendor/hashbrown/CHANGELOG.md | 150 +-
 vendor/hashbrown/Cargo.toml | 40 +-
 vendor/hashbrown/README.md | 77 +-
 vendor/hashbrown/benches/bench.rs | 2 +-
 .../hashbrown/src/external_trait_impls/mod.rs | 2 +
 .../src/external_trait_impls/rayon/map.rs | 47 +-
 .../src/external_trait_impls/rayon/mod.rs | 1 +
 .../src/external_trait_impls/rayon/raw.rs | 23 +-
 .../src/external_trait_impls/rayon/set.rs | 34 +-
 .../src/external_trait_impls/rayon/table.rs | 252 +
 .../src/external_trait_impls/rkyv/hash_map.rs | 125 +
 .../src/external_trait_impls/rkyv/hash_set.rs | 123 +
 .../src/external_trait_impls/rkyv/mod.rs | 2 +
 .../src/external_trait_impls/serde.rs | 63 +-
 vendor/hashbrown/src/lib.rs | 77 +-
 vendor/hashbrown/src/macros.rs | 2 +-
 vendor/hashbrown/src/map.rs | 1318 +++--
 vendor/hashbrown/src/raw/alloc.rs | 57 +-
 vendor/hashbrown/src/raw/bitmask.rs | 99 +-
 vendor/hashbrown/src/raw/generic.rs | 59 +-
 vendor/hashbrown/src/raw/mod.rs | 3318 +++++++++--
 vendor/hashbrown/src/raw/neon.rs | 124 +
 vendor/hashbrown/src/raw/sse2.rs | 31 +-
 vendor/hashbrown/src/rustc_entry.rs | 32 +-
 vendor/hashbrown/src/scopeguard.rs | 14 +-
 vendor/hashbrown/src/set.rs | 475 +-
 vendor/hashbrown/src/table.rs | 2064 +++++++
 vendor/hashbrown/tests/equivalent_trait.rs | 53 +
 vendor/hashbrown/tests/raw.rs | 11 +
 vendor/hashbrown/tests/rayon.rs | 4 +-
 vendor/hashbrown/tests/set.rs | 2 +-
 vendor/indexmap/.cargo-checksum.json | 2 +-
 vendor/indexmap/Cargo.toml | 36 +-
 vendor/indexmap/README.md | 2 +-
 vendor/indexmap/RELEASES.md | 58 +
 vendor/indexmap/build.rs | 8 -
 vendor/indexmap/src/arbitrary.rs | 2 +
 vendor/indexmap/src/equivalent.rs | 27 -
 vendor/indexmap/src/lib.rs | 112 +-
 vendor/indexmap/src/macros.rs | 32 +-
 vendor/indexmap/src/map.rs | 1049 +---
 vendor/indexmap/src/map/core.rs | 164 +-
 vendor/indexmap/src/map/core/raw.rs | 67 +-
 vendor/indexmap/src/map/iter.rs | 541 ++
 vendor/indexmap/src/{ => map}/serde_seq.rs | 44 +-
 vendor/indexmap/src/map/slice.rs | 540 ++
 vendor/indexmap/src/map/tests.rs | 670 +++
 vendor/indexmap/src/mutable_keys.rs | 42 +-
 vendor/indexmap/src/rayon/map.rs | 116 +-
 vendor/indexmap/src/rayon/set.rs | 49 +-
 vendor/indexmap/src/serde.rs | 4 -
 vendor/indexmap/src/set.rs | 1185 +---
 vendor/indexmap/src/set/iter.rs | 543 ++
 vendor/indexmap/src/set/slice.rs | 341 ++
 vendor/indexmap/src/set/tests.rs | 684 +++
 vendor/indexmap/src/util.rs | 22 +
 vendor/indexmap/tests/quick.rs | 6 +
 vendor/memchr/.cargo-checksum.json | 2 +-
 vendor/memchr/Cargo.toml | 43 +-
 vendor/memchr/README.md | 125 +-
 vendor/memchr/build.rs | 88 -
 .../memchr/scripts/make-byte-frequency-table | 74 -
 vendor/memchr/src/arch/aarch64/memchr.rs | 137 +
 vendor/memchr/src/arch/aarch64/mod.rs | 7 +
 vendor/memchr/src/arch/aarch64/neon/memchr.rs | 1031 ++++
 vendor/memchr/src/arch/aarch64/neon/mod.rs | 6 +
 .../src/arch/aarch64/neon/packedpair.rs | 236 +
 vendor/memchr/src/arch/all/memchr.rs | 996 ++++
 vendor/memchr/src/arch/all/mod.rs | 234 +
 .../all/packedpair/default_rank.rs} | 2 +-
 vendor/memchr/src/arch/all/packedpair/mod.rs | 359 ++
 vendor/memchr/src/arch/all/rabinkarp.rs | 390 ++
 vendor/memchr/src/arch/all/shiftor.rs | 89 +
 .../memchr/src/{memmem => arch/all}/twoway.rs | 327 +-
 vendor/memchr/src/arch/generic/memchr.rs | 1214 ++++
 vendor/memchr/src/arch/generic/mod.rs | 14 +
 vendor/memchr/src/arch/generic/packedpair.rs | 317 +
 vendor/memchr/src/arch/mod.rs | 16 +
 vendor/memchr/src/arch/wasm32/memchr.rs | 137 +
 vendor/memchr/src/arch/wasm32/mod.rs | 7 +
 .../memchr/src/arch/wasm32/simd128/memchr.rs | 1020 ++++
 vendor/memchr/src/arch/wasm32/simd128/mod.rs | 6 +
 .../src/arch/wasm32/simd128/packedpair.rs | 229 +
 vendor/memchr/src/arch/x86_64/avx2/memchr.rs | 1352 +++++
 vendor/memchr/src/arch/x86_64/avx2/mod.rs | 6 +
 .../memchr/src/arch/x86_64/avx2/packedpair.rs | 272 +
 vendor/memchr/src/arch/x86_64/memchr.rs | 335 ++
 vendor/memchr/src/arch/x86_64/mod.rs | 8 +
 vendor/memchr/src/arch/x86_64/sse2/memchr.rs | 1077 ++++
 vendor/memchr/src/arch/x86_64/sse2/mod.rs | 6 +
 .../memchr/src/arch/x86_64/sse2/packedpair.rs | 232 +
 vendor/memchr/src/cow.rs | 62 +-
 vendor/memchr/src/ext.rs | 52 +
 vendor/memchr/src/lib.rs | 90 +-
 vendor/memchr/src/macros.rs | 20 +
 vendor/memchr/src/memchr.rs | 903 +++
 vendor/memchr/src/memchr/c.rs | 44 -
 vendor/memchr/src/memchr/fallback.rs | 329 --
 vendor/memchr/src/memchr/iter.rs | 173 -
 vendor/memchr/src/memchr/mod.rs | 410 --
 vendor/memchr/src/memchr/naive.rs | 25 -
 vendor/memchr/src/memchr/x86/avx.rs | 755 ---
 vendor/memchr/src/memchr/x86/mod.rs | 148 -
 vendor/memchr/src/memchr/x86/sse2.rs | 791 ---
 vendor/memchr/src/memchr/x86/sse42.rs | 72 -
 vendor/memchr/src/memmem/genericsimd.rs | 266 -
 vendor/memchr/src/memmem/mod.rs | 834 +--
 .../memchr/src/memmem/prefilter/fallback.rs | 122 -
 .../src/memmem/prefilter/genericsimd.rs | 207 -
 vendor/memchr/src/memmem/prefilter/mod.rs | 570 --
 vendor/memchr/src/memmem/prefilter/wasm.rs | 39 -
 vendor/memchr/src/memmem/prefilter/x86/avx.rs | 46 -
 vendor/memchr/src/memmem/prefilter/x86/mod.rs | 5 -
 vendor/memchr/src/memmem/prefilter/x86/sse.rs | 42 -
 vendor/memchr/src/memmem/rabinkarp.rs | 233 -
 vendor/memchr/src/memmem/rarebytes.rs | 136 -
 vendor/memchr/src/memmem/searcher.rs | 1030 ++++
 vendor/memchr/src/memmem/util.rs | 88 -
 vendor/memchr/src/memmem/vector.rs | 131 -
 vendor/memchr/src/memmem/wasm.rs | 75 -
 vendor/memchr/src/memmem/x86/avx.rs | 139 -
 vendor/memchr/src/memmem/x86/mod.rs | 2 -
 vendor/memchr/src/memmem/x86/sse.rs | 89 -
 vendor/memchr/src/tests/memchr/iter.rs | 230 -
 vendor/memchr/src/tests/memchr/memchr.rs | 134 -
 vendor/memchr/src/tests/memchr/mod.rs | 314 +-
 vendor/memchr/src/tests/memchr/naive.rs | 33 +
 vendor/memchr/src/tests/memchr/prop.rs | 321 +
 vendor/memchr/src/tests/memchr/simple.rs | 23 -
 vendor/memchr/src/tests/memchr/testdata.rs | 351 --
 vendor/memchr/src/tests/mod.rs | 18 +-
 vendor/memchr/src/tests/packedpair.rs | 216 +
 vendor/memchr/src/tests/substring/mod.rs | 232 +
 vendor/memchr/src/tests/substring/naive.rs | 45 +
 vendor/memchr/src/tests/substring/prop.rs | 126 +
 vendor/memchr/src/vector.rs | 515 ++
 vendor/proc-macro2/.cargo-checksum.json | 1 +
 vendor/proc-macro2/Cargo.toml | 67 +
 vendor/proc-macro2/LICENSE-APACHE | 176 +
 vendor/proc-macro2/LICENSE-MIT | 23 +
 vendor/proc-macro2/README.md | 94 +
 vendor/proc-macro2/build.rs | 202 +
 vendor/proc-macro2/build/probe.rs | 21 +
 vendor/proc-macro2/rust-toolchain.toml | 2 +
 vendor/proc-macro2/src/detection.rs | 75 +
 vendor/proc-macro2/src/extra.rs | 84 +
 vendor/proc-macro2/src/fallback.rs | 1143 ++++
 vendor/proc-macro2/src/lib.rs | 1328 +++++
 vendor/proc-macro2/src/location.rs | 29 +
 vendor/proc-macro2/src/marker.rs | 21 +
 vendor/proc-macro2/src/parse.rs | 996 ++++
 vendor/proc-macro2/src/rcvec.rs | 145 +
 vendor/proc-macro2/src/wrapper.rs | 930 +++
 vendor/proc-macro2/tests/comments.rs | 105 +
 vendor/proc-macro2/tests/features.rs | 8 +
 vendor/proc-macro2/tests/marker.rs | 99 +
 vendor/proc-macro2/tests/test.rs | 759 +++
 vendor/proc-macro2/tests/test_fmt.rs | 28 +
 vendor/proc-macro2/tests/test_size.rs | 42 +
 vendor/quote/.cargo-checksum.json | 1 +
 vendor/quote/Cargo.toml | 50 +
 vendor/quote/LICENSE-APACHE | 176 +
 vendor/quote/LICENSE-MIT | 23 +
 vendor/quote/README.md | 272 +
 vendor/quote/rust-toolchain.toml | 2 +
 vendor/quote/src/ext.rs | 110 +
 vendor/quote/src/format.rs | 168 +
 vendor/quote/src/ident_fragment.rs | 88 +
 vendor/quote/src/lib.rs | 1444 +++++
 vendor/quote/src/runtime.rs | 530 ++
 vendor/quote/src/spanned.rs | 50 +
 vendor/quote/src/to_tokens.rs | 209 +
 vendor/quote/tests/compiletest.rs | 7 +
 vendor/quote/tests/test.rs | 549 ++
 .../ui/does-not-have-iter-interpolated-dup.rs | 9 +
 ...does-not-have-iter-interpolated-dup.stderr | 11 +
 .../ui/does-not-have-iter-interpolated.rs | 9 +
 .../ui/does-not-have-iter-interpolated.stderr | 11 +
 .../tests/ui/does-not-have-iter-separated.rs | 5 +
 .../ui/does-not-have-iter-separated.stderr | 10 +
 vendor/quote/tests/ui/does-not-have-iter.rs | 5 +
 .../quote/tests/ui/does-not-have-iter.stderr | 10 +
 vendor/quote/tests/ui/not-quotable.rs | 7 +
 vendor/quote/tests/ui/not-quotable.stderr | 20 +
 vendor/quote/tests/ui/not-repeatable.rs | 8 +
 vendor/quote/tests/ui/not-repeatable.stderr | 35 +
 vendor/quote/tests/ui/wrong-type-span.rs | 7 +
 vendor/quote/tests/ui/wrong-type-span.stderr | 10 +
 vendor/regex-automata/.cargo-checksum.json | 1 +
 vendor/regex-automata/Cargo.toml | 183 +
 vendor/regex-automata/LICENSE-APACHE | 201 +
 vendor/regex-automata/LICENSE-MIT | 25 +
 vendor/regex-automata/README.md | 117 +
 vendor/regex-automata/src/dfa/accel.rs | 517 ++
 vendor/regex-automata/src/dfa/automaton.rs | 2260 ++++++++
 vendor/regex-automata/src/dfa/dense.rs | 5147 +++++++++++++++++
 vendor/regex-automata/src/dfa/determinize.rs | 599 ++
 vendor/regex-automata/src/dfa/minimize.rs | 463 ++
 vendor/regex-automata/src/dfa/mod.rs | 360 ++
 vendor/regex-automata/src/dfa/onepass.rs | 3192 ++++++++++
 vendor/regex-automata/src/dfa/regex.rs | 871 +++
 vendor/regex-automata/src/dfa/remapper.rs | 242 +
 vendor/regex-automata/src/dfa/search.rs | 644 +++
 vendor/regex-automata/src/dfa/sparse.rs | 2639 +++++++++
 vendor/regex-automata/src/dfa/special.rs | 494 ++
 vendor/regex-automata/src/dfa/start.rs | 74 +
 vendor/regex-automata/src/hybrid/dfa.rs | 4418 ++++++++++++++
 vendor/regex-automata/src/hybrid/error.rs | 242 +
 vendor/regex-automata/src/hybrid/id.rs | 354 ++
 vendor/regex-automata/src/hybrid/mod.rs | 144 +
 vendor/regex-automata/src/hybrid/regex.rs | 895 +++
 vendor/regex-automata/src/hybrid/search.rs | 802 +++
 vendor/regex-automata/src/lib.rs | 648 +++
 vendor/regex-automata/src/macros.rs | 20 +
 vendor/regex-automata/src/meta/error.rs | 241 +
 vendor/regex-automata/src/meta/limited.rs | 255 +
 vendor/regex-automata/src/meta/literal.rs | 81 +
 vendor/regex-automata/src/meta/mod.rs | 62 +
 vendor/regex-automata/src/meta/regex.rs | 3649 ++++++++++++
 .../regex-automata/src/meta/reverse_inner.rs | 220 +
 vendor/regex-automata/src/meta/stopat.rs | 212 +
 vendor/regex-automata/src/meta/strategy.rs | 1914 ++++++
 vendor/regex-automata/src/meta/wrappers.rs | 1351 +++++
 vendor/regex-automata/src/nfa/mod.rs | 55 +
 .../src/nfa/thompson/backtrack.rs | 1908 ++++++
 .../src/nfa/thompson/builder.rs | 1337 +++++
 .../src/nfa/thompson/compiler.rs | 2265 ++++++++
 .../regex-automata/src/nfa/thompson/error.rs | 185 +
 .../src/nfa/thompson/literal_trie.rs | 528 ++
 vendor/regex-automata/src/nfa/thompson/map.rs | 296 +
 vendor/regex-automata/src/nfa/thompson/mod.rs | 81 +
 vendor/regex-automata/src/nfa/thompson/nfa.rs | 2099 +++++++
 .../regex-automata/src/nfa/thompson/pikevm.rs | 2359 ++++++++
 .../src/nfa/thompson/range_trie.rs | 1055 ++++
 vendor/regex-automata/src/util/alphabet.rs | 1139 ++++
 vendor/regex-automata/src/util/captures.rs | 2548 ++++++++
 .../src/util/determinize/mod.rs | 682 +++
 .../src/util/determinize/state.rs | 907 +++
 vendor/regex-automata/src/util/empty.rs | 265 +
 vendor/regex-automata/src/util/escape.rs | 84 +
 vendor/regex-automata/src/util/int.rs | 252 +
 vendor/regex-automata/src/util/interpolate.rs | 579 ++
 vendor/regex-automata/src/util/iter.rs | 1027 ++++
 vendor/regex-automata/src/util/lazy.rs | 461 ++
 vendor/regex-automata/src/util/look.rs | 2547 ++++++++
 vendor/regex-automata/src/util/memchr.rs | 93 +
 vendor/regex-automata/src/util/mod.rs | 57 +
 vendor/regex-automata/src/util/pool.rs | 1199 ++++
 .../src/util/prefilter/aho_corasick.rs | 149 +
 .../src/util/prefilter/byteset.rs | 58 +
 .../src/util/prefilter/memchr.rs | 186 +
 .../src/util/prefilter/memmem.rs | 88 +
 .../regex-automata/src/util/prefilter/mod.rs | 696 +++
 .../src/util/prefilter/teddy.rs | 160 +
 vendor/regex-automata/src/util/primitives.rs | 776 +++
 vendor/regex-automata/src/util/search.rs | 1969 +++++++
 vendor/regex-automata/src/util/sparse_set.rs | 239 +
 vendor/regex-automata/src/util/start.rs | 479 ++
 vendor/regex-automata/src/util/syntax.rs | 482 ++
 .../src/util/unicode_data/mod.rs | 17 +
 .../src/util/unicode_data/perl_word.rs | 781 +++
 vendor/regex-automata/src/util/utf8.rs | 196 +
 vendor/regex-automata/src/util/wire.rs | 975 ++++
 vendor/regex-automata/test | 95 +
 vendor/regex-automata/tests/dfa/api.rs | 69 +
 vendor/regex-automata/tests/dfa/mod.rs | 8 +
 .../regex-automata/tests/dfa/onepass/mod.rs | 2 +
 .../regex-automata/tests/dfa/onepass/suite.rs | 197 +
 vendor/regex-automata/tests/dfa/regression.rs | 48 +
 vendor/regex-automata/tests/dfa/suite.rs | 443 ++
 vendor/regex-automata/tests/fuzz/dense.rs | 52 +
 vendor/regex-automata/tests/fuzz/mod.rs | 2 +
 vendor/regex-automata/tests/fuzz/sparse.rs | 132 +
 ...h-9486fb7c8a93b12c12a62166b43d31640c0208a9 | Bin 0 -> 1894 bytes
 ...m-9486fb7c8a93b12c12a62166b43d31640c0208a9 | Bin 0 -> 1882 bytes
 ...h-0da59c0434eaf35e5a6b470fa9244bb79c72b000 | Bin 0 -> 941 bytes
 ...h-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 | Bin 0 -> 924 bytes
 ...h-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 | Bin 0 -> 933 bytes
 ...h-a1b839d899ced76d5d7d0f78f9edb7a421505838 | Bin 0 -> 802 bytes
 ...h-c383ae07ec5e191422eadc492117439011816570 | Bin 0 -> 924 bytes
 ...h-d07703ceb94b10dcd9e4acb809f2051420449e2b | Bin 0 -> 922 bytes
 ...h-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 | Bin 0 -> 728 bytes
 vendor/regex-automata/tests/gen/README.md | 65 +
 vendor/regex-automata/tests/gen/dense/mod.rs | 22 +
 .../tests/gen/dense/multi_pattern_v2.rs | 43 +
 .../dense/multi_pattern_v2_fwd.bigendian.dfa | Bin 0 -> 11100 bytes
 .../multi_pattern_v2_fwd.littleendian.dfa | Bin 0 -> 11100 bytes
 .../dense/multi_pattern_v2_rev.bigendian.dfa | Bin 0 -> 7584 bytes
 .../multi_pattern_v2_rev.littleendian.dfa | Bin 0 -> 7584 bytes
 vendor/regex-automata/tests/gen/mod.rs | 2 +
 vendor/regex-automata/tests/gen/sparse/mod.rs | 22 +
 .../tests/gen/sparse/multi_pattern_v2.rs | 37 +
 .../sparse/multi_pattern_v2_fwd.bigendian.dfa | Bin 0 -> 3476 bytes
 .../multi_pattern_v2_fwd.littleendian.dfa | Bin 0 -> 3476 bytes
 .../sparse/multi_pattern_v2_rev.bigendian.dfa | Bin 0 -> 1920 bytes
 .../multi_pattern_v2_rev.littleendian.dfa | Bin 0 -> 1920 bytes
 vendor/regex-automata/tests/hybrid/api.rs | 171 +
 vendor/regex-automata/tests/hybrid/mod.rs | 3 +
 vendor/regex-automata/tests/hybrid/suite.rs | 347 ++
 vendor/regex-automata/tests/lib.rs | 115 +
 vendor/regex-automata/tests/meta/mod.rs | 2 +
 vendor/regex-automata/tests/meta/suite.rs | 200 +
 vendor/regex-automata/tests/nfa/mod.rs | 1 +
 .../tests/nfa/thompson/backtrack/mod.rs | 2 +
 .../tests/nfa/thompson/backtrack/suite.rs | 213 +
 .../regex-automata/tests/nfa/thompson/mod.rs | 4 +
 .../tests/nfa/thompson/pikevm/mod.rs | 2 +
 .../tests/nfa/thompson/pikevm/suite.rs | 162 +
 vendor/regex-syntax/.cargo-checksum.json | 2 +-
 vendor/regex-syntax/Cargo.toml | 32 +-
 vendor/regex-syntax/README.md | 10 +-
 vendor/regex-syntax/src/ast/mod.rs | 560 +-
 vendor/regex-syntax/src/ast/parse.rs | 990 +++-
 vendor/regex-syntax/src/ast/print.rs | 59 +-
 vendor/regex-syntax/src/ast/visitor.rs | 59 +-
 vendor/regex-syntax/src/debug.rs | 107 +
 vendor/regex-syntax/src/error.rs | 57 +-
 vendor/regex-syntax/src/hir/interval.rs | 83 +-
 vendor/regex-syntax/src/hir/literal.rs | 3214 ++++++++++
 vendor/regex-syntax/src/hir/literal/mod.rs | 1686 ------
 vendor/regex-syntax/src/hir/mod.rs | 3536 +++++++----
 vendor/regex-syntax/src/hir/print.rs | 425 +-
 vendor/regex-syntax/src/hir/translate.rs | 2105 ++++---
 vendor/regex-syntax/src/hir/visitor.rs | 34 +-
 vendor/regex-syntax/src/lib.rs | 187 +-
 vendor/regex-syntax/src/parser.rs | 108 +-
 .../src/freqs.rs => regex-syntax/src/rank.rs} | 5 +-
 vendor/regex-syntax/src/unicode.rs | 384 +-
 vendor/regex-syntax/src/utf8.rs | 33 +-
 vendor/regex-syntax/test | 12 +-
 vendor/regex/.cargo-checksum.json | 2 +-
 vendor/regex/CHANGELOG.md | 478 +-
 vendor/regex/Cargo.toml | 178 +-
 vendor/regex/Cross.toml | 7 +
 vendor/regex/HACKING.md | 341 --
 vendor/regex/PERFORMANCE.md | 277 -
 vendor/regex/README.md | 220 +-
 vendor/regex/UNICODE.md | 13 +-
 vendor/regex/bench/README.md | 2 +
 vendor/regex/examples/regexdna-input.txt | 1671 ------
 vendor/regex/examples/regexdna-output.txt | 13 -
 .../examples/shootout-regex-dna-bytes.rs | 68 -
 .../examples/shootout-regex-dna-cheat.rs | 90 -
 .../examples/shootout-regex-dna-replace.rs | 17 -
 .../shootout-regex-dna-single-cheat.rs | 75 -
 .../examples/shootout-regex-dna-single.rs | 57 -
 vendor/regex/examples/shootout-regex-dna.rs | 68 -
 vendor/regex/record/README.md | 4 +
 .../record/compile-test/2023-04-19_1.7.3.csv | 11 +
 .../record/compile-test/2023-04-20_master.csv | 11 +
 .../regex/record/compile-test/2023-07-05.csv | 37 +
 vendor/regex/record/compile-test/README.md | 27 +
 .../record/old-bench-log/01-lazy-dfa/dynamic | 73 +
 .../01-lazy-dfa/dynamic-no-lazy-dfa | 85 +
 .../record/old-bench-log/01-lazy-dfa/native | 65 +
 .../record/old-bench-log/01-lazy-dfa/nfa | 74 +
 .../record/old-bench-log/01-lazy-dfa/pcre | 60 +
 .../regex/record/old-bench-log/02-set/dynamic | 78 +
 .../regex/record/old-bench-log/03-bytes/onig | 68 +
 .../regex/record/old-bench-log/03-bytes/pcre | 66 +
 .../regex/record/old-bench-log/03-bytes/rust | 83 +
 .../record/old-bench-log/03-bytes/rust-bytes | 66 +
 vendor/regex/record/old-bench-log/04/onig | 78 +
 .../regex/record/old-bench-log/04/pcre1-jit | 77 +
 .../regex/record/old-bench-log/04/pcre2-jit | 77 +
 vendor/regex/record/old-bench-log/04/re2 | 79 +
 vendor/regex/record/old-bench-log/04/rust | 81 +
 vendor/regex/record/old-bench-log/04/tcl | 72 +
 vendor/regex/record/old-bench-log/05/onig | 99 +
 .../record/old-bench-log/05/onig-vs-rust | 95 +
 vendor/regex/record/old-bench-log/05/pcre1 | 98 +
 .../record/old-bench-log/05/pcre1-vs-rust | 94 +
 vendor/regex/record/old-bench-log/05/pcre2 | 98 +
 .../record/old-bench-log/05/pcre2-vs-rust | 94 +
 vendor/regex/record/old-bench-log/05/re2 | 101 +
 .../regex/record/old-bench-log/05/re2-vs-rust | 97 +
 vendor/regex/record/old-bench-log/05/rust | 103 +
 vendor/regex/record/old-bench-log/05/tcl | 94 +
 .../regex/record/old-bench-log/05/tcl-vs-rust | 90 +
 .../regex/record/old-bench-log/06/dphobos-dmd | 98 +
 .../record/old-bench-log/06/dphobos-dmd-ct | 99 +
 .../regex/record/old-bench-log/06/dphobos-ldc | 100 +
 .../record/old-bench-log/06/dphobos-ldc-ct | 99 +
 vendor/regex/record/old-bench-log/06/pcre1 | 98 +
 vendor/regex/record/old-bench-log/06/pcre2 | 98 +
 vendor/regex/record/old-bench-log/06/re2 | 101 +
 vendor/regex/record/old-bench-log/06/rust | 113 +
 vendor/regex/record/old-bench-log/07/boost | 97 +
 .../regex/record/old-bench-log/07/dphobos-dmd | 100 +
 .../record/old-bench-log/07/dphobos-dmd-ct | 98 +
 .../regex/record/old-bench-log/07/oniguruma | 99 +
 vendor/regex/record/old-bench-log/07/pcre1 | 98 +
 vendor/regex/record/old-bench-log/07/pcre2 | 98 +
 vendor/regex/record/old-bench-log/07/re2 | 101 +
 vendor/regex/record/old-bench-log/07/rust | 113 +
 .../regex/record/old-bench-log/07/rust-bytes | 101 +
 vendor/regex/record/old-bench-log/07/stdcpp | 87 +
 .../record/old-bench-log/07/stdcpp-libcxx | 87 +
 vendor/regex/record/old-bench-log/07/tcl | 94 +
 .../old-bench-log/08-new-memmem/rust-after-01 | 124 +
 .../old-bench-log/08-new-memmem/rust-after-02 | 124 +
 .../08-new-memmem/rust-before-01 | 124 +
 .../08-new-memmem/rust-before-02 | 124 +
 .../old-bench-log/09-new-baseline/pcre2 | 98 +
 .../record/old-bench-log/09-new-baseline/re2 | 101 +
 .../record/old-bench-log/09-new-baseline/rust | 124 +
 .../old-bench-log/09-new-baseline/rust-bytes | 112 +
 .../10-last-frontier/rust-after-literal.log | 124 +
 .../10-last-frontier/rust-before-literal.log | 124 +
 .../rust-bytes-after-literal.log | 112 +
 .../rust-bytes-before-literal.log | 112 +
 .../record/old-bench-log/11-regex-1.7.3/rust | 124 +
 .../old-bench-log/11-regex-1.7.3/rust-bytes | 112 +
 .../record/old-bench-log/12-regex-1.8.1/rust | 124 +
 .../old-bench-log/12-regex-1.8.1/rust-bytes | 112 +
 .../record/old-bench-log/13-regex-1.9.0/rust | 115 +
 .../old-bench-log/13-regex-1.9.0/rust-bytes | 103 +
 vendor/regex/record/old-bench-log/README.md | 11 +
 .../regex/record/old-bench-log/old/01-before | 28 +
 .../old-bench-log/old/02-new-syntax-crate | 28 +
 .../old-bench-log/old/03-new-syntax-crate | 28 +
 .../old-bench-log/old/04-fixed-benchmark | 28 +
 .../old-bench-log/old/05-thread-caching | 29 +
 .../record/old-bench-log/old/06-major-dynamic | 33 +
 .../record/old-bench-log/old/06-major-macro | 33 +
 .../old-bench-log/old/07-prefix-improvements | 33 +
 .../record/old-bench-log/old/08-case-fixes | 33 +
 .../old/09-before-compiler-rewrite | 33 +
 .../old-bench-log/old/10-compiler-rewrite | 33 +
 .../old-bench-log/old/11-compiler-rewrite | 33 +
 .../record/old-bench-log/old/12-executor | 35 +
 .../old-bench-log/old/12-executor-bytes | 35 +
 .../old/13-cache-byte-range-suffixes | 35 +
 vendor/regex/src/backtrack.rs | 282 -
 vendor/regex/src/builders.rs | 2539 ++++
 vendor/regex/src/bytes.rs | 91 +
 vendor/regex/src/compile.rs | 1264 ----
 vendor/regex/src/dfa.rs | 1945 ------
 vendor/regex/src/error.rs | 75 +-
 vendor/regex/src/exec.rs | 1655 ------
 vendor/regex/src/expand.rs | 239 -
 vendor/regex/src/find_byte.rs | 5 +-
 vendor/regex/src/input.rs | 432 --
 vendor/regex/src/lib.rs | 1383 +++--
 vendor/regex/src/literal/imp.rs | 402 --
 vendor/regex/src/literal/mod.rs | 55 -
 vendor/regex/src/pattern.rs | 4 +-
 vendor/regex/src/pikevm.rs | 360 --
 vendor/regex/src/pool.rs | 333 --
 vendor/regex/src/prog.rs | 447 --
 vendor/regex/src/re_builder.rs | 421 --
 vendor/regex/src/re_bytes.rs | 1260 ----
 vendor/regex/src/re_set.rs | 507 --
 vendor/regex/src/re_trait.rs | 294 -
 vendor/regex/src/re_unicode.rs | 1311 -----
 vendor/regex/src/regex/bytes.rs | 2600 +++++++++
 vendor/regex/src/regex/mod.rs | 2 +
 vendor/regex/src/regex/string.rs | 2582 +++++++++
 vendor/regex/src/regexset/bytes.rs | 710 +++
 vendor/regex/src/regexset/mod.rs | 2 +
 vendor/regex/src/regexset/string.rs | 706 +++
 vendor/regex/src/sparse.rs | 84 -
 vendor/regex/src/testdata/LICENSE | 19 -
 vendor/regex/src/testdata/README | 17 -
 vendor/regex/src/utf8.rs | 264 -
 vendor/regex/test | 28 +-
 vendor/regex/testdata/README.md | 22 +
 vendor/regex/testdata/anchored.toml | 127 +
 vendor/regex/testdata/bytes.toml | 235 +
 vendor/regex/testdata/crazy.toml | 315 +
 vendor/regex/testdata/crlf.toml | 117 +
 vendor/regex/testdata/earliest.toml | 52 +
 vendor/regex/testdata/empty.toml | 113 +
 vendor/regex/testdata/expensive.toml | 23 +
 vendor/regex/testdata/flags.toml | 68 +
 vendor/regex/testdata/fowler/basic.toml | 1611 ++++++
 vendor/regex/testdata/fowler/dat/README | 25 +
 .../fowler/dat}/basic.dat | 32 +-
 .../fowler/dat}/nullsubexpr.dat | 21 +-
 .../fowler/dat}/repetition.dat | 20 +-
 vendor/regex/testdata/fowler/nullsubexpr.toml | 405 ++
 vendor/regex/testdata/fowler/repetition.toml | 746 +++
 vendor/regex/testdata/iter.toml | 143 +
 vendor/regex/testdata/leftmost-all.toml | 25 +
 vendor/regex/testdata/line-terminator.toml | 109 +
 vendor/regex/testdata/misc.toml | 99 +
 vendor/regex/testdata/multiline.toml | 845 +++
 vendor/regex/testdata/no-unicode.toml | 222 +
 vendor/regex/testdata/overlapping.toml | 280 +
 vendor/regex/testdata/regex-lite.toml | 98 +
 vendor/regex/testdata/regression.toml | 830 +++
 vendor/regex/testdata/set.toml | 641 ++
 vendor/regex/testdata/substring.toml | 36 +
 vendor/regex/testdata/unicode.toml | 517 ++
 vendor/regex/testdata/utf8.toml | 399 ++
 .../regex/testdata/word-boundary-special.toml | 687 +++
 vendor/regex/testdata/word-boundary.toml | 781 +++
 vendor/regex/tests/api.rs | 234 -
 vendor/regex/tests/api_str.rs | 34 -
 vendor/regex/tests/bytes.rs | 107 -
 vendor/regex/tests/consistent.rs | 238 -
 vendor/regex/tests/crates_regex.rs | 3287 -----
 vendor/regex/tests/crazy.rs | 459 --
 vendor/regex/tests/flags.rs | 31 -
 vendor/regex/tests/fowler.rs | 1588 -----
 vendor/regex/tests/fuzz/mod.rs | 166 +
 ...h-7eb3351f0965e5d6c1cb98aa8585949ef96531ff | Bin 0 -> 77 bytes
 ...h-8760b19b25d74e3603d4c643e9c7404fdd3631f9 | Bin 0 -> 21 bytes
 ...h-cd33b13df59ea9d74503986f9d32a270dd43cc04 | Bin 0 -> 63 bytes
 ...m-8760b19b25d74e3603d4c643e9c7404fdd3631f9 | Bin 0 -> 16 bytes
 ...t-3ab758ea520027fefd3f00e1384d9aeef155739e | Bin 0 -> 68 bytes
 ...t-5345fccadf3812c53c3ccc7af5aa2741b7b2106c | Bin 0 -> 84 bytes
 ...t-6bd643eec330166e4ada91da2d3f284268481085 | Bin 0 -> 105 bytes
 ...t-93c73a43581f205f9aaffd9c17e52b34b17becd0 | Bin 0 -> 22 bytes
 ...t-9ca9cc9929fee1fcbb847a78384effb8b98ea18a | Bin 0 -> 83 bytes
 ...t-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 | Bin 0 -> 102 bytes
 vendor/regex/tests/lib.rs | 58 +
 vendor/regex/tests/macros.rs | 160 -
 vendor/regex/tests/macros_bytes.rs | 39 -
 vendor/regex/tests/macros_str.rs | 38 -
 vendor/regex/tests/misc.rs | 147 +-
 vendor/regex/tests/multiline.rs | 144 -
 vendor/regex/tests/noparse.rs | 45 -
 vendor/regex/tests/regression.rs | 246 +-
 vendor/regex/tests/regression_fuzz.rs | 34 +-
 vendor/regex/tests/replace.rs | 143 +-
 vendor/regex/tests/searcher.rs | 24 +-
 vendor/regex/tests/set.rs | 67 -
 vendor/regex/tests/shortest_match.rs | 14 -
 vendor/regex/tests/suffix_reverse.rs | 6 -
 vendor/regex/tests/suite_bytes.rs | 108 +
 vendor/regex/tests/suite_bytes_set.rs | 71 +
 vendor/regex/tests/suite_string.rs | 114 +
 vendor/regex/tests/suite_string_set.rs | 79 +
 vendor/regex/tests/test_backtrack.rs | 56 -
 vendor/regex/tests/test_backtrack_bytes.rs | 55 -
 .../regex/tests/test_backtrack_utf8bytes.rs | 58 -
 vendor/regex/tests/test_crates_regex.rs | 54 -
 vendor/regex/tests/test_default.rs | 232 -
 vendor/regex/tests/test_default_bytes.rs | 75 -
 vendor/regex/tests/test_nfa.rs | 50 -
 vendor/regex/tests/test_nfa_bytes.rs | 55 -
 vendor/regex/tests/test_nfa_utf8bytes.rs | 54 -
 vendor/regex/tests/unicode.rs | 251 -
 vendor/regex/tests/word_boundary.rs | 89 -
 vendor/regex/tests/word_boundary_ascii.rs | 9 -
 vendor/regex/tests/word_boundary_unicode.rs | 6 -
 vendor/serde/.cargo-checksum.json | 2 +-
 vendor/serde/Cargo.toml | 32 +-
 vendor/serde/README.md | 8 +-
 vendor/serde/build.rs | 68 +-
 vendor/serde/crates-io.md | 2 +-
 vendor/serde/src/de/format.rs | 4 +-
 vendor/serde/src/de/ignored_any.rs | 39 +-
 vendor/serde/src/de/impls.rs | 765 ++-
 vendor/serde/src/de/mod.rs | 197 +-
 vendor/serde/src/de/seed.rs | 2 +-
 vendor/serde/src/{private => de}/size_hint.rs | 16 +-
 vendor/serde/src/de/utf8.rs | 46 -
 vendor/serde/src/de/value.rs | 90 +-
 vendor/serde/src/integer128.rs | 79 +-
 vendor/serde/src/lib.rs | 133 +-
 vendor/serde/src/macros.rs | 17 +-
 vendor/serde/src/private/de.rs | 547 +-
 vendor/serde/src/private/doc.rs | 4 +-
 vendor/serde/src/private/mod.rs | 24 +-
 vendor/serde/src/private/ser.rs | 243 +-
 vendor/serde/src/ser/fmt.rs | 18 +-
 vendor/serde/src/ser/impls.rs | 248 +-
 vendor/serde/src/ser/impossible.rs | 6 +-
 vendor/serde/src/ser/mod.rs | 264 +-
 vendor/serde/src/std_error.rs | 8 +-
 vendor/serde_derive/.cargo-checksum.json | 1 +
 vendor/serde_derive/Cargo.toml | 59 +
 vendor/serde_derive/LICENSE-APACHE | 176 +
 vendor/serde_derive/LICENSE-MIT | 23 +
 vendor/serde_derive/README.md | 114 +
 vendor/serde_derive/crates-io.md | 65 +
 vendor/serde_derive/src/bound.rs | 408 ++
 vendor/serde_derive/src/de.rs | 3148 ++++++++++
 vendor/serde_derive/src/dummy.rs | 23 +
 vendor/serde_derive/src/fragment.rs | 74 +
 vendor/serde_derive/src/internals/ast.rs | 216 +
 vendor/serde_derive/src/internals/attr.rs | 1881 ++++++
 vendor/serde_derive/src/internals/case.rs | 200 +
 vendor/serde_derive/src/internals/check.rs | 477 ++
 vendor/serde_derive/src/internals/ctxt.rs | 68 +
 vendor/serde_derive/src/internals/mod.rs | 27 +
 vendor/serde_derive/src/internals/receiver.rs | 292 +
 vendor/serde_derive/src/internals/respan.rs | 16 +
 vendor/serde_derive/src/internals/symbol.rs | 71 +
 vendor/serde_derive/src/lib.rs | 101 +
 vendor/serde_derive/src/pretend.rs | 185 +
 vendor/serde_derive/src/ser.rs | 1359 +++++
 vendor/serde_derive/src/this.rs | 32 +
 vendor/serde_spanned/.cargo-checksum.json | 2 +-
 vendor/serde_spanned/Cargo.toml | 58 +-
 vendor/serde_spanned/LICENSE-APACHE | 403 +-
 vendor/serde_spanned/LICENSE-MIT | 38 +-
 vendor/serde_spanned/src/spanned.rs | 57 +
 vendor/syn/.cargo-checksum.json | 1 +
 vendor/syn/Cargo.toml | 147 +
 vendor/syn/LICENSE-APACHE | 176 +
 vendor/syn/LICENSE-MIT | 23 +
 vendor/syn/README.md | 284 +
 vendor/syn/benches/file.rs | 57 +
 vendor/syn/benches/rust.rs | 182 +
 vendor/syn/src/attr.rs | 776 +++
 vendor/syn/src/bigint.rs | 66 +
 vendor/syn/src/buffer.rs | 432 ++
 vendor/syn/src/custom_keyword.rs | 259 +
 vendor/syn/src/custom_punctuation.rs | 302 +
 vendor/syn/src/data.rs | 404 ++
 vendor/syn/src/derive.rs | 245 +
 vendor/syn/src/discouraged.rs | 219 +
 vendor/syn/src/drops.rs | 58 +
 vendor/syn/src/error.rs | 467 ++
 vendor/syn/src/export.rs | 73 +
 vendor/syn/src/expr.rs | 3506 +++++++++
 vendor/syn/src/ext.rs | 135 +
 vendor/syn/src/file.rs | 125 +
 vendor/syn/src/gen/clone.rs | 2181 +++++++
 vendor/syn/src/gen/debug.rs | 3052 ++++++++++
 vendor/syn/src/gen/eq.rs | 2148 +++++++
 vendor/syn/src/gen/fold.rs | 3459 +++++++++++
 vendor/syn/src/gen/hash.rs | 2804 +++++++++
 vendor/syn/src/gen/visit.rs | 3844 ++++++++++++
 vendor/syn/src/gen/visit_mut.rs | 3847 ++++++++++++
 vendor/syn/src/gen_helper.rs | 34 +
 vendor/syn/src/generics.rs | 1227 ++++
 vendor/syn/src/group.rs | 291 +
 vendor/syn/src/ident.rs | 107 +
 vendor/syn/src/item.rs | 3404 +++++++++++
 vendor/syn/src/lib.rs | 1010 ++++
 vendor/syn/src/lifetime.rs | 156 +
 vendor/syn/src/lit.rs | 1651 ++++++
 vendor/syn/src/lookahead.rs | 169 +
 vendor/syn/src/mac.rs | 211 +
 vendor/syn/src/macros.rs | 176 +
 vendor/syn/src/meta.rs | 426 ++
 vendor/syn/src/op.rs | 218 +
 vendor/syn/src/parse.rs | 1386 +++++
 vendor/syn/src/parse_macro_input.rs | 128 +
 vendor/syn/src/parse_quote.rs | 209 +
 vendor/syn/src/pat.rs | 917 +++
 vendor/syn/src/path.rs | 877 +++
 vendor/syn/src/print.rs | 16 +
 vendor/syn/src/punctuated.rs | 1108 ++++
 vendor/syn/src/restriction.rs | 171 +
 vendor/syn/src/sealed.rs | 4 +
 vendor/syn/src/span.rs | 63 +
 vendor/syn/src/spanned.rs | 118 +
 vendor/syn/src/stmt.rs | 452 ++
 vendor/syn/src/thread.rs | 60 +
 vendor/syn/src/token.rs | 1138 ++++
 vendor/syn/src/tt.rs | 107 +
 vendor/syn/src/ty.rs | 1189 ++++
 vendor/syn/src/verbatim.rs | 33 +
 vendor/syn/src/whitespace.rs | 65 +
 vendor/syn/tests/common/eq.rs | 879 +++
 vendor/syn/tests/common/mod.rs | 28 +
 vendor/syn/tests/common/parse.rs | 51 +
 vendor/syn/tests/macros/mod.rs | 92 +
 vendor/syn/tests/regression.rs | 5 +
 vendor/syn/tests/regression/issue1108.rs | 5 +
 vendor/syn/tests/regression/issue1235.rs | 32 +
 vendor/syn/tests/repo/mod.rs | 375 ++
 vendor/syn/tests/repo/progress.rs | 37 +
 vendor/syn/tests/test_asyncness.rs | 43 +
 vendor/syn/tests/test_attribute.rs | 225 +
 vendor/syn/tests/test_derive_input.rs | 781 +++
 vendor/syn/tests/test_expr.rs | 540 ++
 vendor/syn/tests/test_generics.rs | 282 +
 vendor/syn/tests/test_grouping.rs | 53 +
 vendor/syn/tests/test_ident.rs | 85 +
 vendor/syn/tests/test_item.rs | 332 ++
 vendor/syn/tests/test_iterators.rs | 70 +
 vendor/syn/tests/test_lit.rs | 273 +
 vendor/syn/tests/test_meta.rs | 154 +
 vendor/syn/tests/test_parse_buffer.rs | 92 +
 vendor/syn/tests/test_parse_quote.rs | 164 +
 vendor/syn/tests/test_parse_stream.rs | 14 +
 vendor/syn/tests/test_pat.rs | 152 +
 vendor/syn/tests/test_path.rs | 130 +
 vendor/syn/tests/test_precedence.rs | 548 ++
 vendor/syn/tests/test_receiver.rs | 321 +
 vendor/syn/tests/test_round_trip.rs | 239 +
 vendor/syn/tests/test_shebang.rs | 67 +
 vendor/syn/tests/test_should_parse.rs | 45 +
 vendor/syn/tests/test_size.rs | 36 +
 vendor/syn/tests/test_stmt.rs | 322 ++
 vendor/syn/tests/test_token_trees.rs | 32 +
 vendor/syn/tests/test_ty.rs | 397 ++
 vendor/syn/tests/test_visibility.rs | 144 +
 vendor/syn/tests/zzz_stable.rs | 33 +
 vendor/tinyrick-0.0.13/.cargo-checksum.json | 1 +
 vendor/tinyrick-0.0.13/Cargo.toml | 36 +
 vendor/tinyrick-0.0.13/Cross.toml | 15 +
 vendor/tinyrick-0.0.13/DEVELOPMENT.md | 61 +
 vendor/tinyrick-0.0.13/LICENSE.md | 26 +
 vendor/tinyrick-0.0.13/README.md | 179 +
 vendor/tinyrick-0.0.13/install.mk | 13 +
 vendor/tinyrick-0.0.13/makefile | 70 +
 vendor/tinyrick-0.0.13/sample.envrc | 10 +
 vendor/tinyrick-0.0.13/src/lib.rs | 256 +
 vendor/tinyrick-0.0.13/src/tinyrick.rs | 67 +
 vendor/tinyrick/.cargo-checksum.json | 2 +-
 vendor/tinyrick/Cargo.toml | 20 +-
 vendor/tinyrick/Cross.toml | 15 +
 vendor/tinyrick/DEVELOPMENT.md | 43 +-
 vendor/tinyrick/LICENSE.md | 26 +
 vendor/tinyrick/README.md | 53 +-
 vendor/tinyrick/build.sh | 90 -
 vendor/tinyrick/crosscompile-linux.sh | 28 -
 vendor/tinyrick/install.mk | 13 +
 vendor/tinyrick/makefile | 71 +
 vendor/tinyrick/sample.envrc | 10 +
 vendor/tinyrick/src/lib.rs | 337 +-
 vendor/tinyrick/src/tinyrick.rs | 93 +-
 vendor/tinyrick_extras/.cargo-checksum.json | 2 +-
 vendor/tinyrick_extras/Cargo.toml | 4 +-
 vendor/tinyrick_extras/DEVELOPMENT.md | 9 +-
 vendor/tinyrick_extras/LICENSE.md | 26 +
 vendor/tinyrick_extras/README.md | 9 +-
 vendor/tinyrick_extras/makefile | 12 +
 vendor/tinyrick_extras/src/lib.rs | 20 +
 vendor/tinyrick_extras/tinyrick.rs | 8 +
 vendor/toml_datetime/.cargo-checksum.json | 2 +-
 vendor/toml_datetime/Cargo.toml | 43 +-
 vendor/toml_datetime/LICENSE-APACHE | 403 +-
 vendor/toml_datetime/LICENSE-MIT | 38 +-
 vendor/toml_datetime/src/datetime.rs | 15 +-
 vendor/toml_edit/.cargo-checksum.json | 2 +-
 vendor/toml_edit/Cargo.toml | 45 +-
 vendor/toml_edit/LICENSE-APACHE | 403 +-
 vendor/toml_edit/LICENSE-MIT | 4 +-
 vendor/toml_edit/src/array.rs | 14 +
 vendor/toml_edit/src/array_of_tables.rs | 14 +
 vendor/toml_edit/src/de/value.rs | 4 +-
 vendor/toml_edit/src/encode.rs | 34 +-
 vendor/toml_edit/src/inline_table.rs | 18 +
 vendor/toml_edit/src/item.rs | 14 +-
 vendor/toml_edit/src/key.rs | 23 +-
 vendor/toml_edit/src/parser/array.rs | 31 +-
 vendor/toml_edit/src/parser/datetime.rs | 175 +-
 vendor/toml_edit/src/parser/document.rs | 115 +-
 vendor/toml_edit/src/parser/errors.rs | 174 +-
 vendor/toml_edit/src/parser/inline_table.rs | 39 +-
 vendor/toml_edit/src/parser/key.rs | 82 +-
 vendor/toml_edit/src/parser/macros.rs | 13 -
 vendor/toml_edit/src/parser/mod.rs | 71 +-
 vendor/toml_edit/src/parser/numbers.rs | 284 +-
 vendor/toml_edit/src/parser/state.rs | 4 +-
 vendor/toml_edit/src/parser/strings.rs | 258 +-
 vendor/toml_edit/src/parser/table.rs | 38 +-
 vendor/toml_edit/src/parser/trivia.rs | 64 +-
 vendor/toml_edit/src/parser/value.rs | 46 +-
 vendor/toml_edit/src/ser/map.rs | 192 +-
 vendor/toml_edit/src/ser/mod.rs | 2 +-
 vendor/toml_edit/src/ser/value.rs | 11 +-
 vendor/toml_edit/src/table.rs | 28 +-
 vendor/toml_edit/tests/testsuite/parse.rs | 13 +
 vendor/unicode-ident/.cargo-checksum.json | 1 +
 vendor/unicode-ident/Cargo.toml | 63 +
 vendor/unicode-ident/LICENSE-APACHE | 176 +
 vendor/unicode-ident/LICENSE-MIT | 23 +
 vendor/unicode-ident/LICENSE-UNICODE | 46 +
 vendor/unicode-ident/README.md | 283 +
 vendor/unicode-ident/benches/xid.rs | 124 +
 vendor/unicode-ident/src/lib.rs | 269 +
 vendor/unicode-ident/src/tables.rs | 651 +++
 vendor/unicode-ident/tests/compare.rs | 67 +
 vendor/unicode-ident/tests/fst/mod.rs | 11 +
 .../unicode-ident/tests/fst/xid_continue.fst | Bin 0 -> 73249 bytes
 vendor/unicode-ident/tests/fst/xid_start.fst | Bin 0 -> 65487 bytes
 vendor/unicode-ident/tests/roaring/mod.rs | 21 +
 vendor/unicode-ident/tests/static_size.rs | 95 +
 vendor/unicode-ident/tests/tables/mod.rs | 7 +
 vendor/unicode-ident/tests/tables/tables.rs | 347 ++
 vendor/unicode-ident/tests/trie/mod.rs | 7 +
 vendor/unicode-ident/tests/trie/trie.rs | 445 ++
 vendor/unicode-width/.cargo-checksum.json | 2 +-
 vendor/unicode-width/Cargo.toml | 2 +-
 vendor/unicode-width/scripts/unicode.py | 4 +-
 vendor/unicode-width/src/lib.rs | 6 +-
 vendor/unicode-width/src/tables.rs | 6 +-
 vendor/winnow/.cargo-checksum.json | 2 +-
 vendor/winnow/Cargo.toml | 78 +-
 vendor/winnow/README.md | 3 +-
 vendor/winnow/benches/contains_token.rs | 66 +-
 vendor/winnow/benches/find_slice.rs | 50 +
 vendor/winnow/benches/iter.rs | 120 +
 vendor/winnow/benches/next_slice.rs | 133 +
 vendor/winnow/benches/number.rs | 38 +-
 vendor/winnow/examples/arithmetic/bench.rs | 26 +-
 vendor/winnow/examples/arithmetic/main.rs | 47 +-
 vendor/winnow/examples/arithmetic/parser.rs | 112 +-
 .../winnow/examples/arithmetic/parser_ast.rs | 214 +-
 .../examples/arithmetic/parser_lexer.rs | 297 +
 vendor/winnow/examples/css/main.rs | 2 +-
 vendor/winnow/examples/css/parser.rs | 26 +-
 vendor/winnow/examples/custom_error.rs | 20 +-
 vendor/winnow/examples/http/parser.rs | 80 +-
 .../winnow/examples/http/parser_streaming.rs | 79 +-
 vendor/winnow/examples/ini/bench.rs | 14 +-
 vendor/winnow/examples/ini/parser.rs | 53 +-
 vendor/winnow/examples/ini/parser_str.rs | 64 +-
 vendor/winnow/examples/iterator.rs | 30 +-
 vendor/winnow/examples/json/bench.rs | 28 +-
 vendor/winnow/examples/json/main.rs | 46 +-
 vendor/winnow/examples/json/parser.rs | 129 +-
 .../winnow/examples/json/parser_dispatch.rs | 131 +-
 vendor/winnow/examples/json/parser_partial.rs | 136 +-
 vendor/winnow/examples/json_iterator.rs | 124 +-
 vendor/winnow/examples/ndjson/main.rs | 7 +-
 vendor/winnow/examples/ndjson/parser.rs | 130 +-
 vendor/winnow/examples/s_expression/parser.rs | 89 +-
 vendor/winnow/examples/string/parser.rs | 59 +-
 vendor/winnow/src/_topic/arithmetic.rs | 6 +
 vendor/winnow/src/_topic/error.rs | 4 +-
 vendor/winnow/src/_topic/language.rs | 156 +-
 vendor/winnow/src/_topic/mod.rs | 8 +-
 vendor/winnow/src/_topic/nom.rs | 49 +
 vendor/winnow/src/_topic/partial.rs | 14 +-
 vendor/winnow/src/_topic/performance.rs | 55 +
 vendor/winnow/src/_topic/stream.rs | 36 +-
 vendor/winnow/src/_topic/why.rs | 21 +-
 vendor/winnow/src/_tutorial/chapter_0.rs | 3 +-
 vendor/winnow/src/_tutorial/chapter_1.rs | 53 +-
 vendor/winnow/src/_tutorial/chapter_2.rs | 216 +-
 vendor/winnow/src/_tutorial/chapter_3.rs | 298 +-
 vendor/winnow/src/_tutorial/chapter_4.rs | 74 +-
 vendor/winnow/src/_tutorial/chapter_5.rs | 252 +-
 vendor/winnow/src/_tutorial/chapter_6.rs | 116 +-
 vendor/winnow/src/_tutorial/chapter_7.rs | 95 +-
 vendor/winnow/src/_tutorial/mod.rs | 3 +-
 vendor/winnow/src/ascii/mod.rs | 1714 ++++++
 .../winnow/src/{character => ascii}/tests.rs | 725 +--
 vendor/winnow/src/{ => binary}/bits/mod.rs | 274 +-
 vendor/winnow/src/{ => binary}/bits/tests.rs | 55 +-
 vendor/winnow/src/{number => binary}/mod.rs | 1251 ++--
 vendor/winnow/src/{number => binary}/tests.rs | 640 +-
 vendor/winnow/src/branch/tests.rs | 180 -
 vendor/winnow/src/bytes/mod.rs | 1167 ----
 vendor/winnow/src/character/mod.rs | 1808 ------
 .../{branch/mod.rs => combinator/branch.rs} | 150 +-
 vendor/winnow/src/combinator/core.rs | 491 ++
 vendor/winnow/src/combinator/mod.rs | 1505 +----
 vendor/winnow/src/combinator/multi.rs | 1259 ++++
 vendor/winnow/src/combinator/parser.rs | 905 +++
 vendor/winnow/src/combinator/sequence.rs | 175 +
 vendor/winnow/src/combinator/tests.rs | 1152 +++-
 vendor/winnow/src/error.rs | 1311 ++++-
 vendor/winnow/src/lib.rs | 32 +-
 vendor/winnow/src/macros.rs | 78 -
 vendor/winnow/src/macros/dispatch.rs | 55 +
 vendor/winnow/src/macros/mod.rs | 5 +
 vendor/winnow/src/macros/seq.rs | 268 +
 vendor/winnow/src/macros/test.rs | 378 ++
 vendor/winnow/src/multi/mod.rs | 1065 ----
 vendor/winnow/src/multi/tests.rs | 743 ---
 vendor/winnow/src/parser.rs | 650 ++-
 vendor/winnow/src/sequence/mod.rs | 169 -
 vendor/winnow/src/sequence/tests.rs | 211 -
 vendor/winnow/src/stream/impls.rs | 8 +-
 vendor/winnow/src/stream/mod.rs | 1346 ++++-
 vendor/winnow/src/stream/tests.rs | 138 +-
 vendor/winnow/src/token/mod.rs | 1174 ++++
 vendor/winnow/src/{bytes => token}/tests.rs | 172 +-
 vendor/winnow/src/trace/internals.rs | 197 +-
 vendor/winnow/src/trace/mod.rs | 49 +-
 934 files changed, 242501 insertions(+), 60153 deletions(-)
 create mode 100644 .cargo/cargo.toml
 delete mode 100644 vendor/aho-corasick/src/classes.rs
 delete mode 100644 vendor/aho-corasick/src/error.rs
 create mode 100644 vendor/aho-corasick/src/macros.rs
 delete mode 100644 vendor/aho-corasick/src/nfa.rs
 create mode 100644 vendor/aho-corasick/src/nfa/contiguous.rs
 create mode 100644 vendor/aho-corasick/src/nfa/mod.rs
 create mode 100644 vendor/aho-corasick/src/nfa/noncontiguous.rs
 create mode 100644 vendor/aho-corasick/src/packed/ext.rs
 create mode 100644 vendor/aho-corasick/src/packed/teddy/builder.rs
 delete mode 100644 vendor/aho-corasick/src/packed/teddy/compile.rs
 create mode 100644 vendor/aho-corasick/src/packed/teddy/generic.rs
 delete mode 100644 vendor/aho-corasick/src/packed/teddy/runtime.rs
 delete mode 100644 vendor/aho-corasick/src/prefilter.rs
 delete mode 100644 vendor/aho-corasick/src/state_id.rs
 create mode 100644 vendor/aho-corasick/src/transducer.rs
 create mode 100644 vendor/aho-corasick/src/util/alphabet.rs
 rename vendor/aho-corasick/src/{ => util}/buffer.rs (76%)
 rename vendor/aho-corasick/src/{ => util}/byte_frequencies.rs (100%)
 create mode 100644 vendor/aho-corasick/src/util/debug.rs
 create mode 100644 vendor/aho-corasick/src/util/error.rs
 create mode 100644 vendor/aho-corasick/src/util/int.rs
 create mode 100644 vendor/aho-corasick/src/util/mod.rs
 create mode 100644 vendor/aho-corasick/src/util/prefilter.rs
 create mode 100644 vendor/aho-corasick/src/util/primitives.rs
 create mode 100644 vendor/aho-corasick/src/util/remapper.rs
 create mode 100644 vendor/aho-corasick/src/util/search.rs
 create mode 100644 vendor/aho-corasick/src/util/special.rs
 delete mode 100644 vendor/autocfg/.cargo-checksum.json
 delete mode 100644 vendor/autocfg/README.md
 delete mode 100644 vendor/autocfg/examples/integers.rs
 delete mode 100644 vendor/autocfg/examples/paths.rs
 delete mode 100644 vendor/autocfg/examples/traits.rs
 delete mode 100644 vendor/autocfg/examples/versions.rs
 delete mode 100644 vendor/autocfg/src/error.rs
 delete mode 100644 vendor/autocfg/src/lib.rs
 delete mode 100644 vendor/autocfg/src/tests.rs
 delete mode 100644 vendor/autocfg/src/version.rs
 delete mode 100644 vendor/autocfg/tests/rustflags.rs
 create mode 100644 vendor/equivalent/.cargo-checksum.json
 rename vendor/{autocfg => equivalent}/Cargo.toml (61%)
 rename vendor/{autocfg => equivalent}/LICENSE-APACHE (100%)
 rename vendor/{autocfg => equivalent}/LICENSE-MIT (97%)
 create mode 100644 vendor/equivalent/README.md
 create mode 100644 vendor/equivalent/src/lib.rs
 create mode 100644 vendor/hashbrown/src/external_trait_impls/rayon/table.rs
 create mode 100644 vendor/hashbrown/src/external_trait_impls/rkyv/hash_map.rs
 create mode 100644 vendor/hashbrown/src/external_trait_impls/rkyv/hash_set.rs
 create mode 100644 vendor/hashbrown/src/external_trait_impls/rkyv/mod.rs
 create mode 100644 vendor/hashbrown/src/raw/neon.rs
 create mode 100644 vendor/hashbrown/src/table.rs
 create mode 100644 vendor/hashbrown/tests/equivalent_trait.rs
 create mode 100644 vendor/hashbrown/tests/raw.rs
 delete mode 100644 vendor/indexmap/build.rs
 delete mode 100644 vendor/indexmap/src/equivalent.rs
 create mode 100644 vendor/indexmap/src/map/iter.rs
 rename vendor/indexmap/src/{ => map}/serde_seq.rs (73%)
 create mode 100644 vendor/indexmap/src/map/slice.rs
 create mode 100644 vendor/indexmap/src/map/tests.rs
 create mode 100644 vendor/indexmap/src/set/iter.rs
 create mode 100644 vendor/indexmap/src/set/slice.rs
 create mode 100644 vendor/indexmap/src/set/tests.rs
 delete mode 100644 vendor/memchr/build.rs
 delete mode 100755 vendor/memchr/scripts/make-byte-frequency-table
 create mode 100644 vendor/memchr/src/arch/aarch64/memchr.rs
 create mode 100644 vendor/memchr/src/arch/aarch64/mod.rs
 create mode 100644 vendor/memchr/src/arch/aarch64/neon/memchr.rs
 create mode 100644 vendor/memchr/src/arch/aarch64/neon/mod.rs
 create mode 100644 vendor/memchr/src/arch/aarch64/neon/packedpair.rs
 create mode 100644 vendor/memchr/src/arch/all/memchr.rs
 create mode 100644 vendor/memchr/src/arch/all/mod.rs
 rename vendor/memchr/src/{memmem/byte_frequencies.rs => arch/all/packedpair/default_rank.rs} (99%)
 create mode 100644 vendor/memchr/src/arch/all/packedpair/mod.rs
 create mode 100644 vendor/memchr/src/arch/all/rabinkarp.rs
 create mode 100644 vendor/memchr/src/arch/all/shiftor.rs
 rename vendor/memchr/src/{memmem => arch/all}/twoway.rs (79%)
 create mode 100644 vendor/memchr/src/arch/generic/memchr.rs
 create mode 100644 vendor/memchr/src/arch/generic/mod.rs
 create mode 100644 vendor/memchr/src/arch/generic/packedpair.rs
 create mode 100644 vendor/memchr/src/arch/mod.rs
 create mode 100644 vendor/memchr/src/arch/wasm32/memchr.rs
 create mode 100644 vendor/memchr/src/arch/wasm32/mod.rs
 create mode 100644 vendor/memchr/src/arch/wasm32/simd128/memchr.rs
 create mode 100644 vendor/memchr/src/arch/wasm32/simd128/mod.rs
 create mode 100644 vendor/memchr/src/arch/wasm32/simd128/packedpair.rs
 create mode 100644 vendor/memchr/src/arch/x86_64/avx2/memchr.rs
 create mode 100644 vendor/memchr/src/arch/x86_64/avx2/mod.rs
 create mode 100644 vendor/memchr/src/arch/x86_64/avx2/packedpair.rs
 create mode 100644 vendor/memchr/src/arch/x86_64/memchr.rs
 create mode 100644 vendor/memchr/src/arch/x86_64/mod.rs
 create mode 100644 vendor/memchr/src/arch/x86_64/sse2/memchr.rs
 create mode 100644 vendor/memchr/src/arch/x86_64/sse2/mod.rs
 create mode 100644 vendor/memchr/src/arch/x86_64/sse2/packedpair.rs
 create mode 100644 vendor/memchr/src/ext.rs
 create mode 100644 vendor/memchr/src/macros.rs
 create mode 100644 vendor/memchr/src/memchr.rs
 delete mode 100644 vendor/memchr/src/memchr/c.rs
 delete mode 100644 vendor/memchr/src/memchr/fallback.rs
 delete mode 100644 vendor/memchr/src/memchr/iter.rs
 delete mode 100644 vendor/memchr/src/memchr/mod.rs
 delete mode 100644 vendor/memchr/src/memchr/naive.rs
 delete mode 100644 vendor/memchr/src/memchr/x86/avx.rs
 delete mode 100644 vendor/memchr/src/memchr/x86/mod.rs
 delete mode 100644 vendor/memchr/src/memchr/x86/sse2.rs
 delete mode 100644 vendor/memchr/src/memchr/x86/sse42.rs
 delete mode 100644 vendor/memchr/src/memmem/genericsimd.rs
 delete mode 100644 vendor/memchr/src/memmem/prefilter/fallback.rs
 delete mode 100644 vendor/memchr/src/memmem/prefilter/genericsimd.rs
 delete mode 100644 vendor/memchr/src/memmem/prefilter/mod.rs
 delete mode 100644 vendor/memchr/src/memmem/prefilter/wasm.rs
 delete mode 100644 vendor/memchr/src/memmem/prefilter/x86/avx.rs
 delete mode 100644 vendor/memchr/src/memmem/prefilter/x86/mod.rs
 delete mode 100644 vendor/memchr/src/memmem/prefilter/x86/sse.rs
 delete mode 100644 vendor/memchr/src/memmem/rabinkarp.rs
 delete mode 100644 vendor/memchr/src/memmem/rarebytes.rs
 create mode 100644 vendor/memchr/src/memmem/searcher.rs
 delete mode 100644 vendor/memchr/src/memmem/util.rs
 delete mode 100644 vendor/memchr/src/memmem/vector.rs
 delete mode 100644 vendor/memchr/src/memmem/wasm.rs
 delete mode 100644 vendor/memchr/src/memmem/x86/avx.rs
 delete mode 100644 vendor/memchr/src/memmem/x86/mod.rs
 delete mode 100644 vendor/memchr/src/memmem/x86/sse.rs
 delete mode 100644 vendor/memchr/src/tests/memchr/iter.rs
 delete mode 100644 vendor/memchr/src/tests/memchr/memchr.rs
 create mode 100644 vendor/memchr/src/tests/memchr/naive.rs
 create mode 100644 vendor/memchr/src/tests/memchr/prop.rs
 delete mode 100644 vendor/memchr/src/tests/memchr/simple.rs
 delete mode 100644 vendor/memchr/src/tests/memchr/testdata.rs
 create mode 100644 vendor/memchr/src/tests/packedpair.rs
 create mode 100644 vendor/memchr/src/tests/substring/mod.rs
 create mode 100644 vendor/memchr/src/tests/substring/naive.rs
 create mode 100644 vendor/memchr/src/tests/substring/prop.rs
 create mode 100644 vendor/memchr/src/vector.rs
 create mode 100644 vendor/proc-macro2/.cargo-checksum.json
 create mode 100644 vendor/proc-macro2/Cargo.toml
 create mode 100644 vendor/proc-macro2/LICENSE-APACHE
 create mode 100644 vendor/proc-macro2/LICENSE-MIT
 create mode 100644 vendor/proc-macro2/README.md
 create mode 100644 vendor/proc-macro2/build.rs
 create mode 100644 vendor/proc-macro2/build/probe.rs
 create mode 100644 vendor/proc-macro2/rust-toolchain.toml
 create mode 100644 vendor/proc-macro2/src/detection.rs
 create mode 100644 vendor/proc-macro2/src/extra.rs
 create mode 100644 vendor/proc-macro2/src/fallback.rs
 create mode 100644 vendor/proc-macro2/src/lib.rs
 create mode 100644 vendor/proc-macro2/src/location.rs
 create mode 100644 vendor/proc-macro2/src/marker.rs
 create mode 100644 vendor/proc-macro2/src/parse.rs
 create mode 100644 vendor/proc-macro2/src/rcvec.rs
 create mode 100644 vendor/proc-macro2/src/wrapper.rs
 create mode 100644 vendor/proc-macro2/tests/comments.rs
 create mode 100644 vendor/proc-macro2/tests/features.rs
 create mode 100644 vendor/proc-macro2/tests/marker.rs
 create mode 100644 vendor/proc-macro2/tests/test.rs
 create mode 100644 vendor/proc-macro2/tests/test_fmt.rs
 create mode 100644 vendor/proc-macro2/tests/test_size.rs
 create mode 100644 vendor/quote/.cargo-checksum.json
 create mode 100644 vendor/quote/Cargo.toml
 create mode 100644 vendor/quote/LICENSE-APACHE
 create mode 100644 vendor/quote/LICENSE-MIT
 create mode 100644 vendor/quote/README.md
 create mode 100644 vendor/quote/rust-toolchain.toml
 create mode 100644 vendor/quote/src/ext.rs
 create mode 100644 vendor/quote/src/format.rs
 create mode 100644 vendor/quote/src/ident_fragment.rs
 create mode 100644 vendor/quote/src/lib.rs
 create mode 100644 vendor/quote/src/runtime.rs
 create mode 100644 vendor/quote/src/spanned.rs
 create mode 100644 vendor/quote/src/to_tokens.rs
 create mode 100644 vendor/quote/tests/compiletest.rs
 create mode 100644 vendor/quote/tests/test.rs
 create mode 100644 vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.rs
 create mode 100644 vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.stderr
 create mode 100644 vendor/quote/tests/ui/does-not-have-iter-interpolated.rs
 create mode 100644 vendor/quote/tests/ui/does-not-have-iter-interpolated.stderr
 create mode 100644 vendor/quote/tests/ui/does-not-have-iter-separated.rs
 create mode 100644 vendor/quote/tests/ui/does-not-have-iter-separated.stderr
 create mode 100644 vendor/quote/tests/ui/does-not-have-iter.rs
 create mode 100644 vendor/quote/tests/ui/does-not-have-iter.stderr
 create mode 100644 vendor/quote/tests/ui/not-quotable.rs
 create mode 100644 vendor/quote/tests/ui/not-quotable.stderr
 create mode 100644 vendor/quote/tests/ui/not-repeatable.rs
 create mode 100644 vendor/quote/tests/ui/not-repeatable.stderr
 create mode 100644 vendor/quote/tests/ui/wrong-type-span.rs
 create mode 100644 vendor/quote/tests/ui/wrong-type-span.stderr
 create mode 100644 vendor/regex-automata/.cargo-checksum.json
 create mode 100644 vendor/regex-automata/Cargo.toml
 create mode 100644 vendor/regex-automata/LICENSE-APACHE
 create mode 100644 vendor/regex-automata/LICENSE-MIT
 create mode 100644 vendor/regex-automata/README.md
 create mode 100644 vendor/regex-automata/src/dfa/accel.rs
 create mode 100644 vendor/regex-automata/src/dfa/automaton.rs
 create mode 100644 vendor/regex-automata/src/dfa/dense.rs
 create mode 100644 vendor/regex-automata/src/dfa/determinize.rs
 create mode 100644 vendor/regex-automata/src/dfa/minimize.rs
 create mode 100644 vendor/regex-automata/src/dfa/mod.rs
 create mode 100644 vendor/regex-automata/src/dfa/onepass.rs
 create mode 100644 vendor/regex-automata/src/dfa/regex.rs
 create mode 100644 vendor/regex-automata/src/dfa/remapper.rs
 create mode 100644 vendor/regex-automata/src/dfa/search.rs
 create mode 100644 vendor/regex-automata/src/dfa/sparse.rs
 create mode 100644 vendor/regex-automata/src/dfa/special.rs
 create mode 100644 vendor/regex-automata/src/dfa/start.rs
 create mode 100644 vendor/regex-automata/src/hybrid/dfa.rs
 create mode 100644 vendor/regex-automata/src/hybrid/error.rs
 create mode 100644 vendor/regex-automata/src/hybrid/id.rs
 create mode 100644 vendor/regex-automata/src/hybrid/mod.rs
 create mode 100644 vendor/regex-automata/src/hybrid/regex.rs
 create mode 100644 vendor/regex-automata/src/hybrid/search.rs
 create mode 100644 vendor/regex-automata/src/lib.rs
 create mode 100644 vendor/regex-automata/src/macros.rs
 create mode 100644 vendor/regex-automata/src/meta/error.rs
 create mode 100644 vendor/regex-automata/src/meta/limited.rs
 create mode 100644 vendor/regex-automata/src/meta/literal.rs
 create mode 100644 vendor/regex-automata/src/meta/mod.rs
 create mode 100644 vendor/regex-automata/src/meta/regex.rs
 create mode 100644 vendor/regex-automata/src/meta/reverse_inner.rs
 create mode 100644 vendor/regex-automata/src/meta/stopat.rs
 create mode 100644 vendor/regex-automata/src/meta/strategy.rs
 create mode 100644 vendor/regex-automata/src/meta/wrappers.rs
 create mode 100644 vendor/regex-automata/src/nfa/mod.rs
 create mode 100644 vendor/regex-automata/src/nfa/thompson/backtrack.rs
 create mode 100644 vendor/regex-automata/src/nfa/thompson/builder.rs
 create mode 100644 vendor/regex-automata/src/nfa/thompson/compiler.rs
 create mode 100644 vendor/regex-automata/src/nfa/thompson/error.rs
 create mode 100644 vendor/regex-automata/src/nfa/thompson/literal_trie.rs
 create mode 100644 vendor/regex-automata/src/nfa/thompson/map.rs
 create mode 100644 vendor/regex-automata/src/nfa/thompson/mod.rs
 create mode 100644 vendor/regex-automata/src/nfa/thompson/nfa.rs
 create mode 100644 vendor/regex-automata/src/nfa/thompson/pikevm.rs
 create mode 100644 vendor/regex-automata/src/nfa/thompson/range_trie.rs
 create mode 100644 vendor/regex-automata/src/util/alphabet.rs
 create mode 100644 vendor/regex-automata/src/util/captures.rs
 create mode 100644 vendor/regex-automata/src/util/determinize/mod.rs
 create mode 100644 vendor/regex-automata/src/util/determinize/state.rs
 create mode 100644 vendor/regex-automata/src/util/empty.rs
 create mode 100644 vendor/regex-automata/src/util/escape.rs
 create mode 100644 vendor/regex-automata/src/util/int.rs
 create mode 100644 vendor/regex-automata/src/util/interpolate.rs
 create mode 100644 vendor/regex-automata/src/util/iter.rs
 create mode 100644 vendor/regex-automata/src/util/lazy.rs
 create mode 100644 vendor/regex-automata/src/util/look.rs
 create mode 100644 vendor/regex-automata/src/util/memchr.rs
 create mode 100644 vendor/regex-automata/src/util/mod.rs
 create mode 100644 vendor/regex-automata/src/util/pool.rs
 create mode 100644 vendor/regex-automata/src/util/prefilter/aho_corasick.rs
 create mode 100644 vendor/regex-automata/src/util/prefilter/byteset.rs
 create mode 100644 vendor/regex-automata/src/util/prefilter/memchr.rs
 create mode 100644 vendor/regex-automata/src/util/prefilter/memmem.rs
 create mode 100644 vendor/regex-automata/src/util/prefilter/mod.rs
 create mode 100644 vendor/regex-automata/src/util/prefilter/teddy.rs
 create mode 100644 vendor/regex-automata/src/util/primitives.rs
 create mode 100644 vendor/regex-automata/src/util/search.rs
 create mode 100644 vendor/regex-automata/src/util/sparse_set.rs
 create mode 100644 vendor/regex-automata/src/util/start.rs
 create mode 100644 vendor/regex-automata/src/util/syntax.rs
 create mode 100644 vendor/regex-automata/src/util/unicode_data/mod.rs
 create mode 100644 vendor/regex-automata/src/util/unicode_data/perl_word.rs
 create mode 100644 vendor/regex-automata/src/util/utf8.rs
 create mode 100644 vendor/regex-automata/src/util/wire.rs
 create mode 100755 vendor/regex-automata/test
 create mode 100644 vendor/regex-automata/tests/dfa/api.rs
 create mode 100644 vendor/regex-automata/tests/dfa/mod.rs
 create mode 100644 vendor/regex-automata/tests/dfa/onepass/mod.rs
 create mode 100644 vendor/regex-automata/tests/dfa/onepass/suite.rs
 create mode 100644 vendor/regex-automata/tests/dfa/regression.rs
 create mode 100644 vendor/regex-automata/tests/dfa/suite.rs
 create mode 100644 vendor/regex-automata/tests/fuzz/dense.rs
 create mode 100644 vendor/regex-automata/tests/fuzz/mod.rs
 create mode 100644 vendor/regex-automata/tests/fuzz/sparse.rs
 create mode 100644 vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9
 create mode 100644 vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9
 create mode 100644 vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000
 create mode 100644 vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9
 create mode 100644 vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98
 create mode 100644 vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838
 create mode 100644 vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570
 create mode 100644 vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b
 create mode 100644 vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9
 create mode 100644 vendor/regex-automata/tests/gen/README.md
 create mode 100644 vendor/regex-automata/tests/gen/dense/mod.rs
 create mode 100644 vendor/regex-automata/tests/gen/dense/multi_pattern_v2.rs
 create mode 100644 vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa
 create mode 100644 vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa
 create mode 100644 vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa
 create mode 100644 vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa
 create mode 100644 vendor/regex-automata/tests/gen/mod.rs
 create mode 100644 vendor/regex-automata/tests/gen/sparse/mod.rs
 create mode 100644 vendor/regex-automata/tests/gen/sparse/multi_pattern_v2.rs
 create mode 100644 vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa
 create mode 100644 vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa
 create mode 100644 vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa
 create mode 100644 vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa
 create mode 100644 vendor/regex-automata/tests/hybrid/api.rs
 create mode 100644 vendor/regex-automata/tests/hybrid/mod.rs
 create mode 100644 vendor/regex-automata/tests/hybrid/suite.rs
 create mode 100644 vendor/regex-automata/tests/lib.rs
 create mode 100644 vendor/regex-automata/tests/meta/mod.rs
 create mode 100644 vendor/regex-automata/tests/meta/suite.rs
 create mode 100644 vendor/regex-automata/tests/nfa/mod.rs
 create mode 100644 vendor/regex-automata/tests/nfa/thompson/backtrack/mod.rs
 create mode 100644 vendor/regex-automata/tests/nfa/thompson/backtrack/suite.rs
 create mode 100644 vendor/regex-automata/tests/nfa/thompson/mod.rs
 create mode 100644 vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs
 create mode 100644 vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs
 create mode 100644 vendor/regex-syntax/src/debug.rs
 create mode 100644 vendor/regex-syntax/src/hir/literal.rs
 delete mode 100644 vendor/regex-syntax/src/hir/literal/mod.rs
 rename vendor/{regex/src/freqs.rs => regex-syntax/src/rank.rs} (96%)
 create mode 100644 vendor/regex/Cross.toml
 delete mode 100644 vendor/regex/HACKING.md
 delete mode 100644 vendor/regex/PERFORMANCE.md
 create mode 100644 vendor/regex/bench/README.md
 delete mode 100644 vendor/regex/examples/regexdna-input.txt
 delete mode 100644 vendor/regex/examples/regexdna-output.txt
 delete mode 100644 vendor/regex/examples/shootout-regex-dna-bytes.rs
 delete mode 100644 vendor/regex/examples/shootout-regex-dna-cheat.rs
 delete mode 100644 vendor/regex/examples/shootout-regex-dna-replace.rs
 delete mode 100644 vendor/regex/examples/shootout-regex-dna-single-cheat.rs
 delete mode 100644 vendor/regex/examples/shootout-regex-dna-single.rs
 delete mode 100644 vendor/regex/examples/shootout-regex-dna.rs
 create mode 100644 vendor/regex/record/README.md
 create mode 100644 vendor/regex/record/compile-test/2023-04-19_1.7.3.csv
 create mode 100644 vendor/regex/record/compile-test/2023-04-20_master.csv
 create mode 100644 vendor/regex/record/compile-test/2023-07-05.csv
 create mode 100644 vendor/regex/record/compile-test/README.md
 create mode 100644 vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic
 create mode 100644 vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa
 create mode 100644 vendor/regex/record/old-bench-log/01-lazy-dfa/native
 create mode 100644 vendor/regex/record/old-bench-log/01-lazy-dfa/nfa
 create mode 100644 vendor/regex/record/old-bench-log/01-lazy-dfa/pcre
 create mode 100644 vendor/regex/record/old-bench-log/02-set/dynamic
 create mode 100644 vendor/regex/record/old-bench-log/03-bytes/onig
 create mode 100644 vendor/regex/record/old-bench-log/03-bytes/pcre
 create mode 100644 vendor/regex/record/old-bench-log/03-bytes/rust
 create mode 100644 vendor/regex/record/old-bench-log/03-bytes/rust-bytes
 create mode 100644 vendor/regex/record/old-bench-log/04/onig
 create mode 100644 vendor/regex/record/old-bench-log/04/pcre1-jit
 create mode 100644 vendor/regex/record/old-bench-log/04/pcre2-jit
 create mode 100644 vendor/regex/record/old-bench-log/04/re2
 create mode 100644 vendor/regex/record/old-bench-log/04/rust
 create mode 100644 vendor/regex/record/old-bench-log/04/tcl
 create mode 100644 vendor/regex/record/old-bench-log/05/onig
 create mode 100644 vendor/regex/record/old-bench-log/05/onig-vs-rust
 create mode 100644 vendor/regex/record/old-bench-log/05/pcre1
 create mode 100644 vendor/regex/record/old-bench-log/05/pcre1-vs-rust
 create mode 100644 vendor/regex/record/old-bench-log/05/pcre2
 create mode 100644 vendor/regex/record/old-bench-log/05/pcre2-vs-rust
 create mode 100644 vendor/regex/record/old-bench-log/05/re2
 create mode 100644 vendor/regex/record/old-bench-log/05/re2-vs-rust
 create mode 100644 vendor/regex/record/old-bench-log/05/rust
 create mode 100644 vendor/regex/record/old-bench-log/05/tcl
 create mode 100644 vendor/regex/record/old-bench-log/05/tcl-vs-rust
 create mode 100644 vendor/regex/record/old-bench-log/06/dphobos-dmd
 create mode 100644 vendor/regex/record/old-bench-log/06/dphobos-dmd-ct
 create mode 100644 vendor/regex/record/old-bench-log/06/dphobos-ldc
 create mode 100644 vendor/regex/record/old-bench-log/06/dphobos-ldc-ct
 create mode 100644 vendor/regex/record/old-bench-log/06/pcre1
 create mode 100644 vendor/regex/record/old-bench-log/06/pcre2
 create mode 100644 vendor/regex/record/old-bench-log/06/re2
 create mode 100644 vendor/regex/record/old-bench-log/06/rust
 create mode 100644 vendor/regex/record/old-bench-log/07/boost
 create mode 100644 vendor/regex/record/old-bench-log/07/dphobos-dmd
 create mode 100644 vendor/regex/record/old-bench-log/07/dphobos-dmd-ct
 create mode 100644 vendor/regex/record/old-bench-log/07/oniguruma
 create mode 100644 vendor/regex/record/old-bench-log/07/pcre1
 create mode 100644 vendor/regex/record/old-bench-log/07/pcre2
 create mode 100644 vendor/regex/record/old-bench-log/07/re2
 create mode 100644 vendor/regex/record/old-bench-log/07/rust
 create mode 100644 vendor/regex/record/old-bench-log/07/rust-bytes
 create mode 100644 vendor/regex/record/old-bench-log/07/stdcpp
 create mode 100644 vendor/regex/record/old-bench-log/07/stdcpp-libcxx
 create mode 100644 vendor/regex/record/old-bench-log/07/tcl
 create mode 100644 vendor/regex/record/old-bench-log/08-new-memmem/rust-after-01
 create mode 100644 vendor/regex/record/old-bench-log/08-new-memmem/rust-after-02
 create mode 100644 vendor/regex/record/old-bench-log/08-new-memmem/rust-before-01
 create mode 100644 vendor/regex/record/old-bench-log/08-new-memmem/rust-before-02
 create mode 100644 vendor/regex/record/old-bench-log/09-new-baseline/pcre2
 create mode 100644 vendor/regex/record/old-bench-log/09-new-baseline/re2
 create mode 100644 vendor/regex/record/old-bench-log/09-new-baseline/rust
 create mode 100644 vendor/regex/record/old-bench-log/09-new-baseline/rust-bytes
 create mode 100644 vendor/regex/record/old-bench-log/10-last-frontier/rust-after-literal.log
 create mode 100644 vendor/regex/record/old-bench-log/10-last-frontier/rust-before-literal.log
 create mode 100644 vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log
 create mode 100644 vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log
 create mode 100644 vendor/regex/record/old-bench-log/11-regex-1.7.3/rust
 create mode 100644 vendor/regex/record/old-bench-log/11-regex-1.7.3/rust-bytes
 create mode 100644 vendor/regex/record/old-bench-log/12-regex-1.8.1/rust
 create mode 100644 vendor/regex/record/old-bench-log/12-regex-1.8.1/rust-bytes
 create mode 100644 vendor/regex/record/old-bench-log/13-regex-1.9.0/rust
 create mode 100644 vendor/regex/record/old-bench-log/13-regex-1.9.0/rust-bytes
 create mode 100644 vendor/regex/record/old-bench-log/README.md
 create mode 100644 vendor/regex/record/old-bench-log/old/01-before
 create mode 100644 vendor/regex/record/old-bench-log/old/02-new-syntax-crate
 create mode 100644 vendor/regex/record/old-bench-log/old/03-new-syntax-crate
 create mode 100644 vendor/regex/record/old-bench-log/old/04-fixed-benchmark
create mode 100644 vendor/regex/record/old-bench-log/old/05-thread-caching create mode 100644 vendor/regex/record/old-bench-log/old/06-major-dynamic create mode 100644 vendor/regex/record/old-bench-log/old/06-major-macro create mode 100644 vendor/regex/record/old-bench-log/old/07-prefix-improvements create mode 100644 vendor/regex/record/old-bench-log/old/08-case-fixes create mode 100644 vendor/regex/record/old-bench-log/old/09-before-compiler-rewrite create mode 100644 vendor/regex/record/old-bench-log/old/10-compiler-rewrite create mode 100644 vendor/regex/record/old-bench-log/old/11-compiler-rewrite create mode 100644 vendor/regex/record/old-bench-log/old/12-executor create mode 100644 vendor/regex/record/old-bench-log/old/12-executor-bytes create mode 100644 vendor/regex/record/old-bench-log/old/13-cache-byte-range-suffixes delete mode 100644 vendor/regex/src/backtrack.rs create mode 100644 vendor/regex/src/builders.rs create mode 100644 vendor/regex/src/bytes.rs delete mode 100644 vendor/regex/src/compile.rs delete mode 100644 vendor/regex/src/dfa.rs delete mode 100644 vendor/regex/src/exec.rs delete mode 100644 vendor/regex/src/expand.rs delete mode 100644 vendor/regex/src/input.rs delete mode 100644 vendor/regex/src/literal/imp.rs delete mode 100644 vendor/regex/src/literal/mod.rs delete mode 100644 vendor/regex/src/pikevm.rs delete mode 100644 vendor/regex/src/pool.rs delete mode 100644 vendor/regex/src/prog.rs delete mode 100644 vendor/regex/src/re_builder.rs delete mode 100644 vendor/regex/src/re_bytes.rs delete mode 100644 vendor/regex/src/re_set.rs delete mode 100644 vendor/regex/src/re_trait.rs delete mode 100644 vendor/regex/src/re_unicode.rs create mode 100644 vendor/regex/src/regex/bytes.rs create mode 100644 vendor/regex/src/regex/mod.rs create mode 100644 vendor/regex/src/regex/string.rs create mode 100644 vendor/regex/src/regexset/bytes.rs create mode 100644 vendor/regex/src/regexset/mod.rs create mode 100644 vendor/regex/src/regexset/string.rs delete mode 100644 vendor/regex/src/sparse.rs delete mode 100644 vendor/regex/src/testdata/LICENSE delete mode 100644 vendor/regex/src/testdata/README delete mode 100644 vendor/regex/src/utf8.rs create mode 100644 vendor/regex/testdata/README.md create mode 100644 vendor/regex/testdata/anchored.toml create mode 100644 vendor/regex/testdata/bytes.toml create mode 100644 vendor/regex/testdata/crazy.toml create mode 100644 vendor/regex/testdata/crlf.toml create mode 100644 vendor/regex/testdata/earliest.toml create mode 100644 vendor/regex/testdata/empty.toml create mode 100644 vendor/regex/testdata/expensive.toml create mode 100644 vendor/regex/testdata/flags.toml create mode 100644 vendor/regex/testdata/fowler/basic.toml create mode 100644 vendor/regex/testdata/fowler/dat/README rename vendor/regex/{src/testdata => testdata/fowler/dat}/basic.dat (87%) rename vendor/regex/{src/testdata => testdata/fowler/dat}/nullsubexpr.dat (81%) rename vendor/regex/{src/testdata => testdata/fowler/dat}/repetition.dat (89%) create mode 100644 vendor/regex/testdata/fowler/nullsubexpr.toml create mode 100644 vendor/regex/testdata/fowler/repetition.toml create mode 100644 vendor/regex/testdata/iter.toml create mode 100644 vendor/regex/testdata/leftmost-all.toml create mode 100644 vendor/regex/testdata/line-terminator.toml create mode 100644 vendor/regex/testdata/misc.toml create mode 100644 vendor/regex/testdata/multiline.toml create mode 100644 vendor/regex/testdata/no-unicode.toml create mode 100644 vendor/regex/testdata/overlapping.toml create mode 
100644 vendor/regex/testdata/regex-lite.toml create mode 100644 vendor/regex/testdata/regression.toml create mode 100644 vendor/regex/testdata/set.toml create mode 100644 vendor/regex/testdata/substring.toml create mode 100644 vendor/regex/testdata/unicode.toml create mode 100644 vendor/regex/testdata/utf8.toml create mode 100644 vendor/regex/testdata/word-boundary-special.toml create mode 100644 vendor/regex/testdata/word-boundary.toml delete mode 100644 vendor/regex/tests/api.rs delete mode 100644 vendor/regex/tests/api_str.rs delete mode 100644 vendor/regex/tests/bytes.rs delete mode 100644 vendor/regex/tests/consistent.rs delete mode 100644 vendor/regex/tests/crates_regex.rs delete mode 100644 vendor/regex/tests/crazy.rs delete mode 100644 vendor/regex/tests/flags.rs delete mode 100644 vendor/regex/tests/fowler.rs create mode 100644 vendor/regex/tests/fuzz/mod.rs create mode 100644 vendor/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff create mode 100644 vendor/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 create mode 100644 vendor/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 create mode 100644 vendor/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 create mode 100644 vendor/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e create mode 100644 vendor/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c create mode 100644 vendor/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 create mode 100644 vendor/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 create mode 100644 vendor/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a create mode 100644 vendor/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 create mode 100644 vendor/regex/tests/lib.rs delete mode 100644 vendor/regex/tests/macros.rs delete mode 100644 vendor/regex/tests/macros_bytes.rs delete mode 100644 vendor/regex/tests/macros_str.rs delete mode 100644 vendor/regex/tests/multiline.rs delete mode 100644 vendor/regex/tests/noparse.rs delete mode 100644 vendor/regex/tests/set.rs delete mode 100644 vendor/regex/tests/shortest_match.rs delete mode 100644 vendor/regex/tests/suffix_reverse.rs create mode 100644 vendor/regex/tests/suite_bytes.rs create mode 100644 vendor/regex/tests/suite_bytes_set.rs create mode 100644 vendor/regex/tests/suite_string.rs create mode 100644 vendor/regex/tests/suite_string_set.rs delete mode 100644 vendor/regex/tests/test_backtrack.rs delete mode 100644 vendor/regex/tests/test_backtrack_bytes.rs delete mode 100644 vendor/regex/tests/test_backtrack_utf8bytes.rs delete mode 100644 vendor/regex/tests/test_crates_regex.rs delete mode 100644 vendor/regex/tests/test_default.rs delete mode 100644 vendor/regex/tests/test_default_bytes.rs delete mode 100644 vendor/regex/tests/test_nfa.rs delete mode 100644 vendor/regex/tests/test_nfa_bytes.rs delete mode 100644 vendor/regex/tests/test_nfa_utf8bytes.rs delete mode 100644 vendor/regex/tests/unicode.rs delete mode 100644 vendor/regex/tests/word_boundary.rs delete mode 100644 vendor/regex/tests/word_boundary_ascii.rs delete mode 100644 vendor/regex/tests/word_boundary_unicode.rs rename vendor/serde/src/{private => de}/size_hint.rs (50%) delete mode 100644 vendor/serde/src/de/utf8.rs create mode 100644 vendor/serde_derive/.cargo-checksum.json create mode 100644 vendor/serde_derive/Cargo.toml create 
mode 100644 vendor/serde_derive/LICENSE-APACHE create mode 100644 vendor/serde_derive/LICENSE-MIT create mode 100644 vendor/serde_derive/README.md create mode 100644 vendor/serde_derive/crates-io.md create mode 100644 vendor/serde_derive/src/bound.rs create mode 100644 vendor/serde_derive/src/de.rs create mode 100644 vendor/serde_derive/src/dummy.rs create mode 100644 vendor/serde_derive/src/fragment.rs create mode 100644 vendor/serde_derive/src/internals/ast.rs create mode 100644 vendor/serde_derive/src/internals/attr.rs create mode 100644 vendor/serde_derive/src/internals/case.rs create mode 100644 vendor/serde_derive/src/internals/check.rs create mode 100644 vendor/serde_derive/src/internals/ctxt.rs create mode 100644 vendor/serde_derive/src/internals/mod.rs create mode 100644 vendor/serde_derive/src/internals/receiver.rs create mode 100644 vendor/serde_derive/src/internals/respan.rs create mode 100644 vendor/serde_derive/src/internals/symbol.rs create mode 100644 vendor/serde_derive/src/lib.rs create mode 100644 vendor/serde_derive/src/pretend.rs create mode 100644 vendor/serde_derive/src/ser.rs create mode 100644 vendor/serde_derive/src/this.rs create mode 100644 vendor/syn/.cargo-checksum.json create mode 100644 vendor/syn/Cargo.toml create mode 100644 vendor/syn/LICENSE-APACHE create mode 100644 vendor/syn/LICENSE-MIT create mode 100644 vendor/syn/README.md create mode 100644 vendor/syn/benches/file.rs create mode 100644 vendor/syn/benches/rust.rs create mode 100644 vendor/syn/src/attr.rs create mode 100644 vendor/syn/src/bigint.rs create mode 100644 vendor/syn/src/buffer.rs create mode 100644 vendor/syn/src/custom_keyword.rs create mode 100644 vendor/syn/src/custom_punctuation.rs create mode 100644 vendor/syn/src/data.rs create mode 100644 vendor/syn/src/derive.rs create mode 100644 vendor/syn/src/discouraged.rs create mode 100644 vendor/syn/src/drops.rs create mode 100644 vendor/syn/src/error.rs create mode 100644 vendor/syn/src/export.rs create mode 100644 vendor/syn/src/expr.rs create mode 100644 vendor/syn/src/ext.rs create mode 100644 vendor/syn/src/file.rs create mode 100644 vendor/syn/src/gen/clone.rs create mode 100644 vendor/syn/src/gen/debug.rs create mode 100644 vendor/syn/src/gen/eq.rs create mode 100644 vendor/syn/src/gen/fold.rs create mode 100644 vendor/syn/src/gen/hash.rs create mode 100644 vendor/syn/src/gen/visit.rs create mode 100644 vendor/syn/src/gen/visit_mut.rs create mode 100644 vendor/syn/src/gen_helper.rs create mode 100644 vendor/syn/src/generics.rs create mode 100644 vendor/syn/src/group.rs create mode 100644 vendor/syn/src/ident.rs create mode 100644 vendor/syn/src/item.rs create mode 100644 vendor/syn/src/lib.rs create mode 100644 vendor/syn/src/lifetime.rs create mode 100644 vendor/syn/src/lit.rs create mode 100644 vendor/syn/src/lookahead.rs create mode 100644 vendor/syn/src/mac.rs create mode 100644 vendor/syn/src/macros.rs create mode 100644 vendor/syn/src/meta.rs create mode 100644 vendor/syn/src/op.rs create mode 100644 vendor/syn/src/parse.rs create mode 100644 vendor/syn/src/parse_macro_input.rs create mode 100644 vendor/syn/src/parse_quote.rs create mode 100644 vendor/syn/src/pat.rs create mode 100644 vendor/syn/src/path.rs create mode 100644 vendor/syn/src/print.rs create mode 100644 vendor/syn/src/punctuated.rs create mode 100644 vendor/syn/src/restriction.rs create mode 100644 vendor/syn/src/sealed.rs create mode 100644 vendor/syn/src/span.rs create mode 100644 vendor/syn/src/spanned.rs create mode 100644 vendor/syn/src/stmt.rs create mode 
100644 vendor/syn/src/thread.rs create mode 100644 vendor/syn/src/token.rs create mode 100644 vendor/syn/src/tt.rs create mode 100644 vendor/syn/src/ty.rs create mode 100644 vendor/syn/src/verbatim.rs create mode 100644 vendor/syn/src/whitespace.rs create mode 100644 vendor/syn/tests/common/eq.rs create mode 100644 vendor/syn/tests/common/mod.rs create mode 100644 vendor/syn/tests/common/parse.rs create mode 100644 vendor/syn/tests/macros/mod.rs create mode 100644 vendor/syn/tests/regression.rs create mode 100644 vendor/syn/tests/regression/issue1108.rs create mode 100644 vendor/syn/tests/regression/issue1235.rs create mode 100644 vendor/syn/tests/repo/mod.rs create mode 100644 vendor/syn/tests/repo/progress.rs create mode 100644 vendor/syn/tests/test_asyncness.rs create mode 100644 vendor/syn/tests/test_attribute.rs create mode 100644 vendor/syn/tests/test_derive_input.rs create mode 100644 vendor/syn/tests/test_expr.rs create mode 100644 vendor/syn/tests/test_generics.rs create mode 100644 vendor/syn/tests/test_grouping.rs create mode 100644 vendor/syn/tests/test_ident.rs create mode 100644 vendor/syn/tests/test_item.rs create mode 100644 vendor/syn/tests/test_iterators.rs create mode 100644 vendor/syn/tests/test_lit.rs create mode 100644 vendor/syn/tests/test_meta.rs create mode 100644 vendor/syn/tests/test_parse_buffer.rs create mode 100644 vendor/syn/tests/test_parse_quote.rs create mode 100644 vendor/syn/tests/test_parse_stream.rs create mode 100644 vendor/syn/tests/test_pat.rs create mode 100644 vendor/syn/tests/test_path.rs create mode 100644 vendor/syn/tests/test_precedence.rs create mode 100644 vendor/syn/tests/test_receiver.rs create mode 100644 vendor/syn/tests/test_round_trip.rs create mode 100644 vendor/syn/tests/test_shebang.rs create mode 100644 vendor/syn/tests/test_should_parse.rs create mode 100644 vendor/syn/tests/test_size.rs create mode 100644 vendor/syn/tests/test_stmt.rs create mode 100644 vendor/syn/tests/test_token_trees.rs create mode 100644 vendor/syn/tests/test_ty.rs create mode 100644 vendor/syn/tests/test_visibility.rs create mode 100644 vendor/syn/tests/zzz_stable.rs create mode 100644 vendor/tinyrick-0.0.13/.cargo-checksum.json create mode 100644 vendor/tinyrick-0.0.13/Cargo.toml create mode 100644 vendor/tinyrick-0.0.13/Cross.toml create mode 100644 vendor/tinyrick-0.0.13/DEVELOPMENT.md create mode 100644 vendor/tinyrick-0.0.13/LICENSE.md create mode 100644 vendor/tinyrick-0.0.13/README.md create mode 100644 vendor/tinyrick-0.0.13/install.mk create mode 100755 vendor/tinyrick-0.0.13/makefile create mode 100644 vendor/tinyrick-0.0.13/sample.envrc create mode 100644 vendor/tinyrick-0.0.13/src/lib.rs create mode 100644 vendor/tinyrick-0.0.13/src/tinyrick.rs create mode 100644 vendor/tinyrick/Cross.toml create mode 100644 vendor/tinyrick/LICENSE.md delete mode 100755 vendor/tinyrick/build.sh delete mode 100755 vendor/tinyrick/crosscompile-linux.sh create mode 100644 vendor/tinyrick/install.mk create mode 100755 vendor/tinyrick/makefile create mode 100644 vendor/tinyrick/sample.envrc create mode 100644 vendor/tinyrick_extras/LICENSE.md create mode 100644 vendor/tinyrick_extras/makefile delete mode 100644 vendor/toml_edit/src/parser/macros.rs create mode 100644 vendor/unicode-ident/.cargo-checksum.json create mode 100644 vendor/unicode-ident/Cargo.toml create mode 100644 vendor/unicode-ident/LICENSE-APACHE create mode 100644 vendor/unicode-ident/LICENSE-MIT create mode 100644 vendor/unicode-ident/LICENSE-UNICODE create mode 100644 vendor/unicode-ident/README.md 
create mode 100644 vendor/unicode-ident/benches/xid.rs create mode 100644 vendor/unicode-ident/src/lib.rs create mode 100644 vendor/unicode-ident/src/tables.rs create mode 100644 vendor/unicode-ident/tests/compare.rs create mode 100644 vendor/unicode-ident/tests/fst/mod.rs create mode 100644 vendor/unicode-ident/tests/fst/xid_continue.fst create mode 100644 vendor/unicode-ident/tests/fst/xid_start.fst create mode 100644 vendor/unicode-ident/tests/roaring/mod.rs create mode 100644 vendor/unicode-ident/tests/static_size.rs create mode 100644 vendor/unicode-ident/tests/tables/mod.rs create mode 100644 vendor/unicode-ident/tests/tables/tables.rs create mode 100644 vendor/unicode-ident/tests/trie/mod.rs create mode 100644 vendor/unicode-ident/tests/trie/trie.rs create mode 100644 vendor/winnow/benches/find_slice.rs create mode 100644 vendor/winnow/benches/iter.rs create mode 100644 vendor/winnow/benches/next_slice.rs create mode 100644 vendor/winnow/examples/arithmetic/parser_lexer.rs create mode 100644 vendor/winnow/src/_topic/nom.rs create mode 100644 vendor/winnow/src/_topic/performance.rs create mode 100644 vendor/winnow/src/ascii/mod.rs rename vendor/winnow/src/{character => ascii}/tests.rs (61%) rename vendor/winnow/src/{ => binary}/bits/mod.rs (51%) rename vendor/winnow/src/{ => binary}/bits/tests.rs (76%) rename vendor/winnow/src/{number => binary}/mod.rs (58%) rename vendor/winnow/src/{number => binary}/tests.rs (56%) delete mode 100644 vendor/winnow/src/branch/tests.rs delete mode 100644 vendor/winnow/src/bytes/mod.rs delete mode 100644 vendor/winnow/src/character/mod.rs rename vendor/winnow/src/{branch/mod.rs => combinator/branch.rs} (54%) create mode 100644 vendor/winnow/src/combinator/core.rs create mode 100644 vendor/winnow/src/combinator/multi.rs create mode 100644 vendor/winnow/src/combinator/parser.rs create mode 100644 vendor/winnow/src/combinator/sequence.rs delete mode 100644 vendor/winnow/src/macros.rs create mode 100644 vendor/winnow/src/macros/dispatch.rs create mode 100644 vendor/winnow/src/macros/mod.rs create mode 100644 vendor/winnow/src/macros/seq.rs create mode 100644 vendor/winnow/src/macros/test.rs delete mode 100644 vendor/winnow/src/multi/mod.rs delete mode 100644 vendor/winnow/src/multi/tests.rs delete mode 100644 vendor/winnow/src/sequence/mod.rs delete mode 100644 vendor/winnow/src/sequence/tests.rs create mode 100644 vendor/winnow/src/token/mod.rs rename vendor/winnow/src/{bytes => token}/tests.rs (80%) diff --git a/.cargo/cargo.toml b/.cargo/cargo.toml new file mode 100644 index 0000000..0236928 --- /dev/null +++ b/.cargo/cargo.toml @@ -0,0 +1,5 @@ +[source.crates-io] +replace-with = "vendored-sources" + +[source.vendored-sources] +directory = "vendor" diff --git a/vendor/aho-corasick/.cargo-checksum.json b/vendor/aho-corasick/.cargo-checksum.json index 8eac304..16b5b4e 100644 --- a/vendor/aho-corasick/.cargo-checksum.json +++ b/vendor/aho-corasick/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"c9b1b15e299ba4e6ed0d6f25cde30b26b13b6068a7fbd980000c37bca19b0104","DESIGN.md":"64ff45ea2a89d4c32b29af91acb7743a861fcac417cb94fde8e6559405d603b2","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"5999e5768f5da8ab9b50c016766b5185b4c79936c56bef6d311ddcb0a38c4b94","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"b92c9a65c4ee8029ff5a710aa1514caf838e73072c177dff5375463769f0b1ce","src/automaton.rs":"931af0aad03079bc4f6400d573fce832ce1edeeaf196815a16750d57b54b2183","src/buffer.rs":"dae7ee7c1f846ca9cf115ba4949484000e1837b4fb7311f8d8c9a35011c9c26f","src/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/classes.rs":"99a53a2ed8eea8c13699def90e31dfdff9d0b90572b1db3cb534e3396e7a0ed0","src/dfa.rs":"25e4455b3e179a7e192108d05f3683993456b36e3ebed99f827558c52525b7e6","src/error.rs":"d34c2c9c815df5d9dedc46b4b3ce109cd2cee07825de643f0c574ec960367beb","src/lib.rs":"7a47d4c87f83e0e7ddf0777a71e4858904e73477ce18404cb89e656070e86aef","src/nfa.rs":"3b817b4aa85540e8c0d35aff7ed7cfbab70ec7d2aaa779d63b4f5369bff31ce1","src/packed/api.rs":"df42e7500c94c9de1ac44145a0dd99ea02047e6bba229da12f2575337beebcf0","src/packed/mod.rs":"ad2f8e18996737347a1181a4457387276d139315bcabfc5e34494af0c0319701","src/packed/pattern.rs":"3abf3835d4c4f8a43753c52936a894d819f713f233fc046e19de5ef95200dcce","src/packed/rabinkarp.rs":"ad7d4533f96aed336e29c5553657ae57b0d733ace9c707a6cf4d08d8fc6edee5","src/packed/teddy/README.md":"b4b83fb5afafbbea6cb76fe70f49cc8ced888f682d98abe5ea5773e95d9ec2b0","src/packed/teddy/compile.rs":"aad40b3f93d2c388b409b31fb2795d414a365237789d5b1a7510d97ceb8ce260","src/packed/teddy/mod.rs":"83b52bd80272970ad17234d0db293d17c1710ec582302bf516b203c8edec037e","src/packed/teddy/runtime.rs":"836146e90b320b14fa2c65fe4af7915a41f6fb04408aac5fac731c22ff46adae","src/packed/tests.rs":"b8dc4d3281ecd6d0fa2bf7ef16cf292a467dfdce64e470c7921e983bfa60fee2","src/packed/vector.rs":"ab3c0535fca5f09198d58cbfae44c292aeb3ce44bc92bca36d30dc72963639fc","src/prefilter.rs":"82a3eb6d5c0c3f10bc8d5f57d55d6d14cf4cf21c475bb5253e1921084063b8d7","src/state_id.rs":"519ec8c7bf3fa72103d4c561c193759759f535dca924c9853efe630f406d2029","src/tests.rs":"ee9b85f3c27cb2fba5796e5be8019aafecc13ee9a4f614553f2bc8953f51c6de"},"package":"cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"} \ No newline at end of file 
+{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"747d0fcb1257c9b8b013104da3c5a67f5d6cf8a95a2163b13703c01cab2c010a","DESIGN.md":"59c960e1b73b1d7fb41e4df6c0c1b1fcf44dd2ebc8a349597a7d0595f8cb5130","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"afc4d559a98cf190029af0bf320fc0022725e349cd2a303aac860254e28f3c53","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"c699c07df70be45c666e128509ad571a7649d2073e4ae16ac1efd6793c9c6890","src/automaton.rs":"22258a3e118672413119f8f543a9b912cce954e63524575c0ebfdf9011f9c2dd","src/dfa.rs":"bfef1a94c5e7410584b1beb4e857b40d1ae2031b881cbc06fb1300409bbd555f","src/lib.rs":"2a92d5c5e930f2d306508802e8a929135e1f41c9f5f8deda8f7eb98947179dd2","src/macros.rs":"c6c52ae05b24433cffaca7b78b3645d797862c5d5feffddf9f54909095ed6e05","src/nfa/contiguous.rs":"aeb6ee5fd80eea04decbc4b46aa27d1ab270b78d416a644da25b7934f009ee66","src/nfa/mod.rs":"ee7b3109774d14bbad5239c16bb980dd6b8185ec136d94fbaf2f0dc27d5ffa15","src/nfa/noncontiguous.rs":"de94f02b04efd8744fb096759a8897c22012b0e0ca3ace161fd87c71befefe04","src/packed/api.rs":"160d3b10823316f7b0924e13c3afd222c8a7db5c0a00432401f311ef27d6a1b7","src/packed/ext.rs":"66be06fde8558429da23a290584d4b9fae665bf64c2578db4fe5f5f3ee864869","src/packed/mod.rs":"0020cd6f07ba5c8955923a9516d7f758864260eda53a6b6f629131c45ddeec62","src/packed/pattern.rs":"1e3a289a730c141fc30b295811e372d046c6619c7fd670308299b889a06c7673","src/packed/rabinkarp.rs":"403146eb1d838a84601d171393542340513cd1ee7ff750f2372161dd47746586","src/packed/teddy/README.md":"3a43194b64e221543d885176aba3beb1224a927385a20eca842daf6b0ea2f342","src/packed/teddy/builder.rs":"720735ea6c7ff92b081426513e6e82feed24a922849297bb538d28f7b8129f81","src/packed/teddy/generic.rs":"ea252ab05b32cea7dd9d71e332071d243db7dd0362e049252a27e5881ba2bf39","src/packed/teddy/mod.rs":"17d741f7e2fb9dbac5ba7d1bd4542cf1e35e9f146ace728e23fe6bbed20028b2","src/packed/tests.rs":"8e2f56eb3890ed3876ecb47d3121996e416563127b6430110d7b516df3f83b4b","src/packed/vector.rs":"840065521cbd4701fa5b8b506d1537843d858c903f7cadf3c68749ea1780874b","src/tests.rs":"c68192ab97b6161d0d6ee96fefd80cc7d14e4486ddcd8d1f82b5c92432c24ed5","src/transducer.rs":"02daa33a5d6dac41dcfd67f51df7c0d4a91c5131c781fb54c4de3520c585a6e1","src/util/alphabet.rs":"6dc22658a38deddc0279892035b18870d4585069e35ba7c7e649a24509acfbcc","src/util/buffer.rs":"f9e37f662c46c6ecd734458dedbe76c3bb0e84a93b6b0117c0d4ad3042413891","src/util/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/util/debug.rs":"ab301ad59aa912529cb97233a54a05914dd3cb2ec43e6fec7334170b97ac5998","src/util/error.rs":"ecccd60e7406305023efcc6adcc826eeeb083ab8f7fbfe3d97469438cd4c4e5c","src/util/int.rs":"4ab6dbdba10027ddec2af63a9b28ce4eee30ded0daa5d8eb068b2b55542b6039","src/util/mod.rs":"7ab28d11323ecdbd982087f32eb8bceeee84f1a2583f3aae27039c36d58cf12c","src/util/prefilter.rs":"9fa4498f18bf70478b1996c1a013698b626d15f119aa81dbc536673c9f045718","src/util/primitives.rs":"f89f3fa1d8db4e37de9ca767c6d05e346404837cade6d063bba68972fafa610b","src/util/remapper.rs":"9f12d911583a325c11806eeceb46d0dfec863cfcfa241aed84d31af73da746e5","src/util/search.rs":"6af803e08b8b8c8a33db100623f1621b0d741616524ce40893d8316897f27ffe","src/util/special.rs":"7d2f9cb9dd9771f59816e829b2d96b1239996f32939ba98764e121696c52b146"},"package":"b2969dcb958b36655471fc61f7e416fa76033bdd4bf
ed0678d8fee1e2d07a1f0"} \ No newline at end of file diff --git a/vendor/aho-corasick/Cargo.toml b/vendor/aho-corasick/Cargo.toml index ee8e94e..05e899c 100644 --- a/vendor/aho-corasick/Cargo.toml +++ b/vendor/aho-corasick/Cargo.toml @@ -10,11 +10,16 @@ # See Cargo.toml.orig for the original contents. [package] -edition = "2018" +edition = "2021" +rust-version = "1.60.0" name = "aho-corasick" -version = "0.7.20" +version = "1.1.2" authors = ["Andrew Gallant "] -exclude = ["/aho-corasick-debug"] +exclude = [ + "/aho-corasick-debug", + "/benchmarks", + "/tmp", +] autotests = false description = "Fast multiple substring searching." homepage = "https://github.com/BurntSushi/aho-corasick" @@ -23,28 +28,47 @@ keywords = [ "string", "search", "text", - "aho", + "pattern", "multi", ] categories = ["text-processing"] license = "Unlicense OR MIT" repository = "https://github.com/BurntSushi/aho-corasick" +[package.metadata.docs.rs] +all-features = true +rustdoc-args = [ + "--cfg", + "docsrs", + "--generate-link-to-definition", +] + [profile.bench] -debug = true +debug = 2 [profile.release] -debug = true +debug = 2 [lib] name = "aho_corasick" +[dependencies.log] +version = "0.4.17" +optional = true + [dependencies.memchr] version = "2.4.0" +optional = true default-features = false -[dev-dependencies] +[dev-dependencies.doc-comment] +version = "0.3.3" [features] -default = ["std"] -std = ["memchr/std"] +default = [ + "std", + "perf-literal", +] +logging = ["dep:log"] +perf-literal = ["dep:memchr"] +std = ["memchr?/std"] diff --git a/vendor/aho-corasick/DESIGN.md b/vendor/aho-corasick/DESIGN.md index 0e15ad0..f911f0c 100644 --- a/vendor/aho-corasick/DESIGN.md +++ b/vendor/aho-corasick/DESIGN.md @@ -13,19 +13,16 @@ own, Aho-Corasick isn't that complicated. The complex pieces come from the different variants of Aho-Corasick implemented in this crate. Specifically, they are: -* Aho-Corasick as an NFA, using dense transitions near the root with sparse - transitions elsewhere. -* Aho-Corasick as a DFA. (An NFA is slower to search, but cheaper to construct - and uses less memory.) - * A DFA with pre-multiplied state identifiers. This saves a multiplication - instruction in the core search loop. - * A DFA with equivalence classes of bytes as the alphabet, instead of the - traditional 256-byte alphabet. This shrinks the size of the DFA in memory, - but adds an extra lookup in the core search loop to map the input byte to - an equivalent class. -* The option to choose how state identifiers are represented, via one of - u8, u16, u32, u64 or usize. This permits creating compact automatons when - matching a small number of patterns. +* Aho-Corasick as a noncontiguous NFA. States have their transitions + represented sparsely, and each state puts its transitions in its own separate + allocation. Hence the same "noncontiguous." +* Aho-Corasick as a contiguous NFA. This NFA uses a single allocation to + represent the transitions of all states. That is, transitions are laid out + contiguously in memory. Moreover, states near the starting state are + represented densely, such that finding the next state ID takes a constant + number of instructions. +* Aho-Corasick as a DFA. In this case, all states are represented densely in + a transition table that uses one allocation. * Supporting "standard" match semantics, along with its overlapping variant, in addition to leftmost-first and leftmost-longest semantics. 
The "standard" semantics are typically what you see in a textbook description of @@ -45,7 +42,7 @@ they are: magnitude faster than just Aho-Corasick, but don't scale as well. * Support for searching streams. This can reuse most of the underlying code, but does require careful buffering support. -* Support for anchored searches, which permit efficient `is_prefix` checks for +* Support for anchored searches, which permit efficient "is prefix" checks for a large number of patterns. When you combine all of this together along with trying to make everything as @@ -81,7 +78,7 @@ CPU's cache.) Aho-Corasick can be succinctly described as a trie with state transitions between some of the nodes that efficiently instruct the search algorithm to -try matching alternative keys in the automaton. The trick is that these state +try matching alternative keys in the trie. The trick is that these state transitions are arranged such that each byte of input needs to be inspected only once. These state transitions are typically called "failure transitions," because they instruct the searcher (the thing traversing the automaton while @@ -205,9 +202,10 @@ Other than the complication around traversing failure transitions, this code is still roughly "traverse the automaton with bytes from the haystack, and quit when a match is seen." -And that concludes our section on the basics. While we didn't go deep into -how the automaton is built (see `src/nfa.rs`, which has detailed comments about -that), the basic structure of Aho-Corasick should be reasonably clear. +And that concludes our section on the basics. While we didn't go deep into how +the automaton is built (see `src/nfa/noncontiguous.rs`, which has detailed +comments about that), the basic structure of Aho-Corasick should be reasonably +clear. # NFAs and DFAs @@ -226,17 +224,17 @@ By this formulation, the Aho-Corasick automaton described in the previous section is an NFA. This is because failure transitions are, effectively, epsilon transitions. That is, whenever the automaton is in state `S`, it is actually in the set of states that are reachable by recursively following -failure transitions from `S`. (This means that, for example, the start state -is always active since the start state is reachable via failure transitions -from any state in the automaton.) +failure transitions from `S` until you reach the start state. (This means +that, for example, the start state is always active since the start state is +reachable via failure transitions from any state in the automaton.) NFAs have a lot of nice properties. They tend to be easier to construct, and also tend to use less memory. However, their primary downside is that they are -typically slower to execute. For example, the code above showing how to search -with an Aho-Corasick automaton needs to potentially iterate through many -failure transitions for every byte of input. While this is a fairly small -amount of overhead, this can add up, especially if the automaton has a lot of -overlapping patterns with a lot of failure transitions. +typically slower to execute a search with. For example, the code above showing +how to search with an Aho-Corasick automaton needs to potentially iterate +through many failure transitions for every byte of input. While this is a +fairly small amount of overhead, this can add up, especially if the automaton +has a lot of overlapping patterns with a lot of failure transitions. 
A DFA's search code, by contrast, looks like this: @@ -266,13 +264,14 @@ in the alphabet, and then building a single state transition table. Building this DFA can be much more costly than building the NFA, and use much more memory, but the better performance can be worth it. -Users of this crate can actually choose between using an NFA or a DFA. By -default, an NFA is used, because it typically strikes the best balance between -space usage and search performance. But the DFA option is available for cases -where a little extra memory and upfront time building the automaton is okay. -For example, the `AhoCorasick::auto_configure` and -`AhoCorasickBuilder::auto_configure` methods will enable the DFA setting if -there are a small number of patterns. +Users of this crate can actually choose between using one of two possible NFAs +(noncontiguous or contiguous) or a DFA. By default, a contiguous NFA is used, +in most circumstances, but if the number of patterns is small enough a DFA will +be used. A contiguous NFA is chosen because it uses orders of magnitude less +memory than a DFA, takes only a little longer to build than a noncontiguous +NFA and usually gets pretty close to the search speed of a DFA. (Callers can +override this automatic selection via the `AhoCorasickBuilder::kind` +configuration.) # More DFA tricks @@ -322,17 +321,18 @@ The same optimization works even when equivalence classes are enabled, as described above. The only difference is that the premultiplication is by the total number of equivalence classes instead of 256. -There isn't much downside to premultiplying state identifiers, other than the -fact that you may need to choose a bigger integer representation than you would -otherwise. For example, if you don't premultiply state identifiers, then an -automaton that uses `u8` as a state identifier can hold up to 256 states. -However, if they are premultiplied, then it can only hold up to -`floor(256 / len(alphabet))` states. Thus premultiplication impacts how compact -your DFA can be. In practice, it's pretty rare to use `u8` as a state -identifier, so premultiplication is usually a good thing to do. +There isn't much downside to premultiplying state identifiers, other than that it +imposes a smaller limit on the total number of states in the DFA. Namely, with +premultiplied state identifiers, you run out of room in your state identifier +representation more rapidly than if the identifiers are just state indices. -Both equivalence classes and premultiplication are tuneable parameters via the -`AhoCorasickBuilder` type, and both are enabled by default. +Both equivalence classes and premultiplication are always enabled. There is an +`AhoCorasickBuilder::byte_classes` configuration, but disabling this just makes +it so there are always 256 equivalence classes, i.e., every class corresponds +to precisely one byte. When it's disabled, the equivalence class map itself is +still used. Disabling it is mainly useful when one is debugging the underlying +automaton, which can be easier to comprehend when it uses actual byte values for +its transitions instead of equivalence classes. # Match semantics @@ -371,10 +371,11 @@ either leftmost-first (Perl-like) or leftmost-longest (POSIX) match semantics. Supporting leftmost semantics requires a couple key changes: * Constructing the Aho-Corasick automaton changes a bit in both how the trie is - constructed and how failure transitions are found. Namely, only a subset of - the failure transitions are added.
Specifically, only the failure transitions - that either do not occur after a match or do occur after a match but preserve - that match are kept. (More details on this can be found in `src/nfa.rs`.) + constructed and how failure transitions are found. Namely, only a subset + of the failure transitions are added. Specifically, only the failure + transitions that either do not occur after a match or do occur after a match + but preserve that match are kept. (More details on this can be found in + `src/nfa/noncontiguous.rs`.) * The search algorithm changes slightly. Since we are looking for the leftmost match, we cannot quit as soon as a match is detected. Instead, after a match is detected, we must keep searching until either the end of the input or @@ -382,9 +383,9 @@ Supporting leftmost semantics requires a couple key changes: semantics. Dead states mean that searching should stop after a match has been found.) -Other implementations of Aho-Corasick do support leftmost match semantics, but -they do it with more overhead at search time, or even worse, with a queue of -matches and sophisticated hijinks to disambiguate the matches. While our +Most other implementations of Aho-Corasick do support leftmost match semantics, +but they do it with more overhead at search time, or even worse, with a queue +of matches and sophisticated hijinks to disambiguate the matches. While our construction algorithm becomes a bit more complicated, the correct match semantics fall out from the structure of the automaton itself. @@ -402,10 +403,10 @@ matches at that state, then they are reported before resuming the search. Enabling leftmost-first or leftmost-longest match semantics causes the automaton to use a subset of all failure transitions, which means that overlapping searches cannot be used. Therefore, if leftmost match semantics are -used, attempting to do an overlapping search will panic. Thus, to get -overlapping searches, the caller must use the default standard match semantics. -This behavior was chosen because there are only two alternatives, which were -deemed worse: +used, attempting to do an overlapping search will return an error (or panic +when using the infallible APIs). Thus, to get overlapping searches, the caller +must use the default standard match semantics. This behavior was chosen because +there are only two alternatives, which were deemed worse: * Compile two automatons internally, one for standard semantics and one for the semantics requested by the caller (if not standard). @@ -415,9 +416,7 @@ deemed worse: The first is untenable because of the amount of memory used by the automaton. The second increases the complexity of the API too much by adding too many types that do similar things. It is conceptually much simpler to keep all -searching isolated to a single type. Callers may query whether the automaton -supports overlapping searches via the `AhoCorasick::supports_overlapping` -method. +searching isolated to a single type. # Stream searching @@ -471,13 +470,12 @@ If all of that fails, then a packed multiple substring algorithm will be attempted. Currently, the only algorithm available for this is Teddy, but more may be added in the future. Teddy is unlike the above prefilters in that it confirms its own matches, so when Teddy is active, it might not be necessary -for Aho-Corasick to run at all. (See `Automaton::leftmost_find_at_no_state_imp` -in `src/automaton.rs`.) 
However, the current Teddy implementation only works -in `x86_64` and when SSSE3 or AVX2 are available, and moreover, only works -_well_ when there are a small number of patterns (say, less than 100). Teddy -also requires the haystack to be of a certain length (more than 16-34 bytes). -When the haystack is shorter than that, Rabin-Karp is used instead. (See -`src/packed/rabinkarp.rs`.) +for Aho-Corasick to run at all. However, the current Teddy implementation +only works in `x86_64` when SSSE3 or AVX2 are available or in `aarch64` +(using NEON), and moreover, only works _well_ when there are a small number +of patterns (say, less than 100). Teddy also requires the haystack to be of a +certain length (more than 16-34 bytes). When the haystack is shorter than that, +Rabin-Karp is used instead. (See `src/packed/rabinkarp.rs`.) There is a more thorough description of Teddy at [`src/packed/teddy/README.md`](src/packed/teddy/README.md). diff --git a/vendor/aho-corasick/README.md b/vendor/aho-corasick/README.md index f033e01..c0f525f 100644 --- a/vendor/aho-corasick/README.md +++ b/vendor/aho-corasick/README.md @@ -21,12 +21,8 @@ https://docs.rs/aho-corasick ### Usage -Add this to your `Cargo.toml`: - -```toml -[dependencies] -aho-corasick = "0.7" -``` +Run `cargo add aho-corasick` to automatically add this crate as a dependency +in your `Cargo.toml` file. ### Example: basic searching @@ -36,46 +32,47 @@ simultaneously. Each match includes the pattern that matched along with the byte offsets of the match. ```rust -use aho_corasick::AhoCorasick; +use aho_corasick::{AhoCorasick, PatternID}; let patterns = &["apple", "maple", "Snapple"]; let haystack = "Nobody likes maple in their apple flavored Snapple."; -let ac = AhoCorasick::new(patterns); +let ac = AhoCorasick::new(patterns).unwrap(); let mut matches = vec![]; for mat in ac.find_iter(haystack) { matches.push((mat.pattern(), mat.start(), mat.end())); } assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), + (PatternID::must(1), 13, 18), + (PatternID::must(0), 28, 33), + (PatternID::must(2), 43, 50), ]); ``` -### Example: case insensitivity +### Example: ASCII case insensitivity This is like the previous example, but matches `Snapple` case insensitively using `AhoCorasickBuilder`: ```rust -use aho_corasick::AhoCorasickBuilder; +use aho_corasick::{AhoCorasick, PatternID}; let patterns = &["apple", "maple", "snapple"]; let haystack = "Nobody likes maple in their apple flavored Snapple."; -let ac = AhoCorasickBuilder::new() +let ac = AhoCorasick::builder() .ascii_case_insensitive(true) - .build(patterns); + .build(patterns) + .unwrap(); let mut matches = vec![]; for mat in ac.find_iter(haystack) { matches.push((mat.pattern(), mat.start(), mat.end())); } assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), + (PatternID::must(1), 13, 18), + (PatternID::must(0), 28, 33), + (PatternID::must(2), 43, 50), ]); ``` @@ -85,7 +82,7 @@ assert_eq!(matches, vec![ This example shows how to execute a search and replace on a stream without loading the entire stream into memory first. 
-```rust +```rust,ignore use aho_corasick::AhoCorasick; let patterns = &["fox", "brown", "quick"]; @@ -96,7 +93,7 @@ let replace_with = &["sloth", "grey", "slow"]; let rdr = "The quick brown fox."; let mut wtr = vec![]; -let ac = AhoCorasick::new(patterns); +let ac = AhoCorasick::new(patterns).unwrap(); ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with) .expect("stream_replace_all failed"); assert_eq!(b"The slow grey sloth.".to_vec(), wtr); @@ -128,7 +125,7 @@ use aho_corasick::AhoCorasick; let patterns = &["Samwise", "Sam"]; let haystack = "Samwise"; -let ac = AhoCorasick::new(patterns); +let ac = AhoCorasick::new(patterns).unwrap(); let mat = ac.find(haystack).expect("should have a match"); assert_eq!("Sam", &haystack[mat.start()..mat.end()]); ``` @@ -137,14 +134,15 @@ And now here's the leftmost-first version, which matches how a Perl-like regex will work: ```rust -use aho_corasick::{AhoCorasickBuilder, MatchKind}; +use aho_corasick::{AhoCorasick, MatchKind}; let patterns = &["Samwise", "Sam"]; let haystack = "Samwise"; -let ac = AhoCorasickBuilder::new() +let ac = AhoCorasick::builder() .match_kind(MatchKind::LeftmostFirst) - .build(patterns); + .build(patterns) + .unwrap(); let mat = ac.find(haystack).expect("should have a match"); assert_eq!("Samwise", &haystack[mat.start()..mat.end()]); ``` @@ -156,7 +154,7 @@ expression alternation. See `MatchKind` in the docs for more details. ### Minimum Rust version policy -This crate's minimum supported `rustc` version is `1.41.1`. +This crate's minimum supported `rustc` version is `1.60.0`. The current policy is that the minimum Rust version required to use this crate can be increased in minor version updates. For example, if `crate 1.0` requires @@ -172,16 +170,5 @@ supported version of Rust. * [G-Research/ahocorasick_rs](https://github.com/G-Research/ahocorasick_rs/) is a Python wrapper for this library. - - -### Future work - -Here are some plans for the future: - -* Assuming the current API is sufficient, I'd like to commit to it and release - a `1.0` version of this crate some time in the next 6-12 months. -* Support stream searching with leftmost match semantics. Currently, only - standard match semantics are supported. Getting this right seems possible, - but is tricky since the match state needs to be propagated through multiple - searches. (With standard semantics, as soon as a match is seen the search - ends.) +* [tmikus/ahocorasick_rs](https://github.com/tmikus/ahocorasick_rs) is a Go + wrapper for this library. diff --git a/vendor/aho-corasick/src/ahocorasick.rs b/vendor/aho-corasick/src/ahocorasick.rs index cfd74fd..2947627 100644 --- a/vendor/aho-corasick/src/ahocorasick.rs +++ b/vendor/aho-corasick/src/ahocorasick.rs @@ -1,47 +1,137 @@ -use std::io; - -use crate::automaton::Automaton; -use crate::buffer::Buffer; -use crate::dfa::{self, DFA}; -use crate::error::Result; -use crate::nfa::{self, NFA}; -use crate::packed; -use crate::prefilter::{Prefilter, PrefilterState}; -use crate::state_id::StateID; -use crate::Match; +use core::{ + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, +}; + +use alloc::{string::String, sync::Arc, vec::Vec}; + +use crate::{ + automaton::{self, Automaton, OverlappingState}, + dfa, + nfa::{contiguous, noncontiguous}, + util::{ + error::{BuildError, MatchError}, + prefilter::Prefilter, + primitives::{PatternID, StateID}, + search::{Anchored, Input, Match, MatchKind, StartKind}, + }, +}; /// An automaton for searching multiple strings in linear time. 
/// /// The `AhoCorasick` type supports a few basic ways of constructing an -/// automaton, including -/// [`AhoCorasick::new`](struct.AhoCorasick.html#method.new) -/// and -/// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured). -/// However, there are a fair number of configurable options that can be set -/// by using -/// [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) -/// instead. Such options include, but are not limited to, how matches are -/// determined, simple case insensitivity, whether to use a DFA or not and -/// various knobs for controlling the space-vs-time trade offs taken when -/// building the automaton. -/// -/// If you aren't sure where to start, try beginning with -/// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured). +/// automaton, with the default being [`AhoCorasick::new`]. However, there +/// are a fair number of configurable options that can be set by using +/// [`AhoCorasickBuilder`] instead. Such options include, but are not limited +/// to, how matches are determined, simple case insensitivity, whether to use a +/// DFA or not and various knobs for controlling the space-vs-time trade offs +/// taken when building the automaton. /// /// # Resource usage /// -/// Aho-Corasick automatons are always constructed in `O(p)` time, where `p` -/// is the combined length of all patterns being searched. With that said, -/// building an automaton can be fairly costly because of high constant -/// factors, particularly when enabling the -/// [DFA](struct.AhoCorasickBuilder.html#method.dfa) -/// option (which is disabled by default). For this reason, it's generally a -/// good idea to build an automaton once and reuse it as much as possible. +/// Aho-Corasick automatons are always constructed in `O(p)` time, where +/// `p` is the combined length of all patterns being searched. With that +/// said, building an automaton can be fairly costly because of high constant +/// factors, particularly when enabling the [DFA](AhoCorasickKind::DFA) option +/// with [`AhoCorasickBuilder::kind`]. For this reason, it's generally a good +/// idea to build an automaton once and reuse it as much as possible. +/// +/// Aho-Corasick automatons can also use a fair bit of memory. To get +/// a concrete idea of how much memory is being used, try using the +/// [`AhoCorasick::memory_usage`] method. +/// +/// To give a quick idea of the differences between Aho-Corasick +/// implementations and their resource usage, here's a sample of construction +/// times and heap memory used after building an automaton from 100,000 +/// randomly selected titles from Wikipedia: +/// +/// * 99MB for a [`noncontiguous::NFA`] in 240ms. +/// * 21MB for a [`contiguous::NFA`] in 275ms. +/// * 1.6GB for a [`dfa::DFA`] in 1.88s. +/// +/// (Note that the memory usage above reflects the size of each automaton and +/// not peak memory usage. For example, building a contiguous NFA requires +/// first building a noncontiguous NFA. Once the contiguous NFA is built, the +/// noncontiguous NFA is freed.) +/// +/// This experiment very strongly argues that a contiguous NFA is often the +/// best balance in terms of resource usage. It takes a little longer to build, +/// but its memory usage is quite small. Its search speed (not listed) is +/// also often faster than a noncontiguous NFA, but a little slower than a +/// DFA. Indeed, when no specific [`AhoCorasickKind`] is used (which is the +/// default), a contiguous NFA is used in most cases. 
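The comparison above is easy to reproduce at a small scale. A short sketch, assuming the `kind` and `memory_usage` methods from the vendored 1.1 API:

```rust
use aho_corasick::{AhoCorasick, AhoCorasickKind};

fn main() {
    let patterns = &["Sherlock", "Holmes", "Watson", "Adler"];
    for kind in [
        AhoCorasickKind::NoncontiguousNFA,
        AhoCorasickKind::ContiguousNFA,
        AhoCorasickKind::DFA,
    ] {
        let ac = AhoCorasick::builder()
            .kind(Some(kind))
            .build(patterns)
            .unwrap();
        // Heap usage of the finished automaton, mirroring the numbers
        // quoted in the doc comment above (at a much smaller scale).
        println!("{:?}: {} bytes", ac.kind(), ac.memory_usage());
    }
}
```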
+/// +/// The only "catch" to using a contiguous NFA is that, because of its variety +/// of compression tricks, it may not be able to support automatons as large as +/// what the noncontiguous NFA supports. In which case, building a contiguous +/// NFA will fail and (by default) `AhoCorasick` will automatically fall +/// back to a noncontiguous NFA. (This typically only happens when building +/// automatons from millions of patterns.) Otherwise, the small additional time +/// for building a contiguous NFA is almost certainly worth it. +/// +/// # Cloning +/// +/// The `AhoCorasick` type uses thread safe reference counting internally. It +/// is guaranteed that it is cheap to clone. +/// +/// # Search configuration +/// +/// Most of the search routines accept anything that can be cheaply converted +/// to an [`Input`]. This includes `&[u8]`, `&str` and `Input` itself. +/// +/// # Construction failure +/// +/// It is generally possible for building an Aho-Corasick automaton to fail. +/// Construction can fail in generally one way: when the inputs provided are +/// too big. Whether that's a pattern that is too long, too many patterns +/// or some combination of both. A first approximation for the scale at which +/// construction can fail is somewhere around "millions of patterns." +/// +/// For that reason, if you're building an Aho-Corasick automaton from +/// untrusted input (or input that doesn't have any reasonable bounds on its +/// size), then it is strongly recommended to handle the possibility of an +/// error. +/// +/// If you're constructing an Aho-Corasick automaton from static or trusted +/// data, then it is likely acceptable to panic (by calling `unwrap()` or +/// `expect()`) if construction fails. +/// +/// # Fallibility /// -/// Aho-Corasick automatons can also use a fair bit of memory. To get a -/// concrete idea of how much memory is being used, try using the -/// [`AhoCorasick::heap_bytes`](struct.AhoCorasick.html#method.heap_bytes) -/// method. +/// The `AhoCorasick` type provides a number of methods for searching, as one +/// might expect. Depending on how the Aho-Corasick automaton was built and +/// depending on the search configuration, it is possible for a search to +/// return an error. Since an error is _never_ dependent on the actual contents +/// of the haystack, this type provides both infallible and fallible methods +/// for searching. The infallible methods panic if an error occurs, and can be +/// used for convenience and when you know the search will never return an +/// error. +/// +/// For example, the [`AhoCorasick::find_iter`] method is the infallible +/// version of the [`AhoCorasick::try_find_iter`] method. +/// +/// Examples of errors that can occur: +/// +/// * Running a search that requires [`MatchKind::Standard`] semantics (such +/// as a stream or overlapping search) with an automaton that was built with +/// [`MatchKind::LeftmostFirst`] or [`MatchKind::LeftmostLongest`] semantics. +/// * Running an anchored search with an automaton that only supports +/// unanchored searches. (By default, `AhoCorasick` only supports unanchored +/// searches. But this can be toggled with [`AhoCorasickBuilder::start_kind`].) +/// * Running an unanchored search with an automaton that only supports +/// anchored searches. +/// +/// The common thread between the different types of errors is that they are +/// all rooted in the automaton construction and search configurations. 
+/// If those configurations are a static property of your program, then it is
+/// reasonable to call infallible routines since you know an error will never
+/// occur. And if one _does_ occur, then it's a bug in your program.
+///
+/// To re-iterate, if the patterns, build or search configuration come from
+/// user or untrusted data, then you should handle errors at build or search
+/// time. If only the haystack comes from user or untrusted data, then there
+/// should be no need to handle errors anywhere and it is generally encouraged
+/// to `unwrap()` (or `expect()`) both build and search time calls.
 ///
 /// # Examples
 ///
@@ -50,22 +140,23 @@ use crate::Match;
 /// pattern that matched along with the byte offsets of the match.
 ///
 /// ```
-/// use aho_corasick::AhoCorasickBuilder;
+/// use aho_corasick::{AhoCorasick, PatternID};
 ///
 /// let patterns = &["apple", "maple", "snapple"];
 /// let haystack = "Nobody likes maple in their apple flavored Snapple.";
 ///
-/// let ac = AhoCorasickBuilder::new()
+/// let ac = AhoCorasick::builder()
 ///     .ascii_case_insensitive(true)
-///     .build(patterns);
+///     .build(patterns)
+///     .unwrap();
 /// let mut matches = vec![];
 /// for mat in ac.find_iter(haystack) {
 ///     matches.push((mat.pattern(), mat.start(), mat.end()));
 /// }
 /// assert_eq!(matches, vec![
-///     (1, 13, 18),
-///     (0, 28, 33),
-///     (2, 43, 50),
+///     (PatternID::must(1), 13, 18),
+///     (PatternID::must(0), 28, 33),
+///     (PatternID::must(2), 43, 50),
 /// ]);
 /// ```
 ///
@@ -78,45 +169,78 @@ use crate::Match;
 /// let haystack = "The quick brown fox.";
 /// let replace_with = &["sloth", "grey", "slow"];
 ///
-/// let ac = AhoCorasick::new(patterns);
+/// let ac = AhoCorasick::new(patterns).unwrap();
 /// let result = ac.replace_all(haystack, replace_with);
 /// assert_eq!(result, "The slow grey sloth.");
 /// ```
-#[derive(Clone, Debug)]
-pub struct AhoCorasick<S: StateID = usize> {
-    imp: Imp<S>,
-    match_kind: MatchKind,
+#[derive(Clone)]
+pub struct AhoCorasick {
+    /// The underlying Aho-Corasick automaton. It's one of
+    /// nfa::noncontiguous::NFA, nfa::contiguous::NFA or dfa::DFA.
+    aut: Arc<dyn AcAutomaton>,
+    /// The specific Aho-Corasick kind chosen. This makes it possible to
+    /// inspect any `AhoCorasick` and know what kind of search strategy it
+    /// uses.
+    kind: AhoCorasickKind,
+    /// The start kind of this automaton as configured by the caller.
+    ///
+    /// We don't really *need* to put this here, since the underlying automaton
+    /// will correctly return errors if the caller requests an unsupported
+    /// search type. But we do keep this here for API behavior consistency.
+    /// Namely, the NFAs in this crate support both unanchored and anchored
+    /// searches unconditionally. There's no way to disable one or the other.
+    /// They always both work. But the DFA in this crate specifically only
+    /// supports both unanchored and anchored searches if it's configured to
+    /// do so. Why? Because for the DFA, supporting both essentially requires
+    /// two copies of the transition table: one generated by following failure
+    /// transitions from the original NFA and one generated by not following
+    /// those failure transitions.
+    ///
+    /// So why record the start kind here? Well, consider what happens
+    /// when no specific 'AhoCorasickKind' is selected by the caller and
+    /// 'StartKind::Unanchored' is used (both are the default). It *might*
+    /// result in using a DFA or it might pick an NFA. If it picks an NFA, the
+    /// caller would then be able to run anchored searches, even though the
+    /// caller only asked for support for unanchored searches. Maybe that's
+    /// fine, but what if the DFA was chosen instead? Oops, the caller would
+    /// get an error.
+    ///
+    /// Basically, it seems bad to return an error or not based on some
+    /// internal implementation choice. So we smooth things out and ensure
+    /// anchored searches *always* report an error when only unanchored
+    /// support was asked for (and vice versa), even if the underlying
+    /// automaton supports it.
+    start_kind: StartKind,
 }
 
+/// Convenience constructors for an Aho-Corasick searcher. To configure the
+/// searcher, use an [`AhoCorasickBuilder`] instead.
 impl AhoCorasick {
     /// Create a new Aho-Corasick automaton using the default configuration.
     ///
     /// The default configuration optimizes for less space usage, but at the
     /// expense of longer search times. To change the configuration, use
-    /// [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html)
-    /// for fine-grained control, or
-    /// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured)
-    /// for automatic configuration if you aren't sure which settings to pick.
+    /// [`AhoCorasickBuilder`].
     ///
-    /// This uses the default
-    /// [`MatchKind::Standard`](enum.MatchKind.html#variant.Standard)
-    /// match semantics, which reports a match as soon as it is found. This
-    /// corresponds to the standard match semantics supported by textbook
-    /// descriptions of the Aho-Corasick algorithm.
+    /// This uses the default [`MatchKind::Standard`] match semantics, which
+    /// reports a match as soon as it is found. This corresponds to the
+    /// standard match semantics supported by textbook descriptions of the
+    /// Aho-Corasick algorithm.
     ///
     /// # Examples
     ///
     /// Basic usage:
     ///
     /// ```
-    /// use aho_corasick::AhoCorasick;
+    /// use aho_corasick::{AhoCorasick, PatternID};
    ///
-    /// let ac = AhoCorasick::new(&[
-    ///     "foo", "bar", "baz",
-    /// ]);
-    /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
+    /// let ac = AhoCorasick::new(&["foo", "bar", "baz"]).unwrap();
+    /// assert_eq!(
+    ///     Some(PatternID::must(1)),
+    ///     ac.find("xxx bar xxx").map(|m| m.pattern()),
+    /// );
     /// ```
-    pub fn new<I, P>(patterns: I) -> AhoCorasick
+    pub fn new<I, P>(patterns: I) -> Result<AhoCorasick, BuildError>
     where
         I: IntoIterator<Item = P>,
         P: AsRef<[u8]>,
@@ -124,48 +248,52 @@ impl AhoCorasick {
         AhoCorasickBuilder::new().build(patterns)
     }
 
-    /// Build an Aho-Corasick automaton with an automatically determined
-    /// configuration.
-    ///
-    /// Specifically, this requires a slice of patterns instead of an iterator
-    /// since the configuration is determined by looking at the patterns before
-    /// constructing the automaton. The idea here is to balance space and time
-    /// automatically. That is, when searching a small number of patterns, this
-    /// will attempt to use the fastest possible configuration since the total
-    /// space required will be small anyway. As the number of patterns grows,
-    /// this will fall back to slower configurations that use less space.
+    /// A convenience method for returning a new Aho-Corasick builder.
     ///
-    /// If you want auto configuration but with match semantics different from
-    /// the default `MatchKind::Standard`, then use
-    /// [`AhoCorasickBuilder::auto_configure`](struct.AhoCorasickBuilder.html#method.auto_configure).
+    /// This usually permits one to just import the `AhoCorasick` type.
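The long comment above about recording `start_kind` has a visible consequence in the API. A small sketch, assuming the 1.x types used elsewhere in this patch (`StartKind`, `Anchored`, `Input`): a searcher built for anchored searches only rejects the default unanchored search, regardless of which automaton was chosen internally.

```rust
use aho_corasick::{AhoCorasick, Anchored, Input, StartKind};

fn main() {
    let ac = AhoCorasick::builder()
        .start_kind(StartKind::Anchored)
        .build(&["foo"])
        .unwrap();
    // Unanchored support was not asked for, so an unanchored search is
    // always an error, no matter which automaton was picked...
    assert!(ac.try_find(Input::new("foo")).is_err());
    // ...while the anchored search that was asked for works.
    let input = Input::new("foo").anchored(Anchored::Yes);
    assert!(ac.try_find(input).unwrap().is_some());
}
```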
     ///
     /// # Examples
     ///
-    /// Basic usage is just like `new`, except you must provide a slice:
+    /// Basic usage:
     ///
     /// ```
-    /// use aho_corasick::AhoCorasick;
+    /// use aho_corasick::{AhoCorasick, Match, MatchKind};
     ///
-    /// let ac = AhoCorasick::new_auto_configured(&[
-    ///     "foo", "bar", "baz",
-    /// ]);
-    /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .build(&["samwise", "sam"])
+    ///     .unwrap();
+    /// assert_eq!(Some(Match::must(0, 0..7)), ac.find("samwise"));
     /// ```
-    pub fn new_auto_configured<B>(patterns: &[B]) -> AhoCorasick
-    where
-        B: AsRef<[u8]>,
-    {
-        AhoCorasickBuilder::new().auto_configure(patterns).build(patterns)
+    pub fn builder() -> AhoCorasickBuilder {
+        AhoCorasickBuilder::new()
     }
 }
 
-impl<S: StateID> AhoCorasick<S> {
+/// Infallible search routines. These APIs panic when the underlying search
+/// would otherwise fail. Infallible routines are useful because the errors are
+/// a result of both search-time configuration and what configuration is used
+/// to build the Aho-Corasick searcher. Both of these things are not usually
+/// the result of user input, and thus, an error is typically indicative of a
+/// programmer error. In cases where callers want errors instead of panics, use
+/// the corresponding `try` method in the section below.
+impl AhoCorasick {
     /// Returns true if and only if this automaton matches the haystack at any
     /// position.
     ///
-    /// `haystack` may be any type that is cheaply convertible to a `&[u8]`.
-    /// This includes, but is not limited to, `String`, `&str`, `Vec<u8>`, and
-    /// `&[u8]` itself.
+    /// `input` may be any type that is cheaply convertible to an `Input`. This
+    /// includes, but is not limited to, `&str` and `&[u8]`.
+    ///
+    /// Aside from convenience, when `AhoCorasick` was built with
+    /// leftmost-first or leftmost-longest semantics, this might result in a
+    /// search that visits less of the haystack than [`AhoCorasick::find`]
+    /// would otherwise. (For standard semantics, matches are always
+    /// immediately returned once they are seen, so there is no way for this to
+    /// do less work in that case.)
+    ///
+    /// Note that there is no corresponding fallible routine for this method.
+    /// If you need a fallible version of this, then [`AhoCorasick::try_find`]
+    /// can be used with [`Input::earliest`] enabled.
     ///
     /// # Examples
     ///
@@ -176,77 +304,43 @@ impl<S: StateID> AhoCorasick<S> {
     ///
     /// let ac = AhoCorasick::new(&[
     ///     "foo", "bar", "quux", "baz",
-    /// ]);
+    /// ]).unwrap();
     /// assert!(ac.is_match("xxx bar xxx"));
     /// assert!(!ac.is_match("xxx qux xxx"));
     /// ```
-    pub fn is_match<B: AsRef<[u8]>>(&self, haystack: B) -> bool {
-        self.earliest_find(haystack).is_some()
-    }
-
-    /// Returns the location of the first detected match in `haystack`.
-    ///
-    /// This method has the same behavior regardless of the
-    /// [`MatchKind`](enum.MatchKind.html)
-    /// of this automaton.
-    ///
-    /// `haystack` may be any type that is cheaply convertible to a `&[u8]`.
-    /// This includes, but is not limited to, `String`, `&str`, `Vec<u8>`, and
-    /// `&[u8]` itself.
-    ///
-    /// # Examples
-    ///
-    /// Basic usage:
-    ///
-    /// ```
-    /// use aho_corasick::AhoCorasick;
-    ///
-    /// let ac = AhoCorasick::new(&[
-    ///     "abc", "b",
-    /// ]);
-    /// let mat = ac.earliest_find("abcd").expect("should have match");
-    /// assert_eq!(1, mat.pattern());
-    /// assert_eq!((1, 2), (mat.start(), mat.end()));
-    /// ```
-    pub fn earliest_find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<Match> {
-        let mut prestate = PrefilterState::new(self.max_pattern_len());
-        let mut start = self.imp.start_state();
-        self.imp.earliest_find_at(
-            &mut prestate,
-            haystack.as_ref(),
-            0,
-            &mut start,
-        )
+    pub fn is_match<'h, I: Into<Input<'h>>>(&self, input: I) -> bool {
+        self.aut
+            .try_find(&input.into().earliest(true))
+            .expect("AhoCorasick::try_find is not expected to fail")
+            .is_some()
     }
 
     /// Returns the location of the first match according to the match
     /// semantics that this automaton was constructed with.
     ///
-    /// When using `MatchKind::Standard`, this corresponds precisely to the
-    /// same behavior as
-    /// [`earliest_find`](struct.AhoCorasick.html#method.earliest_find).
-    /// Otherwise, match semantics correspond to either
-    /// [leftmost-first](enum.MatchKind.html#variant.LeftmostFirst)
-    /// or
-    /// [leftmost-longest](enum.MatchKind.html#variant.LeftmostLongest).
+    /// `input` may be any type that is cheaply convertible to an `Input`. This
+    /// includes, but is not limited to, `&str` and `&[u8]`.
     ///
-    /// `haystack` may be any type that is cheaply convertible to a `&[u8]`.
-    /// This includes, but is not limited to, `String`, `&str`, `Vec<u8>`, and
-    /// `&[u8]` itself.
+    /// This is the infallible version of [`AhoCorasick::try_find`].
+    ///
+    /// # Panics
+    ///
+    /// This panics when [`AhoCorasick::try_find`] would return an error.
     ///
     /// # Examples
     ///
     /// Basic usage, with standard semantics:
     ///
     /// ```
-    /// use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// use aho_corasick::{AhoCorasick, MatchKind};
     ///
     /// let patterns = &["b", "abc", "abcd"];
     /// let haystack = "abcd";
     ///
-    /// let ac = AhoCorasickBuilder::new()
+    /// let ac = AhoCorasick::builder()
     ///     .match_kind(MatchKind::Standard) // default, not necessary
-    ///     .build(patterns);
+    ///     .build(patterns)
+    ///     .unwrap();
     /// let mat = ac.find(haystack).expect("should have a match");
     /// assert_eq!("b", &haystack[mat.start()..mat.end()]);
     /// ```
@@ -254,14 +348,15 @@ impl<S: StateID> AhoCorasick<S> {
     /// Now with leftmost-first semantics:
     ///
     /// ```
-    /// use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// use aho_corasick::{AhoCorasick, MatchKind};
     ///
     /// let patterns = &["b", "abc", "abcd"];
     /// let haystack = "abcd";
     ///
-    /// let ac = AhoCorasickBuilder::new()
+    /// let ac = AhoCorasick::builder()
     ///     .match_kind(MatchKind::LeftmostFirst)
-    ///     .build(patterns);
+    ///     .build(patterns)
+    ///     .unwrap();
     /// let mat = ac.find(haystack).expect("should have a match");
     /// assert_eq!("abc", &haystack[mat.start()..mat.end()]);
     /// ```
@@ -269,160 +364,287 @@ impl<S: StateID> AhoCorasick<S> {
     /// And finally, leftmost-longest semantics:
     ///
     /// ```
-    /// use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// use aho_corasick::{AhoCorasick, MatchKind};
     ///
     /// let patterns = &["b", "abc", "abcd"];
     /// let haystack = "abcd";
     ///
-    /// let ac = AhoCorasickBuilder::new()
+    /// let ac = AhoCorasick::builder()
     ///     .match_kind(MatchKind::LeftmostLongest)
-    ///     .build(patterns);
+    ///     .build(patterns)
+    ///     .unwrap();
     /// let mat = ac.find(haystack).expect("should have a match");
-    /// assert_eq!("abcd", &haystack[mat.start()..mat.end()]);
     /// ```
-    pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<Match> {
-        let mut prestate = PrefilterState::new(self.max_pattern_len());
-        self.imp.find_at_no_state(&mut prestate, haystack.as_ref(), 0)
+    ///
+    /// # Example: configuring a search
+    ///
+    /// Because this method accepts anything that can be turned into an
+    /// [`Input`], it's possible to provide an `Input` directly in order to
+    /// configure the search. In this example, we show how to use the
+    /// `earliest` option to force the search to return as soon as it knows
+    /// a match has occurred.
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, Input, MatchKind};
+    ///
+    /// let patterns = &["b", "abc", "abcd"];
+    /// let haystack = "abcd";
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostLongest)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let mat = ac.find(Input::new(haystack).earliest(true))
+    ///     .expect("should have a match");
+    /// // The correct leftmost-longest match here is 'abcd', but since we
+    /// // told the search to quit as soon as it knows a match has occurred,
+    /// // we get a different match back.
+    /// assert_eq!("b", &haystack[mat.start()..mat.end()]);
+    /// ```
+    pub fn find<'h, I: Into<Input<'h>>>(&self, input: I) -> Option<Match> {
+        self.try_find(input)
+            .expect("AhoCorasick::try_find is not expected to fail")
+    }
+
+    /// Returns the location of the first overlapping match in the given
+    /// input with respect to the current state of the underlying searcher.
+    ///
+    /// `input` may be any type that is cheaply convertible to an `Input`. This
+    /// includes, but is not limited to, `&str` and `&[u8]`.
+    ///
+    /// Overlapping searches do not report matches in their return value.
+    /// Instead, matches can be accessed via [`OverlappingState::get_match`]
+    /// after a search call.
+    ///
+    /// This is the infallible version of
+    /// [`AhoCorasick::try_find_overlapping`].
+    ///
+    /// # Panics
+    ///
+    /// This panics when [`AhoCorasick::try_find_overlapping`] would
+    /// return an error. For example, when the Aho-Corasick searcher
+    /// doesn't support overlapping searches. (Only searchers built with
+    /// [`MatchKind::Standard`] semantics support overlapping searches.)
+    ///
+    /// # Example
+    ///
+    /// This shows how we can repeatedly call an overlapping search without
+    /// ever needing to explicitly re-slice the haystack. Overlapping search
+    /// works this way because searches depend on state saved during the
+    /// previous search.
+    ///
+    /// ```
+    /// use aho_corasick::{
+    ///     automaton::OverlappingState,
+    ///     AhoCorasick, Input, Match,
+    /// };
+    ///
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = "append the app to the appendage";
+    ///
+    /// let ac = AhoCorasick::new(patterns).unwrap();
+    /// let mut state = OverlappingState::start();
+    ///
+    /// ac.find_overlapping(haystack, &mut state);
+    /// assert_eq!(Some(Match::must(2, 0..3)), state.get_match());
+    ///
+    /// ac.find_overlapping(haystack, &mut state);
+    /// assert_eq!(Some(Match::must(0, 0..6)), state.get_match());
+    ///
+    /// ac.find_overlapping(haystack, &mut state);
+    /// assert_eq!(Some(Match::must(2, 11..14)), state.get_match());
+    ///
+    /// ac.find_overlapping(haystack, &mut state);
+    /// assert_eq!(Some(Match::must(2, 22..25)), state.get_match());
+    ///
+    /// ac.find_overlapping(haystack, &mut state);
+    /// assert_eq!(Some(Match::must(0, 22..28)), state.get_match());
+    ///
+    /// ac.find_overlapping(haystack, &mut state);
+    /// assert_eq!(Some(Match::must(1, 22..31)), state.get_match());
+    ///
+    /// // No more matches to be found.
+    /// ac.find_overlapping(haystack, &mut state);
+    /// assert_eq!(None, state.get_match());
+    /// ```
+    pub fn find_overlapping<'h, I: Into<Input<'h>>>(
+        &self,
+        input: I,
+        state: &mut OverlappingState,
+    ) {
+        self.try_find_overlapping(input, state).expect(
+            "AhoCorasick::try_find_overlapping is not expected to fail",
+        )
+    }
 
     /// Returns an iterator of non-overlapping matches, using the match
     /// semantics that this automaton was constructed with.
     ///
-    /// `haystack` may be any type that is cheaply convertible to a `&[u8]`.
-    /// This includes, but is not limited to, `String`, `&str`, `Vec<u8>`, and
-    /// `&[u8]` itself.
+    /// `input` may be any type that is cheaply convertible to an `Input`. This
+    /// includes, but is not limited to, `&str` and `&[u8]`.
+    ///
+    /// This is the infallible version of [`AhoCorasick::try_find_iter`].
+    ///
+    /// # Panics
+    ///
+    /// This panics when [`AhoCorasick::try_find_iter`] would return an error.
     ///
     /// # Examples
     ///
     /// Basic usage, with standard semantics:
     ///
     /// ```
-    /// use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// use aho_corasick::{AhoCorasick, MatchKind, PatternID};
     ///
     /// let patterns = &["append", "appendage", "app"];
     /// let haystack = "append the app to the appendage";
     ///
-    /// let ac = AhoCorasickBuilder::new()
+    /// let ac = AhoCorasick::builder()
     ///     .match_kind(MatchKind::Standard) // default, not necessary
-    ///     .build(patterns);
-    /// let matches: Vec<usize> = ac
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let matches: Vec<PatternID> = ac
     ///     .find_iter(haystack)
     ///     .map(|mat| mat.pattern())
     ///     .collect();
-    /// assert_eq!(vec![2, 2, 2], matches);
+    /// assert_eq!(vec![
+    ///     PatternID::must(2),
+    ///     PatternID::must(2),
+    ///     PatternID::must(2),
+    /// ], matches);
     /// ```
     ///
     /// Now with leftmost-first semantics:
     ///
     /// ```
-    /// use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// use aho_corasick::{AhoCorasick, MatchKind, PatternID};
     ///
     /// let patterns = &["append", "appendage", "app"];
     /// let haystack = "append the app to the appendage";
     ///
-    /// let ac = AhoCorasickBuilder::new()
     ///     .match_kind(MatchKind::LeftmostFirst)
-    ///     .build(patterns);
-    /// let matches: Vec<usize> = ac
+    /// let ac = AhoCorasick::builder()
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let matches: Vec<PatternID> = ac
     ///     .find_iter(haystack)
     ///     .map(|mat| mat.pattern())
     ///     .collect();
-    /// assert_eq!(vec![0, 2, 0], matches);
+    /// assert_eq!(vec![
+    ///     PatternID::must(0),
+    ///     PatternID::must(2),
+    ///     PatternID::must(0),
+    /// ], matches);
     /// ```
     ///
     /// And finally, leftmost-longest semantics:
     ///
     /// ```
-    /// use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// use aho_corasick::{AhoCorasick, MatchKind, PatternID};
     ///
     /// let patterns = &["append", "appendage", "app"];
     /// let haystack = "append the app to the appendage";
     ///
-    /// let ac = AhoCorasickBuilder::new()
+    /// let ac = AhoCorasick::builder()
     ///     .match_kind(MatchKind::LeftmostLongest)
-    ///     .build(patterns);
-    /// let matches: Vec<usize> = ac
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let matches: Vec<PatternID> = ac
     ///     .find_iter(haystack)
     ///     .map(|mat| mat.pattern())
     ///     .collect();
-    /// assert_eq!(vec![0, 2, 1], matches);
+    /// assert_eq!(vec![
+    ///     PatternID::must(0),
+    ///     PatternID::must(2),
+    ///     PatternID::must(1),
+    /// ], matches);
     /// ```
-    pub fn find_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>(
+    pub fn find_iter<'a, 'h, I: Into<Input<'h>>>(
         &'a self,
-        haystack: &'b B,
-    ) -> FindIter<'a, 'b, S> {
-        FindIter::new(self, haystack.as_ref())
+        input: I,
+    ) -> FindIter<'a, 'h> {
+        self.try_find_iter(input)
+            .expect("AhoCorasick::try_find_iter is not expected to fail")
     }
 
-    /// Returns an iterator of overlapping matches in the given `haystack`.
+    /// Returns an iterator of overlapping matches. Stated differently, this
+    /// returns an iterator of all possible matches at every position.
     ///
-    /// Overlapping matches can _only_ be detected using
-    /// `MatchKind::Standard` semantics. If this automaton was constructed with
-    /// leftmost semantics, then this method will panic. To determine whether
-    /// this will panic at runtime, use the
-    /// [`AhoCorasick::supports_overlapping`](struct.AhoCorasick.html#method.supports_overlapping)
-    /// method.
+    /// `input` may be any type that is cheaply convertible to an `Input`. This
+    /// includes, but is not limited to, `&str` and `&[u8]`.
    ///
-    /// `haystack` may be any type that is cheaply convertible to a `&[u8]`.
-    /// This includes, but is not limited to, `String`, `&str`, `Vec<u8>`, and
-    /// `&[u8]` itself.
+    /// This is the infallible version of
+    /// [`AhoCorasick::try_find_overlapping_iter`].
     ///
     /// # Panics
     ///
-    /// This panics when `AhoCorasick::supports_overlapping` returns `false`.
-    /// That is, this panics when this automaton's match semantics are not
-    /// `MatchKind::Standard`.
+    /// This panics when `AhoCorasick::try_find_overlapping_iter` would return
+    /// an error. For example, when the Aho-Corasick searcher is built with
+    /// either leftmost-first or leftmost-longest match semantics. Stated
+    /// differently, overlapping searches require one to build the searcher
+    /// with [`MatchKind::Standard`] (it is the default).
     ///
-    /// # Examples
-    ///
-    /// Basic usage, with standard semantics:
+    /// # Example: basic usage
     ///
     /// ```
-    /// use aho_corasick::AhoCorasick;
+    /// use aho_corasick::{AhoCorasick, PatternID};
     ///
     /// let patterns = &["append", "appendage", "app"];
     /// let haystack = "append the app to the appendage";
     ///
-    /// let ac = AhoCorasick::new(patterns);
-    /// let matches: Vec<usize> = ac
+    /// let ac = AhoCorasick::new(patterns).unwrap();
+    /// let matches: Vec<PatternID> = ac
     ///     .find_overlapping_iter(haystack)
     ///     .map(|mat| mat.pattern())
     ///     .collect();
-    /// assert_eq!(vec![2, 0, 2, 2, 0, 1], matches);
-    /// ```
-    pub fn find_overlapping_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>(
+    /// assert_eq!(vec![
+    ///     PatternID::must(2),
+    ///     PatternID::must(0),
+    ///     PatternID::must(2),
+    ///     PatternID::must(2),
+    ///     PatternID::must(0),
+    ///     PatternID::must(1),
+    /// ], matches);
+    /// ```
+    pub fn find_overlapping_iter<'a, 'h, I: Into<Input<'h>>>(
         &'a self,
-        haystack: &'b B,
-    ) -> FindOverlappingIter<'a, 'b, S> {
-        FindOverlappingIter::new(self, haystack.as_ref())
+        input: I,
+    ) -> FindOverlappingIter<'a, 'h> {
+        self.try_find_overlapping_iter(input).expect(
+            "AhoCorasick::try_find_overlapping_iter is not expected to fail",
+        )
    }
 
     /// Replace all matches with a corresponding value in the `replace_with`
     /// slice given. Matches correspond to the same matches as reported by
-    /// [`find_iter`](struct.AhoCorasick.html#method.find_iter).
+    /// [`AhoCorasick::find_iter`].
     ///
     /// Replacements are determined by the index of the matching pattern.
     /// For example, if the pattern with index `2` is found, then it is
     /// replaced by `replace_with[2]`.
     ///
+    /// This is the infallible version of [`AhoCorasick::try_replace_all`].
+    ///
     /// # Panics
     ///
-    /// This panics when `replace_with.len()` does not equal the total number
-    /// of patterns that are matched by this automaton.
+    /// This panics when [`AhoCorasick::try_replace_all`] would return an
+    /// error.
     ///
-    /// # Examples
+    /// This also panics when `replace_with.len()` does not equal
+    /// [`AhoCorasick::patterns_len`].
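Since `replace_all` panics when the replacement count disagrees with the pattern count, pairing it with `AhoCorasick::patterns_len` (referenced in the Panics section above) makes the invariant explicit. A short sketch under the default standard match semantics:

```rust
use aho_corasick::AhoCorasick;

fn main() {
    let patterns = &["append", "appendage", "app"];
    let replacements = &["x", "y", "z"];
    let ac = AhoCorasick::new(patterns).unwrap();
    // Exactly one replacement per pattern, or replace_all will panic.
    assert_eq!(ac.patterns_len(), replacements.len());
    // Standard semantics report "app" (pattern 2) as soon as it is seen.
    let result = ac.replace_all("app store", replacements);
    assert_eq!("z store", result);
}
```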
     ///
-    /// Basic usage:
+    /// # Example: basic usage
     ///
     /// ```
-    /// use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// use aho_corasick::{AhoCorasick, MatchKind};
     ///
     /// let patterns = &["append", "appendage", "app"];
     /// let haystack = "append the app to the appendage";
     ///
-    /// let ac = AhoCorasickBuilder::new()
+    /// let ac = AhoCorasick::builder()
     ///     .match_kind(MatchKind::LeftmostFirst)
-    ///     .build(patterns);
+    ///     .build(patterns)
+    ///     .unwrap();
     /// let result = ac.replace_all(haystack, &["x", "y", "z"]);
     /// assert_eq!("x the z to the xage", result);
     /// ```
@@ -430,46 +652,41 @@ impl<S: StateID> AhoCorasick<S> {
     where
         B: AsRef<str>,
     {
-        assert_eq!(
-            replace_with.len(),
-            self.pattern_count(),
-            "replace_all requires a replacement for every pattern \
-             in the automaton"
-        );
-        let mut dst = String::with_capacity(haystack.len());
-        self.replace_all_with(haystack, &mut dst, |mat, _, dst| {
-            dst.push_str(replace_with[mat.pattern()].as_ref());
-            true
-        });
-        dst
+        self.try_replace_all(haystack, replace_with)
+            .expect("AhoCorasick::try_replace_all is not expected to fail")
     }
 
     /// Replace all matches using raw bytes with a corresponding value in the
     /// `replace_with` slice given. Matches correspond to the same matches as
-    /// reported by [`find_iter`](struct.AhoCorasick.html#method.find_iter).
+    /// reported by [`AhoCorasick::find_iter`].
     ///
     /// Replacements are determined by the index of the matching pattern.
     /// For example, if the pattern with index `2` is found, then it is
     /// replaced by `replace_with[2]`.
     ///
+    /// This is the infallible version of
+    /// [`AhoCorasick::try_replace_all_bytes`].
+    ///
     /// # Panics
     ///
-    /// This panics when `replace_with.len()` does not equal the total number
-    /// of patterns that are matched by this automaton.
+    /// This panics when [`AhoCorasick::try_replace_all_bytes`] would return an
+    /// error.
     ///
-    /// # Examples
+    /// This also panics when `replace_with.len()` does not equal
+    /// [`AhoCorasick::patterns_len`].
     ///
-    /// Basic usage:
+    /// # Example: basic usage
     ///
     /// ```
-    /// use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// use aho_corasick::{AhoCorasick, MatchKind};
     ///
     /// let patterns = &["append", "appendage", "app"];
     /// let haystack = b"append the app to the appendage";
     ///
-    /// let ac = AhoCorasickBuilder::new()
+    /// let ac = AhoCorasick::builder()
     ///     .match_kind(MatchKind::LeftmostFirst)
-    ///     .build(patterns);
+    ///     .build(patterns)
+    ///     .unwrap();
     /// let result = ac.replace_all_bytes(haystack, &["x", "y", "z"]);
     /// assert_eq!(b"x the z to the xage".to_vec(), result);
     /// ```
@@ -481,45 +698,47 @@ impl<S: StateID> AhoCorasick<S> {
     where
         B: AsRef<[u8]>,
     {
-        assert_eq!(
-            replace_with.len(),
-            self.pattern_count(),
-            "replace_all_bytes requires a replacement for every pattern \
-             in the automaton"
-        );
-        let mut dst = Vec::with_capacity(haystack.len());
-        self.replace_all_with_bytes(haystack, &mut dst, |mat, _, dst| {
-            dst.extend(replace_with[mat.pattern()].as_ref());
-            true
-        });
-        dst
+        self.try_replace_all_bytes(haystack, replace_with)
+            .expect("AhoCorasick::try_replace_all_bytes should not fail")
     }
 
     /// Replace all matches using a closure called on each match.
     /// Matches correspond to the same matches as reported by
-    /// [`find_iter`](struct.AhoCorasick.html#method.find_iter).
+    /// [`AhoCorasick::find_iter`].
     ///
     /// The closure accepts three parameters: the match found, the text of
     /// the match and a string buffer with which to write the replaced text
     /// (if any). If the closure returns `true`, then it continues to the next
     /// match. If the closure returns `false`, then searching is stopped.
     ///
+    /// Note that any matches with boundaries that don't fall on a valid UTF-8
+    /// boundary are silently skipped.
+    ///
+    /// This is the infallible version of
+    /// [`AhoCorasick::try_replace_all_with`].
+    ///
+    /// # Panics
+    ///
+    /// This panics when [`AhoCorasick::try_replace_all_with`] would return an
+    /// error.
+    ///
     /// # Examples
     ///
     /// Basic usage:
     ///
     /// ```
-    /// use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// use aho_corasick::{AhoCorasick, MatchKind};
     ///
     /// let patterns = &["append", "appendage", "app"];
     /// let haystack = "append the app to the appendage";
     ///
-    /// let ac = AhoCorasickBuilder::new()
+    /// let ac = AhoCorasick::builder()
     ///     .match_kind(MatchKind::LeftmostFirst)
-    ///     .build(patterns);
+    ///     .build(patterns)
+    ///     .unwrap();
     /// let mut result = String::new();
     /// ac.replace_all_with(haystack, &mut result, |mat, _, dst| {
-    ///     dst.push_str(&mat.pattern().to_string());
+    ///     dst.push_str(&mat.pattern().as_usize().to_string());
     ///     true
     /// });
     /// assert_eq!("0 the 2 to the 0age", result);
@@ -529,16 +748,17 @@ impl<S: StateID> AhoCorasick<S> {
     /// example above):
     ///
     /// ```
-    /// # use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// # use aho_corasick::{AhoCorasick, MatchKind, PatternID};
     /// # let patterns = &["append", "appendage", "app"];
     /// # let haystack = "append the app to the appendage";
-    /// # let ac = AhoCorasickBuilder::new()
-    /// #    .match_kind(MatchKind::LeftmostFirst)
-    /// #    .build(patterns);
+    /// # let ac = AhoCorasick::builder()
+    /// #    .match_kind(MatchKind::LeftmostFirst)
+    /// #    .build(patterns)
+    /// #    .unwrap();
     /// let mut result = String::new();
     /// ac.replace_all_with(haystack, &mut result, |mat, _, dst| {
-    ///     dst.push_str(&mat.pattern().to_string());
-    ///     mat.pattern() != 2
+    ///     dst.push_str(&mat.pattern().as_usize().to_string());
+    ///     mat.pattern() != PatternID::must(2)
     /// });
     /// assert_eq!("0 the 2 to the appendage", result);
     /// ```
@@ -546,46 +766,48 @@ impl<S: StateID> AhoCorasick<S> {
         &self,
         haystack: &str,
         dst: &mut String,
-        mut replace_with: F,
+        replace_with: F,
     ) where
         F: FnMut(&Match, &str, &mut String) -> bool,
     {
-        let mut last_match = 0;
-        for mat in self.find_iter(haystack) {
-            dst.push_str(&haystack[last_match..mat.start()]);
-            last_match = mat.end();
-            if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) {
-                break;
-            };
-        }
-        dst.push_str(&haystack[last_match..]);
+        self.try_replace_all_with(haystack, dst, replace_with)
+            .expect("AhoCorasick::try_replace_all_with should not fail")
     }
 
     /// Replace all matches using raw bytes with a closure called on each
     /// match. Matches correspond to the same matches as reported by
-    /// [`find_iter`](struct.AhoCorasick.html#method.find_iter).
+    /// [`AhoCorasick::find_iter`].
     ///
     /// The closure accepts three parameters: the match found, the text of
     /// the match and a byte buffer with which to write the replaced text
     /// (if any). If the closure returns `true`, then it continues to the next
     /// match. If the closure returns `false`, then searching is stopped.
     ///
+    /// This is the infallible version of
+    /// [`AhoCorasick::try_replace_all_with_bytes`].
+    ///
+    /// # Panics
+    ///
+    /// This panics when [`AhoCorasick::try_replace_all_with_bytes`] would
+    /// return an error.
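The UTF-8 note above (matches whose boundaries split a multi-byte character are silently skipped) can be exercised with a raw-byte pattern. A hedged sketch; the final assertion follows from the documented skipping rule rather than from anything verified here:

```rust
use aho_corasick::AhoCorasick;

fn main() {
    // A single raw byte that can match in the middle of a UTF-8 sequence.
    let ac = AhoCorasick::new([&b"\xCE"[..]]).unwrap();
    let haystack = "β"; // encoded as the two bytes CE B2
    let mut dst = String::new();
    ac.replace_all_with(haystack, &mut dst, |_, _, out| {
        out.push('?');
        true
    });
    // The match would end inside the two-byte encoding of 'β', so per the
    // documented rule it is silently skipped and the text passes through.
    assert_eq!("β", dst);
}
```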
+    ///
     /// # Examples
     ///
     /// Basic usage:
     ///
     /// ```
-    /// use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// use aho_corasick::{AhoCorasick, MatchKind};
     ///
     /// let patterns = &["append", "appendage", "app"];
     /// let haystack = b"append the app to the appendage";
     ///
-    /// let ac = AhoCorasickBuilder::new()
+    /// let ac = AhoCorasick::builder()
     ///     .match_kind(MatchKind::LeftmostFirst)
-    ///     .build(patterns);
+    ///     .build(patterns)
+    ///     .unwrap();
     /// let mut result = vec![];
     /// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| {
-    ///     dst.extend(mat.pattern().to_string().bytes());
+    ///     dst.extend(mat.pattern().as_usize().to_string().bytes());
     ///     true
     /// });
     /// assert_eq!(b"0 the 2 to the 0age".to_vec(), result);
@@ -595,16 +817,17 @@ impl<S: StateID> AhoCorasick<S> {
     /// example above):
     ///
     /// ```
-    /// # use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// # use aho_corasick::{AhoCorasick, MatchKind, PatternID};
     /// # let patterns = &["append", "appendage", "app"];
     /// # let haystack = b"append the app to the appendage";
-    /// # let ac = AhoCorasickBuilder::new()
-    /// #    .match_kind(MatchKind::LeftmostFirst)
-    /// #    .build(patterns);
+    /// # let ac = AhoCorasick::builder()
+    /// #    .match_kind(MatchKind::LeftmostFirst)
+    /// #    .build(patterns)
+    /// #    .unwrap();
     /// let mut result = vec![];
     /// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| {
-    ///     dst.extend(mat.pattern().to_string().bytes());
-    ///     mat.pattern() != 2
+    ///     dst.extend(mat.pattern().as_usize().to_string().bytes());
+    ///     mat.pattern() != PatternID::must(2)
     /// });
     /// assert_eq!(b"0 the 2 to the appendage".to_vec(), result);
     /// ```
@@ -612,41 +835,35 @@ impl<S: StateID> AhoCorasick<S> {
         &self,
         haystack: &[u8],
         dst: &mut Vec<u8>,
-        mut replace_with: F,
+        replace_with: F,
     ) where
         F: FnMut(&Match, &[u8], &mut Vec<u8>) -> bool,
     {
-        let mut last_match = 0;
-        for mat in self.find_iter(haystack) {
-            dst.extend(&haystack[last_match..mat.start()]);
-            last_match = mat.end();
-            if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) {
-                break;
-            };
-        }
-        dst.extend(&haystack[last_match..]);
+        self.try_replace_all_with_bytes(haystack, dst, replace_with)
+            .expect("AhoCorasick::try_replace_all_with_bytes should not fail")
     }
 
     /// Returns an iterator of non-overlapping matches in the given
     /// stream. Matches correspond to the same matches as reported by
-    /// [`find_iter`](struct.AhoCorasick.html#method.find_iter).
+    /// [`AhoCorasick::find_iter`].
     ///
     /// The matches yielded by this iterator use absolute position offsets in
     /// the stream given, where the first byte has index `0`. Matches are
     /// yielded until the stream is exhausted.
     ///
-    /// Each item yielded by the iterator is an `io::Result<Match>`, where an
-    /// error is yielded if there was a problem reading from the reader given.
+    /// Each item yielded by the iterator is a `Result<Match,
+    /// std::io::Error>`, where an error is yielded if there was a problem
+    /// reading from the reader given.
     ///
     /// When searching a stream, an internal buffer is used. Therefore, callers
     /// should avoid providing a buffered reader, if possible.
     ///
-    /// Searching a stream requires that the automaton was built with
-    /// `MatchKind::Standard` semantics. If this automaton was constructed
-    /// with leftmost semantics, then this method will panic. To determine
-    /// whether this will panic at runtime, use the
-    /// [`AhoCorasick::supports_stream`](struct.AhoCorasick.html#method.supports_stream)
-    /// method.
+    /// This is the infallible version of
+    /// [`AhoCorasick::try_stream_find_iter`]. Note that both methods return
+    /// iterators that produce `Result` values. The difference is that this
+    /// routine panics if _construction_ of the iterator failed. The `Result`
+    /// values yielded by the iterator come from whether the given reader
+    /// returns an error or not during the search.
     ///
     /// # Memory usage
     ///
@@ -658,327 +875,1095 @@ impl<S: StateID> AhoCorasick<S> {
     ///
     /// # Panics
     ///
-    /// This panics when `AhoCorasick::supports_stream` returns `false`.
-    /// That is, this panics when this automaton's match semantics are not
-    /// `MatchKind::Standard`. This restriction may be lifted in the future.
-    ///
-    /// # Examples
+    /// This panics when [`AhoCorasick::try_stream_find_iter`] would return
+    /// an error. For example, when the Aho-Corasick searcher doesn't support
+    /// stream searches. (Only searchers built with [`MatchKind::Standard`]
+    /// semantics support stream searches.)
     ///
-    /// Basic usage:
+    /// # Example: basic usage
     ///
     /// ```
-    /// use aho_corasick::AhoCorasick;
+    /// use aho_corasick::{AhoCorasick, PatternID};
     ///
-    /// # fn example() -> Result<(), ::std::io::Error> {
     /// let patterns = &["append", "appendage", "app"];
     /// let haystack = "append the app to the appendage";
     ///
-    /// let ac = AhoCorasick::new(patterns);
+    /// let ac = AhoCorasick::new(patterns).unwrap();
     /// let mut matches = vec![];
     /// for result in ac.stream_find_iter(haystack.as_bytes()) {
     ///     let mat = result?;
     ///     matches.push(mat.pattern());
     /// }
-    /// assert_eq!(vec![2, 2, 2], matches);
-    /// # Ok(()) }; example().unwrap()
+    /// assert_eq!(vec![
+    ///     PatternID::must(2),
+    ///     PatternID::must(2),
+    ///     PatternID::must(2),
+    /// ], matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
-    pub fn stream_find_iter<'a, R: io::Read>(
+    #[cfg(feature = "std")]
+    pub fn stream_find_iter<'a, R: std::io::Read>(
         &'a self,
         rdr: R,
-    ) -> StreamFindIter<'a, R, S> {
-        StreamFindIter::new(self, rdr)
+    ) -> StreamFindIter<'a, R> {
+        self.try_stream_find_iter(rdr)
+            .expect("AhoCorasick::try_stream_find_iter should not fail")
     }
+}
 
-    /// Search for and replace all matches of this automaton in
-    /// the given reader, and write the replacements to the given
-    /// writer. Matches correspond to the same matches as reported by
-    /// [`find_iter`](struct.AhoCorasick.html#method.find_iter).
-    ///
-    /// Replacements are determined by the index of the matching pattern.
-    /// For example, if the pattern with index `2` is found, then it is
-    /// replaced by `replace_with[2]`.
-    ///
-    /// After all matches are replaced, the writer is _not_ flushed.
+/// Fallible search routines. These APIs return an error in cases where the
+/// infallible routines would panic.
+impl AhoCorasick {
+    /// Returns the location of the first match according to the match
+    /// semantics that this automaton was constructed with, and according
+    /// to the given `Input` configuration.
     ///
-    /// If there was a problem reading from the given reader or writing to the
-    /// given writer, then the corresponding `io::Error` is returned and all
-    /// replacement is stopped.
+    /// This is the fallible version of [`AhoCorasick::find`].
     ///
-    /// When searching a stream, an internal buffer is used. Therefore, callers
-    /// should avoiding providing a buffered reader, if possible. However,
-    /// callers may want to provide a buffered writer.
+    /// # Errors
     ///
-    /// Searching a stream requires that the automaton was built with
-    /// `MatchKind::Standard` semantics. If this automaton was constructed
-    /// with leftmost semantics, then this method will panic. To determine
-    /// whether this will panic at runtime, use the
-    /// [`AhoCorasick::supports_stream`](struct.AhoCorasick.html#method.supports_stream)
-    /// method.
+    /// This returns an error when this Aho-Corasick searcher does not support
+    /// the given `Input` configuration.
     ///
-    /// # Memory usage
+    /// For example, if the Aho-Corasick searcher only supports anchored
+    /// searches or only supports unanchored searches, then providing an
+    /// `Input` that requests an anchored (or unanchored) search when it isn't
+    /// supported would result in an error.
     ///
-    /// In general, searching streams will use a constant amount of memory for
-    /// its internal buffer. The one requirement is that the internal buffer
-    /// must be at least the size of the longest possible match. In most use
-    /// cases, the default buffer size will be much larger than any individual
-    /// match.
+    /// # Example: leftmost-first searching
     ///
-    /// # Panics
+    /// Basic usage with leftmost-first semantics:
     ///
-    /// This panics when `AhoCorasick::supports_stream` returns `false`.
-    /// That is, this panics when this automaton's match semantics are not
-    /// `MatchKind::Standard`. This restriction may be lifted in the future.
+    /// ```
+    /// use aho_corasick::{AhoCorasick, MatchKind, Input};
     ///
-    /// # Examples
+    /// let patterns = &["b", "abc", "abcd"];
+    /// let haystack = "foo abcd";
     ///
-    /// Basic usage:
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let mat = ac.try_find(haystack)?.expect("should have a match");
+    /// assert_eq!("abc", &haystack[mat.span()]);
     ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
-    /// use aho_corasick::AhoCorasick;
     ///
-    /// # fn example() -> Result<(), ::std::io::Error> {
-    /// let patterns = &["fox", "brown", "quick"];
-    /// let haystack = "The quick brown fox.";
-    /// let replace_with = &["sloth", "grey", "slow"];
-    ///
-    /// let ac = AhoCorasick::new(patterns);
-    /// let mut result = vec![];
-    /// ac.stream_replace_all(haystack.as_bytes(), &mut result, replace_with)?;
-    /// assert_eq!(b"The slow grey sloth.".to_vec(), result);
-    /// # Ok(()) }; example().unwrap()
-    /// ```
-    pub fn stream_replace_all<R, W, B>(
-        &self,
-        rdr: R,
-        wtr: W,
-        replace_with: &[B],
-    ) -> io::Result<()>
-    where
-        R: io::Read,
-        W: io::Write,
-        B: AsRef<[u8]>,
-    {
-        assert_eq!(
-            replace_with.len(),
-            self.pattern_count(),
-            "stream_replace_all requires a replacement for every pattern \
-             in the automaton"
-        );
-        self.stream_replace_all_with(rdr, wtr, |mat, _, wtr| {
-            wtr.write_all(replace_with[mat.pattern()].as_ref())
-        })
-    }
-
-    /// Search the given reader and replace all matches of this automaton
-    /// using the given closure. The result is written to the given
-    /// writer. Matches correspond to the same matches as reported by
-    /// [`find_iter`](struct.AhoCorasick.html#method.find_iter).
+    /// # Example: anchored leftmost-first searching
+    ///
+    /// This shows how to anchor the search, so that even if the haystack
+    /// contains a match somewhere, a match won't be reported unless one can
+    /// be found that starts at the beginning of the search:
     ///
-    /// The closure accepts three parameters: the match found, the text of
-    /// the match and the writer with which to write the replaced text (if any).
-    ///
-    /// After all matches are replaced, the writer is _not_ flushed.
+    /// ```
+    /// use aho_corasick::{AhoCorasick, Anchored, Input, MatchKind, StartKind};
+    ///
+    /// let patterns = &["b", "abc", "abcd"];
+    /// let haystack = "foo abcd";
     ///
-    /// If there was a problem reading from the given reader or writing to the
-    /// given writer, then the corresponding `io::Error` is returned and all
-    /// replacement is stopped.
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .start_kind(StartKind::Anchored)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let input = Input::new(haystack).anchored(Anchored::Yes);
+    /// assert_eq!(None, ac.try_find(input)?);
     ///
-    /// When searching a stream, an internal buffer is used. Therefore, callers
-    /// should avoiding providing a buffered reader, if possible. However,
-    /// callers may want to provide a buffered writer.
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
     ///
-    /// Searching a stream requires that the automaton was built with
-    /// `MatchKind::Standard` semantics. If this automaton was constructed
-    /// with leftmost semantics, then this method will panic.
+    /// If the beginning of the search is changed to where a match begins, then
+    /// it will be found:
     ///
-    /// # Memory usage
+    /// ```
+    /// use aho_corasick::{AhoCorasick, Anchored, Input, MatchKind, StartKind};
     ///
-    /// In general, searching streams will use a constant amount of memory for
-    /// its internal buffer. The one requirement is that the internal buffer
-    /// must be at least the size of the longest possible match. In most use
-    /// cases, the default buffer size will be much larger than any individual
-    /// match.
+    /// let patterns = &["b", "abc", "abcd"];
+    /// let haystack = "foo abcd";
     ///
-    /// # Panics
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .start_kind(StartKind::Anchored)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let input = Input::new(haystack).range(4..).anchored(Anchored::Yes);
+    /// let mat = ac.try_find(input)?.expect("should have a match");
+    /// assert_eq!("abc", &haystack[mat.span()]);
     ///
-    /// This panics when `AhoCorasick::supports_stream` returns `false`.
-    /// That is, this panics when this automaton's match semantics are not
-    /// `MatchKind::Standard`. This restriction may be lifted in the future.
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
     ///
-    /// # Examples
+    /// # Example: earliest leftmost-first searching
     ///
-    /// Basic usage:
+    /// This shows how to run an "earliest" search even when the Aho-Corasick
+    /// searcher was compiled with leftmost-first match semantics. In this
+    /// case, the search is stopped as soon as it is known that a match has
+    /// occurred, even if it doesn't correspond to the leftmost-first match.
     ///
     /// ```
-    /// use std::io::Write;
-    /// use aho_corasick::AhoCorasick;
+    /// use aho_corasick::{AhoCorasick, Input, MatchKind};
     ///
-    /// # fn example() -> Result<(), ::std::io::Error> {
-    /// let patterns = &["fox", "brown", "quick"];
-    /// let haystack = "The quick brown fox.";
+    /// let patterns = &["b", "abc", "abcd"];
+    /// let haystack = "foo abcd";
     ///
-    /// let ac = AhoCorasick::new(patterns);
-    /// let mut result = vec![];
-    /// ac.stream_replace_all_with(
-    ///     haystack.as_bytes(),
-    ///     &mut result,
-    ///     |mat, _, wtr| {
-    ///         wtr.write_all(mat.pattern().to_string().as_bytes())
-    ///     },
-    /// )?;
-    /// assert_eq!(b"The 2 1 0.".to_vec(), result);
-    /// # Ok(()) }; example().unwrap()
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let input = Input::new(haystack).earliest(true);
+    /// let mat = ac.try_find(input)?.expect("should have a match");
+    /// assert_eq!("b", &haystack[mat.span()]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
-    pub fn stream_replace_all_with<R, W, F>(
+    pub fn try_find<'h, I: Into<Input<'h>>>(
         &self,
-        rdr: R,
-        mut wtr: W,
-        mut replace_with: F,
-    ) -> io::Result<()>
-    where
-        R: io::Read,
-        W: io::Write,
-        F: FnMut(&Match, &[u8], &mut W) -> io::Result<()>,
-    {
-        let mut it = StreamChunkIter::new(self, rdr);
-        while let Some(result) = it.next() {
-            let chunk = result?;
-            match chunk {
-                StreamChunk::NonMatch { bytes, .. } => {
-                    wtr.write_all(bytes)?;
-                }
-                StreamChunk::Match { bytes, mat } => {
-                    replace_with(&mat, bytes, &mut wtr)?;
-                }
-            }
-        }
-        Ok(())
+        input: I,
+    ) -> Result<Option<Match>, MatchError> {
+        let input = input.into();
+        enforce_anchored_consistency(self.start_kind, input.get_anchored())?;
+        self.aut.try_find(&input)
     }
 
-    /// Returns the match kind used by this automaton.
+    /// Returns the location of the first overlapping match in the given
+    /// input with respect to the current state of the underlying searcher.
     ///
-    /// # Examples
+    /// Overlapping searches do not report matches in their return value.
+    /// Instead, matches can be accessed via [`OverlappingState::get_match`]
+    /// after a search call.
     ///
-    /// Basic usage:
+    /// This is the fallible version of [`AhoCorasick::find_overlapping`].
    ///
-    /// ```
-    /// use aho_corasick::{AhoCorasick, MatchKind};
+    /// # Errors
     ///
-    /// let ac = AhoCorasick::new(&[
-    ///     "foo", "bar", "quux", "baz",
-    /// ]);
-    /// assert_eq!(&MatchKind::Standard, ac.match_kind());
-    /// ```
-    pub fn match_kind(&self) -> &MatchKind {
-        self.imp.match_kind()
-    }
-
-    /// Returns the length of the longest pattern matched by this automaton.
+    /// This returns an error when this Aho-Corasick searcher does not support
+    /// the given `Input` configuration or if overlapping search is not
+    /// supported.
     ///
-    /// # Examples
+    /// One example is that only Aho-Corasick searchers built with
+    /// [`MatchKind::Standard`] semantics support overlapping searches. Using
+    /// any other match semantics will result in this returning an error.
     ///
-    /// Basic usage:
+    /// # Example: basic usage
     ///
-    /// ```
-    /// use aho_corasick::AhoCorasick;
+    /// This shows how we can repeatedly call an overlapping search without
+    /// ever needing to explicitly re-slice the haystack. Overlapping search
+    /// works this way because searches depend on state saved during the
+    /// previous search.
     ///
-    /// let ac = AhoCorasick::new(&[
-    ///     "foo", "bar", "quux", "baz",
-    /// ]);
-    /// assert_eq!(4, ac.max_pattern_len());
     /// ```
-    pub fn max_pattern_len(&self) -> usize {
-        self.imp.max_pattern_len()
-    }
-
-    /// Return the total number of patterns matched by this automaton.
+    /// use aho_corasick::{
+    ///     automaton::OverlappingState,
+    ///     AhoCorasick, Input, Match,
+    /// };
     ///
-    /// This includes patterns that may never participate in a match. For
-    /// example, if
-    /// [`MatchKind::LeftmostFirst`](enum.MatchKind.html#variant.LeftmostFirst)
-    /// match semantics are used, and the patterns `Sam` and `Samwise` were
-    /// used to build the automaton, then `Samwise` can never participate in a
-    /// match because `Sam` will always take priority.
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = "append the app to the appendage";
    ///
-    /// # Examples
+    /// let ac = AhoCorasick::new(patterns).unwrap();
+    /// let mut state = OverlappingState::start();
     ///
-    /// Basic usage:
+    /// ac.try_find_overlapping(haystack, &mut state)?;
+    /// assert_eq!(Some(Match::must(2, 0..3)), state.get_match());
     ///
-    /// ```
-    /// use aho_corasick::AhoCorasick;
+    /// ac.try_find_overlapping(haystack, &mut state)?;
+    /// assert_eq!(Some(Match::must(0, 0..6)), state.get_match());
     ///
-    /// let ac = AhoCorasick::new(&[
-    ///     "foo", "bar", "baz",
-    /// ]);
-    /// assert_eq!(3, ac.pattern_count());
+    /// ac.try_find_overlapping(haystack, &mut state)?;
+    /// assert_eq!(Some(Match::must(2, 11..14)), state.get_match());
+    ///
+    /// ac.try_find_overlapping(haystack, &mut state)?;
+    /// assert_eq!(Some(Match::must(2, 22..25)), state.get_match());
+    ///
+    /// ac.try_find_overlapping(haystack, &mut state)?;
+    /// assert_eq!(Some(Match::must(0, 22..28)), state.get_match());
+    ///
+    /// ac.try_find_overlapping(haystack, &mut state)?;
+    /// assert_eq!(Some(Match::must(1, 22..31)), state.get_match());
+    ///
+    /// // No more matches to be found.
+    /// ac.try_find_overlapping(haystack, &mut state)?;
+    /// assert_eq!(None, state.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: implementing your own overlapping iteration
+    ///
+    /// The previous example can be easily adapted to implement your own
+    /// iteration by repeatedly calling `try_find_overlapping` until either
+    /// an error occurs or no more matches are reported.
+    ///
+    /// This is effectively equivalent to the iterator returned by
+    /// [`AhoCorasick::try_find_overlapping_iter`], with the only difference
+    /// being that the iterator checks for errors before construction and
+    /// absolves the caller of needing to check for errors on every search
+    /// call. (Indeed, if the first `try_find_overlapping` call succeeds and
+    /// the same `Input` is given to subsequent calls, then all subsequent
+    /// calls are guaranteed to succeed.)
+    ///
+    /// ```
+    /// use aho_corasick::{
+    ///     automaton::OverlappingState,
+    ///     AhoCorasick, Input, Match,
+    /// };
+    ///
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = "append the app to the appendage";
+    ///
+    /// let ac = AhoCorasick::new(patterns).unwrap();
+    /// let mut state = OverlappingState::start();
+    /// let mut matches = vec![];
+    ///
+    /// loop {
+    ///     ac.try_find_overlapping(haystack, &mut state)?;
+    ///     let mat = match state.get_match() {
+    ///         None => break,
+    ///         Some(mat) => mat,
+    ///     };
+    ///     matches.push(mat);
+    /// }
+    /// let expected = vec![
+    ///     Match::must(2, 0..3),
+    ///     Match::must(0, 0..6),
+    ///     Match::must(2, 11..14),
+    ///     Match::must(2, 22..25),
+    ///     Match::must(0, 22..28),
+    ///     Match::must(1, 22..31),
+    /// ];
+    /// assert_eq!(expected, matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: anchored iteration
+    ///
+    /// The previous example can also be adapted to implement
+    /// iteration over all anchored matches. In particular,
+    /// [`AhoCorasick::try_find_overlapping_iter`] does not support this
+    /// because it isn't totally clear what the match semantics ought to be.
+    ///
+    /// In this example, we will find all overlapping matches that start at
+    /// the beginning of our search.
+    ///
+    /// ```
+    /// use aho_corasick::{
+    ///     automaton::OverlappingState,
+    ///     AhoCorasick, Anchored, Input, Match, StartKind,
+    /// };
+    ///
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = "append the app to the appendage";
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .start_kind(StartKind::Anchored)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let input = Input::new(haystack).anchored(Anchored::Yes);
+    /// let mut state = OverlappingState::start();
+    /// let mut matches = vec![];
+    ///
+    /// loop {
+    ///     ac.try_find_overlapping(input.clone(), &mut state)?;
+    ///     let mat = match state.get_match() {
+    ///         None => break,
+    ///         Some(mat) => mat,
+    ///     };
+    ///     matches.push(mat);
+    /// }
+    /// let expected = vec![
+    ///     Match::must(2, 0..3),
+    ///     Match::must(0, 0..6),
+    /// ];
+    /// assert_eq!(expected, matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn try_find_overlapping<'h, I: Into<Input<'h>>>(
+        &self,
+        input: I,
+        state: &mut OverlappingState,
+    ) -> Result<(), MatchError> {
+        let input = input.into();
+        enforce_anchored_consistency(self.start_kind, input.get_anchored())?;
+        self.aut.try_find_overlapping(&input, state)
+    }
+
+    /// Returns an iterator of non-overlapping matches, using the match
+    /// semantics that this automaton was constructed with.
+    ///
+    /// This is the fallible version of [`AhoCorasick::find_iter`].
+    ///
+    /// Note that the error returned by this method occurs during construction
+    /// of the iterator. The iterator itself yields `Match` values. That is,
+    /// once the iterator is constructed, the iteration itself will never
+    /// report an error.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when this Aho-Corasick searcher does not support
+    /// the given `Input` configuration.
+    ///
+    /// For example, if the Aho-Corasick searcher only supports anchored
+    /// searches or only supports unanchored searches, then providing an
+    /// `Input` that requests an anchored (or unanchored) search when it isn't
+    /// supported would result in an error.
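To make the Errors contract above concrete, a brief sketch: the default searcher supports only unanchored searches, so handing `try_find_iter` an anchored `Input` is reported as an error at iterator construction time.

```rust
use aho_corasick::{AhoCorasick, Anchored, Input};

fn main() {
    // Built with the default StartKind::Unanchored.
    let ac = AhoCorasick::new(&["foo", "bar"]).unwrap();
    // Requesting an anchored search is therefore rejected up front,
    // before any iteration happens.
    let input = Input::new("foobar").anchored(Anchored::Yes);
    assert!(ac.try_find_iter(input).is_err());
}
```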
+    ///
+    /// # Example: leftmost-first searching
+    ///
+    /// Basic usage with leftmost-first semantics:
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, Input, MatchKind, PatternID};
+    ///
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = "append the app to the appendage";
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let matches: Vec<PatternID> = ac
+    ///     .try_find_iter(Input::new(haystack))?
+    ///     .map(|mat| mat.pattern())
+    ///     .collect();
+    /// assert_eq!(vec![
+    ///     PatternID::must(0),
+    ///     PatternID::must(2),
+    ///     PatternID::must(0),
+    /// ], matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
-    pub fn pattern_count(&self) -> usize {
-        self.imp.pattern_count()
+    ///
+    /// # Example: anchored leftmost-first searching
+    ///
+    /// This shows how to anchor the search, such that all matches must begin
+    /// at the starting location of the search. For an iterator, an anchored
+    /// search implies that all matches are adjacent.
+    ///
+    /// ```
+    /// use aho_corasick::{
+    ///     AhoCorasick, Anchored, Input, MatchKind, PatternID, StartKind,
+    /// };
+    ///
+    /// let patterns = &["foo", "bar", "quux"];
+    /// let haystack = "fooquuxbar foo";
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .start_kind(StartKind::Anchored)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let matches: Vec<PatternID> = ac
+    ///     .try_find_iter(Input::new(haystack).anchored(Anchored::Yes))?
+    ///     .map(|mat| mat.pattern())
+    ///     .collect();
+    /// assert_eq!(vec![
+    ///     PatternID::must(0),
+    ///     PatternID::must(2),
+    ///     PatternID::must(1),
+    ///     // The final 'foo' is not found because it is not adjacent to the
+    ///     // 'bar' match. It needs to be adjacent because our search is
+    ///     // anchored.
+    /// ], matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn try_find_iter<'a, 'h, I: Into<Input<'h>>>(
+        &'a self,
+        input: I,
+    ) -> Result<FindIter<'a, 'h>, MatchError> {
+        let input = input.into();
+        enforce_anchored_consistency(self.start_kind, input.get_anchored())?;
+        Ok(FindIter(self.aut.try_find_iter(input)?))
+    }
+
+    /// Returns an iterator of overlapping matches.
+    ///
+    /// This is the fallible version of [`AhoCorasick::find_overlapping_iter`].
+    ///
+    /// Note that the error returned by this method occurs during construction
+    /// of the iterator. The iterator itself yields `Match` values. That is,
+    /// once the iterator is constructed, the iteration itself will never
+    /// report an error.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when this Aho-Corasick searcher does not support
+    /// the given `Input` configuration or does not support overlapping
+    /// searches.
+    ///
+    /// One example is that only Aho-Corasick searchers built with
+    /// [`MatchKind::Standard`] semantics support overlapping searches. Using
+    /// any other match semantics will result in this returning an error.
+    ///
+    /// # Example: basic usage
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, Input, PatternID};
+    ///
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = "append the app to the appendage";
+    ///
+    /// let ac = AhoCorasick::new(patterns).unwrap();
+    /// let matches: Vec<PatternID> = ac
+    ///     .try_find_overlapping_iter(Input::new(haystack))?
+    ///     .map(|mat| mat.pattern())
+    ///     .collect();
+    /// assert_eq!(vec![
+    ///     PatternID::must(2),
+    ///     PatternID::must(0),
+    ///     PatternID::must(2),
+    ///     PatternID::must(2),
+    ///     PatternID::must(0),
+    ///     PatternID::must(1),
+    /// ], matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: anchored overlapping search returns an error
+    ///
+    /// It isn't clear what the match semantics for anchored overlapping
+    /// iterators *ought* to be, so currently an error is returned. Callers
+    /// may use [`AhoCorasick::try_find_overlapping`] to implement their own
+    /// semantics if desired.
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, Anchored, Input, StartKind};
+    ///
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = "appendappendage app";
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .start_kind(StartKind::Anchored)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let input = Input::new(haystack).anchored(Anchored::Yes);
+    /// assert!(ac.try_find_overlapping_iter(input).is_err());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn try_find_overlapping_iter<'a, 'h, I: Into<Input<'h>>>(
+        &'a self,
+        input: I,
+    ) -> Result<FindOverlappingIter<'a, 'h>, MatchError> {
+        let input = input.into();
+        enforce_anchored_consistency(self.start_kind, input.get_anchored())?;
+        Ok(FindOverlappingIter(self.aut.try_find_overlapping_iter(input)?))
     }
 
-    /// Returns true if and only if this automaton supports reporting
-    /// overlapping matches.
+    /// Replace all matches with a corresponding value in the `replace_with`
+    /// slice given. Matches correspond to the same matches as reported by
+    /// [`AhoCorasick::try_find_iter`].
+    ///
+    /// Replacements are determined by the index of the matching pattern.
+    /// For example, if the pattern with index `2` is found, then it is
+    /// replaced by `replace_with[2]`.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `replace_with.len()` does not equal
+    /// [`AhoCorasick::patterns_len`].
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when this Aho-Corasick searcher does not support
+    /// the default `Input` configuration. More specifically, this occurs only
+    /// when the Aho-Corasick searcher does not support unanchored searches
+    /// since this replacement routine always does an unanchored search.
     ///
-    /// If this returns false and overlapping matches are requested, then it
-    /// will result in a panic.
+    /// # Example: basic usage
     ///
-    /// Since leftmost matching is inherently incompatible with overlapping
-    /// matches, only
-    /// [`MatchKind::Standard`](enum.MatchKind.html#variant.Standard)
-    /// supports overlapping matches. This is unlikely to change in the future.
+    /// ```
+    /// use aho_corasick::{AhoCorasick, MatchKind};
+    ///
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = "append the app to the appendage";
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let result = ac.try_replace_all(haystack, &["x", "y", "z"])?;
+    /// assert_eq!("x the z to the xage", result);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn try_replace_all<B>(
+        &self,
+        haystack: &str,
+        replace_with: &[B],
+    ) -> Result<String, MatchError>
+    where
+        B: AsRef<str>,
+    {
+        enforce_anchored_consistency(self.start_kind, Anchored::No)?;
+        self.aut.try_replace_all(haystack, replace_with)
+    }
+
+    /// Replace all matches using raw bytes with a corresponding value in the
+    /// `replace_with` slice given. Matches correspond to the same matches as
+    /// reported by [`AhoCorasick::try_find_iter`].
+    ///
+    /// Replacements are determined by the index of the matching pattern.
+    /// For example, if the pattern with index `2` is found, then it is
+    /// replaced by `replace_with[2]`.
+    ///
+    /// This is the fallible version of [`AhoCorasick::replace_all_bytes`].
+    ///
+    /// # Panics
+    ///
+    /// This panics when `replace_with.len()` does not equal
+    /// [`AhoCorasick::patterns_len`].
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when this Aho-Corasick searcher does not support
+    /// the default `Input` configuration. More specifically, this occurs only
+    /// when the Aho-Corasick searcher does not support unanchored searches
+    /// since this replacement routine always does an unanchored search.
+    ///
+    /// # Example: basic usage
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, MatchKind};
+    ///
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = b"append the app to the appendage";
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let result = ac.try_replace_all_bytes(haystack, &["x", "y", "z"])?;
+    /// assert_eq!(b"x the z to the xage".to_vec(), result);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn try_replace_all_bytes<B>(
+        &self,
+        haystack: &[u8],
+        replace_with: &[B],
+    ) -> Result<Vec<u8>, MatchError>
+    where
+        B: AsRef<[u8]>,
+    {
+        enforce_anchored_consistency(self.start_kind, Anchored::No)?;
+        self.aut.try_replace_all_bytes(haystack, replace_with)
+    }
+
+    /// Replace all matches using a closure called on each match.
+    /// Matches correspond to the same matches as reported by
+    /// [`AhoCorasick::try_find_iter`].
+    ///
+    /// The closure accepts three parameters: the match found, the text of
+    /// the match and a string buffer with which to write the replaced text
+    /// (if any). If the closure returns `true`, then it continues to the next
+    /// match. If the closure returns `false`, then searching is stopped.
+    ///
+    /// Note that any matches with boundaries that don't fall on a valid UTF-8
+    /// boundary are silently skipped.
+    ///
+    /// This is the fallible version of [`AhoCorasick::replace_all_with`].
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when this Aho-Corasick searcher does not support
+    /// the default `Input` configuration. More specifically, this occurs only
+    /// when the Aho-Corasick searcher does not support unanchored searches
+    /// since this replacement routine always does an unanchored search.
     ///
     /// # Examples
     ///
     /// Basic usage:
     ///
     /// ```
-    /// use aho_corasick::{AhoCorasickBuilder, MatchKind};
+    /// use aho_corasick::{AhoCorasick, MatchKind};
     ///
-    /// let ac = AhoCorasickBuilder::new()
-    ///     .match_kind(MatchKind::Standard)
-    ///     .build(&["foo", "bar", "baz"]);
-    /// assert!(ac.supports_overlapping());
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = "append the app to the appendage";
     ///
-    /// let ac = AhoCorasickBuilder::new()
+    /// let ac = AhoCorasick::builder()
     ///     .match_kind(MatchKind::LeftmostFirst)
-    ///     .build(&["foo", "bar", "baz"]);
-    /// assert!(!ac.supports_overlapping());
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let mut result = String::new();
+    /// ac.try_replace_all_with(haystack, &mut result, |mat, _, dst| {
+    ///     dst.push_str(&mat.pattern().as_usize().to_string());
+    ///     true
+    /// })?;
+    /// assert_eq!("0 the 2 to the 0age", result);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Stopping the replacement by returning `false` (continued from the
+    /// example above):
+    ///
+    /// ```
+    /// # use aho_corasick::{AhoCorasick, MatchKind, PatternID};
+    /// # let patterns = &["append", "appendage", "app"];
+    /// # let haystack = "append the app to the appendage";
+    /// # let ac = AhoCorasick::builder()
+    /// #     .match_kind(MatchKind::LeftmostFirst)
+    /// #     .build(patterns)
+    /// #     .unwrap();
+    /// let mut result = String::new();
+    /// ac.try_replace_all_with(haystack, &mut result, |mat, _, dst| {
+    ///     dst.push_str(&mat.pattern().as_usize().to_string());
+    ///     mat.pattern() != PatternID::must(2)
+    /// })?;
+    /// assert_eq!("0 the 2 to the appendage", result);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn try_replace_all_with<F>(
+        &self,
+        haystack: &str,
+        dst: &mut String,
+        replace_with: F,
+    ) -> Result<(), MatchError>
+    where
+        F: FnMut(&Match, &str, &mut String) -> bool,
+    {
+        enforce_anchored_consistency(self.start_kind, Anchored::No)?;
+        self.aut.try_replace_all_with(haystack, dst, replace_with)
+    }
+
+    /// Replace all matches using raw bytes with a closure called on each
+    /// match. Matches correspond to the same matches as reported by
+    /// [`AhoCorasick::try_find_iter`].
+    ///
+    /// The closure accepts three parameters: the match found, the text of
+    /// the match and a byte buffer with which to write the replaced text
+    /// (if any). If the closure returns `true`, then it continues to the next
+    /// match. If the closure returns `false`, then searching is stopped.
+    ///
+    /// This is the fallible version of
+    /// [`AhoCorasick::replace_all_with_bytes`].
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when this Aho-Corasick searcher does not support
+    /// the default `Input` configuration. More specifically, this occurs only
+    /// when the Aho-Corasick searcher does not support unanchored searches
+    /// since this replacement routine always does an unanchored search.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, MatchKind};
+    ///
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = b"append the app to the appendage";
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let mut result = vec![];
+    /// ac.try_replace_all_with_bytes(haystack, &mut result, |mat, _, dst| {
+    ///     dst.extend(mat.pattern().as_usize().to_string().bytes());
+    ///     true
+    /// })?;
+    /// assert_eq!(b"0 the 2 to the 0age".to_vec(), result);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Stopping the replacement by returning `false` (continued from the
+    /// example above):
+    ///
+    /// ```
+    /// # use aho_corasick::{AhoCorasick, MatchKind, PatternID};
+    /// # let patterns = &["append", "appendage", "app"];
+    /// # let haystack = b"append the app to the appendage";
+    /// # let ac = AhoCorasick::builder()
+    /// #     .match_kind(MatchKind::LeftmostFirst)
+    /// #     .build(patterns)
+    /// #     .unwrap();
+    /// let mut result = vec![];
+    /// ac.try_replace_all_with_bytes(haystack, &mut result, |mat, _, dst| {
+    ///     dst.extend(mat.pattern().as_usize().to_string().bytes());
+    ///     mat.pattern() != PatternID::must(2)
+    /// })?;
+    /// assert_eq!(b"0 the 2 to the appendage".to_vec(), result);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn try_replace_all_with_bytes<F>(
+        &self,
+        haystack: &[u8],
+        dst: &mut Vec<u8>,
+        replace_with: F,
+    ) -> Result<(), MatchError>
+    where
+        F: FnMut(&Match, &[u8], &mut Vec<u8>) -> bool,
+    {
+        enforce_anchored_consistency(self.start_kind, Anchored::No)?;
+        self.aut.try_replace_all_with_bytes(haystack, dst, replace_with)
+    }
+
+    /// Returns an iterator of non-overlapping matches in the given
+    /// stream. Matches correspond to the same matches as reported by
+    /// [`AhoCorasick::try_find_iter`].
+    ///
+    /// The matches yielded by this iterator use absolute position offsets in
+    /// the stream given, where the first byte has index `0`. Matches are
+    /// yielded until the stream is exhausted.
+    ///
+    /// Each item yielded by the iterator is a `Result<Match,
+    /// std::io::Error>`, where an error is yielded if there was a problem
+    /// reading from the reader given.
+    ///
+    /// When searching a stream, an internal buffer is used. Therefore, callers
+    /// should avoid providing a buffered reader, if possible.
+    ///
+    /// This is the fallible version of [`AhoCorasick::stream_find_iter`].
+    /// Note that both methods return iterators that produce `Result` values.
+    /// The difference is that this routine returns an error if _construction_
+    /// of the iterator failed. The `Result` values yielded by the iterator
+    /// come from whether the given reader returns an error or not during the
+    /// search.
+    ///
+    /// # Memory usage
+    ///
+    /// In general, searching streams will use a constant amount of memory for
+    /// its internal buffer. The one requirement is that the internal buffer
+    /// must be at least the size of the longest possible match. In most use
+    /// cases, the default buffer size will be much larger than any individual
+    /// match.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when this Aho-Corasick searcher does not support
+    /// the default `Input` configuration. More specifically, this occurs only
+    /// when the Aho-Corasick searcher does not support unanchored searches
+    /// since this stream searching routine always does an unanchored search.
+    ///
+    /// This also returns an error if the searcher does not support stream
+    /// searches. Only searchers built with [`MatchKind::Standard`] semantics
+    /// support stream searches.
+    ///
+    /// # Example: basic usage
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, PatternID};
+    ///
+    /// let patterns = &["append", "appendage", "app"];
+    /// let haystack = "append the app to the appendage";
+    ///
+    /// let ac = AhoCorasick::new(patterns).unwrap();
+    /// let mut matches = vec![];
+    /// for result in ac.try_stream_find_iter(haystack.as_bytes())? {
+    ///     let mat = result?;
+    ///     matches.push(mat.pattern());
+    /// }
+    /// assert_eq!(vec![
+    ///     PatternID::must(2),
+    ///     PatternID::must(2),
+    ///     PatternID::must(2),
+    /// ], matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "std")]
+    pub fn try_stream_find_iter<'a, R: std::io::Read>(
+        &'a self,
+        rdr: R,
+    ) -> Result<StreamFindIter<'a, R>, MatchError> {
+        enforce_anchored_consistency(self.start_kind, Anchored::No)?;
+        self.aut.try_stream_find_iter(rdr).map(StreamFindIter)
+    }
+
+    /// Search for and replace all matches of this automaton in
+    /// the given reader, and write the replacements to the given
+    /// writer. Matches correspond to the same matches as reported by
+    /// [`AhoCorasick::try_find_iter`].
+    ///
+    /// Replacements are determined by the index of the matching pattern. For
+    /// example, if the pattern with index `2` is found, then it is replaced by
+    /// `replace_with[2]`.
+    ///
+    /// After all matches are replaced, the writer is _not_ flushed.
+    ///
+    /// If there was a problem reading from the given reader or writing to the
+    /// given writer, then the corresponding `io::Error` is returned and all
+    /// replacement is stopped.
+    ///
+    /// When searching a stream, an internal buffer is used. Therefore, callers
+    /// should avoid providing a buffered reader, if possible. However,
+    /// callers may want to provide a buffered writer.
+    ///
+    /// Note that there is currently no infallible version of this routine.
+    ///
+    /// # Memory usage
+    ///
+    /// In general, searching streams will use a constant amount of memory for
+    /// its internal buffer. The one requirement is that the internal buffer
+    /// must be at least the size of the longest possible match. In most use
+    /// cases, the default buffer size will be much larger than any individual
+    /// match.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `replace_with.len()` does not equal
+    /// [`AhoCorasick::patterns_len`].
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when this Aho-Corasick searcher does not support
+    /// the default `Input` configuration. More specifically, this occurs only
+    /// when the Aho-Corasick searcher does not support unanchored searches
+    /// since this stream searching routine always does an unanchored search.
+    ///
+    /// This also returns an error if the searcher does not support stream
+    /// searches. Only searchers built with [`MatchKind::Standard`] semantics
+    /// support stream searches.
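+    ///
+    /// As an illustrative sketch of the second error case: a leftmost-first
+    /// searcher is refused, since stream searching requires
+    /// [`MatchKind::Standard`] semantics.
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, MatchKind};
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .build(&["foo"])
+    ///     .unwrap();
+    /// let mut wtr = vec![];
+    /// // The replacement slice has the right length, but the search itself
+    /// // is rejected because this searcher cannot do stream searches.
+    /// assert!(ac
+    ///     .try_stream_replace_all("foo".as_bytes(), &mut wtr, &["x"])
+    ///     .is_err());
+    /// ```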
+    ///
+    /// # Example: basic usage
+    ///
+    /// ```
+    /// use aho_corasick::AhoCorasick;
+    ///
+    /// let patterns = &["fox", "brown", "quick"];
+    /// let haystack = "The quick brown fox.";
+    /// let replace_with = &["sloth", "grey", "slow"];
+    ///
+    /// let ac = AhoCorasick::new(patterns).unwrap();
+    /// let mut result = vec![];
+    /// ac.try_stream_replace_all(
+    ///     haystack.as_bytes(),
+    ///     &mut result,
+    ///     replace_with,
+    /// )?;
+    /// assert_eq!(b"The slow grey sloth.".to_vec(), result);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "std")]
+    pub fn try_stream_replace_all<R, W, B>(
+        &self,
+        rdr: R,
+        wtr: W,
+        replace_with: &[B],
+    ) -> Result<(), std::io::Error>
+    where
+        R: std::io::Read,
+        W: std::io::Write,
+        B: AsRef<[u8]>,
+    {
+        enforce_anchored_consistency(self.start_kind, Anchored::No)
+            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
+        self.aut.try_stream_replace_all(rdr, wtr, replace_with)
+    }
+
+    /// Search the given reader and replace all matches of this automaton
+    /// using the given closure. The result is written to the given
+    /// writer. Matches correspond to the same matches as reported by
+    /// [`AhoCorasick::try_find_iter`].
+    ///
+    /// The closure accepts three parameters: the match found, the text of
+    /// the match and the writer with which to write the replaced text (if
+    /// any).
+    ///
+    /// After all matches are replaced, the writer is _not_ flushed.
+    ///
+    /// If there was a problem reading from the given reader or writing to the
+    /// given writer, then the corresponding `io::Error` is returned and all
+    /// replacement is stopped.
+    ///
+    /// When searching a stream, an internal buffer is used. Therefore, callers
+    /// should avoid providing a buffered reader, if possible. However,
+    /// callers may want to provide a buffered writer.
+    ///
+    /// Note that there is currently no infallible version of this routine.
+    ///
+    /// # Memory usage
+    ///
+    /// In general, searching streams will use a constant amount of memory for
+    /// its internal buffer. The one requirement is that the internal buffer
+    /// must be at least the size of the longest possible match. In most use
+    /// cases, the default buffer size will be much larger than any individual
+    /// match.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when this Aho-Corasick searcher does not support
+    /// the default `Input` configuration. More specifically, this occurs only
+    /// when the Aho-Corasick searcher does not support unanchored searches
+    /// since this stream searching routine always does an unanchored search.
+    ///
+    /// This also returns an error if the searcher does not support stream
+    /// searches. Only searchers built with [`MatchKind::Standard`] semantics
+    /// support stream searches.
+    ///
+    /// # Example: basic usage
+    ///
+    /// ```
+    /// use std::io::Write;
+    /// use aho_corasick::AhoCorasick;
+    ///
+    /// let patterns = &["fox", "brown", "quick"];
+    /// let haystack = "The quick brown fox.";
+    ///
+    /// let ac = AhoCorasick::new(patterns).unwrap();
+    /// let mut result = vec![];
+    /// ac.try_stream_replace_all_with(
+    ///     haystack.as_bytes(),
+    ///     &mut result,
+    ///     |mat, _, wtr| {
+    ///         wtr.write_all(mat.pattern().as_usize().to_string().as_bytes())
+    ///     },
+    /// )?;
+    /// assert_eq!(b"The 2 1 0.".to_vec(), result);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "std")]
+    pub fn try_stream_replace_all_with<R, W, F>(
+        &self,
+        rdr: R,
+        wtr: W,
+        replace_with: F,
+    ) -> Result<(), std::io::Error>
+    where
+        R: std::io::Read,
+        W: std::io::Write,
+        F: FnMut(&Match, &[u8], &mut W) -> Result<(), std::io::Error>,
+    {
+        enforce_anchored_consistency(self.start_kind, Anchored::No)
+            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
+        self.aut.try_stream_replace_all_with(rdr, wtr, replace_with)
+    }
+}
+
+/// Routines for querying information about the Aho-Corasick automaton.
+impl AhoCorasick {
+    /// Returns the kind of the Aho-Corasick automaton used by this searcher.
+    ///
+    /// Knowing the Aho-Corasick kind is principally useful for diagnostic
+    /// purposes. In particular, if no specific kind was given to
+    /// [`AhoCorasickBuilder::kind`], then one is automatically chosen and
+    /// this routine will report which one.
+    ///
+    /// Note that the heuristics used for choosing which `AhoCorasickKind`
+    /// to use may be changed in a semver compatible release.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, AhoCorasickKind};
+    ///
+    /// let ac = AhoCorasick::new(&["foo", "bar", "quux", "baz"]).unwrap();
+    /// // The specific Aho-Corasick kind chosen is not guaranteed!
+    /// assert_eq!(AhoCorasickKind::DFA, ac.kind());
+    /// ```
+    pub fn kind(&self) -> AhoCorasickKind {
+        self.kind
+    }
+
+    /// Returns the type of starting search configuration supported by this
+    /// Aho-Corasick automaton.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, StartKind};
+    ///
+    /// let ac = AhoCorasick::new(&["foo", "bar", "quux", "baz"]).unwrap();
+    /// assert_eq!(StartKind::Unanchored, ac.start_kind());
+    /// ```
+    pub fn start_kind(&self) -> StartKind {
+        self.start_kind
+    }
+
+    /// Returns the match kind used by this automaton.
+    ///
+    /// The match kind is important because it determines what kinds of
+    /// matches are returned. Also, some operations (such as overlapping
+    /// search and stream searching) are only supported when using the
+    /// [`MatchKind::Standard`] match kind.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, MatchKind};
+    ///
+    /// let ac = AhoCorasick::new(&["foo", "bar", "quux", "baz"]).unwrap();
+    /// assert_eq!(MatchKind::Standard, ac.match_kind());
+    /// ```
+    pub fn match_kind(&self) -> MatchKind {
+        self.aut.match_kind()
+    }
+
+    /// Returns the length of the shortest pattern matched by this automaton.
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use aho_corasick::AhoCorasick; + /// + /// let ac = AhoCorasick::new(&["foo", "bar", "quux", "baz"]).unwrap(); + /// assert_eq!(3, ac.min_pattern_len()); + /// ``` + /// + /// Note that an `AhoCorasick` automaton has a minimum length of `0` if + /// and only if it can match the empty string: + /// + /// ``` + /// use aho_corasick::AhoCorasick; + /// + /// let ac = AhoCorasick::new(&["foo", "", "quux", "baz"]).unwrap(); + /// assert_eq!(0, ac.min_pattern_len()); + /// ``` + pub fn min_pattern_len(&self) -> usize { + self.aut.min_pattern_len() + } + + /// Returns the length of the longest pattern matched by this automaton. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use aho_corasick::AhoCorasick; + /// + /// let ac = AhoCorasick::new(&["foo", "bar", "quux", "baz"]).unwrap(); + /// assert_eq!(4, ac.max_pattern_len()); /// ``` - pub fn supports_overlapping(&self) -> bool { - self.match_kind.supports_overlapping() + pub fn max_pattern_len(&self) -> usize { + self.aut.max_pattern_len() } - /// Returns true if and only if this automaton supports stream searching. - /// - /// If this returns false and stream searching (or replacing) is attempted, - /// then it will result in a panic. + /// Return the total number of patterns matched by this automaton. /// - /// Currently, only - /// [`MatchKind::Standard`](enum.MatchKind.html#variant.Standard) - /// supports streaming. This may be expanded in the future. + /// This includes patterns that may never participate in a match. For + /// example, if [`MatchKind::LeftmostFirst`] match semantics are used, and + /// the patterns `Sam` and `Samwise` were used to build the automaton (in + /// that order), then `Samwise` can never participate in a match because + /// `Sam` will always take priority. 
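+    ///
+    /// For instance, a short sketch of that point:
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, MatchKind, PatternID};
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .build(&["Sam", "Samwise"])
+    ///     .unwrap();
+    /// // Both patterns are counted...
+    /// assert_eq!(2, ac.patterns_len());
+    /// // ...but 'Sam' (pattern 0) always wins at the same starting
+    /// // position, so 'Samwise' (pattern 1) never participates in a match.
+    /// let mat = ac.find("Samwise").expect("should have a match");
+    /// assert_eq!(PatternID::must(0), mat.pattern());
+    /// ```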
/// /// # Examples /// /// Basic usage: /// /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::Standard) - /// .build(&["foo", "bar", "baz"]); - /// assert!(ac.supports_stream()); + /// use aho_corasick::AhoCorasick; /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(&["foo", "bar", "baz"]); - /// assert!(!ac.supports_stream()); + /// let ac = AhoCorasick::new(&["foo", "bar", "baz"]).unwrap(); + /// assert_eq!(3, ac.patterns_len()); /// ``` - pub fn supports_stream(&self) -> bool { - self.match_kind.supports_stream() + pub fn patterns_len(&self) -> usize { + self.aut.patterns_len() } /// Returns the approximate total amount of heap used by this automaton, in @@ -989,212 +1974,84 @@ impl AhoCorasick { /// This example shows the difference in heap usage between a few /// configurations: /// - /// ```ignore - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; + /// ``` + /// # if !cfg!(target_pointer_width = "64") { return; } + /// use aho_corasick::{AhoCorasick, AhoCorasickKind, MatchKind}; + /// + /// let ac = AhoCorasick::builder() + /// .kind(None) // default + /// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"]) + /// .unwrap(); + /// assert_eq!(5_632, ac.memory_usage()); /// - /// let ac = AhoCorasickBuilder::new() - /// .dfa(false) // default - /// .build(&["foo", "bar", "baz"]); - /// assert_eq!(10_336, ac.heap_bytes()); + /// let ac = AhoCorasick::builder() + /// .kind(None) // default + /// .ascii_case_insensitive(true) + /// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"]) + /// .unwrap(); + /// assert_eq!(11_136, ac.memory_usage()); /// - /// let ac = AhoCorasickBuilder::new() - /// .dfa(false) // default + /// let ac = AhoCorasick::builder() + /// .kind(Some(AhoCorasickKind::NoncontiguousNFA)) /// .ascii_case_insensitive(true) - /// .build(&["foo", "bar", "baz"]); - /// assert_eq!(10_384, ac.heap_bytes()); + /// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"]) + /// .unwrap(); + /// assert_eq!(10_879, ac.memory_usage()); /// - /// let ac = AhoCorasickBuilder::new() - /// .dfa(true) + /// let ac = AhoCorasick::builder() + /// .kind(Some(AhoCorasickKind::ContiguousNFA)) /// .ascii_case_insensitive(true) - /// .build(&["foo", "bar", "baz"]); - /// assert_eq!(1_248, ac.heap_bytes()); - /// ``` - pub fn heap_bytes(&self) -> usize { - match self.imp { - Imp::NFA(ref nfa) => nfa.heap_bytes(), - Imp::DFA(ref dfa) => dfa.heap_bytes(), - } + /// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"]) + /// .unwrap(); + /// assert_eq!(2_584, ac.memory_usage()); + /// + /// let ac = AhoCorasick::builder() + /// .kind(Some(AhoCorasickKind::DFA)) + /// .ascii_case_insensitive(true) + /// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"]) + /// .unwrap(); + /// // While this shows the DFA being the biggest here by a small margin, + /// // don't let the difference fool you. With such a small number of + /// // patterns, the difference is small, but a bigger number of patterns + /// // will reveal that the rate of growth of the DFA is far bigger than + /// // the NFAs above. For a large number of patterns, it is easy for the + /// // DFA to take an order of magnitude more heap space (or more!). 
+    /// assert_eq!(11_136, ac.memory_usage());
+    /// ```
+    pub fn memory_usage(&self) -> usize {
+        self.aut.memory_usage()
     }
 }
 
-/// The internal implementation of Aho-Corasick, which is either an NFA or
-/// a DFA. The NFA is slower but uses less memory. The DFA is faster but uses
-/// more memory.
-#[derive(Clone, Debug)]
-enum Imp<S: StateID> {
-    NFA(NFA<S>),
-    DFA(DFA<S>),
-}
-
-impl<S: StateID> Imp<S> {
-    /// Returns the type of match semantics implemented by this automaton.
-    fn match_kind(&self) -> &MatchKind {
-        match *self {
-            Imp::NFA(ref nfa) => nfa.match_kind(),
-            Imp::DFA(ref dfa) => dfa.match_kind(),
-        }
-    }
-
-    /// Returns the identifier of the start state.
-    fn start_state(&self) -> S {
-        match *self {
-            Imp::NFA(ref nfa) => nfa.start_state(),
-            Imp::DFA(ref dfa) => dfa.start_state(),
-        }
-    }
-
-    /// The length, in bytes, of the longest pattern in this automaton. This
-    /// information is useful for maintaining correct buffer sizes when
-    /// searching on streams.
-    fn max_pattern_len(&self) -> usize {
-        match *self {
-            Imp::NFA(ref nfa) => nfa.max_pattern_len(),
-            Imp::DFA(ref dfa) => dfa.max_pattern_len(),
-        }
-    }
-
-    /// The total number of patterns added to this automaton. This includes
-    /// patterns that may never match. The maximum matching pattern that can be
-    /// reported is exactly one less than this number.
-    fn pattern_count(&self) -> usize {
-        match *self {
-            Imp::NFA(ref nfa) => nfa.pattern_count(),
-            Imp::DFA(ref dfa) => dfa.pattern_count(),
-        }
-    }
-
-    /// Returns the prefilter object, if one exists, for the underlying
-    /// automaton.
-    fn prefilter(&self) -> Option<&dyn Prefilter> {
-        match *self {
-            Imp::NFA(ref nfa) => nfa.prefilter(),
-            Imp::DFA(ref dfa) => dfa.prefilter(),
-        }
-    }
-
-    /// Returns true if and only if we should attempt to use a prefilter.
-    fn use_prefilter(&self) -> bool {
-        let p = match self.prefilter() {
-            None => return false,
-            Some(p) => p,
-        };
-        !p.looks_for_non_start_of_match()
-    }
-
-    #[inline(always)]
-    fn overlapping_find_at(
-        &self,
-        prestate: &mut PrefilterState,
-        haystack: &[u8],
-        at: usize,
-        state_id: &mut S,
-        match_index: &mut usize,
-    ) -> Option<Match> {
-        match *self {
-            Imp::NFA(ref nfa) => nfa.overlapping_find_at(
-                prestate,
-                haystack,
-                at,
-                state_id,
-                match_index,
-            ),
-            Imp::DFA(ref dfa) => dfa.overlapping_find_at(
-                prestate,
-                haystack,
-                at,
-                state_id,
-                match_index,
-            ),
-        }
-    }
-
-    #[inline(always)]
-    fn earliest_find_at(
-        &self,
-        prestate: &mut PrefilterState,
-        haystack: &[u8],
-        at: usize,
-        state_id: &mut S,
-    ) -> Option<Match> {
-        match *self {
-            Imp::NFA(ref nfa) => {
-                nfa.earliest_find_at(prestate, haystack, at, state_id)
-            }
-            Imp::DFA(ref dfa) => {
-                dfa.earliest_find_at(prestate, haystack, at, state_id)
-            }
-        }
-    }
-
-    #[inline(always)]
-    fn find_at_no_state(
-        &self,
-        prestate: &mut PrefilterState,
-        haystack: &[u8],
-        at: usize,
-    ) -> Option<Match> {
-        match *self {
-            Imp::NFA(ref nfa) => nfa.find_at_no_state(prestate, haystack, at),
-            Imp::DFA(ref dfa) => dfa.find_at_no_state(prestate, haystack, at),
-        }
+// We provide a manual debug impl so that we don't include the 'start_kind',
+// principally because it's kind of weird to do so and because it screws with
+// the carefully curated debug output for the underlying automaton.
+impl core::fmt::Debug for AhoCorasick {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.debug_tuple("AhoCorasick").field(&self.aut).finish()
     }
 }
 
 /// An iterator of non-overlapping matches in a particular haystack.
 ///
-/// This iterator yields matches according to the
-/// [`MatchKind`](enum.MatchKind.html)
-/// used by this automaton.
+/// This iterator yields matches according to the [`MatchKind`] used by this
+/// automaton.
 ///
-/// This iterator is constructed via the
-/// [`AhoCorasick::find_iter`](struct.AhoCorasick.html#method.find_iter)
-/// method.
-///
-/// The type variable `S` refers to the representation used for state
-/// identifiers. (By default, this is `usize`.)
+/// This iterator is constructed via the [`AhoCorasick::find_iter`] and
+/// [`AhoCorasick::try_find_iter`] methods.
 ///
 /// The lifetime `'a` refers to the lifetime of the `AhoCorasick` automaton.
 ///
-/// The lifetime `'b` refers to the lifetime of the haystack being searched.
+/// The lifetime `'h` refers to the lifetime of the haystack being searched.
 #[derive(Debug)]
-pub struct FindIter<'a, 'b, S: StateID> {
-    fsm: &'a Imp<S>,
-    prestate: PrefilterState,
-    haystack: &'b [u8],
-    pos: usize,
-}
-
-impl<'a, 'b, S: StateID> FindIter<'a, 'b, S> {
-    fn new(ac: &'a AhoCorasick<S>, haystack: &'b [u8]) -> FindIter<'a, 'b, S> {
-        let prestate = PrefilterState::new(ac.max_pattern_len());
-        FindIter { fsm: &ac.imp, prestate, haystack, pos: 0 }
-    }
-}
+pub struct FindIter<'a, 'h>(automaton::FindIter<'a, 'h, Arc<dyn AcAutomaton>>);
 
-impl<'a, 'b, S: StateID> Iterator for FindIter<'a, 'b, S> {
+impl<'a, 'h> Iterator for FindIter<'a, 'h> {
     type Item = Match;
 
+    #[inline]
     fn next(&mut self) -> Option<Match> {
-        if self.pos > self.haystack.len() {
-            return None;
-        }
-        let result = self.fsm.find_at_no_state(
-            &mut self.prestate,
-            self.haystack,
-            self.pos,
-        );
-        let mat = match result {
-            None => return None,
-            Some(mat) => mat,
-        };
-        if mat.end() == self.pos {
-            // If the automaton can match the empty string and if we found an
-            // empty match, then we need to forcefully move the position.
-            self.pos += 1;
-        } else {
-            self.pos = mat.end();
-        }
-        Some(mat)
+        self.0.next()
     }
 }
 
@@ -1203,315 +2060,93 @@ impl<'a, 'b, S: StateID> Iterator for FindIter<'a, 'b, S> {
 /// An iterator of overlapping matches in a particular haystack.
 ///
 /// This iterator will report all possible matches in a particular haystack,
 /// even when the matches overlap.
 ///
-/// This iterator is constructed via the
-/// [`AhoCorasick::find_overlapping_iter`](struct.AhoCorasick.html#method.find_overlapping_iter)
-/// method.
-///
-/// The type variable `S` refers to the representation used for state
-/// identifiers. (By default, this is `usize`.)
+/// This iterator is constructed via the [`AhoCorasick::find_overlapping_iter`]
+/// and [`AhoCorasick::try_find_overlapping_iter`] methods.
 ///
 /// The lifetime `'a` refers to the lifetime of the `AhoCorasick` automaton.
 ///
-/// The lifetime `'b` refers to the lifetime of the haystack being searched.
+/// The lifetime `'h` refers to the lifetime of the haystack being searched.
 #[derive(Debug)]
-pub struct FindOverlappingIter<'a, 'b, S: StateID> {
-    fsm: &'a Imp<S>,
-    prestate: PrefilterState,
-    haystack: &'b [u8],
-    pos: usize,
-    state_id: S,
-    match_index: usize,
-}
-
-impl<'a, 'b, S: StateID> FindOverlappingIter<'a, 'b, S> {
-    fn new(
-        ac: &'a AhoCorasick<S>,
-        haystack: &'b [u8],
-    ) -> FindOverlappingIter<'a, 'b, S> {
-        assert!(
-            ac.supports_overlapping(),
-            "automaton does not support overlapping searches"
-        );
-        let prestate = PrefilterState::new(ac.max_pattern_len());
-        FindOverlappingIter {
-            fsm: &ac.imp,
-            prestate,
-            haystack,
-            pos: 0,
-            state_id: ac.imp.start_state(),
-            match_index: 0,
-        }
-    }
-}
+pub struct FindOverlappingIter<'a, 'h>(
+    automaton::FindOverlappingIter<'a, 'h, Arc<dyn AcAutomaton>>,
+);
 
-impl<'a, 'b, S: StateID> Iterator for FindOverlappingIter<'a, 'b, S> {
+impl<'a, 'h> Iterator for FindOverlappingIter<'a, 'h> {
     type Item = Match;
 
+    #[inline]
     fn next(&mut self) -> Option<Match> {
-        let result = self.fsm.overlapping_find_at(
-            &mut self.prestate,
-            self.haystack,
-            self.pos,
-            &mut self.state_id,
-            &mut self.match_index,
-        );
-        match result {
-            None => return None,
-            Some(m) => {
-                self.pos = m.end();
-                Some(m)
-            }
-        }
+        self.0.next()
     }
 }
 
 /// An iterator that reports Aho-Corasick matches in a stream.
 ///
-/// This iterator yields elements of type `io::Result<Match>`, where an error
-/// is reported if there was a problem reading from the underlying stream.
-/// The iterator terminates only when the underlying stream reaches `EOF`.
+/// This iterator yields elements of type `Result<Match, std::io::Error>`,
+/// where an error is reported if there was a problem reading from the
+/// underlying stream. The iterator terminates only when the underlying stream
+/// reaches `EOF`.
 ///
-/// This iterator is constructed via the
-/// [`AhoCorasick::stream_find_iter`](struct.AhoCorasick.html#method.stream_find_iter)
-/// method.
+/// This iterator is constructed via the [`AhoCorasick::stream_find_iter`] and
+/// [`AhoCorasick::try_stream_find_iter`] methods.
 ///
 /// The type variable `R` refers to the `io::Read` stream that is being read
 /// from.
 ///
-/// The type variable `S` refers to the representation used for state
-/// identifiers. (By default, this is `usize`.)
-///
-/// The lifetime `'a` refers to the lifetime of the `AhoCorasick` automaton.
+/// The lifetime `'a` refers to the lifetime of the corresponding
+/// [`AhoCorasick`] searcher.
+#[cfg(feature = "std")]
 #[derive(Debug)]
-pub struct StreamFindIter<'a, R, S: StateID> {
-    it: StreamChunkIter<'a, R, S>,
-}
+pub struct StreamFindIter<'a, R>(
+    automaton::StreamFindIter<'a, Arc<dyn AcAutomaton>, R>,
+);
 
-impl<'a, R: io::Read, S: StateID> StreamFindIter<'a, R, S> {
-    fn new(ac: &'a AhoCorasick<S>, rdr: R) -> StreamFindIter<'a, R, S> {
-        StreamFindIter { it: StreamChunkIter::new(ac, rdr) }
-    }
-}
+#[cfg(feature = "std")]
+impl<'a, R: std::io::Read> Iterator for StreamFindIter<'a, R> {
+    type Item = Result<Match, std::io::Error>;
 
-impl<'a, R: io::Read, S: StateID> Iterator for StreamFindIter<'a, R, S> {
-    type Item = io::Result<Match>;
-
-    fn next(&mut self) -> Option<io::Result<Match>> {
-        loop {
-            match self.it.next() {
-                None => return None,
-                Some(Err(err)) => return Some(Err(err)),
-                Some(Ok(StreamChunk::NonMatch { .. })) => {}
-                Some(Ok(StreamChunk::Match { mat, .. })) => {
-                    return Some(Ok(mat));
-                }
-            }
-        }
+    fn next(&mut self) -> Option<Result<Match, std::io::Error>> {
+        self.0.next()
     }
 }
 
-/// An iterator over chunks in an underlying reader. Each chunk either
-/// corresponds to non-matching bytes or matching bytes, but all bytes from
-/// the underlying reader are reported in sequence. There may be an arbitrary
-/// number of non-matching chunks before seeing a matching chunk.
+/// A builder for configuring an Aho-Corasick automaton.
 ///
-/// N.B. This does not actually implement Iterator because we need to borrow
-/// from the underlying reader. But conceptually, it's still an iterator.
-#[derive(Debug)]
-struct StreamChunkIter<'a, R, S: StateID> {
-    /// The AC automaton.
-    fsm: &'a Imp<S>,
-    /// State associated with this automaton's prefilter. It is a heuristic
-    /// for stopping the prefilter if it's deemed ineffective.
-    prestate: PrefilterState,
-    /// The source of bytes we read from.
-    rdr: R,
-    /// A fixed size buffer. This is what we actually search. There are some
-    /// invariants around the buffer's size, namely, it must be big enough to
-    /// contain the longest possible match.
-    buf: Buffer,
-    /// The ID of the FSM state we're currently in.
-    state_id: S,
-    /// The current position at which to start the next search in `buf`.
-    search_pos: usize,
-    /// The absolute position of `search_pos`, where `0` corresponds to the
-    /// position of the first byte read from `rdr`.
-    absolute_pos: usize,
-    /// The ending position of the last StreamChunk that was returned to the
-    /// caller. This position is used to determine whether we need to emit
-    /// non-matching bytes before emitting a match.
-    report_pos: usize,
-    /// A match that should be reported on the next call.
-    pending_match: Option<Match>,
-    /// Enabled only when the automaton can match the empty string. When
-    /// enabled, we need to execute one final search after consuming the
-    /// reader to find the trailing empty match.
-    has_empty_match_at_end: bool,
-}
-
-/// A single chunk yielded by the stream chunk iterator.
+/// # Quick advice
 ///
-/// The `'r` lifetime refers to the lifetime of the stream chunk iterator.
-#[derive(Debug)]
-enum StreamChunk<'r> {
-    /// A chunk that does not contain any matches.
-    NonMatch { bytes: &'r [u8] },
-    /// A chunk that precisely contains a match.
-    Match { bytes: &'r [u8], mat: Match },
-}
-
-impl<'a, R: io::Read, S: StateID> StreamChunkIter<'a, R, S> {
-    fn new(ac: &'a AhoCorasick<S>, rdr: R) -> StreamChunkIter<'a, R, S> {
-        assert!(
-            ac.supports_stream(),
-            "stream searching is only supported for Standard match semantics"
-        );
-
-        let prestate = if ac.imp.use_prefilter() {
-            PrefilterState::new(ac.max_pattern_len())
-        } else {
-            PrefilterState::disabled()
-        };
-        let buf = Buffer::new(ac.imp.max_pattern_len());
-        let state_id = ac.imp.start_state();
-        StreamChunkIter {
-            fsm: &ac.imp,
-            prestate,
-            rdr,
-            buf,
-            state_id,
-            absolute_pos: 0,
-            report_pos: 0,
-            search_pos: 0,
-            pending_match: None,
-            has_empty_match_at_end: ac.is_match(""),
-        }
-    }
-
-    fn next(&mut self) -> Option<io::Result<StreamChunk>> {
-        loop {
-            if let Some(mut mat) = self.pending_match.take() {
-                let bytes = &self.buf.buffer()[mat.start()..mat.end()];
-                self.report_pos = mat.end();
-                mat = mat.increment(self.absolute_pos);
-                return Some(Ok(StreamChunk::Match { bytes, mat }));
-            }
-            if self.search_pos >= self.buf.len() {
-                if let Some(end) = self.unreported() {
-                    let bytes = &self.buf.buffer()[self.report_pos..end];
-                    self.report_pos = end;
-                    return Some(Ok(StreamChunk::NonMatch { bytes }));
-                }
-                if self.buf.len() >= self.buf.min_buffer_len() {
-                    // This is the point at which we roll our buffer, which we
-                    // only do if our buffer has at least the minimum amount of
-                    // bytes in it. Before rolling, we update our various
-                    // positions to be consistent with the buffer after it has
-                    // been rolled.
-
-                    self.report_pos -=
-                        self.buf.len() - self.buf.min_buffer_len();
-                    self.absolute_pos +=
-                        self.search_pos - self.buf.min_buffer_len();
-                    self.search_pos = self.buf.min_buffer_len();
-                    self.buf.roll();
-                }
-                match self.buf.fill(&mut self.rdr) {
-                    Err(err) => return Some(Err(err)),
-                    Ok(false) => {
-                        // We've hit EOF, but if there are still some
-                        // unreported bytes remaining, return them now.
-                        if self.report_pos < self.buf.len() {
-                            let bytes = &self.buf.buffer()[self.report_pos..];
-                            self.report_pos = self.buf.len();
-
-                            let chunk = StreamChunk::NonMatch { bytes };
-                            return Some(Ok(chunk));
-                        } else {
-                            // We've reported everything, but there might still
-                            // be a match at the very last position.
-                            if !self.has_empty_match_at_end {
-                                return None;
-                            }
-                            // fallthrough for another search to get trailing
-                            // empty matches
-                            self.has_empty_match_at_end = false;
-                        }
-                    }
-                    Ok(true) => {}
-                }
-            }
-            let result = self.fsm.earliest_find_at(
-                &mut self.prestate,
-                self.buf.buffer(),
-                self.search_pos,
-                &mut self.state_id,
-            );
-            match result {
-                None => {
-                    self.search_pos = self.buf.len();
-                }
-                Some(mat) => {
-                    self.state_id = self.fsm.start_state();
-                    if mat.end() == self.search_pos {
-                        // If the automaton can match the empty string and if
-                        // we found an empty match, then we need to forcefully
-                        // move the position.
-                        self.search_pos += 1;
-                    } else {
-                        self.search_pos = mat.end();
-                    }
-                    self.pending_match = Some(mat.clone());
-                    if self.report_pos < mat.start() {
-                        let bytes =
-                            &self.buf.buffer()[self.report_pos..mat.start()];
-                        self.report_pos = mat.start();
-
-                        let chunk = StreamChunk::NonMatch { bytes };
-                        return Some(Ok(chunk));
-                    }
-                }
-            }
-        }
-    }
-
-    fn unreported(&self) -> Option<usize> {
-        let end = self.search_pos.saturating_sub(self.buf.min_buffer_len());
-        if self.report_pos < end {
-            Some(end)
-        } else {
-            None
-        }
-    }
-}
-
-/// A builder for configuring an Aho-Corasick automaton.
-#[derive(Clone, Debug)]
+/// * Use [`AhoCorasickBuilder::match_kind`] to configure your searcher
+/// with [`MatchKind::LeftmostFirst`] if you want to match how backtracking
+/// regex engines execute searches for `pat1|pat2|..|patN`. Use
+/// [`MatchKind::LeftmostLongest`] if you want to match how POSIX regex engines
+/// do it.
+/// * If you need an anchored search, use [`AhoCorasickBuilder::start_kind`] to
+/// set the [`StartKind::Anchored`] mode since [`StartKind::Unanchored`] is the
+/// default. Or just use [`StartKind::Both`] to support both types of searches.
+/// * You might want to use [`AhoCorasickBuilder::kind`] to set your searcher
+/// to always use an [`AhoCorasickKind::DFA`] if search speed is critical and
+/// memory usage isn't a concern. Otherwise, not setting a kind will probably
+/// make the right choice for you. Beware that if you use [`StartKind::Both`]
+/// to build a searcher that supports both unanchored and anchored searches
+/// _and_ you set [`AhoCorasickKind::DFA`], then the DFA will essentially be
+/// duplicated to support both simultaneously. This results in very high memory
+/// usage.
+/// * For all other options, their defaults are almost certainly what you want;
+/// a short sketch applying this advice follows.
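+///
+/// Here is that sketch (the patterns are arbitrary placeholders, not a
+/// recommendation):
+///
+/// ```
+/// use aho_corasick::{AhoCorasick, AhoCorasickKind, MatchKind, StartKind};
+///
+/// let ac = AhoCorasick::builder()
+///     // Match like a backtracking regex alternation would.
+///     .match_kind(MatchKind::LeftmostFirst)
+///     // Only unanchored searches are needed (this is also the default).
+///     .start_kind(StartKind::Unanchored)
+///     // Favor search speed over memory usage.
+///     .kind(Some(AhoCorasickKind::DFA))
+///     .build(&["foo", "bar", "quux"])
+///     .unwrap();
+/// assert!(ac.find("xxx bar xxx").is_some());
+/// ```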
+#[derive(Clone, Debug, Default)]
 pub struct AhoCorasickBuilder {
-    nfa_builder: nfa::Builder,
-    dfa_builder: dfa::Builder,
-    dfa: bool,
-}
-
-impl Default for AhoCorasickBuilder {
-    fn default() -> AhoCorasickBuilder {
-        AhoCorasickBuilder::new()
-    }
+    nfa_noncontiguous: noncontiguous::Builder,
+    nfa_contiguous: contiguous::Builder,
+    dfa: dfa::Builder,
+    kind: Option<AhoCorasickKind>,
+    start_kind: StartKind,
 }
 
 impl AhoCorasickBuilder {
     /// Create a new builder for configuring an Aho-Corasick automaton.
     ///
-    /// If you don't need fine grained configuration or aren't sure which knobs
-    /// to set, try using
-    /// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured)
-    /// instead.
+    /// The builder provides a way to configure a number of things, including
+    /// ASCII case insensitivity and what kind of match semantics are used.
     pub fn new() -> AhoCorasickBuilder {
-        AhoCorasickBuilder {
-            nfa_builder: nfa::Builder::new(),
-            dfa_builder: dfa::Builder::new(),
-            dfa: false,
-        }
+        AhoCorasickBuilder::default()
     }
 
     /// Build an Aho-Corasick automaton using the configuration set on this
@@ -1519,177 +2154,137 @@ impl AhoCorasickBuilder {
     ///
     /// A builder may be reused to create more automatons.
     ///
-    /// This method will use the default for representing internal state
-    /// identifiers, which is `usize`. This guarantees that building the
-    /// automaton will succeed and is generally a good default, but can make
-    /// the size of the automaton 2-8 times bigger than it needs to be,
-    /// depending on your target platform.
-    ///
     /// # Examples
     ///
     /// Basic usage:
     ///
     /// ```
-    /// use aho_corasick::AhoCorasickBuilder;
+    /// use aho_corasick::{AhoCorasickBuilder, PatternID};
    ///
     /// let patterns = &["foo", "bar", "baz"];
-    /// let ac = AhoCorasickBuilder::new()
-    ///     .build(patterns);
-    /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
+    /// let ac = AhoCorasickBuilder::new().build(patterns).unwrap();
+    /// assert_eq!(
+    ///     Some(PatternID::must(1)),
+    ///     ac.find("xxx bar xxx").map(|m| m.pattern()),
+    /// );
     /// ```
-    pub fn build<I, P>(&self, patterns: I) -> AhoCorasick
+    pub fn build<I, P>(&self, patterns: I) -> Result<AhoCorasick, BuildError>
     where
         I: IntoIterator<Item = P>,
         P: AsRef<[u8]>,
     {
-        // The builder only returns an error if the chosen state ID
-        // representation is too small to fit all of the given patterns. In
-        // this case, since we fix the representation to usize, it will always
-        // work because it's impossible to overflow usize since the underlying
-        // storage would OOM long before that happens.
-        self.build_with_size::<usize, I, P>(patterns)
-            .expect("usize state ID type should always work")
+        let nfa = self.nfa_noncontiguous.build(patterns)?;
+        let (aut, kind): (Arc<dyn AcAutomaton>, AhoCorasickKind) =
+            match self.kind {
+                None => {
+                    debug!(
+                        "asked for automatic Aho-Corasick implementation, \
+                         criteria: <patterns: {:?}, max pattern len: {:?}, \
+                         start kind: {:?}>",
+                        nfa.patterns_len(),
+                        nfa.max_pattern_len(),
+                        self.start_kind,
+                    );
+                    self.build_auto(nfa)
+                }
+                Some(AhoCorasickKind::NoncontiguousNFA) => {
+                    debug!("forcefully chose noncontiguous NFA");
+                    (Arc::new(nfa), AhoCorasickKind::NoncontiguousNFA)
+                }
+                Some(AhoCorasickKind::ContiguousNFA) => {
+                    debug!("forcefully chose contiguous NFA");
+                    let cnfa =
+                        self.nfa_contiguous.build_from_noncontiguous(&nfa)?;
+                    (Arc::new(cnfa), AhoCorasickKind::ContiguousNFA)
+                }
                Some(AhoCorasickKind::DFA) => {
+                    debug!("forcefully chose DFA");
+                    let dfa = self.dfa.build_from_noncontiguous(&nfa)?;
+                    (Arc::new(dfa), AhoCorasickKind::DFA)
+                }
+            };
+        Ok(AhoCorasick { aut, kind, start_kind: self.start_kind })
     }
 
-    /// Build an Aho-Corasick automaton using the configuration set on this
-    /// builder with a specific state identifier representation. This only has
-    /// an effect when the `dfa` option is enabled.
-    ///
-    /// Generally, the choices for a state identifier representation are
-    /// `u8`, `u16`, `u32`, `u64` or `usize`, with `usize` being the default.
-    /// The advantage of choosing a smaller state identifier representation
-    /// is that the automaton produced will be smaller. This might be
-    /// beneficial for just generally using less space, or might even allow it
-    /// to fit more of the automaton in your CPU's cache, leading to overall
-    /// better search performance.
-    ///
-    /// Unlike the standard `build` method, this can report an error if the
-    /// state identifier representation cannot support the size of the
-    /// automaton.
-    ///
-    /// Note that the state identifier representation is determined by the
-    /// `S` type variable. This requires a type hint of some sort, either
-    /// by specifying the return type or using the turbofish, e.g.,
-    /// `build_with_size::<u16, _, _>(...)`.
-    ///
-    /// # Examples
-    ///
-    /// Basic usage:
-    ///
-    /// ```
-    /// use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
-    ///
-    /// # fn example() -> Result<(), ::aho_corasick::Error> {
-    /// let patterns = &["foo", "bar", "baz"];
-    /// let ac: AhoCorasick<u16> = AhoCorasickBuilder::new()
-    ///     .build_with_size(patterns)?;
-    /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
-    /// # Ok(()) }; example().unwrap()
-    /// ```
-    ///
-    /// Or alternatively, with turbofish:
-    ///
-    /// ```
-    /// use aho_corasick::AhoCorasickBuilder;
-    ///
-    /// # fn example() -> Result<(), ::aho_corasick::Error> {
-    /// let patterns = &["foo", "bar", "baz"];
-    /// let ac = AhoCorasickBuilder::new()
-    ///     .build_with_size::<u16, _, _>(patterns)?;
-    /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
-    /// # Ok(()) }; example().unwrap()
-    /// ```
-    pub fn build_with_size<S, I, P>(
+    /// Implements the automatic selection logic for the Aho-Corasick
+    /// implementation to use. Since all Aho-Corasick automatons are built
+    /// from a non-contiguous NFA, the caller is responsible for building
+    /// that first.
+    fn build_auto(
+        &self,
-        patterns: I,
-    ) -> Result<AhoCorasick<S>, Error>
-    where
-        S: StateID,
-        I: IntoIterator<Item = P>,
-        P: AsRef<[u8]>,
-    {
-        let nfa = self.nfa_builder.build(patterns)?;
-        let match_kind = nfa.match_kind().clone();
-        let imp = if self.dfa {
-            let dfa = self.dfa_builder.build(&nfa)?;
-            Imp::DFA(dfa)
-        } else {
-            Imp::NFA(nfa)
-        };
-        Ok(AhoCorasick { imp, match_kind })
-    }
-
-    /// Automatically configure the settings on this builder according to the
-    /// patterns that will be used to construct the automaton.
-    ///
-    /// The idea here is to balance space and time automatically. That is, when
-    /// searching a small number of patterns, this will attempt to use the
-    /// fastest possible configuration since the total space required will be
-    /// small anyway. As the number of patterns grows, this will fall back to
-    /// slower configurations that use less space.
-    ///
-    /// This is guaranteed to never set `match_kind`, but any other option may
-    /// be overridden.
-    ///
-    /// # Examples
-    ///
-    /// Basic usage:
-    ///
-    /// ```
-    /// use aho_corasick::AhoCorasickBuilder;
-    ///
-    /// let patterns = &["foo", "bar", "baz"];
-    /// let ac = AhoCorasickBuilder::new()
-    ///     .auto_configure(patterns)
-    ///     .build(patterns);
-    /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
-    /// ```
-    pub fn auto_configure<B: AsRef<[u8]>>(
-        &mut self,
-        patterns: &[B],
-    ) -> &mut AhoCorasickBuilder {
-        // N.B. Currently we only use the length of `patterns` to make a
-        // decision here, and could therefore ask for an `ExactSizeIterator`
-        // instead. But it's conceivable that we might adapt this to look at
-        // the total number of bytes, which would requires a second pass.
-        //
-        // The logic here is fairly rudimentary at the moment, but probably
-        // OK. The idea here is to use the fastest thing possible for a small
-        // number of patterns. That is, a DFA with no byte classes, since byte
-        // classes require an extra indirection for every byte searched. With a
-        // moderate number of patterns, we still want a DFA, but save on both
-        // space and compilation time by enabling byte classes. Finally, fall
-        // back to the slower but smaller NFA.
-        if patterns.len() <= 100 {
-            // N.B. Using byte classes can actually be faster by improving
-            // locality, but this only really applies for multi-megabyte
-            // automata (i.e., automata that don't fit in your CPU's cache).
-            self.dfa(true);
-        } else if patterns.len() <= 5000 {
-            self.dfa(true);
+        nfa: noncontiguous::NFA,
+    ) -> (Arc<dyn AcAutomaton>, AhoCorasickKind) {
+        // We try to build a DFA if we have a very small number of patterns,
+        // otherwise the memory usage just gets too crazy. We also only do it
+        // when the start kind is unanchored or anchored, but not both, because
+        // both implies two full copies of the transition table.
+        let try_dfa = !matches!(self.start_kind, StartKind::Both)
+            && nfa.patterns_len() <= 100;
+        if try_dfa {
+            match self.dfa.build_from_noncontiguous(&nfa) {
+                Ok(dfa) => {
+                    debug!("chose a DFA");
+                    return (Arc::new(dfa), AhoCorasickKind::DFA);
+                }
+                Err(_err) => {
+                    debug!(
+                        "failed to build DFA, trying something else: {}",
+                        _err
+                    );
+                }
+            }
         }
-        self
+        // We basically always want a contiguous NFA if the limited
+        // circumstances in which we use a DFA are not true. It is quite fast
+        // and has excellent memory usage. The only way we don't use it is if
+        // there are so many states that it can't fit in a contiguous NFA.
+        // And the only way to know that is to try to build it. Building a
+        // contiguous NFA is mostly just reshuffling data from a noncontiguous
+        // NFA, so it isn't too expensive, especially relative to building a
+        // noncontiguous NFA in the first place.
+        match self.nfa_contiguous.build_from_noncontiguous(&nfa) {
+            Ok(nfa) => {
+                debug!("chose contiguous NFA");
+                return (Arc::new(nfa), AhoCorasickKind::ContiguousNFA);
+            }
+            #[allow(unused_variables)] // unused when 'logging' is disabled
+            Err(_err) => {
+                debug!(
+                    "failed to build contiguous NFA, \
+                     trying something else: {}",
+                    _err
+                );
+            }
+        }
+        debug!("chose non-contiguous NFA");
+        (Arc::new(nfa), AhoCorasickKind::NoncontiguousNFA)
+    }
 
     /// Set the desired match semantics.
     ///
-    /// The default is `MatchKind::Standard`, which corresponds to the match
+    /// The default is [`MatchKind::Standard`], which corresponds to the match
     /// semantics supported by the standard textbook description of the
     /// Aho-Corasick algorithm. Namely, matches are reported as soon as they
     /// are found. Moreover, this is the only way to get overlapping matches
     /// or do stream searching.
     ///
     /// The other kinds of match semantics that are supported are
-    /// `MatchKind::LeftmostFirst` and `MatchKind::LeftmostLongest`. The former
-    /// corresponds to the match you would get if you were to try to match
-    /// each pattern at each position in the haystack in the same order that
-    /// you give to the automaton. That is, it returns the leftmost match
-    /// corresponding the earliest pattern given to the automaton. The latter
-    /// corresponds to finding the longest possible match among all leftmost
-    /// matches.
-    ///
-    /// For more details on match semantics, see the
-    /// [documentation for `MatchKind`](enum.MatchKind.html).
+    /// [`MatchKind::LeftmostFirst`] and [`MatchKind::LeftmostLongest`]. The
+    /// former corresponds to the match you would get if you were to try to
+    /// match each pattern at each position in the haystack in the same order
+    /// that you give to the automaton. That is, it returns the leftmost match
+    /// corresponding to the earliest pattern given to the automaton. The
+    /// latter corresponds to finding the longest possible match among all
+    /// leftmost matches.
+    ///
+    /// For more details on match semantics, see the [documentation for
+    /// `MatchKind`](MatchKind).
+    ///
+    /// Note that setting this to [`MatchKind::LeftmostFirst`] or
+    /// [`MatchKind::LeftmostLongest`] will cause some search routines on
+    /// [`AhoCorasick`] to return an error (or panic if you're using the
+    /// infallible API). Notably, this includes stream and overlapping
+    /// searches.
/// /// # Examples /// @@ -1700,14 +2295,15 @@ impl AhoCorasickBuilder { /// Standard semantics: /// /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; + /// use aho_corasick::{AhoCorasick, MatchKind}; /// /// let patterns = &["b", "abc", "abcd"]; /// let haystack = "abcd"; /// - /// let ac = AhoCorasickBuilder::new() + /// let ac = AhoCorasick::builder() /// .match_kind(MatchKind::Standard) // default, not necessary - /// .build(patterns); + /// .build(patterns) + /// .unwrap(); /// let mat = ac.find(haystack).expect("should have a match"); /// assert_eq!("b", &haystack[mat.start()..mat.end()]); /// ``` @@ -1715,14 +2311,15 @@ impl AhoCorasickBuilder { /// Leftmost-first semantics: /// /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; + /// use aho_corasick::{AhoCorasick, MatchKind}; /// /// let patterns = &["b", "abc", "abcd"]; /// let haystack = "abcd"; /// - /// let ac = AhoCorasickBuilder::new() + /// let ac = AhoCorasick::builder() /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); + /// .build(patterns) + /// .unwrap(); /// let mat = ac.find(haystack).expect("should have a match"); /// assert_eq!("abc", &haystack[mat.start()..mat.end()]); /// ``` @@ -1730,60 +2327,116 @@ impl AhoCorasickBuilder { /// Leftmost-longest semantics: /// /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; + /// use aho_corasick::{AhoCorasick, MatchKind}; /// /// let patterns = &["b", "abc", "abcd"]; /// let haystack = "abcd"; /// - /// let ac = AhoCorasickBuilder::new() + /// let ac = AhoCorasick::builder() /// .match_kind(MatchKind::LeftmostLongest) - /// .build(patterns); + /// .build(patterns) + /// .unwrap(); /// let mat = ac.find(haystack).expect("should have a match"); /// assert_eq!("abcd", &haystack[mat.start()..mat.end()]); /// ``` pub fn match_kind(&mut self, kind: MatchKind) -> &mut AhoCorasickBuilder { - self.nfa_builder.match_kind(kind); + self.nfa_noncontiguous.match_kind(kind); + self.nfa_contiguous.match_kind(kind); + self.dfa.match_kind(kind); self } - /// Enable anchored mode, which requires all matches to start at the - /// first position in a haystack. + /// Sets the starting state configuration for the automaton. /// - /// This option is disabled by default. + /// Every Aho-Corasick automaton is capable of having two start states: one + /// that is used for unanchored searches and one that is used for anchored + /// searches. Some automatons, like the NFAs, support this with almost zero + /// additional cost. Other automatons, like the DFA, require two copies of + /// the underlying transition table to support both simultaneously. /// - /// # Examples + /// Because there may be an added non-trivial cost to supporting both, it + /// is possible to configure which starting state configuration is needed. /// - /// Basic usage: + /// Indeed, since anchored searches tend to be somewhat more rare, + /// _only_ unanchored searches are supported by default. Thus, + /// [`StartKind::Unanchored`] is the default. + /// + /// Note that when this is set to [`StartKind::Unanchored`], then + /// running an anchored search will result in an error (or a panic + /// if using the infallible APIs). Similarly, when this is set to + /// [`StartKind::Anchored`], then running an unanchored search will + /// result in an error (or a panic if using the infallible APIs). When + /// [`StartKind::Both`] is used, then both unanchored and anchored searches + /// are always supported. 
+ /// + /// Also note that even if an `AhoCorasick` searcher is using an NFA + /// internally (which always supports both unanchored and anchored + /// searches), an error will still be reported for a search that isn't + /// supported by the configuration set via this method. This means, + /// for example, that an error is never dependent on which internal + /// implementation of Aho-Corasick is used. + /// + /// # Example: anchored search + /// + /// This shows how to build a searcher that only supports anchored + /// searches: /// /// ``` - /// use aho_corasick::AhoCorasickBuilder; + /// use aho_corasick::{ + /// AhoCorasick, Anchored, Input, Match, MatchKind, StartKind, + /// }; + /// + /// let ac = AhoCorasick::builder() + /// .match_kind(MatchKind::LeftmostFirst) + /// .start_kind(StartKind::Anchored) + /// .build(&["b", "abc", "abcd"]) + /// .unwrap(); /// - /// let patterns = &["foo", "bar"]; - /// let haystack = "foobar"; + /// // An unanchored search is not supported! An error here is guaranteed + /// // given the configuration above regardless of which kind of + /// // Aho-Corasick implementation ends up being used internally. + /// let input = Input::new("foo abcd").anchored(Anchored::No); + /// assert!(ac.try_find(input).is_err()); /// - /// let ac = AhoCorasickBuilder::new() - /// .anchored(true) - /// .build(patterns); - /// assert_eq!(1, ac.find_iter(haystack).count()); + /// let input = Input::new("foo abcd").anchored(Anchored::Yes); + /// assert_eq!(None, ac.try_find(input)?); + /// + /// let input = Input::new("abcd").anchored(Anchored::Yes); + /// assert_eq!(Some(Match::must(1, 0..3)), ac.try_find(input)?); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) /// ``` /// - /// When searching for overlapping matches, all matches that start at - /// the beginning of a haystack will be reported: + /// # Example: unanchored and anchored searches + /// + /// This shows how to build a searcher that supports both unanchored and + /// anchored searches: /// /// ``` - /// use aho_corasick::AhoCorasickBuilder; + /// use aho_corasick::{ + /// AhoCorasick, Anchored, Input, Match, MatchKind, StartKind, + /// }; + /// + /// let ac = AhoCorasick::builder() + /// .match_kind(MatchKind::LeftmostFirst) + /// .start_kind(StartKind::Both) + /// .build(&["b", "abc", "abcd"]) + /// .unwrap(); + /// + /// let input = Input::new("foo abcd").anchored(Anchored::No); + /// assert_eq!(Some(Match::must(1, 4..7)), ac.try_find(input)?); /// - /// let patterns = &["foo", "foofoo"]; - /// let haystack = "foofoo"; + /// let input = Input::new("foo abcd").anchored(Anchored::Yes); + /// assert_eq!(None, ac.try_find(input)?); /// - /// let ac = AhoCorasickBuilder::new() - /// .anchored(true) - /// .build(patterns); - /// assert_eq!(2, ac.find_overlapping_iter(haystack).count()); - /// // A non-anchored search would return 3 matches.
+ /// let input = Input::new("abcd").anchored(Anchored::Yes); + /// assert_eq!(Some(Match::must(1, 0..3)), ac.try_find(input)?); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) /// ``` - pub fn anchored(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.nfa_builder.anchored(yes); + pub fn start_kind(&mut self, kind: StartKind) -> &mut AhoCorasickBuilder { + self.dfa.start_kind(kind); + self.start_kind = kind; self } @@ -1807,70 +2460,75 @@ impl AhoCorasickBuilder { /// Basic usage: /// /// ``` - /// use aho_corasick::AhoCorasickBuilder; + /// use aho_corasick::AhoCorasick; /// /// let patterns = &["FOO", "bAr", "BaZ"]; /// let haystack = "foo bar baz"; /// - /// let ac = AhoCorasickBuilder::new() + /// let ac = AhoCorasick::builder() /// .ascii_case_insensitive(true) - /// .build(patterns); + /// .build(patterns) + /// .unwrap(); /// assert_eq!(3, ac.find_iter(haystack).count()); /// ``` pub fn ascii_case_insensitive( &mut self, yes: bool, ) -> &mut AhoCorasickBuilder { - self.nfa_builder.ascii_case_insensitive(yes); + self.nfa_noncontiguous.ascii_case_insensitive(yes); + self.nfa_contiguous.ascii_case_insensitive(yes); + self.dfa.ascii_case_insensitive(yes); self } - /// Set the limit on how many NFA states use a dense representation for - /// their transitions. - /// - /// A dense representation uses more space, but supports faster access to - /// transitions at search time. Thus, this setting permits the control of a - /// space vs time trade off when using the NFA variant of Aho-Corasick. - /// - /// This limit is expressed in terms of the depth of a state, i.e., the - /// number of transitions from the starting state of the NFA. The idea is - /// that most of the time searching will be spent near the starting state - /// of the automaton, so states near the start state should use a dense - /// representation. States further away from the start state would then use - /// a sparse representation, which uses less space but is slower to access - /// transitions at search time. - /// - /// By default, this is set to a low but non-zero number. - /// - /// This setting has no effect if the `dfa` option is enabled. - pub fn dense_depth(&mut self, depth: usize) -> &mut AhoCorasickBuilder { - self.nfa_builder.dense_depth(depth); - self - } - - /// Compile the standard Aho-Corasick automaton into a deterministic finite - /// automaton (DFA). - /// - /// When this is disabled (which is the default), then a non-deterministic - /// finite automaton (NFA) is used instead. - /// - /// The main benefit to a DFA is that it can execute searches more quickly - /// than an NFA (perhaps 2-4 times as fast). The main drawback is that the - /// DFA uses more space and can take much longer to build. - /// - /// Enabling this option does not change the time complexity for - /// constructing the Aho-Corasick automaton (which is `O(p)` where - /// `p` is the total number of patterns being compiled). Enabling this - /// option does however reduce the time complexity of non-overlapping - /// searches from `O(n + p)` to `O(n)`, where `n` is the length of the - /// haystack. - /// - /// In general, it's a good idea to enable this if you're searching a - /// small number of fairly short patterns (~1000), or if you want the - /// fastest possible search without regard to compilation time or space - /// usage. - pub fn dfa(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.dfa = yes; + /// Choose the type of underlying automaton to use.
+ /// + /// Currently, there are four choices: + /// + /// * [`AhoCorasickKind::NoncontiguousNFA`] instructs the searcher to + /// use a [`noncontiguous::NFA`]. A noncontiguous NFA is the fastest to + /// be built, has moderate memory usage and is typically the slowest to + /// execute a search. + /// * [`AhoCorasickKind::ContiguousNFA`] instructs the searcher to use a + /// [`contiguous::NFA`]. A contiguous NFA is a little slower to build than + /// a noncontiguous NFA, has excellent memory usage and is typically a + /// little slower than a DFA for a search. + /// * [`AhoCorasickKind::DFA`] instructs the searcher to use a + /// [`dfa::DFA`]. A DFA is very slow to build, uses exorbitant amounts of + /// memory, but will typically execute searches the fastest. + /// * `None` (the default) instructs the searcher to choose the "best" + /// Aho-Corasick implementation. This choice is typically based primarily + /// on the number of patterns. + /// + /// Setting this configuration does not change the time complexity for + /// constructing the Aho-Corasick automaton (which is `O(p)` where `p` + /// is the total number of patterns being compiled). Setting this to + /// [`AhoCorasickKind::DFA`] does however reduce the time complexity of + /// non-overlapping searches from `O(n + p)` to `O(n)`, where `n` is the + /// length of the haystack. + /// + /// In general, you should probably stick to the default unless you have + /// some kind of reason to use a specific Aho-Corasick implementation. For + /// example, you might choose `AhoCorasickKind::DFA` if you don't care + /// about memory usage and want the fastest possible search times. + /// + /// Setting this guarantees that the searcher returned uses the chosen + /// implementation. If that implementation could not be constructed, then + /// an error will be returned. In contrast, when `None` is used, it is + /// possible for it to attempt to construct, for example, a contiguous + /// NFA and have it fail. In which case, it will fall back to using a + /// noncontiguous NFA. + /// + /// If `None` is given, then one may use [`AhoCorasick::kind`] to determine + /// which Aho-Corasick implementation was chosen. + /// + /// Note that the heuristics used for choosing which `AhoCorasickKind` + /// to use may be changed in a semver compatible release. + pub fn kind( + &mut self, + kind: Option<AhoCorasickKind>, + ) -> &mut AhoCorasickBuilder { + self.kind = kind; self } @@ -1882,260 +2540,250 @@ impl AhoCorasickBuilder { /// can be useful to disable this if the prefilter is observed to be /// sub-optimal for a particular workload. /// + /// Currently, prefilters are typically only active when building searchers + /// with a small (less than 100) number of patterns. + /// /// This is enabled by default. pub fn prefilter(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.nfa_builder.prefilter(yes); + self.nfa_noncontiguous.prefilter(yes); + self.nfa_contiguous.prefilter(yes); + self.dfa.prefilter(yes); self } - /// Shrink the size of the transition alphabet by mapping bytes to their - /// equivalence classes. This only has an effect when the `dfa` option is - /// enabled. + /// Set the limit on how many states use a dense representation for their + /// transitions. Other states will generally use a sparse representation. /// - /// When enabled, each DFA will use a map from all possible bytes - /// to their corresponding equivalence class. Each equivalence class - /// represents a set of bytes that does not discriminate between a match - /// and a non-match in the DFA.
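The `kind` knob added above has no example in the vendored docs, so here is a hedged sketch (using the `AhoCorasick::kind` accessor mentioned in the doc comment) that pins the implementation to a DFA and confirms the choice:

```rust
use aho_corasick::{AhoCorasick, AhoCorasickKind};

// Request the DFA explicitly. If the DFA could not be constructed,
// `build` would return an error instead of falling back to an NFA.
let ac = AhoCorasick::builder()
    .kind(Some(AhoCorasickKind::DFA))
    .build(&["foo", "bar", "quux"])
    .unwrap();
assert_eq!(AhoCorasickKind::DFA, ac.kind());
```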
For example, the patterns `bar` and `baz` - /// have at least five equivalence classes: singleton sets of `b`, `a`, `r` - /// and `z`, and a final set that contains every other byte. + /// A dense representation uses more memory but is generally faster, since + /// the next transition in a dense representation can be computed in a + /// constant number of instructions. A sparse representation uses less + /// memory but is generally slower, since the next transition in a sparse + /// representation requires executing a variable number of instructions. /// - /// The advantage of this map is that the size of the transition table can - /// be reduced drastically from `#states * 256 * sizeof(id)` to - /// `#states * k * sizeof(id)` where `k` is the number of equivalence - /// classes. As a result, total space usage can decrease substantially. - /// Moreover, since a smaller alphabet is used, compilation becomes faster - /// as well. - /// - /// The disadvantage of this map is that every byte searched must be - /// passed through this map before it can be used to determine the next - /// transition. This has a small match time performance cost. However, if - /// the DFA is otherwise very large without byte classes, then using byte - /// classes can greatly improve memory locality and thus lead to better - /// overall performance. - /// - /// This option is enabled by default. - #[deprecated( - since = "0.7.16", - note = "not carrying its weight, will be always enabled, see: https://github.com/BurntSushi/aho-corasick/issues/57" - )] - pub fn byte_classes(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.dfa_builder.byte_classes(yes); + /// This setting is only used when an Aho-Corasick implementation is used + /// that supports the dense versus sparse representation trade off. Not all + /// do. + /// + /// This limit is expressed in terms of the depth of a state, i.e., the + /// number of transitions from the starting state of the automaton. The + /// idea is that most of the time searching will be spent near the starting + /// state of the automaton, so states near the start state should use a + /// dense representation. States further away from the start state would + /// then use a sparse representation. + /// + /// By default, this is set to a low but non-zero number. Setting this to + /// `0` is almost never what you want, since it is likely to make searches + /// very slow due to the start state itself being forced to use a sparse + /// representation. However, it is unlikely that increasing this number + /// will help things much, since the most active states have a small depth. + /// More to the point, the memory usage increases superlinearly as this + /// number increases. + pub fn dense_depth(&mut self, depth: usize) -> &mut AhoCorasickBuilder { + self.nfa_noncontiguous.dense_depth(depth); + self.nfa_contiguous.dense_depth(depth); self } - /// Premultiply state identifiers in the transition table. This only has - /// an effect when the `dfa` option is enabled. - /// - /// When enabled, state identifiers are premultiplied to point to their - /// corresponding row in the transition table. That is, given the `i`th - /// state, its corresponding premultiplied identifier is `i * k` where `k` - /// is the alphabet size of the automaton. (The alphabet size is at most - /// 256, but is in practice smaller if byte classes is enabled.) - /// - /// When state identifiers are not premultiplied, then the identifier of - /// the `i`th state is `i`. 
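As a rough illustration of the dense-versus-sparse depth trade-off described by the new `dense_depth` documentation above, a hedged sketch (the value `4` is an arbitrary illustrative choice; the crate only documents the default as "low but non-zero"):

```rust
use aho_corasick::AhoCorasick;

// States within depth 4 of the start state use dense transition tables;
// deeper states fall back to the sparse representation.
let ac = AhoCorasick::builder()
    .dense_depth(4)
    .build(&["Sherlock", "Holmes", "Watson"])
    .unwrap();
assert_eq!(2, ac.find_iter("Sherlock Holmes").count());
```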
- /// - /// The advantage of premultiplying state identifiers is that it saves a - /// multiplication instruction per byte when searching with a DFA. This has - /// been observed to lead to a 20% performance benefit in micro-benchmarks. - /// - /// The primary disadvantage of premultiplying state identifiers is - /// that they require a larger integer size to represent. For example, - /// if the DFA has 200 states, then its premultiplied form requires 16 - /// bits to represent every possible state identifier, whereas its - /// non-premultiplied form only requires 8 bits. - /// - /// This option is enabled by default. - #[deprecated( - since = "0.7.16", - note = "not carrying its weight, will be always enabled, see: https://github.com/BurntSushi/aho-corasick/issues/57" - )] - pub fn premultiply(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.dfa_builder.premultiply(yes); + /// A debug setting for whether to attempt to shrink the size of the + /// automaton's alphabet or not. + /// + /// This option is enabled by default and should never be disabled unless + /// one is debugging the underlying automaton. + /// + /// When enabled, some (but not all) Aho-Corasick automatons will use a map + /// from all possible bytes to their corresponding equivalence class. Each + /// equivalence class represents a set of bytes that does not discriminate + /// between a match and a non-match in the automaton. + /// + /// The advantage of this map is that the size of the transition table can + /// be reduced drastically from `#states * 256 * sizeof(u32)` to + /// `#states * k * sizeof(u32)` where `k` is the number of equivalence + /// classes (rounded up to the nearest power of 2). As a result, total + /// space usage can decrease substantially. Moreover, since a smaller + /// alphabet is used, automaton compilation becomes faster as well. + /// + /// **WARNING:** This is only useful for debugging automatons. Disabling + /// this does not yield any speed advantages. Namely, even when this is + /// disabled, a byte class map is still used while searching. The only + /// difference is that every byte will be forced into its own distinct + /// equivalence class. This is useful for debugging the actual generated + /// transitions because it lets one see the transitions defined on actual + /// bytes instead of the equivalence classes. + pub fn byte_classes(&mut self, yes: bool) -> &mut AhoCorasickBuilder { + self.nfa_contiguous.byte_classes(yes); + self.dfa.byte_classes(yes); self } } -/// A knob for controlling the match semantics of an Aho-Corasick automaton. -/// -/// There are two generally different ways that Aho-Corasick automatons can -/// report matches. The first way is the "standard" approach that results from -/// implementing most textbook explanations of Aho-Corasick. The second way is -/// to report only the leftmost non-overlapping matches. The leftmost approach -/// is in turn split into two different ways of resolving ambiguous matches: -/// leftmost-first and leftmost-longest. -/// -/// The `Standard` match kind is the default and is the only one that supports -/// overlapping matches and stream searching. (Trying to find overlapping -/// or streaming matches using leftmost match semantics will result in a -/// panic.) The `Standard` match kind will report matches as they are seen. -/// When searching for overlapping matches, then all possible matches are -/// reported. When searching for non-overlapping matches, the first match seen -/// is reported.
For example, for non-overlapping matches, given the patterns -/// `abcd` and `b` and the subject string `abcdef`, only a match for `b` is -/// reported since it is detected first. The `abcd` match is never reported -/// since it overlaps with the `b` match. +/// The type of Aho-Corasick implementation to use in an [`AhoCorasick`] +/// searcher. /// -/// In contrast, the leftmost match kind always prefers the leftmost match -/// among all possible matches. Given the same example as above with `abcd` and -/// `b` as patterns and `abcdef` as the subject string, the leftmost match is -/// `abcd` since it begins before the `b` match, even though the `b` match is -/// detected before the `abcd` match. In this case, the `b` match is not -/// reported at all since it overlaps with the `abcd` match. -/// -/// The difference between leftmost-first and leftmost-longest is in how they -/// resolve ambiguous matches when there are multiple leftmost matches to -/// choose from. Leftmost-first always chooses the pattern that was provided -/// earliest, whereas leftmost-longest always chooses the longest matching -/// pattern. For example, given the patterns `a` and `ab` and the subject -/// string `ab`, the leftmost-first match is `a` but the leftmost-longest match -/// is `ab`. Conversely, if the patterns were given in reverse order, i.e., -/// `ab` and `a`, then both the leftmost-first and leftmost-longest matches -/// would be `ab`. Stated differently, the leftmost-first match depends on the -/// order in which the patterns were given to the Aho-Corasick automaton. -/// Because of that, when leftmost-first matching is used, if a pattern `A` -/// that appears before a pattern `B` is a prefix of `B`, then it is impossible -/// to ever observe a match of `B`. -/// -/// If you're not sure which match kind to pick, then stick with the standard -/// kind, which is the default. In particular, if you need overlapping or -/// streaming matches, then you _must_ use the standard kind. The leftmost -/// kinds are useful in specific circumstances. For example, leftmost-first can -/// be very useful as a way to implement match priority based on the order of -/// patterns given and leftmost-longest can be useful for dictionary searching -/// such that only the longest matching words are reported. -/// -/// # Relationship with regular expression alternations -/// -/// Understanding match semantics can be a little tricky, and one easy way -/// to conceptualize non-overlapping matches from an Aho-Corasick automaton -/// is to think about them as a simple alternation of literals in a regular -/// expression. For example, let's say we wanted to match the strings -/// `Sam` and `Samwise`, which would turn into the regex `Sam|Samwise`. It -/// turns out that regular expression engines have two different ways of -/// matching this alternation. The first way, leftmost-longest, is commonly -/// found in POSIX compatible implementations of regular expressions (such as -/// `grep`). The second way, leftmost-first, is commonly found in backtracking -/// implementations such as Perl. (Some regex engines, such as RE2 and Rust's -/// regex engine, do not use backtracking, but still implement leftmost-first -/// semantics in an effort to match the behavior of dominant backtracking -/// regex engines such as those found in Perl, Ruby, Python, Javascript and -/// PHP.)
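The alternation analogy above is easy to check against the builder API introduced elsewhere in this patch; a small hedged sketch using the `Sam`/`Samwise` example from the prose:

```rust
use aho_corasick::{AhoCorasick, MatchKind};

let haystack = "Samwise";

// Perl-like behavior: the pattern listed first wins.
let ac = AhoCorasick::builder()
    .match_kind(MatchKind::LeftmostFirst)
    .build(&["Sam", "Samwise"])
    .unwrap();
let m = ac.find(haystack).unwrap();
assert_eq!("Sam", &haystack[m.start()..m.end()]);

// POSIX-like behavior: the longest leftmost match wins.
let ac = AhoCorasick::builder()
    .match_kind(MatchKind::LeftmostLongest)
    .build(&["Sam", "Samwise"])
    .unwrap();
let m = ac.find(haystack).unwrap();
assert_eq!("Samwise", &haystack[m.start()..m.end()]);
```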
-/// -/// That is, when matching `Sam|Samwise` against `Samwise`, a POSIX regex -/// will match `Samwise` because it is the longest possible match, but a -/// Perl-like regex will match `Sam` since it appears earlier in the -/// alternation. Indeed, the regex `Sam|Samwise` in a Perl-like regex engine -/// will never match `Samwise` since `Sam` will always have higher priority. -/// Conversely, matching the regex `Samwise|Sam` against `Samwise` will lead to -/// a match of `Samwise` in both POSIX and Perl-like regexes since `Samwise` is -/// still the longest match, but it also appears earlier than `Sam`. -/// -/// The "standard" match semantics of Aho-Corasick generally don't correspond -/// to the match semantics of any large group of regex implementations, so -/// there's no direct analogy that can be made here. Standard match semantics -/// are generally useful for overlapping matches, or if you just want to see -/// matches as they are detected. -/// -/// The main conclusion to draw from this section is that the match semantics -/// can be tweaked to precisely match either Perl-like regex alternations or -/// POSIX regex alternations. +/// This is principally used as an input to the +/// [`AhoCorasickBuilder::kind`] method. Its documentation goes into more +/// detail about each choice. +#[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum MatchKind { - /// Use standard match semantics, which support overlapping matches. When - /// used with non-overlapping matches, matches are reported as they are - /// seen. - Standard, - /// Use leftmost-first match semantics, which reports leftmost matches. - /// When there are multiple possible leftmost matches, the match - /// corresponding to the pattern that appeared earlier when constructing - /// the automaton is reported. - /// - /// This does **not** support overlapping matches or stream searching. If - /// this match kind is used, attempting to find overlapping matches or - /// stream matches will panic. - LeftmostFirst, - /// Use leftmost-longest match semantics, which reports leftmost matches. - /// When there are multiple possible leftmost matches, the longest match - /// is chosen. - /// - /// This does **not** support overlapping matches or stream searching. If - /// this match kind is used, attempting to find overlapping matches or - /// stream matches will panic. - LeftmostLongest, - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, +pub enum AhoCorasickKind { + /// Use a noncontiguous NFA. + NoncontiguousNFA, + /// Use a contiguous NFA. + ContiguousNFA, + /// Use a DFA. Warning: DFAs typically use a large amount of memory. + DFA, } -/// The default match kind is `MatchKind::Standard`. -impl Default for MatchKind { - fn default() -> MatchKind { - MatchKind::Standard - } +/// A trait that effectively gives us practical dynamic dispatch over anything +/// that impls `Automaton`, but without needing to add a bunch of bounds to +/// the core `Automaton` trait. Basically, we provide all of the marker traits +/// that our automatons have, in addition to `Debug` impls and requiring that +/// there is no borrowed data.
Without these, the main `AhoCorasick` type would +/// not be able to meaningfully impl `Debug` or the marker traits without also +/// requiring that all impls of `Automaton` do so, which would not be great. +trait AcAutomaton: + Automaton + Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static +{ } -impl MatchKind { - fn supports_overlapping(&self) -> bool { - self.is_standard() +impl<A> AcAutomaton for A where + A: Automaton + Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static +{ +} + +impl crate::automaton::private::Sealed for Arc<dyn AcAutomaton> {} + +// I'm not sure why this trait impl shows up in the docs, as the AcAutomaton +// trait is not exported. So we forcefully hide it. +// +// SAFETY: This just defers to the underlying 'AcAutomaton' and thus inherits +// its safety properties. +#[doc(hidden)] +unsafe impl Automaton for Arc<dyn AcAutomaton> { + #[inline(always)] + fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError> { + (**self).start_state(anchored) + } + + #[inline(always)] + fn next_state( + &self, + anchored: Anchored, + sid: StateID, + byte: u8, + ) -> StateID { + (**self).next_state(anchored, sid, byte) } - fn supports_stream(&self) -> bool { - // TODO: It may be possible to support this. It's hard. - // - // See: https://github.com/rust-lang/regex/issues/425#issuecomment-471367838 - self.is_standard() + #[inline(always)] + fn is_special(&self, sid: StateID) -> bool { + (**self).is_special(sid) } - pub(crate) fn is_standard(&self) -> bool { - *self == MatchKind::Standard + #[inline(always)] + fn is_dead(&self, sid: StateID) -> bool { + (**self).is_dead(sid) } - pub(crate) fn is_leftmost(&self) -> bool { - *self == MatchKind::LeftmostFirst - || *self == MatchKind::LeftmostLongest + #[inline(always)] + fn is_match(&self, sid: StateID) -> bool { + (**self).is_match(sid) } - pub(crate) fn is_leftmost_first(&self) -> bool { - *self == MatchKind::LeftmostFirst + #[inline(always)] + fn is_start(&self, sid: StateID) -> bool { + (**self).is_start(sid) } - /// Convert this match kind into a packed match kind. If this match kind - /// corresponds to standard semantics, then this returns None, since - /// packed searching does not support standard semantics.
- pub(crate) fn as_packed(&self) -> Option<packed::MatchKind> { - match *self { - MatchKind::Standard => None, - MatchKind::LeftmostFirst => Some(packed::MatchKind::LeftmostFirst), - MatchKind::LeftmostLongest => { - Some(packed::MatchKind::LeftmostLongest) - } - MatchKind::__Nonexhaustive => unreachable!(), - } + #[inline(always)] + fn match_kind(&self) -> MatchKind { + (**self).match_kind() } -} -#[cfg(test)] -mod tests { - use super::*; + #[inline(always)] + fn match_len(&self, sid: StateID) -> usize { + (**self).match_len(sid) + } + + #[inline(always)] + fn match_pattern(&self, sid: StateID, index: usize) -> PatternID { + (**self).match_pattern(sid, index) + } + + #[inline(always)] + fn patterns_len(&self) -> usize { + (**self).patterns_len() + } + + #[inline(always)] + fn pattern_len(&self, pid: PatternID) -> usize { + (**self).pattern_len(pid) + } + + #[inline(always)] + fn min_pattern_len(&self) -> usize { + (**self).min_pattern_len() + } + + #[inline(always)] + fn max_pattern_len(&self) -> usize { + (**self).max_pattern_len() + } + + #[inline(always)] + fn memory_usage(&self) -> usize { + (**self).memory_usage() + } + + #[inline(always)] + fn prefilter(&self) -> Option<&Prefilter> { + (**self).prefilter() + } - #[test] - fn oibits() { - use std::panic::{RefUnwindSafe, UnwindSafe}; + // Even though 'try_find' and 'try_find_overlapping' each have their + // own default impls, we explicitly define them here to fix a perf bug. + // Without these explicit definitions, the default impl will wind up using + // dynamic dispatch for all 'Automaton' method calls, including things like + // 'next_state' that absolutely must get inlined or else perf is trashed. + // Defining them explicitly here like this still requires dynamic dispatch + // to call 'try_find' itself, but all uses of 'Automaton' within 'try_find' + // are monomorphized. + // + // We don't need to explicitly impl any other methods, I think, because + // they are all implemented themselves in terms of 'try_find' and + // 'try_find_overlapping'. We still might wind up with an extra virtual + // call here or there, but that's okay since it's outside of any perf + // critical areas. - fn assert_send<T: Send>() {} - fn assert_sync<T: Sync>() {} - fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {} + #[inline(always)] + fn try_find( + &self, + input: &Input<'_>, + ) -> Result<Option<Match>, MatchError> { + (**self).try_find(input) + } + + #[inline(always)] + fn try_find_overlapping( + &self, + input: &Input<'_>, + state: &mut OverlappingState, + ) -> Result<(), MatchError> { + (**self).try_find_overlapping(input, state) + } +} - assert_send::<AhoCorasick>(); - assert_sync::<AhoCorasick>(); - assert_unwind_safe::<AhoCorasick>(); - assert_send::<AhoCorasickBuilder>(); - assert_sync::<AhoCorasickBuilder>(); - assert_unwind_safe::<AhoCorasickBuilder>(); +/// Returns an error if the start state configuration does not support the +/// desired search configuration. See the internal 'AhoCorasick::start_kind' +/// field docs for more details.
+fn enforce_anchored_consistency( + have: StartKind, + want: Anchored, +) -> Result<(), MatchError> { + match have { + StartKind::Both => Ok(()), + StartKind::Unanchored if !want.is_anchored() => Ok(()), + StartKind::Unanchored => Err(MatchError::invalid_input_anchored()), + StartKind::Anchored if want.is_anchored() => Ok(()), + StartKind::Anchored => Err(MatchError::invalid_input_unanchored()), } } diff --git a/vendor/aho-corasick/src/automaton.rs b/vendor/aho-corasick/src/automaton.rs index d88743a..c41dc6e 100644 --- a/vendor/aho-corasick/src/automaton.rs +++ b/vendor/aho-corasick/src/automaton.rs @@ -1,573 +1,1608 @@ -use crate::ahocorasick::MatchKind; -use crate::prefilter::{self, Candidate, Prefilter, PrefilterState}; -use crate::state_id::{dead_id, fail_id, StateID}; -use crate::Match; - -// NOTE: This trait essentially started as a copy of the same trait from -// regex-automata, with some wording changed since we use this trait for -// NFAs in addition to DFAs in this crate. Additionally, we do not export -// this trait. It's only used internally to reduce code duplication. The -// regex-automata crate needs to expose it because its Regex type is generic -// over implementations of this trait. In this crate, we encapsulate everything -// behind the AhoCorasick type. -// -// This trait is a bit of a mess, but it's not quite clear how to fix it. -// Basically, there are several competing concerns: -// -// * We need performance, so everything effectively needs to get monomorphized. -// * There are several variations on searching Aho-Corasick automatons: -// overlapping, standard and leftmost. Overlapping and standard are somewhat -// combined together below, but there is no real way to combine standard with -// leftmost. Namely, leftmost requires continuing a search even after a match -// is found, in order to correctly disambiguate a match. -// * On top of that, *sometimes* callers want to know which state the automaton -// is in after searching. This is principally useful for overlapping and -// stream searches. However, when callers don't care about this, we really -// do not want to be forced to compute it, since it sometimes requires extra -// work. Thus, there are effectively two copies of leftmost searching: one -// for tracking the state ID and one that doesn't. We should ideally do the -// same for standard searching, but my sanity stopped me. - -// SAFETY RATIONALE: Previously, the code below went to some length to remove -// all bounds checks. This generally produced tighter assembly and led to -// 20-50% improvements in micro-benchmarks on corpora made up of random -// characters. This somewhat makes sense, since the branch predictor is going -// to be at its worst on random text.
-// -// However, using the aho-corasick-debug tool and manually benchmarking -// different inputs, the code *with* bounds checks actually wound up being -// slightly faster: -// -// $ cat input -// Sherlock Holmes -// John Watson -// Professor Moriarty -// Irene Adler -// Mary Watson -// -// $ aho-corasick-debug-safe \ -// input OpenSubtitles2018.raw.sample.en --kind leftmost-first --dfa -// pattern read time: 32.824µs -// automaton build time: 444.687µs -// automaton heap usage: 72392 bytes -// match count: 639 -// count time: 1.809961702s -// -// $ aho-corasick-debug-master \ -// input OpenSubtitles2018.raw.sample.en --kind leftmost-first --dfa -// pattern read time: 31.425µs -// automaton build time: 317.434µs -// automaton heap usage: 72392 bytes -// match count: 639 -// count time: 2.059157705s -// -// I was able to reproduce this result on two different machines (an i5 and -// an i7). Therefore, we go the route of safe code for now. - -/// A trait describing the interface of an Aho-Corasick finite state machine. -/// -/// Every automaton has exactly one fail state, one dead state and exactly one -/// start state. Generally, these correspond to the first, second and third -/// states, respectively. The dead state is always treated as a sentinel. That -/// is, no correct Aho-Corasick automaton will ever transition into the fail -/// state. The dead state, however, can be transitioned into, but only when -/// leftmost-first or leftmost-longest match semantics are enabled and only -/// when at least one match has been observed. -/// -/// Every automaton also has one or more match states, such that -/// `Automaton::is_match_state(id)` returns `true` if and only if `id` -/// corresponds to a match state. -pub trait Automaton { - /// The representation used for state identifiers in this automaton. - /// - /// Typically, this is one of `u8`, `u16`, `u32`, `u64` or `usize`. - type ID: StateID; - - /// The type of matching that should be done. - fn match_kind(&self) -> &MatchKind; - - /// Returns true if and only if this automaton uses anchored searches. - fn anchored(&self) -> bool; - - /// An optional prefilter for quickly skipping to the next candidate match. - /// A prefilter must report at least every match, although it may report - /// positions that do not correspond to a match. That is, it must not allow - /// false negatives, but can allow false positives. - /// - /// Currently, a prefilter only runs when the automaton is in the start - /// state. That is, the position reported by a prefilter should always - /// correspond to the start of a potential match. - fn prefilter(&self) -> Option<&dyn Prefilter>; - - /// Return the identifier of this automaton's start state. - fn start_state(&self) -> Self::ID; - - /// Returns true if and only if the given state identifier refers to a - /// valid state. - fn is_valid(&self, id: Self::ID) -> bool; - - /// Returns true if and only if the given identifier corresponds to a match +/*! +Provides an [`Automaton`] trait for abstracting over Aho-Corasick automata. + +The `Automaton` trait provides a way to write generic code over any +Aho-Corasick automaton. It also provides access to lower level APIs that +permit walking the state transitions of an Aho-Corasick automaton manually.
+*/ + +use alloc::{string::String, vec::Vec}; + +use crate::util::{ + error::MatchError, + primitives::PatternID, + search::{Anchored, Input, Match, MatchKind, Span}, +}; + +pub use crate::util::{ + prefilter::{Candidate, Prefilter}, + primitives::{StateID, StateIDError}, +}; + +/// We seal the `Automaton` trait for now. It's a big trait, and it's +/// conceivable that I might want to add new required methods, and sealing the +/// trait permits doing that in a backwards compatible fashion. On the other +/// hand, if you have a solid use case for implementing the trait yourself, +/// please file an issue and we can discuss it. This was *mostly* done as a +/// conservative step. +pub(crate) mod private { + pub trait Sealed {} +} +impl private::Sealed for crate::nfa::noncontiguous::NFA {} +impl private::Sealed for crate::nfa::contiguous::NFA {} +impl private::Sealed for crate::dfa::DFA {} + +impl<'a, T: private::Sealed + ?Sized> private::Sealed for &'a T {} + +/// A trait that abstracts over Aho-Corasick automata. +/// +/// This trait primarily exists for niche use cases such as: +/// +/// * Using an NFA or DFA directly, bypassing the top-level +/// [`AhoCorasick`](crate::AhoCorasick) searcher. Currently, these include +/// [`noncontiguous::NFA`](crate::nfa::noncontiguous::NFA), +/// [`contiguous::NFA`](crate::nfa::contiguous::NFA) and +/// [`dfa::DFA`](crate::dfa::DFA). +/// * Implementing your own custom search routine by walking the automaton +/// yourself. This might be useful for implementing search on non-contiguous +/// strings or streams. +/// +/// For most use cases, it is not expected that users will need +/// to use or even know about this trait. Indeed, the top level +/// [`AhoCorasick`](crate::AhoCorasick) searcher does not expose any details +/// about this trait, nor does it implement it itself. +/// +/// Note that this trait defines a number of default methods, such as +/// [`Automaton::try_find`] and [`Automaton::try_find_iter`], which implement +/// higher level search routines in terms of the lower level automata API. +/// +/// # Sealed +/// +/// Currently, this trait is sealed. That means users of this crate can write +/// generic routines over this trait but cannot implement it themselves. This +/// restriction may be lifted in the future, but sealing the trait permits +/// adding new required methods in a backwards compatible fashion. +/// +/// # Special states +/// +/// This trait encodes a notion of "special" states in an automaton. Namely, +/// a state is treated as special if it is a dead, match or start state: +/// +/// * A dead state is a state that cannot be left once entered. All transitions +/// on a dead state lead back to itself. The dead state is meant to be treated +/// as a sentinel indicating that the search should stop and return a match if +/// one has been found, and nothing otherwise. +/// * A match state is a state that indicates one or more patterns have +/// matched. Depending on the [`MatchKind`] of the automaton, a search may +/// stop once a match is seen, or it may continue looking for matches until +/// it enters a dead state or sees the end of the haystack. +/// * A start state is a state that a search begins in. It is useful to know +/// when a search enters a start state because it may mean that a prefilter can +/// be used to skip ahead and quickly look for candidate matches. Unlike dead +/// and match states, it is never necessary to explicitly handle start states +/// for correctness.
Indeed, in this crate, implementations of `Automaton` +/// will only treat start states as "special" when a prefilter is enabled and +/// active. Otherwise, treating it as special has no purpose and winds up +/// slowing down the overall search because it results in ping-ponging between +/// the main state transition and the "special" state logic. +/// +/// Since checking whether a state is special by doing three different +/// checks would be too expensive inside a fast search loop, the +/// [`Automaton::is_special`] method is provided for quickly checking whether +/// the state is special. The `Automaton::is_dead`, `Automaton::is_match` and +/// `Automaton::is_start` predicates can then be used to determine which kind +/// of special state it is. +/// +/// # Panics +/// +/// Most of the APIs on this trait should panic or give incorrect results +/// if invalid inputs are given to them. For example, `Automaton::next_state` +/// has unspecified behavior if the state ID given to it is not a valid +/// state ID for the underlying automaton. Valid state IDs can only be +/// retrieved in one of two ways: calling `Automaton::start_state` or calling +/// `Automaton::next_state` with a valid state ID. +/// +/// # Safety +/// +/// This trait is not safe to implement so that code may rely on the +/// correctness of implementations of this trait to avoid undefined behavior. +/// The primary correctness guarantees are: +/// +/// * `Automaton::start_state` always returns a valid state ID or an error or +/// panics. +/// * `Automaton::next_state`, when given a valid state ID, always returns +/// a valid state ID for all values of `anchored` and `byte`, or otherwise +/// panics. +/// +/// In general, the rest of the methods on `Automaton` need to uphold their +/// contracts as well. For example, `Automaton::is_dead` should only return +/// true if the given state ID is actually a dead state. +/// +/// Note that currently this crate does not rely on the safety property defined +/// here to avoid undefined behavior. Instead, this was done to make it +/// _possible_ to do in the future. +/// +/// # Example +/// +/// This example shows how one might implement a basic but correct search +/// routine. We keep things simple by not using prefilters or worrying about +/// anchored searches, but do make sure our search is correct for all possible +/// [`MatchKind`] semantics. (The comments in the code below note the parts +/// that are needed to support certain `MatchKind` semantics.) +/// +/// ``` +/// use aho_corasick::{ +/// automaton::Automaton, +/// nfa::noncontiguous::NFA, +/// Anchored, Match, MatchError, MatchKind, +/// }; +/// +/// // Run an unanchored search for 'aut' in 'haystack'. Return the first match +/// // seen according to the automaton's match semantics. This returns an error +/// // if the given automaton does not support unanchored searches. +/// fn find<A: Automaton>( +/// aut: A, +/// haystack: &[u8], +/// ) -> Result<Option<Match>, MatchError> { +/// let mut sid = aut.start_state(Anchored::No)?; +/// let mut at = 0; +/// let mut mat = None; +/// let get_match = |sid, at| { +/// let pid = aut.match_pattern(sid, 0); +/// let len = aut.pattern_len(pid); +/// Match::new(pid, (at - len)..at) +/// }; +/// // Start states can be match states! +/// if aut.is_match(sid) { +/// mat = Some(get_match(sid, at)); +/// // Standard semantics require matches to be reported as soon as +/// // they're seen. Otherwise, we continue until we see a dead state +/// // or the end of the haystack.
+/// if matches!(aut.match_kind(), MatchKind::Standard) { +/// return Ok(mat); +/// } +/// } +/// while at < haystack.len() { +/// sid = aut.next_state(Anchored::No, sid, haystack[at]); +/// if aut.is_special(sid) { +/// if aut.is_dead(sid) { +/// return Ok(mat); +/// } else if aut.is_match(sid) { +/// mat = Some(get_match(sid, at + 1)); +/// // As above, standard semantics require that we return +/// // immediately once a match is found. +/// if matches!(aut.match_kind(), MatchKind::Standard) { +/// return Ok(mat); +/// } +/// } +/// } +/// at += 1; +/// } +/// Ok(mat) +/// } +/// +/// // Show that it works for standard searches. +/// let nfa = NFA::new(&["samwise", "sam"]).unwrap(); +/// assert_eq!(Some(Match::must(1, 0..3)), find(&nfa, b"samwise")?); +/// +/// // But also works when using leftmost-first. Notice how the match result +/// // has changed! +/// let nfa = NFA::builder() +/// .match_kind(MatchKind::LeftmostFirst) +/// .build(&["samwise", "sam"]) +/// .unwrap(); +/// assert_eq!(Some(Match::must(0, 0..7)), find(&nfa, b"samwise")?); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +pub unsafe trait Automaton: private::Sealed { + /// Returns the starting state for the given anchor mode. + /// + /// Upon success, the state ID returned is guaranteed to be valid for + /// this automaton. + /// + /// # Errors + /// + /// This returns an error when the given search configuration is not + /// supported by the underlying automaton. For example, if the underlying + /// automaton only supports unanchored searches but the given configuration + /// was set to an anchored search, then this must return an error. + fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError>; + + /// Performs a state transition from `sid` for `byte` and returns the next + /// state. + /// + /// `anchored` should be [`Anchored::Yes`] when executing an anchored + /// search and [`Anchored::No`] otherwise. For some implementations of + /// `Automaton`, it is required to know whether the search is anchored + /// or not in order to avoid following failure transitions. Other + /// implementations may ignore `anchored` altogether and depend on + /// `Automaton::start_state` returning a state that walks a different path + /// through the automaton depending on whether the search is anchored or + /// not. + /// + /// # Panics + /// + /// This routine may panic or return incorrect results when the given state + /// ID is invalid. A state ID is valid if and only if: + /// + /// 1. It came from a call to `Automaton::start_state`, or + /// 2. It came from a previous call to `Automaton::next_state` with a + /// valid state ID. + /// + /// Implementations must treat all possible values of `byte` as valid. + /// + /// Implementations may panic on unsupported values of `anchored`, but are + /// not required to do so. + fn next_state( + &self, + anchored: Anchored, + sid: StateID, + byte: u8, + ) -> StateID; + + /// Returns true if the given ID represents a "special" state. A special + /// state is a dead, match or start state. + /// + /// Note that implementations may choose to return false when the given ID + /// corresponds to a start state. Namely, it is always correct to treat start + /// states as non-special. Implementations must return true for states that + /// are dead or contain matches. + /// + /// This has unspecified behavior when given an invalid state ID. + fn is_special(&self, sid: StateID) -> bool; + + /// Returns true if the given ID represents a dead state. + /// + /// A dead state is a type of "sink" in a finite state machine.
It + /// corresponds to a state whose transitions all loop back to itself. That + /// is, once entered, it can never be left. In practice, it serves as a + /// sentinel indicating that the search should terminate. + /// + /// This has unspecified behavior when given an invalid state ID. + fn is_dead(&self, sid: StateID) -> bool; + + /// Returns true if the given ID represents a match state. + /// + /// A match state is always associated with one or more pattern IDs that + /// matched at the position in the haystack when the match state was + /// entered. When a match state is entered, the match semantics dictate + /// whether it should be returned immediately (for `MatchKind::Standard`) + /// or if the search should continue (for `MatchKind::LeftmostFirst` and + /// `MatchKind::LeftmostLongest`) until a dead state is seen or the end of + /// the haystack has been reached. + /// + /// This has unspecified behavior when given an invalid state ID. + fn is_match(&self, sid: StateID) -> bool; + + /// Returns true if the given ID represents a start state. + /// + /// While it is never incorrect to ignore start states during a search + /// (except for the start of the search of course), knowing whether one has + /// entered a start state can be useful for certain classes of performance + /// optimizations. For example, if one is in a start state, it may be legal + /// to try to skip ahead and look for match candidates more quickly than + /// would otherwise be accomplished by walking the automaton. + /// + /// Implementations of `Automaton` in this crate "unspecialize" start + /// states when a prefilter is not active or enabled. In this case, it + /// is possible for `Automaton::is_special(sid)` to return false while + /// `Automaton::is_start(sid)` returns true. + /// + /// This has unspecified behavior when given an invalid state ID. + fn is_start(&self, sid: StateID) -> bool; + + /// Returns the match semantics that this automaton was built with. + fn match_kind(&self) -> MatchKind; + + /// Returns the total number of matches for the given state ID. + /// + /// This has unspecified behavior if the given ID does not refer to a match /// state. + fn match_len(&self, sid: StateID) -> usize; + + /// Returns the pattern ID for the match state given by `sid` at the + /// `index` given. + /// + /// Typically, `index` is only ever greater than `0` when implementing an + /// overlapping search. Otherwise, it's likely that your search only cares + /// about reporting the first pattern ID in a match state. + /// + /// This has unspecified behavior if the given ID does not refer to a match + /// state, or if the index is greater than or equal to the total number of + /// matches in this match state. + fn match_pattern(&self, sid: StateID, index: usize) -> PatternID; + + /// Returns the total number of patterns compiled into this automaton. + fn patterns_len(&self) -> usize; + + /// Returns the length of the pattern for the given ID. /// - /// The state ID given must be valid, or else implementors may panic. - fn is_match_state(&self, id: Self::ID) -> bool; + /// This has unspecified behavior when given an invalid pattern + /// ID. A pattern ID is valid if and only if it is less than + /// `Automaton::patterns_len`. + fn pattern_len(&self, pid: PatternID) -> usize; + + /// Returns the length, in bytes, of the shortest pattern in this + /// automaton. 
+ fn min_pattern_len(&self) -> usize; - /// Returns true if and only if the given identifier corresponds to a state - /// that is either the dead state or a match state. + /// Returns the length, in bytes, of the longest pattern in this automaton. + fn max_pattern_len(&self) -> usize; + + /// Returns the heap memory usage, in bytes, used by this automaton. + fn memory_usage(&self) -> usize; + + /// Returns a prefilter, if available, that can be used to accelerate + /// searches for this automaton. /// - /// Depending on the implementation of the automaton, this routine can - /// be used to save a branch in the core matching loop. Nevertheless, - /// `is_match_state(id) || id == dead_id()` is always a valid - /// implementation. Indeed, this is the default implementation. + /// The typical way this is used is when the start state is entered during + /// a search. When that happens, one can use a prefilter to skip ahead and + /// look for candidate matches without having to walk the automaton on the + /// bytes between candidates. /// - /// The state ID given must be valid, or else implementors may panic. - fn is_match_or_dead_state(&self, id: Self::ID) -> bool { - id == dead_id() || self.is_match_state(id) + /// Typically a prefilter is only available when there are a small (<100) + /// number of patterns built into the automaton. + fn prefilter(&self) -> Option<&Prefilter>; + + /// Executes a non-overlapping search with this automaton using the given + /// configuration. + /// + /// See + /// [`AhoCorasick::try_find`](crate::AhoCorasick::try_find) + /// for more documentation and examples. + fn try_find( + &self, + input: &Input<'_>, + ) -> Result<Option<Match>, MatchError> { + try_find_fwd(&self, input) } - /// If the given state is a match state, return the match corresponding - /// to the given match index. `end` must be the ending position of the - /// detected match. If no match exists or if `match_index` exceeds the - /// number of matches in this state, then `None` is returned. + /// Executes an overlapping search with this automaton using the given + /// configuration. /// - /// The state ID given must be valid, or else implementors may panic. + /// See + /// [`AhoCorasick::try_find_overlapping`](crate::AhoCorasick::try_find_overlapping) + /// for more documentation and examples. + fn try_find_overlapping( + &self, + input: &Input<'_>, + state: &mut OverlappingState, + ) -> Result<(), MatchError> { + try_find_overlapping_fwd(&self, input, state) + } + + /// Returns an iterator of non-overlapping matches with this automaton + /// using the given configuration. /// - /// If the given state ID is correct and if the `match_index` is less than - /// the number of matches for that state, then this is guaranteed to return - /// a match. - fn get_match( + /// See + /// [`AhoCorasick::try_find_iter`](crate::AhoCorasick::try_find_iter) + /// for more documentation and examples. + fn try_find_iter<'a, 'h>( + &'a self, + input: Input<'h>, + ) -> Result<FindIter<'a, 'h, Self>, MatchError> + where + Self: Sized, + { + FindIter::new(self, input) + } + + /// Returns an iterator of overlapping matches with this automaton + /// using the given configuration. + /// + /// See + /// [`AhoCorasick::try_find_overlapping_iter`](crate::AhoCorasick::try_find_overlapping_iter) + /// for more documentation and examples.
+ fn try_find_overlapping_iter<'a, 'h>( + &'a self, + input: Input<'h>, + ) -> Result<FindOverlappingIter<'a, 'h, Self>, MatchError> + where + Self: Sized, + { + if !self.match_kind().is_standard() { + return Err(MatchError::unsupported_overlapping( + self.match_kind(), + )); + } + // We might consider lifting this restriction. The reason why I added + // it was to ban the combination of "anchored search" and "overlapping + // iteration." The match semantics aren't totally clear in that case. + // Should we allow *any* matches that are adjacent to *any* previous + // match? Or only following the most recent one? Or only matches + // that start at the beginning of the search? We might also elect to + // just keep this restriction in place, as callers should be able to + // implement it themselves if they want to. + if input.get_anchored().is_anchored() { + return Err(MatchError::invalid_input_anchored()); + } + let _ = self.start_state(input.get_anchored())?; + let state = OverlappingState::start(); + Ok(FindOverlappingIter { aut: self, input, state }) + } + + /// Replaces all non-overlapping matches in `haystack` with + /// strings from `replace_with` depending on the pattern that + /// matched. The `replace_with` slice must have length equal to + /// `Automaton::patterns_len`. + /// + /// See + /// [`AhoCorasick::try_replace_all`](crate::AhoCorasick::try_replace_all) + /// for more documentation and examples. + fn try_replace_all<B>( &self, - id: Self::ID, - match_index: usize, - end: usize, - ) -> Option<Match>; - - /// Returns the number of matches for the given state. If the given state - /// is not a match state, then this returns 0. - /// - /// The state ID given must be valid, or else implementors must panic. - fn match_count(&self, id: Self::ID) -> usize; - - /// Given the current state that this automaton is in and the next input - /// byte, this method returns the identifier of the next state. The - /// identifier returned must always be valid and may never correspond to - /// the fail state. The returned identifier may, however, point to the - /// dead state. - /// - /// This is not safe so that implementors may look up the next state - /// without memory safety checks such as bounds checks. As such, callers - /// must ensure that the given identifier corresponds to a valid automaton - /// state. Implementors must, in turn, ensure that this routine is safe for - /// all valid state identifiers and for all possible `u8` values. - fn next_state(&self, current: Self::ID, input: u8) -> Self::ID; - - /// Like next_state, but debug_asserts that the underlying - /// implementation never returns a `fail_id()` for the next state. - fn next_state_no_fail(&self, current: Self::ID, input: u8) -> Self::ID { - let next = self.next_state(current, input); - // We should never see a transition to the failure state. - debug_assert!( - next != fail_id(), - "automaton should never return fail_id for next state" + haystack: &str, + replace_with: &[B], + ) -> Result<String, MatchError> + where + Self: Sized, + B: AsRef<str>, + { + assert_eq!( + replace_with.len(), + self.patterns_len(), + "replace_all requires a replacement for every pattern \ + in the automaton" ); - next + let mut dst = String::with_capacity(haystack.len()); + self.try_replace_all_with(haystack, &mut dst, |mat, _, dst| { + dst.push_str(replace_with[mat.pattern()].as_ref()); + true + })?; + Ok(dst) } - /// Execute a search using standard match semantics. + /// Replaces all non-overlapping matches in `haystack` with + /// strings from `replace_with` depending on the pattern that + /// matched.
The `replace_with` slice must have length equal to + /// `Automaton::patterns_len`. /// - /// This can be used even when the automaton was constructed with leftmost - /// match semantics when you want to find the earliest possible match. This - /// can also be used as part of an overlapping search implementation. - /// - /// N.B. This does not report a match if `state_id` is given as a matching - /// state. As such, this should not be used directly. - #[inline(always)] - fn standard_find_at( + /// See + /// [`AhoCorasick::try_replace_all_bytes`](crate::AhoCorasick::try_replace_all_bytes) + /// for more documentation and examples. + fn try_replace_all_bytes<B>( &self, - prestate: &mut PrefilterState, haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option<Match> { - if let Some(pre) = self.prefilter() { - self.standard_find_at_imp( - prestate, - Some(pre), - haystack, - at, - state_id, - ) - } else { - self.standard_find_at_imp(prestate, None, haystack, at, state_id) - } + replace_with: &[B], + ) -> Result<Vec<u8>, MatchError> + where + Self: Sized, + B: AsRef<[u8]>, + { + assert_eq!( + replace_with.len(), + self.patterns_len(), + "replace_all requires a replacement for every pattern \ + in the automaton" + ); + let mut dst = Vec::with_capacity(haystack.len()); + self.try_replace_all_with_bytes(haystack, &mut dst, |mat, _, dst| { + dst.extend(replace_with[mat.pattern()].as_ref()); + true + })?; + Ok(dst) } - // It's important for this to always be inlined. Namely, its only caller - // is standard_find_at, and the inlining should remove the case analysis - // for prefilter scanning when there is no prefilter available. - #[inline(always)] - fn standard_find_at_imp( + /// Replaces all non-overlapping matches in `haystack` by calling the + /// `replace_with` closure given. + /// + /// See + /// [`AhoCorasick::try_replace_all_with`](crate::AhoCorasick::try_replace_all_with) + /// for more documentation and examples. + fn try_replace_all_with<F>( &self, - prestate: &mut PrefilterState, - prefilter: Option<&dyn Prefilter>, - haystack: &[u8], - mut at: usize, - state_id: &mut Self::ID, - ) -> Option<Match> { - while at < haystack.len() { - if let Some(pre) = prefilter { - if prestate.is_effective(at) && *state_id == self.start_state() - { - let c = prefilter::next(prestate, pre, haystack, at) - .into_option(); - match c { - None => return None, - Some(i) => { - at = i; - } - } - } - } - // CORRECTNESS: next_state is correct for all possible u8 values, - // so the only thing we're concerned about is the validity of - // `state_id`. `state_id` either comes from the caller (in which - // case, we assume it is correct), or it comes from the return - // value of next_state, which is guaranteed to be correct. - *state_id = self.next_state_no_fail(*state_id, haystack[at]); - at += 1; - // This routine always quits immediately after seeing a - // match, and since dead states can only come after seeing - // a match, seeing a dead state here is impossible. (Unless - // we have an anchored automaton, in which case, dead states - // are used to stop a search.)
- debug_assert!( - *state_id != dead_id() || self.anchored(), - "standard find should never see a dead state" - ); - - if self.is_match_or_dead_state(*state_id) { - return if *state_id == dead_id() { - None - } else { - self.get_match(*state_id, 0, at) - }; + haystack: &str, + dst: &mut String, + mut replace_with: F, + ) -> Result<(), MatchError> + where + Self: Sized, + F: FnMut(&Match, &str, &mut String) -> bool, + { + let mut last_match = 0; + for m in self.try_find_iter(Input::new(haystack))? { + // Since there are no restrictions on what kinds of patterns are + // in an Aho-Corasick automaton, we might get matches that split + // a codepoint, or even matches of a partial codepoint. When that + // happens, we just skip the match. + if !haystack.is_char_boundary(m.start()) + || !haystack.is_char_boundary(m.end()) + { + continue; } + dst.push_str(&haystack[last_match..m.start()]); + last_match = m.end(); + if !replace_with(&m, &haystack[m.start()..m.end()], dst) { + break; + }; } - None + dst.push_str(&haystack[last_match..]); + Ok(()) } - /// Execute a search using leftmost (either first or longest) match - /// semantics. + /// Replaces all non-overlapping matches in `haystack` by calling the + /// `replace_with` closure given. /// - /// The principle difference between searching with standard semantics and - /// searching with leftmost semantics is that leftmost searching will - /// continue searching even after a match has been found. Once a match - /// is found, the search does not stop until either the haystack has been - /// exhausted or a dead state is observed in the automaton. (Dead states - /// only exist in automatons constructed with leftmost semantics.) That is, - /// we rely on the construction of the automaton to tell us when to quit. - #[inline(never)] - fn leftmost_find_at( + /// See + /// [`AhoCorasick::try_replace_all_with_bytes`](crate::AhoCorasick::try_replace_all_with_bytes) + /// for more documentation and examples. + fn try_replace_all_with_bytes( &self, - prestate: &mut PrefilterState, haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option { - if let Some(pre) = self.prefilter() { - self.leftmost_find_at_imp( - prestate, - Some(pre), - haystack, - at, - state_id, - ) - } else { - self.leftmost_find_at_imp(prestate, None, haystack, at, state_id) + dst: &mut Vec, + mut replace_with: F, + ) -> Result<(), MatchError> + where + Self: Sized, + F: FnMut(&Match, &[u8], &mut Vec) -> bool, + { + let mut last_match = 0; + for m in self.try_find_iter(Input::new(haystack))? { + dst.extend(&haystack[last_match..m.start()]); + last_match = m.end(); + if !replace_with(&m, &haystack[m.start()..m.end()], dst) { + break; + }; } + dst.extend(&haystack[last_match..]); + Ok(()) } - // It's important for this to always be inlined. Namely, its only caller - // is leftmost_find_at, and the inlining should remove the case analysis - // for prefilter scanning when there is no prefilter available. - #[inline(always)] - fn leftmost_find_at_imp( + /// Returns an iterator of non-overlapping matches with this automaton + /// from the stream given. + /// + /// See + /// [`AhoCorasick::try_stream_find_iter`](crate::AhoCorasick::try_stream_find_iter) + /// for more documentation and examples. + #[cfg(feature = "std")] + fn try_stream_find_iter<'a, R: std::io::Read>( + &'a self, + rdr: R, + ) -> Result, MatchError> + where + Self: Sized, + { + Ok(StreamFindIter { it: StreamChunkIter::new(self, rdr)? 
}) + } + + /// Replaces all non-overlapping matches in `rdr` with strings from + /// `replace_with` depending on the pattern that matched, and writes the + /// result to `wtr`. The `replace_with` slice must have length equal to + /// `Automaton::patterns_len`. + /// + /// See + /// [`AhoCorasick::try_stream_replace_all`](crate::AhoCorasick::try_stream_replace_all) + /// for more documentation and examples. + #[cfg(feature = "std")] + fn try_stream_replace_all( &self, - prestate: &mut PrefilterState, - prefilter: Option<&dyn Prefilter>, - haystack: &[u8], - mut at: usize, - state_id: &mut Self::ID, - ) -> Option { - debug_assert!(self.match_kind().is_leftmost()); - if self.anchored() && at > 0 && *state_id == self.start_state() { - return None; - } - let mut last_match = self.get_match(*state_id, 0, at); - while at < haystack.len() { - if let Some(pre) = prefilter { - if prestate.is_effective(at) && *state_id == self.start_state() - { - let c = prefilter::next(prestate, pre, haystack, at) - .into_option(); - match c { - None => return None, - Some(i) => { - at = i; - } - } + rdr: R, + wtr: W, + replace_with: &[B], + ) -> std::io::Result<()> + where + Self: Sized, + R: std::io::Read, + W: std::io::Write, + B: AsRef<[u8]>, + { + assert_eq!( + replace_with.len(), + self.patterns_len(), + "streaming replace_all requires a replacement for every pattern \ + in the automaton", + ); + self.try_stream_replace_all_with(rdr, wtr, |mat, _, wtr| { + wtr.write_all(replace_with[mat.pattern()].as_ref()) + }) + } + + /// Replaces all non-overlapping matches in `rdr` by calling the + /// `replace_with` closure given and writing the result to `wtr`. + /// + /// See + /// [`AhoCorasick::try_stream_replace_all_with`](crate::AhoCorasick::try_stream_replace_all_with) + /// for more documentation and examples. + #[cfg(feature = "std")] + fn try_stream_replace_all_with( + &self, + rdr: R, + mut wtr: W, + mut replace_with: F, + ) -> std::io::Result<()> + where + Self: Sized, + R: std::io::Read, + W: std::io::Write, + F: FnMut(&Match, &[u8], &mut W) -> std::io::Result<()>, + { + let mut it = StreamChunkIter::new(self, rdr).map_err(|e| { + let kind = std::io::ErrorKind::Other; + std::io::Error::new(kind, e) + })?; + while let Some(result) = it.next() { + let chunk = result?; + match chunk { + StreamChunk::NonMatch { bytes, .. } => { + wtr.write_all(bytes)?; } - } - // CORRECTNESS: next_state is correct for all possible u8 values, - // so the only thing we're concerned about is the validity of - // `state_id`. `state_id` either comes from the caller (in which - // case, we assume it is correct), or it comes from the return - // value of next_state, which is guaranteed to be correct. - *state_id = self.next_state_no_fail(*state_id, haystack[at]); - at += 1; - if self.is_match_or_dead_state(*state_id) { - if *state_id == dead_id() { - // The only way to enter into a dead state is if a match - // has been found, so we assert as much. This is different - // from normal automata, where you might enter a dead state - // if you know a subsequent match will never be found - // (regardless of whether a match has already been found). - // For Aho-Corasick, it is built so that we can match at - // any position, so the possibility of a match always - // exists. - // - // (Unless we have an anchored automaton, in which case, - // dead states are used to stop a search.) 
- debug_assert!( - last_match.is_some() || self.anchored(), - "dead state should only be seen after match" - ); - return last_match; + StreamChunk::Match { bytes, mat } => { + replace_with(&mat, bytes, &mut wtr)?; } - last_match = self.get_match(*state_id, 0, at); } } - last_match - } - - /// This is like leftmost_find_at, but does not need to track a caller - /// provided state id. In other words, the only output of this routine is a - /// match, if one exists. - /// - /// It is regrettable that we need to effectively copy a chunk of - /// implementation twice, but when we don't need to track the state ID, we - /// can allow the prefilter to report matches immediately without having - /// to re-confirm them with the automaton. The re-confirmation step is - /// necessary in leftmost_find_at because tracing through the automaton is - /// the only way to correctly set the state ID. (Perhaps an alternative - /// would be to keep a map from pattern ID to matching state ID, but that - /// complicates the code and still doesn't permit us to defer to the - /// prefilter entirely when possible.) - /// - /// I did try a few things to avoid the code duplication here, but nothing - /// optimized as well as this approach. (In microbenchmarks, there was - /// about a 25% difference.) - #[inline(never)] - fn leftmost_find_at_no_state( + Ok(()) + } +} + +// SAFETY: This just defers to the underlying 'AcAutomaton' and thus inherits +// its safety properties. +unsafe impl<'a, A: Automaton + ?Sized> Automaton for &'a A { + #[inline(always)] + fn start_state(&self, anchored: Anchored) -> Result { + (**self).start_state(anchored) + } + + #[inline(always)] + fn next_state( &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, + anchored: Anchored, + sid: StateID, + byte: u8, + ) -> StateID { + (**self).next_state(anchored, sid, byte) + } + + #[inline(always)] + fn is_special(&self, sid: StateID) -> bool { + (**self).is_special(sid) + } + + #[inline(always)] + fn is_dead(&self, sid: StateID) -> bool { + (**self).is_dead(sid) + } + + #[inline(always)] + fn is_match(&self, sid: StateID) -> bool { + (**self).is_match(sid) + } + + #[inline(always)] + fn is_start(&self, sid: StateID) -> bool { + (**self).is_start(sid) + } + + #[inline(always)] + fn match_kind(&self) -> MatchKind { + (**self).match_kind() + } + + #[inline(always)] + fn match_len(&self, sid: StateID) -> usize { + (**self).match_len(sid) + } + + #[inline(always)] + fn match_pattern(&self, sid: StateID, index: usize) -> PatternID { + (**self).match_pattern(sid, index) + } + + #[inline(always)] + fn patterns_len(&self) -> usize { + (**self).patterns_len() + } + + #[inline(always)] + fn pattern_len(&self, pid: PatternID) -> usize { + (**self).pattern_len(pid) + } + + #[inline(always)] + fn min_pattern_len(&self) -> usize { + (**self).min_pattern_len() + } + + #[inline(always)] + fn max_pattern_len(&self) -> usize { + (**self).max_pattern_len() + } + + #[inline(always)] + fn memory_usage(&self) -> usize { + (**self).memory_usage() + } + + #[inline(always)] + fn prefilter(&self) -> Option<&Prefilter> { + (**self).prefilter() + } +} + +/// Represents the current state of an overlapping search. +/// +/// This is used for overlapping searches since they need to know something +/// about the previous search. 
For example, when multiple patterns match at the +/// same position, this state tracks the last reported pattern so that the next +/// search knows whether to report another matching pattern or continue with +/// the search at the next position. Additionally, it also tracks which state +/// the last search call terminated in and the current offset of the search +/// in the haystack. +/// +/// This type provides limited introspection capabilities. The only thing a +/// caller can do is construct it and pass it around to permit search routines +/// to use it to track state, and to ask whether a match has been found. +/// +/// Callers should always provide a fresh state constructed via +/// [`OverlappingState::start`] when starting a new search. That same state +/// should be reused for subsequent searches on the same `Input`. The state +/// given will advance through the haystack itself. Callers can detect the end +/// of a search when neither an error nor a match is returned. +/// +/// # Example +/// +/// This example shows how to manually iterate over all overlapping matches. If +/// you need this, you might consider using +/// [`AhoCorasick::find_overlapping_iter`](crate::AhoCorasick::find_overlapping_iter) +/// instead, but this shows how to correctly use an `OverlappingState`. +/// +/// ``` +/// use aho_corasick::{ +/// automaton::OverlappingState, +/// AhoCorasick, Input, Match, +/// }; +/// +/// let patterns = &["append", "appendage", "app"]; +/// let haystack = "append the app to the appendage"; +/// +/// let ac = AhoCorasick::new(patterns).unwrap(); +/// let mut state = OverlappingState::start(); +/// let mut matches = vec![]; +/// +/// loop { +/// ac.find_overlapping(haystack, &mut state); +/// let mat = match state.get_match() { +/// None => break, +/// Some(mat) => mat, +/// }; +/// matches.push(mat); +/// } +/// let expected = vec![ +/// Match::must(2, 0..3), +/// Match::must(0, 0..6), +/// Match::must(2, 11..14), +/// Match::must(2, 22..25), +/// Match::must(0, 22..28), +/// Match::must(1, 22..31), +/// ]; +/// assert_eq!(expected, matches); +/// ``` +#[derive(Clone, Debug)] +pub struct OverlappingState { + /// The match reported by the most recent overlapping search to use this + /// state. + /// + /// If a search does not find any matches, then it is expected to clear + /// this value. + mat: Option, + /// The state ID of the state at which the search was in when the call + /// terminated. When this is a match state, `last_match` must be set to a + /// non-None value. + /// + /// A `None` value indicates the start state of the corresponding + /// automaton. We cannot use the actual ID, since any one automaton may + /// have many start states, and which one is in use depends on search-time + /// factors (such as whether the search is anchored or not). + id: Option, + /// The position of the search. + /// + /// When `id` is None (i.e., we are starting a search), this is set to + /// the beginning of the search as given by the caller regardless of its + /// current value. Subsequent calls to an overlapping search pick up at + /// this offset. + at: usize, + /// The index into the matching patterns of the next match to report if the + /// current state is a match state. Note that this may be 1 greater than + /// the total number of matches to report for the current match state. (In + /// which case, no more matches should be reported at the current position + /// and the search should advance to the next position.) 
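+    /// (Illustrative sketch: if the current match state has 3 matching
+    /// patterns, this index advances 0 -> 1 -> 2 -> 3 across successive
+    /// search calls, and once it reaches 3, the search resumes at the next
+    /// haystack offset.)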
+    next_match_index: Option<usize>,
+}
+
+impl OverlappingState {
+    /// Create a new overlapping state that begins at the start state.
+    pub fn start() -> OverlappingState {
+        OverlappingState { mat: None, id: None, at: 0, next_match_index: None }
+    }
+
+    /// Return the match result of the most recent search to execute with this
+    /// state.
+    ///
+    /// Every search will clear this result automatically, such that if no
+    /// match is found, this will always correctly report `None`.
+    pub fn get_match(&self) -> Option<Match> {
+        self.mat
+    }
+}
+
+/// An iterator of non-overlapping matches in a particular haystack.
+///
+/// This iterator yields matches according to the [`MatchKind`] used by this
+/// automaton.
+///
+/// This iterator is constructed via the [`Automaton::try_find_iter`] method.
+///
+/// The type variable `A` refers to the implementation of the [`Automaton`]
+/// trait used to execute the search.
+///
+/// The lifetime `'a` refers to the lifetime of the [`Automaton`]
+/// implementation.
+///
+/// The lifetime `'h` refers to the lifetime of the haystack being searched.
+#[derive(Debug)]
+pub struct FindIter<'a, 'h, A> {
+    /// The automaton used to drive the search.
+    aut: &'a A,
+    /// The input parameters to give to each search call.
+    ///
+    /// The start position of the search is mutated during iteration.
+    input: Input<'h>,
+    /// Records the end offset of the most recent match. This is necessary to
+    /// handle a corner case for preventing empty matches from overlapping with
+    /// the ending bounds of a prior match.
+    last_match_end: Option<usize>,
+}
+
+impl<'a, 'h, A: Automaton> FindIter<'a, 'h, A> {
+    /// Creates a new non-overlapping iterator. If the given automaton would
+    /// return an error on a search with the given input configuration, then
+    /// that error is returned here.
+    fn new(
+        aut: &'a A,
+        input: Input<'h>,
+    ) -> Result<FindIter<'a, 'h, A>, MatchError> {
+        // The only way this search can fail is if we cannot retrieve the start
+        // state, e.g., asking for an anchored search when only unanchored
+        // searches are supported.
+        let _ = aut.start_state(input.get_anchored())?;
+        Ok(FindIter { aut, input, last_match_end: None })
+    }
+
+    /// Executes a search and returns a match if one is found.
+    ///
+    /// This does not advance the input forward. It just executes a search
+    /// based on the current configuration/offsets.
+    fn search(&self) -> Option<Match> {
+        // The unwrap is OK here because we check at iterator construction time
+        // that no subsequent search call (using the same configuration) will
+        // ever return an error.
+        self.aut
+            .try_find(&self.input)
+            .expect("already checked that no match error can occur")
+    }
+
+    /// Handles the special case of an empty match by ensuring that 1) the
+    /// iterator always advances and 2) empty matches never overlap with other
+    /// matches.
+    ///
+    /// (1) is necessary because we principally make progress by setting the
+    /// starting location of the next search to the ending location of the last
+    /// match. But if a match is empty, then this results in a search that does
+    /// not advance and thus does not terminate.
+    ///
+    /// (2) is not strictly necessary, but makes intuitive sense and matches
+    /// the prevailing behavior of most general purpose regex engines.
+    /// (Obviously this crate isn't a regex engine, but we choose to match
+    /// their semantics.) The "intuitive sense" here is that we want to report
+    /// NON-overlapping matches.
So for example, given the patterns 'a' and + /// '' (an empty string) against the haystack 'a', without the special + /// handling, you'd get the matches [0, 1) and [1, 1), where the latter + /// overlaps with the end bounds of the former. + /// + /// Note that we mark this cold and forcefully prevent inlining because + /// handling empty matches like this is extremely rare and does require + /// quite a bit of code, comparatively. Keeping this code out of the main + /// iterator function keeps it smaller and more amenable to inlining + /// itself. + #[cold] + #[inline(never)] + fn handle_overlapping_empty_match( + &mut self, + mut m: Match, ) -> Option { - if let Some(pre) = self.prefilter() { - self.leftmost_find_at_no_state_imp( - prestate, - Some(pre), - haystack, - at, - ) - } else { - self.leftmost_find_at_no_state_imp(prestate, None, haystack, at) + assert!(m.is_empty()); + if Some(m.end()) == self.last_match_end { + self.input.set_start(self.input.start().checked_add(1).unwrap()); + m = self.search()?; } + Some(m) } +} + +impl<'a, 'h, A: Automaton> Iterator for FindIter<'a, 'h, A> { + type Item = Match; - // It's important for this to always be inlined. Namely, its only caller - // is leftmost_find_at_no_state, and the inlining should remove the case - // analysis for prefilter scanning when there is no prefilter available. #[inline(always)] - fn leftmost_find_at_no_state_imp( - &self, - prestate: &mut PrefilterState, - prefilter: Option<&dyn Prefilter>, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(self.match_kind().is_leftmost()); - if self.anchored() && at > 0 { - return None; + fn next(&mut self) -> Option { + let mut m = self.search()?; + if m.is_empty() { + m = self.handle_overlapping_empty_match(m)?; } - // If our prefilter handles confirmation of matches 100% of the - // time, and since we don't need to track state IDs, we can avoid - // Aho-Corasick completely. - if let Some(pre) = prefilter { - // We should never have a prefilter during an anchored search. - debug_assert!(!self.anchored()); - if !pre.reports_false_positives() { - return match pre.next_candidate(prestate, haystack, at) { - Candidate::None => None, - Candidate::Match(m) => Some(m), - Candidate::PossibleStartOfMatch(_) => unreachable!(), - }; + self.input.set_start(m.end()); + self.last_match_end = Some(m.end()); + Some(m) + } +} + +/// An iterator of overlapping matches in a particular haystack. +/// +/// This iterator will report all possible matches in a particular haystack, +/// even when the matches overlap. +/// +/// This iterator is constructed via the +/// [`Automaton::try_find_overlapping_iter`] method. +/// +/// The type variable `A` refers to the implementation of the [`Automaton`] +/// trait used to execute the search. +/// +/// The lifetime `'a` refers to the lifetime of the [`Automaton`] +/// implementation. +/// +/// The lifetime `'h` refers to the lifetime of the haystack being searched. +#[derive(Debug)] +pub struct FindOverlappingIter<'a, 'h, A> { + aut: &'a A, + input: Input<'h>, + state: OverlappingState, +} + +impl<'a, 'h, A: Automaton> Iterator for FindOverlappingIter<'a, 'h, A> { + type Item = Match; + + #[inline(always)] + fn next(&mut self) -> Option { + self.aut + .try_find_overlapping(&self.input, &mut self.state) + .expect("already checked that no match error can occur here"); + self.state.get_match() + } +} + +/// An iterator that reports matches in a stream. 
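+///
+/// (A hedged usage sketch via the higher level API, assuming the `std`
+/// feature and an `AhoCorasick` searcher `ac`:
+/// `for result in ac.try_stream_find_iter(rdr)? { let mat = result?; /* ... */ }`.)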
+/// +/// This iterator yields elements of type `io::Result`, where an error +/// is reported if there was a problem reading from the underlying stream. +/// The iterator terminates only when the underlying stream reaches `EOF`. +/// +/// This iterator is constructed via the [`Automaton::try_stream_find_iter`] +/// method. +/// +/// The type variable `A` refers to the implementation of the [`Automaton`] +/// trait used to execute the search. +/// +/// The type variable `R` refers to the `io::Read` stream that is being read +/// from. +/// +/// The lifetime `'a` refers to the lifetime of the [`Automaton`] +/// implementation. +#[cfg(feature = "std")] +#[derive(Debug)] +pub struct StreamFindIter<'a, A, R> { + it: StreamChunkIter<'a, A, R>, +} + +#[cfg(feature = "std")] +impl<'a, A: Automaton, R: std::io::Read> Iterator + for StreamFindIter<'a, A, R> +{ + type Item = std::io::Result; + + fn next(&mut self) -> Option> { + loop { + match self.it.next() { + None => return None, + Some(Err(err)) => return Some(Err(err)), + Some(Ok(StreamChunk::NonMatch { .. })) => {} + Some(Ok(StreamChunk::Match { mat, .. })) => { + return Some(Ok(mat)); + } } } + } +} - let mut state_id = self.start_state(); - let mut last_match = self.get_match(state_id, 0, at); - while at < haystack.len() { - if let Some(pre) = prefilter { - if prestate.is_effective(at) && state_id == self.start_state() - { - match prefilter::next(prestate, pre, haystack, at) { - Candidate::None => return None, - // Since we aren't tracking a state ID, we can - // quit early once we know we have a match. - Candidate::Match(m) => return Some(m), - Candidate::PossibleStartOfMatch(i) => { - at = i; +/// An iterator that reports matches in a stream. +/// +/// (This doesn't actually implement the `Iterator` trait because it returns +/// something with a lifetime attached to a buffer it owns, but that's OK. It +/// still has a `next` method and is iterator-like enough to be fine.) +/// +/// This iterator yields elements of type `io::Result`, where +/// an error is reported if there was a problem reading from the underlying +/// stream. The iterator terminates only when the underlying stream reaches +/// `EOF`. +/// +/// The idea here is that each chunk represents either a match or a non-match, +/// and if you concatenated all of the chunks together, you'd reproduce the +/// entire contents of the stream, byte-for-byte. +/// +/// This chunk machinery is a bit complicated and it isn't strictly required +/// for a stream searcher that just reports matches. But we do need something +/// like this to deal with the "replacement" API, which needs to know which +/// chunks it can copy and which it needs to replace. +#[cfg(feature = "std")] +#[derive(Debug)] +struct StreamChunkIter<'a, A, R> { + /// The underlying automaton to do the search. + aut: &'a A, + /// The source of bytes we read from. + rdr: R, + /// A roll buffer for managing bytes from `rdr`. Basically, this is used + /// to handle the case of a match that is split by two different + /// calls to `rdr.read()`. This isn't strictly needed if all we needed to + /// do was report matches, but here we are reporting chunks of non-matches + /// and matches and in order to do that, we really just cannot treat our + /// stream as non-overlapping blocks of bytes. We need to permit some + /// overlap while we retain bytes from a previous `read` call in memory. + buf: crate::util::buffer::Buffer, + /// The unanchored starting state of this automaton. + start: StateID, + /// The state of the automaton. 
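+    /// (This persists across calls to `next`, which, together with the roll
+    /// buffer above, is what allows a match straddling two `read` calls to
+    /// be found.)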
+ sid: StateID, + /// The absolute position over the entire stream. + absolute_pos: usize, + /// The position we're currently at within `buf`. + buffer_pos: usize, + /// The buffer position of the end of the bytes that we last returned + /// to the caller. Basically, whenever we find a match, we look to see if + /// there is a difference between where the match started and the position + /// of the last byte we returned to the caller. If there's a difference, + /// then we need to return a 'NonMatch' chunk. + buffer_reported_pos: usize, +} + +#[cfg(feature = "std")] +impl<'a, A: Automaton, R: std::io::Read> StreamChunkIter<'a, A, R> { + fn new( + aut: &'a A, + rdr: R, + ) -> Result, MatchError> { + // This restriction is a carry-over from older versions of this crate. + // I didn't have the bandwidth to think through how to handle, say, + // leftmost-first or leftmost-longest matching, but... it should be + // possible? The main problem is that once you see a match state in + // leftmost-first semantics, you can't just stop at that point and + // report a match. You have to keep going until you either hit a dead + // state or EOF. So how do you know when you'll hit a dead state? Well, + // you don't. With Aho-Corasick, I believe you can put a bound on it + // and say, "once a match has been seen, you'll need to scan forward at + // most N bytes" where N=aut.max_pattern_len(). + // + // Which is fine, but it does mean that state about whether we're still + // looking for a dead state or not needs to persist across buffer + // refills. Which this code doesn't really handle. It does preserve + // *some* state across buffer refills, basically ensuring that a match + // span is always in memory. + if !aut.match_kind().is_standard() { + return Err(MatchError::unsupported_stream(aut.match_kind())); + } + // This is kind of a cop-out, but empty matches are SUPER annoying. + // If we know they can't happen (which is what we enforce here), then + // it makes a lot of logic much simpler. With that said, I'm open to + // supporting this case, but we need to define proper semantics for it + // first. It wasn't totally clear to me what it should do at the time + // of writing, so I decided to just be conservative. + // + // It also seems like a very weird case to support anyway. Why search a + // stream if you're just going to get a match at every position? + // + // ¯\_(ツ)_/¯ + if aut.min_pattern_len() == 0 { + return Err(MatchError::unsupported_empty()); + } + let start = aut.start_state(Anchored::No)?; + Ok(StreamChunkIter { + aut, + rdr, + buf: crate::util::buffer::Buffer::new(aut.max_pattern_len()), + start, + sid: start, + absolute_pos: 0, + buffer_pos: 0, + buffer_reported_pos: 0, + }) + } + + fn next(&mut self) -> Option> { + // This code is pretty gnarly. It IS simpler than the equivalent code + // in the previous aho-corasick release, in part because we inline + // automaton traversal here and also in part because we have abdicated + // support for automatons that contain an empty pattern. + // + // I suspect this code could be made a bit simpler by designing a + // better buffer abstraction. + // + // But in general, this code is basically write-only. So you'll need + // to go through it step-by-step to grok it. One of the key bits of + // complexity is tracking a few different offsets. 'buffer_pos' is + // where we are in the buffer for search. 'buffer_reported_pos' is the + // position immediately following the last byte in the buffer that + // we've returned to the caller. 
And 'absolute_pos' is the overall + // current absolute position of the search in the entire stream, and + // this is what match spans are reported in terms of. + loop { + if self.aut.is_match(self.sid) { + let mat = self.get_match(); + if let Some(r) = self.get_non_match_chunk(mat) { + self.buffer_reported_pos += r.len(); + let bytes = &self.buf.buffer()[r]; + return Some(Ok(StreamChunk::NonMatch { bytes })); + } + self.sid = self.start; + let r = self.get_match_chunk(mat); + self.buffer_reported_pos += r.len(); + let bytes = &self.buf.buffer()[r]; + return Some(Ok(StreamChunk::Match { bytes, mat })); + } + if self.buffer_pos >= self.buf.buffer().len() { + if let Some(r) = self.get_pre_roll_non_match_chunk() { + self.buffer_reported_pos += r.len(); + let bytes = &self.buf.buffer()[r]; + return Some(Ok(StreamChunk::NonMatch { bytes })); + } + if self.buf.buffer().len() >= self.buf.min_buffer_len() { + self.buffer_pos = self.buf.min_buffer_len(); + self.buffer_reported_pos -= + self.buf.buffer().len() - self.buf.min_buffer_len(); + self.buf.roll(); + } + match self.buf.fill(&mut self.rdr) { + Err(err) => return Some(Err(err)), + Ok(true) => {} + Ok(false) => { + // We've hit EOF, but if there are still some + // unreported bytes remaining, return them now. + if let Some(r) = self.get_eof_non_match_chunk() { + self.buffer_reported_pos += r.len(); + let bytes = &self.buf.buffer()[r]; + return Some(Ok(StreamChunk::NonMatch { bytes })); } + // We've reported everything! + return None; } } } - // CORRECTNESS: next_state is correct for all possible u8 values, - // so the only thing we're concerned about is the validity of - // `state_id`. `state_id` either comes from the caller (in which - // case, we assume it is correct), or it comes from the return - // value of next_state, which is guaranteed to be correct. - state_id = self.next_state_no_fail(state_id, haystack[at]); - at += 1; - if self.is_match_or_dead_state(state_id) { - if state_id == dead_id() { - // The only way to enter into a dead state is if a - // match has been found, so we assert as much. This - // is different from normal automata, where you might - // enter a dead state if you know a subsequent match - // will never be found (regardless of whether a match - // has already been found). For Aho-Corasick, it is - // built so that we can match at any position, so the - // possibility of a match always exists. - // - // (Unless we have an anchored automaton, in which - // case, dead states are used to stop a search.) - debug_assert!( - last_match.is_some() || self.anchored(), - "dead state should only be seen after match" - ); - return last_match; + let start = self.absolute_pos; + for &byte in self.buf.buffer()[self.buffer_pos..].iter() { + self.sid = self.aut.next_state(Anchored::No, self.sid, byte); + self.absolute_pos += 1; + if self.aut.is_match(self.sid) { + break; } - last_match = self.get_match(state_id, 0, at); } + self.buffer_pos += self.absolute_pos - start; } - last_match } - /// Execute an overlapping search. - /// - /// When executing an overlapping match, the previous state ID in addition - /// to the previous match index should be given. If there are more matches - /// at the given state, then the match is reported and the given index is - /// incremented. - #[inline(always)] - fn overlapping_find_at( + /// Return a match chunk for the given match. It is assumed that the match + /// ends at the current `buffer_pos`. 
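+    /// (Worked example, for intuition only: with `buffer_pos = 10` and a
+    /// 3-byte match, the returned buffer range is `7..10`.)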
+ fn get_match_chunk(&self, mat: Match) -> core::ops::Range { + let start = self.buffer_pos - mat.len(); + let end = self.buffer_pos; + start..end + } + + /// Return a non-match chunk, if necessary, just before reporting a match. + /// This returns `None` if there is nothing to report. Otherwise, this + /// assumes that the given match ends at the current `buffer_pos`. + fn get_non_match_chunk( &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - match_index: &mut usize, - ) -> Option { - if self.anchored() && at > 0 && *state_id == self.start_state() { - return None; + mat: Match, + ) -> Option> { + let buffer_mat_start = self.buffer_pos - mat.len(); + if buffer_mat_start > self.buffer_reported_pos { + let start = self.buffer_reported_pos; + let end = buffer_mat_start; + return Some(start..end); } + None + } - let match_count = self.match_count(*state_id); - if *match_index < match_count { - // This is guaranteed to return a match since - // match_index < match_count. - let result = self.get_match(*state_id, *match_index, at); - debug_assert!(result.is_some(), "must be a match"); - *match_index += 1; - return result; + /// Look for any bytes that should be reported as a non-match just before + /// rolling the buffer. + /// + /// Note that this only reports bytes up to `buffer.len() - + /// min_buffer_len`, as it's not possible to know whether the bytes + /// following that will participate in a match or not. + fn get_pre_roll_non_match_chunk(&self) -> Option> { + let end = + self.buf.buffer().len().saturating_sub(self.buf.min_buffer_len()); + if self.buffer_reported_pos < end { + return Some(self.buffer_reported_pos..end); } + None + } - *match_index = 0; - match self.standard_find_at(prestate, haystack, at, state_id) { - None => None, - Some(m) => { - *match_index = 1; - Some(m) - } + /// Return any unreported bytes as a non-match up to the end of the buffer. + /// + /// This should only be called when the entire contents of the buffer have + /// been searched and EOF has been hit when trying to fill the buffer. + fn get_eof_non_match_chunk(&self) -> Option> { + if self.buffer_reported_pos < self.buf.buffer().len() { + return Some(self.buffer_reported_pos..self.buf.buffer().len()); } + None } - /// Return the earliest match found. This returns as soon as we know that - /// we have a match. As such, this does not necessarily correspond to the - /// leftmost starting match, but rather, the leftmost position at which a - /// match ends. - #[inline(always)] - fn earliest_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option { - if *state_id == self.start_state() { - if self.anchored() && at > 0 { - return None; - } - if let Some(m) = self.get_match(*state_id, 0, at) { - return Some(m); - } + /// Return the match at the current position for the current state. + /// + /// This panics if `self.aut.is_match(self.sid)` isn't true. + fn get_match(&self) -> Match { + get_match(self.aut, self.sid, 0, self.absolute_pos) + } +} + +/// A single chunk yielded by the stream chunk iterator. +/// +/// The `'r` lifetime refers to the lifetime of the stream chunk iterator. +#[cfg(feature = "std")] +#[derive(Debug)] +enum StreamChunk<'r> { + /// A chunk that does not contain any matches. + NonMatch { bytes: &'r [u8] }, + /// A chunk that precisely contains a match. 
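+    /// (Its `bytes` span exactly `mat`, so concatenating every chunk, match
+    /// and non-match alike, reproduces the stream byte-for-byte.)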
+ Match { bytes: &'r [u8], mat: Match }, +} + +#[inline(never)] +pub(crate) fn try_find_fwd( + aut: &A, + input: &Input<'_>, +) -> Result, MatchError> { + if input.is_done() { + return Ok(None); + } + let earliest = aut.match_kind().is_standard() || input.get_earliest(); + if input.get_anchored().is_anchored() { + try_find_fwd_imp(aut, input, None, Anchored::Yes, earliest) + } else if let Some(pre) = aut.prefilter() { + if earliest { + try_find_fwd_imp(aut, input, Some(pre), Anchored::No, true) + } else { + try_find_fwd_imp(aut, input, Some(pre), Anchored::No, false) + } + } else { + if earliest { + try_find_fwd_imp(aut, input, None, Anchored::No, true) + } else { + try_find_fwd_imp(aut, input, None, Anchored::No, false) } - self.standard_find_at(prestate, haystack, at, state_id) } +} - /// A convenience function for finding the next match according to the - /// match semantics of this automaton. For standard match semantics, this - /// finds the earliest match. Otherwise, the leftmost match is found. - #[inline(always)] - fn find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option { - match *self.match_kind() { - MatchKind::Standard => { - self.earliest_find_at(prestate, haystack, at, state_id) +#[inline(always)] +fn try_find_fwd_imp( + aut: &A, + input: &Input<'_>, + pre: Option<&Prefilter>, + anchored: Anchored, + earliest: bool, +) -> Result, MatchError> { + let mut sid = aut.start_state(input.get_anchored())?; + let mut at = input.start(); + let mut mat = None; + if aut.is_match(sid) { + mat = Some(get_match(aut, sid, 0, at)); + if earliest { + return Ok(mat); + } + } + if let Some(pre) = pre { + match pre.find_in(input.haystack(), input.get_span()) { + Candidate::None => return Ok(None), + Candidate::Match(m) => return Ok(Some(m)), + Candidate::PossibleStartOfMatch(i) => { + at = i; } - MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => { - self.leftmost_find_at(prestate, haystack, at, state_id) + } + } + while at < input.end() { + // I've tried unrolling this loop and eliding bounds checks, but no + // matter what I did, I could not observe a consistent improvement on + // any benchmark I could devise. (If someone wants to re-litigate this, + // the way to do it is to add an 'next_state_unchecked' method to the + // 'Automaton' trait with a default impl that uses 'next_state'. Then + // use 'aut.next_state_unchecked' here and implement it on DFA using + // unchecked slice index acces.) + sid = aut.next_state(anchored, sid, input.haystack()[at]); + if aut.is_special(sid) { + if aut.is_dead(sid) { + return Ok(mat); + } else if aut.is_match(sid) { + // We use 'at + 1' here because the match state is entered + // at the last byte of the pattern. Since we use half-open + // intervals, the end of the range of the match is one past the + // last byte. + let m = get_match(aut, sid, 0, at + 1); + // For the automata in this crate, we make a size trade off + // where we reuse the same automaton for both anchored and + // unanchored searches. We achieve this, principally, by simply + // not following failure transitions while computing the next + // state. Instead, if we fail to find the next state, we return + // a dead state, which instructs the search to stop. (This + // is why 'next_state' needs to know whether the search is + // anchored or not.) In addition, we have different start + // states for anchored and unanchored searches. The latter has + // a self-loop where as the former does not. 
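+            // (Concretely, as an illustrative sketch: with the single
+            // pattern "ab", the unanchored start state loops back to itself
+            // on every byte other than 'a', while the anchored start state
+            // transitions to the dead state instead, stopping the search.)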
+ // + // In this way, we can use the same trie to execute both + // anchored and unanchored searches. There is a catch though. + // When building an Aho-Corasick automaton for unanchored + // searches, we copy matches from match states to other states + // (which would otherwise not be match states) if they are + // reachable via a failure transition. In the case of an + // anchored search, we *specifically* do not want to report + // these matches because they represent matches that start past + // the beginning of the search. + // + // Now we could tweak the automaton somehow to differentiate + // anchored from unanchored match states, but this would make + // 'aut.is_match' and potentially 'aut.is_special' slower. And + // also make the automaton itself more complex. + // + // Instead, we insert a special hack: if the search is + // anchored, we simply ignore matches that don't begin at + // the start of the search. This is not quite ideal, but we + // do specialize this function in such a way that unanchored + // searches don't pay for this additional branch. While this + // might cause a search to continue on for more than it + // otherwise optimally would, it will be no more than the + // longest pattern in the automaton. The reason for this is + // that we ensure we don't follow failure transitions during + // an anchored search. Combined with using a different anchored + // starting state with no self-loop, we guarantee that we'll + // at worst move through a number of transitions equal to the + // longest pattern. + // + // Now for DFAs, the whole point of them is to eliminate + // failure transitions entirely. So there is no way to say "if + // it's an anchored search don't follow failure transitions." + // Instead, we actually have to build two entirely separate + // automatons into the transition table. One with failure + // transitions built into it and another that is effectively + // just an encoding of the base trie into a transition table. + // DFAs still need this check though, because the match states + // still carry matches only reachable via a failure transition. + // Why? Because removing them seems difficult, although I + // haven't given it a lot of thought. + if !(anchored.is_anchored() && m.start() > input.start()) { + mat = Some(m); + if earliest { + return Ok(mat); + } + } + } else if let Some(pre) = pre { + // If we're here, we know it's a special state that is not a + // dead or a match state AND that a prefilter is active. Thus, + // it must be a start state. + debug_assert!(aut.is_start(sid)); + // We don't care about 'Candidate::Match' here because if such + // a match were possible, it would have been returned above + // when we run the prefilter before walking the automaton. + let span = Span::from(at..input.end()); + match pre.find_in(input.haystack(), span).into_option() { + None => return Ok(None), + Some(i) => { + if i > at { + at = i; + continue; + } + } + } + } else { + // When pre.is_none(), then starting states should not be + // treated as special. That is, without a prefilter, is_special + // should only return true when the state is a dead or a match + // state. + // + // It is possible to execute a search without a prefilter even + // when the underlying searcher has one: an anchored search. + // But in this case, the automaton makes it impossible to move + // back to the start state by construction, and thus, we should + // never reach this branch. 
+ debug_assert!(false, "unreachable"); } - MatchKind::__Nonexhaustive => unreachable!(), } + at += 1; } + Ok(mat) +} - /// Like find_at, but does not track state identifiers. This permits some - /// optimizations when a prefilter that confirms its own matches is - /// present. - #[inline(always)] - fn find_at_no_state( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Option { - match *self.match_kind() { - MatchKind::Standard => { - let mut state = self.start_state(); - self.earliest_find_at(prestate, haystack, at, &mut state) +#[inline(never)] +fn try_find_overlapping_fwd( + aut: &A, + input: &Input<'_>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + state.mat = None; + if input.is_done() { + return Ok(()); + } + // Searching with a pattern ID is always anchored, so we should only ever + // use a prefilter when no pattern ID is given. + if aut.prefilter().is_some() && !input.get_anchored().is_anchored() { + let pre = aut.prefilter().unwrap(); + try_find_overlapping_fwd_imp(aut, input, Some(pre), state) + } else { + try_find_overlapping_fwd_imp(aut, input, None, state) + } +} + +#[inline(always)] +fn try_find_overlapping_fwd_imp( + aut: &A, + input: &Input<'_>, + pre: Option<&Prefilter>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + let mut sid = match state.id { + None => { + let sid = aut.start_state(input.get_anchored())?; + // Handle the case where the start state is a match state. That is, + // the empty string is in our automaton. We report every match we + // can here before moving on and updating 'state.at' and 'state.id' + // to find more matches in other parts of the haystack. + if aut.is_match(sid) { + let i = state.next_match_index.unwrap_or(0); + let len = aut.match_len(sid); + if i < len { + state.next_match_index = Some(i + 1); + state.mat = Some(get_match(aut, sid, i, input.start())); + return Ok(()); + } } - MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => { - self.leftmost_find_at_no_state(prestate, haystack, at) + state.at = input.start(); + state.id = Some(sid); + state.next_match_index = None; + state.mat = None; + sid + } + Some(sid) => { + // If we still have matches left to report in this state then + // report them until we've exhausted them. Only after that do we + // advance to the next offset in the haystack. + if let Some(i) = state.next_match_index { + let len = aut.match_len(sid); + if i < len { + state.next_match_index = Some(i + 1); + state.mat = Some(get_match(aut, sid, i, state.at + 1)); + return Ok(()); + } + // Once we've reported all matches at a given position, we need + // to advance the search to the next position. + state.at += 1; + state.next_match_index = None; + state.mat = None; } - MatchKind::__Nonexhaustive => unreachable!(), + sid } + }; + while state.at < input.end() { + sid = aut.next_state( + input.get_anchored(), + sid, + input.haystack()[state.at], + ); + if aut.is_special(sid) { + state.id = Some(sid); + if aut.is_dead(sid) { + return Ok(()); + } else if aut.is_match(sid) { + state.next_match_index = Some(1); + state.mat = Some(get_match(aut, sid, 0, state.at + 1)); + return Ok(()); + } else if let Some(pre) = pre { + // If we're here, we know it's a special state that is not a + // dead or a match state AND that a prefilter is active. Thus, + // it must be a start state. 
+ debug_assert!(aut.is_start(sid)); + let span = Span::from(state.at..input.end()); + match pre.find_in(input.haystack(), span).into_option() { + None => return Ok(()), + Some(i) => { + if i > state.at { + state.at = i; + continue; + } + } + } + } else { + // When pre.is_none(), then starting states should not be + // treated as special. That is, without a prefilter, is_special + // should only return true when the state is a dead or a match + // state. + // + // ... except for one special case: in stream searching, we + // currently call overlapping search with a 'None' prefilter, + // regardless of whether one exists or not, because stream + // searching can't currently deal with prefilters correctly in + // all cases. + } + } + state.at += 1; + } + state.id = Some(sid); + Ok(()) +} + +#[inline(always)] +fn get_match( + aut: &A, + sid: StateID, + index: usize, + at: usize, +) -> Match { + let pid = aut.match_pattern(sid, index); + let len = aut.pattern_len(pid); + Match::new(pid, (at - len)..at) +} + +/// Write a prefix "state" indicator for fmt::Debug impls. It always writes +/// exactly two printable bytes to the given formatter. +/// +/// Specifically, this tries to succinctly distinguish the different types of +/// states: dead states, start states and match states. It even accounts for +/// the possible overlappings of different state types. (The only possible +/// overlapping is that of match and start states.) +pub(crate) fn fmt_state_indicator( + f: &mut core::fmt::Formatter<'_>, + aut: A, + id: StateID, +) -> core::fmt::Result { + if aut.is_dead(id) { + write!(f, "D ")?; + } else if aut.is_match(id) { + if aut.is_start(id) { + write!(f, "*>")?; + } else { + write!(f, "* ")?; + } + } else if aut.is_start(id) { + write!(f, " >")?; + } else { + write!(f, " ")?; } + Ok(()) +} + +/// Return an iterator of transitions in a sparse format given an iterator +/// of all explicitly defined transitions. The iterator yields ranges of +/// transitions, such that any adjacent transitions mapped to the same +/// state are combined into a single range. +pub(crate) fn sparse_transitions<'a>( + mut it: impl Iterator + 'a, +) -> impl Iterator + 'a { + let mut cur: Option<(u8, u8, StateID)> = None; + core::iter::from_fn(move || { + while let Some((class, next)) = it.next() { + let (prev_start, prev_end, prev_next) = match cur { + Some(x) => x, + None => { + cur = Some((class, class, next)); + continue; + } + }; + if prev_next == next { + cur = Some((prev_start, class, prev_next)); + } else { + cur = Some((class, class, next)); + return Some((prev_start, prev_end, prev_next)); + } + } + if let Some((start, end, next)) = cur.take() { + return Some((start, end, next)); + } + None + }) } diff --git a/vendor/aho-corasick/src/classes.rs b/vendor/aho-corasick/src/classes.rs deleted file mode 100644 index f84ae21..0000000 --- a/vendor/aho-corasick/src/classes.rs +++ /dev/null @@ -1,238 +0,0 @@ -use std::fmt; - -/// A representation of byte oriented equivalence classes. -/// -/// This is used in an FSM to reduce the size of the transition table. This can -/// have a particularly large impact not only on the total size of an FSM, but -/// also on compile times. -#[derive(Clone, Copy)] -pub struct ByteClasses([u8; 256]); - -impl ByteClasses { - /// Creates a new set of equivalence classes where all bytes are mapped to - /// the same class. - pub fn empty() -> ByteClasses { - ByteClasses([0; 256]) - } - - /// Creates a new set of equivalence classes where each byte belongs to - /// its own equivalence class. 
- pub fn singletons() -> ByteClasses { - let mut classes = ByteClasses::empty(); - for i in 0..256 { - classes.set(i as u8, i as u8); - } - classes - } - - /// Set the equivalence class for the given byte. - #[inline] - pub fn set(&mut self, byte: u8, class: u8) { - self.0[byte as usize] = class; - } - - /// Get the equivalence class for the given byte. - #[inline] - pub fn get(&self, byte: u8) -> u8 { - // SAFETY: This is safe because all dense transitions have - // exactly 256 elements, so all u8 values are valid indices. - self.0[byte as usize] - } - - /// Return the total number of elements in the alphabet represented by - /// these equivalence classes. Equivalently, this returns the total number - /// of equivalence classes. - #[inline] - pub fn alphabet_len(&self) -> usize { - self.0[255] as usize + 1 - } - - /// Returns true if and only if every byte in this class maps to its own - /// equivalence class. Equivalently, there are 256 equivalence classes - /// and each class contains exactly one byte. - #[inline] - pub fn is_singleton(&self) -> bool { - self.alphabet_len() == 256 - } - - /// Returns an iterator over a sequence of representative bytes from each - /// equivalence class. Namely, this yields exactly N items, where N is - /// equivalent to the number of equivalence classes. Each item is an - /// arbitrary byte drawn from each equivalence class. - /// - /// This is useful when one is determinizing an NFA and the NFA's alphabet - /// hasn't been converted to equivalence classes yet. Picking an arbitrary - /// byte from each equivalence class then permits a full exploration of - /// the NFA instead of using every possible byte value. - pub fn representatives(&self) -> ByteClassRepresentatives<'_> { - ByteClassRepresentatives { classes: self, byte: 0, last_class: None } - } - - /// Returns all of the bytes in the given equivalence class. - /// - /// The second element in the tuple indicates the number of elements in - /// the array. - fn elements(&self, equiv: u8) -> ([u8; 256], usize) { - let (mut array, mut len) = ([0; 256], 0); - for b in 0..256 { - if self.get(b as u8) == equiv { - array[len] = b as u8; - len += 1; - } - } - (array, len) - } -} - -impl fmt::Debug for ByteClasses { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.is_singleton() { - write!(f, "ByteClasses({{singletons}})") - } else { - write!(f, "ByteClasses(")?; - for equiv in 0..self.alphabet_len() { - let (members, len) = self.elements(equiv as u8); - write!(f, " {} => {:?}", equiv, &members[..len])?; - } - write!(f, ")") - } - } -} - -/// An iterator over representative bytes from each equivalence class. -#[derive(Debug)] -pub struct ByteClassRepresentatives<'a> { - classes: &'a ByteClasses, - byte: usize, - last_class: Option, -} - -impl<'a> Iterator for ByteClassRepresentatives<'a> { - type Item = u8; - - fn next(&mut self) -> Option { - while self.byte < 256 { - let byte = self.byte as u8; - let class = self.classes.get(byte); - self.byte += 1; - - if self.last_class != Some(class) { - self.last_class = Some(class); - return Some(byte); - } - } - None - } -} - -/// A byte class builder keeps track of an *approximation* of equivalence -/// classes of bytes during NFA construction. That is, every byte in an -/// equivalence class cannot discriminate between a match and a non-match. 
-///
-/// For example, in the literals `abc` and `xyz`, the bytes [\x00-`], [d-w]
-/// and [{-\xFF] never discriminate between a match and a non-match, precisely
-/// because they never occur in the literals anywhere.
-///
-/// Note though that this does not necessarily compute the minimal set of
-/// equivalence classes. For example, in the literals above, the byte ranges
-/// [\x00-`], [d-w] and [{-\xFF] are all treated as distinct equivalence
-/// classes even though they could be treated as a single class. The reason
-/// for this is implementation complexity. In the future, we should endeavor
-/// to compute the minimal equivalence classes since they can have a rather
-/// large impact on the size of the DFA.
-///
-/// The representation here is 256 booleans, all initially set to false. Each
-/// boolean maps to its corresponding byte based on position. A `true` value
-/// indicates the end of an equivalence class, where its corresponding byte
-/// and all of the bytes corresponding to all previous contiguous `false`
-/// values are in the same equivalence class.
-///
-/// This particular representation only permits contiguous ranges of bytes to
-/// be in the same equivalence class, which means that we can never discover
-/// the true minimal set of equivalence classes.
-#[derive(Debug)]
-pub struct ByteClassBuilder(Vec<bool>);
-
-impl ByteClassBuilder {
-    /// Create a new builder of byte classes where all bytes are part of the
-    /// same equivalence class.
-    pub fn new() -> ByteClassBuilder {
-        ByteClassBuilder(vec![false; 256])
-    }
-
-    /// Indicate that the given byte range (inclusive) can discriminate a
-    /// match between it and all other bytes outside of the range.
-    pub fn set_range(&mut self, start: u8, end: u8) {
-        debug_assert!(start <= end);
-        if start > 0 {
-            self.0[start as usize - 1] = true;
-        }
-        self.0[end as usize] = true;
-    }
-
-    /// Build byte classes that map all byte values to their corresponding
-    /// equivalence class. The last mapping indicates the largest equivalence
-    /// class identifier (which is never bigger than 255).
- pub fn build(&self) -> ByteClasses { - let mut classes = ByteClasses::empty(); - let mut class = 0u8; - let mut i = 0; - loop { - classes.set(i as u8, class as u8); - if i >= 255 { - break; - } - if self.0[i] { - class = class.checked_add(1).unwrap(); - } - i += 1; - } - classes - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn byte_classes() { - let mut set = ByteClassBuilder::new(); - set.set_range(b'a', b'z'); - - let classes = set.build(); - assert_eq!(classes.get(0), 0); - assert_eq!(classes.get(1), 0); - assert_eq!(classes.get(2), 0); - assert_eq!(classes.get(b'a' - 1), 0); - assert_eq!(classes.get(b'a'), 1); - assert_eq!(classes.get(b'm'), 1); - assert_eq!(classes.get(b'z'), 1); - assert_eq!(classes.get(b'z' + 1), 2); - assert_eq!(classes.get(254), 2); - assert_eq!(classes.get(255), 2); - - let mut set = ByteClassBuilder::new(); - set.set_range(0, 2); - set.set_range(4, 6); - let classes = set.build(); - assert_eq!(classes.get(0), 0); - assert_eq!(classes.get(1), 0); - assert_eq!(classes.get(2), 0); - assert_eq!(classes.get(3), 1); - assert_eq!(classes.get(4), 2); - assert_eq!(classes.get(5), 2); - assert_eq!(classes.get(6), 2); - assert_eq!(classes.get(7), 3); - assert_eq!(classes.get(255), 3); - } - - #[test] - fn full_byte_classes() { - let mut set = ByteClassBuilder::new(); - for i in 0..256u16 { - set.set_range(i as u8, i as u8); - } - assert_eq!(set.build().alphabet_len(), 256); - } -} diff --git a/vendor/aho-corasick/src/dfa.rs b/vendor/aho-corasick/src/dfa.rs index a03a254..eabd15b 100644 --- a/vendor/aho-corasick/src/dfa.rs +++ b/vendor/aho-corasick/src/dfa.rs @@ -1,713 +1,835 @@ -use std::mem::size_of; - -use crate::ahocorasick::MatchKind; -use crate::automaton::Automaton; -use crate::classes::ByteClasses; -use crate::error::Result; -use crate::nfa::{PatternID, PatternLength, NFA}; -use crate::prefilter::{Prefilter, PrefilterObj, PrefilterState}; -use crate::state_id::{dead_id, fail_id, premultiply_overflow_error, StateID}; -use crate::Match; - -#[derive(Clone, Debug)] -pub enum DFA { - Standard(Standard), - ByteClass(ByteClass), - Premultiplied(Premultiplied), - PremultipliedByteClass(PremultipliedByteClass), +/*! +Provides direct access to a DFA implementation of Aho-Corasick. + +This is a low-level API that generally only needs to be used in niche +circumstances. When possible, prefer using [`AhoCorasick`](crate::AhoCorasick) +instead of a DFA directly. Using an `DFA` directly is typically only necessary +when one needs access to the [`Automaton`] trait implementation. +*/ + +use alloc::{vec, vec::Vec}; + +use crate::{ + automaton::Automaton, + nfa::noncontiguous, + util::{ + alphabet::ByteClasses, + error::{BuildError, MatchError}, + int::{Usize, U32}, + prefilter::Prefilter, + primitives::{IteratorIndexExt, PatternID, SmallIndex, StateID}, + search::{Anchored, MatchKind, StartKind}, + special::Special, + }, +}; + +/// A DFA implementation of Aho-Corasick. +/// +/// When possible, prefer using [`AhoCorasick`](crate::AhoCorasick) instead of +/// this type directly. Using a `DFA` directly is typically only necessary when +/// one needs access to the [`Automaton`] trait implementation. +/// +/// This DFA can only be built by first constructing a [`noncontiguous::NFA`]. +/// Both [`DFA::new`] and [`Builder::build`] do this for you automatically, but +/// [`Builder::build_from_noncontiguous`] permits doing it explicitly. 
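+///
+/// For example, here is a sketch (using only the items named above) that
+/// builds the intermediate NFA explicitly instead of relying on [`DFA::new`]:
+///
+/// ```
+/// use aho_corasick::{dfa::DFA, nfa::noncontiguous};
+///
+/// let nfa = noncontiguous::NFA::new(&["b", "abc", "abcd"]).unwrap();
+/// let dfa = DFA::builder().build_from_noncontiguous(&nfa).unwrap();
+/// ```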
+///
+/// A DFA provides the best possible search performance (in this crate) via
+/// two mechanisms:
+///
+/// * All states use a dense representation for their transitions.
+/// * All failure transitions are pre-computed such that they are never
+/// explicitly handled at search time.
+///
+/// These two facts combined mean that every state transition is performed
+/// using a constant number of instructions. However, this comes at
+/// great cost. The memory usage of a DFA can be quite exorbitant.
+/// It is potentially multiple orders of magnitude greater than a
+/// [`contiguous::NFA`](crate::nfa::contiguous::NFA) for example. In exchange,
+/// a DFA will typically have better search speed than a `contiguous::NFA`,
+/// but not by orders of magnitude.
+///
+/// Unless you have a small number of patterns, or memory usage is not a
+/// concern and search performance is critical, a DFA is usually not the best
+/// choice.
+///
+/// Moreover, unlike the NFAs in this crate, it is costly for a DFA to
+/// support both anchored and unanchored search configurations. Namely,
+/// since failure transitions are pre-computed, supporting both anchored
+/// and unanchored searches requires a duplication of the transition table,
+/// making the memory usage of such a DFA even bigger. (The NFAs in this
+/// crate unconditionally support both anchored and unanchored searches
+/// because there is essentially no added cost for doing so.) It is for this
+/// reason that a DFA's support for anchored and unanchored searches can be
+/// configured via [`Builder::start_kind`]. By default, a DFA only supports
+/// unanchored searches.
+///
+/// # Example
+///
+/// This example shows how to build a `DFA` directly and use it to execute
+/// [`Automaton::try_find`]:
+///
+/// ```
+/// use aho_corasick::{
+///     automaton::Automaton,
+///     dfa::DFA,
+///     Input, Match,
+/// };
+///
+/// let patterns = &["b", "abc", "abcd"];
+/// let haystack = "abcd";
+///
+/// let dfa = DFA::new(patterns).unwrap();
+/// assert_eq!(
+///     Some(Match::must(0, 1..2)),
+///     dfa.try_find(&Input::new(haystack))?,
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// It is also possible to implement your own version of `try_find`. See the
+/// [`Automaton`] documentation for an example.
+#[derive(Clone)]
+pub struct DFA {
+    /// The DFA transition table. IDs in this table are pre-multiplied. So
+    /// instead of the IDs being 0, 1, 2, 3, ..., they are 0*stride, 1*stride,
+    /// 2*stride, 3*stride, ...
+    trans: Vec<StateID>,
+    /// The matches for every match state in this DFA. This is first indexed
+    /// by state index (so that's `sid >> stride2`) and then by order in which
+    /// the matches are meant to occur.
+    matches: Vec<Vec<PatternID>>,
+    /// The amount of heap memory used, in bytes, by the inner Vecs of
+    /// 'matches'.
+    matches_memory_usage: usize,
+    /// The length of each pattern. This is used to compute the start offset
+    /// of a match.
+    pattern_lens: Vec<SmallIndex>,
+    /// A prefilter for accelerating searches, if one exists.
+    prefilter: Option<Prefilter>,
+    /// The match semantics built into this DFA.
+    match_kind: MatchKind,
+    /// The total number of states in this DFA.
+    state_len: usize,
+    /// The alphabet size, or total number of equivalence classes, for this
+    /// DFA. Note that the actual number of transitions in each state is
+    /// stride=2^stride2, where stride is the smallest power of 2 greater than
+    /// or equal to alphabet_len. We do things this way so that we can use
+    /// bitshifting to go from a state ID to an index into 'matches'.
+ alphabet_len: usize, + /// The exponent with a base 2, such that stride=2^stride2. Given a state + /// index 'i', its state identifier is 'i << stride2'. Given a state + /// identifier 'sid', its state index is 'sid >> stride2'. + stride2: usize, + /// The equivalence classes for this DFA. All transitions are defined on + /// equivalence classes and not on the 256 distinct byte values. + byte_classes: ByteClasses, + /// The length of the shortest pattern in this automaton. + min_pattern_len: usize, + /// The length of the longest pattern in this automaton. + max_pattern_len: usize, + /// The information required to deduce which states are "special" in this + /// DFA. + special: Special, } -impl DFA { - fn repr(&self) -> &Repr { - match *self { - DFA::Standard(ref dfa) => dfa.repr(), - DFA::ByteClass(ref dfa) => dfa.repr(), - DFA::Premultiplied(ref dfa) => dfa.repr(), - DFA::PremultipliedByteClass(ref dfa) => dfa.repr(), - } - } - - pub fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - pub fn heap_bytes(&self) -> usize { - self.repr().heap_bytes - } - - pub fn max_pattern_len(&self) -> usize { - self.repr().max_pattern_len - } - - pub fn pattern_count(&self) -> usize { - self.repr().pattern_count - } - - pub fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) +impl DFA { + /// Create a new Aho-Corasick DFA using the default configuration. + /// + /// Use a [`Builder`] if you want to change the configuration. + pub fn new(patterns: I) -> Result + where + I: IntoIterator, + P: AsRef<[u8]>, + { + DFA::builder().build(patterns) } - pub fn start_state(&self) -> S { - self.repr().start_id + /// A convenience method for returning a new Aho-Corasick DFA builder. + /// + /// This usually permits one to just import the `DFA` type. + pub fn builder() -> Builder { + Builder::new() } +} - #[inline(always)] - pub fn overlapping_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut S, - match_index: &mut usize, - ) -> Option { - match *self { - DFA::Standard(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - DFA::ByteClass(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), +impl DFA { + /// A sentinel state ID indicating that a search should stop once it has + /// entered this state. When a search stops, it returns a match if one has + /// been found, otherwise no match. A DFA always has an actual dead state + /// at this ID. + /// + /// N.B. DFAs, unlike NFAs, do not have any notion of a FAIL state. + /// Namely, the whole point of a DFA is that the FAIL state is completely + /// compiled away. That is, DFA construction involves pre-computing the + /// failure transitions everywhere, such that failure transitions are no + /// longer used at search time. This, combined with its uniformly dense + /// representation, are the two most important factors in why it's faster + /// than the NFAs in this crate. + const DEAD: StateID = StateID::new_unchecked(0); + + /// Adds the given pattern IDs as matches to the given state and also + /// records the added memory usage. 
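+    // (Editorial note, not from the vendored source: a worked example of
+    // the stride arithmetic documented on the struct fields above, with
+    // made-up numbers. Suppose alphabet_len is 6. The stride is padded up
+    // to the next power of two, 8, so stride2 is 3. A state with index 5
+    // then has the premultiplied ID 5 << 3 = 40, and 40 >> 3 recovers the
+    // index 5 with a shift instead of a division.)
+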
+ fn set_matches( + &mut self, + sid: StateID, + pids: impl Iterator, + ) { + let index = (sid.as_usize() >> self.stride2).checked_sub(2).unwrap(); + let mut at_least_one = false; + for pid in pids { + self.matches[index].push(pid); + self.matches_memory_usage += PatternID::SIZE; + at_least_one = true; } + assert!(at_least_one, "match state must have non-empty pids"); } +} +// SAFETY: 'start_state' always returns a valid state ID, 'next_state' always +// returns a valid state ID given a valid state ID. We otherwise claim that +// all other methods are correct as well. +unsafe impl Automaton for DFA { #[inline(always)] - pub fn earliest_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut S, - ) -> Option { - match *self { - DFA::Standard(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - DFA::ByteClass(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - DFA::Premultiplied(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) + fn start_state(&self, anchored: Anchored) -> Result { + // Either of the start state IDs can be DEAD, in which case, support + // for that type of search is not provided by this DFA. Which start + // state IDs are inactive depends on the 'StartKind' configuration at + // DFA construction time. + match anchored { + Anchored::No => { + let start = self.special.start_unanchored_id; + if start == DFA::DEAD { + Err(MatchError::invalid_input_unanchored()) + } else { + Ok(start) + } } - DFA::PremultipliedByteClass(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) + Anchored::Yes => { + let start = self.special.start_anchored_id; + if start == DFA::DEAD { + Err(MatchError::invalid_input_anchored()) + } else { + Ok(start) + } } } } #[inline(always)] - pub fn find_at_no_state( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Option { - match *self { - DFA::Standard(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - DFA::ByteClass(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - DFA::Premultiplied(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - DFA::PremultipliedByteClass(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - } - } -} - -#[derive(Clone, Debug)] -pub struct Standard(Repr); - -impl Standard { - fn repr(&self) -> &Repr { - &self.0 - } -} - -impl Automaton for Standard { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - fn anchored(&self) -> bool { - self.repr().anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.repr().start_id - } - - fn is_valid(&self, id: S) -> bool { - id.to_usize() < self.repr().state_count - } - - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) - } - - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) - } - - fn get_match( + fn next_state( &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - self.repr().get_match(id, match_index, end) + _anchored: Anchored, + sid: StateID, + byte: u8, + ) -> StateID { + let class = self.byte_classes.get(byte); + self.trans[(sid.as_u32() + u32::from(class)).as_usize()] } - fn match_count(&self, id: S) -> usize { - self.repr().match_count(id) + #[inline(always)] + fn is_special(&self, sid: StateID) -> bool { + sid <= self.special.max_special_id } - fn 
next_state(&self, current: S, input: u8) -> S { - let o = current.to_usize() * 256 + input as usize; - self.repr().trans[o] + #[inline(always)] + fn is_dead(&self, sid: StateID) -> bool { + sid == DFA::DEAD } -} - -#[derive(Clone, Debug)] -pub struct ByteClass(Repr); -impl ByteClass { - fn repr(&self) -> &Repr { - &self.0 + #[inline(always)] + fn is_match(&self, sid: StateID) -> bool { + !self.is_dead(sid) && sid <= self.special.max_match_id } -} - -impl Automaton for ByteClass { - type ID = S; - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind + #[inline(always)] + fn is_start(&self, sid: StateID) -> bool { + sid == self.special.start_unanchored_id + || sid == self.special.start_anchored_id } - fn anchored(&self) -> bool { - self.repr().anchored + #[inline(always)] + fn match_kind(&self) -> MatchKind { + self.match_kind } - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) + #[inline(always)] + fn patterns_len(&self) -> usize { + self.pattern_lens.len() } - fn start_state(&self) -> S { - self.repr().start_id + #[inline(always)] + fn pattern_len(&self, pid: PatternID) -> usize { + self.pattern_lens[pid].as_usize() } - fn is_valid(&self, id: S) -> bool { - id.to_usize() < self.repr().state_count + #[inline(always)] + fn min_pattern_len(&self) -> usize { + self.min_pattern_len } - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) + #[inline(always)] + fn max_pattern_len(&self) -> usize { + self.max_pattern_len } - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) + #[inline(always)] + fn match_len(&self, sid: StateID) -> usize { + debug_assert!(self.is_match(sid)); + let offset = (sid.as_usize() >> self.stride2) - 2; + self.matches[offset].len() } - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - self.repr().get_match(id, match_index, end) + #[inline(always)] + fn match_pattern(&self, sid: StateID, index: usize) -> PatternID { + debug_assert!(self.is_match(sid)); + let offset = (sid.as_usize() >> self.stride2) - 2; + self.matches[offset][index] } - fn match_count(&self, id: S) -> usize { - self.repr().match_count(id) - } + #[inline(always)] + fn memory_usage(&self) -> usize { + use core::mem::size_of; - fn next_state(&self, current: S, input: u8) -> S { - let alphabet_len = self.repr().byte_classes.alphabet_len(); - let input = self.repr().byte_classes.get(input); - let o = current.to_usize() * alphabet_len + input as usize; - self.repr().trans[o] + (self.trans.len() * size_of::()) + + (self.matches.len() * size_of::>()) + + self.matches_memory_usage + + (self.pattern_lens.len() * size_of::()) + + self.prefilter.as_ref().map_or(0, |p| p.memory_usage()) } -} -#[derive(Clone, Debug)] -pub struct Premultiplied(Repr); - -impl Premultiplied { - fn repr(&self) -> &Repr { - &self.0 + #[inline(always)] + fn prefilter(&self) -> Option<&Prefilter> { + self.prefilter.as_ref() } } -impl Automaton for Premultiplied { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - fn anchored(&self) -> bool { - self.repr().anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.repr().start_id - } - - fn is_valid(&self, id: S) -> bool { - (id.to_usize() / 256) < self.repr().state_count - } - - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) - } +impl core::fmt::Debug for DFA { + fn 
fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use crate::{ + automaton::{fmt_state_indicator, sparse_transitions}, + util::debug::DebugByte, + }; - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) - } + writeln!(f, "dfa::DFA(")?; + for index in 0..self.state_len { + let sid = StateID::new_unchecked(index << self.stride2); + // While we do currently include the FAIL state in the transition + // table (to simplify construction), it is never actually used. It + // poses problems with the code below because it gets treated as + // a match state incidentally when it is, of course, not. So we + // special case it. The fail state is always the first state after + // the dead state. + // + // If the construction is changed to remove the fail state (it + // probably should be), then this special case should be updated. + if index == 1 { + writeln!(f, "F {:06}:", sid.as_usize())?; + continue; + } + fmt_state_indicator(f, self, sid)?; + write!(f, "{:06}: ", sid.as_usize())?; - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - if id > self.repr().max_match { - return None; + let it = (0..self.byte_classes.alphabet_len()).map(|class| { + (class.as_u8(), self.trans[sid.as_usize() + class]) + }); + for (i, (start, end, next)) in sparse_transitions(it).enumerate() { + if i > 0 { + write!(f, ", ")?; + } + if start == end { + write!( + f, + "{:?} => {:?}", + DebugByte(start), + next.as_usize() + )?; + } else { + write!( + f, + "{:?}-{:?} => {:?}", + DebugByte(start), + DebugByte(end), + next.as_usize() + )?; + } + } + write!(f, "\n")?; + if self.is_match(sid) { + write!(f, " matches: ")?; + for i in 0..self.match_len(sid) { + if i > 0 { + write!(f, ", ")?; + } + let pid = self.match_pattern(sid, i); + write!(f, "{}", pid.as_usize())?; + } + write!(f, "\n")?; + } } - self.repr() - .matches - .get(id.to_usize() / 256) - .and_then(|m| m.get(match_index)) - .map(|&(id, len)| Match { pattern: id, len, end }) - } - - fn match_count(&self, id: S) -> usize { - let o = id.to_usize() / 256; - self.repr().matches[o].len() - } - - fn next_state(&self, current: S, input: u8) -> S { - let o = current.to_usize() + input as usize; - self.repr().trans[o] + writeln!(f, "match kind: {:?}", self.match_kind)?; + writeln!(f, "prefilter: {:?}", self.prefilter.is_some())?; + writeln!(f, "state length: {:?}", self.state_len)?; + writeln!(f, "pattern length: {:?}", self.patterns_len())?; + writeln!(f, "shortest pattern length: {:?}", self.min_pattern_len)?; + writeln!(f, "longest pattern length: {:?}", self.max_pattern_len)?; + writeln!(f, "alphabet length: {:?}", self.alphabet_len)?; + writeln!(f, "stride: {:?}", 1 << self.stride2)?; + writeln!(f, "byte classes: {:?}", self.byte_classes)?; + writeln!(f, "memory usage: {:?}", self.memory_usage())?; + writeln!(f, ")")?; + Ok(()) } } +/// A builder for configuring an Aho-Corasick DFA. +/// +/// This builder has a subset of the options available to a +/// [`AhoCorasickBuilder`](crate::AhoCorasickBuilder). Of the shared options, +/// their behavior is identical. 
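+///
+/// (Editorial example, not from the vendored source: configuring a DFA that
+/// supports both anchored and unanchored searches via `start_kind`.)
+///
+/// ```
+/// use aho_corasick::{
+///     automaton::Automaton,
+///     dfa::DFA,
+///     Anchored, Input, MatchKind, StartKind,
+/// };
+///
+/// let dfa = DFA::builder()
+///     .match_kind(MatchKind::LeftmostFirst)
+///     .start_kind(StartKind::Both)
+///     .build(&["Samwise", "Sam"])
+///     .unwrap();
+///
+/// // Unanchored searches may match anywhere in the haystack.
+/// let input = Input::new("xx Samwise xx");
+/// assert!(dfa.try_find(&input)?.is_some());
+///
+/// // Anchored searches must match at the start of the search span.
+/// let input = Input::new("xx Samwise xx").anchored(Anchored::Yes);
+/// assert!(dfa.try_find(&input)?.is_none());
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```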
#[derive(Clone, Debug)] -pub struct PremultipliedByteClass(Repr); - -impl PremultipliedByteClass { - fn repr(&self) -> &Repr { - &self.0 - } +pub struct Builder { + noncontiguous: noncontiguous::Builder, + start_kind: StartKind, + byte_classes: bool, } -impl Automaton for PremultipliedByteClass { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - fn anchored(&self) -> bool { - self.repr().anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.repr().start_id - } - - fn is_valid(&self, id: S) -> bool { - (id.to_usize() / self.repr().alphabet_len()) < self.repr().state_count - } - - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) - } - - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) - } - - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - if id > self.repr().max_match { - return None; +impl Default for Builder { + fn default() -> Builder { + Builder { + noncontiguous: noncontiguous::Builder::new(), + start_kind: StartKind::Unanchored, + byte_classes: true, } - self.repr() - .matches - .get(id.to_usize() / self.repr().alphabet_len()) - .and_then(|m| m.get(match_index)) - .map(|&(id, len)| Match { pattern: id, len, end }) } - - fn match_count(&self, id: S) -> usize { - let o = id.to_usize() / self.repr().alphabet_len(); - self.repr().matches[o].len() - } - - fn next_state(&self, current: S, input: u8) -> S { - let input = self.repr().byte_classes.get(input); - let o = current.to_usize() + input as usize; - self.repr().trans[o] - } -} - -#[derive(Clone, Debug)] -pub struct Repr { - match_kind: MatchKind, - anchored: bool, - premultiplied: bool, - start_id: S, - /// The length, in bytes, of the longest pattern in this automaton. This - /// information is useful for keeping correct buffer sizes when searching - /// on streams. - max_pattern_len: usize, - /// The total number of patterns added to this automaton. This includes - /// patterns that may never match. - pattern_count: usize, - state_count: usize, - max_match: S, - /// The number of bytes of heap used by this NFA's transition table. - heap_bytes: usize, - /// A prefilter for quickly detecting candidate matchs, if pertinent. - prefilter: Option, - byte_classes: ByteClasses, - trans: Vec, - matches: Vec>, } -impl Repr { - /// Returns the total alphabet size for this DFA. - /// - /// If byte classes are enabled, then this corresponds to the number of - /// equivalence classes. If they are disabled, then this is always 256. - fn alphabet_len(&self) -> usize { - self.byte_classes.alphabet_len() - } - - /// Returns true only if the given state is a match state. - fn is_match_state(&self, id: S) -> bool { - id <= self.max_match && id > dead_id() - } - - /// Returns true only if the given state is either a dead state or a match - /// state. - fn is_match_or_dead_state(&self, id: S) -> bool { - id <= self.max_match +impl Builder { + /// Create a new builder for configuring an Aho-Corasick DFA. + pub fn new() -> Builder { + Builder::default() } - /// Get the ith match for the given state, where the end position of a - /// match was found at `end`. - /// - /// # Panics + /// Build an Aho-Corasick DFA from the given iterator of patterns. /// - /// The caller must ensure that the given state identifier is valid, - /// otherwise this may panic. The `match_index` need not be valid. 
That is, - /// if the given state has no matches then this returns `None`. - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - if id > self.max_match { - return None; - } - self.matches - .get(id.to_usize()) - .and_then(|m| m.get(match_index)) - .map(|&(id, len)| Match { pattern: id, len, end }) + /// A builder may be reused to create more DFAs. + pub fn build(&self, patterns: I) -> Result + where + I: IntoIterator, + P: AsRef<[u8]>, + { + let nnfa = self.noncontiguous.build(patterns)?; + self.build_from_noncontiguous(&nnfa) } - /// Return the total number of matches for the given state. + /// Build an Aho-Corasick DFA from the given noncontiguous NFA. /// - /// # Panics - /// - /// The caller must ensure that the given identifier is valid, or else - /// this panics. - fn match_count(&self, id: S) -> usize { - self.matches[id.to_usize()].len() - } - - /// Get the next state given `from` as the current state and `byte` as the - /// current input byte. - fn next_state(&self, from: S, byte: u8) -> S { - let alphabet_len = self.alphabet_len(); - let byte = self.byte_classes.get(byte); - self.trans[from.to_usize() * alphabet_len + byte as usize] - } - - /// Set the `byte` transition for the `from` state to point to `to`. - fn set_next_state(&mut self, from: S, byte: u8, to: S) { - let alphabet_len = self.alphabet_len(); - let byte = self.byte_classes.get(byte); - self.trans[from.to_usize() * alphabet_len + byte as usize] = to; - } - - /// Swap the given states in place. - fn swap_states(&mut self, id1: S, id2: S) { - assert!(!self.premultiplied, "can't swap states in premultiplied DFA"); - - let o1 = id1.to_usize() * self.alphabet_len(); - let o2 = id2.to_usize() * self.alphabet_len(); - for b in 0..self.alphabet_len() { - self.trans.swap(o1 + b, o2 + b); + /// Note that when this method is used, only the `start_kind` and + /// `byte_classes` settings on this builder are respected. The other + /// settings only apply to the initial construction of the Aho-Corasick + /// automaton. Since using this method requires that initial construction + /// has already completed, all settings impacting only initial construction + /// are no longer relevant. + pub fn build_from_noncontiguous( + &self, + nnfa: &noncontiguous::NFA, + ) -> Result { + debug!("building DFA"); + let byte_classes = if self.byte_classes { + nnfa.byte_classes().clone() + } else { + ByteClasses::singletons() + }; + let state_len = match self.start_kind { + StartKind::Unanchored | StartKind::Anchored => nnfa.states().len(), + StartKind::Both => { + // These unwraps are OK because we know that the number of + // NFA states is < StateID::LIMIT which is in turn less than + // i32::MAX. Thus, there is always room to multiply by 2. + // Finally, the number of states is always at least 4 in the + // NFA (DEAD, FAIL, START-UNANCHORED, START-ANCHORED), so the + // subtraction of 4 is okay. + // + // Note that we subtract 4 because the "anchored" part of + // the DFA duplicates the unanchored part (without failure + // transitions), but reuses the DEAD, FAIL and START states. 
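+                //
+                // (Editorial sketch with hypothetical numbers: an NFA with
+                // 10 states, 4 of which are the special DEAD, FAIL and two
+                // START states, yields 10 * 2 - 4 = 16 DFA states, since
+                // only the non-special states are duplicated.)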
+ nnfa.states() + .len() + .checked_mul(2) + .unwrap() + .checked_sub(4) + .unwrap() + } + }; + let trans_len = + match state_len.checked_shl(byte_classes.stride2().as_u32()) { + Some(trans_len) => trans_len, + None => { + return Err(BuildError::state_id_overflow( + StateID::MAX.as_u64(), + usize::MAX.as_u64(), + )) + } + }; + StateID::new(trans_len.checked_sub(byte_classes.stride()).unwrap()) + .map_err(|e| { + BuildError::state_id_overflow( + StateID::MAX.as_u64(), + e.attempted(), + ) + })?; + let num_match_states = match self.start_kind { + StartKind::Unanchored | StartKind::Anchored => { + nnfa.special().max_match_id.as_usize().checked_sub(1).unwrap() + } + StartKind::Both => nnfa + .special() + .max_match_id + .as_usize() + .checked_sub(1) + .unwrap() + .checked_mul(2) + .unwrap(), + }; + let mut dfa = DFA { + trans: vec![DFA::DEAD; trans_len], + matches: vec![vec![]; num_match_states], + matches_memory_usage: 0, + pattern_lens: nnfa.pattern_lens_raw().to_vec(), + prefilter: nnfa.prefilter().map(|p| p.clone()), + match_kind: nnfa.match_kind(), + state_len, + alphabet_len: byte_classes.alphabet_len(), + stride2: byte_classes.stride2(), + byte_classes, + min_pattern_len: nnfa.min_pattern_len(), + max_pattern_len: nnfa.max_pattern_len(), + // The special state IDs are set later. + special: Special::zero(), + }; + match self.start_kind { + StartKind::Both => { + self.finish_build_both_starts(nnfa, &mut dfa); + } + StartKind::Unanchored => { + self.finish_build_one_start(Anchored::No, nnfa, &mut dfa); + } + StartKind::Anchored => { + self.finish_build_one_start(Anchored::Yes, nnfa, &mut dfa) + } } - self.matches.swap(id1.to_usize(), id2.to_usize()); - } - - /// This routine shuffles all match states in this DFA to the beginning - /// of the DFA such that every non-match state appears after every match - /// state. (With one exception: the special fail and dead states remain as - /// the first two states.) - /// - /// The purpose of doing this shuffling is to avoid an extra conditional - /// in the search loop, and in particular, detecting whether a state is a - /// match or not does not need to access any memory. - /// - /// This updates `self.max_match` to point to the last matching state as - /// well as `self.start` if the starting state was moved. - fn shuffle_match_states(&mut self) { - assert!( - !self.premultiplied, - "cannot shuffle match states of premultiplied DFA" + debug!( + "DFA built, ", + dfa.state_len, + dfa.memory_usage(), + dfa.byte_classes.alphabet_len(), + dfa.byte_classes.stride(), ); - - if self.state_count <= 1 { - return; + // The vectors can grow ~twice as big during construction because a + // Vec amortizes growth. But here, let's shrink things back down to + // what we actually need since we're never going to add more to it. + dfa.trans.shrink_to_fit(); + dfa.pattern_lens.shrink_to_fit(); + dfa.matches.shrink_to_fit(); + // TODO: We might also want to shrink each Vec inside of `dfa.matches`, + // or even better, convert it to one contiguous allocation. But I think + // I went with nested allocs for good reason (can't remember), so this + // may be tricky to do. I decided not to shrink them here because it + // might require a fair bit of work to do. It's unclear whether it's + // worth it. + Ok(dfa) + } + + /// Finishes building a DFA for either unanchored or anchored searches, + /// but NOT both. 
+ fn finish_build_one_start( + &self, + anchored: Anchored, + nnfa: &noncontiguous::NFA, + dfa: &mut DFA, + ) { + // This function always succeeds because we check above that all of the + // states in the NFA can be mapped to DFA state IDs. + let stride2 = dfa.stride2; + let old2new = |oldsid: StateID| { + StateID::new_unchecked(oldsid.as_usize() << stride2) + }; + for (oldsid, state) in nnfa.states().iter().with_state_ids() { + let newsid = old2new(oldsid); + if state.is_match() { + dfa.set_matches(newsid, nnfa.iter_matches(oldsid)); + } + sparse_iter( + nnfa, + oldsid, + &dfa.byte_classes, + |byte, class, mut oldnextsid| { + if oldnextsid == noncontiguous::NFA::FAIL { + if anchored.is_anchored() { + oldnextsid = noncontiguous::NFA::DEAD; + } else if state.fail() == noncontiguous::NFA::DEAD { + // This is a special case that avoids following + // DEAD transitions in a non-contiguous NFA. + // Following these transitions is pretty slow + // because the non-contiguous NFA will always use + // a sparse representation for it (because the + // DEAD state is usually treated as a sentinel). + // The *vast* majority of failure states are DEAD + // states, so this winds up being pretty slow if + // we go through the non-contiguous NFA state + // transition logic. Instead, just do it ourselves. + oldnextsid = noncontiguous::NFA::DEAD; + } else { + oldnextsid = nnfa.next_state( + Anchored::No, + state.fail(), + byte, + ); + } + } + dfa.trans[newsid.as_usize() + usize::from(class)] = + old2new(oldnextsid); + }, + ); } - - let mut first_non_match = self.start_id.to_usize(); - while first_non_match < self.state_count - && self.matches[first_non_match].len() > 0 - { - first_non_match += 1; + // Now that we've remapped all the IDs in our states, all that's left + // is remapping the special state IDs. + let old = nnfa.special(); + let new = &mut dfa.special; + new.max_special_id = old2new(old.max_special_id); + new.max_match_id = old2new(old.max_match_id); + if anchored.is_anchored() { + new.start_unanchored_id = DFA::DEAD; + new.start_anchored_id = old2new(old.start_anchored_id); + } else { + new.start_unanchored_id = old2new(old.start_unanchored_id); + new.start_anchored_id = DFA::DEAD; } + } - let mut swaps: Vec = vec![fail_id(); self.state_count]; - let mut cur = self.state_count - 1; - while cur > first_non_match { - if self.matches[cur].len() > 0 { - self.swap_states( - S::from_usize(cur), - S::from_usize(first_non_match), + /// Finishes building a DFA that supports BOTH unanchored and anchored + /// searches. It works by inter-leaving unanchored states with anchored + /// states in the same transition table. This way, we avoid needing to + /// re-shuffle states afterward to ensure that our states still look like + /// DEAD, MATCH, ..., START-UNANCHORED, START-ANCHORED, NON-MATCH, ... + /// + /// Honestly this is pretty inscrutable... Simplifications are most + /// welcome. 
+ fn finish_build_both_starts( + &self, + nnfa: &noncontiguous::NFA, + dfa: &mut DFA, + ) { + let stride2 = dfa.stride2; + let stride = 1 << stride2; + let mut remap_unanchored = vec![DFA::DEAD; nnfa.states().len()]; + let mut remap_anchored = vec![DFA::DEAD; nnfa.states().len()]; + let mut is_anchored = vec![false; dfa.state_len]; + let mut newsid = DFA::DEAD; + let next_dfa_id = + |sid: StateID| StateID::new_unchecked(sid.as_usize() + stride); + for (oldsid, state) in nnfa.states().iter().with_state_ids() { + if oldsid == noncontiguous::NFA::DEAD + || oldsid == noncontiguous::NFA::FAIL + { + remap_unanchored[oldsid] = newsid; + remap_anchored[oldsid] = newsid; + newsid = next_dfa_id(newsid); + } else if oldsid == nnfa.special().start_unanchored_id + || oldsid == nnfa.special().start_anchored_id + { + if oldsid == nnfa.special().start_unanchored_id { + remap_unanchored[oldsid] = newsid; + remap_anchored[oldsid] = DFA::DEAD; + } else { + remap_unanchored[oldsid] = DFA::DEAD; + remap_anchored[oldsid] = newsid; + is_anchored[newsid.as_usize() >> stride2] = true; + } + if state.is_match() { + dfa.set_matches(newsid, nnfa.iter_matches(oldsid)); + } + sparse_iter( + nnfa, + oldsid, + &dfa.byte_classes, + |_, class, oldnextsid| { + let class = usize::from(class); + if oldnextsid == noncontiguous::NFA::FAIL { + dfa.trans[newsid.as_usize() + class] = DFA::DEAD; + } else { + dfa.trans[newsid.as_usize() + class] = oldnextsid; + } + }, ); - swaps[cur] = S::from_usize(first_non_match); - swaps[first_non_match] = S::from_usize(cur); - - first_non_match += 1; - while first_non_match < cur - && self.matches[first_non_match].len() > 0 - { - first_non_match += 1; + newsid = next_dfa_id(newsid); + } else { + let unewsid = newsid; + newsid = next_dfa_id(newsid); + let anewsid = newsid; + newsid = next_dfa_id(newsid); + + remap_unanchored[oldsid] = unewsid; + remap_anchored[oldsid] = anewsid; + is_anchored[anewsid.as_usize() >> stride2] = true; + if state.is_match() { + dfa.set_matches(unewsid, nnfa.iter_matches(oldsid)); + dfa.set_matches(anewsid, nnfa.iter_matches(oldsid)); } + sparse_iter( + nnfa, + oldsid, + &dfa.byte_classes, + |byte, class, oldnextsid| { + let class = usize::from(class); + if oldnextsid == noncontiguous::NFA::FAIL { + let oldnextsid = + if state.fail() == noncontiguous::NFA::DEAD { + noncontiguous::NFA::DEAD + } else { + nnfa.next_state( + Anchored::No, + state.fail(), + byte, + ) + }; + dfa.trans[unewsid.as_usize() + class] = oldnextsid; + } else { + dfa.trans[unewsid.as_usize() + class] = oldnextsid; + dfa.trans[anewsid.as_usize() + class] = oldnextsid; + } + }, + ); } - cur -= 1; } - for id in (0..self.state_count).map(S::from_usize) { - let alphabet_len = self.alphabet_len(); - let offset = id.to_usize() * alphabet_len; - for next in &mut self.trans[offset..offset + alphabet_len] { - if swaps[next.to_usize()] != fail_id() { - *next = swaps[next.to_usize()]; + for i in 0..dfa.state_len { + let sid = i << stride2; + if is_anchored[i] { + for next in dfa.trans[sid..][..stride].iter_mut() { + *next = remap_anchored[*next]; } - } - } - if swaps[self.start_id.to_usize()] != fail_id() { - self.start_id = swaps[self.start_id.to_usize()]; - } - self.max_match = S::from_usize(first_non_match - 1); - } - - fn premultiply(&mut self) -> Result<()> { - if self.premultiplied || self.state_count <= 1 { - return Ok(()); - } - - let alpha_len = self.alphabet_len(); - premultiply_overflow_error( - S::from_usize(self.state_count - 1), - alpha_len, - )?; - - for id in 
(2..self.state_count).map(S::from_usize) { - let offset = id.to_usize() * alpha_len; - for next in &mut self.trans[offset..offset + alpha_len] { - if *next == dead_id() { - continue; + } else { + for next in dfa.trans[sid..][..stride].iter_mut() { + *next = remap_unanchored[*next]; } - *next = S::from_usize(next.to_usize() * alpha_len); } } - self.premultiplied = true; - self.start_id = S::from_usize(self.start_id.to_usize() * alpha_len); - self.max_match = S::from_usize(self.max_match.to_usize() * alpha_len); - Ok(()) + // Now that we've remapped all the IDs in our states, all that's left + // is remapping the special state IDs. + let old = nnfa.special(); + let new = &mut dfa.special; + new.max_special_id = remap_anchored[old.max_special_id]; + new.max_match_id = remap_anchored[old.max_match_id]; + new.start_unanchored_id = remap_unanchored[old.start_unanchored_id]; + new.start_anchored_id = remap_anchored[old.start_anchored_id]; } - /// Computes the total amount of heap used by this NFA in bytes. - fn calculate_size(&mut self) { - let mut size = (self.trans.len() * size_of::()) - + (self.matches.len() - * size_of::>()); - for state_matches in &self.matches { - size += - state_matches.len() * size_of::<(PatternID, PatternLength)>(); - } - size += self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes()); - self.heap_bytes = size; + /// Set the desired match semantics. + /// + /// This only applies when using [`Builder::build`] and not + /// [`Builder::build_from_noncontiguous`]. + /// + /// See + /// [`AhoCorasickBuilder::match_kind`](crate::AhoCorasickBuilder::match_kind) + /// for more documentation and examples. + pub fn match_kind(&mut self, kind: MatchKind) -> &mut Builder { + self.noncontiguous.match_kind(kind); + self } -} - -/// A builder for configuring the determinization of an NFA into a DFA. -#[derive(Clone, Debug)] -pub struct Builder { - premultiply: bool, - byte_classes: bool, -} -impl Builder { - /// Create a new builder for a DFA. - pub fn new() -> Builder { - Builder { premultiply: true, byte_classes: true } + /// Enable ASCII-aware case insensitive matching. + /// + /// This only applies when using [`Builder::build`] and not + /// [`Builder::build_from_noncontiguous`]. + /// + /// See + /// [`AhoCorasickBuilder::ascii_case_insensitive`](crate::AhoCorasickBuilder::ascii_case_insensitive) + /// for more documentation and examples. + pub fn ascii_case_insensitive(&mut self, yes: bool) -> &mut Builder { + self.noncontiguous.ascii_case_insensitive(yes); + self } - /// Build a DFA from the given NFA. + /// Enable heuristic prefilter optimizations. /// - /// This returns an error if the state identifiers exceed their - /// representation size. This can only happen when state ids are - /// premultiplied (which is enabled by default). 
- pub fn build(&self, nfa: &NFA) -> Result> { - let byte_classes = if self.byte_classes { - nfa.byte_classes().clone() - } else { - ByteClasses::singletons() - }; - let alphabet_len = byte_classes.alphabet_len(); - let trans = vec![fail_id(); alphabet_len * nfa.state_len()]; - let matches = vec![vec![]; nfa.state_len()]; - let mut repr = Repr { - match_kind: nfa.match_kind().clone(), - anchored: nfa.anchored(), - premultiplied: false, - start_id: nfa.start_state(), - max_pattern_len: nfa.max_pattern_len(), - pattern_count: nfa.pattern_count(), - state_count: nfa.state_len(), - max_match: fail_id(), - heap_bytes: 0, - prefilter: nfa.prefilter_obj().map(|p| p.clone()), - byte_classes: byte_classes.clone(), - trans, - matches, - }; - for id in (0..nfa.state_len()).map(S::from_usize) { - repr.matches[id.to_usize()].extend_from_slice(nfa.matches(id)); - - let fail = nfa.failure_transition(id); - nfa.iter_all_transitions(&byte_classes, id, |b, mut next| { - if next == fail_id() { - next = nfa_next_state_memoized(nfa, &repr, id, fail, b); - } - repr.set_next_state(id, b, next); - }); - } - repr.shuffle_match_states(); - repr.calculate_size(); - if self.premultiply { - repr.premultiply()?; - if byte_classes.is_singleton() { - Ok(DFA::Premultiplied(Premultiplied(repr))) - } else { - Ok(DFA::PremultipliedByteClass(PremultipliedByteClass(repr))) - } - } else { - if byte_classes.is_singleton() { - Ok(DFA::Standard(Standard(repr))) - } else { - Ok(DFA::ByteClass(ByteClass(repr))) - } - } + /// This only applies when using [`Builder::build`] and not + /// [`Builder::build_from_noncontiguous`]. + /// + /// See + /// [`AhoCorasickBuilder::prefilter`](crate::AhoCorasickBuilder::prefilter) + /// for more documentation and examples. + pub fn prefilter(&mut self, yes: bool) -> &mut Builder { + self.noncontiguous.prefilter(yes); + self } - /// Whether to use byte classes or in the DFA. - pub fn byte_classes(&mut self, yes: bool) -> &mut Builder { - self.byte_classes = yes; + /// Sets the starting state configuration for the automaton. + /// + /// See + /// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind) + /// for more documentation and examples. + pub fn start_kind(&mut self, kind: StartKind) -> &mut Builder { + self.start_kind = kind; self } - /// Whether to premultiply state identifier in the DFA. - pub fn premultiply(&mut self, yes: bool) -> &mut Builder { - self.premultiply = yes; + /// A debug setting for whether to attempt to shrink the size of the + /// automaton's alphabet or not. + /// + /// This should never be enabled unless you're debugging an automaton. + /// Namely, disabling byte classes makes transitions easier to reason + /// about, since they use the actual bytes instead of equivalence classes. + /// Disabling this confers no performance benefit at search time. + /// + /// See + /// [`AhoCorasickBuilder::byte_classes`](crate::AhoCorasickBuilder::byte_classes) + /// for more documentation and examples. + pub fn byte_classes(&mut self, yes: bool) -> &mut Builder { + self.byte_classes = yes; self } } -/// This returns the next NFA transition (including resolving failure -/// transitions), except once it sees a state id less than the id of the DFA -/// state that is currently being populated, then we no longer need to follow -/// failure transitions and can instead query the pre-computed state id from -/// the DFA itself. +/// Iterate over all possible equivalence class transitions in this state. 
+/// The closure is called for all transitions with a distinct equivalence +/// class, even those not explicitly represented in this sparse state. For +/// any implicitly defined transitions, the given closure is called with +/// the fail state ID. /// -/// In general, this should only be called when a failure transition is seen. -fn nfa_next_state_memoized( - nfa: &NFA, - dfa: &Repr, - populating: S, - mut current: S, - input: u8, -) -> S { - loop { - if current < populating { - return dfa.next_state(current, input); +/// The closure is guaranteed to be called precisely +/// `byte_classes.alphabet_len()` times, once for every possible class in +/// ascending order. +fn sparse_iter( + nnfa: &noncontiguous::NFA, + oldsid: StateID, + classes: &ByteClasses, + mut f: F, +) { + let mut prev_class = None; + let mut byte = 0usize; + for t in nnfa.iter_trans(oldsid) { + while byte < usize::from(t.byte()) { + let rep = byte.as_u8(); + let class = classes.get(rep); + byte += 1; + if prev_class != Some(class) { + f(rep, class, noncontiguous::NFA::FAIL); + prev_class = Some(class); + } + } + let rep = t.byte(); + let class = classes.get(rep); + byte += 1; + if prev_class != Some(class) { + f(rep, class, t.next()); + prev_class = Some(class); } - let next = nfa.next_state(current, input); - if next != fail_id() { - return next; + } + for b in byte..=255 { + let rep = b.as_u8(); + let class = classes.get(rep); + if prev_class != Some(class) { + f(rep, class, noncontiguous::NFA::FAIL); + prev_class = Some(class); } - current = nfa.failure_transition(current); } } diff --git a/vendor/aho-corasick/src/error.rs b/vendor/aho-corasick/src/error.rs deleted file mode 100644 index a57a777..0000000 --- a/vendor/aho-corasick/src/error.rs +++ /dev/null @@ -1,101 +0,0 @@ -use std::error; -use std::fmt; -use std::result; - -pub type Result = result::Result; - -/// An error that occurred during the construction of an Aho-Corasick -/// automaton. -#[derive(Clone, Debug)] -pub struct Error { - kind: ErrorKind, -} - -/// The kind of error that occurred. -#[derive(Clone, Debug)] -pub enum ErrorKind { - /// An error that occurs when constructing an automaton would require the - /// use of a state ID that overflows the chosen state ID representation. - /// For example, if one is using `u8` for state IDs and builds a DFA with - /// 257 states, then the last state's ID will be `256` which cannot be - /// represented with `u8`. - StateIDOverflow { - /// The maximum possible state ID. - max: usize, - }, - /// An error that occurs when premultiplication of state IDs is requested - /// when constructing an Aho-Corasick DFA, but doing so would overflow the - /// chosen state ID representation. - /// - /// When `max == requested_max`, then the state ID would overflow `usize`. - PremultiplyOverflow { - /// The maximum possible state id. - max: usize, - /// The maximum ID required by premultiplication. - requested_max: usize, - }, -} - -impl Error { - /// Return the kind of this error. - pub fn kind(&self) -> &ErrorKind { - &self.kind - } - - pub(crate) fn state_id_overflow(max: usize) -> Error { - Error { kind: ErrorKind::StateIDOverflow { max } } - } - - pub(crate) fn premultiply_overflow( - max: usize, - requested_max: usize, - ) -> Error { - Error { kind: ErrorKind::PremultiplyOverflow { max, requested_max } } - } -} - -impl error::Error for Error { - fn description(&self) -> &str { - match self.kind { - ErrorKind::StateIDOverflow { .. } => { - "state id representation too small" - } - ErrorKind::PremultiplyOverflow { .. 
} => { - "state id representation too small for premultiplication" - } - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.kind { - ErrorKind::StateIDOverflow { max } => write!( - f, - "building the automaton failed because it required \ - building more states that can be identified, where the \ - maximum ID for the chosen representation is {}", - max, - ), - ErrorKind::PremultiplyOverflow { max, requested_max } => { - if max == requested_max { - write!( - f, - "premultiplication of states requires the ability to \ - represent a state ID greater than what can fit on \ - this platform's usize, which is {}", - ::std::usize::MAX, - ) - } else { - write!( - f, - "premultiplication of states requires the ability to \ - represent at least a state ID of {}, but the chosen \ - representation only permits a maximum state ID of {}", - requested_max, max, - ) - } - } - } - } -} diff --git a/vendor/aho-corasick/src/lib.rs b/vendor/aho-corasick/src/lib.rs index 4465a56..20e8b81 100644 --- a/vendor/aho-corasick/src/lib.rs +++ b/vendor/aho-corasick/src/lib.rs @@ -10,32 +10,24 @@ off. Other features include simple ASCII case insensitive matching, finding overlapping matches, replacements, searching streams and even searching and replacing text in streams. -Finally, unlike all other (known) Aho-Corasick implementations, this one -supports enabling -[leftmost-first](enum.MatchKind.html#variant.LeftmostFirst) -or -[leftmost-longest](enum.MatchKind.html#variant.LeftmostFirst) -match semantics, using a (seemingly) novel alternative construction algorithm. -For more details on what match semantics means, see the -[`MatchKind`](enum.MatchKind.html) -type. +Finally, unlike most other Aho-Corasick implementations, this one +supports enabling [leftmost-first](MatchKind::LeftmostFirst) or +[leftmost-longest](MatchKind::LeftmostLongest) match semantics, using a +(seemingly) novel alternative construction algorithm. For more details on what +match semantics means, see the [`MatchKind`] type. # Overview This section gives a brief overview of the primary types in this crate: -* [`AhoCorasick`](struct.AhoCorasick.html) is the primary type and represents - an Aho-Corasick automaton. This is the type you use to execute searches. -* [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) can be used to build - an Aho-Corasick automaton, and supports configuring a number of options. -* [`Match`](struct.Match.html) represents a single match reported by an - Aho-Corasick automaton. Each match has two pieces of information: the pattern - that matched and the start and end byte offsets corresponding to the position - in the haystack at which it matched. - -Additionally, the [`packed`](packed/index.html) sub-module contains a lower -level API for using fast vectorized routines for finding a small number of -patterns in a haystack. +* [`AhoCorasick`] is the primary type and represents an Aho-Corasick automaton. +This is the type you use to execute searches. +* [`AhoCorasickBuilder`] can be used to build an Aho-Corasick automaton, and +supports configuring a number of options. +* [`Match`] represents a single match reported by an Aho-Corasick automaton. +Each match has two pieces of information: the pattern that matched and the +start and end byte offsets corresponding to the position in the haystack at +which it matched. # Example: basic searching @@ -44,20 +36,20 @@ simultaneously. 
Each match includes the pattern that matched along with the byte offsets of the match. ``` -use aho_corasick::AhoCorasick; +use aho_corasick::{AhoCorasick, PatternID}; let patterns = &["apple", "maple", "Snapple"]; let haystack = "Nobody likes maple in their apple flavored Snapple."; -let ac = AhoCorasick::new(patterns); +let ac = AhoCorasick::new(patterns).unwrap(); let mut matches = vec![]; for mat in ac.find_iter(haystack) { matches.push((mat.pattern(), mat.start(), mat.end())); } assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), + (PatternID::must(1), 13, 18), + (PatternID::must(0), 28, 33), + (PatternID::must(2), 43, 50), ]); ``` @@ -67,22 +59,23 @@ This is like the previous example, but matches `Snapple` case insensitively using `AhoCorasickBuilder`: ``` -use aho_corasick::AhoCorasickBuilder; +use aho_corasick::{AhoCorasick, PatternID}; let patterns = &["apple", "maple", "snapple"]; let haystack = "Nobody likes maple in their apple flavored Snapple."; -let ac = AhoCorasickBuilder::new() +let ac = AhoCorasick::builder() .ascii_case_insensitive(true) - .build(patterns); + .build(patterns) + .unwrap(); let mut matches = vec![]; for mat in ac.find_iter(haystack) { matches.push((mat.pattern(), mat.start(), mat.end())); } assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), + (PatternID::must(1), 13, 18), + (PatternID::must(0), 28, 33), + (PatternID::must(2), 43, 50), ]); ``` @@ -92,9 +85,10 @@ This example shows how to execute a search and replace on a stream without loading the entire stream into memory first. ``` +# #[cfg(feature = "std")] { use aho_corasick::AhoCorasick; -# fn example() -> Result<(), ::std::io::Error> { +# fn example() -> Result<(), std::io::Error> { let patterns = &["fox", "brown", "quick"]; let replace_with = &["sloth", "grey", "slow"]; @@ -103,10 +97,11 @@ let replace_with = &["sloth", "grey", "slow"]; let rdr = "The quick brown fox."; let mut wtr = vec![]; -let ac = AhoCorasick::new(patterns); -ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?; +let ac = AhoCorasick::new(patterns).unwrap(); +ac.try_stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?; assert_eq!(b"The slow grey sloth.".to_vec(), wtr); # Ok(()) }; example().unwrap() +# } ``` # Example: finding the leftmost first match @@ -134,7 +129,7 @@ use aho_corasick::AhoCorasick; let patterns = &["Samwise", "Sam"]; let haystack = "Samwise"; -let ac = AhoCorasick::new(patterns); +let ac = AhoCorasick::new(patterns).unwrap(); let mat = ac.find(haystack).expect("should have a match"); assert_eq!("Sam", &haystack[mat.start()..mat.end()]); ``` @@ -143,23 +138,22 @@ And now here's the leftmost-first version, which matches how a Perl-like regex will work: ``` -use aho_corasick::{AhoCorasickBuilder, MatchKind}; +use aho_corasick::{AhoCorasick, MatchKind}; let patterns = &["Samwise", "Sam"]; let haystack = "Samwise"; -let ac = AhoCorasickBuilder::new() +let ac = AhoCorasick::builder() .match_kind(MatchKind::LeftmostFirst) - .build(patterns); + .build(patterns) + .unwrap(); let mat = ac.find(haystack).expect("should have a match"); assert_eq!("Samwise", &haystack[mat.start()..mat.end()]); ``` In addition to leftmost-first semantics, this library also supports leftmost-longest semantics, which match the POSIX behavior of a regular -expression alternation. See -[`MatchKind`](enum.MatchKind.html) -for more details. +expression alternation. See [`MatchKind`] for more details. 
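+
+For contrast with the leftmost-first example above, here is a
+leftmost-longest version. (Editorial sketch mirroring the snippet above:
+with the patterns reordered as `["Sam", "Samwise"]`, leftmost-first would
+report `Sam`, while leftmost-longest still reports the longer `Samwise`.)
+
+```
+use aho_corasick::{AhoCorasick, MatchKind};
+
+let patterns = &["Sam", "Samwise"];
+let haystack = "Samwise";
+
+let ac = AhoCorasick::builder()
+    .match_kind(MatchKind::LeftmostLongest)
+    .build(patterns)
+    .unwrap();
+let mat = ac.find(haystack).expect("should have a match");
+assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
+```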
# Prefilters @@ -175,129 +169,158 @@ number of patterns gets too big, prefilters are no longer used. While a prefilter is generally good to have on by default since it works well in the common case, it can lead to less predictable or even sub-optimal performance in some cases. For that reason, prefilters can be explicitly -disabled via -[`AhoCorasickBuilder::prefilter`](struct.AhoCorasickBuilder.html#method.prefilter). +disabled via [`AhoCorasickBuilder::prefilter`]. + +# Lower level APIs + +This crate also provides several sub-modules that collectively expose many of +the implementation details of the main [`AhoCorasick`] type. Most users of this +library can completely ignore the submodules and their contents, but if you +needed finer grained control, some parts of them may be useful to you. Here is +a brief overview of each and why you might want to use them: + +* The [`packed`] sub-module contains a lower level API for using fast +vectorized routines for finding a small number of patterns in a haystack. +You might want to use this API when you want to completely side-step using +Aho-Corasick automata. Otherwise, the fast vectorized routines are used +automatically as prefilters for `AhoCorasick` searches whenever possible. +* The [`automaton`] sub-module provides a lower level finite state +machine interface that the various Aho-Corasick implementations in +this crate implement. This sub-module's main contribution is the +[`Automaton`](automaton::Automaton) trait, which permits manually walking the +state transitions of an Aho-Corasick automaton. +* The [`dfa`] and [`nfa`] sub-modules provide DFA and NFA implementations of +the aforementioned `Automaton` trait. The main reason one might want to use +these sub-modules is to get access to a type that implements the `Automaton` +trait. (The top-level `AhoCorasick` type does not implement the `Automaton` +trait.) + +As mentioned above, if you aren't sure whether you need these sub-modules, +you should be able to safely ignore them and just focus on the [`AhoCorasick`] +type. + +# Crate features + +This crate exposes a few features for controlling dependency usage and whether +this crate can be used without the standard library. + +* **std** - + Enables support for the standard library. This feature is enabled by + default. When disabled, only `core` and `alloc` are used. At an API + level, enabling `std` enables `std::error::Error` trait impls for the + various error types, and higher level stream search routines such as + [`AhoCorasick::try_stream_find_iter`]. But the `std` feature is also required + to enable vectorized prefilters. Prefilters can greatly accelerate searches, + but generally only apply when the number of patterns is small (less than + ~100). +* **perf-literal** - + Enables support for literal prefilters that use vectorized routines from + external crates. This feature is enabled by default. If you're only using + Aho-Corasick for large numbers of patterns or otherwise can abide lower + throughput when searching with a small number of patterns, then it is + reasonable to disable this feature. +* **logging** - + Enables a dependency on the `log` crate and emits messages to aide in + diagnostics. This feature is disabled by default. */ +#![no_std] #![deny(missing_docs)] - -// We can never be truly no_std, but we could be alloc-only some day, so -// require the std feature for now. 
-#[cfg(not(feature = "std"))] -compile_error!("`std` feature is currently required to build this crate"); - -// #[cfg(doctest)] -// #[macro_use] -// extern crate doc_comment; - -// #[cfg(doctest)] -// doctest!("../README.md"); - -pub use crate::ahocorasick::{ - AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind, - StreamFindIter, +#![deny(rustdoc::broken_intra_doc_links)] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +extern crate alloc; +#[cfg(any(test, feature = "std"))] +extern crate std; + +#[cfg(doctest)] +doc_comment::doctest!("../README.md"); + +#[cfg(feature = "std")] +pub use crate::ahocorasick::StreamFindIter; +pub use crate::{ + ahocorasick::{ + AhoCorasick, AhoCorasickBuilder, AhoCorasickKind, FindIter, + FindOverlappingIter, + }, + util::{ + error::{BuildError, MatchError, MatchErrorKind}, + primitives::{PatternID, PatternIDError}, + search::{Anchored, Input, Match, MatchKind, Span, StartKind}, + }, }; -pub use crate::error::{Error, ErrorKind}; -pub use crate::state_id::StateID; + +#[macro_use] +mod macros; mod ahocorasick; -mod automaton; -mod buffer; -mod byte_frequencies; -mod classes; -mod dfa; -mod error; -mod nfa; +pub mod automaton; +pub mod dfa; +pub mod nfa; pub mod packed; -mod prefilter; -mod state_id; #[cfg(test)] mod tests; +// I wrote out the module for implementing fst::Automaton only to later realize +// that this would make fst a public dependency and fst is not at 1.0 yet. I +// decided to just keep the code in tree, but build it only during tests. +// +// TODO: I think I've changed my mind again. I'm considering pushing it out +// into either a separate crate or into 'fst' directly as an optional feature. +// #[cfg(test)] +// #[allow(dead_code)] +// mod transducer; +pub(crate) mod util; -/// A representation of a match reported by an Aho-Corasick automaton. -/// -/// A match has two essential pieces of information: the identifier of the -/// pattern that matched, along with the start and end offsets of the match -/// in the haystack. -/// -/// # Examples -/// -/// Basic usage: -/// -/// ``` -/// use aho_corasick::AhoCorasick; -/// -/// let ac = AhoCorasick::new(&[ -/// "foo", "bar", "baz", -/// ]); -/// let mat = ac.find("xxx bar xxx").expect("should have a match"); -/// assert_eq!(1, mat.pattern()); -/// assert_eq!(4, mat.start()); -/// assert_eq!(7, mat.end()); -/// ``` -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct Match { - /// The pattern id. - pattern: usize, - /// The length of this match, such that the starting position of the match - /// is `end - len`. - /// - /// We use length here because, other than the pattern id, the only - /// information about each pattern that the automaton stores is its length. - /// So using the length here is just a bit more natural. But it isn't - /// technically required. - len: usize, - /// The end offset of the match, exclusive. - end: usize, -} - -impl Match { - /// Returns the identifier of the pattern that matched. - /// - /// The identifier of a pattern is derived from the position in which it - /// was originally inserted into the corresponding automaton. The first - /// pattern has identifier `0`, and each subsequent pattern is `1`, `2` - /// and so on. - #[inline] - pub fn pattern(&self) -> usize { - self.pattern - } - - /// The starting position of the match. 
-    #[inline]
-    pub fn start(&self) -> usize {
-        self.end - self.len
+#[cfg(test)]
+mod testoibits {
+    use std::panic::{RefUnwindSafe, UnwindSafe};
+
+    use super::*;
+
+    fn assert_all<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}
+
+    #[test]
+    fn oibits_main() {
+        assert_all::<AhoCorasick>();
+        assert_all::<AhoCorasickBuilder>();
+        assert_all::<AhoCorasickKind>();
+        assert_all::<FindIter>();
+        assert_all::<FindOverlappingIter>();
+
+        assert_all::<BuildError>();
+        assert_all::<MatchError>();
+        assert_all::<MatchErrorKind>();
+
+        assert_all::<Anchored>();
+        assert_all::<Input>();
+        assert_all::<Match>();
+        assert_all::<MatchKind>();
+        assert_all::<Span>();
+        assert_all::<StartKind>();
     }
 
-    /// The ending position of the match.
-    #[inline]
-    pub fn end(&self) -> usize {
-        self.end
-    }
+    #[test]
+    fn oibits_automaton() {
+        use crate::{automaton, dfa::DFA};
 
-    /// The length, in bytes, of the match.
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.len
-    }
+        assert_all::<automaton::FindIter<DFA>>();
+        assert_all::<automaton::FindOverlappingIter<DFA>>();
+        #[cfg(feature = "std")]
+        assert_all::<automaton::StreamFindIter<DFA, std::io::Stdin>>();
+        assert_all::<automaton::OverlappingState>();
 
-    /// Returns true if and only if this match is empty. That is, when
-    /// `start() == end()`.
-    ///
-    /// An empty match can only be returned when the empty string was among
-    /// the patterns used to build the Aho-Corasick automaton.
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.len == 0
+        assert_all::<automaton::Prefilter>();
+        assert_all::<automaton::Candidate>();
     }
 
-    #[inline]
-    fn increment(&self, by: usize) -> Match {
-        Match { pattern: self.pattern, len: self.len, end: self.end + by }
-    }
+    #[test]
+    fn oibits_packed() {
+        use crate::packed;
 
-    #[inline]
-    fn from_span(id: usize, start: usize, end: usize) -> Match {
-        Match { pattern: id, len: end - start, end }
+        assert_all::<packed::Config>();
+        assert_all::<packed::Builder>();
+        assert_all::<packed::Searcher>();
+        assert_all::<packed::FindIter>();
+        assert_all::<packed::MatchKind>();
     }
 }
diff --git a/vendor/aho-corasick/src/macros.rs b/vendor/aho-corasick/src/macros.rs
new file mode 100644
index 0000000..fc73e6e
--- /dev/null
+++ b/vendor/aho-corasick/src/macros.rs
@@ -0,0 +1,18 @@
+#![allow(unused_macros)]
+
+macro_rules! log {
+    ($($tt:tt)*) => {
+        #[cfg(feature = "logging")]
+        {
+            $($tt)*
+        }
+    }
+}
+
+macro_rules! debug {
+    ($($tt:tt)*) => { log!(log::debug!($($tt)*)) }
+}
+
+macro_rules! trace {
+    ($($tt:tt)*) => { log!(log::trace!($($tt)*)) }
+}
diff --git a/vendor/aho-corasick/src/nfa.rs b/vendor/aho-corasick/src/nfa.rs
deleted file mode 100644
index 05c5cfb..0000000
--- a/vendor/aho-corasick/src/nfa.rs
+++ /dev/null
@@ -1,1214 +0,0 @@
-use std::cmp;
-use std::collections::{BTreeSet, VecDeque};
-use std::fmt;
-use std::mem::size_of;
-use std::ops::{Index, IndexMut};
-
-use crate::ahocorasick::MatchKind;
-use crate::automaton::Automaton;
-use crate::classes::{ByteClassBuilder, ByteClasses};
-use crate::error::Result;
-use crate::prefilter::{self, opposite_ascii_case, Prefilter, PrefilterObj};
-use crate::state_id::{dead_id, fail_id, usize_to_state_id, StateID};
-use crate::Match;
-
-/// The identifier for a pattern, which is simply the position of the pattern
-/// in the sequence of patterns given by the caller.
-pub type PatternID = usize;
-
-/// The length of a pattern, in bytes.
-pub type PatternLength = usize;
-
-/// An Aho-Corasick automaton, represented as an NFA.
-///
-/// This is the classical formulation of Aho-Corasick, which involves building
-/// up a prefix trie of a given set of patterns, and then wiring up failure
-/// transitions between states in order to guarantee linear time matching. The
-/// standard formulation is, technically, an NFA because of these failure
-/// transitions. That is, one can see them as enabling the automaton to be in
-/// multiple states at once. Indeed, during search, it is possible to check
-/// the transitions on multiple states for a single input byte.
-/// -/// This particular implementation not only supports the standard style of -/// matching, but also provides a mode for choosing leftmost-first or -/// leftmost-longest match semantics. When a leftmost mode is chosen, some -/// failure transitions that would otherwise be added are elided. See -/// the documentation of `MatchKind` for more details and examples on how the -/// match semantics may differ. -/// -/// If one wants a DFA, then it is necessary to first build an NFA and convert -/// it into a DFA. Note, however, that because we've constrained ourselves to -/// matching literal patterns, this does not need to use subset construction -/// for determinization. Instead, the DFA has at most a number of states -/// equivalent to the number of NFA states. The only real difference between -/// them is that all failure transitions are followed and pre-computed. This -/// uses much more memory, but also executes searches more quickly. -#[derive(Clone)] -pub struct NFA { - /// The match semantics built into this NFA. - match_kind: MatchKind, - /// The start state id as an index into `states`. - start_id: S, - /// The length, in bytes, of the longest pattern in this automaton. This - /// information is useful for keeping correct buffer sizes when searching - /// on streams. - max_pattern_len: usize, - /// The total number of patterns added to this automaton, including - /// patterns that may never be matched. - pattern_count: usize, - /// The number of bytes of heap used by this NFA's transition table. - heap_bytes: usize, - /// A prefilter for quickly skipping to candidate matches, if pertinent. - prefilter: Option, - /// Whether this automaton anchors all matches to the start of input. - anchored: bool, - /// A set of equivalence classes in terms of bytes. We compute this while - /// building the NFA, but don't use it in the NFA's states. Instead, we - /// use this for building the DFA. We store it on the NFA since it's easy - /// to compute while visiting the patterns. - byte_classes: ByteClasses, - /// A set of states. Each state defines its own transitions, a fail - /// transition and a set of indices corresponding to matches. - /// - /// The first state is always the fail state, which is used only as a - /// sentinel. Namely, in the final NFA, no transition into the fail state - /// exists. (Well, they do, but they aren't followed. Instead, the state's - /// failure transition is followed.) - /// - /// The second state (index 1) is always the dead state. Dead states are - /// in every automaton, but only used when leftmost-{first,longest} match - /// semantics are enabled. Specifically, they instruct search to stop - /// at specific points in order to report the correct match location. In - /// the standard Aho-Corasick construction, there are no transitions to - /// the dead state. - /// - /// The third state (index 2) is generally intended to be the starting or - /// "root" state. - states: Vec>, -} - -impl NFA { - /// Returns the equivalence classes of bytes found while constructing - /// this NFA. - /// - /// Note that the NFA doesn't actually make use of these equivalence - /// classes. Instead, these are useful for building the DFA when desired. - pub fn byte_classes(&self) -> &ByteClasses { - &self.byte_classes - } - - /// Returns a prefilter, if one exists. - pub fn prefilter_obj(&self) -> Option<&PrefilterObj> { - self.prefilter.as_ref() - } - - /// Returns the total number of heap bytes used by this NFA's transition - /// table. 
- pub fn heap_bytes(&self) -> usize { - self.heap_bytes - + self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes()) - } - - /// Return the length of the longest pattern in this automaton. - pub fn max_pattern_len(&self) -> usize { - self.max_pattern_len - } - - /// Return the total number of patterns added to this automaton. - pub fn pattern_count(&self) -> usize { - self.pattern_count - } - - /// Returns the total number of states in this NFA. - pub fn state_len(&self) -> usize { - self.states.len() - } - - /// Returns the matches for the given state. - pub fn matches(&self, id: S) -> &[(PatternID, PatternLength)] { - &self.states[id.to_usize()].matches - } - - /// Returns an iterator over all transitions in the given state according - /// to the given equivalence classes, including transitions to `fail_id()`. - /// The number of transitions returned is always equivalent to the number - /// of equivalence classes. - pub fn iter_all_transitions( - &self, - byte_classes: &ByteClasses, - id: S, - f: F, - ) { - self.states[id.to_usize()].trans.iter_all(byte_classes, f); - } - - /// Returns the failure transition for the given state. - pub fn failure_transition(&self, id: S) -> S { - self.states[id.to_usize()].fail - } - - /// Returns the next state for the given state and input byte. - /// - /// Note that this does not follow failure transitions. As such, the id - /// returned may be `fail_id`. - pub fn next_state(&self, current: S, input: u8) -> S { - self.states[current.to_usize()].next_state(input) - } - - fn state(&self, id: S) -> &State { - &self.states[id.to_usize()] - } - - fn state_mut(&mut self, id: S) -> &mut State { - &mut self.states[id.to_usize()] - } - - fn start(&self) -> &State { - self.state(self.start_id) - } - - fn start_mut(&mut self) -> &mut State { - let id = self.start_id; - self.state_mut(id) - } - - fn iter_transitions_mut(&mut self, id: S) -> IterTransitionsMut<'_, S> { - IterTransitionsMut::new(self, id) - } - - fn copy_matches(&mut self, src: S, dst: S) { - let (src, dst) = - get_two_mut(&mut self.states, src.to_usize(), dst.to_usize()); - dst.matches.extend_from_slice(&src.matches); - } - - fn copy_empty_matches(&mut self, dst: S) { - let start_id = self.start_id; - self.copy_matches(start_id, dst); - } - - fn add_dense_state(&mut self, depth: usize) -> Result { - let trans = Transitions::Dense(Dense::new()); - let id = usize_to_state_id(self.states.len())?; - self.states.push(State { - trans, - // Anchored automatons do not have any failure transitions. - fail: if self.anchored { dead_id() } else { self.start_id }, - depth, - matches: vec![], - }); - Ok(id) - } - - fn add_sparse_state(&mut self, depth: usize) -> Result { - let trans = Transitions::Sparse(vec![]); - let id = usize_to_state_id(self.states.len())?; - self.states.push(State { - trans, - // Anchored automatons do not have any failure transitions. 
- fail: if self.anchored { dead_id() } else { self.start_id }, - depth, - matches: vec![], - }); - Ok(id) - } -} - -impl Automaton for NFA { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.match_kind - } - - fn anchored(&self) -> bool { - self.anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.start_id - } - - fn is_valid(&self, id: S) -> bool { - id.to_usize() < self.states.len() - } - - fn is_match_state(&self, id: S) -> bool { - self.states[id.to_usize()].is_match() - } - - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - let state = match self.states.get(id.to_usize()) { - None => return None, - Some(state) => state, - }; - state.matches.get(match_index).map(|&(id, len)| Match { - pattern: id, - len, - end, - }) - } - - fn match_count(&self, id: S) -> usize { - self.states[id.to_usize()].matches.len() - } - - fn next_state(&self, mut current: S, input: u8) -> S { - // This terminates since: - // - // 1. `State.fail` never points to fail_id(). - // 2. All `State.fail` values point to a state closer to `start`. - // 3. The start state has no transitions to fail_id(). - loop { - let state = &self.states[current.to_usize()]; - let next = state.next_state(input); - if next != fail_id() { - return next; - } - current = state.fail; - } - } -} - -/// A representation of an NFA state for an Aho-Corasick automaton. -/// -/// It contains the transitions to the next state, a failure transition for -/// cases where there exists no other transition for the current input byte, -/// the matches implied by visiting this state (if any) and the depth of this -/// state. The depth of a state is simply the distance from it to the start -/// state in the automaton, where the depth of the start state is 0. -#[derive(Clone, Debug)] -pub struct State { - trans: Transitions, - fail: S, - matches: Vec<(PatternID, PatternLength)>, - // TODO: Strictly speaking, this isn't needed for searching. It's only - // used when building an NFA that supports leftmost match semantics. We - // could drop this from the state and dynamically build a map only when - // computing failure transitions, but it's not clear which is better. - // Benchmark this. - depth: usize, -} - -impl State { - fn heap_bytes(&self) -> usize { - self.trans.heap_bytes() - + (self.matches.len() * size_of::<(PatternID, PatternLength)>()) - } - - fn add_match(&mut self, i: PatternID, len: PatternLength) { - self.matches.push((i, len)); - } - - fn is_match(&self) -> bool { - !self.matches.is_empty() - } - - fn next_state(&self, input: u8) -> S { - self.trans.next_state(input) - } - - fn set_next_state(&mut self, input: u8, next: S) { - self.trans.set_next_state(input, next); - } -} - -/// Represents the transitions for a single dense state. -/// -/// The primary purpose here is to encapsulate index access. Namely, since a -/// dense representation always contains 256 elements, all values of `u8` are -/// valid indices. -#[derive(Clone, Debug)] -struct Dense(Vec); - -impl Dense -where - S: StateID, -{ - fn new() -> Self { - Dense(vec![fail_id(); 256]) - } - - #[inline] - fn len(&self) -> usize { - self.0.len() - } -} - -impl Index for Dense { - type Output = S; - - #[inline] - fn index(&self, i: u8) -> &S { - // SAFETY: This is safe because all dense transitions have - // exactly 256 elements, so all u8 values are valid indices. 
- &self.0[i as usize] - } -} - -impl IndexMut for Dense { - #[inline] - fn index_mut(&mut self, i: u8) -> &mut S { - // SAFETY: This is safe because all dense transitions have - // exactly 256 elements, so all u8 values are valid indices. - &mut self.0[i as usize] - } -} - -/// A representation of transitions in an NFA. -/// -/// Transitions have either a sparse representation, which is slower for -/// lookups but uses less memory, or a dense representation, which is faster -/// for lookups but uses more memory. In the sparse representation, the absence -/// of a state implies a transition to `fail_id()`. Transitions to `dead_id()` -/// are still explicitly represented. -/// -/// For the NFA, by default, we use a dense representation for transitions for -/// states close to the start state because it's likely these are the states -/// that will be most frequently visited. -#[derive(Clone, Debug)] -enum Transitions { - Sparse(Vec<(u8, S)>), - Dense(Dense), -} - -impl Transitions { - fn heap_bytes(&self) -> usize { - match *self { - Transitions::Sparse(ref sparse) => { - sparse.len() * size_of::<(u8, S)>() - } - Transitions::Dense(ref dense) => dense.len() * size_of::(), - } - } - - fn next_state(&self, input: u8) -> S { - match *self { - Transitions::Sparse(ref sparse) => { - for &(b, id) in sparse { - if b == input { - return id; - } - } - fail_id() - } - Transitions::Dense(ref dense) => dense[input], - } - } - - fn set_next_state(&mut self, input: u8, next: S) { - match *self { - Transitions::Sparse(ref mut sparse) => { - match sparse.binary_search_by_key(&input, |&(b, _)| b) { - Ok(i) => sparse[i] = (input, next), - Err(i) => sparse.insert(i, (input, next)), - } - } - Transitions::Dense(ref mut dense) => { - dense[input] = next; - } - } - } - - /// Iterate over transitions in this state while skipping over transitions - /// to `fail_id()`. - fn iter(&self, mut f: F) { - match *self { - Transitions::Sparse(ref sparse) => { - for &(b, id) in sparse { - f(b, id); - } - } - Transitions::Dense(ref dense) => { - for b in AllBytesIter::new() { - let id = dense[b]; - if id != fail_id() { - f(b, id); - } - } - } - } - } - - /// Iterate over all transitions in this state according to the given - /// equivalence classes, including transitions to `fail_id()`. - fn iter_all(&self, classes: &ByteClasses, mut f: F) { - if classes.is_singleton() { - match *self { - Transitions::Sparse(ref sparse) => { - sparse_iter(sparse, f); - } - Transitions::Dense(ref dense) => { - for b in AllBytesIter::new() { - f(b, dense[b]); - } - } - } - } else { - // In this case, we only want to yield a single byte for each - // equivalence class. - match *self { - Transitions::Sparse(ref sparse) => { - let mut last_class = None; - sparse_iter(sparse, |b, next| { - let class = classes.get(b); - if last_class != Some(class) { - last_class = Some(class); - f(b, next); - } - }) - } - Transitions::Dense(ref dense) => { - for b in classes.representatives() { - f(b, dense[b]); - } - } - } - } - } -} - -/// Iterator over transitions in a state, skipping transitions to `fail_id()`. -/// -/// This abstracts over the representation of NFA transitions, which may be -/// either in a sparse or dense representation. -/// -/// This somewhat idiosyncratically borrows the NFA mutably, so that when one -/// is iterating over transitions, the caller can still mutate the NFA. This -/// is useful when creating failure transitions. 
-#[derive(Debug)] -struct IterTransitionsMut<'a, S: StateID> { - nfa: &'a mut NFA, - state_id: S, - cur: usize, -} - -impl<'a, S: StateID> IterTransitionsMut<'a, S> { - fn new(nfa: &'a mut NFA, state_id: S) -> IterTransitionsMut<'a, S> { - IterTransitionsMut { nfa, state_id, cur: 0 } - } - - fn nfa(&mut self) -> &mut NFA { - self.nfa - } -} - -impl<'a, S: StateID> Iterator for IterTransitionsMut<'a, S> { - type Item = (u8, S); - - fn next(&mut self) -> Option<(u8, S)> { - match self.nfa.states[self.state_id.to_usize()].trans { - Transitions::Sparse(ref sparse) => { - if self.cur >= sparse.len() { - return None; - } - let i = self.cur; - self.cur += 1; - Some(sparse[i]) - } - Transitions::Dense(ref dense) => { - while self.cur < dense.len() { - // There are always exactly 255 transitions in dense repr. - debug_assert!(self.cur < 256); - - let b = self.cur as u8; - let id = dense[b]; - self.cur += 1; - if id != fail_id() { - return Some((b, id)); - } - } - None - } - } - } -} - -/// A simple builder for configuring the NFA construction of Aho-Corasick. -#[derive(Clone, Debug)] -pub struct Builder { - dense_depth: usize, - match_kind: MatchKind, - prefilter: bool, - anchored: bool, - ascii_case_insensitive: bool, -} - -impl Default for Builder { - fn default() -> Builder { - Builder { - dense_depth: 2, - match_kind: MatchKind::default(), - prefilter: true, - anchored: false, - ascii_case_insensitive: false, - } - } -} - -impl Builder { - pub fn new() -> Builder { - Builder::default() - } - - pub fn build(&self, patterns: I) -> Result> - where - I: IntoIterator, - P: AsRef<[u8]>, - { - Compiler::new(self)?.compile(patterns) - } - - pub fn match_kind(&mut self, kind: MatchKind) -> &mut Builder { - self.match_kind = kind; - self - } - - pub fn dense_depth(&mut self, depth: usize) -> &mut Builder { - self.dense_depth = depth; - self - } - - pub fn prefilter(&mut self, yes: bool) -> &mut Builder { - self.prefilter = yes; - self - } - - pub fn anchored(&mut self, yes: bool) -> &mut Builder { - self.anchored = yes; - self - } - - pub fn ascii_case_insensitive(&mut self, yes: bool) -> &mut Builder { - self.ascii_case_insensitive = yes; - self - } -} - -/// A compiler uses a builder configuration and builds up the NFA formulation -/// of an Aho-Corasick automaton. This roughly corresponds to the standard -/// formulation described in textbooks. 
-#[derive(Debug)] -struct Compiler<'a, S: StateID> { - builder: &'a Builder, - prefilter: prefilter::Builder, - nfa: NFA, - byte_classes: ByteClassBuilder, -} - -impl<'a, S: StateID> Compiler<'a, S> { - fn new(builder: &'a Builder) -> Result> { - Ok(Compiler { - builder, - prefilter: prefilter::Builder::new(builder.match_kind) - .ascii_case_insensitive(builder.ascii_case_insensitive), - nfa: NFA { - match_kind: builder.match_kind, - start_id: usize_to_state_id(2)?, - max_pattern_len: 0, - pattern_count: 0, - heap_bytes: 0, - prefilter: None, - anchored: builder.anchored, - byte_classes: ByteClasses::singletons(), - states: vec![], - }, - byte_classes: ByteClassBuilder::new(), - }) - } - - fn compile(mut self, patterns: I) -> Result> - where - I: IntoIterator, - P: AsRef<[u8]>, - { - self.add_state(0)?; // the fail state, which is never entered - self.add_state(0)?; // the dead state, only used for leftmost - self.add_state(0)?; // the start state - self.build_trie(patterns)?; - self.add_start_state_loop(); - self.add_dead_state_loop(); - if !self.builder.anchored { - self.fill_failure_transitions(); - } - self.close_start_state_loop(); - self.nfa.byte_classes = self.byte_classes.build(); - if !self.builder.anchored { - self.nfa.prefilter = self.prefilter.build(); - } - self.calculate_size(); - Ok(self.nfa) - } - - /// This sets up the initial prefix trie that makes up the Aho-Corasick - /// automaton. Effectively, it creates the basic structure of the - /// automaton, where every pattern given has a path from the start state to - /// the end of the pattern. - fn build_trie(&mut self, patterns: I) -> Result<()> - where - I: IntoIterator, - P: AsRef<[u8]>, - { - 'PATTERNS: for (pati, pat) in patterns.into_iter().enumerate() { - let pat = pat.as_ref(); - self.nfa.max_pattern_len = - cmp::max(self.nfa.max_pattern_len, pat.len()); - self.nfa.pattern_count += 1; - - let mut prev = self.nfa.start_id; - let mut saw_match = false; - for (depth, &b) in pat.iter().enumerate() { - // When leftmost-first match semantics are requested, we - // specifically stop adding patterns when a previously added - // pattern is a prefix of it. We avoid adding it because - // leftmost-first semantics imply that the pattern can never - // match. This is not just an optimization to save space! It - // is necessary for correctness. In fact, this is the only - // difference in the automaton between the implementations for - // leftmost-first and leftmost-longest. - saw_match = saw_match || self.nfa.state(prev).is_match(); - if self.builder.match_kind.is_leftmost_first() && saw_match { - // Skip to the next pattern immediately. This avoids - // incorrectly adding a match after this loop terminates. - continue 'PATTERNS; - } - - // Add this byte to our equivalence classes. We don't use these - // for NFA construction. These are instead used only if we're - // building a DFA. They would technically be useful for the - // NFA, but it would require a second pass over the patterns. - self.byte_classes.set_range(b, b); - if self.builder.ascii_case_insensitive { - let b = opposite_ascii_case(b); - self.byte_classes.set_range(b, b); - } - - // If the transition from prev using the current byte already - // exists, then just move through it. Otherwise, add a new - // state. We track the depth here so that we can determine - // how to represent transitions. States near the start state - // use a dense representation that uses more memory but is - // faster. 
Other states use a sparse representation that uses - // less memory but is slower. - let next = self.nfa.state(prev).next_state(b); - if next != fail_id() { - prev = next; - } else { - let next = self.add_state(depth + 1)?; - self.nfa.state_mut(prev).set_next_state(b, next); - if self.builder.ascii_case_insensitive { - let b = opposite_ascii_case(b); - self.nfa.state_mut(prev).set_next_state(b, next); - } - prev = next; - } - } - // Once the pattern has been added, log the match in the final - // state that it reached. - self.nfa.state_mut(prev).add_match(pati, pat.len()); - // ... and hand it to the prefilter builder, if applicable. - if self.builder.prefilter { - self.prefilter.add(pat); - } - } - Ok(()) - } - - /// This routine creates failure transitions according to the standard - /// textbook formulation of the Aho-Corasick algorithm, with a couple small - /// tweaks to support "leftmost" semantics. - /// - /// Building failure transitions is the most interesting part of building - /// the Aho-Corasick automaton, because they are what allow searches to - /// be performed in linear time. Specifically, a failure transition is - /// a single transition associated with each state that points back to - /// the longest proper suffix of the pattern being searched. The failure - /// transition is followed whenever there exists no transition on the - /// current state for the current input byte. If there is no other proper - /// suffix, then the failure transition points back to the starting state. - /// - /// For example, let's say we built an Aho-Corasick automaton with the - /// following patterns: 'abcd' and 'cef'. The trie looks like this: - /// - /// ```ignore - /// a - S1 - b - S2 - c - S3 - d - S4* - /// / - /// S0 - c - S5 - e - S6 - f - S7* - /// ``` - /// - /// At this point, it should be fairly straight-forward to see how this - /// trie can be used in a simplistic way. At any given position in the - /// text we're searching (called the "subject" string), all we need to do - /// is follow the transitions in the trie by consuming one transition for - /// each byte in the subject string. If we reach a match state, then we can - /// report that location as a match. - /// - /// The trick comes when searching a subject string like 'abcef'. We'll - /// initially follow the transition from S0 to S1 and wind up in S3 after - /// observng the 'c' byte. At this point, the next byte is 'e' but state - /// S3 has no transition for 'e', so the search fails. We then would need - /// to restart the search at the next position in 'abcef', which - /// corresponds to 'b'. The match would fail, but the next search starting - /// at 'c' would finally succeed. The problem with this approach is that - /// we wind up searching the subject string potentially many times. In - /// effect, this makes the algorithm have worst case `O(n * m)` complexity, - /// where `n ~ len(subject)` and `m ~ len(all patterns)`. We would instead - /// like to achieve a `O(n + m)` worst case complexity. - /// - /// This is where failure transitions come in. Instead of dying at S3 in - /// the first search, the automaton can instruct the search to move to - /// another part of the automaton that corresponds to a suffix of what - /// we've seen so far. Recall that we've seen 'abc' in the subject string, - /// and the automaton does indeed have a non-empty suffix, 'c', that could - /// potentially lead to another match. 
Thus, the actual Aho-Corasick - /// automaton for our patterns in this case looks like this: - /// - /// ```ignore - /// a - S1 - b - S2 - c - S3 - d - S4* - /// / / - /// / ---------------- - /// / / - /// S0 - c - S5 - e - S6 - f - S7* - /// ``` - /// - /// That is, we have a failure transition from S3 to S5, which is followed - /// exactly in cases when we are in state S3 but see any byte other than - /// 'd' (that is, we've "failed" to find a match in this portion of our - /// trie). We know we can transition back to S5 because we've already seen - /// a 'c' byte, so we don't need to re-scan it. We can then pick back up - /// with the search starting at S5 and complete our match. - /// - /// Adding failure transitions to a trie is fairly simple, but subtle. The - /// key issue is that you might have multiple failure transition that you - /// need to follow. For example, look at the trie for the patterns - /// 'abcd', 'b', 'bcd' and 'cd': - /// - /// ```ignore - /// - a - S1 - b - S2* - c - S3 - d - S4* - /// / / / - /// / ------- ------- - /// / / / - /// S0 --- b - S5* - c - S6 - d - S7* - /// \ / - /// \ -------- - /// \ / - /// - c - S8 - d - S9* - /// ``` - /// - /// The failure transitions for this trie are defined from S2 to S5, - /// S3 to S6 and S6 to S8. Moreover, state S2 needs to track that it - /// corresponds to a match, since its failure transition to S5 is itself - /// a match state. - /// - /// Perhaps simplest way to think about adding these failure transitions - /// is recursively. That is, if you know the failure transitions for every - /// possible previous state that could be visited (e.g., when computing the - /// failure transition for S3, you already know the failure transitions - /// for S0, S1 and S2), then you can simply follow the failure transition - /// of the previous state and check whether the incoming transition is - /// defined after following the failure transition. - /// - /// For example, when determining the failure state for S3, by our - /// assumptions, we already know that there is a failure transition from - /// S2 (the previous state) to S5. So we follow that transition and check - /// whether the transition connecting S2 to S3 is defined. Indeed, it is, - /// as there is a transition from S5 to S6 for the byte 'c'. If no such - /// transition existed, we could keep following the failure transitions - /// until we reach the start state, which is the failure transition for - /// every state that has no corresponding proper suffix. - /// - /// We don't actually use recursion to implement this, but instead, use a - /// breadth first search of the automaton. Our base case is the start - /// state, whose failure transition is just a transition to itself. - /// - /// When building a leftmost automaton, we proceed as above, but only - /// include a subset of failure transitions. Namely, we omit any failure - /// transitions that appear after a match state in the trie. This is - /// because failure transitions always point back to a proper suffix of - /// what has been seen so far. Thus, following a failure transition after - /// a match implies looking for a match that starts after the one that has - /// already been seen, which is of course therefore not the leftmost match. - /// - /// N.B. I came up with this algorithm on my own, and after scouring all of - /// the other AC implementations I know of (Perl, Snort, many on GitHub). - /// I couldn't find any that implement leftmost semantics like this. 
- /// Perl of course needs leftmost-first semantics, but they implement it - /// with a seeming hack at *search* time instead of encoding it into the - /// automaton. There are also a couple Java libraries that support leftmost - /// longest semantics, but they do it by building a queue of matches at - /// search time, which is even worse than what Perl is doing. ---AG - fn fill_failure_transitions(&mut self) { - let kind = self.match_kind(); - // Initialize the queue for breadth first search with all transitions - // out of the start state. We handle the start state specially because - // we only want to follow non-self transitions. If we followed self - // transitions, then this would never terminate. - let mut queue = VecDeque::new(); - let mut seen = self.queued_set(); - let mut it = self.nfa.iter_transitions_mut(self.nfa.start_id); - while let Some((_, next)) = it.next() { - // Skip anything we've seen before and any self-transitions on the - // start state. - if next == it.nfa().start_id || seen.contains(next) { - continue; - } - queue.push_back(next); - seen.insert(next); - // Under leftmost semantics, if a state immediately following - // the start state is a match state, then we never want to - // follow its failure transition since the failure transition - // necessarily leads back to the start state, which we never - // want to do for leftmost matching after a match has been - // found. - // - // We apply the same logic to non-start states below as well. - if kind.is_leftmost() && it.nfa().state(next).is_match() { - it.nfa().state_mut(next).fail = dead_id(); - } - } - while let Some(id) = queue.pop_front() { - let mut it = self.nfa.iter_transitions_mut(id); - while let Some((b, next)) = it.next() { - if seen.contains(next) { - // The only way to visit a duplicate state in a transition - // list is when ASCII case insensitivity is enabled. In - // this case, we want to skip it since it's redundant work. - // But it would also end up duplicating matches, which - // results in reporting duplicate matches in some cases. - // See the 'acasei010' regression test. - continue; - } - queue.push_back(next); - seen.insert(next); - - // As above for start states, under leftmost semantics, once - // we see a match all subsequent states should have no failure - // transitions because failure transitions always imply looking - // for a match that is a suffix of what has been seen so far - // (where "seen so far" corresponds to the string formed by - // following the transitions from the start state to the - // current state). Under leftmost semantics, we specifically do - // not want to allow this to happen because we always want to - // report the match found at the leftmost position. - // - // The difference between leftmost-first and leftmost-longest - // occurs previously while we build the trie. For - // leftmost-first, we simply omit any entries that would - // otherwise require passing through a match state. - // - // Note that for correctness, the failure transition has to be - // set to the dead state for ALL states following a match, not - // just the match state itself. However, by setting the failure - // transition to the dead state on all match states, the dead - // state will automatically propagate to all subsequent states - // via the failure state computation below. 
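As an aside, the breadth-first failure-link construction described in the comments above can be made concrete with a small standalone sketch. This is a simplified toy (hypothetical `TrieNode` type, standard match semantics only, no leftmost tweaks and no match copying), not this crate's actual representation:

```rust
use std::collections::{HashMap, VecDeque};

#[derive(Default)]
struct TrieNode {
    next: HashMap<u8, usize>,
    fail: usize,
}

fn add_pattern(nodes: &mut Vec<TrieNode>, pat: &[u8]) {
    let mut cur = 0; // 0 is the root/start state
    for &b in pat {
        cur = match nodes[cur].next.get(&b) {
            Some(&n) => n,
            None => {
                nodes.push(TrieNode::default());
                let id = nodes.len() - 1;
                nodes[cur].next.insert(b, id);
                id
            }
        };
    }
}

// Standard failure links via breadth-first search: each state's failure
// link points at the state for the longest proper suffix of the path
// leading to it.
fn fill_failure_links(nodes: &mut Vec<TrieNode>) {
    let mut queue: VecDeque<usize> = VecDeque::new();
    let depth1: Vec<usize> = nodes[0].next.values().copied().collect();
    for id in depth1 {
        nodes[id].fail = 0; // depth-1 states always fail to the root
        queue.push_back(id);
    }
    while let Some(id) = queue.pop_front() {
        let trans: Vec<(u8, usize)> =
            nodes[id].next.iter().map(|(&b, &n)| (b, n)).collect();
        for (b, next) in trans {
            // Walk the parent's failure chain until some state has a
            // transition on `b` (or we bottom out at the root).
            let mut f = nodes[id].fail;
            while f != 0 && !nodes[f].next.contains_key(&b) {
                f = nodes[f].fail;
            }
            nodes[next].fail = nodes[f].next.get(&b).copied().unwrap_or(0);
            queue.push_back(next);
        }
    }
}

fn state_for(nodes: &[TrieNode], s: &[u8]) -> usize {
    s.iter().fold(0, |cur, b| nodes[cur].next[b])
}

fn main() {
    let mut nodes = vec![TrieNode::default()];
    for pat in ["abcd", "cef"] {
        add_pattern(&mut nodes, pat.as_bytes());
    }
    fill_failure_links(&mut nodes);
    // Mirrors the S3 -> S5 failure transition from the diagram above:
    // the state reached by "abc" fails to the state reached by "c".
    assert_eq!(state_for(&nodes, b"c"), nodes[state_for(&nodes, b"abc")].fail);
}
```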
- if kind.is_leftmost() && it.nfa().state(next).is_match() { - it.nfa().state_mut(next).fail = dead_id(); - continue; - } - let mut fail = it.nfa().state(id).fail; - while it.nfa().state(fail).next_state(b) == fail_id() { - fail = it.nfa().state(fail).fail; - } - fail = it.nfa().state(fail).next_state(b); - it.nfa().state_mut(next).fail = fail; - it.nfa().copy_matches(fail, next); - } - // If the start state is a match state, then this automaton can - // match the empty string. This implies all states are match states - // since every position matches the empty string, so copy the - // matches from the start state to every state. Strictly speaking, - // this is only necessary for overlapping matches since each - // non-empty non-start match state needs to report empty matches - // in addition to its own. For the non-overlapping case, such - // states only report the first match, which is never empty since - // it isn't a start state. - if !kind.is_leftmost() { - it.nfa().copy_empty_matches(id); - } - } - } - - /// Returns a set that tracked queued states. - /// - /// This is only necessary when ASCII case insensitivity is enabled, since - /// it is the only way to visit the same state twice. Otherwise, this - /// returns an inert set that nevers adds anything and always reports - /// `false` for every member test. - fn queued_set(&self) -> QueuedSet { - if self.builder.ascii_case_insensitive { - QueuedSet::active() - } else { - QueuedSet::inert() - } - } - - /// Set the failure transitions on the start state to loop back to the - /// start state. This effectively permits the Aho-Corasick automaton to - /// match at any position. This is also required for finding the next - /// state to terminate, namely, finding the next state should never return - /// a fail_id. - /// - /// This must be done after building the initial trie, since trie - /// construction depends on transitions to `fail_id` to determine whether a - /// state already exists or not. - fn add_start_state_loop(&mut self) { - let start_id = self.nfa.start_id; - let start = self.nfa.start_mut(); - for b in AllBytesIter::new() { - if start.next_state(b) == fail_id() { - start.set_next_state(b, start_id); - } - } - } - - /// Remove the start state loop by rewriting any transitions on the start - /// state back to the start state with transitions to the dead state. - /// - /// The loop is only closed when two conditions are met: the start state - /// is a match state and the match kind is leftmost-first or - /// leftmost-longest. (Alternatively, if this is an anchored automaton, - /// then the start state is always closed, regardless of aforementioned - /// conditions.) - /// - /// The reason for this is that under leftmost semantics, a start state - /// that is also a match implies that we should never restart the search - /// process. We allow normal transitions out of the start state, but if - /// none exist, we transition to the dead state, which signals that - /// searching should stop. - fn close_start_state_loop(&mut self) { - if self.builder.anchored - || (self.match_kind().is_leftmost() && self.nfa.start().is_match()) - { - let start_id = self.nfa.start_id; - let start = self.nfa.start_mut(); - for b in AllBytesIter::new() { - if start.next_state(b) == start_id { - start.set_next_state(b, dead_id()); - } - } - } - } - - /// Sets all transitions on the dead state to point back to the dead state. 
- /// Normally, missing transitions map back to the failure state, but the - /// point of the dead state is to act as a sink that can never be escaped. - fn add_dead_state_loop(&mut self) { - let dead = self.nfa.state_mut(dead_id()); - for b in AllBytesIter::new() { - dead.set_next_state(b, dead_id()); - } - } - - /// Computes the total amount of heap used by this NFA in bytes. - fn calculate_size(&mut self) { - let mut size = 0; - for state in &self.nfa.states { - size += size_of::>() + state.heap_bytes(); - } - self.nfa.heap_bytes = size; - } - - /// Add a new state to the underlying NFA with the given depth. The depth - /// is used to determine how to represent the transitions. - /// - /// If adding the new state would overflow the chosen state ID - /// representation, then this returns an error. - fn add_state(&mut self, depth: usize) -> Result { - if depth < self.builder.dense_depth { - self.nfa.add_dense_state(depth) - } else { - self.nfa.add_sparse_state(depth) - } - } - - /// Returns the match kind configured on the underlying builder. - fn match_kind(&self) -> MatchKind { - self.builder.match_kind - } -} - -/// A set of state identifiers used to avoid revisiting the same state multiple -/// times when filling in failure transitions. -/// -/// This set has an "inert" and an "active" mode. When inert, the set never -/// stores anything and always returns `false` for every member test. This is -/// useful to avoid the performance and memory overhead of maintaining this -/// set when it is not needed. -#[derive(Debug)] -struct QueuedSet { - set: Option>, -} - -impl QueuedSet { - /// Return an inert set that returns `false` for every state ID membership - /// test. - fn inert() -> QueuedSet { - QueuedSet { set: None } - } - - /// Return an active set that tracks state ID membership. - fn active() -> QueuedSet { - QueuedSet { set: Some(BTreeSet::new()) } - } - - /// Inserts the given state ID into this set. (If the set is inert, then - /// this is a no-op.) - fn insert(&mut self, state_id: S) { - if let Some(ref mut set) = self.set { - set.insert(state_id); - } - } - - /// Returns true if and only if the given state ID is in this set. If the - /// set is inert, this always returns false. - fn contains(&self, state_id: S) -> bool { - match self.set { - None => false, - Some(ref set) => set.contains(&state_id), - } - } -} - -/// An iterator over every byte value. -/// -/// We use this instead of (0..256).map(|b| b as u8) because this optimizes -/// better in debug builds. -/// -/// We also use this instead of 0..=255 because we're targeting Rust 1.24 and -/// inclusive range syntax was stabilized in Rust 1.26. We can get rid of this -/// once our MSRV is Rust 1.26 or newer. -#[derive(Debug)] -struct AllBytesIter(u16); - -impl AllBytesIter { - fn new() -> AllBytesIter { - AllBytesIter(0) - } -} - -impl Iterator for AllBytesIter { - type Item = u8; - - fn next(&mut self) -> Option { - if self.0 >= 256 { - None - } else { - let b = self.0 as u8; - self.0 += 1; - Some(b) - } - } -} - -impl fmt::Debug for NFA { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "NFA(")?; - writeln!(f, "match_kind: {:?}", self.match_kind)?; - writeln!(f, "prefilter: {:?}", self.prefilter)?; - writeln!(f, "{}", "-".repeat(79))?; - for (id, s) in self.states.iter().enumerate() { - let mut trans = vec![]; - s.trans.iter(|byte, next| { - // The start state has a bunch of uninteresting transitions - // back into itself. 
It's questionable to hide them since they - // are critical to understanding the automaton, but they are - // very noisy without better formatting for contiugous ranges - // to the same state. - if id == self.start_id.to_usize() && next == self.start_id { - return; - } - // Similarly, the dead state has a bunch of uninteresting - // transitions too. - if id == dead_id() { - return; - } - trans.push(format!("{} => {}", escape(byte), next.to_usize())); - }); - writeln!(f, "{:04}: {}", id, trans.join(", "))?; - - let matches: Vec = s - .matches - .iter() - .map(|&(pattern_id, _)| pattern_id.to_string()) - .collect(); - writeln!(f, " matches: {}", matches.join(", "))?; - writeln!(f, " fail: {}", s.fail.to_usize())?; - writeln!(f, " depth: {}", s.depth)?; - } - writeln!(f, "{}", "-".repeat(79))?; - writeln!(f, ")")?; - Ok(()) - } -} - -/// Iterate over all possible byte transitions given a sparse set. -fn sparse_iter(trans: &[(u8, S)], mut f: F) { - let mut byte = 0u16; - for &(b, id) in trans { - while byte < (b as u16) { - f(byte as u8, fail_id()); - byte += 1; - } - f(b, id); - byte += 1; - } - for b in byte..256 { - f(b as u8, fail_id()); - } -} - -/// Safely return two mutable borrows to two different locations in the given -/// slice. -/// -/// This panics if i == j. -fn get_two_mut(xs: &mut [T], i: usize, j: usize) -> (&mut T, &mut T) { - assert!(i != j, "{} must not be equal to {}", i, j); - if i < j { - let (before, after) = xs.split_at_mut(j); - (&mut before[i], &mut after[0]) - } else { - let (before, after) = xs.split_at_mut(i); - (&mut after[0], &mut before[j]) - } -} - -/// Return the given byte as its escaped string form. -fn escape(b: u8) -> String { - use std::ascii; - - String::from_utf8(ascii::escape_default(b).collect::>()).unwrap() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn scratch() { - let nfa: NFA = Builder::new() - .dense_depth(0) - // .match_kind(MatchKind::LeftmostShortest) - // .match_kind(MatchKind::LeftmostLongest) - .match_kind(MatchKind::LeftmostFirst) - // .build(&["abcd", "ce", "b"]) - // .build(&["ab", "bc"]) - // .build(&["b", "bcd", "ce"]) - // .build(&["abc", "bx"]) - // .build(&["abc", "bd", "ab"]) - // .build(&["abcdefghi", "hz", "abcdefgh"]) - // .build(&["abcd", "bce", "b"]) - .build(&["abcdefg", "bcde", "bcdef"]) - .unwrap(); - println!("{:?}", nfa); - } -} diff --git a/vendor/aho-corasick/src/nfa/contiguous.rs b/vendor/aho-corasick/src/nfa/contiguous.rs new file mode 100644 index 0000000..29c1621 --- /dev/null +++ b/vendor/aho-corasick/src/nfa/contiguous.rs @@ -0,0 +1,1141 @@ +/*! +Provides a contiguous NFA implementation of Aho-Corasick. + +This is a low-level API that generally only needs to be used in niche +circumstances. When possible, prefer using [`AhoCorasick`](crate::AhoCorasick) +instead of a contiguous NFA directly. Using an `NFA` directly is typically only +necessary when one needs access to the [`Automaton`] trait implementation. +*/ + +use alloc::{vec, vec::Vec}; + +use crate::{ + automaton::Automaton, + nfa::noncontiguous, + util::{ + alphabet::ByteClasses, + error::{BuildError, MatchError}, + int::{Usize, U16, U32}, + prefilter::Prefilter, + primitives::{IteratorIndexExt, PatternID, SmallIndex, StateID}, + search::{Anchored, MatchKind}, + special::Special, + }, +}; + +/// A contiguous NFA implementation of Aho-Corasick. +/// +/// When possible, prefer using [`AhoCorasick`](crate::AhoCorasick) instead of +/// this type directly. 
Using an `NFA` directly is typically only necessary
+/// when one needs access to the [`Automaton`] trait implementation.
+///
+/// This NFA can only be built by first constructing a [`noncontiguous::NFA`].
+/// Both [`NFA::new`] and [`Builder::build`] do this for you automatically, but
+/// [`Builder::build_from_noncontiguous`] permits doing it explicitly.
+///
+/// The main difference between a noncontiguous NFA and a contiguous NFA is
+/// that the latter represents all of its states and transitions in a single
+/// allocation, whereas the former uses a separate allocation for each state.
+/// Doing this at construction time while keeping a low memory footprint isn't
+/// feasible, which is primarily why there are two different NFA types: one
+/// that does the least amount of work possible to build itself, and another
+/// that does a little extra work to compact itself and make state transitions
+/// faster by making some states use a dense representation.
+///
+/// Because a contiguous NFA uses a single allocation, there is a lot more
+/// opportunity for compression tricks to reduce the heap memory used. Indeed,
+/// it is not uncommon for a contiguous NFA to use an order of magnitude less
+/// heap memory than a noncontiguous NFA. Since building a contiguous NFA
+/// usually only takes a fraction of the time it takes to build a noncontiguous
+/// NFA, the overall build time is not much slower. Thus, in most cases, a
+/// contiguous NFA is the best choice.
+///
+/// Since a contiguous NFA uses various tricks for compression and to achieve
+/// faster state transitions, currently, its limit on the number of states
+/// is somewhat smaller than what a noncontiguous NFA can achieve. Generally
+/// speaking, you shouldn't expect to run into this limit if the number of
+/// patterns is under 1 million. It is plausible that this limit will be
+/// increased in the future. If the limit is reached, building a contiguous NFA
+/// will return an error. Often, since building a contiguous NFA is relatively
+/// cheap, it can make sense to always try it even if you aren't sure if it
+/// will fail or not. If it does, you can always fall back to a noncontiguous
+/// NFA. (Indeed, the main [`AhoCorasick`](crate::AhoCorasick) type employs a
+/// strategy similar to this at construction time.)
+///
+/// # Example
+///
+/// This example shows how to build an `NFA` directly and use it to execute
+/// [`Automaton::try_find`]:
+///
+/// ```
+/// use aho_corasick::{
+///     automaton::Automaton,
+///     nfa::contiguous::NFA,
+///     Input, Match,
+/// };
+///
+/// let patterns = &["b", "abc", "abcd"];
+/// let haystack = "abcd";
+///
+/// let nfa = NFA::new(patterns).unwrap();
+/// assert_eq!(
+///     Some(Match::must(0, 1..2)),
+///     nfa.try_find(&Input::new(haystack))?,
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// It is also possible to implement your own version of `try_find`. See the
+/// [`Automaton`] documentation for an example.
+#[derive(Clone)]
+pub struct NFA {
+    /// The raw NFA representation. Each state is packed with a header
+    /// (containing the format of the state, the failure transition and, for
+    /// a sparse state, the number of transitions), its transitions and any
+    /// matching pattern IDs for match states.
+    repr: Vec<u32>,
+    /// The length of each pattern. This is used to compute the start offset
+    /// of a match.
+    pattern_lens: Vec<SmallIndex>,
+    /// The total number of states in this NFA.
+    state_len: usize,
+    /// A prefilter for accelerating searches, if one exists.
+    prefilter: Option<Prefilter>,
+    /// The match semantics built into this NFA.
+    match_kind: MatchKind,
+    /// The alphabet size, or total number of equivalence classes, for this
+    /// NFA. Dense states always have this many transitions.
+    alphabet_len: usize,
+    /// The equivalence classes for this NFA. All transitions, dense and
+    /// sparse, are defined on equivalence classes and not on the 256 distinct
+    /// byte values.
+    byte_classes: ByteClasses,
+    /// The length of the shortest pattern in this automaton.
+    min_pattern_len: usize,
+    /// The length of the longest pattern in this automaton.
+    max_pattern_len: usize,
+    /// The information required to deduce which states are "special" in this
+    /// NFA.
+    special: Special,
+}
+
+impl NFA {
+    /// Create a new Aho-Corasick contiguous NFA using the default
+    /// configuration.
+    ///
+    /// Use a [`Builder`] if you want to change the configuration.
+    pub fn new<I, P>(patterns: I) -> Result<NFA, BuildError>
+    where
+        I: IntoIterator<Item = P>,
+        P: AsRef<[u8]>,
+    {
+        NFA::builder().build(patterns)
+    }
+
+    /// A convenience method for returning a new Aho-Corasick contiguous NFA
+    /// builder.
+    ///
+    /// This usually permits one to just import the `NFA` type.
+    pub fn builder() -> Builder {
+        Builder::new()
+    }
+}
+
+impl NFA {
+    /// A sentinel state ID indicating that a search should stop once it has
+    /// entered this state. When a search stops, it returns a match if one
+    /// has been found, otherwise no match. A contiguous NFA always has an
+    /// actual dead state at this ID.
+    const DEAD: StateID = StateID::new_unchecked(0);
+    /// Another sentinel state ID indicating that a search should move through
+    /// the current state's failure transition.
+    ///
+    /// Note that unlike DEAD, this does not actually point to a valid state
+    /// in a contiguous NFA. (noncontiguous::NFA::FAIL does point to a valid
+    /// state.) Instead, this points to the position that is guaranteed to
+    /// never be a valid state ID (by making sure it points to a place in the
+    /// middle of the encoding of the DEAD state). Since we never need to
+    /// actually look at the FAIL state itself, this works out.
+    ///
+    /// But why do it this way? So that FAIL is a constant. I don't have any
+    /// concrete evidence that this materially helps matters, but it's easy to
+    /// do. The alternative would be making the FAIL ID point to the second
+    /// state, which could be made a constant but is a little trickier to do.
+    /// The easiest path is to just make the FAIL state a runtime value, but
+    /// since comparisons with FAIL occur in perf critical parts of the search,
+    /// we want it to be as tight as possible and not waste any registers.
+    ///
+    /// Very hand wavy... But the code complexity that results from this is
+    /// very mild.
+    const FAIL: StateID = StateID::new_unchecked(1);
+}
+
+// SAFETY: 'start_state' always returns a valid state ID, 'next_state' always
+// returns a valid state ID given a valid state ID. We otherwise claim that
+// all other methods are correct as well.
+unsafe impl Automaton for NFA {
+    #[inline(always)]
+    fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError> {
+        match anchored {
+            Anchored::No => Ok(self.special.start_unanchored_id),
+            Anchored::Yes => Ok(self.special.start_anchored_id),
+        }
+    }
+
+    #[inline(always)]
+    fn next_state(
+        &self,
+        anchored: Anchored,
+        mut sid: StateID,
+        byte: u8,
+    ) -> StateID {
+        let repr = &self.repr;
+        let class = self.byte_classes.get(byte);
+        let u32tosid = StateID::from_u32_unchecked;
+        loop {
+            let o = sid.as_usize();
+            let kind = repr[o] & 0xFF;
+            // I tried to encapsulate the "next transition" logic into its own
+            // function, but it seemed to always result in sub-optimal codegen
+            // that led to real and significant slowdowns. So we just inline
+            // the logic here.
+            //
+            // I've also tried a lot of different ways to speed up this
+            // routine, and most of them have failed.
+            if kind == State::KIND_DENSE {
+                let next = u32tosid(repr[o + 2 + usize::from(class)]);
+                if next != NFA::FAIL {
+                    return next;
+                }
+            } else if kind == State::KIND_ONE {
+                if class == repr[o].low_u16().high_u8() {
+                    return u32tosid(repr[o + 2]);
+                }
+            } else {
+                // NOTE: I tried a SWAR technique in the loop below, but found
+                // it slower. See the 'swar' test in the tests for this module.
+                let trans_len = kind.as_usize();
+                let classes_len = u32_len(trans_len);
+                let trans_offset = o + 2 + classes_len;
+                for (i, &chunk) in
+                    repr[o + 2..][..classes_len].iter().enumerate()
+                {
+                    let classes = chunk.to_ne_bytes();
+                    if classes[0] == class {
+                        return u32tosid(repr[trans_offset + i * 4]);
+                    }
+                    if classes[1] == class {
+                        return u32tosid(repr[trans_offset + i * 4 + 1]);
+                    }
+                    if classes[2] == class {
+                        return u32tosid(repr[trans_offset + i * 4 + 2]);
+                    }
+                    if classes[3] == class {
+                        return u32tosid(repr[trans_offset + i * 4 + 3]);
+                    }
+                }
+            }
+            // For an anchored search, we never follow failure transitions
+            // because failure transitions lead us down a path to matching
+            // a *proper* suffix of the path we were on. Thus, it can only
+            // produce matches that appear after the beginning of the search.
+            if anchored.is_anchored() {
+                return NFA::DEAD;
+            }
+            sid = u32tosid(repr[o + 1]);
+        }
+    }
+
+    #[inline(always)]
+    fn is_special(&self, sid: StateID) -> bool {
+        sid <= self.special.max_special_id
+    }
+
+    #[inline(always)]
+    fn is_dead(&self, sid: StateID) -> bool {
+        sid == NFA::DEAD
+    }
+
+    #[inline(always)]
+    fn is_match(&self, sid: StateID) -> bool {
+        !self.is_dead(sid) && sid <= self.special.max_match_id
+    }
+
+    #[inline(always)]
+    fn is_start(&self, sid: StateID) -> bool {
+        sid == self.special.start_unanchored_id
+            || sid == self.special.start_anchored_id
+    }
+
+    #[inline(always)]
+    fn match_kind(&self) -> MatchKind {
+        self.match_kind
+    }
+
+    #[inline(always)]
+    fn patterns_len(&self) -> usize {
+        self.pattern_lens.len()
+    }
+
+    #[inline(always)]
+    fn pattern_len(&self, pid: PatternID) -> usize {
+        self.pattern_lens[pid].as_usize()
+    }
+
+    #[inline(always)]
+    fn min_pattern_len(&self) -> usize {
+        self.min_pattern_len
+    }
+
+    #[inline(always)]
+    fn max_pattern_len(&self) -> usize {
+        self.max_pattern_len
+    }
+
+    #[inline(always)]
+    fn match_len(&self, sid: StateID) -> usize {
+        State::match_len(self.alphabet_len, &self.repr[sid.as_usize()..])
+    }
+
+    #[inline(always)]
+    fn match_pattern(&self, sid: StateID, index: usize) -> PatternID {
+        State::match_pattern(
+            self.alphabet_len,
+            &self.repr[sid.as_usize()..],
+            index,
+        )
+    }
+
+    #[inline(always)]
+    fn memory_usage(&self) -> usize {
+        use core::mem::size_of;
+
+        (self.repr.len() * size_of::<u32>())
+            + (self.pattern_lens.len() * size_of::<SmallIndex>())
+            + self.prefilter.as_ref().map_or(0, |p| p.memory_usage())
+    }
+
+    #[inline(always)]
+    fn prefilter(&self) -> Option<&Prefilter> {
+        self.prefilter.as_ref()
+    }
+}
+
+impl core::fmt::Debug for NFA {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        use crate::automaton::fmt_state_indicator;
+
+        writeln!(f, "contiguous::NFA(")?;
+        let mut sid = NFA::DEAD; // always the first state and always present
+        loop {
+            let raw = &self.repr[sid.as_usize()..];
+            if raw.is_empty() {
+                break;
+            }
+            let is_match = self.is_match(sid);
+            let state = State::read(self.alphabet_len, is_match, raw);
+            fmt_state_indicator(f, self, sid)?;
+            write!(
+                f,
+                "{:06}({:06}): ",
+                sid.as_usize(),
+                state.fail.as_usize()
+            )?;
+            state.fmt(f)?;
+            write!(f, "\n")?;
+            if self.is_match(sid) {
+                write!(f, " matches: ")?;
+                for i in 0..state.match_len {
+                    let pid = State::match_pattern(self.alphabet_len, raw, i);
+                    if i > 0 {
+                        write!(f, ", ")?;
+                    }
+                    write!(f, "{}", pid.as_usize())?;
+                }
+                write!(f, "\n")?;
+            }
+            // The FAIL state doesn't actually have space for a state allocated
+            // for it, so we have to treat it as a special case. We write it
+            // below the DEAD state.
+ if sid == NFA::DEAD { + writeln!(f, "F {:06}:", NFA::FAIL.as_usize())?; + } + let len = State::len(self.alphabet_len, is_match, raw); + sid = StateID::new(sid.as_usize().checked_add(len).unwrap()) + .unwrap(); + } + writeln!(f, "match kind: {:?}", self.match_kind)?; + writeln!(f, "prefilter: {:?}", self.prefilter.is_some())?; + writeln!(f, "state length: {:?}", self.state_len)?; + writeln!(f, "pattern length: {:?}", self.patterns_len())?; + writeln!(f, "shortest pattern length: {:?}", self.min_pattern_len)?; + writeln!(f, "longest pattern length: {:?}", self.max_pattern_len)?; + writeln!(f, "alphabet length: {:?}", self.alphabet_len)?; + writeln!(f, "byte classes: {:?}", self.byte_classes)?; + writeln!(f, "memory usage: {:?}", self.memory_usage())?; + writeln!(f, ")")?; + + Ok(()) + } +} + +/// The "in memory" representation a single dense or sparse state. +/// +/// A `State`'s in memory representation is not ever actually materialized +/// during a search with a contiguous NFA. Doing so would be too slow. (Indeed, +/// the only time a `State` is actually constructed is in `Debug` impls.) +/// Instead, a `State` exposes a number of static methods for reading certain +/// things from the raw binary encoding of the state. +#[derive(Clone)] +struct State<'a> { + /// The state to transition to when 'class_to_next' yields a transition + /// to the FAIL state. + fail: StateID, + /// The number of pattern IDs in this state. For a non-match state, this is + /// always zero. Otherwise it is always bigger than zero. + match_len: usize, + /// The sparse or dense representation of the transitions for this state. + trans: StateTrans<'a>, +} + +/// The underlying representation of sparse or dense transitions for a state. +/// +/// Note that like `State`, we don't typically construct values of this type +/// during a search since we don't always need all values and thus would +/// represent a lot of wasteful work. +#[derive(Clone)] +enum StateTrans<'a> { + /// A sparse representation of transitions for a state, where only non-FAIL + /// transitions are explicitly represented. + Sparse { + classes: &'a [u32], + /// The transitions for this state, where each transition is packed + /// into a u32. The low 8 bits correspond to the byte class for the + /// transition, and the high 24 bits correspond to the next state ID. + /// + /// This packing is why the max state ID allowed for a contiguous + /// NFA is 2^24-1. + nexts: &'a [u32], + }, + /// A "one transition" state that is never a match state. + /// + /// These are by far the most common state, so we use a specialized and + /// very compact representation for them. + One { + /// The element of this NFA's alphabet that this transition is + /// defined for. + class: u8, + /// The state this should transition to if the current symbol is + /// equal to 'class'. + next: u32, + }, + /// A dense representation of transitions for a state, where all + /// transitions are explicitly represented, including transitions to the + /// FAIL state. + Dense { + /// A dense set of transitions to other states. The transitions may + /// point to a FAIL state, in which case, the search should try the + /// same transition lookup at 'fail'. + /// + /// Note that this is indexed by byte equivalence classes and not + /// byte values. That means 'class_to_next[byte]' is wrong and + /// 'class_to_next[classes.get(byte)]' is correct. The number of + /// transitions is always equivalent to 'classes.alphabet_len()'. 
+ class_to_next: &'a [u32],
+ },
+}
+
+impl<'a> State<'a> {
+ /// The offset of where the "kind" of a state is stored. If it isn't one
+ /// of the sentinel values below, then it's a sparse state and the kind
+ /// corresponds to the number of transitions in the state.
+ const KIND: usize = 0;
+
+ /// A sentinel value indicating that the state uses a dense representation.
+ const KIND_DENSE: u32 = 0xFF;
+ /// A sentinel value indicating that the state uses a special "one
+ /// transition" encoding. In practice, non-match states with one transition
+ /// make up the overwhelming majority of all states in any given
+ /// Aho-Corasick automaton, so we can specialize them using a very compact
+ /// representation.
+ const KIND_ONE: u32 = 0xFE;
+
+ /// The maximum number of transitions to encode as a sparse state. Usually
+ /// states with a lot of transitions are either very rare, or occur near
+ /// the start state. In the latter case, they are probably dense already
+ /// anyway. In the former case, making them dense is fine because they're
+ /// rare.
+ ///
+ /// This needs to be small enough to permit each of the sentinel values for
+ /// 'KIND' above. Namely, a sparse state embeds the number of transitions
+ /// into the 'KIND'. Basically, "sparse" is a state kind too, but it's the
+ /// "else" branch.
+ ///
+ /// N.B. There isn't anything particularly magical about 127 here. I
+ /// just picked it because I figured any sparse state with this many
+ /// transitions is going to be exceptionally rare, and if it did have this
+ /// many transitions, then it would be quite slow to do a linear scan on
+ /// the transitions during a search anyway.
+ const MAX_SPARSE_TRANSITIONS: usize = 127;
+
+ /// Remap state IDs in-place.
+ ///
+ /// `state` should be the raw binary encoding of a state. (The start
+ /// of the slice must correspond to the start of the state, but the slice
+ /// may extend past the end of the encoding of the state.)
+ fn remap(
+ alphabet_len: usize,
+ old_to_new: &[StateID],
+ state: &mut [u32],
+ ) -> Result<(), BuildError> {
+ let kind = State::kind(state);
+ if kind == State::KIND_DENSE {
+ state[1] = old_to_new[state[1].as_usize()].as_u32();
+ for next in state[2..][..alphabet_len].iter_mut() {
+ *next = old_to_new[next.as_usize()].as_u32();
+ }
+ } else if kind == State::KIND_ONE {
+ state[1] = old_to_new[state[1].as_usize()].as_u32();
+ state[2] = old_to_new[state[2].as_usize()].as_u32();
+ } else {
+ let trans_len = State::sparse_trans_len(state);
+ let classes_len = u32_len(trans_len);
+ state[1] = old_to_new[state[1].as_usize()].as_u32();
+ for next in state[2 + classes_len..][..trans_len].iter_mut() {
+ *next = old_to_new[next.as_usize()].as_u32();
+ }
+ }
+ Ok(())
+ }
+
+ /// Returns the length, in number of u32s, of this state.
+ ///
+ /// This is useful for reading states consecutively, e.g., in the Debug
+ /// impl without needing to store a separate map from state index to state
+ /// identifier.
+ ///
+ /// `state` should be the raw binary encoding of a state. (The start
+ /// of the slice must correspond to the start of the state, but the slice
+ /// may extend past the end of the encoding of the state.)
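+ ///
+ /// (As an illustration of the layout, our own example and not from the
+ /// original docs: a non-match sparse state with 3 transitions occupies
+ /// 1 u32 for the kind, 1 for the fail transition, u32_len(3) == 1 for
+ /// the packed byte classes and 3 for the next state IDs, so 6 u32s in
+ /// total.)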
+ fn len(alphabet_len: usize, is_match: bool, state: &[u32]) -> usize {
+ let kind_len = 1;
+ let fail_len = 1;
+ let kind = State::kind(state);
+ let (classes_len, trans_len) = if kind == State::KIND_DENSE {
+ (0, alphabet_len)
+ } else if kind == State::KIND_ONE {
+ (0, 1)
+ } else {
+ let trans_len = State::sparse_trans_len(state);
+ let classes_len = u32_len(trans_len);
+ (classes_len, trans_len)
+ };
+ let match_len = if !is_match {
+ 0
+ } else if State::match_len(alphabet_len, state) == 1 {
+ // This is a special case because when there is one pattern ID for
+ // a match state, it is represented by a single u32 with its high
+ // bit set (which is impossible for a valid pattern ID).
+ 1
+ } else {
+ // We add 1 to include the u32 that indicates the number of
+ // pattern IDs that follow.
+ 1 + State::match_len(alphabet_len, state)
+ };
+ kind_len + fail_len + classes_len + trans_len + match_len
+ }
+
+ /// Returns the kind of this state.
+ ///
+ /// This only includes the low byte.
+ #[inline(always)]
+ fn kind(state: &[u32]) -> u32 {
+ state[State::KIND] & 0xFF
+ }
+
+ /// Get the number of sparse transitions in this state. This can never
+ /// be more than State::MAX_SPARSE_TRANSITIONS, as all states with more
+ /// transitions are encoded as dense states.
+ ///
+ /// `state` should be the raw binary encoding of a sparse state. (The
+ /// start of the slice must correspond to the start of the state, but the
+ /// slice may extend past the end of the encoding of the state.) If this
+ /// isn't a sparse state, then the return value is unspecified.
+ ///
+ /// Do note that this is only legal to call on a sparse state. So for
+ /// example, a "one transition" state is not a sparse state, and it would
+ /// not be legal to call this method on such a state.
+ #[inline(always)]
+ fn sparse_trans_len(state: &[u32]) -> usize {
+ (state[State::KIND] & 0xFF).as_usize()
+ }
+
+ /// Returns the total number of matching pattern IDs in this state. Calling
+ /// this on a state that isn't a match state results in unspecified
+ /// behavior. Thus, for all correct calls, the returned number is never 0.
+ ///
+ /// `state` should be the raw binary encoding of a state. (The start
+ /// of the slice must correspond to the start of the state, but the slice
+ /// may extend past the end of the encoding of the state.)
+ #[inline(always)]
+ fn match_len(alphabet_len: usize, state: &[u32]) -> usize {
+ // We don't need to handle KIND_ONE here because it can never be a
+ // match state.
+ let packed = if State::kind(state) == State::KIND_DENSE {
+ let start = 2 + alphabet_len;
+ state[start].as_usize()
+ } else {
+ let trans_len = State::sparse_trans_len(state);
+ let classes_len = u32_len(trans_len);
+ let start = 2 + classes_len + trans_len;
+ state[start].as_usize()
+ };
+ if packed & (1 << 31) == 0 {
+ packed
+ } else {
+ 1
+ }
+ }
+
+ /// Returns the pattern ID corresponding to the given index for the state
+ /// given. The `index` provided must be less than the number of pattern IDs
+ /// in this state.
+ ///
+ /// `state` should be the raw binary encoding of a state. (The start of
+ /// the slice must correspond to the start of the state, but the slice may
+ /// extend past the end of the encoding of the state.)
+ ///
+ /// If the given state is not a match state or if the index is out of
+ /// bounds, then this has unspecified behavior.
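+ ///
+ /// (For example, and as an illustration of the packed encoding described
+ /// in `match_len` rather than something from the original docs: a match
+ /// state whose only pattern ID is 5 stores the single u32
+ /// `(1 << 31) | 5`, so `index` must be 0; a state matching patterns 5
+ /// and 9 stores the count 2 followed by the u32s 5 and 9, and `index`
+ /// selects among them directly.)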
+ #[inline(always)]
+ fn match_pattern(
+ alphabet_len: usize,
+ state: &[u32],
+ index: usize,
+ ) -> PatternID {
+ // We don't need to handle KIND_ONE here because it can never be a
+ // match state.
+ let start = if State::kind(state) == State::KIND_DENSE {
+ 2 + alphabet_len
+ } else {
+ let trans_len = State::sparse_trans_len(state);
+ let classes_len = u32_len(trans_len);
+ 2 + classes_len + trans_len
+ };
+ let packed = state[start];
+ let pid = if packed & (1 << 31) == 0 {
+ state[start + 1 + index]
+ } else {
+ assert_eq!(0, index);
+ packed & !(1 << 31)
+ };
+ PatternID::from_u32_unchecked(pid)
+ }
+
+ /// Read a state's binary encoding to its in-memory representation.
+ ///
+ /// `alphabet_len` should be the total number of transitions defined for
+ /// dense states.
+ ///
+ /// `is_match` should be true if this state is a match state and false
+ /// otherwise.
+ ///
+ /// `state` should be the raw binary encoding of a state. (The start
+ /// of the slice must correspond to the start of the state, but the slice
+ /// may extend past the end of the encoding of the state.)
+ fn read(
+ alphabet_len: usize,
+ is_match: bool,
+ state: &'a [u32],
+ ) -> State<'a> {
+ let kind = State::kind(state);
+ let match_len =
+ if !is_match { 0 } else { State::match_len(alphabet_len, state) };
+ let (trans, fail) = if kind == State::KIND_DENSE {
+ let fail = StateID::from_u32_unchecked(state[1]);
+ let class_to_next = &state[2..][..alphabet_len];
+ (StateTrans::Dense { class_to_next }, fail)
+ } else if kind == State::KIND_ONE {
+ let fail = StateID::from_u32_unchecked(state[1]);
+ let class = state[State::KIND].low_u16().high_u8();
+ let next = state[2];
+ (StateTrans::One { class, next }, fail)
+ } else {
+ let fail = StateID::from_u32_unchecked(state[1]);
+ let trans_len = State::sparse_trans_len(state);
+ let classes_len = u32_len(trans_len);
+ let classes = &state[2..][..classes_len];
+ let nexts = &state[2 + classes_len..][..trans_len];
+ (StateTrans::Sparse { classes, nexts }, fail)
+ };
+ State { fail, match_len, trans }
+ }
+
+ /// Encode the "old" state from a noncontiguous NFA to its binary
+ /// representation into the given `dst` slice. `classes` should be the byte
+ /// classes computed for the noncontiguous NFA that the given state came
+ /// from.
+ ///
+ /// This returns an error if `dst` became so big that `StateID`s can no
+ /// longer be created for new states. Otherwise, it returns the state ID of
+ /// the new state created.
+ ///
+ /// When `force_dense` is true, then the encoded state will always use a
+ /// dense format. Otherwise, the representation will be chosen
+ /// automatically based on the old state.
+ fn write(
+ nnfa: &noncontiguous::NFA,
+ oldsid: StateID,
+ old: &noncontiguous::State,
+ classes: &ByteClasses,
+ dst: &mut Vec<u32>,
+ force_dense: bool,
+ ) -> Result<StateID, BuildError> {
+ let sid = StateID::new(dst.len()).map_err(|e| {
+ BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted())
+ })?;
+ let old_len = nnfa.iter_trans(oldsid).count();
+ // For states with a lot of transitions, we might as well just make
+ // them dense. These kinds of hot states tend to be very rare, so we're
+ // okay with it. This also gives us more sentinels in the state's
+ // 'kind', which lets us create different state kinds to save on
+ // space.
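+ //
+ // (An illustration of the choice made below, for concreteness: a
+ // non-match state with 3 transitions gets kind==3, a non-match state
+ // with exactly 1 transition gets KIND_ONE, and a state with more than
+ // MAX_SPARSE_TRANSITIONS transitions, or with force_dense set, gets
+ // KIND_DENSE.)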
+ let kind = if force_dense || old_len > State::MAX_SPARSE_TRANSITIONS {
+ State::KIND_DENSE
+ } else if old_len == 1 && !old.is_match() {
+ State::KIND_ONE
+ } else {
+ // For a sparse state, the kind is just the number of transitions.
+ u32::try_from(old_len).unwrap()
+ };
+ if kind == State::KIND_DENSE {
+ dst.push(kind);
+ dst.push(old.fail().as_u32());
+ State::write_dense_trans(nnfa, oldsid, classes, dst)?;
+ } else if kind == State::KIND_ONE {
+ let t = nnfa.iter_trans(oldsid).next().unwrap();
+ let class = u32::from(classes.get(t.byte()));
+ dst.push(kind | (class << 8));
+ dst.push(old.fail().as_u32());
+ dst.push(t.next().as_u32());
+ } else {
+ dst.push(kind);
+ dst.push(old.fail().as_u32());
+ State::write_sparse_trans(nnfa, oldsid, classes, dst)?;
+ }
+ // Now finally write the number of matches and the matches themselves.
+ if old.is_match() {
+ let matches_len = nnfa.iter_matches(oldsid).count();
+ if matches_len == 1 {
+ let pid = nnfa.iter_matches(oldsid).next().unwrap().as_u32();
+ assert_eq!(0, pid & (1 << 31));
+ dst.push((1 << 31) | pid);
+ } else {
+ assert_eq!(0, matches_len & (1 << 31));
+ dst.push(matches_len.as_u32());
+ dst.extend(nnfa.iter_matches(oldsid).map(|pid| pid.as_u32()));
+ }
+ }
+ Ok(sid)
+ }
+
+ /// Encode the "old" state transitions from a noncontiguous NFA to its
+ /// binary sparse representation in the given `dst` slice. `classes` should
+ /// be the byte classes computed for the noncontiguous NFA that the given
+ /// state came from.
+ ///
+ /// This returns an error if `dst` became so big that `StateID`s can no
+ /// longer be created for new states.
+ fn write_sparse_trans(
+ nnfa: &noncontiguous::NFA,
+ oldsid: StateID,
+ classes: &ByteClasses,
+ dst: &mut Vec<u32>,
+ ) -> Result<(), BuildError> {
+ let (mut chunk, mut len) = ([0; 4], 0);
+ for t in nnfa.iter_trans(oldsid) {
+ chunk[len] = classes.get(t.byte());
+ len += 1;
+ if len == 4 {
+ dst.push(u32::from_ne_bytes(chunk));
+ chunk = [0; 4];
+ len = 0;
+ }
+ }
+ if len > 0 {
+ // In the case where the number of transitions isn't divisible
+ // by 4, the last u32 chunk will have some left over room. In
+ // this case, we "just" repeat the last equivalence class. By
+ // doing this, we know the leftover faux transitions will never
+ // be followed, because the real transition for that equivalence
+ // class appears earlier in the chunk and would be followed
+ // first. This saves us some branching in the search time state
+ // transition code.
+ let repeat = chunk[len - 1];
+ while len < 4 {
+ chunk[len] = repeat;
+ len += 1;
+ }
+ dst.push(u32::from_ne_bytes(chunk));
+ }
+ for t in nnfa.iter_trans(oldsid) {
+ dst.push(t.next().as_u32());
+ }
+ Ok(())
+ }
+
+ /// Encode the "old" state transitions from a noncontiguous NFA to its
+ /// binary dense representation in the given `dst` slice. `classes` should
+ /// be the byte classes computed for the noncontiguous NFA that the given
+ /// state came from.
+ ///
+ /// This returns an error if `dst` became so big that `StateID`s can no
+ /// longer be created for new states.
+ fn write_dense_trans(
+ nnfa: &noncontiguous::NFA,
+ oldsid: StateID,
+ classes: &ByteClasses,
+ dst: &mut Vec<u32>,
+ ) -> Result<(), BuildError> {
+ // Our byte classes let us shrink the size of our dense states to the
+ // number of equivalence classes instead of just fixing it to 256.
+ // Any non-explicitly defined transition is just a transition to the
+ // FAIL state, so we fill that in first and then overwrite them with
+ // explicitly defined transitions.
(Most states probably only have one
+ // or two explicitly defined transitions.)
+ //
+ // N.B. Remember that while building the contiguous NFA, we use state
+ // IDs from the noncontiguous NFA. It isn't until we've added all
+ // states that we go back and map noncontiguous IDs to contiguous IDs.
+ let start = dst.len();
+ dst.extend(
+ core::iter::repeat(noncontiguous::NFA::FAIL.as_u32())
+ .take(classes.alphabet_len()),
+ );
+ assert!(start < dst.len(), "equivalence classes are never empty");
+ for t in nnfa.iter_trans(oldsid) {
+ dst[start + usize::from(classes.get(t.byte()))] =
+ t.next().as_u32();
+ }
+ Ok(())
+ }
+
+ /// Return an iterator over every explicitly defined transition in this
+ /// state.
+ fn transitions<'b>(&'b self) -> impl Iterator<Item = (u8, StateID)> + 'b {
+ let mut i = 0;
+ core::iter::from_fn(move || match self.trans {
+ StateTrans::Sparse { classes, nexts } => {
+ if i >= nexts.len() {
+ return None;
+ }
+ let chunk = classes[i / 4];
+ let class = chunk.to_ne_bytes()[i % 4];
+ let next = StateID::from_u32_unchecked(nexts[i]);
+ i += 1;
+ Some((class, next))
+ }
+ StateTrans::One { class, next } => {
+ if i == 0 {
+ i += 1;
+ Some((class, StateID::from_u32_unchecked(next)))
+ } else {
+ None
+ }
+ }
+ StateTrans::Dense { class_to_next } => {
+ if i >= class_to_next.len() {
+ return None;
+ }
+ let class = i.as_u8();
+ let next = StateID::from_u32_unchecked(class_to_next[i]);
+ i += 1;
+ Some((class, next))
+ }
+ })
+ }
+}
+
+impl<'a> core::fmt::Debug for State<'a> {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ use crate::{automaton::sparse_transitions, util::debug::DebugByte};
+
+ let it = sparse_transitions(self.transitions())
+ // Writing out all FAIL transitions is quite noisy. Instead, we
+ // just require readers of the output to assume anything absent
+ // maps to the FAIL transition.
+ .filter(|&(_, _, sid)| sid != NFA::FAIL)
+ .enumerate();
+ for (i, (start, end, sid)) in it {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ if start == end {
+ write!(f, "{:?} => {:?}", DebugByte(start), sid.as_usize())?;
+ } else {
+ write!(
+ f,
+ "{:?}-{:?} => {:?}",
+ DebugByte(start),
+ DebugByte(end),
+ sid.as_usize()
+ )?;
+ }
+ }
+ Ok(())
+ }
+}
+
+/// A builder for configuring an Aho-Corasick contiguous NFA.
+///
+/// This builder has a subset of the options available to an
+/// [`AhoCorasickBuilder`](crate::AhoCorasickBuilder). Of the shared options,
+/// their behavior is identical.
+#[derive(Clone, Debug)]
+pub struct Builder {
+ noncontiguous: noncontiguous::Builder,
+ dense_depth: usize,
+ byte_classes: bool,
+}
+
+impl Default for Builder {
+ fn default() -> Builder {
+ Builder {
+ noncontiguous: noncontiguous::Builder::new(),
+ dense_depth: 2,
+ byte_classes: true,
+ }
+ }
+}
+
+impl Builder {
+ /// Create a new builder for configuring an Aho-Corasick contiguous NFA.
+ pub fn new() -> Builder {
+ Builder::default()
+ }
+
+ /// Build an Aho-Corasick contiguous NFA from the given iterator of
+ /// patterns.
+ ///
+ /// A builder may be reused to create more NFAs.
+ pub fn build<I, P>(&self, patterns: I) -> Result<NFA, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ let nnfa = self.noncontiguous.build(patterns)?;
+ self.build_from_noncontiguous(&nnfa)
+ }
+
+ /// Build an Aho-Corasick contiguous NFA from the given noncontiguous NFA.
+ ///
+ /// Note that when this method is used, only the `dense_depth` and
+ /// `byte_classes` settings on this builder are respected. The other
+ /// settings only apply to the initial construction of the Aho-Corasick
+ /// automaton.
Since using this method requires that initial construction
+ /// has already completed, all settings impacting only initial construction
+ /// are no longer relevant.
+ pub fn build_from_noncontiguous(
+ &self,
+ nnfa: &noncontiguous::NFA,
+ ) -> Result<NFA, BuildError> {
+ debug!("building contiguous NFA");
+ let byte_classes = if self.byte_classes {
+ nnfa.byte_classes().clone()
+ } else {
+ ByteClasses::singletons()
+ };
+ let mut index_to_state_id = vec![NFA::DEAD; nnfa.states().len()];
+ let mut nfa = NFA {
+ repr: vec![],
+ pattern_lens: nnfa.pattern_lens_raw().to_vec(),
+ state_len: nnfa.states().len(),
+ prefilter: nnfa.prefilter().map(|p| p.clone()),
+ match_kind: nnfa.match_kind(),
+ alphabet_len: byte_classes.alphabet_len(),
+ byte_classes,
+ min_pattern_len: nnfa.min_pattern_len(),
+ max_pattern_len: nnfa.max_pattern_len(),
+ // The special state IDs are set later.
+ special: Special::zero(),
+ };
+ for (oldsid, state) in nnfa.states().iter().with_state_ids() {
+ // We don't actually encode a fail state since it isn't necessary.
+ // But we still want to make sure any FAIL ids are mapped
+ // correctly.
+ if oldsid == noncontiguous::NFA::FAIL {
+ index_to_state_id[oldsid] = NFA::FAIL;
+ continue;
+ }
+ let force_dense = state.depth().as_usize() < self.dense_depth;
+ let newsid = State::write(
+ nnfa,
+ oldsid,
+ state,
+ &nfa.byte_classes,
+ &mut nfa.repr,
+ force_dense,
+ )?;
+ index_to_state_id[oldsid] = newsid;
+ }
+ for &newsid in index_to_state_id.iter() {
+ if newsid == NFA::FAIL {
+ continue;
+ }
+ let state = &mut nfa.repr[newsid.as_usize()..];
+ State::remap(nfa.alphabet_len, &index_to_state_id, state)?;
+ }
+ // Now that we've remapped all the IDs in our states, all that's left
+ // is remapping the special state IDs.
+ let remap = &index_to_state_id;
+ let old = nnfa.special();
+ let new = &mut nfa.special;
+ new.max_special_id = remap[old.max_special_id];
+ new.max_match_id = remap[old.max_match_id];
+ new.start_unanchored_id = remap[old.start_unanchored_id];
+ new.start_anchored_id = remap[old.start_anchored_id];
+ debug!(
+ "contiguous NFA built, <states: {:?}, size: {:?}, alphabet len: {:?}>",
+ nfa.state_len,
+ nfa.memory_usage(),
+ nfa.byte_classes.alphabet_len(),
+ );
+ // The vectors can grow ~twice as big during construction because a
+ // Vec amortizes growth. But here, let's shrink things back down to
+ // what we actually need since we're never going to add more to it.
+ nfa.repr.shrink_to_fit();
+ nfa.pattern_lens.shrink_to_fit();
+ Ok(nfa)
+ }
+
+ /// Set the desired match semantics.
+ ///
+ /// This only applies when using [`Builder::build`] and not
+ /// [`Builder::build_from_noncontiguous`].
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::match_kind`](crate::AhoCorasickBuilder::match_kind)
+ /// for more documentation and examples.
+ pub fn match_kind(&mut self, kind: MatchKind) -> &mut Builder {
+ self.noncontiguous.match_kind(kind);
+ self
+ }
+
+ /// Enable ASCII-aware case insensitive matching.
+ ///
+ /// This only applies when using [`Builder::build`] and not
+ /// [`Builder::build_from_noncontiguous`].
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::ascii_case_insensitive`](crate::AhoCorasickBuilder::ascii_case_insensitive)
+ /// for more documentation and examples.
+ pub fn ascii_case_insensitive(&mut self, yes: bool) -> &mut Builder {
+ self.noncontiguous.ascii_case_insensitive(yes);
+ self
+ }
+
+ /// Enable heuristic prefilter optimizations.
+ ///
+ /// This only applies when using [`Builder::build`] and not
+ /// [`Builder::build_from_noncontiguous`].
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::prefilter`](crate::AhoCorasickBuilder::prefilter)
+ /// for more documentation and examples.
+ pub fn prefilter(&mut self, yes: bool) -> &mut Builder {
+ self.noncontiguous.prefilter(yes);
+ self
+ }
+
+ /// Set the limit on how many states use a dense representation for their
+ /// transitions. Other states will generally use a sparse representation.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::dense_depth`](crate::AhoCorasickBuilder::dense_depth)
+ /// for more documentation and examples.
+ pub fn dense_depth(&mut self, depth: usize) -> &mut Builder {
+ self.dense_depth = depth;
+ self
+ }
+
+ /// A debug setting for whether to attempt to shrink the size of the
+ /// automaton's alphabet or not.
+ ///
+ /// This should never be disabled unless you're debugging an automaton.
+ /// Namely, disabling byte classes makes transitions easier to reason
+ /// about, since they use the actual bytes instead of equivalence classes.
+ /// Disabling this confers no performance benefit at search time.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::byte_classes`](crate::AhoCorasickBuilder::byte_classes)
+ /// for more documentation and examples.
+ pub fn byte_classes(&mut self, yes: bool) -> &mut Builder {
+ self.byte_classes = yes;
+ self
+ }
+}
+
+/// Computes the number of u32 values needed to represent one byte per the
+/// number of transitions given.
+fn u32_len(ntrans: usize) -> usize {
+ if ntrans % 4 == 0 {
+ ntrans >> 2
+ } else {
+ (ntrans >> 2) + 1
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ // This test demonstrates a SWAR technique I tried in the sparse transition
+ // code inside of 'next_state'. Namely, sparse transitions work by
+ // iterating over u32 chunks, with each chunk containing up to 4 classes
+ // corresponding to 4 transitions. This SWAR technique lets us find a
+ // matching transition without converting the u32 to a [u8; 4].
+ //
+ // It turned out to be a little slower unfortunately, which isn't too
+ // surprising, since this is likely a throughput-oriented optimization.
+ // Loop unrolling doesn't really help us because the vast majority of
+ // states have very few transitions.
+ //
+ // Anyway, this code was a little tricky to write, so I converted it to a
+ // test in case someone figures out how to use it more effectively than
+ // I could.
+ //
+ // (This also only works on little endian. So big endian would need to be
+ // accounted for if we ever decided to use this I think.)
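+ //
+ // (A worked example of the trick, added here for illustration: with
+ // chunk = u32::from_ne_bytes([0x31, 0x41, 0x61, 0x7A]) and
+ // needle = broadcast(0x61) = 0x61616161, 'needle ^ chunk' zeroes
+ // exactly the third byte, 'has_zero_byte' maps that zero byte to 0x80
+ // and every other byte to 0x00, and 'index_of' then recovers the
+ // index 2.)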
+ #[cfg(target_endian = "little")]
+ #[test]
+ fn swar() {
+ use super::*;
+
+ fn has_zero_byte(x: u32) -> u32 {
+ const LO_U32: u32 = 0x01010101;
+ const HI_U32: u32 = 0x80808080;
+
+ x.wrapping_sub(LO_U32) & !x & HI_U32
+ }
+
+ fn broadcast(b: u8) -> u32 {
+ (u32::from(b)) * (u32::MAX / 255)
+ }
+
+ fn index_of(x: u32) -> usize {
+ let o =
+ (((x - 1) & 0x01010101).wrapping_mul(0x01010101) >> 24) - 1;
+ o.as_usize()
+ }
+
+ let bytes: [u8; 4] = [b'1', b'A', b'a', b'z'];
+ let chunk = u32::from_ne_bytes(bytes);
+
+ let needle = broadcast(b'1');
+ assert_eq!(0, index_of(has_zero_byte(needle ^ chunk)));
+ let needle = broadcast(b'A');
+ assert_eq!(1, index_of(has_zero_byte(needle ^ chunk)));
+ let needle = broadcast(b'a');
+ assert_eq!(2, index_of(has_zero_byte(needle ^ chunk)));
+ let needle = broadcast(b'z');
+ assert_eq!(3, index_of(has_zero_byte(needle ^ chunk)));
+ }
+}
diff --git a/vendor/aho-corasick/src/nfa/mod.rs b/vendor/aho-corasick/src/nfa/mod.rs new file mode 100644 index 0000000..93f4dc2 --- /dev/null +++ b/vendor/aho-corasick/src/nfa/mod.rs @@ -0,0 +1,40 @@
+/*!
+Provides direct access to NFA implementations of Aho-Corasick.
+
+The principal characteristic of an NFA in this crate is that it may
+transition through multiple states per byte of haystack. In Aho-Corasick
+parlance, NFAs follow failure transitions during a search. In contrast,
+a [`DFA`](crate::dfa::DFA) pre-computes all failure transitions during
+compilation at the expense of a much bigger memory footprint.
+
+Currently, there are two NFA implementations provided: noncontiguous and
+contiguous. The names reflect their internal representation, and consequently,
+the trade-offs associated with them:
+
+* A [`noncontiguous::NFA`] uses a separate allocation for every NFA state to
+represent its transitions in a sparse format. This is ideal for building an
+NFA, since it cheaply permits different states to have a different number of
+transitions. A noncontiguous NFA is where the main Aho-Corasick construction
+algorithm is implemented. All other Aho-Corasick implementations are built by
+first constructing a noncontiguous NFA.
+* A [`contiguous::NFA`] uses a single allocation to represent all states,
+while still encoding most states as sparse states but permitting states near
+the starting state to have a dense representation. The dense representation
+uses more memory, but permits computing transitions during a search more
+quickly. By only making the most active states dense (the states near the
+starting state), a contiguous NFA better balances memory usage with search
+speed. The single contiguous allocation also uses less overhead per state and
+enables compression tricks where most states only use 8 bytes of heap memory.
+
+When given the choice between these two, you almost always want to pick a
+contiguous NFA. It takes only a little longer to build, but both its memory
+usage and search speed are typically much better than a noncontiguous NFA. A
+noncontiguous NFA is useful when prioritizing build times, or when there are
+so many patterns that a contiguous NFA could not be built. (Currently, because
+of both memory and search speed improvements, a contiguous NFA has a smaller
+internal limit on the total number of NFA states it can represent. But you
+would likely need to have hundreds of thousands or even millions of patterns
+before you hit this limit.)
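+
+As a rough illustration of how the two types relate (a sketch against this
+module's public API, not an excerpt from the crate), both implement the
+[`Automaton`](crate::automaton::Automaton) trait and can be searched in the
+same way:
+
+```
+use aho_corasick::{automaton::Automaton, nfa::{contiguous, noncontiguous}, Input};
+
+let patterns = &["samwise", "sam"];
+let nnfa = noncontiguous::NFA::new(patterns).unwrap();
+let cnfa = contiguous::NFA::new(patterns).unwrap();
+// Same results; the contiguous NFA typically uses less memory and searches
+// faster.
+assert_eq!(
+    nnfa.try_find(&Input::new("samwise"))?,
+    cnfa.try_find(&Input::new("samwise"))?,
+);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```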
+*/
+pub mod contiguous;
+pub mod noncontiguous;
diff --git a/vendor/aho-corasick/src/nfa/noncontiguous.rs b/vendor/aho-corasick/src/nfa/noncontiguous.rs new file mode 100644 index 0000000..af32617 --- /dev/null +++ b/vendor/aho-corasick/src/nfa/noncontiguous.rs @@ -0,0 +1,1762 @@
+/*!
+Provides a noncontiguous NFA implementation of Aho-Corasick.
+
+This is a low-level API that generally only needs to be used in niche
+circumstances. When possible, prefer using [`AhoCorasick`](crate::AhoCorasick)
+instead of a noncontiguous NFA directly. Using an `NFA` directly is typically
+only necessary when one needs access to the [`Automaton`] trait implementation.
+*/
+
+use alloc::{
+ collections::{BTreeSet, VecDeque},
+ vec,
+ vec::Vec,
+};
+
+use crate::{
+ automaton::Automaton,
+ util::{
+ alphabet::{ByteClassSet, ByteClasses},
+ error::{BuildError, MatchError},
+ prefilter::{self, opposite_ascii_case, Prefilter},
+ primitives::{IteratorIndexExt, PatternID, SmallIndex, StateID},
+ remapper::Remapper,
+ search::{Anchored, MatchKind},
+ special::Special,
+ },
+};
+
+/// A noncontiguous NFA implementation of Aho-Corasick.
+///
+/// When possible, prefer using [`AhoCorasick`](crate::AhoCorasick) instead of
+/// this type directly. Using an `NFA` directly is typically only necessary
+/// when one needs access to the [`Automaton`] trait implementation.
+///
+/// This NFA represents the "core" implementation of Aho-Corasick in this
+/// crate. Namely, constructing this NFA involves building a trie and then
+/// filling in the failure transitions between states, similar to what is
+/// described in any standard textbook description of Aho-Corasick.
+///
+/// In order to minimize heap usage and to avoid additional construction costs,
+/// this implementation represents the transitions of all states as distinct
+/// sparse memory allocations. This is where it gets its name from. That is,
+/// this NFA has no contiguous memory allocation for its transition table. Each
+/// state gets its own allocation.
+///
+/// While the sparse representation keeps memory usage to somewhat reasonable
+/// levels, it is still quite large and also results in somewhat mediocre
+/// search performance. For this reason, it is almost always a good idea to
+/// use a [`contiguous::NFA`](crate::nfa::contiguous::NFA) instead. It is
+/// marginally slower to build, but has higher throughput and can sometimes use
+/// an order of magnitude less memory. The main reason to use a noncontiguous
+/// NFA is when you need the fastest possible construction time, or when a
+/// contiguous NFA does not have the desired capacity. (The total number of NFA
+/// states it can have is smaller than for a noncontiguous NFA.)
+///
+/// # Example
+///
+/// This example shows how to build an `NFA` directly and use it to execute
+/// [`Automaton::try_find`]:
+///
+/// ```
+/// use aho_corasick::{
+/// automaton::Automaton,
+/// nfa::noncontiguous::NFA,
+/// Input, Match,
+/// };
+///
+/// let patterns = &["b", "abc", "abcd"];
+/// let haystack = "abcd";
+///
+/// let nfa = NFA::new(patterns).unwrap();
+/// assert_eq!(
+/// Some(Match::must(0, 1..2)),
+/// nfa.try_find(&Input::new(haystack))?,
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// It is also possible to implement your own version of `try_find`. See the
+/// [`Automaton`] documentation for an example.
+#[derive(Clone)]
+pub struct NFA {
+ /// The match semantics built into this NFA.
+ match_kind: MatchKind,
+ /// A set of states.
Each state defines its own transitions, a fail
+ /// transition and a set of indices corresponding to matches.
+ ///
+ /// The first state (index 0) is always the dead state. Dead states are
+ /// in every automaton, but only used when leftmost-{first,longest} match
+ /// semantics are enabled. Specifically, they instruct search to stop
+ /// at specific points in order to report the correct match location. In
+ /// the standard Aho-Corasick construction, there are no transitions to
+ /// the dead state.
+ ///
+ /// The second state (index 1) is always the fail state, which is used
+ /// only as a sentinel. Namely, in the final NFA, no transition into the
+ /// fail state exists. (Well, they do, but they aren't followed. Instead,
+ /// the state's failure transition is followed.)
+ ///
+ /// The third state (index 2) is generally intended to be the starting or
+ /// "root" state.
+ states: Vec<State>,
+ /// Transitions stored in a sparse representation via a linked list.
+ ///
+ /// Each transition contains three pieces of information: the byte it
+ /// is defined for, the state it transitions to and a link to the next
+ /// transition in the same state (or `StateID::ZERO` if it is the last
+ /// transition).
+ ///
+ /// The first transition for each state is determined by `State::sparse`.
+ ///
+ /// Note that this contains a complete set of all transitions in this NFA,
+ /// including states that have a dense representation for transitions.
+ /// (Adding dense transitions for a state doesn't remove its sparse
+ /// transitions, since deleting transitions from this particular sparse
+ /// representation would be fairly expensive.)
+ sparse: Vec<Transition>,
+ /// Transitions stored in a dense representation.
+ ///
+ /// A state has a row in this table if and only if `State::dense` is
+ /// not equal to `StateID::ZERO`. When not zero, there are precisely
+ /// `NFA::byte_classes::alphabet_len()` entries beginning at `State::dense`
+ /// in this table.
+ ///
+ /// Generally a very small minority of states have a dense representation
+ /// since it uses so much memory.
+ dense: Vec<StateID>,
+ /// Matches stored in linked list for each state.
+ ///
+ /// Like sparse transitions, each match has a link to the next match in the
+ /// state.
+ ///
+ /// The first match for each state is determined by `State::matches`.
+ matches: Vec<Match>,
+ /// The length, in bytes, of each pattern in this NFA. This slice is
+ /// indexed by `PatternID`.
+ ///
+ /// The number of entries in this vector corresponds to the total number of
+ /// patterns in this automaton.
+ pattern_lens: Vec<SmallIndex>,
+ /// A prefilter for quickly skipping to candidate matches, if pertinent.
+ prefilter: Option<Prefilter>,
+ /// A set of equivalence classes in terms of bytes. We compute this while
+ /// building the NFA, but don't use it in the NFA's states. Instead, we
+ /// use this for building the DFA. We store it on the NFA since it's easy
+ /// to compute while visiting the patterns.
+ byte_classes: ByteClasses,
+ /// The length, in bytes, of the shortest pattern in this automaton. This
+ /// information is useful for detecting whether an automaton matches the
+ /// empty string or not.
+ min_pattern_len: usize,
+ /// The length, in bytes, of the longest pattern in this automaton. This
+ /// information is useful for keeping correct buffer sizes when searching
+ /// on streams.
+ max_pattern_len: usize,
+ /// The information required to deduce which states are "special" in this
+ /// NFA.
+ ///
+ /// Since the DEAD and FAIL states are always the first two states and
+ /// there are only ever two start states (which follow all of the match
+ /// states), it follows that we can determine whether a state is a fail,
+ /// dead, match or start with just a few comparisons on the ID itself:
+ ///
+ /// is_dead(sid): sid == NFA::DEAD
+ /// is_fail(sid): sid == NFA::FAIL
+ /// is_match(sid): NFA::FAIL < sid && sid <= max_match_id
+ /// is_start(sid): sid == start_unanchored_id || sid == start_anchored_id
+ ///
+ /// Note that this only applies to the NFA after it has been constructed.
+ /// During construction, the start states are the first ones added and the
+ /// match states are interleaved with non-match states. Once all of the
+ /// states have been added, the states are shuffled such that the above
+ /// predicates hold.
+ special: Special,
+}
+
+impl NFA {
+ /// Create a new Aho-Corasick noncontiguous NFA using the default
+ /// configuration.
+ ///
+ /// Use a [`Builder`] if you want to change the configuration.
+ pub fn new<I, P>(patterns: I) -> Result<NFA, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ NFA::builder().build(patterns)
+ }
+
+ /// A convenience method for returning a new Aho-Corasick noncontiguous NFA
+ /// builder.
+ ///
+ /// This usually permits one to just import the `NFA` type.
+ pub fn builder() -> Builder {
+ Builder::new()
+ }
+}
+
+impl NFA {
+ /// The DEAD state is a sentinel state like the FAIL state. The DEAD state
+ /// instructs any search to stop and return any currently recorded match,
+ /// or no match otherwise. Generally speaking, it is impossible for an
+ /// unanchored standard search to enter a DEAD state. But an anchored
+ /// search can, and so too can a leftmost search.
+ ///
+ /// We put DEAD before FAIL so that DEAD is always 0. We repeat this
+ /// decision across the other Aho-Corasick automata, so that DEAD
+ /// states there are always 0 too. It's not that we need all of the
+ /// implementations to agree, but rather, the contiguous NFA and the DFA
+ /// use a sort of "premultiplied" state identifier where the only state
+ /// whose ID is always known and constant is the first state. Subsequent
+ /// state IDs depend on how much space has already been used in the
+ /// transition table.
+ pub(crate) const DEAD: StateID = StateID::new_unchecked(0);
+ /// The FAIL state mostly just corresponds to the ID of any transition on a
+ /// state that isn't explicitly defined. When one transitions into the FAIL
+ /// state, one must follow the previous state's failure transition before
+ /// doing the next state lookup. In this way, FAIL is more of a sentinel
+ /// than a state that one actually transitions into. In particular, it is
+ /// never exposed in the `Automaton` interface.
+ pub(crate) const FAIL: StateID = StateID::new_unchecked(1);
+
+ /// Returns the equivalence classes of bytes found while constructing
+ /// this NFA.
+ ///
+ /// Note that the NFA doesn't actually make use of these equivalence
+ /// classes. Instead, these are useful for building the DFA when desired.
+ pub(crate) fn byte_classes(&self) -> &ByteClasses {
+ &self.byte_classes
+ }
+
+ /// Returns a slice containing the length of each pattern in this searcher.
+ /// It is indexed by `PatternID` and has length `NFA::patterns_len`.
+ ///
+ /// This is exposed for convenience when building a contiguous NFA. But it
+ /// can be reconstructed from the `Automaton` API if necessary.
+ pub(crate) fn pattern_lens_raw(&self) -> &[SmallIndex] {
+ &self.pattern_lens
+ }
+
+ /// Returns a slice of all states in this non-contiguous NFA.
+ pub(crate) fn states(&self) -> &[State] {
+ &self.states
+ }
+
+ /// Returns the underlying "special" state information for this NFA.
+ pub(crate) fn special(&self) -> &Special {
+ &self.special
+ }
+
+ /// Swaps the states at `id1` and `id2`.
+ ///
+ /// This does not update the transitions of any state to account for the
+ /// state swap.
+ pub(crate) fn swap_states(&mut self, id1: StateID, id2: StateID) {
+ self.states.swap(id1.as_usize(), id2.as_usize());
+ }
+
+ /// Re-maps all state IDs in this NFA according to the `map` function
+ /// given.
+ pub(crate) fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
+ let alphabet_len = self.byte_classes.alphabet_len();
+ for state in self.states.iter_mut() {
+ state.fail = map(state.fail);
+ let mut link = state.sparse;
+ while link != StateID::ZERO {
+ let t = &mut self.sparse[link];
+ t.next = map(t.next);
+ link = t.link;
+ }
+ if state.dense != StateID::ZERO {
+ let start = state.dense.as_usize();
+ for next in self.dense[start..][..alphabet_len].iter_mut() {
+ *next = map(*next);
+ }
+ }
+ }
+ }
+
+ /// Iterate over all of the transitions for the given state ID.
+ pub(crate) fn iter_trans(
+ &self,
+ sid: StateID,
+ ) -> impl Iterator<Item = Transition> + '_ {
+ let mut link = self.states[sid].sparse;
+ core::iter::from_fn(move || {
+ if link == StateID::ZERO {
+ return None;
+ }
+ let t = self.sparse[link];
+ link = t.link;
+ Some(t)
+ })
+ }
+
+ /// Iterate over all of the matches for the given state ID.
+ pub(crate) fn iter_matches(
+ &self,
+ sid: StateID,
+ ) -> impl Iterator<Item = PatternID> + '_ {
+ let mut link = self.states[sid].matches;
+ core::iter::from_fn(move || {
+ if link == StateID::ZERO {
+ return None;
+ }
+ let m = self.matches[link];
+ link = m.link;
+ Some(m.pid)
+ })
+ }
+
+ /// Return the link following the one given. If the one given is the last
+ /// link for the given state, then return `None`.
+ ///
+ /// If no previous link is given, then this returns the first link in the
+ /// state, if one exists.
+ ///
+ /// This is useful for manually iterating over the transitions in a single
+ /// state without borrowing the NFA. This permits mutating other parts of
+ /// the NFA during iteration. Namely, one can access the transition pointed
+ /// to by the link via `self.sparse[link]`.
+ fn next_link(
+ &self,
+ sid: StateID,
+ prev: Option<StateID>,
+ ) -> Option<StateID> {
+ let link =
+ prev.map_or(self.states[sid].sparse, |p| self.sparse[p].link);
+ if link == StateID::ZERO {
+ None
+ } else {
+ Some(link)
+ }
+ }
+
+ /// Follow the transition for the given byte in the given state. If no such
+ /// transition exists, then the FAIL state ID is returned.
+ #[inline(always)]
+ fn follow_transition(&self, sid: StateID, byte: u8) -> StateID {
+ let s = &self.states[sid];
+ // This is a special case that targets starting states and states
+ // near a start state. Namely, after the initial trie is constructed,
+ // we look for states close to the start state to convert to a dense
+ // representation for their transitions. This winds up using a lot more
+ // memory per state in exchange for faster transition lookups. But
+ // since we only do this for a small number of states (by default), the
+ // memory usage is usually minimal.
+ //
+ // This has *massive* benefit when executing searches because the
+ // unanchored starting state is by far the hottest state and is
+ // frequently visited.
Moreover, the 'for' loop below that works
+ // decently on an actually sparse state is disastrous on a state that
+ // is nearly or completely dense.
+ if s.dense == StateID::ZERO {
+ self.follow_transition_sparse(sid, byte)
+ } else {
+ let class = usize::from(self.byte_classes.get(byte));
+ self.dense[s.dense.as_usize() + class]
+ }
+ }
+
+ /// Like `follow_transition`, but always uses the sparse representation.
+ #[inline(always)]
+ fn follow_transition_sparse(&self, sid: StateID, byte: u8) -> StateID {
+ for t in self.iter_trans(sid) {
+ if byte <= t.byte {
+ if byte == t.byte {
+ return t.next;
+ }
+ break;
+ }
+ }
+ NFA::FAIL
+ }
+
+ /// Set the transition for the given byte to the state ID given.
+ ///
+ /// Note that one should not set transitions to the FAIL state. It is not
+ /// technically incorrect, but it wastes space. If a transition is not
+ /// defined, then it is automatically assumed to lead to the FAIL state.
+ fn add_transition(
+ &mut self,
+ prev: StateID,
+ byte: u8,
+ next: StateID,
+ ) -> Result<(), BuildError> {
+ if self.states[prev].dense != StateID::ZERO {
+ let dense = self.states[prev].dense;
+ let class = usize::from(self.byte_classes.get(byte));
+ self.dense[dense.as_usize() + class] = next;
+ }
+
+ let head = self.states[prev].sparse;
+ if head == StateID::ZERO || byte < self.sparse[head].byte {
+ let new_link = self.alloc_transition()?;
+ self.sparse[new_link] = Transition { byte, next, link: head };
+ self.states[prev].sparse = new_link;
+ return Ok(());
+ } else if byte == self.sparse[head].byte {
+ self.sparse[head].next = next;
+ return Ok(());
+ }
+
+ // We handled the only cases where the beginning of the transition
+ // chain needs to change. At this point, we now know that there is
+ // at least one entry in the transition chain and the byte for that
+ // transition is less than the byte for the transition we're adding.
+ let (mut link_prev, mut link_next) = (head, self.sparse[head].link);
+ while link_next != StateID::ZERO && byte > self.sparse[link_next].byte
+ {
+ link_prev = link_next;
+ link_next = self.sparse[link_next].link;
+ }
+ if link_next == StateID::ZERO || byte < self.sparse[link_next].byte {
+ let link = self.alloc_transition()?;
+ self.sparse[link] = Transition { byte, next, link: link_next };
+ self.sparse[link_prev].link = link;
+ } else {
+ assert_eq!(byte, self.sparse[link_next].byte);
+ self.sparse[link_next].next = next;
+ }
+ Ok(())
+ }
+
+ /// This sets every possible transition (all 256 of them) for the given
+ /// state to the given `next` value.
+ ///
+ /// This is useful for efficiently initializing start/dead states.
+ ///
+ /// # Panics
+ ///
+ /// This requires that the state has no transitions added to it already.
+ /// If it has any transitions, then this panics. It will also panic if
+ /// the state has been densified prior to calling this.
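+ ///
+ /// (As an illustration, ours and not from the original docs: a call of
+ /// the shape `init_full_state(dead, dead)` is how the DEAD state gets a
+ /// self-loop on every one of the 256 byte values, so that a search can
+ /// never escape it.)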
+ fn init_full_state(
+ &mut self,
+ prev: StateID,
+ next: StateID,
+ ) -> Result<(), BuildError> {
+ assert_eq!(
+ StateID::ZERO,
+ self.states[prev].dense,
+ "state must not be dense yet"
+ );
+ assert_eq!(
+ StateID::ZERO,
+ self.states[prev].sparse,
+ "state must have zero transitions"
+ );
+ let mut prev_link = StateID::ZERO;
+ for byte in 0..=255 {
+ let new_link = self.alloc_transition()?;
+ self.sparse[new_link] =
+ Transition { byte, next, link: StateID::ZERO };
+ if prev_link == StateID::ZERO {
+ self.states[prev].sparse = new_link;
+ } else {
+ self.sparse[prev_link].link = new_link;
+ }
+ prev_link = new_link;
+ }
+ Ok(())
+ }
+
+ /// Add a match for the given pattern ID to the state for the given ID.
+ fn add_match(
+ &mut self,
+ sid: StateID,
+ pid: PatternID,
+ ) -> Result<(), BuildError> {
+ let head = self.states[sid].matches;
+ let mut link = head;
+ while self.matches[link].link != StateID::ZERO {
+ link = self.matches[link].link;
+ }
+ let new_match_link = self.alloc_match()?;
+ self.matches[new_match_link].pid = pid;
+ if link == StateID::ZERO {
+ self.states[sid].matches = new_match_link;
+ } else {
+ self.matches[link].link = new_match_link;
+ }
+ Ok(())
+ }
+
+ /// Copy matches from the `src` state to the `dst` state. This is useful
+ /// when a match state can be reached via a failure transition. In which
+ /// case, you'll want to copy the matches (if any) from the state reached
+ /// by the failure transition to the original state you were at.
+ fn copy_matches(
+ &mut self,
+ src: StateID,
+ dst: StateID,
+ ) -> Result<(), BuildError> {
+ let head_dst = self.states[dst].matches;
+ let mut link_dst = head_dst;
+ while self.matches[link_dst].link != StateID::ZERO {
+ link_dst = self.matches[link_dst].link;
+ }
+ let mut link_src = self.states[src].matches;
+ while link_src != StateID::ZERO {
+ let new_match_link =
+ StateID::new(self.matches.len()).map_err(|e| {
+ BuildError::state_id_overflow(
+ StateID::MAX.as_u64(),
+ e.attempted(),
+ )
+ })?;
+ self.matches.push(Match {
+ pid: self.matches[link_src].pid,
+ link: StateID::ZERO,
+ });
+ if link_dst == StateID::ZERO {
+ self.states[dst].matches = new_match_link;
+ } else {
+ self.matches[link_dst].link = new_match_link;
+ }
+
+ link_dst = new_match_link;
+ link_src = self.matches[link_src].link;
+ }
+ Ok(())
+ }
+
+ /// Create a new entry in `NFA::sparse`, if there's room, and return that
+ /// entry's ID. If there's no room, then an error is returned.
+ fn alloc_transition(&mut self) -> Result<StateID, BuildError> {
+ let id = StateID::new(self.sparse.len()).map_err(|e| {
+ BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted())
+ })?;
+ self.sparse.push(Transition::default());
+ Ok(id)
+ }
+
+ /// Create a new entry in `NFA::matches`, if there's room, and return that
+ /// entry's ID. If there's no room, then an error is returned.
+ fn alloc_match(&mut self) -> Result<StateID, BuildError> {
+ let id = StateID::new(self.matches.len()).map_err(|e| {
+ BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted())
+ })?;
+ self.matches.push(Match::default());
+ Ok(id)
+ }
+
+ /// Create a new set of `N` transitions in this NFA's dense transition
+ /// table. The ID returned corresponds to the index at which the `N`
+ /// transitions begin. So `id+0` is the first transition and `id+(N-1)` is
+ /// the last.
+ ///
+ /// `N` is determined via `NFA::byte_classes::alphabet_len`.
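+ ///
+ /// (Illustration: with an alphabet of 5 equivalence classes, one call
+ /// appends 5 `NFA::FAIL` entries to `NFA::dense` and returns the index
+ /// of the first one.)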
+ fn alloc_dense_state(&mut self) -> Result<StateID, BuildError> {
+ let id = StateID::new(self.dense.len()).map_err(|e| {
+ BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted())
+ })?;
+ // We use FAIL because it's the correct default. If a state doesn't
+ // have a transition defined for every possible byte value, then the
+ // transition function should return NFA::FAIL.
+ self.dense.extend(
+ core::iter::repeat(NFA::FAIL)
+ .take(self.byte_classes.alphabet_len()),
+ );
+ Ok(id)
+ }
+
+ /// Allocate and add a fresh state to the underlying NFA and return its
+ /// ID (guaranteed to be one more than the ID of the previously allocated
+ /// state). If the ID would overflow `StateID`, then this returns an error.
+ fn alloc_state(&mut self, depth: usize) -> Result<StateID, BuildError> {
+ // This is OK because we error when building the trie if we see a
+ // pattern whose length cannot fit into a 'SmallIndex', and the longest
+ // possible depth corresponds to the length of the longest pattern.
+ let depth = SmallIndex::new(depth)
+ .expect("patterns longer than SmallIndex::MAX are not allowed");
+ let id = StateID::new(self.states.len()).map_err(|e| {
+ BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted())
+ })?;
+ self.states.push(State {
+ sparse: StateID::ZERO,
+ dense: StateID::ZERO,
+ matches: StateID::ZERO,
+ fail: self.special.start_unanchored_id,
+ depth,
+ });
+ Ok(id)
+ }
+}
+
+// SAFETY: 'start_state' always returns a valid state ID, 'next_state' always
+// returns a valid state ID given a valid state ID. We otherwise claim that
+// all other methods are correct as well.
+unsafe impl Automaton for NFA {
+ #[inline(always)]
+ fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError> {
+ match anchored {
+ Anchored::No => Ok(self.special.start_unanchored_id),
+ Anchored::Yes => Ok(self.special.start_anchored_id),
+ }
+ }
+
+ #[inline(always)]
+ fn next_state(
+ &self,
+ anchored: Anchored,
+ mut sid: StateID,
+ byte: u8,
+ ) -> StateID {
+ // This terminates since:
+ //
+ // 1. state.fail never points to the FAIL state.
+ // 2. All state.fail values point to a state closer to the start state.
+ // 3. The start state has no transitions to the FAIL state.
+ loop {
+ let next = self.follow_transition(sid, byte);
+ if next != NFA::FAIL {
+ return next;
+ }
+ // For an anchored search, we never follow failure transitions
+ // because failure transitions lead us down a path to matching
+ // a *proper* suffix of the path we were on. Thus, it can only
+ // produce matches that appear after the beginning of the search.
+ if anchored.is_anchored() {
+ return NFA::DEAD;
+ }
+ sid = self.states[sid].fail();
+ }
+ }
+
+ #[inline(always)]
+ fn is_special(&self, sid: StateID) -> bool {
+ sid <= self.special.max_special_id
+ }
+
+ #[inline(always)]
+ fn is_dead(&self, sid: StateID) -> bool {
+ sid == NFA::DEAD
+ }
+
+ #[inline(always)]
+ fn is_match(&self, sid: StateID) -> bool {
+ // N.B. This returns true when sid==NFA::FAIL but that's okay because
+ // NFA::FAIL is not actually a valid state ID from the perspective of
+ // the Automaton trait. Namely, it is never returned by 'start_state'
+ // or by 'next_state'. So we don't need to care about it here.
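+ //
+ // (Why a single comparison suffices, as an illustration: after the
+ // shuffle described on `NFA::special`, state IDs are ordered DEAD=0,
+ // FAIL=1, match states, the two start states, then all remaining
+ // states, so every match state satisfies FAIL < sid <= max_match_id.)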
+ !self.is_dead(sid) && sid <= self.special.max_match_id
+ }
+
+ #[inline(always)]
+ fn is_start(&self, sid: StateID) -> bool {
+ sid == self.special.start_unanchored_id
+ || sid == self.special.start_anchored_id
+ }
+
+ #[inline(always)]
+ fn match_kind(&self) -> MatchKind {
+ self.match_kind
+ }
+
+ #[inline(always)]
+ fn patterns_len(&self) -> usize {
+ self.pattern_lens.len()
+ }
+
+ #[inline(always)]
+ fn pattern_len(&self, pid: PatternID) -> usize {
+ self.pattern_lens[pid].as_usize()
+ }
+
+ #[inline(always)]
+ fn min_pattern_len(&self) -> usize {
+ self.min_pattern_len
+ }
+
+ #[inline(always)]
+ fn max_pattern_len(&self) -> usize {
+ self.max_pattern_len
+ }
+
+ #[inline(always)]
+ fn match_len(&self, sid: StateID) -> usize {
+ self.iter_matches(sid).count()
+ }
+
+ #[inline(always)]
+ fn match_pattern(&self, sid: StateID, index: usize) -> PatternID {
+ self.iter_matches(sid).nth(index).unwrap()
+ }
+
+ #[inline(always)]
+ fn memory_usage(&self) -> usize {
+ self.states.len() * core::mem::size_of::<State>()
+ + self.sparse.len() * core::mem::size_of::<Transition>()
+ + self.matches.len() * core::mem::size_of::<Match>()
+ + self.dense.len() * StateID::SIZE
+ + self.pattern_lens.len() * SmallIndex::SIZE
+ + self.prefilter.as_ref().map_or(0, |p| p.memory_usage())
+ }
+
+ #[inline(always)]
+ fn prefilter(&self) -> Option<&Prefilter> {
+ self.prefilter.as_ref()
+ }
+}
+
+/// A representation of a sparse NFA state for an Aho-Corasick automaton.
+///
+/// It contains the transitions to the next state, a failure transition for
+/// cases where there exists no other transition for the current input byte
+/// and the matches implied by visiting this state (if any).
+#[derive(Clone, Debug)]
+pub(crate) struct State {
+ /// A pointer to `NFA::sparse` corresponding to the head of a linked list
+ /// containing all of the transitions for this state.
+ ///
+ /// This is `StateID::ZERO` if and only if this state has zero transitions.
+ sparse: StateID,
+ /// A pointer to a row of `N` transitions in `NFA::dense`. These
+ /// transitions correspond precisely to what is obtained by traversing
+ /// `sparse`, but permits constant time lookup.
+ ///
+ /// When this is zero (which is true for most states in the default
+ /// configuration), then this state has no dense representation.
+ ///
+ /// Note that `N` is equal to `NFA::byte_classes::alphabet_len()`. This is
+ /// typically much less than 256 (the maximum value).
+ dense: StateID,
+ /// A pointer to `NFA::matches` corresponding to the head of a linked list
+ /// containing all of the matches for this state.
+ ///
+ /// This is `StateID::ZERO` if and only if this state is not a match state.
+ matches: StateID,
+ /// The state that should be transitioned to if the current byte in the
+ /// haystack does not have a corresponding transition defined in this
+ /// state.
+ fail: StateID,
+ /// The depth of this state. Specifically, this is the distance from this
+ /// state to the starting state. (For the special sentinel states DEAD and
+ /// FAIL, their depth is always 0.) The depth of a starting state is 0.
+ ///
+ /// Note that depth is currently not used in this non-contiguous NFA. It
+ /// may in the future, but it is used in the contiguous NFA. Namely, it
+ /// permits an optimization where states near the starting state have their
+ /// transitions stored in a dense fashion, but all other states have their
+ /// transitions stored in a sparse fashion. (This non-contiguous NFA uses
+ /// a sparse representation for all states unconditionally.)
In any case,
+ /// this is really the only convenient place to compute and store this
+ /// information, which we need when building the contiguous NFA.
+ depth: SmallIndex,
+}
+
+impl State {
+ /// Return true if and only if this state is a match state.
+ pub(crate) fn is_match(&self) -> bool {
+ self.matches != StateID::ZERO
+ }
+
+ /// Returns the failure transition for this state.
+ pub(crate) fn fail(&self) -> StateID {
+ self.fail
+ }
+
+ /// Returns the depth of this state. That is, the number of transitions
+ /// this state is from the start state of the NFA.
+ pub(crate) fn depth(&self) -> SmallIndex {
+ self.depth
+ }
+}
+
+/// A single transition in a non-contiguous NFA.
+#[derive(Clone, Copy, Default)]
+#[repr(packed)]
+pub(crate) struct Transition {
+ byte: u8,
+ next: StateID,
+ link: StateID,
+}
+
+impl Transition {
+ /// Return the byte for which this transition is defined.
+ pub(crate) fn byte(&self) -> u8 {
+ self.byte
+ }
+
+ /// Return the ID of the state that this transition points to.
+ pub(crate) fn next(&self) -> StateID {
+ self.next
+ }
+
+ /// Return the ID of the next transition.
+ fn link(&self) -> StateID {
+ self.link
+ }
+}
+
+impl core::fmt::Debug for Transition {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ write!(
+ f,
+ "Transition(byte: {:X?}, next: {:?}, link: {:?})",
+ self.byte,
+ self.next().as_usize(),
+ self.link().as_usize()
+ )
+ }
+}
+
+/// A single match in a non-contiguous NFA.
+#[derive(Clone, Copy, Default)]
+struct Match {
+ pid: PatternID,
+ link: StateID,
+}
+
+impl Match {
+ /// Return the pattern ID for this match.
+ pub(crate) fn pattern(&self) -> PatternID {
+ self.pid
+ }
+
+ /// Return the ID of the next match.
+ fn link(&self) -> StateID {
+ self.link
+ }
+}
+
+impl core::fmt::Debug for Match {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ write!(
+ f,
+ "Match(pid: {:?}, link: {:?})",
+ self.pattern().as_usize(),
+ self.link().as_usize()
+ )
+ }
+}
+
+/// A builder for configuring an Aho-Corasick noncontiguous NFA.
+///
+/// This builder has a subset of the options available to an
+/// [`AhoCorasickBuilder`](crate::AhoCorasickBuilder). Of the shared options,
+/// their behavior is identical.
+#[derive(Clone, Debug)]
+pub struct Builder {
+ match_kind: MatchKind,
+ prefilter: bool,
+ ascii_case_insensitive: bool,
+ dense_depth: usize,
+}
+
+impl Default for Builder {
+ fn default() -> Builder {
+ Builder {
+ match_kind: MatchKind::default(),
+ prefilter: true,
+ ascii_case_insensitive: false,
+ dense_depth: 3,
+ }
+ }
+}
+
+impl Builder {
+ /// Create a new builder for configuring an Aho-Corasick noncontiguous NFA.
+ pub fn new() -> Builder {
+ Builder::default()
+ }
+
+ /// Build an Aho-Corasick noncontiguous NFA from the given iterator of
+ /// patterns.
+ ///
+ /// A builder may be reused to create more NFAs.
+ pub fn build<I, P>(&self, patterns: I) -> Result<NFA, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ debug!("building non-contiguous NFA");
+ let nfa = Compiler::new(self)?.compile(patterns)?;
+ debug!(
+ "non-contiguous NFA built, <states: {:?}, size: {:?}>",
+ nfa.states.len(),
+ nfa.memory_usage()
+ );
+ Ok(nfa)
+ }
+
+ /// Set the desired match semantics.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::match_kind`](crate::AhoCorasickBuilder::match_kind)
+ /// for more documentation and examples.
+ pub fn match_kind(&mut self, kind: MatchKind) -> &mut Builder {
+ self.match_kind = kind;
+ self
+ }
+
+ /// Enable ASCII-aware case insensitive matching.
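+ ///
+ /// (Illustration: when enabled, the pattern `abc` also matches `ABC`,
+ /// `aBc` and so on; bytes outside ASCII are unaffected.)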
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::ascii_case_insensitive`](crate::AhoCorasickBuilder::ascii_case_insensitive)
+ /// for more documentation and examples.
+ pub fn ascii_case_insensitive(&mut self, yes: bool) -> &mut Builder {
+ self.ascii_case_insensitive = yes;
+ self
+ }
+
+ /// Set the limit on how many states use a dense representation for their
+ /// transitions. Other states will generally use a sparse representation.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::dense_depth`](crate::AhoCorasickBuilder::dense_depth)
+ /// for more documentation and examples.
+ pub fn dense_depth(&mut self, depth: usize) -> &mut Builder {
+ self.dense_depth = depth;
+ self
+ }
+
+ /// Enable heuristic prefilter optimizations.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::prefilter`](crate::AhoCorasickBuilder::prefilter)
+ /// for more documentation and examples.
+ pub fn prefilter(&mut self, yes: bool) -> &mut Builder {
+ self.prefilter = yes;
+ self
+ }
+}
+
+/// A compiler uses a builder configuration and builds up the NFA formulation
+/// of an Aho-Corasick automaton. This roughly corresponds to the standard
+/// formulation described in textbooks, with some tweaks to support leftmost
+/// searching.
+#[derive(Debug)]
+struct Compiler<'a> {
+ builder: &'a Builder,
+ prefilter: prefilter::Builder,
+ nfa: NFA,
+ byteset: ByteClassSet,
+}
+
+impl<'a> Compiler<'a> {
+ fn new(builder: &'a Builder) -> Result<Compiler<'a>, BuildError> {
+ let prefilter = prefilter::Builder::new(builder.match_kind)
+ .ascii_case_insensitive(builder.ascii_case_insensitive);
+ Ok(Compiler {
+ builder,
+ prefilter,
+ nfa: NFA {
+ match_kind: builder.match_kind,
+ states: vec![],
+ sparse: vec![],
+ dense: vec![],
+ matches: vec![],
+ pattern_lens: vec![],
+ prefilter: None,
+ byte_classes: ByteClasses::singletons(),
+ min_pattern_len: usize::MAX,
+ max_pattern_len: 0,
+ special: Special::zero(),
+ },
+ byteset: ByteClassSet::empty(),
+ })
+ }
+
+ fn compile<I, P>(mut self, patterns: I) -> Result<NFA, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ // Add dummy transition/match links, so that no valid link will point
+ // to another link at index 0.
+ self.nfa.sparse.push(Transition::default());
+ self.nfa.matches.push(Match::default());
+ // Add a dummy dense transition so that no states can have dense==0
+ // represent a valid pointer to dense transitions. This permits
+ // dense==0 to be a sentinel indicating "no dense transitions."
+ self.nfa.dense.push(NFA::DEAD);
+ // the dead state, only used for leftmost and fixed to id==0
+ self.nfa.alloc_state(0)?;
+ // the fail state, which is never entered and fixed to id==1
+ self.nfa.alloc_state(0)?;
+ // unanchored start state, initially fixed to id==2 but later shuffled
+ // to appear after all non-start match states.
+ self.nfa.special.start_unanchored_id = self.nfa.alloc_state(0)?;
+ // anchored start state, initially fixed to id==3 but later shuffled
+ // to appear after unanchored start state.
+ self.nfa.special.start_anchored_id = self.nfa.alloc_state(0)?;
+ // Initialize the unanchored starting state in order to make it dense,
+ // and thus make transition lookups on this state faster.
+ self.init_unanchored_start_state()?;
+ // Set all transitions on the DEAD state to point to itself. This way,
+ // the DEAD state can never be escaped. It MUST be used as a sentinel
+ // in any correct search.
+ self.add_dead_state_loop()?;
+ // Build the base trie from the given patterns.
+ self.build_trie(patterns)?;
+ self.nfa.states.shrink_to_fit();
+ // Turn our set of bytes into equivalence classes.
This NFA + // implementation uses byte classes only for states that use a dense + // representation of transitions. (And that's why this comes before + // `self.densify()`, as the byte classes need to be set first.) + self.nfa.byte_classes = self.byteset.byte_classes(); + // Add transitions (and maybe matches) to the anchored starting state. + // The anchored starting state is used for anchored searches. The only + // mechanical difference between it and the unanchored start state is + // that missing transitions map to the DEAD state instead of the FAIL + // state. + self.set_anchored_start_state()?; + // Rewrite transitions to the FAIL state on the unanchored start state + // as self-transitions. This keeps the start state active at all times. + self.add_unanchored_start_state_loop(); + // Make some (possibly zero) states use a dense representation for + // transitions. It's important to do this right after the states + // and non-failure transitions are solidified. That way, subsequent + // accesses (particularly `fill_failure_transitions`) will benefit from + // the faster transition lookup in densified states. + self.densify()?; + // The meat of the Aho-Corasick algorithm: compute and write failure + // transitions. i.e., the state to move to when a transition isn't + // defined in the current state. These are epsilon transitions and thus + // make this formulation an NFA. + self.fill_failure_transitions()?; + // Handle a special case under leftmost semantics when at least one + // of the patterns is the empty string. + self.close_start_state_loop_for_leftmost(); + // Shuffle states so that we have DEAD, FAIL, MATCH, ..., START, START, + // NON-MATCH, ... This permits us to very quickly query the type of + // the state we're currently in during a search. + self.shuffle(); + self.nfa.prefilter = self.prefilter.build(); + // Store the maximum ID of all *relevant* special states. Start states + // are only relevant when we have a prefilter, otherwise, there is zero + // reason to care about whether a state is a start state or not during + // a search. Indeed, without a prefilter, we are careful to explicitly + // NOT care about start states, otherwise the search can ping pong + // between the unrolled loop and the handling of special-status states + // and destroy perf. + self.nfa.special.max_special_id = if self.nfa.prefilter.is_some() { + // Why the anchored starting state? Because we always put it + // after the unanchored starting state and it is therefore the + // maximum. Why put unanchored followed by anchored? No particular + // reason, but that's how the states are logically organized in the + // Thompson NFA implementation found in regex-automata. ¯\_(ツ)_/¯ + self.nfa.special.start_anchored_id + } else { + self.nfa.special.max_match_id + }; + self.nfa.sparse.shrink_to_fit(); + self.nfa.dense.shrink_to_fit(); + self.nfa.matches.shrink_to_fit(); + self.nfa.pattern_lens.shrink_to_fit(); + Ok(self.nfa) + } + + /// This sets up the initial prefix trie that makes up the Aho-Corasick + /// automaton. Effectively, it creates the basic structure of the + /// automaton, where every pattern given has a path from the start state to + /// the end of the pattern. 
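+    ///
+    /// As a brief illustration (an editor's sketch, not part of the upstream
+    /// comment), the patterns 'ab' and 'ac' share the path for their common
+    /// prefix 'a' and then branch:
+    ///
+    /// ```ignore
+    ///     S0 - a - S1 - b - S2*
+    ///               \
+    ///                c - S3*
+    /// ```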
+    fn build_trie<I, P>(&mut self, patterns: I) -> Result<(), BuildError>
+    where
+        I: IntoIterator<Item = P>,
+        P: AsRef<[u8]>,
+    {
+        'PATTERNS: for (i, pat) in patterns.into_iter().enumerate() {
+            let pid = PatternID::new(i).map_err(|e| {
+                BuildError::pattern_id_overflow(
+                    PatternID::MAX.as_u64(),
+                    e.attempted(),
+                )
+            })?;
+            let pat = pat.as_ref();
+            let patlen = SmallIndex::new(pat.len())
+                .map_err(|_| BuildError::pattern_too_long(pid, pat.len()))?;
+            self.nfa.min_pattern_len =
+                core::cmp::min(self.nfa.min_pattern_len, pat.len());
+            self.nfa.max_pattern_len =
+                core::cmp::max(self.nfa.max_pattern_len, pat.len());
+            assert_eq!(
+                i,
+                self.nfa.pattern_lens.len(),
+                "expected number of patterns to match pattern ID"
+            );
+            self.nfa.pattern_lens.push(patlen);
+            // We add the pattern to the prefilter here because the pattern
+            // ID in the prefilter is determined with respect to the patterns
+            // added to the prefilter. That is, it isn't the ID we have here,
+            // but the one determined by its own accounting of patterns.
+            // To ensure they line up, we add every pattern we see to the
+            // prefilter, even if some patterns ultimately are impossible to
+            // match (in leftmost-first semantics specifically).
+            //
+            // Another way of doing this would be to expose an API in the
+            // prefilter to permit setting your own pattern IDs. Or to just use
+            // our own map and go between them. But this case is sufficiently
+            // rare that we don't bother and just make sure they're in sync.
+            if self.builder.prefilter {
+                self.prefilter.add(pat);
+            }
+
+            let mut prev = self.nfa.special.start_unanchored_id;
+            let mut saw_match = false;
+            for (depth, &b) in pat.iter().enumerate() {
+                // When leftmost-first match semantics are requested, we
+                // specifically stop adding patterns when a previously added
+                // pattern is a prefix of it. We avoid adding it because
+                // leftmost-first semantics imply that the pattern can never
+                // match. This is not just an optimization to save space! It
+                // is necessary for correctness. In fact, this is the only
+                // difference in the automaton between the implementations for
+                // leftmost-first and leftmost-longest.
+                saw_match = saw_match || self.nfa.states[prev].is_match();
+                if self.builder.match_kind.is_leftmost_first() && saw_match {
+                    // Skip to the next pattern immediately. This avoids
+                    // incorrectly adding a match after this loop terminates.
+                    continue 'PATTERNS;
+                }
+
+                // Add this byte to our equivalence classes. These don't
+                // get used while building the trie, but other Aho-Corasick
+                // implementations may use them.
+                self.byteset.set_range(b, b);
+                if self.builder.ascii_case_insensitive {
+                    let b = opposite_ascii_case(b);
+                    self.byteset.set_range(b, b);
+                }
+
+                // If the transition from prev using the current byte already
+                // exists, then just move through it. Otherwise, add a new
+                // state. We track the depth here so that we can determine
+                // how to represent transitions. States near the start state
+                // use a dense representation that uses more memory but is
+                // faster. Other states use a sparse representation that uses
+                // less memory but is slower.
+                let next = self.nfa.follow_transition(prev, b);
+                if next != NFA::FAIL {
+                    prev = next;
+                } else {
+                    let next = self.nfa.alloc_state(depth)?;
+                    self.nfa.add_transition(prev, b, next)?;
+                    if self.builder.ascii_case_insensitive {
+                        let b = opposite_ascii_case(b);
+                        self.nfa.add_transition(prev, b, next)?;
+                    }
+                    prev = next;
+                }
+            }
+            // Once the pattern has been added, log the match in the final
+            // state that it reached.
+            self.nfa.add_match(prev, pid)?;
+        }
+        Ok(())
+    }
+
+    /// This routine creates failure transitions according to the standard
+    /// textbook formulation of the Aho-Corasick algorithm, with a couple small
+    /// tweaks to support "leftmost" semantics.
+    ///
+    /// Building failure transitions is the most interesting part of building
+    /// the Aho-Corasick automaton, because they are what allow searches to
+    /// be performed in linear time. Specifically, a failure transition is
+    /// a single transition associated with each state that points back to
+    /// the longest proper suffix of the pattern being searched. The failure
+    /// transition is followed whenever there exists no transition on the
+    /// current state for the current input byte. If there is no other proper
+    /// suffix, then the failure transition points back to the starting state.
+    ///
+    /// For example, let's say we built an Aho-Corasick automaton with the
+    /// following patterns: 'abcd' and 'cef'. The trie looks like this:
+    ///
+    /// ```ignore
+    ///          a - S1 - b - S2 - c - S3 - d - S4*
+    ///         /
+    ///     S0 - c - S5 - e - S6 - f - S7*
+    /// ```
+    ///
+    /// At this point, it should be fairly straightforward to see how this
+    /// trie can be used in a simplistic way. At any given position in the
+    /// text we're searching (called the "subject" string), all we need to do
+    /// is follow the transitions in the trie by consuming one transition for
+    /// each byte in the subject string. If we reach a match state, then we can
+    /// report that location as a match.
+    ///
+    /// The trick comes when searching a subject string like 'abcef'. We'll
+    /// initially follow the transition from S0 to S1 and wind up in S3 after
+    /// observing the 'c' byte. At this point, the next byte is 'e' but state
+    /// S3 has no transition for 'e', so the search fails. We then would need
+    /// to restart the search at the next position in 'abcef', which
+    /// corresponds to 'b'. The match would fail, but the next search starting
+    /// at 'c' would finally succeed. The problem with this approach is that
+    /// we wind up searching the subject string potentially many times. In
+    /// effect, this makes the algorithm have worst case `O(n * m)` complexity,
+    /// where `n ~ len(subject)` and `m ~ len(all patterns)`. We would instead
+    /// like to achieve a `O(n + m)` worst case complexity.
+    ///
+    /// This is where failure transitions come in. Instead of dying at S3 in
+    /// the first search, the automaton can instruct the search to move to
+    /// another part of the automaton that corresponds to a suffix of what
+    /// we've seen so far. Recall that we've seen 'abc' in the subject string,
+    /// and the automaton does indeed have a non-empty suffix, 'c', that could
+    /// potentially lead to another match. Thus, the actual Aho-Corasick
+    /// automaton for our patterns in this case looks like this:
+    ///
+    /// ```ignore
+    ///          a - S1 - b - S2 - c - S3 - d - S4*
+    ///         /                      /
+    ///        /        ----------------
+    ///       /        /
+    ///     S0 - c - S5 - e - S6 - f - S7*
+    /// ```
+    ///
+    /// That is, we have a failure transition from S3 to S5, which is followed
+    /// exactly in cases when we are in state S3 but see any byte other than
+    /// 'd' (that is, we've "failed" to find a match in this portion of our
+    /// trie). We know we can transition back to S5 because we've already seen
+    /// a 'c' byte, so we don't need to re-scan it. We can then pick back up
+    /// with the search starting at S5 and complete our match.
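+    ///
+    /// (Editor's sketch, exercising the example above through the crate's
+    /// public API rather than this internal routine.)
+    ///
+    /// ```ignore
+    /// use aho_corasick::AhoCorasick;
+    ///
+    /// let ac = AhoCorasick::new(["abcd", "cef"]).unwrap();
+    /// // One linear pass over 'abcef' still finds 'cef' at 2..5, because
+    /// // the failure transition from S3 re-enters the trie at S5.
+    /// let mat = ac.find("abcef").unwrap();
+    /// assert_eq!((2, 5), (mat.start(), mat.end()));
+    /// ```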
+    ///
+    /// Adding failure transitions to a trie is fairly simple, but subtle. The
+    /// key issue is that you might have multiple failure transitions that you
+    /// need to follow. For example, look at the trie for the patterns
+    /// 'abcd', 'b', 'bcd' and 'cd':
+    ///
+    /// ```ignore
+    ///       - a - S1 - b - S2* - c - S3 - d - S4*
+    ///      /          /         /
+    ///     /    -------   -------
+    ///    /    /         /
+    ///   S0 --- b - S5* - c - S6 - d - S7*
+    ///    \         /
+    ///     \   --------
+    ///      \ /
+    ///       - c - S8 - d - S9*
+    /// ```
+    ///
+    /// The failure transitions for this trie are defined from S2 to S5,
+    /// S3 to S6 and S6 to S8. Moreover, state S2 needs to track that it
+    /// corresponds to a match, since its failure transition to S5 is itself
+    /// a match state.
+    ///
+    /// Perhaps the simplest way to think about adding these failure
+    /// transitions is recursively. That is, if you know the failure
+    /// transitions for every possible previous state that could be visited
+    /// (e.g., when computing the failure transition for S3, you already know
+    /// the failure transitions for S0, S1 and S2), then you can simply follow
+    /// the failure transition of the previous state and check whether the
+    /// incoming transition is defined after following the failure transition.
+    ///
+    /// For example, when determining the failure state for S3, by our
+    /// assumptions, we already know that there is a failure transition from
+    /// S2 (the previous state) to S5. So we follow that transition and check
+    /// whether the transition connecting S2 to S3 is defined. Indeed, it is,
+    /// as there is a transition from S5 to S6 for the byte 'c'. If no such
+    /// transition existed, we could keep following the failure transitions
+    /// until we reach the start state, which is the failure transition for
+    /// every state that has no corresponding proper suffix.
+    ///
+    /// We don't actually use recursion to implement this, but instead, use a
+    /// breadth first search of the automaton. Our base case is the start
+    /// state, whose failure transition is just a transition to itself.
+    ///
+    /// When building a leftmost automaton, we proceed as above, but only
+    /// include a subset of failure transitions. Namely, we omit any failure
+    /// transitions that appear after a match state in the trie. This is
+    /// because failure transitions always point back to a proper suffix of
+    /// what has been seen so far. Thus, following a failure transition after
+    /// a match implies looking for a match that starts after the one that has
+    /// already been seen, which is of course therefore not the leftmost match.
+    ///
+    /// N.B. I came up with this algorithm on my own, and after scouring all of
+    /// the other AC implementations I know of (Perl, Snort, many on GitHub),
+    /// I couldn't find any that implement leftmost semantics like this.
+    /// Perl of course needs leftmost-first semantics, but they implement it
+    /// with a seeming hack at *search* time instead of encoding it into the
+    /// automaton. There are also a couple Java libraries that support
+    /// leftmost-longest semantics, but they do it by building a queue of
+    /// matches at search time, which is even worse than what Perl is
+    /// doing. ---AG
+    fn fill_failure_transitions(&mut self) -> Result<(), BuildError> {
+        let is_leftmost = self.builder.match_kind.is_leftmost();
+        let start_uid = self.nfa.special.start_unanchored_id;
+        // Initialize the queue for breadth first search with all transitions
+        // out of the start state. We handle the start state specially because
+        // we only want to follow non-self transitions.
If we followed self + // transitions, then this would never terminate. + let mut queue = VecDeque::new(); + let mut seen = self.queued_set(); + let mut prev_link = None; + while let Some(link) = self.nfa.next_link(start_uid, prev_link) { + prev_link = Some(link); + let t = self.nfa.sparse[link]; + + // Skip anything we've seen before and any self-transitions on the + // start state. + if start_uid == t.next() || seen.contains(t.next) { + continue; + } + queue.push_back(t.next); + seen.insert(t.next); + // Under leftmost semantics, if a state immediately following + // the start state is a match state, then we never want to + // follow its failure transition since the failure transition + // necessarily leads back to the start state, which we never + // want to do for leftmost matching after a match has been + // found. + // + // We apply the same logic to non-start states below as well. + if is_leftmost && self.nfa.states[t.next].is_match() { + self.nfa.states[t.next].fail = NFA::DEAD; + } + } + while let Some(id) = queue.pop_front() { + let mut prev_link = None; + while let Some(link) = self.nfa.next_link(id, prev_link) { + prev_link = Some(link); + let t = self.nfa.sparse[link]; + + if seen.contains(t.next) { + // The only way to visit a duplicate state in a transition + // list is when ASCII case insensitivity is enabled. In + // this case, we want to skip it since it's redundant work. + // But it would also end up duplicating matches, which + // results in reporting duplicate matches in some cases. + // See the 'acasei010' regression test. + continue; + } + queue.push_back(t.next); + seen.insert(t.next); + + // As above for start states, under leftmost semantics, once + // we see a match all subsequent states should have no failure + // transitions because failure transitions always imply looking + // for a match that is a suffix of what has been seen so far + // (where "seen so far" corresponds to the string formed by + // following the transitions from the start state to the + // current state). Under leftmost semantics, we specifically do + // not want to allow this to happen because we always want to + // report the match found at the leftmost position. + // + // The difference between leftmost-first and leftmost-longest + // occurs previously while we build the trie. For + // leftmost-first, we simply omit any entries that would + // otherwise require passing through a match state. + // + // Note that for correctness, the failure transition has to be + // set to the dead state for ALL states following a match, not + // just the match state itself. However, by setting the failure + // transition to the dead state on all match states, the dead + // state will automatically propagate to all subsequent states + // via the failure state computation below. + if is_leftmost && self.nfa.states[t.next].is_match() { + self.nfa.states[t.next].fail = NFA::DEAD; + continue; + } + let mut fail = self.nfa.states[id].fail; + while self.nfa.follow_transition(fail, t.byte) == NFA::FAIL { + fail = self.nfa.states[fail].fail; + } + fail = self.nfa.follow_transition(fail, t.byte); + self.nfa.states[t.next].fail = fail; + self.nfa.copy_matches(fail, t.next)?; + } + // If the start state is a match state, then this automaton can + // match the empty string. This implies all states are match states + // since every position matches the empty string, so copy the + // matches from the start state to every state. 
Strictly speaking, + // this is only necessary for overlapping matches since each + // non-empty non-start match state needs to report empty matches + // in addition to its own. For the non-overlapping case, such + // states only report the first match, which is never empty since + // it isn't a start state. + if !is_leftmost { + self.nfa + .copy_matches(self.nfa.special.start_unanchored_id, id)?; + } + } + Ok(()) + } + + /// Shuffle the states so that they appear in this sequence: + /// + /// DEAD, FAIL, MATCH..., START, START, NON-MATCH... + /// + /// The idea here is that if we know how special states are laid out in our + /// transition table, then we can determine what "kind" of state we're in + /// just by comparing our current state ID with a particular value. In this + /// way, we avoid doing extra memory lookups. + /// + /// Before shuffling begins, our states look something like this: + /// + /// DEAD, FAIL, START, START, (MATCH | NON-MATCH)... + /// + /// So all we need to do is move all of the MATCH states so that they + /// all appear before any NON-MATCH state, like so: + /// + /// DEAD, FAIL, START, START, MATCH... NON-MATCH... + /// + /// Then it's just a simple matter of swapping the two START states with + /// the last two MATCH states. + /// + /// (This is the same technique used for fully compiled DFAs in + /// regex-automata.) + fn shuffle(&mut self) { + let old_start_uid = self.nfa.special.start_unanchored_id; + let old_start_aid = self.nfa.special.start_anchored_id; + assert!(old_start_uid < old_start_aid); + assert_eq!( + 3, + old_start_aid.as_usize(), + "anchored start state should be at index 3" + ); + // We implement shuffling by a sequence of pairwise swaps of states. + // Since we have a number of things referencing states via their + // IDs and swapping them changes their IDs, we need to record every + // swap we make so that we can remap IDs. The remapper handles this + // book-keeping for us. + let mut remapper = Remapper::new(&self.nfa, 0); + // The way we proceed here is by moving all match states so that + // they directly follow the start states. So it will go: DEAD, FAIL, + // START-UNANCHORED, START-ANCHORED, MATCH, ..., NON-MATCH, ... + // + // To do that, we proceed forward through all states after + // START-ANCHORED and swap match states so that they appear before all + // non-match states. + let mut next_avail = StateID::from(4u8); + for i in next_avail.as_usize()..self.nfa.states.len() { + let sid = StateID::new(i).unwrap(); + if !self.nfa.states[sid].is_match() { + continue; + } + remapper.swap(&mut self.nfa, sid, next_avail); + // The key invariant here is that only non-match states exist + // between 'next_avail' and 'sid' (with them being potentially + // equivalent). Thus, incrementing 'next_avail' by 1 is guaranteed + // to land on the leftmost non-match state. (Unless 'next_avail' + // and 'sid' are equivalent, in which case, a swap will occur but + // it is a no-op.) + next_avail = StateID::new(next_avail.one_more()).unwrap(); + } + // Now we'd like to move the start states to immediately following the + // match states. (The start states may themselves be match states, but + // we'll handle that later.) We arrange the states this way so that we + // don't necessarily need to check whether a state is a start state or + // not before checking whether a state is a match state. 
For example,
+        // we'd like to be able to write this as our state machine loop:
+        //
+        //   sid = start()
+        //   for byte in haystack:
+        //     sid = next(sid, byte)
+        //     if sid <= nfa.max_start_id:
+        //       if sid <= nfa.max_dead_id:
+        //         # search complete
+        //       elif sid <= nfa.max_match_id:
+        //         # found match
+        //
+        // The important context here is that we might not want to look for
+        // start states at all. Namely, if a searcher doesn't have a prefilter,
+        // then there is no reason to care about whether we're in a start state
+        // or not. And indeed, if we did check for it, this very hot loop would
+        // ping pong between the special state handling and the main state
+        // transition logic. This in turn stalls the CPU by killing branch
+        // prediction.
+        //
+        // So essentially, we really want to be able to "forget" that start
+        // states even exist and this is why we put them at the end.
+        let new_start_aid =
+            StateID::new(next_avail.as_usize().checked_sub(1).unwrap())
+                .unwrap();
+        remapper.swap(&mut self.nfa, old_start_aid, new_start_aid);
+        let new_start_uid =
+            StateID::new(next_avail.as_usize().checked_sub(2).unwrap())
+                .unwrap();
+        remapper.swap(&mut self.nfa, old_start_uid, new_start_uid);
+        let new_max_match_id =
+            StateID::new(next_avail.as_usize().checked_sub(3).unwrap())
+                .unwrap();
+        self.nfa.special.max_match_id = new_max_match_id;
+        self.nfa.special.start_unanchored_id = new_start_uid;
+        self.nfa.special.start_anchored_id = new_start_aid;
+        // If one start state is a match state, then they both are.
+        if self.nfa.states[self.nfa.special.start_anchored_id].is_match() {
+            self.nfa.special.max_match_id = self.nfa.special.start_anchored_id;
+        }
+        remapper.remap(&mut self.nfa);
+    }
+
+    /// Attempts to convert the transition representation of a subset of states
+    /// in this NFA from sparse to dense. This can greatly improve search
+    /// performance since states with a higher number of transitions tend to
+    /// correlate with very active states.
+    ///
+    /// We generally only densify states that are close to the start state.
+    /// These tend to be the most active states and thus benefit from a dense
+    /// representation more than other states.
+    ///
+    /// This tends to best balance between memory usage and performance. In
+    /// particular, the *vast majority* of all states in a typical Aho-Corasick
+    /// automaton have only 1 transition and are usually farther from the start
+    /// state and thus don't get densified.
+    ///
+    /// Note that this doesn't remove the sparse representation of transitions
+    /// for states that are densified. It could be done, but actually removing
+    /// entries from `NFA::sparse` is likely more expensive than it's worth.
+    fn densify(&mut self) -> Result<(), BuildError> {
+        for i in 0..self.nfa.states.len() {
+            let sid = StateID::new(i).unwrap();
+            // Don't bother densifying states that are only used as sentinels.
+            if sid == NFA::DEAD || sid == NFA::FAIL {
+                continue;
+            }
+            // Only densify states that are "close enough" to the start state.
+            if self.nfa.states[sid].depth.as_usize()
+                >= self.builder.dense_depth
+            {
+                continue;
+            }
+            let dense = self.nfa.alloc_dense_state()?;
+            let mut prev_link = None;
+            while let Some(link) = self.nfa.next_link(sid, prev_link) {
+                prev_link = Some(link);
+                let t = self.nfa.sparse[link];
+
+                let class = usize::from(self.nfa.byte_classes.get(t.byte));
+                let index = dense.as_usize() + class;
+                self.nfa.dense[index] = t.next;
+            }
+            self.nfa.states[sid].dense = dense;
+        }
+        Ok(())
+    }
+
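As an aside on the layout trick above, here is a minimal sketch (an editor's
illustration; the field names mirror the `Special` struct used by this NFA,
but the code below is not part of this patch):

```rust
// With states ordered as DEAD, FAIL, MATCH..., START, START, NON-MATCH...,
// a single integer comparison classifies a state on the hot search loop.
struct Special {
    max_match_id: u32,   // last match state (IDs 2..=max_match_id match)
    max_special_id: u32, // last special state worth checking for
}

impl Special {
    fn is_special(&self, sid: u32) -> bool {
        sid <= self.max_special_id
    }
    fn is_match(&self, sid: u32) -> bool {
        // IDs 0 and 1 are the DEAD and FAIL sentinels, never matches.
        2 <= sid && sid <= self.max_match_id
    }
}
```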
+    /// Returns a set that tracks queued states.
+    ///
+    /// This is only necessary when ASCII case insensitivity is enabled, since
+    /// it is the only way to visit the same state twice. Otherwise, this
+    /// returns an inert set that never adds anything and always reports
+    /// `false` for every member test.
+    fn queued_set(&self) -> QueuedSet {
+        if self.builder.ascii_case_insensitive {
+            QueuedSet::active()
+        } else {
+            QueuedSet::inert()
+        }
+    }
+
+    /// Initializes the unanchored start state by making it dense. This is
+    /// achieved by explicitly setting every transition to the FAIL state.
+    /// This isn't necessary for correctness, since any missing transition is
+    /// automatically assumed to be mapped to the FAIL state. We do this to
+    /// make the unanchored starting state dense, and thus in turn make
+    /// transition lookups on it faster. (Which is worth doing because it's
+    /// the most active state.)
+    fn init_unanchored_start_state(&mut self) -> Result<(), BuildError> {
+        let start_uid = self.nfa.special.start_unanchored_id;
+        let start_aid = self.nfa.special.start_anchored_id;
+        self.nfa.init_full_state(start_uid, NFA::FAIL)?;
+        self.nfa.init_full_state(start_aid, NFA::FAIL)?;
+        Ok(())
+    }
+
+    /// Set up the anchored start state by copying all of the transitions and
+    /// matches from the unanchored starting state with one change: the failure
+    /// transition is changed to the DEAD state, so that for any undefined
+    /// transitions, the search will stop.
+    fn set_anchored_start_state(&mut self) -> Result<(), BuildError> {
+        let start_uid = self.nfa.special.start_unanchored_id;
+        let start_aid = self.nfa.special.start_anchored_id;
+        let (mut uprev_link, mut aprev_link) = (None, None);
+        loop {
+            let unext = self.nfa.next_link(start_uid, uprev_link);
+            let anext = self.nfa.next_link(start_aid, aprev_link);
+            let (ulink, alink) = match (unext, anext) {
+                (Some(ulink), Some(alink)) => (ulink, alink),
+                (None, None) => break,
+                _ => unreachable!(),
+            };
+            uprev_link = Some(ulink);
+            aprev_link = Some(alink);
+            self.nfa.sparse[alink].next = self.nfa.sparse[ulink].next;
+        }
+        self.nfa.copy_matches(start_uid, start_aid)?;
+        // This is the main difference between the unanchored and anchored
+        // starting states. If a lookup on an anchored starting state fails,
+        // then the search should stop.
+        //
+        // N.B. This assumes that the loop on the unanchored starting state
+        // hasn't been created yet.
+        self.nfa.states[start_aid].fail = NFA::DEAD;
+        Ok(())
+    }
+
+    /// Set the failure transitions on the start state to loop back to the
+    /// start state. This effectively permits the Aho-Corasick automaton to
+    /// match at any position. This is also required for finding the next
+    /// state to terminate; namely, finding the next state should never return
+    /// a fail_id.
+    ///
+    /// This must be done after building the initial trie, since trie
+    /// construction depends on transitions to `fail_id` to determine whether a
+    /// state already exists or not.
+    fn add_unanchored_start_state_loop(&mut self) {
+        let start_uid = self.nfa.special.start_unanchored_id;
+        let mut prev_link = None;
+        while let Some(link) = self.nfa.next_link(start_uid, prev_link) {
+            prev_link = Some(link);
+            if self.nfa.sparse[link].next() == NFA::FAIL {
+                self.nfa.sparse[link].next = start_uid;
+            }
+        }
+    }
+
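For reference, the textbook failure-link computation that
`fill_failure_transitions` above adapts can be sketched independently of this
crate's sparse/dense representation (an editor's illustration, assuming a
plain `HashMap` trie with state 0 as the root; the leftmost and start-state
special cases are omitted):

```rust
use std::collections::{HashMap, VecDeque};

// next[s] maps a byte to the child state of s; the returned vector maps
// each state to its failure link.
fn fill_failure(next: &[HashMap<u8, usize>]) -> Vec<usize> {
    let mut fail = vec![0; next.len()];
    let mut queue = VecDeque::new();
    // Depth-1 states always fail back to the root.
    for &s in next[0].values() {
        queue.push_back(s);
    }
    while let Some(s) = queue.pop_front() {
        for (&b, &t) in &next[s] {
            // Follow failure links until some state has a transition on
            // `b`, or we bottom out at the root.
            let mut f = fail[s];
            fail[t] = loop {
                if let Some(&u) = next[f].get(&b) {
                    break u;
                }
                if f == 0 {
                    break 0;
                }
                f = fail[f];
            };
            queue.push_back(t);
        }
    }
    fail
}
```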
+    /// Remove the start state loop by rewriting any transitions on the start
+    /// state back to the start state with transitions to the dead state.
+    ///
+    /// The loop is only closed when two conditions are met: the start state
+    /// is a match state and the match kind is leftmost-first or
+    /// leftmost-longest.
+    ///
+    /// The reason for this is that under leftmost semantics, a start state
+    /// that is also a match implies that we should never restart the search
+    /// process. We allow normal transitions out of the start state, but if
+    /// none exist, we transition to the dead state, which signals that
+    /// searching should stop.
+    fn close_start_state_loop_for_leftmost(&mut self) {
+        let start_uid = self.nfa.special.start_unanchored_id;
+        let start = &mut self.nfa.states[start_uid];
+        let dense = start.dense;
+        if self.builder.match_kind.is_leftmost() && start.is_match() {
+            let mut prev_link = None;
+            while let Some(link) = self.nfa.next_link(start_uid, prev_link) {
+                prev_link = Some(link);
+                if self.nfa.sparse[link].next() == start_uid {
+                    self.nfa.sparse[link].next = NFA::DEAD;
+                    if dense != StateID::ZERO {
+                        let b = self.nfa.sparse[link].byte;
+                        let class = usize::from(self.nfa.byte_classes.get(b));
+                        self.nfa.dense[dense.as_usize() + class] = NFA::DEAD;
+                    }
+                }
+            }
+        }
+    }
+
+    /// Sets all transitions on the dead state to point back to the dead state.
+    /// Normally, missing transitions map back to the failure state, but the
+    /// point of the dead state is to act as a sink that can never be escaped.
+    fn add_dead_state_loop(&mut self) -> Result<(), BuildError> {
+        self.nfa.init_full_state(NFA::DEAD, NFA::DEAD)?;
+        Ok(())
+    }
+}
+
+/// A set of state identifiers used to avoid revisiting the same state multiple
+/// times when filling in failure transitions.
+///
+/// This set has an "inert" and an "active" mode. When inert, the set never
+/// stores anything and always returns `false` for every member test. This is
+/// useful to avoid the performance and memory overhead of maintaining this
+/// set when it is not needed.
+#[derive(Debug)]
+struct QueuedSet {
+    set: Option<BTreeSet<StateID>>,
+}
+
+impl QueuedSet {
+    /// Return an inert set that returns `false` for every state ID membership
+    /// test.
+    fn inert() -> QueuedSet {
+        QueuedSet { set: None }
+    }
+
+    /// Return an active set that tracks state ID membership.
+    fn active() -> QueuedSet {
+        QueuedSet { set: Some(BTreeSet::new()) }
+    }
+
+    /// Inserts the given state ID into this set. (If the set is inert, then
+    /// this is a no-op.)
+    fn insert(&mut self, state_id: StateID) {
+        if let Some(ref mut set) = self.set {
+            set.insert(state_id);
+        }
+    }
+
+    /// Returns true if and only if the given state ID is in this set. If the
+    /// set is inert, this always returns false.
+    fn contains(&self, state_id: StateID) -> bool {
+        match self.set {
+            None => false,
+            Some(ref set) => set.contains(&state_id),
+        }
+    }
+}
+
+impl core::fmt::Debug for NFA {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        use crate::{
+            automaton::{fmt_state_indicator, sparse_transitions},
+            util::debug::DebugByte,
+        };
+
+        writeln!(f, "noncontiguous::NFA(")?;
+        for (sid, state) in self.states.iter().with_state_ids() {
+            // The FAIL state doesn't actually have space for a state allocated
+            // for it, so we have to treat it as a special case.
+ if sid == NFA::FAIL { + writeln!(f, "F {:06}:", sid.as_usize())?; + continue; + } + fmt_state_indicator(f, self, sid)?; + write!( + f, + "{:06}({:06}): ", + sid.as_usize(), + state.fail.as_usize() + )?; + + let it = sparse_transitions( + self.iter_trans(sid).map(|t| (t.byte, t.next)), + ) + .enumerate(); + for (i, (start, end, sid)) in it { + if i > 0 { + write!(f, ", ")?; + } + if start == end { + write!( + f, + "{:?} => {:?}", + DebugByte(start), + sid.as_usize() + )?; + } else { + write!( + f, + "{:?}-{:?} => {:?}", + DebugByte(start), + DebugByte(end), + sid.as_usize() + )?; + } + } + + write!(f, "\n")?; + if self.is_match(sid) { + write!(f, " matches: ")?; + for (i, pid) in self.iter_matches(sid).enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", pid.as_usize())?; + } + write!(f, "\n")?; + } + } + writeln!(f, "match kind: {:?}", self.match_kind)?; + writeln!(f, "prefilter: {:?}", self.prefilter.is_some())?; + writeln!(f, "state length: {:?}", self.states.len())?; + writeln!(f, "pattern length: {:?}", self.patterns_len())?; + writeln!(f, "shortest pattern length: {:?}", self.min_pattern_len)?; + writeln!(f, "longest pattern length: {:?}", self.max_pattern_len)?; + writeln!(f, "memory usage: {:?}", self.memory_usage())?; + writeln!(f, ")")?; + Ok(()) + } +} diff --git a/vendor/aho-corasick/src/packed/api.rs b/vendor/aho-corasick/src/packed/api.rs index 51703d0..44f0bc9 100644 --- a/vendor/aho-corasick/src/packed/api.rs +++ b/vendor/aho-corasick/src/packed/api.rs @@ -1,9 +1,9 @@ -use std::u16; +use alloc::sync::Arc; -use crate::packed::pattern::Patterns; -use crate::packed::rabinkarp::RabinKarp; -use crate::packed::teddy::{self, Teddy}; -use crate::Match; +use crate::{ + packed::{pattern::Patterns, rabinkarp::RabinKarp, teddy}, + util::search::{Match, Span}, +}; /// This is a limit placed on the total number of patterns we're willing to try /// and match at once. As more sophisticated algorithms are added, this number @@ -13,12 +13,10 @@ const PATTERN_LIMIT: usize = 128; /// A knob for controlling the match semantics of a packed multiple string /// searcher. /// -/// This differs from the -/// [`MatchKind`](../enum.MatchKind.html) -/// type in the top-level crate module in that it doesn't support -/// "standard" match semantics, and instead only supports leftmost-first or -/// leftmost-longest. Namely, "standard" semantics cannot be easily supported -/// by packed searchers. +/// This differs from the [`MatchKind`](crate::MatchKind) type in the top-level +/// crate module in that it doesn't support "standard" match semantics, +/// and instead only supports leftmost-first or leftmost-longest. Namely, +/// "standard" semantics cannot be easily supported by packed searchers. /// /// For more information on the distinction between leftmost-first and /// leftmost-longest, see the docs on the top-level `MatchKind` type. @@ -26,6 +24,7 @@ const PATTERN_LIMIT: usize = 128; /// Unlike the top-level `MatchKind` type, the default match semantics for this /// type are leftmost-first. #[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[non_exhaustive] pub enum MatchKind { /// Use leftmost-first match semantics, which reports leftmost matches. /// When there are multiple possible leftmost matches, the match @@ -38,13 +37,6 @@ pub enum MatchKind { /// When there are multiple possible leftmost matches, the longest match /// is chosen. LeftmostLongest, - /// Hints that destructuring should not be exhaustive. 
- /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, } impl Default for MatchKind { @@ -59,9 +51,8 @@ impl Default for MatchKind { /// match semantics (leftmost-first or leftmost-longest) of a searcher. In the /// future, more knobs may be made available. /// -/// A configuration produces a [`packed::Builder`](struct.Builder.html), which -/// in turn can be used to construct a -/// [`packed::Searcher`](struct.Searcher.html) for searching. +/// A configuration produces a [`packed::Builder`](Builder), which in turn can +/// be used to construct a [`packed::Searcher`](Searcher) for searching. /// /// # Example /// @@ -69,7 +60,7 @@ impl Default for MatchKind { /// default (leftmost-first). /// /// ``` -/// use aho_corasick::packed::{Config, MatchKind}; +/// use aho_corasick::{packed::{Config, MatchKind}, PatternID}; /// /// # fn example() -> Option<()> { /// let searcher = Config::new() @@ -78,13 +69,15 @@ impl Default for MatchKind { /// .add("foo") /// .add("foobar") /// .build()?; -/// let matches: Vec = searcher +/// let matches: Vec = searcher /// .find_iter("foobar") /// .map(|mat| mat.pattern()) /// .collect(); -/// assert_eq!(vec![1], matches); +/// assert_eq!(vec![PatternID::must(1)], matches); /// # Some(()) } -/// # if cfg!(target_arch = "x86_64") { +/// # if cfg!(all(feature = "std", any( +/// # target_arch = "x86_64", target_arch = "aarch64", +/// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); @@ -94,8 +87,9 @@ impl Default for MatchKind { pub struct Config { kind: MatchKind, force: Option, - force_teddy_fat: Option, - force_avx: Option, + only_teddy_fat: Option, + only_teddy_256bit: Option, + heuristic_pattern_limits: bool, } /// An internal option for forcing the use of a particular packed algorithm. @@ -122,15 +116,14 @@ impl Config { Config { kind: MatchKind::LeftmostFirst, force: None, - force_teddy_fat: None, - force_avx: None, + only_teddy_fat: None, + only_teddy_256bit: None, + heuristic_pattern_limits: true, } } /// Create a packed builder from this configuration. The builder can be - /// used to accumulate patterns and create a - /// [`Searcher`](struct.Searcher.html) - /// from them. + /// used to accumulate patterns and create a [`Searcher`] from them. pub fn builder(&self) -> Builder { Builder::from_config(self.clone()) } @@ -147,7 +140,7 @@ impl Config { /// should not use it as it is not part of the API stability guarantees of /// this crate. #[doc(hidden)] - pub fn force_teddy(&mut self, yes: bool) -> &mut Config { + pub fn only_teddy(&mut self, yes: bool) -> &mut Config { if yes { self.force = Some(ForceAlgorithm::Teddy); } else { @@ -162,8 +155,8 @@ impl Config { /// should not use it as it is not part of the API stability guarantees of /// this crate. #[doc(hidden)] - pub fn force_teddy_fat(&mut self, yes: Option) -> &mut Config { - self.force_teddy_fat = yes; + pub fn only_teddy_fat(&mut self, yes: Option) -> &mut Config { + self.only_teddy_fat = yes; self } @@ -174,8 +167,8 @@ impl Config { /// should not use it as it is not part of the API stability guarantees of /// this crate. 
     #[doc(hidden)]
-    pub fn force_avx(&mut self, yes: Option<bool>) -> &mut Config {
-        self.force_avx = yes;
+    pub fn only_teddy_256bit(&mut self, yes: Option<bool>) -> &mut Config {
+        self.only_teddy_256bit = yes;
         self
     }
 
@@ -185,7 +178,7 @@ impl Config {
     /// should not use it as it is not part of the API stability guarantees of
     /// this crate.
     #[doc(hidden)]
-    pub fn force_rabin_karp(&mut self, yes: bool) -> &mut Config {
+    pub fn only_rabin_karp(&mut self, yes: bool) -> &mut Config {
         if yes {
             self.force = Some(ForceAlgorithm::RabinKarp);
         } else {
@@ -193,6 +186,17 @@ impl Config {
         }
         self
     }
+
+    /// Request that heuristic limitations on the number of patterns be
+    /// employed. This is useful to disable for benchmarking, where one wants
+    /// to explore how Teddy performs on a large number of patterns even if
+    /// the heuristics would otherwise refuse construction.
+    ///
+    /// This is enabled by default.
+    pub fn heuristic_pattern_limits(&mut self, yes: bool) -> &mut Config {
+        self.heuristic_pattern_limits = yes;
+        self
+    }
 }
 
 /// A builder for constructing a packed searcher from a collection of patterns.
@@ -203,20 +207,22 @@ impl Config {
 /// default, leftmost-first match semantics are used.
 ///
 /// ```
-/// use aho_corasick::packed::{Builder, MatchKind};
+/// use aho_corasick::{packed::{Builder, MatchKind}, PatternID};
 ///
 /// # fn example() -> Option<()> {
 /// let searcher = Builder::new()
 ///     .add("foobar")
 ///     .add("foo")
 ///     .build()?;
-/// let matches: Vec<usize> = searcher
+/// let matches: Vec<PatternID> = searcher
 ///     .find_iter("foobar")
 ///     .map(|mat| mat.pattern())
 ///     .collect();
-/// assert_eq!(vec![0], matches);
+/// assert_eq!(vec![PatternID::ZERO], matches);
 /// # Some(()) }
-/// # if cfg!(target_arch = "x86_64") {
+/// # if cfg!(all(feature = "std", any(
+/// #     target_arch = "x86_64", target_arch = "aarch64",
+/// # ))) {
 /// #     example().unwrap()
 /// # } else {
 /// #     assert!(example().is_none());
@@ -250,6 +256,7 @@ impl Builder {
         }
         let mut patterns = self.patterns.clone();
         patterns.set_match_kind(self.config.kind);
+        let patterns = Arc::new(patterns);
         let rabinkarp = RabinKarp::new(&patterns);
         // Effectively, we only want to return a searcher if we can use Teddy,
         // since Teddy is our only fast packed searcher at the moment.
@@ -258,23 +265,28 @@ impl Builder {
         // is to force it using undocumented APIs (for tests/benchmarks).
         let (search_kind, minimum_len) = match self.config.force {
             None | Some(ForceAlgorithm::Teddy) => {
-                let teddy = match self.build_teddy(&patterns) {
+                debug!("trying to build Teddy packed matcher");
+                let teddy = match self.build_teddy(Arc::clone(&patterns)) {
                     None => return None,
                     Some(teddy) => teddy,
                 };
                 let minimum_len = teddy.minimum_len();
                 (SearchKind::Teddy(teddy), minimum_len)
             }
-            Some(ForceAlgorithm::RabinKarp) => (SearchKind::RabinKarp, 0),
+            Some(ForceAlgorithm::RabinKarp) => {
+                debug!("using Rabin-Karp packed matcher");
+                (SearchKind::RabinKarp, 0)
+            }
         };
         Some(Searcher { patterns, rabinkarp, search_kind, minimum_len })
     }
 
-    fn build_teddy(&self, patterns: &Patterns) -> Option<Teddy> {
+    fn build_teddy(&self, patterns: Arc<Patterns>) -> Option<teddy::Searcher> {
         teddy::Builder::new()
-            .avx(self.config.force_avx)
-            .fat(self.config.force_teddy_fat)
-            .build(&patterns)
+            .only_256bit(self.config.only_teddy_256bit)
+            .only_fat(self.config.only_teddy_fat)
+            .heuristic_pattern_limits(self.config.heuristic_pattern_limits)
+            .build(patterns)
     }
 
     /// Add the given pattern to this set to match.
@@ -297,7 +309,7 @@ impl Builder {
             return self;
         }
         // Just in case PATTERN_LIMIT increases beyond u16::MAX.
- assert!(self.patterns.len() <= u16::MAX as usize); + assert!(self.patterns.len() <= core::u16::MAX as usize); let pattern = pattern.as_ref(); if pattern.is_empty() { @@ -332,6 +344,16 @@ impl Builder { } self } + + /// Returns the number of patterns added to this builder. + pub fn len(&self) -> usize { + self.patterns.len() + } + + /// Returns the length, in bytes, of the shortest pattern added. + pub fn minimum_len(&self) -> usize { + self.patterns.minimum_len() + } } impl Default for Builder { @@ -344,8 +366,7 @@ impl Default for Builder { /// /// If callers need more flexible construction, or if one wants to change the /// match semantics (either leftmost-first or leftmost-longest), then one can -/// use the [`Config`](struct.Config.html) and/or -/// [`Builder`](struct.Builder.html) types for more fine grained control. +/// use the [`Config`] and/or [`Builder`] types for more fine grained control. /// /// # Example /// @@ -353,17 +374,19 @@ impl Default for Builder { /// By default, leftmost-first match semantics are used. /// /// ``` -/// use aho_corasick::packed::{MatchKind, Searcher}; +/// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID}; /// /// # fn example() -> Option<()> { /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; -/// let matches: Vec = searcher +/// let matches: Vec = searcher /// .find_iter("foobar") /// .map(|mat| mat.pattern()) /// .collect(); -/// assert_eq!(vec![0], matches); +/// assert_eq!(vec![PatternID::ZERO], matches); /// # Some(()) } -/// # if cfg!(target_arch = "x86_64") { +/// # if cfg!(all(feature = "std", any( +/// # target_arch = "x86_64", target_arch = "aarch64", +/// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); @@ -371,7 +394,7 @@ impl Default for Builder { /// ``` #[derive(Clone, Debug)] pub struct Searcher { - patterns: Patterns, + patterns: Arc, rabinkarp: RabinKarp, search_kind: SearchKind, minimum_len: usize, @@ -379,7 +402,7 @@ pub struct Searcher { #[derive(Clone, Debug)] enum SearchKind { - Teddy(Teddy), + Teddy(teddy::Searcher), RabinKarp, } @@ -396,17 +419,19 @@ impl Searcher { /// Basic usage: /// /// ``` - /// use aho_corasick::packed::{MatchKind, Searcher}; + /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID}; /// /// # fn example() -> Option<()> { /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; - /// let matches: Vec = searcher + /// let matches: Vec = searcher /// .find_iter("foobar") /// .map(|mat| mat.pattern()) /// .collect(); - /// assert_eq!(vec![0], matches); + /// assert_eq!(vec![PatternID::ZERO], matches); /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { + /// # if cfg!(all(feature = "std", any( + /// # target_arch = "x86_64", target_arch = "aarch64", + /// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); @@ -420,6 +445,20 @@ impl Searcher { Builder::new().extend(patterns).build() } + /// A convenience function for calling `Config::new()`. + /// + /// This is useful for avoiding an additional import. + pub fn config() -> Config { + Config::new() + } + + /// A convenience function for calling `Builder::new()`. + /// + /// This is useful for avoiding an additional import. + pub fn builder() -> Builder { + Builder::new() + } + /// Return the first occurrence of any of the patterns in this searcher, /// according to its match semantics, in the given haystack. 
The `Match`
     /// returned will include the identifier of the pattern that matched, which
@@ -431,23 +470,27 @@ impl Searcher {
     /// Basic usage:
     ///
     /// ```
-    /// use aho_corasick::packed::{MatchKind, Searcher};
+    /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
     ///
     /// # fn example() -> Option<()> {
     /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
     /// let mat = searcher.find("foobar")?;
-    /// assert_eq!(0, mat.pattern());
+    /// assert_eq!(PatternID::ZERO, mat.pattern());
     /// assert_eq!(0, mat.start());
     /// assert_eq!(6, mat.end());
     /// # Some(()) }
-    /// # if cfg!(target_arch = "x86_64") {
+    /// # if cfg!(all(feature = "std", any(
+    /// #     target_arch = "x86_64", target_arch = "aarch64",
+    /// # ))) {
     /// #     example().unwrap()
    /// # } else {
     /// #     assert!(example().is_none());
     /// # }
     /// ```
+    #[inline]
     pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<Match> {
-        self.find_at(haystack, 0)
+        let haystack = haystack.as_ref();
+        self.find_in(haystack, Span::from(0..haystack.len()))
     }
 
     /// Return the first occurrence of any of the patterns in this searcher,
@@ -464,36 +507,40 @@ impl Searcher {
     /// Basic usage:
     ///
     /// ```
-    /// use aho_corasick::packed::{MatchKind, Searcher};
+    /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID, Span};
     ///
     /// # fn example() -> Option<()> {
+    /// let haystack = "foofoobar";
     /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
-    /// let mat = searcher.find_at("foofoobar", 3)?;
-    /// assert_eq!(0, mat.pattern());
+    /// let mat = searcher.find_in(haystack, Span::from(3..haystack.len()))?;
+    /// assert_eq!(PatternID::ZERO, mat.pattern());
     /// assert_eq!(3, mat.start());
     /// assert_eq!(9, mat.end());
     /// # Some(()) }
-    /// # if cfg!(target_arch = "x86_64") {
+    /// # if cfg!(all(feature = "std", any(
+    /// #     target_arch = "x86_64", target_arch = "aarch64",
+    /// # ))) {
     /// #     example().unwrap()
     /// # } else {
     /// #     assert!(example().is_none());
     /// # }
     /// ```
-    pub fn find_at<B: AsRef<[u8]>>(
+    #[inline]
+    pub fn find_in<B: AsRef<[u8]>>(
         &self,
         haystack: B,
-        at: usize,
+        span: Span,
     ) -> Option<Match> {
         let haystack = haystack.as_ref();
         match self.search_kind {
             SearchKind::Teddy(ref teddy) => {
-                if haystack[at..].len() < teddy.minimum_len() {
-                    return self.slow_at(haystack, at);
+                if haystack[span].len() < teddy.minimum_len() {
+                    return self.find_in_slow(haystack, span);
                 }
-                teddy.find_at(&self.patterns, haystack, at)
+                teddy.find(&haystack[..span.end], span.start)
             }
             SearchKind::RabinKarp => {
-                self.rabinkarp.find_at(&self.patterns, haystack, at)
+                self.rabinkarp.find_at(&haystack[..span.end], span.start)
             }
         }
     }
@@ -506,27 +553,37 @@ impl Searcher {
     /// Basic usage:
     ///
     /// ```
-    /// use aho_corasick::packed::{MatchKind, Searcher};
+    /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
     ///
     /// # fn example() -> Option<()> {
     /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
-    /// let matches: Vec<usize> = searcher
+    /// let matches: Vec<PatternID> = searcher
     ///     .find_iter("foobar fooba foofoo")
    ///     .map(|mat| mat.pattern())
     ///     .collect();
-    /// assert_eq!(vec![0, 1, 1, 1], matches);
+    /// assert_eq!(vec![
+    ///     PatternID::must(0),
+    ///     PatternID::must(1),
+    ///     PatternID::must(1),
+    ///     PatternID::must(1),
+    /// ], matches);
     /// # Some(()) }
-    /// # if cfg!(target_arch = "x86_64") {
+    /// # if cfg!(all(feature = "std", any(
+    /// #     target_arch = "x86_64", target_arch = "aarch64",
+    /// # ))) {
     /// #     example().unwrap()
     /// # } else {
     /// #     assert!(example().is_none());
     /// # }
     /// ```
+    #[inline]
    pub fn find_iter<'a, 'b, B: ?Sized +
AsRef<[u8]>>( &'a self, haystack: &'b B, ) -> FindIter<'a, 'b> { - FindIter { searcher: self, haystack: haystack.as_ref(), at: 0 } + let haystack = haystack.as_ref(); + let span = Span::from(0..haystack.len()); + FindIter { searcher: self, haystack, span } } /// Returns the match kind used by this packed searcher. @@ -543,12 +600,15 @@ impl Searcher { /// // leftmost-first is the default. /// assert_eq!(&MatchKind::LeftmostFirst, searcher.match_kind()); /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { + /// # if cfg!(all(feature = "std", any( + /// # target_arch = "x86_64", target_arch = "aarch64", + /// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); /// # } /// ``` + #[inline] pub fn match_kind(&self) -> &MatchKind { self.patterns.match_kind() } @@ -563,16 +623,18 @@ impl Searcher { /// want to avoid ever using the slower variant, which one can do by /// never passing a haystack shorter than the minimum length returned by /// this method. + #[inline] pub fn minimum_len(&self) -> usize { self.minimum_len } /// Returns the approximate total amount of heap used by this searcher, in /// units of bytes. - pub fn heap_bytes(&self) -> usize { - self.patterns.heap_bytes() - + self.rabinkarp.heap_bytes() - + self.search_kind.heap_bytes() + #[inline] + pub fn memory_usage(&self) -> usize { + self.patterns.memory_usage() + + self.rabinkarp.memory_usage() + + self.search_kind.memory_usage() } /// Use a slow (non-packed) searcher. @@ -581,15 +643,15 @@ impl Searcher { /// not be used to search a specific haystack. For example, if Teddy was /// built but the haystack is smaller than ~34 bytes, then Teddy might not /// be able to run. - fn slow_at(&self, haystack: &[u8], at: usize) -> Option { - self.rabinkarp.find_at(&self.patterns, haystack, at) + fn find_in_slow(&self, haystack: &[u8], span: Span) -> Option { + self.rabinkarp.find_at(&haystack[..span.end], span.start) } } impl SearchKind { - fn heap_bytes(&self) -> usize { + fn memory_usage(&self) -> usize { match *self { - SearchKind::Teddy(ref ted) => ted.heap_bytes(), + SearchKind::Teddy(ref ted) => ted.memory_usage(), SearchKind::RabinKarp => 0, } } @@ -597,28 +659,28 @@ impl SearchKind { /// An iterator over non-overlapping matches from a packed searcher. /// -/// The lifetime `'s` refers to the lifetime of the underlying -/// [`Searcher`](struct.Searcher.html), while the lifetime `'h` refers to the -/// lifetime of the haystack being searched. +/// The lifetime `'s` refers to the lifetime of the underlying [`Searcher`], +/// while the lifetime `'h` refers to the lifetime of the haystack being +/// searched. #[derive(Debug)] pub struct FindIter<'s, 'h> { searcher: &'s Searcher, haystack: &'h [u8], - at: usize, + span: Span, } impl<'s, 'h> Iterator for FindIter<'s, 'h> { type Item = Match; fn next(&mut self) -> Option { - if self.at > self.haystack.len() { + if self.span.start > self.span.end { return None; } - match self.searcher.find_at(&self.haystack, self.at) { + match self.searcher.find_in(&self.haystack, self.span) { None => None, - Some(c) => { - self.at = c.end; - Some(c) + Some(m) => { + self.span.start = m.end(); + Some(m) } } } diff --git a/vendor/aho-corasick/src/packed/ext.rs b/vendor/aho-corasick/src/packed/ext.rs new file mode 100644 index 0000000..b689642 --- /dev/null +++ b/vendor/aho-corasick/src/packed/ext.rs @@ -0,0 +1,39 @@ +/// A trait for adding some helper routines to pointers. 
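+///
+/// (Editor's sketch of the intended usage, assuming the safety contract
+/// documented on `distance` below.)
+///
+/// ```ignore
+/// let hay = b"haystack";
+/// let start = hay.as_ptr();
+/// // SAFETY: `end` is derived from `start` and `end >= start`.
+/// let end = unsafe { start.add(hay.len()) };
+/// assert_eq!(8, unsafe { end.distance(start) });
+/// ```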
+pub(crate) trait Pointer {
+    /// Returns the distance, in units of `T`, between `self` and `origin`.
+    ///
+    /// # Safety
+    ///
+    /// Same as `ptr::offset_from`, in addition to `self >= origin`.
+    unsafe fn distance(self, origin: Self) -> usize;
+
+    /// Casts this pointer to `usize`.
+    ///
+    /// Callers should not convert the `usize` back to a pointer if at all
+    /// possible. (And if you believe it's necessary, open an issue to discuss
+    /// why. Otherwise, it has the potential to violate pointer provenance.)
+    /// The purpose of this function is just to be able to do arithmetic, i.e.,
+    /// computing offsets or alignments.
+    fn as_usize(self) -> usize;
+}
+
+impl<T> Pointer for *const T {
+    unsafe fn distance(self, origin: *const T) -> usize {
+        // TODO: Replace with `ptr::sub_ptr` once stabilized.
+        usize::try_from(self.offset_from(origin)).unwrap_unchecked()
+    }
+
+    fn as_usize(self) -> usize {
+        self as usize
+    }
+}
+
+impl<T> Pointer for *mut T {
+    unsafe fn distance(self, origin: *mut T) -> usize {
+        (self as *const T).distance(origin as *const T)
+    }
+
+    fn as_usize(self) -> usize {
+        (self as *const T).as_usize()
+    }
+}
diff --git a/vendor/aho-corasick/src/packed/mod.rs b/vendor/aho-corasick/src/packed/mod.rs
index 97c40ff..3990bc9 100644
--- a/vendor/aho-corasick/src/packed/mod.rs
+++ b/vendor/aho-corasick/src/packed/mod.rs
@@ -1,58 +1,59 @@
 /*!
-A lower level API for packed multiple substring search, principally for a small
-number of patterns.
+Provides packed multiple substring search, principally for a small number of
+patterns.
 
-This sub-module provides vectorized routines for quickly finding matches of a
-small number of patterns. In general, users of this crate shouldn't need to
-interface with this module directly, as the primary
-[`AhoCorasick`](../struct.AhoCorasick.html)
-searcher will use these routines automatically as a prefilter when applicable.
-However, in some cases, callers may want to bypass the Aho-Corasick machinery
-entirely and use this vectorized searcher directly.
+This sub-module provides vectorized routines for quickly finding
+matches of a small number of patterns. In general, users of this crate
+shouldn't need to interface with this module directly, as the primary
+[`AhoCorasick`](crate::AhoCorasick) searcher will use these routines
+automatically as a prefilter when applicable. However, in some cases, callers
+may want to bypass the Aho-Corasick machinery entirely and use this vectorized
+searcher directly.
 
 # Overview
 
 The primary types in this sub-module are:
 
-* [`Searcher`](struct.Searcher.html) executes the actual search algorithm to
-  report matches in a haystack.
-* [`Builder`](struct.Builder.html) accumulates patterns incrementally and can
-  construct a `Searcher`.
-* [`Config`](struct.Config.html) permits tuning the searcher, and itself will
-  produce a `Builder` (which can then be used to build a `Searcher`).
-  Currently, the only tuneable knob are the match semantics, but this may be
-  expanded in the future.
+* [`Searcher`] executes the actual search algorithm to report matches in a
+haystack.
+* [`Builder`] accumulates patterns incrementally and can construct a
+`Searcher`.
+* [`Config`] permits tuning the searcher, and itself will produce a `Builder`
+(which can then be used to build a `Searcher`). Currently, the only tuneable
+knob is the match semantics, but this may be expanded in the future.
 
 # Examples
 
 This example shows how to create a searcher from an iterator of patterns.
 By default, leftmost-first match semantics are used.
(See the top-level
-[`MatchKind`](../enum.MatchKind.html) type for more details about match
-semantics, which apply similarly to packed substring search.)
+[`MatchKind`] type for more details about match semantics, which apply
+similarly to packed substring search.)
 
 ```
-use aho_corasick::packed::{MatchKind, Searcher};
+use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
 
 # fn example() -> Option<()> {
 let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
-let matches: Vec<usize> = searcher
+let matches: Vec<PatternID> = searcher
     .find_iter("foobar")
     .map(|mat| mat.pattern())
     .collect();
-assert_eq!(vec![0], matches);
+assert_eq!(vec![PatternID::ZERO], matches);
 # Some(()) }
-# if cfg!(target_arch = "x86_64") {
+# if cfg!(all(feature = "std", any(
+#     target_arch = "x86_64", target_arch = "aarch64",
+# ))) {
 #     example().unwrap()
 # } else {
 #     assert!(example().is_none());
 # }
 ```
 
-This example shows how to use [`Config`](struct.Config.html) to change the
-match semantics to leftmost-longest:
+This example shows how to use [`Config`] to change the match semantics to
+leftmost-longest:
 
 ```
-use aho_corasick::packed::{Config, MatchKind};
+use aho_corasick::{packed::{Config, MatchKind}, PatternID};
 
 # fn example() -> Option<()> {
 let searcher = Config::new()
@@ -61,13 +62,15 @@ let searcher = Config::new()
     .add("foo")
     .add("foobar")
     .build()?;
-let matches: Vec<usize> = searcher
+let matches: Vec<PatternID> = searcher
     .find_iter("foobar")
     .map(|mat| mat.pattern())
     .collect();
-assert_eq!(vec![1], matches);
+assert_eq!(vec![PatternID::must(1)], matches);
 # Some(()) }
-# if cfg!(target_arch = "x86_64") {
+# if cfg!(all(feature = "std", any(
+#     target_arch = "x86_64", target_arch = "aarch64",
+# ))) {
 #     example().unwrap()
 # } else {
 #     assert!(example().is_none());
@@ -83,21 +86,21 @@ search tend to do better with a small number of patterns, whereas Aho-Corasick
 generally maintains reasonably consistent performance regardless of the number
 of patterns you give it. Because of this, the vectorized searcher in this
 sub-module cannot be used as a general purpose searcher, since building the
-searcher may fail. However, in exchange, when searching for a small number of
-patterns, searching can be quite a bit faster than Aho-Corasick (sometimes by
-an order of magnitude).
+searcher may fail even when given a small number of patterns. However, in
+exchange, when searching for a small number of patterns, searching can be quite
+a bit faster than Aho-Corasick (sometimes by an order of magnitude).
 
 The key takeaway here is that constructing a searcher from a list of patterns
-is a fallible operation. While the precise conditions under which building a
-searcher can fail is specifically an implementation detail, here are some
-common reasons:
+is a fallible operation with no clear rules for when it will fail. While the
+precise conditions under which building a searcher can fail are specifically an
+implementation detail, here are some common reasons:
 
 * Too many patterns were given. Typically, the limit is on the order of 100 or
   so, but this limit may fluctuate based on available CPU features.
 * The available packed algorithms require CPU features that aren't available.
   For example, currently, this crate only provides packed algorithms for
-  `x86_64`. Therefore, constructing a packed searcher on any other target
-  (e.g., ARM) will always fail.
+  `x86_64` and `aarch64`. Therefore, constructing a packed searcher on any
+  other target will always fail.
 * Zero patterns were given, or one of the patterns given was empty.
Packed searchers require at least one pattern and that all patterns are non-empty. * Something else about the nature of the patterns (typically based on @@ -108,10 +111,10 @@ common reasons: pub use crate::packed::api::{Builder, Config, FindIter, MatchKind, Searcher}; mod api; +mod ext; mod pattern; mod rabinkarp; mod teddy; -#[cfg(test)] +#[cfg(all(feature = "std", test))] mod tests; -#[cfg(target_arch = "x86_64")] mod vector; diff --git a/vendor/aho-corasick/src/packed/pattern.rs b/vendor/aho-corasick/src/packed/pattern.rs index f4c6756..95aca4d 100644 --- a/vendor/aho-corasick/src/packed/pattern.rs +++ b/vendor/aho-corasick/src/packed/pattern.rs @@ -1,16 +1,11 @@ -use std::cmp; -use std::fmt; -use std::mem; -use std::u16; -use std::usize; +use core::{cmp, fmt, mem, u16, usize}; -use crate::packed::api::MatchKind; +use alloc::{boxed::Box, string::String, vec, vec::Vec}; -/// The type used for representing a pattern identifier. -/// -/// We don't use `usize` here because our packed searchers don't scale to -/// huge numbers of patterns, so we keep things a bit smaller. -pub type PatternID = u16; +use crate::{ + packed::{api::MatchKind, ext::Pointer}, + PatternID, +}; /// A non-empty collection of non-empty patterns to search for. /// @@ -22,7 +17,7 @@ pub type PatternID = u16; /// Note that this collection is not a set. The same pattern can appear more /// than once. #[derive(Clone, Debug)] -pub struct Patterns { +pub(crate) struct Patterns { /// The match semantics supported by this collection of patterns. /// /// The match semantics determines the order of the iterator over patterns. @@ -40,14 +35,17 @@ pub struct Patterns { order: Vec<PatternID>, /// The length of the smallest pattern, in bytes. minimum_len: usize, - /// The largest pattern identifier. This should always be equivalent to - /// the number of patterns minus one in this collection. - max_pattern_id: PatternID, /// The total number of pattern bytes across the entire collection. This /// is used for reporting total heap usage in constant time. total_pattern_bytes: usize, } +// BREADCRUMBS: I think we want to experiment with a different bucket +// representation. Basically, each bucket is just a Range<usize> to a single +// contiguous allocation? Maybe length-prefixed patterns or something? The +// idea is to try to get rid of the pointer chasing in verification. I don't +// know that that is the issue, but I suspect it is. + impl Patterns { /// Create a new collection of patterns for the given match semantics. The /// ID of each pattern is the index of the pattern at which it occurs in /// the `by_id` slice. /// /// If any of the patterns in the slice given are empty, then this panics. /// Similarly, if the number of patterns given is zero, then this also /// panics. - pub fn new() -> Patterns { + pub(crate) fn new() -> Patterns { Patterns { kind: MatchKind::default(), by_id: vec![], order: vec![], minimum_len: usize::MAX, - max_pattern_id: 0, total_pattern_bytes: 0, } } @@ -70,12 +67,11 @@ impl Patterns { /// Add a pattern to this collection. /// /// This panics if the pattern given is empty.
- pub fn add(&mut self, bytes: &[u8]) { + pub(crate) fn add(&mut self, bytes: &[u8]) { assert!(!bytes.is_empty()); assert!(self.by_id.len() <= u16::MAX as usize); - let id = self.by_id.len() as u16; - self.max_pattern_id = id; + let id = PatternID::new(self.by_id.len()).unwrap(); self.order.push(id); self.by_id.push(bytes.to_vec()); self.minimum_len = cmp::min(self.minimum_len, bytes.len()); @@ -85,39 +81,36 @@ impl Patterns { /// Set the match kind semantics for this collection of patterns. /// /// If the kind is not set, then the default is leftmost-first. - pub fn set_match_kind(&mut self, kind: MatchKind) { - match kind { + pub(crate) fn set_match_kind(&mut self, kind: MatchKind) { + self.kind = kind; + match self.kind { MatchKind::LeftmostFirst => { self.order.sort(); } MatchKind::LeftmostLongest => { let (order, by_id) = (&mut self.order, &mut self.by_id); order.sort_by(|&id1, &id2| { - by_id[id1 as usize] - .len() - .cmp(&by_id[id2 as usize].len()) - .reverse() + by_id[id1].len().cmp(&by_id[id2].len()).reverse() }); } - MatchKind::__Nonexhaustive => unreachable!(), } } /// Return the number of patterns in this collection. /// /// This is guaranteed to be greater than zero. - pub fn len(&self) -> usize { + pub(crate) fn len(&self) -> usize { self.by_id.len() } /// Returns true if and only if this collection of patterns is empty. - pub fn is_empty(&self) -> bool { + pub(crate) fn is_empty(&self) -> bool { self.len() == 0 } /// Returns the approximate total amount of heap used by these patterns, in /// units of bytes. - pub fn heap_bytes(&self) -> usize { + pub(crate) fn memory_usage(&self) -> usize { self.order.len() * mem::size_of::<PatternID>() + self.by_id.len() * mem::size_of::<Vec<u8>>() + self.total_pattern_bytes @@ -125,38 +118,29 @@ impl Patterns { /// Clears all heap memory associated with this collection of patterns and /// resets all state such that it is a valid empty collection. - pub fn reset(&mut self) { + pub(crate) fn reset(&mut self) { self.kind = MatchKind::default(); self.by_id.clear(); self.order.clear(); self.minimum_len = usize::MAX; - self.max_pattern_id = 0; - } - - /// Return the maximum pattern identifier in this collection. This can be - /// useful in searchers for ensuring that the collection of patterns they - /// are provided at search time and at build time have the same size. - pub fn max_pattern_id(&self) -> PatternID { - assert_eq!((self.max_pattern_id + 1) as usize, self.len()); - self.max_pattern_id } /// Returns the length, in bytes, of the smallest pattern. /// /// This is guaranteed to be at least one. - pub fn minimum_len(&self) -> usize { + pub(crate) fn minimum_len(&self) -> usize { self.minimum_len } /// Returns the match semantics used by these patterns. - pub fn match_kind(&self) -> &MatchKind { + pub(crate) fn match_kind(&self) -> &MatchKind { &self.kind } /// Return the pattern with the given identifier. If such a pattern does /// not exist, then this panics. - pub fn get(&self, id: PatternID) -> Pattern<'_> { - Pattern(&self.by_id[id as usize]) + pub(crate) fn get(&self, id: PatternID) -> Pattern<'_> { + Pattern(&self.by_id[id]) } /// Return the pattern with the given identifier without performing bounds /// checks. /// /// # Safety /// /// Callers must ensure that a pattern with the given identifier exists /// before using this method.
- #[cfg(target_arch = "x86_64")] - pub unsafe fn get_unchecked(&self, id: PatternID) -> Pattern<'_> { - Pattern(self.by_id.get_unchecked(id as usize)) + pub(crate) unsafe fn get_unchecked(&self, id: PatternID) -> Pattern<'_> { + Pattern(self.by_id.get_unchecked(id.as_usize())) } /// Return an iterator over all the patterns in this collection, in the @@ -189,7 +172,7 @@ impl Patterns { /// the order provided by this iterator, then the result is guaranteed /// to satisfy the correct match semantics. (Either leftmost-first or /// leftmost-longest.) - pub fn iter(&self) -> PatternIter<'_> { + pub(crate) fn iter(&self) -> PatternIter<'_> { PatternIter { patterns: self, i: 0 } } } @@ -202,7 +185,7 @@ impl Patterns { /// The lifetime `'p` corresponds to the lifetime of the collection of patterns /// this is iterating over. #[derive(Debug)] -pub struct PatternIter<'p> { +pub(crate) struct PatternIter<'p> { patterns: &'p Patterns, i: usize, } @@ -223,7 +206,7 @@ impl<'p> Iterator for PatternIter<'p> { /// A pattern that is used in packed searching. #[derive(Clone)] -pub struct Pattern<'a>(&'a [u8]); +pub(crate) struct Pattern<'a>(&'a [u8]); impl<'a> fmt::Debug for Pattern<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -235,84 +218,263 @@ impl<'a> fmt::Debug for Pattern<'a> { impl<'p> Pattern<'p> { /// Returns the length of this pattern, in bytes. - pub fn len(&self) -> usize { + pub(crate) fn len(&self) -> usize { self.0.len() } /// Returns the bytes of this pattern. - pub fn bytes(&self) -> &[u8] { + pub(crate) fn bytes(&self) -> &[u8] { &self.0 } /// Returns the first `len` low nybbles from this pattern. If this pattern /// is shorter than `len`, then this panics. - #[cfg(target_arch = "x86_64")] - pub fn low_nybbles(&self, len: usize) -> Vec { - let mut nybs = vec![]; - for &b in self.bytes().iter().take(len) { - nybs.push(b & 0xF); + pub(crate) fn low_nybbles(&self, len: usize) -> Box<[u8]> { + let mut nybs = vec![0; len].into_boxed_slice(); + for (i, byte) in self.bytes().iter().take(len).enumerate() { + nybs[i] = byte & 0xF; } nybs } /// Returns true if this pattern is a prefix of the given bytes. #[inline(always)] - pub fn is_prefix(&self, bytes: &[u8]) -> bool { - self.len() <= bytes.len() && self.equals(&bytes[..self.len()]) + pub(crate) fn is_prefix(&self, bytes: &[u8]) -> bool { + is_prefix(bytes, self.bytes()) } - /// Returns true if and only if this pattern equals the given bytes. + /// Returns true if this pattern is a prefix of the haystack given by the + /// raw `start` and `end` pointers. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. #[inline(always)] - pub fn equals(&self, bytes: &[u8]) -> bool { - // Why not just use memcmp for this? 
Well, memcmp requires calling out - // to libc, and this routine is called in fairly hot code paths. Other - // than just calling out to libc, it also seems to result in worse - // codegen. By rolling our own memcpy in pure Rust, it seems to appear - // more friendly to the optimizer. - // - // This results in an improvement in just about every benchmark. Some - // smaller than others, but in some cases, up to 30% faster. - - if self.len() != bytes.len() { + pub(crate) unsafe fn is_prefix_raw( + &self, + start: *const u8, + end: *const u8, + ) -> bool { + let patlen = self.bytes().len(); + let haylen = end.distance(start); + if patlen > haylen { return false; } - if self.len() < 8 { - for (&b1, &b2) in self.bytes().iter().zip(bytes) { - if b1 != b2 { - return false; - } + // SAFETY: We've checked that the haystack has length at least equal + // to this pattern. All other safety concerns are the responsibility + // of the caller. + is_equal_raw(start, self.bytes().as_ptr(), patlen) + } +} + +/// Returns true if and only if `needle` is a prefix of `haystack`. +/// +/// This uses a latency optimized variant of `memcmp` internally which *might* +/// make this faster for very short strings. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +#[inline(always)] +fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { + if needle.len() > haystack.len() { + return false; + } + // SAFETY: Our pointers are derived directly from borrowed slices which + // uphold all of our safety guarantees except for length. We account for + // length with the check above. + unsafe { is_equal_raw(haystack.as_ptr(), needle.as_ptr(), needle.len()) } +} + +/// Compare corresponding bytes in `x` and `y` for equality. +/// +/// That is, this returns true if and only if `x.len() == y.len()` and +/// `x[i] == y[i]` for all `0 <= i < x.len()`. +/// +/// Note that this isn't used. We only use it in tests as a convenient way +/// of testing `is_equal_raw`. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +/// +/// # Motivation +/// +/// Why not use slice equality instead? Well, slice equality usually results in +/// a call out to the current platform's `libc` which might not be inlineable +/// or have other overhead. This routine isn't guaranteed to be a win, but it +/// might be in some cases. +#[cfg(test)] +#[inline(always)] +fn is_equal(x: &[u8], y: &[u8]) -> bool { + if x.len() != y.len() { + return false; + } + // SAFETY: Our pointers are derived directly from borrowed slices which + // uphold all of our safety guarantees except for length. We account for + // length with the check above. + unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) } +} + +/// Compare `n` bytes at the given pointers for equality. +/// +/// This returns true if and only if `*x.add(i) == *y.add(i)` for all +/// `0 <= i < n`. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. 
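[Editor's note: the `# Motivation` and `# Safety` sections and the definition of `is_equal_raw` continue below. As a reference point only, here is a safe sketch of the same comparison strategy, 4-byte words with one final, possibly overlapping word anchored at the end. The name `is_equal_sketch` is ours and illustrative; it is not part of the crate.]

```rust
// A safe model of the 4-bytes-at-a-time comparison described above.
// Semantically it is just `x == y`; it exists only to show the loop shape.
fn is_equal_sketch(x: &[u8], y: &[u8]) -> bool {
    if x.len() != y.len() {
        return false;
    }
    let n = x.len();
    if n < 4 {
        // The real routine special-cases lengths 0 through 3.
        return x == y;
    }
    // Load 4 bytes starting at `i` as a native-endian u32.
    let word = |s: &[u8], i: usize| -> u32 {
        u32::from_ne_bytes(s[i..i + 4].try_into().unwrap())
    };
    let mut i = 0;
    while i < n - 4 {
        if word(x, i) != word(y, i) {
            return false;
        }
        i += 4;
    }
    // Final window anchored at the end; it may overlap bytes already
    // compared, which is harmless for an equality test.
    word(x, n - 4) == word(y, n - 4)
}

fn main() {
    assert!(is_equal_sketch(b"foobar", b"foobar"));
    assert!(!is_equal_sketch(b"foobar", b"foobaz"));
}
```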
+/// +/// # Motivation +/// +/// Why not use slice equality instead? Well, slice equality usually results in +/// a call out to the current platform's `libc` which might not be inlineable +/// or have other overhead. This routine isn't guaranteed to be a win, but it +/// might be in some cases. +/// +/// # Safety +/// +/// * Both `x` and `y` must be valid for reads of up to `n` bytes. +/// * Both `x` and `y` must point to an initialized value. +/// * Both `x` and `y` must each point to an allocated object and +/// must either be in bounds or at most one byte past the end of the +/// allocated object. `x` and `y` do not need to point to the same allocated +/// object, but they may. +/// * Both `x` and `y` must be _derived from_ a pointer to their respective +/// allocated objects. +/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly +/// for `y` and `y+n`. +/// * The distance being in bounds must not rely on "wrapping around" the +/// address space. +#[inline(always)] +unsafe fn is_equal_raw(mut x: *const u8, mut y: *const u8, n: usize) -> bool { + // If we don't have enough bytes to do 4-byte at a time loads, then + // handle each possible length specially. Note that I used to have a + // byte-at-a-time loop here and that turned out to be quite a bit slower + // for the memmem/pathological/defeat-simple-vector-alphabet benchmark. + if n < 4 { + return match n { + 0 => true, + 1 => x.read() == y.read(), + 2 => { + x.cast::<u16>().read_unaligned() + == y.cast::<u16>().read_unaligned() } - return true; + // I also tried copy_nonoverlapping here and it looks like the + // codegen is the same. + 3 => x.cast::<[u8; 3]>().read() == y.cast::<[u8; 3]>().read(), + _ => unreachable!(), + }; + } + // When we have 4 or more bytes to compare, then proceed in chunks of 4 at + // a time using unaligned loads. + // + // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is + // that this particular version of memcmp is likely to be called with tiny + // needles. That means that if we do 8 byte loads, then a higher proportion + // of memcmp calls will use the slower variant above. With that said, this + // is a hypothesis and is only loosely supported by benchmarks. There's + // likely some improvement that could be made here. The main thing here + // though is to optimize for latency, not throughput. + + // SAFETY: The caller is responsible for ensuring the pointers we get are + // valid and readable for at least `n` bytes. We also do unaligned loads, + // so there's no need to ensure we're aligned. (This is justified by this + // routine being specifically for short strings.) + let xend = x.add(n.wrapping_sub(4)); + let yend = y.add(n.wrapping_sub(4)); + while x < xend { + let vx = x.cast::<u32>().read_unaligned(); + let vy = y.cast::<u32>().read_unaligned(); + if vx != vy { + return false; } - // When we have 8 or more bytes to compare, then proceed in chunks of - // 8 at a time using unaligned loads. - let mut p1 = self.bytes().as_ptr(); - let mut p2 = bytes.as_ptr(); - let p1end = self.bytes()[self.len() - 8..].as_ptr(); - let p2end = bytes[bytes.len() - 8..].as_ptr(); - // SAFETY: Via the conditional above, we know that both `p1` and `p2` - // have the same length, so `p1 < p1end` implies that `p2 < p2end`. - // Thus, derefencing both `p1` and `p2` in the loop below is safe. - // - // Moreover, we set `p1end` and `p2end` to be 8 bytes before the actual - // end of of `p1` and `p2`. Thus, the final dereference outside of the - // loop is guaranteed to be valid.
- // - // Finally, we needn't worry about 64-bit alignment here, since we - // do unaligned loads. - unsafe { - while p1 < p1end { - let v1 = (p1 as *const u64).read_unaligned(); - let v2 = (p2 as *const u64).read_unaligned(); - if v1 != v2 { - return false; - } - p1 = p1.add(8); - p2 = p2.add(8); - } - let v1 = (p1end as *const u64).read_unaligned(); - let v2 = (p2end as *const u64).read_unaligned(); - v1 == v2 + x = x.add(4); + y = y.add(4); + } + let vx = xend.cast::<u32>().read_unaligned(); + let vy = yend.cast::<u32>().read_unaligned(); + vx == vy +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn equals_different_lengths() { + assert!(!is_equal(b"", b"a")); + assert!(!is_equal(b"a", b"")); + assert!(!is_equal(b"ab", b"a")); + assert!(!is_equal(b"a", b"ab")); + } + + #[test] + fn equals_mismatch() { + let one_mismatch = [ + (&b"a"[..], &b"x"[..]), + (&b"ab"[..], &b"ax"[..]), + (&b"abc"[..], &b"abx"[..]), + (&b"abcd"[..], &b"abcx"[..]), + (&b"abcde"[..], &b"abcdx"[..]), + (&b"abcdef"[..], &b"abcdex"[..]), + (&b"abcdefg"[..], &b"abcdefx"[..]), + (&b"abcdefgh"[..], &b"abcdefgx"[..]), + (&b"abcdefghi"[..], &b"abcdefghx"[..]), + (&b"abcdefghij"[..], &b"abcdefghix"[..]), + (&b"abcdefghijk"[..], &b"abcdefghijx"[..]), + (&b"abcdefghijkl"[..], &b"abcdefghijkx"[..]), + (&b"abcdefghijklm"[..], &b"abcdefghijklx"[..]), + (&b"abcdefghijklmn"[..], &b"abcdefghijklmx"[..]), + ]; + for (x, y) in one_mismatch { + assert_eq!(x.len(), y.len(), "lengths should match"); + assert!(!is_equal(x, y)); + assert!(!is_equal(y, x)); } } + + #[test] + fn equals_yes() { + assert!(is_equal(b"", b"")); + assert!(is_equal(b"a", b"a")); + assert!(is_equal(b"ab", b"ab")); + assert!(is_equal(b"abc", b"abc")); + assert!(is_equal(b"abcd", b"abcd")); + assert!(is_equal(b"abcde", b"abcde")); + assert!(is_equal(b"abcdef", b"abcdef")); + assert!(is_equal(b"abcdefg", b"abcdefg")); + assert!(is_equal(b"abcdefgh", b"abcdefgh")); + assert!(is_equal(b"abcdefghi", b"abcdefghi")); + } + + #[test] + fn prefix() { + assert!(is_prefix(b"", b"")); + assert!(is_prefix(b"a", b"")); + assert!(is_prefix(b"ab", b"")); + assert!(is_prefix(b"foo", b"foo")); + assert!(is_prefix(b"foobar", b"foo")); + + assert!(!is_prefix(b"foo", b"fob")); + assert!(!is_prefix(b"foobar", b"fob")); + } } diff --git a/vendor/aho-corasick/src/packed/rabinkarp.rs b/vendor/aho-corasick/src/packed/rabinkarp.rs index c081f70..fdd8a6f 100644 --- a/vendor/aho-corasick/src/packed/rabinkarp.rs +++ b/vendor/aho-corasick/src/packed/rabinkarp.rs @@ -1,7 +1,6 @@ -use std::mem; +use alloc::{sync::Arc, vec, vec::Vec}; -use crate::packed::pattern::{PatternID, Patterns}; -use crate::Match; +use crate::{packed::pattern::Patterns, util::search::Match, PatternID}; /// The type of the rolling hash used in the Rabin-Karp algorithm. type Hash = usize; @@ -34,7 +33,9 @@ const NUM_BUCKETS: usize = 64; /// But ESMAJ provides something a bit more concrete: /// https://www-igm.univ-mlv.fr/~lecroq/string/node5.html #[derive(Clone, Debug)] -pub struct RabinKarp { +pub(crate) struct RabinKarp { + /// The patterns we're searching for. + patterns: Arc<Patterns>, /// The order of patterns in each bucket is significant. Namely, they are /// arranged such that the first one to match is the correct match. This /// may not necessarily correspond to the order provided by the caller. @@ -49,16 +50,6 @@ pub struct RabinKarp { /// The factor to subtract out of a hash before updating it with a new /// byte. hash_2pow: usize, - /// The maximum identifier of a pattern.
This is used as a sanity check - /// to ensure that the patterns provided by the caller are the same as - /// the patterns that were used to compile the matcher. This sanity check - /// possibly permits safely eliminating bounds checks regardless of what - /// patterns are provided by the caller. - /// - /// (Currently, we don't use this to elide bounds checks since it doesn't - /// result in a measurable performance improvement, but we do use it for - /// better failure modes.) - max_pattern_id: PatternID, } impl RabinKarp { @@ -66,7 +57,7 @@ impl RabinKarp { /// /// This panics if any of the patterns in the collection are empty, or if /// the collection is itself empty. - pub fn new(patterns: &Patterns) -> RabinKarp { + pub(crate) fn new(patterns: &Arc<Patterns>) -> RabinKarp { assert!(patterns.len() >= 1); let hash_len = patterns.minimum_len(); assert!(hash_len >= 1); @@ -77,10 +68,10 @@ impl RabinKarp { } let mut rk = RabinKarp { + patterns: Arc::clone(patterns), buckets: vec![vec![]; NUM_BUCKETS], hash_len, hash_2pow, - max_pattern_id: patterns.max_pattern_id(), }; for (id, pat) in patterns.iter() { let hash = rk.hash(&pat.bytes()[..rk.hash_len]); @@ -92,18 +83,12 @@ impl RabinKarp { /// Return the first matching pattern in the given haystack, beginning the /// search at `at`. - pub fn find_at( + pub(crate) fn find_at( &self, - patterns: &Patterns, haystack: &[u8], mut at: usize, ) -> Option<Match> { assert_eq!(NUM_BUCKETS, self.buckets.len()); - assert_eq!( - self.max_pattern_id, - patterns.max_pattern_id(), - "Rabin-Karp must be called with same patterns it was built with", - ); if at + self.hash_len > haystack.len() { return None; @@ -113,7 +98,7 @@ let bucket = &self.buckets[hash % NUM_BUCKETS]; for &(phash, pid) in bucket { if phash == hash { - if let Some(c) = self.verify(patterns, pid, haystack, at) { + if let Some(c) = self.verify(pid, haystack, at) { return Some(c); } } @@ -132,10 +117,9 @@ /// Returns the approximate total amount of heap used by this searcher, in /// units of bytes. - pub fn heap_bytes(&self) -> usize { - let num_patterns = self.max_pattern_id as usize + 1; - self.buckets.len() * mem::size_of::<Vec<(Hash, PatternID)>>() - + num_patterns * mem::size_of::<(Hash, PatternID)>() + pub(crate) fn memory_usage(&self) -> usize { + self.buckets.len() * core::mem::size_of::<Vec<(Hash, PatternID)>>() + + self.patterns.len() * core::mem::size_of::<(Hash, PatternID)>() } /// Verify whether the pattern with the given id matches at @@ -150,14 +134,13 @@ #[cold] fn verify( &self, - patterns: &Patterns, id: PatternID, haystack: &[u8], at: usize, ) -> Option<Match> { - let pat = patterns.get(id); + let pat = self.patterns.get(id); if pat.is_prefix(&haystack[at..]) { - Some(Match::from_span(id as usize, at, at + pat.len())) + Some(Match::new(id, at..at + pat.len())) } else { None } diff --git a/vendor/aho-corasick/src/packed/teddy/README.md b/vendor/aho-corasick/src/packed/teddy/README.md index 51b999b..f0928cb 100644 --- a/vendor/aho-corasick/src/packed/teddy/README.md +++ b/vendor/aho-corasick/src/packed/teddy/README.md @@ -225,14 +225,14 @@ fingerprints in a haystack's 16 byte block, where `i` is the `ith` byte in that block. Once we have that, we can look for the position of the least significant bit -in `C`. (Least significant because we only target `x86_64` here, which is -always little endian. Thus, the least significant bytes correspond to bytes -in our haystack at a lower address.) That position, modulo `8`, gives us -the pattern that the fingerprint matches.
That position, integer divided by -`8`, also gives us the byte offset that the fingerprint occurs in inside the -16 byte haystack block. Using those two pieces of information, we can run a -verification procedure that tries to match all substrings containing that -fingerprint at that position in the haystack. +in `C`. (Least significant because we only target little endian here. Thus, +the least significant bytes correspond to bytes in our haystack at a lower +address.) That position, modulo `8`, gives us the pattern that the fingerprint +matches. That position, integer divided by `8`, also gives us the byte offset +that the fingerprint occurs in inside the 16 byte haystack block. Using those +two pieces of information, we can run a verification procedure that tries +to match all substrings containing that fingerprint at that position in the +haystack. # Implementation notes diff --git a/vendor/aho-corasick/src/packed/teddy/builder.rs b/vendor/aho-corasick/src/packed/teddy/builder.rs new file mode 100644 index 0000000..be91777 --- /dev/null +++ b/vendor/aho-corasick/src/packed/teddy/builder.rs @@ -0,0 +1,780 @@ +use core::{ + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, +}; + +use alloc::sync::Arc; + +use crate::packed::{ext::Pointer, pattern::Patterns, teddy::generic::Match}; + +/// A builder for constructing a Teddy matcher. +/// +/// The builder primarily permits fine grained configuration of the Teddy +/// matcher. Most options are made only available for testing/benchmarking +/// purposes. In reality, options are automatically determined by the nature +/// and number of patterns given to the builder. +#[derive(Clone, Debug)] +pub(crate) struct Builder { + /// When none, this is automatically determined. Otherwise, `false` means + /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used + /// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't + /// available and Fat Teddy was requested, no matcher will be built. + only_fat: Option<bool>, + /// When none, this is automatically determined. Otherwise, `false` means + /// that 128-bit vectors will be used (up to SSSE3 instructions) whereas + /// `true` means that 256-bit vectors will be used. As with `fat`, if + /// 256-bit vectors are requested and they aren't available, then a + /// searcher will not be built. + only_256bit: Option<bool>, + /// When true (the default), the number of patterns will be used as a + /// heuristic for refusing construction of a Teddy searcher. The point here + /// is that too many patterns can overwhelm Teddy. But this can be disabled + /// in cases where the caller knows better. + heuristic_pattern_limits: bool, +} + +impl Default for Builder { + fn default() -> Builder { + Builder::new() + } +} + +impl Builder { + /// Create a new builder for configuring a Teddy matcher. + pub(crate) fn new() -> Builder { + Builder { + only_fat: None, + only_256bit: None, + heuristic_pattern_limits: true, + } + } + + /// Build a matcher for the set of patterns given. If a matcher could not + /// be built, then `None` is returned. + /// + /// Generally, a matcher isn't built if the necessary CPU features aren't + /// available, the target is unsupported, or if the searcher is believed + /// to be slower than standard techniques (i.e., if there are too many + /// literals). + pub(crate) fn build(&self, patterns: Arc<Patterns>) -> Option<Searcher> { + self.build_imp(patterns) + } + + /// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses + /// 16 buckets whereas Slim Teddy uses 8 buckets.
More buckets are useful + /// for a larger set of literals. + /// + /// `None` is the default, which results in an automatic selection based + /// on the number of literals and available CPU features. + pub(crate) fn only_fat(&mut self, yes: Option<bool>) -> &mut Builder { + self.only_fat = yes; + self + } + + /// Request the use of 256-bit vectors (true) or 128-bit vectors (false). + /// Generally, a larger vector size is better since it either permits + /// matching more patterns or matching more bytes in the haystack at once. + /// + /// `None` is the default, which results in an automatic selection based on + /// the number of literals and available CPU features. + pub(crate) fn only_256bit(&mut self, yes: Option<bool>) -> &mut Builder { + self.only_256bit = yes; + self + } + + /// Request that heuristic limitations on the number of patterns be + /// employed. This is useful to disable for benchmarking where one wants to + /// explore how Teddy performs on a large number of patterns even if the + /// heuristics would otherwise refuse construction. + /// + /// This is enabled by default. + pub(crate) fn heuristic_pattern_limits( + &mut self, + yes: bool, + ) -> &mut Builder { + self.heuristic_pattern_limits = yes; + self + } + + fn build_imp(&self, patterns: Arc<Patterns>) -> Option<Searcher> { + let patlimit = self.heuristic_pattern_limits; + // There's no particular reason why we limit ourselves to little endian + // here, but it seems likely that some parts of Teddy as they are + // currently written (e.g., the uses of `trailing_zeros`) are likely + // wrong on non-little-endian targets. Such things are likely easy to + // fix, but at the time of writing (2023/09/18), I actually do not know + // how to test this code on a big-endian target. So for now, we're + // conservative and just bail out. + if !cfg!(target_endian = "little") { + debug!("skipping Teddy because target isn't little endian"); + return None; + } + // Too many patterns will overwhelm Teddy and likely lead to slow + // downs, typically in the verification step. + if patlimit && patterns.len() > 64 { + debug!("skipping Teddy because of too many patterns"); + return None; + } + + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + { + use self::x86_64::{FatAVX2, SlimAVX2, SlimSSSE3}; + + let mask_len = core::cmp::min(4, patterns.minimum_len()); + let beefy = patterns.len() > 32; + let has_avx2 = self::x86_64::is_available_avx2(); + let has_ssse3 = has_avx2 || self::x86_64::is_available_ssse3(); + let use_avx2 = if self.only_256bit == Some(true) { + if !has_avx2 { + debug!( + "skipping Teddy because avx2 was demanded but unavailable" + ); + return None; + } + true + } else if self.only_256bit == Some(false) { + if !has_ssse3 { + debug!( + "skipping Teddy because ssse3 was demanded but unavailable" + ); + return None; + } + false + } else if !has_ssse3 && !has_avx2 { + debug!( + "skipping Teddy because ssse3 and avx2 are unavailable" + ); + return None; + } else { + has_avx2 + }; + let fat = match self.only_fat { + None => use_avx2 && beefy, + Some(false) => false, + Some(true) if !use_avx2 => { + debug!( + "skipping Teddy because fat was demanded, but fat \ + Teddy requires avx2 which is unavailable" + ); + return None; + } + Some(true) => true, + }; + // Just like for aarch64, it's possible that too many patterns will + // overwhelm Teddy. Unlike aarch64 though, we have Fat Teddy which + // helps things scale a bit more by spreading patterns over more + // buckets.
+ // + // These thresholds were determined by looking at the measurements + // for the rust/aho-corasick/packed/leftmost-first and + // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/` + // benchmarks. + if patlimit && mask_len == 1 && patterns.len() > 16 { + debug!( + "skipping Teddy (mask len: 1) because there are \ + too many patterns", + ); + return None; + } + match (mask_len, use_avx2, fat) { + (1, false, _) => { + debug!("Teddy choice: 128-bit slim, 1 byte"); + SlimSSSE3::<1>::new(&patterns) + } + (1, true, false) => { + debug!("Teddy choice: 256-bit slim, 1 byte"); + SlimAVX2::<1>::new(&patterns) + } + (1, true, true) => { + debug!("Teddy choice: 256-bit fat, 1 byte"); + FatAVX2::<1>::new(&patterns) + } + (2, false, _) => { + debug!("Teddy choice: 128-bit slim, 2 bytes"); + SlimSSSE3::<2>::new(&patterns) + } + (2, true, false) => { + debug!("Teddy choice: 256-bit slim, 2 bytes"); + SlimAVX2::<2>::new(&patterns) + } + (2, true, true) => { + debug!("Teddy choice: 256-bit fat, 2 bytes"); + FatAVX2::<2>::new(&patterns) + } + (3, false, _) => { + debug!("Teddy choice: 128-bit slim, 3 bytes"); + SlimSSSE3::<3>::new(&patterns) + } + (3, true, false) => { + debug!("Teddy choice: 256-bit slim, 3 bytes"); + SlimAVX2::<3>::new(&patterns) + } + (3, true, true) => { + debug!("Teddy choice: 256-bit fat, 3 bytes"); + FatAVX2::<3>::new(&patterns) + } + (4, false, _) => { + debug!("Teddy choice: 128-bit slim, 4 bytes"); + SlimSSSE3::<4>::new(&patterns) + } + (4, true, false) => { + debug!("Teddy choice: 256-bit slim, 4 bytes"); + SlimAVX2::<4>::new(&patterns) + } + (4, true, true) => { + debug!("Teddy choice: 256-bit fat, 4 bytes"); + FatAVX2::<4>::new(&patterns) + } + _ => { + debug!("no supported Teddy configuration found"); + None + } + } + } + #[cfg(target_arch = "aarch64")] + { + use self::aarch64::SlimNeon; + + let mask_len = core::cmp::min(4, patterns.minimum_len()); + if self.only_256bit == Some(true) { + debug!( + "skipping Teddy because 256-bits were demanded \ + but unavailable" + ); + return None; + } + if self.only_fat == Some(true) { + debug!( + "skipping Teddy because fat was demanded but unavailable" + ); + } + // Since we don't have Fat teddy in aarch64 (I think we'd want at + // least 256-bit vectors for that), we need to be careful not to + // allow too many patterns as it might overwhelm Teddy. Generally + // speaking, as the mask length goes up, the more patterns we can + // handle because the mask length results in fewer candidates + // generated. + // + // These thresholds were determined by looking at the measurements + // for the rust/aho-corasick/packed/leftmost-first and + // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/` + // benchmarks. 
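[Editor's note: the aarch64 `match mask_len { .. }` dispatch follows below, and together with the x86-64 `match (mask_len, use_avx2, fat)` above it reduces to a small decision table. A hedged sketch of that selection logic, with names of our own invention rather than the crate's:]

```rust
// A sketch of Teddy's build-time choice: mask length caps at 4, fat Teddy
// needs 256-bit vectors (AVX2), and everything else falls back to the
// 128-bit slim variants. Illustrative only.
#[derive(Debug, PartialEq)]
enum Choice {
    Slim128(usize), // 128-bit vectors, mask_len bytes
    Slim256(usize), // 256-bit vectors, mask_len bytes
    Fat256(usize),  // 256-bit vectors, 16 buckets, mask_len bytes
    None,
}

fn choose(mask_len: usize, use_avx2: bool, fat: bool) -> Choice {
    if !(1..=4).contains(&mask_len) {
        return Choice::None;
    }
    match (use_avx2, fat) {
        (true, true) => Choice::Fat256(mask_len),
        (true, false) => Choice::Slim256(mask_len),
        // `fat` is only ever true when AVX2 is in play, so 128-bit
        // vectors always mean slim Teddy.
        (false, _) => Choice::Slim128(mask_len),
    }
}

fn main() {
    assert_eq!(choose(2, true, true), Choice::Fat256(2));
    assert_eq!(choose(4, false, false), Choice::Slim128(4));
}
```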
+ match mask_len { + 1 => { + if patlimit && patterns.len() > 16 { + debug!( + "skipping Teddy (mask len: 1) because there are \ + too many patterns", + ); + } + debug!("Teddy choice: 128-bit slim, 1 byte"); + SlimNeon::<1>::new(&patterns) + } + 2 => { + if patlimit && patterns.len() > 32 { + debug!( + "skipping Teddy (mask len: 2) because there are \ + too many patterns", + ); + } + debug!("Teddy choice: 128-bit slim, 2 bytes"); + SlimNeon::<2>::new(&patterns) + } + 3 => { + if patlimit && patterns.len() > 48 { + debug!( + "skipping Teddy (mask len: 3) because there are \ + too many patterns", + ); + } + debug!("Teddy choice: 128-bit slim, 3 bytes"); + SlimNeon::<3>::new(&patterns) + } + 4 => { + debug!("Teddy choice: 128-bit slim, 4 bytes"); + SlimNeon::<4>::new(&patterns) + } + _ => { + debug!("no supported Teddy configuration found"); + None + } + } + } + #[cfg(not(any( + all(target_arch = "x86_64", target_feature = "sse2"), + target_arch = "aarch64" + )))] + { + None + } + } +} + +/// A searcher that dispatches to one of several possible Teddy variants. +#[derive(Clone, Debug)] +pub(crate) struct Searcher { + /// The Teddy variant we use. We use dynamic dispatch under the theory that + /// it results in better codegen than an enum, although this is a specious + /// claim. + /// + /// This `Searcher` is essentially a wrapper for a `SearcherT` trait + /// object. We just make `memory_usage` and `minimum_len` available without + /// going through dynamic dispatch. + imp: Arc<dyn SearcherT>, + /// Total heap memory used by the Teddy variant. + memory_usage: usize, + /// The minimum haystack length this searcher can handle. It is intended + /// for callers to use some other search routine (such as Rabin-Karp) in + /// cases where the haystack (or remainder of the haystack) is too short. + minimum_len: usize, +} + +impl Searcher { + /// Look for the leftmost occurrence of any pattern in this searcher in the + /// given haystack starting at the given position. + /// + /// # Panics + /// + /// This panics when `haystack[at..].len()` is less than the minimum length + /// for this searcher. + #[inline(always)] + pub(crate) fn find( + &self, + haystack: &[u8], + at: usize, + ) -> Option<crate::Match> { + // SAFETY: The Teddy implementations all require a minimum haystack + // length, and this is required for safety. Therefore, we assert it + // here in order to make this method sound. + assert!(haystack[at..].len() >= self.minimum_len); + let hayptr = haystack.as_ptr(); + // SAFETY: Construction of the searcher guarantees that we are able + // to run it in the current environment (i.e., we won't get an AVX2 + // searcher on a x86-64 CPU without AVX2 support). Also, the pointers + // are valid as they are derived directly from a borrowed slice. + let teddym = unsafe { + self.imp.find(hayptr.add(at), hayptr.add(haystack.len()))? + }; + let start = teddym.start().as_usize().wrapping_sub(hayptr.as_usize()); + let end = teddym.end().as_usize().wrapping_sub(hayptr.as_usize()); + let span = crate::Span { start, end }; + // OK because we won't permit the construction of a searcher that + // could report a pattern ID bigger than what can fit in the crate-wide + // PatternID type. + let pid = crate::PatternID::new_unchecked(teddym.pattern().as_usize()); + let m = crate::Match::new(pid, span); + Some(m) + } + + /// Returns the approximate total amount of heap used by this type, in + /// units of bytes.
+ #[inline(always)] + pub(crate) fn memory_usage(&self) -> usize { + self.memory_usage + } + + /// Returns the minimum length, in bytes, that a haystack must be in order + /// to use it with this searcher. + #[inline(always)] + pub(crate) fn minimum_len(&self) -> usize { + self.minimum_len + } +} + +/// A trait that provides dynamic dispatch over the different possible Teddy +/// variants on the same algorithm. +/// +/// On `x86_64` for example, it isn't known until runtime which of 12 possible +/// variants will be used. One might use one of the four slim 128-bit vector +/// variants, or one of the four 256-bit vector variants or even one of the +/// four fat 256-bit vector variants. +/// +/// Since this choice is generally made when the Teddy searcher is constructed +/// and this choice is based on the patterns given and what the current CPU +/// supports, it follows that there must be some kind of indirection at search +/// time that "selects" the variant chosen at build time. +/// +/// There are a few different ways to go about this. One approach is to use an +/// enum. It works fine, but in my experiments, this generally results in worse +/// codegen. Another approach, which is what we use here, is dynamic dispatch +/// via a trait object. We basically implement this trait for each possible +/// variant, select the variant we want at build time and convert it to a +/// trait object for use at search time. +/// +/// Another approach is to use function pointers and stick each of the possible +/// variants into a union. This is essentially isomorphic to the dynamic +/// dispatch approach, but doesn't require any allocations. Since this crate +/// requires `alloc`, there's no real reason (AFAIK) to go down this path. (The +/// `memchr` crate does this.) +trait SearcherT: + Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static +{ + /// Execute a search on the given haystack (identified by `start` and `end` + /// raw pointers). + /// + /// # Safety + /// + /// Essentially, the `start` and `end` pointers must be valid and point + /// to a haystack one can read. As long as you derive them from, for + /// example, a `&[u8]`, they should automatically satisfy all of the safety + /// obligations: + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// * It must be the case that `start <= end`. + /// * `end - start` must be greater than the minimum length for this + /// searcher. + /// + /// Also, it is expected that implementations of this trait will tag this + /// method with a `target_feature` attribute. Callers must ensure that + /// they are executing this method in an environment where that attribute + /// is valid. 
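[Editor's note: the trait's lone method follows below. As an illustration of the build-time-selection plus trait-object-dispatch design just described, here is a minimal, self-contained sketch; every name in it is hypothetical and none of it is the crate's API.]

```rust
use std::sync::Arc;

// Hypothetical stand-ins illustrating the pattern: pick an implementation
// once at construction time, then dispatch through `Arc<dyn Trait>` on
// every search.
trait Scan: Send + Sync + 'static {
    fn scan(&self, haystack: &[u8]) -> Option<usize>;
}

struct Fallback;

impl Scan for Fallback {
    fn scan(&self, haystack: &[u8]) -> Option<usize> {
        haystack.iter().position(|&b| b == b'x')
    }
}

struct Searcher {
    // Chosen once; real code would consult CPU features before picking.
    imp: Arc<dyn Scan>,
}

impl Searcher {
    fn new() -> Searcher {
        Searcher { imp: Arc::new(Fallback) }
    }

    fn find(&self, haystack: &[u8]) -> Option<usize> {
        // One virtual call per search, amortized over the whole scan.
        self.imp.scan(haystack)
    }
}

fn main() {
    assert_eq!(Searcher::new().find(b"abxc"), Some(2));
}
```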
+ unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match>; +} + +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +mod x86_64 { + use core::arch::x86_64::{__m128i, __m256i}; + + use alloc::sync::Arc; + + use crate::packed::{ + ext::Pointer, + pattern::Patterns, + teddy::generic::{self, Match}, + }; + + use super::{Searcher, SearcherT}; + + #[derive(Clone, Debug)] + pub(super) struct SlimSSSE3<const BYTES: usize> { + slim128: generic::Slim<__m128i, BYTES>, + } + + // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes. + macro_rules! slim_ssse3 { + ($len:expr) => { + impl SlimSSSE3<$len> { + /// Creates a new searcher using "slim" Teddy with 128-bit + /// vectors. If SSSE3 is not available in the current + /// environment, then this returns `None`. + pub(super) fn new( + patterns: &Arc<Patterns>, + ) -> Option<Searcher> { + if !is_available_ssse3() { + return None; + } + Some(unsafe { SlimSSSE3::<$len>::new_unchecked(patterns) }) + } + + /// Creates a new searcher using "slim" Teddy with 128-bit + /// vectors without checking whether SSSE3 is available or not. + /// + /// # Safety + /// + /// Callers must ensure that SSSE3 is available in the current + /// environment. + #[target_feature(enable = "ssse3")] + unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher { + let slim128 = generic::Slim::<__m128i, $len>::new( + Arc::clone(patterns), + ); + let memory_usage = slim128.memory_usage(); + let minimum_len = slim128.minimum_len(); + let imp = Arc::new(SlimSSSE3 { slim128 }); + Searcher { imp, memory_usage, minimum_len } + } + } + + impl SearcherT for SlimSSSE3<$len> { + #[target_feature(enable = "ssse3")] + #[inline] + unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option<Match> { + // SAFETY: All obligations except for `target_feature` are + // passed to the caller. Our use of `target_feature` is + // safe because construction of this type requires that the + // requisite target features are available. + self.slim128.find(start, end) + } + } + }; + } + + slim_ssse3!(1); + slim_ssse3!(2); + slim_ssse3!(3); + slim_ssse3!(4); + + #[derive(Clone, Debug)] + pub(super) struct SlimAVX2<const BYTES: usize> { + slim128: generic::Slim<__m128i, BYTES>, + slim256: generic::Slim<__m256i, BYTES>, + } + + // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes. + macro_rules! slim_avx2 { + ($len:expr) => { + impl SlimAVX2<$len> { + /// Creates a new searcher using "slim" Teddy with 256-bit + /// vectors. If AVX2 is not available in the current + /// environment, then this returns `None`. + pub(super) fn new( + patterns: &Arc<Patterns>, + ) -> Option<Searcher> { + if !is_available_avx2() { + return None; + } + Some(unsafe { SlimAVX2::<$len>::new_unchecked(patterns) }) + } + + /// Creates a new searcher using "slim" Teddy with 256-bit + /// vectors without checking whether AVX2 is available or not. + /// + /// # Safety + /// + /// Callers must ensure that AVX2 is available in the current + /// environment.
+ #[target_feature(enable = "avx2")] + unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher { + let slim128 = generic::Slim::<__m128i, $len>::new( + Arc::clone(&patterns), + ); + let slim256 = generic::Slim::<__m256i, $len>::new( + Arc::clone(&patterns), + ); + let memory_usage = + slim128.memory_usage() + slim256.memory_usage(); + let minimum_len = slim128.minimum_len(); + let imp = Arc::new(SlimAVX2 { slim128, slim256 }); + Searcher { imp, memory_usage, minimum_len } + } + } + + impl SearcherT for SlimAVX2<$len> { + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option<Match> { + // SAFETY: All obligations except for `target_feature` are + // passed to the caller. Our use of `target_feature` is + // safe because construction of this type requires that the + // requisite target features are available. + let len = end.distance(start); + if len < self.slim256.minimum_len() { + self.slim128.find(start, end) + } else { + self.slim256.find(start, end) + } + } + } + }; + } + + slim_avx2!(1); + slim_avx2!(2); + slim_avx2!(3); + slim_avx2!(4); + + #[derive(Clone, Debug)] + pub(super) struct FatAVX2<const BYTES: usize> { + fat256: generic::Fat<__m256i, BYTES>, + } + + // Defines FatAVX2 wrapper functions for 1, 2, 3 and 4 bytes. + macro_rules! fat_avx2 { + ($len:expr) => { + impl FatAVX2<$len> { + /// Creates a new searcher using "fat" Teddy with 256-bit + /// vectors. If AVX2 is not available in the current + /// environment, then this returns `None`. + pub(super) fn new( + patterns: &Arc<Patterns>, + ) -> Option<Searcher> { + if !is_available_avx2() { + return None; + } + Some(unsafe { FatAVX2::<$len>::new_unchecked(patterns) }) + } + + /// Creates a new searcher using "fat" Teddy with 256-bit + /// vectors without checking whether AVX2 is available or not. + /// + /// # Safety + /// + /// Callers must ensure that AVX2 is available in the current + /// environment. + #[target_feature(enable = "avx2")] + unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher { + let fat256 = generic::Fat::<__m256i, $len>::new( + Arc::clone(&patterns), + ); + let memory_usage = fat256.memory_usage(); + let minimum_len = fat256.minimum_len(); + let imp = Arc::new(FatAVX2 { fat256 }); + Searcher { imp, memory_usage, minimum_len } + } + } + + impl SearcherT for FatAVX2<$len> { + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option<Match> { + // SAFETY: All obligations except for `target_feature` are + // passed to the caller. Our use of `target_feature` is + // safe because construction of this type requires that the + // requisite target features are available.
+ self.fat256.find(start, end) + } + } + }; + } + + fat_avx2!(1); + fat_avx2!(2); + fat_avx2!(3); + fat_avx2!(4); + + #[inline] + pub(super) fn is_available_ssse3() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "ssse3")] + { + true + } + #[cfg(not(target_feature = "ssse3"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("ssse3") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } + + #[inline] + pub(super) fn is_available_avx2() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "avx2")] + { + true + } + #[cfg(not(target_feature = "avx2"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("avx2") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } +} + +#[cfg(target_arch = "aarch64")] +mod aarch64 { + use core::arch::aarch64::uint8x16_t; + + use alloc::sync::Arc; + + use crate::packed::{ + pattern::Patterns, + teddy::generic::{self, Match}, + }; + + use super::{Searcher, SearcherT}; + + #[derive(Clone, Debug)] + pub(super) struct SlimNeon<const BYTES: usize> { + slim128: generic::Slim<uint8x16_t, BYTES>, + } + + // Defines SlimNeon wrapper functions for 1, 2, 3 and 4 bytes. + macro_rules! slim_neon { + ($len:expr) => { + impl SlimNeon<$len> { + /// Creates a new searcher using "slim" Teddy with 128-bit + /// vectors. If NEON is not available in the current + /// environment, then this returns `None`. + pub(super) fn new( + patterns: &Arc<Patterns>, + ) -> Option<Searcher> { + Some(unsafe { SlimNeon::<$len>::new_unchecked(patterns) }) + } + + /// Creates a new searcher using "slim" Teddy with 128-bit + /// vectors without checking whether NEON is available or not. + /// + /// # Safety + /// + /// Callers must ensure that NEON is available in the current + /// environment. + #[target_feature(enable = "neon")] + unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher { + let slim128 = generic::Slim::<uint8x16_t, $len>::new( + Arc::clone(patterns), + ); + let memory_usage = slim128.memory_usage(); + let minimum_len = slim128.minimum_len(); + let imp = Arc::new(SlimNeon { slim128 }); + Searcher { imp, memory_usage, minimum_len } + } + } + + impl SearcherT for SlimNeon<$len> { + #[target_feature(enable = "neon")] + #[inline] + unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option<Match> { + // SAFETY: All obligations except for `target_feature` are + // passed to the caller. Our use of `target_feature` is + // safe because construction of this type requires that the + // requisite target features are available. + self.slim128.find(start, end) + } + } + }; + } + + slim_neon!(1); + slim_neon!(2); + slim_neon!(3); + slim_neon!(4); +} diff --git a/vendor/aho-corasick/src/packed/teddy/compile.rs b/vendor/aho-corasick/src/packed/teddy/compile.rs deleted file mode 100644 index 741cb69..0000000 --- a/vendor/aho-corasick/src/packed/teddy/compile.rs +++ /dev/null @@ -1,414 +0,0 @@ -// See the README in this directory for an explanation of the Teddy algorithm. - -use std::cmp; -use std::collections::BTreeMap; -use std::fmt; - -use crate::packed::pattern::{PatternID, Patterns}; -use crate::packed::teddy::Teddy; - -/// A builder for constructing a Teddy matcher. -/// -/// The builder primarily permits fine grained configuration of the Teddy -/// matcher. Most options are made only available for testing/benchmarking -/// purposes. In reality, options are automatically determined by the nature -/// and number of patterns given to the builder.
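[Editor's note: the `is_available_ssse3`/`is_available_avx2` helpers earlier in this hunk layer a compile-time `cfg` gate over runtime detection. A minimal sketch of that shape, assuming an x86-64 target and the standard library's `is_x86_feature_detected!` macro; this is not the crate's code.]

```rust
// A sketch of the layered feature check: answer at compile time when the
// feature is statically enabled, otherwise ask the CPU at runtime.
fn avx2_available() -> bool {
    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
    {
        // Enabled at compile time, so no runtime check is needed.
        true
    }
    #[cfg(all(target_arch = "x86_64", not(target_feature = "avx2")))]
    {
        // Runtime CPUID-based detection; requires `std`.
        std::is_x86_feature_detected!("avx2")
    }
    #[cfg(not(target_arch = "x86_64"))]
    {
        false
    }
}

fn main() {
    println!("avx2 available: {}", avx2_available());
}
```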
-#[derive(Clone, Debug)] -pub struct Builder { - /// When none, this is automatically determined. Otherwise, `false` means - /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used - /// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't - /// available and Fat Teddy was requested, no matcher will be built. - fat: Option, - /// When none, this is automatically determined. Otherwise, `false` means - /// that 128-bit vectors will be used (up to SSSE3 instructions) where as - /// `true` means that 256-bit vectors will be used. As with `fat`, if - /// 256-bit vectors are requested and they aren't available, then a - /// searcher will not be built. - avx: Option, -} - -impl Default for Builder { - fn default() -> Builder { - Builder::new() - } -} - -impl Builder { - /// Create a new builder for configuring a Teddy matcher. - pub fn new() -> Builder { - Builder { fat: None, avx: None } - } - - /// Build a matcher for the set of patterns given. If a matcher could not - /// be built, then `None` is returned. - /// - /// Generally, a matcher isn't built if the necessary CPU features aren't - /// available, an unsupported target or if the searcher is believed to be - /// slower than standard techniques (i.e., if there are too many literals). - pub fn build(&self, patterns: &Patterns) -> Option { - self.build_imp(patterns) - } - - /// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses - /// 16 buckets where as Slim Teddy uses 8 buckets. More buckets are useful - /// for a larger set of literals. - /// - /// `None` is the default, which results in an automatic selection based - /// on the number of literals and available CPU features. - pub fn fat(&mut self, yes: Option) -> &mut Builder { - self.fat = yes; - self - } - - /// Request the use of 256-bit vectors (true) or 128-bit vectors (false). - /// Generally, a larger vector size is better since it either permits - /// matching more patterns or matching more bytes in the haystack at once. - /// - /// `None` is the default, which results in an automatic selection based on - /// the number of literals and available CPU features. - pub fn avx(&mut self, yes: Option) -> &mut Builder { - self.avx = yes; - self - } - - fn build_imp(&self, patterns: &Patterns) -> Option { - use crate::packed::teddy::runtime; - - // Most of the logic here is just about selecting the optimal settings, - // or perhaps even rejecting construction altogether. The choices - // we have are: fat (avx only) or not, ssse3 or avx2, and how many - // patterns we allow ourselves to search. Additionally, for testing - // and benchmarking, we permit callers to try to "force" a setting, - // and if the setting isn't allowed (e.g., forcing AVX when AVX isn't - // available), then we bail and return nothing. - - if patterns.len() > 64 { - return None; - } - let has_ssse3 = is_x86_feature_detected!("ssse3"); - let has_avx = is_x86_feature_detected!("avx2"); - let avx = if self.avx == Some(true) { - if !has_avx { - return None; - } - true - } else if self.avx == Some(false) { - if !has_ssse3 { - return None; - } - false - } else if !has_ssse3 && !has_avx { - return None; - } else { - has_avx - }; - let fat = match self.fat { - None => avx && patterns.len() > 32, - Some(false) => false, - Some(true) if !avx => return None, - Some(true) => true, - }; - - let mut compiler = Compiler::new(patterns, fat); - compiler.compile(); - let Compiler { buckets, masks, .. 
} = compiler; - // SAFETY: It is required that the builder only produce Teddy matchers - // that are allowed to run on the current CPU, since we later assume - // that the presence of (for example) TeddySlim1Mask256 means it is - // safe to call functions marked with the `avx2` target feature. - match (masks.len(), avx, fat) { - (1, false, _) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim1Mask128( - runtime::TeddySlim1Mask128 { - mask1: runtime::Mask128::new(masks[0]), - }, - ), - }), - (1, true, false) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim1Mask256( - runtime::TeddySlim1Mask256 { - mask1: runtime::Mask256::new(masks[0]), - }, - ), - }), - (1, true, true) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat1Mask256( - runtime::TeddyFat1Mask256 { - mask1: runtime::Mask256::new(masks[0]), - }, - ), - }), - (2, false, _) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim2Mask128( - runtime::TeddySlim2Mask128 { - mask1: runtime::Mask128::new(masks[0]), - mask2: runtime::Mask128::new(masks[1]), - }, - ), - }), - (2, true, false) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim2Mask256( - runtime::TeddySlim2Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - }, - ), - }), - (2, true, true) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat2Mask256( - runtime::TeddyFat2Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - }, - ), - }), - (3, false, _) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim3Mask128( - runtime::TeddySlim3Mask128 { - mask1: runtime::Mask128::new(masks[0]), - mask2: runtime::Mask128::new(masks[1]), - mask3: runtime::Mask128::new(masks[2]), - }, - ), - }), - (3, true, false) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim3Mask256( - runtime::TeddySlim3Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - mask3: runtime::Mask256::new(masks[2]), - }, - ), - }), - (3, true, true) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat3Mask256( - runtime::TeddyFat3Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - mask3: runtime::Mask256::new(masks[2]), - }, - ), - }), - _ => unreachable!(), - } - } -} - -/// A compiler is in charge of allocating patterns into buckets and generating -/// the masks necessary for searching. -#[derive(Clone)] -struct Compiler<'p> { - patterns: &'p Patterns, - buckets: Vec>, - masks: Vec, -} - -impl<'p> Compiler<'p> { - /// Create a new Teddy compiler for the given patterns. If `fat` is true, - /// then 16 buckets will be used instead of 8. - /// - /// This panics if any of the patterns given are empty. - fn new(patterns: &'p Patterns, fat: bool) -> Compiler<'p> { - let mask_len = cmp::min(3, patterns.minimum_len()); - assert!(1 <= mask_len && mask_len <= 3); - - Compiler { - patterns, - buckets: vec![vec![]; if fat { 16 } else { 8 }], - masks: vec![Mask::default(); mask_len], - } - } - - /// Compile the patterns in this compiler into buckets and masks. 
- fn compile(&mut self) { - let mut lonibble_to_bucket: BTreeMap, usize> = BTreeMap::new(); - for (id, pattern) in self.patterns.iter() { - // We try to be slightly clever in how we assign patterns into - // buckets. Generally speaking, we want patterns with the same - // prefix to be in the same bucket, since it minimizes the amount - // of time we spend churning through buckets in the verification - // step. - // - // So we could assign patterns with the same N-prefix (where N - // is the size of the mask, which is one of {1, 2, 3}) to the - // same bucket. However, case insensitive searches are fairly - // common, so we'd for example, ideally want to treat `abc` and - // `ABC` as if they shared the same prefix. ASCII has the nice - // property that the lower 4 bits of A and a are the same, so we - // therefore group patterns with the same low-nybbe-N-prefix into - // the same bucket. - // - // MOREOVER, this is actually necessary for correctness! In - // particular, by grouping patterns with the same prefix into the - // same bucket, we ensure that we preserve correct leftmost-first - // and leftmost-longest match semantics. In addition to the fact - // that `patterns.iter()` iterates in the correct order, this - // guarantees that all possible ambiguous matches will occur in - // the same bucket. The verification routine could be adjusted to - // support correct leftmost match semantics regardless of bucket - // allocation, but that results in a performance hit. It's much - // nicer to be able to just stop as soon as a match is found. - let lonybs = pattern.low_nybbles(self.masks.len()); - if let Some(&bucket) = lonibble_to_bucket.get(&lonybs) { - self.buckets[bucket].push(id); - } else { - // N.B. We assign buckets in reverse because it shouldn't have - // any influence on performance, but it does make it harder to - // get leftmost match semantics accidentally correct. - let bucket = (self.buckets.len() - 1) - - (id as usize % self.buckets.len()); - self.buckets[bucket].push(id); - lonibble_to_bucket.insert(lonybs, bucket); - } - } - for (bucket_index, bucket) in self.buckets.iter().enumerate() { - for &pat_id in bucket { - let pat = self.patterns.get(pat_id); - for (i, mask) in self.masks.iter_mut().enumerate() { - if self.buckets.len() == 8 { - mask.add_slim(bucket_index as u8, pat.bytes()[i]); - } else { - mask.add_fat(bucket_index as u8, pat.bytes()[i]); - } - } - } - } - } -} - -impl<'p> fmt::Debug for Compiler<'p> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut buckets = vec![vec![]; self.buckets.len()]; - for (i, bucket) in self.buckets.iter().enumerate() { - for &patid in bucket { - buckets[i].push(self.patterns.get(patid)); - } - } - f.debug_struct("Compiler") - .field("buckets", &buckets) - .field("masks", &self.masks) - .finish() - } -} - -/// Mask represents the low and high nybble masks that will be used during -/// search. Each mask is 32 bytes wide, although only the first 16 bytes are -/// used for the SSSE3 runtime. -/// -/// Each byte in the mask corresponds to a 8-bit bitset, where bit `i` is set -/// if and only if the corresponding nybble is in the ith bucket. The index of -/// the byte (0-15, inclusive) corresponds to the nybble. -/// -/// Each mask is used as the target of a shuffle, where the indices for the -/// shuffle are taken from the haystack. AND'ing the shuffles for both the -/// low and high masks together also results in 8-bit bitsets, but where bit -/// `i` is set if and only if the correspond *byte* is in the ith bucket. 
-/// -/// During compilation, masks are just arrays. But during search, these masks -/// are represented as 128-bit or 256-bit vectors. -/// -/// (See the README is this directory for more details.) -#[derive(Clone, Copy, Default)] -pub struct Mask { - lo: [u8; 32], - hi: [u8; 32], -} - -impl Mask { - /// Update this mask by adding the given byte to the given bucket. The - /// given bucket must be in the range 0-7. - /// - /// This is for "slim" Teddy, where there are only 8 buckets. - fn add_slim(&mut self, bucket: u8, byte: u8) { - assert!(bucket < 8); - - let byte_lo = (byte & 0xF) as usize; - let byte_hi = ((byte >> 4) & 0xF) as usize; - // When using 256-bit vectors, we need to set this bucket assignment in - // the low and high 128-bit portions of the mask. This allows us to - // process 32 bytes at a time. Namely, AVX2 shuffles operate on each - // of the 128-bit lanes, rather than the full 256-bit vector at once. - self.lo[byte_lo] |= 1 << bucket; - self.lo[byte_lo + 16] |= 1 << bucket; - self.hi[byte_hi] |= 1 << bucket; - self.hi[byte_hi + 16] |= 1 << bucket; - } - - /// Update this mask by adding the given byte to the given bucket. The - /// given bucket must be in the range 0-15. - /// - /// This is for "fat" Teddy, where there are 16 buckets. - fn add_fat(&mut self, bucket: u8, byte: u8) { - assert!(bucket < 16); - - let byte_lo = (byte & 0xF) as usize; - let byte_hi = ((byte >> 4) & 0xF) as usize; - // Unlike slim teddy, fat teddy only works with AVX2. For fat teddy, - // the high 128 bits of our mask correspond to buckets 8-15, while the - // low 128 bits correspond to buckets 0-7. - if bucket < 8 { - self.lo[byte_lo] |= 1 << bucket; - self.hi[byte_hi] |= 1 << bucket; - } else { - self.lo[byte_lo + 16] |= 1 << (bucket % 8); - self.hi[byte_hi + 16] |= 1 << (bucket % 8); - } - } - - /// Return the low 128 bits of the low-nybble mask. - pub fn lo128(&self) -> [u8; 16] { - let mut tmp = [0; 16]; - tmp.copy_from_slice(&self.lo[..16]); - tmp - } - - /// Return the full low-nybble mask. - pub fn lo256(&self) -> [u8; 32] { - self.lo - } - - /// Return the low 128 bits of the high-nybble mask. - pub fn hi128(&self) -> [u8; 16] { - let mut tmp = [0; 16]; - tmp.copy_from_slice(&self.hi[..16]); - tmp - } - - /// Return the full high-nybble mask. - pub fn hi256(&self) -> [u8; 32] { - self.hi - } -} - -impl fmt::Debug for Mask { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let (mut parts_lo, mut parts_hi) = (vec![], vec![]); - for i in 0..32 { - parts_lo.push(format!("{:02}: {:08b}", i, self.lo[i])); - parts_hi.push(format!("{:02}: {:08b}", i, self.hi[i])); - } - f.debug_struct("Mask") - .field("lo", &parts_lo) - .field("hi", &parts_hi) - .finish() - } -} diff --git a/vendor/aho-corasick/src/packed/teddy/generic.rs b/vendor/aho-corasick/src/packed/teddy/generic.rs new file mode 100644 index 0000000..2aacd00 --- /dev/null +++ b/vendor/aho-corasick/src/packed/teddy/generic.rs @@ -0,0 +1,1382 @@ +use core::fmt::Debug; + +use alloc::{ + boxed::Box, collections::BTreeMap, format, sync::Arc, vec, vec::Vec, +}; + +use crate::{ + packed::{ + ext::Pointer, + pattern::Patterns, + vector::{FatVector, Vector}, + }, + util::int::U32, + PatternID, +}; + +/// A match type specialized to the Teddy implementations below. +/// +/// Essentially, instead of representing a match at byte offsets, we use +/// raw pointers. This is because the implementations below operate on raw +/// pointers, and so this is a more natural return type based on how the +/// implementation works. 
+///
+/// Also, the `PatternID` used here is a `u16`.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct Match {
+    pid: PatternID,
+    start: *const u8,
+    end: *const u8,
+}
+
+impl Match {
+    /// Returns the ID of the pattern that matched.
+    pub(crate) fn pattern(&self) -> PatternID {
+        self.pid
+    }
+
+    /// Returns a pointer into the haystack at which the match starts.
+    pub(crate) fn start(&self) -> *const u8 {
+        self.start
+    }
+
+    /// Returns a pointer into the haystack at which the match ends.
+    pub(crate) fn end(&self) -> *const u8 {
+        self.end
+    }
+}
+
+/// A "slim" Teddy implementation that is generic over both the vector type
+/// and the minimum length of the patterns being searched for.
+///
+/// Only 1, 2, 3 and 4 bytes are supported as minimum lengths.
+#[derive(Clone, Debug)]
+pub(crate) struct Slim<V, const BYTES: usize> {
+    /// A generic data structure for doing "slim" Teddy verification.
+    teddy: Teddy<8>,
+    /// The masks used as inputs to the shuffle operation to generate
+    /// candidates (which are fed into the verification routines).
+    masks: [Mask<V>; BYTES],
+}
+
+impl<V: Vector, const BYTES: usize> Slim<V, BYTES> {
+    /// Create a new "slim" Teddy searcher for the given patterns.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `BYTES` is any value other than 1, 2, 3 or 4.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    pub(crate) unsafe fn new(patterns: Arc<Patterns>) -> Slim<V, BYTES> {
+        assert!(
+            1 <= BYTES && BYTES <= 4,
+            "only 1, 2, 3 or 4 bytes are supported"
+        );
+        let teddy = Teddy::new(patterns);
+        let masks = SlimMaskBuilder::from_teddy(&teddy);
+        Slim { teddy, masks }
+    }
+
+    /// Returns the approximate total amount of heap used by this type, in
+    /// units of bytes.
+    #[inline(always)]
+    pub(crate) fn memory_usage(&self) -> usize {
+        self.teddy.memory_usage()
+    }
+
+    /// Returns the minimum length, in bytes, that a haystack must be in order
+    /// to use it with this searcher.
+    #[inline(always)]
+    pub(crate) fn minimum_len(&self) -> usize {
+        V::BYTES + (BYTES - 1)
+    }
+}
+
+impl<V: Vector> Slim<V, 1> {
+    /// Look for an occurrence of the patterns in this finder in the haystack
+    /// given by the `start` and `end` pointers.
+    ///
+    /// If no match could be found, then `None` is returned.
+    ///
+    /// # Safety
+    ///
+    /// The given pointers representing the haystack must be valid to read
+    /// from. They must also point to a region of memory that is at least the
+    /// minimum length required by this searcher.
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    pub(crate) unsafe fn find(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<Match> {
+        let len = end.distance(start);
+        debug_assert!(len >= self.minimum_len());
+        let mut cur = start;
+        while cur <= end.sub(V::BYTES) {
+            if let Some(m) = self.find_one(cur, end) {
+                return Some(m);
+            }
+            cur = cur.add(V::BYTES);
+        }
+        if cur < end {
+            cur = end.sub(V::BYTES);
+            if let Some(m) = self.find_one(cur, end) {
+                return Some(m);
+            }
+        }
+        None
+    }
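The loop above is the shape shared by every Teddy `find` in this file: step through the haystack one vector width at a time, then cover the tail by re-running a single final window aligned to the end of the haystack. A minimal standalone sketch of just that stepping scheme, in scalar form (the names `CHUNK` and `find_byte_chunked` are invented for the illustration; the vendored code does this with `V::BYTES`-wide SIMD loads and raw pointers):

```rust
// Scan in fixed-size chunks, then re-scan one final chunk aligned to the end
// so the tail is covered without reading past the end of the haystack.
const CHUNK: usize = 16; // stands in for V::BYTES

fn find_byte_chunked(haystack: &[u8], needle: u8) -> Option<usize> {
    assert!(haystack.len() >= CHUNK, "haystack must meet the minimum length");
    let mut at = 0;
    while at + CHUNK <= haystack.len() {
        if let Some(i) = haystack[at..at + CHUNK].iter().position(|&b| b == needle) {
            return Some(at + i);
        }
        at += CHUNK;
    }
    if at < haystack.len() {
        // Final overlapping chunk aligned to the end of the haystack.
        let at = haystack.len() - CHUNK;
        if let Some(i) = haystack[at..].iter().position(|&b| b == needle) {
            return Some(at + i);
        }
    }
    None
}

fn main() {
    let hay = b"zzzzzzzzzzzzzzzzzzzzzq"; // 22 bytes, match in the tail window
    assert_eq!(find_byte_chunked(hay, b'q'), Some(21));
}
```

The final window may re-examine bytes, which is harmless: every position before it has already been checked and rejected, so the first hit it reports is still the leftmost one.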
+
+    /// Look for a match starting in the `V::BYTES` bytes at and after `cur`.
+    /// If there isn't one, then `None` is returned.
+    ///
+    /// # Safety
+    ///
+    /// The given pointers representing the haystack must be valid to read
+    /// from. They must also point to a region of memory that is at least the
+    /// minimum length required by this searcher.
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    unsafe fn find_one(
+        &self,
+        cur: *const u8,
+        end: *const u8,
+    ) -> Option<Match> {
+        let c = self.candidate(cur);
+        if !c.is_zero() {
+            if let Some(m) = self.teddy.verify(cur, end, c) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// Look for a candidate match (represented as a vector) starting in the
+    /// `V::BYTES` bytes at and after `cur`. If there isn't one, then a vector
+    /// with all bits set to zero is returned.
+    ///
+    /// # Safety
+    ///
+    /// The given pointer representing the haystack must be valid to read
+    /// from.
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    unsafe fn candidate(&self, cur: *const u8) -> V {
+        let chunk = V::load_unaligned(cur);
+        Mask::members1(chunk, self.masks)
+    }
+}
+
+impl<V: Vector> Slim<V, 2> {
+    /// See Slim::find.
+    #[inline(always)]
+    pub(crate) unsafe fn find(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<Match> {
+        let len = end.distance(start);
+        debug_assert!(len >= self.minimum_len());
+        let mut cur = start.add(1);
+        let mut prev0 = V::splat(0xFF);
+        while cur <= end.sub(V::BYTES) {
+            if let Some(m) = self.find_one(cur, end, &mut prev0) {
+                return Some(m);
+            }
+            cur = cur.add(V::BYTES);
+        }
+        if cur < end {
+            cur = end.sub(V::BYTES);
+            prev0 = V::splat(0xFF);
+            if let Some(m) = self.find_one(cur, end, &mut prev0) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See Slim::find_one.
+    #[inline(always)]
+    unsafe fn find_one(
+        &self,
+        cur: *const u8,
+        end: *const u8,
+        prev0: &mut V,
+    ) -> Option<Match> {
+        let c = self.candidate(cur, prev0);
+        if !c.is_zero() {
+            if let Some(m) = self.teddy.verify(cur.sub(1), end, c) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See Slim::candidate.
+    #[inline(always)]
+    unsafe fn candidate(&self, cur: *const u8, prev0: &mut V) -> V {
+        let chunk = V::load_unaligned(cur);
+        let (res0, res1) = Mask::members2(chunk, self.masks);
+        let res0prev0 = res0.shift_in_one_byte(*prev0);
+        let res = res0prev0.and(res1);
+        *prev0 = res0;
+        res
+    }
+}
+
+impl<V: Vector> Slim<V, 3> {
+    /// See Slim::find.
+    #[inline(always)]
+    pub(crate) unsafe fn find(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<Match> {
+        let len = end.distance(start);
+        debug_assert!(len >= self.minimum_len());
+        let mut cur = start.add(2);
+        let mut prev0 = V::splat(0xFF);
+        let mut prev1 = V::splat(0xFF);
+        while cur <= end.sub(V::BYTES) {
+            if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
+                return Some(m);
+            }
+            cur = cur.add(V::BYTES);
+        }
+        if cur < end {
+            cur = end.sub(V::BYTES);
+            prev0 = V::splat(0xFF);
+            prev1 = V::splat(0xFF);
+            if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See Slim::find_one.
+    #[inline(always)]
+    unsafe fn find_one(
+        &self,
+        cur: *const u8,
+        end: *const u8,
+        prev0: &mut V,
+        prev1: &mut V,
+    ) -> Option<Match> {
+        let c = self.candidate(cur, prev0, prev1);
+        if !c.is_zero() {
+            if let Some(m) = self.teddy.verify(cur.sub(2), end, c) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See Slim::candidate.
+    #[inline(always)]
+    unsafe fn candidate(
+        &self,
+        cur: *const u8,
+        prev0: &mut V,
+        prev1: &mut V,
+    ) -> V {
+        let chunk = V::load_unaligned(cur);
+        let (res0, res1, res2) = Mask::members3(chunk, self.masks);
+        let res0prev0 = res0.shift_in_two_bytes(*prev0);
+        let res1prev1 = res1.shift_in_one_byte(*prev1);
+        let res = res0prev0.and(res1prev1).and(res2);
+        *prev0 = res0;
+        *prev1 = res1;
+        res
+    }
+}
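The `prev0`/`prev1` state threaded through the 2-, 3- and 4-byte searchers above carries the previous window's per-byte results forward, so a match that straddles a window boundary is not lost. A plain-array model of the 2-byte combination (an editorial sketch only; `LANES` and `combine2` are invented names, and the vendored code performs the shift with `shift_in_one_byte` on real vectors):

```rust
// res0[j] says "byte j could be the first byte of some pattern", res1[j]
// says "byte j could be the second byte". A 2-byte candidate is reported in
// lane j when res0 fired one lane earlier and res1 fired at j, so res0 is
// shifted right by one lane, pulling the previous window's last lane in.
const LANES: usize = 16;

fn combine2(res0: [u8; LANES], res1: [u8; LANES], prev0: &mut [u8; LANES]) -> [u8; LANES] {
    let mut cand = [0u8; LANES];
    for j in 0..LANES {
        // This is what `shift_in_one_byte` computes per lane.
        let shifted = if j == 0 { prev0[LANES - 1] } else { res0[j - 1] };
        cand[j] = shifted & res1[j];
    }
    *prev0 = res0;
    cand
}

fn main() {
    // prev0 starts as "all firing" (0xFF), mirroring the searchers above:
    // positions before the first window are optimistically treated as
    // candidates and weeded out by verification.
    let mut prev0 = [0xFF; LANES];
    let res0 = [0b0000_0001; LANES];
    let mut res1 = [0u8; LANES];
    res1[0] = 0b0000_0001;
    let cand = combine2(res0, res1, &mut prev0);
    assert_eq!(cand[0], 0b0000_0001); // boundary-spanning candidate survives
}
```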
+
+impl<V: Vector> Slim<V, 4> {
+    /// See Slim::find.
+    #[inline(always)]
+    pub(crate) unsafe fn find(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<Match> {
+        let len = end.distance(start);
+        debug_assert!(len >= self.minimum_len());
+        let mut cur = start.add(3);
+        let mut prev0 = V::splat(0xFF);
+        let mut prev1 = V::splat(0xFF);
+        let mut prev2 = V::splat(0xFF);
+        while cur <= end.sub(V::BYTES) {
+            if let Some(m) =
+                self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
+            {
+                return Some(m);
+            }
+            cur = cur.add(V::BYTES);
+        }
+        if cur < end {
+            cur = end.sub(V::BYTES);
+            prev0 = V::splat(0xFF);
+            prev1 = V::splat(0xFF);
+            prev2 = V::splat(0xFF);
+            if let Some(m) =
+                self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
+            {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See Slim::find_one.
+    #[inline(always)]
+    unsafe fn find_one(
+        &self,
+        cur: *const u8,
+        end: *const u8,
+        prev0: &mut V,
+        prev1: &mut V,
+        prev2: &mut V,
+    ) -> Option<Match> {
+        let c = self.candidate(cur, prev0, prev1, prev2);
+        if !c.is_zero() {
+            if let Some(m) = self.teddy.verify(cur.sub(3), end, c) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See Slim::candidate.
+    #[inline(always)]
+    unsafe fn candidate(
+        &self,
+        cur: *const u8,
+        prev0: &mut V,
+        prev1: &mut V,
+        prev2: &mut V,
+    ) -> V {
+        let chunk = V::load_unaligned(cur);
+        let (res0, res1, res2, res3) = Mask::members4(chunk, self.masks);
+        let res0prev0 = res0.shift_in_three_bytes(*prev0);
+        let res1prev1 = res1.shift_in_two_bytes(*prev1);
+        let res2prev2 = res2.shift_in_one_byte(*prev2);
+        let res = res0prev0.and(res1prev1).and(res2prev2).and(res3);
+        *prev0 = res0;
+        *prev1 = res1;
+        *prev2 = res2;
+        res
+    }
+}
+
+/// A "fat" Teddy implementation that is generic over both the vector type
+/// and the minimum length of the patterns being searched for.
+///
+/// Only 1, 2, 3 and 4 bytes are supported as minimum lengths.
+#[derive(Clone, Debug)]
+pub(crate) struct Fat<V, const BYTES: usize> {
+    /// A generic data structure for doing "fat" Teddy verification.
+    teddy: Teddy<16>,
+    /// The masks used as inputs to the shuffle operation to generate
+    /// candidates (which are fed into the verification routines).
+    masks: [Mask<V>; BYTES],
+}
+
+impl<V: FatVector, const BYTES: usize> Fat<V, BYTES> {
+    /// Create a new "fat" Teddy searcher for the given patterns.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `BYTES` is any value other than 1, 2, 3 or 4.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    pub(crate) unsafe fn new(patterns: Arc<Patterns>) -> Fat<V, BYTES> {
+        assert!(
+            1 <= BYTES && BYTES <= 4,
+            "only 1, 2, 3 or 4 bytes are supported"
+        );
+        let teddy = Teddy::new(patterns);
+        let masks = FatMaskBuilder::from_teddy(&teddy);
+        Fat { teddy, masks }
+    }
+
+    /// Returns the approximate total amount of heap used by this type, in
+    /// units of bytes.
+    #[inline(always)]
+    pub(crate) fn memory_usage(&self) -> usize {
+        self.teddy.memory_usage()
+    }
+
+    /// Returns the minimum length, in bytes, that a haystack must be in order
+    /// to use it with this searcher.
+    #[inline(always)]
+    pub(crate) fn minimum_len(&self) -> usize {
+        V::Half::BYTES + (BYTES - 1)
+    }
+}
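A quick sanity check of the `minimum_len` formulas above, as an editorial sketch (the helper name `teddy_minimum_len` is invented): the `BYTES`-byte searchers begin their first load at offset `BYTES - 1` so that earlier lanes can be shifted in, and every load reads a full vector (or, for fat Teddy, a half vector), so the haystack must be able to hold one such load at that offset.

```rust
const fn teddy_minimum_len(load_bytes: usize, mask_bytes: usize) -> usize {
    load_bytes + (mask_bytes - 1)
}

fn main() {
    // Slim Teddy with 128-bit vectors and 3-byte masks.
    assert_eq!(teddy_minimum_len(16, 3), 18);
    // Slim Teddy with 256-bit vectors and 3-byte masks.
    assert_eq!(teddy_minimum_len(32, 3), 34);
    // Fat Teddy on 256-bit vectors loads 16-byte half vectors, so its
    // minimum tracks V::Half::BYTES rather than V::BYTES.
    assert_eq!(teddy_minimum_len(16, 4), 19);
}
```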
+
+impl<V: FatVector> Fat<V, 1> {
+    /// Look for an occurrence of the patterns in this finder in the haystack
+    /// given by the `start` and `end` pointers.
+    ///
+    /// If no match could be found, then `None` is returned.
+    ///
+    /// # Safety
+    ///
+    /// The given pointers representing the haystack must be valid to read
+    /// from. They must also point to a region of memory that is at least the
+    /// minimum length required by this searcher.
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    pub(crate) unsafe fn find(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<Match> {
+        let len = end.distance(start);
+        debug_assert!(len >= self.minimum_len());
+        let mut cur = start;
+        while cur <= end.sub(V::Half::BYTES) {
+            if let Some(m) = self.find_one(cur, end) {
+                return Some(m);
+            }
+            cur = cur.add(V::Half::BYTES);
+        }
+        if cur < end {
+            cur = end.sub(V::Half::BYTES);
+            if let Some(m) = self.find_one(cur, end) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// Look for a match starting in the `V::BYTES` bytes at and after `cur`.
+    /// If there isn't one, then `None` is returned.
+    ///
+    /// # Safety
+    ///
+    /// The given pointers representing the haystack must be valid to read
+    /// from. They must also point to a region of memory that is at least the
+    /// minimum length required by this searcher.
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    unsafe fn find_one(
+        &self,
+        cur: *const u8,
+        end: *const u8,
+    ) -> Option<Match> {
+        let c = self.candidate(cur);
+        if !c.is_zero() {
+            if let Some(m) = self.teddy.verify(cur, end, c) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// Look for a candidate match (represented as a vector) starting in the
+    /// `V::BYTES` bytes at and after `cur`. If there isn't one, then a vector
+    /// with all bits set to zero is returned.
+    ///
+    /// # Safety
+    ///
+    /// The given pointer representing the haystack must be valid to read
+    /// from.
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    unsafe fn candidate(&self, cur: *const u8) -> V {
+        let chunk = V::load_half_unaligned(cur);
+        Mask::members1(chunk, self.masks)
+    }
+}
+
+impl<V: FatVector> Fat<V, 2> {
+    /// See `Fat::find`.
+    #[inline(always)]
+    pub(crate) unsafe fn find(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<Match> {
+        let len = end.distance(start);
+        debug_assert!(len >= self.minimum_len());
+        let mut cur = start.add(1);
+        let mut prev0 = V::splat(0xFF);
+        while cur <= end.sub(V::Half::BYTES) {
+            if let Some(m) = self.find_one(cur, end, &mut prev0) {
+                return Some(m);
+            }
+            cur = cur.add(V::Half::BYTES);
+        }
+        if cur < end {
+            cur = end.sub(V::Half::BYTES);
+            prev0 = V::splat(0xFF);
+            if let Some(m) = self.find_one(cur, end, &mut prev0) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See `Fat::find_one`.
+    #[inline(always)]
+    unsafe fn find_one(
+        &self,
+        cur: *const u8,
+        end: *const u8,
+        prev0: &mut V,
+    ) -> Option<Match> {
+        let c = self.candidate(cur, prev0);
+        if !c.is_zero() {
+            if let Some(m) = self.teddy.verify(cur.sub(1), end, c) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See `Fat::candidate`.
+    #[inline(always)]
+    unsafe fn candidate(&self, cur: *const u8, prev0: &mut V) -> V {
+        let chunk = V::load_half_unaligned(cur);
+        let (res0, res1) = Mask::members2(chunk, self.masks);
+        let res0prev0 = res0.half_shift_in_one_byte(*prev0);
+        let res = res0prev0.and(res1);
+        *prev0 = res0;
+        res
+    }
+}
+
+impl<V: FatVector> Fat<V, 3> {
+    /// See `Fat::find`.
+    #[inline(always)]
+    pub(crate) unsafe fn find(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<Match> {
+        let len = end.distance(start);
+        debug_assert!(len >= self.minimum_len());
+        let mut cur = start.add(2);
+        let mut prev0 = V::splat(0xFF);
+        let mut prev1 = V::splat(0xFF);
+        while cur <= end.sub(V::Half::BYTES) {
+            if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
+                return Some(m);
+            }
+            cur = cur.add(V::Half::BYTES);
+        }
+        if cur < end {
+            cur = end.sub(V::Half::BYTES);
+            prev0 = V::splat(0xFF);
+            prev1 = V::splat(0xFF);
+            if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See `Fat::find_one`.
+    #[inline(always)]
+    unsafe fn find_one(
+        &self,
+        cur: *const u8,
+        end: *const u8,
+        prev0: &mut V,
+        prev1: &mut V,
+    ) -> Option<Match> {
+        let c = self.candidate(cur, prev0, prev1);
+        if !c.is_zero() {
+            if let Some(m) = self.teddy.verify(cur.sub(2), end, c) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See `Fat::candidate`.
+    #[inline(always)]
+    unsafe fn candidate(
+        &self,
+        cur: *const u8,
+        prev0: &mut V,
+        prev1: &mut V,
+    ) -> V {
+        let chunk = V::load_half_unaligned(cur);
+        let (res0, res1, res2) = Mask::members3(chunk, self.masks);
+        let res0prev0 = res0.half_shift_in_two_bytes(*prev0);
+        let res1prev1 = res1.half_shift_in_one_byte(*prev1);
+        let res = res0prev0.and(res1prev1).and(res2);
+        *prev0 = res0;
+        *prev1 = res1;
+        res
+    }
+}
+
+impl<V: FatVector> Fat<V, 4> {
+    /// See `Fat::find`.
+    #[inline(always)]
+    pub(crate) unsafe fn find(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<Match> {
+        let len = end.distance(start);
+        debug_assert!(len >= self.minimum_len());
+        let mut cur = start.add(3);
+        let mut prev0 = V::splat(0xFF);
+        let mut prev1 = V::splat(0xFF);
+        let mut prev2 = V::splat(0xFF);
+        while cur <= end.sub(V::Half::BYTES) {
+            if let Some(m) =
+                self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
+            {
+                return Some(m);
+            }
+            cur = cur.add(V::Half::BYTES);
+        }
+        if cur < end {
+            cur = end.sub(V::Half::BYTES);
+            prev0 = V::splat(0xFF);
+            prev1 = V::splat(0xFF);
+            prev2 = V::splat(0xFF);
+            if let Some(m) =
+                self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
+            {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See `Fat::find_one`.
+    #[inline(always)]
+    unsafe fn find_one(
+        &self,
+        cur: *const u8,
+        end: *const u8,
+        prev0: &mut V,
+        prev1: &mut V,
+        prev2: &mut V,
+    ) -> Option<Match> {
+        let c = self.candidate(cur, prev0, prev1, prev2);
+        if !c.is_zero() {
+            if let Some(m) = self.teddy.verify(cur.sub(3), end, c) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// See `Fat::candidate`.
+    #[inline(always)]
+    unsafe fn candidate(
+        &self,
+        cur: *const u8,
+        prev0: &mut V,
+        prev1: &mut V,
+        prev2: &mut V,
+    ) -> V {
+        let chunk = V::load_half_unaligned(cur);
+        let (res0, res1, res2, res3) = Mask::members4(chunk, self.masks);
+        let res0prev0 = res0.half_shift_in_three_bytes(*prev0);
+        let res1prev1 = res1.half_shift_in_two_bytes(*prev1);
+        let res2prev2 = res2.half_shift_in_one_byte(*prev2);
+        let res = res0prev0.and(res1prev1).and(res2prev2).and(res3);
+        *prev0 = res0;
+        *prev1 = res1;
+        *prev2 = res2;
+        res
+    }
+}
+
+/// The common elements of all "slim" and "fat" Teddy search implementations.
+///
+/// Essentially, this contains the patterns and the buckets. Namely, it
+/// contains enough to implement the verification step after candidates are
+/// identified via the shuffle masks.
+///
+/// It is generic over the number of buckets used. In general, the number of
+/// buckets is either 8 (for "slim" Teddy) or 16 (for "fat" Teddy). The
+/// generic parameter isn't really meant to be instantiated for any value
+/// other than 8 or 16, although it is technically possible. The main hiccup
+/// is that there is some bit-shifting done in the critical part of
+/// verification that could be quite expensive if `N` is not a power of 2.
+#[derive(Clone, Debug)]
+struct Teddy<const BUCKETS: usize> {
+    /// The patterns we are searching for.
+    ///
+    /// A pattern string can be found by its `PatternID`.
+    patterns: Arc<Patterns>,
+    /// The allocation of patterns in buckets. This only contains the IDs of
+    /// patterns. In order to do full verification, callers must provide the
+    /// actual patterns when using Teddy.
+    buckets: [Vec<PatternID>; BUCKETS],
+    // N.B. The above representation is very simple, but it definitely results
+    // in ping-ponging between different allocations during verification. I've
+    // tried experimenting with other representations that flatten the pattern
+    // strings into a single allocation, but it doesn't seem to help much.
+    // Probably everything is small enough to fit into cache anyway, and so
+    // the pointer chasing isn't a big deal?
+    //
+    // One other avenue I haven't explored is some kind of hashing trick
+    // that lets us do another high-confidence check before launching into
+    // `memcmp`.
+}
+
+impl<const BUCKETS: usize> Teddy<BUCKETS> {
+    /// Create a new generic data structure for Teddy verification.
+    fn new(patterns: Arc<Patterns>) -> Teddy<BUCKETS> {
+        assert_ne!(0, patterns.len(), "Teddy requires at least one pattern");
+        assert_ne!(
+            0,
+            patterns.minimum_len(),
+            "Teddy does not support zero-length patterns"
+        );
+        assert!(
+            BUCKETS == 8 || BUCKETS == 16,
+            "Teddy only supports 8 or 16 buckets"
+        );
+        // MSRV(1.63): Use core::array::from_fn below instead of allocating a
+        // superfluous outer Vec. Not a big deal (especially given the
+        // BTreeMap allocation below), but nice to not do it.
+        let buckets =
+            <[Vec<PatternID>; BUCKETS]>::try_from(vec![vec![]; BUCKETS])
+                .unwrap();
+        let mut t = Teddy { patterns, buckets };
+
+        let mut map: BTreeMap<Vec<u8>, usize> = BTreeMap::new();
+        for (id, pattern) in t.patterns.iter() {
+            // We try to be slightly clever in how we assign patterns into
+            // buckets. Generally speaking, we want patterns with the same
+            // prefix to be in the same bucket, since it minimizes the amount
+            // of time we spend churning through buckets in the verification
+            // step.
+            //
+            // So we could assign patterns with the same N-prefix (where N is
+            // the size of the mask, which is one of {1, 2, 3, 4}) to the same
+            // bucket. However, case insensitive searches are fairly common,
+            // so we'd, for example, ideally want to treat `abc` and `ABC` as
+            // if they shared the same prefix. ASCII has the nice property
+            // that the lower 4 bits of A and a are the same, so we therefore
+            // group patterns with the same low-nybble-N-prefix into the same
+            // bucket.
+            //
+            // MOREOVER, this is actually necessary for correctness! In
+            // particular, by grouping patterns with the same prefix into the
+            // same bucket, we ensure that we preserve correct leftmost-first
+            // and leftmost-longest match semantics. In addition to the fact
+            // that `patterns.iter()` iterates in the correct order, this
+            // guarantees that all possible ambiguous matches will occur in
+            // the same bucket. The verification routine could be adjusted to
+            // support correct leftmost match semantics regardless of bucket
+            // allocation, but that results in a performance hit. It's much
+            // nicer to be able to just stop as soon as a match is found.
+            let lonybs = pattern.low_nybbles(t.mask_len());
+            if let Some(&bucket) = map.get(&lonybs) {
+                t.buckets[bucket].push(id);
+            } else {
+                // N.B. We assign buckets in reverse because it shouldn't have
+                // any influence on performance, but it does make it harder to
+                // get leftmost match semantics accidentally correct.
+                let bucket = (BUCKETS - 1) - (id.as_usize() % BUCKETS);
+                t.buckets[bucket].push(id);
+                map.insert(lonybs, bucket);
+            }
+        }
+        t
+    }
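The low-nybble bucket key used by `Teddy::new` above is easy to model in isolation. ASCII letters differ from their uppercase forms only in bit 5, so the low 4 bits of each byte are case-insensitive. A standalone sketch (the free function `low_nybbles` is invented here; in the patch this lives as a method on the pattern type):

```rust
fn low_nybbles(pattern: &[u8], len: usize) -> Vec<u8> {
    pattern[..len].iter().map(|&b| b & 0xF).collect()
}

fn main() {
    // `abc` (0x61 0x62 0x63) and `ABC` (0x41 0x42 0x43) share low nybbles,
    // so they land in the same bucket and case-insensitive alternations stay
    // together, which the comment above notes is required for correctness.
    assert_eq!(low_nybbles(b"abc", 3), low_nybbles(b"ABC", 3));
    // Unrelated prefixes get different keys and can spread across buckets.
    assert_ne!(low_nybbles(b"abc", 3), low_nybbles(b"xyz", 3));
}
```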
+
+    /// Verify whether there are any matches starting at or after `cur` in the
+    /// haystack. The candidate chunk given should correspond to 8-bit bitsets
+    /// for N buckets.
+    ///
+    /// # Safety
+    ///
+    /// The given pointers representing the haystack must be valid to read
+    /// from.
+    #[inline(always)]
+    unsafe fn verify64(
+        &self,
+        cur: *const u8,
+        end: *const u8,
+        mut candidate_chunk: u64,
+    ) -> Option<Match> {
+        while candidate_chunk != 0 {
+            let bit = candidate_chunk.trailing_zeros().as_usize();
+            candidate_chunk &= !(1 << bit);
+
+            let cur = cur.add(bit / BUCKETS);
+            let bucket = bit % BUCKETS;
+            if let Some(m) = self.verify_bucket(cur, end, bucket) {
+                return Some(m);
+            }
+        }
+        None
+    }
+
+    /// Verify whether there are any matches starting at `at` in the given
+    /// `haystack` corresponding only to patterns in the given bucket.
+    ///
+    /// # Safety
+    ///
+    /// The given pointers representing the haystack must be valid to read
+    /// from.
+    ///
+    /// The bucket index must be less than `self.buckets.len()`.
+    #[inline(always)]
+    unsafe fn verify_bucket(
+        &self,
+        cur: *const u8,
+        end: *const u8,
+        bucket: usize,
+    ) -> Option<Match> {
+        debug_assert!(bucket < self.buckets.len());
+        // SAFETY: The caller must ensure that the bucket index is correct.
+        for pid in self.buckets.get_unchecked(bucket).iter().copied() {
+            // SAFETY: This is safe because we are guaranteed that every
+            // index in a Teddy bucket is a valid index into `pats`, by
+            // construction.
+            debug_assert!(pid.as_usize() < self.patterns.len());
+            let pat = self.patterns.get_unchecked(pid);
+            if pat.is_prefix_raw(cur, end) {
+                let start = cur;
+                let end = start.add(pat.len());
+                return Some(Match { pid, start, end });
+            }
+        }
+        None
+    }
+
+    /// Returns the total number of masks required by the patterns in this
+    /// Teddy searcher.
+    ///
+    /// Basically, the mask length corresponds to the type of Teddy searcher
+    /// to use: a 1-byte, 2-byte, 3-byte or 4-byte searcher. The bigger the
+    /// better, typically, since searching for longer substrings usually
+    /// decreases the rate of false positives. Therefore, the number of masks
+    /// needed is the length of the shortest pattern in this searcher. If the
+    /// length of the shortest pattern (in bytes) is bigger than 4, then the
+    /// mask length is 4 since there are no Teddy searchers for more than 4
+    /// bytes.
+    fn mask_len(&self) -> usize {
+        core::cmp::min(4, self.patterns.minimum_len())
+    }
+
+    /// Returns the approximate total amount of heap used by this type, in
+    /// units of bytes.
+    fn memory_usage(&self) -> usize {
+        // This is an upper bound rather than a precise accounting. No
+        // particular reason, other than it's probably very close to actual
+        // memory usage in practice.
+        self.patterns.len() * core::mem::size_of::<PatternID>()
+    }
+}
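The bit layout that `verify64` walks above decodes cleanly in scalar code. With `BUCKETS = 8`, each byte of the 64-bit chunk holds the 8-bucket bitset for one haystack position, so bit `k` maps to position `k / 8` and bucket `k % 8`, and `trailing_zeros` visits candidates in haystack order, which is what leftmost match semantics require. A hedged sketch (`decode_candidates` is an invented name for illustration):

```rust
fn decode_candidates(mut chunk: u64, buckets: usize) -> Vec<(usize, usize)> {
    let mut out = Vec::new();
    while chunk != 0 {
        let bit = chunk.trailing_zeros() as usize;
        chunk &= !(1 << bit); // clear the candidate we are about to verify
        out.push((bit / buckets, bit % buckets)); // (haystack offset, bucket)
    }
    out
}

fn main() {
    // Bucket 1 fired at position 0 and bucket 0 fired at position 2.
    let chunk = (1u64 << 1) | (1u64 << 16);
    assert_eq!(decode_candidates(chunk, 8), vec![(0, 1), (2, 0)]);
}
```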
+
+impl Teddy<8> {
+    /// Runs the verification routine for "slim" Teddy.
+    ///
+    /// The candidate given should be a collection of 8-bit bitsets (one
+    /// bitset per lane), where the ith bit is set in the jth lane if and only
+    /// if the byte occurring at `at + j` in `cur` is in the bucket `i`.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    ///
+    /// The given pointers must be valid to read from.
+    #[inline(always)]
+    unsafe fn verify<V: Vector>(
+        &self,
+        mut cur: *const u8,
+        end: *const u8,
+        candidate: V,
+    ) -> Option<Match> {
+        debug_assert!(!candidate.is_zero());
+        // Convert the candidate into 64-bit chunks, and then verify each of
+        // those chunks.
+        candidate.for_each_64bit_lane(
+            #[inline(always)]
+            |_, chunk| {
+                let result = self.verify64(cur, end, chunk);
+                cur = cur.add(8);
+                result
+            },
+        )
+    }
+}
+
+impl Teddy<16> {
+    /// Runs the verification routine for "fat" Teddy.
+    ///
+    /// The candidate given should be a collection of 8-bit bitsets (one
+    /// bitset per lane), where the ith bit is set in the jth lane if and only
+    /// if the byte occurring at `at + (j < 16 ? j : j - 16)` in `cur` is in
+    /// the bucket `j < 16 ? i : i + 8`.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    ///
+    /// The given pointers must be valid to read from.
+    #[inline(always)]
+    unsafe fn verify<V: FatVector>(
+        &self,
+        mut cur: *const u8,
+        end: *const u8,
+        candidate: V,
+    ) -> Option<Match> {
+        // This is a bit tricky, but we basically want to convert our
+        // candidate, which looks like this (assuming a 256-bit vector):
+        //
+        //     a31 a30 ... a17 a16 a15 a14 ... a01 a00
+        //
+        // where each a(i) is an 8-bit bitset corresponding to the activated
+        // buckets, to this:
+        //
+        //     a31 a15 a30 a14 a29 a13 ... a18 a02 a17 a01 a16 a00
+        //
+        // Namely, for Fat Teddy, the high 128-bits of the candidate
+        // correspond to the same bytes in the haystack as the low 128-bits
+        // (so we only scan 16 bytes at a time), but are for buckets 8-15
+        // instead of 0-7.
+        //
+        // The verification routine wants to look at all potentially matching
+        // buckets before moving on to the next lane. So for example, a16 and
+        // a00 both correspond to the first byte in our window; a00 contains
+        // buckets 0-7 and a16 contains buckets 8-15. Specifically, a16
+        // should be checked before a01. So the transformation shown above
+        // allows us to use our normal verification procedure with one small
+        // change: we treat each bitset as 16 bits instead of 8 bits.
+        debug_assert!(!candidate.is_zero());
+
+        // Swap the 128-bit lanes in the candidate vector.
+        let swapped = candidate.swap_halves();
+        // Interleave the bytes from the low 128-bit lanes, starting with
+        // cand first.
+        let r1 = candidate.interleave_low_8bit_lanes(swapped);
+        // Interleave the bytes from the high 128-bit lanes, starting with
+        // cand first.
+        let r2 = candidate.interleave_high_8bit_lanes(swapped);
+        // Now just take the 2 low 64-bit integers from both r1 and r2. We
+        // can drop the high 64-bit integers because they are a mirror image
+        // of the low 64-bit integers. All we care about are the low 128-bit
+        // lanes of r1 and r2. Combined, they contain all our 16-bit bitsets
+        // laid out in the desired order, as described above.
+        r1.for_each_low_64bit_lane(
+            r2,
+            #[inline(always)]
+            |_, chunk| {
+                let result = self.verify64(cur, end, chunk);
+                cur = cur.add(4);
+                result
+            },
+        )
+    }
+}
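The lane reordering in `Teddy<16>::verify` above has a simple scalar meaning: pair each position's low-bucket byte with its high-bucket byte to form one 16-bit bitset per position, so the ordinary `verify64` walk can be reused with `BUCKETS = 16`. An editorial model with plain arrays (the name `interleave_fat` is invented; the vendored code does this with the swap and interleave vector ops):

```rust
// lo[j] holds buckets 0-7 and hi[j] holds buckets 8-15 for position j.
fn interleave_fat(lo: [u8; 16], hi: [u8; 16]) -> [u16; 16] {
    let mut out = [0u16; 16];
    for j in 0..16 {
        // Low byte = buckets 0-7, high byte = buckets 8-15, matching the
        // "a16 is checked before a01" ordering described above.
        out[j] = u16::from(lo[j]) | (u16::from(hi[j]) << 8);
    }
    out
}

fn main() {
    let mut lo = [0u8; 16];
    let mut hi = [0u8; 16];
    lo[0] = 0b0000_0010; // bucket 1 fired at position 0
    hi[0] = 0b0000_0001; // bucket 8 fired at position 0
    let bitsets = interleave_fat(lo, hi);
    assert_eq!(bitsets[0], 0b0000_0001_0000_0010);
}
```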
+/// A vector generic mask for the low and high nybbles in a set of patterns.
+/// Each 8-bit lane `j` in a vector corresponds to a bitset where the `i`th
+/// bit is set if and only if the nybble `j` is in the bucket `i` at a
+/// particular position.
+///
+/// This is slightly tweaked depending on whether Slim or Fat Teddy is being
+/// used. For Slim Teddy, the bitsets in the lower half are the same as the
+/// bitsets in the higher half, so that we can search `V::BYTES` bytes at a
+/// time. (Remember, the nybbles in the haystack are used as indices into
+/// these masks, and 256-bit shuffles only operate on 128-bit lanes.)
+///
+/// For Fat Teddy, the bitsets are not repeated, but instead, the high half
+/// bits correspond to an additional 8 buckets. So a bitset `00100010` has
+/// buckets 1 and 5 set if it's in the lower half, but has buckets 9 and 13
+/// set if it's in the higher half.
+#[derive(Clone, Copy, Debug)]
+struct Mask<V> {
+    lo: V,
+    hi: V,
+}
+
+impl<V: Vector> Mask<V> {
+    /// Return a candidate for Teddy (fat or slim) that is searching for
+    /// 1-byte candidates.
+    ///
+    /// If a candidate is returned, it will be a collection of 8-bit bitsets
+    /// (one bitset per lane), where the ith bit is set in the jth lane if and
+    /// only if the byte occurring at the jth lane in `chunk` is in the bucket
+    /// `i`. If no candidate is found, then the vector returned will have all
+    /// lanes set to zero.
+    ///
+    /// `chunk` should correspond to a `V::BYTES` window of the haystack
+    /// (where the least significant byte corresponds to the start of the
+    /// window). For fat Teddy, the haystack window length should be
+    /// `V::BYTES / 2`, with the window repeated in each half of the vector.
+    ///
+    /// `masks[0]` should correspond to a low/high mask for the first byte of
+    /// all patterns that are being searched.
+    #[inline(always)]
+    unsafe fn members1(chunk: V, masks: [Mask<V>; 1]) -> V {
+        let lomask = V::splat(0xF);
+        let hlo = chunk.and(lomask);
+        let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
+        let locand = masks[0].lo.shuffle_bytes(hlo);
+        let hicand = masks[0].hi.shuffle_bytes(hhi);
+        locand.and(hicand)
+    }
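Per lane, `members1` above is just two table lookups joined with an AND: the shuffle uses each haystack nybble as an index into a 16-entry table, and the AND keeps only buckets that contain a pattern whose first byte matches both nybbles. A scalar rendering of one lane (a hedged sketch; `member1_scalar` is an invented name):

```rust
fn member1_scalar(byte: u8, lo_table: &[u8; 16], hi_table: &[u8; 16]) -> u8 {
    lo_table[usize::from(byte & 0xF)] & hi_table[usize::from(byte >> 4)]
}

fn main() {
    // Suppose bucket 3 contains a pattern starting with b'a' (0x61): the
    // mask builder sets bit 3 at lo index 0x1 and hi index 0x6.
    let mut lo = [0u8; 16];
    let mut hi = [0u8; 16];
    lo[0x1] |= 1 << 3;
    hi[0x6] |= 1 << 3;
    assert_eq!(member1_scalar(b'a', &lo, &hi), 1 << 3); // candidate in bucket 3
    assert_eq!(member1_scalar(b'b', &lo, &hi), 0); // 0x62: hi matches, lo doesn't
}
```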
+
+    /// Return a candidate for Teddy (fat or slim) that is searching for
+    /// 2-byte candidates.
+    ///
+    /// If candidates are returned, each will be a collection of 8-bit bitsets
+    /// (one bitset per lane), where the ith bit is set in the jth lane if and
+    /// only if the byte occurring at the jth lane in `chunk` is in the bucket
+    /// `i`. Each candidate returned corresponds to the first and second bytes
+    /// of the patterns being searched. If no candidate is found, then all of
+    /// the lanes will be set to zero in at least one of the vectors returned.
+    ///
+    /// `chunk` should correspond to a `V::BYTES` window of the haystack
+    /// (where the least significant byte corresponds to the start of the
+    /// window). For fat Teddy, the haystack window length should be
+    /// `V::BYTES / 2`, with the window repeated in each half of the vector.
+    ///
+    /// The masks should correspond to the masks computed for the first and
+    /// second bytes of all patterns that are being searched.
+    #[inline(always)]
+    unsafe fn members2(chunk: V, masks: [Mask<V>; 2]) -> (V, V) {
+        let lomask = V::splat(0xF);
+        let hlo = chunk.and(lomask);
+        let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
+
+        let locand1 = masks[0].lo.shuffle_bytes(hlo);
+        let hicand1 = masks[0].hi.shuffle_bytes(hhi);
+        let cand1 = locand1.and(hicand1);
+
+        let locand2 = masks[1].lo.shuffle_bytes(hlo);
+        let hicand2 = masks[1].hi.shuffle_bytes(hhi);
+        let cand2 = locand2.and(hicand2);
+
+        (cand1, cand2)
+    }
+
+    /// Return a candidate for Teddy (fat or slim) that is searching for
+    /// 3-byte candidates.
+    ///
+    /// If candidates are returned, each will be a collection of 8-bit bitsets
+    /// (one bitset per lane), where the ith bit is set in the jth lane if and
+    /// only if the byte occurring at the jth lane in `chunk` is in the bucket
+    /// `i`. Each candidate returned corresponds to the first, second and
+    /// third bytes of the patterns being searched. If no candidate is found,
+    /// then all of the lanes will be set to zero in at least one of the
+    /// vectors returned.
+    ///
+    /// `chunk` should correspond to a `V::BYTES` window of the haystack
+    /// (where the least significant byte corresponds to the start of the
+    /// window). For fat Teddy, the haystack window length should be
+    /// `V::BYTES / 2`, with the window repeated in each half of the vector.
+    ///
+    /// The masks should correspond to the masks computed for the first,
+    /// second and third bytes of all patterns that are being searched.
+    #[inline(always)]
+    unsafe fn members3(chunk: V, masks: [Mask<V>; 3]) -> (V, V, V) {
+        let lomask = V::splat(0xF);
+        let hlo = chunk.and(lomask);
+        let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
+
+        let locand1 = masks[0].lo.shuffle_bytes(hlo);
+        let hicand1 = masks[0].hi.shuffle_bytes(hhi);
+        let cand1 = locand1.and(hicand1);
+
+        let locand2 = masks[1].lo.shuffle_bytes(hlo);
+        let hicand2 = masks[1].hi.shuffle_bytes(hhi);
+        let cand2 = locand2.and(hicand2);
+
+        let locand3 = masks[2].lo.shuffle_bytes(hlo);
+        let hicand3 = masks[2].hi.shuffle_bytes(hhi);
+        let cand3 = locand3.and(hicand3);
+
+        (cand1, cand2, cand3)
+    }
+
+    /// Return a candidate for Teddy (fat or slim) that is searching for
+    /// 4-byte candidates.
+    ///
+    /// If candidates are returned, each will be a collection of 8-bit bitsets
+    /// (one bitset per lane), where the ith bit is set in the jth lane if and
+    /// only if the byte occurring at the jth lane in `chunk` is in the bucket
+    /// `i`. Each candidate returned corresponds to the first, second, third
+    /// and fourth bytes of the patterns being searched. If no candidate is
+    /// found, then all of the lanes will be set to zero in at least one of
+    /// the vectors returned.
+    ///
+    /// `chunk` should correspond to a `V::BYTES` window of the haystack
+    /// (where the least significant byte corresponds to the start of the
+    /// window). For fat Teddy, the haystack window length should be
+    /// `V::BYTES / 2`, with the window repeated in each half of the vector.
+    ///
+    /// The masks should correspond to the masks computed for the first,
+    /// second, third and fourth bytes of all patterns that are being searched.
+    #[inline(always)]
+    unsafe fn members4(chunk: V, masks: [Mask<V>; 4]) -> (V, V, V, V) {
+        let lomask = V::splat(0xF);
+        let hlo = chunk.and(lomask);
+        let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
+
+        let locand1 = masks[0].lo.shuffle_bytes(hlo);
+        let hicand1 = masks[0].hi.shuffle_bytes(hhi);
+        let cand1 = locand1.and(hicand1);
+
+        let locand2 = masks[1].lo.shuffle_bytes(hlo);
+        let hicand2 = masks[1].hi.shuffle_bytes(hhi);
+        let cand2 = locand2.and(hicand2);
+
+        let locand3 = masks[2].lo.shuffle_bytes(hlo);
+        let hicand3 = masks[2].hi.shuffle_bytes(hhi);
+        let cand3 = locand3.and(hicand3);
+
+        let locand4 = masks[3].lo.shuffle_bytes(hlo);
+        let hicand4 = masks[3].hi.shuffle_bytes(hhi);
+        let cand4 = locand4.and(hicand4);
+
+        (cand1, cand2, cand3, cand4)
+    }
+}
+
+/// Represents the low and high nybble masks that will be used during
+/// search. Each mask is 32 bytes wide, although only the first 16 bytes are
+/// used for 128-bit vectors.
+///
+/// Each byte in the mask corresponds to an 8-bit bitset, where bit `i` is set
+/// if and only if the corresponding nybble is in the ith bucket. The index of
+/// the byte (0-15, inclusive) corresponds to the nybble.
+///
+/// Each mask is used as the target of a shuffle, where the indices for the
+/// shuffle are taken from the haystack. AND'ing the shuffles for both the
+/// low and high masks together also results in 8-bit bitsets, but where bit
+/// `i` is set if and only if the corresponding *byte* is in the ith bucket.
+#[derive(Clone, Default)]
+struct SlimMaskBuilder {
+    lo: [u8; 32],
+    hi: [u8; 32],
+}
+
+impl SlimMaskBuilder {
+    /// Update this mask by adding the given byte to the given bucket. The
+    /// given bucket must be in the range 0-7.
+    ///
+    /// # Panics
+    ///
+    /// When `bucket >= 8`.
+    fn add(&mut self, bucket: usize, byte: u8) {
+        assert!(bucket < 8);
+
+        let bucket = u8::try_from(bucket).unwrap();
+        let byte_lo = usize::from(byte & 0xF);
+        let byte_hi = usize::from((byte >> 4) & 0xF);
+        // When using 256-bit vectors, we need to set this bucket assignment in
+        // the low and high 128-bit portions of the mask. This allows us to
+        // process 32 bytes at a time. Namely, AVX2 shuffles operate on each
+        // of the 128-bit lanes, rather than the full 256-bit vector at once.
+        self.lo[byte_lo] |= 1 << bucket;
+        self.lo[byte_lo + 16] |= 1 << bucket;
+        self.hi[byte_hi] |= 1 << bucket;
+        self.hi[byte_hi + 16] |= 1 << bucket;
+    }
+
+    /// Turn this builder into a vector mask.
+    ///
+    /// # Panics
+    ///
+    /// When `V` represents a vector bigger than what `MaskBytes` can contain.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    unsafe fn build<V: Vector>(&self) -> Mask<V> {
+        assert!(V::BYTES <= self.lo.len());
+        assert!(V::BYTES <= self.hi.len());
+        Mask {
+            lo: V::load_unaligned(self.lo[..].as_ptr()),
+            hi: V::load_unaligned(self.hi[..].as_ptr()),
+        }
+    }
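The mirrored bit layout that `add` produces above can be checked in a few lines. The bucket bit is set at the nybble's index in both 16-byte halves, which is what lets a 256-bit shuffle (operating per 128-bit lane) see the same lookup table in each lane. A quick editorial check, not vendored code:

```rust
fn main() {
    let (mut lo, mut hi) = ([0u8; 32], [0u8; 32]);
    let (bucket, byte) = (5u8, b'T'); // 0x54: low nybble 0x4, high nybble 0x5
    let (byte_lo, byte_hi) = (usize::from(byte & 0xF), usize::from(byte >> 4));
    for half in [0, 16] {
        lo[byte_lo + half] |= 1 << bucket;
        hi[byte_hi + half] |= 1 << bucket;
    }
    assert_eq!(lo[0x04], 1 << 5);
    assert_eq!(lo[0x14], 1 << 5); // same entry mirrored in the high half
    assert_eq!(hi[0x05], 1 << 5);
    assert_eq!(hi[0x15], 1 << 5);
}
```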
+
+    /// A convenience function for building `N` vector masks from a slim
+    /// `Teddy` value.
+    ///
+    /// # Panics
+    ///
+    /// When `V` represents a vector bigger than what `MaskBytes` can contain.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    unsafe fn from_teddy<const BYTES: usize, V: Vector>(
+        teddy: &Teddy<8>,
+    ) -> [Mask<V>; BYTES] {
+        // MSRV(1.63): Use core::array::from_fn to just build the array here
+        // instead of creating a vector and turning it into an array.
+        let mut mask_builders = vec![SlimMaskBuilder::default(); BYTES];
+        for (bucket_index, bucket) in teddy.buckets.iter().enumerate() {
+            for pid in bucket.iter().copied() {
+                let pat = teddy.patterns.get(pid);
+                for (i, builder) in mask_builders.iter_mut().enumerate() {
+                    builder.add(bucket_index, pat.bytes()[i]);
+                }
+            }
+        }
+        let array =
+            <[SlimMaskBuilder; BYTES]>::try_from(mask_builders).unwrap();
+        array.map(|builder| builder.build())
+    }
+}
+
+impl Debug for SlimMaskBuilder {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let (mut parts_lo, mut parts_hi) = (vec![], vec![]);
+        for i in 0..32 {
+            parts_lo.push(format!("{:02}: {:08b}", i, self.lo[i]));
+            parts_hi.push(format!("{:02}: {:08b}", i, self.hi[i]));
+        }
+        f.debug_struct("SlimMaskBuilder")
+            .field("lo", &parts_lo)
+            .field("hi", &parts_hi)
+            .finish()
+    }
+}
+
+/// Represents the low and high nybble masks that will be used during "fat"
+/// Teddy search.
+///
+/// Each mask is 32 bytes wide, and at the time of writing, only 256-bit
+/// vectors support fat Teddy.
+///
+/// A fat Teddy mask is like a slim Teddy mask, except that instead of
+/// repeating the bitsets in the high and low 128-bits in 256-bit vectors, the
+/// high and low 128-bit halves each represent distinct buckets. (Bringing the
+/// total to 16 instead of 8.) This permits spreading the patterns out a bit
+/// more and thus putting less pressure on verification to be fast.
+///
+/// Each byte in the mask corresponds to an 8-bit bitset, where bit `i` is set
+/// if and only if the corresponding nybble is in the ith bucket. The index of
+/// the byte (0-15, inclusive) corresponds to the nybble.
+#[derive(Clone, Copy, Default)]
+struct FatMaskBuilder {
+    lo: [u8; 32],
+    hi: [u8; 32],
+}
+
+impl FatMaskBuilder {
+    /// Update this mask by adding the given byte to the given bucket. The
+    /// given bucket must be in the range 0-15.
+    ///
+    /// # Panics
+    ///
+    /// When `bucket >= 16`.
+    fn add(&mut self, bucket: usize, byte: u8) {
+        assert!(bucket < 16);
+
+        let bucket = u8::try_from(bucket).unwrap();
+        let byte_lo = usize::from(byte & 0xF);
+        let byte_hi = usize::from((byte >> 4) & 0xF);
+        // Unlike slim teddy, fat teddy only works with AVX2. For fat teddy,
+        // the high 128 bits of our mask correspond to buckets 8-15, while the
+        // low 128 bits correspond to buckets 0-7.
+        if bucket < 8 {
+            self.lo[byte_lo] |= 1 << bucket;
+            self.hi[byte_hi] |= 1 << bucket;
+        } else {
+            self.lo[byte_lo + 16] |= 1 << (bucket % 8);
+            self.hi[byte_hi + 16] |= 1 << (bucket % 8);
+        }
+    }
+
+    /// Turn this builder into a vector mask.
+    ///
+    /// # Panics
+    ///
+    /// When `V` represents a vector bigger than what `MaskBytes` can contain.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    unsafe fn build<V: Vector>(&self) -> Mask<V> {
+        assert!(V::BYTES <= self.lo.len());
+        assert!(V::BYTES <= self.hi.len());
+        Mask {
+            lo: V::load_unaligned(self.lo[..].as_ptr()),
+            hi: V::load_unaligned(self.hi[..].as_ptr()),
+        }
+    }
+
+    /// A convenience function for building `N` vector masks from a fat
+    /// `Teddy` value.
+    ///
+    /// # Panics
+    ///
+    /// When `V` represents a vector bigger than what `MaskBytes` can contain.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target for
+    /// the current CPU.
+    #[inline(always)]
+    unsafe fn from_teddy<const BYTES: usize, V: Vector>(
+        teddy: &Teddy<16>,
+    ) -> [Mask<V>; BYTES] {
+        // MSRV(1.63): Use core::array::from_fn to just build the array here
+        // instead of creating a vector and turning it into an array.
+        let mut mask_builders = vec![FatMaskBuilder::default(); BYTES];
+        for (bucket_index, bucket) in teddy.buckets.iter().enumerate() {
+            for pid in bucket.iter().copied() {
+                let pat = teddy.patterns.get(pid);
+                for (i, builder) in mask_builders.iter_mut().enumerate() {
+                    builder.add(bucket_index, pat.bytes()[i]);
+                }
+            }
+        }
+        let array =
+            <[FatMaskBuilder; BYTES]>::try_from(mask_builders).unwrap();
+        array.map(|builder| builder.build())
+    }
+}
+
+impl Debug for FatMaskBuilder {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let (mut parts_lo, mut parts_hi) = (vec![], vec![]);
+        for i in 0..32 {
+            parts_lo.push(format!("{:02}: {:08b}", i, self.lo[i]));
+            parts_hi.push(format!("{:02}: {:08b}", i, self.hi[i]));
+        }
+        f.debug_struct("FatMaskBuilder")
+            .field("lo", &parts_lo)
+            .field("hi", &parts_hi)
+            .finish()
+    }
+}
diff --git a/vendor/aho-corasick/src/packed/teddy/mod.rs b/vendor/aho-corasick/src/packed/teddy/mod.rs
index 3268cdf..26cfcdc 100644
--- a/vendor/aho-corasick/src/packed/teddy/mod.rs
+++ b/vendor/aho-corasick/src/packed/teddy/mod.rs
@@ -1,62 +1,9 @@
-#[cfg(target_arch = "x86_64")]
-pub use crate::packed::teddy::compile::Builder;
-#[cfg(not(target_arch = "x86_64"))]
-pub use crate::packed::teddy::fallback::Builder;
-#[cfg(not(target_arch = "x86_64"))]
-pub use crate::packed::teddy::fallback::Teddy;
-#[cfg(target_arch = "x86_64")]
-pub use crate::packed::teddy::runtime::Teddy;
+// Regrettable, but Teddy stuff just isn't used on all targets. And for some
+// targets, like aarch64, only "slim" Teddy is used and so "fat" Teddy gets a
+// bunch of dead-code warnings. Just not worth trying to squash them. Blech.
+#![allow(dead_code)]

-#[cfg(target_arch = "x86_64")]
-mod compile;
-#[cfg(target_arch = "x86_64")]
-mod runtime;
+pub(crate) use self::builder::{Builder, Searcher};

-#[cfg(not(target_arch = "x86_64"))]
-mod fallback {
-    use crate::packed::pattern::Patterns;
-    use crate::Match;
-
-    #[derive(Clone, Debug, Default)]
-    pub struct Builder(());
-
-    impl Builder {
-        pub fn new() -> Builder {
-            Builder(())
-        }
-
-        pub fn build(&self, _: &Patterns) -> Option<Teddy> {
-            None
-        }
-
-        pub fn fat(&mut self, _: Option<bool>) -> &mut Builder {
-            self
-        }
-
-        pub fn avx(&mut self, _: Option<bool>) -> &mut Builder {
-            self
-        }
-    }
-
-    #[derive(Clone, Debug)]
-    pub struct Teddy(());
-
-    impl Teddy {
-        pub fn find_at(
-            &self,
-            _: &Patterns,
-            _: &[u8],
-            _: usize,
-        ) -> Option<Match> {
-            None
-        }
-
-        pub fn minimum_len(&self) -> usize {
-            0
-        }
-
-        pub fn heap_bytes(&self) -> usize {
-            0
-        }
-    }
-}
+mod builder;
+mod generic;
diff --git a/vendor/aho-corasick/src/packed/teddy/runtime.rs b/vendor/aho-corasick/src/packed/teddy/runtime.rs
deleted file mode 100644
index 0d96913..0000000
--- a/vendor/aho-corasick/src/packed/teddy/runtime.rs
+++ /dev/null
@@ -1,1204 +0,0 @@
-// See the README in this directory for an explanation of the Teddy algorithm.
-// It is strongly recommended to peruse the README before trying to grok this
-// code, as its use of SIMD is pretty opaque, although I tried to add comments
-// where appropriate.
-//
-// Moreover, while there is a lot of code in this file, most of it is
-// repeated variants of the same thing.
Specifically, there are three Teddy -// variants: Slim 128-bit Teddy (8 buckets), Slim 256-bit Teddy (8 buckets) -// and Fat 256-bit Teddy (16 buckets). For each variant, there are three -// implementations, corresponding to mask lengths of 1, 2 and 3. Bringing it to -// a total of nine variants. Each one is structured roughly the same: -// -// while at <= len(haystack) - CHUNK_SIZE: -// let candidate = find_candidate_in_chunk(haystack, at) -// if not all zeroes(candidate): -// if match = verify(haystack, at, candidate): -// return match -// -// For the most part, this remains unchanged. The parts that vary are the -// verification routine (for slim vs fat Teddy) and the candidate extraction -// (based on the number of masks). -// -// In the code below, a "candidate" corresponds to a single vector with 8-bit -// lanes. Each lane is itself an 8-bit bitset, where the ith bit is set in the -// jth lane if and only if the byte occurring at position `j` is in the -// bucket `i` (where the `j`th position is the position in the current window -// of the haystack, which is always 16 or 32 bytes). Note to be careful here: -// the ith bit and the jth lane correspond to the least significant bits of the -// vector. So when visualizing how the current window of bytes is stored in a -// vector, you often need to flip it around. For example, the text `abcd` in a -// 4-byte vector would look like this: -// -// 01100100 01100011 01100010 01100001 -// d c b a -// -// When the mask length is 1, then finding the candidate is pretty straight -// forward: you just apply the shuffle indices (from the haystack window) to -// the masks, and then AND them together, as described in the README. But for -// masks of length 2 and 3, you need to keep a little state. Specifically, -// you need to store the final 1 (for mask length 2) or 2 (for mask length 3) -// bytes of the candidate for use when searching the next window. This is for -// handling matches that span two windows. -// -// With respect to the repeated code, it would likely be possible to reduce -// the number of copies of code below using polymorphism, but I find this -// formulation clearer instead of needing to reason through generics. However, -// I admit, there may be a simpler generic construction that I'm missing. -// -// All variants are fairly heavily tested in src/packed/tests.rs. - -use std::arch::x86_64::*; -use std::mem; - -use crate::packed::pattern::{PatternID, Patterns}; -use crate::packed::teddy::compile; -use crate::packed::vector::*; -use crate::Match; - -/// The Teddy runtime. -/// -/// A Teddy runtime can be used to quickly search for occurrences of one or -/// more patterns. While it does not scale to an arbitrary number of patterns -/// like Aho-Corasick, it does find occurrences for a small set of patterns -/// much more quickly than Aho-Corasick. -/// -/// Teddy cannot run on small haystacks below a certain size, which is -/// dependent on the type of matcher used. This size can be queried via the -/// `minimum_len` method. Violating this will result in a panic. -/// -/// Finally, when callers use a Teddy runtime, they must provide precisely the -/// patterns used to construct the Teddy matcher. Violating this will result -/// in either a panic or incorrect results, but will never sacrifice memory -/// safety. -#[derive(Clone, Debug)] -pub struct Teddy { - /// The allocation of patterns in buckets. This only contains the IDs of - /// patterns. 
In order to do full verification, callers must provide the - /// actual patterns when using Teddy. - pub buckets: Vec>, - /// The maximum identifier of a pattern. This is used as a sanity check to - /// ensure that the patterns provided by the caller are the same as the - /// patterns that were used to compile the matcher. This sanity check - /// permits safely eliminating bounds checks regardless of what patterns - /// are provided by the caller. - /// - /// Note that users of the aho-corasick crate cannot get this wrong. Only - /// code internal to this crate can get it wrong, since neither `Patterns` - /// type nor the Teddy runtime are public API items. - pub max_pattern_id: PatternID, - /// The actual runtime to use. - pub exec: Exec, -} - -impl Teddy { - /// Return the first occurrence of a match in the given haystack after or - /// starting at `at`. - /// - /// The patterns provided must be precisely the same patterns given to the - /// Teddy builder, otherwise this may panic or produce incorrect results. - /// - /// All matches are consistent with the match semantics (leftmost-first or - /// leftmost-longest) set on `pats`. - pub fn find_at( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - ) -> Option { - // This assert is a bit subtle, but it's an important guarantee. - // Namely, if the maximum pattern ID seen by Teddy is the same as the - // one in the patterns given, then we are guaranteed that every pattern - // ID in all Teddy buckets are valid indices into `pats`. While this - // is nominally true, there is no guarantee that callers provide the - // same `pats` to both the Teddy builder and the searcher, which would - // otherwise make `find_at` unsafe to call. But this assert lets us - // keep this routine safe and eliminate an important bounds check in - // verification. - assert_eq!( - self.max_pattern_id, - pats.max_pattern_id(), - "teddy must be called with same patterns it was built with", - ); - // SAFETY: The haystack must have at least a minimum number of bytes - // for Teddy to be able to work. The minimum number varies depending on - // which matcher is used below. If this is violated, then it's possible - // for searching to do out-of-bounds writes. - assert!(haystack[at..].len() >= self.minimum_len()); - // SAFETY: The various Teddy matchers are always safe to call because - // the Teddy builder guarantees that a particular Exec variant is - // built only when it can be run the current CPU. That is, the Teddy - // builder will not produce a Exec::TeddySlim1Mask256 unless AVX2 is - // enabled. That is, our dynamic CPU feature detection is performed - // once in the builder, and we rely on the type system to avoid needing - // to do it again. 
- unsafe { - match self.exec { - Exec::TeddySlim1Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim1Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat1Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim2Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim2Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat2Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim3Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim3Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat3Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - } - } - } - - /// Returns the minimum length of a haystack that must be provided by - /// callers to this Teddy searcher. Providing a haystack shorter than this - /// will result in a panic, but will never violate memory safety. - pub fn minimum_len(&self) -> usize { - // SAFETY: These values must be correct in order to ensure safety. - // The Teddy runtime assumes their haystacks have at least these - // lengths. Violating this will sacrifice memory safety. - match self.exec { - Exec::TeddySlim1Mask128(_) => 16, - Exec::TeddySlim1Mask256(_) => 32, - Exec::TeddyFat1Mask256(_) => 16, - Exec::TeddySlim2Mask128(_) => 17, - Exec::TeddySlim2Mask256(_) => 33, - Exec::TeddyFat2Mask256(_) => 17, - Exec::TeddySlim3Mask128(_) => 18, - Exec::TeddySlim3Mask256(_) => 34, - Exec::TeddyFat3Mask256(_) => 34, - } - } - - /// Returns the approximate total amount of heap used by this searcher, in - /// units of bytes. - pub fn heap_bytes(&self) -> usize { - let num_patterns = self.max_pattern_id as usize + 1; - self.buckets.len() * mem::size_of::>() - + num_patterns * mem::size_of::() - } - - /// Runs the verification routine for Slim 128-bit Teddy. - /// - /// The candidate given should be a collection of 8-bit bitsets (one bitset - /// per lane), where the ith bit is set in the jth lane if and only if the - /// byte occurring at `at + j` in `haystack` is in the bucket `i`. - /// - /// This is not safe to call unless the SSSE3 target feature is enabled. - /// The `target_feature` attribute is not applied since this function is - /// always forcefully inlined. - #[inline(always)] - unsafe fn verify128( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - cand: __m128i, - ) -> Option { - debug_assert!(!is_all_zeroes128(cand)); - debug_assert_eq!(8, self.buckets.len()); - - // Convert the candidate into 64-bit chunks, and then verify each of - // those chunks. - let parts = unpack64x128(cand); - for (i, &part) in parts.iter().enumerate() { - let pos = at + i * 8; - if let Some(m) = self.verify64(pats, 8, haystack, pos, part) { - return Some(m); - } - } - None - } - - /// Runs the verification routine for Slim 256-bit Teddy. - /// - /// The candidate given should be a collection of 8-bit bitsets (one bitset - /// per lane), where the ith bit is set in the jth lane if and only if the - /// byte occurring at `at + j` in `haystack` is in the bucket `i`. - /// - /// This is not safe to call unless the AVX2 target feature is enabled. - /// The `target_feature` attribute is not applied since this function is - /// always forcefully inlined. 
-    #[inline(always)]
-    unsafe fn verify256(
-        &self,
-        pats: &Patterns,
-        haystack: &[u8],
-        at: usize,
-        cand: __m256i,
-    ) -> Option<Match> {
-        debug_assert!(!is_all_zeroes256(cand));
-        debug_assert_eq!(8, self.buckets.len());
-
-        // Convert the candidate into 64-bit chunks, and then verify each of
-        // those chunks.
-        let parts = unpack64x256(cand);
-        for (i, &part) in parts.iter().enumerate() {
-            let pos = at + i * 8;
-            if let Some(m) = self.verify64(pats, 8, haystack, pos, part) {
-                return Some(m);
-            }
-        }
-        None
-    }
-
-    /// Runs the verification routine for Fat 256-bit Teddy.
-    ///
-    /// The candidate given should be a collection of 8-bit bitsets (one
-    /// bitset per lane), where the ith bit is set in the jth lane if and only
-    /// if the byte occurring at `at + (j < 16 ? j : j - 16)` in `haystack` is
-    /// in the bucket `j < 16 ? i : i + 8`.
-    ///
-    /// This is not safe to call unless the AVX2 target feature is enabled.
-    /// The `target_feature` attribute is not applied since this function is
-    /// always forcefully inlined.
-    #[inline(always)]
-    unsafe fn verify_fat256(
-        &self,
-        pats: &Patterns,
-        haystack: &[u8],
-        at: usize,
-        cand: __m256i,
-    ) -> Option<Match> {
-        debug_assert!(!is_all_zeroes256(cand));
-        debug_assert_eq!(16, self.buckets.len());
-
-        // This is a bit tricky, but we basically want to convert our
-        // candidate, which looks like this
-        //
-        //     a31 a30 ... a17 a16 a15 a14 ... a01 a00
-        //
-        // where each a(i) is an 8-bit bitset corresponding to the activated
-        // buckets, to this
-        //
-        //     a31 a15 a30 a14 a29 a13 ... a18 a02 a17 a01 a16 a00
-        //
-        // Namely, for Fat Teddy, the high 128 bits of the candidate
-        // correspond to the same bytes in the haystack as the low 128 bits
-        // (so we only scan 16 bytes at a time), but are for buckets 8-15
-        // instead of 0-7.
-        //
-        // The verification routine wants to look at all potentially matching
-        // buckets before moving on to the next lane. So for example, a16 and
-        // a00 both correspond to the first byte in our window; a00 contains
-        // buckets 0-7 and a16 contains buckets 8-15. Specifically, a16
-        // should be checked before a01. So the transformation shown above
-        // allows us to use our normal verification procedure with one small
-        // change: we treat each bitset as 16 bits instead of 8 bits.
-
-        // Swap the 128-bit lanes in the candidate vector.
-        let swap = _mm256_permute4x64_epi64(cand, 0x4E);
-        // Interleave the bytes from the low 128-bit lanes, starting with
-        // cand first.
-        let r1 = _mm256_unpacklo_epi8(cand, swap);
-        // Interleave the bytes from the high 128-bit lanes, starting with
-        // cand first.
-        let r2 = _mm256_unpackhi_epi8(cand, swap);
-        // Now just take the 2 low 64-bit integers from both r1 and r2. We
-        // can drop the high 64-bit integers because they are a mirror image
-        // of the low 64-bit integers. All we care about are the low 128-bit
-        // lanes of r1 and r2. Combined, they contain all our 16-bit bitsets
-        // laid out in the desired order, as described above.
-        let parts = unpacklo64x256(r1, r2);
-        for (i, &part) in parts.iter().enumerate() {
-            let pos = at + i * 4;
-            if let Some(m) = self.verify64(pats, 16, haystack, pos, part) {
-                return Some(m);
-            }
-        }
-        None
-    }
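// The interleave transformation above is easier to follow with the bucket
// arithmetic made explicit. A scalar sketch (hypothetical helper, not part of
// this crate's API): for one window byte, Fat Teddy produces two 8-bit
// bitsets, one in each 128-bit half, and verification wants them fused into
// a single 16-bit bitset in which bit `i + 8` means "bucket i + 8".
fn fat_bitset(low_half: u8, high_half: u8) -> u16 {
    // Low byte covers buckets 0-7, high byte covers buckets 8-15.
    u16::from(low_half) | (u16::from(high_half) << 8)
}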
-
-    /// Verify whether there are any matches starting at or after `at` in the
-    /// given `haystack`. The candidate given should correspond to either
-    /// 8-bit (for 8 buckets) or 16-bit (16 buckets) bitsets.
-    #[inline(always)]
-    fn verify64(
-        &self,
-        pats: &Patterns,
-        bucket_count: usize,
-        haystack: &[u8],
-        at: usize,
-        mut cand: u64,
-    ) -> Option<Match> {
-        // N.B. While the bucket count is known from self.buckets.len(),
-        // requiring it as a parameter makes it easier for the optimizer to
-        // know its value, and thus produce more efficient codegen.
-        debug_assert!(bucket_count == 8 || bucket_count == 16);
-        while cand != 0 {
-            let bit = cand.trailing_zeros() as usize;
-            cand &= !(1 << bit);
-
-            let at = at + (bit / bucket_count);
-            let bucket = bit % bucket_count;
-            if let Some(m) = self.verify_bucket(pats, haystack, bucket, at) {
-                return Some(m);
-            }
-        }
-        None
-    }
-
-    /// Verify whether there are any matches starting at `at` in the given
-    /// `haystack` corresponding only to patterns in the given bucket.
-    #[inline(always)]
-    fn verify_bucket(
-        &self,
-        pats: &Patterns,
-        haystack: &[u8],
-        bucket: usize,
-        at: usize,
-    ) -> Option<Match> {
-        // Forcing this function to not inline and be "cold" seems to help
-        // the codegen for Teddy overall. Interestingly, this is good for a
-        // 16% boost in the sherlock/packed/teddy/name/alt1 benchmark (among
-        // others). Overall, this seems like a problem with codegen, since
-        // creating the Match itself is a very small amount of code.
-        #[cold]
-        #[inline(never)]
-        fn match_from_span(
-            pati: PatternID,
-            start: usize,
-            end: usize,
-        ) -> Match {
-            Match::from_span(pati as usize, start, end)
-        }
-
-        // N.B. The bounds check for this bucket lookup *should* be elided
-        // since we assert the number of buckets in each `find_at` routine,
-        // and the compiler can prove that the `% 8` (or `% 16`) in callers
-        // of this routine will always be in bounds.
-        for &pati in &self.buckets[bucket] {
-            // SAFETY: This is safe because we are guaranteed that every
-            // index in a Teddy bucket is a valid index into `pats`. This
-            // guarantee is upheld by the assert checking `max_pattern_id` in
-            // the beginning of `find_at` above.
-            //
-            // This explicit bounds check elision is (amazingly) good for a
-            // 25-50% boost in some benchmarks, particularly ones with a lot
-            // of short literals.
-            let pat = unsafe { pats.get_unchecked(pati) };
-            if pat.is_prefix(&haystack[at..]) {
-                return Some(match_from_span(pati, at, at + pat.len()));
-            }
-        }
-        None
-    }
-}
-
-/// Exec represents the different search strategies supported by the Teddy
-/// runtime.
-///
-/// This enum is an important safety abstraction. Namely, callers should only
-/// construct a variant in this enum if it is safe to execute its
-/// corresponding target features on the current CPU. The 128-bit searchers
-/// require SSSE3, while the 256-bit searchers require AVX2.
-#[derive(Clone, Debug)]
-pub enum Exec {
-    TeddySlim1Mask128(TeddySlim1Mask128),
-    TeddySlim1Mask256(TeddySlim1Mask256),
-    TeddyFat1Mask256(TeddyFat1Mask256),
-    TeddySlim2Mask128(TeddySlim2Mask128),
-    TeddySlim2Mask256(TeddySlim2Mask256),
-    TeddyFat2Mask256(TeddyFat2Mask256),
-    TeddySlim3Mask128(TeddySlim3Mask128),
-    TeddySlim3Mask256(TeddySlim3Mask256),
-    TeddyFat3Mask256(TeddyFat3Mask256),
-}
-
-// Most of the code below remains undocumented because it consists of
-// effectively repeated versions of the code above. The general structure is
-// described in the README and in the comments above.
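// As a rough scalar sketch of the decoding performed by `verify64` above
// (the helper below is illustrative only), each set bit in a candidate maps
// to a (byte offset, bucket) pair, with `bucket_count` bits spent per
// haystack byte: 8 for Slim Teddy and 16 for Fat Teddy.
fn decode_candidate(mut cand: u64, bucket_count: usize) -> Vec<(usize, usize)> {
    assert!(bucket_count == 8 || bucket_count == 16);
    let mut hits = Vec::new();
    while cand != 0 {
        // The lowest set bit is the earliest hit in the window.
        let bit = cand.trailing_zeros() as usize;
        cand &= !(1 << bit);
        hits.push((bit / bucket_count, bit % bucket_count));
    }
    hits
}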
-
-#[derive(Clone, Debug)]
-pub struct TeddySlim1Mask128 {
-    pub mask1: Mask128,
-}
-
-impl TeddySlim1Mask128 {
-    #[target_feature(enable = "ssse3")]
-    unsafe fn find_at(
-        &self,
-        pats: &Patterns,
-        teddy: &Teddy,
-        haystack: &[u8],
-        mut at: usize,
-    ) -> Option<Match> {
-        debug_assert!(haystack[at..].len() >= teddy.minimum_len());
-        // This assert helps eliminate bounds checks for bucket lookups in
-        // Teddy::verify_bucket, which gives a small (3-4%) performance boost.
-        assert_eq!(8, teddy.buckets.len());
-
-        let len = haystack.len();
-        while at <= len - 16 {
-            let c = self.candidate(haystack, at);
-            if !is_all_zeroes128(c) {
-                if let Some(m) = teddy.verify128(pats, haystack, at, c) {
-                    return Some(m);
-                }
-            }
-            at += 16;
-        }
-        if at < len {
-            at = len - 16;
-            let c = self.candidate(haystack, at);
-            if !is_all_zeroes128(c) {
-                if let Some(m) = teddy.verify128(pats, haystack, at, c) {
-                    return Some(m);
-                }
-            }
-        }
-        None
-    }
-
-    #[inline(always)]
-    unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m128i {
-        debug_assert!(haystack[at..].len() >= 16);
-
-        let chunk = loadu128(haystack, at);
-        members1m128(chunk, self.mask1)
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct TeddySlim1Mask256 {
-    pub mask1: Mask256,
-}
-
-impl TeddySlim1Mask256 {
-    #[target_feature(enable = "avx2")]
-    unsafe fn find_at(
-        &self,
-        pats: &Patterns,
-        teddy: &Teddy,
-        haystack: &[u8],
-        mut at: usize,
-    ) -> Option<Match> {
-        debug_assert!(haystack[at..].len() >= teddy.minimum_len());
-        // This assert helps eliminate bounds checks for bucket lookups in
-        // Teddy::verify_bucket, which gives a small (3-4%) performance boost.
-        assert_eq!(8, teddy.buckets.len());
-
-        let len = haystack.len();
-        while at <= len - 32 {
-            let c = self.candidate(haystack, at);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify256(pats, haystack, at, c) {
-                    return Some(m);
-                }
-            }
-            at += 32;
-        }
-        if at < len {
-            at = len - 32;
-            let c = self.candidate(haystack, at);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify256(pats, haystack, at, c) {
-                    return Some(m);
-                }
-            }
-        }
-        None
-    }
-
-    #[inline(always)]
-    unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m256i {
-        debug_assert!(haystack[at..].len() >= 32);
-
-        let chunk = loadu256(haystack, at);
-        members1m256(chunk, self.mask1)
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct TeddyFat1Mask256 {
-    pub mask1: Mask256,
-}
-
-impl TeddyFat1Mask256 {
-    #[target_feature(enable = "avx2")]
-    unsafe fn find_at(
-        &self,
-        pats: &Patterns,
-        teddy: &Teddy,
-        haystack: &[u8],
-        mut at: usize,
-    ) -> Option<Match> {
-        debug_assert!(haystack[at..].len() >= teddy.minimum_len());
-        // This assert helps eliminate bounds checks for bucket lookups in
-        // Teddy::verify_bucket, which gives a small (3-4%) performance boost.
-        assert_eq!(16, teddy.buckets.len());
-
-        let len = haystack.len();
-        while at <= len - 16 {
-            let c = self.candidate(haystack, at);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify_fat256(pats, haystack, at, c) {
-                    return Some(m);
-                }
-            }
-            at += 16;
-        }
-        if at < len {
-            at = len - 16;
-            let c = self.candidate(haystack, at);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify_fat256(pats, haystack, at, c) {
-                    return Some(m);
-                }
-            }
-        }
-        None
-    }
-
-    #[inline(always)]
-    unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m256i {
-        debug_assert!(haystack[at..].len() >= 16);
-
-        let chunk = _mm256_broadcastsi128_si256(loadu128(haystack, at));
-        members1m256(chunk, self.mask1)
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct TeddySlim2Mask128 {
-    pub mask1: Mask128,
-    pub mask2: Mask128,
-}
-
-impl TeddySlim2Mask128 {
-    #[target_feature(enable = "ssse3")]
-    unsafe fn find_at(
-        &self,
-        pats: &Patterns,
-        teddy: &Teddy,
-        haystack: &[u8],
-        mut at: usize,
-    ) -> Option<Match> {
-        debug_assert!(haystack[at..].len() >= teddy.minimum_len());
-        // This assert helps eliminate bounds checks for bucket lookups in
-        // Teddy::verify_bucket, which gives a small (3-4%) performance boost.
-        assert_eq!(8, teddy.buckets.len());
-
-        at += 1;
-        let len = haystack.len();
-        let mut prev0 = ones128();
-        while at <= len - 16 {
-            let c = self.candidate(haystack, at, &mut prev0);
-            if !is_all_zeroes128(c) {
-                if let Some(m) = teddy.verify128(pats, haystack, at - 1, c) {
-                    return Some(m);
-                }
-            }
-            at += 16;
-        }
-        if at < len {
-            at = len - 16;
-            prev0 = ones128();
-
-            let c = self.candidate(haystack, at, &mut prev0);
-            if !is_all_zeroes128(c) {
-                if let Some(m) = teddy.verify128(pats, haystack, at - 1, c) {
-                    return Some(m);
-                }
-            }
-        }
-        None
-    }
-
-    #[inline(always)]
-    unsafe fn candidate(
-        &self,
-        haystack: &[u8],
-        at: usize,
-        prev0: &mut __m128i,
-    ) -> __m128i {
-        debug_assert!(haystack[at..].len() >= 16);
-
-        let chunk = loadu128(haystack, at);
-        let (res0, res1) = members2m128(chunk, self.mask1, self.mask2);
-        let res0prev0 = _mm_alignr_epi8(res0, *prev0, 15);
-        let res = _mm_and_si128(res0prev0, res1);
-        *prev0 = res0;
-        res
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct TeddySlim2Mask256 {
-    pub mask1: Mask256,
-    pub mask2: Mask256,
-}
-
-impl TeddySlim2Mask256 {
-    #[target_feature(enable = "avx2")]
-    unsafe fn find_at(
-        &self,
-        pats: &Patterns,
-        teddy: &Teddy,
-        haystack: &[u8],
-        mut at: usize,
-    ) -> Option<Match> {
-        debug_assert!(haystack[at..].len() >= teddy.minimum_len());
-        // This assert helps eliminate bounds checks for bucket lookups in
-        // Teddy::verify_bucket, which gives a small (3-4%) performance boost.
-        assert_eq!(8, teddy.buckets.len());
-
-        at += 1;
-        let len = haystack.len();
-        let mut prev0 = ones256();
-        while at <= len - 32 {
-            let c = self.candidate(haystack, at, &mut prev0);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify256(pats, haystack, at - 1, c) {
-                    return Some(m);
-                }
-            }
-            at += 32;
-        }
-        if at < len {
-            at = len - 32;
-            prev0 = ones256();
-
-            let c = self.candidate(haystack, at, &mut prev0);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify256(pats, haystack, at - 1, c) {
-                    return Some(m);
-                }
-            }
-        }
-        None
-    }
-
-    #[inline(always)]
-    unsafe fn candidate(
-        &self,
-        haystack: &[u8],
-        at: usize,
-        prev0: &mut __m256i,
-    ) -> __m256i {
-        debug_assert!(haystack[at..].len() >= 32);
-
-        let chunk = loadu256(haystack, at);
-        let (res0, res1) = members2m256(chunk, self.mask1, self.mask2);
-        let res0prev0 = alignr256_15(res0, *prev0);
-        let res = _mm256_and_si256(res0prev0, res1);
-        *prev0 = res0;
-        res
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct TeddyFat2Mask256 {
-    pub mask1: Mask256,
-    pub mask2: Mask256,
-}
-
-impl TeddyFat2Mask256 {
-    #[target_feature(enable = "avx2")]
-    unsafe fn find_at(
-        &self,
-        pats: &Patterns,
-        teddy: &Teddy,
-        haystack: &[u8],
-        mut at: usize,
-    ) -> Option<Match> {
-        debug_assert!(haystack[at..].len() >= teddy.minimum_len());
-        // This assert helps eliminate bounds checks for bucket lookups in
-        // Teddy::verify_bucket, which gives a small (3-4%) performance boost.
-        assert_eq!(16, teddy.buckets.len());
-
-        at += 1;
-        let len = haystack.len();
-        let mut prev0 = ones256();
-        while at <= len - 16 {
-            let c = self.candidate(haystack, at, &mut prev0);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify_fat256(pats, haystack, at - 1, c)
-                {
-                    return Some(m);
-                }
-            }
-            at += 16;
-        }
-        if at < len {
-            at = len - 16;
-            prev0 = ones256();
-
-            let c = self.candidate(haystack, at, &mut prev0);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify_fat256(pats, haystack, at - 1, c)
-                {
-                    return Some(m);
-                }
-            }
-        }
-        None
-    }
-
-    #[inline(always)]
-    unsafe fn candidate(
-        &self,
-        haystack: &[u8],
-        at: usize,
-        prev0: &mut __m256i,
-    ) -> __m256i {
-        debug_assert!(haystack[at..].len() >= 16);
-
-        let chunk = _mm256_broadcastsi128_si256(loadu128(haystack, at));
-        let (res0, res1) = members2m256(chunk, self.mask1, self.mask2);
-        let res0prev0 = _mm256_alignr_epi8(res0, *prev0, 15);
-        let res = _mm256_and_si256(res0prev0, res1);
-        *prev0 = res0;
-        res
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct TeddySlim3Mask128 {
-    pub mask1: Mask128,
-    pub mask2: Mask128,
-    pub mask3: Mask128,
-}
-
-impl TeddySlim3Mask128 {
-    #[target_feature(enable = "ssse3")]
-    unsafe fn find_at(
-        &self,
-        pats: &Patterns,
-        teddy: &Teddy,
-        haystack: &[u8],
-        mut at: usize,
-    ) -> Option<Match> {
-        debug_assert!(haystack[at..].len() >= teddy.minimum_len());
-        // This assert helps eliminate bounds checks for bucket lookups in
-        // Teddy::verify_bucket, which gives a small (3-4%) performance boost.
-        assert_eq!(8, teddy.buckets.len());
-
-        at += 2;
-        let len = haystack.len();
-        let (mut prev0, mut prev1) = (ones128(), ones128());
-        while at <= len - 16 {
-            let c = self.candidate(haystack, at, &mut prev0, &mut prev1);
-            if !is_all_zeroes128(c) {
-                if let Some(m) = teddy.verify128(pats, haystack, at - 2, c) {
-                    return Some(m);
-                }
-            }
-            at += 16;
-        }
-        if at < len {
-            at = len - 16;
-            prev0 = ones128();
-            prev1 = ones128();
-
-            let c = self.candidate(haystack, at, &mut prev0, &mut prev1);
-            if !is_all_zeroes128(c) {
-                if let Some(m) = teddy.verify128(pats, haystack, at - 2, c) {
-                    return Some(m);
-                }
-            }
-        }
-        None
-    }
-
-    #[inline(always)]
-    unsafe fn candidate(
-        &self,
-        haystack: &[u8],
-        at: usize,
-        prev0: &mut __m128i,
-        prev1: &mut __m128i,
-    ) -> __m128i {
-        debug_assert!(haystack[at..].len() >= 16);
-
-        let chunk = loadu128(haystack, at);
-        let (res0, res1, res2) =
-            members3m128(chunk, self.mask1, self.mask2, self.mask3);
-        let res0prev0 = _mm_alignr_epi8(res0, *prev0, 14);
-        let res1prev1 = _mm_alignr_epi8(res1, *prev1, 15);
-        let res = _mm_and_si128(_mm_and_si128(res0prev0, res1prev1), res2);
-        *prev0 = res0;
-        *prev1 = res1;
-        res
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct TeddySlim3Mask256 {
-    pub mask1: Mask256,
-    pub mask2: Mask256,
-    pub mask3: Mask256,
-}
-
-impl TeddySlim3Mask256 {
-    #[target_feature(enable = "avx2")]
-    unsafe fn find_at(
-        &self,
-        pats: &Patterns,
-        teddy: &Teddy,
-        haystack: &[u8],
-        mut at: usize,
-    ) -> Option<Match> {
-        debug_assert!(haystack[at..].len() >= teddy.minimum_len());
-        // This assert helps eliminate bounds checks for bucket lookups in
-        // Teddy::verify_bucket, which gives a small (3-4%) performance boost.
-        assert_eq!(8, teddy.buckets.len());
-
-        at += 2;
-        let len = haystack.len();
-        let (mut prev0, mut prev1) = (ones256(), ones256());
-        while at <= len - 32 {
-            let c = self.candidate(haystack, at, &mut prev0, &mut prev1);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify256(pats, haystack, at - 2, c) {
-                    return Some(m);
-                }
-            }
-            at += 32;
-        }
-        if at < len {
-            at = len - 32;
-            prev0 = ones256();
-            prev1 = ones256();
-
-            let c = self.candidate(haystack, at, &mut prev0, &mut prev1);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify256(pats, haystack, at - 2, c) {
-                    return Some(m);
-                }
-            }
-        }
-        None
-    }
-
-    #[inline(always)]
-    unsafe fn candidate(
-        &self,
-        haystack: &[u8],
-        at: usize,
-        prev0: &mut __m256i,
-        prev1: &mut __m256i,
-    ) -> __m256i {
-        debug_assert!(haystack[at..].len() >= 32);
-
-        let chunk = loadu256(haystack, at);
-        let (res0, res1, res2) =
-            members3m256(chunk, self.mask1, self.mask2, self.mask3);
-        let res0prev0 = alignr256_14(res0, *prev0);
-        let res1prev1 = alignr256_15(res1, *prev1);
-        let res =
-            _mm256_and_si256(_mm256_and_si256(res0prev0, res1prev1), res2);
-        *prev0 = res0;
-        *prev1 = res1;
-        res
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct TeddyFat3Mask256 {
-    pub mask1: Mask256,
-    pub mask2: Mask256,
-    pub mask3: Mask256,
-}
-
-impl TeddyFat3Mask256 {
-    #[target_feature(enable = "avx2")]
-    unsafe fn find_at(
-        &self,
-        pats: &Patterns,
-        teddy: &Teddy,
-        haystack: &[u8],
-        mut at: usize,
-    ) -> Option<Match> {
-        debug_assert!(haystack[at..].len() >= teddy.minimum_len());
-        // This assert helps eliminate bounds checks for bucket lookups in
-        // Teddy::verify_bucket, which gives a small (3-4%) performance boost.
-        assert_eq!(16, teddy.buckets.len());
-
-        at += 2;
-        let len = haystack.len();
-        let (mut prev0, mut prev1) = (ones256(), ones256());
-        while at <= len - 16 {
-            let c = self.candidate(haystack, at, &mut prev0, &mut prev1);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify_fat256(pats, haystack, at - 2, c)
-                {
-                    return Some(m);
-                }
-            }
-            at += 16;
-        }
-        if at < len {
-            at = len - 16;
-            prev0 = ones256();
-            prev1 = ones256();
-
-            let c = self.candidate(haystack, at, &mut prev0, &mut prev1);
-            if !is_all_zeroes256(c) {
-                if let Some(m) = teddy.verify_fat256(pats, haystack, at - 2, c)
-                {
-                    return Some(m);
-                }
-            }
-        }
-        None
-    }
-
-    #[inline(always)]
-    unsafe fn candidate(
-        &self,
-        haystack: &[u8],
-        at: usize,
-        prev0: &mut __m256i,
-        prev1: &mut __m256i,
-    ) -> __m256i {
-        debug_assert!(haystack[at..].len() >= 16);
-
-        let chunk = _mm256_broadcastsi128_si256(loadu128(haystack, at));
-        let (res0, res1, res2) =
-            members3m256(chunk, self.mask1, self.mask2, self.mask3);
-        let res0prev0 = _mm256_alignr_epi8(res0, *prev0, 14);
-        let res1prev1 = _mm256_alignr_epi8(res1, *prev1, 15);
-        let res =
-            _mm256_and_si256(_mm256_and_si256(res0prev0, res1prev1), res2);
-        *prev0 = res0;
-        *prev1 = res1;
-        res
-    }
-}
-
-/// A 128-bit mask for the low and high nybbles in a set of patterns. Each
-/// lane `j` corresponds to a bitset where the `i`th bit is set if and only if
-/// the nybble `j` is in the bucket `i` at a particular position.
-#[derive(Clone, Copy, Debug)]
-pub struct Mask128 {
-    lo: __m128i,
-    hi: __m128i,
-}
-
-impl Mask128 {
-    /// Create a new SIMD mask from the mask produced by the Teddy builder.
-    pub fn new(mask: compile::Mask) -> Mask128 {
-        // SAFETY: This is safe since [u8; 16] has the same representation
-        // as __m128i.
-        unsafe {
-            Mask128 {
-                lo: mem::transmute(mask.lo128()),
-                hi: mem::transmute(mask.hi128()),
-            }
-        }
-    }
-}
-
-/// A 256-bit mask for the low and high nybbles in a set of patterns. Each
-/// lane `j` corresponds to a bitset where the `i`th bit is set if and only if
-/// the nybble `j` is in the bucket `i` at a particular position.
-///
-/// This is slightly tweaked depending on whether Slim or Fat Teddy is being
-/// used. For Slim Teddy, the bitsets in the lower 128 bits are the same as
-/// the bitsets in the higher 128 bits, so that we can search 32 bytes at a
-/// time. (Remember, the nybbles in the haystack are used as indices into
-/// these masks, and 256-bit shuffles only operate on 128-bit lanes.)
-///
-/// For Fat Teddy, the bitsets are not repeated, but instead, the high 128
-/// bits correspond to buckets 8-15. So a bitset `00100010` has buckets 1
-/// and 5 set if it's in the lower 128 bits, but has buckets 9 and 13 set if
-/// it's in the higher 128 bits.
-#[derive(Clone, Copy, Debug)]
-pub struct Mask256 {
-    lo: __m256i,
-    hi: __m256i,
-}
-
-impl Mask256 {
-    /// Create a new SIMD mask from the mask produced by the Teddy builder.
-    pub fn new(mask: compile::Mask) -> Mask256 {
-        // SAFETY: This is safe since [u8; 32] has the same representation
-        // as __m256i.
-        unsafe {
-            Mask256 {
-                lo: mem::transmute(mask.lo256()),
-                hi: mem::transmute(mask.hi256()),
-            }
-        }
-    }
-}
-
-// The "members" routines below are responsible for taking a chunk of bytes
-// and a number of nybble masks, and returning the result of using the masks
-// to look up bytes in the chunk.
-// The results of the high and low nybble masks are AND'ed together, such
-// that each candidate returned is a vector with byte-sized lanes, where
-// each lane is an 8-bit bitset corresponding to the buckets that contain the
-// corresponding byte.
-//
-// In the case of masks of length greater than 1, callers will need to keep
-// the results from the previous haystack window, and then shift the vectors
-// so that they all line up. Then they can be AND'ed together.
-
-/// Return a candidate for Slim 128-bit Teddy, where `chunk` corresponds to a
-/// 16-byte window of the haystack (where the least significant byte
-/// corresponds to the start of the window), and `mask1` corresponds to a
-/// low/high mask for the first byte of all patterns that are being searched.
-#[target_feature(enable = "ssse3")]
-unsafe fn members1m128(chunk: __m128i, mask1: Mask128) -> __m128i {
-    let lomask = _mm_set1_epi8(0xF);
-    let hlo = _mm_and_si128(chunk, lomask);
-    let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask);
-    _mm_and_si128(
-        _mm_shuffle_epi8(mask1.lo, hlo),
-        _mm_shuffle_epi8(mask1.hi, hhi),
-    )
-}
-
-/// Return a candidate for Slim 256-bit Teddy, where `chunk` corresponds to a
-/// 32-byte window of the haystack (where the least significant byte
-/// corresponds to the start of the window), and `mask1` corresponds to a
-/// low/high mask for the first byte of all patterns that are being searched.
-///
-/// Note that this can also be used for Fat Teddy, where the high 128 bits in
-/// `chunk` are the same as the low 128 bits, which corresponds to a 16-byte
-/// window in the haystack.
-#[target_feature(enable = "avx2")]
-unsafe fn members1m256(chunk: __m256i, mask1: Mask256) -> __m256i {
-    let lomask = _mm256_set1_epi8(0xF);
-    let hlo = _mm256_and_si256(chunk, lomask);
-    let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask);
-    _mm256_and_si256(
-        _mm256_shuffle_epi8(mask1.lo, hlo),
-        _mm256_shuffle_epi8(mask1.hi, hhi),
-    )
-}
-
-/// Return candidates for Slim 128-bit Teddy, where `chunk` corresponds
-/// to a 16-byte window of the haystack (where the least significant byte
-/// corresponds to the start of the window), and the masks correspond to a
-/// low/high mask for the first and second bytes of all patterns that are
-/// being searched. The vectors returned correspond to candidates for the
-/// first and second bytes in the patterns represented by the masks.
-#[target_feature(enable = "ssse3")]
-unsafe fn members2m128(
-    chunk: __m128i,
-    mask1: Mask128,
-    mask2: Mask128,
-) -> (__m128i, __m128i) {
-    let lomask = _mm_set1_epi8(0xF);
-    let hlo = _mm_and_si128(chunk, lomask);
-    let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask);
-    let res0 = _mm_and_si128(
-        _mm_shuffle_epi8(mask1.lo, hlo),
-        _mm_shuffle_epi8(mask1.hi, hhi),
-    );
-    let res1 = _mm_and_si128(
-        _mm_shuffle_epi8(mask2.lo, hlo),
-        _mm_shuffle_epi8(mask2.hi, hhi),
-    );
-    (res0, res1)
-}
-
-/// Return candidates for Slim 256-bit Teddy, where `chunk` corresponds
-/// to a 32-byte window of the haystack (where the least significant byte
-/// corresponds to the start of the window), and the masks correspond to a
-/// low/high mask for the first and second bytes of all patterns that are
-/// being searched. The vectors returned correspond to candidates for the
-/// first and second bytes in the patterns represented by the masks.
-///
-/// Note that this can also be used for Fat Teddy, where the high 128 bits in
-/// `chunk` are the same as the low 128 bits, which corresponds to a 16-byte
-/// window in the haystack.
-#[target_feature(enable = "avx2")]
-unsafe fn members2m256(
-    chunk: __m256i,
-    mask1: Mask256,
-    mask2: Mask256,
-) -> (__m256i, __m256i) {
-    let lomask = _mm256_set1_epi8(0xF);
-    let hlo = _mm256_and_si256(chunk, lomask);
-    let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask);
-    let res0 = _mm256_and_si256(
-        _mm256_shuffle_epi8(mask1.lo, hlo),
-        _mm256_shuffle_epi8(mask1.hi, hhi),
-    );
-    let res1 = _mm256_and_si256(
-        _mm256_shuffle_epi8(mask2.lo, hlo),
-        _mm256_shuffle_epi8(mask2.hi, hhi),
-    );
-    (res0, res1)
-}
-
-/// Return candidates for Slim 128-bit Teddy, where `chunk` corresponds
-/// to a 16-byte window of the haystack (where the least significant byte
-/// corresponds to the start of the window), and the masks correspond to a
-/// low/high mask for the first, second and third bytes of all patterns that
-/// are being searched. The vectors returned correspond to candidates for the
-/// first, second and third bytes in the patterns represented by the masks.
-#[target_feature(enable = "ssse3")]
-unsafe fn members3m128(
-    chunk: __m128i,
-    mask1: Mask128,
-    mask2: Mask128,
-    mask3: Mask128,
-) -> (__m128i, __m128i, __m128i) {
-    let lomask = _mm_set1_epi8(0xF);
-    let hlo = _mm_and_si128(chunk, lomask);
-    let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask);
-    let res0 = _mm_and_si128(
-        _mm_shuffle_epi8(mask1.lo, hlo),
-        _mm_shuffle_epi8(mask1.hi, hhi),
-    );
-    let res1 = _mm_and_si128(
-        _mm_shuffle_epi8(mask2.lo, hlo),
-        _mm_shuffle_epi8(mask2.hi, hhi),
-    );
-    let res2 = _mm_and_si128(
-        _mm_shuffle_epi8(mask3.lo, hlo),
-        _mm_shuffle_epi8(mask3.hi, hhi),
-    );
-    (res0, res1, res2)
-}
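// A scalar model of what one lane of the "members" routines computes (the
// SIMD versions do this for 16 or 32 haystack bytes at once via in-register
// shuffles). The two tables stand in for one low/high nybble mask pair; the
// names here are illustrative only.
fn member_bitset(lo_mask: &[u8; 16], hi_mask: &[u8; 16], haystack_byte: u8) -> u8 {
    let lo = usize::from(haystack_byte & 0xF);
    let hi = usize::from(haystack_byte >> 4);
    // A bucket remains a candidate only if it claims both nybbles.
    lo_mask[lo] & hi_mask[hi]
}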
-#[target_feature(enable = "avx2")] -unsafe fn members3m256( - chunk: __m256i, - mask1: Mask256, - mask2: Mask256, - mask3: Mask256, -) -> (__m256i, __m256i, __m256i) { - let lomask = _mm256_set1_epi8(0xF); - let hlo = _mm256_and_si256(chunk, lomask); - let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask); - let res0 = _mm256_and_si256( - _mm256_shuffle_epi8(mask1.lo, hlo), - _mm256_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm256_and_si256( - _mm256_shuffle_epi8(mask2.lo, hlo), - _mm256_shuffle_epi8(mask2.hi, hhi), - ); - let res2 = _mm256_and_si256( - _mm256_shuffle_epi8(mask3.lo, hlo), - _mm256_shuffle_epi8(mask3.hi, hhi), - ); - (res0, res1, res2) -} diff --git a/vendor/aho-corasick/src/packed/tests.rs b/vendor/aho-corasick/src/packed/tests.rs index 91410cb..2b0d44e 100644 --- a/vendor/aho-corasick/src/packed/tests.rs +++ b/vendor/aho-corasick/src/packed/tests.rs @@ -1,8 +1,16 @@ use std::collections::HashMap; -use std::usize; -use crate::packed::{Config, MatchKind}; -use crate::Match; +use alloc::{ + format, + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::{ + packed::{Config, MatchKind}, + util::search::Match, +}; /// A description of a single test against a multi-pattern searcher. /// @@ -32,8 +40,9 @@ struct SearchTestOwned { impl SearchTest { fn variations(&self) -> Vec { + let count = if cfg!(miri) { 1 } else { 261 }; let mut tests = vec![]; - for i in 0..=260 { + for i in 0..count { tests.push(self.offset_prefix(i)); tests.push(self.offset_suffix(i)); tests.push(self.offset_both(i)); @@ -83,15 +92,6 @@ impl SearchTest { matches: self.matches.to_vec(), } } - - // fn to_owned(&self) -> SearchTestOwned { - // SearchTestOwned { - // name: self.name.to_string(), - // patterns: self.patterns.iter().map(|s| s.to_string()).collect(), - // haystack: self.haystack.to_string(), - // matches: self.matches.iter().cloned().collect(), - // } - // } } /// Short-hand constructor for SearchTest. We use it a lot below. @@ -144,6 +144,9 @@ const BASICS: &'static [SearchTest] = &[ t!(basic200, &["abc"], "abc", &[(0, 0, 3)]), t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]), t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]), + t!(basic230, &["abcd"], "abcd", &[(0, 0, 4)]), + t!(basic240, &["abcd"], "zazabzabcdz", &[(0, 6, 10)]), + t!(basic250, &["abcd"], "zazabcdzabcdz", &[(0, 3, 7), (0, 8, 12)]), t!(basic300, &["a", "b"], "", &[]), t!(basic310, &["a", "b"], "z", &[]), t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]), @@ -381,26 +384,34 @@ macro_rules! testconfig { run_search_tests($collection, |test| { let mut config = Config::new(); $with(&mut config); - config - .builder() - .extend(test.patterns.iter().map(|p| p.as_bytes())) - .build() - .unwrap() - .find_iter(&test.haystack) - .collect() + let mut builder = config.builder(); + builder.extend(test.patterns.iter().map(|p| p.as_bytes())); + let searcher = match builder.build() { + Some(searcher) => searcher, + None => { + // For x86-64 and aarch64, not building a searcher is + // probably a bug, so be loud. 
+ if cfg!(any( + target_arch = "x86_64", + target_arch = "aarch64" + )) { + panic!("failed to build packed searcher") + } + return None; + } + }; + Some(searcher.find_iter(&test.haystack).collect()) }); } }; } -#[cfg(target_arch = "x86_64")] testconfig!( search_default_leftmost_first, PACKED_LEFTMOST_FIRST, |_: &mut Config| {} ); -#[cfg(target_arch = "x86_64")] testconfig!( search_default_leftmost_longest, PACKED_LEFTMOST_LONGEST, @@ -409,92 +420,90 @@ testconfig!( } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_leftmost_first, PACKED_LEFTMOST_FIRST, |c: &mut Config| { - c.force_teddy(true); + c.only_teddy(true); } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_leftmost_longest, PACKED_LEFTMOST_LONGEST, |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); + c.only_teddy(true).match_kind(MatchKind::LeftmostLongest); } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_ssse3_leftmost_first, PACKED_LEFTMOST_FIRST, |c: &mut Config| { - c.force_teddy(true); - if is_x86_feature_detected!("ssse3") { - c.force_avx(Some(false)); + c.only_teddy(true); + #[cfg(target_arch = "x86_64")] + if std::is_x86_feature_detected!("ssse3") { + c.only_teddy_256bit(Some(false)); } } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_ssse3_leftmost_longest, PACKED_LEFTMOST_LONGEST, |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); - if is_x86_feature_detected!("ssse3") { - c.force_avx(Some(false)); + c.only_teddy(true).match_kind(MatchKind::LeftmostLongest); + #[cfg(target_arch = "x86_64")] + if std::is_x86_feature_detected!("ssse3") { + c.only_teddy_256bit(Some(false)); } } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_avx2_leftmost_first, PACKED_LEFTMOST_FIRST, |c: &mut Config| { - c.force_teddy(true); - if is_x86_feature_detected!("avx2") { - c.force_avx(Some(true)); + c.only_teddy(true); + #[cfg(target_arch = "x86_64")] + if std::is_x86_feature_detected!("avx2") { + c.only_teddy_256bit(Some(true)); } } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_avx2_leftmost_longest, PACKED_LEFTMOST_LONGEST, |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); - if is_x86_feature_detected!("avx2") { - c.force_avx(Some(true)); + c.only_teddy(true).match_kind(MatchKind::LeftmostLongest); + #[cfg(target_arch = "x86_64")] + if std::is_x86_feature_detected!("avx2") { + c.only_teddy_256bit(Some(true)); } } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_fat_leftmost_first, PACKED_LEFTMOST_FIRST, |c: &mut Config| { - c.force_teddy(true); - if is_x86_feature_detected!("avx2") { - c.force_teddy_fat(Some(true)); + c.only_teddy(true); + #[cfg(target_arch = "x86_64")] + if std::is_x86_feature_detected!("avx2") { + c.only_teddy_fat(Some(true)); } } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_fat_leftmost_longest, PACKED_LEFTMOST_LONGEST, |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); - if is_x86_feature_detected!("avx2") { - c.force_teddy_fat(Some(true)); + c.only_teddy(true).match_kind(MatchKind::LeftmostLongest); + #[cfg(target_arch = "x86_64")] + if std::is_x86_feature_detected!("avx2") { + c.only_teddy_fat(Some(true)); } } ); @@ -503,7 +512,7 @@ testconfig!( search_rabinkarp_leftmost_first, PACKED_LEFTMOST_FIRST, |c: &mut Config| { - c.force_rabin_karp(true); + c.only_rabin_karp(true); } ); @@ -511,7 +520,7 @@ testconfig!( search_rabinkarp_leftmost_longest, PACKED_LEFTMOST_LONGEST, |c: &mut Config| { - 
c.force_rabin_karp(true).match_kind(MatchKind::LeftmostLongest);
+        c.only_rabin_karp(true).match_kind(MatchKind::LeftmostLongest);
     }
 );
 
@@ -539,7 +548,7 @@ fn search_tests_have_unique_names() {
     assert("TEDDY", TEDDY);
 }
 
-fn run_search_tests<F: FnMut(&SearchTestOwned) -> Vec<Match>>(
+fn run_search_tests<F: FnMut(&SearchTestOwned) -> Option<Vec<Match>>>(
     which: TestCollection,
     mut f: F,
 ) {
@@ -547,18 +556,24 @@ fn run_search_tests<F: FnMut(&SearchTestOwned) -> Vec<Match>>(
         |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
             matches
                 .into_iter()
-                .map(|m| (m.pattern(), m.start(), m.end()))
+                .map(|m| (m.pattern().as_usize(), m.start(), m.end()))
                 .collect()
         };
 
     for &tests in which {
         for spec in tests {
             for test in spec.variations() {
+                let results = match f(&test) {
+                    None => continue,
+                    Some(results) => results,
+                };
                 assert_eq!(
                     test.matches,
-                    get_match_triples(f(&test)).as_slice(),
-                    "test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}",
+                    get_match_triples(results).as_slice(),
+                    "test: {}, patterns: {:?}, haystack(len={:?}): {:?}, \
+                     offset: {:?}",
                     test.name,
                     test.patterns,
+                    test.haystack.len(),
                     test.haystack,
                     test.offset,
                 );
diff --git a/vendor/aho-corasick/src/packed/vector.rs b/vendor/aho-corasick/src/packed/vector.rs
index ca6c2b0..ed3f890 100644
--- a/vendor/aho-corasick/src/packed/vector.rs
+++ b/vendor/aho-corasick/src/packed/vector.rs
@@ -1,181 +1,1752 @@
-// This file contains a set of fairly generic utility functions when working
-// with SIMD vectors.
-//
-// SAFETY: All of the routines below are unsafe to call because they assume
-// the necessary CPU target features in order to use particular vendor
-// intrinsics. Calling these routines when the underlying CPU does not
-// support the appropriate target features is NOT safe. Callers must ensure
-// this themselves.
-//
-// Note that it may not look like this safety invariant is being upheld when
-// these routines are called. Namely, the CPU feature check is typically
-// pretty far away from when these routines are used. Instead, we rely on
-// the fact that certain types serve as a guaranteed receipt that pertinent
-// target features are enabled. For example, the only way TeddySlim3Mask256
-// can be constructed is if the AVX2 CPU feature is available. Thus, any code
-// running inside of TeddySlim3Mask256 can use any of the functions below
-// without any additional checks: its very existence *is* the check.
-
-use std::arch::x86_64::*;
-
-/// Shift `a` to the left by two bytes (removing its two most significant
-/// bytes), and concatenate it with the two most significant bytes of `b`.
-#[target_feature(enable = "avx2")]
-pub unsafe fn alignr256_14(a: __m256i, b: __m256i) -> __m256i {
-    // Credit goes to jneem for figuring this out:
-    // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184
-    //
-    // TL;DR avx2's PALIGNR instruction is actually just two 128-bit PALIGNR
-    // instructions, which is not what we want, so we need to do some extra
-    // shuffling.
-
-    // This permute gives us the low 16 bytes of a concatenated with the high
-    // 16 bytes of b, in order of most significant to least significant. So
-    // `v = a[15:0] b[31:16]`.
-    let v = _mm256_permute2x128_si256(b, a, 0x21);
-    // This effectively does this (where we deal in terms of byte-indexing
-    // and byte-shifting, and use inclusive ranges):
-    //
-    //   ret[15:0]  := ((a[15:0] << 16) | v[15:0]) >> 14
-    //              =  ((a[15:0] << 16) | b[31:16]) >> 14
-    //   ret[31:16] := ((a[31:16] << 16) | v[31:16]) >> 14
-    //              =  ((a[31:16] << 16) | a[15:0]) >> 14
-    //
-    // Which therefore results in:
-    //
-    //   ret[31:0] := a[29:16] a[15:14] a[13:0] b[31:30]
-    //
-    // The end result is that we've effectively done this:
-    //
-    //   (a << 2) | (b >> 30)
-    //
-    // When `A` and `B` are strings---where the beginning of the string is in
-    // the least significant bits---we effectively result in the following
-    // semantic operation:
-    //
-    //   (A >> 2) | (B << 30)
-    //
-    // The reversal comes from the fact that we are in little-endian.
-    _mm256_alignr_epi8(a, v, 14)
-}
+// NOTE: The descriptions for each of the vector methods on the traits below
+// are pretty inscrutable. For this reason, there are tests for every method
+// on every trait impl below. If you're confused about what an op does,
+// consult its test. (They probably should be doc tests, but I couldn't
+// figure out how to write them in a non-annoying way.)
+
+use core::{
+    fmt::Debug,
+    panic::{RefUnwindSafe, UnwindSafe},
+};
-/// Shift `a` to the left by one byte (removing its most significant byte),
-/// and concatenate it with the most significant byte of `b`.
-#[target_feature(enable = "avx2")]
-pub unsafe fn alignr256_15(a: __m256i, b: __m256i) -> __m256i {
-    // For explanation, see alignr256_14.
-    let v = _mm256_permute2x128_si256(b, a, 0x21);
-    _mm256_alignr_epi8(a, v, 15)
-}
+/// A trait for describing vector operations used by vectorized searchers.
+///
+/// The trait is highly constrained to low level vector operations needed for
+/// the specific algorithms used in this crate. In general, it was invented
+/// mostly to be generic over x86's __m128i and __m256i types. At time of
+/// writing, it also supports wasm and aarch64 128-bit vector types.
+///
+/// # Safety
+///
+/// All methods are unsafe since they are intended to be implemented using
+/// vendor intrinsics, which are also not safe. Callers must ensure that
+/// the appropriate target features are enabled in the calling function,
+/// and that the current CPU supports them. All implementations should
+/// avoid marking the routines with `#[target_feature]` and instead mark
+/// them as `#[inline(always)]` to ensure they get appropriately inlined.
+/// (`inline(always)` cannot be used with target_feature.)
-/// Unpack the given 128-bit vector into its 64-bit components. The first
-/// element of the array returned corresponds to the least significant 64-bit
-/// lane in `a`.
-#[target_feature(enable = "ssse3")]
-pub unsafe fn unpack64x128(a: __m128i) -> [u64; 2] {
-    [
-        _mm_cvtsi128_si64(a) as u64,
-        _mm_cvtsi128_si64(_mm_srli_si128(a, 8)) as u64,
-    ]
-}
+pub(crate) trait Vector:
+    Copy + Debug + Send + Sync + UnwindSafe + RefUnwindSafe
+{
+    /// The number of bits in the vector.
+    const BITS: usize;
+    /// The number of bytes in the vector. That is, this is the size of the
+    /// vector in memory.
+    const BYTES: usize;
+
+    /// Create a vector with 8-bit lanes with the given byte repeated into
+    /// each lane.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn splat(byte: u8) -> Self;
+
+    /// Read a vector-size number of bytes from the given pointer. The
+    /// pointer does not need to be aligned.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    ///
+    /// Callers must guarantee that at least `BYTES` bytes are readable from
+    /// `data`.
+    unsafe fn load_unaligned(data: *const u8) -> Self;
+
+    /// Returns true if and only if this vector has zero in all of its lanes.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn is_zero(self) -> bool;
-/// Unpack the given 256-bit vector into its 64-bit components. The first
-/// element of the array returned corresponds to the least significant 64-bit
-/// lane in `a`.
-#[target_feature(enable = "avx2")]
-pub unsafe fn unpack64x256(a: __m256i) -> [u64; 4] {
-    // Using transmute here is precisely equivalent, but actually slower.
-    // It's not quite clear why.
-    let lo = _mm256_extracti128_si256(a, 0);
-    let hi = _mm256_extracti128_si256(a, 1);
-    [
-        _mm_cvtsi128_si64(lo) as u64,
-        _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64,
-        _mm_cvtsi128_si64(hi) as u64,
-        _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64,
-    ]
-}
+    /// Do an 8-bit pairwise equality check. If lane `i` is equal in this
+    /// vector and the one given, then lane `i` in the resulting vector is
+    /// set to `0xFF`. Otherwise, it is set to `0x00`.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn cmpeq(self, vector2: Self) -> Self;
+
+    /// Perform a bitwise 'and' of this vector and the one given and return
+    /// the result.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn and(self, vector2: Self) -> Self;
+
+    /// Perform a bitwise 'or' of this vector and the one given and return
+    /// the result.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn or(self, vector2: Self) -> Self;
+
+    /// Shift each 8-bit lane in this vector to the right by the number of
+    /// bits indicated by the `BITS` const parameter.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn shift_8bit_lane_right<const BITS: i32>(self) -> Self;
+
+    /// Shift this vector to the left by one byte and shift the most
+    /// significant byte of `vector2` into the least significant position of
+    /// this vector.
+    ///
+    /// Stated differently, this behaves as if `self` and `vector2` were
+    /// concatenated into a `2 * Self::BITS` temporary buffer and then
+    /// shifted right by `Self::BYTES - 1` bytes.
+    ///
+    /// With respect to the Teddy algorithm, `vector2` is usually a previous
+    /// `Self::BYTES` chunk from the haystack and `self` is the chunk
+    /// immediately following it. This permits combining the last byte from
+    /// the previous chunk (`vector2`) with the first `Self::BYTES - 1`
+    /// bytes from the current chunk. This permits aligning the result of
+    /// various shuffles so that they can be and-ed together and a possible
+    /// candidate discovered.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn shift_in_one_byte(self, vector2: Self) -> Self;
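// The "shift in" family of operations declared around here is easiest to see
// on plain byte arrays. A 16-byte scalar sketch (illustrative only): the
// result takes its first `n` bytes from the tail of the previous chunk and
// the rest from the head of the current chunk.
fn shift_in_bytes(current: [u8; 16], previous: [u8; 16], n: usize) -> [u8; 16] {
    assert!(n <= 16);
    let mut out = [0u8; 16];
    out[..n].copy_from_slice(&previous[16 - n..]);
    out[n..].copy_from_slice(&current[..16 - n]);
    out
}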
+
+    /// Shift this vector to the left by two bytes and shift the two most
+    /// significant bytes of `vector2` into the least significant position of
+    /// this vector.
+    ///
+    /// Stated differently, this behaves as if `self` and `vector2` were
+    /// concatenated into a `2 * Self::BITS` temporary buffer and then
+    /// shifted right by `Self::BYTES - 2` bytes.
+    ///
+    /// With respect to the Teddy algorithm, `vector2` is usually a previous
+    /// `Self::BYTES` chunk from the haystack and `self` is the chunk
+    /// immediately following it. This permits combining the last two bytes
+    /// from the previous chunk (`vector2`) with the first `Self::BYTES - 2`
+    /// bytes from the current chunk. This permits aligning the result of
+    /// various shuffles so that they can be and-ed together and a possible
+    /// candidate discovered.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self;
+
+    /// Shift this vector to the left by three bytes and shift the three
+    /// most significant bytes of `vector2` into the least significant
+    /// position of this vector.
+    ///
+    /// Stated differently, this behaves as if `self` and `vector2` were
+    /// concatenated into a `2 * Self::BITS` temporary buffer and then
+    /// shifted right by `Self::BYTES - 3` bytes.
+    ///
+    /// With respect to the Teddy algorithm, `vector2` is usually a previous
+    /// `Self::BYTES` chunk from the haystack and `self` is the chunk
+    /// immediately following it. This permits combining the last three
+    /// bytes from the previous chunk (`vector2`) with the first
+    /// `Self::BYTES - 3` bytes from the current chunk. This permits aligning
+    /// the result of various shuffles so that they can be and-ed together
+    /// and a possible candidate discovered.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self;
+
+    /// Shuffles the bytes in this vector according to the indices in each of
+    /// the corresponding lanes in `indices`.
+    ///
+    /// If `i` is the index of a lane, `A` is this vector, `B` is `indices`
+    /// and `C` is the resulting vector, then `C[i] = A[B[i]]`.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn shuffle_bytes(self, indices: Self) -> Self;
+
+    /// Call the provided function for each 64-bit lane in this vector. The
+    /// given function is provided the lane index and lane value as a `u64`.
+    ///
+    /// If `f` returns `Some`, then iteration over the lanes is stopped and
+    /// the value is returned. Otherwise, this returns `None`.
+    ///
+    /// # Notes
+    ///
+    /// Conceptually it would be nice if we could have a
+    /// `unpack64(self) -> [u64; BITS / 64]` method, but defining that is
+    /// tricky given Rust's [current support for const generics][support].
+    /// And even if we could, it would be tricky to write generic code over
+    /// it. (Not impossible. We could introduce another layer that requires
+    /// `AsRef<[u64]>` or something.)
+    ///
+    /// [support]: https://github.com/rust-lang/rust/issues/60551
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn for_each_64bit_lane<T>(
+        self,
+        f: impl FnMut(usize, u64) -> Option<T>,
+    ) -> Option<T>;
+}
-/// Unpack the low 128-bits of `a` and `b`, and return them as 4 64-bit
-/// integers.
-///
-/// More precisely, if a = a4 a3 a2 a1 and b = b4 b3 b2 b1, where each element
-/// is a 64-bit integer and a1/b1 correspond to the least significant 64 bits,
-/// then the return value is `b2 b1 a2 a1`.
-#[target_feature(enable = "avx2")]
-pub unsafe fn unpacklo64x256(a: __m256i, b: __m256i) -> [u64; 4] {
-    let lo = _mm256_castsi256_si128(a);
-    let hi = _mm256_castsi256_si128(b);
-    [
-        _mm_cvtsi128_si64(lo) as u64,
-        _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64,
-        _mm_cvtsi128_si64(hi) as u64,
-        _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64,
-    ]
-}
+
+/// This trait extends the `Vector` trait with additional operations to
+/// support Fat Teddy.
+///
+/// Fat Teddy uses 16 buckets instead of 8, but reads half as many bytes per
+/// iteration (half the vector size instead of the full vector size). For
+/// example, when using a 256-bit vector, Slim Teddy reads 32 bytes at a time
+/// but Fat Teddy reads 16 bytes at a time.
+///
+/// Fat Teddy is useful when searching for a large number of literals.
+/// The extra number of buckets spreads the literals out more and reduces
+/// verification time.
+///
+/// Currently we only implement this for AVX on x86_64. It would be nice to
+/// implement this for SSE on x86_64 and NEON on aarch64, with the latter two
+/// only reading 8 bytes at a time. It's not clear how well it would work,
+/// but there are some tricky things to figure out in terms of
+/// implementation. The `half_shift_in_{one,two,three}_bytes` methods in
+/// particular are probably the trickiest of the bunch. For AVX2, these are
+/// implemented by taking advantage of the fact that `_mm256_alignr_epi8`
+/// operates on each 128-bit half instead of the full 256-bit vector.
+/// (Whereas `_mm_alignr_epi8` operates on the full 128-bit vector and not
+/// on each 64-bit half.) I didn't do a careful survey of NEON to see if it
+/// could easily support these operations.
+pub(crate) trait FatVector: Vector {
+    type Half: Vector;
-/// Returns true if and only if all bits in the given 128-bit vector are 0.
-#[target_feature(enable = "ssse3")]
-pub unsafe fn is_all_zeroes128(a: __m128i) -> bool {
-    let cmp = _mm_cmpeq_epi8(a, zeroes128());
-    _mm_movemask_epi8(cmp) as u32 == 0xFFFF
-}
+
+    /// Read a half-vector-size number of bytes from the given pointer, and
+    /// broadcast it across both halves of a full vector. The pointer does
+    /// not need to be aligned.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    ///
+    /// Callers must guarantee that at least `Self::Half::BYTES` bytes are
+    /// readable from `data`.
+    unsafe fn load_half_unaligned(data: *const u8) -> Self;
+
+    /// Like `Vector::shift_in_one_byte`, except this is done for each half
+    /// of the vector instead.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn half_shift_in_one_byte(self, vector2: Self) -> Self;
+
+    /// Like `Vector::shift_in_two_bytes`, except this is done for each half
+    /// of the vector instead.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn half_shift_in_two_bytes(self, vector2: Self) -> Self;
+
+    /// Like `Vector::shift_in_three_bytes`, except this is done for each
+    /// half of the vector instead.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn half_shift_in_three_bytes(self, vector2: Self) -> Self;
+
+    /// Swap the 128-bit lanes in this vector.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn swap_halves(self) -> Self;
+
+    /// Unpack and interleave the 8-bit lanes from the low 128 bits of each
+    /// vector and return the result.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn interleave_low_8bit_lanes(self, vector2: Self) -> Self;
+
+    /// Unpack and interleave the 8-bit lanes from the high 128 bits of each
+    /// vector and return the result.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn interleave_high_8bit_lanes(self, vector2: Self) -> Self;
-/// Returns true if and only if all bits in the given 256-bit vector are 0.
-#[target_feature(enable = "avx2")]
-pub unsafe fn is_all_zeroes256(a: __m256i) -> bool {
-    let cmp = _mm256_cmpeq_epi8(a, zeroes256());
-    _mm256_movemask_epi8(cmp) as u32 == 0xFFFFFFFF
-}
+
+    /// Call the provided function for each 64-bit lane in the lower half
+    /// of this vector and then in the other vector. The given function is
+    /// provided the lane index and lane value as a `u64`. (The high 128 bits
+    /// of each vector are ignored.)
+    ///
+    /// If `f` returns `Some`, then iteration over the lanes is stopped and
+    /// the value is returned. Otherwise, this returns `None`.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that this is okay to call in the current target
+    /// for the current CPU.
+    unsafe fn for_each_low_64bit_lane<T>(
+        self,
+        vector2: Self,
+        f: impl FnMut(usize, u64) -> Option<T>,
+    ) -> Option<T>;
+}
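// A scalar model (illustrative only) of `load_half_unaligned` above: Fat
// Teddy reads a half-width window and repeats it across both halves, so a
// single shuffle classifies each haystack byte against buckets 0-7 in the
// low half and buckets 8-15 in the high half.
fn broadcast_half(window: [u8; 16]) -> [u8; 32] {
    let mut out = [0u8; 32];
    out[..16].copy_from_slice(&window);
    out[16..].copy_from_slice(&window);
    out
}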
-#[target_feature(enable = "sse2")] -pub unsafe fn loadu128(slice: &[u8], at: usize) -> __m128i { - let ptr = slice.get_unchecked(at..).as_ptr(); - _mm_loadu_si128(ptr as *const u8 as *const __m128i) +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +mod x86_64_ssse3 { + use core::arch::x86_64::*; + + use crate::util::int::{I32, I8}; + + use super::Vector; + + impl Vector for __m128i { + const BITS: usize = 128; + const BYTES: usize = 16; + + #[inline(always)] + unsafe fn splat(byte: u8) -> __m128i { + _mm_set1_epi8(i8::from_bits(byte)) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> __m128i { + _mm_loadu_si128(data.cast::<__m128i>()) + } + + #[inline(always)] + unsafe fn is_zero(self) -> bool { + let cmp = self.cmpeq(Self::splat(0)); + _mm_movemask_epi8(cmp).to_bits() == 0xFFFF + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> __m128i { + _mm_cmpeq_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> __m128i { + _mm_and_si128(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> __m128i { + _mm_or_si128(self, vector2) + } + + #[inline(always)] + unsafe fn shift_8bit_lane_right(self) -> Self { + // Apparently there is no _mm_srli_epi8, so we emulate it by + // shifting 16-bit integers and masking out the high nybble of each + // 8-bit lane (since that nybble will contain bits from the low + // nybble of the previous lane). + let lomask = Self::splat(0xF); + _mm_srli_epi16(self, BITS).and(lomask) + } + + #[inline(always)] + unsafe fn shift_in_one_byte(self, vector2: Self) -> Self { + _mm_alignr_epi8(self, vector2, 15) + } + + #[inline(always)] + unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self { + _mm_alignr_epi8(self, vector2, 14) + } + + #[inline(always)] + unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self { + _mm_alignr_epi8(self, vector2, 13) + } + + #[inline(always)] + unsafe fn shuffle_bytes(self, indices: Self) -> Self { + _mm_shuffle_epi8(self, indices) + } + + #[inline(always)] + unsafe fn for_each_64bit_lane( + self, + mut f: impl FnMut(usize, u64) -> Option, + ) -> Option { + // We could just use _mm_extract_epi64 here, but that requires + // SSE 4.1. It isn't necessarily a problem to just require SSE 4.1, + // but everything else works with SSSE3 so we stick to that subset. + let lanes: [u64; 2] = core::mem::transmute(self); + if let Some(t) = f(0, lanes[0]) { + return Some(t); + } + if let Some(t) = f(1, lanes[1]) { + return Some(t); + } + None + } + } } -/// Load a 256-bit vector from slice at the given position. The slice does -/// not need to be unaligned. -/// -/// Since this code assumes little-endian (there is no big-endian x86), the -/// bytes starting in `slice[at..]` will be at the least significant bits of -/// the returned vector. This is important for the surrounding code, since for -/// example, shifting the resulting vector right is equivalent to logically -/// shifting the bytes in `slice` left. 
-#[target_feature(enable = "avx2")] -pub unsafe fn loadu256(slice: &[u8], at: usize) -> __m256i { - let ptr = slice.get_unchecked(at..).as_ptr(); - _mm256_loadu_si256(ptr as *const u8 as *const __m256i) +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +mod x86_64_avx2 { + use core::arch::x86_64::*; + + use crate::util::int::{I32, I64, I8}; + + use super::{FatVector, Vector}; + + impl Vector for __m256i { + const BITS: usize = 256; + const BYTES: usize = 32; + + #[inline(always)] + unsafe fn splat(byte: u8) -> __m256i { + _mm256_set1_epi8(i8::from_bits(byte)) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> __m256i { + _mm256_loadu_si256(data.cast::<__m256i>()) + } + + #[inline(always)] + unsafe fn is_zero(self) -> bool { + let cmp = self.cmpeq(Self::splat(0)); + _mm256_movemask_epi8(cmp).to_bits() == 0xFFFFFFFF + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> __m256i { + _mm256_cmpeq_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> __m256i { + _mm256_and_si256(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> __m256i { + _mm256_or_si256(self, vector2) + } + + #[inline(always)] + unsafe fn shift_8bit_lane_right(self) -> Self { + let lomask = Self::splat(0xF); + _mm256_srli_epi16(self, BITS).and(lomask) + } + + #[inline(always)] + unsafe fn shift_in_one_byte(self, vector2: Self) -> Self { + // Credit goes to jneem for figuring this out: + // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184 + // + // TL;DR avx2's PALIGNR instruction is actually just two 128-bit + // PALIGNR instructions, which is not what we want, so we need to + // do some extra shuffling. + let v = _mm256_permute2x128_si256(vector2, self, 0x21); + _mm256_alignr_epi8(self, v, 15) + } + + #[inline(always)] + unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self { + // Credit goes to jneem for figuring this out: + // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184 + // + // TL;DR avx2's PALIGNR instruction is actually just two 128-bit + // PALIGNR instructions, which is not what we want, so we need to + // do some extra shuffling. + let v = _mm256_permute2x128_si256(vector2, self, 0x21); + _mm256_alignr_epi8(self, v, 14) + } + + #[inline(always)] + unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self { + // Credit goes to jneem for figuring this out: + // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184 + // + // TL;DR avx2's PALIGNR instruction is actually just two 128-bit + // PALIGNR instructions, which is not what we want, so we need to + // do some extra shuffling. + let v = _mm256_permute2x128_si256(vector2, self, 0x21); + _mm256_alignr_epi8(self, v, 13) + } + + #[inline(always)] + unsafe fn shuffle_bytes(self, indices: Self) -> Self { + _mm256_shuffle_epi8(self, indices) + } + + #[inline(always)] + unsafe fn for_each_64bit_lane( + self, + mut f: impl FnMut(usize, u64) -> Option, + ) -> Option { + // NOTE: At one point in the past, I used transmute to this to + // get a [u64; 4], but it turned out to lead to worse codegen IIRC. + // I've tried it more recently, and it looks like that's no longer + // the case. But since there's no difference, we stick with the + // slightly more complicated but transmute-free version. 
+ let lane = _mm256_extract_epi64(self, 0).to_bits(); + if let Some(t) = f(0, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(self, 1).to_bits(); + if let Some(t) = f(1, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(self, 2).to_bits(); + if let Some(t) = f(2, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(self, 3).to_bits(); + if let Some(t) = f(3, lane) { + return Some(t); + } + None + } + } + + impl FatVector for __m256i { + type Half = __m128i; + + #[inline(always)] + unsafe fn load_half_unaligned(data: *const u8) -> Self { + let half = Self::Half::load_unaligned(data); + _mm256_broadcastsi128_si256(half) + } + + #[inline(always)] + unsafe fn half_shift_in_one_byte(self, vector2: Self) -> Self { + _mm256_alignr_epi8(self, vector2, 15) + } + + #[inline(always)] + unsafe fn half_shift_in_two_bytes(self, vector2: Self) -> Self { + _mm256_alignr_epi8(self, vector2, 14) + } + + #[inline(always)] + unsafe fn half_shift_in_three_bytes(self, vector2: Self) -> Self { + _mm256_alignr_epi8(self, vector2, 13) + } + + #[inline(always)] + unsafe fn swap_halves(self) -> Self { + _mm256_permute4x64_epi64(self, 0x4E) + } + + #[inline(always)] + unsafe fn interleave_low_8bit_lanes(self, vector2: Self) -> Self { + _mm256_unpacklo_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn interleave_high_8bit_lanes(self, vector2: Self) -> Self { + _mm256_unpackhi_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn for_each_low_64bit_lane<T>( + self, + vector2: Self, + mut f: impl FnMut(usize, u64) -> Option<T>, + ) -> Option<T> { + let lane = _mm256_extract_epi64(self, 0).to_bits(); + if let Some(t) = f(0, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(self, 1).to_bits(); + if let Some(t) = f(1, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(vector2, 0).to_bits(); + if let Some(t) = f(2, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(vector2, 1).to_bits(); + if let Some(t) = f(3, lane) { + return Some(t); + } + None + } + } } -/// Returns a 128-bit vector with all bits set to 0. -#[target_feature(enable = "sse2")] -pub unsafe fn zeroes128() -> __m128i { - _mm_set1_epi8(0) +#[cfg(target_arch = "aarch64")] +mod aarch64_neon { + use core::arch::aarch64::*; + + use super::Vector; + + impl Vector for uint8x16_t { + const BITS: usize = 128; + const BYTES: usize = 16; + + #[inline(always)] + unsafe fn splat(byte: u8) -> uint8x16_t { + vdupq_n_u8(byte) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> uint8x16_t { + vld1q_u8(data) + } + + #[inline(always)] + unsafe fn is_zero(self) -> bool { + // Could also use vmaxvq_u8. + // ... I tried that and couldn't observe any meaningful difference + // in benchmarks.
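+            // vpmaxq_u8(self, self) writes the max of each adjacent pair of
+            // 8-bit lanes into both halves of the result, so the low 64 bits
+            // alone cover all 16 input lanes: they are zero exactly when
+            // every lane of `self` is zero.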
+ let maxes = vreinterpretq_u64_u8(vpmaxq_u8(self, self)); + vgetq_lane_u64(maxes, 0) == 0 + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> uint8x16_t { + vceqq_u8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> uint8x16_t { + vandq_u8(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> uint8x16_t { + vorrq_u8(self, vector2) + } + + #[inline(always)] + unsafe fn shift_8bit_lane_right<const BITS: i32>(self) -> Self { + debug_assert!(BITS <= 7); + vshrq_n_u8(self, BITS) + } + + #[inline(always)] + unsafe fn shift_in_one_byte(self, vector2: Self) -> Self { + vextq_u8(vector2, self, 15) + } + + #[inline(always)] + unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self { + vextq_u8(vector2, self, 14) + } + + #[inline(always)] + unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self { + vextq_u8(vector2, self, 13) + } + + #[inline(always)] + unsafe fn shuffle_bytes(self, indices: Self) -> Self { + vqtbl1q_u8(self, indices) + } + + #[inline(always)] + unsafe fn for_each_64bit_lane<T>( + self, + mut f: impl FnMut(usize, u64) -> Option<T>, + ) -> Option<T> { + let this = vreinterpretq_u64_u8(self); + let lane = vgetq_lane_u64(this, 0); + if let Some(t) = f(0, lane) { + return Some(t); + } + let lane = vgetq_lane_u64(this, 1); + if let Some(t) = f(1, lane) { + return Some(t); + } + None + } + } } -/// Returns a 256-bit vector with all bits set to 0. -#[target_feature(enable = "avx2")] -pub unsafe fn zeroes256() -> __m256i { - _mm256_set1_epi8(0) +#[cfg(all(test, target_arch = "x86_64", target_feature = "sse2"))] +mod tests_x86_64_ssse3 { + use core::arch::x86_64::*; + + use crate::util::int::{I32, U32}; + + use super::*; + + fn is_runnable() -> bool { + std::is_x86_feature_detected!("ssse3") + } + + #[target_feature(enable = "ssse3")] + unsafe fn load(lanes: [u8; 16]) -> __m128i { + __m128i::load_unaligned(&lanes as *const [u8; 16] as *const u8) + } + + #[target_feature(enable = "ssse3")] + unsafe fn unload(v: __m128i) -> [u8; 16] { + [ + _mm_extract_epi8(v, 0).to_bits().low_u8(), + _mm_extract_epi8(v, 1).to_bits().low_u8(), + _mm_extract_epi8(v, 2).to_bits().low_u8(), + _mm_extract_epi8(v, 3).to_bits().low_u8(), + _mm_extract_epi8(v, 4).to_bits().low_u8(), + _mm_extract_epi8(v, 5).to_bits().low_u8(), + _mm_extract_epi8(v, 6).to_bits().low_u8(), + _mm_extract_epi8(v, 7).to_bits().low_u8(), + _mm_extract_epi8(v, 8).to_bits().low_u8(), + _mm_extract_epi8(v, 9).to_bits().low_u8(), + _mm_extract_epi8(v, 10).to_bits().low_u8(), + _mm_extract_epi8(v, 11).to_bits().low_u8(), + _mm_extract_epi8(v, 12).to_bits().low_u8(), + _mm_extract_epi8(v, 13).to_bits().low_u8(), + _mm_extract_epi8(v, 14).to_bits().low_u8(), + _mm_extract_epi8(v, 15).to_bits().low_u8(), + ] + } + + #[test] + fn vector_splat() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v = __m128i::splat(0xAF); + assert_eq!( + unload(v), + [ + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_is_zero() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert!(!v.is_zero()); + let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert!(v.is_zero()); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_cmpeq() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 1]); + let v2 = + load([16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); + assert_eq!( + unload(v1.cmpeq(v2)), + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_and() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let v2 = + load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!( + unload(v1.and(v2)), + [0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_or() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let v2 = + load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!( + unload(v1.or(v2)), + [0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_8bit_lane_right() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v = load([ + 0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert_eq!( + unload(v.shift_8bit_lane_right::<2>()), + [0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_one_byte() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_one_byte(v2)), + [32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_two_bytes() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_two_bytes(v2)), + [31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_three_bytes() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_three_bytes(v2)), + [30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shuffle_bytes() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = + load([0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12]); + assert_eq!( + unload(v1.shuffle_bytes(v2)), + [1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_for_each_64bit_lane() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v = load([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, + 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, + ]); + let mut lanes = [0u64; 2]; + v.for_each_64bit_lane(|i, lane| { + lanes[i] = lane; + None::<()> + }); + 
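+            // Little-endian layout: byte 0x01 is the least significant byte
+            // of lane 0, so the first eight bytes pack into
+            // 0x0807060504030201.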
assert_eq!(lanes, [0x0807060504030201, 0x100F0E0D0C0B0A09],); + } + if !is_runnable() { + return; + } + unsafe { test() } + } } -/// Returns a 128-bit vector with all bits set to 1. -#[target_feature(enable = "sse2")] -pub unsafe fn ones128() -> __m128i { - _mm_set1_epi8(0xFF as u8 as i8) +#[cfg(all(test, target_arch = "x86_64", target_feature = "sse2"))] +mod tests_x86_64_avx2 { + use core::arch::x86_64::*; + + use crate::util::int::{I32, U32}; + + use super::*; + + fn is_runnable() -> bool { + std::is_x86_feature_detected!("avx2") + } + + #[target_feature(enable = "avx2")] + unsafe fn load(lanes: [u8; 32]) -> __m256i { + __m256i::load_unaligned(&lanes as *const [u8; 32] as *const u8) + } + + #[target_feature(enable = "avx2")] + unsafe fn load_half(lanes: [u8; 16]) -> __m256i { + __m256i::load_half_unaligned(&lanes as *const [u8; 16] as *const u8) + } + + #[target_feature(enable = "avx2")] + unsafe fn unload(v: __m256i) -> [u8; 32] { + [ + _mm256_extract_epi8(v, 0).to_bits().low_u8(), + _mm256_extract_epi8(v, 1).to_bits().low_u8(), + _mm256_extract_epi8(v, 2).to_bits().low_u8(), + _mm256_extract_epi8(v, 3).to_bits().low_u8(), + _mm256_extract_epi8(v, 4).to_bits().low_u8(), + _mm256_extract_epi8(v, 5).to_bits().low_u8(), + _mm256_extract_epi8(v, 6).to_bits().low_u8(), + _mm256_extract_epi8(v, 7).to_bits().low_u8(), + _mm256_extract_epi8(v, 8).to_bits().low_u8(), + _mm256_extract_epi8(v, 9).to_bits().low_u8(), + _mm256_extract_epi8(v, 10).to_bits().low_u8(), + _mm256_extract_epi8(v, 11).to_bits().low_u8(), + _mm256_extract_epi8(v, 12).to_bits().low_u8(), + _mm256_extract_epi8(v, 13).to_bits().low_u8(), + _mm256_extract_epi8(v, 14).to_bits().low_u8(), + _mm256_extract_epi8(v, 15).to_bits().low_u8(), + _mm256_extract_epi8(v, 16).to_bits().low_u8(), + _mm256_extract_epi8(v, 17).to_bits().low_u8(), + _mm256_extract_epi8(v, 18).to_bits().low_u8(), + _mm256_extract_epi8(v, 19).to_bits().low_u8(), + _mm256_extract_epi8(v, 20).to_bits().low_u8(), + _mm256_extract_epi8(v, 21).to_bits().low_u8(), + _mm256_extract_epi8(v, 22).to_bits().low_u8(), + _mm256_extract_epi8(v, 23).to_bits().low_u8(), + _mm256_extract_epi8(v, 24).to_bits().low_u8(), + _mm256_extract_epi8(v, 25).to_bits().low_u8(), + _mm256_extract_epi8(v, 26).to_bits().low_u8(), + _mm256_extract_epi8(v, 27).to_bits().low_u8(), + _mm256_extract_epi8(v, 28).to_bits().low_u8(), + _mm256_extract_epi8(v, 29).to_bits().low_u8(), + _mm256_extract_epi8(v, 30).to_bits().low_u8(), + _mm256_extract_epi8(v, 31).to_bits().low_u8(), + ] + } + + #[test] + fn vector_splat() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v = __m256i::splat(0xAF); + assert_eq!( + unload(v), + [ + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_is_zero() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v = load([ + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert!(!v.is_zero()); + let v = load([ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert!(v.is_zero()); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_cmpeq() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29, 30, 31, 1, + ]); + let v2 = load([ + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, + 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + ]); + assert_eq!( + unload(v1.cmpeq(v2)), + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_and() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + let v2 = load([ + 0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert_eq!( + unload(v1.and(v2)), + [ + 0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_or() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + let v2 = load([ + 0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert_eq!( + unload(v1.or(v2)), + [ + 0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_8bit_lane_right() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v = load([ + 0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert_eq!( + unload(v.shift_8bit_lane_right::<2>()), + [ + 0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_one_byte() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, + ]); + assert_eq!( + unload(v1.shift_in_one_byte(v2)), + [ + 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_two_bytes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, + ]); + assert_eq!( + unload(v1.shift_in_two_bytes(v2)), + [ + 63, 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_three_bytes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, + ]); + assert_eq!( + unload(v1.shift_in_three_bytes(v2)), + [ + 62, 63, 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shuffle_bytes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, + 16, 16, 20, 20, 20, 20, 24, 24, 24, 24, 28, 28, 28, 28, + ]); + assert_eq!( + unload(v1.shuffle_bytes(v2)), + [ + 1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13, 17, + 17, 17, 17, 21, 21, 21, 21, 25, 25, 25, 25, 29, 29, 29, + 29 + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_for_each_64bit_lane() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v = load([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, + 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, + 0x1F, 0x20, + ]); + let mut lanes = [0u64; 4]; + v.for_each_64bit_lane(|i, lane| { + lanes[i] = lane; + None::<()> + }); + assert_eq!( + lanes, + [ + 0x0807060504030201, + 0x100F0E0D0C0B0A09, + 0x1817161514131211, + 0x201F1E1D1C1B1A19 + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_half_shift_in_one_byte() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load_half([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ]); + let v2 = load_half([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.half_shift_in_one_byte(v2)), + [ + 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_half_shift_in_two_bytes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load_half([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ]); + let v2 = load_half([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.half_shift_in_two_bytes(v2)), + [ + 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 31, + 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_half_shift_in_three_bytes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load_half([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ]); + let v2 = load_half([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.half_shift_in_three_bytes(v2)), + [ + 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 30, + 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_swap_halves() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 
24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v.swap_halves()), + [ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_interleave_low_8bit_lanes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, + ]); + assert_eq!( + unload(v1.interleave_low_8bit_lanes(v2)), + [ + 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 8, 40, + 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55, + 24, 56, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_interleave_high_8bit_lanes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, + ]); + assert_eq!( + unload(v1.interleave_high_8bit_lanes(v2)), + [ + 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, 16, + 48, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, + 63, 32, 64, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_for_each_low_64bit_lane() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, + 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, + 0x1F, 0x20, + ]); + let v2 = load([ + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, + 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, + 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, + 0x3F, 0x40, + ]); + let mut lanes = [0u64; 4]; + v1.for_each_low_64bit_lane(v2, |i, lane| { + lanes[i] = lane; + None::<()> + }); + assert_eq!( + lanes, + [ + 0x0807060504030201, + 0x100F0E0D0C0B0A09, + 0x2827262524232221, + 0x302F2E2D2C2B2A29 + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } } -/// Returns a 256-bit vector with all bits set to 1. -#[target_feature(enable = "avx2")] -pub unsafe fn ones256() -> __m256i { - _mm256_set1_epi8(0xFF as u8 as i8) +#[cfg(all(test, target_arch = "aarch64", target_feature = "neon"))] +mod tests_aarch64_neon { + use core::arch::aarch64::*; + + use super::*; + + #[target_feature(enable = "neon")] + unsafe fn load(lanes: [u8; 16]) -> uint8x16_t { + uint8x16_t::load_unaligned(&lanes as *const [u8; 16] as *const u8) + } + + #[target_feature(enable = "neon")] + unsafe fn unload(v: uint8x16_t) -> [u8; 16] { + [ + vgetq_lane_u8(v, 0), + vgetq_lane_u8(v, 1), + vgetq_lane_u8(v, 2), + vgetq_lane_u8(v, 3), + vgetq_lane_u8(v, 4), + vgetq_lane_u8(v, 5), + vgetq_lane_u8(v, 6), + vgetq_lane_u8(v, 7), + vgetq_lane_u8(v, 8), + vgetq_lane_u8(v, 9), + vgetq_lane_u8(v, 10), + vgetq_lane_u8(v, 11), + vgetq_lane_u8(v, 12), + vgetq_lane_u8(v, 13), + vgetq_lane_u8(v, 14), + vgetq_lane_u8(v, 15), + ] + } + + // Example functions. These don't test the Vector traits, but rather, + // specific NEON instructions.
They are basically little experiments I + // wrote to figure out what an instruction does since their descriptions + // are so dense. I decided to keep the experiments around as example tests + // in case they're useful. + + #[test] + fn example_vmaxvq_u8_non_zero() { + #[target_feature(enable = "neon")] + unsafe fn example() { + let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(vmaxvq_u8(v), 1); + } + unsafe { example() } + } + + #[test] + fn example_vmaxvq_u8_zero() { + #[target_feature(enable = "neon")] + unsafe fn example() { + let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(vmaxvq_u8(v), 0); + } + unsafe { example() } + } + + #[test] + fn example_vpmaxq_u8_non_zero() { + #[target_feature(enable = "neon")] + unsafe fn example() { + let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let r = vpmaxq_u8(v, v); + assert_eq!( + unload(r), + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + ); + } + unsafe { example() } + } + + #[test] + fn example_vpmaxq_u8_self() { + #[target_feature(enable = "neon")] + unsafe fn example() { + let v = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let r = vpmaxq_u8(v, v); + assert_eq!( + unload(r), + [2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16] + ); + } + unsafe { example() } + } + + #[test] + fn example_vpmaxq_u8_other() { + #[target_feature(enable = "neon")] + unsafe fn example() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let r = vpmaxq_u8(v1, v2); + assert_eq!( + unload(r), + [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32] + ); + } + unsafe { example() } + } + + // Now we test the actual methods on the Vector trait.
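+    // (Before the tests, a rough sketch, not vendored code, of how these
+    // methods typically compose in a SIMD scan; `ptr` and the fallback are
+    // assumed purely for illustration:
+    //
+    //     let needle = uint8x16_t::splat(b'z');
+    //     let chunk = uint8x16_t::load_unaligned(ptr);
+    //     if !chunk.cmpeq(needle).is_zero() {
+    //         // some lane equals b'z'; confirm the hit byte by byte
+    //     }
+    // )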
+ + #[test] + fn vector_splat() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v = uint8x16_t::splat(0xAF); + assert_eq!( + unload(v), + [ + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF + ] + ); + } + unsafe { test() } + } + + #[test] + fn vector_is_zero() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert!(!v.is_zero()); + let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert!(v.is_zero()); + } + unsafe { test() } + } + + #[test] + fn vector_cmpeq() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1]); + let v2 = + load([16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); + assert_eq!( + unload(v1.cmpeq(v2)), + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF] + ); + } + unsafe { test() } + } + + #[test] + fn vector_and() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let v2 = + load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!( + unload(v1.and(v2)), + [0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + unsafe { test() } + } + + #[test] + fn vector_or() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let v2 = + load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!( + unload(v1.or(v2)), + [0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + unsafe { test() } + } + + #[test] + fn vector_shift_8bit_lane_right() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v = load([ + 0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert_eq!( + unload(v.shift_8bit_lane_right::<2>()), + [0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_one_byte() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_one_byte(v2)), + [32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + ); + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_two_bytes() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_two_bytes(v2)), + [31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], + ); + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_three_bytes() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_three_bytes(v2)), + [30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + ); + } + unsafe { test() } + } + + #[test] + fn vector_shuffle_bytes() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = + load([0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12]); + assert_eq!( + 
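+                // v1 acts as the lookup table; each lane of v2 selects a
+                // lane of v1 (this is what vqtbl1q_u8 computes below).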
unload(v1.shuffle_bytes(v2)), + [1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13], + ); + } + unsafe { test() } + } + + #[test] + fn vector_for_each_64bit_lane() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v = load([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, + 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, + ]); + let mut lanes = [0u64; 2]; + v.for_each_64bit_lane(|i, lane| { + lanes[i] = lane; + None::<()> + }); + assert_eq!(lanes, [0x0807060504030201, 0x100F0E0D0C0B0A09],); + } + unsafe { test() } + } } diff --git a/vendor/aho-corasick/src/prefilter.rs b/vendor/aho-corasick/src/prefilter.rs deleted file mode 100644 index ef81411..0000000 --- a/vendor/aho-corasick/src/prefilter.rs +++ /dev/null @@ -1,1057 +0,0 @@ -use std::cmp; -use std::fmt; -use std::panic::{RefUnwindSafe, UnwindSafe}; -use std::u8; - -use memchr::{memchr, memchr2, memchr3}; - -use crate::ahocorasick::MatchKind; -use crate::packed; -use crate::Match; - -/// A candidate is the result of running a prefilter on a haystack at a -/// particular position. The result is either no match, a confirmed match or -/// a possible match. -/// -/// When no match is returned, the prefilter is guaranteeing that no possible -/// match can be found in the haystack, and the caller may trust this. That is, -/// all correct prefilters must never report false negatives. -/// -/// In some cases, a prefilter can confirm a match very quickly, in which case, -/// the caller may use this to stop what it's doing and report the match. In -/// this case, prefilter implementations must never report a false positive. -/// In other cases, the prefilter can only report a potential match, in which -/// case the callers must attempt to confirm the match. In this case, prefilter -/// implementations are permitted to return false positives. -#[derive(Clone, Debug)] -pub enum Candidate { - None, - Match(Match), - PossibleStartOfMatch(usize), -} - -impl Candidate { - /// Convert this candidate into an option. This is useful when callers - /// do not distinguish between true positives and false positives (i.e., - /// the caller must always confirm the match in order to update some other - /// state). - pub fn into_option(self) -> Option<usize> { - match self { - Candidate::None => None, - Candidate::Match(ref m) => Some(m.start()), - Candidate::PossibleStartOfMatch(start) => Some(start), - } - } -} - -/// A prefilter describes the behavior of fast literal scanners for quickly -/// skipping past bytes in the haystack that we know cannot possibly -/// participate in a match. -pub trait Prefilter: - Send + Sync + RefUnwindSafe + UnwindSafe + fmt::Debug -{ - /// Returns the next possible match candidate. This may yield false - /// positives, so callers must confirm a match starting at the position - /// returned. This, however, must never produce false negatives. That is, - /// this must, at minimum, return the starting position of the next match - /// in the given haystack after or at the given position. - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate; - - /// A method for cloning a prefilter, to work-around the fact that Clone - /// is not object-safe. - fn clone_prefilter(&self) -> Box<dyn Prefilter>; - - /// Returns the approximate total amount of heap used by this prefilter, in - /// units of bytes. - fn heap_bytes(&self) -> usize; - - /// Returns true if and only if this prefilter may return false - /// positives.
This is useful for completely avoiding the automaton - /// when the prefilter can quickly confirm its own matches. - /// - /// By default, this returns true, which is conservative; it is always - /// correct to return `true`. Returning `false` here and reporting a false - /// positive will result in incorrect searches. - fn reports_false_positives(&self) -> bool { - true - } - - /// Returns true if and only if this prefilter may look for a non-starting - /// position of a match. - /// - /// This is useful in a streaming context where prefilters that don't look - /// for a starting position of a match can be quite difficult to deal with. - /// - /// This returns false by default. - fn looks_for_non_start_of_match(&self) -> bool { - false - } -} - -impl<'a, P: Prefilter + ?Sized> Prefilter for &'a P { - #[inline] - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - (**self).next_candidate(state, haystack, at) - } - - fn clone_prefilter(&self) -> Box<dyn Prefilter> { - (**self).clone_prefilter() - } - - fn heap_bytes(&self) -> usize { - (**self).heap_bytes() - } - - fn reports_false_positives(&self) -> bool { - (**self).reports_false_positives() - } -} - -/// A convenience object for representing any type that implements Prefilter -/// and is cloneable. -#[derive(Debug)] -pub struct PrefilterObj(Box<dyn Prefilter>); - -impl Clone for PrefilterObj { - fn clone(&self) -> Self { - PrefilterObj(self.0.clone_prefilter()) - } -} - -impl PrefilterObj { - /// Create a new prefilter object. - pub fn new<T: Prefilter + 'static>(t: T) -> PrefilterObj { - PrefilterObj(Box::new(t)) - } - - /// Return the underlying prefilter trait object. - pub fn as_ref(&self) -> &dyn Prefilter { - &*self.0 - } -} - -/// PrefilterState tracks state associated with the effectiveness of a -/// prefilter. It is used to track how many bytes, on average, are skipped by -/// the prefilter. If this average dips below a certain threshold over time, -/// then the state renders the prefilter inert and stops using it. -/// -/// A prefilter state should be created for each search. (Where creating an -/// iterator via, e.g., `find_iter`, is treated as a single search.) -#[derive(Clone, Debug)] -pub struct PrefilterState { - /// The number of skips that has been executed. - skips: usize, - /// The total number of bytes that have been skipped. - skipped: usize, - /// The maximum length of a match. This is used to help determine how many - /// bytes on average should be skipped in order for a prefilter to be - /// effective. - max_match_len: usize, - /// Once this heuristic has been deemed permanently ineffective, it will be - /// inert throughout the rest of its lifetime. This serves as a cheap way - /// to check inertness. - inert: bool, - /// The last (absolute) position at which a prefilter scanned to. - /// Prefilters can use this position to determine whether to re-scan or - /// not. - /// - /// Unlike other things that impact effectiveness, this is a fleeting - /// condition. That is, a prefilter can be considered ineffective if it is - /// at a position before `last_scan_at`, but can become effective again - /// once the search moves past `last_scan_at`. - /// - /// The utility of this is to both avoid additional overhead from calling - /// the prefilter and to avoid quadratic behavior. This ensures that a - /// prefilter will scan any particular byte at most once. (Note that some - /// prefilters, like the start-byte prefilter, do not need to use this - /// field at all, since it only looks for starting bytes.)
- last_scan_at: usize, -} - -impl PrefilterState { - /// The minimum number of skip attempts to try before considering whether - /// a prefilter is effective or not. - const MIN_SKIPS: usize = 40; - - /// The minimum amount of bytes that skipping must average, expressed as a - /// multiple of the length of a possible match. - /// - /// That is, after MIN_SKIPS have occurred, if the average number of bytes - /// skipped ever falls below MIN_AVG_FACTOR * max-match-length, then the - /// prefilter ought to be rendered inert. - const MIN_AVG_FACTOR: usize = 2; - - /// Create a fresh prefilter state. - pub fn new(max_match_len: usize) -> PrefilterState { - PrefilterState { - skips: 0, - skipped: 0, - max_match_len, - inert: false, - last_scan_at: 0, - } - } - - /// Create a prefilter state that always disables the prefilter. - pub fn disabled() -> PrefilterState { - PrefilterState { - skips: 0, - skipped: 0, - max_match_len: 0, - inert: true, - last_scan_at: 0, - } - } - - /// Update this state with the number of bytes skipped on the last - /// invocation of the prefilter. - #[inline] - fn update_skipped_bytes(&mut self, skipped: usize) { - self.skips += 1; - self.skipped += skipped; - } - - /// Updates the position at which the last scan stopped. This may be - /// greater than the position of the last candidate reported. For example, - /// searching for the "rare" byte `z` in `abczdef` for the pattern `abcz` - /// will report a candidate at position `0`, but the end of its last scan - /// will be at position `3`. - /// - /// This position factors into the effectiveness of this prefilter. If the - /// current position is less than the last position at which a scan ended, - /// then the prefilter should not be re-run until the search moves past - /// that position. - #[inline] - fn update_at(&mut self, at: usize) { - if at > self.last_scan_at { - self.last_scan_at = at; - } - } - - /// Return true if and only if this state indicates that a prefilter is - /// still effective. - /// - /// The given pos should correspond to the current starting position of the - /// search. - #[inline] - pub fn is_effective(&mut self, at: usize) -> bool { - if self.inert { - return false; - } - if at < self.last_scan_at { - return false; - } - if self.skips < PrefilterState::MIN_SKIPS { - return true; - } - - let min_avg = PrefilterState::MIN_AVG_FACTOR * self.max_match_len; - if self.skipped >= min_avg * self.skips { - return true; - } - - // We're inert. - self.inert = true; - false - } -} - -/// A builder for constructing the best possible prefilter. When constructed, -/// this builder will heuristically select the best prefilter it can build, -/// if any, and discard the rest. -#[derive(Debug)] -pub struct Builder { - count: usize, - ascii_case_insensitive: bool, - start_bytes: StartBytesBuilder, - rare_bytes: RareBytesBuilder, - packed: Option<packed::Builder>, -} - -impl Builder { - /// Create a new builder for constructing the best possible prefilter. - pub fn new(kind: MatchKind) -> Builder { - let pbuilder = kind - .as_packed() - .map(|kind| packed::Config::new().match_kind(kind).builder()); - Builder { - count: 0, - ascii_case_insensitive: false, - start_bytes: StartBytesBuilder::new(), - rare_bytes: RareBytesBuilder::new(), - packed: pbuilder, - } - } - - /// Enable ASCII case insensitivity. When set, byte strings added to this - /// builder will be interpreted without respect to ASCII case.
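// (A worked example of the PrefilterState heuristic above, with assumed
// numbers rather than anything from the vendored code: for max_match_len = 8,
// the threshold is MIN_AVG_FACTOR * 8 = 16 bytes skipped per attempt. After
// 50 skips totalling 600 bytes, is_effective checks 600 >= 16 * 50 = 800,
// which fails, so the state flips to inert and the search falls back to the
// automaton alone.)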
- pub fn ascii_case_insensitive(mut self, yes: bool) -> Builder { - self.ascii_case_insensitive = yes; - self.start_bytes = self.start_bytes.ascii_case_insensitive(yes); - self.rare_bytes = self.rare_bytes.ascii_case_insensitive(yes); - self - } - - /// Return a prefilter suitable for quickly finding potential matches. - /// - /// All patterns added to an Aho-Corasick automaton should be added to this - /// builder before attempting to construct the prefilter. - pub fn build(&self) -> Option<PrefilterObj> { - // match (self.start_bytes.build(), self.rare_bytes.build()) { - match (self.start_bytes.build(), self.rare_bytes.build()) { - // If we could build both start and rare prefilters, then there are - // a few cases in which we'd want to use the start-byte prefilter - // over the rare-byte prefilter, since the former has lower - // overhead. - (prestart @ Some(_), prerare @ Some(_)) => { - // If the start-byte prefilter can scan for a smaller number - // of bytes than the rare-byte prefilter, then it's probably - // faster. - let has_fewer_bytes = - self.start_bytes.count < self.rare_bytes.count; - // Otherwise, if the combined frequency rank of the detected - // bytes in the start-byte prefilter is "close" to the combined - // frequency rank of the rare-byte prefilter, then we pick - // the start-byte prefilter even if the rare-byte prefilter - // heuristically searches for rare bytes. This is because the - // rare-byte prefilter has higher constant costs, so we tend to - // prefer the start-byte prefilter when we can. - let has_rarer_bytes = - self.start_bytes.rank_sum <= self.rare_bytes.rank_sum + 50; - if has_fewer_bytes || has_rarer_bytes { - prestart - } else { - prerare - } - } - (prestart @ Some(_), None) => prestart, - (None, prerare @ Some(_)) => prerare, - (None, None) if self.ascii_case_insensitive => None, - (None, None) => self - .packed - .as_ref() - .and_then(|b| b.build()) - .map(|s| PrefilterObj::new(Packed(s))), - } - } - - /// Add a literal string to this prefilter builder. - pub fn add(&mut self, bytes: &[u8]) { - self.count += 1; - self.start_bytes.add(bytes); - self.rare_bytes.add(bytes); - if let Some(ref mut pbuilder) = self.packed { - pbuilder.add(bytes); - } - } -} - -/// A type that wraps a packed searcher and implements the `Prefilter` -/// interface. -#[derive(Clone, Debug)] -struct Packed(packed::Searcher); - -impl Prefilter for Packed { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - self.0.find_at(haystack, at).map_or(Candidate::None, Candidate::Match) - } - - fn clone_prefilter(&self) -> Box<dyn Prefilter> { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - self.0.heap_bytes() - } - - fn reports_false_positives(&self) -> bool { - false - } -} - -/// A builder for constructing a rare byte prefilter. -/// -/// A rare byte prefilter attempts to pick out a small set of rare bytes that -/// occur in the patterns, and then quickly scan to matches of those rare -/// bytes. -#[derive(Clone, Debug)] -struct RareBytesBuilder { - /// Whether this prefilter should account for ASCII case insensitivity or - /// not. - ascii_case_insensitive: bool, - /// A set of rare bytes, indexed by byte value. - rare_set: ByteSet, - /// A set of byte offsets associated with bytes in a pattern. An entry - /// corresponds to a particular byte (its index) and is only non-zero if - /// the byte occurred at an offset greater than 0 in at least one pattern.
- /// - /// If a byte's offset is not representable in 8 bits, then the rare bytes - /// prefilter becomes inert. - byte_offsets: RareByteOffsets, - /// Whether this is available as a prefilter or not. This can be set to - /// false during construction if a condition is seen that invalidates the - /// use of the rare-byte prefilter. - available: bool, - /// The number of bytes set to an active value in `byte_offsets`. - count: usize, - /// The sum of frequency ranks for the rare bytes detected. This is - /// intended to give a heuristic notion of how rare the bytes are. - rank_sum: u16, -} - -/// A set of bytes. -#[derive(Clone, Copy)] -struct ByteSet([bool; 256]); - -impl ByteSet { - fn empty() -> ByteSet { - ByteSet([false; 256]) - } - - fn insert(&mut self, b: u8) -> bool { - let new = !self.contains(b); - self.0[b as usize] = true; - new - } - - fn contains(&self, b: u8) -> bool { - self.0[b as usize] - } -} - -impl fmt::Debug for ByteSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut bytes = vec![]; - for b in 0..=255 { - if self.contains(b) { - bytes.push(b); - } - } - f.debug_struct("ByteSet").field("set", &bytes).finish() - } -} - -/// A set of byte offsets, keyed by byte. -#[derive(Clone, Copy)] -struct RareByteOffsets { - /// Each entry corresponds to the maximum offset of the corresponding - /// byte across all patterns seen. - set: [RareByteOffset; 256], -} - -impl RareByteOffsets { - /// Create a new empty set of rare byte offsets. - pub fn empty() -> RareByteOffsets { - RareByteOffsets { set: [RareByteOffset::default(); 256] } - } - - /// Add the given offset for the given byte to this set. The offset - /// recorded for the byte is the maximum of its existing offset and the - /// offset given. - pub fn set(&mut self, byte: u8, off: RareByteOffset) { - self.set[byte as usize].max = - cmp::max(self.set[byte as usize].max, off.max); - } -} - -impl fmt::Debug for RareByteOffsets { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut offsets = vec![]; - for off in self.set.iter() { - if off.max > 0 { - offsets.push(off); - } - } - f.debug_struct("RareByteOffsets").field("set", &offsets).finish() - } -} - -/// Offsets associated with an occurrence of a "rare" byte in any of the -/// patterns used to construct a single Aho-Corasick automaton. -#[derive(Clone, Copy, Debug)] -struct RareByteOffset { - /// The maximum offset at which a particular byte occurs from the start - /// of any pattern. This is used as a shift amount. That is, when an - /// occurrence of this byte is found, the candidate position reported by - /// the prefilter is `position_of_byte - max`, such that the automaton - /// will begin its search at a position that is guaranteed to observe a - /// match. - /// - /// To avoid accidentally quadratic behavior, a prefilter is considered - /// ineffective when it is asked to start scanning from a position that it - /// has already scanned past. - /// - /// Using a `u8` here means that if we ever see a pattern that's longer - /// than 255 bytes, then the entire rare byte prefilter is disabled. - max: u8, -} - -impl Default for RareByteOffset { - fn default() -> RareByteOffset { - RareByteOffset { max: 0 } - } -} - -impl RareByteOffset { - /// Create a new rare byte offset. If the given offset is too big, then - /// None is returned. In that case, callers should render the rare bytes - /// prefilter inert.
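// (For example, RareByteOffset::new(300) returns None because 300 does not
// fit in a u8; per the note above, the caller then disables the rare-byte
// prefilter entirely.)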
- fn new(max: usize) -> Option<RareByteOffset> { - if max > u8::MAX as usize { - None - } else { - Some(RareByteOffset { max: max as u8 }) - } - } -} - -impl RareBytesBuilder { - /// Create a new builder for constructing a rare byte prefilter. - fn new() -> RareBytesBuilder { - RareBytesBuilder { - ascii_case_insensitive: false, - rare_set: ByteSet::empty(), - byte_offsets: RareByteOffsets::empty(), - available: true, - count: 0, - rank_sum: 0, - } - } - - /// Enable ASCII case insensitivity. When set, byte strings added to this - /// builder will be interpreted without respect to ASCII case. - fn ascii_case_insensitive(mut self, yes: bool) -> RareBytesBuilder { - self.ascii_case_insensitive = yes; - self - } - - /// Build the rare bytes prefilter. - /// - /// If there are more than 3 distinct starting bytes, or if heuristics - /// otherwise determine that this prefilter should not be used, then `None` - /// is returned. - fn build(&self) -> Option<PrefilterObj> { - if !self.available || self.count > 3 { - return None; - } - let (mut bytes, mut len) = ([0; 3], 0); - for b in 0..=255 { - if self.rare_set.contains(b) { - bytes[len] = b as u8; - len += 1; - } - } - match len { - 0 => None, - 1 => Some(PrefilterObj::new(RareBytesOne { - byte1: bytes[0], - offset: self.byte_offsets.set[bytes[0] as usize], - })), - 2 => Some(PrefilterObj::new(RareBytesTwo { - offsets: self.byte_offsets, - byte1: bytes[0], - byte2: bytes[1], - })), - 3 => Some(PrefilterObj::new(RareBytesThree { - offsets: self.byte_offsets, - byte1: bytes[0], - byte2: bytes[1], - byte3: bytes[2], - })), - _ => unreachable!(), - } - } - - /// Add a byte string to this builder. - /// - /// All patterns added to an Aho-Corasick automaton should be added to this - /// builder before attempting to construct the prefilter. - fn add(&mut self, bytes: &[u8]) { - // If we've already given up, then do nothing. - if !self.available { - return; - } - // If we've already blown our budget, then don't waste time looking - // for more rare bytes. - if self.count > 3 { - self.available = false; - return; - } - // If the pattern is too long, then our offset table is bunk, so - // give up. - if bytes.len() >= 256 { - self.available = false; - return; - } - let mut rarest = match bytes.get(0) { - None => return, - Some(&b) => (b, freq_rank(b)), - }; - // The idea here is to look for the rarest byte in each pattern, and - // add that to our set. As a special exception, if we see a byte that - // we've already added, then we immediately stop and choose that byte, - // even if there's another rare byte in the pattern. This helps us - // apply the rare byte optimization in more cases by attempting to pick - // bytes that are in common between patterns. So for example, if we - // were searching for `Sherlock` and `lockjaw`, then this would pick - // `k` for both patterns, resulting in the use of `memchr` instead of - // `memchr2` for `k` and `j`. - let mut found = false; - for (pos, &b) in bytes.iter().enumerate() { - self.set_offset(pos, b); - if found { - continue; - } - if self.rare_set.contains(b) { - found = true; - continue; - } - let rank = freq_rank(b); - if rank < rarest.1 { - rarest = (b, rank); - } - } - if !found { - self.add_rare_byte(rarest.0); - } - } - - fn set_offset(&mut self, pos: usize, byte: u8) { - // This unwrap is OK because pos is never bigger than our max.
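// (Specifically: add() bails out with `self.available = false` on any
// pattern whose length is >= 256, so by the time set_offset runs, pos is at
// most 255 and RareByteOffset::new cannot fail.)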
- let offset = RareByteOffset::new(pos).unwrap(); - self.byte_offsets.set(byte, offset); - if self.ascii_case_insensitive { - self.byte_offsets.set(opposite_ascii_case(byte), offset); - } - } - - fn add_rare_byte(&mut self, byte: u8) { - self.add_one_rare_byte(byte); - if self.ascii_case_insensitive { - self.add_one_rare_byte(opposite_ascii_case(byte)); - } - } - - fn add_one_rare_byte(&mut self, byte: u8) { - if self.rare_set.insert(byte) { - self.count += 1; - self.rank_sum += freq_rank(byte) as u16; - } - } -} - -/// A prefilter for scanning for a single "rare" byte. -#[derive(Clone, Debug)] -struct RareBytesOne { - byte1: u8, - offset: RareByteOffset, -} - -impl Prefilter for RareBytesOne { - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr(self.byte1, &haystack[at..]) - .map(|i| { - let pos = at + i; - state.last_scan_at = pos; - cmp::max(at, pos.saturating_sub(self.offset.max as usize)) - }) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box<dyn Prefilter> { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } - - fn looks_for_non_start_of_match(&self) -> bool { - // TODO: It should be possible to use a rare byte prefilter in a - // streaming context. The main problem is that we usually assume that - // if a prefilter has scanned some text and not found anything, then no - // match *starts* in that text. This doesn't matter in non-streaming - // contexts, but in a streaming context, if we're looking for a byte - // that doesn't start at the beginning of a match and don't find it, - // then it's still possible for a match to start at the end of the - // current buffer content. In order to fix this, the streaming searcher - // would need to become aware of prefilters that do this and use the - // appropriate offset in various places. It is quite a delicate change - // and probably shouldn't be attempted until streaming search has a - // better testing strategy. In particular, we'd really like to be able - // to vary the buffer size to force strange cases that occur at the - // edge of the buffer. If we make the buffer size minimal, then these - // cases occur more frequently and are easier to trigger. - // - // This is also a bummer because this means that if the prefilter - // builder chose a rare byte prefilter, then a streaming search won't - // use any prefilter at all because the builder doesn't know how it's - // going to be used. Assuming we don't make streaming search aware of - // these special types of prefilters as described above, we could fix - // this by building a "backup" prefilter that could be used when the - // rare byte prefilter could not. But that's a band-aid. Sigh. - true - } -} - -/// A prefilter for scanning for two "rare" bytes.
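// (A worked example of the candidate arithmetic above, using assumed values:
// for the pattern b"abcz", the rare byte b'z' gets RareByteOffset { max: 3 }.
// If memchr then finds b'z' at haystack position 10, next_candidate reports
// cmp::max(at, 10 - 3) = 7, the only position at which a match of "abcz"
// could start while putting its 'z' at position 10.)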
-#[derive(Clone, Debug)] -struct RareBytesTwo { - offsets: RareByteOffsets, - byte1: u8, - byte2: u8, -} - -impl Prefilter for RareBytesTwo { - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr2(self.byte1, self.byte2, &haystack[at..]) - .map(|i| { - let pos = at + i; - state.update_at(pos); - let offset = self.offsets.set[haystack[pos] as usize].max; - cmp::max(at, pos.saturating_sub(offset as usize)) - }) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box<dyn Prefilter> { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } - - fn looks_for_non_start_of_match(&self) -> bool { - // TODO: See Prefilter impl for RareBytesOne. - true - } -} - -/// A prefilter for scanning for three "rare" bytes. -#[derive(Clone, Debug)] -struct RareBytesThree { - offsets: RareByteOffsets, - byte1: u8, - byte2: u8, - byte3: u8, -} - -impl Prefilter for RareBytesThree { - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr3(self.byte1, self.byte2, self.byte3, &haystack[at..]) - .map(|i| { - let pos = at + i; - state.update_at(pos); - let offset = self.offsets.set[haystack[pos] as usize].max; - cmp::max(at, pos.saturating_sub(offset as usize)) - }) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box<dyn Prefilter> { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } - - fn looks_for_non_start_of_match(&self) -> bool { - // TODO: See Prefilter impl for RareBytesOne. - true - } -} - -/// A builder for constructing a starting byte prefilter. -/// -/// A starting byte prefilter is a simplistic prefilter that looks for possible -/// matches by reporting all positions corresponding to a particular byte. This -/// generally only takes effect when there are at most 3 distinct possible -/// starting bytes. e.g., the patterns `foo`, `bar`, and `baz` have two -/// distinct starting bytes (`f` and `b`), and this prefilter returns all -/// occurrences of either `f` or `b`. -/// -/// In some cases, a heuristic frequency analysis may determine that it would -/// be better not to use this prefilter even when there are 3 or fewer distinct -/// starting bytes. -#[derive(Clone, Debug)] -struct StartBytesBuilder { - /// Whether this prefilter should account for ASCII case insensitivity or - /// not. - ascii_case_insensitive: bool, - /// The set of starting bytes observed. - byteset: Vec, - /// The number of bytes set to true in `byteset`. - count: usize, - /// The sum of frequency ranks for the rare bytes detected. This is - /// intended to give a heuristic notion of how rare the bytes are. - rank_sum: u16, -} - -impl StartBytesBuilder { - /// Create a new builder for constructing a start byte prefilter. - fn new() -> StartBytesBuilder { - StartBytesBuilder { - ascii_case_insensitive: false, - byteset: vec![false; 256], - count: 0, - rank_sum: 0, - } - } - - /// Enable ASCII case insensitivity. When set, byte strings added to this - /// builder will be interpreted without respect to ASCII case. - fn ascii_case_insensitive(mut self, yes: bool) -> StartBytesBuilder { - self.ascii_case_insensitive = yes; - self - } - - /// Build the starting bytes prefilter. - /// - /// If there are more than 3 distinct starting bytes, or if heuristics - /// otherwise determine that this prefilter should not be used, then `None` - /// is returned.
- fn build(&self) -> Option<PrefilterObj> { - if self.count > 3 { - return None; - } - let (mut bytes, mut len) = ([0; 3], 0); - for b in 0..256 { - if !self.byteset[b] { - continue; - } - // We don't handle non-ASCII bytes for now. Getting non-ASCII - // bytes right is trickier, since we generally don't want to put - // a leading UTF-8 code unit into a prefilter that isn't ASCII, - // since they can occur frequently. Instead, it would be better to use a - // continuation byte, but this requires more sophisticated analysis - // of the automaton and a richer prefilter API. - if b > 0x7F { - return None; - } - bytes[len] = b as u8; - len += 1; - } - match len { - 0 => None, - 1 => Some(PrefilterObj::new(StartBytesOne { byte1: bytes[0] })), - 2 => Some(PrefilterObj::new(StartBytesTwo { - byte1: bytes[0], - byte2: bytes[1], - })), - 3 => Some(PrefilterObj::new(StartBytesThree { - byte1: bytes[0], - byte2: bytes[1], - byte3: bytes[2], - })), - _ => unreachable!(), - } - } - - /// Add a byte string to this builder. - /// - /// All patterns added to an Aho-Corasick automaton should be added to this - /// builder before attempting to construct the prefilter. - fn add(&mut self, bytes: &[u8]) { - if self.count > 3 { - return; - } - if let Some(&byte) = bytes.get(0) { - self.add_one_byte(byte); - if self.ascii_case_insensitive { - self.add_one_byte(opposite_ascii_case(byte)); - } - } - } - - fn add_one_byte(&mut self, byte: u8) { - if !self.byteset[byte as usize] { - self.byteset[byte as usize] = true; - self.count += 1; - self.rank_sum += freq_rank(byte) as u16; - } - } -} - -/// A prefilter for scanning for a single starting byte. -#[derive(Clone, Debug)] -struct StartBytesOne { - byte1: u8, -} - -impl Prefilter for StartBytesOne { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr(self.byte1, &haystack[at..]) - .map(|i| at + i) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box<dyn Prefilter> { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } -} - -/// A prefilter for scanning for two starting bytes. -#[derive(Clone, Debug)] -struct StartBytesTwo { - byte1: u8, - byte2: u8, -} - -impl Prefilter for StartBytesTwo { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr2(self.byte1, self.byte2, &haystack[at..]) - .map(|i| at + i) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box<dyn Prefilter> { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } -} - -/// A prefilter for scanning for three starting bytes. -#[derive(Clone, Debug)] -struct StartBytesThree { - byte1: u8, - byte2: u8, - byte3: u8, -} - -impl Prefilter for StartBytesThree { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr3(self.byte1, self.byte2, self.byte3, &haystack[at..]) - .map(|i| at + i) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box<dyn Prefilter> { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } -} - -/// Return the next candidate reported by the given prefilter while -/// simultaneously updating the given prestate. -/// -/// The caller is responsible for checking the prestate before deciding whether -/// to initiate a search.
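// (With assumed numbers: if a search resumes at `at = 5` and the prefilter
// reports Candidate::PossibleStartOfMatch(25), then `next` below credits
// 25 - 5 = 20 skipped bytes to the PrefilterState, feeding the is_effective
// average described earlier.)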
-#[inline] -pub fn next( - prestate: &mut PrefilterState, - prefilter: P, - haystack: &[u8], - at: usize, -) -> Candidate { - let cand = prefilter.next_candidate(prestate, haystack, at); - match cand { - Candidate::None => { - prestate.update_skipped_bytes(haystack.len() - at); - } - Candidate::Match(ref m) => { - prestate.update_skipped_bytes(m.start() - at); - } - Candidate::PossibleStartOfMatch(i) => { - prestate.update_skipped_bytes(i - at); - } - } - cand -} - -/// If the given byte is an ASCII letter, then return it in the opposite case. -/// e.g., Given `b'A'`, this returns `b'a'`, and given `b'a'`, this returns -/// `b'A'`. If a non-ASCII letter is given, then the given byte is returned. -pub fn opposite_ascii_case(b: u8) -> u8 { - if b'A' <= b && b <= b'Z' { - b.to_ascii_lowercase() - } else if b'a' <= b && b <= b'z' { - b.to_ascii_uppercase() - } else { - b - } -} - -/// Return the frequency rank of the given byte. The higher the rank, the more -/// common the byte (heuristically speaking). -fn freq_rank(b: u8) -> u8 { - use crate::byte_frequencies::BYTE_FREQUENCIES; - BYTE_FREQUENCIES[b as usize] -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn scratch() { - let mut b = Builder::new(MatchKind::LeftmostFirst); - b.add(b"Sherlock"); - b.add(b"locjaw"); - // b.add(b"Sherlock"); - // b.add(b"Holmes"); - // b.add(b"Watson"); - // b.add("Шерлок Холмс".as_bytes()); - // b.add("Джон Уотсон".as_bytes()); - - let s = b.build().unwrap(); - println!("{:?}", s); - } -} diff --git a/vendor/aho-corasick/src/state_id.rs b/vendor/aho-corasick/src/state_id.rs deleted file mode 100644 index 8973806..0000000 --- a/vendor/aho-corasick/src/state_id.rs +++ /dev/null @@ -1,192 +0,0 @@ -use std::fmt::Debug; -use std::hash::Hash; - -use crate::error::{Error, Result}; - -// NOTE: Most of this code was copied from regex-automata, but without the -// (de)serialization specific stuff. - -/// Check that the premultiplication of the given state identifier can -/// fit into the representation indicated by `S`. If it cannot, or if it -/// overflows `usize` itself, then an error is returned. -pub fn premultiply_overflow_error( - last_state: S, - alphabet_len: usize, -) -> Result<()> { - let requested = match last_state.to_usize().checked_mul(alphabet_len) { - Some(requested) => requested, - None => return Err(Error::premultiply_overflow(0, 0)), - }; - if requested > S::max_id() { - return Err(Error::premultiply_overflow(S::max_id(), requested)); - } - Ok(()) -} - -/// Convert the given `usize` to the chosen state identifier -/// representation. If the given value cannot fit in the chosen -/// representation, then an error is returned. -pub fn usize_to_state_id(value: usize) -> Result { - if value > S::max_id() { - Err(Error::state_id_overflow(S::max_id())) - } else { - Ok(S::from_usize(value)) - } -} - -/// Return the unique identifier for an automaton's fail state in the chosen -/// representation indicated by `S`. -pub fn fail_id() -> S { - S::from_usize(0) -} - -/// Return the unique identifier for an automaton's fail state in the chosen -/// representation indicated by `S`. -pub fn dead_id() -> S { - S::from_usize(1) -} - -mod private { - /// Sealed stops crates other than aho-corasick from implementing any - /// traits that use it. - pub trait Sealed {} - impl Sealed for u8 {} - impl Sealed for u16 {} - impl Sealed for u32 {} - impl Sealed for u64 {} - impl Sealed for usize {} -} - -/// A trait describing the representation of an automaton's state identifier. 
-///
-/// The purpose of this trait is to safely express both the possible state
-/// identifier representations that can be used in an automaton and to convert
-/// between state identifier representations and types that can be used to
-/// efficiently index memory (such as `usize`).
-///
-/// In general, one should not need to implement this trait explicitly. Indeed,
-/// for now, this trait is sealed such that it cannot be implemented by any
-/// other type. In particular, this crate provides implementations for `u8`,
-/// `u16`, `u32`, `u64` and `usize`. (`u32` and `u64` are only provided for
-/// targets that can represent all corresponding values in a `usize`.)
-pub trait StateID:
-    private::Sealed
-    + Clone
-    + Copy
-    + Debug
-    + Eq
-    + Hash
-    + PartialEq
-    + PartialOrd
-    + Ord
-{
-    /// Convert from a `usize` to this implementation's representation.
-    ///
-    /// Implementors may assume that `n <= Self::max_id`. That is, implementors
-    /// do not need to check whether `n` can fit inside this implementation's
-    /// representation.
-    fn from_usize(n: usize) -> Self;
-
-    /// Convert this implementation's representation to a `usize`.
-    ///
-    /// Implementors must not return a `usize` value greater than
-    /// `Self::max_id` and must not permit overflow when converting between the
-    /// implementor's representation and `usize`. In general, the preferred
-    /// way for implementors to achieve this is to simply not provide
-    /// implementations of `StateID` that cannot fit into the target platform's
-    /// `usize`.
-    fn to_usize(self) -> usize;
-
-    /// Return the maximum state identifier supported by this representation.
-    ///
-    /// Implementors must return a correct bound. Doing otherwise may result
-    /// in unspecified behavior (but will not violate memory safety).
-    fn max_id() -> usize;
-}
-
-impl StateID for usize {
-    #[inline]
-    fn from_usize(n: usize) -> usize {
-        n
-    }
-
-    #[inline]
-    fn to_usize(self) -> usize {
-        self
-    }
-
-    #[inline]
-    fn max_id() -> usize {
-        ::std::usize::MAX
-    }
-}
-
-impl StateID for u8 {
-    #[inline]
-    fn from_usize(n: usize) -> u8 {
-        n as u8
-    }
-
-    #[inline]
-    fn to_usize(self) -> usize {
-        self as usize
-    }
-
-    #[inline]
-    fn max_id() -> usize {
-        ::std::u8::MAX as usize
-    }
-}
-
-impl StateID for u16 {
-    #[inline]
-    fn from_usize(n: usize) -> u16 {
-        n as u16
-    }
-
-    #[inline]
-    fn to_usize(self) -> usize {
-        self as usize
-    }
-
-    #[inline]
-    fn max_id() -> usize {
-        ::std::u16::MAX as usize
-    }
-}
-
-#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
-impl StateID for u32 {
-    #[inline]
-    fn from_usize(n: usize) -> u32 {
-        n as u32
-    }
-
-    #[inline]
-    fn to_usize(self) -> usize {
-        self as usize
-    }
-
-    #[inline]
-    fn max_id() -> usize {
-        ::std::u32::MAX as usize
-    }
-}
-
-#[cfg(target_pointer_width = "64")]
-impl StateID for u64 {
-    #[inline]
-    fn from_usize(n: usize) -> u64 {
-        n as u64
-    }
-
-    #[inline]
-    fn to_usize(self) -> usize {
-        self as usize
-    }
-
-    #[inline]
-    fn max_id() -> usize {
-        ::std::u64::MAX as usize
-    }
-}
diff --git a/vendor/aho-corasick/src/tests.rs b/vendor/aho-corasick/src/tests.rs
index 20cd3d1..a5276f8 100644
--- a/vendor/aho-corasick/src/tests.rs
+++ b/vendor/aho-corasick/src/tests.rs
@@ -1,8 +1,9 @@
-use std::collections::HashMap;
-use std::io;
-use std::usize;
+use std::{collections::HashMap, format, string::String, vec::Vec};
 
-use crate::{AhoCorasickBuilder, Match, MatchKind};
+use crate::{
+    AhoCorasick, AhoCorasickBuilder, AhoCorasickKind, Anchored, Input, Match,
+    MatchKind, StartKind,
+};
 
 /// A description of a single test against an Aho-Corasick automaton.
 ///
@@ -54,9 +55,13 @@ const AC_STANDARD_ANCHORED_NON_OVERLAPPING: TestCollection =
 const AC_STANDARD_OVERLAPPING: TestCollection =
     &[BASICS, OVERLAPPING, REGRESSION];
 
+/*
+Iterators of anchored overlapping searches were removed from the API after
+0.7, but we leave the tests commented out for posterity.
 /// Tests for Aho-Corasick's anchored standard overlapping match semantics.
 const AC_STANDARD_ANCHORED_OVERLAPPING: TestCollection =
     &[ANCHORED_BASICS, ANCHORED_OVERLAPPING];
+*/
 
 /// Tests for Aho-Corasick's leftmost-first match semantics.
 const AC_LEFTMOST_FIRST: TestCollection =
@@ -90,7 +95,8 @@ const AC_LEFTMOST_LONGEST_ANCHORED: TestCollection = &[
 /// should produce the same answer.
 const BASICS: &'static [SearchTest] = &[
     t!(basic000, &[], "", &[]),
-    t!(basic001, &["a"], "", &[]),
+    t!(basic001, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
+    t!(basic002, &["a"], "", &[]),
     t!(basic010, &["a"], "a", &[(0, 0, 1)]),
     t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
    t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
@@ -151,15 +157,18 @@
 /// produce the same answer.
const ANCHORED_BASICS: &'static [SearchTest] = &[ t!(abasic000, &[], "", &[]), + t!(abasic001, &[], "a", &[]), + t!(abasic002, &[], "abc", &[]), t!(abasic010, &[""], "", &[(0, 0, 0)]), - t!(abasic020, &[""], "a", &[(0, 0, 0)]), - t!(abasic030, &[""], "abc", &[(0, 0, 0)]), + t!(abasic020, &[""], "a", &[(0, 0, 0), (0, 1, 1)]), + t!(abasic030, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]), t!(abasic100, &["a"], "a", &[(0, 0, 1)]), - t!(abasic110, &["a"], "aa", &[(0, 0, 1)]), - t!(abasic120, &["a", "b"], "ab", &[(0, 0, 1)]), - t!(abasic130, &["a", "b"], "ba", &[(1, 0, 1)]), + t!(abasic110, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]), + t!(abasic120, &["a", "b"], "ab", &[(0, 0, 1), (1, 1, 2)]), + t!(abasic130, &["a", "b"], "ba", &[(1, 0, 1), (0, 1, 2)]), t!(abasic140, &["foo", "foofoo"], "foo", &[(0, 0, 3)]), t!(abasic150, &["foofoo", "foo"], "foo", &[(1, 0, 3)]), + t!(abasic200, &["foo"], "foofoo foo", &[(0, 0, 3), (0, 3, 6)]), ]; /// Tests for non-overlapping standard match semantics. @@ -193,13 +202,13 @@ const STANDARD_ANCHORED: &'static [SearchTest] = &[ t!(astandard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]), t!(astandard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]), t!(astandard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]), - t!(astandard040, &["a", ""], "a", &[(1, 0, 0)]), + t!(astandard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]), t!(astandard050, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]), - t!(astandard410, &["", "a"], "a", &[(0, 0, 0)]), - t!(astandard420, &["", "a"], "aa", &[(0, 0, 0)]), - t!(astandard430, &["", "a", ""], "a", &[(0, 0, 0)]), - t!(astandard440, &["a", "", ""], "a", &[(1, 0, 0)]), - t!(astandard450, &["", "", "a"], "a", &[(0, 0, 0)]), + t!(astandard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]), + t!(astandard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2)]), + t!(astandard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1)]), + t!(astandard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1)]), + t!(astandard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]), ]; /// Tests for non-overlapping leftmost match semantics. These should pass for @@ -208,7 +217,8 @@ const STANDARD_ANCHORED: &'static [SearchTest] = &[ /// first when constructing the automaton should always be the same. const LEFTMOST: &'static [SearchTest] = &[ t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(leftmost010, &["a", ""], "a", &[(0, 0, 1), (1, 1, 1)]), + t!(leftmost010, &["a", ""], "a", &[(0, 0, 1)]), + t!(leftmost011, &["a", ""], "ab", &[(0, 0, 1), (1, 2, 2)]), t!(leftmost020, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]), t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]), t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]), @@ -260,12 +270,14 @@ const LEFTMOST: &'static [SearchTest] = &[ /// Like LEFTMOST, but for anchored searches. const ANCHORED_LEFTMOST: &'static [SearchTest] = &[ t!(aleftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]), + // We shouldn't allow an empty match immediately following a match, right? 
t!(aleftmost010, &["a", ""], "a", &[(0, 0, 1)]), - t!(aleftmost020, &["", ""], "a", &[(0, 0, 0)]), - t!(aleftmost030, &["a", "ab"], "aa", &[(0, 0, 1)]), - t!(aleftmost031, &["ab", "a"], "aa", &[(1, 0, 1)]), + t!(aleftmost020, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]), + t!(aleftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]), + t!(aleftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]), t!(aleftmost032, &["ab", "a"], "xayabbbz", &[]), t!(aleftmost300, &["abcd", "bce", "b"], "abce", &[]), + t!(aleftmost301, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]), t!(aleftmost310, &["abcd", "ce", "bc"], "abce", &[]), t!(aleftmost320, &["abcd", "bce", "ce", "b"], "abce", &[]), t!(aleftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[]), @@ -304,8 +316,8 @@ const ANCHORED_LEFTMOST: &'static [SearchTest] = &[ t!( aleftmost410, &["z", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8)] + "abcdefghzyz", + &[(3, 0, 8), (0, 8, 9)] ), ]; @@ -316,8 +328,10 @@ const LEFTMOST_FIRST: &'static [SearchTest] = &[ t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), t!(leftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]), t!(leftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]), - t!(leftfirst012, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]), - t!(leftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]), + t!(leftfirst012, &["a", "", ""], "a", &[(0, 0, 1)]), + t!(leftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]), + t!(leftfirst014, &["a", ""], "a", &[(0, 0, 1)]), + t!(leftfirst015, &["a", ""], "ab", &[(0, 0, 1), (1, 2, 2)]), t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]), t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]), t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]), @@ -343,10 +357,10 @@ const LEFTMOST_FIRST: &'static [SearchTest] = &[ /// Like LEFTMOST_FIRST, but for anchored searches. 
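+/// (Anchored searches only report matches that begin exactly where the
+/// search starts; each iterator step then re-anchors at the new position.)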
 const ANCHORED_LEFTMOST_FIRST: &'static [SearchTest] = &[
     t!(aleftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
-    t!(aleftfirst010, &["", "a"], "a", &[(0, 0, 0)]),
-    t!(aleftfirst011, &["", "a", ""], "a", &[(0, 0, 0)]),
+    t!(aleftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
+    t!(aleftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
     t!(aleftfirst012, &["a", "", ""], "a", &[(0, 0, 1)]),
-    t!(aleftfirst013, &["", "", "a"], "a", &[(0, 0, 0)]),
+    t!(aleftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
     t!(aleftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
     t!(aleftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
     t!(aleftfirst040, &["a", "ab"], "xayabbbz", &[]),
@@ -370,11 +384,12 @@ const LEFTMOST_LONGEST: &'static [SearchTest] = &[
     t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
     t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
-    t!(leftlong020, &["", "a"], "a", &[(1, 0, 1), (0, 1, 1),]),
-    t!(leftlong021, &["", "a", ""], "a", &[(1, 0, 1), (0, 1, 1),]),
-    t!(leftlong022, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]),
-    t!(leftlong023, &["", "", "a"], "a", &[(2, 0, 1), (0, 1, 1),]),
-    t!(leftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2), (0, 2, 2),]),
+    t!(leftlong020, &["", "a"], "a", &[(1, 0, 1)]),
+    t!(leftlong021, &["", "a", ""], "a", &[(1, 0, 1)]),
+    t!(leftlong022, &["a", "", ""], "a", &[(0, 0, 1)]),
+    t!(leftlong023, &["", "", "a"], "a", &[(2, 0, 1)]),
+    t!(leftlong024, &["", "a"], "ab", &[(1, 0, 1), (0, 2, 2)]),
+    t!(leftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
     t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
     t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
     t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
@@ -401,7 +416,7 @@ const ANCHORED_LEFTMOST_LONGEST: &'static [SearchTest] = &[
     t!(aleftlong021, &["", "a", ""], "a", &[(1, 0, 1)]),
     t!(aleftlong022, &["a", "", ""], "a", &[(0, 0, 1)]),
     t!(aleftlong023, &["", "", "a"], "a", &[(2, 0, 1)]),
-    t!(aleftlong030, &["", "a"], "aa", &[(1, 0, 1)]),
+    t!(aleftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
     t!(aleftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
     t!(aleftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
     t!(aleftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
@@ -444,10 +459,15 @@ const ANCHORED_NON_OVERLAPPING: &'static [SearchTest] = &[
     t!(anover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
     t!(anover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
     t!(anover030, &["abc", "bc"], "zazabcz", &[]),
-    t!(anover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]),
+    t!(
+        anover100,
+        &["ab", "ba"],
+        "abababa",
+        &[(0, 0, 2), (0, 2, 4), (0, 4, 6)]
+    ),
     t!(anover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3)]),
-    t!(anover300, &["", ""], "", &[(0, 0, 0),]),
-    t!(anover310, &["", ""], "a", &[(0, 0, 0)]),
+    t!(anover300, &["", ""], "", &[(0, 0, 0)]),
+    t!(anover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
 ];
 
 /// Tests for overlapping match semantics.
@@ -532,6 +552,9 @@
     ),
 ];
 
+/*
+Iterators of anchored overlapping searches were removed from the API after
+0.7, but we leave the tests commented out for posterity.
 /// Like OVERLAPPING, but for anchored searches.
const ANCHORED_OVERLAPPING: &'static [SearchTest] = &[ t!(aover000, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]), @@ -550,6 +573,7 @@ const ANCHORED_OVERLAPPING: &'static [SearchTest] = &[ t!(aover350, &["", "", "a"], "a", &[(0, 0, 0), (1, 0, 0), (2, 0, 1)]), t!(aover360, &["foo", "foofoo"], "foofoo", &[(0, 0, 3), (1, 0, 6)]), ]; +*/ /// Tests for ASCII case insensitivity. /// @@ -627,15 +651,33 @@ const REGRESSION: &'static [SearchTest] = &[ // each one turns a different knob on AhoCorasickBuilder. macro_rules! testconfig { + (anchored, $name:ident, $collection:expr, $kind:ident, $with:expr) => { + #[test] + fn $name() { + run_search_tests($collection, |test| { + let mut builder = AhoCorasick::builder(); + $with(&mut builder); + let input = Input::new(test.haystack).anchored(Anchored::Yes); + builder + .match_kind(MatchKind::$kind) + .build(test.patterns) + .unwrap() + .try_find_iter(input) + .unwrap() + .collect() + }); + } + }; (overlapping, $name:ident, $collection:expr, $kind:ident, $with:expr) => { #[test] fn $name() { run_search_tests($collection, |test| { - let mut builder = AhoCorasickBuilder::new(); + let mut builder = AhoCorasick::builder(); $with(&mut builder); builder .match_kind(MatchKind::$kind) .build(test.patterns) + .unwrap() .find_overlapping_iter(test.haystack) .collect() }); @@ -644,14 +686,17 @@ macro_rules! testconfig { (stream, $name:ident, $collection:expr, $kind:ident, $with:expr) => { #[test] fn $name() { - run_search_tests($collection, |test| { - let buf = - io::BufReader::with_capacity(1, test.haystack.as_bytes()); - let mut builder = AhoCorasickBuilder::new(); + run_stream_search_tests($collection, |test| { + let buf = std::io::BufReader::with_capacity( + 1, + test.haystack.as_bytes(), + ); + let mut builder = AhoCorasick::builder(); $with(&mut builder); builder .match_kind(MatchKind::$kind) .build(test.patterns) + .unwrap() .stream_find_iter(buf) .map(|result| result.unwrap()) .collect() @@ -662,11 +707,12 @@ macro_rules! testconfig { #[test] fn $name() { run_search_tests($collection, |test| { - let mut builder = AhoCorasickBuilder::new(); + let mut builder = AhoCorasick::builder(); $with(&mut builder); builder .match_kind(MatchKind::$kind) .build(test.patterns) + .unwrap() .find_iter(test.haystack) .collect() }); @@ -679,98 +725,145 @@ macro_rules! 
testcombo { mod $name { use super::*; - testconfig!(nfa_default, $collection, $kind, |_| ()); + testconfig!(default, $collection, $kind, |_| ()); testconfig!( - nfa_no_prefilter, + nfa_default, $collection, $kind, |b: &mut AhoCorasickBuilder| { - b.prefilter(false); + b.kind(Some(AhoCorasickKind::NoncontiguousNFA)); } ); testconfig!( - nfa_all_sparse, + nfa_noncontig_no_prefilter, $collection, $kind, |b: &mut AhoCorasickBuilder| { - b.dense_depth(0); + b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) + .prefilter(false); } ); testconfig!( - nfa_all_dense, + nfa_noncontig_all_sparse, $collection, $kind, |b: &mut AhoCorasickBuilder| { - b.dense_depth(usize::MAX); + b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) + .dense_depth(0); } ); testconfig!( - dfa_default, + nfa_noncontig_all_dense, $collection, $kind, |b: &mut AhoCorasickBuilder| { - b.dfa(true); + b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) + .dense_depth(usize::MAX); } ); testconfig!( - dfa_no_prefilter, + nfa_contig_default, $collection, $kind, |b: &mut AhoCorasickBuilder| { - b.dfa(true).prefilter(false); + b.kind(Some(AhoCorasickKind::ContiguousNFA)); } ); testconfig!( - dfa_all_sparse, + nfa_contig_no_prefilter, $collection, $kind, |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(0); + b.kind(Some(AhoCorasickKind::ContiguousNFA)) + .prefilter(false); } ); testconfig!( - dfa_all_dense, + nfa_contig_all_sparse, $collection, $kind, |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(usize::MAX); + b.kind(Some(AhoCorasickKind::ContiguousNFA)) + .dense_depth(0); } ); testconfig!( - dfa_no_byte_class, + nfa_contig_all_dense, + $collection, + $kind, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::ContiguousNFA)) + .dense_depth(usize::MAX); + } + ); + testconfig!( + nfa_contig_no_byte_class, + $collection, + $kind, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::ContiguousNFA)) + .byte_classes(false); + } + ); + testconfig!( + dfa_default, + $collection, + $kind, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::DFA)); + } + ); + testconfig!( + dfa_start_both, + $collection, + $kind, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::DFA)) + .start_kind(StartKind::Both); + } + ); + testconfig!( + dfa_no_prefilter, $collection, $kind, |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false); + b.kind(Some(AhoCorasickKind::DFA)).prefilter(false); } ); testconfig!( - dfa_no_premultiply, + dfa_start_both_no_prefilter, $collection, $kind, |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. - #[allow(deprecated)] - b.dfa(true).premultiply(false); + b.kind(Some(AhoCorasickKind::DFA)) + .start_kind(StartKind::Both) + .prefilter(false); } ); testconfig!( - dfa_no_byte_class_no_premultiply, + dfa_no_byte_class, $collection, $kind, |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when options are removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false).premultiply(false); + b.kind(Some(AhoCorasickKind::DFA)).byte_classes(false); + } + ); + testconfig!( + dfa_start_both_no_byte_class, + $collection, + $kind, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::DFA)) + .start_kind(StartKind::Both) + .byte_classes(false); } ); } }; } -// Write out the combinations. +// Write out the various combinations of match semantics given the variety of +// configurations tested by 'testcombo!'. 
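+//
+// For instance, the first invocation below expands (roughly) to a module of
+// tests along these lines:
+//
+//     mod search_leftmost_longest {
+//         use super::*;
+//         #[test]
+//         fn default() { /* runs AC_LEFTMOST_LONGEST with builder defaults */ }
+//         #[test]
+//         fn nfa_default() { /* same, but with the noncontiguous NFA kind */ }
+//         // ... one #[test] per 'testconfig!' arm listed above ...
+//     }
+//
+// (An illustrative sketch of the expansion, not its literal output.)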
testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest); testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst); testcombo!( @@ -782,27 +875,63 @@ testcombo!( // Write out the overlapping combo by hand since there is only one of them. testconfig!( overlapping, - search_standard_overlapping_nfa_default, + search_standard_overlapping_default, AC_STANDARD_OVERLAPPING, Standard, |_| () ); testconfig!( overlapping, - search_standard_overlapping_nfa_all_sparse, + search_standard_overlapping_nfa_noncontig_default, AC_STANDARD_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - b.dense_depth(0); + b.kind(Some(AhoCorasickKind::NoncontiguousNFA)); } ); testconfig!( overlapping, - search_standard_overlapping_nfa_all_dense, + search_standard_overlapping_nfa_noncontig_no_prefilter, AC_STANDARD_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - b.dense_depth(usize::MAX); + b.kind(Some(AhoCorasickKind::NoncontiguousNFA)).prefilter(false); + } +); +testconfig!( + overlapping, + search_standard_overlapping_nfa_contig_default, + AC_STANDARD_OVERLAPPING, + Standard, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::ContiguousNFA)); + } +); +testconfig!( + overlapping, + search_standard_overlapping_nfa_contig_no_prefilter, + AC_STANDARD_OVERLAPPING, + Standard, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::ContiguousNFA)).prefilter(false); + } +); +testconfig!( + overlapping, + search_standard_overlapping_nfa_contig_all_sparse, + AC_STANDARD_OVERLAPPING, + Standard, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::ContiguousNFA)).dense_depth(0); + } +); +testconfig!( + overlapping, + search_standard_overlapping_nfa_contig_all_dense, + AC_STANDARD_OVERLAPPING, + Standard, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::ContiguousNFA)).dense_depth(usize::MAX); } ); testconfig!( @@ -811,214 +940,385 @@ testconfig!( AC_STANDARD_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - b.dfa(true); + b.kind(Some(AhoCorasickKind::DFA)); } ); testconfig!( overlapping, - search_standard_overlapping_dfa_all_sparse, + search_standard_overlapping_dfa_start_both, AC_STANDARD_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(0); + b.kind(Some(AhoCorasickKind::DFA)).start_kind(StartKind::Both); } ); testconfig!( overlapping, - search_standard_overlapping_dfa_all_dense, + search_standard_overlapping_dfa_no_prefilter, AC_STANDARD_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(usize::MAX); + b.kind(Some(AhoCorasickKind::DFA)).prefilter(false); } ); testconfig!( overlapping, - search_standard_overlapping_dfa_no_byte_class, + search_standard_overlapping_dfa_start_both_no_prefilter, AC_STANDARD_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false); + b.kind(Some(AhoCorasickKind::DFA)) + .start_kind(StartKind::Both) + .prefilter(false); } ); testconfig!( overlapping, - search_standard_overlapping_dfa_no_premultiply, + search_standard_overlapping_dfa_no_byte_class, AC_STANDARD_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. 
- #[allow(deprecated)] - b.dfa(true).premultiply(false); + b.kind(Some(AhoCorasickKind::DFA)).byte_classes(false); } ); testconfig!( overlapping, - search_standard_overlapping_dfa_no_byte_class_no_premultiply, + search_standard_overlapping_dfa_start_both_no_byte_class, AC_STANDARD_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when options are removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false).premultiply(false); + b.kind(Some(AhoCorasickKind::DFA)) + .start_kind(StartKind::Both) + .byte_classes(false); } ); // Also write out tests manually for streams, since we only test the standard // match semantics. We also don't bother testing different automaton // configurations, since those are well covered by tests above. +#[cfg(feature = "std")] testconfig!( stream, - search_standard_stream_nfa_default, + search_standard_stream_default, AC_STANDARD_NON_OVERLAPPING, Standard, |_| () ); +#[cfg(feature = "std")] +testconfig!( + stream, + search_standard_stream_nfa_noncontig_default, + AC_STANDARD_NON_OVERLAPPING, + Standard, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::NoncontiguousNFA)); + } +); +#[cfg(feature = "std")] +testconfig!( + stream, + search_standard_stream_nfa_contig_default, + AC_STANDARD_NON_OVERLAPPING, + Standard, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::ContiguousNFA)); + } +); +#[cfg(feature = "std")] testconfig!( stream, search_standard_stream_dfa_default, AC_STANDARD_NON_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - b.dfa(true); + b.kind(Some(AhoCorasickKind::DFA)); } ); // Same thing for anchored searches. Write them out manually. testconfig!( - search_standard_anchored_nfa_default, + anchored, + search_standard_anchored_default, AC_STANDARD_ANCHORED_NON_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - b.anchored(true); + b.start_kind(StartKind::Anchored); } ); testconfig!( - search_standard_anchored_dfa_default, + anchored, + search_standard_anchored_nfa_noncontig_default, AC_STANDARD_ANCHORED_NON_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); + b.start_kind(StartKind::Anchored) + .kind(Some(AhoCorasickKind::NoncontiguousNFA)); } ); testconfig!( - overlapping, - search_standard_anchored_overlapping_nfa_default, - AC_STANDARD_ANCHORED_OVERLAPPING, + anchored, + search_standard_anchored_nfa_contig_default, + AC_STANDARD_ANCHORED_NON_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - b.anchored(true); + b.start_kind(StartKind::Anchored) + .kind(Some(AhoCorasickKind::ContiguousNFA)); } ); testconfig!( - overlapping, - search_standard_anchored_overlapping_dfa_default, - AC_STANDARD_ANCHORED_OVERLAPPING, + anchored, + search_standard_anchored_dfa_default, + AC_STANDARD_ANCHORED_NON_OVERLAPPING, Standard, |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); + b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA)); } ); testconfig!( - search_leftmost_first_anchored_nfa_default, + anchored, + search_standard_anchored_dfa_start_both, + AC_STANDARD_ANCHORED_NON_OVERLAPPING, + Standard, + |b: &mut AhoCorasickBuilder| { + b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA)); + } +); +testconfig!( + anchored, + search_leftmost_first_anchored_default, + AC_LEFTMOST_FIRST_ANCHORED, + LeftmostFirst, + |b: &mut AhoCorasickBuilder| { + b.start_kind(StartKind::Anchored); + } +); +testconfig!( + anchored, + search_leftmost_first_anchored_nfa_noncontig_default, AC_LEFTMOST_FIRST_ANCHORED, LeftmostFirst, |b: &mut 
AhoCorasickBuilder| { - b.anchored(true); + b.start_kind(StartKind::Anchored) + .kind(Some(AhoCorasickKind::NoncontiguousNFA)); } ); testconfig!( + anchored, + search_leftmost_first_anchored_nfa_contig_default, + AC_LEFTMOST_FIRST_ANCHORED, + LeftmostFirst, + |b: &mut AhoCorasickBuilder| { + b.start_kind(StartKind::Anchored) + .kind(Some(AhoCorasickKind::ContiguousNFA)); + } +); +testconfig!( + anchored, search_leftmost_first_anchored_dfa_default, AC_LEFTMOST_FIRST_ANCHORED, LeftmostFirst, |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); + b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA)); + } +); +testconfig!( + anchored, + search_leftmost_first_anchored_dfa_start_both, + AC_LEFTMOST_FIRST_ANCHORED, + LeftmostFirst, + |b: &mut AhoCorasickBuilder| { + b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA)); + } +); +testconfig!( + anchored, + search_leftmost_longest_anchored_default, + AC_LEFTMOST_LONGEST_ANCHORED, + LeftmostLongest, + |b: &mut AhoCorasickBuilder| { + b.start_kind(StartKind::Anchored); } ); testconfig!( - search_leftmost_longest_anchored_nfa_default, + anchored, + search_leftmost_longest_anchored_nfa_noncontig_default, AC_LEFTMOST_LONGEST_ANCHORED, LeftmostLongest, |b: &mut AhoCorasickBuilder| { - b.anchored(true); + b.start_kind(StartKind::Anchored) + .kind(Some(AhoCorasickKind::NoncontiguousNFA)); } ); testconfig!( + anchored, + search_leftmost_longest_anchored_nfa_contig_default, + AC_LEFTMOST_LONGEST_ANCHORED, + LeftmostLongest, + |b: &mut AhoCorasickBuilder| { + b.start_kind(StartKind::Anchored) + .kind(Some(AhoCorasickKind::ContiguousNFA)); + } +); +testconfig!( + anchored, search_leftmost_longest_anchored_dfa_default, AC_LEFTMOST_LONGEST_ANCHORED, LeftmostLongest, |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); + b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA)); + } +); +testconfig!( + anchored, + search_leftmost_longest_anchored_dfa_start_both, + AC_LEFTMOST_LONGEST_ANCHORED, + LeftmostLongest, + |b: &mut AhoCorasickBuilder| { + b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA)); } ); // And also write out the test combinations for ASCII case insensitivity. 
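+// As with the stream and anchored tests above, these are written out by
+// hand: each invocation pins one automaton configuration while keeping
+// 'ascii_case_insensitive(true)' set on the builder.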
testconfig!( - acasei_standard_nfa_default, + acasei_standard_default, &[ASCII_CASE_INSENSITIVE], Standard, |b: &mut AhoCorasickBuilder| { b.prefilter(false).ascii_case_insensitive(true); } ); +testconfig!( + acasei_standard_nfa_noncontig_default, + &[ASCII_CASE_INSENSITIVE], + Standard, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) + .prefilter(false) + .ascii_case_insensitive(true); + } +); +testconfig!( + acasei_standard_nfa_contig_default, + &[ASCII_CASE_INSENSITIVE], + Standard, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::ContiguousNFA)) + .prefilter(false) + .ascii_case_insensitive(true); + } +); testconfig!( acasei_standard_dfa_default, &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], Standard, |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true).dfa(true); + b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true); } ); testconfig!( overlapping, - acasei_standard_overlapping_nfa_default, + acasei_standard_overlapping_default, &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], Standard, |b: &mut AhoCorasickBuilder| { b.ascii_case_insensitive(true); } ); +testconfig!( + overlapping, + acasei_standard_overlapping_nfa_noncontig_default, + &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], + Standard, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) + .ascii_case_insensitive(true); + } +); +testconfig!( + overlapping, + acasei_standard_overlapping_nfa_contig_default, + &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], + Standard, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::ContiguousNFA)) + .ascii_case_insensitive(true); + } +); testconfig!( overlapping, acasei_standard_overlapping_dfa_default, &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], Standard, |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true).dfa(true); + b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true); } ); testconfig!( - acasei_leftmost_first_nfa_default, + acasei_leftmost_first_default, &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], LeftmostFirst, |b: &mut AhoCorasickBuilder| { b.ascii_case_insensitive(true); } ); +testconfig!( + acasei_leftmost_first_nfa_noncontig_default, + &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], + LeftmostFirst, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) + .ascii_case_insensitive(true); + } +); +testconfig!( + acasei_leftmost_first_nfa_contig_default, + &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], + LeftmostFirst, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::ContiguousNFA)) + .ascii_case_insensitive(true); + } +); testconfig!( acasei_leftmost_first_dfa_default, &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], LeftmostFirst, |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true).dfa(true); + b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true); } ); testconfig!( - acasei_leftmost_longest_nfa_default, + acasei_leftmost_longest_default, &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], LeftmostLongest, |b: &mut AhoCorasickBuilder| { b.ascii_case_insensitive(true); } ); +testconfig!( + acasei_leftmost_longest_nfa_noncontig_default, + &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], + LeftmostLongest, + |b: &mut AhoCorasickBuilder| { + b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) + 
.ascii_case_insensitive(true);
+    }
+);
+testconfig!(
+    acasei_leftmost_longest_nfa_contig_default,
+    &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
+    LeftmostLongest,
+    |b: &mut AhoCorasickBuilder| {
+        b.kind(Some(AhoCorasickKind::ContiguousNFA))
+            .ascii_case_insensitive(true);
+    }
+);
 testconfig!(
     acasei_leftmost_longest_dfa_default,
     &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
     LeftmostLongest,
     |b: &mut AhoCorasickBuilder| {
-        b.ascii_case_insensitive(true).dfa(true);
+        b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true);
     }
 );
@@ -1030,7 +1330,7 @@ fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
     |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
         matches
             .into_iter()
-            .map(|m| (m.pattern(), m.start(), m.end()))
+            .map(|m| (m.pattern().as_usize(), m.start(), m.end()))
             .collect()
     };
     for &tests in which {
@@ -1047,6 +1347,37 @@ fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
     }
 }
 
+// Like 'run_search_tests', but we skip any tests that contain the empty
+// pattern because stream searching doesn't support it.
+#[cfg(feature = "std")]
+fn run_stream_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
+    which: TestCollection,
+    mut f: F,
+) {
+    let get_match_triples =
+        |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
+            matches
+                .into_iter()
+                .map(|m| (m.pattern().as_usize(), m.start(), m.end()))
+                .collect()
+        };
+    for &tests in which {
+        for test in tests {
+            if test.patterns.iter().any(|p| p.is_empty()) {
+                continue;
+            }
+            assert_eq!(
+                test.matches,
+                get_match_triples(f(&test)).as_slice(),
+                "test: {}, patterns: {:?}, haystack: {:?}",
+                test.name,
+                test.patterns,
+                test.haystack
+            );
+        }
+    }
+}
+
 #[test]
 fn search_tests_have_unique_names() {
     let assert = |constname, tests: &[SearchTest]| {
@@ -1073,55 +1404,129 @@ fn search_tests_have_unique_names() {
     assert("REGRESSION", REGRESSION);
 }
 
+#[cfg(feature = "std")]
 #[test]
 #[should_panic]
 fn stream_not_allowed_leftmost_first() {
-    let fsm = AhoCorasickBuilder::new()
+    let fsm = AhoCorasick::builder()
         .match_kind(MatchKind::LeftmostFirst)
-        .build(None::<String>);
+        .build(None::<String>)
+        .unwrap();
     assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
 }
 
+#[cfg(feature = "std")]
 #[test]
 #[should_panic]
 fn stream_not_allowed_leftmost_longest() {
-    let fsm = AhoCorasickBuilder::new()
+    let fsm = AhoCorasick::builder()
         .match_kind(MatchKind::LeftmostLongest)
-        .build(None::<String>);
+        .build(None::<String>)
+        .unwrap();
    assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
 }
 
 #[test]
 #[should_panic]
 fn overlapping_not_allowed_leftmost_first() {
-    let fsm = AhoCorasickBuilder::new()
+    let fsm = AhoCorasick::builder()
         .match_kind(MatchKind::LeftmostFirst)
-        .build(None::<String>);
+        .build(None::<String>)
+        .unwrap();
     assert_eq!(fsm.find_overlapping_iter("").count(), 0);
 }
 
 #[test]
 #[should_panic]
 fn overlapping_not_allowed_leftmost_longest() {
-    let fsm = AhoCorasickBuilder::new()
+    let fsm = AhoCorasick::builder()
         .match_kind(MatchKind::LeftmostLongest)
-        .build(None::<String>);
+        .build(None::<String>)
+        .unwrap();
     assert_eq!(fsm.find_overlapping_iter("").count(), 0);
 }
 
+// This tests that if we build an AC matcher with an "unanchored" start kind,
+// then we can't run an anchored search even if the underlying searcher
+// supports it.
+//
+// The key bit here is that both of the NFAs in this crate unconditionally
+// support both unanchored and anchored searches, but the DFA does not because
+// of the added cost of doing so.
+// To avoid the top-level AC matcher sometimes supporting anchored and
+// sometimes not (depending on which searcher it chooses to use internally),
+// we ensure that the given 'StartKind' is always respected.
 #[test]
-fn state_id_too_small() {
-    let mut patterns = vec![];
-    for c1 in (b'a'..b'z').map(|b| b as char) {
-        for c2 in (b'a'..b'z').map(|b| b as char) {
-            for c3 in (b'a'..b'z').map(|b| b as char) {
-                patterns.push(format!("{}{}{}", c1, c2, c3));
-            }
-        }
-    }
-    let result =
-        AhoCorasickBuilder::new().build_with_size::<u8, _, _>(&patterns);
-    assert!(result.is_err());
+fn anchored_not_allowed_even_if_technically_available() {
+    let ac = AhoCorasick::builder()
+        .kind(Some(AhoCorasickKind::NoncontiguousNFA))
+        .start_kind(StartKind::Unanchored)
+        .build(&["foo"])
+        .unwrap();
+    assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err());
+
+    let ac = AhoCorasick::builder()
+        .kind(Some(AhoCorasickKind::ContiguousNFA))
+        .start_kind(StartKind::Unanchored)
+        .build(&["foo"])
+        .unwrap();
+    assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err());
+
+    // For completeness, check that the DFA returns an error too.
+    let ac = AhoCorasick::builder()
+        .kind(Some(AhoCorasickKind::DFA))
+        .start_kind(StartKind::Unanchored)
+        .build(&["foo"])
+        .unwrap();
+    assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err());
+}
+
+// This is like the test above, but with unanchored and anchored flipped. That
+// is, we ask for an AC searcher with anchored support and check that
+// unanchored searches return an error even if the underlying searcher would
+// technically support it.
+#[test]
+fn unanchored_not_allowed_even_if_technically_available() {
+    let ac = AhoCorasick::builder()
+        .kind(Some(AhoCorasickKind::NoncontiguousNFA))
+        .start_kind(StartKind::Anchored)
+        .build(&["foo"])
+        .unwrap();
+    assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err());
+
+    let ac = AhoCorasick::builder()
+        .kind(Some(AhoCorasickKind::ContiguousNFA))
+        .start_kind(StartKind::Anchored)
+        .build(&["foo"])
+        .unwrap();
+    assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err());
+
+    // For completeness, check that the DFA returns an error too.
+    let ac = AhoCorasick::builder()
+        .kind(Some(AhoCorasickKind::DFA))
+        .start_kind(StartKind::Anchored)
+        .build(&["foo"])
+        .unwrap();
+    assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err());
+}
+
+// This tests that a prefilter does not cause a search to report a match
+// outside the bounds provided by the caller.
+//
+// This is a regression test for a bug I introduced during the rewrite of most
+// of the crate after 0.7. It was never released. The tricky part here is
+// ensuring we get a prefilter that can report matches on its own (such as the
+// packed searcher). Otherwise, prefilters that report false positives might
+// have searched past the bounds provided by the caller, but confirming the
+// match would subsequently fail.
+#[test]
+fn prefilter_stays_in_bounds() {
+    let ac = AhoCorasick::builder()
+        .match_kind(MatchKind::LeftmostFirst)
+        .build(&["sam", "frodo", "pippin", "merry", "gandalf", "sauron"])
+        .unwrap();
+    let haystack = "foo gandalf";
+    assert_eq!(None, ac.find(Input::new(haystack).range(0..10)));
 }
 
 // See: https://github.com/BurntSushi/aho-corasick/issues/44
@@ -1130,9 +1535,10 @@
 // visit an exponential number of states when filling in failure transitions.
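+// Note: the 'find' assertion below is mostly incidental. The regression was
+// in construction itself, which used to visit an exponential number of
+// states, so the interesting bit is that 'build' terminates promptly.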
 #[test]
 fn regression_ascii_case_insensitive_no_exponential() {
-    let ac = AhoCorasickBuilder::new()
+    let ac = AhoCorasick::builder()
         .ascii_case_insensitive(true)
-        .build(&["Tsubaki House-Triple Shot Vol01校花三姐妹"]);
+        .build(&["Tsubaki House-Triple Shot Vol01校花三姐妹"])
+        .unwrap();
     assert!(ac.find("").is_none());
 }
 
@@ -1145,24 +1551,23 @@ fn regression_rare_byte_prefilter() {
     use crate::AhoCorasick;
 
-    let ac = AhoCorasick::new_auto_configured(&["ab/j/", "x/"]);
+    let ac = AhoCorasick::new(&["ab/j/", "x/"]).unwrap();
     assert!(ac.is_match("ab/j/"));
 }
 
 #[test]
 fn regression_case_insensitive_prefilter() {
-    use crate::AhoCorasickBuilder;
-
     for c in b'a'..b'z' {
         for c2 in b'a'..b'z' {
             let c = c as char;
             let c2 = c2 as char;
             let needle = format!("{}{}", c, c2).to_lowercase();
             let haystack = needle.to_uppercase();
-            let ac = AhoCorasickBuilder::new()
+            let ac = AhoCorasick::builder()
                 .ascii_case_insensitive(true)
                 .prefilter(true)
-                .build(&[&needle]);
+                .build(&[&needle])
+                .unwrap();
             assert_eq!(
                 1,
                 ac.find_iter(&haystack).count(),
@@ -1178,6 +1583,7 @@ fn regression_case_insensitive_prefilter() {
 // See: https://github.com/BurntSushi/aho-corasick/issues/64
 //
 // This occurs when the rare byte prefilter is active.
+#[cfg(feature = "std")]
 #[test]
 fn regression_stream_rare_byte_prefilter() {
     use std::io::Read;
@@ -1186,9 +1592,9 @@ fn regression_stream_rare_byte_prefilter() {
     const MAGIC: [u8; 5] = *b"1234j";
 
-    // NOTE: The test fails for value in 8188..=8191 These value put the string
-    // to search accross two call to read because the buffer size is 8192 by
-    // default.
-    const BEGIN: usize = 8191;
+    // NOTE: The test fails for values in 8188..=8191. These values put the
+    // string to search across two calls to read because the buffer size is
+    // 64KB by default.
+    const BEGIN: usize = 65_535;
 
     /// This is just a structure that implements Reader. The reader
     /// implementation will simulate a file filled with 0, except for the MAGIC
     /// string at offset BEGIN.
     #[derive(Default)]
     struct R {
         read: usize,
     }
 
     impl Read for R {
-        fn read(&mut self, buf: &mut [u8]) -> ::std::io::Result<usize> {
-            //dbg!(buf.len());
+        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
             if self.read > 100000 {
                 return Ok(0);
             }
@@ -1224,16 +1636,21 @@
         }
     }
 
-    fn run() -> ::std::io::Result<()> {
-        let aut = AhoCorasickBuilder::new().build(&[&MAGIC]);
+    fn run() -> std::io::Result<()> {
+        let aut = AhoCorasick::builder()
+            // Disable byte classes to make debugging the automaton easier. It
+            // should have no effect on the test result.
+            .byte_classes(false)
+            .build(&[&MAGIC])
+            .unwrap();
         // While reading from a vector, it works:
-        let mut buf = vec![];
+        let mut buf = alloc::vec![];
         R::default().read_to_end(&mut buf)?;
         let from_whole = aut.find_iter(&buf).next().unwrap().start();
 
-        //But using stream_find_iter fails!
-        let mut file = R::default();
+        // But using stream_find_iter fails!
+        let mut file = std::io::BufReader::new(R::default());
         let begin = aut
             .stream_find_iter(&mut file)
             .next
diff --git a/vendor/aho-corasick/src/transducer.rs b/vendor/aho-corasick/src/transducer.rs
new file mode 100644
index 0000000..39bb240
--- /dev/null
+++ b/vendor/aho-corasick/src/transducer.rs
@@ -0,0 +1,270 @@
+/*!
+Provides implementations of `fst::Automaton` for Aho-Corasick automata.
+
+This works by providing two wrapper types, [`Anchored`] and [`Unanchored`].
+The former executes an anchored search on an FST while the latter executes
+an unanchored search.
+Building these wrappers is fallible and will fail if the underlying
+Aho-Corasick automaton does not support the type of search it represents.
+*/
+
+use crate::{
+    automaton::{Automaton, StateID},
+    Anchored as AcAnchored, Input, MatchError,
+};
+
+/// Represents an unanchored Aho-Corasick search of a finite state transducer.
+///
+/// Wrapping an Aho-Corasick automaton in `Unanchored` will fail if the
+/// underlying automaton does not support unanchored searches.
+///
+/// # Example
+///
+/// This shows how to build an FST of keys and then run an unanchored search on
+/// those keys using an Aho-Corasick automaton.
+///
+/// ```
+/// use aho_corasick::{nfa::contiguous::NFA, transducer::Unanchored};
+/// use fst::{Automaton, IntoStreamer, Set, Streamer};
+///
+/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
+/// let nfa = NFA::new(&["bcd", "x"]).unwrap();
+/// // NFAs always support both unanchored and anchored searches.
+/// let searcher = Unanchored::new(&nfa).unwrap();
+///
+/// let mut stream = set.search(searcher).into_stream();
+/// let mut results = vec![];
+/// while let Some(key) = stream.next() {
+///     results.push(std::str::from_utf8(key).unwrap().to_string());
+/// }
+/// assert_eq!(vec!["abcd", "bcd", "xyz"], results);
+/// ```
+#[derive(Clone, Debug)]
+pub struct Unanchored<A>(A);
+
+impl<A: Automaton> Unanchored<A> {
+    /// Create a new `Unanchored` implementation of the `fst::Automaton` trait.
+    ///
+    /// If the given Aho-Corasick automaton does not support unanchored
+    /// searches, then this returns an error.
+    pub fn new(aut: A) -> Result<Unanchored<A>, MatchError> {
+        let input = Input::new("").anchored(AcAnchored::No);
+        let _ = aut.start_state(&input)?;
+        Ok(Unanchored(aut))
+    }
+
+    /// Returns a borrow to the underlying automaton.
+    pub fn as_ref(&self) -> &A {
+        &self.0
+    }
+
+    /// Unwrap this value and return the inner automaton.
+    pub fn into_inner(self) -> A {
+        self.0
+    }
+}
+
+impl<A: Automaton> fst::Automaton for Unanchored<A> {
+    type State = StateID;
+
+    #[inline]
+    fn start(&self) -> StateID {
+        let input = Input::new("").anchored(AcAnchored::No);
+        self.0.start_state(&input).expect("support for unanchored searches")
+    }
+
+    #[inline]
+    fn is_match(&self, state: &StateID) -> bool {
+        self.0.is_match(*state)
+    }
+
+    #[inline]
+    fn accept(&self, state: &StateID, byte: u8) -> StateID {
+        if fst::Automaton::is_match(self, state) {
+            return *state;
+        }
+        self.0.next_state(AcAnchored::No, *state, byte)
+    }
+
+    #[inline]
+    fn can_match(&self, state: &StateID) -> bool {
+        !self.0.is_dead(*state)
+    }
+}
+
+/// Represents an anchored Aho-Corasick search of a finite state transducer.
+///
+/// Wrapping an Aho-Corasick automaton in `Anchored` will fail if the
+/// underlying automaton does not support anchored searches.
+///
+/// # Example
+///
+/// This shows how to build an FST of keys and then run an anchored search on
+/// those keys using an Aho-Corasick automaton.
+///
+/// ```
+/// use aho_corasick::{nfa::contiguous::NFA, transducer::Anchored};
+/// use fst::{Automaton, IntoStreamer, Set, Streamer};
+///
+/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
+/// let nfa = NFA::new(&["bcd", "x"]).unwrap();
+/// // NFAs always support both unanchored and anchored searches.
+/// let searcher = Anchored::new(&nfa).unwrap();
+///
+/// let mut stream = set.search(searcher).into_stream();
+/// let mut results = vec![];
+/// while let Some(key) = stream.next() {
+///     results.push(std::str::from_utf8(key).unwrap().to_string());
+/// }
+/// assert_eq!(vec!["bcd", "xyz"], results);
+/// ```
+///
+/// This is like the example above, except we use an Aho-Corasick DFA, which
+/// requires explicitly configuring it to support anchored searches. (NFAs
+/// unconditionally support both unanchored and anchored searches.)
+///
+/// ```
+/// use aho_corasick::{dfa::DFA, transducer::Anchored, StartKind};
+/// use fst::{Automaton, IntoStreamer, Set, Streamer};
+///
+/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
+/// let dfa = DFA::builder()
+///     .start_kind(StartKind::Anchored)
+///     .build(&["bcd", "x"])
+///     .unwrap();
+/// // We've explicitly configured our DFA to support anchored searches.
+/// let searcher = Anchored::new(&dfa).unwrap();
+///
+/// let mut stream = set.search(searcher).into_stream();
+/// let mut results = vec![];
+/// while let Some(key) = stream.next() {
+///     results.push(std::str::from_utf8(key).unwrap().to_string());
+/// }
+/// assert_eq!(vec!["bcd", "xyz"], results);
+/// ```
+#[derive(Clone, Debug)]
+pub struct Anchored<A>(A);
+
+impl<A: Automaton> Anchored<A> {
+    /// Create a new `Anchored` implementation of the `fst::Automaton` trait.
+    ///
+    /// If the given Aho-Corasick automaton does not support anchored searches,
+    /// then this returns an error.
+    pub fn new(aut: A) -> Result<Anchored<A>, MatchError> {
+        let input = Input::new("").anchored(AcAnchored::Yes);
+        let _ = aut.start_state(&input)?;
+        Ok(Anchored(aut))
+    }
+
+    /// Returns a borrow to the underlying automaton.
+    pub fn as_ref(&self) -> &A {
+        &self.0
+    }
+
+    /// Unwrap this value and return the inner automaton.
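+    /// (This consumes the wrapper; use `as_ref` to borrow the underlying
+    /// automaton instead.)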
+    pub fn into_inner(self) -> A {
+        self.0
+    }
+}
+
+impl<A: Automaton> fst::Automaton for Anchored<A> {
+    type State = StateID;
+
+    #[inline]
+    fn start(&self) -> StateID {
+        let input = Input::new("").anchored(AcAnchored::Yes);
+        self.0.start_state(&input).expect("support for anchored searches")
+    }
+
+    #[inline]
+    fn is_match(&self, state: &StateID) -> bool {
+        self.0.is_match(*state)
+    }
+
+    #[inline]
+    fn accept(&self, state: &StateID, byte: u8) -> StateID {
+        if fst::Automaton::is_match(self, state) {
+            return *state;
+        }
+        self.0.next_state(AcAnchored::Yes, *state, byte)
+    }
+
+    #[inline]
+    fn can_match(&self, state: &StateID) -> bool {
+        !self.0.is_dead(*state)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use alloc::{string::String, vec, vec::Vec};
+
+    use fst::{Automaton, IntoStreamer, Set, Streamer};
+
+    use crate::{
+        dfa::DFA,
+        nfa::{contiguous, noncontiguous},
+        StartKind,
+    };
+
+    use super::*;
+
+    fn search<A: Automaton<State = StateID>>(
+        set: &Set<Vec<u8>>,
+        aut: A,
+    ) -> Vec<String> {
+        let mut stream = set.search(aut).into_stream();
+        let mut results = vec![];
+        while let Some(key) = stream.next() {
+            results.push(String::from(core::str::from_utf8(key).unwrap()));
+        }
+        results
+    }
+
+    #[test]
+    fn unanchored() {
+        let set =
+            Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
+                .unwrap();
+        let patterns = vec!["baz", "bax"];
+        let expected = vec!["baz", "xbax"];
+
+        let aut = Unanchored(noncontiguous::NFA::new(&patterns).unwrap());
+        let got = search(&set, &aut);
+        assert_eq!(got, expected);
+
+        let aut = Unanchored(contiguous::NFA::new(&patterns).unwrap());
+        let got = search(&set, &aut);
+        assert_eq!(got, expected);
+
+        let aut = Unanchored(DFA::new(&patterns).unwrap());
+        let got = search(&set, &aut);
+        assert_eq!(got, expected);
+    }
+
+    #[test]
+    fn anchored() {
+        let set =
+            Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
+                .unwrap();
+        let patterns = vec!["baz", "bax"];
+        let expected = vec!["baz"];
+
+        let aut = Anchored(noncontiguous::NFA::new(&patterns).unwrap());
+        let got = search(&set, &aut);
+        assert_eq!(got, expected);
+
+        let aut = Anchored(contiguous::NFA::new(&patterns).unwrap());
+        let got = search(&set, &aut);
+        assert_eq!(got, expected);
+
+        let aut = Anchored(
+            DFA::builder()
+                .start_kind(StartKind::Anchored)
+                .build(&patterns)
+                .unwrap(),
+        );
+        let got = search(&set, &aut);
+        assert_eq!(got, expected);
+    }
+}
diff --git a/vendor/aho-corasick/src/util/alphabet.rs b/vendor/aho-corasick/src/util/alphabet.rs
new file mode 100644
index 0000000..69724fa
--- /dev/null
+++ b/vendor/aho-corasick/src/util/alphabet.rs
@@ -0,0 +1,409 @@
+use crate::util::int::Usize;
+
+/// A representation of byte oriented equivalence classes.
+///
+/// This is used in finite state machines to reduce the size of the transition
+/// table. This can have a particularly large impact not only on the total size
+/// of an FSM, but also on FSM build times because it reduces the number of
+/// transitions that need to be visited/set.
+#[derive(Clone, Copy)]
+pub(crate) struct ByteClasses([u8; 256]);
+
+impl ByteClasses {
+    /// Creates a new set of equivalence classes where all bytes are mapped to
+    /// the same class.
+    pub(crate) fn empty() -> ByteClasses {
+        ByteClasses([0; 256])
+    }
+
+    /// Creates a new set of equivalence classes where each byte belongs to
+    /// its own equivalence class.
+    pub(crate) fn singletons() -> ByteClasses {
+        let mut classes = ByteClasses::empty();
+        for b in 0..=255 {
+            classes.set(b, b);
+        }
+        classes
+    }
+
+    /// Set the equivalence class for the given byte.
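+    ///
+    /// (Note: `alphabet_len` below assumes that the class of byte `255` is
+    /// the largest class identifier, which holds for classes produced by
+    /// `ByteClassSet::byte_classes`.)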
+    #[inline]
+    pub(crate) fn set(&mut self, byte: u8, class: u8) {
+        self.0[usize::from(byte)] = class;
+    }
+
+    /// Get the equivalence class for the given byte.
+    #[inline]
+    pub(crate) fn get(&self, byte: u8) -> u8 {
+        self.0[usize::from(byte)]
+    }
+
+    /// Return the total number of elements in the alphabet represented by
+    /// these equivalence classes. Equivalently, this returns the total number
+    /// of equivalence classes.
+    #[inline]
+    pub(crate) fn alphabet_len(&self) -> usize {
+        // Add one since the number of equivalence classes is one bigger than
+        // the last one.
+        usize::from(self.0[255]) + 1
+    }
+
+    /// Returns the stride, as a base-2 exponent, required for these
+    /// equivalence classes.
+    ///
+    /// The stride is always the smallest power of 2 that is greater than or
+    /// equal to the alphabet length. This is done so that converting between
+    /// state IDs and indices can be done with shifts alone, which is much
+    /// faster than integer division. The "stride2" is the exponent. i.e.,
+    /// `2^stride2 = stride`. For example, an alphabet length of `5` yields
+    /// `stride2 = 3` and a stride of `8`.
+    pub(crate) fn stride2(&self) -> usize {
+        let zeros = self.alphabet_len().next_power_of_two().trailing_zeros();
+        usize::try_from(zeros).unwrap()
+    }
+
+    /// Returns the stride for these equivalence classes, which corresponds
+    /// to the smallest power of 2 greater than or equal to the number of
+    /// equivalence classes.
+    pub(crate) fn stride(&self) -> usize {
+        1 << self.stride2()
+    }
+
+    /// Returns true if and only if every byte in this class maps to its own
+    /// equivalence class. Equivalently, there are 256 equivalence classes
+    /// and each class contains exactly one byte.
+    #[inline]
+    pub(crate) fn is_singleton(&self) -> bool {
+        self.alphabet_len() == 256
+    }
+
+    /// Returns an iterator over all equivalence classes in this set.
+    pub(crate) fn iter(&self) -> ByteClassIter {
+        ByteClassIter { it: 0..self.alphabet_len() }
+    }
+
+    /// Returns an iterator of the bytes in the given equivalence class.
+    pub(crate) fn elements(&self, class: u8) -> ByteClassElements<'_> {
+        ByteClassElements { classes: self, class, bytes: 0..=255 }
+    }
+
+    /// Returns an iterator of byte ranges in the given equivalence class.
+    ///
+    /// That is, a sequence of contiguous ranges are returned. Typically, every
+    /// class maps to a single contiguous range.
+    fn element_ranges(&self, class: u8) -> ByteClassElementRanges<'_> {
+        ByteClassElementRanges { elements: self.elements(class), range: None }
+    }
+}
+
+impl core::fmt::Debug for ByteClasses {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        if self.is_singleton() {
+            write!(f, "ByteClasses()")
+        } else {
+            write!(f, "ByteClasses(")?;
+            for (i, class) in self.iter().enumerate() {
+                if i > 0 {
+                    write!(f, ", ")?;
+                }
+                write!(f, "{:?} => [", class)?;
+                for (start, end) in self.element_ranges(class) {
+                    if start == end {
+                        write!(f, "{:?}", start)?;
+                    } else {
+                        write!(f, "{:?}-{:?}", start, end)?;
+                    }
+                }
+                write!(f, "]")?;
+            }
+            write!(f, ")")
+        }
+    }
+}
+
+/// An iterator over each equivalence class.
+#[derive(Debug)]
+pub(crate) struct ByteClassIter {
+    it: core::ops::Range<usize>,
+}
+
+impl Iterator for ByteClassIter {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<u8> {
+        self.it.next().map(|class| class.as_u8())
+    }
+}
+
+/// An iterator over all elements in a specific equivalence class.
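+///
+/// For example, for the classes built from `ByteClassSet::set_range(b'a',
+/// b'z')` in the tests below, `elements(1)` yields exactly the bytes
+/// `b'a'..=b'z'`.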
+
+/// An iterator over all elements in a specific equivalence class.
+#[derive(Debug)]
+pub(crate) struct ByteClassElements<'a> {
+    classes: &'a ByteClasses,
+    class: u8,
+    bytes: core::ops::RangeInclusive<u8>,
+}
+
+impl<'a> Iterator for ByteClassElements<'a> {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<u8> {
+        while let Some(byte) = self.bytes.next() {
+            if self.class == self.classes.get(byte) {
+                return Some(byte);
+            }
+        }
+        None
+    }
+}
+
+/// An iterator over all elements in an equivalence class expressed as a
+/// sequence of contiguous ranges.
+#[derive(Debug)]
+pub(crate) struct ByteClassElementRanges<'a> {
+    elements: ByteClassElements<'a>,
+    range: Option<(u8, u8)>,
+}
+
+impl<'a> Iterator for ByteClassElementRanges<'a> {
+    type Item = (u8, u8);
+
+    fn next(&mut self) -> Option<(u8, u8)> {
+        loop {
+            let element = match self.elements.next() {
+                None => return self.range.take(),
+                Some(element) => element,
+            };
+            match self.range.take() {
+                None => {
+                    self.range = Some((element, element));
+                }
+                Some((start, end)) => {
+                    if usize::from(end) + 1 != usize::from(element) {
+                        self.range = Some((element, element));
+                        return Some((start, end));
+                    }
+                    self.range = Some((start, element));
+                }
+            }
+        }
+    }
+}
+
+/// A partitioning of bytes into equivalence classes.
+///
+/// A byte class set keeps track of an *approximation* of equivalence classes
+/// of bytes during NFA construction. That is, every byte in an equivalence
+/// class cannot discriminate between a match and a non-match.
+///
+/// Note that this may not compute the minimal set of equivalence classes.
+/// Basically, any byte in a pattern given to the noncontiguous NFA builder
+/// will automatically be treated as its own equivalence class. All other
+/// bytes---any byte not in any pattern---will be treated as their own
+/// equivalence classes. In theory, all bytes not in any pattern should
+/// be part of a single equivalence class, but in practice, we only treat
+/// contiguous ranges of bytes as an equivalence class. So the number of
+/// classes computed may be bigger than necessary. This usually doesn't make
+/// much of a difference, and keeps the implementation simple.
+#[derive(Clone, Debug)]
+pub(crate) struct ByteClassSet(ByteSet);
+
+impl Default for ByteClassSet {
+    fn default() -> ByteClassSet {
+        ByteClassSet::empty()
+    }
+}
+
+impl ByteClassSet {
+    /// Create a new set of byte classes where all bytes are part of the same
+    /// equivalence class.
+    pub(crate) fn empty() -> Self {
+        ByteClassSet(ByteSet::empty())
+    }
+
+    /// Indicate that the given range of bytes (inclusive) can discriminate a
+    /// match between it and all other bytes outside of the range.
+    pub(crate) fn set_range(&mut self, start: u8, end: u8) {
+        debug_assert!(start <= end);
+        if start > 0 {
+            self.0.add(start - 1);
+        }
+        self.0.add(end);
+    }
+
+    /// Convert this boolean set to a map that maps all byte values to their
+    /// corresponding equivalence class. The last mapping indicates the largest
+    /// equivalence class identifier (which is never bigger than 255).
+    pub(crate) fn byte_classes(&self) -> ByteClasses {
+        let mut classes = ByteClasses::empty();
+        let mut class = 0u8;
+        let mut b = 0u8;
+        loop {
+            classes.set(b, class);
+            if b == 255 {
+                break;
+            }
+            if self.0.contains(b) {
+                class = class.checked_add(1).unwrap();
+            }
+            b = b.checked_add(1).unwrap();
+        }
+        classes
+    }
+}
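The boundary-marking scheme is easiest to follow on a single range. A small usage sketch (it mirrors the tests further down in this file):

    // set_range(b'a', b'z') adds bytes 0x60 (b'a' - 1) and 0x7A (b'z') to
    // the underlying ByteSet. byte_classes() then walks 0..=255, bumping the
    // class id after each marked byte, which yields three classes:
    //   class 0 = 0x00-0x60, class 1 = b'a'-b'z', class 2 = 0x7B-0xFF
    let mut set = ByteClassSet::empty();
    set.set_range(b'a', b'z');
    let classes = set.byte_classes();
    assert_eq!(classes.get(b'm'), 1);
    assert_eq!(classes.alphabet_len(), 3);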
+
+/// A simple set of bytes that is reasonably cheap to copy and allocation free.
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
+pub(crate) struct ByteSet {
+    bits: BitSet,
+}
+
+/// The representation of a byte set. Split out so that we can define a
+/// convenient Debug impl for it while keeping "ByteSet" in the output.
+#[derive(Clone, Copy, Default, Eq, PartialEq)]
+struct BitSet([u128; 2]);
+
+impl ByteSet {
+    /// Create an empty set of bytes.
+    pub(crate) fn empty() -> ByteSet {
+        ByteSet { bits: BitSet([0; 2]) }
+    }
+
+    /// Add a byte to this set.
+    ///
+    /// If the given byte already belongs to this set, then this is a no-op.
+    pub(crate) fn add(&mut self, byte: u8) {
+        let bucket = byte / 128;
+        let bit = byte % 128;
+        self.bits.0[usize::from(bucket)] |= 1 << bit;
+    }
+
+    /// Return true if and only if the given byte is in this set.
+    pub(crate) fn contains(&self, byte: u8) -> bool {
+        let bucket = byte / 128;
+        let bit = byte % 128;
+        self.bits.0[usize::from(bucket)] & (1 << bit) > 0
+    }
+}
+
+impl core::fmt::Debug for BitSet {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let mut fmtd = f.debug_set();
+        for b in 0u8..=255 {
+            if (ByteSet { bits: *self }).contains(b) {
+                fmtd.entry(&b);
+            }
+        }
+        fmtd.finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use alloc::{vec, vec::Vec};
+
+    use super::*;
+
+    #[test]
+    fn byte_classes() {
+        let mut set = ByteClassSet::empty();
+        set.set_range(b'a', b'z');
+
+        let classes = set.byte_classes();
+        assert_eq!(classes.get(0), 0);
+        assert_eq!(classes.get(1), 0);
+        assert_eq!(classes.get(2), 0);
+        assert_eq!(classes.get(b'a' - 1), 0);
+        assert_eq!(classes.get(b'a'), 1);
+        assert_eq!(classes.get(b'm'), 1);
+        assert_eq!(classes.get(b'z'), 1);
+        assert_eq!(classes.get(b'z' + 1), 2);
+        assert_eq!(classes.get(254), 2);
+        assert_eq!(classes.get(255), 2);
+
+        let mut set = ByteClassSet::empty();
+        set.set_range(0, 2);
+        set.set_range(4, 6);
+        let classes = set.byte_classes();
+        assert_eq!(classes.get(0), 0);
+        assert_eq!(classes.get(1), 0);
+        assert_eq!(classes.get(2), 0);
+        assert_eq!(classes.get(3), 1);
+        assert_eq!(classes.get(4), 2);
+        assert_eq!(classes.get(5), 2);
+        assert_eq!(classes.get(6), 2);
+        assert_eq!(classes.get(7), 3);
+        assert_eq!(classes.get(255), 3);
+    }
+
+    #[test]
+    fn full_byte_classes() {
+        let mut set = ByteClassSet::empty();
+        for b in 0u8..=255 {
+            set.set_range(b, b);
+        }
+        assert_eq!(set.byte_classes().alphabet_len(), 256);
+    }
+
+    #[test]
+    fn elements_typical() {
+        let mut set = ByteClassSet::empty();
+        set.set_range(b'b', b'd');
+        set.set_range(b'g', b'm');
+        set.set_range(b'z', b'z');
+        let classes = set.byte_classes();
+        // class 0: \x00-a
+        // class 1: b-d
+        // class 2: e-f
+        // class 3: g-m
+        // class 4: n-y
+        // class 5: z-z
+        // class 6: \x7B-\xFF
+        assert_eq!(classes.alphabet_len(), 7);
+
+        let elements = classes.elements(0).collect::<Vec<u8>>();
+        assert_eq!(elements.len(), 98);
+        assert_eq!(elements[0], b'\x00');
+        assert_eq!(elements[97], b'a');
+
+        let elements = classes.elements(1).collect::<Vec<u8>>();
+        assert_eq!(elements, vec![b'b', b'c', b'd'],);
+
+        let elements = classes.elements(2).collect::<Vec<u8>>();
+        assert_eq!(elements, vec![b'e', b'f'],);
+
+        let elements = classes.elements(3).collect::<Vec<u8>>();
+        assert_eq!(elements, vec![b'g', b'h', b'i', b'j', b'k', b'l', b'm',],);
+
+        let elements = classes.elements(4).collect::<Vec<u8>>();
+        assert_eq!(elements.len(), 12);
+        assert_eq!(elements[0], b'n');
+        assert_eq!(elements[11], b'y');
+
+        let elements = classes.elements(5).collect::<Vec<u8>>();
+        assert_eq!(elements, vec![b'z']);
+
+        let elements = classes.elements(6).collect::<Vec<u8>>();
+        assert_eq!(elements.len(), 133);
+        assert_eq!(elements[0], b'\x7B');
+        assert_eq!(elements[132], b'\xFF');
+    }
+
+    #[test]
+    fn elements_singletons() {
+        let classes = ByteClasses::singletons();
+        assert_eq!(classes.alphabet_len(), 256);
+
+        let elements = classes.elements(b'a').collect::<Vec<u8>>();
+        assert_eq!(elements, vec![b'a']);
+    }
+
+    #[test]
+    fn elements_empty() {
+        let classes = ByteClasses::empty();
+        assert_eq!(classes.alphabet_len(), 1);
+
+        let elements = classes.elements(0).collect::<Vec<u8>>();
+        assert_eq!(elements.len(), 256);
+        assert_eq!(elements[0], b'\x00');
+        assert_eq!(elements[255], b'\xFF');
+    }
+}
diff --git a/vendor/aho-corasick/src/buffer.rs b/vendor/aho-corasick/src/util/buffer.rs
similarity index 76%
rename from vendor/aho-corasick/src/buffer.rs
rename to vendor/aho-corasick/src/util/buffer.rs
index e7339eb..e9e982a 100644
--- a/vendor/aho-corasick/src/buffer.rs
+++ b/vendor/aho-corasick/src/util/buffer.rs
@@ -1,9 +1,7 @@
-use std::cmp;
-use std::io;
-use std::ptr;
+use alloc::{vec, vec::Vec};
 
 /// The default buffer capacity that we use for the stream buffer.
-const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB
+const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB
 
 /// A fairly simple roll buffer for supporting stream searches.
 ///
@@ -25,8 +23,16 @@ const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB
 ///
 /// A lot of the logic for dealing with this is unfortunately split out between
 /// this roll buffer and the `StreamChunkIter`.
+///
+/// Note also that this buffer is not actually required to just report matches.
+/// Because a `Match` is just some offsets. But it *is* required for supporting
+/// things like `try_stream_replace_all` because that needs some mechanism for
+/// knowing which bytes in the stream correspond to a match and which don't. So
+/// when a match occurs across two `read` calls, *something* needs to retain
+/// the bytes from the previous `read` call because you don't know before the
+/// second read call whether a match exists or not.
 #[derive(Debug)]
-pub struct Buffer {
+pub(crate) struct Buffer {
     /// The raw buffer contents. This has a fixed size and never increases.
     buf: Vec<u8>,
     /// The minimum size of the buffer, which is equivalent to the maximum
@@ -40,8 +46,8 @@ pub struct Buffer {
 impl Buffer {
     /// Create a new buffer for stream searching. The minimum buffer length
     /// given should be the size of the maximum possible match length.
-    pub fn new(min_buffer_len: usize) -> Buffer {
-        let min = cmp::max(1, min_buffer_len);
+    pub(crate) fn new(min_buffer_len: usize) -> Buffer {
+        let min = core::cmp::max(1, min_buffer_len);
         // The minimum buffer amount is also the amount that we roll our
         // buffer in order to support incremental searching. To this end,
         // our actual capacity needs to be at least 1 byte bigger than our
@@ -53,13 +59,13 @@ impl Buffer {
         // implementation with the minimal buffer size. For now, we just
        // uncomment out the next line and comment out the subsequent line.
         // let capacity = 1 + min;
-        let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
+        let capacity = core::cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
         Buffer { buf: vec![0; capacity], min, end: 0 }
     }
 
     /// Return the contents of this buffer.
     #[inline]
-    pub fn buffer(&self) -> &[u8] {
+    pub(crate) fn buffer(&self) -> &[u8] {
         &self.buf[..self.end]
     }
 
@@ -67,16 +73,10 @@ impl Buffer {
     /// smaller than this is if the stream itself contains less than the
     /// minimum buffer amount.
     #[inline]
-    pub fn min_buffer_len(&self) -> usize {
+    pub(crate) fn min_buffer_len(&self) -> usize {
         self.min
     }
 
-    /// Return the total length of the contents in the buffer.
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.end
-    }
-
     /// Return all free capacity in this buffer.
     fn free_buffer(&mut self) -> &mut [u8] {
         &mut self.buf[self.end..]
@@ -86,7 +86,10 @@ impl Buffer {
     /// this buffer's free capacity. If no more bytes could be read, then this
     /// returns false. Otherwise, this reads until it has filled the buffer
     /// past the minimum amount.
-    pub fn fill<R: io::Read>(&mut self, mut rdr: R) -> io::Result<bool> {
+    pub(crate) fn fill<R: std::io::Read>(
+        &mut self,
+        mut rdr: R,
+    ) -> std::io::Result<bool> {
         let mut readany = false;
         loop {
             let readlen = rdr.read(self.free_buffer())?;
@@ -95,7 +98,7 @@ impl Buffer {
             }
             readany = true;
             self.end += readlen;
-            if self.len() >= self.min {
+            if self.buffer().len() >= self.min {
                 return Ok(true);
             }
         }
@@ -107,26 +110,15 @@ impl Buffer {
     ///
     /// This should only be called when the entire contents of this buffer have
     /// been searched.
-    pub fn roll(&mut self) {
+    pub(crate) fn roll(&mut self) {
         let roll_start = self
             .end
             .checked_sub(self.min)
             .expect("buffer capacity should be bigger than minimum amount");
-        let roll_len = self.min;
+        let roll_end = roll_start + self.min;
 
-        assert!(roll_start + roll_len <= self.end);
-        unsafe {
-            // SAFETY: A buffer contains Copy data, so there's no problem
-            // moving it around. Safety also depends on our indices being in
-            // bounds, which they always should be, given the assert above.
-            //
-            // TODO: Switch to [T]::copy_within once our MSRV is high enough.
-            ptr::copy(
-                self.buf[roll_start..].as_ptr(),
-                self.buf.as_mut_ptr(),
-                roll_len,
-            );
-        }
-        self.end = roll_len;
+        assert!(roll_end <= self.end);
+        self.buf.copy_within(roll_start..roll_end, 0);
+        self.end = self.min;
     }
 }
diff --git a/vendor/aho-corasick/src/byte_frequencies.rs b/vendor/aho-corasick/src/util/byte_frequencies.rs
similarity index 100%
rename from vendor/aho-corasick/src/byte_frequencies.rs
rename to vendor/aho-corasick/src/util/byte_frequencies.rs
diff --git a/vendor/aho-corasick/src/util/debug.rs b/vendor/aho-corasick/src/util/debug.rs
new file mode 100644
index 0000000..22b5f22
--- /dev/null
+++ b/vendor/aho-corasick/src/util/debug.rs
@@ -0,0 +1,26 @@
+/// A type that wraps a single byte with a convenient fmt::Debug impl that
+/// escapes the byte.
+pub(crate) struct DebugByte(pub(crate) u8);
+
+impl core::fmt::Debug for DebugByte {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        // Special case ASCII space. It's too hard to read otherwise, so
+        // put quotes around it. I sometimes wonder whether just '\x20' would
+        // be better...
+        if self.0 == b' ' {
+            return write!(f, "' '");
+        }
+        // 10 bytes is enough to cover any output from ascii::escape_default.
+        let mut bytes = [0u8; 10];
+        let mut len = 0;
+        for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
+            // capitalize \xab to \xAB
+            if i >= 2 && b'a' <= b && b <= b'f' {
+                b -= 32;
+            }
+            bytes[len] = b;
+            len += 1;
+        }
+        write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap())
+    }
+}
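The intended call pattern for the roll buffer may be easier to see in miniature. The sketch below is illustrative only (the crate's real stream driver is the `StreamChunkIter` mentioned in the doc comment); the search step is elided:

    // Illustrative driver loop, assuming `min` is the maximum possible
    // match length so that no match spans more than one roll.
    fn scan<R: std::io::Read>(mut rdr: R, min: usize) -> std::io::Result<()> {
        let mut buf = Buffer::new(min);
        while buf.fill(&mut rdr)? {
            // ... search buf.buffer() here, deferring any match that ends in
            // the final buf.min_buffer_len() bytes to the next iteration ...
            buf.roll(); // copy the trailing `min` bytes back to the front
        }
        Ok(())
    }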
diff --git a/vendor/aho-corasick/src/util/error.rs b/vendor/aho-corasick/src/util/error.rs
new file mode 100644
index 0000000..326d046
--- /dev/null
+++ b/vendor/aho-corasick/src/util/error.rs
@@ -0,0 +1,259 @@
+use crate::util::{
+    primitives::{PatternID, SmallIndex},
+    search::MatchKind,
+};
+
+/// An error that occurred during the construction of an Aho-Corasick
+/// automaton.
+///
+/// Build errors occur when some kind of limit has been exceeded, either in
+/// the number of states, the number of patterns or the length of a pattern.
+/// These limits aren't part of the public API, but they should generally be
+/// large enough to handle most use cases.
+///
+/// When the `std` feature is enabled, this implements the `std::error::Error`
+/// trait.
+#[derive(Clone, Debug)]
+pub struct BuildError {
+    kind: ErrorKind,
+}
+
+/// The kind of error that occurred.
+#[derive(Clone, Debug)]
+enum ErrorKind {
+    /// An error that occurs when allocating a new state would result in an
+    /// identifier that exceeds the capacity of a `StateID`.
+    StateIDOverflow {
+        /// The maximum possible id.
+        max: u64,
+        /// The maximum ID requested.
+        requested_max: u64,
+    },
+    /// An error that occurs when adding a pattern to an Aho-Corasick
+    /// automaton would result in an identifier that exceeds the capacity of a
+    /// `PatternID`.
+    PatternIDOverflow {
+        /// The maximum possible id.
+        max: u64,
+        /// The maximum ID requested.
+        requested_max: u64,
+    },
+    /// Occurs when a pattern string is given to the Aho-Corasick constructor
+    /// that is too long.
+    PatternTooLong {
+        /// The ID of the pattern that was too long.
+        pattern: PatternID,
+        /// The length that was too long.
+        len: usize,
+    },
+}
+
+impl BuildError {
+    pub(crate) fn state_id_overflow(
+        max: u64,
+        requested_max: u64,
+    ) -> BuildError {
+        BuildError { kind: ErrorKind::StateIDOverflow { max, requested_max } }
+    }
+
+    pub(crate) fn pattern_id_overflow(
+        max: u64,
+        requested_max: u64,
+    ) -> BuildError {
+        BuildError {
+            kind: ErrorKind::PatternIDOverflow { max, requested_max },
+        }
+    }
+
+    pub(crate) fn pattern_too_long(
+        pattern: PatternID,
+        len: usize,
+    ) -> BuildError {
+        BuildError { kind: ErrorKind::PatternTooLong { pattern, len } }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for BuildError {}
+
+impl core::fmt::Display for BuildError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self.kind {
+            ErrorKind::StateIDOverflow { max, requested_max } => {
+                write!(
+                    f,
+                    "state identifier overflow: failed to create state ID \
+                     from {}, which exceeds the max of {}",
+                    requested_max, max,
+                )
+            }
+            ErrorKind::PatternIDOverflow { max, requested_max } => {
+                write!(
+                    f,
+                    "pattern identifier overflow: failed to create pattern ID \
+                     from {}, which exceeds the max of {}",
+                    requested_max, max,
+                )
+            }
+            ErrorKind::PatternTooLong { pattern, len } => {
+                write!(
+                    f,
+                    "pattern {} with length {} exceeds \
+                     the maximum pattern length of {}",
+                    pattern.as_usize(),
+                    len,
+                    SmallIndex::MAX.as_usize(),
+                )
+            }
+        }
+    }
+}
+
+/// An error that occurred during an Aho-Corasick search.
+///
+/// An error that occurs during a search is limited to some kind of
+/// misconfiguration that resulted in an illegal call. Stated differently,
+/// whether an error occurs is not dependent on the specific bytes in the
+/// haystack.
+///
+/// Examples of misconfiguration:
+///
+/// * Executing a stream or overlapping search on a searcher that was built
+/// with something other than [`MatchKind::Standard`](crate::MatchKind::Standard)
+/// semantics.
+/// * Requested an anchored or an unanchored search on a searcher that doesn't
+/// support unanchored or anchored searches, respectively.
+///
+/// When the `std` feature is enabled, this implements the `std::error::Error`
+/// trait.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct MatchError(alloc::boxed::Box<MatchErrorKind>);
+
+impl MatchError {
+    /// Create a new error value with the given kind.
+ /// + /// This is a more verbose version of the kind-specific constructors, e.g., + /// `MatchError::unsupported_stream`. + pub fn new(kind: MatchErrorKind) -> MatchError { + MatchError(alloc::boxed::Box::new(kind)) + } + + /// Returns a reference to the underlying error kind. + pub fn kind(&self) -> &MatchErrorKind { + &self.0 + } + + /// Create a new "invalid anchored search" error. This occurs when the + /// caller requests an anchored search but where anchored searches aren't + /// supported. + /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::InvalidInputAnchored`] kind. + pub fn invalid_input_anchored() -> MatchError { + MatchError::new(MatchErrorKind::InvalidInputAnchored) + } + + /// Create a new "invalid unanchored search" error. This occurs when the + /// caller requests an unanchored search but where unanchored searches + /// aren't supported. + /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::InvalidInputUnanchored`] kind. + pub fn invalid_input_unanchored() -> MatchError { + MatchError::new(MatchErrorKind::InvalidInputUnanchored) + } + + /// Create a new "unsupported stream search" error. This occurs when the + /// caller requests a stream search while using an Aho-Corasick automaton + /// with a match kind other than [`MatchKind::Standard`]. + /// + /// The match kind given should be the match kind of the automaton. It + /// should never be `MatchKind::Standard`. + pub fn unsupported_stream(got: MatchKind) -> MatchError { + MatchError::new(MatchErrorKind::UnsupportedStream { got }) + } + + /// Create a new "unsupported overlapping search" error. This occurs when + /// the caller requests an overlapping search while using an Aho-Corasick + /// automaton with a match kind other than [`MatchKind::Standard`]. + /// + /// The match kind given should be the match kind of the automaton. It + /// should never be `MatchKind::Standard`. + pub fn unsupported_overlapping(got: MatchKind) -> MatchError { + MatchError::new(MatchErrorKind::UnsupportedOverlapping { got }) + } + + /// Create a new "unsupported empty pattern" error. This occurs when the + /// caller requests a search for which matching an automaton that contains + /// an empty pattern string is not supported. + pub fn unsupported_empty() -> MatchError { + MatchError::new(MatchErrorKind::UnsupportedEmpty) + } +} + +/// The underlying kind of a [`MatchError`]. +/// +/// This is a **non-exhaustive** enum. That means new variants may be added in +/// a semver-compatible release. +#[non_exhaustive] +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum MatchErrorKind { + /// An error indicating that an anchored search was requested, but from a + /// searcher that was built without anchored support. + InvalidInputAnchored, + /// An error indicating that an unanchored search was requested, but from a + /// searcher that was built without unanchored support. + InvalidInputUnanchored, + /// An error indicating that a stream search was attempted on an + /// Aho-Corasick automaton with an unsupported `MatchKind`. + UnsupportedStream { + /// The match semantics for the automaton that was used. + got: MatchKind, + }, + /// An error indicating that an overlapping search was attempted on an + /// Aho-Corasick automaton with an unsupported `MatchKind`. + UnsupportedOverlapping { + /// The match semantics for the automaton that was used. 
+ got: MatchKind, + }, + /// An error indicating that the operation requested doesn't support + /// automatons that contain an empty pattern string. + UnsupportedEmpty, +} + +#[cfg(feature = "std")] +impl std::error::Error for MatchError {} + +impl core::fmt::Display for MatchError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match *self.kind() { + MatchErrorKind::InvalidInputAnchored => { + write!(f, "anchored searches are not supported or enabled") + } + MatchErrorKind::InvalidInputUnanchored => { + write!(f, "unanchored searches are not supported or enabled") + } + MatchErrorKind::UnsupportedStream { got } => { + write!( + f, + "match kind {:?} does not support stream searching", + got, + ) + } + MatchErrorKind::UnsupportedOverlapping { got } => { + write!( + f, + "match kind {:?} does not support overlapping searches", + got, + ) + } + MatchErrorKind::UnsupportedEmpty => { + write!( + f, + "matching with an empty pattern string is not \ + supported for this operation", + ) + } + } + } +} diff --git a/vendor/aho-corasick/src/util/int.rs b/vendor/aho-corasick/src/util/int.rs new file mode 100644 index 0000000..28ede7a --- /dev/null +++ b/vendor/aho-corasick/src/util/int.rs @@ -0,0 +1,284 @@ +/*! +This module provides several integer oriented traits for converting between +both fixed size integers and integers whose size varies based on the target +(like `usize`). + +The main design principle for this module is to centralize all uses of `as`. +The thinking here is that `as` makes it very easy to perform accidental lossy +conversions, and if we centralize all its uses here under more descriptive +higher level operations, its use and correctness becomes easier to audit. + +This was copied mostly wholesale from `regex-automata`. + +NOTE: for simplicity, we don't take target pointer width into account here for +`usize` conversions. Since we currently only panic in debug mode, skipping the +check when it can be proven it isn't needed at compile time doesn't really +matter. Now, if we wind up wanting to do as many checks as possible in release +mode, then we would want to skip those when we know the conversions are always +non-lossy. 
+*/ + +pub(crate) trait U8 { + fn as_usize(self) -> usize; +} + +impl U8 for u8 { + fn as_usize(self) -> usize { + usize::from(self) + } +} + +pub(crate) trait U16 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn high_u8(self) -> u8; +} + +impl U16 for u16 { + fn as_usize(self) -> usize { + usize::from(self) + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn high_u8(self) -> u8 { + (self >> 8) as u8 + } +} + +pub(crate) trait U32 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn low_u16(self) -> u16; + fn high_u16(self) -> u16; +} + +impl U32 for u32 { + #[inline] + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("u32 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn low_u16(self) -> u16 { + self as u16 + } + + fn high_u16(self) -> u16 { + (self >> 16) as u16 + } +} + +pub(crate) trait U64 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn low_u16(self) -> u16; + fn low_u32(self) -> u32; + fn high_u32(self) -> u32; +} + +impl U64 for u64 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("u64 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn low_u16(self) -> u16 { + self as u16 + } + + fn low_u32(self) -> u32 { + self as u32 + } + + fn high_u32(self) -> u32 { + (self >> 32) as u32 + } +} + +pub(crate) trait I8 { + fn as_usize(self) -> usize; + fn to_bits(self) -> u8; + fn from_bits(n: u8) -> i8; +} + +impl I8 for i8 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("i8 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn to_bits(self) -> u8 { + self as u8 + } + + fn from_bits(n: u8) -> i8 { + n as i8 + } +} + +pub(crate) trait I32 { + fn as_usize(self) -> usize; + fn to_bits(self) -> u32; + fn from_bits(n: u32) -> i32; +} + +impl I32 for i32 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("i32 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn to_bits(self) -> u32 { + self as u32 + } + + fn from_bits(n: u32) -> i32 { + n as i32 + } +} + +pub(crate) trait I64 { + fn as_usize(self) -> usize; + fn to_bits(self) -> u64; + fn from_bits(n: u64) -> i64; +} + +impl I64 for i64 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("i64 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn to_bits(self) -> u64 { + self as u64 + } + + fn from_bits(n: u64) -> i64 { + n as i64 + } +} + +pub(crate) trait Usize { + fn as_u8(self) -> u8; + fn as_u16(self) -> u16; + fn as_u32(self) -> u32; + fn as_u64(self) -> u64; +} + +impl Usize for usize { + fn as_u8(self) -> u8 { + #[cfg(debug_assertions)] + { + u8::try_from(self).expect("usize overflowed u8") + } + #[cfg(not(debug_assertions))] + { + self as u8 + } + } + + fn as_u16(self) -> u16 { + #[cfg(debug_assertions)] + { + u16::try_from(self).expect("usize overflowed u16") + } + #[cfg(not(debug_assertions))] + { + self as u16 + } + } + + fn as_u32(self) -> u32 { + #[cfg(debug_assertions)] + { + u32::try_from(self).expect("usize overflowed u32") + } + #[cfg(not(debug_assertions))] + { + self as u32 + } + } + + fn as_u64(self) -> u64 { + #[cfg(debug_assertions)] + { + u64::try_from(self).expect("usize overflowed u64") + } + 
#[cfg(not(debug_assertions))]
+        {
+            self as u64
+        }
+    }
+}
+
+// Pointers aren't integers, but we convert pointers to integers to perform
+// offset arithmetic in some places. (And no, we don't convert the integers
+// back to pointers.) So add 'as_usize' conversions here too for completeness.
+//
+// These 'as' casts are actually okay because they're always non-lossy. But the
+// idea here is to just try and remove as much 'as' as possible, particularly
+// in this crate where we are being really paranoid about offsets and making
+// sure we don't panic on inputs that might be untrusted. This way, the 'as'
+// casts become easier to audit if they're all in one place, even when some of
+// them are actually okay 100% of the time.
+
+pub(crate) trait Pointer {
+    fn as_usize(self) -> usize;
+}
+
+impl<T> Pointer for *const T {
+    fn as_usize(self) -> usize {
+        self as usize
+    }
+}
+
+pub(crate) trait PointerMut {
+    fn as_usize(self) -> usize;
+}
+
+impl<T> PointerMut for *mut T {
+    fn as_usize(self) -> usize {
+        self as usize
+    }
+}
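What these traits buy at call sites can be shown in one line. A sketch (the function name here is hypothetical, not vendored code):

    // With `use crate::util::int::Usize;` in scope, a conversion is written
    // `n.as_u32()` instead of `n as u32`. In debug builds a value that does
    // not fit panics; in release builds it compiles to a plain cast.
    fn to_small_id(n: usize) -> u32 {
        n.as_u32()
    }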
diff --git a/vendor/aho-corasick/src/util/mod.rs b/vendor/aho-corasick/src/util/mod.rs
new file mode 100644
index 0000000..f7a1ddd
--- /dev/null
+++ b/vendor/aho-corasick/src/util/mod.rs
@@ -0,0 +1,12 @@
+pub(crate) mod alphabet;
+#[cfg(feature = "std")]
+pub(crate) mod buffer;
+pub(crate) mod byte_frequencies;
+pub(crate) mod debug;
+pub(crate) mod error;
+pub(crate) mod int;
+pub(crate) mod prefilter;
+pub(crate) mod primitives;
+pub(crate) mod remapper;
+pub(crate) mod search;
+pub(crate) mod special;
diff --git a/vendor/aho-corasick/src/util/prefilter.rs b/vendor/aho-corasick/src/util/prefilter.rs
new file mode 100644
index 0000000..f5ddc75
--- /dev/null
+++ b/vendor/aho-corasick/src/util/prefilter.rs
@@ -0,0 +1,924 @@
+use core::{
+    cmp,
+    fmt::Debug,
+    panic::{RefUnwindSafe, UnwindSafe},
+    u8,
+};
+
+use alloc::{sync::Arc, vec, vec::Vec};
+
+use crate::{
+    packed,
+    util::{
+        alphabet::ByteSet,
+        search::{Match, MatchKind, Span},
+    },
+};
+
+/// A prefilter for accelerating a search.
+///
+/// This crate uses prefilters in the core search implementations to accelerate
+/// common cases. They typically only apply to cases where there are a small
+/// number of patterns (less than 100 or so), but when they do, throughput can
+/// be boosted considerably, perhaps by an order of magnitude. When a prefilter
+/// is active, it is used whenever a search enters an automaton's start state.
+///
+/// Currently, prefilters cannot be constructed by
+/// callers. A `Prefilter` can only be accessed via the
+/// [`Automaton::prefilter`](crate::automaton::Automaton::prefilter)
+/// method and used to execute a search. In other words, a prefilter can be
+/// used to optimize your own search implementation if necessary, but cannot do
+/// much else. If you have a use case for more APIs, please submit an issue.
+#[derive(Clone, Debug)]
+pub struct Prefilter {
+    finder: Arc<dyn PrefilterI>,
+    memory_usage: usize,
+}
+
+impl Prefilter {
+    /// Execute a search in the haystack within the span given. If a match or
+    /// a possible match is returned, then it is guaranteed to occur within
+    /// the bounds of the span.
+    ///
+    /// If the span provided is invalid for the given haystack, then behavior
+    /// is unspecified.
+    #[inline]
+    pub fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+        self.finder.find_in(haystack, span)
+    }
+
+    #[inline]
+    pub(crate) fn memory_usage(&self) -> usize {
+        self.memory_usage
+    }
+}
+
+/// A candidate is the result of running a prefilter on a haystack at a
+/// particular position.
+///
+/// The result is either no match, a confirmed match or a possible match.
+///
+/// When no match is returned, the prefilter is guaranteeing that no possible
+/// match can be found in the haystack, and the caller may trust this. That is,
+/// all correct prefilters must never report false negatives.
+///
+/// In some cases, a prefilter can confirm a match very quickly, in which case,
+/// the caller may use this to stop what it's doing and report the match. In
+/// this case, prefilter implementations must never report a false positive.
+/// In other cases, the prefilter can only report a potential match, in which
+/// case the callers must attempt to confirm the match. In this case, prefilter
+/// implementations are permitted to return false positives.
+#[derive(Clone, Debug)]
+pub enum Candidate {
+    /// No match was found. Since false negatives are not possible, this means
+    /// the search can quit as it is guaranteed not to find another match.
+    None,
+    /// A confirmed match was found. Callers do not need to confirm it.
+    Match(Match),
+    /// The start of a possible match was found. Callers must confirm it before
+    /// reporting it as a match.
+    PossibleStartOfMatch(usize),
+}
+
+impl Candidate {
+    /// Convert this candidate into an option. This is useful when callers
+    /// do not distinguish between true positives and false positives (i.e.,
+    /// the caller must always confirm the match).
+    pub fn into_option(self) -> Option<usize> {
+        match self {
+            Candidate::None => None,
+            Candidate::Match(ref m) => Some(m.start()),
+            Candidate::PossibleStartOfMatch(start) => Some(start),
+        }
+    }
+}
+
+/// A prefilter describes the behavior of fast literal scanners for quickly
+/// skipping past bytes in the haystack that we know cannot possibly
+/// participate in a match.
+trait PrefilterI:
+    Send + Sync + RefUnwindSafe + UnwindSafe + Debug + 'static
+{
+    /// Returns the next possible match candidate. This may yield false
+    /// positives, so callers must confirm a match starting at the position
+    /// returned. This, however, must never produce false negatives. That is,
+    /// this must, at minimum, return the starting position of the next match
+    /// in the given haystack after or at the given position.
+    fn find_in(&self, haystack: &[u8], span: Span) -> Candidate;
+}
+
+impl<P: PrefilterI + ?Sized> PrefilterI for Arc<P> {
+    #[inline(always)]
+    fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+        (**self).find_in(haystack, span)
+    }
+}
+
+/// A builder for constructing the best possible prefilter. When constructed,
+/// this builder will heuristically select the best prefilter it can build,
+/// if any, and discard the rest.
+#[derive(Debug)]
+pub(crate) struct Builder {
+    count: usize,
+    ascii_case_insensitive: bool,
+    start_bytes: StartBytesBuilder,
+    rare_bytes: RareBytesBuilder,
+    memmem: MemmemBuilder,
+    packed: Option<packed::Builder>,
+    // If we run across a condition that suggests we shouldn't use a prefilter
+    // at all (like an empty pattern), then disable prefilters entirely.
+    enabled: bool,
+}
+
+impl Builder {
+    /// Create a new builder for constructing the best possible prefilter.
+    pub(crate) fn new(kind: MatchKind) -> Builder {
+        let pbuilder = kind
+            .as_packed()
+            .map(|kind| packed::Config::new().match_kind(kind).builder());
+        Builder {
+            count: 0,
+            ascii_case_insensitive: false,
+            start_bytes: StartBytesBuilder::new(),
+            rare_bytes: RareBytesBuilder::new(),
+            memmem: MemmemBuilder::default(),
+            packed: pbuilder,
+            enabled: true,
+        }
+    }
+
+    /// Enable ASCII case insensitivity. When set, byte strings added to this
+    /// builder will be interpreted without respect to ASCII case.
+    pub(crate) fn ascii_case_insensitive(mut self, yes: bool) -> Builder {
+        self.ascii_case_insensitive = yes;
+        self.start_bytes = self.start_bytes.ascii_case_insensitive(yes);
+        self.rare_bytes = self.rare_bytes.ascii_case_insensitive(yes);
+        self
+    }
+
+    /// Return a prefilter suitable for quickly finding potential matches.
+    ///
+    /// All patterns added to an Aho-Corasick automaton should be added to this
+    /// builder before attempting to construct the prefilter.
+    pub(crate) fn build(&self) -> Option<Prefilter> {
+        if !self.enabled {
+            debug!("prefilter not enabled, skipping");
+            return None;
+        }
+        // If we only have one pattern, then deferring to memmem is always
+        // the best choice. This is kind of a weird case, because, well, why
+        // use Aho-Corasick if you only have one pattern? But maybe you don't
+        // know exactly how many patterns you'll get up front, and you need to
+        // support the option of multiple patterns. So instead of relying on
+        // the caller to branch and use memmem explicitly, we just do it for
+        // them.
+        if !self.ascii_case_insensitive {
+            if let Some(pre) = self.memmem.build() {
+                debug!("using memmem prefilter");
+                return Some(pre);
+            }
+        }
+        let (packed, patlen, minlen) = if self.ascii_case_insensitive {
+            (None, usize::MAX, 0)
+        } else {
+            let patlen = self.packed.as_ref().map_or(usize::MAX, |p| p.len());
+            let minlen = self.packed.as_ref().map_or(0, |p| p.minimum_len());
+            let packed =
+                self.packed.as_ref().and_then(|b| b.build()).map(|s| {
+                    let memory_usage = s.memory_usage();
+                    debug!(
+                        "built packed prefilter (len: {}, \
+                         minimum pattern len: {}, memory usage: {}) \
+                         for consideration",
+                        patlen, minlen, memory_usage,
+                    );
+                    Prefilter { finder: Arc::new(Packed(s)), memory_usage }
+                });
+            (packed, patlen, minlen)
+        };
+        match (self.start_bytes.build(), self.rare_bytes.build()) {
+            // If we could build both start and rare prefilters, then there are
+            // a few cases in which we'd want to use the start-byte prefilter
+            // over the rare-byte prefilter, since the former has lower
+            // overhead.
+            (prestart @ Some(_), prerare @ Some(_)) => {
+                debug!(
+                    "both start (len={}, rank={}) and \
+                     rare (len={}, rank={}) byte prefilters \
+                     are available",
+                    self.start_bytes.count,
+                    self.start_bytes.rank_sum,
+                    self.rare_bytes.count,
+                    self.rare_bytes.rank_sum,
+                );
+                if patlen <= 16
+                    && minlen >= 2
+                    && self.start_bytes.count >= 3
+                    && self.rare_bytes.count >= 3
+                {
+                    debug!(
+                        "start and rare byte prefilters available, but \
+                         they're probably slower than packed so using \
+                         packed"
+                    );
+                    return packed;
+                }
+                // If the start-byte prefilter can scan for a smaller number
+                // of bytes than the rare-byte prefilter, then it's probably
+                // faster.
+                let has_fewer_bytes =
+                    self.start_bytes.count < self.rare_bytes.count;
+                // Otherwise, if the combined frequency rank of the detected
+                // bytes in the start-byte prefilter is "close" to the combined
+                // frequency rank of the rare-byte prefilter, then we pick
+                // the start-byte prefilter even if the rare-byte prefilter
+                // heuristically searches for rare bytes. This is because the
+                // rare-byte prefilter has higher constant costs, so we tend to
+                // prefer the start-byte prefilter when we can.
+                let has_rarer_bytes =
+                    self.start_bytes.rank_sum <= self.rare_bytes.rank_sum + 50;
+                if has_fewer_bytes {
+                    debug!(
+                        "using start byte prefilter because it has fewer \
+                         bytes to search for than the rare byte prefilter",
+                    );
+                    prestart
+                } else if has_rarer_bytes {
+                    debug!(
+                        "using start byte prefilter because its byte \
+                         frequency rank was determined to be \
+                         \"good enough\" relative to the rare byte prefilter \
+                         byte frequency rank",
+                    );
+                    prestart
+                } else {
+                    debug!("using rare byte prefilter");
+                    prerare
+                }
+            }
+            (prestart @ Some(_), None) => {
+                if patlen <= 16 && minlen >= 2 && self.start_bytes.count >= 3 {
+                    debug!(
+                        "start byte prefilter available, but \
+                         it's probably slower than packed so using \
+                         packed"
+                    );
+                    return packed;
+                }
+                debug!(
+                    "have start byte prefilter but not rare byte prefilter, \
+                     so using start byte prefilter",
+                );
+                prestart
+            }
+            (None, prerare @ Some(_)) => {
+                if patlen <= 16 && minlen >= 2 && self.rare_bytes.count >= 3 {
+                    debug!(
+                        "rare byte prefilter available, but \
+                         it's probably slower than packed so using \
+                         packed"
+                    );
+                    return packed;
+                }
+                debug!(
+                    "have rare byte prefilter but not start byte prefilter, \
+                     so using rare byte prefilter",
+                );
+                prerare
+            }
+            (None, None) if self.ascii_case_insensitive => {
+                debug!(
+                    "no start or rare byte prefilter and ASCII case \
+                     insensitivity was enabled, so skipping prefilter",
+                );
+                None
+            }
+            (None, None) => {
+                if packed.is_some() {
+                    debug!("falling back to packed prefilter");
+                } else {
+                    debug!("no prefilter available");
+                }
+                packed
+            }
+        }
+    }
+
+    /// Add a literal string to this prefilter builder.
+    pub(crate) fn add(&mut self, bytes: &[u8]) {
+        if bytes.is_empty() {
+            self.enabled = false;
+        }
+        if !self.enabled {
+            return;
+        }
+        self.count += 1;
+        self.start_bytes.add(bytes);
+        self.rare_bytes.add(bytes);
+        self.memmem.add(bytes);
+        if let Some(ref mut pbuilder) = self.packed {
+            pbuilder.add(bytes);
+        }
+    }
+}
+
+/// A type that wraps a packed searcher and implements the `Prefilter`
+/// interface.
+#[derive(Clone, Debug)]
+struct Packed(packed::Searcher);
+
+impl PrefilterI for Packed {
+    fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+        self.0
+            .find_in(&haystack, span)
+            .map_or(Candidate::None, Candidate::Match)
+    }
+}
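Because `Candidate` distinguishes confirmed hits from unconfirmed ones, a caller's loop has exactly three branches. A minimal sketch (this is not the crate's actual search loop; `handle` is a hypothetical name):

    // Illustrative only: how a search implementation consumes Prefilter hits.
    fn handle(pre: &Prefilter, haystack: &[u8], span: Span) -> Option<usize> {
        match pre.find_in(haystack, span) {
            // False negatives are forbidden, so the whole span can be skipped.
            Candidate::None => None,
            // Already confirmed; report it without re-running the automaton.
            Candidate::Match(m) => Some(m.start()),
            // Possibly a false positive; resume the automaton here to verify.
            Candidate::PossibleStartOfMatch(start) => Some(start),
        }
    }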
+/// A builder for constructing a prefilter that uses memmem.
+#[derive(Debug, Default)]
+struct MemmemBuilder {
+    /// The number of patterns that have been added.
+    count: usize,
+    /// The singular pattern to search for. This is only set when count==1.
+    one: Option<Vec<u8>>,
+}
+
+impl MemmemBuilder {
+    fn build(&self) -> Option<Prefilter> {
+        #[cfg(all(feature = "std", feature = "perf-literal"))]
+        fn imp(builder: &MemmemBuilder) -> Option<Prefilter> {
+            let pattern = builder.one.as_ref()?;
+            assert_eq!(1, builder.count);
+            let finder = Arc::new(Memmem(
+                memchr::memmem::Finder::new(pattern).into_owned(),
+            ));
+            let memory_usage = pattern.len();
+            Some(Prefilter { finder, memory_usage })
+        }
+
+        #[cfg(not(all(feature = "std", feature = "perf-literal")))]
+        fn imp(_: &MemmemBuilder) -> Option<Prefilter> {
+            None
+        }
+
+        imp(self)
+    }
+
+    fn add(&mut self, bytes: &[u8]) {
+        self.count += 1;
+        if self.count == 1 {
+            self.one = Some(bytes.to_vec());
+        } else {
+            self.one = None;
+        }
+    }
+}
+
+/// A type that wraps a SIMD accelerated single substring search from the
+/// `memchr` crate for use as a prefilter.
+///
+/// Currently, this prefilter is only active for Aho-Corasick searchers with
+/// a single pattern. In theory, this could be extended to support searchers
+/// that have a common prefix of more than one byte (for one byte, we would use
+/// memchr), but it's not clear if it's worth it or not.
+///
+/// Also, unfortunately, this currently also requires the 'std' feature to
+/// be enabled. That's because memchr doesn't have a no-std-but-with-alloc
+/// mode, and so APIs like Finder::into_owned aren't available when 'std' is
+/// disabled. But there should be an 'alloc' feature that brings in APIs like
+/// Finder::into_owned but doesn't use std-only features like runtime CPU
+/// feature detection.
+#[cfg(all(feature = "std", feature = "perf-literal"))]
+#[derive(Clone, Debug)]
+struct Memmem(memchr::memmem::Finder<'static>);
+
+#[cfg(all(feature = "std", feature = "perf-literal"))]
+impl PrefilterI for Memmem {
+    fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+        use crate::util::primitives::PatternID;
+
+        self.0.find(&haystack[span]).map_or(Candidate::None, |i| {
+            let start = span.start + i;
+            let end = start + self.0.needle().len();
+            // N.B. We can declare a match and use a fixed pattern ID here
+            // because a Memmem prefilter is only ever created for searchers
+            // with exactly one pattern. Thus, every match is always a match
+            // and it is always for the first and only pattern.
+            Candidate::Match(Match::new(PatternID::ZERO, start..end))
+        })
+    }
+}
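In the single-pattern case the prefilter is, in effect, a plain substring search. A standalone sketch of that equivalence using the public `memchr` API (the function name is illustrative):

    use memchr::memmem;

    // With exactly one pattern, every hit is a confirmed match; there is
    // nothing left for the automaton to disambiguate.
    fn one_pattern_starts(haystack: &[u8], pattern: &[u8]) -> Vec<usize> {
        memmem::Finder::new(pattern).find_iter(haystack).collect()
    }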
+/// A builder for constructing a rare byte prefilter.
+///
+/// A rare byte prefilter attempts to pick out a small set of rare bytes that
+/// occur in the patterns, and then quickly scan to matches of those rare
+/// bytes.
+#[derive(Clone, Debug)]
+struct RareBytesBuilder {
+    /// Whether this prefilter should account for ASCII case insensitivity or
+    /// not.
+    ascii_case_insensitive: bool,
+    /// A set of rare bytes, indexed by byte value.
+    rare_set: ByteSet,
+    /// A set of byte offsets associated with bytes in a pattern. An entry
+    /// corresponds to a particular byte (its index) and is only non-zero if
+    /// the byte occurred at an offset greater than 0 in at least one pattern.
+    ///
+    /// If a byte's offset is not representable in 8 bits, then the rare bytes
+    /// prefilter becomes inert.
+    byte_offsets: RareByteOffsets,
+    /// Whether this is available as a prefilter or not. This can be set to
+    /// false during construction if a condition is seen that invalidates the
+    /// use of the rare-byte prefilter.
+    available: bool,
+    /// The number of bytes set to an active value in `byte_offsets`.
+    count: usize,
+    /// The sum of frequency ranks for the rare bytes detected. This is
+    /// intended to give a heuristic notion of how rare the bytes are.
+    rank_sum: u16,
+}
+
+/// A set of byte offsets, keyed by byte.
+#[derive(Clone, Copy)]
+struct RareByteOffsets {
+    /// Each entry corresponds to the maximum offset of the corresponding
+    /// byte across all patterns seen.
+    set: [RareByteOffset; 256],
+}
+
+impl RareByteOffsets {
+    /// Create a new empty set of rare byte offsets.
+    pub(crate) fn empty() -> RareByteOffsets {
+        RareByteOffsets { set: [RareByteOffset::default(); 256] }
+    }
+
+    /// Add the given offset for the given byte to this set. The offset
+    /// recorded for the byte is the maximum of its existing offset and the
+    /// offset given.
+    pub(crate) fn set(&mut self, byte: u8, off: RareByteOffset) {
+        self.set[byte as usize].max =
+            cmp::max(self.set[byte as usize].max, off.max);
+    }
+}
+
+impl core::fmt::Debug for RareByteOffsets {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let mut offsets = vec![];
+        for off in self.set.iter() {
+            if off.max > 0 {
+                offsets.push(off);
+            }
+        }
+        f.debug_struct("RareByteOffsets").field("set", &offsets).finish()
+    }
+}
+
+/// Offsets associated with an occurrence of a "rare" byte in any of the
+/// patterns used to construct a single Aho-Corasick automaton.
+#[derive(Clone, Copy, Debug)]
+struct RareByteOffset {
+    /// The maximum offset at which a particular byte occurs from the start
+    /// of any pattern. This is used as a shift amount. That is, when an
+    /// occurrence of this byte is found, the candidate position reported by
+    /// the prefilter is `position_of_byte - max`, such that the automaton
+    /// will begin its search at a position that is guaranteed to observe a
+    /// match.
+    ///
+    /// To avoid accidentally quadratic behavior, a prefilter is considered
+    /// ineffective when it is asked to start scanning from a position that it
+    /// has already scanned past.
+    ///
+    /// Using a `u8` here means that if we ever see a pattern that's longer
+    /// than 255 bytes, then the entire rare byte prefilter is disabled.
+    max: u8,
+}
+
+impl Default for RareByteOffset {
+    fn default() -> RareByteOffset {
+        RareByteOffset { max: 0 }
+    }
+}
+
+impl RareByteOffset {
+    /// Create a new rare byte offset. If the given offset is too big, then
+    /// None is returned. In that case, callers should render the rare bytes
+    /// prefilter inert.
+    fn new(max: usize) -> Option<RareByteOffset> {
+        if max > u8::MAX as usize {
+            None
+        } else {
+            Some(RareByteOffset { max: max as u8 })
+        }
+    }
+}
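The shift arithmetic described in the `max` field's doc is worth a worked example. Using the `lockjaw` example from the builder below, and assuming `k` is the chosen rare byte:

    // b'k' occurs at offset 3 in "lockjaw", so RareByteOffset { max: 3 } is
    // recorded for b'k'. If memchr reports b'k' at haystack position 20, the
    // candidate handed back is 20 - 3 = 17 (clamped to the span start), which
    // is early enough for the automaton to observe a match of "lockjaw"
    // beginning at position 17.
    let off = RareByteOffset::new(3).unwrap();
    let candidate = 20usize.saturating_sub(usize::from(off.max));
    assert_eq!(candidate, 17);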
+impl RareBytesBuilder {
+    /// Create a new builder for constructing a rare byte prefilter.
+    fn new() -> RareBytesBuilder {
+        RareBytesBuilder {
+            ascii_case_insensitive: false,
+            rare_set: ByteSet::empty(),
+            byte_offsets: RareByteOffsets::empty(),
+            available: true,
+            count: 0,
+            rank_sum: 0,
+        }
+    }
+
+    /// Enable ASCII case insensitivity. When set, byte strings added to this
+    /// builder will be interpreted without respect to ASCII case.
+    fn ascii_case_insensitive(mut self, yes: bool) -> RareBytesBuilder {
+        self.ascii_case_insensitive = yes;
+        self
+    }
+
+    /// Build the rare bytes prefilter.
+    ///
+    /// If there are more than 3 distinct rare bytes found, or if heuristics
+    /// otherwise determine that this prefilter should not be used, then `None`
+    /// is returned.
+    fn build(&self) -> Option<Prefilter> {
+        #[cfg(feature = "perf-literal")]
+        fn imp(builder: &RareBytesBuilder) -> Option<Prefilter> {
+            if !builder.available || builder.count > 3 {
+                return None;
+            }
+            let (mut bytes, mut len) = ([0; 3], 0);
+            for b in 0..=255 {
+                if builder.rare_set.contains(b) {
+                    bytes[len] = b as u8;
+                    len += 1;
+                }
+            }
+            let finder: Arc<dyn PrefilterI> = match len {
+                0 => return None,
+                1 => Arc::new(RareBytesOne {
+                    byte1: bytes[0],
+                    offset: builder.byte_offsets.set[bytes[0] as usize],
+                }),
+                2 => Arc::new(RareBytesTwo {
+                    offsets: builder.byte_offsets,
+                    byte1: bytes[0],
+                    byte2: bytes[1],
+                }),
+                3 => Arc::new(RareBytesThree {
+                    offsets: builder.byte_offsets,
+                    byte1: bytes[0],
+                    byte2: bytes[1],
+                    byte3: bytes[2],
+                }),
+                _ => unreachable!(),
+            };
+            Some(Prefilter { finder, memory_usage: 0 })
+        }
+
+        #[cfg(not(feature = "perf-literal"))]
+        fn imp(_: &RareBytesBuilder) -> Option<Prefilter> {
+            None
+        }
+
+        imp(self)
+    }
+
+    /// Add a byte string to this builder.
+    ///
+    /// All patterns added to an Aho-Corasick automaton should be added to this
+    /// builder before attempting to construct the prefilter.
+    fn add(&mut self, bytes: &[u8]) {
+        // If we've already given up, then do nothing.
+        if !self.available {
+            return;
+        }
+        // If we've already blown our budget, then don't waste time looking
+        // for more rare bytes.
+        if self.count > 3 {
+            self.available = false;
+            return;
+        }
+        // If the pattern is too long, then our offset table is bunk, so
+        // give up.
+        if bytes.len() >= 256 {
+            self.available = false;
+            return;
+        }
+        let mut rarest = match bytes.get(0) {
+            None => return,
+            Some(&b) => (b, freq_rank(b)),
+        };
+        // The idea here is to look for the rarest byte in each pattern, and
+        // add that to our set. As a special exception, if we see a byte that
+        // we've already added, then we immediately stop and choose that byte,
+        // even if there's another rare byte in the pattern. This helps us
+        // apply the rare byte optimization in more cases by attempting to pick
+        // bytes that are in common between patterns. So for example, if we
+        // were searching for `Sherlock` and `lockjaw`, then this would pick
+        // `k` for both patterns, resulting in the use of `memchr` instead of
+        // `memchr2` for `k` and `j`.
+        let mut found = false;
+        for (pos, &b) in bytes.iter().enumerate() {
+            self.set_offset(pos, b);
+            if found {
+                continue;
+            }
+            if self.rare_set.contains(b) {
+                found = true;
+                continue;
+            }
+            let rank = freq_rank(b);
+            if rank < rarest.1 {
+                rarest = (b, rank);
+            }
+        }
+        if !found {
+            self.add_rare_byte(rarest.0);
+        }
+    }
+
+    fn set_offset(&mut self, pos: usize, byte: u8) {
+        // This unwrap is OK because pos is never bigger than our max.
+        let offset = RareByteOffset::new(pos).unwrap();
+        self.byte_offsets.set(byte, offset);
+        if self.ascii_case_insensitive {
+            self.byte_offsets.set(opposite_ascii_case(byte), offset);
+        }
+    }
+
+    fn add_rare_byte(&mut self, byte: u8) {
+        self.add_one_rare_byte(byte);
+        if self.ascii_case_insensitive {
+            self.add_one_rare_byte(opposite_ascii_case(byte));
+        }
+    }
+
+    fn add_one_rare_byte(&mut self, byte: u8) {
+        if !self.rare_set.contains(byte) {
+            self.rare_set.add(byte);
+            self.count += 1;
+            self.rank_sum += freq_rank(byte) as u16;
+        }
+    }
+}
+
+/// A prefilter for scanning for a single "rare" byte.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct RareBytesOne {
+    byte1: u8,
+    offset: RareByteOffset,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for RareBytesOne {
+    fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+        memchr::memchr(self.byte1, &haystack[span])
+            .map(|i| {
+                let pos = span.start + i;
+                cmp::max(
+                    span.start,
+                    pos.saturating_sub(usize::from(self.offset.max)),
+                )
+            })
+            .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+    }
+}
+
+/// A prefilter for scanning for two "rare" bytes.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct RareBytesTwo {
+    offsets: RareByteOffsets,
+    byte1: u8,
+    byte2: u8,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for RareBytesTwo {
+    fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+        memchr::memchr2(self.byte1, self.byte2, &haystack[span])
+            .map(|i| {
+                let pos = span.start + i;
+                let offset = self.offsets.set[usize::from(haystack[pos])].max;
+                cmp::max(span.start, pos.saturating_sub(usize::from(offset)))
+            })
+            .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+    }
+}
+
+/// A prefilter for scanning for three "rare" bytes.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct RareBytesThree {
+    offsets: RareByteOffsets,
+    byte1: u8,
+    byte2: u8,
+    byte3: u8,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for RareBytesThree {
+    fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+        memchr::memchr3(self.byte1, self.byte2, self.byte3, &haystack[span])
+            .map(|i| {
+                let pos = span.start + i;
+                let offset = self.offsets.set[usize::from(haystack[pos])].max;
+                cmp::max(span.start, pos.saturating_sub(usize::from(offset)))
+            })
+            .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+    }
+}
+
+/// A builder for constructing a starting byte prefilter.
+///
+/// A starting byte prefilter is a simplistic prefilter that looks for possible
+/// matches by reporting all positions corresponding to a particular byte. This
+/// generally only takes effect when there are at most 3 distinct possible
+/// starting bytes. e.g., the patterns `foo`, `bar`, and `baz` have two
+/// distinct starting bytes (`f` and `b`), and this prefilter returns all
+/// occurrences of either `f` or `b`.
+///
+/// In some cases, a heuristic frequency analysis may determine that it would
+/// be better not to use this prefilter even when there are 3 or fewer distinct
+/// starting bytes.
+#[derive(Clone, Debug)]
+struct StartBytesBuilder {
+    /// Whether this prefilter should account for ASCII case insensitivity or
+    /// not.
+    ascii_case_insensitive: bool,
+    /// The set of starting bytes observed.
+    byteset: Vec<bool>,
+    /// The number of bytes set to true in `byteset`.
+    count: usize,
+    /// The sum of frequency ranks for the rare bytes detected. This is
+    /// intended to give a heuristic notion of how rare the bytes are.
+    rank_sum: u16,
+}
+
+impl StartBytesBuilder {
+    /// Create a new builder for constructing a start byte prefilter.
+    fn new() -> StartBytesBuilder {
+        StartBytesBuilder {
+            ascii_case_insensitive: false,
+            byteset: vec![false; 256],
+            count: 0,
+            rank_sum: 0,
+        }
+    }
+
+    /// Enable ASCII case insensitivity. When set, byte strings added to this
+    /// builder will be interpreted without respect to ASCII case.
+    fn ascii_case_insensitive(mut self, yes: bool) -> StartBytesBuilder {
+        self.ascii_case_insensitive = yes;
+        self
+    }
+
+    /// Build the starting bytes prefilter.
+    ///
+    /// If there are more than 3 distinct starting bytes, or if heuristics
+    /// otherwise determine that this prefilter should not be used, then `None`
+    /// is returned.
+    fn build(&self) -> Option<Prefilter> {
+        #[cfg(feature = "perf-literal")]
+        fn imp(builder: &StartBytesBuilder) -> Option<Prefilter> {
+            if builder.count > 3 {
+                return None;
+            }
+            let (mut bytes, mut len) = ([0; 3], 0);
+            for b in 0..256 {
+                if !builder.byteset[b] {
+                    continue;
+                }
+                // We don't handle non-ASCII bytes for now. Getting non-ASCII
+                // bytes right is trickier, since we generally don't want to
+                // put a leading UTF-8 code unit into a prefilter that isn't
+                // ASCII, since they can occur frequently. Instead, it would be
+                // better to use a continuation byte, but this requires more
+                // sophisticated analysis of the automaton and a richer
+                // prefilter API.
+                if b > 0x7F {
+                    return None;
+                }
+                bytes[len] = b as u8;
+                len += 1;
+            }
+            let finder: Arc<dyn PrefilterI> = match len {
+                0 => return None,
+                1 => Arc::new(StartBytesOne { byte1: bytes[0] }),
+                2 => Arc::new(StartBytesTwo {
+                    byte1: bytes[0],
+                    byte2: bytes[1],
+                }),
+                3 => Arc::new(StartBytesThree {
+                    byte1: bytes[0],
+                    byte2: bytes[1],
+                    byte3: bytes[2],
+                }),
+                _ => unreachable!(),
+            };
+            Some(Prefilter { finder, memory_usage: 0 })
+        }
+
+        #[cfg(not(feature = "perf-literal"))]
+        fn imp(_: &StartBytesBuilder) -> Option<Prefilter> {
+            None
+        }
+
+        imp(self)
+    }
+
+    /// Add a byte string to this builder.
+    ///
+    /// All patterns added to an Aho-Corasick automaton should be added to this
+    /// builder before attempting to construct the prefilter.
+    fn add(&mut self, bytes: &[u8]) {
+        if self.count > 3 {
+            return;
+        }
+        if let Some(&byte) = bytes.get(0) {
+            self.add_one_byte(byte);
+            if self.ascii_case_insensitive {
+                self.add_one_byte(opposite_ascii_case(byte));
+            }
+        }
+    }
+
+    fn add_one_byte(&mut self, byte: u8) {
+        if !self.byteset[byte as usize] {
+            self.byteset[byte as usize] = true;
+            self.count += 1;
+            self.rank_sum += freq_rank(byte) as u16;
+        }
+    }
+}
+
+/// A prefilter for scanning for a single starting byte.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct StartBytesOne {
+    byte1: u8,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for StartBytesOne {
+    fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+        memchr::memchr(self.byte1, &haystack[span])
+            .map(|i| span.start + i)
+            .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+    }
+}
+
+/// A prefilter for scanning for two starting bytes.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct StartBytesTwo {
+    byte1: u8,
+    byte2: u8,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for StartBytesTwo {
+    fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+        memchr::memchr2(self.byte1, self.byte2, &haystack[span])
+            .map(|i| span.start + i)
+            .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+    }
+}
+
+/// A prefilter for scanning for three starting bytes.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct StartBytesThree {
+    byte1: u8,
+    byte2: u8,
+    byte3: u8,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for StartBytesThree {
+    fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+        memchr::memchr3(self.byte1, self.byte2, self.byte3, &haystack[span])
+            .map(|i| span.start + i)
+            .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+    }
+}
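For the two-byte case described in the builder's docs (patterns `foo`, `bar`, `baz` share starting bytes `f` and `b`), the built prefilter reduces to a `memchr2` scan. A sketch using the public `memchr` API (the function name is illustrative):

    // Every reported position is only a *possible* match start and must
    // still be confirmed by the automaton.
    fn possible_starts(haystack: &[u8]) -> impl Iterator<Item = usize> + '_ {
        memchr::memchr2_iter(b'f', b'b', haystack)
    }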
+/// If the given byte is not an ASCII letter, then it is returned unchanged.
+pub(crate) fn opposite_ascii_case(b: u8) -> u8 {
+    if b'A' <= b && b <= b'Z' {
+        b.to_ascii_lowercase()
+    } else if b'a' <= b && b <= b'z' {
+        b.to_ascii_uppercase()
+    } else {
+        b
+    }
+}
+
+/// Return the frequency rank of the given byte. The higher the rank, the more
+/// common the byte (heuristically speaking).
+fn freq_rank(b: u8) -> u8 {
+    use crate::util::byte_frequencies::BYTE_FREQUENCIES;
+    BYTE_FREQUENCIES[b as usize]
+}
diff --git a/vendor/aho-corasick/src/util/primitives.rs b/vendor/aho-corasick/src/util/primitives.rs
new file mode 100644
index 0000000..784d397
--- /dev/null
+++ b/vendor/aho-corasick/src/util/primitives.rs
@@ -0,0 +1,759 @@
+/*!
+Lower level primitive types that are useful in a variety of circumstances.
+
+# Overview
+
+This list represents the principal types in this module and briefly describes
+when you might want to use them.
+
+* [`PatternID`] - A type that represents the identifier of a regex pattern.
+This is probably the most widely used type in this module (which is why it's
+also re-exported in the crate root).
+* [`StateID`] - A type that represents the identifier of a finite automaton
+state. This is used for both NFAs and DFAs, with the notable exception of
+the hybrid NFA/DFA. (The hybrid NFA/DFA uses a special purpose "lazy" state
+identifier.)
+* [`SmallIndex`] - The internal representation of both a `PatternID` and a
+`StateID`. Its purpose is to serve as a type that can index memory without
+being as big as a `usize` on 64-bit targets. The main idea behind this type
+is that there are many things in regex engines that will, in practice, never
+overflow a 32-bit integer. (For example, the number of patterns in a regex
+or the number of states in an NFA.) Thus, a `SmallIndex` can be used to index
+memory without peppering `as` casts everywhere. Moreover, it forces callers
+to handle errors in the case where, somehow, the value would otherwise overflow
+either a 32-bit integer or a `usize` (e.g., on 16-bit targets).
+*/
+
+// The macro we use to define some types below adds methods that we don't
+// use on some of the types. There isn't much, so we just squash the warning.
+#![allow(dead_code)]
+
+use alloc::vec::Vec;
+
+use crate::util::int::{Usize, U16, U32, U64};
+
+/// A type that represents a "small" index.
+///
+/// The main idea of this type is to provide something that can index memory,
+/// but uses less memory than `usize` on 64-bit systems. Specifically, its
+/// representation is always a `u32` and has `repr(transparent)` enabled. (So
+/// it is safe to transmute between a `u32` and a `SmallIndex`.)
+///
+/// A small index is typically useful in cases where there is no practical way
+/// that the index will overflow a 32-bit integer. A good example of this is
+/// an NFA state. If you could somehow build an NFA with `2^30` states, its
+/// memory usage would be exorbitant and its runtime execution would be so
+/// slow as to be completely worthless. Therefore, this crate generally deems
+/// it acceptable to return an error if it would otherwise build an NFA that
+/// requires a slice longer than what a 32-bit integer can index. In exchange,
+/// we can use 32-bit indices instead of 64-bit indices in various places.
+///
+/// This type ensures this by providing a constructor that will return an error
+/// if its argument cannot fit into the type. This makes it much easier to
+/// handle these sorts of boundary cases that are otherwise extremely subtle.
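+///
+/// A short illustrative sketch of that error-on-overflow behavior (this is
+/// an internal API, so the example is not compiled as a public doctest):
+///
+/// ```
+/// // In-bounds values convert cleanly...
+/// assert_eq!(5, SmallIndex::new(5).unwrap().as_usize());
+/// // ...while values above SmallIndex::MAX are rejected with an error.
+/// assert!(SmallIndex::new(usize::MAX).is_err());
+/// ```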
+/// +/// On all targets, this type guarantees that its value will fit in a `u32`, +/// `i32`, `usize` and an `isize`. This means that on 16-bit targets, for +/// example, this type's maximum value will never overflow an `isize`, +/// which means it will never overflow a `i16` even though its internal +/// representation is still a `u32`. +/// +/// The purpose for making the type fit into even signed integer types like +/// `isize` is to guarantee that the difference between any two small indices +/// is itself also a small index. This is useful in certain contexts, e.g., +/// for delta encoding. +/// +/// # Other types +/// +/// The following types wrap `SmallIndex` to provide a more focused use case: +/// +/// * [`PatternID`] is for representing the identifiers of patterns. +/// * [`StateID`] is for representing the identifiers of states in finite +/// automata. It is used for both NFAs and DFAs. +/// +/// # Representation +/// +/// This type is always represented internally by a `u32` and is marked as +/// `repr(transparent)`. Thus, this type always has the same representation as +/// a `u32`. It is thus safe to transmute between a `u32` and a `SmallIndex`. +/// +/// # Indexing +/// +/// For convenience, callers may use a `SmallIndex` to index slices. +/// +/// # Safety +/// +/// While a `SmallIndex` is meant to guarantee that its value fits into `usize` +/// without using as much space as a `usize` on all targets, callers must +/// not rely on this property for safety. Callers may choose to rely on this +/// property for correctness however. For example, creating a `SmallIndex` with +/// an invalid value can be done in entirely safe code. This may in turn result +/// in panics or silent logical errors. +#[derive( + Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord, +)] +#[repr(transparent)] +pub(crate) struct SmallIndex(u32); + +impl SmallIndex { + /// The maximum index value. + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + pub const MAX: SmallIndex = + // FIXME: Use as_usize() once const functions in traits are stable. + SmallIndex::new_unchecked(core::i32::MAX as usize - 1); + + /// The maximum index value. + #[cfg(target_pointer_width = "16")] + pub const MAX: SmallIndex = + SmallIndex::new_unchecked(core::isize::MAX - 1); + + /// The total number of values that can be represented as a small index. + pub const LIMIT: usize = SmallIndex::MAX.as_usize() + 1; + + /// The zero index value. + pub const ZERO: SmallIndex = SmallIndex::new_unchecked(0); + + /// The number of bytes that a single small index uses in memory. + pub const SIZE: usize = core::mem::size_of::(); + + /// Create a new small index. + /// + /// If the given index exceeds [`SmallIndex::MAX`], then this returns + /// an error. + #[inline] + pub fn new(index: usize) -> Result { + SmallIndex::try_from(index) + } + + /// Create a new small index without checking whether the given value + /// exceeds [`SmallIndex::MAX`]. + /// + /// Using this routine with an invalid index value will result in + /// unspecified behavior, but *not* undefined behavior. In particular, an + /// invalid index value is likely to cause panics or possibly even silent + /// logical errors. + /// + /// Callers must never rely on a `SmallIndex` to be within a certain range + /// for memory safety. + #[inline] + pub const fn new_unchecked(index: usize) -> SmallIndex { + // FIXME: Use as_u32() once const functions in traits are stable. 
+        SmallIndex::from_u32_unchecked(index as u32)
+    }
+
+    /// Create a new small index from a `u32` without checking whether the
+    /// given value exceeds [`SmallIndex::MAX`].
+    ///
+    /// Using this routine with an invalid index value will result in
+    /// unspecified behavior, but *not* undefined behavior. In particular, an
+    /// invalid index value is likely to cause panics or possibly even silent
+    /// logical errors.
+    ///
+    /// Callers must never rely on a `SmallIndex` to be within a certain range
+    /// for memory safety.
+    #[inline]
+    pub const fn from_u32_unchecked(index: u32) -> SmallIndex {
+        SmallIndex(index)
+    }
+
+    /// Like [`SmallIndex::new`], but panics if the given index is not valid.
+    #[inline]
+    pub fn must(index: usize) -> SmallIndex {
+        SmallIndex::new(index).expect("invalid small index")
+    }
+
+    /// Return this small index as a `usize`. This is guaranteed to never
+    /// overflow `usize`.
+    #[inline]
+    pub const fn as_usize(&self) -> usize {
+        // FIXME: Use as_usize() once const functions in traits are stable.
+        self.0 as usize
+    }
+
+    /// Return this small index as a `u64`. This is guaranteed to never
+    /// overflow.
+    #[inline]
+    pub const fn as_u64(&self) -> u64 {
+        // FIXME: Use u64::from() once const functions in traits are stable.
+        self.0 as u64
+    }
+
+    /// Return the internal `u32` of this small index. This is guaranteed to
+    /// never overflow `u32`.
+    #[inline]
+    pub const fn as_u32(&self) -> u32 {
+        self.0
+    }
+
+    /// Return the internal `u32` of this small index represented as an `i32`.
+    /// This is guaranteed to never overflow an `i32`.
+    #[inline]
+    pub const fn as_i32(&self) -> i32 {
+        // This is OK because we guarantee that our max value is <= i32::MAX.
+        self.0 as i32
+    }
+
+    /// Returns one more than this small index as a usize.
+    ///
+    /// Since a small index has constraints on its maximum value, adding `1` to
+    /// it will always fit in a `usize`, `isize`, `u32` and an `i32`.
+    #[inline]
+    pub fn one_more(&self) -> usize {
+        self.as_usize() + 1
+    }
+
+    /// Decode this small index from the bytes given using the native endian
+    /// byte order for the current target.
+    ///
+    /// If the decoded integer is not representable as a small index for the
+    /// current target, then this returns an error.
+    #[inline]
+    pub fn from_ne_bytes(
+        bytes: [u8; 4],
+    ) -> Result<SmallIndex, SmallIndexError> {
+        let id = u32::from_ne_bytes(bytes);
+        if id > SmallIndex::MAX.as_u32() {
+            return Err(SmallIndexError { attempted: u64::from(id) });
+        }
+        Ok(SmallIndex::new_unchecked(id.as_usize()))
+    }
+
+    /// Decode this small index from the bytes given using the native endian
+    /// byte order for the current target.
+    ///
+    /// This is analogous to [`SmallIndex::new_unchecked`] in that it does not
+    /// check whether the decoded integer is representable as a small index.
+    #[inline]
+    pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> SmallIndex {
+        SmallIndex::new_unchecked(u32::from_ne_bytes(bytes).as_usize())
+    }
+
+    /// Return the underlying small index integer as raw bytes in native endian
+    /// format.
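+    ///
+    /// Together with [`SmallIndex::from_ne_bytes`], this is a lossless
+    /// round-trip. A small illustrative sketch (this is an internal API, so
+    /// the example is not compiled as a public doctest):
+    ///
+    /// ```
+    /// let idx = SmallIndex::new(1234).unwrap();
+    /// // Encoding and then decoding in native endian recovers the index.
+    /// assert_eq!(idx, SmallIndex::from_ne_bytes(idx.to_ne_bytes()).unwrap());
+    /// ```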
+    #[inline]
+    pub fn to_ne_bytes(&self) -> [u8; 4] {
+        self.0.to_ne_bytes()
+    }
+}
+
+impl<T> core::ops::Index<SmallIndex> for [T] {
+    type Output = T;
+
+    #[inline]
+    fn index(&self, index: SmallIndex) -> &T {
+        &self[index.as_usize()]
+    }
+}
+
+impl<T> core::ops::IndexMut<SmallIndex> for [T] {
+    #[inline]
+    fn index_mut(&mut self, index: SmallIndex) -> &mut T {
+        &mut self[index.as_usize()]
+    }
+}
+
+impl<T> core::ops::Index<SmallIndex> for Vec<T> {
+    type Output = T;
+
+    #[inline]
+    fn index(&self, index: SmallIndex) -> &T {
+        &self[index.as_usize()]
+    }
+}
+
+impl<T> core::ops::IndexMut<SmallIndex> for Vec<T> {
+    #[inline]
+    fn index_mut(&mut self, index: SmallIndex) -> &mut T {
+        &mut self[index.as_usize()]
+    }
+}
+
+impl From<StateID> for SmallIndex {
+    fn from(sid: StateID) -> SmallIndex {
+        sid.0
+    }
+}
+
+impl From<PatternID> for SmallIndex {
+    fn from(pid: PatternID) -> SmallIndex {
+        pid.0
+    }
+}
+
+impl From<u8> for SmallIndex {
+    fn from(index: u8) -> SmallIndex {
+        SmallIndex::new_unchecked(usize::from(index))
+    }
+}
+
+impl TryFrom<u16> for SmallIndex {
+    type Error = SmallIndexError;
+
+    fn try_from(index: u16) -> Result<SmallIndex, SmallIndexError> {
+        if u32::from(index) > SmallIndex::MAX.as_u32() {
+            return Err(SmallIndexError { attempted: u64::from(index) });
+        }
+        Ok(SmallIndex::new_unchecked(index.as_usize()))
+    }
+}
+
+impl TryFrom<u32> for SmallIndex {
+    type Error = SmallIndexError;
+
+    fn try_from(index: u32) -> Result<SmallIndex, SmallIndexError> {
+        if index > SmallIndex::MAX.as_u32() {
+            return Err(SmallIndexError { attempted: u64::from(index) });
+        }
+        Ok(SmallIndex::new_unchecked(index.as_usize()))
+    }
+}
+
+impl TryFrom<u64> for SmallIndex {
+    type Error = SmallIndexError;
+
+    fn try_from(index: u64) -> Result<SmallIndex, SmallIndexError> {
+        if index > SmallIndex::MAX.as_u64() {
+            return Err(SmallIndexError { attempted: index });
+        }
+        Ok(SmallIndex::new_unchecked(index.as_usize()))
+    }
+}
+
+impl TryFrom<usize> for SmallIndex {
+    type Error = SmallIndexError;
+
+    fn try_from(index: usize) -> Result<SmallIndex, SmallIndexError> {
+        if index > SmallIndex::MAX.as_usize() {
+            return Err(SmallIndexError { attempted: index.as_u64() });
+        }
+        Ok(SmallIndex::new_unchecked(index))
+    }
+}
+
+/// This error occurs when a small index could not be constructed.
+///
+/// This occurs when given an integer exceeding the maximum small index value.
+///
+/// When the `std` feature is enabled, this implements the `Error` trait.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct SmallIndexError {
+    attempted: u64,
+}
+
+impl SmallIndexError {
+    /// Returns the value that could not be converted to a small index.
+    pub fn attempted(&self) -> u64 {
+        self.attempted
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for SmallIndexError {}
+
+impl core::fmt::Display for SmallIndexError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(
+            f,
+            "failed to create small index from {:?}, which exceeds {:?}",
+            self.attempted(),
+            SmallIndex::MAX,
+        )
+    }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct SmallIndexIter {
+    rng: core::ops::Range<usize>,
+}
+
+impl Iterator for SmallIndexIter {
+    type Item = SmallIndex;
+
+    fn next(&mut self) -> Option<SmallIndex> {
+        if self.rng.start >= self.rng.end {
+            return None;
+        }
+        let next_id = self.rng.start + 1;
+        let id = core::mem::replace(&mut self.rng.start, next_id);
+        // new_unchecked is OK since we asserted that the number of
+        // elements in this iterator will fit in an ID at construction.
+        Some(SmallIndex::new_unchecked(id))
+    }
+}
+
+macro_rules! index_type_impls {
+    ($name:ident, $err:ident, $iter:ident, $withiter:ident) => {
+        impl $name {
+            /// The maximum value.
+            pub const MAX: $name = $name(SmallIndex::MAX);
+
+            /// The total number of values that can be represented.
+            pub const LIMIT: usize = SmallIndex::LIMIT;
+
+            /// The zero value.
+            pub const ZERO: $name = $name(SmallIndex::ZERO);
+
+            /// The number of bytes that a single value uses in memory.
+            pub const SIZE: usize = SmallIndex::SIZE;
+
+            /// Create a new value that is represented by a "small index."
+            ///
+            /// If the given index exceeds the maximum allowed value, then this
+            /// returns an error.
+            #[inline]
+            pub fn new(value: usize) -> Result<$name, $err> {
+                SmallIndex::new(value).map($name).map_err($err)
+            }
+
+            /// Create a new value without checking whether the given argument
+            /// exceeds the maximum.
+            ///
+            /// Using this routine with an invalid value will result in
+            /// unspecified behavior, but *not* undefined behavior. In
+            /// particular, an invalid ID value is likely to cause panics or
+            /// possibly even silent logical errors.
+            ///
+            /// Callers must never rely on this type to be within a certain
+            /// range for memory safety.
+            #[inline]
+            pub const fn new_unchecked(value: usize) -> $name {
+                $name(SmallIndex::new_unchecked(value))
+            }
+
+            /// Create a new value from a `u32` without checking whether the
+            /// given value exceeds the maximum.
+            ///
+            /// Using this routine with an invalid value will result in
+            /// unspecified behavior, but *not* undefined behavior. In
+            /// particular, an invalid ID value is likely to cause panics or
+            /// possibly even silent logical errors.
+            ///
+            /// Callers must never rely on this type to be within a certain
+            /// range for memory safety.
+            #[inline]
+            pub const fn from_u32_unchecked(index: u32) -> $name {
+                $name(SmallIndex::from_u32_unchecked(index))
+            }
+
+            /// Like `new`, but panics if the given value is not valid.
+            #[inline]
+            pub fn must(value: usize) -> $name {
+                $name::new(value).expect(concat!(
+                    "invalid ",
+                    stringify!($name),
+                    " value"
+                ))
+            }
+
+            /// Return the internal value as a `usize`. This is guaranteed to
+            /// never overflow `usize`.
+            #[inline]
+            pub const fn as_usize(&self) -> usize {
+                self.0.as_usize()
+            }
+
+            /// Return the internal value as a `u64`. This is guaranteed to
+            /// never overflow.
+            #[inline]
+            pub const fn as_u64(&self) -> u64 {
+                self.0.as_u64()
+            }
+
+            /// Return the internal value as a `u32`. This is guaranteed to
+            /// never overflow `u32`.
+            #[inline]
+            pub const fn as_u32(&self) -> u32 {
+                self.0.as_u32()
+            }
+
+            /// Return the internal value as an `i32`. This is guaranteed to
+            /// never overflow an `i32`.
+            #[inline]
+            pub const fn as_i32(&self) -> i32 {
+                self.0.as_i32()
+            }
+
+            /// Returns one more than this value as a usize.
+            ///
+            /// Since values represented by a "small index" have constraints
+            /// on their maximum value, adding `1` to it will always fit in a
+            /// `usize`, `u32` and an `i32`.
+            #[inline]
+            pub fn one_more(&self) -> usize {
+                self.0.one_more()
+            }
+
+            /// Decode this value from the bytes given using the native endian
+            /// byte order for the current target.
+            ///
+            /// If the decoded integer is not representable as a small index
+            /// for the current target, then this returns an error.
+            #[inline]
+            pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<$name, $err> {
+                SmallIndex::from_ne_bytes(bytes).map($name).map_err($err)
+            }
+
+            /// Decode this value from the bytes given using the native endian
+            /// byte order for the current target.
+            ///
+            /// This is analogous to `new_unchecked` in that it does not check
+            /// whether the decoded integer is representable as a small index.
+            #[inline]
+            pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> $name {
+                $name(SmallIndex::from_ne_bytes_unchecked(bytes))
+            }
+
+            /// Return the underlying integer as raw bytes in native endian
+            /// format.
+            #[inline]
+            pub fn to_ne_bytes(&self) -> [u8; 4] {
+                self.0.to_ne_bytes()
+            }
+
+            /// Returns an iterator over all values from 0 up to and not
+            /// including the given length.
+            ///
+            /// If the given length exceeds this type's limit, then this
+            /// panics.
+            pub(crate) fn iter(len: usize) -> $iter {
+                $iter::new(len)
+            }
+        }
+
+        // We write our own Debug impl so that we get things like PatternID(5)
+        // instead of PatternID(SmallIndex(5)).
+        impl core::fmt::Debug for $name {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                f.debug_tuple(stringify!($name)).field(&self.as_u32()).finish()
+            }
+        }
+
+        impl<T> core::ops::Index<$name> for [T] {
+            type Output = T;
+
+            #[inline]
+            fn index(&self, index: $name) -> &T {
+                &self[index.as_usize()]
+            }
+        }
+
+        impl<T> core::ops::IndexMut<$name> for [T] {
+            #[inline]
+            fn index_mut(&mut self, index: $name) -> &mut T {
+                &mut self[index.as_usize()]
+            }
+        }
+
+        impl<T> core::ops::Index<$name> for Vec<T> {
+            type Output = T;
+
+            #[inline]
+            fn index(&self, index: $name) -> &T {
+                &self[index.as_usize()]
+            }
+        }
+
+        impl<T> core::ops::IndexMut<$name> for Vec<T> {
+            #[inline]
+            fn index_mut(&mut self, index: $name) -> &mut T {
+                &mut self[index.as_usize()]
+            }
+        }
+
+        impl From<SmallIndex> for $name {
+            fn from(index: SmallIndex) -> $name {
+                $name(index)
+            }
+        }
+
+        impl From<u8> for $name {
+            fn from(value: u8) -> $name {
+                $name(SmallIndex::from(value))
+            }
+        }
+
+        impl TryFrom<u16> for $name {
+            type Error = $err;
+
+            fn try_from(value: u16) -> Result<$name, $err> {
+                SmallIndex::try_from(value).map($name).map_err($err)
+            }
+        }
+
+        impl TryFrom<u32> for $name {
+            type Error = $err;
+
+            fn try_from(value: u32) -> Result<$name, $err> {
+                SmallIndex::try_from(value).map($name).map_err($err)
+            }
+        }
+
+        impl TryFrom<u64> for $name {
+            type Error = $err;
+
+            fn try_from(value: u64) -> Result<$name, $err> {
+                SmallIndex::try_from(value).map($name).map_err($err)
+            }
+        }
+
+        impl TryFrom<usize> for $name {
+            type Error = $err;
+
+            fn try_from(value: usize) -> Result<$name, $err> {
+                SmallIndex::try_from(value).map($name).map_err($err)
+            }
+        }
+
+        /// This error occurs when an ID could not be constructed.
+        ///
+        /// This occurs when given an integer exceeding the maximum allowed
+        /// value.
+        ///
+        /// When the `std` feature is enabled, this implements the `Error`
+        /// trait.
+        #[derive(Clone, Debug, Eq, PartialEq)]
+        pub struct $err(SmallIndexError);
+
+        impl $err {
+            /// Returns the value that could not be converted to an ID.
+            pub fn attempted(&self) -> u64 {
+                self.0.attempted()
+            }
+        }
+
+        #[cfg(feature = "std")]
+        impl std::error::Error for $err {}
+
+        impl core::fmt::Display for $err {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                write!(
+                    f,
+                    "failed to create {} from {:?}, which exceeds {:?}",
+                    stringify!($name),
+                    self.attempted(),
+                    $name::MAX,
+                )
+            }
+        }
+
+        #[derive(Clone, Debug)]
+        pub(crate) struct $iter(SmallIndexIter);
+
+        impl $iter {
+            fn new(len: usize) -> $iter {
+                assert!(
+                    len <= $name::LIMIT,
+                    "cannot create iterator for {} when number of \
+                     elements exceeds {:?}",
+                    stringify!($name),
+                    $name::LIMIT,
+                );
+                $iter(SmallIndexIter { rng: 0..len })
+            }
+        }
+
+        impl Iterator for $iter {
+            type Item = $name;
+
+            fn next(&mut self) -> Option<$name> {
+                self.0.next().map($name)
+            }
+        }
+
+        /// An iterator adapter that is like std::iter::Enumerate, but attaches
+        /// small index values instead. It requires `ExactSizeIterator`. At
+        /// construction, it ensures that the index of each element in the
+        /// iterator is representable in the corresponding small index type.
+        #[derive(Clone, Debug)]
+        pub(crate) struct $withiter<I> {
+            it: I,
+            ids: $iter,
+        }
+
+        impl<I: Iterator + ExactSizeIterator> $withiter<I> {
+            fn new(it: I) -> $withiter<I> {
+                let ids = $name::iter(it.len());
+                $withiter { it, ids }
+            }
+        }
+
+        impl<I: Iterator + ExactSizeIterator> Iterator for $withiter<I> {
+            type Item = ($name, I::Item);
+
+            fn next(&mut self) -> Option<($name, I::Item)> {
+                let item = self.it.next()?;
+                // Number of elements in this iterator must match, according
+                // to contract of ExactSizeIterator.
+                let id = self.ids.next().unwrap();
+                Some((id, item))
+            }
+        }
+    };
+}
+
+/// The identifier of a pattern in an Aho-Corasick automaton.
+///
+/// It is represented by a `u32` even on 64-bit systems in order to conserve
+/// space. Namely, on all targets, this type guarantees that its value will
+/// fit in a `u32`, `i32`, `usize` and an `isize`. This means that on 16-bit
+/// targets, for example, this type's maximum value will never overflow an
+/// `isize`, which means it will never overflow an `i16` even though its
+/// internal representation is still a `u32`.
+///
+/// # Safety
+///
+/// While a `PatternID` is meant to guarantee that its value fits into `usize`
+/// without using as much space as a `usize` on all targets, callers must
+/// not rely on this property for safety. Callers may choose to rely on this
+/// property for correctness however.
+/// For example, creating a `PatternID` with an
+/// invalid value can be done in entirely safe code. This may in turn result in
+/// panics or silent logical errors.
+#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct PatternID(SmallIndex);
+
+/// The identifier of a finite automaton state.
+///
+/// It is represented by a `u32` even on 64-bit systems in order to conserve
+/// space. Namely, on all targets, this type guarantees that its value will
+/// fit in a `u32`, `i32`, `usize` and an `isize`. This means that on 16-bit
+/// targets, for example, this type's maximum value will never overflow an
+/// `isize`, which means it will never overflow an `i16` even though its
+/// internal representation is still a `u32`.
+///
+/// # Safety
+///
+/// While a `StateID` is meant to guarantee that its value fits into `usize`
+/// without using as much space as a `usize` on all targets, callers must
+/// not rely on this property for safety. Callers may choose to rely on this
+/// property for correctness however. For example, creating a `StateID` with an
+/// invalid value can be done in entirely safe code. This may in turn result in
+/// panics or silent logical errors.
+#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct StateID(SmallIndex);
+
+index_type_impls!(PatternID, PatternIDError, PatternIDIter, WithPatternIDIter);
+index_type_impls!(StateID, StateIDError, StateIDIter, WithStateIDIter);
+
+/// A utility trait that defines a couple of adapters for making it convenient
+/// to access indices as "small index" types. We require ExactSizeIterator so
+/// that iterator construction can do a single check to make sure the index of
+/// each element is representable by its small index type.
+pub(crate) trait IteratorIndexExt: Iterator {
+    fn with_pattern_ids(self) -> WithPatternIDIter<Self>
+    where
+        Self: Sized + ExactSizeIterator,
+    {
+        WithPatternIDIter::new(self)
+    }
+
+    fn with_state_ids(self) -> WithStateIDIter<Self>
+    where
+        Self: Sized + ExactSizeIterator,
+    {
+        WithStateIDIter::new(self)
+    }
+}
+
+impl<I: Iterator> IteratorIndexExt for I {}
diff --git a/vendor/aho-corasick/src/util/remapper.rs b/vendor/aho-corasick/src/util/remapper.rs
new file mode 100644
index 0000000..7c47a08
--- /dev/null
+++ b/vendor/aho-corasick/src/util/remapper.rs
@@ -0,0 +1,214 @@
+use alloc::vec::Vec;
+
+use crate::{nfa::noncontiguous, util::primitives::StateID};
+
+/// Remappable is a tightly coupled abstraction that facilitates remapping
+/// state identifiers in DFAs.
+///
+/// The main idea behind remapping state IDs is that DFAs often need to check
+/// if a certain state is a "special" state of some kind (like a match state)
+/// during a search. Since this is extremely perf critical code, we want this
+/// check to be as fast as possible. Partitioning state IDs into, for example,
+/// "non-match" and "match" states means one can tell if a state is a
+/// match state via a simple comparison of the state ID.
+///
+/// The issue is that during the DFA construction process, it's not
+/// particularly easy to partition the states. Instead, the simplest thing is
+/// to often just do a pass over all of the states and shuffle them into their
+/// desired partitionings. To do that, we need a mechanism for swapping states.
+/// Hence, this abstraction.
+///
+/// Normally, for such little code, I would just duplicate it. But this is a
+/// key optimization and the implementation is a bit subtle. So the abstraction
+/// is basically a ham-fisted attempt at DRY. The only place we use this is in
+/// the dense and one-pass DFAs.
+///
+/// See also src/dfa/special.rs for a more detailed explanation of how dense
+/// DFAs are partitioned.
+pub(crate) trait Remappable: core::fmt::Debug {
+    /// Return the total number of states.
+    fn state_len(&self) -> usize;
+
+    /// Swap the states pointed to by the given IDs. The underlying finite
+    /// state machine should be mutated such that all of the transitions in
+    /// `id1` are now in the memory region where the transitions for `id2`
+    /// were, and all of the transitions in `id2` are now in the memory region
+    /// where the transitions for `id1` were.
+    ///
+    /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`.
+    ///
+    /// It is expected that, after calling this, the underlying state machine
+    /// will be left in an inconsistent state, since any other transitions
+    /// pointing to, e.g., `id1` need to be updated to point to `id2`, since
+    /// that's where `id1` moved to.
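+    ///
+    /// For example, if state `2` has a transition to state `1`, then after
+    /// `swap_states(1, 3)` that transition must eventually be remapped to
+    /// point to state `3`.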
+    ///
+    /// In order to "fix" the underlying inconsistent state, a `Remapper`
+    /// should be used to guarantee that `remap` is called at the appropriate
+    /// time.
+    fn swap_states(&mut self, id1: StateID, id2: StateID);
+
+    /// This must remap every single state ID in the underlying value according
+    /// to the function given. For example, in a DFA, this should remap every
+    /// transition and every starting state ID.
+    fn remap(&mut self, map: impl Fn(StateID) -> StateID);
+}
+
+/// Remapper is an abstraction that manages the remapping of state IDs in a
+/// finite state machine. This is useful when one wants to shuffle states into
+/// different positions in the machine.
+///
+/// One of the key complexities this manages is the ability to correctly move
+/// one state multiple times.
+///
+/// Once shuffling is complete, `remap` must be called, which will rewrite
+/// all pertinent transitions to updated state IDs. Neglecting to call `remap`
+/// will almost certainly result in a corrupt machine.
+#[derive(Debug)]
+pub(crate) struct Remapper {
+    /// A map from the index of a state to its pre-multiplied identifier.
+    ///
+    /// When a state is swapped with another, then their corresponding
+    /// locations in this map are also swapped. Thus, its new position will
+    /// still point to its old pre-multiplied StateID.
+    ///
+    /// While there is a bit more to it, this then allows us to rewrite the
+    /// state IDs in a DFA's transition table in a single pass. This is done
+    /// by iterating over every ID in this map, then iterating over each
+    /// transition for the state at that ID and re-mapping the transition from
+    /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position
+    /// in this map where `old_id` *started*, and set it to where it ended up
+    /// after all swaps have been completed.
+    map: Vec<StateID>,
+    /// A way to map indices to state IDs (and back).
+    idx: IndexMapper,
+}
+
+impl Remapper {
+    /// Create a new remapper from the given remappable implementation. The
+    /// remapper can then be used to swap states. The remappable value given
+    /// here must be the same one given to `swap` and `remap`.
+    ///
+    /// The given stride should be the stride of the transition table expressed
+    /// as a power of 2. This stride is used to map between state IDs and state
+    /// indices. If state IDs and state indices are equivalent, then provide
+    /// a `stride2` of `0`, which acts as an identity.
+    pub(crate) fn new(r: &impl Remappable, stride2: usize) -> Remapper {
+        let idx = IndexMapper { stride2 };
+        let map = (0..r.state_len()).map(|i| idx.to_state_id(i)).collect();
+        Remapper { map, idx }
+    }
+
+    /// Swap two states. Once this is called, callers must follow through to
+    /// call `remap`, or else it's possible for the underlying remappable
+    /// value to be in a corrupt state.
+    pub(crate) fn swap(
+        &mut self,
+        r: &mut impl Remappable,
+        id1: StateID,
+        id2: StateID,
+    ) {
+        if id1 == id2 {
+            return;
+        }
+        r.swap_states(id1, id2);
+        self.map.swap(self.idx.to_index(id1), self.idx.to_index(id2));
+    }
+
+    /// Complete the remapping process by rewriting all state IDs in the
+    /// remappable value according to the swaps performed.
+    pub(crate) fn remap(mut self, r: &mut impl Remappable) {
+        // Update the map to account for states that have been swapped
+        // multiple times. For example, if (A, C) and (C, G) are swapped, then
+        // transitions previously pointing to A should now point to G. But if
+        // we don't update our map, they will erroneously be set to C.
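+        //
+        // (A concrete miniature: with three states, swapping (0, 1) and then
+        // (1, 2) leaves this map as [1, 2, 0]. Original state 0 now lives at
+        // index 2, which the loop below finds by following 0 -> 1 -> 2
+        // through the frozen copy of the map.)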
+        // All we do is follow the swaps in our map until we see our original
+        // state ID.
+        //
+        // The intuition here is to think about how changes are made to the
+        // map: only through pairwise swaps. That means that starting at any
+        // given state, it is always possible to find the loop back to that
+        // state by following the swaps represented in the map (which might be
+        // 0 swaps).
+        //
+        // We are also careful to clone the map before starting in order to
+        // freeze it. We use the frozen map to find our loops, since we need to
+        // update our map as well. Without freezing it, our updates could break
+        // the loops referenced above and produce incorrect results.
+        let oldmap = self.map.clone();
+        for i in 0..r.state_len() {
+            let cur_id = self.idx.to_state_id(i);
+            let mut new_id = oldmap[i];
+            if cur_id == new_id {
+                continue;
+            }
+            loop {
+                let id = oldmap[self.idx.to_index(new_id)];
+                if cur_id == id {
+                    self.map[i] = new_id;
+                    break;
+                }
+                new_id = id;
+            }
+        }
+        r.remap(|sid| self.map[self.idx.to_index(sid)]);
+    }
+}
+
+/// A simple type for mapping between state indices and state IDs.
+///
+/// The reason why this exists is because state IDs are "premultiplied" in a
+/// DFA. That is, in order to get to the transitions for a particular state,
+/// one need only use the state ID as-is, instead of having to multiply it by
+/// the transition table's stride.
+///
+/// The downside of this is that it's inconvenient to map between state IDs
+/// using a dense map, e.g., `Vec<StateID>`. That's because state IDs look like
+/// `0`, `stride`, `2*stride`, `3*stride`, etc., instead of `0`, `1`, `2`, `3`,
+/// etc.
+///
+/// Since our state IDs are premultiplied, we can convert back-and-forth
+/// between IDs and indices by simply unmultiplying the IDs and multiplying the
+/// indices.
+///
+/// Note that for a sparse NFA, state IDs and indices are equivalent. In this
+/// case, we set the stride of the index mapper to be `0`, which acts as an
+/// identity.
+#[derive(Debug)]
+struct IndexMapper {
+    /// The power of 2 corresponding to the stride of the corresponding
+    /// transition table. 'id >> stride2' de-multiplies an ID while 'index <<
+    /// stride2' pre-multiplies an index to an ID.
+    stride2: usize,
+}
+
+impl IndexMapper {
+    /// Convert a state ID to a state index.
+    fn to_index(&self, id: StateID) -> usize {
+        id.as_usize() >> self.stride2
+    }
+
+    /// Convert a state index to a state ID.
+    fn to_state_id(&self, index: usize) -> StateID {
+        // CORRECTNESS: If the given index is not valid, then it is not
+        // required for this to panic or return a valid state ID. We'll "just"
+        // wind up with panics or silent logic errors at some other point. But
+        // this is OK because if Remappable::state_len is correct and so is
+        // 'to_index', then all inputs to 'to_state_id' should be valid indices
+        // and thus transform into valid state IDs.
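+        // For example, with stride2 = 2 (i.e., a stride of 4), index 3 maps
+        // to state ID 12, and to_index(12) recovers 3.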
+        StateID::new_unchecked(index << self.stride2)
+    }
+}
+
+impl Remappable for noncontiguous::NFA {
+    fn state_len(&self) -> usize {
+        noncontiguous::NFA::states(self).len()
+    }
+
+    fn swap_states(&mut self, id1: StateID, id2: StateID) {
+        noncontiguous::NFA::swap_states(self, id1, id2)
+    }
+
+    fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
+        noncontiguous::NFA::remap(self, map)
+    }
+}
diff --git a/vendor/aho-corasick/src/util/search.rs b/vendor/aho-corasick/src/util/search.rs
new file mode 100644
index 0000000..59b7035
--- /dev/null
+++ b/vendor/aho-corasick/src/util/search.rs
@@ -0,0 +1,1148 @@
+use core::ops::{Range, RangeBounds};
+
+use crate::util::primitives::PatternID;
+
+/// The configuration and the haystack to use for an Aho-Corasick search.
+///
+/// When executing a search, there are a few parameters one might want to
+/// configure:
+///
+/// * The haystack to search, provided to the [`Input::new`] constructor. This
+/// is the only required parameter.
+/// * The span _within_ the haystack to limit a search to. (The default
+/// is the entire haystack.) This is configured via [`Input::span`] or
+/// [`Input::range`].
+/// * Whether to run an unanchored (matches can occur anywhere after the
+/// start of the search) or anchored (matches can only occur beginning at
+/// the start of the search) search. Unanchored search is the default. This is
+/// configured via [`Input::anchored`].
+/// * Whether to quit the search as soon as a match has been found, regardless
+/// of the [`MatchKind`] that the searcher was built with. This is configured
+/// via [`Input::earliest`].
+///
+/// For most cases, the defaults for all optional parameters are appropriate.
+/// The utility of this type is that it keeps the default or common case simple
+/// while permitting tweaking parameters in more niche use cases while reusing
+/// the same search APIs.
+///
+/// # Valid bounds and search termination
+///
+/// An `Input` permits setting the bounds of a search via either
+/// [`Input::span`] or [`Input::range`]. The bounds set must be valid, or
+/// else a panic will occur. Bounds are valid if and only if:
+///
+/// * The bounds represent a valid range into the input's haystack.
+/// * **or** the end bound is a valid ending bound for the haystack *and*
+/// the start bound is exactly one greater than the end bound.
+///
+/// In the latter case, [`Input::is_done`] will return true and indicates any
+/// search receiving such an input should immediately return with no match.
+///
+/// Other than representing "search is complete," the `Input::span` and
+/// `Input::range` APIs are never necessary. Instead, callers can slice the
+/// haystack, e.g., with `&haystack[start..end]`. With that said, they
+/// can be more convenient than slicing because the match positions reported
+/// when using `Input::span` or `Input::range` are in terms of the original
+/// haystack. If you instead use `&haystack[start..end]`, then you'll need to
+/// add `start` to any match position returned in order for it to be a correct
+/// index into `haystack`.
+///
+/// # Example: `&str` and `&[u8]` automatically convert to an `Input`
+///
+/// There is a `From<&T> for Input` implementation for all `T: AsRef<[u8]>`.
+/// Additionally, the [`AhoCorasick`](crate::AhoCorasick) search APIs accept
+/// an `Into<Input>`. These two things combined together mean you can provide
+/// things like `&str` and `&[u8]` to search APIs when the defaults are
+/// suitable, but also an `Input` when they're not.
+/// For example:
+///
+/// ```
+/// use aho_corasick::{AhoCorasick, Anchored, Input, Match, StartKind};
+///
+/// // Build a searcher that supports both unanchored and anchored modes.
+/// let ac = AhoCorasick::builder()
+///     .start_kind(StartKind::Both)
+///     .build(&["abcd", "b"])
+///     .unwrap();
+/// let haystack = "abcd";
+///
+/// // A search using default parameters is unanchored. With standard
+/// // semantics, this finds `b` first.
+/// assert_eq!(
+///     Some(Match::must(1, 1..2)),
+///     ac.find(haystack),
+/// );
+/// // Using the same 'find' routine, we can provide an 'Input' explicitly
+/// // that is configured to do an anchored search. Since 'b' doesn't start
+/// // at the beginning of the search, it is not reported as a match.
+/// assert_eq!(
+///     Some(Match::must(0, 0..4)),
+///     ac.find(Input::new(haystack).anchored(Anchored::Yes)),
+/// );
+/// ```
+#[derive(Clone)]
+pub struct Input<'h> {
+    haystack: &'h [u8],
+    span: Span,
+    anchored: Anchored,
+    earliest: bool,
+}
+
+impl<'h> Input<'h> {
+    /// Create a new search configuration for the given haystack.
+    #[inline]
+    pub fn new<H: ?Sized + AsRef<[u8]>>(haystack: &'h H) -> Input<'h> {
+        Input {
+            haystack: haystack.as_ref(),
+            span: Span { start: 0, end: haystack.as_ref().len() },
+            anchored: Anchored::No,
+            earliest: false,
+        }
+    }
+
+    /// Set the span for this search.
+    ///
+    /// This routine is generic over how a span is provided. While
+    /// a [`Span`] may be given directly, one may also provide a
+    /// `std::ops::Range<usize>`. To provide anything supported by range
+    /// syntax, use the [`Input::range`] method.
+    ///
+    /// The default span is the entire haystack.
+    ///
+    /// Note that [`Input::range`] overrides this method and vice versa.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given span does not correspond to valid bounds in
+    /// the haystack or the termination of a search.
+    ///
+    /// # Example
+    ///
+    /// This example shows how the span of the search can impact whether a
+    /// match is reported or not.
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, Input, MatchKind};
+    ///
+    /// let patterns = &["b", "abcd", "abc"];
+    /// let haystack = "abcd";
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .build(patterns)
+    ///     .unwrap();
+    /// let input = Input::new(haystack).span(0..3);
+    /// let mat = ac.try_find(input)?.expect("should have a match");
+    /// // Without the span stopping the search early, 'abcd' would be reported
+    /// // because it is the correct leftmost-first match.
+    /// assert_eq!("abc", &haystack[mat.span()]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn span<S: Into<Span>>(mut self, span: S) -> Input<'h> {
+        self.set_span(span);
+        self
+    }
+
+    /// Like `Input::span`, but accepts any range instead.
+    ///
+    /// The default range is the entire haystack.
+    ///
+    /// Note that [`Input::span`] overrides this method and vice versa.
+    ///
+    /// # Panics
+    ///
+    /// This routine will panic if the given range could not be converted
+    /// to a valid [`Range`]. For example, this would panic when given
+    /// `0..=usize::MAX` since it cannot be represented using a half-open
+    /// interval in terms of `usize`.
+    ///
+    /// This routine also panics if the given range does not correspond to
+    /// valid bounds in the haystack or the termination of a search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    ///
+    /// let input = Input::new("foobar").range(2..=4);
+    /// assert_eq!(2..5, input.get_range());
+    /// ```
+    #[inline]
+    pub fn range<R: RangeBounds<usize>>(mut self, range: R) -> Input<'h> {
+        self.set_range(range);
+        self
+    }
+
+    /// Sets the anchor mode of a search.
+    ///
+    /// When a search is anchored (via [`Anchored::Yes`]), a match must begin
+    /// at the start of a search. When a search is not anchored (that's
+    /// [`Anchored::No`]), searchers will look for a match anywhere in the
+    /// haystack.
+    ///
+    /// By default, the anchored mode is [`Anchored::No`].
+    ///
+    /// # Support for anchored searches
+    ///
+    /// Anchored or unanchored searches might not always be available,
+    /// depending on the type of searcher used and its configuration:
+    ///
+    /// * [`noncontiguous::NFA`](crate::nfa::noncontiguous::NFA) always
+    /// supports both unanchored and anchored searches.
+    /// * [`contiguous::NFA`](crate::nfa::contiguous::NFA) always supports both
+    /// unanchored and anchored searches.
+    /// * [`dfa::DFA`](crate::dfa::DFA) supports only unanchored
+    /// searches by default.
+    /// [`dfa::Builder::start_kind`](crate::dfa::Builder::start_kind) can
+    /// be used to change the default to supporting both kinds of searches
+    /// or even just anchored searches.
+    /// * [`AhoCorasick`](crate::AhoCorasick) inherits the same setup as a
+    /// `DFA`. Namely, it only supports unanchored searches by default, but
+    /// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind)
+    /// can change this.
+    ///
+    /// If you try to execute a search using a `try_` ("fallible") method with
+    /// an unsupported anchor mode, then an error will be returned. For calls
+    /// to infallible search methods, a panic will result.
+    ///
+    /// # Example
+    ///
+    /// This demonstrates the differences between an anchored search and
+    /// an unanchored search. Notice that we build our `AhoCorasick` searcher
+    /// with [`StartKind::Both`] so that it supports both unanchored and
+    /// anchored searches simultaneously.
+    ///
+    /// ```
+    /// use aho_corasick::{
+    ///     AhoCorasick, Anchored, Input, MatchKind, StartKind,
+    /// };
+    ///
+    /// let patterns = &["bcd"];
+    /// let haystack = "abcd";
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .start_kind(StartKind::Both)
+    ///     .build(patterns)
+    ///     .unwrap();
+    ///
+    /// // Note that 'Anchored::No' is the default, so it doesn't need to
+    /// // be explicitly specified here.
+    /// let input = Input::new(haystack);
+    /// let mat = ac.try_find(input)?.expect("should have a match");
+    /// assert_eq!("bcd", &haystack[mat.span()]);
+    ///
+    /// // While 'bcd' occurs in the haystack, it does not begin where our
+    /// // search begins, so no match is found.
+    /// let input = Input::new(haystack).anchored(Anchored::Yes);
+    /// assert_eq!(None, ac.try_find(input)?);
+    ///
+    /// // However, if we start our search where 'bcd' starts, then we will
+    /// // find a match.
+    /// let input = Input::new(haystack).range(1..).anchored(Anchored::Yes);
+    /// let mat = ac.try_find(input)?.expect("should have a match");
+    /// assert_eq!("bcd", &haystack[mat.span()]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn anchored(mut self, mode: Anchored) -> Input<'h> {
+        self.set_anchored(mode);
+        self
+    }
+
+    /// Whether to execute an "earliest" search or not.
+    ///
+    /// When running a non-overlapping search, an "earliest" search will
+    /// return the match location as early as possible. For example, given
+    /// the patterns `abc` and `b`, and a haystack of `abc`, a normal
+    /// leftmost-first search will return `abc` as a match. But an "earliest"
+    /// search will return as soon as it is known that a match occurs, which
+    /// happens once `b` is seen.
+    ///
+    /// Note that when using [`MatchKind::Standard`], the "earliest" option
+    /// has no effect since standard semantics are already "earliest." Note
+    /// also that this has no effect in overlapping searches, since overlapping
+    /// searches also use standard semantics and report all possible matches.
+    ///
+    /// This is disabled by default.
+    ///
+    /// # Example
+    ///
+    /// This example shows the difference between "earliest" searching and
+    /// normal leftmost searching.
+    ///
+    /// ```
+    /// use aho_corasick::{AhoCorasick, Anchored, Input, MatchKind, StartKind};
+    ///
+    /// let patterns = &["abc", "b"];
+    /// let haystack = "abc";
+    ///
+    /// let ac = AhoCorasick::builder()
+    ///     .match_kind(MatchKind::LeftmostFirst)
+    ///     .build(patterns)
+    ///     .unwrap();
+    ///
+    /// // The normal leftmost-first match.
+    /// let input = Input::new(haystack);
+    /// let mat = ac.try_find(input)?.expect("should have a match");
+    /// assert_eq!("abc", &haystack[mat.span()]);
+    ///
+    /// // The "earliest" possible match, even if it isn't leftmost-first.
+    /// let input = Input::new(haystack).earliest(true);
+    /// let mat = ac.try_find(input)?.expect("should have a match");
+    /// assert_eq!("b", &haystack[mat.span()]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn earliest(mut self, yes: bool) -> Input<'h> {
+        self.set_earliest(yes);
+        self
+    }
+
+    /// Set the span for this search configuration.
+    ///
+    /// This is like the [`Input::span`] method, except this mutates the
+    /// span in place.
+    ///
+    /// This routine is generic over how a span is provided. While
+    /// a [`Span`] may be given directly, one may also provide a
+    /// `std::ops::Range<usize>`.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given span does not correspond to valid bounds in
+    /// the haystack or the termination of a search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// input.set_span(2..4);
+    /// assert_eq!(2..4, input.get_range());
+    /// ```
+    #[inline]
+    pub fn set_span<S: Into<Span>>(&mut self, span: S) {
+        let span = span.into();
+        assert!(
+            span.end <= self.haystack.len()
+                && span.start <= span.end.wrapping_add(1),
+            "invalid span {:?} for haystack of length {}",
+            span,
+            self.haystack.len(),
+        );
+        self.span = span;
+    }
+
+    /// Set the span for this search configuration given any range.
+    ///
+    /// This is like the [`Input::range`] method, except this mutates the
+    /// span in place.
+    ///
+    /// # Panics
+    ///
+    /// This routine will panic if the given range could not be converted
+    /// to a valid [`Range`]. For example, this would panic when given
+    /// `0..=usize::MAX` since it cannot be represented using a half-open
+    /// interval in terms of `usize`.
+    ///
+    /// This routine also panics if the given range does not correspond to
+    /// valid bounds in the haystack or the termination of a search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// input.set_range(2..=4);
+    /// assert_eq!(2..5, input.get_range());
+    /// ```
+    #[inline]
+    pub fn set_range<R: RangeBounds<usize>>(&mut self, range: R) {
+        use core::ops::Bound;
+
+        // It's a little weird to convert ranges into spans, and then spans
+        // back into ranges when we actually slice the haystack. Because
+        // of that process, we always represent everything as a half-open
+        // interval. Therefore, handling things like m..=n is a little awkward.
+        let start = match range.start_bound() {
+            Bound::Included(&i) => i,
+            // Can this case ever happen? Range syntax doesn't support it...
+            Bound::Excluded(&i) => i.checked_add(1).unwrap(),
+            Bound::Unbounded => 0,
+        };
+        let end = match range.end_bound() {
+            Bound::Included(&i) => i.checked_add(1).unwrap(),
+            Bound::Excluded(&i) => i,
+            Bound::Unbounded => self.haystack().len(),
+        };
+        self.set_span(Span { start, end });
+    }
+
+    /// Set the starting offset for the span for this search configuration.
+    ///
+    /// This is a convenience routine for only mutating the start of a span
+    /// without having to set the entire span.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given span does not correspond to valid bounds in
+    /// the haystack or the termination of a search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// input.set_start(5);
+    /// assert_eq!(5..6, input.get_range());
+    /// ```
+    #[inline]
+    pub fn set_start(&mut self, start: usize) {
+        self.set_span(Span { start, ..self.get_span() });
+    }
+
+    /// Set the ending offset for the span for this search configuration.
+    ///
+    /// This is a convenience routine for only mutating the end of a span
+    /// without having to set the entire span.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given span does not correspond to valid bounds in
+    /// the haystack or the termination of a search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// input.set_end(5);
+    /// assert_eq!(0..5, input.get_range());
+    /// ```
+    #[inline]
+    pub fn set_end(&mut self, end: usize) {
+        self.set_span(Span { end, ..self.get_span() });
+    }
+
+    /// Set the anchor mode of a search.
+    ///
+    /// This is like [`Input::anchored`], except it mutates the search
+    /// configuration in place.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::{Anchored, Input};
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(Anchored::No, input.get_anchored());
+    ///
+    /// input.set_anchored(Anchored::Yes);
+    /// assert_eq!(Anchored::Yes, input.get_anchored());
+    /// ```
+    #[inline]
+    pub fn set_anchored(&mut self, mode: Anchored) {
+        self.anchored = mode;
+    }
+
+    /// Set whether the search should execute in "earliest" mode or not.
+    ///
+    /// This is like [`Input::earliest`], except it mutates the search
+    /// configuration in place.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert!(!input.get_earliest());
+    /// input.set_earliest(true);
+    /// assert!(input.get_earliest());
+    /// ```
+    #[inline]
+    pub fn set_earliest(&mut self, yes: bool) {
+        self.earliest = yes;
+    }
+
+    /// Return a borrow of the underlying haystack as a slice of bytes.
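+    ///
+    /// Note that this returns the full haystack, irrespective of any span
+    /// that has been configured for the search.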
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(b"foobar", input.haystack());
+    /// ```
+    #[inline]
+    pub fn haystack(&self) -> &[u8] {
+        self.haystack
+    }
+
+    /// Return the start position of this search.
+    ///
+    /// This is a convenience routine for `search.get_span().start()`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(0, input.start());
+    ///
+    /// let input = Input::new("foobar").span(2..4);
+    /// assert_eq!(2, input.start());
+    /// ```
+    #[inline]
+    pub fn start(&self) -> usize {
+        self.get_span().start
+    }
+
+    /// Return the end position of this search.
+    ///
+    /// This is a convenience routine for `search.get_span().end()`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(6, input.end());
+    ///
+    /// let input = Input::new("foobar").span(2..4);
+    /// assert_eq!(4, input.end());
+    /// ```
+    #[inline]
+    pub fn end(&self) -> usize {
+        self.get_span().end
+    }
+
+    /// Return the span for this search configuration.
+    ///
+    /// If one was not explicitly set, then the span corresponds to the entire
+    /// range of the haystack.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::{Input, Span};
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(Span { start: 0, end: 6 }, input.get_span());
+    /// ```
+    #[inline]
+    pub fn get_span(&self) -> Span {
+        self.span
+    }
+
+    /// Return the span as a range for this search configuration.
+    ///
+    /// If one was not explicitly set, then the span corresponds to the entire
+    /// range of the haystack.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// ```
+    #[inline]
+    pub fn get_range(&self) -> Range<usize> {
+        self.get_span().range()
+    }
+
+    /// Return the anchored mode for this search configuration.
+    ///
+    /// If no anchored mode was set, then it defaults to [`Anchored::No`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::{Anchored, Input};
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(Anchored::No, input.get_anchored());
+    ///
+    /// input.set_anchored(Anchored::Yes);
+    /// assert_eq!(Anchored::Yes, input.get_anchored());
+    /// ```
+    #[inline]
+    pub fn get_anchored(&self) -> Anchored {
+        self.anchored
+    }
+
+    /// Return whether this search should execute in "earliest" mode.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert!(!input.get_earliest());
+    /// ```
+    #[inline]
+    pub fn get_earliest(&self) -> bool {
+        self.earliest
+    }
+
+    /// Return true if this input has been exhausted, which in turn means all
+    /// subsequent searches will return no matches.
+    ///
+    /// This occurs precisely when the start position of this search is greater
+    /// than the end position of the search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert!(!input.is_done());
+    /// input.set_start(6);
+    /// assert!(!input.is_done());
+    /// input.set_start(7);
+    /// assert!(input.is_done());
+    /// ```
+    #[inline]
+    pub fn is_done(&self) -> bool {
+        self.get_span().start > self.get_span().end
+    }
+}
+
+impl<'h> core::fmt::Debug for Input<'h> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let mut fmter = f.debug_struct("Input");
+        match core::str::from_utf8(self.haystack()) {
+            Ok(nice) => fmter.field("haystack", &nice),
+            Err(_) => fmter.field("haystack", &self.haystack()),
+        }
+        .field("span", &self.span)
+        .field("anchored", &self.anchored)
+        .field("earliest", &self.earliest)
+        .finish()
+    }
+}
+
+impl<'h, H: ?Sized + AsRef<[u8]>> From<&'h H> for Input<'h> {
+    #[inline]
+    fn from(haystack: &'h H) -> Input<'h> {
+        Input::new(haystack)
+    }
+}
+
+/// A representation of a range in a haystack.
+///
+/// A span corresponds to the starting and ending _byte offsets_ of a
+/// contiguous region of bytes. The starting offset is inclusive while the
+/// ending offset is exclusive. That is, a span is a half-open interval.
+///
+/// A span is used to report the offsets of a match, but it is also used to
+/// convey which region of a haystack should be searched via routines like
+/// [`Input::span`].
+///
+/// This is basically equivalent to a `std::ops::Range<usize>`, except this
+/// type implements `Copy` which makes it more ergonomic to use in the context
+/// of this crate. Indeed, `Span` exists only because `Range<usize>` does
+/// not implement `Copy`. Like a range, this implements `Index` for `[u8]`
+/// and `str`, and `IndexMut` for `[u8]`. For convenience, this also impls
+/// `From<Range>`, which means things like `Span::from(5..10)` work.
+///
+/// There are no constraints on the values of a span. It is, for example, legal
+/// to create a span where `start > end`.
+#[derive(Clone, Copy, Eq, Hash, PartialEq)]
+pub struct Span {
+    /// The start offset of the span, inclusive.
+    pub start: usize,
+    /// The end offset of the span, exclusive.
+    pub end: usize,
+}
+
+impl Span {
+    /// Returns this span as a range.
+    #[inline]
+    pub fn range(&self) -> Range<usize> {
+        Range::from(*self)
+    }
+
+    /// Returns true when this span is empty. That is, when `start >= end`.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.start >= self.end
+    }
+
+    /// Returns the length of this span.
+    ///
+    /// This returns `0` in precisely the cases that `is_empty` returns `true`.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.end.saturating_sub(self.start)
+    }
+
+    /// Returns true when the given offset is contained within this span.
+    ///
+    /// Note that an empty span contains no offsets and will always return
+    /// false.
+    #[inline]
+    pub fn contains(&self, offset: usize) -> bool {
+        !self.is_empty() && self.start <= offset && offset <= self.end
+    }
+
+    /// Returns a new span with `offset` added to this span's `start` and `end`
+    /// values.
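+    ///
+    /// For example:
+    ///
+    /// ```
+    /// use aho_corasick::Span;
+    ///
+    /// let span = Span { start: 2, end: 5 };
+    /// // Both endpoints shift by the given amount.
+    /// assert_eq!(Span { start: 12, end: 15 }, span.offset(10));
+    /// ```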
+    #[inline]
+    pub fn offset(&self, offset: usize) -> Span {
+        Span { start: self.start + offset, end: self.end + offset }
+    }
+}
+
+impl core::fmt::Debug for Span {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "{}..{}", self.start, self.end)
+    }
+}
+
+impl core::ops::Index<Span> for [u8] {
+    type Output = [u8];
+
+    #[inline]
+    fn index(&self, index: Span) -> &[u8] {
+        &self[index.range()]
+    }
+}
+
+impl core::ops::IndexMut<Span> for [u8] {
+    #[inline]
+    fn index_mut(&mut self, index: Span) -> &mut [u8] {
+        &mut self[index.range()]
+    }
+}
+
+impl core::ops::Index<Span> for str {
+    type Output = str;
+
+    #[inline]
+    fn index(&self, index: Span) -> &str {
+        &self[index.range()]
+    }
+}
+
+impl From<Range<usize>> for Span {
+    #[inline]
+    fn from(range: Range<usize>) -> Span {
+        Span { start: range.start, end: range.end }
+    }
+}
+
+impl From<Span> for Range<usize> {
+    #[inline]
+    fn from(span: Span) -> Range<usize> {
+        Range { start: span.start, end: span.end }
+    }
+}
+
+impl PartialEq<Range<usize>> for Span {
+    #[inline]
+    fn eq(&self, range: &Range<usize>) -> bool {
+        self.start == range.start && self.end == range.end
+    }
+}
+
+impl PartialEq<Span> for Range<usize> {
+    #[inline]
+    fn eq(&self, span: &Span) -> bool {
+        self.start == span.start && self.end == span.end
+    }
+}
+
+/// The type of anchored search to perform.
+///
+/// If an Aho-Corasick searcher does not support the anchored mode selected,
+/// then the search will return an error or panic, depending on whether a
+/// fallible or an infallible routine was called.
+#[non_exhaustive]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Anchored {
+    /// Run an unanchored search. This means a match may occur anywhere at or
+    /// after the start position of the search up until the end position of the
+    /// search.
+    No,
+    /// Run an anchored search. This means that a match must begin at the start
+    /// position of the search and end before the end position of the search.
+    Yes,
+}
+
+impl Anchored {
+    /// Returns true if and only if this anchor mode corresponds to an anchored
+    /// search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Anchored;
+    ///
+    /// assert!(!Anchored::No.is_anchored());
+    /// assert!(Anchored::Yes.is_anchored());
+    /// ```
+    #[inline]
+    pub fn is_anchored(&self) -> bool {
+        matches!(*self, Anchored::Yes)
+    }
+}
+
+/// A representation of a match reported by an Aho-Corasick searcher.
+///
+/// A match has two essential pieces of information: the [`PatternID`] that
+/// matches, and the [`Span`] of the match in a haystack.
+///
+/// The pattern is identified by an ID, which corresponds to its position
+/// (starting from `0`) relative to other patterns used to construct the
+/// corresponding searcher. If only a single pattern is provided, then all
+/// matches are guaranteed to have a pattern ID of `0`.
+///
+/// Every match reported by a searcher guarantees that its span's start offset
+/// is less than or equal to its end offset.
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct Match {
+    /// The pattern ID.
+    pattern: PatternID,
+    /// The underlying match span.
+    span: Span,
+}
+
+impl Match {
+    /// Create a new match from a pattern ID and a span.
+    ///
+    /// This constructor is generic over how a span is provided. While
+    /// a [`Span`] may be given directly, one may also provide a
+    /// `std::ops::Range<usize>`.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `end < start`.
+    ///
+    /// # Example
+    ///
+    /// This shows how to create a match for the first pattern in an
+    /// Aho-Corasick searcher using convenient range syntax.
+    ///
+    /// ```
+    /// use aho_corasick::{Match, PatternID};
+    ///
+    /// let m = Match::new(PatternID::ZERO, 5..10);
+    /// assert_eq!(0, m.pattern().as_usize());
+    /// assert_eq!(5, m.start());
+    /// assert_eq!(10, m.end());
+    /// ```
+    #[inline]
+    pub fn new<S: Into<Span>>(pattern: PatternID, span: S) -> Match {
+        let span = span.into();
+        assert!(span.start <= span.end, "invalid match span");
+        Match { pattern, span }
+    }
+
+    /// Create a new match from a pattern ID and a byte offset span.
+    ///
+    /// This constructor is generic over how a span is provided. While
+    /// a [`Span`] may be given directly, one may also provide a
+    /// `std::ops::Range<usize>`.
+    ///
+    /// This is like [`Match::new`], but accepts a `usize` instead of a
+    /// [`PatternID`]. This panics if the given `usize` is not representable
+    /// as a `PatternID`.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `end < start` or if `pattern > PatternID::MAX`.
+    ///
+    /// # Example
+    ///
+    /// This shows how to create a match for the third pattern in an
+    /// Aho-Corasick searcher using convenient range syntax.
+    ///
+    /// ```
+    /// use aho_corasick::Match;
+    ///
+    /// let m = Match::must(3, 5..10);
+    /// assert_eq!(3, m.pattern().as_usize());
+    /// assert_eq!(5, m.start());
+    /// assert_eq!(10, m.end());
+    /// ```
+    #[inline]
+    pub fn must<S: Into<Span>>(pattern: usize, span: S) -> Match {
+        Match::new(PatternID::must(pattern), span)
+    }
+
+    /// Returns the ID of the pattern that matched.
+    ///
+    /// The ID of a pattern is derived from the position in which it was
+    /// originally inserted into the corresponding searcher. The first pattern
+    /// has identifier `0`, and each subsequent pattern is `1`, `2` and so on.
+    #[inline]
+    pub fn pattern(&self) -> PatternID {
+        self.pattern
+    }
+
+    /// The starting position of the match.
+    ///
+    /// This is a convenience routine for `Match::span().start`.
+    #[inline]
+    pub fn start(&self) -> usize {
+        self.span().start
+    }
+
+    /// The ending position of the match.
+    ///
+    /// This is a convenience routine for `Match::span().end`.
+    #[inline]
+    pub fn end(&self) -> usize {
+        self.span().end
+    }
+
+    /// Returns the match span as a range.
+    ///
+    /// This is a convenience routine for `Match::span().range()`.
+    #[inline]
+    pub fn range(&self) -> core::ops::Range<usize> {
+        self.span().range()
+    }
+
+    /// Returns the span for this match.
+    #[inline]
+    pub fn span(&self) -> Span {
+        self.span
+    }
+
+    /// Returns true when the span in this match is empty.
+    ///
+    /// An empty match can only be returned when an empty pattern is in the
+    /// Aho-Corasick searcher.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.span().is_empty()
+    }
+
+    /// Returns the length of this match.
+    ///
+    /// This returns `0` in precisely the cases that `is_empty` returns `true`.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.span().len()
+    }
+
+    /// Returns a new match with `offset` added to its span's `start` and `end`
+    /// values.
+    #[inline]
+    pub fn offset(&self, offset: usize) -> Match {
+        Match {
+            pattern: self.pattern,
+            span: Span {
+                start: self.start() + offset,
+                end: self.end() + offset,
+            },
+        }
+    }
+}
+
+/// A knob for controlling the match semantics of an Aho-Corasick automaton.
+///
+/// There are two generally different ways that Aho-Corasick automatons can
+/// report matches. The first way is the "standard" approach that results from
+/// implementing most textbook explanations of Aho-Corasick. The second way is
+/// to report only the leftmost non-overlapping matches. The leftmost approach
+/// is in turn split into two different ways of resolving ambiguous matches:
+/// leftmost-first and leftmost-longest.
+///
+/// The `Standard` match kind is the default and is the only one that supports
+/// overlapping matches and stream searching. (Trying to find overlapping or
+/// streaming matches using leftmost match semantics will result in an error in
+/// fallible APIs and a panic when using infallible APIs.) The `Standard` match
+/// kind will report matches as they are seen. When searching for overlapping
+/// matches, then all possible matches are reported. When searching for
+/// non-overlapping matches, the first match seen is reported. For example, for
+/// non-overlapping matches, given the patterns `abcd` and `b` and the haystack
+/// `abcdef`, only a match for `b` is reported since it is detected first. The
+/// `abcd` match is never reported since it overlaps with the `b` match.
+///
+/// In contrast, the leftmost match kind always prefers the leftmost match
+/// among all possible matches. Given the same example as above with `abcd` and
+/// `b` as patterns and `abcdef` as the haystack, the leftmost match is `abcd`
+/// since it begins before the `b` match, even though the `b` match is detected
+/// before the `abcd` match. In this case, the `b` match is not reported at all
+/// since it overlaps with the `abcd` match.
+///
+/// The difference between leftmost-first and leftmost-longest is in how they
+/// resolve ambiguous matches when there are multiple leftmost matches to
+/// choose from. Leftmost-first always chooses the pattern that was provided
+/// earliest, whereas leftmost-longest always chooses the longest matching
+/// pattern. For example, given the patterns `a` and `ab` and the subject
+/// string `ab`, the leftmost-first match is `a` but the leftmost-longest match
+/// is `ab`. Conversely, if the patterns were given in reverse order, i.e.,
+/// `ab` and `a`, then both the leftmost-first and leftmost-longest matches
+/// would be `ab`. Stated differently, the leftmost-first match depends on the
+/// order in which the patterns were given to the Aho-Corasick automaton.
+/// Because of that, when leftmost-first matching is used, if a pattern `A`
+/// that appears before a pattern `B` is a prefix of `B`, then it is impossible
+/// to ever observe a match of `B`.
+///
+/// If you're not sure which match kind to pick, then stick with the standard
+/// kind, which is the default. In particular, if you need overlapping or
+/// streaming matches, then you _must_ use the standard kind. The leftmost
+/// kinds are useful in specific circumstances. For example, leftmost-first can
+/// be very useful as a way to implement match priority based on the order of
+/// patterns given and leftmost-longest can be useful for dictionary searching
+/// such that only the longest matching words are reported.
+///
+/// # Relationship with regular expression alternations
+///
+/// Understanding match semantics can be a little tricky, and one easy way
+/// to conceptualize non-overlapping matches from an Aho-Corasick automaton
+/// is to think about them as a simple alternation of literals in a regular
+/// expression. For example, let's say we wanted to match the strings
+/// `Sam` and `Samwise`, which would turn into the regex `Sam|Samwise`. It
+/// turns out that regular expression engines have two different ways of
+/// matching this alternation. The first way, leftmost-longest, is commonly
+/// found in POSIX compatible implementations of regular expressions (such as
+/// `grep`). The second way, leftmost-first, is commonly found in backtracking
+/// implementations such as Perl. (Some regex engines, such as RE2 and Rust's
+/// regex engine, do not use backtracking, but still implement leftmost-first
+/// semantics in an effort to match the behavior of dominant backtracking
+/// regex engines such as those found in Perl, Ruby, Python, JavaScript and
+/// PHP.)
+///
+/// That is, when matching `Sam|Samwise` against `Samwise`, a POSIX regex
+/// will match `Samwise` because it is the longest possible match, but a
+/// Perl-like regex will match `Sam` since it appears earlier in the
+/// alternation. Indeed, the regex `Sam|Samwise` in a Perl-like regex engine
+/// will never match `Samwise` since `Sam` will always have higher priority.
+/// Conversely, matching the regex `Samwise|Sam` against `Samwise` will lead to
+/// a match of `Samwise` in both POSIX and Perl-like regexes since `Samwise` is
+/// still the longest match, but it also appears earlier than `Sam`.
+///
+/// The "standard" match semantics of Aho-Corasick generally don't correspond
+/// to the match semantics of any large group of regex implementations, so
+/// there's no direct analogy that can be made here. Standard match semantics
+/// are generally useful for overlapping matches, or if you just want to see
+/// matches as they are detected.
+///
+/// The main conclusion to draw from this section is that the match semantics
+/// can be tweaked to precisely match either Perl-like regex alternations or
+/// POSIX regex alternations.
+#[non_exhaustive]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum MatchKind {
+    /// Use standard match semantics, which support overlapping matches. When
+    /// used with non-overlapping matches, matches are reported as they are
+    /// seen.
+    Standard,
+    /// Use leftmost-first match semantics, which reports leftmost matches.
+    /// When there are multiple possible leftmost matches, the match
+    /// corresponding to the pattern that appeared earlier when constructing
+    /// the automaton is reported.
+    ///
+    /// This does **not** support overlapping matches or stream searching. If
+    /// this match kind is used, attempting to find overlapping matches or
+    /// stream matches will fail.
+    LeftmostFirst,
+    /// Use leftmost-longest match semantics, which reports leftmost matches.
+    /// When there are multiple possible leftmost matches, the longest match
+    /// is chosen.
+    ///
+    /// This does **not** support overlapping matches or stream searching. If
+    /// this match kind is used, attempting to find overlapping matches or
+    /// stream matches will fail.
+    LeftmostLongest,
+}
+
+/// The default match kind is `MatchKind::Standard`.
+impl Default for MatchKind {
+    fn default() -> MatchKind {
+        MatchKind::Standard
+    }
+}
+
+impl MatchKind {
+    #[inline]
+    pub(crate) fn is_standard(&self) -> bool {
+        matches!(*self, MatchKind::Standard)
+    }
+
+    #[inline]
+    pub(crate) fn is_leftmost(&self) -> bool {
+        matches!(*self, MatchKind::LeftmostFirst | MatchKind::LeftmostLongest)
+    }
+
+    #[inline]
+    pub(crate) fn is_leftmost_first(&self) -> bool {
+        matches!(*self, MatchKind::LeftmostFirst)
+    }
+
+    /// Convert this match kind into a packed match kind. If this match kind
+    /// corresponds to standard semantics, then this returns None, since
+    /// packed searching does not support standard semantics.
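+    //
+    // A sketch (an editorial illustration, not upstream code) of how the two
+    // leftmost semantics documented on `MatchKind` above play out in practice
+    // with the crate's public API, using the `Sam`/`Samwise` example:
+    //
+    //     use aho_corasick::{AhoCorasick, MatchKind};
+    //
+    //     let ac = AhoCorasick::builder()
+    //         .match_kind(MatchKind::LeftmostFirst)
+    //         .build(["Sam", "Samwise"])
+    //         .unwrap();
+    //     let mat = ac.find("Samwise").unwrap();
+    //     // `Sam` appears first among the patterns, so it wins.
+    //     assert_eq!("Sam", &"Samwise"[mat.span()]);
+    //
+    //     let ac = AhoCorasick::builder()
+    //         .match_kind(MatchKind::LeftmostLongest)
+    //         .build(["Sam", "Samwise"])
+    //         .unwrap();
+    //     let mat = ac.find("Samwise").unwrap();
+    //     // The longest leftmost match wins instead.
+    //     assert_eq!("Samwise", &"Samwise"[mat.span()]);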
+    #[inline]
+    pub(crate) fn as_packed(&self) -> Option<crate::packed::MatchKind> {
+        match *self {
+            MatchKind::Standard => None,
+            MatchKind::LeftmostFirst => {
+                Some(crate::packed::MatchKind::LeftmostFirst)
+            }
+            MatchKind::LeftmostLongest => {
+                Some(crate::packed::MatchKind::LeftmostLongest)
+            }
+        }
+    }
+}
+
+/// The kind of anchored starting configurations to support in an Aho-Corasick
+/// searcher.
+///
+/// Depending on which searcher is used internally by
+/// [`AhoCorasick`](crate::AhoCorasick), supporting both unanchored
+/// and anchored searches can be quite costly. For this reason,
+/// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind)
+/// can be used to configure whether your searcher supports unanchored,
+/// anchored or both kinds of searches.
+///
+/// This searcher configuration knob works in concert with the search time
+/// configuration [`Input::anchored`]. Namely, if one requests an unsupported
+/// anchored mode, then the search will either panic or return an error,
+/// depending on whether you're using infallible or fallible APIs, respectively.
+///
+/// `AhoCorasick` by default only supports unanchored searches.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum StartKind {
+    /// Support both anchored and unanchored searches.
+    Both,
+    /// Support only unanchored searches. Requesting an anchored search will
+    /// return an error in fallible APIs and panic in infallible APIs.
+    Unanchored,
+    /// Support only anchored searches. Requesting an unanchored search will
+    /// return an error in fallible APIs and panic in infallible APIs.
+    Anchored,
+}
+
+impl Default for StartKind {
+    fn default() -> StartKind {
+        StartKind::Unanchored
+    }
+}
diff --git a/vendor/aho-corasick/src/util/special.rs b/vendor/aho-corasick/src/util/special.rs
new file mode 100644
index 0000000..beeba40
--- /dev/null
+++ b/vendor/aho-corasick/src/util/special.rs
@@ -0,0 +1,42 @@
+use crate::util::primitives::StateID;
+
+/// A collection of sentinel state IDs for Aho-Corasick automata.
+///
+/// This specifically enables the technique by which we determine which states
+/// are dead, matches or start states. Namely, by arranging states in a
+/// particular order, we can determine the type of a state simply by looking at
+/// its ID.
+#[derive(Clone, Debug)]
+pub(crate) struct Special {
+    /// The maximum ID of all the "special" states. This corresponds either to
+    /// start_anchored_id when a prefilter is active or max_match_id when a
+    /// prefilter is not active. The idea here is that if there is no prefilter,
+    /// then there is no point in treating start states as special.
+    pub(crate) max_special_id: StateID,
+    /// The maximum ID of all the match states. Any state ID bigger than this
+    /// is guaranteed to be a non-match ID.
+    ///
+    /// It is possible and legal for max_match_id to be equal to
+    /// start_anchored_id, which occurs precisely in the case where the empty
+    /// string is a pattern that was added to the underlying automaton.
+    pub(crate) max_match_id: StateID,
+    /// The state ID of the start state used for unanchored searches.
+    pub(crate) start_unanchored_id: StateID,
+    /// The state ID of the start state used for anchored searches. This is
+    /// always start_unanchored_id+1.
+    pub(crate) start_anchored_id: StateID,
+}
+
+impl Special {
+    /// Create a new set of "special" state IDs with all IDs initialized to
+    /// zero. The general idea here is that they will be updated and set to
+    /// correct values later.
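+    //
+    // A sketch of the kind of check this arrangement enables (an editorial
+    // illustration, not upstream code; it assumes `StateID`'s derived `Ord`):
+    // with every special state packed at the low end of the ID space,
+    // classifying a state costs a single integer comparison, e.g.
+    //
+    //     fn is_special(special: &Special, sid: StateID) -> bool {
+    //         sid <= special.max_special_id
+    //     }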
+ pub(crate) fn zero() -> Special { + Special { + max_special_id: StateID::ZERO, + max_match_id: StateID::ZERO, + start_unanchored_id: StateID::ZERO, + start_anchored_id: StateID::ZERO, + } + } +} diff --git a/vendor/autocfg/.cargo-checksum.json b/vendor/autocfg/.cargo-checksum.json deleted file mode 100644 index e4cc3fc..0000000 --- a/vendor/autocfg/.cargo-checksum.json +++ /dev/null @@ -1 +0,0 @@ -{"files":{"Cargo.lock":"3d91565ed13de572a9ebde408a0c98e33f931d6ab52f212b0830a60b4ab26b77","Cargo.toml":"39f627122dceaad42146634719fde802fca3baa1b3908753af723074ae2a6d69","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"27995d58ad5c1145c1a8cd86244ce844886958a35eb2b78c6b772748669999ac","README.md":"4c8f9b5016f2a0c3dbeca5bc41241f57db5568f803e58c1fa480ae2b3638d0a9","examples/integers.rs":"589ff4271566dfa322becddf3e2c7b592e6e0bc97b02892ce75619b7e452e930","examples/paths.rs":"1b30e466b824ce8df7ad0a55334424131d9d2573d6cf9f7d5d50c09c8901d526","examples/traits.rs":"cbee6a3e1f7db60b02ae25b714926517144a77cb492021f492774cf0e1865a9e","examples/versions.rs":"38535e6d9f5bfae0de474a3db79a40e8f5da8ba9334c5ff4c363de9bc99d4d12","src/error.rs":"12de7dafea4a35d1dc2f0fa79bfa038386bbbea72bf083979f4ddf227999eeda","src/lib.rs":"6fa01458e8f9258d84f83ead24fdb0cdf9aec10838b0262f1dfbdf79c530c537","src/tests.rs":"f0e6dc1ad9223c0336c02e215ea3940acb2af6c3bc8fd791e16cd4e786e6a608","src/version.rs":"175727d5f02f2fe2271ddc9b041db2a5b9c6fe0f95afd17c73a4d982612764a3","tests/rustflags.rs":"5c8169b88216055019db61b5d7baf4abdf675e3b14b54f5037bb1e3acd0a5d3f"},"package":"d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"} \ No newline at end of file diff --git a/vendor/autocfg/README.md b/vendor/autocfg/README.md deleted file mode 100644 index 8c95696..0000000 --- a/vendor/autocfg/README.md +++ /dev/null @@ -1,95 +0,0 @@ -autocfg -======= - -[![autocfg crate](https://img.shields.io/crates/v/autocfg.svg)](https://crates.io/crates/autocfg) -[![autocfg documentation](https://docs.rs/autocfg/badge.svg)](https://docs.rs/autocfg) -![minimum rustc 1.0](https://img.shields.io/badge/rustc-1.0+-red.svg) -![build status](https://github.com/cuviper/autocfg/workflows/master/badge.svg) - -A Rust library for build scripts to automatically configure code based on -compiler support. Code snippets are dynamically tested to see if the `rustc` -will accept them, rather than hard-coding specific version support. - - -## Usage - -Add this to your `Cargo.toml`: - -```toml -[build-dependencies] -autocfg = "1" -``` - -Then use it in your `build.rs` script to detect compiler features. For -example, to test for 128-bit integer support, it might look like: - -```rust -extern crate autocfg; - -fn main() { - let ac = autocfg::new(); - ac.emit_has_type("i128"); - - // (optional) We don't need to rerun for anything external. - autocfg::rerun_path("build.rs"); -} -``` - -If the type test succeeds, this will write a `cargo:rustc-cfg=has_i128` line -for Cargo, which translates to Rust arguments `--cfg has_i128`. Then in the -rest of your Rust code, you can add `#[cfg(has_i128)]` conditions on code that -should only be used when the compiler supports it. - - -## Release Notes - -- 1.1.0 (2022-02-07) - - Use `CARGO_ENCODED_RUSTFLAGS` when it is set. - -- 1.0.1 (2020-08-20) - - Apply `RUSTFLAGS` for more `--target` scenarios, by @adamreichold. - -- 1.0.0 (2020-01-08) - - 🎉 Release 1.0! 🎉 (no breaking changes) - - Add `probe_expression` and `emit_expression_cfg` to test arbitrary expressions. 
- - Add `probe_constant` and `emit_constant_cfg` to test arbitrary constant expressions. - -- 0.1.7 (2019-10-20) - - Apply `RUSTFLAGS` when probing `$TARGET != $HOST`, mainly for sysroot, by @roblabla. - -- 0.1.6 (2019-08-19) - - Add `probe`/`emit_sysroot_crate`, by @leo60228. - -- 0.1.5 (2019-07-16) - - Mask some warnings from newer rustc. - -- 0.1.4 (2019-05-22) - - Relax `std`/`no_std` probing to a warning instead of an error. - - Improve `rustc` bootstrap compatibility. - -- 0.1.3 (2019-05-21) - - Auto-detects if `#![no_std]` is needed for the `$TARGET`. - -- 0.1.2 (2019-01-16) - - Add `rerun_env(ENV)` to print `cargo:rerun-if-env-changed=ENV`. - - Add `rerun_path(PATH)` to print `cargo:rerun-if-changed=PATH`. - - -## Minimum Rust version policy - -This crate's minimum supported `rustc` version is `1.0.0`. Compatibility is -its entire reason for existence, so this crate will be extremely conservative -about raising this requirement. If this is ever deemed necessary, it will be -treated as a major breaking change for semver purposes. - - -## License - -This project is licensed under either of - - * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or - http://www.apache.org/licenses/LICENSE-2.0) - * MIT license ([LICENSE-MIT](LICENSE-MIT) or - http://opensource.org/licenses/MIT) - -at your option. diff --git a/vendor/autocfg/examples/integers.rs b/vendor/autocfg/examples/integers.rs deleted file mode 100644 index 23d4cba..0000000 --- a/vendor/autocfg/examples/integers.rs +++ /dev/null @@ -1,9 +0,0 @@ -extern crate autocfg; - -fn main() { - // Normally, cargo will set `OUT_DIR` for build scripts. - let ac = autocfg::AutoCfg::with_dir("target").unwrap(); - for i in 3..8 { - ac.emit_has_type(&format!("i{}", 1 << i)); - } -} diff --git a/vendor/autocfg/examples/paths.rs b/vendor/autocfg/examples/paths.rs deleted file mode 100644 index b7a6ca7..0000000 --- a/vendor/autocfg/examples/paths.rs +++ /dev/null @@ -1,22 +0,0 @@ -extern crate autocfg; - -fn main() { - // Normally, cargo will set `OUT_DIR` for build scripts. - let ac = autocfg::AutoCfg::with_dir("target").unwrap(); - - // since ancient times... - ac.emit_has_path("std::vec::Vec"); - ac.emit_path_cfg("std::vec::Vec", "has_vec"); - - // rustc 1.10.0 - ac.emit_has_path("std::panic::PanicInfo"); - ac.emit_path_cfg("std::panic::PanicInfo", "has_panic_info"); - - // rustc 1.20.0 - ac.emit_has_path("std::mem::ManuallyDrop"); - ac.emit_path_cfg("std::mem::ManuallyDrop", "has_manually_drop"); - - // rustc 1.25.0 - ac.emit_has_path("std::ptr::NonNull"); - ac.emit_path_cfg("std::ptr::NonNull", "has_non_null"); -} diff --git a/vendor/autocfg/examples/traits.rs b/vendor/autocfg/examples/traits.rs deleted file mode 100644 index c1ca003..0000000 --- a/vendor/autocfg/examples/traits.rs +++ /dev/null @@ -1,26 +0,0 @@ -extern crate autocfg; - -fn main() { - // Normally, cargo will set `OUT_DIR` for build scripts. - let ac = autocfg::AutoCfg::with_dir("target").unwrap(); - - // since ancient times... 
-    ac.emit_has_trait("std::ops::Add");
-    ac.emit_trait_cfg("std::ops::Add", "has_ops");
-
-    // trait parameters have to be provided
-    ac.emit_has_trait("std::borrow::Borrow<str>");
-    ac.emit_trait_cfg("std::borrow::Borrow<str>", "has_borrow");
-
-    // rustc 1.8.0
-    ac.emit_has_trait("std::ops::AddAssign");
-    ac.emit_trait_cfg("std::ops::AddAssign", "has_assign_ops");
-
-    // rustc 1.12.0
-    ac.emit_has_trait("std::iter::Sum");
-    ac.emit_trait_cfg("std::iter::Sum", "has_sum");
-
-    // rustc 1.28.0
-    ac.emit_has_trait("std::alloc::GlobalAlloc");
-    ac.emit_trait_cfg("std::alloc::GlobalAlloc", "has_global_alloc");
-}
diff --git a/vendor/autocfg/examples/versions.rs b/vendor/autocfg/examples/versions.rs
deleted file mode 100644
index 992919b..0000000
--- a/vendor/autocfg/examples/versions.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-extern crate autocfg;
-
-fn main() {
-    // Normally, cargo will set `OUT_DIR` for build scripts.
-    let ac = autocfg::AutoCfg::with_dir("target").unwrap();
-    for i in 0..100 {
-        ac.emit_rustc_version(1, i);
-    }
-}
diff --git a/vendor/autocfg/src/error.rs b/vendor/autocfg/src/error.rs
deleted file mode 100644
index 4624835..0000000
--- a/vendor/autocfg/src/error.rs
+++ /dev/null
@@ -1,69 +0,0 @@
-use std::error;
-use std::fmt;
-use std::io;
-use std::num;
-use std::str;
-
-/// A common error type for the `autocfg` crate.
-#[derive(Debug)]
-pub struct Error {
-    kind: ErrorKind,
-}
-
-impl error::Error for Error {
-    fn description(&self) -> &str {
-        "AutoCfg error"
-    }
-
-    fn cause(&self) -> Option<&error::Error> {
-        match self.kind {
-            ErrorKind::Io(ref e) => Some(e),
-            ErrorKind::Num(ref e) => Some(e),
-            ErrorKind::Utf8(ref e) => Some(e),
-            ErrorKind::Other(_) => None,
-        }
-    }
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
-        match self.kind {
-            ErrorKind::Io(ref e) => e.fmt(f),
-            ErrorKind::Num(ref e) => e.fmt(f),
-            ErrorKind::Utf8(ref e) => e.fmt(f),
-            ErrorKind::Other(s) => s.fmt(f),
-        }
-    }
-}
-
-#[derive(Debug)]
-enum ErrorKind {
-    Io(io::Error),
-    Num(num::ParseIntError),
-    Utf8(str::Utf8Error),
-    Other(&'static str),
-}
-
-pub fn from_io(e: io::Error) -> Error {
-    Error {
-        kind: ErrorKind::Io(e),
-    }
-}
-
-pub fn from_num(e: num::ParseIntError) -> Error {
-    Error {
-        kind: ErrorKind::Num(e),
-    }
-}
-
-pub fn from_utf8(e: str::Utf8Error) -> Error {
-    Error {
-        kind: ErrorKind::Utf8(e),
-    }
-}
-
-pub fn from_str(s: &'static str) -> Error {
-    Error {
-        kind: ErrorKind::Other(s),
-    }
-}
diff --git a/vendor/autocfg/src/lib.rs b/vendor/autocfg/src/lib.rs
deleted file mode 100644
index cbe393a..0000000
--- a/vendor/autocfg/src/lib.rs
+++ /dev/null
@@ -1,453 +0,0 @@
-//! A Rust library for build scripts to automatically configure code based on
-//! compiler support. Code snippets are dynamically tested to see if the `rustc`
-//! will accept them, rather than hard-coding specific version support.
-//!
-//!
-//! ## Usage
-//!
-//! Add this to your `Cargo.toml`:
-//!
-//! ```toml
-//! [build-dependencies]
-//! autocfg = "1"
-//! ```
-//!
-//! Then use it in your `build.rs` script to detect compiler features. For
-//! example, to test for 128-bit integer support, it might look like:
-//!
-//! ```rust
-//! extern crate autocfg;
-//!
-//! fn main() {
-//! #   // Normally, cargo will set `OUT_DIR` for build scripts.
-//! #   std::env::set_var("OUT_DIR", "target");
-//!     let ac = autocfg::new();
-//!     ac.emit_has_type("i128");
-//!
-//!     // (optional) We don't need to rerun for anything external.
-//!     autocfg::rerun_path("build.rs");
-//! }
-//! ```
-//!
-//! If the type test succeeds, this will write a `cargo:rustc-cfg=has_i128` line
-//! for Cargo, which translates to Rust arguments `--cfg has_i128`. Then in the
-//! rest of your Rust code, you can add `#[cfg(has_i128)]` conditions on code that
-//! should only be used when the compiler supports it.
-//!
-//! ## Caution
-//!
-//! Many of the probing methods of `AutoCfg` document the particular template they
-//! use, **subject to change**. The inputs are not validated to make sure they are
-//! semantically correct for their expected use, so it's _possible_ to escape and
-//! inject something unintended. However, such abuse is unsupported and will not
-//! be considered when making changes to the templates.
-
-#![deny(missing_debug_implementations)]
-#![deny(missing_docs)]
-// allow future warnings that can't be fixed while keeping 1.0 compatibility
-#![allow(unknown_lints)]
-#![allow(bare_trait_objects)]
-#![allow(ellipsis_inclusive_range_patterns)]
-
-/// Local macro to avoid `std::try!`, deprecated in Rust 1.39.
-macro_rules! try {
-    ($result:expr) => {
-        match $result {
-            Ok(value) => value,
-            Err(error) => return Err(error),
-        }
-    };
-}
-
-use std::env;
-use std::ffi::OsString;
-use std::fs;
-use std::io::{stderr, Write};
-use std::path::{Path, PathBuf};
-use std::process::{Command, Stdio};
-#[allow(deprecated)]
-use std::sync::atomic::ATOMIC_USIZE_INIT;
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-mod error;
-pub use error::Error;
-
-mod version;
-use version::Version;
-
-#[cfg(test)]
-mod tests;
-
-/// Helper to detect compiler features for `cfg` output in build scripts.
-#[derive(Clone, Debug)]
-pub struct AutoCfg {
-    out_dir: PathBuf,
-    rustc: PathBuf,
-    rustc_version: Version,
-    target: Option<OsString>,
-    no_std: bool,
-    rustflags: Vec<String>,
-}
-
-/// Writes a config flag for rustc on standard out.
-///
-/// This looks like: `cargo:rustc-cfg=CFG`
-///
-/// Cargo will use this in arguments to rustc, like `--cfg CFG`.
-pub fn emit(cfg: &str) {
-    println!("cargo:rustc-cfg={}", cfg);
-}
-
-/// Writes a line telling Cargo to rerun the build script if `path` changes.
-///
-/// This looks like: `cargo:rerun-if-changed=PATH`
-///
-/// This requires at least cargo 0.7.0, corresponding to rustc 1.6.0. Earlier
-/// versions of cargo will simply ignore the directive.
-pub fn rerun_path(path: &str) {
-    println!("cargo:rerun-if-changed={}", path);
-}
-
-/// Writes a line telling Cargo to rerun the build script if the environment
-/// variable `var` changes.
-///
-/// This looks like: `cargo:rerun-if-env-changed=VAR`
-///
-/// This requires at least cargo 0.21.0, corresponding to rustc 1.20.0. Earlier
-/// versions of cargo will simply ignore the directive.
-pub fn rerun_env(var: &str) {
-    println!("cargo:rerun-if-env-changed={}", var);
-}
-
-/// Create a new `AutoCfg` instance.
-///
-/// # Panics
-///
-/// Panics if `AutoCfg::new()` returns an error.
-pub fn new() -> AutoCfg {
-    AutoCfg::new().unwrap()
-}
-
-impl AutoCfg {
-    /// Create a new `AutoCfg` instance.
-    ///
-    /// # Common errors
-    ///
-    /// - `rustc` can't be executed, from `RUSTC` or in the `PATH`.
-    /// - The version output from `rustc` can't be parsed.
-    /// - `OUT_DIR` is not set in the environment, or is not a writable directory.
-    ///
-    pub fn new() -> Result<Self, Error> {
-        match env::var_os("OUT_DIR") {
-            Some(d) => Self::with_dir(d),
-            None => Err(error::from_str("no OUT_DIR specified!")),
-        }
-    }
-
-    /// Create a new `AutoCfg` instance with the specified output directory.
-    ///
-    /// # Common errors
-    ///
-    /// - `rustc` can't be executed, from `RUSTC` or in the `PATH`.
-    /// - The version output from `rustc` can't be parsed.
-    /// - `dir` is not a writable directory.
-    ///
-    pub fn with_dir<T: Into<PathBuf>>(dir: T) -> Result<Self, Error> {
-        let rustc = env::var_os("RUSTC").unwrap_or_else(|| "rustc".into());
-        let rustc: PathBuf = rustc.into();
-        let rustc_version = try!(Version::from_rustc(&rustc));
-
-        let target = env::var_os("TARGET");
-
-        // Sanity check the output directory
-        let dir = dir.into();
-        let meta = try!(fs::metadata(&dir).map_err(error::from_io));
-        if !meta.is_dir() || meta.permissions().readonly() {
-            return Err(error::from_str("output path is not a writable directory"));
-        }
-
-        let mut ac = AutoCfg {
-            rustflags: rustflags(&target, &dir),
-            out_dir: dir,
-            rustc: rustc,
-            rustc_version: rustc_version,
-            target: target,
-            no_std: false,
-        };
-
-        // Sanity check with and without `std`.
-        if !ac.probe("").unwrap_or(false) {
-            ac.no_std = true;
-            if !ac.probe("").unwrap_or(false) {
-                // Neither worked, so assume nothing...
-                ac.no_std = false;
-                let warning = b"warning: autocfg could not probe for `std`\n";
-                stderr().write_all(warning).ok();
-            }
-        }
-        Ok(ac)
-    }
-
-    /// Test whether the current `rustc` reports a version greater than
-    /// or equal to "`major`.`minor`".
-    pub fn probe_rustc_version(&self, major: usize, minor: usize) -> bool {
-        self.rustc_version >= Version::new(major, minor, 0)
-    }
-
-    /// Sets a `cfg` value of the form `rustc_major_minor`, like `rustc_1_29`,
-    /// if the current `rustc` is at least that version.
-    pub fn emit_rustc_version(&self, major: usize, minor: usize) {
-        if self.probe_rustc_version(major, minor) {
-            emit(&format!("rustc_{}_{}", major, minor));
-        }
-    }
-
-    fn probe<T: AsRef<[u8]>>(&self, code: T) -> Result<bool, Error> {
-        #[allow(deprecated)]
-        static ID: AtomicUsize = ATOMIC_USIZE_INIT;
-
-        let id = ID.fetch_add(1, Ordering::Relaxed);
-        let mut command = Command::new(&self.rustc);
-        command
-            .arg("--crate-name")
-            .arg(format!("probe{}", id))
-            .arg("--crate-type=lib")
-            .arg("--out-dir")
-            .arg(&self.out_dir)
-            .arg("--emit=llvm-ir");
-
-        if let Some(target) = self.target.as_ref() {
-            command.arg("--target").arg(target);
-        }
-
-        command.args(&self.rustflags);
-
-        command.arg("-").stdin(Stdio::piped());
-        let mut child = try!(command.spawn().map_err(error::from_io));
-        let mut stdin = child.stdin.take().expect("rustc stdin");
-
-        if self.no_std {
-            try!(stdin.write_all(b"#![no_std]\n").map_err(error::from_io));
-        }
-        try!(stdin.write_all(code.as_ref()).map_err(error::from_io));
-        drop(stdin);
-
-        let status = try!(child.wait().map_err(error::from_io));
-        Ok(status.success())
-    }
-
-    /// Tests whether the given sysroot crate can be used.
-    ///
-    /// The test code is subject to change, but currently looks like:
-    ///
-    /// ```ignore
-    /// extern crate CRATE as probe;
-    /// ```
-    pub fn probe_sysroot_crate(&self, name: &str) -> bool {
-        self.probe(format!("extern crate {} as probe;", name)) // `as _` wasn't stabilized until Rust 1.33
-            .unwrap_or(false)
-    }
-
-    /// Emits a config value `has_CRATE` if `probe_sysroot_crate` returns true.
-    pub fn emit_sysroot_crate(&self, name: &str) {
-        if self.probe_sysroot_crate(name) {
-            emit(&format!("has_{}", mangle(name)));
-        }
-    }
-
-    /// Tests whether the given path can be used.
- /// - /// The test code is subject to change, but currently looks like: - /// - /// ```ignore - /// pub use PATH; - /// ``` - pub fn probe_path(&self, path: &str) -> bool { - self.probe(format!("pub use {};", path)).unwrap_or(false) - } - - /// Emits a config value `has_PATH` if `probe_path` returns true. - /// - /// Any non-identifier characters in the `path` will be replaced with - /// `_` in the generated config value. - pub fn emit_has_path(&self, path: &str) { - if self.probe_path(path) { - emit(&format!("has_{}", mangle(path))); - } - } - - /// Emits the given `cfg` value if `probe_path` returns true. - pub fn emit_path_cfg(&self, path: &str, cfg: &str) { - if self.probe_path(path) { - emit(cfg); - } - } - - /// Tests whether the given trait can be used. - /// - /// The test code is subject to change, but currently looks like: - /// - /// ```ignore - /// pub trait Probe: TRAIT + Sized {} - /// ``` - pub fn probe_trait(&self, name: &str) -> bool { - self.probe(format!("pub trait Probe: {} + Sized {{}}", name)) - .unwrap_or(false) - } - - /// Emits a config value `has_TRAIT` if `probe_trait` returns true. - /// - /// Any non-identifier characters in the trait `name` will be replaced with - /// `_` in the generated config value. - pub fn emit_has_trait(&self, name: &str) { - if self.probe_trait(name) { - emit(&format!("has_{}", mangle(name))); - } - } - - /// Emits the given `cfg` value if `probe_trait` returns true. - pub fn emit_trait_cfg(&self, name: &str, cfg: &str) { - if self.probe_trait(name) { - emit(cfg); - } - } - - /// Tests whether the given type can be used. - /// - /// The test code is subject to change, but currently looks like: - /// - /// ```ignore - /// pub type Probe = TYPE; - /// ``` - pub fn probe_type(&self, name: &str) -> bool { - self.probe(format!("pub type Probe = {};", name)) - .unwrap_or(false) - } - - /// Emits a config value `has_TYPE` if `probe_type` returns true. - /// - /// Any non-identifier characters in the type `name` will be replaced with - /// `_` in the generated config value. - pub fn emit_has_type(&self, name: &str) { - if self.probe_type(name) { - emit(&format!("has_{}", mangle(name))); - } - } - - /// Emits the given `cfg` value if `probe_type` returns true. - pub fn emit_type_cfg(&self, name: &str, cfg: &str) { - if self.probe_type(name) { - emit(cfg); - } - } - - /// Tests whether the given expression can be used. - /// - /// The test code is subject to change, but currently looks like: - /// - /// ```ignore - /// pub fn probe() { let _ = EXPR; } - /// ``` - pub fn probe_expression(&self, expr: &str) -> bool { - self.probe(format!("pub fn probe() {{ let _ = {}; }}", expr)) - .unwrap_or(false) - } - - /// Emits the given `cfg` value if `probe_expression` returns true. - pub fn emit_expression_cfg(&self, expr: &str, cfg: &str) { - if self.probe_expression(expr) { - emit(cfg); - } - } - - /// Tests whether the given constant expression can be used. - /// - /// The test code is subject to change, but currently looks like: - /// - /// ```ignore - /// pub const PROBE: () = ((), EXPR).0; - /// ``` - pub fn probe_constant(&self, expr: &str) -> bool { - self.probe(format!("pub const PROBE: () = ((), {}).0;", expr)) - .unwrap_or(false) - } - - /// Emits the given `cfg` value if `probe_constant` returns true. 
-    pub fn emit_constant_cfg(&self, expr: &str, cfg: &str) {
-        if self.probe_constant(expr) {
-            emit(cfg);
-        }
-    }
-}
-
-fn mangle(s: &str) -> String {
-    s.chars()
-        .map(|c| match c {
-            'A'...'Z' | 'a'...'z' | '0'...'9' => c,
-            _ => '_',
-        })
-        .collect()
-}
-
-fn dir_contains_target(
-    target: &Option<OsString>,
-    dir: &Path,
-    cargo_target_dir: Option<OsString>,
-) -> bool {
-    target
-        .as_ref()
-        .and_then(|target| {
-            dir.to_str().and_then(|dir| {
-                let mut cargo_target_dir = cargo_target_dir
-                    .map(PathBuf::from)
-                    .unwrap_or_else(|| PathBuf::from("target"));
-                cargo_target_dir.push(target);
-
-                cargo_target_dir
-                    .to_str()
-                    .map(|cargo_target_dir| dir.contains(&cargo_target_dir))
-            })
-        })
-        .unwrap_or(false)
-}
-
-fn rustflags(target: &Option<OsString>, dir: &Path) -> Vec<String> {
-    // Starting with rust-lang/cargo#9601, shipped in Rust 1.55, Cargo always sets
-    // CARGO_ENCODED_RUSTFLAGS for any host/target build script invocation. This
-    // includes any source of flags, whether from the environment, toml config, or
-    // whatever may come in the future. The value is either an empty string, or a
-    // list of arguments separated by the ASCII unit separator (US), 0x1f.
-    if let Ok(a) = env::var("CARGO_ENCODED_RUSTFLAGS") {
-        return if a.is_empty() {
-            Vec::new()
-        } else {
-            a.split('\x1f').map(str::to_string).collect()
-        };
-    }
-
-    // Otherwise, we have to take a more heuristic approach, and we don't
-    // support values from toml config at all.
-    //
-    // Cargo only applies RUSTFLAGS for building TARGET artifact in
-    // cross-compilation environment. Sadly, we don't have a way to detect
-    // when we're building HOST artifact in a cross-compilation environment,
-    // so for now we only apply RUSTFLAGS when cross-compiling an artifact.
-    //
-    // See https://github.com/cuviper/autocfg/pull/10#issuecomment-527575030.
-    if *target != env::var_os("HOST")
-        || dir_contains_target(target, dir, env::var_os("CARGO_TARGET_DIR"))
-    {
-        if let Ok(rustflags) = env::var("RUSTFLAGS") {
-            // This is meant to match how cargo handles the RUSTFLAGS environment variable.
-            // See https://github.com/rust-lang/cargo/blob/69aea5b6f69add7c51cca939a79644080c0b0ba0/src/cargo/core/compiler/build_context/target_info.rs#L434-L441
-            return rustflags
-                .split(' ')
-                .map(str::trim)
-                .filter(|s| !s.is_empty())
-                .map(str::to_string)
-                .collect();
-        }
-    }
-
-    Vec::new()
-}
diff --git a/vendor/autocfg/src/tests.rs b/vendor/autocfg/src/tests.rs
deleted file mode 100644
index d3b1fbb..0000000
--- a/vendor/autocfg/src/tests.rs
+++ /dev/null
@@ -1,174 +0,0 @@
-use super::AutoCfg;
-use std::env;
-use std::path::Path;
-
-impl AutoCfg {
-    fn core_std(&self, path: &str) -> String {
-        let krate = if self.no_std { "core" } else { "std" };
-        format!("{}::{}", krate, path)
-    }
-
-    fn assert_std(&self, probe_result: bool) {
-        assert_eq!(!self.no_std, probe_result);
-    }
-
-    fn assert_min(&self, major: usize, minor: usize, probe_result: bool) {
-        assert_eq!(self.probe_rustc_version(major, minor), probe_result);
-    }
-
-    fn for_test() -> Result<Self, super::Error> {
-        match env::var_os("TESTS_TARGET_DIR") {
-            Some(d) => Self::with_dir(d),
-            None => Self::with_dir("target"),
-        }
-    }
-}
-
-#[test]
-fn autocfg_version() {
-    let ac = AutoCfg::for_test().unwrap();
-    println!("version: {:?}", ac.rustc_version);
-    assert!(ac.probe_rustc_version(1, 0));
-}
-
-#[test]
-fn version_cmp() {
-    use super::version::Version;
-    let v123 = Version::new(1, 2, 3);
-
-    assert!(Version::new(1, 0, 0) < v123);
-    assert!(Version::new(1, 2, 2) < v123);
-    assert!(Version::new(1, 2, 3) == v123);
-    assert!(Version::new(1, 2, 4) > v123);
-    assert!(Version::new(1, 10, 0) > v123);
-    assert!(Version::new(2, 0, 0) > v123);
-}
-
-#[test]
-fn probe_add() {
-    let ac = AutoCfg::for_test().unwrap();
-    let add = ac.core_std("ops::Add");
-    let add_rhs = add.clone() + "<i32>";
-    let add_rhs_output = add.clone() + "<i32, Output = i32>";
-    let dyn_add_rhs_output = "dyn ".to_string() + &*add_rhs_output;
-    assert!(ac.probe_path(&add));
-    assert!(ac.probe_trait(&add));
-    assert!(ac.probe_trait(&add_rhs));
-    assert!(ac.probe_trait(&add_rhs_output));
-    ac.assert_min(1, 27, ac.probe_type(&dyn_add_rhs_output));
-}
-
-#[test]
-fn probe_as_ref() {
-    let ac = AutoCfg::for_test().unwrap();
-    let as_ref = ac.core_std("convert::AsRef");
-    let as_ref_str = as_ref.clone() + "<str>";
-    let dyn_as_ref_str = "dyn ".to_string() + &*as_ref_str;
-    assert!(ac.probe_path(&as_ref));
-    assert!(ac.probe_trait(&as_ref_str));
-    assert!(ac.probe_type(&as_ref_str));
-    ac.assert_min(1, 27, ac.probe_type(&dyn_as_ref_str));
-}
-
-#[test]
-fn probe_i128() {
-    let ac = AutoCfg::for_test().unwrap();
-    let i128_path = ac.core_std("i128");
-    ac.assert_min(1, 26, ac.probe_path(&i128_path));
-    ac.assert_min(1, 26, ac.probe_type("i128"));
-}
-
-#[test]
-fn probe_sum() {
-    let ac = AutoCfg::for_test().unwrap();
-    let sum = ac.core_std("iter::Sum");
-    let sum_i32 = sum.clone() + "<i32>";
-    let dyn_sum_i32 = "dyn ".to_string() + &*sum_i32;
-    ac.assert_min(1, 12, ac.probe_path(&sum));
-    ac.assert_min(1, 12, ac.probe_trait(&sum));
-    ac.assert_min(1, 12, ac.probe_trait(&sum_i32));
-    ac.assert_min(1, 12, ac.probe_type(&sum_i32));
-    ac.assert_min(1, 27, ac.probe_type(&dyn_sum_i32));
-}
-
-#[test]
-fn probe_std() {
-    let ac = AutoCfg::for_test().unwrap();
-    ac.assert_std(ac.probe_sysroot_crate("std"));
-}
-
-#[test]
-fn probe_alloc() {
-    let ac = AutoCfg::for_test().unwrap();
-    ac.assert_min(1, 36, ac.probe_sysroot_crate("alloc"));
-}
-
-#[test]
-fn probe_bad_sysroot_crate() {
-    let ac = AutoCfg::for_test().unwrap();
-    assert!(!ac.probe_sysroot_crate("doesnt_exist"));
-}
-
-#[test]
-fn probe_no_std() {
-    let ac = AutoCfg::for_test().unwrap();
-    assert!(ac.probe_type("i32"));
-    assert!(ac.probe_type("[i32]"));
-    ac.assert_std(ac.probe_type("Vec<i32>"));
-}
-
-#[test]
-fn probe_expression() {
-    let ac = AutoCfg::for_test().unwrap();
-    assert!(ac.probe_expression(r#""test".trim_left()"#));
-    ac.assert_min(1, 30, ac.probe_expression(r#""test".trim_start()"#));
-    ac.assert_std(ac.probe_expression("[1, 2, 3].to_vec()"));
-}
-
-#[test]
-fn probe_constant() {
-    let ac = AutoCfg::for_test().unwrap();
-    assert!(ac.probe_constant("1 + 2 + 3"));
-    ac.assert_min(1, 33, ac.probe_constant("{ let x = 1 + 2 + 3; x * x }"));
-    ac.assert_min(1, 39, ac.probe_constant(r#""test".len()"#));
-}
-
-#[test]
-fn dir_does_not_contain_target() {
-    assert!(!super::dir_contains_target(
-        &Some("x86_64-unknown-linux-gnu".into()),
-        Path::new("/project/target/debug/build/project-ea75983148559682/out"),
-        None,
-    ));
-}
-
-#[test]
-fn dir_does_contain_target() {
-    assert!(super::dir_contains_target(
-        &Some("x86_64-unknown-linux-gnu".into()),
-        Path::new(
-            "/project/target/x86_64-unknown-linux-gnu/debug/build/project-0147aca016480b9d/out"
-        ),
-        None,
-    ));
-}
-
-#[test]
-fn dir_does_not_contain_target_with_custom_target_dir() {
-    assert!(!super::dir_contains_target(
-        &Some("x86_64-unknown-linux-gnu".into()),
-        Path::new("/project/custom/debug/build/project-ea75983148559682/out"),
-        Some("custom".into()),
-    ));
-}
-
-#[test]
-fn dir_does_contain_target_with_custom_target_dir() {
-    assert!(super::dir_contains_target(
-        &Some("x86_64-unknown-linux-gnu".into()),
-        Path::new(
-            "/project/custom/x86_64-unknown-linux-gnu/debug/build/project-0147aca016480b9d/out"
-        ),
-        Some("custom".into()),
-    ));
-}
diff --git a/vendor/autocfg/src/version.rs b/vendor/autocfg/src/version.rs
deleted file mode 100644
index 378c21e..0000000
--- a/vendor/autocfg/src/version.rs
+++ /dev/null
@@ -1,60 +0,0 @@
-use std::path::Path;
-use std::process::Command;
-use std::str;
-
-use super::{error, Error};
-
-/// A version structure for making relative comparisons.
-#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub struct Version {
-    major: usize,
-    minor: usize,
-    patch: usize,
-}
-
-impl Version {
-    /// Creates a `Version` instance for a specific `major.minor.patch` version.
-    pub fn new(major: usize, minor: usize, patch: usize) -> Self {
-        Version {
-            major: major,
-            minor: minor,
-            patch: patch,
-        }
-    }
-
-    pub fn from_rustc(rustc: &Path) -> Result<Self, Error> {
-        // Get rustc's verbose version
-        let output = try!(Command::new(rustc)
-            .args(&["--version", "--verbose"])
-            .output()
-            .map_err(error::from_io));
-        if !output.status.success() {
-            return Err(error::from_str("could not execute rustc"));
-        }
-        let output = try!(str::from_utf8(&output.stdout).map_err(error::from_utf8));
-
-        // Find the release line in the verbose version output.
-        let release = match output.lines().find(|line| line.starts_with("release: ")) {
-            Some(line) => &line["release: ".len()..],
-            None => return Err(error::from_str("could not find rustc release")),
-        };
-
-        // Strip off any extra channel info, e.g. "-beta.N", "-nightly"
-        let version = match release.find('-') {
-            Some(i) => &release[..i],
-            None => release,
-        };
-
-        // Split the version into semver components.
- let mut iter = version.splitn(3, '.'); - let major = try!(iter.next().ok_or(error::from_str("missing major version"))); - let minor = try!(iter.next().ok_or(error::from_str("missing minor version"))); - let patch = try!(iter.next().ok_or(error::from_str("missing patch version"))); - - Ok(Version::new( - try!(major.parse().map_err(error::from_num)), - try!(minor.parse().map_err(error::from_num)), - try!(patch.parse().map_err(error::from_num)), - )) - } -} diff --git a/vendor/autocfg/tests/rustflags.rs b/vendor/autocfg/tests/rustflags.rs deleted file mode 100644 index f054546..0000000 --- a/vendor/autocfg/tests/rustflags.rs +++ /dev/null @@ -1,33 +0,0 @@ -extern crate autocfg; - -use std::env; - -/// Tests that autocfg uses the RUSTFLAGS or CARGO_ENCODED_RUSTFLAGS -/// environment variables when running rustc. -#[test] -fn test_with_sysroot() { - // Use the same path as this test binary. - let dir = env::current_exe().unwrap().parent().unwrap().to_path_buf(); - env::set_var("OUT_DIR", &format!("{}", dir.display())); - - // If we have encoded rustflags, they take precedence, even if empty. - env::set_var("CARGO_ENCODED_RUSTFLAGS", ""); - env::set_var("RUSTFLAGS", &format!("-L {}", dir.display())); - let ac = autocfg::AutoCfg::new().unwrap(); - assert!(ac.probe_sysroot_crate("std")); - assert!(!ac.probe_sysroot_crate("autocfg")); - - // Now try again with useful encoded args. - env::set_var( - "CARGO_ENCODED_RUSTFLAGS", - &format!("-L\x1f{}", dir.display()), - ); - let ac = autocfg::AutoCfg::new().unwrap(); - assert!(ac.probe_sysroot_crate("autocfg")); - - // Try the old-style RUSTFLAGS, ensuring HOST != TARGET. - env::remove_var("CARGO_ENCODED_RUSTFLAGS"); - env::set_var("HOST", "lol"); - let ac = autocfg::AutoCfg::new().unwrap(); - assert!(ac.probe_sysroot_crate("autocfg")); -} diff --git a/vendor/equivalent/.cargo-checksum.json b/vendor/equivalent/.cargo-checksum.json new file mode 100644 index 0000000..18cb3e7 --- /dev/null +++ b/vendor/equivalent/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"87d7f4d5b05a14966f11c5e40940ca2687a3ae8e751bb15d7f537ae95310ab7b","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7365cc8878a1d7ce155a58c4ca09c3d7a6be413efa5334a80ea842912b669349","README.md":"bbcbb8419f9bb01a51d3d5e808fe35651d423014992a72be3e7acd518485f190","src/lib.rs":"1dd84363f561b30b1da713486c6b583900353e62c569d7ba1dd84eb2c04f1a14"},"package":"5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"} \ No newline at end of file diff --git a/vendor/autocfg/Cargo.toml b/vendor/equivalent/Cargo.toml similarity index 61% rename from vendor/autocfg/Cargo.toml rename to vendor/equivalent/Cargo.toml index f8bd64c..925d53e 100644 --- a/vendor/autocfg/Cargo.toml +++ b/vendor/equivalent/Cargo.toml @@ -10,15 +10,18 @@ # See Cargo.toml.orig for the original contents. [package] -name = "autocfg" -version = "1.1.0" -authors = ["Josh Stone "] -exclude = ["/.github/**", "/bors.toml"] -description = "Automatic cfg for Rust compiler features" +rust-version = "1.6" +name = "equivalent" +version = "1.0.1" +description = "Traits for key comparison in maps." 
 readme = "README.md"
-keywords = ["rustc", "build", "autoconf"]
-categories = ["development-tools::build-utils"]
+keywords = [
+    "hashmap",
+    "no_std",
+]
+categories = [
+    "data-structures",
+    "no-std",
+]
 license = "Apache-2.0 OR MIT"
-repository = "https://github.com/cuviper/autocfg"
-
-[dependencies]
+repository = "https://github.com/cuviper/equivalent"
diff --git a/vendor/autocfg/LICENSE-APACHE b/vendor/equivalent/LICENSE-APACHE
similarity index 100%
rename from vendor/autocfg/LICENSE-APACHE
rename to vendor/equivalent/LICENSE-APACHE
diff --git a/vendor/autocfg/LICENSE-MIT b/vendor/equivalent/LICENSE-MIT
similarity index 97%
rename from vendor/autocfg/LICENSE-MIT
rename to vendor/equivalent/LICENSE-MIT
index 44fbc4d..5ac40da 100644
--- a/vendor/autocfg/LICENSE-MIT
+++ b/vendor/equivalent/LICENSE-MIT
@@ -1,4 +1,4 @@
-Copyright (c) 2018 Josh Stone
+Copyright (c) 2016--2023
 
 Permission is hereby granted, free of charge, to any
 person obtaining a copy of this software and associated
diff --git a/vendor/equivalent/README.md b/vendor/equivalent/README.md
new file mode 100644
index 0000000..8ff7e24
--- /dev/null
+++ b/vendor/equivalent/README.md
@@ -0,0 +1,25 @@
+# Equivalent
+
+[![crates.io](https://img.shields.io/crates/v/equivalent.svg)](https://crates.io/crates/equivalent)
+[![docs](https://docs.rs/equivalent/badge.svg)](https://docs.rs/equivalent)
+
+`Equivalent` and `Comparable` are Rust traits for key comparison in maps.
+
+These may be used in the implementation of maps where the lookup type `Q`
+may be different than the stored key type `K`.
+
+* `Q: Equivalent<K>` checks for equality, similar to the `HashMap<K, V>`
+  constraint `K: Borrow<Q>, Q: Eq`.
+* `Q: Comparable<K>` checks the ordering, similar to the `BTreeMap<K, V>`
+  constraint `K: Borrow<Q>, Q: Ord`.
+
+These traits are not used by the maps in the standard library, but they may
+add more flexibility in third-party map implementations, especially in
+situations where a strict `K: Borrow<Q>` relationship is not available.
+
+## License
+
+Equivalent is distributed under the terms of both the MIT license and the
+Apache License (Version 2.0). See [LICENSE-APACHE](LICENSE-APACHE) and
+[LICENSE-MIT](LICENSE-MIT) for details. Opening a pull request is
+assumed to signal agreement with these licensing terms.
diff --git a/vendor/equivalent/src/lib.rs b/vendor/equivalent/src/lib.rs
new file mode 100644
index 0000000..09ba58d
--- /dev/null
+++ b/vendor/equivalent/src/lib.rs
@@ -0,0 +1,113 @@
+//! [`Equivalent`] and [`Comparable`] are traits for key comparison in maps.
+//!
+//! These may be used in the implementation of maps where the lookup type `Q`
+//! may be different than the stored key type `K`.
+//!
+//! * `Q: Equivalent<K>` checks for equality, similar to the `HashMap<K, V>`
+//!   constraint `K: Borrow<Q>, Q: Eq`.
+//! * `Q: Comparable<K>` checks the ordering, similar to the `BTreeMap<K, V>`
+//!   constraint `K: Borrow<Q>, Q: Ord`.
+//!
+//! These traits are not used by the maps in the standard library, but they may
+//! add more flexibility in third-party map implementations, especially in
+//! situations where a strict `K: Borrow<Q>` relationship is not available.
+//!
+//! # Examples
+//!
+//! ```
+//! use equivalent::*;
+//! use std::cmp::Ordering;
+//!
+//! pub struct Pair<A, B>(pub A, pub B);
+//!
+//! impl<'a, A: ?Sized, B: ?Sized, C, D> Equivalent<(C, D)> for Pair<&'a A, &'a B>
+//! where
+//!     A: Equivalent<C>,
+//!     B: Equivalent<D>,
+//! {
+//!     fn equivalent(&self, key: &(C, D)) -> bool {
+//!         self.0.equivalent(&key.0) && self.1.equivalent(&key.1)
+//!     }
+//! }
+//!
+//! impl<'a, A: ?Sized, B: ?Sized, C, D> Comparable<(C, D)> for Pair<&'a A, &'a B>
+//! where
+//!     A: Comparable<C>,
+//!     B: Comparable<D>,
+//! {
+//!     fn compare(&self, key: &(C, D)) -> Ordering {
+//!         match self.0.compare(&key.0) {
+//!             Ordering::Equal => self.1.compare(&key.1),
+//!             not_equal => not_equal,
+//!         }
+//!     }
+//! }
+//!
+//! fn main() {
+//!     let key = (String::from("foo"), String::from("bar"));
+//!     let q1 = Pair("foo", "bar");
+//!     let q2 = Pair("boo", "bar");
+//!     let q3 = Pair("foo", "baz");
+//!
+//!     assert!(q1.equivalent(&key));
+//!     assert!(!q2.equivalent(&key));
+//!     assert!(!q3.equivalent(&key));
+//!
+//!     assert_eq!(q1.compare(&key), Ordering::Equal);
+//!     assert_eq!(q2.compare(&key), Ordering::Less);
+//!     assert_eq!(q3.compare(&key), Ordering::Greater);
+//! }
+//! ```
+
+#![no_std]
+
+use core::borrow::Borrow;
+use core::cmp::Ordering;
+
+/// Key equivalence trait.
+///
+/// This trait allows hash table lookup to be customized. It has one blanket
+/// implementation that uses the regular solution with `Borrow` and `Eq`, just
+/// like `HashMap` does, so that you can pass `&str` to lookup into a map with
+/// `String` keys and so on.
+///
+/// # Contract
+///
+/// The implementor **must** hash like `K`, if it is hashable.
+pub trait Equivalent<K: ?Sized> {
+    /// Compare self to `key` and return `true` if they are equal.
+    fn equivalent(&self, key: &K) -> bool;
+}
+
+impl<Q: ?Sized, K: ?Sized> Equivalent<K> for Q
+where
+    Q: Eq,
+    K: Borrow<Q>,
+{
+    #[inline]
+    fn equivalent(&self, key: &K) -> bool {
+        PartialEq::eq(self, key.borrow())
+    }
+}
+
+/// Key ordering trait.
+///
+/// This trait allows ordered map lookup to be customized. It has one blanket
+/// implementation that uses the regular solution with `Borrow` and `Ord`, just
+/// like `BTreeMap` does, so that you can pass `&str` to lookup into a map with
+/// `String` keys and so on.
+pub trait Comparable<K: ?Sized>: Equivalent<K> {
+    /// Compare self to `key` and return their ordering.
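+    ///
+    /// For example (an illustrative note, not upstream documentation), the
+    /// blanket implementation below lets a `&str` be ordered against a
+    /// `String` key directly:
+    ///
+    /// ```
+    /// use equivalent::Comparable;
+    /// use std::cmp::Ordering;
+    ///
+    /// let key = String::from("foo");
+    /// // `str: Ord` and `String: Borrow<str>`, so the blanket impl applies.
+    /// assert_eq!(Ordering::Equal, "foo".compare(&key));
+    /// ```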
+    fn compare(&self, key: &K) -> Ordering;
+}
+
+impl<Q: ?Sized, K: ?Sized> Comparable<K> for Q
+where
+    Q: Ord,
+    K: Borrow<Q>,
+{
+    #[inline]
+    fn compare(&self, key: &K) -> Ordering {
+        Ord::cmp(self, key.borrow())
+    }
+}
diff --git a/vendor/hashbrown/.cargo-checksum.json b/vendor/hashbrown/.cargo-checksum.json
index 5561cde..4b77da6 100644
--- a/vendor/hashbrown/.cargo-checksum.json
+++ b/vendor/hashbrown/.cargo-checksum.json
@@ -1 +1 @@
-{"files":{"CHANGELOG.md":"ade49a29d368e16ce508aee91b477ecbad7e2e52eb6fee7b4c1fc86199963f0e","Cargo.toml":"421b3a71d97faf0a7e52c3b2bfbe0f1c036b9dbf6232b4e5b41221bb54358f5a","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"ff8f68cb076caf8cefe7a6430d4ac086ce6af2ca8ce2c4e5a2004d4552ef52a2","README.md":"a536b3bb3f3521e59836080f05a4783150fa8484f759a31468ce3b6dba1f33eb","benches/bench.rs":"aadc39d815eadf094ed9357d946319df2d93194203bbccb7c33cea6951d654df","benches/insert_unique_unchecked.rs":"cb84275f22d5f95a5ac995ac6b2df74ffcf342765b401d27c95f2955c7b7cb9f","clippy.toml":"7535949f908c6d9aea4f9a9f3a7625552c93fc29e963d059d40f4def9d77ea7b","src/external_trait_impls/mod.rs":"d69528827794524cfd9acbeacc1ac4f6131e3c7574311e6d919f818f65fbff07","src/external_trait_impls/rayon/helpers.rs":"ba105bf0853ebc45157f22116ad0f55d3bdab75e721d8e7a677c7b912d0c0c6d","src/external_trait_impls/rayon/map.rs":"2809e2a0071db8101c38789deb955f3830c5c3455eb1794ff64a0cf2ceb53fc7","src/external_trait_impls/rayon/mod.rs":"156de9c1ad0123334ea3b7e5a17444faf1b8bf971aa88a1f23e2f2d1c3021141","src/external_trait_impls/rayon/raw.rs":"e62c5f3ca5fffea47357e64b6f8c34cec94af62d9bd28a2b87934da46c22b66e","src/external_trait_impls/rayon/set.rs":"c4c44d44e56c2f59e9e1355662e29d8744ac96645ca4414127a359fb46cb0fbf","src/external_trait_impls/serde.rs":"0bc1a1f218d1ae7a5262557a5e3737b9334caf7d50c136dbdc75ff75680c223b","src/lib.rs":"c82fbee9684bfff40ef55d5f0c9f855c11f71f9fd1720fb084ef8331bdbc41d8","src/macros.rs":"36fe532656879c80f7753d13354b889f5b45caef451a1bb3a27dbc32d74c9878","src/map.rs":"df39edae67c569378dea9a4d928685cb4d06569712c6ac36a54df76fb5d87fe3","src/raw/alloc.rs":"184a0345bc2c7544b65c28724063be26b1f2b28dbaaa028a0b01192ccac25557","src/raw/bitmask.rs":"820d90b19b7e3433a1048ace008c9526331cd53a576cb0cfc1ff9960b6fe52f8","src/raw/generic.rs":"f5013a50d6d82d5cc8bad8b8c26c24d00fa810197f9f123256c58ac92e0d98f9","src/raw/mod.rs":"fa38247c6b3bd70636be50400debb9966a3446d49ee13e4f4e2dfe4ceed1b201","src/raw/sse2.rs":"838cfdb1daa1e70951ed25f985283b8b7ab4b46fa130f92eda152047ce6086f6","src/rustc_entry.rs":"cdd70972cba5b79ca1cad79869cb5e184d6dc4798ef90822e966ef89679ba011","src/scopeguard.rs":"d13de1b12897add7fe1c3eba6f906c9cc09d86509b6cfe06b95d63803fe9265c","src/set.rs":"6877d4a42eeadd681e3b8881528e4b20f14cfedbc11e9318bfcf425ef96d1546","tests/hasher.rs":"9a8fdf67e4415618e16729969c386eefe71408cded5d46cf7b67d969276a3452","tests/rayon.rs":"83d5289771542203f539a41cccb889fbe7ce70f5adf5b903ac9f051e3ba13cfa","tests/serde.rs":"6bac8054db722dd049901b37a6e006535bac30f425eb5cd91af19b5bc1dfe78e","tests/set.rs":"01cf39efb04646ef4c63a809ebb96dfa63cfec472bf8bdb6c121f6526d40c40e"},"package":"8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"}
\ No newline at end of file
+{"files":{"CHANGELOG.md":"f93d5061aa77fe578b130b84ffddb1d9f53dac45a8db5dd8edfc2fad7a3b8bca","Cargo.toml":"e8848d9e1cc46174ce8e51c40fdfb9f414a2cbb92f99a342b013b9165f897150","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"ff8f68cb076caf8cefe7a6430d4ac086ce6af2ca8ce2c4e5a2004d4552ef52a2","README.md":"84c222ce49510535419d338b7532a72a2bf22b7466e44de78d92d25b6c7d636b","benches/bench.rs":"ef7bc025922f077d307c565640c005d056e3d6c1713448a95aae92d3c22c1005","benches/insert_unique_unchecked.rs":"cb84275f22d5f95a5ac995ac6b2df74ffcf342765b401d27c95f2955c7b7cb9f","clippy.toml":"7535949f908c6d9aea4f9a9f3a7625552c93fc29e963d059d40f4def9d77ea7b","src/external_trait_impls/mod.rs":"0625e6a5e3b8ecc8901a12aeeea54393fd84617fb3a14d98a34d2d2bddb8d257","src/external_trait_impls/rayon/helpers.rs":"ba105bf0853ebc45157f22116ad0f55d3bdab75e721d8e7a677c7b912d0c0c6d","src/external_trait_impls/rayon/map.rs":"96fdf39b3f601f77152d7ce84541b8f51f32b9274b7da9c294862892e721a5d8","src/external_trait_impls/rayon/mod.rs":"126edc882501dddd25e442d9236508b5b386eb8c0a9f5d654f2dd081086c1616","src/external_trait_impls/rayon/raw.rs":"04012fb2e99648819b4bc0044107ed3cb94013e242b7865075c5bd9ebf1b6865","src/external_trait_impls/rayon/set.rs":"7539348ff7bc6e3cce6b3c019d62dc401eea0138c578fef729c2593e8ead1cfa","src/external_trait_impls/rayon/table.rs":"8778d29509c68b5b7cb66859db025d3939ce22e7cf370b20ff3dea4fe4b29fd0","src/external_trait_impls/rkyv/hash_map.rs":"7abe24318143b776016052b05840656afc858b1ba5252f3d418d61972477f53d","src/external_trait_impls/rkyv/hash_set.rs":"38d969125d17d606492ec4ec9fc06b7e7118eb903240dacf40de21b9b06fa5c8","src/external_trait_impls/rkyv/mod.rs":"54399ce5574fd1d84b7b0cb4238fa3e898575e89a6724299be009d2172bda02e","src/external_trait_impls/serde.rs":"6dbe104dee16b453b6b048b541c6e02c6d067d970dfafd243fc4360288b0168c","src/lib.rs":"fbc05970d6458046590e9c4a33fc9a6fdc94ef725b9b00354fa609e207e6ae50","src/macros.rs":"98a26b908fc0fbe6a58d008a317e550013d615eb3cc17a5054a573c62c1d74cb","src/map.rs":"f1280987ac23fa387c4378666c8e0a971723b3935b5d76d6b17f1f707a20c30a","src/raw/alloc.rs":"902f8588d0fdee3e5c3dc02410f41d4b38ac88843727387f929f3186b3a2d322","src/raw/bitmask.rs":"3b3dce8d6a48856ada19085abf43908f124ab3419fcf434b9ca64d7bff243f67","src/raw/generic.rs":"efc5e603be3e9a17935aef1836a38ce01c78a0093b2af0671548eb5459b37921","src/raw/mod.rs":"42dc33a975e00fa4d984cc00d33915412b5c0a80341e38b53ad46a44f65170ad","src/raw/neon.rs":"9907d8ebc36fc3df562dde478ea9b72213fda65288a304718d8647f0029dc9ad","src/raw/sse2.rs":"39038e3344e49f4638e211bcdbf56565ac53e90dce56172cc3b526fea911c2af","src/rustc_entry.rs":"8142ed89b50155602ef8c1628382bd62d3ee903920fe49d403d4100a278c6ba4","src/scopeguard.rs":"1a246e08a63c06cd8ad934bd7da229421bf804f991ae93cd7e242da27ca6c601","src/set.rs":"c23534afec96391f396904150556b149fe8500f1c0d17d70cdf10e8fa1121b45","src/table.rs":"5f5038ff39ca1680e2f5e13f9b50c7166ed41b1fa1232f7d17ff53a96c5fa8b2","tests/equivalent_trait.rs":"84faa3fe9d67c375d03fec81f0f1412c47862477d42e84e7d235258236338d5b","tests/hasher.rs":"9a8fdf67e4415618e16729969c386eefe71408cded5d46cf7b67d969276a3452","tests/raw.rs":"43ed2f98877533a0905611d9a30f26b183dd3e103e3856eeab80e7b8ac7894d3","tests/rayon.rs":"39cb24ab45fce8087bb54948715c8b6973ebfba1a325292b5b3cd9aab50b5fd2","tests/serde.rs":"6bac8054db722dd049901b37a6e006535bac30f425eb5cd91af19b5bc1dfe78e","tests/set.rs":"9f8011c29d1059aadb54b6dd4623521d5178b4278b4a56021ef2cee4bbb19fd9"},"package":"290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"} 
\ No newline at end of file diff --git a/vendor/hashbrown/CHANGELOG.md b/vendor/hashbrown/CHANGELOG.md index 3354b54..0901165 100644 --- a/vendor/hashbrown/CHANGELOG.md +++ b/vendor/hashbrown/CHANGELOG.md @@ -7,35 +7,143 @@ and this project adheres to [Semantic Versioning](https://semver.org/). ## [Unreleased] +## [v0.14.3] - 2023-11-26 + +### Added + +- Specialized `fold` implementation of iterators. (#480) + +### Fixed + +- Avoid using unstable `ptr::invalid_mut` on nightly. (#481) + +## [v0.14.2] - 2023-10-19 + +### Added + +- `HashTable` type which provides a low-level but safe API with explicit hashing. (#466) + +### Fixed + +- Disabled the use of NEON instructions on big-endian ARM. (#475) +- Disabled the use of NEON instructions on Miri. (#476) + +## [v0.14.1] - 2023-09-28 + +### Added + +- Allow serializing `HashMap`s that use a custom allocator. (#449) + +### Changed + +- Use the `Equivalent` trait from the `equivalent` crate. (#442) +- Slightly improved performance of table resizing. (#451) +- Relaxed MSRV to 1.63.0. (#457) +- Removed `Clone` requirement from custom allocators. (#468) + +### Fixed + +- Fixed custom allocators being leaked in some situations. (#439, #465) + +## [v0.14.0] - 2023-06-01 + +### Added + +- Support for `allocator-api2` crate + for interfacing with custom allocators on stable. (#417) +- Optimized implementation for ARM using NEON instructions. (#430) +- Support for rkyv serialization. (#432) +- `Equivalent` trait to look up values without `Borrow`. (#345) +- `Hash{Map,Set}::raw_table_mut` is added, which returns a mutable reference. (#404) +- Fast path for `clear` on empty tables. (#428) + +### Changed + +- Optimized insertion to only perform a single lookup. (#277) +- `DrainFilter` (`drain_filter`) has been renamed to `ExtractIf` and no longer drops remaining + elements when the iterator is dropped. (#374) +- Bumped MSRV to 1.64.0. (#431) +- `{Map,Set}::raw_table` now returns an immutable reference. (#404) +- `VacantEntry` and `OccupiedEntry` now use the default hasher if none is + specified in generics. (#389) +- `RawTable::data_start` now returns a `NonNull<T>` to match `RawTable::data_end`. (#387) +- `RawIter::{reflect_insert, reflect_remove}` are now unsafe. (#429) +- `RawTable::find_potential` is renamed to `find_or_find_insert_slot` and returns an `InsertSlot`. (#429) +- `RawTable::remove` now also returns an `InsertSlot`. (#429) +- `InsertSlot` can be used to insert an element with `RawTable::insert_in_slot`. (#429) +- `RawIterHash` no longer has a lifetime tied to that of the `RawTable`. (#427) +- The trait bounds of `HashSet::raw_table` have been relaxed to not require `Eq + Hash`. (#423) +- `EntryRef::and_replace_entry_with` and `OccupiedEntryRef::replace_entry_with` + were changed to give a `&K` instead of a `&Q` to the closure. + +### Removed + +- Support for `bumpalo` as an allocator with custom wrapper. + Use `allocator-api2` feature in `bumpalo` to use it as an allocator + for `hashbrown` collections. (#417) + +## [v0.13.2] - 2023-01-12 + +### Fixed + +- Added `#[inline(always)]` to `find_inner`. (#375) +- Fixed `RawTable::allocation_info` for empty tables. (#376) + +## [v0.13.1] - 2022-11-10 + +### Added + +- Added `Equivalent` trait to customize key lookups. (#350) +- Added support for 16-bit targets. (#368) +- Added `RawTable::allocation_info` which provides information about the memory + usage of a table. (#371) + +### Changed + +- Bumped MSRV to 1.61.0. +- Upgraded to `ahash` 0.8. (#357) +- Make `with_hasher_in` const. (#355) +- The following methods have been removed from the `RawTable` API in favor of + safer alternatives: + - `RawTable::erase_no_drop` => Use `RawTable::erase` or `RawTable::remove` instead. + - `Bucket::read` => Use `RawTable::remove` instead. + - `Bucket::drop` => Use `RawTable::erase` instead. + - `Bucket::write` => Use `Bucket::as_mut` instead. + +### Fixed + +- Ensure that `HashMap` allocations don't exceed `isize::MAX`. (#362) + +- Fixed issue with field retagging in scopeguard. (#359) +
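For orientation, a hedged sketch of the explicit-hashing workflow behind the `HashTable` type added in v0.14.2 above; the tuple layout and the choice of `RandomState` here are illustrative assumptions, not vendored code:

```rust
use std::collections::hash_map::RandomState;
use std::hash::BuildHasher;

use hashbrown::HashTable;

// `HashTable` never hashes for you: each call takes a precomputed hash
// and, where the table may resize, a closure to re-hash stored entries.
let state = RandomState::new();
let mut table: HashTable<(&str, u32)> = HashTable::new();

table.insert_unique(state.hash_one("one"), ("one", 1), |(k, _)| state.hash_one(k));
table.insert_unique(state.hash_one("two"), ("two", 2), |(k, _)| state.hash_one(k));

// Lookups supply the same hash plus an equality predicate.
let found = table.find(state.hash_one("one"), |(k, _)| *k == "one");
assert_eq!(found, Some(&("one", 1)));
```

This is the "low-level but safe" tradeoff the entry describes: the table stays memory-safe, but keeping hashes consistent across calls is the caller's responsibility.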
## [v0.12.3] - 2022-07-17 -## Fixed +### Fixed - Fixed double-drop in `RawTable::clone_from`. (#348) ## [v0.12.2] - 2022-07-09 -## Added +### Added - Added `Entry` API for `HashSet`. (#342) - Added `Extend<&'a (K, V)> for HashMap`. (#340) - Added length-based short-circuiting for hash table iteration. (#338) - Added a function to access the `RawTable` of a `HashMap`. (#335) -## Changed +### Changed - Edited `do_alloc` to reduce LLVM IR generated. (#341) ## [v0.12.1] - 2022-05-02 -## Fixed +### Fixed - Fixed underflow in `RawIterRange::size_hint`. (#325) - Fixed the implementation of `Debug` for `ValuesMut` and `IntoValues`. (#325) ## [v0.12.0] - 2022-01-17 -## Added +### Added - Added `From<[T; N]>` and `From<[(K, V); N]>` for `HashSet` and `HashMap` respectively. (#297) - Added an `allocator()` getter to HashMap and HashSet. (#257) @@ -44,7 +152,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/). - Implement `From` on `HashSet` and `HashMap`. (#298) - Added `entry_ref` API to `HashMap`. (#201) -## Changed +### Changed - Bumped minimum Rust version to 1.56.1 and edition to 2021. - Use u64 for the GroupWord on WebAssembly. (#271) @@ -56,7 +164,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/). - Rename `get_each_mut` to `get_many_mut` and align API with the stdlib. (#291) - Don't hash the key when searching in an empty table. (#305) -## Fixed +### Fixed - Guard against allocations exceeding isize::MAX. (#268) - Made `RawTable::insert_no_grow` unsafe. (#254) @@ -65,19 +173,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/). ## [v0.11.2] - 2021-03-25 -## Fixed +### Fixed - Added missing allocator type parameter to `HashMap`'s and `HashSet`'s `Clone` impls. (#252) ## [v0.11.1] - 2021-03-20 -## Fixed +### Fixed - Added missing `pub` modifier to `BumpWrapper`. (#251) ## [v0.11.0] - 2021-03-14 -## Added +### Added - Added safe `try_insert_no_grow` method to `RawTable`. (#229) - Added support for `bumpalo` as an allocator without the `nightly` feature. (#231) - Implemented `Default` for `RawTable`. (#237) @@ -86,22 +194,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/). - Added `From<HashMap<T, (), S>>` for `HashSet<T, S>`. (#235) - Added `try_insert` method to `HashMap`. (#247) -## Changed +### Changed - The minimum Rust version has been bumped to 1.49.0. (#230) - Significantly improved compilation times by reducing the amount of generated IR. (#205) -## Removed +### Removed - We no longer re-export the unstable allocator items from the standard library, nor the stable shims approximating the same. (#227) - Removed hasher specialization support from `aHash`, which was resulting in inconsistent hashes being generated for a key. (#248) -## Fixed +### Fixed - Fixed union length comparison. (#228) ## ~~[v0.10.0] - 2021-01-16~~ This release was _yanked_ due to inconsistent hashes being generated with the `nightly` feature. (#248) -## Changed +### Changed - Parametrized `RawTable`, `HashSet` and `HashMap` over an allocator.
(#133) - Improved branch prediction hints on stable. (#209) - Optimized hashing of primitive types with AHash using specialization. (#207) @@ -109,7 +217,7 @@ This release was _yanked_ due to inconsistent hashes being generated with the `n ## [v0.9.1] - 2020-09-28 -## Added +### Added - Added safe methods to `RawTable` (#202): - `get`: `find` and `as_ref` - `get_mut`: `find` and `as_mut` @@ -117,7 +225,7 @@ This release was _yanked_ due to inconsistent hashes being generated with the `n - `remove_entry`: `find` and `remove` - `erase_entry`: `find` and `erase` -## Changed +### Changed - Removed `from_key_hashed_nocheck`'s `Q: Hash`. (#200) - Made `RawTable::drain` safe. (#201) @@ -215,7 +323,7 @@ This release was _yanked_ due to inconsistent hashes being generated with the `n ## [v0.6.2] - 2019-10-23 ### Added -- Added an `inline-more` feature (enabled by default) which allows choosing a tradeoff between +- Added an `inline-more` feature (enabled by default) which allows choosing a tradeoff between runtime performance and compilation time. (#119) ## [v0.6.1] - 2019-10-04 @@ -363,7 +471,13 @@ This release was _yanked_ due to a breaking change for users of `no-default-feat - Initial release -[Unreleased]: https://github.com/rust-lang/hashbrown/compare/v0.12.3...HEAD +[Unreleased]: https://github.com/rust-lang/hashbrown/compare/v0.14.3...HEAD +[v0.14.3]: https://github.com/rust-lang/hashbrown/compare/v0.14.2...v0.14.3 +[v0.14.2]: https://github.com/rust-lang/hashbrown/compare/v0.14.1...v0.14.2 +[v0.14.1]: https://github.com/rust-lang/hashbrown/compare/v0.14.0...v0.14.1 +[v0.14.0]: https://github.com/rust-lang/hashbrown/compare/v0.13.2...v0.14.0 +[v0.13.2]: https://github.com/rust-lang/hashbrown/compare/v0.13.1...v0.13.2 +[v0.13.1]: https://github.com/rust-lang/hashbrown/compare/v0.12.3...v0.13.1 [v0.12.3]: https://github.com/rust-lang/hashbrown/compare/v0.12.2...v0.12.3 [v0.12.2]: https://github.com/rust-lang/hashbrown/compare/v0.12.1...v0.12.2 [v0.12.1]: https://github.com/rust-lang/hashbrown/compare/v0.12.0...v0.12.1 diff --git a/vendor/hashbrown/Cargo.toml b/vendor/hashbrown/Cargo.toml index fb130d2..ffdc0c3 100644 --- a/vendor/hashbrown/Cargo.toml +++ b/vendor/hashbrown/Cargo.toml @@ -11,9 +11,9 @@ [package] edition = "2021" -rust-version = "1.56.0" +rust-version = "1.63.0" name = "hashbrown" -version = "0.12.3" +version = "0.14.3" authors = ["Amanieu d'Antras "] exclude = [ ".github", @@ -33,7 +33,6 @@ categories = [ ] license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/hashbrown" -resolver = "2" [package.metadata.docs.rs] features = [ @@ -42,9 +41,10 @@ features = [ "serde", "raw", ] +rustdoc-args = ["--generate-link-to-definition"] [dependencies.ahash] -version = "0.7.0" +version = "0.8.6" optional = true default-features = false @@ -53,9 +53,11 @@ version = "1.0.0" optional = true package = "rustc-std-workspace-alloc" -[dependencies.bumpalo] -version = "3.5.0" +[dependencies.allocator-api2] +version = "0.2.9" +features = ["alloc"] optional = true +default-features = false [dependencies.compiler_builtins] version = "0.1.2" @@ -66,15 +68,30 @@ version = "1.0.0" optional = true package = "rustc-std-workspace-core" +[dependencies.equivalent] +version = "1.0" +optional = true +default-features = false + [dependencies.rayon] version = "1.0" optional = true +[dependencies.rkyv] +version = "0.7.42" +features = ["alloc"] +optional = true +default-features = false + [dependencies.serde] version = "1.0.25" optional = true default-features = false +[dev-dependencies.bumpalo] 
+version = "3.13.0" +features = ["allocator-api2"] + [dev-dependencies.doc-comment] version = "0.3.1" @@ -91,17 +108,24 @@ features = ["small_rng"] [dev-dependencies.rayon] version = "1.0" +[dev-dependencies.rkyv] +version = "0.7.42" +features = ["validation"] + [dev-dependencies.serde_test] version = "1.0" [features] -ahash-compile-time-rng = ["ahash/compile-time-rng"] default = [ "ahash", "inline-more", + "allocator-api2", ] inline-more = [] -nightly = [] +nightly = [ + "allocator-api2?/nightly", + "bumpalo/allocator_api", +] raw = [] rustc-dep-of-std = [ "nightly", diff --git a/vendor/hashbrown/README.md b/vendor/hashbrown/README.md index 2eddcf3..5eaef8b 100644 --- a/vendor/hashbrown/README.md +++ b/vendor/hashbrown/README.md @@ -4,7 +4,7 @@ hashbrown [![Build Status](https://github.com/rust-lang/hashbrown/actions/workflows/rust.yml/badge.svg)](https://github.com/rust-lang/hashbrown/actions) [![Crates.io](https://img.shields.io/crates/v/hashbrown.svg)](https://crates.io/crates/hashbrown) [![Documentation](https://docs.rs/hashbrown/badge.svg)](https://docs.rs/hashbrown) -[![Rust](https://img.shields.io/badge/rust-1.56.1%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/hashbrown) +[![Rust](https://img.shields.io/badge/rust-1.63.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/hashbrown) This crate is a Rust port of Google's high-performance [SwissTable] hash map, adapted to make it a drop-in replacement for Rust's standard `HashMap` @@ -40,44 +40,44 @@ Compared to the previous implementation of `std::collections::HashMap` (Rust 1.3 With the hashbrown default AHash hasher: -| name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | -|:------------------------|:-------------------:|------------------:|:------------:|---------:|---------| -| insert_ahash_highbits | 18,865 | 8,020 | -10,845 | -57.49% | x 2.35 | -| insert_ahash_random | 19,711 | 8,019 | -11,692 | -59.32% | x 2.46 | -| insert_ahash_serial | 19,365 | 6,463 | -12,902 | -66.63% | x 3.00 | -| insert_erase_ahash_highbits | 51,136 | 17,916 | -33,220 | -64.96% | x 2.85 | -| insert_erase_ahash_random | 51,157 | 17,688 | -33,469 | -65.42% | x 2.89 | -| insert_erase_ahash_serial | 45,479 | 14,895 | -30,584 | -67.25% | x 3.05 | -| iter_ahash_highbits | 1,399 | 1,092 | -307 | -21.94% | x 1.28 | -| iter_ahash_random | 1,586 | 1,059 | -527 | -33.23% | x 1.50 | -| iter_ahash_serial | 3,168 | 1,079 | -2,089 | -65.94% | x 2.94 | -| lookup_ahash_highbits | 32,351 | 4,792 | -27,559 | -85.19% | x 6.75 | -| lookup_ahash_random | 17,419 | 4,817 | -12,602 | -72.35% | x 3.62 | -| lookup_ahash_serial | 15,254 | 3,606 | -11,648 | -76.36% | x 4.23 | -| lookup_fail_ahash_highbits | 21,187 | 4,369 | -16,818 | -79.38% | x 4.85 | -| lookup_fail_ahash_random | 21,550 | 4,395 | -17,155 | -79.61% | x 4.90 | -| lookup_fail_ahash_serial | 19,450 | 3,176 | -16,274 | -83.67% | x 6.12 | +| name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | +| :-------------------------- | :----------------: | ----------------: | :----------: | ------: | ------- | +| insert_ahash_highbits | 18,865 | 8,020 | -10,845 | -57.49% | x 2.35 | +| insert_ahash_random | 19,711 | 8,019 | -11,692 | -59.32% | x 2.46 | +| insert_ahash_serial | 19,365 | 6,463 | -12,902 | -66.63% | x 3.00 | +| insert_erase_ahash_highbits | 51,136 | 17,916 | -33,220 | -64.96% | x 2.85 | +| insert_erase_ahash_random | 51,157 | 17,688 | -33,469 | -65.42% | x 2.89 | +| insert_erase_ahash_serial | 45,479 | 14,895 | -30,584 | -67.25% | x 3.05 | +| 
iter_ahash_highbits | 1,399 | 1,092 | -307 | -21.94% | x 1.28 | +| iter_ahash_random | 1,586 | 1,059 | -527 | -33.23% | x 1.50 | +| iter_ahash_serial | 3,168 | 1,079 | -2,089 | -65.94% | x 2.94 | +| lookup_ahash_highbits | 32,351 | 4,792 | -27,559 | -85.19% | x 6.75 | +| lookup_ahash_random | 17,419 | 4,817 | -12,602 | -72.35% | x 3.62 | +| lookup_ahash_serial | 15,254 | 3,606 | -11,648 | -76.36% | x 4.23 | +| lookup_fail_ahash_highbits | 21,187 | 4,369 | -16,818 | -79.38% | x 4.85 | +| lookup_fail_ahash_random | 21,550 | 4,395 | -17,155 | -79.61% | x 4.90 | +| lookup_fail_ahash_serial | 19,450 | 3,176 | -16,274 | -83.67% | x 6.12 | With the libstd default SipHash hasher: -|name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | -|:------------------------|:-------------------:|------------------:|:------------:|---------:|---------| -|insert_std_highbits |19,216 |16,885 | -2,331 | -12.13% | x 1.14 | -|insert_std_random |19,179 |17,034 | -2,145 | -11.18% | x 1.13 | -|insert_std_serial |19,462 |17,493 | -1,969 | -10.12% | x 1.11 | -|insert_erase_std_highbits |50,825 |35,847 | -14,978 | -29.47% | x 1.42 | -|insert_erase_std_random |51,448 |35,392 | -16,056 | -31.21% | x 1.45 | -|insert_erase_std_serial |87,711 |38,091 | -49,620 | -56.57% | x 2.30 | -|iter_std_highbits |1,378 |1,159 | -219 | -15.89% | x 1.19 | -|iter_std_random |1,395 |1,132 | -263 | -18.85% | x 1.23 | -|iter_std_serial |1,704 |1,105 | -599 | -35.15% | x 1.54 | -|lookup_std_highbits |17,195 |13,642 | -3,553 | -20.66% | x 1.26 | -|lookup_std_random |17,181 |13,773 | -3,408 | -19.84% | x 1.25 | -|lookup_std_serial |15,483 |13,651 | -1,832 | -11.83% | x 1.13 | -|lookup_fail_std_highbits |20,926 |13,474 | -7,452 | -35.61% | x 1.55 | -|lookup_fail_std_random |21,766 |13,505 | -8,261 | -37.95% | x 1.61 | -|lookup_fail_std_serial |19,336 |13,519 | -5,817 | -30.08% | x 1.43 | +| name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | +| :------------------------ | :----------------: | ----------------: | :----------: | ------: | ------- | +| insert_std_highbits | 19,216 | 16,885 | -2,331 | -12.13% | x 1.14 | +| insert_std_random | 19,179 | 17,034 | -2,145 | -11.18% | x 1.13 | +| insert_std_serial | 19,462 | 17,493 | -1,969 | -10.12% | x 1.11 | +| insert_erase_std_highbits | 50,825 | 35,847 | -14,978 | -29.47% | x 1.42 | +| insert_erase_std_random | 51,448 | 35,392 | -16,056 | -31.21% | x 1.45 | +| insert_erase_std_serial | 87,711 | 38,091 | -49,620 | -56.57% | x 2.30 | +| iter_std_highbits | 1,378 | 1,159 | -219 | -15.89% | x 1.19 | +| iter_std_random | 1,395 | 1,132 | -263 | -18.85% | x 1.23 | +| iter_std_serial | 1,704 | 1,105 | -599 | -35.15% | x 1.54 | +| lookup_std_highbits | 17,195 | 13,642 | -3,553 | -20.66% | x 1.26 | +| lookup_std_random | 17,181 | 13,773 | -3,408 | -19.84% | x 1.25 | +| lookup_std_serial | 15,483 | 13,651 | -1,832 | -11.83% | x 1.13 | +| lookup_fail_std_highbits | 20,926 | 13,474 | -7,452 | -35.61% | x 1.55 | +| lookup_fail_std_random | 21,766 | 13,505 | -8,261 | -37.95% | x 1.61 | +| lookup_fail_std_serial | 19,336 | 13,519 | -5,817 | -30.08% | x 1.43 | ## Usage @@ -85,7 +85,7 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -hashbrown = "0.12" +hashbrown = "0.14" ``` Then: @@ -101,14 +101,13 @@ This crate has the following Cargo features: - `nightly`: Enables nightly-only features including: `#[may_dangle]`. - `serde`: Enables serde serialization support. +- `rkyv`: Enables rkyv serialization support. 
- `rayon`: Enables rayon parallel iterator support. - `raw`: Enables access to the experimental and unsafe `RawTable` API. - `inline-more`: Adds inline hints to most functions, improving run-time performance at the cost of compilation time. (enabled by default) -- `bumpalo`: Provides a `BumpWrapper` type which allows `bumpalo` to be used for memory allocation. - `ahash`: Compiles with ahash as default hasher. (enabled by default) -- `ahash-compile-time-rng`: Activates the `compile-time-rng` feature of ahash. For targets with no random number generator -this pre-generates seeds at compile time and embeds them as constants. See [aHash's documentation](https://github.com/tkaitchuck/aHash#flags) (disabled by default) +- `allocator-api2`: Enables support for allocators that support `allocator-api2`. (enabled by default) ## License diff --git a/vendor/hashbrown/benches/bench.rs b/vendor/hashbrown/benches/bench.rs index c393b9a..346bd7e 100644 --- a/vendor/hashbrown/benches/bench.rs +++ b/vendor/hashbrown/benches/bench.rs @@ -311,7 +311,7 @@ fn rehash_in_place(b: &mut Bencher) { // Each loop triggers one rehash for _ in 0..10 { - for i in 0..224 { + for i in 0..223 { set.insert(i); } diff --git a/vendor/hashbrown/src/external_trait_impls/mod.rs b/vendor/hashbrown/src/external_trait_impls/mod.rs index ef49783..01d386b 100644 --- a/vendor/hashbrown/src/external_trait_impls/mod.rs +++ b/vendor/hashbrown/src/external_trait_impls/mod.rs @@ -1,4 +1,6 @@ #[cfg(feature = "rayon")] pub(crate) mod rayon; +#[cfg(feature = "rkyv")] +mod rkyv; #[cfg(feature = "serde")] mod serde; diff --git a/vendor/hashbrown/src/external_trait_impls/rayon/map.rs b/vendor/hashbrown/src/external_trait_impls/rayon/map.rs index 14d91c2..2534dc9 100644 --- a/vendor/hashbrown/src/external_trait_impls/rayon/map.rs +++ b/vendor/hashbrown/src/external_trait_impls/rayon/map.rs @@ -232,11 +232,11 @@ impl fmt::Debug for ParValuesMut<'_, K, V> { /// [`into_par_iter`]: /hashbrown/struct.HashMap.html#method.into_par_iter /// [`HashMap`]: /hashbrown/struct.HashMap.html /// [`IntoParallelIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelIterator.html -pub struct IntoParIter { +pub struct IntoParIter { inner: RawIntoParIter<(K, V), A>, } -impl ParallelIterator for IntoParIter { +impl ParallelIterator for IntoParIter { type Item = (K, V); #[cfg_attr(feature = "inline-more", inline)] @@ -248,9 +248,7 @@ impl ParallelIterator for IntoPar } } -impl fmt::Debug - for IntoParIter -{ +impl fmt::Debug for IntoParIter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { ParIter { inner: unsafe { self.inner.par_iter() }, @@ -267,11 +265,11 @@ impl fmt::Debug /// /// [`par_drain`]: /hashbrown/struct.HashMap.html#method.par_drain /// [`HashMap`]: /hashbrown/struct.HashMap.html -pub struct ParDrain<'a, K, V, A: Allocator + Clone = Global> { +pub struct ParDrain<'a, K, V, A: Allocator = Global> { inner: RawParDrain<'a, (K, V), A>, } -impl ParallelIterator for ParDrain<'_, K, V, A> { +impl ParallelIterator for ParDrain<'_, K, V, A> { type Item = (K, V); #[cfg_attr(feature = "inline-more", inline)] @@ -283,9 +281,7 @@ impl ParallelIterator for ParDrai } } -impl fmt::Debug - for ParDrain<'_, K, V, A> -{ +impl fmt::Debug for ParDrain<'_, K, V, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { ParIter { inner: unsafe { self.inner.par_iter() }, @@ -295,7 +291,7 @@ impl fmt::Debug } } -impl HashMap { +impl HashMap { /// Visits (potentially in parallel) immutably borrowed keys in an arbitrary order. 
#[cfg_attr(feature = "inline-more", inline)] pub fn par_keys(&self) -> ParKeys<'_, K, V> { @@ -315,7 +311,7 @@ impl HashMap { } } -impl HashMap { +impl HashMap { /// Visits (potentially in parallel) mutably borrowed values in an arbitrary order. #[cfg_attr(feature = "inline-more", inline)] pub fn par_values_mut(&mut self) -> ParValuesMut<'_, K, V> { @@ -340,7 +336,7 @@ where K: Eq + Hash + Sync, V: PartialEq + Sync, S: BuildHasher + Sync, - A: Allocator + Clone + Sync, + A: Allocator + Sync, { /// Returns `true` if the map is equal to another, /// i.e. both maps contain the same keys mapped to the same values. @@ -354,9 +350,7 @@ where } } -impl IntoParallelIterator - for HashMap -{ +impl IntoParallelIterator for HashMap { type Item = (K, V); type Iter = IntoParIter; @@ -368,9 +362,7 @@ impl IntoParallelIterator } } -impl<'a, K: Sync, V: Sync, S, A: Allocator + Clone> IntoParallelIterator - for &'a HashMap -{ +impl<'a, K: Sync, V: Sync, S, A: Allocator> IntoParallelIterator for &'a HashMap { type Item = (&'a K, &'a V); type Iter = ParIter<'a, K, V>; @@ -383,9 +375,7 @@ impl<'a, K: Sync, V: Sync, S, A: Allocator + Clone> IntoParallelIterator } } -impl<'a, K: Sync, V: Send, S, A: Allocator + Clone> IntoParallelIterator - for &'a mut HashMap -{ +impl<'a, K: Sync, V: Send, S, A: Allocator> IntoParallelIterator for &'a mut HashMap { type Item = (&'a K, &'a mut V); type Iter = ParIterMut<'a, K, V>; @@ -424,7 +414,7 @@ where K: Eq + Hash + Send, V: Send, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { fn par_extend(&mut self, par_iter: I) where @@ -440,7 +430,7 @@ where K: Copy + Eq + Hash + Sync, V: Copy + Sync, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { fn par_extend(&mut self, par_iter: I) where @@ -456,7 +446,7 @@ where K: Eq + Hash, S: BuildHasher, I: IntoParallelIterator, - A: Allocator + Clone, + A: Allocator, HashMap: Extend, { let (list, len) = super::helpers::collect(par_iter); @@ -561,10 +551,7 @@ mod test_par_map { assert_eq!(value.load(Ordering::Relaxed), 100); // retain only half - let _v: Vec<_> = hm - .into_par_iter() - .filter(|&(ref key, _)| key.k < 50) - .collect(); + let _v: Vec<_> = hm.into_par_iter().filter(|(key, _)| key.k < 50).collect(); assert_eq!(key.load(Ordering::Relaxed), 50); assert_eq!(value.load(Ordering::Relaxed), 50); @@ -611,7 +598,7 @@ mod test_par_map { assert_eq!(value.load(Ordering::Relaxed), 100); // retain only half - let _v: Vec<_> = hm.drain().filter(|&(ref key, _)| key.k < 50).collect(); + let _v: Vec<_> = hm.drain().filter(|(key, _)| key.k < 50).collect(); assert!(hm.is_empty()); assert_eq!(key.load(Ordering::Relaxed), 50); diff --git a/vendor/hashbrown/src/external_trait_impls/rayon/mod.rs b/vendor/hashbrown/src/external_trait_impls/rayon/mod.rs index 99337a1..61ca69b 100644 --- a/vendor/hashbrown/src/external_trait_impls/rayon/mod.rs +++ b/vendor/hashbrown/src/external_trait_impls/rayon/mod.rs @@ -2,3 +2,4 @@ mod helpers; pub(crate) mod map; pub(crate) mod raw; pub(crate) mod set; +pub(crate) mod table; diff --git a/vendor/hashbrown/src/external_trait_impls/rayon/raw.rs b/vendor/hashbrown/src/external_trait_impls/rayon/raw.rs index 883303e..612be47 100644 --- a/vendor/hashbrown/src/external_trait_impls/rayon/raw.rs +++ b/vendor/hashbrown/src/external_trait_impls/rayon/raw.rs @@ -1,7 +1,6 @@ use crate::raw::Bucket; use crate::raw::{Allocator, Global, RawIter, RawIterRange, RawTable}; use crate::scopeguard::guard; -use alloc::alloc::dealloc; use core::marker::PhantomData; use core::mem; use core::ptr::NonNull; @@ -76,18 +75,18 
@@ impl UnindexedProducer for ParIterProducer { } /// Parallel iterator which consumes a table and returns elements. -pub struct RawIntoParIter { +pub struct RawIntoParIter { table: RawTable, } -impl RawIntoParIter { +impl RawIntoParIter { #[cfg_attr(feature = "inline-more", inline)] pub(super) unsafe fn par_iter(&self) -> RawParIter { self.table.par_iter() } } -impl ParallelIterator for RawIntoParIter { +impl ParallelIterator for RawIntoParIter { type Item = T; #[cfg_attr(feature = "inline-more", inline)] @@ -97,9 +96,9 @@ impl ParallelIterator for RawIntoParIter ParallelIterator for RawIntoParIter { +pub struct RawParDrain<'a, T, A: Allocator = Global> { // We don't use a &'a mut RawTable because we want RawParDrain to be // covariant over T. table: NonNull>, marker: PhantomData<&'a RawTable>, } -unsafe impl Send for RawParDrain<'_, T, A> {} +unsafe impl Send for RawParDrain<'_, T, A> {} -impl RawParDrain<'_, T, A> { +impl RawParDrain<'_, T, A> { #[cfg_attr(feature = "inline-more", inline)] pub(super) unsafe fn par_iter(&self) -> RawParIter { self.table.as_ref().par_iter() } } -impl ParallelIterator for RawParDrain<'_, T, A> { +impl ParallelIterator for RawParDrain<'_, T, A> { type Item = T; #[cfg_attr(feature = "inline-more", inline)] @@ -143,7 +142,7 @@ impl ParallelIterator for RawParDrain<'_, T, A> { } } -impl Drop for RawParDrain<'_, T, A> { +impl Drop for RawParDrain<'_, T, A> { fn drop(&mut self) { // If drive_unindexed is not called then simply clear the table. unsafe { @@ -204,7 +203,7 @@ impl Drop for ParDrainProducer { } } -impl RawTable { +impl RawTable { /// Returns a parallel iterator over the elements in a `RawTable`. #[cfg_attr(feature = "inline-more", inline)] pub unsafe fn par_iter(&self) -> RawParIter { diff --git a/vendor/hashbrown/src/external_trait_impls/rayon/set.rs b/vendor/hashbrown/src/external_trait_impls/rayon/set.rs index ee4f6e6..3de98fc 100644 --- a/vendor/hashbrown/src/external_trait_impls/rayon/set.rs +++ b/vendor/hashbrown/src/external_trait_impls/rayon/set.rs @@ -16,11 +16,11 @@ use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelExtend, Pa /// [`into_par_iter`]: /hashbrown/struct.HashSet.html#method.into_par_iter /// [`HashSet`]: /hashbrown/struct.HashSet.html /// [`IntoParallelIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelIterator.html -pub struct IntoParIter { +pub struct IntoParIter { inner: map::IntoParIter, } -impl ParallelIterator for IntoParIter { +impl ParallelIterator for IntoParIter { type Item = T; fn drive_unindexed(self, consumer: C) -> C::Result @@ -38,11 +38,11 @@ impl ParallelIterator for IntoParIter { +pub struct ParDrain<'a, T, A: Allocator = Global> { inner: map::ParDrain<'a, T, (), A>, } -impl ParallelIterator for ParDrain<'_, T, A> { +impl ParallelIterator for ParDrain<'_, T, A> { type Item = T; fn drive_unindexed(self, consumer: C) -> C::Result @@ -85,7 +85,7 @@ impl<'a, T: Sync> ParallelIterator for ParIter<'a, T> { /// /// [`par_difference`]: /hashbrown/struct.HashSet.html#method.par_difference /// [`HashSet`]: /hashbrown/struct.HashSet.html -pub struct ParDifference<'a, T, S, A: Allocator + Clone = Global> { +pub struct ParDifference<'a, T, S, A: Allocator = Global> { a: &'a HashSet, b: &'a HashSet, } @@ -94,7 +94,7 @@ impl<'a, T, S, A> ParallelIterator for ParDifference<'a, T, S, A> where T: Eq + Hash + Sync, S: BuildHasher + Sync, - A: Allocator + Clone + Sync, + A: Allocator + Sync, { type Item = &'a T; @@ -118,7 +118,7 @@ where /// /// [`par_symmetric_difference`]: 
/hashbrown/struct.HashSet.html#method.par_symmetric_difference /// [`HashSet`]: /hashbrown/struct.HashSet.html -pub struct ParSymmetricDifference<'a, T, S, A: Allocator + Clone = Global> { +pub struct ParSymmetricDifference<'a, T, S, A: Allocator = Global> { a: &'a HashSet, b: &'a HashSet, } @@ -127,7 +127,7 @@ impl<'a, T, S, A> ParallelIterator for ParSymmetricDifference<'a, T, S, A> where T: Eq + Hash + Sync, S: BuildHasher + Sync, - A: Allocator + Clone + Sync, + A: Allocator + Sync, { type Item = &'a T; @@ -150,7 +150,7 @@ where /// /// [`par_intersection`]: /hashbrown/struct.HashSet.html#method.par_intersection /// [`HashSet`]: /hashbrown/struct.HashSet.html -pub struct ParIntersection<'a, T, S, A: Allocator + Clone = Global> { +pub struct ParIntersection<'a, T, S, A: Allocator = Global> { a: &'a HashSet, b: &'a HashSet, } @@ -159,7 +159,7 @@ impl<'a, T, S, A> ParallelIterator for ParIntersection<'a, T, S, A> where T: Eq + Hash + Sync, S: BuildHasher + Sync, - A: Allocator + Clone + Sync, + A: Allocator + Sync, { type Item = &'a T; @@ -181,7 +181,7 @@ where /// /// [`par_union`]: /hashbrown/struct.HashSet.html#method.par_union /// [`HashSet`]: /hashbrown/struct.HashSet.html -pub struct ParUnion<'a, T, S, A: Allocator + Clone = Global> { +pub struct ParUnion<'a, T, S, A: Allocator = Global> { a: &'a HashSet, b: &'a HashSet, } @@ -190,7 +190,7 @@ impl<'a, T, S, A> ParallelIterator for ParUnion<'a, T, S, A> where T: Eq + Hash + Sync, S: BuildHasher + Sync, - A: Allocator + Clone + Sync, + A: Allocator + Sync, { type Item = &'a T; @@ -216,7 +216,7 @@ impl HashSet where T: Eq + Hash + Sync, S: BuildHasher + Sync, - A: Allocator + Clone + Sync, + A: Allocator + Sync, { /// Visits (potentially in parallel) the values representing the union, /// i.e. all the values in `self` or `other`, without duplicates. @@ -289,7 +289,7 @@ where impl HashSet where T: Eq + Hash + Send, - A: Allocator + Clone + Send, + A: Allocator + Send, { /// Consumes (potentially in parallel) all values in an arbitrary order, /// while preserving the set's allocated memory for reuse. @@ -301,7 +301,7 @@ where } } -impl IntoParallelIterator for HashSet { +impl IntoParallelIterator for HashSet { type Item = T; type Iter = IntoParIter; @@ -313,7 +313,7 @@ impl IntoParallelIterator for HashSet IntoParallelIterator for &'a HashSet { +impl<'a, T: Sync, S, A: Allocator> IntoParallelIterator for &'a HashSet { type Item = &'a T; type Iter = ParIter<'a, T>; @@ -374,7 +374,7 @@ fn extend(set: &mut HashSet, par_iter: I) where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, I: IntoParallelIterator, HashSet: Extend, { diff --git a/vendor/hashbrown/src/external_trait_impls/rayon/table.rs b/vendor/hashbrown/src/external_trait_impls/rayon/table.rs new file mode 100644 index 0000000..e8e5094 --- /dev/null +++ b/vendor/hashbrown/src/external_trait_impls/rayon/table.rs @@ -0,0 +1,252 @@ +//! Rayon extensions for `HashTable`. + +use super::raw::{RawIntoParIter, RawParDrain, RawParIter}; +use crate::hash_table::HashTable; +use crate::raw::{Allocator, Global}; +use core::fmt; +use core::marker::PhantomData; +use rayon::iter::plumbing::UnindexedConsumer; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; + +/// Parallel iterator over shared references to entries in a map. +/// +/// This iterator is created by the [`par_iter`] method on [`HashTable`] +/// (provided by the [`IntoParallelRefIterator`] trait). +/// See its documentation for more. 
+/// +/// [`par_iter`]: /hashbrown/struct.HashTable.html#method.par_iter +/// [`HashTable`]: /hashbrown/struct.HashTable.html +/// [`IntoParallelRefIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefIterator.html +pub struct ParIter<'a, T> { + inner: RawParIter, + marker: PhantomData<&'a T>, +} + +impl<'a, T: Sync> ParallelIterator for ParIter<'a, T> { + type Item = &'a T; + + #[cfg_attr(feature = "inline-more", inline)] + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.inner + .map(|x| unsafe { x.as_ref() }) + .drive_unindexed(consumer) + } +} + +impl Clone for ParIter<'_, T> { + #[cfg_attr(feature = "inline-more", inline)] + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + marker: PhantomData, + } + } +} + +impl fmt::Debug for ParIter<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let iter = unsafe { self.inner.iter() }.map(|x| unsafe { x.as_ref() }); + f.debug_list().entries(iter).finish() + } +} + +/// Parallel iterator over mutable references to entries in a map. +/// +/// This iterator is created by the [`par_iter_mut`] method on [`HashTable`] +/// (provided by the [`IntoParallelRefMutIterator`] trait). +/// See its documentation for more. +/// +/// [`par_iter_mut`]: /hashbrown/struct.HashTable.html#method.par_iter_mut +/// [`HashTable`]: /hashbrown/struct.HashTable.html +/// [`IntoParallelRefMutIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefMutIterator.html +pub struct ParIterMut<'a, T> { + inner: RawParIter, + marker: PhantomData<&'a mut T>, +} + +impl<'a, T: Send> ParallelIterator for ParIterMut<'a, T> { + type Item = &'a mut T; + + #[cfg_attr(feature = "inline-more", inline)] + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.inner + .map(|x| unsafe { x.as_mut() }) + .drive_unindexed(consumer) + } +} + +impl fmt::Debug for ParIterMut<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + ParIter { + inner: self.inner.clone(), + marker: PhantomData, + } + .fmt(f) + } +} + +/// Parallel iterator over entries of a consumed map. +/// +/// This iterator is created by the [`into_par_iter`] method on [`HashTable`] +/// (provided by the [`IntoParallelIterator`] trait). +/// See its documentation for more. +/// +/// [`into_par_iter`]: /hashbrown/struct.HashTable.html#method.into_par_iter +/// [`HashTable`]: /hashbrown/struct.HashTable.html +/// [`IntoParallelIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelIterator.html +pub struct IntoParIter { + inner: RawIntoParIter, +} + +impl ParallelIterator for IntoParIter { + type Item = T; + + #[cfg_attr(feature = "inline-more", inline)] + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.inner.drive_unindexed(consumer) + } +} + +impl fmt::Debug for IntoParIter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + ParIter { + inner: unsafe { self.inner.par_iter() }, + marker: PhantomData, + } + .fmt(f) + } +} + +/// Parallel draining iterator over entries of a map. +/// +/// This iterator is created by the [`par_drain`] method on [`HashTable`]. +/// See its documentation for more. 
+/// +/// [`par_drain`]: /hashbrown/struct.HashTable.html#method.par_drain +/// [`HashTable`]: /hashbrown/struct.HashTable.html +pub struct ParDrain<'a, T, A: Allocator = Global> { + inner: RawParDrain<'a, T, A>, +} + +impl ParallelIterator for ParDrain<'_, T, A> { + type Item = T; + + #[cfg_attr(feature = "inline-more", inline)] + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.inner.drive_unindexed(consumer) + } +} + +impl fmt::Debug for ParDrain<'_, T, A> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + ParIter { + inner: unsafe { self.inner.par_iter() }, + marker: PhantomData, + } + .fmt(f) + } +} + +impl HashTable { + /// Consumes (potentially in parallel) all values in an arbitrary order, + /// while preserving the map's allocated memory for reuse. + #[cfg_attr(feature = "inline-more", inline)] + pub fn par_drain(&mut self) -> ParDrain<'_, T, A> { + ParDrain { + inner: self.raw.par_drain(), + } + } +} + +impl IntoParallelIterator for HashTable { + type Item = T; + type Iter = IntoParIter; + + #[cfg_attr(feature = "inline-more", inline)] + fn into_par_iter(self) -> Self::Iter { + IntoParIter { + inner: self.raw.into_par_iter(), + } + } +} + +impl<'a, T: Sync, A: Allocator> IntoParallelIterator for &'a HashTable { + type Item = &'a T; + type Iter = ParIter<'a, T>; + + #[cfg_attr(feature = "inline-more", inline)] + fn into_par_iter(self) -> Self::Iter { + ParIter { + inner: unsafe { self.raw.par_iter() }, + marker: PhantomData, + } + } +} + +impl<'a, T: Send, A: Allocator> IntoParallelIterator for &'a mut HashTable { + type Item = &'a mut T; + type Iter = ParIterMut<'a, T>; + + #[cfg_attr(feature = "inline-more", inline)] + fn into_par_iter(self) -> Self::Iter { + ParIterMut { + inner: unsafe { self.raw.par_iter() }, + marker: PhantomData, + } + } +} + +#[cfg(test)] +mod test_par_table { + use alloc::vec::Vec; + use core::sync::atomic::{AtomicUsize, Ordering}; + + use rayon::prelude::*; + + use crate::{ + hash_map::{make_hash, DefaultHashBuilder}, + hash_table::HashTable, + }; + + #[test] + fn test_iterate() { + let hasher = DefaultHashBuilder::default(); + let mut a = HashTable::new(); + for i in 0..32 { + a.insert_unique(make_hash(&hasher, &i), i, |x| make_hash(&hasher, x)); + } + let observed = AtomicUsize::new(0); + a.par_iter().for_each(|k| { + observed.fetch_or(1 << *k, Ordering::Relaxed); + }); + assert_eq!(observed.into_inner(), 0xFFFF_FFFF); + } + + #[test] + fn test_move_iter() { + let hasher = DefaultHashBuilder::default(); + let hs = { + let mut hs = HashTable::new(); + + hs.insert_unique(make_hash(&hasher, &'a'), 'a', |x| make_hash(&hasher, x)); + hs.insert_unique(make_hash(&hasher, &'b'), 'b', |x| make_hash(&hasher, x)); + + hs + }; + + let v = hs.into_par_iter().collect::>(); + assert!(v == ['a', 'b'] || v == ['b', 'a']); + } +} diff --git a/vendor/hashbrown/src/external_trait_impls/rkyv/hash_map.rs b/vendor/hashbrown/src/external_trait_impls/rkyv/hash_map.rs new file mode 100644 index 0000000..fae7f76 --- /dev/null +++ b/vendor/hashbrown/src/external_trait_impls/rkyv/hash_map.rs @@ -0,0 +1,125 @@ +use crate::HashMap; +use core::{ + borrow::Borrow, + hash::{BuildHasher, Hash}, +}; +use rkyv::{ + collections::hash_map::{ArchivedHashMap, HashMapResolver}, + ser::{ScratchSpace, Serializer}, + Archive, Deserialize, Fallible, Serialize, +}; + +impl Archive for HashMap +where + K::Archived: Hash + Eq, +{ + type Archived = ArchivedHashMap; + type Resolver = HashMapResolver; + + #[inline] + unsafe fn resolve(&self, 
pos: usize, resolver: Self::Resolver, out: *mut Self::Archived) { + ArchivedHashMap::resolve_from_len(self.len(), pos, resolver, out); + } +} + +impl Serialize for HashMap +where + K: Serialize + Hash + Eq, + K::Archived: Hash + Eq, + V: Serialize, + S: Serializer + ScratchSpace + ?Sized, +{ + #[inline] + fn serialize(&self, serializer: &mut S) -> Result { + unsafe { ArchivedHashMap::serialize_from_iter(self.iter(), serializer) } + } +} + +impl + Deserialize, D> for ArchivedHashMap +where + K::Archived: Deserialize + Hash + Eq, + V::Archived: Deserialize, +{ + #[inline] + fn deserialize(&self, deserializer: &mut D) -> Result, D::Error> { + let mut result = HashMap::with_capacity_and_hasher(self.len(), S::default()); + for (k, v) in self.iter() { + result.insert(k.deserialize(deserializer)?, v.deserialize(deserializer)?); + } + Ok(result) + } +} + +impl, V, AK: Hash + Eq, AV: PartialEq, S: BuildHasher> + PartialEq> for ArchivedHashMap +{ + #[inline] + fn eq(&self, other: &HashMap) -> bool { + if self.len() != other.len() { + false + } else { + self.iter() + .all(|(key, value)| other.get(key).map_or(false, |v| value.eq(v))) + } + } +} + +impl, V, AK: Hash + Eq, AV: PartialEq> + PartialEq> for HashMap +{ + #[inline] + fn eq(&self, other: &ArchivedHashMap) -> bool { + other.eq(self) + } +} + +#[cfg(test)] +mod tests { + use crate::HashMap; + use alloc::string::String; + use rkyv::{ + archived_root, check_archived_root, + ser::{serializers::AllocSerializer, Serializer}, + Deserialize, Infallible, + }; + + #[test] + fn index_map() { + let mut value = HashMap::new(); + value.insert(String::from("foo"), 10); + value.insert(String::from("bar"), 20); + value.insert(String::from("baz"), 40); + value.insert(String::from("bat"), 80); + + let mut serializer = AllocSerializer::<4096>::default(); + serializer.serialize_value(&value).unwrap(); + let result = serializer.into_serializer().into_inner(); + let archived = unsafe { archived_root::>(result.as_ref()) }; + + assert_eq!(value.len(), archived.len()); + for (k, v) in value.iter() { + let (ak, av) = archived.get_key_value(k.as_str()).unwrap(); + assert_eq!(k, ak); + assert_eq!(v, av); + } + + let deserialized: HashMap = archived.deserialize(&mut Infallible).unwrap(); + assert_eq!(value, deserialized); + } + + #[test] + fn validate_index_map() { + let mut value = HashMap::new(); + value.insert(String::from("foo"), 10); + value.insert(String::from("bar"), 20); + value.insert(String::from("baz"), 40); + value.insert(String::from("bat"), 80); + + let mut serializer = AllocSerializer::<4096>::default(); + serializer.serialize_value(&value).unwrap(); + let result = serializer.into_serializer().into_inner(); + check_archived_root::>(result.as_ref()) + .expect("failed to validate archived index map"); + } +} diff --git a/vendor/hashbrown/src/external_trait_impls/rkyv/hash_set.rs b/vendor/hashbrown/src/external_trait_impls/rkyv/hash_set.rs new file mode 100644 index 0000000..c8a69cf --- /dev/null +++ b/vendor/hashbrown/src/external_trait_impls/rkyv/hash_set.rs @@ -0,0 +1,123 @@ +use crate::HashSet; +use core::{ + borrow::Borrow, + hash::{BuildHasher, Hash}, +}; +use rkyv::{ + collections::hash_set::{ArchivedHashSet, HashSetResolver}, + ser::{ScratchSpace, Serializer}, + Archive, Deserialize, Fallible, Serialize, +}; + +impl Archive for HashSet +where + K::Archived: Hash + Eq, +{ + type Archived = ArchivedHashSet; + type Resolver = HashSetResolver; + + #[inline] + unsafe fn resolve(&self, pos: usize, resolver: Self::Resolver, out: *mut Self::Archived) { + 
ArchivedHashSet::::resolve_from_len(self.len(), pos, resolver, out); + } +} + +impl Serialize for HashSet +where + K::Archived: Hash + Eq, + K: Serialize + Hash + Eq, + S: ScratchSpace + Serializer + ?Sized, +{ + #[inline] + fn serialize(&self, serializer: &mut S) -> Result { + unsafe { ArchivedHashSet::serialize_from_iter(self.iter(), serializer) } + } +} + +impl Deserialize, D> for ArchivedHashSet +where + K: Archive + Hash + Eq, + K::Archived: Deserialize + Hash + Eq, + D: Fallible + ?Sized, + S: Default + BuildHasher, +{ + #[inline] + fn deserialize(&self, deserializer: &mut D) -> Result, D::Error> { + let mut result = HashSet::with_hasher(S::default()); + for k in self.iter() { + result.insert(k.deserialize(deserializer)?); + } + Ok(result) + } +} + +impl, AK: Hash + Eq, S: BuildHasher> PartialEq> + for ArchivedHashSet +{ + #[inline] + fn eq(&self, other: &HashSet) -> bool { + if self.len() != other.len() { + false + } else { + self.iter().all(|key| other.get(key).is_some()) + } + } +} + +impl, AK: Hash + Eq, S: BuildHasher> PartialEq> + for HashSet +{ + #[inline] + fn eq(&self, other: &ArchivedHashSet) -> bool { + other.eq(self) + } +} + +#[cfg(test)] +mod tests { + use crate::HashSet; + use alloc::string::String; + use rkyv::{ + archived_root, check_archived_root, + ser::{serializers::AllocSerializer, Serializer}, + Deserialize, Infallible, + }; + + #[test] + fn index_set() { + let mut value = HashSet::new(); + value.insert(String::from("foo")); + value.insert(String::from("bar")); + value.insert(String::from("baz")); + value.insert(String::from("bat")); + + let mut serializer = AllocSerializer::<4096>::default(); + serializer.serialize_value(&value).unwrap(); + let result = serializer.into_serializer().into_inner(); + let archived = unsafe { archived_root::>(result.as_ref()) }; + + assert_eq!(value.len(), archived.len()); + for k in value.iter() { + let ak = archived.get(k.as_str()).unwrap(); + assert_eq!(k, ak); + } + + let deserialized: HashSet = archived.deserialize(&mut Infallible).unwrap(); + assert_eq!(value, deserialized); + } + + #[test] + fn validate_index_set() { + let mut value = HashSet::new(); + value.insert(String::from("foo")); + value.insert(String::from("bar")); + value.insert(String::from("baz")); + value.insert(String::from("bat")); + + let mut serializer = AllocSerializer::<4096>::default(); + serializer.serialize_value(&value).unwrap(); + let result = serializer.into_serializer().into_inner(); + check_archived_root::>(result.as_ref()) + .expect("failed to validate archived index set"); + } +} diff --git a/vendor/hashbrown/src/external_trait_impls/rkyv/mod.rs b/vendor/hashbrown/src/external_trait_impls/rkyv/mod.rs new file mode 100644 index 0000000..2bde6a0 --- /dev/null +++ b/vendor/hashbrown/src/external_trait_impls/rkyv/mod.rs @@ -0,0 +1,2 @@ +mod hash_map; +mod hash_set; diff --git a/vendor/hashbrown/src/external_trait_impls/serde.rs b/vendor/hashbrown/src/external_trait_impls/serde.rs index 4d62dee..0a76dbe 100644 --- a/vendor/hashbrown/src/external_trait_impls/serde.rs +++ b/vendor/hashbrown/src/external_trait_impls/serde.rs @@ -11,6 +11,7 @@ mod size_hint { } mod map { + use crate::raw::Allocator; use core::fmt; use core::hash::{BuildHasher, Hash}; use core::marker::PhantomData; @@ -21,11 +22,12 @@ mod map { use super::size_hint; - impl Serialize for HashMap + impl Serialize for HashMap where K: Serialize + Eq + Hash, V: Serialize, H: BuildHasher, + A: Allocator, { #[cfg_attr(feature = "inline-more", inline)] fn serialize(&self, serializer: S) -> Result 
@@ -36,40 +38,46 @@ mod map { } } - impl<'de, K, V, S> Deserialize<'de> for HashMap + impl<'de, K, V, S, A> Deserialize<'de> for HashMap where K: Deserialize<'de> + Eq + Hash, V: Deserialize<'de>, S: BuildHasher + Default, + A: Allocator + Default, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { - struct MapVisitor { - marker: PhantomData>, + struct MapVisitor + where + A: Allocator, + { + marker: PhantomData>, } - impl<'de, K, V, S> Visitor<'de> for MapVisitor + impl<'de, K, V, S, A> Visitor<'de> for MapVisitor where K: Deserialize<'de> + Eq + Hash, V: Deserialize<'de>, S: BuildHasher + Default, + A: Allocator + Default, { - type Value = HashMap; + type Value = HashMap; fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { formatter.write_str("a map") } #[cfg_attr(feature = "inline-more", inline)] - fn visit_map(self, mut map: A) -> Result + fn visit_map(self, mut map: M) -> Result where - A: MapAccess<'de>, + M: MapAccess<'de>, { - let mut values = HashMap::with_capacity_and_hasher( + let mut values = HashMap::with_capacity_and_hasher_in( size_hint::cautious(map.size_hint()), S::default(), + A::default(), ); while let Some((key, value)) = map.next_entry()? { @@ -89,6 +97,7 @@ mod map { } mod set { + use crate::raw::Allocator; use core::fmt; use core::hash::{BuildHasher, Hash}; use core::marker::PhantomData; @@ -99,10 +108,11 @@ mod set { use super::size_hint; - impl Serialize for HashSet + impl Serialize for HashSet where T: Serialize + Eq + Hash, H: BuildHasher, + A: Allocator, { #[cfg_attr(feature = "inline-more", inline)] fn serialize(&self, serializer: S) -> Result @@ -113,38 +123,44 @@ mod set { } } - impl<'de, T, S> Deserialize<'de> for HashSet + impl<'de, T, S, A> Deserialize<'de> for HashSet where T: Deserialize<'de> + Eq + Hash, S: BuildHasher + Default, + A: Allocator + Default, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { - struct SeqVisitor { - marker: PhantomData>, + struct SeqVisitor + where + A: Allocator, + { + marker: PhantomData>, } - impl<'de, T, S> Visitor<'de> for SeqVisitor + impl<'de, T, S, A> Visitor<'de> for SeqVisitor where T: Deserialize<'de> + Eq + Hash, S: BuildHasher + Default, + A: Allocator + Default, { - type Value = HashSet; + type Value = HashSet; fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { formatter.write_str("a sequence") } #[cfg_attr(feature = "inline-more", inline)] - fn visit_seq(self, mut seq: A) -> Result + fn visit_seq(self, mut seq: M) -> Result where - A: SeqAccess<'de>, + M: SeqAccess<'de>, { - let mut values = HashSet::with_capacity_and_hasher( + let mut values = HashSet::with_capacity_and_hasher_in( size_hint::cautious(seq.size_hint()), S::default(), + A::default(), ); while let Some(value) = seq.next_element()? 
{ @@ -166,12 +182,15 @@ mod set { where D: Deserializer<'de>, { - struct SeqInPlaceVisitor<'a, T, S>(&'a mut HashSet<T, S>); + struct SeqInPlaceVisitor<'a, T, S, A>(&'a mut HashSet<T, S, A>) + where + A: Allocator; - impl<'a, 'de, T, S> Visitor<'de> for SeqInPlaceVisitor<'a, T, S> + impl<'a, 'de, T, S, A> Visitor<'de> for SeqInPlaceVisitor<'a, T, S, A> where T: Deserialize<'de> + Eq + Hash, S: BuildHasher + Default, + A: Allocator, { type Value = (); @@ -180,9 +199,9 @@ } #[cfg_attr(feature = "inline-more", inline)] - fn visit_seq(self, mut seq: A) -> Result + fn visit_seq(self, mut seq: M) -> Result where - A: SeqAccess<'de>, + M: SeqAccess<'de>, { self.0.clear(); self.0.reserve(size_hint::cautious(seq.size_hint())); diff --git a/vendor/hashbrown/src/lib.rs b/vendor/hashbrown/src/lib.rs index bc1c971..6e9592a 100644 --- a/vendor/hashbrown/src/lib.rs +++ b/vendor/hashbrown/src/lib.rs @@ -20,9 +20,8 @@ extend_one, allocator_api, slice_ptr_get, - nonnull_slice_from_raw_parts, maybe_uninit_array_assume_init, - build_hasher_simple_hash_one + strict_provenance ) )] #![allow( @@ -37,6 +36,7 @@ )] #![warn(missing_docs)] #![warn(rust_2018_idioms)] +#![cfg_attr(feature = "nightly", warn(fuzzy_provenance_casts))] #[cfg(test)] #[macro_use] @@ -81,6 +81,7 @@ mod map; mod rustc_entry; mod scopeguard; mod set; +mod table; pub mod hash_map { //! A hash map implemented with quadratic probing and SIMD lookup. @@ -113,9 +114,63 @@ pub mod hash_set { pub use crate::external_trait_impls::rayon::set::*; } } +pub mod hash_table { + //! A hash table implemented with quadratic probing and SIMD lookup. + pub use crate::table::*; + + #[cfg(feature = "rayon")] + /// [rayon]-based parallel iterator types for hash tables. + /// You will rarely need to interact with it directly unless you need + /// to name one of the iterator types. + /// + /// [rayon]: https://docs.rs/rayon/1.0/rayon + pub mod rayon { + pub use crate::external_trait_impls::rayon::table::*; + } +} pub use crate::map::HashMap; pub use crate::set::HashSet; +pub use crate::table::HashTable; + +#[cfg(feature = "equivalent")] +pub use equivalent::Equivalent; + +// This is only used as a fallback when building as part of `std`. +#[cfg(not(feature = "equivalent"))] +/// Key equivalence trait. +/// +/// This trait defines the function used to compare the input value with the +/// map keys (or set values) during a lookup operation such as [`HashMap::get`] +/// or [`HashSet::contains`]. +/// It is provided with a blanket implementation based on the +/// [`Borrow`](core::borrow::Borrow) trait. +/// +/// # Correctness +/// +/// Equivalent values must hash to the same value. +pub trait Equivalent<K: ?Sized> { + /// Checks if this value is equivalent to the given key. + /// + /// Returns `true` if both values are equivalent, and `false` otherwise. + /// + /// # Correctness + /// + /// When this function returns `true`, both `self` and `key` must hash to + /// the same value. + fn equivalent(&self, key: &K) -> bool; +} + +#[cfg(not(feature = "equivalent"))] +impl<Q: ?Sized, K: ?Sized> Equivalent<K> for Q +where + Q: Eq, + K: core::borrow::Borrow<Q>, +{ + fn equivalent(&self, key: &K) -> bool { + self == key.borrow() + } +}
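The fallback trait above (and the `equivalent` crate's identical definition) is what lets lookups sidestep `Borrow` when no borrowed form of the key exists. A minimal sketch of a custom implementation; `OwnedPair` and `PairRef` are hypothetical types, not part of this patch:

```rust
use hashbrown::{Equivalent, HashMap};

// Owned composite key stored in the map.
#[derive(Hash, PartialEq, Eq)]
struct OwnedPair(String, String);

// Cheap borrowed view used only for lookups.
#[derive(Hash)]
struct PairRef<'a>(&'a str, &'a str);

impl Equivalent<OwnedPair> for PairRef<'_> {
    fn equivalent(&self, key: &OwnedPair) -> bool {
        self.0 == key.0 && self.1 == key.1
    }
}

let mut map = HashMap::new();
map.insert(OwnedPair("a".into(), "b".into()), 1);
// No owned (String, String) has to be built for the probe.
assert_eq!(map.get(&PairRef("a", "b")), Some(&1));
```

The correctness requirement stated above is satisfied because `String` and `&str` hash identically, so the derived `Hash` impls of the two types agree.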
/// The error type for `try_reserve` methods. #[derive(Clone, PartialEq, Eq, Debug)] @@ -130,21 +185,3 @@ pub enum TryReserveError { layout: alloc::alloc::Layout, }, } - -/// Wrapper around `Bump` which allows it to be used as an allocator for -/// `HashMap`, `HashSet` and `RawTable`. -/// -/// `Bump` can be used directly without this wrapper on nightly if you enable -/// the `allocator-api` feature of the `bumpalo` crate. -#[cfg(feature = "bumpalo")] -#[derive(Clone, Copy, Debug)] -pub struct BumpWrapper<'a>(pub &'a bumpalo::Bump); - -#[cfg(feature = "bumpalo")] -#[test] -fn test_bumpalo() { - use bumpalo::Bump; - let bump = Bump::new(); - let mut map = HashMap::new_in(BumpWrapper(&bump)); - map.insert(0, 1); -} diff --git a/vendor/hashbrown/src/macros.rs b/vendor/hashbrown/src/macros.rs index f8ef917..eaba6be 100644 --- a/vendor/hashbrown/src/macros.rs +++ b/vendor/hashbrown/src/macros.rs @@ -37,7 +37,7 @@ macro_rules! cfg_if { // semicolon is all the remaining items (@__items ($($not:meta,)*) ; ) => {}; (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => { - // Emit all items within one block, applying an approprate #[cfg]. The + // Emit all items within one block, applying an appropriate #[cfg]. The // #[cfg] will require all `$m` matchers specified and must also negate // all previous matchers. cfg_if! { @__apply cfg(all($($m,)* not(any($($not),*)))), $($it)* } diff --git a/vendor/hashbrown/src/map.rs b/vendor/hashbrown/src/map.rs index a5d3ccb..9cd768f 100644 --- a/vendor/hashbrown/src/map.rs +++ b/vendor/hashbrown/src/map.rs @@ -1,5 +1,7 @@ -use crate::raw::{Allocator, Bucket, Global, RawDrain, RawIntoIter, RawIter, RawTable}; -use crate::TryReserveError; +use crate::raw::{ + Allocator, Bucket, Global, RawDrain, RawExtractIf, RawIntoIter, RawIter, RawTable, +}; +use crate::{Equivalent, TryReserveError}; use core::borrow::Borrow; use core::fmt::{self, Debug}; use core::hash::{BuildHasher, Hash}; @@ -10,7 +12,7 @@ use core::ops::Index; /// Default hasher for `HashMap`. #[cfg(feature = "ahash")] -pub type DefaultHashBuilder = ahash::RandomState; +pub type DefaultHashBuilder = core::hash::BuildHasherDefault<ahash::AHasher>; /// Dummy default hasher for `HashMap`.
#[cfg(not(feature = "ahash"))] @@ -185,7 +187,7 @@ pub enum DefaultHashBuilder {} /// .iter().cloned().collect(); /// // use the values stored in map /// ``` -pub struct HashMap<K, V, S = DefaultHashBuilder, A: Allocator + Clone = Global> { +pub struct HashMap<K, V, S = DefaultHashBuilder, A: Allocator = Global> { pub(crate) hash_builder: S, pub(crate) table: RawTable<(K, V), A>, } @@ -209,13 +211,12 @@ impl Clone for HashMap -pub(crate) fn make_hasher<K, Q, V, S>(hash_builder: &S) -> impl Fn(&(Q, V)) -> u64 + '_ +pub(crate) fn make_hasher<Q, V, S>(hash_builder: &S) -> impl Fn(&(Q, V)) -> u64 + '_ where - K: Borrow<Q>, Q: Hash, S: BuildHasher, { - move |val| make_hash::<K, Q, S>(hash_builder, &val.0) + move |val| make_hash::<Q, S>(hash_builder, &val.0) } /// Ensures that a single closure type across uses of this which, in turn prevents multiple @@ -223,10 +224,9 @@ where #[cfg_attr(feature = "inline-more", inline)] fn equivalent_key<Q, K, V>(k: &Q) -> impl Fn(&(K, V)) -> bool + '_ where - K: Borrow<Q>, - Q: ?Sized + Eq, + Q: ?Sized + Equivalent<K>, { - move |x| k.eq(x.0.borrow()) + move |x| k.equivalent(&x.0) } /// Ensures that a single closure type across uses of this which, in turn prevents multiple @@ -234,17 +234,15 @@ #[cfg_attr(feature = "inline-more", inline)] fn equivalent<Q, K>(k: &Q) -> impl Fn(&K) -> bool + '_ where - K: Borrow<Q>, - Q: ?Sized + Eq, + Q: ?Sized + Equivalent<K>, { - move |x| k.eq(x.borrow()) + move |x| k.equivalent(x) } #[cfg(not(feature = "nightly"))] #[cfg_attr(feature = "inline-more", inline)] -pub(crate) fn make_hash<K, Q, S>(hash_builder: &S, val: &Q) -> u64 +pub(crate) fn make_hash<Q, S>(hash_builder: &S, val: &Q) -> u64 where - K: Borrow<Q>, Q: Hash + ?Sized, S: BuildHasher, { @@ -256,38 +254,14 @@ where #[cfg(feature = "nightly")] #[cfg_attr(feature = "inline-more", inline)] -pub(crate) fn make_hash<K, Q, S>(hash_builder: &S, val: &Q) -> u64 +pub(crate) fn make_hash<Q, S>(hash_builder: &S, val: &Q) -> u64 where - K: Borrow<Q>, Q: Hash + ?Sized, S: BuildHasher, { hash_builder.hash_one(val) } -#[cfg(not(feature = "nightly"))] -#[cfg_attr(feature = "inline-more", inline)] -pub(crate) fn make_insert_hash<K, S>(hash_builder: &S, val: &K) -> u64 -where - K: Hash, - S: BuildHasher, -{ - use core::hash::Hasher; - let mut state = hash_builder.build_hasher(); - val.hash(&mut state); - state.finish() -} - -#[cfg(feature = "nightly")] -#[cfg_attr(feature = "inline-more", inline)] -pub(crate) fn make_insert_hash<K, S>(hash_builder: &S, val: &K) -> u64 -where - K: Hash, - S: BuildHasher, -{ - hash_builder.hash_one(val) -} - #[cfg(feature = "ahash")] impl<K, V> HashMap<K, V, DefaultHashBuilder> { /// Creates an empty `HashMap`. @@ -295,6 +269,18 @@ impl HashMap { /// The hash map is initially created with a capacity of 0, so it will not allocate until it /// is first inserted into. /// + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, and that does + /// not allow the `HashMap` to be protected against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashMap`], for example with the + /// [`with_hasher`](HashMap::with_hasher) method. + /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// /// # Examples /// /// ```
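The HashDoS note above, made concrete; a sketch only, using the randomly seeded `RandomState` from std rather than the fixed-key default:

```rust
use std::collections::hash_map::RandomState;

use hashbrown::HashMap;

// The default hasher is fast but deterministic across runs; for keys that
// an attacker controls, pass a randomly seeded BuildHasher explicitly.
let mut map: HashMap<String, u32, RandomState> = HashMap::with_hasher(RandomState::new());
map.insert("untrusted".to_string(), 1);
assert_eq!(map.get("untrusted"), Some(&1));
```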
/// + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashMap` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashMap`], for example with + /// the [`with_capacity_and_hasher`](HashMap::with_capacity_and_hasher) method. + /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// /// # Examples /// /// ``` @@ -328,11 +326,46 @@ impl HashMap { } #[cfg(feature = "ahash")] -impl HashMap { +impl HashMap { /// Creates an empty `HashMap` using the given allocator. /// /// The hash map is initially created with a capacity of 0, so it will not allocate until it /// is first inserted into. + /// + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashMap` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashMap`], for example with + /// the [`with_hasher_in`](HashMap::with_hasher_in) method. + /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashMap; + /// use bumpalo::Bump; + /// + /// let bump = Bump::new(); + /// let mut map = HashMap::new_in(&bump); + /// + /// // The created HashMap holds no elements + /// assert_eq!(map.len(), 0); + /// + /// // The created HashMap also doesn't allocate memory + /// assert_eq!(map.capacity(), 0); + /// + /// // Now we insert an element into the created HashMap + /// map.insert("One", 1); + /// // We can see that the HashMap holds 1 element + /// assert_eq!(map.len(), 1); + /// // And it also allocates some capacity + /// assert!(map.capacity() > 1); + /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn new_in(alloc: A) -> Self { Self::with_hasher_in(DefaultHashBuilder::default(), alloc) @@ -342,6 +375,46 @@ impl HashMap { /// /// The hash map will be able to hold at least `capacity` elements without /// reallocating. If `capacity` is 0, the hash map will not allocate. + /// + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashMap` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashMap`], for example with + /// the [`with_capacity_and_hasher_in`](HashMap::with_capacity_and_hasher_in) method.
+ /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashMap; + /// use bumpalo::Bump; + /// + /// let bump = Bump::new(); + /// let mut map = HashMap::with_capacity_in(5, &bump); + /// + /// // The created HashMap holds no elements + /// assert_eq!(map.len(), 0); + /// // But it can hold at least 5 elements without reallocating + /// let empty_map_capacity = map.capacity(); + /// assert!(empty_map_capacity >= 5); + /// + /// // Now we insert 5 elements into the created HashMap + /// map.insert("One", 1); + /// map.insert("Two", 2); + /// map.insert("Three", 3); + /// map.insert("Four", 4); + /// map.insert("Five", 5); + /// + /// // We can see that the HashMap holds 5 elements + /// assert_eq!(map.len(), 5); + /// // But its capacity isn't changed + /// assert_eq!(map.capacity(), empty_map_capacity) + /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { Self::with_capacity_and_hasher_in(capacity, DefaultHashBuilder::default(), alloc) @@ -355,14 +428,21 @@ impl HashMap { /// The hash map is initially created with a capacity of 0, so it will not /// allocate until it is first inserted into. /// - /// Warning: `hash_builder` is normally randomly generated, and - /// is designed to allow HashMaps to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashMap` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashMap`]. /// /// The `hash_builder` passed should implement the [`BuildHasher`] trait for /// the HashMap to be useful, see its documentation for details. /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// [`BuildHasher`]: https://doc.rust-lang.org/std/hash/trait.BuildHasher.html + /// /// # Examples /// /// ``` @@ -376,8 +456,6 @@ impl HashMap { /// /// map.insert(1, 2); /// ``` - /// - /// [`BuildHasher`]: https://doc.rust-lang.org/std/hash/trait.BuildHasher.html #[cfg_attr(feature = "inline-more", inline)] pub const fn with_hasher(hash_builder: S) -> Self { Self { @@ -392,14 +470,21 @@ impl HashMap { /// The hash map will be able to hold at least `capacity` elements without /// reallocating. If `capacity` is 0, the hash map will not allocate. /// - /// Warning: `hash_builder` is normally randomly generated, and - /// is designed to allow HashMaps to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashMap` against attacks such as [`HashDoS`].
+ /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashMap`]. /// /// The `hash_builder` passed should implement the [`BuildHasher`] trait for /// the HashMap to be useful, see its documentation for details. /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// [`BuildHasher`]: https://doc.rust-lang.org/std/hash/trait.BuildHasher.html + /// /// # Examples /// /// ``` @@ -413,8 +498,6 @@ impl HashMap { /// /// map.insert(1, 2); /// ``` - /// - /// [`BuildHasher`]: https://doc.rust-lang.org/std/hash/trait.BuildHasher.html #[cfg_attr(feature = "inline-more", inline)] pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self { Self { @@ -424,7 +507,7 @@ impl HashMap { } } -impl HashMap { +impl HashMap { /// Returns a reference to the underlying allocator. #[inline] pub fn allocator(&self) -> &A { @@ -434,12 +517,19 @@ impl HashMap { /// Creates an empty `HashMap` which will use the given hash builder to hash /// keys. It will be allocated with the given allocator. /// - /// The created map has the default initial capacity. + /// The hash map is initially created with a capacity of 0, so it will not allocate until it + /// is first inserted into. + /// + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashMap` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashMap`]. /// - /// Warning: `hash_builder` is normally randomly generated, and - /// is designed to allow HashMaps to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html /// /// # Examples /// @@ -452,7 +542,7 @@ impl HashMap { /// map.insert(1, 2); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn with_hasher_in(hash_builder: S, alloc: A) -> Self { + pub const fn with_hasher_in(hash_builder: S, alloc: A) -> Self { Self { hash_builder, table: RawTable::new_in(alloc), @@ -465,10 +555,16 @@ impl HashMap { /// The hash map will be able to hold at least `capacity` elements without /// reallocating. If `capacity` is 0, the hash map will not allocate. /// - /// Warning: `hash_builder` is normally randomly generated, and - /// is designed to allow HashMaps to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashMap` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashMap`].
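The construction the new documentation recommends is easy to show concretely. Below is a minimal sketch, not part of the patch itself; it assumes `ahash` is available as a direct dependency (the vendored default hasher now uses a fixed key, so only an explicitly seeded `BuildHasher` gives per-process randomness):

```rust
use hashbrown::HashMap;

fn main() {
    // Default construction: fast, but the hash key is fixed and therefore
    // predictable across runs.
    let _fixed: HashMap<i32, i32> = HashMap::new();

    // HashDoS-resistant construction: seed the hasher explicitly, as the
    // updated docs suggest.
    let mut resistant = HashMap::with_hasher(ahash::RandomState::new());
    resistant.insert(1, 2);
    assert_eq!(resistant.get(&1), Some(&2));
}
```

The same pattern applies to the allocator-aware constructors (`with_hasher_in`, `with_capacity_and_hasher_in`) documented above.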
+ /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html /// /// # Examples /// @@ -810,14 +906,11 @@ impl HashMap { /// /// let mut map: HashMap = (0..8).map(|x|(x, x*10)).collect(); /// assert_eq!(map.len(), 8); - /// let capacity_before_retain = map.capacity(); /// /// map.retain(|&k, _| k % 2 == 0); /// /// // We can see, that the number of elements inside map is changed. /// assert_eq!(map.len(), 4); - /// // But map capacity is equal to old one. - /// assert_eq!(map.capacity(), capacity_before_retain); /// /// let mut vec: Vec<(i32, i32)> = map.iter().map(|(&k, &v)| (k, v)).collect(); /// vec.sort_unstable(); @@ -844,26 +937,25 @@ impl HashMap { /// In other words, move all pairs `(k, v)` such that `f(&k, &mut v)` returns `true` out /// into another iterator. /// - /// Note that `drain_filter` lets you mutate every value in the filter closure, regardless of + /// Note that `extract_if` lets you mutate every value in the filter closure, regardless of /// whether you choose to keep or remove it. /// - /// When the returned DrainedFilter is dropped, any remaining elements that satisfy - /// the predicate are dropped from the table. - /// - /// It is unspecified how many more elements will be subjected to the closure - /// if a panic occurs in the closure, or a panic occurs while dropping an element, - /// or if the `DrainFilter` value is leaked. + /// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating + /// or the iteration short-circuits, then the remaining elements will be retained. + /// Use [`retain()`] with a negated predicate if you do not need the returned iterator. /// /// Keeps the allocated memory for reuse. /// + /// [`retain()`]: HashMap::retain + /// /// # Examples /// /// ``` /// use hashbrown::HashMap; /// /// let mut map: HashMap = (0..8).map(|x| (x, x)).collect(); - /// let capacity_before_drain_filter = map.capacity(); - /// let drained: HashMap = map.drain_filter(|k, _v| k % 2 == 0).collect(); + /// + /// let drained: HashMap = map.extract_if(|k, _v| k % 2 == 0).collect(); /// /// let mut evens = drained.keys().cloned().collect::>(); /// let mut odds = map.keys().cloned().collect::>(); @@ -872,27 +964,24 @@ impl HashMap { /// /// assert_eq!(evens, vec![0, 2, 4, 6]); /// assert_eq!(odds, vec![1, 3, 5, 7]); - /// // Map capacity is equal to old one. - /// assert_eq!(map.capacity(), capacity_before_drain_filter); /// /// let mut map: HashMap = (0..8).map(|x| (x, x)).collect(); /// /// { // Iterator is dropped without being consumed. - /// let d = map.drain_filter(|k, _v| k % 2 != 0); + /// let d = map.extract_if(|k, _v| k % 2 != 0); /// } /// - /// // But the map lens have been reduced by half - /// // even if we do not use DrainFilter iterator. - /// assert_eq!(map.len(), 4); + /// // ExtractIf was not exhausted, therefore no elements were drained. 
+ /// assert_eq!(map.len(), 8); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn drain_filter(&mut self, f: F) -> DrainFilter<'_, K, V, F, A> + pub fn extract_if(&mut self, f: F) -> ExtractIf<'_, K, V, F, A> where F: FnMut(&K, &mut V) -> bool, { - DrainFilter { + ExtractIf { f, - inner: DrainFilterInner { + inner: RawExtractIf { iter: unsafe { self.table.iter() }, table: &mut self.table, }, @@ -984,7 +1073,7 @@ impl HashMap where K: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { /// Reserves capacity for at least `additional` more elements to be inserted /// in the `HashMap`. The collection may reserve more space to avoid @@ -992,9 +1081,12 @@ /// /// # Panics /// - /// Panics if the new allocation size overflows [`usize`]. + /// Panics if the new capacity exceeds [`isize::MAX`] bytes, and [`abort`]s the program + /// in case of an allocation error. Use [`try_reserve`](HashMap::try_reserve) instead + /// if you want to handle memory allocation failure. /// - /// [`usize`]: https://doc.rust-lang.org/std/primitive.usize.html + /// [`isize::MAX`]: https://doc.rust-lang.org/std/primitive.isize.html + /// [`abort`]: https://doc.rust-lang.org/alloc/alloc/fn.handle_alloc_error.html /// /// # Examples /// @@ -1012,7 +1104,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn reserve(&mut self, additional: usize) { self.table - .reserve(additional, make_hasher::(&self.hash_builder)); + .reserve(additional, make_hasher::<_, V, S>(&self.hash_builder)); } /// Tries to reserve capacity for at least `additional` more elements to be inserted @@ -1062,7 +1154,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { self.table - .try_reserve(additional, make_hasher::(&self.hash_builder)) + .try_reserve(additional, make_hasher::<_, V, S>(&self.hash_builder)) } /// Shrinks the capacity of the map as much as possible. It will drop @@ -1084,7 +1176,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn shrink_to_fit(&mut self) { self.table - .shrink_to(0, make_hasher::(&self.hash_builder)); + .shrink_to(0, make_hasher::<_, V, S>(&self.hash_builder)); } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -1113,7 +1205,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn shrink_to(&mut self, min_capacity: usize) { self.table - .shrink_to(min_capacity, make_hasher::(&self.hash_builder)); + .shrink_to(min_capacity, make_hasher::<_, V, S>(&self.hash_builder)); } /// Gets the given key's corresponding entry in the map for in-place manipulation.
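The `reserve`/`try_reserve` split that the rewritten docs above describe can be summarized in a short sketch (illustrative only, not from the patch):

```rust
use hashbrown::HashMap;

fn main() {
    let mut map: HashMap<u64, u64> = HashMap::new();

    // Infallible variant: panics on capacity overflow and aborts the
    // process if the allocator fails.
    map.reserve(100);

    // Fallible variant: an impossibly large request is reported as an
    // error instead of taking the process down.
    assert!(map.try_reserve(usize::MAX).is_err());
}
```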
@@ -1137,7 +1229,7 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn entry(&mut self, key: K) -> Entry<'_, K, V, S, A> { - let hash = make_insert_hash::(&self.hash_builder, &key); + let hash = make_hash::(&self.hash_builder, &key); if let Some(elem) = self.table.find(hash, equivalent_key(&key)) { Entry::Occupied(OccupiedEntry { hash, @@ -1174,10 +1266,9 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn entry_ref<'a, 'b, Q: ?Sized>(&'a mut self, key: &'b Q) -> EntryRef<'a, 'b, K, Q, V, S, A> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { - let hash = make_hash::(&self.hash_builder, key); + let hash = make_hash::(&self.hash_builder, key); if let Some(elem) = self.table.find(hash, equivalent_key(key)) { EntryRef::Occupied(OccupiedEntryRef { hash, @@ -1216,12 +1307,11 @@ where #[inline] pub fn get(&self, k: &Q) -> Option<&V> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { // Avoid `Option::map` because it bloats LLVM IR. match self.get_inner(k) { - Some(&(_, ref v)) => Some(v), + Some((_, v)) => Some(v), None => None, } } @@ -1248,12 +1338,11 @@ where #[inline] pub fn get_key_value(&self, k: &Q) -> Option<(&K, &V)> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { // Avoid `Option::map` because it bloats LLVM IR. match self.get_inner(k) { - Some(&(ref key, ref value)) => Some((key, value)), + Some((key, value)) => Some((key, value)), None => None, } } @@ -1261,13 +1350,12 @@ where #[inline] fn get_inner(&self, k: &Q) -> Option<&(K, V)> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { if self.table.is_empty() { None } else { - let hash = make_hash::(&self.hash_builder, k); + let hash = make_hash::(&self.hash_builder, k); self.table.get(hash, equivalent_key(k)) } } @@ -1298,8 +1386,7 @@ where #[inline] pub fn get_key_value_mut(&mut self, k: &Q) -> Option<(&K, &mut V)> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { // Avoid `Option::map` because it bloats LLVM IR. match self.get_inner_mut(k) { @@ -1330,8 +1417,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn contains_key(&self, k: &Q) -> bool where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { self.get_inner(k).is_some() } @@ -1362,8 +1448,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn get_mut(&mut self, k: &Q) -> Option<&mut V> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { // Avoid `Option::map` because it bloats LLVM IR. 
match self.get_inner_mut(k) { @@ -1375,13 +1460,12 @@ where #[inline] fn get_inner_mut(&mut self, k: &Q) -> Option<&mut (K, V)> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { if self.table.is_empty() { None } else { - let hash = make_hash::(&self.hash_builder, k); + let hash = make_hash::(&self.hash_builder, k); self.table.get_mut(hash, equivalent_key(k)) } } @@ -1431,8 +1515,7 @@ where /// ``` pub fn get_many_mut(&mut self, ks: [&Q; N]) -> Option<[&'_ mut V; N]> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { self.get_many_mut_inner(ks).map(|res| res.map(|(_, v)| v)) } @@ -1487,8 +1570,7 @@ where ks: [&Q; N], ) -> Option<[&'_ mut V; N]> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { self.get_many_unchecked_mut_inner(ks) .map(|res| res.map(|(_, v)| v)) @@ -1543,8 +1625,7 @@ where ks: [&Q; N], ) -> Option<[(&'_ K, &'_ mut V); N]> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { self.get_many_mut_inner(ks) .map(|res| res.map(|(k, v)| (&*k, v))) @@ -1599,8 +1680,7 @@ where ks: [&Q; N], ) -> Option<[(&'_ K, &'_ mut V); N]> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { self.get_many_unchecked_mut_inner(ks) .map(|res| res.map(|(k, v)| (&*k, v))) @@ -1611,12 +1691,11 @@ where ks: [&Q; N], ) -> Option<[&'_ mut (K, V); N]> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { let hashes = self.build_hashes_inner(ks); self.table - .get_many_mut(hashes, |i, (k, _)| ks[i].eq(k.borrow())) + .get_many_mut(hashes, |i, (k, _)| ks[i].equivalent(k)) } unsafe fn get_many_unchecked_mut_inner( @@ -1624,22 +1703,20 @@ where ks: [&Q; N], ) -> Option<[&'_ mut (K, V); N]> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { let hashes = self.build_hashes_inner(ks); self.table - .get_many_unchecked_mut(hashes, |i, (k, _)| ks[i].eq(k.borrow())) + .get_many_unchecked_mut(hashes, |i, (k, _)| ks[i].equivalent(k)) } fn build_hashes_inner(&self, ks: [&Q; N]) -> [u64; N] where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { let mut hashes = [0_u64; N]; for i in 0..N { - hashes[i] = make_hash::(&self.hash_builder, ks[i]); + hashes[i] = make_hash::(&self.hash_builder, ks[i]); } hashes } @@ -1672,13 +1749,19 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn insert(&mut self, k: K, v: V) -> Option { - let hash = make_insert_hash::(&self.hash_builder, &k); - if let Some((_, item)) = self.table.get_mut(hash, equivalent_key(&k)) { - Some(mem::replace(item, v)) - } else { - self.table - .insert(hash, (k, v), make_hasher::(&self.hash_builder)); - None + let hash = make_hash::(&self.hash_builder, &k); + let hasher = make_hasher::<_, V, S>(&self.hash_builder); + match self + .table + .find_or_find_insert_slot(hash, equivalent_key(&k), hasher) + { + Ok(bucket) => Some(mem::replace(unsafe { &mut bucket.as_mut().1 }, v)), + Err(slot) => { + unsafe { + self.table.insert_in_slot(hash, slot, (k, v)); + } + None + } } } @@ -1733,10 +1816,10 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn insert_unique_unchecked(&mut self, k: K, v: V) -> (&K, &mut V) { - let hash = make_insert_hash::(&self.hash_builder, &k); + let hash = make_hash::(&self.hash_builder, &k); let bucket = self .table - .insert(hash, (k, v), make_hasher::(&self.hash_builder)); + .insert(hash, (k, v), make_hasher::<_, V, S>(&self.hash_builder)); let (k_ref, v_ref) = unsafe { bucket.as_mut() }; (k_ref, v_ref) } @@ -1801,19 +1884,17 @@ where /// assert!(map.is_empty() && map.capacity() == 0); /// /// map.insert(1, "a"); - /// let capacity_before_remove 
= map.capacity(); /// /// assert_eq!(map.remove(&1), Some("a")); /// assert_eq!(map.remove(&1), None); /// - /// // Now map holds none elements but capacity is equal to the old one - /// assert!(map.len() == 0 && map.capacity() == capacity_before_remove); + /// // Now the map holds no elements + /// assert!(map.is_empty()); /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn remove(&mut self, k: &Q) -> Option where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { // Avoid `Option::map` because it bloats LLVM IR. match self.remove_entry(k) { @@ -1842,26 +1923,24 @@ /// assert!(map.is_empty() && map.capacity() == 0); /// /// map.insert(1, "a"); - /// let capacity_before_remove = map.capacity(); /// /// assert_eq!(map.remove_entry(&1), Some((1, "a"))); /// assert_eq!(map.remove(&1), None); /// - /// // Now map hold none elements but capacity is equal to the old one - /// assert!(map.len() == 0 && map.capacity() == capacity_before_remove); + /// // Now the map holds no elements + /// assert!(map.is_empty()); /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn remove_entry(&mut self, k: &Q) -> Option<(K, V)> where - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { - let hash = make_hash::(&self.hash_builder, k); + let hash = make_hash::(&self.hash_builder, k); self.table.remove_entry(hash, equivalent_key(k)) } } -impl HashMap { +impl HashMap { /// Creates a raw entry builder for the HashMap. /// /// Raw entries provide the lowest level of control for searching and /// manipulating a map. @@ -2013,19 +2092,31 @@ impl HashMap { RawEntryBuilder { map: self } } + /// Returns a reference to the [`RawTable`] used underneath [`HashMap`]. + /// This function is only available if the `raw` feature of the crate is enabled. + /// + /// See [`raw_table_mut`] for more. + /// + /// [`raw_table_mut`]: Self::raw_table_mut + #[cfg(feature = "raw")] + #[cfg_attr(feature = "inline-more", inline)] + pub fn raw_table(&self) -> &RawTable<(K, V), A> { + &self.table + } + /// Returns a mutable reference to the [`RawTable`] used underneath [`HashMap`]. /// This function is only available if the `raw` feature of the crate is enabled. /// /// # Note /// - /// Calling the function safe, but using raw hash table API's may require + /// Calling this function is safe, but using the raw hash table API may require /// unsafe functions or blocks. /// /// `RawTable` API gives the lowest level of control under the map that can be useful /// for extending the HashMap's API, but may lead to *[undefined behavior]*.
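The hunks above replace the old `K: Borrow<Q>` bounds on the lookup and removal methods with `Q: Hash + Equivalent<K>`. What that buys can be shown with a small sketch (my own illustration, assuming `Equivalent` is re-exported at the crate root, as the new `use crate::{Equivalent, TryReserveError};` import suggests): a borrowed "view" type can serve as a lookup key even where `Borrow` could never be implemented for it.

```rust
use core::hash::{Hash, Hasher};
use hashbrown::{Equivalent, HashMap};

#[derive(Hash, PartialEq, Eq)]
struct OwnedPair(String, String);

// A borrowed view of `OwnedPair`, usable for lookups without allocating.
struct PairRef<'a>(&'a str, &'a str);

impl Hash for PairRef<'_> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Must hash field-by-field, exactly like the derived impl on
        // `OwnedPair`, or lookups will miss.
        self.0.hash(state);
        self.1.hash(state);
    }
}

impl Equivalent<OwnedPair> for PairRef<'_> {
    fn equivalent(&self, key: &OwnedPair) -> bool {
        self.0 == key.0 && self.1 == key.1
    }
}

fn main() {
    let mut map = HashMap::new();
    map.insert(OwnedPair("a".into(), "b".into()), 1);
    // No temporary `(String, String)` is built just to call `get`.
    assert_eq!(map.get(&PairRef("a", "b")), Some(&1));
}
```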
/// /// [`HashMap`]: struct.HashMap.html - /// [`RawTable`]: raw/struct.RawTable.html + /// [`RawTable`]: crate::raw::RawTable /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html /// /// # Examples @@ -2049,9 +2140,9 @@ impl HashMap { /// where /// F: Fn(&(K, V)) -> bool, /// { - /// let raw_table = map.raw_table(); + /// let raw_table = map.raw_table_mut(); /// match raw_table.find(hash, is_match) { - /// Some(bucket) => Some(unsafe { raw_table.remove(bucket) }), + /// Some(bucket) => Some(unsafe { raw_table.remove(bucket).0 }), /// None => None, /// } /// } /// ``` #[cfg(feature = "raw")] #[cfg_attr(feature = "inline-more", inline)] - pub fn raw_table(&mut self) -> &mut RawTable<(K, V), A> { + pub fn raw_table_mut(&mut self) -> &mut RawTable<(K, V), A> { &mut self.table } } @@ -2080,7 +2171,7 @@ where K: Eq + Hash, V: PartialEq, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { fn eq(&self, other: &Self) -> bool { if self.len() != other.len() { @@ -2097,7 +2188,7 @@ where K: Eq + Hash, V: Eq, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { } @@ -2105,7 +2196,7 @@ impl Debug for HashMap where K: Debug, V: Debug, - A: Allocator + Clone, + A: Allocator, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_map().entries(self.iter()).finish() @@ -2115,7 +2206,7 @@ where impl Default for HashMap where S: Default, - A: Default + Allocator + Clone, + A: Default + Allocator, { /// Creates an empty `HashMap`, with the `Default` value for the hasher and allocator. /// @@ -2140,10 +2231,10 @@ where impl Index<&Q> for HashMap where - K: Eq + Hash + Borrow, - Q: Eq + Hash, + K: Eq + Hash, + Q: Hash + Equivalent, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { type Output = V; @@ -2174,7 +2265,7 @@ where impl From<[(K, V); N]> for HashMap where K: Eq + Hash, - A: Default + Allocator + Clone, + A: Default + Allocator, { /// # Examples /// @@ -2319,11 +2410,11 @@ impl IterMut<'_, K, V> { /// assert_eq!(iter.next(), None); /// assert_eq!(iter.next(), None); /// ``` -pub struct IntoIter { inner: RawIntoIter<(K, V), A>, } -impl IntoIter { /// Returns an iterator of references over the remaining items.
#[cfg_attr(feature = "inline-more", inline)] pub(super) fn iter(&self) -> Iter<'_, K, V> { @@ -2363,11 +2454,11 @@ impl IntoIter { /// assert_eq!(keys.next(), None); /// assert_eq!(keys.next(), None); /// ``` -pub struct IntoKeys { inner: IntoIter, } -impl Iterator for IntoKeys { +impl Iterator for IntoKeys { type Item = K; #[inline] @@ -2378,18 +2469,26 @@ impl Iterator for IntoKeys { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[inline] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (k, _)| f(acc, k)) + } } -impl ExactSizeIterator for IntoKeys { +impl ExactSizeIterator for IntoKeys { #[inline] fn len(&self) -> usize { self.inner.len() } } -impl FusedIterator for IntoKeys {} +impl FusedIterator for IntoKeys {} -impl fmt::Debug for IntoKeys { +impl fmt::Debug for IntoKeys { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list() .entries(self.inner.iter().map(|(k, _)| k)) @@ -2425,11 +2524,11 @@ impl fmt::Debug for IntoKeys /// assert_eq!(values.next(), None); /// assert_eq!(values.next(), None); /// ``` -pub struct IntoValues { inner: IntoIter, } -impl Iterator for IntoValues { +impl Iterator for IntoValues { type Item = V; #[inline] @@ -2440,18 +2539,26 @@ impl Iterator for IntoValues { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[inline] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (_, v)| f(acc, v)) + } } -impl ExactSizeIterator for IntoValues { +impl ExactSizeIterator for IntoValues { #[inline] fn len(&self) -> usize { self.inner.len() } } -impl FusedIterator for IntoValues {} +impl FusedIterator for IntoValues {} -impl fmt::Debug for IntoValues { +impl fmt::Debug for IntoValues { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list() .entries(self.inner.iter().map(|(_, v)| v)) @@ -2583,11 +2690,11 @@ impl fmt::Debug for Values<'_, K, V> { /// assert_eq!(drain_iter.next(), None); /// assert_eq!(drain_iter.next(), None); /// ``` -pub struct Drain<'a, K, V, A: Allocator + Clone = Global> { +pub struct Drain<'a, K, V, A: Allocator = Global> { inner: RawDrain<'a, (K, V), A>, } -impl Drain<'_, K, V, A> { +impl Drain<'_, K, V, A> { /// Returns an iterator of references over the remaining items. #[cfg_attr(feature = "inline-more", inline)] pub(super) fn iter(&self) -> Iter<'_, K, V> { @@ -2601,10 +2708,10 @@ impl Drain<'_, K, V, A> { /// A draining iterator over entries of a `HashMap` which don't satisfy the predicate /// `f(&k, &mut v)` in arbitrary order. The iterator element type is `(K, V)`. /// -/// This `struct` is created by the [`extract_if`] method on [`HashMap`]. See its /// documentation for more.
/// -/// [`drain_filter`]: struct.HashMap.html#method.drain_filter +/// [`extract_if`]: struct.HashMap.html#method.extract_if /// [`HashMap`]: struct.HashMap.html /// /// # Examples /// @@ -2614,63 +2721,40 @@ impl Drain<'_, K, V, A> { /// /// let mut map: HashMap = [(1, "a"), (2, "b"), (3, "c")].into(); /// -/// let mut drain_filter = map.drain_filter(|k, _v| k % 2 != 0); -/// let mut vec = vec![drain_filter.next(), drain_filter.next()]; +/// let mut extract_if = map.extract_if(|k, _v| k % 2 != 0); +/// let mut vec = vec![extract_if.next(), extract_if.next()]; /// -/// // The `DrainFilter` iterator produces items in arbitrary order, so the +/// // The `ExtractIf` iterator produces items in arbitrary order, so the /// // items must be sorted to test them against a sorted array. /// vec.sort_unstable(); /// assert_eq!(vec, [Some((1, "a")),Some((3, "c"))]); /// /// // It is a fused iterator -/// assert_eq!(drain_filter.next(), None); -/// assert_eq!(drain_filter.next(), None); -/// drop(drain_filter); +/// assert_eq!(extract_if.next(), None); +/// assert_eq!(extract_if.next(), None); +/// drop(extract_if); /// /// assert_eq!(map.len(), 1); /// ``` -pub struct DrainFilter<'a, K, V, F, A: Allocator + Clone = Global> +#[must_use = "Iterators are lazy unless consumed"] +pub struct ExtractIf<'a, K, V, F, A: Allocator = Global> where F: FnMut(&K, &mut V) -> bool, { f: F, - inner: DrainFilterInner<'a, K, V, A>, -} - -impl<'a, K, V, F, A> Drop for DrainFilter<'a, K, V, F, A> -where - F: FnMut(&K, &mut V) -> bool, - A: Allocator + Clone, -{ - #[cfg_attr(feature = "inline-more", inline)] - fn drop(&mut self) { - while let Some(item) = self.next() { - let guard = ConsumeAllOnDrop(self); - drop(item); - mem::forget(guard); - } - } -} - -pub(super) struct ConsumeAllOnDrop<'a, T: Iterator>(pub &'a mut T); - -impl Drop for ConsumeAllOnDrop<'_, T> { - #[cfg_attr(feature = "inline-more", inline)] - fn drop(&mut self) { - self.0.for_each(drop); - } + inner: RawExtractIf<'a, (K, V), A>, } -impl Iterator for DrainFilter<'_, K, V, F, A> +impl Iterator for ExtractIf<'_, K, V, F, A> where F: FnMut(&K, &mut V) -> bool, - A: Allocator + Clone, + A: Allocator, { type Item = (K, V); #[cfg_attr(feature = "inline-more", inline)] fn next(&mut self) -> Option { - self.inner.next(&mut self.f) + self.inner.next(|&mut (ref k, ref mut v)| (self.f)(k, v)) } #[inline] @@ -2679,31 +2763,7 @@ where } } -impl FusedIterator for DrainFilter<'_, K, V, F> where F: FnMut(&K, &mut V) -> bool {} -/// Portions of `DrainFilter` shared with `set::DrainFilter` -pub(super) struct DrainFilterInner<'a, K, V, A: Allocator + Clone> { - pub iter: RawIter<(K, V)>, - pub table: &'a mut RawTable<(K, V), A>, -} -impl DrainFilterInner<'_, K, V, A> { - #[cfg_attr(feature = "inline-more", inline)] - pub(super) fn next(&mut self, f: &mut F) -> Option<(K, V)> - where - F: FnMut(&K, &mut V) -> bool, - { - unsafe { - for item in &mut self.iter { - let &mut (ref key, ref mut value) = item.as_mut(); - if f(key, value) { - return Some(self.table.remove(item)); - } - } - } - None - } -} +impl FusedIterator for ExtractIf<'_, K, V, F> where F: FnMut(&K, &mut V) -> bool {} /// A mutable iterator over the values of a `HashMap` in arbitrary order. /// The iterator element type is `&'a mut V`.
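The behavioural change carried by this rename, dropping an unexhausted `ExtractIf` no longer removes matching entries, whereas the old `DrainFilter` drained on drop, can be seen in a short sketch (my own illustration, not from the patch):

```rust
use hashbrown::HashMap;

fn main() {
    let mut map: HashMap<i32, i32> = (0..8).map(|x| (x, x)).collect();

    // Fully consumed: the matching entries are removed from the map.
    let evens: Vec<_> = map.extract_if(|k, _| k % 2 == 0).collect();
    assert_eq!(evens.len(), 4);
    assert_eq!(map.len(), 4);

    // Dropped without iterating: nothing is removed any more.
    drop(map.extract_if(|k, _| k % 2 != 0));
    assert_eq!(map.len(), 4);
}
```

Code that relied on the old drain-on-drop behaviour should call `retain` with a negated predicate instead, as the rewritten docs note.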
@@ -2791,7 +2851,7 @@ pub struct ValuesMut<'a, K, V> { /// /// assert_eq!(map.len(), 6); /// ``` -pub struct RawEntryBuilderMut<'a, K, V, S, A: Allocator + Clone = Global> { +pub struct RawEntryBuilderMut<'a, K, V, S, A: Allocator = Global> { map: &'a mut HashMap, } @@ -2879,7 +2939,7 @@ pub struct RawEntryBuilderMut<'a, K, V, S, A: Allocator + Clone = Global> { /// vec.sort_unstable(); /// assert_eq!(vec, [('a', 10), ('b', 20), ('c', 30), ('d', 40), ('e', 50), ('f', 60)]); /// ``` -pub enum RawEntryMut<'a, K, V, S, A: Allocator + Clone = Global> { +pub enum RawEntryMut<'a, K, V, S, A: Allocator = Global> { /// An occupied entry. /// /// # Examples @@ -2970,7 +3030,7 @@ pub enum RawEntryMut<'a, K, V, S, A: Allocator + Clone = Global> { /// assert_eq!(map.get(&"b"), None); /// assert_eq!(map.len(), 1); /// ``` -pub struct RawOccupiedEntryMut<'a, K, V, S, A: Allocator + Clone = Global> { +pub struct RawOccupiedEntryMut<'a, K, V, S, A: Allocator = Global> { elem: Bucket<(K, V)>, table: &'a mut RawTable<(K, V), A>, hash_builder: &'a S, @@ -2981,7 +3041,7 @@ where K: Send, V: Send, S: Send, - A: Send + Allocator + Clone, + A: Send + Allocator, { } unsafe impl Sync for RawOccupiedEntryMut<'_, K, V, S, A> @@ -2989,7 +3049,7 @@ where K: Sync, V: Sync, S: Sync, - A: Sync + Allocator + Clone, + A: Sync + Allocator, { } @@ -3041,7 +3101,7 @@ where /// } /// assert!(map[&"c"] == 30 && map.len() == 3); /// ``` -pub struct RawVacantEntryMut<'a, K, V, S, A: Allocator + Clone = Global> { +pub struct RawVacantEntryMut<'a, K, V, S, A: Allocator = Global> { table: &'a mut RawTable<(K, V), A>, hash_builder: &'a S, } @@ -3080,11 +3140,11 @@ pub struct RawVacantEntryMut<'a, K, V, S, A: Allocator + Clone = Global> { /// assert_eq!(map.raw_entry().from_key_hashed_nocheck(hash, &k), kv); /// } /// ``` -pub struct RawEntryBuilder<'a, K, V, S, A: Allocator + Clone = Global> { +pub struct RawEntryBuilder<'a, K, V, S, A: Allocator = Global> { map: &'a HashMap, } -impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilderMut<'a, K, V, S, A> { +impl<'a, K, V, S, A: Allocator> RawEntryBuilderMut<'a, K, V, S, A> { /// Creates a `RawEntryMut` from the given key. /// /// # Examples @@ -3103,10 +3163,9 @@ impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilderMut<'a, K, V, S, A> { pub fn from_key(self, k: &Q) -> RawEntryMut<'a, K, V, S, A> where S: BuildHasher, - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { - let hash = make_hash::(&self.map.hash_builder, k); + let hash = make_hash::(&self.map.hash_builder, k); self.from_key_hashed_nocheck(hash, k) } @@ -3136,14 +3195,13 @@ impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilderMut<'a, K, V, S, A> { #[allow(clippy::wrong_self_convention)] pub fn from_key_hashed_nocheck(self, hash: u64, k: &Q) -> RawEntryMut<'a, K, V, S, A> where - K: Borrow, - Q: Eq, + Q: Equivalent, { self.from_hash(hash, equivalent(k)) } } -impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilderMut<'a, K, V, S, A> { +impl<'a, K, V, S, A: Allocator> RawEntryBuilderMut<'a, K, V, S, A> { /// Creates a `RawEntryMut` from the given hash and matching function. /// /// # Examples @@ -3194,7 +3252,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilderMut<'a, K, V, S, A> { } } -impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilder<'a, K, V, S, A> { +impl<'a, K, V, S, A: Allocator> RawEntryBuilder<'a, K, V, S, A> { /// Access an immutable entry by key. 
/// /// # Examples @@ -3211,10 +3269,9 @@ impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilder<'a, K, V, S, A> { pub fn from_key(self, k: &Q) -> Option<(&'a K, &'a V)> where S: BuildHasher, - K: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { - let hash = make_hash::(&self.map.hash_builder, k); + let hash = make_hash::(&self.map.hash_builder, k); self.from_key_hashed_nocheck(hash, k) } @@ -3242,8 +3299,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilder<'a, K, V, S, A> { #[allow(clippy::wrong_self_convention)] pub fn from_key_hashed_nocheck(self, hash: u64, k: &Q) -> Option<(&'a K, &'a V)> where - K: Borrow, - Q: Eq, + Q: Equivalent, { self.from_hash(hash, equivalent(k)) } @@ -3254,7 +3310,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilder<'a, K, V, S, A> { F: FnMut(&K) -> bool, { match self.map.table.get(hash, |(k, _)| is_match(k)) { - Some(&(ref key, ref value)) => Some((key, value)), + Some((key, value)) => Some((key, value)), None => None, } } @@ -3289,7 +3345,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilder<'a, K, V, S, A> { } } -impl<'a, K, V, S, A: Allocator + Clone> RawEntryMut<'a, K, V, S, A> { +impl<'a, K, V, S, A: Allocator> RawEntryMut<'a, K, V, S, A> { /// Sets the value of the entry, and returns a RawOccupiedEntryMut. /// /// # Examples @@ -3483,7 +3539,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawEntryMut<'a, K, V, S, A> { } } -impl<'a, K, V, S, A: Allocator + Clone> RawOccupiedEntryMut<'a, K, V, S, A> { +impl<'a, K, V, S, A: Allocator> RawOccupiedEntryMut<'a, K, V, S, A> { /// Gets a reference to the key in the entry. /// /// # Examples @@ -3650,7 +3706,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawOccupiedEntryMut<'a, K, V, S, A> { #[cfg_attr(feature = "inline-more", inline)] pub fn get_key_value(&self) -> (&K, &V) { unsafe { - let &(ref key, ref value) = self.elem.as_ref(); + let (key, value) = self.elem.as_ref(); (key, value) } } @@ -3822,7 +3878,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawOccupiedEntryMut<'a, K, V, S, A> { /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn remove_entry(self) -> (K, V) { - unsafe { self.table.remove(self.elem) } + unsafe { self.table.remove(self.elem).0 } } /// Provides shared access to the key and owned access to the value of @@ -3882,7 +3938,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawOccupiedEntryMut<'a, K, V, S, A> { } } -impl<'a, K, V, S, A: Allocator + Clone> RawVacantEntryMut<'a, K, V, S, A> { +impl<'a, K, V, S, A: Allocator> RawVacantEntryMut<'a, K, V, S, A> { /// Sets the value of the entry with the VacantEntry's key, /// and returns a mutable reference to it. 
/// @@ -3906,7 +3962,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawVacantEntryMut<'a, K, V, S, A> { K: Hash, S: BuildHasher, { - let hash = make_insert_hash::(self.hash_builder, &key); + let hash = make_hash::(self.hash_builder, &key); self.insert_hashed_nocheck(hash, key, value) } @@ -3950,7 +4006,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawVacantEntryMut<'a, K, V, S, A> { let &mut (ref mut k, ref mut v) = self.table.insert_entry( hash, (key, value), - make_hasher::(self.hash_builder), + make_hasher::<_, V, S>(self.hash_builder), ); (k, v) } @@ -4014,11 +4070,11 @@ impl<'a, K, V, S, A: Allocator + Clone> RawVacantEntryMut<'a, K, V, S, A> { K: Hash, S: BuildHasher, { - let hash = make_insert_hash::(self.hash_builder, &key); + let hash = make_hash::(self.hash_builder, &key); let elem = self.table.insert( hash, (key, value), - make_hasher::(self.hash_builder), + make_hasher::<_, V, S>(self.hash_builder), ); RawOccupiedEntryMut { elem, @@ -4028,13 +4084,13 @@ impl<'a, K, V, S, A: Allocator + Clone> RawVacantEntryMut<'a, K, V, S, A> { } } -impl Debug for RawEntryBuilderMut<'_, K, V, S, A> { +impl Debug for RawEntryBuilderMut<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawEntryBuilder").finish() } } -impl Debug for RawEntryMut<'_, K, V, S, A> { +impl Debug for RawEntryMut<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { RawEntryMut::Vacant(ref v) => f.debug_tuple("RawEntry").field(v).finish(), @@ -4043,7 +4099,7 @@ impl Debug for RawEntryMut<'_, K, V } } -impl Debug for RawOccupiedEntryMut<'_, K, V, S, A> { +impl Debug for RawOccupiedEntryMut<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawOccupiedEntryMut") .field("key", self.key()) @@ -4052,13 +4108,13 @@ impl Debug for RawOccupiedEntryMut< } } -impl Debug for RawVacantEntryMut<'_, K, V, S, A> { +impl Debug for RawVacantEntryMut<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawVacantEntryMut").finish() } } -impl Debug for RawEntryBuilder<'_, K, V, S, A> { +impl Debug for RawEntryBuilder<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawEntryBuilder").finish() } @@ -4109,7 +4165,7 @@ impl Debug for RawEntryBuilder<'_, K, V, S, A> { /// ``` pub enum Entry<'a, K, V, S, A = Global> where - A: Allocator + Clone, + A: Allocator, { /// An occupied entry. 
/// @@ -4142,7 +4198,7 @@ where Vacant(VacantEntry<'a, K, V, S, A>), } -impl Debug for Entry<'_, K, V, S, A> { +impl Debug for Entry<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Entry::Vacant(ref v) => f.debug_tuple("Entry").field(v).finish(), @@ -4191,7 +4247,7 @@ impl Debug for Entry<'_, K, V, S, A /// assert_eq!(map.get(&"c"), None); /// assert_eq!(map.len(), 2); /// ``` -pub struct OccupiedEntry<'a, K, V, S, A: Allocator + Clone = Global> { +pub struct OccupiedEntry<'a, K, V, S = DefaultHashBuilder, A: Allocator = Global> { hash: u64, key: Option, elem: Bucket<(K, V)>, @@ -4203,7 +4259,7 @@ where K: Send, V: Send, S: Send, - A: Send + Allocator + Clone, + A: Send + Allocator, { } unsafe impl Sync for OccupiedEntry<'_, K, V, S, A> @@ -4211,11 +4267,11 @@ where K: Sync, V: Sync, S: Sync, - A: Sync + Allocator + Clone, + A: Sync + Allocator, { } -impl Debug for OccupiedEntry<'_, K, V, S, A> { +impl Debug for OccupiedEntry<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("OccupiedEntry") .field("key", self.key()) @@ -4254,13 +4310,13 @@ impl Debug for OccupiedEntry<'_, K, /// } /// assert!(map[&"b"] == 20 && map.len() == 2); /// ``` -pub struct VacantEntry<'a, K, V, S, A: Allocator + Clone = Global> { +pub struct VacantEntry<'a, K, V, S = DefaultHashBuilder, A: Allocator = Global> { hash: u64, key: K, table: &'a mut HashMap, } -impl Debug for VacantEntry<'_, K, V, S, A> { +impl Debug for VacantEntry<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("VacantEntry").field(self.key()).finish() } @@ -4320,7 +4376,7 @@ impl Debug for VacantEntry<'_, K, V, S, A> /// ``` pub enum EntryRef<'a, 'b, K, Q: ?Sized, V, S, A = Global> where - A: Allocator + Clone, + A: Allocator, { /// An occupied entry. 
/// @@ -4353,7 +4409,7 @@ where Vacant(VacantEntryRef<'a, 'b, K, Q, V, S, A>), } -impl, Q: ?Sized + Debug, V: Debug, S, A: Allocator + Clone> Debug +impl, Q: ?Sized + Debug, V: Debug, S, A: Allocator> Debug for EntryRef<'_, '_, K, Q, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -4431,7 +4487,7 @@ impl<'a, K: Borrow, Q: ?Sized> AsRef for KeyOrRef<'a, K, Q> { /// assert_eq!(map.get("c"), None); /// assert_eq!(map.len(), 2); /// ``` -pub struct OccupiedEntryRef<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone = Global> { +pub struct OccupiedEntryRef<'a, 'b, K, Q: ?Sized, V, S, A: Allocator = Global> { hash: u64, key: Option>, elem: Bucket<(K, V)>, @@ -4444,7 +4500,7 @@ where Q: Sync + ?Sized, V: Send, S: Send, - A: Send + Allocator + Clone, + A: Send + Allocator, { } unsafe impl<'a, 'b, K, Q, V, S, A> Sync for OccupiedEntryRef<'a, 'b, K, Q, V, S, A> @@ -4453,16 +4509,16 @@ where Q: Sync + ?Sized, V: Sync, S: Sync, - A: Sync + Allocator + Clone, + A: Sync + Allocator, { } -impl, Q: ?Sized + Debug, V: Debug, S, A: Allocator + Clone> Debug +impl, Q: ?Sized + Debug, V: Debug, S, A: Allocator> Debug for OccupiedEntryRef<'_, '_, K, Q, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("OccupiedEntryRef") - .field("key", &self.key()) + .field("key", &self.key().borrow()) .field("value", &self.get()) .finish() } @@ -4498,13 +4554,13 @@ impl, Q: ?Sized + Debug, V: Debug, S, A: Allocator + Clone> Debug /// } /// assert!(map["b"] == 20 && map.len() == 2); /// ``` -pub struct VacantEntryRef<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone = Global> { +pub struct VacantEntryRef<'a, 'b, K, Q: ?Sized, V, S, A: Allocator = Global> { hash: u64, key: KeyOrRef<'b, K, Q>, table: &'a mut HashMap, } -impl, Q: ?Sized + Debug, V, S, A: Allocator + Clone> Debug +impl, Q: ?Sized + Debug, V, S, A: Allocator> Debug for VacantEntryRef<'_, '_, K, Q, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -4536,14 +4592,14 @@ impl, Q: ?Sized + Debug, V, S, A: Allocator + Clone> Debug /// } /// assert_eq!(map[&"a"], 100); /// ``` -pub struct OccupiedError<'a, K, V, S, A: Allocator + Clone = Global> { +pub struct OccupiedError<'a, K, V, S, A: Allocator = Global> { /// The entry in the map that was already occupied. pub entry: OccupiedEntry<'a, K, V, S, A>, /// The value which was not inserted, because the entry was already occupied. 
pub value: V, } -impl Debug for OccupiedError<'_, K, V, S, A> { +impl Debug for OccupiedError<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("OccupiedError") .field("key", self.entry.key()) @@ -4553,9 +4609,7 @@ impl Debug for OccupiedError<'_, K, } } -impl<'a, K: Debug, V: Debug, S, A: Allocator + Clone> fmt::Display - for OccupiedError<'a, K, V, S, A> -{ +impl<'a, K: Debug, V: Debug, S, A: Allocator> fmt::Display for OccupiedError<'a, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, @@ -4567,7 +4621,7 @@ impl<'a, K: Debug, V: Debug, S, A: Allocator + Clone> fmt::Display } } -impl<'a, K, V, S, A: Allocator + Clone> IntoIterator for &'a HashMap { +impl<'a, K, V, S, A: Allocator> IntoIterator for &'a HashMap { type Item = (&'a K, &'a V); type IntoIter = Iter<'a, K, V>; @@ -4599,7 +4653,7 @@ impl<'a, K, V, S, A: Allocator + Clone> IntoIterator for &'a HashMap } } -impl<'a, K, V, S, A: Allocator + Clone> IntoIterator for &'a mut HashMap { +impl<'a, K, V, S, A: Allocator> IntoIterator for &'a mut HashMap { type Item = (&'a K, &'a mut V); type IntoIter = IterMut<'a, K, V>; @@ -4636,7 +4690,7 @@ impl<'a, K, V, S, A: Allocator + Clone> IntoIterator for &'a mut HashMap IntoIterator for HashMap { +impl IntoIterator for HashMap { type Item = (K, V); type IntoIter = IntoIter; @@ -4684,6 +4738,17 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, x| unsafe { + let (k, v) = x.as_ref(); + f(acc, (k, v)) + }) + } } impl ExactSizeIterator for Iter<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] @@ -4712,6 +4777,17 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, x| unsafe { + let (k, v) = x.as_mut(); + f(acc, (k, v)) + }) + } } impl ExactSizeIterator for IterMut<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] @@ -4731,7 +4807,7 @@ where } } -impl Iterator for IntoIter { +impl Iterator for IntoIter { type Item = (K, V); #[cfg_attr(feature = "inline-more", inline)] @@ -4742,16 +4818,24 @@ impl Iterator for IntoIter { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, f) + } } -impl ExactSizeIterator for IntoIter { +impl ExactSizeIterator for IntoIter { #[cfg_attr(feature = "inline-more", inline)] fn len(&self) -> usize { self.inner.len() } } -impl FusedIterator for IntoIter {} +impl FusedIterator for IntoIter {} -impl fmt::Debug for IntoIter { +impl fmt::Debug for IntoIter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.iter()).finish() } @@ -4772,6 +4856,14 @@ impl<'a, K, V> Iterator for Keys<'a, K, V> { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (k, _)| f(acc, k)) + } } impl ExactSizeIterator for Keys<'_, K, V> { 
#[cfg_attr(feature = "inline-more", inline)] @@ -4796,6 +4888,14 @@ impl<'a, K, V> Iterator for Values<'a, K, V> { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (_, v)| f(acc, v)) + } } impl ExactSizeIterator for Values<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] @@ -4820,6 +4920,14 @@ impl<'a, K, V> Iterator for ValuesMut<'a, K, V> { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (_, v)| f(acc, v)) + } } impl ExactSizeIterator for ValuesMut<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] @@ -4837,7 +4945,7 @@ impl fmt::Debug for ValuesMut<'_, K, V> { } } -impl<'a, K, V, A: Allocator + Clone> Iterator for Drain<'a, K, V, A> { +impl<'a, K, V, A: Allocator> Iterator for Drain<'a, K, V, A> { type Item = (K, V); #[cfg_attr(feature = "inline-more", inline)] @@ -4848,27 +4956,35 @@ impl<'a, K, V, A: Allocator + Clone> Iterator for Drain<'a, K, V, A> { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, f) + } } -impl ExactSizeIterator for Drain<'_, K, V, A> { +impl ExactSizeIterator for Drain<'_, K, V, A> { #[cfg_attr(feature = "inline-more", inline)] fn len(&self) -> usize { self.inner.len() } } -impl FusedIterator for Drain<'_, K, V, A> {} +impl FusedIterator for Drain<'_, K, V, A> {} impl fmt::Debug for Drain<'_, K, V, A> where K: fmt::Debug, V: fmt::Debug, - A: Allocator + Clone, + A: Allocator, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.iter()).finish() } } -impl<'a, K, V, S, A: Allocator + Clone> Entry<'a, K, V, S, A> { +impl<'a, K, V, S, A: Allocator> Entry<'a, K, V, S, A> { /// Sets the value of the entry, and returns an OccupiedEntry. /// /// # Examples @@ -5115,7 +5231,7 @@ impl<'a, K, V, S, A: Allocator + Clone> Entry<'a, K, V, S, A> { } } -impl<'a, K, V: Default, S, A: Allocator + Clone> Entry<'a, K, V, S, A> { +impl<'a, K, V: Default, S, A: Allocator> Entry<'a, K, V, S, A> { /// Ensures a value is in the entry by inserting the default value if empty, /// and returns a mutable reference to the value in the entry. /// @@ -5148,7 +5264,7 @@ impl<'a, K, V: Default, S, A: Allocator + Clone> Entry<'a, K, V, S, A> { } } -impl<'a, K, V, S, A: Allocator + Clone> OccupiedEntry<'a, K, V, S, A> { +impl<'a, K, V, S, A: Allocator> OccupiedEntry<'a, K, V, S, A> { /// Gets a reference to the key in the entry. /// /// # Examples @@ -5183,7 +5299,6 @@ impl<'a, K, V, S, A: Allocator + Clone> OccupiedEntry<'a, K, V, S, A> { /// assert!(map.is_empty() && map.capacity() == 0); /// /// map.entry("poneyland").or_insert(12); - /// let capacity_before_remove = map.capacity(); /// /// if let Entry::Occupied(o) = map.entry("poneyland") { /// // We delete the entry from the map. 
@@ -5191,12 +5306,12 @@ impl<'a, K, V, S, A: Allocator + Clone> OccupiedEntry<'a, K, V, S, A> { /// } /// /// assert_eq!(map.contains_key("poneyland"), false); - /// // Now map hold none elements but capacity is equal to the old one - /// assert!(map.len() == 0 && map.capacity() == capacity_before_remove); + /// // Now the map holds no elements + /// assert!(map.is_empty()); /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn remove_entry(self) -> (K, V) { - unsafe { self.table.table.remove(self.elem) } + unsafe { self.table.table.remove(self.elem).0 } } /// Gets a reference to the value in the entry. @@ -5319,15 +5434,14 @@ impl<'a, K, V, S, A: Allocator + Clone> OccupiedEntry<'a, K, V, S, A> { /// assert!(map.is_empty() && map.capacity() == 0); /// /// map.entry("poneyland").or_insert(12); - /// let capacity_before_remove = map.capacity(); /// /// if let Entry::Occupied(o) = map.entry("poneyland") { /// assert_eq!(o.remove(), 12); /// } /// /// assert_eq!(map.contains_key("poneyland"), false); - /// // Now map hold none elements but capacity is equal to the old one - /// assert!(map.len() == 0 && map.capacity() == capacity_before_remove); + /// // Now the map holds no elements + /// assert!(map.is_empty()); /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn remove(self) -> V { @@ -5505,7 +5619,7 @@ impl<'a, K, V, S, A: Allocator + Clone> OccupiedEntry<'a, K, V, S, A> { } } -impl<'a, K, V, S, A: Allocator + Clone> VacantEntry<'a, K, V, S, A> { +impl<'a, K, V, S, A: Allocator> VacantEntry<'a, K, V, S, A> { /// Gets a reference to the key that would be used when inserting a value /// through the `VacantEntry`. /// @@ -5567,7 +5681,7 @@ impl<'a, K, V, S, A: Allocator + Clone> VacantEntry<'a, K, V, S, A> { let entry = table.insert_entry( self.hash, (self.key, value), - make_hasher::(&self.table.hash_builder), + make_hasher::<_, V, S>(&self.table.hash_builder), ); &mut entry.1 } @@ -5581,7 +5695,7 @@ impl<'a, K, V, S, A: Allocator + Clone> VacantEntry<'a, K, V, S, A> { let elem = self.table.table.insert( self.hash, (self.key, value), - make_hasher::(&self.table.hash_builder), + make_hasher::<_, V, S>(&self.table.hash_builder), ); OccupiedEntry { hash: self.hash, @@ -5592,7 +5706,7 @@ impl<'a, K, V, S, A: Allocator + Clone> VacantEntry<'a, K, V, S, A> { } } -impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> EntryRef<'a, 'b, K, Q, V, S, A> { +impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator> EntryRef<'a, 'b, K, Q, V, S, A> { /// Sets the value of the entry, and returns an OccupiedEntryRef. /// /// # Examples /// @@ -5682,10 +5796,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> EntryRef<'a, 'b, K, Q, V, /// Ensures a value is in the entry by inserting, if empty, the result of the default function. /// This method allows for generating key-derived values for insertion by providing the default - /// function a reference to the key that was moved during the `.entry_ref(key)` method call. - /// - /// The reference to the moved key is provided so that cloning or copying the key is - /// unnecessary, unlike with `.or_insert_with(|| ... )`. + /// function access to the borrowed form of the key.
/// /// # Examples /// /// ``` @@ -5737,7 +5848,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> EntryRef<'a, 'b, K, Q, V, K: Borrow, { match *self { - EntryRef::Occupied(ref entry) => entry.key(), + EntryRef::Occupied(ref entry) => entry.key().borrow(), EntryRef::Vacant(ref entry) => entry.key(), } } @@ -5833,8 +5944,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> EntryRef<'a, 'b, K, Q, V, #[cfg_attr(feature = "inline-more", inline)] pub fn and_replace_entry_with(self, f: F) -> Self where - F: FnOnce(&Q, V) -> Option, - K: Borrow, + F: FnOnce(&K, V) -> Option, { match self { EntryRef::Occupied(entry) => entry.replace_entry_with(f), @@ -5843,7 +5953,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> EntryRef<'a, 'b, K, Q, V, } } -impl<'a, 'b, K, Q: ?Sized, V: Default, S, A: Allocator + Clone> EntryRef<'a, 'b, K, Q, V, S, A> { +impl<'a, 'b, K, Q: ?Sized, V: Default, S, A: Allocator> EntryRef<'a, 'b, K, Q, V, S, A> { /// Ensures a value is in the entry by inserting the default value if empty, /// and returns a mutable reference to the value in the entry. /// @@ -5876,7 +5986,7 @@ impl<'a, 'b, K, Q: ?Sized, V: Default, S, A: Allocator + Clone> EntryRef<'a, 'b, } } -impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, K, Q, V, S, A> { +impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator> OccupiedEntryRef<'a, 'b, K, Q, V, S, A> { /// Gets a reference to the key in the entry. /// /// # Examples @@ -5893,11 +6003,8 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, /// } /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn key(&self) -> &Q - where - K: Borrow, - { - unsafe { &self.elem.as_ref().0 }.borrow() + pub fn key(&self) -> &K { + unsafe { &self.elem.as_ref().0 } } /// Take the ownership of the key and value from the map. @@ -5914,7 +6021,6 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, /// assert!(map.is_empty() && map.capacity() == 0); /// /// map.entry_ref("poneyland").or_insert(12); - /// let capacity_before_remove = map.capacity(); /// /// if let EntryRef::Occupied(o) = map.entry_ref("poneyland") { /// // We delete the entry from the map. @@ -5923,11 +6029,11 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, /// /// assert_eq!(map.contains_key("poneyland"), false); /// // Now the map holds no elements - /// assert!(map.len() == 0 && map.capacity() == capacity_before_remove); + /// assert!(map.is_empty()); /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn remove_entry(self) -> (K, V) { - unsafe { self.table.table.remove(self.elem) } + unsafe { self.table.table.remove(self.elem).0 } } /// Gets a reference to the value in the entry.
@@ -6048,7 +6154,6 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, /// assert!(map.is_empty() && map.capacity() == 0); /// /// map.entry_ref("poneyland").or_insert(12); - /// let capacity_before_remove = map.capacity(); /// /// if let EntryRef::Occupied(o) = map.entry_ref("poneyland") { /// assert_eq!(o.remove(), 12); @@ -6056,7 +6161,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, /// /// assert_eq!(map.contains_key("poneyland"), false); /// // Now map hold none elements but capacity is equal to the old one - /// assert!(map.len() == 0 && map.capacity() == capacity_before_remove); + /// assert!(map.is_empty()); /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn remove(self) -> V { @@ -6068,7 +6173,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, /// /// # Panics /// - /// Will panic if this OccupiedEntry was created through [`EntryRef::insert`]. + /// Will panic if this OccupiedEntryRef was created through [`EntryRef::insert`]. /// /// # Examples /// @@ -6110,7 +6215,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, /// /// # Panics /// - /// Will panic if this OccupiedEntry was created through [`Entry::insert`]. + /// Will panic if this OccupiedEntryRef was created through [`EntryRef::insert`]. /// /// # Examples /// @@ -6138,7 +6243,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, /// fn reclaim_memory(map: &mut HashMap, usize>, keys: &[Rc]) { /// for key in keys { /// if let EntryRef::Occupied(entry) = map.entry_ref(key.as_ref()) { - /// /// Replaces the entry's key with our version of it in `keys`. + /// // Replaces the entry's key with our version of it in `keys`. /// entry.replace_key(); /// } /// } @@ -6204,8 +6309,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, #[cfg_attr(feature = "inline-more", inline)] pub fn replace_entry_with(self, f: F) -> EntryRef<'a, 'b, K, Q, V, S, A> where - F: FnOnce(&Q, V) -> Option, - K: Borrow, + F: FnOnce(&K, V) -> Option, { unsafe { let mut spare_key = None; @@ -6213,7 +6317,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, self.table .table .replace_bucket_with(self.elem.clone(), |(key, value)| { - if let Some(new_value) = f(key.borrow(), value) { + if let Some(new_value) = f(&key, value) { Some((key, new_value)) } else { spare_key = Some(KeyOrRef::Owned(key)); @@ -6234,7 +6338,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> OccupiedEntryRef<'a, 'b, } } -impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> VacantEntryRef<'a, 'b, K, Q, V, S, A> { +impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator> VacantEntryRef<'a, 'b, K, Q, V, S, A> { /// Gets a reference to the key that would be used when inserting a value /// through the `VacantEntryRef`. 
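// A sketch of the `EntryRef` changes above, assuming hashbrown 0.14:
// `and_replace_entry_with` now hands the closure the owned key type `&K`
// (the old `K: Borrow<Q>` bound is gone), and `remove_entry` discards the
// spare insert slot that `RawTable::remove` now also returns (the `.0`).
use hashbrown::hash_map::EntryRef;
use hashbrown::HashMap;

fn main() {
    let mut map: HashMap<String, u32> = HashMap::new();
    map.insert("poneyland".to_string(), 42);

    let entry = map
        .entry_ref("poneyland")
        .and_replace_entry_with(|key: &String, value| {
            assert_eq!(key, "poneyland"); // owned key type, not `&str`
            Some(value + 1)
        });

    if let EntryRef::Occupied(o) = entry {
        let (key, value) = o.remove_entry();
        assert_eq!((key.as_str(), value), ("poneyland", 43));
    }
    assert!(map.is_empty());
}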
/// @@ -6305,7 +6409,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> VacantEntryRef<'a, 'b, K, let entry = table.insert_entry( self.hash, (self.key.into_owned(), value), - make_hasher::(&self.table.hash_builder), + make_hasher::<_, V, S>(&self.table.hash_builder), ); &mut entry.1 } @@ -6319,7 +6423,7 @@ impl<'a, 'b, K, Q: ?Sized, V, S, A: Allocator + Clone> VacantEntryRef<'a, 'b, K, let elem = self.table.table.insert( self.hash, (self.key.into_owned(), value), - make_hasher::(&self.table.hash_builder), + make_hasher::<_, V, S>(&self.table.hash_builder), ); OccupiedEntryRef { hash: self.hash, @@ -6334,7 +6438,7 @@ impl FromIterator<(K, V)> for HashMap where K: Eq + Hash, S: BuildHasher + Default, - A: Default + Allocator + Clone, + A: Default + Allocator, { #[cfg_attr(feature = "inline-more", inline)] fn from_iter>(iter: T) -> Self { @@ -6354,7 +6458,7 @@ impl Extend<(K, V)> for HashMap where K: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { /// Inserts all new key-values from the iterator to existing `HashMap`. /// Replace values with existing keys with new values returned from the iterator. @@ -6438,7 +6542,7 @@ where K: Eq + Hash + Copy, V: Copy, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { /// Inserts all new key-values from the iterator to existing `HashMap`. /// Replace values with existing keys with new values returned from the iterator. @@ -6455,17 +6559,17 @@ where /// map.insert(1, 100); /// /// let arr = [(1, 1), (2, 2)]; - /// let some_iter = arr.iter().map(|&(k, v)| (k, v)); + /// let some_iter = arr.iter().map(|(k, v)| (k, v)); /// map.extend(some_iter); /// // Replace values with existing keys with new values returned from the iterator. /// // So that the map.get(&1) doesn't return Some(&100). /// assert_eq!(map.get(&1), Some(&1)); /// /// let some_vec: Vec<_> = vec![(3, 3), (4, 4)]; - /// map.extend(some_vec.iter().map(|&(k, v)| (k, v))); + /// map.extend(some_vec.iter().map(|(k, v)| (k, v))); /// /// let some_arr = [(5, 5), (6, 6)]; - /// map.extend(some_arr.iter().map(|&(k, v)| (k, v))); + /// map.extend(some_arr.iter().map(|(k, v)| (k, v))); /// /// // You can also extend from another HashMap /// let mut new_map = HashMap::new(); @@ -6503,7 +6607,7 @@ where K: Eq + Hash + Copy, V: Copy, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { /// Inserts all new key-values from the iterator to existing `HashMap`. /// Replace values with existing keys with new values returned from the iterator. 
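// The relaxed closure style the updated `Extend` doc above now uses, in a
// runnable form (assuming hashbrown 0.14; this impl requires K, V: Copy):
use hashbrown::HashMap;

fn main() {
    let mut map: HashMap<i32, i32> = HashMap::new();
    map.insert(1, 100);

    // `iter()` yields `&(K, V)`; `|(k, v)| (k, v)` destructures it into
    // `(&K, &V)` directly, with no `|&(k, v)| (k, v)` copy-out pattern.
    let arr = [(1, 1), (2, 2)];
    map.extend(arr.iter().map(|(k, v)| (k, v)));

    // An existing key is overwritten by the iterator's value.
    assert_eq!(map.get(&1), Some(&1));
    assert_eq!(map.len(), 2);
}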
@@ -6570,12 +6674,12 @@ fn assert_covariance() { fn iter_val<'a, 'new>(v: Iter<'a, u8, &'static str>) -> Iter<'a, u8, &'new str> { v } - fn into_iter_key<'new, A: Allocator + Clone>( + fn into_iter_key<'new, A: Allocator>( v: IntoIter<&'static str, u8, A>, ) -> IntoIter<&'new str, u8, A> { v } - fn into_iter_val<'new, A: Allocator + Clone>( + fn into_iter_val<'new, A: Allocator>( v: IntoIter, ) -> IntoIter { v @@ -6605,6 +6709,12 @@ mod test_map { use super::Entry::{Occupied, Vacant}; use super::EntryRef; use super::{HashMap, RawEntryMut}; + use alloc::string::{String, ToString}; + use alloc::sync::Arc; + use allocator_api2::alloc::{AllocError, Allocator, Global}; + use core::alloc::Layout; + use core::ptr::NonNull; + use core::sync::atomic::{AtomicI8, Ordering}; use rand::{rngs::SmallRng, Rng, SeedableRng}; use std::borrow::ToOwned; use std::cell::RefCell; @@ -6827,7 +6937,6 @@ mod test_map { } }); - #[allow(clippy::let_underscore_drop)] // kind-of a false positive for _ in half.by_ref() {} DROP_VECTOR.with(|v| { @@ -7155,10 +7264,10 @@ mod test_map { map.insert(1, 2); map.insert(3, 4); - let map_str = format!("{:?}", map); + let map_str = format!("{map:?}"); assert!(map_str == "{1: 2, 3: 4}" || map_str == "{3: 4, 1: 2}"); - assert_eq!(format!("{:?}", empty), "{}"); + assert_eq!(format!("{empty:?}"), "{}"); } #[test] @@ -7474,7 +7583,7 @@ mod test_map { // Test for #19292 fn check(m: &HashMap) { for k in m.keys() { - assert!(m.contains_key(k), "{} is in keys() but not in the map?", k); + assert!(m.contains_key(k), "{k} is in keys() but not in the map?"); } } @@ -7510,7 +7619,7 @@ mod test_map { // Test for #19292 fn check(m: &HashMap) { for k in m.keys() { - assert!(m.contains_key(k), "{} is in keys() but not in the map?", k); + assert!(m.contains_key(k), "{k} is in keys() but not in the map?"); } } @@ -7559,6 +7668,7 @@ mod test_map { } #[test] + #[allow(clippy::needless_borrow)] fn test_extend_ref_kv_tuple() { use std::ops::AddAssign; let mut a = HashMap::new(); @@ -7580,7 +7690,7 @@ mod test_map { let vec: Vec<_> = (100..200).map(|i| (i, i)).collect(); a.extend(iter); a.extend(&vec); - a.extend(&create_arr::(200, 1)); + a.extend(create_arr::(200, 1)); assert_eq!(a.len(), 300); @@ -7981,7 +8091,7 @@ mod test_map { // Test for #19292 fn check(m: &HashMap) { for k in m.keys() { - assert!(m.contains_key(k), "{} is in keys() but not in the map?", k); + assert!(m.contains_key(k), "{k} is in keys() but not in the map?"); } } @@ -8011,7 +8121,7 @@ mod test_map { // Test for #19292 fn check(m: &HashMap) { for k in m.keys() { - assert!(m.contains_key(k), "{} is in keys() but not in the map?", k); + assert!(m.contains_key(k), "{k} is in keys() but not in the map?"); } } @@ -8049,10 +8159,10 @@ mod test_map { } #[test] - fn test_drain_filter() { + fn test_extract_if() { { let mut map: HashMap = (0..8).map(|x| (x, x * 10)).collect(); - let drained = map.drain_filter(|&k, _| k % 2 == 0); + let drained = map.extract_if(|&k, _| k % 2 == 0); let mut out = drained.collect::>(); out.sort_unstable(); assert_eq!(vec![(0, 0), (2, 20), (4, 40), (6, 60)], out); @@ -8060,7 +8170,7 @@ mod test_map { } { let mut map: HashMap = (0..8).map(|x| (x, x * 10)).collect(); - drop(map.drain_filter(|&k, _| k % 2 == 0)); + map.extract_if(|&k, _| k % 2 == 0).for_each(drop); assert_eq!(map.len(), 4); } } @@ -8070,27 +8180,32 @@ mod test_map { fn test_try_reserve() { use crate::TryReserveError::{AllocError, CapacityOverflow}; - const MAX_USIZE: usize = usize::MAX; + const MAX_ISIZE: usize = isize::MAX as usize; let mut 
empty_bytes: HashMap = HashMap::new(); - if let Err(CapacityOverflow) = empty_bytes.try_reserve(MAX_USIZE) { + if let Err(CapacityOverflow) = empty_bytes.try_reserve(usize::MAX) { } else { panic!("usize::MAX should trigger an overflow!"); } - if let Err(AllocError { .. }) = empty_bytes.try_reserve(MAX_USIZE / 16) { + if let Err(CapacityOverflow) = empty_bytes.try_reserve(MAX_ISIZE) { + } else { + panic!("isize::MAX should trigger an overflow!"); + } + + if let Err(AllocError { .. }) = empty_bytes.try_reserve(MAX_ISIZE / 5) { } else { // This may succeed if there is enough free memory. Attempt to // allocate a few more hashmaps to ensure the allocation will fail. let mut empty_bytes2: HashMap = HashMap::new(); - let _ = empty_bytes2.try_reserve(MAX_USIZE / 16); + let _ = empty_bytes2.try_reserve(MAX_ISIZE / 5); let mut empty_bytes3: HashMap = HashMap::new(); - let _ = empty_bytes3.try_reserve(MAX_USIZE / 16); + let _ = empty_bytes3.try_reserve(MAX_ISIZE / 5); let mut empty_bytes4: HashMap = HashMap::new(); - if let Err(AllocError { .. }) = empty_bytes4.try_reserve(MAX_USIZE / 16) { + if let Err(AllocError { .. }) = empty_bytes4.try_reserve(MAX_ISIZE / 5) { } else { - panic!("usize::MAX / 8 should trigger an OOM!"); + panic!("isize::MAX / 5 should trigger an OOM!"); } } } @@ -8104,7 +8219,7 @@ mod test_map { let mut map: HashMap<_, _> = xs.iter().copied().collect(); let compute_hash = |map: &HashMap, k: i32| -> u64 { - super::make_insert_hash::(map.hasher(), &k) + super::make_hash::(map.hasher(), &k) }; // Existing key (insert) @@ -8266,21 +8381,21 @@ mod test_map { loop { // occasionally remove some elements if i < n && rng.gen_bool(0.1) { - let hash_value = super::make_insert_hash(&hash_builder, &i); + let hash_value = super::make_hash(&hash_builder, &i); unsafe { let e = map.table.find(hash_value, |q| q.0.eq(&i)); if let Some(e) = e { it.reflect_remove(&e); - let t = map.table.remove(e); + let t = map.table.remove(e).0; removed.push(t); left -= 1; } else { - assert!(removed.contains(&(i, 2 * i)), "{} not in {:?}", i, removed); + assert!(removed.contains(&(i, 2 * i)), "{i} not in {removed:?}"); let e = map.table.insert( hash_value, (i, 2 * i), - super::make_hasher::(&hash_builder), + super::make_hasher::<_, usize, _>(&hash_builder), ); it.reflect_insert(&e); if let Some(p) = removed.iter().position(|e| e == &(i, 2 * i)) { @@ -8405,4 +8520,441 @@ mod test_map { map2.clone_from(&map1); } + + #[test] + #[should_panic = "panic in clone"] + fn test_clone_from_memory_leaks() { + use ::alloc::vec::Vec; + + struct CheckedClone { + panic_in_clone: bool, + need_drop: Vec, + } + impl Clone for CheckedClone { + fn clone(&self) -> Self { + if self.panic_in_clone { + panic!("panic in clone") + } + Self { + panic_in_clone: self.panic_in_clone, + need_drop: self.need_drop.clone(), + } + } + } + let mut map1 = HashMap::new(); + map1.insert( + 1, + CheckedClone { + panic_in_clone: false, + need_drop: vec![0, 1, 2], + }, + ); + map1.insert( + 2, + CheckedClone { + panic_in_clone: false, + need_drop: vec![3, 4, 5], + }, + ); + map1.insert( + 3, + CheckedClone { + panic_in_clone: true, + need_drop: vec![6, 7, 8], + }, + ); + let _map2 = map1.clone(); + } + + struct MyAllocInner { + drop_count: Arc, + } + + #[derive(Clone)] + struct MyAlloc { + _inner: Arc, + } + + impl MyAlloc { + fn new(drop_count: Arc) -> Self { + MyAlloc { + _inner: Arc::new(MyAllocInner { drop_count }), + } + } + } + + impl Drop for MyAllocInner { + fn drop(&mut self) { + println!("MyAlloc freed."); + self.drop_count.fetch_sub(1, 
Ordering::SeqCst); + } + } + + unsafe impl Allocator for MyAlloc { + fn allocate(&self, layout: Layout) -> std::result::Result, AllocError> { + let g = Global; + g.allocate(layout) + } + + unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { + let g = Global; + g.deallocate(ptr, layout) + } + } + + #[test] + fn test_hashmap_into_iter_bug() { + let dropped: Arc = Arc::new(AtomicI8::new(1)); + + { + let mut map = HashMap::with_capacity_in(10, MyAlloc::new(dropped.clone())); + for i in 0..10 { + map.entry(i).or_insert_with(|| "i".to_string()); + } + + for (k, v) in map { + println!("{}, {}", k, v); + } + } + + // All allocator clones should already be dropped. + assert_eq!(dropped.load(Ordering::SeqCst), 0); + } + + #[derive(Debug)] + struct CheckedCloneDrop { + panic_in_clone: bool, + panic_in_drop: bool, + dropped: bool, + data: T, + } + + impl CheckedCloneDrop { + fn new(panic_in_clone: bool, panic_in_drop: bool, data: T) -> Self { + CheckedCloneDrop { + panic_in_clone, + panic_in_drop, + dropped: false, + data, + } + } + } + + impl Clone for CheckedCloneDrop { + fn clone(&self) -> Self { + if self.panic_in_clone { + panic!("panic in clone") + } + Self { + panic_in_clone: self.panic_in_clone, + panic_in_drop: self.panic_in_drop, + dropped: self.dropped, + data: self.data.clone(), + } + } + } + + impl Drop for CheckedCloneDrop { + fn drop(&mut self) { + if self.panic_in_drop { + self.dropped = true; + panic!("panic in drop"); + } + if self.dropped { + panic!("double drop"); + } + self.dropped = true; + } + } + + /// Return hashmap with predefined distribution of elements. + /// All elements will be located in the same order as elements + /// returned by iterator. + /// + /// This function does not panic, but returns an error as a `String` + /// to distinguish between a test panic and an error in the input data. + fn get_test_map( + iter: I, + mut fun: impl FnMut(u64) -> T, + alloc: A, + ) -> Result, DefaultHashBuilder, A>, String> + where + I: Iterator + Clone + ExactSizeIterator, + A: Allocator, + T: PartialEq + core::fmt::Debug, + { + use crate::scopeguard::guard; + + let mut map: HashMap, _, A> = + HashMap::with_capacity_in(iter.size_hint().0, alloc); + { + let mut guard = guard(&mut map, |map| { + for (_, value) in map.iter_mut() { + value.panic_in_drop = false + } + }); + + let mut count = 0; + // Hash and Key must be equal to each other for controlling the elements placement. 
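// A sketch of the allocator plumbing the tests above exercise, assuming
// hashbrown 0.14 with its default `allocator-api2` feature: a pass-through
// allocator modeled on `MyAlloc`, handed to the map via `with_capacity_in`.
// Note that `Clone` is no longer required on `A` after these hunks.
use allocator_api2::alloc::{AllocError, Allocator, Global};
use core::alloc::Layout;
use core::ptr::NonNull;
use hashbrown::HashMap;

struct PassThrough;

unsafe impl Allocator for PassThrough {
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        Global.allocate(layout)
    }
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        Global.deallocate(ptr, layout);
    }
}

fn main() {
    let mut map = HashMap::with_capacity_in(10, PassThrough);
    for i in 0..10 {
        map.entry(i).or_insert_with(|| i.to_string());
    }
    // Consuming the map exercises `IntoIter`, which (per the regression
    // test above) must release its copy of the allocator exactly once.
    assert_eq!(map.into_iter().count(), 10);
}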
+ for (panic_in_clone, panic_in_drop) in iter.clone() { + if core::mem::needs_drop::() && panic_in_drop { + return Err(String::from( + "panic_in_drop can be set with a type that doesn't need to be dropped", + )); + } + guard.table.insert( + count, + ( + count, + CheckedCloneDrop::new(panic_in_clone, panic_in_drop, fun(count)), + ), + |(k, _)| *k, + ); + count += 1; + } + + // Let's check that all elements are located as we wanted + let mut check_count = 0; + for ((key, value), (panic_in_clone, panic_in_drop)) in guard.iter().zip(iter) { + if *key != check_count { + return Err(format!( + "key != check_count,\nkey: `{}`,\ncheck_count: `{}`", + key, check_count + )); + } + if value.dropped + || value.panic_in_clone != panic_in_clone + || value.panic_in_drop != panic_in_drop + || value.data != fun(check_count) + { + return Err(format!( + "Value is not equal to expected,\nvalue: `{:?}`,\nexpected: \ + `CheckedCloneDrop {{ panic_in_clone: {}, panic_in_drop: {}, dropped: {}, data: {:?} }}`", + value, panic_in_clone, panic_in_drop, false, fun(check_count) + )); + } + check_count += 1; + } + + if guard.len() != check_count as usize { + return Err(format!( + "map.len() != check_count,\nmap.len(): `{}`,\ncheck_count: `{}`", + guard.len(), + check_count + )); + } + + if count != check_count { + return Err(format!( + "count != check_count,\ncount: `{}`,\ncheck_count: `{}`", + count, check_count + )); + } + core::mem::forget(guard); + } + Ok(map) + } + + const DISARMED: bool = false; + const ARMED: bool = true; + + const ARMED_FLAGS: [bool; 8] = [ + DISARMED, DISARMED, DISARMED, ARMED, DISARMED, DISARMED, DISARMED, DISARMED, + ]; + + const DISARMED_FLAGS: [bool; 8] = [ + DISARMED, DISARMED, DISARMED, DISARMED, DISARMED, DISARMED, DISARMED, DISARMED, + ]; + + #[test] + #[should_panic = "panic in clone"] + fn test_clone_memory_leaks_and_double_drop_one() { + let dropped: Arc = Arc::new(AtomicI8::new(2)); + + { + assert_eq!(ARMED_FLAGS.len(), DISARMED_FLAGS.len()); + + let map: HashMap>, DefaultHashBuilder, MyAlloc> = + match get_test_map( + ARMED_FLAGS.into_iter().zip(DISARMED_FLAGS), + |n| vec![n], + MyAlloc::new(dropped.clone()), + ) { + Ok(map) => map, + Err(msg) => panic!("{msg}"), + }; + + // Clone should normally clone a few elements, and then (when the + // clone function panics), deallocate both its own memory, memory + // of `dropped: Arc` and the memory of already cloned + // elements (Vec memory inside CheckedCloneDrop). + let _map2 = map.clone(); + } + } + + #[test] + #[should_panic = "panic in drop"] + fn test_clone_memory_leaks_and_double_drop_two() { + let dropped: Arc = Arc::new(AtomicI8::new(2)); + + { + assert_eq!(ARMED_FLAGS.len(), DISARMED_FLAGS.len()); + + let map: HashMap, DefaultHashBuilder, _> = match get_test_map( + DISARMED_FLAGS.into_iter().zip(DISARMED_FLAGS), + |n| n, + MyAlloc::new(dropped.clone()), + ) { + Ok(map) => map, + Err(msg) => panic!("{msg}"), + }; + + let mut map2 = match get_test_map( + DISARMED_FLAGS.into_iter().zip(ARMED_FLAGS), + |n| n, + MyAlloc::new(dropped.clone()), + ) { + Ok(map) => map, + Err(msg) => panic!("{msg}"), + }; + + // The `clone_from` should try to drop the elements of `map2` without + // double drop and leaking the allocator. Elements that have not been + // dropped leak their memory. + map2.clone_from(&map); + } + } + + /// We check that we have a working table if the clone operation from another + /// thread ended in a panic (when buckets of maps are equal to each other). 
+ #[test] + fn test_catch_panic_clone_from_when_len_is_equal() { + use std::thread; + + let dropped: Arc = Arc::new(AtomicI8::new(2)); + + { + assert_eq!(ARMED_FLAGS.len(), DISARMED_FLAGS.len()); + + let mut map = match get_test_map( + DISARMED_FLAGS.into_iter().zip(DISARMED_FLAGS), + |n| vec![n], + MyAlloc::new(dropped.clone()), + ) { + Ok(map) => map, + Err(msg) => panic!("{msg}"), + }; + + thread::scope(|s| { + let result: thread::ScopedJoinHandle<'_, String> = s.spawn(|| { + let scope_map = + match get_test_map(ARMED_FLAGS.into_iter().zip(DISARMED_FLAGS), |n| vec![n * 2], MyAlloc::new(dropped.clone())) { + Ok(map) => map, + Err(msg) => return msg, + }; + if map.table.buckets() != scope_map.table.buckets() { + return format!( + "map.table.buckets() != scope_map.table.buckets(),\nleft: `{}`,\nright: `{}`", + map.table.buckets(), scope_map.table.buckets() + ); + } + map.clone_from(&scope_map); + "We must fail the cloning!!!".to_owned() + }); + if let Ok(msg) = result.join() { + panic!("{msg}") + } + }); + + // Let's check that all iterators work fine and do not return elements + // (especially `RawIterRange`, which does not depend on the number of + // elements in the table, but looks directly at the control bytes) + // + // SAFETY: We know for sure that `RawTable` will outlive + // the returned `RawIter / RawIterRange` iterator. + assert_eq!(map.len(), 0); + assert_eq!(map.iter().count(), 0); + assert_eq!(unsafe { map.table.iter().count() }, 0); + assert_eq!(unsafe { map.table.iter().iter.count() }, 0); + + for idx in 0..map.table.buckets() { + let idx = idx as u64; + assert!( + map.table.find(idx, |(k, _)| *k == idx).is_none(), + "Index: {idx}" + ); + } + } + + // All allocator clones should already be dropped. + assert_eq!(dropped.load(Ordering::SeqCst), 0); + } + + /// We check that we have a working table if the clone operation from another + /// thread ended in a panic (when buckets of maps are not equal to each other). + #[test] + fn test_catch_panic_clone_from_when_len_is_not_equal() { + use std::thread; + + let dropped: Arc = Arc::new(AtomicI8::new(2)); + + { + assert_eq!(ARMED_FLAGS.len(), DISARMED_FLAGS.len()); + + let mut map = match get_test_map( + [DISARMED].into_iter().zip([DISARMED]), + |n| vec![n], + MyAlloc::new(dropped.clone()), + ) { + Ok(map) => map, + Err(msg) => panic!("{msg}"), + }; + + thread::scope(|s| { + let result: thread::ScopedJoinHandle<'_, String> = s.spawn(|| { + let scope_map = match get_test_map( + ARMED_FLAGS.into_iter().zip(DISARMED_FLAGS), + |n| vec![n * 2], + MyAlloc::new(dropped.clone()), + ) { + Ok(map) => map, + Err(msg) => return msg, + }; + if map.table.buckets() == scope_map.table.buckets() { + return format!( + "map.table.buckets() == scope_map.table.buckets(): `{}`", + map.table.buckets() + ); + } + map.clone_from(&scope_map); + "We must fail the cloning!!!".to_owned() + }); + if let Ok(msg) = result.join() { + panic!("{msg}") + } + }); + + // Let's check that all iterators work fine and do not return elements + // (especially `RawIterRange`, which does not depend on the number of + // elements in the table, but looks directly at the control bytes) + // + // SAFETY: We know for sure that `RawTable` will outlive + // the returned `RawIter / RawIterRange` iterator. 
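// The containment pattern the two clone_from tests above rely on, in
// isolation: `std::thread::scope` lets the spawned closure borrow locals,
// and a panic inside it comes back as an `Err` from `join` instead of
// tearing down the test.
use std::thread;

fn main() {
    let data = vec![1, 2, 3];

    let joined = thread::scope(|s| {
        s.spawn(|| {
            assert_eq!(data.len(), 3); // borrows `data` from the outer scope
            panic!("panic in clone");
        })
        .join()
    });

    assert!(joined.is_err());
}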
+ assert_eq!(map.len(), 0); + assert_eq!(map.iter().count(), 0); + assert_eq!(unsafe { map.table.iter().count() }, 0); + assert_eq!(unsafe { map.table.iter().iter.count() }, 0); + + for idx in 0..map.table.buckets() { + let idx = idx as u64; + assert!( + map.table.find(idx, |(k, _)| *k == idx).is_none(), + "Index: {idx}" + ); + } + } + + // All allocator clones should already be dropped. + assert_eq!(dropped.load(Ordering::SeqCst), 0); + } } diff --git a/vendor/hashbrown/src/raw/alloc.rs b/vendor/hashbrown/src/raw/alloc.rs index ba09ea9..15299e7 100644 --- a/vendor/hashbrown/src/raw/alloc.rs +++ b/vendor/hashbrown/src/raw/alloc.rs @@ -1,5 +1,9 @@ pub(crate) use self::inner::{do_alloc, Allocator, Global}; +// Nightly-case. +// Use unstable `allocator_api` feature. +// This is compatible with `allocator-api2` which can be enabled or not. +// This is used when building for `std`. #[cfg(feature = "nightly")] mod inner { use crate::alloc::alloc::Layout; @@ -7,28 +11,44 @@ mod inner { use core::ptr::NonNull; #[allow(clippy::map_err_ignore)] - pub fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { match alloc.allocate(layout) { Ok(ptr) => Ok(ptr.as_non_null_ptr()), Err(_) => Err(()), } } +} - #[cfg(feature = "bumpalo")] - unsafe impl Allocator for crate::BumpWrapper<'_> { - #[inline] - fn allocate(&self, layout: Layout) -> Result, core::alloc::AllocError> { - match self.0.try_alloc_layout(layout) { - Ok(ptr) => Ok(NonNull::slice_from_raw_parts(ptr, layout.size())), - Err(_) => Err(core::alloc::AllocError), - } +// Basic non-nightly case. +// This uses `allocator-api2` enabled by default. +// If any crate enables "nightly" in `allocator-api2`, +// this will be equivalent to the nightly case, +// since `allocator_api2::alloc::Allocator` would be re-export of +// `core::alloc::Allocator`. +#[cfg(all(not(feature = "nightly"), feature = "allocator-api2"))] +mod inner { + use crate::alloc::alloc::Layout; + pub use allocator_api2::alloc::{Allocator, Global}; + use core::ptr::NonNull; + + #[allow(clippy::map_err_ignore)] + pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + match alloc.allocate(layout) { + Ok(ptr) => Ok(ptr.cast()), + Err(_) => Err(()), } - #[inline] - unsafe fn deallocate(&self, _ptr: NonNull, _layout: Layout) {} } } -#[cfg(not(feature = "nightly"))] +// No-defaults case. +// When building with default-features turned off and +// neither `nightly` nor `allocator-api2` is enabled, +// this will be used. +// Making it impossible to use any custom allocator with collections defined +// in this crate. +// Any crate in build-tree can enable `allocator-api2`, +// or `nightly` without disturbing users that don't want to use it. 
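// A self-contained sketch of the no-default-features fallback described
// above: a crate-local allocator trait whose only implementor is a global
// allocator delegating straight to `alloc`/`dealloc`. The names here
// (`MiniAllocator`, `MiniGlobal`) are illustrative stand-ins, not
// hashbrown's actual items.
use core::alloc::Layout;
use core::ptr::NonNull;
use std::alloc::{alloc, dealloc};

unsafe trait MiniAllocator {
    fn allocate(&self, layout: Layout) -> Result<NonNull<u8>, ()>;
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout);
}

#[derive(Copy, Clone, Default)]
struct MiniGlobal;

unsafe impl MiniAllocator for MiniGlobal {
    fn allocate(&self, layout: Layout) -> Result<NonNull<u8>, ()> {
        // The global allocator returns null on failure; map that to `Err(())`.
        NonNull::new(unsafe { alloc(layout) }).ok_or(())
    }
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        dealloc(ptr.as_ptr(), layout);
    }
}

fn main() {
    let layout = Layout::from_size_align(64, 8).unwrap();
    let ptr = MiniGlobal.allocate(layout).expect("allocation failed");
    unsafe { MiniGlobal.deallocate(ptr, layout) };
}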
+#[cfg(not(any(feature = "nightly", feature = "allocator-api2")))] mod inner { use crate::alloc::alloc::{alloc, dealloc, Layout}; use core::ptr::NonNull; @@ -41,6 +61,7 @@ mod inner { #[derive(Copy, Clone)] pub struct Global; + unsafe impl Allocator for Global { #[inline] fn allocate(&self, layout: Layout) -> Result, ()> { @@ -51,6 +72,7 @@ mod inner { dealloc(ptr.as_ptr(), layout); } } + impl Default for Global { #[inline] fn default() -> Self { @@ -58,16 +80,7 @@ mod inner { } } - pub fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { alloc.allocate(layout) } - - #[cfg(feature = "bumpalo")] - unsafe impl Allocator for crate::BumpWrapper<'_> { - #[allow(clippy::map_err_ignore)] - fn allocate(&self, layout: Layout) -> Result, ()> { - self.0.try_alloc_layout(layout).map_err(|_| ()) - } - unsafe fn deallocate(&self, _ptr: NonNull, _layout: Layout) {} - } } diff --git a/vendor/hashbrown/src/raw/bitmask.rs b/vendor/hashbrown/src/raw/bitmask.rs index 7d4f9fc..6576b3c 100644 --- a/vendor/hashbrown/src/raw/bitmask.rs +++ b/vendor/hashbrown/src/raw/bitmask.rs @@ -1,6 +1,6 @@ -use super::imp::{BitMaskWord, BITMASK_MASK, BITMASK_STRIDE}; -#[cfg(feature = "nightly")] -use core::intrinsics; +use super::imp::{ + BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE, +}; /// A bit mask which contains the result of a `Match` operation on a `Group` and /// allows iterating through them. @@ -8,75 +8,55 @@ use core::intrinsics; /// The bit mask is arranged so that low-order bits represent lower memory /// addresses for group match results. /// -/// For implementation reasons, the bits in the set may be sparsely packed, so -/// that there is only one bit-per-byte used (the high bit, 7). If this is the +/// For implementation reasons, the bits in the set may be sparsely packed with +/// groups of 8 bits representing one element. If any of these bits are non-zero +/// then this element is considered to true in the mask. If this is the /// case, `BITMASK_STRIDE` will be 8 to indicate a divide-by-8 should be /// performed on counts/indices to normalize this difference. `BITMASK_MASK` is /// similarly a mask of all the actually-used bits. +/// +/// To iterate over a bit mask, it must be converted to a form where only 1 bit +/// is set per element. This is done by applying `BITMASK_ITER_MASK` on the +/// mask bits. #[derive(Copy, Clone)] -pub struct BitMask(pub BitMaskWord); +pub(crate) struct BitMask(pub(crate) BitMaskWord); #[allow(clippy::use_self)] impl BitMask { /// Returns a new `BitMask` with all bits inverted. #[inline] #[must_use] - pub fn invert(self) -> Self { + #[allow(dead_code)] + pub(crate) fn invert(self) -> Self { BitMask(self.0 ^ BITMASK_MASK) } - /// Flip the bit in the mask for the entry at the given index. - /// - /// Returns the bit's previous state. - #[inline] - #[allow(clippy::cast_ptr_alignment)] - #[cfg(feature = "raw")] - pub unsafe fn flip(&mut self, index: usize) -> bool { - // NOTE: The + BITMASK_STRIDE - 1 is to set the high bit. - let mask = 1 << (index * BITMASK_STRIDE + BITMASK_STRIDE - 1); - self.0 ^= mask; - // The bit was set if the bit is now 0. - self.0 & mask == 0 - } - /// Returns a new `BitMask` with the lowest bit removed. #[inline] #[must_use] - pub fn remove_lowest_bit(self) -> Self { + fn remove_lowest_bit(self) -> Self { BitMask(self.0 & (self.0 - 1)) } + /// Returns whether the `BitMask` has at least one set bit. 
#[inline] - pub fn any_bit_set(self) -> bool { + pub(crate) fn any_bit_set(self) -> bool { self.0 != 0 } /// Returns the first set bit in the `BitMask`, if there is one. #[inline] - pub fn lowest_set_bit(self) -> Option { - if self.0 == 0 { - None + pub(crate) fn lowest_set_bit(self) -> Option { + if let Some(nonzero) = NonZeroBitMaskWord::new(self.0) { + Some(Self::nonzero_trailing_zeros(nonzero)) } else { - Some(unsafe { self.lowest_set_bit_nonzero() }) + None } } - /// Returns the first set bit in the `BitMask`, if there is one. The - /// bitmask must not be empty. - #[inline] - #[cfg(feature = "nightly")] - pub unsafe fn lowest_set_bit_nonzero(self) -> usize { - intrinsics::cttz_nonzero(self.0) as usize / BITMASK_STRIDE - } - #[inline] - #[cfg(not(feature = "nightly"))] - pub unsafe fn lowest_set_bit_nonzero(self) -> usize { - self.trailing_zeros() - } - /// Returns the number of trailing zeroes in the `BitMask`. #[inline] - pub fn trailing_zeros(self) -> usize { + pub(crate) fn trailing_zeros(self) -> usize { // ARM doesn't have a trailing_zeroes instruction, and instead uses // reverse_bits (RBIT) + leading_zeroes (CLZ). However older ARM // versions (pre-ARMv7) don't have RBIT and need to emulate it @@ -89,9 +69,21 @@ impl BitMask { } } + /// Same as above but takes a `NonZeroBitMaskWord`. + #[inline] + fn nonzero_trailing_zeros(nonzero: NonZeroBitMaskWord) -> usize { + if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 { + // SAFETY: A byte-swapped non-zero value is still non-zero. + let swapped = unsafe { NonZeroBitMaskWord::new_unchecked(nonzero.get().swap_bytes()) }; + swapped.leading_zeros() as usize / BITMASK_STRIDE + } else { + nonzero.trailing_zeros() as usize / BITMASK_STRIDE + } + } + /// Returns the number of leading zeroes in the `BitMask`. #[inline] - pub fn leading_zeros(self) -> usize { + pub(crate) fn leading_zeros(self) -> usize { self.0.leading_zeros() as usize / BITMASK_STRIDE } } @@ -102,13 +94,32 @@ impl IntoIterator for BitMask { #[inline] fn into_iter(self) -> BitMaskIter { - BitMaskIter(self) + // A BitMask only requires each element (group of bits) to be non-zero. + // However for iteration we need each element to only contain 1 bit. + BitMaskIter(BitMask(self.0 & BITMASK_ITER_MASK)) } } /// Iterator over the contents of a `BitMask`, returning the indices of set /// bits. -pub struct BitMaskIter(BitMask); +#[derive(Copy, Clone)] +pub(crate) struct BitMaskIter(pub(crate) BitMask); + +impl BitMaskIter { + /// Flip the bit in the mask for the entry at the given index. + /// + /// Returns the bit's previous state. + #[inline] + #[allow(clippy::cast_ptr_alignment)] + #[cfg(feature = "raw")] + pub(crate) unsafe fn flip(&mut self, index: usize) -> bool { + // NOTE: The + BITMASK_STRIDE - 1 is to set the high bit. + let mask = 1 << (index * BITMASK_STRIDE + BITMASK_STRIDE - 1); + self.0 .0 ^= mask; + // The bit was set if the bit is now 0. + self.0 .0 & mask == 0 + } +} impl Iterator for BitMaskIter { type Item = usize; diff --git a/vendor/hashbrown/src/raw/generic.rs b/vendor/hashbrown/src/raw/generic.rs index b4d31e6..c668b06 100644 --- a/vendor/hashbrown/src/raw/generic.rs +++ b/vendor/hashbrown/src/raw/generic.rs @@ -5,26 +5,29 @@ use core::{mem, ptr}; // Use the native word size as the group size. Using a 64-bit group size on // a 32-bit architecture will just end up being more expensive because // shifts and multiplies will need to be emulated. 
-#[cfg(any( - target_pointer_width = "64", - target_arch = "aarch64", - target_arch = "x86_64", - target_arch = "wasm32", -))] -type GroupWord = u64; -#[cfg(all( - target_pointer_width = "32", - not(target_arch = "aarch64"), - not(target_arch = "x86_64"), - not(target_arch = "wasm32"), -))] -type GroupWord = u32; -pub type BitMaskWord = GroupWord; -pub const BITMASK_STRIDE: usize = 8; +cfg_if! { + if #[cfg(any( + target_pointer_width = "64", + target_arch = "aarch64", + target_arch = "x86_64", + target_arch = "wasm32", + ))] { + type GroupWord = u64; + type NonZeroGroupWord = core::num::NonZeroU64; + } else { + type GroupWord = u32; + type NonZeroGroupWord = core::num::NonZeroU32; + } +} + +pub(crate) type BitMaskWord = GroupWord; +pub(crate) type NonZeroBitMaskWord = NonZeroGroupWord; +pub(crate) const BITMASK_STRIDE: usize = 8; // We only care about the highest bit of each byte for the mask. #[allow(clippy::cast_possible_truncation, clippy::unnecessary_cast)] -pub const BITMASK_MASK: BitMaskWord = 0x8080_8080_8080_8080_u64 as GroupWord; +pub(crate) const BITMASK_MASK: BitMaskWord = 0x8080_8080_8080_8080_u64 as GroupWord; +pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0; /// Helper function to replicate a byte across a `GroupWord`. #[inline] @@ -37,7 +40,7 @@ fn repeat(byte: u8) -> GroupWord { /// /// This implementation uses a word-sized integer. #[derive(Copy, Clone)] -pub struct Group(GroupWord); +pub(crate) struct Group(GroupWord); // We perform all operations in the native endianness, and convert to // little-endian just before creating a BitMask. The can potentially @@ -46,14 +49,14 @@ pub struct Group(GroupWord); #[allow(clippy::use_self)] impl Group { /// Number of bytes in the group. - pub const WIDTH: usize = mem::size_of::(); + pub(crate) const WIDTH: usize = mem::size_of::(); /// Returns a full group of empty bytes, suitable for use as the initial /// value for an empty hash table. /// /// This is guaranteed to be aligned to the group size. #[inline] - pub const fn static_empty() -> &'static [u8; Group::WIDTH] { + pub(crate) const fn static_empty() -> &'static [u8; Group::WIDTH] { #[repr(C)] struct AlignedBytes { _align: [Group; 0], @@ -69,7 +72,7 @@ impl Group { /// Loads a group of bytes starting at the given address. #[inline] #[allow(clippy::cast_ptr_alignment)] // unaligned load - pub unsafe fn load(ptr: *const u8) -> Self { + pub(crate) unsafe fn load(ptr: *const u8) -> Self { Group(ptr::read_unaligned(ptr.cast())) } @@ -77,7 +80,7 @@ impl Group { /// aligned to `mem::align_of::()`. #[inline] #[allow(clippy::cast_ptr_alignment)] - pub unsafe fn load_aligned(ptr: *const u8) -> Self { + pub(crate) unsafe fn load_aligned(ptr: *const u8) -> Self { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); Group(ptr::read(ptr.cast())) @@ -87,7 +90,7 @@ impl Group { /// aligned to `mem::align_of::()`. #[inline] #[allow(clippy::cast_ptr_alignment)] - pub unsafe fn store_aligned(self, ptr: *mut u8) { + pub(crate) unsafe fn store_aligned(self, ptr: *mut u8) { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); ptr::write(ptr.cast(), self.0); @@ -104,7 +107,7 @@ impl Group { /// - This only happens if there is at least 1 true match. /// - The chance of this happening is very low (< 1% chance per byte). 
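// The word-at-a-time byte match described above, extracted from the generic
// `Group` into a standalone u64 sketch. It is the "determine if a word has a
// byte equal to n" trick from the cited bithacks page, not the exact
// hashbrown item.
fn repeat(byte: u8) -> u64 {
    u64::from_ne_bytes([byte; 8])
}

fn match_byte(group: u64, byte: u8) -> u64 {
    // XOR zeroes out every byte equal to `byte`; the classic has-zero-byte
    // expression then sets the high bit of each zero byte.
    let cmp = group ^ repeat(byte);
    cmp.wrapping_sub(repeat(0x01)) & !cmp & repeat(0x80)
}

fn main() {
    let group = u64::from_le_bytes([0x11, 0x22, 0x11, 0x80, 0x11, 0x00, 0x33, 0x44]);
    let mask = match_byte(group, 0x11);
    // Lanes 0, 2 and 4 hold 0x11, so the high bits of bytes 0, 2, 4 are set.
    assert_eq!(mask, (1u64 << 7) | (1 << 23) | (1 << 39));
}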
#[inline] - pub fn match_byte(self, byte: u8) -> BitMask { + pub(crate) fn match_byte(self, byte: u8) -> BitMask { // This algorithm is derived from // https://graphics.stanford.edu/~seander/bithacks.html##ValueInWord let cmp = self.0 ^ repeat(byte); @@ -114,7 +117,7 @@ impl Group { /// Returns a `BitMask` indicating all bytes in the group which are /// `EMPTY`. #[inline] - pub fn match_empty(self) -> BitMask { + pub(crate) fn match_empty(self) -> BitMask { // If the high bit is set, then the byte must be either: // 1111_1111 (EMPTY) or 1000_0000 (DELETED). // So we can just check if the top two bits are 1 by ANDing them. @@ -124,14 +127,14 @@ impl Group { /// Returns a `BitMask` indicating all bytes in the group which are /// `EMPTY` or `DELETED`. #[inline] - pub fn match_empty_or_deleted(self) -> BitMask { + pub(crate) fn match_empty_or_deleted(self) -> BitMask { // A byte is EMPTY or DELETED iff the high bit is set BitMask((self.0 & repeat(0x80)).to_le()) } /// Returns a `BitMask` indicating all bytes in the group which are full. #[inline] - pub fn match_full(self) -> BitMask { + pub(crate) fn match_full(self) -> BitMask { self.match_empty_or_deleted().invert() } @@ -140,7 +143,7 @@ impl Group { /// - `DELETED => EMPTY` /// - `FULL => DELETED` #[inline] - pub fn convert_special_to_empty_and_full_to_deleted(self) -> Self { + pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 // and high_bit = 0 (FULL) to 1000_0000 // diff --git a/vendor/hashbrown/src/raw/mod.rs b/vendor/hashbrown/src/raw/mod.rs index 211b818..711fb89 100644 --- a/vendor/hashbrown/src/raw/mod.rs +++ b/vendor/hashbrown/src/raw/mod.rs @@ -4,7 +4,6 @@ use crate::TryReserveError; use core::iter::FusedIterator; use core::marker::PhantomData; use core::mem; -use core::mem::ManuallyDrop; use core::mem::MaybeUninit; use core::ptr::NonNull; use core::{hint, ptr}; @@ -21,12 +20,21 @@ cfg_if! { if #[cfg(all( target_feature = "sse2", any(target_arch = "x86", target_arch = "x86_64"), - not(miri) + not(miri), ))] { mod sse2; use sse2 as imp; + } else if #[cfg(all( + target_arch = "aarch64", + target_feature = "neon", + // NEON intrinsics are currently broken on big-endian targets. + // See https://github.com/rust-lang/stdarch/issues/1484. + target_endian = "little", + not(miri), + ))] { + mod neon; + use neon as imp; } else { - #[path = "generic.rs"] mod generic; use generic as imp; } @@ -37,36 +45,24 @@ pub(crate) use self::alloc::{do_alloc, Allocator, Global}; mod bitmask; -use self::bitmask::{BitMask, BitMaskIter}; +use self::bitmask::BitMaskIter; use self::imp::Group; // Branch prediction hint. This is currently only available on nightly but it // consistently improves performance by 10-15%. +#[cfg(not(feature = "nightly"))] +use core::convert::identity as likely; +#[cfg(not(feature = "nightly"))] +use core::convert::identity as unlikely; #[cfg(feature = "nightly")] use core::intrinsics::{likely, unlikely}; -// On stable we can use #[cold] to get a equivalent effect: this attributes -// suggests that the function is unlikely to be called -#[cfg(not(feature = "nightly"))] -#[inline] -#[cold] -fn cold() {} - -#[cfg(not(feature = "nightly"))] -#[inline] -fn likely(b: bool) -> bool { - if !b { - cold(); - } - b -} -#[cfg(not(feature = "nightly"))] -#[inline] -fn unlikely(b: bool) -> bool { - if b { - cold(); - } - b +// FIXME: use strict provenance functions once they are stable. +// Implement it with a transmute for now. 
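// On stable, the branch hints above degrade to plain identity functions,
// exactly as the hunk wires up with `core::convert::identity`; the same
// code compiles with or without the hint:
use core::convert::identity as likely;

fn get(slots: &[Option<u32>], i: usize) -> Option<u32> {
    if likely(i < slots.len()) {
        slots[i]
    } else {
        None
    }
}

fn main() {
    assert_eq!(get(&[Some(7)], 0), Some(7));
    assert_eq!(get(&[Some(7)], 5), None);
}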
+#[inline(always)] +#[allow(clippy::useless_transmute)] // clippy is wrong, cast and transmute are different here +fn invalid_mut(addr: usize) -> *mut T { + unsafe { core::mem::transmute(addr) } } #[inline] @@ -101,6 +97,13 @@ impl Fallibility { } } +trait SizedTypeProperties: Sized { + const IS_ZERO_SIZED: bool = mem::size_of::() == 0; + const NEEDS_DROP: bool = mem::needs_drop::(); +} + +impl SizedTypeProperties for T {} + /// Control byte value for an empty bucket. const EMPTY: u8 = 0b1111_1111; @@ -134,6 +137,13 @@ fn h1(hash: u64) -> usize { hash as usize } +// Constant for h2 function that grabing the top 7 bits of the hash. +const MIN_HASH_LEN: usize = if mem::size_of::() < mem::size_of::() { + mem::size_of::() +} else { + mem::size_of::() +}; + /// Secondary hash function, saved in the low 7 bits of the control byte. #[inline] #[allow(clippy::cast_possible_truncation)] @@ -141,8 +151,8 @@ fn h2(hash: u64) -> u8 { // Grab the top 7 bits of the hash. While the hash is normally a full 64-bit // value, some hash functions (such as FxHash) produce a usize result // instead, which means that the top 32 bits are 0 on 32-bit platforms. - let hash_len = usize::min(mem::size_of::(), mem::size_of::()); - let top7 = hash >> (hash_len * 8 - 7); + // So we use MIN_HASH_LEN constant to handle this. + let top7 = hash >> (MIN_HASH_LEN * 8 - 7); (top7 & 0x7f) as u8 // truncation } @@ -230,11 +240,15 @@ struct TableLayout { impl TableLayout { #[inline] - fn new() -> Self { + const fn new() -> Self { let layout = Layout::new::(); Self { size: layout.size(), - ctrl_align: usize::max(layout.align(), Group::WIDTH), + ctrl_align: if layout.align() > Group::WIDTH { + layout.align() + } else { + Group::WIDTH + }, } } @@ -248,6 +262,12 @@ impl TableLayout { size.checked_mul(buckets)?.checked_add(ctrl_align - 1)? & !(ctrl_align - 1); let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?; + // We need an additional check to ensure that the allocation doesn't + // exceed `isize::MAX` (https://github.com/rust-lang/rust/pull/95295). + if len > isize::MAX as usize - (ctrl_align - 1) { + return None; + } + Some(( unsafe { Layout::from_size_align_unchecked(len, ctrl_align) }, ctrl_offset, @@ -255,14 +275,9 @@ impl TableLayout { } } -/// Returns a Layout which describes the allocation required for a hash table, -/// and the offset of the control bytes in the allocation. -/// (the offset is also one past last element of buckets) -/// -/// Returns `None` if an overflow occurs. -#[cfg_attr(feature = "inline-more", inline)] -fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { - TableLayout::new::().calculate_layout_for(buckets) +/// A reference to an empty bucket into which an can be inserted. +pub struct InsertSlot { + index: usize, } /// A reference to a hash table bucket containing a `T`. @@ -290,11 +305,79 @@ impl Clone for Bucket { } impl Bucket { + /// Creates a [`Bucket`] that contain pointer to the data. + /// The pointer calculation is performed by calculating the + /// offset from given `base` pointer (convenience for + /// `base.as_ptr().sub(index)`). + /// + /// `index` is in units of `T`; e.g., an `index` of 3 represents a pointer + /// offset of `3 * size_of::()` bytes. 
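// The hash split used above, as a standalone sketch: `h1` picks the bucket
// and `h2` keeps the top 7 bits for the control byte, with the minimum of
// the two sizes (what the hunk names MIN_HASH_LEN) covering hashers that
// only fill the low 32 bits on 32-bit targets.
use core::mem;

fn h1(hash: u64) -> usize {
    hash as usize
}

fn h2(hash: u64) -> u8 {
    let min_hash_len = usize::min(mem::size_of::<usize>(), mem::size_of::<u64>());
    ((hash >> (min_hash_len * 8 - 7)) & 0x7f) as u8
}

fn main() {
    let hash = 0xDEAD_BEEF_CAFE_F00D_u64;
    let bucket_mask = 7; // a table with 8 buckets
    let index = h1(hash) & bucket_mask;
    assert!(index < 8);
    assert!(h2(hash) < 0x80); // always fits the low 7 bits of a control byte
}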
+ /// + /// If the `T` is a ZST, then we instead track the index of the element + /// in the table so that `erase` works properly (return + /// `NonNull::new_unchecked((index + 1) as *mut T)`) + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived + /// from the safety rules for [`<*mut T>::sub`] method of `*mut T` and the safety + /// rules of [`NonNull::new_unchecked`] function. + /// + /// Thus, in order to uphold the safety contracts for the [`<*mut T>::sub`] method + /// and [`NonNull::new_unchecked`] function, as well as for the correct + /// logic of the work of this crate, the following rules are necessary and + /// sufficient: + /// + /// * the `base` pointer must not be `dangling` and must points to the + /// end of the first `value element` from the `data part` of the table, i.e. + /// must be the pointer that returned by [`RawTable::data_end`] or by + /// [`RawTableInner::data_end`]; + /// + /// * `index` must not be greater than `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` + /// must be no greater than the number returned by the function + /// [`RawTable::buckets`] or [`RawTableInner::buckets`]. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the + /// `index` must not be greater than `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` + /// must be no greater than the number returned by the function + /// [`RawTable::buckets`] or [`RawTableInner::buckets`]. + /// + /// [`Bucket`]: crate::raw::Bucket + /// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1 + /// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked + /// [`RawTable::data_end`]: crate::raw::RawTable::data_end + /// [`RawTableInner::data_end`]: RawTableInner::data_end + /// [`RawTable::buckets`]: crate::raw::RawTable::buckets + /// [`RawTableInner::buckets`]: RawTableInner::buckets #[inline] unsafe fn from_base_index(base: NonNull, index: usize) -> Self { - let ptr = if mem::size_of::() == 0 { - // won't overflow because index must be less than length - (index + 1) as *mut T + // If mem::size_of::() != 0 then return a pointer to an `element` in + // the data part of the table (we start counting from "0", so that + // in the expression T[last], the "last" index actually one less than the + // "buckets" number in the table, i.e. "last = RawTableInner.bucket_mask"): + // + // `from_base_index(base, 1).as_ptr()` returns a pointer that + // points here in the data part of the table + // (to the start of T1) + // | + // | `base: NonNull` must point here + // | (to the end of T0 or to the start of C0) + // v v + // [Padding], Tlast, ..., |T1|, T0, |C0, C1, ..., Clast + // ^ + // `from_base_index(base, 1)` returns a pointer + // that points here in the data part of the table + // (to the end of T1) + // + // where: T0...Tlast - our stored data; C0...Clast - control bytes + // or metadata for data. 
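// The backwards layout in the diagram above, reduced to plain pointer
// arithmetic: with `end` standing in for `RawTable::data_end`, bucket `i`
// lives at `end - (i + 1)` elements, so T0 sits just below the control
// bytes and higher indices grow downward in memory.
fn main() {
    // In memory: [T3, T2, T1, T0]; logical buckets are indexed from T0.
    let storage: [u64; 4] = [40, 30, 20, 10];
    let end = unsafe { storage.as_ptr().add(4) }; // one past T0

    for index in 0..4usize {
        // from_base_index: ptr = base - index (one past the element);
        // as_ptr subtracts one more to reach the element itself.
        let value = unsafe { *end.sub(index + 1) };
        assert_eq!(value, [10, 20, 30, 40][index]);
    }
}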
+ let ptr = if T::IS_ZERO_SIZED { + // won't overflow because index must be less than length (bucket_mask) + // and bucket_mask is guaranteed to be less than `isize::MAX` + // (see TableLayout::calculate_layout_for method) + invalid_mut(index + 1) } else { base.as_ptr().sub(index) }; @@ -302,27 +385,183 @@ impl Bucket { ptr: NonNull::new_unchecked(ptr), } } + + /// Calculates the index of a [`Bucket`] as distance between two pointers + /// (convenience for `base.as_ptr().offset_from(self.ptr.as_ptr()) as usize`). + /// The returned value is in units of T: the distance in bytes divided by + /// [`core::mem::size_of::()`]. + /// + /// If the `T` is a ZST, then we return the index of the element in + /// the table so that `erase` works properly (return `self.ptr.as_ptr() as usize - 1`). + /// + /// This function is the inverse of [`from_base_index`]. + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived + /// from the safety rules for [`<*const T>::offset_from`] method of `*const T`. + /// + /// Thus, in order to uphold the safety contracts for [`<*const T>::offset_from`] + /// method, as well as for the correct logic of the work of this crate, the + /// following rules are necessary and sufficient: + /// + /// * `base` contained pointer must not be `dangling` and must point to the + /// end of the first `element` from the `data part` of the table, i.e. + /// must be a pointer that returns by [`RawTable::data_end`] or by + /// [`RawTableInner::data_end`]; + /// + /// * `self` also must not contain dangling pointer; + /// + /// * both `self` and `base` must be created from the same [`RawTable`] + /// (or [`RawTableInner`]). + /// + /// If `mem::size_of::() == 0`, this function is always safe. + /// + /// [`Bucket`]: crate::raw::Bucket + /// [`from_base_index`]: crate::raw::Bucket::from_base_index + /// [`RawTable::data_end`]: crate::raw::RawTable::data_end + /// [`RawTableInner::data_end`]: RawTableInner::data_end + /// [`RawTable`]: crate::raw::RawTable + /// [`RawTableInner`]: RawTableInner + /// [`<*const T>::offset_from`]: https://doc.rust-lang.org/nightly/core/primitive.pointer.html#method.offset_from #[inline] unsafe fn to_base_index(&self, base: NonNull) -> usize { - if mem::size_of::() == 0 { + // If mem::size_of::() != 0 then return an index under which we used to store the + // `element` in the data part of the table (we start counting from "0", so + // that in the expression T[last], the "last" index actually is one less than the + // "buckets" number in the table, i.e. "last = RawTableInner.bucket_mask"). + // For example for 5th element in table calculation is performed like this: + // + // mem::size_of::() + // | + // | `self = from_base_index(base, 5)` that returns pointer + // | that points here in tha data part of the table + // | (to the end of T5) + // | | `base: NonNull` must point here + // v | (to the end of T0 or to the start of C0) + // /???\ v v + // [Padding], Tlast, ..., |T10|, ..., T5|, T4, T3, T2, T1, T0, |C0, C1, C2, C3, C4, C5, ..., C10, ..., Clast + // \__________ __________/ + // \/ + // `bucket.to_base_index(base)` = 5 + // (base.as_ptr() as usize - self.ptr.as_ptr() as usize) / mem::size_of::() + // + // where: T0...Tlast - our stored data; C0...Clast - control bytes or metadata for data. + if T::IS_ZERO_SIZED { + // this can not be UB self.ptr.as_ptr() as usize - 1 } else { offset_from(base.as_ptr(), self.ptr.as_ptr()) } } + + /// Acquires the underlying raw pointer `*mut T` to `data`. 
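// For ZSTs the code above stores `index + 1` in the pointer itself. This
// sketch uses a plain `as` cast where the hunk uses a transmute (its
// stand-in for the still-unstable strict-provenance `ptr::invalid_mut`).
fn invalid_mut<T>(addr: usize) -> *mut T {
    addr as *mut T
}

struct Zst;

fn main() {
    let bucket_index = 5usize;
    // from_base_index for a ZST: encode the index, offset by one so the
    // pointer is never null.
    let ptr: *mut Zst = invalid_mut(bucket_index + 1);
    // to_base_index recovers it: `self.ptr.as_ptr() as usize - 1`.
    assert_eq!(ptr as usize - 1, bucket_index);
}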
+ /// + /// # Note + /// + /// If `T` is not [`Copy`], do not use `*mut T` methods that can cause calling the + /// destructor of `T` (for example the [`<*mut T>::drop_in_place`] method), because + /// for properly dropping the data we also need to clear `data` control bytes. If we + /// drop data, but do not clear `data control byte` it leads to double drop when + /// [`RawTable`] goes out of scope. + /// + /// If you modify an already initialized `value`, so [`Hash`] and [`Eq`] on the new + /// `T` value and its borrowed form *must* match those for the old `T` value, as the map + /// will not re-evaluate where the new value should go, meaning the value may become + /// "lost" if their location does not reflect their state. + /// + /// [`RawTable`]: crate::raw::RawTable + /// [`<*mut T>::drop_in_place`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.drop_in_place + /// [`Hash`]: https://doc.rust-lang.org/core/hash/trait.Hash.html + /// [`Eq`]: https://doc.rust-lang.org/core/cmp/trait.Eq.html + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "raw")] + /// # fn test() { + /// use core::hash::{BuildHasher, Hash}; + /// use hashbrown::raw::{Bucket, RawTable}; + /// + /// type NewHashBuilder = core::hash::BuildHasherDefault; + /// + /// fn make_hash(hash_builder: &S, key: &K) -> u64 { + /// use core::hash::Hasher; + /// let mut state = hash_builder.build_hasher(); + /// key.hash(&mut state); + /// state.finish() + /// } + /// + /// let hash_builder = NewHashBuilder::default(); + /// let mut table = RawTable::new(); + /// + /// let value = ("a", 100); + /// let hash = make_hash(&hash_builder, &value.0); + /// + /// table.insert(hash, value.clone(), |val| make_hash(&hash_builder, &val.0)); + /// + /// let bucket: Bucket<(&str, i32)> = table.find(hash, |(k1, _)| k1 == &value.0).unwrap(); + /// + /// assert_eq!(unsafe { &*bucket.as_ptr() }, &("a", 100)); + /// # } + /// # fn main() { + /// # #[cfg(feature = "raw")] + /// # test() + /// # } + /// ``` #[inline] pub fn as_ptr(&self) -> *mut T { - if mem::size_of::() == 0 { + if T::IS_ZERO_SIZED { // Just return an arbitrary ZST pointer which is properly aligned - mem::align_of::() as *mut T + // invalid pointer is good enough for ZST + invalid_mut(mem::align_of::()) } else { unsafe { self.ptr.as_ptr().sub(1) } } } + + /// Create a new [`Bucket`] that is offset from the `self` by the given + /// `offset`. The pointer calculation is performed by calculating the + /// offset from `self` pointer (convenience for `self.ptr.as_ptr().sub(offset)`). + /// This function is used for iterators. + /// + /// `offset` is in units of `T`; e.g., a `offset` of 3 represents a pointer + /// offset of `3 * size_of::()` bytes. + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived + /// from the safety rules for [`<*mut T>::sub`] method of `*mut T` and safety + /// rules of [`NonNull::new_unchecked`] function. + /// + /// Thus, in order to uphold the safety contracts for [`<*mut T>::sub`] method + /// and [`NonNull::new_unchecked`] function, as well as for the correct + /// logic of the work of this crate, the following rules are necessary and + /// sufficient: + /// + /// * `self` contained pointer must not be `dangling`; + /// + /// * `self.to_base_index() + ofset` must not be greater than `RawTableInner.bucket_mask`, + /// i.e. 
`(self.to_base_index() + ofset) <= RawTableInner.bucket_mask` or, in other + /// words, `self.to_base_index() + ofset + 1` must be no greater than the number returned + /// by the function [`RawTable::buckets`] or [`RawTableInner::buckets`]. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the + /// `self.to_base_index() + ofset` must not be greater than `RawTableInner.bucket_mask`, + /// i.e. `(self.to_base_index() + ofset) <= RawTableInner.bucket_mask` or, in other words, + /// `self.to_base_index() + ofset + 1` must be no greater than the number returned by the + /// function [`RawTable::buckets`] or [`RawTableInner::buckets`]. + /// + /// [`Bucket`]: crate::raw::Bucket + /// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1 + /// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked + /// [`RawTable::buckets`]: crate::raw::RawTable::buckets + /// [`RawTableInner::buckets`]: RawTableInner::buckets #[inline] unsafe fn next_n(&self, offset: usize) -> Self { - let ptr = if mem::size_of::() == 0 { - (self.ptr.as_ptr() as usize + offset) as *mut T + let ptr = if T::IS_ZERO_SIZED { + // invalid pointer is good enough for ZST + invalid_mut(self.ptr.as_ptr() as usize + offset) } else { self.ptr.as_ptr().sub(offset) }; @@ -330,26 +569,212 @@ impl Bucket { ptr: NonNull::new_unchecked(ptr), } } + + /// Executes the destructor (if any) of the pointed-to `data`. + /// + /// # Safety + /// + /// See [`ptr::drop_in_place`] for safety concerns. + /// + /// You should use [`RawTable::erase`] instead of this function, + /// or be careful with calling this function directly, because for + /// properly dropping the data we need also clear `data` control bytes. + /// If we drop data, but do not erase `data control byte` it leads to + /// double drop when [`RawTable`] goes out of scope. + /// + /// [`ptr::drop_in_place`]: https://doc.rust-lang.org/core/ptr/fn.drop_in_place.html + /// [`RawTable`]: crate::raw::RawTable + /// [`RawTable::erase`]: crate::raw::RawTable::erase #[cfg_attr(feature = "inline-more", inline)] - pub unsafe fn drop(&self) { + pub(crate) unsafe fn drop(&self) { self.as_ptr().drop_in_place(); } + + /// Reads the `value` from `self` without moving it. This leaves the + /// memory in `self` unchanged. + /// + /// # Safety + /// + /// See [`ptr::read`] for safety concerns. + /// + /// You should use [`RawTable::remove`] instead of this function, + /// or be careful with calling this function directly, because compiler + /// calls its destructor when readed `value` goes out of scope. It + /// can cause double dropping when [`RawTable`] goes out of scope, + /// because of not erased `data control byte`. + /// + /// [`ptr::read`]: https://doc.rust-lang.org/core/ptr/fn.read.html + /// [`RawTable`]: crate::raw::RawTable + /// [`RawTable::remove`]: crate::raw::RawTable::remove #[inline] - pub unsafe fn read(&self) -> T { + pub(crate) unsafe fn read(&self) -> T { self.as_ptr().read() } + + /// Overwrites a memory location with the given `value` without reading + /// or dropping the old value (like [`ptr::write`] function). + /// + /// # Safety + /// + /// See [`ptr::write`] for safety concerns. 
+ /// + /// # Note + /// + /// [`Hash`] and [`Eq`] on the new `T` value and its borrowed form *must* match + /// those for the old `T` value, as the map will not re-evaluate where the new + /// value should go, meaning the value may become "lost" if their location + /// does not reflect their state. + /// + /// [`ptr::write`]: https://doc.rust-lang.org/core/ptr/fn.write.html + /// [`Hash`]: https://doc.rust-lang.org/core/hash/trait.Hash.html + /// [`Eq`]: https://doc.rust-lang.org/core/cmp/trait.Eq.html #[inline] - pub unsafe fn write(&self, val: T) { + pub(crate) unsafe fn write(&self, val: T) { self.as_ptr().write(val); } + + /// Returns a shared immutable reference to the `value`. + /// + /// # Safety + /// + /// See [`NonNull::as_ref`] for safety concerns. + /// + /// [`NonNull::as_ref`]: https://doc.rust-lang.org/core/ptr/struct.NonNull.html#method.as_ref + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "raw")] + /// # fn test() { + /// use core::hash::{BuildHasher, Hash}; + /// use hashbrown::raw::{Bucket, RawTable}; + /// + /// type NewHashBuilder = core::hash::BuildHasherDefault; + /// + /// fn make_hash(hash_builder: &S, key: &K) -> u64 { + /// use core::hash::Hasher; + /// let mut state = hash_builder.build_hasher(); + /// key.hash(&mut state); + /// state.finish() + /// } + /// + /// let hash_builder = NewHashBuilder::default(); + /// let mut table = RawTable::new(); + /// + /// let value: (&str, String) = ("A pony", "is a small horse".to_owned()); + /// let hash = make_hash(&hash_builder, &value.0); + /// + /// table.insert(hash, value.clone(), |val| make_hash(&hash_builder, &val.0)); + /// + /// let bucket: Bucket<(&str, String)> = table.find(hash, |(k, _)| k == &value.0).unwrap(); + /// + /// assert_eq!( + /// unsafe { bucket.as_ref() }, + /// &("A pony", "is a small horse".to_owned()) + /// ); + /// # } + /// # fn main() { + /// # #[cfg(feature = "raw")] + /// # test() + /// # } + /// ``` #[inline] pub unsafe fn as_ref<'a>(&self) -> &'a T { &*self.as_ptr() } + + /// Returns a unique mutable reference to the `value`. + /// + /// # Safety + /// + /// See [`NonNull::as_mut`] for safety concerns. + /// + /// # Note + /// + /// [`Hash`] and [`Eq`] on the new `T` value and its borrowed form *must* match + /// those for the old `T` value, as the map will not re-evaluate where the new + /// value should go, meaning the value may become "lost" if their location + /// does not reflect their state. 
+ /// + /// [`NonNull::as_mut`]: https://doc.rust-lang.org/core/ptr/struct.NonNull.html#method.as_mut + /// [`Hash`]: https://doc.rust-lang.org/core/hash/trait.Hash.html + /// [`Eq`]: https://doc.rust-lang.org/core/cmp/trait.Eq.html + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "raw")] + /// # fn test() { + /// use core::hash::{BuildHasher, Hash}; + /// use hashbrown::raw::{Bucket, RawTable}; + /// + /// type NewHashBuilder = core::hash::BuildHasherDefault; + /// + /// fn make_hash(hash_builder: &S, key: &K) -> u64 { + /// use core::hash::Hasher; + /// let mut state = hash_builder.build_hasher(); + /// key.hash(&mut state); + /// state.finish() + /// } + /// + /// let hash_builder = NewHashBuilder::default(); + /// let mut table = RawTable::new(); + /// + /// let value: (&str, String) = ("A pony", "is a small horse".to_owned()); + /// let hash = make_hash(&hash_builder, &value.0); + /// + /// table.insert(hash, value.clone(), |val| make_hash(&hash_builder, &val.0)); + /// + /// let bucket: Bucket<(&str, String)> = table.find(hash, |(k, _)| k == &value.0).unwrap(); + /// + /// unsafe { + /// bucket + /// .as_mut() + /// .1 + /// .push_str(" less than 147 cm at the withers") + /// }; + /// assert_eq!( + /// unsafe { bucket.as_ref() }, + /// &( + /// "A pony", + /// "is a small horse less than 147 cm at the withers".to_owned() + /// ) + /// ); + /// # } + /// # fn main() { + /// # #[cfg(feature = "raw")] + /// # test() + /// # } + /// ``` #[inline] pub unsafe fn as_mut<'a>(&self) -> &'a mut T { &mut *self.as_ptr() } + + /// Copies `size_of` bytes from `other` to `self`. The source + /// and destination may *not* overlap. + /// + /// # Safety + /// + /// See [`ptr::copy_nonoverlapping`] for safety concerns. + /// + /// Like [`read`], `copy_nonoverlapping` creates a bitwise copy of `T`, regardless of + /// whether `T` is [`Copy`]. If `T` is not [`Copy`], using *both* the values + /// in the region beginning at `*self` and the region beginning at `*other` can + /// [violate memory safety]. + /// + /// # Note + /// + /// [`Hash`] and [`Eq`] on the new `T` value and its borrowed form *must* match + /// those for the old `T` value, as the map will not re-evaluate where the new + /// value should go, meaning the value may become "lost" if their location + /// does not reflect their state. + /// + /// [`ptr::copy_nonoverlapping`]: https://doc.rust-lang.org/core/ptr/fn.copy_nonoverlapping.html + /// [`read`]: https://doc.rust-lang.org/core/ptr/fn.read.html + /// [violate memory safety]: https://doc.rust-lang.org/std/ptr/fn.read.html#ownership-of-the-returned-value + /// [`Hash`]: https://doc.rust-lang.org/core/hash/trait.Hash.html + /// [`Eq`]: https://doc.rust-lang.org/core/cmp/trait.Eq.html #[cfg(feature = "raw")] #[inline] pub unsafe fn copy_from_nonoverlapping(&self, other: &Self) { @@ -358,15 +783,16 @@ impl Bucket { } /// A raw hash table with an unsafe API. -pub struct RawTable { - table: RawTableInner, +pub struct RawTable { + table: RawTableInner, + alloc: A, // Tell dropck that we own instances of T. marker: PhantomData, } /// Non-generic part of `RawTable` which allows functions to be instantiated only once regardless /// of how many different key-value types are used. -struct RawTableInner { +struct RawTableInner { // Mask to get an index from a hash value. The value is one less than the // number of buckets in the table. 
bucket_mask: usize, @@ -380,8 +806,6 @@ struct RawTableInner { // Number of elements in the table, only really used by len() items: usize, - - alloc: A, } impl RawTable { @@ -393,7 +817,8 @@ impl RawTable { #[inline] pub const fn new() -> Self { Self { - table: RawTableInner::new_in(Global), + table: RawTableInner::NEW, + alloc: Global, marker: PhantomData, } } @@ -412,7 +837,9 @@ impl RawTable { } } -impl RawTable { +impl RawTable { + const TABLE_LAYOUT: TableLayout = TableLayout::new::(); + /// Creates a new empty hash table without allocating any memory, using the /// given allocator. /// @@ -420,9 +847,10 @@ impl RawTable { /// leave the data pointer dangling since that bucket is never written to /// due to our load factor forcing us to always have at least 1 free bucket. #[inline] - pub fn new_in(alloc: A) -> Self { + pub const fn new_in(alloc: A) -> Self { Self { - table: RawTableInner::new_in(alloc), + table: RawTableInner::NEW, + alloc, marker: PhantomData, } } @@ -440,73 +868,99 @@ impl RawTable { Ok(Self { table: RawTableInner::new_uninitialized( - alloc, - TableLayout::new::(), + &alloc, + Self::TABLE_LAYOUT, buckets, fallibility, )?, + alloc, marker: PhantomData, }) } - /// Attempts to allocate a new hash table with at least enough capacity - /// for inserting the given number of elements without reallocating. - fn fallible_with_capacity( - alloc: A, - capacity: usize, - fallibility: Fallibility, - ) -> Result { + /// Attempts to allocate a new hash table using the given allocator, with at least enough + /// capacity for inserting the given number of elements without reallocating. + #[cfg(feature = "raw")] + pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result { Ok(Self { table: RawTableInner::fallible_with_capacity( - alloc, - TableLayout::new::(), + &alloc, + Self::TABLE_LAYOUT, capacity, - fallibility, + Fallibility::Fallible, )?, + alloc, marker: PhantomData, }) } - /// Attempts to allocate a new hash table using the given allocator, with at least enough - /// capacity for inserting the given number of elements without reallocating. - #[cfg(feature = "raw")] - pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result { - Self::fallible_with_capacity(alloc, capacity, Fallibility::Fallible) - } - /// Allocates a new hash table using the given allocator, with at least enough capacity for /// inserting the given number of elements without reallocating. pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { - // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. - match Self::fallible_with_capacity(alloc, capacity, Fallibility::Infallible) { - Ok(capacity) => capacity, - Err(_) => unsafe { hint::unreachable_unchecked() }, + Self { + table: RawTableInner::with_capacity(&alloc, Self::TABLE_LAYOUT, capacity), + alloc, + marker: PhantomData, } } /// Returns a reference to the underlying allocator. #[inline] pub fn allocator(&self) -> &A { - &self.table.alloc + &self.alloc } - /// Deallocates the table without dropping any entries. - #[cfg_attr(feature = "inline-more", inline)] - unsafe fn free_buckets(&mut self) { - self.table.free_buckets(TableLayout::new::()); + /// Returns pointer to one past last `data` element in the the table as viewed from + /// the start point of the allocation. + /// + /// The caller must ensure that the `RawTable` outlives the returned [`NonNull`], + /// otherwise using it may result in [`undefined behavior`]. 
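The constructors in the hunk above differ only in whether they allocate. A small usage sketch, assuming the `raw` feature; `with_capacity` is the `Global`-allocator convenience over `with_capacity_in`:

```rust
use hashbrown::raw::RawTable;

fn main() {
    // `new` allocates nothing: it is the 1-bucket empty singleton whose
    // data pointer dangles because the load factor keeps one bucket free.
    let empty: RawTable<u64> = RawTable::new();
    assert_eq!(empty.buckets(), 1);
    assert_eq!(empty.capacity(), 0);

    // `with_capacity` pre-allocates so the first `capacity` inserts
    // never reallocate.
    let table: RawTable<u64> = RawTable::with_capacity(8);
    assert!(table.capacity() >= 8);
    assert_eq!(table.len(), 0);
}
```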
+ /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + pub fn data_end(&self) -> NonNull { + // SAFETY: `self.table.ctrl` is `NonNull`, so casting it is safe + // + // `self.table.ctrl.as_ptr().cast()` returns pointer that + // points here (to the end of `T0`) + // ∨ + // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m + // \________ ________/ + // \/ + // `n = buckets - 1`, i.e. `RawTable::buckets() - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`. + // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search + // with loading `Group` bytes from the heap works properly, even if the result + // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also + // `RawTableInner::set_ctrl` function. + // + // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. + unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().cast()) } } - /// Returns pointer to one past last element of data table. + /// Returns pointer to start of data table. #[inline] - pub unsafe fn data_end(&self) -> NonNull { - NonNull::new_unchecked(self.table.ctrl.as_ptr().cast()) + #[cfg(any(feature = "raw", feature = "nightly"))] + pub unsafe fn data_start(&self) -> NonNull { + NonNull::new_unchecked(self.data_end().as_ptr().wrapping_sub(self.buckets())) } - /// Returns pointer to start of data table. + /// Return the information about memory allocated by the table. + /// + /// `RawTable` allocates single memory block to store both data and metadata. + /// This function returns allocation size and alignment and the beginning of the area. + /// These are the arguments which will be passed to `dealloc` when the table is dropped. + /// + /// This function might be useful for memory profiling. #[inline] - #[cfg(feature = "nightly")] - pub unsafe fn data_start(&self) -> *mut T { - self.data_end().as_ptr().wrapping_sub(self.buckets()) + #[cfg(feature = "raw")] + pub fn allocation_info(&self) -> (NonNull, Layout) { + // SAFETY: We use the same `table_layout` that was used to allocate + // this table. + unsafe { self.table.allocation_info_or_zero(Self::TABLE_LAYOUT) } } /// Returns the index of a bucket from a `Bucket`. @@ -516,8 +970,55 @@ impl RawTable { } /// Returns a pointer to an element in the table. + /// + /// The caller must ensure that the `RawTable` outlives the returned [`Bucket`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the caller of this function must observe the + /// following safety rules: + /// + /// * The table must already be allocated; + /// + /// * The `index` must not be greater than the number returned by the [`RawTable::buckets`] + /// function, i.e. `(index + 1) <= self.buckets()`. + /// + /// It is safe to call this function with index of zero (`index == 0`) on a table that has + /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`]. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the `index` must + /// not be greater than the number returned by the [`RawTable::buckets`] function, i.e. + /// `(index + 1) <= self.buckets()`. 
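The single-allocation picture drawn in these comments reduces to a few lines of arithmetic: padding, then `buckets` copies of `T`, then `buckets + Group::WIDTH` control bytes. A sketch of that computation; `GROUP_WIDTH` is a stand-in for `Group::WIDTH`, and the real `TableLayout` additionally caches the size and alignment per `T`:

```rust
use std::alloc::Layout;

const GROUP_WIDTH: usize = 16; // e.g. SSE2

// Returns the whole-allocation layout and the offset of the control bytes.
fn calculate_layout_for<T>(buckets: usize) -> Option<(Layout, usize)> {
    let layout = Layout::new::<T>();
    // The ctrl region starts at the end of the data array, rounded up so
    // it is aligned for full-`Group` loads.
    let ctrl_align = usize::max(layout.align(), GROUP_WIDTH);
    let ctrl_offset =
        layout.size().checked_mul(buckets)?.checked_add(ctrl_align - 1)? & !(ctrl_align - 1);
    let len = ctrl_offset.checked_add(buckets + GROUP_WIDTH)?;
    Some((Layout::from_size_align(len, ctrl_align).ok()?, ctrl_offset))
}

fn main() {
    // 8 buckets of u64: 64 data bytes, ctrl at offset 64, 8 + 16 ctrl bytes.
    let (layout, ctrl_offset) = calculate_layout_for::<u64>(8).unwrap();
    assert_eq!(ctrl_offset, 64);
    assert_eq!(layout.size(), 64 + 8 + 16);
}
```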
+ /// + /// [`RawTable::buckets`]: RawTable::buckets + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] pub unsafe fn bucket(&self, index: usize) -> Bucket { + // If mem::size_of::() != 0 then return a pointer to the `element` in the `data part` of the table + // (we start counting from "0", so that in the expression T[n], the "n" index actually one less than + // the "buckets" number of our `RawTable`, i.e. "n = RawTable::buckets() - 1"): + // + // `table.bucket(3).as_ptr()` returns a pointer that points here in the `data` + // part of the `RawTable`, i.e. to the start of T3 (see `Bucket::as_ptr`) + // | + // | `base = self.data_end()` points here + // | (to the start of CT0 or to the end of T0) + // v v + // [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + // ^ \__________ __________/ + // `table.bucket(3)` returns a pointer that points \/ + // here in the `data` part of the `RawTable` (to additional control bytes + // the end of T3) `m = Group::WIDTH - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`; + // CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + // the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask` + // is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function. + // + // P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.buckets()` because the number + // of buckets is a power of two, and `self.table.bucket_mask = self.buckets() - 1`. debug_assert_ne!(self.table.bucket_mask, 0); debug_assert!(index < self.buckets()); Bucket::from_base_index(self.data_end(), index) @@ -525,8 +1026,7 @@ impl RawTable { /// Erases an element from the table without dropping it. #[cfg_attr(feature = "inline-more", inline)] - #[deprecated(since = "0.8.1", note = "use erase or remove instead")] - pub unsafe fn erase_no_drop(&mut self, item: &Bucket) { + unsafe fn erase_no_drop(&mut self, item: &Bucket) { let index = self.bucket_index(item); self.table.erase(index); } @@ -534,7 +1034,6 @@ impl RawTable { /// Erases an element from the table, dropping it in place. #[cfg_attr(feature = "inline-more", inline)] #[allow(clippy::needless_pass_by_value)] - #[allow(deprecated)] pub unsafe fn erase(&mut self, item: Bucket) { // Erase the element from the table first since drop might panic. self.erase_no_drop(&item); @@ -558,12 +1057,18 @@ impl RawTable { } /// Removes an element from the table, returning it. + /// + /// This also returns an `InsertSlot` pointing to the newly free bucket. #[cfg_attr(feature = "inline-more", inline)] #[allow(clippy::needless_pass_by_value)] - #[allow(deprecated)] - pub unsafe fn remove(&mut self, item: Bucket) -> T { + pub unsafe fn remove(&mut self, item: Bucket) -> (T, InsertSlot) { self.erase_no_drop(&item); - item.read() + ( + item.read(), + InsertSlot { + index: self.bucket_index(&item), + }, + ) } /// Finds and removes an element from the table, returning it. @@ -571,7 +1076,7 @@ impl RawTable { pub fn remove_entry(&mut self, hash: u64, eq: impl FnMut(&T) -> bool) -> Option { // Avoid `Option::map` because it bloats LLVM IR. 
match self.find(hash, eq) { - Some(bucket) => Some(unsafe { self.remove(bucket) }), + Some(bucket) => Some(unsafe { self.remove(bucket).0 }), None => None, } } @@ -585,18 +1090,17 @@ impl RawTable { /// Removes all elements from the table without freeing the backing memory. #[cfg_attr(feature = "inline-more", inline)] pub fn clear(&mut self) { + if self.is_empty() { + // Special case empty table to avoid surprising O(capacity) time. + return; + } // Ensure that the table is reset even if one of the drops panic let mut self_ = guard(self, |self_| self_.clear_no_drop()); unsafe { - self_.drop_elements(); - } - } - - unsafe fn drop_elements(&mut self) { - if mem::needs_drop::() && !self.is_empty() { - for item in self.iter() { - item.drop(); - } + // SAFETY: ScopeGuard sets to zero the `items` field of the table + // even in case of panic during the dropping of the elements so + // that there will be no double drop of the elements. + self_.table.drop_elements::(); } } @@ -607,7 +1111,16 @@ impl RawTable { // space for. let min_size = usize::max(self.table.items, min_size); if min_size == 0 { - *self = Self::new_in(self.table.alloc.clone()); + let mut old_inner = mem::replace(&mut self.table, RawTableInner::NEW); + unsafe { + // SAFETY: + // 1. We call the function only once; + // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. If any elements' drop function panics, then there will only be a memory leak, + // because we have replaced the inner table with a new one. + old_inner.drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); + } return; } @@ -624,14 +1137,33 @@ impl RawTable { if min_buckets < self.buckets() { // Fast path if the table is empty if self.table.items == 0 { - *self = Self::with_capacity_in(min_size, self.table.alloc.clone()); + let new_inner = + RawTableInner::with_capacity(&self.alloc, Self::TABLE_LAYOUT, min_size); + let mut old_inner = mem::replace(&mut self.table, new_inner); + unsafe { + // SAFETY: + // 1. We call the function only once; + // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. If any elements' drop function panics, then there will only be a memory leak, + // because we have replaced the inner table with a new one. + old_inner.drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); + } } else { // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. - if self - .resize(min_size, hasher, Fallibility::Infallible) - .is_err() - { - unsafe { hint::unreachable_unchecked() } + unsafe { + // SAFETY: + // 1. We know for sure that `min_size >= self.table.items`. + // 2. The [`RawTableInner`] must already have properly initialized control bytes since + // we will never expose RawTable::new_uninitialized in a public API. + if self + .resize(min_size, hasher, Fallibility::Infallible) + .is_err() + { + // SAFETY: The result of calling the `resize` function cannot be an error + // because `fallibility == Fallibility::Infallible. + hint::unreachable_unchecked() + } } } } @@ -641,13 +1173,18 @@ impl RawTable { /// without reallocation. #[cfg_attr(feature = "inline-more", inline)] pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) { - if additional > self.table.growth_left { + if unlikely(additional > self.table.growth_left) { // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. 
-            if self
-                .reserve_rehash(additional, hasher, Fallibility::Infallible)
-                .is_err()
-            {
-                unsafe { hint::unreachable_unchecked() }
+            unsafe {
+                // SAFETY: The [`RawTableInner`] must already have properly initialized control
+                // bytes since we will never expose RawTable::new_uninitialized in a public API.
+                if self
+                    .reserve_rehash(additional, hasher, Fallibility::Infallible)
+                    .is_err()
+                {
+                    // SAFETY: All allocation errors will be caught inside `RawTableInner::reserve_rehash`.
+                    hint::unreachable_unchecked()
+                }
             }
         }
     }
@@ -661,28 +1198,45 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
         hasher: impl Fn(&T) -> u64,
     ) -> Result<(), TryReserveError> {
         if additional > self.table.growth_left {
-            self.reserve_rehash(additional, hasher, Fallibility::Fallible)
+            // SAFETY: The [`RawTableInner`] must already have properly initialized control
+            // bytes since we will never expose RawTable::new_uninitialized in a public API.
+            unsafe { self.reserve_rehash(additional, hasher, Fallibility::Fallible) }
         } else {
             Ok(())
         }
     }
 
     /// Out-of-line slow path for `reserve` and `try_reserve`.
+    ///
+    /// # Safety
+    ///
+    /// The [`RawTableInner`] must have properly initialized control bytes,
+    /// otherwise calling this function results in [`undefined behavior`].
+    ///
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[cold]
    #[inline(never)]
-    fn reserve_rehash(
+    unsafe fn reserve_rehash(
         &mut self,
         additional: usize,
         hasher: impl Fn(&T) -> u64,
         fallibility: Fallibility,
     ) -> Result<(), TryReserveError> {
         unsafe {
+            // SAFETY:
+            // 1. We know for sure that `alloc` and `layout` match the [`Allocator`] and
+            //    [`TableLayout`] that were used to allocate this table.
+            // 2. The `drop` function is the actual drop function of the elements stored in
+            //    the table.
+            // 3. The caller ensures that the control bytes of the `RawTableInner`
+            //    are already initialized.
             self.table.reserve_rehash_inner(
+                &self.alloc,
                 additional,
                 &|table, index| hasher(table.bucket::<T>(index).as_ref()),
                 fallibility,
-                TableLayout::new::<T>(),
-                if mem::needs_drop::<T>() {
+                Self::TABLE_LAYOUT,
+                if T::NEEDS_DROP {
                     Some(mem::transmute(ptr::drop_in_place::<T> as unsafe fn(*mut T)))
                 } else {
                     None
@@ -693,20 +1247,50 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
 
     /// Allocates a new table of a different size and moves the contents of the
     /// current table into it.
-    fn resize(
+    ///
+    /// # Safety
+    ///
+    /// The [`RawTableInner`] must have properly initialized control bytes,
+    /// otherwise calling this function results in [`undefined behavior`].
+    ///
+    /// The caller of this function must ensure that `capacity >= self.table.items`,
+    /// otherwise:
+    ///
+    /// * If `self.table.items != 0`, calling this function with `capacity`
+    ///   equal to 0 (`capacity == 0`) results in [`undefined behavior`].
+    ///
+    /// * If `capacity_to_buckets(capacity) < Group::WIDTH` and
+    ///   `self.table.items > capacity_to_buckets(capacity)`,
+    ///   calling this function results in [`undefined behavior`].
+    ///
+    /// * If `capacity_to_buckets(capacity) >= Group::WIDTH` and
+    ///   `self.table.items > capacity_to_buckets(capacity)`,
+    ///   this function will never return (it will go into an
+    ///   infinite loop).
+    ///
+    /// See [`RawTableInner::find_insert_slot`] for more information.
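The `capacity_to_buckets` bounds referenced in these safety conditions follow from the 7/8 load factor. A sketch of how such a conversion works, consistent with the `capacity_to_buckets`/`bucket_mask_to_capacity` helpers this file relies on; the exact small-table cutoffs are an assumption:

```rust
// cap < 8: small fixed sizes; otherwise size for a 7/8 load factor and
// round up to a power of two.
fn capacity_to_buckets(cap: usize) -> Option<usize> {
    if cap < 8 {
        return Some(if cap < 4 { 4 } else { 8 });
    }
    let adjusted = cap.checked_mul(8)? / 7;
    Some(adjusted.next_power_of_two())
}

fn bucket_mask_to_capacity(bucket_mask: usize) -> usize {
    if bucket_mask < 8 {
        // small tables keep one slot free
        bucket_mask
    } else {
        // larger tables keep a 7/8 load factor
        ((bucket_mask + 1) / 8) * 7
    }
}

fn main() {
    assert_eq!(capacity_to_buckets(28), Some(32));
    assert_eq!(bucket_mask_to_capacity(32 - 1), 28); // 7/8 of 32
    // Reserving for 29 elements must skip to 64 buckets:
    assert_eq!(capacity_to_buckets(29), Some(64));
}
```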
+ /// + /// [`RawTableInner::find_insert_slot`]: RawTableInner::find_insert_slot + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + unsafe fn resize( &mut self, capacity: usize, hasher: impl Fn(&T) -> u64, fallibility: Fallibility, ) -> Result<(), TryReserveError> { - unsafe { - self.table.resize_inner( - capacity, - &|table, index| hasher(table.bucket::(index).as_ref()), - fallibility, - TableLayout::new::(), - ) - } + // SAFETY: + // 1. The caller of this function guarantees that `capacity >= self.table.items`. + // 2. We know for sure that `alloc` and `layout` matches the [`Allocator`] and + // [`TableLayout`] that were used to allocate this table. + // 3. The caller ensures that the control bytes of the `RawTableInner` + // are already initialized. + self.table.resize_inner( + &self.alloc, + capacity, + &|table, index| hasher(table.bucket::(index).as_ref()), + fallibility, + Self::TABLE_LAYOUT, + ) } /// Inserts a new element into the table, and returns its raw bucket. @@ -715,22 +1299,27 @@ impl RawTable { #[cfg_attr(feature = "inline-more", inline)] pub fn insert(&mut self, hash: u64, value: T, hasher: impl Fn(&T) -> u64) -> Bucket { unsafe { - let mut index = self.table.find_insert_slot(hash); + // SAFETY: + // 1. The [`RawTableInner`] must already have properly initialized control bytes since + // we will never expose `RawTable::new_uninitialized` in a public API. + // + // 2. We reserve additional space (if necessary) right after calling this function. + let mut slot = self.table.find_insert_slot(hash); - // We can avoid growing the table once we have reached our load - // factor if we are replacing a tombstone. This works since the - // number of EMPTY slots does not change in this case. - let old_ctrl = *self.table.ctrl(index); + // We can avoid growing the table once we have reached our load factor if we are replacing + // a tombstone. This works since the number of EMPTY slots does not change in this case. + // + // SAFETY: The function is guaranteed to return [`InsertSlot`] that contains an index + // in the range `0..=self.buckets()`. + let old_ctrl = *self.table.ctrl(slot.index); if unlikely(self.table.growth_left == 0 && special_is_empty(old_ctrl)) { self.reserve(1, hasher); - index = self.table.find_insert_slot(hash); + // SAFETY: We know for sure that `RawTableInner` has control bytes + // initialized and that there is extra space in the table. + slot = self.table.find_insert_slot(hash); } - self.table.record_item_insert_at(index, old_ctrl, hash); - - let bucket = self.bucket(index); - bucket.write(value); - bucket + self.insert_in_slot(hash, slot, value) } } @@ -796,9 +1385,9 @@ impl RawTable { { let index = self.bucket_index(&bucket); let old_ctrl = *self.table.ctrl(index); - debug_assert!(is_full(old_ctrl)); + debug_assert!(self.is_bucket_full(index)); let old_growth_left = self.table.growth_left; - let item = self.remove(bucket); + let item = self.remove(bucket).0; if let Some(new_item) = f(item) { self.table.growth_left = old_growth_left; self.table.set_ctrl(index, old_ctrl); @@ -810,28 +1399,89 @@ impl RawTable { } } - /// Searches for an element in the table. + /// Searches for an element in the table. If the element is not found, + /// returns `Err` with the position of a slot where an element with the + /// same hash could be inserted. + /// + /// This function may resize the table if additional space is required for + /// inserting an element. 
     #[inline]
-    pub fn find(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> Option<Bucket<T>> {
-        let result = self.table.find_inner(hash, &mut |index| unsafe {
-            eq(self.bucket(index).as_ref())
-        });
+    pub fn find_or_find_insert_slot(
+        &mut self,
+        hash: u64,
+        mut eq: impl FnMut(&T) -> bool,
+        hasher: impl Fn(&T) -> u64,
+    ) -> Result<Bucket<T>, InsertSlot> {
+        self.reserve(1, hasher);
 
-        // Avoid `Option::map` because it bloats LLVM IR.
-        match result {
-            Some(index) => Some(unsafe { self.bucket(index) }),
-            None => None,
+        unsafe {
+            // SAFETY:
+            // 1. We know for sure that there is at least one empty `bucket` in the table.
+            // 2. The [`RawTableInner`] must already have properly initialized control bytes since we will
+            //    never expose `RawTable::new_uninitialized` in a public API.
+            // 3. The `find_or_find_insert_slot_inner` function returns the `index` of only the full bucket,
+            //    which is in the range `0..self.buckets()` (since there is at least one empty `bucket` in
+            //    the table), so calling `self.bucket(index)` and `Bucket::as_ref` is safe.
+            match self
+                .table
+                .find_or_find_insert_slot_inner(hash, &mut |index| eq(self.bucket(index).as_ref()))
+            {
+                // SAFETY: See explanation above.
+                Ok(index) => Ok(self.bucket(index)),
+                Err(slot) => Err(slot),
+            }
         }
     }
 
-    /// Gets a reference to an element in the table.
+    /// Inserts a new element into the table in the given slot, and returns its
+    /// raw bucket.
+    ///
+    /// # Safety
+    ///
+    /// `slot` must point to a slot previously returned by
+    /// `find_or_find_insert_slot`, and no mutation of the table must have
+    /// occurred since that call.
     #[inline]
-    pub fn get(&self, hash: u64, eq: impl FnMut(&T) -> bool) -> Option<&T> {
-        // Avoid `Option::map` because it bloats LLVM IR.
-        match self.find(hash, eq) {
-            Some(bucket) => Some(unsafe { bucket.as_ref() }),
-            None => None,
-        }
+    pub unsafe fn insert_in_slot(&mut self, hash: u64, slot: InsertSlot, value: T) -> Bucket<T> {
+        let old_ctrl = *self.table.ctrl(slot.index);
+        self.table.record_item_insert_at(slot.index, old_ctrl, hash);
+
+        let bucket = self.bucket(slot.index);
+        bucket.write(value);
+        bucket
+    }
+
+    /// Searches for an element in the table.
+    #[inline]
+    pub fn find(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> Option<Bucket<T>> {
+        unsafe {
+            // SAFETY:
+            // 1. The [`RawTableInner`] must already have properly initialized control bytes since we
+            //    will never expose `RawTable::new_uninitialized` in a public API.
+            // 2. The `find_inner` function returns the `index` of only the full bucket, which is in
+            //    the range `0..self.buckets()`, so calling `self.bucket(index)` and `Bucket::as_ref`
+            //    is safe.
+            let result = self
+                .table
+                .find_inner(hash, &mut |index| eq(self.bucket(index).as_ref()));
 
+            // Avoid `Option::map` because it bloats LLVM IR.
+            match result {
+                // SAFETY: See explanation above.
+                Some(index) => Some(self.bucket(index)),
+                None => None,
+            }
+        }
+    }
+
+    /// Gets a reference to an element in the table.
+    #[inline]
+    pub fn get(&self, hash: u64, eq: impl FnMut(&T) -> bool) -> Option<&T> {
+        // Avoid `Option::map` because it bloats LLVM IR.
+        match self.find(hash, eq) {
+            Some(bucket) => Some(unsafe { bucket.as_ref() }),
+            None => None,
+        }
     }
 
     /// Gets a mutable reference to an element in the table.
@@ -928,17 +1578,27 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
         self.table.bucket_mask + 1
     }
 
+    /// Checks whether the bucket at `index` is full.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure `index` is less than the number of buckets.
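Together, `find_or_find_insert_slot` and `insert_in_slot` form the two-phase lookup-or-insert pattern that higher-level entry APIs build on. A sketch of an upsert written against them, assuming the `raw` feature; using the key itself as the hash keeps the example short:

```rust
use hashbrown::raw::RawTable;

fn upsert(table: &mut RawTable<(u64, u64)>, key: u64, value: u64) {
    let hasher = |v: &(u64, u64)| v.0;
    match table.find_or_find_insert_slot(key, |v| v.0 == key, hasher) {
        // Found: update in place through the bucket.
        Ok(bucket) => unsafe { bucket.as_mut().1 = value },
        // Not found: the returned slot is still valid because nothing
        // mutated the table between the two calls.
        Err(slot) => unsafe {
            table.insert_in_slot(key, slot, (key, value));
        },
    }
}

fn main() {
    let mut table = RawTable::new();
    upsert(&mut table, 1, 10);
    upsert(&mut table, 1, 20);
    assert_eq!(table.len(), 1);
    assert_eq!(table.get(1, |v| v.0 == 1), Some(&(1, 20)));
}
```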
+ #[inline] + pub unsafe fn is_bucket_full(&self, index: usize) -> bool { + self.table.is_bucket_full(index) + } + /// Returns an iterator over every element in the table. It is up to /// the caller to ensure that the `RawTable` outlives the `RawIter`. /// Because we cannot make the `next` method unsafe on the `RawIter` /// struct, we have to make the `iter` method unsafe. #[inline] pub unsafe fn iter(&self) -> RawIter { - let data = Bucket::from_base_index(self.data_end(), 0); - RawIter { - iter: RawIterRange::new(self.table.ctrl.as_ptr(), data, self.table.buckets()), - items: self.table.items, - } + // SAFETY: + // 1. The caller must uphold the safety contract for `iter` method. + // 2. The [`RawTableInner`] must already have properly initialized control bytes since + // we will never expose RawTable::new_uninitialized in a public API. + self.table.iter() } /// Returns an iterator over occupied buckets that could match a given hash. @@ -952,7 +1612,7 @@ impl RawTable { /// `RawIterHash` struct, we have to make the `iter_hash` method unsafe. #[cfg_attr(feature = "inline-more", inline)] #[cfg(feature = "raw")] - pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash<'_, T, A> { + pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash { RawIterHash::new(self, hash) } @@ -978,8 +1638,8 @@ impl RawTable { debug_assert_eq!(iter.len(), self.len()); RawDrain { iter, - table: ManuallyDrop::new(mem::replace(self, Self::new_in(self.table.alloc.clone()))), - orig_table: NonNull::from(self), + table: mem::replace(&mut self.table, RawTableInner::NEW), + orig_table: NonNull::from(&mut self.table), marker: PhantomData, } } @@ -993,31 +1653,31 @@ impl RawTable { pub unsafe fn into_iter_from(self, iter: RawIter) -> RawIntoIter { debug_assert_eq!(iter.len(), self.len()); - let alloc = self.table.alloc.clone(); let allocation = self.into_allocation(); RawIntoIter { iter, allocation, marker: PhantomData, - alloc, } } /// Converts the table into a raw allocation. The contents of the table /// should be dropped using a `RawIter` before freeing the allocation. #[cfg_attr(feature = "inline-more", inline)] - pub(crate) fn into_allocation(self) -> Option<(NonNull, Layout)> { + pub(crate) fn into_allocation(self) -> Option<(NonNull, Layout, A)> { let alloc = if self.table.is_empty_singleton() { None } else { // Avoid `Option::unwrap_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match calculate_layout::(self.table.buckets()) { - Some(lco) => lco, - None => unsafe { hint::unreachable_unchecked() }, - }; + let (layout, ctrl_offset) = + match Self::TABLE_LAYOUT.calculate_layout_for(self.table.buckets()) { + Some(lco) => lco, + None => unsafe { hint::unreachable_unchecked() }, + }; Some(( unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().sub(ctrl_offset)) }, layout, + unsafe { ptr::read(&self.alloc) }, )) }; mem::forget(self); @@ -1025,41 +1685,62 @@ impl RawTable { } } -unsafe impl Send for RawTable +unsafe impl Send for RawTable where T: Send, A: Send, { } -unsafe impl Sync for RawTable +unsafe impl Sync for RawTable where T: Sync, A: Sync, { } -impl RawTableInner { +impl RawTableInner { + const NEW: Self = RawTableInner::new(); + + /// Creates a new empty hash table without allocating any memory. + /// + /// In effect this returns a table with exactly 1 bucket. However we can + /// leave the data pointer dangling since that bucket is never accessed + /// due to our load factor forcing us to always have at least 1 free bucket. 
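A short usage sketch for the iterator contract above: the table must outlive the iterator, and dereferencing the yielded buckets is itself unsafe. Assumes the `raw` feature:

```rust
use hashbrown::raw::RawTable;

fn main() {
    let hasher = |v: &u64| *v;
    let mut table: RawTable<u64> = RawTable::new();
    for v in [1u64, 2, 3] {
        table.insert(v, v, hasher);
    }

    let mut sum = 0;
    // SAFETY: `table` outlives the iterator, as required above.
    unsafe {
        for bucket in table.iter() {
            sum += *bucket.as_ref();
        }
    }
    assert_eq!(sum, 6);
}
```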
#[inline] - const fn new_in(alloc: A) -> Self { + const fn new() -> Self { Self { // Be careful to cast the entire slice to a raw pointer. ctrl: unsafe { NonNull::new_unchecked(Group::static_empty() as *const _ as *mut u8) }, bucket_mask: 0, items: 0, growth_left: 0, - alloc, } } } -impl RawTableInner { +impl RawTableInner { + /// Allocates a new [`RawTableInner`] with the given number of buckets. + /// The control bytes and buckets are left uninitialized. + /// + /// # Safety + /// + /// The caller of this function must ensure that the `buckets` is power of two + /// and also initialize all control bytes of the length `self.bucket_mask + 1 + + /// Group::WIDTH` with the [`EMPTY`] bytes. + /// + /// See also [`Allocator`] API for other safety concerns. + /// + /// [`Allocator`]: https://doc.rust-lang.org/alloc/alloc/trait.Allocator.html #[cfg_attr(feature = "inline-more", inline)] - unsafe fn new_uninitialized( - alloc: A, + unsafe fn new_uninitialized( + alloc: &A, table_layout: TableLayout, buckets: usize, fallibility: Fallibility, - ) -> Result { + ) -> Result + where + A: Allocator, + { debug_assert!(buckets.is_power_of_two()); // Avoid `Option::ok_or_else` because it bloats LLVM IR. @@ -1068,45 +1749,48 @@ impl RawTableInner { None => return Err(fallibility.capacity_overflow()), }; - // We need an additional check to ensure that the allocation doesn't - // exceed `isize::MAX`. We can skip this check on 64-bit systems since - // such allocations will never succeed anyways. - // - // This mirrors what Vec does in the standard library. - if mem::size_of::() < 8 && layout.size() > isize::MAX as usize { - return Err(fallibility.capacity_overflow()); - } - - let ptr: NonNull = match do_alloc(&alloc, layout) { + let ptr: NonNull = match do_alloc(alloc, layout) { Ok(block) => block.cast(), Err(_) => return Err(fallibility.alloc_err(layout)), }; + // SAFETY: null pointer will be caught in above check let ctrl = NonNull::new_unchecked(ptr.as_ptr().add(ctrl_offset)); Ok(Self { ctrl, bucket_mask: buckets - 1, items: 0, growth_left: bucket_mask_to_capacity(buckets - 1), - alloc, }) } + /// Attempts to allocate a new [`RawTableInner`] with at least enough + /// capacity for inserting the given number of elements without reallocating. + /// + /// All the control bytes are initialized with the [`EMPTY`] bytes. #[inline] - fn fallible_with_capacity( - alloc: A, + fn fallible_with_capacity( + alloc: &A, table_layout: TableLayout, capacity: usize, fallibility: Fallibility, - ) -> Result { + ) -> Result + where + A: Allocator, + { if capacity == 0 { - Ok(Self::new_in(alloc)) + Ok(Self::NEW) } else { + // SAFETY: We checked that we could successfully allocate the new table, and then + // initialized all control bytes with the constant `EMPTY` byte. unsafe { let buckets = capacity_to_buckets(capacity).ok_or_else(|| fallibility.capacity_overflow())?; let result = Self::new_uninitialized(alloc, table_layout, buckets, fallibility)?; + // SAFETY: We checked that the table is allocated and therefore the table already has + // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for) + // so writing `self.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe. result.ctrl(0).write_bytes(EMPTY, result.num_ctrl_bytes()); Ok(result) @@ -1114,66 +1798,397 @@ impl RawTableInner { } } - /// Searches for an empty or deleted bucket which is suitable for inserting - /// a new element and sets the hash for that slot. 
+ /// Allocates a new [`RawTableInner`] with at least enough capacity for inserting + /// the given number of elements without reallocating. + /// + /// Panics if the new capacity exceeds [`isize::MAX`] bytes and [`abort`] the program + /// in case of allocation error. Use [`fallible_with_capacity`] instead if you want to + /// handle memory allocation failure. + /// + /// All the control bytes are initialized with the [`EMPTY`] bytes. + /// + /// [`fallible_with_capacity`]: RawTableInner::fallible_with_capacity + /// [`abort`]: https://doc.rust-lang.org/alloc/alloc/fn.handle_alloc_error.html + fn with_capacity(alloc: &A, table_layout: TableLayout, capacity: usize) -> Self + where + A: Allocator, + { + // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. + match Self::fallible_with_capacity(alloc, table_layout, capacity, Fallibility::Infallible) { + Ok(table_inner) => table_inner, + // SAFETY: All allocation errors will be caught inside `RawTableInner::new_uninitialized`. + Err(_) => unsafe { hint::unreachable_unchecked() }, + } + } + + /// Fixes up an insertion slot returned by the [`RawTableInner::find_insert_slot_in_group`] method. + /// + /// In tables smaller than the group width (`self.buckets() < Group::WIDTH`), trailing control + /// bytes outside the range of the table are filled with [`EMPTY`] entries. These will unfortunately + /// trigger a match of [`RawTableInner::find_insert_slot_in_group`] function. This is because + /// the `Some(bit)` returned by `group.match_empty_or_deleted().lowest_set_bit()` after masking + /// (`(probe_seq.pos + bit) & self.bucket_mask`) may point to a full bucket that is already occupied. + /// We detect this situation here and perform a second scan starting at the beginning of the table. + /// This second scan is guaranteed to find an empty slot (due to the load factor) before hitting the + /// trailing control bytes (containing [`EMPTY`] bytes). + /// + /// If this function is called correctly, it is guaranteed to return [`InsertSlot`] with an + /// index of an empty or deleted bucket in the range `0..self.buckets()` (see `Warning` and + /// `Safety`). + /// + /// # Warning + /// + /// The table must have at least 1 empty or deleted `bucket`, otherwise if the table is less than + /// the group width (`self.buckets() < Group::WIDTH`) this function returns an index outside of the + /// table indices range `0..self.buckets()` (`0..=self.bucket_mask`). Attempt to write data at that + /// index will cause immediate [`undefined behavior`]. + /// + /// # Safety + /// + /// The safety rules are directly derived from the safety rules for [`RawTableInner::ctrl`] method. + /// Thus, in order to uphold those safety contracts, as well as for the correct logic of the work + /// of this crate, the following rules are necessary and sufficient: + /// + /// * The [`RawTableInner`] must have properly initialized control bytes otherwise calling this + /// function results in [`undefined behavior`]. + /// + /// * This function must only be used on insertion slots found by [`RawTableInner::find_insert_slot_in_group`] + /// (after the `find_insert_slot_in_group` function, but before insertion into the table). /// - /// There must be at least 1 empty bucket in the table. + /// * The `index` must not be greater than the `self.bucket_mask`, i.e. `(index + 1) <= self.buckets()` + /// (this one is provided by the [`RawTableInner::find_insert_slot_in_group`] function). 
+ /// + /// Calling this function with an index not provided by [`RawTableInner::find_insert_slot_in_group`] + /// may result in [`undefined behavior`] even if the index satisfies the safety rules of the + /// [`RawTableInner::ctrl`] function (`index < self.bucket_mask + 1 + Group::WIDTH`). + /// + /// [`RawTableInner::ctrl`]: RawTableInner::ctrl + /// [`RawTableInner::find_insert_slot_in_group`]: RawTableInner::find_insert_slot_in_group + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn fix_insert_slot(&self, mut index: usize) -> InsertSlot { + // SAFETY: The caller of this function ensures that `index` is in the range `0..=self.bucket_mask`. + if unlikely(self.is_bucket_full(index)) { + debug_assert!(self.bucket_mask < Group::WIDTH); + // SAFETY: + // + // * Since the caller of this function ensures that the control bytes are properly + // initialized and `ptr = self.ctrl(0)` points to the start of the array of control + // bytes, therefore: `ctrl` is valid for reads, properly aligned to `Group::WIDTH` + // and points to the properly initialized control bytes (see also + // `TableLayout::calculate_layout_for` and `ptr::read`); + // + // * Because the caller of this function ensures that the index was provided by the + // `self.find_insert_slot_in_group()` function, so for for tables larger than the + // group width (self.buckets() >= Group::WIDTH), we will never end up in the given + // branch, since `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_slot_in_group` + // cannot return a full bucket index. For tables smaller than the group width, calling + // the `unwrap_unchecked` function is also safe, as the trailing control bytes outside + // the range of the table are filled with EMPTY bytes (and we know for sure that there + // is at least one FULL bucket), so this second scan either finds an empty slot (due to + // the load factor) or hits the trailing control bytes (containing EMPTY). + index = Group::load_aligned(self.ctrl(0)) + .match_empty_or_deleted() + .lowest_set_bit() + .unwrap_unchecked(); + } + InsertSlot { index } + } + + /// Finds the position to insert something in a group. + /// + /// **This may have false positives and must be fixed up with `fix_insert_slot` + /// before it's used.** + /// + /// The function is guaranteed to return the index of an empty or deleted [`Bucket`] + /// in the range `0..self.buckets()` (`0..=self.bucket_mask`). #[inline] - unsafe fn prepare_insert_slot(&self, hash: u64) -> (usize, u8) { - let index = self.find_insert_slot(hash); + fn find_insert_slot_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option { + let bit = group.match_empty_or_deleted().lowest_set_bit(); + + if likely(bit.is_some()) { + // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. + Some((probe_seq.pos + bit.unwrap()) & self.bucket_mask) + } else { + None + } + } + + /// Searches for an element in the table, or a potential slot where that element could + /// be inserted (an empty or deleted [`Bucket`] index). + /// + /// This uses dynamic dispatch to reduce the amount of code generated, but that is + /// eliminated by LLVM optimizations. + /// + /// This function does not make any changes to the `data` part of the table, or any + /// changes to the `items` or `growth_left` field of the table. 
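The probing these docs keep referring to is a triangular sequence over whole groups: start at `h1(hash) & bucket_mask` and step by `Group::WIDTH`, `2 * Group::WIDTH`, and so on. With a power-of-two table this visits every group exactly once, which is why the "infinite loop" caveats only bite when no empty or deleted bucket exists. A sketch, with `GROUP_WIDTH` standing in for `Group::WIDTH`:

```rust
use std::collections::HashSet;

const GROUP_WIDTH: usize = 16;

struct ProbeSeq {
    pos: usize,
    stride: usize,
}

impl ProbeSeq {
    fn move_next(&mut self, bucket_mask: usize) {
        // Triangular probing: strides of 1, 2, 3, ... groups.
        self.stride += GROUP_WIDTH;
        self.pos += self.stride;
        self.pos &= bucket_mask;
    }
}

fn main() {
    let bucket_mask = 127; // 128 buckets, 8 groups
    let mut seq = ProbeSeq { pos: 40, stride: 0 };
    let mut seen = HashSet::new();
    for _ in 0..(bucket_mask + 1) / GROUP_WIDTH {
        seen.insert(seq.pos / GROUP_WIDTH); // group index visited
        seq.move_next(bucket_mask);
    }
    // Every group is visited exactly once before the sequence repeats.
    assert_eq!(seen.len(), (bucket_mask + 1) / GROUP_WIDTH);
}
```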
+    ///
+    /// The table must have at least 1 empty or deleted `bucket`, otherwise, if the
+    /// `eq: &mut dyn FnMut(usize) -> bool` function does not return `true`, this function
+    /// will never return (will go into an infinite loop) for tables larger than the group
+    /// width, or return an index outside of the table indices range if the table is smaller
+    /// than the group width.
+    ///
+    /// This function is guaranteed to provide the `eq: &mut dyn FnMut(usize) -> bool`
+    /// function with only `FULL` buckets' indices and return the `index` of the found
+    /// element (as `Ok(index)`). If the element is not found and there is at least 1
+    /// empty or deleted [`Bucket`] in the table, the function is guaranteed to return
+    /// [`InsertSlot`] with an index in the range `0..self.buckets()`, but in any case,
+    /// if this function returns [`InsertSlot`], it will contain an index in the range
+    /// `0..=self.buckets()`.
+    ///
+    /// # Safety
+    ///
+    /// The [`RawTableInner`] must have properly initialized control bytes, otherwise calling
+    /// this function results in [`undefined behavior`].
+    ///
+    /// Attempting to write data at the [`InsertSlot`] returned by this function when the table
+    /// is smaller than the group width and there was not at least one empty or deleted bucket
+    /// in the table will cause immediate [`undefined behavior`]. This is because in this case
+    /// the function will return `self.bucket_mask + 1` as an index due to the trailing [`EMPTY`]
+    /// control bytes outside the table range.
+    ///
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+    #[inline]
+    unsafe fn find_or_find_insert_slot_inner(
+        &self,
+        hash: u64,
+        eq: &mut dyn FnMut(usize) -> bool,
+    ) -> Result<usize, InsertSlot> {
+        let mut insert_slot = None;
+
+        let h2_hash = h2(hash);
+        let mut probe_seq = self.probe_seq(hash);
+
+        loop {
+            // SAFETY:
+            // * Caller of this function ensures that the control bytes are properly initialized.
+            //
+            // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1`
+            //   of the table due to masking with `self.bucket_mask` and also because the number
+            //   of buckets is a power of two (see `self.probe_seq` function).
+            //
+            // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to
+            //   call `Group::load` due to the extended control bytes range, which is
+            //   `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control
+            //   byte will never be read for the allocated table);
+            //
+            // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will
+            //   always return "0" (zero), so Group::load will read unaligned `Group::static_empty()`
+            //   bytes, which is safe (see RawTableInner::new).
+            let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) };
+
+            for bit in group.match_byte(h2_hash) {
+                let index = (probe_seq.pos + bit) & self.bucket_mask;
+
+                if likely(eq(index)) {
+                    return Ok(index);
+                }
+            }
+
+            // We didn't find the element we were looking for in the group, try to get an
+            // insertion slot from the group if we don't have one yet.
+            if likely(insert_slot.is_none()) {
+                insert_slot = self.find_insert_slot_in_group(&group, &probe_seq);
+            }
+
+            // Only stop the search if the group contains at least one empty element.
+            // Otherwise, the element that we are looking for might be in a following group.
+            if likely(group.match_empty().any_bit_set()) {
+                // We must have found an insert slot by now, since the current group contains at
For tables smaller than the group width, there will still be an + // empty element in the current (and only) group due to the load factor. + unsafe { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. + // + // * We use this function with the slot / index found by `self.find_insert_slot_in_group` + return Err(self.fix_insert_slot(insert_slot.unwrap_unchecked())); + } + } + + probe_seq.move_next(self.bucket_mask); + } + } + + /// Searches for an empty or deleted bucket which is suitable for inserting a new + /// element and sets the hash for that slot. Returns an index of that slot and the + /// old control byte stored in the found index. + /// + /// This function does not check if the given element exists in the table. Also, + /// this function does not check if there is enough space in the table to insert + /// a new element. Caller of the funtion must make ensure that the table has at + /// least 1 empty or deleted `bucket`, otherwise this function will never return + /// (will go into an infinite loop) for tables larger than the group width, or + /// return an index outside of the table indices range if the table is less than + /// the group width. + /// + /// If there is at least 1 empty or deleted `bucket` in the table, the function is + /// guaranteed to return an `index` in the range `0..self.buckets()`, but in any case, + /// if this function returns an `index` it will be in the range `0..=self.buckets()`. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// The safety rules are directly derived from the safety rules for the + /// [`RawTableInner::set_ctrl_h2`] and [`RawTableInner::find_insert_slot`] methods. + /// Thus, in order to uphold the safety contracts for that methods, as well as for + /// the correct logic of the work of this crate, you must observe the following rules + /// when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated and has properly initialized + /// control bytes otherwise calling this function results in [`undefined behavior`]. + /// + /// * The caller of this function must ensure that the "data" parts of the table + /// will have an entry in the returned index (matching the given hash) right + /// after calling this function. + /// + /// Attempt to write data at the `index` returned by this function when the table is + /// less than the group width and if there was not at least one empty or deleted bucket in + /// the table will cause immediate [`undefined behavior`]. This is because in this case the + /// function will return `self.bucket_mask + 1` as an index due to the trailing [`EMPTY] + /// control bytes outside the table range. + /// + /// The caller must independently increase the `items` field of the table, and also, + /// if the old control byte was [`EMPTY`], then decrease the table's `growth_left` + /// field, and do not change it if the old control byte was [`DELETED`]. + /// + /// See also [`Bucket::as_ptr`] method, for more information about of properly removing + /// or saving `element` from / into the [`RawTable`] / [`RawTableInner`]. 
+ /// + /// [`Bucket::as_ptr`]: Bucket::as_ptr + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// [`RawTableInner::ctrl`]: RawTableInner::ctrl + /// [`RawTableInner::set_ctrl_h2`]: RawTableInner::set_ctrl_h2 + /// [`RawTableInner::find_insert_slot`]: RawTableInner::find_insert_slot + #[inline] + unsafe fn prepare_insert_slot(&mut self, hash: u64) -> (usize, u8) { + // SAFETY: Caller of this function ensures that the control bytes are properly initialized. + let index: usize = self.find_insert_slot(hash).index; + // SAFETY: + // 1. The `find_insert_slot` function either returns an `index` less than or + // equal to `self.buckets() = self.bucket_mask + 1` of the table, or never + // returns if it cannot find an empty or deleted slot. + // 2. The caller of this function guarantees that the table has already been + // allocated let old_ctrl = *self.ctrl(index); self.set_ctrl_h2(index, hash); (index, old_ctrl) } /// Searches for an empty or deleted bucket which is suitable for inserting - /// a new element. + /// a new element, returning the `index` for the new [`Bucket`]. /// - /// There must be at least 1 empty bucket in the table. + /// This function does not make any changes to the `data` part of the table, or any + /// changes to the `items` or `growth_left` field of the table. + /// + /// The table must have at least 1 empty or deleted `bucket`, otherwise this function + /// will never return (will go into an infinite loop) for tables larger than the group + /// width, or return an index outside of the table indices range if the table is less + /// than the group width. + /// + /// If there is at least 1 empty or deleted `bucket` in the table, the function is + /// guaranteed to return [`InsertSlot`] with an index in the range `0..self.buckets()`, + /// but in any case, if this function returns [`InsertSlot`], it will contain an index + /// in the range `0..=self.buckets()`. + /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling + /// this function results in [`undefined behavior`]. + /// + /// Attempt to write data at the [`InsertSlot`] returned by this function when the table is + /// less than the group width and if there was not at least one empty or deleted bucket in + /// the table will cause immediate [`undefined behavior`]. This is because in this case the + /// function will return `self.bucket_mask + 1` as an index due to the trailing [`EMPTY] + /// control bytes outside the table range. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] - fn find_insert_slot(&self, hash: u64) -> usize { + unsafe fn find_insert_slot(&self, hash: u64) -> InsertSlot { let mut probe_seq = self.probe_seq(hash); loop { - unsafe { - let group = Group::load(self.ctrl(probe_seq.pos)); - if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() { - let result = (probe_seq.pos + bit) & self.bucket_mask; - - // In tables smaller than the group width, trailing control - // bytes outside the range of the table are filled with - // EMPTY entries. These will unfortunately trigger a - // match, but once masked may point to a full bucket that - // is already occupied. We detect this situation here and - // perform a second scan starting at the beginning of the - // table. This second scan is guaranteed to find an empty - // slot (due to the load factor) before hitting the trailing - // control bytes (containing EMPTY). 
- if unlikely(is_full(*self.ctrl(result))) { - debug_assert!(self.bucket_mask < Group::WIDTH); - debug_assert_ne!(probe_seq.pos, 0); - return Group::load_aligned(self.ctrl(0)) - .match_empty_or_deleted() - .lowest_set_bit_nonzero(); - } + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. + // + // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1` + // of the table due to masking with `self.bucket_mask` and also because mumber of + // buckets is a power of two (see `self.probe_seq` function). + // + // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to + // call `Group::load` due to the extended control bytes range, which is + // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control + // byte will never be read for the allocated table); + // + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will + // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // bytes, which is safe (see RawTableInner::new). + let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; - return result; + let index = self.find_insert_slot_in_group(&group, &probe_seq); + if likely(index.is_some()) { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. + // + // * We use this function with the slot / index found by `self.find_insert_slot_in_group` + unsafe { + return self.fix_insert_slot(index.unwrap_unchecked()); } } probe_seq.move_next(self.bucket_mask); } } - /// Searches for an element in the table. This uses dynamic dispatch to reduce the amount of - /// code generated, but it is eliminated by LLVM optimizations. - #[inline] - fn find_inner(&self, hash: u64, eq: &mut dyn FnMut(usize) -> bool) -> Option { + /// Searches for an element in a table, returning the `index` of the found element. + /// This uses dynamic dispatch to reduce the amount of code generated, but it is + /// eliminated by LLVM optimizations. + /// + /// This function does not make any changes to the `data` part of the table, or any + /// changes to the `items` or `growth_left` field of the table. + /// + /// The table must have at least 1 empty `bucket`, otherwise, if the + /// `eq: &mut dyn FnMut(usize) -> bool` function does not return `true`, + /// this function will also never return (will go into an infinite loop). + /// + /// This function is guaranteed to provide the `eq: &mut dyn FnMut(usize) -> bool` + /// function with only `FULL` buckets' indices and return the `index` of the found + /// element as `Some(index)`, so the index will always be in the range + /// `0..self.buckets()`. + /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling + /// this function results in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline(always)] + unsafe fn find_inner(&self, hash: u64, eq: &mut dyn FnMut(usize) -> bool) -> Option { let h2_hash = h2(hash); let mut probe_seq = self.probe_seq(hash); loop { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. + // + // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1` + // of the table due to masking with `self.bucket_mask`. 
+ // + // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to + // call `Group::load` due to the extended control bytes range, which is + // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control + // byte will never be read for the allocated table); + // + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will + // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // bytes, which is safe (see RawTableInner::new_in). let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; for bit in group.match_byte(h2_hash) { + // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. let index = (probe_seq.pos + bit) & self.bucket_mask; if likely(eq(index)) { @@ -1189,12 +2204,52 @@ impl RawTableInner { } } + /// Prepares for rehashing data in place (that is, without allocating new memory). + /// Converts all full index `control bytes` to `DELETED` and all `DELETED` control + /// bytes to `EMPTY`, i.e. performs the following conversion: + /// + /// - `EMPTY` control bytes -> `EMPTY`; + /// - `DELETED` control bytes -> `EMPTY`; + /// - `FULL` control bytes -> `DELETED`. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// You must observe the following safety rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The caller of this function must convert the `DELETED` bytes back to `FULL` + /// bytes when re-inserting them into their ideal position (which was impossible + /// to do during the first insert due to tombstones). If the caller does not do + /// this, then calling this function may result in a memory leak. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes otherwise + /// calling this function results in [`undefined behavior`]. + /// + /// Calling this function on a table that has not been allocated results in + /// [`undefined behavior`]. + /// + /// See also [`Bucket::as_ptr`] method, for more information about of properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`Bucket::as_ptr`]: Bucket::as_ptr + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[allow(clippy::mut_mut)] #[inline] unsafe fn prepare_rehash_in_place(&mut self) { - // Bulk convert all full control bytes to DELETED, and all DELETED - // control bytes to EMPTY. This effectively frees up all buckets - // containing a DELETED entry. + // Bulk convert all full control bytes to DELETED, and all DELETED control bytes to EMPTY. + // This effectively frees up all buckets containing a DELETED entry. + // + // SAFETY: + // 1. `i` is guaranteed to be within bounds since we are iterating from zero to `buckets - 1`; + // 2. Even if `i` will be `i == self.bucket_mask`, it is safe to call `Group::load_aligned` + // due to the extended control bytes range, which is `self.bucket_mask + 1 + Group::WIDTH`; + // 3. The caller of this function guarantees that [`RawTableInner`] has already been allocated; + // 4. 
We can use `Group::load_aligned` and `Group::store_aligned` here since we start from 0 + // and go to the end with a step equal to `Group::WIDTH` (see TableLayout::calculate_layout_for). for i in (0..self.buckets()).step_by(Group::WIDTH) { let group = Group::load_aligned(self.ctrl(i)); let group = group.convert_special_to_empty_and_full_to_deleted(); @@ -1203,15 +2258,245 @@ impl RawTableInner { // Fix up the trailing control bytes. See the comments in set_ctrl // for the handling of tables smaller than the group width. - if self.buckets() < Group::WIDTH { + // + // SAFETY: The caller of this function guarantees that [`RawTableInner`] + // has already been allocated + if unlikely(self.buckets() < Group::WIDTH) { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes, + // so copying `self.buckets() == self.bucket_mask + 1` bytes with offset equal to + // `Group::WIDTH` is safe self.ctrl(0) .copy_to(self.ctrl(Group::WIDTH), self.buckets()); } else { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of + // control bytes,so copying `Group::WIDTH` bytes with offset equal + // to `self.buckets() == self.bucket_mask + 1` is safe self.ctrl(0) .copy_to(self.ctrl(self.buckets()), Group::WIDTH); } } + /// Returns an iterator over every element in the table. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result + /// is [`undefined behavior`]: + /// + /// * The caller has to ensure that the `RawTableInner` outlives the + /// `RawIter`. Because we cannot make the `next` method unsafe on + /// the `RawIter` struct, we have to make the `iter` method unsafe. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + /// + /// The type `T` must be the actual type of the elements stored in the table, + /// otherwise using the returned [`RawIter`] results in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn iter(&self) -> RawIter { + // SAFETY: + // 1. Since the caller of this function ensures that the control bytes + // are properly initialized and `self.data_end()` points to the start + // of the array of control bytes, therefore: `ctrl` is valid for reads, + // properly aligned to `Group::WIDTH` and points to the properly initialized + // control bytes. + // 2. `data` bucket index in the table is equal to the `ctrl` index (i.e. + // equal to zero). + // 3. We pass the exact value of buckets of the table to the function. + // + // `ctrl` points here (to the start + // of the first control byte `CT0`) + // ∨ + // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m + // \________ ________/ + // \/ + // `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`. + // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search + // with loading `Group` bytes from the heap works properly, even if the result + // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also + // `RawTableInner::set_ctrl` function. + // + // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. 
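For reference, the bulk conversion that `prepare_rehash_in_place` performs a little above can be written out per control byte. A hedged scalar sketch, assuming hashbrown's byte encoding (`EMPTY = 0xFF`, `DELETED = 0x80`, and full bytes carrying a 7-bit `h2` fragment with the top bit clear); the real code applies this to a whole `Group` at once via SIMD or word-wide bit tricks:

```rust
const EMPTY: u8 = 0b1111_1111;
const DELETED: u8 = 0b1000_0000;

/// Scalar sketch of `convert_special_to_empty_and_full_to_deleted`:
/// EMPTY -> EMPTY, DELETED -> EMPTY, FULL -> DELETED.
fn convert(ctrl: u8) -> u8 {
    // Special bytes (EMPTY, DELETED) have the top bit set; full bytes do not.
    if ctrl & 0x80 != 0 {
        EMPTY
    } else {
        DELETED
    }
}

fn main() {
    assert_eq!(convert(EMPTY), EMPTY);
    assert_eq!(convert(DELETED), EMPTY);
    assert_eq!(convert(0x5A), DELETED); // a full byte (an `h2` value)
}
```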
+        let data = Bucket::from_base_index(self.data_end(), 0);
+        RawIter {
+            // SAFETY: See explanation above
+            iter: RawIterRange::new(self.ctrl.as_ptr(), data, self.buckets()),
+            items: self.items,
+        }
+    }
+
+    /// Executes the destructors (if any) of the values stored in the table.
+    ///
+    /// # Note
+    ///
+    /// This function does not erase the control bytes of the table and does
+    /// not make any changes to the `items` or `growth_left` fields of the
+    /// table. If necessary, the caller of this function must manually set
+    /// up these table fields, for example using the [`clear_no_drop`] function.
+    ///
+    /// Be careful when calling this function, because the drop function of
+    /// the elements can panic, and this can leave the table in an inconsistent
+    /// state.
+    ///
+    /// # Safety
+    ///
+    /// The type `T` must be the actual type of the elements stored in the table,
+    /// otherwise calling this function may result in [`undefined behavior`].
+    ///
+    /// If `T` is a type that should be dropped and **the table is not empty**,
+    /// calling this function more than once results in [`undefined behavior`].
+    ///
+    /// If `T` is not [`Copy`], attempting to use values stored in the table after
+    /// calling this function may result in [`undefined behavior`].
+    ///
+    /// It is safe to call this function on a table that has not been allocated,
+    /// on a table with uninitialized control bytes, and on a table with no actual
+    /// data but with `Full` control bytes if `self.items == 0`.
+    ///
+    /// See also the [`Bucket::drop`] / [`Bucket::as_ptr`] methods for more information
+    /// about properly removing or saving an `element` from / into the [`RawTable`] /
+    /// [`RawTableInner`].
+    ///
+    /// [`Bucket::drop`]: Bucket::drop
+    /// [`Bucket::as_ptr`]: Bucket::as_ptr
+    /// [`clear_no_drop`]: RawTableInner::clear_no_drop
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+    unsafe fn drop_elements<T>(&mut self) {
+        // Check that `self.items != 0`. Protects against the possibility
+        // of creating an iterator on a table with uninitialized control bytes.
+        if T::NEEDS_DROP && self.items != 0 {
+            // SAFETY: We know for sure that RawTableInner will outlive the
+            // returned `RawIter` iterator, and the caller of this function
+            // must uphold the safety contract for the `drop_elements` method.
+            for item in self.iter::<T>() {
+                // SAFETY: The caller must uphold the safety contract for
+                // the `drop_elements` method.
+                item.drop();
+            }
+        }
+    }
+
+    /// Executes the destructors (if any) of the values stored in the table and then
+    /// deallocates the table.
+    ///
+    /// # Note
+    ///
+    /// Calling this function automatically makes invalid (dangling) all instances of
+    /// buckets ([`Bucket`]) and makes invalid (dangling) the `ctrl` field of the table.
+    ///
+    /// This function does not make any changes to the `bucket_mask`, `items` or `growth_left`
+    /// fields of the table. If necessary, the caller of this function must manually set
+    /// up these table fields.
+    ///
+    /// # Safety
+    ///
+    /// If any of the following conditions are violated, the result is [`undefined behavior`]:
+    ///
+    /// * Calling this function more than once;
+    ///
+    /// * The type `T` must be the actual type of the elements stored in the table.
+    ///
+    /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used
+    ///   to allocate this table.
+    ///
+    /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` that
+    ///   was used to allocate this table.
+ /// + /// The caller of this function should pay attention to the possibility of the + /// elements' drop function panicking, because this: + /// + /// * May leave the table in an inconsistent state; + /// + /// * Memory is never deallocated, so a memory leak may occur. + /// + /// Attempt to use the `ctrl` field of the table (dereference) after calling this + /// function results in [`undefined behavior`]. + /// + /// It is safe to call this function on a table that has not been allocated, + /// on a table with uninitialized control bytes, and on a table with no actual + /// data but with `Full` control bytes if `self.items == 0`. + /// + /// See also [`RawTableInner::drop_elements`] or [`RawTableInner::free_buckets`] + /// for more information. + /// + /// [`RawTableInner::drop_elements`]: RawTableInner::drop_elements + /// [`RawTableInner::free_buckets`]: RawTableInner::free_buckets + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + unsafe fn drop_inner_table(&mut self, alloc: &A, table_layout: TableLayout) { + if !self.is_empty_singleton() { + unsafe { + // SAFETY: The caller must uphold the safety contract for `drop_inner_table` method. + self.drop_elements::(); + // SAFETY: + // 1. We have checked that our table is allocated. + // 2. The caller must uphold the safety contract for `drop_inner_table` method. + self.free_buckets(alloc, table_layout); + } + } + } + + /// Returns a pointer to an element in the table (convenience for + /// `Bucket::from_base_index(self.data_end::(), index)`). + /// + /// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived from the + /// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling + /// this function, the following safety rules must be observed: + /// + /// * The table must already be allocated; + /// + /// * The `index` must not be greater than the number returned by the [`RawTableInner::buckets`] + /// function, i.e. `(index + 1) <= self.buckets()`. + /// + /// * The type `T` must be the actual type of the elements stored in the table, otherwise + /// using the returned [`Bucket`] may result in [`undefined behavior`]. + /// + /// It is safe to call this function with index of zero (`index == 0`) on a table that has + /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`]. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the `index` must + /// not be greater than the number returned by the [`RawTable::buckets`] function, i.e. + /// `(index + 1) <= self.buckets()`. + /// + /// ```none + /// If mem::size_of::() != 0 then return a pointer to the `element` in the `data part` of the table + /// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than + /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"): + /// + /// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data` + /// part of the `RawTableInner`, i.e. 
to the start of T3 (see [`Bucket::as_ptr`]) + /// | + /// | `base = table.data_end::()` points here + /// | (to the start of CT0 or to the end of T0) + /// v v + /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + /// ^ \__________ __________/ + /// `table.bucket(3)` returns a pointer that points \/ + /// here in the `data` part of the `RawTableInner` additional control bytes + /// (to the end of T3) `m = Group::WIDTH - 1` + /// + /// where: T0...T_n - our stored data; + /// CT0...CT_n - control bytes or metadata for `data`; + /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask` + /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function. + /// + /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number + /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. + /// ``` + /// + /// [`Bucket::from_base_index`]: Bucket::from_base_index + /// [`RawTableInner::buckets`]: RawTableInner::buckets + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] unsafe fn bucket(&self, index: usize) -> Bucket { debug_assert_ne!(self.bucket_mask, 0); @@ -1219,6 +2504,52 @@ impl RawTableInner { Bucket::from_base_index(self.data_end(), index) } + /// Returns a raw `*mut u8` pointer to the start of the `data` element in the table + /// (convenience for `self.data_end::().as_ptr().sub((index + 1) * size_of)`). + /// + /// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`, + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`undefined behavior`]: + /// + /// * The table must already be allocated; + /// + /// * The `index` must not be greater than the number returned by the [`RawTableInner::buckets`] + /// function, i.e. `(index + 1) <= self.buckets()`; + /// + /// * The `size_of` must be equal to the size of the elements stored in the table; + /// + /// ```none + /// If mem::size_of::() != 0 then return a pointer to the `element` in the `data part` of the table + /// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than + /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"): + /// + /// `table.bucket_ptr(3, mem::size_of::())` returns a pointer that points here in the + /// `data` part of the `RawTableInner`, i.e. to the start of T3 + /// | + /// | `base = table.data_end::()` points here + /// | (to the start of CT0 or to the end of T0) + /// v v + /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + /// \__________ __________/ + /// \/ + /// additional control bytes + /// `m = Group::WIDTH - 1` + /// + /// where: T0...T_n - our stored data; + /// CT0...CT_n - control bytes or metadata for `data`; + /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask` + /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function. + /// + /// P.S. 
`h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
+    /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
+    /// ```
+    ///
+    /// [`RawTableInner::buckets`]: RawTableInner::buckets
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 {
        debug_assert_ne!(self.bucket_mask, 0);
@@ -1227,9 +2558,47 @@ impl RawTableInner {
        base.sub((index + 1) * size_of)
    }

+    /// Returns a pointer to one past the last `data` element in the table as viewed from
+    /// the start point of the allocation (convenience for `self.ctrl.cast()`).
+    ///
+    /// This function actually returns a pointer to the end of the `data element` at
+    /// index "0" (zero).
+    ///
+    /// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull<T>`],
+    /// otherwise using it may result in [`undefined behavior`].
+    ///
+    /// # Note
+    ///
+    /// The type `T` must be the actual type of the elements stored in the table, otherwise
+    /// using the returned [`NonNull<T>`] may result in [`undefined behavior`].
+    ///
+    /// ```none
+    ///                        `table.data_end::<T>()` returns a pointer that points here
+    ///                        (to the end of `T0`)
+    ///                          ∨
+    /// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
+    ///                           \________  ________/
+    ///                                    \/
+    ///       `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
+    ///
+    /// where: T0...T_n  - our stored data;
+    ///        CT0...CT_n - control bytes or metadata for `data`.
+    ///        CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
+    ///                        with loading `Group` bytes from the heap works properly, even if the result
+    ///                        of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
+    ///                        `RawTableInner::set_ctrl` function.
+    ///
+    /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
+    /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
+    /// ```
+    ///
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
-    unsafe fn data_end<T>(&self) -> NonNull<T> {
-        NonNull::new_unchecked(self.ctrl.as_ptr().cast())
+    fn data_end<T>(&self) -> NonNull<T> {
+        unsafe {
+            // SAFETY: `self.ctrl` is `NonNull`, so casting it is safe
+            NonNull::new_unchecked(self.ctrl.as_ptr().cast())
+        }
    }

    /// Returns an iterator-like object for a probe sequence on the table.
@@ -1240,6 +2609,8 @@ impl RawTableInner {
    #[inline]
    fn probe_seq(&self, hash: u64) -> ProbeSeq {
        ProbeSeq {
+            // This is the same as `hash as usize % self.buckets()` because the number
+            // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
            pos: h1(hash) & self.bucket_mask,
            stride: 0,
        }
@@ -1250,7 +2621,7 @@ impl RawTableInner {
    #[cfg(feature = "raw")]
    #[inline]
    unsafe fn prepare_insert_no_grow(&mut self, hash: u64) -> Result<usize, ()> {
-        let index = self.find_insert_slot(hash);
+        let index = self.find_insert_slot(hash).index;
        let old_ctrl = *self.ctrl(index);
        if unlikely(self.growth_left == 0 && special_is_empty(old_ctrl)) {
            Err(())
@@ -1277,13 +2648,68 @@ impl RawTableInner {

    /// Sets a control byte to the hash, and possibly also the replicated control byte at
    /// the end of the array.
+    ///
+    /// This function does not make any changes to the `data` parts of the table,
+    /// or any changes to the `items` or `growth_left` field of the table.
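The layout diagrams above boil down to a small amount of pointer arithmetic. A toy model of the reverse `data` layout and the `base.sub((index + 1) * size_of)` formula used by `bucket_ptr`, with a plain array standing in for the real allocation (the values and types are illustrative only):

```rust
fn main() {
    let buckets = 4usize;
    // Pretend allocation: [T3, T2, T1, T0] with the control bytes following
    // (not modeled here). `storage[j]` therefore holds T(buckets - 1 - j).
    let storage: [u32; 4] = [33, 22, 11, 0];
    // `data_end` points one past T0, like `RawTableInner::data_end`.
    let data_end = unsafe { storage.as_ptr().add(buckets) };

    for index in 0..buckets {
        // Same shape as `bucket_ptr`: step back (index + 1) elements.
        let p = unsafe { data_end.sub(index + 1) };
        assert_eq!(unsafe { *p }, (index as u32) * 11); // T0 = 0, T1 = 11, ...
    }
}
```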
+    ///
+    /// # Safety
+    ///
+    /// The safety rules are directly derived from the safety rules for the [`RawTableInner::set_ctrl`]
+    /// method. Thus, in order to uphold the safety contracts for that method, you must observe the
+    /// following rules when calling this function:
+    ///
+    /// * The [`RawTableInner`] has already been allocated;
+    ///
+    /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e.
+    ///   `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
+    ///   be no greater than the number returned by the function [`RawTableInner::buckets`].
+    ///
+    /// Calling this function on a table that has not been allocated results in [`undefined behavior`].
+    ///
+    /// See also the [`Bucket::as_ptr`] method for more information about properly removing
+    /// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
+    ///
+    /// [`RawTableInner::set_ctrl`]: RawTableInner::set_ctrl
+    /// [`RawTableInner::buckets`]: RawTableInner::buckets
+    /// [`Bucket::as_ptr`]: Bucket::as_ptr
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
-    unsafe fn set_ctrl_h2(&self, index: usize, hash: u64) {
+    unsafe fn set_ctrl_h2(&mut self, index: usize, hash: u64) {
+        // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::set_ctrl_h2`]
        self.set_ctrl(index, h2(hash));
    }

+    /// Replaces the hash in the control byte at the given index with the provided one,
+    /// and possibly also replicates the new control byte at the end of the array of control
+    /// bytes, returning the old control byte.
+    ///
+    /// This function does not make any changes to the `data` parts of the table,
+    /// or any changes to the `items` or `growth_left` field of the table.
+    ///
+    /// # Safety
+    ///
+    /// The safety rules are directly derived from the safety rules for the [`RawTableInner::set_ctrl_h2`]
+    /// and [`RawTableInner::ctrl`] methods. Thus, in order to uphold the safety contracts for both
+    /// methods, you must observe the following rules when calling this function:
+    ///
+    /// * The [`RawTableInner`] has already been allocated;
+    ///
+    /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e.
+    ///   `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
+    ///   be no greater than the number returned by the function [`RawTableInner::buckets`].
+    ///
+    /// Calling this function on a table that has not been allocated results in [`undefined behavior`].
+    ///
+    /// See also the [`Bucket::as_ptr`] method for more information about properly removing
+    /// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
+    ///
+    /// [`RawTableInner::set_ctrl_h2`]: RawTableInner::set_ctrl_h2
+    /// [`RawTableInner::buckets`]: RawTableInner::buckets
+    /// [`Bucket::as_ptr`]: Bucket::as_ptr
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
-    unsafe fn replace_ctrl_h2(&self, index: usize, hash: u64) -> u8 {
+    unsafe fn replace_ctrl_h2(&mut self, index: usize, hash: u64) -> u8 {
+        // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::replace_ctrl_h2`]
        let prev_ctrl = *self.ctrl(index);
        self.set_ctrl_h2(index, hash);
        prev_ctrl
@@ -1291,10 +2717,35 @@ impl RawTableInner {

    /// Sets a control byte, and possibly also the replicated control byte at
    /// the end of the array.
+    ///
+    /// This function does not make any changes to the `data` parts of the table,
+    /// or any changes to the `items` or `growth_left` field of the table.
+    ///
+    /// # Safety
+    ///
+    /// You must observe the following safety rules when calling this function:
+    ///
+    /// * The [`RawTableInner`] has already been allocated;
+    ///
+    /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e.
+    ///   `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
+    ///   be no greater than the number returned by the function [`RawTableInner::buckets`].
+    ///
+    /// Calling this function on a table that has not been allocated results in [`undefined behavior`].
+    ///
+    /// See also the [`Bucket::as_ptr`] method for more information about properly removing
+    /// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
+    ///
+    /// [`RawTableInner::buckets`]: RawTableInner::buckets
+    /// [`Bucket::as_ptr`]: Bucket::as_ptr
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
-    unsafe fn set_ctrl(&self, index: usize, ctrl: u8) {
+    unsafe fn set_ctrl(&mut self, index: usize, ctrl: u8) {
        // Replicate the first Group::WIDTH control bytes at the end of
-        // the array without using a branch:
+        // the array without using a branch. If the table is smaller than
+        // the group width (self.buckets() < Group::WIDTH), then
+        // `index2 = Group::WIDTH + index`; otherwise `index2` is:
+        //
        // - If index >= Group::WIDTH then index == index2.
        // - Otherwise index2 == self.bucket_mask + 1 + index.
        //
@@ -1311,16 +2762,43 @@ impl RawTableInner {
        // ---------------------------------------------
        // | [A] | [B] | [EMPTY] | [EMPTY] | [A] | [B] |
        // ---------------------------------------------

+        // This is the same as `(index.wrapping_sub(Group::WIDTH)) % self.buckets() + Group::WIDTH`
+        // because the number of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
        let index2 = ((index.wrapping_sub(Group::WIDTH)) & self.bucket_mask) + Group::WIDTH;

+        // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::set_ctrl`]
        *self.ctrl(index) = ctrl;
        *self.ctrl(index2) = ctrl;
    }

    /// Returns a pointer to a control byte.
+    ///
+    /// # Safety
+    ///
+    /// For an allocated [`RawTableInner`], the result is [`Undefined Behavior`] if the
+    /// `index` is greater than `self.bucket_mask + 1 + Group::WIDTH`. Calling this
+    /// function with `index == self.bucket_mask + 1 + Group::WIDTH` returns a pointer
+    /// to the end of the allocated table, which is useless on its own.
+    ///
+    /// Calling this function with `index >= self.bucket_mask + 1 + Group::WIDTH` on a
+    /// table that has not been allocated results in [`Undefined Behavior`].
+    ///
+    /// So to satisfy both requirements you should always follow the rule that
+    /// `index < self.bucket_mask + 1 + Group::WIDTH`.
+    ///
+    /// Calling this function on a [`RawTableInner`] that has not been allocated is safe
+    /// for read-only purposes.
+    ///
+    /// See also the [`Bucket::as_ptr()`] method for more information about properly removing
+    /// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
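The branch-free mirror index that `set_ctrl` computes above can be checked exhaustively for a small table. A sketch, with `WIDTH` standing in for `Group::WIDTH` and a power-of-two bucket count larger than the group width:

```rust
fn main() {
    const WIDTH: usize = 16; // illustrative stand-in for Group::WIDTH
    let buckets = 64usize; // power of two
    let mask = buckets - 1;
    for index in 0..buckets {
        // The formula from `set_ctrl`, verbatim.
        let index2 = (index.wrapping_sub(WIDTH) & mask) + WIDTH;
        if index >= WIDTH {
            // Large indices map to themselves: the second write is a
            // redundant store to the same byte.
            assert_eq!(index2, index);
        } else {
            // Small indices are replicated into the trailing mirror bytes.
            assert_eq!(index2, mask + 1 + index);
        }
    }
}
```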
+ /// + /// [`Bucket::as_ptr()`]: Bucket::as_ptr() + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] unsafe fn ctrl(&self, index: usize) -> *mut u8 { debug_assert!(index < self.num_ctrl_bytes()); + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::ctrl`] self.ctrl.as_ptr().add(index) } @@ -1329,6 +2807,17 @@ impl RawTableInner { self.bucket_mask + 1 } + /// Checks whether the bucket at `index` is full. + /// + /// # Safety + /// + /// The caller must ensure `index` is less than the number of buckets. + #[inline] + unsafe fn is_bucket_full(&self, index: usize) -> bool { + debug_assert!(index < self.buckets()); + is_full(*self.ctrl(index)) + } + #[inline] fn num_ctrl_bytes(&self) -> usize { self.bucket_mask + 1 + Group::WIDTH @@ -1339,25 +2828,45 @@ impl RawTableInner { self.bucket_mask == 0 } + /// Attempts to allocate a new hash table with at least enough capacity + /// for inserting the given number of elements without reallocating, + /// and return it inside ScopeGuard to protect against panic in the hash + /// function. + /// + /// # Note + /// + /// It is recommended (but not required): + /// + /// * That the new table's `capacity` be greater than or equal to `self.items`. + /// + /// * The `alloc` is the same [`Allocator`] as the `Allocator` used + /// to allocate this table. + /// + /// * The `table_layout` is the same [`TableLayout`] as the `TableLayout` used + /// to allocate this table. + /// + /// If `table_layout` does not match the `TableLayout` that was used to allocate + /// this table, then using `mem::swap` with the `self` and the new table returned + /// by this function results in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[allow(clippy::mut_mut)] #[inline] - unsafe fn prepare_resize( + fn prepare_resize<'a, A>( &self, + alloc: &'a A, table_layout: TableLayout, capacity: usize, fallibility: Fallibility, - ) -> Result, TryReserveError> { + ) -> Result, TryReserveError> + where + A: Allocator, + { debug_assert!(self.items <= capacity); // Allocate and initialize the new table. - let mut new_table = RawTableInner::fallible_with_capacity( - self.alloc.clone(), - table_layout, - capacity, - fallibility, - )?; - new_table.growth_left -= self.items; - new_table.items = self.items; + let new_table = + RawTableInner::fallible_with_capacity(alloc, table_layout, capacity, fallibility)?; // The hash function may panic, in which case we simply free the new // table without dropping any elements that may have been copied into @@ -1367,7 +2876,11 @@ impl RawTableInner { // the comment at the bottom of this function. Ok(guard(new_table, move |self_| { if !self_.is_empty_singleton() { - self_.free_buckets(table_layout); + // SAFETY: + // 1. We have checked that our table is allocated. + // 2. We know for sure that the `alloc` and `table_layout` matches the + // [`Allocator`] and [`TableLayout`] used to allocate this table. + unsafe { self_.free_buckets(alloc, table_layout) }; } })) } @@ -1376,16 +2889,38 @@ impl RawTableInner { /// /// This uses dynamic dispatch to reduce the amount of /// code generated, but it is eliminated by LLVM optimizations when inlined. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`undefined behavior`]: + /// + /// * The `alloc` must be the same [`Allocator`] as the `Allocator` used + /// to allocate this table. 
+ /// + /// * The `layout` must be the same [`TableLayout`] as the `TableLayout` + /// used to allocate this table. + /// + /// * The `drop` function (`fn(*mut u8)`) must be the actual drop function of + /// the elements stored in the table. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[allow(clippy::inline_always)] #[inline(always)] - unsafe fn reserve_rehash_inner( + unsafe fn reserve_rehash_inner( &mut self, + alloc: &A, additional: usize, hasher: &dyn Fn(&mut Self, usize) -> u64, fallibility: Fallibility, layout: TableLayout, drop: Option, - ) -> Result<(), TryReserveError> { + ) -> Result<(), TryReserveError> + where + A: Allocator, + { // Avoid `Option::ok_or_else` because it bloats LLVM IR. let new_items = match self.items.checked_add(additional) { Some(new_items) => new_items, @@ -1395,12 +2930,30 @@ impl RawTableInner { if new_items <= full_capacity / 2 { // Rehash in-place without re-allocating if we have plenty of spare // capacity that is locked up due to DELETED entries. + + // SAFETY: + // 1. We know for sure that `[`RawTableInner`]` has already been allocated + // (since new_items <= full_capacity / 2); + // 2. The caller ensures that `drop` function is the actual drop function of + // the elements stored in the table. + // 3. The caller ensures that `layout` matches the [`TableLayout`] that was + // used to allocate this table. + // 4. The caller ensures that the control bytes of the `RawTableInner` + // are already initialized. self.rehash_in_place(hasher, layout.size, drop); Ok(()) } else { // Otherwise, conservatively resize to at least the next size up // to avoid churning deletes into frequent rehashes. + // + // SAFETY: + // 1. We know for sure that `capacity >= self.items`. + // 2. The caller ensures that `alloc` and `layout` matches the [`Allocator`] and + // [`TableLayout`] that were used to allocate this table. + // 3. The caller ensures that the control bytes of the `RawTableInner` + // are already initialized. self.resize_inner( + alloc, usize::max(new_items, full_capacity + 1), hasher, fallibility, @@ -1409,48 +2962,160 @@ impl RawTableInner { } } + /// Returns an iterator over full buckets indices in the table. + /// + /// # Safety + /// + /// Behavior is undefined if any of the following conditions are violated: + /// + /// * The caller has to ensure that the `RawTableInner` outlives the + /// `FullBucketsIndices`. Because we cannot make the `next` method + /// unsafe on the `FullBucketsIndices` struct, we have to make the + /// `full_buckets_indices` method unsafe. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + #[inline(always)] + unsafe fn full_buckets_indices(&self) -> FullBucketsIndices { + // SAFETY: + // 1. Since the caller of this function ensures that the control bytes + // are properly initialized and `self.ctrl(0)` points to the start + // of the array of control bytes, therefore: `ctrl` is valid for reads, + // properly aligned to `Group::WIDTH` and points to the properly initialized + // control bytes. + // 2. The value of `items` is equal to the amount of data (values) added + // to the table. + // + // `ctrl` points here (to the start + // of the first control byte `CT0`) + // ∨ + // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH + // \________ ________/ + // \/ + // `n = buckets - 1`, i.e. 
`RawTableInner::buckets() - 1`
+        //
+        //    where: T0...T_n  - our stored data;
+        //           CT0...CT_n - control bytes or metadata for `data`.
+        let ctrl = NonNull::new_unchecked(self.ctrl(0));
+
+        FullBucketsIndices {
+            // Load the first group
+            // SAFETY: See explanation above.
+            current_group: Group::load_aligned(ctrl.as_ptr()).match_full().into_iter(),
+            group_first_index: 0,
+            ctrl,
+            items: self.items,
+        }
+    }
+
    /// Allocates a new table of a different size and moves the contents of the
    /// current table into it.
    ///
    /// This uses dynamic dispatch to reduce the amount of
    /// code generated, but it is eliminated by LLVM optimizations when inlined.
+    ///
+    /// # Safety
+    ///
+    /// If any of the following conditions are violated, the result is
+    /// [`undefined behavior`]:
+    ///
+    /// * The `alloc` must be the same [`Allocator`] as the `Allocator` used
+    ///   to allocate this table;
+    ///
+    /// * The `layout` must be the same [`TableLayout`] as the `TableLayout`
+    ///   used to allocate this table;
+    ///
+    /// * The [`RawTableInner`] must have properly initialized control bytes.
+    ///
+    /// The caller of this function must ensure that `capacity >= self.items`,
+    /// otherwise:
+    ///
+    /// * If `self.items != 0`, calling this function with `capacity == 0`
+    ///   results in [`undefined behavior`].
+    ///
+    /// * If `capacity_to_buckets(capacity) < Group::WIDTH` and
+    ///   `self.items > capacity_to_buckets(capacity)`, calling this function
+    ///   results in [`undefined behavior`].
+    ///
+    /// * If `capacity_to_buckets(capacity) >= Group::WIDTH` and
+    ///   `self.items > capacity_to_buckets(capacity)`, this function
+    ///   will never return (it will go into an infinite loop).
+    ///
+    /// Note: It is recommended (but not required) that the new table's `capacity`
+    /// be greater than or equal to `self.items`. If `capacity <= self.items`,
+    /// this function may never return. See [`RawTableInner::find_insert_slot`] for
+    /// more information.
+    ///
+    /// [`RawTableInner::find_insert_slot`]: RawTableInner::find_insert_slot
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[allow(clippy::inline_always)]
    #[inline(always)]
-    unsafe fn resize_inner(
+    unsafe fn resize_inner<A>(
        &mut self,
+        alloc: &A,
        capacity: usize,
        hasher: &dyn Fn(&mut Self, usize) -> u64,
        fallibility: Fallibility,
        layout: TableLayout,
-    ) -> Result<(), TryReserveError> {
-        let mut new_table = self.prepare_resize(layout, capacity, fallibility)?;
-
-        // Copy all elements to the new table.
-        for i in 0..self.buckets() {
-            if !is_full(*self.ctrl(i)) {
-                continue;
-            }
-
+    ) -> Result<(), TryReserveError>
+    where
+        A: Allocator,
+    {
+        // SAFETY: We know for sure that `alloc` and `layout` match the [`Allocator`] and
+        // [`TableLayout`] that were used to allocate this table.
+        let mut new_table = self.prepare_resize(alloc, layout, capacity, fallibility)?;
+
+        // SAFETY: We know for sure that RawTableInner will outlive the
+        // returned `FullBucketsIndices` iterator, and the caller of this
+        // function ensures that the control bytes are properly initialized.
+        for full_byte_index in self.full_buckets_indices() {
            // This may panic.
-            let hash = hasher(self, i);
+            let hash = hasher(self, full_byte_index);

+            // SAFETY:
            // We can use a simpler version of insert() here since:
-            // - there are no DELETED entries.
-            // - we know there is enough space in the table.
-            // - all elements are unique.
-            let (index, _) = new_table.prepare_insert_slot(hash);
-
+            // 1. There are no DELETED entries.
+            // 2.
We know there is enough space in the table. + // 3. All elements are unique. + // 4. The caller of this function guarantees that `capacity > 0` + // so `new_table` must already have some allocated memory. + // 5. We set `growth_left` and `items` fields of the new table + // after the loop. + // 6. We insert into the table, at the returned index, the data + // matching the given hash immediately after calling this function. + let (new_index, _) = new_table.prepare_insert_slot(hash); + + // SAFETY: + // + // * `src` is valid for reads of `layout.size` bytes, since the + // table is alive and the `full_byte_index` is guaranteed to be + // within bounds (see `FullBucketsIndices::next_impl`); + // + // * `dst` is valid for writes of `layout.size` bytes, since the + // caller ensures that `table_layout` matches the [`TableLayout`] + // that was used to allocate old table and we have the `new_index` + // returned by `prepare_insert_slot`. + // + // * Both `src` and `dst` are properly aligned. + // + // * Both `src` and `dst` point to different region of memory. ptr::copy_nonoverlapping( - self.bucket_ptr(i, layout.size), - new_table.bucket_ptr(index, layout.size), + self.bucket_ptr(full_byte_index, layout.size), + new_table.bucket_ptr(new_index, layout.size), layout.size, ); } + // The hash function didn't panic, so we can safely set the + // `growth_left` and `items` fields of the new table. + new_table.growth_left -= self.items; + new_table.items = self.items; + // We successfully copied all elements without panicking. Now replace // self with the new table. The old table will have its memory freed but // the items will not be dropped (since they have been moved into the // new table). + // SAFETY: The caller ensures that `table_layout` matches the [`TableLayout`] + // that was used to allocate this table. mem::swap(self, &mut new_table); Ok(()) @@ -1463,6 +3128,21 @@ impl RawTableInner { /// /// This uses dynamic dispatch to reduce the amount of /// code generated, but it is eliminated by LLVM optimizations when inlined. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`undefined behavior`]: + /// + /// * The `size_of` must be equal to the size of the elements stored in the table; + /// + /// * The `drop` function (`fn(*mut u8)`) must be the actual drop function of + /// the elements stored in the table. + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[allow(clippy::inline_always)] #[cfg_attr(feature = "inline-more", inline(always))] #[cfg_attr(not(feature = "inline-more"), inline)] @@ -1506,8 +3186,10 @@ impl RawTableInner { let hash = hasher(*guard, i); // Search for a suitable place to put it - let new_i = guard.find_insert_slot(hash); - let new_i_p = guard.bucket_ptr(new_i, size_of); + // + // SAFETY: Caller of this function ensures that the control bytes + // are properly initialized. + let new_i = guard.find_insert_slot(hash).index; // Probing works by scanning through all of the control // bytes in groups, which may not be aligned to the group @@ -1519,6 +3201,8 @@ impl RawTableInner { continue 'outer; } + let new_i_p = guard.bucket_ptr(new_i, size_of); + // We are moving the current item to a new position. Write // our H2 to the control byte of the new position. 
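Stepping back, the grow-or-rehash policy that `reserve_rehash_inner` applies before either of the paths above reduces to a few lines. A simplified sketch; the `Plan` enum and the function shape are invented for illustration, and the real code threads an allocator, a `TableLayout` and fallibility through the same decision:

```rust
#[derive(Debug, PartialEq)]
enum Plan {
    RehashInPlace,
    Resize(usize), // requested minimum capacity for the new table
    CapacityOverflow,
}

/// Sketch of the decision made in `reserve_rehash_inner`.
fn plan(items: usize, additional: usize, full_capacity: usize) -> Plan {
    let new_items = match items.checked_add(additional) {
        Some(n) => n,
        None => return Plan::CapacityOverflow,
    };
    if new_items <= full_capacity / 2 {
        // Plenty of capacity is locked up in DELETED entries:
        // reclaim it in place instead of allocating.
        Plan::RehashInPlace
    } else {
        // Conservatively resize to at least the next size up so that
        // deletes do not churn into frequent rehashes.
        Plan::Resize(usize::max(new_items, full_capacity + 1))
    }
}

fn main() {
    assert_eq!(plan(3, 1, 28), Plan::RehashInPlace); // 4 <= 28 / 2
    assert_eq!(plan(20, 10, 28), Plan::Resize(30)); // 30 > 14, so grow
}
```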
let prev_ctrl = guard.replace_ctrl_h2(new_i, hash); @@ -1545,17 +3229,107 @@ impl RawTableInner { mem::forget(guard); } + /// Deallocates the table without dropping any entries. + /// + /// # Note + /// + /// This function must be called only after [`drop_elements`](RawTableInner::drop_elements), + /// else it can lead to leaking of memory. Also calling this function automatically + /// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid + /// (dangling) the `ctrl` field of the table. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`Undefined Behavior`]: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used + /// to allocate this table. + /// + /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` that was used + /// to allocate this table. + /// + /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information. + /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// [`GlobalAlloc::dealloc`]: https://doc.rust-lang.org/alloc/alloc/trait.GlobalAlloc.html#tymethod.dealloc + /// [`Allocator::deallocate`]: https://doc.rust-lang.org/alloc/alloc/trait.Allocator.html#tymethod.deallocate + #[inline] + unsafe fn free_buckets(&mut self, alloc: &A, table_layout: TableLayout) + where + A: Allocator, + { + // SAFETY: The caller must uphold the safety contract for `free_buckets` + // method. + let (ptr, layout) = self.allocation_info(table_layout); + alloc.deallocate(ptr, layout); + } + + /// Returns a pointer to the allocated memory and the layout that was used to + /// allocate the table. + /// + /// # Safety + /// + /// Caller of this function must observe the following safety rules: + /// + /// * The [`RawTableInner`] has already been allocated, otherwise + /// calling this function results in [`undefined behavior`] + /// + /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` + /// that was used to allocate this table. Failure to comply with this condition + /// may result in [`undefined behavior`]. + /// + /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// [`GlobalAlloc::dealloc`]: https://doc.rust-lang.org/alloc/alloc/trait.GlobalAlloc.html#tymethod.dealloc + /// [`Allocator::deallocate`]: https://doc.rust-lang.org/alloc/alloc/trait.Allocator.html#tymethod.deallocate #[inline] - unsafe fn free_buckets(&mut self, table_layout: TableLayout) { + unsafe fn allocation_info(&self, table_layout: TableLayout) -> (NonNull, Layout) { + debug_assert!( + !self.is_empty_singleton(), + "this function can only be called on non-empty tables" + ); + // Avoid `Option::unwrap_or_else` because it bloats LLVM IR. let (layout, ctrl_offset) = match table_layout.calculate_layout_for(self.buckets()) { Some(lco) => lco, - None => hint::unreachable_unchecked(), + None => unsafe { hint::unreachable_unchecked() }, }; - self.alloc.deallocate( - NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)), + ( + // SAFETY: The caller must uphold the safety contract for `allocation_info` method. 
+            unsafe { NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)) },
            layout,
-        );
+        )
+    }
+
+    /// Returns a pointer to the allocated memory and the layout that was used to
+    /// allocate the table. If the [`RawTableInner`] has not been allocated, this
+    /// function returns a `dangling` pointer and a `()` (unit) layout.
+    ///
+    /// # Safety
+    ///
+    /// The `table_layout` must be the same [`TableLayout`] as the `TableLayout`
+    /// that was used to allocate this table. Failure to comply with this condition
+    /// may result in [`undefined behavior`].
+    ///
+    /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information.
+    ///
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+    /// [`GlobalAlloc::dealloc`]: https://doc.rust-lang.org/alloc/alloc/trait.GlobalAlloc.html#tymethod.dealloc
+    /// [`Allocator::deallocate`]: https://doc.rust-lang.org/alloc/alloc/trait.Allocator.html#tymethod.deallocate
+    #[cfg(feature = "raw")]
+    unsafe fn allocation_info_or_zero(&self, table_layout: TableLayout) -> (NonNull<u8>, Layout) {
+        if self.is_empty_singleton() {
+            (NonNull::dangling(), Layout::new::<()>())
+        } else {
+            // SAFETY:
+            // 1. We have checked that our table is allocated.
+            // 2. The caller ensures that `table_layout` matches the [`TableLayout`]
+            //    that was used to allocate this table.
+            unsafe { self.allocation_info(table_layout) }
+        }
    }

    /// Marks all table buckets as empty without dropping their contents.
@@ -1570,27 +3344,95 @@ impl RawTableInner {
        self.growth_left = bucket_mask_to_capacity(self.bucket_mask);
    }

+    /// Erases the [`Bucket`]'s control byte at the given index so that it is no longer
+    /// treated as full, decreases the `items` of the table and, if it can be done,
+    /// increases `self.growth_left`.
+    ///
+    /// This function does not actually erase / drop the [`Bucket`] itself, i.e. it
+    /// does not make any changes to the `data` parts of the table. The caller of this
+    /// function must take care to properly drop the `data`, otherwise calling this
+    /// function may result in a memory leak.
+    ///
+    /// # Safety
+    ///
+    /// You must observe the following safety rules when calling this function:
+    ///
+    /// * The [`RawTableInner`] has already been allocated;
+    ///
+    /// * The control byte at the given position must be full;
+    ///
+    /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e.
+    ///   `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
+    ///   be no greater than the number returned by the function [`RawTableInner::buckets`].
+    ///
+    /// Calling this function on a table that has not been allocated results in [`undefined behavior`].
+    ///
+    /// Calling this function on a table with no elements is unspecified, but calling subsequent
+    /// functions is likely to result in [`undefined behavior`] due to overflow subtraction
+    /// (`self.items -= 1` causes an overflow when `self.items == 0`).
+    ///
+    /// See also the [`Bucket::as_ptr`] method for more information about properly removing
+    /// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
+    ///
+    /// [`RawTableInner::buckets`]: RawTableInner::buckets
+    /// [`Bucket::as_ptr`]: Bucket::as_ptr
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn erase(&mut self, index: usize) {
-        debug_assert!(is_full(*self.ctrl(index)));
+        debug_assert!(self.is_bucket_full(index));
+
+        // This is the same as `index.wrapping_sub(Group::WIDTH) % self.buckets()` because
+        // the number of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
        let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask;
+        // SAFETY:
+        // - The caller must uphold the safety contract for the `erase` method;
+        // - `index_before` is guaranteed to be in range due to masking with `self.bucket_mask`
        let empty_before = Group::load(self.ctrl(index_before)).match_empty();
        let empty_after = Group::load(self.ctrl(index)).match_empty();

-        // If we are inside a continuous block of Group::WIDTH full or deleted
-        // cells then a probe window may have seen a full block when trying to
-        // insert. We therefore need to keep that block non-empty so that
-        // lookups will continue searching to the next probe window.
+        // Inserting and searching in the map is performed by two key functions:
+        //
+        // - The `find_insert_slot` function that looks up the index of any `EMPTY` or `DELETED`
+        //   slot in a group to be able to insert. If it doesn't find an `EMPTY` or `DELETED`
+        //   slot immediately in the first group, it jumps to the next `Group` looking for it,
+        //   and so on until it has gone through all the groups in the control bytes.
        //
-        // Note that in this context `leading_zeros` refers to the bytes at the
-        // end of a group, while `trailing_zeros` refers to the bytes at the
-        // beginning of a group.
+        // - The `find_inner` function that looks for the index of the desired element by looking
+        //   at all the `FULL` bytes in the group. If it did not find the element right away, and
+        //   there is no `EMPTY` byte in the group, then this means that the `find_insert_slot`
+        //   function may have found a suitable slot in the next group. Therefore, `find_inner`
+        //   jumps further, and if it does not find the desired element and again there is no `EMPTY`
+        //   byte, then it jumps further, and so on. The search stops only if the `find_inner` function
+        //   finds the desired element or hits an `EMPTY` slot/byte.
+        //
+        // Accordingly, this leads to two consequences:
+        //
+        // - The map must have `EMPTY` slots (bytes);
+        //
+        // - You can't just mark the byte to be erased as `EMPTY`, because otherwise the `find_inner`
+        //   function may stumble upon an `EMPTY` byte before finding the desired element and stop
+        //   searching.
+        //
+        // Thus it is necessary to check all bytes after and before the erased element. If we are in
+        // a contiguous `Group` of `FULL` or `DELETED` bytes (the number of `FULL` or `DELETED` bytes
+        // before and after is greater than or equal to `Group::WIDTH`), then we must mark our byte as
+        // `DELETED` in order for the `find_inner` function to go further. On the other hand, if there
+        // is at least one `EMPTY` slot in the `Group`, then the `find_inner` function will still stumble
+        // upon an `EMPTY` byte, so we can safely mark our erased byte as `EMPTY` as well.
+        //
+        // Finally, since `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index`
+        // for such tables, and given all of the above, tables smaller than the group width
+        // (self.buckets() < Group::WIDTH) cannot have `DELETED` bytes.
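The conclusion of the reasoning above is a single predicate, applied by the code just below. A sketch of the EMPTY-versus-DELETED decision, with plain counts standing in for the `empty_before.leading_zeros()` / `empty_after.trailing_zeros()` bitmask queries and `WIDTH` standing in for `Group::WIDTH`:

```rust
const WIDTH: u32 = 16; // illustrative stand-in for Group::WIDTH

/// Sketch of the tombstone rule in `erase`: keep a DELETED marker only if
/// the erased slot sits inside a run of at least WIDTH non-EMPTY bytes,
/// i.e. some probe window could have seen a full group and moved on.
fn needs_tombstone(non_empty_before: u32, non_empty_after: u32) -> bool {
    non_empty_before + non_empty_after >= WIDTH
}

fn main() {
    assert!(needs_tombstone(10, 8)); // long run: mark DELETED
    assert!(!needs_tombstone(3, 2)); // an EMPTY byte is nearby: mark EMPTY
}
```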
+        //
+        // Note that in this context `leading_zeros` refers to the bytes at the end of a group, while
+        // `trailing_zeros` refers to the bytes at the beginning of a group.
        let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH {
            DELETED
        } else {
            self.growth_left += 1;
            EMPTY
        };
+        // SAFETY: the caller must uphold the safety contract for the `erase` method.
        self.set_ctrl(index, ctrl);
        self.items -= 1;
    }
}

impl<T: Clone, A: Allocator + Clone> Clone for RawTable<T, A> {
    fn clone(&self) -> Self {
        if self.table.is_empty_singleton() {
-            Self::new_in(self.table.alloc.clone())
+            Self::new_in(self.alloc.clone())
        } else {
            unsafe {
                // Avoid `Result::ok_or_else` because it bloats LLVM IR.
-                let new_table = match Self::new_uninitialized(
-                    self.table.alloc.clone(),
+                //
+                // SAFETY: This is safe as we are taking the size of an already allocated table
+                // and therefore capacity overflow cannot occur, `self.table.buckets()` is a power
+                // of two and all allocator errors will be caught inside `RawTableInner::new_uninitialized`.
+                let mut new_table = match Self::new_uninitialized(
+                    self.alloc.clone(),
                    self.table.buckets(),
                    Fallibility::Infallible,
                ) {
@@ -1612,24 +3458,32 @@ impl<T: Clone, A: Allocator + Clone> Clone for RawTable<T, A> {
                    Err(_) => hint::unreachable_unchecked(),
                };

-                // If cloning fails then we need to free the allocation for the
-                // new table. However we don't run its drop since its control
-                // bytes are not initialized yet.
-                let mut guard = guard(ManuallyDrop::new(new_table), |new_table| {
-                    new_table.free_buckets();
-                });
-
-                guard.clone_from_spec(self);
-
-                // Disarm the scope guard and return the newly created table.
-                ManuallyDrop::into_inner(ScopeGuard::into_inner(guard))
+                // Cloning elements may fail (the clone function may panic). But we don't
+                // need to worry about uninitialized control bits, since:
+                // 1. The number of items (elements) in the table is zero, which means that
+                //    the control bits will not be read by the `Drop` function.
+                // 2. The `clone_from_spec` method will first copy all control bits from
+                //    `self` (thus initializing them). But this will not affect the `Drop`
+                //    function, since the `clone_from_spec` function sets `items` only after
+                //    successfully cloning all elements.
+                new_table.clone_from_spec(self);
+                new_table
            }
        }
    }

    fn clone_from(&mut self, source: &Self) {
        if source.table.is_empty_singleton() {
-            *self = Self::new_in(self.table.alloc.clone());
+            let mut old_inner = mem::replace(&mut self.table, RawTableInner::NEW);
+            unsafe {
+                // SAFETY:
+                // 1. We call the function only once;
+                // 2. We know for sure that `alloc` and `table_layout` match the [`Allocator`]
+                //    and [`TableLayout`] that were used to allocate this table.
+                // 3. If any elements' drop function panics, then there will only be a memory leak,
+                //    because we have replaced the inner table with a new one.
+                old_inner.drop_inner_table::<T, A>(&self.alloc, Self::TABLE_LAYOUT);
            }
        } else {
            unsafe {
                // Make sure that if any panic occurs, we clear the table and
@@ -1644,27 +3498,38 @@ impl<T: Clone, A: Allocator + Clone> Clone for RawTable<T, A> {
                //
                // This leak is unavoidable: we can't try dropping more elements
                // since this could lead to another panic and abort the process.
-                self_.drop_elements();
+                //
+                // SAFETY: If something goes wrong we clear our table right after
+                // dropping the elements, so there is no double drop, since `items`
+                // will be equal to zero.
+                self_.table.drop_elements::<T>();

                // If necessary, resize our table to match the source.
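Both this `clone_from` path and `prepare_resize` earlier rely on a scope guard for panic safety: a cleanup closure runs on unwind unless it is explicitly disarmed. The crate uses the `scopeguard` crate's `guard` for this; the following is a minimal hand-rolled sketch of the same idea, with all names invented for illustration:

```rust
/// Minimal scope guard: runs `cleanup` on drop unless disarmed.
struct Guard<T, F: FnMut(&mut T)> {
    value: T,
    cleanup: F,
    armed: bool,
}

impl<T, F: FnMut(&mut T)> Drop for Guard<T, F> {
    fn drop(&mut self) {
        if self.armed {
            (self.cleanup)(&mut self.value);
        }
    }
}

fn main() {
    let mut table = vec![1, 2, 3]; // stand-in for a partially cloned table
    {
        let mut g = Guard {
            value: &mut table,
            cleanup: |t: &mut &mut Vec<i32>| t.clear(), // "clear on panic"
            armed: true,
        };
        // ... fallible cloning work would happen here; a panic before the
        // next line would run the cleanup and leave an empty table ...
        g.armed = false; // success: disarm and keep the contents
    }
    assert_eq!(table, [1, 2, 3]);
}
```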
if self_.buckets() != source.buckets() { - // Skip our drop by using ptr::write. - if !self_.table.is_empty_singleton() { - self_.free_buckets(); + let new_inner = match RawTableInner::new_uninitialized( + &self_.alloc, + Self::TABLE_LAYOUT, + source.buckets(), + Fallibility::Infallible, + ) { + Ok(table) => table, + Err(_) => hint::unreachable_unchecked(), + }; + // Replace the old inner with new uninitialized one. It's ok, since if something gets + // wrong `ScopeGuard` will initialize all control bytes and leave empty table. + let mut old_inner = mem::replace(&mut self_.table, new_inner); + if !old_inner.is_empty_singleton() { + // SAFETY: + // 1. We have checked that our table is allocated. + // 2. We know for sure that `alloc` and `table_layout` matches + // the [`Allocator`] and [`TableLayout`] that were used to allocate this table. + old_inner.free_buckets(&self_.alloc, Self::TABLE_LAYOUT); } - (&mut **self_ as *mut Self).write( - // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. - match Self::new_uninitialized( - self_.table.alloc.clone(), - source.buckets(), - Fallibility::Infallible, - ) { - Ok(table) => table, - Err(_) => hint::unreachable_unchecked(), - }, - ); } + // Cloning elements may fail (the clone function may panic), but the `ScopeGuard` + // inside the `clone_from_impl` function will take care of that, dropping all + // cloned elements if necessary. Our `ScopeGuard` will clear the table. self_.clone_from_spec(source); // Disarm the scope guard if cloning was successful. @@ -1696,7 +3561,8 @@ impl RawTableClone for RawTable { .copy_to_nonoverlapping(self.table.ctrl(0), self.table.num_ctrl_bytes()); source .data_start() - .copy_to_nonoverlapping(self.data_start(), self.table.buckets()); + .as_ptr() + .copy_to_nonoverlapping(self.data_start().as_ptr(), self.table.buckets()); self.table.items = source.table.items; self.table.growth_left = source.table.growth_left; @@ -1720,9 +3586,9 @@ impl RawTable { // to make sure we drop only the elements that have been // cloned so far. let mut guard = guard((0, &mut *self), |(index, self_)| { - if mem::needs_drop::() && !self_.is_empty() { + if T::NEEDS_DROP { for i in 0..=*index { - if is_full(*self_.table.ctrl(i)) { + if self_.is_bucket_full(i) { self_.bucket(i).drop(); } } @@ -1757,7 +3623,7 @@ impl RawTable { { self.clear(); - let guard_self = guard(&mut *self, |self_| { + let mut guard_self = guard(&mut *self, |self_| { // Clear the partially copied table if a panic occurs, otherwise // items and growth_left will be out of sync with the contents // of the table. @@ -1790,7 +3656,7 @@ impl RawTable { } } -impl Default for RawTable { +impl Default for RawTable { #[inline] fn default() -> Self { Self::new_in(Default::default()) @@ -1798,31 +3664,41 @@ impl Default for RawTable { } #[cfg(feature = "nightly")] -unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawTable { +unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawTable { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { - if !self.table.is_empty_singleton() { - unsafe { - self.drop_elements(); - self.free_buckets(); - } + unsafe { + // SAFETY: + // 1. We call the function only once; + // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. 
If the drop function of any elements fails, then only a memory leak will occur, + // and we don't care because we are inside the `Drop` function of the `RawTable`, + // so there won't be any table left in an inconsistent state. + self.table + .drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); } } } #[cfg(not(feature = "nightly"))] -impl Drop for RawTable { +impl Drop for RawTable { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { - if !self.table.is_empty_singleton() { - unsafe { - self.drop_elements(); - self.free_buckets(); - } + unsafe { + // SAFETY: + // 1. We call the function only once; + // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. If the drop function of any elements fails, then only a memory leak will occur, + // and we don't care because we are inside the `Drop` function of the `RawTable`, + // so there won't be any table left in an inconsistent state. + self.table + .drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); } } } -impl IntoIterator for RawTable { +impl IntoIterator for RawTable { type Item = T; type IntoIter = RawIntoIter; @@ -1840,7 +3716,7 @@ impl IntoIterator for RawTable { pub(crate) struct RawIterRange { // Mask of full buckets in the current group. Bits are cleared from this // mask as each element is processed. - current_group: BitMask, + current_group: BitMaskIter, // Pointer to the buckets for the current group. data: Bucket, @@ -1856,19 +3732,44 @@ pub(crate) struct RawIterRange { impl RawIterRange { /// Returns a `RawIterRange` covering a subset of a table. /// - /// The control byte address must be aligned to the group size. + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`undefined behavior`]: + /// + /// * `ctrl` must be [valid] for reads, i.e. table outlives the `RawIterRange`; + /// + /// * `ctrl` must be properly aligned to the group size (Group::WIDTH); + /// + /// * `ctrl` must point to the array of properly initialized control bytes; + /// + /// * `data` must be the [`Bucket`] at the `ctrl` index in the table; + /// + /// * the value of `len` must be less than or equal to the number of table buckets, + /// and the returned value of `ctrl.as_ptr().add(len).offset_from(ctrl.as_ptr())` + /// must be positive. + /// + /// * The `ctrl.add(len)` pointer must be either in bounds or one + /// byte past the end of the same [allocated table]. + /// + /// * The `len` must be a power of two. 
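`current_group` above is consumed as an iterator over the set bits of a group's `match_full()` mask (the `BitMaskIter` type this patch introduces). A sketch of the underlying technique, yielding set-bit positions in ascending order by clearing the lowest set bit at each step:

```rust
/// Sketch of bit-mask iteration as used by `RawIterRange`: each set bit in
/// the mask marks a full bucket within the current group.
struct BitIter(u16); // one bit per byte of an illustrative 16-wide group

impl Iterator for BitIter {
    type Item = usize;

    fn next(&mut self) -> Option<usize> {
        if self.0 == 0 {
            return None;
        }
        let bit = self.0.trailing_zeros() as usize; // lowest set bit
        self.0 &= self.0 - 1; // clear it
        Some(bit)
    }
}

fn main() {
    // Buckets 0, 3 and 9 of this group are full.
    let full: Vec<usize> = BitIter(0b10_0000_1001).collect();
    assert_eq!(full, [0, 3, 9]);
}
```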
+ /// + /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[cfg_attr(feature = "inline-more", inline)] unsafe fn new(ctrl: *const u8, data: Bucket, len: usize) -> Self { debug_assert_ne!(len, 0); debug_assert_eq!(ctrl as usize % Group::WIDTH, 0); + // SAFETY: The caller must uphold the safety rules of [`RawIterRange::new`] let end = ctrl.add(len); // Load the first group and advance ctrl to point to the next group + // SAFETY: The caller must uphold the safety rules of [`RawIterRange::new`] let current_group = Group::load_aligned(ctrl).match_full(); let next_ctrl = ctrl.add(Group::WIDTH); Self { - current_group, + current_group: current_group.into_iter(), data, next_ctrl, end, @@ -1925,8 +3826,7 @@ impl RawIterRange { #[cfg_attr(feature = "inline-more", inline)] unsafe fn next_impl(&mut self) -> Option> { loop { - if let Some(index) = self.current_group.lowest_set_bit() { - self.current_group = self.current_group.remove_lowest_bit(); + if let Some(index) = self.current_group.next() { return Some(self.data.next_n(index)); } @@ -1939,7 +3839,86 @@ impl RawIterRange { // than the group size where the trailing control bytes are all // EMPTY. On larger tables self.end is guaranteed to be aligned // to the group size (since tables are power-of-two sized). - self.current_group = Group::load_aligned(self.next_ctrl).match_full(); + self.current_group = Group::load_aligned(self.next_ctrl).match_full().into_iter(); + self.data = self.data.next_n(Group::WIDTH); + self.next_ctrl = self.next_ctrl.add(Group::WIDTH); + } + } + + /// Folds every element into an accumulator by applying an operation, + /// returning the final result. + /// + /// `fold_impl()` takes three arguments: the number of items remaining in + /// the iterator, an initial value, and a closure with two arguments: an + /// 'accumulator', and an element. The closure returns the value that the + /// accumulator should have for the next iteration. + /// + /// The initial value is the value the accumulator will have on the first call. + /// + /// After applying this closure to every element of the iterator, `fold_impl()` + /// returns the accumulator. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`Undefined Behavior`]: + /// + /// * The [`RawTableInner`] / [`RawTable`] must be alive and not moved, + /// i.e. table outlives the `RawIterRange`; + /// + /// * The provided `n` value must match the actual number of items + /// in the table. + /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[allow(clippy::while_let_on_iterator)] + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn fold_impl(mut self, mut n: usize, mut acc: B, mut f: F) -> B + where + F: FnMut(B, Bucket) -> B, + { + loop { + while let Some(index) = self.current_group.next() { + // The returned `index` will always be in the range `0..Group::WIDTH`, + // so that calling `self.data.next_n(index)` is safe (see detailed explanation below). + debug_assert!(n != 0); + let bucket = self.data.next_n(index); + acc = f(acc, bucket); + n -= 1; + } + + if n == 0 { + return acc; + } + + // SAFETY: The caller of this function ensures that: + // + // 1. The provided `n` value matches the actual number of items in the table; + // 2. The table is alive and has not been moved.
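// (Aside, a concrete instance of the bounds argument below, with hypothetical
// numbers: suppose self.buckets() == 16 and Group::WIDTH == 8. The scan then
// performs exactly two aligned group loads, at control-byte offsets 0 and 8;
// because 16 % 8 == 0, the last load starts at offset
// self.buckets() - Group::WIDTH == 8 and never reads past the control bytes.)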
+ // + // Taking the above into account, we always stay within the bounds, because: + // + // 1. For tables smaller than the group width (self.buckets() <= Group::WIDTH), + // we will never end up in the given branch, since we should have already + // yielded all the elements of the table. + // + // 2. For tables larger than the group width: the number of buckets is a + // power of two (2 ^ n) and Group::WIDTH is also a power of two (2 ^ k). Since + // `(2 ^ n) > (2 ^ k)`, we have `(2 ^ n) % (2 ^ k) = 0`. As we start from + // the start of the array of control bytes, and never try to iterate after + // getting all the elements, the last `self.current_group` will read bytes + // from the `self.buckets() - Group::WIDTH` index. We also know that + // `self.current_group.next()` will always return indices within the range + // `0..Group::WIDTH`. + // + // Knowing all of the above and taking into account that we are synchronizing + // the `self.data` index with the index we used to read the `self.current_group`, + // the subsequent `self.data.next_n(index)` will always return a bucket with + // an index number less than `self.buckets()`. + // + // The last `self.next_ctrl`, whose index would be `self.buckets()`, will never + // actually be read, since we should have already yielded all the elements of + // the table. + self.current_group = Group::load_aligned(self.next_ctrl).match_full().into_iter(); + self.data = self.data.next_n(Group::WIDTH); + self.next_ctrl = self.next_ctrl.add(Group::WIDTH); } } @@ -2016,7 +3995,7 @@ impl RawIter { /// This method should be called _before_ the removal is made. It is not necessary to call this /// method if you are removing an item that this iterator yielded in the past. #[cfg(feature = "raw")] - pub fn reflect_remove(&mut self, b: &Bucket) { + pub unsafe fn reflect_remove(&mut self, b: &Bucket) { self.reflect_toggle_full(b, false); } @@ -2030,36 +4009,76 @@ impl RawIter { /// /// This method should be called _after_ the given insert is made. #[cfg(feature = "raw")] - pub fn reflect_insert(&mut self, b: &Bucket) { + pub unsafe fn reflect_insert(&mut self, b: &Bucket) { self.reflect_toggle_full(b, true); } /// Refresh the iterator so that it reflects a change to the state of the given bucket. #[cfg(feature = "raw")] - fn reflect_toggle_full(&mut self, b: &Bucket, is_insert: bool) { - unsafe { - if b.as_ptr() > self.iter.data.as_ptr() { - // The iterator has already passed the bucket's group. - // So the toggle isn't relevant to this iterator. - return; + unsafe fn reflect_toggle_full(&mut self, b: &Bucket, is_insert: bool) { + if b.as_ptr() > self.iter.data.as_ptr() { + // The iterator has already passed the bucket's group. + // So the toggle isn't relevant to this iterator. + return; + } + + if self.iter.next_ctrl < self.iter.end + && b.as_ptr() <= self.iter.data.next_n(Group::WIDTH).as_ptr() + { + // The iterator has not yet reached the bucket's group. + // We don't need to reload anything, but we do need to adjust the item count. + + if cfg!(debug_assertions) { + // Double-check that the user isn't lying to us by checking the bucket state. + // To do that, we need to find its control byte.
We know that self.iter.data is + // at self.iter.next_ctrl - Group::WIDTH, so we work from there: + let offset = offset_from(self.iter.data.as_ptr(), b.as_ptr()); + let ctrl = self.iter.next_ctrl.sub(Group::WIDTH).add(offset); + // This method should be called _before_ a removal, or _after_ an insert, + // so in both cases the ctrl byte should indicate that the bucket is full. + assert!(is_full(*ctrl)); } - if self.iter.next_ctrl < self.iter.end - && b.as_ptr() <= self.iter.data.next_n(Group::WIDTH).as_ptr() - { - // The iterator has not yet reached the bucket's group. - // We don't need to reload anything, but we do need to adjust the item count. + if is_insert { + self.items += 1; + } else { + self.items -= 1; + } - if cfg!(debug_assertions) { - // Double-check that the user isn't lying to us by checking the bucket state. - // To do that, we need to find its control byte. We know that self.iter.data is - // at self.iter.next_ctrl - Group::WIDTH, so we work from there: - let offset = offset_from(self.iter.data.as_ptr(), b.as_ptr()); - let ctrl = self.iter.next_ctrl.sub(Group::WIDTH).add(offset); - // This method should be called _before_ a removal, or _after_ an insert, - // so in both cases the ctrl byte should indicate that the bucket is full. - assert!(is_full(*ctrl)); - } + return; + } + + // The iterator is at the bucket group that the toggled bucket is in. + // We need to do two things: + // + // - Determine if the iterator already yielded the toggled bucket. + // If it did, we're done. + // - Otherwise, update the iterator cached group so that it won't + // yield a to-be-removed bucket, or _will_ yield a to-be-added bucket. + // We'll also need to update the item count accordingly. + if let Some(index) = self.iter.current_group.0.lowest_set_bit() { + let next_bucket = self.iter.data.next_n(index); + if b.as_ptr() > next_bucket.as_ptr() { + // The toggled bucket is "before" the bucket the iterator would yield next. We + // therefore don't need to do anything --- the iterator has already passed the + // bucket in question. + // + // The item count must already be correct, since a removal or insert "prior" to + // the iterator's position wouldn't affect the item count. + } else { + // The removed bucket is an upcoming bucket. We need to make sure it does _not_ + // get yielded, and also that it's no longer included in the item count. + // + // NOTE: We can't just reload the group here, both since that might reflect + // inserts we've already passed, and because that might inadvertently unset the + // bits for _other_ removals. If we do that, we'd have to also decrement the + // item count for those other bits that we unset. But the presumably subsequent + // call to reflect for those buckets might _also_ decrement the item count. + // Instead, we _just_ flip the bit for the particular bucket the caller asked + // us to reflect. + let our_bit = offset_from(self.iter.data.as_ptr(), b.as_ptr()); + let was_full = self.iter.current_group.flip(our_bit); + debug_assert_ne!(was_full, is_insert); if is_insert { self.items += 1; @@ -2067,65 +4086,23 @@ impl RawIter { self.items -= 1; } - return; - } - - // The iterator is at the bucket group that the toggled bucket is in. - // We need to do two things: - // - // - Determine if the iterator already yielded the toggled bucket. - // If it did, we're done. - // - Otherwise, update the iterator cached group so that it won't - // yield a to-be-removed bucket, or _will_ yield a to-be-added bucket. 
- // We'll also need to update the item count accordingly. - if let Some(index) = self.iter.current_group.lowest_set_bit() { - let next_bucket = self.iter.data.next_n(index); - if b.as_ptr() > next_bucket.as_ptr() { - // The toggled bucket is "before" the bucket the iterator would yield next. We - // therefore don't need to do anything --- the iterator has already passed the - // bucket in question. - // - // The item count must already be correct, since a removal or insert "prior" to - // the iterator's position wouldn't affect the item count. - } else { - // The removed bucket is an upcoming bucket. We need to make sure it does _not_ - // get yielded, and also that it's no longer included in the item count. - // - // NOTE: We can't just reload the group here, both since that might reflect - // inserts we've already passed, and because that might inadvertently unset the - // bits for _other_ removals. If we do that, we'd have to also decrement the - // item count for those other bits that we unset. But the presumably subsequent - // call to reflect for those buckets might _also_ decrement the item count. - // Instead, we _just_ flip the bit for the particular bucket the caller asked - // us to reflect. - let our_bit = offset_from(self.iter.data.as_ptr(), b.as_ptr()); - let was_full = self.iter.current_group.flip(our_bit); - debug_assert_ne!(was_full, is_insert); - - if is_insert { - self.items += 1; + if cfg!(debug_assertions) { + if b.as_ptr() == next_bucket.as_ptr() { + // The removed bucket should no longer be next + debug_assert_ne!(self.iter.current_group.0.lowest_set_bit(), Some(index)); } else { - self.items -= 1; - } - - if cfg!(debug_assertions) { - if b.as_ptr() == next_bucket.as_ptr() { - // The removed bucket should no longer be next - debug_assert_ne!(self.iter.current_group.lowest_set_bit(), Some(index)); - } else { - // We should not have changed what bucket comes next. - debug_assert_eq!(self.iter.current_group.lowest_set_bit(), Some(index)); - } + // We should not have changed what bucket comes next. + debug_assert_eq!(self.iter.current_group.0.lowest_set_bit(), Some(index)); } } - } else { - // We must have already iterated past the removed item. } + } else { + // We must have already iterated past the removed item. } } unsafe fn drop_elements(&mut self) { - if mem::needs_drop::() && self.len() != 0 { + if T::NEEDS_DROP && self.items != 0 { for item in self { item.drop(); } @@ -2159,9 +4136,8 @@ impl Iterator for RawIter { self.iter.next_impl::() }; - if nxt.is_some() { - self.items -= 1; - } + debug_assert!(nxt.is_some()); + self.items -= 1; nxt } @@ -2170,33 +4146,160 @@ impl Iterator for RawIter { fn size_hint(&self) -> (usize, Option) { (self.items, Some(self.items)) } + + #[inline] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + unsafe { self.iter.fold_impl(self.items, init, f) } + } } impl ExactSizeIterator for RawIter {} impl FusedIterator for RawIter {} +/// Iterator which returns an index of every full bucket in the table. +/// +/// For maximum flexibility this iterator is not bound by a lifetime, but you +/// must observe several rules when using it: +/// - You must not free the hash table while iterating (including via growing/shrinking). +/// - It is fine to erase a bucket that has been yielded by the iterator. +/// - Erasing a bucket that has not yet been yielded by the iterator may still +/// result in the iterator yielding index of that bucket. 
+/// - It is unspecified whether an element inserted after the iterator was + /// created will be yielded by that iterator. + /// - The order in which the iterator yields indices of the buckets is unspecified + /// and may change in the future. +pub(crate) struct FullBucketsIndices { + // Mask of full buckets in the current group. Bits are cleared from this + // mask as each element is processed. + current_group: BitMaskIter, + + // Initial value of the bytes' indices of the current group (relative + // to the start of the control bytes). + group_first_index: usize, + + // Pointer to the current group of control bytes, + // Must be aligned to the group size (Group::WIDTH). + ctrl: NonNull, + + // Number of elements in the table. + items: usize, +} + +impl FullBucketsIndices { + /// Advances the iterator and returns the next value. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`Undefined Behavior`]: + /// + /// * The [`RawTableInner`] / [`RawTable`] must be alive and not moved, + /// i.e. table outlives the `FullBucketsIndices`; + /// + /// * It must never be called again after all the elements have been yielded. + /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline(always)] + unsafe fn next_impl(&mut self) -> Option { + loop { + if let Some(index) = self.current_group.next() { + // The returned `self.group_first_index + index` will always + // be in the range `0..self.buckets()`. See explanation below. + return Some(self.group_first_index + index); + } + + // SAFETY: The caller of this function ensures that: + // + // 1. It is never called again after all the elements have been yielded; + // 2. The table is alive and has not been moved; + // 3. The first `self.ctrl` pointed to the start of the array of control bytes. + // + // Taking the above into account, we always stay within the bounds, because: + // + // 1. For tables smaller than the group width (self.buckets() <= Group::WIDTH), + // we will never end up in the given branch, since we should have already + // yielded all the elements of the table. + // + // 2. For tables larger than the group width: the number of buckets is a + // power of two (2 ^ n) and Group::WIDTH is also a power of two (2 ^ k). Since + // `(2 ^ n) > (2 ^ k)`, we have `(2 ^ n) % (2 ^ k) = 0`. As we start from + // the start of the array of control bytes, and never try to iterate after + // getting all the elements, the last `self.ctrl` will be equal to + // the `self.buckets() - Group::WIDTH`, so `self.current_group.next()` + // will always contain indices within the range `0..Group::WIDTH`, + // and subsequent `self.group_first_index + index` will always return a + // number less than `self.buckets()`. + self.ctrl = NonNull::new_unchecked(self.ctrl.as_ptr().add(Group::WIDTH)); + + // SAFETY: See explanation above. + self.current_group = Group::load_aligned(self.ctrl.as_ptr()) + .match_full() + .into_iter(); + self.group_first_index += Group::WIDTH; + } + } +} + +impl Iterator for FullBucketsIndices { + type Item = usize; + + /// Advances the iterator and returns the next value. It is up to + /// the caller to ensure that the `RawTable` outlives the `FullBucketsIndices`, + /// because we cannot make the `next` method unsafe. + #[inline(always)] + fn next(&mut self) -> Option { + // Return if we already yielded all items. + if self.items == 0 { + return None; + } + + let nxt = unsafe { + // SAFETY: + // 1. We check the number of items to yield using the `items` field. + // 2.
The caller ensures that the table is alive and has not moved. + self.next_impl() + }; + + debug_assert!(nxt.is_some()); + self.items -= 1; + + nxt + } + + #[inline(always)] + fn size_hint(&self) -> (usize, Option) { + (self.items, Some(self.items)) + } +} + +impl ExactSizeIterator for FullBucketsIndices {} +impl FusedIterator for FullBucketsIndices {} + /// Iterator which consumes a table and returns elements. -pub struct RawIntoIter { +pub struct RawIntoIter { iter: RawIter, - allocation: Option<(NonNull, Layout)>, + allocation: Option<(NonNull, Layout, A)>, marker: PhantomData, - alloc: A, } -impl RawIntoIter { +impl RawIntoIter { #[cfg_attr(feature = "inline-more", inline)] pub fn iter(&self) -> RawIter { self.iter.clone() } } -unsafe impl Send for RawIntoIter +unsafe impl Send for RawIntoIter where T: Send, A: Send, { } -unsafe impl Sync for RawIntoIter +unsafe impl Sync for RawIntoIter where T: Sync, A: Sync, @@ -2204,7 +4307,7 @@ where } #[cfg(feature = "nightly")] -unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawIntoIter { +unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawIntoIter { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { unsafe { @@ -2212,14 +4315,14 @@ unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawIntoIter { self.iter.drop_elements(); // Free the table - if let Some((ptr, layout)) = self.allocation { - self.alloc.deallocate(ptr, layout); + if let Some((ptr, layout, ref alloc)) = self.allocation { + alloc.deallocate(ptr, layout); } } } } #[cfg(not(feature = "nightly"))] -impl Drop for RawIntoIter { +impl Drop for RawIntoIter { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { unsafe { @@ -2227,14 +4330,14 @@ impl Drop for RawIntoIter { self.iter.drop_elements(); // Free the table - if let Some((ptr, layout)) = self.allocation { - self.alloc.deallocate(ptr, layout); + if let Some((ptr, layout, ref alloc)) = self.allocation { + alloc.deallocate(ptr, layout); } } } } -impl Iterator for RawIntoIter { +impl Iterator for RawIntoIter { type Item = T; #[cfg_attr(feature = "inline-more", inline)] @@ -2248,45 +4351,45 @@ impl Iterator for RawIntoIter { } } -impl ExactSizeIterator for RawIntoIter {} -impl FusedIterator for RawIntoIter {} +impl ExactSizeIterator for RawIntoIter {} +impl FusedIterator for RawIntoIter {} /// Iterator which consumes elements without freeing the table storage. -pub struct RawDrain<'a, T, A: Allocator + Clone = Global> { +pub struct RawDrain<'a, T, A: Allocator = Global> { iter: RawIter, // The table is moved into the iterator for the duration of the drain. This // ensures that an empty table is left if the drain iterator is leaked // without dropping. - table: ManuallyDrop>, - orig_table: NonNull>, + table: RawTableInner, + orig_table: NonNull, // We don't use a &'a mut RawTable because we want RawDrain to be // covariant over T. 
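// (Aside, a minimal illustration of the leak-safety idea described above —
// `DrainAll` and `drain_all` are hypothetical simplified types, not the
// vendored API:
//
//     struct DrainAll<'a, T> {
//         items: std::vec::IntoIter<T>,
//         orig: &'a mut Vec<T>,
//     }
//
//     fn drain_all<T>(v: &mut Vec<T>) -> DrainAll<'_, T> {
//         let items = core::mem::take(v).into_iter(); // source emptied up front
//         DrainAll { items, orig: v } // if leaked, *v is already valid and empty
//     }
//
// Because the contents are moved out before iteration starts, leaking the
// iterator can only leak elements; it never leaves the source inconsistent.)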
marker: PhantomData<&'a RawTable>, } -impl RawDrain<'_, T, A> { +impl RawDrain<'_, T, A> { #[cfg_attr(feature = "inline-more", inline)] pub fn iter(&self) -> RawIter { self.iter.clone() } } -unsafe impl Send for RawDrain<'_, T, A> +unsafe impl Send for RawDrain<'_, T, A> where T: Send, A: Send, { } -unsafe impl Sync for RawDrain<'_, T, A> +unsafe impl Sync for RawDrain<'_, T, A> where T: Sync, A: Sync, { } -impl Drop for RawDrain<'_, T, A> { +impl Drop for RawDrain<'_, T, A> { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { unsafe { @@ -2300,12 +4403,12 @@ impl Drop for RawDrain<'_, T, A> { // Move the now empty table back to its original location. self.orig_table .as_ptr() - .copy_from_nonoverlapping(&*self.table, 1); + .copy_from_nonoverlapping(&self.table, 1); } } } -impl Iterator for RawDrain<'_, T, A> { +impl Iterator for RawDrain<'_, T, A> { type Item = T; #[cfg_attr(feature = "inline-more", inline)] @@ -2322,21 +4425,36 @@ impl Iterator for RawDrain<'_, T, A> { } } -impl ExactSizeIterator for RawDrain<'_, T, A> {} -impl FusedIterator for RawDrain<'_, T, A> {} +impl ExactSizeIterator for RawDrain<'_, T, A> {} +impl FusedIterator for RawDrain<'_, T, A> {} /// Iterator over occupied buckets that could match a given hash. /// /// `RawTable` only stores 7 bits of the hash value, so this iterator may return /// items that have a hash value different than the one provided. You should /// always validate the returned values before using them. -pub struct RawIterHash<'a, T, A: Allocator + Clone = Global> { - inner: RawIterHashInner<'a, A>, +/// +/// For maximum flexibility this iterator is not bound by a lifetime, but you +/// must observe several rules when using it: +/// - You must not free the hash table while iterating (including via growing/shrinking). +/// - It is fine to erase a bucket that has been yielded by the iterator. +/// - Erasing a bucket that has not yet been yielded by the iterator may still +/// result in the iterator yielding that bucket. +/// - It is unspecified whether an element inserted after the iterator was +/// created will be yielded by that iterator. +/// - The order in which the iterator yields buckets is unspecified and may +/// change in the future. +pub struct RawIterHash { + inner: RawIterHashInner, _marker: PhantomData, } -struct RawIterHashInner<'a, A: Allocator + Clone> { - table: &'a RawTableInner, +struct RawIterHashInner { + // See `RawTableInner`'s corresponding fields for details. + // We can't store a `*const RawTableInner` as it would get + // invalidated by the user calling `&mut` methods on `RawTable`. + bucket_mask: usize, + ctrl: NonNull, // The top 7 bits of the hash. 
h2_hash: u8, @@ -2350,71 +4468,105 @@ struct RawIterHashInner<'a, A: Allocator + Clone> { bitmask: BitMaskIter, } -impl<'a, T, A: Allocator + Clone> RawIterHash<'a, T, A> { +impl RawIterHash { #[cfg_attr(feature = "inline-more", inline)] #[cfg(feature = "raw")] - fn new(table: &'a RawTable, hash: u64) -> Self { + unsafe fn new(table: &RawTable, hash: u64) -> Self { RawIterHash { inner: RawIterHashInner::new(&table.table, hash), _marker: PhantomData, } } } -impl<'a, A: Allocator + Clone> RawIterHashInner<'a, A> { +impl RawIterHashInner { #[cfg_attr(feature = "inline-more", inline)] #[cfg(feature = "raw")] - fn new(table: &'a RawTableInner, hash: u64) -> Self { - unsafe { - let h2_hash = h2(hash); - let probe_seq = table.probe_seq(hash); - let group = Group::load(table.ctrl(probe_seq.pos)); - let bitmask = group.match_byte(h2_hash).into_iter(); - - RawIterHashInner { - table, - h2_hash, - probe_seq, - group, - bitmask, - } + unsafe fn new(table: &RawTableInner, hash: u64) -> Self { + let h2_hash = h2(hash); + let probe_seq = table.probe_seq(hash); + let group = Group::load(table.ctrl(probe_seq.pos)); + let bitmask = group.match_byte(h2_hash).into_iter(); + + RawIterHashInner { + bucket_mask: table.bucket_mask, + ctrl: table.ctrl, + h2_hash, + probe_seq, + group, + bitmask, } } } -impl<'a, T, A: Allocator + Clone> Iterator for RawIterHash<'a, T, A> { +impl Iterator for RawIterHash { type Item = Bucket; fn next(&mut self) -> Option> { unsafe { match self.inner.next() { - Some(index) => Some(self.inner.table.bucket(index)), + Some(index) => { + // Can't use `RawTable::bucket` here as we don't have + // an actual `RawTable` reference to use. + debug_assert!(index <= self.inner.bucket_mask); + let bucket = Bucket::from_base_index(self.inner.ctrl.cast(), index); + Some(bucket) + } None => None, } } } } -impl<'a, A: Allocator + Clone> Iterator for RawIterHashInner<'a, A> { +impl Iterator for RawIterHashInner { type Item = usize; fn next(&mut self) -> Option { unsafe { loop { if let Some(bit) = self.bitmask.next() { - let index = (self.probe_seq.pos + bit) & self.table.bucket_mask; + let index = (self.probe_seq.pos + bit) & self.bucket_mask; return Some(index); } if likely(self.group.match_empty().any_bit_set()) { return None; } - self.probe_seq.move_next(self.table.bucket_mask); - self.group = Group::load(self.table.ctrl(self.probe_seq.pos)); + self.probe_seq.move_next(self.bucket_mask); + + // Can't use `RawTableInner::ctrl` here as we don't have + // an actual `RawTableInner` reference to use. 
+ let index = self.probe_seq.pos; + debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH); + let group_ctrl = self.ctrl.as_ptr().add(index); + + self.group = Group::load(group_ctrl); + self.bitmask = self.group.match_byte(self.h2_hash).into_iter(); } } } } +pub(crate) struct RawExtractIf<'a, T, A: Allocator> { + pub iter: RawIter, + pub table: &'a mut RawTable, +} + +impl RawExtractIf<'_, T, A> { + #[cfg_attr(feature = "inline-more", inline)] + pub(crate) fn next(&mut self, mut f: F) -> Option + where + F: FnMut(&mut T) -> bool, + { + unsafe { + for item in &mut self.iter { + if f(item.as_mut()) { + return Some(self.table.remove(item).0); + } + } + } + None + } +} + #[cfg(test)] mod test_map { use super::*; @@ -2457,4 +4609,214 @@ mod test_map { assert!(table.find(i + 100, |x| *x == i + 100).is_none()); } } + + /// CHECKING THAT WE ARE NOT TRYING TO READ THE MEMORY OF + /// AN UNINITIALIZED TABLE DURING THE DROP + #[test] + fn test_drop_uninitialized() { + use ::alloc::vec::Vec; + + let table = unsafe { + // SAFETY: `buckets` is a power of two and we're not + // trying to actually use the returned RawTable. + RawTable::<(u64, Vec)>::new_uninitialized(Global, 8, Fallibility::Infallible) + .unwrap() + }; + drop(table); + } + + /// CHECKING THAT WE DON'T TRY TO DROP DATA IF THE `ITEMS` + /// ARE ZERO, EVEN IF WE HAVE `FULL` CONTROL BYTES. + #[test] + fn test_drop_zero_items() { + use ::alloc::vec::Vec; + unsafe { + // SAFETY: `buckets` is a power of two and we're not + // trying to actually use the returned RawTable. + let table = + RawTable::<(u64, Vec)>::new_uninitialized(Global, 8, Fallibility::Infallible) + .unwrap(); + + // WE SIMULATE, AS IT WERE, A FULL TABLE. + + // SAFETY: We checked that the table is allocated and therefore the table already has + // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for) + // so writing `table.table.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe. + table + .table + .ctrl(0) + .write_bytes(EMPTY, table.table.num_ctrl_bytes()); + + // SAFETY: table.capacity() is guaranteed to be smaller than table.buckets() + table.table.ctrl(0).write_bytes(0, table.capacity()); + + // Fix up the trailing control bytes. See the comments in set_ctrl + // for the handling of tables smaller than the group width. + if table.buckets() < Group::WIDTH { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes, + // so copying `self.buckets() == self.bucket_mask + 1` bytes with offset equal to + // `Group::WIDTH` is safe + table + .table + .ctrl(0) + .copy_to(table.table.ctrl(Group::WIDTH), table.table.buckets()); + } else { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of + // control bytes, so copying `Group::WIDTH` bytes with offset equal + // to `self.buckets() == self.bucket_mask + 1` is safe + table + .table + .ctrl(0) + .copy_to(table.table.ctrl(table.table.buckets()), Group::WIDTH); + } + drop(table); + } + } + + /// CHECKING THAT WE DROP ALL ALREADY CLONED ELEMENTS AND LEAVE THE + /// TABLE EMPTY IF `clone` PANICS IN THE MIDDLE OF `clone_from`.
+ #[test] + fn test_catch_panic_clone_from() { + use ::alloc::sync::Arc; + use ::alloc::vec::Vec; + use allocator_api2::alloc::{AllocError, Allocator, Global}; + use core::sync::atomic::{AtomicI8, Ordering}; + use std::thread; + + struct MyAllocInner { + drop_count: Arc, + } + + #[derive(Clone)] + struct MyAlloc { + _inner: Arc, + } + + impl Drop for MyAllocInner { + fn drop(&mut self) { + println!("MyAlloc freed."); + self.drop_count.fetch_sub(1, Ordering::SeqCst); + } + } + + unsafe impl Allocator for MyAlloc { + fn allocate(&self, layout: Layout) -> std::result::Result, AllocError> { + let g = Global; + g.allocate(layout) + } + + unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { + let g = Global; + g.deallocate(ptr, layout) + } + } + + const DISARMED: bool = false; + const ARMED: bool = true; + + struct CheckedCloneDrop { + panic_in_clone: bool, + dropped: bool, + need_drop: Vec, + } + + impl Clone for CheckedCloneDrop { + fn clone(&self) -> Self { + if self.panic_in_clone { + panic!("panic in clone") + } + Self { + panic_in_clone: self.panic_in_clone, + dropped: self.dropped, + need_drop: self.need_drop.clone(), + } + } + } + + impl Drop for CheckedCloneDrop { + fn drop(&mut self) { + if self.dropped { + panic!("double drop"); + } + self.dropped = true; + } + } + + let dropped: Arc = Arc::new(AtomicI8::new(2)); + + let mut table = RawTable::new_in(MyAlloc { + _inner: Arc::new(MyAllocInner { + drop_count: dropped.clone(), + }), + }); + + for (idx, panic_in_clone) in core::iter::repeat(DISARMED).take(7).enumerate() { + let idx = idx as u64; + table.insert( + idx, + ( + idx, + CheckedCloneDrop { + panic_in_clone, + dropped: false, + need_drop: vec![idx], + }, + ), + |(k, _)| *k, + ); + } + + assert_eq!(table.len(), 7); + + thread::scope(|s| { + let result = s.spawn(|| { + let armed_flags = [ + DISARMED, DISARMED, ARMED, DISARMED, DISARMED, DISARMED, DISARMED, + ]; + let mut scope_table = RawTable::new_in(MyAlloc { + _inner: Arc::new(MyAllocInner { + drop_count: dropped.clone(), + }), + }); + for (idx, &panic_in_clone) in armed_flags.iter().enumerate() { + let idx = idx as u64; + scope_table.insert( + idx, + ( + idx, + CheckedCloneDrop { + panic_in_clone, + dropped: false, + need_drop: vec![idx + 100], + }, + ), + |(k, _)| *k, + ); + } + table.clone_from(&scope_table); + }); + assert!(result.join().is_err()); + }); + + // Let's check that all iterators work fine and do not return elements + // (especially `RawIterRange`, which does not depend on the number of + // elements in the table, but looks directly at the control bytes) + // + // SAFETY: We know for sure that `RawTable` will outlive + // the returned `RawIter / RawIterRange` iterator. + assert_eq!(table.len(), 0); + assert_eq!(unsafe { table.iter().count() }, 0); + assert_eq!(unsafe { table.iter().iter.count() }, 0); + + for idx in 0..table.buckets() { + let idx = idx as u64; + assert!( + table.find(idx, |(k, _)| *k == idx).is_none(), + "Index: {idx}" + ); + } + + // All allocator clones should already be dropped. 
+ assert_eq!(dropped.load(Ordering::SeqCst), 1); + } } diff --git a/vendor/hashbrown/src/raw/neon.rs b/vendor/hashbrown/src/raw/neon.rs new file mode 100644 index 0000000..44e82d5 --- /dev/null +++ b/vendor/hashbrown/src/raw/neon.rs @@ -0,0 +1,124 @@ +use super::bitmask::BitMask; +use super::EMPTY; +use core::arch::aarch64 as neon; +use core::mem; +use core::num::NonZeroU64; + +pub(crate) type BitMaskWord = u64; +pub(crate) type NonZeroBitMaskWord = NonZeroU64; +pub(crate) const BITMASK_STRIDE: usize = 8; +pub(crate) const BITMASK_MASK: BitMaskWord = !0; +pub(crate) const BITMASK_ITER_MASK: BitMaskWord = 0x8080_8080_8080_8080; + +/// Abstraction over a group of control bytes which can be scanned in +/// parallel. +/// +/// This implementation uses a 64-bit NEON value. +#[derive(Copy, Clone)] +pub(crate) struct Group(neon::uint8x8_t); + +#[allow(clippy::use_self)] +impl Group { + /// Number of bytes in the group. + pub(crate) const WIDTH: usize = mem::size_of::(); + + /// Returns a full group of empty bytes, suitable for use as the initial + /// value for an empty hash table. + /// + /// This is guaranteed to be aligned to the group size. + #[inline] + pub(crate) const fn static_empty() -> &'static [u8; Group::WIDTH] { + #[repr(C)] + struct AlignedBytes { + _align: [Group; 0], + bytes: [u8; Group::WIDTH], + } + const ALIGNED_BYTES: AlignedBytes = AlignedBytes { + _align: [], + bytes: [EMPTY; Group::WIDTH], + }; + &ALIGNED_BYTES.bytes + } + + /// Loads a group of bytes starting at the given address. + #[inline] + #[allow(clippy::cast_ptr_alignment)] // unaligned load + pub(crate) unsafe fn load(ptr: *const u8) -> Self { + Group(neon::vld1_u8(ptr)) + } + + /// Loads a group of bytes starting at the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + #[allow(clippy::cast_ptr_alignment)] + pub(crate) unsafe fn load_aligned(ptr: *const u8) -> Self { + // FIXME: use align_offset once it stabilizes + debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); + Group(neon::vld1_u8(ptr)) + } + + /// Stores the group of bytes to the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + #[allow(clippy::cast_ptr_alignment)] + pub(crate) unsafe fn store_aligned(self, ptr: *mut u8) { + // FIXME: use align_offset once it stabilizes + debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); + neon::vst1_u8(ptr, self.0); + } + + /// Returns a `BitMask` indicating all bytes in the group which *may* + /// have the given value. + #[inline] + pub(crate) fn match_byte(self, byte: u8) -> BitMask { + unsafe { + let cmp = neon::vceq_u8(self.0, neon::vdup_n_u8(byte)); + BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0)) + } + } + + /// Returns a `BitMask` indicating all bytes in the group which are + /// `EMPTY`. + #[inline] + pub(crate) fn match_empty(self) -> BitMask { + self.match_byte(EMPTY) + } + + /// Returns a `BitMask` indicating all bytes in the group which are + /// `EMPTY` or `DELETED`. + #[inline] + pub(crate) fn match_empty_or_deleted(self) -> BitMask { + unsafe { + let cmp = neon::vcltz_s8(neon::vreinterpret_s8_u8(self.0)); + BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0)) + } + } + + /// Returns a `BitMask` indicating all bytes in the group which are full. 
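// (Aside, a simplified sketch of how these 8-bit-per-lane masks are consumed —
// `first_match_lane` is a hypothetical helper, not the vendored BitMask API.
// Each matching byte lane contributes eight set bits (BITMASK_STRIDE == 8), so
// the lane index of the first match is a trailing-zeros count divided by 8:
//
//     fn first_match_lane(mask: u64) -> Option<u32> {
//         if mask == 0 { None } else { Some(mask.trailing_zeros() / 8) }
//     }
//
// For example, a match only in lane 2 yields mask == 0x0000_0000_00FF_0000;
// trailing_zeros() == 16, and 16 / 8 == 2.)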
+ #[inline] + pub(crate) fn match_full(self) -> BitMask { + unsafe { + let cmp = neon::vcgez_s8(neon::vreinterpret_s8_u8(self.0)); + BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0)) + } + } + + /// Performs the following transformation on all bytes in the group: + /// - `EMPTY => EMPTY` + /// - `DELETED => EMPTY` + /// - `FULL => DELETED` + #[inline] + pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { + // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 + // and high_bit = 0 (FULL) to 1000_0000 + // + // Here's this logic expanded to concrete values: + // let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false) + // 1111_1111 | 1000_0000 = 1111_1111 + // 0000_0000 | 1000_0000 = 1000_0000 + unsafe { + let special = neon::vcltz_s8(neon::vreinterpret_s8_u8(self.0)); + Group(neon::vorr_u8(special, neon::vdup_n_u8(0x80))) + } + } +} diff --git a/vendor/hashbrown/src/raw/sse2.rs b/vendor/hashbrown/src/raw/sse2.rs index a0bf6da..956ba5d 100644 --- a/vendor/hashbrown/src/raw/sse2.rs +++ b/vendor/hashbrown/src/raw/sse2.rs @@ -1,28 +1,31 @@ use super::bitmask::BitMask; use super::EMPTY; use core::mem; +use core::num::NonZeroU16; #[cfg(target_arch = "x86")] use core::arch::x86; #[cfg(target_arch = "x86_64")] use core::arch::x86_64 as x86; -pub type BitMaskWord = u16; -pub const BITMASK_STRIDE: usize = 1; -pub const BITMASK_MASK: BitMaskWord = 0xffff; +pub(crate) type BitMaskWord = u16; +pub(crate) type NonZeroBitMaskWord = NonZeroU16; +pub(crate) const BITMASK_STRIDE: usize = 1; +pub(crate) const BITMASK_MASK: BitMaskWord = 0xffff; +pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0; /// Abstraction over a group of control bytes which can be scanned in /// parallel. /// /// This implementation uses a 128-bit SSE value. #[derive(Copy, Clone)] -pub struct Group(x86::__m128i); +pub(crate) struct Group(x86::__m128i); // FIXME: https://github.com/rust-lang/rust-clippy/issues/3859 #[allow(clippy::use_self)] impl Group { /// Number of bytes in the group. - pub const WIDTH: usize = mem::size_of::(); + pub(crate) const WIDTH: usize = mem::size_of::(); /// Returns a full group of empty bytes, suitable for use as the initial /// value for an empty hash table. @@ -30,7 +33,7 @@ impl Group { /// This is guaranteed to be aligned to the group size. #[inline] #[allow(clippy::items_after_statements)] - pub const fn static_empty() -> &'static [u8; Group::WIDTH] { + pub(crate) const fn static_empty() -> &'static [u8; Group::WIDTH] { #[repr(C)] struct AlignedBytes { _align: [Group; 0], @@ -46,7 +49,7 @@ impl Group { /// Loads a group of bytes starting at the given address. #[inline] #[allow(clippy::cast_ptr_alignment)] // unaligned load - pub unsafe fn load(ptr: *const u8) -> Self { + pub(crate) unsafe fn load(ptr: *const u8) -> Self { Group(x86::_mm_loadu_si128(ptr.cast())) } @@ -54,7 +57,7 @@ impl Group { /// aligned to `mem::align_of::()`. #[inline] #[allow(clippy::cast_ptr_alignment)] - pub unsafe fn load_aligned(ptr: *const u8) -> Self { + pub(crate) unsafe fn load_aligned(ptr: *const u8) -> Self { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); Group(x86::_mm_load_si128(ptr.cast())) @@ -64,7 +67,7 @@ impl Group { /// aligned to `mem::align_of::()`. 
#[inline] #[allow(clippy::cast_ptr_alignment)] - pub unsafe fn store_aligned(self, ptr: *mut u8) { + pub(crate) unsafe fn store_aligned(self, ptr: *mut u8) { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); x86::_mm_store_si128(ptr.cast(), self.0); @@ -73,7 +76,7 @@ impl Group { /// Returns a `BitMask` indicating all bytes in the group which have /// the given value. #[inline] - pub fn match_byte(self, byte: u8) -> BitMask { + pub(crate) fn match_byte(self, byte: u8) -> BitMask { #[allow( clippy::cast_possible_wrap, // byte: u8 as i8 // byte: i32 as u16 @@ -91,14 +94,14 @@ impl Group { /// Returns a `BitMask` indicating all bytes in the group which are /// `EMPTY`. #[inline] - pub fn match_empty(self) -> BitMask { + pub(crate) fn match_empty(self) -> BitMask { self.match_byte(EMPTY) } /// Returns a `BitMask` indicating all bytes in the group which are /// `EMPTY` or `DELETED`. #[inline] - pub fn match_empty_or_deleted(self) -> BitMask { + pub(crate) fn match_empty_or_deleted(self) -> BitMask { #[allow( // byte: i32 as u16 // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the @@ -114,7 +117,7 @@ impl Group { /// Returns a `BitMask` indicating all bytes in the group which are full. #[inline] - pub fn match_full(&self) -> BitMask { + pub(crate) fn match_full(&self) -> BitMask { self.match_empty_or_deleted().invert() } @@ -123,7 +126,7 @@ impl Group { /// - `DELETED => EMPTY` /// - `FULL => DELETED` #[inline] - pub fn convert_special_to_empty_and_full_to_deleted(self) -> Self { + pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 // and high_bit = 0 (FULL) to 1000_0000 // diff --git a/vendor/hashbrown/src/rustc_entry.rs b/vendor/hashbrown/src/rustc_entry.rs index 2e84595..defbd4b 100644 --- a/vendor/hashbrown/src/rustc_entry.rs +++ b/vendor/hashbrown/src/rustc_entry.rs @@ -1,5 +1,5 @@ use self::RustcEntry::*; -use crate::map::{make_insert_hash, Drain, HashMap, IntoIter, Iter, IterMut}; +use crate::map::{make_hash, Drain, HashMap, IntoIter, Iter, IterMut}; use crate::raw::{Allocator, Bucket, Global, RawTable}; use core::fmt::{self, Debug}; use core::hash::{BuildHasher, Hash}; @@ -9,7 +9,7 @@ impl HashMap where K: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { /// Gets the given key's corresponding entry in the map for in-place manipulation. /// @@ -32,7 +32,7 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn rustc_entry(&mut self, key: K) -> RustcEntry<'_, K, V, A> { - let hash = make_insert_hash(&self.hash_builder, &key); + let hash = make_hash(&self.hash_builder, &key); if let Some(elem) = self.table.find(hash, |q| q.0.eq(&key)) { RustcEntry::Occupied(RustcOccupiedEntry { key: Some(key), @@ -62,7 +62,7 @@ where /// [`rustc_entry`]: struct.HashMap.html#method.rustc_entry pub enum RustcEntry<'a, K, V, A = Global> where - A: Allocator + Clone, + A: Allocator, { /// An occupied entry. 
Occupied(RustcOccupiedEntry<'a, K, V, A>), @@ -71,7 +71,7 @@ where Vacant(RustcVacantEntry<'a, K, V, A>), } -impl Debug for RustcEntry<'_, K, V, A> { +impl Debug for RustcEntry<'_, K, V, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Vacant(ref v) => f.debug_tuple("Entry").field(v).finish(), @@ -86,7 +86,7 @@ impl Debug for RustcEntry<'_, K, V, A> /// [`RustcEntry`]: enum.RustcEntry.html pub struct RustcOccupiedEntry<'a, K, V, A = Global> where - A: Allocator + Clone, + A: Allocator, { key: Option, elem: Bucket<(K, V)>, @@ -97,18 +97,18 @@ unsafe impl Send for RustcOccupiedEntry<'_, K, V, A> where K: Send, V: Send, - A: Allocator + Clone + Send, + A: Allocator + Send, { } unsafe impl Sync for RustcOccupiedEntry<'_, K, V, A> where K: Sync, V: Sync, - A: Allocator + Clone + Sync, + A: Allocator + Sync, { } -impl Debug for RustcOccupiedEntry<'_, K, V, A> { +impl Debug for RustcOccupiedEntry<'_, K, V, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("OccupiedEntry") .field("key", self.key()) @@ -123,20 +123,20 @@ impl Debug for RustcOccupiedEntry<'_, /// [`RustcEntry`]: enum.RustcEntry.html pub struct RustcVacantEntry<'a, K, V, A = Global> where - A: Allocator + Clone, + A: Allocator, { hash: u64, key: K, table: &'a mut RawTable<(K, V), A>, } -impl Debug for RustcVacantEntry<'_, K, V, A> { +impl Debug for RustcVacantEntry<'_, K, V, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("VacantEntry").field(self.key()).finish() } } -impl<'a, K, V, A: Allocator + Clone> RustcEntry<'a, K, V, A> { +impl<'a, K, V, A: Allocator> RustcEntry<'a, K, V, A> { /// Sets the value of the entry, and returns a RustcOccupiedEntry. /// /// # Examples @@ -265,7 +265,7 @@ impl<'a, K, V, A: Allocator + Clone> RustcEntry<'a, K, V, A> { } } -impl<'a, K, V: Default, A: Allocator + Clone> RustcEntry<'a, K, V, A> { +impl<'a, K, V: Default, A: Allocator> RustcEntry<'a, K, V, A> { /// Ensures a value is in the entry by inserting the default value if empty, /// and returns a mutable reference to the value in the entry. /// @@ -293,7 +293,7 @@ impl<'a, K, V: Default, A: Allocator + Clone> RustcEntry<'a, K, V, A> { } } -impl<'a, K, V, A: Allocator + Clone> RustcOccupiedEntry<'a, K, V, A> { +impl<'a, K, V, A: Allocator> RustcOccupiedEntry<'a, K, V, A> { /// Gets a reference to the key in the entry. /// /// # Examples @@ -330,7 +330,7 @@ impl<'a, K, V, A: Allocator + Clone> RustcOccupiedEntry<'a, K, V, A> { /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn remove_entry(self) -> (K, V) { - unsafe { self.table.remove(self.elem) } + unsafe { self.table.remove(self.elem).0 } } /// Gets a reference to the value in the entry. @@ -518,7 +518,7 @@ impl<'a, K, V, A: Allocator + Clone> RustcOccupiedEntry<'a, K, V, A> { } } -impl<'a, K, V, A: Allocator + Clone> RustcVacantEntry<'a, K, V, A> { +impl<'a, K, V, A: Allocator> RustcVacantEntry<'a, K, V, A> { /// Gets a reference to the key that would be used when inserting a value /// through the `RustcVacantEntry`. 
/// diff --git a/vendor/hashbrown/src/scopeguard.rs b/vendor/hashbrown/src/scopeguard.rs index f85e6ab..382d060 100644 --- a/vendor/hashbrown/src/scopeguard.rs +++ b/vendor/hashbrown/src/scopeguard.rs @@ -1,6 +1,6 @@ // Extracted from the scopeguard crate use core::{ - mem, + mem::ManuallyDrop, ops::{Deref, DerefMut}, ptr, }; @@ -28,15 +28,13 @@ where #[inline] pub fn into_inner(guard: Self) -> T { // Cannot move out of Drop-implementing types, so - // ptr::read the value and forget the guard. + // ptr::read the value out of a ManuallyDrop + // Don't use mem::forget as that might invalidate the value + let guard = ManuallyDrop::new(guard); unsafe { let value = ptr::read(&guard.value); - // read the closure so that it is dropped, and assign it to a local - // variable to ensure that it is only dropped after the guard has - // been forgotten. (In case the Drop impl of the closure, or that - // of any consumed captured variable, panics). - let _dropfn = ptr::read(&guard.dropfn); - mem::forget(guard); + // read the closure so that it is dropped + let _ = ptr::read(&guard.dropfn); value } } diff --git a/vendor/hashbrown/src/set.rs b/vendor/hashbrown/src/set.rs index 2a4dcea..fdff46d 100644 --- a/vendor/hashbrown/src/set.rs +++ b/vendor/hashbrown/src/set.rs @@ -1,14 +1,14 @@ -use crate::TryReserveError; +#[cfg(feature = "raw")] +use crate::raw::RawTable; +use crate::{Equivalent, TryReserveError}; use alloc::borrow::ToOwned; -use core::borrow::Borrow; use core::fmt; use core::hash::{BuildHasher, Hash}; use core::iter::{Chain, FromIterator, FusedIterator}; -use core::mem; use core::ops::{BitAnd, BitOr, BitXor, Sub}; -use super::map::{self, ConsumeAllOnDrop, DefaultHashBuilder, DrainFilterInner, HashMap, Keys}; -use crate::raw::{Allocator, Global}; +use super::map::{self, DefaultHashBuilder, HashMap, Keys}; +use crate::raw::{Allocator, Global, RawExtractIf}; // Future Optimization (FIXME!) // ============================= @@ -112,7 +112,7 @@ use crate::raw::{Allocator, Global}; /// [`HashMap`]: struct.HashMap.html /// [`PartialEq`]: https://doc.rust-lang.org/std/cmp/trait.PartialEq.html /// [`RefCell`]: https://doc.rust-lang.org/std/cell/struct.RefCell.html -pub struct HashSet { +pub struct HashSet { pub(crate) map: HashMap, } @@ -135,6 +135,18 @@ impl HashSet { /// The hash set is initially created with a capacity of 0, so it will not allocate until it /// is first inserted into. /// + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashSet` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashSet`], for example with the + /// [`with_hasher`](HashSet::with_hasher) method. + /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// /// # Examples /// /// ``` @@ -153,6 +165,18 @@ impl HashSet { /// The hash set will be able to hold at least `capacity` elements without /// reallocating. If `capacity` is 0, the hash set will not allocate. /// + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashSet` against attacks such as [`HashDoS`].
+ /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashSet`], for example with the + /// [`with_capacity_and_hasher`](HashSet::with_capacity_and_hasher) method. + /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// /// # Examples /// /// ``` @@ -169,12 +193,24 @@ impl HashSet { } #[cfg(feature = "ahash")] -impl HashSet { +impl HashSet { /// Creates an empty `HashSet`. /// /// The hash set is initially created with a capacity of 0, so it will not allocate until it /// is first inserted into. /// + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashSet` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashSet`], for example with the + /// [`with_hasher_in`](HashSet::with_hasher_in) method. + /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// /// # Examples /// /// ``` @@ -193,6 +229,18 @@ impl HashSet { /// The hash set will be able to hold at least `capacity` elements without /// reallocating. If `capacity` is 0, the hash set will not allocate. /// + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashSet` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashSet`], for example with the + /// [`with_capacity_and_hasher_in`](HashSet::with_capacity_and_hasher_in) method. + /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// /// # Examples /// /// ``` @@ -208,7 +256,7 @@ impl HashSet { } } -impl HashSet { +impl HashSet { /// Returns the number of elements the set can hold without reallocating. /// /// # Examples @@ -331,8 +379,11 @@ impl HashSet { /// In other words, move all elements `e` such that `f(&e)` returns `true` out /// into another iterator. /// - /// When the returned DrainedFilter is dropped, any remaining elements that satisfy - /// the predicate are dropped from the set. + /// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating + /// or the iteration short-circuits, then the remaining elements will be retained. + /// Use [`retain()`] with a negated predicate if you do not need the returned iterator.
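A quick, minimal illustration of that last point, using the `extract_if` and `retain` API documented here (assuming the default-hasher feature, as in the doc examples): a fully consumed `extract_if` leaves the same set behind as `retain` with the negated predicate.

    let mut a: hashbrown::HashSet<i32> = (0..8).collect();
    let mut b = a.clone();
    let extracted: Vec<i32> = a.extract_if(|v| v % 2 == 0).collect();
    b.retain(|v| v % 2 != 0);
    assert_eq!(a, b);               // both keep only the odd values
    assert_eq!(extracted.len(), 4); // 0, 2, 4, 6 were moved out

The two differ only when the returned iterator is dropped early: `extract_if` then keeps the unvisited elements, while `retain` always processes the whole set.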
+ /// + /// [`retain()`]: HashSet::retain /// /// # Examples /// /// ``` /// use hashbrown::HashSet; /// /// let mut set: HashSet = (0..8).collect(); - /// let drained: HashSet = set.drain_filter(|v| v % 2 == 0).collect(); + /// let drained: HashSet = set.extract_if(|v| v % 2 == 0).collect(); /// /// let mut evens = drained.into_iter().collect::>(); /// let mut odds = set.into_iter().collect::>(); /// evens.sort(); /// odds.sort(); /// /// assert_eq!(evens, vec![0, 2, 4, 6]); /// assert_eq!(odds, vec![1, 3, 5, 7]); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn drain_filter(&mut self, f: F) -> DrainFilter<'_, T, F, A> + pub fn extract_if(&mut self, f: F) -> ExtractIf<'_, T, F, A> where F: FnMut(&T) -> bool, { - DrainFilter { + ExtractIf { f, - inner: DrainFilterInner { + inner: RawExtractIf { iter: unsafe { self.map.table.iter() }, table: &mut self.map.table, }, } } @@ -386,16 +437,23 @@ impl HashSet { /// Creates a new empty hash set which will use the given hasher to hash /// keys. /// - /// The hash set is also created with the default initial capacity. + /// The hash set is initially created with a capacity of 0, so it will not + /// allocate until it is first inserted into. + /// + /// # HashDoS resistance /// - /// Warning: `hasher` is normally randomly generated, and - /// is designed to allow `HashSet`s to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashSet` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashSet`]. /// /// The `hash_builder` passed should implement the [`BuildHasher`] trait for - /// the HashMap to be useful, see its documentation for details. + /// the HashSet to be useful, see its documentation for details. + /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// [`BuildHasher`]: https://doc.rust-lang.org/std/hash/trait.BuildHasher.html /// /// # Examples /// @@ -407,8 +465,6 @@ impl HashSet { /// let mut set = HashSet::with_hasher(s); /// set.insert(2); /// ``` - /// - /// [`BuildHasher`]: ../../std/hash/trait.BuildHasher.html #[cfg_attr(feature = "inline-more", inline)] pub const fn with_hasher(hasher: S) -> Self { Self { @@ -422,13 +478,20 @@ impl HashSet { /// The hash set will be able to hold at least `capacity` elements without /// reallocating. If `capacity` is 0, the hash set will not allocate. /// - /// Warning: `hasher` is normally randomly generated, and - /// is designed to allow `HashSet`s to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashSet` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashSet`].
/// /// The `hash_builder` passed should implement the [`BuildHasher`] trait for - /// the HashMap to be useful, see its documentation for details. + /// the HashSet to be useful, see its documentation for details. + /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// [`BuildHasher`]: https://doc.rust-lang.org/std/hash/trait.BuildHasher.html /// /// # Examples /// @@ -440,8 +503,6 @@ impl HashSet { /// let mut set = HashSet::with_capacity_and_hasher(10, s); /// set.insert(1); /// ``` - /// - /// [`BuildHasher`]: ../../std/hash/trait.BuildHasher.html #[cfg_attr(feature = "inline-more", inline)] pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self { Self { @@ -452,7 +513,7 @@ impl HashSet { impl HashSet where - A: Allocator + Clone, + A: Allocator, { /// Returns a reference to the underlying allocator. #[inline] /// Creates a new empty hash set which will use the given hasher to hash /// keys. /// - /// The hash set is also created with the default initial capacity. + /// The hash set is initially created with a capacity of 0, so it will not + /// allocate until it is first inserted into. + /// + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashSet` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashSet`]. + /// + /// The `hash_builder` passed should implement the [`BuildHasher`] trait for + /// the HashSet to be useful, see its documentation for details. - /// Warning: `hasher` is normally randomly generated, and - /// is designed to allow `HashSet`s to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// [`BuildHasher`]: https://doc.rust-lang.org/std/hash/trait.BuildHasher.html /// /// # Examples /// @@ -481,7 +553,7 @@ where /// set.insert(2); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn with_hasher_in(hasher: S, alloc: A) -> Self { + pub const fn with_hasher_in(hasher: S, alloc: A) -> Self { Self { map: HashMap::with_hasher_in(hasher, alloc), } @@ -493,10 +565,20 @@ where /// The hash set will be able to hold at least `capacity` elements without /// reallocating. If `capacity` is 0, the hash set will not allocate. /// - /// Warning: `hasher` is normally randomly generated, and - /// is designed to allow `HashSet`s to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. + /// # HashDoS resistance + /// + /// The `hash_builder` normally uses a fixed key by default, which does + /// not protect the `HashSet` against attacks such as [`HashDoS`]. + /// Users who require HashDoS resistance should explicitly use + /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// as the hasher when creating a [`HashSet`].
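To make the recommendation above concrete, a short example using the std `RandomState` that these docs point to (any `BuildHasher` would fit the same slot):

    use hashbrown::HashSet;
    use std::collections::hash_map::RandomState;

    // RandomState is seeded randomly per instance, giving HashDoS resistance
    // at some cost in raw hashing speed.
    let mut set: HashSet<i32, RandomState> = HashSet::with_hasher(RandomState::new());
    set.insert(1);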
+ /// + /// The `hash_builder` passed should implement the [`BuildHasher`] trait for + /// the HashSet to be useful, see its documentation for details. + /// + /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack + /// [`std::collections::hash_map::RandomState`]: https://doc.rust-lang.org/std/collections/hash_map/struct.RandomState.html + /// [`BuildHasher`]: https://doc.rust-lang.org/std/hash/trait.BuildHasher.html /// /// # Examples /// @@ -539,7 +621,7 @@ impl HashSet where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { /// Reserves capacity for at least `additional` more elements to be inserted /// in the `HashSet`. The collection may reserve more space to avoid @@ -547,7 +629,12 @@ where /// /// # Panics /// - /// Panics if the new allocation size overflows `usize`. + /// Panics if the new capacity exceeds [`isize::MAX`] bytes, and [`abort`]s the program + /// in case of an allocation error. Use [`try_reserve`](HashSet::try_reserve) instead + /// if you want to handle memory allocation failure. + /// + /// [`isize::MAX`]: https://doc.rust-lang.org/std/primitive.isize.html + /// [`abort`]: https://doc.rust-lang.org/alloc/alloc/fn.handle_alloc_error.html /// /// # Examples /// @@ -773,8 +860,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn contains(&self, value: &Q) -> bool where - T: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { self.map.contains_key(value) } @@ -800,8 +886,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn get(&self, value: &Q) -> Option<&T> where - T: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { // Avoid `Option::map` because it bloats LLVM IR. match self.map.get_key_value(value) { @@ -856,8 +941,7 @@ where #[inline] pub fn get_or_insert_owned(&mut self, value: &Q) -> &T where - T: Borrow, - Q: Hash + Eq + ToOwned, + Q: Hash + Equivalent + ToOwned, { // Although the raw entry gives us `&mut T`, we only return `&T` to be consistent with // `get`. Key mutation is "raw" because you're not supposed to affect `Eq` or `Hash`. @@ -889,8 +973,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn get_or_insert_with(&mut self, value: &Q, f: F) -> &T where - T: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, F: FnOnce(&Q) -> T, { // Although the raw entry gives us `&mut T`, we only return `&T` to be consistent with @@ -1106,8 +1189,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn remove(&mut self, value: &Q) -> bool where - T: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { self.map.remove(value).is_some() } @@ -1133,8 +1215,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn take(&mut self, value: &Q) -> Option where - T: Borrow, - Q: Hash + Eq, + Q: Hash + Equivalent, { // Avoid `Option::map` because it bloats LLVM IR. match self.map.remove_entry(value) { @@ -1144,11 +1225,53 @@ where } } +impl HashSet { + /// Returns a reference to the [`RawTable`] used underneath [`HashSet`]. + /// This function is only available if the `raw` feature of the crate is enabled. + /// + /// # Note + /// + /// Calling this function is safe, but using the raw hash table API may require + /// unsafe functions or blocks. + /// + /// The `RawTable` API gives the lowest level of control over the set, which can be useful + /// for extending the HashSet's API, but may lead to *[undefined behavior]*.
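The `Borrow` bounds replaced by `Equivalent` above mean lookups no longer have to materialize an owned key; a small sketch of what that enables, assuming the crate's default-hasher feature so `HashSet::new` is available:

```
use hashbrown::HashSet;

let mut set: HashSet<String> = HashSet::new();
set.insert("alpha".to_string());

// `str: Equivalent<String>`, so a plain `&str` works for lookup
// and removal without allocating a temporary `String`.
assert!(set.contains("alpha"));
assert!(set.remove("alpha"));
```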
+ /// + /// [`HashSet`]: struct.HashSet.html + /// [`RawTable`]: crate::raw::RawTable + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[cfg(feature = "raw")] + #[cfg_attr(feature = "inline-more", inline)] + pub fn raw_table(&self) -> &RawTable<(T, ()), A> { + self.map.raw_table() + } + + /// Returns a mutable reference to the [`RawTable`] used underneath [`HashSet`]. + /// This function is only available if the `raw` feature of the crate is enabled. + /// + /// # Note + /// + /// Calling this function is safe, but using the raw hash table API may require + /// unsafe functions or blocks. + /// + /// The `RawTable` API gives the lowest level of control over the set, which can be useful + /// for extending the HashSet's API, but may lead to *[undefined behavior]*. + /// + /// [`HashSet`]: struct.HashSet.html + /// [`RawTable`]: crate::raw::RawTable + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[cfg(feature = "raw")] + #[cfg_attr(feature = "inline-more", inline)] + pub fn raw_table_mut(&mut self) -> &mut RawTable<(T, ()), A> { + self.map.raw_table_mut() + } +} + impl PartialEq for HashSet where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { fn eq(&self, other: &Self) -> bool { if self.len() != other.len() { @@ -1163,14 +1286,14 @@ impl Eq for HashSet where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { } impl fmt::Debug for HashSet where T: fmt::Debug, - A: Allocator + Clone, + A: Allocator, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_set().entries(self.iter()).finish() @@ -1179,7 +1302,7 @@ where impl From> for HashSet where - A: Allocator + Clone, + A: Allocator, { fn from(map: HashMap) -> Self { Self { map } @@ -1190,7 +1313,7 @@ impl FromIterator for HashSet where T: Eq + Hash, S: BuildHasher + Default, - A: Default + Allocator + Clone, + A: Default + Allocator, { #[cfg_attr(feature = "inline-more", inline)] fn from_iter>(iter: I) -> Self { @@ -1205,7 +1328,7 @@ where impl From<[T; N]> for HashSet where T: Eq + Hash, - A: Default + Allocator + Clone, + A: Default + Allocator, { /// # Examples /// @@ -1225,7 +1348,7 @@ impl Extend for HashSet where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { #[cfg_attr(feature = "inline-more", inline)] fn extend>(&mut self, iter: I) { @@ -1249,7 +1372,7 @@ impl<'a, T, S, A> Extend<&'a T> for HashSet where T: 'a + Eq + Hash + Copy, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { #[cfg_attr(feature = "inline-more", inline)] fn extend>(&mut self, iter: I) { @@ -1272,7 +1395,7 @@ where impl Default for HashSet where S: Default, - A: Default + Allocator + Clone, + A: Default + Allocator, { /// Creates an empty `HashSet` with the `Default` value for the hasher.
#[cfg_attr(feature = "inline-more", inline)] @@ -1287,7 +1410,7 @@ impl BitOr<&HashSet> for &HashSet where T: Eq + Hash + Clone, S: BuildHasher + Default, - A: Allocator + Clone, + A: Allocator, { type Output = HashSet; @@ -1320,7 +1443,7 @@ impl BitAnd<&HashSet> for &HashSet where T: Eq + Hash + Clone, S: BuildHasher + Default, - A: Allocator + Clone, + A: Allocator, { type Output = HashSet; @@ -1431,7 +1554,7 @@ pub struct Iter<'a, K> { /// /// [`HashSet`]: struct.HashSet.html /// [`into_iter`]: struct.HashSet.html#method.into_iter -pub struct IntoIter { +pub struct IntoIter { iter: map::IntoIter, } @@ -1442,23 +1565,24 @@ pub struct IntoIter { /// /// [`HashSet`]: struct.HashSet.html /// [`drain`]: struct.HashSet.html#method.drain -pub struct Drain<'a, K, A: Allocator + Clone = Global> { +pub struct Drain<'a, K, A: Allocator = Global> { iter: map::Drain<'a, K, (), A>, } /// A draining iterator over entries of a `HashSet` which don't satisfy the predicate `f`. /// -/// This `struct` is created by the [`drain_filter`] method on [`HashSet`]. See its +/// This `struct` is created by the [`extract_if`] method on [`HashSet`]. See its /// documentation for more. /// -/// [`drain_filter`]: struct.HashSet.html#method.drain_filter +/// [`extract_if`]: struct.HashSet.html#method.extract_if /// [`HashSet`]: struct.HashSet.html -pub struct DrainFilter<'a, K, F, A: Allocator + Clone = Global> +#[must_use = "Iterators are lazy unless consumed"] +pub struct ExtractIf<'a, K, F, A: Allocator = Global> where F: FnMut(&K) -> bool, { f: F, - inner: DrainFilterInner<'a, K, (), A>, + inner: RawExtractIf<'a, (K, ()), A>, } /// A lazy iterator producing elements in the intersection of `HashSet`s. @@ -1468,7 +1592,7 @@ where /// /// [`HashSet`]: struct.HashSet.html /// [`intersection`]: struct.HashSet.html#method.intersection -pub struct Intersection<'a, T, S, A: Allocator + Clone = Global> { +pub struct Intersection<'a, T, S, A: Allocator = Global> { // iterator of the first set iter: Iter<'a, T>, // the second set @@ -1482,7 +1606,7 @@ pub struct Intersection<'a, T, S, A: Allocator + Clone = Global> { /// /// [`HashSet`]: struct.HashSet.html /// [`difference`]: struct.HashSet.html#method.difference -pub struct Difference<'a, T, S, A: Allocator + Clone = Global> { +pub struct Difference<'a, T, S, A: Allocator = Global> { // iterator of the first set iter: Iter<'a, T>, // the second set @@ -1496,7 +1620,7 @@ pub struct Difference<'a, T, S, A: Allocator + Clone = Global> { /// /// [`HashSet`]: struct.HashSet.html /// [`symmetric_difference`]: struct.HashSet.html#method.symmetric_difference -pub struct SymmetricDifference<'a, T, S, A: Allocator + Clone = Global> { +pub struct SymmetricDifference<'a, T, S, A: Allocator = Global> { iter: Chain, Difference<'a, T, S, A>>, } @@ -1507,11 +1631,11 @@ pub struct SymmetricDifference<'a, T, S, A: Allocator + Clone = Global> { /// /// [`HashSet`]: struct.HashSet.html /// [`union`]: struct.HashSet.html#method.union -pub struct Union<'a, T, S, A: Allocator + Clone = Global> { +pub struct Union<'a, T, S, A: Allocator = Global> { iter: Chain, Difference<'a, T, S, A>>, } -impl<'a, T, S, A: Allocator + Clone> IntoIterator for &'a HashSet { +impl<'a, T, S, A: Allocator> IntoIterator for &'a HashSet { type Item = &'a T; type IntoIter = Iter<'a, T>; @@ -1521,7 +1645,7 @@ impl<'a, T, S, A: Allocator + Clone> IntoIterator for &'a HashSet { } } -impl IntoIterator for HashSet { +impl IntoIterator for HashSet { type Item = T; type IntoIter = IntoIter; @@ -1572,6 +1696,14 @@ impl<'a, K> 
Iterator for Iter<'a, K> { fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, f) + } } impl<'a, K> ExactSizeIterator for Iter<'a, K> { #[cfg_attr(feature = "inline-more", inline)] @@ -1587,7 +1719,7 @@ impl fmt::Debug for Iter<'_, K> { } } -impl Iterator for IntoIter { +impl Iterator for IntoIter { type Item = K; #[cfg_attr(feature = "inline-more", inline)] @@ -1602,23 +1734,31 @@ impl Iterator for IntoIter { fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, |acc, (k, ())| f(acc, k)) + } } -impl ExactSizeIterator for IntoIter { +impl ExactSizeIterator for IntoIter { #[cfg_attr(feature = "inline-more", inline)] fn len(&self) -> usize { self.iter.len() } } -impl FusedIterator for IntoIter {} +impl FusedIterator for IntoIter {} -impl fmt::Debug for IntoIter { +impl fmt::Debug for IntoIter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let entries_iter = self.iter.iter().map(|(k, _)| k); f.debug_list().entries(entries_iter).finish() } } -impl Iterator for Drain<'_, K, A> { +impl Iterator for Drain<'_, K, A> { type Item = K; #[cfg_attr(feature = "inline-more", inline)] @@ -1633,37 +1773,31 @@ impl Iterator for Drain<'_, K, A> { fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, |acc, (k, ())| f(acc, k)) + } } -impl ExactSizeIterator for Drain<'_, K, A> { +impl ExactSizeIterator for Drain<'_, K, A> { #[cfg_attr(feature = "inline-more", inline)] fn len(&self) -> usize { self.iter.len() } } -impl FusedIterator for Drain<'_, K, A> {} +impl FusedIterator for Drain<'_, K, A> {} -impl fmt::Debug for Drain<'_, K, A> { +impl fmt::Debug for Drain<'_, K, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let entries_iter = self.iter.iter().map(|(k, _)| k); f.debug_list().entries(entries_iter).finish() } } -impl<'a, K, F, A: Allocator + Clone> Drop for DrainFilter<'a, K, F, A> -where - F: FnMut(&K) -> bool, -{ - #[cfg_attr(feature = "inline-more", inline)] - fn drop(&mut self) { - while let Some(item) = self.next() { - let guard = ConsumeAllOnDrop(self); - drop(item); - mem::forget(guard); - } - } -} - -impl Iterator for DrainFilter<'_, K, F, A> +impl Iterator for ExtractIf<'_, K, F, A> where F: FnMut(&K) -> bool, { @@ -1671,9 +1805,9 @@ where #[cfg_attr(feature = "inline-more", inline)] fn next(&mut self) -> Option { - let f = &mut self.f; - let (k, _) = self.inner.next(&mut |k, _| f(k))?; - Some(k) + self.inner + .next(|&mut (ref k, ())| (self.f)(k)) + .map(|(k, ())| k) } #[inline] @@ -1682,12 +1816,9 @@ where } } -impl FusedIterator for DrainFilter<'_, K, F, A> where - F: FnMut(&K) -> bool -{ -} +impl FusedIterator for ExtractIf<'_, K, F, A> where F: FnMut(&K) -> bool {} -impl Clone for Intersection<'_, T, S, A> { +impl Clone for Intersection<'_, T, S, A> { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { Intersection { @@ -1701,7 +1832,7 @@ impl<'a, T, S, A> Iterator for Intersection<'a, T, S, A> where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { type Item = &'a T; @@ -1720,13 +1851,27 @@ 
where let (_, upper) = self.iter.size_hint(); (0, upper) } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, |acc, elt| { + if self.other.contains(elt) { + f(acc, elt) + } else { + acc + } + }) + } } impl fmt::Debug for Intersection<'_, T, S, A> where T: fmt::Debug + Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.clone()).finish() @@ -1737,11 +1882,11 @@ impl FusedIterator for Intersection<'_, T, S, A> where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { } -impl Clone for Difference<'_, T, S, A> { +impl Clone for Difference<'_, T, S, A> { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { Difference { @@ -1755,7 +1900,7 @@ impl<'a, T, S, A> Iterator for Difference<'a, T, S, A> where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { type Item = &'a T; @@ -1774,13 +1919,27 @@ where let (_, upper) = self.iter.size_hint(); (0, upper) } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, |acc, elt| { + if self.other.contains(elt) { + acc + } else { + f(acc, elt) + } + }) + } } impl FusedIterator for Difference<'_, T, S, A> where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { } @@ -1788,14 +1947,14 @@ impl fmt::Debug for Difference<'_, T, S, A> where T: fmt::Debug + Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.clone()).finish() } } -impl Clone for SymmetricDifference<'_, T, S, A> { +impl Clone for SymmetricDifference<'_, T, S, A> { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { SymmetricDifference { @@ -1808,7 +1967,7 @@ impl<'a, T, S, A> Iterator for SymmetricDifference<'a, T, S, A> where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { type Item = &'a T; @@ -1820,13 +1979,21 @@ where fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, f) + } } impl FusedIterator for SymmetricDifference<'_, T, S, A> where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { } @@ -1834,14 +2001,14 @@ impl fmt::Debug for SymmetricDifference<'_, T, S, A> where T: fmt::Debug + Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.clone()).finish() } } -impl Clone for Union<'_, T, S, A> { +impl Clone for Union<'_, T, S, A> { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { Union { @@ -1854,7 +2021,7 @@ impl FusedIterator for Union<'_, T, S, A> where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { } @@ -1862,7 +2029,7 @@ impl fmt::Debug for Union<'_, T, S, A> where T: fmt::Debug + Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: Allocator, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.clone()).finish() @@ -1873,7 +2040,7 @@ impl<'a, T, S, A> Iterator for Union<'a, T, S, A> where T: Eq + Hash, S: BuildHasher, - A: Allocator + Clone, + A: 
Allocator, { type Item = &'a T; @@ -1885,6 +2052,14 @@ where fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, f) + } } /// A view into a single entry in a set, which may either be vacant or occupied. @@ -1925,7 +2100,7 @@ where /// ``` pub enum Entry<'a, T, S, A = Global> where - A: Allocator + Clone, + A: Allocator, { /// An occupied entry. /// @@ -1958,7 +2133,7 @@ where Vacant(VacantEntry<'a, T, S, A>), } -impl fmt::Debug for Entry<'_, T, S, A> { +impl fmt::Debug for Entry<'_, T, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Entry::Vacant(ref v) => f.debug_tuple("Entry").field(v).finish(), @@ -2003,11 +2178,11 @@ impl fmt::Debug for Entry<'_, T, S, A> { /// assert_eq!(set.get(&"c"), None); /// assert_eq!(set.len(), 2); /// ``` -pub struct OccupiedEntry<'a, T, S, A: Allocator + Clone = Global> { +pub struct OccupiedEntry<'a, T, S, A: Allocator = Global> { inner: map::OccupiedEntry<'a, T, (), S, A>, } -impl fmt::Debug for OccupiedEntry<'_, T, S, A> { +impl fmt::Debug for OccupiedEntry<'_, T, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("OccupiedEntry") .field("value", self.get()) @@ -2041,17 +2216,17 @@ impl fmt::Debug for OccupiedEntry<'_, T, /// } /// assert!(set.contains("b") && set.len() == 2); /// ``` -pub struct VacantEntry<'a, T, S, A: Allocator + Clone = Global> { +pub struct VacantEntry<'a, T, S, A: Allocator = Global> { inner: map::VacantEntry<'a, T, (), S, A>, } -impl fmt::Debug for VacantEntry<'_, T, S, A> { +impl fmt::Debug for VacantEntry<'_, T, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("VacantEntry").field(self.get()).finish() } } -impl<'a, T, S, A: Allocator + Clone> Entry<'a, T, S, A> { +impl<'a, T, S, A: Allocator> Entry<'a, T, S, A> { /// Sets the value of the entry, and returns an OccupiedEntry. /// /// # Examples @@ -2128,7 +2303,7 @@ impl<'a, T, S, A: Allocator + Clone> Entry<'a, T, S, A> { } } -impl OccupiedEntry<'_, T, S, A> { +impl OccupiedEntry<'_, T, S, A> { /// Gets a reference to the value in the entry. /// /// # Examples @@ -2215,7 +2390,7 @@ impl OccupiedEntry<'_, T, S, A> { } } -impl<'a, T, S, A: Allocator + Clone> VacantEntry<'a, T, S, A> { +impl<'a, T, S, A: Allocator> VacantEntry<'a, T, S, A> { /// Gets a reference to the value that would be used when inserting /// through the `VacantEntry`. 
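A brief sketch of the set-side `Entry` API documented here, again assuming the default-hasher feature:

```
use hashbrown::HashSet;

let mut set: HashSet<&str> = HashSet::new();

// `entry` probes the table once; `insert` on the returned `Entry`
// makes the value present whether it was vacant or occupied.
let occupied = set.entry("horse").insert();
assert_eq!(occupied.get(), &"horse");
assert!(set.contains("horse"));
```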
/// @@ -2295,34 +2470,30 @@ fn assert_covariance() { fn iter<'a, 'new>(v: Iter<'a, &'static str>) -> Iter<'a, &'new str> { v } - fn into_iter<'new, A: Allocator + Clone>( - v: IntoIter<&'static str, A>, - ) -> IntoIter<&'new str, A> { + fn into_iter<'new, A: Allocator>(v: IntoIter<&'static str, A>) -> IntoIter<&'new str, A> { v } - fn difference<'a, 'new, A: Allocator + Clone>( + fn difference<'a, 'new, A: Allocator>( v: Difference<'a, &'static str, DefaultHashBuilder, A>, ) -> Difference<'a, &'new str, DefaultHashBuilder, A> { v } - fn symmetric_difference<'a, 'new, A: Allocator + Clone>( + fn symmetric_difference<'a, 'new, A: Allocator>( v: SymmetricDifference<'a, &'static str, DefaultHashBuilder, A>, ) -> SymmetricDifference<'a, &'new str, DefaultHashBuilder, A> { v } - fn intersection<'a, 'new, A: Allocator + Clone>( + fn intersection<'a, 'new, A: Allocator>( v: Intersection<'a, &'static str, DefaultHashBuilder, A>, ) -> Intersection<'a, &'new str, DefaultHashBuilder, A> { v } - fn union<'a, 'new, A: Allocator + Clone>( + fn union<'a, 'new, A: Allocator>( v: Union<'a, &'static str, DefaultHashBuilder, A>, ) -> Union<'a, &'new str, DefaultHashBuilder, A> { v } - fn drain<'new, A: Allocator + Clone>( - d: Drain<'static, &'static str, A>, - ) -> Drain<'new, &'new str, A> { + fn drain<'new, A: Allocator>(d: Drain<'static, &'static str, A>) -> Drain<'new, &'new str, A> { d } } @@ -2613,10 +2784,10 @@ mod test_set { set.insert(1); set.insert(2); - let set_str = format!("{:?}", set); + let set_str = format!("{set:?}"); assert!(set_str == "{1, 2}" || set_str == "{2, 1}"); - assert_eq!(format!("{:?}", empty), "{}"); + assert_eq!(format!("{empty:?}"), "{}"); } #[test] @@ -2691,11 +2862,12 @@ mod test_set { } #[test] + #[allow(clippy::needless_borrow)] fn test_extend_ref() { let mut a = HashSet::new(); a.insert(1); - a.extend(&[2, 3, 4]); + a.extend([2, 3, 4]); assert_eq!(a.len(), 4); assert!(a.contains(&1)); @@ -2730,10 +2902,10 @@ mod test_set { } #[test] - fn test_drain_filter() { + fn test_extract_if() { { let mut set: HashSet = (0..8).collect(); - let drained = set.drain_filter(|&k| k % 2 == 0); + let drained = set.extract_if(|&k| k % 2 == 0); let mut out = drained.collect::>(); out.sort_unstable(); assert_eq!(vec![0, 2, 4, 6], out); @@ -2741,7 +2913,7 @@ mod test_set { } { let mut set: HashSet = (0..8).collect(); - drop(set.drain_filter(|&k| k % 2 == 0)); + set.extract_if(|&k| k % 2 == 0).for_each(drop); assert_eq!(set.len(), 4, "Removes non-matching items on drop"); } } @@ -2787,4 +2959,11 @@ mod test_set { set.insert(i); } } + + #[test] + fn collect() { + // At the time of writing, this hits the ZST case in from_base_index + // (and without the `map`, it does not). + let mut _set: HashSet<_> = (0..3).map(|_| ()).collect(); + } } diff --git a/vendor/hashbrown/src/table.rs b/vendor/hashbrown/src/table.rs new file mode 100644 index 0000000..a7bb5fc --- /dev/null +++ b/vendor/hashbrown/src/table.rs @@ -0,0 +1,2064 @@ +use core::{fmt, iter::FusedIterator, marker::PhantomData}; + +use crate::{ + raw::{ + Allocator, Bucket, Global, InsertSlot, RawDrain, RawExtractIf, RawIntoIter, RawIter, + RawTable, + }, + TryReserveError, +}; + +/// Low-level hash table with explicit hashing. +/// +/// The primary use case for this type over [`HashMap`] or [`HashSet`] is to +/// support types that do not implement the [`Hash`] and [`Eq`] traits, but +/// instead require additional data not contained in the key itself to compute a +/// hash and compare two elements for equality. 
+/// +/// Examples of when this can be useful include: +/// - An `IndexMap` implementation where indices into a `Vec` are stored as +/// elements in a `HashTable`. Hashing and comparing the elements +/// requires indexing the associated `Vec` to get the actual value referred to +/// by the index. +/// - Avoiding re-computing a hash when it is already known. +/// - Mutating the key of an element in a way that doesn't affect its hash. +/// +/// To achieve this, `HashTable` methods that search for an element in the table +/// require a hash value and equality function to be explicitly passed in as +/// arguments. The method will then iterate over the elements with the given +/// hash and call the equality function on each of them, until a match is found. +/// +/// In most cases, a `HashTable` will not be exposed directly in an API. It will +/// instead be wrapped in a helper type which handles the work of calculating +/// hash values and comparing elements. +/// +/// Due to its low-level nature, this type provides fewer guarantees than +/// [`HashMap`] and [`HashSet`]. Specifically, the API allows you to shoot +/// yourself in the foot by having multiple elements with identical keys in the +/// table. The table itself will still function correctly and lookups will +/// arbitrarily return one of the matching elements. However, you should avoid +/// doing this because it changes the runtime of hash table operations from +/// `O(1)` to `O(k)` where `k` is the number of duplicate entries. +/// +/// [`HashMap`]: super::HashMap +/// [`HashSet`]: super::HashSet +pub struct HashTable +where + A: Allocator, +{ + pub(crate) raw: RawTable, +} + +impl HashTable { + /// Creates an empty `HashTable`. + /// + /// The hash table is initially created with a capacity of 0, so it will not allocate until it + /// is first inserted into. + /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashTable; + /// let mut table: HashTable<&str> = HashTable::new(); + /// assert_eq!(table.len(), 0); + /// assert_eq!(table.capacity(), 0); + /// ``` + pub const fn new() -> Self { + Self { + raw: RawTable::new(), + } + } + + /// Creates an empty `HashTable` with the specified capacity. + /// + /// The hash table will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash table will not allocate. + /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashTable; + /// let mut table: HashTable<&str> = HashTable::with_capacity(10); + /// assert_eq!(table.len(), 0); + /// assert!(table.capacity() >= 10); + /// ``` + pub fn with_capacity(capacity: usize) -> Self { + Self { + raw: RawTable::with_capacity(capacity), + } + } +} + +impl HashTable +where + A: Allocator, +{ + /// Creates an empty `HashTable` using the given allocator. + /// + /// The hash table is initially created with a capacity of 0, so it will not allocate until it + /// is first inserted into.
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use bumpalo::Bump; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let bump = Bump::new(); + /// let mut table = HashTable::new_in(&bump); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// + /// // The created HashTable holds no elements + /// assert_eq!(table.len(), 0); + /// + /// // The created HashTable also doesn't allocate memory + /// assert_eq!(table.capacity(), 0); + /// + /// // Now we insert an element into the created HashTable + /// table.insert_unique(hasher(&"One"), "One", hasher); + /// // We can see that the HashTable holds 1 element + /// assert_eq!(table.len(), 1); + /// // And it also allocates some capacity + /// assert!(table.capacity() > 1); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub const fn new_in(alloc: A) -> Self { + Self { + raw: RawTable::new_in(alloc), + } + } + + /// Creates an empty `HashTable` with the specified capacity using the given allocator. + /// + /// The hash table will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash table will not allocate. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use bumpalo::Bump; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let bump = Bump::new(); + /// let mut table = HashTable::with_capacity_in(5, &bump); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// + /// // The created HashTable holds no elements + /// assert_eq!(table.len(), 0); + /// // But it can hold at least 5 elements without reallocating + /// let empty_map_capacity = table.capacity(); + /// assert!(empty_map_capacity >= 5); + /// + /// // Now we insert 5 elements into the created HashTable + /// table.insert_unique(hasher(&"One"), "One", hasher); + /// table.insert_unique(hasher(&"Two"), "Two", hasher); + /// table.insert_unique(hasher(&"Three"), "Three", hasher); + /// table.insert_unique(hasher(&"Four"), "Four", hasher); + /// table.insert_unique(hasher(&"Five"), "Five", hasher); + /// + /// // We can see that the HashTable holds 5 elements + /// assert_eq!(table.len(), 5); + /// // But its capacity isn't changed + /// assert_eq!(table.capacity(), empty_map_capacity) + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { + Self { + raw: RawTable::with_capacity_in(capacity, alloc), + } + } + + /// Returns a reference to the underlying allocator. + pub fn allocator(&self) -> &A { + self.raw.allocator() + } + + /// Returns a reference to an entry in the table with the given hash and + /// which satisfies the equality function passed. + /// + /// This method will call `eq` for all entries with the given hash, but may + /// also call it for entries with a different hash. `eq` should only return + /// true for the desired entry, at which point the search is stopped.
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), 1, hasher); + /// table.insert_unique(hasher(&2), 2, hasher); + /// table.insert_unique(hasher(&3), 3, hasher); + /// assert_eq!(table.find(hasher(&2), |&val| val == 2), Some(&2)); + /// assert_eq!(table.find(hasher(&4), |&val| val == 4), None); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn find(&self, hash: u64, eq: impl FnMut(&T) -> bool) -> Option<&T> { + self.raw.get(hash, eq) + } + + /// Returns a mutable reference to an entry in the table with the given hash + /// and which satisfies the equality function passed. + /// + /// This method will call `eq` for all entries with the given hash, but may + /// also call it for entries with a different hash. `eq` should only return + /// true for the desired entry, at which point the search is stopped. + /// + /// When mutating an entry, you should ensure that it still retains the same + /// hash value as when it was inserted, otherwise lookups of that entry may + /// fail to find it. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, "a"), |val| hasher(&val.0)); + /// if let Some(val) = table.find_mut(hasher(&1), |val| val.0 == 1) { + /// val.1 = "b"; + /// } + /// assert_eq!(table.find(hasher(&1), |val| val.0 == 1), Some(&(1, "b"))); + /// assert_eq!(table.find(hasher(&2), |val| val.0 == 2), None); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn find_mut(&mut self, hash: u64, eq: impl FnMut(&T) -> bool) -> Option<&mut T> { + self.raw.get_mut(hash, eq) + } + + /// Returns an `OccupiedEntry` for an entry in the table with the given hash + /// and which satisfies the equality function passed. + /// + /// This can be used to remove the entry from the table. Call + /// [`HashTable::entry`] instead if you wish to insert an entry if the + /// lookup fails. + /// + /// This method will call `eq` for all entries with the given hash, but may + /// also call it for entries with a different hash. `eq` should only return + /// true for the desired entry, at which point the search is stopped. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, "a"), |val| hasher(&val.0)); + /// if let Ok(entry) = table.find_entry(hasher(&1), |val| val.0 == 1) { + /// entry.remove(); + /// } + /// assert_eq!(table.find(hasher(&1), |val| val.0 == 1), None); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn find_entry( + &mut self, + hash: u64, + eq: impl FnMut(&T) -> bool, + ) -> Result, AbsentEntry<'_, T, A>> { + match self.raw.find(hash, eq) { + Some(bucket) => Ok(OccupiedEntry { + hash, + bucket, + table: self, + }), + None => Err(AbsentEntry { table: self }), + } + } + + /// Returns an `Entry` for an entry in the table with the given hash + /// and which satisfies the equality function passed. + /// + /// This can be used to remove the entry from the table, or insert a new + /// entry with the given hash if one doesn't already exist. + /// + /// This method will call `eq` for all entries with the given hash, but may + /// also call it for entries with a different hash. `eq` should only return + /// true for the desired entry, at which point the search is stopped. + /// + /// This method may grow the table in preparation for an insertion. Call + /// [`HashTable::find_entry`] if this is undesirable. + /// + /// `hasher` is called if entries need to be moved or copied to a new table. + /// This must return the same hash value that each entry was inserted with. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::hash_table::Entry; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, "a"), |val| hasher(&val.0)); + /// if let Entry::Occupied(entry) = table.entry(hasher(&1), |val| val.0 == 1, |val| hasher(&val.0)) + /// { + /// entry.remove(); + /// } + /// if let Entry::Vacant(entry) = table.entry(hasher(&2), |val| val.0 == 2, |val| hasher(&val.0)) { + /// entry.insert((2, "b")); + /// } + /// assert_eq!(table.find(hasher(&1), |val| val.0 == 1), None); + /// assert_eq!(table.find(hasher(&2), |val| val.0 == 2), Some(&(2, "b"))); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn entry( + &mut self, + hash: u64, + eq: impl FnMut(&T) -> bool, + hasher: impl Fn(&T) -> u64, + ) -> Entry<'_, T, A> { + match self.raw.find_or_find_insert_slot(hash, eq, hasher) { + Ok(bucket) => Entry::Occupied(OccupiedEntry { + hash, + bucket, + table: self, + }), + Err(insert_slot) => Entry::Vacant(VacantEntry { + hash, + insert_slot, + table: self, + }), + } + } + + /// Inserts an element into the `HashTable` with the given hash value, but + /// without checking whether an equivalent element already exists within the + /// table. + /// + /// `hasher` is called if entries need to be moved or copied to a new table. + /// This must return the same hash value that each entry was inserted with. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut v = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// v.insert_unique(hasher(&1), 1, hasher); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn insert_unique( + &mut self, + hash: u64, + value: T, + hasher: impl Fn(&T) -> u64, + ) -> OccupiedEntry<'_, T, A> { + let bucket = self.raw.insert(hash, value, hasher); + OccupiedEntry { + hash, + bucket, + table: self, + } + } + + /// Clears the table, removing all values. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut v = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// v.insert_unique(hasher(&1), 1, hasher); + /// v.clear(); + /// assert!(v.is_empty()); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn clear(&mut self) { + self.raw.clear(); + } + + /// Shrinks the capacity of the table as much as possible. It will drop + /// down as much as possible while maintaining the internal rules + /// and possibly leaving some space in accordance with the resize policy. + /// + /// `hasher` is called if entries need to be moved or copied to a new table. + /// This must return the same hash value that each entry was inserted with. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::with_capacity(100); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), 1, hasher); + /// table.insert_unique(hasher(&2), 2, hasher); + /// assert!(table.capacity() >= 100); + /// table.shrink_to_fit(hasher); + /// assert!(table.capacity() >= 2); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn shrink_to_fit(&mut self, hasher: impl Fn(&T) -> u64) { + self.raw.shrink_to(self.len(), hasher) + } + + /// Shrinks the capacity of the table with a lower limit. It will drop + /// down no lower than the supplied limit while maintaining the internal rules + /// and possibly leaving some space in accordance with the resize policy. + /// + /// `hasher` is called if entries need to be moved or copied to a new table. + /// This must return the same hash value that each entry was inserted with. + /// + /// Panics if the current capacity is smaller than the supplied + /// minimum capacity. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::with_capacity(100); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), 1, hasher); + /// table.insert_unique(hasher(&2), 2, hasher); + /// assert!(table.capacity() >= 100); + /// table.shrink_to(10, hasher); + /// assert!(table.capacity() >= 10); + /// table.shrink_to(0, hasher); + /// assert!(table.capacity() >= 2); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn shrink_to(&mut self, min_capacity: usize, hasher: impl Fn(&T) -> u64) { + self.raw.shrink_to(min_capacity, hasher); + } + + /// Reserves capacity for at least `additional` more elements to be inserted + /// in the `HashTable`. The collection may reserve more space to avoid + /// frequent reallocations. + /// + /// `hasher` is called if entries need to be moved or copied to a new table. + /// This must return the same hash value that each entry was inserted with. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds [`isize::MAX`] bytes, and [`abort`]s the program + /// in case of an allocation error. Use [`try_reserve`](HashTable::try_reserve) instead + /// if you want to handle memory allocation failure. + /// + /// [`isize::MAX`]: https://doc.rust-lang.org/std/primitive.isize.html + /// [`abort`]: https://doc.rust-lang.org/alloc/alloc/fn.handle_alloc_error.html + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table: HashTable = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.reserve(10, hasher); + /// assert!(table.capacity() >= 10); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) { + self.raw.reserve(additional, hasher) + } + + /// Tries to reserve capacity for at least `additional` more elements to be inserted + /// in the given `HashTable`. The collection may reserve more space to avoid + /// frequent reallocations. + /// + /// `hasher` is called if entries need to be moved or copied to a new table. + /// This must return the same hash value that each entry was inserted with. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned.
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table: HashTable = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table + /// .try_reserve(10, hasher) + /// .expect("why is the test harness OOMing on 10 bytes?"); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn try_reserve( + &mut self, + additional: usize, + hasher: impl Fn(&T) -> u64, + ) -> Result<(), TryReserveError> { + self.raw.try_reserve(additional, hasher) + } + + /// Returns the number of elements the table can hold without reallocating. + /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashTable; + /// let table: HashTable = HashTable::with_capacity(100); + /// assert!(table.capacity() >= 100); + /// ``` + pub fn capacity(&self) -> usize { + self.raw.capacity() + } + + /// Returns the number of elements in the table. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// let mut v = HashTable::new(); + /// assert_eq!(v.len(), 0); + /// v.insert_unique(hasher(&1), 1, hasher); + /// assert_eq!(v.len(), 1); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn len(&self) -> usize { + self.raw.len() + } + + /// Returns `true` if the set contains no elements. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// let mut v = HashTable::new(); + /// assert!(v.is_empty()); + /// v.insert_unique(hasher(&1), 1, hasher); + /// assert!(!v.is_empty()); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn is_empty(&self) -> bool { + self.raw.is_empty() + } + + /// An iterator visiting all elements in arbitrary order. + /// The iterator element type is `&'a T`. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&"a"), "b", hasher); + /// table.insert_unique(hasher(&"b"), "b", hasher); + /// + /// // Will print in an arbitrary order. + /// for x in table.iter() { + /// println!("{}", x); + /// } + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn iter(&self) -> Iter<'_, T> { + Iter { + inner: unsafe { self.raw.iter() }, + marker: PhantomData, + } + } + + /// An iterator visiting all elements in arbitrary order, + /// with mutable references to the elements. + /// The iterator element type is `&'a mut T`. 
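Because every growth path threads the same `hasher` closure through, `try_reserve` combines fallible allocation with that rehashing contract; a compact sketch, using a self-hashing `u64` table purely for illustration:

```
use hashbrown::HashTable;

let mut table: HashTable<u64> = HashTable::new();

// The identity function is a legal (if weak) hash for u64 values;
// real code would call `BuildHasher::hash_one` instead.
match table.try_reserve(1024, |&v| v) {
    Ok(()) => assert!(table.capacity() >= 1024),
    Err(err) => eprintln!("allocation failure: {err:?}"),
}
```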
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), 1, hasher); + /// table.insert_unique(hasher(&2), 2, hasher); + /// table.insert_unique(hasher(&3), 3, hasher); + /// + /// // Update all values + /// for val in table.iter_mut() { + /// *val *= 2; + /// } + /// + /// assert_eq!(table.len(), 3); + /// let mut vec: Vec = Vec::new(); + /// + /// for val in &table { + /// println!("val: {}", val); + /// vec.push(*val); + /// } + /// + /// // The `Iter` iterator produces items in arbitrary order, so the + /// // items must be sorted to test them against a sorted array. + /// vec.sort_unstable(); + /// assert_eq!(vec, [2, 4, 6]); + /// + /// assert_eq!(table.len(), 3); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn iter_mut(&mut self) -> IterMut<'_, T> { + IterMut { + inner: unsafe { self.raw.iter() }, + marker: PhantomData, + } + } + + /// Retains only the elements specified by the predicate. + /// + /// In other words, remove all elements `e` such that `f(&e)` returns `false`. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// for x in 1..=6 { + /// table.insert_unique(hasher(&x), x, hasher); + /// } + /// table.retain(|&mut x| x % 2 == 0); + /// assert_eq!(table.len(), 3); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn retain(&mut self, mut f: impl FnMut(&mut T) -> bool) { + // Here we only use `iter` as a temporary, preventing use-after-free + unsafe { + for item in self.raw.iter() { + if !f(item.as_mut()) { + self.raw.erase(item); + } + } + } + } + + /// Clears the set, returning all elements in an iterator. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// for x in 1..=3 { + /// table.insert_unique(hasher(&x), x, hasher); + /// } + /// assert!(!table.is_empty()); + /// + /// // print 1, 2, 3 in an arbitrary order + /// for i in table.drain() { + /// println!("{}", i); + /// } + /// + /// assert!(table.is_empty()); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn drain(&mut self) -> Drain<'_, T, A> { + Drain { + inner: self.raw.drain(), + } + } + + /// Drains elements which are true under the given predicate, + /// and returns an iterator over the removed items. + /// + /// In other words, move all elements `e` such that `f(&e)` returns `true` out + /// into another iterator. + /// + /// If the returned `ExtractIf` is not exhausted, e.g. 
because it is dropped without iterating + /// or the iteration short-circuits, then the remaining elements will be retained. + /// Use [`retain()`] with a negated predicate if you do not need the returned iterator. + /// + /// [`retain()`]: HashTable::retain + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// for x in 0..8 { + /// table.insert_unique(hasher(&x), x, hasher); + /// } + /// let drained: Vec = table.extract_if(|&mut v| v % 2 == 0).collect(); + /// + /// let mut evens = drained.into_iter().collect::>(); + /// let mut odds = table.into_iter().collect::>(); + /// evens.sort(); + /// odds.sort(); + /// + /// assert_eq!(evens, vec![0, 2, 4, 6]); + /// assert_eq!(odds, vec![1, 3, 5, 7]); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn extract_if(&mut self, f: F) -> ExtractIf<'_, T, F, A> + where + F: FnMut(&mut T) -> bool, + { + ExtractIf { + f, + inner: RawExtractIf { + iter: unsafe { self.raw.iter() }, + table: &mut self.raw, + }, + } + } + + /// Attempts to get mutable references to `N` values in the map at once. + /// + /// The `eq` argument should be a closure such that `eq(i, k)` returns true if `k` is equal to + /// the `i`th key to be looked up. + /// + /// Returns an array of length `N` with the results of each query. For soundness, at most one + /// mutable reference will be returned to any value. `None` will be returned if any of the + /// keys are duplicates or missing. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::hash_table::Entry; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut libraries: HashTable<(&str, u32)> = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// for (k, v) in [ + /// ("Bodleian Library", 1602), + /// ("Athenæum", 1807), + /// ("Herzogin-Anna-Amalia-Bibliothek", 1691), + /// ("Library of Congress", 1800), + /// ] { + /// libraries.insert_unique(hasher(&k), (k, v), |(k, _)| hasher(&k)); + /// } + /// + /// let keys = ["Athenæum", "Library of Congress"]; + /// let got = libraries.get_many_mut(keys.map(|k| hasher(&k)), |i, val| keys[i] == val.0); + /// assert_eq!( + /// got, + /// Some([&mut ("Athenæum", 1807), &mut ("Library of Congress", 1800),]), + /// ); + /// + /// // Missing keys result in None + /// let keys = ["Athenæum", "New York Public Library"]; + /// let got = libraries.get_many_mut(keys.map(|k| hasher(&k)), |i, val| keys[i] == val.0); + /// assert_eq!(got, None); + /// + /// // Duplicate keys result in None + /// let keys = ["Athenæum", "Athenæum"]; + /// let got = libraries.get_many_mut(keys.map(|k| hasher(&k)), |i, val| keys[i] == val.0); + /// assert_eq!(got, None); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn get_many_mut( + &mut self, + hashes: [u64; N], + eq: impl FnMut(usize, &T) -> bool, + ) -> Option<[&'_ mut T; N]> { + self.raw.get_many_mut(hashes, eq) + } + + /// Attempts to get mutable references to `N` values in the map at once, without validating that + /// the values are unique. + /// + /// The `eq` argument should be a closure such that `eq(i, k)` returns true if `k` is equal to + /// the `i`th key to be looked up. + /// + /// Returns an array of length `N` with the results of each query. `None` will be returned if + /// any of the keys are missing. + /// + /// For a safe alternative see [`get_many_mut`](`HashTable::get_many_mut`). + /// + /// # Safety + /// + /// Calling this method with overlapping keys is *[undefined behavior]* even if the resulting + /// references are not used. 
+ /// + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::hash_table::Entry; + /// use hashbrown::HashTable; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut libraries: HashTable<(&str, u32)> = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// for (k, v) in [ + /// ("Bodleian Library", 1602), + /// ("Athenæum", 1807), + /// ("Herzogin-Anna-Amalia-Bibliothek", 1691), + /// ("Library of Congress", 1800), + /// ] { + /// libraries.insert_unique(hasher(&k), (k, v), |(k, _)| hasher(&k)); + /// } + /// + /// let keys = ["Athenæum", "Library of Congress"]; + /// let got = libraries.get_many_mut(keys.map(|k| hasher(&k)), |i, val| keys[i] == val.0); + /// assert_eq!( + /// got, + /// Some([&mut ("Athenæum", 1807), &mut ("Library of Congress", 1800),]), + /// ); + /// + /// // Missing keys result in None + /// let keys = ["Athenæum", "New York Public Library"]; + /// let got = libraries.get_many_mut(keys.map(|k| hasher(&k)), |i, val| keys[i] == val.0); + /// assert_eq!(got, None); + /// + /// // Duplicate keys result in None + /// let keys = ["Athenæum", "Athenæum"]; + /// let got = libraries.get_many_mut(keys.map(|k| hasher(&k)), |i, val| keys[i] == val.0); + /// assert_eq!(got, None); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub unsafe fn get_many_unchecked_mut( + &mut self, + hashes: [u64; N], + eq: impl FnMut(usize, &T) -> bool, + ) -> Option<[&'_ mut T; N]> { + self.raw.get_many_unchecked_mut(hashes, eq) + } +} + +impl IntoIterator for HashTable +where + A: Allocator, +{ + type Item = T; + type IntoIter = IntoIter; + + fn into_iter(self) -> IntoIter { + IntoIter { + inner: self.raw.into_iter(), + } + } +} + +impl<'a, T, A> IntoIterator for &'a HashTable +where + A: Allocator, +{ + type Item = &'a T; + type IntoIter = Iter<'a, T>; + + fn into_iter(self) -> Iter<'a, T> { + self.iter() + } +} + +impl<'a, T, A> IntoIterator for &'a mut HashTable +where + A: Allocator, +{ + type Item = &'a mut T; + type IntoIter = IterMut<'a, T>; + + fn into_iter(self) -> IterMut<'a, T> { + self.iter_mut() + } +} + +impl Default for HashTable +where + A: Allocator + Default, +{ + fn default() -> Self { + Self { + raw: Default::default(), + } + } +} + +impl Clone for HashTable +where + T: Clone, + A: Allocator + Clone, +{ + fn clone(&self) -> Self { + Self { + raw: self.raw.clone(), + } + } +} + +impl fmt::Debug for HashTable +where + T: fmt::Debug, + A: Allocator, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_set().entries(self.iter()).finish() + } +} + +/// A view into a single entry in a table, which may either be vacant or occupied. +/// +/// This `enum` is constructed from the [`entry`] method on [`HashTable`]. 
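The by-reference `IntoIterator` impls above make plain `for` loops work on `&HashTable` and `&mut HashTable`; a short sketch, again using identity hashing of `u64` values for brevity:

```
use hashbrown::HashTable;

let mut table: HashTable<u64> = HashTable::new();
for x in [1u64, 2, 3] {
    table.insert_unique(x, x, |&v| v);
}

// `&HashTable` yields `&T`; `&mut HashTable` would yield `&mut T`.
let mut sum = 0;
for v in &table {
    sum += *v;
}
assert_eq!(sum, 6);
```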
+/// +/// [`HashTable`]: struct.HashTable.html +/// [`entry`]: struct.HashTable.html#method.entry +/// +/// # Examples +/// +/// ``` +/// # #[cfg(feature = "nightly")] +/// # fn test() { +/// use ahash::AHasher; +/// use hashbrown::hash_table::{Entry, HashTable, OccupiedEntry}; +/// use std::hash::{BuildHasher, BuildHasherDefault}; +/// +/// let mut table = HashTable::new(); +/// let hasher = BuildHasherDefault::::default(); +/// let hasher = |val: &_| hasher.hash_one(val); +/// for x in ["a", "b", "c"] { +/// table.insert_unique(hasher(&x), x, hasher); +/// } +/// assert_eq!(table.len(), 3); +/// +/// // Existing value (insert) +/// let entry: Entry<_> = table.entry(hasher(&"a"), |&x| x == "a", hasher); +/// let _raw_o: OccupiedEntry<_, _> = entry.insert("a"); +/// assert_eq!(table.len(), 3); +/// // Nonexistent value (insert) +/// table.entry(hasher(&"d"), |&x| x == "d", hasher).insert("d"); +/// +/// // Existing value (or_insert) +/// table +/// .entry(hasher(&"b"), |&x| x == "b", hasher) +/// .or_insert("b"); +/// // Nonexistent value (or_insert) +/// table +/// .entry(hasher(&"e"), |&x| x == "e", hasher) +/// .or_insert("e"); +/// +/// println!("Our HashTable: {:?}", table); +/// +/// let mut vec: Vec<_> = table.iter().copied().collect(); +/// // The `Iter` iterator produces items in arbitrary order, so the +/// // items must be sorted to test them against a sorted array. +/// vec.sort_unstable(); +/// assert_eq!(vec, ["a", "b", "c", "d", "e"]); +/// # } +/// # fn main() { +/// # #[cfg(feature = "nightly")] +/// # test() +/// # } +/// ``` +pub enum Entry<'a, T, A = Global> +where + A: Allocator, +{ + /// An occupied entry. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use ahash::AHasher; + /// use hashbrown::hash_table::{Entry, HashTable, OccupiedEntry}; + /// use std::hash::{BuildHasher, BuildHasherDefault}; + /// + /// let mut table = HashTable::new(); + /// let hasher = BuildHasherDefault::::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// for x in ["a", "b"] { + /// table.insert_unique(hasher(&x), x, hasher); + /// } + /// + /// match table.entry(hasher(&"a"), |&x| x == "a", hasher) { + /// Entry::Vacant(_) => unreachable!(), + /// Entry::Occupied(_) => {} + /// } + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + Occupied(OccupiedEntry<'a, T, A>), + + /// A vacant entry. 
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #[cfg(feature = "nightly")]
+    /// # fn test() {
+    /// use ahash::AHasher;
+    /// use hashbrown::hash_table::{Entry, HashTable, OccupiedEntry};
+    /// use std::hash::{BuildHasher, BuildHasherDefault};
+    ///
+    /// let mut table = HashTable::<&str>::new();
+    /// let hasher = BuildHasherDefault::<AHasher>::default();
+    /// let hasher = |val: &_| hasher.hash_one(val);
+    ///
+    /// match table.entry(hasher(&"a"), |&x| x == "a", hasher) {
+    ///     Entry::Vacant(_) => {}
+    ///     Entry::Occupied(_) => unreachable!(),
+    /// }
+    /// # }
+    /// # fn main() {
+    /// #     #[cfg(feature = "nightly")]
+    /// #     test()
+    /// # }
+    /// ```
+    Vacant(VacantEntry<'a, T, A>),
+}
+
+impl<T: fmt::Debug, A: Allocator> fmt::Debug for Entry<'_, T, A> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match *self {
+            Entry::Vacant(ref v) => f.debug_tuple("Entry").field(v).finish(),
+            Entry::Occupied(ref o) => f.debug_tuple("Entry").field(o).finish(),
+        }
+    }
+}
+
+impl<'a, T, A> Entry<'a, T, A>
+where
+    A: Allocator,
+{
+    /// Sets the value of the entry, replacing any existing value if there is
+    /// one, and returns an [`OccupiedEntry`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #[cfg(feature = "nightly")]
+    /// # fn test() {
+    /// use ahash::AHasher;
+    /// use hashbrown::HashTable;
+    /// use std::hash::{BuildHasher, BuildHasherDefault};
+    ///
+    /// let mut table: HashTable<&str> = HashTable::new();
+    /// let hasher = BuildHasherDefault::<AHasher>::default();
+    /// let hasher = |val: &_| hasher.hash_one(val);
+    ///
+    /// let entry = table
+    ///     .entry(hasher(&"horseyland"), |&x| x == "horseyland", hasher)
+    ///     .insert("horseyland");
+    ///
+    /// assert_eq!(entry.get(), &"horseyland");
+    /// # }
+    /// # fn main() {
+    /// #     #[cfg(feature = "nightly")]
+    /// #     test()
+    /// # }
+    /// ```
+    pub fn insert(self, value: T) -> OccupiedEntry<'a, T, A> {
+        match self {
+            Entry::Occupied(mut entry) => {
+                *entry.get_mut() = value;
+                entry
+            }
+            Entry::Vacant(entry) => entry.insert(value),
+        }
+    }
+
+    /// Ensures a value is in the entry by inserting if it was vacant.
+    ///
+    /// Returns an [`OccupiedEntry`] pointing to the now-occupied entry.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #[cfg(feature = "nightly")]
+    /// # fn test() {
+    /// use ahash::AHasher;
+    /// use hashbrown::HashTable;
+    /// use std::hash::{BuildHasher, BuildHasherDefault};
+    ///
+    /// let mut table: HashTable<&str> = HashTable::new();
+    /// let hasher = BuildHasherDefault::<AHasher>::default();
+    /// let hasher = |val: &_| hasher.hash_one(val);
+    ///
+    /// // nonexistent key
+    /// table
+    ///     .entry(hasher(&"poneyland"), |&x| x == "poneyland", hasher)
+    ///     .or_insert("poneyland");
+    /// assert!(table
+    ///     .find(hasher(&"poneyland"), |&x| x == "poneyland")
+    ///     .is_some());
+    ///
+    /// // existing key
+    /// table
+    ///     .entry(hasher(&"poneyland"), |&x| x == "poneyland", hasher)
+    ///     .or_insert("poneyland");
+    /// assert!(table
+    ///     .find(hasher(&"poneyland"), |&x| x == "poneyland")
+    ///     .is_some());
+    /// assert_eq!(table.len(), 1);
+    /// # }
+    /// # fn main() {
+    /// #     #[cfg(feature = "nightly")]
+    /// #     test()
+    /// # }
+    /// ```
+    pub fn or_insert(self, default: T) -> OccupiedEntry<'a, T, A> {
+        match self {
+            Entry::Occupied(entry) => entry,
+            Entry::Vacant(entry) => entry.insert(default),
+        }
+    }
+
+    /// Ensures a value is in the entry by inserting the result of the default function if empty.
+    ///
+    /// Returns an [`OccupiedEntry`] pointing to the now-occupied entry.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #[cfg(feature = "nightly")]
+    /// # fn test() {
+    /// use ahash::AHasher;
+    /// use hashbrown::HashTable;
+    /// use std::hash::{BuildHasher, BuildHasherDefault};
+    ///
+    /// let mut table: HashTable<String> = HashTable::new();
+    /// let hasher = BuildHasherDefault::<AHasher>::default();
+    /// let hasher = |val: &_| hasher.hash_one(val);
+    ///
+    /// table
+    ///     .entry(hasher("poneyland"), |x| x == "poneyland", |val| hasher(val))
+    ///     .or_insert_with(|| "poneyland".to_string());
+    ///
+    /// assert!(table
+    ///     .find(hasher(&"poneyland"), |x| x == "poneyland")
+    ///     .is_some());
+    /// # }
+    /// # fn main() {
+    /// #     #[cfg(feature = "nightly")]
+    /// #     test()
+    /// # }
+    /// ```
+    pub fn or_insert_with(self, default: impl FnOnce() -> T) -> OccupiedEntry<'a, T, A> {
+        match self {
+            Entry::Occupied(entry) => entry,
+            Entry::Vacant(entry) => entry.insert(default()),
+        }
+    }
+
+    /// Provides in-place mutable access to an occupied entry before any
+    /// potential inserts into the table.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #[cfg(feature = "nightly")]
+    /// # fn test() {
+    /// use ahash::AHasher;
+    /// use hashbrown::HashTable;
+    /// use std::hash::{BuildHasher, BuildHasherDefault};
+    ///
+    /// let mut table: HashTable<(&str, u32)> = HashTable::new();
+    /// let hasher = BuildHasherDefault::<AHasher>::default();
+    /// let hasher = |val: &_| hasher.hash_one(val);
+    ///
+    /// table
+    ///     .entry(
+    ///         hasher(&"poneyland"),
+    ///         |&(x, _)| x == "poneyland",
+    ///         |(k, _)| hasher(&k),
+    ///     )
+    ///     .and_modify(|(_, v)| *v += 1)
+    ///     .or_insert(("poneyland", 42));
+    /// assert_eq!(
+    ///     table.find(hasher(&"poneyland"), |&(k, _)| k == "poneyland"),
+    ///     Some(&("poneyland", 42))
+    /// );
+    ///
+    /// table
+    ///     .entry(
+    ///         hasher(&"poneyland"),
+    ///         |&(x, _)| x == "poneyland",
+    ///         |(k, _)| hasher(&k),
+    ///     )
+    ///     .and_modify(|(_, v)| *v += 1)
+    ///     .or_insert(("poneyland", 42));
+    /// assert_eq!(
+    ///     table.find(hasher(&"poneyland"), |&(k, _)| k == "poneyland"),
+    ///     Some(&("poneyland", 43))
+    /// );
+    /// # }
+    /// # fn main() {
+    /// #     #[cfg(feature = "nightly")]
+    /// #     test()
+    /// # }
+    /// ```
+    pub fn and_modify(self, f: impl FnOnce(&mut T)) -> Self {
+        match self {
+            Entry::Occupied(mut entry) => {
+                f(entry.get_mut());
+                Entry::Occupied(entry)
+            }
+            Entry::Vacant(entry) => Entry::Vacant(entry),
+        }
+    }
+}
+
+/// A view into an occupied entry in a `HashTable`.
+/// It is part of the [`Entry`] enum.
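The `and_modify`/`or_insert` pair documented above composes into the usual counter idiom, with `HashTable`'s explicit hash, equality, and re-hash arguments spelled out. A minimal sketch under the same assumptions as the doc tests (hashbrown 0.14 plus `ahash`); the `counts`, `build`, and `hash` names are illustrative:

```rust
use ahash::AHasher;
use hashbrown::HashTable;
use std::hash::{BuildHasher, BuildHasherDefault};

fn main() {
    let mut counts: HashTable<(&str, u32)> = HashTable::new();
    let build = BuildHasherDefault::<AHasher>::default();
    let hash = |val: &&str| build.hash_one(val);

    // Count occurrences: bump the counter if present, insert 1 otherwise.
    for word in ["apple", "pear", "apple"] {
        counts
            .entry(hash(&word), |&(k, _)| k == word, |(k, _)| hash(k))
            .and_modify(|(_, n)| *n += 1)
            .or_insert((word, 1));
    }

    assert_eq!(
        counts.find(hash(&"apple"), |&(k, _)| k == "apple"),
        Some(&("apple", 2))
    );
}
```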
+///
+/// [`Entry`]: enum.Entry.html
+///
+/// # Examples
+///
+/// ```
+/// # #[cfg(feature = "nightly")]
+/// # fn test() {
+/// use ahash::AHasher;
+/// use hashbrown::hash_table::{Entry, HashTable, OccupiedEntry};
+/// use std::hash::{BuildHasher, BuildHasherDefault};
+///
+/// let mut table = HashTable::new();
+/// let hasher = BuildHasherDefault::<AHasher>::default();
+/// let hasher = |val: &_| hasher.hash_one(val);
+/// for x in ["a", "b", "c"] {
+///     table.insert_unique(hasher(&x), x, hasher);
+/// }
+/// assert_eq!(table.len(), 3);
+///
+/// let _entry_o: OccupiedEntry<_, _> = table.find_entry(hasher(&"a"), |&x| x == "a").unwrap();
+/// assert_eq!(table.len(), 3);
+///
+/// // Existing key
+/// match table.entry(hasher(&"a"), |&x| x == "a", hasher) {
+///     Entry::Vacant(_) => unreachable!(),
+///     Entry::Occupied(view) => {
+///         assert_eq!(view.get(), &"a");
+///     }
+/// }
+///
+/// assert_eq!(table.len(), 3);
+///
+/// // Existing key (take)
+/// match table.entry(hasher(&"c"), |&x| x == "c", hasher) {
+///     Entry::Vacant(_) => unreachable!(),
+///     Entry::Occupied(view) => {
+///         assert_eq!(view.remove().0, "c");
+///     }
+/// }
+/// assert_eq!(table.find(hasher(&"c"), |&x| x == "c"), None);
+/// assert_eq!(table.len(), 2);
+/// # }
+/// # fn main() {
+/// #     #[cfg(feature = "nightly")]
+/// #     test()
+/// # }
+/// ```
+pub struct OccupiedEntry<'a, T, A = Global>
+where
+    A: Allocator,
+{
+    hash: u64,
+    bucket: Bucket<T>,
+    table: &'a mut HashTable<T, A>,
+}
+
+unsafe impl<T, A> Send for OccupiedEntry<'_, T, A>
+where
+    T: Send,
+    A: Send + Allocator,
+{
+}
+unsafe impl<T, A> Sync for OccupiedEntry<'_, T, A>
+where
+    T: Sync,
+    A: Sync + Allocator,
+{
+}
+
+impl<T: fmt::Debug, A: Allocator> fmt::Debug for OccupiedEntry<'_, T, A> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("OccupiedEntry")
+            .field("value", self.get())
+            .finish()
+    }
+}
+
+impl<'a, T, A> OccupiedEntry<'a, T, A>
+where
+    A: Allocator,
+{
+    /// Takes the value out of the entry, and returns it along with a
+    /// `VacantEntry` that can be used to insert another value with the same
+    /// hash as the one that was just removed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #[cfg(feature = "nightly")]
+    /// # fn test() {
+    /// use ahash::AHasher;
+    /// use hashbrown::hash_table::Entry;
+    /// use hashbrown::HashTable;
+    /// use std::hash::{BuildHasher, BuildHasherDefault};
+    ///
+    /// let mut table: HashTable<&str> = HashTable::new();
+    /// let hasher = BuildHasherDefault::<AHasher>::default();
+    /// let hasher = |val: &_| hasher.hash_one(val);
+    /// // The table is empty
+    /// assert!(table.is_empty() && table.capacity() == 0);
+    ///
+    /// table.insert_unique(hasher(&"poneyland"), "poneyland", hasher);
+    /// let capacity_before_remove = table.capacity();
+    ///
+    /// if let Entry::Occupied(o) = table.entry(hasher(&"poneyland"), |&x| x == "poneyland", hasher) {
+    ///     assert_eq!(o.remove().0, "poneyland");
+    /// }
+    ///
+    /// assert!(table
+    ///     .find(hasher(&"poneyland"), |&x| x == "poneyland")
+    ///     .is_none());
+    /// // Now the table holds no elements, but its capacity is unchanged
+    /// assert!(table.len() == 0 && table.capacity() == capacity_before_remove);
+    /// # }
+    /// # fn main() {
+    /// #     #[cfg(feature = "nightly")]
+    /// #     test()
+    /// # }
+    /// ```
+    pub fn remove(self) -> (T, VacantEntry<'a, T, A>) {
+        let (val, slot) = unsafe { self.table.raw.remove(self.bucket) };
+        (
+            val,
+            VacantEntry {
+                hash: self.hash,
+                insert_slot: slot,
+                table: self.table,
+            },
+        )
+    }
+
+    /// Gets a reference to the value in the entry.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #[cfg(feature = "nightly")]
+    /// # fn test() {
+    /// use ahash::AHasher;
+    /// use hashbrown::hash_table::Entry;
+    /// use hashbrown::HashTable;
+    /// use std::hash::{BuildHasher, BuildHasherDefault};
+    ///
+    /// let mut table: HashTable<&str> = HashTable::new();
+    /// let hasher = BuildHasherDefault::<AHasher>::default();
+    /// let hasher = |val: &_| hasher.hash_one(val);
+    /// table.insert_unique(hasher(&"poneyland"), "poneyland", hasher);
+    ///
+    /// match table.entry(hasher(&"poneyland"), |&x| x == "poneyland", hasher) {
+    ///     Entry::Vacant(_) => panic!(),
+    ///     Entry::Occupied(entry) => assert_eq!(entry.get(), &"poneyland"),
+    /// }
+    /// # }
+    /// # fn main() {
+    /// #     #[cfg(feature = "nightly")]
+    /// #     test()
+    /// # }
+    /// ```
+    pub fn get(&self) -> &T {
+        unsafe { self.bucket.as_ref() }
+    }
+
+    /// Gets a mutable reference to the value in the entry.
+    ///
+    /// If you need a reference to the `OccupiedEntry` which may outlive the
+    /// destruction of the `Entry` value, see [`into_mut`].
+    ///
+    /// [`into_mut`]: #method.into_mut
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #[cfg(feature = "nightly")]
+    /// # fn test() {
+    /// use ahash::AHasher;
+    /// use hashbrown::hash_table::Entry;
+    /// use hashbrown::HashTable;
+    /// use std::hash::{BuildHasher, BuildHasherDefault};
+    ///
+    /// let mut table: HashTable<(&str, u32)> = HashTable::new();
+    /// let hasher = BuildHasherDefault::<AHasher>::default();
+    /// let hasher = |val: &_| hasher.hash_one(val);
+    /// table.insert_unique(hasher(&"poneyland"), ("poneyland", 12), |(k, _)| hasher(&k));
+    ///
+    /// assert_eq!(
+    ///     table.find(hasher(&"poneyland"), |&(x, _)| x == "poneyland",),
+    ///     Some(&("poneyland", 12))
+    /// );
+    ///
+    /// if let Entry::Occupied(mut o) = table.entry(
+    ///     hasher(&"poneyland"),
+    ///     |&(x, _)| x == "poneyland",
+    ///     |(k, _)| hasher(&k),
+    /// ) {
+    ///     o.get_mut().1 += 10;
+    ///     assert_eq!(o.get().1, 22);
+    ///
+    ///     // We can use the same Entry multiple times.
+    ///     o.get_mut().1 += 2;
+    /// }
+    ///
+    /// assert_eq!(
+    ///     table.find(hasher(&"poneyland"), |&(x, _)| x == "poneyland",),
+    ///     Some(&("poneyland", 24))
+    /// );
+    /// # }
+    /// # fn main() {
+    /// #     #[cfg(feature = "nightly")]
+    /// #     test()
+    /// # }
+    /// ```
+    pub fn get_mut(&mut self) -> &mut T {
+        unsafe { self.bucket.as_mut() }
+    }
+
+    /// Converts the OccupiedEntry into a mutable reference to the value in the entry
+    /// with a lifetime bound to the table itself.
+    ///
+    /// If you need multiple references to the `OccupiedEntry`, see [`get_mut`].
+    ///
+    /// [`get_mut`]: #method.get_mut
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #[cfg(feature = "nightly")]
+    /// # fn test() {
+    /// use ahash::AHasher;
+    /// use hashbrown::hash_table::Entry;
+    /// use hashbrown::HashTable;
+    /// use std::hash::{BuildHasher, BuildHasherDefault};
+    ///
+    /// let mut table: HashTable<(&str, u32)> = HashTable::new();
+    /// let hasher = BuildHasherDefault::<AHasher>::default();
+    /// let hasher = |val: &_| hasher.hash_one(val);
+    /// table.insert_unique(hasher(&"poneyland"), ("poneyland", 12), |(k, _)| hasher(&k));
+    ///
+    /// assert_eq!(
+    ///     table.find(hasher(&"poneyland"), |&(x, _)| x == "poneyland",),
+    ///     Some(&("poneyland", 12))
+    /// );
+    ///
+    /// let value: &mut (&str, u32);
+    /// match table.entry(
+    ///     hasher(&"poneyland"),
+    ///     |&(x, _)| x == "poneyland",
+    ///     |(k, _)| hasher(&k),
+    /// ) {
+    ///     Entry::Occupied(entry) => value = entry.into_mut(),
+    ///     Entry::Vacant(_) => panic!(),
+    /// }
+    /// value.1 += 10;
+    ///
+    /// assert_eq!(
+    ///     table.find(hasher(&"poneyland"), |&(x, _)| x == "poneyland",),
+    ///     Some(&("poneyland", 22))
+    /// );
+    /// # }
+    /// # fn main() {
+    /// #     #[cfg(feature = "nightly")]
+    /// #     test()
+    /// # }
+    /// ```
+    pub fn into_mut(self) -> &'a mut T {
+        unsafe { self.bucket.as_mut() }
+    }
+
+    /// Converts the OccupiedEntry into a mutable reference to the underlying
+    /// table.
+    pub fn into_table(self) -> &'a mut HashTable<T, A> {
+        self.table
+    }
+}
+
+/// A view into a vacant entry in a `HashTable`.
+/// It is part of the [`Entry`] enum.
+///
+/// [`Entry`]: enum.Entry.html
+///
+/// # Examples
+///
+/// ```
+/// # #[cfg(feature = "nightly")]
+/// # fn test() {
+/// use ahash::AHasher;
+/// use hashbrown::hash_table::{Entry, HashTable, VacantEntry};
+/// use std::hash::{BuildHasher, BuildHasherDefault};
+///
+/// let mut table: HashTable<&str> = HashTable::new();
+/// let hasher = BuildHasherDefault::<AHasher>::default();
+/// let hasher = |val: &_| hasher.hash_one(val);
+///
+/// let entry_v: VacantEntry<_, _> = match table.entry(hasher(&"a"), |&x| x == "a", hasher) {
+///     Entry::Vacant(view) => view,
+///     Entry::Occupied(_) => unreachable!(),
+/// };
+/// entry_v.insert("a");
+/// assert!(table.find(hasher(&"a"), |&x| x == "a").is_some() && table.len() == 1);
+///
+/// // Nonexistent key (insert)
+/// match table.entry(hasher(&"b"), |&x| x == "b", hasher) {
+///     Entry::Vacant(view) => {
+///         view.insert("b");
+///     }
+///     Entry::Occupied(_) => unreachable!(),
+/// }
+/// assert!(table.find(hasher(&"b"), |&x| x == "b").is_some() && table.len() == 2);
+/// # }
+/// # fn main() {
+/// #     #[cfg(feature = "nightly")]
+/// #     test()
+/// # }
+/// ```
+pub struct VacantEntry<'a, T, A = Global>
+where
+    A: Allocator,
+{
+    hash: u64,
+    insert_slot: InsertSlot,
+    table: &'a mut HashTable<T, A>,
+}
+
+impl<T: fmt::Debug, A: Allocator> fmt::Debug for VacantEntry<'_, T, A> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str("VacantEntry")
+    }
+}
+
+impl<'a, T, A> VacantEntry<'a, T, A>
+where
+    A: Allocator,
+{
+    /// Inserts a new element into the table with the hash that was used to
+    /// obtain the `VacantEntry`.
+    ///
+    /// An `OccupiedEntry` is returned for the newly inserted element.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #[cfg(feature = "nightly")]
+    /// # fn test() {
+    /// use ahash::AHasher;
+    /// use hashbrown::hash_table::Entry;
+    /// use hashbrown::HashTable;
+    /// use std::hash::{BuildHasher, BuildHasherDefault};
+    ///
+    /// let mut table: HashTable<&str> = HashTable::new();
+    /// let hasher = BuildHasherDefault::<AHasher>::default();
+    /// let hasher = |val: &_| hasher.hash_one(val);
+    ///
+    /// if let Entry::Vacant(o) = table.entry(hasher(&"poneyland"), |&x| x == "poneyland", hasher) {
+    ///     o.insert("poneyland");
+    /// }
+    /// assert_eq!(
+    ///     table.find(hasher(&"poneyland"), |&x| x == "poneyland"),
+    ///     Some(&"poneyland")
+    /// );
+    /// # }
+    /// # fn main() {
+    /// #     #[cfg(feature = "nightly")]
+    /// #     test()
+    /// # }
+    /// ```
+    pub fn insert(self, value: T) -> OccupiedEntry<'a, T, A> {
+        let bucket = unsafe {
+            self.table
+                .raw
+                .insert_in_slot(self.hash, self.insert_slot, value)
+        };
+        OccupiedEntry {
+            hash: self.hash,
+            bucket,
+            table: self.table,
+        }
+    }
+
+    /// Converts the VacantEntry into a mutable reference to the underlying
+    /// table.
+    pub fn into_table(self) -> &'a mut HashTable<T, A> {
+        self.table
+    }
+}
+
+/// Type representing the absence of an entry, as returned by [`HashTable::find_entry`].
+///
+/// This type only exists due to [limitations] in Rust's NLL borrow checker. In
+/// the future, `find_entry` will return an `Option<OccupiedEntry>` and this
+/// type will be removed.
+///
+/// [limitations]: https://smallcultfollowing.com/babysteps/blog/2018/06/15/mir-based-borrow-check-nll-status-update/#polonius
+///
+/// # Examples
+///
+/// ```
+/// # #[cfg(feature = "nightly")]
+/// # fn test() {
+/// use ahash::AHasher;
+/// use hashbrown::hash_table::{AbsentEntry, Entry, HashTable};
+/// use std::hash::{BuildHasher, BuildHasherDefault};
+///
+/// let mut table: HashTable<&str> = HashTable::new();
+/// let hasher = BuildHasherDefault::<AHasher>::default();
+/// let hasher = |val: &_| hasher.hash_one(val);
+///
+/// let entry_v: AbsentEntry<_, _> = table.find_entry(hasher(&"a"), |&x| x == "a").unwrap_err();
+/// entry_v
+///     .into_table()
+///     .insert_unique(hasher(&"a"), "a", hasher);
+/// assert!(table.find(hasher(&"a"), |&x| x == "a").is_some() && table.len() == 1);
+///
+/// // Nonexistent key (insert)
+/// match table.entry(hasher(&"b"), |&x| x == "b", hasher) {
+///     Entry::Vacant(view) => {
+///         view.insert("b");
+///     }
+///     Entry::Occupied(_) => unreachable!(),
+/// }
+/// assert!(table.find(hasher(&"b"), |&x| x == "b").is_some() && table.len() == 2);
+/// # }
+/// # fn main() {
+/// #     #[cfg(feature = "nightly")]
+/// #     test()
+/// # }
+/// ```
+pub struct AbsentEntry<'a, T, A = Global>
+where
+    A: Allocator,
+{
+    table: &'a mut HashTable<T, A>,
+}
+
+impl<T: fmt::Debug, A: Allocator> fmt::Debug for AbsentEntry<'_, T, A> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str("AbsentEntry")
+    }
+}
+
+impl<'a, T, A> AbsentEntry<'a, T, A>
+where
+    A: Allocator,
+{
+    /// Converts the AbsentEntry into a mutable reference to the underlying
+    /// table.
+    pub fn into_table(self) -> &'a mut HashTable<T, A> {
+        self.table
+    }
+}
+
+/// An iterator over the entries of a `HashTable` in arbitrary order.
+/// The iterator element type is `&'a T`.
+///
+/// This `struct` is created by the [`iter`] method on [`HashTable`]. See its
+/// documentation for more.
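Because `find_entry` returns `Result<OccupiedEntry, AbsentEntry>` rather than a plain `Option`, both outcomes hand back something usable: an entry to modify or remove, or the table itself via `into_table`. A minimal sketch, under the same assumptions as the doc tests above (hashbrown 0.14 plus `ahash`; names are illustrative):

```rust
use ahash::AHasher;
use hashbrown::HashTable;
use std::hash::{BuildHasher, BuildHasherDefault};

fn main() {
    let mut table: HashTable<&str> = HashTable::new();
    let build = BuildHasherDefault::<AHasher>::default();
    let hash = |val: &&str| build.hash_one(val);
    table.insert_unique(hash(&"a"), "a", hash);

    match table.find_entry(hash(&"a"), |&x| x == "a") {
        // Ok(OccupiedEntry): the value is present; remove it in place.
        Ok(occupied) => {
            let (value, _vacant) = occupied.remove();
            assert_eq!(value, "a");
        }
        // Err(AbsentEntry): nothing matched; recover the table and insert.
        Err(absent) => {
            absent.into_table().insert_unique(hash(&"a"), "a", hash);
        }
    }
    assert!(table.find(hash(&"a"), |&x| x == "a").is_none());
}
```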
+///
+/// [`iter`]: struct.HashTable.html#method.iter
+/// [`HashTable`]: struct.HashTable.html
+pub struct Iter<'a, T> {
+    inner: RawIter<T>,
+    marker: PhantomData<&'a T>,
+}
+
+impl<'a, T> Iterator for Iter<'a, T> {
+    type Item = &'a T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Avoid `Option::map` because it bloats LLVM IR.
+        match self.inner.next() {
+            Some(bucket) => Some(unsafe { bucket.as_ref() }),
+            None => None,
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+
+    fn fold<B, F>(self, init: B, mut f: F) -> B
+    where
+        Self: Sized,
+        F: FnMut(B, Self::Item) -> B,
+    {
+        self.inner
+            .fold(init, |acc, bucket| unsafe { f(acc, bucket.as_ref()) })
+    }
+}
+
+impl<T> ExactSizeIterator for Iter<'_, T> {
+    fn len(&self) -> usize {
+        self.inner.len()
+    }
+}
+
+impl<T> FusedIterator for Iter<'_, T> {}
+
+/// A mutable iterator over the entries of a `HashTable` in arbitrary order.
+/// The iterator element type is `&'a mut T`.
+///
+/// This `struct` is created by the [`iter_mut`] method on [`HashTable`]. See its
+/// documentation for more.
+///
+/// [`iter_mut`]: struct.HashTable.html#method.iter_mut
+/// [`HashTable`]: struct.HashTable.html
+pub struct IterMut<'a, T> {
+    inner: RawIter<T>,
+    marker: PhantomData<&'a mut T>,
+}
+
+impl<'a, T> Iterator for IterMut<'a, T> {
+    type Item = &'a mut T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Avoid `Option::map` because it bloats LLVM IR.
+        match self.inner.next() {
+            Some(bucket) => Some(unsafe { bucket.as_mut() }),
+            None => None,
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+
+    fn fold<B, F>(self, init: B, mut f: F) -> B
+    where
+        Self: Sized,
+        F: FnMut(B, Self::Item) -> B,
+    {
+        self.inner
+            .fold(init, |acc, bucket| unsafe { f(acc, bucket.as_mut()) })
+    }
+}
+
+impl<T> ExactSizeIterator for IterMut<'_, T> {
+    fn len(&self) -> usize {
+        self.inner.len()
+    }
+}
+
+impl<T> FusedIterator for IterMut<'_, T> {}
+
+/// An owning iterator over the entries of a `HashTable` in arbitrary order.
+/// The iterator element type is `T`.
+///
+/// This `struct` is created by the [`into_iter`] method on [`HashTable`]
+/// (provided by the [`IntoIterator`] trait). See its documentation for more.
+/// The table cannot be used after calling that method.
+///
+/// [`into_iter`]: struct.HashTable.html#method.into_iter
+/// [`HashTable`]: struct.HashTable.html
+/// [`IntoIterator`]: https://doc.rust-lang.org/core/iter/trait.IntoIterator.html
+pub struct IntoIter<T, A = Global>
+where
+    A: Allocator,
+{
+    inner: RawIntoIter<T, A>,
+}
+
+impl<T, A> Iterator for IntoIter<T, A>
+where
+    A: Allocator,
+{
+    type Item = T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+
+    fn fold<B, F>(self, init: B, f: F) -> B
+    where
+        Self: Sized,
+        F: FnMut(B, Self::Item) -> B,
+    {
+        self.inner.fold(init, f)
+    }
+}
+
+impl<T, A> ExactSizeIterator for IntoIter<T, A>
+where
+    A: Allocator,
+{
+    fn len(&self) -> usize {
+        self.inner.len()
+    }
+}
+
+impl<T, A> FusedIterator for IntoIter<T, A> where A: Allocator {}
+
+/// A draining iterator over the items of a `HashTable`.
+///
+/// This `struct` is created by the [`drain`] method on [`HashTable`].
+/// See its documentation for more.
+///
+/// [`HashTable`]: struct.HashTable.html
+/// [`drain`]: struct.HashTable.html#method.drain
+pub struct Drain<'a, T, A: Allocator = Global> {
+    inner: RawDrain<'a, T, A>,
+}
+
+impl<T, A: Allocator> Drain<'_, T, A> {
+    /// Returns an iterator of references over the remaining items.
+    fn iter(&self) -> Iter<'_, T> {
+        Iter {
+            inner: self.inner.iter(),
+            marker: PhantomData,
+        }
+    }
+}
+
+impl<T, A: Allocator> Iterator for Drain<'_, T, A> {
+    type Item = T;
+
+    fn next(&mut self) -> Option<T> {
+        self.inner.next()
+    }
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+}
+impl<T, A: Allocator> ExactSizeIterator for Drain<'_, T, A> {
+    fn len(&self) -> usize {
+        self.inner.len()
+    }
+}
+impl<T, A: Allocator> FusedIterator for Drain<'_, T, A> {}
+
+impl<T: fmt::Debug, A: Allocator> fmt::Debug for Drain<'_, T, A> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_list().entries(self.iter()).finish()
+    }
+}
+
+/// A draining iterator over the entries of a `HashTable` which satisfy the predicate `f`.
+///
+/// This `struct` is created by [`HashTable::extract_if`]. See its
+/// documentation for more.
+#[must_use = "Iterators are lazy unless consumed"]
+pub struct ExtractIf<'a, T, F, A: Allocator = Global>
+where
+    F: FnMut(&mut T) -> bool,
+{
+    f: F,
+    inner: RawExtractIf<'a, T, A>,
+}
+
+impl<T, F, A: Allocator> Iterator for ExtractIf<'_, T, F, A>
+where
+    F: FnMut(&mut T) -> bool,
+{
+    type Item = T;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next(|val| (self.f)(val))
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (0, self.inner.iter.size_hint().1)
+    }
+}
+
+impl<T, F, A: Allocator> FusedIterator for ExtractIf<'_, T, F, A> where F: FnMut(&mut T) -> bool {}
diff --git a/vendor/hashbrown/tests/equivalent_trait.rs b/vendor/hashbrown/tests/equivalent_trait.rs
new file mode 100644
index 0000000..713dddd
--- /dev/null
+++ b/vendor/hashbrown/tests/equivalent_trait.rs
@@ -0,0 +1,53 @@
+use hashbrown::Equivalent;
+use hashbrown::HashMap;
+
+use std::hash::Hash;
+
+#[derive(Debug, Hash)]
+pub struct Pair<A, B>(pub A, pub B);
+
+impl<A, B, C, D> PartialEq<(A, B)> for Pair<C, D>
+where
+    C: PartialEq<A>,
+    D: PartialEq<B>,
+{
+    fn eq(&self, rhs: &(A, B)) -> bool {
+        self.0 == rhs.0 && self.1 == rhs.1
+    }
+}
+
+impl<A, B, X> Equivalent<X> for Pair<A, B>
+where
+    Pair<A, B>: PartialEq<X>,
+    A: Hash + Eq,
+    B: Hash + Eq,
+{
+    fn equivalent(&self, other: &X) -> bool {
+        *self == *other
+    }
+}
+
+#[test]
+fn test_lookup() {
+    let s = String::from;
+    let mut map = HashMap::new();
+    map.insert((s("a"), s("b")), 1);
+    map.insert((s("a"), s("x")), 2);
+
+    assert!(map.contains_key(&Pair("a", "b")));
+    assert!(!map.contains_key(&Pair("b", "a")));
+}
+
+#[test]
+fn test_string_str() {
+    let s = String::from;
+    let mut map = HashMap::new();
+    map.insert(s("a"), 1);
+    map.insert(s("b"), 2);
+    map.insert(s("x"), 3);
+    map.insert(s("y"), 4);
+
+    assert!(map.contains_key("a"));
+    assert!(!map.contains_key("z"));
+    assert_eq!(map.remove("b"), Some(2));
+}
diff --git a/vendor/hashbrown/tests/raw.rs b/vendor/hashbrown/tests/raw.rs
new file mode 100644
index 0000000..858836e
--- /dev/null
+++ b/vendor/hashbrown/tests/raw.rs
@@ -0,0 +1,11 @@
+#![cfg(feature = "raw")]
+
+use hashbrown::raw::RawTable;
+use std::mem;
+
+#[test]
+fn test_allocation_info() {
+    assert_eq!(RawTable::<()>::new().allocation_info().1.size(), 0);
+    assert_eq!(RawTable::<u64>::new().allocation_info().1.size(), 0);
+    assert!(RawTable::<u64>::with_capacity(1).allocation_info().1.size() > mem::size_of::<u64>());
+}
diff --git a/vendor/hashbrown/tests/rayon.rs b/vendor/hashbrown/tests/rayon.rs
index 8c603c5..d55e5a9 100644
--- a/vendor/hashbrown/tests/rayon.rs
+++ b/vendor/hashbrown/tests/rayon.rs
@@ -356,7 +356,9 @@ fn set_seq_par_equivalence_into_iter_empty() {
     let vec_seq = SET_EMPTY.clone().into_iter().collect::<Vec<_>>();
     let vec_par = SET_EMPTY.clone().into_par_iter().collect::<Vec<_>>();
 
-    assert_eq3!(vec_seq, vec_par, []);
+    // Work around type inference
failure introduced by rend dev-dependency. + let empty: [char; 0] = []; + assert_eq3!(vec_seq, vec_par, empty); } #[test] diff --git a/vendor/hashbrown/tests/set.rs b/vendor/hashbrown/tests/set.rs index 5ae1ec9..86ec964 100644 --- a/vendor/hashbrown/tests/set.rs +++ b/vendor/hashbrown/tests/set.rs @@ -27,7 +27,7 @@ fn test_hashset_insert_remove() { assert_eq!(m.insert(x.clone()), true); } for (i, x) in tx.iter().enumerate() { - println!("removing {} {:?}", i, x); + println!("removing {i} {x:?}"); assert_eq!(m.remove(x), true); } } diff --git a/vendor/indexmap/.cargo-checksum.json b/vendor/indexmap/.cargo-checksum.json index 0f44683..e58b146 100644 --- a/vendor/indexmap/.cargo-checksum.json +++ b/vendor/indexmap/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"1290d383adfdcd24f158a4619afb5547d633c83c0a1ab3b5c1ee0dabe4fb1f36","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"ecc269ef87fd38a1d98e30bfac9ba964a9dbd9315c3770fed98d4d7cb5882055","README.md":"f8b02aa7c20fc0f5bc13de9e9e78899ec8cdbc16c2db880a1d0bc14c25b07542","RELEASES.md":"85d9d9bc78e94df7ce90300bb94099b9ab2696d1b3f6b815c22761522878e679","benches/bench.rs":"3b2900abbc9e8a60af78b0395222ee75e86bc68519a0f38477387d1572eed397","benches/faststring.rs":"5fdd6cdb19d0557ed58f241e809a240cf8939d9e5b87a72d5f127f81ab98380b","build.rs":"558b4d0b9e9b3a44f7e1a2b69f7a7567ea721cd45cb54f4e458e850bf702f35c","src/arbitrary.rs":"bb8bda10f686abe57eef1446d3fc3fc6fb251f95629b28c20e620a4838c43db8","src/equivalent.rs":"2e6ae24ef09a09b917f4e2b0f6288f901878e42f5080f61b1bd1afdcc90aba87","src/lib.rs":"ea2cbe4f6cc2c4a75f42c9fc936503e6bee0f136c60f6811a2a9907ed8886443","src/macros.rs":"80c22f630e7f81e6fa663ca4c9e50cf5f332c8905d72d1338bd16f24eb353c2a","src/map.rs":"2e9cbfa240865cfd6b6b972bdbdb39283e6302dd2d0d72b3c2bfce4414bf5729","src/map/core.rs":"8422cd774c5db7d83cdeb0c5836c10f29caa1bee8d95b0d674b01b32e7ce80d8","src/map/core/raw.rs":"4e5fac4fecccc352268351d8b1f82b345067b5c029bba7e6ab88e8f8bc799c6a","src/mutable_keys.rs":"a919065b59000286eb11c7d46f6896bf0a1d484c9dac5e61d80bb8990c9fbedb","src/rayon/map.rs":"1a508c7c95c5d56113b851f7ce140d62ad541f1c6129352a7ec62d5bea7af4a1","src/rayon/mod.rs":"019e9379ccab57a299ab5b5a2c0efc7561b77a715a5afe8f797c7e8330c6206c","src/rayon/set.rs":"ba00e88e90fb7ab803589f99f24b595d60309e541aae3d01fdde21bff3840194","src/rustc.rs":"fe7a348c5a10a66880cb6c737593fe79d3b6de40f44ba0d7b89204aa95e14a3a","src/serde.rs":"d45ec8fb9c02594ca6f2e9b20764778b2b4193a24a52f1e233160a33efc6e683","src/serde_seq.rs":"c54a52fa607b6ccddda1e76e829778ca304c49b5f434edc5e582a5386c35d662","src/set.rs":"0a57affb623fa6b28df18cc14841e4f076cbd1da5c809635d202f865640af1ee","src/util.rs":"ab712bce71b54cf2763e6010e64bb5944d1d59ce15e2f2beffa7ceed204d6a68","tests/equivalent_trait.rs":"efe9393069e3cfc893d2c9c0343679979578e437fdb98a10baefeced027ba310","tests/macros_full_path.rs":"c33c86d7341581fdd08e2e6375a4afca507fa603540c54a3b9e51c4cd011cd71","tests/quick.rs":"1addbc6cbcb1aae5b8bde0fb0e18197d947e8f13244e4ae7ebf97bdda00eafea","tests/tests.rs":"f6dbeeb0e2950402b0e66ac52bf74c9e4197d3c5d9c0dde64a7998a2ef74d327"},"package":"bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"} \ No newline at end of file 
+{"files":{"Cargo.toml":"7049b39c9bc740c313a738053b3d058aa66a5fdc1bd9f2e195c961d6d0dd5e92","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"ecc269ef87fd38a1d98e30bfac9ba964a9dbd9315c3770fed98d4d7cb5882055","README.md":"37c2da326dcc144e67b9371ec98c78becfbbb8fab93d0537999b6083bdb80243","RELEASES.md":"c22f67c4bc30852920125c80eb52ad00152b2b61b5254acb8d39081d7d8b5f32","benches/bench.rs":"3b2900abbc9e8a60af78b0395222ee75e86bc68519a0f38477387d1572eed397","benches/faststring.rs":"5fdd6cdb19d0557ed58f241e809a240cf8939d9e5b87a72d5f127f81ab98380b","src/arbitrary.rs":"068713b1e8e762dbe9e4d19d555e77c17e59408335a40f4777d6100340605655","src/lib.rs":"66bfcbc906d703f19715a03d42caa84435f71174316bd31d44beca698c22c8d4","src/macros.rs":"32ab59dfb19713598769252e5fbcef31744dd0399bc839375c4edb5f00701b0d","src/map.rs":"197b39ea7eca1497129a9d4059a1731d5306c9a08dd7cfd5eac13c89f759c8cb","src/map/core.rs":"d59dfa0a1838b6a034c32cc41426b42d9180127d5d36324a106c732f94db7e1e","src/map/core/raw.rs":"f8b19c728978278754ae49e98deb8de1034f007805d1f254d43208b0c453ed6f","src/map/iter.rs":"23278a1787d08dcbc30885c0653369d82a695a7b8f1084d7709fbf903e53a776","src/map/serde_seq.rs":"eff1ccbefe20c8f4338afa92732473e28fd0b752cb2bc153600ee5387d484627","src/map/slice.rs":"1aa189fe7b0cef6362a7c06cce31ee43c2c842ee288e5dab86196743789b76fd","src/map/tests.rs":"1866c233e34a06cc51a2e66824d68a11ac4fd3d1621a88a24af5a3882ef0de6a","src/mutable_keys.rs":"2bdd739bde1dd7f81c82e90595b877fbdb1cd9f963b180fcc819128bb1c750a7","src/rayon/map.rs":"22a30fa68437f69c24752c8ed17be6e084d02b0c12730e48e238a4e92f208b55","src/rayon/mod.rs":"019e9379ccab57a299ab5b5a2c0efc7561b77a715a5afe8f797c7e8330c6206c","src/rayon/set.rs":"cea2517db405ee64306e24b571ed773e129eae43ecfc866e93e365c118edc0ed","src/rustc.rs":"fe7a348c5a10a66880cb6c737593fe79d3b6de40f44ba0d7b89204aa95e14a3a","src/serde.rs":"91bbf14e3afbcf518aeb7f8a9b65011d0ede2f88d12857dfddda451f35574281","src/set.rs":"3d46a0d43878d965df81a2de16155c7ca8400cea245a81e28b8f9dd3f94727b2","src/set/iter.rs":"9b90c736185889fca8b0a461293da4a1a1e50d9db539bd5b9f72254bee461a3e","src/set/slice.rs":"18556170598113341e6381ad1a6234441969e7ac0e3674b7e6da29d7ffb82979","src/set/tests.rs":"155c6eec14c2979c3acebfa9c2675f249937b427780f61670a83fafd85cf1f3b","src/util.rs":"dbd57cfdac2a72db8c5ce83bf288bcaf33b5ae59adddcd088792a624c4c0e909","tests/equivalent_trait.rs":"efe9393069e3cfc893d2c9c0343679979578e437fdb98a10baefeced027ba310","tests/macros_full_path.rs":"c33c86d7341581fdd08e2e6375a4afca507fa603540c54a3b9e51c4cd011cd71","tests/quick.rs":"f34ae9ce8aa51d9338595c299a2088ab56dbb5c04786050009a2fa22a6bbfa32","tests/tests.rs":"f6dbeeb0e2950402b0e66ac52bf74c9e4197d3c5d9c0dde64a7998a2ef74d327"},"package":"d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f"} \ No newline at end of file diff --git a/vendor/indexmap/Cargo.toml b/vendor/indexmap/Cargo.toml index 28f3618..65be975 100644 --- a/vendor/indexmap/Cargo.toml +++ b/vendor/indexmap/Cargo.toml @@ -11,9 +11,9 @@ [package] edition = "2021" -rust-version = "1.56" +rust-version = "1.63" name = "indexmap" -version = "1.9.3" +version = "2.1.0" description = "A hash table with consistent order and fast iteration." 
documentation = "https://docs.rs/indexmap/" readme = "README.md" @@ -28,20 +28,24 @@ categories = [ license = "Apache-2.0 OR MIT" repository = "https://github.com/bluss/indexmap" -[package.metadata.release] -no-dev-version = true -tag-name = "{{version}}" - [package.metadata.docs.rs] features = [ "arbitrary", "quickcheck", - "serde-1", + "serde", "rayon", ] +rustdoc-args = [ + "--cfg", + "docsrs", +] + +[package.metadata.release] +no-dev-version = true +tag-name = "{{version}}" [profile.bench] -debug = true +debug = 2 [lib] bench = false @@ -51,8 +55,12 @@ version = "1.0" optional = true default-features = false +[dependencies.equivalent] +version = "1.0" +default-features = false + [dependencies.hashbrown] -version = "0.12" +version = "0.14.1" features = ["raw"] default-features = false @@ -62,7 +70,7 @@ optional = true default-features = false [dependencies.rayon] -version = "1.4.1" +version = "1.5.3" optional = true [dependencies.rustc-rayon] @@ -82,7 +90,7 @@ version = "1.0" version = "0.2.1" [dev-dependencies.itertools] -version = "0.10" +version = "0.11" [dev-dependencies.lazy_static] version = "1.3" @@ -98,11 +106,7 @@ features = ["small_rng"] [dev-dependencies.serde_derive] version = "1.0" -[build-dependencies.autocfg] -version = "1" - [features] -serde-1 = ["serde"] +default = ["std"] std = [] test_debug = [] -test_low_transition_point = [] diff --git a/vendor/indexmap/README.md b/vendor/indexmap/README.md index d80b709..bf86932 100644 --- a/vendor/indexmap/README.md +++ b/vendor/indexmap/README.md @@ -3,7 +3,7 @@ [![build status](https://github.com/bluss/indexmap/workflows/Continuous%20integration/badge.svg?branch=master)](https://github.com/bluss/indexmap/actions) [![crates.io](https://img.shields.io/crates/v/indexmap.svg)](https://crates.io/crates/indexmap) [![docs](https://docs.rs/indexmap/badge.svg)](https://docs.rs/indexmap) -[![rustc](https://img.shields.io/badge/rust-1.56%2B-orange.svg)](https://img.shields.io/badge/rust-1.56%2B-orange.svg) +[![rustc](https://img.shields.io/badge/rust-1.63%2B-orange.svg)](https://img.shields.io/badge/rust-1.63%2B-orange.svg) A pure-Rust hash table which preserves (in a limited sense) insertion order. diff --git a/vendor/indexmap/RELEASES.md b/vendor/indexmap/RELEASES.md index 7ea8b70..817a2dc 100644 --- a/vendor/indexmap/RELEASES.md +++ b/vendor/indexmap/RELEASES.md @@ -1,3 +1,61 @@ +- 2.1.0 + + - Empty slices can now be created with `map::Slice::{new, new_mut}` and + `set::Slice::new`. In addition, `Slice::new`, `len`, and `is_empty` are + now `const` functions on both types. + + - `IndexMap`, `IndexSet`, and their respective `Slice`s all have binary + search methods for sorted data: map `binary_search_keys` and set + `binary_search` for plain comparision, `binary_search_by` for custom + comparators, `binary_search_by_key` for key extraction, and + `partition_point` for boolean conditions. + +- 2.0.2 + + - The `hashbrown` dependency has been updated to version 0.14.1 to + complete the support for Rust 1.63. + +- 2.0.1 + + - **MSRV**: Rust 1.63.0 is now supported as well, pending publication of + `hashbrown`'s relaxed MSRV (or use cargo `--ignore-rust-version`). + +- 2.0.0 + + - **MSRV**: Rust 1.64.0 or later is now required. + + - The `"std"` feature is no longer auto-detected. It is included in the + default feature set, or else can be enabled like any other Cargo feature. + + - The `"serde-1"` feature has been removed, leaving just the optional + `"serde"` dependency to be enabled like a feature itself. 
+ + - `IndexMap::get_index_mut` now returns `Option<(&K, &mut V)>`, changing + the key part from `&mut K` to `&K`. There is also a new alternative + `MutableKeys::get_index_mut2` to access the former behavior. + + - The new `map::Slice` and `set::Slice` offer a linear view of maps + and sets, behaving a lot like normal `[(K, V)]` and `[T]` slices. Notably, + comparison traits like `Eq` only consider items in order, rather than hash + lookups, and slices even implement `Hash`. + + - `IndexMap` and `IndexSet` now have `sort_by_cached_key` and + `par_sort_by_cached_key` methods which perform stable sorts in place + using a key extraction function. + + - `IndexMap` and `IndexSet` now have `reserve_exact`, `try_reserve`, and + `try_reserve_exact` methods that correspond to the same methods on `Vec`. + However, exactness only applies to the direct capacity for items, while the + raw hash table still follows its own rules for capacity and load factor. + + - The `Equivalent` trait is now re-exported from the `equivalent` crate, + intended as a common base to allow types to work with multiple map types. + + - The `hashbrown` dependency has been updated to version 0.14. + + - The `serde_seq` module has been moved from the crate root to below the + `map` module. + - 1.9.3 - Bump the `rustc-rayon` dependency, for compiler use only. diff --git a/vendor/indexmap/build.rs b/vendor/indexmap/build.rs deleted file mode 100644 index 9f9fa05..0000000 --- a/vendor/indexmap/build.rs +++ /dev/null @@ -1,8 +0,0 @@ -fn main() { - // If "std" is explicitly requested, don't bother probing the target for it. - match std::env::var_os("CARGO_FEATURE_STD") { - Some(_) => autocfg::emit("has_std"), - None => autocfg::new().emit_sysroot_crate("std"), - } - autocfg::rerun_path("build.rs"); -} diff --git a/vendor/indexmap/src/arbitrary.rs b/vendor/indexmap/src/arbitrary.rs index 1347c8b..7798438 100644 --- a/vendor/indexmap/src/arbitrary.rs +++ b/vendor/indexmap/src/arbitrary.rs @@ -1,4 +1,5 @@ #[cfg(feature = "arbitrary")] +#[cfg_attr(docsrs, doc(cfg(feature = "arbitrary")))] mod impl_arbitrary { use crate::{IndexMap, IndexSet}; use arbitrary::{Arbitrary, Result, Unstructured}; @@ -35,6 +36,7 @@ mod impl_arbitrary { } #[cfg(feature = "quickcheck")] +#[cfg_attr(docsrs, doc(cfg(feature = "quickcheck")))] mod impl_quickcheck { use crate::{IndexMap, IndexSet}; use alloc::boxed::Box; diff --git a/vendor/indexmap/src/equivalent.rs b/vendor/indexmap/src/equivalent.rs deleted file mode 100644 index ad6635f..0000000 --- a/vendor/indexmap/src/equivalent.rs +++ /dev/null @@ -1,27 +0,0 @@ -use core::borrow::Borrow; - -/// Key equivalence trait. -/// -/// This trait allows hash table lookup to be customized. -/// It has one blanket implementation that uses the regular `Borrow` solution, -/// just like `HashMap` and `BTreeMap` do, so that you can pass `&str` to lookup -/// into a map with `String` keys and so on. -/// -/// # Contract -/// -/// The implementor **must** hash like `K`, if it is hashable. -pub trait Equivalent { - /// Compare self to `key` and return `true` if they are equal. - fn equivalent(&self, key: &K) -> bool; -} - -impl Equivalent for Q -where - Q: Eq, - K: Borrow, -{ - #[inline] - fn equivalent(&self, key: &K) -> bool { - *self == *key.borrow() - } -} diff --git a/vendor/indexmap/src/lib.rs b/vendor/indexmap/src/lib.rs index 6e94936..5e42784 100644 --- a/vendor/indexmap/src/lib.rs +++ b/vendor/indexmap/src/lib.rs @@ -14,7 +14,7 @@ //! [`IndexSet`]: set/struct.IndexSet.html //! //! -//! ### Feature Highlights +//! 
### Highlights //! //! [`IndexMap`] and [`IndexSet`] are drop-in compatible with the std `HashMap` //! and `HashSet`, but they also have some features of note: @@ -26,6 +26,34 @@ //! - The [`MutableKeys`][map::MutableKeys] trait, which gives opt-in mutable //! access to hash map keys. //! +//! ### Feature Flags +//! +//! To reduce the amount of compiled code in the crate by default, certain +//! features are gated behind [feature flags]. These allow you to opt in to (or +//! out of) functionality. Below is a list of the features available in this +//! crate. +//! +//! * `std`: Enables features which require the Rust standard library. For more +//! information see the section on [`no_std`]. +//! * `rayon`: Enables parallel iteration and other parallel methods. +//! * `serde`: Adds implementations for [`Serialize`] and [`Deserialize`] +//! to [`IndexMap`] and [`IndexSet`]. Alternative implementations for +//! (de)serializing [`IndexMap`] as an ordered sequence are available in the +//! [`map::serde_seq`] module. +//! * `arbitrary`: Adds implementations for the [`arbitrary::Arbitrary`] trait +//! to [`IndexMap`] and [`IndexSet`]. +//! * `quickcheck`: Adds implementations for the [`quickcheck::Arbitrary`] trait +//! to [`IndexMap`] and [`IndexSet`]. +//! +//! _Note: only the `std` feature is enabled by default._ +//! +//! [feature flags]: https://doc.rust-lang.org/cargo/reference/manifest.html#the-features-section +//! [`no_std`]: #no-standard-library-targets +//! [`Serialize`]: `::serde::Serialize` +//! [`Deserialize`]: `::serde::Deserialize` +//! [`arbitrary::Arbitrary`]: `::arbitrary::Arbitrary` +//! [`quickcheck::Arbitrary`]: `::quickcheck::Arbitrary` +//! //! ### Alternate Hashers //! //! [`IndexMap`] and [`IndexSet`] have a default hasher type `S = RandomState`, @@ -53,21 +81,20 @@ //! //! ### Rust Version //! -//! This version of indexmap requires Rust 1.56 or later. +//! This version of indexmap requires Rust 1.63 or later. //! -//! The indexmap 1.x release series will use a carefully considered version -//! upgrade policy, where in a later 1.x version, we will raise the minimum +//! The indexmap 2.x release series will use a carefully considered version +//! upgrade policy, where in a later 2.x version, we will raise the minimum //! required Rust version. //! //! ## No Standard Library Targets //! -//! This crate supports being built without `std`, requiring -//! `alloc` instead. This is enabled automatically when it is detected that -//! `std` is not available. There is no crate feature to enable/disable to -//! trigger this. It can be tested by building for a std-less target. +//! This crate supports being built without `std`, requiring `alloc` instead. +//! This is chosen by disabling the default "std" cargo feature, by adding +//! `default-features = false` to your dependency specification. //! //! - Creating maps and sets using [`new`][IndexMap::new] and -//! [`with_capacity`][IndexMap::with_capacity] is unavailable without `std`. +//! [`with_capacity`][IndexMap::with_capacity] is unavailable without `std`. //! Use methods [`IndexMap::default`][def], //! [`with_hasher`][IndexMap::with_hasher], //! [`with_capacity_and_hasher`][IndexMap::with_capacity_and_hasher] instead. @@ -77,9 +104,11 @@ //! //! 
[def]: map/struct.IndexMap.html#impl-Default +#![cfg_attr(docsrs, feature(doc_cfg))] + extern crate alloc; -#[cfg(has_std)] +#[cfg(feature = "std")] #[macro_use] extern crate std; @@ -88,12 +117,10 @@ use alloc::vec::{self, Vec}; mod arbitrary; #[macro_use] mod macros; -mod equivalent; mod mutable_keys; #[cfg(feature = "serde")] +#[cfg_attr(docsrs, doc(cfg(feature = "serde")))] mod serde; -#[cfg(feature = "serde")] -pub mod serde_seq; mod util; pub mod map; @@ -102,14 +129,15 @@ pub mod set; // Placed after `map` and `set` so new `rayon` methods on the types // are documented after the "normal" methods. #[cfg(feature = "rayon")] +#[cfg_attr(docsrs, doc(cfg(feature = "rayon")))] mod rayon; #[cfg(feature = "rustc-rayon")] mod rustc; -pub use crate::equivalent::Equivalent; pub use crate::map::IndexMap; pub use crate::set::IndexSet; +pub use equivalent::Equivalent; // shared private items @@ -192,3 +220,59 @@ trait Entries { where F: FnOnce(&mut [Self::Entry]); } + +/// The error type for `try_reserve` methods. +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct TryReserveError { + kind: TryReserveErrorKind, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +enum TryReserveErrorKind { + // The standard library's kind is currently opaque to us, otherwise we could unify this. + Std(alloc::collections::TryReserveError), + CapacityOverflow, + AllocError { layout: alloc::alloc::Layout }, +} + +// These are not `From` so we don't expose them in our public API. +impl TryReserveError { + fn from_alloc(error: alloc::collections::TryReserveError) -> Self { + Self { + kind: TryReserveErrorKind::Std(error), + } + } + + fn from_hashbrown(error: hashbrown::TryReserveError) -> Self { + Self { + kind: match error { + hashbrown::TryReserveError::CapacityOverflow => { + TryReserveErrorKind::CapacityOverflow + } + hashbrown::TryReserveError::AllocError { layout } => { + TryReserveErrorKind::AllocError { layout } + } + }, + } + } +} + +impl core::fmt::Display for TryReserveError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let reason = match &self.kind { + TryReserveErrorKind::Std(e) => return core::fmt::Display::fmt(e, f), + TryReserveErrorKind::CapacityOverflow => { + " because the computed capacity exceeded the collection's maximum" + } + TryReserveErrorKind::AllocError { .. } => { + " because the memory allocator returned an error" + } + }; + f.write_str("memory allocation failed")?; + f.write_str(reason) + } +} + +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] +impl std::error::Error for TryReserveError {} diff --git a/vendor/indexmap/src/macros.rs b/vendor/indexmap/src/macros.rs index ca26287..5317f1c 100644 --- a/vendor/indexmap/src/macros.rs +++ b/vendor/indexmap/src/macros.rs @@ -1,4 +1,5 @@ -#[cfg(has_std)] +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] #[macro_export] /// Create an `IndexMap` from a list of key-value pairs /// @@ -19,23 +20,23 @@ /// assert_eq!(map.keys().next(), Some(&"a")); /// ``` macro_rules! indexmap { - (@single $($x:tt)*) => (()); - (@count $($rest:expr),*) => (<[()]>::len(&[$($crate::indexmap!(@single $rest)),*])); - ($($key:expr => $value:expr,)+) => { $crate::indexmap!($($key => $value),+) }; ($($key:expr => $value:expr),*) => { { - let _cap = $crate::indexmap!(@count $($key),*); - let mut _map = $crate::IndexMap::with_capacity(_cap); + // Note: `stringify!($key)` is just here to consume the repetition, + // but we throw away that string literal during constant evaluation. 
+ const CAP: usize = <[()]>::len(&[$({ stringify!($key); }),*]); + let mut map = $crate::IndexMap::with_capacity(CAP); $( - _map.insert($key, $value); + map.insert($key, $value); )* - _map + map } }; } -#[cfg(has_std)] +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] #[macro_export] /// Create an `IndexSet` from a list of values /// @@ -56,18 +57,17 @@ macro_rules! indexmap { /// assert_eq!(set.iter().next(), Some(&"a")); /// ``` macro_rules! indexset { - (@single $($x:tt)*) => (()); - (@count $($rest:expr),*) => (<[()]>::len(&[$($crate::indexset!(@single $rest)),*])); - ($($value:expr,)+) => { $crate::indexset!($($value),+) }; ($($value:expr),*) => { { - let _cap = $crate::indexset!(@count $($value),*); - let mut _set = $crate::IndexSet::with_capacity(_cap); + // Note: `stringify!($value)` is just here to consume the repetition, + // but we throw away that string literal during constant evaluation. + const CAP: usize = <[()]>::len(&[$({ stringify!($value); }),*]); + let mut set = $crate::IndexSet::with_capacity(CAP); $( - _set.insert($value); + set.insert($value); )* - _set + set } }; } diff --git a/vendor/indexmap/src/map.rs b/vendor/indexmap/src/map.rs index d39448d..4ee24d5 100644 --- a/vendor/indexmap/src/map.rs +++ b/vendor/indexmap/src/map.rs @@ -2,29 +2,39 @@ //! pairs is independent of the hash values of the keys. mod core; +mod iter; +mod slice; +#[cfg(feature = "serde")] +#[cfg_attr(docsrs, doc(cfg(feature = "serde")))] +pub mod serde_seq; + +#[cfg(test)] +mod tests; + +pub use self::core::{Entry, OccupiedEntry, VacantEntry}; +pub use self::iter::{ + Drain, IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Values, ValuesMut, +}; +pub use self::slice::Slice; pub use crate::mutable_keys::MutableKeys; #[cfg(feature = "rayon")] pub use crate::rayon::map as rayon; -use crate::vec::{self, Vec}; use ::core::cmp::Ordering; use ::core::fmt; use ::core::hash::{BuildHasher, Hash, Hasher}; -use ::core::iter::FusedIterator; use ::core::ops::{Index, IndexMut, RangeBounds}; -use ::core::slice::{Iter as SliceIter, IterMut as SliceIterMut}; +use alloc::boxed::Box; +use alloc::vec::Vec; -#[cfg(has_std)] +#[cfg(feature = "std")] use std::collections::hash_map::RandomState; use self::core::IndexMapCore; -use crate::equivalent::Equivalent; -use crate::util::third; -use crate::{Bucket, Entries, HashValue}; - -pub use self::core::{Entry, OccupiedEntry, VacantEntry}; +use crate::util::{third, try_simplify_range}; +use crate::{Bucket, Entries, Equivalent, HashValue, TryReserveError}; /// A hash table where the iteration order of the key-value pairs is independent /// of the hash values of the keys. @@ -67,12 +77,12 @@ pub use self::core::{Entry, OccupiedEntry, VacantEntry}; /// assert_eq!(letters[&'u'], 1); /// assert_eq!(letters.get(&'y'), None); /// ``` -#[cfg(has_std)] +#[cfg(feature = "std")] pub struct IndexMap { pub(crate) core: IndexMapCore, hash_builder: S, } -#[cfg(not(has_std))] +#[cfg(not(feature = "std"))] pub struct IndexMap { pub(crate) core: IndexMapCore, hash_builder: S, @@ -140,7 +150,8 @@ where } } -#[cfg(has_std)] +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl IndexMap { /// Create a new map. (Does not allocate.) #[inline] @@ -186,6 +197,11 @@ impl IndexMap { } } + /// Return the number of elements the map can hold without reallocating. + /// + /// This number is a lower bound; the map might be able to hold more, + /// but is guaranteed to be able to hold at least this many. + /// /// Computes in **O(1)** time. 
pub fn capacity(&self) -> usize { self.core.capacity() @@ -214,52 +230,38 @@ impl IndexMap { /// Return an iterator over the key-value pairs of the map, in their order pub fn iter(&self) -> Iter<'_, K, V> { - Iter { - iter: self.as_entries().iter(), - } + Iter::new(self.as_entries()) } /// Return an iterator over the key-value pairs of the map, in their order pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { - IterMut { - iter: self.as_entries_mut().iter_mut(), - } + IterMut::new(self.as_entries_mut()) } /// Return an iterator over the keys of the map, in their order pub fn keys(&self) -> Keys<'_, K, V> { - Keys { - iter: self.as_entries().iter(), - } + Keys::new(self.as_entries()) } /// Return an owning iterator over the keys of the map, in their order pub fn into_keys(self) -> IntoKeys { - IntoKeys { - iter: self.into_entries().into_iter(), - } + IntoKeys::new(self.into_entries()) } /// Return an iterator over the values of the map, in their order pub fn values(&self) -> Values<'_, K, V> { - Values { - iter: self.as_entries().iter(), - } + Values::new(self.as_entries()) } /// Return an iterator over mutable references to the values of the map, /// in their order pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> { - ValuesMut { - iter: self.as_entries_mut().iter_mut(), - } + ValuesMut::new(self.as_entries_mut()) } /// Return an owning iterator over the values of the map, in their order pub fn into_values(self) -> IntoValues { - IntoValues { - iter: self.into_entries().into_iter(), - } + IntoValues::new(self.into_entries()) } /// Remove all key-value pairs in the map, while preserving its capacity. @@ -293,9 +295,7 @@ impl IndexMap { where R: RangeBounds, { - Drain { - iter: self.core.drain(range), - } + Drain::new(self.core.drain(range)) } /// Splits the collection into two at the given index. @@ -328,6 +328,37 @@ where self.core.reserve(additional); } + /// Reserve capacity for `additional` more key-value pairs, without over-allocating. + /// + /// Unlike `reserve`, this does not deliberately over-allocate the entry capacity to avoid + /// frequent re-allocations. However, the underlying data structures may still have internal + /// capacity requirements, and the allocator itself may give more space than requested, so this + /// cannot be relied upon to be precisely minimal. + /// + /// Computes in **O(n)** time. + pub fn reserve_exact(&mut self, additional: usize) { + self.core.reserve_exact(additional); + } + + /// Try to reserve capacity for `additional` more key-value pairs. + /// + /// Computes in **O(n)** time. + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.core.try_reserve(additional) + } + + /// Try to reserve capacity for `additional` more key-value pairs, without over-allocating. + /// + /// Unlike `try_reserve`, this does not deliberately over-allocate the entry capacity to avoid + /// frequent re-allocations. However, the underlying data structures may still have internal + /// capacity requirements, and the allocator itself may give more space than requested, so this + /// cannot be relied upon to be precisely minimal. + /// + /// Computes in **O(n)** time. + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.core.try_reserve_exact(additional) + } + /// Shrink the capacity of the map as much as possible. /// /// Computes in **O(n)** time. 
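The fallible-reservation methods added above report failure through the `TryReserveError` type introduced in `lib.rs` earlier in this patch, instead of aborting on allocation failure. A minimal sketch, assuming indexmap 2.x with default features:

```rust
use indexmap::IndexMap;

fn main() {
    let mut map: IndexMap<String, u64> = IndexMap::new();

    // On success the capacity is at least what was asked for.
    match map.try_reserve(1024) {
        Ok(()) => assert!(map.capacity() >= 1024),
        Err(e) => eprintln!("could not reserve: {e}"),
    }

    // A request that overflows the computed capacity returns Err
    // rather than panicking or aborting.
    assert!(map.try_reserve(usize::MAX).is_err());
}
```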
@@ -486,21 +517,6 @@ where } } - pub(crate) fn get_full_mut2_impl( - &mut self, - key: &Q, - ) -> Option<(usize, &mut K, &mut V)> - where - Q: Hash + Equivalent, - { - if let Some(i) = self.get_index_of(key) { - let entry = &mut self.as_entries_mut()[i]; - Some((i, &mut entry.key, &mut entry.value)) - } else { - None - } - } - /// Remove the key-value pair equivalent to `key` and return /// its value. /// @@ -713,9 +729,7 @@ where { let mut entries = self.into_entries(); entries.sort_by(move |a, b| cmp(&a.key, &a.value, &b.key, &b.value)); - IntoIter { - iter: entries.into_iter(), - } + IntoIter::new(entries) } /// Sort the map's key-value pairs by the default ordering of the keys, but @@ -759,9 +773,82 @@ where { let mut entries = self.into_entries(); entries.sort_unstable_by(move |a, b| cmp(&a.key, &a.value, &b.key, &b.value)); - IntoIter { - iter: entries.into_iter(), - } + IntoIter::new(entries) + } + + /// Sort the map’s key-value pairs in place using a sort-key extraction function. + /// + /// During sorting, the function is called at most once per entry, by using temporary storage + /// to remember the results of its evaluation. The order of calls to the function is + /// unspecified and may change between versions of `indexmap` or the standard library. + /// + /// Computes in **O(m n + n log n + c)** time () and **O(n)** space, where the function is + /// **O(m)**, *n* is the length of the map, and *c* the capacity. The sort is stable. + pub fn sort_by_cached_key(&mut self, mut sort_key: F) + where + T: Ord, + F: FnMut(&K, &V) -> T, + { + self.with_entries(move |entries| { + entries.sort_by_cached_key(move |a| sort_key(&a.key, &a.value)); + }); + } + + /// Search over a sorted map for a key. + /// + /// Returns the position where that key is present, or the position where it can be inserted to + /// maintain the sort. See [`slice::binary_search`] for more details. + /// + /// Computes in **O(log(n))** time, which is notably less scalable than looking the key up + /// using [`get_index_of`][IndexMap::get_index_of], but this can also position missing keys. + pub fn binary_search_keys(&self, x: &K) -> Result + where + K: Ord, + { + self.as_slice().binary_search_keys(x) + } + + /// Search over a sorted map with a comparator function. + /// + /// Returns the position where that value is present, or the position where it can be inserted + /// to maintain the sort. See [`slice::binary_search_by`] for more details. + /// + /// Computes in **O(log(n))** time. + #[inline] + pub fn binary_search_by<'a, F>(&'a self, f: F) -> Result + where + F: FnMut(&'a K, &'a V) -> Ordering, + { + self.as_slice().binary_search_by(f) + } + + /// Search over a sorted map with an extraction function. + /// + /// Returns the position where that value is present, or the position where it can be inserted + /// to maintain the sort. See [`slice::binary_search_by_key`] for more details. + /// + /// Computes in **O(log(n))** time. + #[inline] + pub fn binary_search_by_key<'a, B, F>(&'a self, b: &B, f: F) -> Result + where + F: FnMut(&'a K, &'a V) -> B, + B: Ord, + { + self.as_slice().binary_search_by_key(b, f) + } + + /// Returns the index of the partition point of a sorted map according to the given predicate + /// (the index of the first element of the second partition). + /// + /// See [`slice::partition_point`] for more details. + /// + /// Computes in **O(log(n))** time. + #[must_use] + pub fn partition_point
<P>
(&self, pred: P) -> usize + where + P: FnMut(&K, &V) -> bool, + { + self.as_slice().partition_point(pred) } /// Reverses the order of the map’s key-value pairs in place. @@ -773,6 +860,27 @@ where } impl IndexMap { + /// Returns a slice of all the key-value pairs in the map. + /// + /// Computes in **O(1)** time. + pub fn as_slice(&self) -> &Slice { + Slice::from_slice(self.as_entries()) + } + + /// Returns a mutable slice of all the key-value pairs in the map. + /// + /// Computes in **O(1)** time. + pub fn as_mut_slice(&mut self) -> &mut Slice { + Slice::from_mut_slice(self.as_entries_mut()) + } + + /// Converts into a boxed slice of all the key-value pairs in the map. + /// + /// Note that this will drop the inner hash table and any excess capacity. + pub fn into_boxed_slice(self) -> Box> { + Slice::from_boxed(self.into_entries().into_boxed_slice()) + } + /// Get a key-value pair by index /// /// Valid indices are *0 <= index < self.len()* @@ -787,8 +895,30 @@ impl IndexMap { /// Valid indices are *0 <= index < self.len()* /// /// Computes in **O(1)** time. - pub fn get_index_mut(&mut self, index: usize) -> Option<(&mut K, &mut V)> { - self.as_entries_mut().get_mut(index).map(Bucket::muts) + pub fn get_index_mut(&mut self, index: usize) -> Option<(&K, &mut V)> { + self.as_entries_mut().get_mut(index).map(Bucket::ref_mut) + } + + /// Returns a slice of key-value pairs in the given range of indices. + /// + /// Valid indices are *0 <= index < self.len()* + /// + /// Computes in **O(1)** time. + pub fn get_range>(&self, range: R) -> Option<&Slice> { + let entries = self.as_entries(); + let range = try_simplify_range(range, entries.len())?; + entries.get(range).map(Slice::from_slice) + } + + /// Returns a mutable slice of key-value pairs in the given range of indices. + /// + /// Valid indices are *0 <= index < self.len()* + /// + /// Computes in **O(1)** time. + pub fn get_range_mut>(&mut self, range: R) -> Option<&mut Slice> { + let entries = self.as_entries_mut(); + let range = try_simplify_range(range, entries.len())?; + entries.get_mut(range).map(Slice::from_mut_slice) } /// Get the first key-value pair @@ -866,380 +996,6 @@ impl IndexMap { } } -/// An iterator over the keys of a `IndexMap`. -/// -/// This `struct` is created by the [`keys`] method on [`IndexMap`]. See its -/// documentation for more. -/// -/// [`keys`]: struct.IndexMap.html#method.keys -/// [`IndexMap`]: struct.IndexMap.html -pub struct Keys<'a, K, V> { - iter: SliceIter<'a, Bucket>, -} - -impl<'a, K, V> Iterator for Keys<'a, K, V> { - type Item = &'a K; - - iterator_methods!(Bucket::key_ref); -} - -impl DoubleEndedIterator for Keys<'_, K, V> { - double_ended_iterator_methods!(Bucket::key_ref); -} - -impl ExactSizeIterator for Keys<'_, K, V> { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for Keys<'_, K, V> {} - -// FIXME(#26925) Remove in favor of `#[derive(Clone)]` -impl Clone for Keys<'_, K, V> { - fn clone(&self) -> Self { - Keys { - iter: self.iter.clone(), - } - } -} - -impl fmt::Debug for Keys<'_, K, V> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.clone()).finish() - } -} - -/// An owning iterator over the keys of a `IndexMap`. -/// -/// This `struct` is created by the [`into_keys`] method on [`IndexMap`]. -/// See its documentation for more. 
-/// -/// [`IndexMap`]: struct.IndexMap.html -/// [`into_keys`]: struct.IndexMap.html#method.into_keys -pub struct IntoKeys { - iter: vec::IntoIter>, -} - -impl Iterator for IntoKeys { - type Item = K; - - iterator_methods!(Bucket::key); -} - -impl DoubleEndedIterator for IntoKeys { - double_ended_iterator_methods!(Bucket::key); -} - -impl ExactSizeIterator for IntoKeys { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for IntoKeys {} - -impl fmt::Debug for IntoKeys { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let iter = self.iter.as_slice().iter().map(Bucket::key_ref); - f.debug_list().entries(iter).finish() - } -} - -/// An iterator over the values of a `IndexMap`. -/// -/// This `struct` is created by the [`values`] method on [`IndexMap`]. See its -/// documentation for more. -/// -/// [`values`]: struct.IndexMap.html#method.values -/// [`IndexMap`]: struct.IndexMap.html -pub struct Values<'a, K, V> { - iter: SliceIter<'a, Bucket>, -} - -impl<'a, K, V> Iterator for Values<'a, K, V> { - type Item = &'a V; - - iterator_methods!(Bucket::value_ref); -} - -impl DoubleEndedIterator for Values<'_, K, V> { - double_ended_iterator_methods!(Bucket::value_ref); -} - -impl ExactSizeIterator for Values<'_, K, V> { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for Values<'_, K, V> {} - -// FIXME(#26925) Remove in favor of `#[derive(Clone)]` -impl Clone for Values<'_, K, V> { - fn clone(&self) -> Self { - Values { - iter: self.iter.clone(), - } - } -} - -impl fmt::Debug for Values<'_, K, V> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.clone()).finish() - } -} - -/// A mutable iterator over the values of a `IndexMap`. -/// -/// This `struct` is created by the [`values_mut`] method on [`IndexMap`]. See its -/// documentation for more. -/// -/// [`values_mut`]: struct.IndexMap.html#method.values_mut -/// [`IndexMap`]: struct.IndexMap.html -pub struct ValuesMut<'a, K, V> { - iter: SliceIterMut<'a, Bucket>, -} - -impl<'a, K, V> Iterator for ValuesMut<'a, K, V> { - type Item = &'a mut V; - - iterator_methods!(Bucket::value_mut); -} - -impl DoubleEndedIterator for ValuesMut<'_, K, V> { - double_ended_iterator_methods!(Bucket::value_mut); -} - -impl ExactSizeIterator for ValuesMut<'_, K, V> { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for ValuesMut<'_, K, V> {} - -impl fmt::Debug for ValuesMut<'_, K, V> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let iter = self.iter.as_slice().iter().map(Bucket::value_ref); - f.debug_list().entries(iter).finish() - } -} - -/// An owning iterator over the values of a `IndexMap`. -/// -/// This `struct` is created by the [`into_values`] method on [`IndexMap`]. -/// See its documentation for more. 
-/// -/// [`IndexMap`]: struct.IndexMap.html -/// [`into_values`]: struct.IndexMap.html#method.into_values -pub struct IntoValues { - iter: vec::IntoIter>, -} - -impl Iterator for IntoValues { - type Item = V; - - iterator_methods!(Bucket::value); -} - -impl DoubleEndedIterator for IntoValues { - double_ended_iterator_methods!(Bucket::value); -} - -impl ExactSizeIterator for IntoValues { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for IntoValues {} - -impl fmt::Debug for IntoValues { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let iter = self.iter.as_slice().iter().map(Bucket::value_ref); - f.debug_list().entries(iter).finish() - } -} - -/// An iterator over the entries of a `IndexMap`. -/// -/// This `struct` is created by the [`iter`] method on [`IndexMap`]. See its -/// documentation for more. -/// -/// [`iter`]: struct.IndexMap.html#method.iter -/// [`IndexMap`]: struct.IndexMap.html -pub struct Iter<'a, K, V> { - iter: SliceIter<'a, Bucket>, -} - -impl<'a, K, V> Iterator for Iter<'a, K, V> { - type Item = (&'a K, &'a V); - - iterator_methods!(Bucket::refs); -} - -impl DoubleEndedIterator for Iter<'_, K, V> { - double_ended_iterator_methods!(Bucket::refs); -} - -impl ExactSizeIterator for Iter<'_, K, V> { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for Iter<'_, K, V> {} - -// FIXME(#26925) Remove in favor of `#[derive(Clone)]` -impl Clone for Iter<'_, K, V> { - fn clone(&self) -> Self { - Iter { - iter: self.iter.clone(), - } - } -} - -impl fmt::Debug for Iter<'_, K, V> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.clone()).finish() - } -} - -/// A mutable iterator over the entries of a `IndexMap`. -/// -/// This `struct` is created by the [`iter_mut`] method on [`IndexMap`]. See its -/// documentation for more. -/// -/// [`iter_mut`]: struct.IndexMap.html#method.iter_mut -/// [`IndexMap`]: struct.IndexMap.html -pub struct IterMut<'a, K, V> { - iter: SliceIterMut<'a, Bucket>, -} - -impl<'a, K, V> Iterator for IterMut<'a, K, V> { - type Item = (&'a K, &'a mut V); - - iterator_methods!(Bucket::ref_mut); -} - -impl DoubleEndedIterator for IterMut<'_, K, V> { - double_ended_iterator_methods!(Bucket::ref_mut); -} - -impl ExactSizeIterator for IterMut<'_, K, V> { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for IterMut<'_, K, V> {} - -impl fmt::Debug for IterMut<'_, K, V> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let iter = self.iter.as_slice().iter().map(Bucket::refs); - f.debug_list().entries(iter).finish() - } -} - -/// An owning iterator over the entries of a `IndexMap`. -/// -/// This `struct` is created by the [`into_iter`] method on [`IndexMap`] -/// (provided by the `IntoIterator` trait). See its documentation for more. 
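All of the iterator types being moved out of this file walk entries in insertion order; a small sketch of that observable behavior, assuming the usual public API:

use indexmap::IndexMap;

fn main() {
    let mut map = IndexMap::new();
    map.insert("b", 2);
    map.insert("a", 1);
    // Unlike HashMap, iteration follows insertion order, not key order.
    let keys: Vec<_> = map.keys().copied().collect();
    assert_eq!(keys, ["b", "a"]);
    let pairs: Vec<_> = map.into_iter().collect();
    assert_eq!(pairs, [("b", 2), ("a", 1)]);
}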
-/// -/// [`into_iter`]: struct.IndexMap.html#method.into_iter -/// [`IndexMap`]: struct.IndexMap.html -pub struct IntoIter { - iter: vec::IntoIter>, -} - -impl Iterator for IntoIter { - type Item = (K, V); - - iterator_methods!(Bucket::key_value); -} - -impl DoubleEndedIterator for IntoIter { - double_ended_iterator_methods!(Bucket::key_value); -} - -impl ExactSizeIterator for IntoIter { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for IntoIter {} - -impl fmt::Debug for IntoIter { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let iter = self.iter.as_slice().iter().map(Bucket::refs); - f.debug_list().entries(iter).finish() - } -} - -/// A draining iterator over the entries of a `IndexMap`. -/// -/// This `struct` is created by the [`drain`] method on [`IndexMap`]. See its -/// documentation for more. -/// -/// [`drain`]: struct.IndexMap.html#method.drain -/// [`IndexMap`]: struct.IndexMap.html -pub struct Drain<'a, K, V> { - pub(crate) iter: vec::Drain<'a, Bucket>, -} - -impl Iterator for Drain<'_, K, V> { - type Item = (K, V); - - iterator_methods!(Bucket::key_value); -} - -impl DoubleEndedIterator for Drain<'_, K, V> { - double_ended_iterator_methods!(Bucket::key_value); -} - -impl ExactSizeIterator for Drain<'_, K, V> { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for Drain<'_, K, V> {} - -impl fmt::Debug for Drain<'_, K, V> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let iter = self.iter.as_slice().iter().map(Bucket::refs); - f.debug_list().entries(iter).finish() - } -} - -impl<'a, K, V, S> IntoIterator for &'a IndexMap { - type Item = (&'a K, &'a V); - type IntoIter = Iter<'a, K, V>; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -impl<'a, K, V, S> IntoIterator for &'a mut IndexMap { - type Item = (&'a K, &'a mut V); - type IntoIter = IterMut<'a, K, V>; - fn into_iter(self) -> Self::IntoIter { - self.iter_mut() - } -} - -impl IntoIterator for IndexMap { - type Item = (K, V); - type IntoIter = IntoIter; - fn into_iter(self) -> Self::IntoIter { - IntoIter { - iter: self.into_entries().into_iter(), - } - } -} - /// Access `IndexMap` values corresponding to a key. 
/// /// # Examples @@ -1421,7 +1177,8 @@ where } } -#[cfg(has_std)] +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl From<[(K, V); N]> for IndexMap where K: Hash + Eq, @@ -1521,427 +1278,3 @@ where S: BuildHasher, { } - -#[cfg(test)] -mod tests { - use super::*; - use std::string::String; - - #[test] - fn it_works() { - let mut map = IndexMap::new(); - assert_eq!(map.is_empty(), true); - map.insert(1, ()); - map.insert(1, ()); - assert_eq!(map.len(), 1); - assert!(map.get(&1).is_some()); - assert_eq!(map.is_empty(), false); - } - - #[test] - fn new() { - let map = IndexMap::::new(); - println!("{:?}", map); - assert_eq!(map.capacity(), 0); - assert_eq!(map.len(), 0); - assert_eq!(map.is_empty(), true); - } - - #[test] - fn insert() { - let insert = [0, 4, 2, 12, 8, 7, 11, 5]; - let not_present = [1, 3, 6, 9, 10]; - let mut map = IndexMap::with_capacity(insert.len()); - - for (i, &elt) in insert.iter().enumerate() { - assert_eq!(map.len(), i); - map.insert(elt, elt); - assert_eq!(map.len(), i + 1); - assert_eq!(map.get(&elt), Some(&elt)); - assert_eq!(map[&elt], elt); - } - println!("{:?}", map); - - for &elt in ¬_present { - assert!(map.get(&elt).is_none()); - } - } - - #[test] - fn insert_full() { - let insert = vec![9, 2, 7, 1, 4, 6, 13]; - let present = vec![1, 6, 2]; - let mut map = IndexMap::with_capacity(insert.len()); - - for (i, &elt) in insert.iter().enumerate() { - assert_eq!(map.len(), i); - let (index, existing) = map.insert_full(elt, elt); - assert_eq!(existing, None); - assert_eq!(Some(index), map.get_full(&elt).map(|x| x.0)); - assert_eq!(map.len(), i + 1); - } - - let len = map.len(); - for &elt in &present { - let (index, existing) = map.insert_full(elt, elt); - assert_eq!(existing, Some(elt)); - assert_eq!(Some(index), map.get_full(&elt).map(|x| x.0)); - assert_eq!(map.len(), len); - } - } - - #[test] - fn insert_2() { - let mut map = IndexMap::with_capacity(16); - - let mut keys = vec![]; - keys.extend(0..16); - keys.extend(if cfg!(miri) { 32..64 } else { 128..267 }); - - for &i in &keys { - let old_map = map.clone(); - map.insert(i, ()); - for key in old_map.keys() { - if map.get(key).is_none() { - println!("old_map: {:?}", old_map); - println!("map: {:?}", map); - panic!("did not find {} in map", key); - } - } - } - - for &i in &keys { - assert!(map.get(&i).is_some(), "did not find {}", i); - } - } - - #[test] - fn insert_order() { - let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; - let mut map = IndexMap::new(); - - for &elt in &insert { - map.insert(elt, ()); - } - - assert_eq!(map.keys().count(), map.len()); - assert_eq!(map.keys().count(), insert.len()); - for (a, b) in insert.iter().zip(map.keys()) { - assert_eq!(a, b); - } - for (i, k) in (0..insert.len()).zip(map.keys()) { - assert_eq!(map.get_index(i).unwrap().0, k); - } - } - - #[test] - fn grow() { - let insert = [0, 4, 2, 12, 8, 7, 11]; - let not_present = [1, 3, 6, 9, 10]; - let mut map = IndexMap::with_capacity(insert.len()); - - for (i, &elt) in insert.iter().enumerate() { - assert_eq!(map.len(), i); - map.insert(elt, elt); - assert_eq!(map.len(), i + 1); - assert_eq!(map.get(&elt), Some(&elt)); - assert_eq!(map[&elt], elt); - } - - println!("{:?}", map); - for &elt in &insert { - map.insert(elt * 10, elt); - } - for &elt in &insert { - map.insert(elt * 100, elt); - } - for (i, &elt) in insert.iter().cycle().enumerate().take(100) { - map.insert(elt * 100 + i as i32, elt); - } - println!("{:?}", map); - for &elt in ¬_present { - assert!(map.get(&elt).is_none()); - } 
- } - - #[test] - fn reserve() { - let mut map = IndexMap::::new(); - assert_eq!(map.capacity(), 0); - map.reserve(100); - let capacity = map.capacity(); - assert!(capacity >= 100); - for i in 0..capacity { - assert_eq!(map.len(), i); - map.insert(i, i * i); - assert_eq!(map.len(), i + 1); - assert_eq!(map.capacity(), capacity); - assert_eq!(map.get(&i), Some(&(i * i))); - } - map.insert(capacity, std::usize::MAX); - assert_eq!(map.len(), capacity + 1); - assert!(map.capacity() > capacity); - assert_eq!(map.get(&capacity), Some(&std::usize::MAX)); - } - - #[test] - fn shrink_to_fit() { - let mut map = IndexMap::::new(); - assert_eq!(map.capacity(), 0); - for i in 0..100 { - assert_eq!(map.len(), i); - map.insert(i, i * i); - assert_eq!(map.len(), i + 1); - assert!(map.capacity() >= i + 1); - assert_eq!(map.get(&i), Some(&(i * i))); - map.shrink_to_fit(); - assert_eq!(map.len(), i + 1); - assert_eq!(map.capacity(), i + 1); - assert_eq!(map.get(&i), Some(&(i * i))); - } - } - - #[test] - fn remove() { - let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; - let mut map = IndexMap::new(); - - for &elt in &insert { - map.insert(elt, elt); - } - - assert_eq!(map.keys().count(), map.len()); - assert_eq!(map.keys().count(), insert.len()); - for (a, b) in insert.iter().zip(map.keys()) { - assert_eq!(a, b); - } - - let remove_fail = [99, 77]; - let remove = [4, 12, 8, 7]; - - for &key in &remove_fail { - assert!(map.swap_remove_full(&key).is_none()); - } - println!("{:?}", map); - for &key in &remove { - //println!("{:?}", map); - let index = map.get_full(&key).unwrap().0; - assert_eq!(map.swap_remove_full(&key), Some((index, key, key))); - } - println!("{:?}", map); - - for key in &insert { - assert_eq!(map.get(key).is_some(), !remove.contains(key)); - } - assert_eq!(map.len(), insert.len() - remove.len()); - assert_eq!(map.keys().count(), insert.len() - remove.len()); - } - - #[test] - fn remove_to_empty() { - let mut map = indexmap! { 0 => 0, 4 => 4, 5 => 5 }; - map.swap_remove(&5).unwrap(); - map.swap_remove(&4).unwrap(); - map.swap_remove(&0).unwrap(); - assert!(map.is_empty()); - } - - #[test] - fn swap_remove_index() { - let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; - let mut map = IndexMap::new(); - - for &elt in &insert { - map.insert(elt, elt * 2); - } - - let mut vector = insert.to_vec(); - let remove_sequence = &[3, 3, 10, 4, 5, 4, 3, 0, 1]; - - // check that the same swap remove sequence on vec and map - // have the same result. 
- for &rm in remove_sequence { - let out_vec = vector.swap_remove(rm); - let (out_map, _) = map.swap_remove_index(rm).unwrap(); - assert_eq!(out_vec, out_map); - } - assert_eq!(vector.len(), map.len()); - for (a, b) in vector.iter().zip(map.keys()) { - assert_eq!(a, b); - } - } - - #[test] - fn partial_eq_and_eq() { - let mut map_a = IndexMap::new(); - map_a.insert(1, "1"); - map_a.insert(2, "2"); - let mut map_b = map_a.clone(); - assert_eq!(map_a, map_b); - map_b.swap_remove(&1); - assert_ne!(map_a, map_b); - - let map_c: IndexMap<_, String> = map_b.into_iter().map(|(k, v)| (k, v.into())).collect(); - assert_ne!(map_a, map_c); - assert_ne!(map_c, map_a); - } - - #[test] - fn extend() { - let mut map = IndexMap::new(); - map.extend(vec![(&1, &2), (&3, &4)]); - map.extend(vec![(5, 6)]); - assert_eq!( - map.into_iter().collect::>(), - vec![(1, 2), (3, 4), (5, 6)] - ); - } - - #[test] - fn entry() { - let mut map = IndexMap::new(); - - map.insert(1, "1"); - map.insert(2, "2"); - { - let e = map.entry(3); - assert_eq!(e.index(), 2); - let e = e.or_insert("3"); - assert_eq!(e, &"3"); - } - - let e = map.entry(2); - assert_eq!(e.index(), 1); - assert_eq!(e.key(), &2); - match e { - Entry::Occupied(ref e) => assert_eq!(e.get(), &"2"), - Entry::Vacant(_) => panic!(), - } - assert_eq!(e.or_insert("4"), &"2"); - } - - #[test] - fn entry_and_modify() { - let mut map = IndexMap::new(); - - map.insert(1, "1"); - map.entry(1).and_modify(|x| *x = "2"); - assert_eq!(Some(&"2"), map.get(&1)); - - map.entry(2).and_modify(|x| *x = "doesn't exist"); - assert_eq!(None, map.get(&2)); - } - - #[test] - fn entry_or_default() { - let mut map = IndexMap::new(); - - #[derive(Debug, PartialEq)] - enum TestEnum { - DefaultValue, - NonDefaultValue, - } - - impl Default for TestEnum { - fn default() -> Self { - TestEnum::DefaultValue - } - } - - map.insert(1, TestEnum::NonDefaultValue); - assert_eq!(&mut TestEnum::NonDefaultValue, map.entry(1).or_default()); - - assert_eq!(&mut TestEnum::DefaultValue, map.entry(2).or_default()); - } - - #[test] - fn occupied_entry_key() { - // These keys match hash and equality, but their addresses are distinct. - let (k1, k2) = (&mut 1, &mut 1); - let k1_ptr = k1 as *const i32; - let k2_ptr = k2 as *const i32; - assert_ne!(k1_ptr, k2_ptr); - - let mut map = IndexMap::new(); - map.insert(k1, "value"); - match map.entry(k2) { - Entry::Occupied(ref e) => { - // `OccupiedEntry::key` should reference the key in the map, - // not the key that was used to find the entry. 
- let ptr = *e.key() as *const i32; - assert_eq!(ptr, k1_ptr); - assert_ne!(ptr, k2_ptr); - } - Entry::Vacant(_) => panic!(), - } - } - - #[test] - fn keys() { - let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; - let map: IndexMap<_, _> = vec.into_iter().collect(); - let keys: Vec<_> = map.keys().copied().collect(); - assert_eq!(keys.len(), 3); - assert!(keys.contains(&1)); - assert!(keys.contains(&2)); - assert!(keys.contains(&3)); - } - - #[test] - fn into_keys() { - let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; - let map: IndexMap<_, _> = vec.into_iter().collect(); - let keys: Vec = map.into_keys().collect(); - assert_eq!(keys.len(), 3); - assert!(keys.contains(&1)); - assert!(keys.contains(&2)); - assert!(keys.contains(&3)); - } - - #[test] - fn values() { - let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; - let map: IndexMap<_, _> = vec.into_iter().collect(); - let values: Vec<_> = map.values().copied().collect(); - assert_eq!(values.len(), 3); - assert!(values.contains(&'a')); - assert!(values.contains(&'b')); - assert!(values.contains(&'c')); - } - - #[test] - fn values_mut() { - let vec = vec![(1, 1), (2, 2), (3, 3)]; - let mut map: IndexMap<_, _> = vec.into_iter().collect(); - for value in map.values_mut() { - *value *= 2 - } - let values: Vec<_> = map.values().copied().collect(); - assert_eq!(values.len(), 3); - assert!(values.contains(&2)); - assert!(values.contains(&4)); - assert!(values.contains(&6)); - } - - #[test] - fn into_values() { - let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; - let map: IndexMap<_, _> = vec.into_iter().collect(); - let values: Vec = map.into_values().collect(); - assert_eq!(values.len(), 3); - assert!(values.contains(&'a')); - assert!(values.contains(&'b')); - assert!(values.contains(&'c')); - } - - #[test] - #[cfg(has_std)] - fn from_array() { - let map = IndexMap::from([(1, 2), (3, 4)]); - let mut expected = IndexMap::new(); - expected.insert(1, 2); - expected.insert(3, 4); - - assert_eq!(map, expected) - } -} diff --git a/vendor/indexmap/src/map/core.rs b/vendor/indexmap/src/map/core.rs index ea7aaae..4a78035 100644 --- a/vendor/indexmap/src/map/core.rs +++ b/vendor/indexmap/src/map/core.rs @@ -12,14 +12,13 @@ mod raw; use hashbrown::raw::RawTable; use crate::vec::{Drain, Vec}; -use core::cmp; +use crate::TryReserveError; use core::fmt; -use core::mem::replace; +use core::mem; use core::ops::RangeBounds; -use crate::equivalent::Equivalent; use crate::util::simplify_range; -use crate::{Bucket, Entries, HashValue}; +use crate::{Bucket, Entries, Equivalent, HashValue}; /// Core of the map that does not depend on S pub(crate) struct IndexMapCore { @@ -62,18 +61,18 @@ where V: Clone, { fn clone(&self) -> Self { - let indices = self.indices.clone(); - let mut entries = Vec::with_capacity(indices.capacity()); - entries.clone_from(&self.entries); - IndexMapCore { indices, entries } + let mut new = Self::new(); + new.clone_from(self); + new } fn clone_from(&mut self, other: &Self) { let hasher = get_hash(&other.entries); self.indices.clone_from_with_hasher(&other.indices, hasher); if self.entries.capacity() < other.entries.len() { - // If we must resize, match the indices capacity - self.reserve_entries(); + // If we must resize, match the indices capacity. + let additional = other.entries.len() - self.entries.len(); + self.reserve_entries(additional); } self.entries.clone_from(&other.entries); } @@ -120,6 +119,9 @@ impl Entries for IndexMapCore { } impl IndexMapCore { + /// The maximum capacity before the `entries` allocation would exceed `isize::MAX`. 
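For intuition, the capacity cap defined next simply divides `isize::MAX` by the entry size. A sketch of the same arithmetic, using a stand-in `Bucket` since the real one is private, and assuming a 64-bit target:

use core::mem;

#[allow(dead_code)]
struct Bucket<K, V> {
    hash: usize, // stand-in for the cached HashValue
    key: K,
    value: V,
}

fn max_entries<K, V>() -> usize {
    (isize::MAX as usize) / mem::size_of::<Bucket<K, V>>()
}

fn main() {
    // On a 64-bit target, size_of::<Bucket<u64, u64>>() == 24.
    assert_eq!(max_entries::<u64, u64>(), (isize::MAX as usize) / 24);
}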
+ const MAX_ENTRIES_CAPACITY: usize = (isize::MAX as usize) / mem::size_of::>(); + #[inline] pub(crate) const fn new() -> Self { IndexMapCore { @@ -143,7 +145,7 @@ impl IndexMapCore { #[inline] pub(crate) fn capacity(&self) -> usize { - cmp::min(self.indices.capacity(), self.entries.capacity()) + Ord::min(self.indices.capacity(), self.entries.capacity()) } pub(crate) fn clear(&mut self) { @@ -193,15 +195,67 @@ impl IndexMapCore { /// Reserve capacity for `additional` more key-value pairs. pub(crate) fn reserve(&mut self, additional: usize) { self.indices.reserve(additional, get_hash(&self.entries)); - self.reserve_entries(); + // Only grow entries if necessary, since we also round up capacity. + if additional > self.entries.capacity() - self.entries.len() { + self.reserve_entries(additional); + } + } + + /// Reserve entries capacity, rounded up to match the indices + fn reserve_entries(&mut self, additional: usize) { + // Use a soft-limit on the maximum capacity, but if the caller explicitly + // requested more, do it and let them have the resulting panic. + let new_capacity = Ord::min(self.indices.capacity(), Self::MAX_ENTRIES_CAPACITY); + let try_add = new_capacity - self.entries.len(); + if try_add > additional && self.entries.try_reserve_exact(try_add).is_ok() { + return; + } + self.entries.reserve_exact(additional); } - /// Reserve entries capacity to match the indices - fn reserve_entries(&mut self) { - let additional = self.indices.capacity() - self.entries.len(); + /// Reserve capacity for `additional` more key-value pairs, without over-allocating. + pub(crate) fn reserve_exact(&mut self, additional: usize) { + self.indices.reserve(additional, get_hash(&self.entries)); self.entries.reserve_exact(additional); } + /// Try to reserve capacity for `additional` more key-value pairs. + pub(crate) fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.indices + .try_reserve(additional, get_hash(&self.entries)) + .map_err(TryReserveError::from_hashbrown)?; + // Only grow entries if necessary, since we also round up capacity. + if additional > self.entries.capacity() - self.entries.len() { + self.try_reserve_entries(additional) + } else { + Ok(()) + } + } + + /// Try to reserve entries capacity, rounded up to match the indices + fn try_reserve_entries(&mut self, additional: usize) -> Result<(), TryReserveError> { + // Use a soft-limit on the maximum capacity, but if the caller explicitly + // requested more, do it and let them have the resulting error. + let new_capacity = Ord::min(self.indices.capacity(), Self::MAX_ENTRIES_CAPACITY); + let try_add = new_capacity - self.entries.len(); + if try_add > additional && self.entries.try_reserve_exact(try_add).is_ok() { + return Ok(()); + } + self.entries + .try_reserve_exact(additional) + .map_err(TryReserveError::from_alloc) + } + + /// Try to reserve capacity for `additional` more key-value pairs, without over-allocating. + pub(crate) fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.indices + .try_reserve(additional, get_hash(&self.entries)) + .map_err(TryReserveError::from_hashbrown)?; + self.entries + .try_reserve_exact(additional) + .map_err(TryReserveError::from_alloc) + } + /// Shrink the capacity of the map with a lower bound pub(crate) fn shrink_to(&mut self, min_capacity: usize) { self.indices @@ -220,18 +274,14 @@ impl IndexMapCore { } } - /// Append a key-value pair, *without* checking whether it already exists, - /// and return the pair's new index. 
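The fallible-allocation plumbing added here surfaces publicly as `IndexMap::try_reserve` and `try_reserve_exact`; a hedged usage sketch, assuming those public wrappers accompany this change:

use indexmap::IndexMap;

fn main() {
    let mut map: IndexMap<u32, u32> = IndexMap::new();
    // A modest request succeeds and the capacity reflects it.
    map.try_reserve(10).expect("small reservation should succeed");
    assert!(map.capacity() >= 10);
    // An absurd request fails with a TryReserveError instead of aborting.
    assert!(map.try_reserve(usize::MAX).is_err());
}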
- fn push(&mut self, hash: HashValue, key: K, value: V) -> usize { - let i = self.entries.len(); - self.indices.insert(hash.get(), i, get_hash(&self.entries)); - if i == self.entries.capacity() { + /// Append a key-value pair to `entries`, *without* checking whether it already exists. + fn push_entry(&mut self, hash: HashValue, key: K, value: V) { + if self.entries.len() == self.entries.capacity() { // Reserve our own capacity synced to the indices, // rather than letting `Vec::push` just double it. - self.reserve_entries(); + self.reserve_entries(1); } self.entries.push(Bucket { hash, key, value }); - i } /// Return the index in `entries` where an equivalent key can be found @@ -247,9 +297,13 @@ impl IndexMapCore { where K: Eq, { - match self.get_index_of(hash, &key) { - Some(i) => (i, Some(replace(&mut self.entries[i].value, value))), - None => (self.push(hash, key, value), None), + match self.find_or_insert(hash, &key) { + Ok(i) => (i, Some(mem::replace(&mut self.entries[i].value, value))), + Err(i) => { + debug_assert_eq!(i, self.entries.len()); + self.push_entry(hash, key, value); + (i, None) + } } } @@ -339,7 +393,7 @@ impl IndexMapCore { pub(super) fn move_index(&mut self, from: usize, to: usize) { let from_hash = self.entries[from].hash; if from != to { - // Use a sentinal index so other indices don't collide. + // Use a sentinel index so other indices don't collide. update_index(&mut self.indices, from_hash, from, usize::MAX); // Update all other indices and rotate the entry positions. @@ -351,11 +405,31 @@ impl IndexMapCore { self.entries[to..=from].rotate_right(1); } - // Change the sentinal index to its final position. + // Change the sentinel index to its final position. update_index(&mut self.indices, from_hash, usize::MAX, to); } } + pub(crate) fn swap_indices(&mut self, a: usize, b: usize) { + // If they're equal and in-bounds, there's nothing to do. + if a == b && a < self.entries.len() { + return; + } + + // We'll get a "nice" bounds-check from indexing `self.entries`, + // and then we expect to find it in the table as well. + let [ref_a, ref_b] = self + .indices + .get_many_mut( + [self.entries[a].hash.get(), self.entries[b].hash.get()], + move |i, &x| if i == 0 { x == a } else { x == b }, + ) + .expect("indices not found"); + + mem::swap(ref_a, ref_b); + self.entries.swap(a, b); + } + /// Remove an entry by swapping it with the last pub(crate) fn swap_remove_full(&mut self, hash: HashValue, key: &Q) -> Option<(usize, K, V)> where @@ -447,25 +521,9 @@ impl IndexMapCore { where F: FnMut(&mut K, &mut V) -> bool, { - // FIXME: This could use Vec::retain_mut with MSRV 1.61. - // Like Vec::retain in self.entries, but with mutable K and V. - // We swap-shift all the items we want to keep, truncate the rest, - // then rebuild the raw hash table with the new indexes. - let len = self.entries.len(); - let mut n_deleted = 0; - for i in 0..len { - let will_keep = { - let entry = &mut self.entries[i]; - keep(&mut entry.key, &mut entry.value) - }; - if !will_keep { - n_deleted += 1; - } else if n_deleted > 0 { - self.entries.swap(i - n_deleted, i); - } - } - if n_deleted > 0 { - self.entries.truncate(len - n_deleted); + self.entries + .retain_mut(|entry| keep(&mut entry.key, &mut entry.value)); + if self.entries.len() < self.indices.len() { self.rebuild_hash_table(); } } @@ -601,7 +659,7 @@ pub use self::raw::OccupiedEntry; impl OccupiedEntry<'_, K, V> { /// Sets the value of the entry to `value`, and returns the entry's old value. 
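At the public level, the `find_or_insert` path above is what backs `insert_full`, and `swap_indices` is exposed directly on the map; a small sketch of the observable behavior:

use indexmap::IndexMap;

fn main() {
    let mut map = IndexMap::new();
    assert_eq!(map.insert_full("a", 1), (0, None)); // new key gets a new index
    assert_eq!(map.insert_full("b", 2), (1, None));
    assert_eq!(map.insert_full("a", 9), (0, Some(1))); // existing key keeps its index
    // Swap positions without rehashing either key.
    map.swap_indices(0, 1);
    assert_eq!(map.get_index(0), Some((&"b", &2)));
    assert_eq!(map.get_index(1), Some((&"a", &9)));
}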
pub fn insert(&mut self, value: V) -> V { - replace(self.get_mut(), value) + mem::replace(self.get_mut(), value) } /// Remove the key, value pair stored in the map for this entry, and return the value. @@ -673,14 +731,18 @@ impl<'a, K, V> VacantEntry<'a, K, V> { /// Return the index where the key-value pair will be inserted. pub fn index(&self) -> usize { - self.map.len() + self.map.indices.len() } /// Inserts the entry's key and the given value into the map, and returns a mutable reference /// to the value. pub fn insert(self, value: V) -> &'a mut V { - let i = self.map.push(self.hash, self.key, value); - &mut self.map.entries[i].value + let i = self.index(); + let Self { map, hash, key } = self; + map.indices.insert(hash.get(), i, get_hash(&map.entries)); + debug_assert_eq!(i, map.entries.len()); + map.push_entry(hash, key, value); + &mut map.entries[i].value } } diff --git a/vendor/indexmap/src/map/core/raw.rs b/vendor/indexmap/src/map/core/raw.rs index bf1672d..be71c9c 100644 --- a/vendor/indexmap/src/map/core/raw.rs +++ b/vendor/indexmap/src/map/core/raw.rs @@ -2,7 +2,7 @@ //! This module encapsulates the `unsafe` access to `hashbrown::raw::RawTable`, //! mostly in dealing with its bucket "pointers". -use super::{equivalent, Bucket, Entry, HashValue, IndexMapCore, VacantEntry}; +use super::{equivalent, get_hash, Bucket, Entry, HashValue, IndexMapCore, VacantEntry}; use core::fmt; use core::mem::replace; use hashbrown::raw::RawTable; @@ -26,7 +26,7 @@ pub(super) struct DebugIndices<'a>(pub &'a RawTable); impl fmt::Debug for DebugIndices<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // SAFETY: we're not letting any of the buckets escape this function - let indices = unsafe { self.0.iter().map(|raw_bucket| raw_bucket.read()) }; + let indices = unsafe { self.0.iter().map(|raw_bucket| *raw_bucket.as_ref()) }; f.debug_list().entries(indices).finish() } } @@ -38,16 +38,42 @@ impl IndexMapCore { unsafe { let offset = end - start; for bucket in self.indices.iter() { - let i = bucket.read(); - if i >= end { - bucket.write(i - offset); - } else if i >= start { + let i = bucket.as_mut(); + if *i >= end { + *i -= offset; + } else if *i >= start { self.indices.erase(bucket); } } } } + /// Search for a key in the table and return `Ok(entry_index)` if found. + /// Otherwise, insert the key and return `Err(new_index)`. + /// + /// Note that hashbrown may resize the table to reserve space for insertion, + /// even before checking if it's already present, so this is somewhat biased + /// towards new items. + pub(crate) fn find_or_insert(&mut self, hash: HashValue, key: &K) -> Result + where + K: Eq, + { + let hash = hash.get(); + let eq = equivalent(key, &self.entries); + let hasher = get_hash(&self.entries); + // SAFETY: We're not mutating between find and read/insert. + unsafe { + match self.indices.find_or_find_insert_slot(hash, eq, hasher) { + Ok(raw_bucket) => Ok(*raw_bucket.as_ref()), + Err(slot) => { + let index = self.indices.len(); + self.indices.insert_in_slot(hash, slot, index); + Err(index) + } + } + } + } + pub(crate) fn entry(&mut self, hash: HashValue, key: K) -> Entry<'_, K, V> where K: Eq, @@ -74,29 +100,6 @@ impl IndexMapCore { // only the item references that are appropriately bound to `&mut self`. 
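The `VacantEntry::insert` path above always appends at `map.indices.len()`; seen through the public entry API, that index is reported before insertion:

use indexmap::IndexMap;

fn main() {
    let mut map = IndexMap::new();
    map.insert("a", 1);
    // A vacant entry reports the index where it would be appended.
    let entry = map.entry("b");
    assert_eq!(entry.index(), 1);
    *entry.or_insert(0) += 2;
    assert_eq!(map["b"], 2);
}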
unsafe { self.indices.iter().map(|bucket| bucket.as_mut()) } } - - /// Return the raw bucket for the given index - fn find_index(&self, index: usize) -> RawBucket { - // We'll get a "nice" bounds-check from indexing `self.entries`, - // and then we expect to find it in the table as well. - let hash = self.entries[index].hash.get(); - self.indices - .find(hash, move |&i| i == index) - .expect("index not found") - } - - pub(crate) fn swap_indices(&mut self, a: usize, b: usize) { - // SAFETY: Can't take two `get_mut` references from one table, so we - // must use raw buckets to do the swap. This is still safe because we - // are locally sure they won't dangle, and we write them individually. - unsafe { - let raw_bucket_a = self.find_index(a); - let raw_bucket_b = self.find_index(b); - raw_bucket_a.write(b); - raw_bucket_b.write(a); - } - self.entries.swap(a, b); - } } /// A view into an occupied entry in a `IndexMap`. @@ -151,7 +154,7 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { #[inline] pub fn index(&self) -> usize { // SAFETY: we have &mut map keep keeping the bucket stable - unsafe { self.raw_bucket.read() } + unsafe { *self.raw_bucket.as_ref() } } /// Converts into a mutable reference to the entry's value in the map, @@ -171,7 +174,7 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { pub fn swap_remove_entry(self) -> (K, V) { // SAFETY: This is safe because it can only happen once (self is consumed) // and map.indices have not been modified since entry construction - let index = unsafe { self.map.indices.remove(self.raw_bucket) }; + let (index, _slot) = unsafe { self.map.indices.remove(self.raw_bucket) }; self.map.swap_remove_finish(index) } @@ -185,7 +188,7 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { pub fn shift_remove_entry(self) -> (K, V) { // SAFETY: This is safe because it can only happen once (self is consumed) // and map.indices have not been modified since entry construction - let index = unsafe { self.map.indices.remove(self.raw_bucket) }; + let (index, _slot) = unsafe { self.map.indices.remove(self.raw_bucket) }; self.map.shift_remove_finish(index) } } diff --git a/vendor/indexmap/src/map/iter.rs b/vendor/indexmap/src/map/iter.rs new file mode 100644 index 0000000..db6e140 --- /dev/null +++ b/vendor/indexmap/src/map/iter.rs @@ -0,0 +1,541 @@ +use super::{Bucket, Entries, IndexMap, Slice}; + +use alloc::vec::{self, Vec}; +use core::fmt; +use core::iter::FusedIterator; +use core::slice; + +impl<'a, K, V, S> IntoIterator for &'a IndexMap { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, K, V, S> IntoIterator for &'a mut IndexMap { + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +impl IntoIterator for IndexMap { + type Item = (K, V); + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.into_entries()) + } +} + +/// An iterator over the entries of a `IndexMap`. +/// +/// This `struct` is created by the [`iter`] method on [`IndexMap`]. See its +/// documentation for more. +/// +/// [`iter`]: struct.IndexMap.html#method.iter +/// [`IndexMap`]: struct.IndexMap.html +pub struct Iter<'a, K, V> { + iter: slice::Iter<'a, Bucket>, +} + +impl<'a, K, V> Iter<'a, K, V> { + pub(super) fn new(entries: &'a [Bucket]) -> Self { + Self { + iter: entries.iter(), + } + } + + /// Returns a slice of the remaining entries in the iterator. 
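The `as_slice` accessors added to these iterators expose the unconsumed remainder; a sketch, assuming `indexmap::map::Iter::as_slice` as declared here:

use indexmap::IndexMap;

fn main() {
    let map: IndexMap<_, _> = [(1, 'a'), (2, 'b'), (3, 'c')].into_iter().collect();
    let mut iter = map.iter();
    iter.next(); // consume (1, 'a')
    // The remaining entries are still viewable as a map slice.
    let rest = iter.as_slice();
    assert_eq!(rest.len(), 2);
    assert_eq!(rest.get_index(0), Some((&2, &'b')));
}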
+ pub fn as_slice(&self) -> &'a Slice { + Slice::from_slice(self.iter.as_slice()) + } +} + +impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = (&'a K, &'a V); + + iterator_methods!(Bucket::refs); +} + +impl DoubleEndedIterator for Iter<'_, K, V> { + double_ended_iterator_methods!(Bucket::refs); +} + +impl ExactSizeIterator for Iter<'_, K, V> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for Iter<'_, K, V> {} + +// FIXME(#26925) Remove in favor of `#[derive(Clone)]` +impl Clone for Iter<'_, K, V> { + fn clone(&self) -> Self { + Iter { + iter: self.iter.clone(), + } + } +} + +impl fmt::Debug for Iter<'_, K, V> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +impl Default for Iter<'_, K, V> { + fn default() -> Self { + Self { iter: [].iter() } + } +} + +/// A mutable iterator over the entries of a `IndexMap`. +/// +/// This `struct` is created by the [`iter_mut`] method on [`IndexMap`]. See its +/// documentation for more. +/// +/// [`iter_mut`]: struct.IndexMap.html#method.iter_mut +/// [`IndexMap`]: struct.IndexMap.html +pub struct IterMut<'a, K, V> { + iter: slice::IterMut<'a, Bucket>, +} + +impl<'a, K, V> IterMut<'a, K, V> { + pub(super) fn new(entries: &'a mut [Bucket]) -> Self { + Self { + iter: entries.iter_mut(), + } + } + + /// Returns a slice of the remaining entries in the iterator. + pub fn as_slice(&self) -> &Slice { + Slice::from_slice(self.iter.as_slice()) + } + + /// Returns a mutable slice of the remaining entries in the iterator. + /// + /// To avoid creating `&mut` references that alias, this is forced to consume the iterator. + pub fn into_slice(self) -> &'a mut Slice { + Slice::from_mut_slice(self.iter.into_slice()) + } +} + +impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + iterator_methods!(Bucket::ref_mut); +} + +impl DoubleEndedIterator for IterMut<'_, K, V> { + double_ended_iterator_methods!(Bucket::ref_mut); +} + +impl ExactSizeIterator for IterMut<'_, K, V> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for IterMut<'_, K, V> {} + +impl fmt::Debug for IterMut<'_, K, V> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let iter = self.iter.as_slice().iter().map(Bucket::refs); + f.debug_list().entries(iter).finish() + } +} + +impl Default for IterMut<'_, K, V> { + fn default() -> Self { + Self { + iter: [].iter_mut(), + } + } +} + +/// An owning iterator over the entries of a `IndexMap`. +/// +/// This `struct` is created by the [`into_iter`] method on [`IndexMap`] +/// (provided by the `IntoIterator` trait). See its documentation for more. +/// +/// [`into_iter`]: struct.IndexMap.html#method.into_iter +/// [`IndexMap`]: struct.IndexMap.html +pub struct IntoIter { + iter: vec::IntoIter>, +} + +impl IntoIter { + pub(super) fn new(entries: Vec>) -> Self { + Self { + iter: entries.into_iter(), + } + } + + /// Returns a slice of the remaining entries in the iterator. + pub fn as_slice(&self) -> &Slice { + Slice::from_slice(self.iter.as_slice()) + } + + /// Returns a mutable slice of the remaining entries in the iterator. 
+ pub fn as_mut_slice(&mut self) -> &mut Slice { + Slice::from_mut_slice(self.iter.as_mut_slice()) + } +} + +impl Iterator for IntoIter { + type Item = (K, V); + + iterator_methods!(Bucket::key_value); +} + +impl DoubleEndedIterator for IntoIter { + double_ended_iterator_methods!(Bucket::key_value); +} + +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for IntoIter {} + +impl fmt::Debug for IntoIter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let iter = self.iter.as_slice().iter().map(Bucket::refs); + f.debug_list().entries(iter).finish() + } +} + +impl Default for IntoIter { + fn default() -> Self { + Self { + iter: Vec::new().into_iter(), + } + } +} + +/// A draining iterator over the entries of a `IndexMap`. +/// +/// This `struct` is created by the [`drain`] method on [`IndexMap`]. See its +/// documentation for more. +/// +/// [`drain`]: struct.IndexMap.html#method.drain +/// [`IndexMap`]: struct.IndexMap.html +pub struct Drain<'a, K, V> { + iter: vec::Drain<'a, Bucket>, +} + +impl<'a, K, V> Drain<'a, K, V> { + pub(super) fn new(iter: vec::Drain<'a, Bucket>) -> Self { + Self { iter } + } + + /// Returns a slice of the remaining entries in the iterator. + pub fn as_slice(&self) -> &Slice { + Slice::from_slice(self.iter.as_slice()) + } +} + +impl Iterator for Drain<'_, K, V> { + type Item = (K, V); + + iterator_methods!(Bucket::key_value); +} + +impl DoubleEndedIterator for Drain<'_, K, V> { + double_ended_iterator_methods!(Bucket::key_value); +} + +impl ExactSizeIterator for Drain<'_, K, V> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for Drain<'_, K, V> {} + +impl fmt::Debug for Drain<'_, K, V> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let iter = self.iter.as_slice().iter().map(Bucket::refs); + f.debug_list().entries(iter).finish() + } +} + +/// An iterator over the keys of a `IndexMap`. +/// +/// This `struct` is created by the [`keys`] method on [`IndexMap`]. See its +/// documentation for more. +/// +/// [`keys`]: struct.IndexMap.html#method.keys +/// [`IndexMap`]: struct.IndexMap.html +pub struct Keys<'a, K, V> { + iter: slice::Iter<'a, Bucket>, +} + +impl<'a, K, V> Keys<'a, K, V> { + pub(super) fn new(entries: &'a [Bucket]) -> Self { + Self { + iter: entries.iter(), + } + } +} + +impl<'a, K, V> Iterator for Keys<'a, K, V> { + type Item = &'a K; + + iterator_methods!(Bucket::key_ref); +} + +impl DoubleEndedIterator for Keys<'_, K, V> { + double_ended_iterator_methods!(Bucket::key_ref); +} + +impl ExactSizeIterator for Keys<'_, K, V> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for Keys<'_, K, V> {} + +// FIXME(#26925) Remove in favor of `#[derive(Clone)]` +impl Clone for Keys<'_, K, V> { + fn clone(&self) -> Self { + Keys { + iter: self.iter.clone(), + } + } +} + +impl fmt::Debug for Keys<'_, K, V> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +impl Default for Keys<'_, K, V> { + fn default() -> Self { + Self { iter: [].iter() } + } +} + +/// An owning iterator over the keys of a `IndexMap`. +/// +/// This `struct` is created by the [`into_keys`] method on [`IndexMap`]. +/// See its documentation for more. 
+/// +/// [`IndexMap`]: struct.IndexMap.html +/// [`into_keys`]: struct.IndexMap.html#method.into_keys +pub struct IntoKeys { + iter: vec::IntoIter>, +} + +impl IntoKeys { + pub(super) fn new(entries: Vec>) -> Self { + Self { + iter: entries.into_iter(), + } + } +} + +impl Iterator for IntoKeys { + type Item = K; + + iterator_methods!(Bucket::key); +} + +impl DoubleEndedIterator for IntoKeys { + double_ended_iterator_methods!(Bucket::key); +} + +impl ExactSizeIterator for IntoKeys { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for IntoKeys {} + +impl fmt::Debug for IntoKeys { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let iter = self.iter.as_slice().iter().map(Bucket::key_ref); + f.debug_list().entries(iter).finish() + } +} + +impl Default for IntoKeys { + fn default() -> Self { + Self { + iter: Vec::new().into_iter(), + } + } +} + +/// An iterator over the values of a `IndexMap`. +/// +/// This `struct` is created by the [`values`] method on [`IndexMap`]. See its +/// documentation for more. +/// +/// [`values`]: struct.IndexMap.html#method.values +/// [`IndexMap`]: struct.IndexMap.html +pub struct Values<'a, K, V> { + iter: slice::Iter<'a, Bucket>, +} + +impl<'a, K, V> Values<'a, K, V> { + pub(super) fn new(entries: &'a [Bucket]) -> Self { + Self { + iter: entries.iter(), + } + } +} + +impl<'a, K, V> Iterator for Values<'a, K, V> { + type Item = &'a V; + + iterator_methods!(Bucket::value_ref); +} + +impl DoubleEndedIterator for Values<'_, K, V> { + double_ended_iterator_methods!(Bucket::value_ref); +} + +impl ExactSizeIterator for Values<'_, K, V> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for Values<'_, K, V> {} + +// FIXME(#26925) Remove in favor of `#[derive(Clone)]` +impl Clone for Values<'_, K, V> { + fn clone(&self) -> Self { + Values { + iter: self.iter.clone(), + } + } +} + +impl fmt::Debug for Values<'_, K, V> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +impl Default for Values<'_, K, V> { + fn default() -> Self { + Self { iter: [].iter() } + } +} + +/// A mutable iterator over the values of a `IndexMap`. +/// +/// This `struct` is created by the [`values_mut`] method on [`IndexMap`]. See its +/// documentation for more. +/// +/// [`values_mut`]: struct.IndexMap.html#method.values_mut +/// [`IndexMap`]: struct.IndexMap.html +pub struct ValuesMut<'a, K, V> { + iter: slice::IterMut<'a, Bucket>, +} + +impl<'a, K, V> ValuesMut<'a, K, V> { + pub(super) fn new(entries: &'a mut [Bucket]) -> Self { + Self { + iter: entries.iter_mut(), + } + } +} + +impl<'a, K, V> Iterator for ValuesMut<'a, K, V> { + type Item = &'a mut V; + + iterator_methods!(Bucket::value_mut); +} + +impl DoubleEndedIterator for ValuesMut<'_, K, V> { + double_ended_iterator_methods!(Bucket::value_mut); +} + +impl ExactSizeIterator for ValuesMut<'_, K, V> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for ValuesMut<'_, K, V> {} + +impl fmt::Debug for ValuesMut<'_, K, V> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let iter = self.iter.as_slice().iter().map(Bucket::value_ref); + f.debug_list().entries(iter).finish() + } +} + +impl Default for ValuesMut<'_, K, V> { + fn default() -> Self { + Self { + iter: [].iter_mut(), + } + } +} + +/// An owning iterator over the values of a `IndexMap`. +/// +/// This `struct` is created by the [`into_values`] method on [`IndexMap`]. +/// See its documentation for more. 
+/// +/// [`IndexMap`]: struct.IndexMap.html +/// [`into_values`]: struct.IndexMap.html#method.into_values +pub struct IntoValues { + iter: vec::IntoIter>, +} + +impl IntoValues { + pub(super) fn new(entries: Vec>) -> Self { + Self { + iter: entries.into_iter(), + } + } +} + +impl Iterator for IntoValues { + type Item = V; + + iterator_methods!(Bucket::value); +} + +impl DoubleEndedIterator for IntoValues { + double_ended_iterator_methods!(Bucket::value); +} + +impl ExactSizeIterator for IntoValues { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for IntoValues {} + +impl fmt::Debug for IntoValues { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let iter = self.iter.as_slice().iter().map(Bucket::value_ref); + f.debug_list().entries(iter).finish() + } +} + +impl Default for IntoValues { + fn default() -> Self { + Self { + iter: Vec::new().into_iter(), + } + } +} diff --git a/vendor/indexmap/src/serde_seq.rs b/vendor/indexmap/src/map/serde_seq.rs similarity index 73% rename from vendor/indexmap/src/serde_seq.rs rename to vendor/indexmap/src/map/serde_seq.rs index d326a02..f10aa57 100644 --- a/vendor/indexmap/src/serde_seq.rs +++ b/vendor/indexmap/src/map/serde_seq.rs @@ -12,13 +12,11 @@ //! # use serde_derive::{Deserialize, Serialize}; //! #[derive(Deserialize, Serialize)] //! struct Data { -//! #[serde(with = "indexmap::serde_seq")] +//! #[serde(with = "indexmap::map::serde_seq")] //! map: IndexMap, //! // ... //! } //! ``` -//! -//! Requires crate feature `"serde"` or `"serde-1"` use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor}; use serde::ser::{Serialize, Serializer}; @@ -27,8 +25,40 @@ use core::fmt::{self, Formatter}; use core::hash::{BuildHasher, Hash}; use core::marker::PhantomData; +use crate::map::Slice as MapSlice; +use crate::set::Slice as SetSlice; use crate::IndexMap; +/// Serializes a `map::Slice` as an ordered sequence. +/// +/// This behaves like [`crate::map::serde_seq`] for `IndexMap`, serializing a sequence +/// of `(key, value)` pairs, rather than as a map that might not preserve order. +impl Serialize for MapSlice +where + K: Serialize, + V: Serialize, +{ + fn serialize(&self, serializer: T) -> Result + where + T: Serializer, + { + serializer.collect_seq(self) + } +} + +/// Serializes a `set::Slice` as an ordered sequence. +impl Serialize for SetSlice +where + T: Serialize, +{ + fn serialize(&self, serializer: Se) -> Result + where + Se: Serializer, + { + serializer.collect_seq(self) + } +} + /// Serializes an `IndexMap` as an ordered sequence. /// /// This function may be used in a field attribute for deriving `Serialize`: @@ -38,13 +68,11 @@ use crate::IndexMap; /// # use serde_derive::Serialize; /// #[derive(Serialize)] /// struct Data { -/// #[serde(serialize_with = "indexmap::serde_seq::serialize")] +/// #[serde(serialize_with = "indexmap::map::serde_seq::serialize")] /// map: IndexMap, /// // ... /// } /// ``` -/// -/// Requires crate feature `"serde"` or `"serde-1"` pub fn serialize(map: &IndexMap, serializer: T) -> Result where K: Serialize + Hash + Eq, @@ -94,13 +122,11 @@ where /// # use serde_derive::Deserialize; /// #[derive(Deserialize)] /// struct Data { -/// #[serde(deserialize_with = "indexmap::serde_seq::deserialize")] +/// #[serde(deserialize_with = "indexmap::map::serde_seq::deserialize")] /// map: IndexMap, /// // ... 
/// } /// ``` -/// -/// Requires crate feature `"serde"` or `"serde-1"` pub fn deserialize<'de, D, K, V, S>(deserializer: D) -> Result, D::Error> where D: Deserializer<'de>, diff --git a/vendor/indexmap/src/map/slice.rs b/vendor/indexmap/src/map/slice.rs new file mode 100644 index 0000000..ebab208 --- /dev/null +++ b/vendor/indexmap/src/map/slice.rs @@ -0,0 +1,540 @@ +use super::{ + Bucket, Entries, IndexMap, IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Values, + ValuesMut, +}; +use crate::util::try_simplify_range; + +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::cmp::Ordering; +use core::fmt; +use core::hash::{Hash, Hasher}; +use core::ops::{self, Bound, Index, IndexMut, RangeBounds}; + +/// A dynamically-sized slice of key-value pairs in an `IndexMap`. +/// +/// This supports indexed operations much like a `[(K, V)]` slice, +/// but not any hashed operations on the map keys. +/// +/// Unlike `IndexMap`, `Slice` does consider the order for `PartialEq` +/// and `Eq`, and it also implements `PartialOrd`, `Ord`, and `Hash`. +#[repr(transparent)] +pub struct Slice { + pub(crate) entries: [Bucket], +} + +// SAFETY: `Slice` is a transparent wrapper around `[Bucket]`, +// and reference lifetimes are bound together in function signatures. +#[allow(unsafe_code)] +impl Slice { + pub(super) const fn from_slice(entries: &[Bucket]) -> &Self { + unsafe { &*(entries as *const [Bucket] as *const Self) } + } + + pub(super) fn from_mut_slice(entries: &mut [Bucket]) -> &mut Self { + unsafe { &mut *(entries as *mut [Bucket] as *mut Self) } + } + + pub(super) fn from_boxed(entries: Box<[Bucket]>) -> Box { + unsafe { Box::from_raw(Box::into_raw(entries) as *mut Self) } + } + + fn into_boxed(self: Box) -> Box<[Bucket]> { + unsafe { Box::from_raw(Box::into_raw(self) as *mut [Bucket]) } + } +} + +impl Slice { + pub(crate) fn into_entries(self: Box) -> Vec> { + self.into_boxed().into_vec() + } + + /// Returns an empty slice. + pub const fn new<'a>() -> &'a Self { + Self::from_slice(&[]) + } + + /// Returns an empty mutable slice. + pub fn new_mut<'a>() -> &'a mut Self { + Self::from_mut_slice(&mut []) + } + + /// Return the number of key-value pairs in the map slice. + #[inline] + pub const fn len(&self) -> usize { + self.entries.len() + } + + /// Returns true if the map slice contains no elements. + #[inline] + pub const fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Get a key-value pair by index. + /// + /// Valid indices are *0 <= index < self.len()* + pub fn get_index(&self, index: usize) -> Option<(&K, &V)> { + self.entries.get(index).map(Bucket::refs) + } + + /// Get a key-value pair by index, with mutable access to the value. + /// + /// Valid indices are *0 <= index < self.len()* + pub fn get_index_mut(&mut self, index: usize) -> Option<(&K, &mut V)> { + self.entries.get_mut(index).map(Bucket::ref_mut) + } + + /// Returns a slice of key-value pairs in the given range of indices. + /// + /// Valid indices are *0 <= index < self.len()* + pub fn get_range>(&self, range: R) -> Option<&Self> { + let range = try_simplify_range(range, self.entries.len())?; + self.entries.get(range).map(Slice::from_slice) + } + + /// Returns a mutable slice of key-value pairs in the given range of indices. + /// + /// Valid indices are *0 <= index < self.len()* + pub fn get_range_mut>(&mut self, range: R) -> Option<&mut Self> { + let range = try_simplify_range(range, self.entries.len())?; + self.entries.get_mut(range).map(Slice::from_mut_slice) + } + + /// Get the first key-value pair. 
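One consequence of the `Slice` semantics described above: slice comparisons are order-sensitive, while whole-map equality is not. A sketch:

use indexmap::IndexMap;

fn main() {
    let a: IndexMap<_, _> = [(1, "x"), (2, "y")].into_iter().collect();
    let b: IndexMap<_, _> = [(2, "y"), (1, "x")].into_iter().collect();
    assert_eq!(a, b);                       // maps ignore order
    assert_ne!(a.as_slice(), b.as_slice()); // slices respect order
}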
+ pub fn first(&self) -> Option<(&K, &V)> { + self.entries.first().map(Bucket::refs) + } + + /// Get the first key-value pair, with mutable access to the value. + pub fn first_mut(&mut self) -> Option<(&K, &mut V)> { + self.entries.first_mut().map(Bucket::ref_mut) + } + + /// Get the last key-value pair. + pub fn last(&self) -> Option<(&K, &V)> { + self.entries.last().map(Bucket::refs) + } + + /// Get the last key-value pair, with mutable access to the value. + pub fn last_mut(&mut self) -> Option<(&K, &mut V)> { + self.entries.last_mut().map(Bucket::ref_mut) + } + + /// Divides one slice into two at an index. + /// + /// ***Panics*** if `index > len`. + pub fn split_at(&self, index: usize) -> (&Self, &Self) { + let (first, second) = self.entries.split_at(index); + (Self::from_slice(first), Self::from_slice(second)) + } + + /// Divides one mutable slice into two at an index. + /// + /// ***Panics*** if `index > len`. + pub fn split_at_mut(&mut self, index: usize) -> (&mut Self, &mut Self) { + let (first, second) = self.entries.split_at_mut(index); + (Self::from_mut_slice(first), Self::from_mut_slice(second)) + } + + /// Returns the first key-value pair and the rest of the slice, + /// or `None` if it is empty. + pub fn split_first(&self) -> Option<((&K, &V), &Self)> { + if let [first, rest @ ..] = &self.entries { + Some((first.refs(), Self::from_slice(rest))) + } else { + None + } + } + + /// Returns the first key-value pair and the rest of the slice, + /// with mutable access to the value, or `None` if it is empty. + pub fn split_first_mut(&mut self) -> Option<((&K, &mut V), &mut Self)> { + if let [first, rest @ ..] = &mut self.entries { + Some((first.ref_mut(), Self::from_mut_slice(rest))) + } else { + None + } + } + + /// Returns the last key-value pair and the rest of the slice, + /// or `None` if it is empty. + pub fn split_last(&self) -> Option<((&K, &V), &Self)> { + if let [rest @ .., last] = &self.entries { + Some((last.refs(), Self::from_slice(rest))) + } else { + None + } + } + + /// Returns the last key-value pair and the rest of the slice, + /// with mutable access to the value, or `None` if it is empty. + pub fn split_last_mut(&mut self) -> Option<((&K, &mut V), &mut Self)> { + if let [rest @ .., last] = &mut self.entries { + Some((last.ref_mut(), Self::from_mut_slice(rest))) + } else { + None + } + } + + /// Return an iterator over the key-value pairs of the map slice. + pub fn iter(&self) -> Iter<'_, K, V> { + Iter::new(&self.entries) + } + + /// Return an iterator over the key-value pairs of the map slice. + pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { + IterMut::new(&mut self.entries) + } + + /// Return an iterator over the keys of the map slice. + pub fn keys(&self) -> Keys<'_, K, V> { + Keys::new(&self.entries) + } + + /// Return an owning iterator over the keys of the map slice. + pub fn into_keys(self: Box) -> IntoKeys { + IntoKeys::new(self.into_entries()) + } + + /// Return an iterator over the values of the map slice. + pub fn values(&self) -> Values<'_, K, V> { + Values::new(&self.entries) + } + + /// Return an iterator over mutable references to the the values of the map slice. + pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> { + ValuesMut::new(&mut self.entries) + } + + /// Return an owning iterator over the values of the map slice. + pub fn into_values(self: Box) -> IntoValues { + IntoValues::new(self.into_entries()) + } + + /// Search over a sorted map for a key. 
+ /// + /// Returns the position where that key is present, or the position where it can be inserted to + /// maintain the sort. See [`slice::binary_search`] for more details. + /// + /// Computes in **O(log(n))** time, which is notably less scalable than looking the key up in + /// the map this is a slice from using [`IndexMap::get_index_of`], but this can also position + /// missing keys. + pub fn binary_search_keys(&self, x: &K) -> Result + where + K: Ord, + { + self.binary_search_by(|p, _| p.cmp(x)) + } + + /// Search over a sorted map with a comparator function. + /// + /// Returns the position where that value is present, or the position where it can be inserted + /// to maintain the sort. See [`slice::binary_search_by`] for more details. + /// + /// Computes in **O(log(n))** time. + #[inline] + pub fn binary_search_by<'a, F>(&'a self, mut f: F) -> Result + where + F: FnMut(&'a K, &'a V) -> Ordering, + { + self.entries.binary_search_by(move |a| f(&a.key, &a.value)) + } + + /// Search over a sorted map with an extraction function. + /// + /// Returns the position where that value is present, or the position where it can be inserted + /// to maintain the sort. See [`slice::binary_search_by_key`] for more details. + /// + /// Computes in **O(log(n))** time. + #[inline] + pub fn binary_search_by_key<'a, B, F>(&'a self, b: &B, mut f: F) -> Result + where + F: FnMut(&'a K, &'a V) -> B, + B: Ord, + { + self.binary_search_by(|k, v| f(k, v).cmp(b)) + } + + /// Returns the index of the partition point of a sorted map according to the given predicate + /// (the index of the first element of the second partition). + /// + /// See [`slice::partition_point`] for more details. + /// + /// Computes in **O(log(n))** time. + #[must_use] + pub fn partition_point
<P>
(&self, mut pred: P) -> usize + where + P: FnMut(&K, &V) -> bool, + { + self.entries + .partition_point(move |a| pred(&a.key, &a.value)) + } +} + +impl<'a, K, V> IntoIterator for &'a Slice { + type IntoIter = Iter<'a, K, V>; + type Item = (&'a K, &'a V); + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, K, V> IntoIterator for &'a mut Slice { + type IntoIter = IterMut<'a, K, V>; + type Item = (&'a K, &'a mut V); + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +impl IntoIterator for Box> { + type IntoIter = IntoIter; + type Item = (K, V); + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.into_entries()) + } +} + +impl Default for &'_ Slice { + fn default() -> Self { + Slice::from_slice(&[]) + } +} + +impl Default for &'_ mut Slice { + fn default() -> Self { + Slice::from_mut_slice(&mut []) + } +} + +impl Default for Box> { + fn default() -> Self { + Slice::from_boxed(Box::default()) + } +} + +impl Clone for Box> { + fn clone(&self) -> Self { + Slice::from_boxed(self.entries.to_vec().into_boxed_slice()) + } +} + +impl From<&Slice> for Box> { + fn from(slice: &Slice) -> Self { + Slice::from_boxed(Box::from(&slice.entries)) + } +} + +impl fmt::Debug for Slice { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self).finish() + } +} + +impl PartialEq for Slice { + fn eq(&self, other: &Self) -> bool { + self.len() == other.len() && self.iter().eq(other) + } +} + +impl Eq for Slice {} + +impl PartialOrd for Slice { + fn partial_cmp(&self, other: &Self) -> Option { + self.iter().partial_cmp(other) + } +} + +impl Ord for Slice { + fn cmp(&self, other: &Self) -> Ordering { + self.iter().cmp(other) + } +} + +impl Hash for Slice { + fn hash(&self, state: &mut H) { + self.len().hash(state); + for (key, value) in self { + key.hash(state); + value.hash(state); + } + } +} + +impl Index for Slice { + type Output = V; + + fn index(&self, index: usize) -> &V { + &self.entries[index].value + } +} + +impl IndexMut for Slice { + fn index_mut(&mut self, index: usize) -> &mut V { + &mut self.entries[index].value + } +} + +// We can't have `impl> Index` because that conflicts +// both upstream with `Index` and downstream with `Index<&Q>`. +// Instead, we repeat the implementations for all the core range types. +macro_rules! 
impl_index { + ($($range:ty),*) => {$( + impl Index<$range> for IndexMap { + type Output = Slice; + + fn index(&self, range: $range) -> &Self::Output { + Slice::from_slice(&self.as_entries()[range]) + } + } + + impl IndexMut<$range> for IndexMap { + fn index_mut(&mut self, range: $range) -> &mut Self::Output { + Slice::from_mut_slice(&mut self.as_entries_mut()[range]) + } + } + + impl Index<$range> for Slice { + type Output = Slice; + + fn index(&self, range: $range) -> &Self { + Self::from_slice(&self.entries[range]) + } + } + + impl IndexMut<$range> for Slice { + fn index_mut(&mut self, range: $range) -> &mut Self { + Self::from_mut_slice(&mut self.entries[range]) + } + } + )*} +} +impl_index!( + ops::Range, + ops::RangeFrom, + ops::RangeFull, + ops::RangeInclusive, + ops::RangeTo, + ops::RangeToInclusive, + (Bound, Bound) +); + +#[cfg(test)] +mod tests { + use super::*; + use alloc::vec::Vec; + + #[test] + fn slice_index() { + fn check( + vec_slice: &[(i32, i32)], + map_slice: &Slice, + sub_slice: &Slice, + ) { + assert_eq!(map_slice as *const _, sub_slice as *const _); + itertools::assert_equal( + vec_slice.iter().copied(), + map_slice.iter().map(|(&k, &v)| (k, v)), + ); + itertools::assert_equal(vec_slice.iter().map(|(k, _)| k), map_slice.keys()); + itertools::assert_equal(vec_slice.iter().map(|(_, v)| v), map_slice.values()); + } + + let vec: Vec<(i32, i32)> = (0..10).map(|i| (i, i * i)).collect(); + let map: IndexMap = vec.iter().cloned().collect(); + let slice = map.as_slice(); + + // RangeFull + check(&vec[..], &map[..], &slice[..]); + + for i in 0usize..10 { + // Index + assert_eq!(vec[i].1, map[i]); + assert_eq!(vec[i].1, slice[i]); + assert_eq!(map[&(i as i32)], map[i]); + assert_eq!(map[&(i as i32)], slice[i]); + + // RangeFrom + check(&vec[i..], &map[i..], &slice[i..]); + + // RangeTo + check(&vec[..i], &map[..i], &slice[..i]); + + // RangeToInclusive + check(&vec[..=i], &map[..=i], &slice[..=i]); + + // (Bound, Bound) + let bounds = (Bound::Excluded(i), Bound::Unbounded); + check(&vec[i + 1..], &map[bounds], &slice[bounds]); + + for j in i..=10 { + // Range + check(&vec[i..j], &map[i..j], &slice[i..j]); + } + + for j in i..10 { + // RangeInclusive + check(&vec[i..=j], &map[i..=j], &slice[i..=j]); + } + } + } + + #[test] + fn slice_index_mut() { + fn check_mut( + vec_slice: &[(i32, i32)], + map_slice: &mut Slice, + sub_slice: &mut Slice, + ) { + assert_eq!(map_slice, sub_slice); + itertools::assert_equal( + vec_slice.iter().copied(), + map_slice.iter_mut().map(|(&k, &mut v)| (k, v)), + ); + itertools::assert_equal( + vec_slice.iter().map(|&(_, v)| v), + map_slice.values_mut().map(|&mut v| v), + ); + } + + let vec: Vec<(i32, i32)> = (0..10).map(|i| (i, i * i)).collect(); + let mut map: IndexMap = vec.iter().cloned().collect(); + let mut map2 = map.clone(); + let slice = map2.as_mut_slice(); + + // RangeFull + check_mut(&vec[..], &mut map[..], &mut slice[..]); + + for i in 0usize..10 { + // IndexMut + assert_eq!(&mut map[i], &mut slice[i]); + + // RangeFrom + check_mut(&vec[i..], &mut map[i..], &mut slice[i..]); + + // RangeTo + check_mut(&vec[..i], &mut map[..i], &mut slice[..i]); + + // RangeToInclusive + check_mut(&vec[..=i], &mut map[..=i], &mut slice[..=i]); + + // (Bound, Bound) + let bounds = (Bound::Excluded(i), Bound::Unbounded); + check_mut(&vec[i + 1..], &mut map[bounds], &mut slice[bounds]); + + for j in i..=10 { + // Range + check_mut(&vec[i..j], &mut map[i..j], &mut slice[i..j]); + } + + for j in i..10 { + // RangeInclusive + check_mut(&vec[i..=j], &mut 
map[i..=j], &mut slice[i..=j]); + } + } + } +} diff --git a/vendor/indexmap/src/map/tests.rs b/vendor/indexmap/src/map/tests.rs new file mode 100644 index 0000000..3d2315d --- /dev/null +++ b/vendor/indexmap/src/map/tests.rs @@ -0,0 +1,670 @@ +use super::*; +use std::string::String; + +#[test] +fn it_works() { + let mut map = IndexMap::new(); + assert_eq!(map.is_empty(), true); + map.insert(1, ()); + map.insert(1, ()); + assert_eq!(map.len(), 1); + assert!(map.get(&1).is_some()); + assert_eq!(map.is_empty(), false); +} + +#[test] +fn new() { + let map = IndexMap::::new(); + println!("{:?}", map); + assert_eq!(map.capacity(), 0); + assert_eq!(map.len(), 0); + assert_eq!(map.is_empty(), true); +} + +#[test] +fn insert() { + let insert = [0, 4, 2, 12, 8, 7, 11, 5]; + let not_present = [1, 3, 6, 9, 10]; + let mut map = IndexMap::with_capacity(insert.len()); + + for (i, &elt) in insert.iter().enumerate() { + assert_eq!(map.len(), i); + map.insert(elt, elt); + assert_eq!(map.len(), i + 1); + assert_eq!(map.get(&elt), Some(&elt)); + assert_eq!(map[&elt], elt); + } + println!("{:?}", map); + + for &elt in ¬_present { + assert!(map.get(&elt).is_none()); + } +} + +#[test] +fn insert_full() { + let insert = vec![9, 2, 7, 1, 4, 6, 13]; + let present = vec![1, 6, 2]; + let mut map = IndexMap::with_capacity(insert.len()); + + for (i, &elt) in insert.iter().enumerate() { + assert_eq!(map.len(), i); + let (index, existing) = map.insert_full(elt, elt); + assert_eq!(existing, None); + assert_eq!(Some(index), map.get_full(&elt).map(|x| x.0)); + assert_eq!(map.len(), i + 1); + } + + let len = map.len(); + for &elt in &present { + let (index, existing) = map.insert_full(elt, elt); + assert_eq!(existing, Some(elt)); + assert_eq!(Some(index), map.get_full(&elt).map(|x| x.0)); + assert_eq!(map.len(), len); + } +} + +#[test] +fn insert_2() { + let mut map = IndexMap::with_capacity(16); + + let mut keys = vec![]; + keys.extend(0..16); + keys.extend(if cfg!(miri) { 32..64 } else { 128..267 }); + + for &i in &keys { + let old_map = map.clone(); + map.insert(i, ()); + for key in old_map.keys() { + if map.get(key).is_none() { + println!("old_map: {:?}", old_map); + println!("map: {:?}", map); + panic!("did not find {} in map", key); + } + } + } + + for &i in &keys { + assert!(map.get(&i).is_some(), "did not find {}", i); + } +} + +#[test] +fn insert_order() { + let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let mut map = IndexMap::new(); + + for &elt in &insert { + map.insert(elt, ()); + } + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + for (i, k) in (0..insert.len()).zip(map.keys()) { + assert_eq!(map.get_index(i).unwrap().0, k); + } +} + +#[test] +fn grow() { + let insert = [0, 4, 2, 12, 8, 7, 11]; + let not_present = [1, 3, 6, 9, 10]; + let mut map = IndexMap::with_capacity(insert.len()); + + for (i, &elt) in insert.iter().enumerate() { + assert_eq!(map.len(), i); + map.insert(elt, elt); + assert_eq!(map.len(), i + 1); + assert_eq!(map.get(&elt), Some(&elt)); + assert_eq!(map[&elt], elt); + } + + println!("{:?}", map); + for &elt in &insert { + map.insert(elt * 10, elt); + } + for &elt in &insert { + map.insert(elt * 100, elt); + } + for (i, &elt) in insert.iter().cycle().enumerate().take(100) { + map.insert(elt * 100 + i as i32, elt); + } + println!("{:?}", map); + for &elt in ¬_present { + assert!(map.get(&elt).is_none()); + } +} + +#[test] +fn reserve() { + let mut map = 
IndexMap::::new(); + assert_eq!(map.capacity(), 0); + map.reserve(100); + let capacity = map.capacity(); + assert!(capacity >= 100); + for i in 0..capacity { + assert_eq!(map.len(), i); + map.insert(i, i * i); + assert_eq!(map.len(), i + 1); + assert_eq!(map.capacity(), capacity); + assert_eq!(map.get(&i), Some(&(i * i))); + } + map.insert(capacity, std::usize::MAX); + assert_eq!(map.len(), capacity + 1); + assert!(map.capacity() > capacity); + assert_eq!(map.get(&capacity), Some(&std::usize::MAX)); +} + +#[test] +fn try_reserve() { + let mut map = IndexMap::::new(); + assert_eq!(map.capacity(), 0); + assert_eq!(map.try_reserve(100), Ok(())); + assert!(map.capacity() >= 100); + assert!(map.try_reserve(usize::MAX).is_err()); +} + +#[test] +fn shrink_to_fit() { + let mut map = IndexMap::::new(); + assert_eq!(map.capacity(), 0); + for i in 0..100 { + assert_eq!(map.len(), i); + map.insert(i, i * i); + assert_eq!(map.len(), i + 1); + assert!(map.capacity() >= i + 1); + assert_eq!(map.get(&i), Some(&(i * i))); + map.shrink_to_fit(); + assert_eq!(map.len(), i + 1); + assert_eq!(map.capacity(), i + 1); + assert_eq!(map.get(&i), Some(&(i * i))); + } +} + +#[test] +fn remove() { + let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let mut map = IndexMap::new(); + + for &elt in &insert { + map.insert(elt, elt); + } + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + + let remove_fail = [99, 77]; + let remove = [4, 12, 8, 7]; + + for &key in &remove_fail { + assert!(map.swap_remove_full(&key).is_none()); + } + println!("{:?}", map); + for &key in &remove { + //println!("{:?}", map); + let index = map.get_full(&key).unwrap().0; + assert_eq!(map.swap_remove_full(&key), Some((index, key, key))); + } + println!("{:?}", map); + + for key in &insert { + assert_eq!(map.get(key).is_some(), !remove.contains(key)); + } + assert_eq!(map.len(), insert.len() - remove.len()); + assert_eq!(map.keys().count(), insert.len() - remove.len()); +} + +#[test] +fn remove_to_empty() { + let mut map = indexmap! { 0 => 0, 4 => 4, 5 => 5 }; + map.swap_remove(&5).unwrap(); + map.swap_remove(&4).unwrap(); + map.swap_remove(&0).unwrap(); + assert!(map.is_empty()); +} + +#[test] +fn swap_remove_index() { + let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let mut map = IndexMap::new(); + + for &elt in &insert { + map.insert(elt, elt * 2); + } + + let mut vector = insert.to_vec(); + let remove_sequence = &[3, 3, 10, 4, 5, 4, 3, 0, 1]; + + // check that the same swap remove sequence on vec and map + // have the same result. 
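+    // (`swap_remove` fills each removed slot with the entry that was last
+    // at the time, exactly like `Vec::swap_remove`, which is why the two
+    // sequences stay in lockstep below.)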
+ for &rm in remove_sequence { + let out_vec = vector.swap_remove(rm); + let (out_map, _) = map.swap_remove_index(rm).unwrap(); + assert_eq!(out_vec, out_map); + } + assert_eq!(vector.len(), map.len()); + for (a, b) in vector.iter().zip(map.keys()) { + assert_eq!(a, b); + } +} + +#[test] +fn partial_eq_and_eq() { + let mut map_a = IndexMap::new(); + map_a.insert(1, "1"); + map_a.insert(2, "2"); + let mut map_b = map_a.clone(); + assert_eq!(map_a, map_b); + map_b.swap_remove(&1); + assert_ne!(map_a, map_b); + + let map_c: IndexMap<_, String> = map_b.into_iter().map(|(k, v)| (k, v.into())).collect(); + assert_ne!(map_a, map_c); + assert_ne!(map_c, map_a); +} + +#[test] +fn extend() { + let mut map = IndexMap::new(); + map.extend(vec![(&1, &2), (&3, &4)]); + map.extend(vec![(5, 6)]); + assert_eq!( + map.into_iter().collect::>(), + vec![(1, 2), (3, 4), (5, 6)] + ); +} + +#[test] +fn entry() { + let mut map = IndexMap::new(); + + map.insert(1, "1"); + map.insert(2, "2"); + { + let e = map.entry(3); + assert_eq!(e.index(), 2); + let e = e.or_insert("3"); + assert_eq!(e, &"3"); + } + + let e = map.entry(2); + assert_eq!(e.index(), 1); + assert_eq!(e.key(), &2); + match e { + Entry::Occupied(ref e) => assert_eq!(e.get(), &"2"), + Entry::Vacant(_) => panic!(), + } + assert_eq!(e.or_insert("4"), &"2"); +} + +#[test] +fn entry_and_modify() { + let mut map = IndexMap::new(); + + map.insert(1, "1"); + map.entry(1).and_modify(|x| *x = "2"); + assert_eq!(Some(&"2"), map.get(&1)); + + map.entry(2).and_modify(|x| *x = "doesn't exist"); + assert_eq!(None, map.get(&2)); +} + +#[test] +fn entry_or_default() { + let mut map = IndexMap::new(); + + #[derive(Debug, PartialEq)] + enum TestEnum { + DefaultValue, + NonDefaultValue, + } + + impl Default for TestEnum { + fn default() -> Self { + TestEnum::DefaultValue + } + } + + map.insert(1, TestEnum::NonDefaultValue); + assert_eq!(&mut TestEnum::NonDefaultValue, map.entry(1).or_default()); + + assert_eq!(&mut TestEnum::DefaultValue, map.entry(2).or_default()); +} + +#[test] +fn occupied_entry_key() { + // These keys match hash and equality, but their addresses are distinct. + let (k1, k2) = (&mut 1, &mut 1); + let k1_ptr = k1 as *const i32; + let k2_ptr = k2 as *const i32; + assert_ne!(k1_ptr, k2_ptr); + + let mut map = IndexMap::new(); + map.insert(k1, "value"); + match map.entry(k2) { + Entry::Occupied(ref e) => { + // `OccupiedEntry::key` should reference the key in the map, + // not the key that was used to find the entry. 
+ let ptr = *e.key() as *const i32; + assert_eq!(ptr, k1_ptr); + assert_ne!(ptr, k2_ptr); + } + Entry::Vacant(_) => panic!(), + } +} + +#[test] +fn keys() { + let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; + let map: IndexMap<_, _> = vec.into_iter().collect(); + let keys: Vec<_> = map.keys().copied().collect(); + assert_eq!(keys.len(), 3); + assert!(keys.contains(&1)); + assert!(keys.contains(&2)); + assert!(keys.contains(&3)); +} + +#[test] +fn into_keys() { + let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; + let map: IndexMap<_, _> = vec.into_iter().collect(); + let keys: Vec = map.into_keys().collect(); + assert_eq!(keys.len(), 3); + assert!(keys.contains(&1)); + assert!(keys.contains(&2)); + assert!(keys.contains(&3)); +} + +#[test] +fn values() { + let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; + let map: IndexMap<_, _> = vec.into_iter().collect(); + let values: Vec<_> = map.values().copied().collect(); + assert_eq!(values.len(), 3); + assert!(values.contains(&'a')); + assert!(values.contains(&'b')); + assert!(values.contains(&'c')); +} + +#[test] +fn values_mut() { + let vec = vec![(1, 1), (2, 2), (3, 3)]; + let mut map: IndexMap<_, _> = vec.into_iter().collect(); + for value in map.values_mut() { + *value *= 2 + } + let values: Vec<_> = map.values().copied().collect(); + assert_eq!(values.len(), 3); + assert!(values.contains(&2)); + assert!(values.contains(&4)); + assert!(values.contains(&6)); +} + +#[test] +fn into_values() { + let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; + let map: IndexMap<_, _> = vec.into_iter().collect(); + let values: Vec = map.into_values().collect(); + assert_eq!(values.len(), 3); + assert!(values.contains(&'a')); + assert!(values.contains(&'b')); + assert!(values.contains(&'c')); +} + +#[test] +#[cfg(feature = "std")] +fn from_array() { + let map = IndexMap::from([(1, 2), (3, 4)]); + let mut expected = IndexMap::new(); + expected.insert(1, 2); + expected.insert(3, 4); + + assert_eq!(map, expected) +} + +#[test] +fn iter_default() { + struct K; + struct V; + fn assert_default() + where + T: Default + Iterator, + { + assert!(T::default().next().is_none()); + } + assert_default::>(); + assert_default::>(); + assert_default::>(); + assert_default::>(); + assert_default::>(); + assert_default::>(); + assert_default::>(); + assert_default::>(); +} + +#[test] +fn test_binary_search_by() { + // adapted from std's test for binary_search + let b: IndexMap<_, i32> = [] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&5)), Err(0)); + + let b: IndexMap<_, i32> = [4] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&3)), Err(0)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&4)), Ok(0)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&5)), Err(1)); + + let b: IndexMap<_, i32> = [1, 2, 4, 6, 8, 9] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&5)), Err(3)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&6)), Ok(3)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&7)), Err(4)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&8)), Ok(4)); + + let b: IndexMap<_, i32> = [1, 2, 4, 5, 6, 8] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&9)), Err(6)); + + let b: IndexMap<_, i32> = [1, 2, 4, 6, 7, 8, 9] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + 
assert_eq!(b.binary_search_by(|_, x| x.cmp(&6)), Ok(3)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&5)), Err(3)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&8)), Ok(5)); + + let b: IndexMap<_, i32> = [1, 2, 4, 5, 6, 8, 9] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&7)), Err(5)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&0)), Err(0)); + + let b: IndexMap<_, i32> = [1, 3, 3, 3, 7] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&0)), Err(0)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&1)), Ok(0)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&2)), Err(1)); + assert!(match b.binary_search_by(|_, x| x.cmp(&3)) { + Ok(1..=3) => true, + _ => false, + }); + assert!(match b.binary_search_by(|_, x| x.cmp(&3)) { + Ok(1..=3) => true, + _ => false, + }); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&4)), Err(4)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&5)), Err(4)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&6)), Err(4)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&7)), Ok(4)); + assert_eq!(b.binary_search_by(|_, x| x.cmp(&8)), Err(5)); +} + +#[test] +fn test_binary_search_by_key() { + // adapted from std's test for binary_search + let b: IndexMap<_, i32> = [] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by_key(&5, |_, &x| x), Err(0)); + + let b: IndexMap<_, i32> = [4] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by_key(&3, |_, &x| x), Err(0)); + assert_eq!(b.binary_search_by_key(&4, |_, &x| x), Ok(0)); + assert_eq!(b.binary_search_by_key(&5, |_, &x| x), Err(1)); + + let b: IndexMap<_, i32> = [1, 2, 4, 6, 8, 9] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by_key(&5, |_, &x| x), Err(3)); + assert_eq!(b.binary_search_by_key(&6, |_, &x| x), Ok(3)); + assert_eq!(b.binary_search_by_key(&7, |_, &x| x), Err(4)); + assert_eq!(b.binary_search_by_key(&8, |_, &x| x), Ok(4)); + + let b: IndexMap<_, i32> = [1, 2, 4, 5, 6, 8] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by_key(&9, |_, &x| x), Err(6)); + + let b: IndexMap<_, i32> = [1, 2, 4, 6, 7, 8, 9] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by_key(&6, |_, &x| x), Ok(3)); + assert_eq!(b.binary_search_by_key(&5, |_, &x| x), Err(3)); + assert_eq!(b.binary_search_by_key(&8, |_, &x| x), Ok(5)); + + let b: IndexMap<_, i32> = [1, 2, 4, 5, 6, 8, 9] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by_key(&7, |_, &x| x), Err(5)); + assert_eq!(b.binary_search_by_key(&0, |_, &x| x), Err(0)); + + let b: IndexMap<_, i32> = [1, 3, 3, 3, 7] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.binary_search_by_key(&0, |_, &x| x), Err(0)); + assert_eq!(b.binary_search_by_key(&1, |_, &x| x), Ok(0)); + assert_eq!(b.binary_search_by_key(&2, |_, &x| x), Err(1)); + assert!(match b.binary_search_by_key(&3, |_, &x| x) { + Ok(1..=3) => true, + _ => false, + }); + assert!(match b.binary_search_by_key(&3, |_, &x| x) { + Ok(1..=3) => true, + _ => false, + }); + assert_eq!(b.binary_search_by_key(&4, |_, &x| x), Err(4)); + assert_eq!(b.binary_search_by_key(&5, |_, &x| x), Err(4)); + assert_eq!(b.binary_search_by_key(&6, |_, &x| x), Err(4)); + 
assert_eq!(b.binary_search_by_key(&7, |_, &x| x), Ok(4)); + assert_eq!(b.binary_search_by_key(&8, |_, &x| x), Err(5)); +} + +#[test] +fn test_partition_point() { + // adapted from std's test for partition_point + let b: IndexMap<_, i32> = [] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.partition_point(|_, &x| x < 5), 0); + + let b: IndexMap<_, i32> = [4] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.partition_point(|_, &x| x < 3), 0); + assert_eq!(b.partition_point(|_, &x| x < 4), 0); + assert_eq!(b.partition_point(|_, &x| x < 5), 1); + + let b: IndexMap<_, i32> = [1, 2, 4, 6, 8, 9] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.partition_point(|_, &x| x < 5), 3); + assert_eq!(b.partition_point(|_, &x| x < 6), 3); + assert_eq!(b.partition_point(|_, &x| x < 7), 4); + assert_eq!(b.partition_point(|_, &x| x < 8), 4); + + let b: IndexMap<_, i32> = [1, 2, 4, 5, 6, 8] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.partition_point(|_, &x| x < 9), 6); + + let b: IndexMap<_, i32> = [1, 2, 4, 6, 7, 8, 9] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.partition_point(|_, &x| x < 6), 3); + assert_eq!(b.partition_point(|_, &x| x < 5), 3); + assert_eq!(b.partition_point(|_, &x| x < 8), 5); + + let b: IndexMap<_, i32> = [1, 2, 4, 5, 6, 8, 9] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.partition_point(|_, &x| x < 7), 5); + assert_eq!(b.partition_point(|_, &x| x < 0), 0); + + let b: IndexMap<_, i32> = [1, 3, 3, 3, 7] + .into_iter() + .enumerate() + .map(|(i, x)| (i + 100, x)) + .collect(); + assert_eq!(b.partition_point(|_, &x| x < 0), 0); + assert_eq!(b.partition_point(|_, &x| x < 1), 0); + assert_eq!(b.partition_point(|_, &x| x < 2), 1); + assert_eq!(b.partition_point(|_, &x| x < 3), 1); + assert_eq!(b.partition_point(|_, &x| x < 4), 4); + assert_eq!(b.partition_point(|_, &x| x < 5), 4); + assert_eq!(b.partition_point(|_, &x| x < 6), 4); + assert_eq!(b.partition_point(|_, &x| x < 7), 4); + assert_eq!(b.partition_point(|_, &x| x < 8), 5); +} diff --git a/vendor/indexmap/src/mutable_keys.rs b/vendor/indexmap/src/mutable_keys.rs index 35a90c4..7efc779 100644 --- a/vendor/indexmap/src/mutable_keys.rs +++ b/vendor/indexmap/src/mutable_keys.rs @@ -1,8 +1,6 @@ use core::hash::{BuildHasher, Hash}; -use super::{Equivalent, IndexMap}; - -pub struct PrivateMarker {} +use super::{Bucket, Entries, Equivalent, IndexMap}; /// Opt-in mutable access to keys. /// @@ -16,11 +14,15 @@ pub struct PrivateMarker {} /// implementing PartialEq, Eq, or Hash incorrectly would be). /// /// `use` this trait to enable its methods for `IndexMap`. -pub trait MutableKeys { +/// +/// This trait is sealed and cannot be implemented for types outside this crate. +pub trait MutableKeys: private::Sealed { type Key; type Value; /// Return item index, mutable reference to key and value + /// + /// Computes in **O(1)** time (average). fn get_full_mut2( &mut self, key: &Q, @@ -28,6 +30,13 @@ pub trait MutableKeys { where Q: Hash + Equivalent; + /// Return mutable reference to key and value at an index. + /// + /// Valid indices are *0 <= index < self.len()* + /// + /// Computes in **O(1)** time. + fn get_index_mut2(&mut self, index: usize) -> Option<(&mut Self::Key, &mut Self::Value)>; + /// Scan through each key-value pair in the map and keep those where the /// closure `keep` returns `true`. 
/// @@ -38,11 +47,6 @@ pub trait MutableKeys { fn retain2(&mut self, keep: F) where F: FnMut(&mut Self::Key, &mut Self::Value) -> bool; - - /// This method is not useful in itself – it is there to “seal” the trait - /// for external implementation, so that we can add methods without - /// causing breaking changes. - fn __private_marker(&self) -> PrivateMarker; } /// Opt-in mutable access to keys. @@ -55,11 +59,21 @@ where { type Key = K; type Value = V; + fn get_full_mut2(&mut self, key: &Q) -> Option<(usize, &mut K, &mut V)> where Q: Hash + Equivalent, { - self.get_full_mut2_impl(key) + if let Some(i) = self.get_index_of(key) { + let entry = &mut self.as_entries_mut()[i]; + Some((i, &mut entry.key, &mut entry.value)) + } else { + None + } + } + + fn get_index_mut2(&mut self, index: usize) -> Option<(&mut K, &mut V)> { + self.as_entries_mut().get_mut(index).map(Bucket::muts) } fn retain2(&mut self, keep: F) @@ -68,8 +82,10 @@ where { self.retain_mut(keep) } +} - fn __private_marker(&self) -> PrivateMarker { - PrivateMarker {} - } +mod private { + pub trait Sealed {} + + impl Sealed for super::IndexMap {} } diff --git a/vendor/indexmap/src/rayon/map.rs b/vendor/indexmap/src/rayon/map.rs index 8819f13..d5325f2 100644 --- a/vendor/indexmap/src/rayon/map.rs +++ b/vendor/indexmap/src/rayon/map.rs @@ -2,24 +2,23 @@ //! //! You will rarely need to interact with this module directly unless you need to name one of the //! iterator types. -//! -//! Requires crate feature `"rayon"` use super::collect; use rayon::iter::plumbing::{Consumer, ProducerCallback, UnindexedConsumer}; use rayon::prelude::*; use crate::vec::Vec; +use alloc::boxed::Box; use core::cmp::Ordering; use core::fmt; use core::hash::{BuildHasher, Hash}; use core::ops::RangeBounds; +use crate::map::Slice; use crate::Bucket; use crate::Entries; use crate::IndexMap; -/// Requires crate feature `"rayon"`. impl IntoParallelIterator for IndexMap where K: Send, @@ -35,6 +34,21 @@ where } } +impl IntoParallelIterator for Box> +where + K: Send, + V: Send, +{ + type Item = (K, V); + type Iter = IntoParIter; + + fn into_par_iter(self) -> Self::Iter { + IntoParIter { + entries: self.into_entries(), + } + } +} + /// A parallel owning iterator over the entries of a `IndexMap`. /// /// This `struct` is created by the [`into_par_iter`] method on [`IndexMap`] @@ -63,7 +77,6 @@ impl IndexedParallelIterator for IntoParIter { indexed_parallel_iterator_methods!(Bucket::key_value); } -/// Requires crate feature `"rayon"`. impl<'a, K, V, S> IntoParallelIterator for &'a IndexMap where K: Sync, @@ -79,6 +92,21 @@ where } } +impl<'a, K, V> IntoParallelIterator for &'a Slice +where + K: Sync, + V: Sync, +{ + type Item = (&'a K, &'a V); + type Iter = ParIter<'a, K, V>; + + fn into_par_iter(self) -> Self::Iter { + ParIter { + entries: &self.entries, + } + } +} + /// A parallel iterator over the entries of a `IndexMap`. /// /// This `struct` is created by the [`par_iter`] method on [`IndexMap`] @@ -113,7 +141,6 @@ impl IndexedParallelIterator for ParIter<'_, K, V> { indexed_parallel_iterator_methods!(Bucket::refs); } -/// Requires crate feature `"rayon"`. 
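For illustration, a minimal sketch (not part of the vendored diff) of how the new `Slice` parallel iterators above compose with rayon, assuming `indexmap` is built with the `rayon` feature; the map contents and variable names are illustrative:

    use indexmap::IndexMap;
    use rayon::prelude::*;

    fn main() {
        let squares: IndexMap<u32, u32> = (0..1_000).map(|i| (i, i * i)).collect();
        // Range indexing yields a `Slice`, and `&Slice` now implements
        // `IntoParallelIterator`, so sub-slices feed the rayon pool directly.
        let front = &squares[..100];
        let sum: u32 = front.par_iter().map(|(_, &v)| v).sum();
        assert_eq!(sum, (0..100u32).map(|i| i * i).sum());
    }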
impl<'a, K, V, S> IntoParallelIterator for &'a mut IndexMap where K: Sync + Send, @@ -129,6 +156,21 @@ where } } +impl<'a, K, V> IntoParallelIterator for &'a mut Slice +where + K: Sync + Send, + V: Send, +{ + type Item = (&'a K, &'a mut V); + type Iter = ParIterMut<'a, K, V>; + + fn into_par_iter(self) -> Self::Iter { + ParIterMut { + entries: &mut self.entries, + } + } +} + /// A parallel mutable iterator over the entries of a `IndexMap`. /// /// This `struct` is created by the [`par_iter_mut`] method on [`IndexMap`] @@ -157,7 +199,6 @@ impl IndexedParallelIterator for ParIterMut<'_, K, V> { indexed_parallel_iterator_methods!(Bucket::ref_mut); } -/// Requires crate feature `"rayon"`. impl<'a, K, V, S> ParallelDrainRange for &'a mut IndexMap where K: Send, @@ -225,6 +266,37 @@ where } } +/// Parallel iterator methods and other parallel methods. +/// +/// The following methods **require crate feature `"rayon"`**. +/// +/// See also the `IntoParallelIterator` implementations. +impl Slice +where + K: Sync, + V: Sync, +{ + /// Return a parallel iterator over the keys of the map slice. + /// + /// While parallel iterators can process items in any order, their relative order + /// in the slice is still preserved for operations like `reduce` and `collect`. + pub fn par_keys(&self) -> ParKeys<'_, K, V> { + ParKeys { + entries: &self.entries, + } + } + + /// Return a parallel iterator over the values of the map slice. + /// + /// While parallel iterators can process items in any order, their relative order + /// in the slice is still preserved for operations like `reduce` and `collect`. + pub fn par_values(&self) -> ParValues<'_, K, V> { + ParValues { + entries: &self.entries, + } + } +} + impl IndexMap where K: Hash + Eq + Sync, @@ -314,7 +386,6 @@ impl IndexedParallelIterator for ParValues<'_, K, V> { indexed_parallel_iterator_methods!(Bucket::value_ref); } -/// Requires crate feature `"rayon"`. impl IndexMap where K: Send, @@ -331,6 +402,22 @@ where } } +impl Slice +where + K: Send, + V: Send, +{ + /// Return a parallel iterator over mutable references to the the values of the map slice. + /// + /// While parallel iterators can process items in any order, their relative order + /// in the slice is still preserved for operations like `reduce` and `collect`. + pub fn par_values_mut(&mut self) -> ParValuesMut<'_, K, V> { + ParValuesMut { + entries: &mut self.entries, + } + } +} + impl IndexMap where K: Hash + Eq + Send, @@ -406,6 +493,18 @@ where entries.par_sort_unstable_by(move |a, b| cmp(&a.key, &a.value, &b.key, &b.value)); IntoParIter { entries } } + + /// Sort the map’s key-value pairs in place and in parallel, using a sort-key extraction + /// function. + pub fn par_sort_by_cached_key(&mut self, sort_key: F) + where + T: Ord + Send, + F: Fn(&K, &V) -> T + Sync, + { + self.with_entries(move |entries| { + entries.par_sort_by_cached_key(move |a| sort_key(&a.key, &a.value)); + }); + } } /// A parallel mutable iterator over the values of a `IndexMap`. @@ -436,7 +535,6 @@ impl IndexedParallelIterator for ParValuesMut<'_, K, V> { indexed_parallel_iterator_methods!(Bucket::value_mut); } -/// Requires crate feature `"rayon"`. impl FromParallelIterator<(K, V)> for IndexMap where K: Eq + Hash + Send, @@ -457,7 +555,6 @@ where } } -/// Requires crate feature `"rayon"`. impl ParallelExtend<(K, V)> for IndexMap where K: Eq + Hash + Send, @@ -474,7 +571,6 @@ where } } -/// Requires crate feature `"rayon"`. 
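For illustration only, a minimal sketch of the `par_sort_by_cached_key` method added above, again assuming the `rayon` feature; the data is placeholder:

    use indexmap::IndexMap;
    use rayon::prelude::*;

    fn main() {
        let mut map: IndexMap<&str, u32> =
            [("plum", 3), ("apple", 1), ("fig", 2)].into_iter().collect();
        // The sort key is computed at most once per entry and cached, and
        // the sort itself runs on the rayon thread pool.
        map.par_sort_by_cached_key(|k, _v| k.len());
        let keys: Vec<&str> = map.keys().copied().collect();
        assert_eq!(keys, ["fig", "plum", "apple"]);
    }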
impl<'a, K: 'a, V: 'a, S> ParallelExtend<(&'a K, &'a V)> for IndexMap where K: Copy + Eq + Hash + Send + Sync, diff --git a/vendor/indexmap/src/rayon/set.rs b/vendor/indexmap/src/rayon/set.rs index 6749dc0..0fc478e 100644 --- a/vendor/indexmap/src/rayon/set.rs +++ b/vendor/indexmap/src/rayon/set.rs @@ -2,25 +2,24 @@ //! //! You will rarely need to interact with this module directly unless you need to name one of the //! iterator types. -//! -//! Requires crate feature `"rayon"`. use super::collect; use rayon::iter::plumbing::{Consumer, ProducerCallback, UnindexedConsumer}; use rayon::prelude::*; use crate::vec::Vec; +use alloc::boxed::Box; use core::cmp::Ordering; use core::fmt; use core::hash::{BuildHasher, Hash}; use core::ops::RangeBounds; +use crate::set::Slice; use crate::Entries; use crate::IndexSet; type Bucket = crate::Bucket; -/// Requires crate feature `"rayon"`. impl IntoParallelIterator for IndexSet where T: Send, @@ -35,6 +34,20 @@ where } } +impl IntoParallelIterator for Box> +where + T: Send, +{ + type Item = T; + type Iter = IntoParIter; + + fn into_par_iter(self) -> Self::Iter { + IntoParIter { + entries: self.into_entries(), + } + } +} + /// A parallel owning iterator over the items of a `IndexSet`. /// /// This `struct` is created by the [`into_par_iter`] method on [`IndexSet`] @@ -63,7 +76,6 @@ impl IndexedParallelIterator for IntoParIter { indexed_parallel_iterator_methods!(Bucket::key); } -/// Requires crate feature `"rayon"`. impl<'a, T, S> IntoParallelIterator for &'a IndexSet where T: Sync, @@ -78,6 +90,20 @@ where } } +impl<'a, T> IntoParallelIterator for &'a Slice +where + T: Sync, +{ + type Item = &'a T; + type Iter = ParIter<'a, T>; + + fn into_par_iter(self) -> Self::Iter { + ParIter { + entries: &self.entries, + } + } +} + /// A parallel iterator over the items of a `IndexSet`. /// /// This `struct` is created by the [`par_iter`] method on [`IndexSet`] @@ -112,7 +138,6 @@ impl IndexedParallelIterator for ParIter<'_, T> { indexed_parallel_iterator_methods!(Bucket::key_ref); } -/// Requires crate feature `"rayon"`. impl<'a, T, S> ParallelDrainRange for &'a mut IndexSet where T: Send, @@ -540,9 +565,19 @@ where entries.par_sort_unstable_by(move |a, b| cmp(&a.key, &b.key)); IntoParIter { entries } } + + /// Sort the set’s values in place and in parallel, using a key extraction function. + pub fn par_sort_by_cached_key(&mut self, sort_key: F) + where + K: Ord + Send, + F: Fn(&T) -> K + Sync, + { + self.with_entries(move |entries| { + entries.par_sort_by_cached_key(move |a| sort_key(&a.key)); + }); + } } -/// Requires crate feature `"rayon"`. impl FromParallelIterator for IndexSet where T: Eq + Hash + Send, @@ -562,7 +597,6 @@ where } } -/// Requires crate feature `"rayon"`. impl ParallelExtend for IndexSet where T: Eq + Hash + Send, @@ -578,7 +612,6 @@ where } } -/// Requires crate feature `"rayon"`. 
impl<'a, T: 'a, S> ParallelExtend<&'a T> for IndexSet where T: Copy + Eq + Hash + Send + Sync, diff --git a/vendor/indexmap/src/serde.rs b/vendor/indexmap/src/serde.rs index c6dd6d5..37de3e3 100644 --- a/vendor/indexmap/src/serde.rs +++ b/vendor/indexmap/src/serde.rs @@ -10,7 +10,6 @@ use core::marker::PhantomData; use crate::IndexMap; -/// Requires crate feature `"serde"` or `"serde-1"` impl Serialize for IndexMap where K: Serialize + Hash + Eq, @@ -54,7 +53,6 @@ where } } -/// Requires crate feature `"serde"` or `"serde-1"` impl<'de, K, V, S> Deserialize<'de> for IndexMap where K: Deserialize<'de> + Eq + Hash, @@ -85,7 +83,6 @@ where use crate::IndexSet; -/// Requires crate feature `"serde"` or `"serde-1"` impl Serialize for IndexSet where T: Serialize + Hash + Eq, @@ -127,7 +124,6 @@ where } } -/// Requires crate feature `"serde"` or `"serde-1"` impl<'de, T, S> Deserialize<'de> for IndexSet where T: Deserialize<'de> + Eq + Hash, diff --git a/vendor/indexmap/src/set.rs b/vendor/indexmap/src/set.rs index 3728947..c047b0b 100644 --- a/vendor/indexmap/src/set.rs +++ b/vendor/indexmap/src/set.rs @@ -1,18 +1,28 @@ //! A hash set implemented using `IndexMap` +mod iter; +mod slice; + +#[cfg(test)] +mod tests; + +pub use self::iter::{Difference, Drain, Intersection, IntoIter, Iter, SymmetricDifference, Union}; +pub use self::slice::Slice; + #[cfg(feature = "rayon")] pub use crate::rayon::set as rayon; +use crate::TryReserveError; -#[cfg(has_std)] +#[cfg(feature = "std")] use std::collections::hash_map::RandomState; -use crate::vec::{self, Vec}; +use crate::util::try_simplify_range; +use alloc::boxed::Box; +use alloc::vec::Vec; use core::cmp::Ordering; use core::fmt; use core::hash::{BuildHasher, Hash}; -use core::iter::{Chain, FusedIterator}; use core::ops::{BitAnd, BitOr, BitXor, Index, RangeBounds, Sub}; -use core::slice; use super::{Entries, Equivalent, IndexMap}; @@ -46,6 +56,11 @@ type Bucket = super::Bucket; /// `0..self.len()`. For example, the method `.get_full` looks up the index for /// a value, and the method `.get_index` looks up the value by index. /// +/// # Complexity +/// +/// Internally, `IndexSet` just holds an [`IndexMap`](IndexMap). Thus the complexity +/// of the two are the same for most methods. +/// /// # Examples /// /// ``` @@ -59,11 +74,11 @@ type Bucket = super::Bucket; /// assert!(letters.contains(&'u')); /// assert!(!letters.contains(&'y')); /// ``` -#[cfg(has_std)] +#[cfg(feature = "std")] pub struct IndexSet { pub(crate) map: IndexMap, } -#[cfg(not(has_std))] +#[cfg(not(feature = "std"))] pub struct IndexSet { pub(crate) map: IndexMap, } @@ -124,7 +139,8 @@ where } } -#[cfg(has_std)] +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl IndexSet { /// Create a new set. (Does not allocate.) pub fn new() -> Self { @@ -165,6 +181,11 @@ impl IndexSet { } } + /// Return the number of elements the set can hold without reallocating. + /// + /// This number is a lower bound; the set might be able to hold more, + /// but is guaranteed to be able to hold at least this many. + /// /// Computes in **O(1)** time. pub fn capacity(&self) -> usize { self.map.capacity() @@ -191,9 +212,7 @@ impl IndexSet { /// Return an iterator over the values of the set, in their order pub fn iter(&self) -> Iter<'_, T> { - Iter { - iter: self.map.as_entries().iter(), - } + Iter::new(self.as_entries()) } /// Remove all elements in the set, while preserving its capacity. 
@@ -227,9 +246,7 @@ impl IndexSet { where R: RangeBounds, { - Drain { - iter: self.map.drain(range).iter, - } + Drain::new(self.map.core.drain(range)) } /// Splits the collection into two at the given index. @@ -261,6 +278,37 @@ where self.map.reserve(additional); } + /// Reserve capacity for `additional` more values, without over-allocating. + /// + /// Unlike `reserve`, this does not deliberately over-allocate the entry capacity to avoid + /// frequent re-allocations. However, the underlying data structures may still have internal + /// capacity requirements, and the allocator itself may give more space than requested, so this + /// cannot be relied upon to be precisely minimal. + /// + /// Computes in **O(n)** time. + pub fn reserve_exact(&mut self, additional: usize) { + self.map.reserve_exact(additional); + } + + /// Try to reserve capacity for `additional` more values. + /// + /// Computes in **O(n)** time. + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.map.try_reserve(additional) + } + + /// Try to reserve capacity for `additional` more values, without over-allocating. + /// + /// Unlike `try_reserve`, this does not deliberately over-allocate the entry capacity to avoid + /// frequent re-allocations. However, the underlying data structures may still have internal + /// capacity requirements, and the allocator itself may give more space than requested, so this + /// cannot be relied upon to be precisely minimal. + /// + /// Computes in **O(n)** time. + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.map.try_reserve_exact(additional) + } + /// Shrink the capacity of the set as much as possible. /// /// Computes in **O(n)** time. @@ -297,16 +345,8 @@ where /// /// Computes in **O(1)** time (amortized average). pub fn insert_full(&mut self, value: T) -> (usize, bool) { - use super::map::Entry::*; - - match self.map.entry(value) { - Occupied(e) => (e.index(), false), - Vacant(e) => { - let index = e.index(); - e.insert(()); - (index, true) - } - } + let (index, existing) = self.map.insert_full(value, ()); + (index, existing.is_none()) } /// Return an iterator over the values that are in `self` but not `other`. @@ -316,10 +356,7 @@ where where S2: BuildHasher, { - Difference { - iter: self.iter(), - other, - } + Difference::new(self, other) } /// Return an iterator over the values that are in `self` or `other`, @@ -334,9 +371,7 @@ where where S2: BuildHasher, { - SymmetricDifference { - iter: self.difference(other).chain(other.difference(self)), - } + SymmetricDifference::new(self, other) } /// Return an iterator over the values that are in both `self` and `other`. @@ -346,10 +381,7 @@ where where S2: BuildHasher, { - Intersection { - iter: self.iter(), - other, - } + Intersection::new(self, other) } /// Return an iterator over all values that are in `self` or `other`. @@ -360,9 +392,7 @@ where where S2: BuildHasher, { - Union { - iter: self.iter().chain(other.difference(self)), - } + Union::new(self, other) } /// Return `true` if an equivalent to `value` exists in the set. @@ -395,6 +425,8 @@ where } /// Return item index, if it exists in the set + /// + /// Computes in **O(1)** time (average). 
pub fn get_index_of(&self, value: &Q) -> Option where Q: Hash + Equivalent, @@ -604,9 +636,7 @@ where { let mut entries = self.into_entries(); entries.sort_by(move |a, b| cmp(&a.key, &b.key)); - IntoIter { - iter: entries.into_iter(), - } + IntoIter::new(entries) } /// Sort the set's values by their default ordering. @@ -619,7 +649,7 @@ where self.map.sort_unstable_keys() } - /// Sort the set's values in place using the comparison funtion `cmp`. + /// Sort the set's values in place using the comparison function `cmp`. /// /// Computes in **O(n log n)** time. The sort is unstable. pub fn sort_unstable_by(&mut self, mut cmp: F) @@ -637,9 +667,82 @@ where { let mut entries = self.into_entries(); entries.sort_unstable_by(move |a, b| cmp(&a.key, &b.key)); - IntoIter { - iter: entries.into_iter(), - } + IntoIter::new(entries) + } + + /// Sort the set’s values in place using a key extraction function. + /// + /// During sorting, the function is called at most once per entry, by using temporary storage + /// to remember the results of its evaluation. The order of calls to the function is + /// unspecified and may change between versions of `indexmap` or the standard library. + /// + /// Computes in **O(m n + n log n + c)** time () and **O(n)** space, where the function is + /// **O(m)**, *n* is the length of the map, and *c* the capacity. The sort is stable. + pub fn sort_by_cached_key(&mut self, mut sort_key: F) + where + K: Ord, + F: FnMut(&T) -> K, + { + self.with_entries(move |entries| { + entries.sort_by_cached_key(move |a| sort_key(&a.key)); + }); + } + + /// Search over a sorted set for a value. + /// + /// Returns the position where that value is present, or the position where it can be inserted + /// to maintain the sort. See [`slice::binary_search`] for more details. + /// + /// Computes in **O(log(n))** time, which is notably less scalable than looking the value up + /// using [`get_index_of`][IndexSet::get_index_of], but this can also position missing values. + pub fn binary_search(&self, x: &T) -> Result + where + T: Ord, + { + self.as_slice().binary_search(x) + } + + /// Search over a sorted set with a comparator function. + /// + /// Returns the position where that value is present, or the position where it can be inserted + /// to maintain the sort. See [`slice::binary_search_by`] for more details. + /// + /// Computes in **O(log(n))** time. + #[inline] + pub fn binary_search_by<'a, F>(&'a self, f: F) -> Result + where + F: FnMut(&'a T) -> Ordering, + { + self.as_slice().binary_search_by(f) + } + + /// Search over a sorted set with an extraction function. + /// + /// Returns the position where that value is present, or the position where it can be inserted + /// to maintain the sort. See [`slice::binary_search_by_key`] for more details. + /// + /// Computes in **O(log(n))** time. + #[inline] + pub fn binary_search_by_key<'a, B, F>(&'a self, b: &B, f: F) -> Result + where + F: FnMut(&'a T) -> B, + B: Ord, + { + self.as_slice().binary_search_by_key(b, f) + } + + /// Returns the index of the partition point of a sorted set according to the given predicate + /// (the index of the first element of the second partition). + /// + /// See [`slice::partition_point`] for more details. + /// + /// Computes in **O(log(n))** time. + #[must_use] + pub fn partition_point
<P>
(&self, pred: P) -> usize + where + P: FnMut(&T) -> bool, + { + self.as_slice().partition_point(pred) } /// Reverses the order of the set’s values in place. @@ -651,6 +754,20 @@ where } impl IndexSet { + /// Returns a slice of all the values in the set. + /// + /// Computes in **O(1)** time. + pub fn as_slice(&self) -> &Slice { + Slice::from_slice(self.as_entries()) + } + + /// Converts into a boxed slice of all the values in the set. + /// + /// Note that this will drop the inner hash table and any excess capacity. + pub fn into_boxed_slice(self) -> Box> { + Slice::from_boxed(self.into_entries().into_boxed_slice()) + } + /// Get a value by index /// /// Valid indices are *0 <= index < self.len()* @@ -660,6 +777,17 @@ impl IndexSet { self.as_entries().get(index).map(Bucket::key_ref) } + /// Returns a slice of values in the given range of indices. + /// + /// Valid indices are *0 <= index < self.len()* + /// + /// Computes in **O(1)** time. + pub fn get_range>(&self, range: R) -> Option<&Slice> { + let entries = self.as_entries(); + let range = try_simplify_range(range, entries.len())?; + entries.get(range).map(Slice::from_slice) + } + /// Get the first value /// /// Computes in **O(1)** time. @@ -761,141 +889,6 @@ impl Index for IndexSet { } } -/// An owning iterator over the items of a `IndexSet`. -/// -/// This `struct` is created by the [`into_iter`] method on [`IndexSet`] -/// (provided by the `IntoIterator` trait). See its documentation for more. -/// -/// [`IndexSet`]: struct.IndexSet.html -/// [`into_iter`]: struct.IndexSet.html#method.into_iter -pub struct IntoIter { - iter: vec::IntoIter>, -} - -impl Iterator for IntoIter { - type Item = T; - - iterator_methods!(Bucket::key); -} - -impl DoubleEndedIterator for IntoIter { - double_ended_iterator_methods!(Bucket::key); -} - -impl ExactSizeIterator for IntoIter { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for IntoIter {} - -impl fmt::Debug for IntoIter { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let iter = self.iter.as_slice().iter().map(Bucket::key_ref); - f.debug_list().entries(iter).finish() - } -} - -/// An iterator over the items of a `IndexSet`. -/// -/// This `struct` is created by the [`iter`] method on [`IndexSet`]. -/// See its documentation for more. -/// -/// [`IndexSet`]: struct.IndexSet.html -/// [`iter`]: struct.IndexSet.html#method.iter -pub struct Iter<'a, T> { - iter: slice::Iter<'a, Bucket>, -} - -impl<'a, T> Iterator for Iter<'a, T> { - type Item = &'a T; - - iterator_methods!(Bucket::key_ref); -} - -impl DoubleEndedIterator for Iter<'_, T> { - double_ended_iterator_methods!(Bucket::key_ref); -} - -impl ExactSizeIterator for Iter<'_, T> { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for Iter<'_, T> {} - -impl Clone for Iter<'_, T> { - fn clone(&self) -> Self { - Iter { - iter: self.iter.clone(), - } - } -} - -impl fmt::Debug for Iter<'_, T> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.clone()).finish() - } -} - -/// A draining iterator over the items of a `IndexSet`. -/// -/// This `struct` is created by the [`drain`] method on [`IndexSet`]. -/// See its documentation for more. 
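A hedged sketch of the sorted-set search methods introduced above (`binary_search`, `partition_point`, `get_range`); it assumes `IndexSet::sort` is available to establish the sorted precondition, and the numbers are illustrative:

    use indexmap::IndexSet;

    fn main() {
        let mut set: IndexSet<u32> = [9, 3, 7, 1].into_iter().collect();
        // The binary-search family assumes the set is already sorted.
        set.sort(); // order is now [1, 3, 7, 9]
        assert_eq!(set.binary_search(&7), Ok(2));
        assert_eq!(set.binary_search(&5), Err(2)); // insertion point
        assert_eq!(set.partition_point(|&x| x < 8), 3);
        // `get_range` returns a `Slice` view over part of the set.
        let mid = set.get_range(1..3).unwrap();
        assert!(mid.iter().eq(&[3, 7]));
    }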
-/// -/// [`IndexSet`]: struct.IndexSet.html -/// [`drain`]: struct.IndexSet.html#method.drain -pub struct Drain<'a, T> { - iter: vec::Drain<'a, Bucket>, -} - -impl Iterator for Drain<'_, T> { - type Item = T; - - iterator_methods!(Bucket::key); -} - -impl DoubleEndedIterator for Drain<'_, T> { - double_ended_iterator_methods!(Bucket::key); -} - -impl ExactSizeIterator for Drain<'_, T> { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl FusedIterator for Drain<'_, T> {} - -impl fmt::Debug for Drain<'_, T> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let iter = self.iter.as_slice().iter().map(Bucket::key_ref); - f.debug_list().entries(iter).finish() - } -} - -impl<'a, T, S> IntoIterator for &'a IndexSet { - type Item = &'a T; - type IntoIter = Iter<'a, T>; - - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -impl IntoIterator for IndexSet { - type Item = T; - type IntoIter = IntoIter; - - fn into_iter(self) -> Self::IntoIter { - IntoIter { - iter: self.into_entries().into_iter(), - } - } -} - impl FromIterator for IndexSet where T: Hash + Eq, @@ -909,7 +902,8 @@ where } } -#[cfg(has_std)] +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl From<[T; N]> for IndexSet where T: Eq + Hash, @@ -1014,310 +1008,6 @@ where } } -/// A lazy iterator producing elements in the difference of `IndexSet`s. -/// -/// This `struct` is created by the [`difference`] method on [`IndexSet`]. -/// See its documentation for more. -/// -/// [`IndexSet`]: struct.IndexSet.html -/// [`difference`]: struct.IndexSet.html#method.difference -pub struct Difference<'a, T, S> { - iter: Iter<'a, T>, - other: &'a IndexSet, -} - -impl<'a, T, S> Iterator for Difference<'a, T, S> -where - T: Eq + Hash, - S: BuildHasher, -{ - type Item = &'a T; - - fn next(&mut self) -> Option { - while let Some(item) = self.iter.next() { - if !self.other.contains(item) { - return Some(item); - } - } - None - } - - fn size_hint(&self) -> (usize, Option) { - (0, self.iter.size_hint().1) - } -} - -impl DoubleEndedIterator for Difference<'_, T, S> -where - T: Eq + Hash, - S: BuildHasher, -{ - fn next_back(&mut self) -> Option { - while let Some(item) = self.iter.next_back() { - if !self.other.contains(item) { - return Some(item); - } - } - None - } -} - -impl FusedIterator for Difference<'_, T, S> -where - T: Eq + Hash, - S: BuildHasher, -{ -} - -impl Clone for Difference<'_, T, S> { - fn clone(&self) -> Self { - Difference { - iter: self.iter.clone(), - ..*self - } - } -} - -impl fmt::Debug for Difference<'_, T, S> -where - T: fmt::Debug + Eq + Hash, - S: BuildHasher, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.clone()).finish() - } -} - -/// A lazy iterator producing elements in the intersection of `IndexSet`s. -/// -/// This `struct` is created by the [`intersection`] method on [`IndexSet`]. -/// See its documentation for more. 
-/// -/// [`IndexSet`]: struct.IndexSet.html -/// [`intersection`]: struct.IndexSet.html#method.intersection -pub struct Intersection<'a, T, S> { - iter: Iter<'a, T>, - other: &'a IndexSet, -} - -impl<'a, T, S> Iterator for Intersection<'a, T, S> -where - T: Eq + Hash, - S: BuildHasher, -{ - type Item = &'a T; - - fn next(&mut self) -> Option { - while let Some(item) = self.iter.next() { - if self.other.contains(item) { - return Some(item); - } - } - None - } - - fn size_hint(&self) -> (usize, Option) { - (0, self.iter.size_hint().1) - } -} - -impl DoubleEndedIterator for Intersection<'_, T, S> -where - T: Eq + Hash, - S: BuildHasher, -{ - fn next_back(&mut self) -> Option { - while let Some(item) = self.iter.next_back() { - if self.other.contains(item) { - return Some(item); - } - } - None - } -} - -impl FusedIterator for Intersection<'_, T, S> -where - T: Eq + Hash, - S: BuildHasher, -{ -} - -impl Clone for Intersection<'_, T, S> { - fn clone(&self) -> Self { - Intersection { - iter: self.iter.clone(), - ..*self - } - } -} - -impl fmt::Debug for Intersection<'_, T, S> -where - T: fmt::Debug + Eq + Hash, - S: BuildHasher, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.clone()).finish() - } -} - -/// A lazy iterator producing elements in the symmetric difference of `IndexSet`s. -/// -/// This `struct` is created by the [`symmetric_difference`] method on -/// [`IndexSet`]. See its documentation for more. -/// -/// [`IndexSet`]: struct.IndexSet.html -/// [`symmetric_difference`]: struct.IndexSet.html#method.symmetric_difference -pub struct SymmetricDifference<'a, T, S1, S2> { - iter: Chain, Difference<'a, T, S1>>, -} - -impl<'a, T, S1, S2> Iterator for SymmetricDifference<'a, T, S1, S2> -where - T: Eq + Hash, - S1: BuildHasher, - S2: BuildHasher, -{ - type Item = &'a T; - - fn next(&mut self) -> Option { - self.iter.next() - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } - - fn fold(self, init: B, f: F) -> B - where - F: FnMut(B, Self::Item) -> B, - { - self.iter.fold(init, f) - } -} - -impl DoubleEndedIterator for SymmetricDifference<'_, T, S1, S2> -where - T: Eq + Hash, - S1: BuildHasher, - S2: BuildHasher, -{ - fn next_back(&mut self) -> Option { - self.iter.next_back() - } - - fn rfold(self, init: B, f: F) -> B - where - F: FnMut(B, Self::Item) -> B, - { - self.iter.rfold(init, f) - } -} - -impl FusedIterator for SymmetricDifference<'_, T, S1, S2> -where - T: Eq + Hash, - S1: BuildHasher, - S2: BuildHasher, -{ -} - -impl Clone for SymmetricDifference<'_, T, S1, S2> { - fn clone(&self) -> Self { - SymmetricDifference { - iter: self.iter.clone(), - } - } -} - -impl fmt::Debug for SymmetricDifference<'_, T, S1, S2> -where - T: fmt::Debug + Eq + Hash, - S1: BuildHasher, - S2: BuildHasher, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.clone()).finish() - } -} - -/// A lazy iterator producing elements in the union of `IndexSet`s. -/// -/// This `struct` is created by the [`union`] method on [`IndexSet`]. -/// See its documentation for more. 
-/// -/// [`IndexSet`]: struct.IndexSet.html -/// [`union`]: struct.IndexSet.html#method.union -pub struct Union<'a, T, S> { - iter: Chain, Difference<'a, T, S>>, -} - -impl<'a, T, S> Iterator for Union<'a, T, S> -where - T: Eq + Hash, - S: BuildHasher, -{ - type Item = &'a T; - - fn next(&mut self) -> Option { - self.iter.next() - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } - - fn fold(self, init: B, f: F) -> B - where - F: FnMut(B, Self::Item) -> B, - { - self.iter.fold(init, f) - } -} - -impl DoubleEndedIterator for Union<'_, T, S> -where - T: Eq + Hash, - S: BuildHasher, -{ - fn next_back(&mut self) -> Option { - self.iter.next_back() - } - - fn rfold(self, init: B, f: F) -> B - where - F: FnMut(B, Self::Item) -> B, - { - self.iter.rfold(init, f) - } -} - -impl FusedIterator for Union<'_, T, S> -where - T: Eq + Hash, - S: BuildHasher, -{ -} - -impl Clone for Union<'_, T, S> { - fn clone(&self) -> Self { - Union { - iter: self.iter.clone(), - } - } -} - -impl fmt::Debug for Union<'_, T, S> -where - T: fmt::Debug + Eq + Hash, - S: BuildHasher, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.clone()).finish() - } -} - impl BitAnd<&IndexSet> for &IndexSet where T: Eq + Hash + Clone, @@ -1383,530 +1073,3 @@ where self.difference(other).cloned().collect() } } - -#[cfg(test)] -mod tests { - use super::*; - use std::string::String; - - #[test] - fn it_works() { - let mut set = IndexSet::new(); - assert_eq!(set.is_empty(), true); - set.insert(1); - set.insert(1); - assert_eq!(set.len(), 1); - assert!(set.get(&1).is_some()); - assert_eq!(set.is_empty(), false); - } - - #[test] - fn new() { - let set = IndexSet::::new(); - println!("{:?}", set); - assert_eq!(set.capacity(), 0); - assert_eq!(set.len(), 0); - assert_eq!(set.is_empty(), true); - } - - #[test] - fn insert() { - let insert = [0, 4, 2, 12, 8, 7, 11, 5]; - let not_present = [1, 3, 6, 9, 10]; - let mut set = IndexSet::with_capacity(insert.len()); - - for (i, &elt) in insert.iter().enumerate() { - assert_eq!(set.len(), i); - set.insert(elt); - assert_eq!(set.len(), i + 1); - assert_eq!(set.get(&elt), Some(&elt)); - } - println!("{:?}", set); - - for &elt in ¬_present { - assert!(set.get(&elt).is_none()); - } - } - - #[test] - fn insert_full() { - let insert = vec![9, 2, 7, 1, 4, 6, 13]; - let present = vec![1, 6, 2]; - let mut set = IndexSet::with_capacity(insert.len()); - - for (i, &elt) in insert.iter().enumerate() { - assert_eq!(set.len(), i); - let (index, success) = set.insert_full(elt); - assert!(success); - assert_eq!(Some(index), set.get_full(&elt).map(|x| x.0)); - assert_eq!(set.len(), i + 1); - } - - let len = set.len(); - for &elt in &present { - let (index, success) = set.insert_full(elt); - assert!(!success); - assert_eq!(Some(index), set.get_full(&elt).map(|x| x.0)); - assert_eq!(set.len(), len); - } - } - - #[test] - fn insert_2() { - let mut set = IndexSet::with_capacity(16); - - let mut values = vec![]; - values.extend(0..16); - values.extend(if cfg!(miri) { 32..64 } else { 128..267 }); - - for &i in &values { - let old_set = set.clone(); - set.insert(i); - for value in old_set.iter() { - if set.get(value).is_none() { - println!("old_set: {:?}", old_set); - println!("set: {:?}", set); - panic!("did not find {} in set", value); - } - } - } - - for &i in &values { - assert!(set.get(&i).is_some(), "did not find {}", i); - } - } - - #[test] - fn insert_dup() { - let mut elements = vec![0, 2, 4, 6, 8]; - let mut set: IndexSet = 
elements.drain(..).collect(); - { - let (i, v) = set.get_full(&0).unwrap(); - assert_eq!(set.len(), 5); - assert_eq!(i, 0); - assert_eq!(*v, 0); - } - { - let inserted = set.insert(0); - let (i, v) = set.get_full(&0).unwrap(); - assert_eq!(set.len(), 5); - assert_eq!(inserted, false); - assert_eq!(i, 0); - assert_eq!(*v, 0); - } - } - - #[test] - fn insert_order() { - let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; - let mut set = IndexSet::new(); - - for &elt in &insert { - set.insert(elt); - } - - assert_eq!(set.iter().count(), set.len()); - assert_eq!(set.iter().count(), insert.len()); - for (a, b) in insert.iter().zip(set.iter()) { - assert_eq!(a, b); - } - for (i, v) in (0..insert.len()).zip(set.iter()) { - assert_eq!(set.get_index(i).unwrap(), v); - } - } - - #[test] - fn replace() { - let replace = [0, 4, 2, 12, 8, 7, 11, 5]; - let not_present = [1, 3, 6, 9, 10]; - let mut set = IndexSet::with_capacity(replace.len()); - - for (i, &elt) in replace.iter().enumerate() { - assert_eq!(set.len(), i); - set.replace(elt); - assert_eq!(set.len(), i + 1); - assert_eq!(set.get(&elt), Some(&elt)); - } - println!("{:?}", set); - - for &elt in ¬_present { - assert!(set.get(&elt).is_none()); - } - } - - #[test] - fn replace_full() { - let replace = vec![9, 2, 7, 1, 4, 6, 13]; - let present = vec![1, 6, 2]; - let mut set = IndexSet::with_capacity(replace.len()); - - for (i, &elt) in replace.iter().enumerate() { - assert_eq!(set.len(), i); - let (index, replaced) = set.replace_full(elt); - assert!(replaced.is_none()); - assert_eq!(Some(index), set.get_full(&elt).map(|x| x.0)); - assert_eq!(set.len(), i + 1); - } - - let len = set.len(); - for &elt in &present { - let (index, replaced) = set.replace_full(elt); - assert_eq!(Some(elt), replaced); - assert_eq!(Some(index), set.get_full(&elt).map(|x| x.0)); - assert_eq!(set.len(), len); - } - } - - #[test] - fn replace_2() { - let mut set = IndexSet::with_capacity(16); - - let mut values = vec![]; - values.extend(0..16); - values.extend(if cfg!(miri) { 32..64 } else { 128..267 }); - - for &i in &values { - let old_set = set.clone(); - set.replace(i); - for value in old_set.iter() { - if set.get(value).is_none() { - println!("old_set: {:?}", old_set); - println!("set: {:?}", set); - panic!("did not find {} in set", value); - } - } - } - - for &i in &values { - assert!(set.get(&i).is_some(), "did not find {}", i); - } - } - - #[test] - fn replace_dup() { - let mut elements = vec![0, 2, 4, 6, 8]; - let mut set: IndexSet = elements.drain(..).collect(); - { - let (i, v) = set.get_full(&0).unwrap(); - assert_eq!(set.len(), 5); - assert_eq!(i, 0); - assert_eq!(*v, 0); - } - { - let replaced = set.replace(0); - let (i, v) = set.get_full(&0).unwrap(); - assert_eq!(set.len(), 5); - assert_eq!(replaced, Some(0)); - assert_eq!(i, 0); - assert_eq!(*v, 0); - } - } - - #[test] - fn replace_order() { - let replace = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; - let mut set = IndexSet::new(); - - for &elt in &replace { - set.replace(elt); - } - - assert_eq!(set.iter().count(), set.len()); - assert_eq!(set.iter().count(), replace.len()); - for (a, b) in replace.iter().zip(set.iter()) { - assert_eq!(a, b); - } - for (i, v) in (0..replace.len()).zip(set.iter()) { - assert_eq!(set.get_index(i).unwrap(), v); - } - } - - #[test] - fn grow() { - let insert = [0, 4, 2, 12, 8, 7, 11]; - let not_present = [1, 3, 6, 9, 10]; - let mut set = IndexSet::with_capacity(insert.len()); - - for (i, &elt) in insert.iter().enumerate() { - assert_eq!(set.len(), i); - 
set.insert(elt); - assert_eq!(set.len(), i + 1); - assert_eq!(set.get(&elt), Some(&elt)); - } - - println!("{:?}", set); - for &elt in &insert { - set.insert(elt * 10); - } - for &elt in &insert { - set.insert(elt * 100); - } - for (i, &elt) in insert.iter().cycle().enumerate().take(100) { - set.insert(elt * 100 + i as i32); - } - println!("{:?}", set); - for &elt in ¬_present { - assert!(set.get(&elt).is_none()); - } - } - - #[test] - fn reserve() { - let mut set = IndexSet::::new(); - assert_eq!(set.capacity(), 0); - set.reserve(100); - let capacity = set.capacity(); - assert!(capacity >= 100); - for i in 0..capacity { - assert_eq!(set.len(), i); - set.insert(i); - assert_eq!(set.len(), i + 1); - assert_eq!(set.capacity(), capacity); - assert_eq!(set.get(&i), Some(&i)); - } - set.insert(capacity); - assert_eq!(set.len(), capacity + 1); - assert!(set.capacity() > capacity); - assert_eq!(set.get(&capacity), Some(&capacity)); - } - - #[test] - fn shrink_to_fit() { - let mut set = IndexSet::::new(); - assert_eq!(set.capacity(), 0); - for i in 0..100 { - assert_eq!(set.len(), i); - set.insert(i); - assert_eq!(set.len(), i + 1); - assert!(set.capacity() >= i + 1); - assert_eq!(set.get(&i), Some(&i)); - set.shrink_to_fit(); - assert_eq!(set.len(), i + 1); - assert_eq!(set.capacity(), i + 1); - assert_eq!(set.get(&i), Some(&i)); - } - } - - #[test] - fn remove() { - let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; - let mut set = IndexSet::new(); - - for &elt in &insert { - set.insert(elt); - } - - assert_eq!(set.iter().count(), set.len()); - assert_eq!(set.iter().count(), insert.len()); - for (a, b) in insert.iter().zip(set.iter()) { - assert_eq!(a, b); - } - - let remove_fail = [99, 77]; - let remove = [4, 12, 8, 7]; - - for &value in &remove_fail { - assert!(set.swap_remove_full(&value).is_none()); - } - println!("{:?}", set); - for &value in &remove { - //println!("{:?}", set); - let index = set.get_full(&value).unwrap().0; - assert_eq!(set.swap_remove_full(&value), Some((index, value))); - } - println!("{:?}", set); - - for value in &insert { - assert_eq!(set.get(value).is_some(), !remove.contains(value)); - } - assert_eq!(set.len(), insert.len() - remove.len()); - assert_eq!(set.iter().count(), insert.len() - remove.len()); - } - - #[test] - fn swap_remove_index() { - let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; - let mut set = IndexSet::new(); - - for &elt in &insert { - set.insert(elt); - } - - let mut vector = insert.to_vec(); - let remove_sequence = &[3, 3, 10, 4, 5, 4, 3, 0, 1]; - - // check that the same swap remove sequence on vec and set - // have the same result. 
- for &rm in remove_sequence { - let out_vec = vector.swap_remove(rm); - let out_set = set.swap_remove_index(rm).unwrap(); - assert_eq!(out_vec, out_set); - } - assert_eq!(vector.len(), set.len()); - for (a, b) in vector.iter().zip(set.iter()) { - assert_eq!(a, b); - } - } - - #[test] - fn partial_eq_and_eq() { - let mut set_a = IndexSet::new(); - set_a.insert(1); - set_a.insert(2); - let mut set_b = set_a.clone(); - assert_eq!(set_a, set_b); - set_b.swap_remove(&1); - assert_ne!(set_a, set_b); - - let set_c: IndexSet<_> = set_b.into_iter().collect(); - assert_ne!(set_a, set_c); - assert_ne!(set_c, set_a); - } - - #[test] - fn extend() { - let mut set = IndexSet::new(); - set.extend(vec![&1, &2, &3, &4]); - set.extend(vec![5, 6]); - assert_eq!(set.into_iter().collect::>(), vec![1, 2, 3, 4, 5, 6]); - } - - #[test] - fn comparisons() { - let set_a: IndexSet<_> = (0..3).collect(); - let set_b: IndexSet<_> = (3..6).collect(); - let set_c: IndexSet<_> = (0..6).collect(); - let set_d: IndexSet<_> = (3..9).collect(); - - assert!(!set_a.is_disjoint(&set_a)); - assert!(set_a.is_subset(&set_a)); - assert!(set_a.is_superset(&set_a)); - - assert!(set_a.is_disjoint(&set_b)); - assert!(set_b.is_disjoint(&set_a)); - assert!(!set_a.is_subset(&set_b)); - assert!(!set_b.is_subset(&set_a)); - assert!(!set_a.is_superset(&set_b)); - assert!(!set_b.is_superset(&set_a)); - - assert!(!set_a.is_disjoint(&set_c)); - assert!(!set_c.is_disjoint(&set_a)); - assert!(set_a.is_subset(&set_c)); - assert!(!set_c.is_subset(&set_a)); - assert!(!set_a.is_superset(&set_c)); - assert!(set_c.is_superset(&set_a)); - - assert!(!set_c.is_disjoint(&set_d)); - assert!(!set_d.is_disjoint(&set_c)); - assert!(!set_c.is_subset(&set_d)); - assert!(!set_d.is_subset(&set_c)); - assert!(!set_c.is_superset(&set_d)); - assert!(!set_d.is_superset(&set_c)); - } - - #[test] - fn iter_comparisons() { - use std::iter::empty; - - fn check<'a, I1, I2>(iter1: I1, iter2: I2) - where - I1: Iterator, - I2: Iterator, - { - assert!(iter1.copied().eq(iter2)); - } - - let set_a: IndexSet<_> = (0..3).collect(); - let set_b: IndexSet<_> = (3..6).collect(); - let set_c: IndexSet<_> = (0..6).collect(); - let set_d: IndexSet<_> = (3..9).rev().collect(); - - check(set_a.difference(&set_a), empty()); - check(set_a.symmetric_difference(&set_a), empty()); - check(set_a.intersection(&set_a), 0..3); - check(set_a.union(&set_a), 0..3); - - check(set_a.difference(&set_b), 0..3); - check(set_b.difference(&set_a), 3..6); - check(set_a.symmetric_difference(&set_b), 0..6); - check(set_b.symmetric_difference(&set_a), (3..6).chain(0..3)); - check(set_a.intersection(&set_b), empty()); - check(set_b.intersection(&set_a), empty()); - check(set_a.union(&set_b), 0..6); - check(set_b.union(&set_a), (3..6).chain(0..3)); - - check(set_a.difference(&set_c), empty()); - check(set_c.difference(&set_a), 3..6); - check(set_a.symmetric_difference(&set_c), 3..6); - check(set_c.symmetric_difference(&set_a), 3..6); - check(set_a.intersection(&set_c), 0..3); - check(set_c.intersection(&set_a), 0..3); - check(set_a.union(&set_c), 0..6); - check(set_c.union(&set_a), 0..6); - - check(set_c.difference(&set_d), 0..3); - check(set_d.difference(&set_c), (6..9).rev()); - check( - set_c.symmetric_difference(&set_d), - (0..3).chain((6..9).rev()), - ); - check(set_d.symmetric_difference(&set_c), (6..9).rev().chain(0..3)); - check(set_c.intersection(&set_d), 3..6); - check(set_d.intersection(&set_c), (3..6).rev()); - check(set_c.union(&set_d), (0..6).chain((6..9).rev())); - check(set_d.union(&set_c), 
(3..9).rev().chain(0..3)); - } - - #[test] - fn ops() { - let empty = IndexSet::::new(); - let set_a: IndexSet<_> = (0..3).collect(); - let set_b: IndexSet<_> = (3..6).collect(); - let set_c: IndexSet<_> = (0..6).collect(); - let set_d: IndexSet<_> = (3..9).rev().collect(); - - #[allow(clippy::eq_op)] - { - assert_eq!(&set_a & &set_a, set_a); - assert_eq!(&set_a | &set_a, set_a); - assert_eq!(&set_a ^ &set_a, empty); - assert_eq!(&set_a - &set_a, empty); - } - - assert_eq!(&set_a & &set_b, empty); - assert_eq!(&set_b & &set_a, empty); - assert_eq!(&set_a | &set_b, set_c); - assert_eq!(&set_b | &set_a, set_c); - assert_eq!(&set_a ^ &set_b, set_c); - assert_eq!(&set_b ^ &set_a, set_c); - assert_eq!(&set_a - &set_b, set_a); - assert_eq!(&set_b - &set_a, set_b); - - assert_eq!(&set_a & &set_c, set_a); - assert_eq!(&set_c & &set_a, set_a); - assert_eq!(&set_a | &set_c, set_c); - assert_eq!(&set_c | &set_a, set_c); - assert_eq!(&set_a ^ &set_c, set_b); - assert_eq!(&set_c ^ &set_a, set_b); - assert_eq!(&set_a - &set_c, empty); - assert_eq!(&set_c - &set_a, set_b); - - assert_eq!(&set_c & &set_d, set_b); - assert_eq!(&set_d & &set_c, set_b); - assert_eq!(&set_c | &set_d, &set_a | &set_d); - assert_eq!(&set_d | &set_c, &set_a | &set_d); - assert_eq!(&set_c ^ &set_d, &set_a | &(&set_d - &set_b)); - assert_eq!(&set_d ^ &set_c, &set_a | &(&set_d - &set_b)); - assert_eq!(&set_c - &set_d, set_a); - assert_eq!(&set_d - &set_c, &set_d - &set_b); - } - - #[test] - #[cfg(has_std)] - fn from_array() { - let set1 = IndexSet::from([1, 2, 3, 4]); - let set2: IndexSet<_> = [1, 2, 3, 4].into(); - - assert_eq!(set1, set2); - } -} diff --git a/vendor/indexmap/src/set/iter.rs b/vendor/indexmap/src/set/iter.rs new file mode 100644 index 0000000..828756d --- /dev/null +++ b/vendor/indexmap/src/set/iter.rs @@ -0,0 +1,543 @@ +use super::{Bucket, Entries, IndexSet, Slice}; + +use alloc::vec::{self, Vec}; +use core::fmt; +use core::hash::{BuildHasher, Hash}; +use core::iter::{Chain, FusedIterator}; +use core::slice::Iter as SliceIter; + +impl<'a, T, S> IntoIterator for &'a IndexSet { + type Item = &'a T; + type IntoIter = Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl IntoIterator for IndexSet { + type Item = T; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.into_entries()) + } +} + +/// An iterator over the items of a `IndexSet`. +/// +/// This `struct` is created by the [`iter`] method on [`IndexSet`]. +/// See its documentation for more. +/// +/// [`IndexSet`]: struct.IndexSet.html +/// [`iter`]: struct.IndexSet.html#method.iter +pub struct Iter<'a, T> { + iter: SliceIter<'a, Bucket>, +} + +impl<'a, T> Iter<'a, T> { + pub(super) fn new(entries: &'a [Bucket]) -> Self { + Self { + iter: entries.iter(), + } + } + + /// Returns a slice of the remaining entries in the iterator. 
+ pub fn as_slice(&self) -> &'a Slice { + Slice::from_slice(self.iter.as_slice()) + } +} + +impl<'a, T> Iterator for Iter<'a, T> { + type Item = &'a T; + + iterator_methods!(Bucket::key_ref); +} + +impl DoubleEndedIterator for Iter<'_, T> { + double_ended_iterator_methods!(Bucket::key_ref); +} + +impl ExactSizeIterator for Iter<'_, T> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for Iter<'_, T> {} + +impl Clone for Iter<'_, T> { + fn clone(&self) -> Self { + Iter { + iter: self.iter.clone(), + } + } +} + +impl fmt::Debug for Iter<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +impl Default for Iter<'_, T> { + fn default() -> Self { + Self { iter: [].iter() } + } +} + +/// An owning iterator over the items of a `IndexSet`. +/// +/// This `struct` is created by the [`into_iter`] method on [`IndexSet`] +/// (provided by the `IntoIterator` trait). See its documentation for more. +/// +/// [`IndexSet`]: struct.IndexSet.html +/// [`into_iter`]: struct.IndexSet.html#method.into_iter +pub struct IntoIter { + iter: vec::IntoIter>, +} + +impl IntoIter { + pub(super) fn new(entries: Vec>) -> Self { + Self { + iter: entries.into_iter(), + } + } + + /// Returns a slice of the remaining entries in the iterator. + pub fn as_slice(&self) -> &Slice { + Slice::from_slice(self.iter.as_slice()) + } +} + +impl Iterator for IntoIter { + type Item = T; + + iterator_methods!(Bucket::key); +} + +impl DoubleEndedIterator for IntoIter { + double_ended_iterator_methods!(Bucket::key); +} + +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for IntoIter {} + +impl fmt::Debug for IntoIter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let iter = self.iter.as_slice().iter().map(Bucket::key_ref); + f.debug_list().entries(iter).finish() + } +} + +impl Default for IntoIter { + fn default() -> Self { + Self { + iter: Vec::new().into_iter(), + } + } +} + +/// A draining iterator over the items of a `IndexSet`. +/// +/// This `struct` is created by the [`drain`] method on [`IndexSet`]. +/// See its documentation for more. +/// +/// [`IndexSet`]: struct.IndexSet.html +/// [`drain`]: struct.IndexSet.html#method.drain +pub struct Drain<'a, T> { + iter: vec::Drain<'a, Bucket>, +} + +impl<'a, T> Drain<'a, T> { + pub(super) fn new(iter: vec::Drain<'a, Bucket>) -> Self { + Self { iter } + } + + /// Returns a slice of the remaining entries in the iterator. + pub fn as_slice(&self) -> &Slice { + Slice::from_slice(self.iter.as_slice()) + } +} + +impl Iterator for Drain<'_, T> { + type Item = T; + + iterator_methods!(Bucket::key); +} + +impl DoubleEndedIterator for Drain<'_, T> { + double_ended_iterator_methods!(Bucket::key); +} + +impl ExactSizeIterator for Drain<'_, T> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for Drain<'_, T> {} + +impl fmt::Debug for Drain<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let iter = self.iter.as_slice().iter().map(Bucket::key_ref); + f.debug_list().entries(iter).finish() + } +} + +/// A lazy iterator producing elements in the difference of `IndexSet`s. +/// +/// This `struct` is created by the [`difference`] method on [`IndexSet`]. +/// See its documentation for more. 
+/// +/// [`IndexSet`]: struct.IndexSet.html +/// [`difference`]: struct.IndexSet.html#method.difference +pub struct Difference<'a, T, S> { + iter: Iter<'a, T>, + other: &'a IndexSet, +} + +impl<'a, T, S> Difference<'a, T, S> { + pub(super) fn new(set: &'a IndexSet, other: &'a IndexSet) -> Self { + Self { + iter: set.iter(), + other, + } + } +} + +impl<'a, T, S> Iterator for Difference<'a, T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + type Item = &'a T; + + fn next(&mut self) -> Option { + while let Some(item) = self.iter.next() { + if !self.other.contains(item) { + return Some(item); + } + } + None + } + + fn size_hint(&self) -> (usize, Option) { + (0, self.iter.size_hint().1) + } +} + +impl DoubleEndedIterator for Difference<'_, T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + fn next_back(&mut self) -> Option { + while let Some(item) = self.iter.next_back() { + if !self.other.contains(item) { + return Some(item); + } + } + None + } +} + +impl FusedIterator for Difference<'_, T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ +} + +impl Clone for Difference<'_, T, S> { + fn clone(&self) -> Self { + Difference { + iter: self.iter.clone(), + ..*self + } + } +} + +impl fmt::Debug for Difference<'_, T, S> +where + T: fmt::Debug + Eq + Hash, + S: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +/// A lazy iterator producing elements in the intersection of `IndexSet`s. +/// +/// This `struct` is created by the [`intersection`] method on [`IndexSet`]. +/// See its documentation for more. +/// +/// [`IndexSet`]: struct.IndexSet.html +/// [`intersection`]: struct.IndexSet.html#method.intersection +pub struct Intersection<'a, T, S> { + iter: Iter<'a, T>, + other: &'a IndexSet, +} + +impl<'a, T, S> Intersection<'a, T, S> { + pub(super) fn new(set: &'a IndexSet, other: &'a IndexSet) -> Self { + Self { + iter: set.iter(), + other, + } + } +} + +impl<'a, T, S> Iterator for Intersection<'a, T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + type Item = &'a T; + + fn next(&mut self) -> Option { + while let Some(item) = self.iter.next() { + if self.other.contains(item) { + return Some(item); + } + } + None + } + + fn size_hint(&self) -> (usize, Option) { + (0, self.iter.size_hint().1) + } +} + +impl DoubleEndedIterator for Intersection<'_, T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + fn next_back(&mut self) -> Option { + while let Some(item) = self.iter.next_back() { + if self.other.contains(item) { + return Some(item); + } + } + None + } +} + +impl FusedIterator for Intersection<'_, T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ +} + +impl Clone for Intersection<'_, T, S> { + fn clone(&self) -> Self { + Intersection { + iter: self.iter.clone(), + ..*self + } + } +} + +impl fmt::Debug for Intersection<'_, T, S> +where + T: fmt::Debug + Eq + Hash, + S: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +/// A lazy iterator producing elements in the symmetric difference of `IndexSet`s. +/// +/// This `struct` is created by the [`symmetric_difference`] method on +/// [`IndexSet`]. See its documentation for more. 
+/// +/// [`IndexSet`]: struct.IndexSet.html +/// [`symmetric_difference`]: struct.IndexSet.html#method.symmetric_difference +pub struct SymmetricDifference<'a, T, S1, S2> { + iter: Chain, Difference<'a, T, S1>>, +} + +impl<'a, T, S1, S2> SymmetricDifference<'a, T, S1, S2> +where + T: Eq + Hash, + S1: BuildHasher, + S2: BuildHasher, +{ + pub(super) fn new(set1: &'a IndexSet, set2: &'a IndexSet) -> Self { + let diff1 = set1.difference(set2); + let diff2 = set2.difference(set1); + Self { + iter: diff1.chain(diff2), + } + } +} + +impl<'a, T, S1, S2> Iterator for SymmetricDifference<'a, T, S1, S2> +where + T: Eq + Hash, + S1: BuildHasher, + S2: BuildHasher, +{ + type Item = &'a T; + + fn next(&mut self) -> Option { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } + + fn fold(self, init: B, f: F) -> B + where + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, f) + } +} + +impl DoubleEndedIterator for SymmetricDifference<'_, T, S1, S2> +where + T: Eq + Hash, + S1: BuildHasher, + S2: BuildHasher, +{ + fn next_back(&mut self) -> Option { + self.iter.next_back() + } + + fn rfold(self, init: B, f: F) -> B + where + F: FnMut(B, Self::Item) -> B, + { + self.iter.rfold(init, f) + } +} + +impl FusedIterator for SymmetricDifference<'_, T, S1, S2> +where + T: Eq + Hash, + S1: BuildHasher, + S2: BuildHasher, +{ +} + +impl Clone for SymmetricDifference<'_, T, S1, S2> { + fn clone(&self) -> Self { + SymmetricDifference { + iter: self.iter.clone(), + } + } +} + +impl fmt::Debug for SymmetricDifference<'_, T, S1, S2> +where + T: fmt::Debug + Eq + Hash, + S1: BuildHasher, + S2: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +/// A lazy iterator producing elements in the union of `IndexSet`s. +/// +/// This `struct` is created by the [`union`] method on [`IndexSet`]. +/// See its documentation for more. 
+///
+/// [`IndexSet`]: struct.IndexSet.html
+/// [`union`]: struct.IndexSet.html#method.union
+pub struct Union<'a, T, S> {
+    iter: Chain<Iter<'a, T>, Difference<'a, T, S>>,
+}
+
+impl<'a, T, S> Union<'a, T, S>
+where
+    T: Eq + Hash,
+    S: BuildHasher,
+{
+    pub(super) fn new<S2>(set1: &'a IndexSet<T, S>, set2: &'a IndexSet<T, S2>) -> Self
+    where
+        S2: BuildHasher,
+    {
+        Self {
+            iter: set1.iter().chain(set2.difference(set1)),
+        }
+    }
+}
+
+impl<'a, T, S> Iterator for Union<'a, T, S>
+where
+    T: Eq + Hash,
+    S: BuildHasher,
+{
+    type Item = &'a T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next()
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+
+    fn fold<B, F>(self, init: B, f: F) -> B
+    where
+        F: FnMut(B, Self::Item) -> B,
+    {
+        self.iter.fold(init, f)
+    }
+}
+
+impl<T, S> DoubleEndedIterator for Union<'_, T, S>
+where
+    T: Eq + Hash,
+    S: BuildHasher,
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.iter.next_back()
+    }
+
+    fn rfold<B, F>(self, init: B, f: F) -> B
+    where
+        F: FnMut(B, Self::Item) -> B,
+    {
+        self.iter.rfold(init, f)
+    }
+}
+
+impl<T, S> FusedIterator for Union<'_, T, S>
+where
+    T: Eq + Hash,
+    S: BuildHasher,
+{
+}
+
+impl<T, S> Clone for Union<'_, T, S> {
+    fn clone(&self) -> Self {
+        Union {
+            iter: self.iter.clone(),
+        }
+    }
+}
+
+impl<T, S> fmt::Debug for Union<'_, T, S>
+where
+    T: fmt::Debug + Eq + Hash,
+    S: BuildHasher,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_list().entries(self.clone()).finish()
+    }
+}
diff --git a/vendor/indexmap/src/set/slice.rs b/vendor/indexmap/src/set/slice.rs
new file mode 100644
index 0000000..251d067
--- /dev/null
+++ b/vendor/indexmap/src/set/slice.rs
@@ -0,0 +1,341 @@
+use super::{Bucket, Entries, IndexSet, IntoIter, Iter};
+use crate::util::try_simplify_range;
+
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::cmp::Ordering;
+use core::fmt;
+use core::hash::{Hash, Hasher};
+use core::ops::{self, Bound, Index, RangeBounds};
+
+/// A dynamically-sized slice of values in an `IndexSet`.
+///
+/// This supports indexed operations much like a `[T]` slice,
+/// but not any hashed operations on the values.
+///
+/// Unlike `IndexSet`, `Slice` does consider the order for `PartialEq`
+/// and `Eq`, and it also implements `PartialOrd`, `Ord`, and `Hash`.
+#[repr(transparent)]
+pub struct Slice<T> {
+    pub(crate) entries: [Bucket<T>],
+}
+
+// SAFETY: `Slice<T>` is a transparent wrapper around `[Bucket<T>]`,
+// and reference lifetimes are bound together in function signatures.
+#[allow(unsafe_code)]
+impl<T> Slice<T> {
+    pub(super) const fn from_slice(entries: &[Bucket<T>]) -> &Self {
+        unsafe { &*(entries as *const [Bucket<T>] as *const Self) }
+    }
+
+    pub(super) fn from_boxed(entries: Box<[Bucket<T>]>) -> Box<Self> {
+        unsafe { Box::from_raw(Box::into_raw(entries) as *mut Self) }
+    }
+
+    fn into_boxed(self: Box<Self>) -> Box<[Bucket<T>]> {
+        unsafe { Box::from_raw(Box::into_raw(self) as *mut [Bucket<T>]) }
+    }
+}
+
+impl<T> Slice<T> {
+    pub(crate) fn into_entries(self: Box<Self>) -> Vec<Bucket<T>> {
+        self.into_boxed().into_vec()
+    }
+
+    /// Returns an empty slice.
+    pub const fn new<'a>() -> &'a Self {
+        Self::from_slice(&[])
+    }
+
+    /// Return the number of elements in the set slice.
+    pub const fn len(&self) -> usize {
+        self.entries.len()
+    }
+
+    /// Returns true if the set slice contains no elements.
+    pub const fn is_empty(&self) -> bool {
+        self.entries.is_empty()
+    }
+
+    /// Get a value by index.
+    ///
+    /// Valid indices are *0 <= index < self.len()*
+    pub fn get_index(&self, index: usize) -> Option<&T> {
+        self.entries.get(index).map(Bucket::key_ref)
+    }
+
+    /// Returns a slice of values in the given range of indices.
+    ///
+    /// Valid indices are *0 <= index < self.len()*
+    pub fn get_range<R: RangeBounds<usize>>(&self, range: R) -> Option<&Self> {
+        let range = try_simplify_range(range, self.entries.len())?;
+        self.entries.get(range).map(Self::from_slice)
+    }
+
+    /// Get the first value.
+    pub fn first(&self) -> Option<&T> {
+        self.entries.first().map(Bucket::key_ref)
+    }
+
+    /// Get the last value.
+    pub fn last(&self) -> Option<&T> {
+        self.entries.last().map(Bucket::key_ref)
+    }
+
+    /// Divides one slice into two at an index.
+    ///
+    /// ***Panics*** if `index > len`.
+    pub fn split_at(&self, index: usize) -> (&Self, &Self) {
+        let (first, second) = self.entries.split_at(index);
+        (Self::from_slice(first), Self::from_slice(second))
+    }
+
+    /// Returns the first value and the rest of the slice,
+    /// or `None` if it is empty.
+    pub fn split_first(&self) -> Option<(&T, &Self)> {
+        if let [first, rest @ ..] = &self.entries {
+            Some((&first.key, Self::from_slice(rest)))
+        } else {
+            None
+        }
+    }
+
+    /// Returns the last value and the rest of the slice,
+    /// or `None` if it is empty.
+    pub fn split_last(&self) -> Option<(&T, &Self)> {
+        if let [rest @ .., last] = &self.entries {
+            Some((&last.key, Self::from_slice(rest)))
+        } else {
+            None
+        }
+    }
+
+    /// Return an iterator over the values of the set slice.
+    pub fn iter(&self) -> Iter<'_, T> {
+        Iter::new(&self.entries)
+    }
+
+    /// Search over a sorted set for a value.
+    ///
+    /// Returns the position where that value is present, or the position where it can be inserted
+    /// to maintain the sort. See [`slice::binary_search`] for more details.
+    ///
+    /// Computes in **O(log(n))** time, which is notably less scalable than looking the value up in
+    /// the set this is a slice from using [`IndexSet::get_index_of`], but this can also position
+    /// missing values.
+    pub fn binary_search(&self, x: &T) -> Result<usize, usize>
+    where
+        T: Ord,
+    {
+        self.binary_search_by(|p| p.cmp(x))
+    }
+
+    /// Search over a sorted set with a comparator function.
+    ///
+    /// Returns the position where that value is present, or the position where it can be inserted
+    /// to maintain the sort. See [`slice::binary_search_by`] for more details.
+    ///
+    /// Computes in **O(log(n))** time.
+    #[inline]
+    pub fn binary_search_by<'a, F>(&'a self, mut f: F) -> Result<usize, usize>
+    where
+        F: FnMut(&'a T) -> Ordering,
+    {
+        self.entries.binary_search_by(move |a| f(&a.key))
+    }
+
+    /// Search over a sorted set with an extraction function.
+    ///
+    /// Returns the position where that value is present, or the position where it can be inserted
+    /// to maintain the sort. See [`slice::binary_search_by_key`] for more details.
+    ///
+    /// Computes in **O(log(n))** time.
+    #[inline]
+    pub fn binary_search_by_key<'a, B, F>(&'a self, b: &B, mut f: F) -> Result<usize, usize>
+    where
+        F: FnMut(&'a T) -> B,
+        B: Ord,
+    {
+        self.binary_search_by(|k| f(k).cmp(b))
+    }
+
+    /// Returns the index of the partition point of a sorted set according to the given predicate
+    /// (the index of the first element of the second partition).
+    ///
+    /// See [`slice::partition_point`] for more details.
+    ///
+    /// Computes in **O(log(n))** time.
+    #[must_use]
+    pub fn partition_point<P>

(&self, mut pred: P) -> usize + where + P: FnMut(&T) -> bool, + { + self.entries.partition_point(move |a| pred(&a.key)) + } +} + +impl<'a, T> IntoIterator for &'a Slice { + type IntoIter = Iter<'a, T>; + type Item = &'a T; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl IntoIterator for Box> { + type IntoIter = IntoIter; + type Item = T; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.into_entries()) + } +} + +impl Default for &'_ Slice { + fn default() -> Self { + Slice::from_slice(&[]) + } +} + +impl Default for Box> { + fn default() -> Self { + Slice::from_boxed(Box::default()) + } +} + +impl Clone for Box> { + fn clone(&self) -> Self { + Slice::from_boxed(self.entries.to_vec().into_boxed_slice()) + } +} + +impl From<&Slice> for Box> { + fn from(slice: &Slice) -> Self { + Slice::from_boxed(Box::from(&slice.entries)) + } +} + +impl fmt::Debug for Slice { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self).finish() + } +} + +impl PartialEq for Slice { + fn eq(&self, other: &Self) -> bool { + self.len() == other.len() && self.iter().eq(other) + } +} + +impl Eq for Slice {} + +impl PartialOrd for Slice { + fn partial_cmp(&self, other: &Self) -> Option { + self.iter().partial_cmp(other) + } +} + +impl Ord for Slice { + fn cmp(&self, other: &Self) -> Ordering { + self.iter().cmp(other) + } +} + +impl Hash for Slice { + fn hash(&self, state: &mut H) { + self.len().hash(state); + for value in self { + value.hash(state); + } + } +} + +impl Index for Slice { + type Output = T; + + fn index(&self, index: usize) -> &Self::Output { + &self.entries[index].key + } +} + +// We can't have `impl> Index` because that conflicts with `Index`. +// Instead, we repeat the implementations for all the core range types. +macro_rules! 
impl_index { + ($($range:ty),*) => {$( + impl Index<$range> for IndexSet { + type Output = Slice; + + fn index(&self, range: $range) -> &Self::Output { + Slice::from_slice(&self.as_entries()[range]) + } + } + + impl Index<$range> for Slice { + type Output = Self; + + fn index(&self, range: $range) -> &Self::Output { + Slice::from_slice(&self.entries[range]) + } + } + )*} +} +impl_index!( + ops::Range, + ops::RangeFrom, + ops::RangeFull, + ops::RangeInclusive, + ops::RangeTo, + ops::RangeToInclusive, + (Bound, Bound) +); + +#[cfg(test)] +mod tests { + use super::*; + use alloc::vec::Vec; + + #[test] + fn slice_index() { + fn check(vec_slice: &[i32], set_slice: &Slice, sub_slice: &Slice) { + assert_eq!(set_slice as *const _, sub_slice as *const _); + itertools::assert_equal(vec_slice, set_slice); + } + + let vec: Vec = (0..10).map(|i| i * i).collect(); + let set: IndexSet = vec.iter().cloned().collect(); + let slice = set.as_slice(); + + // RangeFull + check(&vec[..], &set[..], &slice[..]); + + for i in 0usize..10 { + // Index + assert_eq!(vec[i], set[i]); + assert_eq!(vec[i], slice[i]); + + // RangeFrom + check(&vec[i..], &set[i..], &slice[i..]); + + // RangeTo + check(&vec[..i], &set[..i], &slice[..i]); + + // RangeToInclusive + check(&vec[..=i], &set[..=i], &slice[..=i]); + + // (Bound, Bound) + let bounds = (Bound::Excluded(i), Bound::Unbounded); + check(&vec[i + 1..], &set[bounds], &slice[bounds]); + + for j in i..=10 { + // Range + check(&vec[i..j], &set[i..j], &slice[i..j]); + } + + for j in i..10 { + // RangeInclusive + check(&vec[i..=j], &set[i..=j], &slice[i..=j]); + } + } + } +} diff --git a/vendor/indexmap/src/set/tests.rs b/vendor/indexmap/src/set/tests.rs new file mode 100644 index 0000000..d7bb9de --- /dev/null +++ b/vendor/indexmap/src/set/tests.rs @@ -0,0 +1,684 @@ +use super::*; +use std::string::String; + +#[test] +fn it_works() { + let mut set = IndexSet::new(); + assert_eq!(set.is_empty(), true); + set.insert(1); + set.insert(1); + assert_eq!(set.len(), 1); + assert!(set.get(&1).is_some()); + assert_eq!(set.is_empty(), false); +} + +#[test] +fn new() { + let set = IndexSet::::new(); + println!("{:?}", set); + assert_eq!(set.capacity(), 0); + assert_eq!(set.len(), 0); + assert_eq!(set.is_empty(), true); +} + +#[test] +fn insert() { + let insert = [0, 4, 2, 12, 8, 7, 11, 5]; + let not_present = [1, 3, 6, 9, 10]; + let mut set = IndexSet::with_capacity(insert.len()); + + for (i, &elt) in insert.iter().enumerate() { + assert_eq!(set.len(), i); + set.insert(elt); + assert_eq!(set.len(), i + 1); + assert_eq!(set.get(&elt), Some(&elt)); + } + println!("{:?}", set); + + for &elt in ¬_present { + assert!(set.get(&elt).is_none()); + } +} + +#[test] +fn insert_full() { + let insert = vec![9, 2, 7, 1, 4, 6, 13]; + let present = vec![1, 6, 2]; + let mut set = IndexSet::with_capacity(insert.len()); + + for (i, &elt) in insert.iter().enumerate() { + assert_eq!(set.len(), i); + let (index, success) = set.insert_full(elt); + assert!(success); + assert_eq!(Some(index), set.get_full(&elt).map(|x| x.0)); + assert_eq!(set.len(), i + 1); + } + + let len = set.len(); + for &elt in &present { + let (index, success) = set.insert_full(elt); + assert!(!success); + assert_eq!(Some(index), set.get_full(&elt).map(|x| x.0)); + assert_eq!(set.len(), len); + } +} + +#[test] +fn insert_2() { + let mut set = IndexSet::with_capacity(16); + + let mut values = vec![]; + values.extend(0..16); + values.extend(if cfg!(miri) { 32..64 } else { 128..267 }); + + for &i in &values { + let old_set = set.clone(); + 
set.insert(i); + for value in old_set.iter() { + if set.get(value).is_none() { + println!("old_set: {:?}", old_set); + println!("set: {:?}", set); + panic!("did not find {} in set", value); + } + } + } + + for &i in &values { + assert!(set.get(&i).is_some(), "did not find {}", i); + } +} + +#[test] +fn insert_dup() { + let mut elements = vec![0, 2, 4, 6, 8]; + let mut set: IndexSet = elements.drain(..).collect(); + { + let (i, v) = set.get_full(&0).unwrap(); + assert_eq!(set.len(), 5); + assert_eq!(i, 0); + assert_eq!(*v, 0); + } + { + let inserted = set.insert(0); + let (i, v) = set.get_full(&0).unwrap(); + assert_eq!(set.len(), 5); + assert_eq!(inserted, false); + assert_eq!(i, 0); + assert_eq!(*v, 0); + } +} + +#[test] +fn insert_order() { + let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let mut set = IndexSet::new(); + + for &elt in &insert { + set.insert(elt); + } + + assert_eq!(set.iter().count(), set.len()); + assert_eq!(set.iter().count(), insert.len()); + for (a, b) in insert.iter().zip(set.iter()) { + assert_eq!(a, b); + } + for (i, v) in (0..insert.len()).zip(set.iter()) { + assert_eq!(set.get_index(i).unwrap(), v); + } +} + +#[test] +fn replace() { + let replace = [0, 4, 2, 12, 8, 7, 11, 5]; + let not_present = [1, 3, 6, 9, 10]; + let mut set = IndexSet::with_capacity(replace.len()); + + for (i, &elt) in replace.iter().enumerate() { + assert_eq!(set.len(), i); + set.replace(elt); + assert_eq!(set.len(), i + 1); + assert_eq!(set.get(&elt), Some(&elt)); + } + println!("{:?}", set); + + for &elt in ¬_present { + assert!(set.get(&elt).is_none()); + } +} + +#[test] +fn replace_full() { + let replace = vec![9, 2, 7, 1, 4, 6, 13]; + let present = vec![1, 6, 2]; + let mut set = IndexSet::with_capacity(replace.len()); + + for (i, &elt) in replace.iter().enumerate() { + assert_eq!(set.len(), i); + let (index, replaced) = set.replace_full(elt); + assert!(replaced.is_none()); + assert_eq!(Some(index), set.get_full(&elt).map(|x| x.0)); + assert_eq!(set.len(), i + 1); + } + + let len = set.len(); + for &elt in &present { + let (index, replaced) = set.replace_full(elt); + assert_eq!(Some(elt), replaced); + assert_eq!(Some(index), set.get_full(&elt).map(|x| x.0)); + assert_eq!(set.len(), len); + } +} + +#[test] +fn replace_2() { + let mut set = IndexSet::with_capacity(16); + + let mut values = vec![]; + values.extend(0..16); + values.extend(if cfg!(miri) { 32..64 } else { 128..267 }); + + for &i in &values { + let old_set = set.clone(); + set.replace(i); + for value in old_set.iter() { + if set.get(value).is_none() { + println!("old_set: {:?}", old_set); + println!("set: {:?}", set); + panic!("did not find {} in set", value); + } + } + } + + for &i in &values { + assert!(set.get(&i).is_some(), "did not find {}", i); + } +} + +#[test] +fn replace_dup() { + let mut elements = vec![0, 2, 4, 6, 8]; + let mut set: IndexSet = elements.drain(..).collect(); + { + let (i, v) = set.get_full(&0).unwrap(); + assert_eq!(set.len(), 5); + assert_eq!(i, 0); + assert_eq!(*v, 0); + } + { + let replaced = set.replace(0); + let (i, v) = set.get_full(&0).unwrap(); + assert_eq!(set.len(), 5); + assert_eq!(replaced, Some(0)); + assert_eq!(i, 0); + assert_eq!(*v, 0); + } +} + +#[test] +fn replace_order() { + let replace = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let mut set = IndexSet::new(); + + for &elt in &replace { + set.replace(elt); + } + + assert_eq!(set.iter().count(), set.len()); + assert_eq!(set.iter().count(), replace.len()); + for (a, b) in replace.iter().zip(set.iter()) { + 
assert_eq!(a, b); + } + for (i, v) in (0..replace.len()).zip(set.iter()) { + assert_eq!(set.get_index(i).unwrap(), v); + } +} + +#[test] +fn grow() { + let insert = [0, 4, 2, 12, 8, 7, 11]; + let not_present = [1, 3, 6, 9, 10]; + let mut set = IndexSet::with_capacity(insert.len()); + + for (i, &elt) in insert.iter().enumerate() { + assert_eq!(set.len(), i); + set.insert(elt); + assert_eq!(set.len(), i + 1); + assert_eq!(set.get(&elt), Some(&elt)); + } + + println!("{:?}", set); + for &elt in &insert { + set.insert(elt * 10); + } + for &elt in &insert { + set.insert(elt * 100); + } + for (i, &elt) in insert.iter().cycle().enumerate().take(100) { + set.insert(elt * 100 + i as i32); + } + println!("{:?}", set); + for &elt in ¬_present { + assert!(set.get(&elt).is_none()); + } +} + +#[test] +fn reserve() { + let mut set = IndexSet::::new(); + assert_eq!(set.capacity(), 0); + set.reserve(100); + let capacity = set.capacity(); + assert!(capacity >= 100); + for i in 0..capacity { + assert_eq!(set.len(), i); + set.insert(i); + assert_eq!(set.len(), i + 1); + assert_eq!(set.capacity(), capacity); + assert_eq!(set.get(&i), Some(&i)); + } + set.insert(capacity); + assert_eq!(set.len(), capacity + 1); + assert!(set.capacity() > capacity); + assert_eq!(set.get(&capacity), Some(&capacity)); +} + +#[test] +fn try_reserve() { + let mut set = IndexSet::::new(); + assert_eq!(set.capacity(), 0); + assert_eq!(set.try_reserve(100), Ok(())); + assert!(set.capacity() >= 100); + assert!(set.try_reserve(usize::MAX).is_err()); +} + +#[test] +fn shrink_to_fit() { + let mut set = IndexSet::::new(); + assert_eq!(set.capacity(), 0); + for i in 0..100 { + assert_eq!(set.len(), i); + set.insert(i); + assert_eq!(set.len(), i + 1); + assert!(set.capacity() >= i + 1); + assert_eq!(set.get(&i), Some(&i)); + set.shrink_to_fit(); + assert_eq!(set.len(), i + 1); + assert_eq!(set.capacity(), i + 1); + assert_eq!(set.get(&i), Some(&i)); + } +} + +#[test] +fn remove() { + let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let mut set = IndexSet::new(); + + for &elt in &insert { + set.insert(elt); + } + + assert_eq!(set.iter().count(), set.len()); + assert_eq!(set.iter().count(), insert.len()); + for (a, b) in insert.iter().zip(set.iter()) { + assert_eq!(a, b); + } + + let remove_fail = [99, 77]; + let remove = [4, 12, 8, 7]; + + for &value in &remove_fail { + assert!(set.swap_remove_full(&value).is_none()); + } + println!("{:?}", set); + for &value in &remove { + //println!("{:?}", set); + let index = set.get_full(&value).unwrap().0; + assert_eq!(set.swap_remove_full(&value), Some((index, value))); + } + println!("{:?}", set); + + for value in &insert { + assert_eq!(set.get(value).is_some(), !remove.contains(value)); + } + assert_eq!(set.len(), insert.len() - remove.len()); + assert_eq!(set.iter().count(), insert.len() - remove.len()); +} + +#[test] +fn swap_remove_index() { + let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let mut set = IndexSet::new(); + + for &elt in &insert { + set.insert(elt); + } + + let mut vector = insert.to_vec(); + let remove_sequence = &[3, 3, 10, 4, 5, 4, 3, 0, 1]; + + // check that the same swap remove sequence on vec and set + // have the same result. 
+ for &rm in remove_sequence { + let out_vec = vector.swap_remove(rm); + let out_set = set.swap_remove_index(rm).unwrap(); + assert_eq!(out_vec, out_set); + } + assert_eq!(vector.len(), set.len()); + for (a, b) in vector.iter().zip(set.iter()) { + assert_eq!(a, b); + } +} + +#[test] +fn partial_eq_and_eq() { + let mut set_a = IndexSet::new(); + set_a.insert(1); + set_a.insert(2); + let mut set_b = set_a.clone(); + assert_eq!(set_a, set_b); + set_b.swap_remove(&1); + assert_ne!(set_a, set_b); + + let set_c: IndexSet<_> = set_b.into_iter().collect(); + assert_ne!(set_a, set_c); + assert_ne!(set_c, set_a); +} + +#[test] +fn extend() { + let mut set = IndexSet::new(); + set.extend(vec![&1, &2, &3, &4]); + set.extend(vec![5, 6]); + assert_eq!(set.into_iter().collect::>(), vec![1, 2, 3, 4, 5, 6]); +} + +#[test] +fn comparisons() { + let set_a: IndexSet<_> = (0..3).collect(); + let set_b: IndexSet<_> = (3..6).collect(); + let set_c: IndexSet<_> = (0..6).collect(); + let set_d: IndexSet<_> = (3..9).collect(); + + assert!(!set_a.is_disjoint(&set_a)); + assert!(set_a.is_subset(&set_a)); + assert!(set_a.is_superset(&set_a)); + + assert!(set_a.is_disjoint(&set_b)); + assert!(set_b.is_disjoint(&set_a)); + assert!(!set_a.is_subset(&set_b)); + assert!(!set_b.is_subset(&set_a)); + assert!(!set_a.is_superset(&set_b)); + assert!(!set_b.is_superset(&set_a)); + + assert!(!set_a.is_disjoint(&set_c)); + assert!(!set_c.is_disjoint(&set_a)); + assert!(set_a.is_subset(&set_c)); + assert!(!set_c.is_subset(&set_a)); + assert!(!set_a.is_superset(&set_c)); + assert!(set_c.is_superset(&set_a)); + + assert!(!set_c.is_disjoint(&set_d)); + assert!(!set_d.is_disjoint(&set_c)); + assert!(!set_c.is_subset(&set_d)); + assert!(!set_d.is_subset(&set_c)); + assert!(!set_c.is_superset(&set_d)); + assert!(!set_d.is_superset(&set_c)); +} + +#[test] +fn iter_comparisons() { + use std::iter::empty; + + fn check<'a, I1, I2>(iter1: I1, iter2: I2) + where + I1: Iterator, + I2: Iterator, + { + assert!(iter1.copied().eq(iter2)); + } + + let set_a: IndexSet<_> = (0..3).collect(); + let set_b: IndexSet<_> = (3..6).collect(); + let set_c: IndexSet<_> = (0..6).collect(); + let set_d: IndexSet<_> = (3..9).rev().collect(); + + check(set_a.difference(&set_a), empty()); + check(set_a.symmetric_difference(&set_a), empty()); + check(set_a.intersection(&set_a), 0..3); + check(set_a.union(&set_a), 0..3); + + check(set_a.difference(&set_b), 0..3); + check(set_b.difference(&set_a), 3..6); + check(set_a.symmetric_difference(&set_b), 0..6); + check(set_b.symmetric_difference(&set_a), (3..6).chain(0..3)); + check(set_a.intersection(&set_b), empty()); + check(set_b.intersection(&set_a), empty()); + check(set_a.union(&set_b), 0..6); + check(set_b.union(&set_a), (3..6).chain(0..3)); + + check(set_a.difference(&set_c), empty()); + check(set_c.difference(&set_a), 3..6); + check(set_a.symmetric_difference(&set_c), 3..6); + check(set_c.symmetric_difference(&set_a), 3..6); + check(set_a.intersection(&set_c), 0..3); + check(set_c.intersection(&set_a), 0..3); + check(set_a.union(&set_c), 0..6); + check(set_c.union(&set_a), 0..6); + + check(set_c.difference(&set_d), 0..3); + check(set_d.difference(&set_c), (6..9).rev()); + check( + set_c.symmetric_difference(&set_d), + (0..3).chain((6..9).rev()), + ); + check(set_d.symmetric_difference(&set_c), (6..9).rev().chain(0..3)); + check(set_c.intersection(&set_d), 3..6); + check(set_d.intersection(&set_c), (3..6).rev()); + check(set_c.union(&set_d), (0..6).chain((6..9).rev())); + check(set_d.union(&set_c), 
(3..9).rev().chain(0..3)); +} + +#[test] +fn ops() { + let empty = IndexSet::::new(); + let set_a: IndexSet<_> = (0..3).collect(); + let set_b: IndexSet<_> = (3..6).collect(); + let set_c: IndexSet<_> = (0..6).collect(); + let set_d: IndexSet<_> = (3..9).rev().collect(); + + #[allow(clippy::eq_op)] + { + assert_eq!(&set_a & &set_a, set_a); + assert_eq!(&set_a | &set_a, set_a); + assert_eq!(&set_a ^ &set_a, empty); + assert_eq!(&set_a - &set_a, empty); + } + + assert_eq!(&set_a & &set_b, empty); + assert_eq!(&set_b & &set_a, empty); + assert_eq!(&set_a | &set_b, set_c); + assert_eq!(&set_b | &set_a, set_c); + assert_eq!(&set_a ^ &set_b, set_c); + assert_eq!(&set_b ^ &set_a, set_c); + assert_eq!(&set_a - &set_b, set_a); + assert_eq!(&set_b - &set_a, set_b); + + assert_eq!(&set_a & &set_c, set_a); + assert_eq!(&set_c & &set_a, set_a); + assert_eq!(&set_a | &set_c, set_c); + assert_eq!(&set_c | &set_a, set_c); + assert_eq!(&set_a ^ &set_c, set_b); + assert_eq!(&set_c ^ &set_a, set_b); + assert_eq!(&set_a - &set_c, empty); + assert_eq!(&set_c - &set_a, set_b); + + assert_eq!(&set_c & &set_d, set_b); + assert_eq!(&set_d & &set_c, set_b); + assert_eq!(&set_c | &set_d, &set_a | &set_d); + assert_eq!(&set_d | &set_c, &set_a | &set_d); + assert_eq!(&set_c ^ &set_d, &set_a | &(&set_d - &set_b)); + assert_eq!(&set_d ^ &set_c, &set_a | &(&set_d - &set_b)); + assert_eq!(&set_c - &set_d, set_a); + assert_eq!(&set_d - &set_c, &set_d - &set_b); +} + +#[test] +#[cfg(feature = "std")] +fn from_array() { + let set1 = IndexSet::from([1, 2, 3, 4]); + let set2: IndexSet<_> = [1, 2, 3, 4].into(); + + assert_eq!(set1, set2); +} + +#[test] +fn iter_default() { + struct Item; + fn assert_default() + where + T: Default + Iterator, + { + assert!(T::default().next().is_none()); + } + assert_default::>(); + assert_default::>(); +} + +#[test] +fn test_binary_search_by() { + // adapted from std's test for binary_search + let b: IndexSet = [].into(); + assert_eq!(b.binary_search_by(|x| x.cmp(&5)), Err(0)); + + let b: IndexSet = [4].into(); + assert_eq!(b.binary_search_by(|x| x.cmp(&3)), Err(0)); + assert_eq!(b.binary_search_by(|x| x.cmp(&4)), Ok(0)); + assert_eq!(b.binary_search_by(|x| x.cmp(&5)), Err(1)); + + let b: IndexSet = [1, 2, 4, 6, 8, 9].into(); + assert_eq!(b.binary_search_by(|x| x.cmp(&5)), Err(3)); + assert_eq!(b.binary_search_by(|x| x.cmp(&6)), Ok(3)); + assert_eq!(b.binary_search_by(|x| x.cmp(&7)), Err(4)); + assert_eq!(b.binary_search_by(|x| x.cmp(&8)), Ok(4)); + + let b: IndexSet = [1, 2, 4, 5, 6, 8].into(); + assert_eq!(b.binary_search_by(|x| x.cmp(&9)), Err(6)); + + let b: IndexSet = [1, 2, 4, 6, 7, 8, 9].into(); + assert_eq!(b.binary_search_by(|x| x.cmp(&6)), Ok(3)); + assert_eq!(b.binary_search_by(|x| x.cmp(&5)), Err(3)); + assert_eq!(b.binary_search_by(|x| x.cmp(&8)), Ok(5)); + + let b: IndexSet = [1, 2, 4, 5, 6, 8, 9].into(); + assert_eq!(b.binary_search_by(|x| x.cmp(&7)), Err(5)); + assert_eq!(b.binary_search_by(|x| x.cmp(&0)), Err(0)); + + let b: IndexSet = [1, 3, 3, 3, 7].into(); + assert_eq!(b.binary_search_by(|x| x.cmp(&0)), Err(0)); + assert_eq!(b.binary_search_by(|x| x.cmp(&1)), Ok(0)); + assert_eq!(b.binary_search_by(|x| x.cmp(&2)), Err(1)); + // diff from std as set merges the duplicate keys + assert!(match b.binary_search_by(|x| x.cmp(&3)) { + Ok(1..=2) => true, + _ => false, + }); + assert!(match b.binary_search_by(|x| x.cmp(&3)) { + Ok(1..=2) => true, + _ => false, + }); + assert_eq!(b.binary_search_by(|x| x.cmp(&4)), Err(2)); + assert_eq!(b.binary_search_by(|x| x.cmp(&5)), Err(2)); + 
assert_eq!(b.binary_search_by(|x| x.cmp(&6)), Err(2)); + assert_eq!(b.binary_search_by(|x| x.cmp(&7)), Ok(2)); + assert_eq!(b.binary_search_by(|x| x.cmp(&8)), Err(3)); +} + +#[test] +fn test_binary_search_by_key() { + // adapted from std's test for binary_search + let b: IndexSet = [].into(); + assert_eq!(b.binary_search_by_key(&5, |&x| x), Err(0)); + + let b: IndexSet = [4].into(); + assert_eq!(b.binary_search_by_key(&3, |&x| x), Err(0)); + assert_eq!(b.binary_search_by_key(&4, |&x| x), Ok(0)); + assert_eq!(b.binary_search_by_key(&5, |&x| x), Err(1)); + + let b: IndexSet = [1, 2, 4, 6, 8, 9].into(); + assert_eq!(b.binary_search_by_key(&5, |&x| x), Err(3)); + assert_eq!(b.binary_search_by_key(&6, |&x| x), Ok(3)); + assert_eq!(b.binary_search_by_key(&7, |&x| x), Err(4)); + assert_eq!(b.binary_search_by_key(&8, |&x| x), Ok(4)); + + let b: IndexSet = [1, 2, 4, 5, 6, 8].into(); + assert_eq!(b.binary_search_by_key(&9, |&x| x), Err(6)); + + let b: IndexSet = [1, 2, 4, 6, 7, 8, 9].into(); + assert_eq!(b.binary_search_by_key(&6, |&x| x), Ok(3)); + assert_eq!(b.binary_search_by_key(&5, |&x| x), Err(3)); + assert_eq!(b.binary_search_by_key(&8, |&x| x), Ok(5)); + + let b: IndexSet = [1, 2, 4, 5, 6, 8, 9].into(); + assert_eq!(b.binary_search_by_key(&7, |&x| x), Err(5)); + assert_eq!(b.binary_search_by_key(&0, |&x| x), Err(0)); + + let b: IndexSet = [1, 3, 3, 3, 7].into(); + assert_eq!(b.binary_search_by_key(&0, |&x| x), Err(0)); + assert_eq!(b.binary_search_by_key(&1, |&x| x), Ok(0)); + assert_eq!(b.binary_search_by_key(&2, |&x| x), Err(1)); + // diff from std as set merges the duplicate keys + assert!(match b.binary_search_by_key(&3, |&x| x) { + Ok(1..=2) => true, + _ => false, + }); + assert!(match b.binary_search_by_key(&3, |&x| x) { + Ok(1..=2) => true, + _ => false, + }); + assert_eq!(b.binary_search_by_key(&4, |&x| x), Err(2)); + assert_eq!(b.binary_search_by_key(&5, |&x| x), Err(2)); + assert_eq!(b.binary_search_by_key(&6, |&x| x), Err(2)); + assert_eq!(b.binary_search_by_key(&7, |&x| x), Ok(2)); + assert_eq!(b.binary_search_by_key(&8, |&x| x), Err(3)); +} + +#[test] +fn test_partition_point() { + // adapted from std's test for partition_point + let b: IndexSet = [].into(); + assert_eq!(b.partition_point(|&x| x < 5), 0); + + let b: IndexSet<_> = [4].into(); + assert_eq!(b.partition_point(|&x| x < 3), 0); + assert_eq!(b.partition_point(|&x| x < 4), 0); + assert_eq!(b.partition_point(|&x| x < 5), 1); + + let b: IndexSet<_> = [1, 2, 4, 6, 8, 9].into(); + assert_eq!(b.partition_point(|&x| x < 5), 3); + assert_eq!(b.partition_point(|&x| x < 6), 3); + assert_eq!(b.partition_point(|&x| x < 7), 4); + assert_eq!(b.partition_point(|&x| x < 8), 4); + + let b: IndexSet<_> = [1, 2, 4, 5, 6, 8].into(); + assert_eq!(b.partition_point(|&x| x < 9), 6); + + let b: IndexSet<_> = [1, 2, 4, 6, 7, 8, 9].into(); + assert_eq!(b.partition_point(|&x| x < 6), 3); + assert_eq!(b.partition_point(|&x| x < 5), 3); + assert_eq!(b.partition_point(|&x| x < 8), 5); + + let b: IndexSet<_> = [1, 2, 4, 5, 6, 8, 9].into(); + assert_eq!(b.partition_point(|&x| x < 7), 5); + assert_eq!(b.partition_point(|&x| x < 0), 0); + + let b: IndexSet<_> = [1, 3, 3, 3, 7].into(); + assert_eq!(b.partition_point(|&x| x < 0), 0); + assert_eq!(b.partition_point(|&x| x < 1), 0); + assert_eq!(b.partition_point(|&x| x < 2), 1); + assert_eq!(b.partition_point(|&x| x < 3), 1); + assert_eq!(b.partition_point(|&x| x < 4), 2); // diff from std as set merges the duplicate keys + assert_eq!(b.partition_point(|&x| x < 5), 2); + assert_eq!(b.partition_point(|&x| 
x < 6), 2); + assert_eq!(b.partition_point(|&x| x < 7), 2); + assert_eq!(b.partition_point(|&x| x < 8), 3); +} diff --git a/vendor/indexmap/src/util.rs b/vendor/indexmap/src/util.rs index a24dfaf..377ff51 100644 --- a/vendor/indexmap/src/util.rs +++ b/vendor/indexmap/src/util.rs @@ -29,3 +29,25 @@ where } start..end } + +pub(crate) fn try_simplify_range(range: R, len: usize) -> Option> +where + R: RangeBounds, +{ + let start = match range.start_bound() { + Bound::Unbounded => 0, + Bound::Included(&i) if i <= len => i, + Bound::Excluded(&i) if i < len => i + 1, + _ => return None, + }; + let end = match range.end_bound() { + Bound::Unbounded => len, + Bound::Excluded(&i) if i <= len => i, + Bound::Included(&i) if i < len => i + 1, + _ => return None, + }; + if start > end { + return None; + } + Some(start..end) +} diff --git a/vendor/indexmap/tests/quick.rs b/vendor/indexmap/tests/quick.rs index e9d96ac..4142e2d 100644 --- a/vendor/indexmap/tests/quick.rs +++ b/vendor/indexmap/tests/quick.rs @@ -452,6 +452,12 @@ quickcheck_limit! { assert_sorted_by_key(map, |t| t.1); } + fn sort_3(keyvals: Large>) -> () { + let mut map: IndexMap<_, _> = IndexMap::from_iter(keyvals.to_vec()); + map.sort_by_cached_key(|&k, _| std::cmp::Reverse(k)); + assert_sorted_by_key(map, |t| std::cmp::Reverse(t.0)); + } + fn reverse(keyvals: Large>) -> () { let mut map: IndexMap<_, _> = IndexMap::from_iter(keyvals.to_vec()); diff --git a/vendor/memchr/.cargo-checksum.json b/vendor/memchr/.cargo-checksum.json index f496380..4b915f3 100644 --- a/vendor/memchr/.cargo-checksum.json +++ b/vendor/memchr/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"fdeda7d32fa12e4a1589d13c74ae5fd4f1065d0219ba73f8492e28248d84d146","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"51d941627e004588863b137918e908e34c4d599d12e03afd3e489e2bb61e3704","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","build.rs":"5638d9b60d40f44db96767ce32246de42158571364cce92531a85307ac7eda6c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","scripts/make-byte-frequency-table":"21d1ded41fe5a780507bb88e1910d471b4081cc626a48891a408712e45b7b2bf","src/cow.rs":"a23c3b009e5215b5c3ac46627a5dd844235bef0136d76b3fc1eeeb744565c125","src/lib.rs":"9430cd37b13399df8f8c27a752ccdf6422a563e24171d1b4802424f9193a8f37","src/memchr/c.rs":"34f7caf79316f4b03908832fdbd4aff367f2bc30eae291478cc5a0a108ce6e76","src/memchr/fallback.rs":"48764f18b7ff1f00a9ac1c4ed8ec96ad11f7b09b2d062a8ed3fe81160add627d","src/memchr/iter.rs":"61463e7fa22ca8f212c2cbfb882af0c87b0fb1bc6b4676678a4822a581ec1037","src/memchr/mod.rs":"d5bfc881c7c089e1a0825209a4d21c3f792f38c6f16f3bc715d0d539477376b6","src/memchr/naive.rs":"c7453bc99cc4e58eb37cf5a50c88688833e50a270ee1849baefddb8acc0ccd94","src/memchr/x86/avx.rs":"3c2750174ce7ff033daa4096e7961bbee9a2da898068266b27dee22ef8cfddad","src/memchr/x86/mod.rs":"a642d5aefdb7452ead4ab7946b5c6cfb6cc6df636dcd0ebbd6f5e6e1ac8305c0","src/memchr/x86/sse2.rs":"79ede1aba71a655e86eb5873d682c5da26933bffa4fffd7042a2313f18cf4675","src/memchr/x86/sse42.rs":"de4c6f354dbfec170876cddb8d9157b35928f96ed2339a0c5d094cc953a2f52d","src/memmem/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/memmem/genericsimd.rs":"9ce7283db0994438eb6df2bea6ad984e80512b6f643ebae7ae7d82eb5d39fa11","src/memmem/mod.rs":"949fb8e11a23030d59b34fd8c7c196150f133e909a8448705c77a751c436907d","src/memm
em/prefilter/fallback.rs":"d32248c41aa09701c2410c52f948bbe009dd1b13a01b444ce0fb8c4b4e404ede","src/memmem/prefilter/genericsimd.rs":"57d5523cf0299b37ef1dd1b351e3d387d5070f2f7ecffc9a9ca66528101ebd3f","src/memmem/prefilter/mod.rs":"ad8b4ac72c025f11d6b641c5fc0888468112758dcdc6bb72b43f932d2005ea4e","src/memmem/prefilter/wasm.rs":"14f684412fca35445a94760a6973d772dfd22d329ebae3b52b525d2a1f3acd63","src/memmem/prefilter/x86/avx.rs":"e344cae36a88b59c07a1c1d395edeb9c636a399e1528ce69b2bc7c94d8d8bb0b","src/memmem/prefilter/x86/mod.rs":"df2d84b23b22574383c281d33671a121b5faf7b1a48dd6f67c3085cd02cd4498","src/memmem/prefilter/x86/sse.rs":"daa648fc2a90d37299803a80d632e8a47a30ce8719d0ac2a2ea2cde3b30b6fef","src/memmem/rabinkarp.rs":"9b44eb092524a51792eba4deaca6c6d3cbc51db98cb548ea4fa7e5d8988cc71a","src/memmem/rarebytes.rs":"571082c71fc3dca5e4304171d41fb3c44e241df6dcd88bac4d7a15b52f9521e0","src/memmem/twoway.rs":"102f8bbb29696d5656cd2f5a1769a3af96d044fb09972881455cfb6424d6b50a","src/memmem/util.rs":"0194d40b912137e2352863af9cc1c0273baf97fdf6b27799628680846c06febd","src/memmem/vector.rs":"96e6f45f8ad11a822c4f18393839225d7f40f898ad657e109ba1b3288af0ef8f","src/memmem/wasm.rs":"87da03c964f054db30cc972d07a74e8902ec1248e2338ecd1dbac430f43fffc2","src/memmem/x86/avx.rs":"de85dbc415603c844baf94fbc92d676a738dd4b99246be468bd5f7be5921b25f","src/memmem/x86/mod.rs":"5012fca41b91caf229278aa221e8dd514ede497fe4938d64562d03fef2fc46e6","src/memmem/x86/sse.rs":"148a40c0952aca8b16d9eb3e724a5b9b60693bc7b2bcc5209bcc43c94faf560a","src/tests/memchr/iter.rs":"b68c7ecdb6222c5dbf61212e6863f78f98ad343868a74cb8612692fc790240b2","src/tests/memchr/memchr.rs":"09589c5899324c9b26ea4513c80389a2ffdf6ddc460031e2ca8da43bd493ae3f","src/tests/memchr/mod.rs":"29e0855f946c7babf603b3d610a29235a56a26a4c867fef0768542388eac4c95","src/tests/memchr/simple.rs":"b9997903ede972272c01c1750522a20692a28488cc7c5cf745ea83ff96d65fe3","src/tests/memchr/testdata.rs":"3e34377fe60eca3687d1ebc66127bd631af27ceaccc8f08806a293199b69a83f","src/tests/mod.rs":"9054a2a2f9af140f305ca29155d942fafbac9fb0874067611adc8a5990546be4","src/tests/x86_64-soft_float.json":"c0e416487fe9b4809534edb7db2a9eff3453dc40d9f1e23362c37f45a77ec717"},"package":"2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"} \ No newline at end of file 
+{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"8f521805349c8df78da2ccaeec8b48f2f9fa1667887aaf89950ae555cbf6f8b2","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"92a74aaffe011bdaa06fbc34a01686a6eba58ca1322e976759417a547fddf734","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/arch/aarch64/memchr.rs":"5bb70f915084e629d940dbc322f5b9096b2e658cf63fea8a2f6e7550412e73a0","src/arch/aarch64/mod.rs":"44cd1a614bd66f1e66fc86c541d3c3b8d3a14a644c13e8bf816df3f555eac2d4","src/arch/aarch64/neon/memchr.rs":"e8c00b8fb2c7e2711832ae3cedefe59f32ebedd7dfa4d0ec6de2a566c979daea","src/arch/aarch64/neon/mod.rs":"eab6d56c2b2354db4ee395f40282cd49f97e2ab853547be5de6e65fbe1b2f634","src/arch/aarch64/neon/packedpair.rs":"fbdfdbfaf7b76b234db261fbe55a55c4479d32cdc65a654d60417c2d1c237849","src/arch/all/memchr.rs":"f6c51e50309f80e749887572297aa5efc19a3c7e5434b4404a101b3e6b152be1","src/arch/all/mod.rs":"05f3fc2b069682eb1545fc6366d167bb620a454365dac8b8dd6cde6cd64de18a","src/arch/all/packedpair/default_rank.rs":"abffd1b5b8b7a3be95c03dd1105b905c246a379854dc56f1e846ea7c4408f2c7","src/arch/all/packedpair/mod.rs":"292b66042c5b5c78bba33db6526aeae6904db803d601fcdd29032b87b3eb3754","src/arch/all/rabinkarp.rs":"236f69c04b90c14c253ae6c8d9b78150b4a56df75bb50af6d63b15145668b7cc","src/arch/all/shiftor.rs":"0d79117f52a1e4795843603a3bb0b45397df4ad5e4184bbc923658dab9dc3b5f","src/arch/all/twoway.rs":"47c97a265bfbafde90a618946643d3e97dfd9a85f01aa4ac758cd4c1573a450d","src/arch/generic/memchr.rs":"88290761bab740878401e914d71866da6501cdcef53d1249ec6fda4c7f9c12ae","src/arch/generic/mod.rs":"1dd75f61e0ea2563b8205a08aaa7b55500130aa331d18b9e9f995724b66c7a39","src/arch/generic/packedpair.rs":"a4a6efb29877ced9cf4c4e5ae9f36a79f019a16b831f2b9424899a1513d458ad","src/arch/mod.rs":"6dbd9e0b1b89fecb9faac5df6edfc87e24607e9099136aa831f3f056b14e22db","src/arch/wasm32/memchr.rs":"bfaaeca702cc32e605a06d5078d26ac59263d3c4eb04f9756e6be5e2850c3d0d","src/arch/wasm32/mod.rs":"a20377aa8fe07d68594879101dc73061e4f51d9c8d812b593b1f376e3c8add79","src/arch/wasm32/simd128/memchr.rs":"bac2c4c43fe710c83a6f2b1118fede043be89dd821d4b532907f129f09fdb5cf","src/arch/wasm32/simd128/mod.rs":"c157b373faedbfd65323be432e25bc411d97aa1b7bc58e76048614c7b2bf3bf6","src/arch/wasm32/simd128/packedpair.rs":"47e7875f1a0b502f3f30ddfd9257ed7ad4568fb7d968b5e6c01ba9e2aab2a459","src/arch/x86_64/avx2/memchr.rs":"576ec0c30f49874f7fd9f6caeb490d56132c0fbbaa4d877b1aa532cafce19323","src/arch/x86_64/avx2/mod.rs":"0033d1b712d0b10f0f273ef9aa8caa53e05e49f4c56a64f39af0b9df97eec584","src/arch/x86_64/avx2/packedpair.rs":"87b69cb4301815906127db4f6370f572c7c5d5dad35c0946c00ad888dbcaec8c","src/arch/x86_64/memchr.rs":"99a1dbe4156d498e6f910d06d3d3b31e7f6d06dff7d13a4c51b33a02b7e2fba9","src/arch/x86_64/mod.rs":"61b2aa876942fd3e78714c2ae21e356c8634545c06995020f443fa50218df027","src/arch/x86_64/sse2/memchr.rs":"68fc3b8f9eddf82192979c3aa11e5141f085cbb993c49c340558719a904679dc","src/arch/x86_64/sse2/mod.rs":"38b70ae52a64ec974dbb91d04d6ca8013d9e06d1fe4af852206bbc2faf1c59aa","src/arch/x86_64/sse2/packedpair.rs":"241ea981d8eea6024769f1c9375f726a9bb9700160c5857781d4befd9f5ef55d","src/cow.rs":"34eddd02cb82cc2d5a2c640891d64efe332dabcc1eea5115764200d8f46b66f7","src/ext.rs":"c472bcc41a7ef48980d976e954e87ef9fdfdfd30ac0199b959cc7e5b9d563ab3","src/lib.rs":"22509be5f0c201773ee26bb21bf5c0491e287c4414c02b7faa6ea5177ce0f32f","sr
c/macros.rs":"3e4b39252bfa471fad384160a43f113ebfec7bec46a85d16f006622881dd2081","src/memchr.rs":"36f1c03304261877cd7f75c7ed8f7daff7a5c570cedce375e38e9b1ca44467f7","src/memmem/mod.rs":"1b0a9d6a681fd0887c677c4fc8d4c8f9719ddde250bdd5ea545365c1a7fb9094","src/memmem/searcher.rs":"69c38fb33d8f1a2a26769a81e514428240c8f8f15cea5302873d90b80391dd89","src/tests/memchr/mod.rs":"269f8e4b4f7f5ea458f27a3c174eb1020ffb2484eeba9464170beb51747df69b","src/tests/memchr/naive.rs":"6a0bee033e5edfb5b1d5769a5fa1c78388f7e9ff7bb91cb67f0ad029289e00e7","src/tests/memchr/prop.rs":"7bf7435087fbf08c5014c216b76575349735590d6b1d0e448921a1dc17bc0ea7","src/tests/mod.rs":"7cec8f809e279310a465c6a7725087970f219a676cc76c83de30c695bb490740","src/tests/packedpair.rs":"b02ec4fbb61a8653cb5f2268c31bc9168b8043347f2abdcc74081acf83b98e15","src/tests/substring/mod.rs":"c7660d10749363ac4687e7da2b5fda60768230425df8ba416c0c28b8d56a5c74","src/tests/substring/naive.rs":"df6f55d165382b8a53762ba4c324926cac13ebc62cde1805f4ce08740b326483","src/tests/substring/prop.rs":"38c15992609b5681a95d838ae6f2933e00a1219f2c971bfba245f96e0729fcdc","src/tests/x86_64-soft_float.json":"c0e416487fe9b4809534edb7db2a9eff3453dc40d9f1e23362c37f45a77ec717","src/vector.rs":"ef823ae8c54053780a0e7aeaee14b6c6ac2aea4567bf701ae8be137806c6d293"},"package":"523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"} \ No newline at end of file diff --git a/vendor/memchr/Cargo.toml b/vendor/memchr/Cargo.toml index 6301952..9e95051 100644 --- a/vendor/memchr/Cargo.toml +++ b/vendor/memchr/Cargo.toml @@ -10,41 +10,50 @@ # See Cargo.toml.orig for the original contents. [package] -edition = "2018" +edition = "2021" +rust-version = "1.61" name = "memchr" -version = "2.5.0" +version = "2.7.1" authors = [ "Andrew Gallant ", "bluss", ] exclude = [ - "/bench", "/.github", + "/benchmarks", "/fuzz", + "/scripts", + "/tmp", ] -description = "Safe interface to memchr." +description = """ +Provides extremely fast (uses SIMD on x86_64, aarch64 and wasm32) routines for +1, 2 or 3 byte search and single substring search. +""" homepage = "https://github.com/BurntSushi/memchr" documentation = "https://docs.rs/memchr/" readme = "README.md" keywords = [ "memchr", - "char", - "scan", - "strchr", - "string", + "memmem", + "substring", + "find", + "search", ] -license = "Unlicense/MIT" +license = "Unlicense OR MIT" repository = "https://github.com/BurntSushi/memchr" +[package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] + [profile.bench] -debug = true +debug = 2 [profile.release] -debug = true +debug = 2 [profile.test] opt-level = 3 -debug = true +debug = 2 [lib] name = "memchr" @@ -59,20 +68,22 @@ version = "1.0.0" optional = true package = "rustc-std-workspace-core" -[dependencies.libc] -version = "0.2.18" +[dependencies.log] +version = "0.4.20" optional = true -default-features = false [dev-dependencies.quickcheck] version = "1.0.3" default-features = false [features] +alloc = [] default = ["std"] +libc = [] +logging = ["dep:log"] rustc-dep-of-std = [ "core", "compiler_builtins", ] -std = [] +std = ["alloc"] use_std = ["std"] diff --git a/vendor/memchr/README.md b/vendor/memchr/README.md index 77a7a0f..db00ebb 100644 --- a/vendor/memchr/README.md +++ b/vendor/memchr/README.md @@ -35,30 +35,19 @@ memchr links to the standard library by default, but you can disable the memchr = { version = "2", default-features = false } ``` -On x86 platforms, when the `std` feature is disabled, the SSE2 accelerated -implementations will be used. 
When `std` is enabled, AVX accelerated +On `x86_64` platforms, when the `std` feature is disabled, the SSE2 accelerated +implementations will be used. When `std` is enabled, AVX2 accelerated implementations will be used if the CPU is determined to support it at runtime. -### Using libc - -`memchr` is a routine that is part of libc, although this crate does not use -libc by default. Instead, it uses its own routines, which are either vectorized -or generic fallback routines. In general, these should be competitive with -what's in libc, although this has not been tested for all architectures. If -using `memchr` from libc is desirable and a vectorized routine is not otherwise -available in this crate, then enabling the `libc` feature will use libc's -version of `memchr`. - -The rest of the functions in this crate, e.g., `memchr2` or `memrchr3` and the -substring search routines, will always use the implementations in this crate. -One exception to this is `memrchr`, which is an extension in `libc` found on -Linux. On Linux, `memrchr` is used in precisely the same scenario as `memchr`, -as described above. +SIMD accelerated routines are also available on the `wasm32` and `aarch64` +targets. The `std` feature is not required to use them. +When a SIMD version is not available, then this crate falls back to +[SWAR](https://en.wikipedia.org/wiki/SWAR) techniques. ### Minimum Rust version policy -This crate's minimum supported `rustc` version is `1.41.1`. +This crate's minimum supported `rustc` version is `1.61.0`. The current policy is that the minimum Rust version required to use this crate can be increased in minor version updates. For example, if `crate 1.0` requires @@ -105,3 +94,103 @@ has a few different algorithms to choose from depending on the situation. is used. If possible, a prefilter based on the "Generic SIMD" algorithm linked above is used to find candidates quickly. A dynamic heuristic is used to detect if the prefilter is ineffective, and if so, disables it. + + +### Why is the standard library's substring search so much slower? + +We'll start by establishing what the difference in performance actually +is. There are two relevant benchmark classes to consider: `prebuilt` and +`oneshot`. The `prebuilt` benchmarks are designed to measure---to the extent +possible---search time only. That is, the benchmark first starts by building a +searcher and then only tracking the time for _using_ the searcher: + +``` +$ rebar rank benchmarks/record/x86_64/2023-08-26.csv --intersection -e memchr/memmem/prebuilt -e std/memmem/prebuilt +Engine Version Geometric mean of speed ratios Benchmark count +------ ------- ------------------------------ --------------- +rust/memchr/memmem/prebuilt 2.5.0 1.03 53 +rust/std/memmem/prebuilt 1.73.0-nightly 180dffba1 6.50 53 +``` + +Conversely, the `oneshot` benchmark class measures the time it takes to both +build the searcher _and_ use it: + +``` +$ rebar rank benchmarks/record/x86_64/2023-08-26.csv --intersection -e memchr/memmem/oneshot -e std/memmem/oneshot +Engine Version Geometric mean of speed ratios Benchmark count +------ ------- ------------------------------ --------------- +rust/memchr/memmem/oneshot 2.5.0 1.04 53 +rust/std/memmem/oneshot 1.73.0-nightly 180dffba1 5.26 53 +``` + +**NOTE:** Replace `rebar rank` with `rebar cmp` in the above commands to +explore the specific benchmarks and their differences. 
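To make the `prebuilt` and `oneshot` distinction concrete, here is a minimal sketch of the two usage patterns being measured. This is an editorial illustration, not code taken from the benchmark harness or the crate's documentation; it assumes only the `memchr` crate as a dependency.

```
use memchr::memmem;

fn main() {
    let haystack = "foo\nfoo bar\nbar";

    // "oneshot" style: every call pays for searcher construction
    // in addition to the search itself.
    assert_eq!(memmem::find(haystack.as_bytes(), b"bar"), Some(8));

    // "prebuilt" style: construct the searcher once, then reuse it,
    // e.g., across every line of a file.
    let finder = memmem::Finder::new("bar");
    let hits: Vec<Option<usize>> = haystack
        .lines()
        .map(|line| finder.find(line.as_bytes()))
        .collect();
    assert_eq!(hits, vec![None, Some(4), Some(0)]);
}
```

The standard library offers no analogue of the "prebuilt" pattern across multiple haystacks, which is the API limitation discussed next.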
+ +So in both cases, this crate is quite a bit faster over a broad sampling of +benchmarks regardless of whether you measure only search time or search time +plus construction time. The difference is a little smaller when you include +construction time in your measurements. + +These two different types of benchmark classes make for a nice segue into +one reason why the standard library's substring search can be slower: API +design. In the standard library, the only APIs available require you to +re-construct the searcher for every search. While you can benefit +from building a searcher once and iterating over all matches in a single +string, you cannot reuse that searcher to search other strings. This might +come up when, for example, searching a file one line at a time. You'll need +to re-build the searcher for every line searched, and this can [really +matter][burntsushi-bstr-blog]. + +**NOTE:** The `prebuilt` benchmark for the standard library can't actually +avoid measuring searcher construction at some level, because there is no API +for it. Instead, the benchmark consists of building the searcher once and then +finding all matches in a single string via an iterator. This tends to +approximate a benchmark where searcher construction isn't measured, but it +isn't perfect. While this means the comparison is not strictly +apples-to-apples, it does reflect what is maximally possible with the standard +library, and thus represents the best that one could do in a real world +scenario. + +While there is more to the story than just API design here, it's important to +point out that even if the standard library's substring search were a precise +clone of this crate internally, it would still be at a disadvantage in some +workloads because of its API. (The same also applies to C's standard library +`memmem` function. There is no way to amortize construction of the searcher. +You need to pay for it on every call.) + +The other reason for the difference in performance is that +the standard library has trouble using SIMD. In particular, substring search +is implemented in the `core` library, where platform specific code generally +can't exist. That's an issue because in order to utilize SIMD beyond SSE2 +while maintaining portable binaries, one needs to use [dynamic CPU feature +detection][dynamic-cpu], and that in turn requires platform specific code. +While there is [an RFC for enabling target feature detection in +`core`][core-feature], such detection doesn't yet exist. + +The bottom line here is that `core`'s substring search implementation is +limited to making use of SSE2, but not AVX. + +Still though, this crate does accelerate substring search even when only SSE2 +is available. The standard library could therefore adopt the techniques in this +crate just for SSE2. The reason why that hasn't happened yet isn't totally +clear to me. It likely needs a champion to push it through. The standard +library tends to be more conservative in these things. With that said, the +standard library does use some [SSE2 acceleration on `x86-64`][std-sse2] added +in [this PR][std-sse2-pr]. However, at the time of writing, it is only used +for short needles and doesn't use the frequency based heuristics found in this +crate. + +**NOTE:** Another thing worth mentioning is that the standard library's +substring search routine requires that both the needle and haystack have type +`&str`. Unless you can assume that your data is valid UTF-8, building a `&str`
This may in turn result in +overall slower searching depending on your workload. In contrast, the `memchr` +crate permits both the needle and the haystack to have type `&[u8]`, where +`&[u8]` can be created from a `&str` with zero cost. Therefore, the substring +search in this crate is strictly more flexible than what the standard library +provides. + +[burntsushi-bstr-blog]: https://blog.burntsushi.net/bstr/#motivation-based-on-performance +[dynamic-cpu]: https://doc.rust-lang.org/std/arch/index.html#dynamic-cpu-feature-detection +[core-feature]: https://github.com/rust-lang/rfcs/pull/3469 +[std-sse2]: https://github.com/rust-lang/rust/blob/bf9229a2e366b4c311f059014a4aa08af16de5d8/library/core/src/str/pattern.rs#L1719-L1857 +[std-sse2-pr]: https://github.com/rust-lang/rust/pull/103779 diff --git a/vendor/memchr/build.rs b/vendor/memchr/build.rs deleted file mode 100644 index 584a608..0000000 --- a/vendor/memchr/build.rs +++ /dev/null @@ -1,88 +0,0 @@ -use std::env; - -fn main() { - enable_simd_optimizations(); - enable_libc(); -} - -// This adds various simd cfgs if this compiler and target support it. -// -// This can be disabled with RUSTFLAGS="--cfg memchr_disable_auto_simd", but -// this is generally only intended for testing. -// -// On targets which don't feature SSE2, this is disabled, as LLVM wouln't know -// how to work with SSE2 operands. Enabling SSE4.2 and AVX on SSE2-only targets -// is not a problem. In that case, the fastest option will be chosen at -// runtime. -fn enable_simd_optimizations() { - if is_env_set("CARGO_CFG_MEMCHR_DISABLE_AUTO_SIMD") { - return; - } - let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - match &arch[..] { - "x86_64" => { - if !target_has_feature("sse2") { - return; - } - println!("cargo:rustc-cfg=memchr_runtime_simd"); - println!("cargo:rustc-cfg=memchr_runtime_sse2"); - println!("cargo:rustc-cfg=memchr_runtime_sse42"); - println!("cargo:rustc-cfg=memchr_runtime_avx"); - } - "wasm32" | "wasm64" => { - if !target_has_feature("simd128") { - return; - } - println!("cargo:rustc-cfg=memchr_runtime_simd"); - println!("cargo:rustc-cfg=memchr_runtime_wasm128"); - } - _ => {} - } -} - -// This adds a `memchr_libc` cfg if and only if libc can be used, if no other -// better option is available. -// -// This could be performed in the source code, but it's simpler to do it once -// here and consolidate it into one cfg knob. -// -// Basically, we use libc only if its enabled and if we aren't targeting a -// known bad platform. For example, wasm32 doesn't have a libc and the -// performance of memchr on Windows is seemingly worse than the fallback -// implementation. 
-fn enable_libc() { - const NO_ARCH: &'static [&'static str] = &["wasm32", "windows"]; - const NO_ENV: &'static [&'static str] = &["sgx"]; - - if !is_feature_set("LIBC") { - return; - } - - let arch = match env::var("CARGO_CFG_TARGET_ARCH") { - Err(_) => return, - Ok(arch) => arch, - }; - let env = match env::var("CARGO_CFG_TARGET_ENV") { - Err(_) => return, - Ok(env) => env, - }; - if NO_ARCH.contains(&&*arch) || NO_ENV.contains(&&*env) { - return; - } - - println!("cargo:rustc-cfg=memchr_libc"); -} - -fn is_feature_set(name: &str) -> bool { - is_env_set(&format!("CARGO_FEATURE_{}", name)) -} - -fn is_env_set(name: &str) -> bool { - env::var_os(name).is_some() -} - -fn target_has_feature(feature: &str) -> bool { - env::var("CARGO_CFG_TARGET_FEATURE") - .map(|features| features.contains(feature)) - .unwrap_or(false) -} diff --git a/vendor/memchr/scripts/make-byte-frequency-table b/vendor/memchr/scripts/make-byte-frequency-table deleted file mode 100755 index 37eeca7..0000000 --- a/vendor/memchr/scripts/make-byte-frequency-table +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python - -# This does simple normalized frequency analysis on UTF-8 encoded text. The -# result of the analysis is translated to a ranked list, where every byte is -# assigned a rank. This list is written to src/freqs.rs. -# -# Currently, the frequencies are generated from the following corpuses: -# -# * The CIA world fact book -# * The source code of rustc -# * Septuaginta - -from __future__ import absolute_import, division, print_function - -import argparse -from collections import Counter -import sys - -preamble = ''' -// NOTE: The following code was generated by "scripts/frequencies.py", do not -// edit directly -'''.lstrip() - - -def eprint(*args, **kwargs): - kwargs['file'] = sys.stderr - print(*args, **kwargs) - - -def main(): - p = argparse.ArgumentParser() - p.add_argument('corpus', metavar='FILE', nargs='+') - args = p.parse_args() - - # Get frequency counts of each byte. - freqs = Counter() - for i in range(0, 256): - freqs[i] = 0 - - eprint('reading entire corpus into memory') - corpus = [] - for fpath in args.corpus: - corpus.append(open(fpath, 'rb').read()) - - eprint('computing byte frequencies') - for c in corpus: - for byte in c: - freqs[byte] += 1.0 / float(len(c)) - - eprint('writing Rust code') - # Get the rank of each byte. A lower rank => lower relative frequency. - rank = [0] * 256 - for i, (byte, _) in enumerate(freqs.most_common()): - # print(byte) - rank[byte] = 255 - i - - # Forcefully set the highest rank possible for bytes that start multi-byte - # UTF-8 sequences. The idea here is that a continuation byte will be more - # discerning in a homogenous haystack. - for byte in range(0xC0, 0xFF + 1): - rank[byte] = 255 - - # Now write Rust. - olines = ['pub const BYTE_FREQUENCIES: [u8; 256] = ['] - for byte in range(256): - olines.append(' %3d, // %r' % (rank[byte], chr(byte))) - olines.append('];') - - print(preamble) - print('\n'.join(olines)) - - -if __name__ == '__main__': - main() diff --git a/vendor/memchr/src/arch/aarch64/memchr.rs b/vendor/memchr/src/arch/aarch64/memchr.rs new file mode 100644 index 0000000..e0053b2 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/memchr.rs @@ -0,0 +1,137 @@ +/*! +Wrapper routines for `memchr` and friends. + +These routines choose the best implementation at compile time. (This is +different from `x86_64` because it is expected that `neon` is almost always +available for `aarch64` targets.) +*/ + +macro_rules! 
defraw { + ($ty:ident, $find:ident, $start:ident, $end:ident, $($needles:ident),+) => {{ + #[cfg(target_feature = "neon")] + { + use crate::arch::aarch64::neon::memchr::$ty; + + debug!("chose neon for {}", stringify!($ty)); + debug_assert!($ty::is_available()); + // SAFETY: We know that neon memchr is always available whenever + // code is compiled for `aarch64` with the `neon` target feature + // enabled. + $ty::new_unchecked($($needles),+).$find($start, $end) + } + #[cfg(not(target_feature = "neon"))] + { + use crate::arch::all::memchr::$ty; + + debug!( + "no neon feature available, using fallback for {}", + stringify!($ty), + ); + $ty::new($($needles),+).$find($start, $end) + } + }} +} + +/// memchr, but using raw pointers to represent the haystack. + /// + /// # Safety + /// + /// Pointers must be valid. See `One::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(One, find_raw, start, end, n1) +} + +/// memrchr, but using raw pointers to represent the haystack. + /// + /// # Safety + /// + /// Pointers must be valid. See `One::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(One, rfind_raw, start, end, n1) +} + +/// memchr2, but using raw pointers to represent the haystack. + /// + /// # Safety + /// + /// Pointers must be valid. See `Two::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Two, find_raw, start, end, n1, n2) +} + +/// memrchr2, but using raw pointers to represent the haystack. + /// + /// # Safety + /// + /// Pointers must be valid. See `Two::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Two, rfind_raw, start, end, n1, n2) +} + +/// memchr3, but using raw pointers to represent the haystack. + /// + /// # Safety + /// + /// Pointers must be valid. See `Three::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Three, find_raw, start, end, n1, n2, n3) +} + +/// memrchr3, but using raw pointers to represent the haystack. + /// + /// # Safety + /// + /// Pointers must be valid. See `Three::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Three, rfind_raw, start, end, n1, n2, n3) +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. + /// + /// # Safety + /// + /// Pointers must be valid. See `One::count_raw`. +#[inline(always)] +pub(crate) unsafe fn count_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> usize { + defraw!(One, count_raw, start, end, n1) +} diff --git a/vendor/memchr/src/arch/aarch64/mod.rs b/vendor/memchr/src/arch/aarch64/mod.rs new file mode 100644 index 0000000..7b32912 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/mod.rs @@ -0,0 +1,7 @@ +/*! +Vector algorithms for the `aarch64` target. +*/ + +pub mod neon; + +pub(crate) mod memchr; diff --git a/vendor/memchr/src/arch/aarch64/neon/memchr.rs b/vendor/memchr/src/arch/aarch64/neon/memchr.rs new file mode 100644 index 0000000..5fcc762 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/neon/memchr.rs @@ -0,0 +1,1031 @@ +/*!
+This module defines 128-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, than using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point, it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::aarch64::uint8x16_t; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One(generic::One<uint8x16_t>); + +impl One { + /// Create a new searcher that finds occurrences of the needle byte given. + /// + /// This particular searcher is specialized to use neon vector instructions + /// that typically make it quite fast. + /// + /// If neon is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle: u8) -> Option<One> { + if One::is_available() { + // SAFETY: we check that neon is available above. + unsafe { Some(One::new_unchecked(needle)) } + } else { + None + } + } + + /// Create a new finder specific to neon vectors and routines without + /// checking that neon is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `neon` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `aarch64` target, then they therefore automatically have access to neon + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "neon")] + #[inline] + pub unsafe fn new_unchecked(needle: u8) -> One { + One(generic::One::new(needle)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`One::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `One::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "neon")] + { + true + } + #[cfg(not(target_feature = "neon"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`.
+ #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Counts all occurrences of this byte in the given haystack. + #[inline] + pub fn count(&self, haystack: &[u8]) -> usize { + // SAFETY: All of our pointers are derived directly from a borrowed + // slice, which is guaranteed to be valid. + unsafe { + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + self.count_raw(start, end) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object.
+ /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.rfind_raw_impl(start, end) + } + + /// Like `count`, but accepts and returns raw pointers. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `0` will always be returned. + #[inline] + pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { + if start >= end { + return 0; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::count_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.count_raw_impl(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::find_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `neon` routines.)
+ #[target_feature(enable = "neon")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Execute a count using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::count_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn count_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + self.0.count_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { + OneIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { + searcher: &'a One, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn count(self) -> usize { + self.it.count(|s, e| { + // SAFETY: We rely on our generic iterator to return valid start + // and end pointers. + unsafe { self.searcher.count_raw(s, e) } + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two(generic::Two<uint8x16_t>); + +impl Two { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use neon vector instructions + /// that typically make it quite fast. + /// + /// If neon is unavailable in the current environment, then `None` is + /// returned.
+ #[inline] + pub fn new(needle1: u8, needle2: u8) -> Option<Two> { + if Two::is_available() { + // SAFETY: we check that neon is available above. + unsafe { Some(Two::new_unchecked(needle1, needle2)) } + } else { + None + } + } + + /// Create a new finder specific to neon vectors and routines without + /// checking that neon is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `neon` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `aarch64` target, then they therefore automatically have access to neon + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "neon")] + #[inline] + pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { + Two(generic::Two::new(needle1, needle2)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Two::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Two::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "neon")] + { + true + } + #[cfg(not(target_feature = "neon"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`.
+ /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::find_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `neon` routines.) 
+ #[target_feature(enable = "neon")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { + TwoIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { + searcher: &'a Two, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three(generic::Three<uint8x16_t>); + +impl Three { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use neon vector instructions + /// that typically make it quite fast. + /// + /// If neon is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { + if Three::is_available() { + // SAFETY: we check that neon is available above. + unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } + } else { + None + } + } + + /// Create a new finder specific to neon vectors and routines without + /// checking that neon is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `neon` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `aarch64` target, then they therefore automatically have access to neon + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases.
+ #[target_feature(enable = "neon")] + #[inline] + pub unsafe fn new_unchecked( + needle1: u8, + needle2: u8, + needle3: u8, + ) -> Three { + Three(generic::Three::new(needle1, needle2, needle3)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Three::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Three::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "neon")] + { + true + } + #[cfg(not(target_feature = "neon"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility.
+ self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { + ThreeIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. 
+/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { + searcher: &'a Three, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { + use super::*; + + define_memchr_quickcheck!(super); + + #[test] + fn forward_one() { + crate::tests::memchr::Runner::new(1).forward_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_one() { + crate::tests::memchr::Runner::new(1).reverse_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn count_one() { + crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).count()) + }) + } + + #[test] + fn forward_two() { + crate::tests::memchr::Runner::new(2).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_two() { + crate::tests::memchr::Runner::new(2).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn forward_three() { + crate::tests::memchr::Runner::new(3).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_three() { + crate::tests::memchr::Runner::new(3).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) + }, + ) + } +} diff --git a/vendor/memchr/src/arch/aarch64/neon/mod.rs b/vendor/memchr/src/arch/aarch64/neon/mod.rs new file mode 100644 index 0000000..ccf9cf8 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/neon/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `aarch64` target using 128-bit vectors via NEON.
+*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/aarch64/neon/packedpair.rs b/vendor/memchr/src/arch/aarch64/neon/packedpair.rs new file mode 100644 index 0000000..6884882 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/neon/packedpair.rs @@ -0,0 +1,236 @@ +/*! +A 128-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::aarch64::uint8x16_t; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 128-bit vector operations. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +#[derive(Clone, Copy, Debug)] +pub struct Finder(packedpair::Finder<uint8x16_t>); + +impl Finder { + /// Create a new pair searcher. The searcher returned can either report + /// exact matches of `needle` or act as a prefilter and report candidate + /// positions of `needle`. + /// + /// If neon is unavailable in the current environment or if a [`Pair`] + /// could not be constructed from the needle given, then `None` is + /// returned. + #[inline] + pub fn new(needle: &[u8]) -> Option<Finder> { + Finder::with_pair(needle, Pair::new(needle)?) + } + + /// Create a new "packed pair" finder using the pair of bytes given. + /// + /// This constructor permits callers to control precisely which pair of + /// bytes is used as a predicate. + /// + /// If neon is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { + if Finder::is_available() { + // SAFETY: we check that neon is available above. We are also + // guaranteed to have needle.len() > 1 because we have a valid + // Pair. + unsafe { Some(Finder::with_pair_impl(needle, pair)) } + } else { + None + } + } + + /// Create a new `Finder` specific to neon vectors and routines. + /// + /// # Safety + /// + /// Same as the safety for `packedpair::Finder::new`, and callers must also + /// ensure that neon is available. + #[target_feature(enable = "neon")] + #[inline] + unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { + let finder = packedpair::Finder::<uint8x16_t>::new(needle, pair); + Finder(finder) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Finder::with_pair`] will + /// return a `Some` value. Similarly, when it is false, it is guaranteed + /// that `Finder::with_pair` will return a `None` value.
Notice that this + /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, + /// even when `Finder::is_available` is true, it is not guaranteed that a + /// valid [`Pair`] can be found from the needle given. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "neon")] + { + true + } + #[cfg(not(target_feature = "neon"))] + { + false + } + } + + /// Execute a search using neon vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { + // SAFETY: Building a `Finder` means it's safe to call 'neon' routines. + unsafe { self.find_impl(haystack, needle) } + } + + /// Execute a prefilter search using neon vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: Building a `Finder` means it's safe to call 'neon' routines. + unsafe { self.find_prefilter_impl(haystack) } + } + + /// Execute a search using neon vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn find_impl( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + self.0.find(haystack, needle) + } + + /// Execute a prefilter search using neon vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> { + self.0.find_prefilter(haystack) + } + + /// Returns the pair of offsets (into the needle) used to check as a + /// predicate before confirming whether a needle exists at a particular + /// position. + #[inline] + pub fn pair(&self) -> &Pair { + self.0.pair() + } + + /// Returns the minimum haystack length that this `Finder` can search. + /// + /// Using a haystack with length smaller than this in a search will result + /// in a panic. The reason for this restriction is that this finder is + /// meant to be a low-level component that is part of a larger substring + /// strategy. In that sense, it avoids trying to handle all cases and + /// instead only handles the cases that it can handle very well.
+    #[inline]
+    pub fn min_haystack_len(&self) -> usize {
+        self.0.min_haystack_len()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
+        let f = Finder::new(needle)?;
+        if haystack.len() < f.min_haystack_len() {
+            return None;
+        }
+        Some(f.find(haystack, needle))
+    }
+
+    define_substring_forward_quickcheck!(find);
+
+    #[test]
+    fn forward_substring() {
+        crate::tests::substring::Runner::new().fwd(find).run()
+    }
+
+    #[test]
+    fn forward_packedpair() {
+        fn find(
+            haystack: &[u8],
+            needle: &[u8],
+            index1: u8,
+            index2: u8,
+        ) -> Option<Option<usize>> {
+            let pair = Pair::with_indices(needle, index1, index2)?;
+            let f = Finder::with_pair(needle, pair)?;
+            if haystack.len() < f.min_haystack_len() {
+                return None;
+            }
+            Some(f.find(haystack, needle))
+        }
+        crate::tests::packedpair::Runner::new().fwd(find).run()
+    }
+
+    #[test]
+    fn forward_packedpair_prefilter() {
+        fn find(
+            haystack: &[u8],
+            needle: &[u8],
+            index1: u8,
+            index2: u8,
+        ) -> Option<Option<usize>> {
+            let pair = Pair::with_indices(needle, index1, index2)?;
+            let f = Finder::with_pair(needle, pair)?;
+            if haystack.len() < f.min_haystack_len() {
+                return None;
+            }
+            Some(f.find_prefilter(haystack))
+        }
+        crate::tests::packedpair::Runner::new().fwd(find).run()
+    }
+}
diff --git a/vendor/memchr/src/arch/all/memchr.rs b/vendor/memchr/src/arch/all/memchr.rs
new file mode 100644
index 0000000..435b1be
--- /dev/null
+++ b/vendor/memchr/src/arch/all/memchr.rs
@@ -0,0 +1,996 @@
+/*!
+Provides architecture independent implementations of `memchr` and friends.
+
+The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
+searching for one, two or three distinct bytes, respectively, in a haystack.
+Each type also has corresponding double ended iterators. These searchers
+are typically slower than hand-coded vector routines accomplishing the same
+task, but are also typically faster than naive scalar code. These routines
+effectively work by treating a `usize` as a vector of 8-bit lanes, and thus
+achieve some level of data parallelism even without explicit vector support.
+
+The `One` searcher also provides a [`One::count`] routine for efficiently
+counting the number of times a single byte occurs in a haystack. This is
+useful, for example, for counting the number of lines in a haystack. This
+routine exists because it is usually faster, especially with a high match
+count, than using [`One::find`] repeatedly. ([`OneIter`] specializes its
+`Iterator::count` implementation to use this routine.)
+
+Only one, two and three bytes are supported because three bytes is about
+the point where one sees diminishing returns. Beyond that point, it's
+probably (but not necessarily) better to just use a simple `[bool; 256]` array
+or similar. However, it depends mightily on the specific workload and the
+expected match frequency.
+*/
+
+use crate::{arch::generic::memchr as generic, ext::Pointer};
+
+/// The number of bytes in a single `usize` value.
+const USIZE_BYTES: usize = (usize::BITS / 8) as usize;
+/// The bits that must be zero for a `*const usize` to be properly aligned.
+const USIZE_ALIGN: usize = USIZE_BYTES - 1;
+
+/// Finds all occurrences of a single byte in a haystack.
+#[derive(Clone, Copy, Debug)]
+pub struct One {
+    s1: u8,
+    v1: usize,
+}
+
+impl One {
+    /// The number of bytes we examine per each iteration of our search loop.
+    const LOOP_BYTES: usize = 2 * USIZE_BYTES;
+
+    /// Create a new searcher that finds occurrences of the byte given.
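+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of the searcher in action (the byte and haystacks
+    /// here are arbitrary):
+    ///
+    /// ```
+    /// use memchr::arch::all::memchr::One;
+    ///
+    /// let one = One::new(b'x');
+    /// assert_eq!(Some(4), one.find(b"abcdxyz"));
+    /// assert_eq!(None, one.find(b"abcdef"));
+    /// ```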
+    #[inline]
+    pub fn new(needle: u8) -> One {
+        One { s1: needle, v1: splat(needle) }
+    }
+
+    /// A test-only routine so that we can bundle a bunch of quickcheck
+    /// properties into a single macro. Basically, this provides a constructor
+    /// that makes it identical to most other memchr implementations, which
+    /// have fallible constructors.
+    #[cfg(test)]
+    pub(crate) fn try_new(needle: u8) -> Option<One> {
+        Some(One::new(needle))
+    }
+
+    /// Return the first occurrence of the needle in the given haystack. If no
+    /// such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value for a non-empty haystack is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of the needle in the given haystack. If no
+    /// such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value for a non-empty haystack is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Counts all occurrences of this byte in the given haystack.
+    #[inline]
+    pub fn count(&self, haystack: &[u8]) -> usize {
+        // SAFETY: All of our pointers are derived directly from a borrowed
+        // slice, which is guaranteed to be valid.
+        unsafe {
+            let start = haystack.as_ptr();
+            let end = start.add(haystack.len());
+            self.count_raw(start, end)
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let confirm = |b| self.confirm(b);
+        let len = end.distance(start);
+        if len < USIZE_BYTES {
+            return generic::fwd_byte_by_byte(start, end, confirm);
+        }
+
+        // The start of the search may not be aligned to `*const usize`,
+        // so we do an unaligned load here.
+        let chunk = start.cast::<usize>().read_unaligned();
+        if self.has_needle(chunk) {
+            return generic::fwd_byte_by_byte(start, end, confirm);
+        }
+
+        // And now we start our search at a guaranteed aligned position.
+        // The first iteration of the loop below will overlap with the
+        // unaligned chunk above in cases where the search starts at an
+        // unaligned offset, but that's okay as we're only here if that
+        // above didn't find a match.
+        let mut cur =
+            start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN));
+        debug_assert!(cur > start);
+        if len <= One::LOOP_BYTES {
+            return generic::fwd_byte_by_byte(cur, end, confirm);
+        }
+        debug_assert!(end.sub(One::LOOP_BYTES) >= start);
+        while cur <= end.sub(One::LOOP_BYTES) {
+            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
+
+            let a = cur.cast::<usize>().read();
+            let b = cur.add(USIZE_BYTES).cast::<usize>().read();
+            if self.has_needle(a) || self.has_needle(b) {
+                break;
+            }
+            cur = cur.add(One::LOOP_BYTES);
+        }
+        generic::fwd_byte_by_byte(cur, end, confirm)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let confirm = |b| self.confirm(b);
+        let len = end.distance(start);
+        if len < USIZE_BYTES {
+            return generic::rev_byte_by_byte(start, end, confirm);
+        }
+
+        let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned();
+        if self.has_needle(chunk) {
+            return generic::rev_byte_by_byte(start, end, confirm);
+        }
+
+        let mut cur = end.sub(end.as_usize() & USIZE_ALIGN);
+        debug_assert!(start <= cur && cur <= end);
+        if len <= One::LOOP_BYTES {
+            return generic::rev_byte_by_byte(start, cur, confirm);
+        }
+        while cur >= start.add(One::LOOP_BYTES) {
+            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
+
+            let a = cur.sub(2 * USIZE_BYTES).cast::<usize>().read();
+            let b = cur.sub(1 * USIZE_BYTES).cast::<usize>().read();
+            if self.has_needle(a) || self.has_needle(b) {
+                break;
+            }
+            cur = cur.sub(One::LOOP_BYTES);
+        }
+        generic::rev_byte_by_byte(start, cur, confirm)
+    }
+
+    /// Counts all occurrences of this byte in the given haystack represented
+    /// by raw pointers.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `0` will always be returned.
+    #[inline]
+    pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
+        if start >= end {
+            return 0;
+        }
+        // Sadly I couldn't get the SWAR approach to work here, so we just do
+        // one byte at a time for now. PRs to improve this are welcome.
+        let mut ptr = start;
+        let mut count = 0;
+        while ptr < end {
+            count += (ptr.read() == self.s1) as usize;
+            ptr = ptr.offset(1);
+        }
+        count
+    }
+
+    /// Returns an iterator over all occurrences of the needle byte in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
+        OneIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+
+    #[inline(always)]
+    fn has_needle(&self, chunk: usize) -> bool {
+        has_zero_byte(self.v1 ^ chunk)
+    }
+
+    #[inline(always)]
+    fn confirm(&self, haystack_byte: u8) -> bool {
+        self.s1 == haystack_byte
+    }
+}
+
+/// An iterator over all occurrences of a single byte in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`One::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`One`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct OneIter<'a, 'h> {
+    /// The underlying memchr searcher.
+    searcher: &'a One,
+    /// Generic iterator implementation.
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for OneIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        self.it.count(|s, e| {
+            // SAFETY: We rely on our generic iterator to return valid start
+            // and end pointers.
+            unsafe { self.searcher.count_raw(s, e) }
+        })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+/// Finds all occurrences of two bytes in a haystack.
+///
+/// That is, this reports matches of one of two possible bytes. For example,
+/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
+/// `4` and `5`.
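+///
+/// # Example
+///
+/// A small sketch of the behavior described above, using the example inputs
+/// from the previous paragraph:
+///
+/// ```
+/// use memchr::arch::all::memchr::Two;
+///
+/// let two = Two::new(b'a', b'b');
+/// let offsets: Vec<usize> = two.iter(b"afoobar").collect();
+/// assert_eq!(offsets, vec![0, 4, 5]);
+/// ```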
+#[derive(Clone, Copy, Debug)]
+pub struct Two {
+    s1: u8,
+    s2: u8,
+    v1: usize,
+    v2: usize,
+}
+
+impl Two {
+    /// Create a new searcher that finds occurrences of the two needle bytes
+    /// given.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8) -> Two {
+        Two {
+            s1: needle1,
+            s2: needle2,
+            v1: splat(needle1),
+            v2: splat(needle2),
+        }
+    }
+
+    /// A test-only routine so that we can bundle a bunch of quickcheck
+    /// properties into a single macro. Basically, this provides a constructor
+    /// that makes it identical to most other memchr implementations, which
+    /// have fallible constructors.
+    #[cfg(test)]
+    pub(crate) fn try_new(needle1: u8, needle2: u8) -> Option<Two> {
+        Some(Two::new(needle1, needle2))
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value for a non-empty haystack is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value for a non-empty haystack is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let confirm = |b| self.confirm(b);
+        let len = end.distance(start);
+        if len < USIZE_BYTES {
+            return generic::fwd_byte_by_byte(start, end, confirm);
+        }
+
+        // The start of the search may not be aligned to `*const usize`,
+        // so we do an unaligned load here.
+        let chunk = start.cast::<usize>().read_unaligned();
+        if self.has_needle(chunk) {
+            return generic::fwd_byte_by_byte(start, end, confirm);
+        }
+
+        // And now we start our search at a guaranteed aligned position.
+        // The first iteration of the loop below will overlap with the
+        // unaligned chunk above in cases where the search starts at an
+        // unaligned offset, but that's okay as we're only here if that
+        // above didn't find a match.
+        let mut cur =
+            start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN));
+        debug_assert!(cur > start);
+        debug_assert!(end.sub(USIZE_BYTES) >= start);
+        while cur <= end.sub(USIZE_BYTES) {
+            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
+
+            let chunk = cur.cast::<usize>().read();
+            if self.has_needle(chunk) {
+                break;
+            }
+            cur = cur.add(USIZE_BYTES);
+        }
+        generic::fwd_byte_by_byte(cur, end, confirm)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let confirm = |b| self.confirm(b);
+        let len = end.distance(start);
+        if len < USIZE_BYTES {
+            return generic::rev_byte_by_byte(start, end, confirm);
+        }
+
+        let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned();
+        if self.has_needle(chunk) {
+            return generic::rev_byte_by_byte(start, end, confirm);
+        }
+
+        let mut cur = end.sub(end.as_usize() & USIZE_ALIGN);
+        debug_assert!(start <= cur && cur <= end);
+        while cur >= start.add(USIZE_BYTES) {
+            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
+
+            let chunk = cur.sub(USIZE_BYTES).cast::<usize>().read();
+            if self.has_needle(chunk) {
+                break;
+            }
+            cur = cur.sub(USIZE_BYTES);
+        }
+        generic::rev_byte_by_byte(start, cur, confirm)
+    }
+
+    /// Returns an iterator over all occurrences of one of the needle bytes in
+    /// the given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
+        TwoIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+
+    #[inline(always)]
+    fn has_needle(&self, chunk: usize) -> bool {
+        has_zero_byte(self.v1 ^ chunk) || has_zero_byte(self.v2 ^ chunk)
+    }
+
+    #[inline(always)]
+    fn confirm(&self, haystack_byte: u8) -> bool {
+        self.s1 == haystack_byte || self.s2 == haystack_byte
+    }
+}
+
+/// An iterator over all occurrences of two possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`Two::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct TwoIter<'a, 'h> {
+    /// The underlying memchr searcher.
+    searcher: &'a Two,
+    /// Generic iterator implementation.
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+/// Finds all occurrences of three bytes in a haystack.
+///
+/// That is, this reports matches of one of three possible bytes. For example,
+/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
+/// `0`, `2`, `3`, `4` and `5`.
+#[derive(Clone, Copy, Debug)]
+pub struct Three {
+    s1: u8,
+    s2: u8,
+    s3: u8,
+    v1: usize,
+    v2: usize,
+    v3: usize,
+}
+
+impl Three {
+    /// Create a new searcher that finds occurrences of the three needle bytes
+    /// given.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Three {
+        Three {
+            s1: needle1,
+            s2: needle2,
+            s3: needle3,
+            v1: splat(needle1),
+            v2: splat(needle2),
+            v3: splat(needle3),
+        }
+    }
+
+    /// A test-only routine so that we can bundle a bunch of quickcheck
+    /// properties into a single macro. Basically, this provides a constructor
+    /// that makes it identical to most other memchr implementations, which
+    /// have fallible constructors.
+    #[cfg(test)]
+    pub(crate) fn try_new(
+        needle1: u8,
+        needle2: u8,
+        needle3: u8,
+    ) -> Option<Three> {
+        Some(Three::new(needle1, needle2, needle3))
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value for a non-empty haystack is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value for a non-empty haystack is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let confirm = |b| self.confirm(b);
+        let len = end.distance(start);
+        if len < USIZE_BYTES {
+            return generic::fwd_byte_by_byte(start, end, confirm);
+        }
+
+        // The start of the search may not be aligned to `*const usize`,
+        // so we do an unaligned load here.
+        let chunk = start.cast::<usize>().read_unaligned();
+        if self.has_needle(chunk) {
+            return generic::fwd_byte_by_byte(start, end, confirm);
+        }
+
+        // And now we start our search at a guaranteed aligned position.
+        // The first iteration of the loop below will overlap with the
+        // unaligned chunk above in cases where the search starts at an
+        // unaligned offset, but that's okay as we're only here if that
+        // above didn't find a match.
+        let mut cur =
+            start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN));
+        debug_assert!(cur > start);
+        debug_assert!(end.sub(USIZE_BYTES) >= start);
+        while cur <= end.sub(USIZE_BYTES) {
+            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
+
+            let chunk = cur.cast::<usize>().read();
+            if self.has_needle(chunk) {
+                break;
+            }
+            cur = cur.add(USIZE_BYTES);
+        }
+        generic::fwd_byte_by_byte(cur, end, confirm)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
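+    ///
+    /// # Example
+    ///
+    /// A sketch of raw pointer usage (the inputs are arbitrary; both
+    /// pointers are derived from a single slice, as the rules above
+    /// require):
+    ///
+    /// ```
+    /// use memchr::arch::all::memchr::Three;
+    ///
+    /// let three = Three::new(b'a', b'b', b'c');
+    /// let haystack = b"xxbxx";
+    /// // SAFETY: both pointers point into the same slice.
+    /// let found = unsafe {
+    ///     let start = haystack.as_ptr();
+    ///     let end = start.add(haystack.len());
+    ///     three.rfind_raw(start, end)
+    /// };
+    /// let offset = found.map(|p| unsafe {
+    ///     p.offset_from(haystack.as_ptr()) as usize
+    /// });
+    /// assert_eq!(Some(2), offset);
+    /// ```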
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let confirm = |b| self.confirm(b);
+        let len = end.distance(start);
+        if len < USIZE_BYTES {
+            return generic::rev_byte_by_byte(start, end, confirm);
+        }
+
+        let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned();
+        if self.has_needle(chunk) {
+            return generic::rev_byte_by_byte(start, end, confirm);
+        }
+
+        let mut cur = end.sub(end.as_usize() & USIZE_ALIGN);
+        debug_assert!(start <= cur && cur <= end);
+        while cur >= start.add(USIZE_BYTES) {
+            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
+
+            let chunk = cur.sub(USIZE_BYTES).cast::<usize>().read();
+            if self.has_needle(chunk) {
+                break;
+            }
+            cur = cur.sub(USIZE_BYTES);
+        }
+        generic::rev_byte_by_byte(start, cur, confirm)
+    }
+
+    /// Returns an iterator over all occurrences of one of the needle bytes in
+    /// the given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
+        ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+
+    #[inline(always)]
+    fn has_needle(&self, chunk: usize) -> bool {
+        has_zero_byte(self.v1 ^ chunk)
+            || has_zero_byte(self.v2 ^ chunk)
+            || has_zero_byte(self.v3 ^ chunk)
+    }
+
+    #[inline(always)]
+    fn confirm(&self, haystack_byte: u8) -> bool {
+        self.s1 == haystack_byte
+            || self.s2 == haystack_byte
+            || self.s3 == haystack_byte
+    }
+}
+
+/// An iterator over all occurrences of three possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`Three::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct ThreeIter<'a, 'h> {
+    /// The underlying memchr searcher.
+    searcher: &'a Three,
+    /// Generic iterator implementation.
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+/// Return `true` if `x` contains any zero byte.
+///
+/// That is, this routine treats `x` as a register of 8-bit lanes and returns
+/// true when any of those lanes is `0`.
+///
+/// From "Matters Computational" by J. Arndt.
+#[inline(always)]
+fn has_zero_byte(x: usize) -> bool {
+    // "The idea is to subtract one from each of the bytes and then look for
+    // bytes where the borrow propagated all the way to the most significant
+    // bit."
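+    //
+    // A worked example (using 32-bit words for brevity): with
+    // x = 0x61006261, we get x - 0x01010101 = 0x5FFF6160 and
+    // !x = 0x9EFF9D9E, so 0x5FFF6160 & 0x9EFF9D9E & 0x80808080 is
+    // 0x00800000 != 0, which flags the zero byte in lane 2.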
+    const LO: usize = splat(0x01);
+    const HI: usize = splat(0x80);
+
+    (x.wrapping_sub(LO) & !x & HI) != 0
+}
+
+/// Repeat the given byte into a word size number. That is, every 8 bits
+/// is equivalent to the given byte. For example, if `b` is `\x4E` or
+/// `01001110` in binary, then the returned value on a 32-bit system would be:
+/// `01001110_01001110_01001110_01001110`.
+#[inline(always)]
+const fn splat(b: u8) -> usize {
+    // TODO: use `usize::from` once it can be used in const context.
+    (b as usize) * (usize::MAX / 255)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    define_memchr_quickcheck!(super, try_new);
+
+    #[test]
+    fn forward_one() {
+        crate::tests::memchr::Runner::new(1).forward_iter(
+            |haystack, needles| {
+                Some(One::new(needles[0]).iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_one() {
+        crate::tests::memchr::Runner::new(1).reverse_iter(
+            |haystack, needles| {
+                Some(One::new(needles[0]).iter(haystack).rev().collect())
+            },
+        )
+    }
+
+    #[test]
+    fn count_one() {
+        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
+            Some(One::new(needles[0]).iter(haystack).count())
+        })
+    }
+
+    #[test]
+    fn forward_two() {
+        crate::tests::memchr::Runner::new(2).forward_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                Some(Two::new(n1, n2).iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_two() {
+        crate::tests::memchr::Runner::new(2).reverse_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                Some(Two::new(n1, n2).iter(haystack).rev().collect())
+            },
+        )
+    }
+
+    #[test]
+    fn forward_three() {
+        crate::tests::memchr::Runner::new(3).forward_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                let n3 = needles.get(2).copied()?;
+                Some(Three::new(n1, n2, n3).iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_three() {
+        crate::tests::memchr::Runner::new(3).reverse_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                let n3 = needles.get(2).copied()?;
+                Some(Three::new(n1, n2, n3).iter(haystack).rev().collect())
+            },
+        )
+    }
+
+    // This was found by quickcheck in the course of refactoring this crate
+    // after memchr 2.5.0.
+    #[test]
+    fn regression_double_ended_iterator() {
+        let finder = One::new(b'a');
+        let haystack = "a";
+        let mut it = finder.iter(haystack.as_bytes());
+        assert_eq!(Some(0), it.next());
+        assert_eq!(None, it.next_back());
+    }
+
+    // This regression test was caught by ripgrep's test suite on i686 when
+    // upgrading to memchr 2.6. Namely, something about the \x0B bytes here
+    // screws with the SWAR counting approach I was using. This regression test
+    // prompted me to remove the SWAR counting approach and just replace it
+    // with a byte-at-a-time loop.
+    #[test]
+    fn regression_count_new_lines() {
+        let haystack = "01234567\x0b\n\x0b\n\x0b\n\x0b\nx";
+        let count = One::new(b'\n').count(haystack.as_bytes());
+        assert_eq!(4, count);
+    }
+}
diff --git a/vendor/memchr/src/arch/all/mod.rs b/vendor/memchr/src/arch/all/mod.rs
new file mode 100644
index 0000000..559cb75
--- /dev/null
+++ b/vendor/memchr/src/arch/all/mod.rs
@@ -0,0 +1,234 @@
+/*!
+Contains architecture independent routines.
+
+These routines are often used as a "fallback" implementation when the more
+specialized architecture dependent routines are unavailable.
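+
+For example, the byte-equality helpers defined below can be called directly
+(a small sketch; the inputs are arbitrary):
+
+```
+use memchr::arch::all::{is_equal, is_prefix, is_suffix};
+
+assert!(is_prefix(b"foobar", b"foo"));
+assert!(is_suffix(b"foobar", b"bar"));
+assert!(is_equal(b"deadbeef", b"deadbeef"));
+```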
+*/
+
+pub mod memchr;
+pub mod packedpair;
+pub mod rabinkarp;
+#[cfg(feature = "alloc")]
+pub mod shiftor;
+pub mod twoway;
+
+/// Returns true if and only if `needle` is a prefix of `haystack`.
+///
+/// This uses a latency optimized variant of `memcmp` internally which *might*
+/// make this faster for very short strings.
+///
+/// # Inlining
+///
+/// This routine is marked `inline(always)`. If you want to call this function
+/// in a way that is not always inlined, you'll need to wrap a call to it in
+/// another function that is marked as `inline(never)` or just `inline`.
+#[inline(always)]
+pub fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool {
+    needle.len() <= haystack.len()
+        && is_equal(&haystack[..needle.len()], needle)
+}
+
+/// Returns true if and only if `needle` is a suffix of `haystack`.
+///
+/// This uses a latency optimized variant of `memcmp` internally which *might*
+/// make this faster for very short strings.
+///
+/// # Inlining
+///
+/// This routine is marked `inline(always)`. If you want to call this function
+/// in a way that is not always inlined, you'll need to wrap a call to it in
+/// another function that is marked as `inline(never)` or just `inline`.
+#[inline(always)]
+pub fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool {
+    needle.len() <= haystack.len()
+        && is_equal(&haystack[haystack.len() - needle.len()..], needle)
+}
+
+/// Compare corresponding bytes in `x` and `y` for equality.
+///
+/// That is, this returns true if and only if `x.len() == y.len()` and
+/// `x[i] == y[i]` for all `0 <= i < x.len()`.
+///
+/// # Inlining
+///
+/// This routine is marked `inline(always)`. If you want to call this function
+/// in a way that is not always inlined, you'll need to wrap a call to it in
+/// another function that is marked as `inline(never)` or just `inline`.
+///
+/// # Motivation
+///
+/// Why not use slice equality instead? Well, slice equality usually results in
+/// a call out to the current platform's `libc` which might not be inlineable
+/// or have other overhead. This routine isn't guaranteed to be a win, but it
+/// might be in some cases.
+#[inline(always)]
+pub fn is_equal(x: &[u8], y: &[u8]) -> bool {
+    if x.len() != y.len() {
+        return false;
+    }
+    // SAFETY: Our pointers are derived directly from borrowed slices which
+    // uphold all of our safety guarantees except for length. We account for
+    // length with the check above.
+    unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) }
+}
+
+/// Compare `n` bytes at the given pointers for equality.
+///
+/// This returns true if and only if `*x.add(i) == *y.add(i)` for all
+/// `0 <= i < n`.
+///
+/// # Inlining
+///
+/// This routine is marked `inline(always)`. If you want to call this function
+/// in a way that is not always inlined, you'll need to wrap a call to it in
+/// another function that is marked as `inline(never)` or just `inline`.
+///
+/// # Motivation
+///
+/// Why not use slice equality instead? Well, slice equality usually results in
+/// a call out to the current platform's `libc` which might not be inlineable
+/// or have other overhead. This routine isn't guaranteed to be a win, but it
+/// might be in some cases.
+///
+/// # Safety
+///
+/// * Both `x` and `y` must be valid for reads of up to `n` bytes.
+/// * Both `x` and `y` must point to an initialized value.
+/// * Both `x` and `y` must each point to an allocated object and
+///   must either be in bounds or at most one byte past the end of the
+///   allocated object.
+///   `x` and `y` do not need to point to the same allocated
+///   object, but they may.
+/// * Both `x` and `y` must be _derived from_ a pointer to their respective
+///   allocated objects.
+/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly
+///   for `y` and `y+n`.
+/// * The distance being in bounds must not rely on "wrapping around" the
+///   address space.
+#[inline(always)]
+pub unsafe fn is_equal_raw(
+    mut x: *const u8,
+    mut y: *const u8,
+    mut n: usize,
+) -> bool {
+    // When we have 4 or more bytes to compare, then proceed in chunks of 4 at
+    // a time using unaligned loads.
+    //
+    // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is
+    // that this particular version of memcmp is likely to be called with tiny
+    // needles. That means that if we do 8 byte loads, then a higher proportion
+    // of memcmp calls will use the slower variant above. With that said, this
+    // is a hypothesis and is only loosely supported by benchmarks. There's
+    // likely some improvement that could be made here. The main thing here
+    // though is to optimize for latency, not throughput.
+
+    // SAFETY: The caller is responsible for ensuring the pointers we get are
+    // valid and readable for at least `n` bytes. We also do unaligned loads,
+    // so there's no need to ensure we're aligned. (This is justified by this
+    // routine being specifically for short strings.)
+    while n >= 4 {
+        let vx = x.cast::<u32>().read_unaligned();
+        let vy = y.cast::<u32>().read_unaligned();
+        if vx != vy {
+            return false;
+        }
+        x = x.add(4);
+        y = y.add(4);
+        n -= 4;
+    }
+    // If we don't have enough bytes to do 4-byte at a time loads, then
+    // do partial loads. Note that I used to have a byte-at-a-time
+    // loop here and that turned out to be quite a bit slower for the
+    // memmem/pathological/defeat-simple-vector-alphabet benchmark.
+    if n >= 2 {
+        let vx = x.cast::<u16>().read_unaligned();
+        let vy = y.cast::<u16>().read_unaligned();
+        if vx != vy {
+            return false;
+        }
+        x = x.add(2);
+        y = y.add(2);
+        n -= 2;
+    }
+    if n > 0 {
+        if x.read() != y.read() {
+            return false;
+        }
+    }
+    true
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn equals_different_lengths() {
+        assert!(!is_equal(b"", b"a"));
+        assert!(!is_equal(b"a", b""));
+        assert!(!is_equal(b"ab", b"a"));
+        assert!(!is_equal(b"a", b"ab"));
+    }
+
+    #[test]
+    fn equals_mismatch() {
+        let one_mismatch = [
+            (&b"a"[..], &b"x"[..]),
+            (&b"ab"[..], &b"ax"[..]),
+            (&b"abc"[..], &b"abx"[..]),
+            (&b"abcd"[..], &b"abcx"[..]),
+            (&b"abcde"[..], &b"abcdx"[..]),
+            (&b"abcdef"[..], &b"abcdex"[..]),
+            (&b"abcdefg"[..], &b"abcdefx"[..]),
+            (&b"abcdefgh"[..], &b"abcdefgx"[..]),
+            (&b"abcdefghi"[..], &b"abcdefghx"[..]),
+            (&b"abcdefghij"[..], &b"abcdefghix"[..]),
+            (&b"abcdefghijk"[..], &b"abcdefghijx"[..]),
+            (&b"abcdefghijkl"[..], &b"abcdefghijkx"[..]),
+            (&b"abcdefghijklm"[..], &b"abcdefghijklx"[..]),
+            (&b"abcdefghijklmn"[..], &b"abcdefghijklmx"[..]),
+        ];
+        for (x, y) in one_mismatch {
+            assert_eq!(x.len(), y.len(), "lengths should match");
+            assert!(!is_equal(x, y));
+            assert!(!is_equal(y, x));
+        }
+    }
+
+    #[test]
+    fn equals_yes() {
+        assert!(is_equal(b"", b""));
+        assert!(is_equal(b"a", b"a"));
+        assert!(is_equal(b"ab", b"ab"));
+        assert!(is_equal(b"abc", b"abc"));
+        assert!(is_equal(b"abcd", b"abcd"));
+        assert!(is_equal(b"abcde", b"abcde"));
+        assert!(is_equal(b"abcdef", b"abcdef"));
+        assert!(is_equal(b"abcdefg", b"abcdefg"));
+        assert!(is_equal(b"abcdefgh", b"abcdefgh"));
+        assert!(is_equal(b"abcdefghi", b"abcdefghi"));
+    }
+
+    #[test]
+    fn prefix() {
+        assert!(is_prefix(b"", b""));
+        assert!(is_prefix(b"a", b""));
+        assert!(is_prefix(b"ab", b""));
+        assert!(is_prefix(b"foo", b"foo"));
+        assert!(is_prefix(b"foobar", b"foo"));
+
+        assert!(!is_prefix(b"foo", b"fob"));
+        assert!(!is_prefix(b"foobar", b"fob"));
+    }
+
+    #[test]
+    fn suffix() {
+        assert!(is_suffix(b"", b""));
+        assert!(is_suffix(b"a", b""));
+        assert!(is_suffix(b"ab", b""));
+        assert!(is_suffix(b"foo", b"foo"));
+        assert!(is_suffix(b"foobar", b"bar"));
+
+        assert!(!is_suffix(b"foo", b"goo"));
+        assert!(!is_suffix(b"foobar", b"gar"));
+    }
+}
diff --git a/vendor/memchr/src/memmem/byte_frequencies.rs b/vendor/memchr/src/arch/all/packedpair/default_rank.rs
similarity index 99%
rename from vendor/memchr/src/memmem/byte_frequencies.rs
rename to vendor/memchr/src/arch/all/packedpair/default_rank.rs
index c313b62..6aa3895 100644
--- a/vendor/memchr/src/memmem/byte_frequencies.rs
+++ b/vendor/memchr/src/arch/all/packedpair/default_rank.rs
@@ -1,4 +1,4 @@
-pub const BYTE_FREQUENCIES: [u8; 256] = [
+pub(crate) const RANK: [u8; 256] = [
     55, // '\x00'
     52, // '\x01'
     51, // '\x02'
diff --git a/vendor/memchr/src/arch/all/packedpair/mod.rs b/vendor/memchr/src/arch/all/packedpair/mod.rs
new file mode 100644
index 0000000..148a985
--- /dev/null
+++ b/vendor/memchr/src/arch/all/packedpair/mod.rs
@@ -0,0 +1,359 @@
+/*!
+Provides an architecture independent implementation of the "packed pair"
+algorithm.
+
+The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
+difference is that it (by default) uses a background distribution of byte
+frequencies to heuristically select the pair of bytes to search for. Note that
+this module provides an architecture independent version that doesn't do as
+good of a job keeping the search for candidates inside a SIMD hot path.
+It can, however, be good enough in many circumstances.
+
+[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
+*/
+
+use crate::memchr;
+
+mod default_rank;
+
+/// An architecture independent "packed pair" finder.
+///
+/// This finder picks two bytes that it believes have high predictive power for
+/// indicating an overall match of a needle. At search time, it reports offsets
+/// where the needle could match based on whether the pair of bytes it chose
+/// match.
+///
+/// This is architecture independent because it utilizes `memchr` to find the
+/// occurrence of one of the bytes in the pair, and then checks whether the
+/// second byte matches. If it does, in the case of [`Finder::find_prefilter`],
+/// the location at which the needle could match is returned.
+///
+/// It is generally preferred to use architecture specific routines for a
+/// "packed pair" prefilter, but this can be a useful fallback when the
+/// architecture independent routines are unavailable.
+#[derive(Clone, Copy, Debug)]
+pub struct Finder {
+    pair: Pair,
+    byte1: u8,
+    byte2: u8,
+}
+
+impl Finder {
+    /// Create a new prefilter that reports possible locations where the given
+    /// needle matches.
+    #[inline]
+    pub fn new(needle: &[u8]) -> Option<Finder> {
+        Finder::with_pair(needle, Pair::new(needle)?)
+    }
+
+    /// Create a new prefilter using the pair given.
+    ///
+    /// If the prefilter could not be constructed, then `None` is returned.
+    ///
+    /// This constructor permits callers to control precisely which pair of
+    /// bytes is used as a predicate.
+    #[inline]
+    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
+        let byte1 = needle[usize::from(pair.index1())];
+        let byte2 = needle[usize::from(pair.index2())];
+        // Currently this can never fail so we could just return a Finder,
+        // but it's conceivable this could change.
+        Some(Finder { pair, byte1, byte2 })
+    }
+
+    /// Run this finder on the given haystack as a prefilter.
+    ///
+    /// If a candidate match is found, then an offset where the needle *could*
+    /// begin in the haystack is returned.
+    #[inline]
+    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
+        let mut i = 0;
+        let index1 = usize::from(self.pair.index1());
+        let index2 = usize::from(self.pair.index2());
+        loop {
+            // Use a fast vectorized implementation to skip to the next
+            // occurrence of the rarest byte (heuristically chosen) in the
+            // needle.
+            i += memchr(self.byte1, &haystack[i..])?;
+            let found = i;
+            i += 1;
+
+            // If we can't align our first byte match with the haystack, then a
+            // match is impossible.
+            let aligned1 = match found.checked_sub(index1) {
+                None => continue,
+                Some(aligned1) => aligned1,
+            };
+
+            // Now align the second byte match with the haystack. A mismatch
+            // means that a match is impossible.
+            let aligned2 = match aligned1.checked_add(index2) {
+                None => continue,
+                Some(aligned_index2) => aligned_index2,
+            };
+            if haystack.get(aligned2).map_or(true, |&b| b != self.byte2) {
+                continue;
+            }
+
+            // We've done what we can. There might be a match here.
+            return Some(aligned1);
+        }
+    }
+
+    /// Returns the pair of offsets (into the needle) used to check as a
+    /// predicate before confirming whether a needle exists at a particular
+    /// position.
+    #[inline]
+    pub fn pair(&self) -> &Pair {
+        &self.pair
+    }
+}
+
+/// A pair of byte offsets into a needle to use as a predicate.
+///
+/// This pair is used as a predicate to quickly filter out positions in a
+/// haystack in which a needle cannot match.
+/// In some cases, this pair can even
+/// be used in vector algorithms such that the vector algorithm only switches
+/// over to scalar code once this pair has been found.
+///
+/// A pair of offsets can be used in both substring search implementations and
+/// in prefilters. The former will report matches of a needle in a haystack
+/// whereas the latter will only report possible matches of a needle.
+///
+/// The offsets are each limited to a maximum of 255 to keep memory usage low.
+/// Moreover, it's rarely advantageous to create a predicate using offsets
+/// greater than 255 anyway.
+///
+/// The only guarantee enforced on the pair of offsets is that they are not
+/// equivalent. It is not necessarily the case that `index1 < index2` for
+/// example. By convention, `index1` corresponds to the byte in the needle
+/// that is believed to be the most predictive. Note also that because of the
+/// requirement that the indices be both valid for the needle used to build
+/// the pair and not equal, it follows that a pair can only be constructed for
+/// needles with length at least 2.
+#[derive(Clone, Copy, Debug)]
+pub struct Pair {
+    index1: u8,
+    index2: u8,
+}
+
+impl Pair {
+    /// Create a new pair of offsets from the given needle.
+    ///
+    /// If a pair could not be created (for example, if the needle is too
+    /// short), then `None` is returned.
+    ///
+    /// This chooses the pair in the needle that is believed to be as
+    /// predictive of an overall match of the needle as possible.
+    #[inline]
+    pub fn new(needle: &[u8]) -> Option<Pair> {
+        Pair::with_ranker(needle, DefaultFrequencyRank)
+    }
+
+    /// Create a new pair of offsets from the given needle and ranker.
+    ///
+    /// This permits the caller to choose a background frequency distribution
+    /// with which bytes are selected. The idea is to select a pair of bytes
+    /// that is believed to strongly predict a match in the haystack. This
+    /// usually means selecting bytes that occur rarely in a haystack.
+    ///
+    /// If a pair could not be created (for example, if the needle is too
+    /// short), then `None` is returned.
+    #[inline]
+    pub fn with_ranker<R: HeuristicFrequencyRank>(
+        needle: &[u8],
+        ranker: R,
+    ) -> Option<Pair> {
+        if needle.len() <= 1 {
+            return None;
+        }
+        // Find the rarest two bytes. We make them distinct indices by
+        // construction. (The actual byte value may be the same in degenerate
+        // cases, but that's OK.)
+        let (mut rare1, mut index1) = (needle[0], 0);
+        let (mut rare2, mut index2) = (needle[1], 1);
+        if ranker.rank(rare2) < ranker.rank(rare1) {
+            core::mem::swap(&mut rare1, &mut rare2);
+            core::mem::swap(&mut index1, &mut index2);
+        }
+        let max = usize::from(core::u8::MAX);
+        for (i, &b) in needle.iter().enumerate().take(max).skip(2) {
+            if ranker.rank(b) < ranker.rank(rare1) {
+                rare2 = rare1;
+                index2 = index1;
+                rare1 = b;
+                index1 = u8::try_from(i).unwrap();
+            } else if b != rare1 && ranker.rank(b) < ranker.rank(rare2) {
+                rare2 = b;
+                index2 = u8::try_from(i).unwrap();
+            }
+        }
+        // While not strictly required for how a Pair is normally used, we
+        // really don't want these to be equivalent. If they were, it would
+        // reduce the effectiveness of candidate searching using these rare
+        // bytes by increasing the rate of false positives.
+        assert_ne!(index1, index2);
+        Some(Pair { index1, index2 })
+    }
+
+    /// Create a new pair using the offsets given for the needle given.
+    ///
+    /// This bypasses any sort of heuristic process for choosing the offsets
+    /// and permits the caller to choose the offsets themselves.
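+    ///
+    /// For example (a sketch; the needle and offsets are arbitrary):
+    ///
+    /// ```
+    /// use memchr::arch::all::packedpair::Pair;
+    ///
+    /// // Offsets must be distinct and in bounds for the needle.
+    /// let pair = Pair::with_indices(b"foobar", 0, 3).unwrap();
+    /// assert_eq!(0, pair.index1());
+    /// assert_eq!(3, pair.index2());
+    /// assert!(Pair::with_indices(b"foobar", 2, 2).is_none());
+    /// ```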
+    ///
+    /// Indices are limited to valid `u8` values so that a `Pair` uses less
+    /// memory. It is not possible to create a `Pair` with offsets bigger than
+    /// `u8::MAX`. It's likely that such a thing is not needed, but if it is,
+    /// it's suggested to build your own bespoke algorithm because you're
+    /// likely working on a very niche case. (File an issue if this suggestion
+    /// does not make sense to you.)
+    ///
+    /// If a pair could not be created (for example, if the needle is too
+    /// short), then `None` is returned.
+    #[inline]
+    pub fn with_indices(
+        needle: &[u8],
+        index1: u8,
+        index2: u8,
+    ) -> Option<Pair> {
+        // While not strictly required for how a Pair is normally used, we
+        // really don't want these to be equivalent. If they were, it would
+        // reduce the effectiveness of candidate searching using these rare
+        // bytes by increasing the rate of false positives.
+        if index1 == index2 {
+            return None;
+        }
+        // Similarly, invalid indices means the Pair is invalid too.
+        if usize::from(index1) >= needle.len() {
+            return None;
+        }
+        if usize::from(index2) >= needle.len() {
+            return None;
+        }
+        Some(Pair { index1, index2 })
+    }
+
+    /// Returns the first offset of the pair.
+    #[inline]
+    pub fn index1(&self) -> u8 {
+        self.index1
+    }
+
+    /// Returns the second offset of the pair.
+    #[inline]
+    pub fn index2(&self) -> u8 {
+        self.index2
+    }
+}
+
+/// This trait allows the user to customize the heuristic used to determine the
+/// relative frequency of a given byte in the dataset being searched.
+///
+/// The use of this trait can have a dramatic impact on performance depending
+/// on the type of data being searched. The details of why are explained in the
+/// docs of [`crate::memmem::Prefilter`]. To summarize, the core algorithm uses
+/// a prefilter to quickly identify candidate matches that are later verified
+/// more slowly. This prefilter is implemented in terms of trying to find
+/// `rare` bytes at specific offsets that will occur less frequently in the
+/// dataset. While the concept of a `rare` byte is similar for most datasets,
+/// there are some specific datasets (like binary executables) that have
+/// dramatically different byte distributions. For these datasets customizing
+/// the byte frequency heuristic can have a massive impact on performance, and
+/// might even need to be done at runtime.
+///
+/// The default implementation of `HeuristicFrequencyRank` reads from the
+/// static frequency table defined in `src/arch/all/packedpair/default_rank.rs`.
+/// This is optimal for most inputs, so if you are unsure of the impact of
+/// using a custom `HeuristicFrequencyRank` you should probably just use the
+/// default.
+///
+/// # Example
+///
+/// ```
+/// use memchr::{
+///     arch::all::packedpair::HeuristicFrequencyRank,
+///     memmem::FinderBuilder,
+/// };
+///
+/// /// A byte-frequency table that is good for scanning binary executables.
+/// struct Binary;
+///
+/// impl HeuristicFrequencyRank for Binary {
+///     fn rank(&self, byte: u8) -> u8 {
+///         const TABLE: [u8; 256] = [
+///             255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17,
+///             89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16,
+///             68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11,
+///             9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8,
+///             48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27,
+///             4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19,
+///             19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24,
+///             1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5,
+///             51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15,
+///             0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0,
+///             12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0,
+///             2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5,
+///             46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13,
+///             3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2,
+///             16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5,
+///             8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175,
+///         ];
+///         TABLE[byte as usize]
+///     }
+/// }
+/// // Create a new finder with the custom heuristic.
+/// let finder = FinderBuilder::new()
+///     .build_forward_with_ranker(Binary, b"\x00\x00\xdd\xdd");
+/// // Find needle with custom heuristic.
+/// assert!(finder.find(b"\x00\x00\x00\xdd\xdd").is_some());
+/// ```
+pub trait HeuristicFrequencyRank {
+    /// Return the heuristic frequency rank of the given byte. A lower rank
+    /// means the byte is believed to occur less frequently in the haystack.
+    ///
+    /// Some uses of this heuristic may treat arbitrary absolute rank values as
+    /// significant. For example, an implementation detail in this crate may
+    /// determine that heuristic prefilters are inappropriate if every byte in
+    /// the needle has a "high" rank.
+    fn rank(&self, byte: u8) -> u8;
+}
+
+/// The default byte frequency heuristic that is good for most haystacks.
+pub(crate) struct DefaultFrequencyRank;
+
+impl HeuristicFrequencyRank for DefaultFrequencyRank {
+    fn rank(&self, byte: u8) -> u8 {
+        self::default_rank::RANK[usize::from(byte)]
+    }
+}
+
+/// This permits passing any implementation of `HeuristicFrequencyRank` as a
+/// borrowed version of itself.
+impl<'a, R> HeuristicFrequencyRank for &'a R
+where
+    R: HeuristicFrequencyRank,
+{
+    fn rank(&self, byte: u8) -> u8 {
+        (**self).rank(byte)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn forward_packedpair() {
+        fn find(
+            haystack: &[u8],
+            needle: &[u8],
+            _index1: u8,
+            _index2: u8,
+        ) -> Option<Option<usize>> {
+            // We ignore the index positions requested since it winds up making
+            // this test too slow overall.
+            let f = Finder::new(needle)?;
+            Some(f.find_prefilter(haystack))
+        }
+        crate::tests::packedpair::Runner::new().fwd(find).run()
+    }
+}
diff --git a/vendor/memchr/src/arch/all/rabinkarp.rs b/vendor/memchr/src/arch/all/rabinkarp.rs
new file mode 100644
index 0000000..e0bafba
--- /dev/null
+++ b/vendor/memchr/src/arch/all/rabinkarp.rs
@@ -0,0 +1,390 @@
+/*!
+An implementation of the [Rabin-Karp substring search algorithm][rabinkarp].
+
+Rabin-Karp works by creating a hash of the needle provided and then computing
+a rolling hash for each needle sized window in the haystack. When the rolling
+hash matches the hash of the needle, a byte-wise comparison is done to check
+if a match exists. The worst case time complexity of Rabin-Karp is `O(m * n)`
+where `m ~ len(needle)` and `n ~ len(haystack)`.
+
+/*
+(This was the comment I wrote for this module originally when it was not
+exposed. The comment still looks useful, but it's a bit in the weeds, so it's
+not public itself.)
+
+This module implements the classical Rabin-Karp substring search algorithm,
+with no extra frills. While its use would seem to break our time complexity
+guarantee of O(m+n) (RK's time complexity is O(mn)), we are careful to only
+ever use RK on a constant subset of haystacks. The main point here is that
+RK has good latency properties for small needles/haystacks. It's very quick
+to compute a needle hash and zip through the haystack when compared to
+initializing Two-Way, for example. And this is especially useful for cases
+where the haystack is just too short for vector instructions to do much good.
+
+The hashing function used here is the same one recommended by ESMAJ.
+
+Another choice instead of Rabin-Karp would be Shift-Or. But its latency
+isn't quite as good since its preprocessing time is a bit more expensive
+(both in practice and in theory). However, perhaps Shift-Or has a place
+somewhere else for short patterns. I think the main problem is that it
+requires space proportional to the alphabet and the needle. If we, for
+example, supported needles up to length 16, then the total table size would
+be len(alphabet) * size_of::<u16>() == 512 bytes. Which isn't exactly small,
+and it's probably bad to put that on the stack. So ideally, we'd throw it on
+the heap, but we'd really like to write as much code as possible without
+using alloc/std. But maybe it's worth the special casing. It's a TODO to
+benchmark.
+
+Wikipedia has a decent explanation, if a bit heavy on the theory:
+https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
+
+But ESMAJ provides something a bit more concrete:
+http://www-igm.univ-mlv.fr/~lecroq/string/node5.html
+
+Finally, aho-corasick uses Rabin-Karp for multiple pattern match in some
+cases:
+https://github.com/BurntSushi/aho-corasick/blob/3852632f10587db0ff72ef29e88d58bf305a0946/src/packed/rabinkarp.rs
+*/
+
+use crate::ext::Pointer;
+
+/// A forward substring searcher using the Rabin-Karp algorithm.
+///
+/// Note that, as a lower level API, a `Finder` does not have access to the
+/// needle it was constructed with. For this reason, executing a search
+/// with a `Finder` requires passing both the needle and the haystack,
+/// where the needle is exactly equivalent to the one given to the `Finder`
+/// at construction time. This design was chosen so that callers can have
+/// more precise control over where and how many times a needle is stored.
+/// For example, in cases where Rabin-Karp is just one of several possible
+/// substring search algorithms.
+#[derive(Clone, Debug)]
+pub struct Finder {
+    /// The actual hash.
+    hash: Hash,
+    /// The factor needed to multiply a byte by in order to subtract it from
+    /// the hash. It is defined to be 2^(n-1) (using wrapping exponentiation),
+    /// where n is the length of the needle. This is how we "remove" a byte
+    /// from the hash once the hash window rolls past it.
+    hash_2pow: u32,
+}
+
+impl Finder {
+    /// Create a new Rabin-Karp forward searcher for the given `needle`.
+    ///
+    /// The needle may be empty. The empty needle matches at every byte
+    /// offset.
+    ///
+    /// Note that callers must pass the same needle to all search calls using
+    /// this `Finder`.
+    #[inline]
+    pub fn new(needle: &[u8]) -> Finder {
+        let mut s = Finder { hash: Hash::new(), hash_2pow: 1 };
+        let first_byte = match needle.get(0) {
+            None => return s,
+            Some(&first_byte) => first_byte,
+        };
+        s.hash.add(first_byte);
+        for b in needle.iter().copied().skip(1) {
+            s.hash.add(b);
+            s.hash_2pow = s.hash_2pow.wrapping_shl(1);
+        }
+        s
+    }
+
+    /// Return the first occurrence of the `needle` in the `haystack`
+    /// given. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The `needle` provided must match the needle given to this finder at
+    /// construction time.
+    ///
+    /// The maximum value this can return is `haystack.len()`, which can only
+    /// occur when the needle and haystack both have length zero. Otherwise,
+    /// for non-empty haystacks, the maximum value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
+        unsafe {
+            let hstart = haystack.as_ptr();
+            let hend = hstart.add(haystack.len());
+            let nstart = needle.as_ptr();
+            let nend = nstart.add(needle.len());
+            let found = self.find_raw(hstart, hend, nstart, nend)?;
+            Some(found.distance(hstart))
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `<= end`. The pointer returned is only ever equivalent
+    /// to `end` when both the needle and haystack are empty. (That is, the
+    /// empty string matches the empty string.)
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// Note that `start` and `end` below refer to both pairs of pointers
+    /// given to this routine. That is, the conditions apply to both
+    /// `hstart`/`hend` and `nstart`/`nend`.
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    /// * It must be the case that `start <= end`.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        hstart: *const u8,
+        hend: *const u8,
+        nstart: *const u8,
+        nend: *const u8,
+    ) -> Option<*const u8> {
+        let hlen = hend.distance(hstart);
+        let nlen = nend.distance(nstart);
+        if nlen > hlen {
+            return None;
+        }
+        let mut cur = hstart;
+        let end = hend.sub(nlen);
+        let mut hash = Hash::forward(cur, cur.add(nlen));
+        loop {
+            if self.hash == hash && is_equal_raw(cur, nstart, nlen) {
+                return Some(cur);
+            }
+            if cur >= end {
+                return None;
+            }
+            hash.roll(self, cur.read(), cur.add(nlen).read());
+            cur = cur.add(1);
+        }
+    }
+}
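Since this `Finder` is exported through `memchr::arch::all::rabinkarp` (in memchr 2.6+), a short usage sketch, with the caveat noted above that the same needle must be supplied at construction and at search time:

```
use memchr::arch::all::rabinkarp::Finder;

fn main() {
    let (haystack, needle) = (b"foobarfoo".as_slice(), b"bar".as_slice());
    let finder = Finder::new(needle);
    // The needle is passed again here; `Finder` does not store it.
    assert_eq!(Some(3), finder.find(haystack, needle));
    assert_eq!(None, finder.find(b"quux", needle));
}
```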
+
+/// A reverse substring searcher using the Rabin-Karp algorithm.
+#[derive(Clone, Debug)]
+pub struct FinderRev(Finder);
+
+impl FinderRev {
+    /// Create a new Rabin-Karp reverse searcher for the given `needle`.
+    #[inline]
+    pub fn new(needle: &[u8]) -> FinderRev {
+        let mut s = FinderRev(Finder { hash: Hash::new(), hash_2pow: 1 });
+        let last_byte = match needle.last() {
+            None => return s,
+            Some(&last_byte) => last_byte,
+        };
+        s.0.hash.add(last_byte);
+        for b in needle.iter().rev().copied().skip(1) {
+            s.0.hash.add(b);
+            s.0.hash_2pow = s.0.hash_2pow.wrapping_shl(1);
+        }
+        s
+    }
+
+    /// Return the last occurrence of the `needle` in the `haystack`
+    /// given. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The `needle` provided must match the needle given to this finder at
+    /// construction time.
+    ///
+    /// The maximum value this can return is `haystack.len()`, which can only
+    /// occur when the needle is empty. Otherwise, for non-empty needles, the
+    /// maximum value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
+        unsafe {
+            let hstart = haystack.as_ptr();
+            let hend = hstart.add(haystack.len());
+            let nstart = needle.as_ptr();
+            let nend = nstart.add(needle.len());
+            let found = self.rfind_raw(hstart, hend, nstart, nend)?;
+            Some(found.distance(hstart))
+        }
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `<= end`. The pointer returned is only ever equivalent
+    /// to `end` when both the needle and haystack are empty. (That is, the
+    /// empty string matches the empty string.)
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// Note that `start` and `end` below refer to both pairs of pointers
+    /// given to this routine. That is, the conditions apply to both
+    /// `hstart`/`hend` and `nstart`/`nend`.
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    /// * It must be the case that `start <= end`.
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        hstart: *const u8,
+        hend: *const u8,
+        nstart: *const u8,
+        nend: *const u8,
+    ) -> Option<*const u8> {
+        let hlen = hend.distance(hstart);
+        let nlen = nend.distance(nstart);
+        if nlen > hlen {
+            return None;
+        }
+        let mut cur = hend.sub(nlen);
+        let start = hstart;
+        let mut hash = Hash::reverse(cur, cur.add(nlen));
+        loop {
+            if self.0.hash == hash && is_equal_raw(cur, nstart, nlen) {
+                return Some(cur);
+            }
+            if cur <= start {
+                return None;
+            }
+            cur = cur.sub(1);
+            hash.roll(&self.0, cur.add(nlen).read(), cur.read());
+        }
+    }
+}
+
+/// Whether RK is believed to be very fast for the given needle/haystack.
+#[inline]
+pub(crate) fn is_fast(haystack: &[u8], _needle: &[u8]) -> bool {
+    haystack.len() < 16
+}
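A matching sketch for the reverse searcher (same assumptions as the forward example above):

```
use memchr::arch::all::rabinkarp::FinderRev;

fn main() {
    let (haystack, needle) = (b"foobarfoo".as_slice(), b"foo".as_slice());
    let finder = FinderRev::new(needle);
    // `rfind` reports the starting offset of the *last* occurrence.
    assert_eq!(Some(6), finder.rfind(haystack, needle));
    assert_eq!(None, finder.rfind(b"barbar", needle));
}
```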
+
+/// A Rabin-Karp hash. This might represent the hash of a needle, or the
+/// hash of a rolling window in the haystack.
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
+struct Hash(u32);
+
+impl Hash {
+    /// Create a new hash that represents the empty string.
+    #[inline(always)]
+    fn new() -> Hash {
+        Hash(0)
+    }
+
+    /// Create a new hash from the bytes given for use in forward searches.
+    ///
+    /// # Safety
+    ///
+    /// The given pointers must be valid to read from within their range.
+    #[inline(always)]
+    unsafe fn forward(mut start: *const u8, end: *const u8) -> Hash {
+        let mut hash = Hash::new();
+        while start < end {
+            hash.add(start.read());
+            start = start.add(1);
+        }
+        hash
+    }
+
+    /// Create a new hash from the bytes given for use in reverse searches.
+    ///
+    /// # Safety
+    ///
+    /// The given pointers must be valid to read from within their range.
+    #[inline(always)]
+    unsafe fn reverse(start: *const u8, mut end: *const u8) -> Hash {
+        let mut hash = Hash::new();
+        while start < end {
+            end = end.sub(1);
+            hash.add(end.read());
+        }
+        hash
+    }
+
+    /// Add 'new' and remove 'old' from this hash. The given needle hash
+    /// should correspond to the hash computed for the needle being searched
+    /// for.
+    ///
+    /// This is meant to be used when the rolling window of the haystack is
+    /// advanced.
+    #[inline(always)]
+    fn roll(&mut self, finder: &Finder, old: u8, new: u8) {
+        self.del(finder, old);
+        self.add(new);
+    }
+
+    /// Add a byte to this hash.
+    #[inline(always)]
+    fn add(&mut self, byte: u8) {
+        self.0 = self.0.wrapping_shl(1).wrapping_add(u32::from(byte));
+    }
+
+    /// Remove a byte from this hash. The given needle hash should correspond
+    /// to the hash computed for the needle being searched for.
+    #[inline(always)]
+    fn del(&mut self, finder: &Finder, byte: u8) {
+        let factor = finder.hash_2pow;
+        self.0 = self.0.wrapping_sub(u32::from(byte).wrapping_mul(factor));
+    }
+}
+
+/// Returns true when `x[i] == y[i]` for all `0 <= i < n`.
+///
+/// We forcefully don't inline this to hint at the compiler that it is
+/// unlikely to be called. This causes the inner rabinkarp loop above to be a
+/// bit tighter and leads to some performance improvement. See the
+/// memmem/krate/prebuilt/sliceslice-words/words benchmark.
+///
+/// # Safety
+///
+/// Same as `crate::arch::all::is_equal_raw`.
+#[cold]
+#[inline(never)]
+unsafe fn is_equal_raw(x: *const u8, y: *const u8, n: usize) -> bool {
+    crate::arch::all::is_equal_raw(x, y, n)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    define_substring_forward_quickcheck!(|h, n| Some(
+        Finder::new(n).find(h, n)
+    ));
+    define_substring_reverse_quickcheck!(|h, n| Some(
+        FinderRev::new(n).rfind(h, n)
+    ));
+
+    #[test]
+    fn forward() {
+        crate::tests::substring::Runner::new()
+            .fwd(|h, n| Some(Finder::new(n).find(h, n)))
+            .run();
+    }
+
+    #[test]
+    fn reverse() {
+        crate::tests::substring::Runner::new()
+            .rev(|h, n| Some(FinderRev::new(n).rfind(h, n)))
+            .run();
+    }
+}
diff --git a/vendor/memchr/src/arch/all/shiftor.rs b/vendor/memchr/src/arch/all/shiftor.rs
new file mode 100644
index 0000000..b690564
--- /dev/null
+++ b/vendor/memchr/src/arch/all/shiftor.rs
@@ -0,0 +1,89 @@
+/*!
+An implementation of the [Shift-Or substring search algorithm][shiftor].
+
+[shiftor]: https://en.wikipedia.org/wiki/Bitap_algorithm
+*/
+
+use alloc::boxed::Box;
+
+/// The type of our mask.
+///
+/// While we don't expose any way to configure this in the public API, if one
+/// really needs less memory usage or support for longer needles, then it is
+/// suggested to copy the code from this module and modify it to fit your
+/// needs. The code below is written to be correct regardless of whether
+/// Mask is a u8, u16, u32, u64 or u128.
+type Mask = u16;
+
+/// A forward substring searcher using the Shift-Or algorithm.
+#[derive(Debug)]
+pub struct Finder {
+    masks: Box<[Mask; 256]>,
+    needle_len: usize,
+}
+
+impl Finder {
+    const MAX_NEEDLE_LEN: usize = (Mask::BITS - 1) as usize;
+
+    /// Create a new Shift-Or forward searcher for the given `needle`.
+    ///
+    /// The needle may be empty. The empty needle matches at every byte
+    /// offset.
+    #[inline]
+    pub fn new(needle: &[u8]) -> Option<Finder> {
+        let needle_len = needle.len();
+        if needle_len > Finder::MAX_NEEDLE_LEN {
+            // A match is found when bit `needle_len` is cleared in 'result'
+            // in the search routine below, so the needle can't have more
+            // than `Mask::BITS - 1` bytes (15 for the u16 mask used here).
+            // We could permit bigger needles by using u32, u64 or u128 for
+            // our mask entries.
+            return None;
+        }
+        let mut searcher = Finder { masks: Box::from([!0; 256]), needle_len };
+        for (i, &byte) in needle.iter().enumerate() {
+            searcher.masks[usize::from(byte)] &= !(1 << i);
+        }
+        Some(searcher)
+    }
+
+    /// Return the first occurrence of the needle given to `Finder::new` in
+    /// the `haystack` given. If no such occurrence exists, then `None` is
+    /// returned.
+    ///
+    /// Unlike most other substring search implementations in this crate, this
+    /// finder does not require passing the needle at search time. A match can
+    /// be determined without the needle at all since the required information
+    /// is already encoded into this finder at construction time.
+    ///
+    /// The maximum value this can return is `haystack.len()`, which can only
+    /// occur when the needle and haystack both have length zero. Otherwise,
+    /// for non-empty haystacks, the maximum value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        if self.needle_len == 0 {
+            return Some(0);
+        }
+        let mut result = !1;
+        for (i, &byte) in haystack.iter().enumerate() {
+            result |= self.masks[usize::from(byte)];
+            result <<= 1;
+            if result & (1 << self.needle_len) == 0 {
+                return Some(i + 1 - self.needle_len);
+            }
+        }
+        None
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n)?.find(h)));
+
+    #[test]
+    fn forward() {
+        crate::tests::substring::Runner::new()
+            .fwd(|h, n| Some(Finder::new(n)?.find(h)))
+            .run();
+    }
+}
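A usage sketch for the Shift-Or finder above (again assuming the module is reachable as `memchr::arch::all::shiftor`, per memchr 2.6+). Note the fallible construction: with the `u16` mask, needles longer than 15 bytes are refused:

```
use memchr::arch::all::shiftor::Finder;

fn main() {
    let finder = Finder::new(b"bar").unwrap();
    // Unlike the Rabin-Karp finder above, the needle is fully encoded in
    // the masks, so only the haystack is needed at search time.
    assert_eq!(Some(3), finder.find(b"foobarfoo"));
    assert_eq!(None, finder.find(b"quux"));
    // Needles longer than Mask::BITS - 1 == 15 bytes are rejected.
    assert!(Finder::new(&[b'a'; 16]).is_none());
}
```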
diff --git a/vendor/memchr/src/memmem/twoway.rs b/vendor/memchr/src/arch/all/twoway.rs
similarity index 79%
rename from vendor/memchr/src/memmem/twoway.rs
rename to vendor/memchr/src/arch/all/twoway.rs
index 7f82ed1..0df3b4a 100644
--- a/vendor/memchr/src/memmem/twoway.rs
+++ b/vendor/memchr/src/arch/all/twoway.rs
@@ -1,31 +1,62 @@
+/*!
+An implementation of the [Two-Way substring search algorithm][two-way].
+
+[`Finder`] can be built for forward searches, while [`FinderRev`] can be built
+for reverse searches.
+
+Two-Way makes for a nice general purpose substring search algorithm because of
+its time and space complexity properties. It also performs well in practice.
+Namely, with `m = len(needle)` and `n = len(haystack)`, Two-Way takes `O(m)`
+time to create a finder, `O(1)` space and `O(n)` search time. In other words,
+the preprocessing step is quick, doesn't require any heap memory and the worst
+case search time is guaranteed to be linear in the haystack regardless of the
+size of the needle.
+
+While vector algorithms will usually beat Two-Way handily, vector algorithms
+also usually have pathological or edge cases that are better handled by
+Two-Way. Moreover, not all targets support vector algorithms, and
+implementations for them may simply not exist yet.
+
+Two-Way can be found in the `memmem` implementations in at least [GNU libc]
+and [musl].
+
+[two-way]: https://en.wikipedia.org/wiki/Two-way_string-matching_algorithm
+[GNU libc]: https://www.gnu.org/software/libc/
+[musl]: https://www.musl-libc.org/
+*/
+
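As a usage sketch of the two finders this file now exposes (assuming they are reachable as `memchr::arch::all::twoway::{Finder, FinderRev}`, as in upstream memchr 2.6+):

```
use memchr::arch::all::twoway::{Finder, FinderRev};

fn main() {
    let (haystack, needle) = (b"abababab".as_slice(), b"ab".as_slice());
    // Construction is O(len(needle)) time and allocation free.
    let fwd = Finder::new(needle);
    let rev = FinderRev::new(needle);
    assert_eq!(Some(0), fwd.find(haystack, needle));
    assert_eq!(Some(6), rev.rfind(haystack, needle));
}
```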
 use core::cmp;
 
-use crate::memmem::{prefilter::Pre, util};
+use crate::{
+    arch::all::{is_prefix, is_suffix},
+    memmem::Pre,
+};
 
-/// Two-Way search in the forward direction.
+/// A forward substring searcher that uses the Two-Way algorithm.
 #[derive(Clone, Copy, Debug)]
-pub(crate) struct Forward(TwoWay);
+pub struct Finder(TwoWay);
 
-/// Two-Way search in the reverse direction.
+/// A reverse substring searcher that uses the Two-Way algorithm.
 #[derive(Clone, Copy, Debug)]
-pub(crate) struct Reverse(TwoWay);
+pub struct FinderRev(TwoWay);
 
-/// An implementation of the TwoWay substring search algorithm, with heuristics
-/// for accelerating search based on frequency analysis.
+/// An implementation of the TwoWay substring search algorithm.
 ///
 /// This searcher supports forward and reverse search, although not
-/// simultaneously. It runs in O(n + m) time and O(1) space, where
+/// simultaneously. It runs in `O(n + m)` time and `O(1)` space, where
 /// `n ~ len(needle)` and `m ~ len(haystack)`.
 ///
 /// The implementation here roughly matches that which was developed by
 /// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The
 /// changes in this implementation are 1) the use of zero-based indices, 2) a
 /// heuristic skip table based on the last byte (borrowed from Rust's standard
-/// library) and 3) the addition of heuristics for a fast skip loop. That is,
-/// (3) this will detect bytes that are believed to be rare in the needle and
-/// use fast vectorized instructions to find their occurrences quickly. The
-/// Two-Way algorithm is then used to confirm whether a match at that location
-/// occurred.
+/// library) and 3) the addition of heuristics for a fast skip loop. For (3),
+/// callers can pass any kind of prefilter they want, but usually it's one
+/// based on a heuristic that uses an approximate background frequency of bytes
+/// to choose rare bytes to quickly look for candidate match positions. Note
+/// though that currently, this prefilter functionality is not exposed directly
+/// in the public API. (File an issue if you want it and provide a use case
+/// please.)
 ///
 /// The heuristic for fast skipping is automatically shut off if it's
 /// detected to be ineffective at search time. Generally, this only occurs in
@@ -36,20 +67,20 @@ pub(crate) struct Reverse(TwoWay);
 /// likely necessary to read the Two-Way paper cited above in order to fully
 /// grok this code. The essence of it is:
 ///
-/// 1) Do something to detect a "critical" position in the needle.
-/// 2) For the current position in the haystack, look if needle[critical..]
-///    matches at that position.
-/// 3) If so, look if needle[..critical] matches.
-/// 4) If a mismatch occurs, shift the search by some amount based on the
-///    critical position and a pre-computed shift.
+/// 1. Do something to detect a "critical" position in the needle.
+/// 2. For the current position in the haystack, look if `needle[critical..]`
+///    matches at that position.
+/// 3. If so, look if `needle[..critical]` matches.
+/// 4. If a mismatch occurs, shift the search by some amount based on the
+///    critical position and a pre-computed shift.
 ///
-/// This type is wrapped in Forward and Reverse types that expose consistent
-/// forward or reverse APIs.
+/// This type is wrapped in the forward and reverse finders that expose
+/// consistent forward or reverse APIs.
 #[derive(Clone, Copy, Debug)]
 struct TwoWay {
-    /// A small bitset used as a quick prefilter (in addition to the faster
-    /// SIMD based prefilter). Namely, a bit 'i' is set if and only if b%64==i
-    /// for any b in the needle.
+    /// A small bitset used as a quick prefilter (in addition to any prefilter
+    /// given by the caller). Namely, a bit `i` is set if and only if `b%64==i`
+    /// for any byte `b` in the needle.
     ///
     /// When used as a prefilter, if the last byte at the current candidate
     /// position is NOT in this set, then we can skip that entire candidate
@@ -74,14 +105,13 @@ struct TwoWay {
     shift: Shift,
 }
 
-impl Forward {
-    /// Create a searcher that uses the Two-Way algorithm by searching forwards
-    /// through any haystack.
-    pub(crate) fn new(needle: &[u8]) -> Forward {
-        if needle.is_empty() {
-            return Forward(TwoWay::empty());
-        }
-
+impl Finder {
+    /// Create a searcher that finds occurrences of the given `needle`.
+    ///
+    /// An empty `needle` results in a match at every position in a haystack,
+    /// including at `haystack.len()`.
+    #[inline]
+    pub fn new(needle: &[u8]) -> Finder {
         let byteset = ApproximateByteSet::new(needle);
         let min_suffix = Suffix::forward(needle, SuffixKind::Minimal);
         let max_suffix = Suffix::forward(needle, SuffixKind::Maximal);
@@ -92,27 +122,38 @@ impl Forward {
             (max_suffix.period, max_suffix.pos)
         };
         let shift = Shift::forward(needle, period_lower_bound, critical_pos);
-        Forward(TwoWay { byteset, critical_pos, shift })
+        Finder(TwoWay { byteset, critical_pos, shift })
     }
 
-    /// Find the position of the first occurrence of this searcher's needle in
-    /// the given haystack. If one does not exist, then return None.
+    /// Returns the first occurrence of `needle` in the given `haystack`, or
+    /// `None` if no such occurrence could be found.
+    ///
+    /// The `needle` given must be the same as the `needle` provided to
+    /// [`Finder::new`].
     ///
-    /// This accepts prefilter state that is useful when using the same
-    /// searcher multiple times, such as in an iterator.
+    /// An empty `needle` results in a match at every position in a haystack,
+    /// including at `haystack.len()`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
+        self.find_with_prefilter(None, haystack, needle)
+    }
+
+    /// This is like [`Finder::find`], but it accepts a prefilter for
+    /// accelerating searches.
     ///
-    /// Callers must guarantee that the needle is non-empty and its length is
-    /// <= the haystack's length.
+    /// Currently this is not exposed in the public API because, at the time
+    /// of writing, I didn't want to spend time thinking about how to expose
+    /// the prefilter infrastructure (if at all). If you have a compelling use
+    /// case for exposing this routine, please create an issue. Do *not* open
+    /// a PR that just exposes `Pre` and friends. Exporting this routine will
+    /// require API design.
     #[inline(always)]
-    pub(crate) fn find(
+    pub(crate) fn find_with_prefilter(
         &self,
-        pre: Option<&mut Pre<'_>>,
+        pre: Option<Pre<'_>>,
         haystack: &[u8],
         needle: &[u8],
     ) -> Option<usize> {
-        debug_assert!(!needle.is_empty(), "needle should not be empty");
-        debug_assert!(needle.len() <= haystack.len(), "haystack too short");
-
         match self.0.shift {
             Shift::Small { period } => {
                 self.find_small_imp(pre, haystack, needle, period)
@@ -123,25 +164,6 @@
         }
     }
 
-    /// Like find, but handles the degenerate substring test cases. This is
-    /// only useful for conveniently testing this substring implementation in
-    /// isolation.
-    #[cfg(test)]
-    fn find_general(
-        &self,
-        pre: Option<&mut Pre<'_>>,
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        if needle.is_empty() {
-            Some(0)
-        } else if haystack.len() < needle.len() {
-            None
-        } else {
-            self.find(pre, haystack, needle)
-        }
-    }
-
     // Each of the two search implementations below can be accelerated by a
     // prefilter, but it is not always enabled. To avoid its overhead when
     // it's disabled, we explicitly inline each search implementation based on
@@ -151,19 +173,22 @@
     #[inline(always)]
     fn find_small_imp(
         &self,
-        mut pre: Option<&mut Pre<'_>>,
+        mut pre: Option<Pre<'_>>,
        haystack: &[u8],
         needle: &[u8],
         period: usize,
     ) -> Option<usize> {
-        let last_byte = needle.len() - 1;
         let mut pos = 0;
         let mut shift = 0;
+        let last_byte_pos = match needle.len().checked_sub(1) {
+            None => return Some(pos),
+            Some(last_byte) => last_byte,
+        };
         while pos + needle.len() <= haystack.len() {
             let mut i = cmp::max(self.0.critical_pos, shift);
             if let Some(pre) = pre.as_mut() {
-                if pre.should_call() {
-                    pos += pre.call(&haystack[pos..], needle)?;
+                if pre.is_effective() {
+                    pos += pre.find(&haystack[pos..])?;
                     shift = 0;
                     i = self.0.critical_pos;
                     if pos + needle.len() > haystack.len() {
@@ -171,7 +196,7 @@
                 }
             }
-            if !self.0.byteset.contains(haystack[pos + last_byte]) {
+            if !self.0.byteset.contains(haystack[pos + last_byte_pos]) {
                 pos += needle.len();
                 shift = 0;
                 continue;
             }
@@ -200,24 +225,27 @@
     #[inline(always)]
     fn find_large_imp(
         &self,
-        mut pre: Option<&mut Pre<'_>>,
+        mut pre: Option<Pre<'_>>,
         haystack: &[u8],
         needle: &[u8],
         shift: usize,
     ) -> Option<usize> {
-        let last_byte = needle.len() - 1;
         let mut pos = 0;
+        let last_byte_pos = match needle.len().checked_sub(1) {
+            None => return Some(pos),
+            Some(last_byte) => last_byte,
+        };
         'outer: while pos + needle.len() <= haystack.len() {
             if let Some(pre) = pre.as_mut() {
-                if pre.should_call() {
-                    pos += pre.call(&haystack[pos..], needle)?;
+                if pre.is_effective() {
+                    pos += pre.find(&haystack[pos..])?;
                     if pos + needle.len() > haystack.len() {
                         return None;
                     }
                 }
             }
-            if !self.0.byteset.contains(haystack[pos + last_byte]) {
+            if !self.0.byteset.contains(haystack[pos + last_byte_pos]) {
                 pos += needle.len();
                 continue;
             }
@@ -241,14 +269,13 @@
         }
     }
 
-impl Reverse {
-    /// Create a searcher that uses the Two-Way algorithm by searching in
-    /// reverse through any haystack.
-    pub(crate) fn new(needle: &[u8]) -> Reverse {
-        if needle.is_empty() {
-            return Reverse(TwoWay::empty());
-        }
-
+impl FinderRev {
+    /// Create a searcher that finds occurrences of the given `needle`.
+    ///
+    /// An empty `needle` results in a match at every position in a haystack,
+    /// including at `haystack.len()`.
+    #[inline]
+    pub fn new(needle: &[u8]) -> FinderRev {
         let byteset = ApproximateByteSet::new(needle);
         let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal);
         let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal);
@@ -258,27 +285,20 @@
         } else {
             (max_suffix.period, max_suffix.pos)
         };
-        // let critical_pos = needle.len() - critical_pos;
         let shift = Shift::reverse(needle, period_lower_bound, critical_pos);
-        Reverse(TwoWay { byteset, critical_pos, shift })
+        FinderRev(TwoWay { byteset, critical_pos, shift })
     }
 
-    /// Find the position of the last occurrence of this searcher's needle
-    /// in the given haystack. If one does not exist, then return None.
+    /// Returns the last occurrence of `needle` in the given `haystack`, or
+    /// `None` if no such occurrence could be found.
     ///
-    /// This will automatically initialize prefilter state. This should only
-    /// be used for one-off searches.
+    /// The `needle` given must be the same as the `needle` provided to
+    /// [`FinderRev::new`].
     ///
-    /// Callers must guarantee that the needle is non-empty and its length is
-    /// <= the haystack's length.
-    #[inline(always)]
-    pub(crate) fn rfind(
-        &self,
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        debug_assert!(!needle.is_empty(), "needle should not be empty");
-        debug_assert!(needle.len() <= haystack.len(), "haystack too short");
+    /// An empty `needle` results in a match at every position in a haystack,
+    /// including at `haystack.len()`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
         // For the reverse case, we don't use a prefilter. It's plausible that
         // perhaps we should, but it's a lot of additional code to do it, and
         // it's not clear that it's actually worth it. If you have a really
@@ -293,20 +313,6 @@
         }
     }
 
-    /// Like rfind, but handles the degenerate substring test cases. This is
-    /// only useful for conveniently testing this substring implementation in
-    /// isolation.
-    #[cfg(test)]
-    fn rfind_general(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
-        if needle.is_empty() {
-            Some(haystack.len())
-        } else if haystack.len() < needle.len() {
-            None
-        } else {
-            self.rfind(haystack, needle)
-        }
-    }
-
     #[inline(always)]
     fn rfind_small_imp(
         &self,
@@ -317,6 +323,10 @@
         let nlen = needle.len();
         let mut pos = haystack.len();
         let mut shift = nlen;
+        let first_byte = match needle.get(0) {
+            None => return Some(pos),
+            Some(&first_byte) => first_byte,
+        };
         while pos >= nlen {
             if !self.0.byteset.contains(haystack[pos - nlen]) {
                 pos -= nlen;
@@ -327,7 +337,7 @@
             while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
                 i -= 1;
             }
-            if i > 0 || needle[0] != haystack[pos - nlen] {
+            if i > 0 || first_byte != haystack[pos - nlen] {
                 pos -= self.0.critical_pos - i + 1;
                 shift = nlen;
             } else {
@@ -354,6 +364,10 @@
     ) -> Option<usize> {
         let nlen = needle.len();
         let mut pos = haystack.len();
+        let first_byte = match needle.get(0) {
+            None => return Some(pos),
+            Some(&first_byte) => first_byte,
+        };
         while pos >= nlen {
             if !self.0.byteset.contains(haystack[pos - nlen]) {
                 pos -= nlen;
@@ -363,7 +377,7 @@
             while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
                 i -= 1;
             }
-            if i > 0 || needle[0] != haystack[pos - nlen] {
+            if i > 0 || first_byte != haystack[pos - nlen] {
                 pos -= self.0.critical_pos - i + 1;
             } else {
                 let mut j = self.0.critical_pos;
@@ -380,16 +394,6 @@
         }
     }
 
-impl TwoWay {
-    fn empty() -> TwoWay {
-        TwoWay {
-            byteset: ApproximateByteSet::new(b""),
-            critical_pos: 0,
-            shift: Shift::Large { shift: 0 },
-        }
-    }
-}
-
 /// A representation of the amount we're allowed to shift by during Two-Way
 /// search.
 ///
@@ -444,7 +448,7 @@ impl Shift {
         }
 
         let (u, v) = needle.split_at(critical_pos);
-        if !util::is_suffix(&v[..period_lower_bound], u) {
+        if !is_suffix(&v[..period_lower_bound], u) {
             return Shift::Large { shift: large };
         }
         Shift::Small { period: period_lower_bound }
@@ -467,7 +471,7 @@ impl Shift {
         }
 
         let (v, u) = needle.split_at(critical_pos);
-        if !util::is_prefix(&v[v.len() - period_lower_bound..], u) {
+        if !is_prefix(&v[v.len() - period_lower_bound..], u) {
             return Shift::Large { shift: large };
         }
         Shift::Small { period: period_lower_bound }
@@ -494,8 +498,6 @@ struct Suffix {
 
 impl Suffix {
     fn forward(needle: &[u8], kind: SuffixKind) -> Suffix {
-        debug_assert!(!needle.is_empty());
-
         // suffix represents our maximal (or minimal) suffix, along with
         // its period.
         let mut suffix = Suffix { pos: 0, period: 1 };
@@ -544,14 +546,15 @@ impl Suffix {
     }
 
     fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix {
-        debug_assert!(!needle.is_empty());
-
         // See the comments in `forward` for how this works.
         let mut suffix = Suffix { pos: needle.len(), period: 1 };
         if needle.len() == 1 {
             return suffix;
         }
-        let mut candidate_start = needle.len() - 1;
+        let mut candidate_start = match needle.len().checked_sub(1) {
+            None => return suffix,
+            Some(candidate_start) => candidate_start,
+        };
         let mut offset = 0;
 
         while offset < candidate_start {
@@ -665,17 +668,12 @@ impl ApproximateByteSet {
     }
 }
 
-#[cfg(all(test, feature = "std", not(miri)))]
+#[cfg(test)]
 mod tests {
-    use quickcheck::quickcheck;
+    use alloc::vec::Vec;
 
     use super::*;
 
-    define_memmem_quickcheck_tests!(
-        super::simpletests::twoway_find,
-        super::simpletests::twoway_rfind
-    );
-
     /// Convenience wrapper for computing the suffix as a byte string.
     fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) {
         let s = Suffix::forward(needle, kind);
@@ -710,13 +708,34 @@ mod tests {
         got
     }
 
+    define_substring_forward_quickcheck!(|h, n| Some(
+        Finder::new(n).find(h, n)
+    ));
+    define_substring_reverse_quickcheck!(|h, n| Some(
+        FinderRev::new(n).rfind(h, n)
+    ));
+
+    #[test]
+    fn forward() {
+        crate::tests::substring::Runner::new()
+            .fwd(|h, n| Some(Finder::new(n).find(h, n)))
+            .run();
+    }
+
+    #[test]
+    fn reverse() {
+        crate::tests::substring::Runner::new()
+            .rev(|h, n| Some(FinderRev::new(n).rfind(h, n)))
+            .run();
+    }
+
     #[test]
     fn suffix_forward() {
         macro_rules! assert_suffix_min {
             ($given:expr, $expected:expr, $period:expr) => {
                 let (got_suffix, got_period) =
                     get_suffix_forward($given.as_bytes(), SuffixKind::Minimal);
-                let got_suffix = std::str::from_utf8(got_suffix).unwrap();
+                let got_suffix = core::str::from_utf8(got_suffix).unwrap();
                 assert_eq!(($expected, $period), (got_suffix, got_period));
             };
         }
@@ -725,7 +744,7 @@ mod tests {
             ($given:expr, $expected:expr, $period:expr) => {
                 let (got_suffix, got_period) =
                     get_suffix_forward($given.as_bytes(), SuffixKind::Maximal);
-                let got_suffix = std::str::from_utf8(got_suffix).unwrap();
+                let got_suffix = core::str::from_utf8(got_suffix).unwrap();
                 assert_eq!(($expected, $period), (got_suffix, got_period));
             };
         }
@@ -773,7 +792,7 @@ mod tests {
             ($given:expr, $expected:expr, $period:expr) => {
                 let (got_suffix, got_period) =
                     get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal);
-                let got_suffix = std::str::from_utf8(got_suffix).unwrap();
+                let got_suffix = core::str::from_utf8(got_suffix).unwrap();
                 assert_eq!(($expected, $period), (got_suffix, got_period));
             };
         }
@@ -782,7 +801,7 @@ mod tests {
             ($given:expr, $expected:expr, $period:expr) => {
                 let (got_suffix, got_period) =
                     get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal);
-                let got_suffix = std::str::from_utf8(got_suffix).unwrap();
+                let got_suffix = core::str::from_utf8(got_suffix).unwrap();
                 assert_eq!(($expected, $period), (got_suffix, got_period));
             };
         }
@@ -821,7 +840,8 @@ mod tests {
         assert_suffix_max!("aaa", "aaa", 1);
     }
 
-    quickcheck! {
+    #[cfg(not(miri))]
+    quickcheck::quickcheck! {
         fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool {
             if bytes.is_empty() {
                 return true;
@@ -842,27 +862,6 @@ mod tests {
             expected == got
         }
     }
-}
-
-#[cfg(test)]
-mod simpletests {
-    use super::*;
-
-    pub(crate) fn twoway_find(
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        Forward::new(needle).find_general(None, haystack, needle)
-    }
-
-    pub(crate) fn twoway_rfind(
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        Reverse::new(needle).rfind_general(haystack, needle)
-    }
-
-    define_memmem_simple_tests!(twoway_find, twoway_rfind);
 
     // This is a regression test caught by quickcheck that exercised a bug in
     // the reverse small period handling. The bug was that we were using 'if j
@@ -870,7 +869,7 @@ mod simpletests {
     // j >= shift', which matches the corresponding guard in the forward impl.
     #[test]
     fn regression_rev_small_period() {
-        let rfind = super::simpletests::twoway_rfind;
+        let rfind = |h, n| FinderRev::new(n).rfind(h, n);
         let haystack = "ababaz";
         let needle = "abab";
         assert_eq!(Some(0), rfind(haystack.as_bytes(), needle.as_bytes()));
diff --git a/vendor/memchr/src/arch/generic/memchr.rs b/vendor/memchr/src/arch/generic/memchr.rs
new file mode 100644
index 0000000..580b3cc
--- /dev/null
+++ b/vendor/memchr/src/arch/generic/memchr.rs
@@ -0,0 +1,1214 @@
+/*!
+Generic crate-internal routines for the `memchr` family of functions.
+*/
+
+// What follows is a vector algorithm generic over the specific vector
+// type to detect the position of one, two or three needles in a haystack.
+// From what I know, this is a "classic" algorithm, although I don't
+// believe it has been published in any peer reviewed journal. I believe
+// it can be found in places like glibc and Go's standard library. It
+// appears to be well known and is elaborated on in more detail here:
+// https://gms.tf/stdfind-and-memchr-optimizations.html
+//
+// While the routine below is fairly long and perhaps intimidating, the basic
+// idea is actually very simple and can be expressed straightforwardly in
+// pseudo code. The pseudo code below is written for 128 bit vectors, but the
+// actual code below works for anything that implements the Vector trait.
+//
+// needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1
+// // Note: shift amount is in bytes
+//
+// while i <= haystack.len() - 16:
+//   // A 16 byte vector. Each byte in chunk corresponds to a byte in
+//   // the haystack.
+//   chunk = haystack[i:i+16]
+//   // Compare bytes in needle with bytes in chunk. The result is a 16
+//   // byte chunk where each byte is 0xFF if the corresponding bytes
+//   // in needle and chunk were equal, or 0x00 otherwise.
+//   eqs = cmpeq(needle, chunk)
+//   // Return a 32 bit integer where the most significant 16 bits
+//   // are always 0 and the lower 16 bits correspond to whether the
+//   // most significant bit in the corresponding byte in `eqs` is set.
+//   // In other words, `mask as u16` has bit i set if and only if
+//   // needle[i] == chunk[i].
+//   mask = movemask(eqs)
+//
+//   // Mask is 0 if there is no match, and non-zero otherwise.
+//   if mask != 0:
+//     // trailing_zeros tells us the position of the least significant
+//     // bit that is set.
+//     return i + trailing_zeros(mask)
+//
+// // haystack length may not be a multiple of 16, so search the rest.
+// while i < haystack.len():
+//   if haystack[i] == n1:
+//     return i
+//
+// // No match found.
+// return NULL
+//
+// In fact, we could loosely translate the above code to Rust line-for-line
+// and it would be a pretty fast algorithm. But, we pull out all the stops
+// to go as fast as possible:
+//
+// 1. We use aligned loads. That is, we do some finagling to make sure our
+//    primary loop not only proceeds in increments of 16 bytes, but that
+//    the address of haystack's pointer that we dereference is aligned to
+//    16 bytes. 16 is a magic number here because it is the size of an SSE2
+//    128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
+//    Therefore, to get aligned loads, our pointer's address must be evenly
+//    divisible by 16.
+// 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
+//    kind of like loop unrolling, but we combine the equality comparisons
+//    using a vector OR such that we only need to extract a single mask to
+//    determine whether a match exists or not. If so, then we do some
+//    book-keeping to determine the precise location but otherwise mush on.
+// 3. We use our "chunk" comparison routine in as many places as possible,
+//    even if it means using unaligned loads. In particular, if haystack
+//    starts with an unaligned address, then we do an unaligned load to
+//    search the first 16 bytes. We then start our primary loop at the
+//    smallest subsequent aligned address, which will actually overlap with
+//    previously searched bytes. But we're OK with that. We do a similar
+//    dance at the end of our primary loop. Finally, to avoid a
+//    byte-at-a-time loop at the end, we do a final 16 byte unaligned load
+//    that may overlap with a previous load. This is OK because it converts
+//    a loop into a small number of very fast vector instructions. The
+//    overlap is OK because we know the place where the overlap occurs does
+//    not contain a match.
+//
+// And that's pretty much all there is to it. Note that since the below is
+// generic and since it's meant to be inlined into routines with a
+// `#[target_feature(enable = "...")]` annotation, we must mark all routines
+// as both unsafe and `#[inline(always)]`.
+//
+// The fact that the code below is generic does somewhat inhibit us. For
+// example, I've noticed that introducing an uninlinable `#[cold]` function
+// to handle the match case in the loop generates tighter assembly, but there
+// is no way to do this in the generic code below because the generic code
+// doesn't know what `target_feature` annotation to apply to the uninlinable
+// function. We could make such functions part of the `Vector` trait, but we
+// instead live with the slightly sub-optimal codegen for now since it
+// doesn't seem to have a noticeable perf difference.
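The comment above notes that the pseudo code could be translated to Rust line-for-line. As an editorial illustration (not part of the vendored source), here is such a loose, safe-Rust translation, with the vector cmpeq/movemask pair emulated by plain byte comparisons:

```
fn memchr_scalar_chunks(n1: u8, haystack: &[u8]) -> Option<usize> {
    const CHUNK: usize = 16;
    let mut i = 0;
    // Primary loop: examine one 16-byte "vector" at a time.
    while i + CHUNK <= haystack.len() {
        let chunk = &haystack[i..i + CHUNK];
        // `mask` plays the role of movemask(cmpeq(needle, chunk)):
        // bit j is set if and only if chunk[j] == n1.
        let mut mask = 0u16;
        for (j, &b) in chunk.iter().enumerate() {
            mask |= u16::from(b == n1) << j;
        }
        if mask != 0 {
            return Some(i + mask.trailing_zeros() as usize);
        }
        i += CHUNK;
    }
    // The haystack length may not be a multiple of 16; finish byte-wise.
    haystack[i..].iter().position(|&b| b == n1).map(|j| i + j)
}

fn main() {
    assert_eq!(Some(25), memchr_scalar_chunks(b'z', b"abcdefghijklmnopqrstuvwxyz"));
    assert_eq!(None, memchr_scalar_chunks(b'!', b"hello world"));
}
```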
+
+use crate::{
+    ext::Pointer,
+    vector::{MoveMask, Vector},
+};
+
+/// Finds all occurrences of a single byte in a haystack.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct One<V> {
+    s1: u8,
+    v1: V,
+}
+
+impl<V: Vector> One<V> {
+    /// The number of bytes we examine per each iteration of our search loop.
+    const LOOP_SIZE: usize = 4 * V::BYTES;
+
+    /// Create a new searcher that finds occurrences of the byte given.
+    #[inline(always)]
+    pub(crate) unsafe fn new(needle: u8) -> One<V> {
+        One { s1: needle, v1: V::splat(needle) }
+    }
+
+    /// Returns the needle given to `One::new`.
+    #[inline(always)]
+    pub(crate) fn needle1(&self) -> u8 {
+        self.s1
+    }
+
+    /// Return a pointer to the first occurrence of the needle in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// # Safety
+    ///
+    /// * It must be the case that `start < end` and that the distance between
+    /// them is at least equal to `V::BYTES`. That is, it must always be valid
+    /// to do at least an unaligned load of `V` at `start`.
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    #[inline(always)]
+    pub(crate) unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        // If we want to support vectors bigger than 256 bits, we probably
+        // need to move up to using a u64 for the masks used below. Currently
+        // they are 32 bits, which means we're SOL for vectors that need masks
+        // bigger than 32 bits. Overall unclear until there's a use case.
+ debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::first_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + // Search a possibly unaligned chunk at `start`. This covers any part + // of the haystack prior to where aligned loads can start. + if let Some(cur) = self.search_chunk(start, topos) { + return Some(cur); + } + // Set `cur` to the first V-aligned pointer greater than `start`. + let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); + debug_assert!(cur > start && end.sub(V::BYTES) >= start); + if len >= Self::LOOP_SIZE { + while cur <= end.sub(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(1 * V::BYTES)); + let c = V::load_aligned(cur.add(2 * V::BYTES)); + let d = V::load_aligned(cur.add(3 * V::BYTES)); + let eqa = self.v1.cmpeq(a); + let eqb = self.v1.cmpeq(b); + let eqc = self.v1.cmpeq(c); + let eqd = self.v1.cmpeq(d); + let or1 = eqa.or(eqb); + let or2 = eqc.or(eqd); + let or3 = or1.or(or2); + if or3.movemask_will_have_non_zero() { + let mask = eqa.movemask(); + if mask.has_non_zero() { + return Some(cur.add(topos(mask))); + } + + let mask = eqb.movemask(); + if mask.has_non_zero() { + return Some(cur.add(1 * V::BYTES).add(topos(mask))); + } + + let mask = eqc.movemask(); + if mask.has_non_zero() { + return Some(cur.add(2 * V::BYTES).add(topos(mask))); + } + + let mask = eqd.movemask(); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(3 * V::BYTES).add(topos(mask))); + } + cur = cur.add(Self::LOOP_SIZE); + } + } + // Handle any leftovers after the aligned loop above. We use unaligned + // loads here, but I believe we are guaranteed that they are aligned + // since `cur` is aligned. + while cur <= end.sub(V::BYTES) { + debug_assert!(end.distance(cur) >= V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + cur = cur.add(V::BYTES); + } + // Finally handle any remaining bytes less than the size of V. In this + // case, our pointer may indeed be unaligned and the load may overlap + // with the previous one. But that's okay since we know the previous + // load didn't lead to a match (otherwise we wouldn't be here). + if cur < end { + debug_assert!(end.distance(cur) < V::BYTES); + cur = cur.sub(V::BYTES - end.distance(cur)); + debug_assert_eq!(end.distance(cur), V::BYTES); + return self.search_chunk(cur, topos); + } + None + } + + /// Return a pointer to the last occurrence of the needle in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. 
+ /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + // If we want to support vectors bigger than 256 bits, we probably + // need to move up to using a u64 for the masks used below. Currently + // they are 32 bits, which means we're SOL for vectors that need masks + // bigger than 32 bits. Overall unclear until there's a use case. + debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::last_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { + return Some(cur); + } + let mut cur = end.sub(end.as_usize() & V::ALIGN); + debug_assert!(start <= cur && cur <= end); + if len >= Self::LOOP_SIZE { + while cur >= start.add(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + cur = cur.sub(Self::LOOP_SIZE); + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(1 * V::BYTES)); + let c = V::load_aligned(cur.add(2 * V::BYTES)); + let d = V::load_aligned(cur.add(3 * V::BYTES)); + let eqa = self.v1.cmpeq(a); + let eqb = self.v1.cmpeq(b); + let eqc = self.v1.cmpeq(c); + let eqd = self.v1.cmpeq(d); + let or1 = eqa.or(eqb); + let or2 = eqc.or(eqd); + let or3 = or1.or(or2); + if or3.movemask_will_have_non_zero() { + let mask = eqd.movemask(); + if mask.has_non_zero() { + return Some(cur.add(3 * V::BYTES).add(topos(mask))); + } + + let mask = eqc.movemask(); + if mask.has_non_zero() { + return Some(cur.add(2 * V::BYTES).add(topos(mask))); + } + + let mask = eqb.movemask(); + if mask.has_non_zero() { + return Some(cur.add(1 * V::BYTES).add(topos(mask))); + } + + let mask = eqa.movemask(); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(topos(mask))); + } + } + } + while cur >= start.add(V::BYTES) { + debug_assert!(cur.distance(start) >= V::BYTES); + cur = cur.sub(V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + } + if cur > start { + debug_assert!(cur.distance(start) < V::BYTES); + return self.search_chunk(start, topos); + } + None + } + + /// Return a count of all matching bytes in the given haystack. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. 
+    #[inline(always)]
+    pub(crate) unsafe fn count_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> usize {
+        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
+
+        let confirm = |b| b == self.needle1();
+        let len = end.distance(start);
+        debug_assert!(
+            len >= V::BYTES,
+            "haystack has length {}, but must be at least {}",
+            len,
+            V::BYTES
+        );
+
+        // Set `cur` to the first V-aligned pointer greater than `start`.
+        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
+        // Count any matching bytes before we start our aligned loop.
+        let mut count = count_byte_by_byte(start, cur, confirm);
+        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
+        if len >= Self::LOOP_SIZE {
+            while cur <= end.sub(Self::LOOP_SIZE) {
+                debug_assert_eq!(0, cur.as_usize() % V::BYTES);
+
+                let a = V::load_aligned(cur);
+                let b = V::load_aligned(cur.add(1 * V::BYTES));
+                let c = V::load_aligned(cur.add(2 * V::BYTES));
+                let d = V::load_aligned(cur.add(3 * V::BYTES));
+                let eqa = self.v1.cmpeq(a);
+                let eqb = self.v1.cmpeq(b);
+                let eqc = self.v1.cmpeq(c);
+                let eqd = self.v1.cmpeq(d);
+                count += eqa.movemask().count_ones();
+                count += eqb.movemask().count_ones();
+                count += eqc.movemask().count_ones();
+                count += eqd.movemask().count_ones();
+                cur = cur.add(Self::LOOP_SIZE);
+            }
+        }
+        // Handle any leftovers after the aligned loop above. We use unaligned
+        // loads here, but I believe we are guaranteed that they are aligned
+        // since `cur` is aligned.
+        while cur <= end.sub(V::BYTES) {
+            debug_assert!(end.distance(cur) >= V::BYTES);
+            let chunk = V::load_unaligned(cur);
+            count += self.v1.cmpeq(chunk).movemask().count_ones();
+            cur = cur.add(V::BYTES);
+        }
+        // And finally count any leftovers that weren't caught above.
+        count += count_byte_by_byte(cur, end, confirm);
+        count
+    }
+
+    /// Search `V::BYTES` starting at `cur` via an unaligned load.
+    ///
+    /// `mask_to_offset` should be a function that converts a `movemask` to
+    /// an offset such that `cur.add(offset)` corresponds to a pointer to the
+    /// match location if one is found. Generally it is expected to use either
+    /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether
+    /// one is implementing a forward or reverse search, respectively.
+    ///
+    /// # Safety
+    ///
+    /// `cur` must be a valid pointer and it must be valid to do an unaligned
+    /// load of size `V::BYTES` at `cur`.
+    #[inline(always)]
+    unsafe fn search_chunk(
+        &self,
+        cur: *const u8,
+        mask_to_offset: impl Fn(V::Mask) -> usize,
+    ) -> Option<*const u8> {
+        let chunk = V::load_unaligned(cur);
+        let mask = self.v1.cmpeq(chunk).movemask();
+        if mask.has_non_zero() {
+            Some(cur.add(mask_to_offset(mask)))
+        } else {
+            None
+        }
+    }
+}
+
+/// Finds all occurrences of two bytes in a haystack.
+///
+/// That is, this reports matches of one of two possible bytes. For example,
+/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
+/// `4` and `5`.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct Two<V> {
+    s1: u8,
+    s2: u8,
+    v1: V,
+    v2: V,
+}
+
+impl<V: Vector> Two<V> {
+    /// The number of bytes we examine per each iteration of our search loop.
+    const LOOP_SIZE: usize = 2 * V::BYTES;
+
+    /// Create a new searcher that finds occurrences of the two needle bytes
+    /// given.
+    #[inline(always)]
+    pub(crate) unsafe fn new(needle1: u8, needle2: u8) -> Two<V> {
+        Two {
+            s1: needle1,
+            s2: needle2,
+            v1: V::splat(needle1),
+            v2: V::splat(needle2),
+        }
+    }
+
+    /// Returns the first needle given to `Two::new`.
+ #[inline(always)] + pub(crate) fn needle1(&self) -> u8 { + self.s1 + } + + /// Returns the second needle given to `Two::new`. + #[inline(always)] + pub(crate) fn needle2(&self) -> u8 { + self.s2 + } + + /// Return a pointer to the first occurrence of one of the needles in the + /// given haystack. If no such occurrence exists, then `None` is returned. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + // If we want to support vectors bigger than 256 bits, we probably + // need to move up to using a u64 for the masks used below. Currently + // they are 32 bits, which means we're SOL for vectors that need masks + // bigger than 32 bits. Overall unclear until there's a use case. + debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::first_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + // Search a possibly unaligned chunk at `start`. This covers any part + // of the haystack prior to where aligned loads can start. + if let Some(cur) = self.search_chunk(start, topos) { + return Some(cur); + } + // Set `cur` to the first V-aligned pointer greater than `start`. + let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); + debug_assert!(cur > start && end.sub(V::BYTES) >= start); + if len >= Self::LOOP_SIZE { + while cur <= end.sub(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(V::BYTES)); + let eqa1 = self.v1.cmpeq(a); + let eqb1 = self.v1.cmpeq(b); + let eqa2 = self.v2.cmpeq(a); + let eqb2 = self.v2.cmpeq(b); + let or1 = eqa1.or(eqb1); + let or2 = eqa2.or(eqb2); + let or3 = or1.or(or2); + if or3.movemask_will_have_non_zero() { + let mask = eqa1.movemask().or(eqa2.movemask()); + if mask.has_non_zero() { + return Some(cur.add(topos(mask))); + } + + let mask = eqb1.movemask().or(eqb2.movemask()); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(V::BYTES).add(topos(mask))); + } + cur = cur.add(Self::LOOP_SIZE); + } + } + // Handle any leftovers after the aligned loop above. We use unaligned + // loads here, but I believe we are guaranteed that they are aligned + // since `cur` is aligned. + while cur <= end.sub(V::BYTES) { + debug_assert!(end.distance(cur) >= V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + cur = cur.add(V::BYTES); + } + // Finally handle any remaining bytes less than the size of V. 
In this + // case, our pointer may indeed be unaligned and the load may overlap + // with the previous one. But that's okay since we know the previous + // load didn't lead to a match (otherwise we wouldn't be here). + if cur < end { + debug_assert!(end.distance(cur) < V::BYTES); + cur = cur.sub(V::BYTES - end.distance(cur)); + debug_assert_eq!(end.distance(cur), V::BYTES); + return self.search_chunk(cur, topos); + } + None + } + + /// Return a pointer to the last occurrence of the needle in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + // If we want to support vectors bigger than 256 bits, we probably + // need to move up to using a u64 for the masks used below. Currently + // they are 32 bits, which means we're SOL for vectors that need masks + // bigger than 32 bits. Overall unclear until there's a use case. + debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::last_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { + return Some(cur); + } + let mut cur = end.sub(end.as_usize() & V::ALIGN); + debug_assert!(start <= cur && cur <= end); + if len >= Self::LOOP_SIZE { + while cur >= start.add(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + cur = cur.sub(Self::LOOP_SIZE); + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(V::BYTES)); + let eqa1 = self.v1.cmpeq(a); + let eqb1 = self.v1.cmpeq(b); + let eqa2 = self.v2.cmpeq(a); + let eqb2 = self.v2.cmpeq(b); + let or1 = eqa1.or(eqb1); + let or2 = eqa2.or(eqb2); + let or3 = or1.or(or2); + if or3.movemask_will_have_non_zero() { + let mask = eqb1.movemask().or(eqb2.movemask()); + if mask.has_non_zero() { + return Some(cur.add(V::BYTES).add(topos(mask))); + } + + let mask = eqa1.movemask().or(eqa2.movemask()); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(topos(mask))); + } + } + } + while cur >= start.add(V::BYTES) { + debug_assert!(cur.distance(start) >= V::BYTES); + cur = cur.sub(V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + } + if cur > start { + debug_assert!(cur.distance(start) < V::BYTES); + return self.search_chunk(start, topos); + } + None + } + + /// Search `V::BYTES` starting at `cur` via an unaligned load. 
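// A safe-slice sketch of the final overlapping load described above,
// assuming a hypothetical 16-byte vector; these names are illustrative, not
// the crate's API. The last window is slid left so it ends exactly at the
// end of the haystack; any bytes it re-examines were already searched
// without a match, so a hit it reports is genuinely new.
const CHUNK: usize = 16;

fn last_overlapping_window(haystack: &[u8]) -> (usize, &[u8]) {
    assert!(haystack.len() >= CHUNK);
    let start = haystack.len() - CHUNK;
    (start, &haystack[start..])
}

fn main() {
    let haystack = b"abcdefghijklmnopqrs"; // 19 bytes
    let (start, window) = last_overlapping_window(haystack);
    assert_eq!(start, 3);
    assert_eq!(window.len(), CHUNK);
}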
+ /// + /// `mask_to_offset` should be a function that converts a `movemask` to + /// an offset such that `cur.add(offset)` corresponds to a pointer to the + /// match location if one is found. Generally it is expected to use either + /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether + /// one is implementing a forward or reverse search, respectively. + /// + /// # Safety + /// + /// `cur` must be a valid pointer and it must be valid to do an unaligned + /// load of size `V::BYTES` at `cur`. + #[inline(always)] + unsafe fn search_chunk( + &self, + cur: *const u8, + mask_to_offset: impl Fn(V::Mask) -> usize, + ) -> Option<*const u8> { + let chunk = V::load_unaligned(cur); + let eq1 = self.v1.cmpeq(chunk); + let eq2 = self.v2.cmpeq(chunk); + let mask = eq1.or(eq2).movemask(); + if mask.has_non_zero() { + let mask1 = eq1.movemask(); + let mask2 = eq2.movemask(); + Some(cur.add(mask_to_offset(mask1.or(mask2)))) + } else { + None + } + } +} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Three<V> { + s1: u8, + s2: u8, + s3: u8, + v1: V, + v2: V, + v3: V, +} + +impl<V: Vector> Three<V> { + /// The number of bytes we examine per each iteration of our search loop. + const LOOP_SIZE: usize = 2 * V::BYTES; + + /// Create a new searcher that finds occurrences of the bytes given. + #[inline(always)] + pub(crate) unsafe fn new( + needle1: u8, + needle2: u8, + needle3: u8, + ) -> Three<V> { + Three { + s1: needle1, + s2: needle2, + s3: needle3, + v1: V::splat(needle1), + v2: V::splat(needle2), + v3: V::splat(needle3), + } + } + + /// Returns the first needle given to `Three::new`. + #[inline(always)] + pub(crate) fn needle1(&self) -> u8 { + self.s1 + } + + /// Returns the second needle given to `Three::new`. + #[inline(always)] + pub(crate) fn needle2(&self) -> u8 { + self.s2 + } + + /// Returns the third needle given to `Three::new`. + #[inline(always)] + pub(crate) fn needle3(&self) -> u8 { + self.s3 + } + + /// Return a pointer to the first occurrence of one of the needles in the + /// given haystack. If no such occurrence exists, then `None` is returned. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + // If we want to support vectors bigger than 256 bits, we probably + // need to move up to using a u64 for the masks used below.
Currently + // they are 32 bits, which means we're SOL for vectors that need masks + // bigger than 32 bits. Overall unclear until there's a use case. + debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::first_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + // Search a possibly unaligned chunk at `start`. This covers any part + // of the haystack prior to where aligned loads can start. + if let Some(cur) = self.search_chunk(start, topos) { + return Some(cur); + } + // Set `cur` to the first V-aligned pointer greater than `start`. + let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); + debug_assert!(cur > start && end.sub(V::BYTES) >= start); + if len >= Self::LOOP_SIZE { + while cur <= end.sub(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(V::BYTES)); + let eqa1 = self.v1.cmpeq(a); + let eqb1 = self.v1.cmpeq(b); + let eqa2 = self.v2.cmpeq(a); + let eqb2 = self.v2.cmpeq(b); + let eqa3 = self.v3.cmpeq(a); + let eqb3 = self.v3.cmpeq(b); + let or1 = eqa1.or(eqb1); + let or2 = eqa2.or(eqb2); + let or3 = eqa3.or(eqb3); + let or4 = or1.or(or2); + let or5 = or3.or(or4); + if or5.movemask_will_have_non_zero() { + let mask = eqa1 + .movemask() + .or(eqa2.movemask()) + .or(eqa3.movemask()); + if mask.has_non_zero() { + return Some(cur.add(topos(mask))); + } + + let mask = eqb1 + .movemask() + .or(eqb2.movemask()) + .or(eqb3.movemask()); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(V::BYTES).add(topos(mask))); + } + cur = cur.add(Self::LOOP_SIZE); + } + } + // Handle any leftovers after the aligned loop above. We use unaligned + // loads here, but I believe we are guaranteed that they are aligned + // since `cur` is aligned. + while cur <= end.sub(V::BYTES) { + debug_assert!(end.distance(cur) >= V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + cur = cur.add(V::BYTES); + } + // Finally handle any remaining bytes less than the size of V. In this + // case, our pointer may indeed be unaligned and the load may overlap + // with the previous one. But that's okay since we know the previous + // load didn't lead to a match (otherwise we wouldn't be here). + if cur < end { + debug_assert!(end.distance(cur) < V::BYTES); + cur = cur.sub(V::BYTES - end.distance(cur)); + debug_assert_eq!(end.distance(cur), V::BYTES); + return self.search_chunk(cur, topos); + } + None + } + + /// Return a pointer to the last occurrence of the needle in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. 
+ /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + // If we want to support vectors bigger than 256 bits, we probably + // need to move up to using a u64 for the masks used below. Currently + // they are 32 bits, which means we're SOL for vectors that need masks + // bigger than 32 bits. Overall unclear until there's a use case. + debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::last_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { + return Some(cur); + } + let mut cur = end.sub(end.as_usize() & V::ALIGN); + debug_assert!(start <= cur && cur <= end); + if len >= Self::LOOP_SIZE { + while cur >= start.add(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + cur = cur.sub(Self::LOOP_SIZE); + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(V::BYTES)); + let eqa1 = self.v1.cmpeq(a); + let eqb1 = self.v1.cmpeq(b); + let eqa2 = self.v2.cmpeq(a); + let eqb2 = self.v2.cmpeq(b); + let eqa3 = self.v3.cmpeq(a); + let eqb3 = self.v3.cmpeq(b); + let or1 = eqa1.or(eqb1); + let or2 = eqa2.or(eqb2); + let or3 = eqa3.or(eqb3); + let or4 = or1.or(or2); + let or5 = or3.or(or4); + if or5.movemask_will_have_non_zero() { + let mask = eqb1 + .movemask() + .or(eqb2.movemask()) + .or(eqb3.movemask()); + if mask.has_non_zero() { + return Some(cur.add(V::BYTES).add(topos(mask))); + } + + let mask = eqa1 + .movemask() + .or(eqa2.movemask()) + .or(eqa3.movemask()); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(topos(mask))); + } + } + } + while cur >= start.add(V::BYTES) { + debug_assert!(cur.distance(start) >= V::BYTES); + cur = cur.sub(V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + } + if cur > start { + debug_assert!(cur.distance(start) < V::BYTES); + return self.search_chunk(start, topos); + } + None + } + + /// Search `V::BYTES` starting at `cur` via an unaligned load. + /// + /// `mask_to_offset` should be a function that converts a `movemask` to + /// an offset such that `cur.add(offset)` corresponds to a pointer to the + /// match location if one is found. Generally it is expected to use either + /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether + /// one is implementing a forward or reverse search, respectively. + /// + /// # Safety + /// + /// `cur` must be a valid pointer and it must be valid to do an unaligned + /// load of size `V::BYTES` at `cur`. + #[inline(always)] + unsafe fn search_chunk( + &self, + cur: *const u8, + mask_to_offset: impl Fn(V::Mask) -> usize, + ) -> Option<*const u8> { + let chunk = V::load_unaligned(cur); + let eq1 = self.v1.cmpeq(chunk); + let eq2 = self.v2.cmpeq(chunk); + let eq3 = self.v3.cmpeq(chunk); + let mask = eq1.or(eq2).or(eq3).movemask(); + if mask.has_non_zero() { + let mask1 = eq1.movemask(); + let mask2 = eq2.movemask(); + let mask3 = eq3.movemask(); + Some(cur.add(mask_to_offset(mask1.or(mask2).or(mask3)))) + } else { + None + } + } +} + +/// An iterator over all occurrences of a set of bytes in a haystack. 
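// How a movemask becomes a match offset, sketched with plain integers
// (assumed semantics for the `first_offset`/`last_offset` conversions that
// `topos` is bound to above). Forward searches take the lowest set bit,
// reverse searches the highest.
fn first_offset(mask: u32) -> usize {
    debug_assert!(mask != 0);
    mask.trailing_zeros() as usize
}

fn last_offset(mask: u32) -> usize {
    debug_assert!(mask != 0);
    (31 - mask.leading_zeros()) as usize // index of the highest set bit
}

fn main() {
    assert_eq!(first_offset(0b0110), 1);
    assert_eq!(last_offset(0b0110), 2);
}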
+/// +/// This iterator implements the routines necessary to provide a +/// `DoubleEndedIterator` impl, which means it can also be used to find +/// occurrences in reverse order. +/// +/// The lifetime parameters are as follows: +/// +/// * `'h` refers to the lifetime of the haystack being searched. +/// +/// This type is intended to be used to implement all iterators for the +/// `memchr` family of functions. It handles a tiny bit of marginally tricky +/// raw pointer math, but otherwise expects the caller to provide `find_raw` +/// and `rfind_raw` routines for each call of `next` and `next_back`, +/// respectively. +#[derive(Clone, Debug)] +pub(crate) struct Iter<'h> { + /// The original starting point into the haystack. We use this to convert + /// pointers to offsets. + original_start: *const u8, + /// The current starting point into the haystack. That is, where the next + /// search will begin. + start: *const u8, + /// The current ending point into the haystack. That is, where the next + /// reverse search will begin. + end: *const u8, + /// A marker for tracking the lifetime of the original_start/start/end + /// pointers above, which all point into the haystack. + haystack: core::marker::PhantomData<&'h [u8]>, +} + +// SAFETY: Iter contains no shared references to anything that performs any +// interior mutations. Also, the lifetime guarantees that Iter will not outlive +// the haystack. +unsafe impl<'h> Send for Iter<'h> {} + +// SAFETY: Iter performs no interior mutations, therefore no explicit +// synchronization is necessary. Also, the lifetime guarantees that Iter will +// not outlive the haystack. +unsafe impl<'h> Sync for Iter<'h> {} + +impl<'h> Iter<'h> { + /// Create a new generic memchr iterator. + #[inline(always)] + pub(crate) fn new(haystack: &'h [u8]) -> Iter<'h> { + Iter { + original_start: haystack.as_ptr(), + start: haystack.as_ptr(), + end: haystack.as_ptr().wrapping_add(haystack.len()), + haystack: core::marker::PhantomData, + } + } + + /// Returns the next occurrence in the forward direction. + /// + /// # Safety + /// + /// Callers must ensure that if a pointer is returned from the closure + /// provided, then it must be greater than or equal to the start pointer + /// and less than the end pointer. + #[inline(always)] + pub(crate) unsafe fn next( + &mut self, + mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, + ) -> Option<usize> { + // SAFETY: Pointers are derived directly from the same &[u8] haystack. + // We only ever modify start/end corresponding to a matching offset + // found between start and end. Thus all changes to start/end maintain + // our safety requirements. + // + // The only other assumption we rely on is that the pointer returned + // by `find_raw` satisfies `self.start <= found < self.end`, and that + // safety contract is forwarded to the caller. + let found = find_raw(self.start, self.end)?; + let result = found.distance(self.original_start); + self.start = found.add(1); + Some(result) + } + + /// Returns the number of remaining elements in this iterator. + #[inline(always)] + pub(crate) fn count( + self, + mut count_raw: impl FnMut(*const u8, *const u8) -> usize, + ) -> usize { + // SAFETY: Pointers are derived directly from the same &[u8] haystack. + // We only ever modify start/end corresponding to a matching offset + // found between start and end. Thus all changes to start/end maintain + // our safety requirements. + count_raw(self.start, self.end) + } + + /// Returns the next occurrence in reverse.
+ /// + /// # Safety + /// + /// Callers must ensure that if a pointer is returned from the closure + /// provided, then it must be greater than or equal to the start pointer + /// and less than the end pointer. + #[inline(always)] + pub(crate) unsafe fn next_back( + &mut self, + mut rfind_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, + ) -> Option<usize> { + // SAFETY: Pointers are derived directly from the same &[u8] haystack. + // We only ever modify start/end corresponding to a matching offset + // found between start and end. Thus all changes to start/end maintain + // our safety requirements. + // + // The only other assumption we rely on is that the pointer returned + // by `rfind_raw` satisfies `self.start <= found < self.end`, and that + // safety contract is forwarded to the caller. + let found = rfind_raw(self.start, self.end)?; + let result = found.distance(self.original_start); + self.end = found; + Some(result) + } + + /// Provides an implementation of `Iterator::size_hint`. + #[inline(always)] + pub(crate) fn size_hint(&self) -> (usize, Option<usize>) { + (0, Some(self.end.as_usize().saturating_sub(self.start.as_usize()))) + } +} + +/// Search a slice using a function that operates on raw pointers. +/// +/// Given a function to search a contiguous sequence of memory for the location +/// of a non-empty set of bytes, this will execute that search on a slice of +/// bytes. The pointer returned by the given function will be converted to an +/// offset relative to the starting point of the given slice. That is, if a +/// match is found, the offset returned by this routine is guaranteed to be a +/// valid index into `haystack`. +/// +/// Callers may use this for a forward or reverse search. +/// +/// # Safety +/// +/// Callers must ensure that if a pointer is returned by `find_raw`, then the +/// pointer must be greater than or equal to the starting pointer and less than +/// the end pointer. +#[inline(always)] +pub(crate) unsafe fn search_slice_with_raw( + haystack: &[u8], + mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, +) -> Option<usize> { + // SAFETY: We rely on `find_raw` to return a correct and valid pointer, but + // otherwise, `start` and `end` are valid due to the guarantees provided by + // a &[u8]. + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + let found = find_raw(start, end)?; + Some(found.distance(start)) +} + +/// Performs a forward byte-at-a-time loop until either `ptr >= end_ptr` or +/// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is +/// returned. If the latter occurs, then the pointer at which `confirm` returns +/// `true` is returned. +/// +/// # Safety +/// +/// Callers must provide valid pointers and they must satisfy `start_ptr <= +/// ptr` and `ptr <= end_ptr`. +#[inline(always)] +pub(crate) unsafe fn fwd_byte_by_byte<F: Fn(u8) -> bool>( + start: *const u8, + end: *const u8, + confirm: F, +) -> Option<*const u8> { + debug_assert!(start <= end); + let mut ptr = start; + while ptr < end { + if confirm(*ptr) { + return Some(ptr); + } + ptr = ptr.offset(1); + } + None +} + +/// Performs a reverse byte-at-a-time loop until either `ptr < start_ptr` or +/// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is +/// returned. If the latter occurs, then the pointer at which `confirm` returns +/// `true` is returned. +/// +/// # Safety +/// +/// Callers must provide valid pointers and they must satisfy `start_ptr <= +/// ptr` and `ptr <= end_ptr`.
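// An index-based model of the `Iter` bookkeeping above (a hypothetical type,
// not the crate's API): `next` resumes one past a hit so forward searches
// skip it, while `next_back` shrinks `end` down to the hit so reverse
// searches exclude it.
struct OffsetIter<'h> {
    haystack: &'h [u8],
    needle: u8,
    start: usize,
    end: usize,
}

impl<'h> OffsetIter<'h> {
    fn next(&mut self) -> Option<usize> {
        let i = self.haystack[self.start..self.end]
            .iter()
            .position(|&b| b == self.needle)?;
        let found = self.start + i;
        self.start = found + 1;
        Some(found)
    }

    fn next_back(&mut self) -> Option<usize> {
        let i = self.haystack[self.start..self.end]
            .iter()
            .rposition(|&b| b == self.needle)?;
        let found = self.start + i;
        self.end = found;
        Some(found)
    }
}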
+#[inline(always)] +pub(crate) unsafe fn rev_byte_by_byte<F: Fn(u8) -> bool>( + start: *const u8, + end: *const u8, + confirm: F, +) -> Option<*const u8> { + debug_assert!(start <= end); + + let mut ptr = end; + while ptr > start { + ptr = ptr.offset(-1); + if confirm(*ptr) { + return Some(ptr); + } + } + None +} + +/// Performs a forward byte-at-a-time loop until `ptr >= end_ptr` and returns +/// the number of times `confirm(*ptr)` returns `true`. +/// +/// # Safety +/// +/// Callers must provide valid pointers and they must satisfy `start_ptr <= +/// ptr` and `ptr <= end_ptr`. +#[inline(always)] +pub(crate) unsafe fn count_byte_by_byte<F: Fn(u8) -> bool>( + start: *const u8, + end: *const u8, + confirm: F, +) -> usize { + debug_assert!(start <= end); + let mut ptr = start; + let mut count = 0; + while ptr < end { + if confirm(*ptr) { + count += 1; + } + ptr = ptr.offset(1); + } + count +} diff --git a/vendor/memchr/src/arch/generic/mod.rs b/vendor/memchr/src/arch/generic/mod.rs new file mode 100644 index 0000000..63ee3f0 --- /dev/null +++ b/vendor/memchr/src/arch/generic/mod.rs @@ -0,0 +1,14 @@ +/*! +This module defines "generic" routines that can be specialized to specific +architectures. + +We don't expose this module primarily because it would require exposing all +of the internal infrastructure required to write these generic routines. +That infrastructure should be treated as an implementation detail so that +it is allowed to evolve. Instead, what we expose are architecture specific +instantiations of these generic implementations. The generic code just lets us +write the code once (usually). +*/ + +pub(crate) mod memchr; +pub(crate) mod packedpair; diff --git a/vendor/memchr/src/arch/generic/packedpair.rs b/vendor/memchr/src/arch/generic/packedpair.rs new file mode 100644 index 0000000..8d97cf2 --- /dev/null +++ b/vendor/memchr/src/arch/generic/packedpair.rs @@ -0,0 +1,317 @@ +/*! +Generic crate-internal routines for the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use crate::{ + arch::all::{is_equal_raw, packedpair::Pair}, + ext::Pointer, + vector::{MoveMask, Vector}, +}; + +/// A generic architecture dependent "packed pair" finder. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +/// +/// This is architecture dependent because it uses specific vector operations +/// to look for occurrences of the pair of bytes. +/// +/// This type is not meant to be exported and is instead meant to be used as +/// the implementation for architecture specific facades. Why? Because it's a +/// bit of a quirky API that requires `inline(always)` annotations. And pretty +/// much everything has safety obligations due (at least) to the caller needing +/// to inline calls into routines marked with +/// `#[target_feature(enable = "...")]`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Finder<V> { + pair: Pair, + v1: V, + v2: V, + min_haystack_len: usize, +} + +impl<V: Vector> Finder<V> { + /// Create a new pair searcher.
The searcher returned can either report + /// exact matches of `needle` or act as a prefilter and report candidate + /// positions of `needle`. + /// + /// # Safety + /// + /// Callers must ensure that whatever vector type this routine is called + /// with is supported by the current environment. + /// + /// Callers must also ensure that `needle.len() >= 2`. + #[inline(always)] + pub(crate) unsafe fn new(needle: &[u8], pair: Pair) -> Finder<V> { + let max_index = pair.index1().max(pair.index2()); + let min_haystack_len = + core::cmp::max(needle.len(), usize::from(max_index) + V::BYTES); + let v1 = V::splat(needle[usize::from(pair.index1())]); + let v2 = V::splat(needle[usize::from(pair.index2())]); + Finder { pair, v1, v2, min_haystack_len } + } + + /// Searches the given haystack for the given needle. The needle given + /// should be the same as the needle that this finder was initialized + /// with. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// Since this is meant to be used with vector functions, callers need to + /// specialize this inside of a function with a `target_feature` attribute. + /// Therefore, callers must ensure that whatever target feature is being + /// used supports the vector functions that this function is specialized + /// for. (For the specific vector functions used, see the Vector trait + /// implementations.) + #[inline(always)] + pub(crate) unsafe fn find( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + assert!( + haystack.len() >= self.min_haystack_len, + "haystack too small, should be at least {} but got {}", + self.min_haystack_len, + haystack.len(), + ); + + let all = V::Mask::all_zeros_except_least_significant(0); + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + let max = end.sub(self.min_haystack_len); + let mut cur = start; + + // N.B. I did experiment with unrolling the loop to deal with size(V) + // bytes at a time and 2*size(V) bytes at a time. The double unroll + // was marginally faster while the quadruple unroll was unambiguously + // slower. In the end, I decided the complexity from unrolling wasn't + // worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to + // compare. + while cur <= max { + if let Some(chunki) = self.find_in_chunk(needle, cur, end, all) { + return Some(matched(start, cur, chunki)); + } + cur = cur.add(V::BYTES); + } + if cur < end { + let remaining = end.distance(cur); + debug_assert!( + remaining < self.min_haystack_len, + "remaining bytes should be smaller than the minimum haystack \ + length of {}, but there are {} bytes remaining", + self.min_haystack_len, + remaining, + ); + if remaining < needle.len() { + return None; + } + debug_assert!( + max < cur, + "after main loop, cur should have exceeded max", + ); + let overlap = cur.distance(max); + debug_assert!( + overlap > 0, + "overlap ({}) must always be non-zero", + overlap, + ); + debug_assert!( + overlap < V::BYTES, + "overlap ({}) cannot possibly be >= than a vector ({})", + overlap, + V::BYTES, + ); + // The mask has all of its bits set except for the first N least + // significant bits, where N=overlap. This way, any matches that + // occur in find_in_chunk within the overlap are automatically + // ignored.
+ let mask = V::Mask::all_zeros_except_least_significant(overlap); + cur = max; + let m = self.find_in_chunk(needle, cur, end, mask); + if let Some(chunki) = m { + return Some(matched(start, cur, chunki)); + } + } + None + } + + /// Searches the given haystack for offsets that represent candidate + /// matches of the `needle` given to this finder's constructor. The offsets + /// returned, if they are a match, correspond to the starting offset of + /// `needle` in the given `haystack`. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// Since this is meant to be used with vector functions, callers need to + /// specialize this inside of a function with a `target_feature` attribute. + /// Therefore, callers must ensure that whatever target feature is being + /// used supports the vector functions that this function is specialized + /// for. (For the specific vector functions used, see the Vector trait + /// implementations.) + #[inline(always)] + pub(crate) unsafe fn find_prefilter( + &self, + haystack: &[u8], + ) -> Option<usize> { + assert!( + haystack.len() >= self.min_haystack_len, + "haystack too small, should be at least {} but got {}", + self.min_haystack_len, + haystack.len(), + ); + + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + let max = end.sub(self.min_haystack_len); + let mut cur = start; + + // N.B. I did experiment with unrolling the loop to deal with size(V) + // bytes at a time and 2*size(V) bytes at a time. The double unroll + // was marginally faster while the quadruple unroll was unambiguously + // slower. In the end, I decided the complexity from unrolling wasn't + // worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to + // compare. + while cur <= max { + if let Some(chunki) = self.find_prefilter_in_chunk(cur) { + return Some(matched(start, cur, chunki)); + } + cur = cur.add(V::BYTES); + } + if cur < end { + // This routine immediately quits if a candidate match is found. + // That means that if we're here, no candidate matches have been + // found at or before 'ptr'. Thus, we don't need to mask anything + // out even though we might technically search part of the haystack + // that we've already searched (because we know it can't match). + cur = max; + if let Some(chunki) = self.find_prefilter_in_chunk(cur) { + return Some(matched(start, cur, chunki)); + } + } + None + } + + /// Search for an occurrence of our byte pair from the needle in the chunk + /// pointed to by cur, with the end of the haystack pointed to by end. + /// When an occurrence is found, memcmp is run to check if a match occurs + /// at the corresponding position. + /// + /// `mask` should have bits set corresponding to the positions in the chunk + /// in which matches are considered. This is only used for the last vector + /// load where the beginning of the vector might have overlapped with the + /// last load in the main loop. The mask lets us avoid visiting positions + /// that have already been discarded as matches. + /// + /// # Safety + /// + /// It must be safe to do an unaligned read of size(V) bytes starting at + /// both (cur + self.index1) and (cur + self.index2). It must also be safe + /// to do unaligned loads on cur up to (end - needle.len()).
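// Two mask tricks used above and in `find_in_chunk` below, sketched with
// plain u32 masks (assumed semantics, not the crate's MoveMask API). The
// overlap mask zeroes the `overlap` least significant lanes so positions
// already examined by the previous load are ignored; the candidate loop
// visits each remaining set bit from low to high.
fn overlap_mask(overlap: u32) -> u32 {
    debug_assert!(overlap < 32);
    !((1u32 << overlap) - 1)
}

fn candidate_offsets(mut mask: u32) -> Vec<usize> {
    let mut out = Vec::new();
    while mask != 0 {
        out.push(mask.trailing_zeros() as usize);
        mask &= mask - 1; // the clear_least_significant_bit analogue
    }
    out
}

fn main() {
    assert_eq!(overlap_mask(3) & 0b1111, 0b1000);
    assert_eq!(candidate_offsets(0b1010_0100), vec![2, 5, 7]);
}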
+ #[inline(always)] + unsafe fn find_in_chunk( + &self, + needle: &[u8], + cur: *const u8, + end: *const u8, + mask: V::Mask, + ) -> Option<usize> { + let index1 = usize::from(self.pair.index1()); + let index2 = usize::from(self.pair.index2()); + let chunk1 = V::load_unaligned(cur.add(index1)); + let chunk2 = V::load_unaligned(cur.add(index2)); + let eq1 = chunk1.cmpeq(self.v1); + let eq2 = chunk2.cmpeq(self.v2); + + let mut offsets = eq1.and(eq2).movemask().and(mask); + while offsets.has_non_zero() { + let offset = offsets.first_offset(); + let cur = cur.add(offset); + if end.sub(needle.len()) < cur { + return None; + } + if is_equal_raw(needle.as_ptr(), cur, needle.len()) { + return Some(offset); + } + offsets = offsets.clear_least_significant_bit(); + } + None + } + + /// Search for an occurrence of our byte pair from the needle in the chunk + /// pointed to by cur, with the end of the haystack pointed to by end. + /// Unlike `find_in_chunk`, no memcmp verification is run; the candidate + /// offset is reported as-is. + /// + /// # Safety + /// + /// It must be safe to do an unaligned read of size(V) bytes starting at + /// both (cur + self.index1) and (cur + self.index2). It must also be safe + /// to do unaligned reads on cur up to (end - needle.len()). + #[inline(always)] + unsafe fn find_prefilter_in_chunk(&self, cur: *const u8) -> Option<usize> { + let index1 = usize::from(self.pair.index1()); + let index2 = usize::from(self.pair.index2()); + let chunk1 = V::load_unaligned(cur.add(index1)); + let chunk2 = V::load_unaligned(cur.add(index2)); + let eq1 = chunk1.cmpeq(self.v1); + let eq2 = chunk2.cmpeq(self.v2); + + let offsets = eq1.and(eq2).movemask(); + if !offsets.has_non_zero() { + return None; + } + Some(offsets.first_offset()) + } + + /// Returns the pair of offsets (into the needle) used to check as a + /// predicate before confirming whether a needle exists at a particular + /// position. + #[inline] + pub(crate) fn pair(&self) -> &Pair { + &self.pair + } + + /// Returns the minimum haystack length that this `Finder` can search. + /// + /// Providing a haystack to this `Finder` shorter than this length is + /// guaranteed to result in a panic. + #[inline(always)] + pub(crate) fn min_haystack_len(&self) -> usize { + self.min_haystack_len + } +} + +/// Accepts a chunk-relative offset and returns a haystack relative offset. +/// +/// This used to be marked `#[cold]` and `#[inline(never)]`, but I couldn't +/// observe a consistent measurable difference between that and just inlining +/// it. So we go with inlining it. +/// +/// # Safety +/// +/// Same as `ptr::offset_from` in addition to `cur >= start`. +#[inline(always)] +unsafe fn matched(start: *const u8, cur: *const u8, chunki: usize) -> usize { + cur.distance(start) + chunki +} + +// If you're looking for tests, those are run for each instantiation of the +// above code. So for example, see arch::x86_64::sse2::packedpair. diff --git a/vendor/memchr/src/arch/mod.rs b/vendor/memchr/src/arch/mod.rs new file mode 100644 index 0000000..2f63a1a --- /dev/null +++ b/vendor/memchr/src/arch/mod.rs @@ -0,0 +1,16 @@ +/*! +A module with low-level architecture dependent routines. + +These routines are useful as primitives for tasks not covered by the higher +level crate API.
+*/ + +pub mod all; +pub(crate) mod generic; + +#[cfg(target_arch = "aarch64")] +pub mod aarch64; +#[cfg(target_arch = "wasm32")] +pub mod wasm32; +#[cfg(target_arch = "x86_64")] +pub mod x86_64; diff --git a/vendor/memchr/src/arch/wasm32/memchr.rs b/vendor/memchr/src/arch/wasm32/memchr.rs new file mode 100644 index 0000000..b0bbd1c --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/memchr.rs @@ -0,0 +1,137 @@ +/*! +Wrapper routines for `memchr` and friends. + +These routines choose the best implementation at compile time. (This is +different from `x86_64` because it is expected that `simd128` is almost always +available for `wasm32` targets.) +*/ + +macro_rules! defraw { + ($ty:ident, $find:ident, $start:ident, $end:ident, $($needles:ident),+) => {{ + #[cfg(target_feature = "simd128")] + { + use crate::arch::wasm32::simd128::memchr::$ty; + + debug!("chose simd128 for {}", stringify!($ty)); + debug_assert!($ty::is_available()); + // SAFETY: We know that wasm memchr is always available whenever + // code is compiled for `wasm32` with the `simd128` target feature + // enabled. + $ty::new_unchecked($($needles),+).$find($start, $end) + } + #[cfg(not(target_feature = "simd128"))] + { + use crate::arch::all::memchr::$ty; + + debug!( + "no simd128 feature available, using fallback for {}", + stringify!($ty), + ); + $ty::new($($needles),+).$find($start, $end) + } + }} +} + +/// memchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(One, find_raw, start, end, n1) +} + +/// memrchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(One, rfind_raw, start, end, n1) +} + +/// memchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Two, find_raw, start, end, n1, n2) +} + +/// memrchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Two, rfind_raw, start, end, n1, n2) +} + +/// memchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Three, find_raw, start, end, n1, n2, n3) +} + +/// memrchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Three, rfind_raw, start, end, n1, n2, n3) +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::count_raw`. 
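// The shape of the compile-time dispatch that `defraw!` expands to, in plain
// code. `vector_find` is a scalar stand-in used only for illustration here,
// not the crate's simd128 searcher.
fn vector_find(needle: u8, haystack: &[u8]) -> Option<usize> {
    haystack.iter().position(|&b| b == needle)
}

fn memchr_dispatch(needle: u8, haystack: &[u8]) -> Option<usize> {
    #[cfg(target_feature = "simd128")]
    {
        // Chosen at compile time; no runtime feature detection on wasm32.
        vector_find(needle, haystack)
    }
    #[cfg(not(target_feature = "simd128"))]
    {
        // Scalar fallback for builds without simd128.
        haystack.iter().position(|&b| b == needle)
    }
}

fn main() {
    assert_eq!(memchr_dispatch(b'z', b"xyz"), Some(2));
}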
+#[inline(always)] +pub(crate) unsafe fn count_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> usize { + defraw!(One, count_raw, start, end, n1) +} diff --git a/vendor/memchr/src/arch/wasm32/mod.rs b/vendor/memchr/src/arch/wasm32/mod.rs new file mode 100644 index 0000000..209f876 --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/mod.rs @@ -0,0 +1,7 @@ +/*! +Vector algorithms for the `wasm32` target. +*/ + +pub mod simd128; + +pub(crate) mod memchr; diff --git a/vendor/memchr/src/arch/wasm32/simd128/memchr.rs b/vendor/memchr/src/arch/wasm32/simd128/memchr.rs new file mode 100644 index 0000000..fa314c9 --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/simd128/memchr.rs @@ -0,0 +1,1020 @@ +/*! +This module defines 128-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, than using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point, it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::wasm32::v128; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One(generic::One<v128>); + +impl One { + /// Create a new searcher that finds occurrences of the needle byte given. + /// + /// This particular searcher is specialized to use simd128 vector + /// instructions that typically make it quite fast. + /// + /// If simd128 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle: u8) -> Option<One> { + if One::is_available() { + // SAFETY: we check that simd128 is available above. + unsafe { Some(One::new_unchecked(needle)) } + } else { + None + } + } + + /// Create a new finder specific to simd128 vectors and routines without + /// checking that simd128 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `simd128` + /// instructions in the current environment. + #[target_feature(enable = "simd128")] + #[inline] + pub unsafe fn new_unchecked(needle: u8) -> One { + One(generic::One::new(needle)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`One::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `One::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true.
+ #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "simd128")] + { + true + } + #[cfg(not(target_feature = "simd128"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Counts all occurrences of this byte in the given haystack. + #[inline] + pub fn count(&self, haystack: &[u8]) -> usize { + // SAFETY: All of our pointers are derived directly from a borrowed + // slice, which is guaranteed to be valid. + unsafe { + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + self.count_raw(start, end) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'simd128' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`.
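// Caller-side sketch of the constructor contract above: `new` returns
// `None` exactly when simd128 was not compiled in, so a scalar fallback
// keeps the routine total. (`first_nl` is a hypothetical helper, not part
// of this crate.)
fn first_nl(haystack: &[u8]) -> Option<usize> {
    match One::new(b'\n') {
        Some(searcher) => searcher.find(haystack),
        None => haystack.iter().position(|&b| b == b'\n'),
    }
}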
+ /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'simd128' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.rfind_raw_impl(start, end) + } + + /// Counts all occurrences of this byte in the given haystack represented + /// by raw pointers. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, the count will always be `0`. + #[inline] + pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { + if start >= end { + return 0; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::count_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'simd128' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.count_raw_impl(start, end) + } + + /// Execute a search using simd128 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::find_raw`], except the distance between `start` and + /// `end` must be at least the size of a simd128 vector (in bytes).
+ /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `simd128` routines.) + #[target_feature(enable = "simd128")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using simd128 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of a simd128 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `simd128` routines.) + #[target_feature(enable = "simd128")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Execute a count using simd128 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::count_raw`], except the distance between `start` and + /// `end` must be at least the size of a simd128 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `simd128` routines.) + #[target_feature(enable = "simd128")] + #[inline] + unsafe fn count_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + self.0.count_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { + OneIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { + searcher: &'a One, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn count(self) -> usize { + self.it.count(|s, e| { + // SAFETY: We rely on our generic iterator to return valid start + // and end pointers.
+ unsafe { self.searcher.count_raw(s, e) } + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two(generic::Two<v128>); + +impl Two { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use simd128 vector + /// instructions that typically make it quite fast. + /// + /// If simd128 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle1: u8, needle2: u8) -> Option<Two> { + if Two::is_available() { + // SAFETY: we check that simd128 is available above. + unsafe { Some(Two::new_unchecked(needle1, needle2)) } + } else { + None + } + } + + /// Create a new finder specific to simd128 vectors and routines without + /// checking that simd128 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `simd128` + /// instructions in the current environment. + #[target_feature(enable = "simd128")] + #[inline] + pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { + Two(generic::Two::new(needle1, needle2)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Two::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Two::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "simd128")] + { + true + } + #[cfg(not(target_feature = "simd128"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers.
+ unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'simd128' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'simd128' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using simd128 vectors and routines. 
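// Caller-side sketch for `Two`, mirroring the `One` pattern: fall back to a
// scalar scan when simd128 is unavailable. (`first_line_break` is a
// hypothetical helper, not part of this crate.)
fn first_line_break(haystack: &[u8]) -> Option<usize> {
    match Two::new(b'\r', b'\n') {
        Some(searcher) => searcher.find(haystack),
        None => haystack.iter().position(|&b| b == b'\r' || b == b'\n'),
    }
}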
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of a simd128 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `simd128` routines.)
+    #[target_feature(enable = "simd128")]
+    #[inline]
+    unsafe fn find_raw_impl(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.0.find_raw(start, end)
+    }
+
+    /// Execute a search using simd128 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of a simd128 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `simd128` routines.)
+    #[target_feature(enable = "simd128")]
+    #[inline]
+    unsafe fn rfind_raw_impl(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.0.rfind_raw(start, end)
+    }
+
+    /// Returns an iterator over all occurrences of the needle bytes in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
+        TwoIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+}
+
+/// An iterator over all occurrences of two possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`Two::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct TwoIter<'a, 'h> {
+    searcher: &'a Two,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {}
+
+/// Finds all occurrences of three bytes in a haystack.
+///
+/// That is, this reports matches of one of three possible bytes. For example,
+/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
+/// `0`, `2`, `3`, `4` and `5`.
+#[derive(Clone, Copy, Debug)]
+pub struct Three(generic::Three<v128>);
+
+impl Three {
+    /// Create a new searcher that finds occurrences of the needle bytes
+    /// given.
+    ///
+    /// This particular searcher is specialized to use simd128 vector
+    /// instructions that typically make it quite fast.
+    ///
+    /// If simd128 is unavailable in the current environment, then `None` is
+    /// returned.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> {
+        if Three::is_available() {
+            // SAFETY: we check that simd128 is available above.
+            unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to simd128 vectors and routines without
+    /// checking that simd128 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute `simd128`
+    /// instructions in the current environment.
+    #[target_feature(enable = "simd128")]
+    #[inline]
+    pub unsafe fn new_unchecked(
+        needle1: u8,
+        needle2: u8,
+        needle3: u8,
+    ) -> Three {
+        Three(generic::Three::new(needle1, needle2, needle3))
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Three::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `Three::new` will return a `None` value.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(target_feature = "simd128")]
+        {
+            true
+        }
+        #[cfg(not(target_feature = "simd128"))]
+        {
+            false
+        }
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        if end.distance(start) < v128::BYTES {
+            // SAFETY: We require the caller to pass valid start/end
+            // pointers.
+            return generic::fwd_byte_by_byte(start, end, |b| {
+                b == self.0.needle1()
+                    || b == self.0.needle2()
+                    || b == self.0.needle3()
+            });
+        }
+        // SAFETY: Building a `Three` means it's safe to call 'simd128'
+        // routines. Also, we've checked that our haystack is big enough to
+        // run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        self.find_raw_impl(start, end)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        if end.distance(start) < v128::BYTES {
+            // SAFETY: We require the caller to pass valid start/end
+            // pointers.
+            return generic::rev_byte_by_byte(start, end, |b| {
+                b == self.0.needle1()
+                    || b == self.0.needle2()
+                    || b == self.0.needle3()
+            });
+        }
+        // SAFETY: Building a `Three` means it's safe to call 'simd128'
+        // routines. Also, we've checked that our haystack is big enough to
+        // run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        self.rfind_raw_impl(start, end)
+    }
+
+    /// Execute a search using simd128 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Three::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of a simd128 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Three`, which can only be constructed
+    /// when it is safe to call `simd128` routines.)
+    #[target_feature(enable = "simd128")]
+    #[inline]
+    unsafe fn find_raw_impl(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.0.find_raw(start, end)
+    }
+
+    /// Execute a search using simd128 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Three::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of a simd128 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Three`, which can only be constructed
+    /// when it is safe to call `simd128` routines.)
+    #[target_feature(enable = "simd128")]
+    #[inline]
+    unsafe fn rfind_raw_impl(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.0.rfind_raw(start, end)
+    }
+
+    /// Returns an iterator over all occurrences of the needle bytes in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
+        ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+}
+
+/// An iterator over all occurrences of three possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`Three::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct ThreeIter<'a, 'h> {
+    searcher: &'a Three,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    define_memchr_quickcheck!(super);
+
+    #[test]
+    fn forward_one() {
+        crate::tests::memchr::Runner::new(1).forward_iter(
+            |haystack, needles| {
+                Some(One::new(needles[0])?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_one() {
+        crate::tests::memchr::Runner::new(1).reverse_iter(
+            |haystack, needles| {
+                Some(One::new(needles[0])?.iter(haystack).rev().collect())
+            },
+        )
+    }
+
+    #[test]
+    fn count_one() {
+        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
+            Some(One::new(needles[0])?.iter(haystack).count())
+        })
+    }
+
+    #[test]
+    fn forward_two() {
+        crate::tests::memchr::Runner::new(2).forward_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                Some(Two::new(n1, n2)?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_two() {
+        crate::tests::memchr::Runner::new(2).reverse_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                Some(Two::new(n1, n2)?.iter(haystack).rev().collect())
+            },
+        )
+    }
+
+    #[test]
+    fn forward_three() {
+        crate::tests::memchr::Runner::new(3).forward_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                let n3 = needles.get(2).copied()?;
+                Some(Three::new(n1, n2, n3)?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_three() {
+        crate::tests::memchr::Runner::new(3).reverse_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                let n3 = needles.get(2).copied()?;
+                Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect())
+            },
+        )
+    }
+}
diff --git a/vendor/memchr/src/arch/wasm32/simd128/mod.rs b/vendor/memchr/src/arch/wasm32/simd128/mod.rs
new file mode 100644
index 0000000..b55d1f0
--- /dev/null
+++ b/vendor/memchr/src/arch/wasm32/simd128/mod.rs
@@ -0,0 +1,6 @@
+/*!
+Algorithms for the `wasm32` target using 128-bit vectors via simd128.
+*/
+
+pub mod memchr;
+pub mod packedpair;
diff --git a/vendor/memchr/src/arch/wasm32/simd128/packedpair.rs b/vendor/memchr/src/arch/wasm32/simd128/packedpair.rs
new file mode 100644
index 0000000..b629377
--- /dev/null
+++ b/vendor/memchr/src/arch/wasm32/simd128/packedpair.rs
@@ -0,0 +1,229 @@
+/*!
+A 128-bit vector implementation of the "packed pair" SIMD algorithm.
+
+The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
+difference is that it (by default) uses a background distribution of byte
+frequencies to heuristically select the pair of bytes to search for.
+
+[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
+*/
+
+use core::arch::wasm32::v128;
+
+use crate::arch::{all::packedpair::Pair, generic::packedpair};
+
+/// A "packed pair" finder that uses 128-bit vector operations.
+///
+/// This finder picks two bytes that it believes have high predictive power
+/// for indicating an overall match of a needle. Depending on whether
+/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
+/// where the needle matches or could match. In the prefilter case, candidates
+/// are reported whenever the [`Pair`] of bytes given matches.
+#[derive(Clone, Copy, Debug)]
+pub struct Finder(packedpair::Finder<v128>);
+
+impl Finder {
+    /// Create a new pair searcher.
+    /// The searcher returned can either report exact matches of `needle` or
+    /// act as a prefilter and report candidate positions of `needle`.
+    ///
+    /// If simd128 is unavailable in the current environment or if a [`Pair`]
+    /// could not be constructed from the needle given, then `None` is
+    /// returned.
+    #[inline]
+    pub fn new(needle: &[u8]) -> Option<Finder> {
+        Finder::with_pair(needle, Pair::new(needle)?)
+    }
+
+    /// Create a new "packed pair" finder using the pair of bytes given.
+    ///
+    /// This constructor permits callers to control precisely which pair of
+    /// bytes is used as a predicate.
+    ///
+    /// If simd128 is unavailable in the current environment, then `None` is
+    /// returned.
+    #[inline]
+    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
+        if Finder::is_available() {
+            // SAFETY: we check that simd128 is available above. We are also
+            // guaranteed to have needle.len() > 1 because we have a valid
+            // Pair.
+            unsafe { Some(Finder::with_pair_impl(needle, pair)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new `Finder` specific to simd128 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as the safety for `packedpair::Finder::new`, and callers must
+    /// also ensure that simd128 is available.
+    #[target_feature(enable = "simd128")]
+    #[inline]
+    unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder {
+        let finder = packedpair::Finder::<v128>::new(needle, pair);
+        Finder(finder)
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Finder::with_pair`] will
+    /// return a `Some` value. Similarly, when it is false, it is guaranteed
+    /// that `Finder::with_pair` will return a `None` value. Notice that this
+    /// does not guarantee that [`Finder::new`] will return a `Finder`.
+    /// Namely, even when `Finder::is_available` is true, it is not
+    /// guaranteed that a valid [`Pair`] can be found from the needle given.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(target_feature = "simd128")]
+        {
+            true
+        }
+        #[cfg(not(target_feature = "simd128"))]
+        {
+            false
+        }
+    }
+
+    /// Execute a search using wasm32 v128 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    #[inline]
+    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
+        self.find_impl(haystack, needle)
+    }
+
+    /// Execute a prefilter search using wasm32 v128 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    #[inline]
+    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
+        self.find_prefilter_impl(haystack)
+    }
+
+    /// Execute a search using wasm32 v128 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    ///
+    /// # Safety
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Finder`, which can only be constructed
+    /// when it is safe to call `simd128` routines.)
+    #[target_feature(enable = "simd128")]
+    #[inline]
+    fn find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
+        // SAFETY: The target feature safety obligation is automatically
+        // fulfilled by virtue of being a method on `Finder`, which can only
+        // be constructed when it is safe to call `simd128` routines.
+        unsafe { self.0.find(haystack, needle) }
+    }
+
+    /// Execute a prefilter search using wasm32 v128 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    ///
+    /// # Safety
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Finder`, which can only be constructed
+    /// when it is safe to call `simd128` routines.)
+    #[target_feature(enable = "simd128")]
+    #[inline]
+    fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: The target feature safety obligation is automatically
+        // fulfilled by virtue of being a method on `Finder`, which can only
+        // be constructed when it is safe to call `simd128` routines.
+        unsafe { self.0.find_prefilter(haystack) }
+    }
+
+    /// Returns the pair of offsets (into the needle) used to check as a
+    /// predicate before confirming whether a needle exists at a particular
+    /// position.
+    #[inline]
+    pub fn pair(&self) -> &Pair {
+        self.0.pair()
+    }
+
+    /// Returns the minimum haystack length that this `Finder` can search.
+    ///
+    /// Using a haystack with length smaller than this in a search will
+    /// result in a panic. The reason for this restriction is that this
+    /// finder is meant to be a low-level component that is part of a larger
+    /// substring strategy. In that sense, it avoids trying to handle all
+    /// cases and instead only handles the cases that it can handle very
+    /// well.
+    #[inline]
+    pub fn min_haystack_len(&self) -> usize {
+        self.0.min_haystack_len()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
+        let f = Finder::new(needle)?;
+        if haystack.len() < f.min_haystack_len() {
+            return None;
+        }
+        Some(f.find(haystack, needle))
+    }
+
+    define_substring_forward_quickcheck!(find);
+
+    #[test]
+    fn forward_substring() {
+        crate::tests::substring::Runner::new().fwd(find).run()
+    }
+
+    #[test]
+    fn forward_packedpair() {
+        fn find(
+            haystack: &[u8],
+            needle: &[u8],
+            index1: u8,
+            index2: u8,
+        ) -> Option<Option<usize>> {
+            let pair = Pair::with_indices(needle, index1, index2)?;
+            let f = Finder::with_pair(needle, pair)?;
+            if haystack.len() < f.min_haystack_len() {
+                return None;
+            }
+            Some(f.find(haystack, needle))
+        }
+        crate::tests::packedpair::Runner::new().fwd(find).run()
+    }
+
+    #[test]
+    fn forward_packedpair_prefilter() {
+        fn find(
+            haystack: &[u8],
+            needle: &[u8],
+            index1: u8,
+            index2: u8,
+        ) -> Option<Option<usize>> {
+            let pair = Pair::with_indices(needle, index1, index2)?;
+            let f = Finder::with_pair(needle, pair)?;
+            if haystack.len() < f.min_haystack_len() {
+                return None;
+            }
+            Some(f.find_prefilter(haystack))
+        }
+        crate::tests::packedpair::Runner::new().fwd(find).run()
+    }
+}
diff --git a/vendor/memchr/src/arch/x86_64/avx2/memchr.rs b/vendor/memchr/src/arch/x86_64/avx2/memchr.rs
new file mode 100644
index 0000000..59f8c7f
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/avx2/memchr.rs
@@ -0,0 +1,1352 @@
+/*!
+This module defines 256-bit vector implementations of `memchr` and friends.
+
+The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
+searching for one, two or three distinct bytes, respectively, in a haystack.
+Each type also has corresponding double ended iterators. These searchers are
+typically much faster than scalar routines accomplishing the same task.
+
+The `One` searcher also provides a [`One::count`] routine for efficiently
+counting the number of times a single byte occurs in a haystack.
+This is useful, for example, for counting the number of lines in a haystack.
+This routine exists because it is usually faster, especially with a high
+match count, than using [`One::find`] repeatedly. ([`OneIter`] specializes
+its `Iterator::count` implementation to use this routine.)
+
+Only one, two and three bytes are supported because three bytes is about the
+point where one sees diminishing returns. Beyond this point, it's probably
+(but not necessarily) better to just use a simple `[bool; 256]` array or
+similar. However, it depends mightily on the specific work-load and the
+expected match frequency.
+*/
+
+use core::arch::x86_64::{__m128i, __m256i};
+
+use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector};
+
+/// Finds all occurrences of a single byte in a haystack.
+#[derive(Clone, Copy, Debug)]
+pub struct One {
+    /// Used for haystacks less than 32 bytes.
+    sse2: generic::One<__m128i>,
+    /// Used for haystacks bigger than 32 bytes.
+    avx2: generic::One<__m256i>,
+}
+
+impl One {
+    /// Create a new searcher that finds occurrences of the needle byte given.
+    ///
+    /// This particular searcher is specialized to use AVX2 vector
+    /// instructions that typically make it quite fast. (SSE2 is used for
+    /// haystacks that are too short to accommodate an AVX2 vector.)
+    ///
+    /// If either SSE2 or AVX2 is unavailable in the current environment,
+    /// then `None` is returned.
+    #[inline]
+    pub fn new(needle: u8) -> Option<One> {
+        if One::is_available() {
+            // SAFETY: we check that sse2 and avx2 are available above.
+            unsafe { Some(One::new_unchecked(needle)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to AVX2 vectors and routines without
+    /// checking that either SSE2 or AVX2 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute both `sse2` and
+    /// `avx2` instructions in the current environment.
+    ///
+    /// Note that it is a common misconception that if one compiles for an
+    /// `x86_64` target, then they therefore automatically have access to
+    /// SSE2 instructions. While this is almost always the case, it isn't
+    /// true in 100% of cases.
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    pub unsafe fn new_unchecked(needle: u8) -> One {
+        One {
+            sse2: generic::One::new(needle),
+            avx2: generic::One::new(needle),
+        }
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`One::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `One::new` will return a `None` value.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+        #[cfg(target_feature = "sse2")]
+        {
+            #[cfg(target_feature = "avx2")]
+            {
+                true
+            }
+            #[cfg(not(target_feature = "avx2"))]
+            {
+                #[cfg(feature = "std")]
+                {
+                    std::is_x86_feature_detected!("avx2")
+                }
+                #[cfg(not(feature = "std"))]
+                {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
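+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of slice-based usage; it assumes AVX2 is actually
+    /// available in the current environment, so that `One::new` returns
+    /// `Some`:
+    ///
+    /// ```ignore
+    /// use memchr::arch::x86_64::avx2::memchr::One;
+    ///
+    /// if let Some(searcher) = One::new(b'z') {
+    ///     // The `z` in `foozbar` sits at offset 3.
+    ///     assert_eq!(Some(3), searcher.find(b"foozbar"));
+    ///     assert_eq!(None, searcher.find(b"foobar"));
+    /// }
+    /// ```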
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Counts all occurrences of this byte in the given haystack.
+    #[inline]
+    pub fn count(&self, haystack: &[u8]) -> usize {
+        // SAFETY: All of our pointers are derived directly from a borrowed
+        // slice, which is guaranteed to be valid.
+        unsafe {
+            let start = haystack.as_ptr();
+            let end = start.add(haystack.len());
+            self.count_raw(start, end)
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::fwd_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.find_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `One` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        //
+        // Note that we could call `self.avx2.find_raw` directly here. But
+        // that means we'd have to annotate this routine with
+        // `target_feature`. Which is fine, because this routine is `unsafe`
+        // anyway and the `target_feature` obligation is met by virtue of
+        // building a `One`. The real problem is that a routine with a
+        // `target_feature` annotation generally can't be inlined into
+        // caller code unless the caller code has the same target feature
+        // annotations.
+        // Namely, the common case (at time of writing) is for calling code
+        // to not have the `avx2` target feature enabled *at compile time*.
+        // Without `target_feature` on this routine, it can be inlined which
+        // will handle some of the short-haystack cases above without
+        // touching the architecture specific code.
+        self.find_raw_avx2(start, end)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::rev_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.rfind_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `One` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        //
+        // See note in forward routine above for why we don't just call
+        // `self.avx2.rfind_raw` directly here.
+        self.rfind_raw_avx2(start, end)
+    }
+
+    /// Counts all occurrences of this byte in the given haystack represented
+    /// by raw pointers.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `0` will always be returned.
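+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of deriving a valid pointer pair from a slice; it
+    /// assumes AVX2 is available, so that `One::new` returns `Some`:
+    ///
+    /// ```ignore
+    /// use memchr::arch::x86_64::avx2::memchr::One;
+    ///
+    /// if let Some(searcher) = One::new(b'\n') {
+    ///     let haystack = b"one\ntwo\nthree\n";
+    ///     // SAFETY: both pointers are derived from the same slice, and
+    ///     // `end` is exactly one byte past the end of that slice.
+    ///     let count = unsafe {
+    ///         let start = haystack.as_ptr();
+    ///         let end = start.add(haystack.len());
+    ///         searcher.count_raw(start, end)
+    ///     };
+    ///     assert_eq!(3, count);
+    /// }
+    /// ```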
+    #[inline]
+    pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
+        if start >= end {
+            return 0;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::count_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.count_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `One` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        self.count_raw_avx2(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn find_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.sse2.find_raw(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn rfind_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.sse2.rfind_raw(start, end)
+    }
+
+    /// Execute a count using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::count_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn count_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> usize {
+        self.sse2.count_raw(start, end)
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn find_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.avx2.find_raw(start, end)
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn rfind_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.avx2.rfind_raw(start, end)
+    }
+
+    /// Execute a count using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::count_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn count_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> usize {
+        self.avx2.count_raw(start, end)
+    }
+
+    /// Returns an iterator over all occurrences of the needle byte in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
+        OneIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+}
+
+/// An iterator over all occurrences of a single byte in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`One::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`One`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct OneIter<'a, 'h> {
+    searcher: &'a One,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for OneIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        self.it.count(|s, e| {
+            // SAFETY: We rely on our generic iterator to return valid start
+            // and end pointers.
+            unsafe { self.searcher.count_raw(s, e) }
+        })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {}
+
+/// Finds all occurrences of two bytes in a haystack.
+///
+/// That is, this reports matches of one of two possible bytes. For example,
+/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
+/// `4` and `5`.
+#[derive(Clone, Copy, Debug)]
+pub struct Two {
+    /// Used for haystacks less than 32 bytes.
+    sse2: generic::Two<__m128i>,
+    /// Used for haystacks bigger than 32 bytes.
+    avx2: generic::Two<__m256i>,
+}
+
+impl Two {
+    /// Create a new searcher that finds occurrences of the needle bytes
+    /// given.
+    ///
+    /// This particular searcher is specialized to use AVX2 vector
+    /// instructions that typically make it quite fast. (SSE2 is used for
+    /// haystacks that are too short to accommodate an AVX2 vector.)
+    ///
+    /// If either SSE2 or AVX2 is unavailable in the current environment,
+    /// then `None` is returned.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8) -> Option<Two> {
+        if Two::is_available() {
+            // SAFETY: we check that sse2 and avx2 are available above.
+            unsafe { Some(Two::new_unchecked(needle1, needle2)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to AVX2 vectors and routines without
+    /// checking that either SSE2 or AVX2 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute both `sse2` and
+    /// `avx2` instructions in the current environment.
+    ///
+    /// Note that it is a common misconception that if one compiles for an
+    /// `x86_64` target, then they therefore automatically have access to
+    /// SSE2 instructions. While this is almost always the case, it isn't
+    /// true in 100% of cases.
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two {
+        Two {
+            sse2: generic::Two::new(needle1, needle2),
+            avx2: generic::Two::new(needle1, needle2),
+        }
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Two::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `Two::new` will return a `None` value.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+        #[cfg(target_feature = "sse2")]
+        {
+            #[cfg(target_feature = "avx2")]
+            {
+                true
+            }
+            #[cfg(not(target_feature = "avx2"))]
+            {
+                #[cfg(feature = "std")]
+                {
+                    std::is_x86_feature_detected!("avx2")
+                }
+                #[cfg(not(feature = "std"))]
+                {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::fwd_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1() || b == self.sse2.needle2()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.find_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `Two` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        //
+        // Note that we could call `self.avx2.find_raw` directly here. But
+        // that means we'd have to annotate this routine with
+        // `target_feature`. Which is fine, because this routine is `unsafe`
+        // anyway and the `target_feature` obligation is met by virtue of
+        // building a `Two`. The real problem is that a routine with a
+        // `target_feature` annotation generally can't be inlined into
+        // caller code unless the caller code has the same target feature
+        // annotations. Namely, the common case (at time of writing) is for
+        // calling code to not have the `avx2` target feature enabled *at
+        // compile time*. Without `target_feature` on this routine, it can
+        // be inlined which will handle some of the short-haystack cases
+        // above without touching the architecture specific code.
+        self.find_raw_avx2(start, end)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
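+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of deriving a valid pointer pair from a slice; it
+    /// assumes AVX2 is available, so that `Two::new` returns `Some`:
+    ///
+    /// ```ignore
+    /// use memchr::arch::x86_64::avx2::memchr::Two;
+    ///
+    /// if let Some(searcher) = Two::new(b'a', b'b') {
+    ///     let haystack = b"afoobar";
+    ///     let start = haystack.as_ptr();
+    ///     // SAFETY: both pointers are derived from the same slice, and
+    ///     // `end` is exactly one byte past the end of that slice.
+    ///     let found = unsafe {
+    ///         let end = start.add(haystack.len());
+    ///         searcher.rfind_raw(start, end)
+    ///     };
+    ///     // The last matching byte is the `a` at offset 5.
+    ///     assert_eq!(Some(5), found.map(|p| p as usize - start as usize));
+    /// }
+    /// ```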
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::rev_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1() || b == self.sse2.needle2()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.rfind_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `Two` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        //
+        // See note in forward routine above for why we don't just call
+        // `self.avx2.rfind_raw` directly here.
+        self.rfind_raw_avx2(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn find_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.sse2.find_raw(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn rfind_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.sse2.rfind_raw(start, end)
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn find_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.avx2.find_raw(start, end)
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn rfind_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.avx2.rfind_raw(start, end)
+    }
+
+    /// Returns an iterator over all occurrences of the needle bytes in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
+        TwoIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+}
+
+/// An iterator over all occurrences of two possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`Two::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct TwoIter<'a, 'h> {
+    searcher: &'a Two,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {}
+
+/// Finds all occurrences of three bytes in a haystack.
+///
+/// That is, this reports matches of one of three possible bytes. For example,
+/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
+/// `0`, `2`, `3`, `4` and `5`.
+#[derive(Clone, Copy, Debug)]
+pub struct Three {
+    /// Used for haystacks less than 32 bytes.
+    sse2: generic::Three<__m128i>,
+    /// Used for haystacks bigger than 32 bytes.
+    avx2: generic::Three<__m256i>,
+}
+
+impl Three {
+    /// Create a new searcher that finds occurrences of the needle bytes
+    /// given.
+    ///
+    /// This particular searcher is specialized to use AVX2 vector
+    /// instructions that typically make it quite fast. (SSE2 is used for
+    /// haystacks that are too short to accommodate an AVX2 vector.)
+    ///
+    /// If either SSE2 or AVX2 is unavailable in the current environment,
+    /// then `None` is returned.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> {
+        if Three::is_available() {
+            // SAFETY: we check that sse2 and avx2 are available above.
+            unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to AVX2 vectors and routines without
+    /// checking that either SSE2 or AVX2 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute both `sse2` and
+    /// `avx2` instructions in the current environment.
+    ///
+    /// Note that it is a common misconception that if one compiles for an
+    /// `x86_64` target, then they therefore automatically have access to
+    /// SSE2 instructions. While this is almost always the case, it isn't
+    /// true in 100% of cases.
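+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of guarding this constructor with runtime feature
+    /// detection (the detection macro assumes the `std` feature):
+    ///
+    /// ```ignore
+    /// use memchr::arch::x86_64::avx2::memchr::Three;
+    ///
+    /// let searcher = if std::is_x86_feature_detected!("avx2") {
+    ///     // SAFETY: the runtime check above confirmed AVX2, and every
+    ///     // CPU that supports AVX2 also supports SSE2.
+    ///     Some(unsafe { Three::new_unchecked(b'a', b'b', b'o') })
+    /// } else {
+    ///     None
+    /// };
+    /// ```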
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    pub unsafe fn new_unchecked(
+        needle1: u8,
+        needle2: u8,
+        needle3: u8,
+    ) -> Three {
+        Three {
+            sse2: generic::Three::new(needle1, needle2, needle3),
+            avx2: generic::Three::new(needle1, needle2, needle3),
+        }
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Three::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `Three::new` will return a `None` value.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+        #[cfg(target_feature = "sse2")]
+        {
+            #[cfg(target_feature = "avx2")]
+            {
+                true
+            }
+            #[cfg(not(target_feature = "avx2"))]
+            {
+                #[cfg(feature = "std")]
+                {
+                    std::is_x86_feature_detected!("avx2")
+                }
+                #[cfg(not(feature = "std"))]
+                {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+ generic::fwd_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + || b == self.sse2.needle2() + || b == self.sse2.needle3() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.find_raw_sse2(start, end) + }; + } + // SAFETY: Building a `Three` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // Note that we could call `self.avx2.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `Three`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless + // the caller code has the same target feature annotations. Namely, + // the common case (at time of writing) is for calling code to not + // have the `avx2` target feature enabled *at compile time*. Without + // `target_feature` on this routine, it can be inlined which will + // handle some of the short-haystack cases above without touching the + // architecture specific code. + self.find_raw_avx2(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::rev_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + || b == self.sse2.needle2() + || b == self.sse2.needle3() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.rfind_raw_sse2(start, end) + }; + } + // SAFETY: Building a `Three` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // See note in forward routine above for why we don't just call + // `self.avx2.rfind_raw` directly here. + self.rfind_raw_avx2(start, end) + } + + /// Execute a search using SSE2 vectors and routines. 
+ /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.rfind_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn find_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.find_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn rfind_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { + ThreeIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. 
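+///
+/// # Example
+///
+/// A minimal sketch of walking matches from both ends at once (added for
+/// illustration):
+///
+/// ```ignore
+/// use memchr::arch::x86_64::avx2::memchr::Three;
+///
+/// if let Some(searcher) = Three::new(b'a', b'b', b'o') {
+///     let mut it = searcher.iter(b"afoobar");
+///     assert_eq!(Some(0), it.next());      // 'a' at offset 0
+///     assert_eq!(Some(5), it.next_back()); // 'a' at offset 5
+///     assert_eq!(vec![2, 3, 4], it.collect::<Vec<usize>>());
+/// }
+/// ```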
+#[derive(Clone, Debug)]
+pub struct ThreeIter<'a, 'h> {
+    searcher: &'a Three,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    define_memchr_quickcheck!(super);
+
+    #[test]
+    fn forward_one() {
+        crate::tests::memchr::Runner::new(1).forward_iter(
+            |haystack, needles| {
+                Some(One::new(needles[0])?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_one() {
+        crate::tests::memchr::Runner::new(1).reverse_iter(
+            |haystack, needles| {
+                Some(One::new(needles[0])?.iter(haystack).rev().collect())
+            },
+        )
+    }
+
+    #[test]
+    fn count_one() {
+        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
+            Some(One::new(needles[0])?.iter(haystack).count())
+        })
+    }
+
+    #[test]
+    fn forward_two() {
+        crate::tests::memchr::Runner::new(2).forward_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                Some(Two::new(n1, n2)?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_two() {
+        crate::tests::memchr::Runner::new(2).reverse_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                Some(Two::new(n1, n2)?.iter(haystack).rev().collect())
+            },
+        )
+    }
+
+    #[test]
+    fn forward_three() {
+        crate::tests::memchr::Runner::new(3).forward_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                let n3 = needles.get(2).copied()?;
+                Some(Three::new(n1, n2, n3)?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_three() {
+        crate::tests::memchr::Runner::new(3).reverse_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                let n3 = needles.get(2).copied()?;
+                Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect())
+            },
+        )
+    }
+}
diff --git a/vendor/memchr/src/arch/x86_64/avx2/mod.rs b/vendor/memchr/src/arch/x86_64/avx2/mod.rs
new file mode 100644
index 0000000..ee4097d
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/avx2/mod.rs
@@ -0,0 +1,6 @@
+/*!
+Algorithms for the `x86_64` target using 256-bit vectors via AVX2.
+*/
+
+pub mod memchr;
+pub mod packedpair;
diff --git a/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs b/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs
new file mode 100644
index 0000000..efae7b6
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs
@@ -0,0 +1,272 @@
+/*!
+A 256-bit vector implementation of the "packed pair" SIMD algorithm.
+
+The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
+difference is that it (by default) uses a background distribution of byte
+frequencies to heuristically select the pair of bytes to search for.
+
+[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
+*/
+
+use core::arch::x86_64::{__m128i, __m256i};
+
+use crate::arch::{all::packedpair::Pair, generic::packedpair};
+
+/// A "packed pair" finder that uses 256-bit vector operations.
+///
+/// This finder picks two bytes that it believes have high predictive power
+/// for indicating an overall match of a needle. Depending on whether
+/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
+/// where the needle matches or could match. In the prefilter case, candidates
+/// are reported whenever the [`Pair`] of bytes given matches.
+#[derive(Clone, Copy, Debug)]
+pub struct Finder {
+    sse2: packedpair::Finder<__m128i>,
+    avx2: packedpair::Finder<__m256i>,
+}
+
+impl Finder {
+    /// Create a new pair searcher. The searcher returned can either report
+    /// exact matches of `needle` or act as a prefilter and report candidate
+    /// positions of `needle`.
+    ///
+    /// If AVX2 is unavailable in the current environment or if a [`Pair`]
+    /// could not be constructed from the needle given, then `None` is
+    /// returned.
+    #[inline]
+    pub fn new(needle: &[u8]) -> Option<Finder> {
+        Finder::with_pair(needle, Pair::new(needle)?)
+    }
+
+    /// Create a new "packed pair" finder using the pair of bytes given.
+    ///
+    /// This constructor permits callers to control precisely which pair of
+    /// bytes is used as a predicate.
+    ///
+    /// If AVX2 is unavailable in the current environment, then `None` is
+    /// returned.
+    #[inline]
+    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
+        if Finder::is_available() {
+            // SAFETY: we check that sse2/avx2 is available above. We are also
+            // guaranteed to have needle.len() > 1 because we have a valid
+            // Pair.
+            unsafe { Some(Finder::with_pair_impl(needle, pair)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new `Finder` specific to SSE2 and AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as the safety for `packedpair::Finder::new`, and callers must also
+    /// ensure that both SSE2 and AVX2 are available.
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder {
+        let sse2 = packedpair::Finder::<__m128i>::new(needle, pair);
+        let avx2 = packedpair::Finder::<__m256i>::new(needle, pair);
+        Finder { sse2, avx2 }
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Finder::with_pair`] will
+    /// return a `Some` value. Similarly, when it is false, it is guaranteed
+    /// that `Finder::with_pair` will return a `None` value. Notice that this
+    /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely,
+    /// even when `Finder::is_available` is true, it is not guaranteed that a
+    /// valid [`Pair`] can be found from the needle given.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
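+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of the relationship between this check and the
+    /// constructors (added for illustration; the length guard reflects the
+    /// panic condition documented on `Finder::find`):
+    ///
+    /// ```ignore
+    /// use memchr::arch::x86_64::avx2::packedpair::Finder;
+    ///
+    /// if Finder::is_available() {
+    ///     // `with_pair` is now guaranteed to succeed, though `new` may
+    ///     // still return `None` if no suitable `Pair` exists.
+    ///     let finder = Finder::new(b"needle").unwrap();
+    ///     let haystack = b"a big enough haystack with a needle in it!";
+    ///     if haystack.len() >= finder.min_haystack_len() {
+    ///         assert_eq!(Some(29), finder.find(haystack, b"needle"));
+    ///     }
+    /// }
+    /// ```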
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+        #[cfg(target_feature = "sse2")]
+        {
+            #[cfg(target_feature = "avx2")]
+            {
+                true
+            }
+            #[cfg(not(target_feature = "avx2"))]
+            {
+                #[cfg(feature = "std")]
+                {
+                    std::is_x86_feature_detected!("avx2")
+                }
+                #[cfg(not(feature = "std"))]
+                {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    #[inline]
+    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
+        // SAFETY: Building a `Finder` means it's safe to call both 'sse2'
+        // and 'avx2' routines.
+        unsafe { self.find_impl(haystack, needle) }
+    }
+
+    /// Run this finder on the given haystack as a prefilter.
+    ///
+    /// If a candidate match is found, then an offset where the needle *could*
+    /// begin in the haystack is returned.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    #[inline]
+    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: Building a `Finder` means it's safe to call both 'sse2'
+        // and 'avx2' routines.
+        unsafe { self.find_prefilter_impl(haystack) }
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    ///
+    /// # Safety
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Finder`, which can only be constructed
+    /// when it is safe to call `sse2` and `avx2` routines.)
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    unsafe fn find_impl(
+        &self,
+        haystack: &[u8],
+        needle: &[u8],
+    ) -> Option<usize> {
+        if haystack.len() < self.avx2.min_haystack_len() {
+            self.sse2.find(haystack, needle)
+        } else {
+            self.avx2.find(haystack, needle)
+        }
+    }
+
+    /// Execute a prefilter search using AVX2 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    ///
+    /// # Safety
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Finder`, which can only be constructed
+    /// when it is safe to call `sse2` and `avx2` routines.)
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> {
+        if haystack.len() < self.avx2.min_haystack_len() {
+            self.sse2.find_prefilter(haystack)
+        } else {
+            self.avx2.find_prefilter(haystack)
+        }
+    }
+
+    /// Returns the pair of offsets (into the needle) used to check as a
+    /// predicate before confirming whether a needle exists at a particular
+    /// position.
+    #[inline]
+    pub fn pair(&self) -> &Pair {
+        self.avx2.pair()
+    }
+
+    /// Returns the minimum haystack length that this `Finder` can search.
+    ///
+    /// Using a haystack with length smaller than this in a search will result
+    /// in a panic. The reason for this restriction is that this finder is
+    /// meant to be a low-level component that is part of a larger substring
+    /// strategy. In that sense, it avoids trying to handle all cases and
+    /// instead only handles the cases that it can handle very well.
+    #[inline]
+    pub fn min_haystack_len(&self) -> usize {
+        // The caller doesn't need to care about AVX2's min_haystack_len
+        // since this implementation will automatically switch to the SSE2
+        // implementation if the haystack is too short for AVX2. Therefore, the
+        // caller only needs to care about SSE2's min_haystack_len.
+        //
+        // This does assume that SSE2's min_haystack_len is less than or
+        // equal to AVX2's min_haystack_len. In practice, this is true and
+        // there is no way it could be false based on how this Finder is
+        // implemented. Namely, both SSE2 and AVX2 use the same `Pair`. If
+        // they used different pairs, then it's possible (although perhaps
+        // pathological) for SSE2's min_haystack_len to be bigger than AVX2's.
+        self.sse2.min_haystack_len()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
+        let f = Finder::new(needle)?;
+        if haystack.len() < f.min_haystack_len() {
+            return None;
+        }
+        Some(f.find(haystack, needle))
+    }
+
+    define_substring_forward_quickcheck!(find);
+
+    #[test]
+    fn forward_substring() {
+        crate::tests::substring::Runner::new().fwd(find).run()
+    }
+
+    #[test]
+    fn forward_packedpair() {
+        fn find(
+            haystack: &[u8],
+            needle: &[u8],
+            index1: u8,
+            index2: u8,
+        ) -> Option<Option<usize>> {
+            let pair = Pair::with_indices(needle, index1, index2)?;
+            let f = Finder::with_pair(needle, pair)?;
+            if haystack.len() < f.min_haystack_len() {
+                return None;
+            }
+            Some(f.find(haystack, needle))
+        }
+        crate::tests::packedpair::Runner::new().fwd(find).run()
+    }
+
+    #[test]
+    fn forward_packedpair_prefilter() {
+        fn find(
+            haystack: &[u8],
+            needle: &[u8],
+            index1: u8,
+            index2: u8,
+        ) -> Option<Option<usize>> {
+            if !cfg!(target_feature = "sse2") {
+                return None;
+            }
+            let pair = Pair::with_indices(needle, index1, index2)?;
+            let f = Finder::with_pair(needle, pair)?;
+            if haystack.len() < f.min_haystack_len() {
+                return None;
+            }
+            Some(f.find_prefilter(haystack))
+        }
+        crate::tests::packedpair::Runner::new().fwd(find).run()
+    }
+}
diff --git a/vendor/memchr/src/arch/x86_64/memchr.rs b/vendor/memchr/src/arch/x86_64/memchr.rs
new file mode 100644
index 0000000..fcb1399
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/memchr.rs
@@ -0,0 +1,335 @@
+/*!
+Wrapper routines for `memchr` and friends.
+
+These routines efficiently dispatch to the best implementation based on what
+the CPU supports.
+*/
+
+/// Provides a way to run a memchr-like function while amortizing the cost of
+/// runtime CPU feature detection.
+///
+/// This works by loading a function pointer from an atomic global. Initially,
+/// this global is set to a function that does CPU feature detection. For
+/// example, if AVX2 is enabled, then the AVX2 implementation is used.
+/// Otherwise, at least on x86_64, the SSE2 implementation is used. (And
+/// in some niche cases, if SSE2 isn't available, then the architecture
+/// independent fallback implementation is used.)
+///
+/// After the first call to this function, the atomic global is replaced with
+/// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then
+/// will directly call the chosen routine instead of needing to go through the
+/// CPU feature detection branching again.
+///
+/// This particular macro is specifically written to provide the implementation
+/// of functions with the following signature:
+///
+/// ```ignore
+/// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<usize>;
+/// ```
+///
+/// Where you can also have `memchr2` and `memchr3`, but with `needle2` and
+/// `needle3`, respectively. The `start` and `end` parameters correspond to the
+/// start and end of the haystack, respectively.
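+///
+/// Stripped of the macro machinery, the dispatch this macro generates looks
+/// roughly like the following sketch. It is illustrative only: names are
+/// simplified, and `find_avx2`/`find_sse2` stand in for the wrapper
+/// functions the macro defines below.
+///
+/// ```ignore
+/// use core::sync::atomic::{AtomicPtr, Ordering};
+///
+/// type RealFn = unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>;
+/// static FN: AtomicPtr<()> = AtomicPtr::new(detect as RealFn as *mut ());
+///
+/// unsafe fn detect(n1: u8, s: *const u8, e: *const u8) -> Option<*const u8> {
+///     // Runs only on the first call: pick the best routine, cache it.
+///     let fun = if std::is_x86_feature_detected!("avx2") {
+///         find_avx2 as RealFn
+///     } else {
+///         find_sse2 as RealFn
+///     };
+///     FN.store(fun as *mut (), Ordering::Relaxed);
+///     fun(n1, s, e)
+/// }
+///
+/// unsafe fn memchr_raw(n1: u8, s: *const u8, e: *const u8) -> Option<*const u8> {
+///     // Every call after the first goes straight to the cached routine.
+///     let fun = FN.load(Ordering::Relaxed);
+///     core::mem::transmute::<*mut (), RealFn>(fun)(n1, s, e)
+/// }
+/// ```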
+///
+/// We use raw pointers here instead of the more obvious `haystack: &[u8]` so
+/// that the function is compatible with our lower level iterator logic that
+/// operates on raw pointers. We use this macro to implement "raw" memchr
+/// routines with the signature above, and then define memchr routines using
+/// regular slices on top of them.
+///
+/// Note that we use `#[cfg(target_feature = "sse2")]` below even though
+/// it shouldn't be strictly necessary because without it, it seems to
+/// cause the compiler to blow up. I guess it can't handle a function
+/// pointer being created with an `sse` target feature? Dunno. See the
+/// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with
+/// this.
+///
+/// # Safety
+///
+/// Primarily, callers must ensure that `$fnty` is a correct function pointer
+/// type and not something else.
+///
+/// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a
+/// routine that returns a valid pointer when a match is found. That is, a
+/// pointer that is `>= start` and `< end`.
+///
+/// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers
+/// correspond to valid pointers.
+macro_rules! unsafe_ifunc {
+    (
+        $memchrty:ident,
+        $memchrfind:ident,
+        $fnty:ty,
+        $retty:ty,
+        $hay_start:ident,
+        $hay_end:ident,
+        $($needle:ident),+
+    ) => {{
+        #![allow(unused_unsafe)]
+
+        use core::sync::atomic::{AtomicPtr, Ordering};
+
+        type Fn = *mut ();
+        type RealFn = $fnty;
+        static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn);
+
+        #[cfg(target_feature = "sse2")]
+        #[target_feature(enable = "sse2", enable = "avx2")]
+        unsafe fn find_avx2(
+            $($needle: u8),+,
+            $hay_start: *const u8,
+            $hay_end: *const u8,
+        ) -> $retty {
+            use crate::arch::x86_64::avx2::memchr::$memchrty;
+            $memchrty::new_unchecked($($needle),+)
+                .$memchrfind($hay_start, $hay_end)
+        }
+
+        #[cfg(target_feature = "sse2")]
+        #[target_feature(enable = "sse2")]
+        unsafe fn find_sse2(
+            $($needle: u8),+,
+            $hay_start: *const u8,
+            $hay_end: *const u8,
+        ) -> $retty {
+            use crate::arch::x86_64::sse2::memchr::$memchrty;
+            $memchrty::new_unchecked($($needle),+)
+                .$memchrfind($hay_start, $hay_end)
+        }
+
+        unsafe fn find_fallback(
+            $($needle: u8),+,
+            $hay_start: *const u8,
+            $hay_end: *const u8,
+        ) -> $retty {
+            use crate::arch::all::memchr::$memchrty;
+            $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end)
+        }
+
+        unsafe fn detect(
+            $($needle: u8),+,
+            $hay_start: *const u8,
+            $hay_end: *const u8,
+        ) -> $retty {
+            let fun = {
+                #[cfg(not(target_feature = "sse2"))]
+                {
+                    debug!(
+                        "no sse2 feature available, using fallback for {}",
+                        stringify!($memchrty),
+                    );
+                    find_fallback as RealFn
+                }
+                #[cfg(target_feature = "sse2")]
+                {
+                    use crate::arch::x86_64::{sse2, avx2};
+                    if avx2::memchr::$memchrty::is_available() {
+                        debug!("chose AVX2 for {}", stringify!($memchrty));
+                        find_avx2 as RealFn
+                    } else if sse2::memchr::$memchrty::is_available() {
+                        debug!("chose SSE2 for {}", stringify!($memchrty));
+                        find_sse2 as RealFn
+                    } else {
+                        debug!("chose fallback for {}", stringify!($memchrty));
+                        find_fallback as RealFn
+                    }
+                }
+            };
+            FN.store(fun as Fn, Ordering::Relaxed);
+            // SAFETY: The only thing we need to uphold here is the
+            // `#[target_feature]` requirements. Since we check is_available
+            // above before using the corresponding implementation, we are
+            // guaranteed to only call code that is supported on the current
+            // CPU.
+            fun($($needle),+, $hay_start, $hay_end)
+        }
+
+        // SAFETY: By virtue of the caller contract, RealFn is a function
+        // pointer, which is always safe to transmute with a *mut (). Also,
+        // since we use $memchrty::is_available, it is guaranteed to be safe
+        // to call $memchrty::$memchrfind.
+        unsafe {
+            let fun = FN.load(Ordering::Relaxed);
+            core::mem::transmute::<Fn, RealFn>(fun)(
+                $($needle),+,
+                $hay_start,
+                $hay_end,
+            )
+        }
+    }};
+}
+
+// The routines below dispatch to AVX2, SSE2 or a fallback routine based on
+// what's available in the current environment. The secret sauce here is that
+// we only check for which one to use approximately once, and then "cache" that
+// choice into a global function pointer. Subsequent invocations then just call
+// the appropriate function directly.
+
+/// memchr, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `One::find_raw`.
+#[inline(always)]
+pub(crate) unsafe fn memchr_raw(
+    n1: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        One,
+        find_raw,
+        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1
+    )
+}
+
+/// memrchr, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `One::rfind_raw`.
+#[inline(always)]
+pub(crate) unsafe fn memrchr_raw(
+    n1: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        One,
+        rfind_raw,
+        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1
+    )
+}
+
+/// memchr2, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `Two::find_raw`.
+#[inline(always)]
+pub(crate) unsafe fn memchr2_raw(
+    n1: u8,
+    n2: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        Two,
+        find_raw,
+        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1,
+        n2
+    )
+}
+
+/// memrchr2, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `Two::rfind_raw`.
+#[inline(always)]
pub(crate) unsafe fn memrchr2_raw(
+    n1: u8,
+    n2: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        Two,
+        rfind_raw,
+        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1,
+        n2
+    )
+}
+
+/// memchr3, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `Three::find_raw`.
+#[inline(always)]
+pub(crate) unsafe fn memchr3_raw(
+    n1: u8,
+    n2: u8,
+    n3: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        Three,
+        find_raw,
+        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1,
+        n2,
+        n3
+    )
+}
+
+/// memrchr3, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `Three::rfind_raw`.
+#[inline(always)]
+pub(crate) unsafe fn memrchr3_raw(
+    n1: u8,
+    n2: u8,
+    n3: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        Three,
+        rfind_raw,
+        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1,
+        n2,
+        n3
+    )
+}
+
+/// Count all matching bytes, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `One::count_raw`.
+#[inline(always)]
+pub(crate) unsafe fn count_raw(
+    n1: u8,
+    start: *const u8,
+    end: *const u8,
+) -> usize {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        One,
+        count_raw,
+        unsafe fn(u8, *const u8, *const u8) -> usize,
+        usize,
+        start,
+        end,
+        n1
+    )
+}
diff --git a/vendor/memchr/src/arch/x86_64/mod.rs b/vendor/memchr/src/arch/x86_64/mod.rs
new file mode 100644
index 0000000..5dad721
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/mod.rs
@@ -0,0 +1,8 @@
+/*!
+Vector algorithms for the `x86_64` target.
+*/
+
+pub mod avx2;
+pub mod sse2;
+
+pub(crate) mod memchr;
diff --git a/vendor/memchr/src/arch/x86_64/sse2/memchr.rs b/vendor/memchr/src/arch/x86_64/sse2/memchr.rs
new file mode 100644
index 0000000..c6f75df
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/sse2/memchr.rs
@@ -0,0 +1,1077 @@
+/*!
+This module defines 128-bit vector implementations of `memchr` and friends.
+
+The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
+searching for one, two or three distinct bytes, respectively, in a haystack.
+Each type also has corresponding double ended iterators. These searchers are
+typically much faster than scalar routines accomplishing the same task.
+
+The `One` searcher also provides a [`One::count`] routine for efficiently
+counting the number of times a single byte occurs in a haystack. This is
+useful, for example, for counting the number of lines in a haystack. This
+routine exists because it is usually faster, especially with a high match
+count, than using [`One::find`] repeatedly. ([`OneIter`] specializes its
+`Iterator::count` implementation to use this routine.)
+
+Only one, two and three bytes are supported because three bytes is about
+the point where one sees diminishing returns. Beyond that point, it's
+probably (but not necessarily) better to just use a simple `[bool; 256]`
+array or similar. However, it depends mightily on the specific work-load
+and the expected match frequency.
+*/
+
+use core::arch::x86_64::__m128i;
+
+use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector};
+
+/// Finds all occurrences of a single byte in a haystack.
+#[derive(Clone, Copy, Debug)]
+pub struct One(generic::One<__m128i>);
+
+impl One {
+    /// Create a new searcher that finds occurrences of the needle byte given.
+    ///
+    /// This particular searcher is specialized to use SSE2 vector instructions
+    /// that typically make it quite fast.
+    ///
+    /// If SSE2 is unavailable in the current environment, then `None` is
+    /// returned.
+    #[inline]
+    pub fn new(needle: u8) -> Option<One> {
+        if One::is_available() {
+            // SAFETY: we check that sse2 is available above.
+            unsafe { Some(One::new_unchecked(needle)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to SSE2 vectors and routines without
+    /// checking that SSE2 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute `sse2` instructions
+    /// in the current environment.
+    ///
+    /// Note that it is a common misconception that if one compiles for an
+    /// `x86_64` target, then they therefore automatically have access to SSE2
+    /// instructions. While this is almost always the case, it isn't true in
+    /// 100% of cases.
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    pub unsafe fn new_unchecked(needle: u8) -> One {
+        One(generic::One::new(needle))
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`One::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `One::new` will return a `None` value.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(target_feature = "sse2")]
+        {
+            true
+        }
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Counts all occurrences of this byte in the given haystack.
+    #[inline]
+    pub fn count(&self, haystack: &[u8]) -> usize {
+        // SAFETY: All of our pointers are derived directly from a borrowed
+        // slice, which is guaranteed to be valid.
+        unsafe {
+            let start = haystack.as_ptr();
+            let end = start.add(haystack.len());
+            self.count_raw(start, end)
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    ///   must either be in bounds or at most one byte past the end of the
+    ///   allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    ///   object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    ///   address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
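+    ///
+    /// # Example
+    ///
+    /// A minimal sketch (added for illustration) of satisfying these
+    /// conditions by deriving both pointers from the same slice:
+    ///
+    /// ```ignore
+    /// use memchr::arch::x86_64::sse2::memchr::One;
+    ///
+    /// if let Some(searcher) = One::new(b'z') {
+    ///     let haystack = b"abczdef";
+    ///     let start = haystack.as_ptr();
+    ///     // SAFETY: `start` and `end` are derived from the same slice,
+    ///     // so every condition above holds.
+    ///     let found = unsafe {
+    ///         let end = start.add(haystack.len());
+    ///         searcher.find_raw(start, end)
+    ///     };
+    ///     // 'z' occurs at offset 3.
+    ///     assert_eq!(Some(3), found.map(|p| p as usize - start as usize));
+    /// }
+    /// ```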
+ #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // Note that we could call `self.0.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `One`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless the + // caller code has the same target feature annotations. Which is maybe + // okay for SSE2, but we do the same thing for AVX2 where caller code + // probably usually doesn't have AVX2 enabled. That means that this + // routine can be inlined which will handle some of the short-haystack + // cases above without touching the architecture specific code. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // See note in forward routine above for why we don't just call + // `self.0.rfind_raw` directly here. + self.rfind_raw_impl(start, end) + } + + /// Counts all occurrences of this byte in the given haystack represented + /// by raw pointers. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. 
+ /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `0` will always be returned. + #[inline] + pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { + if start >= end { + return 0; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::count_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.count_raw_impl(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Execute a count using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::count_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn count_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + self.0.count_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { + OneIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of a single byte in a haystack. 
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`One::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`One`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct OneIter<'a, 'h> {
+    searcher: &'a One,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for OneIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        self.it.count(|s, e| {
+            // SAFETY: We rely on our generic iterator to return valid start
+            // and end pointers.
+            unsafe { self.searcher.count_raw(s, e) }
+        })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {}
+
+/// Finds all occurrences of two bytes in a haystack.
+///
+/// That is, this reports matches of one of two possible bytes. For example,
+/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
+/// `4` and `5`.
+#[derive(Clone, Copy, Debug)]
+pub struct Two(generic::Two<__m128i>);
+
+impl Two {
+    /// Create a new searcher that finds occurrences of the needle bytes given.
+    ///
+    /// This particular searcher is specialized to use SSE2 vector instructions
+    /// that typically make it quite fast.
+    ///
+    /// If SSE2 is unavailable in the current environment, then `None` is
+    /// returned.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8) -> Option<Two> {
+        if Two::is_available() {
+            // SAFETY: we check that sse2 is available above.
+            unsafe { Some(Two::new_unchecked(needle1, needle2)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to SSE2 vectors and routines without
+    /// checking that SSE2 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute `sse2` instructions
+    /// in the current environment.
+    ///
+    /// Note that it is a common misconception that if one compiles for an
+    /// `x86_64` target, then they therefore automatically have access to SSE2
+    /// instructions. While this is almost always the case, it isn't true in
+    /// 100% of cases.
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two {
+        Two(generic::Two::new(needle1, needle2))
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Two::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `Two::new` will return a `None` value.
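+    ///
+    /// For example, a minimal sketch of this contract (added for
+    /// illustration):
+    ///
+    /// ```ignore
+    /// use memchr::arch::x86_64::sse2::memchr::Two;
+    ///
+    /// if Two::is_available() {
+    ///     let searcher = Two::new(b'a', b'b').unwrap();
+    ///     assert_eq!(Some(0), searcher.find(b"afoobar"));
+    /// } else {
+    ///     assert!(Two::new(b'a', b'b').is_none());
+    /// }
+    /// ```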
+ /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "sse2")] + { + true + } + #[cfg(not(target_feature = "sse2"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // Note that we could call `self.0.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `Two`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless the + // caller code has the same target feature annotations. 
Which is maybe + // okay for SSE2, but we do the same thing for AVX2 where caller code + // probably usually doesn't have AVX2 enabled. That means that this + // routine can be inlined which will handle some of the short-haystack + // cases above without touching the architecture specific code. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // See note in forward routine above for why we don't just call + // `self.0.rfind_raw` directly here. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. 
This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { + TwoIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { + searcher: &'a Two, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three(generic::Three<__m128i>); + +impl Three { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use SSE2 vector instructions + /// that typically make it quite fast. + /// + /// If SSE2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option { + if Three::is_available() { + // SAFETY: we check that sse2 is available above. + unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } + } else { + None + } + } + + /// Create a new finder specific to SSE2 vectors and routines without + /// checking that SSE2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `sse2` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2")] + #[inline] + pub unsafe fn new_unchecked( + needle1: u8, + needle2: u8, + needle3: u8, + ) -> Three { + Three(generic::Three::new(needle1, needle2, needle3)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Three::new`] will return + /// a `Some` value. 
Similarly, when it is false, it is guaranteed that + /// `Three::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "sse2")] + { + true + } + #[cfg(not(target_feature = "sse2"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // Note that we could call `self.0.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `Three`. 
+ // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless the + // caller code has the same target feature annotations. Which is maybe + // okay for SSE2, but we do the same thing for AVX2 where caller code + // probably usually doesn't have AVX2 enabled. That means that this + // routine can be inlined which will handle some of the short-haystack + // cases above without touching the architecture specific code. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // See note in forward routine above for why we don't just call + // `self.0.rfind_raw` directly here. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2` routines.) 
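+    // A minimal usage sketch of the safe API above (a hedged sketch, not
+    // part of the vendored sources; the offsets follow the `afoobar`
+    // example in the type-level docs):
+    //
+    //     if let Some(finder) = Three::new(b'a', b'b', b'o') {
+    //         assert_eq!(Some(0), finder.find(b"afoobar"));
+    //         assert_eq!(Some(5), finder.rfind(b"afoobar"));
+    //         let all: Vec<usize> = finder.iter(b"afoobar").collect();
+    //         assert_eq!(all, vec![0, 2, 3, 4, 5]);
+    //     }
+    //
+    // `Three::new` returns `None` when SSE2 is unavailable, which is the
+    // portable way to decide whether to fall back to another searcher.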
+ #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { + ThreeIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { + searcher: &'a Three, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. 
+ unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { + use super::*; + + define_memchr_quickcheck!(super); + + #[test] + fn forward_one() { + crate::tests::memchr::Runner::new(1).forward_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_one() { + crate::tests::memchr::Runner::new(1).reverse_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn count_one() { + crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).count()) + }) + } + + #[test] + fn forward_two() { + crate::tests::memchr::Runner::new(2).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_two() { + crate::tests::memchr::Runner::new(2).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn forward_three() { + crate::tests::memchr::Runner::new(3).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_three() { + crate::tests::memchr::Runner::new(3).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) + }, + ) + } +} diff --git a/vendor/memchr/src/arch/x86_64/sse2/mod.rs b/vendor/memchr/src/arch/x86_64/sse2/mod.rs new file mode 100644 index 0000000..bcb8307 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/sse2/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `x86_64` target using 128-bit vectors via SSE2. +*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs b/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs new file mode 100644 index 0000000..c8b5b99 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs @@ -0,0 +1,232 @@ +/*! +A 128-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::x86_64::__m128i; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 128-bit vector operations. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +#[derive(Clone, Copy, Debug)] +pub struct Finder(packedpair::Finder<__m128i>); + +impl Finder { + /// Create a new pair searcher. 
The searcher returned can either report + /// exact matches of `needle` or act as a prefilter and report candidate + /// positions of `needle`. + /// + /// If SSE2 is unavailable in the current environment or if a [`Pair`] + /// could not be constructed from the needle given, then `None` is + /// returned. + #[inline] + pub fn new(needle: &[u8]) -> Option { + Finder::with_pair(needle, Pair::new(needle)?) + } + + /// Create a new "packed pair" finder using the pair of bytes given. + /// + /// This constructor permits callers to control precisely which pair of + /// bytes is used as a predicate. + /// + /// If SSE2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn with_pair(needle: &[u8], pair: Pair) -> Option { + if Finder::is_available() { + // SAFETY: we check that sse2 is available above. We are also + // guaranteed to have needle.len() > 1 because we have a valid + // Pair. + unsafe { Some(Finder::with_pair_impl(needle, pair)) } + } else { + None + } + } + + /// Create a new `Finder` specific to SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as the safety for `packedpair::Finder::new`, and callers must also + /// ensure that SSE2 is available. + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { + let finder = packedpair::Finder::<__m128i>::new(needle, pair); + Finder(finder) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Finder::with_pair`] will + /// return a `Some` value. Similarly, when it is false, it is guaranteed + /// that `Finder::with_pair` will return a `None` value. Notice that this + /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, + /// even when `Finder::is_available` is true, it is not guaranteed that a + /// valid [`Pair`] can be found from the needle given. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + true + } + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option { + // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. + unsafe { self.find_impl(haystack, needle) } + } + + /// Run this finder on the given haystack as a prefilter. + /// + /// If a candidate match is found, then an offset where the needle *could* + /// begin in the haystack is returned. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find_prefilter(&self, haystack: &[u8]) -> Option { + // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. + unsafe { self.find_prefilter_impl(haystack) } + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `sse2` routines.) 
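+    // A minimal usage sketch (a hedged sketch with a hypothetical needle and
+    // haystack; the length guard is required, per the `find` docs above):
+    //
+    //     let needle = b"quux";
+    //     if let Some(finder) = Finder::new(needle) {
+    //         let haystack = b"foo bar baz quux and then some";
+    //         if haystack.len() >= finder.min_haystack_len() {
+    //             assert_eq!(Some(12), finder.find(haystack, needle));
+    //         }
+    //     }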
+ #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_impl( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option { + self.0.find(haystack, needle) + } + + /// Execute a prefilter search using SSE2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option { + self.0.find_prefilter(haystack) + } + + /// Returns the pair of offsets (into the needle) used to check as a + /// predicate before confirming whether a needle exists at a particular + /// position. + #[inline] + pub fn pair(&self) -> &Pair { + self.0.pair() + } + + /// Returns the minimum haystack length that this `Finder` can search. + /// + /// Using a haystack with length smaller than this in a search will result + /// in a panic. The reason for this restriction is that this finder is + /// meant to be a low-level component that is part of a larger substring + /// strategy. In that sense, it avoids trying to handle all cases and + /// instead only handles the cases that it can handle very well. + #[inline] + pub fn min_haystack_len(&self) -> usize { + self.0.min_haystack_len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn find(haystack: &[u8], needle: &[u8]) -> Option> { + let f = Finder::new(needle)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + + define_substring_forward_quickcheck!(find); + + #[test] + fn forward_substring() { + crate::tests::substring::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair_prefilter() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find_prefilter(haystack)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } +} diff --git a/vendor/memchr/src/cow.rs b/vendor/memchr/src/cow.rs index 0b7d0da..f291645 100644 --- a/vendor/memchr/src/cow.rs +++ b/vendor/memchr/src/cow.rs @@ -4,22 +4,23 @@ use core::ops; /// /// The purpose of this type is to permit usage of a "borrowed or owned /// byte string" in a way that keeps std/no-std compatibility. That is, in -/// no-std mode, this type devolves into a simple &[u8] with no owned variant -/// available. We can't just use a plain Cow because Cow is not in core. +/// no-std/alloc mode, this type devolves into a simple &[u8] with no owned +/// variant available. We can't just use a plain Cow because Cow is not in +/// core. #[derive(Clone, Debug)] pub struct CowBytes<'a>(Imp<'a>); -// N.B. We don't use std::borrow::Cow here since we can get away with a +// N.B. 
We don't use alloc::borrow::Cow here since we can get away with a // Box<[u8]> for our use case, which is 1/3 smaller than the Vec that // a Cow<[u8]> would use. -#[cfg(feature = "std")] +#[cfg(feature = "alloc")] #[derive(Clone, Debug)] enum Imp<'a> { Borrowed(&'a [u8]), - Owned(Box<[u8]>), + Owned(alloc::boxed::Box<[u8]>), } -#[cfg(not(feature = "std"))] +#[cfg(not(feature = "alloc"))] #[derive(Clone, Debug)] struct Imp<'a>(&'a [u8]); @@ -35,21 +36,21 @@ impl<'a> ops::Deref for CowBytes<'a> { impl<'a> CowBytes<'a> { /// Create a new borrowed CowBytes. #[inline(always)] - pub fn new>(bytes: &'a B) -> CowBytes<'a> { + pub(crate) fn new>(bytes: &'a B) -> CowBytes<'a> { CowBytes(Imp::new(bytes.as_ref())) } /// Create a new owned CowBytes. - #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline(always)] - pub fn new_owned(bytes: Box<[u8]>) -> CowBytes<'static> { + fn new_owned(bytes: alloc::boxed::Box<[u8]>) -> CowBytes<'static> { CowBytes(Imp::Owned(bytes)) } /// Return a borrowed byte string, regardless of whether this is an owned /// or borrowed byte string internally. #[inline(always)] - pub fn as_slice(&self) -> &[u8] { + pub(crate) fn as_slice(&self) -> &[u8] { self.0.as_slice() } @@ -57,39 +58,48 @@ impl<'a> CowBytes<'a> { /// /// If this is already an owned byte string internally, then this is a /// no-op. Otherwise, the internal byte string is copied. - #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline(always)] - pub fn into_owned(self) -> CowBytes<'static> { + pub(crate) fn into_owned(self) -> CowBytes<'static> { match self.0 { - Imp::Borrowed(b) => CowBytes::new_owned(Box::from(b)), + Imp::Borrowed(b) => { + CowBytes::new_owned(alloc::boxed::Box::from(b)) + } Imp::Owned(b) => CowBytes::new_owned(b), } } } impl<'a> Imp<'a> { - #[cfg(feature = "std")] - #[inline(always)] - pub fn new(bytes: &'a [u8]) -> Imp<'a> { - Imp::Borrowed(bytes) - } - - #[cfg(not(feature = "std"))] #[inline(always)] pub fn new(bytes: &'a [u8]) -> Imp<'a> { - Imp(bytes) + #[cfg(feature = "alloc")] + { + Imp::Borrowed(bytes) + } + #[cfg(not(feature = "alloc"))] + { + Imp(bytes) + } } - #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline(always)] pub fn as_slice(&self) -> &[u8] { - match self { - Imp::Owned(ref x) => x, - Imp::Borrowed(x) => x, + #[cfg(feature = "alloc")] + { + match self { + Imp::Owned(ref x) => x, + Imp::Borrowed(x) => x, + } + } + #[cfg(not(feature = "alloc"))] + { + self.0 } } - #[cfg(not(feature = "std"))] + #[cfg(not(feature = "alloc"))] #[inline(always)] pub fn as_slice(&self) -> &[u8] { self.0 diff --git a/vendor/memchr/src/ext.rs b/vendor/memchr/src/ext.rs new file mode 100644 index 0000000..1bb21dd --- /dev/null +++ b/vendor/memchr/src/ext.rs @@ -0,0 +1,52 @@ +/// A trait for adding some helper routines to pointers. +pub(crate) trait Pointer { + /// Returns the distance, in units of `T`, between `self` and `origin`. + /// + /// # Safety + /// + /// Same as `ptr::offset_from` in addition to `self >= origin`. + unsafe fn distance(self, origin: Self) -> usize; + + /// Casts this pointer to `usize`. + /// + /// Callers should not convert the `usize` back to a pointer if at all + /// possible. (And if you believe it's necessary, open an issue to discuss + /// why. Otherwise, it has the potential to violate pointer provenance.) + /// The purpose of this function is just to be able to do arithmetic, i.e., + /// computing offsets or alignments. 
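+    // A hedged illustration (hypothetical slice) of how these helpers are
+    // meant to be used; `distance` is `ptr::offset_from` restricted to the
+    // non-negative case:
+    //
+    //     let hay = b"abc";
+    //     let (start, end) = (hay.as_ptr(), unsafe { hay.as_ptr().add(3) });
+    //     // SAFETY: both pointers derive from `hay` and `start <= end`.
+    //     assert_eq!(3, unsafe { end.distance(start) });
+    //     assert!(end.as_usize() >= start.as_usize());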
+    fn as_usize(self) -> usize;
+}
+
+impl<T> Pointer for *const T {
+    unsafe fn distance(self, origin: *const T) -> usize {
+        // TODO: Replace with `ptr::sub_ptr` once stabilized.
+        usize::try_from(self.offset_from(origin)).unwrap_unchecked()
+    }
+
+    fn as_usize(self) -> usize {
+        self as usize
+    }
+}
+
+impl<T> Pointer for *mut T {
+    unsafe fn distance(self, origin: *mut T) -> usize {
+        (self as *const T).distance(origin as *const T)
+    }
+
+    fn as_usize(self) -> usize {
+        (self as *const T).as_usize()
+    }
+}
+
+/// A trait for adding some helper routines to raw bytes.
+pub(crate) trait Byte {
+    /// Converts this byte to a `char` if it's ASCII. Otherwise panics.
+    fn to_char(self) -> char;
+}
+
+impl Byte for u8 {
+    fn to_char(self) -> char {
+        assert!(self.is_ascii());
+        char::from(self)
+    }
+}
diff --git a/vendor/memchr/src/lib.rs b/vendor/memchr/src/lib.rs
index e0b4ce3..de366fb 100644
--- a/vendor/memchr/src/lib.rs
+++ b/vendor/memchr/src/lib.rs
@@ -113,9 +113,9 @@ solution presented above, however, its throughput can easily be over an order
 of magnitude faster. This is a good general purpose trade off to make. You
 rarely lose, but often gain big.
 
-**NOTE:** The name `memchr` comes from the corresponding routine in libc. A key
-advantage of using this library is that its performance is not tied to its
-quality of implementation in the libc you happen to be using, which can vary
+**NOTE:** The name `memchr` comes from the corresponding routine in `libc`. A
+key advantage of using this library is that its performance is not tied to its
+quality of implementation in the `libc` you happen to be using, which can vary
 greatly from platform to platform.
 
 But what about substring search? This one is a bit more complicated. The
@@ -131,32 +131,58 @@ implementation in the standard library, even if only for searching on UTF-8?
 The reason is that the implementation details for using SIMD in the standard
 library haven't quite been worked out yet.
 
-**NOTE:** Currently, only `x86_64` targets have highly accelerated
-implementations of substring search. For `memchr`, all targets have
-somewhat-accelerated implementations, while only `x86_64` targets have highly
-accelerated implementations. This limitation is expected to be lifted once the
-standard library exposes a platform independent SIMD API.
+**NOTE:** Currently, only `x86_64`, `wasm32` and `aarch64` targets have vector
+accelerated implementations of `memchr` (and friends) and `memmem`.
 
 # Crate features
 
-* **std** - When enabled (the default), this will permit this crate to use
-  features specific to the standard library. Currently, the only thing used
-  from the standard library is runtime SIMD CPU feature detection. This means
-  that this feature must be enabled to get AVX accelerated routines. When
-  `std` is not enabled, this crate will still attempt to use SSE2 accelerated
-  routines on `x86_64`.
-* **libc** - When enabled (**not** the default), this library will use your
-  platform's libc implementation of `memchr` (and `memrchr` on Linux). This
-  can be useful on non-`x86_64` targets where the fallback implementation in
-  this crate is not as good as the one found in your libc. All other routines
-  (e.g., `memchr[23]` and substring search) unconditionally use the
-  implementation in this crate.
+* **std** - When enabled (the default), this will permit features specific to
+the standard library. Currently, the only thing used from the standard library
+is runtime SIMD CPU feature detection.
This means that this feature must be +enabled to get AVX2 accelerated routines on `x86_64` targets without enabling +the `avx2` feature at compile time, for example. When `std` is not enabled, +this crate will still attempt to use SSE2 accelerated routines on `x86_64`. It +will also use AVX2 accelerated routines when the `avx2` feature is enabled at +compile time. In general, enable this feature if you can. +* **alloc** - When enabled (the default), APIs in this crate requiring some +kind of allocation will become available. For example, the +[`memmem::Finder::into_owned`](crate::memmem::Finder::into_owned) API and the +[`arch::all::shiftor`](crate::arch::all::shiftor) substring search +implementation. Otherwise, this crate is designed from the ground up to be +usable in core-only contexts, so the `alloc` feature doesn't add much +currently. Notably, disabling `std` but enabling `alloc` will **not** result +in the use of AVX2 on `x86_64` targets unless the `avx2` feature is enabled +at compile time. (With `std` enabled, AVX2 can be used even without the `avx2` +feature enabled at compile time by way of runtime CPU feature detection.) +* **logging** - When enabled (disabled by default), the `log` crate is used +to emit log messages about what kinds of `memchr` and `memmem` algorithms +are used. Namely, both `memchr` and `memmem` have a number of different +implementation choices depending on the target and CPU, and the log messages +can help show what specific implementations are being used. Generally, this is +useful for debugging performance issues. +* **libc** - **DEPRECATED**. Previously, this enabled the use of the target's +`memchr` function from whatever `libc` was linked into the program. This +feature is now a no-op because this crate's implementation of `memchr` should +now be sufficiently fast on a number of platforms that `libc` should no longer +be needed. (This feature is somewhat of a holdover from this crate's origins. +Originally, this crate was literally just a safe wrapper function around the +`memchr` function from `libc`.) */ #![deny(missing_docs)] -#![cfg_attr(not(feature = "std"), no_std)] -// It's not worth trying to gate all code on just miri, so turn off relevant -// dead code warnings. +#![no_std] +// It's just not worth trying to squash all dead code warnings. Pretty +// unfortunate IMO. Not really sure how to fix this other than to either +// live with it or sprinkle a whole mess of `cfg` annotations everywhere. +#![cfg_attr( + not(any( + all(target_arch = "x86_64", target_feature = "sse2"), + target_arch = "wasm32", + target_arch = "aarch64", + )), + allow(dead_code) +)] +// Same deal for miri. #![cfg_attr(miri, allow(dead_code, unused_macros))] // Supporting 8-bit (or others) would be fine. If you need it, please submit a @@ -168,14 +194,28 @@ standard library exposes a platform independent SIMD API. 
)))] compile_error!("memchr currently not supported on non-{16,32,64}"); +#[cfg(any(test, feature = "std"))] +extern crate std; + +#[cfg(any(test, feature = "alloc"))] +extern crate alloc; + pub use crate::memchr::{ memchr, memchr2, memchr2_iter, memchr3, memchr3_iter, memchr_iter, memrchr, memrchr2, memrchr2_iter, memrchr3, memrchr3_iter, memrchr_iter, Memchr, Memchr2, Memchr3, }; +#[macro_use] +mod macros; + +#[cfg(test)] +#[macro_use] +mod tests; + +pub mod arch; mod cow; +mod ext; mod memchr; pub mod memmem; -#[cfg(test)] -mod tests; +mod vector; diff --git a/vendor/memchr/src/macros.rs b/vendor/memchr/src/macros.rs new file mode 100644 index 0000000..31b4ca3 --- /dev/null +++ b/vendor/memchr/src/macros.rs @@ -0,0 +1,20 @@ +// Some feature combinations result in some of these macros never being used. +// Which is fine. Just squash the warnings. +#![allow(unused_macros)] + +macro_rules! log { + ($($tt:tt)*) => { + #[cfg(feature = "logging")] + { + $($tt)* + } + } +} + +macro_rules! debug { + ($($tt:tt)*) => { log!(log::debug!($($tt)*)) } +} + +macro_rules! trace { + ($($tt:tt)*) => { log!(log::trace!($($tt)*)) } +} diff --git a/vendor/memchr/src/memchr.rs b/vendor/memchr/src/memchr.rs new file mode 100644 index 0000000..68adb9a --- /dev/null +++ b/vendor/memchr/src/memchr.rs @@ -0,0 +1,903 @@ +use core::iter::Rev; + +use crate::arch::generic::memchr as generic; + +/// Search for the first occurrence of a byte in a slice. +/// +/// This returns the index corresponding to the first occurrence of `needle` in +/// `haystack`, or `None` if one is not found. If an index is returned, it is +/// guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().position(|&b| b == needle)`, this routine will attempt to +/// use highly optimized vector operations that can be an order of magnitude +/// faster (or more). +/// +/// # Example +/// +/// This shows how to find the first position of a byte in a byte string. +/// +/// ``` +/// use memchr::memchr; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memchr(b'k', haystack), Some(8)); +/// ``` +#[inline] +pub fn memchr(needle: u8, haystack: &[u8]) -> Option { + // SAFETY: memchr_raw, when a match is found, always returns a valid + // pointer between start and end. + unsafe { + generic::search_slice_with_raw(haystack, |start, end| { + memchr_raw(needle, start, end) + }) + } +} + +/// Search for the last occurrence of a byte in a slice. +/// +/// This returns the index corresponding to the last occurrence of `needle` in +/// `haystack`, or `None` if one is not found. If an index is returned, it is +/// guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().rposition(|&b| b == needle)`, this routine will attempt to +/// use highly optimized vector operations that can be an order of magnitude +/// faster (or more). +/// +/// # Example +/// +/// This shows how to find the last position of a byte in a byte string. +/// +/// ``` +/// use memchr::memrchr; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memrchr(b'o', haystack), Some(17)); +/// ``` +#[inline] +pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { + // SAFETY: memrchr_raw, when a match is found, always returns a valid + // pointer between start and end. 
+ unsafe { + generic::search_slice_with_raw(haystack, |start, end| { + memrchr_raw(needle, start, end) + }) + } +} + +/// Search for the first occurrence of two possible bytes in a haystack. +/// +/// This returns the index corresponding to the first occurrence of one of the +/// needle bytes in `haystack`, or `None` if one is not found. If an index is +/// returned, it is guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().position(|&b| b == needle1 || b == needle2)`, this routine +/// will attempt to use highly optimized vector operations that can be an order +/// of magnitude faster (or more). +/// +/// # Example +/// +/// This shows how to find the first position of one of two possible bytes in a +/// haystack. +/// +/// ``` +/// use memchr::memchr2; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memchr2(b'k', b'q', haystack), Some(4)); +/// ``` +#[inline] +pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option { + // SAFETY: memchr2_raw, when a match is found, always returns a valid + // pointer between start and end. + unsafe { + generic::search_slice_with_raw(haystack, |start, end| { + memchr2_raw(needle1, needle2, start, end) + }) + } +} + +/// Search for the last occurrence of two possible bytes in a haystack. +/// +/// This returns the index corresponding to the last occurrence of one of the +/// needle bytes in `haystack`, or `None` if one is not found. If an index is +/// returned, it is guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2)`, this +/// routine will attempt to use highly optimized vector operations that can be +/// an order of magnitude faster (or more). +/// +/// # Example +/// +/// This shows how to find the last position of one of two possible bytes in a +/// haystack. +/// +/// ``` +/// use memchr::memrchr2; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memrchr2(b'k', b'o', haystack), Some(17)); +/// ``` +#[inline] +pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option { + // SAFETY: memrchr2_raw, when a match is found, always returns a valid + // pointer between start and end. + unsafe { + generic::search_slice_with_raw(haystack, |start, end| { + memrchr2_raw(needle1, needle2, start, end) + }) + } +} + +/// Search for the first occurrence of three possible bytes in a haystack. +/// +/// This returns the index corresponding to the first occurrence of one of the +/// needle bytes in `haystack`, or `None` if one is not found. If an index is +/// returned, it is guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().position(|&b| b == needle1 || b == needle2 || b == needle3)`, +/// this routine will attempt to use highly optimized vector operations that +/// can be an order of magnitude faster (or more). +/// +/// # Example +/// +/// This shows how to find the first position of one of three possible bytes in +/// a haystack. +/// +/// ``` +/// use memchr::memchr3; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memchr3(b'k', b'q', b'u', haystack), Some(4)); +/// ``` +#[inline] +pub fn memchr3( + needle1: u8, + needle2: u8, + needle3: u8, + haystack: &[u8], +) -> Option { + // SAFETY: memchr3_raw, when a match is found, always returns a valid + // pointer between start and end. 
+    unsafe {
+        generic::search_slice_with_raw(haystack, |start, end| {
+            memchr3_raw(needle1, needle2, needle3, start, end)
+        })
+    }
+}
+
+/// Search for the last occurrence of three possible bytes in a haystack.
+///
+/// This returns the index corresponding to the last occurrence of one of the
+/// needle bytes in `haystack`, or `None` if one is not found. If an index is
+/// returned, it is guaranteed to be less than `haystack.len()`.
+///
+/// While this is semantically the same as something like
+/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2 || b == needle3)`,
+/// this routine will attempt to use highly optimized vector operations that
+/// can be an order of magnitude faster (or more).
+///
+/// # Example
+///
+/// This shows how to find the last position of one of three possible bytes in
+/// a haystack.
+///
+/// ```
+/// use memchr::memrchr3;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memrchr3(b'k', b'o', b'n', haystack), Some(17));
+/// ```
+#[inline]
+pub fn memrchr3(
+    needle1: u8,
+    needle2: u8,
+    needle3: u8,
+    haystack: &[u8],
+) -> Option<usize> {
+    // SAFETY: memrchr3_raw, when a match is found, always returns a valid
+    // pointer between start and end.
+    unsafe {
+        generic::search_slice_with_raw(haystack, |start, end| {
+            memrchr3_raw(needle1, needle2, needle3, start, end)
+        })
+    }
+}
+
+/// Returns an iterator over all occurrences of the needle in a haystack.
+///
+/// The iterator returned implements `DoubleEndedIterator`. This means it
+/// can also be used to find occurrences in reverse order.
+#[inline]
+pub fn memchr_iter<'h>(needle: u8, haystack: &'h [u8]) -> Memchr<'h> {
+    Memchr::new(needle, haystack)
+}
+
+/// Returns an iterator over all occurrences of the needle in a haystack, in
+/// reverse.
+#[inline]
+pub fn memrchr_iter(needle: u8, haystack: &[u8]) -> Rev<Memchr<'_>> {
+    Memchr::new(needle, haystack).rev()
+}
+
+/// Returns an iterator over all occurrences of the needles in a haystack.
+///
+/// The iterator returned implements `DoubleEndedIterator`. This means it
+/// can also be used to find occurrences in reverse order.
+#[inline]
+pub fn memchr2_iter<'h>(
+    needle1: u8,
+    needle2: u8,
+    haystack: &'h [u8],
+) -> Memchr2<'h> {
+    Memchr2::new(needle1, needle2, haystack)
+}
+
+/// Returns an iterator over all occurrences of the needles in a haystack, in
+/// reverse.
+#[inline]
+pub fn memrchr2_iter(
+    needle1: u8,
+    needle2: u8,
+    haystack: &[u8],
+) -> Rev<Memchr2<'_>> {
+    Memchr2::new(needle1, needle2, haystack).rev()
+}
+
+/// Returns an iterator over all occurrences of the needles in a haystack.
+///
+/// The iterator returned implements `DoubleEndedIterator`. This means it
+/// can also be used to find occurrences in reverse order.
+#[inline]
+pub fn memchr3_iter<'h>(
+    needle1: u8,
+    needle2: u8,
+    needle3: u8,
+    haystack: &'h [u8],
+) -> Memchr3<'h> {
+    Memchr3::new(needle1, needle2, needle3, haystack)
+}
+
+/// Returns an iterator over all occurrences of the needles in a haystack, in
+/// reverse.
+#[inline]
+pub fn memrchr3_iter(
+    needle1: u8,
+    needle2: u8,
+    needle3: u8,
+    haystack: &[u8],
+) -> Rev<Memchr3<'_>> {
+    Memchr3::new(needle1, needle2, needle3, haystack).rev()
+}
+
+/// An iterator over all occurrences of a single byte in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`memchr_iter`] or [`memrchr_iter`]
+/// functions. It can also be created with the [`Memchr::new`] method.
+///
+/// The lifetime parameter `'h` refers to the lifetime of the haystack being
+/// searched.
+#[derive(Clone, Debug)]
+pub struct Memchr<'h> {
+    needle1: u8,
+    it: crate::arch::generic::memchr::Iter<'h>,
+}
+
+impl<'h> Memchr<'h> {
+    /// Returns an iterator over all occurrences of the needle byte in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn new(needle1: u8, haystack: &'h [u8]) -> Memchr<'h> {
+        Memchr {
+            needle1,
+            it: crate::arch::generic::memchr::Iter::new(haystack),
+        }
+    }
+}
+
+impl<'h> Iterator for Memchr<'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: All of our implementations of memchr ensure that any
+        // pointers returned will fall within the start and end bounds, and
+        // this upholds the safety contract of `self.it.next`.
+        unsafe {
+            // NOTE: I attempted to define an enum of previously created
+            // searchers and then switch on those here instead of just
+            // calling `memchr_raw` (or `One::new(..).find_raw(..)`). But
+            // that turned out to have a fair bit of extra overhead when
+            // searching very small haystacks.
+            self.it.next(|s, e| memchr_raw(self.needle1, s, e))
+        }
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        self.it.count(|s, e| {
+            // SAFETY: We rely on our generic iterator to return valid start
+            // and end pointers.
+            unsafe { count_raw(self.needle1, s, e) }
+        })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'h> DoubleEndedIterator for Memchr<'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: All of our implementations of memchr ensure that any
+        // pointers returned will fall within the start and end bounds, and
+        // this upholds the safety contract of `self.it.next_back`.
+        unsafe { self.it.next_back(|s, e| memrchr_raw(self.needle1, s, e)) }
+    }
+}
+
+impl<'h> core::iter::FusedIterator for Memchr<'h> {}
+
+/// An iterator over all occurrences of two possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`memchr2_iter`] or [`memrchr2_iter`]
+/// functions. It can also be created with the [`Memchr2::new`] method.
+///
+/// The lifetime parameter `'h` refers to the lifetime of the haystack being
+/// searched.
+#[derive(Clone, Debug)]
+pub struct Memchr2<'h> {
+    needle1: u8,
+    needle2: u8,
+    it: crate::arch::generic::memchr::Iter<'h>,
+}
+
+impl<'h> Memchr2<'h> {
+    /// Returns an iterator over all occurrences of the needle bytes in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8, haystack: &'h [u8]) -> Memchr2<'h> {
+        Memchr2 {
+            needle1,
+            needle2,
+            it: crate::arch::generic::memchr::Iter::new(haystack),
+        }
+    }
+}
+
+impl<'h> Iterator for Memchr2<'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: All of our implementations of memchr ensure that any
+        // pointers returned will fall within the start and end bounds, and
+        // this upholds the safety contract of `self.it.next`.
+        unsafe {
+            self.it.next(|s, e| memchr2_raw(self.needle1, self.needle2, s, e))
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'h> DoubleEndedIterator for Memchr2<'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: All of our implementations of memchr ensure that any
+        // pointers returned will fall within the start and end bounds, and
+        // this upholds the safety contract of `self.it.next_back`.
+        unsafe {
+            self.it.next_back(|s, e| {
+                memrchr2_raw(self.needle1, self.needle2, s, e)
+            })
+        }
+    }
+}
+
+impl<'h> core::iter::FusedIterator for Memchr2<'h> {}
+
+/// An iterator over all occurrences of three possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`memchr3_iter`] or [`memrchr3_iter`]
+/// functions. It can also be created with the [`Memchr3::new`] method.
+///
+/// The lifetime parameter `'h` refers to the lifetime of the haystack being
+/// searched.
+#[derive(Clone, Debug)]
+pub struct Memchr3<'h> {
+    needle1: u8,
+    needle2: u8,
+    needle3: u8,
+    it: crate::arch::generic::memchr::Iter<'h>,
+}
+
+impl<'h> Memchr3<'h> {
+    /// Returns an iterator over all occurrences of the needle bytes in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn new(
+        needle1: u8,
+        needle2: u8,
+        needle3: u8,
+        haystack: &'h [u8],
+    ) -> Memchr3<'h> {
+        Memchr3 {
+            needle1,
+            needle2,
+            needle3,
+            it: crate::arch::generic::memchr::Iter::new(haystack),
+        }
+    }
+}
+
+impl<'h> Iterator for Memchr3<'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: All of our implementations of memchr ensure that any
+        // pointers returned will fall within the start and end bounds, and
+        // this upholds the safety contract of `self.it.next`.
+        unsafe {
+            self.it.next(|s, e| {
+                memchr3_raw(self.needle1, self.needle2, self.needle3, s, e)
+            })
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'h> DoubleEndedIterator for Memchr3<'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: All of our implementations of memchr ensure that any
+        // pointers returned will fall within the start and end bounds, and
+        // this upholds the safety contract of `self.it.next_back`.
+        unsafe {
+            self.it.next_back(|s, e| {
+                memrchr3_raw(self.needle1, self.needle2, self.needle3, s, e)
+            })
+        }
+    }
+}
+
+impl<'h> core::iter::FusedIterator for Memchr3<'h> {}
+
+/// memchr, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `One::find_raw`.
+#[inline]
+unsafe fn memchr_raw(
+    needle: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    #[cfg(target_arch = "x86_64")]
+    {
+        // x86_64 does CPU feature detection at runtime in order to use AVX2
+        // instructions even when the `avx2` feature isn't enabled at compile
+        // time. This function also handles using a fallback if neither AVX2
+        // nor SSE2 (unusual) are available.
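+        //
+        // (As the crate docs note: with the `std` feature, that detection
+        // happens at runtime, while core-only builds can only rely on target
+        // features enabled at compile time.)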
+ crate::arch::x86_64::memchr::memchr_raw(needle, start, end) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memchr_raw(needle, start, end) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memchr_raw(needle, start, end) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::One::new(needle).find_raw(start, end) + } +} + +/// memrchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::rfind_raw`. +#[inline] +unsafe fn memrchr_raw( + needle: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::memrchr_raw(needle, start, end) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memrchr_raw(needle, start, end) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memrchr_raw(needle, start, end) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::One::new(needle).rfind_raw(start, end) + } +} + +/// memchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::find_raw`. +#[inline] +unsafe fn memchr2_raw( + needle1: u8, + needle2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::memchr2_raw(needle1, needle2, start, end) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memchr2_raw(needle1, needle2, start, end) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memchr2_raw(needle1, needle2, start, end) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::Two::new(needle1, needle2) + .find_raw(start, end) + } +} + +/// memrchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::rfind_raw`. +#[inline] +unsafe fn memrchr2_raw( + needle1: u8, + needle2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::memrchr2_raw(needle1, needle2, start, end) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memrchr2_raw(needle1, needle2, start, end) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memrchr2_raw( + needle1, needle2, start, end, + ) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::Two::new(needle1, needle2) + .rfind_raw(start, end) + } +} + +/// memchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::find_raw`. 
+#[inline] +unsafe fn memchr3_raw( + needle1: u8, + needle2: u8, + needle3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::memchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::Three::new(needle1, needle2, needle3) + .find_raw(start, end) + } +} + +/// memrchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::rfind_raw`. +#[inline] +unsafe fn memrchr3_raw( + needle1: u8, + needle2: u8, + needle3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::memrchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memrchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memrchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::Three::new(needle1, needle2, needle3) + .rfind_raw(start, end) + } +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::count_raw`. 
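+// A short sketch of where `count_raw` fits: `Memchr::count` forwards here,
+// so something like
+//
+//     memchr_iter(b'o', b"foo boo").count()
+//
+// is answered in a single vectorized pass rather than by repeatedly calling
+// `next`.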
+#[inline] +unsafe fn count_raw(needle: u8, start: *const u8, end: *const u8) -> usize { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::count_raw(needle, start, end) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::count_raw(needle, start, end) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::count_raw(needle, start, end) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::One::new(needle).count_raw(start, end) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn forward1_iter() { + crate::tests::memchr::Runner::new(1).forward_iter( + |haystack, needles| { + Some(memchr_iter(needles[0], haystack).collect()) + }, + ) + } + + #[test] + fn forward1_oneshot() { + crate::tests::memchr::Runner::new(1).forward_oneshot( + |haystack, needles| Some(memchr(needles[0], haystack)), + ) + } + + #[test] + fn reverse1_iter() { + crate::tests::memchr::Runner::new(1).reverse_iter( + |haystack, needles| { + Some(memrchr_iter(needles[0], haystack).collect()) + }, + ) + } + + #[test] + fn reverse1_oneshot() { + crate::tests::memchr::Runner::new(1).reverse_oneshot( + |haystack, needles| Some(memrchr(needles[0], haystack)), + ) + } + + #[test] + fn count1_iter() { + crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { + Some(memchr_iter(needles[0], haystack).count()) + }) + } + + #[test] + fn forward2_iter() { + crate::tests::memchr::Runner::new(2).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(memchr2_iter(n1, n2, haystack).collect()) + }, + ) + } + + #[test] + fn forward2_oneshot() { + crate::tests::memchr::Runner::new(2).forward_oneshot( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(memchr2(n1, n2, haystack)) + }, + ) + } + + #[test] + fn reverse2_iter() { + crate::tests::memchr::Runner::new(2).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(memrchr2_iter(n1, n2, haystack).collect()) + }, + ) + } + + #[test] + fn reverse2_oneshot() { + crate::tests::memchr::Runner::new(2).reverse_oneshot( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(memrchr2(n1, n2, haystack)) + }, + ) + } + + #[test] + fn forward3_iter() { + crate::tests::memchr::Runner::new(3).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(memchr3_iter(n1, n2, n3, haystack).collect()) + }, + ) + } + + #[test] + fn forward3_oneshot() { + crate::tests::memchr::Runner::new(3).forward_oneshot( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(memchr3(n1, n2, n3, haystack)) + }, + ) + } + + #[test] + fn reverse3_iter() { + crate::tests::memchr::Runner::new(3).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(memrchr3_iter(n1, n2, n3, haystack).collect()) + }, + ) + } + + #[test] + fn reverse3_oneshot() { + crate::tests::memchr::Runner::new(3).reverse_oneshot( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(memrchr3(n1, n2, n3, 
haystack)) + }, + ) + } + + // Prior to memchr 2.6, the memchr iterators both implemented Send and + // Sync. But in memchr 2.6, the iterator changed to use raw pointers + // internally and I didn't add explicit Send/Sync impls. This ended up + // regressing the API. This test ensures we don't do that again. + // + // See: https://github.com/BurntSushi/memchr/issues/133 + #[test] + fn sync_regression() { + use core::panic::{RefUnwindSafe, UnwindSafe}; + + fn assert_send_sync() {} + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::() + } +} diff --git a/vendor/memchr/src/memchr/c.rs b/vendor/memchr/src/memchr/c.rs deleted file mode 100644 index 608aabc..0000000 --- a/vendor/memchr/src/memchr/c.rs +++ /dev/null @@ -1,44 +0,0 @@ -// This module defines safe wrappers around memchr (POSIX) and memrchr (GNU -// extension). - -#![allow(dead_code)] - -use libc::{c_int, c_void, size_t}; - -pub fn memchr(needle: u8, haystack: &[u8]) -> Option { - // SAFETY: This is safe to call since all pointers are valid. - let p = unsafe { - libc::memchr( - haystack.as_ptr() as *const c_void, - needle as c_int, - haystack.len() as size_t, - ) - }; - if p.is_null() { - None - } else { - Some(p as usize - (haystack.as_ptr() as usize)) - } -} - -// memrchr is a GNU extension. We know it's available on Linux at least. -#[cfg(target_os = "linux")] -pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { - // GNU's memrchr() will - unlike memchr() - error if haystack is empty. - if haystack.is_empty() { - return None; - } - // SAFETY: This is safe to call since all pointers are valid. - let p = unsafe { - libc::memrchr( - haystack.as_ptr() as *const c_void, - needle as c_int, - haystack.len() as size_t, - ) - }; - if p.is_null() { - None - } else { - Some(p as usize - (haystack.as_ptr() as usize)) - } -} diff --git a/vendor/memchr/src/memchr/fallback.rs b/vendor/memchr/src/memchr/fallback.rs deleted file mode 100644 index b01f224..0000000 --- a/vendor/memchr/src/memchr/fallback.rs +++ /dev/null @@ -1,329 +0,0 @@ -// This module defines pure Rust platform independent implementations of all -// the memchr routines. We do our best to make them fast. Some of them may even -// get auto-vectorized. - -use core::{cmp, usize}; - -#[cfg(target_pointer_width = "16")] -const USIZE_BYTES: usize = 2; - -#[cfg(target_pointer_width = "32")] -const USIZE_BYTES: usize = 4; - -#[cfg(target_pointer_width = "64")] -const USIZE_BYTES: usize = 8; - -// The number of bytes to loop at in one iteration of memchr/memrchr. -const LOOP_SIZE: usize = 2 * USIZE_BYTES; - -/// Return `true` if `x` contains any zero byte. -/// -/// From *Matters Computational*, J. Arndt -/// -/// "The idea is to subtract one from each of the bytes and then look for -/// bytes where the borrow propagated all the way to the most significant -/// bit." -#[inline(always)] -fn contains_zero_byte(x: usize) -> bool { - const LO_U64: u64 = 0x0101010101010101; - const HI_U64: u64 = 0x8080808080808080; - - const LO_USIZE: usize = LO_U64 as usize; - const HI_USIZE: usize = HI_U64 as usize; - - x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0 -} - -/// Repeat the given byte into a word size number. That is, every 8 bits -/// is equivalent to the given byte. For example, if `b` is `\x4E` or -/// `01001110` in binary, then the returned value on a 32-bit system would be: -/// `01001110_01001110_01001110_01001110`. 
-#[inline(always)] -fn repeat_byte(b: u8) -> usize { - (b as usize) * (usize::MAX / 255) -} - -pub fn memchr(n1: u8, haystack: &[u8]) -> Option { - let vn1 = repeat_byte(n1); - let confirm = |byte| byte == n1; - let loop_size = cmp::min(LOOP_SIZE, haystack.len()); - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - let mut ptr = start_ptr; - - unsafe { - let end_ptr = start_ptr.add(haystack.len()); - if haystack.len() < USIZE_BYTES { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = (ptr as *const usize).read_unaligned(); - if contains_zero_byte(chunk ^ vn1) { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align)); - debug_assert!(ptr > start_ptr); - debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr); - while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let a = *(ptr as *const usize); - let b = *(ptr.add(USIZE_BYTES) as *const usize); - let eqa = contains_zero_byte(a ^ vn1); - let eqb = contains_zero_byte(b ^ vn1); - if eqa || eqb { - break; - } - ptr = ptr.add(LOOP_SIZE); - } - forward_search(start_ptr, end_ptr, ptr, confirm) - } -} - -/// Like `memchr`, but searches for two bytes instead of one. -pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - let vn1 = repeat_byte(n1); - let vn2 = repeat_byte(n2); - let confirm = |byte| byte == n1 || byte == n2; - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - let mut ptr = start_ptr; - - unsafe { - let end_ptr = start_ptr.add(haystack.len()); - if haystack.len() < USIZE_BYTES { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = (ptr as *const usize).read_unaligned(); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - if eq1 || eq2 { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align)); - debug_assert!(ptr > start_ptr); - debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr); - while ptr <= end_ptr.sub(USIZE_BYTES) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let chunk = *(ptr as *const usize); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - if eq1 || eq2 { - break; - } - ptr = ptr.add(USIZE_BYTES); - } - forward_search(start_ptr, end_ptr, ptr, confirm) - } -} - -/// Like `memchr`, but searches for three bytes instead of one. 
-pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - let vn1 = repeat_byte(n1); - let vn2 = repeat_byte(n2); - let vn3 = repeat_byte(n3); - let confirm = |byte| byte == n1 || byte == n2 || byte == n3; - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - let mut ptr = start_ptr; - - unsafe { - let end_ptr = start_ptr.add(haystack.len()); - if haystack.len() < USIZE_BYTES { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = (ptr as *const usize).read_unaligned(); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - let eq3 = contains_zero_byte(chunk ^ vn3); - if eq1 || eq2 || eq3 { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align)); - debug_assert!(ptr > start_ptr); - debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr); - while ptr <= end_ptr.sub(USIZE_BYTES) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let chunk = *(ptr as *const usize); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - let eq3 = contains_zero_byte(chunk ^ vn3); - if eq1 || eq2 || eq3 { - break; - } - ptr = ptr.add(USIZE_BYTES); - } - forward_search(start_ptr, end_ptr, ptr, confirm) - } -} - -/// Return the last index matching the byte `x` in `text`. -pub fn memrchr(n1: u8, haystack: &[u8]) -> Option { - let vn1 = repeat_byte(n1); - let confirm = |byte| byte == n1; - let loop_size = cmp::min(LOOP_SIZE, haystack.len()); - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - - unsafe { - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - if haystack.len() < USIZE_BYTES { - return reverse_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned(); - if contains_zero_byte(chunk ^ vn1) { - return reverse_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = (end_ptr as usize & !align) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize); - let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize); - let eqa = contains_zero_byte(a ^ vn1); - let eqb = contains_zero_byte(b ^ vn1); - if eqa || eqb { - break; - } - ptr = ptr.sub(loop_size); - } - reverse_search(start_ptr, end_ptr, ptr, confirm) - } -} - -/// Like `memrchr`, but searches for two bytes instead of one. 
-pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
-    let vn1 = repeat_byte(n1);
-    let vn2 = repeat_byte(n2);
-    let confirm = |byte| byte == n1 || byte == n2;
-    let align = USIZE_BYTES - 1;
-    let start_ptr = haystack.as_ptr();
-
-    unsafe {
-        let end_ptr = start_ptr.add(haystack.len());
-        let mut ptr = end_ptr;
-        if haystack.len() < USIZE_BYTES {
-            return reverse_search(start_ptr, end_ptr, ptr, confirm);
-        }
-
-        let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
-        let eq1 = contains_zero_byte(chunk ^ vn1);
-        let eq2 = contains_zero_byte(chunk ^ vn2);
-        if eq1 || eq2 {
-            return reverse_search(start_ptr, end_ptr, ptr, confirm);
-        }
-
-        ptr = (end_ptr as usize & !align) as *const u8;
-        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
-        while ptr >= start_ptr.add(USIZE_BYTES) {
-            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
-
-            let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
-            let eq1 = contains_zero_byte(chunk ^ vn1);
-            let eq2 = contains_zero_byte(chunk ^ vn2);
-            if eq1 || eq2 {
-                break;
-            }
-            ptr = ptr.sub(USIZE_BYTES);
-        }
-        reverse_search(start_ptr, end_ptr, ptr, confirm)
-    }
-}
-
-/// Like `memrchr`, but searches for three bytes instead of one.
-pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
-    let vn1 = repeat_byte(n1);
-    let vn2 = repeat_byte(n2);
-    let vn3 = repeat_byte(n3);
-    let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
-    let align = USIZE_BYTES - 1;
-    let start_ptr = haystack.as_ptr();
-
-    unsafe {
-        let end_ptr = start_ptr.add(haystack.len());
-        let mut ptr = end_ptr;
-        if haystack.len() < USIZE_BYTES {
-            return reverse_search(start_ptr, end_ptr, ptr, confirm);
-        }
-
-        let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
-        let eq1 = contains_zero_byte(chunk ^ vn1);
-        let eq2 = contains_zero_byte(chunk ^ vn2);
-        let eq3 = contains_zero_byte(chunk ^ vn3);
-        if eq1 || eq2 || eq3 {
-            return reverse_search(start_ptr, end_ptr, ptr, confirm);
-        }
-
-        ptr = (end_ptr as usize & !align) as *const u8;
-        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
-        while ptr >= start_ptr.add(USIZE_BYTES) {
-            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
-
-            let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
-            let eq1 = contains_zero_byte(chunk ^ vn1);
-            let eq2 = contains_zero_byte(chunk ^ vn2);
-            let eq3 = contains_zero_byte(chunk ^ vn3);
-            if eq1 || eq2 || eq3 {
-                break;
-            }
-            ptr = ptr.sub(USIZE_BYTES);
-        }
-        reverse_search(start_ptr, end_ptr, ptr, confirm)
-    }
-}
-
-#[inline(always)]
-unsafe fn forward_search<F: Fn(u8) -> bool>(
-    start_ptr: *const u8,
-    end_ptr: *const u8,
-    mut ptr: *const u8,
-    confirm: F,
-) -> Option<usize> {
-    debug_assert!(start_ptr <= ptr);
-    debug_assert!(ptr <= end_ptr);
-
-    while ptr < end_ptr {
-        if confirm(*ptr) {
-            return Some(sub(ptr, start_ptr));
-        }
-        ptr = ptr.offset(1);
-    }
-    None
-}
-
-#[inline(always)]
-unsafe fn reverse_search<F: Fn(u8) -> bool>(
-    start_ptr: *const u8,
-    end_ptr: *const u8,
-    mut ptr: *const u8,
-    confirm: F,
-) -> Option<usize> {
-    debug_assert!(start_ptr <= ptr);
-    debug_assert!(ptr <= end_ptr);
-
-    while ptr > start_ptr {
-        ptr = ptr.offset(-1);
-        if confirm(*ptr) {
-            return Some(sub(ptr, start_ptr));
-        }
-    }
-    None
-}
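A detail that is easy to miss in the routines above is the cursor adjustment `ptr.add(USIZE_BYTES - (start_ptr as usize & align))`: it rounds the cursor up to the next word boundary so the main loop only performs aligned reads, relying on the initial unaligned chunk read to have covered any skipped bytes. A hypothetical stand-alone check of that arithmetic (not part of the vendored sources):

fn main() {
    const USIZE_BYTES: usize = core::mem::size_of::<usize>();
    let align = USIZE_BYTES - 1;
    let haystack = [0u8; 64];
    let start_ptr = haystack.as_ptr();
    // Same expression as in the forward routines above.
    let ptr = unsafe { start_ptr.add(USIZE_BYTES - (start_ptr as usize & align)) };
    // The cursor is now word-aligned, strictly past the start, and at most
    // one word in, so every byte skipped over was already covered by the
    // first unaligned chunk read.
    assert_eq!(0, (ptr as usize) % USIZE_BYTES);
    assert!(ptr > start_ptr);
    assert!((ptr as usize) - (start_ptr as usize) <= USIZE_BYTES);
}

-/// Subtract `b` from `a` and return the difference. `a` should be greater than
-/// or equal to `b`.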
-fn sub(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memchr/iter.rs b/vendor/memchr/src/memchr/iter.rs deleted file mode 100644 index 16e203f..0000000 --- a/vendor/memchr/src/memchr/iter.rs +++ /dev/null @@ -1,173 +0,0 @@ -use crate::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3}; - -macro_rules! iter_next { - // Common code for the memchr iterators: - // update haystack and position and produce the index - // - // self: &mut Self where Self is the iterator - // search_result: Option which is the result of the corresponding - // memchr function. - // - // Returns Option (the next iterator element) - ($self_:expr, $search_result:expr) => { - $search_result.map(move |index| { - // split and take the remaining back half - $self_.haystack = $self_.haystack.split_at(index + 1).1; - let found_position = $self_.position + index; - $self_.position = found_position + 1; - found_position - }) - }; -} - -macro_rules! iter_next_back { - ($self_:expr, $search_result:expr) => { - $search_result.map(move |index| { - // split and take the remaining front half - $self_.haystack = $self_.haystack.split_at(index).0; - $self_.position + index - }) - }; -} - -/// An iterator for `memchr`. -pub struct Memchr<'a> { - needle: u8, - // The haystack to iterate over - haystack: &'a [u8], - // The index - position: usize, -} - -impl<'a> Memchr<'a> { - /// Creates a new iterator that yields all positions of needle in haystack. - #[inline] - pub fn new(needle: u8, haystack: &[u8]) -> Memchr<'_> { - Memchr { needle: needle, haystack: haystack, position: 0 } - } -} - -impl<'a> Iterator for Memchr<'a> { - type Item = usize; - - #[inline] - fn next(&mut self) -> Option { - iter_next!(self, memchr(self.needle, self.haystack)) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - (0, Some(self.haystack.len())) - } -} - -impl<'a> DoubleEndedIterator for Memchr<'a> { - #[inline] - fn next_back(&mut self) -> Option { - iter_next_back!(self, memrchr(self.needle, self.haystack)) - } -} - -/// An iterator for `memchr2`. -pub struct Memchr2<'a> { - needle1: u8, - needle2: u8, - // The haystack to iterate over - haystack: &'a [u8], - // The index - position: usize, -} - -impl<'a> Memchr2<'a> { - /// Creates a new iterator that yields all positions of needle in haystack. - #[inline] - pub fn new(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2<'_> { - Memchr2 { - needle1: needle1, - needle2: needle2, - haystack: haystack, - position: 0, - } - } -} - -impl<'a> Iterator for Memchr2<'a> { - type Item = usize; - - #[inline] - fn next(&mut self) -> Option { - iter_next!(self, memchr2(self.needle1, self.needle2, self.haystack)) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - (0, Some(self.haystack.len())) - } -} - -impl<'a> DoubleEndedIterator for Memchr2<'a> { - #[inline] - fn next_back(&mut self) -> Option { - iter_next_back!( - self, - memrchr2(self.needle1, self.needle2, self.haystack) - ) - } -} - -/// An iterator for `memchr3`. 
-pub struct Memchr3<'a> {
-    needle1: u8,
-    needle2: u8,
-    needle3: u8,
-    // The haystack to iterate over
-    haystack: &'a [u8],
-    // The index
-    position: usize,
-}
-
-impl<'a> Memchr3<'a> {
-    /// Creates a new iterator that yields all positions of the needles in
-    /// the haystack.
-    #[inline]
-    pub fn new(
-        needle1: u8,
-        needle2: u8,
-        needle3: u8,
-        haystack: &[u8],
-    ) -> Memchr3<'_> {
-        Memchr3 {
-            needle1: needle1,
-            needle2: needle2,
-            needle3: needle3,
-            haystack: haystack,
-            position: 0,
-        }
-    }
-}
-
-impl<'a> Iterator for Memchr3<'a> {
-    type Item = usize;
-
-    #[inline]
-    fn next(&mut self) -> Option<usize> {
-        iter_next!(
-            self,
-            memchr3(self.needle1, self.needle2, self.needle3, self.haystack)
-        )
-    }
-
-    #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (0, Some(self.haystack.len()))
-    }
-}
-
-impl<'a> DoubleEndedIterator for Memchr3<'a> {
-    #[inline]
-    fn next_back(&mut self) -> Option<usize> {
-        iter_next_back!(
-            self,
-            memrchr3(self.needle1, self.needle2, self.needle3, self.haystack)
-        )
-    }
-}
diff --git a/vendor/memchr/src/memchr/mod.rs b/vendor/memchr/src/memchr/mod.rs
deleted file mode 100644
index 09ce6ef..0000000
--- a/vendor/memchr/src/memchr/mod.rs
+++ /dev/null
@@ -1,410 +0,0 @@
-use core::iter::Rev;
-
-pub use self::iter::{Memchr, Memchr2, Memchr3};
-
-// N.B. If you're looking for the cfg knobs for libc, see build.rs.
-#[cfg(memchr_libc)]
-mod c;
-#[allow(dead_code)]
-pub mod fallback;
-mod iter;
-pub mod naive;
-#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
-mod x86;
-
-/// An iterator over all occurrences of the needle in a haystack.
-#[inline]
-pub fn memchr_iter(needle: u8, haystack: &[u8]) -> Memchr<'_> {
-    Memchr::new(needle, haystack)
-}
-
-/// An iterator over all occurrences of the needles in a haystack.
-#[inline]
-pub fn memchr2_iter(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2<'_> {
-    Memchr2::new(needle1, needle2, haystack)
-}
-
-/// An iterator over all occurrences of the needles in a haystack.
-#[inline]
-pub fn memchr3_iter(
-    needle1: u8,
-    needle2: u8,
-    needle3: u8,
-    haystack: &[u8],
-) -> Memchr3<'_> {
-    Memchr3::new(needle1, needle2, needle3, haystack)
-}
-
-/// An iterator over all occurrences of the needle in a haystack, in reverse.
-#[inline]
-pub fn memrchr_iter(needle: u8, haystack: &[u8]) -> Rev<Memchr<'_>> {
-    Memchr::new(needle, haystack).rev()
-}
-
-/// An iterator over all occurrences of the needles in a haystack, in reverse.
-#[inline]
-pub fn memrchr2_iter(
-    needle1: u8,
-    needle2: u8,
-    haystack: &[u8],
-) -> Rev<Memchr2<'_>> {
-    Memchr2::new(needle1, needle2, haystack).rev()
-}
-
-/// An iterator over all occurrences of the needles in a haystack, in reverse.
-#[inline]
-pub fn memrchr3_iter(
-    needle1: u8,
-    needle2: u8,
-    needle3: u8,
-    haystack: &[u8],
-) -> Rev<Memchr3<'_>> {
-    Memchr3::new(needle1, needle2, needle3, haystack).rev()
-}
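The `iter_next!`/`iter_next_back!` macros above carry all of the iterator state: after each hit they shrink the haystack past the match and add the running offset back in. A stand-alone sketch of the forward case (hypothetical `positions` helper, not part of the vendored sources, with `Iterator::position` standing in for the deleted `memchr` so it runs on its own):

fn positions(needle: u8, mut haystack: &[u8]) -> Vec<usize> {
    let mut out = Vec::new();
    // Absolute index of haystack[0] within the original input.
    let mut position = 0;
    while let Some(i) = haystack.iter().position(|&b| b == needle) {
        out.push(position + i);
        // Split and keep the remaining back half, exactly as iter_next! does.
        haystack = &haystack[i + 1..];
        position += i + 1;
    }
    out
}

fn main() {
    assert_eq!(positions(b'o', b"the quick brown fox"), vec![12, 17]);
}

-
-/// Search for the first occurrence of a byte in a slice.
-///
-/// This returns the index corresponding to the first occurrence of `needle` in
-/// `haystack`, or `None` if one is not found. If an index is returned, it is
-/// guaranteed to be less than `usize::MAX`.
-///
-/// While this is operationally the same as something like
-/// `haystack.iter().position(|&b| b == needle)`, `memchr` will use a highly
-/// optimized routine that can be up to an order of magnitude faster in some
-/// cases.
-///
-/// # Example
-///
-/// This shows how to find the first position of a byte in a byte string.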
-/// -/// ``` -/// use memchr::memchr; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memchr(b'k', haystack), Some(8)); -/// ``` -#[inline] -pub fn memchr(needle: u8, haystack: &[u8]) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - naive::memchr(n1, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - x86::memchr(n1, haystack) - } - - #[cfg(all( - memchr_libc, - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - c::memchr(n1, haystack) - } - - #[cfg(all( - not(memchr_libc), - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - fallback::memchr(n1, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle, haystack) - } -} - -/// Like `memchr`, but searches for either of two bytes instead of just one. -/// -/// This returns the index corresponding to the first occurrence of `needle1` -/// or the first occurrence of `needle2` in `haystack` (whichever occurs -/// earlier), or `None` if neither one is found. If an index is returned, it is -/// guaranteed to be less than `usize::MAX`. -/// -/// While this is operationally the same as something like -/// `haystack.iter().position(|&b| b == needle1 || b == needle2)`, `memchr2` -/// will use a highly optimized routine that can be up to an order of magnitude -/// faster in some cases. -/// -/// # Example -/// -/// This shows how to find the first position of either of two bytes in a byte -/// string. -/// -/// ``` -/// use memchr::memchr2; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memchr2(b'k', b'q', haystack), Some(4)); -/// ``` -#[inline] -pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - naive::memchr2(n1, n2, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - x86::memchr2(n1, n2, haystack) - } - - #[cfg(all( - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - fallback::memchr2(n1, n2, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle1, needle2, haystack) - } -} - -/// Like `memchr`, but searches for any of three bytes instead of just one. -/// -/// This returns the index corresponding to the first occurrence of `needle1`, -/// the first occurrence of `needle2`, or the first occurrence of `needle3` in -/// `haystack` (whichever occurs earliest), or `None` if none are found. If an -/// index is returned, it is guaranteed to be less than `usize::MAX`. -/// -/// While this is operationally the same as something like -/// `haystack.iter().position(|&b| b == needle1 || b == needle2 || -/// b == needle3)`, `memchr3` will use a highly optimized routine that can be -/// up to an order of magnitude faster in some cases. -/// -/// # Example -/// -/// This shows how to find the first position of any of three bytes in a byte -/// string. 
-/// -/// ``` -/// use memchr::memchr3; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memchr3(b'k', b'q', b'e', haystack), Some(2)); -/// ``` -#[inline] -pub fn memchr3( - needle1: u8, - needle2: u8, - needle3: u8, - haystack: &[u8], -) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - naive::memchr3(n1, n2, n3, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - x86::memchr3(n1, n2, n3, haystack) - } - - #[cfg(all( - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - fallback::memchr3(n1, n2, n3, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle1, needle2, needle3, haystack) - } -} - -/// Search for the last occurrence of a byte in a slice. -/// -/// This returns the index corresponding to the last occurrence of `needle` in -/// `haystack`, or `None` if one is not found. If an index is returned, it is -/// guaranteed to be less than `usize::MAX`. -/// -/// While this is operationally the same as something like -/// `haystack.iter().rposition(|&b| b == needle)`, `memrchr` will use a highly -/// optimized routine that can be up to an order of magnitude faster in some -/// cases. -/// -/// # Example -/// -/// This shows how to find the last position of a byte in a byte string. -/// -/// ``` -/// use memchr::memrchr; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memrchr(b'o', haystack), Some(17)); -/// ``` -#[inline] -pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - naive::memrchr(n1, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - x86::memrchr(n1, haystack) - } - - #[cfg(all( - memchr_libc, - target_os = "linux", - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri) - ))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - c::memrchr(n1, haystack) - } - - #[cfg(all( - not(all(memchr_libc, target_os = "linux")), - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - fallback::memrchr(n1, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle, haystack) - } -} - -/// Like `memrchr`, but searches for either of two bytes instead of just one. -/// -/// This returns the index corresponding to the last occurrence of `needle1` or -/// the last occurrence of `needle2` in `haystack` (whichever occurs later), or -/// `None` if neither one is found. If an index is returned, it is guaranteed -/// to be less than `usize::MAX`. -/// -/// While this is operationally the same as something like -/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2)`, `memrchr2` -/// will use a highly optimized routine that can be up to an order of magnitude -/// faster in some cases. -/// -/// # Example -/// -/// This shows how to find the last position of either of two bytes in a byte -/// string. 
-/// -/// ``` -/// use memchr::memrchr2; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memrchr2(b'k', b'q', haystack), Some(8)); -/// ``` -#[inline] -pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - naive::memrchr2(n1, n2, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - x86::memrchr2(n1, n2, haystack) - } - - #[cfg(all( - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - fallback::memrchr2(n1, n2, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle1, needle2, haystack) - } -} - -/// Like `memrchr`, but searches for any of three bytes instead of just one. -/// -/// This returns the index corresponding to the last occurrence of `needle1`, -/// the last occurrence of `needle2`, or the last occurrence of `needle3` in -/// `haystack` (whichever occurs later), or `None` if none are found. If an -/// index is returned, it is guaranteed to be less than `usize::MAX`. -/// -/// While this is operationally the same as something like -/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2 || -/// b == needle3)`, `memrchr3` will use a highly optimized routine that can be -/// up to an order of magnitude faster in some cases. -/// -/// # Example -/// -/// This shows how to find the last position of any of three bytes in a byte -/// string. -/// -/// ``` -/// use memchr::memrchr3; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memrchr3(b'k', b'q', b'e', haystack), Some(8)); -/// ``` -#[inline] -pub fn memrchr3( - needle1: u8, - needle2: u8, - needle3: u8, - haystack: &[u8], -) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - naive::memrchr3(n1, n2, n3, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - x86::memrchr3(n1, n2, n3, haystack) - } - - #[cfg(all( - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - fallback::memrchr3(n1, n2, n3, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle1, needle2, needle3, haystack) - } -} diff --git a/vendor/memchr/src/memchr/naive.rs b/vendor/memchr/src/memchr/naive.rs deleted file mode 100644 index 3f3053d..0000000 --- a/vendor/memchr/src/memchr/naive.rs +++ /dev/null @@ -1,25 +0,0 @@ -#![allow(dead_code)] - -pub fn memchr(n1: u8, haystack: &[u8]) -> Option { - haystack.iter().position(|&b| b == n1) -} - -pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - haystack.iter().position(|&b| b == n1 || b == n2) -} - -pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - haystack.iter().position(|&b| b == n1 || b == n2 || b == n3) -} - -pub fn memrchr(n1: u8, haystack: &[u8]) -> Option { - haystack.iter().rposition(|&b| b == n1) -} - -pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - haystack.iter().rposition(|&b| b == n1 || b == n2) -} - -pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3) -} diff --git a/vendor/memchr/src/memchr/x86/avx.rs 
b/vendor/memchr/src/memchr/x86/avx.rs
deleted file mode 100644
index 5351230..0000000
--- a/vendor/memchr/src/memchr/x86/avx.rs
+++ /dev/null
@@ -1,755 +0,0 @@
-use core::{arch::x86_64::*, cmp, mem::size_of};
-
-use super::sse2;
-
-const VECTOR_SIZE: usize = size_of::<__m256i>();
-const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
-
-// The number of bytes to loop at in one iteration of memchr/memrchr.
-const LOOP_SIZE: usize = 4 * VECTOR_SIZE;
-
-// The number of bytes to loop at in one iteration of memchr2/memrchr2 and
-// memchr3/memrchr3. There was no observable difference between 128 and 64
-// bytes in benchmarks. memchr3 in particular only gets a very slight speed up
-// from the loop unrolling.
-const LOOP_SIZE2: usize = 2 * VECTOR_SIZE;
-
-#[target_feature(enable = "avx2")]
-pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
-    // For a high level explanation for how this algorithm works, see the
-    // sse2 implementation. The avx implementation here is the same, but with
-    // 256-bit vectors instead of 128-bit vectors.
-
-    // This routine is called whenever a match is detected. It is specifically
-    // marked as `#[inline(never)]` because doing so improves the codegen of
-    // the unrolled loop below. Inlining this seems to cause codegen with some
-    // extra adds and a load that aren't necessary. This seems to result in
-    // about a 10% improvement for the memchr1/crate/huge/never benchmark.
-    //
-    // Interestingly, I couldn't observe a similar improvement for memrchr.
-    #[cold]
-    #[inline(never)]
-    #[target_feature(enable = "avx2")]
-    unsafe fn matched(
-        start_ptr: *const u8,
-        ptr: *const u8,
-        eqa: __m256i,
-        eqb: __m256i,
-        eqc: __m256i,
-        eqd: __m256i,
-    ) -> usize {
-        let mut at = sub(ptr, start_ptr);
-        let mask = _mm256_movemask_epi8(eqa);
-        if mask != 0 {
-            return at + forward_pos(mask);
-        }
-
-        at += VECTOR_SIZE;
-        let mask = _mm256_movemask_epi8(eqb);
-        if mask != 0 {
-            return at + forward_pos(mask);
-        }
-
-        at += VECTOR_SIZE;
-        let mask = _mm256_movemask_epi8(eqc);
-        if mask != 0 {
-            return at + forward_pos(mask);
-        }
-
-        at += VECTOR_SIZE;
-        let mask = _mm256_movemask_epi8(eqd);
-        debug_assert!(mask != 0);
-        at + forward_pos(mask)
-    }
-
-    let start_ptr = haystack.as_ptr();
-    let end_ptr = start_ptr.add(haystack.len());
-    let mut ptr = start_ptr;
-
-    if haystack.len() < VECTOR_SIZE {
-        // For small haystacks, defer to the SSE2 implementation. Codegen
-        // suggests this completely avoids touching the AVX vectors.
- return sse2::memchr(n1, haystack); - } - - let vn1 = _mm256_set1_epi8(n1 as i8); - let loop_size = cmp::min(LOOP_SIZE, haystack.len()); - if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i); - let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i); - let eqa = _mm256_cmpeq_epi8(vn1, a); - let eqb = _mm256_cmpeq_epi8(vn1, b); - let eqc = _mm256_cmpeq_epi8(vn1, c); - let eqd = _mm256_cmpeq_epi8(vn1, d); - let or1 = _mm256_or_si256(eqa, eqb); - let or2 = _mm256_or_si256(eqc, eqd); - let or3 = _mm256_or_si256(or1, or2); - - if _mm256_movemask_epi8(or3) != 0 { - return Some(matched(start_ptr, ptr, eqa, eqb, eqc, eqd)); - } - ptr = ptr.add(loop_size); - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE); - - if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return forward_search1(start_ptr, end_ptr, ptr, vn1); - } - None -} - -#[target_feature(enable = "avx2")] -pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - #[cold] - #[inline(never)] - #[target_feature(enable = "avx2")] - unsafe fn matched( - start_ptr: *const u8, - ptr: *const u8, - eqa1: __m256i, - eqa2: __m256i, - eqb1: __m256i, - eqb2: __m256i, - ) -> usize { - let mut at = sub(ptr, start_ptr); - let mask1 = _mm256_movemask_epi8(eqa1); - let mask2 = _mm256_movemask_epi8(eqa2); - if mask1 != 0 || mask2 != 0 { - return at + forward_pos2(mask1, mask2); - } - - at += VECTOR_SIZE; - let mask1 = _mm256_movemask_epi8(eqb1); - let mask2 = _mm256_movemask_epi8(eqb2); - at + forward_pos2(mask1, mask2) - } - - let vn1 = _mm256_set1_epi8(n1 as i8); - let vn2 = _mm256_set1_epi8(n2 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 || *ptr == n2 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - - if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let eqa1 = _mm256_cmpeq_epi8(vn1, a); - let eqb1 = _mm256_cmpeq_epi8(vn1, b); - let eqa2 = _mm256_cmpeq_epi8(vn2, a); - let eqb2 = _mm256_cmpeq_epi8(vn2, b); - let or1 = _mm256_or_si256(eqa1, eqb1); - let or2 = _mm256_or_si256(eqa2, eqb2); - let or3 = _mm256_or_si256(or1, or2); - if _mm256_movemask_epi8(or3) != 0 { - return 
Some(matched(start_ptr, ptr, eqa1, eqa2, eqb1, eqb2)); - } - ptr = ptr.add(loop_size); - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2); - } - None -} - -#[target_feature(enable = "avx2")] -pub unsafe fn memchr3( - n1: u8, - n2: u8, - n3: u8, - haystack: &[u8], -) -> Option { - #[cold] - #[inline(never)] - #[target_feature(enable = "avx2")] - unsafe fn matched( - start_ptr: *const u8, - ptr: *const u8, - eqa1: __m256i, - eqa2: __m256i, - eqa3: __m256i, - eqb1: __m256i, - eqb2: __m256i, - eqb3: __m256i, - ) -> usize { - let mut at = sub(ptr, start_ptr); - let mask1 = _mm256_movemask_epi8(eqa1); - let mask2 = _mm256_movemask_epi8(eqa2); - let mask3 = _mm256_movemask_epi8(eqa3); - if mask1 != 0 || mask2 != 0 || mask3 != 0 { - return at + forward_pos3(mask1, mask2, mask3); - } - - at += VECTOR_SIZE; - let mask1 = _mm256_movemask_epi8(eqb1); - let mask2 = _mm256_movemask_epi8(eqb2); - let mask3 = _mm256_movemask_epi8(eqb3); - at + forward_pos3(mask1, mask2, mask3) - } - - let vn1 = _mm256_set1_epi8(n1 as i8); - let vn2 = _mm256_set1_epi8(n2 as i8); - let vn3 = _mm256_set1_epi8(n3 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 || *ptr == n2 || *ptr == n3 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - - if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let eqa1 = _mm256_cmpeq_epi8(vn1, a); - let eqb1 = _mm256_cmpeq_epi8(vn1, b); - let eqa2 = _mm256_cmpeq_epi8(vn2, a); - let eqb2 = _mm256_cmpeq_epi8(vn2, b); - let eqa3 = _mm256_cmpeq_epi8(vn3, a); - let eqb3 = _mm256_cmpeq_epi8(vn3, b); - let or1 = _mm256_or_si256(eqa1, eqb1); - let or2 = _mm256_or_si256(eqa2, eqb2); - let or3 = _mm256_or_si256(eqa3, eqb3); - let or4 = _mm256_or_si256(or1, or2); - let or5 = _mm256_or_si256(or3, or4); - if _mm256_movemask_epi8(or5) != 0 { - return Some(matched( - start_ptr, ptr, eqa1, eqa2, eqa3, eqb1, eqb2, eqb3, - )); - } - ptr = ptr.add(loop_size); - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - if let Some(i) = - forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) - { - return Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3); - } - None -} - -#[target_feature(enable = "avx2")] -pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option { - let vn1 = _mm256_set1_epi8(n1 as i8); - let len = haystack.len(); - let loop_size = 
cmp::min(LOOP_SIZE, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i); - let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i); - let eqa = _mm256_cmpeq_epi8(vn1, a); - let eqb = _mm256_cmpeq_epi8(vn1, b); - let eqc = _mm256_cmpeq_epi8(vn1, c); - let eqd = _mm256_cmpeq_epi8(vn1, d); - let or1 = _mm256_or_si256(eqa, eqb); - let or2 = _mm256_or_si256(eqc, eqd); - let or3 = _mm256_or_si256(or1, or2); - if _mm256_movemask_epi8(or3) != 0 { - let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr); - let mask = _mm256_movemask_epi8(eqd); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm256_movemask_epi8(eqc); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm256_movemask_epi8(eqb); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm256_movemask_epi8(eqa); - debug_assert!(mask != 0); - return Some(at + reverse_pos(mask)); - } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search1(start_ptr, end_ptr, start_ptr, vn1); - } - None -} - -#[target_feature(enable = "avx2")] -pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - let vn1 = _mm256_set1_epi8(n1 as i8); - let vn2 = _mm256_set1_epi8(n2 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 || *ptr == n2 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let eqa1 = _mm256_cmpeq_epi8(vn1, a); - let eqb1 = _mm256_cmpeq_epi8(vn1, b); - let eqa2 = _mm256_cmpeq_epi8(vn2, a); - let eqb2 = _mm256_cmpeq_epi8(vn2, b); - let or1 = _mm256_or_si256(eqa1, eqb1); - let or2 = _mm256_or_si256(eqa2, eqb2); - let or3 = _mm256_or_si256(or1, or2); - if _mm256_movemask_epi8(or3) != 0 { - let mut at = sub(ptr.add(VECTOR_SIZE), 
start_ptr); - let mask1 = _mm256_movemask_epi8(eqb1); - let mask2 = _mm256_movemask_epi8(eqb2); - if mask1 != 0 || mask2 != 0 { - return Some(at + reverse_pos2(mask1, mask2)); - } - - at -= VECTOR_SIZE; - let mask1 = _mm256_movemask_epi8(eqa1); - let mask2 = _mm256_movemask_epi8(eqa2); - return Some(at + reverse_pos2(mask1, mask2)); - } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2); - } - None -} - -#[target_feature(enable = "avx2")] -pub unsafe fn memrchr3( - n1: u8, - n2: u8, - n3: u8, - haystack: &[u8], -) -> Option { - let vn1 = _mm256_set1_epi8(n1 as i8); - let vn2 = _mm256_set1_epi8(n2 as i8); - let vn3 = _mm256_set1_epi8(n3 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 || *ptr == n2 || *ptr == n3 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let eqa1 = _mm256_cmpeq_epi8(vn1, a); - let eqb1 = _mm256_cmpeq_epi8(vn1, b); - let eqa2 = _mm256_cmpeq_epi8(vn2, a); - let eqb2 = _mm256_cmpeq_epi8(vn2, b); - let eqa3 = _mm256_cmpeq_epi8(vn3, a); - let eqb3 = _mm256_cmpeq_epi8(vn3, b); - let or1 = _mm256_or_si256(eqa1, eqb1); - let or2 = _mm256_or_si256(eqa2, eqb2); - let or3 = _mm256_or_si256(eqa3, eqb3); - let or4 = _mm256_or_si256(or1, or2); - let or5 = _mm256_or_si256(or3, or4); - if _mm256_movemask_epi8(or5) != 0 { - let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr); - let mask1 = _mm256_movemask_epi8(eqb1); - let mask2 = _mm256_movemask_epi8(eqb2); - let mask3 = _mm256_movemask_epi8(eqb3); - if mask1 != 0 || mask2 != 0 || mask3 != 0 { - return Some(at + reverse_pos3(mask1, mask2, mask3)); - } - - at -= VECTOR_SIZE; - let mask1 = _mm256_movemask_epi8(eqa1); - let mask2 = _mm256_movemask_epi8(eqa2); - let mask3 = _mm256_movemask_epi8(eqa3); - return Some(at + reverse_pos3(mask1, mask2, mask3)); - } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = - reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) - { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3); - } - None -} - -#[target_feature(enable = "avx2")] -unsafe fn forward_search1( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, 
vn1)); - if mask != 0 { - Some(sub(ptr, start_ptr) + forward_pos(mask)) - } else { - None - } -} - -#[target_feature(enable = "avx2")] -unsafe fn forward_search2( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, - vn2: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let eq1 = _mm256_cmpeq_epi8(chunk, vn1); - let eq2 = _mm256_cmpeq_epi8(chunk, vn2); - if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 { - let mask1 = _mm256_movemask_epi8(eq1); - let mask2 = _mm256_movemask_epi8(eq2); - Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2)) - } else { - None - } -} - -#[target_feature(enable = "avx2")] -unsafe fn forward_search3( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, - vn2: __m256i, - vn3: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let eq1 = _mm256_cmpeq_epi8(chunk, vn1); - let eq2 = _mm256_cmpeq_epi8(chunk, vn2); - let eq3 = _mm256_cmpeq_epi8(chunk, vn3); - let or = _mm256_or_si256(eq1, eq2); - if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 { - let mask1 = _mm256_movemask_epi8(eq1); - let mask2 = _mm256_movemask_epi8(eq2); - let mask3 = _mm256_movemask_epi8(eq3); - Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3)) - } else { - None - } -} - -#[target_feature(enable = "avx2")] -unsafe fn reverse_search1( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(vn1, chunk)); - if mask != 0 { - Some(sub(ptr, start_ptr) + reverse_pos(mask)) - } else { - None - } -} - -#[target_feature(enable = "avx2")] -unsafe fn reverse_search2( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, - vn2: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let eq1 = _mm256_cmpeq_epi8(chunk, vn1); - let eq2 = _mm256_cmpeq_epi8(chunk, vn2); - if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 { - let mask1 = _mm256_movemask_epi8(eq1); - let mask2 = _mm256_movemask_epi8(eq2); - Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2)) - } else { - None - } -} - -#[target_feature(enable = "avx2")] -unsafe fn reverse_search3( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, - vn2: __m256i, - vn3: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let eq1 = _mm256_cmpeq_epi8(chunk, vn1); - let eq2 = _mm256_cmpeq_epi8(chunk, vn2); - let eq3 = _mm256_cmpeq_epi8(chunk, vn3); - let or = _mm256_or_si256(eq1, eq2); - if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 { - let mask1 = _mm256_movemask_epi8(eq1); - let mask2 = _mm256_movemask_epi8(eq2); - let mask3 = 
_mm256_movemask_epi8(eq3); - Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3)) - } else { - None - } -} - -/// Compute the position of the first matching byte from the given mask. The -/// position returned is always in the range [0, 31]. -/// -/// The mask given is expected to be the result of _mm256_movemask_epi8. -fn forward_pos(mask: i32) -> usize { - // We are dealing with little endian here, where the most significant byte - // is at a higher address. That means the least significant bit that is set - // corresponds to the position of our first matching byte. That position - // corresponds to the number of zeros after the least significant bit. - mask.trailing_zeros() as usize -} - -/// Compute the position of the first matching byte from the given masks. The -/// position returned is always in the range [0, 31]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm256_movemask_epi8, -/// where at least one of the masks is non-zero (i.e., indicates a match). -fn forward_pos2(mask1: i32, mask2: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0); - - forward_pos(mask1 | mask2) -} - -/// Compute the position of the first matching byte from the given masks. The -/// position returned is always in the range [0, 31]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm256_movemask_epi8, -/// where at least one of the masks is non-zero (i.e., indicates a match). -fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); - - forward_pos(mask1 | mask2 | mask3) -} - -/// Compute the position of the last matching byte from the given mask. The -/// position returned is always in the range [0, 31]. -/// -/// The mask given is expected to be the result of _mm256_movemask_epi8. -fn reverse_pos(mask: i32) -> usize { - // We are dealing with little endian here, where the most significant byte - // is at a higher address. That means the most significant bit that is set - // corresponds to the position of our last matching byte. The position from - // the end of the mask is therefore the number of leading zeros in a 32 - // bit integer, and the position from the start of the mask is therefore - // 32 - (leading zeros) - 1. - VECTOR_SIZE - (mask as u32).leading_zeros() as usize - 1 -} - -/// Compute the position of the last matching byte from the given masks. The -/// position returned is always in the range [0, 31]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm256_movemask_epi8, -/// where at least one of the masks is non-zero (i.e., indicates a match). -fn reverse_pos2(mask1: i32, mask2: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0); - - reverse_pos(mask1 | mask2) -} - -/// Compute the position of the last matching byte from the given masks. The -/// position returned is always in the range [0, 31]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm256_movemask_epi8, -/// where at least one of the masks is non-zero (i.e., indicates a match). -fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); - - reverse_pos(mask1 | mask2 | mask3) -} - -/// Subtract `b` from `a` and return the difference. 
`a` should be greater than
-/// or equal to `b`.
-fn sub(a: *const u8, b: *const u8) -> usize {
-    debug_assert!(a >= b);
-    (a as usize) - (b as usize)
-}
diff --git a/vendor/memchr/src/memchr/x86/mod.rs b/vendor/memchr/src/memchr/x86/mod.rs
deleted file mode 100644
index aec35db..0000000
--- a/vendor/memchr/src/memchr/x86/mod.rs
+++ /dev/null
@@ -1,148 +0,0 @@
-use super::fallback;
-
-// We only use AVX when we can detect at runtime whether it's available, which
-// requires std.
-#[cfg(feature = "std")]
-mod avx;
-mod sse2;
-
-/// This macro employs a gcc-like "ifunc" trick whereby, upon first calling
-/// `memchr` (for example), CPU feature detection will be performed at runtime
-/// to determine the best implementation to use. After CPU feature detection
-/// is done, we replace `memchr`'s function pointer with the selection. Upon
-/// subsequent invocations, the CPU-specific routine is invoked directly, which
-/// skips the CPU feature detection and subsequent branch that's required.
-///
-/// While this typically doesn't matter for rare occurrences or when used on
-/// larger haystacks, `memchr` can be called in tight loops where the overhead
-/// of this branch can actually add up *and is measurable*. This trick was
-/// necessary to bring this implementation up to glibc's speeds for the 'tiny'
-/// benchmarks, for example.
-///
-/// At some point, I expect the Rust ecosystem will get a nice macro for doing
-/// exactly this, at which point, we can replace our hand-jammed version of it.
-///
-/// N.B. The ifunc strategy does prevent function inlining of course, but
-/// on modern CPUs, you'll probably end up with the AVX2 implementation,
-/// which probably can't be inlined anyway---unless you've compiled your
-/// entire program with AVX2 enabled. However, even then, the various memchr
-/// implementations aren't exactly small, so inlining might not help anyway!
-///
-/// # Safety
-///
-/// Callers must ensure that `$fnty` is a function pointer type.
-#[cfg(feature = "std")]
-macro_rules! unsafe_ifunc {
-    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
-        use std::{mem, sync::atomic::{AtomicPtr, Ordering}};
-
-        type FnRaw = *mut ();
-
-        static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);
-
-        fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
-            let fun =
-                if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
-                    avx::$name as FnRaw
-                } else if cfg!(memchr_runtime_sse2) {
-                    sse2::$name as FnRaw
-                } else {
-                    fallback::$name as FnRaw
-                };
-            FN.store(fun as FnRaw, Ordering::Relaxed);
-            // SAFETY: By virtue of the caller contract, $fnty is a function
-            // pointer, which is always safe to transmute with a *mut ().
-            // Also, if `fun` is the AVX routine, then it is guaranteed to be
-            // supported since we checked the avx2 feature.
-            unsafe {
-                mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
-            }
-        }
-
-        // SAFETY: By virtue of the caller contract, $fnty is a function
-        // pointer, which is always safe to transmute with a *mut (). Also, if
-        // `fun` is the AVX routine, then it is guaranteed to be supported
-        // since we checked the avx2 feature.
-        unsafe {
-            let fun = FN.load(Ordering::Relaxed);
-            mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
-        }
-    }}
-}
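To make the ifunc mechanics concrete, here is a hypothetical single-function rendition of the same pattern (not part of the vendored sources; a portable fallback stands in for the feature-detected AVX2/SSE2 routines):

use std::mem;
use std::sync::atomic::{AtomicPtr, Ordering};

type Find = fn(u8, &[u8]) -> Option<usize>;
type FnRaw = *mut ();

fn find_fallback(needle: u8, haystack: &[u8]) -> Option<usize> {
    haystack.iter().position(|&b| b == needle)
}

// Starts out pointing at `detect`; after the first call it caches the
// selected routine, so later calls skip detection entirely.
static FIND: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);

fn detect(needle: u8, haystack: &[u8]) -> Option<usize> {
    // The real macro would consult is_x86_feature_detected!("avx2") here and
    // pick an AVX2 or SSE2 routine; the portable fallback stands in.
    let chosen: Find = find_fallback;
    FIND.store(chosen as FnRaw, Ordering::Relaxed);
    chosen(needle, haystack)
}

fn find(needle: u8, haystack: &[u8]) -> Option<usize> {
    let fun = FIND.load(Ordering::Relaxed);
    // SAFETY: FIND only ever holds `detect` or `find_fallback`, both of
    // which have type `Find`.
    unsafe { mem::transmute::<FnRaw, Find>(fun)(needle, haystack) }
}

fn main() {
    assert_eq!(find(b'q', b"the quick brown fox"), Some(4));
    assert_eq!(find(b'z', b"the quick brown fox"), None);
}

-
-/// When std isn't available to provide runtime CPU feature detection, or if
-/// runtime CPU feature detection has been explicitly disabled, then just
-/// call our optimized SSE2 routine directly. SSE2 is available on all x86_64
-/// targets, so no CPU feature detection is necessary.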
-/// -/// # Safety -/// -/// There are no safety requirements for this definition of the macro. It is -/// safe for all inputs since it is restricted to either the fallback routine -/// or the SSE routine, which is always safe to call on x86_64. -#[cfg(not(feature = "std"))] -macro_rules! unsafe_ifunc { - ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{ - if cfg!(memchr_runtime_sse2) { - unsafe { sse2::$name($($needle),+, $haystack) } - } else { - fallback::$name($($needle),+, $haystack) - } - }} -} - -#[inline(always)] -pub fn memchr(n1: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!(fn(u8, &[u8]) -> Option, memchr, haystack, n1) -} - -#[inline(always)] -pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!( - fn(u8, u8, &[u8]) -> Option, - memchr2, - haystack, - n1, - n2 - ) -} - -#[inline(always)] -pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!( - fn(u8, u8, u8, &[u8]) -> Option, - memchr3, - haystack, - n1, - n2, - n3 - ) -} - -#[inline(always)] -pub fn memrchr(n1: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!(fn(u8, &[u8]) -> Option, memrchr, haystack, n1) -} - -#[inline(always)] -pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!( - fn(u8, u8, &[u8]) -> Option, - memrchr2, - haystack, - n1, - n2 - ) -} - -#[inline(always)] -pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!( - fn(u8, u8, u8, &[u8]) -> Option, - memrchr3, - haystack, - n1, - n2, - n3 - ) -} diff --git a/vendor/memchr/src/memchr/x86/sse2.rs b/vendor/memchr/src/memchr/x86/sse2.rs deleted file mode 100644 index b7b3a93..0000000 --- a/vendor/memchr/src/memchr/x86/sse2.rs +++ /dev/null @@ -1,791 +0,0 @@ -use core::{arch::x86_64::*, cmp, mem::size_of}; - -const VECTOR_SIZE: usize = size_of::<__m128i>(); -const VECTOR_ALIGN: usize = VECTOR_SIZE - 1; - -// The number of bytes to loop at in one iteration of memchr/memrchr. -const LOOP_SIZE: usize = 4 * VECTOR_SIZE; - -// The number of bytes to loop at in one iteration of memchr2/memrchr2 and -// memchr3/memrchr3. There was no observable difference between 64 and 32 bytes -// in benchmarks. memchr3 in particular only gets a very slight speed up from -// the loop unrolling. -const LOOP_SIZE2: usize = 2 * VECTOR_SIZE; - -#[target_feature(enable = "sse2")] -pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option { - // What follows is a fast SSE2-only algorithm to detect the position of - // `n1` in `haystack` if it exists. From what I know, this is the "classic" - // algorithm. I believe it can be found in places like glibc and Go's - // standard library. It appears to be well known and is elaborated on in - // more detail here: https://gms.tf/stdfind-and-memchr-optimizations.html - // - // While this routine is very long, the basic idea is actually very simple - // and can be expressed straight-forwardly in pseudo code: - // - // needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1 - // // Note: shift amount in bytes - // - // while i <= haystack.len() - 16: - // // A 16 byte vector. Each byte in chunk corresponds to a byte in - // // the haystack. - // chunk = haystack[i:i+16] - // // Compare bytes in needle with bytes in chunk. The result is a 16 - // // byte chunk where each byte is 0xFF if the corresponding bytes - // // in needle and chunk were equal, or 0x00 otherwise. 
-    //         eqs = cmpeq(needle, chunk)
-    //         // Return a 32 bit integer where the most significant 16 bits
-    //         // are always 0 and the lower 16 bits correspond to whether the
-    //         // most significant bit in the corresponding byte in `eqs` is
-    //         // set. In other words, `mask as u16` has bit i set if and only
-    //         // if needle[i] == chunk[i].
-    //         mask = movemask(eqs)
-    //
-    //         // Mask is 0 if there is no match, and non-zero otherwise.
-    //         if mask != 0:
-    //             // trailing_zeros tells us the position of the least
-    //             // significant bit that is set.
-    //             return i + trailing_zeros(mask)
-    //
-    //     // haystack length may not be a multiple of 16, so search the rest.
-    //     while i < haystack.len():
-    //         if haystack[i] == n1:
-    //             return i
-    //
-    //     // No match found.
-    //     return NULL
-    //
-    // In fact, we could loosely translate the above code to Rust line-for-line
-    // and it would be a pretty fast algorithm. But, we pull out all the stops
-    // to go as fast as possible:
-    //
-    // 1. We use aligned loads. That is, we do some finagling to make sure our
-    //    primary loop not only proceeds in increments of 16 bytes, but that
-    //    the address of haystack's pointer that we dereference is aligned to
-    //    16 bytes. 16 is a magic number here because it is the size of SSE2
-    //    128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
-    //    Therefore, to get aligned loads, our pointer's address must be evenly
-    //    divisible by 16.
-    // 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
-    //    kind of like loop unrolling, but we combine the equality comparisons
-    //    using a vector OR such that we only need to extract a single mask to
-    //    determine whether a match exists or not. If so, then we do some
-    //    book-keeping to determine the precise location but otherwise mush on.
-    // 3. We use our "chunk" comparison routine in as many places as possible,
-    //    even if it means using unaligned loads. In particular, if haystack
-    //    starts with an unaligned address, then we do an unaligned load to
-    //    search the first 16 bytes. We then start our primary loop at the
-    //    smallest subsequent aligned address, which will actually overlap with
-    //    previously searched bytes. But we're OK with that. We do a similar
-    //    dance at the end of our primary loop. Finally, to avoid a
-    //    byte-at-a-time loop at the end, we do a final 16 byte unaligned load
-    //    that may overlap with a previous load. This is OK because it converts
-    //    a loop into a small number of very fast vector instructions.
-    //
-    // The primary downside of this algorithm is that it's effectively
-    // completely unsafe. Therefore, we have to be super careful to avoid
-    // undefined behavior:
-    //
-    // 1. We use raw pointers everywhere. Not only does dereferencing a pointer
-    //    require the pointer to be valid, but we actually can't even store the
-    //    address of an invalid pointer (unless it's 1 past the end of
-    //    haystack) without sacrificing performance.
-    // 2. _mm_loadu_si128 is used when you don't care about alignment, and
-    //    _mm_load_si128 is used when you do care. You cannot use the latter
-    //    on unaligned pointers.
-    // 3. We make liberal use of debug_assert! to check assumptions.
-    // 4. We make a concerted effort to stick with pointers instead of indices.
-    //    Indices are nicer because there's less to worry about with them (see
-    //    above about pointer offsets), but I could not get the compiler to
-    //    produce as good of code as what the below produces.
In any case, - // pointers are what we really care about here, and alignment is - // expressed a bit more naturally with them. - // - // In general, most of the algorithms in this crate have a similar - // structure to what you see below, so this comment applies fairly well to - // all of them. - - let vn1 = _mm_set1_epi8(n1 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - - if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm_load_si128(ptr as *const __m128i); - let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i); - let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i); - let eqa = _mm_cmpeq_epi8(vn1, a); - let eqb = _mm_cmpeq_epi8(vn1, b); - let eqc = _mm_cmpeq_epi8(vn1, c); - let eqd = _mm_cmpeq_epi8(vn1, d); - let or1 = _mm_or_si128(eqa, eqb); - let or2 = _mm_or_si128(eqc, eqd); - let or3 = _mm_or_si128(or1, or2); - if _mm_movemask_epi8(or3) != 0 { - let mut at = sub(ptr, start_ptr); - let mask = _mm_movemask_epi8(eqa); - if mask != 0 { - return Some(at + forward_pos(mask)); - } - - at += VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqb); - if mask != 0 { - return Some(at + forward_pos(mask)); - } - - at += VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqc); - if mask != 0 { - return Some(at + forward_pos(mask)); - } - - at += VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqd); - debug_assert!(mask != 0); - return Some(at + forward_pos(mask)); - } - ptr = ptr.add(loop_size); - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE); - - if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return forward_search1(start_ptr, end_ptr, ptr, vn1); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - let vn1 = _mm_set1_epi8(n1 as i8); - let vn2 = _mm_set1_epi8(n2 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 || *ptr == n2 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - - if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm_load_si128(ptr as *const __m128i); - let b = 
_mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let eqa1 = _mm_cmpeq_epi8(vn1, a); - let eqb1 = _mm_cmpeq_epi8(vn1, b); - let eqa2 = _mm_cmpeq_epi8(vn2, a); - let eqb2 = _mm_cmpeq_epi8(vn2, b); - let or1 = _mm_or_si128(eqa1, eqb1); - let or2 = _mm_or_si128(eqa2, eqb2); - let or3 = _mm_or_si128(or1, or2); - if _mm_movemask_epi8(or3) != 0 { - let mut at = sub(ptr, start_ptr); - let mask1 = _mm_movemask_epi8(eqa1); - let mask2 = _mm_movemask_epi8(eqa2); - if mask1 != 0 || mask2 != 0 { - return Some(at + forward_pos2(mask1, mask2)); - } - - at += VECTOR_SIZE; - let mask1 = _mm_movemask_epi8(eqb1); - let mask2 = _mm_movemask_epi8(eqb2); - return Some(at + forward_pos2(mask1, mask2)); - } - ptr = ptr.add(loop_size); - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn memchr3( - n1: u8, - n2: u8, - n3: u8, - haystack: &[u8], -) -> Option { - let vn1 = _mm_set1_epi8(n1 as i8); - let vn2 = _mm_set1_epi8(n2 as i8); - let vn3 = _mm_set1_epi8(n3 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 || *ptr == n2 || *ptr == n3 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - - if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm_load_si128(ptr as *const __m128i); - let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let eqa1 = _mm_cmpeq_epi8(vn1, a); - let eqb1 = _mm_cmpeq_epi8(vn1, b); - let eqa2 = _mm_cmpeq_epi8(vn2, a); - let eqb2 = _mm_cmpeq_epi8(vn2, b); - let eqa3 = _mm_cmpeq_epi8(vn3, a); - let eqb3 = _mm_cmpeq_epi8(vn3, b); - let or1 = _mm_or_si128(eqa1, eqb1); - let or2 = _mm_or_si128(eqa2, eqb2); - let or3 = _mm_or_si128(eqa3, eqb3); - let or4 = _mm_or_si128(or1, or2); - let or5 = _mm_or_si128(or3, or4); - if _mm_movemask_epi8(or5) != 0 { - let mut at = sub(ptr, start_ptr); - let mask1 = _mm_movemask_epi8(eqa1); - let mask2 = _mm_movemask_epi8(eqa2); - let mask3 = _mm_movemask_epi8(eqa3); - if mask1 != 0 || mask2 != 0 || mask3 != 0 { - return Some(at + forward_pos3(mask1, mask2, mask3)); - } - - at += VECTOR_SIZE; - let mask1 = _mm_movemask_epi8(eqb1); - let mask2 = _mm_movemask_epi8(eqb2); - let mask3 = _mm_movemask_epi8(eqb3); - return Some(at + forward_pos3(mask1, mask2, mask3)); - } - ptr = ptr.add(loop_size); - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - if let Some(i) = - forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) - { - return Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return 
forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option { - let vn1 = _mm_set1_epi8(n1 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm_load_si128(ptr as *const __m128i); - let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i); - let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i); - let eqa = _mm_cmpeq_epi8(vn1, a); - let eqb = _mm_cmpeq_epi8(vn1, b); - let eqc = _mm_cmpeq_epi8(vn1, c); - let eqd = _mm_cmpeq_epi8(vn1, d); - let or1 = _mm_or_si128(eqa, eqb); - let or2 = _mm_or_si128(eqc, eqd); - let or3 = _mm_or_si128(or1, or2); - if _mm_movemask_epi8(or3) != 0 { - let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr); - let mask = _mm_movemask_epi8(eqd); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqc); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqb); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqa); - debug_assert!(mask != 0); - return Some(at + reverse_pos(mask)); - } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search1(start_ptr, end_ptr, start_ptr, vn1); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - let vn1 = _mm_set1_epi8(n1 as i8); - let vn2 = _mm_set1_epi8(n2 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 || *ptr == n2 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm_load_si128(ptr as *const __m128i); - let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let eqa1 = _mm_cmpeq_epi8(vn1, a); - let eqb1 = _mm_cmpeq_epi8(vn1, b); - let eqa2 = _mm_cmpeq_epi8(vn2, a); - let eqb2 = _mm_cmpeq_epi8(vn2, b); - let or1 = 
_mm_or_si128(eqa1, eqb1); - let or2 = _mm_or_si128(eqa2, eqb2); - let or3 = _mm_or_si128(or1, or2); - if _mm_movemask_epi8(or3) != 0 { - let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr); - let mask1 = _mm_movemask_epi8(eqb1); - let mask2 = _mm_movemask_epi8(eqb2); - if mask1 != 0 || mask2 != 0 { - return Some(at + reverse_pos2(mask1, mask2)); - } - - at -= VECTOR_SIZE; - let mask1 = _mm_movemask_epi8(eqa1); - let mask2 = _mm_movemask_epi8(eqa2); - return Some(at + reverse_pos2(mask1, mask2)); - } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn memrchr3( - n1: u8, - n2: u8, - n3: u8, - haystack: &[u8], -) -> Option { - let vn1 = _mm_set1_epi8(n1 as i8); - let vn2 = _mm_set1_epi8(n2 as i8); - let vn3 = _mm_set1_epi8(n3 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 || *ptr == n2 || *ptr == n3 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm_load_si128(ptr as *const __m128i); - let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let eqa1 = _mm_cmpeq_epi8(vn1, a); - let eqb1 = _mm_cmpeq_epi8(vn1, b); - let eqa2 = _mm_cmpeq_epi8(vn2, a); - let eqb2 = _mm_cmpeq_epi8(vn2, b); - let eqa3 = _mm_cmpeq_epi8(vn3, a); - let eqb3 = _mm_cmpeq_epi8(vn3, b); - let or1 = _mm_or_si128(eqa1, eqb1); - let or2 = _mm_or_si128(eqa2, eqb2); - let or3 = _mm_or_si128(eqa3, eqb3); - let or4 = _mm_or_si128(or1, or2); - let or5 = _mm_or_si128(or3, or4); - if _mm_movemask_epi8(or5) != 0 { - let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr); - let mask1 = _mm_movemask_epi8(eqb1); - let mask2 = _mm_movemask_epi8(eqb2); - let mask3 = _mm_movemask_epi8(eqb3); - if mask1 != 0 || mask2 != 0 || mask3 != 0 { - return Some(at + reverse_pos3(mask1, mask2, mask3)); - } - - at -= VECTOR_SIZE; - let mask1 = _mm_movemask_epi8(eqa1); - let mask2 = _mm_movemask_epi8(eqa2); - let mask3 = _mm_movemask_epi8(eqa3); - return Some(at + reverse_pos3(mask1, mask2, mask3)); - } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = - reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) - { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn forward_search1( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = 
_mm_loadu_si128(ptr as *const __m128i); - let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vn1)); - if mask != 0 { - Some(sub(ptr, start_ptr) + forward_pos(mask)) - } else { - None - } -} - -#[target_feature(enable = "sse2")] -unsafe fn forward_search2( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, - vn2: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let eq1 = _mm_cmpeq_epi8(chunk, vn1); - let eq2 = _mm_cmpeq_epi8(chunk, vn2); - if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 { - let mask1 = _mm_movemask_epi8(eq1); - let mask2 = _mm_movemask_epi8(eq2); - Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2)) - } else { - None - } -} - -#[target_feature(enable = "sse2")] -pub unsafe fn forward_search3( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, - vn2: __m128i, - vn3: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let eq1 = _mm_cmpeq_epi8(chunk, vn1); - let eq2 = _mm_cmpeq_epi8(chunk, vn2); - let eq3 = _mm_cmpeq_epi8(chunk, vn3); - let or = _mm_or_si128(eq1, eq2); - if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 { - let mask1 = _mm_movemask_epi8(eq1); - let mask2 = _mm_movemask_epi8(eq2); - let mask3 = _mm_movemask_epi8(eq3); - Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3)) - } else { - None - } -} - -#[target_feature(enable = "sse2")] -unsafe fn reverse_search1( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(vn1, chunk)); - if mask != 0 { - Some(sub(ptr, start_ptr) + reverse_pos(mask)) - } else { - None - } -} - -#[target_feature(enable = "sse2")] -unsafe fn reverse_search2( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, - vn2: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let eq1 = _mm_cmpeq_epi8(chunk, vn1); - let eq2 = _mm_cmpeq_epi8(chunk, vn2); - if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 { - let mask1 = _mm_movemask_epi8(eq1); - let mask2 = _mm_movemask_epi8(eq2); - Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2)) - } else { - None - } -} - -#[target_feature(enable = "sse2")] -unsafe fn reverse_search3( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, - vn2: __m128i, - vn3: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let eq1 = _mm_cmpeq_epi8(chunk, vn1); - let eq2 = _mm_cmpeq_epi8(chunk, vn2); - let eq3 = _mm_cmpeq_epi8(chunk, vn3); - let or = _mm_or_si128(eq1, eq2); - if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 { - let mask1 = _mm_movemask_epi8(eq1); - let mask2 = _mm_movemask_epi8(eq2); - let mask3 = _mm_movemask_epi8(eq3); - 
Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3)) - } else { - None - } -} - -/// Compute the position of the first matching byte from the given mask. The -/// position returned is always in the range [0, 15]. -/// -/// The mask given is expected to be the result of _mm_movemask_epi8. -fn forward_pos(mask: i32) -> usize { - // We are dealing with little endian here, where the most significant byte - // is at a higher address. That means the least significant bit that is set - // corresponds to the position of our first matching byte. That position - // corresponds to the number of zeros after the least significant bit. - mask.trailing_zeros() as usize -} - -/// Compute the position of the first matching byte from the given masks. The -/// position returned is always in the range [0, 15]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm_movemask_epi8, where -/// at least one of the masks is non-zero (i.e., indicates a match). -fn forward_pos2(mask1: i32, mask2: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0); - - forward_pos(mask1 | mask2) -} - -/// Compute the position of the first matching byte from the given masks. The -/// position returned is always in the range [0, 15]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm_movemask_epi8, where -/// at least one of the masks is non-zero (i.e., indicates a match). -fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); - - forward_pos(mask1 | mask2 | mask3) -} - -/// Compute the position of the last matching byte from the given mask. The -/// position returned is always in the range [0, 15]. -/// -/// The mask given is expected to be the result of _mm_movemask_epi8. -fn reverse_pos(mask: i32) -> usize { - // We are dealing with little endian here, where the most significant byte - // is at a higher address. That means the most significant bit that is set - // corresponds to the position of our last matching byte. The position from - // the end of the mask is therefore the number of leading zeros in a 16 - // bit integer, and the position from the start of the mask is therefore - // 16 - (leading zeros) - 1. - VECTOR_SIZE - (mask as u16).leading_zeros() as usize - 1 -} - -/// Compute the position of the last matching byte from the given masks. The -/// position returned is always in the range [0, 15]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm_movemask_epi8, where -/// at least one of the masks is non-zero (i.e., indicates a match). -fn reverse_pos2(mask1: i32, mask2: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0); - - reverse_pos(mask1 | mask2) -} - -/// Compute the position of the last matching byte from the given masks. The -/// position returned is always in the range [0, 15]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm_movemask_epi8, where -/// at least one of the masks is non-zero (i.e., indicates a match). -fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); - - reverse_pos(mask1 | mask2 | mask3) -} - -/// Subtract `b` from `a` and return the difference. `a` should be greater than -/// or equal to `b`. 
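The `reverse_pos` arithmetic above can be checked in plain Rust; this sketch assumes a 16-lane mask (VECTOR_SIZE == 16), and `last_match_offset` is an illustrative helper:

    fn last_match_offset(mask: u16) -> Option<usize> {
        if mask == 0 {
            None
        } else {
            // Highest set bit = last matching byte: 16 - leading_zeros - 1.
            Some(15 - mask.leading_zeros() as usize)
        }
    }

    // Bits 2 and 12 set => the last match is at offset 12.
    assert_eq!(Some(12), last_match_offset(0b0001_0000_0000_0100));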
-fn sub(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memchr/x86/sse42.rs b/vendor/memchr/src/memchr/x86/sse42.rs deleted file mode 100644 index da38e50..0000000 --- a/vendor/memchr/src/memchr/x86/sse42.rs +++ /dev/null @@ -1,72 +0,0 @@ -// This code is unused. PCMPESTRI is gratuitously slow. I imagine it might -// start winning with a hypothetical memchr4 (or greater). This technique might -// also be good for exposing searches over ranges of bytes, but that departs -// from the standard memchr API, so it's not clear whether we actually want -// that or not. -// -// N.B. PCMPISTRI appears to be about twice as fast as PCMPESTRI, which is kind -// of neat. Unfortunately, UTF-8 strings can contain NUL bytes, which means -// I don't see a way of effectively using PCMPISTRI unless there's some fast -// way to replace zero bytes with a byte that is not a needle byte. - -use core::{arch::x86_64::*, mem::size_of}; - -use x86::sse2; - -const VECTOR_SIZE: usize = size_of::<__m128i>(); -const CONTROL_ANY: i32 = _SIDD_UBYTE_OPS - | _SIDD_CMP_EQUAL_ANY - | _SIDD_POSITIVE_POLARITY - | _SIDD_LEAST_SIGNIFICANT; - -#[target_feature(enable = "sse4.2")] -pub unsafe fn memchr3( - n1: u8, - n2: u8, - n3: u8, - haystack: &[u8], -) -> Option<usize> { - let vn1 = _mm_set1_epi8(n1 as i8); - let vn2 = _mm_set1_epi8(n2 as i8); - let vn3 = _mm_set1_epi8(n3 as i8); - let vn = _mm_setr_epi8( - n1 as i8, n2 as i8, n3 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ); - let len = haystack.len(); - let start_ptr = haystack.as_ptr(); - let end_ptr = haystack[haystack.len()..].as_ptr(); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 || *ptr == n2 || *ptr == n3 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let res = _mm_cmpestri(vn, 3, chunk, 16, CONTROL_ANY); - if res < 16 { - return Some(sub(ptr, start_ptr) + res as usize); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return sse2::forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3); - } - None -} - -/// Subtract `b` from `a` and return the difference. `a` should be greater than -/// or equal to `b`. -fn sub(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memmem/genericsimd.rs b/vendor/memchr/src/memmem/genericsimd.rs deleted file mode 100644 index 28bfdab..0000000 --- a/vendor/memchr/src/memmem/genericsimd.rs +++ /dev/null @@ -1,266 +0,0 @@ -use core::mem::size_of; - -use crate::memmem::{util::memcmp, vector::Vector, NeedleInfo}; - -/// The minimum length of a needle required for this algorithm. The minimum -/// is 2 since a length of 1 should just use memchr and a length of 0 isn't -/// a case handled by this searcher. -pub(crate) const MIN_NEEDLE_LEN: usize = 2; - -/// The maximum length of a needle required for this algorithm. -/// -/// In reality, there is no hard max here. The code below can handle any -/// length needle. (Perhaps that suggests there are missing optimizations.) -/// Instead, this is a heuristic and a bound guaranteeing our linear time -/// complexity. 
-/// -/// It is a heuristic because when a candidate match is found, memcmp is run. -/// For very large needles with lots of false positives, memcmp can make the -/// code run quite slow. -/// -/// It is a bound because the worst case behavior with memcmp is multiplicative -/// in the size of the needle and haystack, and we want to keep that additive. -/// This bound ensures we still meet that bound theoretically, since it's just -/// a constant. We aren't acting in bad faith here, memcmp on tiny needles -/// is so fast that even in pathological cases (see pathological vector -/// benchmarks), this is still just as fast or faster in practice. -/// -/// This specific number was chosen by tweaking a bit and running benchmarks. -/// The rare-medium-needle, for example, gets about 5% faster by using this -/// algorithm instead of a prefilter-accelerated Two-Way. There's also a -/// theoretical desire to keep this number reasonably low, to mitigate the -/// impact of pathological cases. I did try 64, and some benchmarks got a -/// little better, and others (particularly the pathological ones), got a lot -/// worse. So... 32 it is? -pub(crate) const MAX_NEEDLE_LEN: usize = 32; - -/// The implementation of the forward vector accelerated substring search. -/// -/// This is extremely similar to the prefilter vector module by the same name. -/// The key difference is that this is not a prefilter. Instead, it handles -/// confirming its own matches. The trade off is that this only works with -/// smaller needles. The speed up here is that an inlined memcmp on a tiny -/// needle is very quick, even on pathological inputs. This is much better than -/// combining a prefilter with Two-Way, where using Two-Way to confirm the -/// match has higher latency. -/// -/// So why not use this for all needles? We could, and it would probably work -/// really well on most inputs. But its worst case is multiplicative and we -/// want to guarantee worst case additive time. Some of the benchmarks try to -/// justify this (see the pathological ones). -/// -/// The prefilter variant of this has more comments. Also note that we only -/// implement this for forward searches for now. If you have a compelling use -/// case for accelerated reverse search, please file an issue. -#[derive(Clone, Copy, Debug)] -pub(crate) struct Forward { - rare1i: u8, - rare2i: u8, -} - -impl Forward { - /// Create a new "generic simd" forward searcher. If one could not be - /// created from the given inputs, then None is returned. - pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option { - let (rare1i, rare2i) = ninfo.rarebytes.as_rare_ordered_u8(); - // If the needle is too short or too long, give up. Also, give up - // if the rare bytes detected are at the same position. (It likely - // suggests a degenerate case, although it should technically not be - // possible.) - if needle.len() < MIN_NEEDLE_LEN - || needle.len() > MAX_NEEDLE_LEN - || rare1i == rare2i - { - return None; - } - Some(Forward { rare1i, rare2i }) - } - - /// Returns the minimum length of haystack that is needed for this searcher - /// to work for a particular vector. Passing a haystack with a length - /// smaller than this will cause `fwd_find` to panic. - #[inline(always)] - pub(crate) fn min_haystack_len(&self) -> usize { - self.rare2i as usize + size_of::() - } -} - -/// Searches the given haystack for the given needle. The needle given should -/// be the same as the needle that this searcher was initialized with. 
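As a worked instance of the `min_haystack_len` arithmetic above (values are illustrative; 16 is the size of SSE2's `__m128i`): the last unaligned load starts `rare2i` bytes into the haystack and reads a full vector, so the haystack must be at least that long.

    let (rare2i, vector_size) = (5usize, 16usize); // illustrative values
    assert_eq!(21, rare2i + vector_size); // minimum haystack length here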
-/// -/// # Panics -/// -/// When the given haystack has a length smaller than `min_haystack_len`. -/// -/// # Safety -/// -/// Since this is meant to be used with vector functions, callers need to -/// specialize this inside of a function with a `target_feature` attribute. -/// Therefore, callers must ensure that whatever target feature is being used -/// supports the vector functions that this function is specialized for. (For -/// the specific vector functions used, see the Vector trait implementations.) -#[inline(always)] -pub(crate) unsafe fn fwd_find( - fwd: &Forward, - haystack: &[u8], - needle: &[u8], -) -> Option { - // It would be nice if we didn't have this check here, since the meta - // searcher should handle it for us. But without this, I don't think we - // guarantee that end_ptr.sub(needle.len()) won't result in UB. We could - // put it as part of the safety contract, but it makes it more complicated - // than necessary. - if haystack.len() < needle.len() { - return None; - } - let min_haystack_len = fwd.min_haystack_len::(); - assert!(haystack.len() >= min_haystack_len, "haystack too small"); - debug_assert!(needle.len() <= haystack.len()); - debug_assert!( - needle.len() >= MIN_NEEDLE_LEN, - "needle must be at least {} bytes", - MIN_NEEDLE_LEN, - ); - debug_assert!( - needle.len() <= MAX_NEEDLE_LEN, - "needle must be at most {} bytes", - MAX_NEEDLE_LEN, - ); - - let (rare1i, rare2i) = (fwd.rare1i as usize, fwd.rare2i as usize); - let rare1chunk = V::splat(needle[rare1i]); - let rare2chunk = V::splat(needle[rare2i]); - - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let max_ptr = end_ptr.sub(min_haystack_len); - let mut ptr = start_ptr; - - // N.B. I did experiment with unrolling the loop to deal with size(V) - // bytes at a time and 2*size(V) bytes at a time. The double unroll was - // marginally faster while the quadruple unroll was unambiguously slower. - // In the end, I decided the complexity from unrolling wasn't worth it. I - // used the memmem/krate/prebuilt/huge-en/ benchmarks to compare. - while ptr <= max_ptr { - let m = fwd_find_in_chunk( - fwd, needle, ptr, end_ptr, rare1chunk, rare2chunk, !0, - ); - if let Some(chunki) = m { - return Some(matched(start_ptr, ptr, chunki)); - } - ptr = ptr.add(size_of::()); - } - if ptr < end_ptr { - let remaining = diff(end_ptr, ptr); - debug_assert!( - remaining < min_haystack_len, - "remaining bytes should be smaller than the minimum haystack \ - length of {}, but there are {} bytes remaining", - min_haystack_len, - remaining, - ); - if remaining < needle.len() { - return None; - } - debug_assert!( - max_ptr < ptr, - "after main loop, ptr should have exceeded max_ptr", - ); - let overlap = diff(ptr, max_ptr); - debug_assert!( - overlap > 0, - "overlap ({}) must always be non-zero", - overlap, - ); - debug_assert!( - overlap < size_of::(), - "overlap ({}) cannot possibly be >= than a vector ({})", - overlap, - size_of::(), - ); - // The mask has all of its bits set except for the first N least - // significant bits, where N=overlap. This way, any matches that - // occur in find_in_chunk within the overlap are automatically - // ignored. 
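A worked instance of the masking step implemented just below, with an illustrative overlap of 3 lanes:

    let overlap = 3u32;
    let mask = !((1u32 << overlap) - 1);
    // The 3 least significant lanes are cleared, so candidates already seen
    // by the previous (overlapping) load cannot be reported twice.
    assert_eq!(0b1111_1000, mask & 0b1111_1111);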
- let mask = !((1 << overlap) - 1); - ptr = max_ptr; - let m = fwd_find_in_chunk( - fwd, needle, ptr, end_ptr, rare1chunk, rare2chunk, mask, - ); - if let Some(chunki) = m { - return Some(matched(start_ptr, ptr, chunki)); - } - } - None -} - -/// Search for an occurrence of two rare bytes from the needle in the chunk -/// pointed to by ptr, with the end of the haystack pointed to by end_ptr. When -/// an occurrence is found, memcmp is run to check if a match occurs at the -/// corresponding position. -/// -/// rare1chunk and rare2chunk correspond to vectors with the rare1 and rare2 -/// bytes repeated in each 8-bit lane, respectively. -/// -/// mask should have bits set corresponding to the positions in the chunk in which -/// matches are considered. This is only used for the last vector load where -/// the beginning of the vector might have overlapped with the last load in -/// the main loop. The mask lets us avoid visiting positions that have already -/// been discarded as matches. -/// -/// # Safety -/// -/// It must be safe to do an unaligned read of size(V) bytes starting at both -/// (ptr + rare1i) and (ptr + rare2i). It must also be safe to do unaligned -/// loads on ptr up to (end_ptr - needle.len()). -#[inline(always)] -unsafe fn fwd_find_in_chunk<V: Vector>( - fwd: &Forward, - needle: &[u8], - ptr: *const u8, - end_ptr: *const u8, - rare1chunk: V, - rare2chunk: V, - mask: u32, -) -> Option<usize> { - let chunk0 = V::load_unaligned(ptr.add(fwd.rare1i as usize)); - let chunk1 = V::load_unaligned(ptr.add(fwd.rare2i as usize)); - - let eq0 = chunk0.cmpeq(rare1chunk); - let eq1 = chunk1.cmpeq(rare2chunk); - - let mut match_offsets = eq0.and(eq1).movemask() & mask; - while match_offsets != 0 { - let offset = match_offsets.trailing_zeros() as usize; - let ptr = ptr.add(offset); - if end_ptr.sub(needle.len()) < ptr { - return None; - } - let chunk = core::slice::from_raw_parts(ptr, needle.len()); - if memcmp(needle, chunk) { - return Some(offset); - } - match_offsets &= match_offsets - 1; - } - None -} - -/// Accepts a chunk-relative offset and returns a haystack-relative offset -/// after updating the prefilter state. -/// -/// See the same function with the same name in the prefilter variant of this -/// algorithm to learn why it's tagged with inline(never). Even here, where -/// the function is simpler, inlining it leads to poorer codegen. (Although -/// it does improve some benchmarks, like prebuiltiter/huge-en/common-you.) -#[cold] -#[inline(never)] -fn matched(start_ptr: *const u8, ptr: *const u8, chunki: usize) -> usize { - diff(ptr, start_ptr) + chunki -} - -/// Subtract `b` from `a` and return the difference. `a` must be greater than -/// or equal to `b`. -fn diff(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memmem/mod.rs b/vendor/memchr/src/memmem/mod.rs index e1cd1ae..4f04943 100644 --- a/vendor/memchr/src/memmem/mod.rs +++ b/vendor/memchr/src/memmem/mod.rs @@ -66,99 +66,25 @@ assert_eq!(None, finder.find(b"quux baz bar")); ``` */ -pub use self::prefilter::Prefilter; +pub use crate::memmem::searcher::PrefilterConfig as Prefilter; + +// This is exported here for use in the crate::arch::all::twoway +// implementation. This is essentially an abstraction breaker. Namely, the +// public API of twoway doesn't support providing a prefilter, but its crate +// internal API does. The main reason for this is that I didn't want to do the +// API design required to support it without a concrete use case. 
+pub(crate) use crate::memmem::searcher::Pre; use crate::{ - cow::CowBytes, - memmem::{ - prefilter::{Pre, PrefilterFn, PrefilterState}, - rabinkarp::NeedleHash, - rarebytes::RareNeedleBytes, + arch::all::{ + packedpair::{DefaultFrequencyRank, HeuristicFrequencyRank}, + rabinkarp, }, + cow::CowBytes, + memmem::searcher::{PrefilterState, Searcher, SearcherRev}, }; -/// Defines a suite of quickcheck properties for forward and reverse -/// substring searching. -/// -/// This is defined in this specific spot so that it can be used freely among -/// the different substring search implementations. I couldn't be bothered to -/// fight with the macro-visibility rules enough to figure out how to stuff it -/// somewhere more convenient. -#[cfg(all(test, feature = "std"))] -macro_rules! define_memmem_quickcheck_tests { - ($fwd:expr, $rev:expr) => { - use crate::memmem::proptests; - - quickcheck::quickcheck! { - fn qc_fwd_prefix_is_substring(bs: Vec) -> bool { - proptests::prefix_is_substring(false, &bs, $fwd) - } - - fn qc_fwd_suffix_is_substring(bs: Vec) -> bool { - proptests::suffix_is_substring(false, &bs, $fwd) - } - - fn qc_fwd_matches_naive( - haystack: Vec, - needle: Vec - ) -> bool { - proptests::matches_naive(false, &haystack, &needle, $fwd) - } - - fn qc_rev_prefix_is_substring(bs: Vec) -> bool { - proptests::prefix_is_substring(true, &bs, $rev) - } - - fn qc_rev_suffix_is_substring(bs: Vec) -> bool { - proptests::suffix_is_substring(true, &bs, $rev) - } - - fn qc_rev_matches_naive( - haystack: Vec, - needle: Vec - ) -> bool { - proptests::matches_naive(true, &haystack, &needle, $rev) - } - } - }; -} - -/// Defines a suite of "simple" hand-written tests for a substring -/// implementation. -/// -/// This is defined here for the same reason that -/// define_memmem_quickcheck_tests is defined here. -#[cfg(test)] -macro_rules! define_memmem_simple_tests { - ($fwd:expr, $rev:expr) => { - use crate::memmem::testsimples; - - #[test] - fn simple_forward() { - testsimples::run_search_tests_fwd($fwd); - } - - #[test] - fn simple_reverse() { - testsimples::run_search_tests_rev($rev); - } - }; -} - -mod byte_frequencies; -#[cfg(memchr_runtime_simd)] -mod genericsimd; -mod prefilter; -mod rabinkarp; -mod rarebytes; -mod twoway; -mod util; -#[cfg(memchr_runtime_simd)] -mod vector; -#[cfg(all(memchr_runtime_wasm128))] -mod wasm; -#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] -mod x86; +mod searcher; /// Returns an iterator over all non-overlapping occurrences of a substring in /// a haystack. @@ -258,7 +184,7 @@ pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( #[inline] pub fn find(haystack: &[u8], needle: &[u8]) -> Option { if haystack.len() < 64 { - rabinkarp::find(haystack, needle) + rabinkarp::Finder::new(needle).find(haystack, needle) } else { Finder::new(needle).find(haystack) } @@ -295,7 +221,7 @@ pub fn find(haystack: &[u8], needle: &[u8]) -> Option { #[inline] pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option { if haystack.len() < 64 { - rabinkarp::rfind(haystack, needle) + rabinkarp::FinderRev::new(needle).rfind(haystack, needle) } else { FinderRev::new(needle).rfind(haystack) } @@ -307,7 +233,7 @@ pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option { /// /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the /// needle. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct FindIter<'h, 'n> { haystack: &'h [u8], prestate: PrefilterState, @@ -321,7 +247,7 @@ impl<'h, 'n> FindIter<'h, 'n> { haystack: &'h [u8], finder: Finder<'n>, ) -> FindIter<'h, 'n> { - let prestate = finder.searcher.prefilter_state(); + let prestate = PrefilterState::new(); FindIter { haystack, prestate, finder, pos: 0 } } @@ -331,8 +257,8 @@ impl<'h, 'n> FindIter<'h, 'n> { /// If this is already an owned iterator, then this is a no-op. Otherwise, /// this copies the needle. /// - /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + /// This is only available when the `alloc` feature is enabled. + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FindIter<'h, 'static> { FindIter { @@ -348,20 +274,32 @@ impl<'h, 'n> Iterator for FindIter<'h, 'n> { type Item = usize; fn next(&mut self) -> Option { - if self.pos > self.haystack.len() { - return None; - } - let result = self - .finder - .searcher - .find(&mut self.prestate, &self.haystack[self.pos..]); - match result { - None => None, - Some(i) => { - let pos = self.pos + i; - self.pos = pos + core::cmp::max(1, self.finder.needle().len()); - Some(pos) - } + let needle = self.finder.needle(); + let haystack = self.haystack.get(self.pos..)?; + let idx = + self.finder.searcher.find(&mut self.prestate, haystack, needle)?; + + let pos = self.pos + idx; + self.pos = pos + needle.len().max(1); + + Some(pos) + } + + fn size_hint(&self) -> (usize, Option) { + // The largest possible number of non-overlapping matches is the + // quotient of the haystack and the needle (or the length of the + // haystack, if the needle is empty) + match self.haystack.len().checked_sub(self.pos) { + None => (0, Some(0)), + Some(haystack_len) => match self.finder.needle().len() { + // Empty needles always succeed and match at every point + // (including the very end) + 0 => ( + haystack_len.saturating_add(1), + haystack_len.checked_add(1), + ), + needle_len => (0, Some(haystack_len / needle_len)), + }, } } } @@ -372,7 +310,7 @@ impl<'h, 'n> Iterator for FindIter<'h, 'n> { /// /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the /// needle. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct FindRevIter<'h, 'n> { haystack: &'h [u8], finder: FinderRev<'n>, @@ -398,7 +336,7 @@ impl<'h, 'n> FindRevIter<'h, 'n> { /// this copies the needle. /// /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FindRevIter<'h, 'static> { FindRevIter { @@ -447,7 +385,8 @@ impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { /// the lifetime of its needle. #[derive(Clone, Debug)] pub struct Finder<'n> { - searcher: Searcher<'n>, + needle: CowBytes<'n>, + searcher: Searcher, } impl<'n> Finder<'n> { @@ -481,8 +420,11 @@ impl<'n> Finder<'n> { /// assert_eq!(Some(4), Finder::new("bar").find(haystack)); /// assert_eq!(None, Finder::new("quux").find(haystack)); /// ``` + #[inline] pub fn find(&self, haystack: &[u8]) -> Option { - self.searcher.find(&mut self.searcher.prefilter_state(), haystack) + let mut prestate = PrefilterState::new(); + let needle = self.needle.as_slice(); + self.searcher.find(&mut prestate, haystack, needle) } /// Returns an iterator over all occurrences of a substring in a haystack. @@ -525,11 +467,14 @@ impl<'n> Finder<'n> { /// If this is already an owned finder, then this is a no-op. Otherwise, /// this copies the needle. 
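A worked instance of the `size_hint` reasoning added above, with illustrative numbers: with 9 haystack bytes remaining, a 3-byte needle yields at most 9 / 3 = 3 non-overlapping matches, while an empty needle matches at every position including the end, i.e. exactly 9 + 1 = 10 times.

    let (remaining, needle_len) = (9usize, 3usize); // illustrative values
    assert_eq!(3, remaining / needle_len); // upper bound, non-empty needle
    assert_eq!(10, remaining + 1); // exact count for an empty needle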
/// - /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + /// This is only available when the `alloc` feature is enabled. + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> Finder<'static> { - Finder { searcher: self.searcher.into_owned() } + Finder { + needle: self.needle.into_owned(), + searcher: self.searcher.clone(), + } } /// Convert this finder into its borrowed variant. @@ -544,7 +489,10 @@ impl<'n> Finder<'n> { /// shorter of the two. #[inline] pub fn as_ref(&self) -> Finder<'_> { - Finder { searcher: self.searcher.as_ref() } + Finder { + needle: CowBytes::new(self.needle()), + searcher: self.searcher.clone(), + } } /// Returns the needle that this finder searches for. @@ -555,7 +503,7 @@ impl<'n> Finder<'n> { /// needle returned must necessarily be the shorter of the two. #[inline] pub fn needle(&self) -> &[u8] { - self.searcher.needle() + self.needle.as_slice() } } @@ -574,7 +522,8 @@ impl<'n> Finder<'n> { /// the lifetime of its needle. #[derive(Clone, Debug)] pub struct FinderRev<'n> { - searcher: SearcherRev<'n>, + needle: CowBytes<'n>, + searcher: SearcherRev, } impl<'n> FinderRev<'n> { @@ -612,7 +561,7 @@ impl<'n> FinderRev<'n> { /// assert_eq!(None, FinderRev::new("quux").rfind(haystack)); /// ``` pub fn rfind>(&self, haystack: B) -> Option { - self.searcher.rfind(haystack.as_ref()) + self.searcher.rfind(haystack.as_ref(), self.needle.as_slice()) } /// Returns a reverse iterator over all occurrences of a substring in a @@ -657,10 +606,13 @@ impl<'n> FinderRev<'n> { /// this copies the needle. /// /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FinderRev<'static> { - FinderRev { searcher: self.searcher.into_owned() } + FinderRev { + needle: self.needle.into_owned(), + searcher: self.searcher.clone(), + } } /// Convert this finder into its borrowed variant. @@ -675,7 +627,10 @@ impl<'n> FinderRev<'n> { /// shorter of the two. #[inline] pub fn as_ref(&self) -> FinderRev<'_> { - FinderRev { searcher: self.searcher.as_ref() } + FinderRev { + needle: CowBytes::new(self.needle()), + searcher: self.searcher.clone(), + } } /// Returns the needle that this finder searches for. @@ -686,7 +641,7 @@ impl<'n> FinderRev<'n> { /// needle returned must necessarily be the shorter of the two. #[inline] pub fn needle(&self) -> &[u8] { - self.searcher.needle() + self.needle.as_slice() } } @@ -697,7 +652,7 @@ impl<'n> FinderRev<'n> { /// heuristic prefilters used to speed up certain searches. #[derive(Clone, Debug, Default)] pub struct FinderBuilder { - config: SearcherConfig, + prefilter: Prefilter, } impl FinderBuilder { @@ -712,7 +667,26 @@ impl FinderBuilder { &self, needle: &'n B, ) -> Finder<'n> { - Finder { searcher: Searcher::new(self.config, needle.as_ref()) } + self.build_forward_with_ranker(DefaultFrequencyRank, needle) + } + + /// Build a forward finder using the given needle and a custom heuristic for + /// determining the frequency of a given byte in the dataset. + /// See [`HeuristicFrequencyRank`] for more details. 
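A hedged sketch of plugging a custom ranker into the builder hook defined just below; `ZeroIsRare` is an illustrative type, and this assumes `HeuristicFrequencyRank` is re-exported from `memchr::memmem` with a `rank(&self, byte: u8) -> u8` method:

    use memchr::memmem::{FinderBuilder, HeuristicFrequencyRank};

    struct ZeroIsRare;

    impl HeuristicFrequencyRank for ZeroIsRare {
        fn rank(&self, byte: u8) -> u8 {
            // Treat NUL as rare and everything else as common, which may suit
            // binary-heavy haystacks better than the text-oriented default.
            if byte == 0 { 0 } else { 255 }
        }
    }

    let finder = FinderBuilder::new()
        .build_forward_with_ranker(ZeroIsRare, b"\x00magic");
    assert_eq!(Some(4), finder.find(b"data\x00magic"));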
+ pub fn build_forward_with_ranker< + 'n, + R: HeuristicFrequencyRank, + B: ?Sized + AsRef<[u8]>, + >( + &self, + ranker: R, + needle: &'n B, + ) -> Finder<'n> { + let needle = needle.as_ref(); + Finder { + needle: CowBytes::new(needle), + searcher: Searcher::new(self.prefilter, ranker, needle), + } } /// Build a reverse finder using the given needle from the current @@ -721,7 +695,11 @@ &self, needle: &'n B, ) -> FinderRev<'n> { - FinderRev { searcher: SearcherRev::new(needle.as_ref()) } + let needle = needle.as_ref(); + FinderRev { + needle: CowBytes::new(needle), + searcher: SearcherRev::new(needle), + } } /// Configure the prefilter setting for the finder. @@ -729,593 +707,31 @@ impl FinderBuilder { /// See the documentation for [`Prefilter`] for more discussion on why /// you might want to configure this. pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { - self.config.prefilter = prefilter; + self.prefilter = prefilter; self } } -/// The internal implementation of a forward substring searcher. -/// -/// The reality is that this is a "meta" searcher. Namely, depending on a -/// variety of parameters (CPU support, target, needle size, haystack size and -/// even dynamic properties such as prefilter effectiveness), the actual -/// algorithm employed to do substring search may change. -#[derive(Clone, Debug)] -struct Searcher<'n> { - /// The actual needle we're searching for. - /// - /// A CowBytes is like a Cow<[u8]>, except in no_std environments, it is - /// specialized to a single variant (the borrowed form). - needle: CowBytes<'n>, - /// A collection of facts computed on the needle that are useful for more - /// than one substring search algorithm. - ninfo: NeedleInfo, - /// A prefilter function, if it was deemed appropriate. - /// - /// Some substring search implementations (like Two-Way) benefit greatly - /// if we can quickly find candidate starting positions for a match. - prefn: Option<PrefilterFn>, - /// The actual substring implementation in use. - kind: SearcherKind, -} - -/// A collection of facts computed about a search needle. -/// -/// We group these things together because it's useful to be able to hand them -/// to prefilters or substring algorithms that want them. -#[derive(Clone, Copy, Debug)] -pub(crate) struct NeedleInfo { - /// The offsets of "rare" bytes detected in the needle. - /// - /// This is meant to be a heuristic in order to maximize the effectiveness - /// of vectorized code. Namely, vectorized code tends to focus on only - /// one or two bytes. If we pick bytes from the needle that occur - /// infrequently, then more time will be spent in the vectorized code and - /// will likely make the overall search (much) faster. - /// - /// Of course, this is only a heuristic based on a background frequency - /// distribution of bytes. But it tends to work very well in practice. - pub(crate) rarebytes: RareNeedleBytes, - /// A Rabin-Karp hash of the needle. - /// - /// This is stored here instead of in a more specific Rabin-Karp search - /// since Rabin-Karp may be used even if another SearcherKind corresponds - /// to some other search implementation. e.g., if measurements suggest RK - /// is faster in some cases or if a search implementation can't handle - /// particularly small haystacks. (Moreover, we cannot use RK *generally*, - /// since its worst case time is multiplicative. Instead, we only use it - /// for some small haystacks, where "small" is a constant.) 
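The Rabin-Karp idea referenced above can be sketched in a few lines: hash the needle once, roll a hash across the haystack, and confirm hash hits with a direct comparison. This is the textbook formulation with an illustrative base, not this crate's actual hash:

    fn rabin_karp_find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        if needle.is_empty() {
            return Some(0);
        }
        if haystack.len() < needle.len() {
            return None;
        }
        const BASE: u32 = 101;
        let hash = |bytes: &[u8]| {
            bytes.iter().fold(0u32, |h, &b| {
                h.wrapping_mul(BASE).wrapping_add(u32::from(b))
            })
        };
        // BASE^(needle.len() - 1), used to drop the outgoing byte when rolling.
        let pow = (0..needle.len() - 1).fold(1u32, |p, _| p.wrapping_mul(BASE));
        let nhash = hash(needle);
        let mut hhash = hash(&haystack[..needle.len()]);
        for i in 0..=haystack.len() - needle.len() {
            // A hash hit is only a candidate; confirm with a real comparison.
            if hhash == nhash && &haystack[i..i + needle.len()] == needle {
                return Some(i);
            }
            if i + needle.len() < haystack.len() {
                hhash = hhash
                    .wrapping_sub(u32::from(haystack[i]).wrapping_mul(pow))
                    .wrapping_mul(BASE)
                    .wrapping_add(u32::from(haystack[i + needle.len()]));
            }
        }
        None
    }

    assert_eq!(Some(4), rabin_karp_find(b"foo bar", b"bar"));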
- pub(crate) nhash: NeedleHash, -} - -/// Configuration for substring search. -#[derive(Clone, Copy, Debug, Default)] -struct SearcherConfig { - /// This permits changing the behavior of the prefilter, since it can have - /// a variable impact on performance. - prefilter: Prefilter, -} - -#[derive(Clone, Debug)] -enum SearcherKind { - /// A special case for empty needles. An empty needle always matches, even - /// in an empty haystack. - Empty, - /// This is used whenever the needle is a single byte. In this case, we - /// always use memchr. - OneByte(u8), - /// Two-Way is the generic work horse and is what provides our additive - /// linear time guarantee. In general, it's used when the needle is bigger - /// than 8 bytes or so. - TwoWay(twoway::Forward), - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - GenericSIMD128(x86::sse::Forward), - #[cfg(memchr_runtime_wasm128)] - GenericSIMD128(wasm::Forward), - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - GenericSIMD256(x86::avx::Forward), -} - -impl<'n> Searcher<'n> { - fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> { - use self::SearcherKind::*; - - let ninfo = NeedleInfo::new(needle); - let mk = |kind: SearcherKind| { - let prefn = prefilter::forward( - &config.prefilter, - &ninfo.rarebytes, - needle, - ); - Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind } - }; - if needle.len() == 0 { - return mk(Empty); - } - if needle.len() == 1 { - return mk(OneByte(needle[0])); - } - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - { - if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) { - return mk(GenericSIMD256(fwd)); - } else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) { - return mk(GenericSIMD128(fwd)); - } - } - #[cfg(all(target_arch = "wasm32", memchr_runtime_simd))] - { - if let Some(fwd) = wasm::Forward::new(&ninfo, needle) { - return mk(GenericSIMD128(fwd)); - } - } - - mk(TwoWay(twoway::Forward::new(needle))) - } - - /// Return a fresh prefilter state that can be used with this searcher. - /// A prefilter state is used to track the effectiveness of a searcher's - /// prefilter for speeding up searches. Therefore, the prefilter state - /// should generally be reused on subsequent searches (such as in an - /// iterator). For searches on a different haystack, then a new prefilter - /// state should be used. - /// - /// This always initializes a valid (but possibly inert) prefilter state - /// even if this searcher does not have a prefilter enabled. 
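The effectiveness tracking described above (and implemented just below) can be sketched as a small counter that disables the prefilter once its observed skip rate drops; the thresholds and names here are illustrative, not this crate's actual tuning:

    struct SketchPrefilterState {
        calls: u32,
        skipped: u32,
        inert: bool,
    }

    impl SketchPrefilterState {
        const MIN_CALLS: u32 = 50; // sample size before judging
        const MIN_AVG_SKIP: u32 = 16; // minimum bytes skipped per call

        fn update(&mut self, skipped: usize) {
            self.calls += 1;
            self.skipped = self.skipped.saturating_add(skipped as u32);
        }

        fn is_effective(&mut self) -> bool {
            if self.inert {
                return false;
            }
            if self.calls < Self::MIN_CALLS {
                return true;
            }
            // Permanently switch off once the average skip is too small to
            // pay for the prefilter's overhead.
            self.inert = self.skipped / self.calls < Self::MIN_AVG_SKIP;
            !self.inert
        }
    }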
- fn prefilter_state(&self) -> PrefilterState { - if self.prefn.is_none() { - PrefilterState::inert() - } else { - PrefilterState::new() - } - } - - fn needle(&self) -> &[u8] { - self.needle.as_slice() - } - - fn as_ref(&self) -> Searcher<'_> { - use self::SearcherKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(gs) => GenericSIMD128(gs), - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(gs) => GenericSIMD256(gs), - }; - Searcher { - needle: CowBytes::new(self.needle()), - ninfo: self.ninfo, - prefn: self.prefn, - kind, - } - } - - #[cfg(feature = "std")] - fn into_owned(self) -> Searcher<'static> { - use self::SearcherKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(gs) => GenericSIMD128(gs), - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(gs) => GenericSIMD256(gs), - }; - Searcher { - needle: self.needle.into_owned(), - ninfo: self.ninfo, - prefn: self.prefn, - kind, - } - } - - /// Implements forward substring search by selecting the implementation - /// chosen at construction and executing it on the given haystack with the - /// prefilter's current state of effectiveness. - #[inline(always)] - fn find( - &self, - state: &mut PrefilterState, - haystack: &[u8], - ) -> Option<usize> { - use self::SearcherKind::*; - - let needle = self.needle(); - if haystack.len() < needle.len() { - return None; - } - match self.kind { - Empty => Some(0), - OneByte(b) => crate::memchr(b, haystack), - TwoWay(ref tw) => { - // For very short haystacks (e.g., where the prefilter probably - // can't run), it's faster to just run RK. - if rabinkarp::is_fast(haystack, needle) { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - self.find_tw(tw, state, haystack, needle) - } - } - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(ref gs) => { - // The SIMD matcher can't handle particularly short haystacks, - // so we fall back to RK in these cases. - if haystack.len() < gs.min_haystack_len() { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - gs.find(haystack, needle) - } - } - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(ref gs) => { - // The SIMD matcher can't handle particularly short haystacks, - // so we fall back to RK in these cases. - if haystack.len() < gs.min_haystack_len() { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - gs.find(haystack, needle) - } - } - } - } - - /// Calls Two-Way on the given haystack/needle. - /// - /// This is marked as uninlineable since it seems to have a better overall - /// effect on benchmarks. However, this is one of those cases where - /// inlining it results in an improvement in other benchmarks too, so I - /// suspect we just don't have enough data yet to make the right call here. - /// - /// I suspect the main problem is that this function contains two different - /// inlined copies of Two-Way: one with and one without prefilters enabled. - #[inline(never)] - fn find_tw( - &self, - tw: &twoway::Forward, - state: &mut PrefilterState, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - if let Some(prefn) = self.prefn { - // We used to look at the length of a haystack here. 
That is, if - // it was too small, then don't bother with the prefilter. But two - // things changed: the prefilter falls back to memchr for small - // haystacks, and, above, Rabin-Karp is employed for tiny haystacks - // anyway. - if state.is_effective() { - let mut pre = Pre { state, prefn, ninfo: &self.ninfo }; - return tw.find(Some(&mut pre), haystack, needle); - } - } - tw.find(None, haystack, needle) - } -} - -impl NeedleInfo { - pub(crate) fn new(needle: &[u8]) -> NeedleInfo { - NeedleInfo { - rarebytes: RareNeedleBytes::forward(needle), - nhash: NeedleHash::forward(needle), - } - } -} - -/// The internal implementation of a reverse substring searcher. -/// -/// See the forward searcher docs for more details. Currently, the reverse -/// searcher is considerably simpler since it lacks prefilter support. This -/// was done because it adds a lot of code, and more surface area to test. And -/// in particular, it's not clear whether a prefilter on reverse searching is -/// worth it. (If you have a compelling use case, please file an issue!) -#[derive(Clone, Debug)] -struct SearcherRev<'n> { - /// The actual needle we're searching for. - needle: CowBytes<'n>, - /// A Rabin-Karp hash of the needle. - nhash: NeedleHash, - /// The actual substring implementation in use. - kind: SearcherRevKind, -} - -#[derive(Clone, Debug)] -enum SearcherRevKind { - /// A special case for empty needles. An empty needle always matches, even - /// in an empty haystack. - Empty, - /// This is used whenever the needle is a single byte. In this case, we - /// always use memchr. - OneByte(u8), - /// Two-Way is the generic work horse and is what provides our additive - /// linear time guarantee. In general, it's used when the needle is bigger - /// than 8 bytes or so. - TwoWay(twoway::Reverse), -} - -impl<'n> SearcherRev<'n> { - fn new(needle: &'n [u8]) -> SearcherRev<'n> { - use self::SearcherRevKind::*; - - let kind = if needle.len() == 0 { - Empty - } else if needle.len() == 1 { - OneByte(needle[0]) - } else { - TwoWay(twoway::Reverse::new(needle)) - }; - SearcherRev { - needle: CowBytes::new(needle), - nhash: NeedleHash::reverse(needle), - kind, - } - } - - fn needle(&self) -> &[u8] { - self.needle.as_slice() - } - - fn as_ref(&self) -> SearcherRev<'_> { - use self::SearcherRevKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - }; - SearcherRev { - needle: CowBytes::new(self.needle()), - nhash: self.nhash, - kind, - } - } - - #[cfg(feature = "std")] - fn into_owned(self) -> SearcherRev<'static> { - use self::SearcherRevKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - }; - SearcherRev { - needle: self.needle.into_owned(), - nhash: self.nhash, - kind, - } - } - - /// Implements reverse substring search by selecting the implementation - /// chosen at construction and executing it on the given haystack with the - /// prefilter's current state of effectiveness. - #[inline(always)] - fn rfind(&self, haystack: &[u8]) -> Option { - use self::SearcherRevKind::*; - - let needle = self.needle(); - if haystack.len() < needle.len() { - return None; - } - match self.kind { - Empty => Some(haystack.len()), - OneByte(b) => crate::memrchr(b, haystack), - TwoWay(ref tw) => { - // For very short haystacks (e.g., where the prefilter probably - // can't run), it's faster to just run RK. 
- if rabinkarp::is_fast(haystack, needle) { - rabinkarp::rfind_with(&self.nhash, haystack, needle) - } else { - tw.rfind(haystack, needle) - } - } - } - } -} - -/// This module defines some generic quickcheck properties useful for testing -/// any substring search algorithm. It also runs those properties for the -/// top-level public API memmem routines. (The properties are also used to -/// test various substring search implementations more granularly elsewhere as -/// well.) -#[cfg(all(test, feature = "std", not(miri)))] -mod proptests { - // N.B. This defines the quickcheck tests using the properties defined - // below. Because of macro-visibility weirdness, the actual macro is - // defined at the top of this file. - define_memmem_quickcheck_tests!(super::find, super::rfind); - - /// Check that every prefix of the given byte string is a substring. - pub(crate) fn prefix_is_substring( - reverse: bool, - bs: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option, - ) -> bool { - if bs.is_empty() { - return true; - } - for i in 0..(bs.len() - 1) { - let prefix = &bs[..i]; - if reverse { - assert_eq!(naive_rfind(bs, prefix), search(bs, prefix)); - } else { - assert_eq!(naive_find(bs, prefix), search(bs, prefix)); - } - } - true - } - - /// Check that every suffix of the given byte string is a substring. - pub(crate) fn suffix_is_substring( - reverse: bool, - bs: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option, - ) -> bool { - if bs.is_empty() { - return true; - } - for i in 0..(bs.len() - 1) { - let suffix = &bs[i..]; - if reverse { - assert_eq!(naive_rfind(bs, suffix), search(bs, suffix)); - } else { - assert_eq!(naive_find(bs, suffix), search(bs, suffix)); - } - } - true - } - - /// Check that naive substring search matches the result of the given search - /// algorithm. - pub(crate) fn matches_naive( - reverse: bool, - haystack: &[u8], - needle: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option, - ) -> bool { - if reverse { - naive_rfind(haystack, needle) == search(haystack, needle) - } else { - naive_find(haystack, needle) == search(haystack, needle) - } - } - - /// Naively search forwards for the given needle in the given haystack. - fn naive_find(haystack: &[u8], needle: &[u8]) -> Option { - if needle.is_empty() { - return Some(0); - } else if haystack.len() < needle.len() { - return None; - } - for i in 0..(haystack.len() - needle.len() + 1) { - if needle == &haystack[i..i + needle.len()] { - return Some(i); - } - } - None - } - - /// Naively search in reverse for the given needle in the given haystack. - fn naive_rfind(haystack: &[u8], needle: &[u8]) -> Option { - if needle.is_empty() { - return Some(haystack.len()); - } else if haystack.len() < needle.len() { - return None; - } - for i in (0..(haystack.len() - needle.len() + 1)).rev() { - if needle == &haystack[i..i + needle.len()] { - return Some(i); - } - } - None - } -} - -/// This module defines some hand-written "simple" substring tests. It -/// also provides routines for easily running them on any substring search -/// implementation. #[cfg(test)] -mod testsimples { - define_memmem_simple_tests!(super::find, super::rfind); - - /// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple. 
- type SearchTest = - (&'static str, &'static str, Option<usize>, Option<usize>); - - const SEARCH_TESTS: &'static [SearchTest] = &[ - ("", "", Some(0), Some(0)), - ("", "a", Some(0), Some(1)), - ("", "ab", Some(0), Some(2)), - ("", "abc", Some(0), Some(3)), - ("a", "", None, None), - ("a", "a", Some(0), Some(0)), - ("a", "aa", Some(0), Some(1)), - ("a", "ba", Some(1), Some(1)), - ("a", "bba", Some(2), Some(2)), - ("a", "bbba", Some(3), Some(3)), - ("a", "bbbab", Some(3), Some(3)), - ("a", "bbbabb", Some(3), Some(3)), - ("a", "bbbabbb", Some(3), Some(3)), - ("a", "bbbbbb", None, None), - ("ab", "", None, None), - ("ab", "a", None, None), - ("ab", "b", None, None), - ("ab", "ab", Some(0), Some(0)), - ("ab", "aab", Some(1), Some(1)), - ("ab", "aaab", Some(2), Some(2)), - ("ab", "abaab", Some(0), Some(3)), - ("ab", "baaab", Some(3), Some(3)), - ("ab", "acb", None, None), - ("ab", "abba", Some(0), Some(0)), - ("abc", "ab", None, None), - ("abc", "abc", Some(0), Some(0)), - ("abc", "abcz", Some(0), Some(0)), - ("abc", "abczz", Some(0), Some(0)), - ("abc", "zabc", Some(1), Some(1)), - ("abc", "zzabc", Some(2), Some(2)), - ("abc", "azbc", None, None), - ("abc", "abzc", None, None), - ("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)), - ("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)), - ("xyz", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz", Some(32), Some(32)), - // Failures caught by quickcheck. - ("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)), - ("\u{0}\u{1e}", "\u{1e}\u{0}", None, None), - ]; - - /// Run the substring search tests. `search` should be a closure that - /// accepts a haystack and a needle and returns the starting position - /// of the first occurrence of needle in the haystack, or `None` if one - /// doesn't exist. - pub(crate) fn run_search_tests_fwd( - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) { - for &(needle, haystack, expected_fwd, _) in SEARCH_TESTS { - let (n, h) = (needle.as_bytes(), haystack.as_bytes()); - assert_eq!( - expected_fwd, - search(h, n), - "needle: {:?}, haystack: {:?}, expected: {:?}", - n, - h, - expected_fwd - ); - } - } - - /// Run the substring search tests. `search` should be a closure that - /// accepts a haystack and a needle and returns the starting position of - /// the last occurrence of needle in the haystack, or `None` if one doesn't - /// exist. - pub(crate) fn run_search_tests_rev( - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) { - for &(needle, haystack, _, expected_rev) in SEARCH_TESTS { - let (n, h) = (needle.as_bytes(), haystack.as_bytes()); - assert_eq!( - expected_rev, - search(h, n), - "needle: {:?}, haystack: {:?}, expected: {:?}", - n, - h, - expected_rev - ); - } +mod tests { + use super::*; + + define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n).find(h))); + define_substring_reverse_quickcheck!(|h, n| Some( + FinderRev::new(n).rfind(h) + )); + + #[test] + fn forward() { + crate::tests::substring::Runner::new() + .fwd(|h, n| Some(Finder::new(n).find(h))) + .run(); + } + + #[test] + fn reverse() { + crate::tests::substring::Runner::new() + .rev(|h, n| Some(FinderRev::new(n).rfind(h))) + .run(); } } diff --git a/vendor/memchr/src/memmem/prefilter/fallback.rs b/vendor/memchr/src/memmem/prefilter/fallback.rs deleted file mode 100644 index ae1bbcc..0000000 --- a/vendor/memchr/src/memmem/prefilter/fallback.rs +++ /dev/null @@ -1,122 +0,0 @@ -/* -This module implements a "fallback" prefilter that only relies on memchr to -function.
While memchr works best when it's explicitly vectorized, its -fallback implementations are fast enough to make a prefilter like this -worthwhile. - -The essence of this implementation is to identify two rare bytes in a needle -based on a background frequency distribution of bytes. We then run memchr on the -rarer byte. For each match, we use the second rare byte as a guard to quickly -check if a match is possible. If the position passes the guard test, then we do -a naive memcmp to confirm the match. - -In practice, this formulation works amazingly well, primarily because of the -heuristic use of a background frequency distribution. However, it does have a -number of weaknesses where it can get quite slow when its background frequency -distribution doesn't line up with the haystack being searched. This is why we -have specialized vector routines that essentially take this idea and move the -guard check into vectorized code. (Those specialized vector routines do still -make use of the background frequency distribution of bytes though.) - -This fallback implementation was originally formulated in regex many moons ago: -https://github.com/rust-lang/regex/blob/3db8722d0b204a85380fe2a65e13d7065d7dd968/src/literal/imp.rs#L370-L501 -Prior to that, I'm not aware of anyone using this technique in any prominent -substring search implementation. Although, I'm sure folks have had this same -insight long before me. - -Another version of this also appeared in bstr: -https://github.com/BurntSushi/bstr/blob/a444256ca7407fe180ee32534688549655b7a38e/src/search/prefilter.rs#L83-L340 -*/ - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// Look for a possible occurrence of needle. The position returned -/// corresponds to the beginning of the occurrence, if one exists. -/// -/// Callers may assume that this never returns false negatives (i.e., it -/// never misses an actual occurrence), but must check that the returned -/// position corresponds to a match. That is, it can return false -/// positives. -/// -/// This should only be used when Freqy is constructed for forward -/// searching. -pub(crate) fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - let mut i = 0; - let (rare1i, rare2i) = ninfo.rarebytes.as_rare_usize(); - let (rare1, rare2) = ninfo.rarebytes.as_rare_bytes(needle); - while prestate.is_effective() { - // Use a fast vectorized implementation to skip to the next - // occurrence of the rarest byte (heuristically chosen) in the - // needle. - let found = crate::memchr(rare1, &haystack[i..])?; - prestate.update(found); - i += found; - - // If we can't align our first match with the haystack, then a - // match is impossible. - if i < rare1i { - i += 1; - continue; - } - - // Align our rare2 byte with the haystack. A mismatch means that - // a match is impossible. - let aligned_rare2i = i - rare1i + rare2i; - if haystack.get(aligned_rare2i) != Some(&rare2) { - i += 1; - continue; - } - - // We've done what we can. There might be a match here. - return Some(i - rare1i); - } - // The only way we get here is if we believe our skipping heuristic - // has become ineffective. We're allowed to return false positives, - // so return the position at which we advanced to, aligned to the - // haystack.
- Some(i.saturating_sub(rare1i)) -} - -#[cfg(all(test, feature = "std"))] -mod tests { - use super::*; - - fn freqy_find(haystack: &[u8], needle: &[u8]) -> Option<usize> { - let ninfo = NeedleInfo::new(needle); - let mut prestate = PrefilterState::new(); - find(&mut prestate, &ninfo, haystack, needle) - } - - #[test] - fn freqy_forward() { - assert_eq!(Some(0), freqy_find(b"BARFOO", b"BAR")); - assert_eq!(Some(3), freqy_find(b"FOOBAR", b"BAR")); - assert_eq!(Some(0), freqy_find(b"zyzz", b"zyzy")); - assert_eq!(Some(2), freqy_find(b"zzzy", b"zyzy")); - assert_eq!(None, freqy_find(b"zazb", b"zyzy")); - assert_eq!(Some(0), freqy_find(b"yzyy", b"yzyz")); - assert_eq!(Some(2), freqy_find(b"yyyz", b"yzyz")); - assert_eq!(None, freqy_find(b"yayb", b"yzyz")); - } - - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - - // SAFETY: super::find is safe to call for all inputs and on all - // platforms. - unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/prefilter/genericsimd.rs b/vendor/memchr/src/memmem/prefilter/genericsimd.rs deleted file mode 100644 index 1a6e387..0000000 --- a/vendor/memchr/src/memmem/prefilter/genericsimd.rs +++ /dev/null @@ -1,207 +0,0 @@ -use core::mem::size_of; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - vector::Vector, - NeedleInfo, -}; - -/// The implementation of the forward vector accelerated candidate finder. -/// -/// This is inspired by the "generic SIMD" algorithm described here: -/// http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd -/// -/// The main difference is that this is just a prefilter. That is, it reports -/// candidates once they are seen and doesn't attempt to confirm them. Also, -/// the bytes this routine uses to check for candidates are selected based on -/// an a priori background frequency distribution. This means that on most -/// haystacks, this will on average spend more time in vectorized code than you -/// would if you just selected the first and last bytes of the needle. -/// -/// Note that a non-prefilter variant of this algorithm can be found in the -/// parent module, but it only works on smaller needles. -/// -/// `prestate`, `ninfo`, `haystack` and `needle` are the four prefilter -/// function parameters. `fallback` is a prefilter that is used if the haystack -/// is too small to be handled with the given vector size. -/// -/// This routine is not safe because it is intended for callers to specialize -/// this with a particular vector (e.g., __m256i) and then call it with the -/// relevant target feature (e.g., avx2) enabled. -/// -/// # Panics -/// -/// If `needle.len() <= 1`, then this panics. -/// -/// # Safety -/// -/// Since this is meant to be used with vector functions, callers need to -/// specialize this inside of a function with a `target_feature` attribute. -/// Therefore, callers must ensure that whatever target feature is being used -/// supports the vector functions that this function is specialized for. (For -/// the specific vector functions used, see the Vector trait implementations.)
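The chunk check at the heart of this routine is easier to see with concrete intrinsics. Below is a minimal, hedged sketch of the same two-rare-byte check written directly against SSE2; the function name and parameters are illustrative, and the real code goes through the Vector trait so one body can be instantiated for __m128i, __m256i and v128 alike.

// Sketch of a single 16-byte chunk check from the generic SIMD
// prefilter: splat the two rare needle bytes, compare each against the
// haystack at its needle-relative offset, AND the equality masks, and
// take the lowest set bit as the chunk-relative candidate offset.
#[cfg(target_arch = "x86_64")]
unsafe fn chunk_candidate_sse2(
    ptr: *const u8, // points into the haystack
    rare1i: usize,  // offset of rare byte 1 within the needle
    rare2i: usize,  // offset of rare byte 2 within the needle
    rare1: u8,
    rare2: u8,
) -> Option<usize> {
    use core::arch::x86_64::*;
    let rare1chunk = _mm_set1_epi8(rare1 as i8);
    let rare2chunk = _mm_set1_epi8(rare2 as i8);
    // SAFETY: the caller must guarantee 16 readable bytes starting at
    // both ptr + rare1i and ptr + rare2i.
    let chunk0 = _mm_loadu_si128(ptr.add(rare1i) as *const __m128i);
    let chunk1 = _mm_loadu_si128(ptr.add(rare2i) as *const __m128i);
    let eq0 = _mm_cmpeq_epi8(chunk0, rare1chunk);
    let eq1 = _mm_cmpeq_epi8(chunk1, rare2chunk);
    let mask = _mm_movemask_epi8(_mm_and_si128(eq0, eq1));
    if mask == 0 {
        None
    } else {
        Some(mask.trailing_zeros() as usize)
    }
}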
-#[inline(always)] -pub(crate) unsafe fn find<V: Vector>( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - fallback: PrefilterFnTy, -) -> Option<usize> { - assert!(needle.len() >= 2, "needle must be at least 2 bytes"); - let (rare1i, rare2i) = ninfo.rarebytes.as_rare_ordered_usize(); - let min_haystack_len = rare2i + size_of::<V>(); - if haystack.len() < min_haystack_len { - return fallback(prestate, ninfo, haystack, needle); - } - - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let max_ptr = end_ptr.sub(min_haystack_len); - let mut ptr = start_ptr; - - let rare1chunk = V::splat(needle[rare1i]); - let rare2chunk = V::splat(needle[rare2i]); - - // N.B. I did experiment with unrolling the loop to deal with size(V) - // bytes at a time and 2*size(V) bytes at a time. The double unroll - // was marginally faster while the quadruple unroll was unambiguously - // slower. In the end, I decided the complexity from unrolling wasn't - // worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to - // compare. - while ptr <= max_ptr { - let m = find_in_chunk2(ptr, rare1i, rare2i, rare1chunk, rare2chunk); - if let Some(chunki) = m { - return Some(matched(prestate, start_ptr, ptr, chunki)); - } - ptr = ptr.add(size_of::<V>()); - } - if ptr < end_ptr { - // This routine immediately quits if a candidate match is found. - // That means that if we're here, no candidate matches have been - // found at or before 'ptr'. Thus, we don't need to mask anything - // out even though we might technically search part of the haystack - // that we've already searched (because we know it can't match). - ptr = max_ptr; - let m = find_in_chunk2(ptr, rare1i, rare2i, rare1chunk, rare2chunk); - if let Some(chunki) = m { - return Some(matched(prestate, start_ptr, ptr, chunki)); - } - } - prestate.update(haystack.len()); - None -} - -// Below are two different techniques for checking whether a candidate -// match exists in a given chunk or not. find_in_chunk2 checks two bytes -// whereas find_in_chunk3 checks three bytes. The idea behind checking -// three bytes is that while we do a bit more work per iteration, we -// decrease the chances of a false positive match being reported and thus -// make the search faster overall. This actually works out for the -// memmem/krate/prebuilt/huge-en/never-all-common-bytes benchmark, where -// using find_in_chunk3 is about 25% faster than find_in_chunk2. However, -// it turns out that find_in_chunk2 is faster for all other benchmarks, so -// perhaps the extra check isn't worth it in practice. -// -// For now, we go with find_in_chunk2, but we leave find_in_chunk3 around -// to make it easy to switch to and benchmark when possible. - -/// Search for an occurrence of two rare bytes from the needle in the current -/// chunk pointed to by ptr. -/// -/// rare1chunk and rare2chunk correspond to vectors with the rare1 and rare2 -/// bytes repeated in each 8-bit lane, respectively. -/// -/// # Safety -/// -/// It must be safe to do an unaligned read of size(V) bytes starting at both -/// (ptr + rare1i) and (ptr + rare2i).
-#[inline(always)] -unsafe fn find_in_chunk2<V: Vector>( - ptr: *const u8, - rare1i: usize, - rare2i: usize, - rare1chunk: V, - rare2chunk: V, -) -> Option<usize> { - let chunk0 = V::load_unaligned(ptr.add(rare1i)); - let chunk1 = V::load_unaligned(ptr.add(rare2i)); - - let eq0 = chunk0.cmpeq(rare1chunk); - let eq1 = chunk1.cmpeq(rare2chunk); - - let match_offsets = eq0.and(eq1).movemask(); - if match_offsets == 0 { - return None; - } - Some(match_offsets.trailing_zeros() as usize) -} - -/// Search for an occurrence of two rare bytes and the first byte (even if one -/// of the rare bytes is equivalent to the first byte) from the needle in the -/// current chunk pointed to by ptr. -/// -/// firstchunk, rare1chunk and rare2chunk correspond to vectors with the first, -/// rare1 and rare2 bytes repeated in each 8-bit lane, respectively. -/// -/// # Safety -/// -/// It must be safe to do an unaligned read of size(V) bytes starting at ptr, -/// (ptr + rare1i) and (ptr + rare2i). -#[allow(dead_code)] -#[inline(always)] -unsafe fn find_in_chunk3<V: Vector>( - ptr: *const u8, - rare1i: usize, - rare2i: usize, - firstchunk: V, - rare1chunk: V, - rare2chunk: V, -) -> Option<usize> { - let chunk0 = V::load_unaligned(ptr); - let chunk1 = V::load_unaligned(ptr.add(rare1i)); - let chunk2 = V::load_unaligned(ptr.add(rare2i)); - - let eq0 = chunk0.cmpeq(firstchunk); - let eq1 = chunk1.cmpeq(rare1chunk); - let eq2 = chunk2.cmpeq(rare2chunk); - - let match_offsets = eq0.and(eq1).and(eq2).movemask(); - if match_offsets == 0 { - return None; - } - Some(match_offsets.trailing_zeros() as usize) -} - -/// Accepts a chunk-relative offset and returns a haystack relative offset -/// after updating the prefilter state. -/// -/// Why do we use this uninlineable function when a search completes? Well, -/// I don't know. Really. Obviously this function was not here initially. -/// When doing profiling, the codegen for the inner loop here looked bad and -/// I didn't know why. There were a couple extra 'add' instructions and an -/// extra 'lea' instruction that I couldn't explain. I hypothesized that the -/// optimizer was having trouble untangling the hot code in the loop from the -/// code that deals with a candidate match. By putting the latter into an -/// uninlineable function, it kind of forces the issue and it had the intended -/// effect: codegen improved measurably. It's good for a ~10% improvement -/// across the board on the memmem/krate/prebuilt/huge-en/ benchmarks. -#[cold] -#[inline(never)] -fn matched( - prestate: &mut PrefilterState, - start_ptr: *const u8, - ptr: *const u8, - chunki: usize, -) -> usize { - let found = diff(ptr, start_ptr) + chunki; - prestate.update(found); - found -} - -/// Subtract `b` from `a` and return the difference. `a` must be greater than -/// or equal to `b`. -fn diff(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memmem/prefilter/mod.rs b/vendor/memchr/src/memmem/prefilter/mod.rs deleted file mode 100644 index 015d3b2..0000000 --- a/vendor/memchr/src/memmem/prefilter/mod.rs +++ /dev/null @@ -1,570 +0,0 @@ -use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo}; - -mod fallback; -#[cfg(memchr_runtime_simd)] -mod genericsimd; -#[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))] -mod wasm; -#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] -mod x86; - -/// The maximum frequency rank permitted for the fallback prefilter.
If the -/// rarest byte in the needle has a frequency rank above this value, then no -/// prefilter is used if the fallback prefilter would otherwise be selected. -const MAX_FALLBACK_RANK: usize = 250; - -/// A combination of prefilter effectiveness state, the prefilter function and -/// the needle info required to run a prefilter. -/// -/// For the most part, these are grouped into a single type for convenience, -/// instead of needing to pass around all three as distinct function -/// parameters. -pub(crate) struct Pre<'a> { - /// State that tracks the effectiveness of a prefilter. - pub(crate) state: &'a mut PrefilterState, - /// The actual prefilter function. - pub(crate) prefn: PrefilterFn, - /// Information about a needle, such as its RK hash and rare byte offsets. - pub(crate) ninfo: &'a NeedleInfo, -} - -impl<'a> Pre<'a> { - /// Call this prefilter on the given haystack with the given needle. - #[inline(always)] - pub(crate) fn call( - &mut self, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - self.prefn.call(self.state, self.ninfo, haystack, needle) - } - - /// Return true if and only if this prefilter should be used. - #[inline(always)] - pub(crate) fn should_call(&mut self) -> bool { - self.state.is_effective() - } -} - -/// A prefilter function. -/// -/// A prefilter function describes both forward and reverse searches. -/// (Although, we don't currently implement prefilters for reverse searching.) -/// In the case of a forward search, the position returned corresponds to -/// the starting offset of a match (confirmed or possible). Its minimum -/// value is `0`, and its maximum value is `haystack.len() - 1`. In the case -/// of a reverse search, the position returned corresponds to the position -/// immediately after a match (confirmed or possible). Its minimum value is `1` -/// and its maximum value is `haystack.len()`. -/// -/// In both cases, the position returned is the starting (or ending) point of a -/// _possible_ match. That is, returning a false positive is okay. A prefilter, -/// however, must never return any false negatives. That is, if a match exists -/// at a particular position `i`, then a prefilter _must_ return that position. -/// It cannot skip past it. -/// -/// # Safety -/// -/// A prefilter function is not safe to create, since not all prefilters are -/// safe to call in all contexts. (e.g., A prefilter that uses AVX instructions -/// may only be called on x86_64 CPUs with the relevant AVX feature enabled.) -/// Thus, callers must ensure that when a prefilter function is created that it -/// is safe to call for the current environment. -#[derive(Clone, Copy)] -pub(crate) struct PrefilterFn(PrefilterFnTy); - -/// The type of a prefilter function. All prefilters must satisfy this -/// signature. -/// -/// Using a function pointer like this does inhibit inlining, but it does -/// eliminate branching and the extra costs associated with copying a larger -/// enum. Note also, that using a Box<dyn Fn(..)> can't really work -/// here, since we want to work in contexts that don't have dynamic memory -/// allocation. Moreover, in the default configuration of this crate on x86_64 -/// CPUs released in the past ~decade, we will use an AVX2-optimized prefilter, -/// which generally won't be inlineable into the surrounding code anyway. -/// (Unless AVX2 is enabled at compile time, but this is typically rare, since -/// it produces a non-portable binary.)
-pub(crate) type PrefilterFnTy = unsafe fn( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize>; - -// If the haystack is too small for SSE2, then just run memchr on the -// rarest byte and be done with it. (It is likely that this code path is -// rarely exercised, since a higher level routine will probably dispatch to -// Rabin-Karp for such a small haystack.) -#[cfg(memchr_runtime_simd)] -fn simple_memchr_fallback( - _prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - let (rare, _) = ninfo.rarebytes.as_rare_ordered_usize(); - crate::memchr(needle[rare], haystack).map(|i| i.saturating_sub(rare)) -} - -impl PrefilterFn { - /// Create a new prefilter function from the function pointer given. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function is safe to call - /// for all inputs in the current environment. For example, if the given - /// prefilter function uses AVX instructions, then the caller must ensure - /// that the appropriate AVX CPU features are enabled. - pub(crate) unsafe fn new(prefn: PrefilterFnTy) -> PrefilterFn { - PrefilterFn(prefn) - } - - /// Call the underlying prefilter function with the given arguments. - pub fn call( - self, - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - // SAFETY: Callers have the burden of ensuring that a prefilter - // function is safe to call for all inputs in the current environment. - unsafe { (self.0)(prestate, ninfo, haystack, needle) } - } -} - -impl core::fmt::Debug for PrefilterFn { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - "<prefilter-fn(...)>".fmt(f) - } -} - -/// Prefilter controls whether heuristics are used to accelerate searching. -/// -/// A prefilter refers to the idea of detecting candidate matches very quickly, -/// and then confirming whether those candidates are full matches. This -/// idea can be quite effective since it's often the case that looking for -/// candidates can be a lot faster than running a complete substring search -/// over the entire input. Namely, looking for candidates can be done with -/// extremely fast vectorized code. -/// -/// The downside of a prefilter is that it assumes false positives (which are -/// candidates generated by a prefilter that aren't matches) are somewhat rare -/// relative to the frequency of full matches. That is, if a lot of false -/// positives are generated, then it's possible for search time to be worse -/// than if the prefilter wasn't enabled in the first place. -/// -/// Another downside of a prefilter is that it can result in highly variable -/// performance, where some cases are extraordinarily fast and others aren't. -/// Typically, variable performance isn't a problem, but it may be for your use -/// case. -/// -/// The use of prefilters in this implementation does use a heuristic to detect -/// when a prefilter might not be carrying its weight, and will dynamically -/// disable its use. Nevertheless, this configuration option gives callers -/// the ability to disable prefilters if you have knowledge that they won't be -/// useful. -#[derive(Clone, Copy, Debug)] -#[non_exhaustive] -pub enum Prefilter { - /// Never use a prefilter in substring search. - None, - /// Automatically detect whether a heuristic prefilter should be used.
If - /// it is used, then heuristics will be used to dynamically disable the - /// prefilter if it is believed to not be carrying its weight. - Auto, -} - -impl Default for Prefilter { - fn default() -> Prefilter { - Prefilter::Auto - } -} - -impl Prefilter { - pub(crate) fn is_none(&self) -> bool { - match *self { - Prefilter::None => true, - _ => false, - } - } -} - -/// PrefilterState tracks state associated with the effectiveness of a -/// prefilter. It is used to track how many bytes, on average, are skipped by -/// the prefilter. If this average dips below a certain threshold over time, -/// then the state renders the prefilter inert and stops using it. -/// -/// A prefilter state should be created for each search. (Where creating an -/// iterator is treated as a single search.) A prefilter state should only be -/// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert -/// `PrefilterState`. -#[derive(Clone, Debug)] -pub(crate) struct PrefilterState { - /// The number of skips that has been executed. This is always 1 greater - /// than the actual number of skips. The special sentinel value of 0 - /// indicates that the prefilter is inert. This is useful to avoid - /// additional checks to determine whether the prefilter is still - /// "effective." Once a prefilter becomes inert, it should no longer be - /// used (according to our heuristics). - skips: u32, - /// The total number of bytes that have been skipped. - skipped: u32, -} - -impl PrefilterState { - /// The minimum number of skip attempts to try before considering whether - /// a prefilter is effective or not. - const MIN_SKIPS: u32 = 50; - - /// The minimum amount of bytes that skipping must average. - /// - /// This value was chosen based on varying it and checking - /// the microbenchmarks. In particular, this can impact the - /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set - /// too low. - const MIN_SKIP_BYTES: u32 = 8; - - /// Create a fresh prefilter state. - pub(crate) fn new() -> PrefilterState { - PrefilterState { skips: 1, skipped: 0 } - } - - /// Create a fresh prefilter state that is always inert. - pub(crate) fn inert() -> PrefilterState { - PrefilterState { skips: 0, skipped: 0 } - } - - /// Update this state with the number of bytes skipped on the last - /// invocation of the prefilter. - #[inline] - pub(crate) fn update(&mut self, skipped: usize) { - self.skips = self.skips.saturating_add(1); - // We need to do this dance since it's technically possible for - // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the - // size of a prefilter state.) - if skipped > core::u32::MAX as usize { - self.skipped = core::u32::MAX; - } else { - self.skipped = self.skipped.saturating_add(skipped as u32); - } - } - - /// Return true if and only if this state indicates that a prefilter is - /// still effective. - #[inline] - pub(crate) fn is_effective(&mut self) -> bool { - if self.is_inert() { - return false; - } - if self.skips() < PrefilterState::MIN_SKIPS { - return true; - } - if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() { - return true; - } - - // We're inert. - self.skips = 0; - false - } - - #[inline] - fn is_inert(&self) -> bool { - self.skips == 0 - } - - #[inline] - fn skips(&self) -> u32 { - self.skips.saturating_sub(1) - } -} - -/// Determine which prefilter function, if any, to use. -/// -/// This only applies to x86_64 when runtime SIMD detection is enabled (which -/// is the default). 
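To make the heuristic concrete, here is a small, hedged sketch (assuming the constants above: MIN_SKIPS == 50 and MIN_SKIP_BYTES == 8) of how a run of low-skip prefilter invocations renders the state inert. The `demo_prefilter_state` function is a hypothetical crate-internal test, not part of the source:

// Sketch: 60 prefilter invocations that each skip only 4 bytes.
fn demo_prefilter_state() {
    let mut state = PrefilterState::new();
    for _ in 0..60 {
        state.update(4);
    }
    // 60 skips averaging 4 bytes/skip is below MIN_SKIP_BYTES (8), and
    // 60 >= MIN_SKIPS (50), so the heuristic shuts the prefilter off.
    assert!(!state.is_effective());
    // Once inert (skips == 0), it stays inert on subsequent checks.
    assert!(!state.is_effective());
}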
In general, we try to use an AVX prefilter, followed by -/// SSE and then followed by a generic one based on memchr. -#[inline(always)] -pub(crate) fn forward( - config: &Prefilter, - rare: &RareNeedleBytes, - needle: &[u8], -) -> Option<PrefilterFn> { - if config.is_none() || needle.len() <= 1 { - return None; - } - - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - { - #[cfg(feature = "std")] - { - if cfg!(memchr_runtime_avx) { - if is_x86_feature_detected!("avx2") { - // SAFETY: x86::avx::find only requires the avx2 feature, - // which we've just checked above. - return unsafe { Some(PrefilterFn::new(x86::avx::find)) }; - } - } - } - if cfg!(memchr_runtime_sse2) { - // SAFETY: x86::sse::find only requires the sse2 feature, which is - // guaranteed to be available on x86_64. - return unsafe { Some(PrefilterFn::new(x86::sse::find)) }; - } - } - #[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))] - { - // SAFETY: `wasm::find` is actually a safe function - // - // Also note that the `if true` is here to prevent, on wasm with simd, - // rustc warning about the code below being dead code. - if true { - return unsafe { Some(PrefilterFn::new(wasm::find)) }; - } - } - // Check that our rarest byte has a reasonably low rank. The main issue - // here is that the fallback prefilter can perform pretty poorly if it's - // given common bytes. So we try to avoid the worst cases here. - let (rare1_rank, _) = rare.as_ranks(needle); - if rare1_rank <= MAX_FALLBACK_RANK { - // SAFETY: fallback::find is safe to call in all environments. - return unsafe { Some(PrefilterFn::new(fallback::find)) }; - } - None -} - -/// Return the minimum length of the haystack in which a prefilter should be -/// used. If the haystack is below this length, then it's probably not worth -/// the overhead of running the prefilter. -/// -/// We used to look at the length of a haystack here. That is, if it was too -/// small, then don't bother with the prefilter. But two things changed: -/// the prefilter falls back to memchr for small haystacks, and, at the -/// meta-searcher level, Rabin-Karp is employed for tiny haystacks anyway. -/// -/// We keep it around for now in case we want to bring it back. -#[allow(dead_code)] -pub(crate) fn minimum_len(_haystack: &[u8], needle: &[u8]) -> usize { - // If the haystack length isn't greater than needle.len() * FACTOR, then - // no prefilter will be used. The presumption here is that since there - // are so few bytes to check, it's not worth running the prefilter since - // there will need to be a validation step anyway. Thus, the prefilter is - // largely redundant work. - // - // Increasing the factor noticeably hurts the - // memmem/krate/prebuilt/teeny-*/never-john-watson benchmarks. - const PREFILTER_LENGTH_FACTOR: usize = 2; - const VECTOR_MIN_LENGTH: usize = 16; - let min = core::cmp::max( - VECTOR_MIN_LENGTH, - PREFILTER_LENGTH_FACTOR * needle.len(), - ); - // For haystacks with length==min, we still want to avoid the prefilter, - // so add 1. - min + 1 -} - -#[cfg(all(test, feature = "std", not(miri)))] -pub(crate) mod tests { - use std::convert::{TryFrom, TryInto}; - - use super::*; - use crate::memmem::{ - prefilter::PrefilterFnTy, rabinkarp, rarebytes::RareNeedleBytes, - }; - - // Below is a small jig that generates prefilter tests. The main purpose - // of this jig is to generate tests of varying needle/haystack lengths - // in order to try and exercise all code paths in our prefilters.
And in - // particular, this is especially important for vectorized prefilters where - // certain code paths might only be exercised at certain lengths. - - /// A test that represents the input and expected output to a prefilter - /// function. The test should be able to run with any prefilter function - /// and get the expected output. - pub(crate) struct PrefilterTest { - // These fields represent the inputs and expected output of a forwards - // prefilter function. - pub(crate) ninfo: NeedleInfo, - pub(crate) haystack: Vec<u8>, - pub(crate) needle: Vec<u8>, - pub(crate) output: Option<usize>, - } - - impl PrefilterTest { - /// Run all generated forward prefilter tests on the given prefn. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function pointer is - /// safe to call for all inputs in the current environment. - pub(crate) unsafe fn run_all_tests(prefn: PrefilterFnTy) { - PrefilterTest::run_all_tests_filter(prefn, |_| true) - } - - /// Run all generated forward prefilter tests that pass the given - /// predicate on the given prefn. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function pointer is - /// safe to call for all inputs in the current environment. - pub(crate) unsafe fn run_all_tests_filter( - prefn: PrefilterFnTy, - mut predicate: impl FnMut(&PrefilterTest) -> bool, - ) { - for seed in PREFILTER_TEST_SEEDS { - for test in seed.generate() { - if predicate(&test) { - test.run(prefn); - } - } - } - } - - /// Create a new prefilter test from a seed and some chosen offsets to - /// rare bytes in the seed's needle. - /// - /// If a valid test could not be constructed, then None is returned. - /// (Currently, we take the approach of massaging tests to be valid - /// instead of rejecting them outright.) - fn new( - seed: PrefilterTestSeed, - rare1i: usize, - rare2i: usize, - haystack_len: usize, - needle_len: usize, - output: Option<usize>, - ) -> Option<PrefilterTest> { - let mut rare1i: u8 = rare1i.try_into().unwrap(); - let mut rare2i: u8 = rare2i.try_into().unwrap(); - // The '#' byte is never used in a haystack (unless we're expecting - // a match), while the '@' byte is never used in a needle. - let mut haystack = vec![b'@'; haystack_len]; - let mut needle = vec![b'#'; needle_len]; - needle[0] = seed.first; - needle[rare1i as usize] = seed.rare1; - needle[rare2i as usize] = seed.rare2; - // If we're expecting a match, then make sure the needle occurs - // in the haystack at the expected position. - if let Some(i) = output { - haystack[i..i + needle.len()].copy_from_slice(&needle); - } - // If the operations above lead to rare offsets pointing to the - // non-first occurrence of a byte, then adjust it. This might lead - // to redundant tests, but it's simpler than trying to change the - // generation process I think. - if let Some(i) = crate::memchr(seed.rare1, &needle) { - rare1i = u8::try_from(i).unwrap(); - } - if let Some(i) = crate::memchr(seed.rare2, &needle) { - rare2i = u8::try_from(i).unwrap(); - } - let ninfo = NeedleInfo { - rarebytes: RareNeedleBytes::new(rare1i, rare2i), - nhash: rabinkarp::NeedleHash::forward(&needle), - }; - Some(PrefilterTest { ninfo, haystack, needle, output }) - } - - /// Run this specific test on the given prefilter function. If the - /// outputs do not match, then this routine panics with a failure - /// message. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function pointer is - /// safe to call for all inputs in the current environment.
- unsafe fn run(&self, prefn: PrefilterFnTy) { - let mut prestate = PrefilterState::new(); - assert_eq!( - self.output, - prefn( - &mut prestate, - &self.ninfo, - &self.haystack, - &self.needle - ), - "ninfo: {:?}, haystack(len={}): {:?}, needle(len={}): {:?}", - self.ninfo, - self.haystack.len(), - std::str::from_utf8(&self.haystack).unwrap(), - self.needle.len(), - std::str::from_utf8(&self.needle).unwrap(), - ); - } - } - - /// A set of prefilter test seeds. Each seed serves as the base for the - /// generation of many other tests. In essence, the seed captures the - /// "rare" and first bytes among our needle. The tests generated from each - /// seed essentially vary the length of the needle and haystack, while - /// using the rare/first byte configuration from the seed. - /// - /// The purpose of this is to test many different needle/haystack lengths. - /// In particular, some of the vector optimizations might only have bugs - /// in haystacks of a certain size. - const PREFILTER_TEST_SEEDS: &[PrefilterTestSeed] = &[ - PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'z' }, - PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'z' }, - PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'x' }, - PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'x' }, - PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'y' }, - ]; - - /// Data that describes a single prefilter test seed. - #[derive(Clone, Copy)] - struct PrefilterTestSeed { - first: u8, - rare1: u8, - rare2: u8, - } - - impl PrefilterTestSeed { - /// Generate a series of prefilter tests from this seed. - fn generate(self) -> impl Iterator<Item = PrefilterTest> { - let len_start = 2; - // The iterator below generates *a lot* of tests. The number of - // tests was chosen somewhat empirically to be "bearable" when - // running the test suite. - // - // We use an iterator here because the collective haystacks of all - // these test cases add up to enough memory to OOM a conservative - // sandbox or a small laptop. - (len_start..=40).flat_map(move |needle_len| { - let rare_start = len_start - 1; - (rare_start..needle_len).flat_map(move |rare1i| { - (rare1i..needle_len).flat_map(move |rare2i| { - (needle_len..=66).flat_map(move |haystack_len| { - PrefilterTest::new( - self, - rare1i, - rare2i, - haystack_len, - needle_len, - None, - ) - .into_iter() - .chain( - (0..=(haystack_len - needle_len)).flat_map( - move |output| { - PrefilterTest::new( - self, - rare1i, - rare2i, - haystack_len, - needle_len, - Some(output), - ) - }, - ), - ) - }) - }) - }) - }) - } - } -} diff --git a/vendor/memchr/src/memmem/prefilter/wasm.rs b/vendor/memchr/src/memmem/prefilter/wasm.rs deleted file mode 100644 index 5470c92..0000000 --- a/vendor/memchr/src/memmem/prefilter/wasm.rs +++ /dev/null @@ -1,39 +0,0 @@ -use core::arch::wasm32::v128; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// A `v128`-accelerated candidate finder for single-substring search.
-#[target_feature(enable = "simd128")] -pub(crate) fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - unsafe { - super::genericsimd::find::<v128>( - prestate, - ninfo, - haystack, - needle, - super::simple_memchr_fallback, - ) - } -} - -#[cfg(all(test, feature = "std"))] -mod tests { - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - // SAFETY: super::find is safe to call for all inputs. - unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/prefilter/x86/avx.rs b/vendor/memchr/src/memmem/prefilter/x86/avx.rs deleted file mode 100644 index fb11f33..0000000 --- a/vendor/memchr/src/memmem/prefilter/x86/avx.rs +++ /dev/null @@ -1,46 +0,0 @@ -use core::arch::x86_64::__m256i; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// An AVX2 accelerated candidate finder for single-substring search. -/// -/// # Safety -/// -/// Callers must ensure that the avx2 CPU feature is enabled in the current -/// environment. -#[target_feature(enable = "avx2")] -pub(crate) unsafe fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - super::super::genericsimd::find::<__m256i>( - prestate, - ninfo, - haystack, - needle, - super::sse::find, - ) -} - -#[cfg(test)] -mod tests { - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - if !is_x86_feature_detected!("avx2") { - return; - } - // SAFETY: The safety of super::find only requires that the current - // CPU support AVX2, which we checked above. - unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/prefilter/x86/mod.rs b/vendor/memchr/src/memmem/prefilter/x86/mod.rs deleted file mode 100644 index 91381e5..0000000 --- a/vendor/memchr/src/memmem/prefilter/x86/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -// We only use AVX when we can detect at runtime whether it's available, which -// requires std. -#[cfg(feature = "std")] -pub(crate) mod avx; -pub(crate) mod sse; diff --git a/vendor/memchr/src/memmem/prefilter/x86/sse.rs b/vendor/memchr/src/memmem/prefilter/x86/sse.rs deleted file mode 100644 index b1c48e1..0000000 --- a/vendor/memchr/src/memmem/prefilter/x86/sse.rs +++ /dev/null @@ -1,42 +0,0 @@ -use core::arch::x86_64::__m128i; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// An SSE2 accelerated candidate finder for single-substring search. -/// -/// # Safety -/// -/// Callers must ensure that the sse2 CPU feature is enabled in the current -/// environment. This feature should be enabled in all x86_64 targets.
-#[target_feature(enable = "sse2")] -pub(crate) unsafe fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - super::super::genericsimd::find::<__m128i>( - prestate, - ninfo, - haystack, - needle, - super::super::simple_memchr_fallback, - ) -} - -#[cfg(all(test, feature = "std"))] -mod tests { - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - // SAFETY: super::find is safe to call for all inputs on x86. - unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/rabinkarp.rs b/vendor/memchr/src/memmem/rabinkarp.rs deleted file mode 100644 index daa4015..0000000 --- a/vendor/memchr/src/memmem/rabinkarp.rs +++ /dev/null @@ -1,233 +0,0 @@ -/* -This module implements the classical Rabin-Karp substring search algorithm, -with no extra frills. While its use would seem to break our time complexity -guarantee of O(m+n) (RK's time complexity is O(mn)), we are careful to only -ever use RK on a constant subset of haystacks. The main point here is that -RK has good latency properties for small needles/haystacks. It's very quick -to compute a needle hash and zip through the haystack when compared to -initializing Two-Way, for example. And this is especially useful for cases -where the haystack is just too short for vector instructions to do much good. - -The hashing function used here is the same one recommended by ESMAJ. - -Another choice instead of Rabin-Karp would be Shift-Or. But its latency -isn't quite as good since its preprocessing time is a bit more expensive -(both in practice and in theory). However, perhaps Shift-Or has a place -somewhere else for short patterns. I think the main problem is that it -requires space proportional to the alphabet and the needle. If we, for -example, supported needles up to length 16, then the total table size would be -len(alphabet)*size_of::<u16>()==512 bytes. Which isn't exactly small, and it's -probably bad to put that on the stack. So ideally, we'd throw it on the heap, -but we'd really like to write as much code without using alloc/std as possible. -But maybe it's worth the special casing. It's a TODO to benchmark. - -Wikipedia has a decent explanation, if a bit heavy on the theory: -https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm - -But ESMAJ provides something a bit more concrete: -http://www-igm.univ-mlv.fr/~lecroq/string/node5.html - -Finally, aho-corasick uses Rabin-Karp for multiple pattern match in some cases: -https://github.com/BurntSushi/aho-corasick/blob/3852632f10587db0ff72ef29e88d58bf305a0946/src/packed/rabinkarp.rs -*/ - -/// Whether RK is believed to be very fast for the given needle/haystack. -pub(crate) fn is_fast(haystack: &[u8], _needle: &[u8]) -> bool { - haystack.len() < 16 -} - -/// Search for the first occurrence of needle in haystack using Rabin-Karp. -pub(crate) fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { - find_with(&NeedleHash::forward(needle), haystack, needle) -} - -/// Search for the first occurrence of needle in haystack using Rabin-Karp with -/// a pre-computed needle hash. -pub(crate) fn find_with( - nhash: &NeedleHash, - mut haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - if haystack.len() < needle.len() { - return None; - } - let start = haystack.as_ptr() as usize; - let mut hash = Hash::from_bytes_fwd(&haystack[..needle.len()]); - // N.B. I've experimented with unrolling this loop, but couldn't realize - // any obvious gains.
- loop { - if nhash.eq(hash) && is_prefix(haystack, needle) { - return Some(haystack.as_ptr() as usize - start); - } - if needle.len() >= haystack.len() { - return None; - } - hash.roll(&nhash, haystack[0], haystack[needle.len()]); - haystack = &haystack[1..]; - } -} - -/// Search for the last occurrence of needle in haystack using Rabin-Karp. -pub(crate) fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { - rfind_with(&NeedleHash::reverse(needle), haystack, needle) -} - -/// Search for the last occurrence of needle in haystack using Rabin-Karp with -/// a pre-computed needle hash. -pub(crate) fn rfind_with( - nhash: &NeedleHash, - mut haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - if haystack.len() < needle.len() { - return None; - } - let mut hash = - Hash::from_bytes_rev(&haystack[haystack.len() - needle.len()..]); - loop { - if nhash.eq(hash) && is_suffix(haystack, needle) { - return Some(haystack.len() - needle.len()); - } - if needle.len() >= haystack.len() { - return None; - } - hash.roll( - &nhash, - haystack[haystack.len() - 1], - haystack[haystack.len() - needle.len() - 1], - ); - haystack = &haystack[..haystack.len() - 1]; - } -} - -/// A hash derived from a needle. -#[derive(Clone, Copy, Debug, Default)] -pub(crate) struct NeedleHash { - /// The actual hash. - hash: Hash, - /// The factor needed to multiply a byte by in order to subtract it from - /// the hash. It is defined to be 2^(n-1) (using wrapping exponentiation), - /// where n is the length of the needle. This is how we "remove" a byte - /// from the hash once the hash window rolls past it. - hash_2pow: u32, -} - -impl NeedleHash { - /// Create a new Rabin-Karp hash for the given needle for use in forward - /// searching. - pub(crate) fn forward(needle: &[u8]) -> NeedleHash { - let mut nh = NeedleHash { hash: Hash::new(), hash_2pow: 1 }; - if needle.is_empty() { - return nh; - } - nh.hash.add(needle[0]); - for &b in needle.iter().skip(1) { - nh.hash.add(b); - nh.hash_2pow = nh.hash_2pow.wrapping_shl(1); - } - nh - } - - /// Create a new Rabin-Karp hash for the given needle for use in reverse - /// searching. - pub(crate) fn reverse(needle: &[u8]) -> NeedleHash { - let mut nh = NeedleHash { hash: Hash::new(), hash_2pow: 1 }; - if needle.is_empty() { - return nh; - } - nh.hash.add(needle[needle.len() - 1]); - for &b in needle.iter().rev().skip(1) { - nh.hash.add(b); - nh.hash_2pow = nh.hash_2pow.wrapping_shl(1); - } - nh - } - - /// Return true if the hashes are equivalent. - fn eq(&self, hash: Hash) -> bool { - self.hash == hash - } -} - -/// A Rabin-Karp hash. This might represent the hash of a needle, or the hash -/// of a rolling window in the haystack. -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub(crate) struct Hash(u32); - -impl Hash { - /// Create a new hash that represents the empty string. - pub(crate) fn new() -> Hash { - Hash(0) - } - - /// Create a new hash from the bytes given for use in forward searches. - pub(crate) fn from_bytes_fwd(bytes: &[u8]) -> Hash { - let mut hash = Hash::new(); - for &b in bytes { - hash.add(b); - } - hash - } - - /// Create a new hash from the bytes given for use in reverse searches. - fn from_bytes_rev(bytes: &[u8]) -> Hash { - let mut hash = Hash::new(); - for &b in bytes.iter().rev() { - hash.add(b); - } - hash - } - - /// Add 'new' and remove 'old' from this hash. The given needle hash should - /// correspond to the hash computed for the needle being searched for. - /// - /// This is meant to be used when the rolling window of the haystack is - /// advanced.
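The rolling invariant that the roll/add/del trio below maintains can be checked in isolation: with hash(b_0..b_{n-1}) = sum of b_i * 2^(n-1-i) (all arithmetic wrapping), sliding the window one byte forward is "subtract old*2^(n-1), shift left, add new". A standalone, hedged sketch (helper names are illustrative, not part of the source):

// Verify that rolling the hash one byte forward agrees with rehashing
// the new window from scratch, using the same wrapping arithmetic as
// Hash::add/Hash::del above.
fn hash(bytes: &[u8]) -> u32 {
    bytes.iter().fold(0u32, |h, &b| h.wrapping_shl(1).wrapping_add(b as u32))
}

fn demo_rolling_hash() {
    let haystack = b"abcdef";
    let n = 3;
    // 2^(n-1), built with wrapping shifts like NeedleHash::forward.
    let hash_2pow = 1u32.wrapping_shl(n as u32 - 1);
    let mut h = hash(&haystack[0..n]);
    for i in 1..=haystack.len() - n {
        let (old, new) = (haystack[i - 1], haystack[i + n - 1]);
        // roll = del(old) then add(new).
        h = h
            .wrapping_sub((old as u32).wrapping_mul(hash_2pow))
            .wrapping_shl(1)
            .wrapping_add(new as u32);
        assert_eq!(h, hash(&haystack[i..i + n]));
    }
}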
- fn roll(&mut self, nhash: &NeedleHash, old: u8, new: u8) { - self.del(nhash, old); - self.add(new); - } - - /// Add a byte to this hash. - fn add(&mut self, byte: u8) { - self.0 = self.0.wrapping_shl(1).wrapping_add(byte as u32); - } - - /// Remove a byte from this hash. The given needle hash should correspond - /// to the hash computed for the needle being searched for. - fn del(&mut self, nhash: &NeedleHash, byte: u8) { - let factor = nhash.hash_2pow; - self.0 = self.0.wrapping_sub((byte as u32).wrapping_mul(factor)); - } -} - -/// Returns true if the given needle is a prefix of the given haystack. -/// -/// We forcefully don't inline the is_prefix call and hint at the compiler that -/// it is unlikely to be called. This causes the inner rabinkarp loop above -/// to be a bit tighter and leads to some performance improvement. See the -/// memmem/krate/prebuilt/sliceslice-words/words benchmark. -#[cold] -#[inline(never)] -fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { - crate::memmem::util::is_prefix(haystack, needle) -} - -/// Returns true if the given needle is a suffix of the given haystack. -/// -/// See is_prefix for why this is forcefully not inlined. -#[cold] -#[inline(never)] -fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { - crate::memmem::util::is_suffix(haystack, needle) -} - -#[cfg(test)] -mod simpletests { - define_memmem_simple_tests!(super::find, super::rfind); -} - -#[cfg(all(test, feature = "std", not(miri)))] -mod proptests { - define_memmem_quickcheck_tests!(super::find, super::rfind); -} diff --git a/vendor/memchr/src/memmem/rarebytes.rs b/vendor/memchr/src/memmem/rarebytes.rs deleted file mode 100644 index fb33f68..0000000 --- a/vendor/memchr/src/memmem/rarebytes.rs +++ /dev/null @@ -1,136 +0,0 @@ -/// A heuristic frequency based detection of rare bytes for substring search. -/// -/// This detector attempts to pick out two bytes in a needle that are predicted -/// to occur least frequently. The purpose is to use these bytes to implement -/// fast candidate search using vectorized code. -/// -/// A set of offsets is only computed for needles of length 2 or greater. -/// Smaller needles should be special cased by the substring search algorithm -/// in use. (e.g., Use memchr for single byte needles.) -/// -/// Note that we use `u8` to represent the offsets of the rare bytes in a -/// needle to reduce space usage. This means that rare byte occurring after the -/// first 255 bytes in a needle will never be used. -#[derive(Clone, Copy, Debug, Default)] -pub(crate) struct RareNeedleBytes { - /// The leftmost offset of the rarest byte in the needle, according to - /// pre-computed frequency analysis. The "leftmost offset" means that - /// rare1i <= i for all i where needle[i] == needle[rare1i]. - rare1i: u8, - /// The leftmost offset of the second rarest byte in the needle, according - /// to pre-computed frequency analysis. The "leftmost offset" means that - /// rare2i <= i for all i where needle[i] == needle[rare2i]. - /// - /// The second rarest byte is used as a type of guard for quickly detecting - /// a mismatch if the first byte matches. This is a hedge against - /// pathological cases where the pre-computed frequency analysis may be - /// off. (But of course, does not prevent *all* pathological cases.) - /// - /// In general, rare1i != rare2i by construction, although there is no hard - /// requirement that they be different. 
However, since the case of a single - /// byte needle is handled specially by memchr itself, rare2i generally - /// always should be different from rare1i since it would otherwise be - /// ineffective as a guard. - rare2i: u8, -} - -impl RareNeedleBytes { - /// Create a new pair of rare needle bytes with the given offsets. This is - /// only used in tests for generating input data. - #[cfg(all(test, feature = "std"))] - pub(crate) fn new(rare1i: u8, rare2i: u8) -> RareNeedleBytes { - RareNeedleBytes { rare1i, rare2i } - } - - /// Detect the leftmost offsets of the two rarest bytes in the given - /// needle. - pub(crate) fn forward(needle: &[u8]) -> RareNeedleBytes { - if needle.len() <= 1 || needle.len() > core::u8::MAX as usize { - // For needles bigger than u8::MAX, our offsets aren't big enough. - // (We make our offsets small to reduce stack copying.) - // If you have a use case for it, please file an issue. In that - // case, we should probably just adjust the routine below to pick - // some rare bytes from the first 255 bytes of the needle. - // - // Also note that for needles of size 0 or 1, they are special - // cased in Two-Way. - // - // TODO: Benchmark this. - return RareNeedleBytes { rare1i: 0, rare2i: 0 }; - } - - // Find the rarest two bytes. We make them distinct by construction. - let (mut rare1, mut rare1i) = (needle[0], 0); - let (mut rare2, mut rare2i) = (needle[1], 1); - if rank(rare2) < rank(rare1) { - core::mem::swap(&mut rare1, &mut rare2); - core::mem::swap(&mut rare1i, &mut rare2i); - } - for (i, &b) in needle.iter().enumerate().skip(2) { - if rank(b) < rank(rare1) { - rare2 = rare1; - rare2i = rare1i; - rare1 = b; - rare1i = i as u8; - } else if b != rare1 && rank(b) < rank(rare2) { - rare2 = b; - rare2i = i as u8; - } - } - // While not strictly required, we really don't want these to be - // equivalent. If they were, it would reduce the effectiveness of - // candidate searching using these rare bytes by increasing the rate of - // false positives. - assert_ne!(rare1i, rare2i); - RareNeedleBytes { rare1i, rare2i } - } - - /// Return the rare bytes in the given needle in the forward direction. - /// The needle given must be the same one given to the RareNeedleBytes - /// constructor. - pub(crate) fn as_rare_bytes(&self, needle: &[u8]) -> (u8, u8) { - (needle[self.rare1i as usize], needle[self.rare2i as usize]) - } - - /// Return the rare offsets such that the first offset is always <= to the - /// second offset. This is useful when the caller doesn't care whether - /// rare1 is rarer than rare2, but just wants to ensure that they are - /// ordered with respect to one another. - #[cfg(memchr_runtime_simd)] - pub(crate) fn as_rare_ordered_usize(&self) -> (usize, usize) { - let (rare1i, rare2i) = self.as_rare_ordered_u8(); - (rare1i as usize, rare2i as usize) - } - - /// Like as_rare_ordered_usize, but returns the offsets as their native - /// u8 values. - #[cfg(memchr_runtime_simd)] - pub(crate) fn as_rare_ordered_u8(&self) -> (u8, u8) { - if self.rare1i <= self.rare2i { - (self.rare1i, self.rare2i) - } else { - (self.rare2i, self.rare1i) - } - } - - /// Return the rare offsets as usize values in the order in which they were - /// constructed. rare1, for example, is constructed as the "rarer" byte, - /// and thus, callers may want to treat it differently from rare2. - pub(crate) fn as_rare_usize(&self) -> (usize, usize) { - (self.rare1i as usize, self.rare2i as usize) - } - - /// Return the byte frequency rank of each byte.
The higher the rank, the -/// more frequently the byte is predicted to occur. The needle given must be -/// the same one given to the RareNeedleBytes constructor. - pub(crate) fn as_ranks(&self, needle: &[u8]) -> (usize, usize) { - let (b1, b2) = self.as_rare_bytes(needle); - (rank(b1), rank(b2)) - } -} - -/// Return the heuristical frequency rank of the given byte. A lower rank -/// means the byte is believed to occur less frequently. -fn rank(b: u8) -> usize { - crate::memmem::byte_frequencies::BYTE_FREQUENCIES[b as usize] as usize -} diff --git a/vendor/memchr/src/memmem/searcher.rs b/vendor/memchr/src/memmem/searcher.rs new file mode 100644 index 0000000..98b9bd6 --- /dev/null +++ b/vendor/memchr/src/memmem/searcher.rs @@ -0,0 +1,1030 @@ +use crate::arch::all::{ + packedpair::{HeuristicFrequencyRank, Pair}, + rabinkarp, twoway, +}; + +#[cfg(target_arch = "aarch64")] +use crate::arch::aarch64::neon::packedpair as neon; +#[cfg(target_arch = "wasm32")] +use crate::arch::wasm32::simd128::packedpair as simd128; +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +use crate::arch::x86_64::{ + avx2::packedpair as avx2, sse2::packedpair as sse2, +}; + +/// A "meta" substring searcher. +/// +/// To a first approximation, this chooses what it believes to be the "best" +/// substring search implementation based on the needle at construction time. +/// Then, every call to `find` will execute that particular implementation. To +/// a second approximation, multiple substring search algorithms may be used, +/// depending on the haystack. For example, for supremely short haystacks, +/// Rabin-Karp is typically used. +/// +/// See the documentation on `Prefilter` for an explanation of the dispatching +/// mechanism. The quick summary is that an enum has too much overhead and +/// we can't use dynamic dispatch via traits because we need to work in a +/// core-only environment. (Dynamic dispatch works in core-only, but you +/// need `&dyn Trait` and we really need a `Box<dyn Trait>` here. The latter +/// requires `alloc`.) So instead, we use a union and an appropriately paired +/// free function to read from the correct field on the union and execute the +/// chosen substring search implementation. +#[derive(Clone)] +pub(crate) struct Searcher { + call: SearcherKindFn, + kind: SearcherKind, + rabinkarp: rabinkarp::Finder, +} + +impl Searcher { + /// Creates a new "meta" substring searcher that attempts to choose the + /// best algorithm based on the needle, heuristics and what the current + /// target supports.
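The union-plus-paired-function-pointer scheme is easier to absorb in miniature. The sketch below is illustrative only (MiniSearcher and friends are invented names, and the real Searcher::new has many more arms), but it shows the safety invariant: the function pointer chosen at construction is the only code that reads the matching union field.

// A Copy union carries the chosen strategy's data; a function pointer
// selected alongside it knows which field it may read.
#[derive(Clone, Copy)]
union MiniKind {
    one_byte: u8,
    empty: (),
}

type MiniFn = unsafe fn(&MiniSearcher, &[u8]) -> Option<usize>;

struct MiniSearcher {
    call: MiniFn,
    kind: MiniKind,
}

unsafe fn mini_one_byte(s: &MiniSearcher, haystack: &[u8]) -> Option<usize> {
    // SAFETY: this function is only ever paired with MiniKind { one_byte }.
    let b = unsafe { s.kind.one_byte };
    haystack.iter().position(|&x| x == b)
}

impl MiniSearcher {
    fn new(needle: &[u8]) -> Option<MiniSearcher> {
        if needle.len() == 1 {
            Some(MiniSearcher {
                call: mini_one_byte,
                kind: MiniKind { one_byte: needle[0] },
            })
        } else {
            None // the real Searcher has many more strategies
        }
    }

    fn find(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: `call` was paired with `kind` at construction.
        unsafe { (self.call)(self, haystack) }
    }
}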
+ #[inline] + pub(crate) fn new<R: HeuristicFrequencyRank>( + prefilter: PrefilterConfig, + ranker: R, + needle: &[u8], + ) -> Searcher { + let rabinkarp = rabinkarp::Finder::new(needle); + if needle.len() <= 1 { + return if needle.is_empty() { + trace!("building empty substring searcher"); + Searcher { + call: searcher_kind_empty, + kind: SearcherKind { empty: () }, + rabinkarp, + } + } else { + trace!("building one-byte substring searcher"); + debug_assert_eq!(1, needle.len()); + Searcher { + call: searcher_kind_one_byte, + kind: SearcherKind { one_byte: needle[0] }, + rabinkarp, + } + }; + } + let pair = match Pair::with_ranker(needle, &ranker) { + Some(pair) => pair, + None => return Searcher::twoway(needle, rabinkarp, None), + }; + debug_assert_ne!( + pair.index1(), + pair.index2(), + "pair offsets should not be equivalent" + ); + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + { + if let Some(pp) = avx2::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building x86_64 AVX2 substring searcher"); + let kind = SearcherKind { avx2: pp }; + Searcher { call: searcher_kind_avx2, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::avx2(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if let Some(pp) = sse2::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building x86_64 SSE2 substring searcher"); + let kind = SearcherKind { sse2: pp }; + Searcher { call: searcher_kind_sse2, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::sse2(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + // We're pretty unlikely to get to this point, but it is + // possible to be running on x86_64 without SSE2. Namely, it's + // really up to the OS whether it wants to support vector + // registers or not.
+                let prestrat = Prefilter::fallback(ranker, pair, needle);
+                Searcher::twoway(needle, rabinkarp, prestrat)
+            }
+        }
+        #[cfg(target_arch = "wasm32")]
+        {
+            if let Some(pp) = simd128::Finder::with_pair(needle, pair) {
+                if do_packed_search(needle) {
+                    trace!("building wasm32 simd128 substring searcher");
+                    let kind = SearcherKind { simd128: pp };
+                    Searcher { call: searcher_kind_simd128, kind, rabinkarp }
+                } else if prefilter.is_none() {
+                    Searcher::twoway(needle, rabinkarp, None)
+                } else {
+                    let prestrat = Prefilter::simd128(pp, needle);
+                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
+                }
+            } else if prefilter.is_none() {
+                Searcher::twoway(needle, rabinkarp, None)
+            } else {
+                let prestrat = Prefilter::fallback(ranker, pair, needle);
+                Searcher::twoway(needle, rabinkarp, prestrat)
+            }
+        }
+        #[cfg(target_arch = "aarch64")]
+        {
+            if let Some(pp) = neon::Finder::with_pair(needle, pair) {
+                if do_packed_search(needle) {
+                    trace!("building aarch64 neon substring searcher");
+                    let kind = SearcherKind { neon: pp };
+                    Searcher { call: searcher_kind_neon, kind, rabinkarp }
+                } else if prefilter.is_none() {
+                    Searcher::twoway(needle, rabinkarp, None)
+                } else {
+                    let prestrat = Prefilter::neon(pp, needle);
+                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
+                }
+            } else if prefilter.is_none() {
+                Searcher::twoway(needle, rabinkarp, None)
+            } else {
+                let prestrat = Prefilter::fallback(ranker, pair, needle);
+                Searcher::twoway(needle, rabinkarp, prestrat)
+            }
+        }
+        #[cfg(not(any(
+            all(target_arch = "x86_64", target_feature = "sse2"),
+            target_arch = "wasm32",
+            target_arch = "aarch64"
+        )))]
+        {
+            if prefilter.is_none() {
+                Searcher::twoway(needle, rabinkarp, None)
+            } else {
+                let prestrat = Prefilter::fallback(ranker, pair, needle);
+                Searcher::twoway(needle, rabinkarp, prestrat)
+            }
+        }
+    }
+
+    /// Creates a new searcher that always uses the Two-Way algorithm. This is
+    /// typically used when vector algorithms are unavailable or inappropriate.
+    /// (For example, when the needle is "too long.")
+    ///
+    /// If a prefilter is given, then the searcher returned will be accelerated
+    /// by the prefilter.
+    #[inline]
+    fn twoway(
+        needle: &[u8],
+        rabinkarp: rabinkarp::Finder,
+        prestrat: Option<Prefilter>,
+    ) -> Searcher {
+        let finder = twoway::Finder::new(needle);
+        match prestrat {
+            None => {
+                trace!("building scalar two-way substring searcher");
+                let kind = SearcherKind { two_way: finder };
+                Searcher { call: searcher_kind_two_way, kind, rabinkarp }
+            }
+            Some(prestrat) => {
+                trace!(
+                    "building scalar two-way \
+                     substring searcher with a prefilter"
+                );
+                let two_way_with_prefilter =
+                    TwoWayWithPrefilter { finder, prestrat };
+                let kind = SearcherKind { two_way_with_prefilter };
+                Searcher {
+                    call: searcher_kind_two_way_with_prefilter,
+                    kind,
+                    rabinkarp,
+                }
+            }
+        }
+    }
+
+    /// Searches the given haystack for the given needle. The needle given
+    /// should be the same as the needle that this finder was initialized
+    /// with.
+    ///
+    /// Inlining this can lead to big wins for latency, and #[inline] doesn't
+    /// seem to be enough in some cases.
+    #[inline(always)]
+    pub(crate) fn find(
+        &self,
+        prestate: &mut PrefilterState,
+        haystack: &[u8],
+        needle: &[u8],
+    ) -> Option<usize> {
+        if haystack.len() < needle.len() {
+            None
+        } else {
+            // SAFETY: By construction, we've ensured that the function
+            // in `self.call` is properly paired with the union used in
+            // `self.kind`.
+            unsafe { (self.call)(self, prestate, haystack, needle) }
+        }
+    }
+}
+
+impl core::fmt::Debug for Searcher {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.debug_struct("Searcher")
+            .field("call", &"<searcher fn>")
+            .field("kind", &"<searcher kind>")
+            .field("rabinkarp", &self.rabinkarp)
+            .finish()
+    }
+}
+
+/// A union indicating one of several possible substring search implementations
+/// that are in active use.
+///
+/// This union should only be read by one of the functions prefixed with
+/// `searcher_kind_`. Namely, the correct function is meant to be paired with
+/// the union by the caller, such that the function always reads from the
+/// designated union field.
+#[derive(Clone, Copy)]
+union SearcherKind {
+    empty: (),
+    one_byte: u8,
+    two_way: twoway::Finder,
+    two_way_with_prefilter: TwoWayWithPrefilter,
+    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
+    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
+    #[cfg(target_arch = "wasm32")]
+    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
+    #[cfg(target_arch = "aarch64")]
+    neon: crate::arch::aarch64::neon::packedpair::Finder,
+}
+
+/// A two-way substring searcher with a prefilter.
+#[derive(Copy, Clone, Debug)]
+struct TwoWayWithPrefilter {
+    finder: twoway::Finder,
+    prestrat: Prefilter,
+}
+
+/// The type of a substring search function.
+///
+/// # Safety
+///
+/// When using a function of this type, callers must ensure that the correct
+/// function is paired with the value populated in the `SearcherKind` union.
+type SearcherKindFn = unsafe fn(
+    searcher: &Searcher,
+    prestate: &mut PrefilterState,
+    haystack: &[u8],
+    needle: &[u8],
+) -> Option<usize>;
+
+/// Reads from the `empty` field of `SearcherKind` to handle the case of
+/// searching for the empty needle. Works on all platforms.
+///
+/// # Safety
+///
+/// Callers must ensure that the `searcher.kind.empty` union field is set.
+unsafe fn searcher_kind_empty(
+    _searcher: &Searcher,
+    _prestate: &mut PrefilterState,
+    _haystack: &[u8],
+    _needle: &[u8],
+) -> Option<usize> {
+    Some(0)
+}
+
+/// Reads from the `one_byte` field of `SearcherKind` to handle the case of
+/// searching for a single byte needle. Works on all platforms.
+///
+/// # Safety
+///
+/// Callers must ensure that the `searcher.kind.one_byte` union field is set.
+unsafe fn searcher_kind_one_byte(
+    searcher: &Searcher,
+    _prestate: &mut PrefilterState,
+    haystack: &[u8],
+    _needle: &[u8],
+) -> Option<usize> {
+    let needle = searcher.kind.one_byte;
+    crate::memchr(needle, haystack)
+}
+
+/// Reads from the `two_way` field of `SearcherKind` to handle the case of
+/// searching for an arbitrary needle without prefilter acceleration. Works on
+/// all platforms.
+///
+/// # Safety
+///
+/// Callers must ensure that the `searcher.kind.two_way` union field is set.
+unsafe fn searcher_kind_two_way(
+    searcher: &Searcher,
+    _prestate: &mut PrefilterState,
+    haystack: &[u8],
+    needle: &[u8],
+) -> Option<usize> {
+    if rabinkarp::is_fast(haystack, needle) {
+        searcher.rabinkarp.find(haystack, needle)
+    } else {
+        searcher.kind.two_way.find(haystack, needle)
+    }
+}
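The pairing contract described above is easier to see in miniature. Here is a standalone sketch of the same shape (all names are hypothetical, nothing from this crate): each constructor writes one union field together with the one free function that knows how to read it, and a safe wrapper upholds the invariant.

```rust
// Sketch of union + function-pointer dispatch. Illustrative names only.

#[derive(Clone, Copy)]
union Kind {
    byte: u8,
    offset: usize,
}

/// Must only be called with the union field that was actually written.
type KindFn = unsafe fn(kind: &Kind, haystack: &[u8]) -> Option<usize>;

// Safety: requires that `kind.byte` is the field that was set.
unsafe fn find_byte(kind: &Kind, haystack: &[u8]) -> Option<usize> {
    let byte = unsafe { kind.byte };
    haystack.iter().position(|&b| b == byte)
}

// Safety: requires that `kind.offset` is the field that was set.
unsafe fn check_offset(kind: &Kind, haystack: &[u8]) -> Option<usize> {
    let offset = unsafe { kind.offset };
    if offset < haystack.len() {
        Some(offset)
    } else {
        None
    }
}

struct Dispatcher {
    call: KindFn,
    kind: Kind,
}

impl Dispatcher {
    fn byte(byte: u8) -> Dispatcher {
        Dispatcher { call: find_byte, kind: Kind { byte } }
    }

    fn offset(offset: usize) -> Dispatcher {
        Dispatcher { call: check_offset, kind: Kind { offset } }
    }

    fn find(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: every constructor pairs `call` with the field it wrote.
        unsafe { (self.call)(&self.kind, haystack) }
    }
}

fn main() {
    assert_eq!(Some(2), Dispatcher::byte(b'c').find(b"abc"));
    assert_eq!(Some(1), Dispatcher::offset(1).find(b"abc"));
    assert_eq!(None, Dispatcher::offset(9).find(b"abc"));
}
```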
+/// Reads from the `two_way_with_prefilter` field of `SearcherKind` to handle
+/// the case of searching for an arbitrary needle with prefilter acceleration.
+/// Works on all platforms.
+///
+/// # Safety
+///
+/// Callers must ensure that the `searcher.kind.two_way_with_prefilter` union
+/// field is set.
+unsafe fn searcher_kind_two_way_with_prefilter(
+    searcher: &Searcher,
+    prestate: &mut PrefilterState,
+    haystack: &[u8],
+    needle: &[u8],
+) -> Option<usize> {
+    if rabinkarp::is_fast(haystack, needle) {
+        searcher.rabinkarp.find(haystack, needle)
+    } else {
+        let TwoWayWithPrefilter { ref finder, ref prestrat } =
+            searcher.kind.two_way_with_prefilter;
+        let pre = Pre { prestate, prestrat };
+        finder.find_with_prefilter(Some(pre), haystack, needle)
+    }
+}
+
+/// Reads from the `sse2` field of `SearcherKind` to execute the x86_64 SSE2
+/// vectorized substring search implementation.
+///
+/// # Safety
+///
+/// Callers must ensure that the `searcher.kind.sse2` union field is set.
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+unsafe fn searcher_kind_sse2(
+    searcher: &Searcher,
+    _prestate: &mut PrefilterState,
+    haystack: &[u8],
+    needle: &[u8],
+) -> Option<usize> {
+    let finder = &searcher.kind.sse2;
+    if haystack.len() < finder.min_haystack_len() {
+        searcher.rabinkarp.find(haystack, needle)
+    } else {
+        finder.find(haystack, needle)
+    }
+}
+
+/// Reads from the `avx2` field of `SearcherKind` to execute the x86_64 AVX2
+/// vectorized substring search implementation.
+///
+/// # Safety
+///
+/// Callers must ensure that the `searcher.kind.avx2` union field is set.
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+unsafe fn searcher_kind_avx2(
+    searcher: &Searcher,
+    _prestate: &mut PrefilterState,
+    haystack: &[u8],
+    needle: &[u8],
+) -> Option<usize> {
+    let finder = &searcher.kind.avx2;
+    if haystack.len() < finder.min_haystack_len() {
+        searcher.rabinkarp.find(haystack, needle)
+    } else {
+        finder.find(haystack, needle)
+    }
+}
+
+/// Reads from the `simd128` field of `SearcherKind` to execute the wasm32
+/// simd128 vectorized substring search implementation.
+///
+/// # Safety
+///
+/// Callers must ensure that the `searcher.kind.simd128` union field is set.
+#[cfg(target_arch = "wasm32")]
+unsafe fn searcher_kind_simd128(
+    searcher: &Searcher,
+    _prestate: &mut PrefilterState,
+    haystack: &[u8],
+    needle: &[u8],
+) -> Option<usize> {
+    let finder = &searcher.kind.simd128;
+    if haystack.len() < finder.min_haystack_len() {
+        searcher.rabinkarp.find(haystack, needle)
+    } else {
+        finder.find(haystack, needle)
+    }
+}
+
+/// Reads from the `neon` field of `SearcherKind` to execute the aarch64 neon
+/// vectorized substring search implementation.
+///
+/// # Safety
+///
+/// Callers must ensure that the `searcher.kind.neon` union field is set.
+#[cfg(target_arch = "aarch64")]
+unsafe fn searcher_kind_neon(
+    searcher: &Searcher,
+    _prestate: &mut PrefilterState,
+    haystack: &[u8],
+    needle: &[u8],
+) -> Option<usize> {
+    let finder = &searcher.kind.neon;
+    if haystack.len() < finder.min_haystack_len() {
+        searcher.rabinkarp.find(haystack, needle)
+    } else {
+        finder.find(haystack, needle)
+    }
+}
+
+/// A reverse substring searcher.
+#[derive(Clone, Debug)]
+pub(crate) struct SearcherRev {
+    kind: SearcherRevKind,
+    rabinkarp: rabinkarp::FinderRev,
+}
+
+/// The kind of the reverse searcher.
+///
+/// For the reverse case, we don't do any SIMD acceleration or prefilters.
+/// There is no specific technical reason why we don't, but rather we don't
+/// do it because it's not clear it's worth the extra code. If you have a
+/// use case for it, please file an issue.
+///
+/// We also don't do the union trick as we do with the forward case and
+/// prefilters.
+/// Basically for the same reason we don't have prefilters or vector
+/// algorithms for reverse searching: it's not clear it's worth doing. Please
+/// file an issue if you have a compelling use case for fast reverse substring
+/// search.
+#[derive(Clone, Debug)]
+enum SearcherRevKind {
+    Empty,
+    OneByte { needle: u8 },
+    TwoWay { finder: twoway::FinderRev },
+}
+
+impl SearcherRev {
+    /// Creates a new searcher for finding occurrences of the given needle in
+    /// reverse. That is, it reports the last (instead of the first) occurrence
+    /// of a needle in a haystack.
+    #[inline]
+    pub(crate) fn new(needle: &[u8]) -> SearcherRev {
+        let kind = if needle.len() <= 1 {
+            if needle.is_empty() {
+                trace!("building empty reverse substring searcher");
+                SearcherRevKind::Empty
+            } else {
+                trace!("building one-byte reverse substring searcher");
+                debug_assert_eq!(1, needle.len());
+                SearcherRevKind::OneByte { needle: needle[0] }
+            }
+        } else {
+            trace!("building scalar two-way reverse substring searcher");
+            let finder = twoway::FinderRev::new(needle);
+            SearcherRevKind::TwoWay { finder }
+        };
+        let rabinkarp = rabinkarp::FinderRev::new(needle);
+        SearcherRev { kind, rabinkarp }
+    }
+
+    /// Searches the given haystack for the last occurrence of the given
+    /// needle. The needle given should be the same as the needle that this
+    /// finder was initialized with.
+    #[inline]
+    pub(crate) fn rfind(
+        &self,
+        haystack: &[u8],
+        needle: &[u8],
+    ) -> Option<usize> {
+        if haystack.len() < needle.len() {
+            return None;
+        }
+        match self.kind {
+            SearcherRevKind::Empty => Some(haystack.len()),
+            SearcherRevKind::OneByte { needle } => {
+                crate::memrchr(needle, haystack)
+            }
+            SearcherRevKind::TwoWay { ref finder } => {
+                if rabinkarp::is_fast(haystack, needle) {
+                    self.rabinkarp.rfind(haystack, needle)
+                } else {
+                    finder.rfind(haystack, needle)
+                }
+            }
+        }
+    }
+}
+
+/// Prefilter controls whether heuristics are used to accelerate searching.
+///
+/// A prefilter refers to the idea of detecting candidate matches very quickly,
+/// and then confirming whether those candidates are full matches. This
+/// idea can be quite effective since it's often the case that looking for
+/// candidates can be a lot faster than running a complete substring search
+/// over the entire input. Namely, looking for candidates can be done with
+/// extremely fast vectorized code.
+///
+/// The downside of a prefilter is that it assumes false positives (which are
+/// candidates generated by a prefilter that aren't matches) are somewhat rare
+/// relative to the frequency of full matches. That is, if a lot of false
+/// positives are generated, then it's possible for search time to be worse
+/// than if the prefilter wasn't enabled in the first place.
+///
+/// Another downside of a prefilter is that it can result in highly variable
+/// performance, where some cases are extraordinarily fast and others aren't.
+/// Typically, variable performance isn't a problem, but it may be for your use
+/// case.
+///
+/// The use of prefilters in this implementation does use a heuristic to detect
+/// when a prefilter might not be carrying its weight, and will dynamically
+/// disable its use. Nevertheless, this configuration option gives callers
+/// the ability to disable prefilters if you have knowledge that they won't be
+/// useful.
+#[derive(Clone, Copy, Debug)]
+#[non_exhaustive]
+pub enum PrefilterConfig {
+    /// Never use a prefilter in substring search.
+    None,
+    /// Automatically detect whether a heuristic prefilter should be used.
+    /// If it is used, then heuristics will be used to dynamically disable
+    /// the prefilter if it is believed to not be carrying its weight.
+    Auto,
+}
+
+impl Default for PrefilterConfig {
+    fn default() -> PrefilterConfig {
+        PrefilterConfig::Auto
+    }
+}
+
+impl PrefilterConfig {
+    /// Returns true when this prefilter is set to the `None` variant.
+    fn is_none(&self) -> bool {
+        matches!(*self, PrefilterConfig::None)
+    }
+}
+
+/// The implementation of a prefilter.
+///
+/// This type encapsulates dispatch to one of several possible choices for a
+/// prefilter. Generally speaking, all prefilters have the same approximate
+/// algorithm: they choose a couple of bytes from the needle that are believed
+/// to be rare, use a fast vector algorithm to look for those bytes and return
+/// positions as candidates for some substring search algorithm (currently only
+/// Two-Way) to confirm as a match or not.
+///
+/// The differences between the algorithms are actually at the vector
+/// implementation level. Namely, we need different routines based on both
+/// which target architecture we're on and what CPU features are supported.
+///
+/// The straightforwardly obvious approach here is to use an enum, and make
+/// `Prefilter::find` do case analysis to determine which algorithm was
+/// selected and invoke it. However, I've observed that this leads to poor
+/// codegen in some cases, especially in latency sensitive benchmarks. That is,
+/// this approach comes with overhead that I wasn't able to eliminate.
+///
+/// The second obvious approach is to use dynamic dispatch with traits. Doing
+/// that in this context where `Prefilter` owns the selection generally
+/// requires heap allocation, and this code is designed to run in core-only
+/// environments.
+///
+/// So we settle on using a union (that's `PrefilterKind`) and a function
+/// pointer (that's `PrefilterKindFn`). We select the right function pointer
+/// based on which field in the union we set, and that function in turn
+/// knows which field of the union to access. The downside of this approach
+/// is that it forces us to think about safety, but the upside is that
+/// there are some nice latency improvements to benchmarks. (Especially the
+/// `memmem/sliceslice/short` benchmark.)
+///
+/// In cases where we've selected a vector algorithm and the haystack given
+/// is too short, we fall back to the scalar version of `memchr` on the
+/// `rarest_byte`. (The scalar version of `memchr` is still better than a naive
+/// byte-at-a-time loop because it will read in `usize`-sized chunks at a
+/// time.)
+#[derive(Clone, Copy)]
+struct Prefilter {
+    call: PrefilterKindFn,
+    kind: PrefilterKind,
+    rarest_byte: u8,
+    rarest_offset: u8,
+}
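As a concrete illustration of the candidate/confirm split described above, here is a minimal scalar sketch. It is illustrative only: it uses a plain byte scan where the real prefilters use vectorized pair scanning, and it simply pretends the needle's last byte is the rare one.

```rust
// Candidate-then-confirm search: a cheap scan proposes positions, and a
// full comparison confirms or rejects each candidate. Sketch only.

fn find_with_prefilter(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let rare = *needle.last()?; // pretend the last byte is the rare one
    let rare_offset = needle.len() - 1;
    let mut pos = 0;
    while pos + needle.len() <= haystack.len() {
        // Prefilter: find the next occurrence of the rare byte.
        let i =
            haystack[pos + rare_offset..].iter().position(|&b| b == rare)?;
        let start = pos + i;
        // Confirm: check the whole needle at the candidate position.
        if haystack[start..].starts_with(needle) {
            return Some(start);
        }
        pos = start + 1;
    }
    None
}

fn main() {
    assert_eq!(Some(4), find_with_prefilter(b"aaaaabc", b"abc"));
    assert_eq!(None, find_with_prefilter(b"aaaa", b"abc"));
}
```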
+impl Prefilter {
+    /// Return a "fallback" prefilter, but only if it is believed to be
+    /// effective.
+    #[inline]
+    fn fallback<R: HeuristicFrequencyRank>(
+        ranker: R,
+        pair: Pair,
+        needle: &[u8],
+    ) -> Option<Prefilter> {
+        /// The maximum frequency rank permitted for the fallback prefilter.
+        /// If the rarest byte in the needle has a frequency rank above this
+        /// value, then no prefilter is used if the fallback prefilter would
+        /// otherwise be selected.
+        const MAX_FALLBACK_RANK: u8 = 250;
+
+        trace!("building fallback prefilter");
+        let rarest_offset = pair.index1();
+        let rarest_byte = needle[usize::from(rarest_offset)];
+        let rarest_rank = ranker.rank(rarest_byte);
+        if rarest_rank > MAX_FALLBACK_RANK {
+            None
+        } else {
+            let finder = crate::arch::all::packedpair::Finder::with_pair(
+                needle,
+                pair.clone(),
+            )?;
+            let call = prefilter_kind_fallback;
+            let kind = PrefilterKind { fallback: finder };
+            Some(Prefilter { call, kind, rarest_byte, rarest_offset })
+        }
+    }
+
+    /// Return a prefilter using an x86_64 SSE2 vector algorithm.
+    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+    #[inline]
+    fn sse2(finder: sse2::Finder, needle: &[u8]) -> Prefilter {
+        trace!("building x86_64 SSE2 prefilter");
+        let rarest_offset = finder.pair().index1();
+        let rarest_byte = needle[usize::from(rarest_offset)];
+        Prefilter {
+            call: prefilter_kind_sse2,
+            kind: PrefilterKind { sse2: finder },
+            rarest_byte,
+            rarest_offset,
+        }
+    }
+
+    /// Return a prefilter using an x86_64 AVX2 vector algorithm.
+    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+    #[inline]
+    fn avx2(finder: avx2::Finder, needle: &[u8]) -> Prefilter {
+        trace!("building x86_64 AVX2 prefilter");
+        let rarest_offset = finder.pair().index1();
+        let rarest_byte = needle[usize::from(rarest_offset)];
+        Prefilter {
+            call: prefilter_kind_avx2,
+            kind: PrefilterKind { avx2: finder },
+            rarest_byte,
+            rarest_offset,
+        }
+    }
+
+    /// Return a prefilter using a wasm32 simd128 vector algorithm.
+    #[cfg(target_arch = "wasm32")]
+    #[inline]
+    fn simd128(finder: simd128::Finder, needle: &[u8]) -> Prefilter {
+        trace!("building wasm32 simd128 prefilter");
+        let rarest_offset = finder.pair().index1();
+        let rarest_byte = needle[usize::from(rarest_offset)];
+        Prefilter {
+            call: prefilter_kind_simd128,
+            kind: PrefilterKind { simd128: finder },
+            rarest_byte,
+            rarest_offset,
+        }
+    }
+
+    /// Return a prefilter using an aarch64 neon vector algorithm.
+    #[cfg(target_arch = "aarch64")]
+    #[inline]
+    fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter {
+        trace!("building aarch64 neon prefilter");
+        let rarest_offset = finder.pair().index1();
+        let rarest_byte = needle[usize::from(rarest_offset)];
+        Prefilter {
+            call: prefilter_kind_neon,
+            kind: PrefilterKind { neon: finder },
+            rarest_byte,
+            rarest_offset,
+        }
+    }
+
+    /// Return a *candidate* position for a match.
+    ///
+    /// When this returns an offset, it implies that a match could begin at
+    /// that offset, but it may not. That is, it is possible for a false
+    /// positive to be returned.
+    ///
+    /// When `None` is returned, then it is guaranteed that there are no
+    /// matches for the needle in the given haystack. That is, it is impossible
+    /// for a false negative to be returned.
+    ///
+    /// The purpose of this routine is to look for candidate matching positions
+    /// as quickly as possible before running a (likely) slower confirmation
+    /// step.
+    #[inline]
+    fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: By construction, we've ensured that the function in
+        // `self.call` is properly paired with the union used in `self.kind`.
+        unsafe { (self.call)(self, haystack) }
+    }
+
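The `find_simple` helper below maps a hit on the rarest byte back to a candidate starting position by subtracting that byte's offset within the needle. A toy version of just that mapping:

```rust
// If the needle's rarest byte lives at offset `rarest_offset`, a hit at
// haystack index `hit` suggests a match starting at `hit - rarest_offset`
// (saturating at 0 for hits too close to the front). Sketch only.

fn candidate_start(hit: usize, rarest_offset: u8) -> usize {
    hit.saturating_sub(usize::from(rarest_offset))
}

fn main() {
    // needle = "xyz", rarest byte 'z' at offset 2, found at haystack[7]:
    assert_eq!(5, candidate_start(7, 2));
    // A hit at index 1 with offset 2 saturates to 0 instead of underflowing:
    assert_eq!(0, candidate_start(1, 2));
}
```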
+    /// A "simple" prefilter that just looks for the occurrence of the rarest
+    /// byte from the needle. This is generally only used for very small
+    /// haystacks.
+    #[inline]
+    fn find_simple(&self, haystack: &[u8]) -> Option<usize> {
+        // We don't use crate::memchr here because the haystack should be small
+        // enough that memchr won't be able to use vector routines anyway. So
+        // we just skip straight to the fallback implementation which is likely
+        // faster. (A byte-at-a-time loop is only used when the haystack is
+        // smaller than `size_of::<usize>()`.)
+        crate::arch::all::memchr::One::new(self.rarest_byte)
+            .find(haystack)
+            .map(|i| i.saturating_sub(usize::from(self.rarest_offset)))
+    }
+}
+
+impl core::fmt::Debug for Prefilter {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.debug_struct("Prefilter")
+            .field("call", &"<prefilter fn>")
+            .field("kind", &"<prefilter kind>")
+            .field("rarest_byte", &self.rarest_byte)
+            .field("rarest_offset", &self.rarest_offset)
+            .finish()
+    }
+}
+
+/// A union indicating one of several possible prefilters that are in active
+/// use.
+///
+/// This union should only be read by one of the functions prefixed with
+/// `prefilter_kind_`. Namely, the correct function is meant to be paired with
+/// the union by the caller, such that the function always reads from the
+/// designated union field.
+#[derive(Clone, Copy)]
+union PrefilterKind {
+    fallback: crate::arch::all::packedpair::Finder,
+    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
+    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
+    #[cfg(target_arch = "wasm32")]
+    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
+    #[cfg(target_arch = "aarch64")]
+    neon: crate::arch::aarch64::neon::packedpair::Finder,
+}
+
+/// The type of a prefilter function.
+///
+/// # Safety
+///
+/// When using a function of this type, callers must ensure that the correct
+/// function is paired with the value populated in the `PrefilterKind` union.
+type PrefilterKindFn =
+    unsafe fn(strat: &Prefilter, haystack: &[u8]) -> Option<usize>;
+
+/// Reads from the `fallback` field of `PrefilterKind` to execute the fallback
+/// prefilter. Works on all platforms.
+///
+/// # Safety
+///
+/// Callers must ensure that the `strat.kind.fallback` union field is set.
+unsafe fn prefilter_kind_fallback(
+    strat: &Prefilter,
+    haystack: &[u8],
+) -> Option<usize> {
+    strat.kind.fallback.find_prefilter(haystack)
+}
+
+/// Reads from the `sse2` field of `PrefilterKind` to execute the x86_64 SSE2
+/// prefilter.
+///
+/// # Safety
+///
+/// Callers must ensure that the `strat.kind.sse2` union field is set.
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+unsafe fn prefilter_kind_sse2(
+    strat: &Prefilter,
+    haystack: &[u8],
+) -> Option<usize> {
+    let finder = &strat.kind.sse2;
+    if haystack.len() < finder.min_haystack_len() {
+        strat.find_simple(haystack)
+    } else {
+        finder.find_prefilter(haystack)
+    }
+}
+
+/// Reads from the `avx2` field of `PrefilterKind` to execute the x86_64 AVX2
+/// prefilter.
+///
+/// # Safety
+///
+/// Callers must ensure that the `strat.kind.avx2` union field is set.
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+unsafe fn prefilter_kind_avx2(
+    strat: &Prefilter,
+    haystack: &[u8],
+) -> Option<usize> {
+    let finder = &strat.kind.avx2;
+    if haystack.len() < finder.min_haystack_len() {
+        strat.find_simple(haystack)
+    } else {
+        finder.find_prefilter(haystack)
+    }
+}
+
+/// Reads from the `simd128` field of `PrefilterKind` to execute the wasm32
+/// simd128 prefilter.
+///
+/// # Safety
+///
+/// Callers must ensure that the `strat.kind.simd128` union field is set.
+#[cfg(target_arch = "wasm32")]
+unsafe fn prefilter_kind_simd128(
+    strat: &Prefilter,
+    haystack: &[u8],
+) -> Option<usize> {
+    let finder = &strat.kind.simd128;
+    if haystack.len() < finder.min_haystack_len() {
+        strat.find_simple(haystack)
+    } else {
+        finder.find_prefilter(haystack)
+    }
+}
+
+/// Reads from the `neon` field of `PrefilterKind` to execute the aarch64 neon
+/// prefilter.
+///
+/// # Safety
+///
+/// Callers must ensure that the `strat.kind.neon` union field is set.
+#[cfg(target_arch = "aarch64")]
+unsafe fn prefilter_kind_neon(
+    strat: &Prefilter,
+    haystack: &[u8],
+) -> Option<usize> {
+    let finder = &strat.kind.neon;
+    if haystack.len() < finder.min_haystack_len() {
+        strat.find_simple(haystack)
+    } else {
+        finder.find_prefilter(haystack)
+    }
+}
+
+/// PrefilterState tracks state associated with the effectiveness of a
+/// prefilter. It is used to track how many bytes, on average, are skipped by
+/// the prefilter. If this average dips below a certain threshold over time,
+/// then the state renders the prefilter inert and stops using it.
+///
+/// A prefilter state should be created for each search. (Where creating an
+/// iterator is treated as a single search.)
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct PrefilterState {
+    /// The number of skips that have been executed. This is always 1 greater
+    /// than the actual number of skips. The special sentinel value of 0
+    /// indicates that the prefilter is inert. This is useful to avoid
+    /// additional checks to determine whether the prefilter is still
+    /// "effective." Once a prefilter becomes inert, it should no longer be
+    /// used (according to our heuristics).
+    skips: u32,
+    /// The total number of bytes that have been skipped.
+    skipped: u32,
+}
+
+impl PrefilterState {
+    /// The minimum number of skip attempts to try before considering whether
+    /// a prefilter is effective or not.
+    const MIN_SKIPS: u32 = 50;
+
+    /// The minimum amount of bytes that skipping must average.
+    ///
+    /// This value was chosen based on varying it and checking
+    /// the microbenchmarks. In particular, this can impact the
+    /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set
+    /// too low.
+    const MIN_SKIP_BYTES: u32 = 8;
+
+    /// Create a fresh prefilter state.
+    #[inline]
+    pub(crate) fn new() -> PrefilterState {
+        PrefilterState { skips: 1, skipped: 0 }
+    }
+
+    /// Update this state with the number of bytes skipped on the last
+    /// invocation of the prefilter.
+    #[inline]
+    fn update(&mut self, skipped: usize) {
+        self.skips = self.skips.saturating_add(1);
+        // We need to do this dance since it's technically possible for
+        // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the
+        // size of a prefilter state.)
+        self.skipped = match u32::try_from(skipped) {
+            Err(_) => core::u32::MAX,
+            Ok(skipped) => self.skipped.saturating_add(skipped),
+        };
+    }
+
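The `is_effective` check below applies those two constants: the prefilter gets the benefit of the doubt for the first `MIN_SKIPS` attempts, and after that its average skip must stay at or above `MIN_SKIP_BYTES`. A simplified standalone version of the heuristic (without the sentinel encoding the real `skips` counter uses):

```rust
// Sketch of the skip-average heuristic with the same constants.

struct State {
    skips: u32,
    skipped: u32,
}

impl State {
    const MIN_SKIPS: u32 = 50;
    const MIN_SKIP_BYTES: u32 = 8;

    fn update(&mut self, skipped: u32) {
        self.skips += 1;
        self.skipped = self.skipped.saturating_add(skipped);
    }

    fn is_effective(&self) -> bool {
        // Too few samples to judge, or the average skip is large enough.
        self.skips < Self::MIN_SKIPS
            || self.skipped >= Self::MIN_SKIP_BYTES * self.skips
    }
}

fn main() {
    let mut s = State { skips: 0, skipped: 0 };
    // 50 invocations that each skip only 2 bytes: average 2 < 8, so the
    // heuristic reports the prefilter as no longer worth using.
    for _ in 0..50 {
        s.update(2);
    }
    assert!(!s.is_effective());
}
```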
+    /// Return true if and only if this state indicates that a prefilter is
+    /// still effective.
+    #[inline]
+    fn is_effective(&mut self) -> bool {
+        if self.is_inert() {
+            return false;
+        }
+        if self.skips() < PrefilterState::MIN_SKIPS {
+            return true;
+        }
+        if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() {
+            return true;
+        }
+
+        // We're inert.
+        self.skips = 0;
+        false
+    }
+
+    /// Returns true if the prefilter this state represents should no longer
+    /// be used.
+    #[inline]
+    fn is_inert(&self) -> bool {
+        self.skips == 0
+    }
+
+    /// Returns the total number of times the prefilter has been used.
+    #[inline]
+    fn skips(&self) -> u32 {
+        // Remember, `0` is a sentinel value indicating inertness, so we
+        // always need to subtract `1` to get our actual number of skips.
+        self.skips.saturating_sub(1)
+    }
+}
+
+/// A combination of prefilter effectiveness state and the prefilter itself.
+#[derive(Debug)]
+pub(crate) struct Pre<'a> {
+    /// State that tracks the effectiveness of a prefilter.
+    prestate: &'a mut PrefilterState,
+    /// The actual prefilter.
+    prestrat: &'a Prefilter,
+}
+
+impl<'a> Pre<'a> {
+    /// Call this prefilter on the given haystack with the given needle.
+    #[inline]
+    pub(crate) fn find(&mut self, haystack: &[u8]) -> Option<usize> {
+        let result = self.prestrat.find(haystack);
+        self.prestate.update(result.unwrap_or(haystack.len()));
+        result
+    }
+
+    /// Return true if and only if this prefilter should be used.
+    #[inline]
+    pub(crate) fn is_effective(&mut self) -> bool {
+        self.prestate.is_effective()
+    }
+}
+
+/// Returns true if the needle has the right characteristics for a vector
+/// algorithm to handle the entirety of substring search.
+///
+/// Vector algorithms can be used for prefilters for other substring search
+/// algorithms (like Two-Way), but they can also be used for substring search
+/// on their own. When used for substring search, vector algorithms will
+/// quickly identify candidate match positions (just like in the prefilter
+/// case), but instead of returning the candidate position they will try to
+/// confirm the match themselves. Confirmation happens via `memcmp`. This
+/// works well for short needles, but can break down when many false candidate
+/// positions are generated for large needles. Thus, we only permit vector
+/// algorithms to own substring search when the needle is of a certain length.
+#[inline]
+fn do_packed_search(needle: &[u8]) -> bool {
+    /// The minimum length of a needle required for this algorithm. The minimum
+    /// is 2 since a length of 1 should just use memchr and a length of 0 isn't
+    /// a case handled by this searcher.
+    const MIN_LEN: usize = 2;
+
+    /// The maximum length of a needle required for this algorithm.
+    ///
+    /// In reality, there is no hard max here. The code below can handle any
+    /// length needle. (Perhaps that suggests there are missing optimizations.)
+    /// Instead, this is a heuristic and a bound guaranteeing our linear time
+    /// complexity.
+    ///
+    /// It is a heuristic because when a candidate match is found, memcmp is
+    /// run. For very large needles with lots of false positives, memcmp can
+    /// make the code run quite slow.
+    ///
+    /// It is a bound because the worst case behavior with memcmp is
+    /// multiplicative in the size of the needle and haystack, and we want
+    /// to keep that additive. This bound ensures we still meet that bound
+    /// theoretically, since it's just a constant. We aren't acting in bad
+    /// faith here, memcmp on tiny needles is so fast that even in pathological
+    /// cases (see pathological vector benchmarks), this is still just as fast
+    /// or faster in practice.
+    ///
+    /// This specific number was chosen by tweaking a bit and running
+    /// benchmarks. The rare-medium-needle, for example, gets about 5% faster
+    /// by using this algorithm instead of a prefilter-accelerated Two-Way.
+    /// There's also a theoretical desire to keep this number reasonably
+    /// low, to mitigate the impact of pathological cases. I did try 64, and
+    /// some benchmarks got a little better, and others (particularly the
+    /// pathological ones), got a lot worse. So... 32 it is?
+    const MAX_LEN: usize = 32;
+    MIN_LEN <= needle.len() && needle.len() <= MAX_LEN
+}
diff --git a/vendor/memchr/src/memmem/util.rs b/vendor/memchr/src/memmem/util.rs
deleted file mode 100644
index de0e385..0000000
--- a/vendor/memchr/src/memmem/util.rs
+++ /dev/null
@@ -1,88 +0,0 @@
-// These routines are meant to be optimized specifically for low latency as
-// compared to the equivalent routines offered by std. (Which may invoke the
-// dynamic linker and call out to libc, which introduces a bit more latency
-// than we'd like.)
-
-/// Returns true if and only if needle is a prefix of haystack.
-#[inline(always)]
-pub(crate) fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool {
-    needle.len() <= haystack.len() && memcmp(&haystack[..needle.len()], needle)
-}
-
-/// Returns true if and only if needle is a suffix of haystack.
-#[inline(always)]
-pub(crate) fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool {
-    needle.len() <= haystack.len()
-        && memcmp(&haystack[haystack.len() - needle.len()..], needle)
-}
-
-/// Return true if and only if x.len() == y.len() && x[i] == y[i] for all
-/// 0 <= i < x.len().
-///
-/// Why not just use actual memcmp for this? Well, memcmp requires calling out
-/// to libc, and this routine is called in fairly hot code paths. Other than
-/// just calling out to libc, it also seems to result in worse codegen. By
-/// rolling our own memcmp in pure Rust, it seems to appear more friendly to
-/// the optimizer.
-///
-/// We mark this as inline always, although, some callers may not want it
-/// inlined for better codegen (like Rabin-Karp). In that case, callers are
-/// advised to create a non-inlineable wrapper routine that calls memcmp.
-#[inline(always)]
-pub(crate) fn memcmp(x: &[u8], y: &[u8]) -> bool {
-    if x.len() != y.len() {
-        return false;
-    }
-    // If we don't have enough bytes to do 4-byte at a time loads, then
-    // fall back to the naive slow version.
-    //
-    // TODO: We could do a copy_nonoverlapping combined with a mask instead
-    // of a loop. Benchmark it.
-    if x.len() < 4 {
-        for (&b1, &b2) in x.iter().zip(y) {
-            if b1 != b2 {
-                return false;
-            }
-        }
-        return true;
-    }
-    // When we have 4 or more bytes to compare, then proceed in chunks of 4 at
-    // a time using unaligned loads.
-    //
-    // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is
-    // that this particular version of memcmp is likely to be called with tiny
-    // needles. That means that if we do 8 byte loads, then a higher proportion
-    // of memcmp calls will use the slower variant above. With that said, this
-    // is a hypothesis and is only loosely supported by benchmarks. There's
-    // likely some improvement that could be made here. The main thing here
-    // though is to optimize for latency, not throughput.
-
-    // SAFETY: Via the conditional above, we know that both `px` and `py`
-    // have the same length, so `px < pxend` implies that `py < pyend`.
-    // Thus, dereferencing both `px` and `py` in the loop below is safe.
-    //
-    // Moreover, we set `pxend` and `pyend` to be 4 bytes before the actual
-    // end of `px` and `py`. Thus, the final dereference outside of the
-    // loop is guaranteed to be valid.
-    // (The final comparison will overlap with the last comparison done in
-    // the loop for lengths that aren't multiples of four.)
-    //
-    // Finally, we needn't worry about alignment here, since we do unaligned
-    // loads.
-    unsafe {
-        let (mut px, mut py) = (x.as_ptr(), y.as_ptr());
-        let (pxend, pyend) = (px.add(x.len() - 4), py.add(y.len() - 4));
-        while px < pxend {
-            let vx = (px as *const u32).read_unaligned();
-            let vy = (py as *const u32).read_unaligned();
-            if vx != vy {
-                return false;
-            }
-            px = px.add(4);
-            py = py.add(4);
-        }
-        let vx = (pxend as *const u32).read_unaligned();
-        let vy = (pyend as *const u32).read_unaligned();
-        vx == vy
-    }
-}
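A safe sketch of the technique used by the deleted `memcmp` above: compare in 4-byte chunks and let a final window overlap the last chunk when the length isn't a multiple of four, here with `u32::from_ne_bytes` standing in for the raw unaligned loads.

```rust
// Chunked equality check with an overlapping final window. Sketch only;
// the deleted routine above does the same with raw pointer reads.

fn memcmp4(x: &[u8], y: &[u8]) -> bool {
    if x.len() != y.len() {
        return false;
    }
    if x.len() < 4 {
        return x == y;
    }
    let mut i = 0;
    while i + 4 <= x.len() {
        let vx = u32::from_ne_bytes(x[i..i + 4].try_into().unwrap());
        let vy = u32::from_ne_bytes(y[i..i + 4].try_into().unwrap());
        if vx != vy {
            return false;
        }
        i += 4;
    }
    // Final 4-byte window, overlapping the last full chunk when the length
    // is not a multiple of four.
    let j = x.len() - 4;
    let vx = u32::from_ne_bytes(x[j..].try_into().unwrap());
    let vy = u32::from_ne_bytes(y[j..].try_into().unwrap());
    vx == vy
}

fn main() {
    assert!(memcmp4(b"hello!", b"hello!"));
    assert!(!memcmp4(b"hello!", b"hellO!"));
}
```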
diff --git a/vendor/memchr/src/memmem/vector.rs b/vendor/memchr/src/memmem/vector.rs
deleted file mode 100644
index b81165f..0000000
--- a/vendor/memchr/src/memmem/vector.rs
+++ /dev/null
@@ -1,131 +0,0 @@
-/// A trait for describing vector operations used by vectorized searchers.
-///
-/// The trait is highly constrained to low level vector operations needed. In
-/// general, it was invented mostly to be generic over x86's __m128i and
-/// __m256i types. It's likely that once std::simd becomes a thing, we can
-/// migrate to that since the operations required are quite simple.
-///
-/// TODO: Consider moving this trait up a level and using it to implement
-/// memchr as well. The trait might need to grow one or two methods, but
-/// otherwise should be close to sufficient already.
-///
-/// # Safety
-///
-/// All methods are not safe since they are intended to be implemented using
-/// vendor intrinsics, which are also not safe. Callers must ensure that the
-/// appropriate target features are enabled in the calling function, and that
-/// the current CPU supports them. All implementations should avoid marking the
-/// routines with #[target_feature] and instead mark them as #[inline(always)]
-/// to ensure they get appropriately inlined. (inline(always) cannot be used
-/// with target_feature.)
-pub(crate) trait Vector: Copy + core::fmt::Debug {
-    /// _mm_set1_epi8 or _mm256_set1_epi8
-    unsafe fn splat(byte: u8) -> Self;
-    /// _mm_loadu_si128 or _mm256_loadu_si256
-    unsafe fn load_unaligned(data: *const u8) -> Self;
-    /// _mm_movemask_epi8 or _mm256_movemask_epi8
-    unsafe fn movemask(self) -> u32;
-    /// _mm_cmpeq_epi8 or _mm256_cmpeq_epi8
-    unsafe fn cmpeq(self, vector2: Self) -> Self;
-    /// _mm_and_si128 or _mm256_and_si256
-    unsafe fn and(self, vector2: Self) -> Self;
-}
-
-#[cfg(target_arch = "x86_64")]
-mod x86sse {
-    use super::Vector;
-    use core::arch::x86_64::*;
-
-    impl Vector for __m128i {
-        #[inline(always)]
-        unsafe fn splat(byte: u8) -> __m128i {
-            _mm_set1_epi8(byte as i8)
-        }
-
-        #[inline(always)]
-        unsafe fn load_unaligned(data: *const u8) -> __m128i {
-            _mm_loadu_si128(data as *const __m128i)
-        }
-
-        #[inline(always)]
-        unsafe fn movemask(self) -> u32 {
-            _mm_movemask_epi8(self) as u32
-        }
-
-        #[inline(always)]
-        unsafe fn cmpeq(self, vector2: Self) -> __m128i {
-            _mm_cmpeq_epi8(self, vector2)
-        }
-
-        #[inline(always)]
-        unsafe fn and(self, vector2: Self) -> __m128i {
-            _mm_and_si128(self, vector2)
-        }
-    }
-}
-
-#[cfg(all(feature = "std", target_arch = "x86_64"))]
-mod x86avx {
-    use super::Vector;
-    use core::arch::x86_64::*;
-
-    impl Vector for __m256i {
-        #[inline(always)]
-        unsafe fn splat(byte: u8) -> __m256i {
-            _mm256_set1_epi8(byte as i8)
-        }
-
-        #[inline(always)]
-        unsafe fn load_unaligned(data: *const u8) -> __m256i {
-            _mm256_loadu_si256(data as *const __m256i)
-        }
-
-        #[inline(always)]
-        unsafe fn movemask(self) -> u32 {
-            _mm256_movemask_epi8(self) as u32
-        }
-
-        #[inline(always)]
-        unsafe fn cmpeq(self, vector2: Self) -> __m256i {
-            _mm256_cmpeq_epi8(self, vector2)
-        }
-
-        #[inline(always)]
-        unsafe fn and(self, vector2: Self) -> __m256i {
-            _mm256_and_si256(self, vector2)
-        }
-    }
-}
-
-#[cfg(target_arch = "wasm32")]
-mod wasm_simd128 {
-    use super::Vector;
-    use core::arch::wasm32::*;
-
-    impl Vector for v128 {
-        #[inline(always)]
-        unsafe fn splat(byte: u8) -> v128 {
-            u8x16_splat(byte)
-        }
-
-        #[inline(always)]
-        unsafe fn load_unaligned(data: *const u8) -> v128 {
-            v128_load(data.cast())
-        }
-
-        #[inline(always)]
-        unsafe fn movemask(self) -> u32 {
-            u8x16_bitmask(self).into()
-        }
-
-        #[inline(always)]
-        unsafe fn cmpeq(self, vector2: Self) -> v128 {
-            u8x16_eq(self, vector2)
-        }
-
-        #[inline(always)]
-        unsafe fn and(self, vector2: Self) -> v128 {
-            v128_and(self, vector2)
-        }
-    }
-}
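The deleted `Vector` trait above exists so one generic search routine can be instantiated for `__m128i`, `__m256i` and `v128` alike. Here is a runnable sketch of the same abstraction with a portable `[u8; 8]` standing in for a real vector register (a real implementation would back these methods with intrinsics and unsafe code):

```rust
// Trait-based SIMD abstraction in miniature. The [u8; 8] impl is a slow,
// portable stand-in; the point is the shape, not the speed.

trait Vector: Copy {
    fn splat(byte: u8) -> Self;
    fn load(data: &[u8]) -> Self;
    fn cmpeq(self, other: Self) -> Self;
    fn movemask(self) -> u32;
}

impl Vector for [u8; 8] {
    fn splat(byte: u8) -> Self {
        [byte; 8]
    }
    fn load(data: &[u8]) -> Self {
        data[..8].try_into().unwrap()
    }
    fn cmpeq(self, other: Self) -> Self {
        let mut out = [0u8; 8];
        for i in 0..8 {
            out[i] = if self[i] == other[i] { 0xFF } else { 0 };
        }
        out
    }
    fn movemask(self) -> u32 {
        // Collect the high bit of each lane into a bitmask.
        self.iter()
            .enumerate()
            .fold(0, |m, (i, &b)| m | (((b >> 7) as u32) << i))
    }
}

// One generic routine serves every implementation of the trait.
fn first_eq<V: Vector>(chunk: &[u8], byte: u8) -> Option<usize> {
    let mask = V::load(chunk).cmpeq(V::splat(byte)).movemask();
    if mask == 0 {
        None
    } else {
        Some(mask.trailing_zeros() as usize)
    }
}

fn main() {
    assert_eq!(Some(3), first_eq::<[u8; 8]>(b"abcdefgh", b'd'));
}
```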
diff --git a/vendor/memchr/src/memmem/wasm.rs b/vendor/memchr/src/memmem/wasm.rs
deleted file mode 100644
index 4e3ea98..0000000
--- a/vendor/memchr/src/memmem/wasm.rs
+++ /dev/null
@@ -1,75 +0,0 @@
-use core::arch::wasm32::v128;
-
-use crate::memmem::{genericsimd, NeedleInfo};
-
-/// A `v128` accelerated vectorized substring search routine that only works on
-/// small needles.
-#[derive(Clone, Copy, Debug)]
-pub(crate) struct Forward(genericsimd::Forward);
-
-impl Forward {
-    /// Create a new "generic simd" forward searcher. If one could not be
-    /// created from the given inputs, then None is returned.
-    pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option<Forward> {
-        if !cfg!(memchr_runtime_simd) {
-            return None;
-        }
-        genericsimd::Forward::new(ninfo, needle).map(Forward)
-    }
-
-    /// Returns the minimum length of haystack that is needed for this searcher
-    /// to work. Passing a haystack with a length smaller than this will cause
-    /// `find` to panic.
-    #[inline(always)]
-    pub(crate) fn min_haystack_len(&self) -> usize {
-        self.0.min_haystack_len::<v128>()
-    }
-
-    #[inline(always)]
-    pub(crate) fn find(
-        &self,
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        self.find_impl(haystack, needle)
-    }
-
-    /// The implementation of find marked with the appropriate target feature.
-    #[target_feature(enable = "simd128")]
-    fn find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
-        unsafe { genericsimd::fwd_find::<v128>(&self.0, haystack, needle) }
-    }
-}
-
-#[cfg(all(test, feature = "std", not(miri)))]
-mod tests {
-    use crate::memmem::{prefilter::PrefilterState, NeedleInfo};
-
-    fn find(
-        _: &mut PrefilterState,
-        ninfo: &NeedleInfo,
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        super::Forward::new(ninfo, needle).unwrap().find(haystack, needle)
-    }
-
-    #[test]
-    fn prefilter_permutations() {
-        use crate::memmem::prefilter::tests::PrefilterTest;
-
-        unsafe {
-            PrefilterTest::run_all_tests_filter(find, |t| {
-                // This substring searcher only works on certain configs, so
-                // filter our tests such that Forward::new will be guaranteed
-                // to succeed. (And also remove tests with a haystack that is
-                // too small.)
-                let fwd = match super::Forward::new(&t.ninfo, &t.needle) {
-                    None => return false,
-                    Some(fwd) => fwd,
-                };
-                t.haystack.len() >= fwd.min_haystack_len()
-            })
-        }
-    }
-}
diff --git a/vendor/memchr/src/memmem/x86/avx.rs b/vendor/memchr/src/memmem/x86/avx.rs
deleted file mode 100644
index ce168dd..0000000
--- a/vendor/memchr/src/memmem/x86/avx.rs
+++ /dev/null
@@ -1,139 +0,0 @@
-#[cfg(not(feature = "std"))]
-pub(crate) use self::nostd::Forward;
-#[cfg(feature = "std")]
-pub(crate) use self::std::Forward;
-
-#[cfg(feature = "std")]
-mod std {
-    use core::arch::x86_64::{__m128i, __m256i};
-
-    use crate::memmem::{genericsimd, NeedleInfo};
-
-    /// An AVX accelerated vectorized substring search routine that only works
-    /// on small needles.
-    #[derive(Clone, Copy, Debug)]
-    pub(crate) struct Forward(genericsimd::Forward);
-
-    impl Forward {
-        /// Create a new "generic simd" forward searcher. If one could not be
-        /// created from the given inputs, then None is returned.
-        pub(crate) fn new(
-            ninfo: &NeedleInfo,
-            needle: &[u8],
-        ) -> Option<Forward> {
-            if !cfg!(memchr_runtime_avx) || !is_x86_feature_detected!("avx2") {
-                return None;
-            }
-            genericsimd::Forward::new(ninfo, needle).map(Forward)
-        }
-
-        /// Returns the minimum length of haystack that is needed for this
-        /// searcher to work. Passing a haystack with a length smaller than
-        /// this will cause `find` to panic.
-        #[inline(always)]
-        pub(crate) fn min_haystack_len(&self) -> usize {
-            self.0.min_haystack_len::<__m128i>()
-        }
-
-        #[inline(always)]
-        pub(crate) fn find(
-            &self,
-            haystack: &[u8],
-            needle: &[u8],
-        ) -> Option<usize> {
-            // SAFETY: The only way a Forward value can exist is if the avx2
-            // target feature is enabled. This is the only safety requirement
-            // for calling the genericsimd searcher.
-            unsafe { self.find_impl(haystack, needle) }
-        }
-
-        /// The implementation of find marked with the appropriate target
-        /// feature.
-        ///
-        /// # Safety
-        ///
-        /// Callers must ensure that the avx2 CPU feature is enabled in the
-        /// current environment.
-        #[target_feature(enable = "avx2")]
-        unsafe fn find_impl(
-            &self,
-            haystack: &[u8],
-            needle: &[u8],
-        ) -> Option<usize> {
-            if haystack.len() < self.0.min_haystack_len::<__m256i>() {
-                genericsimd::fwd_find::<__m128i>(&self.0, haystack, needle)
-            } else {
-                genericsimd::fwd_find::<__m256i>(&self.0, haystack, needle)
-            }
-        }
-    }
-}
-
-// We still define the avx "forward" type on nostd to make caller code a bit
-// simpler. This avoids needing a lot more conditional compilation.
-#[cfg(not(feature = "std"))]
-mod nostd {
-    use crate::memmem::NeedleInfo;
-
-    #[derive(Clone, Copy, Debug)]
-    pub(crate) struct Forward(());
-
-    impl Forward {
-        pub(crate) fn new(
-            ninfo: &NeedleInfo,
-            needle: &[u8],
-        ) -> Option<Forward> {
-            None
-        }
-
-        pub(crate) fn min_haystack_len(&self) -> usize {
-            unreachable!()
-        }
-
-        pub(crate) fn find(
-            &self,
-            haystack: &[u8],
-            needle: &[u8],
-        ) -> Option<usize> {
-            unreachable!()
-        }
-    }
-}
-
-#[cfg(all(test, feature = "std", not(miri)))]
-mod tests {
-    use crate::memmem::{prefilter::PrefilterState, NeedleInfo};
-
-    fn find(
-        _: &mut PrefilterState,
-        ninfo: &NeedleInfo,
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        super::Forward::new(ninfo, needle).unwrap().find(haystack, needle)
-    }
-
-    #[test]
-    fn prefilter_permutations() {
-        use crate::memmem::prefilter::tests::PrefilterTest;
-
-        if !is_x86_feature_detected!("avx2") {
-            return;
-        }
-        // SAFETY: The safety of find only requires that the current CPU
-        // support AVX2, which we checked above.
-        unsafe {
-            PrefilterTest::run_all_tests_filter(find, |t| {
-                // This substring searcher only works on certain configs, so
-                // filter our tests such that Forward::new will be guaranteed
-                // to succeed. (And also remove tests with a haystack that is
-                // too small.)
-                let fwd = match super::Forward::new(&t.ninfo, &t.needle) {
-                    None => return false,
-                    Some(fwd) => fwd,
-                };
-                t.haystack.len() >= fwd.min_haystack_len()
-            })
-        }
-    }
-}
diff --git a/vendor/memchr/src/memmem/x86/mod.rs b/vendor/memchr/src/memmem/x86/mod.rs
deleted file mode 100644
index c1cc73f..0000000
--- a/vendor/memchr/src/memmem/x86/mod.rs
+++ /dev/null
@@ -1,2 +0,0 @@
-pub(crate) mod avx;
-pub(crate) mod sse;
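The two deleted x86 modules follow the same dispatch discipline: `avx.rs` above checks for AVX2 at runtime (std only) before entering a `#[target_feature]` routine, while `sse.rs` below leans on SSE2 being statically guaranteed on x86_64. A minimal sketch of the runtime-check half of that pattern:

```rust
// Detect a CPU feature once, then call a #[target_feature] routine through
// an unsafe entry point justified by that check. Sketch only.

#[cfg(target_arch = "x86_64")]
fn count_matches(haystack: &[u8], byte: u8) -> usize {
    if is_x86_feature_detected!("avx2") {
        // SAFETY: we just verified AVX2 is available on this CPU.
        unsafe { count_matches_avx2(haystack, byte) }
    } else {
        count_matches_scalar(haystack, byte)
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_matches_avx2(haystack: &[u8], byte: u8) -> usize {
    // A real routine would use _mm256_* intrinsics here; the scalar body
    // keeps the sketch simple while the enabled feature still lets the
    // compiler vectorize it.
    count_matches_scalar(haystack, byte)
}

#[allow(dead_code)]
fn count_matches_scalar(haystack: &[u8], byte: u8) -> usize {
    haystack.iter().filter(|&&b| b == byte).count()
}

#[cfg(target_arch = "x86_64")]
fn main() {
    assert_eq!(2, count_matches(b"abcabc", b'a'));
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```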
diff --git a/vendor/memchr/src/memmem/x86/sse.rs b/vendor/memchr/src/memmem/x86/sse.rs
deleted file mode 100644
index 22e7d99..0000000
--- a/vendor/memchr/src/memmem/x86/sse.rs
+++ /dev/null
@@ -1,89 +0,0 @@
-use core::arch::x86_64::__m128i;
-
-use crate::memmem::{genericsimd, NeedleInfo};
-
-/// An SSE accelerated vectorized substring search routine that only works on
-/// small needles.
-#[derive(Clone, Copy, Debug)]
-pub(crate) struct Forward(genericsimd::Forward);
-
-impl Forward {
-    /// Create a new "generic simd" forward searcher. If one could not be
-    /// created from the given inputs, then None is returned.
-    pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option<Forward> {
-        if !cfg!(memchr_runtime_sse2) {
-            return None;
-        }
-        genericsimd::Forward::new(ninfo, needle).map(Forward)
-    }
-
-    /// Returns the minimum length of haystack that is needed for this searcher
-    /// to work. Passing a haystack with a length smaller than this will cause
-    /// `find` to panic.
-    #[inline(always)]
-    pub(crate) fn min_haystack_len(&self) -> usize {
-        self.0.min_haystack_len::<__m128i>()
-    }
-
-    #[inline(always)]
-    pub(crate) fn find(
-        &self,
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        // SAFETY: sse2 is enabled on all x86_64 targets, so this is always
-        // safe to call.
-        unsafe { self.find_impl(haystack, needle) }
-    }
-
-    /// The implementation of find marked with the appropriate target feature.
-    ///
-    /// # Safety
-    ///
-    /// This is safe to call in all cases since sse2 is guaranteed to be part
-    /// of x86_64. It is marked as unsafe because of the target feature
-    /// attribute.
-    #[target_feature(enable = "sse2")]
-    unsafe fn find_impl(
-        &self,
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        genericsimd::fwd_find::<__m128i>(&self.0, haystack, needle)
-    }
-}
-
-#[cfg(all(test, feature = "std", not(miri)))]
-mod tests {
-    use crate::memmem::{prefilter::PrefilterState, NeedleInfo};
-
-    fn find(
-        _: &mut PrefilterState,
-        ninfo: &NeedleInfo,
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        super::Forward::new(ninfo, needle).unwrap().find(haystack, needle)
-    }
-
-    #[test]
-    fn prefilter_permutations() {
-        use crate::memmem::prefilter::tests::PrefilterTest;
-
-        // SAFETY: sse2 is enabled on all x86_64 targets, so this is always
-        // safe to call.
-        unsafe {
-            PrefilterTest::run_all_tests_filter(find, |t| {
-                // This substring searcher only works on certain configs, so
-                // filter our tests such that Forward::new will be guaranteed
-                // to succeed. (And also remove tests with a haystack that is
-                // too small.)
-                let fwd = match super::Forward::new(&t.ninfo, &t.needle) {
-                    None => return false,
-                    Some(fwd) => fwd,
-                };
-                t.haystack.len() >= fwd.min_haystack_len()
-            })
-        }
-    }
-}
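The deleted iterator tests below validate the real iterators against naive position oracles, with quickcheck generating the inputs. A dependency-free sketch of the same oracle idea, where a plain loop stands in for quickcheck's generation and repeated single-shot searches stand in for the optimized iterator:

```rust
// Oracle-style check: derive all match positions from repeated one-shot
// searches and compare against a naive enumeration. Sketch only.

fn naive_positions(needle: u8, haystack: &[u8]) -> Vec<usize> {
    haystack
        .iter()
        .enumerate()
        .filter(|&(_, &b)| b == needle)
        .map(|(i, _)| i)
        .collect()
}

fn oneshot_positions(needle: u8, haystack: &[u8]) -> Vec<usize> {
    let mut out = Vec::new();
    let mut start = 0;
    while let Some(i) = haystack[start..].iter().position(|&b| b == needle) {
        out.push(start + i);
        start += i + 1;
    }
    out
}

fn main() {
    for len in 0..64usize {
        let haystack: Vec<u8> = (0..len).map(|i| (i % 5) as u8).collect();
        assert_eq!(
            naive_positions(3, &haystack),
            oneshot_positions(3, &haystack),
        );
    }
}
```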
diff --git a/vendor/memchr/src/tests/memchr/iter.rs b/vendor/memchr/src/tests/memchr/iter.rs
deleted file mode 100644
index 80ea5c2..0000000
--- a/vendor/memchr/src/tests/memchr/iter.rs
+++ /dev/null
@@ -1,230 +0,0 @@
-use quickcheck::quickcheck;
-
-use crate::{tests::memchr::testdata::memchr_tests, Memchr, Memchr2, Memchr3};
-
-#[test]
-fn memchr1_iter() {
-    for test in memchr_tests() {
-        test.iter_one(false, Memchr::new);
-    }
-}
-
-#[test]
-fn memchr2_iter() {
-    for test in memchr_tests() {
-        test.iter_two(false, Memchr2::new);
-    }
-}
-
-#[test]
-fn memchr3_iter() {
-    for test in memchr_tests() {
-        test.iter_three(false, Memchr3::new);
-    }
-}
-
-#[test]
-fn memrchr1_iter() {
-    for test in memchr_tests() {
-        test.iter_one(true, |n1, corpus| Memchr::new(n1, corpus).rev());
-    }
-}
-
-#[test]
-fn memrchr2_iter() {
-    for test in memchr_tests() {
-        test.iter_two(true, |n1, n2, corpus| {
-            Memchr2::new(n1, n2, corpus).rev()
-        })
-    }
-}
-
-#[test]
-fn memrchr3_iter() {
-    for test in memchr_tests() {
-        test.iter_three(true, |n1, n2, n3, corpus| {
-            Memchr3::new(n1, n2, n3, corpus).rev()
-        })
-    }
-}
-
-quickcheck! {
-    fn qc_memchr_double_ended_iter(
-        needle: u8, data: Vec<u8>, take_side: Vec<bool>
-    ) -> bool {
-        // make nonempty
-        let mut take_side = take_side;
-        if take_side.is_empty() { take_side.push(true) };
-
-        let iter = Memchr::new(needle, &data);
-        let all_found = double_ended_take(
-            iter, take_side.iter().cycle().cloned());
-
-        all_found.iter().cloned().eq(positions1(needle, &data))
-    }
-
-    fn qc_memchr2_double_ended_iter(
-        needle1: u8, needle2: u8, data: Vec<u8>, take_side: Vec<bool>
-    ) -> bool {
-        // make nonempty
-        let mut take_side = take_side;
-        if take_side.is_empty() { take_side.push(true) };
-
-        let iter = Memchr2::new(needle1, needle2, &data);
-        let all_found = double_ended_take(
-            iter, take_side.iter().cycle().cloned());
-
-        all_found.iter().cloned().eq(positions2(needle1, needle2, &data))
-    }
-
-    fn qc_memchr3_double_ended_iter(
-        needle1: u8, needle2: u8, needle3: u8,
-        data: Vec<u8>, take_side: Vec<bool>
-    ) -> bool {
-        // make nonempty
-        let mut take_side = take_side;
-        if take_side.is_empty() { take_side.push(true) };
-
-        let iter = Memchr3::new(needle1, needle2, needle3, &data);
-        let all_found = double_ended_take(
-            iter, take_side.iter().cycle().cloned());
-
-        all_found
-            .iter()
-            .cloned()
-            .eq(positions3(needle1, needle2, needle3, &data))
-    }
-
-    fn qc_memchr1_iter(data: Vec<u8>) -> bool {
-        let needle = 0;
-        let answer = positions1(needle, &data);
-        answer.eq(Memchr::new(needle, &data))
-    }
-
-    fn qc_memchr1_rev_iter(data: Vec<u8>) -> bool {
-        let needle = 0;
-        let answer = positions1(needle, &data);
-        answer.rev().eq(Memchr::new(needle, &data).rev())
-    }
-
-    fn qc_memchr2_iter(data: Vec<u8>) -> bool {
-        let needle1 = 0;
-        let needle2 = 1;
-        let answer = positions2(needle1, needle2, &data);
-        answer.eq(Memchr2::new(needle1, needle2, &data))
-    }
-
-    fn qc_memchr2_rev_iter(data: Vec<u8>) -> bool {
-        let needle1 = 0;
-        let needle2 = 1;
-        let answer = positions2(needle1, needle2, &data);
-        answer.rev().eq(Memchr2::new(needle1, needle2, &data).rev())
-    }
-
-    fn qc_memchr3_iter(data: Vec<u8>) -> bool {
-        let needle1 = 0;
-        let needle2 = 1;
-        let needle3 = 2;
-        let answer = positions3(needle1, needle2, needle3, &data);
-        answer.eq(Memchr3::new(needle1, needle2, needle3, &data))
-    }
-
-    fn qc_memchr3_rev_iter(data: Vec<u8>) -> bool {
-        let needle1 = 0;
-        let needle2 = 1;
-        let needle3 = 2;
-        let answer = positions3(needle1, needle2, needle3, &data);
-        answer.rev().eq(Memchr3::new(needle1, needle2, needle3, &data).rev())
-    }
-
-    fn qc_memchr1_iter_size_hint(data: Vec<u8>) -> bool {
-        // test that the size hint is within reasonable bounds
-        let needle = 0;
-        let mut iter = Memchr::new(needle, &data);
-        let mut real_count = data
-            .iter()
-            .filter(|&&elt| elt == needle)
-            .count();
-
-        while let Some(index) = iter.next() {
-            real_count -= 1;
-            let (lower, upper) = iter.size_hint();
-            assert!(lower <= real_count);
-            assert!(upper.unwrap() >= real_count);
-            assert!(upper.unwrap() <= data.len() - index);
-        }
-        true
-    }
-}
-
-// take items from a DEI, taking front for each true and back for each false.
-// Return a vector with the concatenation of the fronts and the reverse of the
-// backs.
-fn double_ended_take<I, J>(mut iter: I, take_side: J) -> Vec<I::Item>
-where
-    I: DoubleEndedIterator,
-    J: Iterator<Item = bool>,
-{
-    let mut found_front = Vec::new();
-    let mut found_back = Vec::new();
-
-    for take_front in take_side {
-        if take_front {
-            if let Some(pos) = iter.next() {
-                found_front.push(pos);
-            } else {
-                break;
-            }
-        } else {
-            if let Some(pos) = iter.next_back() {
-                found_back.push(pos);
-            } else {
-                break;
-            }
-        };
-    }
-
-    let mut all_found = found_front;
-    all_found.extend(found_back.into_iter().rev());
-    all_found
-}
-
-// return an iterator of the 0-based indices of haystack that match the needle
-fn positions1<'a>(
-    n1: u8,
-    haystack: &'a [u8],
-) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
-    let it = haystack
-        .iter()
-        .enumerate()
-        .filter(move |&(_, &b)| b == n1)
-        .map(|t| t.0);
-    Box::new(it)
-}
-
-fn positions2<'a>(
-    n1: u8,
-    n2: u8,
-    haystack: &'a [u8],
-) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
-    let it = haystack
-        .iter()
-        .enumerate()
-        .filter(move |&(_, &b)| b == n1 || b == n2)
-        .map(|t| t.0);
-    Box::new(it)
-}
-
-fn positions3<'a>(
-    n1: u8,
-    n2: u8,
-    n3: u8,
-    haystack: &'a [u8],
-) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
-    let it = haystack
-        .iter()
-        .enumerate()
-        .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3)
-        .map(|t| t.0);
-    Box::new(it)
-}
diff --git a/vendor/memchr/src/tests/memchr/memchr.rs b/vendor/memchr/src/tests/memchr/memchr.rs
deleted file mode 100644
index ac955ed..0000000
--- a/vendor/memchr/src/tests/memchr/memchr.rs
+++ /dev/null
@@ -1,134 +0,0 @@
-use quickcheck::quickcheck;
-
-use crate::{
-    memchr,
-    memchr::{fallback, naive},
-    memchr2, memchr3, memrchr, memrchr2, memrchr3,
-    tests::memchr::testdata::memchr_tests,
-};
-
-#[test]
-fn memchr1_find() {
-    for test in memchr_tests() {
-        test.one(false, memchr);
-    }
-}
-
-#[test]
-fn memchr1_fallback_find() {
-    for test in memchr_tests() {
-        test.one(false, fallback::memchr);
-    }
-}
-
-#[test]
-fn memchr2_find() {
-    for test in memchr_tests() {
-        test.two(false, memchr2);
-    }
-}
-
-#[test]
-fn memchr2_fallback_find() {
-    for test in memchr_tests() {
-        test.two(false, fallback::memchr2);
-    }
-}
-
-#[test]
-fn memchr3_find() {
-    for test in memchr_tests() {
-        test.three(false, memchr3);
-    }
-}
-
-#[test]
-fn memchr3_fallback_find() {
-    for test in memchr_tests() {
-        test.three(false, fallback::memchr3);
-    }
-}
-
-#[test]
-fn memrchr1_find() {
-    for test in memchr_tests() {
-        test.one(true, memrchr);
-    }
-}
-
-#[test]
-fn memrchr1_fallback_find() {
-    for test in memchr_tests() {
-        test.one(true, fallback::memrchr);
-    }
-}
-
-#[test]
-fn memrchr2_find() {
-    for test in memchr_tests() {
-        test.two(true, memrchr2);
-    }
-}
-
-#[test]
-fn memrchr2_fallback_find() {
-    for test in memchr_tests() {
-        test.two(true, fallback::memrchr2);
-    }
-}
-
-#[test]
-fn memrchr3_find() {
-    for test in memchr_tests() {
-        test.three(true, memrchr3);
-    }
-}
-
-#[test]
-fn memrchr3_fallback_find() {
-    for test in memchr_tests() {
-        test.three(true, fallback::memrchr3);
-    }
-}
-
-quickcheck! {
-    fn qc_memchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool {
-        memchr(n1, &corpus) == naive::memchr(n1, &corpus)
-    }
-}
-
-quickcheck! {
-    fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool {
-        memchr2(n1, n2, &corpus) == naive::memchr2(n1, n2, &corpus)
-    }
-}
-
-quickcheck! {
-    fn qc_memchr3_matches_naive(
-        n1: u8, n2: u8, n3: u8,
-        corpus: Vec<u8>
-    ) -> bool {
-        memchr3(n1, n2, n3, &corpus) == naive::memchr3(n1, n2, n3, &corpus)
-    }
-}
-
-quickcheck! {
-    fn qc_memrchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool {
-        memrchr(n1, &corpus) == naive::memrchr(n1, &corpus)
-    }
-}
-
-quickcheck! {
-    fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool {
-        memrchr2(n1, n2, &corpus) == naive::memrchr2(n1, n2, &corpus)
-    }
-}
-
-quickcheck! {
-    fn qc_memrchr3_matches_naive(
-        n1: u8, n2: u8, n3: u8,
-        corpus: Vec<u8>
-    ) -> bool {
-        memrchr3(n1, n2, n3, &corpus) == naive::memrchr3(n1, n2, n3, &corpus)
-    }
-}
diff --git a/vendor/memchr/src/tests/memchr/mod.rs b/vendor/memchr/src/tests/memchr/mod.rs
index 79f94ab..0564ad4 100644
--- a/vendor/memchr/src/tests/memchr/mod.rs
+++ b/vendor/memchr/src/tests/memchr/mod.rs
@@ -1,7 +1,307 @@
-#[cfg(all(feature = "std", not(miri)))]
-mod iter;
-#[cfg(all(feature = "std", not(miri)))]
-mod memchr;
-mod simple;
-#[cfg(all(feature = "std", not(miri)))]
-mod testdata;
+use alloc::{
+    string::{String, ToString},
+    vec,
+    vec::Vec,
+};
+
+use crate::ext::Byte;
+
+pub(crate) mod naive;
+#[macro_use]
+pub(crate) mod prop;
+
+const SEEDS: &'static [Seed] = &[
+    Seed { haystack: "a", needles: &[b'a'], positions: &[0] },
+    Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] },
+    Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] },
+    Seed { haystack: "", needles: &[b'a'], positions: &[] },
+    Seed { haystack: "z", needles: &[b'a'], positions: &[] },
+    Seed { haystack: "zz", needles: &[b'a'], positions: &[] },
+    Seed { haystack: "zza", needles: &[b'a'], positions: &[2] },
+    Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] },
+    Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] },
+    Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] },
+    Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] },
+    Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] },
+    Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] },
+    Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] },
+    Seed {
+        haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
+        needles: &[b'a'],
+        positions: &[32],
+    },
+    // two needles (applied to memchr2 + memchr3)
+    Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
+    Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
+    Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] },
+    Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] },
+    Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] },
+    Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] },
+    Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] },
+    // three needles (applied to memchr3)
+    Seed {
+        haystack: "xyz",
+        needles: &[b'x', b'y', b'z'],
+        positions: &[0, 1, 2],
+    },
+    Seed {
+        haystack: "zxy",
+        needles: &[b'x', b'y', b'z'],
+        positions: &[0, 1, 2],
+    },
+    Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] },
+    Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] },
+    Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] },
+];
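Each seed above pairs a haystack and needle set with every expected match position. A runnable sketch of what checking one seed against a naive reference search looks like (the `Runner` below does this for real implementations, skipping cases they decline):

```rust
// Checking a single seed against a naive reference. Sketch only; the real
// Runner also expands each seed into many padded variants first.

struct Seed {
    haystack: &'static str,
    needles: &'static [u8],
    positions: &'static [usize],
}

fn main() {
    let seed =
        Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] };
    let got: Vec<usize> = seed
        .haystack
        .as_bytes()
        .iter()
        .enumerate()
        .filter(|&(_, &b)| seed.needles.contains(&b))
        .map(|(i, _)| i)
        .collect();
    assert_eq!(seed.positions, got.as_slice());
}
```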
+pub(crate) struct Runner { + needle_len: usize, +} + +impl Runner { + /// Create a new test runner for forward and reverse byte search + /// implementations. + /// + /// The `needle_len` given must be at most `3` and at least `1`. It + /// corresponds to the number of needle bytes to search for. + pub(crate) fn new(needle_len: usize) -> Runner { + assert!(needle_len >= 1, "needle_len must be at least 1"); + assert!(needle_len <= 3, "needle_len must be at most 3"); + Runner { needle_len } + } + + /// Run all tests. This panics on the first failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + pub(crate) fn forward_iter(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let results = match test(t.haystack.as_bytes(), &t.needles) { + None => continue, + Some(results) => results, + }; + assert_eq!( + t.expected, + results, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::>(), + t.haystack, + ); + } + } + } + + /// Run all tests in the reverse direction. This panics on the first + /// failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + pub(crate) fn reverse_iter(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let mut results = match test(t.haystack.as_bytes(), &t.needles) + { + None => continue, + Some(results) => results, + }; + results.reverse(); + assert_eq!( + t.expected, + results, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::>(), + t.haystack, + ); + } + } + } + + /// Run all tests as counting tests. This panics on the first failure. + /// + /// That is, this only checks that the number of matches is correct and + /// not whether the offsets of each match are. + pub(crate) fn count_iter(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let got = match test(t.haystack.as_bytes(), &t.needles) { + None => continue, + Some(got) => got, + }; + assert_eq!( + t.expected.len(), + got, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::>(), + t.haystack, + ); + } + } + } + + /// Like `Runner::forward`, but for a function that returns only the next + /// match and not all matches. + /// + /// If the function returns `None`, then it is skipped. + pub(crate) fn forward_oneshot(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + self.forward_iter(move |haystack, needles| { + let mut start = 0; + let mut results = vec![]; + while let Some(i) = test(&haystack[start..], needles)? { + results.push(start + i); + start += i + 1; + } + Some(results) + }) + } + + /// Like `Runner::reverse`, but for a function that returns only the last + /// match and not all matches. + /// + /// If the function returns `None`, then it is skipped. 
+ pub(crate) fn reverse_oneshot(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + self.reverse_iter(move |haystack, needles| { + let mut end = haystack.len(); + let mut results = vec![]; + while let Some(i) = test(&haystack[..end], needles)? { + results.push(i); + end = i; + } + Some(results) + }) + } +} + +/// A single test for memr?chr{,2,3}. +#[derive(Clone, Debug)] +struct Test { + /// The string to search in. + haystack: String, + /// The needles to look for. + needles: Vec, + /// The offsets that are expected to be found for all needles in the + /// forward direction. + expected: Vec, +} + +impl Test { + fn new(seed: &Seed) -> Test { + Test { + haystack: seed.haystack.to_string(), + needles: seed.needles.to_vec(), + expected: seed.positions.to_vec(), + } + } +} + +/// Data that can be expanded into many memchr tests by padding out the corpus. +#[derive(Clone, Debug)] +struct Seed { + /// The thing to search. We use `&str` instead of `&[u8]` because they + /// are nicer to write in tests, and we don't miss much since memchr + /// doesn't care about UTF-8. + /// + /// Corpora cannot contain either '%' or '#'. We use these bytes when + /// expanding test cases into many test cases, and we assume they are not + /// used. If they are used, `memchr_tests` will panic. + haystack: &'static str, + /// The needles to search for. This is intended to be an alternation of + /// needles. The number of needles may cause this test to be skipped for + /// some memchr variants. For example, a test with 2 needles cannot be used + /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. + /// However, a test with only 1 needle can be used to test all of `memchr`, + /// `memchr2` and `memchr3`. We achieve this by filling in the needles with + /// bytes that we never used in the corpus (such as '#'). + needles: &'static [u8], + /// The positions expected to match for all of the needles. + positions: &'static [usize], +} + +impl Seed { + /// Controls how much we expand the haystack on either side for each test. + /// We lower this on Miri because otherwise running the tests would take + /// forever. + const EXPAND_LEN: usize = { + #[cfg(not(miri))] + { + 515 + } + #[cfg(miri)] + { + 6 + } + }; + + /// Expand this test into many variations of the same test. + /// + /// In particular, this will generate more tests with larger corpus sizes. + /// The expected positions are updated to maintain the integrity of the + /// test. + /// + /// This is important in testing a memchr implementation, because there are + /// often different cases depending on the length of the corpus. + /// + /// Note that we extend the corpus by adding `%` bytes, which we + /// don't otherwise use as a needle. + fn generate(&self) -> impl Iterator { + let mut more = vec![]; + + // Add bytes to the start of the corpus. + for i in 0..Seed::EXPAND_LEN { + let mut t = Test::new(self); + let mut new: String = core::iter::repeat('%').take(i).collect(); + new.push_str(&t.haystack); + t.haystack = new; + t.expected = t.expected.into_iter().map(|p| p + i).collect(); + more.push(t); + } + // Add bytes to the end of the corpus. 
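        // Illustrative aside (not in the original source): the seed "zza"
        // with positions [2] expands to, e.g., "%zza" with positions [3]
        // (start padding shifts every offset by the pad length) and to
        // "zza%" with positions [2] (end padding, below, leaves offsets
        // untouched).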
+ for i in 1..Seed::EXPAND_LEN { + let mut t = Test::new(self); + let padding: String = core::iter::repeat('%').take(i).collect(); + t.haystack.push_str(&padding); + more.push(t); + } + + more.into_iter() + } +} diff --git a/vendor/memchr/src/tests/memchr/naive.rs b/vendor/memchr/src/tests/memchr/naive.rs new file mode 100644 index 0000000..6ebcdae --- /dev/null +++ b/vendor/memchr/src/tests/memchr/naive.rs @@ -0,0 +1,33 @@ +pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option { + haystack.iter().position(|&b| b == n1) +} + +pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { + haystack.iter().position(|&b| b == n1 || b == n2) +} + +pub(crate) fn memchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], +) -> Option { + haystack.iter().position(|&b| b == n1 || b == n2 || b == n3) +} + +pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option { + haystack.iter().rposition(|&b| b == n1) +} + +pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { + haystack.iter().rposition(|&b| b == n1 || b == n2) +} + +pub(crate) fn memrchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], +) -> Option { + haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3) +} diff --git a/vendor/memchr/src/tests/memchr/prop.rs b/vendor/memchr/src/tests/memchr/prop.rs new file mode 100644 index 0000000..b988260 --- /dev/null +++ b/vendor/memchr/src/tests/memchr/prop.rs @@ -0,0 +1,321 @@ +#[cfg(miri)] +#[macro_export] +macro_rules! define_memchr_quickcheck { + ($($tt:tt)*) => {}; +} + +#[cfg(not(miri))] +#[macro_export] +macro_rules! define_memchr_quickcheck { + ($mod:ident) => { + define_memchr_quickcheck!($mod, new); + }; + ($mod:ident, $cons:ident) => { + use alloc::vec::Vec; + + use quickcheck::TestResult; + + use crate::tests::memchr::{ + naive, + prop::{double_ended_take, naive1_iter, naive2_iter, naive3_iter}, + }; + + quickcheck::quickcheck! 
{ + fn qc_memchr_matches_naive(n1: u8, corpus: Vec) -> TestResult { + let expected = naive::memchr(n1, &corpus); + let got = match $mod::One::$cons(n1) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memrchr_matches_naive(n1: u8, corpus: Vec) -> TestResult { + let expected = naive::memrchr(n1, &corpus); + let got = match $mod::One::$cons(n1) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec) -> TestResult { + let expected = naive::memchr2(n1, n2, &corpus); + let got = match $mod::Two::$cons(n1, n2) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec) -> TestResult { + let expected = naive::memrchr2(n1, n2, &corpus); + let got = match $mod::Two::$cons(n1, n2) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr3_matches_naive( + n1: u8, n2: u8, n3: u8, + corpus: Vec + ) -> TestResult { + let expected = naive::memchr3(n1, n2, n3, &corpus); + let got = match $mod::Three::$cons(n1, n2, n3) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memrchr3_matches_naive( + n1: u8, n2: u8, n3: u8, + corpus: Vec + ) -> TestResult { + let expected = naive::memrchr3(n1, n2, n3, &corpus); + let got = match $mod::Three::$cons(n1, n2, n3) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr_double_ended_iter( + needle: u8, data: Vec, take_side: Vec + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = naive1_iter(needle, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr2_double_ended_iter( + needle1: u8, needle2: u8, data: Vec, take_side: Vec + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = naive2_iter(needle1, needle2, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr3_double_ended_iter( + needle1: u8, needle2: u8, needle3: u8, + data: Vec, take_side: Vec + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = naive3_iter(needle1, needle2, needle3, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr1_iter(data: Vec) -> 
TestResult { + let needle = 0; + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive1_iter(needle, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr1_rev_iter(data: Vec) -> TestResult { + let needle = 0; + + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive1_iter(needle, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr2_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive2_iter(needle1, needle2, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr2_rev_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive2_iter(needle1, needle2, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr3_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + let needle3 = 2; + + let finder = match $mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive3_iter(needle1, needle2, needle3, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr3_rev_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + let needle3 = 2; + + let finder = match $mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive3_iter(needle1, needle2, needle3, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr1_iter_size_hint(data: Vec) -> TestResult { + // test that the size hint is within reasonable bounds + let needle = 0; + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let mut iter = finder.iter(&data); + let mut real_count = data + .iter() + .filter(|&&elt| elt == needle) + .count(); + + while let Some(index) = iter.next() { + real_count -= 1; + let (lower, upper) = iter.size_hint(); + assert!(lower <= real_count); + assert!(upper.unwrap() >= real_count); + assert!(upper.unwrap() <= data.len() - index); + } + TestResult::passed() + } + } + }; +} + +// take items from a DEI, taking front for each true and back for each false. +// Return a vector with the concatenation of the fronts and the reverse of the +// backs. 
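// For example (illustrative): with iter over [0, 1, 2, 3, 4] and
// take_side = [true, false, true], we take 0 from the front, 4 from the
// back, then 1 from the front, so found_front = [0, 1], found_back = [4]
// and the result is [0, 1, 4], i.e. exactly the order in which a plain
// front-to-back traversal would have yielded those items.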
+#[cfg(not(miri))] +pub(crate) fn double_ended_take( + mut iter: I, + take_side: J, +) -> alloc::vec::Vec +where + I: DoubleEndedIterator, + J: Iterator, +{ + let mut found_front = alloc::vec![]; + let mut found_back = alloc::vec![]; + + for take_front in take_side { + if take_front { + if let Some(pos) = iter.next() { + found_front.push(pos); + } else { + break; + } + } else { + if let Some(pos) = iter.next_back() { + found_back.push(pos); + } else { + break; + } + }; + } + + let mut all_found = found_front; + all_found.extend(found_back.into_iter().rev()); + all_found +} + +// return an iterator of the 0-based indices of haystack that match the needle +#[cfg(not(miri))] +pub(crate) fn naive1_iter<'a>( + n1: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator + 'a { + haystack.iter().enumerate().filter(move |&(_, &b)| b == n1).map(|t| t.0) +} + +#[cfg(not(miri))] +pub(crate) fn naive2_iter<'a>( + n1: u8, + n2: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator + 'a { + haystack + .iter() + .enumerate() + .filter(move |&(_, &b)| b == n1 || b == n2) + .map(|t| t.0) +} + +#[cfg(not(miri))] +pub(crate) fn naive3_iter<'a>( + n1: u8, + n2: u8, + n3: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator + 'a { + haystack + .iter() + .enumerate() + .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3) + .map(|t| t.0) +} diff --git a/vendor/memchr/src/tests/memchr/simple.rs b/vendor/memchr/src/tests/memchr/simple.rs deleted file mode 100644 index bed5b48..0000000 --- a/vendor/memchr/src/tests/memchr/simple.rs +++ /dev/null @@ -1,23 +0,0 @@ -// Simple tests using MIRI. These are intended only to be a simple exercise of -// memchr when tests are run under miri. These are mostly necessary because the -// other tests are far more extensive and take too long to run under miri. -// -// These tests are also run when the 'std' feature is not enabled. - -use crate::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3}; - -#[test] -fn simple() { - assert_eq!(memchr(b'a', b"abcda"), Some(0)); - assert_eq!(memchr(b'z', b"abcda"), None); - assert_eq!(memchr2(b'a', b'z', b"abcda"), Some(0)); - assert_eq!(memchr2(b'z', b'y', b"abcda"), None); - assert_eq!(memchr3(b'a', b'z', b'b', b"abcda"), Some(0)); - assert_eq!(memchr3(b'z', b'y', b'x', b"abcda"), None); - assert_eq!(memrchr(b'a', b"abcda"), Some(4)); - assert_eq!(memrchr(b'z', b"abcda"), None); - assert_eq!(memrchr2(b'a', b'z', b"abcda"), Some(4)); - assert_eq!(memrchr2(b'z', b'y', b"abcda"), None); - assert_eq!(memrchr3(b'a', b'z', b'b', b"abcda"), Some(4)); - assert_eq!(memrchr3(b'z', b'y', b'x', b"abcda"), None); -} diff --git a/vendor/memchr/src/tests/memchr/testdata.rs b/vendor/memchr/src/tests/memchr/testdata.rs deleted file mode 100644 index 6dda524..0000000 --- a/vendor/memchr/src/tests/memchr/testdata.rs +++ /dev/null @@ -1,351 +0,0 @@ -use std::iter::repeat; - -/// Create a sequence of tests that should be run by memchr implementations. 
-pub fn memchr_tests() -> Vec { - let mut tests = Vec::new(); - for statict in MEMCHR_TESTS { - assert!(!statict.corpus.contains("%"), "% is not allowed in corpora"); - assert!(!statict.corpus.contains("#"), "# is not allowed in corpora"); - assert!(!statict.needles.contains(&b'%'), "% is an invalid needle"); - assert!(!statict.needles.contains(&b'#'), "# is an invalid needle"); - - let t = MemchrTest { - corpus: statict.corpus.to_string(), - needles: statict.needles.to_vec(), - positions: statict.positions.to_vec(), - }; - tests.push(t.clone()); - tests.extend(t.expand()); - } - tests -} - -/// A set of tests for memchr-like functions. -/// -/// These tests mostly try to cover the short string cases. We cover the longer -/// string cases via the benchmarks (which are tests themselves), via -/// quickcheck tests and via automatic expansion of each test case (by -/// increasing the corpus size). Finally, we cover different alignment cases -/// in the tests by varying the starting point of the slice. -const MEMCHR_TESTS: &[MemchrTestStatic] = &[ - // one needle (applied to memchr + memchr2 + memchr3) - MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] }, - MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] }, - MemchrTestStatic { - corpus: "aaa", - needles: &[b'a'], - positions: &[0, 1, 2], - }, - MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] }, - MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] }, - MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] }, - MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] }, - MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] }, - MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] }, - MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] }, - MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] }, - MemchrTestStatic { - corpus: "\x00\x00", - needles: &[b'\x00'], - positions: &[0, 1], - }, - MemchrTestStatic { - corpus: "\x00a\x00", - needles: &[b'\x00'], - positions: &[0, 2], - }, - MemchrTestStatic { - corpus: "zzzzzzzzzzzzzzzza", - needles: &[b'a'], - positions: &[16], - }, - MemchrTestStatic { - corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza", - needles: &[b'a'], - positions: &[32], - }, - // two needles (applied to memchr2 + memchr3) - MemchrTestStatic { - corpus: "az", - needles: &[b'a', b'z'], - positions: &[0, 1], - }, - MemchrTestStatic { - corpus: "az", - needles: &[b'a', b'z'], - positions: &[0, 1], - }, - MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] }, - MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] }, - MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] }, - MemchrTestStatic { - corpus: "yyyyaz", - needles: &[b'a', b'z'], - positions: &[4, 5], - }, - MemchrTestStatic { - corpus: "yyyyaz", - needles: &[b'z', b'a'], - positions: &[4, 5], - }, - // three needles (applied to memchr3) - MemchrTestStatic { - corpus: "xyz", - needles: &[b'x', b'y', b'z'], - positions: &[0, 1, 2], - }, - MemchrTestStatic { - corpus: "zxy", - needles: &[b'x', b'y', b'z'], - positions: &[0, 1, 2], - }, - MemchrTestStatic { - corpus: "zxy", - needles: &[b'x', b'a', b'z'], - positions: &[0, 1], - }, - MemchrTestStatic { - corpus: "zxy", - needles: &[b't', b'a', b'z'], - positions: &[0], - }, - MemchrTestStatic { - corpus: "yxz", - needles: &[b't', b'a', b'z'], - positions: &[2], - }, -]; - -/// A description of a test on a memchr 
like function. -#[derive(Clone, Debug)] -pub struct MemchrTest { - /// The thing to search. We use `&str` instead of `&[u8]` because they - /// are nicer to write in tests, and we don't miss much since memchr - /// doesn't care about UTF-8. - /// - /// Corpora cannot contain either '%' or '#'. We use these bytes when - /// expanding test cases into many test cases, and we assume they are not - /// used. If they are used, `memchr_tests` will panic. - corpus: String, - /// The needles to search for. This is intended to be an "alternation" of - /// needles. The number of needles may cause this test to be skipped for - /// some memchr variants. For example, a test with 2 needles cannot be used - /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. - /// However, a test with only 1 needle can be used to test all of `memchr`, - /// `memchr2` and `memchr3`. We achieve this by filling in the needles with - /// bytes that we never used in the corpus (such as '#'). - needles: Vec, - /// The positions expected to match for all of the needles. - positions: Vec, -} - -/// Like MemchrTest, but easier to define as a constant. -#[derive(Clone, Debug)] -pub struct MemchrTestStatic { - corpus: &'static str, - needles: &'static [u8], - positions: &'static [usize], -} - -impl MemchrTest { - pub fn one Option>(&self, reverse: bool, f: F) { - let needles = match self.needles(1) { - None => return, - Some(needles) => needles, - }; - // We test different alignments here. Since some implementations use - // AVX2, which can read 32 bytes at a time, we test at least that. - // Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128 - // (avx) bytes at a time, so we include that in our offsets as well. - // - // You might think this would cause most needles to not be found, but - // we actually expand our tests to include corpus sizes all the way up - // to >500 bytes, so we should exercise most branches. 
- for align in 0..130 { - let corpus = self.corpus(align); - assert_eq!( - self.positions(align, reverse).get(0).cloned(), - f(needles[0], corpus.as_bytes()), - "search for {:?} failed in: {:?} (len: {}, alignment: {})", - needles[0] as char, - corpus, - corpus.len(), - align - ); - } - } - - pub fn two Option>( - &self, - reverse: bool, - f: F, - ) { - let needles = match self.needles(2) { - None => return, - Some(needles) => needles, - }; - for align in 0..130 { - let corpus = self.corpus(align); - assert_eq!( - self.positions(align, reverse).get(0).cloned(), - f(needles[0], needles[1], corpus.as_bytes()), - "search for {:?}|{:?} failed in: {:?} \ - (len: {}, alignment: {})", - needles[0] as char, - needles[1] as char, - corpus, - corpus.len(), - align - ); - } - } - - pub fn three Option>( - &self, - reverse: bool, - f: F, - ) { - let needles = match self.needles(3) { - None => return, - Some(needles) => needles, - }; - for align in 0..130 { - let corpus = self.corpus(align); - assert_eq!( - self.positions(align, reverse).get(0).cloned(), - f(needles[0], needles[1], needles[2], corpus.as_bytes()), - "search for {:?}|{:?}|{:?} failed in: {:?} \ - (len: {}, alignment: {})", - needles[0] as char, - needles[1] as char, - needles[2] as char, - corpus, - corpus.len(), - align - ); - } - } - - pub fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F) - where - F: FnOnce(u8, &'a [u8]) -> I, - I: Iterator, - { - if let Some(ns) = self.needles(1) { - self.iter(reverse, f(ns[0], self.corpus.as_bytes())); - } - } - - pub fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F) - where - F: FnOnce(u8, u8, &'a [u8]) -> I, - I: Iterator, - { - if let Some(ns) = self.needles(2) { - self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes())); - } - } - - pub fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F) - where - F: FnOnce(u8, u8, u8, &'a [u8]) -> I, - I: Iterator, - { - if let Some(ns) = self.needles(3) { - self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes())); - } - } - - /// Test that the positions yielded by the given iterator match the - /// positions in this test. If reverse is true, then reverse the positions - /// before comparing them. - fn iter>(&self, reverse: bool, it: I) { - assert_eq!( - self.positions(0, reverse), - it.collect::>(), - r"search for {:?} failed in: {:?}", - self.needles.iter().map(|&b| b as char).collect::>(), - self.corpus - ); - } - - /// Expand this test into many variations of the same test. - /// - /// In particular, this will generate more tests with larger corpus sizes. - /// The expected positions are updated to maintain the integrity of the - /// test. - /// - /// This is important in testing a memchr implementation, because there are - /// often different cases depending on the length of the corpus. - /// - /// Note that we extend the corpus by adding `%` bytes, which we - /// don't otherwise use as a needle. - fn expand(&self) -> Vec { - let mut more = Vec::new(); - - // Add bytes to the start of the corpus. - for i in 1..515 { - let mut t = self.clone(); - let mut new_corpus: String = repeat('%').take(i).collect(); - new_corpus.push_str(&t.corpus); - t.corpus = new_corpus; - t.positions = t.positions.into_iter().map(|p| p + i).collect(); - more.push(t); - } - // Add bytes to the end of the corpus. - for i in 1..515 { - let mut t = self.clone(); - let padding: String = repeat('%').take(i).collect(); - t.corpus.push_str(&padding); - more.push(t); - } - - more - } - - /// Return the corpus at the given alignment. 
- /// - /// If the alignment exceeds the length of the corpus, then this returns - /// an empty slice. - fn corpus(&self, align: usize) -> &str { - self.corpus.get(align..).unwrap_or("") - } - - /// Return exactly `count` needles from this test. If this test has less - /// than `count` needles, then add `#` until the number of needles - /// matches `count`. If this test has more than `count` needles, then - /// return `None` (because there is no way to use this test data for a - /// search using fewer needles). - fn needles(&self, count: usize) -> Option> { - if self.needles.len() > count { - return None; - } - - let mut needles = self.needles.to_vec(); - for _ in needles.len()..count { - // we assume # is never used in tests. - needles.push(b'#'); - } - Some(needles) - } - - /// Return the positions in this test, reversed if `reverse` is true. - /// - /// If alignment is given, then all positions greater than or equal to that - /// alignment are offset by the alignment. Positions less than the - /// alignment are dropped. - fn positions(&self, align: usize, reverse: bool) -> Vec { - let positions = if reverse { - let mut positions = self.positions.to_vec(); - positions.reverse(); - positions - } else { - self.positions.to_vec() - }; - positions - .into_iter() - .filter(|&p| p >= align) - .map(|p| p - align) - .collect() - } -} diff --git a/vendor/memchr/src/tests/mod.rs b/vendor/memchr/src/tests/mod.rs index f4d406c..259b678 100644 --- a/vendor/memchr/src/tests/mod.rs +++ b/vendor/memchr/src/tests/mod.rs @@ -1,15 +1,15 @@ -mod memchr; +#[macro_use] +pub(crate) mod memchr; +pub(crate) mod packedpair; +#[macro_use] +pub(crate) mod substring; // For debugging, particularly in CI, print out the byte order of the current // target. -#[cfg(all(feature = "std", target_endian = "little"))] #[test] fn byte_order() { - eprintln!("LITTLE ENDIAN"); -} - -#[cfg(all(feature = "std", target_endian = "big"))] -#[test] -fn byte_order() { - eprintln!("BIG ENDIAN"); + #[cfg(target_endian = "little")] + std::eprintln!("LITTLE ENDIAN"); + #[cfg(target_endian = "big")] + std::eprintln!("BIG ENDIAN"); } diff --git a/vendor/memchr/src/tests/packedpair.rs b/vendor/memchr/src/tests/packedpair.rs new file mode 100644 index 0000000..204635b --- /dev/null +++ b/vendor/memchr/src/tests/packedpair.rs @@ -0,0 +1,216 @@ +use alloc::{boxed::Box, vec, vec::Vec}; + +/// A set of "packed pair" test seeds. Each seed serves as the base for the +/// generation of many other tests. In essence, the seed captures the pair of +/// bytes we used for a predicate and first byte among our needle. The tests +/// generated from each seed essentially vary the length of the needle and +/// haystack, while using the rare/first byte configuration from the seed. +/// +/// The purpose of this is to test many different needle/haystack lengths. +/// In particular, some of the vector optimizations might only have bugs +/// in haystacks of a certain size. +const SEEDS: &[Seed] = &[ + // Why not use different 'first' bytes? It seemed like a good idea to be + // able to configure it, but when I wrote the test generator below, it + // didn't seem necessary to use for reasons that I forget. + Seed { first: b'x', index1: b'y', index2: b'z' }, + Seed { first: b'x', index1: b'x', index2: b'z' }, + Seed { first: b'x', index1: b'y', index2: b'x' }, + Seed { first: b'x', index1: b'x', index2: b'x' }, + Seed { first: b'x', index1: b'y', index2: b'y' }, +]; + +/// Runs a host of "packed pair" search tests. 
+/// +/// These tests specifically look for the occurrence of a possible substring +/// match based on a pair of bytes matching at the right offsets. +pub(crate) struct Runner { + fwd: Option< + Box< + dyn FnMut(&[u8], &[u8], u8, u8) -> Option> + 'static, + >, + >, +} + +impl Runner { + /// Create a new test runner for "packed pair" substring search. + pub(crate) fn new() -> Runner { + Runner { fwd: None } + } + + /// Run all tests. This panics on the first failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + /// + /// This runs tests on both the forward and reverse implementations given. + /// If either (or both) are missing, then tests for that implementation are + /// skipped. + pub(crate) fn run(self) { + if let Some(mut fwd) = self.fwd { + for seed in SEEDS.iter() { + for t in seed.generate() { + match fwd(&t.haystack, &t.needle, t.index1, t.index2) { + None => continue, + Some(result) => { + assert_eq!( + t.fwd, result, + "FORWARD, needle: {:?}, haystack: {:?}, \ + index1: {:?}, index2: {:?}", + t.needle, t.haystack, t.index1, t.index2, + ) + } + } + } + } + } + } + + /// Set the implementation for forward "packed pair" substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then forward "packed pair" search is not tested. + pub(crate) fn fwd( + mut self, + search: impl FnMut(&[u8], &[u8], u8, u8) -> Option> + 'static, + ) -> Runner { + self.fwd = Some(Box::new(search)); + self + } +} + +/// A test that represents the input and expected output to a "packed pair" +/// search function. The test should be able to run with any "packed pair" +/// implementation and get the expected output. +struct Test { + haystack: Vec, + needle: Vec, + index1: u8, + index2: u8, + fwd: Option, +} + +impl Test { + /// Create a new "packed pair" test from a seed and some given offsets to + /// the pair of bytes to use as a predicate in the seed's needle. + /// + /// If a valid test could not be constructed, then None is returned. + /// (Currently, we take the approach of massaging tests to be valid + /// instead of rejecting them outright.) + fn new( + seed: Seed, + index1: usize, + index2: usize, + haystack_len: usize, + needle_len: usize, + fwd: Option, + ) -> Option { + let mut index1: u8 = index1.try_into().unwrap(); + let mut index2: u8 = index2.try_into().unwrap(); + // The '#' byte is never used in a haystack (unless we're expecting + // a match), while the '@' byte is never used in a needle. + let mut haystack = vec![b'@'; haystack_len]; + let mut needle = vec![b'#'; needle_len]; + needle[0] = seed.first; + needle[index1 as usize] = seed.index1; + needle[index2 as usize] = seed.index2; + // If we're expecting a match, then make sure the needle occurs + // in the haystack at the expected position. + if let Some(i) = fwd { + haystack[i..i + needle.len()].copy_from_slice(&needle); + } + // If the operations above lead to rare offsets pointing to the + // non-first occurrence of a byte, then adjust it. This might lead + // to redundant tests, but it's simpler than trying to change the + // generation process I think. 
+ if let Some(i) = crate::memchr(seed.index1, &needle) { + index1 = u8::try_from(i).unwrap(); + } + if let Some(i) = crate::memchr(seed.index2, &needle) { + index2 = u8::try_from(i).unwrap(); + } + Some(Test { haystack, needle, index1, index2, fwd }) + } +} + +/// Data that describes a single prefilter test seed. +#[derive(Clone, Copy)] +struct Seed { + first: u8, + index1: u8, + index2: u8, +} + +impl Seed { + const NEEDLE_LENGTH_LIMIT: usize = { + #[cfg(not(miri))] + { + 33 + } + #[cfg(miri)] + { + 5 + } + }; + + const HAYSTACK_LENGTH_LIMIT: usize = { + #[cfg(not(miri))] + { + 65 + } + #[cfg(miri)] + { + 8 + } + }; + + /// Generate a series of prefilter tests from this seed. + fn generate(self) -> impl Iterator { + let len_start = 2; + // The iterator below generates *a lot* of tests. The number of + // tests was chosen somewhat empirically to be "bearable" when + // running the test suite. + // + // We use an iterator here because the collective haystacks of all + // these test cases add up to enough memory to OOM a conservative + // sandbox or a small laptop. + (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| { + let index_start = len_start - 1; + (index_start..needle_len).flat_map(move |index1| { + (index1..needle_len).flat_map(move |index2| { + (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map( + move |haystack_len| { + Test::new( + self, + index1, + index2, + haystack_len, + needle_len, + None, + ) + .into_iter() + .chain( + (0..=(haystack_len - needle_len)).flat_map( + move |output| { + Test::new( + self, + index1, + index2, + haystack_len, + needle_len, + Some(output), + ) + }, + ), + ) + }, + ) + }) + }) + }) + } +} diff --git a/vendor/memchr/src/tests/substring/mod.rs b/vendor/memchr/src/tests/substring/mod.rs new file mode 100644 index 0000000..dd10cbd --- /dev/null +++ b/vendor/memchr/src/tests/substring/mod.rs @@ -0,0 +1,232 @@ +/*! +This module defines tests and test helpers for substring implementations. 
+*/
+
+use alloc::{
+    boxed::Box,
+    format,
+    string::{String, ToString},
+};
+
+pub(crate) mod naive;
+#[macro_use]
+pub(crate) mod prop;
+
+const SEEDS: &'static [Seed] = &[
+    Seed::new("", "", Some(0), Some(0)),
+    Seed::new("", "a", Some(0), Some(1)),
+    Seed::new("", "ab", Some(0), Some(2)),
+    Seed::new("", "abc", Some(0), Some(3)),
+    Seed::new("a", "", None, None),
+    Seed::new("a", "a", Some(0), Some(0)),
+    Seed::new("a", "aa", Some(0), Some(1)),
+    Seed::new("a", "ba", Some(1), Some(1)),
+    Seed::new("a", "bba", Some(2), Some(2)),
+    Seed::new("a", "bbba", Some(3), Some(3)),
+    Seed::new("a", "bbbab", Some(3), Some(3)),
+    Seed::new("a", "bbbabb", Some(3), Some(3)),
+    Seed::new("a", "bbbabbb", Some(3), Some(3)),
+    Seed::new("a", "bbbbbb", None, None),
+    Seed::new("ab", "", None, None),
+    Seed::new("ab", "a", None, None),
+    Seed::new("ab", "b", None, None),
+    Seed::new("ab", "ab", Some(0), Some(0)),
+    Seed::new("ab", "aab", Some(1), Some(1)),
+    Seed::new("ab", "aaab", Some(2), Some(2)),
+    Seed::new("ab", "abaab", Some(0), Some(3)),
+    Seed::new("ab", "baaab", Some(3), Some(3)),
+    Seed::new("ab", "acb", None, None),
+    Seed::new("ab", "abba", Some(0), Some(0)),
+    Seed::new("abc", "ab", None, None),
+    Seed::new("abc", "abc", Some(0), Some(0)),
+    Seed::new("abc", "abcz", Some(0), Some(0)),
+    Seed::new("abc", "abczz", Some(0), Some(0)),
+    Seed::new("abc", "zabc", Some(1), Some(1)),
+    Seed::new("abc", "zzabc", Some(2), Some(2)),
+    Seed::new("abc", "azbc", None, None),
+    Seed::new("abc", "abzc", None, None),
+    Seed::new("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)),
+    Seed::new("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)),
+    Seed::new(
+        "xyz",
+        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz",
+        Some(32),
+        Some(32),
+    ),
+    Seed::new("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)),
+    Seed::new("\u{0}\u{1e}", "\u{1e}\u{0}", None, None),
+];
+
+/// Runs a host of substring search tests.
+///
+/// This has support for "partial" substring search implementations that only
+/// work for a subset of needles/haystacks. For example, the "packed pair"
+/// substring search implementation only works for haystacks of some minimum
+/// length based on the pair of bytes selected and the size of the vector
+/// used.
+pub(crate) struct Runner {
+    fwd: Option<
+        Box<dyn FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static>,
+    >,
+    rev: Option<
+        Box<dyn FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static>,
+    >,
+}
+
+impl Runner {
+    /// Create a new test runner for forward and reverse substring search
+    /// implementations.
+    pub(crate) fn new() -> Runner {
+        Runner { fwd: None, rev: None }
+    }
+
+    /// Run all tests. This panics on the first failure.
+    ///
+    /// If the implementation being tested returns `None` for a particular
+    /// haystack/needle combination, then that test is skipped.
+    ///
+    /// This runs tests on both the forward and reverse implementations given.
+    /// If either (or both) are missing, then tests for that implementation
+    /// are skipped.
+ pub(crate) fn run(self) { + if let Some(mut fwd) = self.fwd { + for seed in SEEDS.iter() { + for t in seed.generate() { + match fwd(t.haystack.as_bytes(), t.needle.as_bytes()) { + None => continue, + Some(result) => { + assert_eq!( + t.fwd, result, + "FORWARD, needle: {:?}, haystack: {:?}", + t.needle, t.haystack, + ); + } + } + } + } + } + if let Some(mut rev) = self.rev { + for seed in SEEDS.iter() { + for t in seed.generate() { + match rev(t.haystack.as_bytes(), t.needle.as_bytes()) { + None => continue, + Some(result) => { + assert_eq!( + t.rev, result, + "REVERSE, needle: {:?}, haystack: {:?}", + t.needle, t.haystack, + ); + } + } + } + } + } + } + + /// Set the implementation for forward substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then forward substring search is not tested. + pub(crate) fn fwd( + mut self, + search: impl FnMut(&[u8], &[u8]) -> Option> + 'static, + ) -> Runner { + self.fwd = Some(Box::new(search)); + self + } + + /// Set the implementation for reverse substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then reverse substring search is not tested. + pub(crate) fn rev( + mut self, + search: impl FnMut(&[u8], &[u8]) -> Option> + 'static, + ) -> Runner { + self.rev = Some(Box::new(search)); + self + } +} + +/// A single substring test for forward and reverse searches. +#[derive(Clone, Debug)] +struct Test { + needle: String, + haystack: String, + fwd: Option, + rev: Option, +} + +/// A single substring test for forward and reverse searches. +/// +/// Each seed is valid on its own, but it also serves as a starting point +/// to generate more tests. Namely, we pad out the haystacks with other +/// characters so that we get more complete coverage. This is especially useful +/// for testing vector algorithms that tend to have weird special cases for +/// alignment and loop unrolling. +/// +/// Padding works by assuming certain characters never otherwise appear in a +/// needle or a haystack. Neither should contain a `#` character. +#[derive(Clone, Copy, Debug)] +struct Seed { + needle: &'static str, + haystack: &'static str, + fwd: Option, + rev: Option, +} + +impl Seed { + const MAX_PAD: usize = 34; + + const fn new( + needle: &'static str, + haystack: &'static str, + fwd: Option, + rev: Option, + ) -> Seed { + Seed { needle, haystack, fwd, rev } + } + + fn generate(self) -> impl Iterator { + assert!(!self.needle.contains('#'), "needle must not contain '#'"); + assert!(!self.haystack.contains('#'), "haystack must not contain '#'"); + (0..=Seed::MAX_PAD) + // Generate tests for padding at the beginning of haystack. 
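            // Illustrative aside (not in the original source): the seed
            // ("ab", "aab", Some(1), Some(1)) padded with two '#' bytes at
            // the front becomes ("ab", "##aab") with fwd = rev = Some(3),
            // while an empty needle is special-cased to match at the
            // haystack boundaries regardless of padding.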
+ .map(move |pad| { + let needle = self.needle.to_string(); + let prefix = "#".repeat(pad); + let haystack = format!("{}{}", prefix, self.haystack); + let fwd = if needle.is_empty() { + Some(0) + } else { + self.fwd.map(|i| pad + i) + }; + let rev = if needle.is_empty() { + Some(haystack.len()) + } else { + self.rev.map(|i| pad + i) + }; + Test { needle, haystack, fwd, rev } + }) + // Generate tests for padding at the end of haystack. + .chain((1..=Seed::MAX_PAD).map(move |pad| { + let needle = self.needle.to_string(); + let suffix = "#".repeat(pad); + let haystack = format!("{}{}", self.haystack, suffix); + let fwd = if needle.is_empty() { Some(0) } else { self.fwd }; + let rev = if needle.is_empty() { + Some(haystack.len()) + } else { + self.rev + }; + Test { needle, haystack, fwd, rev } + })) + } +} diff --git a/vendor/memchr/src/tests/substring/naive.rs b/vendor/memchr/src/tests/substring/naive.rs new file mode 100644 index 0000000..1bc6009 --- /dev/null +++ b/vendor/memchr/src/tests/substring/naive.rs @@ -0,0 +1,45 @@ +/*! +This module defines "naive" implementations of substring search. + +These are sometimes useful to compare with "real" substring implementations. +The idea is that they are so simple that they are unlikely to be incorrect. +*/ + +/// Naively search forwards for the given needle in the given haystack. +pub(crate) fn find(haystack: &[u8], needle: &[u8]) -> Option { + let end = haystack.len().checked_sub(needle.len()).map_or(0, |i| i + 1); + for i in 0..end { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None +} + +/// Naively search in reverse for the given needle in the given haystack. +pub(crate) fn rfind(haystack: &[u8], needle: &[u8]) -> Option { + let end = haystack.len().checked_sub(needle.len()).map_or(0, |i| i + 1); + for i in (0..end).rev() { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None +} + +#[cfg(test)] +mod tests { + use crate::tests::substring; + + use super::*; + + #[test] + fn forward() { + substring::Runner::new().fwd(|h, n| Some(find(h, n))).run() + } + + #[test] + fn reverse() { + substring::Runner::new().rev(|h, n| Some(rfind(h, n))).run() + } +} diff --git a/vendor/memchr/src/tests/substring/prop.rs b/vendor/memchr/src/tests/substring/prop.rs new file mode 100644 index 0000000..a8352ec --- /dev/null +++ b/vendor/memchr/src/tests/substring/prop.rs @@ -0,0 +1,126 @@ +/*! +This module defines a few quickcheck properties for substring search. + +It also provides a forward and reverse macro for conveniently defining +quickcheck tests that run these properties over any substring search +implementation. +*/ + +use crate::tests::substring::naive; + +/// $fwd is a `impl FnMut(haystack, needle) -> Option>`. When the +/// routine returns `None`, then it's skipped, which is useful for substring +/// implementations that don't work for all inputs. +#[macro_export] +macro_rules! define_substring_forward_quickcheck { + ($fwd:expr) => { + #[cfg(not(miri))] + quickcheck::quickcheck! 
{ + fn qc_fwd_prefix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::prefix_is_substring(&bs, $fwd) + } + + fn qc_fwd_suffix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::suffix_is_substring(&bs, $fwd) + } + + fn qc_fwd_matches_naive( + haystack: alloc::vec::Vec, + needle: alloc::vec::Vec + ) -> bool { + crate::tests::substring::prop::same_as_naive( + false, + &haystack, + &needle, + $fwd, + ) + } + } + }; +} + +/// $rev is a `impl FnMut(haystack, needle) -> Option>`. When the +/// routine returns `None`, then it's skipped, which is useful for substring +/// implementations that don't work for all inputs. +#[macro_export] +macro_rules! define_substring_reverse_quickcheck { + ($rev:expr) => { + #[cfg(not(miri))] + quickcheck::quickcheck! { + fn qc_rev_prefix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::prefix_is_substring(&bs, $rev) + } + + fn qc_rev_suffix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::suffix_is_substring(&bs, $rev) + } + + fn qc_rev_matches_naive( + haystack: alloc::vec::Vec, + needle: alloc::vec::Vec + ) -> bool { + crate::tests::substring::prop::same_as_naive( + true, + &haystack, + &needle, + $rev, + ) + } + } + }; +} + +/// Check that every prefix of the given byte string is a substring. +pub(crate) fn prefix_is_substring( + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option>, +) -> bool { + for i in 0..bs.len().saturating_sub(1) { + let prefix = &bs[..i]; + let result = match search(bs, prefix) { + None => continue, + Some(result) => result, + }; + if !result.is_some() { + return false; + } + } + true +} + +/// Check that every suffix of the given byte string is a substring. +pub(crate) fn suffix_is_substring( + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option>, +) -> bool { + for i in 0..bs.len().saturating_sub(1) { + let suffix = &bs[i..]; + let result = match search(bs, suffix) { + None => continue, + Some(result) => result, + }; + if !result.is_some() { + return false; + } + } + true +} + +/// Check that naive substring search matches the result of the given search +/// algorithm. +pub(crate) fn same_as_naive( + reverse: bool, + haystack: &[u8], + needle: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option>, +) -> bool { + let result = match search(haystack, needle) { + None => return true, + Some(result) => result, + }; + if reverse { + result == naive::rfind(haystack, needle) + } else { + result == naive::find(haystack, needle) + } +} diff --git a/vendor/memchr/src/vector.rs b/vendor/memchr/src/vector.rs new file mode 100644 index 0000000..f360176 --- /dev/null +++ b/vendor/memchr/src/vector.rs @@ -0,0 +1,515 @@ +/// A trait for describing vector operations used by vectorized searchers. +/// +/// The trait is highly constrained to low level vector operations needed. +/// In general, it was invented mostly to be generic over x86's __m128i and +/// __m256i types. At time of writing, it also supports wasm and aarch64 +/// 128-bit vector types as well. +/// +/// # Safety +/// +/// All methods are not safe since they are intended to be implemented using +/// vendor intrinsics, which are also not safe. Callers must ensure that the +/// appropriate target features are enabled in the calling function, and that +/// the current CPU supports them. All implementations should avoid marking the +/// routines with #[target_feature] and instead mark them as #[inline(always)] +/// to ensure they get appropriately inlined. 
(inline(always) cannot be used
+/// with target_feature.)
+pub(crate) trait Vector: Copy + core::fmt::Debug {
+    /// The number of bits in the vector.
+    const BITS: usize;
+    /// The number of bytes in the vector. That is, this is the size of the
+    /// vector in memory.
+    const BYTES: usize;
+    /// The bits that must be zero in order for a `*const u8` pointer to be
+    /// correctly aligned to read vector values.
+    const ALIGN: usize;
+
+    /// The type of the value returned by `Vector::movemask`.
+    ///
+    /// This supports abstracting over the specific representation used in
+    /// order to accommodate different representations in different ISAs.
+    type Mask: MoveMask;
+
+    /// Create a vector with 8-bit lanes with the given byte repeated into
+    /// each lane.
+    unsafe fn splat(byte: u8) -> Self;
+
+    /// Read a vector-size number of bytes from the given pointer. The pointer
+    /// must be aligned to the size of the vector.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that at least `BYTES` bytes are readable from
+    /// `data` and that `data` is aligned to a `BYTES` boundary.
+    unsafe fn load_aligned(data: *const u8) -> Self;
+
+    /// Read a vector-size number of bytes from the given pointer. The pointer
+    /// does not need to be aligned.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that at least `BYTES` bytes are readable from
+    /// `data`.
+    unsafe fn load_unaligned(data: *const u8) -> Self;
+
+    /// _mm_movemask_epi8 or _mm256_movemask_epi8
+    unsafe fn movemask(self) -> Self::Mask;
+    /// _mm_cmpeq_epi8 or _mm256_cmpeq_epi8
+    unsafe fn cmpeq(self, vector2: Self) -> Self;
+    /// _mm_and_si128 or _mm256_and_si256
+    unsafe fn and(self, vector2: Self) -> Self;
+    /// _mm_or_si128 or _mm256_or_si256
+    unsafe fn or(self, vector2: Self) -> Self;
+    /// Returns true if and only if `Self::movemask` would return a mask that
+    /// contains at least one non-zero bit.
+    unsafe fn movemask_will_have_non_zero(self) -> bool {
+        self.movemask().has_non_zero()
+    }
+}
+
+/// A trait that abstracts over a vector-to-scalar operation called
+/// "move mask."
+///
+/// On x86-64, this is `_mm_movemask_epi8` for SSE2 and `_mm256_movemask_epi8`
+/// for AVX2. It takes a vector of `u8` lanes and returns a scalar where the
+/// `i`th bit is set if and only if the most significant bit in the `i`th lane
+/// of the vector is set. The simd128 ISA for wasm32 also supports this
+/// exact same operation natively.
+///
+/// ... But aarch64 doesn't. So we have to fake it with more instructions and
+/// a slightly different representation. We could do extra work to unify the
+/// representations, but that would require additional costs in the hot path
+/// for `memchr` and `packedpair`. So instead, we abstract over the specific
+/// representation with this trait and define the operations we actually need.
+pub(crate) trait MoveMask: Copy + core::fmt::Debug {
+    /// Return a mask that is all zeros except for the least significant `n`
+    /// lanes in a corresponding vector.
+    fn all_zeros_except_least_significant(n: usize) -> Self;
+
+    /// Returns true if and only if this mask has a non-zero bit anywhere.
+    fn has_non_zero(self) -> bool;
+
+    /// Returns the number of bits set to 1 in this mask.
+    fn count_ones(self) -> usize;
+
+    /// Does a bitwise `and` operation between `self` and `other`.
+    fn and(self, other: Self) -> Self;
+
+    /// Does a bitwise `or` operation between `self` and `other`.
+ fn or(self, other: Self) -> Self; + + /// Returns a mask that is equivalent to `self` but with the least + /// significant 1-bit set to 0. + fn clear_least_significant_bit(self) -> Self; + + /// Returns the offset of the first non-zero lane this mask represents. + fn first_offset(self) -> usize; + + /// Returns the offset of the last non-zero lane this mask represents. + fn last_offset(self) -> usize; +} + +/// This is a "sensible" movemask implementation where each bit represents +/// whether the most significant bit is set in each corresponding lane of a +/// vector. This is used on x86-64 and wasm, but such a mask is more expensive +/// to get on aarch64 so we use something a little different. +/// +/// We call this "sensible" because this is what we get using native sse/avx +/// movemask instructions. But neon has no such native equivalent. +#[derive(Clone, Copy, Debug)] +pub(crate) struct SensibleMoveMask(u32); + +impl SensibleMoveMask { + /// Get the mask in a form suitable for computing offsets. + /// + /// Basically, this normalizes to little endian. On big endian, this swaps + /// the bytes. + #[inline(always)] + fn get_for_offset(self) -> u32 { + #[cfg(target_endian = "big")] + { + self.0.swap_bytes() + } + #[cfg(target_endian = "little")] + { + self.0 + } + } +} + +impl MoveMask for SensibleMoveMask { + #[inline(always)] + fn all_zeros_except_least_significant(n: usize) -> SensibleMoveMask { + debug_assert!(n < 32); + SensibleMoveMask(!((1 << n) - 1)) + } + + #[inline(always)] + fn has_non_zero(self) -> bool { + self.0 != 0 + } + + #[inline(always)] + fn count_ones(self) -> usize { + self.0.count_ones() as usize + } + + #[inline(always)] + fn and(self, other: SensibleMoveMask) -> SensibleMoveMask { + SensibleMoveMask(self.0 & other.0) + } + + #[inline(always)] + fn or(self, other: SensibleMoveMask) -> SensibleMoveMask { + SensibleMoveMask(self.0 | other.0) + } + + #[inline(always)] + fn clear_least_significant_bit(self) -> SensibleMoveMask { + SensibleMoveMask(self.0 & (self.0 - 1)) + } + + #[inline(always)] + fn first_offset(self) -> usize { + // We are dealing with little endian here (and if we aren't, we swap + // the bytes so we are in practice), where the most significant byte + // is at a higher address. That means the least significant bit that + // is set corresponds to the position of our first matching byte. + // That position corresponds to the number of zeros after the least + // significant bit. + self.get_for_offset().trailing_zeros() as usize + } + + #[inline(always)] + fn last_offset(self) -> usize { + // We are dealing with little endian here (and if we aren't, we swap + // the bytes so we are in practice), where the most significant byte is + // at a higher address. That means the most significant bit that is set + // corresponds to the position of our last matching byte. The position + // from the end of the mask is therefore the number of leading zeros + // in a 32 bit integer, and the position from the start of the mask is + // therefore 32 - (leading zeros) - 1. 
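        // Concrete check (illustrative): if only bit 5 is set, so the mask
        // is 0x20, then leading_zeros() is 26 and last_offset is
        // 32 - 26 - 1 = 5, which agrees with first_offset's
        // trailing_zeros() = 5 when there is a single match.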
+ 32 - self.get_for_offset().leading_zeros() as usize - 1 + } +} + +#[cfg(target_arch = "x86_64")] +mod x86sse2 { + use core::arch::x86_64::*; + + use super::{SensibleMoveMask, Vector}; + + impl Vector for __m128i { + const BITS: usize = 128; + const BYTES: usize = 16; + const ALIGN: usize = Self::BYTES - 1; + + type Mask = SensibleMoveMask; + + #[inline(always)] + unsafe fn splat(byte: u8) -> __m128i { + _mm_set1_epi8(byte as i8) + } + + #[inline(always)] + unsafe fn load_aligned(data: *const u8) -> __m128i { + _mm_load_si128(data as *const __m128i) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> __m128i { + _mm_loadu_si128(data as *const __m128i) + } + + #[inline(always)] + unsafe fn movemask(self) -> SensibleMoveMask { + SensibleMoveMask(_mm_movemask_epi8(self) as u32) + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> __m128i { + _mm_cmpeq_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> __m128i { + _mm_and_si128(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> __m128i { + _mm_or_si128(self, vector2) + } + } +} + +#[cfg(target_arch = "x86_64")] +mod x86avx2 { + use core::arch::x86_64::*; + + use super::{SensibleMoveMask, Vector}; + + impl Vector for __m256i { + const BITS: usize = 256; + const BYTES: usize = 32; + const ALIGN: usize = Self::BYTES - 1; + + type Mask = SensibleMoveMask; + + #[inline(always)] + unsafe fn splat(byte: u8) -> __m256i { + _mm256_set1_epi8(byte as i8) + } + + #[inline(always)] + unsafe fn load_aligned(data: *const u8) -> __m256i { + _mm256_load_si256(data as *const __m256i) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> __m256i { + _mm256_loadu_si256(data as *const __m256i) + } + + #[inline(always)] + unsafe fn movemask(self) -> SensibleMoveMask { + SensibleMoveMask(_mm256_movemask_epi8(self) as u32) + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> __m256i { + _mm256_cmpeq_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> __m256i { + _mm256_and_si256(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> __m256i { + _mm256_or_si256(self, vector2) + } + } +} + +#[cfg(target_arch = "aarch64")] +mod aarch64neon { + use core::arch::aarch64::*; + + use super::{MoveMask, Vector}; + + impl Vector for uint8x16_t { + const BITS: usize = 128; + const BYTES: usize = 16; + const ALIGN: usize = Self::BYTES - 1; + + type Mask = NeonMoveMask; + + #[inline(always)] + unsafe fn splat(byte: u8) -> uint8x16_t { + vdupq_n_u8(byte) + } + + #[inline(always)] + unsafe fn load_aligned(data: *const u8) -> uint8x16_t { + // I've tried `data.cast::().read()` instead, but + // couldn't observe any benchmark differences. 
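            // (Editorial note: `vld1q_u8` carries no alignment requirement
            // on aarch64, which is why the aligned load below can simply
            // delegate to the unaligned one.)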
+            Self::load_unaligned(data)
+        }
+
+        #[inline(always)]
+        unsafe fn load_unaligned(data: *const u8) -> uint8x16_t {
+            vld1q_u8(data)
+        }
+
+        #[inline(always)]
+        unsafe fn movemask(self) -> NeonMoveMask {
+            let asu16s = vreinterpretq_u16_u8(self);
+            let mask = vshrn_n_u16(asu16s, 4);
+            let asu64 = vreinterpret_u64_u8(mask);
+            let scalar64 = vget_lane_u64(asu64, 0);
+            NeonMoveMask(scalar64 & 0x8888888888888888)
+        }
+
+        #[inline(always)]
+        unsafe fn cmpeq(self, vector2: Self) -> uint8x16_t {
+            vceqq_u8(self, vector2)
+        }
+
+        #[inline(always)]
+        unsafe fn and(self, vector2: Self) -> uint8x16_t {
+            vandq_u8(self, vector2)
+        }
+
+        #[inline(always)]
+        unsafe fn or(self, vector2: Self) -> uint8x16_t {
+            vorrq_u8(self, vector2)
+        }
+
+        /// This is the only interesting implementation of this routine.
+        /// Basically, instead of doing the "shift right narrow" dance, we use
+        /// adjacent folding max to determine whether there are any non-zero
+        /// bytes in our mask. If there are, *then* we'll do the "shift right
+        /// narrow" dance. In benchmarks, this does lead to slightly better
+        /// throughput, but the win doesn't appear huge.
+        #[inline(always)]
+        unsafe fn movemask_will_have_non_zero(self) -> bool {
+            let low = vreinterpretq_u64_u8(vpmaxq_u8(self, self));
+            vgetq_lane_u64(low, 0) != 0
+        }
+    }
+
+    /// Neon doesn't have a `movemask` that works like the one in x86-64, so we
+    /// wind up using a different method[1]. The different method also produces
+    /// a mask, but 4 bits are set in the neon case instead of a single bit set
+    /// in the x86-64 case. We do an extra step to zero out 3 of the 4 bits,
+    /// but we still wind up with at least 3 zeroes between each set bit. This
+    /// generally means that we need to do some division by 4 before extracting
+    /// offsets.
+    ///
+    /// In fact, the existence of this type is the entire reason that we have
+    /// the `MoveMask` trait in the first place. This basically lets us keep
+    /// the different representations of masks without being forced to unify
+    /// them into a single representation, which could result in extra and
+    /// unnecessary work.
+    ///
+    /// [1]: https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
+    #[derive(Clone, Copy, Debug)]
+    pub(crate) struct NeonMoveMask(u64);
+
+    impl NeonMoveMask {
+        /// Get the mask in a form suitable for computing offsets.
+        ///
+        /// Basically, this normalizes to little endian. On big endian, this
+        /// swaps the bytes.
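+        ///
+        /// This mirrors `SensibleMoveMask::get_for_offset`, just over a
+        /// 64-bit mask instead of a 32-bit one.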
+        #[inline(always)]
+        fn get_for_offset(self) -> u64 {
+            #[cfg(target_endian = "big")]
+            {
+                self.0.swap_bytes()
+            }
+            #[cfg(target_endian = "little")]
+            {
+                self.0
+            }
+        }
+    }
+
+    impl MoveMask for NeonMoveMask {
+        #[inline(always)]
+        fn all_zeros_except_least_significant(n: usize) -> NeonMoveMask {
+            debug_assert!(n < 16);
+            NeonMoveMask(!(((1 << n) << 2) - 1))
+        }
+
+        #[inline(always)]
+        fn has_non_zero(self) -> bool {
+            self.0 != 0
+        }
+
+        #[inline(always)]
+        fn count_ones(self) -> usize {
+            self.0.count_ones() as usize
+        }
+
+        #[inline(always)]
+        fn and(self, other: NeonMoveMask) -> NeonMoveMask {
+            NeonMoveMask(self.0 & other.0)
+        }
+
+        #[inline(always)]
+        fn or(self, other: NeonMoveMask) -> NeonMoveMask {
+            NeonMoveMask(self.0 | other.0)
+        }
+
+        #[inline(always)]
+        fn clear_least_significant_bit(self) -> NeonMoveMask {
+            NeonMoveMask(self.0 & (self.0 - 1))
+        }
+
+        #[inline(always)]
+        fn first_offset(self) -> usize {
+            // We are dealing with little endian here (and if we aren't,
+            // we swap the bytes so we are in practice), where the most
+            // significant byte is at a higher address. That means the least
+            // significant bit that is set corresponds to the position of our
+            // first matching byte. That position corresponds to the number of
+            // zeros after the least significant bit.
+            //
+            // Note that unlike `SensibleMoveMask`, this mask has its bits
+            // spread out over 64 bits instead of 16 bits (for a 128 bit
+            // vector). Namely, whereas x86-64 will turn
+            //
+            // 0x00 0xFF 0x00 0x00 0xFF
+            //
+            // into 10010, our neon approach will turn it into
+            //
+            // 10000000000010000000
+            //
+            // And this happens because neon doesn't have a native `movemask`
+            // instruction, so we kind of fake it[1]. Thus, we divide the
+            // number of trailing zeros by 4 to get the "real" offset.
+            //
+            // [1]: https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
+            (self.get_for_offset().trailing_zeros() >> 2) as usize
+        }
+
+        #[inline(always)]
+        fn last_offset(self) -> usize {
+            // See comment in `first_offset` above. This is basically the same,
+            // but coming from the other direction.
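+            //
+            // For example, if only lane 4 matches, then bit 19 is set and
+            // leading_zeros is 44, so this returns 16 - (44 >> 2) - 1 = 4.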
+ 16 - (self.get_for_offset().leading_zeros() >> 2) as usize - 1 + } + } +} + +#[cfg(target_arch = "wasm32")] +mod wasm_simd128 { + use core::arch::wasm32::*; + + use super::{SensibleMoveMask, Vector}; + + impl Vector for v128 { + const BITS: usize = 128; + const BYTES: usize = 16; + const ALIGN: usize = Self::BYTES - 1; + + type Mask = SensibleMoveMask; + + #[inline(always)] + unsafe fn splat(byte: u8) -> v128 { + u8x16_splat(byte) + } + + #[inline(always)] + unsafe fn load_aligned(data: *const u8) -> v128 { + *data.cast() + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> v128 { + v128_load(data.cast()) + } + + #[inline(always)] + unsafe fn movemask(self) -> SensibleMoveMask { + SensibleMoveMask(u8x16_bitmask(self).into()) + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> v128 { + u8x16_eq(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> v128 { + v128_and(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> v128 { + v128_or(self, vector2) + } + } +} diff --git a/vendor/proc-macro2/.cargo-checksum.json b/vendor/proc-macro2/.cargo-checksum.json new file mode 100644 index 0000000..744053b --- /dev/null +++ b/vendor/proc-macro2/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"4a85db50c7866e4eba8673358705982eaf715207ae4b8e16d3f485671fa9f473","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"c609b6865476d6c35879784e9155367a97a0da496aa5c3c61488440a20f59883","build.rs":"8b4facae0d125ca3b437b4f5ebcd6ea3da3fcc65fcfc2cf357ae544423aa4568","build/probe.rs":"827da142d033f027d9f2a52ffdc0a619c7c34a2a280635e38c64fcd46cf7b635","rust-toolchain.toml":"6bbb61302978c736b2da03e4fb40e3beab908f85d533ab46fd541e637b5f3e0f","src/detection.rs":"ed9a5f9a979ab01247d7a68eeb1afa3c13209334c5bfff0f9289cb07e5bb4e8b","src/extra.rs":"d378a9e799e5c49933b067cd38f5364d16a152ef337eef86ce42fdc86005ddf3","src/fallback.rs":"35e46d4fa73175dcf857084e12f5bb7e094481738dcf59a98b1c584552d076bc","src/lib.rs":"d0f6c5e918b827df600cf5e73cf92ca52c16584b4ac7dd96eb63562947d36bd5","src/location.rs":"f55d2e61f1bb1af65e14ed04c9e91eb1ddbf8430e8c05f2048d1cd538d27368e","src/marker.rs":"c8c90351b8ebcf5b11520831b199add628bc613b0f5559260b51a3c4f6406d8a","src/parse.rs":"4b77cddbc2752bc4d38a65acd8b96b6786c5220d19b1e1b37810257b5d24132d","src/rcvec.rs":"1c3c48c4f819927cc445ae15ca3bb06775feff2fd1cb21901ae4c40c7e6b4e82","src/wrapper.rs":"46ae8c6bc87edb04c3b3fc3506234bcca050c309c209b93bc61b2d21235c8362","tests/comments.rs":"31115b3a56c83d93eef2fb4c9566bf4543e302560732986161b98aef504785ed","tests/features.rs":"a86deb8644992a4eb64d9fd493eff16f9cf9c5cb6ade3a634ce0c990cf87d559","tests/marker.rs":"3190ee07dae510251f360db701ce257030f94a479b6689c3a9ef804bd5d8d099","tests/test.rs":"7511be57e097b15403cf36feb858b4aabdc832fac7024571059a559a7e2ed2a0","tests/test_fmt.rs":"b7743b612af65f2c88cbe109d50a093db7aa7e87f9e37bf45b7bbaeb240aa020","tests/test_size.rs":"acf05963c1e62052d769d237b50844a2c59b4182b491231b099a4f74e5456ab0"},"package":"95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c"} \ No newline at end of file diff --git a/vendor/proc-macro2/Cargo.toml b/vendor/proc-macro2/Cargo.toml new file mode 100644 index 0000000..280bbdf --- /dev/null +++ b/vendor/proc-macro2/Cargo.toml @@ -0,0 +1,67 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# 
"normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.56" +name = "proc-macro2" +version = "1.0.76" +authors = [ + "David Tolnay ", + "Alex Crichton ", +] +autobenches = false +description = "A substitute implementation of the compiler's `proc_macro` API to decouple token-based libraries from the procedural macro use case." +documentation = "https://docs.rs/proc-macro2" +readme = "README.md" +keywords = [ + "macros", + "syn", +] +categories = ["development-tools::procedural-macro-helpers"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/dtolnay/proc-macro2" + +[package.metadata.docs.rs] +rustc-args = [ + "--cfg", + "procmacro2_semver_exempt", +] +rustdoc-args = [ + "--cfg", + "procmacro2_semver_exempt", + "--cfg", + "doc_cfg", + "--generate-link-to-definition", +] +targets = ["x86_64-unknown-linux-gnu"] + +[package.metadata.playground] +features = ["span-locations"] + +[lib] +doc-scrape-examples = false + +[dependencies.unicode-ident] +version = "1.0" + +[dev-dependencies.quote] +version = "1.0" +default_features = false + +[dev-dependencies.rustversion] +version = "1" + +[features] +default = ["proc-macro"] +nightly = [] +proc-macro = [] +span-locations = [] diff --git a/vendor/proc-macro2/LICENSE-APACHE b/vendor/proc-macro2/LICENSE-APACHE new file mode 100644 index 0000000..1b5ec8b --- /dev/null +++ b/vendor/proc-macro2/LICENSE-APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS diff --git a/vendor/proc-macro2/LICENSE-MIT b/vendor/proc-macro2/LICENSE-MIT new file mode 100644 index 0000000..31aa793 --- /dev/null +++ b/vendor/proc-macro2/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/vendor/proc-macro2/README.md b/vendor/proc-macro2/README.md new file mode 100644 index 0000000..3a29ce8 --- /dev/null +++ b/vendor/proc-macro2/README.md @@ -0,0 +1,94 @@ +# proc-macro2 + +[github](https://github.com/dtolnay/proc-macro2) +[crates.io](https://crates.io/crates/proc-macro2) +[docs.rs](https://docs.rs/proc-macro2) +[build status](https://github.com/dtolnay/proc-macro2/actions?query=branch%3Amaster) + +A wrapper around the procedural macro API of the compiler's `proc_macro` crate. +This library serves two purposes: + +- **Bring proc-macro-like functionality to other contexts like build.rs and + main.rs.** Types from `proc_macro` are entirely specific to procedural macros + and cannot ever exist in code outside of a procedural macro. Meanwhile + `proc_macro2` types may exist anywhere including non-macro code. 
By developing + foundational libraries like [syn] and [quote] against `proc_macro2` rather + than `proc_macro`, the procedural macro ecosystem becomes easily applicable to + many other use cases and we avoid reimplementing non-macro equivalents of + those libraries. + +- **Make procedural macros unit testable.** As a consequence of being specific + to procedural macros, nothing that uses `proc_macro` can be executed from a + unit test. In order for helper libraries or components of a macro to be + testable in isolation, they must be implemented using `proc_macro2`. + +[syn]: https://github.com/dtolnay/syn +[quote]: https://github.com/dtolnay/quote + +## Usage + +```toml +[dependencies] +proc-macro2 = "1.0" +``` + +The skeleton of a typical procedural macro typically looks like this: + +```rust +extern crate proc_macro; + +#[proc_macro_derive(MyDerive)] +pub fn my_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = proc_macro2::TokenStream::from(input); + + let output: proc_macro2::TokenStream = { + /* transform input */ + }; + + proc_macro::TokenStream::from(output) +} +``` + +If parsing with [Syn], you'll use [`parse_macro_input!`] instead to propagate +parse errors correctly back to the compiler when parsing fails. + +[`parse_macro_input!`]: https://docs.rs/syn/2.0/syn/macro.parse_macro_input.html + +## Unstable features + +The default feature set of proc-macro2 tracks the most recent stable compiler +API. Functionality in `proc_macro` that is not yet stable is not exposed by +proc-macro2 by default. + +To opt into the additional APIs available in the most recent nightly compiler, +the `procmacro2_semver_exempt` config flag must be passed to rustc. We will +polyfill those nightly-only APIs back to Rust 1.56.0. As these are unstable APIs +that track the nightly compiler, minor versions of proc-macro2 may make breaking +changes to them at any time. + +``` +RUSTFLAGS='--cfg procmacro2_semver_exempt' cargo build +``` + +Note that this must not only be done for your crate, but for any crate that +depends on your crate. This infectious nature is intentional, as it serves as a +reminder that you are outside of the normal semver guarantees. + +Semver exempt methods are marked as such in the proc-macro2 documentation. + +
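+To illustrate the testability point above: because `proc_macro2` types can be
+constructed outside of a macro invocation, helper logic built on them can be
+exercised with plain `cargo test`. A minimal sketch (the `make_getter` helper
+is made up for illustration; `quote` is this crate's dev-dependency):
+
+```rust
+use proc_macro2::{Ident, Span, TokenStream};
+use quote::quote;
+
+// Build the tokens for a getter method. Nothing here touches `proc_macro`,
+// so this function is callable from ordinary (non-macro) code.
+fn make_getter(field: &str) -> TokenStream {
+    let ident = Ident::new(field, Span::call_site());
+    quote! {
+        pub fn #ident(&self) -> &str {
+            &self.#ident
+        }
+    }
+}
+
+#[test]
+fn emits_expected_getter() {
+    // TokenStream implements Display, so string assertions work in tests.
+    let tokens = make_getter("name").to_string();
+    assert!(tokens.contains("pub fn name"));
+}
+```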
+
+#### License
+
+Licensed under either of [Apache License, Version 2.0](LICENSE-APACHE) or
+[MIT license](LICENSE-MIT) at your option.
+
+ + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in this crate by you, as defined in the Apache-2.0 license, shall +be dual licensed as above, without any additional terms or conditions. + diff --git a/vendor/proc-macro2/build.rs b/vendor/proc-macro2/build.rs new file mode 100644 index 0000000..3347f87 --- /dev/null +++ b/vendor/proc-macro2/build.rs @@ -0,0 +1,202 @@ +// rustc-cfg emitted by the build script: +// +// "wrap_proc_macro" +// Wrap types from libproc_macro rather than polyfilling the whole API. +// Enabled on rustc 1.29+ as long as procmacro2_semver_exempt is not set, +// because we can't emulate the unstable API without emulating everything +// else. Also enabled unconditionally on nightly, in which case the +// procmacro2_semver_exempt surface area is implemented by using the +// nightly-only proc_macro API. +// +// "hygiene" +// Enable Span::mixed_site() and non-dummy behavior of Span::resolved_at +// and Span::located_at. Enabled on Rust 1.45+. +// +// "proc_macro_span" +// Enable non-dummy behavior of Span::start and Span::end methods which +// requires an unstable compiler feature. Enabled when building with +// nightly, unless `-Z allow-feature` in RUSTFLAGS disallows unstable +// features. +// +// "super_unstable" +// Implement the semver exempt API in terms of the nightly-only proc_macro +// API. Enabled when using procmacro2_semver_exempt on a nightly compiler. +// +// "span_locations" +// Provide methods Span::start and Span::end which give the line/column +// location of a token. Enabled by procmacro2_semver_exempt or the +// "span-locations" Cargo cfg. This is behind a cfg because tracking +// location inside spans is a performance hit. +// +// "is_available" +// Use proc_macro::is_available() to detect if the proc macro API is +// available or needs to be polyfilled instead of trying to use the proc +// macro API and catching a panic if it isn't available. Enabled on Rust +// 1.57+. + +use std::env; +use std::ffi::OsString; +use std::path::Path; +use std::process::{self, Command, Stdio}; +use std::str; +use std::u32; + +fn main() { + let rustc = rustc_minor_version().unwrap_or(u32::MAX); + + let docs_rs = env::var_os("DOCS_RS").is_some(); + let semver_exempt = cfg!(procmacro2_semver_exempt) || docs_rs; + if semver_exempt { + // https://github.com/dtolnay/proc-macro2/issues/147 + println!("cargo:rustc-cfg=procmacro2_semver_exempt"); + } + + if semver_exempt || cfg!(feature = "span-locations") { + println!("cargo:rustc-cfg=span_locations"); + } + + if rustc < 57 { + println!("cargo:rustc-cfg=no_is_available"); + } + + if rustc < 66 { + println!("cargo:rustc-cfg=no_source_text"); + } + + if !cfg!(feature = "proc-macro") { + println!("cargo:rerun-if-changed=build.rs"); + return; + } + + println!("cargo:rerun-if-changed=build/probe.rs"); + + let proc_macro_span; + let consider_rustc_bootstrap; + if compile_probe(false) { + // This is a nightly or dev compiler, so it supports unstable features + // regardless of RUSTC_BOOTSTRAP. No need to rerun build script if + // RUSTC_BOOTSTRAP is changed. + proc_macro_span = true; + consider_rustc_bootstrap = false; + } else if let Some(rustc_bootstrap) = env::var_os("RUSTC_BOOTSTRAP") { + if compile_probe(true) { + // This is a stable or beta compiler for which the user has set + // RUSTC_BOOTSTRAP to turn on unstable features. Rerun build script + // if they change it. 
+ proc_macro_span = true; + consider_rustc_bootstrap = true; + } else if rustc_bootstrap == "1" { + // This compiler does not support the proc macro Span API in the + // form that proc-macro2 expects. No need to pay attention to + // RUSTC_BOOTSTRAP. + proc_macro_span = false; + consider_rustc_bootstrap = false; + } else { + // This is a stable or beta compiler for which RUSTC_BOOTSTRAP is + // set to restrict the use of unstable features by this crate. + proc_macro_span = false; + consider_rustc_bootstrap = true; + } + } else { + // Without RUSTC_BOOTSTRAP, this compiler does not support the proc + // macro Span API in the form that proc-macro2 expects, but try again if + // the user turns on unstable features. + proc_macro_span = false; + consider_rustc_bootstrap = true; + } + + if proc_macro_span || !semver_exempt { + println!("cargo:rustc-cfg=wrap_proc_macro"); + } + + if proc_macro_span { + println!("cargo:rustc-cfg=proc_macro_span"); + } + + if semver_exempt && proc_macro_span { + println!("cargo:rustc-cfg=super_unstable"); + } + + if consider_rustc_bootstrap { + println!("cargo:rerun-if-env-changed=RUSTC_BOOTSTRAP"); + } +} + +fn compile_probe(rustc_bootstrap: bool) -> bool { + if env::var_os("RUSTC_STAGE").is_some() { + // We are running inside rustc bootstrap. This is a highly non-standard + // environment with issues such as: + // + // https://github.com/rust-lang/cargo/issues/11138 + // https://github.com/rust-lang/rust/issues/114839 + // + // Let's just not use nightly features here. + return false; + } + + let rustc = cargo_env_var("RUSTC"); + let out_dir = cargo_env_var("OUT_DIR"); + let probefile = Path::new("build").join("probe.rs"); + + // Make sure to pick up Cargo rustc configuration. + let mut cmd = if let Some(wrapper) = env::var_os("RUSTC_WRAPPER") { + let mut cmd = Command::new(wrapper); + // The wrapper's first argument is supposed to be the path to rustc. + cmd.arg(rustc); + cmd + } else { + Command::new(rustc) + }; + + if !rustc_bootstrap { + cmd.env_remove("RUSTC_BOOTSTRAP"); + } + + cmd.stderr(Stdio::null()) + .arg("--edition=2021") + .arg("--crate-name=proc_macro2") + .arg("--crate-type=lib") + .arg("--emit=dep-info,metadata") + .arg("--out-dir") + .arg(out_dir) + .arg(probefile); + + if let Some(target) = env::var_os("TARGET") { + cmd.arg("--target").arg(target); + } + + // If Cargo wants to set RUSTFLAGS, use that. + if let Ok(rustflags) = env::var("CARGO_ENCODED_RUSTFLAGS") { + if !rustflags.is_empty() { + for arg in rustflags.split('\x1f') { + cmd.arg(arg); + } + } + } + + match cmd.status() { + Ok(status) => status.success(), + Err(_) => false, + } +} + +fn rustc_minor_version() -> Option { + let rustc = cargo_env_var("RUSTC"); + let output = Command::new(rustc).arg("--version").output().ok()?; + let version = str::from_utf8(&output.stdout).ok()?; + let mut pieces = version.split('.'); + if pieces.next() != Some("rustc 1") { + return None; + } + pieces.next()?.parse().ok() +} + +fn cargo_env_var(key: &str) -> OsString { + env::var_os(key).unwrap_or_else(|| { + eprintln!( + "Environment variable ${} is not set during execution of build script", + key, + ); + process::exit(1); + }) +} diff --git a/vendor/proc-macro2/build/probe.rs b/vendor/proc-macro2/build/probe.rs new file mode 100644 index 0000000..5afa13a --- /dev/null +++ b/vendor/proc-macro2/build/probe.rs @@ -0,0 +1,21 @@ +// This code exercises the surface area that we expect of Span's unstable API. 
+// If the current toolchain is able to compile it, then proc-macro2 is able to +// offer these APIs too. + +#![feature(proc_macro_span)] + +extern crate proc_macro; + +use core::ops::RangeBounds; +use proc_macro::{Literal, Span}; + +pub fn join(this: &Span, other: Span) -> Option { + this.join(other) +} + +pub fn subspan>(this: &Literal, range: R) -> Option { + this.subspan(range) +} + +// Include in sccache cache key. +const _: Option<&str> = option_env!("RUSTC_BOOTSTRAP"); diff --git a/vendor/proc-macro2/rust-toolchain.toml b/vendor/proc-macro2/rust-toolchain.toml new file mode 100644 index 0000000..20fe888 --- /dev/null +++ b/vendor/proc-macro2/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +components = ["rust-src"] diff --git a/vendor/proc-macro2/src/detection.rs b/vendor/proc-macro2/src/detection.rs new file mode 100644 index 0000000..beba7b2 --- /dev/null +++ b/vendor/proc-macro2/src/detection.rs @@ -0,0 +1,75 @@ +use core::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Once; + +static WORKS: AtomicUsize = AtomicUsize::new(0); +static INIT: Once = Once::new(); + +pub(crate) fn inside_proc_macro() -> bool { + match WORKS.load(Ordering::Relaxed) { + 1 => return false, + 2 => return true, + _ => {} + } + + INIT.call_once(initialize); + inside_proc_macro() +} + +pub(crate) fn force_fallback() { + WORKS.store(1, Ordering::Relaxed); +} + +pub(crate) fn unforce_fallback() { + initialize(); +} + +#[cfg(not(no_is_available))] +fn initialize() { + let available = proc_macro::is_available(); + WORKS.store(available as usize + 1, Ordering::Relaxed); +} + +// Swap in a null panic hook to avoid printing "thread panicked" to stderr, +// then use catch_unwind to determine whether the compiler's proc_macro is +// working. When proc-macro2 is used from outside of a procedural macro all +// of the proc_macro crate's APIs currently panic. +// +// The Once is to prevent the possibility of this ordering: +// +// thread 1 calls take_hook, gets the user's original hook +// thread 1 calls set_hook with the null hook +// thread 2 calls take_hook, thinks null hook is the original hook +// thread 2 calls set_hook with the null hook +// thread 1 calls set_hook with the actual original hook +// thread 2 calls set_hook with what it thinks is the original hook +// +// in which the user's hook has been lost. +// +// There is still a race condition where a panic in a different thread can +// happen during the interval that the user's original panic hook is +// unregistered such that their hook is incorrectly not called. This is +// sufficiently unlikely and less bad than printing panic messages to stderr +// on correct use of this crate. Maybe there is a libstd feature request +// here. For now, if a user needs to guarantee that this failure mode does +// not occur, they need to call e.g. `proc_macro2::Span::call_site()` from +// the main thread before launching any other threads. 
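+//
+// The encoding of WORKS: 0 means "not yet determined", 1 means "proc_macro
+// is not available, so use the fallback", and 2 means "proc_macro works";
+// hence the `as usize + 1` stores in both versions of `initialize`.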
+#[cfg(no_is_available)] +fn initialize() { + use std::panic::{self, PanicInfo}; + + type PanicHook = dyn Fn(&PanicInfo) + Sync + Send + 'static; + + let null_hook: Box = Box::new(|_panic_info| { /* ignore */ }); + let sanity_check = &*null_hook as *const PanicHook; + let original_hook = panic::take_hook(); + panic::set_hook(null_hook); + + let works = panic::catch_unwind(proc_macro::Span::call_site).is_ok(); + WORKS.store(works as usize + 1, Ordering::Relaxed); + + let hopefully_null_hook = panic::take_hook(); + panic::set_hook(original_hook); + if sanity_check != &*hopefully_null_hook { + panic!("observed race condition in proc_macro2::inside_proc_macro"); + } +} diff --git a/vendor/proc-macro2/src/extra.rs b/vendor/proc-macro2/src/extra.rs new file mode 100644 index 0000000..4a69d46 --- /dev/null +++ b/vendor/proc-macro2/src/extra.rs @@ -0,0 +1,84 @@ +//! Items which do not have a correspondence to any API in the proc_macro crate, +//! but are necessary to include in proc-macro2. + +use crate::fallback; +use crate::imp; +use crate::marker::Marker; +use crate::Span; +use core::fmt::{self, Debug}; + +/// An object that holds a [`Group`]'s `span_open()` and `span_close()` together +/// (in a more compact representation than holding those 2 spans individually. +/// +/// [`Group`]: crate::Group +#[derive(Copy, Clone)] +pub struct DelimSpan { + inner: DelimSpanEnum, + _marker: Marker, +} + +#[derive(Copy, Clone)] +enum DelimSpanEnum { + #[cfg(wrap_proc_macro)] + Compiler { + join: proc_macro::Span, + open: proc_macro::Span, + close: proc_macro::Span, + }, + Fallback(fallback::Span), +} + +impl DelimSpan { + pub(crate) fn new(group: &imp::Group) -> Self { + #[cfg(wrap_proc_macro)] + let inner = match group { + imp::Group::Compiler(group) => DelimSpanEnum::Compiler { + join: group.span(), + open: group.span_open(), + close: group.span_close(), + }, + imp::Group::Fallback(group) => DelimSpanEnum::Fallback(group.span()), + }; + + #[cfg(not(wrap_proc_macro))] + let inner = DelimSpanEnum::Fallback(group.span()); + + DelimSpan { + inner, + _marker: Marker, + } + } + + /// Returns a span covering the entire delimited group. + pub fn join(&self) -> Span { + match &self.inner { + #[cfg(wrap_proc_macro)] + DelimSpanEnum::Compiler { join, .. } => Span::_new(imp::Span::Compiler(*join)), + DelimSpanEnum::Fallback(span) => Span::_new_fallback(*span), + } + } + + /// Returns a span for the opening punctuation of the group only. + pub fn open(&self) -> Span { + match &self.inner { + #[cfg(wrap_proc_macro)] + DelimSpanEnum::Compiler { open, .. } => Span::_new(imp::Span::Compiler(*open)), + DelimSpanEnum::Fallback(span) => Span::_new_fallback(span.first_byte()), + } + } + + /// Returns a span for the closing punctuation of the group only. + pub fn close(&self) -> Span { + match &self.inner { + #[cfg(wrap_proc_macro)] + DelimSpanEnum::Compiler { close, .. 
} => Span::_new(imp::Span::Compiler(*close)), + DelimSpanEnum::Fallback(span) => Span::_new_fallback(span.last_byte()), + } + } +} + +impl Debug for DelimSpan { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&self.join(), f) + } +} diff --git a/vendor/proc-macro2/src/fallback.rs b/vendor/proc-macro2/src/fallback.rs new file mode 100644 index 0000000..7b40427 --- /dev/null +++ b/vendor/proc-macro2/src/fallback.rs @@ -0,0 +1,1143 @@ +#[cfg(span_locations)] +use crate::location::LineColumn; +use crate::parse::{self, Cursor}; +use crate::rcvec::{RcVec, RcVecBuilder, RcVecIntoIter, RcVecMut}; +use crate::{Delimiter, Spacing, TokenTree}; +#[cfg(all(span_locations, not(fuzzing)))] +use alloc::collections::BTreeMap; +#[cfg(all(span_locations, not(fuzzing)))] +use core::cell::RefCell; +#[cfg(span_locations)] +use core::cmp; +use core::fmt::{self, Debug, Display, Write}; +use core::mem::ManuallyDrop; +use core::ops::RangeBounds; +use core::ptr; +use core::str::FromStr; +use std::path::PathBuf; + +/// Force use of proc-macro2's fallback implementation of the API for now, even +/// if the compiler's implementation is available. +pub fn force() { + #[cfg(wrap_proc_macro)] + crate::detection::force_fallback(); +} + +/// Resume using the compiler's implementation of the proc macro API if it is +/// available. +pub fn unforce() { + #[cfg(wrap_proc_macro)] + crate::detection::unforce_fallback(); +} + +#[derive(Clone)] +pub(crate) struct TokenStream { + inner: RcVec, +} + +#[derive(Debug)] +pub(crate) struct LexError { + pub(crate) span: Span, +} + +impl LexError { + pub(crate) fn span(&self) -> Span { + self.span + } + + pub(crate) fn call_site() -> Self { + LexError { + span: Span::call_site(), + } + } +} + +impl TokenStream { + pub fn new() -> Self { + TokenStream { + inner: RcVecBuilder::new().build(), + } + } + + pub fn is_empty(&self) -> bool { + self.inner.len() == 0 + } + + fn take_inner(self) -> RcVecBuilder { + let nodrop = ManuallyDrop::new(self); + unsafe { ptr::read(&nodrop.inner) }.make_owned() + } +} + +fn push_token_from_proc_macro(mut vec: RcVecMut, token: TokenTree) { + // https://github.com/dtolnay/proc-macro2/issues/235 + match token { + TokenTree::Literal(crate::Literal { + #[cfg(wrap_proc_macro)] + inner: crate::imp::Literal::Fallback(literal), + #[cfg(not(wrap_proc_macro))] + inner: literal, + .. + }) if literal.repr.starts_with('-') => { + push_negative_literal(vec, literal); + } + _ => vec.push(token), + } + + #[cold] + fn push_negative_literal(mut vec: RcVecMut, mut literal: Literal) { + literal.repr.remove(0); + let mut punct = crate::Punct::new('-', Spacing::Alone); + punct.set_span(crate::Span::_new_fallback(literal.span)); + vec.push(TokenTree::Punct(punct)); + vec.push(TokenTree::Literal(crate::Literal::_new_fallback(literal))); + } +} + +// Nonrecursive to prevent stack overflow. 
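+//
+// A sufficiently deep nesting of groups would otherwise recurse once per
+// nesting level when dropped; instead, tokens are popped off a worklist and
+// each group's stream is spliced back onto it.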
+impl Drop for TokenStream { + fn drop(&mut self) { + let mut inner = match self.inner.get_mut() { + Some(inner) => inner, + None => return, + }; + while let Some(token) = inner.pop() { + let group = match token { + TokenTree::Group(group) => group.inner, + _ => continue, + }; + #[cfg(wrap_proc_macro)] + let group = match group { + crate::imp::Group::Fallback(group) => group, + crate::imp::Group::Compiler(_) => continue, + }; + inner.extend(group.stream.take_inner()); + } + } +} + +pub(crate) struct TokenStreamBuilder { + inner: RcVecBuilder, +} + +impl TokenStreamBuilder { + pub fn new() -> Self { + TokenStreamBuilder { + inner: RcVecBuilder::new(), + } + } + + pub fn with_capacity(cap: usize) -> Self { + TokenStreamBuilder { + inner: RcVecBuilder::with_capacity(cap), + } + } + + pub fn push_token_from_parser(&mut self, tt: TokenTree) { + self.inner.push(tt); + } + + pub fn build(self) -> TokenStream { + TokenStream { + inner: self.inner.build(), + } + } +} + +#[cfg(span_locations)] +fn get_cursor(src: &str) -> Cursor { + #[cfg(fuzzing)] + return Cursor { rest: src, off: 1 }; + + // Create a dummy file & add it to the source map + #[cfg(not(fuzzing))] + SOURCE_MAP.with(|cm| { + let mut cm = cm.borrow_mut(); + let span = cm.add_file(src); + Cursor { + rest: src, + off: span.lo, + } + }) +} + +#[cfg(not(span_locations))] +fn get_cursor(src: &str) -> Cursor { + Cursor { rest: src } +} + +impl FromStr for TokenStream { + type Err = LexError; + + fn from_str(src: &str) -> Result { + // Create a dummy file & add it to the source map + let mut cursor = get_cursor(src); + + // Strip a byte order mark if present + const BYTE_ORDER_MARK: &str = "\u{feff}"; + if cursor.starts_with(BYTE_ORDER_MARK) { + cursor = cursor.advance(BYTE_ORDER_MARK.len()); + } + + parse::token_stream(cursor) + } +} + +impl Display for LexError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("cannot parse string into token stream") + } +} + +impl Display for TokenStream { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut joint = false; + for (i, tt) in self.inner.iter().enumerate() { + if i != 0 && !joint { + write!(f, " ")?; + } + joint = false; + match tt { + TokenTree::Group(tt) => Display::fmt(tt, f), + TokenTree::Ident(tt) => Display::fmt(tt, f), + TokenTree::Punct(tt) => { + joint = tt.spacing() == Spacing::Joint; + Display::fmt(tt, f) + } + TokenTree::Literal(tt) => Display::fmt(tt, f), + }?; + } + + Ok(()) + } +} + +impl Debug for TokenStream { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("TokenStream ")?; + f.debug_list().entries(self.clone()).finish() + } +} + +#[cfg(feature = "proc-macro")] +impl From for TokenStream { + fn from(inner: proc_macro::TokenStream) -> Self { + inner + .to_string() + .parse() + .expect("compiler token stream parse failed") + } +} + +#[cfg(feature = "proc-macro")] +impl From for proc_macro::TokenStream { + fn from(inner: TokenStream) -> Self { + inner + .to_string() + .parse() + .expect("failed to parse to compiler tokens") + } +} + +impl From for TokenStream { + fn from(tree: TokenTree) -> Self { + let mut stream = RcVecBuilder::new(); + push_token_from_proc_macro(stream.as_mut(), tree); + TokenStream { + inner: stream.build(), + } + } +} + +impl FromIterator for TokenStream { + fn from_iter>(tokens: I) -> Self { + let mut stream = TokenStream::new(); + stream.extend(tokens); + stream + } +} + +impl FromIterator for TokenStream { + fn from_iter>(streams: I) -> Self { + let mut v = RcVecBuilder::new(); + + for stream in 
streams { + v.extend(stream.take_inner()); + } + + TokenStream { inner: v.build() } + } +} + +impl Extend for TokenStream { + fn extend>(&mut self, tokens: I) { + let mut vec = self.inner.make_mut(); + tokens + .into_iter() + .for_each(|token| push_token_from_proc_macro(vec.as_mut(), token)); + } +} + +impl Extend for TokenStream { + fn extend>(&mut self, streams: I) { + self.inner.make_mut().extend(streams.into_iter().flatten()); + } +} + +pub(crate) type TokenTreeIter = RcVecIntoIter; + +impl IntoIterator for TokenStream { + type Item = TokenTree; + type IntoIter = TokenTreeIter; + + fn into_iter(self) -> TokenTreeIter { + self.take_inner().into_iter() + } +} + +#[derive(Clone, PartialEq, Eq)] +pub(crate) struct SourceFile { + path: PathBuf, +} + +impl SourceFile { + /// Get the path to this source file as a string. + pub fn path(&self) -> PathBuf { + self.path.clone() + } + + pub fn is_real(&self) -> bool { + false + } +} + +impl Debug for SourceFile { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("SourceFile") + .field("path", &self.path()) + .field("is_real", &self.is_real()) + .finish() + } +} + +#[cfg(all(span_locations, not(fuzzing)))] +thread_local! { + static SOURCE_MAP: RefCell = RefCell::new(SourceMap { + // Start with a single dummy file which all call_site() and def_site() + // spans reference. + files: vec![FileInfo { + source_text: String::new(), + span: Span { lo: 0, hi: 0 }, + lines: vec![0], + char_index_to_byte_offset: BTreeMap::new(), + }], + }); +} + +#[cfg(all(span_locations, not(fuzzing)))] +struct FileInfo { + source_text: String, + span: Span, + lines: Vec, + char_index_to_byte_offset: BTreeMap, +} + +#[cfg(all(span_locations, not(fuzzing)))] +impl FileInfo { + fn offset_line_column(&self, offset: usize) -> LineColumn { + assert!(self.span_within(Span { + lo: offset as u32, + hi: offset as u32, + })); + let offset = offset - self.span.lo as usize; + match self.lines.binary_search(&offset) { + Ok(found) => LineColumn { + line: found + 1, + column: 0, + }, + Err(idx) => LineColumn { + line: idx, + column: offset - self.lines[idx - 1], + }, + } + } + + fn span_within(&self, span: Span) -> bool { + span.lo >= self.span.lo && span.hi <= self.span.hi + } + + fn source_text(&mut self, span: Span) -> String { + let lo_char = (span.lo - self.span.lo) as usize; + + // Look up offset of the largest already-computed char index that is + // less than or equal to the current requested one. We resume counting + // chars from that point. + let (&last_char_index, &last_byte_offset) = self + .char_index_to_byte_offset + .range(..=lo_char) + .next_back() + .unwrap_or((&0, &0)); + + let lo_byte = if last_char_index == lo_char { + last_byte_offset + } else { + let total_byte_offset = match self.source_text[last_byte_offset..] 
+ .char_indices() + .nth(lo_char - last_char_index) + { + Some((additional_offset, _ch)) => last_byte_offset + additional_offset, + None => self.source_text.len(), + }; + self.char_index_to_byte_offset + .insert(lo_char, total_byte_offset); + total_byte_offset + }; + + let trunc_lo = &self.source_text[lo_byte..]; + let char_len = (span.hi - span.lo) as usize; + let source_text = match trunc_lo.char_indices().nth(char_len) { + Some((offset, _ch)) => &trunc_lo[..offset], + None => trunc_lo, + }; + source_text.to_owned() + } +} + +/// Computes the offsets of each line in the given source string +/// and the total number of characters +#[cfg(all(span_locations, not(fuzzing)))] +fn lines_offsets(s: &str) -> (usize, Vec) { + let mut lines = vec![0]; + let mut total = 0; + + for ch in s.chars() { + total += 1; + if ch == '\n' { + lines.push(total); + } + } + + (total, lines) +} + +#[cfg(all(span_locations, not(fuzzing)))] +struct SourceMap { + files: Vec, +} + +#[cfg(all(span_locations, not(fuzzing)))] +impl SourceMap { + fn next_start_pos(&self) -> u32 { + // Add 1 so there's always space between files. + // + // We'll always have at least 1 file, as we initialize our files list + // with a dummy file. + self.files.last().unwrap().span.hi + 1 + } + + fn add_file(&mut self, src: &str) -> Span { + let (len, lines) = lines_offsets(src); + let lo = self.next_start_pos(); + let span = Span { + lo, + hi: lo + (len as u32), + }; + + self.files.push(FileInfo { + source_text: src.to_owned(), + span, + lines, + // Populated lazily by source_text(). + char_index_to_byte_offset: BTreeMap::new(), + }); + + span + } + + #[cfg(procmacro2_semver_exempt)] + fn filepath(&self, span: Span) -> PathBuf { + for (i, file) in self.files.iter().enumerate() { + if file.span_within(span) { + return PathBuf::from(if i == 0 { + "".to_owned() + } else { + format!("", i) + }); + } + } + unreachable!("Invalid span with no related FileInfo!"); + } + + fn fileinfo(&self, span: Span) -> &FileInfo { + for file in &self.files { + if file.span_within(span) { + return file; + } + } + unreachable!("Invalid span with no related FileInfo!"); + } + + fn fileinfo_mut(&mut self, span: Span) -> &mut FileInfo { + for file in &mut self.files { + if file.span_within(span) { + return file; + } + } + unreachable!("Invalid span with no related FileInfo!"); + } +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) struct Span { + #[cfg(span_locations)] + pub(crate) lo: u32, + #[cfg(span_locations)] + pub(crate) hi: u32, +} + +impl Span { + #[cfg(not(span_locations))] + pub fn call_site() -> Self { + Span {} + } + + #[cfg(span_locations)] + pub fn call_site() -> Self { + Span { lo: 0, hi: 0 } + } + + pub fn mixed_site() -> Self { + Span::call_site() + } + + #[cfg(procmacro2_semver_exempt)] + pub fn def_site() -> Self { + Span::call_site() + } + + pub fn resolved_at(&self, _other: Span) -> Span { + // Stable spans consist only of line/column information, so + // `resolved_at` and `located_at` only select which span the + // caller wants line/column information from. 
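+        //
+        // In other words, `resolved_at` keeps `self`'s location and
+        // `located_at` below returns `other`'s.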
+ *self + } + + pub fn located_at(&self, other: Span) -> Span { + other + } + + #[cfg(procmacro2_semver_exempt)] + pub fn source_file(&self) -> SourceFile { + #[cfg(fuzzing)] + return SourceFile { + path: PathBuf::from(""), + }; + + #[cfg(not(fuzzing))] + SOURCE_MAP.with(|cm| { + let cm = cm.borrow(); + let path = cm.filepath(*self); + SourceFile { path } + }) + } + + #[cfg(span_locations)] + pub fn start(&self) -> LineColumn { + #[cfg(fuzzing)] + return LineColumn { line: 0, column: 0 }; + + #[cfg(not(fuzzing))] + SOURCE_MAP.with(|cm| { + let cm = cm.borrow(); + let fi = cm.fileinfo(*self); + fi.offset_line_column(self.lo as usize) + }) + } + + #[cfg(span_locations)] + pub fn end(&self) -> LineColumn { + #[cfg(fuzzing)] + return LineColumn { line: 0, column: 0 }; + + #[cfg(not(fuzzing))] + SOURCE_MAP.with(|cm| { + let cm = cm.borrow(); + let fi = cm.fileinfo(*self); + fi.offset_line_column(self.hi as usize) + }) + } + + #[cfg(not(span_locations))] + pub fn join(&self, _other: Span) -> Option { + Some(Span {}) + } + + #[cfg(span_locations)] + pub fn join(&self, other: Span) -> Option { + #[cfg(fuzzing)] + return { + let _ = other; + None + }; + + #[cfg(not(fuzzing))] + SOURCE_MAP.with(|cm| { + let cm = cm.borrow(); + // If `other` is not within the same FileInfo as us, return None. + if !cm.fileinfo(*self).span_within(other) { + return None; + } + Some(Span { + lo: cmp::min(self.lo, other.lo), + hi: cmp::max(self.hi, other.hi), + }) + }) + } + + #[cfg(not(span_locations))] + pub fn source_text(&self) -> Option { + None + } + + #[cfg(span_locations)] + pub fn source_text(&self) -> Option { + #[cfg(fuzzing)] + return None; + + #[cfg(not(fuzzing))] + { + if self.is_call_site() { + None + } else { + Some(SOURCE_MAP.with(|cm| cm.borrow_mut().fileinfo_mut(*self).source_text(*self))) + } + } + } + + #[cfg(not(span_locations))] + pub(crate) fn first_byte(self) -> Self { + self + } + + #[cfg(span_locations)] + pub(crate) fn first_byte(self) -> Self { + Span { + lo: self.lo, + hi: cmp::min(self.lo.saturating_add(1), self.hi), + } + } + + #[cfg(not(span_locations))] + pub(crate) fn last_byte(self) -> Self { + self + } + + #[cfg(span_locations)] + pub(crate) fn last_byte(self) -> Self { + Span { + lo: cmp::max(self.hi.saturating_sub(1), self.lo), + hi: self.hi, + } + } + + #[cfg(span_locations)] + fn is_call_site(&self) -> bool { + self.lo == 0 && self.hi == 0 + } +} + +impl Debug for Span { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + #[cfg(span_locations)] + return write!(f, "bytes({}..{})", self.lo, self.hi); + + #[cfg(not(span_locations))] + write!(f, "Span") + } +} + +pub(crate) fn debug_span_field_if_nontrivial(debug: &mut fmt::DebugStruct, span: Span) { + #[cfg(span_locations)] + { + if span.is_call_site() { + return; + } + } + + if cfg!(span_locations) { + debug.field("span", &span); + } +} + +#[derive(Clone)] +pub(crate) struct Group { + delimiter: Delimiter, + stream: TokenStream, + span: Span, +} + +impl Group { + pub fn new(delimiter: Delimiter, stream: TokenStream) -> Self { + Group { + delimiter, + stream, + span: Span::call_site(), + } + } + + pub fn delimiter(&self) -> Delimiter { + self.delimiter + } + + pub fn stream(&self) -> TokenStream { + self.stream.clone() + } + + pub fn span(&self) -> Span { + self.span + } + + pub fn span_open(&self) -> Span { + self.span.first_byte() + } + + pub fn span_close(&self) -> Span { + self.span.last_byte() + } + + pub fn set_span(&mut self, span: Span) { + self.span = span; + } +} + +impl Display for Group { + // We attempt to match 
libproc_macro's formatting. + // Empty parens: () + // Nonempty parens: (...) + // Empty brackets: [] + // Nonempty brackets: [...] + // Empty braces: { } + // Nonempty braces: { ... } + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let (open, close) = match self.delimiter { + Delimiter::Parenthesis => ("(", ")"), + Delimiter::Brace => ("{ ", "}"), + Delimiter::Bracket => ("[", "]"), + Delimiter::None => ("", ""), + }; + + f.write_str(open)?; + Display::fmt(&self.stream, f)?; + if self.delimiter == Delimiter::Brace && !self.stream.inner.is_empty() { + f.write_str(" ")?; + } + f.write_str(close)?; + + Ok(()) + } +} + +impl Debug for Group { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + let mut debug = fmt.debug_struct("Group"); + debug.field("delimiter", &self.delimiter); + debug.field("stream", &self.stream); + debug_span_field_if_nontrivial(&mut debug, self.span); + debug.finish() + } +} + +#[derive(Clone)] +pub(crate) struct Ident { + sym: String, + span: Span, + raw: bool, +} + +impl Ident { + #[track_caller] + pub fn new_checked(string: &str, span: Span) -> Self { + validate_ident(string); + Ident::new_unchecked(string, span) + } + + pub fn new_unchecked(string: &str, span: Span) -> Self { + Ident { + sym: string.to_owned(), + span, + raw: false, + } + } + + #[track_caller] + pub fn new_raw_checked(string: &str, span: Span) -> Self { + validate_ident_raw(string); + Ident::new_raw_unchecked(string, span) + } + + pub fn new_raw_unchecked(string: &str, span: Span) -> Self { + Ident { + sym: string.to_owned(), + span, + raw: true, + } + } + + pub fn span(&self) -> Span { + self.span + } + + pub fn set_span(&mut self, span: Span) { + self.span = span; + } +} + +pub(crate) fn is_ident_start(c: char) -> bool { + c == '_' || unicode_ident::is_xid_start(c) +} + +pub(crate) fn is_ident_continue(c: char) -> bool { + unicode_ident::is_xid_continue(c) +} + +#[track_caller] +fn validate_ident(string: &str) { + if string.is_empty() { + panic!("Ident is not allowed to be empty; use Option"); + } + + if string.bytes().all(|digit| b'0' <= digit && digit <= b'9') { + panic!("Ident cannot be a number; use Literal instead"); + } + + fn ident_ok(string: &str) -> bool { + let mut chars = string.chars(); + let first = chars.next().unwrap(); + if !is_ident_start(first) { + return false; + } + for ch in chars { + if !is_ident_continue(ch) { + return false; + } + } + true + } + + if !ident_ok(string) { + panic!("{:?} is not a valid Ident", string); + } +} + +#[track_caller] +fn validate_ident_raw(string: &str) { + validate_ident(string); + + match string { + "_" | "super" | "self" | "Self" | "crate" => { + panic!("`r#{}` cannot be a raw identifier", string); + } + _ => {} + } +} + +impl PartialEq for Ident { + fn eq(&self, other: &Ident) -> bool { + self.sym == other.sym && self.raw == other.raw + } +} + +impl PartialEq for Ident +where + T: ?Sized + AsRef, +{ + fn eq(&self, other: &T) -> bool { + let other = other.as_ref(); + if self.raw { + other.starts_with("r#") && self.sym == other[2..] 
+ } else { + self.sym == other + } + } +} + +impl Display for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.raw { + f.write_str("r#")?; + } + Display::fmt(&self.sym, f) + } +} + +#[allow(clippy::missing_fields_in_debug)] +impl Debug for Ident { + // Ident(proc_macro), Ident(r#union) + #[cfg(not(span_locations))] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut debug = f.debug_tuple("Ident"); + debug.field(&format_args!("{}", self)); + debug.finish() + } + + // Ident { + // sym: proc_macro, + // span: bytes(128..138) + // } + #[cfg(span_locations)] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut debug = f.debug_struct("Ident"); + debug.field("sym", &format_args!("{}", self)); + debug_span_field_if_nontrivial(&mut debug, self.span); + debug.finish() + } +} + +#[derive(Clone)] +pub(crate) struct Literal { + repr: String, + span: Span, +} + +macro_rules! suffixed_numbers { + ($($name:ident => $kind:ident,)*) => ($( + pub fn $name(n: $kind) -> Literal { + Literal::_new(format!(concat!("{}", stringify!($kind)), n)) + } + )*) +} + +macro_rules! unsuffixed_numbers { + ($($name:ident => $kind:ident,)*) => ($( + pub fn $name(n: $kind) -> Literal { + Literal::_new(n.to_string()) + } + )*) +} + +impl Literal { + pub(crate) fn _new(repr: String) -> Self { + Literal { + repr, + span: Span::call_site(), + } + } + + pub(crate) unsafe fn from_str_unchecked(repr: &str) -> Self { + Literal::_new(repr.to_owned()) + } + + suffixed_numbers! { + u8_suffixed => u8, + u16_suffixed => u16, + u32_suffixed => u32, + u64_suffixed => u64, + u128_suffixed => u128, + usize_suffixed => usize, + i8_suffixed => i8, + i16_suffixed => i16, + i32_suffixed => i32, + i64_suffixed => i64, + i128_suffixed => i128, + isize_suffixed => isize, + + f32_suffixed => f32, + f64_suffixed => f64, + } + + unsuffixed_numbers! { + u8_unsuffixed => u8, + u16_unsuffixed => u16, + u32_unsuffixed => u32, + u64_unsuffixed => u64, + u128_unsuffixed => u128, + usize_unsuffixed => usize, + i8_unsuffixed => i8, + i16_unsuffixed => i16, + i32_unsuffixed => i32, + i64_unsuffixed => i64, + i128_unsuffixed => i128, + isize_unsuffixed => isize, + } + + pub fn f32_unsuffixed(f: f32) -> Literal { + let mut s = f.to_string(); + if !s.contains('.') { + s.push_str(".0"); + } + Literal::_new(s) + } + + pub fn f64_unsuffixed(f: f64) -> Literal { + let mut s = f.to_string(); + if !s.contains('.') { + s.push_str(".0"); + } + Literal::_new(s) + } + + pub fn string(t: &str) -> Literal { + let mut repr = String::with_capacity(t.len() + 2); + repr.push('"'); + let mut chars = t.chars(); + while let Some(ch) = chars.next() { + if ch == '\0' { + repr.push_str( + if chars + .as_str() + .starts_with(|next| '0' <= next && next <= '7') + { + // circumvent clippy::octal_escapes lint + "\\x00" + } else { + "\\0" + }, + ); + } else if ch == '\'' { + // escape_debug turns this into "\'" which is unnecessary. + repr.push(ch); + } else { + repr.extend(ch.escape_debug()); + } + } + repr.push('"'); + Literal::_new(repr) + } + + pub fn character(t: char) -> Literal { + let mut repr = String::new(); + repr.push('\''); + if t == '"' { + // escape_debug turns this into '\"' which is unnecessary. 
+ repr.push(t); + } else { + repr.extend(t.escape_debug()); + } + repr.push('\''); + Literal::_new(repr) + } + + pub fn byte_string(bytes: &[u8]) -> Literal { + let mut escaped = "b\"".to_string(); + let mut bytes = bytes.iter(); + while let Some(&b) = bytes.next() { + #[allow(clippy::match_overlapping_arm)] + match b { + b'\0' => escaped.push_str(match bytes.as_slice().first() { + // circumvent clippy::octal_escapes lint + Some(b'0'..=b'7') => r"\x00", + _ => r"\0", + }), + b'\t' => escaped.push_str(r"\t"), + b'\n' => escaped.push_str(r"\n"), + b'\r' => escaped.push_str(r"\r"), + b'"' => escaped.push_str("\\\""), + b'\\' => escaped.push_str("\\\\"), + b'\x20'..=b'\x7E' => escaped.push(b as char), + _ => { + let _ = write!(escaped, "\\x{:02X}", b); + } + } + } + escaped.push('"'); + Literal::_new(escaped) + } + + pub fn span(&self) -> Span { + self.span + } + + pub fn set_span(&mut self, span: Span) { + self.span = span; + } + + pub fn subspan>(&self, range: R) -> Option { + #[cfg(not(span_locations))] + { + let _ = range; + None + } + + #[cfg(span_locations)] + { + use core::ops::Bound; + + let lo = match range.start_bound() { + Bound::Included(start) => { + let start = u32::try_from(*start).ok()?; + self.span.lo.checked_add(start)? + } + Bound::Excluded(start) => { + let start = u32::try_from(*start).ok()?; + self.span.lo.checked_add(start)?.checked_add(1)? + } + Bound::Unbounded => self.span.lo, + }; + let hi = match range.end_bound() { + Bound::Included(end) => { + let end = u32::try_from(*end).ok()?; + self.span.lo.checked_add(end)?.checked_add(1)? + } + Bound::Excluded(end) => { + let end = u32::try_from(*end).ok()?; + self.span.lo.checked_add(end)? + } + Bound::Unbounded => self.span.hi, + }; + if lo <= hi && hi <= self.span.hi { + Some(Span { lo, hi }) + } else { + None + } + } + } +} + +impl FromStr for Literal { + type Err = LexError; + + fn from_str(repr: &str) -> Result { + let mut cursor = get_cursor(repr); + #[cfg(span_locations)] + let lo = cursor.off; + + let negative = cursor.starts_with_char('-'); + if negative { + cursor = cursor.advance(1); + if !cursor.starts_with_fn(|ch| ch.is_ascii_digit()) { + return Err(LexError::call_site()); + } + } + + if let Ok((rest, mut literal)) = parse::literal(cursor) { + if rest.is_empty() { + if negative { + literal.repr.insert(0, '-'); + } + literal.span = Span { + #[cfg(span_locations)] + lo, + #[cfg(span_locations)] + hi: rest.off, + }; + return Ok(literal); + } + } + Err(LexError::call_site()) + } +} + +impl Display for Literal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(&self.repr, f) + } +} + +impl Debug for Literal { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + let mut debug = fmt.debug_struct("Literal"); + debug.field("lit", &format_args!("{}", self.repr)); + debug_span_field_if_nontrivial(&mut debug, self.span); + debug.finish() + } +} diff --git a/vendor/proc-macro2/src/lib.rs b/vendor/proc-macro2/src/lib.rs new file mode 100644 index 0000000..7e8f543 --- /dev/null +++ b/vendor/proc-macro2/src/lib.rs @@ -0,0 +1,1328 @@ +//! [![github]](https://github.com/dtolnay/proc-macro2) [![crates-io]](https://crates.io/crates/proc-macro2) [![docs-rs]](crate) +//! +//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github +//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust +//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs +//! 
+//!
+//! +//! A wrapper around the procedural macro API of the compiler's [`proc_macro`] +//! crate. This library serves two purposes: +//! +//! [`proc_macro`]: https://doc.rust-lang.org/proc_macro/ +//! +//! - **Bring proc-macro-like functionality to other contexts like build.rs and +//! main.rs.** Types from `proc_macro` are entirely specific to procedural +//! macros and cannot ever exist in code outside of a procedural macro. +//! Meanwhile `proc_macro2` types may exist anywhere including non-macro code. +//! By developing foundational libraries like [syn] and [quote] against +//! `proc_macro2` rather than `proc_macro`, the procedural macro ecosystem +//! becomes easily applicable to many other use cases and we avoid +//! reimplementing non-macro equivalents of those libraries. +//! +//! - **Make procedural macros unit testable.** As a consequence of being +//! specific to procedural macros, nothing that uses `proc_macro` can be +//! executed from a unit test. In order for helper libraries or components of +//! a macro to be testable in isolation, they must be implemented using +//! `proc_macro2`. +//! +//! [syn]: https://github.com/dtolnay/syn +//! [quote]: https://github.com/dtolnay/quote +//! +//! # Usage +//! +//! The skeleton of a typical procedural macro typically looks like this: +//! +//! ``` +//! extern crate proc_macro; +//! +//! # const IGNORE: &str = stringify! { +//! #[proc_macro_derive(MyDerive)] +//! # }; +//! # #[cfg(wrap_proc_macro)] +//! pub fn my_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { +//! let input = proc_macro2::TokenStream::from(input); +//! +//! let output: proc_macro2::TokenStream = { +//! /* transform input */ +//! # input +//! }; +//! +//! proc_macro::TokenStream::from(output) +//! } +//! ``` +//! +//! If parsing with [Syn], you'll use [`parse_macro_input!`] instead to +//! propagate parse errors correctly back to the compiler when parsing fails. +//! +//! [`parse_macro_input!`]: https://docs.rs/syn/2.0/syn/macro.parse_macro_input.html +//! +//! # Unstable features +//! +//! The default feature set of proc-macro2 tracks the most recent stable +//! compiler API. Functionality in `proc_macro` that is not yet stable is not +//! exposed by proc-macro2 by default. +//! +//! To opt into the additional APIs available in the most recent nightly +//! compiler, the `procmacro2_semver_exempt` config flag must be passed to +//! rustc. We will polyfill those nightly-only APIs back to Rust 1.56.0. As +//! these are unstable APIs that track the nightly compiler, minor versions of +//! proc-macro2 may make breaking changes to them at any time. +//! +//! ```sh +//! RUSTFLAGS='--cfg procmacro2_semver_exempt' cargo build +//! ``` +//! +//! Note that this must not only be done for your crate, but for any crate that +//! depends on your crate. This infectious nature is intentional, as it serves +//! as a reminder that you are outside of the normal semver guarantees. +//! +//! Semver exempt methods are marked as such in the proc-macro2 documentation. +//! +//! # Thread-Safety +//! +//! Most types in this crate are `!Sync` because the underlying compiler +//! types make use of thread-local memory, meaning they cannot be accessed from +//! a different thread. + +// Proc-macro2 types in rustdoc of other crates get linked to here. 
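+// A minimal non-macro usage sketch of the first bullet above (illustrative
+// only; uses just the public API declared in this file):
+//
+//     let tokens: proc_macro2::TokenStream =
+//         "fn answer() -> u8 { 42 }".parse().unwrap();
+//     // Seven top-level token trees: fn, answer, (), -, >, u8, { 42 }.
+//     assert_eq!(tokens.into_iter().count(), 7);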
+#![doc(html_root_url = "https://docs.rs/proc-macro2/1.0.76")] +#![cfg_attr(any(proc_macro_span, super_unstable), feature(proc_macro_span))] +#![cfg_attr(super_unstable, feature(proc_macro_def_site))] +#![cfg_attr(doc_cfg, feature(doc_cfg))] +#![deny(unsafe_op_in_unsafe_fn)] +#![allow( + clippy::cast_lossless, + clippy::cast_possible_truncation, + clippy::checked_conversions, + clippy::doc_markdown, + clippy::items_after_statements, + clippy::iter_without_into_iter, + clippy::let_underscore_untyped, + clippy::manual_assert, + clippy::manual_range_contains, + clippy::missing_safety_doc, + clippy::must_use_candidate, + clippy::needless_doctest_main, + clippy::new_without_default, + clippy::return_self_not_must_use, + clippy::shadow_unrelated, + clippy::trivially_copy_pass_by_ref, + clippy::unnecessary_wraps, + clippy::unused_self, + clippy::used_underscore_binding, + clippy::vec_init_then_push +)] + +#[cfg(all(procmacro2_semver_exempt, wrap_proc_macro, not(super_unstable)))] +compile_error! {"\ + Something is not right. If you've tried to turn on \ + procmacro2_semver_exempt, you need to ensure that it \ + is turned on for the compilation of the proc-macro2 \ + build script as well. +"} + +#[cfg(all( + procmacro2_nightly_testing, + feature = "proc-macro", + not(proc_macro_span) +))] +compile_error! {"\ + Build script probe failed to compile. +"} + +extern crate alloc; + +#[cfg(feature = "proc-macro")] +extern crate proc_macro; + +mod marker; +mod parse; +mod rcvec; + +#[cfg(wrap_proc_macro)] +mod detection; + +// Public for proc_macro2::fallback::force() and unforce(), but those are quite +// a niche use case so we omit it from rustdoc. +#[doc(hidden)] +pub mod fallback; + +pub mod extra; + +#[cfg(not(wrap_proc_macro))] +use crate::fallback as imp; +#[path = "wrapper.rs"] +#[cfg(wrap_proc_macro)] +mod imp; + +#[cfg(span_locations)] +mod location; + +use crate::extra::DelimSpan; +use crate::marker::Marker; +use core::cmp::Ordering; +use core::fmt::{self, Debug, Display}; +use core::hash::{Hash, Hasher}; +use core::ops::RangeBounds; +use core::str::FromStr; +use std::error::Error; +#[cfg(procmacro2_semver_exempt)] +use std::path::PathBuf; + +#[cfg(span_locations)] +#[cfg_attr(doc_cfg, doc(cfg(feature = "span-locations")))] +pub use crate::location::LineColumn; + +/// An abstract stream of tokens, or more concretely a sequence of token trees. +/// +/// This type provides interfaces for iterating over token trees and for +/// collecting token trees into one stream. +/// +/// Token stream is both the input and output of `#[proc_macro]`, +/// `#[proc_macro_attribute]` and `#[proc_macro_derive]` definitions. +#[derive(Clone)] +pub struct TokenStream { + inner: imp::TokenStream, + _marker: Marker, +} + +/// Error returned from `TokenStream::from_str`. +pub struct LexError { + inner: imp::LexError, + _marker: Marker, +} + +impl TokenStream { + fn _new(inner: imp::TokenStream) -> Self { + TokenStream { + inner, + _marker: Marker, + } + } + + fn _new_fallback(inner: fallback::TokenStream) -> Self { + TokenStream { + inner: inner.into(), + _marker: Marker, + } + } + + /// Returns an empty `TokenStream` containing no token trees. + pub fn new() -> Self { + TokenStream::_new(imp::TokenStream::new()) + } + + /// Checks if this `TokenStream` is empty. + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } +} + +/// `TokenStream::default()` returns an empty stream, +/// i.e. this is equivalent with `TokenStream::new()`. 
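+///
+/// For example:
+///
+/// ```
+/// use proc_macro2::TokenStream;
+///
+/// // Both constructors produce a stream containing no token trees.
+/// assert!(TokenStream::new().is_empty());
+/// assert!(TokenStream::default().is_empty());
+/// ```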
+impl Default for TokenStream { + fn default() -> Self { + TokenStream::new() + } +} + +/// Attempts to break the string into tokens and parse those tokens into a token +/// stream. +/// +/// May fail for a number of reasons, for example, if the string contains +/// unbalanced delimiters or characters not existing in the language. +/// +/// NOTE: Some errors may cause panics instead of returning `LexError`. We +/// reserve the right to change these errors into `LexError`s later. +impl FromStr for TokenStream { + type Err = LexError; + + fn from_str(src: &str) -> Result { + let e = src.parse().map_err(|e| LexError { + inner: e, + _marker: Marker, + })?; + Ok(TokenStream::_new(e)) + } +} + +#[cfg(feature = "proc-macro")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "proc-macro")))] +impl From for TokenStream { + fn from(inner: proc_macro::TokenStream) -> Self { + TokenStream::_new(inner.into()) + } +} + +#[cfg(feature = "proc-macro")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "proc-macro")))] +impl From for proc_macro::TokenStream { + fn from(inner: TokenStream) -> Self { + inner.inner.into() + } +} + +impl From for TokenStream { + fn from(token: TokenTree) -> Self { + TokenStream::_new(imp::TokenStream::from(token)) + } +} + +impl Extend for TokenStream { + fn extend>(&mut self, streams: I) { + self.inner.extend(streams); + } +} + +impl Extend for TokenStream { + fn extend>(&mut self, streams: I) { + self.inner + .extend(streams.into_iter().map(|stream| stream.inner)); + } +} + +/// Collects a number of token trees into a single stream. +impl FromIterator for TokenStream { + fn from_iter>(streams: I) -> Self { + TokenStream::_new(streams.into_iter().collect()) + } +} +impl FromIterator for TokenStream { + fn from_iter>(streams: I) -> Self { + TokenStream::_new(streams.into_iter().map(|i| i.inner).collect()) + } +} + +/// Prints the token stream as a string that is supposed to be losslessly +/// convertible back into the same token stream (modulo spans), except for +/// possibly `TokenTree::Group`s with `Delimiter::None` delimiters and negative +/// numeric literals. +impl Display for TokenStream { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(&self.inner, f) + } +} + +/// Prints token in a form convenient for debugging. +impl Debug for TokenStream { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&self.inner, f) + } +} + +impl LexError { + pub fn span(&self) -> Span { + Span::_new(self.inner.span()) + } +} + +impl Debug for LexError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&self.inner, f) + } +} + +impl Display for LexError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(&self.inner, f) + } +} + +impl Error for LexError {} + +/// The source file of a given `Span`. +/// +/// This type is semver exempt and not exposed by default. +#[cfg(all(procmacro2_semver_exempt, any(not(wrap_proc_macro), super_unstable)))] +#[cfg_attr(doc_cfg, doc(cfg(procmacro2_semver_exempt)))] +#[derive(Clone, PartialEq, Eq)] +pub struct SourceFile { + inner: imp::SourceFile, + _marker: Marker, +} + +#[cfg(all(procmacro2_semver_exempt, any(not(wrap_proc_macro), super_unstable)))] +impl SourceFile { + fn _new(inner: imp::SourceFile) -> Self { + SourceFile { + inner, + _marker: Marker, + } + } + + /// Get the path to this source file. + /// + /// ### Note + /// + /// If the code span associated with this `SourceFile` was generated by an + /// external macro, this may not be an actual path on the filesystem. 
Use + /// [`is_real`] to check. + /// + /// Also note that even if `is_real` returns `true`, if + /// `--remap-path-prefix` was passed on the command line, the path as given + /// may not actually be valid. + /// + /// [`is_real`]: #method.is_real + pub fn path(&self) -> PathBuf { + self.inner.path() + } + + /// Returns `true` if this source file is a real source file, and not + /// generated by an external macro's expansion. + pub fn is_real(&self) -> bool { + self.inner.is_real() + } +} + +#[cfg(all(procmacro2_semver_exempt, any(not(wrap_proc_macro), super_unstable)))] +impl Debug for SourceFile { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&self.inner, f) + } +} + +/// A region of source code, along with macro expansion information. +#[derive(Copy, Clone)] +pub struct Span { + inner: imp::Span, + _marker: Marker, +} + +impl Span { + fn _new(inner: imp::Span) -> Self { + Span { + inner, + _marker: Marker, + } + } + + fn _new_fallback(inner: fallback::Span) -> Self { + Span { + inner: inner.into(), + _marker: Marker, + } + } + + /// The span of the invocation of the current procedural macro. + /// + /// Identifiers created with this span will be resolved as if they were + /// written directly at the macro call location (call-site hygiene) and + /// other code at the macro call site will be able to refer to them as well. + pub fn call_site() -> Self { + Span::_new(imp::Span::call_site()) + } + + /// The span located at the invocation of the procedural macro, but with + /// local variables, labels, and `$crate` resolved at the definition site + /// of the macro. This is the same hygiene behavior as `macro_rules`. + pub fn mixed_site() -> Self { + Span::_new(imp::Span::mixed_site()) + } + + /// A span that resolves at the macro definition site. + /// + /// This method is semver exempt and not exposed by default. + #[cfg(procmacro2_semver_exempt)] + #[cfg_attr(doc_cfg, doc(cfg(procmacro2_semver_exempt)))] + pub fn def_site() -> Self { + Span::_new(imp::Span::def_site()) + } + + /// Creates a new span with the same line/column information as `self` but + /// that resolves symbols as though it were at `other`. + pub fn resolved_at(&self, other: Span) -> Span { + Span::_new(self.inner.resolved_at(other.inner)) + } + + /// Creates a new span with the same name resolution behavior as `self` but + /// with the line/column information of `other`. + pub fn located_at(&self, other: Span) -> Span { + Span::_new(self.inner.located_at(other.inner)) + } + + /// Convert `proc_macro2::Span` to `proc_macro::Span`. + /// + /// This method is available when building with a nightly compiler, or when + /// building with rustc 1.29+ *without* semver exempt features. + /// + /// # Panics + /// + /// Panics if called from outside of a procedural macro. Unlike + /// `proc_macro2::Span`, the `proc_macro::Span` type can only exist within + /// the context of a procedural macro invocation. + #[cfg(wrap_proc_macro)] + pub fn unwrap(self) -> proc_macro::Span { + self.inner.unwrap() + } + + // Soft deprecated. Please use Span::unwrap. + #[cfg(wrap_proc_macro)] + #[doc(hidden)] + pub fn unstable(self) -> proc_macro::Span { + self.unwrap() + } + + /// The original source file into which this span points. + /// + /// This method is semver exempt and not exposed by default. 
+ #[cfg(all(procmacro2_semver_exempt, any(not(wrap_proc_macro), super_unstable)))] + #[cfg_attr(doc_cfg, doc(cfg(procmacro2_semver_exempt)))] + pub fn source_file(&self) -> SourceFile { + SourceFile::_new(self.inner.source_file()) + } + + /// Get the starting line/column in the source file for this span. + /// + /// This method requires the `"span-locations"` feature to be enabled. + /// + /// When executing in a procedural macro context, the returned line/column + /// are only meaningful if compiled with a nightly toolchain. The stable + /// toolchain does not have this information available. When executing + /// outside of a procedural macro, such as main.rs or build.rs, the + /// line/column are always meaningful regardless of toolchain. + #[cfg(span_locations)] + #[cfg_attr(doc_cfg, doc(cfg(feature = "span-locations")))] + pub fn start(&self) -> LineColumn { + self.inner.start() + } + + /// Get the ending line/column in the source file for this span. + /// + /// This method requires the `"span-locations"` feature to be enabled. + /// + /// When executing in a procedural macro context, the returned line/column + /// are only meaningful if compiled with a nightly toolchain. The stable + /// toolchain does not have this information available. When executing + /// outside of a procedural macro, such as main.rs or build.rs, the + /// line/column are always meaningful regardless of toolchain. + #[cfg(span_locations)] + #[cfg_attr(doc_cfg, doc(cfg(feature = "span-locations")))] + pub fn end(&self) -> LineColumn { + self.inner.end() + } + + /// Create a new span encompassing `self` and `other`. + /// + /// Returns `None` if `self` and `other` are from different files. + /// + /// Warning: the underlying [`proc_macro::Span::join`] method is + /// nightly-only. When called from within a procedural macro not using a + /// nightly compiler, this method will always return `None`. + /// + /// [`proc_macro::Span::join`]: https://doc.rust-lang.org/proc_macro/struct.Span.html#method.join + pub fn join(&self, other: Span) -> Option { + self.inner.join(other.inner).map(Span::_new) + } + + /// Compares two spans to see if they're equal. + /// + /// This method is semver exempt and not exposed by default. + #[cfg(procmacro2_semver_exempt)] + #[cfg_attr(doc_cfg, doc(cfg(procmacro2_semver_exempt)))] + pub fn eq(&self, other: &Span) -> bool { + self.inner.eq(&other.inner) + } + + /// Returns the source text behind a span. This preserves the original + /// source code, including spaces and comments. It only returns a result if + /// the span corresponds to real source code. + /// + /// Note: The observable result of a macro should only rely on the tokens + /// and not on this source text. The result of this function is a best + /// effort to be used for diagnostics only. + pub fn source_text(&self) -> Option { + self.inner.source_text() + } +} + +/// Prints a span in a form convenient for debugging. +impl Debug for Span { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&self.inner, f) + } +} + +/// A single token or a delimited sequence of token trees (e.g. `[1, (), ..]`). +#[derive(Clone)] +pub enum TokenTree { + /// A token stream surrounded by bracket delimiters. + Group(Group), + /// An identifier. + Ident(Ident), + /// A single punctuation character (`+`, `,`, `$`, etc.). + Punct(Punct), + /// A literal character (`'a'`), string (`"hello"`), number (`2.3`), etc. 
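+    ///
+    /// Note that boolean literals like `true` and `false` are `Ident`s, not
+    /// `Literal`s.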
+ Literal(Literal), +} + +impl TokenTree { + /// Returns the span of this tree, delegating to the `span` method of + /// the contained token or a delimited stream. + pub fn span(&self) -> Span { + match self { + TokenTree::Group(t) => t.span(), + TokenTree::Ident(t) => t.span(), + TokenTree::Punct(t) => t.span(), + TokenTree::Literal(t) => t.span(), + } + } + + /// Configures the span for *only this token*. + /// + /// Note that if this token is a `Group` then this method will not configure + /// the span of each of the internal tokens, this will simply delegate to + /// the `set_span` method of each variant. + pub fn set_span(&mut self, span: Span) { + match self { + TokenTree::Group(t) => t.set_span(span), + TokenTree::Ident(t) => t.set_span(span), + TokenTree::Punct(t) => t.set_span(span), + TokenTree::Literal(t) => t.set_span(span), + } + } +} + +impl From for TokenTree { + fn from(g: Group) -> Self { + TokenTree::Group(g) + } +} + +impl From for TokenTree { + fn from(g: Ident) -> Self { + TokenTree::Ident(g) + } +} + +impl From for TokenTree { + fn from(g: Punct) -> Self { + TokenTree::Punct(g) + } +} + +impl From for TokenTree { + fn from(g: Literal) -> Self { + TokenTree::Literal(g) + } +} + +/// Prints the token tree as a string that is supposed to be losslessly +/// convertible back into the same token tree (modulo spans), except for +/// possibly `TokenTree::Group`s with `Delimiter::None` delimiters and negative +/// numeric literals. +impl Display for TokenTree { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TokenTree::Group(t) => Display::fmt(t, f), + TokenTree::Ident(t) => Display::fmt(t, f), + TokenTree::Punct(t) => Display::fmt(t, f), + TokenTree::Literal(t) => Display::fmt(t, f), + } + } +} + +/// Prints token tree in a form convenient for debugging. +impl Debug for TokenTree { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + // Each of these has the name in the struct type in the derived debug, + // so don't bother with an extra layer of indirection + match self { + TokenTree::Group(t) => Debug::fmt(t, f), + TokenTree::Ident(t) => { + let mut debug = f.debug_struct("Ident"); + debug.field("sym", &format_args!("{}", t)); + imp::debug_span_field_if_nontrivial(&mut debug, t.span().inner); + debug.finish() + } + TokenTree::Punct(t) => Debug::fmt(t, f), + TokenTree::Literal(t) => Debug::fmt(t, f), + } + } +} + +/// A delimited token stream. +/// +/// A `Group` internally contains a `TokenStream` which is surrounded by +/// `Delimiter`s. +#[derive(Clone)] +pub struct Group { + inner: imp::Group, +} + +/// Describes how a sequence of token trees is delimited. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Delimiter { + /// `( ... )` + Parenthesis, + /// `{ ... }` + Brace, + /// `[ ... ]` + Bracket, + /// `Ø ... Ø` + /// + /// An implicit delimiter, that may, for example, appear around tokens + /// coming from a "macro variable" `$var`. It is important to preserve + /// operator priorities in cases like `$var * 3` where `$var` is `1 + 2`. + /// Implicit delimiters may not survive roundtrip of a token stream through + /// a string. + None, +} + +impl Group { + fn _new(inner: imp::Group) -> Self { + Group { inner } + } + + fn _new_fallback(inner: fallback::Group) -> Self { + Group { + inner: inner.into(), + } + } + + /// Creates a new `Group` with the given delimiter and token stream. + /// + /// This constructor will set the span for this group to + /// `Span::call_site()`. 
To change the span you can use the `set_span` + /// method below. + pub fn new(delimiter: Delimiter, stream: TokenStream) -> Self { + Group { + inner: imp::Group::new(delimiter, stream.inner), + } + } + + /// Returns the punctuation used as the delimiter for this group: a set of + /// parentheses, square brackets, or curly braces. + pub fn delimiter(&self) -> Delimiter { + self.inner.delimiter() + } + + /// Returns the `TokenStream` of tokens that are delimited in this `Group`. + /// + /// Note that the returned token stream does not include the delimiter + /// returned above. + pub fn stream(&self) -> TokenStream { + TokenStream::_new(self.inner.stream()) + } + + /// Returns the span for the delimiters of this token stream, spanning the + /// entire `Group`. + /// + /// ```text + /// pub fn span(&self) -> Span { + /// ^^^^^^^ + /// ``` + pub fn span(&self) -> Span { + Span::_new(self.inner.span()) + } + + /// Returns the span pointing to the opening delimiter of this group. + /// + /// ```text + /// pub fn span_open(&self) -> Span { + /// ^ + /// ``` + pub fn span_open(&self) -> Span { + Span::_new(self.inner.span_open()) + } + + /// Returns the span pointing to the closing delimiter of this group. + /// + /// ```text + /// pub fn span_close(&self) -> Span { + /// ^ + /// ``` + pub fn span_close(&self) -> Span { + Span::_new(self.inner.span_close()) + } + + /// Returns an object that holds this group's `span_open()` and + /// `span_close()` together (in a more compact representation than holding + /// those 2 spans individually). + pub fn delim_span(&self) -> DelimSpan { + DelimSpan::new(&self.inner) + } + + /// Configures the span for this `Group`'s delimiters, but not its internal + /// tokens. + /// + /// This method will **not** set the span of all the internal tokens spanned + /// by this group, but rather it will only set the span of the delimiter + /// tokens at the level of the `Group`. + pub fn set_span(&mut self, span: Span) { + self.inner.set_span(span.inner); + } +} + +/// Prints the group as a string that should be losslessly convertible back +/// into the same group (modulo spans), except for possibly `TokenTree::Group`s +/// with `Delimiter::None` delimiters. +impl Display for Group { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(&self.inner, formatter) + } +} + +impl Debug for Group { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&self.inner, formatter) + } +} + +/// A `Punct` is a single punctuation character like `+`, `-` or `#`. +/// +/// Multicharacter operators like `+=` are represented as two instances of +/// `Punct` with different forms of `Spacing` returned. +#[derive(Clone)] +pub struct Punct { + ch: char, + spacing: Spacing, + span: Span, +} + +/// Whether a `Punct` is followed immediately by another `Punct` or followed by +/// another token or whitespace. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Spacing { + /// E.g. `+` is `Alone` in `+ =`, `+ident` or `+()`. + Alone, + /// E.g. `+` is `Joint` in `+=` or `'` is `Joint` in `'#`. + /// + /// Additionally, single quote `'` can join with identifiers to form + /// lifetimes `'ident`. + Joint, +} + +impl Punct { + /// Creates a new `Punct` from the given character and spacing. + /// + /// The `ch` argument must be a valid punctuation character permitted by the + /// language, otherwise the function will panic. 
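+    ///
+    /// For example:
+    ///
+    /// ```
+    /// use proc_macro2::{Punct, Spacing};
+    ///
+    /// // A lone `+` operator: Alone spacing, since no other punct follows.
+    /// let plus = Punct::new('+', Spacing::Alone);
+    /// assert_eq!(plus.as_char(), '+');
+    /// ```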
+ /// + /// The returned `Punct` will have the default span of `Span::call_site()` + /// which can be further configured with the `set_span` method below. + pub fn new(ch: char, spacing: Spacing) -> Self { + Punct { + ch, + spacing, + span: Span::call_site(), + } + } + + /// Returns the value of this punctuation character as `char`. + pub fn as_char(&self) -> char { + self.ch + } + + /// Returns the spacing of this punctuation character, indicating whether + /// it's immediately followed by another `Punct` in the token stream, so + /// they can potentially be combined into a multicharacter operator + /// (`Joint`), or it's followed by some other token or whitespace (`Alone`) + /// so the operator has certainly ended. + pub fn spacing(&self) -> Spacing { + self.spacing + } + + /// Returns the span for this punctuation character. + pub fn span(&self) -> Span { + self.span + } + + /// Configure the span for this punctuation character. + pub fn set_span(&mut self, span: Span) { + self.span = span; + } +} + +/// Prints the punctuation character as a string that should be losslessly +/// convertible back into the same character. +impl Display for Punct { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(&self.ch, f) + } +} + +impl Debug for Punct { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + let mut debug = fmt.debug_struct("Punct"); + debug.field("char", &self.ch); + debug.field("spacing", &self.spacing); + imp::debug_span_field_if_nontrivial(&mut debug, self.span.inner); + debug.finish() + } +} + +/// A word of Rust code, which may be a keyword or legal variable name. +/// +/// An identifier consists of at least one Unicode code point, the first of +/// which has the XID_Start property and the rest of which have the XID_Continue +/// property. +/// +/// - The empty string is not an identifier. Use `Option`. +/// - A lifetime is not an identifier. Use `syn::Lifetime` instead. +/// +/// An identifier constructed with `Ident::new` is permitted to be a Rust +/// keyword, though parsing one through its [`Parse`] implementation rejects +/// Rust keywords. Use `input.call(Ident::parse_any)` when parsing to match the +/// behaviour of `Ident::new`. +/// +/// [`Parse`]: https://docs.rs/syn/2.0/syn/parse/trait.Parse.html +/// +/// # Examples +/// +/// A new ident can be created from a string using the `Ident::new` function. +/// A span must be provided explicitly which governs the name resolution +/// behavior of the resulting identifier. +/// +/// ``` +/// use proc_macro2::{Ident, Span}; +/// +/// fn main() { +/// let call_ident = Ident::new("calligraphy", Span::call_site()); +/// +/// println!("{}", call_ident); +/// } +/// ``` +/// +/// An ident can be interpolated into a token stream using the `quote!` macro. +/// +/// ``` +/// use proc_macro2::{Ident, Span}; +/// use quote::quote; +/// +/// fn main() { +/// let ident = Ident::new("demo", Span::call_site()); +/// +/// // Create a variable binding whose name is this ident. +/// let expanded = quote! { let #ident = 10; }; +/// +/// // Create a variable binding with a slightly different name. +/// let temp_ident = Ident::new(&format!("new_{}", ident), Span::call_site()); +/// let expanded = quote! { let #temp_ident = 10; }; +/// } +/// ``` +/// +/// A string representation of the ident is available through the `to_string()` +/// method. +/// +/// ``` +/// # use proc_macro2::{Ident, Span}; +/// # +/// # let ident = Ident::new("another_identifier", Span::call_site()); +/// # +/// // Examine the ident as a string. 
+/// let ident_string = ident.to_string(); +/// if ident_string.len() > 60 { +/// println!("Very long identifier: {}", ident_string) +/// } +/// ``` +#[derive(Clone)] +pub struct Ident { + inner: imp::Ident, + _marker: Marker, +} + +impl Ident { + fn _new(inner: imp::Ident) -> Self { + Ident { + inner, + _marker: Marker, + } + } + + /// Creates a new `Ident` with the given `string` as well as the specified + /// `span`. + /// + /// The `string` argument must be a valid identifier permitted by the + /// language, otherwise the function will panic. + /// + /// Note that `span`, currently in rustc, configures the hygiene information + /// for this identifier. + /// + /// As of this time `Span::call_site()` explicitly opts-in to "call-site" + /// hygiene meaning that identifiers created with this span will be resolved + /// as if they were written directly at the location of the macro call, and + /// other code at the macro call site will be able to refer to them as well. + /// + /// Later spans like `Span::def_site()` will allow to opt-in to + /// "definition-site" hygiene meaning that identifiers created with this + /// span will be resolved at the location of the macro definition and other + /// code at the macro call site will not be able to refer to them. + /// + /// Due to the current importance of hygiene this constructor, unlike other + /// tokens, requires a `Span` to be specified at construction. + /// + /// # Panics + /// + /// Panics if the input string is neither a keyword nor a legal variable + /// name. If you are not sure whether the string contains an identifier and + /// need to handle an error case, use + /// syn::parse_str::<Ident> + /// rather than `Ident::new`. + #[track_caller] + pub fn new(string: &str, span: Span) -> Self { + Ident::_new(imp::Ident::new_checked(string, span.inner)) + } + + /// Same as `Ident::new`, but creates a raw identifier (`r#ident`). The + /// `string` argument must be a valid identifier permitted by the language + /// (including keywords, e.g. `fn`). Keywords which are usable in path + /// segments (e.g. `self`, `super`) are not supported, and will cause a + /// panic. + #[track_caller] + pub fn new_raw(string: &str, span: Span) -> Self { + Ident::_new(imp::Ident::new_raw_checked(string, span.inner)) + } + + /// Returns the span of this `Ident`. + pub fn span(&self) -> Span { + Span::_new(self.inner.span()) + } + + /// Configures the span of this `Ident`, possibly changing its hygiene + /// context. + pub fn set_span(&mut self, span: Span) { + self.inner.set_span(span.inner); + } +} + +impl PartialEq for Ident { + fn eq(&self, other: &Ident) -> bool { + self.inner == other.inner + } +} + +impl PartialEq for Ident +where + T: ?Sized + AsRef, +{ + fn eq(&self, other: &T) -> bool { + self.inner == other + } +} + +impl Eq for Ident {} + +impl PartialOrd for Ident { + fn partial_cmp(&self, other: &Ident) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Ident { + fn cmp(&self, other: &Ident) -> Ordering { + self.to_string().cmp(&other.to_string()) + } +} + +impl Hash for Ident { + fn hash(&self, hasher: &mut H) { + self.to_string().hash(hasher); + } +} + +/// Prints the identifier as a string that should be losslessly convertible back +/// into the same identifier. 
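+///
+/// For example:
+///
+/// ```
+/// use proc_macro2::{Ident, Span};
+///
+/// // Raw identifiers display with their `r#` prefix.
+/// assert_eq!(Ident::new("union", Span::call_site()).to_string(), "union");
+/// assert_eq!(Ident::new_raw("union", Span::call_site()).to_string(), "r#union");
+/// ```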
+impl Display for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(&self.inner, f) + } +} + +impl Debug for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&self.inner, f) + } +} + +/// A literal string (`"hello"`), byte string (`b"hello"`), character (`'a'`), +/// byte character (`b'a'`), an integer or floating point number with or without +/// a suffix (`1`, `1u8`, `2.3`, `2.3f32`). +/// +/// Boolean literals like `true` and `false` do not belong here, they are +/// `Ident`s. +#[derive(Clone)] +pub struct Literal { + inner: imp::Literal, + _marker: Marker, +} + +macro_rules! suffixed_int_literals { + ($($name:ident => $kind:ident,)*) => ($( + /// Creates a new suffixed integer literal with the specified value. + /// + /// This function will create an integer like `1u32` where the integer + /// value specified is the first part of the token and the integral is + /// also suffixed at the end. Literals created from negative numbers may + /// not survive roundtrips through `TokenStream` or strings and may be + /// broken into two tokens (`-` and positive literal). + /// + /// Literals created through this method have the `Span::call_site()` + /// span by default, which can be configured with the `set_span` method + /// below. + pub fn $name(n: $kind) -> Literal { + Literal::_new(imp::Literal::$name(n)) + } + )*) +} + +macro_rules! unsuffixed_int_literals { + ($($name:ident => $kind:ident,)*) => ($( + /// Creates a new unsuffixed integer literal with the specified value. + /// + /// This function will create an integer like `1` where the integer + /// value specified is the first part of the token. No suffix is + /// specified on this token, meaning that invocations like + /// `Literal::i8_unsuffixed(1)` are equivalent to + /// `Literal::u32_unsuffixed(1)`. Literals created from negative numbers + /// may not survive roundtrips through `TokenStream` or strings and may + /// be broken into two tokens (`-` and positive literal). + /// + /// Literals created through this method have the `Span::call_site()` + /// span by default, which can be configured with the `set_span` method + /// below. + pub fn $name(n: $kind) -> Literal { + Literal::_new(imp::Literal::$name(n)) + } + )*) +} + +impl Literal { + fn _new(inner: imp::Literal) -> Self { + Literal { + inner, + _marker: Marker, + } + } + + fn _new_fallback(inner: fallback::Literal) -> Self { + Literal { + inner: inner.into(), + _marker: Marker, + } + } + + suffixed_int_literals! { + u8_suffixed => u8, + u16_suffixed => u16, + u32_suffixed => u32, + u64_suffixed => u64, + u128_suffixed => u128, + usize_suffixed => usize, + i8_suffixed => i8, + i16_suffixed => i16, + i32_suffixed => i32, + i64_suffixed => i64, + i128_suffixed => i128, + isize_suffixed => isize, + } + + unsuffixed_int_literals! { + u8_unsuffixed => u8, + u16_unsuffixed => u16, + u32_unsuffixed => u32, + u64_unsuffixed => u64, + u128_unsuffixed => u128, + usize_unsuffixed => usize, + i8_unsuffixed => i8, + i16_unsuffixed => i16, + i32_unsuffixed => i32, + i64_unsuffixed => i64, + i128_unsuffixed => i128, + isize_unsuffixed => isize, + } + + /// Creates a new unsuffixed floating-point literal. + /// + /// This constructor is similar to those like `Literal::i8_unsuffixed` where + /// the float's value is emitted directly into the token but no suffix is + /// used, so it may be inferred to be a `f64` later in the compiler. 
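+    /// For example, `Literal::f64_unsuffixed(10.0)` produces the token `10.0`.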
+ /// Literals created from negative numbers may not survive round-trips + /// through `TokenStream` or strings and may be broken into two tokens (`-` + /// and positive literal). + /// + /// # Panics + /// + /// This function requires that the specified float is finite, for example + /// if it is infinity or NaN this function will panic. + pub fn f64_unsuffixed(f: f64) -> Literal { + assert!(f.is_finite()); + Literal::_new(imp::Literal::f64_unsuffixed(f)) + } + + /// Creates a new suffixed floating-point literal. + /// + /// This constructor will create a literal like `1.0f64` where the value + /// specified is the preceding part of the token and `f64` is the suffix of + /// the token. This token will always be inferred to be an `f64` in the + /// compiler. Literals created from negative numbers may not survive + /// round-trips through `TokenStream` or strings and may be broken into two + /// tokens (`-` and positive literal). + /// + /// # Panics + /// + /// This function requires that the specified float is finite, for example + /// if it is infinity or NaN this function will panic. + pub fn f64_suffixed(f: f64) -> Literal { + assert!(f.is_finite()); + Literal::_new(imp::Literal::f64_suffixed(f)) + } + + /// Creates a new unsuffixed floating-point literal. + /// + /// This constructor is similar to those like `Literal::i8_unsuffixed` where + /// the float's value is emitted directly into the token but no suffix is + /// used, so it may be inferred to be a `f64` later in the compiler. + /// Literals created from negative numbers may not survive round-trips + /// through `TokenStream` or strings and may be broken into two tokens (`-` + /// and positive literal). + /// + /// # Panics + /// + /// This function requires that the specified float is finite, for example + /// if it is infinity or NaN this function will panic. + pub fn f32_unsuffixed(f: f32) -> Literal { + assert!(f.is_finite()); + Literal::_new(imp::Literal::f32_unsuffixed(f)) + } + + /// Creates a new suffixed floating-point literal. + /// + /// This constructor will create a literal like `1.0f32` where the value + /// specified is the preceding part of the token and `f32` is the suffix of + /// the token. This token will always be inferred to be an `f32` in the + /// compiler. Literals created from negative numbers may not survive + /// round-trips through `TokenStream` or strings and may be broken into two + /// tokens (`-` and positive literal). + /// + /// # Panics + /// + /// This function requires that the specified float is finite, for example + /// if it is infinity or NaN this function will panic. + pub fn f32_suffixed(f: f32) -> Literal { + assert!(f.is_finite()); + Literal::_new(imp::Literal::f32_suffixed(f)) + } + + /// String literal. + pub fn string(string: &str) -> Literal { + Literal::_new(imp::Literal::string(string)) + } + + /// Character literal. + pub fn character(ch: char) -> Literal { + Literal::_new(imp::Literal::character(ch)) + } + + /// Byte string literal. + pub fn byte_string(s: &[u8]) -> Literal { + Literal::_new(imp::Literal::byte_string(s)) + } + + /// Returns the span encompassing this literal. + pub fn span(&self) -> Span { + Span::_new(self.inner.span()) + } + + /// Configures the span associated for this literal. + pub fn set_span(&mut self, span: Span) { + self.inner.set_span(span.inner); + } + + /// Returns a `Span` that is a subset of `self.span()` containing only + /// the source bytes in range `range`. Returns `None` if the would-be + /// trimmed span is outside the bounds of `self`. 
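+    ///
+    /// A best-effort sketch (on a stable compiler inside a proc macro this is
+    /// always `None`):
+    ///
+    /// ```
+    /// use proc_macro2::Literal;
+    ///
+    /// let lit = Literal::string("hello");
+    /// // Try to span just the `ell` inside the literal's source text.
+    /// let sub = lit.subspan(2..5);
+    /// # let _ = sub;
+    /// ```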
+ /// + /// Warning: the underlying [`proc_macro::Literal::subspan`] method is + /// nightly-only. When called from within a procedural macro not using a + /// nightly compiler, this method will always return `None`. + /// + /// [`proc_macro::Literal::subspan`]: https://doc.rust-lang.org/proc_macro/struct.Literal.html#method.subspan + pub fn subspan>(&self, range: R) -> Option { + self.inner.subspan(range).map(Span::_new) + } + + // Intended for the `quote!` macro to use when constructing a proc-macro2 + // token out of a macro_rules $:literal token, which is already known to be + // a valid literal. This avoids reparsing/validating the literal's string + // representation. This is not public API other than for quote. + #[doc(hidden)] + pub unsafe fn from_str_unchecked(repr: &str) -> Self { + Literal::_new(unsafe { imp::Literal::from_str_unchecked(repr) }) + } +} + +impl FromStr for Literal { + type Err = LexError; + + fn from_str(repr: &str) -> Result { + repr.parse().map(Literal::_new).map_err(|inner| LexError { + inner, + _marker: Marker, + }) + } +} + +impl Debug for Literal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&self.inner, f) + } +} + +impl Display for Literal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(&self.inner, f) + } +} + +/// Public implementation details for the `TokenStream` type, such as iterators. +pub mod token_stream { + use crate::marker::Marker; + use crate::{imp, TokenTree}; + use core::fmt::{self, Debug}; + + pub use crate::TokenStream; + + /// An iterator over `TokenStream`'s `TokenTree`s. + /// + /// The iteration is "shallow", e.g. the iterator doesn't recurse into + /// delimited groups, and returns whole groups as token trees. + #[derive(Clone)] + pub struct IntoIter { + inner: imp::TokenTreeIter, + _marker: Marker, + } + + impl Iterator for IntoIter { + type Item = TokenTree; + + fn next(&mut self) -> Option { + self.inner.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + } + + impl Debug for IntoIter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("TokenStream ")?; + f.debug_list().entries(self.clone()).finish() + } + } + + impl IntoIterator for TokenStream { + type Item = TokenTree; + type IntoIter = IntoIter; + + fn into_iter(self) -> IntoIter { + IntoIter { + inner: self.inner.into_iter(), + _marker: Marker, + } + } + } +} diff --git a/vendor/proc-macro2/src/location.rs b/vendor/proc-macro2/src/location.rs new file mode 100644 index 0000000..463026c --- /dev/null +++ b/vendor/proc-macro2/src/location.rs @@ -0,0 +1,29 @@ +use core::cmp::Ordering; + +/// A line-column pair representing the start or end of a `Span`. +/// +/// This type is semver exempt and not exposed by default. +#[cfg_attr(doc_cfg, doc(cfg(feature = "span-locations")))] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct LineColumn { + /// The 1-indexed line in the source file on which the span starts or ends + /// (inclusive). + pub line: usize, + /// The 0-indexed column (in UTF-8 characters) in the source file on which + /// the span starts or ends (inclusive). 
+ pub column: usize, +} + +impl Ord for LineColumn { + fn cmp(&self, other: &Self) -> Ordering { + self.line + .cmp(&other.line) + .then(self.column.cmp(&other.column)) + } +} + +impl PartialOrd for LineColumn { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} diff --git a/vendor/proc-macro2/src/marker.rs b/vendor/proc-macro2/src/marker.rs new file mode 100644 index 0000000..e8874bd --- /dev/null +++ b/vendor/proc-macro2/src/marker.rs @@ -0,0 +1,21 @@ +use alloc::rc::Rc; +use core::marker::PhantomData; +use core::panic::{RefUnwindSafe, UnwindSafe}; + +// Zero sized marker with the correct set of autotrait impls we want all proc +// macro types to have. +pub(crate) type Marker = PhantomData; + +pub(crate) use self::value::*; + +mod value { + pub(crate) use core::marker::PhantomData as Marker; +} + +pub(crate) struct ProcMacroAutoTraits( + #[allow(dead_code)] // https://github.com/rust-lang/rust/issues/119645 + Rc<()>, +); + +impl UnwindSafe for ProcMacroAutoTraits {} +impl RefUnwindSafe for ProcMacroAutoTraits {} diff --git a/vendor/proc-macro2/src/parse.rs b/vendor/proc-macro2/src/parse.rs new file mode 100644 index 0000000..07239bc --- /dev/null +++ b/vendor/proc-macro2/src/parse.rs @@ -0,0 +1,996 @@ +use crate::fallback::{ + self, is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream, + TokenStreamBuilder, +}; +use crate::{Delimiter, Punct, Spacing, TokenTree}; +use core::char; +use core::str::{Bytes, CharIndices, Chars}; + +#[derive(Copy, Clone, Eq, PartialEq)] +pub(crate) struct Cursor<'a> { + pub rest: &'a str, + #[cfg(span_locations)] + pub off: u32, +} + +impl<'a> Cursor<'a> { + pub fn advance(&self, bytes: usize) -> Cursor<'a> { + let (_front, rest) = self.rest.split_at(bytes); + Cursor { + rest, + #[cfg(span_locations)] + off: self.off + _front.chars().count() as u32, + } + } + + pub fn starts_with(&self, s: &str) -> bool { + self.rest.starts_with(s) + } + + pub fn starts_with_char(&self, ch: char) -> bool { + self.rest.starts_with(ch) + } + + pub fn starts_with_fn(&self, f: Pattern) -> bool + where + Pattern: FnMut(char) -> bool, + { + self.rest.starts_with(f) + } + + pub fn is_empty(&self) -> bool { + self.rest.is_empty() + } + + fn len(&self) -> usize { + self.rest.len() + } + + fn as_bytes(&self) -> &'a [u8] { + self.rest.as_bytes() + } + + fn bytes(&self) -> Bytes<'a> { + self.rest.bytes() + } + + fn chars(&self) -> Chars<'a> { + self.rest.chars() + } + + fn char_indices(&self) -> CharIndices<'a> { + self.rest.char_indices() + } + + fn parse(&self, tag: &str) -> Result, Reject> { + if self.starts_with(tag) { + Ok(self.advance(tag.len())) + } else { + Err(Reject) + } + } +} + +pub(crate) struct Reject; +type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>; + +fn skip_whitespace(input: Cursor) -> Cursor { + let mut s = input; + + while !s.is_empty() { + let byte = s.as_bytes()[0]; + if byte == b'/' { + if s.starts_with("//") + && (!s.starts_with("///") || s.starts_with("////")) + && !s.starts_with("//!") + { + let (cursor, _) = take_until_newline_or_eof(s); + s = cursor; + continue; + } else if s.starts_with("/**/") { + s = s.advance(4); + continue; + } else if s.starts_with("/*") + && (!s.starts_with("/**") || s.starts_with("/***")) + && !s.starts_with("/*!") + { + match block_comment(s) { + Ok((rest, _)) => { + s = rest; + continue; + } + Err(Reject) => return s, + } + } + } + match byte { + b' ' | 0x09..=0x0d => { + s = s.advance(1); + continue; + } + b if b.is_ascii() => {} + _ => { + let ch = 
s.chars().next().unwrap(); + if is_whitespace(ch) { + s = s.advance(ch.len_utf8()); + continue; + } + } + } + return s; + } + s +} + +fn block_comment(input: Cursor) -> PResult<&str> { + if !input.starts_with("/*") { + return Err(Reject); + } + + let mut depth = 0usize; + let bytes = input.as_bytes(); + let mut i = 0usize; + let upper = bytes.len() - 1; + + while i < upper { + if bytes[i] == b'/' && bytes[i + 1] == b'*' { + depth += 1; + i += 1; // eat '*' + } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { + depth -= 1; + if depth == 0 { + return Ok((input.advance(i + 2), &input.rest[..i + 2])); + } + i += 1; // eat '/' + } + i += 1; + } + + Err(Reject) +} + +fn is_whitespace(ch: char) -> bool { + // Rust treats left-to-right mark and right-to-left mark as whitespace + ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}' +} + +fn word_break(input: Cursor) -> Result { + match input.chars().next() { + Some(ch) if is_ident_continue(ch) => Err(Reject), + Some(_) | None => Ok(input), + } +} + +// Rustc's representation of a macro expansion error in expression position or +// type position. +const ERROR: &str = "(/*ERROR*/)"; + +pub(crate) fn token_stream(mut input: Cursor) -> Result { + let mut trees = TokenStreamBuilder::new(); + let mut stack = Vec::new(); + + loop { + input = skip_whitespace(input); + + if let Ok((rest, ())) = doc_comment(input, &mut trees) { + input = rest; + continue; + } + + #[cfg(span_locations)] + let lo = input.off; + + let first = match input.bytes().next() { + Some(first) => first, + None => match stack.last() { + None => return Ok(trees.build()), + #[cfg(span_locations)] + Some((lo, _frame)) => { + return Err(LexError { + span: Span { lo: *lo, hi: *lo }, + }) + } + #[cfg(not(span_locations))] + Some(_frame) => return Err(LexError { span: Span {} }), + }, + }; + + if let Some(open_delimiter) = match first { + b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis), + b'[' => Some(Delimiter::Bracket), + b'{' => Some(Delimiter::Brace), + _ => None, + } { + input = input.advance(1); + let frame = (open_delimiter, trees); + #[cfg(span_locations)] + let frame = (lo, frame); + stack.push(frame); + trees = TokenStreamBuilder::new(); + } else if let Some(close_delimiter) = match first { + b')' => Some(Delimiter::Parenthesis), + b']' => Some(Delimiter::Bracket), + b'}' => Some(Delimiter::Brace), + _ => None, + } { + let frame = match stack.pop() { + Some(frame) => frame, + None => return Err(lex_error(input)), + }; + #[cfg(span_locations)] + let (lo, frame) = frame; + let (open_delimiter, outer) = frame; + if open_delimiter != close_delimiter { + return Err(lex_error(input)); + } + input = input.advance(1); + let mut g = Group::new(open_delimiter, trees.build()); + g.set_span(Span { + #[cfg(span_locations)] + lo, + #[cfg(span_locations)] + hi: input.off, + }); + trees = outer; + trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g))); + } else { + let (rest, mut tt) = match leaf_token(input) { + Ok((rest, tt)) => (rest, tt), + Err(Reject) => return Err(lex_error(input)), + }; + tt.set_span(crate::Span::_new_fallback(Span { + #[cfg(span_locations)] + lo, + #[cfg(span_locations)] + hi: rest.off, + })); + trees.push_token_from_parser(tt); + input = rest; + } + } +} + +fn lex_error(cursor: Cursor) -> LexError { + #[cfg(not(span_locations))] + let _ = cursor; + LexError { + span: Span { + #[cfg(span_locations)] + lo: cursor.off, + #[cfg(span_locations)] + hi: cursor.off, + }, + } +} + +fn leaf_token(input: Cursor) -> PResult { + if let 
Ok((input, l)) = literal(input) { + // must be parsed before ident + Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l)))) + } else if let Ok((input, p)) = punct(input) { + Ok((input, TokenTree::Punct(p))) + } else if let Ok((input, i)) = ident(input) { + Ok((input, TokenTree::Ident(i))) + } else if input.starts_with(ERROR) { + let rest = input.advance(ERROR.len()); + let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned())); + Ok((rest, TokenTree::Literal(repr))) + } else { + Err(Reject) + } +} + +fn ident(input: Cursor) -> PResult { + if [ + "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#", + ] + .iter() + .any(|prefix| input.starts_with(prefix)) + { + Err(Reject) + } else { + ident_any(input) + } +} + +fn ident_any(input: Cursor) -> PResult { + let raw = input.starts_with("r#"); + let rest = input.advance((raw as usize) << 1); + + let (rest, sym) = ident_not_raw(rest)?; + + if !raw { + let ident = crate::Ident::_new(crate::imp::Ident::new_unchecked( + sym, + fallback::Span::call_site(), + )); + return Ok((rest, ident)); + } + + match sym { + "_" | "super" | "self" | "Self" | "crate" => return Err(Reject), + _ => {} + } + + let ident = crate::Ident::_new(crate::imp::Ident::new_raw_unchecked( + sym, + fallback::Span::call_site(), + )); + Ok((rest, ident)) +} + +fn ident_not_raw(input: Cursor) -> PResult<&str> { + let mut chars = input.char_indices(); + + match chars.next() { + Some((_, ch)) if is_ident_start(ch) => {} + _ => return Err(Reject), + } + + let mut end = input.len(); + for (i, ch) in chars { + if !is_ident_continue(ch) { + end = i; + break; + } + } + + Ok((input.advance(end), &input.rest[..end])) +} + +pub(crate) fn literal(input: Cursor) -> PResult { + let rest = literal_nocapture(input)?; + let end = input.len() - rest.len(); + Ok((rest, Literal::_new(input.rest[..end].to_string()))) +} + +fn literal_nocapture(input: Cursor) -> Result { + if let Ok(ok) = string(input) { + Ok(ok) + } else if let Ok(ok) = byte_string(input) { + Ok(ok) + } else if let Ok(ok) = c_string(input) { + Ok(ok) + } else if let Ok(ok) = byte(input) { + Ok(ok) + } else if let Ok(ok) = character(input) { + Ok(ok) + } else if let Ok(ok) = float(input) { + Ok(ok) + } else if let Ok(ok) = int(input) { + Ok(ok) + } else { + Err(Reject) + } +} + +fn literal_suffix(input: Cursor) -> Cursor { + match ident_not_raw(input) { + Ok((input, _)) => input, + Err(Reject) => input, + } +} + +fn string(input: Cursor) -> Result { + if let Ok(input) = input.parse("\"") { + cooked_string(input) + } else if let Ok(input) = input.parse("r") { + raw_string(input) + } else { + Err(Reject) + } +} + +fn cooked_string(mut input: Cursor) -> Result { + let mut chars = input.char_indices(); + + while let Some((i, ch)) = chars.next() { + match ch { + '"' => { + let input = input.advance(i + 1); + return Ok(literal_suffix(input)); + } + '\r' => match chars.next() { + Some((_, '\n')) => {} + _ => break, + }, + '\\' => match chars.next() { + Some((_, 'x')) => { + backslash_x_char(&mut chars)?; + } + Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {} + Some((_, 'u')) => { + backslash_u(&mut chars)?; + } + Some((newline, ch @ ('\n' | '\r'))) => { + input = input.advance(newline + 1); + trailing_backslash(&mut input, ch as u8)?; + chars = input.char_indices(); + } + _ => break, + }, + _ch => {} + } + } + Err(Reject) +} + +fn raw_string(input: Cursor) -> Result { + let (input, delimiter) = delimiter_of_raw_string(input)?; + let mut bytes = input.bytes().enumerate(); + while let 
Some((i, byte)) = bytes.next() { + match byte { + b'"' if input.rest[i + 1..].starts_with(delimiter) => { + let rest = input.advance(i + 1 + delimiter.len()); + return Ok(literal_suffix(rest)); + } + b'\r' => match bytes.next() { + Some((_, b'\n')) => {} + _ => break, + }, + _ => {} + } + } + Err(Reject) +} + +fn byte_string(input: Cursor) -> Result { + if let Ok(input) = input.parse("b\"") { + cooked_byte_string(input) + } else if let Ok(input) = input.parse("br") { + raw_byte_string(input) + } else { + Err(Reject) + } +} + +fn cooked_byte_string(mut input: Cursor) -> Result { + let mut bytes = input.bytes().enumerate(); + while let Some((offset, b)) = bytes.next() { + match b { + b'"' => { + let input = input.advance(offset + 1); + return Ok(literal_suffix(input)); + } + b'\r' => match bytes.next() { + Some((_, b'\n')) => {} + _ => break, + }, + b'\\' => match bytes.next() { + Some((_, b'x')) => { + backslash_x_byte(&mut bytes)?; + } + Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {} + Some((newline, b @ (b'\n' | b'\r'))) => { + input = input.advance(newline + 1); + trailing_backslash(&mut input, b)?; + bytes = input.bytes().enumerate(); + } + _ => break, + }, + b if b.is_ascii() => {} + _ => break, + } + } + Err(Reject) +} + +fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> { + for (i, byte) in input.bytes().enumerate() { + match byte { + b'"' => { + if i > 255 { + // https://github.com/rust-lang/rust/pull/95251 + return Err(Reject); + } + return Ok((input.advance(i + 1), &input.rest[..i])); + } + b'#' => {} + _ => break, + } + } + Err(Reject) +} + +fn raw_byte_string(input: Cursor) -> Result { + let (input, delimiter) = delimiter_of_raw_string(input)?; + let mut bytes = input.bytes().enumerate(); + while let Some((i, byte)) = bytes.next() { + match byte { + b'"' if input.rest[i + 1..].starts_with(delimiter) => { + let rest = input.advance(i + 1 + delimiter.len()); + return Ok(literal_suffix(rest)); + } + b'\r' => match bytes.next() { + Some((_, b'\n')) => {} + _ => break, + }, + other => { + if !other.is_ascii() { + break; + } + } + } + } + Err(Reject) +} + +fn c_string(input: Cursor) -> Result { + if let Ok(input) = input.parse("c\"") { + cooked_c_string(input) + } else if let Ok(input) = input.parse("cr") { + raw_c_string(input) + } else { + Err(Reject) + } +} + +fn raw_c_string(input: Cursor) -> Result { + let (input, delimiter) = delimiter_of_raw_string(input)?; + let mut bytes = input.bytes().enumerate(); + while let Some((i, byte)) = bytes.next() { + match byte { + b'"' if input.rest[i + 1..].starts_with(delimiter) => { + let rest = input.advance(i + 1 + delimiter.len()); + return Ok(literal_suffix(rest)); + } + b'\r' => match bytes.next() { + Some((_, b'\n')) => {} + _ => break, + }, + b'\0' => break, + _ => {} + } + } + Err(Reject) +} + +fn cooked_c_string(mut input: Cursor) -> Result { + let mut chars = input.char_indices(); + + while let Some((i, ch)) = chars.next() { + match ch { + '"' => { + let input = input.advance(i + 1); + return Ok(literal_suffix(input)); + } + '\r' => match chars.next() { + Some((_, '\n')) => {} + _ => break, + }, + '\\' => match chars.next() { + Some((_, 'x')) => { + backslash_x_nonzero(&mut chars)?; + } + Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {} + Some((_, 'u')) => { + if backslash_u(&mut chars)? 
== '\0' { + break; + } + } + Some((newline, ch @ ('\n' | '\r'))) => { + input = input.advance(newline + 1); + trailing_backslash(&mut input, ch as u8)?; + chars = input.char_indices(); + } + _ => break, + }, + '\0' => break, + _ch => {} + } + } + Err(Reject) +} + +fn byte(input: Cursor) -> Result { + let input = input.parse("b'")?; + let mut bytes = input.bytes().enumerate(); + let ok = match bytes.next().map(|(_, b)| b) { + Some(b'\\') => match bytes.next().map(|(_, b)| b) { + Some(b'x') => backslash_x_byte(&mut bytes).is_ok(), + Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true, + _ => false, + }, + b => b.is_some(), + }; + if !ok { + return Err(Reject); + } + let (offset, _) = bytes.next().ok_or(Reject)?; + if !input.chars().as_str().is_char_boundary(offset) { + return Err(Reject); + } + let input = input.advance(offset).parse("'")?; + Ok(literal_suffix(input)) +} + +fn character(input: Cursor) -> Result { + let input = input.parse("'")?; + let mut chars = input.char_indices(); + let ok = match chars.next().map(|(_, ch)| ch) { + Some('\\') => match chars.next().map(|(_, ch)| ch) { + Some('x') => backslash_x_char(&mut chars).is_ok(), + Some('u') => backslash_u(&mut chars).is_ok(), + Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true, + _ => false, + }, + ch => ch.is_some(), + }; + if !ok { + return Err(Reject); + } + let (idx, _) = chars.next().ok_or(Reject)?; + let input = input.advance(idx).parse("'")?; + Ok(literal_suffix(input)) +} + +macro_rules! next_ch { + ($chars:ident @ $pat:pat) => { + match $chars.next() { + Some((_, ch)) => match ch { + $pat => ch, + _ => return Err(Reject), + }, + None => return Err(Reject), + } + }; +} + +fn backslash_x_char(chars: &mut I) -> Result<(), Reject> +where + I: Iterator, +{ + next_ch!(chars @ '0'..='7'); + next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); + Ok(()) +} + +fn backslash_x_byte(chars: &mut I) -> Result<(), Reject> +where + I: Iterator, +{ + next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); + next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); + Ok(()) +} + +fn backslash_x_nonzero(chars: &mut I) -> Result<(), Reject> +where + I: Iterator, +{ + let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); + let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); + if first == '0' && second == '0' { + Err(Reject) + } else { + Ok(()) + } +} + +fn backslash_u(chars: &mut I) -> Result +where + I: Iterator, +{ + next_ch!(chars @ '{'); + let mut value = 0; + let mut len = 0; + for (_, ch) in chars { + let digit = match ch { + '0'..='9' => ch as u8 - b'0', + 'a'..='f' => 10 + ch as u8 - b'a', + 'A'..='F' => 10 + ch as u8 - b'A', + '_' if len > 0 => continue, + '}' if len > 0 => return char::from_u32(value).ok_or(Reject), + _ => break, + }; + if len == 6 { + break; + } + value *= 0x10; + value += u32::from(digit); + len += 1; + } + Err(Reject) +} + +fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> { + let mut whitespace = input.bytes().enumerate(); + loop { + if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') { + return Err(Reject); + } + match whitespace.next() { + Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => { + last = b; + } + Some((offset, _)) => { + *input = input.advance(offset); + return Ok(()); + } + None => return Err(Reject), + } + } +} + +fn float(input: Cursor) -> Result { + let mut rest = float_digits(input)?; + if let Some(ch) = rest.chars().next() { + if is_ident_start(ch) { + rest = ident_not_raw(rest)?.0; + } + } + 
word_break(rest) +} + +fn float_digits(input: Cursor) -> Result { + let mut chars = input.chars().peekable(); + match chars.next() { + Some(ch) if '0' <= ch && ch <= '9' => {} + _ => return Err(Reject), + } + + let mut len = 1; + let mut has_dot = false; + let mut has_exp = false; + while let Some(&ch) = chars.peek() { + match ch { + '0'..='9' | '_' => { + chars.next(); + len += 1; + } + '.' => { + if has_dot { + break; + } + chars.next(); + if chars + .peek() + .map_or(false, |&ch| ch == '.' || is_ident_start(ch)) + { + return Err(Reject); + } + len += 1; + has_dot = true; + } + 'e' | 'E' => { + chars.next(); + len += 1; + has_exp = true; + break; + } + _ => break, + } + } + + if !(has_dot || has_exp) { + return Err(Reject); + } + + if has_exp { + let token_before_exp = if has_dot { + Ok(input.advance(len - 1)) + } else { + Err(Reject) + }; + let mut has_sign = false; + let mut has_exp_value = false; + while let Some(&ch) = chars.peek() { + match ch { + '+' | '-' => { + if has_exp_value { + break; + } + if has_sign { + return token_before_exp; + } + chars.next(); + len += 1; + has_sign = true; + } + '0'..='9' => { + chars.next(); + len += 1; + has_exp_value = true; + } + '_' => { + chars.next(); + len += 1; + } + _ => break, + } + } + if !has_exp_value { + return token_before_exp; + } + } + + Ok(input.advance(len)) +} + +fn int(input: Cursor) -> Result { + let mut rest = digits(input)?; + if let Some(ch) = rest.chars().next() { + if is_ident_start(ch) { + rest = ident_not_raw(rest)?.0; + } + } + word_break(rest) +} + +fn digits(mut input: Cursor) -> Result { + let base = if input.starts_with("0x") { + input = input.advance(2); + 16 + } else if input.starts_with("0o") { + input = input.advance(2); + 8 + } else if input.starts_with("0b") { + input = input.advance(2); + 2 + } else { + 10 + }; + + let mut len = 0; + let mut empty = true; + for b in input.bytes() { + match b { + b'0'..=b'9' => { + let digit = (b - b'0') as u64; + if digit >= base { + return Err(Reject); + } + } + b'a'..=b'f' => { + let digit = 10 + (b - b'a') as u64; + if digit >= base { + break; + } + } + b'A'..=b'F' => { + let digit = 10 + (b - b'A') as u64; + if digit >= base { + break; + } + } + b'_' => { + if empty && base == 10 { + return Err(Reject); + } + len += 1; + continue; + } + _ => break, + }; + len += 1; + empty = false; + } + if empty { + Err(Reject) + } else { + Ok(input.advance(len)) + } +} + +fn punct(input: Cursor) -> PResult { + let (rest, ch) = punct_char(input)?; + if ch == '\'' { + if ident_any(rest)?.0.starts_with_char('\'') { + Err(Reject) + } else { + Ok((rest, Punct::new('\'', Spacing::Joint))) + } + } else { + let kind = match punct_char(rest) { + Ok(_) => Spacing::Joint, + Err(Reject) => Spacing::Alone, + }; + Ok((rest, Punct::new(ch, kind))) + } +} + +fn punct_char(input: Cursor) -> PResult { + if input.starts_with("//") || input.starts_with("/*") { + // Do not accept `/` of a comment as a punct. 
+ return Err(Reject); + } + + let mut chars = input.chars(); + let first = match chars.next() { + Some(ch) => ch, + None => { + return Err(Reject); + } + }; + let recognized = "~!@#$%^&*-=+|;:,<.>/?'"; + if recognized.contains(first) { + Ok((input.advance(first.len_utf8()), first)) + } else { + Err(Reject) + } +} + +fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> { + #[cfg(span_locations)] + let lo = input.off; + let (rest, (comment, inner)) = doc_comment_contents(input)?; + let fallback_span = Span { + #[cfg(span_locations)] + lo, + #[cfg(span_locations)] + hi: rest.off, + }; + let span = crate::Span::_new_fallback(fallback_span); + + let mut scan_for_bare_cr = comment; + while let Some(cr) = scan_for_bare_cr.find('\r') { + let rest = &scan_for_bare_cr[cr + 1..]; + if !rest.starts_with('\n') { + return Err(Reject); + } + scan_for_bare_cr = rest; + } + + let mut pound = Punct::new('#', Spacing::Alone); + pound.set_span(span); + trees.push_token_from_parser(TokenTree::Punct(pound)); + + if inner { + let mut bang = Punct::new('!', Spacing::Alone); + bang.set_span(span); + trees.push_token_from_parser(TokenTree::Punct(bang)); + } + + let doc_ident = crate::Ident::_new(crate::imp::Ident::new_unchecked("doc", fallback_span)); + let mut equal = Punct::new('=', Spacing::Alone); + equal.set_span(span); + let mut literal = crate::Literal::string(comment); + literal.set_span(span); + let mut bracketed = TokenStreamBuilder::with_capacity(3); + bracketed.push_token_from_parser(TokenTree::Ident(doc_ident)); + bracketed.push_token_from_parser(TokenTree::Punct(equal)); + bracketed.push_token_from_parser(TokenTree::Literal(literal)); + let group = Group::new(Delimiter::Bracket, bracketed.build()); + let mut group = crate::Group::_new_fallback(group); + group.set_span(span); + trees.push_token_from_parser(TokenTree::Group(group)); + + Ok((rest, ())) +} + +fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> { + if input.starts_with("//!") { + let input = input.advance(3); + let (input, s) = take_until_newline_or_eof(input); + Ok((input, (s, true))) + } else if input.starts_with("/*!") { + let (input, s) = block_comment(input)?; + Ok((input, (&s[3..s.len() - 2], true))) + } else if input.starts_with("///") { + let input = input.advance(3); + if input.starts_with_char('/') { + return Err(Reject); + } + let (input, s) = take_until_newline_or_eof(input); + Ok((input, (s, false))) + } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') { + let (input, s) = block_comment(input)?; + Ok((input, (&s[3..s.len() - 2], false))) + } else { + Err(Reject) + } +} + +fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) { + let chars = input.char_indices(); + + for (i, ch) in chars { + if ch == '\n' { + return (input.advance(i), &input.rest[..i]); + } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') { + return (input.advance(i + 1), &input.rest[..i]); + } + } + + (input.advance(input.len()), input.rest) +} diff --git a/vendor/proc-macro2/src/rcvec.rs b/vendor/proc-macro2/src/rcvec.rs new file mode 100644 index 0000000..37955af --- /dev/null +++ b/vendor/proc-macro2/src/rcvec.rs @@ -0,0 +1,145 @@ +use alloc::rc::Rc; +use alloc::vec; +use core::mem; +use core::panic::RefUnwindSafe; +use core::slice; + +pub(crate) struct RcVec<T> { + inner: Rc<Vec<T>>, +} + +pub(crate) struct RcVecBuilder<T> { + inner: Vec<T>, +} + +pub(crate) struct RcVecMut<'a, T> { + inner: &'a mut Vec<T>, +} + +#[derive(Clone)] +pub(crate) struct RcVecIntoIter<T> { + inner: vec::IntoIter<T>,
+} + +impl<T> RcVec<T> { + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + pub fn len(&self) -> usize { + self.inner.len() + } + + pub fn iter(&self) -> slice::Iter<T> { + self.inner.iter() + } + + pub fn make_mut(&mut self) -> RcVecMut<T> + where + T: Clone, + { + RcVecMut { + inner: Rc::make_mut(&mut self.inner), + } + } + + pub fn get_mut(&mut self) -> Option<RcVecMut<T>> { + let inner = Rc::get_mut(&mut self.inner)?; + Some(RcVecMut { inner }) + } + + pub fn make_owned(mut self) -> RcVecBuilder<T> + where + T: Clone, + { + let vec = if let Some(owned) = Rc::get_mut(&mut self.inner) { + mem::take(owned) + } else { + Vec::clone(&self.inner) + }; + RcVecBuilder { inner: vec } + } +} + +impl<T> RcVecBuilder<T> { + pub fn new() -> Self { + RcVecBuilder { inner: Vec::new() } + } + + pub fn with_capacity(cap: usize) -> Self { + RcVecBuilder { + inner: Vec::with_capacity(cap), + } + } + + pub fn push(&mut self, element: T) { + self.inner.push(element); + } + + pub fn extend(&mut self, iter: impl IntoIterator<Item = T>) { + self.inner.extend(iter); + } + + pub fn as_mut(&mut self) -> RcVecMut<T> { + RcVecMut { + inner: &mut self.inner, + } + } + + pub fn build(self) -> RcVec<T> { + RcVec { + inner: Rc::new(self.inner), + } + } +} + +impl<'a, T> RcVecMut<'a, T> { + pub fn push(&mut self, element: T) { + self.inner.push(element); + } + + pub fn extend(&mut self, iter: impl IntoIterator<Item = T>) { + self.inner.extend(iter); + } + + pub fn pop(&mut self) -> Option<T> { + self.inner.pop() + } + + pub fn as_mut(&mut self) -> RcVecMut<T> { + RcVecMut { inner: self.inner } + } +} + +impl<T> Clone for RcVec<T> { + fn clone(&self) -> Self { + RcVec { + inner: Rc::clone(&self.inner), + } + } +} + +impl<T> IntoIterator for RcVecBuilder<T> { + type Item = T; + type IntoIter = RcVecIntoIter<T>; + + fn into_iter(self) -> Self::IntoIter { + RcVecIntoIter { + inner: self.inner.into_iter(), + } + } +} + +impl<T> Iterator for RcVecIntoIter<T> { + type Item = T; + + fn next(&mut self) -> Option<Self::Item> { + self.inner.next() + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.inner.size_hint() + } +} + +impl<T> RefUnwindSafe for RcVec<T> where T: RefUnwindSafe {} diff --git a/vendor/proc-macro2/src/wrapper.rs b/vendor/proc-macro2/src/wrapper.rs new file mode 100644 index 0000000..f5eb826 --- /dev/null +++ b/vendor/proc-macro2/src/wrapper.rs @@ -0,0 +1,930 @@ +use crate::detection::inside_proc_macro; +#[cfg(span_locations)] +use crate::location::LineColumn; +use crate::{fallback, Delimiter, Punct, Spacing, TokenTree}; +use core::fmt::{self, Debug, Display}; +use core::ops::RangeBounds; +use core::str::FromStr; +use std::panic; +#[cfg(super_unstable)] +use std::path::PathBuf; + +#[derive(Clone)] +pub(crate) enum TokenStream { + Compiler(DeferredTokenStream), + Fallback(fallback::TokenStream), +} + +// Work around https://github.com/rust-lang/rust/issues/65080. +// In `impl Extend<TokenTree> for TokenStream` which is used heavily by quote, +// we hold on to the appended tokens and do proc_macro::TokenStream::extend as +// late as possible to batch together consecutive uses of the Extend impl. +#[derive(Clone)] +pub(crate) struct DeferredTokenStream { + stream: proc_macro::TokenStream, + extra: Vec<proc_macro::TokenTree>, +} + +pub(crate) enum LexError { + Compiler(proc_macro::LexError), + Fallback(fallback::LexError), + + // Rustc was supposed to return a LexError, but it panicked instead. + // https://github.com/rust-lang/rust/issues/58736 + CompilerPanic, +} + +#[cold] +fn mismatch(line: u32) -> !
{ + #[cfg(procmacro2_backtrace)] + { + let backtrace = std::backtrace::Backtrace::force_capture(); + panic!("compiler/fallback mismatch #{}\n\n{}", line, backtrace) + } + #[cfg(not(procmacro2_backtrace))] + { + panic!("compiler/fallback mismatch #{}", line) + } +} + +impl DeferredTokenStream { + fn new(stream: proc_macro::TokenStream) -> Self { + DeferredTokenStream { + stream, + extra: Vec::new(), + } + } + + fn is_empty(&self) -> bool { + self.stream.is_empty() && self.extra.is_empty() + } + + fn evaluate_now(&mut self) { + // If-check provides a fast short circuit for the common case of `extra` + // being empty, which saves a round trip over the proc macro bridge. + // Improves macro expansion time in winrt by 6% in debug mode. + if !self.extra.is_empty() { + self.stream.extend(self.extra.drain(..)); + } + } + + fn into_token_stream(mut self) -> proc_macro::TokenStream { + self.evaluate_now(); + self.stream + } +} + +impl TokenStream { + pub fn new() -> Self { + if inside_proc_macro() { + TokenStream::Compiler(DeferredTokenStream::new(proc_macro::TokenStream::new())) + } else { + TokenStream::Fallback(fallback::TokenStream::new()) + } + } + + pub fn is_empty(&self) -> bool { + match self { + TokenStream::Compiler(tts) => tts.is_empty(), + TokenStream::Fallback(tts) => tts.is_empty(), + } + } + + fn unwrap_nightly(self) -> proc_macro::TokenStream { + match self { + TokenStream::Compiler(s) => s.into_token_stream(), + TokenStream::Fallback(_) => mismatch(line!()), + } + } + + fn unwrap_stable(self) -> fallback::TokenStream { + match self { + TokenStream::Compiler(_) => mismatch(line!()), + TokenStream::Fallback(s) => s, + } + } +} + +impl FromStr for TokenStream { + type Err = LexError; + + fn from_str(src: &str) -> Result<TokenStream, LexError> { + if inside_proc_macro() { + Ok(TokenStream::Compiler(DeferredTokenStream::new( + proc_macro_parse(src)?, + ))) + } else { + Ok(TokenStream::Fallback(src.parse()?)) + } + } +} + +// Work around https://github.com/rust-lang/rust/issues/58736. +fn proc_macro_parse(src: &str) -> Result<proc_macro::TokenStream, LexError> { + let result = panic::catch_unwind(|| src.parse().map_err(LexError::Compiler)); + result.unwrap_or_else(|_| Err(LexError::CompilerPanic)) +} + +impl Display for TokenStream { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TokenStream::Compiler(tts) => Display::fmt(&tts.clone().into_token_stream(), f), + TokenStream::Fallback(tts) => Display::fmt(tts, f), + } + } +} + +impl From<proc_macro::TokenStream> for TokenStream { + fn from(inner: proc_macro::TokenStream) -> Self { + TokenStream::Compiler(DeferredTokenStream::new(inner)) + } +} + +impl From<TokenStream> for proc_macro::TokenStream { + fn from(inner: TokenStream) -> Self { + match inner { + TokenStream::Compiler(inner) => inner.into_token_stream(), + TokenStream::Fallback(inner) => inner.to_string().parse().unwrap(), + } + } +} + +impl From<fallback::TokenStream> for TokenStream { + fn from(inner: fallback::TokenStream) -> Self { + TokenStream::Fallback(inner) + } +} + +// Assumes inside_proc_macro().
+fn into_compiler_token(token: TokenTree) -> proc_macro::TokenTree { + match token { + TokenTree::Group(tt) => tt.inner.unwrap_nightly().into(), + TokenTree::Punct(tt) => { + let spacing = match tt.spacing() { + Spacing::Joint => proc_macro::Spacing::Joint, + Spacing::Alone => proc_macro::Spacing::Alone, + }; + let mut punct = proc_macro::Punct::new(tt.as_char(), spacing); + punct.set_span(tt.span().inner.unwrap_nightly()); + punct.into() + } + TokenTree::Ident(tt) => tt.inner.unwrap_nightly().into(), + TokenTree::Literal(tt) => tt.inner.unwrap_nightly().into(), + } +} + +impl From<TokenTree> for TokenStream { + fn from(token: TokenTree) -> Self { + if inside_proc_macro() { + TokenStream::Compiler(DeferredTokenStream::new(into_compiler_token(token).into())) + } else { + TokenStream::Fallback(token.into()) + } + } +} + +impl FromIterator<TokenTree> for TokenStream { + fn from_iter<I: IntoIterator<Item = TokenTree>>(trees: I) -> Self { + if inside_proc_macro() { + TokenStream::Compiler(DeferredTokenStream::new( + trees.into_iter().map(into_compiler_token).collect(), + )) + } else { + TokenStream::Fallback(trees.into_iter().collect()) + } + } +} + +impl FromIterator<TokenStream> for TokenStream { + fn from_iter<I: IntoIterator<Item = TokenStream>>(streams: I) -> Self { + let mut streams = streams.into_iter(); + match streams.next() { + Some(TokenStream::Compiler(mut first)) => { + first.evaluate_now(); + first.stream.extend(streams.map(|s| match s { + TokenStream::Compiler(s) => s.into_token_stream(), + TokenStream::Fallback(_) => mismatch(line!()), + })); + TokenStream::Compiler(first) + } + Some(TokenStream::Fallback(mut first)) => { + first.extend(streams.map(|s| match s { + TokenStream::Fallback(s) => s, + TokenStream::Compiler(_) => mismatch(line!()), + })); + TokenStream::Fallback(first) + } + None => TokenStream::new(), + } + } +} + +impl Extend<TokenTree> for TokenStream { + fn extend<I: IntoIterator<Item = TokenTree>>(&mut self, stream: I) { + match self { + TokenStream::Compiler(tts) => { + // Here is the reason for DeferredTokenStream.
+ for token in stream { + tts.extra.push(into_compiler_token(token)); + } + } + TokenStream::Fallback(tts) => tts.extend(stream), + } + } +} + +impl Extend<TokenStream> for TokenStream { + fn extend<I: IntoIterator<Item = TokenStream>>(&mut self, streams: I) { + match self { + TokenStream::Compiler(tts) => { + tts.evaluate_now(); + tts.stream + .extend(streams.into_iter().map(TokenStream::unwrap_nightly)); + } + TokenStream::Fallback(tts) => { + tts.extend(streams.into_iter().map(TokenStream::unwrap_stable)); + } + } + } +} + +impl Debug for TokenStream { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TokenStream::Compiler(tts) => Debug::fmt(&tts.clone().into_token_stream(), f), + TokenStream::Fallback(tts) => Debug::fmt(tts, f), + } + } +} + +impl LexError { + pub(crate) fn span(&self) -> Span { + match self { + LexError::Compiler(_) | LexError::CompilerPanic => Span::call_site(), + LexError::Fallback(e) => Span::Fallback(e.span()), + } + } +} + +impl From<proc_macro::LexError> for LexError { + fn from(e: proc_macro::LexError) -> Self { + LexError::Compiler(e) + } +} + +impl From<fallback::LexError> for LexError { + fn from(e: fallback::LexError) -> Self { + LexError::Fallback(e) + } +} + +impl Debug for LexError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + LexError::Compiler(e) => Debug::fmt(e, f), + LexError::Fallback(e) => Debug::fmt(e, f), + LexError::CompilerPanic => { + let fallback = fallback::LexError::call_site(); + Debug::fmt(&fallback, f) + } + } + } +} + +impl Display for LexError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + LexError::Compiler(e) => Display::fmt(e, f), + LexError::Fallback(e) => Display::fmt(e, f), + LexError::CompilerPanic => { + let fallback = fallback::LexError::call_site(); + Display::fmt(&fallback, f) + } + } + } +} + +#[derive(Clone)] +pub(crate) enum TokenTreeIter { + Compiler(proc_macro::token_stream::IntoIter), + Fallback(fallback::TokenTreeIter), +} + +impl IntoIterator for TokenStream { + type Item = TokenTree; + type IntoIter = TokenTreeIter; + + fn into_iter(self) -> TokenTreeIter { + match self { + TokenStream::Compiler(tts) => { + TokenTreeIter::Compiler(tts.into_token_stream().into_iter()) + } + TokenStream::Fallback(tts) => TokenTreeIter::Fallback(tts.into_iter()), + } + } +} + +impl Iterator for TokenTreeIter { + type Item = TokenTree; + + fn next(&mut self) -> Option<TokenTree> { + let token = match self { + TokenTreeIter::Compiler(iter) => iter.next()?, + TokenTreeIter::Fallback(iter) => return iter.next(), + }; + Some(match token { + proc_macro::TokenTree::Group(tt) => crate::Group::_new(Group::Compiler(tt)).into(), + proc_macro::TokenTree::Punct(tt) => { + let spacing = match tt.spacing() { + proc_macro::Spacing::Joint => Spacing::Joint, + proc_macro::Spacing::Alone => Spacing::Alone, + }; + let mut o = Punct::new(tt.as_char(), spacing); + o.set_span(crate::Span::_new(Span::Compiler(tt.span()))); + o.into() + } + proc_macro::TokenTree::Ident(s) => crate::Ident::_new(Ident::Compiler(s)).into(), + proc_macro::TokenTree::Literal(l) => crate::Literal::_new(Literal::Compiler(l)).into(), + }) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + match self { + TokenTreeIter::Compiler(tts) => tts.size_hint(), + TokenTreeIter::Fallback(tts) => tts.size_hint(), + } + } +} + +#[derive(Clone, PartialEq, Eq)] +#[cfg(super_unstable)] +pub(crate) enum SourceFile { + Compiler(proc_macro::SourceFile), + Fallback(fallback::SourceFile), +} + +#[cfg(super_unstable)] +impl SourceFile { + fn nightly(sf: proc_macro::SourceFile) -> Self { + SourceFile::Compiler(sf) + } + + ///
Get the path to this source file as a string. + pub fn path(&self) -> PathBuf { + match self { + SourceFile::Compiler(a) => a.path(), + SourceFile::Fallback(a) => a.path(), + } + } + + pub fn is_real(&self) -> bool { + match self { + SourceFile::Compiler(a) => a.is_real(), + SourceFile::Fallback(a) => a.is_real(), + } + } +} + +#[cfg(super_unstable)] +impl Debug for SourceFile { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SourceFile::Compiler(a) => Debug::fmt(a, f), + SourceFile::Fallback(a) => Debug::fmt(a, f), + } + } +} + +#[derive(Copy, Clone)] +pub(crate) enum Span { + Compiler(proc_macro::Span), + Fallback(fallback::Span), +} + +impl Span { + pub fn call_site() -> Self { + if inside_proc_macro() { + Span::Compiler(proc_macro::Span::call_site()) + } else { + Span::Fallback(fallback::Span::call_site()) + } + } + + pub fn mixed_site() -> Self { + if inside_proc_macro() { + Span::Compiler(proc_macro::Span::mixed_site()) + } else { + Span::Fallback(fallback::Span::mixed_site()) + } + } + + #[cfg(super_unstable)] + pub fn def_site() -> Self { + if inside_proc_macro() { + Span::Compiler(proc_macro::Span::def_site()) + } else { + Span::Fallback(fallback::Span::def_site()) + } + } + + pub fn resolved_at(&self, other: Span) -> Span { + match (self, other) { + (Span::Compiler(a), Span::Compiler(b)) => Span::Compiler(a.resolved_at(b)), + (Span::Fallback(a), Span::Fallback(b)) => Span::Fallback(a.resolved_at(b)), + (Span::Compiler(_), Span::Fallback(_)) => mismatch(line!()), + (Span::Fallback(_), Span::Compiler(_)) => mismatch(line!()), + } + } + + pub fn located_at(&self, other: Span) -> Span { + match (self, other) { + (Span::Compiler(a), Span::Compiler(b)) => Span::Compiler(a.located_at(b)), + (Span::Fallback(a), Span::Fallback(b)) => Span::Fallback(a.located_at(b)), + (Span::Compiler(_), Span::Fallback(_)) => mismatch(line!()), + (Span::Fallback(_), Span::Compiler(_)) => mismatch(line!()), + } + } + + pub fn unwrap(self) -> proc_macro::Span { + match self { + Span::Compiler(s) => s, + Span::Fallback(_) => panic!("proc_macro::Span is only available in procedural macros"), + } + } + + #[cfg(super_unstable)] + pub fn source_file(&self) -> SourceFile { + match self { + Span::Compiler(s) => SourceFile::nightly(s.source_file()), + Span::Fallback(s) => SourceFile::Fallback(s.source_file()), + } + } + + #[cfg(span_locations)] + pub fn start(&self) -> LineColumn { + match self { + Span::Compiler(_) => LineColumn { line: 0, column: 0 }, + Span::Fallback(s) => s.start(), + } + } + + #[cfg(span_locations)] + pub fn end(&self) -> LineColumn { + match self { + Span::Compiler(_) => LineColumn { line: 0, column: 0 }, + Span::Fallback(s) => s.end(), + } + } + + pub fn join(&self, other: Span) -> Option<Span> { + let ret = match (self, other) { + #[cfg(proc_macro_span)] + (Span::Compiler(a), Span::Compiler(b)) => Span::Compiler(a.join(b)?), + (Span::Fallback(a), Span::Fallback(b)) => Span::Fallback(a.join(b)?), + _ => return None, + }; + Some(ret) + } + + #[cfg(super_unstable)] + pub fn eq(&self, other: &Span) -> bool { + match (self, other) { + (Span::Compiler(a), Span::Compiler(b)) => a.eq(b), + (Span::Fallback(a), Span::Fallback(b)) => a.eq(b), + _ => false, + } + } + + pub fn source_text(&self) -> Option<String> { + match self { + #[cfg(not(no_source_text))] + Span::Compiler(s) => s.source_text(), + #[cfg(no_source_text)] + Span::Compiler(_) => None, + Span::Fallback(s) => s.source_text(), + } + } + + fn unwrap_nightly(self) -> proc_macro::Span { + match self { + Span::Compiler(s) => s, +
Span::Fallback(_) => mismatch(line!()), + } + } +} + +impl From<proc_macro::Span> for crate::Span { + fn from(proc_span: proc_macro::Span) -> Self { + crate::Span::_new(Span::Compiler(proc_span)) + } +} + +impl From<fallback::Span> for Span { + fn from(inner: fallback::Span) -> Self { + Span::Fallback(inner) + } +} + +impl Debug for Span { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Span::Compiler(s) => Debug::fmt(s, f), + Span::Fallback(s) => Debug::fmt(s, f), + } + } +} + +pub(crate) fn debug_span_field_if_nontrivial(debug: &mut fmt::DebugStruct, span: Span) { + match span { + Span::Compiler(s) => { + debug.field("span", &s); + } + Span::Fallback(s) => fallback::debug_span_field_if_nontrivial(debug, s), + } +} + +#[derive(Clone)] +pub(crate) enum Group { + Compiler(proc_macro::Group), + Fallback(fallback::Group), +} + +impl Group { + pub fn new(delimiter: Delimiter, stream: TokenStream) -> Self { + match stream { + TokenStream::Compiler(tts) => { + let delimiter = match delimiter { + Delimiter::Parenthesis => proc_macro::Delimiter::Parenthesis, + Delimiter::Bracket => proc_macro::Delimiter::Bracket, + Delimiter::Brace => proc_macro::Delimiter::Brace, + Delimiter::None => proc_macro::Delimiter::None, + }; + Group::Compiler(proc_macro::Group::new(delimiter, tts.into_token_stream())) + } + TokenStream::Fallback(stream) => { + Group::Fallback(fallback::Group::new(delimiter, stream)) + } + } + } + + pub fn delimiter(&self) -> Delimiter { + match self { + Group::Compiler(g) => match g.delimiter() { + proc_macro::Delimiter::Parenthesis => Delimiter::Parenthesis, + proc_macro::Delimiter::Bracket => Delimiter::Bracket, + proc_macro::Delimiter::Brace => Delimiter::Brace, + proc_macro::Delimiter::None => Delimiter::None, + }, + Group::Fallback(g) => g.delimiter(), + } + } + + pub fn stream(&self) -> TokenStream { + match self { + Group::Compiler(g) => TokenStream::Compiler(DeferredTokenStream::new(g.stream())), + Group::Fallback(g) => TokenStream::Fallback(g.stream()), + } + } + + pub fn span(&self) -> Span { + match self { + Group::Compiler(g) => Span::Compiler(g.span()), + Group::Fallback(g) => Span::Fallback(g.span()), + } + } + + pub fn span_open(&self) -> Span { + match self { + Group::Compiler(g) => Span::Compiler(g.span_open()), + Group::Fallback(g) => Span::Fallback(g.span_open()), + } + } + + pub fn span_close(&self) -> Span { + match self { + Group::Compiler(g) => Span::Compiler(g.span_close()), + Group::Fallback(g) => Span::Fallback(g.span_close()), + } + } + + pub fn set_span(&mut self, span: Span) { + match (self, span) { + (Group::Compiler(g), Span::Compiler(s)) => g.set_span(s), + (Group::Fallback(g), Span::Fallback(s)) => g.set_span(s), + (Group::Compiler(_), Span::Fallback(_)) => mismatch(line!()), + (Group::Fallback(_), Span::Compiler(_)) => mismatch(line!()), + } + } + + fn unwrap_nightly(self) -> proc_macro::Group { + match self { + Group::Compiler(g) => g, + Group::Fallback(_) => mismatch(line!()), + } + } +} + +impl From<fallback::Group> for Group { + fn from(g: fallback::Group) -> Self { + Group::Fallback(g) + } +} + +impl Display for Group { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + match self { + Group::Compiler(group) => Display::fmt(group, formatter), + Group::Fallback(group) => Display::fmt(group, formatter), + } + } +} + +impl Debug for Group { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + match self { + Group::Compiler(group) => Debug::fmt(group, formatter), + Group::Fallback(group) => Debug::fmt(group, formatter), + } + } +} +
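[Reviewer note, not part of the patch] The wrapper types in this file — `TokenStream`, `Span`, `Group` above, and `Ident`/`Literal` below — all follow one dual-representation dispatch shape: a `Compiler` variant backed by the real `proc_macro` types and a `Fallback` variant usable outside macro expansion, with every method matching on the variant (and, for setters, on the variant of the argument) and aborting through `mismatch(line!())` if the two sides are ever mixed. A minimal, self-contained sketch of that shape, with hypothetical names and stub `u32` payloads standing in for the real `proc_macro`/`fallback` types:

    // Illustrative sketch only; names and payloads are invented.
    #[allow(dead_code)]
    enum Span { Compiler(u32), Fallback(u32) }

    #[allow(dead_code)]
    enum Wrapper {
        Compiler(u32), // stands in for a proc_macro type
        Fallback(u32), // stands in for a fallback type
    }

    fn mismatch(line: u32) -> ! {
        panic!("compiler/fallback mismatch #{}", line)
    }

    impl Wrapper {
        fn set_span(&mut self, span: Span) {
            // Crossing the two representations is a bug in the wrapper
            // itself, never a user error, so it panics loudly.
            match (self, span) {
                (Wrapper::Compiler(t), Span::Compiler(s)) => *t = s,
                (Wrapper::Fallback(t), Span::Fallback(s)) => *t = s,
                _ => mismatch(line!()),
            }
        }
    }

    fn main() {
        let mut w = Wrapper::Fallback(0);
        w.set_span(Span::Fallback(7)); // passing Span::Compiler here would panic
    }

The invariant that makes the `mismatch` arms unreachable in practice is that the variant is chosen once, by `inside_proc_macro()`, at construction time, so values created in the same process phase always agree.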
+#[derive(Clone)] +pub(crate) enum Ident { + Compiler(proc_macro::Ident), + Fallback(fallback::Ident), +} + +impl Ident { + #[track_caller] + pub fn new_checked(string: &str, span: Span) -> Self { + match span { + Span::Compiler(s) => Ident::Compiler(proc_macro::Ident::new(string, s)), + Span::Fallback(s) => Ident::Fallback(fallback::Ident::new_checked(string, s)), + } + } + + pub fn new_unchecked(string: &str, span: fallback::Span) -> Self { + Ident::Fallback(fallback::Ident::new_unchecked(string, span)) + } + + #[track_caller] + pub fn new_raw_checked(string: &str, span: Span) -> Self { + match span { + Span::Compiler(s) => Ident::Compiler(proc_macro::Ident::new_raw(string, s)), + Span::Fallback(s) => Ident::Fallback(fallback::Ident::new_raw_checked(string, s)), + } + } + + pub fn new_raw_unchecked(string: &str, span: fallback::Span) -> Self { + Ident::Fallback(fallback::Ident::new_raw_unchecked(string, span)) + } + + pub fn span(&self) -> Span { + match self { + Ident::Compiler(t) => Span::Compiler(t.span()), + Ident::Fallback(t) => Span::Fallback(t.span()), + } + } + + pub fn set_span(&mut self, span: Span) { + match (self, span) { + (Ident::Compiler(t), Span::Compiler(s)) => t.set_span(s), + (Ident::Fallback(t), Span::Fallback(s)) => t.set_span(s), + (Ident::Compiler(_), Span::Fallback(_)) => mismatch(line!()), + (Ident::Fallback(_), Span::Compiler(_)) => mismatch(line!()), + } + } + + fn unwrap_nightly(self) -> proc_macro::Ident { + match self { + Ident::Compiler(s) => s, + Ident::Fallback(_) => mismatch(line!()), + } + } +} + +impl PartialEq for Ident { + fn eq(&self, other: &Ident) -> bool { + match (self, other) { + (Ident::Compiler(t), Ident::Compiler(o)) => t.to_string() == o.to_string(), + (Ident::Fallback(t), Ident::Fallback(o)) => t == o, + (Ident::Compiler(_), Ident::Fallback(_)) => mismatch(line!()), + (Ident::Fallback(_), Ident::Compiler(_)) => mismatch(line!()), + } + } +} + +impl PartialEq for Ident +where + T: ?Sized + AsRef, +{ + fn eq(&self, other: &T) -> bool { + let other = other.as_ref(); + match self { + Ident::Compiler(t) => t.to_string() == other, + Ident::Fallback(t) => t == other, + } + } +} + +impl Display for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Ident::Compiler(t) => Display::fmt(t, f), + Ident::Fallback(t) => Display::fmt(t, f), + } + } +} + +impl Debug for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Ident::Compiler(t) => Debug::fmt(t, f), + Ident::Fallback(t) => Debug::fmt(t, f), + } + } +} + +#[derive(Clone)] +pub(crate) enum Literal { + Compiler(proc_macro::Literal), + Fallback(fallback::Literal), +} + +macro_rules! suffixed_numbers { + ($($name:ident => $kind:ident,)*) => ($( + pub fn $name(n: $kind) -> Literal { + if inside_proc_macro() { + Literal::Compiler(proc_macro::Literal::$name(n)) + } else { + Literal::Fallback(fallback::Literal::$name(n)) + } + } + )*) +} + +macro_rules! unsuffixed_integers { + ($($name:ident => $kind:ident,)*) => ($( + pub fn $name(n: $kind) -> Literal { + if inside_proc_macro() { + Literal::Compiler(proc_macro::Literal::$name(n)) + } else { + Literal::Fallback(fallback::Literal::$name(n)) + } + } + )*) +} + +impl Literal { + pub unsafe fn from_str_unchecked(repr: &str) -> Self { + if inside_proc_macro() { + Literal::Compiler(proc_macro::Literal::from_str(repr).expect("invalid literal")) + } else { + Literal::Fallback(unsafe { fallback::Literal::from_str_unchecked(repr) }) + } + } + + suffixed_numbers! 
{ + u8_suffixed => u8, + u16_suffixed => u16, + u32_suffixed => u32, + u64_suffixed => u64, + u128_suffixed => u128, + usize_suffixed => usize, + i8_suffixed => i8, + i16_suffixed => i16, + i32_suffixed => i32, + i64_suffixed => i64, + i128_suffixed => i128, + isize_suffixed => isize, + + f32_suffixed => f32, + f64_suffixed => f64, + } + + unsuffixed_integers! { + u8_unsuffixed => u8, + u16_unsuffixed => u16, + u32_unsuffixed => u32, + u64_unsuffixed => u64, + u128_unsuffixed => u128, + usize_unsuffixed => usize, + i8_unsuffixed => i8, + i16_unsuffixed => i16, + i32_unsuffixed => i32, + i64_unsuffixed => i64, + i128_unsuffixed => i128, + isize_unsuffixed => isize, + } + + pub fn f32_unsuffixed(f: f32) -> Literal { + if inside_proc_macro() { + Literal::Compiler(proc_macro::Literal::f32_unsuffixed(f)) + } else { + Literal::Fallback(fallback::Literal::f32_unsuffixed(f)) + } + } + + pub fn f64_unsuffixed(f: f64) -> Literal { + if inside_proc_macro() { + Literal::Compiler(proc_macro::Literal::f64_unsuffixed(f)) + } else { + Literal::Fallback(fallback::Literal::f64_unsuffixed(f)) + } + } + + pub fn string(t: &str) -> Literal { + if inside_proc_macro() { + Literal::Compiler(proc_macro::Literal::string(t)) + } else { + Literal::Fallback(fallback::Literal::string(t)) + } + } + + pub fn character(t: char) -> Literal { + if inside_proc_macro() { + Literal::Compiler(proc_macro::Literal::character(t)) + } else { + Literal::Fallback(fallback::Literal::character(t)) + } + } + + pub fn byte_string(bytes: &[u8]) -> Literal { + if inside_proc_macro() { + Literal::Compiler(proc_macro::Literal::byte_string(bytes)) + } else { + Literal::Fallback(fallback::Literal::byte_string(bytes)) + } + } + + pub fn span(&self) -> Span { + match self { + Literal::Compiler(lit) => Span::Compiler(lit.span()), + Literal::Fallback(lit) => Span::Fallback(lit.span()), + } + } + + pub fn set_span(&mut self, span: Span) { + match (self, span) { + (Literal::Compiler(lit), Span::Compiler(s)) => lit.set_span(s), + (Literal::Fallback(lit), Span::Fallback(s)) => lit.set_span(s), + (Literal::Compiler(_), Span::Fallback(_)) => mismatch(line!()), + (Literal::Fallback(_), Span::Compiler(_)) => mismatch(line!()), + } + } + + pub fn subspan>(&self, range: R) -> Option { + match self { + #[cfg(proc_macro_span)] + Literal::Compiler(lit) => lit.subspan(range).map(Span::Compiler), + #[cfg(not(proc_macro_span))] + Literal::Compiler(_lit) => None, + Literal::Fallback(lit) => lit.subspan(range).map(Span::Fallback), + } + } + + fn unwrap_nightly(self) -> proc_macro::Literal { + match self { + Literal::Compiler(s) => s, + Literal::Fallback(_) => mismatch(line!()), + } + } +} + +impl From for Literal { + fn from(s: fallback::Literal) -> Self { + Literal::Fallback(s) + } +} + +impl FromStr for Literal { + type Err = LexError; + + fn from_str(repr: &str) -> Result { + if inside_proc_macro() { + let literal = proc_macro::Literal::from_str(repr)?; + Ok(Literal::Compiler(literal)) + } else { + let literal = fallback::Literal::from_str(repr)?; + Ok(Literal::Fallback(literal)) + } + } +} + +impl Display for Literal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Literal::Compiler(t) => Display::fmt(t, f), + Literal::Fallback(t) => Display::fmt(t, f), + } + } +} + +impl Debug for Literal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Literal::Compiler(t) => Debug::fmt(t, f), + Literal::Fallback(t) => Debug::fmt(t, f), + } + } +} diff --git a/vendor/proc-macro2/tests/comments.rs 
b/vendor/proc-macro2/tests/comments.rs new file mode 100644 index 0000000..4f7236d --- /dev/null +++ b/vendor/proc-macro2/tests/comments.rs @@ -0,0 +1,105 @@ +#![allow(clippy::assertions_on_result_states)] + +use proc_macro2::{Delimiter, Literal, Spacing, TokenStream, TokenTree}; + +// #[doc = "..."] -> "..." +fn lit_of_outer_doc_comment(tokens: &TokenStream) -> Literal { + lit_of_doc_comment(tokens, false) +} + +// #![doc = "..."] -> "..." +fn lit_of_inner_doc_comment(tokens: &TokenStream) -> Literal { + lit_of_doc_comment(tokens, true) +} + +fn lit_of_doc_comment(tokens: &TokenStream, inner: bool) -> Literal { + let mut iter = tokens.clone().into_iter(); + match iter.next().unwrap() { + TokenTree::Punct(punct) => { + assert_eq!(punct.as_char(), '#'); + assert_eq!(punct.spacing(), Spacing::Alone); + } + _ => panic!("wrong token {:?}", tokens), + } + if inner { + match iter.next().unwrap() { + TokenTree::Punct(punct) => { + assert_eq!(punct.as_char(), '!'); + assert_eq!(punct.spacing(), Spacing::Alone); + } + _ => panic!("wrong token {:?}", tokens), + } + } + iter = match iter.next().unwrap() { + TokenTree::Group(group) => { + assert_eq!(group.delimiter(), Delimiter::Bracket); + assert!(iter.next().is_none(), "unexpected token {:?}", tokens); + group.stream().into_iter() + } + _ => panic!("wrong token {:?}", tokens), + }; + match iter.next().unwrap() { + TokenTree::Ident(ident) => assert_eq!(ident.to_string(), "doc"), + _ => panic!("wrong token {:?}", tokens), + } + match iter.next().unwrap() { + TokenTree::Punct(punct) => { + assert_eq!(punct.as_char(), '='); + assert_eq!(punct.spacing(), Spacing::Alone); + } + _ => panic!("wrong token {:?}", tokens), + } + match iter.next().unwrap() { + TokenTree::Literal(literal) => { + assert!(iter.next().is_none(), "unexpected token {:?}", tokens); + literal + } + _ => panic!("wrong token {:?}", tokens), + } +} + +#[test] +fn closed_immediately() { + let stream = "/**/".parse::().unwrap(); + let tokens = stream.into_iter().collect::>(); + assert!(tokens.is_empty(), "not empty -- {:?}", tokens); +} + +#[test] +fn incomplete() { + assert!("/*/".parse::().is_err()); +} + +#[test] +fn lit() { + let stream = "/// doc".parse::().unwrap(); + let lit = lit_of_outer_doc_comment(&stream); + assert_eq!(lit.to_string(), "\" doc\""); + + let stream = "//! doc".parse::().unwrap(); + let lit = lit_of_inner_doc_comment(&stream); + assert_eq!(lit.to_string(), "\" doc\""); + + let stream = "/** doc */".parse::().unwrap(); + let lit = lit_of_outer_doc_comment(&stream); + assert_eq!(lit.to_string(), "\" doc \""); + + let stream = "/*! doc */".parse::().unwrap(); + let lit = lit_of_inner_doc_comment(&stream); + assert_eq!(lit.to_string(), "\" doc \""); +} + +#[test] +fn carriage_return() { + let stream = "///\r\n".parse::().unwrap(); + let lit = lit_of_outer_doc_comment(&stream); + assert_eq!(lit.to_string(), "\"\""); + + let stream = "/**\r\n*/".parse::().unwrap(); + let lit = lit_of_outer_doc_comment(&stream); + assert_eq!(lit.to_string(), "\"\\r\\n\""); + + "///\r".parse::().unwrap_err(); + "///\r \n".parse::().unwrap_err(); + "/**\r \n*/".parse::().unwrap_err(); +} diff --git a/vendor/proc-macro2/tests/features.rs b/vendor/proc-macro2/tests/features.rs new file mode 100644 index 0000000..073f6e6 --- /dev/null +++ b/vendor/proc-macro2/tests/features.rs @@ -0,0 +1,8 @@ +#[test] +#[ignore] +fn make_sure_no_proc_macro() { + assert!( + !cfg!(feature = "proc-macro"), + "still compiled with proc_macro?" 
+ ); +} diff --git a/vendor/proc-macro2/tests/marker.rs b/vendor/proc-macro2/tests/marker.rs new file mode 100644 index 0000000..d08fbfc --- /dev/null +++ b/vendor/proc-macro2/tests/marker.rs @@ -0,0 +1,99 @@ +#![allow(clippy::extra_unused_type_parameters)] + +use proc_macro2::{ + Delimiter, Group, Ident, LexError, Literal, Punct, Spacing, Span, TokenStream, TokenTree, +}; + +macro_rules! assert_impl { + ($ty:ident is $($marker:ident) and +) => { + #[test] + #[allow(non_snake_case)] + fn $ty() { + fn assert_implemented() {} + assert_implemented::<$ty>(); + } + }; + + ($ty:ident is not $($marker:ident) or +) => { + #[test] + #[allow(non_snake_case)] + fn $ty() { + $( + { + // Implemented for types that implement $marker. + trait IsNotImplemented { + fn assert_not_implemented() {} + } + impl IsNotImplemented for T {} + + // Implemented for the type being tested. + trait IsImplemented { + fn assert_not_implemented() {} + } + impl IsImplemented for $ty {} + + // If $ty does not implement $marker, there is no ambiguity + // in the following trait method call. + <$ty>::assert_not_implemented(); + } + )+ + } + }; +} + +assert_impl!(Delimiter is Send and Sync); +assert_impl!(Spacing is Send and Sync); + +assert_impl!(Group is not Send or Sync); +assert_impl!(Ident is not Send or Sync); +assert_impl!(LexError is not Send or Sync); +assert_impl!(Literal is not Send or Sync); +assert_impl!(Punct is not Send or Sync); +assert_impl!(Span is not Send or Sync); +assert_impl!(TokenStream is not Send or Sync); +assert_impl!(TokenTree is not Send or Sync); + +#[cfg(procmacro2_semver_exempt)] +mod semver_exempt { + use proc_macro2::{LineColumn, SourceFile}; + + assert_impl!(LineColumn is Send and Sync); + + assert_impl!(SourceFile is not Send or Sync); +} + +mod unwind_safe { + use proc_macro2::{ + Delimiter, Group, Ident, LexError, Literal, Punct, Spacing, Span, TokenStream, TokenTree, + }; + #[cfg(procmacro2_semver_exempt)] + use proc_macro2::{LineColumn, SourceFile}; + use std::panic::{RefUnwindSafe, UnwindSafe}; + + macro_rules! assert_unwind_safe { + ($($types:ident)*) => { + $( + assert_impl!($types is UnwindSafe and RefUnwindSafe); + )* + }; + } + + assert_unwind_safe! { + Delimiter + Group + Ident + LexError + Literal + Punct + Spacing + Span + TokenStream + TokenTree + } + + #[cfg(procmacro2_semver_exempt)] + assert_unwind_safe! 
{ + LineColumn + SourceFile + } +} diff --git a/vendor/proc-macro2/tests/test.rs b/vendor/proc-macro2/tests/test.rs new file mode 100644 index 0000000..b75cd55 --- /dev/null +++ b/vendor/proc-macro2/tests/test.rs @@ -0,0 +1,759 @@ +#![allow( + clippy::assertions_on_result_states, + clippy::items_after_statements, + clippy::non_ascii_literal, + clippy::octal_escapes +)] + +use proc_macro2::{Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree}; +use std::iter; +use std::str::{self, FromStr}; + +#[test] +fn idents() { + assert_eq!( + Ident::new("String", Span::call_site()).to_string(), + "String" + ); + assert_eq!(Ident::new("fn", Span::call_site()).to_string(), "fn"); + assert_eq!(Ident::new("_", Span::call_site()).to_string(), "_"); +} + +#[test] +fn raw_idents() { + assert_eq!( + Ident::new_raw("String", Span::call_site()).to_string(), + "r#String" + ); + assert_eq!(Ident::new_raw("fn", Span::call_site()).to_string(), "r#fn"); +} + +#[test] +#[should_panic(expected = "`r#_` cannot be a raw identifier")] +fn ident_raw_underscore() { + Ident::new_raw("_", Span::call_site()); +} + +#[test] +#[should_panic(expected = "`r#super` cannot be a raw identifier")] +fn ident_raw_reserved() { + Ident::new_raw("super", Span::call_site()); +} + +#[test] +#[should_panic(expected = "Ident is not allowed to be empty; use Option")] +fn ident_empty() { + Ident::new("", Span::call_site()); +} + +#[test] +#[should_panic(expected = "Ident cannot be a number; use Literal instead")] +fn ident_number() { + Ident::new("255", Span::call_site()); +} + +#[test] +#[should_panic(expected = "\"a#\" is not a valid Ident")] +fn ident_invalid() { + Ident::new("a#", Span::call_site()); +} + +#[test] +#[should_panic(expected = "not a valid Ident")] +fn raw_ident_empty() { + Ident::new("r#", Span::call_site()); +} + +#[test] +#[should_panic(expected = "not a valid Ident")] +fn raw_ident_number() { + Ident::new("r#255", Span::call_site()); +} + +#[test] +#[should_panic(expected = "\"r#a#\" is not a valid Ident")] +fn raw_ident_invalid() { + Ident::new("r#a#", Span::call_site()); +} + +#[test] +#[should_panic(expected = "not a valid Ident")] +fn lifetime_empty() { + Ident::new("'", Span::call_site()); +} + +#[test] +#[should_panic(expected = "not a valid Ident")] +fn lifetime_number() { + Ident::new("'255", Span::call_site()); +} + +#[test] +#[should_panic(expected = r#""'a#" is not a valid Ident"#)] +fn lifetime_invalid() { + Ident::new("'a#", Span::call_site()); +} + +#[test] +fn literal_string() { + assert_eq!(Literal::string("foo").to_string(), "\"foo\""); + assert_eq!(Literal::string("\"").to_string(), "\"\\\"\""); + assert_eq!(Literal::string("didn't").to_string(), "\"didn't\""); + assert_eq!( + Literal::string("a\00b\07c\08d\0e\0").to_string(), + "\"a\\x000b\\x007c\\08d\\0e\\0\"", + ); + + "\"\\\r\n x\"".parse::().unwrap(); + "\"\\\r\n \rx\"".parse::().unwrap_err(); +} + +#[test] +fn literal_raw_string() { + "r\"\r\n\"".parse::().unwrap(); + + fn raw_string_literal_with_hashes(n: usize) -> String { + let mut literal = String::new(); + literal.push('r'); + literal.extend(iter::repeat('#').take(n)); + literal.push('"'); + literal.push('"'); + literal.extend(iter::repeat('#').take(n)); + literal + } + + raw_string_literal_with_hashes(255) + .parse::() + .unwrap(); + + // https://github.com/rust-lang/rust/pull/95251 + raw_string_literal_with_hashes(256) + .parse::() + .unwrap_err(); +} + +#[test] +fn literal_byte_string() { + assert_eq!(Literal::byte_string(b"").to_string(), "b\"\""); + assert_eq!( + 
Literal::byte_string(b"\0\t\n\r\"\\2\x10").to_string(), + "b\"\\0\\t\\n\\r\\\"\\\\2\\x10\"", + ); + assert_eq!( + Literal::byte_string(b"a\00b\07c\08d\0e\0").to_string(), + "b\"a\\x000b\\x007c\\08d\\0e\\0\"", + ); + + "b\"\\\r\n x\"".parse::().unwrap(); + "b\"\\\r\n \rx\"".parse::().unwrap_err(); + "b\"\\\r\n \u{a0}x\"".parse::().unwrap_err(); + "br\"\u{a0}\"".parse::().unwrap_err(); +} + +#[test] +fn literal_c_string() { + let strings = r###" + c"hello\x80我叫\u{1F980}" // from the RFC + cr"\" + cr##"Hello "world"!"## + c"\t\n\r\"\\" + "###; + + let mut tokens = strings.parse::().unwrap().into_iter(); + + for expected in &[ + r#"c"hello\x80我叫\u{1F980}""#, + r#"cr"\""#, + r###"cr##"Hello "world"!"##"###, + r#"c"\t\n\r\"\\""#, + ] { + match tokens.next().unwrap() { + TokenTree::Literal(literal) => { + assert_eq!(literal.to_string(), *expected); + } + unexpected => panic!("unexpected token: {:?}", unexpected), + } + } + + if let Some(unexpected) = tokens.next() { + panic!("unexpected token: {:?}", unexpected); + } + + for invalid in &[r#"c"\0""#, r#"c"\x00""#, r#"c"\u{0}""#, "c\"\0\""] { + if let Ok(unexpected) = invalid.parse::() { + panic!("unexpected token: {:?}", unexpected); + } + } +} + +#[test] +fn literal_character() { + assert_eq!(Literal::character('x').to_string(), "'x'"); + assert_eq!(Literal::character('\'').to_string(), "'\\''"); + assert_eq!(Literal::character('"').to_string(), "'\"'"); +} + +#[test] +fn literal_integer() { + assert_eq!(Literal::u8_suffixed(10).to_string(), "10u8"); + assert_eq!(Literal::u16_suffixed(10).to_string(), "10u16"); + assert_eq!(Literal::u32_suffixed(10).to_string(), "10u32"); + assert_eq!(Literal::u64_suffixed(10).to_string(), "10u64"); + assert_eq!(Literal::u128_suffixed(10).to_string(), "10u128"); + assert_eq!(Literal::usize_suffixed(10).to_string(), "10usize"); + + assert_eq!(Literal::i8_suffixed(10).to_string(), "10i8"); + assert_eq!(Literal::i16_suffixed(10).to_string(), "10i16"); + assert_eq!(Literal::i32_suffixed(10).to_string(), "10i32"); + assert_eq!(Literal::i64_suffixed(10).to_string(), "10i64"); + assert_eq!(Literal::i128_suffixed(10).to_string(), "10i128"); + assert_eq!(Literal::isize_suffixed(10).to_string(), "10isize"); + + assert_eq!(Literal::u8_unsuffixed(10).to_string(), "10"); + assert_eq!(Literal::u16_unsuffixed(10).to_string(), "10"); + assert_eq!(Literal::u32_unsuffixed(10).to_string(), "10"); + assert_eq!(Literal::u64_unsuffixed(10).to_string(), "10"); + assert_eq!(Literal::u128_unsuffixed(10).to_string(), "10"); + assert_eq!(Literal::usize_unsuffixed(10).to_string(), "10"); + + assert_eq!(Literal::i8_unsuffixed(10).to_string(), "10"); + assert_eq!(Literal::i16_unsuffixed(10).to_string(), "10"); + assert_eq!(Literal::i32_unsuffixed(10).to_string(), "10"); + assert_eq!(Literal::i64_unsuffixed(10).to_string(), "10"); + assert_eq!(Literal::i128_unsuffixed(10).to_string(), "10"); + assert_eq!(Literal::isize_unsuffixed(10).to_string(), "10"); +} + +#[test] +fn literal_float() { + assert_eq!(Literal::f32_suffixed(10.0).to_string(), "10f32"); + assert_eq!(Literal::f64_suffixed(10.0).to_string(), "10f64"); + + assert_eq!(Literal::f32_unsuffixed(10.0).to_string(), "10.0"); + assert_eq!(Literal::f64_unsuffixed(10.0).to_string(), "10.0"); +} + +#[test] +fn literal_suffix() { + fn token_count(p: &str) -> usize { + p.parse::().unwrap().into_iter().count() + } + + assert_eq!(token_count("999u256"), 1); + assert_eq!(token_count("999r#u256"), 3); + assert_eq!(token_count("1."), 1); + assert_eq!(token_count("1.f32"), 3); + 
assert_eq!(token_count("1.0_0"), 1); + assert_eq!(token_count("1._0"), 3); + assert_eq!(token_count("1._m"), 3); + assert_eq!(token_count("\"\"s"), 1); + assert_eq!(token_count("r\"\"r"), 1); + assert_eq!(token_count("b\"\"b"), 1); + assert_eq!(token_count("br\"\"br"), 1); + assert_eq!(token_count("r#\"\"#r"), 1); + assert_eq!(token_count("'c'c"), 1); + assert_eq!(token_count("b'b'b"), 1); + assert_eq!(token_count("0E"), 1); + assert_eq!(token_count("0o0A"), 1); + assert_eq!(token_count("0E--0"), 4); + assert_eq!(token_count("0.0ECMA"), 1); +} + +#[test] +fn literal_iter_negative() { + let negative_literal = Literal::i32_suffixed(-3); + let tokens = TokenStream::from(TokenTree::Literal(negative_literal)); + let mut iter = tokens.into_iter(); + match iter.next().unwrap() { + TokenTree::Punct(punct) => { + assert_eq!(punct.as_char(), '-'); + assert_eq!(punct.spacing(), Spacing::Alone); + } + unexpected => panic!("unexpected token {:?}", unexpected), + } + match iter.next().unwrap() { + TokenTree::Literal(literal) => { + assert_eq!(literal.to_string(), "3i32"); + } + unexpected => panic!("unexpected token {:?}", unexpected), + } + assert!(iter.next().is_none()); +} + +#[test] +fn literal_parse() { + assert!("1".parse::().is_ok()); + assert!("-1".parse::().is_ok()); + assert!("-1u12".parse::().is_ok()); + assert!("1.0".parse::().is_ok()); + assert!("-1.0".parse::().is_ok()); + assert!("-1.0f12".parse::().is_ok()); + assert!("'a'".parse::().is_ok()); + assert!("\"\n\"".parse::().is_ok()); + assert!("0 1".parse::().is_err()); + assert!(" 0".parse::().is_err()); + assert!("0 ".parse::().is_err()); + assert!("/* comment */0".parse::().is_err()); + assert!("0/* comment */".parse::().is_err()); + assert!("0// comment".parse::().is_err()); + assert!("- 1".parse::().is_err()); + assert!("- 1.0".parse::().is_err()); + assert!("-\"\"".parse::().is_err()); +} + +#[test] +fn literal_span() { + let positive = "0.1".parse::().unwrap(); + let negative = "-0.1".parse::().unwrap(); + let subspan = positive.subspan(1..2); + + #[cfg(not(span_locations))] + { + let _ = negative; + assert!(subspan.is_none()); + } + + #[cfg(span_locations)] + { + assert_eq!(positive.span().start().column, 0); + assert_eq!(positive.span().end().column, 3); + assert_eq!(negative.span().start().column, 0); + assert_eq!(negative.span().end().column, 4); + assert_eq!(subspan.unwrap().source_text().unwrap(), "."); + } + + assert!(positive.subspan(1..4).is_none()); +} + +#[cfg(span_locations)] +#[test] +fn source_text() { + let input = " 𓀕 a z "; + let mut tokens = input + .parse::() + .unwrap() + .into_iter(); + + let first = tokens.next().unwrap(); + assert_eq!("𓀕", first.span().source_text().unwrap()); + + let second = tokens.next().unwrap(); + let third = tokens.next().unwrap(); + assert_eq!("z", third.span().source_text().unwrap()); + assert_eq!("a", second.span().source_text().unwrap()); +} + +#[test] +fn roundtrip() { + fn roundtrip(p: &str) { + println!("parse: {}", p); + let s = p.parse::().unwrap().to_string(); + println!("first: {}", s); + let s2 = s.parse::().unwrap().to_string(); + assert_eq!(s, s2); + } + roundtrip("a"); + roundtrip("<<"); + roundtrip("<<="); + roundtrip( + " + 1 + 1.0 + 1f32 + 2f64 + 1usize + 4isize + 4e10 + 1_000 + 1_0i32 + 8u8 + 9 + 0 + 0xffffffffffffffffffffffffffffffff + 1x + 1u80 + 1f320 + ", + ); + roundtrip("'a"); + roundtrip("'_"); + roundtrip("'static"); + roundtrip("'\\u{10__FFFF}'"); + roundtrip("\"\\u{10_F0FF__}foo\\u{1_0_0_0__}\""); +} + +#[test] +fn fail() { + fn fail(p: &str) { + if let Ok(s) 
= p.parse::() { + panic!("should have failed to parse: {}\n{:#?}", p, s); + } + } + fail("' static"); + fail("r#1"); + fail("r#_"); + fail("\"\\u{0000000}\""); // overlong unicode escape (rust allows at most 6 hex digits) + fail("\"\\u{999999}\""); // outside of valid range of char + fail("\"\\u{_0}\""); // leading underscore + fail("\"\\u{}\""); // empty + fail("b\"\r\""); // bare carriage return in byte string + fail("r\"\r\""); // bare carriage return in raw string + fail("\"\\\r \""); // backslash carriage return + fail("'aa'aa"); + fail("br##\"\"#"); + fail("\"\\\n\u{85}\r\""); +} + +#[cfg(span_locations)] +#[test] +fn span_test() { + check_spans( + "\ +/// This is a document comment +testing 123 +{ + testing 234 +}", + &[ + (1, 0, 1, 30), // # + (1, 0, 1, 30), // [ ... ] + (1, 0, 1, 30), // doc + (1, 0, 1, 30), // = + (1, 0, 1, 30), // "This is..." + (2, 0, 2, 7), // testing + (2, 8, 2, 11), // 123 + (3, 0, 5, 1), // { ... } + (4, 2, 4, 9), // testing + (4, 10, 4, 13), // 234 + ], + ); +} + +#[cfg(procmacro2_semver_exempt)] +#[cfg(not(nightly))] +#[test] +fn default_span() { + let start = Span::call_site().start(); + assert_eq!(start.line, 1); + assert_eq!(start.column, 0); + let end = Span::call_site().end(); + assert_eq!(end.line, 1); + assert_eq!(end.column, 0); + let source_file = Span::call_site().source_file(); + assert_eq!(source_file.path().to_string_lossy(), ""); + assert!(!source_file.is_real()); +} + +#[cfg(procmacro2_semver_exempt)] +#[test] +fn span_join() { + let source1 = "aaa\nbbb" + .parse::() + .unwrap() + .into_iter() + .collect::>(); + let source2 = "ccc\nddd" + .parse::() + .unwrap() + .into_iter() + .collect::>(); + + assert!(source1[0].span().source_file() != source2[0].span().source_file()); + assert_eq!( + source1[0].span().source_file(), + source1[1].span().source_file() + ); + + let joined1 = source1[0].span().join(source1[1].span()); + let joined2 = source1[0].span().join(source2[0].span()); + assert!(joined1.is_some()); + assert!(joined2.is_none()); + + let start = joined1.unwrap().start(); + let end = joined1.unwrap().end(); + assert_eq!(start.line, 1); + assert_eq!(start.column, 0); + assert_eq!(end.line, 2); + assert_eq!(end.column, 3); + + assert_eq!( + joined1.unwrap().source_file(), + source1[0].span().source_file() + ); +} + +#[test] +fn no_panic() { + let s = str::from_utf8(b"b\'\xc2\x86 \x00\x00\x00^\"").unwrap(); + assert!(s.parse::().is_err()); +} + +#[test] +fn punct_before_comment() { + let mut tts = TokenStream::from_str("~// comment").unwrap().into_iter(); + match tts.next().unwrap() { + TokenTree::Punct(tt) => { + assert_eq!(tt.as_char(), '~'); + assert_eq!(tt.spacing(), Spacing::Alone); + } + wrong => panic!("wrong token {:?}", wrong), + } +} + +#[test] +fn joint_last_token() { + // This test verifies that we match the behavior of libproc_macro *not* in + // the range nightly-2020-09-06 through nightly-2020-09-10, in which this + // behavior was temporarily broken. 
+    // See https://github.com/rust-lang/rust/issues/76399
+
+    let joint_punct = Punct::new(':', Spacing::Joint);
+    let stream = TokenStream::from(TokenTree::Punct(joint_punct));
+    let punct = match stream.into_iter().next().unwrap() {
+        TokenTree::Punct(punct) => punct,
+        _ => unreachable!(),
+    };
+    assert_eq!(punct.spacing(), Spacing::Joint);
+}
+
+#[test]
+fn raw_identifier() {
+    let mut tts = TokenStream::from_str("r#dyn").unwrap().into_iter();
+    match tts.next().unwrap() {
+        TokenTree::Ident(raw) => assert_eq!("r#dyn", raw.to_string()),
+        wrong => panic!("wrong token {:?}", wrong),
+    }
+    assert!(tts.next().is_none());
+}
+
+#[test]
+fn test_debug_ident() {
+    let ident = Ident::new("proc_macro", Span::call_site());
+
+    #[cfg(not(span_locations))]
+    let expected = "Ident(proc_macro)";
+
+    #[cfg(span_locations)]
+    let expected = "Ident { sym: proc_macro }";
+
+    assert_eq!(expected, format!("{:?}", ident));
+}
+
+#[test]
+fn test_debug_tokenstream() {
+    let tts = TokenStream::from_str("[a + 1]").unwrap();
+
+    #[cfg(not(span_locations))]
+    let expected = "\
+TokenStream [
+    Group {
+        delimiter: Bracket,
+        stream: TokenStream [
+            Ident {
+                sym: a,
+            },
+            Punct {
+                char: '+',
+                spacing: Alone,
+            },
+            Literal {
+                lit: 1,
+            },
+        ],
+    },
+]\
+    ";
+
+    #[cfg(not(span_locations))]
+    let expected_before_trailing_commas = "\
+TokenStream [
+    Group {
+        delimiter: Bracket,
+        stream: TokenStream [
+            Ident {
+                sym: a
+            },
+            Punct {
+                char: '+',
+                spacing: Alone
+            },
+            Literal {
+                lit: 1
+            }
+        ]
+    }
+]\
+    ";
+
+    #[cfg(span_locations)]
+    let expected = "\
+TokenStream [
+    Group {
+        delimiter: Bracket,
+        stream: TokenStream [
+            Ident {
+                sym: a,
+                span: bytes(2..3),
+            },
+            Punct {
+                char: '+',
+                spacing: Alone,
+                span: bytes(4..5),
+            },
+            Literal {
+                lit: 1,
+                span: bytes(6..7),
+            },
+        ],
+        span: bytes(1..8),
+    },
+]\
+    ";
+
+    #[cfg(span_locations)]
+    let expected_before_trailing_commas = "\
+TokenStream [
+    Group {
+        delimiter: Bracket,
+        stream: TokenStream [
+            Ident {
+                sym: a,
+                span: bytes(2..3)
+            },
+            Punct {
+                char: '+',
+                spacing: Alone,
+                span: bytes(4..5)
+            },
+            Literal {
+                lit: 1,
+                span: bytes(6..7)
+            }
+        ],
+        span: bytes(1..8)
+    }
+]\
+    ";
+
+    let actual = format!("{:#?}", tts);
+    if actual.ends_with(",\n]") {
+        assert_eq!(expected, actual);
+    } else {
+        assert_eq!(expected_before_trailing_commas, actual);
+    }
+}
+
+#[test]
+fn default_tokenstream_is_empty() {
+    let default_token_stream = <TokenStream as Default>::default();
+
+    assert!(default_token_stream.is_empty());
+}
+
+#[test]
+fn tokenstream_size_hint() {
+    let tokens = "a b (c d) e".parse::<TokenStream>().unwrap();
+
+    assert_eq!(tokens.into_iter().size_hint(), (4, Some(4)));
+}
+
+#[test]
+fn tuple_indexing() {
+    // This behavior may change depending on https://github.com/rust-lang/rust/pull/71322
+    let mut tokens = "tuple.0.0".parse::<TokenStream>().unwrap().into_iter();
+    assert_eq!("tuple", tokens.next().unwrap().to_string());
+    assert_eq!(".", tokens.next().unwrap().to_string());
+    assert_eq!("0.0", tokens.next().unwrap().to_string());
+    assert!(tokens.next().is_none());
+}
+
+#[cfg(span_locations)]
+#[test]
+fn non_ascii_tokens() {
+    check_spans("// abc", &[]);
+    check_spans("// ábc", &[]);
+    check_spans("// abc x", &[]);
+    check_spans("// ábc x", &[]);
+    check_spans("/* abc */ x", &[(1, 10, 1, 11)]);
+    check_spans("/* ábc */ x", &[(1, 10, 1, 11)]);
+    check_spans("/* ab\nc */ x", &[(2, 5, 2, 6)]);
+    check_spans("/* áb\nc */ x", &[(2, 5, 2, 6)]);
+    check_spans("/*** abc */ x", &[(1, 12, 1, 13)]);
+    check_spans("/*** ábc */ x", &[(1, 12, 1, 13)]);
+    check_spans(r#""abc""#, &[(1, 0, 1, 5)]);
+    check_spans(r#""ábc""#, &[(1, 0, 1, 5)]);
+    check_spans(r##"r#"abc"#"##, &[(1, 0, 1, 8)]);
+    check_spans(r##"r#"ábc"#"##, &[(1, 0, 1, 8)]);
+    check_spans("r#\"a\nc\"#", &[(1, 0, 2, 3)]);
+    check_spans("r#\"á\nc\"#", &[(1, 0, 2, 3)]);
+    check_spans("'a'", &[(1, 0, 1, 3)]);
+    check_spans("'á'", &[(1, 0, 1, 3)]);
+    check_spans("//! abc", &[(1, 0, 1, 7), (1, 0, 1, 7), (1, 0, 1, 7)]);
+    check_spans("//! ábc", &[(1, 0, 1, 7), (1, 0, 1, 7), (1, 0, 1, 7)]);
+    check_spans("//! abc\n", &[(1, 0, 1, 7), (1, 0, 1, 7), (1, 0, 1, 7)]);
+    check_spans("//! ábc\n", &[(1, 0, 1, 7), (1, 0, 1, 7), (1, 0, 1, 7)]);
+    check_spans("/*! abc */", &[(1, 0, 1, 10), (1, 0, 1, 10), (1, 0, 1, 10)]);
+    check_spans("/*! ábc */", &[(1, 0, 1, 10), (1, 0, 1, 10), (1, 0, 1, 10)]);
+    check_spans("/*! a\nc */", &[(1, 0, 2, 4), (1, 0, 2, 4), (1, 0, 2, 4)]);
+    check_spans("/*! á\nc */", &[(1, 0, 2, 4), (1, 0, 2, 4), (1, 0, 2, 4)]);
+    check_spans("abc", &[(1, 0, 1, 3)]);
+    check_spans("ábc", &[(1, 0, 1, 3)]);
+    check_spans("ábć", &[(1, 0, 1, 3)]);
+    check_spans("abc// foo", &[(1, 0, 1, 3)]);
+    check_spans("ábc// foo", &[(1, 0, 1, 3)]);
+    check_spans("ábć// foo", &[(1, 0, 1, 3)]);
+    check_spans("b\"a\\\n c\"", &[(1, 0, 2, 3)]);
+}
+
+#[cfg(span_locations)]
+fn check_spans(p: &str, mut lines: &[(usize, usize, usize, usize)]) {
+    let ts = p.parse::<TokenStream>().unwrap();
+    check_spans_internal(ts, &mut lines);
+    assert!(lines.is_empty(), "leftover ranges: {:?}", lines);
+}
+
+#[cfg(span_locations)]
+fn check_spans_internal(ts: TokenStream, lines: &mut &[(usize, usize, usize, usize)]) {
+    for i in ts {
+        if let Some((&(sline, scol, eline, ecol), rest)) = lines.split_first() {
+            *lines = rest;
+
+            let start = i.span().start();
+            assert_eq!(start.line, sline, "sline did not match for {}", i);
+            assert_eq!(start.column, scol, "scol did not match for {}", i);
+
+            let end = i.span().end();
+            assert_eq!(end.line, eline, "eline did not match for {}", i);
+            assert_eq!(end.column, ecol, "ecol did not match for {}", i);
+
+            if let TokenTree::Group(g) = i {
+                check_spans_internal(g.stream().clone(), lines);
+            }
+        }
+    }
+}
+
+#[test]
+fn whitespace() {
+    // space, horizontal tab, vertical tab, form feed, carriage return, line
+    // feed, non-breaking space, left-to-right mark, right-to-left mark
+    let various_spaces = " \t\u{b}\u{c}\r\n\u{a0}\u{200e}\u{200f}";
+    let tokens = various_spaces.parse::<TokenStream>().unwrap();
+    assert_eq!(tokens.into_iter().count(), 0);
+
+    let lone_carriage_returns = " \r \r\r\n ";
+    lone_carriage_returns.parse::<TokenStream>().unwrap();
+}
+
+#[test]
+fn byte_order_mark() {
+    let string = "\u{feff}foo";
+    let tokens = string.parse::<TokenStream>().unwrap();
+    match tokens.into_iter().next().unwrap() {
+        TokenTree::Ident(ident) => assert_eq!(ident, "foo"),
+        _ => unreachable!(),
+    }
+
+    let string = "foo\u{feff}";
+    string.parse::<TokenStream>().unwrap_err();
+}
diff --git a/vendor/proc-macro2/tests/test_fmt.rs b/vendor/proc-macro2/tests/test_fmt.rs
new file mode 100644
index 0000000..86a4c38
--- /dev/null
+++ b/vendor/proc-macro2/tests/test_fmt.rs
@@ -0,0 +1,28 @@
+#![allow(clippy::from_iter_instead_of_collect)]
+
+use proc_macro2::{Delimiter, Group, Ident, Span, TokenStream, TokenTree};
+use std::iter;
+
+#[test]
+fn test_fmt_group() {
+    let ident = Ident::new("x", Span::call_site());
+    let inner = TokenStream::from_iter(iter::once(TokenTree::Ident(ident)));
+    let parens_empty = Group::new(Delimiter::Parenthesis, TokenStream::new());
+    let parens_nonempty = Group::new(Delimiter::Parenthesis, inner.clone());
+    let brackets_empty = Group::new(Delimiter::Bracket, TokenStream::new());
+    let brackets_nonempty = Group::new(Delimiter::Bracket, inner.clone());
+    let braces_empty = Group::new(Delimiter::Brace, TokenStream::new());
+    let braces_nonempty = Group::new(Delimiter::Brace, inner.clone());
+    let none_empty = Group::new(Delimiter::None, TokenStream::new());
+    let none_nonempty = Group::new(Delimiter::None, inner);
+
+    // Matches libproc_macro.
+    assert_eq!("()", parens_empty.to_string());
+    assert_eq!("(x)", parens_nonempty.to_string());
+    assert_eq!("[]", brackets_empty.to_string());
+    assert_eq!("[x]", brackets_nonempty.to_string());
+    assert_eq!("{ }", braces_empty.to_string());
+    assert_eq!("{ x }", braces_nonempty.to_string());
+    assert_eq!("", none_empty.to_string());
+    assert_eq!("x", none_nonempty.to_string());
+}
diff --git a/vendor/proc-macro2/tests/test_size.rs b/vendor/proc-macro2/tests/test_size.rs
new file mode 100644
index 0000000..46e58db
--- /dev/null
+++ b/vendor/proc-macro2/tests/test_size.rs
@@ -0,0 +1,42 @@
+extern crate proc_macro;
+
+use std::mem;
+
+#[rustversion::attr(before(1.32), ignore)]
+#[test]
+fn test_proc_macro_span_size() {
+    assert_eq!(mem::size_of::<proc_macro::Span>(), 4);
+    assert_eq!(mem::size_of::<Option<proc_macro::Span>>(), 4);
+}
+
+#[cfg_attr(not(all(not(wrap_proc_macro), not(span_locations))), ignore)]
+#[test]
+fn test_proc_macro2_fallback_span_size_without_locations() {
+    assert_eq!(mem::size_of::<proc_macro2::Span>(), 0);
+    assert_eq!(mem::size_of::<Option<proc_macro2::Span>>(), 1);
+}
+
+#[cfg_attr(not(all(not(wrap_proc_macro), span_locations)), ignore)]
+#[test]
+fn test_proc_macro2_fallback_span_size_with_locations() {
+    assert_eq!(mem::size_of::<proc_macro2::Span>(), 8);
+    assert_eq!(mem::size_of::<Option<proc_macro2::Span>>(), 12);
+}
+
+#[rustversion::attr(before(1.32), ignore)]
+#[rustversion::attr(
+    since(1.32),
+    cfg_attr(not(all(wrap_proc_macro, not(span_locations))), ignore)
+)]
+#[test]
+fn test_proc_macro2_wrapper_span_size_without_locations() {
+    assert_eq!(mem::size_of::<proc_macro2::Span>(), 4);
+    assert_eq!(mem::size_of::<Option<proc_macro2::Span>>(), 8);
+}
+
+#[cfg_attr(not(all(wrap_proc_macro, span_locations)), ignore)]
+#[test]
+fn test_proc_macro2_wrapper_span_size_with_locations() {
+    assert_eq!(mem::size_of::<proc_macro2::Span>(), 12);
+    assert_eq!(mem::size_of::<Option<proc_macro2::Span>>(), 12);
+}
diff --git a/vendor/quote/.cargo-checksum.json b/vendor/quote/.cargo-checksum.json
new file mode 100644
index 0000000..0d900a2
--- /dev/null
+++ b/vendor/quote/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"0ec1e0fd36354750321a12d04a5e4d9a8d5dc6a8af753183de50da55fc10391b","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"626e7079eab0baacf0fcaf3e244f407b2014ebaeca45905d72e8fb8bed18aaea","rust-toolchain.toml":"6bbb61302978c736b2da03e4fb40e3beab908f85d533ab46fd541e637b5f3e0f","src/ext.rs":"9881576cac3e476a4bf04f9b601cf9a53b79399fb0ca9634e8b861ac91709843","src/format.rs":"c595015418f35e6992e710441b9999f09b2afe4678b138039d670d100c0bdd86","src/ident_fragment.rs":"0b3e6c2129e55910fd2d240e1e7efba6f1796801d24352d1c0bfbceb0e8b678f","src/lib.rs":"cef1b4c031d401fb87e88a2ed51858c5f8f471e62a6261c1ef0f55ef9e1906a1","src/runtime.rs":"7f37326edaeac2c42ed806b447eeba12e36dd4b1bc25fbf52f8eb23140f3be7a","src/spanned.rs":"3ccf5120593f35787442c0a37d243e802c5262e7f8b35aed503873008ec035c5","src/to_tokens.rs":"1c76311fcc82098e630056d71fd6f3929194ee31b0840e2aa643ed7e78026e3e","tests/compiletest.rs":"022a8e400ef813d7ea1875b944549cee5125f6a995dc33e93b48cba3e1b57bd1","tests/test.rs":"3be80741f84a707376c230d9cf70ce9537caa359691d8d4c34968e28175e4ad7","tests/ui/does-not-have-iter-interpolated-dup.rs":"ad13eea21d4cdd2ab6c082f633392e1ff20fb0d1af5f2177041e0bf7f30da695","tests/ui/does-not-have-iter-interpolated-dup.stderr":"90a4bdb9267535f5d2785940148338d6b7d905548051d2c9c5dcbd58f2c11d8e","tests/ui/does-not-have-iter-interpolated.rs":"83a5b3f240651adcbe4b6e51076d76d653ad439b37442cf4054f1fd3c073f3b7","tests/ui/does-not-have-iter-interpolated.stderr":"ae7c2739554c862b331705e82781aa4687a4375210cef6ae899a4be4a4ec2d97","tests/ui/does-not-have-iter-separated.rs":"fe413c48331d5e3a7ae5fef6a5892a90c72f610d54595879eb49d0a94154ba3f","tests/ui/does-not-have-iter-separated.stderr":"03fd560979ebcd5aa6f83858bc2c3c01ba6546c16335101275505304895c1ae9","tests/ui/does-not-have-iter.rs":"09dc9499d861b63cebb0848b855b78e2dc9497bfde37ba6339f3625ae009a62f","tests/ui/does-not-have-iter.stderr":"d6da483c29e232ced72059bbdf05d31afb1df9e02954edaa9cfaea1ec6df72dc","tests/ui/not-quotable.rs":"5759d0884943417609f28faadc70254a3e2fd3d9bd6ff7297a3fb70a77fafd8a","tests/ui/not-quotable.stderr":"459bdadbf1e73b9401cf7d5d578dc053774bb4e5aa25ad2abf25d6b0f61aa306","tests/ui/not-repeatable.rs":"a4b115c04e4e41049a05f5b69450503fbffeba031218b4189cb931839f7f9a9c","tests/ui/not-repeatable.stderr":"594249d59d16f039c16816f1aaf9933176994e296fcf81d1b8b24d5b66ae0d0a","tests/ui/wrong-type-span.rs":"6195e35ea844c0c52ba1cff5d790c3a371af6915d137d377834ad984229ef9ea","tests/ui/wrong-type-span.stderr":"cad072e40e0ecc04f375122ae41aede2f0da2a9244492b3fcf70249e59d1b128"},"package":"291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"} \ No newline at end of file diff --git a/vendor/quote/Cargo.toml b/vendor/quote/Cargo.toml new file mode 100644 index 0000000..f3222c2 --- /dev/null +++ b/vendor/quote/Cargo.toml @@ -0,0 +1,50 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+
+[package]
+edition = "2018"
+rust-version = "1.56"
+name = "quote"
+version = "1.0.35"
+authors = ["David Tolnay <dtolnay@gmail.com>"]
+autobenches = false
+description = "Quasi-quoting macro quote!(...)"
+documentation = "https://docs.rs/quote/"
+readme = "README.md"
+keywords = [
+    "macros",
+    "syn",
+]
+categories = ["development-tools::procedural-macro-helpers"]
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/dtolnay/quote"
+
+[package.metadata.docs.rs]
+rustdoc-args = ["--generate-link-to-definition"]
+targets = ["x86_64-unknown-linux-gnu"]
+
+[lib]
+doc-scrape-examples = false
+
+[dependencies.proc-macro2]
+version = "1.0.74"
+default-features = false
+
+[dev-dependencies.rustversion]
+version = "1.0"
+
+[dev-dependencies.trybuild]
+version = "1.0.66"
+features = ["diff"]
+
+[features]
+default = ["proc-macro"]
+proc-macro = ["proc-macro2/proc-macro"]
diff --git a/vendor/quote/LICENSE-APACHE b/vendor/quote/LICENSE-APACHE
new file mode 100644
index 0000000..1b5ec8b
--- /dev/null
+++ b/vendor/quote/LICENSE-APACHE
@@ -0,0 +1,176 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS diff --git a/vendor/quote/LICENSE-MIT b/vendor/quote/LICENSE-MIT new file mode 100644 index 0000000..31aa793 --- /dev/null +++ b/vendor/quote/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/vendor/quote/README.md b/vendor/quote/README.md new file mode 100644 index 0000000..bfc91a9 --- /dev/null +++ b/vendor/quote/README.md @@ -0,0 +1,272 @@ +Rust Quasi-Quoting +================== + +[github](https://github.com/dtolnay/quote) +[crates.io](https://crates.io/crates/quote) +[docs.rs](https://docs.rs/quote) +[build status](https://github.com/dtolnay/quote/actions?query=branch%3Amaster) + +This crate provides the [`quote!`] macro for turning Rust syntax tree data +structures into tokens of source code. + +[`quote!`]: https://docs.rs/quote/1.0/quote/macro.quote.html + +Procedural macros in Rust receive a stream of tokens as input, execute arbitrary +Rust code to determine how to manipulate those tokens, and produce a stream of +tokens to hand back to the compiler to compile into the caller's crate. +Quasi-quoting is a solution to one piece of that — producing tokens to +return to the compiler. 
+ +The idea of quasi-quoting is that we write *code* that we treat as *data*. +Within the `quote!` macro, we can write what looks like code to our text editor +or IDE. We get all the benefits of the editor's brace matching, syntax +highlighting, indentation, and maybe autocompletion. But rather than compiling +that as code into the current crate, we can treat it as data, pass it around, +mutate it, and eventually hand it back to the compiler as tokens to compile into +the macro caller's crate. + +This crate is motivated by the procedural macro use case, but is a +general-purpose Rust quasi-quoting library and is not specific to procedural +macros. + +```toml +[dependencies] +quote = "1.0" +``` + +*Version requirement: Quote supports rustc 1.56 and up.*
+[*Release notes*](https://github.com/dtolnay/quote/releases) + +
+
+## Syntax
+
+The quote crate provides a [`quote!`] macro within which you can write Rust code
+that gets packaged into a [`TokenStream`] and can be treated as data. You should
+think of `TokenStream` as representing a fragment of Rust source code.
+
+[`TokenStream`]: https://docs.rs/proc-macro2/1.0/proc_macro2/struct.TokenStream.html
+
+Within the `quote!` macro, interpolation is done with `#var`. Any type
+implementing the [`quote::ToTokens`] trait can be interpolated. This includes
+most Rust primitive types as well as most of the syntax tree types from [`syn`].
+
+[`quote::ToTokens`]: https://docs.rs/quote/1.0/quote/trait.ToTokens.html
+[`syn`]: https://github.com/dtolnay/syn
+
+```rust
+let tokens = quote! {
+    struct SerializeWith #generics #where_clause {
+        value: &'a #field_ty,
+        phantom: core::marker::PhantomData<#item_ty>,
+    }
+
+    impl #generics serde::Serialize for SerializeWith #generics #where_clause {
+        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+        where
+            S: serde::Serializer,
+        {
+            #path(self.value, serializer)
+        }
+    }
+
+    SerializeWith {
+        value: #value,
+        phantom: core::marker::PhantomData::<#item_ty>,
+    }
+};
+```
+
+<br>
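(Editorial aside, not part of the vendored README: the `SerializeWith` snippet above assumes surrounding macro context, so here is a minimal self-contained sketch of `#var` interpolation; the `Point` and `f64` names are invented for the example.)

```rust
use quote::{format_ident, quote};

fn main() {
    // Hypothetical names, just to demonstrate interpolation.
    let name = format_ident!("Point");
    let ty = quote!(f64);

    // #name and #ty are interpolated wherever they appear in the quoted code.
    let tokens = quote! {
        struct #name {
            x: #ty,
            y: #ty,
        }
    };

    println!("{}", tokens);
}
```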
+ +## Repetition + +Repetition is done using `#(...)*` or `#(...),*` similar to `macro_rules!`. This +iterates through the elements of any variable interpolated within the repetition +and inserts a copy of the repetition body for each one. The variables in an +interpolation may be anything that implements `IntoIterator`, including `Vec` or +a pre-existing iterator. + +- `#(#var)*` — no separators +- `#(#var),*` — the character before the asterisk is used as a separator +- `#( struct #var; )*` — the repetition can contain other things +- `#( #k => println!("{}", #v), )*` — even multiple interpolations + +Note that there is a difference between `#(#var ,)*` and `#(#var),*`—the latter +does not produce a trailing comma. This matches the behavior of delimiters in +`macro_rules!`. + +
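(Editorial aside, not part of the vendored README: the repetition section above has no self-contained example, so here is a minimal runnable sketch of the `#(...),*` form. The `Demo` struct and the `field{}` naming are invented for illustration.)

```rust
use quote::{format_ident, quote};

fn main() {
    // Three made-up field names built with format_ident!.
    let fields: Vec<_> = (0..3).map(|i| format_ident!("field{}", i)).collect();

    // `#(#fields: u8),*` inserts one `name: u8` per element, comma separated.
    let tokens = quote! {
        struct Demo {
            #(#fields: u8),*
        }
    };

    // Prints the token stream for: struct Demo { field0: u8, field1: u8, field2: u8 }
    println!("{}", tokens);
}
```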
+ +## Returning tokens to the compiler + +The `quote!` macro evaluates to an expression of type +`proc_macro2::TokenStream`. Meanwhile Rust procedural macros are expected to +return the type `proc_macro::TokenStream`. + +The difference between the two types is that `proc_macro` types are entirely +specific to procedural macros and cannot ever exist in code outside of a +procedural macro, while `proc_macro2` types may exist anywhere including tests +and non-macro code like main.rs and build.rs. This is why even the procedural +macro ecosystem is largely built around `proc_macro2`, because that ensures the +libraries are unit testable and accessible in non-macro contexts. + +There is a [`From`]-conversion in both directions so returning the output of +`quote!` from a procedural macro usually looks like `tokens.into()` or +`proc_macro::TokenStream::from(tokens)`. + +[`From`]: https://doc.rust-lang.org/std/convert/trait.From.html + +
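(Editorial aside, not part of the vendored README: a minimal sketch of the round trip described above, assuming a crate compiled with `proc-macro = true` and a made-up `Hello` derive; the point is only the final `.into()` conversion between the two `TokenStream` types.)

```rust
extern crate proc_macro;

use proc_macro::TokenStream;
use quote::quote;

#[proc_macro_derive(Hello)]
pub fn derive_hello(input: TokenStream) -> TokenStream {
    let _ = input; // a real macro would parse this, e.g. with syn

    // `quote!` produces a proc_macro2::TokenStream...
    let expanded = quote! {
        fn hello() {
            println!("hello");
        }
    };

    // ...and `.into()` converts it to the proc_macro::TokenStream
    // that the compiler expects back from the macro.
    expanded.into()
}
```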
+
+## Examples
+
+### Combining quoted fragments
+
+Usually you don't end up constructing an entire final `TokenStream` in one
+piece. Different parts may come from different helper functions. The tokens
+produced by `quote!` themselves implement `ToTokens` and so can be interpolated
+into later `quote!` invocations to build up a final result.
+
+```rust
+let type_definition = quote! {...};
+let methods = quote! {...};
+
+let tokens = quote! {
+    #type_definition
+    #methods
+};
+```
+
+### Constructing identifiers
+
+Suppose we have an identifier `ident` which came from somewhere in a macro
+input and we need to modify it in some way for the macro output. Let's consider
+prepending the identifier with an underscore.
+
+Simply interpolating the identifier next to an underscore will not have the
+behavior of concatenating them. The underscore and the identifier will continue
+to be two separate tokens as if you had written `_ x`.
+
+```rust
+// incorrect
+quote! {
+    let mut _#ident = 0;
+}
+```
+
+The solution is to build a new identifier token with the correct value. As this
+is such a common case, the `format_ident!` macro provides a convenient utility
+for doing so correctly.
+
+```rust
+let varname = format_ident!("_{}", ident);
+quote! {
+    let mut #varname = 0;
+}
+```
+
+Alternatively, the APIs provided by Syn and proc-macro2 can be used to directly
+build the identifier. This is roughly equivalent to the above, but will not
+handle `ident` being a raw identifier.
+
+```rust
+let concatenated = format!("_{}", ident);
+let varname = syn::Ident::new(&concatenated, ident.span());
+quote! {
+    let mut #varname = 0;
+}
+```
+
+### Making method calls
+
+Let's say our macro requires some type specified in the macro input to have a
+constructor called `new`. We have the type in a variable called `field_type` of
+type `syn::Type` and want to invoke the constructor.
+
+```rust
+// incorrect
+quote! {
+    let value = #field_type::new();
+}
+```
+
+This works only sometimes. If `field_type` is `String`, the expanded code
+contains `String::new()` which is fine. But if `field_type` is something like
+`Vec<u8>` then the expanded code is `Vec<u8>::new()` which is invalid syntax.
+Ordinarily in handwritten Rust we would write `Vec::<u8>::new()` but for macros
+often the following is more convenient.
+
+```rust
+quote! {
+    let value = <#field_type>::new();
+}
+```
+
+This expands to `<Vec<u8>>::new()` which behaves correctly.
+
+A similar pattern is appropriate for trait methods.
+
+```rust
+quote! {
+    let value = <#field_type as core::default::Default>::default();
+}
+```
+
+<br>
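(Editorial aside, not part of the vendored README: the identifier-building snippets above can be checked outside of any macro; this sketch just asserts the concatenated name.)

```rust
use quote::{format_ident, quote};

fn main() {
    // Stand-in for an identifier that arrived in macro input.
    let ident = format_ident!("x");

    // Build `_x` as a single identifier token.
    let varname = format_ident!("_{}", ident);
    assert_eq!(varname, "_x");

    let tokens = quote! { let mut #varname = 0; };
    println!("{}", tokens); // let mut _x = 0 ;
}
```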
+ +## Hygiene + +Any interpolated tokens preserve the `Span` information provided by their +`ToTokens` implementation. Tokens that originate within a `quote!` invocation +are spanned with [`Span::call_site()`]. + +[`Span::call_site()`]: https://docs.rs/proc-macro2/1.0/proc_macro2/struct.Span.html#method.call_site + +A different span can be provided explicitly through the [`quote_spanned!`] +macro. + +[`quote_spanned!`]: https://docs.rs/quote/1.0/quote/macro.quote_spanned.html + +
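(Editorial aside, not part of the vendored README: `quote_spanned!` from the paragraph above can be exercised directly; here `Span::call_site()` stands in for a span that a real macro would take from its input tokens, and `MyType` is a made-up name.)

```rust
use proc_macro2::Span;
use quote::{format_ident, quote_spanned};

fn main() {
    // In a real macro this span would come from user-written tokens,
    // e.g. `ty.span()` on a parsed syn::Type.
    let span = Span::call_site();
    let ty = format_ident!("MyType");

    // Every token produced inside this invocation carries `span`.
    let assert_sync = quote_spanned! {span=>
        struct _AssertSync where #ty: Sync;
    };

    println!("{}", assert_sync);
}
```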
+ +## Non-macro code generators + +When using `quote` in a build.rs or main.rs and writing the output out to a +file, consider having the code generator pass the tokens through [prettyplease] +before writing. This way if an error occurs in the generated code it is +convenient for a human to read and debug. + +Be aware that no kind of hygiene or span information is retained when tokens are +written to a file; the conversion from tokens to source code is lossy. + +Example usage in build.rs: + +```rust +let output = quote! { ... }; +let syntax_tree = syn::parse2(output).unwrap(); +let formatted = prettyplease::unparse(&syntax_tree); + +let out_dir = env::var_os("OUT_DIR").unwrap(); +let dest_path = Path::new(&out_dir).join("out.rs"); +fs::write(dest_path, formatted).unwrap(); +``` + +[prettyplease]: https://github.com/dtolnay/prettyplease + +
+ +#### License + + +Licensed under either of Apache License, Version +2.0 or MIT license at your option. + + +
+
+
+Unless you explicitly state otherwise, any contribution intentionally submitted
+for inclusion in this crate by you, as defined in the Apache-2.0 license, shall
+be dual licensed as above, without any additional terms or conditions.
+
diff --git a/vendor/quote/rust-toolchain.toml b/vendor/quote/rust-toolchain.toml
new file mode 100644
index 0000000..20fe888
--- /dev/null
+++ b/vendor/quote/rust-toolchain.toml
@@ -0,0 +1,2 @@
+[toolchain]
+components = ["rust-src"]
diff --git a/vendor/quote/src/ext.rs b/vendor/quote/src/ext.rs
new file mode 100644
index 0000000..92c2315
--- /dev/null
+++ b/vendor/quote/src/ext.rs
@@ -0,0 +1,110 @@
+use super::ToTokens;
+use core::iter;
+use proc_macro2::{TokenStream, TokenTree};
+
+/// TokenStream extension trait with methods for appending tokens.
+///
+/// This trait is sealed and cannot be implemented outside of the `quote` crate.
+pub trait TokenStreamExt: private::Sealed {
+    /// For use by `ToTokens` implementations.
+    ///
+    /// Appends the token specified to this list of tokens.
+    fn append<U>(&mut self, token: U)
+    where
+        U: Into<TokenTree>;
+
+    /// For use by `ToTokens` implementations.
+    ///
+    /// ```
+    /// # use quote::{quote, TokenStreamExt, ToTokens};
+    /// # use proc_macro2::TokenStream;
+    /// #
+    /// struct X;
+    ///
+    /// impl ToTokens for X {
+    ///     fn to_tokens(&self, tokens: &mut TokenStream) {
+    ///         tokens.append_all(&[true, false]);
+    ///     }
+    /// }
+    ///
+    /// let tokens = quote!(#X);
+    /// assert_eq!(tokens.to_string(), "true false");
+    /// ```
+    fn append_all<I>(&mut self, iter: I)
+    where
+        I: IntoIterator,
+        I::Item: ToTokens;
+
+    /// For use by `ToTokens` implementations.
+    ///
+    /// Appends all of the items in the iterator `I`, separated by the tokens
+    /// `U`.
+    fn append_separated<I, U>(&mut self, iter: I, op: U)
+    where
+        I: IntoIterator,
+        I::Item: ToTokens,
+        U: ToTokens;
+
+    /// For use by `ToTokens` implementations.
+    ///
+    /// Appends all tokens in the iterator `I`, appending `U` after each
+    /// element, including after the last element of the iterator.
+    fn append_terminated<I, U>(&mut self, iter: I, term: U)
+    where
+        I: IntoIterator,
+        I::Item: ToTokens,
+        U: ToTokens;
+}
+
+impl TokenStreamExt for TokenStream {
+    fn append<U>(&mut self, token: U)
+    where
+        U: Into<TokenTree>,
+    {
+        self.extend(iter::once(token.into()));
+    }
+
+    fn append_all<I>(&mut self, iter: I)
+    where
+        I: IntoIterator,
+        I::Item: ToTokens,
+    {
+        for token in iter {
+            token.to_tokens(self);
+        }
+    }
+
+    fn append_separated<I, U>(&mut self, iter: I, op: U)
+    where
+        I: IntoIterator,
+        I::Item: ToTokens,
+        U: ToTokens,
+    {
+        for (i, token) in iter.into_iter().enumerate() {
+            if i > 0 {
+                op.to_tokens(self);
+            }
+            token.to_tokens(self);
+        }
+    }
+
+    fn append_terminated<I, U>(&mut self, iter: I, term: U)
+    where
+        I: IntoIterator,
+        I::Item: ToTokens,
+        U: ToTokens,
+    {
+        for token in iter {
+            token.to_tokens(self);
+            term.to_tokens(self);
+        }
+    }
+}
+
+mod private {
+    use proc_macro2::TokenStream;
+
+    pub trait Sealed {}
+
+    impl Sealed for TokenStream {}
+}
diff --git a/vendor/quote/src/format.rs b/vendor/quote/src/format.rs
new file mode 100644
index 0000000..3cddbd2
--- /dev/null
+++ b/vendor/quote/src/format.rs
@@ -0,0 +1,168 @@
+/// Formatting macro for constructing `Ident`s.
+///
+/// <br>
+/// +/// # Syntax +/// +/// Syntax is copied from the [`format!`] macro, supporting both positional and +/// named arguments. +/// +/// Only a limited set of formatting traits are supported. The current mapping +/// of format types to traits is: +/// +/// * `{}` ⇒ [`IdentFragment`] +/// * `{:o}` ⇒ [`Octal`](std::fmt::Octal) +/// * `{:x}` ⇒ [`LowerHex`](std::fmt::LowerHex) +/// * `{:X}` ⇒ [`UpperHex`](std::fmt::UpperHex) +/// * `{:b}` ⇒ [`Binary`](std::fmt::Binary) +/// +/// See [`std::fmt`] for more information. +/// +///
+/// +/// # IdentFragment +/// +/// Unlike `format!`, this macro uses the [`IdentFragment`] formatting trait by +/// default. This trait is like `Display`, with a few differences: +/// +/// * `IdentFragment` is only implemented for a limited set of types, such as +/// unsigned integers and strings. +/// * [`Ident`] arguments will have their `r#` prefixes stripped, if present. +/// +/// [`IdentFragment`]: crate::IdentFragment +/// [`Ident`]: proc_macro2::Ident +/// +///
+/// +/// # Hygiene +/// +/// The [`Span`] of the first `Ident` argument is used as the span of the final +/// identifier, falling back to [`Span::call_site`] when no identifiers are +/// provided. +/// +/// ``` +/// # use quote::format_ident; +/// # let ident = format_ident!("Ident"); +/// // If `ident` is an Ident, the span of `my_ident` will be inherited from it. +/// let my_ident = format_ident!("My{}{}", ident, "IsCool"); +/// assert_eq!(my_ident, "MyIdentIsCool"); +/// ``` +/// +/// Alternatively, the span can be overridden by passing the `span` named +/// argument. +/// +/// ``` +/// # use quote::format_ident; +/// # const IGNORE_TOKENS: &'static str = stringify! { +/// let my_span = /* ... */; +/// # }; +/// # let my_span = proc_macro2::Span::call_site(); +/// format_ident!("MyIdent", span = my_span); +/// ``` +/// +/// [`Span`]: proc_macro2::Span +/// [`Span::call_site`]: proc_macro2::Span::call_site +/// +///


+/// +/// # Panics +/// +/// This method will panic if the resulting formatted string is not a valid +/// identifier. +/// +///
+/// +/// # Examples +/// +/// Composing raw and non-raw identifiers: +/// ``` +/// # use quote::format_ident; +/// let my_ident = format_ident!("My{}", "Ident"); +/// assert_eq!(my_ident, "MyIdent"); +/// +/// let raw = format_ident!("r#Raw"); +/// assert_eq!(raw, "r#Raw"); +/// +/// let my_ident_raw = format_ident!("{}Is{}", my_ident, raw); +/// assert_eq!(my_ident_raw, "MyIdentIsRaw"); +/// ``` +/// +/// Integer formatting options: +/// ``` +/// # use quote::format_ident; +/// let num: u32 = 10; +/// +/// let decimal = format_ident!("Id_{}", num); +/// assert_eq!(decimal, "Id_10"); +/// +/// let octal = format_ident!("Id_{:o}", num); +/// assert_eq!(octal, "Id_12"); +/// +/// let binary = format_ident!("Id_{:b}", num); +/// assert_eq!(binary, "Id_1010"); +/// +/// let lower_hex = format_ident!("Id_{:x}", num); +/// assert_eq!(lower_hex, "Id_a"); +/// +/// let upper_hex = format_ident!("Id_{:X}", num); +/// assert_eq!(upper_hex, "Id_A"); +/// ``` +#[macro_export] +macro_rules! format_ident { + ($fmt:expr) => { + $crate::format_ident_impl!([ + $crate::__private::Option::None, + $fmt + ]) + }; + + ($fmt:expr, $($rest:tt)*) => { + $crate::format_ident_impl!([ + $crate::__private::Option::None, + $fmt + ] $($rest)*) + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! format_ident_impl { + // Final state + ([$span:expr, $($fmt:tt)*]) => { + $crate::__private::mk_ident( + &$crate::__private::format!($($fmt)*), + $span, + ) + }; + + // Span argument + ([$old:expr, $($fmt:tt)*] span = $span:expr) => { + $crate::format_ident_impl!([$old, $($fmt)*] span = $span,) + }; + ([$old:expr, $($fmt:tt)*] span = $span:expr, $($rest:tt)*) => { + $crate::format_ident_impl!([ + $crate::__private::Option::Some::<$crate::__private::Span>($span), + $($fmt)* + ] $($rest)*) + }; + + // Named argument + ([$span:expr, $($fmt:tt)*] $name:ident = $arg:expr) => { + $crate::format_ident_impl!([$span, $($fmt)*] $name = $arg,) + }; + ([$span:expr, $($fmt:tt)*] $name:ident = $arg:expr, $($rest:tt)*) => { + match $crate::__private::IdentFragmentAdapter(&$arg) { + arg => $crate::format_ident_impl!([$span.or(arg.span()), $($fmt)*, $name = arg] $($rest)*), + } + }; + + // Positional argument + ([$span:expr, $($fmt:tt)*] $arg:expr) => { + $crate::format_ident_impl!([$span, $($fmt)*] $arg,) + }; + ([$span:expr, $($fmt:tt)*] $arg:expr, $($rest:tt)*) => { + match $crate::__private::IdentFragmentAdapter(&$arg) { + arg => $crate::format_ident_impl!([$span.or(arg.span()), $($fmt)*, arg] $($rest)*), + } + }; +} diff --git a/vendor/quote/src/ident_fragment.rs b/vendor/quote/src/ident_fragment.rs new file mode 100644 index 0000000..6c2a9a8 --- /dev/null +++ b/vendor/quote/src/ident_fragment.rs @@ -0,0 +1,88 @@ +use alloc::borrow::Cow; +use core::fmt; +use proc_macro2::{Ident, Span}; + +/// Specialized formatting trait used by `format_ident!`. +/// +/// [`Ident`] arguments formatted using this trait will have their `r#` prefix +/// stripped, if present. +/// +/// See [`format_ident!`] for more information. +/// +/// [`format_ident!`]: crate::format_ident +pub trait IdentFragment { + /// Format this value as an identifier fragment. + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result; + + /// Span associated with this `IdentFragment`. + /// + /// If non-`None`, may be inherited by formatted identifiers. 
+    fn span(&self) -> Option<Span> {
+        None
+    }
+}
+
+impl<T: IdentFragment + ?Sized> IdentFragment for &T {
+    fn span(&self) -> Option<Span> {
+        <T as IdentFragment>::span(*self)
+    }
+
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        IdentFragment::fmt(*self, f)
+    }
+}
+
+impl<T: IdentFragment + ?Sized> IdentFragment for &mut T {
+    fn span(&self) -> Option<Span> {
+        <T as IdentFragment>::span(*self)
+    }
+
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        IdentFragment::fmt(*self, f)
+    }
+}
+
+impl IdentFragment for Ident {
+    fn span(&self) -> Option<Span> {
+        Some(self.span())
+    }
+
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let id = self.to_string();
+        if let Some(id) = id.strip_prefix("r#") {
+            fmt::Display::fmt(id, f)
+        } else {
+            fmt::Display::fmt(&id[..], f)
+        }
+    }
+}
+
+impl<T> IdentFragment for Cow<'_, T>
+where
+    T: IdentFragment + ToOwned + ?Sized,
+{
+    fn span(&self) -> Option<Span> {
+        T::span(self)
+    }
+
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        T::fmt(self, f)
+    }
+}
+
+// Limited set of types which this is implemented for, as we want to avoid types
+// which will often include non-identifier characters in their `Display` impl.
+macro_rules! ident_fragment_display {
+    ($($T:ty),*) => {
+        $(
+            impl IdentFragment for $T {
+                fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+                    fmt::Display::fmt(self, f)
+                }
+            }
+        )*
+    };
+}
+
+ident_fragment_display!(bool, str, String, char);
+ident_fragment_display!(u8, u16, u32, u64, u128, usize);
diff --git a/vendor/quote/src/lib.rs b/vendor/quote/src/lib.rs
new file mode 100644
index 0000000..8b97abd
--- /dev/null
+++ b/vendor/quote/src/lib.rs
@@ -0,0 +1,1444 @@
+//! [![github]](https://github.com/dtolnay/quote) [![crates-io]](https://crates.io/crates/quote) [![docs-rs]](https://docs.rs/quote)
+//!
+//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
+//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
+//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs
+//!
+//! <br>
+//! +//! This crate provides the [`quote!`] macro for turning Rust syntax tree data +//! structures into tokens of source code. +//! +//! [`quote!`]: macro.quote.html +//! +//! Procedural macros in Rust receive a stream of tokens as input, execute +//! arbitrary Rust code to determine how to manipulate those tokens, and produce +//! a stream of tokens to hand back to the compiler to compile into the caller's +//! crate. Quasi-quoting is a solution to one piece of that — producing +//! tokens to return to the compiler. +//! +//! The idea of quasi-quoting is that we write *code* that we treat as *data*. +//! Within the `quote!` macro, we can write what looks like code to our text +//! editor or IDE. We get all the benefits of the editor's brace matching, +//! syntax highlighting, indentation, and maybe autocompletion. But rather than +//! compiling that as code into the current crate, we can treat it as data, pass +//! it around, mutate it, and eventually hand it back to the compiler as tokens +//! to compile into the macro caller's crate. +//! +//! This crate is motivated by the procedural macro use case, but is a +//! general-purpose Rust quasi-quoting library and is not specific to procedural +//! macros. +//! +//! ```toml +//! [dependencies] +//! quote = "1.0" +//! ``` +//! +//!
+//!
+//! # Example
+//!
+//! The following quasi-quoted block of code is something you might find in [a]
+//! procedural macro having to do with data structure serialization. The `#var`
+//! syntax performs interpolation of runtime variables into the quoted tokens.
+//! Check out the documentation of the [`quote!`] macro for more detail about
+//! the syntax. See also the [`quote_spanned!`] macro which is important for
+//! implementing hygienic procedural macros.
+//!
+//! [a]: https://serde.rs/
+//! [`quote_spanned!`]: macro.quote_spanned.html
+//!
+//! ```
+//! # use quote::quote;
+//! #
+//! # let generics = "";
+//! # let where_clause = "";
+//! # let field_ty = "";
+//! # let item_ty = "";
+//! # let path = "";
+//! # let value = "";
+//! #
+//! let tokens = quote! {
+//!     struct SerializeWith #generics #where_clause {
+//!         value: &'a #field_ty,
+//!         phantom: core::marker::PhantomData<#item_ty>,
+//!     }
+//!
+//!     impl #generics serde::Serialize for SerializeWith #generics #where_clause {
+//!         fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+//!         where
+//!             S: serde::Serializer,
+//!         {
+//!             #path(self.value, serializer)
+//!         }
+//!     }
+//!
+//!     SerializeWith {
+//!         value: #value,
+//!         phantom: core::marker::PhantomData::<#item_ty>,
+//!     }
+//! };
+//! ```
+//!
+//! <br>
+//! +//! # Non-macro code generators +//! +//! When using `quote` in a build.rs or main.rs and writing the output out to a +//! file, consider having the code generator pass the tokens through +//! [prettyplease] before writing. This way if an error occurs in the generated +//! code it is convenient for a human to read and debug. +//! +//! [prettyplease]: https://github.com/dtolnay/prettyplease + +// Quote types in rustdoc of other crates get linked to here. +#![doc(html_root_url = "https://docs.rs/quote/1.0.35")] +#![allow( + clippy::doc_markdown, + clippy::missing_errors_doc, + clippy::missing_panics_doc, + clippy::module_name_repetitions, + // false positive https://github.com/rust-lang/rust-clippy/issues/6983 + clippy::wrong_self_convention, +)] + +extern crate alloc; + +#[cfg(feature = "proc-macro")] +extern crate proc_macro; + +mod ext; +mod format; +mod ident_fragment; +mod to_tokens; + +// Not public API. +#[doc(hidden)] +#[path = "runtime.rs"] +pub mod __private; + +pub use crate::ext::TokenStreamExt; +pub use crate::ident_fragment::IdentFragment; +pub use crate::to_tokens::ToTokens; + +// Not public API. +#[doc(hidden)] +pub mod spanned; + +/// The whole point. +/// +/// Performs variable interpolation against the input and produces it as +/// [`proc_macro2::TokenStream`]. +/// +/// Note: for returning tokens to the compiler in a procedural macro, use +/// `.into()` on the result to convert to [`proc_macro::TokenStream`]. +/// +/// [`TokenStream`]: https://docs.rs/proc-macro2/1.0/proc_macro2/struct.TokenStream.html +/// +///
+/// +/// # Interpolation +/// +/// Variable interpolation is done with `#var` (similar to `$var` in +/// `macro_rules!` macros). This grabs the `var` variable that is currently in +/// scope and inserts it in that location in the output tokens. Any type +/// implementing the [`ToTokens`] trait can be interpolated. This includes most +/// Rust primitive types as well as most of the syntax tree types from the [Syn] +/// crate. +/// +/// [`ToTokens`]: trait.ToTokens.html +/// [Syn]: https://github.com/dtolnay/syn +/// +/// Repetition is done using `#(...)*` or `#(...),*` again similar to +/// `macro_rules!`. This iterates through the elements of any variable +/// interpolated within the repetition and inserts a copy of the repetition body +/// for each one. The variables in an interpolation may be a `Vec`, slice, +/// `BTreeSet`, or any `Iterator`. +/// +/// - `#(#var)*` — no separators +/// - `#(#var),*` — the character before the asterisk is used as a separator +/// - `#( struct #var; )*` — the repetition can contain other tokens +/// - `#( #k => println!("{}", #v), )*` — even multiple interpolations +/// +///
+/// +/// # Hygiene +/// +/// Any interpolated tokens preserve the `Span` information provided by their +/// `ToTokens` implementation. Tokens that originate within the `quote!` +/// invocation are spanned with [`Span::call_site()`]. +/// +/// [`Span::call_site()`]: https://docs.rs/proc-macro2/1.0/proc_macro2/struct.Span.html#method.call_site +/// +/// A different span can be provided through the [`quote_spanned!`] macro. +/// +/// [`quote_spanned!`]: macro.quote_spanned.html +/// +///
+/// +/// # Return type +/// +/// The macro evaluates to an expression of type `proc_macro2::TokenStream`. +/// Meanwhile Rust procedural macros are expected to return the type +/// `proc_macro::TokenStream`. +/// +/// The difference between the two types is that `proc_macro` types are entirely +/// specific to procedural macros and cannot ever exist in code outside of a +/// procedural macro, while `proc_macro2` types may exist anywhere including +/// tests and non-macro code like main.rs and build.rs. This is why even the +/// procedural macro ecosystem is largely built around `proc_macro2`, because +/// that ensures the libraries are unit testable and accessible in non-macro +/// contexts. +/// +/// There is a [`From`]-conversion in both directions so returning the output of +/// `quote!` from a procedural macro usually looks like `tokens.into()` or +/// `proc_macro::TokenStream::from(tokens)`. +/// +/// [`From`]: https://doc.rust-lang.org/std/convert/trait.From.html +/// +///
+/// +/// # Examples +/// +/// ### Procedural macro +/// +/// The structure of a basic procedural macro is as follows. Refer to the [Syn] +/// crate for further useful guidance on using `quote!` as part of a procedural +/// macro. +/// +/// [Syn]: https://github.com/dtolnay/syn +/// +/// ``` +/// # #[cfg(any())] +/// extern crate proc_macro; +/// # extern crate proc_macro2; +/// +/// # #[cfg(any())] +/// use proc_macro::TokenStream; +/// # use proc_macro2::TokenStream; +/// use quote::quote; +/// +/// # const IGNORE_TOKENS: &'static str = stringify! { +/// #[proc_macro_derive(HeapSize)] +/// # }; +/// pub fn derive_heap_size(input: TokenStream) -> TokenStream { +/// // Parse the input and figure out what implementation to generate... +/// # const IGNORE_TOKENS: &'static str = stringify! { +/// let name = /* ... */; +/// let expr = /* ... */; +/// # }; +/// # +/// # let name = 0; +/// # let expr = 0; +/// +/// let expanded = quote! { +/// // The generated impl. +/// impl heapsize::HeapSize for #name { +/// fn heap_size_of_children(&self) -> usize { +/// #expr +/// } +/// } +/// }; +/// +/// // Hand the output tokens back to the compiler. +/// TokenStream::from(expanded) +/// } +/// ``` +/// +///


+/// +/// ### Combining quoted fragments +/// +/// Usually you don't end up constructing an entire final `TokenStream` in one +/// piece. Different parts may come from different helper functions. The tokens +/// produced by `quote!` themselves implement `ToTokens` and so can be +/// interpolated into later `quote!` invocations to build up a final result. +/// +/// ``` +/// # use quote::quote; +/// # +/// let type_definition = quote! {...}; +/// let methods = quote! {...}; +/// +/// let tokens = quote! { +/// #type_definition +/// #methods +/// }; +/// ``` +/// +///


+/// +/// ### Constructing identifiers +/// +/// Suppose we have an identifier `ident` which came from somewhere in a macro +/// input and we need to modify it in some way for the macro output. Let's +/// consider prepending the identifier with an underscore. +/// +/// Simply interpolating the identifier next to an underscore will not have the +/// behavior of concatenating them. The underscore and the identifier will +/// continue to be two separate tokens as if you had written `_ x`. +/// +/// ``` +/// # use proc_macro2::{self as syn, Span}; +/// # use quote::quote; +/// # +/// # let ident = syn::Ident::new("i", Span::call_site()); +/// # +/// // incorrect +/// quote! { +/// let mut _#ident = 0; +/// } +/// # ; +/// ``` +/// +/// The solution is to build a new identifier token with the correct value. As +/// this is such a common case, the [`format_ident!`] macro provides a +/// convenient utility for doing so correctly. +/// +/// ``` +/// # use proc_macro2::{Ident, Span}; +/// # use quote::{format_ident, quote}; +/// # +/// # let ident = Ident::new("i", Span::call_site()); +/// # +/// let varname = format_ident!("_{}", ident); +/// quote! { +/// let mut #varname = 0; +/// } +/// # ; +/// ``` +/// +/// Alternatively, the APIs provided by Syn and proc-macro2 can be used to +/// directly build the identifier. This is roughly equivalent to the above, but +/// will not handle `ident` being a raw identifier. +/// +/// ``` +/// # use proc_macro2::{self as syn, Span}; +/// # use quote::quote; +/// # +/// # let ident = syn::Ident::new("i", Span::call_site()); +/// # +/// let concatenated = format!("_{}", ident); +/// let varname = syn::Ident::new(&concatenated, ident.span()); +/// quote! { +/// let mut #varname = 0; +/// } +/// # ; +/// ``` +/// +///


+///
+/// ### Making method calls
+///
+/// Let's say our macro requires some type specified in the macro input to have
+/// a constructor called `new`. We have the type in a variable called
+/// `field_type` of type `syn::Type` and want to invoke the constructor.
+///
+/// ```
+/// # use quote::quote;
+/// #
+/// # let field_type = quote!(...);
+/// #
+/// // incorrect
+/// quote! {
+///     let value = #field_type::new();
+/// }
+/// # ;
+/// ```
+///
+/// This works only sometimes. If `field_type` is `String`, the expanded code
+/// contains `String::new()` which is fine. But if `field_type` is something
+/// like `Vec<u8>` then the expanded code is `Vec<u8>::new()` which is invalid
+/// syntax. Ordinarily in handwritten Rust we would write `Vec::<u8>::new()`
+/// but for macros often the following is more convenient.
+///
+/// ```
+/// # use quote::quote;
+/// #
+/// # let field_type = quote!(...);
+/// #
+/// quote! {
+///     let value = <#field_type>::new();
+/// }
+/// # ;
+/// ```
+///
+/// This expands to `<Vec<u8>>::new()` which behaves correctly.
+///
+/// A similar pattern is appropriate for trait methods.
+///
+/// ```
+/// # use quote::quote;
+/// #
+/// # let field_type = quote!(...);
+/// #
+/// quote! {
+///     let value = <#field_type as core::default::Default>::default();
+/// }
+/// # ;
+/// ```
+///
+/// <br>


+/// +/// ### Interpolating text inside of doc comments +/// +/// Neither doc comments nor string literals get interpolation behavior in +/// quote: +/// +/// ```compile_fail +/// quote! { +/// /// try to interpolate: #ident +/// /// +/// /// ... +/// } +/// ``` +/// +/// ```compile_fail +/// quote! { +/// #[doc = "try to interpolate: #ident"] +/// } +/// ``` +/// +/// Instead the best way to build doc comments that involve variables is by +/// formatting the doc string literal outside of quote. +/// +/// ```rust +/// # use proc_macro2::{Ident, Span}; +/// # use quote::quote; +/// # +/// # const IGNORE: &str = stringify! { +/// let msg = format!(...); +/// # }; +/// # +/// # let ident = Ident::new("var", Span::call_site()); +/// # let msg = format!("try to interpolate: {}", ident); +/// quote! { +/// #[doc = #msg] +/// /// +/// /// ... +/// } +/// # ; +/// ``` +/// +///


+///
+/// ### Indexing into a tuple struct
+///
+/// When interpolating indices of a tuple or tuple struct, we need them not to
+/// appear suffixed as integer literals by interpolating them as [`syn::Index`]
+/// instead.
+///
+/// [`syn::Index`]: https://docs.rs/syn/2.0/syn/struct.Index.html
+///
+/// ```compile_fail
+/// let i = 0usize..self.fields.len();
+///
+/// // expands to 0 + self.0usize.heap_size() + self.1usize.heap_size() + ...
+/// // which is not valid syntax
+/// quote! {
+///     0 #( + self.#i.heap_size() )*
+/// }
+/// ```
+///
+/// ```
+/// # use proc_macro2::{Ident, TokenStream};
+/// # use quote::quote;
+/// #
+/// # mod syn {
+/// #     use proc_macro2::{Literal, TokenStream};
+/// #     use quote::{ToTokens, TokenStreamExt};
+/// #
+/// #     pub struct Index(usize);
+/// #
+/// #     impl From<usize> for Index {
+/// #         fn from(i: usize) -> Self {
+/// #             Index(i)
+/// #         }
+/// #     }
+/// #
+/// #     impl ToTokens for Index {
+/// #         fn to_tokens(&self, tokens: &mut TokenStream) {
+/// #             tokens.append(Literal::usize_unsuffixed(self.0));
+/// #         }
+/// #     }
+/// # }
+/// #
+/// # struct Struct {
+/// #     fields: Vec<Ident>,
+/// # }
+/// #
+/// # impl Struct {
+/// #     fn example(&self) -> TokenStream {
+/// let i = (0..self.fields.len()).map(syn::Index::from);
+///
+/// // expands to 0 + self.0.heap_size() + self.1.heap_size() + ...
+/// quote! {
+///     0 #( + self.#i.heap_size() )*
+/// }
+/// #     }
+/// # }
+/// ```
+#[cfg(doc)]
+#[macro_export]
+macro_rules! quote {
+    ($($tt:tt)*) => {
+        ...
+    };
+}
+
+#[cfg(not(doc))]
+#[macro_export]
+macro_rules! quote {
+    () => {
+        $crate::__private::TokenStream::new()
+    };
+
+    // Special case rule for a single tt, for performance.
+    ($tt:tt) => {{
+        let mut _s = $crate::__private::TokenStream::new();
+        $crate::quote_token!{$tt _s}
+        _s
+    }};
+
+    // Special case rules for two tts, for performance.
+    (# $var:ident) => {{
+        let mut _s = $crate::__private::TokenStream::new();
+        $crate::ToTokens::to_tokens(&$var, &mut _s);
+        _s
+    }};
+    ($tt1:tt $tt2:tt) => {{
+        let mut _s = $crate::__private::TokenStream::new();
+        $crate::quote_token!{$tt1 _s}
+        $crate::quote_token!{$tt2 _s}
+        _s
+    }};
+
+    // Rule for any other number of tokens.
+    ($($tt:tt)*) => {{
+        let mut _s = $crate::__private::TokenStream::new();
+        $crate::quote_each_token!{_s $($tt)*}
+        _s
+    }};
+}
+
+/// Same as `quote!`, but applies a given span to all tokens originating within
+/// the macro invocation.
+///
+/// <br>
+/// +/// # Syntax +/// +/// A span expression of type [`Span`], followed by `=>`, followed by the tokens +/// to quote. The span expression should be brief — use a variable for +/// anything more than a few characters. There should be no space before the +/// `=>` token. +/// +/// [`Span`]: https://docs.rs/proc-macro2/1.0/proc_macro2/struct.Span.html +/// +/// ``` +/// # use proc_macro2::Span; +/// # use quote::quote_spanned; +/// # +/// # const IGNORE_TOKENS: &'static str = stringify! { +/// let span = /* ... */; +/// # }; +/// # let span = Span::call_site(); +/// # let init = 0; +/// +/// // On one line, use parentheses. +/// let tokens = quote_spanned!(span=> Box::into_raw(Box::new(#init))); +/// +/// // On multiple lines, place the span at the top and use braces. +/// let tokens = quote_spanned! {span=> +/// Box::into_raw(Box::new(#init)) +/// }; +/// ``` +/// +/// The lack of space before the `=>` should look jarring to Rust programmers +/// and this is intentional. The formatting is designed to be visibly +/// off-balance and draw the eye a particular way, due to the span expression +/// being evaluated in the context of the procedural macro and the remaining +/// tokens being evaluated in the generated code. +/// +///
+/// +/// # Hygiene +/// +/// Any interpolated tokens preserve the `Span` information provided by their +/// `ToTokens` implementation. Tokens that originate within the `quote_spanned!` +/// invocation are spanned with the given span argument. +/// +///
+/// +/// # Example +/// +/// The following procedural macro code uses `quote_spanned!` to assert that a +/// particular Rust type implements the [`Sync`] trait so that references can be +/// safely shared between threads. +/// +/// [`Sync`]: https://doc.rust-lang.org/std/marker/trait.Sync.html +/// +/// ``` +/// # use quote::{quote_spanned, TokenStreamExt, ToTokens}; +/// # use proc_macro2::{Span, TokenStream}; +/// # +/// # struct Type; +/// # +/// # impl Type { +/// # fn span(&self) -> Span { +/// # Span::call_site() +/// # } +/// # } +/// # +/// # impl ToTokens for Type { +/// # fn to_tokens(&self, _tokens: &mut TokenStream) {} +/// # } +/// # +/// # let ty = Type; +/// # let call_site = Span::call_site(); +/// # +/// let ty_span = ty.span(); +/// let assert_sync = quote_spanned! {ty_span=> +/// struct _AssertSync where #ty: Sync; +/// }; +/// ``` +/// +/// If the assertion fails, the user will see an error like the following. The +/// input span of their type is highlighted in the error. +/// +/// ```text +/// error[E0277]: the trait bound `*const (): std::marker::Sync` is not satisfied +/// --> src/main.rs:10:21 +/// | +/// 10 | static ref PTR: *const () = &(); +/// | ^^^^^^^^^ `*const ()` cannot be shared between threads safely +/// ``` +/// +/// In this example it is important for the where-clause to be spanned with the +/// line/column information of the user's input type so that error messages are +/// placed appropriately by the compiler. +#[cfg(doc)] +#[macro_export] +macro_rules! quote_spanned { + ($span:expr=> $($tt:tt)*) => { + ... + }; +} + +#[cfg(not(doc))] +#[macro_export] +macro_rules! quote_spanned { + ($span:expr=>) => {{ + let _: $crate::__private::Span = $crate::__private::get_span($span).__into_span(); + $crate::__private::TokenStream::new() + }}; + + // Special case rule for a single tt, for performance. + ($span:expr=> $tt:tt) => {{ + let mut _s = $crate::__private::TokenStream::new(); + let _span: $crate::__private::Span = $crate::__private::get_span($span).__into_span(); + $crate::quote_token_spanned!{$tt _s _span} + _s + }}; + + // Special case rules for two tts, for performance. + ($span:expr=> # $var:ident) => {{ + let mut _s = $crate::__private::TokenStream::new(); + let _: $crate::__private::Span = $crate::__private::get_span($span).__into_span(); + $crate::ToTokens::to_tokens(&$var, &mut _s); + _s + }}; + ($span:expr=> $tt1:tt $tt2:tt) => {{ + let mut _s = $crate::__private::TokenStream::new(); + let _span: $crate::__private::Span = $crate::__private::get_span($span).__into_span(); + $crate::quote_token_spanned!{$tt1 _s _span} + $crate::quote_token_spanned!{$tt2 _s _span} + _s + }}; + + // Rule for any other number of tokens. + ($span:expr=> $($tt:tt)*) => {{ + let mut _s = $crate::__private::TokenStream::new(); + let _span: $crate::__private::Span = $crate::__private::get_span($span).__into_span(); + $crate::quote_each_token_spanned!{_s _span $($tt)*} + _s + }}; +} + +// Extract the names of all #metavariables and pass them to the $call macro. +// +// in: pounded_var_names!(then!(...) a #b c #( #d )* #e) +// out: then!(... b); +// then!(... d); +// then!(... e); +#[macro_export] +#[doc(hidden)] +macro_rules! pounded_var_names { + ($call:ident! $extra:tt $($tts:tt)*) => { + $crate::pounded_var_names_with_context!{$call! $extra + (@ $($tts)*) + ($($tts)* @) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! pounded_var_names_with_context { + ($call:ident! $extra:tt ($($b1:tt)*) ($($curr:tt)*)) => { + $( + $crate::pounded_var_with_context!{$call! 
$extra $b1 $curr} + )* + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! pounded_var_with_context { + ($call:ident! $extra:tt $b1:tt ( $($inner:tt)* )) => { + $crate::pounded_var_names!{$call! $extra $($inner)*} + }; + + ($call:ident! $extra:tt $b1:tt [ $($inner:tt)* ]) => { + $crate::pounded_var_names!{$call! $extra $($inner)*} + }; + + ($call:ident! $extra:tt $b1:tt { $($inner:tt)* }) => { + $crate::pounded_var_names!{$call! $extra $($inner)*} + }; + + ($call:ident!($($extra:tt)*) # $var:ident) => { + $crate::$call!($($extra)* $var); + }; + + ($call:ident! $extra:tt $b1:tt $curr:tt) => {}; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! quote_bind_into_iter { + ($has_iter:ident $var:ident) => { + // `mut` may be unused if $var occurs multiple times in the list. + #[allow(unused_mut)] + let (mut $var, i) = $var.quote_into_iter(); + let $has_iter = $has_iter | i; + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! quote_bind_next_or_break { + ($var:ident) => { + let $var = match $var.next() { + Some(_x) => $crate::__private::RepInterp(_x), + None => break, + }; + }; +} + +// The obvious way to write this macro is as a tt muncher. This implementation +// does something more complex for two reasons. +// +// - With a tt muncher it's easy to hit Rust's built-in recursion_limit, which +// this implementation avoids because it isn't tail recursive. +// +// - Compile times for a tt muncher are quadratic relative to the length of +// the input. This implementation is linear, so it will be faster +// (potentially much faster) for big inputs. However, the constant factors +// of this implementation are higher than that of a tt muncher, so it is +// somewhat slower than a tt muncher if there are many invocations with +// short inputs. +// +// An invocation like this: +// +// quote_each_token!(_s a b c d e f g h i j); +// +// expands to this: +// +// quote_tokens_with_context!(_s +// (@ @ @ @ @ @ a b c d e f g h i j) +// (@ @ @ @ @ a b c d e f g h i j @) +// (@ @ @ @ a b c d e f g h i j @ @) +// (@ @ @ (a) (b) (c) (d) (e) (f) (g) (h) (i) (j) @ @ @) +// (@ @ a b c d e f g h i j @ @ @ @) +// (@ a b c d e f g h i j @ @ @ @ @) +// (a b c d e f g h i j @ @ @ @ @ @) +// ); +// +// which gets transposed and expanded to this: +// +// quote_token_with_context!(_s @ @ @ @ @ @ a); +// quote_token_with_context!(_s @ @ @ @ @ a b); +// quote_token_with_context!(_s @ @ @ @ a b c); +// quote_token_with_context!(_s @ @ @ (a) b c d); +// quote_token_with_context!(_s @ @ a (b) c d e); +// quote_token_with_context!(_s @ a b (c) d e f); +// quote_token_with_context!(_s a b c (d) e f g); +// quote_token_with_context!(_s b c d (e) f g h); +// quote_token_with_context!(_s c d e (f) g h i); +// quote_token_with_context!(_s d e f (g) h i j); +// quote_token_with_context!(_s e f g (h) i j @); +// quote_token_with_context!(_s f g h (i) j @ @); +// quote_token_with_context!(_s g h i (j) @ @ @); +// quote_token_with_context!(_s h i j @ @ @ @); +// quote_token_with_context!(_s i j @ @ @ @ @); +// quote_token_with_context!(_s j @ @ @ @ @ @); +// +// Without having used muncher-style recursion, we get one invocation of +// quote_token_with_context for each original tt, with three tts of context on +// either side. This is enough for the longest possible interpolation form (a +// repetition with separator, as in `# (#var) , *`) to be fully represented with +// the first or last tt in the middle. +// +// The middle tt (surrounded by parentheses) is the tt being processed. 
+// +// - When it is a `#`, quote_token_with_context can do an interpolation. The +// interpolation kind will depend on the three subsequent tts. +// +// - When it is within a later part of an interpolation, it can be ignored +// because the interpolation has already been done. +// +// - When it is not part of an interpolation it can be pushed as a single +// token into the output. +// +// - When the middle token is an unparenthesized `@`, that call is one of the +// first 3 or last 3 calls of quote_token_with_context and does not +// correspond to one of the original input tokens, so turns into nothing. +#[macro_export] +#[doc(hidden)] +macro_rules! quote_each_token { + ($tokens:ident $($tts:tt)*) => { + $crate::quote_tokens_with_context!{$tokens + (@ @ @ @ @ @ $($tts)*) + (@ @ @ @ @ $($tts)* @) + (@ @ @ @ $($tts)* @ @) + (@ @ @ $(($tts))* @ @ @) + (@ @ $($tts)* @ @ @ @) + (@ $($tts)* @ @ @ @ @) + ($($tts)* @ @ @ @ @ @) + } + }; +} + +// See the explanation on quote_each_token. +#[macro_export] +#[doc(hidden)] +macro_rules! quote_each_token_spanned { + ($tokens:ident $span:ident $($tts:tt)*) => { + $crate::quote_tokens_with_context_spanned!{$tokens $span + (@ @ @ @ @ @ $($tts)*) + (@ @ @ @ @ $($tts)* @) + (@ @ @ @ $($tts)* @ @) + (@ @ @ $(($tts))* @ @ @) + (@ @ $($tts)* @ @ @ @) + (@ $($tts)* @ @ @ @ @) + ($($tts)* @ @ @ @ @ @) + } + }; +} + +// See the explanation on quote_each_token. +#[macro_export] +#[doc(hidden)] +macro_rules! quote_tokens_with_context { + ($tokens:ident + ($($b3:tt)*) ($($b2:tt)*) ($($b1:tt)*) + ($($curr:tt)*) + ($($a1:tt)*) ($($a2:tt)*) ($($a3:tt)*) + ) => { + $( + $crate::quote_token_with_context!{$tokens $b3 $b2 $b1 $curr $a1 $a2 $a3} + )* + }; +} + +// See the explanation on quote_each_token. +#[macro_export] +#[doc(hidden)] +macro_rules! quote_tokens_with_context_spanned { + ($tokens:ident $span:ident + ($($b3:tt)*) ($($b2:tt)*) ($($b1:tt)*) + ($($curr:tt)*) + ($($a1:tt)*) ($($a2:tt)*) ($($a3:tt)*) + ) => { + $( + $crate::quote_token_with_context_spanned!{$tokens $span $b3 $b2 $b1 $curr $a1 $a2 $a3} + )* + }; +} + +// See the explanation on quote_each_token. +#[macro_export] +#[doc(hidden)] +macro_rules! quote_token_with_context { + // Unparenthesized `@` indicates this call does not correspond to one of the + // original input tokens. Ignore it. + ($tokens:ident $b3:tt $b2:tt $b1:tt @ $a1:tt $a2:tt $a3:tt) => {}; + + // A repetition with no separator. + ($tokens:ident $b3:tt $b2:tt $b1:tt (#) ( $($inner:tt)* ) * $a3:tt) => {{ + use $crate::__private::ext::*; + let has_iter = $crate::__private::ThereIsNoIteratorInRepetition; + $crate::pounded_var_names!{quote_bind_into_iter!(has_iter) () $($inner)*} + let _: $crate::__private::HasIterator = has_iter; + // This is `while true` instead of `loop` because if there are no + // iterators used inside of this repetition then the body would not + // contain any `break`, so the compiler would emit unreachable code + // warnings on anything below the loop. We use has_iter to detect and + // fail to compile when there are no iterators, so here we just work + // around the unneeded extra warning. + while true { + $crate::pounded_var_names!{quote_bind_next_or_break!() () $($inner)*} + $crate::quote_each_token!{$tokens $($inner)*} + } + }}; + // ... and one step later. + ($tokens:ident $b3:tt $b2:tt # (( $($inner:tt)* )) * $a2:tt $a3:tt) => {}; + // ... and one step later. + ($tokens:ident $b3:tt # ( $($inner:tt)* ) (*) $a1:tt $a2:tt $a3:tt) => {}; + + // A repetition with separator. 
+ ($tokens:ident $b3:tt $b2:tt $b1:tt (#) ( $($inner:tt)* ) $sep:tt *) => {{ + use $crate::__private::ext::*; + let mut _i = 0usize; + let has_iter = $crate::__private::ThereIsNoIteratorInRepetition; + $crate::pounded_var_names!{quote_bind_into_iter!(has_iter) () $($inner)*} + let _: $crate::__private::HasIterator = has_iter; + while true { + $crate::pounded_var_names!{quote_bind_next_or_break!() () $($inner)*} + if _i > 0 { + $crate::quote_token!{$sep $tokens} + } + _i += 1; + $crate::quote_each_token!{$tokens $($inner)*} + } + }}; + // ... and one step later. + ($tokens:ident $b3:tt $b2:tt # (( $($inner:tt)* )) $sep:tt * $a3:tt) => {}; + // ... and one step later. + ($tokens:ident $b3:tt # ( $($inner:tt)* ) ($sep:tt) * $a2:tt $a3:tt) => {}; + // (A special case for `#(var)**`, where the first `*` is treated as the + // repetition symbol and the second `*` is treated as an ordinary token.) + ($tokens:ident # ( $($inner:tt)* ) * (*) $a1:tt $a2:tt $a3:tt) => { + // https://github.com/dtolnay/quote/issues/130 + $crate::quote_token!{* $tokens} + }; + // ... and one step later. + ($tokens:ident # ( $($inner:tt)* ) $sep:tt (*) $a1:tt $a2:tt $a3:tt) => {}; + + // A non-repetition interpolation. + ($tokens:ident $b3:tt $b2:tt $b1:tt (#) $var:ident $a2:tt $a3:tt) => { + $crate::ToTokens::to_tokens(&$var, &mut $tokens); + }; + // ... and one step later. + ($tokens:ident $b3:tt $b2:tt # ($var:ident) $a1:tt $a2:tt $a3:tt) => {}; + + // An ordinary token, not part of any interpolation. + ($tokens:ident $b3:tt $b2:tt $b1:tt ($curr:tt) $a1:tt $a2:tt $a3:tt) => { + $crate::quote_token!{$curr $tokens} + }; +} + +// See the explanation on quote_each_token, and on the individual rules of +// quote_token_with_context. +#[macro_export] +#[doc(hidden)] +macro_rules! quote_token_with_context_spanned { + ($tokens:ident $span:ident $b3:tt $b2:tt $b1:tt @ $a1:tt $a2:tt $a3:tt) => {}; + + ($tokens:ident $span:ident $b3:tt $b2:tt $b1:tt (#) ( $($inner:tt)* ) * $a3:tt) => {{ + use $crate::__private::ext::*; + let has_iter = $crate::__private::ThereIsNoIteratorInRepetition; + $crate::pounded_var_names!{quote_bind_into_iter!(has_iter) () $($inner)*} + let _: $crate::__private::HasIterator = has_iter; + while true { + $crate::pounded_var_names!{quote_bind_next_or_break!() () $($inner)*} + $crate::quote_each_token_spanned!{$tokens $span $($inner)*} + } + }}; + ($tokens:ident $span:ident $b3:tt $b2:tt # (( $($inner:tt)* )) * $a2:tt $a3:tt) => {}; + ($tokens:ident $span:ident $b3:tt # ( $($inner:tt)* ) (*) $a1:tt $a2:tt $a3:tt) => {}; + + ($tokens:ident $span:ident $b3:tt $b2:tt $b1:tt (#) ( $($inner:tt)* ) $sep:tt *) => {{ + use $crate::__private::ext::*; + let mut _i = 0usize; + let has_iter = $crate::__private::ThereIsNoIteratorInRepetition; + $crate::pounded_var_names!{quote_bind_into_iter!(has_iter) () $($inner)*} + let _: $crate::__private::HasIterator = has_iter; + while true { + $crate::pounded_var_names!{quote_bind_next_or_break!() () $($inner)*} + if _i > 0 { + $crate::quote_token_spanned!{$sep $tokens $span} + } + _i += 1; + $crate::quote_each_token_spanned!{$tokens $span $($inner)*} + } + }}; + ($tokens:ident $span:ident $b3:tt $b2:tt # (( $($inner:tt)* )) $sep:tt * $a3:tt) => {}; + ($tokens:ident $span:ident $b3:tt # ( $($inner:tt)* ) ($sep:tt) * $a2:tt $a3:tt) => {}; + ($tokens:ident $span:ident # ( $($inner:tt)* ) * (*) $a1:tt $a2:tt $a3:tt) => { + // https://github.com/dtolnay/quote/issues/130 + $crate::quote_token_spanned!{* $tokens $span} + }; + ($tokens:ident $span:ident # ( $($inner:tt)* ) $sep:tt (*) 
$a1:tt $a2:tt $a3:tt) => {}; + + ($tokens:ident $span:ident $b3:tt $b2:tt $b1:tt (#) $var:ident $a2:tt $a3:tt) => { + $crate::ToTokens::to_tokens(&$var, &mut $tokens); + }; + ($tokens:ident $span:ident $b3:tt $b2:tt # ($var:ident) $a1:tt $a2:tt $a3:tt) => {}; + + ($tokens:ident $span:ident $b3:tt $b2:tt $b1:tt ($curr:tt) $a1:tt $a2:tt $a3:tt) => { + $crate::quote_token_spanned!{$curr $tokens $span} + }; +} + +// These rules are ordered by approximate token frequency, at least for the +// first 10 or so, to improve compile times. Having `ident` first is by far the +// most important because it's typically 2-3x more common than the next most +// common token. +// +// Separately, we put the token being matched in the very front so that failing +// rules may fail to match as quickly as possible. +#[macro_export] +#[doc(hidden)] +macro_rules! quote_token { + ($ident:ident $tokens:ident) => { + $crate::__private::push_ident(&mut $tokens, stringify!($ident)); + }; + + (:: $tokens:ident) => { + $crate::__private::push_colon2(&mut $tokens); + }; + + (( $($inner:tt)* ) $tokens:ident) => { + $crate::__private::push_group( + &mut $tokens, + $crate::__private::Delimiter::Parenthesis, + $crate::quote!($($inner)*), + ); + }; + + ([ $($inner:tt)* ] $tokens:ident) => { + $crate::__private::push_group( + &mut $tokens, + $crate::__private::Delimiter::Bracket, + $crate::quote!($($inner)*), + ); + }; + + ({ $($inner:tt)* } $tokens:ident) => { + $crate::__private::push_group( + &mut $tokens, + $crate::__private::Delimiter::Brace, + $crate::quote!($($inner)*), + ); + }; + + (# $tokens:ident) => { + $crate::__private::push_pound(&mut $tokens); + }; + + (, $tokens:ident) => { + $crate::__private::push_comma(&mut $tokens); + }; + + (. $tokens:ident) => { + $crate::__private::push_dot(&mut $tokens); + }; + + (; $tokens:ident) => { + $crate::__private::push_semi(&mut $tokens); + }; + + (: $tokens:ident) => { + $crate::__private::push_colon(&mut $tokens); + }; + + (+ $tokens:ident) => { + $crate::__private::push_add(&mut $tokens); + }; + + (+= $tokens:ident) => { + $crate::__private::push_add_eq(&mut $tokens); + }; + + (& $tokens:ident) => { + $crate::__private::push_and(&mut $tokens); + }; + + (&& $tokens:ident) => { + $crate::__private::push_and_and(&mut $tokens); + }; + + (&= $tokens:ident) => { + $crate::__private::push_and_eq(&mut $tokens); + }; + + (@ $tokens:ident) => { + $crate::__private::push_at(&mut $tokens); + }; + + (! $tokens:ident) => { + $crate::__private::push_bang(&mut $tokens); + }; + + (^ $tokens:ident) => { + $crate::__private::push_caret(&mut $tokens); + }; + + (^= $tokens:ident) => { + $crate::__private::push_caret_eq(&mut $tokens); + }; + + (/ $tokens:ident) => { + $crate::__private::push_div(&mut $tokens); + }; + + (/= $tokens:ident) => { + $crate::__private::push_div_eq(&mut $tokens); + }; + + (.. $tokens:ident) => { + $crate::__private::push_dot2(&mut $tokens); + }; + + (... 
$tokens:ident) => { + $crate::__private::push_dot3(&mut $tokens); + }; + + (..= $tokens:ident) => { + $crate::__private::push_dot_dot_eq(&mut $tokens); + }; + + (= $tokens:ident) => { + $crate::__private::push_eq(&mut $tokens); + }; + + (== $tokens:ident) => { + $crate::__private::push_eq_eq(&mut $tokens); + }; + + (>= $tokens:ident) => { + $crate::__private::push_ge(&mut $tokens); + }; + + (> $tokens:ident) => { + $crate::__private::push_gt(&mut $tokens); + }; + + (<= $tokens:ident) => { + $crate::__private::push_le(&mut $tokens); + }; + + (< $tokens:ident) => { + $crate::__private::push_lt(&mut $tokens); + }; + + (*= $tokens:ident) => { + $crate::__private::push_mul_eq(&mut $tokens); + }; + + (!= $tokens:ident) => { + $crate::__private::push_ne(&mut $tokens); + }; + + (| $tokens:ident) => { + $crate::__private::push_or(&mut $tokens); + }; + + (|= $tokens:ident) => { + $crate::__private::push_or_eq(&mut $tokens); + }; + + (|| $tokens:ident) => { + $crate::__private::push_or_or(&mut $tokens); + }; + + (? $tokens:ident) => { + $crate::__private::push_question(&mut $tokens); + }; + + (-> $tokens:ident) => { + $crate::__private::push_rarrow(&mut $tokens); + }; + + (<- $tokens:ident) => { + $crate::__private::push_larrow(&mut $tokens); + }; + + (% $tokens:ident) => { + $crate::__private::push_rem(&mut $tokens); + }; + + (%= $tokens:ident) => { + $crate::__private::push_rem_eq(&mut $tokens); + }; + + (=> $tokens:ident) => { + $crate::__private::push_fat_arrow(&mut $tokens); + }; + + (<< $tokens:ident) => { + $crate::__private::push_shl(&mut $tokens); + }; + + (<<= $tokens:ident) => { + $crate::__private::push_shl_eq(&mut $tokens); + }; + + (>> $tokens:ident) => { + $crate::__private::push_shr(&mut $tokens); + }; + + (>>= $tokens:ident) => { + $crate::__private::push_shr_eq(&mut $tokens); + }; + + (* $tokens:ident) => { + $crate::__private::push_star(&mut $tokens); + }; + + (- $tokens:ident) => { + $crate::__private::push_sub(&mut $tokens); + }; + + (-= $tokens:ident) => { + $crate::__private::push_sub_eq(&mut $tokens); + }; + + ($lifetime:lifetime $tokens:ident) => { + $crate::__private::push_lifetime(&mut $tokens, stringify!($lifetime)); + }; + + (_ $tokens:ident) => { + $crate::__private::push_underscore(&mut $tokens); + }; + + ($other:tt $tokens:ident) => { + $crate::__private::parse(&mut $tokens, stringify!($other)); + }; +} + +// See the comment above `quote_token!` about the rule ordering. +#[macro_export] +#[doc(hidden)] +macro_rules! 
quote_token_spanned { + ($ident:ident $tokens:ident $span:ident) => { + $crate::__private::push_ident_spanned(&mut $tokens, $span, stringify!($ident)); + }; + + (:: $tokens:ident $span:ident) => { + $crate::__private::push_colon2_spanned(&mut $tokens, $span); + }; + + (( $($inner:tt)* ) $tokens:ident $span:ident) => { + $crate::__private::push_group_spanned( + &mut $tokens, + $span, + $crate::__private::Delimiter::Parenthesis, + $crate::quote_spanned!($span=> $($inner)*), + ); + }; + + ([ $($inner:tt)* ] $tokens:ident $span:ident) => { + $crate::__private::push_group_spanned( + &mut $tokens, + $span, + $crate::__private::Delimiter::Bracket, + $crate::quote_spanned!($span=> $($inner)*), + ); + }; + + ({ $($inner:tt)* } $tokens:ident $span:ident) => { + $crate::__private::push_group_spanned( + &mut $tokens, + $span, + $crate::__private::Delimiter::Brace, + $crate::quote_spanned!($span=> $($inner)*), + ); + }; + + (# $tokens:ident $span:ident) => { + $crate::__private::push_pound_spanned(&mut $tokens, $span); + }; + + (, $tokens:ident $span:ident) => { + $crate::__private::push_comma_spanned(&mut $tokens, $span); + }; + + (. $tokens:ident $span:ident) => { + $crate::__private::push_dot_spanned(&mut $tokens, $span); + }; + + (; $tokens:ident $span:ident) => { + $crate::__private::push_semi_spanned(&mut $tokens, $span); + }; + + (: $tokens:ident $span:ident) => { + $crate::__private::push_colon_spanned(&mut $tokens, $span); + }; + + (+ $tokens:ident $span:ident) => { + $crate::__private::push_add_spanned(&mut $tokens, $span); + }; + + (+= $tokens:ident $span:ident) => { + $crate::__private::push_add_eq_spanned(&mut $tokens, $span); + }; + + (& $tokens:ident $span:ident) => { + $crate::__private::push_and_spanned(&mut $tokens, $span); + }; + + (&& $tokens:ident $span:ident) => { + $crate::__private::push_and_and_spanned(&mut $tokens, $span); + }; + + (&= $tokens:ident $span:ident) => { + $crate::__private::push_and_eq_spanned(&mut $tokens, $span); + }; + + (@ $tokens:ident $span:ident) => { + $crate::__private::push_at_spanned(&mut $tokens, $span); + }; + + (! $tokens:ident $span:ident) => { + $crate::__private::push_bang_spanned(&mut $tokens, $span); + }; + + (^ $tokens:ident $span:ident) => { + $crate::__private::push_caret_spanned(&mut $tokens, $span); + }; + + (^= $tokens:ident $span:ident) => { + $crate::__private::push_caret_eq_spanned(&mut $tokens, $span); + }; + + (/ $tokens:ident $span:ident) => { + $crate::__private::push_div_spanned(&mut $tokens, $span); + }; + + (/= $tokens:ident $span:ident) => { + $crate::__private::push_div_eq_spanned(&mut $tokens, $span); + }; + + (.. $tokens:ident $span:ident) => { + $crate::__private::push_dot2_spanned(&mut $tokens, $span); + }; + + (... 
$tokens:ident $span:ident) => { + $crate::__private::push_dot3_spanned(&mut $tokens, $span); + }; + + (..= $tokens:ident $span:ident) => { + $crate::__private::push_dot_dot_eq_spanned(&mut $tokens, $span); + }; + + (= $tokens:ident $span:ident) => { + $crate::__private::push_eq_spanned(&mut $tokens, $span); + }; + + (== $tokens:ident $span:ident) => { + $crate::__private::push_eq_eq_spanned(&mut $tokens, $span); + }; + + (>= $tokens:ident $span:ident) => { + $crate::__private::push_ge_spanned(&mut $tokens, $span); + }; + + (> $tokens:ident $span:ident) => { + $crate::__private::push_gt_spanned(&mut $tokens, $span); + }; + + (<= $tokens:ident $span:ident) => { + $crate::__private::push_le_spanned(&mut $tokens, $span); + }; + + (< $tokens:ident $span:ident) => { + $crate::__private::push_lt_spanned(&mut $tokens, $span); + }; + + (*= $tokens:ident $span:ident) => { + $crate::__private::push_mul_eq_spanned(&mut $tokens, $span); + }; + + (!= $tokens:ident $span:ident) => { + $crate::__private::push_ne_spanned(&mut $tokens, $span); + }; + + (| $tokens:ident $span:ident) => { + $crate::__private::push_or_spanned(&mut $tokens, $span); + }; + + (|= $tokens:ident $span:ident) => { + $crate::__private::push_or_eq_spanned(&mut $tokens, $span); + }; + + (|| $tokens:ident $span:ident) => { + $crate::__private::push_or_or_spanned(&mut $tokens, $span); + }; + + (? $tokens:ident $span:ident) => { + $crate::__private::push_question_spanned(&mut $tokens, $span); + }; + + (-> $tokens:ident $span:ident) => { + $crate::__private::push_rarrow_spanned(&mut $tokens, $span); + }; + + (<- $tokens:ident $span:ident) => { + $crate::__private::push_larrow_spanned(&mut $tokens, $span); + }; + + (% $tokens:ident $span:ident) => { + $crate::__private::push_rem_spanned(&mut $tokens, $span); + }; + + (%= $tokens:ident $span:ident) => { + $crate::__private::push_rem_eq_spanned(&mut $tokens, $span); + }; + + (=> $tokens:ident $span:ident) => { + $crate::__private::push_fat_arrow_spanned(&mut $tokens, $span); + }; + + (<< $tokens:ident $span:ident) => { + $crate::__private::push_shl_spanned(&mut $tokens, $span); + }; + + (<<= $tokens:ident $span:ident) => { + $crate::__private::push_shl_eq_spanned(&mut $tokens, $span); + }; + + (>> $tokens:ident $span:ident) => { + $crate::__private::push_shr_spanned(&mut $tokens, $span); + }; + + (>>= $tokens:ident $span:ident) => { + $crate::__private::push_shr_eq_spanned(&mut $tokens, $span); + }; + + (* $tokens:ident $span:ident) => { + $crate::__private::push_star_spanned(&mut $tokens, $span); + }; + + (- $tokens:ident $span:ident) => { + $crate::__private::push_sub_spanned(&mut $tokens, $span); + }; + + (-= $tokens:ident $span:ident) => { + $crate::__private::push_sub_eq_spanned(&mut $tokens, $span); + }; + + ($lifetime:lifetime $tokens:ident $span:ident) => { + $crate::__private::push_lifetime_spanned(&mut $tokens, $span, stringify!($lifetime)); + }; + + (_ $tokens:ident $span:ident) => { + $crate::__private::push_underscore_spanned(&mut $tokens, $span); + }; + + ($other:tt $tokens:ident $span:ident) => { + $crate::__private::parse_spanned(&mut $tokens, $span, stringify!($other)); + }; +} diff --git a/vendor/quote/src/runtime.rs b/vendor/quote/src/runtime.rs new file mode 100644 index 0000000..eff044a --- /dev/null +++ b/vendor/quote/src/runtime.rs @@ -0,0 +1,530 @@ +use self::get_span::{GetSpan, GetSpanBase, GetSpanInner}; +use crate::{IdentFragment, ToTokens, TokenStreamExt}; +use core::fmt; +use core::iter; +use core::ops::BitOr; +use proc_macro2::{Group, Ident, Punct, Spacing, 
TokenTree};
+
+#[doc(hidden)]
+pub use alloc::format;
+#[doc(hidden)]
+pub use core::option::Option;
+
+#[doc(hidden)]
+pub type Delimiter = proc_macro2::Delimiter;
+#[doc(hidden)]
+pub type Span = proc_macro2::Span;
+#[doc(hidden)]
+pub type TokenStream = proc_macro2::TokenStream;
+
+#[doc(hidden)]
+pub struct HasIterator; // True
+#[doc(hidden)]
+pub struct ThereIsNoIteratorInRepetition; // False
+
+impl BitOr<ThereIsNoIteratorInRepetition> for ThereIsNoIteratorInRepetition {
+    type Output = ThereIsNoIteratorInRepetition;
+    fn bitor(self, _rhs: ThereIsNoIteratorInRepetition) -> ThereIsNoIteratorInRepetition {
+        ThereIsNoIteratorInRepetition
+    }
+}
+
+impl BitOr<ThereIsNoIteratorInRepetition> for HasIterator {
+    type Output = HasIterator;
+    fn bitor(self, _rhs: ThereIsNoIteratorInRepetition) -> HasIterator {
+        HasIterator
+    }
+}
+
+impl BitOr<HasIterator> for ThereIsNoIteratorInRepetition {
+    type Output = HasIterator;
+    fn bitor(self, _rhs: HasIterator) -> HasIterator {
+        HasIterator
+    }
+}
+
+impl BitOr<HasIterator> for HasIterator {
+    type Output = HasIterator;
+    fn bitor(self, _rhs: HasIterator) -> HasIterator {
+        HasIterator
+    }
+}
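The four `BitOr` impls above make `|` behave as a compile-time boolean OR over the two marker types: the result type is `HasIterator` exactly when at least one operand is. A minimal standalone sketch of the same lattice, using hypothetical `Yes`/`No` names rather than quote's types:

```
// Standalone illustration of the marker-type OR; names are hypothetical.
use core::ops::BitOr;

struct Yes; // plays the role of HasIterator
struct No;  // plays the role of ThereIsNoIteratorInRepetition

impl BitOr<No> for No { type Output = No; fn bitor(self, _: No) -> No { No } }
impl BitOr<No> for Yes { type Output = Yes; fn bitor(self, _: No) -> Yes { Yes } }
impl BitOr<Yes> for No { type Output = Yes; fn bitor(self, _: Yes) -> Yes { Yes } }
impl BitOr<Yes> for Yes { type Output = Yes; fn bitor(self, _: Yes) -> Yes { Yes } }

fn main() {
    // The expression has type Yes because one operand is Yes; the type
    // ascription is checked entirely at compile time.
    let _: Yes = No | Yes | No;
}
```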
+
+/// Extension traits used by the implementation of `quote!`. These are defined
+/// in separate traits, rather than as a single trait due to ambiguity issues.
+///
+/// These traits expose a `quote_into_iter` method which should allow calling
+/// whichever impl happens to be applicable. Calling that method repeatedly on
+/// the returned value should be idempotent.
+#[doc(hidden)]
+pub mod ext {
+    use super::RepInterp;
+    use super::{HasIterator as HasIter, ThereIsNoIteratorInRepetition as DoesNotHaveIter};
+    use crate::ToTokens;
+    use alloc::collections::btree_set::{self, BTreeSet};
+    use core::slice;
+
+    /// Extension trait providing the `quote_into_iter` method on iterators.
+    #[doc(hidden)]
+    pub trait RepIteratorExt: Iterator + Sized {
+        fn quote_into_iter(self) -> (Self, HasIter) {
+            (self, HasIter)
+        }
+    }
+
+    impl<T: Iterator> RepIteratorExt for T {}
+
+    /// Extension trait providing the `quote_into_iter` method for
+    /// non-iterable types. These types interpolate the same value in each
+    /// iteration of the repetition.
+    #[doc(hidden)]
+    pub trait RepToTokensExt {
+        /// Pretend to be an iterator for the purposes of `quote_into_iter`.
+        /// This allows repeated calls to `quote_into_iter` to continue
+        /// correctly returning DoesNotHaveIter.
+        fn next(&self) -> Option<&Self> {
+            Some(self)
+        }
+
+        fn quote_into_iter(&self) -> (&Self, DoesNotHaveIter) {
+            (self, DoesNotHaveIter)
+        }
+    }
+
+    impl<T: ToTokens + ?Sized> RepToTokensExt for T {}
+
+    /// Extension trait providing the `quote_into_iter` method for types that
+    /// can be referenced as an iterator.
+    #[doc(hidden)]
+    pub trait RepAsIteratorExt<'q> {
+        type Iter: Iterator;
+
+        fn quote_into_iter(&'q self) -> (Self::Iter, HasIter);
+    }
+
+    impl<'q, 'a, T: RepAsIteratorExt<'q> + ?Sized> RepAsIteratorExt<'q> for &'a T {
+        type Iter = T::Iter;
+
+        fn quote_into_iter(&'q self) -> (Self::Iter, HasIter) {
+            <T as RepAsIteratorExt>::quote_into_iter(*self)
+        }
+    }
+
+    impl<'q, 'a, T: RepAsIteratorExt<'q> + ?Sized> RepAsIteratorExt<'q> for &'a mut T {
+        type Iter = T::Iter;
+
+        fn quote_into_iter(&'q self) -> (Self::Iter, HasIter) {
+            <T as RepAsIteratorExt>::quote_into_iter(*self)
+        }
+    }
+
+    impl<'q, T: 'q> RepAsIteratorExt<'q> for [T] {
+        type Iter = slice::Iter<'q, T>;
+
+        fn quote_into_iter(&'q self) -> (Self::Iter, HasIter) {
+            (self.iter(), HasIter)
+        }
+    }
+
+    impl<'q, T: 'q> RepAsIteratorExt<'q> for Vec<T> {
+        type Iter = slice::Iter<'q, T>;
+
+        fn quote_into_iter(&'q self) -> (Self::Iter, HasIter) {
+            (self.iter(), HasIter)
+        }
+    }
+
+    impl<'q, T: 'q> RepAsIteratorExt<'q> for BTreeSet<T> {
+        type Iter = btree_set::Iter<'q, T>;
+
+        fn quote_into_iter(&'q self) -> (Self::Iter, HasIter) {
+            (self.iter(), HasIter)
+        }
+    }
+
+    impl<'q, T: RepAsIteratorExt<'q>> RepAsIteratorExt<'q> for RepInterp<T> {
+        type Iter = T::Iter;
+
+        fn quote_into_iter(&'q self) -> (Self::Iter, HasIter) {
+            self.0.quote_into_iter()
+        }
+    }
+}
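Which of the three `quote_into_iter` traits applies is decided by ordinary method resolution: an iterator hits `RepIteratorExt`, a referenced collection hits `RepAsIteratorExt`, and a plain `ToTokens` value falls through to `RepToTokensExt` (returning `DoesNotHaveIter`). A sketch of the binding sequence that a `#(#x)*` expansion performs, written against the `#[doc(hidden)]` `__private` paths the macros above use, so treat it as illustrative only:

```
// Illustrative only: exercises quote's hidden __private API the way the
// macro expansion does; downstream code should never call this directly.
use quote::__private::ext::*;
use quote::__private::{HasIterator, ThereIsNoIteratorInRepetition};

fn main() {
    let x = vec![1u8, 2, 3];
    let has_iter = ThereIsNoIteratorInRepetition;
    // Vec<T> is caught by RepAsIteratorExt, yielding a slice iterator plus
    // the HasIterator marker; a bare ToTokens value would instead yield
    // ThereIsNoIteratorInRepetition via RepToTokensExt.
    let (mut x, i) = x.quote_into_iter();
    let has_iter = has_iter | i;
    // This type ascription is the compile-time check that at least one
    // interpolated variable really was an iterator.
    let _: HasIterator = has_iter;
    while let Some(item) = x.next() {
        // ... emit the repetition body once per element ...
        let _ = item;
    }
}
```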
+
+// Helper type used within interpolations to allow for repeated binding names.
+// Implements the relevant traits, and exports a dummy `next()` method.
+#[derive(Copy, Clone)]
+#[doc(hidden)]
+pub struct RepInterp<T>(pub T);
+
+impl<T> RepInterp<T> {
+    // This method is intended to look like `Iterator::next`, and is called when
+    // a name is bound multiple times, as the previous binding will shadow the
+    // original `Iterator` object. This allows us to avoid advancing the
+    // iterator multiple times per iteration.
+    pub fn next(self) -> Option<T> {
+        Some(self.0)
+    }
+}
+
+impl<T: Iterator> Iterator for RepInterp<T> {
+    type Item = T::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+}
+
+impl<T: ToTokens> ToTokens for RepInterp<T> {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        self.0.to_tokens(tokens);
+    }
+}
+
+#[doc(hidden)]
+#[inline]
+pub fn get_span<T>(span: T) -> GetSpan<T> {
+    GetSpan(GetSpanInner(GetSpanBase(span)))
+}
+
+mod get_span {
+    use core::ops::Deref;
+    use proc_macro2::extra::DelimSpan;
+    use proc_macro2::Span;
+
+    pub struct GetSpan<T>(pub(crate) GetSpanInner<T>);
+
+    pub struct GetSpanInner<T>(pub(crate) GetSpanBase<T>);
+
+    pub struct GetSpanBase<T>(pub(crate) T);
+
+    impl GetSpan<Span> {
+        #[inline]
+        pub fn __into_span(self) -> Span {
+            ((self.0).0).0
+        }
+    }
+
+    impl GetSpanInner<DelimSpan> {
+        #[inline]
+        pub fn __into_span(&self) -> Span {
+            (self.0).0.join()
+        }
+    }
+
+    impl<T> GetSpanBase<T> {
+        #[allow(clippy::unused_self)]
+        pub fn __into_span(&self) -> T {
+            unreachable!()
+        }
+    }
+
+    impl<T> Deref for GetSpan<T> {
+        type Target = GetSpanInner<T>;
+
+        #[inline]
+        fn deref(&self) -> &Self::Target {
+            &self.0
+        }
+    }
+
+    impl<T> Deref for GetSpanInner<T> {
+        type Target = GetSpanBase<T>;
+
+        #[inline]
+        fn deref(&self) -> &Self::Target {
+            &self.0
+        }
+    }
+}
+
+#[doc(hidden)]
+pub fn push_group(tokens: &mut TokenStream, delimiter: Delimiter, inner: TokenStream) {
+    tokens.append(Group::new(delimiter, inner));
+}
+
+#[doc(hidden)]
+pub fn push_group_spanned(
+    tokens: &mut TokenStream,
+    span: Span,
+    delimiter: Delimiter,
+    inner: TokenStream,
+) {
+    let mut g = Group::new(delimiter, inner);
+    g.set_span(span);
+    tokens.append(g);
+}
+
+#[doc(hidden)]
+pub fn parse(tokens: &mut TokenStream, s: &str) {
+    let s: TokenStream = s.parse().expect("invalid token stream");
+    tokens.extend(iter::once(s));
+}
+
+#[doc(hidden)]
+pub fn parse_spanned(tokens: &mut TokenStream, span: Span, s: &str) {
+    let s: TokenStream = s.parse().expect("invalid token stream");
+    tokens.extend(s.into_iter().map(|t| respan_token_tree(t, span)));
+}
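The `get_span` module above is a deref-based specialization trick: `get_span(s).__into_span()` picks the inherent method on `GetSpan<Span>` for a plain `Span`, falls through one `Deref` to reach the `GetSpanInner<DelimSpan>` impl for a `DelimSpan`, and only hits the unreachable `GetSpanBase` fallback for types that the macro's type ascription has already rejected. A minimal sketch of the same dispatch with hypothetical standalone types:

```
// Standalone sketch of deref-based specialization; names are hypothetical.
use core::ops::Deref;

struct Wrap<T>(Inner<T>);
struct Inner<T>(T);

// Most-specific case: an inherent method only on the concrete type.
impl Wrap<i32> {
    fn describe(&self) -> &'static str { "got an i32" }
}

// Fallback reached through one level of auto-deref for every other T.
impl<T> Inner<T> {
    fn describe(&self) -> &'static str { "got something else" }
}

impl<T> Deref for Wrap<T> {
    type Target = Inner<T>;
    fn deref(&self) -> &Inner<T> { &self.0 }
}

fn main() {
    // Method resolution prefers the inherent impl on Wrap<i32>; any other
    // type parameter derefs down to Inner's method instead.
    assert_eq!(Wrap(Inner(1i32)).describe(), "got an i32");
    assert_eq!(Wrap(Inner("x")).describe(), "got something else");
}
```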
+
+// Token tree with every span replaced by the given one.
+fn respan_token_tree(mut token: TokenTree, span: Span) -> TokenTree {
+    match &mut token {
+        TokenTree::Group(g) => {
+            let stream = g
+                .stream()
+                .into_iter()
+                .map(|token| respan_token_tree(token, span))
+                .collect();
+            *g = Group::new(g.delimiter(), stream);
+            g.set_span(span);
+        }
+        other => other.set_span(span),
+    }
+    token
+}
+
+#[doc(hidden)]
+pub fn push_ident(tokens: &mut TokenStream, s: &str) {
+    let span = Span::call_site();
+    push_ident_spanned(tokens, span, s);
+}
+
+#[doc(hidden)]
+pub fn push_ident_spanned(tokens: &mut TokenStream, span: Span, s: &str) {
+    tokens.append(ident_maybe_raw(s, span));
+}
+
+#[doc(hidden)]
+pub fn push_lifetime(tokens: &mut TokenStream, lifetime: &str) {
+    struct Lifetime<'a> {
+        name: &'a str,
+        state: u8,
+    }
+
+    impl<'a> Iterator for Lifetime<'a> {
+        type Item = TokenTree;
+
+        fn next(&mut self) -> Option<Self::Item> {
+            match self.state {
+                0 => {
+                    self.state = 1;
+                    Some(TokenTree::Punct(Punct::new('\'', Spacing::Joint)))
+                }
+                1 => {
+                    self.state = 2;
+                    Some(TokenTree::Ident(Ident::new(self.name, Span::call_site())))
+                }
+                _ => None,
+            }
+        }
+    }
+
+    tokens.extend(Lifetime {
+        name: &lifetime[1..],
+        state: 0,
+    });
+}
+
+#[doc(hidden)]
+pub fn push_lifetime_spanned(tokens: &mut TokenStream, span: Span, lifetime: &str) {
+    struct Lifetime<'a> {
+        name: &'a str,
+        span: Span,
+        state: u8,
+    }
+
+    impl<'a> Iterator for Lifetime<'a> {
+        type Item = TokenTree;
+
+        fn next(&mut self) -> Option<Self::Item> {
+            match self.state {
+                0 => {
+                    self.state = 1;
+                    let mut apostrophe = Punct::new('\'', Spacing::Joint);
+                    apostrophe.set_span(self.span);
+                    Some(TokenTree::Punct(apostrophe))
+                }
+                1 => {
+                    self.state = 2;
+                    Some(TokenTree::Ident(Ident::new(self.name, self.span)))
+                }
+                _ => None,
+            }
+        }
+    }
+
+    tokens.extend(Lifetime {
+        name: &lifetime[1..],
+        span,
+        state: 0,
+    });
+}
+
+macro_rules!
push_punct { + ($name:ident $spanned:ident $char1:tt) => { + #[doc(hidden)] + pub fn $name(tokens: &mut TokenStream) { + tokens.append(Punct::new($char1, Spacing::Alone)); + } + #[doc(hidden)] + pub fn $spanned(tokens: &mut TokenStream, span: Span) { + let mut punct = Punct::new($char1, Spacing::Alone); + punct.set_span(span); + tokens.append(punct); + } + }; + ($name:ident $spanned:ident $char1:tt $char2:tt) => { + #[doc(hidden)] + pub fn $name(tokens: &mut TokenStream) { + tokens.append(Punct::new($char1, Spacing::Joint)); + tokens.append(Punct::new($char2, Spacing::Alone)); + } + #[doc(hidden)] + pub fn $spanned(tokens: &mut TokenStream, span: Span) { + let mut punct = Punct::new($char1, Spacing::Joint); + punct.set_span(span); + tokens.append(punct); + let mut punct = Punct::new($char2, Spacing::Alone); + punct.set_span(span); + tokens.append(punct); + } + }; + ($name:ident $spanned:ident $char1:tt $char2:tt $char3:tt) => { + #[doc(hidden)] + pub fn $name(tokens: &mut TokenStream) { + tokens.append(Punct::new($char1, Spacing::Joint)); + tokens.append(Punct::new($char2, Spacing::Joint)); + tokens.append(Punct::new($char3, Spacing::Alone)); + } + #[doc(hidden)] + pub fn $spanned(tokens: &mut TokenStream, span: Span) { + let mut punct = Punct::new($char1, Spacing::Joint); + punct.set_span(span); + tokens.append(punct); + let mut punct = Punct::new($char2, Spacing::Joint); + punct.set_span(span); + tokens.append(punct); + let mut punct = Punct::new($char3, Spacing::Alone); + punct.set_span(span); + tokens.append(punct); + } + }; +} + +push_punct!(push_add push_add_spanned '+'); +push_punct!(push_add_eq push_add_eq_spanned '+' '='); +push_punct!(push_and push_and_spanned '&'); +push_punct!(push_and_and push_and_and_spanned '&' '&'); +push_punct!(push_and_eq push_and_eq_spanned '&' '='); +push_punct!(push_at push_at_spanned '@'); +push_punct!(push_bang push_bang_spanned '!'); +push_punct!(push_caret push_caret_spanned '^'); +push_punct!(push_caret_eq push_caret_eq_spanned '^' '='); +push_punct!(push_colon push_colon_spanned ':'); +push_punct!(push_colon2 push_colon2_spanned ':' ':'); +push_punct!(push_comma push_comma_spanned ','); +push_punct!(push_div push_div_spanned '/'); +push_punct!(push_div_eq push_div_eq_spanned '/' '='); +push_punct!(push_dot push_dot_spanned '.'); +push_punct!(push_dot2 push_dot2_spanned '.' '.'); +push_punct!(push_dot3 push_dot3_spanned '.' '.' '.'); +push_punct!(push_dot_dot_eq push_dot_dot_eq_spanned '.' '.' '='); +push_punct!(push_eq push_eq_spanned '='); +push_punct!(push_eq_eq push_eq_eq_spanned '=' '='); +push_punct!(push_ge push_ge_spanned '>' '='); +push_punct!(push_gt push_gt_spanned '>'); +push_punct!(push_le push_le_spanned '<' '='); +push_punct!(push_lt push_lt_spanned '<'); +push_punct!(push_mul_eq push_mul_eq_spanned '*' '='); +push_punct!(push_ne push_ne_spanned '!' 
'=');
+push_punct!(push_or push_or_spanned '|');
+push_punct!(push_or_eq push_or_eq_spanned '|' '=');
+push_punct!(push_or_or push_or_or_spanned '|' '|');
+push_punct!(push_pound push_pound_spanned '#');
+push_punct!(push_question push_question_spanned '?');
+push_punct!(push_rarrow push_rarrow_spanned '-' '>');
+push_punct!(push_larrow push_larrow_spanned '<' '-');
+push_punct!(push_rem push_rem_spanned '%');
+push_punct!(push_rem_eq push_rem_eq_spanned '%' '=');
+push_punct!(push_fat_arrow push_fat_arrow_spanned '=' '>');
+push_punct!(push_semi push_semi_spanned ';');
+push_punct!(push_shl push_shl_spanned '<' '<');
+push_punct!(push_shl_eq push_shl_eq_spanned '<' '<' '=');
+push_punct!(push_shr push_shr_spanned '>' '>');
+push_punct!(push_shr_eq push_shr_eq_spanned '>' '>' '=');
+push_punct!(push_star push_star_spanned '*');
+push_punct!(push_sub push_sub_spanned '-');
+push_punct!(push_sub_eq push_sub_eq_spanned '-' '=');
+
+#[doc(hidden)]
+pub fn push_underscore(tokens: &mut TokenStream) {
+    push_underscore_spanned(tokens, Span::call_site());
+}
+
+#[doc(hidden)]
+pub fn push_underscore_spanned(tokens: &mut TokenStream, span: Span) {
+    tokens.append(Ident::new("_", span));
+}
+
+// Helper method for constructing identifiers from the `format_ident!` macro,
+// handling `r#` prefixes.
+#[doc(hidden)]
+pub fn mk_ident(id: &str, span: Option<Span>) -> Ident {
+    let span = span.unwrap_or_else(Span::call_site);
+    ident_maybe_raw(id, span)
+}
+
+fn ident_maybe_raw(id: &str, span: Span) -> Ident {
+    if let Some(id) = id.strip_prefix("r#") {
+        Ident::new_raw(id, span)
+    } else {
+        Ident::new(id, span)
+    }
+}
+
+// Adapts from `IdentFragment` to `fmt::Display` for use by the `format_ident!`
+// macro, and exposes span information from these fragments.
+//
+// This struct also has forwarding implementations of the formatting traits
+// `Octal`, `LowerHex`, `UpperHex`, and `Binary` to allow for their use within
+// `format_ident!`.
+#[derive(Copy, Clone)]
+#[doc(hidden)]
+pub struct IdentFragmentAdapter<T: IdentFragment>(pub T);
+
+impl<T: IdentFragment> IdentFragmentAdapter<T> {
+    pub fn span(&self) -> Option<Span> {
+        self.0.span()
+    }
+}
+
+impl<T: IdentFragment> fmt::Display for IdentFragmentAdapter<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        IdentFragment::fmt(&self.0, f)
+    }
+}
+
+impl<T: IdentFragment + fmt::Octal> fmt::Octal for IdentFragmentAdapter<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Octal::fmt(&self.0, f)
+    }
+}
+
+impl<T: IdentFragment + fmt::LowerHex> fmt::LowerHex for IdentFragmentAdapter<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::LowerHex::fmt(&self.0, f)
+    }
+}
+
+impl<T: IdentFragment + fmt::UpperHex> fmt::UpperHex for IdentFragmentAdapter<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::UpperHex::fmt(&self.0, f)
+    }
+}
+
+impl<T: IdentFragment + fmt::Binary> fmt::Binary for IdentFragmentAdapter<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Binary::fmt(&self.0, f)
+    }
+}
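`mk_ident` and `ident_maybe_raw` above are what give `format_ident!` its raw-identifier handling: a leading `r#` in the formatted result is stripped and routed to `Ident::new_raw`. A small usage sketch of the public macro, mirroring the behavior `test_format_ident_strip_raw` asserts later in this patch:

```
use quote::format_ident;

fn main() {
    // A formatted result beginning with `r#` becomes a raw identifier.
    let raw = format_ident!("r#{}", "struct");
    assert_eq!(raw.to_string(), "r#struct");

    // Interpolating an existing raw ident strips its `r#` before formatting,
    // so the result is a plain identifier unless `r#` is written back in.
    let plain = format_ident!("prefix_{}", raw);
    assert_eq!(plain.to_string(), "prefix_struct");
}
```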
diff --git a/vendor/quote/src/spanned.rs b/vendor/quote/src/spanned.rs
new file mode 100644
index 0000000..6eba644
--- /dev/null
+++ b/vendor/quote/src/spanned.rs
@@ -0,0 +1,50 @@
+use crate::ToTokens;
+use proc_macro2::extra::DelimSpan;
+use proc_macro2::{Span, TokenStream};
+
+// Not public API other than via the syn crate. Use syn::spanned::Spanned.
+pub trait Spanned: private::Sealed {
+    fn __span(&self) -> Span;
+}
+
+impl Spanned for Span {
+    fn __span(&self) -> Span {
+        *self
+    }
+}
+
+impl Spanned for DelimSpan {
+    fn __span(&self) -> Span {
+        self.join()
+    }
+}
+
+impl<T: ?Sized + ToTokens> Spanned for T {
+    fn __span(&self) -> Span {
+        join_spans(self.into_token_stream())
+    }
+}
+
+fn join_spans(tokens: TokenStream) -> Span {
+    let mut iter = tokens.into_iter().map(|tt| tt.span());
+
+    let first = match iter.next() {
+        Some(span) => span,
+        None => return Span::call_site(),
+    };
+
+    iter.fold(None, |_prev, next| Some(next))
+        .and_then(|last| first.join(last))
+        .unwrap_or(first)
+}
+
+mod private {
+    use crate::ToTokens;
+    use proc_macro2::extra::DelimSpan;
+    use proc_macro2::Span;
+
+    pub trait Sealed {}
+    impl Sealed for Span {}
+    impl Sealed for DelimSpan {}
+    impl<T: ?Sized + ToTokens> Sealed for T {}
+}
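`join_spans` keeps the first token's span, folds the iterator down to the last span, and attempts `Span::join`, which returns `None` whenever the toolchain cannot join spans, hence the fallback to `first`. The same fold can be reproduced with public proc-macro2 API (a sketch; outside a procedural macro the join simply falls back to the first span):

```
use proc_macro2::{Span, TokenStream};
use quote::quote;

fn main() {
    let tokens: TokenStream = quote!(a + b);
    let mut spans = tokens.into_iter().map(|tt| tt.span());

    let first = spans.next().unwrap();
    // Fold to the last span, try to join with the first, fall back to first.
    let joined: Span = spans
        .fold(None, |_prev, next| Some(next))
        .and_then(|last| first.join(last))
        .unwrap_or(first);
    let _ = joined;
}
```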
diff --git a/vendor/quote/src/to_tokens.rs b/vendor/quote/src/to_tokens.rs
new file mode 100644
index 0000000..23b6ec2
--- /dev/null
+++ b/vendor/quote/src/to_tokens.rs
@@ -0,0 +1,209 @@
+use super::TokenStreamExt;
+use alloc::borrow::Cow;
+use alloc::rc::Rc;
+use core::iter;
+use proc_macro2::{Group, Ident, Literal, Punct, Span, TokenStream, TokenTree};
+
+/// Types that can be interpolated inside a `quote!` invocation.
+///
+/// [`quote!`]: macro.quote.html
+pub trait ToTokens {
+    /// Write `self` to the given `TokenStream`.
+    ///
+    /// The token append methods provided by the [`TokenStreamExt`] extension
+    /// trait may be useful for implementing `ToTokens`.
+    ///
+    /// [`TokenStreamExt`]: trait.TokenStreamExt.html
+    ///
+    /// # Example
+    ///
+    /// Example implementation for a struct representing Rust paths like
+    /// `std::cmp::PartialEq`:
+    ///
+    /// ```
+    /// use proc_macro2::{TokenTree, Spacing, Span, Punct, TokenStream};
+    /// use quote::{TokenStreamExt, ToTokens};
+    ///
+    /// pub struct Path {
+    ///     pub global: bool,
+    ///     pub segments: Vec<PathSegment>,
+    /// }
+    ///
+    /// impl ToTokens for Path {
+    ///     fn to_tokens(&self, tokens: &mut TokenStream) {
+    ///         for (i, segment) in self.segments.iter().enumerate() {
+    ///             if i > 0 || self.global {
+    ///                 // Double colon `::`
+    ///                 tokens.append(Punct::new(':', Spacing::Joint));
+    ///                 tokens.append(Punct::new(':', Spacing::Alone));
+    ///             }
+    ///             segment.to_tokens(tokens);
+    ///         }
+    ///     }
+    /// }
+    /// #
+    /// # pub struct PathSegment;
+    /// #
+    /// # impl ToTokens for PathSegment {
+    /// #     fn to_tokens(&self, tokens: &mut TokenStream) {
+    /// #         unimplemented!()
+    /// #     }
+    /// # }
+    /// ```
+    fn to_tokens(&self, tokens: &mut TokenStream);
+
+    /// Convert `self` directly into a `TokenStream` object.
+    ///
+    /// This method is implicitly implemented using `to_tokens`, and acts as a
+    /// convenience method for consumers of the `ToTokens` trait.
+    fn to_token_stream(&self) -> TokenStream {
+        let mut tokens = TokenStream::new();
+        self.to_tokens(&mut tokens);
+        tokens
+    }
+
+    /// Convert `self` directly into a `TokenStream` object.
+    ///
+    /// This method is implicitly implemented using `to_tokens`, and acts as a
+    /// convenience method for consumers of the `ToTokens` trait.
+    fn into_token_stream(self) -> TokenStream
+    where
+        Self: Sized,
+    {
+        self.to_token_stream()
+    }
+}
+
+impl<'a, T: ?Sized + ToTokens> ToTokens for &'a T {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        (**self).to_tokens(tokens);
+    }
+}
+
+impl<'a, T: ?Sized + ToTokens> ToTokens for &'a mut T {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        (**self).to_tokens(tokens);
+    }
+}
+
+impl<'a, T: ?Sized + ToOwned + ToTokens> ToTokens for Cow<'a, T> {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        (**self).to_tokens(tokens);
+    }
+}
+
+impl<T: ?Sized + ToTokens> ToTokens for Box<T> {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        (**self).to_tokens(tokens);
+    }
+}
+
+impl<T: ?Sized + ToTokens> ToTokens for Rc<T> {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        (**self).to_tokens(tokens);
+    }
+}
+
+impl<T: ToTokens> ToTokens for Option<T> {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        if let Some(ref t) = *self {
+            t.to_tokens(tokens);
+        }
+    }
+}
+
+impl ToTokens for str {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.append(Literal::string(self));
+    }
+}
+
+impl ToTokens for String {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        self.as_str().to_tokens(tokens);
+    }
+}
+
+macro_rules! primitive {
+    ($($t:ident => $name:ident)*) => {
+        $(
+            impl ToTokens for $t {
+                fn to_tokens(&self, tokens: &mut TokenStream) {
+                    tokens.append(Literal::$name(*self));
+                }
+            }
+        )*
+    };
+}
+
+primitive! {
+    i8 => i8_suffixed
+    i16 => i16_suffixed
+    i32 => i32_suffixed
+    i64 => i64_suffixed
+    i128 => i128_suffixed
+    isize => isize_suffixed
+
+    u8 => u8_suffixed
+    u16 => u16_suffixed
+    u32 => u32_suffixed
+    u64 => u64_suffixed
+    u128 => u128_suffixed
+    usize => usize_suffixed
+
+    f32 => f32_suffixed
+    f64 => f64_suffixed
+}
+
+impl ToTokens for char {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.append(Literal::character(*self));
+    }
+}
+
+impl ToTokens for bool {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        let word = if *self { "true" } else { "false" };
+        tokens.append(Ident::new(word, Span::call_site()));
+    }
+}
+
+impl ToTokens for Group {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.append(self.clone());
+    }
+}
+
+impl ToTokens for Ident {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.append(self.clone());
+    }
+}
+
+impl ToTokens for Punct {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.append(self.clone());
+    }
+}
+
+impl ToTokens for Literal {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.append(self.clone());
+    }
+}
+
+impl ToTokens for TokenTree {
+    fn to_tokens(&self, dst: &mut TokenStream) {
+        dst.append(self.clone());
+    }
+}
+
+impl ToTokens for TokenStream {
+    fn to_tokens(&self, dst: &mut TokenStream) {
+        dst.extend(iter::once(self.clone()));
+    }
+
+    fn into_token_stream(self) -> TokenStream {
+        self
+    }
+}
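Among the impls above, note that `Option<T>` appends nothing for `None`, which is what lets an optional fragment such as a where-clause vanish from the expansion. A short usage sketch; the expected strings follow the same token-spacing convention visible in the test expectations below:

```
use proc_macro2::TokenStream;
use quote::quote;

fn main() {
    let some_clause: Option<TokenStream> = Some(quote!(where T: Clone));
    let no_clause: Option<TokenStream> = None;

    // `Some` interpolates its contents; `None` contributes no tokens at all.
    assert_eq!(
        quote!(fn f() #some_clause {}).to_string(),
        "fn f () where T : Clone { }",
    );
    assert_eq!(quote!(fn f() #no_clause {}).to_string(), "fn f () { }");
}
```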
diff --git a/vendor/quote/tests/compiletest.rs b/vendor/quote/tests/compiletest.rs
new file mode 100644
index 0000000..7974a62
--- /dev/null
+++ b/vendor/quote/tests/compiletest.rs
@@ -0,0 +1,7 @@
+#[rustversion::attr(not(nightly), ignore)]
+#[cfg_attr(miri, ignore)]
+#[test]
+fn ui() {
+    let t = trybuild::TestCases::new();
+    t.compile_fail("tests/ui/*.rs");
+}
diff --git a/vendor/quote/tests/test.rs b/vendor/quote/tests/test.rs
new file mode 100644
index 0000000..eab4f55
--- /dev/null
+++ b/vendor/quote/tests/test.rs
@@ -0,0 +1,549 @@
+#![allow(
+    clippy::disallowed_names,
+    clippy::let_underscore_untyped,
+    clippy::shadow_unrelated,
+    clippy::unseparated_literal_suffix,
+    clippy::used_underscore_binding
+)]
+
+extern crate proc_macro;
+
+use std::borrow::Cow;
+use std::collections::BTreeSet;
+
+use proc_macro2::{Delimiter, Group, Ident, Span, TokenStream};
+use quote::{format_ident, quote, quote_spanned, TokenStreamExt};
+
+struct X;
+
+impl quote::ToTokens for X {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        tokens.append(Ident::new("X", Span::call_site()));
+    }
+}
+
+#[test]
+fn test_quote_impl() {
+    let tokens = quote! {
+        impl<'a, T: ToTokens> ToTokens for &'a T {
+            fn to_tokens(&self, tokens: &mut TokenStream) {
+                (**self).to_tokens(tokens)
+            }
+        }
+    };
+
+    let expected = concat!(
+        "impl < 'a , T : ToTokens > ToTokens for & 'a T { ",
+        "fn to_tokens (& self , tokens : & mut TokenStream) { ",
+        "(* * self) . to_tokens (tokens) ",
+        "} ",
+        "}"
+    );
+
+    assert_eq!(expected, tokens.to_string());
+}
+
+#[test]
+fn test_quote_spanned_impl() {
+    let span = Span::call_site();
+    let tokens = quote_spanned! {span=>
+        impl<'a, T: ToTokens> ToTokens for &'a T {
+            fn to_tokens(&self, tokens: &mut TokenStream) {
+                (**self).to_tokens(tokens)
+            }
+        }
+    };
+
+    let expected = concat!(
+        "impl < 'a , T : ToTokens > ToTokens for & 'a T { ",
+        "fn to_tokens (& self , tokens : & mut TokenStream) { ",
+        "(* * self) . to_tokens (tokens) ",
+        "} ",
+        "}"
+    );
+
+    assert_eq!(expected, tokens.to_string());
+}
+
+#[test]
+fn test_substitution() {
+    let x = X;
+    let tokens = quote!(#x <#x> (#x) [#x] {#x});
+
+    let expected = "X < X > (X) [X] { X }";
+
+    assert_eq!(expected, tokens.to_string());
+}
+
+#[test]
+fn test_iter() {
+    let primes = &[X, X, X, X];
+
+    assert_eq!("X X X X", quote!(#(#primes)*).to_string());
+
+    assert_eq!("X , X , X , X ,", quote!(#(#primes,)*).to_string());
+
+    assert_eq!("X , X , X , X", quote!(#(#primes),*).to_string());
+}
+
+#[test]
+fn test_array() {
+    let array: [u8; 40] = [0; 40];
+    let _ = quote!(#(#array #array)*);
+
+    let ref_array: &[u8; 40] = &[0; 40];
+    let _ = quote!(#(#ref_array #ref_array)*);
+
+    let ref_slice: &[u8] = &[0; 40];
+    let _ = quote!(#(#ref_slice #ref_slice)*);
+
+    let array: [X; 2] = [X, X]; // !Copy
+    let _ = quote!(#(#array #array)*);
+
+    let ref_array: &[X; 2] = &[X, X];
+    let _ = quote!(#(#ref_array #ref_array)*);
+
+    let ref_slice: &[X] = &[X, X];
+    let _ = quote!(#(#ref_slice #ref_slice)*);
+}
+
+#[test]
+fn test_advanced() {
+    let generics = quote!( <'a, T> );
+
+    let where_clause = quote!( where T: Serialize );
+
+    let field_ty = quote!(String);
+
+    let item_ty = quote!(Cow<'a, str>);
+
+    let path = quote!(SomeTrait::serialize_with);
+
+    let value = quote!(self.x);
+
+    let tokens = quote! {
+        struct SerializeWith #generics #where_clause {
+            value: &'a #field_ty,
+            phantom: ::std::marker::PhantomData<#item_ty>,
+        }
+
+        impl #generics ::serde::Serialize for SerializeWith #generics #where_clause {
+            fn serialize<S>(&self, s: &mut S) -> Result<(), S::Error>
+                where S: ::serde::Serializer
+            {
+                #path(self.value, s)
+            }
+        }
+
+        SerializeWith {
+            value: #value,
+            phantom: ::std::marker::PhantomData::<#item_ty>,
+        }
+    };
+
+    let expected = concat!(
+        "struct SerializeWith < 'a , T > where T : Serialize { ",
+        "value : & 'a String , ",
+        "phantom : :: std :: marker :: PhantomData < Cow < 'a , str > > , ",
+        "} ",
+        "impl < 'a , T > :: serde :: Serialize for SerializeWith < 'a , T > where T : Serialize { ",
+        "fn serialize < S > (& self , s : & mut S) -> Result < () , S :: Error > ",
+        "where S : :: serde :: Serializer ",
+        "{ ",
+        "SomeTrait :: serialize_with (self . value , s) ",
+        "} ",
+        "} ",
+        "SerializeWith { ",
+        "value : self . 
x , ", + "phantom : :: std :: marker :: PhantomData :: < Cow < 'a , str > > , ", + "}" + ); + + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_integer() { + let ii8 = -1i8; + let ii16 = -1i16; + let ii32 = -1i32; + let ii64 = -1i64; + let ii128 = -1i128; + let iisize = -1isize; + let uu8 = 1u8; + let uu16 = 1u16; + let uu32 = 1u32; + let uu64 = 1u64; + let uu128 = 1u128; + let uusize = 1usize; + + let tokens = quote! { + 1 1i32 1u256 + #ii8 #ii16 #ii32 #ii64 #ii128 #iisize + #uu8 #uu16 #uu32 #uu64 #uu128 #uusize + }; + let expected = + "1 1i32 1u256 - 1i8 - 1i16 - 1i32 - 1i64 - 1i128 - 1isize 1u8 1u16 1u32 1u64 1u128 1usize"; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_floating() { + let e32 = 2.345f32; + + let e64 = 2.345f64; + + let tokens = quote! { + #e32 + #e64 + }; + let expected = concat!("2.345f32 2.345f64"); + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_char() { + let zero = '\u{1}'; + let pound = '#'; + let quote = '"'; + let apost = '\''; + let newline = '\n'; + let heart = '\u{2764}'; + + let tokens = quote! { + #zero #pound #quote #apost #newline #heart + }; + let expected = "'\\u{1}' '#' '\"' '\\'' '\\n' '\u{2764}'"; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_str() { + let s = "\u{1} a 'b \" c"; + let tokens = quote!(#s); + let expected = "\"\\u{1} a 'b \\\" c\""; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_string() { + let s = "\u{1} a 'b \" c".to_string(); + let tokens = quote!(#s); + let expected = "\"\\u{1} a 'b \\\" c\""; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_interpolated_literal() { + macro_rules! m { + ($literal:literal) => { + quote!($literal) + }; + } + + let tokens = m!(1); + let expected = "1"; + assert_eq!(expected, tokens.to_string()); + + let tokens = m!(-1); + let expected = "- 1"; + assert_eq!(expected, tokens.to_string()); + + let tokens = m!(true); + let expected = "true"; + assert_eq!(expected, tokens.to_string()); + + let tokens = m!(-true); + let expected = "- true"; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_ident() { + let foo = Ident::new("Foo", Span::call_site()); + let bar = Ident::new(&format!("Bar{}", 7), Span::call_site()); + let tokens = quote!(struct #foo; enum #bar {}); + let expected = "struct Foo ; enum Bar7 { }"; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_underscore() { + let tokens = quote!(let _;); + let expected = "let _ ;"; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_duplicate() { + let ch = 'x'; + + let tokens = quote!(#ch #ch); + + let expected = "'x' 'x'"; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_fancy_repetition() { + let foo = vec!["a", "b"]; + let bar = vec![true, false]; + + let tokens = quote! { + #(#foo: #bar),* + }; + + let expected = r#""a" : true , "b" : false"#; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_nested_fancy_repetition() { + let nested = vec![vec!['a', 'b', 'c'], vec!['x', 'y', 'z']]; + + let tokens = quote! { + #( + #(#nested)* + ),* + }; + + let expected = "'a' 'b' 'c' , 'x' 'y' 'z'"; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_duplicate_name_repetition() { + let foo = &["a", "b"]; + + let tokens = quote! 
{
+        #(#foo: #foo),*
+        #(#foo: #foo),*
+    };
+
+    let expected = r#""a" : "a" , "b" : "b" "a" : "a" , "b" : "b""#;
+    assert_eq!(expected, tokens.to_string());
+}
+
+#[test]
+fn test_duplicate_name_repetition_no_copy() {
+    let foo = vec!["a".to_owned(), "b".to_owned()];
+
+    let tokens = quote! {
+        #(#foo: #foo),*
+    };
+
+    let expected = r#""a" : "a" , "b" : "b""#;
+    assert_eq!(expected, tokens.to_string());
+}
+
+#[test]
+fn test_btreeset_repetition() {
+    let mut set = BTreeSet::new();
+    set.insert("a".to_owned());
+    set.insert("b".to_owned());
+
+    let tokens = quote! {
+        #(#set: #set),*
+    };
+
+    let expected = r#""a" : "a" , "b" : "b""#;
+    assert_eq!(expected, tokens.to_string());
+}
+
+#[test]
+fn test_variable_name_conflict() {
+    // The implementation of `#(...),*` uses the variable `_i` but it should be
+    // fine, if a little confusing when debugging.
+    let _i = vec!['a', 'b'];
+    let tokens = quote! { #(#_i),* };
+    let expected = "'a' , 'b'";
+    assert_eq!(expected, tokens.to_string());
+}
+
+#[test]
+fn test_nonrep_in_repetition() {
+    let rep = vec!["a", "b"];
+    let nonrep = "c";
+
+    let tokens = quote! {
+        #(#rep #rep : #nonrep #nonrep),*
+    };
+
+    let expected = r#""a" "a" : "c" "c" , "b" "b" : "c" "c""#;
+    assert_eq!(expected, tokens.to_string());
+}
+
+#[test]
+fn test_empty_quote() {
+    let tokens = quote!();
+    assert_eq!("", tokens.to_string());
+}
+
+#[test]
+fn test_box_str() {
+    let b = "str".to_owned().into_boxed_str();
+    let tokens = quote! { #b };
+    assert_eq!("\"str\"", tokens.to_string());
+}
+
+#[test]
+fn test_cow() {
+    let owned: Cow<Ident> = Cow::Owned(Ident::new("owned", Span::call_site()));
+
+    let ident = Ident::new("borrowed", Span::call_site());
+    let borrowed = Cow::Borrowed(&ident);
+
+    let tokens = quote! { #owned #borrowed };
+    assert_eq!("owned borrowed", tokens.to_string());
+}
+
+#[test]
+fn test_closure() {
+    fn field_i(i: usize) -> Ident {
+        format_ident!("__field{}", i)
+    }
+
+    let fields = (0usize..3)
+        .map(field_i as fn(_) -> _)
+        .map(|var| quote! { #var });
+
+    let tokens = quote! { #(#fields)* };
+    assert_eq!("__field0 __field1 __field2", tokens.to_string());
+}
+
+#[test]
+fn test_append_tokens() {
+    let mut a = quote!(a);
+    let b = quote!(b);
+    a.append_all(b);
+    assert_eq!("a b", a.to_string());
+}
+
+#[test]
+fn test_format_ident() {
+    let id0 = format_ident!("Aa");
+    let id1 = format_ident!("Hello{x}", x = id0);
+    let id2 = format_ident!("Hello{x}", x = 5usize);
+    let id3 = format_ident!("Hello{}_{x}", id0, x = 10usize);
+    let id4 = format_ident!("Aa", span = Span::call_site());
+    let id5 = format_ident!("Hello{}", Cow::Borrowed("World"));
+
+    assert_eq!(id0, "Aa");
+    assert_eq!(id1, "HelloAa");
+    assert_eq!(id2, "Hello5");
+    assert_eq!(id3, "HelloAa_10");
+    assert_eq!(id4, "Aa");
+    assert_eq!(id5, "HelloWorld");
+}
+
+#[test]
+fn test_format_ident_strip_raw() {
+    let id = format_ident!("r#struct");
+    let my_id = format_ident!("MyId{}", id);
+    let raw_my_id = format_ident!("r#MyId{}", id);
+
+    assert_eq!(id, "r#struct");
+    assert_eq!(my_id, "MyIdstruct");
+    assert_eq!(raw_my_id, "r#MyIdstruct");
+}
+
+#[test]
+fn test_outer_line_comment() {
+    let tokens = quote! {
+        /// doc
+    };
+    let expected = "# [doc = r\" doc\"]";
+    assert_eq!(expected, tokens.to_string());
+}
+
+#[test]
+fn test_inner_line_comment() {
+    let tokens = quote! {
+        //! doc
+    };
+    let expected = "# ! [doc = r\" doc\"]";
+    assert_eq!(expected, tokens.to_string());
+}
+
+#[test]
+fn test_outer_block_comment() {
+    let tokens = quote!
{ + /** doc */ + }; + let expected = "# [doc = r\" doc \"]"; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_inner_block_comment() { + let tokens = quote! { + /*! doc */ + }; + let expected = "# ! [doc = r\" doc \"]"; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_outer_attr() { + let tokens = quote! { + #[inline] + }; + let expected = "# [inline]"; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_inner_attr() { + let tokens = quote! { + #![no_std] + }; + let expected = "# ! [no_std]"; + assert_eq!(expected, tokens.to_string()); +} + +// https://github.com/dtolnay/quote/issues/130 +#[test] +fn test_star_after_repetition() { + let c = vec!['0', '1']; + let tokens = quote! { + #( + f(#c); + )* + *out = None; + }; + let expected = "f ('0') ; f ('1') ; * out = None ;"; + assert_eq!(expected, tokens.to_string()); +} + +#[test] +fn test_quote_raw_id() { + let id = quote!(r#raw_id); + assert_eq!(id.to_string(), "r#raw_id"); +} + +#[test] +fn test_type_inference_for_span() { + trait CallSite { + fn get() -> Self; + } + + impl CallSite for Span { + fn get() -> Self { + Span::call_site() + } + } + + let span = Span::call_site(); + let _ = quote_spanned!(span=> ...); + + let delim_span = Group::new(Delimiter::Parenthesis, TokenStream::new()).delim_span(); + let _ = quote_spanned!(delim_span=> ...); + + let inferred = CallSite::get(); + let _ = quote_spanned!(inferred=> ...); + + if false { + let proc_macro_span = proc_macro::Span::call_site(); + let _ = quote_spanned!(proc_macro_span.into()=> ...); + } +} diff --git a/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.rs b/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.rs new file mode 100644 index 0000000..0a39f41 --- /dev/null +++ b/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.rs @@ -0,0 +1,9 @@ +use quote::quote; + +fn main() { + let nonrep = ""; + + // Without some protection against repetitions with no iterator somewhere + // inside, this would loop infinitely. + quote!(#(#nonrep #nonrep)*); +} diff --git a/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.stderr b/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.stderr new file mode 100644 index 0000000..99c20a5 --- /dev/null +++ b/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.stderr @@ -0,0 +1,11 @@ +error[E0308]: mismatched types + --> tests/ui/does-not-have-iter-interpolated-dup.rs:8:5 + | +8 | quote!(#(#nonrep #nonrep)*); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | | + | expected `HasIterator`, found `ThereIsNoIteratorInRepetition` + | expected due to this + | here the type of `has_iter` is inferred to be `ThereIsNoIteratorInRepetition` + | + = note: this error originates in the macro `$crate::quote_token_with_context` which comes from the expansion of the macro `quote` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/vendor/quote/tests/ui/does-not-have-iter-interpolated.rs b/vendor/quote/tests/ui/does-not-have-iter-interpolated.rs new file mode 100644 index 0000000..2c740cc --- /dev/null +++ b/vendor/quote/tests/ui/does-not-have-iter-interpolated.rs @@ -0,0 +1,9 @@ +use quote::quote; + +fn main() { + let nonrep = ""; + + // Without some protection against repetitions with no iterator somewhere + // inside, this would loop infinitely. 
+ quote!(#(#nonrep)*); +} diff --git a/vendor/quote/tests/ui/does-not-have-iter-interpolated.stderr b/vendor/quote/tests/ui/does-not-have-iter-interpolated.stderr new file mode 100644 index 0000000..ef90813 --- /dev/null +++ b/vendor/quote/tests/ui/does-not-have-iter-interpolated.stderr @@ -0,0 +1,11 @@ +error[E0308]: mismatched types + --> tests/ui/does-not-have-iter-interpolated.rs:8:5 + | +8 | quote!(#(#nonrep)*); + | ^^^^^^^^^^^^^^^^^^^ + | | + | expected `HasIterator`, found `ThereIsNoIteratorInRepetition` + | expected due to this + | here the type of `has_iter` is inferred to be `ThereIsNoIteratorInRepetition` + | + = note: this error originates in the macro `$crate::quote_token_with_context` which comes from the expansion of the macro `quote` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/vendor/quote/tests/ui/does-not-have-iter-separated.rs b/vendor/quote/tests/ui/does-not-have-iter-separated.rs new file mode 100644 index 0000000..c027243 --- /dev/null +++ b/vendor/quote/tests/ui/does-not-have-iter-separated.rs @@ -0,0 +1,5 @@ +use quote::quote; + +fn main() { + quote!(#(a b),*); +} diff --git a/vendor/quote/tests/ui/does-not-have-iter-separated.stderr b/vendor/quote/tests/ui/does-not-have-iter-separated.stderr new file mode 100644 index 0000000..7c6e30f --- /dev/null +++ b/vendor/quote/tests/ui/does-not-have-iter-separated.stderr @@ -0,0 +1,10 @@ +error[E0308]: mismatched types + --> tests/ui/does-not-have-iter-separated.rs:4:5 + | +4 | quote!(#(a b),*); + | ^^^^^^^^^^^^^^^^ + | | + | expected `HasIterator`, found `ThereIsNoIteratorInRepetition` + | expected due to this + | + = note: this error originates in the macro `$crate::quote_token_with_context` which comes from the expansion of the macro `quote` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/vendor/quote/tests/ui/does-not-have-iter.rs b/vendor/quote/tests/ui/does-not-have-iter.rs new file mode 100644 index 0000000..8908353 --- /dev/null +++ b/vendor/quote/tests/ui/does-not-have-iter.rs @@ -0,0 +1,5 @@ +use quote::quote; + +fn main() { + quote!(#(a b)*); +} diff --git a/vendor/quote/tests/ui/does-not-have-iter.stderr b/vendor/quote/tests/ui/does-not-have-iter.stderr new file mode 100644 index 0000000..0b13e5c --- /dev/null +++ b/vendor/quote/tests/ui/does-not-have-iter.stderr @@ -0,0 +1,10 @@ +error[E0308]: mismatched types + --> tests/ui/does-not-have-iter.rs:4:5 + | +4 | quote!(#(a b)*); + | ^^^^^^^^^^^^^^^ + | | + | expected `HasIterator`, found `ThereIsNoIteratorInRepetition` + | expected due to this + | + = note: this error originates in the macro `$crate::quote_token_with_context` which comes from the expansion of the macro `quote` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/vendor/quote/tests/ui/not-quotable.rs b/vendor/quote/tests/ui/not-quotable.rs new file mode 100644 index 0000000..f991c18 --- /dev/null +++ b/vendor/quote/tests/ui/not-quotable.rs @@ -0,0 +1,7 @@ +use quote::quote; +use std::net::Ipv4Addr; + +fn main() { + let ip = Ipv4Addr::LOCALHOST; + let _ = quote! { #ip }; +} diff --git a/vendor/quote/tests/ui/not-quotable.stderr b/vendor/quote/tests/ui/not-quotable.stderr new file mode 100644 index 0000000..35cb6f2 --- /dev/null +++ b/vendor/quote/tests/ui/not-quotable.stderr @@ -0,0 +1,20 @@ +error[E0277]: the trait bound `Ipv4Addr: ToTokens` is not satisfied + --> tests/ui/not-quotable.rs:6:13 + | +6 | let _ = quote! 
{ #ip }; + | ^^^^^^^^^^^^^^ + | | + | the trait `ToTokens` is not implemented for `Ipv4Addr` + | required by a bound introduced by this call + | + = help: the following other types implement trait `ToTokens`: + bool + char + isize + i8 + i16 + i32 + i64 + i128 + and $N others + = note: this error originates in the macro `quote` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/vendor/quote/tests/ui/not-repeatable.rs b/vendor/quote/tests/ui/not-repeatable.rs new file mode 100644 index 0000000..a8f0fe7 --- /dev/null +++ b/vendor/quote/tests/ui/not-repeatable.rs @@ -0,0 +1,8 @@ +use quote::quote; + +struct Ipv4Addr; + +fn main() { + let ip = Ipv4Addr; + let _ = quote! { #(#ip)* }; +} diff --git a/vendor/quote/tests/ui/not-repeatable.stderr b/vendor/quote/tests/ui/not-repeatable.stderr new file mode 100644 index 0000000..2ed1da0 --- /dev/null +++ b/vendor/quote/tests/ui/not-repeatable.stderr @@ -0,0 +1,35 @@ +error[E0599]: the method `quote_into_iter` exists for struct `Ipv4Addr`, but its trait bounds were not satisfied + --> tests/ui/not-repeatable.rs:7:13 + | +3 | struct Ipv4Addr; + | --------------- + | | + | method `quote_into_iter` not found for this struct + | doesn't satisfy `Ipv4Addr: Iterator` + | doesn't satisfy `Ipv4Addr: ToTokens` + | doesn't satisfy `Ipv4Addr: ext::RepIteratorExt` + | doesn't satisfy `Ipv4Addr: ext::RepToTokensExt` +... +7 | let _ = quote! { #(#ip)* }; + | ^^^^^^^^^^^^^^^^^^ method cannot be called on `Ipv4Addr` due to unsatisfied trait bounds + | + = note: the following trait bounds were not satisfied: + `Ipv4Addr: Iterator` + which is required by `Ipv4Addr: ext::RepIteratorExt` + `&Ipv4Addr: Iterator` + which is required by `&Ipv4Addr: ext::RepIteratorExt` + `Ipv4Addr: ToTokens` + which is required by `Ipv4Addr: ext::RepToTokensExt` + `&mut Ipv4Addr: Iterator` + which is required by `&mut Ipv4Addr: ext::RepIteratorExt` +note: the traits `ToTokens` and `Iterator` must be implemented + --> src/to_tokens.rs + | + | pub trait ToTokens { + | ^^^^^^^^^^^^^^^^^^ + | + ::: $RUST/core/src/iter/traits/iterator.rs + | + | pub trait Iterator { + | ^^^^^^^^^^^^^^^^^^ + = note: this error originates in the macro `$crate::quote_bind_into_iter` which comes from the expansion of the macro `quote` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/vendor/quote/tests/ui/wrong-type-span.rs b/vendor/quote/tests/ui/wrong-type-span.rs new file mode 100644 index 0000000..d5601c8 --- /dev/null +++ b/vendor/quote/tests/ui/wrong-type-span.rs @@ -0,0 +1,7 @@ +use quote::quote_spanned; + +fn main() { + let span = ""; + let x = 0i32; + quote_spanned!(span=> #x); +} diff --git a/vendor/quote/tests/ui/wrong-type-span.stderr b/vendor/quote/tests/ui/wrong-type-span.stderr new file mode 100644 index 0000000..12ad307 --- /dev/null +++ b/vendor/quote/tests/ui/wrong-type-span.stderr @@ -0,0 +1,10 @@ +error[E0308]: mismatched types + --> tests/ui/wrong-type-span.rs:6:5 + | +6 | quote_spanned!(span=> #x); + | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | | + | expected `Span`, found `&str` + | expected due to this + | + = note: this error originates in the macro `quote_spanned` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/vendor/regex-automata/.cargo-checksum.json b/vendor/regex-automata/.cargo-checksum.json new file mode 100644 index 0000000..718faae --- /dev/null +++ b/vendor/regex-automata/.cargo-checksum.json @@ -0,0 +1 @@ 
+{"files":{"Cargo.toml":"68de160610af9bb545a752c5da0d82b581e98e0f2631e2253c8a992fc51e322b","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"61db25dbf26092fc80e8db89165692e55f9fb86b14e8451ebb28303f45932254","src/dfa/accel.rs":"2a045b0f6715e913d18d2212a7804fabaadfc3bcffad9382e35574d32eb0c492","src/dfa/automaton.rs":"c14707007bbb915fd5607424b0a4c8e53fa7daf6c7c1f4e3045d51ef15f9b202","src/dfa/dense.rs":"1de3a93f33525b419e3f47ab98d364234ccd68fc30273fd362fe8161a9a37ea3","src/dfa/determinize.rs":"91b9f69d28bdd064aa86716fe0772e4145050fd458bb7869a28660b4f7b64872","src/dfa/minimize.rs":"b5cadb462b9f24cd4aa7a665e75fb813cd06858a92b8986c9c5ae7fd9a60dfab","src/dfa/mod.rs":"ab1ac378d81bb5ea40a23cf903928adae4758e30f54646afde71869234965723","src/dfa/onepass.rs":"013f09b795955aefd07936994f08df4bc5b39698797f586b85171f778162aeab","src/dfa/regex.rs":"d16f0434a0b0f1341d6d5e0a162e6afa29411a786fb37b0e98bbcc0c6ba3cfec","src/dfa/remapper.rs":"ca096abc0f8e45c43a2adf3a7743b8857714ae7411a623edea41cc3ce906a169","src/dfa/search.rs":"79b9ab2b0636177bc26d1ad6f0059ca033decf74824cb5a36f1ac19f020d2713","src/dfa/sparse.rs":"c863d92a4d919fa880dfca3d59a8b5b672c6ffa8423578b34fc0af2ae62e1d7a","src/dfa/special.rs":"c2e60de5b98e68c9c45aaffbc67a08f049831a764a1ed29d1d1db0fb68efdce5","src/dfa/start.rs":"46b1dbaf8e4518ddddda6bbe596621aae36f8ba694390483a22355d9d799be8e","src/hybrid/dfa.rs":"a6ed6d3268e4008f88c1469029a84391edfee7851df2912640763e4ba2188635","src/hybrid/error.rs":"37db2a9759721de4ca2c49e21ab74dd3d998b67c5ab0e65a62085b57ec1d7ba3","src/hybrid/id.rs":"6168aad5c81c627494ba0575a24d61fd0ae7efabaaceeadb8ff28472275e2813","src/hybrid/mod.rs":"ca21e89062bdb5a0998d5cd1bc78609af1f6b795533e5982be969c383ac0463a","src/hybrid/regex.rs":"47815d025526330291f4cd749b4dd79b1122ef208fe6f0a49715c70fc1ea47c8","src/hybrid/search.rs":"76067f3f8675013dcdf7e9c9cc4d9d33d1107fb2cbcd7adcc05cfd42177d90cc","src/lib.rs":"4e831d41057760c5f2f1274a206fa5a42f59dbca8f98ad3e782fe0fba0d6c37f","src/macros.rs":"3e4b39252bfa471fad384160a43f113ebfec7bec46a85d16f006622881dd2081","src/meta/error.rs":"710a6813314b1b11ace1b016a827067fff8b2624d47e15c7f52043bff5ab57da","src/meta/limited.rs":"98b6b2d19f67d4ce3ddb110e06045f22a040590262fde33614ab900bdd06b25b","src/meta/literal.rs":"52da98bb30995dedd22786e4728cb84e84c6093a284168bd91196b999dd0f6ec","src/meta/mod.rs":"f3b10b96fa08efaba3e4c9b81883cf40aac6e4c1f6ae55a497a534cf5805b46d","src/meta/regex.rs":"b0fab107d3f972db89568e14fec0199ba4cd8076cc5fd61c2582db42885f196e","src/meta/reverse_inner.rs":"945d6c2d4c7538e1609dbd430a096784d22abd33db58b1ba65c9c9af45a7d3c0","src/meta/stopat.rs":"acb6122e17d10a9b1b5e72d6030e6d95748227975bad0ff5cbbcc2587edfa6df","src/meta/strategy.rs":"c882c5c261de5fe58bc65251d2d407e4cb483b9b80c2bec5eba958ef90e0072d","src/meta/wrappers.rs":"3cb0717f87b7082cc75cb02148b8cde30cffbee689bdb6275abcf1416747ceb4","src/nfa/mod.rs":"1a731e217ed4053714500e84e58cc127f402e4e075f7d0e5b9aea715cd52405a","src/nfa/thompson/backtrack.rs":"041015ea153c1e485e9cf39ec60d1e51c7ab9e400ecd77cad2078af45775339b","src/nfa/thompson/builder.rs":"7adf6aba69171f6acd47fea0fec85ba589154fead83f2042a1c6fe9486aa4dbd","src/nfa/thompson/compiler.rs":"a8bb24f7f125a294cb75af9d8332821142738278d8eff354647ae08f66a597af","src/nfa/thompson/error.rs":"78488c2fdb85f819f53cc30bb11c7f96169112da5dd14c351e5cc3bcccf0e10e","src/nfa/thompson/literal_trie.rs":"c2d1d09b44da4648db797386c2410cbf63337afef8cb62e6e78cf34786892a11","src/nfa/thompso
n/map.rs":"4dcfc0a43bbd91ff69cc8a47486b5f7cac048f7d5cf269b63933d7f2e177a533","src/nfa/thompson/mod.rs":"0651520debd6f023ae1a2c422806aab37f8491e5bb092e20dfdc4fe4179d695c","src/nfa/thompson/nfa.rs":"410c3745c159eb17bea18256ec03ee92e1fccca630f01a24618a75fffcf86866","src/nfa/thompson/pikevm.rs":"aaf792832d1bf15fad8a8f0b2e6597170361eb3cbcb9343eb5bd242ff346d750","src/nfa/thompson/range_trie.rs":"f081e74e8c08e2268f1ce410608fefa4b14d7d1c0487dbc27d27d51ea6265e30","src/util/alphabet.rs":"94cd73ce2f4e34e0ae0a146d3efdc85478263afdfefd6dc105e0abf0ec79d82b","src/util/captures.rs":"d2a118ba509b70e9922a10ea9f78771b14a521abb0ed4029be3ef6aeea44d032","src/util/determinize/mod.rs":"5e9e1f7dd060d69521b743afc2b900b21ad7942e17397084ac6563ea5dcf2fd9","src/util/determinize/state.rs":"c30eac89137df0f0128143eeb2e0c8d7ea4bd659825fa6721b5315141a326e3a","src/util/empty.rs":"13ec7d6cbd1520db5b4c1dae294f4419fa88d39d2bfc16f4ef258473d609f91c","src/util/escape.rs":"5b2731b41a55cb50ab688132bb5640dbd51f14f141adaa864b9db7f0aa092c74","src/util/int.rs":"b7eec0a6cab0798ba66707988fce3ecfc841b93418028a7b1408c5d0f6271351","src/util/interpolate.rs":"5e4e6b6fb6e5a7603e393bf05c609735d86a7d1f54c2436e42111b4e1409b6dd","src/util/iter.rs":"58ae97b4156d7160a46b909f4635d88d10354d9d892d2fcb4c5e18e24cf38f14","src/util/lazy.rs":"e16b3ed139210ca546fc302c463ce52a5dcfa77382f07c9097400ed8cddf78c8","src/util/look.rs":"fbfcaace79d0c6ad3698c9d6c025cb952f2e00cf88a48cf690d087fa73466689","src/util/memchr.rs":"573109ce4983907083ae0b29a084a324b9b53da369b4d96f7f3a21fd5c8eb5c9","src/util/mod.rs":"6c828a493f0f88c8b515aee4f8faf91ba653eb07e8fc3c23c0524553410803f9","src/util/pool.rs":"da1fad31f2fdf15cf3a6a605ece8d6162d8f6c42770c160af4c0fbf4ef148aa5","src/util/prefilter/aho_corasick.rs":"c54fa95f4d9e7ab53e2c6463a43f8953df6a440997fc9cd528f225db0dd32582","src/util/prefilter/byteset.rs":"1c80fa432acc23223a75a5181e37c40034764dffe42410e4b77af6f24f48bd5c","src/util/prefilter/memchr.rs":"36c6fe6354b2e729db6830166dd4862e439bc48c9e59258d88e4b6c5654e20ef","src/util/prefilter/memmem.rs":"6f6ed9450b14abf3e4a33d395337e51fbaa9743a0a16aac0009f7680aa60c500","src/util/prefilter/mod.rs":"2818e2e92632aee1c46b0dc01b654e544bfbf460236be86d28a2d836e9fc189a","src/util/prefilter/teddy.rs":"ed54d26858b56e1c8c87e44afae5f63d81ab930787d79e671f3a3513f576e9cd","src/util/primitives.rs":"8a9cc19ef2e1ab183943cdc2d2f095b02252476e32b7e9fff4a06a251749b068","src/util/search.rs":"66bf320ebbe403c119a966f3dfbd53178de0ceebd2ca1922f1ddbb79aed36837","src/util/sparse_set.rs":"3d4aa30b6aa9fc875d36506487a5095dbe8ed528b89e4146a65c7e7497520a4d","src/util/start.rs":"73ebcf2550cea56f67b9048fa3dc91f3a8db9897fbd2400dd9941efb0cb4827e","src/util/syntax.rs":"720ac0d6600fad33f5967b5afe4e3de2096b857e4cda6fa16ba93b10a8230cab","src/util/unicode_data/mod.rs":"54c3e10bbc393e9881bfac3295815b160f59e69e2056bc29ee7cf0addd8e3cf7","src/util/unicode_data/perl_word.rs":"2e1a5d889598bd4e73af17d3a9f7d6b4cf2f6ab24920a5336e496bb255281e56","src/util/utf8.rs":"7a068009fdf07e693e521b1f0264725c0e6118dbe1eab55da9d0eab21785fcc1","src/util/wire.rs":"bfdf52615c516b6c07db3ce9c333ea61fdc535bd0b79560bbd7f6864ab83946e","test":"39d79ce3532c31a51c0be89a2939816fad0e4868d2b03992c202cbe64dce9f6c","tests/dfa/api.rs":"cc28e366b6bcbfcf379265acd492a92c62743c3f20e7a2b273019679aa9e1291","tests/dfa/mod.rs":"924d8fff500b9b7b140082623023e78007058a87323151cd8e361462945e4f16","tests/dfa/onepass/mod.rs":"d08f4ecb8ec243be584944c9602af1ed3a48a8732dd11cd573b0d1d182171303","tests/dfa/onepass/suite.rs":"6d63ec5469e6876656ae607cdbe07e6a4e17ace7836b67435763c9b1d233438a","t
ests/dfa/regression.rs":"ebcf2645290286aa7531eb2b7951385e5ed8167532437aeca2ad2049768fd796","tests/dfa/suite.rs":"2812aa0167ee5e93eff3f7d45096a78c5f3a2440197a513b3cf0310286640f51","tests/fuzz/dense.rs":"3e1099a0cce61e85abc0ad81bc592e85f497f159ef0e5d1d32bac1936aa6f20c","tests/fuzz/mod.rs":"043773510e02f51def43ee0c2b8b867c53ecc8638c8a9233b2ac098de9c3ac1e","tests/fuzz/sparse.rs":"ba61db4927ab28953037a4b20317399c86d01b4d774e46c020ade19029215e25","tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9":"8961279a8237c3e318452024dd971b1d5a26b058260c297382a74daca1b7f0d1","tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9":"c2d52e3dea78d3f159b5b521d433358a7fee45ce20ed1545067d461f45ef66b8","tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000":"5b2d273023de3fb04037eaf2e6b4f51cced4c5a08d2e6b44e4be540774f939b9","tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9":"e2e22e2f46a9a75b5c876476442276cf675fe244c5cf918789e4f6b14078fbd9","tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98":"24a12712e1f2ba0a40b5782707908a74dd19941dc372ef525d65a7134f91988c","tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838":"a97f39b2febf9c73535681f7a86201e4b06d5a1ffcf135299c96c1cabfa9f6c4","tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570":"44fe3ef878d35e2d51c2c17ff89bbbe3a4650e09d0cbbd48625c0f5e4dd0848b","tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b":"d5534be36653b4af6cb94a7c63be58869bb8c204c5c63d67a4d6c986b44bb2e1","tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9":"77b844898610560afa09f2b8de73a85a0ba9a3b8cee4ff1bbf26b8c97ad4e8a2","tests/gen/README.md":"c3bfdf2f9ced501dd5bd75d01509a34e503efb2dff2f5f7b260580dde5519ed4","tests/gen/dense/mod.rs":"5ae1cfb46212a674118ada2f66f37b25188e84643d406b95eb4665d722344262","tests/gen/dense/multi_pattern_v2.rs":"29b1e9a799adecbdbe7cd05e9748f664c2b915b10b1d2f5d36cfb6453826d1d2","tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa":"8421d5a1bfc0b6c3bdc8fc90dff591a046b0aaf8e06ef7de7cc293004a35d061","tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa":"dcf2fd5fd49f5f53cf1ec66f61623402f39401cb3aea30d6677b98bb1e9541bf","tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa":"73c4f20d984e544dfa4cf05f3009d0a9b52fa84bc97b501ea0ccd179e2def4bc","tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa":"74471209f05754e8e20c8a0222a5877b1b15b8b8f33cd8cac89ea65f708b4aff","tests/gen/mod.rs":"043773510e02f51def43ee0c2b8b867c53ecc8638c8a9233b2ac098de9c3ac1e","tests/gen/sparse/mod.rs":"5ae1cfb46212a674118ada2f66f37b25188e84643d406b95eb4665d722344262","tests/gen/sparse/multi_pattern_v2.rs":"e00fb2a510a215460aab84573196b1f51bb65884ff494c2382534c04f6fdbfe9","tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa":"3287956bd2003cd69653b125f82aade95d99adbb20229bfdbb4958b8877c0a0b","tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa":"bdf285901eaaac4596380115c5bbb20ab2f42f593d8d9e9238a00ed69863f9c9","tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa":"e466dc085dd68b2d2220932a0e4d28759edd161c1fdad652240aa3825fd85268","tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa":"80358d0c26c1cc7284065b0075f5b8804d83e673a8a8c8327f93a1c1ff455399","tests/hybrid/api.rs":"bd4862275c52f94c6f6737bf174c97e3de30f8075ca23f43c129c72a0d0afed7","tests/hybrid/mod.rs":"4856a49a4d9b5e9e079c2719a5e75c32408b37e9b76cbdea0
57b388a3537af6d","tests/hybrid/suite.rs":"688972275c5ef38cdc5112a1e6e54ccd2bf8290008ae2b17344c6c81e17e3a5a","tests/lib.rs":"9775b3c62fb338ea5c1bd3513a6589eff4b5c8d35c599439d9363dbf98c6f8d4","tests/meta/mod.rs":"d08f4ecb8ec243be584944c9602af1ed3a48a8732dd11cd573b0d1d182171303","tests/meta/suite.rs":"4c441f9df82508a5e60dd08f266183f772fc9b2b236fbf69cab87650ecf3b424","tests/nfa/mod.rs":"49055c358e38d97e42acb1602c671f97dddf24cafe089490f0e79ed208d74d9b","tests/nfa/thompson/backtrack/mod.rs":"d08f4ecb8ec243be584944c9602af1ed3a48a8732dd11cd573b0d1d182171303","tests/nfa/thompson/backtrack/suite.rs":"4e7baff70fc98b98b8297c6fd6d5818beb20343379e16cdb95bee46207ac4bd6","tests/nfa/thompson/mod.rs":"de9f5bcea1a8d1f03c85c55ad8c0747877d69e344fcd6c6886b0a402f0661291","tests/nfa/thompson/pikevm/mod.rs":"d08f4ecb8ec243be584944c9602af1ed3a48a8732dd11cd573b0d1d182171303","tests/nfa/thompson/pikevm/suite.rs":"263837ebf5b2e1906a06237982ea875386d83567e399b4ec1f669f10b1422599"},"package":"5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"} \ No newline at end of file diff --git a/vendor/regex-automata/Cargo.toml b/vendor/regex-automata/Cargo.toml new file mode 100644 index 0000000..8b7cf0a --- /dev/null +++ b/vendor/regex-automata/Cargo.toml @@ -0,0 +1,183 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.65" +name = "regex-automata" +version = "0.4.3" +authors = [ + "The Rust Project Developers", + "Andrew Gallant ", +] +autoexamples = false +description = "Automata construction and matching using regular expressions." 
+documentation = "https://docs.rs/regex-automata" +readme = "README.md" +keywords = [ + "regex", + "dfa", + "automata", + "automaton", + "nfa", +] +categories = ["text-processing"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/rust-lang/regex/tree/master/regex-automata" + +[lib] +bench = false + +[[test]] +name = "integration" +path = "tests/lib.rs" + +[dependencies.aho-corasick] +version = "1.0.0" +optional = true +default-features = false + +[dependencies.log] +version = "0.4.14" +optional = true + +[dependencies.memchr] +version = "2.6.0" +optional = true +default-features = false + +[dependencies.regex-syntax] +version = "0.8.2" +optional = true +default-features = false + +[dev-dependencies.anyhow] +version = "1.0.69" + +[dev-dependencies.bstr] +version = "1.3.0" +features = ["std"] +default-features = false + +[dev-dependencies.doc-comment] +version = "0.3.3" + +[dev-dependencies.env_logger] +version = "0.9.3" +features = [ + "atty", + "humantime", + "termcolor", +] +default-features = false + +[dev-dependencies.quickcheck] +version = "1.0.3" +default-features = false + +[dev-dependencies.regex-test] +version = "0.1.0" + +[features] +alloc = [] +default = [ + "std", + "syntax", + "perf", + "unicode", + "meta", + "nfa", + "dfa", + "hybrid", +] +dfa = [ + "dfa-build", + "dfa-search", + "dfa-onepass", +] +dfa-build = [ + "nfa-thompson", + "dfa-search", +] +dfa-onepass = ["nfa-thompson"] +dfa-search = [] +hybrid = [ + "alloc", + "nfa-thompson", +] +internal-instrument = ["internal-instrument-pikevm"] +internal-instrument-pikevm = [ + "logging", + "std", +] +logging = [ + "dep:log", + "aho-corasick?/logging", + "memchr?/logging", +] +meta = [ + "syntax", + "nfa-pikevm", +] +nfa = [ + "nfa-thompson", + "nfa-pikevm", + "nfa-backtrack", +] +nfa-backtrack = ["nfa-thompson"] +nfa-pikevm = ["nfa-thompson"] +nfa-thompson = ["alloc"] +perf = [ + "perf-inline", + "perf-literal", +] +perf-inline = [] +perf-literal = [ + "perf-literal-substring", + "perf-literal-multisubstring", +] +perf-literal-multisubstring = [ + "std", + "dep:aho-corasick", +] +perf-literal-substring = [ + "aho-corasick?/perf-literal", + "dep:memchr", +] +std = [ + "regex-syntax?/std", + "memchr?/std", + "aho-corasick?/std", + "alloc", +] +syntax = [ + "dep:regex-syntax", + "alloc", +] +unicode = [ + "unicode-age", + "unicode-bool", + "unicode-case", + "unicode-gencat", + "unicode-perl", + "unicode-script", + "unicode-segment", + "unicode-word-boundary", + "regex-syntax?/unicode", +] +unicode-age = ["regex-syntax?/unicode-age"] +unicode-bool = ["regex-syntax?/unicode-bool"] +unicode-case = ["regex-syntax?/unicode-case"] +unicode-gencat = ["regex-syntax?/unicode-gencat"] +unicode-perl = ["regex-syntax?/unicode-perl"] +unicode-script = ["regex-syntax?/unicode-script"] +unicode-segment = ["regex-syntax?/unicode-segment"] +unicode-word-boundary = [] diff --git a/vendor/regex-automata/LICENSE-APACHE b/vendor/regex-automata/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/vendor/regex-automata/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/vendor/regex-automata/LICENSE-MIT b/vendor/regex-automata/LICENSE-MIT new file mode 100644 index 0000000..39d4bdb --- /dev/null +++ b/vendor/regex-automata/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2014 The Rust Project Developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/vendor/regex-automata/README.md b/vendor/regex-automata/README.md new file mode 100644 index 0000000..c12b070 --- /dev/null +++ b/vendor/regex-automata/README.md @@ -0,0 +1,117 @@ +regex-automata +============== +This crate exposes a variety of regex engines used by the `regex` crate. +It provides a vast, sprawling and "expert" level API to each regex engine. +The regex engines provided by this crate focus heavily on finite automata +implementations and specifically guarantee worst case `O(m * n)` time +complexity for all searches. (Where `m ~ len(regex)` and `n ~ len(haystack)`.) + +[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions) +[![Crates.io](https://img.shields.io/crates/v/regex-automata.svg)](https://crates.io/crates/regex-automata) + + +### Documentation + +https://docs.rs/regex-automata + + +### Example + +This example shows how to search for matches of multiple regexes, where each +regex uses the same capture group names to parse different key-value formats. + +```rust +use regex_automata::{meta::Regex, PatternID}; + +let re = Regex::new_many(&[ + r#"(?m)^(?<key>[[:word:]]+)=(?<val>[[:word:]]+)$"#, + r#"(?m)^(?<key>[[:word:]]+)="(?<val>[^"]+)"$"#, + r#"(?m)^(?<key>[[:word:]]+)='(?<val>[^']+)'$"#, + r#"(?m)^(?<key>[[:word:]]+):\s*(?<val>[[:word:]]+)$"#, +]).unwrap(); +let hay = r#" +best_album="Blow Your Face Out" +best_quote='"then as it was, then again it will be"' +best_year=1973 +best_simpsons_episode: HOMR +"#; +let mut kvs = vec![]; +for caps in re.captures_iter(hay) { + // N.B. One could use capture indices '1' and '2' here + // as well. Capture indices are local to each pattern. + // (Just like names are.) + let key = &hay[caps.get_group_by_name("key").unwrap()]; + let val = &hay[caps.get_group_by_name("val").unwrap()]; + kvs.push((key, val)); +} +assert_eq!(kvs, vec![ + ("best_album", "Blow Your Face Out"), + ("best_quote", "\"then as it was, then again it will be\""), + ("best_year", "1973"), + ("best_simpsons_episode", "HOMR"), +]); +``` + + +### Safety + +**I welcome audits of `unsafe` code.** + +This crate tries to be extremely conservative in its use of `unsafe`, but does +use it in a few spots.
In general, I am very open to removing uses of `unsafe` +if it doesn't result in measurable performance regressions and doesn't result +in significantly more complex code. + +Below is an outline of how `unsafe` is used in this crate. + +* `util::pool::Pool` makes use of `unsafe` to implement a fast path for +accessing an element of the pool. The fast path applies to the first thread +that uses the pool. In effect, the fast path is fast because it avoids a mutex +lock. `unsafe` is also used in the no-std version of `Pool` to implement a spin +lock for synchronization. +* `util::lazy::Lazy` uses `unsafe` to implement a variant of +`once_cell::sync::Lazy` that works in no-std environments. A no-std no-alloc +implementation is also provided that requires use of `unsafe`. +* The `dfa` module makes extensive use of `unsafe` to support zero-copy +deserialization of DFAs. The high level problem is that you need to get from +`&[u8]` to the internal representation of a DFA without doing any copies. +This is required for support in no-std no-alloc environments. It also makes +deserialization extremely cheap. +* The `dfa` and `hybrid` modules use `unsafe` to explicitly elide bounds checks +in the core search loops. This makes the codegen tighter and typically leads to +consistent 5-10% performance improvements on some workloads. + +In general, the above reflect the only uses of `unsafe` throughout the entire +`regex` crate. At present, there are no plans to meaningfully expand the use +of `unsafe`. With that said, one thing folks have been asking for is cheap +deserialization of a `regex::Regex`. My sense is that this feature will require +a lot more `unsafe` in places to support zero-copy deserialization. It is +unclear at this point whether this will be pursued. + + +### Motivation + +I started out building this crate because I wanted to re-work the `regex` +crate internals to make it more amenable to optimizations. It turns out that +there are a lot of different ways to build regex engines and even more ways to +compose them. Moreover, heuristic literal optimizations are often tricky to +get correct, but the fruit they bear is attractive. All of these things were +difficult to expand upon without risking the introduction of more bugs. So I +decided to tear things down and start fresh. + +In the course of doing so, I ended up designing strong boundaries between each +component so that each component could be reasoned and tested independently. +This also made it somewhat natural to expose the components as a library unto +itself. Namely, folks have been asking for more capabilities in the regex +crate for a long time, but these capabilities usually come with additional API +complexity that I didn't want to introduce in the `regex` crate proper. But +exposing them in an "expert" level crate like `regex-automata` seemed quite +fine. + +In the end, I do still somewhat consider this crate an experiment. It is +unclear whether the strong boundaries between components will be an impediment +to ongoing development or not. De-coupling tends to lead to slower development +in my experience, and when you mix in the added cost of not introducing +breaking changes all of the time, things can get quite complicated. But, I +don't think anyone has ever released the internals of a regex engine as a +library before. So it will be interesting to see how it plays out!
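+
+As an extra illustration of the "expert" level APIs mentioned above (this is
+a hedged sketch added for exposition, not part of the upstream README; it
+assumes the crate's default features, which enable `dfa` and `syntax`), the
+same worst-case `O(m * n)` guarantee holds when driving a dense DFA directly:
+
+```rust
+use regex_automata::{
+    dfa::{dense, Automaton},
+    HalfMatch, Input,
+};
+
+// Build a dense DFA for one pattern and run a forward search.
+// `try_search_fwd` reports the end of the leftmost match as a
+// `HalfMatch` (a pattern ID plus an end offset).
+let dfa = dense::DFA::new(r"[0-9]{4}").unwrap();
+let input = Input::new("launched in 1973 or so");
+let got = dfa.try_search_fwd(&input).unwrap();
+assert_eq!(Some(HalfMatch::must(0, 16)), got);
+```
+
+Note that a half match only carries the end offset; recovering the start of
+the match requires a second search with a reverse DFA, which is part of why
+these APIs are considered expert level.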
diff --git a/vendor/regex-automata/src/dfa/accel.rs b/vendor/regex-automata/src/dfa/accel.rs new file mode 100644 index 0000000..c0ba18e --- /dev/null +++ b/vendor/regex-automata/src/dfa/accel.rs @@ -0,0 +1,517 @@ +// This module defines some core types for dealing with accelerated DFA states. +// Briefly, a DFA state can be "accelerated" if all of its transitions except +// for a few loop back to itself. This directly implies that the only way out +// of such a state is if a byte corresponding to one of those non-loopback +// transitions is found. Such states are often found in simple repetitions in +// non-Unicode regexes. For example, consider '(?-u)[^a]+a'. We can look at its +// DFA with regex-cli: +// +// $ regex-cli debug dense dfa -p '(?-u)[^a]+a' -BbC --no-table +// D 000000: +// Q 000001: +// *000002: +// A 000003: \x00-` => 3, a => 8, b-\xFF => 3 +// A 000004: \x00-` => 4, a => 7, b-\xFF => 4 +// 000005: \x00-` => 4, b-\xFF => 4 +// 000006: \x00-` => 3, a => 6, b-\xFF => 3 +// 000007: \x00-\xFF => 2, EOI => 2 +// 000008: \x00-\xFF => 2, EOI => 2 +// +// In particular, state 3 is accelerated (shown via the 'A' indicator) since +// the only way to leave that state once entered is to see an 'a' byte. If +// there is a long run of non-'a' bytes, then using something like 'memchr' +// to find the next 'a' byte can be significantly faster than just using the +// standard byte-at-a-time state machine. +// +// Unfortunately, this optimization rarely applies when Unicode is enabled. +// For example, patterns like '[^a]' don't actually match any byte that isn't +// 'a', but rather, any UTF-8 encoding of a Unicode scalar value that isn't +// 'a'. This makes the state machine much more complex---far beyond a single +// state---and removes the ability to easily accelerate it. (Because if the +// machine sees a non-UTF-8 sequence, then the machine won't match through it.) +// +// In practice, we only consider accelerating states that have 3 or fewer +// non-loop transitions, partly because beyond that you get diminishing +// returns and partly because that's what the memchr crate supports. The +// structures below hard-code this assumption and provide (de)serialization +// APIs for use inside a DFA. +// +// And finally, note that there is some trickery involved in making it very +// fast to not only check whether a state is accelerated at search time, but +// also to access the bytes to search for to implement the acceleration itself. +// dfa/special.rs provides more detail, but the short story is that all +// accelerated states appear contiguously in a DFA. This means we can represent +// the ID space of all accelerated DFA states with a single range. So given +// a state ID, we can determine whether it's accelerated via +// +// min_accel_id <= id <= max_accel_id +// +// And find its corresponding accelerator with: +// +// accels.get((id - min_accel_id) / dfa_stride) + +#[cfg(feature = "dfa-build")] +use alloc::{vec, vec::Vec}; + +use crate::util::{ + int::Pointer, + memchr, + wire::{self, DeserializeError, Endian, SerializeError}, +}; + +/// The base type used to represent a collection of accelerators. +/// +/// While an `Accel` is represented as a fixed size array of bytes, a +/// *collection* of `Accel`s (called `Accels`) is represented internally as a +/// slice of u32. While it's a bit unnatural to do this and costs us a bit of +/// fairly low-risk not-safe code, it lets us remove the need for a second type +/// parameter in the definition of dense::DFA.
(Which really wants everything +/// to be a slice of u32.) +type AccelTy = u32; + +/// The size of the unit of representation for accelerators. +/// +/// ACCEL_CAP *must* be a multiple of this size. +const ACCEL_TY_SIZE: usize = core::mem::size_of::<AccelTy>(); + +/// The maximum length in bytes that a single Accel can be. This is distinct +/// from the capacity of an accelerator in that the length represents only the +/// bytes that should be read. +const ACCEL_LEN: usize = 4; + +/// The capacity of each accelerator, in bytes. We set this to 8 since it's a +/// multiple of 4 (our ID size) and because it gives us a little wiggle room +/// if we want to support more accel bytes in the future without a breaking +/// change. +/// +/// This MUST be a multiple of ACCEL_TY_SIZE. +const ACCEL_CAP: usize = 8; + +/// Search for between 1 and 3 needle bytes in the given haystack, starting the +/// search at the given position. If `needles` has a length other than 1-3, +/// then this panics. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn find_fwd( + needles: &[u8], + haystack: &[u8], + at: usize, +) -> Option<usize> { + let bs = needles; + let i = match needles.len() { + 1 => memchr::memchr(bs[0], &haystack[at..])?, + 2 => memchr::memchr2(bs[0], bs[1], &haystack[at..])?, + 3 => memchr::memchr3(bs[0], bs[1], bs[2], &haystack[at..])?, + 0 => panic!("cannot find with empty needles"), + n => panic!("invalid needles length: {}", n), + }; + Some(at + i) +} + +/// Search for between 1 and 3 needle bytes in the given haystack in reverse, +/// starting the search at the given position. If `needles` has a length other +/// than 1-3, then this panics. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn find_rev( + needles: &[u8], + haystack: &[u8], + at: usize, +) -> Option<usize> { + let bs = needles; + match needles.len() { + 1 => memchr::memrchr(bs[0], &haystack[..at]), + 2 => memchr::memrchr2(bs[0], bs[1], &haystack[..at]), + 3 => memchr::memrchr3(bs[0], bs[1], bs[2], &haystack[..at]), + 0 => panic!("cannot find with empty needles"), + n => panic!("invalid needles length: {}", n), + } +} + +/// Represents the accelerators for all accelerated states in a dense DFA. +/// +/// The `A` type parameter represents the type of the underlying bytes. +/// Generally, this is either `&[AccelTy]` or `Vec<AccelTy>`. +#[derive(Clone)] +pub(crate) struct Accels<A> { + /// A length prefixed slice of contiguous accelerators. See the top comment + /// in this module for more details on how we can jump from a DFA's state + /// ID to an accelerator in this list. + /// + /// The first 4 bytes always correspond to the number of accelerators + /// that follow. + accels: A, +} + +#[cfg(feature = "dfa-build")] +impl Accels<Vec<AccelTy>> { + /// Create an empty sequence of accelerators for a DFA. + pub fn empty() -> Accels<Vec<AccelTy>> { + Accels { accels: vec![0] } + } + + /// Add an accelerator to this sequence. + /// + /// This adds to the accelerator to the end of the sequence and therefore + /// should be done in correspondence with its state in the DFA. + /// + /// This panics if this results in more accelerators than AccelTy::MAX. + pub fn add(&mut self, accel: Accel) { + self.accels.extend_from_slice(&accel.as_accel_tys()); + let len = self.len(); + self.set_len(len + 1); + } + + /// Set the number of accelerators in this sequence, which is encoded in + /// the first 4 bytes of the underlying bytes.
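+    ///
+    /// As a rough sketch of the serialized layout (pieced together from the
+    /// constants and comments above; not a normative description), the
+    /// underlying bytes look like:
+    ///
+    /// ```text
+    /// [count: u32][accel 0: 8 bytes][accel 1: 8 bytes]...
+    /// where each accel is [len: u8][up to 3 needle bytes][zero padding]
+    /// ```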
+ fn set_len(&mut self, new_len: usize) { + // The only way an accelerator gets added is if a state exists for + // it, and if a state exists, then its index is guaranteed to be + // representable by a AccelTy by virtue of the guarantees provided by + // StateID. + let new_len = AccelTy::try_from(new_len).unwrap(); + self.accels[0] = new_len; + } +} + +impl<'a> Accels<&'a [AccelTy]> { + /// Deserialize a sequence of accelerators from the given bytes. If there + /// was a problem deserializing, then an error is returned. + /// + /// This is guaranteed to run in constant time. This does not guarantee + /// that every accelerator in the returned collection is valid. Thus, + /// accessing one may panic, or not-safe code that relies on accelerators + /// being correct may result in UB. + /// + /// Callers may check the validity of every accelerator with the `validate` + /// method. + pub fn from_bytes_unchecked( + mut slice: &'a [u8], + ) -> Result<(Accels<&'a [AccelTy]>, usize), DeserializeError> { + let slice_start = slice.as_ptr().as_usize(); + + let (accel_len, _) = + wire::try_read_u32_as_usize(slice, "accelerators length")?; + // The accelerator length is part of the accel_tys slice that + // we deserialize. This is perhaps a bit idiosyncratic. It would + // probably be better to split out the length into a real field. + + let accel_tys_len = wire::add( + wire::mul(accel_len, 2, "total number of accelerator accel_tys")?, + 1, + "total number of accel_tys", + )?; + let accel_tys_bytes_len = wire::mul( + ACCEL_TY_SIZE, + accel_tys_len, + "total number of bytes in accelerators", + )?; + wire::check_slice_len(slice, accel_tys_bytes_len, "accelerators")?; + wire::check_alignment::<AccelTy>(slice)?; + let accel_tys = &slice[..accel_tys_bytes_len]; + slice = &slice[accel_tys_bytes_len..]; + // SAFETY: We've checked the length and alignment above, and since + // slice is just bytes and AccelTy is just a u32, we can safely cast to + // a slice of &[AccelTy]. + let accels = unsafe { + core::slice::from_raw_parts( + accel_tys.as_ptr().cast::<AccelTy>(), + accel_tys_len, + ) + }; + Ok((Accels { accels }, slice.as_ptr().as_usize() - slice_start)) + } +} + +impl<A: AsRef<[AccelTy]>> Accels<A> { + /// Return an owned version of the accelerators. + #[cfg(feature = "alloc")] + pub fn to_owned(&self) -> Accels<Vec<AccelTy>> { + Accels { accels: self.accels.as_ref().to_vec() } + } + + /// Return a borrowed version of the accelerators. + pub fn as_ref(&self) -> Accels<&[AccelTy]> { + Accels { accels: self.accels.as_ref() } + } + + /// Return the bytes representing the serialization of the accelerators. + pub fn as_bytes(&self) -> &[u8] { + let accels = self.accels.as_ref(); + // SAFETY: This is safe because accels is a just a slice of AccelTy, + // and u8 always has a smaller alignment. + unsafe { + core::slice::from_raw_parts( + accels.as_ptr().cast::<u8>(), + accels.len() * ACCEL_TY_SIZE, + ) + } + } + + /// Returns the memory usage, in bytes, of these accelerators. + /// + /// The memory usage is computed based on the number of bytes used to + /// represent all of the accelerators. + /// + /// This does **not** include the stack size used by this value. + pub fn memory_usage(&self) -> usize { + self.as_bytes().len() + } + + /// Return the bytes to search for corresponding to the accelerator in this + /// sequence at index `i`. If no such accelerator exists, then this panics. + /// + /// The significance of the index is that it should be in correspondence + /// with the index of the corresponding DFA.
That is, accelerated DFA + /// states are stored contiguously in the DFA and have an ordering implied + /// by their respective state IDs. The state's index in that sequence + /// corresponds to the index of its corresponding accelerator. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub fn needles(&self, i: usize) -> &[u8] { + if i >= self.len() { + panic!("invalid accelerator index {}", i); + } + let bytes = self.as_bytes(); + let offset = ACCEL_TY_SIZE + i * ACCEL_CAP; + let len = usize::from(bytes[offset]); + &bytes[offset + 1..offset + 1 + len] + } + + /// Return the total number of accelerators in this sequence. + pub fn len(&self) -> usize { + // This should never panic since deserialization checks that the + // length can fit into a usize. + usize::try_from(self.accels.as_ref()[0]).unwrap() + } + + /// Return the accelerator in this sequence at index `i`. If no such + /// accelerator exists, then this returns None. + /// + /// See the docs for `needles` on the significance of the index. + fn get(&self, i: usize) -> Option<Accel> { + if i >= self.len() { + return None; + } + let offset = ACCEL_TY_SIZE + i * ACCEL_CAP; + let accel = Accel::from_slice(&self.as_bytes()[offset..]) + .expect("Accels must contain valid accelerators"); + Some(accel) + } + + /// Returns an iterator of accelerators in this sequence. + fn iter(&self) -> IterAccels<'_, A> { + IterAccels { accels: self, i: 0 } + } + + /// Writes these accelerators to the given byte buffer using the indicated + /// endianness. If the given buffer is too small, then an error is + /// returned. Upon success, the total number of bytes written is returned. + /// The number of bytes written is guaranteed to be a multiple of 8. + pub fn write_to<E: Endian>( + &self, + dst: &mut [u8], + ) -> Result<usize, SerializeError> { + let nwrite = self.write_to_len(); + assert_eq!( + nwrite % ACCEL_TY_SIZE, + 0, + "expected accelerator bytes written to be a multiple of {}", + ACCEL_TY_SIZE, + ); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("accelerators")); + } + + // The number of accelerators can never exceed AccelTy::MAX. + E::write_u32(AccelTy::try_from(self.len()).unwrap(), dst); + // The actual accelerators are just raw bytes and thus their endianness + // is irrelevant. So we can copy them as bytes. + dst[ACCEL_TY_SIZE..nwrite] + .copy_from_slice(&self.as_bytes()[ACCEL_TY_SIZE..nwrite]); + Ok(nwrite) + } + + /// Validates that every accelerator in this collection can be successfully + /// deserialized as a valid accelerator. + pub fn validate(&self) -> Result<(), DeserializeError> { + for chunk in self.as_bytes()[ACCEL_TY_SIZE..].chunks(ACCEL_CAP) { + let _ = Accel::from_slice(chunk)?; + } + Ok(()) + } + + /// Returns the total number of bytes written by `write_to`. + pub fn write_to_len(&self) -> usize { + self.as_bytes().len() + } +} + +impl<A: AsRef<[AccelTy]>> core::fmt::Debug for Accels<A> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "Accels(")?; + let mut list = f.debug_list(); + for a in self.iter() { + list.entry(&a); + } + list.finish()?; + write!(f, ")") + } +} + +#[derive(Debug)] +struct IterAccels<'a, A: AsRef<[AccelTy]>> { + accels: &'a Accels<A>, + i: usize, +} + +impl<'a, A: AsRef<[AccelTy]>> Iterator for IterAccels<'a, A> { + type Item = Accel; + + fn next(&mut self) -> Option<Accel> { + let accel = self.accels.get(self.i)?; + self.i += 1; + Some(accel) + } +} + +/// Accel represents a structure for determining how to "accelerate" a DFA +/// state.
+/// +/// Namely, it contains zero or more bytes that must be seen in order for the +/// DFA to leave the state it is associated with. In practice, the actual range +/// is 1 to 3 bytes. +/// +/// The purpose of acceleration is to identify states whose vast majority +/// of transitions are just loops back to the same state. For example, +/// in the regex `(?-u)^[^a]+b`, the corresponding DFA will have a state +/// (corresponding to `[^a]+`) where all transitions *except* for `a` and +/// `b` loop back to itself. Thus, this state can be "accelerated" by simply +/// looking for the next occurrence of either `a` or `b` instead of explicitly +/// following transitions. (In this case, `b` transitions to the next state +/// where as `a` would transition to the dead state.) +#[derive(Clone)] +pub(crate) struct Accel { + /// The first byte is the length. Subsequent bytes are the accelerated + /// bytes. + /// + /// Note that we make every accelerator 8 bytes as a slightly wasteful + /// way of making sure alignment is always correct for state ID sizes of + /// 1, 2, 4 and 8. This should be okay since accelerated states aren't + /// particularly common, especially when Unicode is enabled. + bytes: [u8; ACCEL_CAP], +} + +impl Accel { + /// Returns an empty accel, where no bytes are accelerated. + #[cfg(feature = "dfa-build")] + pub fn new() -> Accel { + Accel { bytes: [0; ACCEL_CAP] } + } + + /// Returns a verified accelerator derived from the beginning of the given + /// slice. + /// + /// If the slice is not long enough or contains invalid bytes for an + /// accelerator, then this returns an error. + pub fn from_slice(mut slice: &[u8]) -> Result<Accel, DeserializeError> { + slice = &slice[..core::cmp::min(ACCEL_LEN, slice.len())]; + let bytes = slice + .try_into() + .map_err(|_| DeserializeError::buffer_too_small("accelerator"))?; + Accel::from_bytes(bytes) + } + + /// Returns a verified accelerator derived from raw bytes. + /// + /// If the given bytes are invalid, then this returns an error. + fn from_bytes(bytes: [u8; 4]) -> Result<Accel, DeserializeError> { + if usize::from(bytes[0]) >= ACCEL_LEN { + return Err(DeserializeError::generic( + "accelerator bytes cannot have length more than 3", + )); + } + Ok(Accel::from_bytes_unchecked(bytes)) + } + + /// Returns an accelerator derived from raw bytes. + /// + /// This does not check whether the given bytes are valid. Invalid bytes + /// cannot sacrifice memory safety, but may result in panics or silent + /// logic bugs. + fn from_bytes_unchecked(bytes: [u8; 4]) -> Accel { + Accel { bytes: [bytes[0], bytes[1], bytes[2], bytes[3], 0, 0, 0, 0] } + } + + /// Attempts to add the given byte to this accelerator. If the accelerator + /// is already full or thinks the byte is a poor accelerator, then this + /// returns false. Otherwise, returns true. + /// + /// If the given byte is already in this accelerator, then it panics. + #[cfg(feature = "dfa-build")] + pub fn add(&mut self, byte: u8) -> bool { + if self.len() >= 3 { + return false; + } + // As a special case, we totally reject trying to accelerate a state + // with an ASCII space. In most cases, it occurs very frequently, and + // tends to result in worse overall performance. + if byte == b' ' { + return false; + } + assert!( + !self.contains(byte), + "accelerator already contains {:?}", + crate::util::escape::DebugByte(byte) + ); + self.bytes[self.len() + 1] = byte; + self.bytes[0] += 1; + true + } + + /// Return the number of bytes in this accelerator.
+ pub fn len(&self) -> usize { + usize::from(self.bytes[0]) + } + + /// Returns true if and only if there are no bytes in this accelerator. + #[cfg(feature = "dfa-build")] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the slice of bytes to accelerate. + /// + /// If this accelerator is empty, then this returns an empty slice. + fn needles(&self) -> &[u8] { + &self.bytes[1..1 + self.len()] + } + + /// Returns true if and only if this accelerator will accelerate the given + /// byte. + #[cfg(feature = "dfa-build")] + fn contains(&self, byte: u8) -> bool { + self.needles().iter().position(|&b| b == byte).is_some() + } + + /// Returns the accelerator bytes as an array of AccelTys. + #[cfg(feature = "dfa-build")] + fn as_accel_tys(&self) -> [AccelTy; 2] { + assert_eq!(ACCEL_CAP, 8); + // These unwraps are OK since ACCEL_CAP is set to 8. + let first = + AccelTy::from_ne_bytes(self.bytes[0..4].try_into().unwrap()); + let second = + AccelTy::from_ne_bytes(self.bytes[4..8].try_into().unwrap()); + [first, second] + } +} + +impl core::fmt::Debug for Accel { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "Accel(")?; + let mut set = f.debug_set(); + for &b in self.needles() { + set.entry(&crate::util::escape::DebugByte(b)); + } + set.finish()?; + write!(f, ")") + } +} diff --git a/vendor/regex-automata/src/dfa/automaton.rs b/vendor/regex-automata/src/dfa/automaton.rs new file mode 100644 index 0000000..fcfcf29 --- /dev/null +++ b/vendor/regex-automata/src/dfa/automaton.rs @@ -0,0 +1,2260 @@ +#[cfg(feature = "alloc")] +use crate::util::search::PatternSet; +use crate::{ + dfa::search, + util::{ + empty, + prefilter::Prefilter, + primitives::{PatternID, StateID}, + search::{Anchored, HalfMatch, Input, MatchError}, + start, + }, +}; + +/// A trait describing the interface of a deterministic finite automaton (DFA). +/// +/// The complexity of this trait probably means that it's unlikely for others +/// to implement it. The primary purpose of the trait is to provide for a way +/// of abstracting over different types of DFAs. In this crate, that means +/// dense DFAs and sparse DFAs. (Dense DFAs are fast but memory hungry, where +/// as sparse DFAs are slower but come with a smaller memory footprint. But +/// they otherwise provide exactly equivalent expressive power.) For example, a +/// [`dfa::regex::Regex`](crate::dfa::regex::Regex) is generic over this trait. +/// +/// Normally, a DFA's execution model is very simple. You might have a single +/// start state, zero or more final or "match" states and a function that +/// transitions from one state to the next given the next byte of input. +/// Unfortunately, the interface described by this trait is significantly +/// more complicated than this. The complexity has a number of different +/// reasons, mostly motivated by performance, functionality or space savings: +/// +/// * A DFA can search for multiple patterns simultaneously. This +/// means extra information is returned when a match occurs. Namely, +/// a match is not just an offset, but an offset plus a pattern ID. +/// [`Automaton::pattern_len`] returns the number of patterns compiled into +/// the DFA, [`Automaton::match_len`] returns the total number of patterns +/// that match in a particular state and [`Automaton::match_pattern`] permits +/// iterating over the patterns that match in a particular state. 
+/// * A DFA can have multiple start states, and the choice of which start +/// state to use depends on the content of the string being searched and +/// position of the search, as well as whether the search is an anchored +/// search for a specific pattern in the DFA. Moreover, computing the start +/// state also depends on whether you're doing a forward or a reverse search. +/// [`Automaton::start_state_forward`] and [`Automaton::start_state_reverse`] +/// are used to compute the start state for forward and reverse searches, +/// respectively. +/// * All matches are delayed by one byte to support things like `$` and `\b` +/// at the end of a pattern. Therefore, every use of a DFA is required to use +/// [`Automaton::next_eoi_state`] +/// at the end of the search to compute the final transition. +/// * For optimization reasons, some states are treated specially. Every +/// state is either special or not, which can be determined via the +/// [`Automaton::is_special_state`] method. If it's special, then the state +/// must be at least one of a few possible types of states. (Note that some +/// types can overlap, for example, a match state can also be an accel state. +/// But some types can't. If a state is a dead state, then it can never be any +/// other type of state.) Those types are: +/// * A dead state. A dead state means the DFA will never enter a match +/// state. This can be queried via the [`Automaton::is_dead_state`] method. +/// * A quit state. A quit state occurs if the DFA had to stop the search +/// prematurely for some reason. This can be queried via the +/// [`Automaton::is_quit_state`] method. +/// * A match state. A match state occurs when a match is found. When a DFA +/// enters a match state, the search may stop immediately (when looking +/// for the earliest match), or it may continue to find the leftmost-first +/// match. This can be queried via the [`Automaton::is_match_state`] +/// method. +/// * A start state. A start state is where a search begins. For every +/// search, there is exactly one start state that is used, however, a +/// DFA may contain many start states. When the search is in a start +/// state, it may use a prefilter to quickly skip to candidate matches +/// without executing the DFA on every byte. This can be queried via the +/// [`Automaton::is_start_state`] method. +/// * An accel state. An accel state is a state that is accelerated. +/// That is, it is a state where _most_ of its transitions loop back to +/// itself and only a small number of transitions lead to other states. +/// This kind of state is said to be accelerated because a search routine +/// can quickly look for the bytes leading out of the state instead of +/// continuing to execute the DFA on each byte. This can be queried via the +/// [`Automaton::is_accel_state`] method. And the bytes that lead out of +/// the state can be queried via the [`Automaton::accelerator`] method. +/// +/// There are a number of provided methods on this trait that implement +/// efficient searching (for forwards and backwards) with a DFA using +/// all of the above features of this trait. In particular, given the +/// complexity of all these features, implementing a search routine in +/// this trait can be a little subtle. With that said, it is possible to +/// somewhat simplify the search routine. For example, handling accelerated +/// states is strictly optional, since it is always correct to assume that +/// `Automaton::is_accel_state` returns false. 
However, one complex part of
+/// writing a search routine using this trait is handling the 1-byte delay of a
+/// match. That is not optional.
+///
+/// # Safety
+///
+/// This trait is not safe to implement so that code may rely on the
+/// correctness of implementations of this trait to avoid undefined behavior.
+/// The primary correctness guarantees are:
+///
+/// * `Automaton::start_state` always returns a valid state ID or an error or
+/// panics.
+/// * `Automaton::next_state`, when given a valid state ID, always returns
+/// a valid state ID for all values of `anchored` and `byte`, or otherwise
+/// panics.
+///
+/// In general, the rest of the methods on `Automaton` need to uphold their
+/// contracts as well. For example, `Automaton::is_dead` should only return
+/// true if the given state ID is actually a dead state.
+pub unsafe trait Automaton {
+    /// Transitions from the current state to the next state, given the next
+    /// byte of input.
+    ///
+    /// Implementations must guarantee that the returned ID is always a valid
+    /// ID when `current` refers to a valid ID. Moreover, the transition
+    /// function must be defined for all possible values of `input`.
+    ///
+    /// # Panics
+    ///
+    /// If the given ID does not refer to a valid state, then this routine
+    /// may panic but it also may not panic and instead return an invalid ID.
+    /// However, if the caller provides an invalid ID then this must never
+    /// sacrifice memory safety.
+    ///
+    /// # Example
+    ///
+    /// This shows a simplistic example for walking a DFA for a given haystack
+    /// by using the `next_state` method.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense}, Input};
+    ///
+    /// let dfa = dense::DFA::new(r"[a-z]+r")?;
+    /// let haystack = "bar".as_bytes();
+    ///
+    /// // The start state is determined by inspecting the position and the
+    /// // initial bytes of the haystack.
+    /// let mut state = dfa.start_state_forward(&Input::new(haystack))?;
+    /// // Walk all the bytes in the haystack.
+    /// for &b in haystack {
+    ///     state = dfa.next_state(state, b);
+    /// }
+    /// // Matches are always delayed by 1 byte, so we must explicitly walk the
+    /// // special "EOI" transition at the end of the search.
+    /// state = dfa.next_eoi_state(state);
+    /// assert!(dfa.is_match_state(state));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    fn next_state(&self, current: StateID, input: u8) -> StateID;
+
+    /// Transitions from the current state to the next state, given the next
+    /// byte of input.
+    ///
+    /// Unlike [`Automaton::next_state`], implementations may implement this
+    /// more efficiently by assuming that the `current` state ID is valid.
+    /// Typically, this manifests by eliding bounds checks.
+    ///
+    /// # Safety
+    ///
+    /// Callers of this method must guarantee that `current` refers to a valid
+    /// state ID. If `current` is not a valid state ID for this automaton, then
+    /// calling this routine may result in undefined behavior.
+    ///
+    /// If `current` is valid, then implementations must guarantee that the ID
+    /// returned is valid for all possible values of `input`.
+    unsafe fn next_state_unchecked(
+        &self,
+        current: StateID,
+        input: u8,
+    ) -> StateID;
+
+    /// Transitions from the current state to the next state for the special
+    /// EOI symbol.
+    ///
+    /// Implementations must guarantee that the returned ID is always a valid
+    /// ID when `current` refers to a valid ID.
+    ///
+    /// This routine must be called at the end of every search in a correct
+    /// implementation of search.
Namely, DFAs in this crate delay matches
+    /// by one byte in order to support look-around operators. Thus, after
+    /// reaching the end of a haystack, a search implementation must follow one
+    /// last EOI transition.
+    ///
+    /// It is best to think of EOI as an additional symbol in the alphabet of
+    /// a DFA that is distinct from every other symbol. That is, the alphabet
+    /// of DFAs in this crate has a logical size of 257 instead of 256, where
+    /// 256 corresponds to every possible inhabitant of `u8`. (In practice, the
+    /// physical alphabet size may be smaller because of alphabet compression
+    /// via equivalence classes, but EOI is always represented somehow in the
+    /// alphabet.)
+    ///
+    /// # Panics
+    ///
+    /// If the given ID does not refer to a valid state, then this routine
+    /// may panic but it also may not panic and instead return an invalid ID.
+    /// However, if the caller provides an invalid ID then this must never
+    /// sacrifice memory safety.
+    ///
+    /// # Example
+    ///
+    /// This shows a simplistic example for walking a DFA for a given haystack,
+    /// and then finishing the search with the final EOI transition.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense}, Input};
+    ///
+    /// let dfa = dense::DFA::new(r"[a-z]+r")?;
+    /// let haystack = "bar".as_bytes();
+    ///
+    /// // The start state is determined by inspecting the position and the
+    /// // initial bytes of the haystack.
+    /// //
+    /// // The unwrap is OK because we aren't requesting a start state for a
+    /// // specific pattern.
+    /// let mut state = dfa.start_state_forward(&Input::new(haystack))?;
+    /// // Walk all the bytes in the haystack.
+    /// for &b in haystack {
+    ///     state = dfa.next_state(state, b);
+    /// }
+    /// // Matches are always delayed by 1 byte, so we must explicitly walk
+    /// // the special "EOI" transition at the end of the search. Without this
+    /// // final transition, the assert below will fail since the DFA will not
+    /// // have entered a match state yet!
+    /// state = dfa.next_eoi_state(state);
+    /// assert!(dfa.is_match_state(state));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    fn next_eoi_state(&self, current: StateID) -> StateID;
+
+    /// Return the ID of the start state for this DFA for the given starting
+    /// configuration.
+    ///
+    /// Unlike typical DFA implementations, the start state for DFAs in this
+    /// crate is dependent on a few different factors:
+    ///
+    /// * The [`Anchored`] mode of the search. Unanchored, anchored and
+    /// anchored searches for a specific [`PatternID`] all use different start
+    /// states.
+    /// * Whether a "look-behind" byte exists. For example, the `^` anchor
+    /// matches if and only if there is no look-behind byte.
+    /// * The specific value of that look-behind byte. For example, a `(?m:^)`
+    /// assertion only matches when there is either no look-behind byte, or
+    /// when the look-behind byte is a line terminator.
+    ///
+    /// The [starting configuration](start::Config) provides the above
+    /// information.
+    ///
+    /// This routine can be used for either forward or reverse searches.
+    /// Although, as a convenience, if you have an [`Input`], then it may
+    /// be more succinct to use [`Automaton::start_state_forward`] or
+    /// [`Automaton::start_state_reverse`]. Note, for example, that the
+    /// convenience routines return a [`MatchError`] on failure whereas this
+    /// routine returns a [`StartError`].
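+    ///
+    /// This snippet is an editorial sketch rather than upstream
+    /// documentation: it assumes the `dfa-build`, `syntax` and `std` features
+    /// are enabled, and shows how a start state can be computed by hand from
+    /// a [start configuration](start::Config).
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense}, util::start, Input};
+    ///
+    /// let dfa = dense::DFA::new(r"[a-z]+")?;
+    /// // Build the same starting configuration that `start_state_forward`
+    /// // would derive from this `Input` internally.
+    /// let input = Input::new("quux");
+    /// let config = start::Config::from_input_forward(&input);
+    /// let mut sid = dfa.start_state(&config)?;
+    /// // The returned ID is valid to feed into `next_state`.
+    /// sid = dfa.next_state(sid, b'q');
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```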
+    ///
+    /// # Errors
+    ///
+    /// This may return a [`StartError`] if the search needs to give up when
+    /// determining the start state (for example, if it sees a "quit" byte).
+    /// This can also return an error if the given configuration contains an
+    /// unsupported [`Anchored`] configuration.
+    fn start_state(
+        &self,
+        config: &start::Config,
+    ) -> Result<StateID, StartError>;
+
+    /// Return the ID of the start state for this DFA when executing a forward
+    /// search.
+    ///
+    /// This is a convenience routine for calling [`Automaton::start_state`]
+    /// that converts the given [`Input`] to a [start
+    /// configuration](start::Config). Additionally, if an error occurs, it is
+    /// converted from a [`StartError`] to a [`MatchError`] using the offset
+    /// information in the given [`Input`].
+    ///
+    /// # Errors
+    ///
+    /// This may return a [`MatchError`] if the search needs to give up
+    /// when determining the start state (for example, if it sees a "quit"
+    /// byte). This can also return an error if the given `Input` contains an
+    /// unsupported [`Anchored`] configuration.
+    fn start_state_forward(
+        &self,
+        input: &Input<'_>,
+    ) -> Result<StateID, MatchError> {
+        let config = start::Config::from_input_forward(input);
+        self.start_state(&config).map_err(|err| match err {
+            StartError::Quit { byte } => {
+                let offset = input
+                    .start()
+                    .checked_sub(1)
+                    .expect("no quit in start without look-behind");
+                MatchError::quit(byte, offset)
+            }
+            StartError::UnsupportedAnchored { mode } => {
+                MatchError::unsupported_anchored(mode)
+            }
+        })
+    }
+
+    /// Return the ID of the start state for this DFA when executing a reverse
+    /// search.
+    ///
+    /// This is a convenience routine for calling [`Automaton::start_state`]
+    /// that converts the given [`Input`] to a [start
+    /// configuration](start::Config). Additionally, if an error occurs, it is
+    /// converted from a [`StartError`] to a [`MatchError`] using the offset
+    /// information in the given [`Input`].
+    ///
+    /// # Errors
+    ///
+    /// This may return a [`MatchError`] if the search needs to give up
+    /// when determining the start state (for example, if it sees a "quit"
+    /// byte). This can also return an error if the given `Input` contains an
+    /// unsupported [`Anchored`] configuration.
+    fn start_state_reverse(
+        &self,
+        input: &Input<'_>,
+    ) -> Result<StateID, MatchError> {
+        let config = start::Config::from_input_reverse(input);
+        self.start_state(&config).map_err(|err| match err {
+            StartError::Quit { byte } => {
+                let offset = input.end();
+                MatchError::quit(byte, offset)
+            }
+            StartError::UnsupportedAnchored { mode } => {
+                MatchError::unsupported_anchored(mode)
+            }
+        })
+    }
+
+    /// If this DFA has a universal starting state for the given anchor mode
+    /// and the DFA supports universal starting states, then this returns that
+    /// state's identifier.
+    ///
+    /// A DFA is said to have a universal starting state when the starting
+    /// state is invariant with respect to the haystack. Usually, the starting
+    /// state is chosen depending on the bytes immediately surrounding the
+    /// starting position of a search. However, the starting state only differs
+    /// when one or more of the patterns in the DFA have look-around assertions
+    /// in their prefix.
+    ///
+    /// Stated differently, if none of the patterns in a DFA have look-around
+    /// assertions in their prefix, then the DFA has a universal starting state
+    /// and _may_ be returned by this method.
+    ///
+    /// It is always correct for implementations to return `None`, and indeed,
+    /// this is what the default implementation does.
When this returns `None`,
+    /// callers must use either `start_state_forward` or `start_state_reverse`
+    /// to get the starting state.
+    ///
+    /// # Use case
+    ///
+    /// There are a few reasons why one might want to use this:
+    ///
+    /// * If you know your regex patterns have no look-around assertions in
+    /// their prefix, then calling this routine is likely cheaper and perhaps
+    /// more semantically meaningful.
+    /// * When implementing prefilter support in a DFA regex implementation,
+    /// it is necessary to re-compute the start state after a candidate
+    /// is returned from the prefilter. However, this is only needed when
+    /// there isn't a universal start state. When one exists, one can avoid
+    /// re-computing the start state.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::{Automaton, dense::DFA},
+    ///     Anchored,
+    /// };
+    ///
+    /// // There are no look-around assertions in the prefixes of any of the
+    /// // patterns, so we get a universal start state.
+    /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+$", "[A-Z]+"])?;
+    /// assert!(dfa.universal_start_state(Anchored::No).is_some());
+    /// assert!(dfa.universal_start_state(Anchored::Yes).is_some());
+    ///
+    /// // One of the patterns has a look-around assertion in its prefix,
+    /// // so this means there is no longer a universal start state.
+    /// let dfa = DFA::new_many(&["[0-9]+", "^[a-z]+$", "[A-Z]+"])?;
+    /// assert!(!dfa.universal_start_state(Anchored::No).is_some());
+    /// assert!(!dfa.universal_start_state(Anchored::Yes).is_some());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    fn universal_start_state(&self, _mode: Anchored) -> Option<StateID> {
+        None
+    }
+
+    /// Returns true if and only if the given identifier corresponds to a
+    /// "special" state. A special state is one or more of the following:
+    /// a dead state, a quit state, a match state, a start state or an
+    /// accelerated state.
+    ///
+    /// A correct implementation _may_ always return false for states that
+    /// are either start states or accelerated states, since that information
+    /// is only intended to be used for optimization purposes. Correct
+    /// implementations must return true if the state is a dead, quit or match
+    /// state. This is because search routines using this trait must be able
+    /// to rely on `is_special_state` as an indicator that a state may need
+    /// special treatment. (For example, when a search routine sees a dead
+    /// state, it must terminate.)
+    ///
+    /// This routine permits search implementations to use a single branch to
+    /// check whether a state needs special attention before executing the next
+    /// transition. The example below shows how to do this.
+    ///
+    /// # Example
+    ///
+    /// This example shows how `is_special_state` can be used to implement a
+    /// correct search routine with minimal branching. In particular, this
+    /// search routine implements "leftmost" matching, which means that it
+    /// doesn't immediately stop once a match is found. Instead, it continues
+    /// until it reaches a dead state.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::{Automaton, dense},
+    ///     HalfMatch, MatchError, Input,
+    /// };
+    ///
+    /// fn find<A: Automaton>(
+    ///     dfa: &A,
+    ///     haystack: &[u8],
+    /// ) -> Result<Option<HalfMatch>, MatchError> {
+    ///     // The start state is determined by inspecting the position and the
+    ///     // initial bytes of the haystack. Note that start states can never
+    ///     // be match states (since DFAs in this crate delay matches by 1
+    ///     // byte), so we don't need to check if the start state is a match.
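+    ///     // (Editorial note: `start_state_forward` is fallible, e.g. when a
+    ///     // quit byte is seen in the look-behind position, hence the `?`.)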
+ /// let mut state = dfa.start_state_forward(&Input::new(haystack))?; + /// let mut last_match = None; + /// // Walk all the bytes in the haystack. We can quit early if we see + /// // a dead or a quit state. The former means the automaton will + /// // never transition to any other state. The latter means that the + /// // automaton entered a condition in which its search failed. + /// for (i, &b) in haystack.iter().enumerate() { + /// state = dfa.next_state(state, b); + /// if dfa.is_special_state(state) { + /// if dfa.is_match_state(state) { + /// last_match = Some(HalfMatch::new( + /// dfa.match_pattern(state, 0), + /// i, + /// )); + /// } else if dfa.is_dead_state(state) { + /// return Ok(last_match); + /// } else if dfa.is_quit_state(state) { + /// // It is possible to enter into a quit state after + /// // observing a match has occurred. In that case, we + /// // should return the match instead of an error. + /// if last_match.is_some() { + /// return Ok(last_match); + /// } + /// return Err(MatchError::quit(b, i)); + /// } + /// // Implementors may also want to check for start or accel + /// // states and handle them differently for performance + /// // reasons. But it is not necessary for correctness. + /// } + /// } + /// // Matches are always delayed by 1 byte, so we must explicitly walk + /// // the special "EOI" transition at the end of the search. + /// state = dfa.next_eoi_state(state); + /// if dfa.is_match_state(state) { + /// last_match = Some(HalfMatch::new( + /// dfa.match_pattern(state, 0), + /// haystack.len(), + /// )); + /// } + /// Ok(last_match) + /// } + /// + /// // We use a greedy '+' operator to show how the search doesn't just + /// // stop once a match is detected. It continues extending the match. + /// // Using '[a-z]+?' would also work as expected and stop the search + /// // early. Greediness is built into the automaton. + /// let dfa = dense::DFA::new(r"[a-z]+")?; + /// let haystack = "123 foobar 4567".as_bytes(); + /// let mat = find(&dfa, haystack)?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 0); + /// assert_eq!(mat.offset(), 10); + /// + /// // Here's another example that tests our handling of the special EOI + /// // transition. This will fail to find a match if we don't call + /// // 'next_eoi_state' at the end of the search since the match isn't + /// // found until the final byte in the haystack. + /// let dfa = dense::DFA::new(r"[0-9]{4}")?; + /// let haystack = "123 foobar 4567".as_bytes(); + /// let mat = find(&dfa, haystack)?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 0); + /// assert_eq!(mat.offset(), 15); + /// + /// // And note that our search implementation above automatically works + /// // with multi-DFAs. Namely, `dfa.match_pattern(match_state, 0)` selects + /// // the appropriate pattern ID for us. + /// let dfa = dense::DFA::new_many(&[r"[a-z]+", r"[0-9]+"])?; + /// let haystack = "123 foobar 4567".as_bytes(); + /// let mat = find(&dfa, haystack)?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 1); + /// assert_eq!(mat.offset(), 3); + /// let mat = find(&dfa, &haystack[3..])?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 0); + /// assert_eq!(mat.offset(), 7); + /// let mat = find(&dfa, &haystack[10..])?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 1); + /// assert_eq!(mat.offset(), 5); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn is_special_state(&self, id: StateID) -> bool; + + /// Returns true if and only if the given identifier corresponds to a dead + /// state. 
When a DFA enters a dead state, it is impossible to leave. That + /// is, every transition on a dead state by definition leads back to the + /// same dead state. + /// + /// In practice, the dead state always corresponds to the identifier `0`. + /// Moreover, in practice, there is only one dead state. + /// + /// The existence of a dead state is not strictly required in the classical + /// model of finite state machines, where one generally only cares about + /// the question of whether an input sequence matches or not. Dead states + /// are not needed to answer that question, since one can immediately quit + /// as soon as one enters a final or "match" state. However, we don't just + /// care about matches but also care about the location of matches, and + /// more specifically, care about semantics like "greedy" matching. + /// + /// For example, given the pattern `a+` and the input `aaaz`, the dead + /// state won't be entered until the state machine reaches `z` in the + /// input, at which point, the search routine can quit. But without the + /// dead state, the search routine wouldn't know when to quit. In a + /// classical representation, the search routine would stop after seeing + /// the first `a` (which is when the search would enter a match state). But + /// this wouldn't implement "greedy" matching where `a+` matches as many + /// `a`'s as possible. + /// + /// # Example + /// + /// See the example for [`Automaton::is_special_state`] for how to use this + /// method correctly. + fn is_dead_state(&self, id: StateID) -> bool; + + /// Returns true if and only if the given identifier corresponds to a quit + /// state. A quit state is like a dead state (it has no transitions other + /// than to itself), except it indicates that the DFA failed to complete + /// the search. When this occurs, callers can neither accept or reject that + /// a match occurred. + /// + /// In practice, the quit state always corresponds to the state immediately + /// following the dead state. (Which is not usually represented by `1`, + /// since state identifiers are pre-multiplied by the state machine's + /// alphabet stride, and the alphabet stride varies between DFAs.) + /// + /// The typical way in which a quit state can occur is when heuristic + /// support for Unicode word boundaries is enabled via the + /// [`dense::Config::unicode_word_boundary`](crate::dfa::dense::Config::unicode_word_boundary) + /// option. But other options, like the lower level + /// [`dense::Config::quit`](crate::dfa::dense::Config::quit) + /// configuration, can also result in a quit state being entered. The + /// purpose of the quit state is to provide a way to execute a fast DFA + /// in common cases while delegating to slower routines when the DFA quits. + /// + /// The default search implementations provided by this crate will return a + /// [`MatchError::quit`] error when a quit state is entered. + /// + /// # Example + /// + /// See the example for [`Automaton::is_special_state`] for how to use this + /// method correctly. + fn is_quit_state(&self, id: StateID) -> bool; + + /// Returns true if and only if the given identifier corresponds to a + /// match state. A match state is also referred to as a "final" state and + /// indicates that a match has been found. + /// + /// If all you care about is whether a particular pattern matches in the + /// input sequence, then a search routine can quit early as soon as the + /// machine enters a match state. 
However, if you're looking for the + /// standard "leftmost-first" match location, then search _must_ continue + /// until either the end of the input or until the machine enters a dead + /// state. (Since either condition implies that no other useful work can + /// be done.) Namely, when looking for the location of a match, then + /// search implementations should record the most recent location in + /// which a match state was entered, but otherwise continue executing the + /// search as normal. (The search may even leave the match state.) Once + /// the termination condition is reached, the most recently recorded match + /// location should be returned. + /// + /// Finally, one additional power given to match states in this crate + /// is that they are always associated with a specific pattern in order + /// to support multi-DFAs. See [`Automaton::match_pattern`] for more + /// details and an example for how to query the pattern associated with a + /// particular match state. + /// + /// # Example + /// + /// See the example for [`Automaton::is_special_state`] for how to use this + /// method correctly. + fn is_match_state(&self, id: StateID) -> bool; + + /// Returns true only if the given identifier corresponds to a start + /// state + /// + /// A start state is a state in which a DFA begins a search. + /// All searches begin in a start state. Moreover, since all matches are + /// delayed by one byte, a start state can never be a match state. + /// + /// The main role of a start state is, as mentioned, to be a starting + /// point for a DFA. This starting point is determined via one of + /// [`Automaton::start_state_forward`] or + /// [`Automaton::start_state_reverse`], depending on whether one is doing + /// a forward or a reverse search, respectively. + /// + /// A secondary use of start states is for prefix acceleration. Namely, + /// while executing a search, if one detects that you're in a start state, + /// then it may be faster to look for the next match of a prefix of the + /// pattern, if one exists. If a prefix exists and since all matches must + /// begin with that prefix, then skipping ahead to occurrences of that + /// prefix may be much faster than executing the DFA. + /// + /// As mentioned in the documentation for + /// [`is_special_state`](Automaton::is_special_state) implementations + /// _may_ always return false, even if the given identifier is a start + /// state. This is because knowing whether a state is a start state or not + /// is not necessary for correctness and is only treated as a potential + /// performance optimization. (For example, the implementations of this + /// trait in this crate will only return true when the given identifier + /// corresponds to a start state and when [specialization of start + /// states](crate::dfa::dense::Config::specialize_start_states) was enabled + /// during DFA construction. If start state specialization is disabled + /// (which is the default), then this method will always return false.) + /// + /// # Example + /// + /// This example shows how to implement your own search routine that does + /// a prefix search whenever the search enters a start state. + /// + /// Note that you do not need to implement your own search routine + /// to make use of prefilters like this. The search routines + /// provided by this crate already implement prefilter support via + /// the [`Prefilter`](crate::util::prefilter::Prefilter) trait. 
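+    ///
+    /// Attaching one looks like this. (This snippet is an editorial sketch,
+    /// not upstream documentation; it assumes the `perf-literal` feature is
+    /// enabled so that a literal prefilter can actually be constructed.)
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::{dense, Automaton},
+    ///     util::prefilter::Prefilter,
+    ///     MatchKind,
+    /// };
+    ///
+    /// // Build a prefilter that scans for the literal 'Z' and hand it to
+    /// // the DFA's search configuration.
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Z"]);
+    /// let re = dense::DFA::builder()
+    ///     .configure(dense::DFA::config().prefilter(pre))
+    ///     .build(r"Z[a-z]+")?;
+    /// assert!(re.get_prefilter().is_some());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```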
+    /// A prefilter can be added to your search configuration with
+    /// [`dense::Config::prefilter`](crate::dfa::dense::Config::prefilter) for
+    /// dense and sparse DFAs in this crate.
+    ///
+    /// This example is meant to show how you might deal with prefilters in a
+    /// simplified case if you are implementing your own search routine.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::{Automaton, dense},
+    ///     HalfMatch, MatchError, Input,
+    /// };
+    ///
+    /// fn find_byte(slice: &[u8], at: usize, byte: u8) -> Option<usize> {
+    ///     // Would be faster to use the memchr crate, but this is still
+    ///     // faster than running through the DFA.
+    ///     slice[at..].iter().position(|&b| b == byte).map(|i| at + i)
+    /// }
+    ///
+    /// fn find<A: Automaton>(
+    ///     dfa: &A,
+    ///     haystack: &[u8],
+    ///     prefix_byte: Option<u8>,
+    /// ) -> Result<Option<HalfMatch>, MatchError> {
+    ///     // See the Automaton::is_special_state example for similar code
+    ///     // with more comments.
+    ///
+    ///     let mut state = dfa.start_state_forward(&Input::new(haystack))?;
+    ///     let mut last_match = None;
+    ///     let mut pos = 0;
+    ///     while pos < haystack.len() {
+    ///         let b = haystack[pos];
+    ///         state = dfa.next_state(state, b);
+    ///         pos += 1;
+    ///         if dfa.is_special_state(state) {
+    ///             if dfa.is_match_state(state) {
+    ///                 last_match = Some(HalfMatch::new(
+    ///                     dfa.match_pattern(state, 0),
+    ///                     pos - 1,
+    ///                 ));
+    ///             } else if dfa.is_dead_state(state) {
+    ///                 return Ok(last_match);
+    ///             } else if dfa.is_quit_state(state) {
+    ///                 // It is possible to enter into a quit state after
+    ///                 // observing a match has occurred. In that case, we
+    ///                 // should return the match instead of an error.
+    ///                 if last_match.is_some() {
+    ///                     return Ok(last_match);
+    ///                 }
+    ///                 return Err(MatchError::quit(b, pos - 1));
+    ///             } else if dfa.is_start_state(state) {
+    ///                 // If we're in a start state and know all matches begin
+    ///                 // with a particular byte, then we can quickly skip to
+    ///                 // candidate matches without running the DFA through
+    ///                 // every byte in between.
+    ///                 if let Some(prefix_byte) = prefix_byte {
+    ///                     pos = match find_byte(haystack, pos, prefix_byte) {
+    ///                         Some(pos) => pos,
+    ///                         None => break,
+    ///                     };
+    ///                 }
+    ///             }
+    ///         }
+    ///     }
+    ///     // Matches are always delayed by 1 byte, so we must explicitly walk
+    ///     // the special "EOI" transition at the end of the search.
+    ///     state = dfa.next_eoi_state(state);
+    ///     if dfa.is_match_state(state) {
+    ///         last_match = Some(HalfMatch::new(
+    ///             dfa.match_pattern(state, 0),
+    ///             haystack.len(),
+    ///         ));
+    ///     }
+    ///     Ok(last_match)
+    /// }
+    ///
+    /// // In this example, it's obvious that all occurrences of our pattern
+    /// // begin with 'Z', so we pass in 'Z'. Note also that we need to
+    /// // enable start state specialization, or else it won't be possible to
+    /// // detect start states during a search. ('is_start_state' would always
+    /// // return false.)
+    /// let dfa = dense::DFA::builder()
+    ///     .configure(dense::DFA::config().specialize_start_states(true))
+    ///     .build(r"Z[a-z]+")?;
+    /// let haystack = "123 foobar Zbaz quux".as_bytes();
+    /// let mat = find(&dfa, haystack, Some(b'Z'))?.unwrap();
+    /// assert_eq!(mat.pattern().as_usize(), 0);
+    /// assert_eq!(mat.offset(), 15);
+    ///
+    /// // But note that we don't need to pass in a prefix byte. If we don't,
+    /// // then the search routine does no acceleration.
+ /// let mat = find(&dfa, haystack, None)?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 0); + /// assert_eq!(mat.offset(), 15); + /// + /// // However, if we pass an incorrect byte, then the prefix search will + /// // result in incorrect results. + /// assert_eq!(find(&dfa, haystack, Some(b'X'))?, None); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn is_start_state(&self, id: StateID) -> bool; + + /// Returns true if and only if the given identifier corresponds to an + /// accelerated state. + /// + /// An accelerated state is a special optimization + /// trick implemented by this crate. Namely, if + /// [`dense::Config::accelerate`](crate::dfa::dense::Config::accelerate) is + /// enabled (and it is by default), then DFAs generated by this crate will + /// tag states meeting certain characteristics as accelerated. States meet + /// this criteria whenever most of their transitions are self-transitions. + /// That is, transitions that loop back to the same state. When a small + /// number of transitions aren't self-transitions, then it follows that + /// there are only a small number of bytes that can cause the DFA to leave + /// that state. Thus, there is an opportunity to look for those bytes + /// using more optimized routines rather than continuing to run through + /// the DFA. This trick is similar to the prefilter idea described in + /// the documentation of [`Automaton::is_start_state`] with two main + /// differences: + /// + /// 1. It is more limited since acceleration only applies to single bytes. + /// This means states are rarely accelerated when Unicode mode is enabled + /// (which is enabled by default). + /// 2. It can occur anywhere in the DFA, which increases optimization + /// opportunities. + /// + /// Like the prefilter idea, the main downside (and a possible reason to + /// disable it) is that it can lead to worse performance in some cases. + /// Namely, if a state is accelerated for very common bytes, then the + /// overhead of checking for acceleration and using the more optimized + /// routines to look for those bytes can cause overall performance to be + /// worse than if acceleration wasn't enabled at all. + /// + /// A simple example of a regex that has an accelerated state is + /// `(?-u)[^a]+a`. Namely, the `[^a]+` sub-expression gets compiled down + /// into a single state where all transitions except for `a` loop back to + /// itself, and where `a` is the only transition (other than the special + /// EOI transition) that goes to some other state. Thus, this state can + /// be accelerated and implemented more efficiently by calling an + /// optimized routine like `memchr` with `a` as the needle. Notice that + /// the `(?-u)` to disable Unicode is necessary here, as without it, + /// `[^a]` will match any UTF-8 encoding of any Unicode scalar value other + /// than `a`. This more complicated expression compiles down to many DFA + /// states and the simple acceleration optimization is no longer available. + /// + /// Typically, this routine is used to guard calls to + /// [`Automaton::accelerator`], which returns the accelerated bytes for + /// the specified state. + fn is_accel_state(&self, id: StateID) -> bool; + + /// Returns the total number of patterns compiled into this DFA. + /// + /// In the case of a DFA that contains no patterns, this must return `0`. 
+    ///
+    /// # Example
+    ///
+    /// This example shows the pattern length for a DFA that never matches:
+    ///
+    /// ```
+    /// use regex_automata::dfa::{Automaton, dense::DFA};
+    ///
+    /// let dfa: DFA<Vec<u32>> = DFA::never_match()?;
+    /// assert_eq!(dfa.pattern_len(), 0);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And another example for a DFA that matches at every position:
+    ///
+    /// ```
+    /// use regex_automata::dfa::{Automaton, dense::DFA};
+    ///
+    /// let dfa: DFA<Vec<u32>> = DFA::always_match()?;
+    /// assert_eq!(dfa.pattern_len(), 1);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And finally, a DFA that was constructed from multiple patterns:
+    ///
+    /// ```
+    /// use regex_automata::dfa::{Automaton, dense::DFA};
+    ///
+    /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
+    /// assert_eq!(dfa.pattern_len(), 3);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    fn pattern_len(&self) -> usize;
+
+    /// Returns the total number of patterns that match in this state.
+    ///
+    /// If the given state is not a match state, then implementations may
+    /// panic.
+    ///
+    /// If the DFA was compiled with one pattern, then this must necessarily
+    /// always return `1` for all match states.
+    ///
+    /// Implementations must guarantee that [`Automaton::match_pattern`] can be
+    /// called with indices up to (but not including) the length returned by
+    /// this routine without panicking.
+    ///
+    /// # Panics
+    ///
+    /// Implementations are permitted to panic if the provided state ID does
+    /// not correspond to a match state.
+    ///
+    /// # Example
+    ///
+    /// This example shows a simple instance of implementing overlapping
+    /// matches. In particular, it shows not only how to determine how many
+    /// patterns have matched in a particular state, but also how to access
+    /// which specific patterns have matched.
+    ///
+    /// Notice that we must use
+    /// [`MatchKind::All`](crate::MatchKind::All)
+    /// when building the DFA. If we used
+    /// [`MatchKind::LeftmostFirst`](crate::MatchKind::LeftmostFirst)
+    /// instead, then the DFA would not be constructed in a way that
+    /// supports overlapping matches. (It would only report a single pattern
+    /// that matches at any particular point in time.)
+    ///
+    /// Another thing to take note of is the patterns used and the order in
+    /// which the pattern IDs are reported. In the example below, pattern `3`
+    /// is yielded first. Why? Because it corresponds to the match that
+    /// appears first. Namely, the `@` symbol is part of `\S+` but not part
+    /// of any of the other patterns. Since the `\S+` pattern has a match that
+    /// starts to the left of any other pattern, its ID is returned before any
+    /// other.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::{Automaton, dense}, Input, MatchKind};
+    ///
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new().match_kind(MatchKind::All))
+    ///     .build_many(&[
+    ///         r"[[:word:]]+", r"[a-z]+", r"[A-Z]+", r"[[:^space:]]+",
+    ///     ])?;
+    /// let haystack = "@bar".as_bytes();
+    ///
+    /// // The start state is determined by inspecting the position and the
+    /// // initial bytes of the haystack.
+    /// let mut state = dfa.start_state_forward(&Input::new(haystack))?;
+    /// // Walk all the bytes in the haystack.
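+    /// // (Editorial note: the `is_match_state` check below matters, since
+    /// // `match_len`/`match_pattern` may panic on a non-match state.)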
+ /// for &b in haystack { + /// state = dfa.next_state(state, b); + /// } + /// state = dfa.next_eoi_state(state); + /// + /// assert!(dfa.is_match_state(state)); + /// assert_eq!(dfa.match_len(state), 3); + /// // The following calls are guaranteed to not panic since `match_len` + /// // returned `3` above. + /// assert_eq!(dfa.match_pattern(state, 0).as_usize(), 3); + /// assert_eq!(dfa.match_pattern(state, 1).as_usize(), 0); + /// assert_eq!(dfa.match_pattern(state, 2).as_usize(), 1); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn match_len(&self, id: StateID) -> usize; + + /// Returns the pattern ID corresponding to the given match index in the + /// given state. + /// + /// See [`Automaton::match_len`] for an example of how to use this + /// method correctly. Note that if you know your DFA is compiled with a + /// single pattern, then this routine is never necessary since it will + /// always return a pattern ID of `0` for an index of `0` when `id` + /// corresponds to a match state. + /// + /// Typically, this routine is used when implementing an overlapping + /// search, as the example for `Automaton::match_len` does. + /// + /// # Panics + /// + /// If the state ID is not a match state or if the match index is out + /// of bounds for the given state, then this routine may either panic + /// or produce an incorrect result. If the state ID is correct and the + /// match index is correct, then this routine must always produce a valid + /// `PatternID`. + fn match_pattern(&self, id: StateID, index: usize) -> PatternID; + + /// Returns true if and only if this automaton can match the empty string. + /// When it returns false, all possible matches are guaranteed to have a + /// non-zero length. + /// + /// This is useful as cheap way to know whether code needs to handle the + /// case of a zero length match. This is particularly important when UTF-8 + /// modes are enabled, as when UTF-8 mode is enabled, empty matches that + /// split a codepoint must never be reported. This extra handling can + /// sometimes be costly, and since regexes matching an empty string are + /// somewhat rare, it can be beneficial to treat such regexes specially. + /// + /// # Example + /// + /// This example shows a few different DFAs and whether they match the + /// empty string or not. Notice the empty string isn't merely a matter + /// of a string of length literally `0`, but rather, whether a match can + /// occur between specific pairs of bytes. + /// + /// ``` + /// use regex_automata::{dfa::{dense::DFA, Automaton}, util::syntax}; + /// + /// // The empty regex matches the empty string. + /// let dfa = DFA::new("")?; + /// assert!(dfa.has_empty(), "empty matches empty"); + /// // The '+' repetition operator requires at least one match, and so + /// // does not match the empty string. + /// let dfa = DFA::new("a+")?; + /// assert!(!dfa.has_empty(), "+ does not match empty"); + /// // But the '*' repetition operator does. + /// let dfa = DFA::new("a*")?; + /// assert!(dfa.has_empty(), "* does match empty"); + /// // And wrapping '+' in an operator that can match an empty string also + /// // causes it to match the empty string too. + /// let dfa = DFA::new("(a+)*")?; + /// assert!(dfa.has_empty(), "+ inside of * matches empty"); + /// + /// // If a regex is just made of a look-around assertion, even if the + /// // assertion requires some kind of non-empty string around it (such as + /// // \b), then it is still treated as if it matches the empty string. 
+ /// // Namely, if a match occurs of just a look-around assertion, then the + /// // match returned is empty. + /// let dfa = DFA::builder() + /// .configure(DFA::config().unicode_word_boundary(true)) + /// .syntax(syntax::Config::new().utf8(false)) + /// .build(r"^$\A\z\b\B(?-u:\b\B)")?; + /// assert!(dfa.has_empty(), "assertions match empty"); + /// // Even when an assertion is wrapped in a '+', it still matches the + /// // empty string. + /// let dfa = DFA::new(r"^+")?; + /// assert!(dfa.has_empty(), "+ of an assertion matches empty"); + /// + /// // An alternation with even one branch that can match the empty string + /// // is also said to match the empty string overall. + /// let dfa = DFA::new("foo|(bar)?|quux")?; + /// assert!(dfa.has_empty(), "alternations can match empty"); + /// + /// // An NFA that matches nothing does not match the empty string. + /// let dfa = DFA::new("[a&&b]")?; + /// assert!(!dfa.has_empty(), "never matching means not matching empty"); + /// // But if it's wrapped in something that doesn't require a match at + /// // all, then it can match the empty string! + /// let dfa = DFA::new("[a&&b]*")?; + /// assert!(dfa.has_empty(), "* on never-match still matches empty"); + /// // Since a '+' requires a match, using it on something that can never + /// // match will itself produce a regex that can never match anything, + /// // and thus does not match the empty string. + /// let dfa = DFA::new("[a&&b]+")?; + /// assert!(!dfa.has_empty(), "+ on never-match still matches nothing"); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn has_empty(&self) -> bool; + + /// Whether UTF-8 mode is enabled for this DFA or not. + /// + /// When UTF-8 mode is enabled, all matches reported by a DFA are + /// guaranteed to correspond to spans of valid UTF-8. This includes + /// zero-width matches. For example, the DFA must guarantee that the empty + /// regex will not match at the positions between code units in the UTF-8 + /// encoding of a single codepoint. + /// + /// See [`thompson::Config::utf8`](crate::nfa::thompson::Config::utf8) for + /// more information. + /// + /// # Example + /// + /// This example shows how UTF-8 mode can impact the match spans that may + /// be reported in certain cases. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton}, + /// nfa::thompson, + /// HalfMatch, Input, + /// }; + /// + /// // UTF-8 mode is enabled by default. + /// let re = DFA::new("")?; + /// assert!(re.is_utf8()); + /// let mut input = Input::new("☃"); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(Some(HalfMatch::must(0, 0)), got); + /// + /// // Even though an empty regex matches at 1..1, our next match is + /// // 3..3 because 1..1 and 2..2 split the snowman codepoint (which is + /// // three bytes long). 
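+    /// // (Editorial note: `set_start` narrows the search span over the same
+    /// // haystack, so each call below retries the search from a new offset.)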
+ /// input.set_start(1); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(Some(HalfMatch::must(0, 3)), got); + /// + /// // But if we disable UTF-8, then we'll get matches at 1..1 and 2..2: + /// let re = DFA::builder() + /// .thompson(thompson::Config::new().utf8(false)) + /// .build("")?; + /// assert!(!re.is_utf8()); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(Some(HalfMatch::must(0, 1)), got); + /// + /// input.set_start(2); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(Some(HalfMatch::must(0, 2)), got); + /// + /// input.set_start(3); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(Some(HalfMatch::must(0, 3)), got); + /// + /// input.set_start(4); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(None, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn is_utf8(&self) -> bool; + + /// Returns true if and only if this DFA is limited to returning matches + /// whose start position is `0`. + /// + /// Note that if you're using DFAs provided by + /// this crate, then this is _orthogonal_ to + /// [`Config::start_kind`](crate::dfa::dense::Config::start_kind). + /// + /// This is useful in some cases because if a DFA is limited to producing + /// matches that start at offset `0`, then a reverse search is never + /// required for finding the start of a match. + /// + /// # Example + /// + /// ``` + /// use regex_automata::dfa::{dense::DFA, Automaton}; + /// + /// // The empty regex matches anywhere + /// let dfa = DFA::new("")?; + /// assert!(!dfa.is_always_start_anchored(), "empty matches anywhere"); + /// // 'a' matches anywhere. + /// let dfa = DFA::new("a")?; + /// assert!(!dfa.is_always_start_anchored(), "'a' matches anywhere"); + /// // '^' only matches at offset 0! + /// let dfa = DFA::new("^a")?; + /// assert!(dfa.is_always_start_anchored(), "'^a' matches only at 0"); + /// // But '(?m:^)' matches at 0 but at other offsets too. + /// let dfa = DFA::new("(?m:^)a")?; + /// assert!(!dfa.is_always_start_anchored(), "'(?m:^)a' matches anywhere"); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn is_always_start_anchored(&self) -> bool; + + /// Return a slice of bytes to accelerate for the given state, if possible. + /// + /// If the given state has no accelerator, then an empty slice must be + /// returned. If `Automaton::is_accel_state` returns true for the given ID, + /// then this routine _must_ return a non-empty slice. But note that it is + /// not required for an implementation of this trait to ever return `true` + /// for `is_accel_state`, even if the state _could_ be accelerated. That + /// is, acceleration is an optional optimization. But the return values of + /// `is_accel_state` and `accelerator` must be in sync. + /// + /// If the given ID is not a valid state ID for this automaton, then + /// implementations may panic or produce incorrect results. + /// + /// See [`Automaton::is_accel_state`] for more details on state + /// acceleration. + /// + /// By default, this method will always return an empty slice. + /// + /// # Example + /// + /// This example shows a contrived case in which we build a regex that we + /// know is accelerated and extract the accelerator from a state. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{Automaton, dense}, + /// util::{primitives::StateID, syntax}, + /// }; + /// + /// let dfa = dense::Builder::new() + /// // We disable Unicode everywhere and permit the regex to match + /// // invalid UTF-8. e.g., [^abc] matches \xFF, which is not valid + /// // UTF-8. 
If we left Unicode enabled, [^abc] would match any UTF-8 + /// // encoding of any Unicode scalar value except for 'a', 'b' or 'c'. + /// // That translates to a much more complicated DFA, and also + /// // inhibits the 'accelerator' optimization that we are trying to + /// // demonstrate in this example. + /// .syntax(syntax::Config::new().unicode(false).utf8(false)) + /// .build("[^abc]+a")?; + /// + /// // Here we just pluck out the state that we know is accelerated. + /// // While the stride calculations are something that can be relied + /// // on by callers, the specific position of the accelerated state is + /// // implementation defined. + /// // + /// // N.B. We get '3' by inspecting the state machine using 'regex-cli'. + /// // e.g., try `regex-cli debug dense dfa -p '[^abc]+a' -BbUC`. + /// let id = StateID::new(3 * dfa.stride()).unwrap(); + /// let accelerator = dfa.accelerator(id); + /// // The `[^abc]+` sub-expression permits [a, b, c] to be accelerated. + /// assert_eq!(accelerator, &[b'a', b'b', b'c']); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + fn accelerator(&self, _id: StateID) -> &[u8] { + &[] + } + + /// Returns the prefilter associated with a DFA, if one exists. + /// + /// The default implementation of this trait always returns `None`. And + /// indeed, it is always correct to return `None`. + /// + /// For DFAs in this crate, a prefilter can be attached to a DFA via + /// [`dense::Config::prefilter`](crate::dfa::dense::Config::prefilter). + /// + /// Do note that prefilters are not serialized by DFAs in this crate. + /// So if you deserialize a DFA that had a prefilter attached to it + /// at serialization time, then it will not have a prefilter after + /// deserialization. + #[inline] + fn get_prefilter(&self) -> Option<&Prefilter> { + None + } + + /// Executes a forward search and returns the end position of the leftmost + /// match that is found. If no match exists, then `None` is returned. + /// + /// In particular, this method continues searching even after it enters + /// a match state. The search only terminates once it has reached the + /// end of the input or when it has entered a dead or quit state. Upon + /// termination, the position of the last byte seen while still in a match + /// state is returned. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Notes for implementors + /// + /// Implementors of this trait are not required to implement any particular + /// match semantics (such as leftmost-first), which are instead manifest in + /// the DFA's transitions. But this search routine should behave as a + /// general "leftmost" search. + /// + /// In particular, this method must continue searching even after it enters + /// a match state. The search should only terminate once it has reached + /// the end of the input or when it has entered a dead or quit state. 
Upon + /// termination, the position of the last byte seen while still in a match + /// state is returned. + /// + /// Since this trait provides an implementation for this method by default, + /// it's unlikely that one will need to implement this. + /// + /// # Example + /// + /// This example shows how to use this method with a + /// [`dense::DFA`](crate::dfa::dense::DFA). + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input}; + /// + /// let dfa = dense::DFA::new("foo[0-9]+")?; + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new(b"foo12345"))?); + /// + /// // Even though a match is found after reading the first byte (`a`), + /// // the leftmost first match semantics demand that we find the earliest + /// // match that prefers earlier parts of the pattern over latter parts. + /// let dfa = dense::DFA::new("abc|a")?; + /// let expected = Some(HalfMatch::must(0, 3)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new(b"abc"))?); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specific pattern search + /// + /// This example shows how to build a multi-DFA that permits searching for + /// specific patterns. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// dfa::{Automaton, dense}, + /// Anchored, HalfMatch, PatternID, Input, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .configure(dense::Config::new().starts_for_each_pattern(true)) + /// .build_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?; + /// let haystack = "foo123".as_bytes(); + /// + /// // Since we are using the default leftmost-first match and both + /// // patterns match at the same starting position, only the first pattern + /// // will be returned in this case when doing a search for any of the + /// // patterns. + /// let expected = Some(HalfMatch::must(0, 6)); + /// let got = dfa.try_search_fwd(&Input::new(haystack))?; + /// assert_eq!(expected, got); + /// + /// // But if we want to check whether some other pattern matches, then we + /// // can provide its pattern ID. + /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(1))); + /// let expected = Some(HalfMatch::must(1, 6)); + /// let got = dfa.try_search_fwd(&input)?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specifying the bounds of a search + /// + /// This example shows how providing the bounds of a search can produce + /// different results than simply sub-slicing the haystack. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input}; + /// + /// // N.B. We disable Unicode here so that we use a simple ASCII word + /// // boundary. Alternatively, we could enable heuristic support for + /// // Unicode word boundaries. + /// let dfa = dense::DFA::new(r"(?-u)\b[0-9]{3}\b")?; + /// let haystack = "foo123bar".as_bytes(); + /// + /// // Since we sub-slice the haystack, the search doesn't know about the + /// // larger context and assumes that `123` is surrounded by word + /// // boundaries. And of course, the match position is reported relative + /// // to the sub-slice as well, which means we get `3` instead of `6`. 
+    /// let input = Input::new(&haystack[3..6]);
+    /// let expected = Some(HalfMatch::must(0, 3));
+    /// let got = dfa.try_search_fwd(&input)?;
+    /// assert_eq!(expected, got);
+    ///
+    /// // But if we provide the bounds of the search within the context of
+    /// // the entire haystack, then the search can take the surrounding
+    /// // context into account. (And if we did find a match, it would be
+    /// // reported as a valid offset into `haystack` instead of its
+    /// // sub-slice.)
+    /// let input = Input::new(haystack).range(3..6);
+    /// let expected = None;
+    /// let got = dfa.try_search_fwd(&input)?;
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    fn try_search_fwd(
+        &self,
+        input: &Input<'_>,
+    ) -> Result<Option<HalfMatch>, MatchError> {
+        let utf8empty = self.has_empty() && self.is_utf8();
+        let hm = match search::find_fwd(&self, input)? {
+            None => return Ok(None),
+            Some(hm) if !utf8empty => return Ok(Some(hm)),
+            Some(hm) => hm,
+        };
+        // We get to this point when we know our DFA can match the empty
+        // string AND when UTF-8 mode is enabled. In this case, we skip any
+        // matches whose offset splits a codepoint. Such a match is
+        // necessarily a zero-width match, because UTF-8 mode requires the
+        // underlying NFA to be built such that all non-empty matches span
+        // valid UTF-8. Therefore, any match that ends in the middle of a
+        // codepoint cannot be part of a span of valid UTF-8 and thus must be
+        // an empty match. In such cases, we skip it, so as not to report
+        // matches that split a codepoint.
+        //
+        // Note that this is not a checked assumption. Callers *can* provide
+        // an NFA with UTF-8 mode enabled that produces non-empty matches
+        // spanning invalid UTF-8. But doing so is documented to result in
+        // unspecified behavior.
+        empty::skip_splits_fwd(input, hm, hm.offset(), |input| {
+            let got = search::find_fwd(&self, input)?;
+            Ok(got.map(|hm| (hm, hm.offset())))
+        })
+    }
+
+    /// Executes a reverse search and returns the start position of the
+    /// leftmost match that is found. If no match exists, then `None` is
+    /// returned.
+    ///
+    /// # Errors
+    ///
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the DFA quitting.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
+    /// exists or not.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use this method with a
+    /// [`dense::DFA`](crate::dfa::dense::DFA). In particular, this
+    /// routine is principally useful when used in conjunction with the
+    /// [`nfa::thompson::Config::reverse`](crate::nfa::thompson::Config::reverse)
+    /// configuration. In general, it's unlikely to be correct to use
+    /// both `try_search_fwd` and `try_search_rev` with the same DFA since
+    /// any particular DFA will only support searching in one direction with
+    /// respect to the pattern.
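+    ///
+    /// As a quick sketch of that pairing (the complete version of this
+    /// technique is shown in the `dense::Config::match_kind` documentation),
+    /// a forward DFA finds where a match ends, and a separate reverse DFA,
+    /// searched anchored at that end offset, finds where it starts:
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     dfa::{dense, Automaton, StartKind},
+    ///     nfa::thompson,
+    ///     Anchored, HalfMatch, Input, MatchKind,
+    /// };
+    ///
+    /// let pattern = r"foo[0-9]+";
+    /// let fwd = dense::DFA::new(pattern)?;
+    /// let rev = dense::Builder::new()
+    ///     .thompson(thompson::Config::new().reverse(true))
+    ///     .configure(dense::Config::new()
+    ///         .start_kind(StartKind::Anchored)
+    ///         .match_kind(MatchKind::All))
+    ///     .build(pattern)?;
+    ///
+    /// let haystack = "zzzfoo12345".as_bytes();
+    /// // The forward DFA reports where the match ends...
+    /// let end = fwd.try_search_fwd(&Input::new(haystack))?.unwrap();
+    /// // ...and the reverse DFA, anchored at that offset, reports where
+    /// // the same match starts.
+    /// let input = Input::new(haystack)
+    ///     .range(..end.offset())
+    ///     .anchored(Anchored::Yes);
+    /// let start = rev.try_search_rev(&input)?.unwrap();
+    /// assert_eq!(HalfMatch::must(0, 11), end);
+    /// assert_eq!(HalfMatch::must(0, 3), start);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And here is the basic reverse search on its own: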
+ /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson, + /// dfa::{Automaton, dense}, + /// HalfMatch, Input, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .thompson(thompson::Config::new().reverse(true)) + /// .build("foo[0-9]+")?; + /// let expected = Some(HalfMatch::must(0, 0)); + /// assert_eq!(expected, dfa.try_search_rev(&Input::new(b"foo12345"))?); + /// + /// // Even though a match is found after reading the last byte (`c`), + /// // the leftmost first match semantics demand that we find the earliest + /// // match that prefers earlier parts of the pattern over latter parts. + /// let dfa = dense::Builder::new() + /// .thompson(thompson::Config::new().reverse(true)) + /// .build("abc|c")?; + /// let expected = Some(HalfMatch::must(0, 0)); + /// assert_eq!(expected, dfa.try_search_rev(&Input::new(b"abc"))?); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: UTF-8 mode + /// + /// This examples demonstrates that UTF-8 mode applies to reverse + /// DFAs. When UTF-8 mode is enabled in the underlying NFA, then all + /// matches reported must correspond to valid UTF-8 spans. This includes + /// prohibiting zero-width matches that split a codepoint. + /// + /// UTF-8 mode is enabled by default. Notice below how the only zero-width + /// matches reported are those at UTF-8 boundaries: + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton}, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .thompson(thompson::Config::new().reverse(true)) + /// .build(r"")?; + /// + /// // Run the reverse DFA to collect all matches. + /// let mut input = Input::new("☃"); + /// let mut matches = vec![]; + /// loop { + /// match dfa.try_search_rev(&input)? { + /// None => break, + /// Some(hm) => { + /// matches.push(hm); + /// if hm.offset() == 0 || input.end() == 0 { + /// break; + /// } else if hm.offset() < input.end() { + /// input.set_end(hm.offset()); + /// } else { + /// // This is only necessary to handle zero-width + /// // matches, which of course occur in this example. + /// // Without this, the search would never advance + /// // backwards beyond the initial match. + /// input.set_end(input.end() - 1); + /// } + /// } + /// } + /// } + /// + /// // No matches split a codepoint. + /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Now let's look at the same example, but with UTF-8 mode on the + /// original NFA disabled (which results in disabling UTF-8 mode on the + /// DFA): + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton}, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .thompson(thompson::Config::new().reverse(true).utf8(false)) + /// .build(r"")?; + /// + /// // Run the reverse DFA to collect all matches. + /// let mut input = Input::new("☃"); + /// let mut matches = vec![]; + /// loop { + /// match dfa.try_search_rev(&input)? { + /// None => break, + /// Some(hm) => { + /// matches.push(hm); + /// if hm.offset() == 0 || input.end() == 0 { + /// break; + /// } else if hm.offset() < input.end() { + /// input.set_end(hm.offset()); + /// } else { + /// // This is only necessary to handle zero-width + /// // matches, which of course occur in this example. + /// // Without this, the search would never advance + /// // backwards beyond the initial match. 
+ /// input.set_end(input.end() - 1); + /// } + /// } + /// } + /// } + /// + /// // No matches split a codepoint. + /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(0, 2), + /// HalfMatch::must(0, 1), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + fn try_search_rev( + &self, + input: &Input<'_>, + ) -> Result, MatchError> { + let utf8empty = self.has_empty() && self.is_utf8(); + let hm = match search::find_rev(self, input)? { + None => return Ok(None), + Some(hm) if !utf8empty => return Ok(Some(hm)), + Some(hm) => hm, + }; + empty::skip_splits_rev(input, hm, hm.offset(), |input| { + let got = search::find_rev(self, input)?; + Ok(got.map(|hm| (hm, hm.offset()))) + }) + } + + /// Executes an overlapping forward search. Matches, if one exists, can be + /// obtained via the [`OverlappingState::get_match`] method. + /// + /// This routine is principally only useful when searching for multiple + /// patterns on inputs where multiple patterns may match the same regions + /// of text. In particular, callers must preserve the automaton's search + /// state from prior calls so that the implementation knows where the last + /// match occurred. + /// + /// When using this routine to implement an iterator of overlapping + /// matches, the `start` of the search should always be set to the end + /// of the last match. If more patterns match at the previous location, + /// then they will be immediately returned. (This is tracked by the given + /// overlapping state.) Otherwise, the search continues at the starting + /// position given. + /// + /// If for some reason you want the search to forget about its previous + /// state and restart the search at a particular position, then setting the + /// state to [`OverlappingState::start`] will accomplish that. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// This example shows how to run a basic overlapping search with a + /// [`dense::DFA`](crate::dfa::dense::DFA). Notice that we build the + /// automaton with a `MatchKind::All` configuration. Overlapping searches + /// are unlikely to work as one would expect when using the default + /// `MatchKind::LeftmostFirst` match semantics, since leftmost-first + /// matching is fundamentally incompatible with overlapping searches. + /// Namely, overlapping searches need to report matches as they are seen, + /// where as leftmost-first searches will continue searching even after a + /// match has been observed in order to find the conventional end position + /// of the match. More concretely, leftmost-first searches use dead states + /// to terminate a search after a specific match can no longer be extended. + /// Overlapping searches instead do the opposite by continuing the search + /// to find totally new matches (potentially of other patterns). 
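+    ///
+    /// Before the example, here is a sketch of how this routine can drive a
+    /// full "iterator" of overlapping matches: keep re-running the search
+    /// with the same `Input` and the same state until no match is reported.
+    /// (This is the same loop shape used by the provided
+    /// `try_which_overlapping_matches` implementation below.)
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     dfa::{Automaton, OverlappingState, dense},
+    ///     HalfMatch, Input, MatchKind,
+    /// };
+    ///
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new().match_kind(MatchKind::All))
+    ///     .build_many(&[r"[[:word:]]+$", r"[[:^space:]]+$"])?;
+    /// let input = Input::new("@foo");
+    /// let mut state = OverlappingState::start();
+    /// let mut matches = vec![];
+    /// loop {
+    ///     // The state remembers where the previous call left off, so each
+    ///     // call reports at most one additional match.
+    ///     dfa.try_search_overlapping_fwd(&input, &mut state)?;
+    ///     match state.get_match() {
+    ///         None => break,
+    ///         Some(hm) => matches.push(hm),
+    ///     }
+    /// }
+    /// let expected = vec![HalfMatch::must(1, 4), HalfMatch::must(0, 4)];
+    /// assert_eq!(expected, matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And the basic two-call version of the same search: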
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// dfa::{Automaton, OverlappingState, dense}, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .configure(dense::Config::new().match_kind(MatchKind::All)) + /// .build_many(&[r"[[:word:]]+$", r"[[:^space:]]+$"])?; + /// let haystack = "@foo"; + /// let mut state = OverlappingState::start(); + /// + /// let expected = Some(HalfMatch::must(1, 4)); + /// dfa.try_search_overlapping_fwd(&Input::new(haystack), &mut state)?; + /// assert_eq!(expected, state.get_match()); + /// + /// // The first pattern also matches at the same position, so re-running + /// // the search will yield another match. Notice also that the first + /// // pattern is returned after the second. This is because the second + /// // pattern begins its match before the first, is therefore an earlier + /// // match and is thus reported first. + /// let expected = Some(HalfMatch::must(0, 4)); + /// dfa.try_search_overlapping_fwd(&Input::new(haystack), &mut state)?; + /// assert_eq!(expected, state.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + fn try_search_overlapping_fwd( + &self, + input: &Input<'_>, + state: &mut OverlappingState, + ) -> Result<(), MatchError> { + let utf8empty = self.has_empty() && self.is_utf8(); + search::find_overlapping_fwd(self, input, state)?; + match state.get_match() { + None => Ok(()), + Some(_) if !utf8empty => Ok(()), + Some(_) => skip_empty_utf8_splits_overlapping( + input, + state, + |input, state| { + search::find_overlapping_fwd(self, input, state) + }, + ), + } + } + + /// Executes a reverse overlapping forward search. Matches, if one exists, + /// can be obtained via the [`OverlappingState::get_match`] method. + /// + /// When using this routine to implement an iterator of overlapping + /// matches, the `start` of the search should remain invariant throughout + /// iteration. The `OverlappingState` given to the search will keep track + /// of the current position of the search. (This is because multiple + /// matches may be reported at the same position, so only the search + /// implementation itself knows when to advance the position.) + /// + /// If for some reason you want the search to forget about its previous + /// state and restart the search at a particular position, then setting the + /// state to [`OverlappingState::start`] will accomplish that. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example: UTF-8 mode + /// + /// This examples demonstrates that UTF-8 mode applies to reverse + /// DFAs. When UTF-8 mode is enabled in the underlying NFA, then all + /// matches reported must correspond to valid UTF-8 spans. This includes + /// prohibiting zero-width matches that split a codepoint. + /// + /// UTF-8 mode is enabled by default. 
Notice below how the only zero-width + /// matches reported are those at UTF-8 boundaries: + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton, OverlappingState}, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .thompson(thompson::Config::new().reverse(true)) + /// .build_many(&[r"", r"☃"])?; + /// + /// // Run the reverse DFA to collect all matches. + /// let input = Input::new("☃"); + /// let mut state = OverlappingState::start(); + /// let mut matches = vec![]; + /// loop { + /// dfa.try_search_overlapping_rev(&input, &mut state)?; + /// match state.get_match() { + /// None => break, + /// Some(hm) => matches.push(hm), + /// } + /// } + /// + /// // No matches split a codepoint. + /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(1, 0), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Now let's look at the same example, but with UTF-8 mode on the + /// original NFA disabled (which results in disabling UTF-8 mode on the + /// DFA): + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton, OverlappingState}, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .thompson(thompson::Config::new().reverse(true).utf8(false)) + /// .build_many(&[r"", r"☃"])?; + /// + /// // Run the reverse DFA to collect all matches. + /// let input = Input::new("☃"); + /// let mut state = OverlappingState::start(); + /// let mut matches = vec![]; + /// loop { + /// dfa.try_search_overlapping_rev(&input, &mut state)?; + /// match state.get_match() { + /// None => break, + /// Some(hm) => matches.push(hm), + /// } + /// } + /// + /// // Now *all* positions match, even within a codepoint, + /// // because we lifted the requirement that matches + /// // correspond to valid UTF-8 spans. + /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(0, 2), + /// HalfMatch::must(0, 1), + /// HalfMatch::must(1, 0), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + fn try_search_overlapping_rev( + &self, + input: &Input<'_>, + state: &mut OverlappingState, + ) -> Result<(), MatchError> { + let utf8empty = self.has_empty() && self.is_utf8(); + search::find_overlapping_rev(self, input, state)?; + match state.get_match() { + None => Ok(()), + Some(_) if !utf8empty => Ok(()), + Some(_) => skip_empty_utf8_splits_overlapping( + input, + state, + |input, state| { + search::find_overlapping_rev(self, input, state) + }, + ), + } + } + + /// Writes the set of patterns that match anywhere in the given search + /// configuration to `patset`. If multiple patterns match at the same + /// position and the underlying DFA supports overlapping matches, then all + /// matching patterns are written to the given set. + /// + /// Unless all of the patterns in this DFA are anchored, then generally + /// speaking, this will visit every byte in the haystack. + /// + /// This search routine *does not* clear the pattern set. 
This gives some + /// flexibility to the caller (e.g., running multiple searches with the + /// same pattern set), but does make the API bug-prone if you're reusing + /// the same pattern set for multiple searches but intended them to be + /// independent. + /// + /// If a pattern ID matched but the given `PatternSet` does not have + /// sufficient capacity to store it, then it is not inserted and silently + /// dropped. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// This example shows how to find all matching patterns in a haystack, + /// even when some patterns match at the same position as other patterns. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// dfa::{Automaton, dense::DFA}, + /// Input, MatchKind, PatternSet, + /// }; + /// + /// let patterns = &[ + /// r"[[:word:]]+", + /// r"[0-9]+", + /// r"[[:alpha:]]+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]; + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .build_many(patterns)?; + /// + /// let input = Input::new("foobar"); + /// let mut patset = PatternSet::new(dfa.pattern_len()); + /// dfa.try_which_overlapping_matches(&input, &mut patset)?; + /// let expected = vec![0, 2, 3, 4, 6]; + /// let got: Vec = patset.iter().map(|p| p.as_usize()).collect(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "alloc")] + #[inline] + fn try_which_overlapping_matches( + &self, + input: &Input<'_>, + patset: &mut PatternSet, + ) -> Result<(), MatchError> { + let mut state = OverlappingState::start(); + while let Some(m) = { + self.try_search_overlapping_fwd(input, &mut state)?; + state.get_match() + } { + let _ = patset.insert(m.pattern()); + // There's nothing left to find, so we can stop. Or the caller + // asked us to. 
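+            // (The latter is the "earliest" option on the given `Input`:
+            // when it's enabled, we stop as soon as the first match is
+            // found.)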
+            if patset.is_full() || input.get_earliest() {
+                break;
+            }
+        }
+        Ok(())
+    }
+}
+
+unsafe impl<'a, A: Automaton + ?Sized> Automaton for &'a A {
+    #[inline]
+    fn next_state(&self, current: StateID, input: u8) -> StateID {
+        (**self).next_state(current, input)
+    }
+
+    #[inline]
+    unsafe fn next_state_unchecked(
+        &self,
+        current: StateID,
+        input: u8,
+    ) -> StateID {
+        (**self).next_state_unchecked(current, input)
+    }
+
+    #[inline]
+    fn next_eoi_state(&self, current: StateID) -> StateID {
+        (**self).next_eoi_state(current)
+    }
+
+    #[inline]
+    fn start_state(
+        &self,
+        config: &start::Config,
+    ) -> Result<StateID, StartError> {
+        (**self).start_state(config)
+    }
+
+    #[inline]
+    fn start_state_forward(
+        &self,
+        input: &Input<'_>,
+    ) -> Result<StateID, MatchError> {
+        (**self).start_state_forward(input)
+    }
+
+    #[inline]
+    fn start_state_reverse(
+        &self,
+        input: &Input<'_>,
+    ) -> Result<StateID, MatchError> {
+        (**self).start_state_reverse(input)
+    }
+
+    #[inline]
+    fn universal_start_state(&self, mode: Anchored) -> Option<StateID> {
+        (**self).universal_start_state(mode)
+    }
+
+    #[inline]
+    fn is_special_state(&self, id: StateID) -> bool {
+        (**self).is_special_state(id)
+    }
+
+    #[inline]
+    fn is_dead_state(&self, id: StateID) -> bool {
+        (**self).is_dead_state(id)
+    }
+
+    #[inline]
+    fn is_quit_state(&self, id: StateID) -> bool {
+        (**self).is_quit_state(id)
+    }
+
+    #[inline]
+    fn is_match_state(&self, id: StateID) -> bool {
+        (**self).is_match_state(id)
+    }
+
+    #[inline]
+    fn is_start_state(&self, id: StateID) -> bool {
+        (**self).is_start_state(id)
+    }
+
+    #[inline]
+    fn is_accel_state(&self, id: StateID) -> bool {
+        (**self).is_accel_state(id)
+    }
+
+    #[inline]
+    fn pattern_len(&self) -> usize {
+        (**self).pattern_len()
+    }
+
+    #[inline]
+    fn match_len(&self, id: StateID) -> usize {
+        (**self).match_len(id)
+    }
+
+    #[inline]
+    fn match_pattern(&self, id: StateID, index: usize) -> PatternID {
+        (**self).match_pattern(id, index)
+    }
+
+    #[inline]
+    fn has_empty(&self) -> bool {
+        (**self).has_empty()
+    }
+
+    #[inline]
+    fn is_utf8(&self) -> bool {
+        (**self).is_utf8()
+    }
+
+    #[inline]
+    fn is_always_start_anchored(&self) -> bool {
+        (**self).is_always_start_anchored()
+    }
+
+    #[inline]
+    fn accelerator(&self, id: StateID) -> &[u8] {
+        (**self).accelerator(id)
+    }
+
+    #[inline]
+    fn get_prefilter(&self) -> Option<&Prefilter> {
+        (**self).get_prefilter()
+    }
+
+    #[inline]
+    fn try_search_fwd(
+        &self,
+        input: &Input<'_>,
+    ) -> Result<Option<HalfMatch>, MatchError> {
+        (**self).try_search_fwd(input)
+    }
+
+    #[inline]
+    fn try_search_rev(
+        &self,
+        input: &Input<'_>,
+    ) -> Result<Option<HalfMatch>, MatchError> {
+        (**self).try_search_rev(input)
+    }
+
+    #[inline]
+    fn try_search_overlapping_fwd(
+        &self,
+        input: &Input<'_>,
+        state: &mut OverlappingState,
+    ) -> Result<(), MatchError> {
+        (**self).try_search_overlapping_fwd(input, state)
+    }
+
+    #[inline]
+    fn try_search_overlapping_rev(
+        &self,
+        input: &Input<'_>,
+        state: &mut OverlappingState,
+    ) -> Result<(), MatchError> {
+        (**self).try_search_overlapping_rev(input, state)
+    }
+
+    #[cfg(feature = "alloc")]
+    #[inline]
+    fn try_which_overlapping_matches(
+        &self,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) -> Result<(), MatchError> {
+        (**self).try_which_overlapping_matches(input, patset)
+    }
+}
+
+/// Represents the current state of an overlapping search.
+///
+/// This is used for overlapping searches since they need to know something
+/// about the previous search.
+/// For example, when multiple patterns match at the same position, this state
+/// tracks the last reported pattern so that the next search knows whether to
+/// report another matching pattern or continue with the search at the next
+/// position. It also tracks which state the last search call terminated in.
+///
+/// This type provides few introspection capabilities. The only thing a
+/// caller can do is construct it and pass it around to permit search routines
+/// to use it to track state, and also ask whether a match has been found.
+///
+/// Callers should always provide a fresh state constructed via
+/// [`OverlappingState::start`] when starting a new search. Reusing state from
+/// a previous search may result in incorrect results.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct OverlappingState {
+    /// The match reported by the most recent overlapping search to use this
+    /// state.
+    ///
+    /// If a search does not find any matches, then it is expected to clear
+    /// this value.
+    pub(crate) mat: Option<HalfMatch>,
+    /// The ID of the state that the search was in when the call terminated.
+    /// When this is a match state, `mat` must be set to a non-None value.
+    ///
+    /// A `None` value indicates the start state of the corresponding
+    /// automaton. We cannot use the actual ID, since any one automaton may
+    /// have many start states, and which one is in use depends on several
+    /// search-time factors.
+    pub(crate) id: Option<StateID>,
+    /// The position of the search.
+    ///
+    /// When `id` is None (i.e., we are starting a search), this is set to
+    /// the beginning of the search as given by the caller regardless of its
+    /// current value. Subsequent calls to an overlapping search pick up at
+    /// this offset.
+    pub(crate) at: usize,
+    /// The index into the matching patterns of the next match to report if
+    /// the current state is a match state. Note that this may be 1 greater
+    /// than the total number of matches to report for the current match
+    /// state. (In which case, no more matches should be reported at the
+    /// current position and the search should advance to the next position.)
+    pub(crate) next_match_index: Option<usize>,
+    /// This is set to true when a reverse overlapping search has entered its
+    /// EOI transitions.
+    ///
+    /// This isn't used in a forward search because it knows to stop once the
+    /// position exceeds the end of the search range. In a reverse search,
+    /// since we use unsigned offsets, we don't "know" once we've gone past
+    /// `0`. So the only way to detect it is with this extra flag. The reverse
+    /// overlapping search knows to terminate specifically after it has
+    /// reported all matches after following the EOI transition.
+    pub(crate) rev_eoi: bool,
+}
+
+impl OverlappingState {
+    /// Create a new overlapping state that begins at the start state of any
+    /// automaton.
+    pub fn start() -> OverlappingState {
+        OverlappingState {
+            mat: None,
+            id: None,
+            at: 0,
+            next_match_index: None,
+            rev_eoi: false,
+        }
+    }
+
+    /// Return the match result of the most recent search to execute with this
+    /// state.
+    ///
+    /// A search will clear this result automatically, such that if no match
+    /// is found, this will correctly report `None`.
+    pub fn get_match(&self) -> Option<HalfMatch> {
+        self.mat
+    }
+}
+
+/// An error that can occur when computing the start state for a search.
+///
+/// Computing a start state can fail for a few reasons, either because of an
+/// incorrect configuration or because the look-behind byte triggers a quit
+/// state. Typically, you don't need to handle this error if you're using
+/// [`Automaton::start_state_forward`] (or its reverse counterpart), as that
+/// routine automatically converts `StartError` to a [`MatchError`] for you.
+///
+/// This error may be returned by the [`Automaton::start_state`] routine.
+///
+/// This error implements the `std::error::Error` trait when the `std` feature
+/// is enabled.
+///
+/// This error is marked as non-exhaustive. New variants may be added in a
+/// semver compatible release.
+#[non_exhaustive]
+#[derive(Clone, Debug)]
+pub enum StartError {
+    /// An error that occurs when a starting configuration's look-behind byte
+    /// is in this DFA's quit set.
+    Quit {
+        /// The quit byte that was found.
+        byte: u8,
+    },
+    /// An error that occurs when the caller requests an anchored mode that
+    /// isn't supported by the DFA.
+    UnsupportedAnchored {
+        /// The anchored mode given that is unsupported.
+        mode: Anchored,
+    },
+}
+
+impl StartError {
+    pub(crate) fn quit(byte: u8) -> StartError {
+        StartError::Quit { byte }
+    }
+
+    pub(crate) fn unsupported_anchored(mode: Anchored) -> StartError {
+        StartError::UnsupportedAnchored { mode }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for StartError {}
+
+impl core::fmt::Display for StartError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match *self {
+            StartError::Quit { byte } => write!(
+                f,
+                "error computing start state because the look-behind byte \
+                 {:?} triggered a quit state",
+                crate::util::escape::DebugByte(byte),
+            ),
+            StartError::UnsupportedAnchored { mode: Anchored::Yes } => {
+                write!(
+                    f,
+                    "error computing start state because \
+                     anchored searches are not supported or enabled"
+                )
+            }
+            StartError::UnsupportedAnchored { mode: Anchored::No } => {
+                write!(
+                    f,
+                    "error computing start state because \
+                     unanchored searches are not supported or enabled"
+                )
+            }
+            StartError::UnsupportedAnchored {
+                mode: Anchored::Pattern(pid),
+            } => {
+                write!(
+                    f,
+                    "error computing start state because \
+                     anchored searches for a specific pattern ({}) \
+                     are not supported or enabled",
+                    pid.as_usize(),
+                )
+            }
+        }
+    }
+}
+
+/// Runs the given overlapping `search` function (forwards or backwards) until
+/// a match is found whose offset does not split a codepoint.
+///
+/// This is *not* always correct to call. It should only be called when the
+/// DFA has UTF-8 mode enabled *and* it can produce zero-width matches.
+/// Calling this when either of those conditions doesn't hold might result in
+/// legitimate matches getting skipped.
+#[cold]
+#[inline(never)]
+fn skip_empty_utf8_splits_overlapping<F>(
+    input: &Input<'_>,
+    state: &mut OverlappingState,
+    mut search: F,
+) -> Result<(), MatchError>
+where
+    F: FnMut(&Input<'_>, &mut OverlappingState) -> Result<(), MatchError>,
+{
+    // Note that this routine works for forwards and reverse searches
+    // even though there's no code here to handle those cases. That's
+    // because overlapping searches drive themselves to completion via
+    // `OverlappingState`. So all we have to do is push it until no matches
+    // are found.
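+    //
+    // The one exception below is anchored searches: those cannot be retried
+    // at a new position, so if the only match found splits a codepoint, we
+    // simply clear it instead of searching further.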
+
+    let mut hm = match state.get_match() {
+        None => return Ok(()),
+        Some(hm) => hm,
+    };
+    if input.get_anchored().is_anchored() {
+        if !input.is_char_boundary(hm.offset()) {
+            state.mat = None;
+        }
+        return Ok(());
+    }
+    while !input.is_char_boundary(hm.offset()) {
+        search(input, state)?;
+        hm = match state.get_match() {
+            None => return Ok(()),
+            Some(hm) => hm,
+        };
+    }
+    Ok(())
+}
+
+/// Write a prefix "state" indicator for fmt::Debug impls.
+///
+/// Specifically, this tries to succinctly distinguish the different types of
+/// states: dead states, quit states, accelerated states, start states and
+/// match states. It even accounts for the possible overlapping of different
+/// state types.
+pub(crate) fn fmt_state_indicator<A: Automaton>(
+    f: &mut core::fmt::Formatter<'_>,
+    dfa: A,
+    id: StateID,
+) -> core::fmt::Result {
+    if dfa.is_dead_state(id) {
+        write!(f, "D")?;
+        if dfa.is_start_state(id) {
+            write!(f, ">")?;
+        } else {
+            write!(f, " ")?;
+        }
+    } else if dfa.is_quit_state(id) {
+        write!(f, "Q ")?;
+    } else if dfa.is_start_state(id) {
+        if dfa.is_accel_state(id) {
+            write!(f, "A>")?;
+        } else {
+            write!(f, " >")?;
+        }
+    } else if dfa.is_match_state(id) {
+        if dfa.is_accel_state(id) {
+            write!(f, "A*")?;
+        } else {
+            write!(f, " *")?;
+        }
+    } else if dfa.is_accel_state(id) {
+        write!(f, "A ")?;
+    } else {
+        write!(f, "  ")?;
+    }
+    Ok(())
+}
+
+#[cfg(all(test, feature = "syntax", feature = "dfa-build"))]
+mod tests {
+    // A basic test ensuring that our Automaton trait is object safe. (This
+    // is the main reason why we don't define the search routines as generic
+    // over `Into<Input>`.)
+    #[test]
+    fn object_safe() {
+        use crate::{
+            dfa::{dense, Automaton},
+            HalfMatch, Input,
+        };
+
+        let dfa = dense::DFA::new("abc").unwrap();
+        let dfa: &dyn Automaton = &dfa;
+        assert_eq!(
+            Ok(Some(HalfMatch::must(0, 6))),
+            dfa.try_search_fwd(&Input::new(b"xyzabcxyz")),
+        );
+    }
+}
diff --git a/vendor/regex-automata/src/dfa/dense.rs b/vendor/regex-automata/src/dfa/dense.rs
new file mode 100644
index 0000000..fd96bc8
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/dense.rs
@@ -0,0 +1,5147 @@
+/*!
+Types and routines specific to dense DFAs.
+
+This module is the home of [`dense::DFA`](DFA).
+
+This module also contains a [`dense::Builder`](Builder) and a
+[`dense::Config`](Config) for building and configuring a dense DFA.
+*/
+
+#[cfg(feature = "dfa-build")]
+use core::cmp;
+use core::{convert::TryFrom, fmt, iter, mem::size_of, slice};
+
+#[cfg(feature = "dfa-build")]
+use alloc::{
+    collections::{BTreeMap, BTreeSet},
+    vec,
+    vec::Vec,
+};
+
+#[cfg(feature = "dfa-build")]
+use crate::{
+    dfa::{
+        accel::Accel, determinize, minimize::Minimizer, remapper::Remapper,
+        sparse,
+    },
+    nfa::thompson,
+    util::{look::LookMatcher, search::MatchKind},
+};
+use crate::{
+    dfa::{
+        accel::Accels,
+        automaton::{fmt_state_indicator, Automaton, StartError},
+        special::Special,
+        start::StartKind,
+        DEAD,
+    },
+    util::{
+        alphabet::{self, ByteClasses, ByteSet},
+        int::{Pointer, Usize},
+        prefilter::Prefilter,
+        primitives::{PatternID, StateID},
+        search::Anchored,
+        start::{self, Start, StartByteMap},
+        wire::{self, DeserializeError, Endian, SerializeError},
+    },
+};
+
+/// The label that is pre-pended to a serialized DFA.
+const LABEL: &str = "rust-regex-automata-dfa-dense";
+
+/// The format version of dense regexes. This version gets incremented when a
+/// change occurs.
+/// A change may not necessarily be a breaking change, but the version does
+/// permit good error messages in the case where a breaking change is made.
+const VERSION: u32 = 2;
+
+/// The configuration used for compiling a dense DFA.
+///
+/// As a convenience, [`DFA::config`] is an alias for [`Config::new`]. The
+/// advantage of the former is that it often lets you avoid importing the
+/// `Config` type directly.
+///
+/// A dense DFA configuration is a simple data object that is typically used
+/// with [`dense::Builder::configure`](self::Builder::configure).
+///
+/// The default configuration guarantees that a search will never return
+/// a "quit" error, although it is possible for a search to fail if
+/// [`Config::starts_for_each_pattern`] wasn't enabled (which it is
+/// not by default) and an [`Anchored::Pattern`] mode is requested via
+/// [`Input`](crate::Input).
+#[cfg(feature = "dfa-build")]
+#[derive(Clone, Debug, Default)]
+pub struct Config {
+    // As with other configuration types in this crate, we put all our knobs
+    // in options so that we can distinguish between "default" and "not set."
+    // This makes it possible to easily combine multiple configurations
+    // without default values overwriting explicitly specified values. See the
+    // 'overwrite' method.
+    //
+    // For docs on the fields below, see the corresponding method setters.
+    accelerate: Option<bool>,
+    pre: Option<Option<Prefilter>>,
+    minimize: Option<bool>,
+    match_kind: Option<MatchKind>,
+    start_kind: Option<StartKind>,
+    starts_for_each_pattern: Option<bool>,
+    byte_classes: Option<bool>,
+    unicode_word_boundary: Option<bool>,
+    quitset: Option<ByteSet>,
+    specialize_start_states: Option<bool>,
+    dfa_size_limit: Option<Option<usize>>,
+    determinize_size_limit: Option<Option<usize>>,
+}
+
+#[cfg(feature = "dfa-build")]
+impl Config {
+    /// Return a new default dense DFA compiler configuration.
+    pub fn new() -> Config {
+        Config::default()
+    }
+
+    /// Enable state acceleration.
+    ///
+    /// When enabled, DFA construction will analyze each state to determine
+    /// whether it is eligible for simple acceleration. Acceleration typically
+    /// occurs when most of a state's transitions loop back to itself, leaving
+    /// only a select few bytes that will exit the state. When this occurs,
+    /// other routines like `memchr` can be used to look for those bytes which
+    /// may be much faster than traversing the DFA.
+    ///
+    /// Callers may elect to disable this if consistent performance is more
+    /// desirable than variable performance. Namely, acceleration can
+    /// sometimes make searching slower than it otherwise would be if the
+    /// transitions that leave accelerated states are traversed frequently.
+    ///
+    /// See [`Automaton::accelerator`] for an example.
+    ///
+    /// This is enabled by default.
+    pub fn accelerate(mut self, yes: bool) -> Config {
+        self.accelerate = Some(yes);
+        self
+    }
+
+    /// Set a prefilter to be used whenever a start state is entered.
+    ///
+    /// A [`Prefilter`] in this context is meant to accelerate searches by
+    /// looking for literal prefixes that every match for the corresponding
+    /// pattern (or patterns) must start with. Once a prefilter produces a
+    /// match, the underlying search routine continues on to try and confirm
+    /// the match.
+    ///
+    /// Be warned that setting a prefilter does not guarantee that the search
+    /// will be faster.
While it's usually a good bet, if the prefilter + /// produces a lot of false positive candidates (i.e., positions matched + /// by the prefilter but not by the regex), then the overall result can + /// be slower than if you had just executed the regex engine without any + /// prefilters. + /// + /// Note that unless [`Config::specialize_start_states`] has been + /// explicitly set, then setting this will also enable (when `pre` is + /// `Some`) or disable (when `pre` is `None`) start state specialization. + /// This occurs because without start state specialization, a prefilter + /// is likely to be less effective. And without a prefilter, start state + /// specialization is usually pointless. + /// + /// **WARNING:** Note that prefilters are not preserved as part of + /// serialization. Serializing a DFA will drop its prefilter. + /// + /// By default no prefilter is set. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton}, + /// util::prefilter::Prefilter, + /// Input, HalfMatch, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "bar"]); + /// let re = DFA::builder() + /// .configure(DFA::config().prefilter(pre)) + /// .build(r"(foo|bar)[a-z]+")?; + /// let input = Input::new("foo1 barfox bar"); + /// assert_eq!( + /// Some(HalfMatch::must(0, 11)), + /// re.try_search_fwd(&input)?, + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Be warned though that an incorrect prefilter can lead to incorrect + /// results! + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton}, + /// util::prefilter::Prefilter, + /// Input, HalfMatch, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "car"]); + /// let re = DFA::builder() + /// .configure(DFA::config().prefilter(pre)) + /// .build(r"(foo|bar)[a-z]+")?; + /// let input = Input::new("foo1 barfox bar"); + /// assert_eq!( + /// // No match reported even though there clearly is one! + /// None, + /// re.try_search_fwd(&input)?, + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn prefilter(mut self, pre: Option) -> Config { + self.pre = Some(pre); + if self.specialize_start_states.is_none() { + self.specialize_start_states = + Some(self.get_prefilter().is_some()); + } + self + } + + /// Minimize the DFA. + /// + /// When enabled, the DFA built will be minimized such that it is as small + /// as possible. + /// + /// Whether one enables minimization or not depends on the types of costs + /// you're willing to pay and how much you care about its benefits. In + /// particular, minimization has worst case `O(n*k*logn)` time and `O(k*n)` + /// space, where `n` is the number of DFA states and `k` is the alphabet + /// size. In practice, minimization can be quite costly in terms of both + /// space and time, so it should only be done if you're willing to wait + /// longer to produce a DFA. In general, you might want a minimal DFA in + /// the following circumstances: + /// + /// 1. You would like to optimize for the size of the automaton. This can + /// manifest in one of two ways. Firstly, if you're converting the + /// DFA into Rust code (or a table embedded in the code), then a minimal + /// DFA will translate into a corresponding reduction in code size, and + /// thus, also the final compiled binary size. Secondly, if you are + /// building many DFAs and putting them on the heap, you'll be able to + /// fit more if they are smaller. 
Note though that building a minimal + /// DFA itself requires additional space; you only realize the space + /// savings once the minimal DFA is constructed (at which point, the + /// space used for minimization is freed). + /// 2. You've observed that a smaller DFA results in faster match + /// performance. Naively, this isn't guaranteed since there is no + /// inherent difference between matching with a bigger-than-minimal + /// DFA and a minimal DFA. However, a smaller DFA may make use of your + /// CPU's cache more efficiently. + /// 3. You are trying to establish an equivalence between regular + /// languages. The standard method for this is to build a minimal DFA + /// for each language and then compare them. If the DFAs are equivalent + /// (up to state renaming), then the languages are equivalent. + /// + /// Typically, minimization only makes sense as an offline process. That + /// is, one might minimize a DFA before serializing it to persistent + /// storage. In practical terms, minimization can take around an order of + /// magnitude more time than compiling the initial DFA via determinization. + /// + /// This option is disabled by default. + pub fn minimize(mut self, yes: bool) -> Config { + self.minimize = Some(yes); + self + } + + /// Set the desired match semantics. + /// + /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the + /// match semantics of Perl-like regex engines. That is, when multiple + /// patterns would match at the same leftmost position, the pattern that + /// appears first in the concrete syntax is chosen. + /// + /// Currently, the only other kind of match semantics supported is + /// [`MatchKind::All`]. This corresponds to classical DFA construction + /// where all possible matches are added to the DFA. + /// + /// Typically, `All` is used when one wants to execute an overlapping + /// search and `LeftmostFirst` otherwise. In particular, it rarely makes + /// sense to use `All` with the various "leftmost" find routines, since the + /// leftmost routines depend on the `LeftmostFirst` automata construction + /// strategy. Specifically, `LeftmostFirst` adds dead states to the DFA + /// as a way to terminate the search and report a match. `LeftmostFirst` + /// also supports non-greedy matches using this strategy where as `All` + /// does not. + /// + /// # Example: overlapping search + /// + /// This example shows the typical use of `MatchKind::All`, which is to + /// report overlapping matches. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// dfa::{Automaton, OverlappingState, dense}, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .configure(dense::Config::new().match_kind(MatchKind::All)) + /// .build_many(&[r"\w+$", r"\S+$"])?; + /// let input = Input::new("@foo"); + /// let mut state = OverlappingState::start(); + /// + /// let expected = Some(HalfMatch::must(1, 4)); + /// dfa.try_search_overlapping_fwd(&input, &mut state)?; + /// assert_eq!(expected, state.get_match()); + /// + /// // The first pattern also matches at the same position, so re-running + /// // the search will yield another match. Notice also that the first + /// // pattern is returned after the second. This is because the second + /// // pattern begins its match before the first, is therefore an earlier + /// // match and is thus reported first. 
+ /// let expected = Some(HalfMatch::must(0, 4)); + /// dfa.try_search_overlapping_fwd(&input, &mut state)?; + /// assert_eq!(expected, state.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: reverse automaton to find start of match + /// + /// Another example for using `MatchKind::All` is for constructing a + /// reverse automaton to find the start of a match. `All` semantics are + /// used for this in order to find the longest possible match, which + /// corresponds to the leftmost starting position. + /// + /// Note that if you need the starting position then + /// [`dfa::regex::Regex`](crate::dfa::regex::Regex) will handle this for + /// you, so it's usually not necessary to do this yourself. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense, Automaton, StartKind}, + /// nfa::thompson::NFA, + /// Anchored, HalfMatch, Input, MatchKind, + /// }; + /// + /// let haystack = "123foobar456".as_bytes(); + /// let pattern = r"[a-z]+r"; + /// + /// let dfa_fwd = dense::DFA::new(pattern)?; + /// let dfa_rev = dense::Builder::new() + /// .thompson(NFA::config().reverse(true)) + /// .configure(dense::Config::new() + /// // This isn't strictly necessary since both anchored and + /// // unanchored searches are supported by default. But since + /// // finding the start-of-match only requires anchored searches, + /// // we can get rid of the unanchored configuration and possibly + /// // slim down our DFA considerably. + /// .start_kind(StartKind::Anchored) + /// .match_kind(MatchKind::All) + /// ) + /// .build(pattern)?; + /// let expected_fwd = HalfMatch::must(0, 9); + /// let expected_rev = HalfMatch::must(0, 3); + /// let got_fwd = dfa_fwd.try_search_fwd(&Input::new(haystack))?.unwrap(); + /// // Here we don't specify the pattern to search for since there's only + /// // one pattern and we're doing a leftmost search. But if this were an + /// // overlapping search, you'd need to specify the pattern that matched + /// // in the forward direction. (Otherwise, you might wind up finding the + /// // starting position of a match of some other pattern.) That in turn + /// // requires building the reverse automaton with starts_for_each_pattern + /// // enabled. Indeed, this is what Regex does internally. + /// let input = Input::new(haystack) + /// .range(..got_fwd.offset()) + /// .anchored(Anchored::Yes); + /// let got_rev = dfa_rev.try_search_rev(&input)?.unwrap(); + /// assert_eq!(expected_fwd, got_fwd); + /// assert_eq!(expected_rev, got_rev); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn match_kind(mut self, kind: MatchKind) -> Config { + self.match_kind = Some(kind); + self + } + + /// The type of starting state configuration to use for a DFA. + /// + /// By default, the starting state configuration is [`StartKind::Both`]. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton, StartKind}, + /// Anchored, HalfMatch, Input, + /// }; + /// + /// let haystack = "quux foo123"; + /// let expected = HalfMatch::must(0, 11); + /// + /// // By default, DFAs support both anchored and unanchored searches. + /// let dfa = DFA::new(r"[0-9]+")?; + /// let input = Input::new(haystack); + /// assert_eq!(Some(expected), dfa.try_search_fwd(&input)?); + /// + /// // But if we only need anchored searches, then we can build a DFA + /// // that only supports anchored searches. 
This leads to a smaller DFA + /// // (potentially significantly smaller in some cases), but a DFA that + /// // will panic if you try to use it with an unanchored search. + /// let dfa = DFA::builder() + /// .configure(DFA::config().start_kind(StartKind::Anchored)) + /// .build(r"[0-9]+")?; + /// let input = Input::new(haystack) + /// .range(8..) + /// .anchored(Anchored::Yes); + /// assert_eq!(Some(expected), dfa.try_search_fwd(&input)?); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn start_kind(mut self, kind: StartKind) -> Config { + self.start_kind = Some(kind); + self + } + + /// Whether to compile a separate start state for each pattern in the + /// automaton. + /// + /// When enabled, a separate **anchored** start state is added for each + /// pattern in the DFA. When this start state is used, then the DFA will + /// only search for matches for the pattern specified, even if there are + /// other patterns in the DFA. + /// + /// The main downside of this option is that it can potentially increase + /// the size of the DFA and/or increase the time it takes to build the DFA. + /// + /// There are a few reasons one might want to enable this (it's disabled + /// by default): + /// + /// 1. When looking for the start of an overlapping match (using a + /// reverse DFA), doing it correctly requires starting the reverse search + /// using the starting state of the pattern that matched in the forward + /// direction. Indeed, when building a [`Regex`](crate::dfa::regex::Regex), + /// it will automatically enable this option when building the reverse DFA + /// internally. + /// 2. When you want to use a DFA with multiple patterns to both search + /// for matches of any pattern or to search for anchored matches of one + /// particular pattern while using the same DFA. (Otherwise, you would need + /// to compile a new DFA for each pattern.) + /// 3. Since the start states added for each pattern are anchored, if you + /// compile an unanchored DFA with one pattern while also enabling this + /// option, then you can use the same DFA to perform anchored or unanchored + /// searches. The latter you get with the standard search APIs. The former + /// you get from the various `_at` search methods that allow you specify a + /// pattern ID to search for. + /// + /// By default this is disabled. + /// + /// # Example + /// + /// This example shows how to use this option to permit the same DFA to + /// run both anchored and unanchored searches for a single pattern. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense, Automaton}, + /// Anchored, HalfMatch, PatternID, Input, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .configure(dense::Config::new().starts_for_each_pattern(true)) + /// .build(r"foo[0-9]+")?; + /// let haystack = "quux foo123"; + /// + /// // Here's a normal unanchored search. Notice that we use 'None' for the + /// // pattern ID. Since the DFA was built as an unanchored machine, it + /// // use its default unanchored starting state. + /// let expected = HalfMatch::must(0, 11); + /// let input = Input::new(haystack); + /// assert_eq!(Some(expected), dfa.try_search_fwd(&input)?); + /// // But now if we explicitly specify the pattern to search ('0' being + /// // the only pattern in the DFA), then it will use the starting state + /// // for that specific pattern which is always anchored. Since the + /// // pattern doesn't have a match at the beginning of the haystack, we + /// // find nothing. 
+ /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(0))); + /// assert_eq!(None, dfa.try_search_fwd(&input)?); + /// // And finally, an anchored search is not the same as putting a '^' at + /// // beginning of the pattern. An anchored search can only match at the + /// // beginning of the *search*, which we can change: + /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(0))) + /// .range(5..); + /// assert_eq!(Some(expected), dfa.try_search_fwd(&input)?); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn starts_for_each_pattern(mut self, yes: bool) -> Config { + self.starts_for_each_pattern = Some(yes); + self + } + + /// Whether to attempt to shrink the size of the DFA's alphabet or not. + /// + /// This option is enabled by default and should never be disabled unless + /// one is debugging a generated DFA. + /// + /// When enabled, the DFA will use a map from all possible bytes to their + /// corresponding equivalence class. Each equivalence class represents a + /// set of bytes that does not discriminate between a match and a non-match + /// in the DFA. For example, the pattern `[ab]+` has at least two + /// equivalence classes: a set containing `a` and `b` and a set containing + /// every byte except for `a` and `b`. `a` and `b` are in the same + /// equivalence class because they never discriminate between a match and a + /// non-match. + /// + /// The advantage of this map is that the size of the transition table + /// can be reduced drastically from `#states * 256 * sizeof(StateID)` to + /// `#states * k * sizeof(StateID)` where `k` is the number of equivalence + /// classes (rounded up to the nearest power of 2). As a result, total + /// space usage can decrease substantially. Moreover, since a smaller + /// alphabet is used, DFA compilation becomes faster as well. + /// + /// **WARNING:** This is only useful for debugging DFAs. Disabling this + /// does not yield any speed advantages. Namely, even when this is + /// disabled, a byte class map is still used while searching. The only + /// difference is that every byte will be forced into its own distinct + /// equivalence class. This is useful for debugging the actual generated + /// transitions because it lets one see the transitions defined on actual + /// bytes instead of the equivalence classes. + pub fn byte_classes(mut self, yes: bool) -> Config { + self.byte_classes = Some(yes); + self + } + + /// Heuristically enable Unicode word boundaries. + /// + /// When set, this will attempt to implement Unicode word boundaries as if + /// they were ASCII word boundaries. This only works when the search input + /// is ASCII only. If a non-ASCII byte is observed while searching, then a + /// [`MatchError::quit`](crate::MatchError::quit) error is returned. + /// + /// A possible alternative to enabling this option is to simply use an + /// ASCII word boundary, e.g., via `(?-u:\b)`. The main reason to use this + /// option is if you absolutely need Unicode support. This option lets one + /// use a fast search implementation (a DFA) for some potentially very + /// common cases, while providing the option to fall back to some other + /// regex engine to handle the general case when an error is returned. + /// + /// If the pattern provided has no Unicode word boundary in it, then this + /// option has no effect. (That is, quitting on a non-ASCII byte only + /// occurs when this option is enabled _and_ a Unicode word boundary is + /// present in the pattern.) 
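+    ///
+    /// As a sketch of the fallback idiom described above (`slow_engine` is a
+    /// hypothetical stand-in for any regex engine that fully supports
+    /// Unicode word boundaries, such as a PikeVM):
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::{Automaton, dense},
+    ///     HalfMatch, Input,
+    /// };
+    ///
+    /// fn slow_engine(haystack: &[u8]) -> Option<HalfMatch> {
+    ///     // Hypothetical: delegate to an engine without the ASCII-only
+    ///     // restriction. This stub just reports no match.
+    ///     None
+    /// }
+    ///
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new().unicode_word_boundary(true))
+    ///     .build(r"\b[0-9]+\b")?;
+    ///
+    /// // The DFA quits as soon as it sees the non-ASCII snowman byte, so
+    /// // we fall back to the slower engine for this haystack.
+    /// let haystack = "☃ 123".as_bytes();
+    /// let got = match dfa.try_search_fwd(&Input::new(haystack)) {
+    ///     Ok(m) => m,
+    ///     Err(_) => slow_engine(haystack),
+    /// };
+    /// assert_eq!(None, got); // (our stub always reports no match)
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```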
+ /// + /// This is almost equivalent to setting all non-ASCII bytes to be quit + /// bytes. The only difference is that this will cause non-ASCII bytes to + /// be quit bytes _only_ when a Unicode word boundary is present in the + /// pattern. + /// + /// When enabling this option, callers _must_ be prepared to handle + /// a [`MatchError`](crate::MatchError) error during search. + /// When using a [`Regex`](crate::dfa::regex::Regex), this corresponds + /// to using the `try_` suite of methods. Alternatively, if + /// callers can guarantee that their input is ASCII only, then a + /// [`MatchError::quit`](crate::MatchError::quit) error will never be + /// returned while searching. + /// + /// This is disabled by default. + /// + /// # Example + /// + /// This example shows how to heuristically enable Unicode word boundaries + /// in a pattern. It also shows what happens when a search comes across a + /// non-ASCII byte. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{Automaton, dense}, + /// HalfMatch, Input, MatchError, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .configure(dense::Config::new().unicode_word_boundary(true)) + /// .build(r"\b[0-9]+\b")?; + /// + /// // The match occurs before the search ever observes the snowman + /// // character, so no error occurs. + /// let haystack = "foo 123 ☃".as_bytes(); + /// let expected = Some(HalfMatch::must(0, 7)); + /// let got = dfa.try_search_fwd(&Input::new(haystack))?; + /// assert_eq!(expected, got); + /// + /// // Notice that this search fails, even though the snowman character + /// // occurs after the ending match offset. This is because search + /// // routines read one byte past the end of the search to account for + /// // look-around, and indeed, this is required here to determine whether + /// // the trailing \b matches. + /// let haystack = "foo 123 ☃".as_bytes(); + /// let expected = MatchError::quit(0xE2, 8); + /// let got = dfa.try_search_fwd(&Input::new(haystack)); + /// assert_eq!(Err(expected), got); + /// + /// // Another example is executing a search where the span of the haystack + /// // we specify is all ASCII, but there is non-ASCII just before it. This + /// // correctly also reports an error. + /// let input = Input::new("β123").range(2..); + /// let expected = MatchError::quit(0xB2, 1); + /// let got = dfa.try_search_fwd(&input); + /// assert_eq!(Err(expected), got); + /// + /// // And similarly for the trailing word boundary. + /// let input = Input::new("123β").range(..3); + /// let expected = MatchError::quit(0xCE, 3); + /// let got = dfa.try_search_fwd(&input); + /// assert_eq!(Err(expected), got); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn unicode_word_boundary(mut self, yes: bool) -> Config { + // We have a separate option for this instead of just setting the + // appropriate quit bytes here because we don't want to set quit bytes + // for every regex. We only want to set them when the regex contains a + // Unicode word boundary. + self.unicode_word_boundary = Some(yes); + self + } + + /// Add a "quit" byte to the DFA. + /// + /// When a quit byte is seen during search time, then search will return + /// a [`MatchError::quit`](crate::MatchError::quit) error indicating the + /// offset at which the search stopped. + /// + /// A quit byte will always overrule any other aspects of a regex. 
+    ///
+    /// This mechanism is primarily useful for heuristically enabling
+    /// certain features like Unicode word boundaries in a DFA. Namely, if
+    /// the input to search is ASCII, then a Unicode word boundary can be
+    /// implemented via an ASCII word boundary with no change in semantics.
+    /// Thus, a DFA can attempt to match a Unicode word boundary but give up
+    /// as soon as it observes a non-ASCII byte. Indeed, if callers set all
+    /// non-ASCII bytes to be quit bytes, then Unicode word boundaries will
+    /// be permitted when building DFAs. Of course, callers should enable
+    /// [`Config::unicode_word_boundary`] if they want this behavior
+    /// instead. (The advantage being that non-ASCII quit bytes will only
+    /// be added if a Unicode word boundary is in the pattern.)
+    ///
+    /// When enabling this option, callers _must_ be prepared to handle a
+    /// [`MatchError`](crate::MatchError) error during search. When using a
+    /// [`Regex`](crate::dfa::regex::Regex), this corresponds to using the
+    /// `try_` suite of methods.
+    ///
+    /// By default, there are no quit bytes set.
+    ///
+    /// # Panics
+    ///
+    /// This panics if heuristic Unicode word boundaries are enabled and any
+    /// non-ASCII byte is removed from the set of quit bytes. Namely,
+    /// enabling Unicode word boundaries requires setting every non-ASCII
+    /// byte to a quit byte. So if the caller attempts to undo any of that,
+    /// then this will panic.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to cause a search to terminate if it sees a
+    /// `\n` byte. This could be useful if, for example, you wanted to
+    /// prevent a user supplied pattern from matching across a line
+    /// boundary.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::{Automaton, dense}, Input, MatchError};
+    ///
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new().quit(b'\n', true))
+    ///     .build(r"foo\p{any}+bar")?;
+    ///
+    /// let haystack = "foo\nbar".as_bytes();
+    /// // Normally this would produce a match, since \p{any} contains '\n'.
+    /// // But since we instructed the automaton to enter a quit state if a
+    /// // '\n' is observed, this produces a match error instead.
+    /// let expected = MatchError::quit(b'\n', 3);
+    /// let got = dfa.try_search_fwd(&Input::new(haystack)).unwrap_err();
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn quit(mut self, byte: u8, yes: bool) -> Config {
+        if self.get_unicode_word_boundary() && !byte.is_ascii() && !yes {
+            panic!(
+                "cannot set non-ASCII byte to be non-quit when \
+                 Unicode word boundaries are enabled"
+            );
+        }
+        if self.quitset.is_none() {
+            self.quitset = Some(ByteSet::empty());
+        }
+        if yes {
+            self.quitset.as_mut().unwrap().add(byte);
+        } else {
+            self.quitset.as_mut().unwrap().remove(byte);
+        }
+        self
+    }
+
+    /// Enable specializing start states in the DFA.
+    ///
+    /// When start states are specialized, an implementor of a search
+    /// routine using a DFA can tell when the search has entered a starting
+    /// state. When start states aren't specialized, then it is impossible
+    /// to know whether the search has entered a start state.
+    ///
+    /// Ideally, this option wouldn't need to exist and we could always
+    /// specialize start states. The problem is that start states can be
+    /// quite active. This in turn means that an efficient search routine is
+    /// likely to ping-pong between a heavily optimized hot loop that
+    /// handles most states and a less optimized specialized handling of
+    /// start states. This causes branches to get heavily mispredicted and
+    /// overall can materially decrease throughput. Therefore, specializing
+    /// start states should only be enabled when it is needed.
+    ///
+    /// Knowing whether a search is in a start state is typically useful
+    /// when a prefilter is active for the search. A prefilter is typically
+    /// only run when in a start state and a prefilter can greatly
+    /// accelerate a search. Therefore, the possible cost of specializing
+    /// start states is worth it in this case. Otherwise, if you have no
+    /// prefilter, there is likely no reason to specialize start states.
+    ///
+    /// This is disabled by default, but note that it is automatically
+    /// enabled (or disabled) if [`Config::prefilter`] is set. Namely, unless
+    /// `specialize_start_states` has already been set, [`Config::prefilter`]
+    /// will automatically enable or disable it based on whether a prefilter
+    /// is present or not, respectively. This is done because a prefilter's
+    /// effectiveness is rooted in being executed whenever the DFA is in a
+    /// start state, and that's only possible to do when they are
+    /// specialized.
+    ///
+    /// Note that it is plausibly reasonable to _disable_ this option
+    /// explicitly while _enabling_ a prefilter. In that case, a prefilter
+    /// will still be run at the beginning of a search, but never again.
+    /// This in theory could strike a good balance if you're in a situation
+    /// where a prefilter is likely to produce many false positive
+    /// candidates.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to enable start state specialization and then
+    /// shows how to check whether a state is a start state or not.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, Input};
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().specialize_start_states(true))
+    ///     .build(r"[a-z]+")?;
+    ///
+    /// let haystack = "123 foobar 4567".as_bytes();
+    /// let sid = dfa.start_state_forward(&Input::new(haystack))?;
+    /// // The ID returned by 'start_state_forward' will always be tagged as
+    /// // a start state when start state specialization is enabled.
+    /// assert!(dfa.is_special_state(sid));
+    /// assert!(dfa.is_start_state(sid));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Compare the above with the default DFA configuration where start
+    /// states are _not_ specialized. In this case, the start state is not
+    /// tagged at all:
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, Input};
+    ///
+    /// let dfa = DFA::new(r"[a-z]+")?;
+    ///
+    /// let haystack = "123 foobar 4567";
+    /// let sid = dfa.start_state_forward(&Input::new(haystack))?;
+    /// // Start states are not special in the default configuration!
+    /// assert!(!dfa.is_special_state(sid));
+    /// assert!(!dfa.is_start_state(sid));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
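+    ///
+    /// As a final sketch, here is the automatic behavior described above:
+    /// attaching a prefilter (and leaving `specialize_start_states` unset)
+    /// turns specialization on. This uses
+    /// [`Prefilter::new`](crate::util::prefilter::Prefilter::new), which
+    /// may return `None` in build configurations without prefilter support.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::{Automaton, dense::DFA},
+    ///     util::prefilter::Prefilter,
+    ///     Input, MatchKind,
+    /// };
+    ///
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo"]);
+    /// assert!(pre.is_some());
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().prefilter(pre))
+    ///     .build(r"foo[a-z]+")?;
+    /// // The prefilter implies start state specialization.
+    /// let sid = dfa.start_state_forward(&Input::new("foobar"))?;
+    /// assert!(dfa.is_start_state(sid));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```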
+    pub fn specialize_start_states(mut self, yes: bool) -> Config {
+        self.specialize_start_states = Some(yes);
+        self
+    }
+
+    /// Set a size limit on the total heap used by a DFA.
+    ///
+    /// This size limit is expressed in bytes and is applied during
+    /// determinization of an NFA into a DFA.
+    /// If the DFA's heap usage, and only the DFA, exceeds this configured
+    /// limit, then determinization is stopped and an error is returned.
+    ///
+    /// This limit does not apply to auxiliary storage used during
+    /// determinization that isn't part of the generated DFA.
+    ///
+    /// This limit is only applied during determinization. Currently, there
+    /// is no way to post-pone this check to after minimization if
+    /// minimization was enabled.
+    ///
+    /// The total limit on heap used during determinization is the sum of
+    /// the DFA and determinization size limits.
+    ///
+    /// The default is no limit.
+    ///
+    /// # Example
+    ///
+    /// This example shows a DFA that fails to build because of a configured
+    /// size limit. This particular example also serves as a cautionary tale
+    /// demonstrating just how big DFAs with large Unicode character classes
+    /// can get.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::{dense, Automaton}, Input};
+    ///
+    /// // 6MB isn't enough!
+    /// dense::Builder::new()
+    ///     .configure(dense::Config::new().dfa_size_limit(Some(6_000_000)))
+    ///     .build(r"\w{20}")
+    ///     .unwrap_err();
+    ///
+    /// // ... but 7MB probably is!
+    /// // (Note that DFA sizes aren't necessarily stable between releases.)
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new().dfa_size_limit(Some(7_000_000)))
+    ///     .build(r"\w{20}")?;
+    /// let haystack = "A".repeat(20).into_bytes();
+    /// assert!(dfa.try_search_fwd(&Input::new(&haystack))?.is_some());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// While one needs a little more than 6MB to represent `\w{20}`, it
+    /// turns out that you only need a little more than 6KB to represent
+    /// `(?-u:\w{20})`. So only use Unicode if you need it!
+    ///
+    /// As with [`Config::determinize_size_limit`], the size of a DFA is
+    /// influenced by other factors, such as what start state configurations
+    /// to support. For example, if you only need unanchored searches and
+    /// not anchored searches, then configuring the DFA to only support
+    /// unanchored searches can reduce its size. By default, DFAs support
+    /// both unanchored and anchored searches.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::{dense, Automaton, StartKind}, Input};
+    ///
+    /// // 3MB isn't enough!
+    /// dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .dfa_size_limit(Some(3_000_000))
+    ///         .start_kind(StartKind::Unanchored)
+    ///     )
+    ///     .build(r"\w{20}")
+    ///     .unwrap_err();
+    ///
+    /// // ... but 4MB probably is!
+    /// // (Note that DFA sizes aren't necessarily stable between releases.)
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .dfa_size_limit(Some(4_000_000))
+    ///         .start_kind(StartKind::Unanchored)
+    ///     )
+    ///     .build(r"\w{20}")?;
+    /// let haystack = "A".repeat(20).into_bytes();
+    /// assert!(dfa.try_search_fwd(&Input::new(&haystack))?.is_some());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn dfa_size_limit(mut self, bytes: Option<usize>) -> Config {
+        self.dfa_size_limit = Some(bytes);
+        self
+    }
+
+    /// Set a size limit on the total heap used by determinization.
+    ///
+    /// This size limit is expressed in bytes and is applied during
+    /// determinization of an NFA into a DFA.
+    /// If the heap used for auxiliary storage during determinization
+    /// (memory that is not in the DFA but necessary for building the DFA)
+    /// exceeds this configured limit, then determinization is stopped and
+    /// an error is returned.
+    ///
+    /// This limit does not apply to heap used by the DFA itself.
+    ///
+    /// The total limit on heap used during determinization is the sum of
+    /// the DFA and determinization size limits.
+    ///
+    /// The default is no limit.
+    ///
+    /// # Example
+    ///
+    /// This example shows a DFA that fails to build because of a
+    /// configured size limit on the amount of heap space used by
+    /// determinization. This particular example complements the example for
+    /// [`Config::dfa_size_limit`] by demonstrating that not only does
+    /// Unicode potentially make DFAs themselves big, but it also results in
+    /// more auxiliary storage during determinization. (Although, auxiliary
+    /// storage is still not as much as the DFA itself.)
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// # if !cfg!(target_pointer_width = "64") { return Ok(()); } // see #1039
+    /// use regex_automata::{dfa::{dense, Automaton}, Input};
+    ///
+    /// // 700KB isn't enough!
+    /// dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .determinize_size_limit(Some(700_000))
+    ///     )
+    ///     .build(r"\w{20}")
+    ///     .unwrap_err();
+    ///
+    /// // ... but 800KB probably is!
+    /// // (Note that auxiliary storage sizes aren't necessarily stable
+    /// // between releases.)
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .determinize_size_limit(Some(800_000))
+    ///     )
+    ///     .build(r"\w{20}")?;
+    /// let haystack = "A".repeat(20).into_bytes();
+    /// assert!(dfa.try_search_fwd(&Input::new(&haystack))?.is_some());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Note that some parts of the configuration on a DFA can have a
+    /// big impact on how big the DFA is, and thus, how much memory is
+    /// used. For example, the default setting for [`Config::start_kind`] is
+    /// [`StartKind::Both`]. But if you only need an anchored search, for
+    /// example, then it can be much cheaper to build a DFA that only
+    /// supports anchored searches. (Running an unanchored search with it
+    /// would panic.)
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// # if !cfg!(target_pointer_width = "64") { return Ok(()); } // see #1039
+    /// use regex_automata::{
+    ///     dfa::{dense, Automaton, StartKind},
+    ///     Anchored, Input,
+    /// };
+    ///
+    /// // 200KB isn't enough!
+    /// dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .determinize_size_limit(Some(200_000))
+    ///         .start_kind(StartKind::Anchored)
+    ///     )
+    ///     .build(r"\w{20}")
+    ///     .unwrap_err();
+    ///
+    /// // ... but 300KB probably is!
+    /// // (Note that auxiliary storage sizes aren't necessarily stable
+    /// // between releases.)
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .determinize_size_limit(Some(300_000))
+    ///         .start_kind(StartKind::Anchored)
+    ///     )
+    ///     .build(r"\w{20}")?;
+    /// let haystack = "A".repeat(20).into_bytes();
+    /// let input = Input::new(&haystack).anchored(Anchored::Yes);
+    /// assert!(dfa.try_search_fwd(&input)?.is_some());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn determinize_size_limit(mut self, bytes: Option<usize>) -> Config {
+        self.determinize_size_limit = Some(bytes);
+        self
+    }
+
+    /// Returns whether this configuration has enabled simple state
+    /// acceleration.
+    pub fn get_accelerate(&self) -> bool {
+        self.accelerate.unwrap_or(true)
+    }
+
+    /// Returns the prefilter attached to this configuration, if any.
+    pub fn get_prefilter(&self) -> Option<&Prefilter> {
+        self.pre.as_ref().unwrap_or(&None).as_ref()
+    }
+
+    /// Returns whether this configuration has enabled the expensive process
+    /// of minimizing a DFA.
+    pub fn get_minimize(&self) -> bool {
+        self.minimize.unwrap_or(false)
+    }
+
+    /// Returns the match semantics set in this configuration.
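+    ///
+    /// For example, a small sketch of the default:
+    ///
+    /// ```
+    /// use regex_automata::{dfa::dense::Config, MatchKind};
+    ///
+    /// assert_eq!(MatchKind::LeftmostFirst, Config::new().get_match_kind());
+    /// ```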
+    pub fn get_match_kind(&self) -> MatchKind {
+        self.match_kind.unwrap_or(MatchKind::LeftmostFirst)
+    }
+
+    /// Returns the starting state configuration for a DFA.
+    pub fn get_starts(&self) -> StartKind {
+        self.start_kind.unwrap_or(StartKind::Both)
+    }
+
+    /// Returns whether this configuration has enabled anchored starting
+    /// states for every pattern in the DFA.
+    pub fn get_starts_for_each_pattern(&self) -> bool {
+        self.starts_for_each_pattern.unwrap_or(false)
+    }
+
+    /// Returns whether this configuration has enabled byte classes or not.
+    /// This is typically a debugging oriented option, as disabling it
+    /// confers no speed benefit.
+    pub fn get_byte_classes(&self) -> bool {
+        self.byte_classes.unwrap_or(true)
+    }
+
+    /// Returns whether this configuration has enabled heuristic Unicode
+    /// word boundary support. When enabled, it is possible for a search to
+    /// return an error.
+    pub fn get_unicode_word_boundary(&self) -> bool {
+        self.unicode_word_boundary.unwrap_or(false)
+    }
+
+    /// Returns whether this configuration will instruct the DFA to enter a
+    /// quit state whenever the given byte is seen during a search. When at
+    /// least one byte has this enabled, it is possible for a search to
+    /// return an error.
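+    ///
+    /// For example, a small sketch:
+    ///
+    /// ```
+    /// use regex_automata::dfa::dense::Config;
+    ///
+    /// let config = Config::new().quit(b'\n', true);
+    /// assert!(config.get_quit(b'\n'));
+    /// assert!(!config.get_quit(b'a'));
+    /// ```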
+    pub fn get_quit(&self, byte: u8) -> bool {
+        self.quitset.map_or(false, |q| q.contains(byte))
+    }
+
+    /// Returns whether this configuration will instruct the DFA to
+    /// "specialize" start states. When enabled, the DFA will mark start
+    /// states as "special" so that search routines using the DFA can detect
+    /// when it's in a start state and do some kind of optimization (like
+    /// run a prefilter).
+    pub fn get_specialize_start_states(&self) -> bool {
+        self.specialize_start_states.unwrap_or(false)
+    }
+
+    /// Returns the DFA size limit of this configuration if one was set.
+    /// The size limit is total number of bytes on the heap that a DFA is
+    /// permitted to use. If the DFA exceeds this limit during construction,
+    /// then construction is stopped and an error is returned.
+    pub fn get_dfa_size_limit(&self) -> Option<usize> {
+        self.dfa_size_limit.unwrap_or(None)
+    }
+
+    /// Returns the determinization size limit of this configuration if one
+    /// was set. The size limit is total number of bytes on the heap that
+    /// determinization is permitted to use. If determinization exceeds this
+    /// limit during construction, then construction is stopped and an error
+    /// is returned.
+    ///
+    /// This is different from the DFA size limit in that this only applies
+    /// to the auxiliary storage used during determinization. Once
+    /// determinization is complete, this memory is freed.
+    ///
+    /// The limit on the total heap memory used is the sum of the DFA and
+    /// determinization size limits.
+    pub fn get_determinize_size_limit(&self) -> Option<usize> {
+        self.determinize_size_limit.unwrap_or(None)
+    }
+
+    /// Overwrite the default configuration such that the options in `o` are
+    /// always used. If an option in `o` is not set, then the corresponding
+    /// option in `self` is used. If it's not set in `self` either, then it
+    /// remains not set.
+    pub(crate) fn overwrite(&self, o: Config) -> Config {
+        Config {
+            accelerate: o.accelerate.or(self.accelerate),
+            pre: o.pre.or_else(|| self.pre.clone()),
+            minimize: o.minimize.or(self.minimize),
+            match_kind: o.match_kind.or(self.match_kind),
+            start_kind: o.start_kind.or(self.start_kind),
+            starts_for_each_pattern: o
+                .starts_for_each_pattern
+                .or(self.starts_for_each_pattern),
+            byte_classes: o.byte_classes.or(self.byte_classes),
+            unicode_word_boundary: o
+                .unicode_word_boundary
+                .or(self.unicode_word_boundary),
+            quitset: o.quitset.or(self.quitset),
+            specialize_start_states: o
+                .specialize_start_states
+                .or(self.specialize_start_states),
+            dfa_size_limit: o.dfa_size_limit.or(self.dfa_size_limit),
+            determinize_size_limit: o
+                .determinize_size_limit
+                .or(self.determinize_size_limit),
+        }
+    }
+}
+
+/// A builder for constructing a deterministic finite automaton from regular
+/// expressions.
+///
+/// This builder provides two main things:
+///
+/// 1. It provides a few different `build` routines for actually constructing
+/// a DFA from different kinds of inputs. The most convenient is
+/// [`Builder::build`], which builds a DFA directly from a pattern string.
+/// The most flexible is [`Builder::build_from_nfa`], which builds a DFA
+/// straight from an NFA.
+/// 2. The builder permits configuring a number of things.
+/// [`Builder::configure`] is used with [`Config`] to configure aspects of
+/// the DFA and the construction process itself. [`Builder::syntax`] and
+/// [`Builder::thompson`] permit configuring the regex parser and Thompson
+/// NFA construction, respectively. The syntax and thompson configurations
+/// only apply when building from a pattern string.
+///
+/// This builder always constructs a *single* DFA. As such, this builder
+/// can only be used to construct regexes that either detect the presence
+/// of a match or find the end location of a match. A single DFA cannot
+/// produce both the start and end of a match. For that information, use a
+/// [`Regex`](crate::dfa::regex::Regex), which can be similarly configured
+/// using [`regex::Builder`](crate::dfa::regex::Builder). The main reason to
+/// use a DFA directly is if the end location of a match is enough for your
+/// use case. Namely, a `Regex` will construct two DFAs instead of one,
+/// since a second reverse DFA is needed to find the start of a match.
+///
+/// Note that to build a sparse DFA, one must first build a dense DFA and
+/// convert that to a sparse DFA. There is no way to build a sparse DFA
+/// without first building a dense DFA.
+///
+/// # Example
+///
+/// This example shows how to build a minimized DFA that completely disables
+/// Unicode. That is:
+///
+/// * Things such as `\w`, `.` and `\b` are no longer Unicode-aware. `\w`
+/// and `\b` are ASCII-only while `.` matches any byte except for `\n`
+/// (instead of any UTF-8 encoding of a Unicode scalar value except for
+/// `\n`). Things that are Unicode only, such as `\pL`, are not allowed.
+/// * The pattern itself is permitted to match invalid UTF-8. For example,
+/// things like `[^a]` that match any byte except for `a` are permitted.
+///
+/// ```
+/// use regex_automata::{
+///     dfa::{Automaton, dense},
+///     util::syntax,
+///     HalfMatch, Input,
+/// };
+///
+/// let dfa = dense::Builder::new()
+///     .configure(dense::Config::new().minimize(true))
+///     .syntax(syntax::Config::new().unicode(false).utf8(false))
+///     .build(r"foo[^b]ar.*")?;
+///
+/// let haystack = b"\xFEfoo\xFFar\xE2\x98\xFF\n";
+/// let expected = Some(HalfMatch::must(0, 10));
+/// let got = dfa.try_search_fwd(&Input::new(haystack))?;
+/// assert_eq!(expected, got);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[cfg(feature = "dfa-build")]
+#[derive(Clone, Debug)]
+pub struct Builder {
+    config: Config,
+    #[cfg(feature = "syntax")]
+    thompson: thompson::Compiler,
+}
+
+#[cfg(feature = "dfa-build")]
+impl Builder {
+    /// Create a new dense DFA builder with the default configuration.
+    pub fn new() -> Builder {
+        Builder {
+            config: Config::default(),
+            #[cfg(feature = "syntax")]
+            thompson: thompson::Compiler::new(),
+        }
+    }
+
+    /// Build a DFA from the given pattern.
+    ///
+    /// If there was a problem parsing or compiling the pattern, then an
+    /// error is returned.
+    #[cfg(feature = "syntax")]
+    pub fn build(&self, pattern: &str) -> Result<OwnedDFA, BuildError> {
+        self.build_many(&[pattern])
+    }
+
+    /// Build a DFA from the given patterns.
+    ///
+    /// When matches are returned, the pattern ID corresponds to the index of
+    /// the pattern in the slice given.
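+    ///
+    /// # Example
+    ///
+    /// A brief sketch of the pattern ID correspondence (mirroring the
+    /// `new_many` example further below):
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input};
+    ///
+    /// let dfa = dense::Builder::new()
+    ///     .build_many(&[r"[0-9]+", r"[a-z]+"])?;
+    /// // The leftmost match is "abc", which corresponds to the pattern at
+    /// // index 1.
+    /// let expected = Some(HalfMatch::must(1, 3));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("abc123"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```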
+/// +/// ``` +/// use regex_automata::{ +/// dfa::{Automaton, dense}, +/// util::syntax, +/// HalfMatch, Input, +/// }; +/// +/// let dfa = dense::Builder::new() +/// .configure(dense::Config::new().minimize(false)) +/// .syntax(syntax::Config::new().unicode(false).utf8(false)) +/// .build(r"foo[^b]ar.*")?; +/// +/// let haystack = b"\xFEfoo\xFFar\xE2\x98\xFF\n"; +/// let expected = Some(HalfMatch::must(0, 10)); +/// let got = dfa.try_search_fwd(&Input::new(haystack))?; +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[cfg(feature = "dfa-build")] +#[derive(Clone, Debug)] +pub struct Builder { + config: Config, + #[cfg(feature = "syntax")] + thompson: thompson::Compiler, +} + +#[cfg(feature = "dfa-build")] +impl Builder { + /// Create a new dense DFA builder with the default configuration. + pub fn new() -> Builder { + Builder { + config: Config::default(), + #[cfg(feature = "syntax")] + thompson: thompson::Compiler::new(), + } + } + + /// Build a DFA from the given pattern. + /// + /// If there was a problem parsing or compiling the pattern, then an error + /// is returned. + #[cfg(feature = "syntax")] + pub fn build(&self, pattern: &str) -> Result { + self.build_many(&[pattern]) + } + + /// Build a DFA from the given patterns. + /// + /// When matches are returned, the pattern ID corresponds to the index of + /// the pattern in the slice given. + #[cfg(feature = "syntax")] + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + let nfa = self + .thompson + .clone() + // We can always forcefully disable captures because DFAs do not + // support them. + .configure( + thompson::Config::new() + .which_captures(thompson::WhichCaptures::None), + ) + .build_many(patterns) + .map_err(BuildError::nfa)?; + self.build_from_nfa(&nfa) + } + + /// Build a DFA from the given NFA. + /// + /// # Example + /// + /// This example shows how to build a DFA if you already have an NFA in + /// hand. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{Automaton, dense}, + /// nfa::thompson::NFA, + /// HalfMatch, Input, + /// }; + /// + /// let haystack = "foo123bar".as_bytes(); + /// + /// // This shows how to set non-default options for building an NFA. + /// let nfa = NFA::compiler() + /// .configure(NFA::config().shrink(true)) + /// .build(r"[0-9]+")?; + /// let dfa = dense::Builder::new().build_from_nfa(&nfa)?; + /// let expected = Some(HalfMatch::must(0, 6)); + /// let got = dfa.try_search_fwd(&Input::new(haystack))?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build_from_nfa( + &self, + nfa: &thompson::NFA, + ) -> Result { + let mut quitset = self.config.quitset.unwrap_or(ByteSet::empty()); + if self.config.get_unicode_word_boundary() + && nfa.look_set_any().contains_word_unicode() + { + for b in 0x80..=0xFF { + quitset.add(b); + } + } + let classes = if !self.config.get_byte_classes() { + // DFAs will always use the equivalence class map, but enabling + // this option is useful for debugging. Namely, this will cause all + // transitions to be defined over their actual bytes instead of an + // opaque equivalence class identifier. The former is much easier + // to grok as a human. + ByteClasses::singletons() + } else { + let mut set = nfa.byte_class_set().clone(); + // It is important to distinguish any "quit" bytes from all other + // bytes. Otherwise, a non-quit byte may end up in the same + // class as a quit byte, and thus cause the DFA to stop when it + // shouldn't. 
+    pub fn configure(&mut self, config: Config) -> &mut Builder {
+        self.config = self.config.overwrite(config);
+        self
+    }
+
+    /// Set the syntax configuration for this builder using
+    /// [`syntax::Config`](crate::util::syntax::Config).
+    ///
+    /// This permits setting things like case insensitivity, Unicode and
+    /// multi line mode.
+    ///
+    /// These settings only apply when constructing a DFA directly from a
+    /// pattern.
+    #[cfg(feature = "syntax")]
+    pub fn syntax(
+        &mut self,
+        config: crate::util::syntax::Config,
+    ) -> &mut Builder {
+        self.thompson.syntax(config);
+        self
+    }
+
+    /// Set the Thompson NFA configuration for this builder using
+    /// [`nfa::thompson::Config`](crate::nfa::thompson::Config).
+    ///
+    /// This permits setting things like whether the DFA should match the
+    /// regex in reverse or if additional time should be spent shrinking the
+    /// size of the NFA.
+    ///
+    /// These settings only apply when constructing a DFA directly from a
+    /// pattern.
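+    ///
+    /// # Example
+    ///
+    /// A sketch of the "reverse" use case mentioned above: build a DFA from
+    /// a reversed NFA and search backwards to find where a match *starts*.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::{Automaton, dense},
+    ///     nfa::thompson,
+    ///     HalfMatch, Input,
+    /// };
+    ///
+    /// let dfa = dense::Builder::new()
+    ///     .thompson(thompson::Config::new().reverse(true))
+    ///     .build(r"[0-9]+")?;
+    /// // A reverse search reports the start offset of the match.
+    /// let expected = Some(HalfMatch::must(0, 3));
+    /// let got = dfa.try_search_rev(&Input::new("abc123"))?;
+    /// assert_eq!(expected, got);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```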
+    #[cfg(feature = "syntax")]
+    pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder {
+        self.thompson.configure(config);
+        self
+    }
+}
+
+#[cfg(feature = "dfa-build")]
+impl Default for Builder {
+    fn default() -> Builder {
+        Builder::new()
+    }
+}
+
+/// A convenience alias for an owned DFA. We use this particular
+/// instantiation a lot in this crate, so it's worth giving it a name. This
+/// instantiation is commonly used for mutable APIs on the DFA while
+/// building it. The main reason for making DFAs generic is no_std support,
+/// and more generally, making it possible to load a DFA from an arbitrary
+/// slice of bytes.
+#[cfg(feature = "alloc")]
+pub(crate) type OwnedDFA = DFA<alloc::vec::Vec<u32>>;
+
+/// A dense table-based deterministic finite automaton (DFA).
+///
+/// All dense DFAs have one or more start states, zero or more match states
+/// and a transition table that maps the current state and the current byte
+/// of input to the next state. A DFA can use this information to implement
+/// fast searching. In particular, the use of a dense DFA generally makes
+/// the trade off that match speed is the most valuable characteristic, even
+/// if building the DFA may take significant time *and* space. (More
+/// concretely, building a DFA takes time and space that is exponential in
+/// the size of the pattern in the worst case.) As such, the processing of
+/// every byte of input is done with a small constant number of operations
+/// that does not vary with the pattern, its size or the size of the
+/// alphabet. If your needs don't line up with this trade off, then a dense
+/// DFA may not be an adequate solution to your problem.
+///
+/// In contrast, a [`sparse::DFA`] makes the opposite trade off: it uses
+/// less space but will execute a variable number of instructions per byte
+/// at match time, which makes it slower for matching. (Note that space
+/// usage is still exponential in the size of the pattern in the worst
+/// case.)
+///
+/// A DFA can be built using the default configuration via the
+/// [`DFA::new`] constructor. Otherwise, one can
+/// configure various aspects via [`dense::Builder`](Builder).
+///
+/// A single DFA fundamentally supports the following operations:
+///
+/// 1. Detection of a match.
+/// 2. Location of the end of a match.
+/// 3. In the case of a DFA with multiple patterns, which pattern matched is
+/// reported as well.
+///
+/// A notable absence from the above list of capabilities is the location of
+/// the *start* of a match. In order to provide both the start and end of
+/// a match, *two* DFAs are required. This functionality is provided by a
+/// [`Regex`](crate::dfa::regex::Regex).
+///
+/// # Type parameters
+///
+/// A `DFA` has one type parameter, `T`, which is used to represent state
+/// IDs, pattern IDs and accelerators. `T` is typically a `Vec<u32>` or a
+/// `&[u32]`.
+///
+/// # The `Automaton` trait
+///
+/// This type implements the [`Automaton`] trait, which means it can be used
+/// for searching. For example:
+///
+/// ```
+/// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input};
+///
+/// let dfa = DFA::new("foo[0-9]+")?;
+/// let expected = HalfMatch::must(0, 8);
+/// assert_eq!(Some(expected), dfa.try_search_fwd(&Input::new("foo12345"))?);
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone)]
+pub struct DFA<T> {
+    /// The transition table for this DFA. This includes the transitions
+    /// themselves, along with the stride, number of states and the
+    /// equivalence class mapping.
+    tt: TransitionTable<T>,
+    /// The set of starting state identifiers for this DFA. The starting
+    /// state IDs act as pointers into the transition table. The specific
+    /// starting state chosen for each search is dependent on the context at
+    /// which the search begins.
+    st: StartTable<T>,
+    /// The set of match states and the patterns that match for each
+    /// corresponding match state.
+    ///
+    /// This structure is technically only needed because of support for
+    /// multi-regexes. Namely, multi-regexes require answering not just
+    /// whether a match exists, but _which_ patterns match. So we need to
+    /// store the matching pattern IDs for each match state. We do this even
+    /// when there is only one pattern for the sake of simplicity. In
+    /// practice, this uses up very little space for the case of one
+    /// pattern.
+    ms: MatchStates<T>,
+    /// Information about which states are "special." Special states are
+    /// states that are dead, quit, matching, starting or accelerated. For
+    /// more info, see the docs for `Special`.
+    special: Special,
+    /// The accelerators for this DFA.
+    ///
+    /// If a state is accelerated, then there exist only a small number of
+    /// bytes that can cause the DFA to leave the state. This permits
+    /// searching to use optimized routines to find those specific bytes
+    /// instead of using the transition table.
+    ///
+    /// All accelerated states exist in a contiguous range in the DFA's
+    /// transition table. See dfa/special.rs for more details on how states
+    /// are arranged.
+    accels: Accels<T>,
+    /// Any prefilter attached to this DFA.
+    ///
+    /// Note that currently prefilters are not serialized. When
+    /// deserializing a DFA from bytes, this is always set to `None`.
+    pre: Option<Prefilter>,
+    /// The set of "quit" bytes for this DFA.
+    ///
+    /// This is only used when computing the start state for a particular
+    /// position in a haystack. Namely, in the case where there is a quit
+    /// byte immediately before the start of the search, this set needs to
+    /// be explicitly consulted. In all other cases, quit bytes are detected
+    /// by the DFA itself, by transitioning all quit bytes to a special
+    /// "quit state."
+    quitset: ByteSet,
+    /// Various flags describing the behavior of this DFA.
+    flags: Flags,
+}
+
+#[cfg(feature = "dfa-build")]
+impl OwnedDFA {
+    /// Parse the given regular expression using a default configuration and
+    /// return the corresponding DFA.
+    ///
+    /// If you want a non-default configuration, then use the
+    /// [`dense::Builder`](Builder) to set your own configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input};
+    ///
+    /// let dfa = dense::DFA::new("foo[0-9]+bar")?;
+    /// let expected = Some(HalfMatch::must(0, 11));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345bar"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new(pattern: &str) -> Result<OwnedDFA, BuildError> {
+        Builder::new().build(pattern)
+    }
+
+    /// Parse the given regular expressions using a default configuration
+    /// and return the corresponding multi-DFA.
+    ///
+    /// If you want a non-default configuration, then use the
+    /// [`dense::Builder`](Builder) to set your own configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input};
+    ///
+    /// let dfa = dense::DFA::new_many(&["[0-9]+", "[a-z]+"])?;
+    /// let expected = Some(HalfMatch::must(1, 3));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345bar"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new_many<P: AsRef<str>>(
+        patterns: &[P],
+    ) -> Result<OwnedDFA, BuildError> {
+        Builder::new().build_many(patterns)
+    }
+}
+
+#[cfg(feature = "dfa-build")]
+impl OwnedDFA {
+    /// Create a new DFA that matches every input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input};
+    ///
+    /// let dfa = dense::DFA::always_match()?;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 0));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new(""))?);
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn always_match() -> Result<OwnedDFA, BuildError> {
+        let nfa = thompson::NFA::always_match();
+        Builder::new().build_from_nfa(&nfa)
+    }
+
+    /// Create a new DFA that never matches any input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense}, Input};
+    ///
+    /// let dfa = dense::DFA::never_match()?;
+    /// assert_eq!(None, dfa.try_search_fwd(&Input::new(""))?);
+    /// assert_eq!(None, dfa.try_search_fwd(&Input::new("foo"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn never_match() -> Result<OwnedDFA, BuildError> {
+        let nfa = thompson::NFA::never_match();
+        Builder::new().build_from_nfa(&nfa)
+    }
+
+    /// Create an initial DFA with the given equivalence classes, pattern
+    /// length and whether anchored starting states are enabled for each
+    /// pattern. An initial DFA can be further mutated via determinization.
+    fn initial(
+        classes: ByteClasses,
+        pattern_len: usize,
+        starts: StartKind,
+        lookm: &LookMatcher,
+        starts_for_each_pattern: bool,
+        pre: Option<Prefilter>,
+        quitset: ByteSet,
+        flags: Flags,
+    ) -> Result<OwnedDFA, BuildError> {
+        let start_pattern_len =
+            if starts_for_each_pattern { Some(pattern_len) } else { None };
+        Ok(DFA {
+            tt: TransitionTable::minimal(classes),
+            st: StartTable::dead(starts, lookm, start_pattern_len)?,
+            ms: MatchStates::empty(pattern_len),
+            special: Special::new(),
+            accels: Accels::empty(),
+            pre,
+            quitset,
+            flags,
+        })
+    }
+}
+
+#[cfg(feature = "dfa-build")]
+impl DFA<&[u32]> {
+    /// Return a new default dense DFA compiler configuration.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Config`] type when customizing the construction of a dense DFA.
+    pub fn config() -> Config {
+        Config::new()
+    }
+
+    /// Create a new dense DFA builder with the default configuration.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Builder`] type in common cases.
+    pub fn builder() -> Builder {
+        Builder::new()
+    }
+}
+
+impl<T: AsRef<[u32]>> DFA<T> {
+    /// Cheaply return a borrowed version of this dense DFA. Specifically,
+    /// the DFA returned always uses `&[u32]` for its transition table.
+    pub fn as_ref(&self) -> DFA<&'_ [u32]> {
+        DFA {
+            tt: self.tt.as_ref(),
+            st: self.st.as_ref(),
+            ms: self.ms.as_ref(),
+            special: self.special,
+            accels: self.accels(),
+            pre: self.pre.clone(),
+            quitset: self.quitset,
+            flags: self.flags,
+        }
+    }
+
+    /// Return an owned version of this dense DFA. Specifically, the DFA
+    /// returned always uses `Vec<u32>` for its transition table.
+    ///
+    /// Effectively, this returns a dense DFA whose transition table lives
+    /// on the heap.
+    #[cfg(feature = "alloc")]
+    pub fn to_owned(&self) -> OwnedDFA {
+        DFA {
+            tt: self.tt.to_owned(),
+            st: self.st.to_owned(),
+            ms: self.ms.to_owned(),
+            special: self.special,
+            accels: self.accels().to_owned(),
+            pre: self.pre.clone(),
+            quitset: self.quitset,
+            flags: self.flags,
+        }
+    }
+
+    /// Returns the starting state configuration for this DFA.
+    ///
+    /// The default is [`StartKind::Both`], which means the DFA supports
+    /// both unanchored and anchored searches. However, this can generally
+    /// lead to bigger DFAs. Therefore, a DFA might be compiled with support
+    /// for just unanchored or anchored searches. In that case, running a
+    /// search with an unsupported configuration will panic.
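+    ///
+    /// For example, a small sketch of the default:
+    ///
+    /// ```
+    /// use regex_automata::dfa::{dense::DFA, StartKind};
+    ///
+    /// let dfa = DFA::new("foo")?;
+    /// assert_eq!(StartKind::Both, dfa.start_kind());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```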
+    pub fn start_kind(&self) -> StartKind {
+        self.st.kind
+    }
+
+    /// Returns the start byte map used for computing the `Start`
+    /// configuration at the beginning of a search.
+    pub(crate) fn start_map(&self) -> &StartByteMap {
+        &self.st.start_map
+    }
+
+    /// Returns true only if this DFA has starting states for each pattern.
+    ///
+    /// When a DFA has starting states for each pattern, then a search with
+    /// the DFA can be configured to only look for anchored matches of a
+    /// specific pattern. Specifically, APIs like
+    /// [`Automaton::try_search_fwd`] can accept an
+    /// [`Anchored::Pattern`](crate::Anchored::Pattern) search configuration
+    /// if and only if this method returns true. Otherwise, such a search
+    /// will fail.
+    ///
+    /// Note that if the DFA has no patterns, this always returns false.
+    pub fn starts_for_each_pattern(&self) -> bool {
+        self.st.pattern_len.is_some()
+    }
+
+    /// Returns the equivalence classes that make up the alphabet for this
+    /// DFA.
+    ///
+    /// Unless [`Config::byte_classes`] was disabled, it is possible that
+    /// multiple distinct bytes are grouped into the same equivalence class
+    /// if it is impossible for them to discriminate between a match and a
+    /// non-match. This has the effect of reducing the overall alphabet size
+    /// and in turn potentially substantially reducing the size of the DFA's
+    /// transition table.
+    ///
+    /// The downside of using equivalence classes like this is that every
+    /// state transition will automatically use this map to convert an
+    /// arbitrary byte to its corresponding equivalence class. In practice
+    /// this has a negligible impact on performance.
+    pub fn byte_classes(&self) -> &ByteClasses {
+        &self.tt.classes
+    }
+
+    /// Returns the total number of elements in the alphabet for this DFA.
+    ///
+    /// That is, this returns the total number of transitions that each
+    /// state in this DFA must have. Typically, a normal byte oriented DFA
+    /// would always have an alphabet size of 256, corresponding to the
+    /// number of unique values in a single byte. However, this
+    /// implementation has two peculiarities that impact the alphabet
+    /// length:
+    ///
+    /// * Every state has a special "EOI" transition that is only followed
+    /// after the end of some haystack is reached. This EOI transition is
+    /// necessary to account for one byte of look-ahead when implementing
+    /// things like `\b` and `$`.
+    /// * Bytes are grouped into equivalence classes such that no two bytes
+    /// in the same class can distinguish a match from a non-match. For
+    /// example, in the regex `^[a-z]+$`, the ASCII bytes `a-z` could all be
+    /// in the same equivalence class. This leads to a massive space
+    /// savings.
+    ///
+    /// Note though that the alphabet length does _not_ necessarily equal
+    /// the total stride space taken up by a single DFA state in the
+    /// transition table. Namely, for performance reasons, the stride is
+    /// always the smallest power of two that is greater than or equal to
+    /// the alphabet length. For this reason, [`DFA::stride`] or
+    /// [`DFA::stride2`] are often more useful. The alphabet length is
+    /// typically useful only for informational purposes.
+    pub fn alphabet_len(&self) -> usize {
+        self.tt.alphabet_len()
+    }
+
+    /// Returns the total stride for every state in this DFA, expressed as
+    /// the exponent of a power of 2. The stride is the amount of space each
+    /// state takes up in the transition table, expressed as a number of
+    /// transitions. (Unused transitions map to dead states.)
+    ///
+    /// The stride of a DFA is always equivalent to the smallest power of 2
+    /// that is greater than or equal to the DFA's alphabet length. This
+    /// definition uses extra space, but permits faster translation between
+    /// premultiplied state identifiers and contiguous indices (by using
+    /// shifts instead of relying on integer division).
+    ///
+    /// For example, if the DFA's stride is 16 transitions, then its
+    /// `stride2` is `4` since `2^4 = 16`.
+    ///
+    /// The minimum `stride2` value is `1` (corresponding to a stride of
+    /// `2`) while the maximum `stride2` value is `9` (corresponding to a
+    /// stride of `512`). The maximum is not `8` since the maximum alphabet
+    /// size is `257` when accounting for the special EOI transition.
+    /// However, an alphabet length of that size is exceptionally rare since
+    /// the alphabet is shrunk into equivalence classes.
+    pub fn stride2(&self) -> usize {
+        self.tt.stride2
+    }
+
+    /// Returns the total stride for every state in this DFA. This
+    /// corresponds to the total number of transitions used by each state in
+    /// this DFA's transition table.
+    ///
+    /// Please see [`DFA::stride2`] for more information. In particular,
+    /// this returns the stride as the number of transitions, whereas
+    /// `stride2` returns it as the exponent of a power of 2.
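+    ///
+    /// For example, a small sketch of the relationship between the alphabet
+    /// length and the stride (the exact values depend on the pattern):
+    ///
+    /// ```
+    /// use regex_automata::dfa::dense::DFA;
+    ///
+    /// let dfa = DFA::new("foo[0-9]+")?;
+    /// // The stride is the alphabet length rounded up to a power of 2.
+    /// assert_eq!(dfa.stride(), 1 << dfa.stride2());
+    /// assert!(dfa.stride() >= dfa.alphabet_len());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```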
+    pub fn stride(&self) -> usize {
+        self.tt.stride()
+    }
+
+    /// Returns the memory usage, in bytes, of this DFA.
+    ///
+    /// The memory usage is computed based on the number of bytes used to
+    /// represent this DFA.
+    ///
+    /// This does **not** include the stack size used up by this DFA. To
+    /// compute that, use `std::mem::size_of::<DFA>()`.
+    pub fn memory_usage(&self) -> usize {
+        self.tt.memory_usage()
+            + self.st.memory_usage()
+            + self.ms.memory_usage()
+            + self.accels.memory_usage()
+    }
+}
+
+/// Routines for converting a dense DFA to other representations, such as
+/// sparse DFAs or raw bytes suitable for persistent storage.
+impl<T: AsRef<[u32]>> DFA<T> {
+    /// Convert this dense DFA to a sparse DFA.
+    ///
+    /// If a `StateID` is too small to represent all states in the sparse
+    /// DFA, then this returns an error. In most cases, if a dense DFA is
+    /// constructable with `StateID` then a sparse DFA will be as well.
+    /// However, it is not guaranteed.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input};
+    ///
+    /// let dense = dense::DFA::new("foo[0-9]+")?;
+    /// let sparse = dense.to_sparse()?;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, sparse.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "dfa-build")]
+    pub fn to_sparse(&self) -> Result<sparse::DFA<Vec<u8>>, BuildError> {
+        sparse::DFA::from_dense(self)
+    }
+
+    /// Serialize this DFA as raw bytes to a `Vec<u8>` in little endian
+    /// format. Upon success, the `Vec<u8>` and the initial padding length
+    /// are returned.
+    ///
+    /// The written bytes are guaranteed to be deserialized correctly and
+    /// without errors in a semver compatible release of this crate by a
+    /// `DFA`'s deserialization APIs (assuming all other criteria for the
+    /// deserialization APIs has been satisfied):
+    ///
+    /// * [`DFA::from_bytes`]
+    /// * [`DFA::from_bytes_unchecked`]
+    ///
+    /// The padding returned is non-zero if the returned `Vec<u8>` starts at
+    /// an address that does not have the same alignment as `u32`. The
+    /// padding corresponds to the number of leading bytes written to the
+    /// returned `Vec<u8>`.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to serialize and deserialize a DFA:
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input};
+    ///
+    /// // Compile our original DFA.
+    /// let original_dfa = DFA::new("foo[0-9]+")?;
+    ///
+    /// // N.B. We use native endianness here to make the example work, but
+    /// // using to_bytes_little_endian would work on a little endian target.
+    /// let (buf, _) = original_dfa.to_bytes_native_endian();
+    /// // Even if buf has initial padding, DFA::from_bytes will
+    /// // automatically ignore it.
+    /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf)?.0;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "dfa-build")]
+    pub fn to_bytes_little_endian(&self) -> (Vec<u8>, usize) {
+        self.to_bytes::<wire::LE>()
+    }
+
+    /// Serialize this DFA as raw bytes to a `Vec<u8>` in big endian
+    /// format. Upon success, the `Vec<u8>` and the initial padding length
+    /// are returned.
+    ///
+    /// The written bytes are guaranteed to be deserialized correctly and
+    /// without errors in a semver compatible release of this crate by a
+    /// `DFA`'s deserialization APIs (assuming all other criteria for the
+    /// deserialization APIs has been satisfied):
+    ///
+    /// * [`DFA::from_bytes`]
+    /// * [`DFA::from_bytes_unchecked`]
+    ///
+    /// The padding returned is non-zero if the returned `Vec<u8>` starts at
+    /// an address that does not have the same alignment as `u32`. The
+    /// padding corresponds to the number of leading bytes written to the
+    /// returned `Vec<u8>`.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to serialize and deserialize a DFA:
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input};
+    ///
+    /// // Compile our original DFA.
+    /// let original_dfa = DFA::new("foo[0-9]+")?;
+    ///
+    /// // N.B. We use native endianness here to make the example work, but
+    /// // using to_bytes_big_endian would work on a big endian target.
+    /// let (buf, _) = original_dfa.to_bytes_native_endian();
+    /// // Even if buf has initial padding, DFA::from_bytes will
+    /// // automatically ignore it.
+    /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf)?.0;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "dfa-build")]
+    pub fn to_bytes_big_endian(&self) -> (Vec<u8>, usize) {
+        self.to_bytes::<wire::BE>()
+    }
+
+    /// Serialize this DFA as raw bytes to a `Vec<u8>` in native endian
+    /// format. Upon success, the `Vec<u8>` and the initial padding length
+    /// are returned.
+    ///
+    /// The written bytes are guaranteed to be deserialized correctly and
+    /// without errors in a semver compatible release of this crate by a
+    /// `DFA`'s deserialization APIs (assuming all other criteria for the
+    /// deserialization APIs has been satisfied):
+    ///
+    /// * [`DFA::from_bytes`]
+    /// * [`DFA::from_bytes_unchecked`]
+    ///
+    /// The padding returned is non-zero if the returned `Vec<u8>` starts at
+    /// an address that does not have the same alignment as `u32`. The
+    /// padding corresponds to the number of leading bytes written to the
+    /// returned `Vec<u8>`.
+    ///
+    /// Generally speaking, native endian format should only be used when
+    /// you know that the target you're compiling the DFA for matches the
+    /// endianness of the target on which you're compiling the DFA. For
+    /// example, if serialization and deserialization happen in the same
+    /// process or on the same machine.
+    /// Otherwise, when serializing a DFA for use in a portable environment,
+    /// you'll almost certainly want to serialize _both_ a little endian and
+    /// a big endian version and then load the correct one based on the
+    /// target's configuration.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to serialize and deserialize a DFA:
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input};
+    ///
+    /// // Compile our original DFA.
+    /// let original_dfa = DFA::new("foo[0-9]+")?;
+    ///
+    /// let (buf, _) = original_dfa.to_bytes_native_endian();
+    /// // Even if buf has initial padding, DFA::from_bytes will
+    /// // automatically ignore it.
+    /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf)?.0;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "dfa-build")]
+    pub fn to_bytes_native_endian(&self) -> (Vec<u8>, usize) {
+        self.to_bytes::<wire::NE>()
+    }
+
+    /// The implementation of the public `to_bytes` serialization methods,
+    /// which is generic over endianness.
+    #[cfg(feature = "dfa-build")]
+    fn to_bytes<E: Endian>(&self) -> (Vec<u8>, usize) {
+        let len = self.write_to_len();
+        let (mut buf, padding) = wire::alloc_aligned_buffer::<u32>(len);
+        // This should always succeed since the only possible serialization
+        // error is providing a buffer that's too small, but we've ensured
+        // that `buf` is big enough here.
+        self.as_ref().write_to::<E>(&mut buf[padding..]).unwrap();
+        (buf, padding)
+    }
+
+    /// Serialize this DFA as raw bytes to the given slice, in little endian
+    /// format. Upon success, the total number of bytes written to `dst` is
+    /// returned.
+    ///
+    /// The written bytes are guaranteed to be deserialized correctly and
+    /// without errors in a semver compatible release of this crate by a
+    /// `DFA`'s deserialization APIs (assuming all other criteria for the
+    /// deserialization APIs has been satisfied):
+    ///
+    /// * [`DFA::from_bytes`]
+    /// * [`DFA::from_bytes_unchecked`]
+    ///
+    /// Note that unlike the various `to_byte_*` routines, this does not
+    /// write any padding. Callers are responsible for handling alignment
+    /// correctly.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the given destination slice is not big
+    /// enough to contain the full serialized DFA. If an error occurs, then
+    /// nothing is written to `dst`.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to serialize and deserialize a DFA without
+    /// dynamic memory allocation.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input};
+    ///
+    /// // Compile our original DFA.
+    /// let original_dfa = DFA::new("foo[0-9]+")?;
+    ///
+    /// // Create a 4KB buffer on the stack to store our serialized DFA. We
+    /// // need to use a special type to force the alignment of our [u8; N]
+    /// // array to be aligned to a 4 byte boundary. Otherwise, deserializing
+    /// // the DFA may fail because of an alignment mismatch.
+    /// #[repr(C)]
+    /// struct Aligned<B: ?Sized> {
+    ///     _align: [u32; 0],
+    ///     bytes: B,
+    /// }
+    /// let mut buf = Aligned { _align: [], bytes: [0u8; 4 * (1<<10)] };
+    /// // N.B. We use native endianness here to make the example work, but
+    /// // using write_to_little_endian would work on a little endian target.
+    /// let written = original_dfa.write_to_native_endian(&mut buf.bytes)?;
+    /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf.bytes[..written])?.0;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn write_to_little_endian(
+        &self,
+        dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        self.as_ref().write_to::<wire::LE>(dst)
+    }
+
+    /// Serialize this DFA as raw bytes to the given slice, in big endian
+    /// format. Upon success, the total number of bytes written to `dst` is
+    /// returned.
+    ///
+    /// The written bytes are guaranteed to be deserialized correctly and
+    /// without errors in a semver compatible release of this crate by a
+    /// `DFA`'s deserialization APIs (assuming all other criteria for the
+    /// deserialization APIs has been satisfied):
+    ///
+    /// * [`DFA::from_bytes`]
+    /// * [`DFA::from_bytes_unchecked`]
+    ///
+    /// Note that unlike the various `to_byte_*` routines, this does not
+    /// write any padding. Callers are responsible for handling alignment
+    /// correctly.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the given destination slice is not big
+    /// enough to contain the full serialized DFA. If an error occurs, then
+    /// nothing is written to `dst`.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to serialize and deserialize a DFA without
+    /// dynamic memory allocation.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input};
+    ///
+    /// // Compile our original DFA.
+    /// let original_dfa = DFA::new("foo[0-9]+")?;
+    ///
+    /// // Create a 4KB buffer on the stack to store our serialized DFA. We
+    /// // need to use a special type to force the alignment of our [u8; N]
+    /// // array to be aligned to a 4 byte boundary. Otherwise, deserializing
+    /// // the DFA may fail because of an alignment mismatch.
+    /// #[repr(C)]
+    /// struct Aligned<B: ?Sized> {
+    ///     _align: [u32; 0],
+    ///     bytes: B,
+    /// }
+    /// let mut buf = Aligned { _align: [], bytes: [0u8; 4 * (1<<10)] };
+    /// // N.B. We use native endianness here to make the example work, but
+    /// // using write_to_big_endian would work on a big endian target.
+    /// let written = original_dfa.write_to_native_endian(&mut buf.bytes)?;
+    /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf.bytes[..written])?.0;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn write_to_big_endian(
+        &self,
+        dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        self.as_ref().write_to::<wire::BE>(dst)
+    }
+
+    /// Serialize this DFA as raw bytes to the given slice, in native endian
+    /// format. Upon success, the total number of bytes written to `dst` is
+    /// returned.
+    ///
+    /// The written bytes are guaranteed to be deserialized correctly and
+    /// without errors in a semver compatible release of this crate by a
+    /// `DFA`'s deserialization APIs (assuming all other criteria for the
+    /// deserialization APIs has been satisfied):
+    ///
+    /// * [`DFA::from_bytes`]
+    /// * [`DFA::from_bytes_unchecked`]
+    ///
+    /// Generally speaking, native endian format should only be used when
+    /// you know that the target you're compiling the DFA for matches the
+    /// endianness of the target on which you're compiling the DFA. For
+    /// example, if serialization and deserialization happen in the same
+    /// process or on the same machine.
+    /// Otherwise, when serializing a DFA for use in a portable environment,
+    /// you'll almost certainly want to serialize _both_ a little endian and
+    /// a big endian version and then load the correct one based on the
+    /// target's configuration.
+    ///
+    /// Note that unlike the various `to_byte_*` routines, this does not
+    /// write any padding. Callers are responsible for handling alignment
+    /// correctly.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the given destination slice is not big
+    /// enough to contain the full serialized DFA. If an error occurs, then
+    /// nothing is written to `dst`.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to serialize and deserialize a DFA without
+    /// dynamic memory allocation.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input};
+    ///
+    /// // Compile our original DFA.
+    /// let original_dfa = DFA::new("foo[0-9]+")?;
+    ///
+    /// // Create a 4KB buffer on the stack to store our serialized DFA. We
+    /// // need to use a special type to force the alignment of our [u8; N]
+    /// // array to be aligned to a 4 byte boundary. Otherwise, deserializing
+    /// // the DFA may fail because of an alignment mismatch.
+    /// #[repr(C)]
+    /// struct Aligned<B: ?Sized> {
+    ///     _align: [u32; 0],
+    ///     bytes: B,
+    /// }
+    /// let mut buf = Aligned { _align: [], bytes: [0u8; 4 * (1<<10)] };
+    /// let written = original_dfa.write_to_native_endian(&mut buf.bytes)?;
+    /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf.bytes[..written])?.0;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn write_to_native_endian(
+        &self,
+        dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        self.as_ref().write_to::<wire::NE>(dst)
+    }
+
+    /// Return the total number of bytes required to serialize this DFA.
+    ///
+    /// This is useful for determining the size of the buffer required to
+    /// pass to one of the serialization routines:
+    ///
+    /// * [`DFA::write_to_little_endian`]
+    /// * [`DFA::write_to_big_endian`]
+    /// * [`DFA::write_to_native_endian`]
+    ///
+    /// Passing a buffer smaller than the size returned by this method will
+    /// result in a serialization error. Serialization routines are
+    /// guaranteed to succeed when the buffer is big enough.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to dynamically allocate enough room to
+    /// serialize a DFA.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input};
+    ///
+    /// let original_dfa = DFA::new("foo[0-9]+")?;
+    ///
+    /// let mut buf = vec![0; original_dfa.write_to_len()];
+    /// // This is guaranteed to succeed, because the only serialization
+    /// // error that can occur is when the provided buffer is too small.
+    /// // But write_to_len guarantees a correct size.
+    /// let written = original_dfa.write_to_native_endian(&mut buf).unwrap();
+    /// // But this is not guaranteed to succeed! In particular,
+    /// // deserialization requires proper alignment for &[u32], but our
+    /// // buffer was allocated as a &[u8] whose required alignment is
+    /// // smaller than &[u32]. However, it's likely to work in practice
+    /// // because of how most allocators work. So if you write code like
+    /// // this, make sure to either handle the error correctly and/or run
+    /// // it under Miri since Miri will likely provoke the error by
+    /// // returning Vec<u8> buffers with alignment less than &[u32].
+    /// let dfa: DFA<&[u32]> = match DFA::from_bytes(&buf[..written]) {
+    ///     // As mentioned above, it is legal for an error to be returned
+    ///     // here. It is quite difficult to get a Vec<u8> with a guaranteed
+    ///     // alignment equivalent to Vec<u32>.
+    ///     Err(_) => return Ok(()),
+    ///     Ok((dfa, _)) => dfa,
+    /// };
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Note that this example isn't actually guaranteed to work! In
+    /// particular, if `buf` is not aligned to a 4-byte boundary, then the
+    /// `DFA::from_bytes` call will fail. If you need this to work, then you
+    /// either need to deal with adding some initial padding yourself, or use
+    /// one of the `to_bytes` methods, which will do it for you.
+    pub fn write_to_len(&self) -> usize {
+        wire::write_label_len(LABEL)
+            + wire::write_endianness_check_len()
+            + wire::write_version_len()
+            + size_of::<u32>() // unused, intended for future flexibility
+            + self.flags.write_to_len()
+            + self.tt.write_to_len()
+            + self.st.write_to_len()
+            + self.ms.write_to_len()
+            + self.special.write_to_len()
+            + self.accels.write_to_len()
+            + self.quitset.write_to_len()
+    }
+}
+
+impl<'a> DFA<&'a [u32]> {
+    /// Safely deserialize a DFA with a specific state identifier
+    /// representation. Upon success, this returns both the deserialized DFA
+    /// and the number of bytes read from the given slice. Namely, the
+    /// contents of the slice beyond the DFA are not read.
+    ///
+    /// Deserializing a DFA using this routine will never allocate heap
+    /// memory. For safety purposes, the DFA's transition table will be
+    /// verified such that every transition points to a valid state. If this
+    /// verification is too costly, then a [`DFA::from_bytes_unchecked`] API
+    /// is provided, which will always execute in constant time.
+    ///
+    /// The bytes given must be generated by one of the serialization APIs
+    /// of a `DFA` using a semver compatible release of this crate. Those
+    /// include:
+    ///
+    /// * [`DFA::to_bytes_little_endian`]
+    /// * [`DFA::to_bytes_big_endian`]
+    /// * [`DFA::to_bytes_native_endian`]
+    /// * [`DFA::write_to_little_endian`]
+    /// * [`DFA::write_to_big_endian`]
+    /// * [`DFA::write_to_native_endian`]
+    ///
+    /// The `to_bytes` methods allocate and return a `Vec<u8>` for you, along
+    /// with handling alignment correctly. The `write_to` methods do not
+    /// allocate and write to an existing slice (which may be on the stack).
+    /// Since deserialization always uses the native endianness of the target
+    /// platform, the serialization API you use should match the endianness of
+    /// the target platform. (It's often a good idea to generate serialized
+    /// DFAs for both forms of endianness and then load the correct one based
+    /// on endianness.)
+    ///
+    /// # Errors
+    ///
+    /// Generally speaking, it's easier to state the conditions in which an
+    /// error is _not_ returned. All of the following must be true:
+    ///
+    /// * The bytes given must be produced by one of the serialization APIs
+    ///   on this DFA, as mentioned above.
+    /// * The endianness of the target platform matches the endianness used
+    ///   to serialize the provided DFA.
+    /// * The slice given must have the same alignment as `u32`.
+    ///
+    /// If any of the above are not true, then an error will be returned.
+    ///
+    /// # Panics
+    ///
+    /// This routine will never panic for any input.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to serialize a DFA to raw bytes, deserialize it
+    /// and then use it for searching.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input};
+    ///
+    /// let initial = DFA::new("foo[0-9]+")?;
+    /// let (bytes, _) = initial.to_bytes_native_endian();
+    /// let dfa: DFA<&[u32]> = DFA::from_bytes(&bytes)?.0;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: dealing with alignment and padding
+    ///
+    /// In the above example, we used the `to_bytes_native_endian` method to
+    /// serialize a DFA, but we ignored part of its return value corresponding
+    /// to padding added to the beginning of the serialized DFA. This is OK
+    /// because deserialization will skip this initial padding. What matters
+    /// is that the address immediately following the padding has an alignment
+    /// that matches `u32`. That is, the following is an equivalent but
+    /// alternative way to write the above example:
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input};
+    ///
+    /// let initial = DFA::new("foo[0-9]+")?;
+    /// // Serialization returns the number of leading padding bytes added to
+    /// // the returned Vec<u8>.
+    /// let (bytes, pad) = initial.to_bytes_native_endian();
+    /// let dfa: DFA<&[u32]> = DFA::from_bytes(&bytes[pad..])?.0;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// This padding is necessary because Rust's standard library does
+    /// not expose any safe and robust way of creating a `Vec<u8>` with a
+    /// guaranteed alignment other than 1. Now, in practice, the underlying
+    /// allocator is likely to provide a `Vec<u8>` that meets our alignment
+    /// requirements, which means `pad` is zero in practice most of the time.
+    ///
+    /// The purpose of exposing the padding like this is flexibility for the
+    /// caller. For example, if one wants to embed a serialized DFA into a
+    /// compiled program, then it's important to guarantee that it starts at
+    /// a `u32`-aligned address. The simplest way to do this is to discard
+    /// the padding bytes and set it up so that the serialized DFA itself
+    /// begins at a properly aligned address. We can show this in two parts.
+    /// The first part is serializing the DFA to a file:
+    ///
+    /// ```no_run
+    /// use regex_automata::dfa::dense::DFA;
+    ///
+    /// let dfa = DFA::new("foo[0-9]+")?;
+    ///
+    /// let (bytes, pad) = dfa.to_bytes_big_endian();
+    /// // Write the contents of the DFA *without* the initial padding.
+    /// std::fs::write("foo.bigendian.dfa", &bytes[pad..])?;
+    ///
+    /// // Do it again, but this time for little endian.
+    /// let (bytes, pad) = dfa.to_bytes_little_endian();
+    /// std::fs::write("foo.littleendian.dfa", &bytes[pad..])?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And now the second part is embedding the DFA into the compiled
+    /// program and deserializing it at runtime on first use. We use
+    /// conditional compilation to choose the correct endianness.
+    ///
+    /// ```no_run
+    /// use regex_automata::{
+    ///     dfa::{Automaton, dense::DFA},
+    ///     util::{lazy::Lazy, wire::AlignAs},
+    ///     HalfMatch, Input,
+    /// };
+    ///
+    /// // This crate provides its own "lazy" type, kind of like
+    /// // lazy_static! or once_cell::sync::Lazy.
+    /// // But it works in no-alloc no-std environments and lets us write
+    /// // this using completely safe code.
+    /// static RE: Lazy<DFA<&'static [u32]>> = Lazy::new(|| {
+    ///     # const _: &str = stringify! {
+    ///     // This assignment is made possible (implicitly) via the
+    ///     // CoerceUnsized trait. This is what guarantees that our
+    ///     // bytes are stored in memory on a 4 byte boundary. You
+    ///     // *must* do this or something equivalent for correct
+    ///     // deserialization.
+    ///     static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+    ///         _align: [],
+    ///         #[cfg(target_endian = "big")]
+    ///         bytes: *include_bytes!("foo.bigendian.dfa"),
+    ///         #[cfg(target_endian = "little")]
+    ///         bytes: *include_bytes!("foo.littleendian.dfa"),
+    ///     };
+    ///     # };
+    ///     # static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+    ///     #     _align: [],
+    ///     #     bytes: [],
+    ///     # };
+    ///
+    ///     let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes)
+    ///         .expect("serialized DFA should be valid");
+    ///     dfa
+    /// });
+    ///
+    /// let expected = Ok(Some(HalfMatch::must(0, 8)));
+    /// assert_eq!(expected, RE.try_search_fwd(&Input::new("foo12345")));
+    /// ```
+    ///
+    /// An alternative to [`util::lazy::Lazy`](crate::util::lazy::Lazy)
+    /// is [`lazy_static`](https://crates.io/crates/lazy_static) or
+    /// [`once_cell`](https://crates.io/crates/once_cell), which provide
+    /// stronger guarantees (like the initialization function only being
+    /// executed once). And `once_cell` in particular provides a more
+    /// expressive API. But a `Lazy` value from this crate is likely just
+    /// fine in most circumstances.
+    ///
+    /// Note that regardless of which initialization method you use, you
+    /// will still need to use the [`AlignAs`](crate::util::wire::AlignAs)
+    /// trick above to force correct alignment, but this is safe to do and
+    /// `from_bytes` will return an error if you get it wrong.
+    pub fn from_bytes(
+        slice: &'a [u8],
+    ) -> Result<(DFA<&'a [u32]>, usize), DeserializeError> {
+        // SAFETY: This is safe because we validate the transition table,
+        // start table, match states and accelerators below. If any validation
+        // fails, then we return an error.
+        let (dfa, nread) = unsafe { DFA::from_bytes_unchecked(slice)? };
+        dfa.tt.validate(&dfa.special)?;
+        dfa.st.validate(&dfa.tt)?;
+        dfa.ms.validate(&dfa)?;
+        dfa.accels.validate()?;
+        // N.B. dfa.special doesn't have a way to do unchecked
+        // deserialization, so it has already been validated.
+        for state in dfa.states() {
+            // If the state is an accel state, then it must have a non-empty
+            // accelerator.
+            if dfa.is_accel_state(state.id()) {
+                let index = dfa.accelerator_index(state.id());
+                if index >= dfa.accels.len() {
+                    return Err(DeserializeError::generic(
+                        "found DFA state with invalid accelerator index",
+                    ));
+                }
+                let needles = dfa.accels.needles(index);
+                if !(1 <= needles.len() && needles.len() <= 3) {
+                    return Err(DeserializeError::generic(
+                        "accelerator needles has invalid length",
+                    ));
+                }
+            }
+        }
+        Ok((dfa, nread))
+    }
+
+    /// Deserialize a DFA with a specific state identifier representation in
+    /// constant time by omitting the verification of the validity of the
+    /// transition table and other data inside the DFA.
+    ///
+    /// This is just like [`DFA::from_bytes`], except it can potentially
+    /// return a DFA that exhibits undefined behavior if its transition table
+    /// contains invalid state identifiers.
+    ///
+    /// This routine is useful if you need to deserialize a DFA cheaply
+    /// and cannot afford the transition table validation performed by
+    /// `from_bytes`.
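+    ///
+    /// # Safety
+    ///
+    /// (An editorial sketch of the contract, inferred from the `SAFETY`
+    /// comment in the example below: callers must ensure that the given
+    /// bytes came from a compatible serialization routine of this crate. If
+    /// they did not, then the deserialized DFA may contain invalid state
+    /// identifiers, and using it for search may result in undefined
+    /// behavior.)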
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input};
+    ///
+    /// let initial = DFA::new("foo[0-9]+")?;
+    /// let (bytes, _) = initial.to_bytes_native_endian();
+    /// // SAFETY: This is guaranteed to be safe since the bytes given come
+    /// // directly from a compatible serialization routine.
+    /// let dfa: DFA<&[u32]> = unsafe { DFA::from_bytes_unchecked(&bytes)?.0 };
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub unsafe fn from_bytes_unchecked(
+        slice: &'a [u8],
+    ) -> Result<(DFA<&'a [u32]>, usize), DeserializeError> {
+        let mut nr = 0;
+
+        nr += wire::skip_initial_padding(slice);
+        wire::check_alignment::<StateID>(&slice[nr..])?;
+        nr += wire::read_label(&slice[nr..], LABEL)?;
+        nr += wire::read_endianness_check(&slice[nr..])?;
+        nr += wire::read_version(&slice[nr..], VERSION)?;
+
+        let _unused = wire::try_read_u32(&slice[nr..], "unused space")?;
+        nr += size_of::<u32>();
+
+        let (flags, nread) = Flags::from_bytes(&slice[nr..])?;
+        nr += nread;
+
+        let (tt, nread) = TransitionTable::from_bytes_unchecked(&slice[nr..])?;
+        nr += nread;
+
+        let (st, nread) = StartTable::from_bytes_unchecked(&slice[nr..])?;
+        nr += nread;
+
+        let (ms, nread) = MatchStates::from_bytes_unchecked(&slice[nr..])?;
+        nr += nread;
+
+        let (special, nread) = Special::from_bytes(&slice[nr..])?;
+        nr += nread;
+        special.validate_state_len(tt.len(), tt.stride2)?;
+
+        let (accels, nread) = Accels::from_bytes_unchecked(&slice[nr..])?;
+        nr += nread;
+
+        let (quitset, nread) = ByteSet::from_bytes(&slice[nr..])?;
+        nr += nread;
+
+        // Prefilters don't support serialization, so they're always absent.
+        let pre = None;
+        Ok((DFA { tt, st, ms, special, accels, pre, quitset, flags }, nr))
+    }
+
+    /// The implementation of the public `write_to` serialization methods,
+    /// which is generic over endianness.
+    ///
+    /// This is defined only for &[u32] to reduce binary size/compilation
+    /// time.
+    fn write_to<E: Endian>(
+        &self,
+        mut dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        let nwrite = self.write_to_len();
+        if dst.len() < nwrite {
+            return Err(SerializeError::buffer_too_small("dense DFA"));
+        }
+        dst = &mut dst[..nwrite];
+
+        let mut nw = 0;
+        nw += wire::write_label(LABEL, &mut dst[nw..])?;
+        nw += wire::write_endianness_check::<E>(&mut dst[nw..])?;
+        nw += wire::write_version::<E>(VERSION, &mut dst[nw..])?;
+        nw += {
+            // Currently unused, intended for future flexibility
+            E::write_u32(0, &mut dst[nw..]);
+            size_of::<u32>()
+        };
+        nw += self.flags.write_to::<E>(&mut dst[nw..])?;
+        nw += self.tt.write_to::<E>(&mut dst[nw..])?;
+        nw += self.st.write_to::<E>(&mut dst[nw..])?;
+        nw += self.ms.write_to::<E>(&mut dst[nw..])?;
+        nw += self.special.write_to::<E>(&mut dst[nw..])?;
+        nw += self.accels.write_to::<E>(&mut dst[nw..])?;
+        nw += self.quitset.write_to::<E>(&mut dst[nw..])?;
+        Ok(nw)
+    }
+}
+
+// The following methods implement mutable routines on the internal
+// representation of a DFA. As such, we must fix the first type parameter to
+// a `Vec<u32>` since a generic `T: AsRef<[u32]>` does not permit mutation.
+// We can get away with this because these methods are internal to the crate
+// and are exclusively used during construction of the DFA.
+#[cfg(feature = "dfa-build")]
+impl OwnedDFA {
+    /// Add a start state of this DFA.
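+    ///
+    /// (Note: `id` must point at a valid state in this DFA's transition
+    /// table; the assertion below enforces this.)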
+    pub(crate) fn set_start_state(
+        &mut self,
+        anchored: Anchored,
+        start: Start,
+        id: StateID,
+    ) {
+        assert!(self.tt.is_valid(id), "invalid start state");
+        self.st.set_start(anchored, start, id);
+    }
+
+    /// Set the given transition to this DFA. Both the `from` and `to` states
+    /// must already exist.
+    pub(crate) fn set_transition(
+        &mut self,
+        from: StateID,
+        byte: alphabet::Unit,
+        to: StateID,
+    ) {
+        self.tt.set(from, byte, to);
+    }
+
+    /// Add an empty state (a state where all transitions lead to a dead
+    /// state) and return its identifier. The identifier returned is
+    /// guaranteed to not point to any other existing state.
+    ///
+    /// If adding a state would exceed `StateID::LIMIT`, then this returns an
+    /// error.
+    pub(crate) fn add_empty_state(&mut self) -> Result<StateID, BuildError> {
+        self.tt.add_empty_state()
+    }
+
+    /// Swap the two states given in the transition table.
+    ///
+    /// This routine does not do anything to check the correctness of this
+    /// swap. Callers must ensure that other states pointing to id1 and id2
+    /// are updated appropriately.
+    pub(crate) fn swap_states(&mut self, id1: StateID, id2: StateID) {
+        self.tt.swap(id1, id2);
+    }
+
+    /// Remap all of the state identifiers in this DFA according to the map
+    /// function given. This includes all transitions and all starting state
+    /// identifiers.
+    pub(crate) fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
+        // We could loop over each state ID and call 'remap_state' here, but
+        // this is more direct: just map every transition directly. This
+        // technically might do a little extra work since the alphabet length
+        // is likely less than the stride, but if that is indeed an issue we
+        // should benchmark it and fix it.
+        for sid in self.tt.table_mut().iter_mut() {
+            *sid = map(*sid);
+        }
+        for sid in self.st.table_mut().iter_mut() {
+            *sid = map(*sid);
+        }
+    }
+
+    /// Remap the transitions for the state given according to the function
+    /// given. This applies the given map function to every transition in the
+    /// given state and changes the transition in place to the result of the
+    /// map function for that transition.
+    pub(crate) fn remap_state(
+        &mut self,
+        id: StateID,
+        map: impl Fn(StateID) -> StateID,
+    ) {
+        self.tt.remap(id, map);
+    }
+
+    /// Truncate the states in this DFA to the given length.
+    ///
+    /// This routine does not do anything to check the correctness of this
+    /// truncation. Callers must ensure that other states pointing to
+    /// truncated states are updated appropriately.
+    pub(crate) fn truncate_states(&mut self, len: usize) {
+        self.tt.truncate(len);
+    }
+
+    /// Minimize this DFA in place using Hopcroft's algorithm.
+    pub(crate) fn minimize(&mut self) {
+        Minimizer::new(self).run();
+    }
+
+    /// Updates the match state pattern ID map to use the one provided.
+    ///
+    /// This is useful when it's convenient to manipulate matching states
+    /// (and their corresponding pattern IDs) as a map. In particular, the
+    /// representation used by a DFA for this map is not amenable to
+    /// mutation, so if things need to be changed (like when shuffling
+    /// states), it's often easier to work with the map form.
+    pub(crate) fn set_pattern_map(
+        &mut self,
+        map: &BTreeMap<StateID, Vec<PatternID>>,
+    ) -> Result<(), BuildError> {
+        self.ms = self.ms.new_with_map(map)?;
+        Ok(())
+    }
+
+    /// Find states that have a small number of non-loop transitions and mark
+    /// them as candidates for acceleration during search.
+    pub(crate) fn accelerate(&mut self) {
+        // dead and quit states can never be accelerated.
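+        // (The dead state is always the first state with ID 0 and the quit
+        // state is always second, so a DFA with at most two states has
+        // nothing that could be accelerated.)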
+        if self.state_len() <= 2 {
+            return;
+        }
+
+        // Go through every state and record their accelerator, if possible.
+        let mut accels = BTreeMap::new();
+        // Count the number of accelerated match, start and non-match/start
+        // states.
+        let (mut cmatch, mut cstart, mut cnormal) = (0, 0, 0);
+        for state in self.states() {
+            if let Some(accel) = state.accelerate(self.byte_classes()) {
+                debug!(
+                    "accelerating full DFA state {}: {:?}",
+                    state.id().as_usize(),
+                    accel,
+                );
+                accels.insert(state.id(), accel);
+                if self.is_match_state(state.id()) {
+                    cmatch += 1;
+                } else if self.is_start_state(state.id()) {
+                    cstart += 1;
+                } else {
+                    assert!(!self.is_dead_state(state.id()));
+                    assert!(!self.is_quit_state(state.id()));
+                    cnormal += 1;
+                }
+            }
+        }
+        // If no states were able to be accelerated, then we're done.
+        if accels.is_empty() {
+            return;
+        }
+        let original_accels_len = accels.len();
+
+        // A remapper keeps track of state ID changes. Once we're done
+        // shuffling, the remapper is used to rewrite all transitions in the
+        // DFA based on the new positions of states.
+        let mut remapper = Remapper::new(self);
+
+        // As we swap states, if they are match states, we need to swap their
+        // pattern ID lists too (for multi-regexes). We do this by converting
+        // the lists to an easily swappable map, and then convert back to
+        // MatchStates once we're done.
+        let mut new_matches = self.ms.to_map(self);
+
+        // There is at least one state that gets accelerated, so these are
+        // guaranteed to get set to sensible values below.
+        self.special.min_accel = StateID::MAX;
+        self.special.max_accel = StateID::ZERO;
+        let update_special_accel =
+            |special: &mut Special, accel_id: StateID| {
+                special.min_accel = cmp::min(special.min_accel, accel_id);
+                special.max_accel = cmp::max(special.max_accel, accel_id);
+            };
+
+        // Start by shuffling match states. Any match states that are
+        // accelerated get moved to the end of the match state range.
+        if cmatch > 0 && self.special.matches() {
+            // N.B. special.{min,max}_match do not need updating, since the
+            // range/number of match states does not change. Only the ordering
+            // of match states may change.
+            let mut next_id = self.special.max_match;
+            let mut cur_id = next_id;
+            while cur_id >= self.special.min_match {
+                if let Some(accel) = accels.remove(&cur_id) {
+                    accels.insert(next_id, accel);
+                    update_special_accel(&mut self.special, next_id);
+
+                    // No need to do any actual swapping for equivalent IDs.
+                    if cur_id != next_id {
+                        remapper.swap(self, cur_id, next_id);
+
+                        // Swap pattern IDs for match states.
+                        let cur_pids = new_matches.remove(&cur_id).unwrap();
+                        let next_pids = new_matches.remove(&next_id).unwrap();
+                        new_matches.insert(cur_id, next_pids);
+                        new_matches.insert(next_id, cur_pids);
+                    }
+                    next_id = self.tt.prev_state_id(next_id);
+                }
+                cur_id = self.tt.prev_state_id(cur_id);
+            }
+        }
+
+        // This is where it gets tricky. Without acceleration, start states
+        // normally come right after match states. But we want accelerated
+        // states to be a single contiguous range (to make it very fast
+        // to determine whether a state *is* accelerated), while also keeping
+        // match and starting states as contiguous ranges for the same reason.
+        // So what we do here is shuffle states such that it looks like this:
+        //
+        //     DQMMMMAAAAASSSSSSNNNNNNN
+        //         |         |
+        //         |---------|
+        //      accelerated states
+        //
+        // Where:
+        //   D - dead state
+        //   Q - quit state
+        //   M - match state (may be accelerated)
+        //   A - normal state that is accelerated
+        //   S - start state (may be accelerated)
+        //   N - normal state that is NOT accelerated
+        //
+        // We implement this by shuffling states, which is done by a sequence
+        // of pairwise swaps. We start by looking at all normal states to be
+        // accelerated. When we find one, we swap it with the earliest
+        // starting state, and then swap that with the earliest normal state.
+        // This preserves the contiguous property.
+        //
+        // Once we're done looking for accelerated normal states, now we look
+        // for accelerated starting states by moving them to the beginning
+        // of the starting state range (just like we moved accelerated match
+        // states to the end of the matching state range).
+        //
+        // For a more detailed/different perspective on this, see the docs
+        // in dfa/special.rs.
+        if cnormal > 0 {
+            // our next available starting and normal states for swapping.
+            let mut next_start_id = self.special.min_start;
+            let mut cur_id = self.to_state_id(self.state_len() - 1);
+            // This is guaranteed to exist since cnormal > 0.
+            let mut next_norm_id =
+                self.tt.next_state_id(self.special.max_start);
+            while cur_id >= next_norm_id {
+                if let Some(accel) = accels.remove(&cur_id) {
+                    remapper.swap(self, next_start_id, cur_id);
+                    remapper.swap(self, next_norm_id, cur_id);
+                    // Keep our accelerator map updated with new IDs if the
+                    // states we swapped were also accelerated.
+                    if let Some(accel2) = accels.remove(&next_norm_id) {
+                        accels.insert(cur_id, accel2);
+                    }
+                    if let Some(accel2) = accels.remove(&next_start_id) {
+                        accels.insert(next_norm_id, accel2);
+                    }
+                    accels.insert(next_start_id, accel);
+                    update_special_accel(&mut self.special, next_start_id);
+                    // Our start range shifts one to the right now.
+                    self.special.min_start =
+                        self.tt.next_state_id(self.special.min_start);
+                    self.special.max_start =
+                        self.tt.next_state_id(self.special.max_start);
+                    next_start_id = self.tt.next_state_id(next_start_id);
+                    next_norm_id = self.tt.next_state_id(next_norm_id);
+                }
+                // This is pretty tricky, but if our 'next_norm_id' state also
+                // happened to be accelerated, then the result is that it is
+                // now in the position of cur_id, so we need to consider it
+                // again. This loop is still guaranteed to terminate though,
+                // because when accels contains cur_id, we're guaranteed to
+                // increment next_norm_id even if cur_id remains unchanged.
+                if !accels.contains_key(&cur_id) {
+                    cur_id = self.tt.prev_state_id(cur_id);
+                }
+            }
+        }
+        // Just like we did for match states, but we want to move accelerated
+        // start states to the beginning of the range instead of the end.
+        if cstart > 0 {
+            // N.B. special.{min,max}_start do not need updating, since the
+            // range/number of start states does not change at this point.
+            // Only the ordering of start states may change.
+            let mut next_id = self.special.min_start;
+            let mut cur_id = next_id;
+            while cur_id <= self.special.max_start {
+                if let Some(accel) = accels.remove(&cur_id) {
+                    remapper.swap(self, cur_id, next_id);
+                    accels.insert(next_id, accel);
+                    update_special_accel(&mut self.special, next_id);
+                    next_id = self.tt.next_state_id(next_id);
+                }
+                cur_id = self.tt.next_state_id(cur_id);
+            }
+        }
+
+        // Remap all transitions in our DFA and assert some things.
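+        // (Conceptually, the remapper has recorded every pairwise swap
+        // performed above, and `remap` now rewrites each transition `t` to
+        // `map(t)` so that every edge points at its state's post-shuffle
+        // ID.)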
+        remapper.remap(self);
+        // This unwrap is OK because acceleration never changes the number of
+        // match states or patterns in those match states. Since acceleration
+        // runs after the pattern map has been set at least once, we know that
+        // our match states cannot error.
+        self.set_pattern_map(&new_matches).unwrap();
+        self.special.set_max();
+        self.special.validate().expect("special state ranges should validate");
+        self.special
+            .validate_state_len(self.state_len(), self.stride2())
+            .expect(
+                "special state ranges should be consistent with state length",
+            );
+        assert_eq!(
+            self.special.accel_len(self.stride()),
+            // We record the number of accelerated states initially detected
+            // since the accels map is itself mutated in the process above.
+            // If mutated incorrectly, its size may change, and thus can't be
+            // trusted as a source of truth of how many accelerated states we
+            // expected there to be.
+            original_accels_len,
+            "mismatch with expected number of accelerated states",
+        );
+
+        // And finally record our accelerators. We kept our accels map
+        // updated as we shuffled states above, so the accelerators should
+        // now correspond to a contiguous range in the state ID space. (Which
+        // we assert.)
+        let mut prev: Option<StateID> = None;
+        for (id, accel) in accels {
+            assert!(prev.map_or(true, |p| self.tt.next_state_id(p) == id));
+            prev = Some(id);
+            self.accels.add(accel);
+        }
+    }
+
+    /// Shuffle the states in this DFA so that starting states, match
+    /// states and accelerated states are all contiguous.
+    ///
+    /// See dfa/special.rs for more details.
+    pub(crate) fn shuffle(
+        &mut self,
+        mut matches: BTreeMap<StateID, Vec<PatternID>>,
+    ) -> Result<(), BuildError> {
+        // The determinizer always adds a quit state and it is always second.
+        self.special.quit_id = self.to_state_id(1);
+        // If all we have are the dead and quit states, then we're done and
+        // the DFA will never produce a match.
+        if self.state_len() <= 2 {
+            self.special.set_max();
+            return Ok(());
+        }
+
+        // Collect all our non-DEAD start states into a convenient set and
+        // confirm there is no overlap with match states. In the classical
+        // DFA construction, start states can be match states. But because of
+        // look-around, we delay all matches by a byte, which prevents start
+        // states from being match states.
+        let mut is_start: BTreeSet<StateID> = BTreeSet::new();
+        for (start_id, _, _) in self.starts() {
+            // If a starting configuration points to a DEAD state, then we
+            // don't want to shuffle it. The DEAD state is always the first
+            // state with ID=0. So we can just leave it be.
+            if start_id == DEAD {
+                continue;
+            }
+            assert!(
+                !matches.contains_key(&start_id),
+                "{:?} is both a start and a match state, which is not allowed",
+                start_id,
+            );
+            is_start.insert(start_id);
+        }
+
+        // We implement shuffling by a sequence of pairwise swaps of states.
+        // Since we have a number of things referencing states via their
+        // IDs and swapping them changes their IDs, we need to record every
+        // swap we make so that we can remap IDs. The remapper handles this
+        // book-keeping for us.
+        let mut remapper = Remapper::new(self);
+
+        // Shuffle matching states.
+        if matches.is_empty() {
+            self.special.min_match = DEAD;
+            self.special.max_match = DEAD;
+        } else {
+            // The determinizer guarantees that the first two states are the
+            // dead and quit states, respectively. We want our match states to
+            // come right after quit.
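+            // (That is, the target layout at this point is: index 0 = dead,
+            // index 1 = quit, match states from index 2 onward, then start
+            // states, then everything else, per the diagram in accelerate()
+            // above.)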
+            let mut next_id = self.to_state_id(2);
+            let mut new_matches = BTreeMap::new();
+            self.special.min_match = next_id;
+            for (id, pids) in matches {
+                remapper.swap(self, next_id, id);
+                new_matches.insert(next_id, pids);
+                // If we swapped a start state, then update our set.
+                if is_start.contains(&next_id) {
+                    is_start.remove(&next_id);
+                    is_start.insert(id);
+                }
+                next_id = self.tt.next_state_id(next_id);
+            }
+            matches = new_matches;
+            self.special.max_match = cmp::max(
+                self.special.min_match,
+                self.tt.prev_state_id(next_id),
+            );
+        }
+
+        // Shuffle starting states.
+        {
+            let mut next_id = self.to_state_id(2);
+            if self.special.matches() {
+                next_id = self.tt.next_state_id(self.special.max_match);
+            }
+            self.special.min_start = next_id;
+            for id in is_start {
+                remapper.swap(self, next_id, id);
+                next_id = self.tt.next_state_id(next_id);
+            }
+            self.special.max_start = cmp::max(
+                self.special.min_start,
+                self.tt.prev_state_id(next_id),
+            );
+        }
+
+        // Finally remap all transitions in our DFA.
+        remapper.remap(self);
+        self.set_pattern_map(&matches)?;
+        self.special.set_max();
+        self.special.validate().expect("special state ranges should validate");
+        self.special
+            .validate_state_len(self.state_len(), self.stride2())
+            .expect(
+                "special state ranges should be consistent with state length",
+            );
+        Ok(())
+    }
+
+    /// Checks whether there are universal start states (both anchored and
+    /// unanchored), and if so, sets the relevant fields to the start state
+    /// IDs.
+    ///
+    /// Universal start states occur precisely when all patterns in the DFA
+    /// have no look-around assertions in their prefix.
+    fn set_universal_starts(&mut self) {
+        assert_eq!(6, Start::len(), "expected 6 start configurations");
+
+        let start_id = |dfa: &mut OwnedDFA,
+                        anchored: Anchored,
+                        start: Start| {
+            // This is OK because we only call 'start' under conditions
+            // in which we know it will succeed.
+            dfa.st.start(anchored, start).expect("valid Input configuration")
+        };
+        if self.start_kind().has_unanchored() {
+            let anchor = Anchored::No;
+            let sid = start_id(self, anchor, Start::NonWordByte);
+            if sid == start_id(self, anchor, Start::WordByte)
+                && sid == start_id(self, anchor, Start::Text)
+                && sid == start_id(self, anchor, Start::LineLF)
+                && sid == start_id(self, anchor, Start::LineCR)
+                && sid == start_id(self, anchor, Start::CustomLineTerminator)
+            {
+                self.st.universal_start_unanchored = Some(sid);
+            }
+        }
+        if self.start_kind().has_anchored() {
+            let anchor = Anchored::Yes;
+            let sid = start_id(self, anchor, Start::NonWordByte);
+            if sid == start_id(self, anchor, Start::WordByte)
+                && sid == start_id(self, anchor, Start::Text)
+                && sid == start_id(self, anchor, Start::LineLF)
+                && sid == start_id(self, anchor, Start::LineCR)
+                && sid == start_id(self, anchor, Start::CustomLineTerminator)
+            {
+                self.st.universal_start_anchored = Some(sid);
+            }
+        }
+    }
+}
+
+// A variety of generic internal methods for accessing DFA internals.
+impl<T: AsRef<[u32]>> DFA<T> {
+    /// Return the info about special states.
+    pub(crate) fn special(&self) -> &Special {
+        &self.special
+    }
+
+    /// Return the info about special states as a mutable borrow.
+    #[cfg(feature = "dfa-build")]
+    pub(crate) fn special_mut(&mut self) -> &mut Special {
+        &mut self.special
+    }
+
+    /// Returns the quit set (may be empty) used by this DFA.
+    pub(crate) fn quitset(&self) -> &ByteSet {
+        &self.quitset
+    }
+
+    /// Returns the flags for this DFA.
+    pub(crate) fn flags(&self) -> &Flags {
+        &self.flags
+    }
+
+    /// Returns an iterator over all states in this DFA.
+    ///
+    /// This iterator yields a tuple for each state. The first element of the
+    /// tuple corresponds to a state's identifier, and the second element
+    /// corresponds to the state itself (comprised of its transitions).
+    pub(crate) fn states(&self) -> StateIter<'_, T> {
+        self.tt.states()
+    }
+
+    /// Return the total number of states in this DFA. Every DFA has at least
+    /// 1 state, even the empty DFA.
+    pub(crate) fn state_len(&self) -> usize {
+        self.tt.len()
+    }
+
+    /// Return an iterator over all pattern IDs for the given match state.
+    ///
+    /// If the given state is not a match state, then this panics.
+    #[cfg(feature = "dfa-build")]
+    pub(crate) fn pattern_id_slice(&self, id: StateID) -> &[PatternID] {
+        assert!(self.is_match_state(id));
+        self.ms.pattern_id_slice(self.match_state_index(id))
+    }
+
+    /// Return the total number of pattern IDs for the given match state.
+    ///
+    /// If the given state is not a match state, then this panics.
+    pub(crate) fn match_pattern_len(&self, id: StateID) -> usize {
+        assert!(self.is_match_state(id));
+        self.ms.pattern_len(self.match_state_index(id))
+    }
+
+    /// Returns the total number of patterns matched by this DFA.
+    pub(crate) fn pattern_len(&self) -> usize {
+        self.ms.pattern_len
+    }
+
+    /// Returns a map from match state ID to a list of pattern IDs that match
+    /// in that state.
+    #[cfg(feature = "dfa-build")]
+    pub(crate) fn pattern_map(&self) -> BTreeMap<StateID, Vec<PatternID>> {
+        self.ms.to_map(self)
+    }
+
+    /// Returns the ID of the quit state for this DFA.
+    #[cfg(feature = "dfa-build")]
+    pub(crate) fn quit_id(&self) -> StateID {
+        self.to_state_id(1)
+    }
+
+    /// Convert the given state identifier to the state's index. The state's
+    /// index corresponds to the position in which it appears in the
+    /// transition table. When a DFA is NOT premultiplied, then a state's
+    /// identifier is also its index. When a DFA is premultiplied, then a
+    /// state's identifier is equal to `index * stride`. This routine
+    /// reverses that.
+    pub(crate) fn to_index(&self, id: StateID) -> usize {
+        self.tt.to_index(id)
+    }
+
+    /// Convert an index to a state (in the range 0..self.state_len()) to an
+    /// actual state identifier.
+    ///
+    /// This is useful when using a `Vec<T>` as an efficient map keyed by
+    /// state to some other information (such as a remapped state ID).
+    #[cfg(feature = "dfa-build")]
+    pub(crate) fn to_state_id(&self, index: usize) -> StateID {
+        self.tt.to_state_id(index)
+    }
+
+    /// Return the table of state IDs for this DFA's start states.
+    pub(crate) fn starts(&self) -> StartStateIter<'_> {
+        self.st.iter()
+    }
+
+    /// Returns the index of the match state for the given ID. If the
+    /// given ID does not correspond to a match state, then this may
+    /// panic or produce an incorrect result.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn match_state_index(&self, id: StateID) -> usize {
+        debug_assert!(self.is_match_state(id));
+        // This is one of the places where we rely on the fact that match
+        // states are contiguous in the transition table. Namely, that the
+        // first match state ID always corresponds to dfa.special.min_match.
+        // From there, since we know the stride, we can compute the overall
+        // index of any match state given the match state's ID.
+        let min = self.special().min_match.as_usize();
+        // CORRECTNESS: We're allowed to produce an incorrect result or panic,
+        // so both the subtraction and the unchecked StateID construction is
+        // OK.
+        self.to_index(StateID::new_unchecked(id.as_usize() - min))
+    }
+
+    /// Returns the index of the accelerator state for the given ID. If the
+    /// given ID does not correspond to an accelerator state, then this may
+    /// panic or produce an incorrect result.
+    fn accelerator_index(&self, id: StateID) -> usize {
+        let min = self.special().min_accel.as_usize();
+        // CORRECTNESS: We're allowed to produce an incorrect result or panic,
+        // so both the subtraction and the unchecked StateID construction is
+        // OK.
+        self.to_index(StateID::new_unchecked(id.as_usize() - min))
+    }
+
+    /// Return the accelerators for this DFA.
+    fn accels(&self) -> Accels<&[u32]> {
+        self.accels.as_ref()
+    }
+
+    /// Return this DFA's transition table as a slice.
+    fn trans(&self) -> &[StateID] {
+        self.tt.table()
+    }
+}
+
+impl<T: AsRef<[u32]>> fmt::Debug for DFA<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        writeln!(f, "dense::DFA(")?;
+        for state in self.states() {
+            fmt_state_indicator(f, self, state.id())?;
+            let id = if f.alternate() {
+                state.id().as_usize()
+            } else {
+                self.to_index(state.id())
+            };
+            write!(f, "{:06?}: ", id)?;
+            state.fmt(f)?;
+            write!(f, "\n")?;
+        }
+        writeln!(f, "")?;
+        for (i, (start_id, anchored, sty)) in self.starts().enumerate() {
+            let id = if f.alternate() {
+                start_id.as_usize()
+            } else {
+                self.to_index(start_id)
+            };
+            if i % self.st.stride == 0 {
+                match anchored {
+                    Anchored::No => writeln!(f, "START-GROUP(unanchored)")?,
+                    Anchored::Yes => writeln!(f, "START-GROUP(anchored)")?,
+                    Anchored::Pattern(pid) => {
+                        writeln!(f, "START_GROUP(pattern: {:?})", pid)?
+                    }
+                }
+            }
+            writeln!(f, "  {:?} => {:06?}", sty, id)?;
+        }
+        if self.pattern_len() > 1 {
+            writeln!(f, "")?;
+            for i in 0..self.ms.len() {
+                let id = self.ms.match_state_id(self, i);
+                let id = if f.alternate() {
+                    id.as_usize()
+                } else {
+                    self.to_index(id)
+                };
+                write!(f, "MATCH({:06?}): ", id)?;
+                for (i, &pid) in self.ms.pattern_id_slice(i).iter().enumerate()
+                {
+                    if i > 0 {
+                        write!(f, ", ")?;
+                    }
+                    write!(f, "{:?}", pid)?;
+                }
+                writeln!(f, "")?;
+            }
+        }
+        writeln!(f, "state length: {:?}", self.state_len())?;
+        writeln!(f, "pattern length: {:?}", self.pattern_len())?;
+        writeln!(f, "flags: {:?}", self.flags)?;
+        writeln!(f, ")")?;
+        Ok(())
+    }
+}
+
+// SAFETY: We assert that our implementation of each method is correct.
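+// (A note on why this impl is `unsafe`: callers of `Automaton` rely on the
+// correctness of these methods, e.g., `next_state_unchecked` below elides
+// bounds checks, so an incorrect implementation could lead to undefined
+// behavior.)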
+unsafe impl<T: AsRef<[u32]>> Automaton for DFA<T> {
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_special_state(&self, id: StateID) -> bool {
+        self.special.is_special_state(id)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_dead_state(&self, id: StateID) -> bool {
+        self.special.is_dead_state(id)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_quit_state(&self, id: StateID) -> bool {
+        self.special.is_quit_state(id)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_match_state(&self, id: StateID) -> bool {
+        self.special.is_match_state(id)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_start_state(&self, id: StateID) -> bool {
+        self.special.is_start_state(id)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_accel_state(&self, id: StateID) -> bool {
+        self.special.is_accel_state(id)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn next_state(&self, current: StateID, input: u8) -> StateID {
+        let input = self.byte_classes().get(input);
+        let o = current.as_usize() + usize::from(input);
+        self.trans()[o]
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    unsafe fn next_state_unchecked(
+        &self,
+        current: StateID,
+        byte: u8,
+    ) -> StateID {
+        // We don't (or shouldn't) need an unchecked variant for the byte
+        // class mapping, since bound checks should be omitted automatically
+        // by virtue of its representation. If this ends up not being true as
+        // confirmed by codegen, please file an issue. ---AG
+        let class = self.byte_classes().get(byte);
+        let o = current.as_usize() + usize::from(class);
+        let next = *self.trans().get_unchecked(o);
+        next
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn next_eoi_state(&self, current: StateID) -> StateID {
+        let eoi = self.byte_classes().eoi().as_usize();
+        let o = current.as_usize() + eoi;
+        self.trans()[o]
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn pattern_len(&self) -> usize {
+        self.ms.pattern_len
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn match_len(&self, id: StateID) -> usize {
+        self.match_pattern_len(id)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn match_pattern(&self, id: StateID, match_index: usize) -> PatternID {
+        // This is an optimization for the very common case of a DFA with a
+        // single pattern. This conditional avoids a somewhat more costly path
+        // that finds the pattern ID from the state machine, which requires
+        // a bit of slicing/pointer-chasing. This optimization tends to only
+        // matter when matches are frequent.
+        if self.ms.pattern_len == 1 {
+            return PatternID::ZERO;
+        }
+        let state_index = self.match_state_index(id);
+        self.ms.pattern_id(state_index, match_index)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn has_empty(&self) -> bool {
+        self.flags.has_empty
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_utf8(&self) -> bool {
+        self.flags.is_utf8
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_always_start_anchored(&self) -> bool {
+        self.flags.is_always_start_anchored
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn start_state(
+        &self,
+        config: &start::Config,
+    ) -> Result<StateID, StartError> {
+        let anchored = config.get_anchored();
+        let start = match config.get_look_behind() {
+            None => Start::Text,
+            Some(byte) => {
+                if !self.quitset.is_empty() && self.quitset.contains(byte) {
+                    return Err(StartError::quit(byte));
+                }
+                self.st.start_map.get(byte)
+            }
+        };
+        self.st.start(anchored, start)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn universal_start_state(&self, mode: Anchored) -> Option<StateID> {
+        match mode {
+            Anchored::No => self.st.universal_start_unanchored,
+            Anchored::Yes => self.st.universal_start_anchored,
+            Anchored::Pattern(_) => None,
+        }
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn accelerator(&self, id: StateID) -> &[u8] {
+        if !self.is_accel_state(id) {
+            return &[];
+        }
+        self.accels.needles(self.accelerator_index(id))
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn get_prefilter(&self) -> Option<&Prefilter> {
+        self.pre.as_ref()
+    }
+}
+
+/// The transition table portion of a dense DFA.
+///
+/// The transition table is the core part of the DFA in that it describes how
+/// to move from one state to another based on the input sequence observed.
+#[derive(Clone)]
+pub(crate) struct TransitionTable<T> {
+    /// A contiguous region of memory representing the transition table in
+    /// row-major order. The representation is dense. That is, every state
+    /// has precisely the same number of transitions. The maximum number of
+    /// transitions per state is 257 (256 for each possible byte value, plus 1
+    /// for the special EOI transition). If a DFA has been instructed to use
+    /// byte classes (the default), then the number of transitions is usually
+    /// substantially fewer.
+    ///
+    /// In practice, T is either `Vec<u32>` or `&[u32]`.
+    table: T,
+    /// A set of equivalence classes, where a single equivalence class
+    /// represents a set of bytes that never discriminate between a match
+    /// and a non-match in the DFA. Each equivalence class corresponds to a
+    /// single character in this DFA's alphabet, where the maximum number of
+    /// characters is 257 (each possible value of a byte plus the special
+    /// EOI transition). Consequently, the number of equivalence classes
+    /// corresponds to the number of transitions for each DFA state. Note
+    /// though that the *space* used by each DFA state in the transition
+    /// table may be larger. The total space used by each DFA state is known
+    /// as the stride.
+    ///
+    /// The only time the number of equivalence classes is fewer than 257 is
+    /// if the DFA's kind uses byte classes (which is the default).
+    /// Equivalence classes should generally only be disabled when debugging,
+    /// so that the transitions themselves aren't obscured. Disabling them
+    /// has no other benefit, since the equivalence class map is always used
+    /// while searching.
+    /// In the vast majority of cases, the number of equivalence classes is
+    /// substantially smaller than 257, particularly when large Unicode
+    /// classes aren't used.
+    classes: ByteClasses,
+    /// The stride of each DFA state, expressed as a power-of-two exponent.
+    ///
+    /// The stride of a DFA corresponds to the total amount of space used by
+    /// each DFA state in the transition table. This may be bigger than the
+    /// size of a DFA's alphabet, since the stride is always the smallest
+    /// power of two greater than or equal to the alphabet size.
+    ///
+    /// While this wastes space, this avoids the need for integer division
+    /// to convert between premultiplied state IDs and their corresponding
+    /// indices. Instead, we can use simple bit-shifts.
+    ///
+    /// See the docs for the `stride2` method for more details.
+    ///
+    /// The minimum `stride2` value is `1` (corresponding to a stride of `2`)
+    /// while the maximum `stride2` value is `9` (corresponding to a stride
+    /// of `512`). The maximum is not `8` since the maximum alphabet size is
+    /// `257` when accounting for the special EOI transition. However, an
+    /// alphabet length of that size is exceptionally rare since the alphabet
+    /// is shrunk into equivalence classes.
+    stride2: usize,
+}
+
+impl<'a> TransitionTable<&'a [u32]> {
+    /// Deserialize a transition table starting at the beginning of `slice`.
+    /// Upon success, return the total number of bytes read along with the
+    /// transition table.
+    ///
+    /// If there was a problem deserializing any part of the transition
+    /// table, then this returns an error. Notably, if the given slice does
+    /// not have the same alignment as `StateID`, then this will return an
+    /// error (among other possible errors).
+    ///
+    /// This is guaranteed to execute in constant time.
+    ///
+    /// # Safety
+    ///
+    /// This routine is not safe because it does not check the validity of
+    /// the transition table itself. In particular, the transition table can
+    /// be quite large, so checking its validity can be somewhat expensive.
+    /// An invalid transition table is not safe because other code may rely
+    /// on the transition table being correct (such as explicit bounds check
+    /// elision). Therefore, an invalid transition table can lead to
+    /// undefined behavior.
+    ///
+    /// Callers that use this function must either pass on the safety
+    /// invariant or guarantee that the bytes given contain a valid
+    /// transition table. This guarantee is upheld by the bytes written by
+    /// `write_to`.
+    unsafe fn from_bytes_unchecked(
+        mut slice: &'a [u8],
+    ) -> Result<(TransitionTable<&'a [u32]>, usize), DeserializeError> {
+        let slice_start = slice.as_ptr().as_usize();
+
+        let (state_len, nr) =
+            wire::try_read_u32_as_usize(slice, "state length")?;
+        slice = &slice[nr..];
+
+        let (stride2, nr) = wire::try_read_u32_as_usize(slice, "stride2")?;
+        slice = &slice[nr..];
+
+        let (classes, nr) = ByteClasses::from_bytes(slice)?;
+        slice = &slice[nr..];
+
+        // The alphabet length (determined by the byte class map) cannot be
+        // bigger than the stride (total space used by each DFA state).
+        if stride2 > 9 {
+            return Err(DeserializeError::generic(
+                "dense DFA has invalid stride2 (too big)",
+            ));
+        }
+        // It also cannot be zero, since even a DFA that never matches
+        // anything has a non-zero number of states with at least two
+        // equivalence classes: one for all 256 byte values and another for
+        // the EOI sentinel.
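+        // (As a concrete illustration: an alphabet of, say, 97 equivalence
+        // classes gets rounded up to a stride of 128, i.e. stride2 = 7. A
+        // state's premultiplied ID is then its index shifted left by 7, and
+        // `id >> 7` recovers the index.)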
+        if stride2 < 1 {
+            return Err(DeserializeError::generic(
+                "dense DFA has invalid stride2 (too small)",
+            ));
+        }
+        // This is OK since 1 <= stride2 <= 9.
+        let stride =
+            1usize.checked_shl(u32::try_from(stride2).unwrap()).unwrap();
+        if classes.alphabet_len() > stride {
+            return Err(DeserializeError::generic(
+                "alphabet size cannot be bigger than transition table stride",
+            ));
+        }
+
+        let trans_len =
+            wire::shl(state_len, stride2, "dense table transition length")?;
+        let table_bytes_len = wire::mul(
+            trans_len,
+            StateID::SIZE,
+            "dense table state byte length",
+        )?;
+        wire::check_slice_len(slice, table_bytes_len, "transition table")?;
+        wire::check_alignment::<StateID>(slice)?;
+        let table_bytes = &slice[..table_bytes_len];
+        slice = &slice[table_bytes_len..];
+        // SAFETY: Since StateID is always representable as a u32, all we
+        // need to do is ensure that we have the proper length and alignment.
+        // We've checked both above, so the cast below is safe.
+        //
+        // N.B. This is the only not-safe code in this function.
+        let table = core::slice::from_raw_parts(
+            table_bytes.as_ptr().cast::<u32>(),
+            trans_len,
+        );
+        let tt = TransitionTable { table, classes, stride2 };
+        Ok((tt, slice.as_ptr().as_usize() - slice_start))
+    }
+}
+
+#[cfg(feature = "dfa-build")]
+impl TransitionTable<Vec<u32>> {
+    /// Create a minimal transition table with just two states: a dead state
+    /// and a quit state. The alphabet length and stride of the transition
+    /// table is determined by the given set of equivalence classes.
+    fn minimal(classes: ByteClasses) -> TransitionTable<Vec<u32>> {
+        let mut tt = TransitionTable {
+            table: vec![],
+            classes,
+            stride2: classes.stride2(),
+        };
+        // Two states, regardless of alphabet size, can always fit into u32.
+        tt.add_empty_state().unwrap(); // dead state
+        tt.add_empty_state().unwrap(); // quit state
+        tt
+    }
+
+    /// Set a transition in this table. Both the `from` and `to` states must
+    /// already exist, otherwise this panics. `unit` should correspond to the
+    /// transition out of `from` to set to `to`.
+    fn set(&mut self, from: StateID, unit: alphabet::Unit, to: StateID) {
+        assert!(self.is_valid(from), "invalid 'from' state");
+        assert!(self.is_valid(to), "invalid 'to' state");
+        self.table[from.as_usize() + self.classes.get_by_unit(unit)] =
+            to.as_u32();
+    }
+
+    /// Add an empty state (a state where all transitions lead to a dead
+    /// state) and return its identifier. The identifier returned is
+    /// guaranteed to not point to any other existing state.
+    ///
+    /// If adding a state would exhaust the state identifier space, then this
+    /// returns an error.
+    fn add_empty_state(&mut self) -> Result<StateID, BuildError> {
+        // Normally, to get a fresh state identifier, we would just
+        // take the index of the next state added to the transition
+        // table. However, we actually perform an optimization here
+        // that premultiplies state IDs by the stride, such that they
+        // point immediately at the beginning of their transitions in
+        // the transition table. This avoids an extra multiplication
+        // instruction for state lookup at search time.
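+        //
+        // (In code terms: `to_state_id(index)` is `index << stride2` and
+        // `to_index(id)` is `id >> stride2`; see those helpers below.)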
+        //
+        // Premultiplied identifiers means that instead of your matching
+        // loop looking something like this:
+        //
+        //     state = dfa.start
+        //     for byte in haystack:
+        //         next = dfa.transitions[state * stride + byte]
+        //         if dfa.is_match(next):
+        //             return true
+        //     return false
+        //
+        // it can instead look like this:
+        //
+        //     state = dfa.start
+        //     for byte in haystack:
+        //         next = dfa.transitions[state + byte]
+        //         if dfa.is_match(next):
+        //             return true
+        //     return false
+        //
+        // In other words, we save a multiplication instruction in the
+        // critical path. This turns out to be a decent performance win.
+        // The cost of using premultiplied state ids is that they can
+        // require a bigger state id representation. (And they also make
+        // the code a bit more complex, especially during minimization and
+        // when reshuffling states, as one needs to convert back and forth
+        // between state IDs and state indices.)
+        //
+        // To do this, we simply take the index of the state into the
+        // entire transition table, rather than the index of the state
+        // itself. e.g., If the stride is 64, then the ID of the 3rd state
+        // is 192, not 2.
+        let next = self.table.len();
+        let id =
+            StateID::new(next).map_err(|_| BuildError::too_many_states())?;
+        self.table.extend(iter::repeat(0).take(self.stride()));
+        Ok(id)
+    }
+
+    /// Swap the two states given in this transition table.
+    ///
+    /// This routine does not do anything to check the correctness of this
+    /// swap. Callers must ensure that other states pointing to id1 and id2
+    /// are updated appropriately.
+    ///
+    /// Both id1 and id2 must point to valid states, otherwise this panics.
+    fn swap(&mut self, id1: StateID, id2: StateID) {
+        assert!(self.is_valid(id1), "invalid 'id1' state: {:?}", id1);
+        assert!(self.is_valid(id2), "invalid 'id2' state: {:?}", id2);
+        // We only need to swap the parts of the state that are used. So if
+        // the stride is 64, but the alphabet length is only 33, then we save
+        // a lot of work.
+        for b in 0..self.classes.alphabet_len() {
+            self.table.swap(id1.as_usize() + b, id2.as_usize() + b);
+        }
+    }
+
+    /// Remap the transitions for the state given according to the function
+    /// given. This applies the given map function to every transition in the
+    /// given state and changes the transition in place to the result of the
+    /// map function for that transition.
+    fn remap(&mut self, id: StateID, map: impl Fn(StateID) -> StateID) {
+        for byte in 0..self.alphabet_len() {
+            let i = id.as_usize() + byte;
+            let next = self.table()[i];
+            self.table_mut()[id.as_usize() + byte] = map(next);
+        }
+    }
+
+    /// Truncate the states in this transition table to the given length.
+    ///
+    /// This routine does not do anything to check the correctness of this
+    /// truncation. Callers must ensure that other states pointing to
+    /// truncated states are updated appropriately.
+    fn truncate(&mut self, len: usize) {
+        self.table.truncate(len << self.stride2);
+    }
+}
+
+impl<T: AsRef<[u32]>> TransitionTable<T> {
+    /// Writes a serialized form of this transition table to the buffer
+    /// given. If the buffer is too small, then an error is returned. To
+    /// determine how big the buffer must be, use `write_to_len`.
+    fn write_to<E: Endian>(
+        &self,
+        mut dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        let nwrite = self.write_to_len();
+        if dst.len() < nwrite {
+            return Err(SerializeError::buffer_too_small("transition table"));
+        }
+        dst = &mut dst[..nwrite];
+
+        // write state length
+        // Unwrap is OK since number of states is guaranteed to fit in a u32.
+        E::write_u32(u32::try_from(self.len()).unwrap(), dst);
+        dst = &mut dst[size_of::<u32>()..];
+
+        // write state stride (as power of 2)
+        // Unwrap is OK since stride2 is guaranteed to be <= 9.
+        E::write_u32(u32::try_from(self.stride2).unwrap(), dst);
+        dst = &mut dst[size_of::<u32>()..];
+
+        // write byte class map
+        let n = self.classes.write_to(dst)?;
+        dst = &mut dst[n..];
+
+        // write actual transitions
+        for &sid in self.table() {
+            let n = wire::write_state_id::<E>(sid, &mut dst);
+            dst = &mut dst[n..];
+        }
+        Ok(nwrite)
+    }
+
+    /// Returns the number of bytes the serialized form of this transition
+    /// table will use.
+    fn write_to_len(&self) -> usize {
+        size_of::<u32>() // state length
+            + size_of::<u32>() // stride2
+            + self.classes.write_to_len()
+            + (self.table().len() * StateID::SIZE)
+    }
+
+    /// Validates that every state ID in this transition table is valid.
+    ///
+    /// That is, every state ID can be used to correctly index a state in
+    /// this table.
+    fn validate(&self, sp: &Special) -> Result<(), DeserializeError> {
+        for state in self.states() {
+            // We check that the ID itself is well formed. That is, if it's
+            // a special state then it must actually be a quit, dead, accel,
+            // match or start state.
+            if sp.is_special_state(state.id()) {
+                let is_actually_special = sp.is_dead_state(state.id())
+                    || sp.is_quit_state(state.id())
+                    || sp.is_match_state(state.id())
+                    || sp.is_start_state(state.id())
+                    || sp.is_accel_state(state.id());
+                if !is_actually_special {
+                    // This is kind of a cryptic error message...
+                    return Err(DeserializeError::generic(
+                        "found dense state tagged as special but \
+                         wasn't actually special",
+                    ));
+                }
+            }
+            for (_, to) in state.transitions() {
+                if !self.is_valid(to) {
+                    return Err(DeserializeError::generic(
+                        "found invalid state ID in transition table",
+                    ));
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Converts this transition table to a borrowed value.
+    fn as_ref(&self) -> TransitionTable<&'_ [u32]> {
+        TransitionTable {
+            table: self.table.as_ref(),
+            classes: self.classes.clone(),
+            stride2: self.stride2,
+        }
+    }
+
+    /// Converts this transition table to an owned value.
+    #[cfg(feature = "alloc")]
+    fn to_owned(&self) -> TransitionTable<Vec<u32>> {
+        TransitionTable {
+            table: self.table.as_ref().to_vec(),
+            classes: self.classes.clone(),
+            stride2: self.stride2,
+        }
+    }
+
+    /// Return the state for the given ID. If the given ID is not valid,
+    /// then this panics.
+    fn state(&self, id: StateID) -> State<'_> {
+        assert!(self.is_valid(id));
+
+        let i = id.as_usize();
+        State {
+            id,
+            stride2: self.stride2,
+            transitions: &self.table()[i..i + self.alphabet_len()],
+        }
+    }
+
+    /// Returns an iterator over all states in this transition table.
+    ///
+    /// This iterator yields a tuple for each state. The first element of the
+    /// tuple corresponds to a state's identifier, and the second element
+    /// corresponds to the state itself (comprised of its transitions).
+    fn states(&self) -> StateIter<'_, T> {
+        StateIter {
+            tt: self,
+            it: self.table().chunks(self.stride()).enumerate(),
+        }
+    }
+
+    /// Convert a state identifier to an index to a state (in the range
+    /// 0..self.len()).
+    ///
+    /// This is useful when using a `Vec<T>` as an efficient map keyed by
+    /// state to some other information (such as a remapped state ID).
+    ///
+    /// If the given ID is not valid, then this may panic or produce an
+    /// incorrect index.
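+    ///
+    /// (For example, with `stride2 = 7`, i.e. a stride of `128`, the
+    /// premultiplied ID `256` corresponds to index `2`, since
+    /// `256 >> 7 == 2`.)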
+    fn to_index(&self, id: StateID) -> usize {
+        id.as_usize() >> self.stride2
+    }
+
+    /// Convert an index to a state (in the range 0..self.len()) to an actual
+    /// state identifier.
+    ///
+    /// This is useful when using a `Vec<T>` as an efficient map keyed by
+    /// state to some other information (such as a remapped state ID).
+    ///
+    /// If the given index is not in the specified range, then this may panic
+    /// or produce an incorrect state ID.
+    fn to_state_id(&self, index: usize) -> StateID {
+        // CORRECTNESS: If the given index is not valid, then it is not
+        // required for this to panic or return a valid state ID.
+        StateID::new_unchecked(index << self.stride2)
+    }
+
+    /// Returns the state ID for the state immediately following the one
+    /// given.
+    ///
+    /// This does not check whether the state ID returned is invalid. In
+    /// fact, if the state ID given is the last state in this DFA, then the
+    /// state ID returned is guaranteed to be invalid.
+    #[cfg(feature = "dfa-build")]
+    fn next_state_id(&self, id: StateID) -> StateID {
+        self.to_state_id(self.to_index(id).checked_add(1).unwrap())
+    }
+
+    /// Returns the state ID for the state immediately preceding the one
+    /// given.
+    ///
+    /// If the dead state ID is given (which is zero), then this panics.
+    #[cfg(feature = "dfa-build")]
+    fn prev_state_id(&self, id: StateID) -> StateID {
+        self.to_state_id(self.to_index(id).checked_sub(1).unwrap())
+    }
+
+    /// Returns the table as a slice of state IDs.
+    fn table(&self) -> &[StateID] {
+        wire::u32s_to_state_ids(self.table.as_ref())
+    }
+
+    /// Returns the total number of states in this transition table.
+    ///
+    /// Note that a DFA always has at least two states: the dead and quit
+    /// states. In particular, the dead state always has ID 0 and is
+    /// correspondingly always the first state. The dead state is never a
+    /// match state.
+    fn len(&self) -> usize {
+        self.table().len() >> self.stride2
+    }
+
+    /// Returns the total stride for every state in this DFA. This
+    /// corresponds to the total number of transitions used by each state in
+    /// this DFA's transition table.
+    fn stride(&self) -> usize {
+        1 << self.stride2
+    }
+
+    /// Returns the total number of elements in the alphabet for this
+    /// transition table. This is always less than or equal to
+    /// `self.stride()`. It is only equal when the alphabet length is a
+    /// power of 2. Otherwise, it is always strictly less.
+    fn alphabet_len(&self) -> usize {
+        self.classes.alphabet_len()
+    }
+
+    /// Returns true if and only if the given state ID is valid for this
+    /// transition table. Validity in this context means that the given ID
+    /// can be used as a valid offset with `self.stride()` to index this
+    /// transition table.
+    fn is_valid(&self, id: StateID) -> bool {
+        let id = id.as_usize();
+        id < self.table().len() && id % self.stride() == 0
+    }
+
+    /// Return the memory usage, in bytes, of this transition table.
+    ///
+    /// This does not include the size of a `TransitionTable` value itself.
+    fn memory_usage(&self) -> usize {
+        self.table().len() * StateID::SIZE
+    }
+}
+
+#[cfg(feature = "dfa-build")]
+impl<T: AsMut<[u32]>> TransitionTable<T> {
+    /// Returns the table as a mutable slice of state IDs.
+    fn table_mut(&mut self) -> &mut [StateID] {
+        wire::u32s_to_state_ids_mut(self.table.as_mut())
+    }
+}
+
+/// The set of all possible starting states in a DFA.
+///
+/// The set of starting states corresponds to the possible choices one can
+/// make in terms of starting a DFA.
That is, before following the first transition, +/// you first need to select the state that you start in. +/// +/// Normally, a DFA converted from an NFA that has a single starting state +/// would itself just have one starting state. However, our support for look +/// around generally requires more starting states. The correct starting state +/// is chosen based on certain properties of the position at which we begin +/// our search. +/// +/// Before listing those properties, we first must define two terms: +/// +/// * `haystack` - The bytes to execute the search. The search always starts +/// at the beginning of `haystack` and ends before or at the end of +/// `haystack`. +/// * `context` - The (possibly empty) bytes surrounding `haystack`. `haystack` +/// must be contained within `context` such that `context` is at least as big +/// as `haystack`. +/// +/// This split is crucial for dealing with look-around. For example, consider +/// the context `foobarbaz`, the haystack `bar` and the regex `^bar$`. This +/// regex should _not_ match the haystack since `bar` does not appear at the +/// beginning of the input. Similarly, the regex `\Bbar\B` should match the +/// haystack because `bar` is not surrounded by word boundaries. But a search +/// that does not take context into account would not permit `\B` to match +/// since the beginning of any string matches a word boundary. Similarly, a +/// search that does not take context into account when searching `^bar$` in +/// the haystack `bar` would produce a match when it shouldn't. +/// +/// Thus, it follows that the starting state is chosen based on the following +/// criteria, derived from the position at which the search starts in the +/// `context` (corresponding to the start of `haystack`): +/// +/// 1. If the search starts at the beginning of `context`, then the `Text` +/// start state is used. (Since `^` corresponds to +/// `hir::Anchor::Start`.) +/// 2. If the search starts at a position immediately following a line +/// terminator, then the `Line` start state is used. (Since `(?m:^)` +/// corresponds to `hir::Anchor::StartLF`.) +/// 3. If the search starts at a position immediately following a byte +/// classified as a "word" character (`[_0-9a-zA-Z]`), then the `WordByte` +/// start state is used. (Since `(?-u:\b)` corresponds to a word boundary.) +/// 4. Otherwise, if the search starts at a position immediately following +/// a byte that is not classified as a "word" character (`[^_0-9a-zA-Z]`), +/// then the `NonWordByte` start state is used. (Since `(?-u:\B)` +/// corresponds to a not-word-boundary.) +/// +/// (N.B. Unicode word boundaries are not supported by the DFA because they +/// require multi-byte look-around and this is difficult to support in a DFA.) +/// +/// To further complicate things, we also support constructing individual +/// anchored start states for each pattern in the DFA. (Which is required to +/// implement overlapping regexes correctly, but is also generally useful.) +/// Thus, when individual start states for each pattern are enabled, then the +/// total number of start states represented is `4 + (4 * #patterns)`, where +/// the 4 comes from each of the 4 possibilities above. The first 4 represents +/// the starting states for the entire DFA, which support searching for +/// multiple patterns simultaneously (possibly unanchored). +/// +/// If individual start states are disabled, then this will only store 4 +/// start states. 
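+///
+/// (Editorial illustration of the criteria above, with made-up values: for
+/// `context = "foobar"` and a search whose haystack begins at offset 3, the
+/// byte immediately preceding the start is `o`, a word byte, so the
+/// `WordByte` start state is chosen. A search starting at offset 0 would
+/// instead choose the `Text` start state.)
+///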
+/// Typically, individual start states are only enabled when constructing the
+/// reverse DFA for regex matching. But they are also useful for building DFAs
+/// that can search for a specific pattern or even to support both anchored
+/// and unanchored searches with the same DFA.
+///
+/// Note though that while the start table always has either `4` or
+/// `4 + (4 * #patterns)` starting state *ids*, the total number of states
+/// might be considerably smaller. That is, many of the IDs may be duplicative.
+/// (For example, if a regex doesn't have a `\b` sub-pattern, then there's no
+/// reason to generate a unique starting state for handling word boundaries.
+/// Similarly for start/end anchors.)
+#[derive(Clone)]
+pub(crate) struct StartTable<T> {
+    /// The initial start state IDs.
+    ///
+    /// In practice, T is either `Vec<u32>` or `&[u32]`.
+    ///
+    /// The first `2 * stride` (currently always 8) entries always correspond
+    /// to the start states for the entire DFA, with the first 4 entries being
+    /// for unanchored searches and the second 4 entries being for anchored
+    /// searches. To keep things simple, we always use 8 entries even if the
+    /// `StartKind` is not both.
+    ///
+    /// After that, there are `stride * patterns` state IDs, where `patterns`
+    /// may be zero in the case of a DFA with no patterns or in the case where
+    /// the DFA was built without enabling starting states for each pattern.
+    table: T,
+    /// The starting state configuration supported. When 'both', both
+    /// unanchored and anchored searches work. When 'unanchored', anchored
+    /// searches panic. When 'anchored', unanchored searches panic.
+    kind: StartKind,
+    /// The start state configuration for every possible byte.
+    start_map: StartByteMap,
+    /// The number of starting state IDs per pattern.
+    stride: usize,
+    /// The total number of patterns for which starting states are encoded.
+    /// This is `None` for DFAs that were built without start states for each
+    /// pattern. Thus, one cannot use this field to say how many patterns
+    /// are in the DFA in all cases. It is specific to how many patterns are
+    /// represented in this start table.
+    pattern_len: Option<usize>,
+    /// The universal starting state for unanchored searches. This is only
+    /// present when the DFA supports unanchored searches and when all starting
+    /// state IDs for an unanchored search are equivalent.
+    universal_start_unanchored: Option<StateID>,
+    /// The universal starting state for anchored searches. This is only
+    /// present when the DFA supports anchored searches and when all starting
+    /// state IDs for an anchored search are equivalent.
+    universal_start_anchored: Option<StateID>,
+}
+
+#[cfg(feature = "dfa-build")]
+impl StartTable<Vec<u32>> {
+    /// Create a valid set of start states all pointing to the dead state.
+    ///
+    /// When the corresponding DFA is constructed with start states for each
+    /// pattern, then `patterns` should be the number of patterns. Otherwise,
+    /// it should be zero.
+    ///
+    /// If the total table size could exceed the allocatable limit, then this
+    /// returns an error. In practice, this is unlikely to be able to occur,
+    /// since it's likely that allocation would have failed long before it got
+    /// to this point.
+    fn dead(
+        kind: StartKind,
+        lookm: &LookMatcher,
+        pattern_len: Option<usize>,
+    ) -> Result<StartTable<Vec<u32>>, BuildError> {
+        if let Some(len) = pattern_len {
+            assert!(len <= PatternID::LIMIT);
+        }
+        let stride = Start::len();
+        // OK because 2*4 is never going to overflow anything.
+ let starts_len = stride.checked_mul(2).unwrap(); + let pattern_starts_len = + match stride.checked_mul(pattern_len.unwrap_or(0)) { + Some(x) => x, + None => return Err(BuildError::too_many_start_states()), + }; + let table_len = match starts_len.checked_add(pattern_starts_len) { + Some(x) => x, + None => return Err(BuildError::too_many_start_states()), + }; + if let Err(_) = isize::try_from(table_len) { + return Err(BuildError::too_many_start_states()); + } + let table = vec![DEAD.as_u32(); table_len]; + let start_map = StartByteMap::new(lookm); + Ok(StartTable { + table, + kind, + start_map, + stride, + pattern_len, + universal_start_unanchored: None, + universal_start_anchored: None, + }) + } +} + +impl<'a> StartTable<&'a [u32]> { + /// Deserialize a table of start state IDs starting at the beginning of + /// `slice`. Upon success, return the total number of bytes read along with + /// the table of starting state IDs. + /// + /// If there was a problem deserializing any part of the starting IDs, + /// then this returns an error. Notably, if the given slice does not have + /// the same alignment as `StateID`, then this will return an error (among + /// other possible errors). + /// + /// This is guaranteed to execute in constant time. + /// + /// # Safety + /// + /// This routine is not safe because it does not check the validity of the + /// starting state IDs themselves. In particular, the number of starting + /// IDs can be of variable length, so it's possible that checking their + /// validity cannot be done in constant time. An invalid starting state + /// ID is not safe because other code may rely on the starting IDs being + /// correct (such as explicit bounds check elision). Therefore, an invalid + /// start ID can lead to undefined behavior. + /// + /// Callers that use this function must either pass on the safety invariant + /// or guarantee that the bytes given contain valid starting state IDs. + /// This guarantee is upheld by the bytes written by `write_to`. + unsafe fn from_bytes_unchecked( + mut slice: &'a [u8], + ) -> Result<(StartTable<&'a [u32]>, usize), DeserializeError> { + let slice_start = slice.as_ptr().as_usize(); + + let (kind, nr) = StartKind::from_bytes(slice)?; + slice = &slice[nr..]; + + let (start_map, nr) = StartByteMap::from_bytes(slice)?; + slice = &slice[nr..]; + + let (stride, nr) = + wire::try_read_u32_as_usize(slice, "start table stride")?; + slice = &slice[nr..]; + if stride != Start::len() { + return Err(DeserializeError::generic( + "invalid starting table stride", + )); + } + + let (maybe_pattern_len, nr) = + wire::try_read_u32_as_usize(slice, "start table patterns")?; + slice = &slice[nr..]; + let pattern_len = if maybe_pattern_len.as_u32() == u32::MAX { + None + } else { + Some(maybe_pattern_len) + }; + if pattern_len.map_or(false, |len| len > PatternID::LIMIT) { + return Err(DeserializeError::generic( + "invalid number of patterns", + )); + } + + let (universal_unanchored, nr) = + wire::try_read_u32(slice, "universal unanchored start")?; + slice = &slice[nr..]; + let universal_start_unanchored = if universal_unanchored == u32::MAX { + None + } else { + Some(StateID::try_from(universal_unanchored).map_err(|e| { + DeserializeError::state_id_error( + e, + "universal unanchored start", + ) + })?) 
+        };
+
+        let (universal_anchored, nr) =
+            wire::try_read_u32(slice, "universal anchored start")?;
+        slice = &slice[nr..];
+        let universal_start_anchored = if universal_anchored == u32::MAX {
+            None
+        } else {
+            Some(StateID::try_from(universal_anchored).map_err(|e| {
+                DeserializeError::state_id_error(e, "universal anchored start")
+            })?)
+        };
+
+        let pattern_table_size = wire::mul(
+            stride,
+            pattern_len.unwrap_or(0),
+            "invalid pattern length",
+        )?;
+        // Our start states always begin with two strides of start states for
+        // the entire automaton. The first stride is for unanchored starting
+        // states and the second stride is for anchored starting states. What
+        // follows them is an optional set of start states for each pattern.
+        let start_state_len = wire::add(
+            wire::mul(2, stride, "start state stride too big")?,
+            pattern_table_size,
+            "invalid 'any' pattern starts size",
+        )?;
+        let table_bytes_len = wire::mul(
+            start_state_len,
+            StateID::SIZE,
+            "pattern table bytes length",
+        )?;
+        wire::check_slice_len(slice, table_bytes_len, "start ID table")?;
+        wire::check_alignment::<StateID>(slice)?;
+        let table_bytes = &slice[..table_bytes_len];
+        slice = &slice[table_bytes_len..];
+        // SAFETY: Since StateID is always representable as a u32, all we need
+        // to do is ensure that we have the proper length and alignment. We've
+        // checked both above, so the cast below is safe.
+        //
+        // N.B. This is the only not-safe code in this function.
+        let table = core::slice::from_raw_parts(
+            table_bytes.as_ptr().cast::<u32>(),
+            start_state_len,
+        );
+        let st = StartTable {
+            table,
+            kind,
+            start_map,
+            stride,
+            pattern_len,
+            universal_start_unanchored,
+            universal_start_anchored,
+        };
+        Ok((st, slice.as_ptr().as_usize() - slice_start))
+    }
+}
+
+impl<T: AsRef<[u32]>> StartTable<T> {
+    /// Writes a serialized form of this start table to the buffer given. If
+    /// the buffer is too small, then an error is returned. To determine how
+    /// big the buffer must be, use `write_to_len`.
+    fn write_to<E: Endian>(
+        &self,
+        mut dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        let nwrite = self.write_to_len();
+        if dst.len() < nwrite {
+            return Err(SerializeError::buffer_too_small(
+                "starting table ids",
+            ));
+        }
+        dst = &mut dst[..nwrite];
+
+        // write start kind
+        let nw = self.kind.write_to::<E>(dst)?;
+        dst = &mut dst[nw..];
+        // write start byte map
+        let nw = self.start_map.write_to(dst)?;
+        dst = &mut dst[nw..];
+        // write stride
+        // Unwrap is OK since the stride is always 4 (currently).
+        E::write_u32(u32::try_from(self.stride).unwrap(), dst);
+        dst = &mut dst[size_of::<u32>()..];
+        // write pattern length
+        // Unwrap is OK since number of patterns is guaranteed to fit in a u32.
+        E::write_u32(
+            u32::try_from(self.pattern_len.unwrap_or(0xFFFF_FFFF)).unwrap(),
+            dst,
+        );
+        dst = &mut dst[size_of::<u32>()..];
+        // write universal start unanchored state id, u32::MAX if absent
+        E::write_u32(
+            self.universal_start_unanchored
+                .map_or(u32::MAX, |sid| sid.as_u32()),
+            dst,
+        );
+        dst = &mut dst[size_of::<u32>()..];
+        // write universal start anchored state id, u32::MAX if absent
+        E::write_u32(
+            self.universal_start_anchored.map_or(u32::MAX, |sid| sid.as_u32()),
+            dst,
+        );
+        dst = &mut dst[size_of::<u32>()..];
+        // write start IDs
+        for &sid in self.table() {
+            let n = wire::write_state_id::<E>(sid, &mut dst);
+            dst = &mut dst[n..];
+        }
+        Ok(nwrite)
+    }
+
+    /// Returns the number of bytes the serialized form of this start ID table
+    /// will use.
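+    ///
+    /// (Editorial sketch, using the stride of 4 assumed by the comments in
+    /// `write_to` above: a DFA with two patterns and per-pattern start
+    /// states holds `2*4 + 4*2 = 16` start IDs, so this returns the kind
+    /// and byte-map header lengths plus `4 + 4 + 4 + 4` bytes for stride,
+    /// pattern count and the two universal start IDs, plus `16 * 4` bytes
+    /// for the IDs themselves.)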
+    fn write_to_len(&self) -> usize {
+        self.kind.write_to_len()
+        + self.start_map.write_to_len()
+        + size_of::<u32>() // stride
+        + size_of::<u32>() // # patterns
+        + size_of::<u32>() // universal unanchored start
+        + size_of::<u32>() // universal anchored start
+        + (self.table().len() * StateID::SIZE)
+    }
+
+    /// Validates that every state ID in this start table is valid by checking
+    /// it against the given transition table (which must be for the same DFA).
+    ///
+    /// That is, every state ID can be used to correctly index a state.
+    fn validate(
+        &self,
+        tt: &TransitionTable<T>,
+    ) -> Result<(), DeserializeError> {
+        if !self.universal_start_unanchored.map_or(true, |s| tt.is_valid(s)) {
+            return Err(DeserializeError::generic(
+                "found invalid universal unanchored starting state ID",
+            ));
+        }
+        if !self.universal_start_anchored.map_or(true, |s| tt.is_valid(s)) {
+            return Err(DeserializeError::generic(
+                "found invalid universal anchored starting state ID",
+            ));
+        }
+        for &id in self.table() {
+            if !tt.is_valid(id) {
+                return Err(DeserializeError::generic(
+                    "found invalid starting state ID",
+                ));
+            }
+        }
+        Ok(())
+    }
+
+    /// Converts this start list to a borrowed value.
+    fn as_ref(&self) -> StartTable<&'_ [u32]> {
+        StartTable {
+            table: self.table.as_ref(),
+            kind: self.kind,
+            start_map: self.start_map.clone(),
+            stride: self.stride,
+            pattern_len: self.pattern_len,
+            universal_start_unanchored: self.universal_start_unanchored,
+            universal_start_anchored: self.universal_start_anchored,
+        }
+    }
+
+    /// Converts this start list to an owned value.
+    #[cfg(feature = "alloc")]
+    fn to_owned(&self) -> StartTable<Vec<u32>> {
+        StartTable {
+            table: self.table.as_ref().to_vec(),
+            kind: self.kind,
+            start_map: self.start_map.clone(),
+            stride: self.stride,
+            pattern_len: self.pattern_len,
+            universal_start_unanchored: self.universal_start_unanchored,
+            universal_start_anchored: self.universal_start_anchored,
+        }
+    }
+
+    /// Return the start state for the given input and starting configuration.
+    /// This returns an error if the input configuration is not supported by
+    /// this DFA. For example, requesting an unanchored search when the DFA was
+    /// not built with unanchored starting states. Or asking for an anchored
+    /// pattern search with an invalid pattern ID or on a DFA that was not
+    /// built with start states for each pattern.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn start(
+        &self,
+        anchored: Anchored,
+        start: Start,
+    ) -> Result<StateID, StartError> {
+        let start_index = start.as_usize();
+        let index = match anchored {
+            Anchored::No => {
+                if !self.kind.has_unanchored() {
+                    return Err(StartError::unsupported_anchored(anchored));
+                }
+                start_index
+            }
+            Anchored::Yes => {
+                if !self.kind.has_anchored() {
+                    return Err(StartError::unsupported_anchored(anchored));
+                }
+                self.stride + start_index
+            }
+            Anchored::Pattern(pid) => {
+                let len = match self.pattern_len {
+                    None => {
+                        return Err(StartError::unsupported_anchored(anchored))
+                    }
+                    Some(len) => len,
+                };
+                if pid.as_usize() >= len {
+                    return Ok(DEAD);
+                }
+                (2 * self.stride)
+                    + (self.stride * pid.as_usize())
+                    + start_index
+            }
+        };
+        Ok(self.table()[index])
+    }
+
+    /// Returns an iterator over all start state IDs in this table.
+    ///
+    /// Each item is a triple of: start state ID, the start state type and the
+    /// pattern ID (if any).
+    fn iter(&self) -> StartStateIter<'_> {
+        StartStateIter { st: self.as_ref(), i: 0 }
+    }
+
+    /// Returns the table as a slice of state IDs.
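+    // NOTE(editor): the index arithmetic in `start` above, spelled out with
+    // illustrative numbers: with `stride = 4`, an unanchored search reads
+    // `table[start_index]`, an anchored search reads `table[4 + start_index]`,
+    // and `Anchored::Pattern(pid)` reads
+    // `table[2*4 + 4*pid.as_usize() + start_index]`, i.e. the per-pattern
+    // blocks begin only after both whole-DFA blocks.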
+    fn table(&self) -> &[StateID] {
+        wire::u32s_to_state_ids(self.table.as_ref())
+    }
+
+    /// Return the memory usage, in bytes, of this start list.
+    ///
+    /// This does not include the size of a `StartList` value itself.
+    fn memory_usage(&self) -> usize {
+        self.table().len() * StateID::SIZE
+    }
+}
+
+#[cfg(feature = "dfa-build")]
+impl<T: AsRef<[u32]> + AsMut<[u32]>> StartTable<T> {
+    /// Set the start state for the given index and pattern.
+    ///
+    /// If the pattern ID or state ID are not valid, then this will panic.
+    fn set_start(&mut self, anchored: Anchored, start: Start, id: StateID) {
+        let start_index = start.as_usize();
+        let index = match anchored {
+            Anchored::No => start_index,
+            Anchored::Yes => self.stride + start_index,
+            Anchored::Pattern(pid) => {
+                let pid = pid.as_usize();
+                let len = self
+                    .pattern_len
+                    .expect("start states for each pattern enabled");
+                assert!(pid < len, "invalid pattern ID {:?}", pid);
+                self.stride
+                    .checked_mul(pid)
+                    .unwrap()
+                    .checked_add(self.stride.checked_mul(2).unwrap())
+                    .unwrap()
+                    .checked_add(start_index)
+                    .unwrap()
+            }
+        };
+        self.table_mut()[index] = id;
+    }
+
+    /// Returns the table as a mutable slice of state IDs.
+    fn table_mut(&mut self) -> &mut [StateID] {
+        wire::u32s_to_state_ids_mut(self.table.as_mut())
+    }
+}
+
+/// An iterator over start state IDs.
+///
+/// This iterator yields a triple of start state ID, the anchored mode and the
+/// start state type. If a pattern ID is relevant, then the anchored mode will
+/// contain it. Start states with an anchored mode containing a pattern ID will
+/// only occur when the DFA was compiled with start states for each pattern
+/// (which is disabled by default).
+pub(crate) struct StartStateIter<'a> {
+    st: StartTable<&'a [u32]>,
+    i: usize,
+}
+
+impl<'a> Iterator for StartStateIter<'a> {
+    type Item = (StateID, Anchored, Start);
+
+    fn next(&mut self) -> Option<(StateID, Anchored, Start)> {
+        let i = self.i;
+        let table = self.st.table();
+        if i >= table.len() {
+            return None;
+        }
+        self.i += 1;
+
+        // This unwrap is okay since the stride of the starting state table
+        // must always match the number of start state types.
+        let start_type = Start::from_usize(i % self.st.stride).unwrap();
+        let anchored = if i < self.st.stride {
+            Anchored::No
+        } else if i < (2 * self.st.stride) {
+            Anchored::Yes
+        } else {
+            let pid = (i - (2 * self.st.stride)) / self.st.stride;
+            Anchored::Pattern(PatternID::new(pid).unwrap())
+        };
+        Some((table[i], anchored, start_type))
+    }
+}
+
+/// This type represents the patterns that should be reported whenever a DFA
+/// enters a match state. This structure exists to support DFAs that search for
+/// matches for multiple regexes.
+///
+/// This structure relies on the fact that all match states in a DFA occur
+/// contiguously in the DFA's transition table. (See dfa/special.rs for a more
+/// detailed breakdown of the representation.) Namely, when a match occurs, we
+/// know its state ID. Since we know the start and end of the contiguous region
+/// of match states, we can use that to compute the position at which the match
+/// state occurs. That in turn is used as an offset into this structure.
+#[derive(Clone, Debug)]
+struct MatchStates<T> {
+    /// `slices` is a flattened sequence of pairs, where each pair points to a
+    /// sub-slice of pattern_ids. The first element of the pair is an offset
+    /// into pattern_ids and the second element of the pair is the number
+    /// of 32-bit pattern IDs starting at that position.
+    /// That is, each pair
+    /// corresponds to a single DFA match state and its corresponding match
+    /// IDs. The number of pairs always corresponds to the number of distinct
+    /// DFA match states.
+    ///
+    /// In practice, T is either `Vec<u32>` or `&[u32]`.
+    slices: T,
+    /// A flattened sequence of pattern IDs for each DFA match state. The only
+    /// way to correctly read this sequence is indirectly via `slices`.
+    ///
+    /// In practice, T is either `Vec<u32>` or `&[u32]`.
+    pattern_ids: T,
+    /// The total number of unique patterns represented by these match states.
+    pattern_len: usize,
+}
+
+impl<'a> MatchStates<&'a [u32]> {
+    unsafe fn from_bytes_unchecked(
+        mut slice: &'a [u8],
+    ) -> Result<(MatchStates<&'a [u32]>, usize), DeserializeError> {
+        let slice_start = slice.as_ptr().as_usize();
+
+        // Read the total number of match states.
+        let (state_len, nr) =
+            wire::try_read_u32_as_usize(slice, "match state length")?;
+        slice = &slice[nr..];
+
+        // Read the slice start/length pairs.
+        let pair_len = wire::mul(2, state_len, "match state offset pairs")?;
+        let slices_bytes_len = wire::mul(
+            pair_len,
+            PatternID::SIZE,
+            "match state slice offset byte length",
+        )?;
+        wire::check_slice_len(slice, slices_bytes_len, "match state slices")?;
+        wire::check_alignment::<PatternID>(slice)?;
+        let slices_bytes = &slice[..slices_bytes_len];
+        slice = &slice[slices_bytes_len..];
+        // SAFETY: Since PatternID is always representable as a u32, all we
+        // need to do is ensure that we have the proper length and alignment.
+        // We've checked both above, so the cast below is safe.
+        //
+        // N.B. This is one of the few not-safe snippets in this function,
+        // so we mark it explicitly to call it out.
+        let slices = core::slice::from_raw_parts(
+            slices_bytes.as_ptr().cast::<u32>(),
+            pair_len,
+        );
+
+        // Read the total number of unique pattern IDs (which is always 1 more
+        // than the maximum pattern ID in this automaton, since pattern IDs are
+        // handed out contiguously starting at 0).
+        let (pattern_len, nr) =
+            wire::try_read_u32_as_usize(slice, "pattern length")?;
+        slice = &slice[nr..];
+
+        // Now read the pattern ID length. We don't need to store this
+        // explicitly, but we need it to know how many pattern IDs to read.
+        let (idlen, nr) =
+            wire::try_read_u32_as_usize(slice, "pattern ID length")?;
+        slice = &slice[nr..];
+
+        // Read the actual pattern IDs.
+        let pattern_ids_len =
+            wire::mul(idlen, PatternID::SIZE, "pattern ID byte length")?;
+        wire::check_slice_len(slice, pattern_ids_len, "match pattern IDs")?;
+        wire::check_alignment::<PatternID>(slice)?;
+        let pattern_ids_bytes = &slice[..pattern_ids_len];
+        slice = &slice[pattern_ids_len..];
+        // SAFETY: Since PatternID is always representable as a u32, all we
+        // need to do is ensure that we have the proper length and alignment.
+        // We've checked both above, so the cast below is safe.
+        //
+        // N.B. This is one of the few not-safe snippets in this function,
+        // so we mark it explicitly to call it out.
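+        // (Editorial note: the alignment check above is what makes the
+        // pointer cast below sound. A byte slice starting at an address that
+        // is not 4-byte aligned would already have been rejected, and the
+        // `idlen` passed to `from_raw_parts` counts u32 elements, not bytes.)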
+        let pattern_ids = core::slice::from_raw_parts(
+            pattern_ids_bytes.as_ptr().cast::<u32>(),
+            idlen,
+        );
+
+        let ms = MatchStates { slices, pattern_ids, pattern_len };
+        Ok((ms, slice.as_ptr().as_usize() - slice_start))
+    }
+}
+
+#[cfg(feature = "dfa-build")]
+impl MatchStates<Vec<u32>> {
+    fn empty(pattern_len: usize) -> MatchStates<Vec<u32>> {
+        assert!(pattern_len <= PatternID::LIMIT);
+        MatchStates { slices: vec![], pattern_ids: vec![], pattern_len }
+    }
+
+    fn new(
+        matches: &BTreeMap<StateID, Vec<PatternID>>,
+        pattern_len: usize,
+    ) -> Result<MatchStates<Vec<u32>>, BuildError> {
+        let mut m = MatchStates::empty(pattern_len);
+        for (_, pids) in matches.iter() {
+            let start = PatternID::new(m.pattern_ids.len())
+                .map_err(|_| BuildError::too_many_match_pattern_ids())?;
+            m.slices.push(start.as_u32());
+            // This is always correct since the number of patterns in a single
+            // match state can never exceed maximum number of allowable
+            // patterns. Why? Because a pattern can only appear once in a
+            // particular match state, by construction. (And since our pattern
+            // ID limit is one less than u32::MAX, we're guaranteed that the
+            // length fits in a u32.)
+            m.slices.push(u32::try_from(pids.len()).unwrap());
+            for &pid in pids {
+                m.pattern_ids.push(pid.as_u32());
+            }
+        }
+        m.pattern_len = pattern_len;
+        Ok(m)
+    }
+
+    fn new_with_map(
+        &self,
+        matches: &BTreeMap<StateID, Vec<PatternID>>,
+    ) -> Result<MatchStates<Vec<u32>>, BuildError> {
+        MatchStates::new(matches, self.pattern_len)
+    }
+}
+
+impl<T: AsRef<[u32]>> MatchStates<T> {
+    /// Writes a serialized form of these match states to the buffer given. If
+    /// the buffer is too small, then an error is returned. To determine how
+    /// big the buffer must be, use `write_to_len`.
+    fn write_to<E: Endian>(
+        &self,
+        mut dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        let nwrite = self.write_to_len();
+        if dst.len() < nwrite {
+            return Err(SerializeError::buffer_too_small("match states"));
+        }
+        dst = &mut dst[..nwrite];
+
+        // write state ID length
+        // Unwrap is OK since number of states is guaranteed to fit in a u32.
+        E::write_u32(u32::try_from(self.len()).unwrap(), dst);
+        dst = &mut dst[size_of::<u32>()..];
+
+        // write slice offset pairs
+        for &pid in self.slices() {
+            let n = wire::write_pattern_id::<E>(pid, &mut dst);
+            dst = &mut dst[n..];
+        }
+
+        // write unique pattern ID length
+        // Unwrap is OK since number of patterns is guaranteed to fit in a u32.
+        E::write_u32(u32::try_from(self.pattern_len).unwrap(), dst);
+        dst = &mut dst[size_of::<u32>()..];
+
+        // write pattern ID length
+        // Unwrap is OK since we check at construction (and deserialization)
+        // that the number of patterns is representable as a u32.
+        E::write_u32(u32::try_from(self.pattern_ids().len()).unwrap(), dst);
+        dst = &mut dst[size_of::<u32>()..];
+
+        // write pattern IDs
+        for &pid in self.pattern_ids() {
+            let n = wire::write_pattern_id::<E>(pid, &mut dst);
+            dst = &mut dst[n..];
+        }
+
+        Ok(nwrite)
+    }
+
+    /// Returns the number of bytes the serialized form of these match states
+    /// will use.
+    fn write_to_len(&self) -> usize {
+        size_of::<u32>() // match state length
+        + (self.slices().len() * PatternID::SIZE)
+        + size_of::<u32>() // unique pattern ID length
+        + size_of::<u32>() // pattern ID length
+        + (self.pattern_ids().len() * PatternID::SIZE)
+    }
+
+    /// Validates that the match state info is itself internally consistent and
+    /// consistent with the recorded match state region in the given DFA.
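+    // NOTE(editor): an illustrative layout for two match states, where the
+    // first reports patterns {0, 3} and the second reports pattern {1}:
+    //
+    //   slices      = [0, 2,  2, 1]  // (offset, len) pairs, one per state
+    //   pattern_ids = [0, 3,  1]
+    //
+    // pattern_id_slice(0) is then pattern_ids[0..2] and pattern_id_slice(1)
+    // is pattern_ids[2..3]; these are exactly the offsets and lengths that
+    // `validate` below checks against `pattern_ids().len()`.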
+    fn validate(&self, dfa: &DFA<T>) -> Result<(), DeserializeError> {
+        if self.len() != dfa.special.match_len(dfa.stride()) {
+            return Err(DeserializeError::generic(
+                "match state length mismatch",
+            ));
+        }
+        for si in 0..self.len() {
+            let start = self.slices()[si * 2].as_usize();
+            let len = self.slices()[si * 2 + 1].as_usize();
+            if start >= self.pattern_ids().len() {
+                return Err(DeserializeError::generic(
+                    "invalid pattern ID start offset",
+                ));
+            }
+            if start + len > self.pattern_ids().len() {
+                return Err(DeserializeError::generic(
+                    "invalid pattern ID length",
+                ));
+            }
+            for mi in 0..len {
+                let pid = self.pattern_id(si, mi);
+                if pid.as_usize() >= self.pattern_len {
+                    return Err(DeserializeError::generic(
+                        "invalid pattern ID",
+                    ));
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Converts these match states back into their map form. This is useful
+    /// when shuffling states, as the normal MatchStates representation is not
+    /// amenable to easy state swapping. But with this map, to swap id1 and
+    /// id2, all you need to do is:
+    ///
+    ///   if let Some(pids) = map.remove(&id1) {
+    ///       map.insert(id2, pids);
+    ///   }
+    ///
+    /// Once shuffling is done, use MatchStates::new to convert back.
+    #[cfg(feature = "dfa-build")]
+    fn to_map(&self, dfa: &DFA<T>) -> BTreeMap<StateID, Vec<PatternID>> {
+        let mut map = BTreeMap::new();
+        for i in 0..self.len() {
+            let mut pids = vec![];
+            for j in 0..self.pattern_len(i) {
+                pids.push(self.pattern_id(i, j));
+            }
+            map.insert(self.match_state_id(dfa, i), pids);
+        }
+        map
+    }
+
+    /// Converts these match states to a borrowed value.
+    fn as_ref(&self) -> MatchStates<&'_ [u32]> {
+        MatchStates {
+            slices: self.slices.as_ref(),
+            pattern_ids: self.pattern_ids.as_ref(),
+            pattern_len: self.pattern_len,
+        }
+    }
+
+    /// Converts these match states to an owned value.
+    #[cfg(feature = "alloc")]
+    fn to_owned(&self) -> MatchStates<Vec<u32>> {
+        MatchStates {
+            slices: self.slices.as_ref().to_vec(),
+            pattern_ids: self.pattern_ids.as_ref().to_vec(),
+            pattern_len: self.pattern_len,
+        }
+    }
+
+    /// Returns the match state ID given the match state index. (Where the
+    /// first match state corresponds to index 0.)
+    ///
+    /// This panics if there is no match state at the given index.
+    fn match_state_id(&self, dfa: &DFA<T>, index: usize) -> StateID {
+        assert!(dfa.special.matches(), "no match states to index");
+        // This is one of the places where we rely on the fact that match
+        // states are contiguous in the transition table. Namely, that the
+        // first match state ID always corresponds to dfa.special.min_match.
+        // From there, since we know the stride, we can compute the ID of any
+        // match state given its index.
+        let stride2 = u32::try_from(dfa.stride2()).unwrap();
+        let offset = index.checked_shl(stride2).unwrap();
+        let id = dfa.special.min_match.as_usize().checked_add(offset).unwrap();
+        let sid = StateID::new(id).unwrap();
+        assert!(dfa.is_match_state(sid));
+        sid
+    }
+
+    /// Returns the pattern ID at the given match index for the given match
+    /// state.
+    ///
+    /// The match state index is the state index minus the state index of the
+    /// first match state in the DFA.
+    ///
+    /// The match index is the index of the pattern ID for the given state.
+    /// The index must be less than `self.pattern_len(state_index)`.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn pattern_id(&self, state_index: usize, match_index: usize) -> PatternID {
+        self.pattern_id_slice(state_index)[match_index]
+    }
+
+    /// Returns the number of patterns in the given match state.
+ /// + /// The match state index is the state index minus the state index of the + /// first match state in the DFA. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn pattern_len(&self, state_index: usize) -> usize { + self.slices()[state_index * 2 + 1].as_usize() + } + + /// Returns all of the pattern IDs for the given match state index. + /// + /// The match state index is the state index minus the state index of the + /// first match state in the DFA. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn pattern_id_slice(&self, state_index: usize) -> &[PatternID] { + let start = self.slices()[state_index * 2].as_usize(); + let len = self.pattern_len(state_index); + &self.pattern_ids()[start..start + len] + } + + /// Returns the pattern ID offset slice of u32 as a slice of PatternID. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn slices(&self) -> &[PatternID] { + wire::u32s_to_pattern_ids(self.slices.as_ref()) + } + + /// Returns the total number of match states. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn len(&self) -> usize { + assert_eq!(0, self.slices().len() % 2); + self.slices().len() / 2 + } + + /// Returns the pattern ID slice of u32 as a slice of PatternID. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn pattern_ids(&self) -> &[PatternID] { + wire::u32s_to_pattern_ids(self.pattern_ids.as_ref()) + } + + /// Return the memory usage, in bytes, of these match pairs. + fn memory_usage(&self) -> usize { + (self.slices().len() + self.pattern_ids().len()) * PatternID::SIZE + } +} + +/// A common set of flags for both dense and sparse DFAs. This primarily +/// centralizes the serialization format of these flags at a bitset. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Flags { + /// Whether the DFA can match the empty string. When this is false, all + /// matches returned by this DFA are guaranteed to have non-zero length. + pub(crate) has_empty: bool, + /// Whether the DFA should only produce matches with spans that correspond + /// to valid UTF-8. This also includes omitting any zero-width matches that + /// split the UTF-8 encoding of a codepoint. + pub(crate) is_utf8: bool, + /// Whether the DFA is always anchored or not, regardless of `Input` + /// configuration. This is useful for avoiding a reverse scan even when + /// executing unanchored searches. + pub(crate) is_always_start_anchored: bool, +} + +impl Flags { + /// Creates a set of flags for a DFA from an NFA. + /// + /// N.B. This constructor was defined at the time of writing because all + /// of the flags are derived directly from the NFA. If this changes in the + /// future, we might be more thoughtful about how the `Flags` value is + /// itself built. + #[cfg(feature = "dfa-build")] + fn from_nfa(nfa: &thompson::NFA) -> Flags { + Flags { + has_empty: nfa.has_empty(), + is_utf8: nfa.is_utf8(), + is_always_start_anchored: nfa.is_always_start_anchored(), + } + } + + /// Deserializes the flags from the given slice. On success, this also + /// returns the number of bytes read from the slice. + pub(crate) fn from_bytes( + slice: &[u8], + ) -> Result<(Flags, usize), DeserializeError> { + let (bits, nread) = wire::try_read_u32(slice, "flag bitset")?; + let flags = Flags { + has_empty: bits & (1 << 0) != 0, + is_utf8: bits & (1 << 1) != 0, + is_always_start_anchored: bits & (1 << 2) != 0, + }; + Ok((flags, nread)) + } + + /// Writes these flags to the given byte slice. If the buffer is too small, + /// then an error is returned. 
+    /// To determine how big the buffer must be, use `write_to_len`.
+    pub(crate) fn write_to<E: Endian>(
+        &self,
+        dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        fn bool_to_int(b: bool) -> u32 {
+            if b {
+                1
+            } else {
+                0
+            }
+        }
+
+        let nwrite = self.write_to_len();
+        if dst.len() < nwrite {
+            return Err(SerializeError::buffer_too_small("flag bitset"));
+        }
+        let bits = (bool_to_int(self.has_empty) << 0)
+            | (bool_to_int(self.is_utf8) << 1)
+            | (bool_to_int(self.is_always_start_anchored) << 2);
+        E::write_u32(bits, dst);
+        Ok(nwrite)
+    }
+
+    /// Returns the number of bytes the serialized form of these flags
+    /// will use.
+    pub(crate) fn write_to_len(&self) -> usize {
+        size_of::<u32>()
+    }
+}
+
+/// An iterator over all states in a DFA.
+///
+/// This iterator yields a tuple for each state. The first element of the
+/// tuple corresponds to a state's identifier, and the second element
+/// corresponds to the state itself (comprised of its transitions).
+///
+/// `'a` corresponds to the lifetime of the original DFA, and `T` corresponds
+/// to the type of the transition table itself.
+pub(crate) struct StateIter<'a, T> {
+    tt: &'a TransitionTable<T>,
+    it: iter::Enumerate<slice::Chunks<'a, StateID>>,
+}
+
+impl<'a, T: AsRef<[u32]>> Iterator for StateIter<'a, T> {
+    type Item = State<'a>;
+
+    fn next(&mut self) -> Option<State<'a>> {
+        self.it.next().map(|(index, _)| {
+            let id = self.tt.to_state_id(index);
+            self.tt.state(id)
+        })
+    }
+}
+
+/// An immutable representation of a single DFA state.
+///
+/// `'a` corresponds to the lifetime of a DFA's transition table.
+pub(crate) struct State<'a> {
+    id: StateID,
+    stride2: usize,
+    transitions: &'a [StateID],
+}
+
+impl<'a> State<'a> {
+    /// Return an iterator over all transitions in this state. This yields
+    /// a number of transitions equivalent to the alphabet length of the
+    /// corresponding DFA.
+    ///
+    /// Each transition is represented by a tuple. The first element is
+    /// the input byte for that transition and the second element is the
+    /// transition itself.
+    pub(crate) fn transitions(&self) -> StateTransitionIter<'_> {
+        StateTransitionIter {
+            len: self.transitions.len(),
+            it: self.transitions.iter().enumerate(),
+        }
+    }
+
+    /// Return an iterator over a sparse representation of the transitions in
+    /// this state. Only non-dead transitions are returned.
+    ///
+    /// The "sparse" representation in this case corresponds to a sequence of
+    /// triples. The first two elements of the triple comprise an inclusive
+    /// byte range while the last element corresponds to the transition taken
+    /// for all bytes in the range.
+    ///
+    /// This is somewhat more condensed than the classical sparse
+    /// representation (where you have an element for every non-dead
+    /// transition), but in practice, checking if a byte is in a range is very
+    /// cheap and using ranges tends to conserve quite a bit more space.
+    pub(crate) fn sparse_transitions(&self) -> StateSparseTransitionIter<'_> {
+        StateSparseTransitionIter { dense: self.transitions(), cur: None }
+    }
+
+    /// Returns the identifier for this state.
+    pub(crate) fn id(&self) -> StateID {
+        self.id
+    }
+
+    /// Analyzes this state to determine whether it can be accelerated. If so,
+    /// it returns an accelerator that contains at least one byte.
+    #[cfg(feature = "dfa-build")]
+    fn accelerate(&self, classes: &ByteClasses) -> Option<Accel> {
+        // We just try to add bytes to our accelerator. Once adding fails
+        // (because we've added too many bytes), then give up.
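+        // (Editorial illustration: for a state whose only non-self
+        // transitions occur on the bytes b'a' and b'"', the accelerator
+        // would hold {a, "}, letting the search loop skip ahead with a
+        // memchr-style scan for either byte instead of stepping the DFA one
+        // byte at a time. A state with too many distinct outgoing bytes
+        // fails `accel.add` below and is simply not accelerated.)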
+ let mut accel = Accel::new(); + for (class, id) in self.transitions() { + if id == self.id() { + continue; + } + for unit in classes.elements(class) { + if let Some(byte) = unit.as_u8() { + if !accel.add(byte) { + return None; + } + } + } + } + if accel.is_empty() { + None + } else { + Some(accel) + } + } +} + +impl<'a> fmt::Debug for State<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (i, (start, end, sid)) in self.sparse_transitions().enumerate() { + let id = if f.alternate() { + sid.as_usize() + } else { + sid.as_usize() >> self.stride2 + }; + if i > 0 { + write!(f, ", ")?; + } + if start == end { + write!(f, "{:?} => {:?}", start, id)?; + } else { + write!(f, "{:?}-{:?} => {:?}", start, end, id)?; + } + } + Ok(()) + } +} + +/// An iterator over all transitions in a single DFA state. This yields +/// a number of transitions equivalent to the alphabet length of the +/// corresponding DFA. +/// +/// Each transition is represented by a tuple. The first element is the input +/// byte for that transition and the second element is the transition itself. +#[derive(Debug)] +pub(crate) struct StateTransitionIter<'a> { + len: usize, + it: iter::Enumerate>, +} + +impl<'a> Iterator for StateTransitionIter<'a> { + type Item = (alphabet::Unit, StateID); + + fn next(&mut self) -> Option<(alphabet::Unit, StateID)> { + self.it.next().map(|(i, &id)| { + let unit = if i + 1 == self.len { + alphabet::Unit::eoi(i) + } else { + let b = u8::try_from(i) + .expect("raw byte alphabet is never exceeded"); + alphabet::Unit::u8(b) + }; + (unit, id) + }) + } +} + +/// An iterator over all non-DEAD transitions in a single DFA state using a +/// sparse representation. +/// +/// Each transition is represented by a triple. The first two elements of the +/// triple comprise an inclusive byte range while the last element corresponds +/// to the transition taken for all bytes in the range. +/// +/// As a convenience, this always returns `alphabet::Unit` values of the same +/// type. That is, you'll never get a (byte, EOI) or a (EOI, byte). Only (byte, +/// byte) and (EOI, EOI) values are yielded. +#[derive(Debug)] +pub(crate) struct StateSparseTransitionIter<'a> { + dense: StateTransitionIter<'a>, + cur: Option<(alphabet::Unit, alphabet::Unit, StateID)>, +} + +impl<'a> Iterator for StateSparseTransitionIter<'a> { + type Item = (alphabet::Unit, alphabet::Unit, StateID); + + fn next(&mut self) -> Option<(alphabet::Unit, alphabet::Unit, StateID)> { + while let Some((unit, next)) = self.dense.next() { + let (prev_start, prev_end, prev_next) = match self.cur { + Some(t) => t, + None => { + self.cur = Some((unit, unit, next)); + continue; + } + }; + if prev_next == next && !unit.is_eoi() { + self.cur = Some((prev_start, unit, prev_next)); + } else { + self.cur = Some((unit, unit, next)); + if prev_next != DEAD { + return Some((prev_start, prev_end, prev_next)); + } + } + } + if let Some((start, end, next)) = self.cur.take() { + if next != DEAD { + return Some((start, end, next)); + } + } + None + } +} + +/// An error that occurred during the construction of a DFA. +/// +/// This error does not provide many introspection capabilities. There are +/// generally only two things you can do with it: +/// +/// * Obtain a human readable message via its `std::fmt::Display` impl. +/// * Access an underlying [`nfa::thompson::BuildError`](thompson::BuildError) +/// type from its `source` method via the `std::error::Error` trait. 
This error +/// only occurs when using convenience routines for building a DFA directly +/// from a pattern string. +/// +/// When the `std` feature is enabled, this implements the `std::error::Error` +/// trait. +#[cfg(feature = "dfa-build")] +#[derive(Clone, Debug)] +pub struct BuildError { + kind: BuildErrorKind, +} + +/// The kind of error that occurred during the construction of a DFA. +/// +/// Note that this error is non-exhaustive. Adding new variants is not +/// considered a breaking change. +#[cfg(feature = "dfa-build")] +#[derive(Clone, Debug)] +enum BuildErrorKind { + /// An error that occurred while constructing an NFA as a precursor step + /// before a DFA is compiled. + NFA(thompson::BuildError), + /// An error that occurred because an unsupported regex feature was used. + /// The message string describes which unsupported feature was used. + /// + /// The primary regex feature that is unsupported by DFAs is the Unicode + /// word boundary look-around assertion (`\b`). This can be worked around + /// by either using an ASCII word boundary (`(?-u:\b)`) or by enabling + /// Unicode word boundaries when building a DFA. + Unsupported(&'static str), + /// An error that occurs if too many states are produced while building a + /// DFA. + TooManyStates, + /// An error that occurs if too many start states are needed while building + /// a DFA. + /// + /// This is a kind of oddball error that occurs when building a DFA with + /// start states enabled for each pattern and enough patterns to cause + /// the table of start states to overflow `usize`. + TooManyStartStates, + /// This is another oddball error that can occur if there are too many + /// patterns spread out across too many match states. + TooManyMatchPatternIDs, + /// An error that occurs if the DFA got too big during determinization. + DFAExceededSizeLimit { limit: usize }, + /// An error that occurs if auxiliary storage (not the DFA) used during + /// determinization got too big. + DeterminizeExceededSizeLimit { limit: usize }, +} + +#[cfg(feature = "dfa-build")] +impl BuildError { + /// Return the kind of this error. 
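+    // NOTE(editor): a hedged usage sketch; `Builder` is this module's
+    // builder, and with default options a Unicode word boundary triggers
+    // the `Unsupported` kind:
+    //
+    //   if let Err(err) = Builder::new().build(r"\b") {
+    //       // With the `std` feature, any underlying NFA error is
+    //       // reachable via std::error::Error::source(&err).
+    //       println!("failed to build DFA: {}", err);
+    //   }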
+ fn kind(&self) -> &BuildErrorKind { + &self.kind + } + + pub(crate) fn nfa(err: thompson::BuildError) -> BuildError { + BuildError { kind: BuildErrorKind::NFA(err) } + } + + pub(crate) fn unsupported_dfa_word_boundary_unicode() -> BuildError { + let msg = "cannot build DFAs for regexes with Unicode word \ + boundaries; switch to ASCII word boundaries, or \ + heuristically enable Unicode word boundaries or use a \ + different regex engine"; + BuildError { kind: BuildErrorKind::Unsupported(msg) } + } + + pub(crate) fn too_many_states() -> BuildError { + BuildError { kind: BuildErrorKind::TooManyStates } + } + + pub(crate) fn too_many_start_states() -> BuildError { + BuildError { kind: BuildErrorKind::TooManyStartStates } + } + + pub(crate) fn too_many_match_pattern_ids() -> BuildError { + BuildError { kind: BuildErrorKind::TooManyMatchPatternIDs } + } + + pub(crate) fn dfa_exceeded_size_limit(limit: usize) -> BuildError { + BuildError { kind: BuildErrorKind::DFAExceededSizeLimit { limit } } + } + + pub(crate) fn determinize_exceeded_size_limit(limit: usize) -> BuildError { + BuildError { + kind: BuildErrorKind::DeterminizeExceededSizeLimit { limit }, + } + } +} + +#[cfg(all(feature = "std", feature = "dfa-build"))] +impl std::error::Error for BuildError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self.kind() { + BuildErrorKind::NFA(ref err) => Some(err), + _ => None, + } + } +} + +#[cfg(feature = "dfa-build")] +impl core::fmt::Display for BuildError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.kind() { + BuildErrorKind::NFA(_) => write!(f, "error building NFA"), + BuildErrorKind::Unsupported(ref msg) => { + write!(f, "unsupported regex feature for DFAs: {}", msg) + } + BuildErrorKind::TooManyStates => write!( + f, + "number of DFA states exceeds limit of {}", + StateID::LIMIT, + ), + BuildErrorKind::TooManyStartStates => { + let stride = Start::len(); + // The start table has `stride` entries for starting states for + // the entire DFA, and then `stride` entries for each pattern + // if start states for each pattern are enabled (which is the + // only way this error can occur). Thus, the total number of + // patterns that can fit in the table is `stride` less than + // what we can allocate. 
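+                // (Editorial arithmetic: on a 64-bit target, `max` below is
+                // 2^63 - 1, so with the stride of 4 assumed elsewhere in
+                // this file the reported limit is (2^63 - 1 - 4) / 4, about
+                // 2.3 * 10^18 patterns -- far beyond anything reachable in
+                // practice, which is why this error is called an oddball.)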
+                let max = usize::try_from(core::isize::MAX).unwrap();
+                let limit = (max - stride) / stride;
+                write!(
+                    f,
+                    "compiling DFA with start states exceeds pattern \
+                     limit of {}",
+                    limit,
+                )
+            }
+            BuildErrorKind::TooManyMatchPatternIDs => write!(
+                f,
+                "compiling DFA with total patterns in all match states \
+                 exceeds limit of {}",
+                PatternID::LIMIT,
+            ),
+            BuildErrorKind::DFAExceededSizeLimit { limit } => write!(
+                f,
+                "DFA exceeded size limit of {:?} during determinization",
+                limit,
+            ),
+            BuildErrorKind::DeterminizeExceededSizeLimit { limit } => {
+                write!(f, "determinization exceeded size limit of {:?}", limit)
+            }
+        }
+    }
+}
+
+#[cfg(all(test, feature = "syntax", feature = "dfa-build"))]
+mod tests {
+    use crate::{Input, MatchError};
+
+    use super::*;
+
+    #[test]
+    fn errors_with_unicode_word_boundary() {
+        let pattern = r"\b";
+        assert!(Builder::new().build(pattern).is_err());
+    }
+
+    #[test]
+    fn roundtrip_never_match() {
+        let dfa = DFA::never_match().unwrap();
+        let (buf, _) = dfa.to_bytes_native_endian();
+        let dfa: DFA<&[u32]> = DFA::from_bytes(&buf).unwrap().0;
+
+        assert_eq!(None, dfa.try_search_fwd(&Input::new("foo12345")).unwrap());
+    }
+
+    #[test]
+    fn roundtrip_always_match() {
+        use crate::HalfMatch;
+
+        let dfa = DFA::always_match().unwrap();
+        let (buf, _) = dfa.to_bytes_native_endian();
+        let dfa: DFA<&[u32]> = DFA::from_bytes(&buf).unwrap().0;
+
+        assert_eq!(
+            Some(HalfMatch::must(0, 0)),
+            dfa.try_search_fwd(&Input::new("foo12345")).unwrap()
+        );
+    }
+
+    // See the analogous test in src/hybrid/dfa.rs.
+    #[test]
+    fn heuristic_unicode_reverse() {
+        let dfa = DFA::builder()
+            .configure(DFA::config().unicode_word_boundary(true))
+            .thompson(thompson::Config::new().reverse(true))
+            .build(r"\b[0-9]+\b")
+            .unwrap();
+
+        let input = Input::new("β123").range(2..);
+        let expected = MatchError::quit(0xB2, 1);
+        let got = dfa.try_search_rev(&input);
+        assert_eq!(Err(expected), got);
+
+        let input = Input::new("123β").range(..3);
+        let expected = MatchError::quit(0xCE, 3);
+        let got = dfa.try_search_rev(&input);
+        assert_eq!(Err(expected), got);
+    }
+}
diff --git a/vendor/regex-automata/src/dfa/determinize.rs b/vendor/regex-automata/src/dfa/determinize.rs
new file mode 100644
index 0000000..19f99f5
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/determinize.rs
@@ -0,0 +1,599 @@
+use alloc::{collections::BTreeMap, vec::Vec};
+
+use crate::{
+    dfa::{
+        dense::{self, BuildError},
+        DEAD,
+    },
+    nfa::thompson,
+    util::{
+        self,
+        alphabet::{self, ByteSet},
+        determinize::{State, StateBuilderEmpty, StateBuilderNFA},
+        primitives::{PatternID, StateID},
+        search::{Anchored, MatchKind},
+        sparse_set::SparseSets,
+        start::Start,
+    },
+};
+
+/// A builder for configuring and running a DFA determinizer.
+#[derive(Clone, Debug)]
+pub(crate) struct Config {
+    match_kind: MatchKind,
+    quit: ByteSet,
+    dfa_size_limit: Option<usize>,
+    determinize_size_limit: Option<usize>,
+}
+
+impl Config {
+    /// Create a new default config for a determinizer. The determinizer may be
+    /// configured before calling `run`.
+    pub fn new() -> Config {
+        Config {
+            match_kind: MatchKind::LeftmostFirst,
+            quit: ByteSet::empty(),
+            dfa_size_limit: None,
+            determinize_size_limit: None,
+        }
+    }
+
+    /// Run determinization on the given NFA and write the resulting DFA into
+    /// the one given. The DFA given should be initialized but otherwise empty.
+    /// "Initialized" means that it is set up to handle the NFA's byte classes,
+    /// number of patterns and whether to build start states for each pattern.
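+    // NOTE(editor): a hedged sketch of how this internal config might be
+    // driven; the names all come from this file, but the real call site
+    // lives in the dense DFA builder elsewhere in the crate:
+    //
+    //   let mut config = Config::new();
+    //   config.match_kind(MatchKind::LeftmostFirst)
+    //       .quit(ByteSet::empty())
+    //       .dfa_size_limit(Some(1 << 20));
+    //   config.run(&nfa, &mut dfa)?;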
+    pub fn run(
+        &self,
+        nfa: &thompson::NFA,
+        dfa: &mut dense::OwnedDFA,
+    ) -> Result<(), BuildError> {
+        let dead = State::dead();
+        let quit = State::dead();
+        let mut cache = StateMap::default();
+        // We only insert the dead state here since its representation is
+        // identical to the quit state. And we never want anything pointing
+        // to the quit state other than specific transitions derived from the
+        // determinizer's configured "quit" bytes.
+        //
+        // We do put the quit state into 'builder_states' below. This ensures
+        // that a proper DFA state ID is allocated for it, and that no other
+        // DFA state uses the "location after the DEAD state." That is, it
+        // is assumed that the quit state is always the state immediately
+        // following the DEAD state.
+        cache.insert(dead.clone(), DEAD);
+
+        let runner = Runner {
+            config: self.clone(),
+            nfa,
+            dfa,
+            builder_states: alloc::vec![dead, quit],
+            cache,
+            memory_usage_state: 0,
+            sparses: SparseSets::new(nfa.states().len()),
+            stack: alloc::vec![],
+            scratch_state_builder: StateBuilderEmpty::new(),
+        };
+        runner.run()
+    }
+
+    /// The match semantics to use for determinization.
+    ///
+    /// MatchKind::All corresponds to the standard textbook construction.
+    /// All possible match states are represented in the DFA.
+    /// MatchKind::LeftmostFirst permits greediness and otherwise tries to
+    /// simulate the match semantics of backtracking regex engines. Namely,
+    /// only a subset of match states are built, and dead states are used to
+    /// stop searches with an unanchored prefix.
+    ///
+    /// The default is MatchKind::LeftmostFirst.
+    pub fn match_kind(&mut self, kind: MatchKind) -> &mut Config {
+        self.match_kind = kind;
+        self
+    }
+
+    /// The set of bytes to use that will cause the DFA to enter a quit state,
+    /// stop searching and return an error. By default, this is empty.
+    pub fn quit(&mut self, set: ByteSet) -> &mut Config {
+        self.quit = set;
+        self
+    }
+
+    /// The limit, in bytes of the heap, that the DFA is permitted to use. This
+    /// does not include the auxiliary heap storage used by determinization.
+    pub fn dfa_size_limit(&mut self, bytes: Option<usize>) -> &mut Config {
+        self.dfa_size_limit = bytes;
+        self
+    }
+
+    /// The limit, in bytes of the heap, that determinization itself is allowed
+    /// to use. This does not include the size of the DFA being built.
+    pub fn determinize_size_limit(
+        &mut self,
+        bytes: Option<usize>,
+    ) -> &mut Config {
+        self.determinize_size_limit = bytes;
+        self
+    }
+}
+
+/// The actual implementation of determinization that converts an NFA to a DFA
+/// through powerset construction.
+///
+/// This determinizer roughly follows the typical powerset construction, where
+/// each DFA state is comprised of one or more NFA states. In the worst case,
+/// there is one DFA state for every possible combination of NFA states. In
+/// practice, this only happens in certain conditions, typically when there are
+/// bounded repetitions.
+///
+/// The main differences between this implementation and typical
+/// determinization are that this implementation delays matches by one state
+/// and hackily makes look-around work. Comments below attempt to explain this.
+///
+/// The lifetime variable `'a` refers to the lifetime of the NFA or DFA,
+/// whichever is shorter.
+#[derive(Debug)]
+struct Runner<'a> {
+    /// The configuration used to initialize determinization.
+    config: Config,
+    /// The NFA we're converting into a DFA.
+    nfa: &'a thompson::NFA,
+    /// The DFA we're building.
+    dfa: &'a mut dense::OwnedDFA,
+    /// Each DFA state being built is defined as an *ordered* set of NFA
+    /// states, along with some meta facts about the ordered set of NFA states.
+    ///
+    /// This is never empty. The first state is always a dummy state such that
+    /// a state id == 0 corresponds to a dead state. The second state is always
+    /// the quit state.
+    ///
+    /// Why do we have states in both a `Vec` and in a cache map below?
+    /// Well, they serve two different roles based on access patterns.
+    /// `builder_states` is the canonical home of each state, and provides
+    /// constant random access by a DFA state's ID. The cache map below, on
+    /// the other hand, provides a quick way of searching for identical DFA
+    /// states by using the DFA state as a key in the map. Of course, we use
+    /// reference counting to avoid actually duplicating the state's data
+    /// itself. (Although this has never been benchmarked.) Note that the cache
+    /// map does not give us full minimization; it just lets us avoid some very
+    /// obvious redundant states.
+    ///
+    /// Note that the index into this Vec isn't quite the DFA's state ID.
+    /// Rather, it's just an index. To get the state ID, you have to multiply
+    /// it by the DFA's stride. That's done by self.dfa.from_index. And the
+    /// inverse is self.dfa.to_index.
+    ///
+    /// Moreover, DFA states don't usually retain the IDs assigned to them
+    /// by their position in this Vec. After determinization completes,
+    /// states are shuffled around to support other optimizations. See the
+    /// sibling 'special' module for more details on that. (The reason for
+    /// mentioning this is that if you print out the DFA for debugging during
+    /// determinization, and then print out the final DFA after it is fully
+    /// built, then the state IDs likely won't match up.)
+    builder_states: Vec<State>,
+    /// A cache of DFA states that already exist and can be easily looked up
+    /// via ordered sets of NFA states.
+    ///
+    /// See `builder_states` docs for why we store states in two different
+    /// ways.
+    cache: StateMap,
+    /// The memory usage, in bytes, used by builder_states and cache. We track
+    /// this as new states are added since states use a variable amount of
+    /// heap. Tracking this as we add states makes it possible to compute the
+    /// total amount of memory used by the determinizer in constant time.
+    memory_usage_state: usize,
+    /// A pair of sparse sets for tracking ordered sets of NFA state IDs.
+    /// These are reused throughout determinization. A bounded sparse set
+    /// gives us constant time insertion, membership testing and clearing.
+    sparses: SparseSets,
+    /// Scratch space for a stack of NFA states to visit, for depth first
+    /// visiting without recursion.
+    stack: Vec<StateID>,
+    /// Scratch space for storing an ordered sequence of NFA states, for
+    /// amortizing allocation. This is principally useful for when we avoid
+    /// adding a new DFA state since it already exists. In order to detect this
+    /// case though, we still need an ordered set of NFA state IDs. So we use
+    /// this space to stage that ordered set before we know whether we need to
+    /// create a new DFA state or not.
+    scratch_state_builder: StateBuilderEmpty,
+}
+
+/// A map from states to state identifiers. When using std, we use a standard
+/// hashmap, since it's a bit faster for this use case. (Other maps, like
+/// ones based on FNV, have not yet been benchmarked.)
+///
+/// The main purpose of this map is to reuse states where possible.
+/// This won't fully minimize the DFA, but it works well in a lot of cases.
+#[cfg(feature = "std")]
+type StateMap = std::collections::HashMap<State, StateID>;
+#[cfg(not(feature = "std"))]
+type StateMap = BTreeMap<State, StateID>;
+
+impl<'a> Runner<'a> {
+    /// Build the DFA. If there was a problem constructing the DFA (e.g., if
+    /// the chosen state identifier representation is too small), then an error
+    /// is returned.
+    fn run(mut self) -> Result<(), BuildError> {
+        if self.nfa.look_set_any().contains_word_unicode()
+            && !self.config.quit.contains_range(0x80, 0xFF)
+        {
+            return Err(BuildError::unsupported_dfa_word_boundary_unicode());
+        }
+
+        // A sequence of "representative" bytes drawn from each equivalence
+        // class. These representative bytes are fed to the NFA to compute
+        // state transitions. This allows us to avoid re-computing state
+        // transitions for bytes that are guaranteed to produce identical
+        // results. Since computing the representatives needs to do a little
+        // work, we do it once here because we'll be iterating over them a lot.
+        let representatives: Vec<alphabet::Unit> =
+            self.dfa.byte_classes().representatives(..).collect();
+        // The set of all DFA state IDs that still need to have their
+        // transitions set. We start by seeding this with all starting states.
+        let mut uncompiled = alloc::vec![];
+        self.add_all_starts(&mut uncompiled)?;
+        while let Some(dfa_id) = uncompiled.pop() {
+            for &unit in &representatives {
+                if unit.as_u8().map_or(false, |b| self.config.quit.contains(b))
+                {
+                    continue;
+                }
+                // In many cases, the state we transition to has already been
+                // computed. 'cached_state' will do the minimal amount of work
+                // to check this, and if it exists, immediately return an
+                // already existing state ID.
+                let (next_dfa_id, is_new) = self.cached_state(dfa_id, unit)?;
+                self.dfa.set_transition(dfa_id, unit, next_dfa_id);
+                // If the state ID we got back is newly created, then we need
+                // to compile it, so add it to our uncompiled frontier.
+                if is_new {
+                    uncompiled.push(next_dfa_id);
+                }
+            }
+        }
+        debug!(
+            "determinization complete, memory usage: {}, \
+             dense DFA size: {}, \
+             is reverse? {}",
+            self.memory_usage(),
+            self.dfa.memory_usage(),
+            self.nfa.is_reverse(),
+        );
+
+        // A map from DFA state ID to one or more NFA match IDs. Each NFA match
+        // ID corresponds to a distinct regex pattern that matches in the state
+        // corresponding to the key.
+        let mut matches: BTreeMap<StateID, Vec<PatternID>> = BTreeMap::new();
+        self.cache.clear();
+        #[cfg(feature = "logging")]
+        let mut total_pat_len = 0;
+        for (i, state) in self.builder_states.into_iter().enumerate() {
+            if let Some(pat_ids) = state.match_pattern_ids() {
+                let id = self.dfa.to_state_id(i);
+                log! {
+                    total_pat_len += pat_ids.len();
+                }
+                matches.insert(id, pat_ids);
+            }
+        }
+        log! {
+            use core::mem::size_of;
+            let per_elem = size_of::<StateID>() + size_of::<Vec<PatternID>>();
+            let pats = total_pat_len * size_of::<PatternID>();
+            let mem = (matches.len() * per_elem) + pats;
+            log::debug!("matches map built, memory usage: {}", mem);
+        }
+        // At this point, we shuffle the "special" states in the final DFA.
+        // This permits a DFA's match loop to detect a match condition (among
+        // other things) by merely inspecting the current state's identifier,
+        // and avoids the need for any additional auxiliary storage.
+        self.dfa.shuffle(matches)?;
+        Ok(())
+    }
+
+    /// Return the identifier for the next DFA state given an existing DFA
+    /// state and an input byte. If the next DFA state already exists, then
+    /// return its identifier from the cache.
+    /// it and return its identifier.
+    ///
+    /// This routine returns a boolean indicating whether a new state was
+    /// built. If a new state is built, then the caller needs to add it to its
+    /// frontier of uncompiled DFA states to compute transitions for.
+    fn cached_state(
+        &mut self,
+        dfa_id: StateID,
+        unit: alphabet::Unit,
+    ) -> Result<(StateID, bool), BuildError> {
+        // Compute the set of all reachable NFA states, including epsilons.
+        let empty_builder = self.get_state_builder();
+        let builder = util::determinize::next(
+            self.nfa,
+            self.config.match_kind,
+            &mut self.sparses,
+            &mut self.stack,
+            &self.builder_states[self.dfa.to_index(dfa_id)],
+            unit,
+            empty_builder,
+        );
+        self.maybe_add_state(builder)
+    }
+
+    /// Compute the set of DFA start states and add their identifiers to
+    /// 'dfa_state_ids' (no duplicates are added).
+    fn add_all_starts(
+        &mut self,
+        dfa_state_ids: &mut Vec<StateID>,
+    ) -> Result<(), BuildError> {
+        // These should be the first states added.
+        assert!(dfa_state_ids.is_empty());
+        // We only want to add (un)anchored starting states that are consistent
+        // with our DFA's configuration. Unconditionally adding both (although
+        // it is the default) can make DFAs quite a bit bigger.
+        if self.dfa.start_kind().has_unanchored() {
+            self.add_start_group(Anchored::No, dfa_state_ids)?;
+        }
+        if self.dfa.start_kind().has_anchored() {
+            self.add_start_group(Anchored::Yes, dfa_state_ids)?;
+        }
+        // I previously had an 'assert' here checking that either
+        // 'dfa_state_ids' was non-empty, or the NFA had zero patterns. But it
+        // turns out this isn't always true. For example, the NFA might have
+        // one or more patterns but where all such patterns are just 'fail'
+        // states. These will ultimately just compile down to DFA dead states,
+        // and since the dead state was added earlier, no new DFA states are
+        // added. And thus, it is valid and okay for 'dfa_state_ids' to be
+        // empty even if there are a non-zero number of patterns in the NFA.
+
+        // We only need to compute anchored start states for each pattern if it
+        // was requested to do so.
+        if self.dfa.starts_for_each_pattern() {
+            for pid in self.nfa.patterns() {
+                self.add_start_group(Anchored::Pattern(pid), dfa_state_ids)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Add a group of start states for the given match pattern ID. Any new
+    /// DFA states added are pushed on to 'dfa_state_ids'. (No duplicates are
+    /// pushed.)
+    ///
+    /// When 'anchored' is 'Anchored::No' or 'Anchored::Yes', this compiles
+    /// a group of unanchored or anchored start states, respectively. When
+    /// 'anchored' is 'Anchored::Pattern(pid)', this compiles a group of
+    /// anchored start states that only match the given pattern.
+    ///
+    /// This panics if `anchored` corresponds to an invalid pattern ID.
+    fn add_start_group(
+        &mut self,
+        anchored: Anchored,
+        dfa_state_ids: &mut Vec<StateID>,
+    ) -> Result<(), BuildError> {
+        let nfa_start = match anchored {
+            Anchored::No => self.nfa.start_unanchored(),
+            Anchored::Yes => self.nfa.start_anchored(),
+            Anchored::Pattern(pid) => {
+                self.nfa.start_pattern(pid).expect("valid pattern ID")
+            }
+        };
+
+        // When compiling start states, we're careful not to build additional
+        // states that aren't necessary. For example, if the NFA has no word
+        // boundary assertion, then there's no reason to have distinct start
+        // states for 'NonWordByte' and 'WordByte' starting configurations.
+        // Instead, the 'WordByte' starting configuration can just point
+        // directly to the start state for the 'NonWordByte' config.
+        //
+        // Note though that we only need to care about assertions in the
+        // prefix of an NFA since this only concerns the starting states.
+        // (Actually, the most precise thing we could do is look at the prefix
+        // assertions of each pattern when 'anchored == Anchored::Pattern',
+        // and then only compile extra states if the prefix is non-empty.) But
+        // we settle for simplicity here instead of absolute minimalism. It is
+        // somewhat rare, after all, for multiple patterns in the same regex to
+        // have different prefix look-arounds.
+
+        let (id, is_new) =
+            self.add_one_start(nfa_start, Start::NonWordByte)?;
+        self.dfa.set_start_state(anchored, Start::NonWordByte, id);
+        if is_new {
+            dfa_state_ids.push(id);
+        }
+
+        if !self.nfa.look_set_prefix_any().contains_word() {
+            self.dfa.set_start_state(anchored, Start::WordByte, id);
+        } else {
+            let (id, is_new) =
+                self.add_one_start(nfa_start, Start::WordByte)?;
+            self.dfa.set_start_state(anchored, Start::WordByte, id);
+            if is_new {
+                dfa_state_ids.push(id);
+            }
+        }
+        if !self.nfa.look_set_prefix_any().contains_anchor() {
+            self.dfa.set_start_state(anchored, Start::Text, id);
+            self.dfa.set_start_state(anchored, Start::LineLF, id);
+            self.dfa.set_start_state(anchored, Start::LineCR, id);
+            self.dfa.set_start_state(
+                anchored,
+                Start::CustomLineTerminator,
+                id,
+            );
+        } else {
+            let (id, is_new) = self.add_one_start(nfa_start, Start::Text)?;
+            self.dfa.set_start_state(anchored, Start::Text, id);
+            if is_new {
+                dfa_state_ids.push(id);
+            }
+
+            let (id, is_new) = self.add_one_start(nfa_start, Start::LineLF)?;
+            self.dfa.set_start_state(anchored, Start::LineLF, id);
+            if is_new {
+                dfa_state_ids.push(id);
+            }
+
+            let (id, is_new) = self.add_one_start(nfa_start, Start::LineCR)?;
+            self.dfa.set_start_state(anchored, Start::LineCR, id);
+            if is_new {
+                dfa_state_ids.push(id);
+            }
+
+            let (id, is_new) =
+                self.add_one_start(nfa_start, Start::CustomLineTerminator)?;
+            self.dfa.set_start_state(
+                anchored,
+                Start::CustomLineTerminator,
+                id,
+            );
+            if is_new {
+                dfa_state_ids.push(id);
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Add a new DFA start state corresponding to the given starting NFA
+    /// state, and the starting search configuration. (The starting search
+    /// configuration essentially tells us which look-behind assertions are
+    /// true for this particular state.)
+    ///
+    /// The boolean returned indicates whether the state ID returned is a newly
+    /// created state, or a previously cached state.
+    fn add_one_start(
+        &mut self,
+        nfa_start: StateID,
+        start: Start,
+    ) -> Result<(StateID, bool), BuildError> {
+        // Compute the look-behind assertions that are true in this starting
+        // configuration, and then determine the epsilon closure. While
+        // computing the epsilon closure, we only follow conditional epsilon
+        // transitions that satisfy the look-behind assertions in 'look_have'.
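+        // For example, in the 'Start::LineLF' configuration, the byte
+        // immediately before the search start is known to be '\n', so an
+        // assertion like '(?m:^)' is satisfied and its conditional epsilon
+        // transitions are followed.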
+        let mut builder_matches = self.get_state_builder().into_matches();
+        util::determinize::set_lookbehind_from_start(
+            self.nfa,
+            &start,
+            &mut builder_matches,
+        );
+        self.sparses.set1.clear();
+        util::determinize::epsilon_closure(
+            self.nfa,
+            nfa_start,
+            builder_matches.look_have(),
+            &mut self.stack,
+            &mut self.sparses.set1,
+        );
+        let mut builder = builder_matches.into_nfa();
+        util::determinize::add_nfa_states(
+            &self.nfa,
+            &self.sparses.set1,
+            &mut builder,
+        );
+        self.maybe_add_state(builder)
+    }
+
+    /// Adds the given state to the DFA being built depending on whether it
+    /// already exists in this determinizer's cache.
+    ///
+    /// If it does exist, then the memory used by 'state' is put back into the
+    /// determinizer and the previously created state's ID is returned. (Along
+    /// with 'false', indicating that no new state was added.)
+    ///
+    /// If it does not exist, then the state is added to the DFA being built
+    /// and a fresh ID is allocated (if ID allocation fails, then an error is
+    /// returned) and returned. (Along with 'true', indicating that a new state
+    /// was added.)
+    fn maybe_add_state(
+        &mut self,
+        builder: StateBuilderNFA,
+    ) -> Result<(StateID, bool), BuildError> {
+        if let Some(&cached_id) = self.cache.get(builder.as_bytes()) {
+            // Since we have a cached state, put the constructed state's
+            // memory back into our scratch space, so that it can be reused.
+            self.put_state_builder(builder);
+            return Ok((cached_id, false));
+        }
+        self.add_state(builder).map(|sid| (sid, true))
+    }
+
+    /// Add the given state to the DFA and make it available in the cache.
+    ///
+    /// The state initially has no transitions. That is, it transitions to the
+    /// dead state for all possible inputs, and transitions to the quit state
+    /// for all quit bytes.
+    ///
+    /// If adding the state would exceed the maximum value for StateID, then an
+    /// error is returned.
+    fn add_state(
+        &mut self,
+        builder: StateBuilderNFA,
+    ) -> Result<StateID, BuildError> {
+        let id = self.dfa.add_empty_state()?;
+        if !self.config.quit.is_empty() {
+            for b in self.config.quit.iter() {
+                self.dfa.set_transition(
+                    id,
+                    alphabet::Unit::u8(b),
+                    self.dfa.quit_id(),
+                );
+            }
+        }
+        let state = builder.to_state();
+        // States use reference counting internally, so we only need to count
+        // their memory usage once.
+        self.memory_usage_state += state.memory_usage();
+        self.builder_states.push(state.clone());
+        self.cache.insert(state, id);
+        self.put_state_builder(builder);
+        if let Some(limit) = self.config.dfa_size_limit {
+            if self.dfa.memory_usage() > limit {
+                return Err(BuildError::dfa_exceeded_size_limit(limit));
+            }
+        }
+        if let Some(limit) = self.config.determinize_size_limit {
+            if self.memory_usage() > limit {
+                return Err(BuildError::determinize_exceeded_size_limit(
+                    limit,
+                ));
+            }
+        }
+        Ok(id)
+    }
+
+    /// Returns a state builder from this determinizer that might have existing
+    /// capacity. This helps avoid allocs in cases where a state is built that
+    /// turns out to already be cached.
+    ///
+    /// Callers must put the state builder back with 'put_state_builder',
+    /// otherwise the allocation reuse won't work.
+    fn get_state_builder(&mut self) -> StateBuilderEmpty {
+        core::mem::replace(
+            &mut self.scratch_state_builder,
+            StateBuilderEmpty::new(),
+        )
+    }
+
+    /// Puts the given state builder back into this determinizer for reuse.
+    ///
+    /// Note that building a 'State' from a builder always creates a new
+    /// alloc, so callers should always put the builder back.
+    fn put_state_builder(&mut self, builder: StateBuilderNFA) {
+        let _ = core::mem::replace(
+            &mut self.scratch_state_builder,
+            builder.clear(),
+        );
+    }
+
+    /// Return the memory usage, in bytes, of this determinizer at the current
+    /// point in time. This does not include memory used by the NFA or the
+    /// dense DFA itself.
+    fn memory_usage(&self) -> usize {
+        use core::mem::size_of;
+
+        self.builder_states.len() * size_of::<State>()
+            // Maps likely use more memory than this, but it's probably close.
+            + self.cache.len() * (size_of::<State>() + size_of::<StateID>())
+            + self.memory_usage_state
+            + self.stack.capacity() * size_of::<StateID>()
+            + self.scratch_state_builder.capacity()
+    }
+}
diff --git a/vendor/regex-automata/src/dfa/minimize.rs b/vendor/regex-automata/src/dfa/minimize.rs
new file mode 100644
index 0000000..fea925b
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/minimize.rs
@@ -0,0 +1,463 @@
+use core::{cell::RefCell, fmt, mem};
+
+use alloc::{collections::BTreeMap, rc::Rc, vec, vec::Vec};
+
+use crate::{
+    dfa::{automaton::Automaton, dense, DEAD},
+    util::{
+        alphabet,
+        primitives::{PatternID, StateID},
+    },
+};
+
+/// An implementation of Hopcroft's algorithm for minimizing DFAs.
+///
+/// The algorithm implemented here is mostly taken from Wikipedia:
+/// https://en.wikipedia.org/wiki/DFA_minimization#Hopcroft's_algorithm
+///
+/// This code has had some light optimization attention paid to it,
+/// particularly in the form of reducing allocation as much as possible.
+/// However, it is still generally slow. Future optimization work should
+/// probably focus on the bigger picture rather than micro-optimizations. For
+/// example:
+///
+/// 1. Figure out how to more intelligently create initial partitions. That is,
+///    Hopcroft's algorithm starts by creating two partitions of DFA states
+///    that are known to NOT be equivalent: match states and non-match states.
+///    The algorithm proceeds by progressively refining these partitions into
+///    smaller partitions. If we could start with more partitions, then we
+///    could reduce the amount of work that Hopcroft's algorithm needs to do.
+/// 2. For every partition that we visit, we find all incoming transitions to
+///    every state in the partition for *every* element in the alphabet. (This
+///    is why using byte classes can significantly decrease minimization times,
+///    since byte classes shrink the alphabet.) This is quite costly and there
+///    is perhaps some redundant work being performed depending on the specific
+///    states in the set. For example, we might be able to only visit some
+///    elements of the alphabet based on the transitions.
+/// 3. Move parts of minimization into determinization. If minimization has
+///    fewer states to deal with, then it should run faster. A prime example
+///    of this might be large Unicode classes, which are generated in a way
+///    that can create a lot of redundant states. (Some work has been done on
+///    this point during NFA compilation via the algorithm described in the
+///    "Incremental Construction of Minimal Acyclic Finite-State Automata"
+///    paper.)
+pub(crate) struct Minimizer<'a> {
+    dfa: &'a mut dense::OwnedDFA,
+    in_transitions: Vec<Vec<Vec<StateID>>>,
+    partitions: Vec<StateSet>,
+    waiting: Vec<StateSet>,
+}
+
+impl<'a> fmt::Debug for Minimizer<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Minimizer")
+            .field("dfa", &self.dfa)
+            .field("in_transitions", &self.in_transitions)
+            .field("partitions", &self.partitions)
+            .field("waiting", &self.waiting)
+            .finish()
+    }
+}
+
+/// A set of states. A state set makes up a single partition in Hopcroft's
+/// algorithm.
+///
+/// It is represented by an ordered set of state identifiers. We use shared
+/// ownership so that a single state set can be in both the set of partitions
+/// and in the set of waiting sets simultaneously without an additional
+/// allocation. Generally, once a state set is built, it becomes immutable.
+///
+/// We use this representation because it avoids the overhead of more
+/// traditional set data structures (HashSet/BTreeSet), and also because
+/// computing intersection/subtraction on this representation is especially
+/// fast.
+#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
+struct StateSet {
+    ids: Rc<RefCell<Vec<StateID>>>,
+}
+
+impl<'a> Minimizer<'a> {
+    pub fn new(dfa: &'a mut dense::OwnedDFA) -> Minimizer<'a> {
+        let in_transitions = Minimizer::incoming_transitions(dfa);
+        let partitions = Minimizer::initial_partitions(dfa);
+        let waiting = partitions.clone();
+        Minimizer { dfa, in_transitions, partitions, waiting }
+    }
+
+    pub fn run(mut self) {
+        let stride2 = self.dfa.stride2();
+        let as_state_id = |index: usize| -> StateID {
+            StateID::new(index << stride2).unwrap()
+        };
+        let as_index = |id: StateID| -> usize { id.as_usize() >> stride2 };
+
+        let mut incoming = StateSet::empty();
+        let mut scratch1 = StateSet::empty();
+        let mut scratch2 = StateSet::empty();
+        let mut newparts = vec![];
+
+        // This loop is basically Hopcroft's algorithm. Everything else is just
+        // shuffling data around to fit our representation.
+        while let Some(set) = self.waiting.pop() {
+            for b in self.dfa.byte_classes().iter() {
+                self.find_incoming_to(b, &set, &mut incoming);
+                // If incoming is empty, then the intersection with any other
+                // set must also be empty. So 'newparts' just ends up being
+                // 'self.partitions'. So there's no need to go through the loop
+                // below.
+                //
+                // This actually turns out to be a rather large optimization.
+                // On the order of making minimization 4-5x faster. It's likely
+                // that the vast majority of all states have very few incoming
+                // transitions.
+                if incoming.is_empty() {
+                    continue;
+                }
+
+                for p in 0..self.partitions.len() {
+                    self.partitions[p].intersection(&incoming, &mut scratch1);
+                    if scratch1.is_empty() {
+                        newparts.push(self.partitions[p].clone());
+                        continue;
+                    }
+
+                    self.partitions[p].subtract(&incoming, &mut scratch2);
+                    if scratch2.is_empty() {
+                        newparts.push(self.partitions[p].clone());
+                        continue;
+                    }
+
+                    let (x, y) =
+                        (scratch1.deep_clone(), scratch2.deep_clone());
+                    newparts.push(x.clone());
+                    newparts.push(y.clone());
+                    match self.find_waiting(&self.partitions[p]) {
+                        Some(i) => {
+                            self.waiting[i] = x;
+                            self.waiting.push(y);
+                        }
+                        None => {
+                            if x.len() <= y.len() {
+                                self.waiting.push(x);
+                            } else {
+                                self.waiting.push(y);
+                            }
+                        }
+                    }
+                }
+                newparts = mem::replace(&mut self.partitions, newparts);
+                newparts.clear();
+            }
+        }
+
+        // At this point, we now have a minimal partitioning of states, where
+        // each partition is an equivalence class of DFA states. Now we need to
+        // use this partitioning to update the DFA to only contain one state
+        // for each partition.
+
+        // Create a map from DFA state ID to the representative ID of the
+        // equivalence class to which it belongs. The representative ID of an
+        // equivalence class of states is the minimum ID in that class.
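+        // (For example, if the states {4, 7, 9} form one equivalence class,
+        // then all three of them map to 4 here.)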
+        let mut state_to_part = vec![DEAD; self.dfa.state_len()];
+        for p in &self.partitions {
+            p.iter(|id| state_to_part[as_index(id)] = p.min());
+        }
+
+        // Generate a new contiguous sequence of IDs for minimal states, and
+        // create a map from equivalence IDs to the new IDs. Thus, the new
+        // minimal ID of *any* state in the unminimized DFA can be obtained
+        // with minimal_ids[state_to_part[old_id]].
+        let mut minimal_ids = vec![DEAD; self.dfa.state_len()];
+        let mut new_index = 0;
+        for state in self.dfa.states() {
+            if state_to_part[as_index(state.id())] == state.id() {
+                minimal_ids[as_index(state.id())] = as_state_id(new_index);
+                new_index += 1;
+            }
+        }
+        // The total number of states in the minimal DFA.
+        let minimal_count = new_index;
+        // Convenience function for remapping state IDs. This takes an old ID,
+        // looks up its Hopcroft partition and then maps that to the new ID
+        // range.
+        let remap = |old| minimal_ids[as_index(state_to_part[as_index(old)])];
+
+        // Re-map this DFA in place such that the only states remaining
+        // correspond to the representative states of every equivalence class.
+        for id in (0..self.dfa.state_len()).map(as_state_id) {
+            // If this state isn't a representative for an equivalence class,
+            // then we skip it since it won't appear in the minimal DFA.
+            if state_to_part[as_index(id)] != id {
+                continue;
+            }
+            self.dfa.remap_state(id, remap);
+            self.dfa.swap_states(id, minimal_ids[as_index(id)]);
+        }
+        // Trim off all unused states from the pre-minimized DFA. This
+        // represents all states that were merged into a non-singleton
+        // equivalence class of states, and appeared after the first state
+        // in each such class. (Because the state with the smallest ID in each
+        // equivalence class is its representative ID.)
+        self.dfa.truncate_states(minimal_count);
+
+        // Update the new start states, which are now just the minimal IDs of
+        // whatever states the old start states were collapsed into. Also, we
+        // collect everything before-hand to work around the borrow checker.
+        // We're already allocating so much that this is probably fine. If this
+        // turns out to be costly, then I guess add a `starts_mut` iterator.
+        let starts: Vec<_> = self.dfa.starts().collect();
+        for (old_start_id, anchored, start_type) in starts {
+            self.dfa.set_start_state(
+                anchored,
+                start_type,
+                remap(old_start_id),
+            );
+        }
+
+        // Update the match state pattern ID list for multi-regexes. All we
+        // need to do is remap the match state IDs. The pattern ID lists are
+        // always the same as they were since match states with distinct
+        // pattern ID lists are always considered distinct states.
+        let mut pmap = BTreeMap::new();
+        for (match_id, pattern_ids) in self.dfa.pattern_map() {
+            let new_id = remap(match_id);
+            pmap.insert(new_id, pattern_ids);
+        }
+        // This unwrap is OK because minimization never increases the number of
+        // match states or patterns in those match states. Since minimization
+        // runs after the pattern map has already been set at least once, we
+        // know that our match states cannot error.
+        self.dfa.set_pattern_map(&pmap).unwrap();
+
+        // In order to update the ID of the maximum match state, we need to
+        // find the maximum ID among all of the match states in the minimized
+        // DFA. This is not necessarily the new ID of the unminimized maximum
+        // match state, since that could have been collapsed with a much
+        // earlier match state. Therefore, to find the new max match state,
+        // we iterate over all previous match states, find their corresponding
+        // new minimal ID, and take the maximum of those.
+        let old = self.dfa.special().clone();
+        let new = self.dfa.special_mut();
+        // ... but only remap if we had match states.
+        if old.matches() {
+            new.min_match = StateID::MAX;
+            new.max_match = StateID::ZERO;
+            for i in as_index(old.min_match)..=as_index(old.max_match) {
+                let new_id = remap(as_state_id(i));
+                if new_id < new.min_match {
+                    new.min_match = new_id;
+                }
+                if new_id > new.max_match {
+                    new.max_match = new_id;
+                }
+            }
+        }
+        // ... same, but for start states.
+        if old.starts() {
+            new.min_start = StateID::MAX;
+            new.max_start = StateID::ZERO;
+            for i in as_index(old.min_start)..=as_index(old.max_start) {
+                let new_id = remap(as_state_id(i));
+                if new_id == DEAD {
+                    continue;
+                }
+                if new_id < new.min_start {
+                    new.min_start = new_id;
+                }
+                if new_id > new.max_start {
+                    new.max_start = new_id;
+                }
+            }
+            if new.max_start == DEAD {
+                new.min_start = DEAD;
+            }
+        }
+        new.quit_id = remap(new.quit_id);
+        new.set_max();
+    }
+
+    fn find_waiting(&self, set: &StateSet) -> Option<usize> {
+        self.waiting.iter().position(|s| s == set)
+    }
+
+    fn find_incoming_to(
+        &self,
+        b: alphabet::Unit,
+        set: &StateSet,
+        incoming: &mut StateSet,
+    ) {
+        incoming.clear();
+        set.iter(|id| {
+            for &inid in
+                &self.in_transitions[self.dfa.to_index(id)][b.as_usize()]
+            {
+                incoming.add(inid);
+            }
+        });
+        incoming.canonicalize();
+    }
+
+    fn initial_partitions(dfa: &dense::OwnedDFA) -> Vec<StateSet> {
+        // For match states, we know that two match states with different
+        // pattern ID lists will *always* be distinct, so we can partition them
+        // initially based on that.
+        let mut matching: BTreeMap<Vec<PatternID>, StateSet> = BTreeMap::new();
+        let mut is_quit = StateSet::empty();
+        let mut no_match = StateSet::empty();
+        for state in dfa.states() {
+            if dfa.is_match_state(state.id()) {
+                let mut pids = vec![];
+                for i in 0..dfa.match_len(state.id()) {
+                    pids.push(dfa.match_pattern(state.id(), i));
+                }
+                matching
+                    .entry(pids)
+                    .or_insert(StateSet::empty())
+                    .add(state.id());
+            } else if dfa.is_quit_state(state.id()) {
+                is_quit.add(state.id());
+            } else {
+                no_match.add(state.id());
+            }
+        }
+
+        let mut sets: Vec<StateSet> =
+            matching.into_iter().map(|(_, set)| set).collect();
+        sets.push(no_match);
+        sets.push(is_quit);
+        sets
+    }
+
+    fn incoming_transitions(dfa: &dense::OwnedDFA) -> Vec<Vec<Vec<StateID>>> {
+        let mut incoming = vec![];
+        for _ in dfa.states() {
+            incoming.push(vec![vec![]; dfa.alphabet_len()]);
+        }
+        for state in dfa.states() {
+            for (b, next) in state.transitions() {
+                incoming[dfa.to_index(next)][b.as_usize()].push(state.id());
+            }
+        }
+        incoming
+    }
+}
+
+impl StateSet {
+    fn empty() -> StateSet {
+        StateSet { ids: Rc::new(RefCell::new(vec![])) }
+    }
+
+    fn add(&mut self, id: StateID) {
+        self.ids.borrow_mut().push(id);
+    }
+
+    fn min(&self) -> StateID {
+        self.ids.borrow()[0]
+    }
+
+    fn canonicalize(&mut self) {
+        self.ids.borrow_mut().sort();
+        self.ids.borrow_mut().dedup();
+    }
+
+    fn clear(&mut self) {
+        self.ids.borrow_mut().clear();
+    }
+
+    fn len(&self) -> usize {
+        self.ids.borrow().len()
+    }
+
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    fn deep_clone(&self) -> StateSet {
+        let ids = self.ids.borrow().iter().cloned().collect();
+        StateSet { ids: Rc::new(RefCell::new(ids)) }
+    }
+
+    fn iter<F: FnMut(StateID)>(&self, mut f: F) {
+        for &id in self.ids.borrow().iter() {
+            f(id);
+        }
+    }
+
+    fn intersection(&self, other: &StateSet, dest: &mut StateSet) {
+        dest.clear();
+        if self.is_empty() || other.is_empty() {
+            return;
+        }
+
+        // Both sets are sorted, so intersection is a single linear-time
+        // merge pass over the two ID sequences.
+        let (seta, setb) = (self.ids.borrow(), other.ids.borrow());
+        let (mut ita, mut itb) = (seta.iter().cloned(), setb.iter().cloned());
+        let (mut a, mut b) = (ita.next().unwrap(), itb.next().unwrap());
+        loop {
+            if a == b {
+                dest.add(a);
+                a = match ita.next() {
+                    None => break,
+                    Some(a) => a,
+                };
+                b = match itb.next() {
+                    None => break,
+                    Some(b) => b,
+                };
+            } else if a < b {
+                a = match ita.next() {
+                    None => break,
+                    Some(a) => a,
+                };
+            } else {
+                b = match itb.next() {
+                    None => break,
+                    Some(b) => b,
+                };
+            }
+        }
+    }
+
+    fn subtract(&self, other: &StateSet, dest: &mut StateSet) {
+        dest.clear();
+        if self.is_empty() || other.is_empty() {
+            self.iter(|s| dest.add(s));
+            return;
+        }
+
+        let (seta, setb) = (self.ids.borrow(), other.ids.borrow());
+        let (mut ita, mut itb) = (seta.iter().cloned(), setb.iter().cloned());
+        let (mut a, mut b) = (ita.next().unwrap(), itb.next().unwrap());
+        loop {
+            if a == b {
+                a = match ita.next() {
+                    None => break,
+                    Some(a) => a,
+                };
+                b = match itb.next() {
+                    None => {
+                        dest.add(a);
+                        break;
+                    }
+                    Some(b) => b,
+                };
+            } else if a < b {
+                dest.add(a);
+                a = match ita.next() {
+                    None => break,
+                    Some(a) => a,
+                };
+            } else {
+                b = match itb.next() {
+                    None => {
+                        dest.add(a);
+                        break;
+                    }
+                    Some(b) => b,
+                };
+            }
+        }
+        for a in ita {
+            dest.add(a);
+        }
+    }
+}
diff --git a/vendor/regex-automata/src/dfa/mod.rs b/vendor/regex-automata/src/dfa/mod.rs
new file mode 100644
index 0000000..fd58cac
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/mod.rs
@@ -0,0 +1,360 @@
+/*!
+A module for building and searching with deterministic finite automata (DFAs).
+
+Like other modules in this crate, DFAs support a rich regex syntax with Unicode
+features. DFAs also have extensive options for configuring the best space vs
+time trade off for your use case and provide support for cheap deserialization
+of automata for use in `no_std` environments.
+
+If you're looking for lazy DFAs that build themselves incrementally during
+search, then please see the top-level [`hybrid` module](crate::hybrid).
+
+# Overview
+
+This section gives a brief overview of the primary types in this module:
+
+* A [`regex::Regex`] provides a way to search for matches of a regular
+expression using DFAs. This includes iterating over matches with both the start
+and end positions of each match.
+* A [`dense::DFA`] provides low level access to a DFA that uses a dense
+representation (uses lots of space, but fast searching).
+* A [`sparse::DFA`] provides the same API as a `dense::DFA`, but uses a sparse
+representation (uses less space, but slower searching).
+* An [`Automaton`] trait that defines an interface that both dense and sparse
+DFAs implement. (A `regex::Regex` is generic over this trait.)
+* Both dense DFAs and sparse DFAs support serialization to raw bytes (e.g.,
+[`dense::DFA::to_bytes_little_endian`]) and cheap deserialization (e.g.,
+[`dense::DFA::from_bytes`]).
+
+There is also a [`onepass`] module that provides a [one-pass
+DFA](onepass::DFA). The unique advantage of this DFA is that, for the class
+of regexes it can be built with, it supports reporting the spans of matching
+capturing groups. It is the only DFA in this crate capable of such a thing.
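+
+As a quick taste of that (a minimal sketch, assuming the default crate
+features, which include `dfa-onepass` and `syntax`), here is a one-pass DFA
+reporting capture group spans:
+
+```
+use regex_automata::{dfa::onepass::DFA, Match, Span};
+
+// The pattern must actually be one-pass, otherwise construction fails.
+let re = DFA::new(r"([a-z]+)\s+([0-9]+)")?;
+let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+re.captures(&mut cache, "abc 123", &mut caps);
+// Both the overall match and the span of each capture group are available.
+assert_eq!(Some(Match::must(0, 0..7)), caps.get_match());
+assert_eq!(Some(Span::from(0..3)), caps.get_group(1));
+assert_eq!(Some(Span::from(4..7)), caps.get_group(2));
+# Ok::<(), Box<dyn std::error::Error>>(())
+```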
+
+# Example: basic regex searching
+
+This example shows how to compile a regex using the default configuration
+and then use it to find matches in a byte string:
+
+```
+use regex_automata::{Match, dfa::regex::Regex};
+
+let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?;
+let text = b"2018-12-24 2016-10-08";
+let matches: Vec<Match> = re.find_iter(text).collect();
+assert_eq!(matches, vec![
+    Match::must(0, 0..10),
+    Match::must(0, 11..21),
+]);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+# Example: searching with regex sets
+
+The DFAs in this module all fully support searching with multiple regexes
+simultaneously. You can use this support with standard leftmost-first style
+searching to find non-overlapping matches:
+
+```
+# if cfg!(miri) { return Ok(()); } // miri takes too long
+use regex_automata::{Match, dfa::regex::Regex};
+
+let re = Regex::new_many(&[r"\w+", r"\S+"])?;
+let text = b"@foo bar";
+let matches: Vec<Match> = re.find_iter(text).collect();
+assert_eq!(matches, vec![
+    Match::must(1, 0..4),
+    Match::must(0, 5..8),
+]);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+# Example: use sparse DFAs
+
+By default, compiling a regex will use dense DFAs internally. This uses more
+memory, but executes searches more quickly. If you can abide slower searches
+(somewhere around 3-5x), then sparse DFAs might make more sense since they can
+use significantly less space.
+
+Using sparse DFAs is as easy as using `Regex::new_sparse` instead of
+`Regex::new`:
+
+```
+use regex_automata::{Match, dfa::regex::Regex};
+
+let re = Regex::new_sparse(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap();
+let text = b"2018-12-24 2016-10-08";
+let matches: Vec<Match> = re.find_iter(text).collect();
+assert_eq!(matches, vec![
+    Match::must(0, 0..10),
+    Match::must(0, 11..21),
+]);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+If you already have dense DFAs for some reason, they can be converted to sparse
+DFAs and used to build a new `Regex`. For example:
+
+```
+use regex_automata::{Match, dfa::regex::Regex};
+
+let dense_re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap();
+let sparse_re = Regex::builder().build_from_dfas(
+    dense_re.forward().to_sparse()?,
+    dense_re.reverse().to_sparse()?,
+);
+let text = b"2018-12-24 2016-10-08";
+let matches: Vec<Match> = sparse_re.find_iter(text).collect();
+assert_eq!(matches, vec![
+    Match::must(0, 0..10),
+    Match::must(0, 11..21),
+]);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+# Example: deserialize a DFA
+
+This shows how to first serialize a DFA into raw bytes, and then deserialize
+those raw bytes back into a DFA. While this particular example is a
+bit contrived, this same technique can be used in your program to
+deserialize a DFA at start up time or by memory mapping a file.
+
+```
+use regex_automata::{Match, dfa::{dense, regex::Regex}};
+
+let re1 = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap();
+// serialize both the forward and reverse DFAs, see note below
+let (fwd_bytes, fwd_pad) = re1.forward().to_bytes_native_endian();
+let (rev_bytes, rev_pad) = re1.reverse().to_bytes_native_endian();
+// now deserialize both---we need to specify the correct type!
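+// (The `&[u32]` type parameter below says that the deserialized DFAs borrow
+// their transition tables from `fwd_bytes`/`rev_bytes` rather than owning
+// them.)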
+let fwd: dense::DFA<&[u32]> = dense::DFA::from_bytes(&fwd_bytes[fwd_pad..])?.0;
+let rev: dense::DFA<&[u32]> = dense::DFA::from_bytes(&rev_bytes[rev_pad..])?.0;
+// finally, reconstruct our regex
+let re2 = Regex::builder().build_from_dfas(fwd, rev);
+
+// we can use it like normal
+let text = b"2018-12-24 2016-10-08";
+let matches: Vec<Match> = re2.find_iter(text).collect();
+assert_eq!(matches, vec![
+    Match::must(0, 0..10),
+    Match::must(0, 11..21),
+]);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+There are a few points worth noting here:
+
+* We need to extract the raw DFAs used by the regex and serialize those. You
+can build the DFAs manually yourself using [`dense::Builder`], but using
+the DFAs from a `Regex` guarantees that the DFAs are built correctly. (In
+particular, a `Regex` constructs a reverse DFA for finding the starting
+location of matches.)
+* To convert the DFA to raw bytes, we use the `to_bytes_native_endian` method.
+In practice, you'll want to use either [`dense::DFA::to_bytes_little_endian`]
+or [`dense::DFA::to_bytes_big_endian`], depending on which platform you're
+deserializing your DFA from. If you intend to deserialize on either platform,
+then you'll need to serialize both and deserialize the right one depending on
+your target's endianness.
+* Safely deserializing a DFA requires verifying the raw bytes, particularly if
+they are untrusted, since an invalid DFA could cause logical errors, panics
+or even undefined behavior. This verification step requires visiting all of
+the transitions in the DFA, which can be costly. If cheaper verification is
+desired, then [`dense::DFA::from_bytes_unchecked`] is available; it only does
+verification that can be performed in constant time. However, one can only use
+this routine if the caller can guarantee that the bytes provided encode a
+valid DFA.
+
+The same process can be achieved with sparse DFAs as well:
+
+```
+use regex_automata::{Match, dfa::{sparse, regex::Regex}};
+
+let re1 = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap();
+// serialize both
+let fwd_bytes = re1.forward().to_sparse()?.to_bytes_native_endian();
+let rev_bytes = re1.reverse().to_sparse()?.to_bytes_native_endian();
+// now deserialize both---we need to specify the correct type!
+let fwd: sparse::DFA<&[u8]> = sparse::DFA::from_bytes(&fwd_bytes)?.0;
+let rev: sparse::DFA<&[u8]> = sparse::DFA::from_bytes(&rev_bytes)?.0;
+// finally, reconstruct our regex
+let re2 = Regex::builder().build_from_dfas(fwd, rev);
+
+// we can use it like normal
+let text = b"2018-12-24 2016-10-08";
+let matches: Vec<Match> = re2.find_iter(text).collect();
+assert_eq!(matches, vec![
+    Match::must(0, 0..10),
+    Match::must(0, 11..21),
+]);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+Note that unlike dense DFAs, sparse DFAs have no alignment requirements.
+Conversely, dense DFAs must be aligned to the same alignment as a
+[`StateID`](crate::util::primitives::StateID).
+
+# Support for `no_std` and `alloc`-only
+
+This crate comes with `alloc` and `std` features that are enabled by default.
+When the `alloc` or `std` features are enabled, the API of this module will
+include the facilities necessary for compiling, serializing, deserializing
+and searching with DFAs. When only the `alloc` feature is enabled, then
+implementations of the `std::error::Error` trait are dropped, but everything
+else generally remains the same. When both the `alloc` and `std` features are
+disabled, the API of this module will shrink such that it only includes the
+facilities necessary for deserializing and searching with DFAs.
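+
+For instance, a deserialize-and-search-only setup might look like the
+following sketch (the `fwd.bin` and `rev.bin` file names here are
+hypothetical, standing in for DFAs serialized ahead of time, e.g., with
+`regex-cli`; sparse DFAs are used since they have no alignment requirements):
+
+```ignore
+use regex_automata::dfa::{regex::Regex, sparse};
+
+static FWD: &[u8] = include_bytes!("fwd.bin");
+static REV: &[u8] = include_bytes!("rev.bin");
+
+fn deserialize_regex() -> Regex<sparse::DFA<&'static [u8]>> {
+    // Deserialization verifies the bytes, but is cheap relative to
+    // compiling the regex from scratch.
+    let fwd = sparse::DFA::from_bytes(FWD).expect("valid forward DFA").0;
+    let rev = sparse::DFA::from_bytes(REV).expect("valid reverse DFA").0;
+    Regex::builder().build_from_dfas(fwd, rev)
+}
+```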
+
+The intended workflow for `no_std` environments is thus as follows:
+
+* Write a program with the `alloc` or `std` features that compiles and
+serializes a regular expression. You may need to serialize both little and big
+endian versions of each DFA. (So that's 4 DFAs in total for each regex.)
+* In your `no_std` environment, follow the examples above for deserializing
+your previously serialized DFAs into regexes. You can then search with them as
+you would any regex.
+
+Deserialization can happen anywhere. For example, with bytes embedded into a
+binary or with a file memory mapped at runtime.
+
+The `regex-cli` command (found in the same repository as this crate) can be
+used to serialize DFAs to files and generate Rust code to read them.
+
+# Syntax
+
+This module supports the same syntax as the `regex` crate, since they share the
+same parser. You can find an exhaustive list of supported syntax in the
+[documentation for the `regex` crate](https://docs.rs/regex/1/regex/#syntax).
+
+There are two things that are not supported by the DFAs in this module:
+
+* Capturing groups. The DFAs (and [`Regex`](regex::Regex)es built on top
+of them) can only find the offsets of an entire match, but cannot resolve
+the offsets of each capturing group. This is because DFAs do not have the
+expressive power necessary.
+* Unicode word boundaries. These present particularly difficult challenges for
+DFA construction and would result in an explosion in the number of states.
+One can enable [`dense::Config::unicode_word_boundary`] though, which provides
+heuristic support for Unicode word boundaries that only works on ASCII text.
+Otherwise, one can use `(?-u:\b)` for an ASCII word boundary, which will work
+on any input.
+
+There are no plans to lift either of these limitations.
+
+Note that these restrictions are identical to the restrictions on lazy DFAs.
+
+# Differences with general purpose regexes
+
+The main goal of the [`regex`](https://docs.rs/regex) crate is to serve as a
+general purpose regular expression engine. It aims to automatically balance low
+compile times, fast search times and low memory usage, while also providing
+a convenient API for users. In contrast, this module provides a lower level
+regular expression interface based exclusively on DFAs that is a bit less
+convenient while providing more explicit control over memory usage and search
+times.
+
+Here are some specific negative differences:
+
+* **Compilation can take an exponential amount of time and space** in the size
+of the regex pattern. While most patterns do not exhibit worst case exponential
+time, such patterns do exist. For example, `[01]*1[01]{N}` will build a DFA
+with approximately `2^(N+2)` states. For this reason, untrusted patterns should
+not be compiled with this module. (In the future, the API may expose an option
+to return an error if the DFA gets too big.)
+* This module does not support sub-match extraction via capturing groups, which
+can be achieved with the regex crate's "captures" API.
+* While the regex crate doesn't necessarily sport fast compilation times,
+the regexes in this module are almost universally slow to compile, especially
+when they contain large Unicode character classes. For example, on my system,
+compiling `\w{50}` takes about 1 second and almost 15MB of memory! (Compiling
+a sparse regex takes about the same time but only uses about 1.2MB of
+memory.) Conversely, compiling the same regex without Unicode support, e.g.,
+`(?-u)\w{50}`, takes under 1 millisecond and about 15KB of memory. For this
+reason, you should only use Unicode character classes if you absolutely need
+them! (They are enabled by default though.)
+* This module does not support Unicode word boundaries. ASCII word boundaries
+may be used though by disabling Unicode or selectively doing so in the syntax,
+e.g., `(?-u:\b)`. There is also an option to
+[heuristically enable Unicode word boundaries](crate::dfa::dense::Config::unicode_word_boundary),
+where the corresponding DFA will give up if any non-ASCII byte is seen.
+* As a lower level API, this module does not do literal optimizations
+automatically, although it does provide hooks in its API to make use of the
+[`Prefilter`](crate::util::prefilter::Prefilter) trait. Missing literal
+optimizations means that searches may run much slower than what you're
+accustomed to, although it does provide more predictable and consistent
+performance.
+* There is no `&str` API like in the regex crate. In this module, all APIs
+operate on `&[u8]`. By default, match indices are
+guaranteed to fall on UTF-8 boundaries, unless either of
+[`syntax::Config::utf8`](crate::util::syntax::Config::utf8) or
+[`thompson::Config::utf8`](crate::nfa::thompson::Config::utf8) are disabled.
+
+With some of the downsides out of the way, here are some positive differences:
+
+* Both dense and sparse DFAs can be serialized to raw bytes, and then cheaply
+deserialized. Deserialization can be done in constant time with the unchecked
+APIs, since searching can be performed directly on the raw serialized bytes of
+a DFA.
+* This module was specifically designed so that the searching phase of a
+DFA has minimal runtime requirements, and can therefore be used in `no_std`
+environments. While `no_std` environments cannot compile regexes, they can
+deserialize pre-compiled regexes.
+* Since this module builds DFAs ahead of time, it will generally out-perform
+the `regex` crate on equivalent tasks. The performance difference is likely
+not large. However, because of a complex set of optimizations in the regex
+crate (like literal optimizations), an accurate performance comparison may be
+difficult to do.
+* Sparse DFAs provide a way to build a DFA ahead of time that sacrifices search
+performance a small amount, but uses much less storage space. Potentially even
+less than what the regex crate uses.
+* This module exposes DFAs directly, such as [`dense::DFA`] and
+[`sparse::DFA`], which enables one to do less work in some cases. For example,
+if you only need the end of a match and not the start of a match, then you can
+use a DFA directly without building a `Regex`, which always requires a second
+DFA to find the start of a match.
+* This module provides more control over memory usage. Aside from choosing
+between dense and sparse DFAs, one can also choose a smaller state identifier
+representation to use less space. Also, one can enable DFA minimization
+via [`dense::Config::minimize`], but it can increase compilation times
+dramatically.
+*/
+
+#[cfg(feature = "dfa-search")]
+pub use crate::dfa::{
+    automaton::{Automaton, OverlappingState, StartError},
+    start::StartKind,
+};
+
+/// This is an alias for a state ID of zero. It has special significance
+/// because it always corresponds to the first state in a DFA, and the first
+/// state in a DFA is always "dead." That is, the dead state always has all
+/// of its transitions set to itself. Moreover, the dead state is used as a
+/// sentinel for various things. For example, in search, reaching a dead state
+/// means that the search must stop.
+const DEAD: crate::util::primitives::StateID =
+    crate::util::primitives::StateID::ZERO;
+
+#[cfg(feature = "dfa-search")]
+pub mod dense;
+#[cfg(feature = "dfa-onepass")]
+pub mod onepass;
+#[cfg(feature = "dfa-search")]
+pub mod regex;
+#[cfg(feature = "dfa-search")]
+pub mod sparse;
+
+#[cfg(feature = "dfa-search")]
+pub(crate) mod accel;
+#[cfg(feature = "dfa-search")]
+mod automaton;
+#[cfg(feature = "dfa-build")]
+mod determinize;
+#[cfg(feature = "dfa-build")]
+mod minimize;
+#[cfg(any(feature = "dfa-build", feature = "dfa-onepass"))]
+mod remapper;
+#[cfg(feature = "dfa-search")]
+mod search;
+#[cfg(feature = "dfa-search")]
+mod special;
+#[cfg(feature = "dfa-search")]
+mod start;
diff --git a/vendor/regex-automata/src/dfa/onepass.rs b/vendor/regex-automata/src/dfa/onepass.rs
new file mode 100644
index 0000000..e62bbd3
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/onepass.rs
@@ -0,0 +1,3192 @@
+/*!
+A DFA that can return spans for matching capturing groups.
+
+This module is the home of a [one-pass DFA](DFA).
+
+This module also contains a [`Builder`] and a [`Config`] for building and
+configuring a one-pass DFA.
+*/
+
+// A note on naming and credit:
+//
+// As far as I know, Russ Cox came up with the practical vision and
+// implementation of a "one-pass regex engine." He mentions and describes it
+// briefly in the third article of his regexp article series:
+// https://swtch.com/~rsc/regexp/regexp3.html
+//
+// Cox's implementation is in RE2, and the implementation below is most
+// heavily inspired by RE2's. The key thing they have in common is that
+// their transitions are defined over an alphabet of bytes. In contrast,
+// Go's regex engine also has a one-pass engine, but its transitions are
+// more firmly rooted on Unicode codepoints. The ideas are the same, but the
+// implementations are different.
+//
+// RE2 tends to call this a "one-pass NFA." Here, we call it a "one-pass DFA."
+// They're both true in their own ways:
+//
+// * The "one-pass" criterion is generally a property of the NFA itself. In
+// particular, it is said that an NFA is one-pass if, after each byte of input
+// during a search, there is at most one "VM thread" remaining to take for the
+// next byte of input. That is, there is never any ambiguity as to the path to
+// take through the NFA during a search.
+//
+// * On the other hand, once a one-pass NFA has its representation converted
+// to something where a constant number of instructions is used for each byte
+// of input, the implementation looks a lot more like a DFA. It's technically
+// more powerful than a DFA since it has side effects (storing offsets inside
+// of slots activated by a transition), but it is far closer to a DFA than an
+// NFA simulation.
+//
+// Thus, in this crate, we call it a one-pass DFA.
+
+use alloc::{vec, vec::Vec};
+
+use crate::{
+    dfa::{remapper::Remapper, DEAD},
+    nfa::thompson::{self, NFA},
+    util::{
+        alphabet::ByteClasses,
+        captures::Captures,
+        escape::DebugByte,
+        int::{Usize, U32, U64, U8},
+        look::{Look, LookSet, UnicodeWordBoundaryError},
+        primitives::{NonMaxUsize, PatternID, StateID},
+        search::{Anchored, Input, Match, MatchError, MatchKind, Span},
+        sparse_set::SparseSet,
+    },
+};
+
+/// The configuration used for building a [one-pass DFA](DFA).
+///
+/// A one-pass DFA configuration is a simple data object that is typically used
+/// with [`Builder::configure`]. It can be cheaply cloned.
+///
+/// A default configuration can be created either with `Config::new`, or
+/// perhaps more conveniently, with [`DFA::config`].
+#[derive(Clone, Debug, Default)]
+pub struct Config {
+    match_kind: Option<MatchKind>,
+    starts_for_each_pattern: Option<bool>,
+    byte_classes: Option<bool>,
+    size_limit: Option<Option<usize>>,
+}
+
+impl Config {
+    /// Return a new default one-pass DFA configuration.
+    pub fn new() -> Config {
+        Config::default()
+    }
+
+    /// Set the desired match semantics.
+    ///
+    /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the
+    /// match semantics of Perl-like regex engines. That is, when multiple
+    /// patterns would match at the same leftmost position, the pattern that
+    /// appears first in the concrete syntax is chosen.
+    ///
+    /// Currently, the only other kind of match semantics supported is
+    /// [`MatchKind::All`]. This corresponds to "classical DFA" construction
+    /// where all possible matches are visited.
+    ///
+    /// When it comes to the one-pass DFA, it is rarer for preference order and
+    /// "longest match" to actually disagree, since if they did disagree, then
+    /// the regex typically isn't one-pass. For example, searching `Samwise`
+    /// for `Sam|Samwise` will report `Sam` for leftmost-first matching and
+    /// `Samwise` for "longest match" or "all" matching. However, this regex is
+    /// not one-pass if taken literally. The equivalent regex, `Sam(?:|wise)`,
+    /// is one-pass and `Sam|Samwise` may be optimized to it.
+    ///
+    /// The other main difference is that "all" match semantics don't support
+    /// non-greedy matches. "All" match semantics always try to match as much
+    /// as possible.
+    pub fn match_kind(mut self, kind: MatchKind) -> Config {
+        self.match_kind = Some(kind);
+        self
+    }
+
+    /// Whether to compile a separate start state for each pattern in the
+    /// one-pass DFA.
+    ///
+    /// When enabled, a separate **anchored** start state is added for each
+    /// pattern in the DFA. When this start state is used, then the DFA will
+    /// only search for matches for the pattern specified, even if there are
+    /// other patterns in the DFA.
+    ///
+    /// The main downside of this option is that it can potentially increase
+    /// the size of the DFA and/or increase the time it takes to build the DFA.
+    ///
+    /// You might want to enable this option when you want to both search for
+    /// anchored matches of any pattern and to search for anchored matches of
+    /// one particular pattern while using the same DFA. (Otherwise, you would
+    /// need to compile a new DFA for each pattern.)
+    ///
+    /// By default this is disabled.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a multi-regex and then search for
+    /// matches for any of the patterns or matches for a specific pattern.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::onepass::DFA, Anchored, Input, Match, PatternID,
+    /// };
+    ///
+    /// let re = DFA::builder()
+    ///     .configure(DFA::config().starts_for_each_pattern(true))
+    ///     .build_many(&["[a-z]+", "[0-9]+"])?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let haystack = "123abc";
+    /// let input = Input::new(haystack).anchored(Anchored::Yes);
+    ///
+    /// // A normal multi-pattern search will show pattern 1 matches.
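+    /// // (With Anchored::Yes, only matches beginning at position 0 are
+    /// // found, and only pattern 1, '[0-9]+', can match '123abc' there.)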
+    /// re.try_search(&mut cache, &input, &mut caps)?;
+    /// assert_eq!(Some(Match::must(1, 0..3)), caps.get_match());
+    ///
+    /// // If we only want to report pattern 0 matches, then we'll get no
+    /// // match here.
+    /// let input = input.anchored(Anchored::Pattern(PatternID::must(0)));
+    /// re.try_search(&mut cache, &input, &mut caps)?;
+    /// assert_eq!(None, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn starts_for_each_pattern(mut self, yes: bool) -> Config {
+        self.starts_for_each_pattern = Some(yes);
+        self
+    }
+
+    /// Whether to attempt to shrink the size of the DFA's alphabet or not.
+    ///
+    /// This option is enabled by default and should never be disabled unless
+    /// one is debugging a one-pass DFA.
+    ///
+    /// When enabled, the DFA will use a map from all possible bytes to their
+    /// corresponding equivalence class. Each equivalence class represents a
+    /// set of bytes that does not discriminate between a match and a non-match
+    /// in the DFA. For example, the pattern `[ab]+` has at least two
+    /// equivalence classes: a set containing `a` and `b` and a set containing
+    /// every byte except for `a` and `b`. `a` and `b` are in the same
+    /// equivalence class because they never discriminate between a match and a
+    /// non-match.
+    ///
+    /// The advantage of this map is that the size of the transition table
+    /// can be reduced drastically from (approximately) `#states * 256 *
+    /// sizeof(StateID)` to `#states * k * sizeof(StateID)` where `k` is the
+    /// number of equivalence classes (rounded up to the nearest power of 2).
+    /// As a result, total space usage can decrease substantially. Moreover,
+    /// since a smaller alphabet is used, DFA compilation becomes faster as
+    /// well.
+    ///
+    /// **WARNING:** This is only useful for debugging DFAs. Disabling this
+    /// does not yield any speed advantages. Namely, even when this is
+    /// disabled, a byte class map is still used while searching. The only
+    /// difference is that every byte will be forced into its own distinct
+    /// equivalence class. This is useful for debugging the actual generated
+    /// transitions because it lets one see the transitions defined on actual
+    /// bytes instead of the equivalence classes.
+    pub fn byte_classes(mut self, yes: bool) -> Config {
+        self.byte_classes = Some(yes);
+        self
+    }
+
+    /// Set a size limit on the total heap used by a one-pass DFA.
+    ///
+    /// This size limit is expressed in bytes and is applied during
+    /// construction of a one-pass DFA. If the DFA's heap usage exceeds
+    /// this configured limit, then construction is stopped and an error is
+    /// returned.
+    ///
+    /// The default is no limit.
+    ///
+    /// # Example
+    ///
+    /// This example shows a one-pass DFA that fails to build because of
+    /// a configured size limit. This particular example also serves as a
+    /// cautionary tale demonstrating just how big DFAs with large Unicode
+    /// character classes can get.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::onepass::DFA, Match};
+    ///
+    /// // 6MB isn't enough!
+    /// DFA::builder()
+    ///     .configure(DFA::config().size_limit(Some(6_000_000)))
+    ///     .build(r"\w{20}")
+    ///     .unwrap_err();
+    ///
+    /// // ... but 7MB probably is!
+    /// // (Note that DFA sizes aren't necessarily stable between releases.)
+    /// let re = DFA::builder()
+    ///     .configure(DFA::config().size_limit(Some(7_000_000)))
+    ///     .build(r"\w{20}")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let haystack = "A".repeat(20);
+    /// re.captures(&mut cache, &haystack, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 0..20)), caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// While one needs a little more than 3MB to represent `\w{20}`, it
+    /// turns out that you only need a little more than 4KB to represent
+    /// `(?-u:\w{20})`. So only use Unicode if you need it!
+    pub fn size_limit(mut self, limit: Option<usize>) -> Config {
+        self.size_limit = Some(limit);
+        self
+    }
+
+    /// Returns the match semantics set in this configuration.
+    pub fn get_match_kind(&self) -> MatchKind {
+        self.match_kind.unwrap_or(MatchKind::LeftmostFirst)
+    }
+
+    /// Returns whether this configuration has enabled anchored starting states
+    /// for every pattern in the DFA.
+    pub fn get_starts_for_each_pattern(&self) -> bool {
+        self.starts_for_each_pattern.unwrap_or(false)
+    }
+
+    /// Returns whether this configuration has enabled byte classes or not.
+    /// This is typically a debugging oriented option, as disabling it confers
+    /// no speed benefit.
+    pub fn get_byte_classes(&self) -> bool {
+        self.byte_classes.unwrap_or(true)
+    }
+
+    /// Returns the DFA size limit of this configuration if one was set.
+    /// The size limit is the total number of bytes on the heap that a DFA is
+    /// permitted to use. If the DFA exceeds this limit during construction,
+    /// then construction is stopped and an error is returned.
+    pub fn get_size_limit(&self) -> Option<usize> {
+        self.size_limit.unwrap_or(None)
+    }
+
+    /// Overwrite the default configuration such that the options in `o` are
+    /// always used. If an option in `o` is not set, then the corresponding
+    /// option in `self` is used. If it's not set in `self` either, then it
+    /// remains not set.
+    pub(crate) fn overwrite(&self, o: Config) -> Config {
+        Config {
+            match_kind: o.match_kind.or(self.match_kind),
+            starts_for_each_pattern: o
+                .starts_for_each_pattern
+                .or(self.starts_for_each_pattern),
+            byte_classes: o.byte_classes.or(self.byte_classes),
+            size_limit: o.size_limit.or(self.size_limit),
+        }
+    }
+}
+
+/// A builder for a [one-pass DFA](DFA).
+///
+/// This builder permits configuring options for the syntax of a pattern, the
+/// NFA construction and the DFA construction. This builder is different from a
+/// general purpose regex builder in that it permits fine grain configuration
+/// of the construction process. The trade off for this is complexity, and
+/// the possibility of setting a configuration that might not make sense. For
+/// example, there are two different UTF-8 modes:
+///
+/// * [`syntax::Config::utf8`](crate::util::syntax::Config::utf8) controls
+/// whether the pattern itself can contain sub-expressions that match invalid
+/// UTF-8.
+/// * [`thompson::Config::utf8`] controls whether empty matches that split a
+/// Unicode codepoint are reported or not.
+///
+/// Generally speaking, callers will want to either enable all of these or
+/// disable all of these.
+///
+/// # Example
+///
+/// This example shows how to disable UTF-8 mode in the syntax and the NFA.
+/// This is generally what you want for matching on arbitrary bytes.
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::{
+///     dfa::onepass::DFA,
+///     nfa::thompson,
+///     util::syntax,
+///     Match,
+/// };
+///
+/// let re = DFA::builder()
+///     .syntax(syntax::Config::new().utf8(false))
+///     .thompson(thompson::Config::new().utf8(false))
+///     .build(r"foo(?-u:[^b])ar.*")?;
+/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+///
+/// let haystack = b"foo\xFFarzz\xE2\x98\xFF\n";
+/// re.captures(&mut cache, haystack, &mut caps);
+/// // Notice that `(?-u:[^b])` matches invalid UTF-8,
+/// // but the subsequent `.*` does not! Disabling UTF-8
+/// // on the syntax permits this.
+/// //
+/// // N.B. This example does not show the impact of
+/// // disabling UTF-8 mode on a one-pass DFA Config,
+/// // since that only impacts regexes that can
+/// // produce matches of length 0.
+/// assert_eq!(Some(Match::must(0, 0..8)), caps.get_match());
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Builder {
+    config: Config,
+    #[cfg(feature = "syntax")]
+    thompson: thompson::Compiler,
+}
+
+impl Builder {
+    /// Create a new one-pass DFA builder with the default configuration.
+    pub fn new() -> Builder {
+        Builder {
+            config: Config::default(),
+            #[cfg(feature = "syntax")]
+            thompson: thompson::Compiler::new(),
+        }
+    }
+
+    /// Build a one-pass DFA from the given pattern.
+    ///
+    /// If there was a problem parsing or compiling the pattern, then an error
+    /// is returned.
+    #[cfg(feature = "syntax")]
+    pub fn build(&self, pattern: &str) -> Result<DFA, BuildError> {
+        self.build_many(&[pattern])
+    }
+
+    /// Build a one-pass DFA from the given patterns.
+    ///
+    /// When matches are returned, the pattern ID corresponds to the index of
+    /// the pattern in the slice given.
+    #[cfg(feature = "syntax")]
+    pub fn build_many<P: AsRef<str>>(
+        &self,
+        patterns: &[P],
+    ) -> Result<DFA, BuildError> {
+        let nfa =
+            self.thompson.build_many(patterns).map_err(BuildError::nfa)?;
+        self.build_from_nfa(nfa)
+    }
+
+    /// Build a DFA from the given NFA.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a DFA if you already have an NFA in
+    /// hand.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, nfa::thompson::NFA, Match};
+    ///
+    /// // This shows how to set non-default options for building an NFA.
+    /// let nfa = NFA::compiler()
+    ///     .configure(NFA::config().shrink(true))
+    ///     .build(r"[a-z0-9]+")?;
+    /// let re = DFA::builder().build_from_nfa(nfa)?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// re.captures(&mut cache, "foo123bar", &mut caps);
+    /// assert_eq!(Some(Match::must(0, 0..9)), caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn build_from_nfa(&self, nfa: NFA) -> Result<DFA, BuildError> {
+        // Why take ownership if we're just going to pass a reference to the
+        // NFA to our internal builder? Well, the first thing to note is that
+        // an NFA uses reference counting internally, so either choice is going
+        // to be cheap. So there isn't much cost either way.
+        //
+        // The real reason is that a one-pass DFA, semantically, shares
+        // ownership of an NFA. This is unlike other DFAs that don't share
+        // ownership of an NFA at all, primarily because they want to be
+        // self-contained in order to support cheap (de)serialization.
+        //
+        // But then why pass a '&nfa' below if we want to share ownership?
+        // Well, it turns out that using a '&NFA' in our internal builder
+        // separates its lifetime from the DFA we're building, and this
+        // turns out to make code a bit more composable. e.g., We can
+        // iterate over things inside the NFA while borrowing the builder as
+        // mutable because we know the NFA cannot be mutated. So TL;DR ---
+        // this weirdness is "because borrow checker."
+        InternalBuilder::new(self.config.clone(), &nfa).build()
+    }
+
+    /// Apply the given one-pass DFA configuration options to this builder.
+    pub fn configure(&mut self, config: Config) -> &mut Builder {
+        self.config = self.config.overwrite(config);
+        self
+    }
+
+    /// Set the syntax configuration for this builder using
+    /// [`syntax::Config`](crate::util::syntax::Config).
+    ///
+    /// This permits setting things like case insensitivity, Unicode and
+    /// multi-line mode.
+    ///
+    /// These settings only apply when constructing a one-pass DFA directly
+    /// from a pattern.
+    #[cfg(feature = "syntax")]
+    pub fn syntax(
+        &mut self,
+        config: crate::util::syntax::Config,
+    ) -> &mut Builder {
+        self.thompson.syntax(config);
+        self
+    }
+
+    /// Set the Thompson NFA configuration for this builder using
+    /// [`nfa::thompson::Config`](crate::nfa::thompson::Config).
+    ///
+    /// This permits setting things like whether additional time should be
+    /// spent shrinking the size of the NFA.
+    ///
+    /// These settings only apply when constructing a DFA directly from a
+    /// pattern.
+    #[cfg(feature = "syntax")]
+    pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder {
+        self.thompson.configure(config);
+        self
+    }
+}
+
+/// An internal builder for encapsulating the state necessary to build a
+/// one-pass DFA. Typical use is just `InternalBuilder::new(..).build()`.
+///
+/// There is no separate pass for determining whether the NFA is one-pass or
+/// not. We just try to build the DFA. If during construction we discover
+/// that it is not one-pass, we bail out. This is likely to lead to some
+/// undesirable expense in some cases, so it might make sense to try to
+/// identify common patterns in the NFA that make it definitively not
+/// one-pass. That way, we can avoid ever trying to build a one-pass DFA in
+/// the first place. For example, '\w*\s' is not one-pass, and since '\w' is
+/// Unicode-aware by default, it's probably not a trivial cost to try and
+/// build a one-pass DFA for it and then fail.
+///
+/// Note that some (immutable) fields are duplicated here. For example, the
+/// 'nfa' and 'classes' fields are both in the 'DFA'. They are the same
+/// thing, but we duplicate them because it makes composition easier below.
+/// Otherwise, since the borrow checker can't see through method calls, the
+/// mutable borrow we use to mutate the DFA winds up preventing borrowing
+/// from any other part of the DFA, even though we aren't mutating those
+/// parts. We only do this because the duplication is cheap.
+#[derive(Debug)]
+struct InternalBuilder<'a> {
+    /// The DFA we're building.
+    dfa: DFA,
+    /// An unordered collection of NFA state IDs that we haven't yet tried
+    /// to build into a DFA state.
+    ///
+    /// This collection does not ultimately wind up including every NFA
+    /// state ID. Instead, each ID represents a "start" state for a
+    /// sub-graph of the NFA. The set of NFA states we then use to build a
+    /// DFA state consists of that "start" state and all states reachable
+    /// from it via epsilon transitions.
+    uncompiled_nfa_ids: Vec<StateID>,
+    /// A map from NFA state ID to DFA state ID.
+    /// This is useful for easily determining whether an NFA state has been
+    /// used as a "starting" point to build a DFA state yet. If it hasn't,
+    /// then it is mapped to DEAD, and since DEAD is specially added and
+    /// never corresponds to any NFA state, it follows that a mapping to
+    /// DEAD implies the NFA state has no corresponding DFA state yet.
+    nfa_to_dfa_id: Vec<StateID>,
+    /// A stack used to traverse the NFA states that make up a single DFA
+    /// state. Traversal occurs until the stack is empty, and we only push
+    /// to the stack when the state ID isn't in 'seen'. Actually, even more
+    /// than that, if we try to push something on to this stack that is
+    /// already in 'seen', then we bail out on construction completely,
+    /// since it implies that the NFA is not one-pass.
+    stack: Vec<(StateID, Epsilons)>,
+    /// The set of NFA states that we've visited via 'stack'.
+    seen: SparseSet,
+    /// Whether a match NFA state has been observed while constructing a
+    /// one-pass DFA state. Once a match state is seen, assuming we are
+    /// using leftmost-first match semantics, then we don't add any more
+    /// transitions to the DFA state we're building.
+    matched: bool,
+    /// The config passed to the builder.
+    ///
+    /// This is duplicated in dfa.config.
+    config: Config,
+    /// The NFA we're building a one-pass DFA from.
+    ///
+    /// This is duplicated in dfa.nfa.
+    nfa: &'a NFA,
+    /// The equivalence classes that make up the alphabet for this DFA.
+    ///
+    /// This is duplicated in dfa.classes.
+    classes: ByteClasses,
+}
+
+impl<'a> InternalBuilder<'a> {
+    /// Create a new builder with an initial empty DFA.
+    fn new(config: Config, nfa: &'a NFA) -> InternalBuilder<'a> {
+        let classes = if !config.get_byte_classes() {
+            // A one-pass DFA will always use the equivalence class map, but
+            // enabling this option is useful for debugging. Namely, this
+            // will cause all transitions to be defined over their actual
+            // bytes instead of an opaque equivalence class identifier. The
+            // former is much easier to grok as a human.
+            ByteClasses::singletons()
+        } else {
+            nfa.byte_classes().clone()
+        };
+        // Normally a DFA alphabet includes the EOI symbol, but we don't
+        // need that in the one-pass DFA since we handle look-around
+        // explicitly without encoding it into the DFA. Thus, we don't need
+        // to delay matches by 1 byte. However, we reuse the space that
+        // *would* be used by the EOI transition by putting match
+        // information there (like which pattern matches and which
+        // look-around assertions need to hold). So this means our real
+        // alphabet length is 1 fewer than what the byte classes report,
+        // since we don't use EOI.
+        let alphabet_len = classes.alphabet_len().checked_sub(1).unwrap();
+        let stride2 = classes.stride2();
+        let dfa = DFA {
+            config: config.clone(),
+            nfa: nfa.clone(),
+            table: vec![],
+            starts: vec![],
+            // Since one-pass DFAs have a smaller state ID max than
+            // StateID::MAX, it follows that StateID::MAX is a valid initial
+            // value for min_match_id since no state ID can ever be greater
+            // than it. In the case of a one-pass DFA with no match states,
+            // the min_match_id will keep this sentinel value.
+            min_match_id: StateID::MAX,
+            classes: classes.clone(),
+            alphabet_len,
+            stride2,
+            pateps_offset: alphabet_len,
+            // OK because PatternID::MAX*2 is guaranteed not to overflow.
+            explicit_slot_start: nfa.pattern_len().checked_mul(2).unwrap(),
+        };
+        InternalBuilder {
+            dfa,
+            uncompiled_nfa_ids: vec![],
+            nfa_to_dfa_id: vec![DEAD; nfa.states().len()],
+            stack: vec![],
+            seen: SparseSet::new(nfa.states().len()),
+            matched: false,
+            config,
+            nfa,
+            classes,
+        }
+    }
+
+    /// Build the DFA from the NFA given to this builder. If the NFA is not
+    /// one-pass, then return an error. An error may also be returned if a
+    /// particular limit is exceeded. (Some limits, like the total heap
+    /// memory used, are configurable. Others, like the total patterns or
+    /// slots, are hard-coded based on representational limitations.)
+    fn build(mut self) -> Result<DFA, BuildError> {
+        self.nfa.look_set_any().available().map_err(BuildError::word)?;
+        for look in self.nfa.look_set_any().iter() {
+            // This is a future incompatibility check where if we add any
+            // more look-around assertions, then the one-pass DFA either
+            // needs to reject them (what we do here) or it needs to have
+            // its Transition representation modified to be capable of
+            // storing the new assertions.
+            if look.as_repr() > Look::WordUnicodeNegate.as_repr() {
+                return Err(BuildError::unsupported_look(look));
+            }
+        }
+        if self.nfa.pattern_len().as_u64() > PatternEpsilons::PATTERN_ID_LIMIT
+        {
+            return Err(BuildError::too_many_patterns(
+                PatternEpsilons::PATTERN_ID_LIMIT,
+            ));
+        }
+        if self.nfa.group_info().explicit_slot_len() > Slots::LIMIT {
+            return Err(BuildError::not_one_pass(
+                "too many explicit capturing groups (max is 16)",
+            ));
+        }
+        assert_eq!(DEAD, self.add_empty_state()?);
+
+        // This is where the explicit slots start. We care about this
+        // because we only need to track explicit slots. The implicit
+        // slots---two for each pattern---are tracked as part of the search
+        // routine itself.
+        let explicit_slot_start = self.nfa.pattern_len() * 2;
+        self.add_start_state(None, self.nfa.start_anchored())?;
+        if self.config.get_starts_for_each_pattern() {
+            for pid in self.nfa.patterns() {
+                self.add_start_state(
+                    Some(pid),
+                    self.nfa.start_pattern(pid).unwrap(),
+                )?;
+            }
+        }
+        // NOTE: One wonders what the effects of treating
+        // 'uncompiled_nfa_ids' as a stack are. It is really an unordered
+        // *set* of NFA state IDs. If it, for example, in practice led to
+        // discovering whether a regex was or wasn't one-pass later than if
+        // we processed NFA state IDs in ascending order, then that would
+        // make this routine more costly in the somewhat common case of a
+        // regex that isn't one-pass.
+        while let Some(nfa_id) = self.uncompiled_nfa_ids.pop() {
+            let dfa_id = self.nfa_to_dfa_id[nfa_id];
+            // Once we see a match, we keep going, but don't add any new
+            // transitions. Normally we'd just stop, but we have to keep
+            // going in order to verify that our regex is actually one-pass.
+            self.matched = false;
+            // The NFA states we've already explored for this DFA state.
+            self.seen.clear();
+            // The NFA states to explore via epsilon transitions. If we ever
+            // try to push an NFA state that we've already seen, then the
+            // NFA is not one-pass because it implies there are multiple
+            // epsilon transition paths that lead to the same NFA state. In
+            // other words, there is ambiguity.
+            self.stack_push(nfa_id, Epsilons::empty())?;
+            while let Some((id, epsilons)) = self.stack.pop() {
+                match *self.nfa.state(id) {
+                    thompson::State::ByteRange { ref trans } => {
+                        self.compile_transition(dfa_id, trans, epsilons)?;
+                    }
+                    thompson::State::Sparse(ref sparse) => {
+                        for trans in sparse.transitions.iter() {
+                            self.compile_transition(dfa_id, trans, epsilons)?;
+                        }
+                    }
+                    thompson::State::Dense(ref dense) => {
+                        for trans in dense.iter() {
+                            self.compile_transition(dfa_id, &trans, epsilons)?;
+                        }
+                    }
+                    thompson::State::Look { look, next } => {
+                        let looks = epsilons.looks().insert(look);
+                        self.stack_push(next, epsilons.set_looks(looks))?;
+                    }
+                    thompson::State::Union { ref alternates } => {
+                        for &sid in alternates.iter().rev() {
+                            self.stack_push(sid, epsilons)?;
+                        }
+                    }
+                    thompson::State::BinaryUnion { alt1, alt2 } => {
+                        self.stack_push(alt2, epsilons)?;
+                        self.stack_push(alt1, epsilons)?;
+                    }
+                    thompson::State::Capture { next, slot, .. } => {
+                        let slot = slot.as_usize();
+                        let epsilons = if slot < explicit_slot_start {
+                            // If this is an implicit slot, we don't care
+                            // about it, since we handle implicit slots in
+                            // the search routine. We can get away with that
+                            // because there are 2 implicit slots for every
+                            // pattern.
+                            epsilons
+                        } else {
+                            // Offset our explicit slots so that they start
+                            // at index 0.
+                            let offset = slot - explicit_slot_start;
+                            epsilons.set_slots(epsilons.slots().insert(offset))
+                        };
+                        self.stack_push(next, epsilons)?;
+                    }
+                    thompson::State::Fail => {
+                        continue;
+                    }
+                    thompson::State::Match { pattern_id } => {
+                        // If we found two different paths to a match state
+                        // for the same DFA state, then we have ambiguity.
+                        // Thus, it's not one-pass.
+                        if self.matched {
+                            return Err(BuildError::not_one_pass(
+                                "multiple epsilon transitions to match state",
+                            ));
+                        }
+                        self.matched = true;
+                        // Shove the matching pattern ID and the 'epsilons'
+                        // into the current DFA state's pattern epsilons.
+                        // The 'epsilons' includes the slots we need to
+                        // capture before reporting the match and also the
+                        // conditional epsilon transitions we need to check
+                        // before we can report a match.
+                        self.dfa.set_pattern_epsilons(
+                            dfa_id,
+                            PatternEpsilons::empty()
+                                .set_pattern_id(pattern_id)
+                                .set_epsilons(epsilons),
+                        );
+                        // N.B. It is tempting to just bail out here when
+                        // compiling a leftmost-first DFA, since we will
+                        // never compile any more transitions in that case.
+                        // But we actually need to keep going in order to
+                        // verify that we have a one-pass regex. e.g., We
+                        // might see more Match states (e.g., for other
+                        // patterns) that imply that we don't have a
+                        // one-pass regex. So instead, we mark that we've
+                        // found a match and continue on. When we go to
+                        // compile a new DFA state, we just skip that part.
+                        // But otherwise check that the one-pass property is
+                        // upheld.
+                    }
+                }
+            }
+        }
+        self.shuffle_states();
+        Ok(self.dfa)
+    }
+
+    /// Shuffle all match states to the end of the transition table and set
+    /// 'min_match_id' to the ID of the first such match state.
+    ///
+    /// The point of this is to make it extremely cheap to determine whether
+    /// a state is a match state or not. We need to check this on every
+    /// transition during a search, so it being cheap is important. This
+    /// permits us to check it by simply comparing two state identifiers, as
+    /// opposed to looking for the pattern ID in the state's
+    /// `PatternEpsilons`. (Which requires a memory load and some light
+    /// arithmetic.)
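+    ///
+    /// In effect, after this shuffle, the match check in the search loop
+    /// reduces to a single comparison. As a sketch (this is not the literal
+    /// search code):
+    ///
+    /// ```text
+    /// is_match = sid >= dfa.min_match_id
+    /// ```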
+    fn shuffle_states(&mut self) {
+        let mut remapper = Remapper::new(&self.dfa);
+        let mut next_dest = self.dfa.last_state_id();
+        for i in (0..self.dfa.state_len()).rev() {
+            let id = StateID::must(i);
+            let is_match =
+                self.dfa.pattern_epsilons(id).pattern_id().is_some();
+            if !is_match {
+                continue;
+            }
+            remapper.swap(&mut self.dfa, next_dest, id);
+            self.dfa.min_match_id = next_dest;
+            next_dest = self.dfa.prev_state_id(next_dest).expect(
+                "match states should be a proper subset of all states",
+            );
+        }
+        remapper.remap(&mut self.dfa);
+    }
+
+    /// Compile the given NFA transition into the DFA state given.
+    ///
+    /// 'Epsilons' corresponds to any conditional epsilon transitions that
+    /// need to be satisfied to follow this transition, and any slots that
+    /// need to be saved if the transition is followed.
+    ///
+    /// If this transition indicates that the NFA is not one-pass, then
+    /// this returns an error. (This occurs, for example, if the DFA state
+    /// already has a transition defined for the same input symbols as the
+    /// given transition, *and* the result of the old and new transitions is
+    /// different.)
+    fn compile_transition(
+        &mut self,
+        dfa_id: StateID,
+        trans: &thompson::Transition,
+        epsilons: Epsilons,
+    ) -> Result<(), BuildError> {
+        let next_dfa_id = self.add_dfa_state_for_nfa_state(trans.next)?;
+        for byte in self
+            .classes
+            .representatives(trans.start..=trans.end)
+            .filter_map(|r| r.as_u8())
+        {
+            let oldtrans = self.dfa.transition(dfa_id, byte);
+            let newtrans =
+                Transition::new(self.matched, next_dfa_id, epsilons);
+            // If the old transition points to the DEAD state, then we know
+            // 'byte' has not been mapped to any transition for this DFA
+            // state yet. So set it unconditionally. Otherwise, we require
+            // that the old and new transitions are equivalent. If they
+            // aren't, then there is ambiguity and thus the regex is not
+            // one-pass.
+            if oldtrans.state_id() == DEAD {
+                self.dfa.set_transition(dfa_id, byte, newtrans);
+            } else if oldtrans != newtrans {
+                return Err(BuildError::not_one_pass(
+                    "conflicting transition",
+                ));
+            }
+        }
+        Ok(())
+    }
+
+    /// Add a start state to the DFA corresponding to the given NFA starting
+    /// state ID.
+    ///
+    /// If adding a state would blow any limits (configured or hard-coded),
+    /// then an error is returned.
+    ///
+    /// If the starting state is an anchored state for a particular pattern,
+    /// then callers must provide the pattern ID for that starting state.
+    /// Callers must also ensure that the first starting state added is the
+    /// start state for all patterns, and then each anchored starting state
+    /// for each pattern (if necessary) added in order. Otherwise, this
+    /// panics.
+    fn add_start_state(
+        &mut self,
+        pid: Option<PatternID>,
+        nfa_id: StateID,
+    ) -> Result<StateID, BuildError> {
+        match pid {
+            // With no pid, this should be the start state for all patterns
+            // and thus be the first one.
+            None => assert!(self.dfa.starts.is_empty()),
+            // With a pid, we want it to be at self.dfa.starts[pid+1].
+            Some(pid) => assert!(self.dfa.starts.len() == pid.one_more()),
+        }
+        let dfa_id = self.add_dfa_state_for_nfa_state(nfa_id)?;
+        self.dfa.starts.push(dfa_id);
+        Ok(dfa_id)
+    }
+
+    /// Add a new DFA state corresponding to the given NFA state. If adding
+    /// a state would blow any limits (configured or hard-coded), then an
+    /// error is returned. If a DFA state already exists for the given NFA
+    /// state, then that DFA state's ID is returned and no new states are
+    /// added.
+    ///
+    /// It is not expected that this routine is called for every NFA state.
+    /// Instead, an NFA state ID will usually correspond to the "start"
+    /// state for a sub-graph of the NFA, where all states in the sub-graph
+    /// are reachable via epsilon transitions (conditional or
+    /// unconditional). That sub-graph of NFA states is ultimately what
+    /// produces a single DFA state.
+    fn add_dfa_state_for_nfa_state(
+        &mut self,
+        nfa_id: StateID,
+    ) -> Result<StateID, BuildError> {
+        // If we've already built a DFA state for the given NFA state, then
+        // just return that. We definitely do not want to have more than one
+        // DFA state in existence for the same NFA state, since all but one
+        // of them will likely become unreachable. And at least some of them
+        // are likely to wind up being incomplete.
+        let existing_dfa_id = self.nfa_to_dfa_id[nfa_id];
+        if existing_dfa_id != DEAD {
+            return Ok(existing_dfa_id);
+        }
+        // If we don't have any DFA state yet, add it and then add the given
+        // NFA state to the list of states to explore.
+        let dfa_id = self.add_empty_state()?;
+        self.nfa_to_dfa_id[nfa_id] = dfa_id;
+        self.uncompiled_nfa_ids.push(nfa_id);
+        Ok(dfa_id)
+    }
+
+    /// Unconditionally add a new empty DFA state. If adding it would exceed
+    /// any limits (configured or hard-coded), then an error is returned.
+    /// The ID of the new state is returned on success.
+    ///
+    /// The added state is *not* a match state.
+    fn add_empty_state(&mut self) -> Result<StateID, BuildError> {
+        let state_limit = Transition::STATE_ID_LIMIT;
+        // Note that unlike dense and lazy DFAs, we specifically do NOT
+        // premultiply our state IDs here. The reason is that we want to
+        // pack our state IDs into 64-bit transitions with other info, so
+        // the fewer the bits we use for state IDs the better. If we
+        // premultiply, then our state ID space shrinks. We justify this by
+        // the assumption that a one-pass DFA is just already doing a fair
+        // bit more work than a normal DFA anyway, so an extra
+        // multiplication to compute a state transition doesn't seem like a
+        // huge deal.
+        let next_id = self.dfa.table.len() >> self.dfa.stride2();
+        let id = StateID::new(next_id)
+            .map_err(|_| BuildError::too_many_states(state_limit))?;
+        if id.as_u64() > Transition::STATE_ID_LIMIT {
+            return Err(BuildError::too_many_states(state_limit));
+        }
+        self.dfa
+            .table
+            .extend(core::iter::repeat(Transition(0)).take(self.dfa.stride()));
+        // The default empty value for 'PatternEpsilons' is sadly not all
+        // zeroes. Instead, a special sentinel is used to indicate that
+        // there is no pattern. So we need to explicitly set the pattern
+        // epsilons to the correct "empty" PatternEpsilons.
+        self.dfa.set_pattern_epsilons(id, PatternEpsilons::empty());
+        if let Some(size_limit) = self.config.get_size_limit() {
+            if self.dfa.memory_usage() > size_limit {
+                return Err(BuildError::exceeded_size_limit(size_limit));
+            }
+        }
+        Ok(id)
+    }
+
+    /// Push the given NFA state ID and its corresponding epsilons (slots
+    /// and conditional epsilon transitions) on to a stack for use in a
+    /// depth-first traversal of a sub-graph of the NFA.
+    ///
+    /// If the given NFA state ID has already been pushed on to the stack,
+    /// then it indicates the regex is not one-pass and this correspondingly
+    /// returns an error.
+    fn stack_push(
+        &mut self,
+        nfa_id: StateID,
+        epsilons: Epsilons,
+    ) -> Result<(), BuildError> {
+        // If we already have seen a match and we are compiling a leftmost
+        // first DFA, then we shouldn't add any more states to look at.
+        // This is effectively how preference order and non-greediness is
+        // implemented.
+        // if !self.config.get_match_kind().continue_past_first_match()
+        //     && self.matched
+        // {
+        //     return Ok(());
+        // }
+        if !self.seen.insert(nfa_id) {
+            return Err(BuildError::not_one_pass(
+                "multiple epsilon transitions to same state",
+            ));
+        }
+        self.stack.push((nfa_id, epsilons));
+        Ok(())
+    }
+}
+
+/// A one-pass DFA for executing a subset of anchored regex searches while
+/// resolving capturing groups.
+///
+/// A one-pass DFA can be built from an NFA that is one-pass. An NFA is
+/// one-pass when there is never any ambiguity about how to continue a
+/// search. For example, `a*a` is not one-pass because during a search, it's
+/// not possible to know whether to continue matching the `a*` or to move on
+/// to the single `a`. However, `a*b` is one-pass, because for every byte in
+/// the input, it's always clear when to move on from `a*` to `b`.
+///
+/// # Only anchored searches are supported
+///
+/// In this crate, especially for DFAs, unanchored searches are implemented
+/// by treating the pattern as if it had a `(?s-u:.)*?` prefix. While the
+/// prefix is one-pass on its own, adding anything after it, e.g.,
+/// `(?s-u:.)*?a` will make the overall pattern not one-pass. Why? Because
+/// the `(?s-u:.)` matches any byte, and there is therefore ambiguity as to
+/// when the prefix should stop matching and something else should start
+/// matching.
+///
+/// Therefore, one-pass DFAs do not support unanchored searches. In addition
+/// to many regexes simply not being one-pass, this implies that one-pass
+/// DFAs have limited utility. With that said, when a one-pass DFA can be
+/// used, it can potentially provide a dramatic speed up over alternatives
+/// like the
+/// [`BoundedBacktracker`](crate::nfa::thompson::backtrack::BoundedBacktracker)
+/// and the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM). In particular,
+/// a one-pass DFA is the only DFA capable of reporting the spans of
+/// matching capturing groups.
+///
+/// To clarify, when we say that unanchored searches are not supported, what
+/// that actually means is:
+///
+/// * The high level routines, [`DFA::is_match`] and [`DFA::captures`],
+/// always do anchored searches.
+/// * Since iterators are most useful in the context of unanchored searches,
+/// there is no `DFA::captures_iter` method.
+/// * For lower level routines like [`DFA::try_search`], an error will be
+/// returned if the given [`Input`] is configured to do an unanchored search
+/// or search for an invalid pattern ID. (Note that an [`Input`] is
+/// configured to do an unanchored search by default, so just giving an
+/// `Input::new` is guaranteed to return an error, as the example below
+/// shows.)
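+///
+/// Here is a minimal sketch of that last point. It assumes nothing beyond
+/// the fallible API described above:
+///
+/// ```
+/// use regex_automata::{dfa::onepass::DFA, Input};
+///
+/// let re = DFA::new("a+")?;
+/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+/// // `Input::new` is unanchored by default, and a one-pass DFA rejects
+/// // unanchored searches instead of running them.
+/// assert!(re.try_search(&mut cache, &Input::new("a"), &mut caps).is_err());
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```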
+///
+/// # Other limitations
+///
+/// In addition to the [configurable heap limit](Config::size_limit) and
+/// the requirement that a regex pattern be one-pass, there are some other
+/// limitations:
+///
+/// * There is an internal limit on the total number of explicit capturing
+/// groups that appear across all patterns. It is somewhat small and there
+/// is no way to configure it. If your pattern(s) exceed this limit, then
+/// building a one-pass DFA will fail.
+/// * If the number of patterns exceeds an internal unconfigurable limit,
+/// then building a one-pass DFA will fail. This limit is quite large and
+/// you're unlikely to hit it.
+/// * If the total number of states exceeds an internal unconfigurable
+/// limit, then building a one-pass DFA will fail. This limit is quite
+/// large and you're unlikely to hit it.
+///
+/// # Other examples of regexes that aren't one-pass
+///
+/// One particularly unfortunate example is that enabling Unicode can cause
+/// regexes that were one-pass to no longer be one-pass. Consider the regex
+/// `(?-u)\w*\s` for example. It is one-pass because there is no overlap
+/// between the ASCII definitions of `\w` and `\s`. But `\w*\s` (i.e., with
+/// Unicode enabled) is *not* one-pass because `\w` and `\s` get translated
+/// to UTF-8 automata. And while the *codepoints* in `\w` and `\s` do not
+/// overlap, the underlying UTF-8 encodings do. Indeed, because of the
+/// overlap between UTF-8 automata, the use of Unicode character classes
+/// will tend to vastly increase the likelihood of a regex not being
+/// one-pass.
+///
+/// # How does one know if a regex is one-pass or not?
+///
+/// At the time of writing, the only way to know is to try and build a
+/// one-pass DFA. The one-pass property is checked while constructing the
+/// DFA.
+///
+/// This does mean that you might potentially waste some CPU cycles and
+/// memory by optimistically trying to build a one-pass DFA. But this is
+/// currently the only way. In the future, building a one-pass DFA might be
+/// able to use some heuristics to detect common violations of the one-pass
+/// property and bail more quickly.
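+///
+/// As a minimal sketch of that "try it and see" approach, using the `a*a`
+/// and `a*b` examples from above:
+///
+/// ```
+/// use regex_automata::dfa::onepass::DFA;
+///
+/// // `a*a` is ambiguous and thus not one-pass, so construction fails,
+/// assert!(DFA::new(r"a*a").is_err());
+/// // while `a*b` is one-pass and builds without issue.
+/// assert!(DFA::new(r"a*b").is_ok());
+/// ```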
+///
+/// # Resource usage
+///
+/// Unlike a general DFA, a one-pass DFA has stricter bounds on its resource
+/// usage. Namely, construction of a one-pass DFA has a time and space
+/// complexity of `O(n)`, where `n ~ nfa.states().len()`. (A general DFA's
+/// time and space complexity is `O(2^n)`.) This smaller time bound is
+/// achieved because there is at most one DFA state created for each NFA
+/// state. If additional DFA states would be required, then the pattern is
+/// not one-pass and construction will fail.
+///
+/// Note though that currently, this DFA uses a fully dense representation.
+/// This means that while its space complexity is no worse than an NFA, it
+/// may in practice use more memory because of higher constant factors. The
+/// reason for this trade off is two-fold. Firstly, a dense representation
+/// makes the search faster. Secondly, the bigger an NFA, the more unlikely
+/// it is to be one-pass. Therefore, most one-pass DFAs are usually pretty
+/// small.
+///
+/// # Example
+///
+/// This example shows that the one-pass DFA implements Unicode word
+/// boundaries correctly while simultaneously reporting spans for capturing
+/// groups that participate in a match. (This is the only DFA that
+/// implements full support for Unicode word boundaries.)
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::{dfa::onepass::DFA, Match, Span};
+///
+/// let re = DFA::new(r"\b(?P<first>\w+)[[:space:]]+(?P<last>\w+)\b")?;
+/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+///
+/// re.captures(&mut cache, "Шерлок Холмс", &mut caps);
+/// assert_eq!(Some(Match::must(0, 0..23)), caps.get_match());
+/// assert_eq!(Some(Span::from(0..12)), caps.get_group_by_name("first"));
+/// assert_eq!(Some(Span::from(13..23)), caps.get_group_by_name("last"));
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// # Example: iteration
+///
+/// Unlike other regex engines in this crate, this one does not provide
+/// iterator search functions. This is because a one-pass DFA only supports
+/// anchored searches, and so iterator functions are generally not
+/// applicable.
+///
+/// However, if you know that all of your matches are
+/// directly adjacent, then an iterator can be used. The
+/// [`util::iter::Searcher`](crate::util::iter::Searcher) type can be used
+/// for this purpose:
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::{
+///     dfa::onepass::DFA,
+///     util::iter::Searcher,
+///     Anchored, Input, Span,
+/// };
+///
+/// let re = DFA::new(r"\w(\d)\w")?;
+/// let (mut cache, caps) = (re.create_cache(), re.create_captures());
+/// let input = Input::new("a1zb2yc3x").anchored(Anchored::Yes);
+///
+/// let mut it = Searcher::new(input).into_captures_iter(caps, |input, caps| {
+///     Ok(re.try_search(&mut cache, input, caps)?)
+/// }).infallible();
+/// let caps0 = it.next().unwrap();
+/// assert_eq!(Some(Span::from(1..2)), caps0.get_group(1));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone)]
+pub struct DFA {
+    /// The configuration provided by the caller.
+    config: Config,
+    /// The NFA used to build this DFA.
+    ///
+    /// NOTE: We probably don't need to store the NFA here, but we use
+    /// enough bits from it that it's convenient to do so. And there really
+    /// isn't much cost to doing so either, since an NFA is reference
+    /// counted internally.
+    nfa: NFA,
+    /// The transition table. Given a state ID `sid` and a byte of haystack
+    /// `b`, the next state is `table[(sid << stride2) + classes[b]]`.
+    ///
+    /// The stride of this table (i.e., the number of columns) is always
+    /// a power of 2, even if the alphabet length is smaller. This makes
+    /// converting between state IDs and state indices very cheap.
+    ///
+    /// Note that the stride always includes room for one extra "transition"
+    /// that isn't actually a transition. It is a 'PatternEpsilons' that is
+    /// used for match states only. Because of this, the maximum number of
+    /// active columns in the transition table is 257, which means the
+    /// maximum stride is 512 (the next power of 2 greater than or equal to
+    /// 257).
+    table: Vec<Transition>,
+    /// The DFA state IDs of the starting states.
+    ///
+    /// `starts[0]` is always present and corresponds to the starting state
+    /// when searching for matches of any pattern in the DFA.
+    ///
+    /// `starts[i]` where i>0 corresponds to the starting state for the
+    /// pattern ID 'i-1'. These starting states are optional.
+    starts: Vec<StateID>,
+    /// Every state ID >= this value corresponds to a match state.
+    ///
+    /// This is what a search uses to detect whether a state is a match
+    /// state or not. It requires only a simple comparison instead of
+    /// bit-unpacking the PatternEpsilons from every state.
+    min_match_id: StateID,
+    /// The alphabet of this DFA, split into equivalence classes. Bytes in
+    /// the same equivalence class can never discriminate between a match
+    /// and a non-match.
+    classes: ByteClasses,
+    /// The number of elements in each state in the transition table. This
+    /// may be less than the stride, since the stride is always a power of 2
+    /// and the alphabet length can be anything up to and including 256.
+    alphabet_len: usize,
+    /// The number of columns in the transition table, expressed as a power
+    /// of 2.
+    stride2: usize,
+    /// The offset at which the PatternEpsilons for a match state is stored
+    /// in the transition table.
+    ///
+    /// PERF: One wonders whether it would be better to put this in a
+    /// separate allocation, since only match states have a non-empty
+    /// PatternEpsilons and the number of match states tends to be dwarfed
+    /// by the number of non-match states.
+    /// So this would save '8*len(non_match_states)' for each DFA. The
+    /// question is whether moving this to a different allocation will lead
+    /// to a perf hit during searches. You might think dealing with match
+    /// states is rare, but some regexes spend a lot of time in match states
+    /// gobbling up input. But... match state handling is already somewhat
+    /// expensive, so maybe this wouldn't do much? Either way, it's worth
+    /// experimenting.
+    pateps_offset: usize,
+    /// The first explicit slot index. This refers to the first slot
+    /// appearing immediately after the last implicit slot. It is always
+    /// 'patterns.len() * 2'.
+    ///
+    /// We record this because we only store the explicit slots in our DFA
+    /// transition table that need to be saved. Implicit slots are handled
+    /// automatically as part of the search.
+    explicit_slot_start: usize,
+}
+
+impl DFA {
+    /// Parse the given regular expression using the default configuration
+    /// and return the corresponding one-pass DFA.
+    ///
+    /// If you want a non-default configuration, then use the [`Builder`] to
+    /// set your own configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, Match};
+    ///
+    /// let re = DFA::new("foo[0-9]+bar")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "foo12345barzzz", &mut caps);
+    /// assert_eq!(Some(Match::must(0, 0..11)), caps.get_match());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    #[inline]
+    pub fn new(pattern: &str) -> Result<DFA, BuildError> {
+        DFA::builder().build(pattern)
+    }
+
+    /// Like `new`, but parses multiple patterns into a single "multi
+    /// regex." This similarly uses the default regex configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, Match};
+    ///
+    /// let re = DFA::new_many(&["[a-z]+", "[0-9]+"])?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "abc123", &mut caps);
+    /// assert_eq!(Some(Match::must(0, 0..3)), caps.get_match());
+    ///
+    /// re.captures(&mut cache, "123abc", &mut caps);
+    /// assert_eq!(Some(Match::must(1, 0..3)), caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    #[inline]
+    pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<DFA, BuildError> {
+        DFA::builder().build_many(patterns)
+    }
+
+    /// Like `new`, but builds a one-pass DFA directly from an NFA. This is
+    /// useful if you already have an NFA, or even if you hand-assembled the
+    /// NFA.
+    ///
+    /// # Example
+    ///
+    /// This shows how to hand assemble a regular expression via its HIR,
+    /// compile an NFA from it and build a one-pass DFA from the NFA.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::onepass::DFA,
+    ///     nfa::thompson::NFA,
+    ///     Match,
+    /// };
+    /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange};
+    ///
+    /// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![
+    ///     ClassBytesRange::new(b'0', b'9'),
+    ///     ClassBytesRange::new(b'A', b'Z'),
+    ///     ClassBytesRange::new(b'_', b'_'),
+    ///     ClassBytesRange::new(b'a', b'z'),
+    /// ])));
+    ///
+    /// let config = NFA::config().nfa_size_limit(Some(1_000));
+    /// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?;
+    ///
+    /// let re = DFA::new_from_nfa(nfa)?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let expected = Some(Match::must(0, 0..1));
+    /// re.captures(&mut cache, "A", &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn new_from_nfa(nfa: NFA) -> Result<DFA, BuildError> {
+        DFA::builder().build_from_nfa(nfa)
+    }
+
+    /// Create a new one-pass DFA that matches every input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, Match};
+    ///
+    /// let dfa = DFA::always_match()?;
+    /// let mut cache = dfa.create_cache();
+    /// let mut caps = dfa.create_captures();
+    ///
+    /// let expected = Match::must(0, 0..0);
+    /// dfa.captures(&mut cache, "", &mut caps);
+    /// assert_eq!(Some(expected), caps.get_match());
+    /// dfa.captures(&mut cache, "foo", &mut caps);
+    /// assert_eq!(Some(expected), caps.get_match());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn always_match() -> Result<DFA, BuildError> {
+        let nfa = thompson::NFA::always_match();
+        Builder::new().build_from_nfa(nfa)
+    }
+
+    /// Create a new one-pass DFA that never matches any input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::dfa::onepass::DFA;
+    ///
+    /// let dfa = DFA::never_match()?;
+    /// let mut cache = dfa.create_cache();
+    /// let mut caps = dfa.create_captures();
+    ///
+    /// dfa.captures(&mut cache, "", &mut caps);
+    /// assert_eq!(None, caps.get_match());
+    /// dfa.captures(&mut cache, "foo", &mut caps);
+    /// assert_eq!(None, caps.get_match());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn never_match() -> Result<DFA, BuildError> {
+        let nfa = thompson::NFA::never_match();
+        Builder::new().build_from_nfa(nfa)
+    }
+
+    /// Return a default configuration for a DFA.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// `Config` type when customizing the construction of a DFA.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to change the match semantics of this DFA
+    /// from its default "leftmost first" to "all." When using "all,"
+    /// non-greediness doesn't apply and neither does preference order
+    /// matching. Instead, the longest match possible is always returned.
+    /// (Although, by construction, it's impossible for a one-pass DFA to
+    /// have a different answer for "preference order" vs "longest match.")
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, Match, MatchKind};
+    ///
+    /// let re = DFA::builder()
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .build(r"(abc)+?")?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = re.create_captures();
+    ///
+    /// re.captures(&mut cache, "abcabc", &mut caps);
+    /// // Normally, the non-greedy repetition would give us a 0..3 match.
+    /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn config() -> Config {
+        Config::new()
+    }
+
+    /// Return a builder for configuring the construction of a DFA.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Builder`] type in common cases.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use the builder to disable UTF-8 mode.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     dfa::onepass::DFA,
+    ///     nfa::thompson,
+    ///     util::syntax,
+    ///     Match,
+    /// };
+    ///
+    /// let re = DFA::builder()
+    ///     .syntax(syntax::Config::new().utf8(false))
+    ///     .thompson(thompson::Config::new().utf8(false))
+    ///     .build(r"foo(?-u:[^b])ar.*")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// let haystack = b"foo\xFFarzz\xE2\x98\xFF\n";
+    /// let expected = Some(Match::must(0, 0..8));
+    /// re.captures(&mut cache, haystack, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn builder() -> Builder {
+        Builder::new()
+    }
+
+    /// Create a new empty set of capturing groups that is guaranteed to be
+    /// valid for the search APIs on this DFA.
+    ///
+    /// A `Captures` value created for a specific DFA cannot be used with
+    /// any other DFA.
+    ///
+    /// This is a convenience function for [`Captures::all`]. See the
+    /// [`Captures`] documentation for an explanation of its alternative
+    /// constructors that permit the DFA to do less work during a search,
+    /// and thus might make it faster.
+    #[inline]
+    pub fn create_captures(&self) -> Captures {
+        Captures::all(self.nfa.group_info().clone())
+    }
+
+    /// Create a new cache for this DFA.
+    ///
+    /// The cache returned should only be used for searches for this
+    /// DFA. If you want to reuse the cache for another DFA, then you
+    /// must call [`Cache::reset`] with that DFA (or, equivalently,
+    /// [`DFA::reset_cache`]).
+    #[inline]
+    pub fn create_cache(&self) -> Cache {
+        Cache::new(self)
+    }
+
+    /// Reset the given cache such that it can be used for searching with
+    /// this DFA (and only this DFA).
+    ///
+    /// A cache reset permits reusing memory already allocated in this cache
+    /// with a different DFA.
+    ///
+    /// # Example
+    ///
+    /// This shows how to re-purpose a cache for use with a different DFA.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::onepass::DFA, Match};
+    ///
+    /// let re1 = DFA::new(r"\w")?;
+    /// let re2 = DFA::new(r"\W")?;
+    /// let mut caps1 = re1.create_captures();
+    /// let mut caps2 = re2.create_captures();
+    ///
+    /// let mut cache = re1.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..2)),
+    ///     { re1.captures(&mut cache, "Δ", &mut caps1); caps1.get_match() },
+    /// );
+    ///
+    /// // Using 'cache' with re2 is not allowed. It may result in panics or
+    /// // incorrect results. In order to re-purpose the cache, we must reset
+    /// // it with the one-pass DFA we'd like to use it with.
+    /// //
+    /// // Similarly, after this reset, using the cache with 're1' is also
+    /// // not allowed.
+    /// re2.reset_cache(&mut cache);
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..3)),
+    ///     { re2.captures(&mut cache, "☃", &mut caps2); caps2.get_match() },
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn reset_cache(&self, cache: &mut Cache) {
+        cache.reset(self);
+    }
+
+    /// Return the config for this one-pass DFA.
+    #[inline]
+    pub fn get_config(&self) -> &Config {
+        &self.config
+    }
+
+    /// Returns a reference to the underlying NFA.
+    #[inline]
+    pub fn get_nfa(&self) -> &NFA {
+        &self.nfa
+    }
+
+    /// Returns the total number of patterns compiled into this DFA.
+    ///
+    /// In the case of a DFA that contains no patterns, this returns `0`.
+    #[inline]
+    pub fn pattern_len(&self) -> usize {
+        self.get_nfa().pattern_len()
+    }
+
+    /// Returns the total number of states in this one-pass DFA.
+    ///
+    /// Note that unlike dense or sparse DFAs, a one-pass DFA does not
+    /// expose a low level DFA API. Therefore, this routine has little use
+    /// other than being informational.
+    #[inline]
+    pub fn state_len(&self) -> usize {
+        self.table.len() >> self.stride2()
+    }
+
+    /// Returns the total number of elements in the alphabet for this DFA.
+    ///
+    /// That is, this returns the total number of transitions that each
+    /// state in this DFA must have. The maximum alphabet size is 256, which
+    /// corresponds to each possible byte value.
+    ///
+    /// The alphabet size may be less than 256 though, and unless
+    /// [`Config::byte_classes`] is disabled, it is typically much less than
+    /// 256. Namely, bytes are grouped into equivalence classes such that no
+    /// two bytes in the same class can distinguish a match from a
+    /// non-match. For example, in the regex `^[a-z]+$`, the ASCII bytes
+    /// `a-z` could all be in the same equivalence class. This leads to a
+    /// massive space savings.
+    ///
+    /// Note though that the alphabet length does _not_ necessarily equal
+    /// the total stride space taken up by a single DFA state in the
+    /// transition table. Namely, for performance reasons, the stride is
+    /// always the smallest power of two that is greater than or equal to
+    /// the alphabet length. For this reason, [`DFA::stride`] or
+    /// [`DFA::stride2`] are often more useful. The alphabet length is
+    /// typically useful only for informational purposes.
+    ///
+    /// Note also that unlike dense or sparse DFAs, a one-pass DFA does
+    /// not have a special end-of-input (EOI) transition. This is because
+    /// a one-pass DFA handles look-around assertions explicitly (like the
+    /// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM)) and does not build
+    /// them into the transitions of the DFA.
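+    ///
+    /// # Example
+    ///
+    /// A small sketch of the effect of byte classes. The exact alphabet
+    /// length is an implementation detail, so this only checks a loose
+    /// bound, and it assumes the default configuration (where byte classes
+    /// are enabled):
+    ///
+    /// ```
+    /// use regex_automata::dfa::onepass::DFA;
+    ///
+    /// let re = DFA::new("^[a-z]+$")?;
+    /// // Equivalence classes keep the alphabet much smaller than the
+    /// // full 256 possible byte values.
+    /// assert!(re.alphabet_len() < 256);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```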
+    #[inline]
+    pub fn alphabet_len(&self) -> usize {
+        self.alphabet_len
+    }
+
+    /// Returns the total stride for every state in this DFA, expressed as
+    /// the exponent of a power of 2. The stride is the amount of space each
+    /// state takes up in the transition table, expressed as a number of
+    /// transitions. (Unused transitions map to dead states.)
+    ///
+    /// The stride of a DFA is always equivalent to the smallest power of
+    /// 2 that is greater than or equal to the DFA's alphabet length. This
+    /// definition uses extra space, but possibly permits faster translation
+    /// between state identifiers and their corresponding offsets in this
+    /// DFA's transition table.
+    ///
+    /// For example, if the DFA's stride is 16 transitions, then its
+    /// `stride2` is `4` since `2^4 = 16`.
+    ///
+    /// The minimum `stride2` value is `1` (corresponding to a stride of
+    /// `2`) while the maximum `stride2` value is `9` (corresponding to a
+    /// stride of `512`). The maximum in theory should be `8`, but because
+    /// of some implementation quirks that may be relaxed in the future, it
+    /// is one more than `8`. (Do note that a maximal stride is incredibly
+    /// rare, as it would imply that there is almost no redundancy in the
+    /// regex pattern.)
+    ///
+    /// Note that unlike dense or sparse DFAs, a one-pass DFA does not
+    /// expose a low level DFA API. Therefore, this routine has little use
+    /// other than being informational.
+    #[inline]
+    pub fn stride2(&self) -> usize {
+        self.stride2
+    }
+
+    /// Returns the total stride for every state in this DFA. This
+    /// corresponds to the total number of transitions used by each state in
+    /// this DFA's transition table.
+    ///
+    /// Please see [`DFA::stride2`] for more information. In particular,
+    /// this returns the stride as the number of transitions, whereas
+    /// `stride2` returns it as the exponent of a power of 2.
+    ///
+    /// Note that unlike dense or sparse DFAs, a one-pass DFA does not
+    /// expose a low level DFA API. Therefore, this routine has little use
+    /// other than being informational.
+    #[inline]
+    pub fn stride(&self) -> usize {
+        1 << self.stride2()
+    }
+
+    /// Returns the memory usage, in bytes, of this DFA.
+    ///
+    /// The memory usage is computed based on the number of bytes used to
+    /// represent this DFA.
+    ///
+    /// This does **not** include the stack size used up by this DFA. To
+    /// compute that, use `std::mem::size_of::<DFA>()`.
+    #[inline]
+    pub fn memory_usage(&self) -> usize {
+        use core::mem::size_of;
+
+        self.table.len() * size_of::<Transition>()
+            + self.starts.len() * size_of::<StateID>()
+    }
+}
+
+impl DFA {
+    /// Executes an anchored leftmost forward search, and returns true if
+    /// and only if this one-pass DFA matches the given haystack.
+    ///
+    /// This routine may short-circuit if it knows that scanning future
+    /// input will never lead to a different result. In particular, if the
+    /// underlying DFA enters a match state, then this routine will return
+    /// `true` immediately without inspecting any future input. (Consider
+    /// how this might make a difference given the regex `a+` on the
+    /// haystack `aaaaaaaaaaaaaaa`. This routine can stop after it sees the
+    /// first `a`, but routines like `find` need to continue searching
+    /// because `+` is greedy by default.)
+    ///
+    /// The given `Input` is forcefully set to use [`Anchored::Yes`] if the
+    /// given configuration was [`Anchored::No`] (which is the default).
+    ///
+    /// # Panics
+    ///
+    /// This routine panics if the search could not complete. This can occur
+    /// in the following circumstances:
+    ///
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode. Concretely,
+    /// this occurs when using [`Anchored::Pattern`] without enabling
+    /// [`Config::starts_for_each_pattern`].
+    ///
+    /// When a search panics, callers cannot know whether a match exists or
+    /// not.
+    ///
+    /// Use [`DFA::try_search`] if you want to handle these panics as error
+    /// values instead.
+    ///
+    /// # Example
+    ///
+    /// This shows basic usage:
+    ///
+    /// ```
+    /// use regex_automata::dfa::onepass::DFA;
+    ///
+    /// let re = DFA::new("foo[0-9]+bar")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(re.is_match(&mut cache, "foo12345bar"));
+    /// assert!(!re.is_match(&mut cache, "foobar"));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: consistency with search APIs
+    ///
+    /// `is_match` is guaranteed to return `true` whenever `captures`
+    /// returns a match. This includes searches that are executed entirely
+    /// within a codepoint:
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, Input};
+    ///
+    /// let re = DFA::new("a*")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(!re.is_match(&mut cache, Input::new("☃").span(1..2)));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Notice that when UTF-8 mode is disabled, then the above reports a
+    /// match because the restriction against zero-width matches that split
+    /// a codepoint has been lifted:
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, nfa::thompson::NFA, Input};
+    ///
+    /// let re = DFA::builder()
+    ///     .thompson(NFA::config().utf8(false))
+    ///     .build("a*")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(re.is_match(&mut cache, Input::new("☃").span(1..2)));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn is_match<'h, I: Into<Input<'h>>>(
+        &self,
+        cache: &mut Cache,
+        input: I,
+    ) -> bool {
+        let mut input = input.into().earliest(true);
+        if matches!(input.get_anchored(), Anchored::No) {
+            input.set_anchored(Anchored::Yes);
+        }
+        self.try_search_slots(cache, &input, &mut []).unwrap().is_some()
+    }
+
+    /// Executes an anchored leftmost forward search, and returns a `Match`
+    /// if and only if this one-pass DFA matches the given haystack.
+    ///
+    /// This routine only includes the overall match span. To get access to
+    /// the individual spans of each capturing group, use [`DFA::captures`].
+    ///
+    /// The given `Input` is forcefully set to use [`Anchored::Yes`] if the
+    /// given configuration was [`Anchored::No`] (which is the default).
+    ///
+    /// # Panics
+    ///
+    /// This routine panics if the search could not complete. This can occur
+    /// in the following circumstances:
+    ///
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode. Concretely,
+    /// this occurs when using [`Anchored::Pattern`] without enabling
+    /// [`Config::starts_for_each_pattern`].
+    ///
+    /// When a search panics, callers cannot know whether a match exists or
+    /// not.
+    ///
+    /// Use [`DFA::try_search`] if you want to handle these panics as error
+    /// values instead.
+    ///
+    /// # Example
+    ///
+    /// Leftmost first match semantics corresponds to the match with the
+    /// smallest starting offset, but where the end offset is determined by
+    /// preferring earlier branches in the original regular expression. For
+    /// example, `Sam|Samwise` will match `Sam` in `Samwise`, but
+    /// `Samwise|Sam` will match `Samwise` in `Samwise`.
+    ///
+    /// Generally speaking, the "leftmost first" match is how most
+    /// backtracking regular expressions tend to work. This is in contrast
+    /// to POSIX-style regular expressions that yield "leftmost longest"
+    /// matches. Namely, both `Sam|Samwise` and `Samwise|Sam` match
+    /// `Samwise` when using leftmost longest semantics. (This crate does
+    /// not currently support leftmost longest semantics.)
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, Match};
+    ///
+    /// let re = DFA::new("foo[0-9]+")?;
+    /// let mut cache = re.create_cache();
+    /// let expected = Match::must(0, 0..8);
+    /// assert_eq!(Some(expected), re.find(&mut cache, "foo12345"));
+    ///
+    /// // Even though a match is found after reading the first byte (`a`),
+    /// // the leftmost first match semantics demand that we find the
+    /// // earliest match that prefers earlier parts of the pattern over
+    /// // later parts.
+    /// let re = DFA::new("abc|a")?;
+    /// let mut cache = re.create_cache();
+    /// let expected = Match::must(0, 0..3);
+    /// assert_eq!(Some(expected), re.find(&mut cache, "abc"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find<'h, I: Into<Input<'h>>>(
+        &self,
+        cache: &mut Cache,
+        input: I,
+    ) -> Option<Match> {
+        let mut input = input.into();
+        if matches!(input.get_anchored(), Anchored::No) {
+            input.set_anchored(Anchored::Yes);
+        }
+        if self.get_nfa().pattern_len() == 1 {
+            let mut slots = [None, None];
+            let pid =
+                self.try_search_slots(cache, &input, &mut slots).unwrap()?;
+            let start = slots[0].unwrap().get();
+            let end = slots[1].unwrap().get();
+            return Some(Match::new(pid, Span { start, end }));
+        }
+        let ginfo = self.get_nfa().group_info();
+        let slots_len = ginfo.implicit_slot_len();
+        let mut slots = vec![None; slots_len];
+        let pid = self.try_search_slots(cache, &input, &mut slots).unwrap()?;
+        let start = slots[pid.as_usize() * 2].unwrap().get();
+        let end = slots[pid.as_usize() * 2 + 1].unwrap().get();
+        Some(Match::new(pid, Span { start, end }))
+    }
+
+    /// Executes an anchored leftmost forward search and writes the spans
+    /// of capturing groups that participated in a match into the provided
+    /// [`Captures`] value. If no match was found, then
+    /// [`Captures::is_match`] is guaranteed to return `false`.
+    ///
+    /// The given `Input` is forcefully set to use [`Anchored::Yes`] if the
+    /// given configuration was [`Anchored::No`] (which is the default).
+    ///
+    /// # Panics
+    ///
+    /// This routine panics if the search could not complete. This can occur
+    /// in the following circumstances:
+    ///
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode. Concretely,
+    /// this occurs when using [`Anchored::Pattern`] without enabling
+    /// [`Config::starts_for_each_pattern`].
+    ///
+    /// When a search panics, callers cannot know whether a match exists or
+    /// not.
+    ///
+    /// Use [`DFA::try_search`] if you want to handle these panics as error
+    /// values instead.
+    ///
+    /// # Example
+    ///
+    /// This shows a simple example of a one-pass regex that extracts
+    /// capturing group spans.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, Match, Span};
+    ///
+    /// let re = DFA::new(
+    ///     // Notice that we use ASCII here. The corresponding Unicode regex
+    ///     // is sadly not one-pass.
+ /// "(?P[[:alpha:]]+)[[:space:]]+(?P[[:alpha:]]+)", + /// )?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Bruce Springsteen", &mut caps); + /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match()); + /// assert_eq!(Some(Span::from(0..5)), caps.get_group(1)); + /// assert_eq!(Some(Span::from(6..17)), caps.get_group_by_name("last")); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn captures<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + caps: &mut Captures, + ) { + let mut input = input.into(); + if matches!(input.get_anchored(), Anchored::No) { + input.set_anchored(Anchored::Yes); + } + self.try_search(cache, &input, caps).unwrap(); + } + + /// Executes an anchored leftmost forward search and writes the spans + /// of capturing groups that participated in a match into the provided + /// [`Captures`] value. If no match was found, then [`Captures::is_match`] + /// is guaranteed to return `false`. + /// + /// The differences with [`DFA::captures`] are: + /// + /// 1. This returns an error instead of panicking if the search fails. + /// 2. Accepts an `&Input` instead of a `Into`. This permits reusing + /// the same input for multiple searches, which _may_ be important for + /// latency. + /// 3. This does not automatically change the [`Anchored`] mode from `No` + /// to `Yes`. Instead, if [`Input::anchored`] is `Anchored::No`, then an + /// error is returned. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in the following circumstances: + /// + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. Concretely, + /// this occurs when using [`Anchored::Pattern`] without enabling + /// [`Config::starts_for_each_pattern`]. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example: specific pattern search + /// + /// This example shows how to build a multi-regex that permits searching + /// for specific patterns. Note that this is somewhat less useful than + /// in other regex engines, since a one-pass DFA by definition has no + /// ambiguity about which pattern can match at a position. That is, if it + /// were possible for two different patterns to match at the same starting + /// position, then the multi-regex would not be one-pass and construction + /// would have failed. + /// + /// Nevertheless, this can still be useful if you only care about matches + /// for a specific pattern, and want the DFA to report "no match" even if + /// some other pattern would have matched. + /// + /// Note that in order to make use of this functionality, + /// [`Config::starts_for_each_pattern`] must be enabled. It is disabled + /// by default since it may result in higher memory usage. + /// + /// ``` + /// use regex_automata::{ + /// dfa::onepass::DFA, Anchored, Input, Match, PatternID, + /// }; + /// + /// let re = DFA::builder() + /// .configure(DFA::config().starts_for_each_pattern(true)) + /// .build_many(&["[a-z]+", "[0-9]+"])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "123abc"; + /// let input = Input::new(haystack).anchored(Anchored::Yes); + /// + /// // A normal multi-pattern search will show pattern 1 matches. 
+    #[inline]
+    pub fn captures<'h, I: Into<Input<'h>>>(
+        &self,
+        cache: &mut Cache,
+        input: I,
+        caps: &mut Captures,
+    ) {
+        let mut input = input.into();
+        if matches!(input.get_anchored(), Anchored::No) {
+            input.set_anchored(Anchored::Yes);
+        }
+        self.try_search(cache, &input, caps).unwrap();
+    }
+
+    /// Executes an anchored leftmost forward search and writes the spans
+    /// of capturing groups that participated in a match into the provided
+    /// [`Captures`] value. If no match was found, then [`Captures::is_match`]
+    /// is guaranteed to return `false`.
+    ///
+    /// The differences with [`DFA::captures`] are:
+    ///
+    /// 1. This returns an error instead of panicking if the search fails.
+    /// 2. Accepts an `&Input` instead of a `Into<Input>`. This permits
+    /// reusing the same input for multiple searches, which _may_ be important
+    /// for latency.
+    /// 3. This does not automatically change the [`Anchored`] mode from `No`
+    /// to `Yes`. Instead, if [`Input::anchored`] is `Anchored::No`, then an
+    /// error is returned.
+    ///
+    /// # Errors
+    ///
+    /// This routine errors if the search could not complete. This can occur
+    /// in the following circumstances:
+    ///
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode. Concretely,
+    /// this occurs when using [`Anchored::Pattern`] without enabling
+    /// [`Config::starts_for_each_pattern`].
+    ///
+    /// When a search returns an error, callers cannot know whether a match
+    /// exists or not.
+    ///
+    /// # Example: specific pattern search
+    ///
+    /// This example shows how to build a multi-regex that permits searching
+    /// for specific patterns. Note that this is somewhat less useful than
+    /// in other regex engines, since a one-pass DFA by definition has no
+    /// ambiguity about which pattern can match at a position. That is, if it
+    /// were possible for two different patterns to match at the same starting
+    /// position, then the multi-regex would not be one-pass and construction
+    /// would have failed.
+    ///
+    /// Nevertheless, this can still be useful if you only care about matches
+    /// for a specific pattern, and want the DFA to report "no match" even if
+    /// some other pattern would have matched.
+    ///
+    /// Note that in order to make use of this functionality,
+    /// [`Config::starts_for_each_pattern`] must be enabled. It is disabled
+    /// by default since it may result in higher memory usage.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::onepass::DFA, Anchored, Input, Match, PatternID,
+    /// };
+    ///
+    /// let re = DFA::builder()
+    ///     .configure(DFA::config().starts_for_each_pattern(true))
+    ///     .build_many(&["[a-z]+", "[0-9]+"])?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let haystack = "123abc";
+    /// let input = Input::new(haystack).anchored(Anchored::Yes);
+    ///
+    /// // A normal multi-pattern search will show pattern 1 matches.
+    /// re.try_search(&mut cache, &input, &mut caps)?;
+    /// assert_eq!(Some(Match::must(1, 0..3)), caps.get_match());
+    ///
+    /// // If we only want to report pattern 0 matches, then we'll get no
+    /// // match here.
+    /// let input = input.anchored(Anchored::Pattern(PatternID::must(0)));
+    /// re.try_search(&mut cache, &input, &mut caps)?;
+    /// assert_eq!(None, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: specifying the bounds of a search
+    ///
+    /// This example shows how providing the bounds of a search can produce
+    /// different results than simply sub-slicing the haystack.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::onepass::DFA, Anchored, Input, Match};
+    ///
+    /// // one-pass DFAs fully support Unicode word boundaries!
+    /// // A sad joke is that a Unicode aware regex like \w+\s is not one-pass.
+    /// // :-(
+    /// let re = DFA::new(r"\b[0-9]{3}\b")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let haystack = "foo123bar";
+    ///
+    /// // Since we sub-slice the haystack, the search doesn't know about
+    /// // the larger context and assumes that `123` is surrounded by word
+    /// // boundaries. And of course, the match position is reported relative
+    /// // to the sub-slice as well, which means we get `0..3` instead of
+    /// // `3..6`.
+    /// let expected = Some(Match::must(0, 0..3));
+    /// let input = Input::new(&haystack[3..6]).anchored(Anchored::Yes);
+    /// re.try_search(&mut cache, &input, &mut caps)?;
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// // But if we provide the bounds of the search within the context of the
+    /// // entire haystack, then the search can take the surrounding context
+    /// // into account. (And if we did find a match, it would be reported
+    /// // as a valid offset into `haystack` instead of its sub-slice.)
+    /// let expected = None;
+    /// let input = Input::new(haystack).range(3..6).anchored(Anchored::Yes);
+    /// re.try_search(&mut cache, &input, &mut caps)?;
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
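+    ///
+    /// # Example: `Anchored::No` is rejected
+    ///
+    /// Unlike [`DFA::captures`], this routine does not adjust the anchored
+    /// mode on the caller's behalf. A minimal sketch of the resulting error
+    /// under the default `Anchored::No` configuration:
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, Input};
+    ///
+    /// let re = DFA::new("a+")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// // The default Input is unanchored, so the search is refused.
+    /// let result = re.try_search(&mut cache, &Input::new("aaa"), &mut caps);
+    /// assert!(result.is_err());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```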
+    #[inline]
+    pub fn try_search(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        caps: &mut Captures,
+    ) -> Result<(), MatchError> {
+        let pid = self.try_search_slots(cache, input, caps.slots_mut())?;
+        caps.set_pattern(pid);
+        Ok(())
+    }
+
+    /// Executes an anchored leftmost forward search and writes the spans
+    /// of capturing groups that participated in a match into the provided
+    /// `slots`, and returns the matching pattern ID. The contents of the
+    /// slots for patterns other than the matching pattern are unspecified. If
+    /// no match was found, then `None` is returned and the contents of all
+    /// `slots` is unspecified.
+    ///
+    /// This is like [`DFA::try_search`], but it accepts a raw slots slice
+    /// instead of a `Captures` value. This is useful in contexts where you
+    /// don't want or need to allocate a `Captures`.
+    ///
+    /// It is legal to pass _any_ number of slots to this routine. If the regex
+    /// engine would otherwise write a slot offset that doesn't fit in the
+    /// provided slice, then it is simply skipped. In general though, there are
+    /// usually three slice lengths you might want to use:
+    ///
+    /// * An empty slice, if you only care about which pattern matched.
+    /// * A slice with
+    /// [`pattern_len() * 2`](crate::dfa::onepass::DFA::pattern_len)
+    /// slots, if you only care about the overall match spans for each matching
+    /// pattern.
+    /// * A slice with
+    /// [`slot_len()`](crate::util::captures::GroupInfo::slot_len) slots, which
+    /// permits recording match offsets for every capturing group in every
+    /// pattern.
+    ///
+    /// # Errors
+    ///
+    /// This routine errors if the search could not complete. This can occur
+    /// in the following circumstances:
+    ///
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode. Concretely,
+    /// this occurs when using [`Anchored::Pattern`] without enabling
+    /// [`Config::starts_for_each_pattern`].
+    ///
+    /// When a search returns an error, callers cannot know whether a match
+    /// exists or not.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find the overall match offsets in a
+    /// multi-pattern search without allocating a `Captures` value. Indeed, we
+    /// can put our slots right on the stack.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, Anchored, Input, PatternID};
+    ///
+    /// let re = DFA::new_many(&[
+    ///     r"[a-zA-Z]+",
+    ///     r"[0-9]+",
+    /// ])?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("123").anchored(Anchored::Yes);
+    ///
+    /// // We only care about the overall match offsets here, so we just
+    /// // allocate two slots for each pattern. Each slot records the start
+    /// // and end of the match.
+    /// let mut slots = [None; 4];
+    /// let pid = re.try_search_slots(&mut cache, &input, &mut slots)?;
+    /// assert_eq!(Some(PatternID::must(1)), pid);
+    ///
+    /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'.
+    /// // See 'GroupInfo' for more details on the mapping between groups and
+    /// // slot indices.
+    /// let slot_start = pid.unwrap().as_usize() * 2;
+    /// let slot_end = slot_start + 1;
+    /// assert_eq!(Some(0), slots[slot_start].map(|s| s.get()));
+    /// assert_eq!(Some(3), slots[slot_end].map(|s| s.get()));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
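+    ///
+    /// # Example: empty slots
+    ///
+    /// As noted above, passing an empty slice is legal and still reports
+    /// which pattern matched. A minimal sketch:
+    ///
+    /// ```
+    /// use regex_automata::{dfa::onepass::DFA, Anchored, Input, PatternID};
+    ///
+    /// let re = DFA::new_many(&[r"[a-z]+", r"[0-9]+"])?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("xyz").anchored(Anchored::Yes);
+    ///
+    /// // No slots are provided, so only the pattern ID is reported.
+    /// let pid = re.try_search_slots(&mut cache, &input, &mut [])?;
+    /// assert_eq!(Some(PatternID::must(0)), pid);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```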
+    #[inline]
+    pub fn try_search_slots(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Result<Option<PatternID>, MatchError> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        if !utf8empty {
+            return self.try_search_slots_imp(cache, input, slots);
+        }
+        // See PikeVM::try_search_slots for why we do this.
+        let min = self.get_nfa().group_info().implicit_slot_len();
+        if slots.len() >= min {
+            return self.try_search_slots_imp(cache, input, slots);
+        }
+        if self.get_nfa().pattern_len() == 1 {
+            let mut enough = [None, None];
+            let got = self.try_search_slots_imp(cache, input, &mut enough)?;
+            // This is OK because we know `enough` is strictly bigger
+            // than `slots`, otherwise this special case isn't reached.
+            slots.copy_from_slice(&enough[..slots.len()]);
+            return Ok(got);
+        }
+        let mut enough = vec![None; min];
+        let got = self.try_search_slots_imp(cache, input, &mut enough)?;
+        // This is OK because we know `enough` is strictly bigger than
+        // `slots`, otherwise this special case isn't reached.
+        slots.copy_from_slice(&enough[..slots.len()]);
+        Ok(got)
+    }
+
+    #[inline(never)]
+    fn try_search_slots_imp(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Result<Option<PatternID>, MatchError> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        match self.search_imp(cache, input, slots)? {
+            None => return Ok(None),
+            Some(pid) if !utf8empty => return Ok(Some(pid)),
+            Some(pid) => {
+                // These slot indices are always correct because we know our
+                // 'pid' is valid and thus we know that the slot indices for it
+                // are valid.
+                let slot_start = pid.as_usize().wrapping_mul(2);
+                let slot_end = slot_start.wrapping_add(1);
+                // OK because we know we have a match and we know our caller
+                // provided slots are big enough (which we make true above if
+                // the caller didn't). Namely, we're only here when 'utf8empty'
+                // is true, and when that's true, we require slots for every
+                // pattern.
+                let start = slots[slot_start].unwrap().get();
+                let end = slots[slot_end].unwrap().get();
+                // If our match splits a codepoint, then we cannot report it
+                // as a match. And since one-pass DFAs only support anchored
+                // searches, we don't try to skip ahead to find the next match.
+                // We can just quit with nothing.
+                if start == end && !input.is_char_boundary(start) {
+                    return Ok(None);
+                }
+                Ok(Some(pid))
+            }
+        }
+    }
+}
+
+impl DFA {
+    fn search_imp(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Result<Option<PatternID>, MatchError> {
+        // PERF: Some ideas. I ran out of steam after my initial impl to try
+        // many of these.
+        //
+        // 1) Try doing more state shuffling. Right now, all we do is push
+        // match states to the end of the transition table so that we can do
+        // 'if sid >= self.min_match_id' to know whether we're in a match
+        // state or not. But what about doing something like dense DFAs and
+        // pushing dead, match and states with captures/looks all toward the
+        // beginning of the transition table. Then we could do 'if sid <=
+        // self.max_special_id', in which case, we need to do some special
+        // handling of some sort. Otherwise, we get the happy path, just
+        // like in a DFA search. The main argument against this is that the
+        // one-pass DFA is likely to be used most often with capturing groups
+        // and if capturing groups are common, then this might wind up being a
+        // pessimization.
+        //
+        // 2) Consider moving 'PatternEpsilons' out of the transition table.
+        // It is only needed for match states and usually a small minority of
+        // states are match states. Therefore, we're using an extra 'u64' for
+        // most states.
+        //
+        // 3) I played around with the match state handling and it seems like
+        // there is probably a lot left on the table for improvement. The
+        // key tension is that the 'find_match' routine is a giant mess, but
+        // splitting it out into a non-inlineable function is a non-starter
+        // because the match state might consume input, so 'find_match' COULD
+        // be called quite a lot, and a function call at that point would trash
+        // perf. In theory, we could detect whether a match state consumes
+        // input and then specialize our search routine based on that. In that
+        // case, maybe an extra function call is OK, but even then, it might be
+        // too much of a latency hit. Another idea is to just try and figure
+        // out how to reduce the code size of 'find_match'. RE2 has a trick
+        // here where the match handling isn't done if we know the next byte of
+        // input yields a match too. Maybe we adopt that?
+        //
+        // This just might be a tricky DFA to optimize.
+
+        if input.is_done() {
+            return Ok(None);
+        }
+        // We unfortunately have a bit of book-keeping to do to set things
+        // up. We do have to setup our cache and clear all of our slots. In
+        // particular, clearing the slots is necessary for the case where we
+        // report a match, but one of the capturing groups didn't participate
+        // in the match but had a span set from a previous search. That would
+        // be bad. In theory, we could avoid all this slot clearing if we knew
+        // that every slot was always activated for every match. Then we would
+        // know they would always be overwritten when a match is found.
+        let explicit_slots_len = core::cmp::min(
+            Slots::LIMIT,
+            slots.len().saturating_sub(self.explicit_slot_start),
+        );
+        cache.setup_search(explicit_slots_len);
+        for slot in cache.explicit_slots() {
+            *slot = None;
+        }
+        for slot in slots.iter_mut() {
+            *slot = None;
+        }
+        // We set the starting slots for every pattern up front. This does
+        // increase our latency somewhat, but it avoids having to do it every
+        // time we see a match state (which could be many times in a single
+        // search if the match state consumes input).
+        for pid in self.nfa.patterns() {
+            let i = pid.as_usize() * 2;
+            if i >= slots.len() {
+                break;
+            }
+            slots[i] = NonMaxUsize::new(input.start());
+        }
+        let mut pid = None;
+        let mut next_sid = match input.get_anchored() {
+            Anchored::Yes => self.start(),
+            Anchored::Pattern(pid) => self.start_pattern(pid)?,
+            Anchored::No => {
+                // If the regex is itself always anchored, then we're fine,
+                // even if the search is configured to be unanchored.
+                if !self.nfa.is_always_start_anchored() {
+                    return Err(MatchError::unsupported_anchored(
+                        Anchored::No,
+                    ));
+                }
+                self.start()
+            }
+        };
+        let leftmost_first =
+            matches!(self.config.get_match_kind(), MatchKind::LeftmostFirst);
+        for at in input.start()..input.end() {
+            let sid = next_sid;
+            let trans = self.transition(sid, input.haystack()[at]);
+            next_sid = trans.state_id();
+            let epsilons = trans.epsilons();
+            if sid >= self.min_match_id {
+                if self.find_match(cache, input, at, sid, slots, &mut pid) {
+                    if input.get_earliest()
+                        || (leftmost_first && trans.match_wins())
+                    {
+                        return Ok(pid);
+                    }
+                }
+            }
+            if sid == DEAD
+                || (!epsilons.looks().is_empty()
+                    && !self.nfa.look_matcher().matches_set_inline(
+                        epsilons.looks(),
+                        input.haystack(),
+                        at,
+                    ))
+            {
+                return Ok(pid);
+            }
+            epsilons.slots().apply(at, cache.explicit_slots());
+        }
+        if next_sid >= self.min_match_id {
+            self.find_match(
+                cache,
+                input,
+                input.end(),
+                next_sid,
+                slots,
+                &mut pid,
+            );
+        }
+        Ok(pid)
+    }
+
+    /// Assumes 'sid' is a match state and looks for whether a match can
+    /// be reported. If so, appropriate offsets are written to 'slots' and
+    /// 'matched_pid' is set to the matching pattern ID.
+    ///
+    /// Even when 'sid' is a match state, it's possible that a match won't
+    /// be reported. For example, when the conditional epsilon transitions
+    /// leading to the match state aren't satisfied at the given position in
+    /// the haystack.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_match(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        at: usize,
+        sid: StateID,
+        slots: &mut [Option<NonMaxUsize>],
+        matched_pid: &mut Option<PatternID>,
+    ) -> bool {
+        debug_assert!(sid >= self.min_match_id);
+        let pateps = self.pattern_epsilons(sid);
+        let epsilons = pateps.epsilons();
+        if !epsilons.looks().is_empty()
+            && !self.nfa.look_matcher().matches_set_inline(
+                epsilons.looks(),
+                input.haystack(),
+                at,
+            )
+        {
+            return false;
+        }
+        let pid = pateps.pattern_id_unchecked();
+        // This calculation is always correct because we know our 'pid' is
+        // valid and thus we know that the slot indices for it are valid.
+        let slot_end = pid.as_usize().wrapping_mul(2).wrapping_add(1);
+        // Set the implicit 'end' slot for the matching pattern. (The 'start'
+        // slot was set at the beginning of the search.)
+        if slot_end < slots.len() {
+            slots[slot_end] = NonMaxUsize::new(at);
+        }
+        // If the caller provided enough room, copy the previously recorded
+        // explicit slots from our scratch space to the caller provided slots.
+        // We *also* need to set any explicit slots that are active as part of
+        // the path to the match state.
+        if self.explicit_slot_start < slots.len() {
+            // NOTE: The 'cache.explicit_slots()' slice is setup at the
+            // beginning of every search such that it is guaranteed to return a
+            // slice of length equivalent to 'slots[explicit_slot_start..]'.
+            slots[self.explicit_slot_start..]
+                .copy_from_slice(cache.explicit_slots());
+            epsilons.slots().apply(at, &mut slots[self.explicit_slot_start..]);
+        }
+        *matched_pid = Some(pid);
+        true
+    }
+}
+
+impl DFA {
+    /// Returns the anchored start state for matching any pattern in this DFA.
+    fn start(&self) -> StateID {
+        self.starts[0]
+    }
+
+    /// Returns the anchored start state for matching the given pattern. If
+    /// 'starts_for_each_pattern' was not enabled, then this returns an error.
+    /// If the given pattern is not in this DFA, then `Ok(None)` is returned.
+    fn start_pattern(&self, pid: PatternID) -> Result<StateID, MatchError> {
+        if !self.config.get_starts_for_each_pattern() {
+            return Err(MatchError::unsupported_anchored(Anchored::Pattern(
+                pid,
+            )));
+        }
+        // 'starts' always has non-zero length. The first entry is always the
+        // anchored starting state for all patterns, and the following entries
+        // are optional and correspond to the anchored starting states for
+        // patterns at pid+1. Thus, starts.len()-1 corresponds to the total
+        // number of patterns that one can explicitly search for. (And it may
+        // be zero.)
+        Ok(self.starts.get(pid.one_more()).copied().unwrap_or(DEAD))
+    }
+
+    /// Returns the transition from the given state ID and byte of input. The
+    /// transition includes the next state ID, the slots that should be saved
+    /// and any conditional epsilon transitions that must be satisfied in order
+    /// to take this transition.
+    fn transition(&self, sid: StateID, byte: u8) -> Transition {
+        let offset = sid.as_usize() << self.stride2();
+        let class = self.classes.get(byte).as_usize();
+        self.table[offset + class]
+    }
+
+    /// Set the transition from the given state ID and byte of input to the
+    /// transition given.
+    fn set_transition(&mut self, sid: StateID, byte: u8, to: Transition) {
+        let offset = sid.as_usize() << self.stride2();
+        let class = self.classes.get(byte).as_usize();
+        self.table[offset + class] = to;
+    }
+
+    /// Return an iterator of "sparse" transitions for the given state ID.
+    /// "sparse" in this context means that consecutive transitions that are
+    /// equivalent are returned as one group, and transitions to the DEAD state
+    /// are ignored.
+    ///
+    /// This winds up being useful for debug printing, since it's much terser
+    /// to display runs of equivalent transitions than the transition for every
+    /// possible byte value. Indeed, in practice, it's very common for runs
+    /// of equivalent transitions to appear.
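+    ///
+    /// For example, a state where every byte 'a'-'z' leads to the same next
+    /// state shows up as a single entry in the [`Debug`] output below, along
+    /// the lines of this sketch rather than 26 separate entries:
+    ///
+    /// ```text
+    /// 000002: 'a'-'z' => 3
+    /// ```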
+    fn sparse_transitions(&self, sid: StateID) -> SparseTransitionIter<'_> {
+        let start = sid.as_usize() << self.stride2();
+        let end = start + self.alphabet_len();
+        SparseTransitionIter {
+            it: self.table[start..end].iter().enumerate(),
+            cur: None,
+        }
+    }
+
+    /// Return the pattern epsilons for the given state ID.
+    ///
+    /// If the given state ID does not correspond to a match state ID, then the
+    /// pattern epsilons returned is empty.
+    fn pattern_epsilons(&self, sid: StateID) -> PatternEpsilons {
+        let offset = sid.as_usize() << self.stride2();
+        PatternEpsilons(self.table[offset + self.pateps_offset].0)
+    }
+
+    /// Set the pattern epsilons for the given state ID.
+    fn set_pattern_epsilons(&mut self, sid: StateID, pateps: PatternEpsilons) {
+        let offset = sid.as_usize() << self.stride2();
+        self.table[offset + self.pateps_offset] = Transition(pateps.0);
+    }
+
+    /// Returns the state ID prior to the one given. This returns None if the
+    /// given ID is the first DFA state.
+    fn prev_state_id(&self, id: StateID) -> Option<StateID> {
+        if id == DEAD {
+            None
+        } else {
+            // CORRECTNESS: Since 'id' is not the first state, subtracting 1
+            // is always valid.
+            Some(StateID::new_unchecked(id.as_usize().checked_sub(1).unwrap()))
+        }
+    }
+
+    /// Returns the state ID of the last state in this DFA's transition table.
+    /// "last" in this context means the last state to appear in memory, i.e.,
+    /// the one with the greatest ID.
+    fn last_state_id(&self) -> StateID {
+        // CORRECTNESS: A DFA table is always non-empty since it always at
+        // least contains a DEAD state. Since every state has the same stride,
+        // we can just compute what the "next" state ID would have been and
+        // then subtract 1 from it.
+        StateID::new_unchecked(
+            (self.table.len() >> self.stride2()).checked_sub(1).unwrap(),
+        )
+    }
+
+    /// Move the transitions from 'id1' to 'id2' and vice versa.
+    ///
+    /// WARNING: This does not update the rest of the transition table to have
+    /// transitions to 'id1' changed to 'id2' and vice versa. This merely moves
+    /// the states in memory.
+    pub(super) fn swap_states(&mut self, id1: StateID, id2: StateID) {
+        let o1 = id1.as_usize() << self.stride2();
+        let o2 = id2.as_usize() << self.stride2();
+        for b in 0..self.stride() {
+            self.table.swap(o1 + b, o2 + b);
+        }
+    }
+
+    /// Map all state IDs in this DFA (transition table + start states)
+    /// according to the closure given.
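+    ///
+    /// For example, after a `swap_states` call, one might patch up all
+    /// references with a closure along these lines (a sketch):
+    ///
+    /// ```text
+    /// dfa.remap(|sid| {
+    ///     if sid == id1 { id2 } else if sid == id2 { id1 } else { sid }
+    /// });
+    /// ```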
+    pub(super) fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
+        for i in 0..self.state_len() {
+            let offset = i << self.stride2();
+            for b in 0..self.alphabet_len() {
+                let next = self.table[offset + b].state_id();
+                self.table[offset + b].set_state_id(map(next));
+            }
+        }
+        for i in 0..self.starts.len() {
+            self.starts[i] = map(self.starts[i]);
+        }
+    }
+}
+
+impl core::fmt::Debug for DFA {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        fn debug_state_transitions(
+            f: &mut core::fmt::Formatter,
+            dfa: &DFA,
+            sid: StateID,
+        ) -> core::fmt::Result {
+            for (i, (start, end, trans)) in
+                dfa.sparse_transitions(sid).enumerate()
+            {
+                let next = trans.state_id();
+                if i > 0 {
+                    write!(f, ", ")?;
+                }
+                if start == end {
+                    write!(
+                        f,
+                        "{:?} => {:?}",
+                        DebugByte(start),
+                        next.as_usize(),
+                    )?;
+                } else {
+                    write!(
+                        f,
+                        "{:?}-{:?} => {:?}",
+                        DebugByte(start),
+                        DebugByte(end),
+                        next.as_usize(),
+                    )?;
+                }
+                if trans.match_wins() {
+                    write!(f, " (MW)")?;
+                }
+                if !trans.epsilons().is_empty() {
+                    write!(f, " ({:?})", trans.epsilons())?;
+                }
+            }
+            Ok(())
+        }
+
+        writeln!(f, "onepass::DFA(")?;
+        for index in 0..self.state_len() {
+            let sid = StateID::must(index);
+            let pateps = self.pattern_epsilons(sid);
+            if sid == DEAD {
+                write!(f, "D ")?;
+            } else if pateps.pattern_id().is_some() {
+                write!(f, "* ")?;
+            } else {
+                write!(f, "  ")?;
+            }
+            write!(f, "{:06?}", sid.as_usize())?;
+            if !pateps.is_empty() {
+                write!(f, " ({:?})", pateps)?;
+            }
+            write!(f, ": ")?;
+            debug_state_transitions(f, self, sid)?;
+            write!(f, "\n")?;
+        }
+        writeln!(f, "")?;
+        for (i, &sid) in self.starts.iter().enumerate() {
+            if i == 0 {
+                writeln!(f, "START(ALL): {:?}", sid.as_usize())?;
+            } else {
+                writeln!(
+                    f,
+                    "START(pattern: {:?}): {:?}",
+                    i - 1,
+                    sid.as_usize(),
+                )?;
+            }
+        }
+        writeln!(f, "state length: {:?}", self.state_len())?;
+        writeln!(f, "pattern length: {:?}", self.pattern_len())?;
+        writeln!(f, ")")?;
+        Ok(())
+    }
+}
+
+/// An iterator over groups of consecutive equivalent transitions in a single
+/// state.
+#[derive(Debug)]
+struct SparseTransitionIter<'a> {
+    it: core::iter::Enumerate<core::slice::Iter<'a, Transition>>,
+    cur: Option<(u8, u8, Transition)>,
+}
+
+impl<'a> Iterator for SparseTransitionIter<'a> {
+    type Item = (u8, u8, Transition);
+
+    fn next(&mut self) -> Option<(u8, u8, Transition)> {
+        while let Some((b, &trans)) = self.it.next() {
+            // Fine because we'll never have more than u8::MAX transitions in
+            // one state.
+            let b = b.as_u8();
+            let (prev_start, prev_end, prev_trans) = match self.cur {
+                Some(t) => t,
+                None => {
+                    self.cur = Some((b, b, trans));
+                    continue;
+                }
+            };
+            if prev_trans == trans {
+                self.cur = Some((prev_start, b, prev_trans));
+            } else {
+                self.cur = Some((b, b, trans));
+                if prev_trans.state_id() != DEAD {
+                    return Some((prev_start, prev_end, prev_trans));
+                }
+            }
+        }
+        if let Some((start, end, trans)) = self.cur.take() {
+            if trans.state_id() != DEAD {
+                return Some((start, end, trans));
+            }
+        }
+        None
+    }
+}
+
+/// A cache represents mutable state that a one-pass [`DFA`] requires during a
+/// search.
+///
+/// For a given one-pass DFA, its corresponding cache may be created either via
+/// [`DFA::create_cache`], or via [`Cache::new`]. They are equivalent in every
+/// way, except the former does not require explicitly importing `Cache`.
+///
+/// A particular `Cache` is coupled with the one-pass DFA from which it was
+/// created. It may only be used with that one-pass DFA. A cache and its
+/// allocations may be re-purposed via [`Cache::reset`], in which case, it can
+/// only be used with the new one-pass DFA (and not the old one).
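+///
+/// A typical creation-and-use sketch, mirroring the search examples above:
+///
+/// ```
+/// use regex_automata::{dfa::onepass::DFA, Match};
+///
+/// let re = DFA::new("foo[0-9]+")?;
+/// // Equivalent to `Cache::new(&re)`.
+/// let mut cache = re.create_cache();
+/// assert_eq!(Some(Match::must(0, 0..8)), re.find(&mut cache, "foo12345"));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```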
+#[derive(Clone, Debug)]
+pub struct Cache {
+    /// Scratch space used to store slots during a search. Basically, we use
+    /// the caller provided slots to store slots known when a match occurs.
+    /// But after a match occurs, we might continue a search but ultimately
+    /// fail to extend the match. When continuing the search, we need some
+    /// place to store candidate capture offsets without overwriting the slot
+    /// offsets recorded for the most recently seen match.
+    explicit_slots: Vec<Option<NonMaxUsize>>,
+    /// The number of slots in the caller-provided 'Captures' value for the
+    /// current search. This is always at most 'explicit_slots.len()', but
+    /// might be less than it, if the caller provided fewer slots to fill.
+    explicit_slot_len: usize,
+}
+
+impl Cache {
+    /// Create a new [`onepass::DFA`](DFA) cache.
+    ///
+    /// A potentially more convenient routine to create a cache is
+    /// [`DFA::create_cache`], as it does not require also importing the
+    /// `Cache` type.
+    ///
+    /// If you want to reuse the returned `Cache` with some other one-pass DFA,
+    /// then you must call [`Cache::reset`] with the desired one-pass DFA.
+    pub fn new(re: &DFA) -> Cache {
+        let mut cache = Cache { explicit_slots: vec![], explicit_slot_len: 0 };
+        cache.reset(re);
+        cache
+    }
+
+    /// Reset this cache such that it can be used for searching with a
+    /// different [`onepass::DFA`](DFA).
+    ///
+    /// A cache reset permits reusing memory already allocated in this cache
+    /// with a different one-pass DFA.
+    ///
+    /// # Example
+    ///
+    /// This shows how to re-purpose a cache for use with a different one-pass
+    /// DFA.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::onepass::DFA, Match};
+    ///
+    /// let re1 = DFA::new(r"\w")?;
+    /// let re2 = DFA::new(r"\W")?;
+    /// let mut caps1 = re1.create_captures();
+    /// let mut caps2 = re2.create_captures();
+    ///
+    /// let mut cache = re1.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..2)),
+    ///     { re1.captures(&mut cache, "Δ", &mut caps1); caps1.get_match() },
+    /// );
+    ///
+    /// // Using 'cache' with re2 is not allowed. It may result in panics or
+    /// // incorrect results. In order to re-purpose the cache, we must reset
+    /// // it with the one-pass DFA we'd like to use it with.
+    /// //
+    /// // Similarly, after this reset, using the cache with 're1' is also not
+    /// // allowed.
+    /// re2.reset_cache(&mut cache);
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..3)),
+    ///     { re2.captures(&mut cache, "☃", &mut caps2); caps2.get_match() },
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn reset(&mut self, re: &DFA) {
+        let explicit_slot_len = re.get_nfa().group_info().explicit_slot_len();
+        self.explicit_slots.resize(explicit_slot_len, None);
+        self.explicit_slot_len = explicit_slot_len;
+    }
+
+    /// Returns the heap memory usage, in bytes, of this cache.
+    ///
+    /// This does **not** include the stack size used up by this cache. To
+    /// compute that, use `std::mem::size_of::<Cache>()`.
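+    ///
+    /// A minimal sketch of inspecting it (the exact number depends on the
+    /// regex and the platform's pointer width):
+    ///
+    /// ```
+    /// use regex_automata::dfa::onepass::DFA;
+    ///
+    /// let re = DFA::new("(a)(b)(c)")?;
+    /// let cache = re.create_cache();
+    /// // Three explicit capturing groups require six explicit slots of
+    /// // heap scratch space.
+    /// assert!(cache.memory_usage() > 0);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```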
+    pub fn memory_usage(&self) -> usize {
+        self.explicit_slots.len() * core::mem::size_of::<Option<NonMaxUsize>>()
+    }
+
+    fn explicit_slots(&mut self) -> &mut [Option<NonMaxUsize>] {
+        &mut self.explicit_slots[..self.explicit_slot_len]
+    }
+
+    fn setup_search(&mut self, explicit_slot_len: usize) {
+        self.explicit_slot_len = explicit_slot_len;
+    }
+}
+
+/// Represents a single transition in a one-pass DFA.
+///
+/// The high 21 bits correspond to the state ID. The bit following corresponds
+/// to the special "match wins" flag. The remaining low 42 bits correspond to
+/// the transition epsilons, which contain the slots that should be saved when
+/// this transition is followed and the conditional epsilon transitions that
+/// must be satisfied in order to follow this transition.
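+///
+/// A sketch of the packing described above (bit 63 on the left):
+///
+/// ```text
+/// bit: 63 ......... 43 | 42 | 41 .............................. 0
+///      state ID        | MW | epsilons (slots + look-around set)
+/// ```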
+#[derive(Clone, Copy, Eq, PartialEq)]
+struct Transition(u64);
+
+impl Transition {
+    const STATE_ID_BITS: u64 = 21;
+    const STATE_ID_SHIFT: u64 = 64 - Transition::STATE_ID_BITS;
+    const STATE_ID_LIMIT: u64 = 1 << Transition::STATE_ID_BITS;
+    const MATCH_WINS_SHIFT: u64 = 64 - (Transition::STATE_ID_BITS + 1);
+    const INFO_MASK: u64 = 0x000003FF_FFFFFFFF;
+
+    /// Return a new transition to the given state ID with the given epsilons.
+    fn new(match_wins: bool, sid: StateID, epsilons: Epsilons) -> Transition {
+        let match_wins =
+            if match_wins { 1 << Transition::MATCH_WINS_SHIFT } else { 0 };
+        let sid = sid.as_u64() << Transition::STATE_ID_SHIFT;
+        Transition(sid | match_wins | epsilons.0)
+    }
+
+    /// Returns true if and only if this transition points to the DEAD state.
+    fn is_dead(self) -> bool {
+        self.state_id() == DEAD
+    }
+
+    /// Return whether this transition has a "match wins" property.
+    ///
+    /// When a transition has this property, it means that if a match has been
+    /// found and the search uses leftmost-first semantics, then that match
+    /// should be returned immediately instead of continuing on.
+    ///
+    /// The "match wins" name comes from RE2, which uses a pretty much
+    /// identical mechanism for implementing leftmost-first semantics.
+    fn match_wins(&self) -> bool {
+        (self.0 >> Transition::MATCH_WINS_SHIFT & 1) == 1
+    }
+
+    /// Return the "next" state ID that this transition points to.
+    fn state_id(&self) -> StateID {
+        // OK because a Transition has a valid StateID in its upper bits by
+        // construction. The cast to usize is also correct, even on 16-bit
+        // targets because, again, we know the upper bits is a valid StateID,
+        // which can never overflow usize on any supported target.
+        StateID::new_unchecked(
+            (self.0 >> Transition::STATE_ID_SHIFT).as_usize(),
+        )
+    }
+
+    /// Set the "next" state ID in this transition.
+    fn set_state_id(&mut self, sid: StateID) {
+        *self = Transition::new(self.match_wins(), sid, self.epsilons());
+    }
+
+    /// Return the epsilons embedded in this transition.
+    fn epsilons(&self) -> Epsilons {
+        Epsilons(self.0 & Transition::INFO_MASK)
+    }
+}
+
+impl core::fmt::Debug for Transition {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        if self.is_dead() {
+            return write!(f, "0");
+        }
+        write!(f, "{}", self.state_id().as_usize())?;
+        if self.match_wins() {
+            write!(f, "-MW")?;
+        }
+        if !self.epsilons().is_empty() {
+            write!(f, "-{:?}", self.epsilons())?;
+        }
+        Ok(())
+    }
+}
+
+/// A representation of a match state's pattern ID along with the epsilons for
+/// when a match occurs.
+///
+/// A match state in a one-pass DFA, unlike in a more general DFA, has exactly
+/// one pattern ID. If it had more, then the original NFA would not have been
+/// one-pass.
+///
+/// The "epsilons" part of this corresponds to what was found in the epsilon
+/// transitions between the transition taken in the last byte of input and the
+/// ultimate match state. This might include saving slots and/or conditional
+/// epsilon transitions that must be satisfied before one can report the match.
+///
+/// Technically, every state has room for a 'PatternEpsilons', but it is only
+/// ever non-empty for match states.
+#[derive(Clone, Copy)]
+struct PatternEpsilons(u64);
+
+impl PatternEpsilons {
+    const PATTERN_ID_BITS: u64 = 22;
+    const PATTERN_ID_SHIFT: u64 = 64 - PatternEpsilons::PATTERN_ID_BITS;
+    // A sentinel value indicating that this is not a match state. We don't
+    // use 0 since 0 is a valid pattern ID.
+    const PATTERN_ID_NONE: u64 = 0x00000000_003FFFFF;
+    const PATTERN_ID_LIMIT: u64 = PatternEpsilons::PATTERN_ID_NONE;
+    const PATTERN_ID_MASK: u64 = 0xFFFFFC00_00000000;
+    const EPSILONS_MASK: u64 = 0x000003FF_FFFFFFFF;
+
+    /// Return a new empty pattern epsilons that has no pattern ID and has no
+    /// epsilons. This is suitable for non-match states.
+    fn empty() -> PatternEpsilons {
+        PatternEpsilons(
+            PatternEpsilons::PATTERN_ID_NONE
+                << PatternEpsilons::PATTERN_ID_SHIFT,
+        )
+    }
+
+    /// Whether this pattern epsilons is empty or not. It's empty when it has
+    /// no pattern ID and an empty epsilons.
+    fn is_empty(self) -> bool {
+        self.pattern_id().is_none() && self.epsilons().is_empty()
+    }
+
+    /// Return the pattern ID in this pattern epsilons if one exists.
+    fn pattern_id(self) -> Option<PatternID> {
+        let pid = self.0 >> PatternEpsilons::PATTERN_ID_SHIFT;
+        if pid == PatternEpsilons::PATTERN_ID_LIMIT {
+            None
+        } else {
+            Some(PatternID::new_unchecked(pid.as_usize()))
+        }
+    }
+
+    /// Returns the pattern ID without checking whether it's valid. If this is
+    /// called and there is no pattern ID in this `PatternEpsilons`, then this
+    /// will likely produce an incorrect result or possibly even a panic or
+    /// an overflow. But safety will not be violated.
+    ///
+    /// This is useful when you know a particular state is a match state. If
+    /// it's a match state, then it must have a pattern ID.
+    fn pattern_id_unchecked(self) -> PatternID {
+        let pid = self.0 >> PatternEpsilons::PATTERN_ID_SHIFT;
+        PatternID::new_unchecked(pid.as_usize())
+    }
+
+    /// Return a new pattern epsilons with the given pattern ID, but the same
+    /// epsilons.
+    fn set_pattern_id(self, pid: PatternID) -> PatternEpsilons {
+        PatternEpsilons(
+            (pid.as_u64() << PatternEpsilons::PATTERN_ID_SHIFT)
+                | (self.0 & PatternEpsilons::EPSILONS_MASK),
+        )
+    }
+
+    /// Return the epsilons part of this pattern epsilons.
+    fn epsilons(self) -> Epsilons {
+        Epsilons(self.0 & PatternEpsilons::EPSILONS_MASK)
+    }
+
+    /// Return a new pattern epsilons with the given epsilons, but the same
+    /// pattern ID.
+    fn set_epsilons(self, epsilons: Epsilons) -> PatternEpsilons {
+        PatternEpsilons(
+            (self.0 & PatternEpsilons::PATTERN_ID_MASK)
+                | (u64::from(epsilons.0) & PatternEpsilons::EPSILONS_MASK),
+        )
+    }
+}
+
+impl core::fmt::Debug for PatternEpsilons {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        if self.is_empty() {
+            return write!(f, "N/A");
+        }
+        if let Some(pid) = self.pattern_id() {
+            write!(f, "{}", pid.as_usize())?;
+        }
+        if !self.epsilons().is_empty() {
+            if self.pattern_id().is_some() {
+                write!(f, "/")?;
+            }
+            write!(f, "{:?}", self.epsilons())?;
+        }
+        Ok(())
+    }
+}
+
+/// Epsilons represents all of the NFA epsilon transitions that went into a
+/// single transition in a single DFA state. In this case, it only represents
+/// the epsilon transitions that have some kind of non-consuming side effect:
+/// either the transition requires storing the current position of the search
+/// into a slot, or the transition is conditional and requires the current
+/// position in the input to satisfy an assertion before the transition may be
+/// taken.
+///
+/// This folds the cumulative effect of a group of NFA states (all connected
+/// by epsilon transitions) down into a single set of bits. While these bits
+/// can represent all possible conditional epsilon transitions, it only permits
+/// storing up to a somewhat small number of slots.
+///
+/// Epsilons is represented as a 42-bit integer. For example, it is packed into
+/// the lower 42 bits of a `Transition`. (Where the high 22 bits contain a
+/// `StateID` and a special "match wins" property.)
+#[derive(Clone, Copy)]
+struct Epsilons(u64);
+
+impl Epsilons {
+    const SLOT_MASK: u64 = 0x000003FF_FFFFFC00;
+    const SLOT_SHIFT: u64 = 10;
+    const LOOK_MASK: u64 = 0x00000000_000003FF;
+
+    /// Create a new empty epsilons. It has no slots and no assertions that
+    /// need to be satisfied.
+    fn empty() -> Epsilons {
+        Epsilons(0)
+    }
+
+    /// Returns true if this epsilons contains no slots and no assertions.
+    fn is_empty(self) -> bool {
+        self.0 == 0
+    }
+
+    /// Returns the slot epsilon transitions.
+    fn slots(self) -> Slots {
+        Slots((self.0 >> Epsilons::SLOT_SHIFT).low_u32())
+    }
+
+    /// Set the slot epsilon transitions.
+    fn set_slots(self, slots: Slots) -> Epsilons {
+        Epsilons(
+            (u64::from(slots.0) << Epsilons::SLOT_SHIFT)
+                | (self.0 & Epsilons::LOOK_MASK),
+        )
+    }
+
+    /// Return the set of look-around assertions in these epsilon transitions.
+    fn looks(self) -> LookSet {
+        LookSet { bits: (self.0 & Epsilons::LOOK_MASK).low_u32() }
+    }
+
+    /// Set the look-around assertions on these epsilon transitions.
+    fn set_looks(self, look_set: LookSet) -> Epsilons {
+        Epsilons(
+            (self.0 & Epsilons::SLOT_MASK)
+                | (u64::from(look_set.bits) & Epsilons::LOOK_MASK),
+        )
+    }
+}
+
+impl core::fmt::Debug for Epsilons {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let mut wrote = false;
+        if !self.slots().is_empty() {
+            write!(f, "{:?}", self.slots())?;
+            wrote = true;
+        }
+        if !self.looks().is_empty() {
+            if wrote {
+                write!(f, "/")?;
+            }
+            write!(f, "{:?}", self.looks())?;
+            wrote = true;
+        }
+        if !wrote {
+            write!(f, "N/A")?;
+        }
+        Ok(())
+    }
+}
+
+/// The set of epsilon transitions indicating that the current position in a
+/// search should be saved to a slot.
+///
+/// This *only* represents explicit slots. So for example, the pattern
+/// `[a-z]+([0-9]+)([a-z]+)` has:
+///
+/// * 3 capturing groups, thus 6 slots.
+/// * 1 implicit capturing group, thus 2 implicit slots.
+/// * 2 explicit capturing groups, thus 4 explicit slots.
+///
+/// While implicit slots are represented by epsilon transitions in an NFA, we
+/// do not explicitly represent them here. Instead, implicit slots are assumed
+/// to be present and handled automatically in the search code. Therefore,
+/// that means we only need to represent explicit slots in our epsilon
+/// transitions.
+///
+/// Its representation is a bit set. The bit 'i' is set if and only if there
+/// exists an explicit slot at index 'c', where 'c = (#patterns * 2) + i'. That
+/// is, the bit 'i' corresponds to the first explicit slot and the first
+/// explicit slot appears immediately following the last implicit slot. (If
+/// this is confusing, see `GroupInfo` for more details on how slots work.)
+///
+/// A single `Slots` represents all the active slots in a sub-graph of an NFA,
+/// where all the states are connected by epsilon transitions. In effect, when
+/// traversing the one-pass DFA during a search, all slots set in a particular
+/// transition must be captured by recording the current search position.
+///
+/// The API of `Slots` requires the caller to handle the explicit slot offset.
+/// That is, a `Slots` doesn't know where the explicit slots start for a
+/// particular NFA. Thus, if the caller sees that bit 'i' is set, then they
+/// need to do the arithmetic above to find 'c', which is the real actual slot
+/// index in the corresponding NFA.
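+///
+/// A sketch of that arithmetic for the single-pattern example above, where
+/// the two implicit slots occupy NFA slot indices 0 and 1:
+///
+/// ```text
+/// bit 'i' in Slots:   0  1  2  3
+/// NFA slot 'c':       2  3  4  5   (c = (#patterns * 2) + i)
+/// ```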
+#[derive(Clone, Copy)]
+struct Slots(u32);
+
+impl Slots {
+    const LIMIT: usize = 32;
+
+    /// Insert the slot at the given bit index.
+    fn insert(self, slot: usize) -> Slots {
+        debug_assert!(slot < Slots::LIMIT);
+        Slots(self.0 | (1 << slot.as_u32()))
+    }
+
+    /// Remove the slot at the given bit index.
+    fn remove(self, slot: usize) -> Slots {
+        debug_assert!(slot < Slots::LIMIT);
+        Slots(self.0 & !(1 << slot.as_u32()))
+    }
+
+    /// Returns true if and only if this set contains no slots.
+    fn is_empty(self) -> bool {
+        self.0 == 0
+    }
+
+    /// Returns an iterator over all of the set bits in this set.
+    fn iter(self) -> SlotsIter {
+        SlotsIter { slots: self }
+    }
+
+    /// For the position `at` in the current haystack, copy it to
+    /// `caller_explicit_slots` for all slots that are in this set.
+    ///
+    /// Callers may pass a slice of any length. Slots in this set bigger than
+    /// the length of the given explicit slots are simply skipped.
+    ///
+    /// The slice *must* correspond only to the explicit slots and the first
+    /// element of the slice must always correspond to the first explicit slot
+    /// in the corresponding NFA.
+    fn apply(
+        self,
+        at: usize,
+        caller_explicit_slots: &mut [Option<NonMaxUsize>],
+    ) {
+        if self.is_empty() {
+            return;
+        }
+        let at = NonMaxUsize::new(at);
+        for slot in self.iter() {
+            if slot >= caller_explicit_slots.len() {
+                break;
+            }
+            caller_explicit_slots[slot] = at;
+        }
+    }
+}
+
+impl core::fmt::Debug for Slots {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "S")?;
+        for slot in self.iter() {
+            write!(f, "-{:?}", slot)?;
+        }
+        Ok(())
+    }
+}
+
+/// An iterator over all of the bits set in a slot set.
+///
+/// This returns the bit index that is set, so callers may need to offset it
+/// to get the actual NFA slot index.
+#[derive(Debug)]
+struct SlotsIter {
+    slots: Slots,
+}
+
+impl Iterator for SlotsIter {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        // Number of zeroes here is always <= u8::MAX, and so fits in a usize.
+        let slot = self.slots.0.trailing_zeros().as_usize();
+        if slot >= Slots::LIMIT {
+            return None;
+        }
+        self.slots = self.slots.remove(slot);
+        Some(slot)
+    }
+}
+
+/// An error that occurred during the construction of a one-pass DFA.
+///
+/// This error does not provide many introspection capabilities. There are
+/// generally only two things you can do with it:
+///
+/// * Obtain a human readable message via its `std::fmt::Display` impl.
+/// * Access an underlying [`thompson::BuildError`] type from its `source`
+/// method via the `std::error::Error` trait. This error only occurs when using
+/// convenience routines for building a one-pass DFA directly from a pattern
+/// string.
+///
+/// When the `std` feature is enabled, this implements the `std::error::Error`
+/// trait.
+#[derive(Clone, Debug)]
+pub struct BuildError {
+    kind: BuildErrorKind,
+}
+
+/// The kind of error that occurred during the construction of a one-pass DFA.
+#[derive(Clone, Debug)]
+enum BuildErrorKind {
+    NFA(crate::nfa::thompson::BuildError),
+    Word(UnicodeWordBoundaryError),
+    TooManyStates { limit: u64 },
+    TooManyPatterns { limit: u64 },
+    UnsupportedLook { look: Look },
+    ExceededSizeLimit { limit: usize },
+    NotOnePass { msg: &'static str },
+}
+
+impl BuildError {
+    fn nfa(err: crate::nfa::thompson::BuildError) -> BuildError {
+        BuildError { kind: BuildErrorKind::NFA(err) }
+    }
+
+    fn word(err: UnicodeWordBoundaryError) -> BuildError {
+        BuildError { kind: BuildErrorKind::Word(err) }
+    }
+
+    fn too_many_states(limit: u64) -> BuildError {
+        BuildError { kind: BuildErrorKind::TooManyStates { limit } }
+    }
+
+    fn too_many_patterns(limit: u64) -> BuildError {
+        BuildError { kind: BuildErrorKind::TooManyPatterns { limit } }
+    }
+
+    fn unsupported_look(look: Look) -> BuildError {
+        BuildError { kind: BuildErrorKind::UnsupportedLook { look } }
+    }
+
+    fn exceeded_size_limit(limit: usize) -> BuildError {
+        BuildError { kind: BuildErrorKind::ExceededSizeLimit { limit } }
+    }
+
+    fn not_one_pass(msg: &'static str) -> BuildError {
+        BuildError { kind: BuildErrorKind::NotOnePass { msg } }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for BuildError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        use self::BuildErrorKind::*;
+
+        match self.kind {
+            NFA(ref err) => Some(err),
+            Word(ref err) => Some(err),
+            _ => None,
+        }
+    }
+}
+
+impl core::fmt::Display for BuildError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        use self::BuildErrorKind::*;
+
+        match self.kind {
+            NFA(_) => write!(f, "error building NFA"),
+            Word(_) => write!(f, "NFA contains Unicode word boundary"),
+            TooManyStates { limit } => write!(
+                f,
+                "one-pass DFA exceeded a limit of {:?} for number of states",
+                limit,
+            ),
+            TooManyPatterns { limit } => write!(
+                f,
+                "one-pass DFA exceeded a limit of {:?} for number of patterns",
+                limit,
+            ),
+            UnsupportedLook { look } => write!(
+                f,
+                "one-pass DFA does not support the {:?} assertion",
+                look,
+            ),
+            ExceededSizeLimit { limit } => write!(
+                f,
+                "one-pass DFA exceeded size limit of {:?} during building",
+                limit,
+            ),
+            NotOnePass { msg } => write!(
+                f,
+                "one-pass DFA could not be built because \
+                 pattern is not one-pass: {}",
+                msg,
+            ),
+        }
+    }
+}
+
+#[cfg(all(test, feature = "syntax"))]
+mod tests {
+    use alloc::string::ToString;
+
+    use super::*;
+
+    #[test]
+    fn fail_conflicting_transition() {
+        let predicate = |err: &str| err.contains("conflicting transition");
+
+        let err = DFA::new(r"a*[ab]").unwrap_err().to_string();
+        assert!(predicate(&err), "{}", err);
+    }
+
+    #[test]
+    fn fail_multiple_epsilon() {
+        let predicate = |err: &str| {
+            err.contains("multiple epsilon transitions to same state")
+        };
+
+        let err = DFA::new(r"(^|$)a").unwrap_err().to_string();
+        assert!(predicate(&err), "{}", err);
+    }
+
+    #[test]
+    fn fail_multiple_match() {
+        let predicate = |err: &str| {
+            err.contains("multiple epsilon transitions to match state")
+        };
+
+        let err = DFA::new_many(&[r"^", r"$"]).unwrap_err().to_string();
+        assert!(predicate(&err), "{}", err);
+    }
+
+    // This test is meant to build a one-pass regex with the maximum number of
+    // possible slots.
+    //
+    // NOTE: Remember that the slot limit only applies to explicit capturing
+    // groups. Any number of implicit capturing groups is supported (up to the
+    // maximum number of supported patterns), since implicit groups are handled
+    // by the search loop itself.
+    #[test]
+    fn max_slots() {
+        // One too many...
+        let pat = r"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)(n)(o)(p)(q)";
+        assert!(DFA::new(pat).is_err());
+        // Just right.
+        let pat = r"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)(n)(o)(p)";
+        assert!(DFA::new(pat).is_ok());
+    }
+
+    // This test ensures that the one-pass DFA works with all look-around
+    // assertions that we expect it to work with.
+    //
+    // The utility of this test is that each one-pass transition has a small
+    // amount of space to store look-around assertions. Currently, there is
+    // logic in the one-pass constructor to ensure there aren't more than ten
+    // possible assertions. And indeed, there are only ten possible assertions
+    // (at time of writing), so this is okay. But conceivably, more assertions
+    // could be added. So we check that things at least work with what we
+    // expect them to work with.
+    #[test]
+    fn assertions() {
+        // haystack anchors
+        assert!(DFA::new(r"^").is_ok());
+        assert!(DFA::new(r"$").is_ok());
+
+        // line anchors
+        assert!(DFA::new(r"(?m)^").is_ok());
+        assert!(DFA::new(r"(?m)$").is_ok());
+        assert!(DFA::new(r"(?Rm)^").is_ok());
+        assert!(DFA::new(r"(?Rm)$").is_ok());
+
+        // word boundaries
+        if cfg!(feature = "unicode-word-boundary") {
+            assert!(DFA::new(r"\b").is_ok());
+            assert!(DFA::new(r"\B").is_ok());
+        }
+        assert!(DFA::new(r"(?-u)\b").is_ok());
+        assert!(DFA::new(r"(?-u)\B").is_ok());
+    }
+
+    #[cfg(not(miri))] // takes too long on miri
+    #[test]
+    fn is_one_pass() {
+        use crate::util::syntax;
+
+        assert!(DFA::new(r"a*b").is_ok());
+        if cfg!(feature = "unicode-perl") {
+            assert!(DFA::new(r"\w").is_ok());
+        }
+        assert!(DFA::new(r"(?-u)\w*\s").is_ok());
+        assert!(DFA::new(r"(?s:.)*?").is_ok());
+        assert!(DFA::builder()
+            .syntax(syntax::Config::new().utf8(false))
+            .build(r"(?s-u:.)*?")
+            .is_ok());
+    }
+
+    #[test]
+    fn is_not_one_pass() {
+        assert!(DFA::new(r"a*a").is_err());
+        assert!(DFA::new(r"(?s-u:.)*?").is_err());
+        assert!(DFA::new(r"(?s:.)*?a").is_err());
+    }
+
+    #[cfg(not(miri))]
+    #[test]
+    fn is_not_one_pass_bigger() {
+        assert!(DFA::new(r"\w*\s").is_err());
+    }
+}
diff --git a/vendor/regex-automata/src/dfa/regex.rs b/vendor/regex-automata/src/dfa/regex.rs
new file mode 100644
index 0000000..5e7e6e3
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/regex.rs
@@ -0,0 +1,871 @@
+/*!
+A DFA-backed `Regex`.
+
+This module provides [`Regex`], which is defined generically over the
+[`Automaton`] trait. A `Regex` implements convenience routines you might have
+come to expect, such as finding the start/end of a match and iterating over
This `Regex` type is limited in its capabilities +to what a DFA can provide. Therefore, APIs involving capturing groups, for +example, are not provided. + +Internally, a `Regex` is composed of two DFAs. One is a "forward" DFA that +finds the end offset of a match, where as the other is a "reverse" DFA that +find the start offset of a match. + +See the [parent module](crate::dfa) for examples. +*/ + +#[cfg(feature = "alloc")] +use alloc::vec::Vec; + +#[cfg(feature = "dfa-build")] +use crate::dfa::dense::BuildError; +use crate::{ + dfa::{automaton::Automaton, dense}, + util::{iter, search::Input}, + Anchored, Match, MatchError, +}; +#[cfg(feature = "alloc")] +use crate::{ + dfa::{sparse, StartKind}, + util::search::MatchKind, +}; + +// When the alloc feature is enabled, the regex type sets its A type parameter +// to default to an owned dense DFA. But without alloc, we set no default. This +// makes things a lot more convenient in the common case, since writing out the +// DFA types is pretty annoying. +// +// Since we have two different definitions but only want to write one doc +// string, we use a macro to capture the doc and other attributes once and then +// repeat them for each definition. +macro_rules! define_regex_type { + ($(#[$doc:meta])*) => { + #[cfg(feature = "alloc")] + $(#[$doc])* + pub struct Regex { + forward: A, + reverse: A, + } + + #[cfg(not(feature = "alloc"))] + $(#[$doc])* + pub struct Regex { + forward: A, + reverse: A, + } + }; +} + +define_regex_type!( + /// A regular expression that uses deterministic finite automata for fast + /// searching. + /// + /// A regular expression is comprised of two DFAs, a "forward" DFA and a + /// "reverse" DFA. The forward DFA is responsible for detecting the end of + /// a match while the reverse DFA is responsible for detecting the start + /// of a match. Thus, in order to find the bounds of any given match, a + /// forward search must first be run followed by a reverse search. A match + /// found by the forward DFA guarantees that the reverse DFA will also find + /// a match. + /// + /// The type of the DFA used by a `Regex` corresponds to the `A` type + /// parameter, which must satisfy the [`Automaton`] trait. Typically, + /// `A` is either a [`dense::DFA`](crate::dfa::dense::DFA) or a + /// [`sparse::DFA`](crate::dfa::sparse::DFA), where dense DFAs use more + /// memory but search faster, while sparse DFAs use less memory but search + /// more slowly. + /// + /// # Crate features + /// + /// Note that despite what the documentation auto-generates, the _only_ + /// crate feature needed to use this type is `dfa-search`. You do _not_ + /// need to enable the `alloc` feature. + /// + /// By default, a regex's automaton type parameter is set to + /// `dense::DFA>` when the `alloc` feature is enabled. For most + /// in-memory work loads, this is the most convenient type that gives the + /// best search performance. When the `alloc` feature is disabled, no + /// default type is used. + /// + /// # When should I use this? + /// + /// Generally speaking, if you can afford the overhead of building a full + /// DFA for your regex, and you don't need things like capturing groups, + /// then this is a good choice if you're looking to optimize for matching + /// speed. Note however that its speed may be worse than a general purpose + /// regex engine if you don't provide a [`dense::Config::prefilter`] to the + /// underlying DFA. 
+    ///
+    /// # Sparse DFAs
+    ///
+    /// Since a `Regex` is generic over the [`Automaton`] trait, it can be
+    /// used with any kind of DFA. While this crate constructs dense DFAs by
+    /// default, it is easy enough to build corresponding sparse DFAs, and then
+    /// build a regex from them:
+    ///
+    /// ```
+    /// use regex_automata::dfa::regex::Regex;
+    ///
+    /// // First, build a regex that uses dense DFAs.
+    /// let dense_re = Regex::new("foo[0-9]+")?;
+    ///
+    /// // Second, build sparse DFAs from the forward and reverse dense DFAs.
+    /// let fwd = dense_re.forward().to_sparse()?;
+    /// let rev = dense_re.reverse().to_sparse()?;
+    ///
+    /// // Third, build a new regex from the constituent sparse DFAs.
+    /// let sparse_re = Regex::builder().build_from_dfas(fwd, rev);
+    ///
+    /// // A regex that uses sparse DFAs can be used just like with dense DFAs.
+    /// assert_eq!(true, sparse_re.is_match(b"foo123"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Alternatively, one can use a [`Builder`] to construct a sparse DFA
+    /// more succinctly. (Note though that dense DFAs are still constructed
+    /// first internally, and then converted to sparse DFAs, as in the example
+    /// above.)
+    ///
+    /// ```
+    /// use regex_automata::dfa::regex::Regex;
+    ///
+    /// let sparse_re = Regex::builder().build_sparse(r"foo[0-9]+")?;
+    /// // A regex that uses sparse DFAs can be used just like with dense DFAs.
+    /// assert!(sparse_re.is_match(b"foo123"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Fallibility
+    ///
+    /// Most of the search routines defined on this type will _panic_ when the
+    /// underlying search fails. This might be because the DFA gave up because
+    /// it saw a quit byte, whether configured explicitly or via heuristic
+    /// Unicode word boundary support, although neither is enabled by default.
+    /// Or it might fail because an invalid `Input` configuration is given,
+    /// for example, with an unsupported [`Anchored`] mode.
+    ///
+    /// If you need to handle these error cases instead of allowing them to
+    /// trigger a panic, then the lower level [`Regex::try_search`] provides
+    /// a fallible API that never panics.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to cause a search to terminate if it sees a
+    /// `\n` byte, and handle the error returned. This could be useful if, for
+    /// example, you wanted to prevent a user supplied pattern from matching
+    /// across a line boundary.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::{self, regex::Regex}, Input, MatchError};
+    ///
+    /// let re = Regex::builder()
+    ///     .dense(dfa::dense::Config::new().quit(b'\n', true))
+    ///     .build(r"foo\p{any}+bar")?;
+    ///
+    /// let input = Input::new("foo\nbar");
+    /// // Normally this would produce a match, since \p{any} contains '\n'.
+    /// // But since we instructed the automaton to enter a quit state if a
+    /// // '\n' is observed, this produces a match error instead.
+    /// let expected = MatchError::quit(b'\n', 3);
+    /// let got = re.try_search(&input).unwrap_err();
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[derive(Clone, Debug)]
+);
+
+#[cfg(all(feature = "syntax", feature = "dfa-build"))]
+impl Regex {
+    /// Parse the given regular expression using the default configuration and
+    /// return the corresponding regex.
+    ///
+    /// If you want a non-default configuration, then use the [`Builder`] to
+    /// set your own configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{Match, dfa::regex::Regex};
+    ///
+    /// let re = Regex::new("foo[0-9]+bar")?;
+    /// assert_eq!(
+    ///     Some(Match::must(0, 3..14)),
+    ///     re.find(b"zzzfoo12345barzzz"),
+    /// );
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn new(pattern: &str) -> Result<Regex, BuildError> {
+        Builder::new().build(pattern)
+    }
+
+    /// Like `new`, but parses multiple patterns into a single "regex set."
+    /// This similarly uses the default regex configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{Match, dfa::regex::Regex};
+    ///
+    /// let re = Regex::new_many(&["[a-z]+", "[0-9]+"])?;
+    ///
+    /// let mut it = re.find_iter(b"abc 1 foo 4567 0 quux");
+    /// assert_eq!(Some(Match::must(0, 0..3)), it.next());
+    /// assert_eq!(Some(Match::must(1, 4..5)), it.next());
+    /// assert_eq!(Some(Match::must(0, 6..9)), it.next());
+    /// assert_eq!(Some(Match::must(1, 10..14)), it.next());
+    /// assert_eq!(Some(Match::must(1, 15..16)), it.next());
+    /// assert_eq!(Some(Match::must(0, 17..21)), it.next());
+    /// assert_eq!(None, it.next());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn new_many<P: AsRef<str>>(
+        patterns: &[P],
+    ) -> Result<Regex, BuildError> {
+        Builder::new().build_many(patterns)
+    }
+}
+
+#[cfg(all(feature = "syntax", feature = "dfa-build"))]
+impl Regex<sparse::DFA<Vec<u8>>> {
+    /// Parse the given regular expression using the default configuration,
+    /// except using sparse DFAs, and return the corresponding regex.
+    ///
+    /// If you want a non-default configuration, then use the [`Builder`] to
+    /// set your own configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{Match, dfa::regex::Regex};
+    ///
+    /// let re = Regex::new_sparse("foo[0-9]+bar")?;
+    /// assert_eq!(
+    ///     Some(Match::must(0, 3..14)),
+    ///     re.find(b"zzzfoo12345barzzz"),
+    /// );
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn new_sparse(
+        pattern: &str,
+    ) -> Result<Regex<sparse::DFA<Vec<u8>>>, BuildError> {
+        Builder::new().build_sparse(pattern)
+    }
+
+    /// Like `new`, but parses multiple patterns into a single "regex set"
+    /// using sparse DFAs. This otherwise similarly uses the default regex
+    /// configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{Match, dfa::regex::Regex};
+    ///
+    /// let re = Regex::new_many_sparse(&["[a-z]+", "[0-9]+"])?;
+    ///
+    /// let mut it = re.find_iter(b"abc 1 foo 4567 0 quux");
+    /// assert_eq!(Some(Match::must(0, 0..3)), it.next());
+    /// assert_eq!(Some(Match::must(1, 4..5)), it.next());
+    /// assert_eq!(Some(Match::must(0, 6..9)), it.next());
+    /// assert_eq!(Some(Match::must(1, 10..14)), it.next());
+    /// assert_eq!(Some(Match::must(1, 15..16)), it.next());
+    /// assert_eq!(Some(Match::must(0, 17..21)), it.next());
+    /// assert_eq!(None, it.next());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn new_many_sparse<P: AsRef<str>>(
+        patterns: &[P],
+    ) -> Result<Regex<sparse::DFA<Vec<u8>>>, BuildError> {
+        Builder::new().build_many_sparse(patterns)
+    }
+}
+
+/// Convenience routines for regex construction.
+impl Regex<dense::OwnedDFA> {
+    /// Return a builder for configuring the construction of a `Regex`.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Builder`] type in common cases.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use the builder to disable UTF-8 mode
+    /// everywhere.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     dfa::regex::Regex, nfa::thompson, util::syntax, Match,
+    /// };
+    ///
+    /// let re = Regex::builder()
+    ///     .syntax(syntax::Config::new().utf8(false))
+    ///     .thompson(thompson::Config::new().utf8(false))
+    ///     .build(r"foo(?-u:[^b])ar.*")?;
+    /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
+    /// let expected = Some(Match::must(0, 1..9));
+    /// let got = re.find(haystack);
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn builder() -> Builder {
+        Builder::new()
+    }
+}
+
+/// Standard search routines for finding and iterating over matches.
+impl<A: Automaton> Regex<A> {
+    /// Returns true if and only if this regex matches the given haystack.
+    ///
+    /// This routine may short circuit if it knows that scanning future input
+    /// will never lead to a different result. In particular, if the underlying
+    /// DFA enters a match state or a dead state, then this routine will return
+    /// `true` or `false`, respectively, without inspecting any future input.
+    ///
+    /// # Panics
+    ///
+    /// This routine panics if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the DFA quitting.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search panics, callers cannot know whether a match exists or
+    /// not.
+    ///
+    /// Use [`Regex::try_search`] if you want to handle these error conditions.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::dfa::regex::Regex;
+    ///
+    /// let re = Regex::new("foo[0-9]+bar")?;
+    /// assert_eq!(true, re.is_match("foo12345bar"));
+    /// assert_eq!(false, re.is_match("foobar"));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn is_match<'h, I: Into<Input<'h>>>(&self, input: I) -> bool {
+        // Not only can we do an "earliest" search, but we can avoid doing a
+        // reverse scan too.
+        let input = input.into().earliest(true);
+        self.forward().try_search_fwd(&input).map(|x| x.is_some()).unwrap()
+    }
+
+    /// Returns the start and end offset of the leftmost match. If no match
+    /// exists, then `None` is returned.
+    ///
+    /// # Panics
+    ///
+    /// This routine panics if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the DFA quitting.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search panics, callers cannot know whether a match exists or
+    /// not.
+    ///
+    /// Use [`Regex::try_search`] if you want to handle these error conditions.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{Match, dfa::regex::Regex};
+    ///
+    /// // Greediness is applied appropriately.
+    /// let re = Regex::new("foo[0-9]+")?;
+    /// assert_eq!(Some(Match::must(0, 3..11)), re.find("zzzfoo12345zzz"));
+    ///
+    /// // Even though a match is found after reading the first byte (`a`),
+    /// // the default leftmost-first match semantics demand that we find the
+    /// // earliest match that prefers earlier parts of the pattern over later
+    /// // parts.
+    /// let re = Regex::new("abc|a")?;
+    /// assert_eq!(Some(Match::must(0, 0..3)), re.find("abc"));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find<'h, I: Into<Input<'h>>>(&self, input: I) -> Option<Match> {
+        self.try_search(&input.into()).unwrap()
+    }
+
+    /// Returns an iterator over all non-overlapping leftmost matches in the
+    /// given bytes. If no match exists, then the iterator yields no elements.
+    ///
+    /// This corresponds to the "standard" regex search iterator.
+    ///
+    /// # Panics
+    ///
+    /// If the search returns an error during iteration, then iteration
+    /// panics. See [`Regex::find`] for the panic conditions.
+    ///
+    /// Use [`Regex::try_search`] with
+    /// [`util::iter::Searcher`](crate::util::iter::Searcher) if you want to
+    /// handle these error conditions.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{Match, dfa::regex::Regex};
+    ///
+    /// let re = Regex::new("foo[0-9]+")?;
+    /// let text = "foo1 foo12 foo123";
+    /// let matches: Vec<Match> = re.find_iter(text).collect();
+    /// assert_eq!(matches, vec![
+    ///     Match::must(0, 0..4),
+    ///     Match::must(0, 5..10),
+    ///     Match::must(0, 11..17),
+    /// ]);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find_iter<'r, 'h, I: Into<Input<'h>>>(
+        &'r self,
+        input: I,
+    ) -> FindMatches<'r, 'h, A> {
+        let it = iter::Searcher::new(input.into());
+        FindMatches { re: self, it }
+    }
+}
+
+/// Lower level fallible search routines that permit controlling where the
+/// search starts and ends in a particular sequence.
+impl<A: Automaton> Regex<A> {
+    /// Returns the start and end offset of the leftmost match. If no match
+    /// exists, then `None` is returned.
+    ///
+    /// This is like [`Regex::find`] but with two differences:
+    ///
+    /// 1. It is not generic over `Into<Input>` and instead accepts a
+    /// `&Input`. This permits reusing the same `Input` for multiple searches
+    /// without needing to create a new one. This _may_ help with latency.
+    /// 2. It returns an error if the search could not complete, whereas
+    /// [`Regex::find`] will panic.
+    ///
+    /// # Errors
+    ///
+    /// This routine errors if the search could not complete. This can occur
+    /// in the following circumstances:
+    ///
+    /// * The configuration of the DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the DFA quitting.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
+    /// exists or not.
+    #[inline]
+    pub fn try_search(
+        &self,
+        input: &Input<'_>,
+    ) -> Result<Option<Match>, MatchError> {
+        let (fwd, rev) = (self.forward(), self.reverse());
+        let end = match fwd.try_search_fwd(input)? {
+            None => return Ok(None),
+            Some(end) => end,
+        };
+        // This special cases an empty match at the beginning of the search.
+        // If our end matches our start, then since a reverse DFA can't match
+        // past the start, it must follow that our starting position is also
+        // our end position. So short circuit and skip the reverse search.
+        if input.start() == end.offset() {
+            return Ok(Some(Match::new(
+                end.pattern(),
+                end.offset()..end.offset(),
+            )));
+        }
+        // We can also skip the reverse search if we know our search was
+        // anchored. This occurs either when the input config is anchored or
+        // when we know the regex itself is anchored. In this case, we know
+        // the start of the match, if one is found, must be the start of the
+        // search.
+        if self.is_anchored(input) {
+            return Ok(Some(Match::new(
+                end.pattern(),
+                input.start()..end.offset(),
+            )));
+        }
+        // N.B. I have tentatively convinced myself that it isn't necessary
+        // to specify the specific pattern for the reverse search since the
+        // reverse search will always find the same pattern to match as the
+        // forward search. But I lack a rigorous proof. Why not just provide
+        // the pattern anyway? Well, if it is needed, then leaving it out
+        // gives us a chance to find a witness. (Also, if we don't need to
+        // specify the pattern, then we don't need to build the reverse DFA
+        // with 'starts_for_each_pattern' enabled.)
+        //
+        // We also need to be careful to disable 'earliest' for the reverse
+        // search, since it could be enabled for the forward search. In the
+        // reverse case, to satisfy "leftmost" criteria, we need to match
+        // as much as we can. We also need to be careful to make the search
+        // anchored. We don't want the reverse search to report any matches
+        // other than the one beginning at the end of our forward search.
+        let revsearch = input
+            .clone()
+            .span(input.start()..end.offset())
+            .anchored(Anchored::Yes)
+            .earliest(false);
+        let start = rev
+            .try_search_rev(&revsearch)?
+            .expect("reverse search must match if forward search does");
+        assert_eq!(
+            start.pattern(),
+            end.pattern(),
+            "forward and reverse search must match same pattern",
+        );
+        assert!(start.offset() <= end.offset());
+        Ok(Some(Match::new(end.pattern(), start.offset()..end.offset())))
+    }
+
+    /// Returns true if either the given input specifies an anchored search
+    /// or if the underlying DFA is always anchored.
+    fn is_anchored(&self, input: &Input<'_>) -> bool {
+        match input.get_anchored() {
+            Anchored::No => self.forward().is_always_start_anchored(),
+            Anchored::Yes | Anchored::Pattern(_) => true,
+        }
+    }
+}
+
+/// Non-search APIs for querying information about the regex and setting a
+/// prefilter.
+impl<A: Automaton> Regex<A> {
+    /// Return the underlying DFA responsible for forward matching.
+    ///
+    /// This is useful for accessing the underlying DFA and converting it to
+    /// some other format or size. See the [`Builder::build_from_dfas`] docs
+    /// for an example of where this might be useful.
+    pub fn forward(&self) -> &A {
+        &self.forward
+    }
+
+    /// Return the underlying DFA responsible for reverse matching.
+    ///
+    /// This is useful for accessing the underlying DFA and converting it to
+    /// some other format or size. See the [`Builder::build_from_dfas`] docs
+    /// for an example of where this might be useful.
+    pub fn reverse(&self) -> &A {
+        &self.reverse
+    }
+
+    /// Returns the total number of patterns matched by this regex.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::dfa::regex::Regex;
+    ///
+    /// let re = Regex::new_many(&[r"[a-z]+", r"[0-9]+", r"\w+"])?;
+    /// assert_eq!(3, re.pattern_len());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn pattern_len(&self) -> usize {
+        assert_eq!(self.forward().pattern_len(), self.reverse().pattern_len());
+        self.forward().pattern_len()
+    }
+}
+
+/// An iterator over all non-overlapping matches for an infallible search.
+///
+/// The iterator yields a [`Match`] value until no more matches could be found.
+/// If the underlying regex engine returns an error, then a panic occurs.
+///
+/// The type parameters are as follows:
+///
+/// * `A` represents the type of the underlying DFA that implements the
+/// [`Automaton`] trait.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'h` represents the lifetime of the haystack being searched.
+/// * `'r` represents the lifetime of the regex object itself.
+///
+/// This iterator can be created with the [`Regex::find_iter`] method.
+#[derive(Debug)]
+pub struct FindMatches<'r, 'h, A> {
+    re: &'r Regex<A>,
+    it: iter::Searcher<'h>,
+}
+
+impl<'r, 'h, A: Automaton> Iterator for FindMatches<'r, 'h, A> {
+    type Item = Match;
+
+    #[inline]
+    fn next(&mut self) -> Option<Match> {
+        let FindMatches { re, ref mut it } = *self;
+        it.advance(|input| re.try_search(input))
+    }
+}
+
+/// A builder for a regex based on deterministic finite automatons.
+///
+/// This builder permits configuring options for the syntax of a pattern, the
+/// NFA construction, the DFA construction and finally the regex searching
+/// itself. This builder is different from a general purpose regex builder in
+/// that it permits fine grain configuration of the construction process. The
+/// trade off for this is complexity, and the possibility of setting a
+/// configuration that might not make sense. For example, there are two
+/// different UTF-8 modes:
+///
+/// * [`syntax::Config::utf8`](crate::util::syntax::Config::utf8) controls
+/// whether the pattern itself can contain sub-expressions that match invalid
+/// UTF-8.
+/// * [`thompson::Config::utf8`](crate::nfa::thompson::Config::utf8) controls
+/// how the regex iterators themselves advance the starting position of the
+/// next search when a match with zero length is found.
+///
+/// Generally speaking, callers will want to either enable all of these or
+/// disable all of these.
+///
+/// Internally, building a regex requires building two DFAs, where one is
+/// responsible for finding the end of a match and the other is responsible
+/// for finding the start of a match. If you only need to detect whether
+/// something matched, or only the end of a match, then you should use a
+/// [`dense::Builder`] to construct a single DFA, which is cheaper than
+/// building two DFAs.
+///
+/// # Build methods
+///
+/// This builder has a few "build" methods. In general, it's the result of
+/// combining the following parameters:
+///
+/// * Building one or many regexes.
+/// * Building a regex with dense or sparse DFAs.
+///
+/// The simplest "build" method is [`Builder::build`]. It accepts a single
+/// pattern and builds a dense DFA using `usize` for the state identifier
+/// representation.
+///
+/// The most general "build" method is [`Builder::build_many`], which permits
+/// building a regex that searches for multiple patterns simultaneously while
+/// using a specific state identifier representation.
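+///
+/// For example, a quick sketch of building a multi-pattern regex through the
+/// builder (equivalent to calling `Regex::new_many`):
+///
+/// ```
+/// use regex_automata::{Match, dfa::regex::Regex};
+///
+/// let re = Regex::builder().build_many(&["[a-z]+", "[0-9]+"])?;
+/// // Pattern 1 ([0-9]+) matches at the leftmost position.
+/// assert_eq!(Some(Match::must(1, 0..3)), re.find("123abc"));
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```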
+///
+/// The most flexible "build" method, but hardest to use, is
+/// [`Builder::build_from_dfas`]. This exposes the fact that a [`Regex`] is
+/// just a pair of DFAs, and this method allows you to specify those DFAs
+/// exactly.
+///
+/// # Example
+///
+/// This example shows how to disable UTF-8 mode in the syntax and the regex
+/// itself. This is generally what you want for matching on arbitrary bytes.
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::{
+///     dfa::regex::Regex, nfa::thompson, util::syntax, Match,
+/// };
+///
+/// let re = Regex::builder()
+///     .syntax(syntax::Config::new().utf8(false))
+///     .thompson(thompson::Config::new().utf8(false))
+///     .build(r"foo(?-u:[^b])ar.*")?;
+/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
+/// let expected = Some(Match::must(0, 1..9));
+/// let got = re.find(haystack);
+/// assert_eq!(expected, got);
+/// // Notice that `(?-u:[^b])` matches invalid UTF-8,
+/// // but the subsequent `.*` does not! Disabling UTF-8
+/// // on the syntax permits this.
+/// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap().range()]);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Builder {
+    #[cfg(feature = "dfa-build")]
+    dfa: dense::Builder,
+}
+
+impl Builder {
+    /// Create a new regex builder with the default configuration.
+    pub fn new() -> Builder {
+        Builder {
+            #[cfg(feature = "dfa-build")]
+            dfa: dense::Builder::new(),
+        }
+    }
+
+    /// Build a regex from the given pattern.
+    ///
+    /// If there was a problem parsing or compiling the pattern, then an error
+    /// is returned.
+    #[cfg(all(feature = "syntax", feature = "dfa-build"))]
+    pub fn build(&self, pattern: &str) -> Result<Regex, BuildError> {
+        self.build_many(&[pattern])
+    }
+
+    /// Build a regex from the given pattern using sparse DFAs.
+    ///
+    /// If there was a problem parsing or compiling the pattern, then an error
+    /// is returned.
+    #[cfg(all(feature = "syntax", feature = "dfa-build"))]
+    pub fn build_sparse(
+        &self,
+        pattern: &str,
+    ) -> Result<Regex<sparse::DFA<Vec<u8>>>, BuildError> {
+        self.build_many_sparse(&[pattern])
+    }
+
+    /// Build a regex from the given patterns.
+    #[cfg(all(feature = "syntax", feature = "dfa-build"))]
+    pub fn build_many<P: AsRef<str>>(
+        &self,
+        patterns: &[P],
+    ) -> Result<Regex, BuildError> {
+        let forward = self.dfa.build_many(patterns)?;
+        let reverse = self
+            .dfa
+            .clone()
+            .configure(
+                dense::Config::new()
+                    .prefilter(None)
+                    .specialize_start_states(false)
+                    .start_kind(StartKind::Anchored)
+                    .match_kind(MatchKind::All),
+            )
+            .thompson(crate::nfa::thompson::Config::new().reverse(true))
+            .build_many(patterns)?;
+        Ok(self.build_from_dfas(forward, reverse))
+    }
+
+    /// Build a sparse regex from the given patterns.
+    #[cfg(all(feature = "syntax", feature = "dfa-build"))]
+    pub fn build_many_sparse<P: AsRef<str>>(
+        &self,
+        patterns: &[P],
+    ) -> Result<Regex<sparse::DFA<Vec<u8>>>, BuildError> {
+        let re = self.build_many(patterns)?;
+        let forward = re.forward().to_sparse()?;
+        let reverse = re.reverse().to_sparse()?;
+        Ok(self.build_from_dfas(forward, reverse))
+    }
+
+    /// Build a regex from its component forward and reverse DFAs.
+    ///
+    /// This is useful when deserializing a regex from some arbitrary
+    /// memory region. This is also useful for building regexes from other
+    /// types of DFAs.
+    ///
+    /// If you're building the DFAs from scratch instead of building new DFAs
+    /// from other DFAs, then you'll need to make sure that the reverse DFA is
+    /// configured correctly to match the intended semantics. Namely:
+    ///
+    /// * It should be anchored.
+    /// * It should use [`MatchKind::All`] semantics.
+    /// * It should match in reverse.
+    /// * Otherwise, its configuration should match the forward DFA.
+    ///
+    /// If these conditions aren't satisfied, then the behavior of searches is
+    /// unspecified.
+    ///
+    /// Note that when using this constructor, no configuration is applied.
+    /// Since this routine provides the DFAs to the builder, there is no
+    /// opportunity to apply other configuration options.
+    ///
+    /// # Example
+    ///
+    /// This example is a bit contrived. The usual use of these methods
+    /// would involve serializing `initial_re` somewhere and then deserializing
+    /// it later to build a regex. But in this case, we do everything in
+    /// memory.
+    ///
+    /// ```
+    /// use regex_automata::dfa::regex::Regex;
+    ///
+    /// let initial_re = Regex::new("foo[0-9]+")?;
+    /// assert_eq!(true, initial_re.is_match(b"foo123"));
+    ///
+    /// let (fwd, rev) = (initial_re.forward(), initial_re.reverse());
+    /// let re = Regex::builder().build_from_dfas(fwd, rev);
+    /// assert_eq!(true, re.is_match(b"foo123"));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// This example shows how to build a `Regex` that uses sparse DFAs instead
+    /// of dense DFAs without using one of the convenience `build_sparse`
+    /// routines:
+    ///
+    /// ```
+    /// use regex_automata::dfa::regex::Regex;
+    ///
+    /// let initial_re = Regex::new("foo[0-9]+")?;
+    /// assert_eq!(true, initial_re.is_match(b"foo123"));
+    ///
+    /// let fwd = initial_re.forward().to_sparse()?;
+    /// let rev = initial_re.reverse().to_sparse()?;
+    /// let re = Regex::builder().build_from_dfas(fwd, rev);
+    /// assert_eq!(true, re.is_match(b"foo123"));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn build_from_dfas<A: Automaton>(
+        &self,
+        forward: A,
+        reverse: A,
+    ) -> Regex<A> {
+        Regex { forward, reverse }
+    }
+
+    /// Set the syntax configuration for this builder using
+    /// [`syntax::Config`](crate::util::syntax::Config).
+    ///
+    /// This permits setting things like case insensitivity, Unicode and multi
+    /// line mode.
+    #[cfg(all(feature = "syntax", feature = "dfa-build"))]
+    pub fn syntax(
+        &mut self,
+        config: crate::util::syntax::Config,
+    ) -> &mut Builder {
+        self.dfa.syntax(config);
+        self
+    }
+
+    /// Set the Thompson NFA configuration for this builder using
+    /// [`nfa::thompson::Config`](crate::nfa::thompson::Config).
+    ///
+    /// This permits setting things like whether additional time should be
+    /// spent shrinking the size of the NFA.
+    #[cfg(all(feature = "syntax", feature = "dfa-build"))]
+    pub fn thompson(
+        &mut self,
+        config: crate::nfa::thompson::Config,
+    ) -> &mut Builder {
+        self.dfa.thompson(config);
+        self
+    }
+
+    /// Set the dense DFA compilation configuration for this builder using
+    /// [`dense::Config`].
+    ///
+    /// This permits setting things like whether the underlying DFAs should
+    /// be minimized.
+    #[cfg(feature = "dfa-build")]
+    pub fn dense(&mut self, config: dense::Config) -> &mut Builder {
+        self.dfa.configure(config);
+        self
+    }
+}
+
+impl Default for Builder {
+    fn default() -> Builder {
+        Builder::new()
+    }
+}
diff --git a/vendor/regex-automata/src/dfa/remapper.rs b/vendor/regex-automata/src/dfa/remapper.rs
new file mode 100644
index 0000000..6e49646
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/remapper.rs
@@ -0,0 +1,242 @@
+use alloc::vec::Vec;
+
+use crate::util::primitives::StateID;
+
+/// Remappable is a tightly coupled abstraction that facilitates remapping
+/// state identifiers in DFAs.
+///
+/// The main idea behind remapping state IDs is that DFAs often need to check
+/// if a certain state is a "special" state of some kind (like a match state)
+/// during a search. Since this is extremely perf critical code, we want this
+/// check to be as fast as possible. Partitioning state IDs into, for example,
+/// "non-match" and "match" states means one can tell if a state is a match
+/// state via a simple comparison of the state ID.
+///
+/// The issue is that during the DFA construction process, it's not
+/// particularly easy to partition the states. Instead, the simplest thing is
+/// often to just do a pass over all of the states and shuffle them into their
+/// desired partitionings. To do that, we need a mechanism for swapping states.
+/// Hence, this abstraction.
+///
+/// Normally, for such little code, I would just duplicate it. But this is a
+/// key optimization and the implementation is a bit subtle. So the abstraction
+/// is basically a ham-fisted attempt at DRY. The only place we use this is in
+/// the dense and one-pass DFAs.
+///
+/// See also src/dfa/special.rs for a more detailed explanation of how dense
+/// DFAs are partitioned.
+pub(super) trait Remappable: core::fmt::Debug {
+    /// Return the total number of states.
+    fn state_len(&self) -> usize;
+    /// Return the power-of-2 exponent that yields the stride. The pertinent
+    /// laws here are, where `N = stride2`: `2^N = stride` and
+    /// `len(alphabet) <= stride`.
+    fn stride2(&self) -> usize;
+    /// Swap the states pointed to by the given IDs. The underlying finite
+    /// state machine should be mutated such that all of the transitions in
+    /// `id1` are now in the memory region where the transitions for `id2`
+    /// were, and all of the transitions in `id2` are now in the memory region
+    /// where the transitions for `id1` were.
+    ///
+    /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`.
+    ///
+    /// It is expected that, after calling this, the underlying value will be
+    /// left in an inconsistent state, since any other transitions pointing to,
+    /// e.g., `id1` need to be updated to point to `id2`, since that's where
+    /// `id1` moved to.
+    ///
+    /// In order to "fix" the underlying inconsistent state, a `Remapper`
+    /// should be used to guarantee that `remap` is called at the appropriate
+    /// time.
+    fn swap_states(&mut self, id1: StateID, id2: StateID);
+    /// This must remap every single state ID in the underlying value according
+    /// to the function given. For example, in a DFA, this should remap every
+    /// transition and every starting state ID.
+    fn remap(&mut self, map: impl Fn(StateID) -> StateID);
+}
+
+/// Remapper is an abstraction that manages the remapping of state IDs in a
+/// finite state machine. This is useful when one wants to shuffle states into
+/// different positions in the machine.
+///
+/// One of the key complexities this manages is the ability to correctly move
+/// one state multiple times.
+///
+/// Once shuffling is complete, `remap` must be called, which will rewrite
+/// all pertinent transitions to updated state IDs. Neglecting to call `remap`
+/// will almost certainly result in a corrupt machine.
+#[derive(Debug)]
+pub(super) struct Remapper {
+    /// A map from the index of a state to its pre-multiplied identifier.
+    ///
+    /// When a state is swapped with another, then their corresponding
+    /// locations in this map are also swapped. Thus, a state's new position
+    /// will still point to its old pre-multiplied StateID.
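+    ///
+    /// To make that concrete with a small hypothetical example: with
+    /// stride2 = 2 (stride 4), three states have the pre-multiplied IDs 0,
+    /// 4 and 8, so this map starts as [0, 4, 8]. Swapping states 4 and 8
+    /// also swaps map[1] and map[2], yielding [0, 8, 4]: index 1, where ID 4
+    /// originally lived, now records that the state ended up at ID 8, and
+    /// vice versa.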
+    ///
+    /// While there is a bit more to it, this then allows us to rewrite the
+    /// state IDs in a DFA's transition table in a single pass. This is done
+    /// by iterating over every ID in this map, then iterating over each
+    /// transition for the state at that ID and re-mapping the transition from
+    /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position
+    /// in this map where `old_id` *started*, and set it to where it ended up
+    /// after all swaps have been completed.
+    map: Vec<StateID>,
+    /// A mapper from state index to state ID (and back).
+    idxmap: IndexMapper,
+}
+
+impl Remapper {
+    /// Create a new remapper from the given remappable implementation. The
+    /// remapper can then be used to swap states. The remappable value given
+    /// here must be the same one given to `swap` and `remap`.
+    pub(super) fn new(r: &impl Remappable) -> Remapper {
+        let idxmap = IndexMapper { stride2: r.stride2() };
+        let map = (0..r.state_len()).map(|i| idxmap.to_state_id(i)).collect();
+        Remapper { map, idxmap }
+    }
+
+    /// Swap two states. Once this is called, callers must follow through to
+    /// call `remap`, or else it's possible for the underlying remappable
+    /// value to be in a corrupt state.
+    pub(super) fn swap(
+        &mut self,
+        r: &mut impl Remappable,
+        id1: StateID,
+        id2: StateID,
+    ) {
+        if id1 == id2 {
+            return;
+        }
+        r.swap_states(id1, id2);
+        self.map.swap(self.idxmap.to_index(id1), self.idxmap.to_index(id2));
+    }
+
+    /// Complete the remapping process by rewriting all state IDs in the
+    /// remappable value according to the swaps performed.
+    pub(super) fn remap(mut self, r: &mut impl Remappable) {
+        // Update the map to account for states that have been swapped
+        // multiple times. For example, if (A, C) and (C, G) are swapped, then
+        // transitions previously pointing to A should now point to G. But if
+        // we don't update our map, they will erroneously be set to C. All we
+        // do is follow the swaps in our map until we see our original state
+        // ID.
+        //
+        // The intuition here is to think about how changes are made to the
+        // map: only through pairwise swaps. That means that starting at any
+        // given state, it is always possible to find the loop back to that
+        // state by following the swaps represented in the map (which might be
+        // 0 swaps).
+        //
+        // We are also careful to clone the map before starting in order to
+        // freeze it. We use the frozen map to find our loops, since we need to
+        // update our map as well. Without freezing it, our updates could break
+        // the loops referenced above and produce incorrect results.
+        let oldmap = self.map.clone();
+        for i in 0..r.state_len() {
+            let cur_id = self.idxmap.to_state_id(i);
+            let mut new_id = oldmap[i];
+            if cur_id == new_id {
+                continue;
+            }
+            loop {
+                let id = oldmap[self.idxmap.to_index(new_id)];
+                if cur_id == id {
+                    self.map[i] = new_id;
+                    break;
+                }
+                new_id = id;
+            }
+        }
+        r.remap(|next| self.map[self.idxmap.to_index(next)]);
+    }
+}
+
+/// A simple type for mapping between state indices and state IDs.
+///
+/// The reason why this exists is because state IDs are "premultiplied." That
+/// is, in order to get to the transitions for a particular state, one need
+/// only use the state ID as-is, instead of having to multiply it by the
+/// transition table's stride.
+///
+/// The downside of this is that it's inconvenient to map between state IDs
+/// and state indices with a dense map, e.g., `Vec<StateID>`. That's because
+/// state IDs look like `0`, `0+stride`, `0+2*stride`, `0+3*stride`, etc.,
+/// instead of `0`, `1`, `2`, `3`, etc.
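+///
+/// A quick worked example (illustrative numbers only): with stride2 = 2, the
+/// stride is 4, so state index 3 has the pre-multiplied ID 3 << 2 = 12, and
+/// ID 12 converts back to index 12 >> 2 = 3.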
+///
+/// Since our state IDs are premultiplied, we can convert back-and-forth
+/// between IDs and indices by simply unmultiplying the IDs and multiplying
+/// the indices.
+#[derive(Debug)]
+struct IndexMapper {
+    /// The power of 2 corresponding to the stride of the corresponding
+    /// transition table. 'id >> stride2' de-multiplies an ID while 'index <<
+    /// stride2' pre-multiplies an index to an ID.
+    stride2: usize,
+}
+
+impl IndexMapper {
+    /// Convert a state ID to a state index.
+    fn to_index(&self, id: StateID) -> usize {
+        id.as_usize() >> self.stride2
+    }
+
+    /// Convert a state index to a state ID.
+    fn to_state_id(&self, index: usize) -> StateID {
+        // CORRECTNESS: If the given index is not valid, then it is not
+        // required for this to panic or return a valid state ID. We'll "just"
+        // wind up with panics or silent logic errors at some other point.
+        StateID::new_unchecked(index << self.stride2)
+    }
+}
+
+#[cfg(feature = "dfa-build")]
+mod dense {
+    use crate::{dfa::dense::OwnedDFA, util::primitives::StateID};
+
+    use super::Remappable;
+
+    impl Remappable for OwnedDFA {
+        fn state_len(&self) -> usize {
+            OwnedDFA::state_len(self)
+        }
+
+        fn stride2(&self) -> usize {
+            OwnedDFA::stride2(self)
+        }
+
+        fn swap_states(&mut self, id1: StateID, id2: StateID) {
+            OwnedDFA::swap_states(self, id1, id2)
+        }
+
+        fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
+            OwnedDFA::remap(self, map)
+        }
+    }
+}
+
+#[cfg(feature = "dfa-onepass")]
+mod onepass {
+    use crate::{dfa::onepass::DFA, util::primitives::StateID};
+
+    use super::Remappable;
+
+    impl Remappable for DFA {
+        fn state_len(&self) -> usize {
+            DFA::state_len(self)
+        }
+
+        fn stride2(&self) -> usize {
+            // We don't do pre-multiplication for the one-pass DFA, so
+            // returning 0 has the effect of making state IDs and state
+            // indices equivalent.
+            0
+        }
+
+        fn swap_states(&mut self, id1: StateID, id2: StateID) {
+            DFA::swap_states(self, id1, id2)
+        }
+
+        fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
+            DFA::remap(self, map)
+        }
+    }
+}
diff --git a/vendor/regex-automata/src/dfa/search.rs b/vendor/regex-automata/src/dfa/search.rs
new file mode 100644
index 0000000..5a82261
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/search.rs
@@ -0,0 +1,644 @@
+use crate::{
+    dfa::{
+        accel,
+        automaton::{Automaton, OverlappingState},
+    },
+    util::{
+        prefilter::Prefilter,
+        primitives::StateID,
+        search::{Anchored, HalfMatch, Input, Span},
+    },
+    MatchError,
+};
+
+#[inline(never)]
+pub fn find_fwd<A: Automaton + ?Sized>(
+    dfa: &A,
+    input: &Input<'_>,
+) -> Result<Option<HalfMatch>, MatchError> {
+    if input.is_done() {
+        return Ok(None);
+    }
+    let pre = if input.get_anchored().is_anchored() {
+        None
+    } else {
+        dfa.get_prefilter()
+    };
+    // Searching with a pattern ID is always anchored, so we should never use
+    // a prefilter.
+    if pre.is_some() {
+        if input.get_earliest() {
+            find_fwd_imp(dfa, input, pre, true)
+        } else {
+            find_fwd_imp(dfa, input, pre, false)
+        }
+    } else {
+        if input.get_earliest() {
+            find_fwd_imp(dfa, input, None, true)
+        } else {
+            find_fwd_imp(dfa, input, None, false)
+        }
+    }
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn find_fwd_imp<A: Automaton + ?Sized>(
+    dfa: &A,
+    input: &Input<'_>,
+    pre: Option<&'_ Prefilter>,
+    earliest: bool,
+) -> Result<Option<HalfMatch>, MatchError> {
+    // See 'prefilter_restart' docs for explanation.
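+    //
+    // In short: a "universal" start state means the DFA's start state does
+    // not depend on the bytes surrounding the search position. When that
+    // holds, there is no need to recompute a start state after a prefilter
+    // candidate is found, which is why 'universal_start' gates the calls to
+    // 'prefilter_restart' below.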
+    let universal_start = dfa.universal_start_state(Anchored::No).is_some();
+    let mut mat = None;
+    let mut sid = init_fwd(dfa, input)?;
+    let mut at = input.start();
+    // This could just be a closure, but then I think it would be unsound
+    // because it would need to be safe to invoke. This way, the lack of
+    // safety is clearer in the code below.
+    macro_rules! next_unchecked {
+        ($sid:expr, $at:expr) => {{
+            let byte = *input.haystack().get_unchecked($at);
+            dfa.next_state_unchecked($sid, byte)
+        }};
+    }
+
+    if let Some(ref pre) = pre {
+        let span = Span::from(at..input.end());
+        // If a prefilter doesn't report false positives, then we don't need to
+        // touch the DFA at all. However, since all matches include the pattern
+        // ID, and the prefilter infrastructure doesn't report pattern IDs, we
+        // limit this optimization to cases where there is exactly one pattern.
+        // In that case, any match must be the 0th pattern.
+        match pre.find(input.haystack(), span) {
+            None => return Ok(mat),
+            Some(ref span) => {
+                at = span.start;
+                if !universal_start {
+                    sid = prefilter_restart(dfa, &input, at)?;
+                }
+            }
+        }
+    }
+    while at < input.end() {
+        // SAFETY: There are two safety invariants we need to uphold here in
+        // the loops below: that 'sid' and 'prev_sid' are valid state IDs
+        // for this DFA, and that 'at' is a valid index into 'haystack'.
+        // For the former, we rely on the invariant that next_state* and
+        // start_state_forward always return a valid state ID (given a valid
+        // state ID in the former case). For the latter safety invariant, we
+        // always guard unchecked access with a check that 'at' is less than
+        // 'end', where 'end <= haystack.len()'. In the unrolled loop below, we
+        // ensure that 'at' is always in bounds.
+        //
+        // PERF: See a similar comment in src/hybrid/search.rs that justifies
+        // this extra work to make the search loop fast. The same reasoning and
+        // benchmarks apply here.
+        let mut prev_sid;
+        while at < input.end() {
+            prev_sid = unsafe { next_unchecked!(sid, at) };
+            if dfa.is_special_state(prev_sid) || at + 3 >= input.end() {
+                core::mem::swap(&mut prev_sid, &mut sid);
+                break;
+            }
+            at += 1;
+
+            sid = unsafe { next_unchecked!(prev_sid, at) };
+            if dfa.is_special_state(sid) {
+                break;
+            }
+            at += 1;
+
+            prev_sid = unsafe { next_unchecked!(sid, at) };
+            if dfa.is_special_state(prev_sid) {
+                core::mem::swap(&mut prev_sid, &mut sid);
+                break;
+            }
+            at += 1;
+
+            sid = unsafe { next_unchecked!(prev_sid, at) };
+            if dfa.is_special_state(sid) {
+                break;
+            }
+            at += 1;
+        }
+        if dfa.is_special_state(sid) {
+            if dfa.is_start_state(sid) {
+                if let Some(ref pre) = pre {
+                    let span = Span::from(at..input.end());
+                    match pre.find(input.haystack(), span) {
+                        None => return Ok(mat),
+                        Some(ref span) => {
+                            // We want to skip any update to 'at' below
+                            // at the end of this iteration and just
+                            // jump immediately back to the next state
+                            // transition at the leading position of the
+                            // candidate match.
+                            //
+                            // ... but only if we actually made progress
+                            // with our prefilter, otherwise if the start
+                            // state has a self-loop, we can get stuck.
+                            if span.start > at {
+                                at = span.start;
+                                if !universal_start {
+                                    sid = prefilter_restart(dfa, &input, at)?;
+                                }
+                                continue;
+                            }
+                        }
+                    }
+                } else if dfa.is_accel_state(sid) {
+                    let needles = dfa.accelerator(sid);
+                    at = accel::find_fwd(needles, input.haystack(), at + 1)
+                        .unwrap_or(input.end());
+                    continue;
+                }
+            } else if dfa.is_match_state(sid) {
+                let pattern = dfa.match_pattern(sid, 0);
+                mat = Some(HalfMatch::new(pattern, at));
+                if earliest {
+                    return Ok(mat);
+                }
+                if dfa.is_accel_state(sid) {
+                    let needles = dfa.accelerator(sid);
+                    at = accel::find_fwd(needles, input.haystack(), at + 1)
+                        .unwrap_or(input.end());
+                    continue;
+                }
+            } else if dfa.is_accel_state(sid) {
+                let needs = dfa.accelerator(sid);
+                at = accel::find_fwd(needs, input.haystack(), at + 1)
+                    .unwrap_or(input.end());
+                continue;
+            } else if dfa.is_dead_state(sid) {
+                return Ok(mat);
+            } else {
+                // It's important that this is a debug_assert, since this can
+                // actually be tripped even if DFA::from_bytes succeeds and
+                // returns a supposedly valid DFA.
+                debug_assert!(dfa.is_quit_state(sid));
+                return Err(MatchError::quit(input.haystack()[at], at));
+            }
+        }
+        at += 1;
+    }
+    eoi_fwd(dfa, input, &mut sid, &mut mat)?;
+    Ok(mat)
+}
+
+#[inline(never)]
+pub fn find_rev<A: Automaton + ?Sized>(
+    dfa: &A,
+    input: &Input<'_>,
+) -> Result<Option<HalfMatch>, MatchError> {
+    if input.is_done() {
+        return Ok(None);
+    }
+    if input.get_earliest() {
+        find_rev_imp(dfa, input, true)
+    } else {
+        find_rev_imp(dfa, input, false)
+    }
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn find_rev_imp<A: Automaton + ?Sized>(
+    dfa: &A,
+    input: &Input<'_>,
+    earliest: bool,
+) -> Result<Option<HalfMatch>, MatchError> {
+    let mut mat = None;
+    let mut sid = init_rev(dfa, input)?;
+    // In reverse search, the loop below can't handle the case of searching an
+    // empty slice. Ideally we could write something congruent to the forward
+    // search, i.e., 'while at >= start', but 'start' might be 0. Since we use
+    // an unsigned offset, 'at >= 0' is trivially always true. We could avoid
+    // this extra case handling by using a signed offset, but Rust makes it
+    // annoying to do. So... We just handle the empty case separately.
+    if input.start() == input.end() {
+        eoi_rev(dfa, input, &mut sid, &mut mat)?;
+        return Ok(mat);
+    }
+
+    let mut at = input.end() - 1;
+    macro_rules! next_unchecked {
+        ($sid:expr, $at:expr) => {{
+            let byte = *input.haystack().get_unchecked($at);
+            dfa.next_state_unchecked($sid, byte)
+        }};
+    }
+    loop {
+        // SAFETY: See comments in 'find_fwd' for a safety argument.
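+        //
+        // One extra note for the reverse direction: the bound
+        // 'at <= input.start().saturating_add(3)' below bails out of the
+        // unrolled steps early enough that the four 'at -= 1' decrements
+        // can never move 'at' below 'input.start()'.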
+ let mut prev_sid; + while at >= input.start() { + prev_sid = unsafe { next_unchecked!(sid, at) }; + if dfa.is_special_state(prev_sid) + || at <= input.start().saturating_add(3) + { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at -= 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if dfa.is_special_state(sid) { + break; + } + at -= 1; + + prev_sid = unsafe { next_unchecked!(sid, at) }; + if dfa.is_special_state(prev_sid) { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at -= 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if dfa.is_special_state(sid) { + break; + } + at -= 1; + } + if dfa.is_special_state(sid) { + if dfa.is_start_state(sid) { + if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + at = accel::find_rev(needles, input.haystack(), at) + .map(|i| i + 1) + .unwrap_or(input.start()); + } + } else if dfa.is_match_state(sid) { + let pattern = dfa.match_pattern(sid, 0); + // Since reverse searches report the beginning of a match + // and the beginning is inclusive (not exclusive like the + // end of a match), we add 1 to make it inclusive. + mat = Some(HalfMatch::new(pattern, at + 1)); + if earliest { + return Ok(mat); + } + if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + at = accel::find_rev(needles, input.haystack(), at) + .map(|i| i + 1) + .unwrap_or(input.start()); + } + } else if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + // If the accelerator returns nothing, why don't we quit the + // search? Well, if the accelerator doesn't find anything, that + // doesn't mean we don't have a match. It just means that we + // can't leave the current state given one of the 255 possible + // byte values. However, there might be an EOI transition. So + // we set 'at' to the end of the haystack, which will cause + // this loop to stop and fall down into the EOI transition. + at = accel::find_rev(needles, input.haystack(), at) + .map(|i| i + 1) + .unwrap_or(input.start()); + } else if dfa.is_dead_state(sid) { + return Ok(mat); + } else { + return Err(MatchError::quit(input.haystack()[at], at)); + } + } + if at == input.start() { + break; + } + at -= 1; + } + eoi_rev(dfa, input, &mut sid, &mut mat)?; + Ok(mat) +} + +#[inline(never)] +pub fn find_overlapping_fwd( + dfa: &A, + input: &Input<'_>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + state.mat = None; + if input.is_done() { + return Ok(()); + } + let pre = if input.get_anchored().is_anchored() { + None + } else { + dfa.get_prefilter() + }; + if pre.is_some() { + find_overlapping_fwd_imp(dfa, input, pre, state) + } else { + find_overlapping_fwd_imp(dfa, input, None, state) + } +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn find_overlapping_fwd_imp( + dfa: &A, + input: &Input<'_>, + pre: Option<&'_ Prefilter>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + // See 'prefilter_restart' docs for explanation. + let universal_start = dfa.universal_start_state(Anchored::No).is_some(); + let mut sid = match state.id { + None => { + state.at = input.start(); + init_fwd(dfa, input)? 
+ } + Some(sid) => { + if let Some(match_index) = state.next_match_index { + let match_len = dfa.match_len(sid); + if match_index < match_len { + state.next_match_index = Some(match_index + 1); + let pattern = dfa.match_pattern(sid, match_index); + state.mat = Some(HalfMatch::new(pattern, state.at)); + return Ok(()); + } + } + // Once we've reported all matches at a given position, we need to + // advance the search to the next position. + state.at += 1; + if state.at > input.end() { + return Ok(()); + } + sid + } + }; + + // NOTE: We don't optimize the crap out of this routine primarily because + // it seems like most find_overlapping searches will have higher match + // counts, and thus, throughput is perhaps not as important. But if you + // have a use case for something faster, feel free to file an issue. + while state.at < input.end() { + sid = dfa.next_state(sid, input.haystack()[state.at]); + if dfa.is_special_state(sid) { + state.id = Some(sid); + if dfa.is_start_state(sid) { + if let Some(ref pre) = pre { + let span = Span::from(state.at..input.end()); + match pre.find(input.haystack(), span) { + None => return Ok(()), + Some(ref span) => { + if span.start > state.at { + state.at = span.start; + if !universal_start { + sid = prefilter_restart( + dfa, &input, state.at, + )?; + } + continue; + } + } + } + } else if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + state.at = accel::find_fwd( + needles, + input.haystack(), + state.at + 1, + ) + .unwrap_or(input.end()); + continue; + } + } else if dfa.is_match_state(sid) { + state.next_match_index = Some(1); + let pattern = dfa.match_pattern(sid, 0); + state.mat = Some(HalfMatch::new(pattern, state.at)); + return Ok(()); + } else if dfa.is_accel_state(sid) { + let needs = dfa.accelerator(sid); + // If the accelerator returns nothing, why don't we quit the + // search? Well, if the accelerator doesn't find anything, that + // doesn't mean we don't have a match. It just means that we + // can't leave the current state given one of the 255 possible + // byte values. However, there might be an EOI transition. So + // we set 'at' to the end of the haystack, which will cause + // this loop to stop and fall down into the EOI transition. + state.at = + accel::find_fwd(needs, input.haystack(), state.at + 1) + .unwrap_or(input.end()); + continue; + } else if dfa.is_dead_state(sid) { + return Ok(()); + } else { + return Err(MatchError::quit( + input.haystack()[state.at], + state.at, + )); + } + } + state.at += 1; + } + + let result = eoi_fwd(dfa, input, &mut sid, &mut state.mat); + state.id = Some(sid); + if state.mat.is_some() { + // '1' is always correct here since if we get to this point, this + // always corresponds to the first (index '0') match discovered at + // this position. So the next match to report at this position (if + // it exists) is at index '1'. 
+            state.next_match_index = Some(1);
+        }
+        result
+    }
+
+#[inline(never)]
+pub(crate) fn find_overlapping_rev<A: Automaton + ?Sized>(
+    dfa: &A,
+    input: &Input<'_>,
+    state: &mut OverlappingState,
+) -> Result<(), MatchError> {
+    state.mat = None;
+    if input.is_done() {
+        return Ok(());
+    }
+    let mut sid = match state.id {
+        None => {
+            let sid = init_rev(dfa, input)?;
+            state.id = Some(sid);
+            if input.start() == input.end() {
+                state.rev_eoi = true;
+            } else {
+                state.at = input.end() - 1;
+            }
+            sid
+        }
+        Some(sid) => {
+            if let Some(match_index) = state.next_match_index {
+                let match_len = dfa.match_len(sid);
+                if match_index < match_len {
+                    state.next_match_index = Some(match_index + 1);
+                    let pattern = dfa.match_pattern(sid, match_index);
+                    state.mat = Some(HalfMatch::new(pattern, state.at));
+                    return Ok(());
+                }
+            }
+            // Once we've reported all matches at a given position, we need
+            // to advance the search to the next position. However, if we've
+            // already followed the EOI transition, then we know we're done
+            // with the search and there cannot be any more matches to report.
+            if state.rev_eoi {
+                return Ok(());
+            } else if state.at == input.start() {
+                // At this point, we should follow the EOI transition. This
+                // will cause us to skip the main loop below and fall through
+                // to the final 'eoi_rev' transition.
+                state.rev_eoi = true;
+            } else {
+                // We haven't hit the end of the search yet, so move on.
+                state.at -= 1;
+            }
+            sid
+        }
+    };
+    while !state.rev_eoi {
+        sid = dfa.next_state(sid, input.haystack()[state.at]);
+        if dfa.is_special_state(sid) {
+            state.id = Some(sid);
+            if dfa.is_start_state(sid) {
+                if dfa.is_accel_state(sid) {
+                    let needles = dfa.accelerator(sid);
+                    state.at =
+                        accel::find_rev(needles, input.haystack(), state.at)
+                            .map(|i| i + 1)
+                            .unwrap_or(input.start());
+                }
+            } else if dfa.is_match_state(sid) {
+                state.next_match_index = Some(1);
+                let pattern = dfa.match_pattern(sid, 0);
+                state.mat = Some(HalfMatch::new(pattern, state.at + 1));
+                return Ok(());
+            } else if dfa.is_accel_state(sid) {
+                let needles = dfa.accelerator(sid);
+                // If the accelerator returns nothing, why don't we quit the
+                // search? Well, if the accelerator doesn't find anything, that
+                // doesn't mean we don't have a match. It just means that we
+                // can't leave the current state given one of the 255 possible
+                // byte values. However, there might be an EOI transition. So
+                // we set 'at' to the end of the haystack, which will cause
+                // this loop to stop and fall down into the EOI transition.
+                state.at =
+                    accel::find_rev(needles, input.haystack(), state.at)
+                        .map(|i| i + 1)
+                        .unwrap_or(input.start());
+            } else if dfa.is_dead_state(sid) {
+                return Ok(());
+            } else {
+                return Err(MatchError::quit(
+                    input.haystack()[state.at],
+                    state.at,
+                ));
+            }
+        }
+        if state.at == input.start() {
+            break;
+        }
+        state.at -= 1;
+    }
+
+    let result = eoi_rev(dfa, input, &mut sid, &mut state.mat);
+    state.rev_eoi = true;
+    state.id = Some(sid);
+    if state.mat.is_some() {
+        // '1' is always correct here since if we get to this point, this
+        // always corresponds to the first (index '0') match discovered at
+        // this position. So the next match to report at this position (if
+        // it exists) is at index '1'.
+        state.next_match_index = Some(1);
+    }
+    result
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn init_fwd<A: Automaton + ?Sized>(
+    dfa: &A,
+    input: &Input<'_>,
+) -> Result<StateID, MatchError> {
+    let sid = dfa.start_state_forward(input)?;
+    // Start states can never be match states, since all matches are delayed
+    // by 1 byte.
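+    // (For example, an empty match can only be discovered after taking a
+    // transition, possibly the EOI transition handled in 'eoi_fwd' below.)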
+    debug_assert!(!dfa.is_match_state(sid));
+    Ok(sid)
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn init_rev<A: Automaton + ?Sized>(
+    dfa: &A,
+    input: &Input<'_>,
+) -> Result<StateID, MatchError> {
+    let sid = dfa.start_state_reverse(input)?;
+    // Start states can never be match states, since all matches are delayed
+    // by 1 byte.
+    debug_assert!(!dfa.is_match_state(sid));
+    Ok(sid)
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn eoi_fwd<A: Automaton + ?Sized>(
+    dfa: &A,
+    input: &Input<'_>,
+    sid: &mut StateID,
+    mat: &mut Option<HalfMatch>,
+) -> Result<(), MatchError> {
+    let sp = input.get_span();
+    match input.haystack().get(sp.end) {
+        Some(&b) => {
+            *sid = dfa.next_state(*sid, b);
+            if dfa.is_match_state(*sid) {
+                let pattern = dfa.match_pattern(*sid, 0);
+                *mat = Some(HalfMatch::new(pattern, sp.end));
+            } else if dfa.is_quit_state(*sid) {
+                return Err(MatchError::quit(b, sp.end));
+            }
+        }
+        None => {
+            *sid = dfa.next_eoi_state(*sid);
+            if dfa.is_match_state(*sid) {
+                let pattern = dfa.match_pattern(*sid, 0);
+                *mat = Some(HalfMatch::new(pattern, input.haystack().len()));
+            }
+        }
+    }
+    Ok(())
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn eoi_rev<A: Automaton + ?Sized>(
+    dfa: &A,
+    input: &Input<'_>,
+    sid: &mut StateID,
+    mat: &mut Option<HalfMatch>,
+) -> Result<(), MatchError> {
+    let sp = input.get_span();
+    if sp.start > 0 {
+        let byte = input.haystack()[sp.start - 1];
+        *sid = dfa.next_state(*sid, byte);
+        if dfa.is_match_state(*sid) {
+            let pattern = dfa.match_pattern(*sid, 0);
+            *mat = Some(HalfMatch::new(pattern, sp.start));
+        } else if dfa.is_quit_state(*sid) {
+            return Err(MatchError::quit(byte, sp.start - 1));
+        }
+    } else {
+        *sid = dfa.next_eoi_state(*sid);
+        if dfa.is_match_state(*sid) {
+            let pattern = dfa.match_pattern(*sid, 0);
+            *mat = Some(HalfMatch::new(pattern, 0));
+        }
+    }
+    Ok(())
+}
+
+/// Re-compute the starting state that a DFA should be in after finding a
+/// prefilter candidate match at the position `at`.
+///
+/// The function with the same name has a bit more docs in hybrid/search.rs.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn prefilter_restart<A: Automaton + ?Sized>(
+    dfa: &A,
+    input: &Input<'_>,
+    at: usize,
+) -> Result<StateID, MatchError> {
+    let mut input = input.clone();
+    input.set_start(at);
+    init_fwd(dfa, &input)
+}
diff --git a/vendor/regex-automata/src/dfa/sparse.rs b/vendor/regex-automata/src/dfa/sparse.rs
new file mode 100644
index 0000000..d461e0a
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/sparse.rs
@@ -0,0 +1,2639 @@
+/*!
+Types and routines specific to sparse DFAs.
+
+This module is the home of [`sparse::DFA`](DFA).
+
+Unlike the [`dense`] module, this module does not contain a builder or
+configuration specific for sparse DFAs. Instead, the intended way to build a
+sparse DFA is either by using a default configuration with its constructor
+[`sparse::DFA::new`](DFA::new), or by first configuring the construction of a
+dense DFA with [`dense::Builder`] and then calling [`dense::DFA::to_sparse`].
+For example, this configures a sparse DFA to do an overlapping search:
+
+```
+use regex_automata::{
+    dfa::{Automaton, OverlappingState, dense},
+    HalfMatch, Input, MatchKind,
+};
+
+let dense_re = dense::Builder::new()
+    .configure(dense::Config::new().match_kind(MatchKind::All))
+    .build(r"Samwise|Sam")?;
+let sparse_re = dense_re.to_sparse()?;
+
+// Setup our haystack and initial start state.
+let input = Input::new("Samwise");
+let mut state = OverlappingState::start();
+
+// First, 'Sam' will match.
+sparse_re.try_search_overlapping_fwd(&input, &mut state)?; +assert_eq!(Some(HalfMatch::must(0, 3)), state.get_match()); + +// And now 'Samwise' will match. +sparse_re.try_search_overlapping_fwd(&input, &mut state)?; +assert_eq!(Some(HalfMatch::must(0, 7)), state.get_match()); +# Ok::<(), Box>(()) +``` +*/ + +#[cfg(feature = "dfa-build")] +use core::iter; +use core::{ + convert::{TryFrom, TryInto}, + fmt, + mem::size_of, +}; + +#[cfg(feature = "dfa-build")] +use alloc::{vec, vec::Vec}; + +#[cfg(feature = "dfa-build")] +use crate::dfa::dense::{self, BuildError}; +use crate::{ + dfa::{ + automaton::{fmt_state_indicator, Automaton, StartError}, + dense::Flags, + special::Special, + StartKind, DEAD, + }, + util::{ + alphabet::{ByteClasses, ByteSet}, + escape::DebugByte, + int::{Pointer, Usize, U16, U32}, + prefilter::Prefilter, + primitives::{PatternID, StateID}, + search::Anchored, + start::{self, Start, StartByteMap}, + wire::{self, DeserializeError, Endian, SerializeError}, + }, +}; + +const LABEL: &str = "rust-regex-automata-dfa-sparse"; +const VERSION: u32 = 2; + +/// A sparse deterministic finite automaton (DFA) with variable sized states. +/// +/// In contrast to a [dense::DFA], a sparse DFA uses a more space efficient +/// representation for its transitions. Consequently, sparse DFAs may use much +/// less memory than dense DFAs, but this comes at a price. In particular, +/// reading the more space efficient transitions takes more work, and +/// consequently, searching using a sparse DFA is typically slower than a dense +/// DFA. +/// +/// A sparse DFA can be built using the default configuration via the +/// [`DFA::new`] constructor. Otherwise, one can configure various aspects of a +/// dense DFA via [`dense::Builder`], and then convert a dense DFA to a sparse +/// DFA using [`dense::DFA::to_sparse`]. +/// +/// In general, a sparse DFA supports all the same search operations as a dense +/// DFA. +/// +/// Making the choice between a dense and sparse DFA depends on your specific +/// work load. If you can sacrifice a bit of search time performance, then a +/// sparse DFA might be the best choice. In particular, while sparse DFAs are +/// probably always slower than dense DFAs, you may find that they are easily +/// fast enough for your purposes! +/// +/// # Type parameters +/// +/// A `DFA` has one type parameter, `T`, which is used to represent the parts +/// of a sparse DFA. `T` is typically a `Vec` or a `&[u8]`. +/// +/// # The `Automaton` trait +/// +/// This type implements the [`Automaton`] trait, which means it can be used +/// for searching. For example: +/// +/// ``` +/// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; +/// +/// let dfa = DFA::new("foo[0-9]+")?; +/// let expected = Some(HalfMatch::must(0, 8)); +/// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone)] +pub struct DFA { + // When compared to a dense DFA, a sparse DFA *looks* a lot simpler + // representation-wise. In reality, it is perhaps more complicated. Namely, + // in a dense DFA, all information needs to be very cheaply accessible + // using only state IDs. In a sparse DFA however, each state uses a + // variable amount of space because each state encodes more information + // than just its transitions. Each state also includes an accelerator if + // one exists, along with the matching pattern IDs if the state is a match + // state. 
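+    //
+    // Roughly, mirroring what 'from_dense' below writes out, each state is
+    // laid out as: a u16 transition count (whose most significant bit marks
+    // match states), the input byte ranges, the next state IDs, then for
+    // match states a u32-length-prefixed list of pattern IDs, and finally a
+    // length-prefixed list of accelerator bytes, if any.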
+ // + // That is, a lot of the complexity is pushed down into how each state + // itself is represented. + tt: Transitions, + st: StartTable, + special: Special, + pre: Option, + quitset: ByteSet, + flags: Flags, +} + +#[cfg(feature = "dfa-build")] +impl DFA> { + /// Parse the given regular expression using a default configuration and + /// return the corresponding sparse DFA. + /// + /// If you want a non-default configuration, then use the + /// [`dense::Builder`] to set your own configuration, and then call + /// [`dense::DFA::to_sparse`] to create a sparse DFA. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse}, HalfMatch, Input}; + /// + /// let dfa = sparse::DFA::new("foo[0-9]+bar")?; + /// + /// let expected = Some(HalfMatch::must(0, 11)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345bar"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new(pattern: &str) -> Result>, BuildError> { + dense::Builder::new() + .build(pattern) + .and_then(|dense| dense.to_sparse()) + } + + /// Parse the given regular expressions using a default configuration and + /// return the corresponding multi-DFA. + /// + /// If you want a non-default configuration, then use the + /// [`dense::Builder`] to set your own configuration, and then call + /// [`dense::DFA::to_sparse`] to create a sparse DFA. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse}, HalfMatch, Input}; + /// + /// let dfa = sparse::DFA::new_many(&["[0-9]+", "[a-z]+"])?; + /// let expected = Some(HalfMatch::must(1, 3)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345bar"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new_many>( + patterns: &[P], + ) -> Result>, BuildError> { + dense::Builder::new() + .build_many(patterns) + .and_then(|dense| dense.to_sparse()) + } +} + +#[cfg(feature = "dfa-build")] +impl DFA> { + /// Create a new DFA that matches every input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// dfa::{Automaton, sparse}, + /// HalfMatch, Input, + /// }; + /// + /// let dfa = sparse::DFA::always_match()?; + /// + /// let expected = Some(HalfMatch::must(0, 0)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new(""))?); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn always_match() -> Result>, BuildError> { + dense::DFA::always_match()?.to_sparse() + } + + /// Create a new sparse DFA that never matches any input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse}, Input}; + /// + /// let dfa = sparse::DFA::never_match()?; + /// assert_eq!(None, dfa.try_search_fwd(&Input::new(""))?); + /// assert_eq!(None, dfa.try_search_fwd(&Input::new("foo"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn never_match() -> Result>, BuildError> { + dense::DFA::never_match()?.to_sparse() + } + + /// The implementation for constructing a sparse DFA from a dense DFA. + pub(crate) fn from_dense>( + dfa: &dense::DFA, + ) -> Result>, BuildError> { + // In order to build the transition table, we need to be able to write + // state identifiers for each of the "next" transitions in each state. + // Our state identifiers correspond to the byte offset in the + // transition table at which the state is encoded. 
Therefore, we do not + // actually know what the state identifiers are until we've allocated + // exactly as much space as we need for each state. Thus, construction + // of the transition table happens in two passes. + // + // In the first pass, we fill out the shell of each state, which + // includes the transition length, the input byte ranges and + // zero-filled space for the transitions and accelerators, if present. + // In this first pass, we also build up a map from the state identifier + // index of the dense DFA to the state identifier in this sparse DFA. + // + // In the second pass, we fill in the transitions based on the map + // built in the first pass. + + // The capacity given here reflects a minimum. (Well, the true minimum + // is likely even bigger, but hopefully this saves a few reallocs.) + let mut sparse = Vec::with_capacity(StateID::SIZE * dfa.state_len()); + // This maps state indices from the dense DFA to StateIDs in the sparse + // DFA. We build out this map on the first pass, and then use it in the + // second pass to back-fill our transitions. + let mut remap: Vec = vec![DEAD; dfa.state_len()]; + for state in dfa.states() { + let pos = sparse.len(); + + remap[dfa.to_index(state.id())] = StateID::new(pos) + .map_err(|_| BuildError::too_many_states())?; + // zero-filled space for the transition length + sparse.push(0); + sparse.push(0); + + let mut transition_len = 0; + for (unit1, unit2, _) in state.sparse_transitions() { + match (unit1.as_u8(), unit2.as_u8()) { + (Some(b1), Some(b2)) => { + transition_len += 1; + sparse.push(b1); + sparse.push(b2); + } + (None, None) => {} + (Some(_), None) | (None, Some(_)) => { + // can never occur because sparse_transitions never + // groups EOI with any other transition. + unreachable!() + } + } + } + // Add dummy EOI transition. This is never actually read while + // searching, but having space equivalent to the total number + // of transitions is convenient. Otherwise, we'd need to track + // a different number of transitions for the byte ranges as for + // the 'next' states. + // + // N.B. The loop above is not guaranteed to yield the EOI + // transition, since it may point to a DEAD state. By putting + // it here, we always write the EOI transition, and thus + // guarantee that our transition length is >0. Why do we always + // need the EOI transition? Because in order to implement + // Automaton::next_eoi_state, this lets us just ask for the last + // transition. There are probably other/better ways to do this. + transition_len += 1; + sparse.push(0); + sparse.push(0); + + // Check some assumptions about transition length. + assert_ne!( + transition_len, 0, + "transition length should be non-zero", + ); + assert!( + transition_len <= 257, + "expected transition length {} to be <= 257", + transition_len, + ); + + // Fill in the transition length. + // Since transition length is always <= 257, we use the most + // significant bit to indicate whether this is a match state or + // not. + let ntrans = if dfa.is_match_state(state.id()) { + transition_len | (1 << 15) + } else { + transition_len + }; + wire::NE::write_u16(ntrans, &mut sparse[pos..]); + + // zero-fill the actual transitions. + // Unwraps are OK since transition_length <= 257 and our minimum + // support usize size is 16-bits. + let zeros = usize::try_from(transition_len) + .unwrap() + .checked_mul(StateID::SIZE) + .unwrap(); + sparse.extend(iter::repeat(0).take(zeros)); + + // If this is a match state, write the pattern IDs matched by this + // state. 
+ if dfa.is_match_state(state.id()) { + let plen = dfa.match_pattern_len(state.id()); + // Write the actual pattern IDs with a u32 length prefix. + // First, zero-fill space. + let mut pos = sparse.len(); + // Unwraps are OK since it's guaranteed that plen <= + // PatternID::LIMIT, which is in turn guaranteed to fit into a + // u32. + let zeros = size_of::() + .checked_mul(plen) + .unwrap() + .checked_add(size_of::()) + .unwrap(); + sparse.extend(iter::repeat(0).take(zeros)); + + // Now write the length prefix. + wire::NE::write_u32( + // Will never fail since u32::MAX is invalid pattern ID. + // Thus, the number of pattern IDs is representable by a + // u32. + plen.try_into().expect("pattern ID length fits in u32"), + &mut sparse[pos..], + ); + pos += size_of::(); + + // Now write the pattern IDs. + for &pid in dfa.pattern_id_slice(state.id()) { + pos += wire::write_pattern_id::( + pid, + &mut sparse[pos..], + ); + } + } + + // And now add the accelerator, if one exists. An accelerator is + // at most 4 bytes and at least 1 byte. The first byte is the + // length, N. N bytes follow the length. The set of bytes that + // follow correspond (exhaustively) to the bytes that must be seen + // to leave this state. + let accel = dfa.accelerator(state.id()); + sparse.push(accel.len().try_into().unwrap()); + sparse.extend_from_slice(accel); + } + + let mut new = DFA { + tt: Transitions { + sparse, + classes: dfa.byte_classes().clone(), + state_len: dfa.state_len(), + pattern_len: dfa.pattern_len(), + }, + st: StartTable::from_dense_dfa(dfa, &remap)?, + special: dfa.special().remap(|id| remap[dfa.to_index(id)]), + pre: dfa.get_prefilter().map(|p| p.clone()), + quitset: dfa.quitset().clone(), + flags: dfa.flags().clone(), + }; + // And here's our second pass. Iterate over all of the dense states + // again, and update the transitions in each of the states in the + // sparse DFA. + for old_state in dfa.states() { + let new_id = remap[dfa.to_index(old_state.id())]; + let mut new_state = new.tt.state_mut(new_id); + let sparse = old_state.sparse_transitions(); + for (i, (_, _, next)) in sparse.enumerate() { + let next = remap[dfa.to_index(next)]; + new_state.set_next_at(i, next); + } + } + debug!( + "created sparse DFA, memory usage: {} (dense memory usage: {})", + new.memory_usage(), + dfa.memory_usage(), + ); + Ok(new) + } +} + +impl> DFA { + /// Cheaply return a borrowed version of this sparse DFA. Specifically, the + /// DFA returned always uses `&[u8]` for its transitions. + pub fn as_ref<'a>(&'a self) -> DFA<&'a [u8]> { + DFA { + tt: self.tt.as_ref(), + st: self.st.as_ref(), + special: self.special, + pre: self.pre.clone(), + quitset: self.quitset, + flags: self.flags, + } + } + + /// Return an owned version of this sparse DFA. Specifically, the DFA + /// returned always uses `Vec` for its transitions. + /// + /// Effectively, this returns a sparse DFA whose transitions live on the + /// heap. + #[cfg(feature = "alloc")] + pub fn to_owned(&self) -> DFA> { + DFA { + tt: self.tt.to_owned(), + st: self.st.to_owned(), + special: self.special, + pre: self.pre.clone(), + quitset: self.quitset, + flags: self.flags, + } + } + + /// Returns the starting state configuration for this DFA. + /// + /// The default is [`StartKind::Both`], which means the DFA supports both + /// unanchored and anchored searches. However, this can generally lead to + /// bigger DFAs. Therefore, a DFA might be compiled with support for just + /// unanchored or anchored searches. 
In that case, running a search with + /// an unsupported configuration will panic. + pub fn start_kind(&self) -> StartKind { + self.st.kind + } + + /// Returns true only if this DFA has starting states for each pattern. + /// + /// When a DFA has starting states for each pattern, then a search with the + /// DFA can be configured to only look for anchored matches of a specific + /// pattern. Specifically, APIs like [`Automaton::try_search_fwd`] can + /// accept a [`Anchored::Pattern`] if and only if this method returns true. + /// Otherwise, an error will be returned. + /// + /// Note that if the DFA is empty, this always returns false. + pub fn starts_for_each_pattern(&self) -> bool { + self.st.pattern_len.is_some() + } + + /// Returns the equivalence classes that make up the alphabet for this DFA. + /// + /// Unless [`dense::Config::byte_classes`] was disabled, it is possible + /// that multiple distinct bytes are grouped into the same equivalence + /// class if it is impossible for them to discriminate between a match and + /// a non-match. This has the effect of reducing the overall alphabet size + /// and in turn potentially substantially reducing the size of the DFA's + /// transition table. + /// + /// The downside of using equivalence classes like this is that every state + /// transition will automatically use this map to convert an arbitrary + /// byte to its corresponding equivalence class. In practice this has a + /// negligible impact on performance. + pub fn byte_classes(&self) -> &ByteClasses { + &self.tt.classes + } + + /// Returns the memory usage, in bytes, of this DFA. + /// + /// The memory usage is computed based on the number of bytes used to + /// represent this DFA. + /// + /// This does **not** include the stack size used up by this DFA. To + /// compute that, use `std::mem::size_of::()`. + pub fn memory_usage(&self) -> usize { + self.tt.memory_usage() + self.st.memory_usage() + } +} + +/// Routines for converting a sparse DFA to other representations, such as raw +/// bytes suitable for persistent storage. +impl> DFA { + /// Serialize this DFA as raw bytes to a `Vec` in little endian + /// format. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// Note that unlike a [`dense::DFA`]'s serialization methods, this does + /// not add any initial padding to the returned bytes. Padding isn't + /// required for sparse DFAs since they have no alignment requirements. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA: + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // N.B. We use native endianness here to make the example work, but + /// // using to_bytes_little_endian would work on a little endian target. + /// let buf = original_dfa.to_bytes_native_endian(); + /// // Even if buf has initial padding, DFA::from_bytes will automatically + /// // ignore it. 
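+    /// // from_bytes returns a (DFA, bytes read) pair; `.0` keeps the DFA.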
+ /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf)?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "dfa-build")] + pub fn to_bytes_little_endian(&self) -> Vec { + self.to_bytes::() + } + + /// Serialize this DFA as raw bytes to a `Vec` in big endian + /// format. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// Note that unlike a [`dense::DFA`]'s serialization methods, this does + /// not add any initial padding to the returned bytes. Padding isn't + /// required for sparse DFAs since they have no alignment requirements. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA: + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // N.B. We use native endianness here to make the example work, but + /// // using to_bytes_big_endian would work on a big endian target. + /// let buf = original_dfa.to_bytes_native_endian(); + /// // Even if buf has initial padding, DFA::from_bytes will automatically + /// // ignore it. + /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf)?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "dfa-build")] + pub fn to_bytes_big_endian(&self) -> Vec { + self.to_bytes::() + } + + /// Serialize this DFA as raw bytes to a `Vec` in native endian + /// format. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// Note that unlike a [`dense::DFA`]'s serialization methods, this does + /// not add any initial padding to the returned bytes. Padding isn't + /// required for sparse DFAs since they have no alignment requirements. + /// + /// Generally speaking, native endian format should only be used when + /// you know that the target you're compiling the DFA for matches the + /// endianness of the target on which you're compiling DFA. For example, + /// if serialization and deserialization happen in the same process or on + /// the same machine. Otherwise, when serializing a DFA for use in a + /// portable environment, you'll almost certainly want to serialize _both_ + /// a little endian and a big endian version and then load the correct one + /// based on the target's configuration. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA: + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// let buf = original_dfa.to_bytes_native_endian(); + /// // Even if buf has initial padding, DFA::from_bytes will automatically + /// // ignore it. 
+ /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf)?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "dfa-build")] + pub fn to_bytes_native_endian(&self) -> Vec { + self.to_bytes::() + } + + /// The implementation of the public `to_bytes` serialization methods, + /// which is generic over endianness. + #[cfg(feature = "dfa-build")] + fn to_bytes(&self) -> Vec { + let mut buf = vec![0; self.write_to_len()]; + // This should always succeed since the only possible serialization + // error is providing a buffer that's too small, but we've ensured that + // `buf` is big enough here. + self.write_to::(&mut buf).unwrap(); + buf + } + + /// Serialize this DFA as raw bytes to the given slice, in little endian + /// format. Upon success, the total number of bytes written to `dst` is + /// returned. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// # Errors + /// + /// This returns an error if the given destination slice is not big enough + /// to contain the full serialized DFA. If an error occurs, then nothing + /// is written to `dst`. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA without + /// dynamic memory allocation. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // Create a 4KB buffer on the stack to store our serialized DFA. + /// let mut buf = [0u8; 4 * (1<<10)]; + /// // N.B. We use native endianness here to make the example work, but + /// // using write_to_little_endian would work on a little endian target. + /// let written = original_dfa.write_to_native_endian(&mut buf)?; + /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn write_to_little_endian( + &self, + dst: &mut [u8], + ) -> Result { + self.write_to::(dst) + } + + /// Serialize this DFA as raw bytes to the given slice, in big endian + /// format. Upon success, the total number of bytes written to `dst` is + /// returned. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// # Errors + /// + /// This returns an error if the given destination slice is not big enough + /// to contain the full serialized DFA. If an error occurs, then nothing + /// is written to `dst`. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA without + /// dynamic memory allocation. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. 
+ /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // Create a 4KB buffer on the stack to store our serialized DFA. + /// let mut buf = [0u8; 4 * (1<<10)]; + /// // N.B. We use native endianness here to make the example work, but + /// // using write_to_big_endian would work on a big endian target. + /// let written = original_dfa.write_to_native_endian(&mut buf)?; + /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn write_to_big_endian( + &self, + dst: &mut [u8], + ) -> Result { + self.write_to::(dst) + } + + /// Serialize this DFA as raw bytes to the given slice, in native endian + /// format. Upon success, the total number of bytes written to `dst` is + /// returned. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// Generally speaking, native endian format should only be used when + /// you know that the target you're compiling the DFA for matches the + /// endianness of the target on which you're compiling DFA. For example, + /// if serialization and deserialization happen in the same process or on + /// the same machine. Otherwise, when serializing a DFA for use in a + /// portable environment, you'll almost certainly want to serialize _both_ + /// a little endian and a big endian version and then load the correct one + /// based on the target's configuration. + /// + /// # Errors + /// + /// This returns an error if the given destination slice is not big enough + /// to contain the full serialized DFA. If an error occurs, then nothing + /// is written to `dst`. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA without + /// dynamic memory allocation. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // Create a 4KB buffer on the stack to store our serialized DFA. + /// let mut buf = [0u8; 4 * (1<<10)]; + /// let written = original_dfa.write_to_native_endian(&mut buf)?; + /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn write_to_native_endian( + &self, + dst: &mut [u8], + ) -> Result { + self.write_to::(dst) + } + + /// The implementation of the public `write_to` serialization methods, + /// which is generic over endianness. 
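+    // [Editor's sketch] The record order produced below, and consumed in
+    // the same order by `from_bytes_unchecked`, is:
+    //
+    //     label, endianness check, version, reserved u32,
+    //     flags, transition table, start table, special IDs, quit set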
+    fn write_to<E: Endian>(
+        &self,
+        dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        let mut nw = 0;
+        nw += wire::write_label(LABEL, &mut dst[nw..])?;
+        nw += wire::write_endianness_check::<E>(&mut dst[nw..])?;
+        nw += wire::write_version::<E>(VERSION, &mut dst[nw..])?;
+        nw += {
+            // Currently unused, intended for future flexibility
+            E::write_u32(0, &mut dst[nw..]);
+            size_of::<u32>()
+        };
+        nw += self.flags.write_to::<E>(&mut dst[nw..])?;
+        nw += self.tt.write_to::<E>(&mut dst[nw..])?;
+        nw += self.st.write_to::<E>(&mut dst[nw..])?;
+        nw += self.special.write_to::<E>(&mut dst[nw..])?;
+        nw += self.quitset.write_to::<E>(&mut dst[nw..])?;
+        Ok(nw)
+    }
+
+    /// Return the total number of bytes required to serialize this DFA.
+    ///
+    /// This is useful for determining the size of the buffer required to
+    /// pass to one of the serialization routines:
+    ///
+    /// * [`DFA::write_to_little_endian`]
+    /// * [`DFA::write_to_big_endian`]
+    /// * [`DFA::write_to_native_endian`]
+    ///
+    /// Passing a buffer smaller than the size returned by this method will
+    /// result in a serialization error.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to dynamically allocate enough room to
+    /// serialize a sparse DFA.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input};
+    ///
+    /// // Compile our original DFA.
+    /// let original_dfa = DFA::new("foo[0-9]+")?;
+    ///
+    /// let mut buf = vec![0; original_dfa.write_to_len()];
+    /// let written = original_dfa.write_to_native_endian(&mut buf)?;
+    /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn write_to_len(&self) -> usize {
+        wire::write_label_len(LABEL)
+            + wire::write_endianness_check_len()
+            + wire::write_version_len()
+            + size_of::<u32>() // unused, intended for future flexibility
+            + self.flags.write_to_len()
+            + self.tt.write_to_len()
+            + self.st.write_to_len()
+            + self.special.write_to_len()
+            + self.quitset.write_to_len()
+    }
+}
+
+impl<'a> DFA<&'a [u8]> {
+    /// Safely deserialize a sparse DFA with a specific state identifier
+    /// representation. Upon success, this returns both the deserialized DFA
+    /// and the number of bytes read from the given slice. Namely, the
+    /// contents of the slice beyond the DFA are not read.
+    ///
+    /// Deserializing a DFA using this routine will never allocate heap
+    /// memory. For safety purposes, the DFA's transitions will be verified
+    /// such that every transition points to a valid state. If this
+    /// verification is too costly, then a [`DFA::from_bytes_unchecked`] API
+    /// is provided, which will always execute in constant time.
+    ///
+    /// The bytes given must be generated by one of the serialization APIs
+    /// of a `DFA` using a semver compatible release of this crate. Those
+    /// include:
+    ///
+    /// * [`DFA::to_bytes_little_endian`]
+    /// * [`DFA::to_bytes_big_endian`]
+    /// * [`DFA::to_bytes_native_endian`]
+    /// * [`DFA::write_to_little_endian`]
+    /// * [`DFA::write_to_big_endian`]
+    /// * [`DFA::write_to_native_endian`]
+    ///
+    /// The `to_bytes` methods allocate and return a `Vec<u8>` for you. The
+    /// `write_to` methods do not allocate and write to an existing slice
+    /// (which may be on the stack). Since deserialization always uses the
+    /// native endianness of the target platform, the serialization API you
+    /// use should match the endianness of the target platform.
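+    /// For example, a DFA serialized in big endian format will fail to
+    /// deserialize via [`DFA::from_bytes`] on a little endian target.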
+    /// (It's often a good idea to generate serialized DFAs for both forms
+    /// of endianness and then load the correct one based on endianness.)
+    ///
+    /// # Errors
+    ///
+    /// Generally speaking, it's easier to state the conditions in which an
+    /// error is _not_ returned. All of the following must be true:
+    ///
+    /// * The bytes given must be produced by one of the serialization APIs
+    ///   on this DFA, as mentioned above.
+    /// * The endianness of the target platform matches the endianness used
+    ///   to serialize the provided DFA.
+    ///
+    /// If any of the above are not true, then an error will be returned.
+    ///
+    /// Note that unlike deserializing a [`dense::DFA`], deserializing a
+    /// sparse DFA has no alignment requirements. That is, an alignment of
+    /// `1` is valid.
+    ///
+    /// # Panics
+    ///
+    /// This routine will never panic for any input.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to serialize a DFA to raw bytes, deserialize
+    /// it and then use it for searching.
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input};
+    ///
+    /// let initial = DFA::new("foo[0-9]+")?;
+    /// let bytes = initial.to_bytes_native_endian();
+    /// let dfa: DFA<&[u8]> = DFA::from_bytes(&bytes)?.0;
+    ///
+    /// let expected = Some(HalfMatch::must(0, 8));
+    /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: loading a DFA from static memory
+    ///
+    /// One use case this library supports is the ability to serialize a
+    /// DFA to disk and then use `include_bytes!` to store it in a compiled
+    /// Rust program. Those bytes can then be cheaply deserialized into a
+    /// `DFA` structure at runtime and used for searching without having to
+    /// re-compile the DFA (which can be quite costly).
+    ///
+    /// We can show this in two parts. The first part is serializing the DFA
+    /// to a file:
+    ///
+    /// ```no_run
+    /// use regex_automata::dfa::sparse::DFA;
+    ///
+    /// let dfa = DFA::new("foo[0-9]+")?;
+    ///
+    /// // Write a big endian serialized version of this DFA to a file.
+    /// let bytes = dfa.to_bytes_big_endian();
+    /// std::fs::write("foo.bigendian.dfa", &bytes)?;
+    ///
+    /// // Do it again, but this time for little endian.
+    /// let bytes = dfa.to_bytes_little_endian();
+    /// std::fs::write("foo.littleendian.dfa", &bytes)?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And now the second part is embedding the DFA into the compiled
+    /// program and deserializing it at runtime on first use. We use
+    /// conditional compilation to choose the correct endianness. We do not
+    /// need to employ any special tricks to ensure a proper alignment, since
+    /// a sparse DFA has no alignment requirements.
+    ///
+    /// ```no_run
+    /// use regex_automata::{
+    ///     dfa::{Automaton, sparse::DFA},
+    ///     util::lazy::Lazy,
+    ///     HalfMatch, Input,
+    /// };
+    ///
+    /// // This crate provides its own "lazy" type, kind of like
+    /// // lazy_static! or once_cell::sync::Lazy. But it works in no-alloc
+    /// // no-std environments and lets us write this using completely
+    /// // safe code.
+    /// static RE: Lazy<DFA<&'static [u8]>> = Lazy::new(|| {
+    ///     # const _: &str = stringify!
{ + /// #[cfg(target_endian = "big")] + /// static BYTES: &[u8] = include_bytes!("foo.bigendian.dfa"); + /// #[cfg(target_endian = "little")] + /// static BYTES: &[u8] = include_bytes!("foo.littleendian.dfa"); + /// # }; + /// # static BYTES: &[u8] = b""; + /// + /// let (dfa, _) = DFA::from_bytes(BYTES) + /// .expect("serialized DFA should be valid"); + /// dfa + /// }); + /// + /// let expected = Ok(Some(HalfMatch::must(0, 8))); + /// assert_eq!(expected, RE.try_search_fwd(&Input::new("foo12345"))); + /// ``` + /// + /// Alternatively, consider using + /// [`lazy_static`](https://crates.io/crates/lazy_static) + /// or + /// [`once_cell`](https://crates.io/crates/once_cell), + /// which will guarantee safety for you. + pub fn from_bytes( + slice: &'a [u8], + ) -> Result<(DFA<&'a [u8]>, usize), DeserializeError> { + // SAFETY: This is safe because we validate both the sparse transitions + // (by trying to decode every state) and start state ID list below. If + // either validation fails, then we return an error. + let (dfa, nread) = unsafe { DFA::from_bytes_unchecked(slice)? }; + let seen = dfa.tt.validate(&dfa.special)?; + dfa.st.validate(&dfa.special, &seen)?; + // N.B. dfa.special doesn't have a way to do unchecked deserialization, + // so it has already been validated. + Ok((dfa, nread)) + } + + /// Deserialize a DFA with a specific state identifier representation in + /// constant time by omitting the verification of the validity of the + /// sparse transitions. + /// + /// This is just like [`DFA::from_bytes`], except it can potentially return + /// a DFA that exhibits undefined behavior if its transitions contains + /// invalid state identifiers. + /// + /// This routine is useful if you need to deserialize a DFA cheaply and + /// cannot afford the transition validation performed by `from_bytes`. + /// + /// # Safety + /// + /// This routine is not safe because it permits callers to provide + /// arbitrary transitions with possibly incorrect state identifiers. While + /// the various serialization routines will never return an incorrect + /// DFA, there is no guarantee that the bytes provided here are correct. + /// While `from_bytes_unchecked` will still do several forms of basic + /// validation, this routine does not check that the transitions themselves + /// are correct. Given an incorrect transition table, it is possible for + /// the search routines to access out-of-bounds memory because of explicit + /// bounds check elision. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// let initial = DFA::new("foo[0-9]+")?; + /// let bytes = initial.to_bytes_native_endian(); + /// // SAFETY: This is guaranteed to be safe since the bytes given come + /// // directly from a compatible serialization routine. 
+ /// let dfa: DFA<&[u8]> = unsafe { DFA::from_bytes_unchecked(&bytes)?.0 }; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub unsafe fn from_bytes_unchecked( + slice: &'a [u8], + ) -> Result<(DFA<&'a [u8]>, usize), DeserializeError> { + let mut nr = 0; + + nr += wire::read_label(&slice[nr..], LABEL)?; + nr += wire::read_endianness_check(&slice[nr..])?; + nr += wire::read_version(&slice[nr..], VERSION)?; + + let _unused = wire::try_read_u32(&slice[nr..], "unused space")?; + nr += size_of::(); + + let (flags, nread) = Flags::from_bytes(&slice[nr..])?; + nr += nread; + + let (tt, nread) = Transitions::from_bytes_unchecked(&slice[nr..])?; + nr += nread; + + let (st, nread) = StartTable::from_bytes_unchecked(&slice[nr..])?; + nr += nread; + + let (special, nread) = Special::from_bytes(&slice[nr..])?; + nr += nread; + if special.max.as_usize() >= tt.sparse().len() { + return Err(DeserializeError::generic( + "max should not be greater than or equal to sparse bytes", + )); + } + + let (quitset, nread) = ByteSet::from_bytes(&slice[nr..])?; + nr += nread; + + // Prefilters don't support serialization, so they're always absent. + let pre = None; + Ok((DFA { tt, st, special, pre, quitset, flags }, nr)) + } +} + +impl> fmt::Debug for DFA { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "sparse::DFA(")?; + for state in self.tt.states() { + fmt_state_indicator(f, self, state.id())?; + writeln!(f, "{:06?}: {:?}", state.id().as_usize(), state)?; + } + writeln!(f, "")?; + for (i, (start_id, anchored, sty)) in self.st.iter().enumerate() { + if i % self.st.stride == 0 { + match anchored { + Anchored::No => writeln!(f, "START-GROUP(unanchored)")?, + Anchored::Yes => writeln!(f, "START-GROUP(anchored)")?, + Anchored::Pattern(pid) => writeln!( + f, + "START_GROUP(pattern: {:?})", + pid.as_usize() + )?, + } + } + writeln!(f, " {:?} => {:06?}", sty, start_id.as_usize())?; + } + writeln!(f, "state length: {:?}", self.tt.state_len)?; + writeln!(f, "pattern length: {:?}", self.pattern_len())?; + writeln!(f, "flags: {:?}", self.flags)?; + writeln!(f, ")")?; + Ok(()) + } +} + +// SAFETY: We assert that our implementation of each method is correct. +unsafe impl> Automaton for DFA { + #[inline] + fn is_special_state(&self, id: StateID) -> bool { + self.special.is_special_state(id) + } + + #[inline] + fn is_dead_state(&self, id: StateID) -> bool { + self.special.is_dead_state(id) + } + + #[inline] + fn is_quit_state(&self, id: StateID) -> bool { + self.special.is_quit_state(id) + } + + #[inline] + fn is_match_state(&self, id: StateID) -> bool { + self.special.is_match_state(id) + } + + #[inline] + fn is_start_state(&self, id: StateID) -> bool { + self.special.is_start_state(id) + } + + #[inline] + fn is_accel_state(&self, id: StateID) -> bool { + self.special.is_accel_state(id) + } + + // This is marked as inline to help dramatically boost sparse searching, + // which decodes each state it enters to follow the next transition. 
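+    // [Editor's sketch] Conceptually, each call below performs:
+    //
+    //     let class = self.tt.classes.get(input); // byte -> equivalence class
+    //     self.tt.state(current).next(class)      // linear scan of the ranges
+    //
+    // versus a single table index in a dense DFA, which is why inlining the
+    // decoding path matters so much here.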
+ #[cfg_attr(feature = "perf-inline", inline(always))] + fn next_state(&self, current: StateID, input: u8) -> StateID { + let input = self.tt.classes.get(input); + self.tt.state(current).next(input) + } + + #[inline] + unsafe fn next_state_unchecked( + &self, + current: StateID, + input: u8, + ) -> StateID { + self.next_state(current, input) + } + + #[inline] + fn next_eoi_state(&self, current: StateID) -> StateID { + self.tt.state(current).next_eoi() + } + + #[inline] + fn pattern_len(&self) -> usize { + self.tt.pattern_len + } + + #[inline] + fn match_len(&self, id: StateID) -> usize { + self.tt.state(id).pattern_len() + } + + #[inline] + fn match_pattern(&self, id: StateID, match_index: usize) -> PatternID { + // This is an optimization for the very common case of a DFA with a + // single pattern. This conditional avoids a somewhat more costly path + // that finds the pattern ID from the state machine, which requires + // a bit of slicing/pointer-chasing. This optimization tends to only + // matter when matches are frequent. + if self.tt.pattern_len == 1 { + return PatternID::ZERO; + } + self.tt.state(id).pattern_id(match_index) + } + + #[inline] + fn has_empty(&self) -> bool { + self.flags.has_empty + } + + #[inline] + fn is_utf8(&self) -> bool { + self.flags.is_utf8 + } + + #[inline] + fn is_always_start_anchored(&self) -> bool { + self.flags.is_always_start_anchored + } + + #[inline] + fn start_state( + &self, + config: &start::Config, + ) -> Result { + let anchored = config.get_anchored(); + let start = match config.get_look_behind() { + None => Start::Text, + Some(byte) => { + if !self.quitset.is_empty() && self.quitset.contains(byte) { + return Err(StartError::quit(byte)); + } + self.st.start_map.get(byte) + } + }; + self.st.start(anchored, start) + } + + #[inline] + fn universal_start_state(&self, mode: Anchored) -> Option { + match mode { + Anchored::No => self.st.universal_start_unanchored, + Anchored::Yes => self.st.universal_start_anchored, + Anchored::Pattern(_) => None, + } + } + + #[inline] + fn accelerator(&self, id: StateID) -> &[u8] { + self.tt.state(id).accelerator() + } + + #[inline] + fn get_prefilter(&self) -> Option<&Prefilter> { + self.pre.as_ref() + } +} + +/// The transition table portion of a sparse DFA. +/// +/// The transition table is the core part of the DFA in that it describes how +/// to move from one state to another based on the input sequence observed. +/// +/// Unlike a typical dense table based DFA, states in a sparse transition +/// table have variable size. That is, states with more transitions use more +/// space than states with fewer transitions. This means that finding the next +/// transition takes more work than with a dense DFA, but also typically uses +/// much less space. +#[derive(Clone)] +struct Transitions { + /// The raw encoding of each state in this DFA. + /// + /// Each state has the following information: + /// + /// * A set of transitions to subsequent states. Transitions to the dead + /// state are omitted. + /// * If the state can be accelerated, then any additional accelerator + /// information. + /// * If the state is a match state, then the state contains all pattern + /// IDs that match when in that state. + /// + /// To decode a state, use Transitions::state. + /// + /// In practice, T is either Vec or &[u8]. + sparse: T, + /// A set of equivalence classes, where a single equivalence class + /// represents a set of bytes that never discriminate between a match + /// and a non-match in the DFA. 
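+    /// (Editor's example: for the pattern `[a-z]+`, the bytes `a` through
+    /// `z` can all share one equivalence class and every remaining byte
+    /// another, since no byte within either set can change whether the
+    /// input matches.)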
Each equivalence class corresponds to a + /// single character in this DFA's alphabet, where the maximum number of + /// characters is 257 (each possible value of a byte plus the special + /// EOI transition). Consequently, the number of equivalence classes + /// corresponds to the number of transitions for each DFA state. Note + /// though that the *space* used by each DFA state in the transition table + /// may be larger. The total space used by each DFA state is known as the + /// stride and is documented above. + /// + /// The only time the number of equivalence classes is fewer than 257 is + /// if the DFA's kind uses byte classes which is the default. Equivalence + /// classes should generally only be disabled when debugging, so that + /// the transitions themselves aren't obscured. Disabling them has no + /// other benefit, since the equivalence class map is always used while + /// searching. In the vast majority of cases, the number of equivalence + /// classes is substantially smaller than 257, particularly when large + /// Unicode classes aren't used. + /// + /// N.B. Equivalence classes aren't particularly useful in a sparse DFA + /// in the current implementation, since equivalence classes generally tend + /// to correspond to continuous ranges of bytes that map to the same + /// transition. So in a sparse DFA, equivalence classes don't really lead + /// to a space savings. In the future, it would be good to try and remove + /// them from sparse DFAs entirely, but requires a bit of work since sparse + /// DFAs are built from dense DFAs, which are in turn built on top of + /// equivalence classes. + classes: ByteClasses, + /// The total number of states in this DFA. Note that a DFA always has at + /// least one state---the dead state---even the empty DFA. In particular, + /// the dead state always has ID 0 and is correspondingly always the first + /// state. The dead state is never a match state. + state_len: usize, + /// The total number of unique patterns represented by these match states. + pattern_len: usize, +} + +impl<'a> Transitions<&'a [u8]> { + unsafe fn from_bytes_unchecked( + mut slice: &'a [u8], + ) -> Result<(Transitions<&'a [u8]>, usize), DeserializeError> { + let slice_start = slice.as_ptr().as_usize(); + + let (state_len, nr) = + wire::try_read_u32_as_usize(&slice, "state length")?; + slice = &slice[nr..]; + + let (pattern_len, nr) = + wire::try_read_u32_as_usize(&slice, "pattern length")?; + slice = &slice[nr..]; + + let (classes, nr) = ByteClasses::from_bytes(&slice)?; + slice = &slice[nr..]; + + let (len, nr) = + wire::try_read_u32_as_usize(&slice, "sparse transitions length")?; + slice = &slice[nr..]; + + wire::check_slice_len(slice, len, "sparse states byte length")?; + let sparse = &slice[..len]; + slice = &slice[len..]; + + let trans = Transitions { sparse, classes, state_len, pattern_len }; + Ok((trans, slice.as_ptr().as_usize() - slice_start)) + } +} + +impl> Transitions { + /// Writes a serialized form of this transition table to the buffer given. + /// If the buffer is too small, then an error is returned. To determine + /// how big the buffer must be, use `write_to_len`. 
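+    // [Editor's sketch] The record order written below is:
+    //
+    //     u32 state count, u32 pattern count, byte class map,
+    //     u32 sparse byte length, then each encoded state back to back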
+ fn write_to( + &self, + mut dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small( + "sparse transition table", + )); + } + dst = &mut dst[..nwrite]; + + // write state length + E::write_u32(u32::try_from(self.state_len).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + // write pattern length + E::write_u32(u32::try_from(self.pattern_len).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + // write byte class map + let n = self.classes.write_to(dst)?; + dst = &mut dst[n..]; + + // write number of bytes in sparse transitions + E::write_u32(u32::try_from(self.sparse().len()).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + // write actual transitions + let mut id = DEAD; + while id.as_usize() < self.sparse().len() { + let state = self.state(id); + let n = state.write_to::(&mut dst)?; + dst = &mut dst[n..]; + // The next ID is the offset immediately following `state`. + id = StateID::new(id.as_usize() + state.write_to_len()).unwrap(); + } + Ok(nwrite) + } + + /// Returns the number of bytes the serialized form of this transition + /// table will use. + fn write_to_len(&self) -> usize { + size_of::() // state length + + size_of::() // pattern length + + self.classes.write_to_len() + + size_of::() // sparse transitions length + + self.sparse().len() + } + + /// Validates that every state ID in this transition table is valid. + /// + /// That is, every state ID can be used to correctly index a state in this + /// table. + fn validate(&self, sp: &Special) -> Result { + let mut verified = Seen::new(); + // We need to make sure that we decode the correct number of states. + // Otherwise, an empty set of transitions would validate even if the + // recorded state length is non-empty. + let mut len = 0; + // We can't use the self.states() iterator because it assumes the state + // encodings are valid. It could panic if they aren't. + let mut id = DEAD; + while id.as_usize() < self.sparse().len() { + // Before we even decode the state, we check that the ID itself + // is well formed. That is, if it's a special state then it must + // actually be a quit, dead, accel, match or start state. + if sp.is_special_state(id) { + let is_actually_special = sp.is_dead_state(id) + || sp.is_quit_state(id) + || sp.is_match_state(id) + || sp.is_start_state(id) + || sp.is_accel_state(id); + if !is_actually_special { + // This is kind of a cryptic error message... + return Err(DeserializeError::generic( + "found sparse state tagged as special but \ + wasn't actually special", + )); + } + } + let state = self.try_state(sp, id)?; + verified.insert(id); + // The next ID should be the offset immediately following `state`. + id = StateID::new(wire::add( + id.as_usize(), + state.write_to_len(), + "next state ID offset", + )?) + .map_err(|err| { + DeserializeError::state_id_error(err, "next state ID offset") + })?; + len += 1; + } + // Now that we've checked that all top-level states are correct and + // importantly, collected a set of valid state IDs, we have all the + // information we need to check that all transitions are correct too. + // + // Note that we can't use `valid_ids` to iterate because it will + // be empty in no-std no-alloc contexts. (And yes, that means our + // verification isn't quite as good.) We can use `self.states()` + // though at least, since we know that all states can at least be + // decoded and traversed correctly. 
+ for state in self.states() { + // Check that all transitions in this state are correct. + for i in 0..state.ntrans { + let to = state.next_at(i); + // For no-alloc, we just check that the state can decode. It is + // technically possible that the state ID could still point to + // a non-existent state even if it decodes (fuzzing proved this + // to be true), but it shouldn't result in any memory unsafety + // or panics in non-debug mode. + #[cfg(not(feature = "alloc"))] + { + let _ = self.try_state(sp, to)?; + } + #[cfg(feature = "alloc")] + { + if !verified.contains(&to) { + return Err(DeserializeError::generic( + "found transition that points to a \ + non-existent state", + )); + } + } + } + } + if len != self.state_len { + return Err(DeserializeError::generic( + "mismatching sparse state length", + )); + } + Ok(verified) + } + + /// Converts these transitions to a borrowed value. + fn as_ref(&self) -> Transitions<&'_ [u8]> { + Transitions { + sparse: self.sparse(), + classes: self.classes.clone(), + state_len: self.state_len, + pattern_len: self.pattern_len, + } + } + + /// Converts these transitions to an owned value. + #[cfg(feature = "alloc")] + fn to_owned(&self) -> Transitions> { + Transitions { + sparse: self.sparse().to_vec(), + classes: self.classes.clone(), + state_len: self.state_len, + pattern_len: self.pattern_len, + } + } + + /// Return a convenient representation of the given state. + /// + /// This panics if the state is invalid. + /// + /// This is marked as inline to help dramatically boost sparse searching, + /// which decodes each state it enters to follow the next transition. Other + /// functions involved are also inlined, which should hopefully eliminate + /// a lot of the extraneous decoding that is never needed just to follow + /// the next transition. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn state(&self, id: StateID) -> State<'_> { + let mut state = &self.sparse()[id.as_usize()..]; + let mut ntrans = wire::read_u16(&state).as_usize(); + let is_match = (1 << 15) & ntrans != 0; + ntrans &= !(1 << 15); + state = &state[2..]; + + let (input_ranges, state) = state.split_at(ntrans * 2); + let (next, state) = state.split_at(ntrans * StateID::SIZE); + let (pattern_ids, state) = if is_match { + let npats = wire::read_u32(&state).as_usize(); + state[4..].split_at(npats * 4) + } else { + (&[][..], state) + }; + + let accel_len = usize::from(state[0]); + let accel = &state[1..accel_len + 1]; + State { id, is_match, ntrans, input_ranges, next, pattern_ids, accel } + } + + /// Like `state`, but will return an error if the state encoding is + /// invalid. This is useful for verifying states after deserialization, + /// which is required for a safe deserialization API. + /// + /// Note that this only verifies that this state is decodable and that + /// all of its data is consistent. It does not verify that its state ID + /// transitions point to valid states themselves, nor does it verify that + /// every pattern ID is valid. + fn try_state( + &self, + sp: &Special, + id: StateID, + ) -> Result, DeserializeError> { + if id.as_usize() > self.sparse().len() { + return Err(DeserializeError::generic( + "invalid caller provided sparse state ID", + )); + } + let mut state = &self.sparse()[id.as_usize()..]; + // Encoding format starts with a u16 that stores the total number of + // transitions in this state. 
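+        // [Editor's sketch] The full per-state encoding validated below:
+        //
+        //     u16 header: transition count, with the match flag in bit 15
+        //     ntrans * 2 bytes: inclusive (start, end) input byte ranges
+        //     ntrans * StateID::SIZE bytes: native endian next state IDs
+        //     match states only: u32 count, then that many u32 pattern IDs
+        //     u8 accelerator length (0 to 3), then that many bytes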
+ let (mut ntrans, _) = + wire::try_read_u16_as_usize(state, "state transition length")?; + let is_match = ((1 << 15) & ntrans) != 0; + ntrans &= !(1 << 15); + state = &state[2..]; + if ntrans > 257 || ntrans == 0 { + return Err(DeserializeError::generic( + "invalid transition length", + )); + } + if is_match && !sp.is_match_state(id) { + return Err(DeserializeError::generic( + "state marked as match but not in match ID range", + )); + } else if !is_match && sp.is_match_state(id) { + return Err(DeserializeError::generic( + "state in match ID range but not marked as match state", + )); + } + + // Each transition has two pieces: an inclusive range of bytes on which + // it is defined, and the state ID that those bytes transition to. The + // pairs come first, followed by a corresponding sequence of state IDs. + let input_ranges_len = ntrans.checked_mul(2).unwrap(); + wire::check_slice_len(state, input_ranges_len, "sparse byte pairs")?; + let (input_ranges, state) = state.split_at(input_ranges_len); + // Every range should be of the form A-B, where A<=B. + for pair in input_ranges.chunks(2) { + let (start, end) = (pair[0], pair[1]); + if start > end { + return Err(DeserializeError::generic("invalid input range")); + } + } + + // And now extract the corresponding sequence of state IDs. We leave + // this sequence as a &[u8] instead of a &[S] because sparse DFAs do + // not have any alignment requirements. + let next_len = ntrans + .checked_mul(self.id_len()) + .expect("state size * #trans should always fit in a usize"); + wire::check_slice_len(state, next_len, "sparse trans state IDs")?; + let (next, state) = state.split_at(next_len); + // We can at least verify that every state ID is in bounds. + for idbytes in next.chunks(self.id_len()) { + let (id, _) = + wire::read_state_id(idbytes, "sparse state ID in try_state")?; + wire::check_slice_len( + self.sparse(), + id.as_usize(), + "invalid sparse state ID", + )?; + } + + // If this is a match state, then read the pattern IDs for this state. + // Pattern IDs is a u32-length prefixed sequence of native endian + // encoded 32-bit integers. + let (pattern_ids, state) = if is_match { + let (npats, nr) = + wire::try_read_u32_as_usize(state, "pattern ID length")?; + let state = &state[nr..]; + if npats == 0 { + return Err(DeserializeError::generic( + "state marked as a match, but pattern length is zero", + )); + } + + let pattern_ids_len = + wire::mul(npats, 4, "sparse pattern ID byte length")?; + wire::check_slice_len( + state, + pattern_ids_len, + "sparse pattern IDs", + )?; + let (pattern_ids, state) = state.split_at(pattern_ids_len); + for patbytes in pattern_ids.chunks(PatternID::SIZE) { + wire::read_pattern_id( + patbytes, + "sparse pattern ID in try_state", + )?; + } + (pattern_ids, state) + } else { + (&[][..], state) + }; + if is_match && pattern_ids.is_empty() { + return Err(DeserializeError::generic( + "state marked as a match, but has no pattern IDs", + )); + } + if sp.is_match_state(id) && pattern_ids.is_empty() { + return Err(DeserializeError::generic( + "state marked special as a match, but has no pattern IDs", + )); + } + if sp.is_match_state(id) != is_match { + return Err(DeserializeError::generic( + "whether state is a match or not is inconsistent", + )); + } + + // Now read this state's accelerator info. The first byte is the length + // of the accelerator, which is typically 0 (for no acceleration) but + // is no bigger than 3. 
The length indicates the number of bytes that + // follow, where each byte corresponds to a transition out of this + // state. + if state.is_empty() { + return Err(DeserializeError::generic("no accelerator length")); + } + let (accel_len, state) = (usize::from(state[0]), &state[1..]); + + if accel_len > 3 { + return Err(DeserializeError::generic( + "sparse invalid accelerator length", + )); + } else if accel_len == 0 && sp.is_accel_state(id) { + return Err(DeserializeError::generic( + "got no accelerators in state, but in accelerator ID range", + )); + } else if accel_len > 0 && !sp.is_accel_state(id) { + return Err(DeserializeError::generic( + "state in accelerator ID range, but has no accelerators", + )); + } + + wire::check_slice_len( + state, + accel_len, + "sparse corrupt accelerator length", + )?; + let (accel, _) = (&state[..accel_len], &state[accel_len..]); + + let state = State { + id, + is_match, + ntrans, + input_ranges, + next, + pattern_ids, + accel, + }; + if sp.is_quit_state(state.next_at(state.ntrans - 1)) { + return Err(DeserializeError::generic( + "state with EOI transition to quit state is illegal", + )); + } + Ok(state) + } + + /// Return an iterator over all of the states in this DFA. + /// + /// The iterator returned yields tuples, where the first element is the + /// state ID and the second element is the state itself. + fn states(&self) -> StateIter<'_, T> { + StateIter { trans: self, id: DEAD.as_usize() } + } + + /// Returns the sparse transitions as raw bytes. + fn sparse(&self) -> &[u8] { + self.sparse.as_ref() + } + + /// Returns the number of bytes represented by a single state ID. + fn id_len(&self) -> usize { + StateID::SIZE + } + + /// Return the memory usage, in bytes, of these transitions. + /// + /// This does not include the size of a `Transitions` value itself. + fn memory_usage(&self) -> usize { + self.sparse().len() + } +} + +#[cfg(feature = "dfa-build")] +impl> Transitions { + /// Return a convenient mutable representation of the given state. + /// This panics if the state is invalid. + fn state_mut(&mut self, id: StateID) -> StateMut<'_> { + let mut state = &mut self.sparse_mut()[id.as_usize()..]; + let mut ntrans = wire::read_u16(&state).as_usize(); + let is_match = (1 << 15) & ntrans != 0; + ntrans &= !(1 << 15); + state = &mut state[2..]; + + let (input_ranges, state) = state.split_at_mut(ntrans * 2); + let (next, state) = state.split_at_mut(ntrans * StateID::SIZE); + let (pattern_ids, state) = if is_match { + let npats = wire::read_u32(&state).as_usize(); + state[4..].split_at_mut(npats * 4) + } else { + (&mut [][..], state) + }; + + let accel_len = usize::from(state[0]); + let accel = &mut state[1..accel_len + 1]; + StateMut { + id, + is_match, + ntrans, + input_ranges, + next, + pattern_ids, + accel, + } + } + + /// Returns the sparse transitions as raw mutable bytes. + fn sparse_mut(&mut self) -> &mut [u8] { + self.sparse.as_mut() + } +} + +/// The set of all possible starting states in a DFA. +/// +/// See the eponymous type in the `dense` module for more details. This type +/// is very similar to `dense::StartTable`, except that its underlying +/// representation is `&[u8]` instead of `&[S]`. (The latter would require +/// sparse DFAs to be aligned, which is explicitly something we do not require +/// because we don't really need it.) +#[derive(Clone)] +struct StartTable { + /// The initial start state IDs as a contiguous table of native endian + /// encoded integers, represented by `S`. 
+ /// + /// In practice, T is either Vec or &[u8] and has no alignment + /// requirements. + /// + /// The first `2 * stride` (currently always 8) entries always correspond + /// to the starts states for the entire DFA, with the first 4 entries being + /// for unanchored searches and the second 4 entries being for anchored + /// searches. To keep things simple, we always use 8 entries even if the + /// `StartKind` is not both. + /// + /// After that, there are `stride * patterns` state IDs, where `patterns` + /// may be zero in the case of a DFA with no patterns or in the case where + /// the DFA was built without enabling starting states for each pattern. + table: T, + /// The starting state configuration supported. When 'both', both + /// unanchored and anchored searches work. When 'unanchored', anchored + /// searches panic. When 'anchored', unanchored searches panic. + kind: StartKind, + /// The start state configuration for every possible byte. + start_map: StartByteMap, + /// The number of starting state IDs per pattern. + stride: usize, + /// The total number of patterns for which starting states are encoded. + /// This is `None` for DFAs that were built without start states for each + /// pattern. Thus, one cannot use this field to say how many patterns + /// are in the DFA in all cases. It is specific to how many patterns are + /// represented in this start table. + pattern_len: Option, + /// The universal starting state for unanchored searches. This is only + /// present when the DFA supports unanchored searches and when all starting + /// state IDs for an unanchored search are equivalent. + universal_start_unanchored: Option, + /// The universal starting state for anchored searches. This is only + /// present when the DFA supports anchored searches and when all starting + /// state IDs for an anchored search are equivalent. + universal_start_anchored: Option, +} + +#[cfg(feature = "dfa-build")] +impl StartTable> { + fn new>( + dfa: &dense::DFA, + pattern_len: Option, + ) -> StartTable> { + let stride = Start::len(); + // This is OK since the only way we're here is if a dense DFA could be + // constructed successfully, which uses the same space. + let len = stride + .checked_mul(pattern_len.unwrap_or(0)) + .unwrap() + .checked_add(stride.checked_mul(2).unwrap()) + .unwrap() + .checked_mul(StateID::SIZE) + .unwrap(); + StartTable { + table: vec![0; len], + kind: dfa.start_kind(), + start_map: dfa.start_map().clone(), + stride, + pattern_len, + universal_start_unanchored: dfa + .universal_start_state(Anchored::No), + universal_start_anchored: dfa.universal_start_state(Anchored::Yes), + } + } + + fn from_dense_dfa>( + dfa: &dense::DFA, + remap: &[StateID], + ) -> Result>, BuildError> { + // Unless the DFA has start states compiled for each pattern, then + // as far as the starting state table is concerned, there are zero + // patterns to account for. It will instead only store starting states + // for the entire DFA. 
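+        // [Editor's sketch] Per the layout documented on `table` above:
+        //
+        //     [0, stride)          unanchored starts for the whole DFA
+        //     [stride, 2*stride)   anchored starts for the whole DFA
+        //     [2*stride, ...)      one stride per pattern, when enabled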
+ let start_pattern_len = if dfa.starts_for_each_pattern() { + Some(dfa.pattern_len()) + } else { + None + }; + let mut sl = StartTable::new(dfa, start_pattern_len); + for (old_start_id, anchored, sty) in dfa.starts() { + let new_start_id = remap[dfa.to_index(old_start_id)]; + sl.set_start(anchored, sty, new_start_id); + } + Ok(sl) + } +} + +impl<'a> StartTable<&'a [u8]> { + unsafe fn from_bytes_unchecked( + mut slice: &'a [u8], + ) -> Result<(StartTable<&'a [u8]>, usize), DeserializeError> { + let slice_start = slice.as_ptr().as_usize(); + + let (kind, nr) = StartKind::from_bytes(slice)?; + slice = &slice[nr..]; + + let (start_map, nr) = StartByteMap::from_bytes(slice)?; + slice = &slice[nr..]; + + let (stride, nr) = + wire::try_read_u32_as_usize(slice, "sparse start table stride")?; + slice = &slice[nr..]; + if stride != Start::len() { + return Err(DeserializeError::generic( + "invalid sparse starting table stride", + )); + } + + let (maybe_pattern_len, nr) = + wire::try_read_u32_as_usize(slice, "sparse start table patterns")?; + slice = &slice[nr..]; + let pattern_len = if maybe_pattern_len.as_u32() == u32::MAX { + None + } else { + Some(maybe_pattern_len) + }; + if pattern_len.map_or(false, |len| len > PatternID::LIMIT) { + return Err(DeserializeError::generic( + "sparse invalid number of patterns", + )); + } + + let (universal_unanchored, nr) = + wire::try_read_u32(slice, "universal unanchored start")?; + slice = &slice[nr..]; + let universal_start_unanchored = if universal_unanchored == u32::MAX { + None + } else { + Some(StateID::try_from(universal_unanchored).map_err(|e| { + DeserializeError::state_id_error( + e, + "universal unanchored start", + ) + })?) + }; + + let (universal_anchored, nr) = + wire::try_read_u32(slice, "universal anchored start")?; + slice = &slice[nr..]; + let universal_start_anchored = if universal_anchored == u32::MAX { + None + } else { + Some(StateID::try_from(universal_anchored).map_err(|e| { + DeserializeError::state_id_error(e, "universal anchored start") + })?) + }; + + let pattern_table_size = wire::mul( + stride, + pattern_len.unwrap_or(0), + "sparse invalid pattern length", + )?; + // Our start states always start with a single stride of start states + // for the entire automaton which permit it to match any pattern. What + // follows it are an optional set of start states for each pattern. 
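+        // (Editor's note: "a single stride" above refers to each search
+        // mode; the arithmetic below reserves 2 * stride slots, one block
+        // for unanchored starts and one for anchored starts, before any
+        // per-pattern blocks.)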
+ let start_state_len = wire::add( + wire::mul(2, stride, "start state stride too big")?, + pattern_table_size, + "sparse invalid 'any' pattern starts size", + )?; + let table_bytes_len = wire::mul( + start_state_len, + StateID::SIZE, + "sparse pattern table bytes length", + )?; + wire::check_slice_len( + slice, + table_bytes_len, + "sparse start ID table", + )?; + let table = &slice[..table_bytes_len]; + slice = &slice[table_bytes_len..]; + + let sl = StartTable { + table, + kind, + start_map, + stride, + pattern_len, + universal_start_unanchored, + universal_start_anchored, + }; + Ok((sl, slice.as_ptr().as_usize() - slice_start)) + } +} + +impl> StartTable { + fn write_to( + &self, + mut dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small( + "sparse starting table ids", + )); + } + dst = &mut dst[..nwrite]; + + // write start kind + let nw = self.kind.write_to::(dst)?; + dst = &mut dst[nw..]; + // write start byte map + let nw = self.start_map.write_to(dst)?; + dst = &mut dst[nw..]; + // write stride + E::write_u32(u32::try_from(self.stride).unwrap(), dst); + dst = &mut dst[size_of::()..]; + // write pattern length + E::write_u32( + u32::try_from(self.pattern_len.unwrap_or(0xFFFF_FFFF)).unwrap(), + dst, + ); + dst = &mut dst[size_of::()..]; + // write universal start unanchored state id, u32::MAX if absent + E::write_u32( + self.universal_start_unanchored + .map_or(u32::MAX, |sid| sid.as_u32()), + dst, + ); + dst = &mut dst[size_of::()..]; + // write universal start anchored state id, u32::MAX if absent + E::write_u32( + self.universal_start_anchored.map_or(u32::MAX, |sid| sid.as_u32()), + dst, + ); + dst = &mut dst[size_of::()..]; + // write start IDs + for (sid, _, _) in self.iter() { + E::write_u32(sid.as_u32(), dst); + dst = &mut dst[StateID::SIZE..]; + } + Ok(nwrite) + } + + /// Returns the number of bytes the serialized form of this transition + /// table will use. + fn write_to_len(&self) -> usize { + self.kind.write_to_len() + + self.start_map.write_to_len() + + size_of::() // stride + + size_of::() // # patterns + + size_of::() // universal unanchored start + + size_of::() // universal anchored start + + self.table().len() + } + + /// Validates that every starting state ID in this table is valid. + /// + /// That is, every starting state ID can be used to correctly decode a + /// state in the DFA's sparse transitions. + fn validate( + &self, + sp: &Special, + seen: &Seen, + ) -> Result<(), DeserializeError> { + for (id, _, _) in self.iter() { + if !seen.contains(&id) { + return Err(DeserializeError::generic( + "found invalid start state ID", + )); + } + if sp.is_match_state(id) { + return Err(DeserializeError::generic( + "start states cannot be match states", + )); + } + } + Ok(()) + } + + /// Converts this start list to a borrowed value. + fn as_ref(&self) -> StartTable<&'_ [u8]> { + StartTable { + table: self.table(), + kind: self.kind, + start_map: self.start_map.clone(), + stride: self.stride, + pattern_len: self.pattern_len, + universal_start_unanchored: self.universal_start_unanchored, + universal_start_anchored: self.universal_start_anchored, + } + } + + /// Converts this start list to an owned value. 
+    #[cfg(feature = "alloc")]
+    fn to_owned(&self) -> StartTable<alloc::vec::Vec<u8>> {
+        StartTable {
+            table: self.table().to_vec(),
+            kind: self.kind,
+            start_map: self.start_map.clone(),
+            stride: self.stride,
+            pattern_len: self.pattern_len,
+            universal_start_unanchored: self.universal_start_unanchored,
+            universal_start_anchored: self.universal_start_anchored,
+        }
+    }
+
+    /// Return the start state for the given starting configuration and
+    /// anchored mode. If the anchored mode is not supported by this table
+    /// (including asking for a pattern-specific start state when this table
+    /// was not built with starting states for each pattern), then an error
+    /// is returned. If a pattern ID is given that is out of range, then the
+    /// DEAD state is returned.
+    fn start(
+        &self,
+        anchored: Anchored,
+        start: Start,
+    ) -> Result<StateID, StartError> {
+        let start_index = start.as_usize();
+        let index = match anchored {
+            Anchored::No => {
+                if !self.kind.has_unanchored() {
+                    return Err(StartError::unsupported_anchored(anchored));
+                }
+                start_index
+            }
+            Anchored::Yes => {
+                if !self.kind.has_anchored() {
+                    return Err(StartError::unsupported_anchored(anchored));
+                }
+                self.stride + start_index
+            }
+            Anchored::Pattern(pid) => {
+                let len = match self.pattern_len {
+                    None => {
+                        return Err(StartError::unsupported_anchored(anchored))
+                    }
+                    Some(len) => len,
+                };
+                if pid.as_usize() >= len {
+                    return Ok(DEAD);
+                }
+                (2 * self.stride)
+                    + (self.stride * pid.as_usize())
+                    + start_index
+            }
+        };
+        let start = index * StateID::SIZE;
+        // This is OK since we're allowed to assume that the start table
+        // contains valid StateIDs.
+        Ok(wire::read_state_id_unchecked(&self.table()[start..]).0)
+    }
+
+    /// Return an iterator over all start IDs in this table.
+    fn iter(&self) -> StartStateIter<'_, T> {
+        StartStateIter { st: self, i: 0 }
+    }
+
+    /// Returns the total number of start state IDs in this table.
+    fn len(&self) -> usize {
+        self.table().len() / StateID::SIZE
+    }
+
+    /// Returns the table as a raw slice of bytes.
+    fn table(&self) -> &[u8] {
+        self.table.as_ref()
+    }
+
+    /// Return the memory usage, in bytes, of this start list.
+    ///
+    /// This does not include the size of a `StartTable` value itself.
+    fn memory_usage(&self) -> usize {
+        self.table().len()
+    }
+}
+
+#[cfg(feature = "dfa-build")]
+impl<T: AsRef<[u8]> + AsMut<[u8]>> StartTable<T> {
+    /// Set the start state for the given starting configuration and
+    /// anchored mode.
+    ///
+    /// If the pattern ID or state ID are not valid, then this will panic.
+    fn set_start(&mut self, anchored: Anchored, start: Start, id: StateID) {
+        let start_index = start.as_usize();
+        let index = match anchored {
+            Anchored::No => start_index,
+            Anchored::Yes => self.stride + start_index,
+            Anchored::Pattern(pid) => {
+                let pid = pid.as_usize();
+                let len = self
+                    .pattern_len
+                    .expect("start states for each pattern enabled");
+                assert!(pid < len, "invalid pattern ID {:?}", pid);
+                self.stride
+                    .checked_mul(pid)
+                    .unwrap()
+                    .checked_add(self.stride.checked_mul(2).unwrap())
+                    .unwrap()
+                    .checked_add(start_index)
+                    .unwrap()
+            }
+        };
+        let start = index * StateID::SIZE;
+        let end = start + StateID::SIZE;
+        wire::write_state_id::<wire::NE>(
+            id,
+            &mut self.table.as_mut()[start..end],
+        );
+    }
+}
+
+/// An iterator over all start state IDs in a sparse DFA.
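+///
+/// This yields each start state ID along with the anchored mode and starting
+/// configuration it corresponds to, in the order the IDs are laid out in the
+/// table: all unanchored starts first, then all anchored starts and finally
+/// the per-pattern starts (if any).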
+struct StartStateIter<'a, T> {
+    st: &'a StartTable<T>,
+    i: usize,
+}
+
+impl<'a, T: AsRef<[u8]>> Iterator for StartStateIter<'a, T> {
+    type Item = (StateID, Anchored, Start);
+
+    fn next(&mut self) -> Option<(StateID, Anchored, Start)> {
+        let i = self.i;
+        if i >= self.st.len() {
+            return None;
+        }
+        self.i += 1;
+
+        // This unwrap is okay since the stride of any DFA must always match
+        // the number of start state types.
+        let start_type = Start::from_usize(i % self.st.stride).unwrap();
+        let anchored = if i < self.st.stride {
+            Anchored::No
+        } else if i < (2 * self.st.stride) {
+            Anchored::Yes
+        } else {
+            let pid = (i - (2 * self.st.stride)) / self.st.stride;
+            Anchored::Pattern(PatternID::new(pid).unwrap())
+        };
+        let start = i * StateID::SIZE;
+        let end = start + StateID::SIZE;
+        let bytes = self.st.table()[start..end].try_into().unwrap();
+        // This is OK since we're allowed to assume that any IDs in this
+        // start table are correct and valid for this DFA.
+        let id = StateID::from_ne_bytes_unchecked(bytes);
+        Some((id, anchored, start_type))
+    }
+}
+
+impl<'a, T> fmt::Debug for StartStateIter<'a, T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("StartStateIter").field("i", &self.i).finish()
+    }
+}
+
+/// An iterator over all states in a sparse DFA.
+///
+/// This iterator yields each state representation in turn. A state's
+/// identifier is available via `State::id`.
+struct StateIter<'a, T> {
+    trans: &'a Transitions<T>,
+    id: usize,
+}
+
+impl<'a, T: AsRef<[u8]>> Iterator for StateIter<'a, T> {
+    type Item = State<'a>;
+
+    fn next(&mut self) -> Option<State<'a>> {
+        if self.id >= self.trans.sparse().len() {
+            return None;
+        }
+        let state = self.trans.state(StateID::new_unchecked(self.id));
+        self.id = self.id + state.write_to_len();
+        Some(state)
+    }
+}
+
+impl<'a, T> fmt::Debug for StateIter<'a, T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("StateIter").field("id", &self.id).finish()
+    }
+}
+
+/// A representation of a sparse DFA state that can be cheaply materialized
+/// from a state identifier.
+#[derive(Clone)]
+struct State<'a> {
+    /// The identifier of this state.
+    id: StateID,
+    /// Whether this is a match state or not.
+    is_match: bool,
+    /// The number of transitions in this state.
+    ntrans: usize,
+    /// Pairs of input ranges, where there is one pair for each transition.
+    /// Each pair specifies an inclusive start and end byte range for the
+    /// corresponding transition.
+    input_ranges: &'a [u8],
+    /// Transitions to the next state. This slice contains native endian
+    /// encoded state identifiers, with `StateID` as the representation.
+    /// Thus, there are `ntrans * StateID::SIZE` bytes in this slice.
+    next: &'a [u8],
+    /// If this is a match state, then this contains the pattern IDs that
+    /// match when the DFA is in this state.
+    ///
+    /// This is a contiguous sequence of 32-bit native endian encoded
+    /// integers.
+    pattern_ids: &'a [u8],
+    /// An accelerator for this state, if present. If this state has no
+    /// accelerator, then this is an empty slice. When non-empty, this slice
+    /// has length at most 3 and corresponds to the exhaustive set of bytes
+    /// that must be seen in order to transition out of this state.
+    accel: &'a [u8],
+}
+
+impl<'a> State<'a> {
+    /// Searches for the next transition given an input byte. If no such
+    /// transition could be found, then a dead state is returned.
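+    ///
+    /// Note that the last transition in every state is reserved for the
+    /// special EOI transition, which is not considered here; see `next_eoi`.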
+    ///
+    /// This is marked as inline to help dramatically boost sparse searching,
+    /// which decodes each state it enters to follow the next transition.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn next(&self, input: u8) -> StateID {
+        // This straight linear search was observed to be much better than
+        // binary search on ASCII haystacks, likely because a binary search
+        // visits the ASCII case last but a linear search sees it first. A
+        // binary search does do a little better on non-ASCII haystacks, but
+        // not by much. There might be a better trade off lurking here.
+        for i in 0..(self.ntrans - 1) {
+            let (start, end) = self.range(i);
+            if start <= input && input <= end {
+                return self.next_at(i);
+            }
+            // We could bail early with an extra branch: if input < start,
+            // then we know we'll never find a matching transition.
+            // Interestingly, this extra branch seems to not help
+            // performance, or will even hurt it. It's likely very dependent
+            // on the DFA itself and what is being searched.
+        }
+        DEAD
+    }
+
+    /// Returns the next state ID for the special EOI transition.
+    fn next_eoi(&self) -> StateID {
+        self.next_at(self.ntrans - 1)
+    }
+
+    /// Returns the identifier for this state.
+    fn id(&self) -> StateID {
+        self.id
+    }
+
+    /// Returns the inclusive input byte range for the ith transition in this
+    /// state.
+    fn range(&self, i: usize) -> (u8, u8) {
+        (self.input_ranges[i * 2], self.input_ranges[i * 2 + 1])
+    }
+
+    /// Returns the next state for the ith transition in this state.
+    fn next_at(&self, i: usize) -> StateID {
+        let start = i * StateID::SIZE;
+        let end = start + StateID::SIZE;
+        let bytes = self.next[start..end].try_into().unwrap();
+        StateID::from_ne_bytes_unchecked(bytes)
+    }
+
+    /// Returns the pattern ID for the given match index. If the match index
+    /// is invalid, then this panics.
+    fn pattern_id(&self, match_index: usize) -> PatternID {
+        let start = match_index * PatternID::SIZE;
+        wire::read_pattern_id_unchecked(&self.pattern_ids[start..]).0
+    }
+
+    /// Returns the total number of pattern IDs for this state. This is
+    /// always zero when `is_match` is false.
+    fn pattern_len(&self) -> usize {
+        assert_eq!(0, self.pattern_ids.len() % 4);
+        self.pattern_ids.len() / 4
+    }
+
+    /// Return an accelerator for this state.
+    fn accelerator(&self) -> &'a [u8] {
+        self.accel
+    }
+
+    /// Write the raw representation of this state to the given buffer using
+    /// the given endianness.
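+    ///
+    /// The encoding written here mirrors what `State` expects to decode: a
+    /// `u16` transition count (with its most significant bit set for match
+    /// states), the input range pairs, the next state IDs, then for match
+    /// states a pattern count followed by the pattern IDs, and finally the
+    /// accelerator length and its bytes.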
+    fn write_to<E: Endian>(
+        &self,
+        mut dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        let nwrite = self.write_to_len();
+        if dst.len() < nwrite {
+            return Err(SerializeError::buffer_too_small(
+                "sparse state transitions",
+            ));
+        }
+
+        let ntrans =
+            if self.is_match { self.ntrans | (1 << 15) } else { self.ntrans };
+        E::write_u16(u16::try_from(ntrans).unwrap(), dst);
+        dst = &mut dst[size_of::<u16>()..];
+
+        dst[..self.input_ranges.len()].copy_from_slice(self.input_ranges);
+        dst = &mut dst[self.input_ranges.len()..];
+
+        for i in 0..self.ntrans {
+            E::write_u32(self.next_at(i).as_u32(), dst);
+            dst = &mut dst[StateID::SIZE..];
+        }
+
+        if self.is_match {
+            E::write_u32(u32::try_from(self.pattern_len()).unwrap(), dst);
+            dst = &mut dst[size_of::<u32>()..];
+            for i in 0..self.pattern_len() {
+                let pid = self.pattern_id(i);
+                E::write_u32(pid.as_u32(), dst);
+                dst = &mut dst[PatternID::SIZE..];
+            }
+        }
+
+        dst[0] = u8::try_from(self.accel.len()).unwrap();
+        dst[1..][..self.accel.len()].copy_from_slice(self.accel);
+
+        Ok(nwrite)
+    }
+
+    /// Return the total number of bytes that this state consumes in its
+    /// encoded form.
+    fn write_to_len(&self) -> usize {
+        let mut len = 2
+            + (self.ntrans * 2)
+            + (self.ntrans * StateID::SIZE)
+            + (1 + self.accel.len());
+        if self.is_match {
+            len += size_of::<u32>() + self.pattern_ids.len();
+        }
+        len
+    }
+}
+
+impl<'a> fmt::Debug for State<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut printed = false;
+        for i in 0..(self.ntrans - 1) {
+            let next = self.next_at(i);
+            if next == DEAD {
+                continue;
+            }
+
+            if printed {
+                write!(f, ", ")?;
+            }
+            let (start, end) = self.range(i);
+            if start == end {
+                write!(f, "{:?} => {:?}", DebugByte(start), next.as_usize())?;
+            } else {
+                write!(
+                    f,
+                    "{:?}-{:?} => {:?}",
+                    DebugByte(start),
+                    DebugByte(end),
+                    next.as_usize(),
+                )?;
+            }
+            printed = true;
+        }
+        let eoi = self.next_at(self.ntrans - 1);
+        if eoi != DEAD {
+            if printed {
+                write!(f, ", ")?;
+            }
+            write!(f, "EOI => {:?}", eoi.as_usize())?;
+        }
+        Ok(())
+    }
+}
+
+/// A representation of a mutable sparse DFA state that can be cheaply
+/// materialized from a state identifier.
+#[cfg(feature = "dfa-build")]
+struct StateMut<'a> {
+    /// The identifier of this state.
+    id: StateID,
+    /// Whether this is a match state or not.
+    is_match: bool,
+    /// The number of transitions in this state.
+    ntrans: usize,
+    /// Pairs of input ranges, where there is one pair for each transition.
+    /// Each pair specifies an inclusive start and end byte range for the
+    /// corresponding transition.
+    input_ranges: &'a mut [u8],
+    /// Transitions to the next state. This slice contains native endian
+    /// encoded state identifiers, with `StateID` as the representation.
+    /// Thus, there are `ntrans * StateID::SIZE` bytes in this slice.
+    next: &'a mut [u8],
+    /// If this is a match state, then this contains the pattern IDs that
+    /// match when the DFA is in this state.
+    ///
+    /// This is a contiguous sequence of 32-bit native endian encoded
+    /// integers.
+    pattern_ids: &'a [u8],
+    /// An accelerator for this state, if present. If this state has no
+    /// accelerator, then this is an empty slice. When non-empty, this slice
+    /// has length at most 3 and corresponds to the exhaustive set of bytes
+    /// that must be seen in order to transition out of this state.
+    accel: &'a mut [u8],
+}
+
+#[cfg(feature = "dfa-build")]
+impl<'a> StateMut<'a> {
+    /// Sets the ith transition to the given state.
+    fn set_next_at(&mut self, i: usize, next: StateID) {
+        let start = i * StateID::SIZE;
+        let end = start + StateID::SIZE;
+        wire::write_state_id::<wire::NE>(next, &mut self.next[start..end]);
+    }
+}
+
+#[cfg(feature = "dfa-build")]
+impl<'a> fmt::Debug for StateMut<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let state = State {
+            id: self.id,
+            is_match: self.is_match,
+            ntrans: self.ntrans,
+            input_ranges: self.input_ranges,
+            next: self.next,
+            pattern_ids: self.pattern_ids,
+            accel: self.accel,
+        };
+        fmt::Debug::fmt(&state, f)
+    }
+}
+
+// In order to validate everything, we not only need to make sure we
+// can decode every state, but that every transition in every state
+// points to a valid state. There are many duplicative transitions, so
+// we record state IDs that we've verified so that we don't redo the
+// decoding work.
+//
+// Except, when in no_std mode, we don't have dynamic memory allocation
+// available to us, so we skip this optimization. It's not clear
+// whether doing something more clever is worth it just yet. If you're
+// profiling this code and need it to run faster, please file an issue.
+//
+// OK, so we also use this to record the set of valid state IDs. Since
+// it is possible for a transition to point to an invalid state ID that
+// still (somehow) deserializes to a valid state. So we need to make
+// sure our transitions are limited to actually correct state IDs.
+// The problem is, I'm not sure how to do this verification step in
+// no-std no-alloc mode. I think we'd *have* to store the set of valid
+// state IDs in the DFA itself. For now, we don't do this verification
+// in no-std no-alloc mode. The worst thing that can happen is an
+// incorrect result. But no panics or memory safety problems should
+// result. Because we still do validate that the state itself is
+// "valid" in the sense that everything it points to actually exists.
+//
+// ---AG
+#[derive(Debug)]
+struct Seen {
+    #[cfg(feature = "alloc")]
+    set: alloc::collections::BTreeSet<StateID>,
+    #[cfg(not(feature = "alloc"))]
+    set: core::marker::PhantomData<StateID>,
+}
+
+#[cfg(feature = "alloc")]
+impl Seen {
+    fn new() -> Seen {
+        Seen { set: alloc::collections::BTreeSet::new() }
+    }
+    fn insert(&mut self, id: StateID) {
+        self.set.insert(id);
+    }
+    fn contains(&self, id: &StateID) -> bool {
+        self.set.contains(id)
+    }
+}
+
+#[cfg(not(feature = "alloc"))]
+impl Seen {
+    fn new() -> Seen {
+        Seen { set: core::marker::PhantomData }
+    }
+    fn insert(&mut self, _id: StateID) {}
+    fn contains(&self, _id: &StateID) -> bool {
+        true
+    }
+}
+
+/*
+/// A binary search routine specialized to a sparse DFA state's transitions.
+/// Specifically, the transitions are defined as a set of pairs of input
+/// bytes that delineate an inclusive range of bytes. If the input byte is
+/// in the range, then the corresponding transition is a match.
+///
+/// This binary search accepts a slice of these pairs and returns the
+/// position of the matching pair (the ith transition), or None if no
+/// matching pair could be found.
+///
+/// Note that this routine is not currently used since it was observed to
+/// either decrease performance when searching ASCII or to not provide
+/// enough of a boost on non-ASCII haystacks to be worth it. However, we
+/// leave it here for posterity in case we can find a way to use it.
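+///
+/// If this were revived, a caller in `State::next` might use it roughly
+/// like the following (an untested sketch; the guard excludes the special
+/// EOI slot from matching):
+///
+///     match binary_search_ranges(self.input_ranges, input) {
+///         Some(i) if i < self.ntrans - 1 => self.next_at(i),
+///         _ => DEAD,
+///     }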
+///
+/// In theory, we could use the standard library's search routine if we could
+/// cast a `&[u8]` to a `&[(u8, u8)]`, but I don't believe this is currently
+/// guaranteed to be safe and is thus UB (since I don't think the in-memory
+/// representation of `(u8, u8)` has been nailed down). One could define a
+/// repr(C) type, but the casting doesn't seem justified.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn binary_search_ranges(ranges: &[u8], needle: u8) -> Option<usize> {
+    debug_assert!(ranges.len() % 2 == 0, "ranges must have even length");
+    debug_assert!(ranges.len() <= 512, "ranges should be short");
+
+    let (mut left, mut right) = (0, ranges.len() / 2);
+    while left < right {
+        let mid = (left + right) / 2;
+        let (b1, b2) = (ranges[mid * 2], ranges[mid * 2 + 1]);
+        if needle < b1 {
+            right = mid;
+        } else if needle > b2 {
+            left = mid + 1;
+        } else {
+            return Some(mid);
+        }
+    }
+    None
+}
+*/
+
+#[cfg(all(test, feature = "syntax", feature = "dfa-build"))]
+mod tests {
+    use crate::{
+        dfa::{dense::DFA, Automaton},
+        nfa::thompson,
+        Input, MatchError,
+    };
+
+    // See the analogous test in src/hybrid/dfa.rs and src/dfa/dense.rs.
+    #[test]
+    fn heuristic_unicode_forward() {
+        let dfa = DFA::builder()
+            .configure(DFA::config().unicode_word_boundary(true))
+            .thompson(thompson::Config::new().reverse(true))
+            .build(r"\b[0-9]+\b")
+            .unwrap()
+            .to_sparse()
+            .unwrap();
+
+        let input = Input::new("β123").range(2..);
+        let expected = MatchError::quit(0xB2, 1);
+        let got = dfa.try_search_fwd(&input);
+        assert_eq!(Err(expected), got);
+
+        let input = Input::new("123β").range(..3);
+        let expected = MatchError::quit(0xCE, 3);
+        let got = dfa.try_search_fwd(&input);
+        assert_eq!(Err(expected), got);
+    }
+
+    // See the analogous test in src/hybrid/dfa.rs and src/dfa/dense.rs.
+    #[test]
+    fn heuristic_unicode_reverse() {
+        let dfa = DFA::builder()
+            .configure(DFA::config().unicode_word_boundary(true))
+            .thompson(thompson::Config::new().reverse(true))
+            .build(r"\b[0-9]+\b")
+            .unwrap()
+            .to_sparse()
+            .unwrap();
+
+        let input = Input::new("β123").range(2..);
+        let expected = MatchError::quit(0xB2, 1);
+        let got = dfa.try_search_rev(&input);
+        assert_eq!(Err(expected), got);
+
+        let input = Input::new("123β").range(..3);
+        let expected = MatchError::quit(0xCE, 3);
+        let got = dfa.try_search_rev(&input);
+        assert_eq!(Err(expected), got);
+    }
+}
diff --git a/vendor/regex-automata/src/dfa/special.rs b/vendor/regex-automata/src/dfa/special.rs
new file mode 100644
index 0000000..a831df5
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/special.rs
@@ -0,0 +1,494 @@
+use crate::{
+    dfa::DEAD,
+    util::{
+        primitives::StateID,
+        wire::{self, DeserializeError, Endian, SerializeError},
+    },
+};
+
+macro_rules! err {
+    ($msg:expr) => {
+        return Err(DeserializeError::generic($msg));
+    };
+}
+
+// Special represents the identifiers in a DFA that correspond to "special"
+// states. If a state is one or more of the following, then it is considered
+// special:
+//
+// * dead - A non-matching state where all outgoing transitions lead back to
+// itself. There is only one of these, regardless of whether minimization
+// has run. The dead state always has an ID of 0, i.e., it is always the
+// first state in a DFA.
+// * quit - A state that is entered whenever a byte is seen that should cause
+// a DFA to give up and stop searching. This results in a MatchError::quit
+// error being returned at search time.
+// The default configuration for a DFA has no quit bytes, which means this
+// state is unreachable by default, although it is always present for
+// reasons of implementation simplicity. This state is only reachable when
+// the caller configures the DFA to quit on certain bytes. There is always
+// exactly one of these states and it is always the second state. (Its
+// actual ID depends on the size of the alphabet in dense DFAs, since state
+// IDs are premultiplied in order to allow them to be used directly as
+// indices into the transition table.)
+// * match - An accepting state, i.e., indicative of a match. There may be
+// zero or more of these states.
+// * accelerated - A state where all of its outgoing transitions, except a
+// few, loop back to itself. These states are candidates for acceleration
+// via memchr during search. There may be zero or more of these states.
+// * start - A non-matching state that indicates where the automaton should
+// start during a search. There is always at least one starting state and
+// all are guaranteed to be non-match states. (A start state cannot be a
+// match state because the DFAs in this crate delay all matches by one byte.
+// So every search that finds a match must move through one transition to
+// some other match state, even when searching an empty string.)
+//
+// These are not mutually exclusive categories. Namely, the following
+// overlaps can occur:
+//
+// * {dead, start} - If a DFA can never lead to a match and it is minimized,
+// then it will typically compile to something where all starting IDs point
+// to the DFA's dead state.
+// * {match, accelerated} - It is possible for a match state to have the
+// majority of its transitions loop back to itself, which means it's
+// possible for a match state to be accelerated.
+// * {start, accelerated} - Similarly, it is possible for a start state to
+// be accelerated. Note that it is possible for an accelerated state to be
+// neither a match nor a start state. Also note that just because both match
+// and start states overlap with accelerated states does not mean that
+// match and start states overlap with each other. In fact, they are
+// guaranteed not to overlap.
+//
+// As a special mention, every DFA always has a dead and a quit state, even
+// though from the perspective of the DFA, they are equivalent. (Indeed,
+// minimization special cases them to ensure they don't get merged.) The
+// purpose of keeping them distinct is to use the quit state as a sentinel
+// to distinguish between whether a search finished successfully without
+// finding anything or whether it gave up before finishing.
+//
+// So the main problem we want to solve here is the *fast* detection of
+// whether a state is special or not. And we also want to do this while
+// storing as little extra data as possible. AND we want to be able to
+// quickly determine which categories a state falls into above if it is
+// special.
+//
+// We achieve this by essentially shuffling all special states to the
+// beginning of a DFA. That is, all special states appear before every other
+// non-special state. By representing special states this way, we can
+// determine whether a state is special or not by a single comparison, where
+// special.max is the identifier of the last special state in the DFA:
+//
+//     if current_state <= special.max:
+//         ... do something with special state
+//
+// The only thing left to do is to determine what kind of special state
+// it is. Because what we do next depends on that.
+// Since special states are typically rare, we can afford to do a bit more
+// extra work, but we'd still like this to be as fast as possible. The trick
+// we employ here is to continue shuffling states even within the special
+// state range. Such that one contiguous region corresponds to match states,
+// another for start states and then an overlapping range for accelerated
+// states. At a high level, our special state detection might look like this
+// (for leftmost searching, where we continue searching even after seeing a
+// match):
+//
+//     byte = input[offset]
+//     current_state = next_state(current_state, byte)
+//     offset += 1
+//     if current_state <= special.max:
+//         if current_state == 0:
+//             # We can never leave a dead state, so this always marks the
+//             # end of our search.
+//             return last_match
+//         if current_state == special.quit_id:
+//             # A quit state means we give up. If the DFA has no quit
+//             # state, then special.quit_id == 0 == dead, which is handled
+//             # by the conditional above.
+//             return Err(MatchError::quit { byte, offset: offset - 1 })
+//         if special.min_match <= current_state <= special.max_match:
+//             last_match = Some(offset)
+//             if special.min_accel <= current_state <= special.max_accel:
+//                 offset = accelerate(input, offset)
+//                 last_match = Some(offset)
+//         elif special.min_start <= current_state <= special.max_start:
+//             offset = prefilter.find(input, offset)
+//             if special.min_accel <= current_state <= special.max_accel:
+//                 offset = accelerate(input, offset)
+//         elif special.min_accel <= current_state <= special.max_accel:
+//             offset = accelerate(input, offset)
+//
+// There are some small details left out of the logic above. For example,
+// in order to accelerate a state, we need to know which bytes to search
+// for. This in turn implies some extra data we need to store in the DFA. To
+// keep things compact, we would ideally only store
+//
+//     N = special.max_accel - special.min_accel + 1
+//
+// items. But state IDs are premultiplied, which means they are not
+// contiguous. So in order to take a state ID and index an array of
+// accelerated structures, we need to do:
+//
+//     i = (state_id - special.min_accel) / stride
+//
+// (N.B. 'stride' is always a power of 2, so the above can be implemented
+// via '(state_id - special.min_accel) >> stride2', where 'stride2' is x in
+// 2^x=stride.)
+//
+// Moreover, some of these specialty categories may be empty. For example,
+// DFAs are not required to have any match states or any accelerated states.
+// In that case, the lower and upper bounds are both set to 0 (the dead
+// state ID) and the first `current_state == 0` check subsumes cases where
+// the ranges are empty.
+//
+// Loop unrolling, if applicable, has also been left out of the logic above.
+//
+// Graphically, the ranges look like this, where asterisks indicate ranges
+// that can be empty. Each 'x' is a state.
+//
+//      quit
+//  dead|
+//     ||
+//     xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+//     |  |             |             | start |             |
+//     |  |-------------|             |-------|             |
+//     |    match*    |                       |             |
+//     |              |                       |             |
+//     |       |----------|                   |             |
+//     |        accel*    |                   |             |
+//     |                  |                   |             |
+//     |                  |                   |             |
+//     |--------------------------------------|--------------
+//                   special                    non-special*
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct Special {
+    /// The identifier of the last special state in a DFA. A state is special
+    /// if and only if its identifier is less than or equal to `max`.
+    pub(crate) max: StateID,
+    /// The identifier of the quit state in a DFA.
+    /// (There is no analogous field for the dead state since the dead
+    /// state's ID is always zero, regardless of state ID size.)
+    pub(crate) quit_id: StateID,
+    /// The identifier of the first match state.
+    pub(crate) min_match: StateID,
+    /// The identifier of the last match state.
+    pub(crate) max_match: StateID,
+    /// The identifier of the first accelerated state.
+    pub(crate) min_accel: StateID,
+    /// The identifier of the last accelerated state.
+    pub(crate) max_accel: StateID,
+    /// The identifier of the first start state.
+    pub(crate) min_start: StateID,
+    /// The identifier of the last start state.
+    pub(crate) max_start: StateID,
+}
+
+impl Special {
+    /// Creates a new set of special ranges for a DFA. All ranges are
+    /// initially set to only contain the dead state. This is interpreted as
+    /// an empty range.
+    #[cfg(feature = "dfa-build")]
+    pub(crate) fn new() -> Special {
+        Special {
+            max: DEAD,
+            quit_id: DEAD,
+            min_match: DEAD,
+            max_match: DEAD,
+            min_accel: DEAD,
+            max_accel: DEAD,
+            min_start: DEAD,
+            max_start: DEAD,
+        }
+    }
+
+    /// Remaps all of the special state identifiers using the function given.
+    #[cfg(feature = "dfa-build")]
+    pub(crate) fn remap(&self, map: impl Fn(StateID) -> StateID) -> Special {
+        Special {
+            max: map(self.max),
+            quit_id: map(self.quit_id),
+            min_match: map(self.min_match),
+            max_match: map(self.max_match),
+            min_accel: map(self.min_accel),
+            max_accel: map(self.max_accel),
+            min_start: map(self.min_start),
+            max_start: map(self.max_start),
+        }
+    }
+
+    /// Deserialize the given bytes into special state ranges. If the slice
+    /// given is not big enough, then this returns an error. Similarly, if
+    /// any of the expected invariants around special state ranges aren't
+    /// upheld, an error is returned. Note that this does not guarantee that
+    /// the information returned is correct.
+    ///
+    /// Upon success, this returns the number of bytes read in addition to
+    /// the special state IDs themselves.
+    pub(crate) fn from_bytes(
+        mut slice: &[u8],
+    ) -> Result<(Special, usize), DeserializeError> {
+        wire::check_slice_len(slice, 8 * StateID::SIZE, "special states")?;
+
+        let mut nread = 0;
+        let mut read_id = |what| -> Result<StateID, DeserializeError> {
+            let (id, nr) = wire::try_read_state_id(slice, what)?;
+            nread += nr;
+            slice = &slice[StateID::SIZE..];
+            Ok(id)
+        };
+
+        let max = read_id("special max id")?;
+        let quit_id = read_id("special quit id")?;
+        let min_match = read_id("special min match id")?;
+        let max_match = read_id("special max match id")?;
+        let min_accel = read_id("special min accel id")?;
+        let max_accel = read_id("special max accel id")?;
+        let min_start = read_id("special min start id")?;
+        let max_start = read_id("special max start id")?;
+
+        let special = Special {
+            max,
+            quit_id,
+            min_match,
+            max_match,
+            min_accel,
+            max_accel,
+            min_start,
+            max_start,
+        };
+        special.validate()?;
+        assert_eq!(nread, special.write_to_len());
+        Ok((special, nread))
+    }
+
+    /// Validate that the information describing special states satisfies
+    /// all known invariants.
+    pub(crate) fn validate(&self) -> Result<(), DeserializeError> {
+        // Check that both ends of the range are DEAD or neither are.
+ if self.min_match == DEAD && self.max_match != DEAD { + err!("min_match is DEAD, but max_match is not"); + } + if self.min_match != DEAD && self.max_match == DEAD { + err!("max_match is DEAD, but min_match is not"); + } + if self.min_accel == DEAD && self.max_accel != DEAD { + err!("min_accel is DEAD, but max_accel is not"); + } + if self.min_accel != DEAD && self.max_accel == DEAD { + err!("max_accel is DEAD, but min_accel is not"); + } + if self.min_start == DEAD && self.max_start != DEAD { + err!("min_start is DEAD, but max_start is not"); + } + if self.min_start != DEAD && self.max_start == DEAD { + err!("max_start is DEAD, but min_start is not"); + } + + // Check that ranges are well formed. + if self.min_match > self.max_match { + err!("min_match should not be greater than max_match"); + } + if self.min_accel > self.max_accel { + err!("min_accel should not be greater than max_accel"); + } + if self.min_start > self.max_start { + err!("min_start should not be greater than max_start"); + } + + // Check that ranges are ordered with respect to one another. + if self.matches() && self.quit_id >= self.min_match { + err!("quit_id should not be greater than min_match"); + } + if self.accels() && self.quit_id >= self.min_accel { + err!("quit_id should not be greater than min_accel"); + } + if self.starts() && self.quit_id >= self.min_start { + err!("quit_id should not be greater than min_start"); + } + if self.matches() && self.accels() && self.min_accel < self.min_match { + err!("min_match should not be greater than min_accel"); + } + if self.matches() && self.starts() && self.min_start < self.min_match { + err!("min_match should not be greater than min_start"); + } + if self.accels() && self.starts() && self.min_start < self.min_accel { + err!("min_accel should not be greater than min_start"); + } + + // Check that max is at least as big as everything else. + if self.max < self.quit_id { + err!("quit_id should not be greater than max"); + } + if self.max < self.max_match { + err!("max_match should not be greater than max"); + } + if self.max < self.max_accel { + err!("max_accel should not be greater than max"); + } + if self.max < self.max_start { + err!("max_start should not be greater than max"); + } + + Ok(()) + } + + /// Validate that the special state information is compatible with the + /// given state len. + pub(crate) fn validate_state_len( + &self, + len: usize, + stride2: usize, + ) -> Result<(), DeserializeError> { + // We assume that 'validate' has already passed, so we know that 'max' + // is truly the max. So all we need to check is that the max state ID + // is less than the state ID len. The max legal value here is len-1, + // which occurs when there are no non-special states. + if (self.max.as_usize() >> stride2) >= len { + err!("max should not be greater than or equal to state length"); + } + Ok(()) + } + + /// Write the IDs and ranges for special states to the given byte buffer. + /// The buffer given must have enough room to store all data, otherwise + /// this will return an error. The number of bytes written is returned + /// on success. The number of bytes written is guaranteed to be a multiple + /// of 8. 
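+    /// (Specifically, `write_to_len` bytes are written: one state ID for
+    /// each of the eight fields, i.e., `8 * StateID::SIZE` bytes.)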
+    pub(crate) fn write_to<E: Endian>(
+        &self,
+        dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        use crate::util::wire::write_state_id as write;
+
+        if dst.len() < self.write_to_len() {
+            return Err(SerializeError::buffer_too_small("special state ids"));
+        }
+
+        let mut nwrite = 0;
+        nwrite += write::<E>(self.max, &mut dst[nwrite..]);
+        nwrite += write::<E>(self.quit_id, &mut dst[nwrite..]);
+        nwrite += write::<E>(self.min_match, &mut dst[nwrite..]);
+        nwrite += write::<E>(self.max_match, &mut dst[nwrite..]);
+        nwrite += write::<E>(self.min_accel, &mut dst[nwrite..]);
+        nwrite += write::<E>(self.max_accel, &mut dst[nwrite..]);
+        nwrite += write::<E>(self.min_start, &mut dst[nwrite..]);
+        nwrite += write::<E>(self.max_start, &mut dst[nwrite..]);
+
+        assert_eq!(
+            self.write_to_len(),
+            nwrite,
+            "expected to write certain number of bytes",
+        );
+        assert_eq!(
+            nwrite % 8,
+            0,
+            "expected to write multiple of 8 bytes for special states",
+        );
+        Ok(nwrite)
+    }
+
+    /// Returns the total number of bytes written by `write_to`.
+    pub(crate) fn write_to_len(&self) -> usize {
+        8 * StateID::SIZE
+    }
+
+    /// Sets the maximum special state ID based on the current values. This
+    /// should be used once all possible state IDs are set.
+    #[cfg(feature = "dfa-build")]
+    pub(crate) fn set_max(&mut self) {
+        use core::cmp::max;
+        self.max = max(
+            self.quit_id,
+            max(self.max_match, max(self.max_accel, self.max_start)),
+        );
+    }
+
+    /// Sets the maximum special state ID such that starting states are not
+    /// considered "special." This also marks the min/max starting states as
+    /// DEAD such that 'is_start_state' always returns false, even if the
+    /// state is actually a starting state.
+    ///
+    /// This is useful when there is no prefilter set. It will avoid
+    /// ping-ponging between the hot path in the DFA search code and the
+    /// start state handling code, which is typically only useful for
+    /// executing a prefilter.
+    #[cfg(feature = "dfa-build")]
+    pub(crate) fn set_no_special_start_states(&mut self) {
+        use core::cmp::max;
+        self.max = max(self.quit_id, max(self.max_match, self.max_accel));
+        self.min_start = DEAD;
+        self.max_start = DEAD;
+    }
+
+    /// Returns true if and only if the given state ID is a special state.
+    #[inline]
+    pub(crate) fn is_special_state(&self, id: StateID) -> bool {
+        id <= self.max
+    }
+
+    /// Returns true if and only if the given state ID is a dead state.
+    #[inline]
+    pub(crate) fn is_dead_state(&self, id: StateID) -> bool {
+        id == DEAD
+    }
+
+    /// Returns true if and only if the given state ID is a quit state.
+    #[inline]
+    pub(crate) fn is_quit_state(&self, id: StateID) -> bool {
+        !self.is_dead_state(id) && self.quit_id == id
+    }
+
+    /// Returns true if and only if the given state ID is a match state.
+    #[inline]
+    pub(crate) fn is_match_state(&self, id: StateID) -> bool {
+        !self.is_dead_state(id) && self.min_match <= id && id <= self.max_match
+    }
+
+    /// Returns true if and only if the given state ID is an accel state.
+    #[inline]
+    pub(crate) fn is_accel_state(&self, id: StateID) -> bool {
+        !self.is_dead_state(id) && self.min_accel <= id && id <= self.max_accel
+    }
+
+    /// Returns true if and only if the given state ID is a start state.
+    #[inline]
+    pub(crate) fn is_start_state(&self, id: StateID) -> bool {
+        !self.is_dead_state(id) && self.min_start <= id && id <= self.max_start
+    }
+
+    /// Returns the total number of match states for a dense table based DFA.
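+    ///
+    /// Since match state IDs are premultiplied and contiguous, the count is
+    /// recovered from the ID span. For example, with `stride = 512`,
+    /// `min_match = 512` and `max_match = 1536`, this computes
+    /// `(1536 - 512 + 512) / 512 = 3` match states.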
+    #[inline]
+    pub(crate) fn match_len(&self, stride: usize) -> usize {
+        if self.matches() {
+            (self.max_match.as_usize() - self.min_match.as_usize() + stride)
+                / stride
+        } else {
+            0
+        }
+    }
+
+    /// Returns true if and only if there is at least one match state.
+    #[inline]
+    pub(crate) fn matches(&self) -> bool {
+        self.min_match != DEAD
+    }
+
+    /// Returns the total number of accel states.
+    #[cfg(feature = "dfa-build")]
+    pub(crate) fn accel_len(&self, stride: usize) -> usize {
+        if self.accels() {
+            (self.max_accel.as_usize() - self.min_accel.as_usize() + stride)
+                / stride
+        } else {
+            0
+        }
+    }
+
+    /// Returns true if and only if there is at least one accel state.
+    #[inline]
+    pub(crate) fn accels(&self) -> bool {
+        self.min_accel != DEAD
+    }
+
+    /// Returns true if and only if there is at least one start state.
+    #[inline]
+    pub(crate) fn starts(&self) -> bool {
+        self.min_start != DEAD
+    }
+}
diff --git a/vendor/regex-automata/src/dfa/start.rs b/vendor/regex-automata/src/dfa/start.rs
new file mode 100644
index 0000000..fddc702
--- /dev/null
+++ b/vendor/regex-automata/src/dfa/start.rs
@@ -0,0 +1,74 @@
+use core::mem::size_of;
+
+use crate::util::wire::{self, DeserializeError, Endian, SerializeError};
+
+/// The kind of anchored starting configurations to support in a DFA.
+///
+/// Fully compiled DFAs need to be explicitly configured as to which anchored
+/// starting configurations to support. The reason for not just supporting
+/// everything unconditionally is that it can use more resources (such as
+/// memory and build time). The downside of this is that if you try to
+/// execute a search using an [`Anchored`](crate::Anchored) mode that is not
+/// supported by the DFA, then the search will return an error.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum StartKind {
+    /// Support both anchored and unanchored searches.
+    Both,
+    /// Support only unanchored searches. Requesting an anchored search will
+    /// return an error.
+    ///
+    /// Note that even if an unanchored search is requested, the pattern
+    /// itself may still be anchored. For example, `^abc` will only match
+    /// `abc` at the start of a haystack. This remains true even if the
+    /// regex engine only supports unanchored searches.
+    Unanchored,
+    /// Support only anchored searches. Requesting an unanchored search will
+    /// return an error.
+    Anchored,
+}
+
+impl StartKind {
+    pub(crate) fn from_bytes(
+        slice: &[u8],
+    ) -> Result<(StartKind, usize), DeserializeError> {
+        wire::check_slice_len(slice, size_of::<u32>(), "start kind bytes")?;
+        let (n, nr) = wire::try_read_u32(slice, "start kind integer")?;
+        match n {
+            0 => Ok((StartKind::Both, nr)),
+            1 => Ok((StartKind::Unanchored, nr)),
+            2 => Ok((StartKind::Anchored, nr)),
+            _ => Err(DeserializeError::generic("unrecognized start kind")),
+        }
+    }
+
+    pub(crate) fn write_to<E: Endian>(
+        &self,
+        dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        let nwrite = self.write_to_len();
+        if dst.len() < nwrite {
+            return Err(SerializeError::buffer_too_small("start kind"));
+        }
+        let n = match *self {
+            StartKind::Both => 0,
+            StartKind::Unanchored => 1,
+            StartKind::Anchored => 2,
+        };
+        E::write_u32(n, dst);
+        Ok(nwrite)
+    }
+
+    pub(crate) fn write_to_len(&self) -> usize {
+        size_of::<u32>()
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn has_unanchored(&self) -> bool {
+        matches!(*self, StartKind::Both | StartKind::Unanchored)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn has_anchored(&self) -> bool {
+        matches!(*self, StartKind::Both | StartKind::Anchored)
+    }
+}
diff --git a/vendor/regex-automata/src/hybrid/dfa.rs b/vendor/regex-automata/src/hybrid/dfa.rs
new file mode 100644
index 0000000..bd9179b
--- /dev/null
+++ b/vendor/regex-automata/src/hybrid/dfa.rs
@@ -0,0 +1,4418 @@
+/*!
+Types and routines specific to lazy DFAs.
+
+This module is the home of [`hybrid::dfa::DFA`](DFA).
+
+This module also contains a [`hybrid::dfa::Builder`](Builder) and a
+[`hybrid::dfa::Config`](Config) for configuring and building a lazy DFA.
+*/
+
+use core::{iter, mem::size_of};
+
+use alloc::vec::Vec;
+
+use crate::{
+    hybrid::{
+        error::{BuildError, CacheError, StartError},
+        id::{LazyStateID, LazyStateIDError},
+        search,
+    },
+    nfa::thompson,
+    util::{
+        alphabet::{self, ByteClasses, ByteSet},
+        determinize::{self, State, StateBuilderEmpty, StateBuilderNFA},
+        empty,
+        prefilter::Prefilter,
+        primitives::{PatternID, StateID as NFAStateID},
+        search::{
+            Anchored, HalfMatch, Input, MatchError, MatchKind, PatternSet,
+        },
+        sparse_set::SparseSets,
+        start::{self, Start, StartByteMap},
+    },
+};
+
+/// The minimum number of states that a lazy DFA's cache size must support.
+///
+/// This is checked at time of construction to ensure that at least some
+/// small number of states can fit in the given capacity allotment. If we
+/// can't fit at least this number of states, then the thinking is that it's
+/// pretty senseless to use the lazy DFA. More to the point, parts of the
+/// code do assume that the cache can fit at least some small number of
+/// states.
+const MIN_STATES: usize = SENTINEL_STATES + 2;
+
+/// The number of "sentinel" states that get added to every lazy DFA.
+///
+/// These are special states indicating status conditions of a search:
+/// unknown, dead and quit. These states in particular also use zero NFA
+/// states, so their memory usage is quite small. This is relevant for
+/// computing the minimum memory needed for a lazy DFA cache.
+const SENTINEL_STATES: usize = 3;
+
+/// A hybrid NFA/DFA (also called a "lazy DFA") for regex searching.
+///
+/// A lazy DFA is a DFA that builds itself at search time. It otherwise has
+/// very similar characteristics to a [`dense::DFA`](crate::dfa::dense::DFA).
+/// Indeed, both support precisely the same regex features with precisely the
+/// same semantics.
+///
+/// Whereas a `dense::DFA` must be completely built to handle any input
+/// before it may be used for search, a lazy DFA starts off effectively
+/// empty. During a search, a lazy DFA will build itself depending on whether
+/// it has already computed the next transition or not. If it has, then it
+/// looks a lot like a `dense::DFA` internally: it does a very fast table
+/// based access to find the next transition. Otherwise, if the state hasn't
+/// been computed, then it does determinization _for that specific
+/// transition_ to compute the next DFA state.
+///
+/// The main selling point of a lazy DFA is that, in practice, it has
+/// the performance profile of a `dense::DFA` without the weakness of it
+/// taking worst case exponential time to build. Indeed, for each byte of
+/// input, the lazy DFA will construct at most one new DFA state. Thus, a
+/// lazy DFA achieves worst case `O(mn)` time for regex search (where `m ~
+/// pattern.len()` and `n ~ haystack.len()`).
+///
+/// The main downsides of a lazy DFA are:
+///
+/// 1. It requires mutable "cache" space during search. This is where the
+/// transition table, among other things, is stored.
+/// 2. In pathological cases (e.g., if the cache is too small), it will run
+/// out of room and either require a bigger cache capacity or will repeatedly
+/// clear the cache and thus repeatedly regenerate DFA states. Overall, this
+/// will tend to be slower than a typical NFA simulation.
+///
+/// # Capabilities
+///
+/// Like a `dense::DFA`, a single lazy DFA fundamentally supports the
+/// following operations:
+///
+/// 1. Detection of a match.
+/// 2. Location of the end of a match.
+/// 3. In the case of a lazy DFA with multiple patterns, which pattern
+/// matched is reported as well.
+///
+/// A notable absence from the above list of capabilities is the location of
+/// the *start* of a match. In order to provide both the start and end of
+/// a match, *two* lazy DFAs are required. This functionality is provided by
+/// a [`Regex`](crate::hybrid::regex::Regex).
+///
+/// # Example
+///
+/// This shows how to build a lazy DFA with the default configuration and
+/// execute a search. Notice how, in contrast to a `dense::DFA`, we must
+/// create a cache and pass it to our search routine.
+///
+/// ```
+/// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
+///
+/// let dfa = DFA::new("foo[0-9]+")?;
+/// let mut cache = dfa.create_cache();
+///
+/// let expected = Some(HalfMatch::must(0, 8));
+/// assert_eq!(expected, dfa.try_search_fwd(
+///     &mut cache, &Input::new("foo12345"))?,
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct DFA {
+    config: Config,
+    nfa: thompson::NFA,
+    stride2: usize,
+    start_map: StartByteMap,
+    classes: ByteClasses,
+    quitset: ByteSet,
+    cache_capacity: usize,
+}
+
+impl DFA {
+    /// Parse the given regular expression using a default configuration and
+    /// return the corresponding lazy DFA.
+    ///
+    /// If you want a non-default configuration, then use the [`Builder`] to
+    /// set your own configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
+    ///
+    /// let dfa = DFA::new("foo[0-9]+bar")?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let expected = HalfMatch::must(0, 11);
+    /// assert_eq!(
+    ///     Some(expected),
+    ///     dfa.try_search_fwd(&mut cache, &Input::new("foo12345bar"))?,
+    /// );
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new(pattern: &str) -> Result<DFA, BuildError> {
+        DFA::builder().build(pattern)
+    }
+
+    /// Parse the given regular expressions using a default configuration and
+    /// return the corresponding lazy multi-DFA.
+    ///
+    /// If you want a non-default configuration, then use the [`Builder`] to
+    /// set your own configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
+    ///
+    /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+"])?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let expected = HalfMatch::must(1, 3);
+    /// assert_eq!(
+    ///     Some(expected),
+    ///     dfa.try_search_fwd(&mut cache, &Input::new("foo12345bar"))?,
+    /// );
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<DFA, BuildError> {
+        DFA::builder().build_many(patterns)
+    }
+
+    /// Create a new lazy DFA that matches every input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
+    ///
+    /// let dfa = DFA::always_match()?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let expected = HalfMatch::must(0, 0);
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(
+    ///     &mut cache, &Input::new(""))?,
+    /// );
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(
+    ///     &mut cache, &Input::new("foo"))?,
+    /// );
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn always_match() -> Result<DFA, BuildError> {
+        let nfa = thompson::NFA::always_match();
+        Builder::new().build_from_nfa(nfa)
+    }
+
+    /// Create a new lazy DFA that never matches any input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, Input};
+    ///
+    /// let dfa = DFA::never_match()?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// assert_eq!(None, dfa.try_search_fwd(&mut cache, &Input::new(""))?);
+    /// assert_eq!(None, dfa.try_search_fwd(&mut cache, &Input::new("foo"))?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn never_match() -> Result<DFA, BuildError> {
+        let nfa = thompson::NFA::never_match();
+        Builder::new().build_from_nfa(nfa)
+    }
+
+    /// Return a default configuration for a `DFA`.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Config`] type when customizing the construction of a lazy DFA.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a lazy DFA that heuristically
+    /// supports Unicode word boundaries.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, MatchError, Input};
+    ///
+    /// let re = DFA::builder()
+    ///     .configure(DFA::config().unicode_word_boundary(true))
+    ///     .build(r"\b\w+\b")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// // Since our haystack is all ASCII, the DFA search sees that and
+    /// // knows it is legal to interpret Unicode word boundaries as ASCII
+    /// // word boundaries.
+    /// let input = Input::new("!!foo!!");
+    /// let expected = HalfMatch::must(0, 5);
+    /// assert_eq!(Some(expected), re.try_search_fwd(&mut cache, &input)?);
+    ///
+    /// // But if our haystack contains non-ASCII, then the search will fail
+    /// // with an error.
+    /// let input = Input::new("!!βββ!!");
+    /// let expected = MatchError::quit(b'\xCE', 2);
+    /// assert_eq!(Err(expected), re.try_search_fwd(&mut cache, &input));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn config() -> Config {
+        Config::new()
+    }
+
+    /// Return a builder for configuring the construction of a `DFA`.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Builder`] type in common cases.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use the builder to disable UTF-8 mode
+    /// everywhere for lazy DFAs.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, util::syntax, HalfMatch, Input};
+    ///
+    /// let re = DFA::builder()
+    ///     .syntax(syntax::Config::new().utf8(false))
+    ///     .build(r"foo(?-u:[^b])ar.*")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let input = Input::new(b"\xFEfoo\xFFarzz\xE2\x98\xFF\n");
+    /// let expected = Some(HalfMatch::must(0, 9));
+    /// let got = re.try_search_fwd(&mut cache, &input)?;
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn builder() -> Builder {
+        Builder::new()
+    }
+
+    /// Create a new cache for this lazy DFA.
+    ///
+    /// The cache returned should only be used for searches for this
+    /// lazy DFA. If you want to reuse the cache for another DFA, then
+    /// you must call [`Cache::reset`] with that DFA (or, equivalently,
+    /// [`DFA::reset_cache`]).
+    pub fn create_cache(&self) -> Cache {
+        Cache::new(self)
+    }
+
+    /// Reset the given cache such that it can be used for searching with
+    /// this lazy DFA (and only this DFA).
+    ///
+    /// A cache reset permits reusing memory already allocated in this cache
+    /// with a different lazy DFA.
+    ///
+    /// Resetting a cache sets its "clear count" to 0. This is relevant if
+    /// the lazy DFA has been configured to "give up" after it has cleared
+    /// the cache a certain number of times.
+    ///
+    /// Any lazy state ID generated by the cache prior to resetting it is
+    /// invalid after the reset.
+    ///
+    /// # Example
+    ///
+    /// This shows how to re-purpose a cache for use with a different DFA.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
+    ///
+    /// let dfa1 = DFA::new(r"\w")?;
+    /// let dfa2 = DFA::new(r"\W")?;
+    ///
+    /// let mut cache = dfa1.create_cache();
+    /// assert_eq!(
+    ///     Some(HalfMatch::must(0, 2)),
+    ///     dfa1.try_search_fwd(&mut cache, &Input::new("Δ"))?,
+    /// );
+    ///
+    /// // Using 'cache' with dfa2 is not allowed. It may result in panics or
+    /// // incorrect results. In order to re-purpose the cache, we must reset
+    /// // it with the DFA we'd like to use it with.
+    /// //
+    /// // Similarly, after this reset, using the cache with 'dfa1' is also
+    /// // not allowed.
+    /// dfa2.reset_cache(&mut cache);
+    /// assert_eq!(
+    ///     Some(HalfMatch::must(0, 3)),
+    ///     dfa2.try_search_fwd(&mut cache, &Input::new("☃"))?,
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn reset_cache(&self, cache: &mut Cache) {
+        Lazy::new(self, cache).reset_cache()
+    }
+
+    /// Returns the total number of patterns compiled into this lazy DFA.
+    ///
+    /// In the case of a DFA that contains no patterns, this returns `0`.
+    ///
+    /// # Example
+    ///
+    /// This example shows the pattern length for a DFA that never matches:
+    ///
+    /// ```
+    /// use regex_automata::hybrid::dfa::DFA;
+    ///
+    /// let dfa = DFA::never_match()?;
+    /// assert_eq!(dfa.pattern_len(), 0);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And another example for a DFA that matches at every position:
+    ///
+    /// ```
+    /// use regex_automata::hybrid::dfa::DFA;
+    ///
+    /// let dfa = DFA::always_match()?;
+    /// assert_eq!(dfa.pattern_len(), 1);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And finally, a DFA that was constructed from multiple patterns:
+    ///
+    /// ```
+    /// use regex_automata::hybrid::dfa::DFA;
+    ///
+    /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
+    /// assert_eq!(dfa.pattern_len(), 3);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn pattern_len(&self) -> usize {
+        self.nfa.pattern_len()
+    }
+
+    /// Returns the equivalence classes that make up the alphabet for this
+    /// DFA.
+    ///
+    /// Unless [`Config::byte_classes`] was disabled, it is possible that
+    /// multiple distinct bytes are grouped into the same equivalence class
+    /// if it is impossible for them to discriminate between a match and a
+    /// non-match. This has the effect of reducing the overall alphabet size
+    /// and in turn potentially substantially reducing the size of the DFA's
+    /// transition table.
+    ///
+    /// The downside of using equivalence classes like this is that every
+    /// state transition will automatically use this map to convert an
+    /// arbitrary byte to its corresponding equivalence class. In practice
+    /// this has a negligible impact on performance.
+    pub fn byte_classes(&self) -> &ByteClasses {
+        &self.classes
+    }
+
+    /// Returns this lazy DFA's configuration.
+    pub fn get_config(&self) -> &Config {
+        &self.config
+    }
+
+    /// Returns a reference to the underlying NFA.
+    pub fn get_nfa(&self) -> &thompson::NFA {
+        &self.nfa
+    }
+
+    /// Returns the stride, as a base-2 exponent, required for these
+    /// equivalence classes.
+    ///
+    /// The stride is always the smallest power of 2 that is greater than or
+    /// equal to the alphabet length. This is done so that converting between
+    /// state IDs and indices can be done with shifts alone, which is much
+    /// faster than integer division.
+    fn stride2(&self) -> usize {
+        self.stride2
+    }
+
+    /// Returns the total stride for every state in this lazy DFA. This
+    /// corresponds to the total number of transitions used by each state in
+    /// this DFA's transition table.
+    fn stride(&self) -> usize {
+        1 << self.stride2()
+    }
+
+    /// Returns the memory usage, in bytes, of this lazy DFA.
+    ///
+    /// This does **not** include the stack size used up by this lazy DFA. To
+    /// compute that, use `std::mem::size_of::<DFA>()`. This also does not
+    /// include the size of the `Cache` used.
+    ///
+    /// This also does not include any heap memory used by the NFA inside of
+    /// this hybrid NFA/DFA. This is because the NFA's ownership is shared,
+    /// and thus not owned by this hybrid NFA/DFA. More practically, several
+    /// regex engines in this crate embed an NFA, and reporting the NFA's
+    /// memory usage in all of them would likely result in reporting higher
+    /// heap memory than is actually used.
+    pub fn memory_usage(&self) -> usize {
+        // The only thing that uses heap memory in a DFA is the NFA.
+        // But the NFA has shared ownership, so reporting its memory as part
+        // of the hybrid DFA is likely to lead to double-counting the NFA
+        // memory somehow. In particular, this DFA does not really own an
+        // NFA, so including it in the DFA's memory usage doesn't seem
+        // semantically correct.
+        0
+    }
+}
+
+impl DFA {
+    /// Executes a forward search and returns the end position of the
+    /// leftmost match that is found. If no match exists, then `None` is
+    /// returned.
+    ///
+    /// In particular, this method continues searching even after it enters
+    /// a match state. The search only terminates once it has reached the
+    /// end of the input or when it has entered a dead or quit state. Upon
+    /// termination, the position of the last byte seen while still in a
+    /// match state is returned.
+    ///
+    /// # Errors
+    ///
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the
+    /// search. For example, setting quit bytes or enabling heuristic support
+    /// for Unicode word boundaries. The default configuration does not
+    /// enable any option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this by default,
+    /// although it is typically a good idea to.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
+    /// exists or not.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to run a basic search.
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
+    ///
+    /// let dfa = DFA::new("foo[0-9]+")?;
+    /// let mut cache = dfa.create_cache();
+    /// let expected = HalfMatch::must(0, 8);
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(
+    ///     &mut cache, &Input::new("foo12345"))?,
+    /// );
+    ///
+    /// // Even though a match is found after reading the first byte (`a`),
+    /// // the leftmost first match semantics demand that we find the
+    /// // earliest match that prefers earlier parts of the pattern over
+    /// // later parts.
+    /// let dfa = DFA::new("abc|a")?;
+    /// let mut cache = dfa.create_cache();
+    /// let expected = HalfMatch::must(0, 3);
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(
+    ///     &mut cache, &Input::new("abc"))?,
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: specific pattern search
+    ///
+    /// This example shows how to build a lazy multi-DFA that permits
+    /// searching for specific patterns.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     Anchored, HalfMatch, PatternID, Input,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().starts_for_each_pattern(true))
+    ///     .build_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
+    /// let mut cache = dfa.create_cache();
+    /// let haystack = "foo123";
+    ///
+    /// // Since we are using the default leftmost-first match and both
+    /// // patterns match at the same starting position, only the first
+    /// // pattern will be returned in this case when doing a search for any
+    /// // of the patterns.
+ /// let expected = Some(HalfMatch::must(0, 6)); + /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?; + /// assert_eq!(expected, got); + /// + /// // But if we want to check whether some other pattern matches, then we + /// // can provide its pattern ID. + /// let expected = Some(HalfMatch::must(1, 6)); + /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(1))); + /// let got = dfa.try_search_fwd(&mut cache, &input)?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specifying the bounds of a search + /// + /// This example shows how providing the bounds of a search can produce + /// different results than simply sub-slicing the haystack. + /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input}; + /// + /// // N.B. We disable Unicode here so that we use a simple ASCII word + /// // boundary. Alternatively, we could enable heuristic support for + /// // Unicode word boundaries since our haystack is pure ASCII. + /// let dfa = DFA::new(r"(?-u)\b[0-9]{3}\b")?; + /// let mut cache = dfa.create_cache(); + /// let haystack = "foo123bar"; + /// + /// // Since we sub-slice the haystack, the search doesn't know about the + /// // larger context and assumes that `123` is surrounded by word + /// // boundaries. And of course, the match position is reported relative + /// // to the sub-slice as well, which means we get `3` instead of `6`. + /// let expected = Some(HalfMatch::must(0, 3)); + /// let got = dfa.try_search_fwd( + /// &mut cache, + /// &Input::new(&haystack[3..6]), + /// )?; + /// assert_eq!(expected, got); + /// + /// // But if we provide the bounds of the search within the context of the + /// // entire haystack, then the search can take the surrounding context + /// // into account. (And if we did find a match, it would be reported + /// // as a valid offset into `haystack` instead of its sub-slice.) + /// let expected = None; + /// let got = dfa.try_search_fwd( + /// &mut cache, + /// &Input::new(haystack).range(3..6), + /// )?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_search_fwd( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, MatchError> { + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + let hm = match search::find_fwd(self, cache, input)? { + None => return Ok(None), + Some(hm) if !utf8empty => return Ok(Some(hm)), + Some(hm) => hm, + }; + // We get to this point when we know our DFA can match the empty string + // AND when UTF-8 mode is enabled. In this case, we skip any matches + // whose offset splits a codepoint. Such a match is necessarily a + // zero-width match, because UTF-8 mode requires the underlying NFA + // to be built such that all non-empty matches span valid UTF-8. + // Therefore, any match that ends in the middle of a codepoint cannot + // be part of a span of valid UTF-8 and thus must be an empty match. + // In such cases, we skip it, so as not to report matches that split a + // codepoint. + // + // Note that this is not a checked assumption. Callers *can* provide an + // NFA with UTF-8 mode enabled but produces non-empty matches that span + // invalid UTF-8. But doing so is documented to result in unspecified + // behavior. 
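+        //
+        // The actual skipping is delegated to this helper: it re-runs the
+        // search via the closure below until it yields either no match or
+        // a match whose end offset does not split a codepoint.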
+        empty::skip_splits_fwd(input, hm, hm.offset(), |input| {
+            let got = search::find_fwd(self, cache, input)?;
+            Ok(got.map(|hm| (hm, hm.offset())))
+        })
+    }
+
+    /// Executes a reverse search and returns the start position of the
+    /// leftmost match that is found. If no match exists, then `None` is
+    /// returned.
+    ///
+    /// # Errors
+    ///
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the
+    /// search. For example, setting quit bytes or enabling heuristic support
+    /// for Unicode word boundaries. The default configuration does not
+    /// enable any option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this, although it
+    /// is typically a good idea to.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
+    /// exists or not.
+    ///
+    /// # Example
+    ///
+    /// This routine is principally useful when used in conjunction with the
+    /// [`nfa::thompson::Config::reverse`](crate::nfa::thompson::Config::reverse)
+    /// configuration. In general, it's unlikely to be correct to use both
+    /// `try_search_fwd` and `try_search_rev` with the same DFA since any
+    /// particular DFA will only support searching in one direction with
+    /// respect to the pattern.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson,
+    ///     hybrid::dfa::DFA,
+    ///     HalfMatch, Input,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .thompson(thompson::Config::new().reverse(true))
+    ///     .build("foo[0-9]+")?;
+    /// let mut cache = dfa.create_cache();
+    /// let expected = HalfMatch::must(0, 0);
+    /// assert_eq!(
+    ///     Some(expected),
+    ///     dfa.try_search_rev(&mut cache, &Input::new("foo12345"))?,
+    /// );
+    ///
+    /// // Even though a match is found after reading the last byte (`c`),
+    /// // the leftmost first match semantics demand that we find the
+    /// // earliest match that prefers earlier parts of the pattern over
+    /// // later parts.
+    /// let dfa = DFA::builder()
+    ///     .thompson(thompson::Config::new().reverse(true))
+    ///     .build("abc|c")?;
+    /// let mut cache = dfa.create_cache();
+    /// let expected = HalfMatch::must(0, 0);
+    /// assert_eq!(Some(expected), dfa.try_search_rev(
+    ///     &mut cache, &Input::new("abc"))?,
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: UTF-8 mode
+    ///
+    /// This example demonstrates that UTF-8 mode applies to reverse
+    /// DFAs. When UTF-8 mode is enabled in the underlying NFA, then all
+    /// matches reported must correspond to valid UTF-8 spans. This includes
+    /// prohibiting zero-width matches that split a codepoint.
+    ///
+    /// UTF-8 mode is enabled by default. Notice below how the only
+    /// zero-width matches reported are those at UTF-8 boundaries:
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     nfa::thompson,
+    ///     HalfMatch, Input, MatchKind,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .thompson(thompson::Config::new().reverse(true))
+    ///     .build(r"")?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// // Run the reverse DFA to collect all matches.
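+    /// // Each iteration shrinks the end of the search span: down to the
+    /// // previous match offset, or by one byte when the previous match
+    /// // was zero-width. Matches are thus collected from right to left.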
+    /// let mut input = Input::new("☃");
+    /// let mut matches = vec![];
+    /// loop {
+    ///     match dfa.try_search_rev(&mut cache, &input)? {
+    ///         None => break,
+    ///         Some(hm) => {
+    ///             matches.push(hm);
+    ///             if hm.offset() == 0 || input.end() == 0 {
+    ///                 break;
+    ///             } else if hm.offset() < input.end() {
+    ///                 input.set_end(hm.offset());
+    ///             } else {
+    ///                 // This is only necessary to handle zero-width
+    ///                 // matches, which of course occur in this example.
+    ///                 // Without this, the search would never advance
+    ///                 // backwards beyond the initial match.
+    ///                 input.set_end(input.end() - 1);
+    ///             }
+    ///         }
+    ///     }
+    /// }
+    ///
+    /// // No matches split a codepoint.
+    /// let expected = vec![
+    ///     HalfMatch::must(0, 3),
+    ///     HalfMatch::must(0, 0),
+    /// ];
+    /// assert_eq!(expected, matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Now let's look at the same example, but with UTF-8 mode on the
+    /// underlying NFA disabled:
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     nfa::thompson,
+    ///     HalfMatch, Input, MatchKind,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .thompson(thompson::Config::new().reverse(true).utf8(false))
+    ///     .build(r"")?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// // Run the reverse DFA to collect all matches.
+    /// let mut input = Input::new("☃");
+    /// let mut matches = vec![];
+    /// loop {
+    ///     match dfa.try_search_rev(&mut cache, &input)? {
+    ///         None => break,
+    ///         Some(hm) => {
+    ///             matches.push(hm);
+    ///             if hm.offset() == 0 || input.end() == 0 {
+    ///                 break;
+    ///             } else if hm.offset() < input.end() {
+    ///                 input.set_end(hm.offset());
+    ///             } else {
+    ///                 // This is only necessary to handle zero-width
+    ///                 // matches, which of course occur in this example.
+    ///                 // Without this, the search would never advance
+    ///                 // backwards beyond the initial match.
+    ///                 input.set_end(input.end() - 1);
+    ///             }
+    ///         }
+    ///     }
+    /// }
+    ///
+    /// // Matches are now reported at every position, including those that
+    /// // split the codepoint.
+    /// let expected = vec![
+    ///     HalfMatch::must(0, 3),
+    ///     HalfMatch::must(0, 2),
+    ///     HalfMatch::must(0, 1),
+    ///     HalfMatch::must(0, 0),
+    /// ];
+    /// assert_eq!(expected, matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn try_search_rev(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+    ) -> Result<Option<HalfMatch>, MatchError> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        let hm = match search::find_rev(self, cache, input)? {
+            None => return Ok(None),
+            Some(hm) if !utf8empty => return Ok(Some(hm)),
+            Some(hm) => hm,
+        };
+        empty::skip_splits_rev(input, hm, hm.offset(), |input| {
+            let got = search::find_rev(self, cache, input)?;
+            Ok(got.map(|hm| (hm, hm.offset())))
+        })
+    }
+
+    /// Executes an overlapping forward search and returns the end position
+    /// of matches as they are found. If no match exists, then `None` is
+    /// returned.
+    ///
+    /// This routine is principally only useful when searching for multiple
+    /// patterns on inputs where multiple patterns may match the same regions
+    /// of text. In particular, callers must preserve the automaton's search
+    /// state from prior calls so that the implementation knows where the
+    /// last match occurred.
+    ///
+    /// When using this routine to implement an iterator of overlapping
+    /// matches, the `start` of the search should remain invariant throughout
+    /// iteration. The `OverlappingState` given to the search will keep track
+    /// of the current position of the search. (This is because multiple
+    /// matches may be reported at the same position, so only the search
+    /// implementation itself knows when to advance the position.)
+    ///
+    /// If for some reason you want the search to forget about its previous
+    /// state and restart the search at a particular position, then setting
+    /// the state to [`OverlappingState::start`] will accomplish that.
+    ///
+    /// # Errors
+    ///
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the
+    /// search. For example, setting quit bytes or enabling heuristic support
+    /// for Unicode word boundaries. The default configuration does not
+    /// enable any option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this, although it
+    /// is typically a good idea to.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
+    /// exists or not.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to run a basic overlapping search. Notice
+    /// that we build the automaton with a `MatchKind::All` configuration.
+    /// Overlapping searches are unlikely to work as one would expect when
+    /// using the default `MatchKind::LeftmostFirst` match semantics, since
+    /// leftmost-first matching is fundamentally incompatible with
+    /// overlapping searches. Namely, overlapping searches need to report
+    /// matches as they are seen, whereas leftmost-first searches will
+    /// continue searching even after a match has been observed in order to
+    /// find the conventional end position of the match. More concretely,
+    /// leftmost-first searches use dead states to terminate a search after
+    /// a specific match can no longer be extended. Overlapping searches
+    /// instead do the opposite by continuing the search to find totally new
+    /// matches (potentially of other patterns).
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     hybrid::dfa::{DFA, OverlappingState},
+    ///     HalfMatch, Input, MatchKind,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .build_many(&[r"\w+$", r"\S+$"])?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let haystack = "@foo";
+    /// let mut state = OverlappingState::start();
+    ///
+    /// let expected = Some(HalfMatch::must(1, 4));
+    /// dfa.try_search_overlapping_fwd(
+    ///     &mut cache, &Input::new(haystack), &mut state,
+    /// )?;
+    /// assert_eq!(expected, state.get_match());
+    ///
+    /// // The first pattern also matches at the same position, so re-running
+    /// // the search will yield another match. Notice also that the first
+    /// // pattern is returned after the second. This is because the second
+    /// // pattern begins its match before the first, is therefore an earlier
+    /// // match, and is thus reported first.
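+    /// // (Note that the same `state` is passed in again: the overlapping
+    /// // search relies on it being preserved across calls in order to
+    /// // resume where the previous search left off.)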
+    /// let expected = Some(HalfMatch::must(0, 4));
+    /// dfa.try_search_overlapping_fwd(
+    ///     &mut cache, &Input::new(haystack), &mut state,
+    /// )?;
+    /// assert_eq!(expected, state.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn try_search_overlapping_fwd(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        state: &mut OverlappingState,
+    ) -> Result<(), MatchError> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        search::find_overlapping_fwd(self, cache, input, state)?;
+        match state.get_match() {
+            None => Ok(()),
+            Some(_) if !utf8empty => Ok(()),
+            Some(_) => skip_empty_utf8_splits_overlapping(
+                input,
+                state,
+                |input, state| {
+                    search::find_overlapping_fwd(self, cache, input, state)
+                },
+            ),
+        }
+    }
+
+    /// Executes a reverse overlapping search and returns the start position
+    /// of the leftmost match that is found. If no match exists, then `None`
+    /// is returned.
+    ///
+    /// When using this routine to implement an iterator of overlapping
+    /// matches, the `start` of the search should remain invariant throughout
+    /// iteration. The `OverlappingState` given to the search will keep track
+    /// of the current position of the search. (This is because multiple
+    /// matches may be reported at the same position, so only the search
+    /// implementation itself knows when to advance the position.)
+    ///
+    /// If for some reason you want the search to forget about its previous
+    /// state and restart the search at a particular position, then setting
+    /// the state to [`OverlappingState::start`] will accomplish that.
+    ///
+    /// # Errors
+    ///
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the
+    /// search. For example, setting quit bytes or enabling heuristic support
+    /// for Unicode word boundaries. The default configuration does not
+    /// enable any option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this, although it
+    /// is typically a good idea to.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
+    /// exists or not.
+    ///
+    /// # Example: UTF-8 mode
+    ///
+    /// This example demonstrates that UTF-8 mode applies to reverse
+    /// DFAs. When UTF-8 mode is enabled in the underlying NFA, then all
+    /// matches reported must correspond to valid UTF-8 spans. This includes
+    /// prohibiting zero-width matches that split a codepoint.
+    ///
+    /// UTF-8 mode is enabled by default. Notice below how the only
+    /// zero-width matches reported are those at UTF-8 boundaries:
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::{DFA, OverlappingState},
+    ///     nfa::thompson,
+    ///     HalfMatch, Input, MatchKind,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .thompson(thompson::Config::new().reverse(true))
+    ///     .build_many(&[r"", r"☃"])?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// // Run the reverse DFA to collect all matches.
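+    /// // Unlike the plain reverse search, the overlapping search tracks
+    /// // its position in `state`, so the same `Input` is used on every
+    /// // iteration of the loop below.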
+    /// let input = Input::new("☃");
+    /// let mut state = OverlappingState::start();
+    /// let mut matches = vec![];
+    /// loop {
+    ///     dfa.try_search_overlapping_rev(&mut cache, &input, &mut state)?;
+    ///     match state.get_match() {
+    ///         None => break,
+    ///         Some(hm) => matches.push(hm),
+    ///     }
+    /// }
+    ///
+    /// // No matches split a codepoint.
+    /// let expected = vec![
+    ///     HalfMatch::must(0, 3),
+    ///     HalfMatch::must(1, 0),
+    ///     HalfMatch::must(0, 0),
+    /// ];
+    /// assert_eq!(expected, matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Now let's look at the same example, but with UTF-8 mode on the
+    /// underlying NFA disabled:
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::{DFA, OverlappingState},
+    ///     nfa::thompson,
+    ///     HalfMatch, Input, MatchKind,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .thompson(thompson::Config::new().reverse(true).utf8(false))
+    ///     .build_many(&[r"", r"☃"])?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// // Run the reverse DFA to collect all matches.
+    /// let input = Input::new("☃");
+    /// let mut state = OverlappingState::start();
+    /// let mut matches = vec![];
+    /// loop {
+    ///     dfa.try_search_overlapping_rev(&mut cache, &input, &mut state)?;
+    ///     match state.get_match() {
+    ///         None => break,
+    ///         Some(hm) => matches.push(hm),
+    ///     }
+    /// }
+    ///
+    /// // Now *all* positions match, even within a codepoint, because we
+    /// // lifted the requirement that matches correspond to valid UTF-8
+    /// // spans.
+    /// let expected = vec![
+    ///     HalfMatch::must(0, 3),
+    ///     HalfMatch::must(0, 2),
+    ///     HalfMatch::must(0, 1),
+    ///     HalfMatch::must(1, 0),
+    ///     HalfMatch::must(0, 0),
+    /// ];
+    /// assert_eq!(expected, matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn try_search_overlapping_rev(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        state: &mut OverlappingState,
+    ) -> Result<(), MatchError> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        search::find_overlapping_rev(self, cache, input, state)?;
+        match state.get_match() {
+            None => Ok(()),
+            Some(_) if !utf8empty => Ok(()),
+            Some(_) => skip_empty_utf8_splits_overlapping(
+                input,
+                state,
+                |input, state| {
+                    search::find_overlapping_rev(self, cache, input, state)
+                },
+            ),
+        }
+    }
+
+    /// Writes the set of patterns that match anywhere in the given search
+    /// configuration to `patset`. If multiple patterns match at the same
+    /// position and the underlying DFA supports overlapping matches, then
+    /// all matching patterns are written to the given set.
+    ///
+    /// Unless all of the patterns in this DFA are anchored, then generally
+    /// speaking, this will visit every byte in the haystack.
+    ///
+    /// This search routine *does not* clear the pattern set. This gives some
+    /// flexibility to the caller (e.g., running multiple searches with the
+    /// same pattern set), but does make the API bug-prone if you reuse the
+    /// same pattern set for multiple searches that you intended to be
+    /// independent.
+    ///
+    /// If a pattern ID matched but the given `PatternSet` does not have
+    /// sufficient capacity to store it, then it is not inserted and silently
+    /// dropped.
+    ///
+    /// # Errors
+    ///
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the
+    /// search. For example, setting quit bytes or enabling heuristic support
+    /// for Unicode word boundaries. The default configuration does not
+    /// enable any option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this, although it
+    /// is typically a good idea to.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
+    /// exists or not.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find all matching patterns in a haystack,
+    /// even when some patterns match at the same position as other patterns.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     Input, MatchKind, PatternSet,
+    /// };
+    ///
+    /// let patterns = &[
+    ///     r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar",
+    /// ];
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .build_many(patterns)?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let input = Input::new("foobar");
+    /// let mut patset = PatternSet::new(dfa.pattern_len());
+    /// dfa.try_which_overlapping_matches(&mut cache, &input, &mut patset)?;
+    /// let expected = vec![0, 2, 3, 4, 6];
+    /// let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect();
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn try_which_overlapping_matches(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) -> Result<(), MatchError> {
+        let mut state = OverlappingState::start();
+        while let Some(m) = {
+            self.try_search_overlapping_fwd(cache, input, &mut state)?;
+            state.get_match()
+        } {
+            let _ = patset.try_insert(m.pattern());
+            // There's nothing left to find, so we can stop. Or the caller
+            // asked us to.
+            if patset.is_full() || input.get_earliest() {
+                break;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl DFA {
+    /// Transitions from the current state to the next state, given the next
+    /// byte of input.
+    ///
+    /// The given cache is used to either reuse pre-computed state
+    /// transitions, or to store this newly computed transition for future
+    /// reuse. Thus, this routine guarantees that it will never return a
+    /// state ID that has an "unknown" tag.
+    ///
+    /// # State identifier validity
+    ///
+    /// The only valid value for `current` is the lazy state ID returned
+    /// by the most recent call to `next_state`, `next_state_untagged`,
+    /// `next_state_untagged_unchecked`, `start_state_forward` or
+    /// `start_state_reverse` for the given `cache`. Any state ID returned
+    /// from prior calls to these routines (with the same `cache`) is
+    /// considered invalid (even if it gives an appearance of working). State
+    /// IDs returned from _any_ prior call for different `cache` values are
+    /// also always invalid.
+    ///
+    /// The returned ID is always a valid ID when `current` refers to a valid
+    /// ID. Moreover, this routine is defined for all possible values of
+    /// `input`.
+    ///
+    /// These validity rules are not checked, even in debug mode. Callers are
+    /// required to uphold these rules themselves.
+    ///
+    /// Violating these state ID validity rules will not sacrifice memory
+    /// safety, but _may_ produce an incorrect result or a panic.
+    ///
+    /// # Panics
+    ///
+    /// If the given ID does not refer to a valid state, then this routine
+    /// may panic, but it also may not panic and instead return an invalid
+    /// or incorrect ID.
+    ///
+    /// # Example
+    ///
+    /// This shows a simplistic example for walking a lazy DFA for a given
+    /// haystack by using the `next_state` method.
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, Input};
+    ///
+    /// let dfa = DFA::new(r"[a-z]+r")?;
+    /// let mut cache = dfa.create_cache();
+    /// let haystack = "bar".as_bytes();
+    ///
+    /// // The start state is determined by inspecting the position and the
+    /// // initial bytes of the haystack.
+    /// let mut sid = dfa.start_state_forward(
+    ///     &mut cache, &Input::new(haystack),
+    /// )?;
+    /// // Walk all the bytes in the haystack.
+    /// for &b in haystack {
+    ///     sid = dfa.next_state(&mut cache, sid, b)?;
+    /// }
+    /// // Matches are always delayed by 1 byte, so we must explicitly walk
+    /// // the special "EOI" transition at the end of the search.
+    /// sid = dfa.next_eoi_state(&mut cache, sid)?;
+    /// assert!(sid.is_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn next_state(
+        &self,
+        cache: &mut Cache,
+        current: LazyStateID,
+        input: u8,
+    ) -> Result<LazyStateID, CacheError> {
+        let class = usize::from(self.classes.get(input));
+        let offset = current.as_usize_untagged() + class;
+        let sid = cache.trans[offset];
+        if !sid.is_unknown() {
+            return Ok(sid);
+        }
+        let unit = alphabet::Unit::u8(input);
+        Lazy::new(self, cache).cache_next_state(current, unit)
+    }
+
+    /// Transitions from the current state to the next state, given the next
+    /// byte of input and a state ID that is not tagged.
+    ///
+    /// The only reason to use this routine is performance. In particular,
+    /// the `next_state` method needs to do some additional checks, among
+    /// them is accounting for identifiers to states that are not yet
+    /// computed. In such a case, the transition is computed on the fly.
+    /// However, if it is known that the `current` state ID is untagged,
+    /// then these checks can be omitted.
+    ///
+    /// Since this routine does not compute states on the fly, it does not
+    /// modify the cache and thus cannot return an error. Consequently,
+    /// `cache` does not need to be mutable and it is possible for this
+    /// routine to return a state ID corresponding to the special "unknown"
+    /// state. In this case, it is the caller's responsibility to use the
+    /// prior state ID and `input` with `next_state` in order to force the
+    /// computation of the unknown transition. Otherwise, trying to use the
+    /// "unknown" state ID will just result in transitioning back to itself,
+    /// and thus never terminating. (This is technically a special exemption
+    /// to the state ID validity rules, but is permissible since this routine
+    /// is guaranteed to never mutate the given `cache`, and thus the
+    /// identifier is guaranteed to remain valid.)
+    ///
+    /// See [`LazyStateID`] for more details on what it means for a state ID
+    /// to be tagged. Also, see
+    /// [`next_state_untagged_unchecked`](DFA::next_state_untagged_unchecked)
+    /// for this same idea, but with bounds checks forcefully elided.
+    ///
+    /// # State identifier validity
+    ///
+    /// The only valid value for `current` is an **untagged** lazy
+    /// state ID returned by the most recent call to `next_state`,
+    /// `next_state_untagged`, `next_state_untagged_unchecked`,
+    /// `start_state_forward` or `start_state_reverse` for the given `cache`.
+    /// Any state ID returned from prior calls to these routines (with the
+    /// same `cache`) is considered invalid (even if it gives an appearance
+    /// of working). State IDs returned from _any_ prior call for different
+    /// `cache` values are also always invalid.
+    ///
+    /// The returned ID is always a valid ID when `current` refers to a valid
+    /// ID, although it may be tagged. Moreover, this routine is defined for
+    /// all possible values of `input`.
+    ///
+    /// Not all validity rules are checked, even in debug mode. Callers are
+    /// required to uphold these rules themselves.
+    ///
+    /// Violating these state ID validity rules will not sacrifice memory
+    /// safety, but _may_ produce an incorrect result or a panic.
+    ///
+    /// # Panics
+    ///
+    /// If the given ID does not refer to a valid state, then this routine
+    /// may panic, but it also may not panic and instead return an invalid
+    /// or incorrect ID.
+    ///
+    /// # Example
+    ///
+    /// This shows a simplistic example for walking a lazy DFA for a given
+    /// haystack by using the `next_state_untagged` method where possible.
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, Input};
+    ///
+    /// let dfa = DFA::new(r"[a-z]+r")?;
+    /// let mut cache = dfa.create_cache();
+    /// let haystack = "bar".as_bytes();
+    ///
+    /// // The start state is determined by inspecting the position and the
+    /// // initial bytes of the haystack.
+    /// let mut sid = dfa.start_state_forward(
+    ///     &mut cache, &Input::new(haystack),
+    /// )?;
+    /// // Walk all the bytes in the haystack.
+    /// let mut at = 0;
+    /// while at < haystack.len() {
+    ///     if sid.is_tagged() {
+    ///         sid = dfa.next_state(&mut cache, sid, haystack[at])?;
+    ///         at += 1;
+    ///     } else {
+    ///         let mut prev_sid = sid;
+    ///         // We attempt to chew through as much as we can while moving
+    ///         // through untagged state IDs. Thus, the transition function
+    ///         // does less work on average per byte. (Unrolling this loop
+    ///         // may help even more.)
+    ///         while at < haystack.len() {
+    ///             prev_sid = sid;
+    ///             sid = dfa.next_state_untagged(
+    ///                 &mut cache, sid, haystack[at],
+    ///             );
+    ///             at += 1;
+    ///             if sid.is_tagged() {
+    ///                 break;
+    ///             }
+    ///         }
+    ///         // We must ensure that we never proceed to the next iteration
+    ///         // with an unknown state ID. If we don't account for this
+    ///         // case, then search isn't guaranteed to terminate since all
+    ///         // transitions on unknown states loop back to themselves.
+    ///         if sid.is_unknown() {
+    ///             sid = dfa.next_state(
+    ///                 &mut cache, prev_sid, haystack[at - 1],
+    ///             )?;
+    ///         }
+    ///     }
+    /// }
+    /// // Matches are always delayed by 1 byte, so we must explicitly walk
+    /// // the special "EOI" transition at the end of the search.
+    /// sid = dfa.next_eoi_state(&mut cache, sid)?;
+    /// assert!(sid.is_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn next_state_untagged(
+        &self,
+        cache: &Cache,
+        current: LazyStateID,
+        input: u8,
+    ) -> LazyStateID {
+        debug_assert!(!current.is_tagged());
+        let class = usize::from(self.classes.get(input));
+        let offset = current.as_usize_unchecked() + class;
+        cache.trans[offset]
+    }
+
+    /// Transitions from the current state to the next state, eliding bounds
+    /// checks, given the next byte of input and a state ID that is not
+    /// tagged.
+    ///
+    /// The only reason to use this routine is performance. In particular,
+    /// the `next_state` method needs to do some additional checks, among
+    /// them is accounting for identifiers to states that are not yet
+    /// computed. In such a case, the transition is computed on the fly.
+    /// However, if it is known that the `current` state ID is untagged,
+    /// then these checks can be omitted.
+    ///
+    /// Since this routine does not compute states on the fly, it does not
+    /// modify the cache and thus cannot return an error. Consequently,
+    /// `cache` does not need to be mutable and it is possible for this
+    /// routine to return a state ID corresponding to the special "unknown"
+    /// state. In this case, it is the caller's responsibility to use the
+    /// prior state ID and `input` with `next_state` in order to force the
+    /// computation of the unknown transition. Otherwise, trying to use the
+    /// "unknown" state ID will just result in transitioning back to itself,
+    /// and thus never terminating. (This is technically a special exemption
+    /// to the state ID validity rules, but is permissible since this routine
+    /// is guaranteed to never mutate the given `cache`, and thus the
+    /// identifier is guaranteed to remain valid.)
+    ///
+    /// See [`LazyStateID`] for more details on what it means for a state ID
+    /// to be tagged. Also, see
+    /// [`next_state_untagged`](DFA::next_state_untagged)
+    /// for this same idea, but with memory safety guaranteed by retaining
+    /// bounds checks.
+    ///
+    /// # State identifier validity
+    ///
+    /// The only valid value for `current` is an **untagged** lazy
+    /// state ID returned by the most recent call to `next_state`,
+    /// `next_state_untagged`, `next_state_untagged_unchecked`,
+    /// `start_state_forward` or `start_state_reverse` for the given `cache`.
+    /// Any state ID returned from prior calls to these routines (with the
+    /// same `cache`) is considered invalid (even if it gives an appearance
+    /// of working). State IDs returned from _any_ prior call for different
+    /// `cache` values are also always invalid.
+    ///
+    /// The returned ID is always a valid ID when `current` refers to a valid
+    /// ID, although it may be tagged. Moreover, this routine is defined for
+    /// all possible values of `input`.
+    ///
+    /// Not all validity rules are checked, even in debug mode. Callers are
+    /// required to uphold these rules themselves.
+    ///
+    /// Violating these state ID validity rules will not sacrifice memory
+    /// safety, but _may_ produce an incorrect result or a panic.
+    ///
+    /// # Safety
+    ///
+    /// Callers of this method must guarantee that `current` refers to a
+    /// valid state ID according to the rules described above. If `current`
+    /// is not a valid state ID for this automaton, then calling this routine
+    /// may result in undefined behavior.
+    ///
+    /// If `current` is valid, then the ID returned is valid for all possible
+    /// values of `input`.
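+    ///
+    /// # Example
+    ///
+    /// A condensed sketch, mirroring the `next_state_untagged` example
+    /// above, of how the unchecked variant can slot into a search loop. The
+    /// inner-loop structure from that example applies here too; this
+    /// version simply handles one byte at a time for brevity:
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, Input};
+    ///
+    /// let dfa = DFA::new(r"[a-z]+r")?;
+    /// let mut cache = dfa.create_cache();
+    /// let haystack = "bar".as_bytes();
+    ///
+    /// let mut sid = dfa.start_state_forward(
+    ///     &mut cache, &Input::new(haystack),
+    /// )?;
+    /// for &b in haystack {
+    ///     let prev_sid = sid;
+    ///     sid = if sid.is_tagged() {
+    ///         dfa.next_state(&mut cache, sid, b)?
+    ///     } else {
+    ///         // SAFETY: `sid` was returned by the most recent call to
+    ///         // `start_state_forward` or `next_state` for this `cache`,
+    ///         // and we just checked that it is untagged.
+    ///         unsafe { dfa.next_state_untagged_unchecked(&cache, sid, b) }
+    ///     };
+    ///     // An unchecked transition may land on the special "unknown"
+    ///     // state, in which case we must fall back to `next_state` with
+    ///     // the prior ID to force the transition to be computed.
+    ///     if sid.is_unknown() {
+    ///         sid = dfa.next_state(&mut cache, prev_sid, b)?;
+    ///     }
+    /// }
+    /// sid = dfa.next_eoi_state(&mut cache, sid)?;
+    /// assert!(sid.is_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```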
+    #[inline]
+    pub unsafe fn next_state_untagged_unchecked(
+        &self,
+        cache: &Cache,
+        current: LazyStateID,
+        input: u8,
+    ) -> LazyStateID {
+        debug_assert!(!current.is_tagged());
+        let class = usize::from(self.classes.get(input));
+        let offset = current.as_usize_unchecked() + class;
+        *cache.trans.get_unchecked(offset)
+    }
+
+    /// Transitions from the current state to the next state for the special
+    /// EOI symbol.
+    ///
+    /// The given cache is used to either reuse pre-computed state
+    /// transitions, or to store this newly computed transition for future
+    /// reuse. Thus, this routine guarantees that it will never return a
+    /// state ID that has an "unknown" tag.
+    ///
+    /// This routine must be called at the end of every search in a correct
+    /// implementation of search. Namely, lazy DFAs in this crate delay
+    /// matches by one byte in order to support look-around operators. Thus,
+    /// after reaching the end of a haystack, a search implementation must
+    /// follow one last EOI transition.
+    ///
+    /// It is best to think of EOI as an additional symbol in the alphabet
+    /// of a DFA that is distinct from every other symbol. That is, the
+    /// alphabet of lazy DFAs in this crate has a logical size of 257 instead
+    /// of 256, where 256 corresponds to every possible inhabitant of `u8`.
+    /// (In practice, the physical alphabet size may be smaller because of
+    /// alphabet compression via equivalence classes, but EOI is always
+    /// represented somehow in the alphabet.)
+    ///
+    /// # State identifier validity
+    ///
+    /// The only valid value for `current` is the lazy state ID returned
+    /// by the most recent call to `next_state`, `next_state_untagged`,
+    /// `next_state_untagged_unchecked`, `start_state_forward` or
+    /// `start_state_reverse` for the given `cache`. Any state ID returned
+    /// from prior calls to these routines (with the same `cache`) is
+    /// considered invalid (even if it gives an appearance of working). State
+    /// IDs returned from _any_ prior call for different `cache` values are
+    /// also always invalid.
+    ///
+    /// The returned ID is always a valid ID when `current` refers to a valid
+    /// ID.
+    ///
+    /// These validity rules are not checked, even in debug mode. Callers are
+    /// required to uphold these rules themselves.
+    ///
+    /// Violating these state ID validity rules will not sacrifice memory
+    /// safety, but _may_ produce an incorrect result or a panic.
+    ///
+    /// # Panics
+    ///
+    /// If the given ID does not refer to a valid state, then this routine
+    /// may panic, but it also may not panic and instead return an invalid
+    /// or incorrect ID.
+    ///
+    /// # Example
+    ///
+    /// This shows a simplistic example for walking a DFA for a given
+    /// haystack, and then finishing the search with the final EOI
+    /// transition.
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, Input};
+    ///
+    /// let dfa = DFA::new(r"[a-z]+r")?;
+    /// let mut cache = dfa.create_cache();
+    /// let haystack = "bar".as_bytes();
+    ///
+    /// // The start state is determined by inspecting the position and the
+    /// // initial bytes of the haystack.
+    /// let mut sid = dfa.start_state_forward(
+    ///     &mut cache, &Input::new(haystack),
+    /// )?;
+    /// // Walk all the bytes in the haystack.
+    /// for &b in haystack {
+    ///     sid = dfa.next_state(&mut cache, sid, b)?;
+    /// }
+    /// // Matches are always delayed by 1 byte, so we must explicitly walk
+    /// // the special "EOI" transition at the end of the search. Without
+    /// // this final transition, the assert below will fail since the DFA
+    /// // will not have entered a match state yet!
+    /// sid = dfa.next_eoi_state(&mut cache, sid)?;
+    /// assert!(sid.is_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn next_eoi_state(
+        &self,
+        cache: &mut Cache,
+        current: LazyStateID,
+    ) -> Result<LazyStateID, CacheError> {
+        let eoi = self.classes.eoi().as_usize();
+        let offset = current.as_usize_untagged() + eoi;
+        let sid = cache.trans[offset];
+        if !sid.is_unknown() {
+            return Ok(sid);
+        }
+        let unit = self.classes.eoi();
+        Lazy::new(self, cache).cache_next_state(current, unit)
+    }
+
+    /// Return the ID of the start state for this lazy DFA for the given
+    /// starting configuration.
+    ///
+    /// Unlike typical DFA implementations, the start state for DFAs in this
+    /// crate is dependent on a few different factors:
+    ///
+    /// * The [`Anchored`] mode of the search. Unanchored, anchored and
+    /// anchored searches for a specific [`PatternID`] all use different
+    /// start states.
+    /// * Whether a "look-behind" byte exists. For example, the `^` anchor
+    /// matches if and only if there is no look-behind byte.
+    /// * The specific value of that look-behind byte. For example, a
+    /// `(?m:^)` assertion only matches when there is either no look-behind
+    /// byte, or when the look-behind byte is a line terminator.
+    ///
+    /// The [starting configuration](start::Config) provides the above
+    /// information.
+    ///
+    /// This routine can be used for either forward or reverse searches.
+    /// Although, as a convenience, if you have an [`Input`], then it
+    /// may be more succinct to use [`DFA::start_state_forward`] or
+    /// [`DFA::start_state_reverse`]. Note, for example, that the convenience
+    /// routines return a [`MatchError`] on failure whereas this routine
+    /// returns a [`StartError`].
+    ///
+    /// # Errors
+    ///
+    /// This may return a [`StartError`] if the search needs to give up when
+    /// determining the start state (for example, if it sees a "quit" byte
+    /// or if the cache has become inefficient). This can also return an
+    /// error if the given configuration contains an unsupported [`Anchored`]
+    /// configuration.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn start_state(
+        &self,
+        cache: &mut Cache,
+        config: &start::Config,
+    ) -> Result<LazyStateID, StartError> {
+        let lazy = LazyRef::new(self, cache);
+        let anchored = config.get_anchored();
+        let start = match config.get_look_behind() {
+            None => Start::Text,
+            Some(byte) => {
+                if !self.quitset.is_empty() && self.quitset.contains(byte) {
+                    return Err(StartError::quit(byte));
+                }
+                self.start_map.get(byte)
+            }
+        };
+        let start_id = lazy.get_cached_start_id(anchored, start)?;
+        if !start_id.is_unknown() {
+            return Ok(start_id);
+        }
+        Lazy::new(self, cache).cache_start_group(anchored, start)
+    }
+
+    /// Return the ID of the start state for this lazy DFA when executing a
+    /// forward search.
+    ///
+    /// This is a convenience routine for calling [`DFA::start_state`] that
+    /// converts the given [`Input`] to a
+    /// [start configuration](start::Config). Additionally, if an error
+    /// occurs, it is converted from a [`StartError`] to a [`MatchError`]
+    /// using the offset information in the given [`Input`].
+    ///
+    /// # Errors
+    ///
+    /// This may return a [`MatchError`] if the search needs to give up when
+    /// determining the start state (for example, if it sees a "quit" byte
+    /// or if the cache has become inefficient). This can also return an
+    /// error if the given `Input` contains an unsupported [`Anchored`]
+    /// configuration.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn start_state_forward(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+    ) -> Result<LazyStateID, MatchError> {
+        let config = start::Config::from_input_forward(input);
+        self.start_state(cache, &config).map_err(|err| match err {
+            StartError::Cache { .. } => MatchError::gave_up(input.start()),
+            StartError::Quit { byte } => {
+                let offset = input
+                    .start()
+                    .checked_sub(1)
+                    .expect("no quit in start without look-behind");
+                MatchError::quit(byte, offset)
+            }
+            StartError::UnsupportedAnchored { mode } => {
+                MatchError::unsupported_anchored(mode)
+            }
+        })
+    }
+
+    /// Return the ID of the start state for this lazy DFA when executing a
+    /// reverse search.
+    ///
+    /// This is a convenience routine for calling [`DFA::start_state`] that
+    /// converts the given [`Input`] to a
+    /// [start configuration](start::Config). Additionally, if an error
+    /// occurs, it is converted from a [`StartError`] to a [`MatchError`]
+    /// using the offset information in the given [`Input`].
+    ///
+    /// # Errors
+    ///
+    /// This may return a [`MatchError`] if the search needs to give up when
+    /// determining the start state (for example, if it sees a "quit" byte
+    /// or if the cache has become inefficient). This can also return an
+    /// error if the given `Input` contains an unsupported [`Anchored`]
+    /// configuration.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn start_state_reverse(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+    ) -> Result<LazyStateID, MatchError> {
+        let config = start::Config::from_input_reverse(input);
+        self.start_state(cache, &config).map_err(|err| match err {
+            StartError::Cache { .. } => MatchError::gave_up(input.end()),
+            StartError::Quit { byte } => {
+                let offset = input.end();
+                MatchError::quit(byte, offset)
+            }
+            StartError::UnsupportedAnchored { mode } => {
+                MatchError::unsupported_anchored(mode)
+            }
+        })
+    }
+
+    /// Returns the total number of patterns that match in this state.
+    ///
+    /// If the lazy DFA was compiled with one pattern, then this must
+    /// necessarily always return `1` for all match states.
+    ///
+    /// A lazy DFA guarantees that [`DFA::match_pattern`] can be called with
+    /// indices up to (but not including) the length returned by this routine
+    /// without panicking.
+    ///
+    /// # Panics
+    ///
+    /// If the given state is not a match state, then this may either panic
+    /// or return an incorrect result.
+    ///
+    /// # Example
+    ///
+    /// This example shows a simple instance of implementing overlapping
+    /// matches. In particular, it shows not only how to determine how many
+    /// patterns have matched in a particular state, but also how to access
+    /// which specific patterns have matched.
+    ///
+    /// Notice that we must use [`MatchKind::All`] when building the DFA. If
+    /// we used [`MatchKind::LeftmostFirst`] instead, then the DFA would not
+    /// be constructed in a way that supports overlapping matches. (It would
+    /// only report a single pattern that matches at any particular point in
+    /// time.)
+    ///
+    /// Another thing to take note of is the patterns used and the order in
+    /// which the pattern IDs are reported. In the example below, pattern `3`
+    /// is yielded first. Why? Because it corresponds to the match that
+    /// appears first. Namely, the `@` symbol is part of `\S+` but not part
+    /// of any of the other patterns. Since the `\S+` pattern has a match
+    /// that starts to the left of any other pattern, its ID is returned
+    /// before any other.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, Input, MatchKind};
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .build_many(&[
+    ///         r"\w+", r"[a-z]+", r"[A-Z]+", r"\S+",
+    ///     ])?;
+    /// let mut cache = dfa.create_cache();
+    /// let haystack = "@bar".as_bytes();
+    ///
+    /// // The start state is determined by inspecting the position and the
+    /// // initial bytes of the haystack.
+    /// let mut sid = dfa.start_state_forward(
+    ///     &mut cache, &Input::new(haystack),
+    /// )?;
+    /// // Walk all the bytes in the haystack.
+    /// for &b in haystack {
+    ///     sid = dfa.next_state(&mut cache, sid, b)?;
+    /// }
+    /// sid = dfa.next_eoi_state(&mut cache, sid)?;
+    ///
+    /// assert!(sid.is_match());
+    /// assert_eq!(dfa.match_len(&mut cache, sid), 3);
+    /// // The following calls are guaranteed to not panic since `match_len`
+    /// // returned `3` above.
+    /// assert_eq!(dfa.match_pattern(&mut cache, sid, 0).as_usize(), 3);
+    /// assert_eq!(dfa.match_pattern(&mut cache, sid, 1).as_usize(), 0);
+    /// assert_eq!(dfa.match_pattern(&mut cache, sid, 2).as_usize(), 1);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn match_len(&self, cache: &Cache, id: LazyStateID) -> usize {
+        assert!(id.is_match());
+        LazyRef::new(self, cache).get_cached_state(id).match_len()
+    }
+
+    /// Returns the pattern ID corresponding to the given match index in the
+    /// given state.
+    ///
+    /// See [`DFA::match_len`] for an example of how to use this method
+    /// correctly. Note that if you know your lazy DFA is configured with a
+    /// single pattern, then this routine is never necessary since it will
+    /// always return a pattern ID of `0` for an index of `0` when `id`
+    /// corresponds to a match state.
+    ///
+    /// Typically, this routine is used when implementing an overlapping
+    /// search, as the example for `DFA::match_len` does.
+    ///
+    /// # Panics
+    ///
+    /// If the state ID is not a match state or if the match index is out
+    /// of bounds for the given state, then this routine may either panic
+    /// or produce an incorrect result. If the state ID is correct and the
+    /// match index is correct, then this routine always produces a valid
+    /// `PatternID`.
+    #[inline]
+    pub fn match_pattern(
+        &self,
+        cache: &Cache,
+        id: LazyStateID,
+        match_index: usize,
+    ) -> PatternID {
+        // This is an optimization for the very common case of a DFA with a
+        // single pattern. This conditional avoids a somewhat more costly
+        // path that finds the pattern ID from the corresponding `State`,
+        // which requires a bit of slicing/pointer-chasing. This optimization
+        // tends to only matter when matches are frequent.
+        if self.pattern_len() == 1 {
+            return PatternID::ZERO;
+        }
+        LazyRef::new(self, cache)
+            .get_cached_state(id)
+            .match_pattern(match_index)
+    }
+}
+
+/// A cache represents a partially computed DFA.
+///
+/// A cache is the key component that differentiates a classical DFA and a
+/// hybrid NFA/DFA (also called a "lazy DFA"). Where a classical DFA builds a
+/// complete transition table that can handle all possible inputs, a hybrid
+/// NFA/DFA starts with an empty transition table and builds only the parts
+/// required during search. The parts that are built are stored in a cache.
+/// For this reason, a cache is a required parameter for nearly every
+/// operation on a [`DFA`].
+///
+/// Caches can be created from their corresponding DFA via
+/// [`DFA::create_cache`]. A cache can only be used with either the DFA that
+/// created it, or the DFA that was most recently used to reset it with
+/// [`Cache::reset`]. Using a cache with any other DFA may result in panics
+/// or incorrect results.
+#[derive(Clone, Debug)]
+pub struct Cache {
+    // N.B. If you're looking to understand how determinization works, it
+    // is probably simpler to first grok src/dfa/determinize.rs, since that
+    // doesn't have the "laziness" component.
+    /// The transition table.
+    ///
+    /// Given a `current` LazyStateID and an `input` byte, the next state can
+    /// be computed via `trans[untagged(current) + equiv_class(input)]`.
+    /// Notice that no multiplication is used. That's because state
+    /// identifiers are "premultiplied."
+    ///
+    /// Note that the next state may be the "unknown" state. In this case,
+    /// the next state is not known and determinization for `current` on
+    /// `input` must be performed.
+    trans: Vec<LazyStateID>,
+    /// The starting states for this DFA.
+    ///
+    /// These are computed lazily. Initially, these are all set to "unknown"
+    /// lazy state IDs.
+    ///
+    /// When 'starts_for_each_pattern' is disabled (the default), then the
+    /// size of this is constrained to the possible starting configurations
+    /// based on the search parameters. (At time of writing, that's 4.)
+    /// However, when starting states for each pattern is enabled, then
+    /// there are N additional groups of starting states, where each group
+    /// reflects the different possible configurations and N is the number
+    /// of patterns.
+    starts: Vec<LazyStateID>,
+    /// A sequence of NFA/DFA powerset states that have been computed for
+    /// this lazy DFA. This sequence is indexable by untagged LazyStateIDs.
+    /// (Every tagged LazyStateID can be used to index this sequence by
+    /// converting it to its untagged form.)
+    states: Vec<State>,
+    /// A map from states to their corresponding IDs. This map may be
+    /// accessed via the raw byte representation of a state, which means
+    /// that a `State` does not need to be allocated to determine whether it
+    /// already exists in this map. Indeed, the existence of such a state is
+    /// what determines whether we allocate a new `State` or not.
+    ///
+    /// The higher level idea here is that we do just enough determinization
+    /// for a state to check whether we've already computed it. If we have,
+    /// then we can save a little (albeit not much) work. The real savings
+    /// is in memory usage. If we never checked for trivially duplicate
+    /// states, then our memory usage would explode to unreasonable levels.
+    states_to_id: StateMap,
+    /// Sparse sets used to track which NFA states have been visited during
+    /// various traversals.
+    sparses: SparseSets,
+    /// Scratch space for traversing the NFA graph. (We use space on the
+    /// heap instead of the call stack.)
+    stack: Vec<NFAStateID>,
+    /// Scratch space for building a NFA/DFA powerset state. This is used to
+    /// help amortize allocation since not every powerset state generated is
+    /// added to the cache. In particular, if it already exists in the cache,
+    /// then there is no need to allocate a new `State` for it.
+    scratch_state_builder: StateBuilderEmpty,
+    /// A simple abstraction for handling the saving of at most a single
+    /// state across a cache clearing. This is required for correctness.
+    /// Namely, if adding a new state after clearing the cache fails, then
+    /// the caller must retain the ability to continue using the state ID
+    /// given. The state corresponding to the state ID is what we preserve
+    /// across cache clearings.
+    state_saver: StateSaver,
+    /// The memory usage, in bytes, used by 'states' and 'states_to_id'. We
+    /// track this as new states are added since states use a variable
+    /// amount of heap. Tracking this as we add states makes it possible to
+    /// compute the total amount of memory used by the determinizer in
+    /// constant time.
+    memory_usage_state: usize,
+    /// The number of times the cache has been cleared. When a minimum cache
+    /// clear count is set, then the cache will return an error instead of
+    /// clearing the cache if the count has been exceeded.
+    clear_count: usize,
+    /// The total number of bytes searched since the last time this cache
+    /// was cleared, not including the current search.
+    ///
+    /// This can be added to the length of the current search to get the
+    /// true total number of bytes searched.
+    ///
+    /// This is generally only non-zero when the
+    /// `Cache::search_{start,update,finish}` APIs are used to track search
+    /// progress.
+    bytes_searched: usize,
+    /// The progress of the current search.
+    ///
+    /// This is only non-`None` when callers utilize the
+    /// `Cache::search_start`, `Cache::search_update` and
+    /// `Cache::search_finish` APIs.
+    ///
+    /// The purpose of recording search progress is to be able to make a
+    /// determination about the efficiency of the cache. Namely, by keeping
+    /// track of the number of bytes searched between cache clearings, the
+    /// lazy DFA can estimate whether the cache is being used effectively
+    /// and give up when it is not.
+    progress: Option<SearchProgress>,
+}
+
+impl Cache {
+    /// Create a new cache for the given lazy DFA.
+    ///
+    /// The cache returned should only be used for searches for the given
+    /// DFA. If you want to reuse the cache for another DFA, then you must
+    /// call [`Cache::reset`] with that DFA.
+    pub fn new(dfa: &DFA) -> Cache {
+        let mut cache = Cache {
+            trans: alloc::vec![],
+            starts: alloc::vec![],
+            states: alloc::vec![],
+            states_to_id: StateMap::new(),
+            sparses: SparseSets::new(dfa.get_nfa().states().len()),
+            stack: alloc::vec![],
+            scratch_state_builder: StateBuilderEmpty::new(),
+            state_saver: StateSaver::none(),
+            memory_usage_state: 0,
+            clear_count: 0,
+            bytes_searched: 0,
+            progress: None,
+        };
+        debug!("pre-init lazy DFA cache size: {}", cache.memory_usage());
+        Lazy { dfa, cache: &mut cache }.init_cache();
+        debug!("post-init lazy DFA cache size: {}", cache.memory_usage());
+        cache
+    }
+
+    /// Reset this cache such that it can be used for searching with the
+    /// given lazy DFA (and only that DFA).
+    ///
+    /// A cache reset permits reusing memory already allocated in this cache
+    /// with a different lazy DFA.
+    ///
+    /// Resetting a cache sets its "clear count" to 0. This is relevant if
+    /// the lazy DFA has been configured to "give up" after it has cleared
+    /// the cache a certain number of times.
+    ///
+    /// Any lazy state ID generated by the cache prior to resetting it is
+    /// invalid after the reset.
+    ///
+    /// # Example
+    ///
+    /// This shows how to re-purpose a cache for use with a different DFA.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
+    ///
+    /// let dfa1 = DFA::new(r"\w")?;
+    /// let dfa2 = DFA::new(r"\W")?;
+    ///
+    /// let mut cache = dfa1.create_cache();
+    /// assert_eq!(
+    ///     Some(HalfMatch::must(0, 2)),
+    ///     dfa1.try_search_fwd(&mut cache, &Input::new("Δ"))?,
+    /// );
+    ///
+    /// // Using 'cache' with dfa2 is not allowed. It may result in panics
+    /// // or incorrect results. In order to re-purpose the cache, we must
+    /// // reset it with the DFA we'd like to use it with.
+    /// //
+    /// // Similarly, after this reset, using the cache with 'dfa1' is also
+    /// // not allowed.
+    /// cache.reset(&dfa2);
+    /// assert_eq!(
+    ///     Some(HalfMatch::must(0, 3)),
+    ///     dfa2.try_search_fwd(&mut cache, &Input::new("☃"))?,
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn reset(&mut self, dfa: &DFA) {
+        Lazy::new(dfa, self).reset_cache()
+    }
+
+    /// Initializes a new search starting at the given position.
+    ///
+    /// If a previous search was unfinished, then it is finished
+    /// automatically and a new search is begun.
+    ///
+    /// Note that keeping track of search progress is _not necessary_
+    /// for correct implementations of search using a lazy DFA. Keeping
+    /// track of search progress is only necessary if you want the
+    /// [`Config::minimum_bytes_per_state`] configuration knob to work.
+    #[inline]
+    pub fn search_start(&mut self, at: usize) {
+        // If a previous search wasn't marked as finished, then finish it
+        // now automatically.
+        if let Some(p) = self.progress.take() {
+            self.bytes_searched += p.len();
+        }
+        self.progress = Some(SearchProgress { start: at, at });
+    }
+
+    /// Updates the current search to indicate that it has searched to the
+    /// current position.
+    ///
+    /// No special care needs to be taken for reverse searches. Namely, the
+    /// position given may be _less than_ the starting position of the
+    /// search.
+    ///
+    /// # Panics
+    ///
+    /// This panics if no search has been started by [`Cache::search_start`].
+    #[inline]
+    pub fn search_update(&mut self, at: usize) {
+        let p =
+            self.progress.as_mut().expect("no in-progress search to update");
+        p.at = at;
+    }
+
+    /// Indicates that a search has finished at the given position.
+    ///
+    /// # Panics
+    ///
+    /// This panics if no search has been started by [`Cache::search_start`].
+    #[inline]
+    pub fn search_finish(&mut self, at: usize) {
+        let mut p =
+            self.progress.take().expect("no in-progress search to finish");
+        p.at = at;
+        self.bytes_searched += p.len();
+    }
+
+    /// Returns the total number of bytes that have been searched since this
+    /// cache was last cleared.
+    ///
+    /// This is useful for determining the efficiency of the cache. For
+    /// example, the lazy DFA uses this value in conjunction with the
+    /// [`Config::minimum_bytes_per_state`] knob to help determine whether
+    /// it should quit searching.
+    ///
+    /// This always returns `0` if search progress isn't being tracked. Note
+    /// that the lazy DFA search routines in this crate always track search
+    /// progress.
+    pub fn search_total_len(&self) -> usize {
+        self.bytes_searched + self.progress.as_ref().map_or(0, |p| p.len())
+    }
+
+    /// Returns the total number of times this cache has been cleared since
+    /// it was either created or last reset.
+    ///
+    /// This is useful for informational purposes or if you want to change
+    /// search strategies based on the number of times the cache has been
+    /// cleared.
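+    ///
+    /// # Example
+    ///
+    /// A minimal illustration: a freshly created cache has never been
+    /// cleared.
+    ///
+    /// ```
+    /// use regex_automata::hybrid::dfa::DFA;
+    ///
+    /// let dfa = DFA::new(r"[a-z]+")?;
+    /// let cache = dfa.create_cache();
+    /// assert_eq!(0, cache.clear_count());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```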
+    pub fn clear_count(&self) -> usize {
+        self.clear_count
+    }
+
+    /// Returns the heap memory usage, in bytes, of this cache.
+    ///
+    /// This does **not** include the stack size used up by this cache. To
+    /// compute that, use `std::mem::size_of::<Cache>()`.
+    pub fn memory_usage(&self) -> usize {
+        const ID_SIZE: usize = size_of::<LazyStateID>();
+        const STATE_SIZE: usize = size_of::<State>();
+
+        // NOTE: If you make changes to the below, then
+        // 'minimum_cache_capacity' should be updated correspondingly.
+
+        self.trans.len() * ID_SIZE
+        + self.starts.len() * ID_SIZE
+        + self.states.len() * STATE_SIZE
+        // Maps likely use more memory than this, but it's probably close.
+        + self.states_to_id.len() * (STATE_SIZE + ID_SIZE)
+        + self.sparses.memory_usage()
+        + self.stack.capacity() * ID_SIZE
+        + self.scratch_state_builder.capacity()
+        // Heap memory used by 'State' in both 'states' and 'states_to_id'.
+        + self.memory_usage_state
+    }
+}
+
+/// Keeps track of the progress of the current search.
+///
+/// This is updated via the `Cache::search_{start,update,finish}` APIs to
+/// record how many bytes have been searched. This permits computing a
+/// heuristic that represents the efficiency of a cache, and thus helps inform
+/// whether the lazy DFA should give up or not.
+#[derive(Clone, Debug)]
+struct SearchProgress {
+    start: usize,
+    at: usize,
+}
+
+impl SearchProgress {
+    /// Returns the length, in bytes, of this search so far.
+    ///
+    /// This automatically handles the case of a reverse search, where `at`
+    /// is likely to be less than `start`.
+    fn len(&self) -> usize {
+        if self.start <= self.at {
+            self.at - self.start
+        } else {
+            self.start - self.at
+        }
+    }
+}
+
+/// A map from states to state identifiers. When using std, we use a standard
+/// hashmap, since it's a bit faster for this use case. (Other maps, like
+/// ones based on FNV, have not yet been benchmarked.)
+///
+/// The main purpose of this map is to reuse states where possible. This won't
+/// fully minimize the DFA, but it works well in a lot of cases.
+#[cfg(feature = "std")]
+type StateMap = std::collections::HashMap<State, LazyStateID>;
+#[cfg(not(feature = "std"))]
+type StateMap = alloc::collections::BTreeMap<State, LazyStateID>;
+
+/// A type that groups methods that require the base NFA/DFA and writable
+/// access to the cache.
+#[derive(Debug)]
+struct Lazy<'i, 'c> {
+    dfa: &'i DFA,
+    cache: &'c mut Cache,
+}
+
+impl<'i, 'c> Lazy<'i, 'c> {
+    /// Creates a new 'Lazy' wrapper for a DFA and its corresponding cache.
+    fn new(dfa: &'i DFA, cache: &'c mut Cache) -> Lazy<'i, 'c> {
+        Lazy { dfa, cache }
+    }
+
+    /// Return an immutable view by downgrading a writable cache to a read-only
+    /// cache.
+    fn as_ref<'a>(&'a self) -> LazyRef<'i, 'a> {
+        LazyRef::new(self.dfa, self.cache)
+    }
+
+    /// This is marked as 'inline(never)' to avoid bloating methods on 'DFA'
+    /// like 'next_state' and 'next_eoi_state' that are called in critical
+    /// areas. The idea is to let the optimizer focus on the other areas of
+    /// those methods as the hot path.
+    ///
+    /// Here's an example that justifies 'inline(never)':
+    ///
+    /// ```ignore
+    /// regex-cli find match hybrid \
+    ///     --cache-capacity 100000000 \
+    ///     -p '\pL{100}' \
+    ///     all-codepoints-utf8-100x
+    /// ```
+    ///
+    /// Where 'all-codepoints-utf8-100x' is the UTF-8 encoding of every
+    /// codepoint, in sequence, repeated 100 times.
+    ///
+    /// With 'inline(never)' hyperfine reports 1.1s per run. With
+    /// 'inline(always)', hyperfine reports 1.23s. So that's a 10% improvement.
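+    ///
+    /// A hypothetical way to reproduce the comparison (assuming 'regex-cli'
+    /// and 'hyperfine' are both installed, and that the haystack file has
+    /// been generated as described above):
+    ///
+    /// ```ignore
+    /// hyperfine "regex-cli find match hybrid \
+    ///     --cache-capacity 100000000 -p '\pL{100}' all-codepoints-utf8-100x"
+    /// ```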
+    #[cold]
+    #[inline(never)]
+    fn cache_next_state(
+        &mut self,
+        mut current: LazyStateID,
+        unit: alphabet::Unit,
+    ) -> Result<LazyStateID, CacheError> {
+        let stride2 = self.dfa.stride2();
+        let empty_builder = self.get_state_builder();
+        let builder = determinize::next(
+            self.dfa.get_nfa(),
+            self.dfa.get_config().get_match_kind(),
+            &mut self.cache.sparses,
+            &mut self.cache.stack,
+            &self.cache.states[current.as_usize_untagged() >> stride2],
+            unit,
+            empty_builder,
+        );
+        let save_state = !self.as_ref().state_builder_fits_in_cache(&builder);
+        if save_state {
+            self.save_state(current);
+        }
+        let next = self.add_builder_state(builder, |sid| sid)?;
+        if save_state {
+            current = self.saved_state_id();
+        }
+        // This is the payoff. The next time 'next_state' is called with this
+        // state and alphabet unit, it will find this transition and avoid
+        // having to re-determinize this transition.
+        self.set_transition(current, unit, next);
+        Ok(next)
+    }
+
+    /// Compute and cache the starting state for the given pattern ID (if
+    /// present) and the starting configuration.
+    ///
+    /// This panics if a pattern ID is given and the DFA isn't configured to
+    /// build anchored start states for each pattern.
+    ///
+    /// This will never return an unknown lazy state ID.
+    ///
+    /// If caching this state would otherwise result in a cache that has been
+    /// cleared too many times, then an error is returned.
+    #[cold]
+    #[inline(never)]
+    fn cache_start_group(
+        &mut self,
+        anchored: Anchored,
+        start: Start,
+    ) -> Result<LazyStateID, StartError> {
+        let nfa_start_id = match anchored {
+            Anchored::No => self.dfa.get_nfa().start_unanchored(),
+            Anchored::Yes => self.dfa.get_nfa().start_anchored(),
+            Anchored::Pattern(pid) => {
+                if !self.dfa.get_config().get_starts_for_each_pattern() {
+                    return Err(StartError::unsupported_anchored(anchored));
+                }
+                match self.dfa.get_nfa().start_pattern(pid) {
+                    None => return Ok(self.as_ref().dead_id()),
+                    Some(sid) => sid,
+                }
+            }
+        };
+
+        let id = self
+            .cache_start_one(nfa_start_id, start)
+            .map_err(StartError::cache)?;
+        self.set_start_state(anchored, start, id);
+        Ok(id)
+    }
+
+    /// Compute and cache the starting state for the given NFA state ID and the
+    /// starting configuration. The NFA state ID might be one of the following:
+    ///
+    /// 1) An unanchored start state to match any pattern.
+    /// 2) An anchored start state to match any pattern.
+    /// 3) An anchored start state for a particular pattern.
+    ///
+    /// This will never return an unknown lazy state ID.
+    ///
+    /// If caching this state would otherwise result in a cache that has been
+    /// cleared too many times, then an error is returned.
+    fn cache_start_one(
+        &mut self,
+        nfa_start_id: NFAStateID,
+        start: Start,
+    ) -> Result<LazyStateID, CacheError> {
+        let mut builder_matches = self.get_state_builder().into_matches();
+        determinize::set_lookbehind_from_start(
+            self.dfa.get_nfa(),
+            &start,
+            &mut builder_matches,
+        );
+        self.cache.sparses.set1.clear();
+        determinize::epsilon_closure(
+            self.dfa.get_nfa(),
+            nfa_start_id,
+            builder_matches.look_have(),
+            &mut self.cache.stack,
+            &mut self.cache.sparses.set1,
+        );
+        let mut builder = builder_matches.into_nfa();
+        determinize::add_nfa_states(
+            &self.dfa.get_nfa(),
+            &self.cache.sparses.set1,
+            &mut builder,
+        );
+        let tag_starts = self.dfa.get_config().get_specialize_start_states();
+        self.add_builder_state(builder, |id| {
+            if tag_starts {
+                id.to_start()
+            } else {
+                id
+            }
+        })
+    }
+
+    /// Either add the given builder state to this cache, or return an ID to an
+    /// equivalent state already in this cache.
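+    /// (Here, "equivalent" means the serialized representation of the two
+    /// powerset states is byte-for-byte identical, which is exactly what
+    /// 'states_to_id' is keyed on.)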
+    ///
+    /// In the case where no equivalent state exists, the idmap function given
+    /// may be used to transform the identifier allocated. This is useful if
+    /// the caller needs to tag the ID with additional information.
+    ///
+    /// This will never return an unknown lazy state ID.
+    ///
+    /// If caching this state would otherwise result in a cache that has been
+    /// cleared too many times, then an error is returned.
+    fn add_builder_state(
+        &mut self,
+        builder: StateBuilderNFA,
+        idmap: impl Fn(LazyStateID) -> LazyStateID,
+    ) -> Result<LazyStateID, CacheError> {
+        if let Some(&cached_id) =
+            self.cache.states_to_id.get(builder.as_bytes())
+        {
+            // Since we have a cached state, put the constructed state's
+            // memory back into our scratch space, so that it can be reused.
+            self.put_state_builder(builder);
+            return Ok(cached_id);
+        }
+        let result = self.add_state(builder.to_state(), idmap);
+        self.put_state_builder(builder);
+        result
+    }
+
+    /// Allocate a new state ID and add the given state to this cache.
+    ///
+    /// The idmap function given may be used to transform the identifier
+    /// allocated. This is useful if the caller needs to tag the ID with
+    /// additional information.
+    ///
+    /// This will never return an unknown lazy state ID.
+    ///
+    /// If caching this state would otherwise result in a cache that has been
+    /// cleared too many times, then an error is returned.
+    fn add_state(
+        &mut self,
+        state: State,
+        idmap: impl Fn(LazyStateID) -> LazyStateID,
+    ) -> Result<LazyStateID, CacheError> {
+        if !self.as_ref().state_fits_in_cache(&state) {
+            self.try_clear_cache()?;
+        }
+        // It's important for this to come second, since the above may clear
+        // the cache. If we clear the cache after ID generation, then the ID
+        // is likely bunk since it would have been generated based on a larger
+        // transition table.
+        let mut id = idmap(self.next_state_id()?);
+        if state.is_match() {
+            id = id.to_match();
+        }
+        // Add room in the transition table. Since this is a fresh state, all
+        // of its transitions are unknown.
+        self.cache.trans.extend(
+            iter::repeat(self.as_ref().unknown_id()).take(self.dfa.stride()),
+        );
+        // When we add a sentinel state, we never want to set any quit
+        // transitions. Technically, this is harmless, since sentinel states
+        // have all of their transitions set to loop back to themselves. But
+        // when creating sentinel states before the quit sentinel state,
+        // this will try to call 'set_transition' on a state ID that doesn't
+        // actually exist yet, which isn't allowed. So we just skip doing so
+        // entirely.
+        if !self.dfa.quitset.is_empty() && !self.as_ref().is_sentinel(id) {
+            let quit_id = self.as_ref().quit_id();
+            for b in self.dfa.quitset.iter() {
+                self.set_transition(id, alphabet::Unit::u8(b), quit_id);
+            }
+        }
+        self.cache.memory_usage_state += state.memory_usage();
+        self.cache.states.push(state.clone());
+        self.cache.states_to_id.insert(state, id);
+        Ok(id)
+    }
+
+    /// Allocate a new state ID.
+    ///
+    /// This will never return an unknown lazy state ID.
+    ///
+    /// If caching this state would otherwise result in a cache that has been
+    /// cleared too many times, then an error is returned.
+    fn next_state_id(&mut self) -> Result<LazyStateID, CacheError> {
+        let sid = match LazyStateID::new(self.cache.trans.len()) {
+            Ok(sid) => sid,
+            Err(_) => {
+                self.try_clear_cache()?;
+                // This has to pass since we check that ID capacity at
+                // construction time can fit at least MIN_STATES states.
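+                // (Clearing the cache shrinks the transition table back to
+                // just the sentinel states, so the fresh ID below is
+                // guaranteed to be representable.)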
+                LazyStateID::new(self.cache.trans.len()).unwrap()
+            }
+        };
+        Ok(sid)
+    }
+
+    /// Attempt to clear the cache used by this lazy DFA.
+    ///
+    /// If the cache has already been cleared the configured minimum number of
+    /// times, then this may return a cache error instead of clearing the
+    /// cache again. In this case, callers should bubble this up as the cache
+    /// can't be used until it is reset. Implementations of search should
+    /// convert this error into a [`MatchError::gave_up`].
+    ///
+    /// If 'self.state_saver' is set to save a state, then this state is
+    /// persisted through cache clearing. Otherwise, the cache is returned to
+    /// its state after initialization with two exceptions: its clear count
+    /// is incremented and some of its memory likely has additional capacity.
+    /// That is, clearing a cache does _not_ release memory.
+    ///
+    /// Otherwise, any lazy state ID generated by the cache prior to resetting
+    /// it is invalid after the reset.
+    fn try_clear_cache(&mut self) -> Result<(), CacheError> {
+        let c = self.dfa.get_config();
+        if let Some(min_count) = c.get_minimum_cache_clear_count() {
+            if self.cache.clear_count >= min_count {
+                if let Some(min_bytes_per) = c.get_minimum_bytes_per_state() {
+                    let len = self.cache.search_total_len();
+                    let min_bytes =
+                        min_bytes_per.saturating_mul(self.cache.states.len());
+                    // If we've searched 0 bytes then probably something has
+                    // gone wrong and the lazy DFA search implementation isn't
+                    // correctly updating the search progress state.
+                    if len == 0 {
+                        trace!(
+                            "number of bytes searched is 0, but \
+                             a minimum bytes per state searched ({}) is \
+                             enabled, maybe Cache::search_update \
+                             is not being used?",
+                            min_bytes_per,
+                        );
+                    }
+                    if len < min_bytes {
+                        trace!(
+                            "lazy DFA cache has been cleared {} times, \
+                             which exceeds the limit of {}, \
+                             AND its bytes searched per state is less \
+                             than the configured minimum of {}, \
+                             therefore lazy DFA is giving up \
+                             (bytes searched since cache clear = {}, \
+                             number of states = {})",
+                            self.cache.clear_count,
+                            min_count,
+                            min_bytes_per,
+                            len,
+                            self.cache.states.len(),
+                        );
+                        return Err(CacheError::bad_efficiency());
+                    } else {
+                        trace!(
+                            "lazy DFA cache has been cleared {} times, \
+                             which exceeds the limit of {}, \
+                             AND its bytes searched per state is greater \
+                             than the configured minimum of {}, \
+                             therefore lazy DFA is continuing! \
+                             (bytes searched since cache clear = {}, \
+                             number of states = {})",
+                            self.cache.clear_count,
+                            min_count,
+                            min_bytes_per,
+                            len,
+                            self.cache.states.len(),
+                        );
+                    }
+                } else {
+                    trace!(
+                        "lazy DFA cache has been cleared {} times, \
+                         which exceeds the limit of {}, \
+                         since there is no configured bytes per state \
+                         minimum, lazy DFA is giving up",
+                        self.cache.clear_count,
+                        min_count,
+                    );
+                    return Err(CacheError::too_many_cache_clears());
+                }
+            }
+        }
+        self.clear_cache();
+        Ok(())
+    }
+
+    /// Clears _and_ resets the cache. Resetting the cache means that no
+    /// states are persisted and the clear count is reset to 0. No heap memory
+    /// is released.
+    ///
+    /// Note that the caller may reset a cache with a different DFA than what
+    /// it was created from. In which case, the cache can now be used with the
+    /// new DFA (and not the old DFA).
+    fn reset_cache(&mut self) {
+        self.cache.state_saver = StateSaver::none();
+        self.clear_cache();
+        // If a new DFA is used, it might have a different number of NFA
+        // states, so we need to make sure our sparse sets have the
+        // appropriate size.
+        self.cache.sparses.resize(self.dfa.get_nfa().states().len());
+        self.cache.clear_count = 0;
+        self.cache.progress = None;
+    }
+
+    /// Clear the cache used by this lazy DFA.
+    ///
+    /// If 'self.state_saver' is set to save a state, then this state is
+    /// persisted through cache clearing. Otherwise, the cache is returned to
+    /// its state after initialization with two exceptions: its clear count
+    /// is incremented and some of its memory likely has additional capacity.
+    /// That is, clearing a cache does _not_ release memory.
+    ///
+    /// Otherwise, any lazy state ID generated by the cache prior to resetting
+    /// it is invalid after the reset.
+    fn clear_cache(&mut self) {
+        self.cache.trans.clear();
+        self.cache.starts.clear();
+        self.cache.states.clear();
+        self.cache.states_to_id.clear();
+        self.cache.memory_usage_state = 0;
+        self.cache.clear_count += 1;
+        self.cache.bytes_searched = 0;
+        if let Some(ref mut progress) = self.cache.progress {
+            progress.start = progress.at;
+        }
+        trace!(
+            "lazy DFA cache has been cleared (count: {})",
+            self.cache.clear_count
+        );
+        self.init_cache();
+        // If the state we want to save is one of the sentinel
+        // (unknown/dead/quit) states, then 'init_cache' adds those back, and
+        // their identifier values remain invariant. So there's no need to add
+        // it again. (And indeed, doing so would be incorrect!)
+        if let Some((old_id, state)) = self.cache.state_saver.take_to_save() {
+            // If the state is one of the special sentinel states, then it is
+            // automatically added by cache initialization and its ID always
+            // remains the same. With that said, this should never occur since
+            // the sentinel states are all loop states back to themselves. So
+            // we should never be in a position where we're attempting to save
+            // a sentinel state since we never compute transitions out of a
+            // sentinel state.
+            assert!(
+                !self.as_ref().is_sentinel(old_id),
+                "cannot save sentinel state"
+            );
+            let new_id = self
+                .add_state(state, |id| {
+                    if old_id.is_start() {
+                        // We don't need to consult the
+                        // 'specialize_start_states' config knob here, because
+                        // if it's disabled, old_id.is_start() will never
+                        // return true.
+                        id.to_start()
+                    } else {
+                        id
+                    }
+                })
+                // The unwrap here is OK because lazy DFA creation ensures that
+                // we have room in the cache to add MIN_STATES states. Since
+                // 'init_cache' above adds 3, this adds a 4th.
+                .expect("adding one state after cache clear must work");
+            self.cache.state_saver = StateSaver::Saved(new_id);
+        }
+    }
+
+    /// Initialize this cache from emptiness to a place where it can be used
+    /// for search.
+    ///
+    /// This is called both at cache creation time and after the cache has been
+    /// cleared.
+    ///
+    /// Primarily, this adds the three sentinel states and allocates some
+    /// initial memory.
+    fn init_cache(&mut self) {
+        // Why multiply by 2 here? Because we make room for both the unanchored
+        // and anchored start states. Unanchored is first and then anchored.
+        let mut starts_len = Start::len().checked_mul(2).unwrap();
+        // ... but if we also want start states for every pattern, we make room
+        // for that too.
+        if self.dfa.get_config().get_starts_for_each_pattern() {
+            starts_len += Start::len() * self.dfa.pattern_len();
+        }
+        self.cache
+            .starts
+            .extend(iter::repeat(self.as_ref().unknown_id()).take(starts_len));
+        // This is the set of NFA states that corresponds to each of our three
+        // sentinel states: the empty set.
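+        // (Because all three sentinels share this same empty powerset state,
+        // a single 'State' value can back all of them; only their IDs differ.)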
+        let dead = State::dead();
+        // This sets up some states that we use as sentinels that are present
+        // in every DFA. While it would be technically possible to implement
+        // this DFA without explicitly putting these states in the transition
+        // table, this is convenient to do to make `next_state` correct for all
+        // valid state IDs without needing explicit conditionals to special
+        // case these sentinel states.
+        //
+        // All three of these states are "dead" states. That is, all of
+        // them transition only to themselves. So once you enter one of
+        // these states, it's impossible to leave them. Thus, any correct
+        // search routine must explicitly check for these state types. (Sans
+        // `unknown`, since that is only used internally to represent missing
+        // states.)
+        let unk_id =
+            self.add_state(dead.clone(), |id| id.to_unknown()).unwrap();
+        let dead_id = self.add_state(dead.clone(), |id| id.to_dead()).unwrap();
+        let quit_id = self.add_state(dead.clone(), |id| id.to_quit()).unwrap();
+        assert_eq!(unk_id, self.as_ref().unknown_id());
+        assert_eq!(dead_id, self.as_ref().dead_id());
+        assert_eq!(quit_id, self.as_ref().quit_id());
+        // The idea here is that if you start in an unknown/dead/quit state and
+        // try to transition on them, then you should end up where you started.
+        self.set_all_transitions(unk_id, unk_id);
+        self.set_all_transitions(dead_id, dead_id);
+        self.set_all_transitions(quit_id, quit_id);
+        // All of these states are technically equivalent from the FSM
+        // perspective, so putting all three of them in the cache isn't
+        // possible. (They are distinct merely because we use their
+        // identifiers as sentinels to mean something, as indicated by the
+        // names.) Moreover, we wouldn't want to do that. Unknown and quit
+        // states are special in that they are artificial constructions of
+        // this implementation. But dead states are a natural part of
+        // determinization. When you reach a point in the NFA where you cannot
+        // go anywhere else, a dead state will naturally arise and we MUST
+        // reuse the canonical dead state that we've created here. Why? Because
+        // it is the state ID that tells the search routine whether a state is
+        // dead or not, and thus, whether to stop the search. Having a bunch of
+        // distinct dead states would be quite wasteful!
+        self.cache.states_to_id.insert(dead, dead_id);
+    }
+
+    /// Save the state corresponding to the ID given such that the state
+    /// persists through a cache clearing.
+    ///
+    /// While the state may persist, the ID may not. In order to discover the
+    /// new state ID, one must call 'saved_state_id' after a cache clearing.
+    fn save_state(&mut self, id: LazyStateID) {
+        let state = self.as_ref().get_cached_state(id).clone();
+        self.cache.state_saver = StateSaver::ToSave { id, state };
+    }
+
+    /// Returns the updated lazy state ID for a state that was persisted
+    /// through a cache clearing.
+    ///
+    /// It is only correct to call this routine when both a state has been
+    /// saved and the cache has just been cleared. Otherwise, this panics.
+    fn saved_state_id(&mut self) -> LazyStateID {
+        self.cache
+            .state_saver
+            .take_saved()
+            .expect("state saver does not have saved state ID")
+    }
+
+    /// Set all transitions on the state 'from' to 'to'.
+    fn set_all_transitions(&mut self, from: LazyStateID, to: LazyStateID) {
+        for unit in self.dfa.classes.representatives(..) {
+            self.set_transition(from, unit, to);
+        }
+    }
+
+    /// Set the transition on 'from' for 'unit' to 'to'.
+    ///
+    /// This panics if either 'from' or 'to' is invalid.
+    ///
+    /// All unit values are OK.
+    fn set_transition(
+        &mut self,
+        from: LazyStateID,
+        unit: alphabet::Unit,
+        to: LazyStateID,
+    ) {
+        assert!(self.as_ref().is_valid(from), "invalid 'from' id: {:?}", from);
+        assert!(self.as_ref().is_valid(to), "invalid 'to' id: {:?}", to);
+        let offset =
+            from.as_usize_untagged() + self.dfa.classes.get_by_unit(unit);
+        self.cache.trans[offset] = to;
+    }
+
+    /// Set the start ID for the given pattern ID (if given) and starting
+    /// configuration to the ID given.
+    ///
+    /// This panics if 'id' is not valid or if a pattern ID is given and
+    /// 'starts_for_each_pattern' is not enabled.
+    fn set_start_state(
+        &mut self,
+        anchored: Anchored,
+        start: Start,
+        id: LazyStateID,
+    ) {
+        assert!(self.as_ref().is_valid(id));
+        let start_index = start.as_usize();
+        let index = match anchored {
+            Anchored::No => start_index,
+            Anchored::Yes => Start::len() + start_index,
+            Anchored::Pattern(pid) => {
+                assert!(
+                    self.dfa.get_config().get_starts_for_each_pattern(),
+                    "attempted to search for a specific pattern \
+                     without enabling starts_for_each_pattern",
+                );
+                let pid = pid.as_usize();
+                (2 * Start::len()) + (Start::len() * pid) + start_index
+            }
+        };
+        self.cache.starts[index] = id;
+    }
+
+    /// Returns a state builder from this DFA that might have existing
+    /// capacity. This helps avoid allocs in cases where a state is built that
+    /// turns out to already be cached.
+    ///
+    /// Callers must put the state builder back with 'put_state_builder',
+    /// otherwise the allocation reuse won't work.
+    fn get_state_builder(&mut self) -> StateBuilderEmpty {
+        core::mem::replace(
+            &mut self.cache.scratch_state_builder,
+            StateBuilderEmpty::new(),
+        )
+    }
+
+    /// Puts the given state builder back into this DFA for reuse.
+    ///
+    /// Note that building a 'State' from a builder always creates a new alloc,
+    /// so callers should always put the builder back.
+    fn put_state_builder(&mut self, builder: StateBuilderNFA) {
+        let _ = core::mem::replace(
+            &mut self.cache.scratch_state_builder,
+            builder.clear(),
+        );
+    }
+}
+
+/// A type that groups methods that require the base NFA/DFA and read-only
+/// access to the cache.
+#[derive(Debug)]
+struct LazyRef<'i, 'c> {
+    dfa: &'i DFA,
+    cache: &'c Cache,
+}
+
+impl<'i, 'c> LazyRef<'i, 'c> {
+    /// Creates a new 'LazyRef' wrapper for a DFA and its corresponding cache.
+    fn new(dfa: &'i DFA, cache: &'c Cache) -> LazyRef<'i, 'c> {
+        LazyRef { dfa, cache }
+    }
+
+    /// Return the ID of the start state for the given configuration.
+    ///
+    /// If the start state has not yet been computed, then this returns an
+    /// unknown lazy state ID.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn get_cached_start_id(
+        &self,
+        anchored: Anchored,
+        start: Start,
+    ) -> Result<LazyStateID, StartError> {
+        let start_index = start.as_usize();
+        let index = match anchored {
+            Anchored::No => start_index,
+            Anchored::Yes => Start::len() + start_index,
+            Anchored::Pattern(pid) => {
+                if !self.dfa.get_config().get_starts_for_each_pattern() {
+                    return Err(StartError::unsupported_anchored(anchored));
+                }
+                if pid.as_usize() >= self.dfa.pattern_len() {
+                    return Ok(self.dead_id());
+                }
+                (2 * Start::len())
+                    + (Start::len() * pid.as_usize())
+                    + start_index
+            }
+        };
+        Ok(self.cache.starts[index])
+    }
+
+    /// Return the cached NFA/DFA powerset state for the given ID.
+    ///
+    /// This panics if the given ID does not address a valid state.
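+    ///
+    /// (An untagged lazy state ID is an offset into the transition table and
+    /// is always a multiple of the DFA's stride, so shifting right by
+    /// 'stride2' recovers the state's index in 'cache.states'.)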
+    fn get_cached_state(&self, sid: LazyStateID) -> &State {
+        let index = sid.as_usize_untagged() >> self.dfa.stride2();
+        &self.cache.states[index]
+    }
+
+    /// Returns true if and only if the given ID corresponds to a "sentinel"
+    /// state.
+    ///
+    /// A sentinel state is a state that signifies a special condition of
+    /// search, and where every transition maps back to itself. See LazyStateID
+    /// for more details. Note that start and match states are _not_ sentinels
+    /// since they may otherwise be real states with non-trivial transitions.
+    /// The purpose of sentinel states is purely to indicate something. Their
+    /// transitions are not meant to be followed.
+    fn is_sentinel(&self, id: LazyStateID) -> bool {
+        id == self.unknown_id() || id == self.dead_id() || id == self.quit_id()
+    }
+
+    /// Returns the ID of the unknown state for this lazy DFA.
+    fn unknown_id(&self) -> LazyStateID {
+        // This unwrap is OK since 0 is always a valid state ID.
+        LazyStateID::new(0).unwrap().to_unknown()
+    }
+
+    /// Returns the ID of the dead state for this lazy DFA.
+    fn dead_id(&self) -> LazyStateID {
+        // This unwrap is OK since the maximum value here is 1 * 512 = 512,
+        // which is <= 2047 (the maximum state ID on 16-bit systems). Where
+        // 512 is the worst case for our equivalence classes (every byte is a
+        // distinct class).
+        LazyStateID::new(1 << self.dfa.stride2()).unwrap().to_dead()
+    }
+
+    /// Returns the ID of the quit state for this lazy DFA.
+    fn quit_id(&self) -> LazyStateID {
+        // This unwrap is OK since the maximum value here is 2 * 512 = 1024,
+        // which is <= 2047 (the maximum state ID on 16-bit systems). Where
+        // 512 is the worst case for our equivalence classes (every byte is a
+        // distinct class).
+        LazyStateID::new(2 << self.dfa.stride2()).unwrap().to_quit()
+    }
+
+    /// Returns true if and only if the given ID is valid.
+    ///
+    /// An ID is valid if it is both a valid index into the transition table
+    /// and is a multiple of the DFA's stride.
+    fn is_valid(&self, id: LazyStateID) -> bool {
+        let id = id.as_usize_untagged();
+        id < self.cache.trans.len() && id % self.dfa.stride() == 0
+    }
+
+    /// Returns true if adding the state given would fit in this cache.
+    fn state_fits_in_cache(&self, state: &State) -> bool {
+        let needed = self.cache.memory_usage()
+            + self.memory_usage_for_one_more_state(state.memory_usage());
+        trace!(
+            "lazy DFA cache capacity check: {:?} ?<=? {:?}",
+            needed,
+            self.dfa.cache_capacity
+        );
+        needed <= self.dfa.cache_capacity
+    }
+
+    /// Returns true if adding the state to be built by the given builder would
+    /// fit in this cache.
+    fn state_builder_fits_in_cache(&self, state: &StateBuilderNFA) -> bool {
+        let needed = self.cache.memory_usage()
+            + self.memory_usage_for_one_more_state(state.as_bytes().len());
+        needed <= self.dfa.cache_capacity
+    }
+
+    /// Returns the additional memory usage, in bytes, required to add one more
+    /// state to this cache. The given size should be the heap size, in bytes,
+    /// that would be used by the new state being added.
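+    ///
+    /// For illustration (the sizes here are assumptions, not measurements):
+    /// with a 4-byte `LazyStateID`, a stride of 512 (the worst case of one
+    /// equivalence class per byte) and a new state using 40 heap bytes, this
+    /// would report `512 * 4 + STATE_SIZE + (STATE_SIZE + 4) + 40` bytes,
+    /// where `STATE_SIZE` is `size_of::<State>()`.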
+    fn memory_usage_for_one_more_state(
+        &self,
+        state_heap_size: usize,
+    ) -> usize {
+        const ID_SIZE: usize = size_of::<LazyStateID>();
+        const STATE_SIZE: usize = size_of::<State>();
+
+        self.dfa.stride() * ID_SIZE // additional space needed in trans table
+            + STATE_SIZE // space in cache.states
+            + (STATE_SIZE + ID_SIZE) // space in cache.states_to_id
+            + state_heap_size // heap memory used by state itself
+    }
+}
+
+/// A simple type that encapsulates the saving of a state ID through a cache
+/// clearing.
+///
+/// A state ID can be marked for saving with ToSave, while a state ID can be
+/// saved itself with Saved.
+#[derive(Clone, Debug)]
+enum StateSaver {
+    /// An empty state saver. In this case, no states (other than the special
+    /// sentinel states) are preserved after clearing the cache.
+    None,
+    /// An ID of a state (and the state itself) that should be preserved after
+    /// the lazy DFA's cache has been cleared. After clearing, the updated ID
+    /// is stored in 'Saved' since it may have changed.
+    ToSave { id: LazyStateID, state: State },
+    /// An ID of a state that has been persisted through a lazy DFA
+    /// cache clearing. The ID recorded here corresponds to an ID that was
+    /// once marked as ToSave. The IDs are likely not equivalent even though
+    /// the states they point to are.
+    Saved(LazyStateID),
+}
+
+impl StateSaver {
+    /// Create an empty state saver.
+    fn none() -> StateSaver {
+        StateSaver::None
+    }
+
+    /// Replace this state saver with an empty saver, and if this saver is a
+    /// request to save a state, return that request.
+    fn take_to_save(&mut self) -> Option<(LazyStateID, State)> {
+        match core::mem::replace(self, StateSaver::None) {
+            StateSaver::None | StateSaver::Saved(_) => None,
+            StateSaver::ToSave { id, state } => Some((id, state)),
+        }
+    }
+
+    /// Replace this state saver with an empty saver, and if this saver is a
+    /// saved state (or a request to save a state), return that state's ID.
+    ///
+    /// The idea here is that a request to save a state isn't necessarily
+    /// honored because it might not be needed. e.g., Some higher level code
+    /// might request a state to be saved on the off chance that the cache gets
+    /// cleared when a new state is added at a lower level. But if that new
+    /// state is never added, then the cache is never cleared and the state and
+    /// its ID remain unchanged.
+    fn take_saved(&mut self) -> Option<LazyStateID> {
+        match core::mem::replace(self, StateSaver::None) {
+            StateSaver::None => None,
+            StateSaver::Saved(id) | StateSaver::ToSave { id, .. } => Some(id),
+        }
+    }
+}
+
+/// The configuration used for building a lazy DFA.
+///
+/// As a convenience, [`DFA::config`] is an alias for [`Config::new`]. The
+/// advantage of the former is that it often lets you avoid importing the
+/// `Config` type directly.
+///
+/// A lazy DFA configuration is a simple data object that is typically used
+/// with [`Builder::configure`].
+///
+/// The default configuration guarantees that a search will never return a
+/// "gave up" or "quit" error, although it is possible for a search to fail
+/// if [`Config::starts_for_each_pattern`] wasn't enabled (which it is not by
+/// default) and an [`Anchored::Pattern`] mode is requested via [`Input`].
+#[derive(Clone, Debug, Default)]
+pub struct Config {
+    // As with other configuration types in this crate, we put all our knobs
+    // in options so that we can distinguish between "default" and "not set."
+    // This makes it possible to easily combine multiple configurations
+    // without default values overwriting explicitly specified values. See the
+    // 'overwrite' method.
+    //
+    // For docs on the fields below, see the corresponding method setters.
+    match_kind: Option<MatchKind>,
+    pre: Option<Option<Prefilter>>,
+    starts_for_each_pattern: Option<bool>,
+    byte_classes: Option<bool>,
+    unicode_word_boundary: Option<bool>,
+    quitset: Option<ByteSet>,
+    specialize_start_states: Option<bool>,
+    cache_capacity: Option<usize>,
+    skip_cache_capacity_check: Option<bool>,
+    minimum_cache_clear_count: Option<Option<usize>>,
+    minimum_bytes_per_state: Option<Option<usize>>,
+}
+
+impl Config {
+    /// Return a new default lazy DFA builder configuration.
+    pub fn new() -> Config {
+        Config::default()
+    }
+
+    /// Set the desired match semantics.
+    ///
+    /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the
+    /// match semantics of Perl-like regex engines. That is, when multiple
+    /// patterns would match at the same leftmost position, the pattern that
+    /// appears first in the concrete syntax is chosen.
+    ///
+    /// Currently, the only other kind of match semantics supported is
+    /// [`MatchKind::All`]. This corresponds to classical DFA construction
+    /// where all possible matches are added to the lazy DFA.
+    ///
+    /// Typically, `All` is used when one wants to execute an overlapping
+    /// search and `LeftmostFirst` otherwise. In particular, it rarely makes
+    /// sense to use `All` with the various "leftmost" find routines, since the
+    /// leftmost routines depend on the `LeftmostFirst` automata construction
+    /// strategy. Specifically, `LeftmostFirst` adds dead states to the
+    /// lazy DFA as a way to terminate the search and report a match.
+    /// `LeftmostFirst` also supports non-greedy matches using this strategy
+    /// whereas `All` does not.
+    ///
+    /// # Example: overlapping search
+    ///
+    /// This example shows the typical use of `MatchKind::All`, which is to
+    /// report overlapping matches.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     hybrid::dfa::{DFA, OverlappingState},
+    ///     HalfMatch, Input, MatchKind,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .build_many(&[r"\w+$", r"\S+$"])?;
+    /// let mut cache = dfa.create_cache();
+    /// let haystack = "@foo";
+    /// let mut state = OverlappingState::start();
+    ///
+    /// let expected = Some(HalfMatch::must(1, 4));
+    /// dfa.try_search_overlapping_fwd(
+    ///     &mut cache, &Input::new(haystack), &mut state,
+    /// )?;
+    /// assert_eq!(expected, state.get_match());
+    ///
+    /// // The first pattern also matches at the same position, so re-running
+    /// // the search will yield another match. Notice also that the first
+    /// // pattern is returned after the second. This is because the second
+    /// // pattern begins its match before the first, is therefore an earlier
+    /// // match and is thus reported first.
+    /// let expected = Some(HalfMatch::must(0, 4));
+    /// dfa.try_search_overlapping_fwd(
+    ///     &mut cache, &Input::new(haystack), &mut state,
+    /// )?;
+    /// assert_eq!(expected, state.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: reverse automaton to find start of match
+    ///
+    /// Another example for using `MatchKind::All` is for constructing a
+    /// reverse automaton to find the start of a match. `All` semantics are
+    /// used for this in order to find the longest possible match, which
+    /// corresponds to the leftmost starting position.
+    ///
+    /// Note that if you need the starting position then
+    /// [`hybrid::regex::Regex`](crate::hybrid::regex::Regex) will handle this
+    /// for you, so it's usually not necessary to do this yourself.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     nfa::thompson::NFA,
+    ///     Anchored, HalfMatch, Input, MatchKind,
+    /// };
+    ///
+    /// let input = Input::new("123foobar456");
+    /// let pattern = r"[a-z]+r";
+    ///
+    /// let dfa_fwd = DFA::new(pattern)?;
+    /// let dfa_rev = DFA::builder()
+    ///     .thompson(NFA::config().reverse(true))
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .build(pattern)?;
+    /// let mut cache_fwd = dfa_fwd.create_cache();
+    /// let mut cache_rev = dfa_rev.create_cache();
+    ///
+    /// let expected_fwd = HalfMatch::must(0, 9);
+    /// let expected_rev = HalfMatch::must(0, 3);
+    /// let got_fwd = dfa_fwd.try_search_fwd(&mut cache_fwd, &input)?.unwrap();
+    /// // Here we don't specify the pattern to search for since there's only
+    /// // one pattern and we're doing a leftmost search. But if this were an
+    /// // overlapping search, you'd need to specify the pattern that matched
+    /// // in the forward direction. (Otherwise, you might wind up finding the
+    /// // starting position of a match of some other pattern.) That in turn
+    /// // requires building the reverse automaton with starts_for_each_pattern
+    /// // enabled.
+    /// let input = input
+    ///     .clone()
+    ///     .range(..got_fwd.offset())
+    ///     .anchored(Anchored::Yes);
+    /// let got_rev = dfa_rev.try_search_rev(&mut cache_rev, &input)?.unwrap();
+    /// assert_eq!(expected_fwd, got_fwd);
+    /// assert_eq!(expected_rev, got_rev);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn match_kind(mut self, kind: MatchKind) -> Config {
+        self.match_kind = Some(kind);
+        self
+    }
+
+    /// Set a prefilter to be used whenever a start state is entered.
+    ///
+    /// A [`Prefilter`] in this context is meant to accelerate searches by
+    /// looking for literal prefixes that every match for the corresponding
+    /// pattern (or patterns) must start with. Once a prefilter produces a
+    /// match, the underlying search routine continues on to try and confirm
+    /// the match.
+    ///
+    /// Be warned that setting a prefilter does not guarantee that the search
+    /// will be faster. While it's usually a good bet, if the prefilter
+    /// produces a lot of false positive candidates (i.e., positions matched
+    /// by the prefilter but not by the regex), then the overall result can
+    /// be slower than if you had just executed the regex engine without any
+    /// prefilters.
+    ///
+    /// Note that unless [`Config::specialize_start_states`] has been
+    /// explicitly set, then setting this will also enable (when `pre` is
+    /// `Some`) or disable (when `pre` is `None`) start state specialization.
+    /// This occurs because without start state specialization, a prefilter
+    /// is likely to be less effective. And without a prefilter, start state
+    /// specialization is usually pointless.
+    ///
+    /// By default no prefilter is set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     util::prefilter::Prefilter,
+    ///     Input, HalfMatch, MatchKind,
+    /// };
+    ///
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "bar"]);
+    /// let re = DFA::builder()
+    ///     .configure(DFA::config().prefilter(pre))
+    ///     .build(r"(foo|bar)[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("foo1 barfox bar");
+    /// assert_eq!(
+    ///     Some(HalfMatch::must(0, 11)),
+    ///     re.try_search_fwd(&mut cache, &input)?,
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Be warned though that an incorrect prefilter can lead to incorrect
+    /// results!
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     util::prefilter::Prefilter,
+    ///     Input, HalfMatch, MatchKind,
+    /// };
+    ///
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "car"]);
+    /// let re = DFA::builder()
+    ///     .configure(DFA::config().prefilter(pre))
+    ///     .build(r"(foo|bar)[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("foo1 barfox bar");
+    /// assert_eq!(
+    ///     // No match reported even though there clearly is one!
+    ///     None,
+    ///     re.try_search_fwd(&mut cache, &input)?,
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn prefilter(mut self, pre: Option<Prefilter>) -> Config {
+        self.pre = Some(pre);
+        if self.specialize_start_states.is_none() {
+            self.specialize_start_states =
+                Some(self.get_prefilter().is_some());
+        }
+        self
+    }
+
+    /// Whether to compile a separate start state for each pattern in the
+    /// lazy DFA.
+    ///
+    /// When enabled, a separate **anchored** start state is added for each
+    /// pattern in the lazy DFA. When this start state is used, then the DFA
+    /// will only search for matches for the pattern specified, even if there
+    /// are other patterns in the DFA.
+    ///
+    /// The main downside of this option is that it can potentially increase
+    /// the size of the DFA and/or increase the time it takes to build the
+    /// DFA at search time. However, since this is configuration for a lazy
+    /// DFA, these states aren't actually built unless they're used. Enabling
+    /// this isn't necessarily free, however, as it may result in higher cache
+    /// usage.
+    ///
+    /// There are a few reasons one might want to enable this (it's disabled
+    /// by default):
+    ///
+    /// 1. When looking for the start of an overlapping match (using a reverse
+    /// DFA), doing it correctly requires starting the reverse search using the
+    /// starting state of the pattern that matched in the forward direction.
+    /// Indeed, when building a [`Regex`](crate::hybrid::regex::Regex), it
+    /// will automatically enable this option when building the reverse DFA
+    /// internally.
+    /// 2. When you want to use a DFA with multiple patterns both to search
+    /// for matches of any pattern and to search for anchored matches of one
+    /// particular pattern while using the same DFA. (Otherwise, you would need
+    /// to compile a new DFA for each pattern.)
+    ///
+    /// By default this is disabled.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use this option to permit the same lazy DFA
+    /// to run both general searches for any pattern and anchored searches for
+    /// a specific pattern.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     Anchored, HalfMatch, Input, PatternID,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().starts_for_each_pattern(true))
+    ///     .build_many(&[r"[a-z0-9]{6}", r"[a-z][a-z0-9]{5}"])?;
+    /// let mut cache = dfa.create_cache();
+    /// let haystack = "bar foo123";
+    ///
+    /// // Here's a normal unanchored search that looks for any pattern.
+    /// let expected = HalfMatch::must(0, 10);
+    /// let input = Input::new(haystack);
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(&mut cache, &input)?);
+    /// // We can also do a normal anchored search for any pattern. Since it's
+    /// // an anchored search, we position the start of the search where we
+    /// // know the match will begin.
+    /// let expected = HalfMatch::must(0, 10);
+    /// let input = Input::new(haystack).range(4..);
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(&mut cache, &input)?);
+    /// // Since we compiled anchored start states for each pattern, we can
+    /// // also look for matches of other patterns explicitly, even if a
+    /// // different pattern would have normally matched.
+    /// let expected = HalfMatch::must(1, 10);
+    /// let input = Input::new(haystack)
+    ///     .range(4..)
+    ///     .anchored(Anchored::Pattern(PatternID::must(1)));
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(&mut cache, &input)?);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn starts_for_each_pattern(mut self, yes: bool) -> Config {
+        self.starts_for_each_pattern = Some(yes);
+        self
+    }
+
+    /// Whether to attempt to shrink the size of the lazy DFA's alphabet or
+    /// not.
+    ///
+    /// This option is enabled by default and should never be disabled unless
+    /// one is debugging the lazy DFA.
+    ///
+    /// When enabled, the lazy DFA will use a map from all possible bytes
+    /// to their corresponding equivalence class. Each equivalence class
+    /// represents a set of bytes that does not discriminate between a match
+    /// and a non-match in the DFA. For example, the pattern `[ab]+` has at
+    /// least two equivalence classes: a set containing `a` and `b` and a set
+    /// containing every byte except for `a` and `b`. `a` and `b` are in the
+    /// same equivalence class because they never discriminate between a
+    /// match and a non-match.
+    ///
+    /// The advantage of this map is that the size of the transition table
+    /// can be reduced drastically from `#states * 256 * sizeof(LazyStateID)`
+    /// to `#states * k * sizeof(LazyStateID)` where `k` is the number of
+    /// equivalence classes (rounded up to the nearest power of 2). As a
+    /// result, total space usage can decrease substantially. Moreover, since a
+    /// smaller alphabet is used, DFA compilation during search becomes faster
+    /// as well since it will potentially be able to reuse a single transition
+    /// for multiple bytes.
+    ///
+    /// **WARNING:** Disabling this is only useful for debugging lazy DFAs; it
+    /// does not yield any speed advantages. Namely, even when this is
+    /// disabled, a byte class map is still used while searching. The only
+    /// difference is that every byte will be forced into its own distinct
+    /// equivalence class. This is useful for debugging the actual generated
+    /// transitions because it lets one see the transitions defined on actual
+    /// bytes instead of the equivalence classes.
+    pub fn byte_classes(mut self, yes: bool) -> Config {
+        self.byte_classes = Some(yes);
+        self
+    }
+
+    /// Heuristically enable Unicode word boundaries.
+    ///
+    /// When set, this will attempt to implement Unicode word boundaries as if
+    /// they were ASCII word boundaries. This only works when the search input
+    /// is ASCII only. If a non-ASCII byte is observed while searching, then a
+    /// [`MatchError::quit`] error is returned.
+    ///
+    /// A possible alternative to enabling this option is to simply use an
+    /// ASCII word boundary, e.g., via `(?-u:\b)`. The main reason to use this
+    /// option is if you absolutely need Unicode support. This option lets one
+    /// use a fast search implementation (a DFA) for some potentially very
+    /// common cases, while providing the option to fall back to some other
+    /// regex engine to handle the general case when an error is returned.
+    ///
+    /// If the pattern provided has no Unicode word boundary in it, then this
+    /// option has no effect. (That is, quitting on a non-ASCII byte only
+    /// occurs when this option is enabled _and_ a Unicode word boundary is
+    /// present in the pattern.)
+    ///
+    /// This is almost equivalent to setting all non-ASCII bytes to be quit
+    /// bytes. The only difference is that this will cause non-ASCII bytes to
+    /// be quit bytes _only_ when a Unicode word boundary is present in the
+    /// pattern.
+    ///
+    /// When enabling this option, callers _must_ be prepared to
+    /// handle a [`MatchError`] error during search. When using a
+    /// [`Regex`](crate::hybrid::regex::Regex), this corresponds to using the
+    /// `try_` suite of methods. Alternatively, if callers can guarantee that
+    /// their input is ASCII only, then a [`MatchError::quit`] error will never
+    /// be returned while searching.
+    ///
+    /// This is disabled by default.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to heuristically enable Unicode word boundaries
+    /// in a pattern. It also shows what happens when a search comes across a
+    /// non-ASCII byte.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     HalfMatch, Input, MatchError,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().unicode_word_boundary(true))
+    ///     .build(r"\b[0-9]+\b")?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// // The match occurs before the search ever observes the snowman
+    /// // character, so no error occurs.
+    /// let haystack = "foo 123 ☃";
+    /// let expected = Some(HalfMatch::must(0, 7));
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?;
+    /// assert_eq!(expected, got);
+    ///
+    /// // Notice that this search fails, even though the snowman character
+    /// // occurs after the ending match offset. This is because search
+    /// // routines read one byte past the end of the search to account for
+    /// // look-around, and indeed, this is required here to determine whether
+    /// // the trailing \b matches.
+    /// let haystack = "foo 123 ☃";
+    /// let expected = MatchError::quit(0xE2, 8);
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack));
+    /// assert_eq!(Err(expected), got);
+    ///
+    /// // Another example is executing a search where the span of the haystack
+    /// // we specify is all ASCII, but there is non-ASCII just before it. This
+    /// // correctly also reports an error.
+    /// let input = Input::new("β123").range(2..);
+    /// let expected = MatchError::quit(0xB2, 1);
+    /// let got = dfa.try_search_fwd(&mut cache, &input);
+    /// assert_eq!(Err(expected), got);
+    ///
+    /// // And similarly for the trailing word boundary.
+    /// let input = Input::new("123β").range(..3);
+    /// let expected = MatchError::quit(0xCE, 3);
+    /// let got = dfa.try_search_fwd(&mut cache, &input);
+    /// assert_eq!(Err(expected), got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn unicode_word_boundary(mut self, yes: bool) -> Config {
+        // We have a separate option for this instead of just setting the
+        // appropriate quit bytes here because we don't want to set quit bytes
+        // for every regex. We only want to set them when the regex contains a
+        // Unicode word boundary.
+        self.unicode_word_boundary = Some(yes);
+        self
+    }
+
+    /// Add a "quit" byte to the lazy DFA.
+    ///
+    /// When a quit byte is seen during search time, then search will return a
+    /// [`MatchError::quit`] error indicating the offset at which the search
+    /// stopped.
+    ///
+    /// A quit byte will always overrule any other aspects of a regex. For
+    /// example, if the `x` byte is added as a quit byte and the regex `\w` is
+    /// used, then observing `x` will cause the search to quit immediately
+    /// despite the fact that `x` is in the `\w` class.
+    ///
+    /// This mechanism is primarily useful for heuristically enabling certain
+    /// features like Unicode word boundaries in a DFA. Namely, if the input
+    /// to search is ASCII, then a Unicode word boundary can be implemented
+    /// via an ASCII word boundary with no change in semantics. Thus, a DFA
+    /// can attempt to match a Unicode word boundary but give up as soon as it
+    /// observes a non-ASCII byte. Indeed, if callers set all non-ASCII bytes
+    /// to be quit bytes, then Unicode word boundaries will be permitted when
+    /// building lazy DFAs. Of course, callers should enable
+    /// [`Config::unicode_word_boundary`] if they want this behavior instead.
+    /// (The advantage being that non-ASCII quit bytes will only be added if a
+    /// Unicode word boundary is in the pattern.)
+    ///
+    /// When enabling this option, callers _must_ be prepared to
+    /// handle a [`MatchError`] error during search. When using a
+    /// [`Regex`](crate::hybrid::regex::Regex), this corresponds to using the
+    /// `try_` suite of methods.
+    ///
+    /// By default, there are no quit bytes set.
+    ///
+    /// # Panics
+    ///
+    /// This panics if heuristic Unicode word boundaries are enabled and any
+    /// non-ASCII byte is removed from the set of quit bytes. Namely, enabling
+    /// Unicode word boundaries requires setting every non-ASCII byte to a quit
+    /// byte. So if the caller attempts to undo any of that, then this will
+    /// panic.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to cause a search to terminate if it sees a
+    /// `\n` byte. This could be useful if, for example, you wanted to prevent
+    /// a user supplied pattern from matching across a line boundary.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, MatchError, Input};
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().quit(b'\n', true))
+    ///     .build(r"foo\p{any}+bar")?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let haystack = "foo\nbar";
+    /// // Normally this would produce a match, since \p{any} contains '\n'.
+    /// // But since we instructed the automaton to enter a quit state if a
+    /// // '\n' is observed, this produces a match error instead.
+    /// let expected = MatchError::quit(b'\n', 3);
+    /// let got = dfa.try_search_fwd(
+    ///     &mut cache,
+    ///     &Input::new(haystack),
+    /// ).unwrap_err();
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn quit(mut self, byte: u8, yes: bool) -> Config {
+        if self.get_unicode_word_boundary() && !byte.is_ascii() && !yes {
+            panic!(
+                "cannot set non-ASCII byte to be non-quit when \
+                 Unicode word boundaries are enabled"
+            );
+        }
+        if self.quitset.is_none() {
+            self.quitset = Some(ByteSet::empty());
+        }
+        if yes {
+            self.quitset.as_mut().unwrap().add(byte);
+        } else {
+            self.quitset.as_mut().unwrap().remove(byte);
+        }
+        self
+    }
+
+    /// Enable specializing start states in the lazy DFA.
+    ///
+    /// When start states are specialized, an implementor of a search routine
+    /// using a lazy DFA can tell when the search has entered a starting state.
+    /// When start states aren't specialized, then it is impossible to know
+    /// whether the search has entered a start state.
+    ///
+    /// Ideally, this option wouldn't need to exist and we could always
+    /// specialize start states. The problem is that start states can be quite
+    /// active. This in turn means that an efficient search routine is likely
+    /// to ping-pong between a heavily optimized hot loop that handles most
+    /// states and a less optimized specialized handling of start states.
+    /// This causes branches to get heavily mispredicted and overall can
+    /// materially decrease throughput. Therefore, specializing start states
+    /// should only be enabled when it is needed.
+    ///
+    /// Knowing whether a search is in a start state is typically useful when a
+    /// prefilter is active for the search. A prefilter is typically only run
+    /// when in a start state and a prefilter can greatly accelerate a search.
+    /// Therefore, the possible cost of specializing start states is worth it
+    /// in this case. Otherwise, if you have no prefilter, there is likely no
+    /// reason to specialize start states.
+    ///
+    /// This is disabled by default, but note that it is automatically
+    /// enabled (or disabled) if [`Config::prefilter`] is set. Namely, unless
+    /// `specialize_start_states` has already been set, [`Config::prefilter`]
+    /// will automatically enable or disable it based on whether a prefilter
+    /// is present or not, respectively. This is done because a prefilter's
+    /// effectiveness is rooted in being executed whenever the DFA is in a
+    /// start state, and that's only possible to do when they are specialized.
+    ///
+    /// Note that it is plausibly reasonable to _disable_ this option
+    /// explicitly while _enabling_ a prefilter. In that case, a prefilter
+    /// will still be run at the beginning of a search, but never again. This
+    /// in theory could strike a good balance if you're in a situation where a
+    /// prefilter is likely to produce many false positive candidates.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to enable start state specialization and then
+    /// shows how to check whether a state is a start state or not.
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, MatchError, Input};
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().specialize_start_states(true))
+    ///     .build(r"[a-z]+")?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let haystack = "123 foobar 4567".as_bytes();
+    /// let sid = dfa.start_state_forward(&mut cache, &Input::new(haystack))?;
+    /// // The ID returned by 'start_state_forward' will always be tagged as
+    /// // a start state when start state specialization is enabled.
+    /// assert!(sid.is_tagged());
+    /// assert!(sid.is_start());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Compare the above with the default lazy DFA configuration where
+    /// start states are _not_ specialized. In this case, the start state
+    /// is not tagged and `sid.is_start()` returns false.
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, MatchError, Input};
+    ///
+    /// let dfa = DFA::new(r"[a-z]+")?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let haystack = "123 foobar 4567".as_bytes();
+    /// let sid = dfa.start_state_forward(&mut cache, &Input::new(haystack))?;
+    /// // Start states are not tagged in the default configuration!
+    /// assert!(!sid.is_tagged());
+    /// assert!(!sid.is_start());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn specialize_start_states(mut self, yes: bool) -> Config {
+        self.specialize_start_states = Some(yes);
+        self
+    }
+
+    /// Sets the maximum amount of heap memory, in bytes, to allocate to the
+    /// cache for use during a lazy DFA search. If the lazy DFA would otherwise
+    /// use more heap memory, then, depending on other configuration knobs,
+    /// either stop the search and return an error or clear the cache and
+    /// continue the search.
+    ///
+    /// The default cache capacity is some "reasonable" number that will
+    /// accommodate most regular expressions. You may find that if you need
+    /// to build a large DFA then it may be necessary to increase the cache
+    /// capacity.
+    ///
+    /// Note that while building a lazy DFA will do a "minimum" check to ensure
+    /// the capacity is big enough, this is more or less about correctness.
+    /// If the cache is bigger than the minimum but still "too small," then the
+    /// lazy DFA could wind up spending a lot of time clearing the cache and
+    /// recomputing transitions, thus negating the performance benefits of a
+    /// lazy DFA. Thus, setting the cache capacity is mostly an experimental
+    /// endeavor. For most common patterns, however, the default should be
+    /// sufficient.
+    ///
+    /// For more details on how the lazy DFA's cache is used, see the
+    /// documentation for [`Cache`].
+    ///
+    /// # Example
+    ///
+    /// This example shows what happens if the configured cache capacity is
+    /// too small. In such cases, one can override the cache capacity to make
+    /// it bigger. Alternatively, one might want to use less memory by setting
+    /// a smaller cache capacity.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
+    ///
+    /// let pattern = r"\p{L}{1000}";
+    ///
+    /// // The default cache capacity is likely too small to deal with regexes
+    /// // that are very large. Large repetitions of large Unicode character
+    /// // classes are a common way to make very large regexes.
+    /// let _ = DFA::new(pattern).unwrap_err();
+    /// // Bump up the capacity to something bigger.
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().cache_capacity(100 * (1<<20))) // 100 MB
+    ///     .build(pattern)?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let haystack = "ͰͲͶͿΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙ".repeat(50);
+    /// let expected = Some(HalfMatch::must(0, 2000));
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new(&haystack))?;
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn cache_capacity(mut self, bytes: usize) -> Config {
+        self.cache_capacity = Some(bytes);
+        self
+    }
+
+    /// Configures construction of a lazy DFA to use the minimum cache capacity
+    /// if the configured capacity is otherwise too small for the provided NFA.
+    ///
+    /// This is useful if you never want lazy DFA construction to fail because
+    /// of a capacity that is too small.
+    ///
+    /// In general, this option is typically not a good idea. In particular,
+    /// while a minimum cache capacity does permit the lazy DFA to function
+    /// where it otherwise couldn't, it's plausible that it may not function
+    /// well if it's constantly running out of room. In that case, the speed
+    /// advantages of the lazy DFA may be negated. On the other hand, the
+    /// "minimum" cache capacity computed may not be completely accurate and
+    /// could actually be bigger than what is really necessary. Therefore, it
+    /// is plausible that using the minimum cache capacity could still result
+    /// in very good performance.
+    ///
+    /// This is disabled by default.
+    ///
+    /// # Example
+    ///
+    /// This example shows what happens if the configured cache capacity is
+    /// too small. In such cases, one could override the capacity explicitly.
+    /// An alternative, demonstrated here, lets us force construction to use
+    /// the minimum cache capacity if the configured capacity is otherwise
+    /// too small.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
+    ///
+    /// let pattern = r"\p{L}{1000}";
+    ///
+    /// // The default cache capacity is likely too small to deal with regexes
+    /// // that are very large. Large repetitions of large Unicode character
+    /// // classes are a common way to make very large regexes.
+    /// let _ = DFA::new(pattern).unwrap_err();
+    /// // Configure construction such that it automatically selects the
+    /// // minimum cache capacity if it would otherwise be too small.
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().skip_cache_capacity_check(true))
+    ///     .build(pattern)?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let haystack = "ͰͲͶͿΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙ".repeat(50);
+    /// let expected = Some(HalfMatch::must(0, 2000));
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new(&haystack))?;
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn skip_cache_capacity_check(mut self, yes: bool) -> Config {
+        self.skip_cache_capacity_check = Some(yes);
+        self
+    }
+
+    /// Configure a lazy DFA search to quit after a certain number of cache
+    /// clearings.
+    ///
+    /// When a minimum is set, then a lazy DFA search will *possibly* "give
+    /// up" after the minimum number of cache clearings has occurred. This is
+    /// typically useful in scenarios where callers want to detect whether the
+    /// lazy DFA search is "efficient" or not. If the cache is cleared too many
+    /// times, this is a good indicator that it is not efficient, and thus, the
+    /// caller may wish to use some other regex engine.
+ /// + /// Note that the number of times a cache is cleared is a property of + /// the cache itself. Thus, if a cache is used in a subsequent search + /// with a similarly configured lazy DFA, then it could cause the + /// search to "give up" if the cache needed to be cleared, depending + /// on its internal count and configured minimum. The cache clear + /// count can only be reset to `0` via [`DFA::reset_cache`] (or + /// [`Regex::reset_cache`](crate::hybrid::regex::Regex::reset_cache) if + /// you're using the `Regex` API). + /// + /// By default, no minimum is configured. Thus, a lazy DFA search will + /// never give up due to cache clearings. If you do set this option, you + /// might consider also setting [`Config::minimum_bytes_per_state`] in + /// order for the lazy DFA to take efficiency into account before giving + /// up. + /// + /// # Example + /// + /// This example uses a somewhat pathological configuration to demonstrate + /// the _possible_ behavior of cache clearing and how it might result + /// in a search that returns an error. + /// + /// It is important to note that the precise mechanics of how and when + /// a cache gets cleared is an implementation detail. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::dfa::DFA, Input, MatchError, MatchErrorKind}; + /// + /// // This is a carefully chosen regex. The idea is to pick one + /// // that requires some decent number of states (hence the bounded + /// // repetition). But we specifically choose to create a class with an + /// // ASCII letter and a non-ASCII letter so that we can check that no new + /// // states are created once the cache is full. Namely, if we fill up the + /// // cache on a haystack of 'a's, then in order to match one 'β', a new + /// // state will need to be created since a 'β' is encoded with multiple + /// // bytes. Since there's no room for this state, the search should quit + /// // at the very first position. + /// let pattern = r"[aβ]{100}"; + /// let dfa = DFA::builder() + /// .configure( + /// // Configure it so that we have the minimum cache capacity + /// // possible. And that if any clearings occur, the search quits. + /// DFA::config() + /// .skip_cache_capacity_check(true) + /// .cache_capacity(0) + /// .minimum_cache_clear_count(Some(0)), + /// ) + /// .build(pattern)?; + /// let mut cache = dfa.create_cache(); + /// + /// // Our search will give up before reaching the end! + /// let haystack = "a".repeat(101).into_bytes(); + /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack)); + /// assert!(matches!( + /// *result.unwrap_err().kind(), + /// MatchErrorKind::GaveUp { .. }, + /// )); + /// + /// // Now that we know the cache is full, if we search a haystack that we + /// // know will require creating at least one new state, it should not + /// // be able to make much progress. + /// let haystack = "β".repeat(101).into_bytes(); + /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack)); + /// assert!(matches!( + /// *result.unwrap_err().kind(), + /// MatchErrorKind::GaveUp { .. }, + /// )); + /// + /// // If we reset the cache, then we should be able to create more states + /// // and make more progress with searching for betas. + /// cache.reset(&dfa); + /// let haystack = "β".repeat(101).into_bytes(); + /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack)); + /// assert!(matches!( + /// *result.unwrap_err().kind(), + /// MatchErrorKind::GaveUp { .. 
},
+    /// ));
+    ///
+    /// // ... switching back to ASCII still makes progress since it just needs
+    /// // to set transitions on existing states!
+    /// let haystack = "a".repeat(101).into_bytes();
+    /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack));
+    /// assert!(matches!(
+    ///     *result.unwrap_err().kind(),
+    ///     MatchErrorKind::GaveUp { .. },
+    /// ));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn minimum_cache_clear_count(mut self, min: Option<usize>) -> Config {
+        self.minimum_cache_clear_count = Some(min);
+        self
+    }
+
+    /// Configure a lazy DFA search to quit only when its efficiency drops
+    /// below the given minimum.
+    ///
+    /// The efficiency of the cache is determined by the number of DFA states
+    /// compiled per byte of haystack searched. For example, if the efficiency
+    /// is 2, then it means the lazy DFA is creating a new DFA state after
+    /// searching approximately 2 bytes in a haystack. Generally speaking, 2
+    /// is quite bad and it's likely that even a slower regex engine like the
+    /// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) would be faster.
+    ///
+    /// This has no effect if [`Config::minimum_cache_clear_count`] is not set.
+    /// Namely, this option only kicks in when the cache has been cleared more
+    /// than the minimum number. If no minimum is set, then the cache is simply
+    /// cleared whenever it fills up and it is impossible for the lazy DFA to
+    /// quit due to ineffective use of the cache.
+    ///
+    /// In general, if one is setting [`Config::minimum_cache_clear_count`],
+    /// then one should probably also set this knob as well. The reason is
+    /// that the absolute number of times the cache is cleared is generally
+    /// not a great predictor of efficiency. For example, if a new DFA state
+    /// is created for every 1,000 bytes searched, then it wouldn't be hard
+    /// for the cache to get cleared more than `N` times and then cause the
+    /// lazy DFA to quit. But a new DFA state every 1,000 bytes is likely quite
+    /// good from a performance perspective, and it's likely that the lazy
+    /// DFA should continue searching, even if it requires clearing the cache
+    /// occasionally.
+    ///
+    /// Finally, note that if you're implementing your own lazy DFA search
+    /// routine and also want this efficiency check to work correctly, then
+    /// you'll need to use the following routines to record search progress:
+    ///
+    /// * Call [`Cache::search_start`] at the beginning of every search.
+    /// * Call [`Cache::search_update`] whenever [`DFA::next_state`] is
+    /// called.
+    /// * Call [`Cache::search_finish`] before completing a search. (It is
+    /// not strictly necessary to call this when an error is returned, as
+    /// `Cache::search_start` will automatically finish the previous search
+    /// for you. But calling it where possible before returning helps improve
+    /// the accuracy of how many bytes have actually been searched.)
+    pub fn minimum_bytes_per_state(mut self, min: Option<usize>) -> Config {
+        self.minimum_bytes_per_state = Some(min);
+        self
+    }
+
+    /// Returns the match semantics set in this configuration.
+    pub fn get_match_kind(&self) -> MatchKind {
+        self.match_kind.unwrap_or(MatchKind::LeftmostFirst)
+    }
+
+    /// Returns the prefilter set in this configuration, if one at all.
+    pub fn get_prefilter(&self) -> Option<&Prefilter> {
+        self.pre.as_ref().unwrap_or(&None).as_ref()
+    }
+
+    /// Returns whether this configuration has enabled anchored starting states
+    /// for every pattern in the DFA.
+    pub fn get_starts_for_each_pattern(&self) -> bool {
+        self.starts_for_each_pattern.unwrap_or(false)
+    }
+
+    /// Returns whether this configuration has enabled byte classes or not.
+    /// This is typically a debugging oriented option, as disabling it confers
+    /// no speed benefit.
+    pub fn get_byte_classes(&self) -> bool {
+        self.byte_classes.unwrap_or(true)
+    }
+
+    /// Returns whether this configuration has enabled heuristic Unicode word
+    /// boundary support. When enabled, it is possible for a search to return
+    /// an error.
+    pub fn get_unicode_word_boundary(&self) -> bool {
+        self.unicode_word_boundary.unwrap_or(false)
+    }
+
+    /// Returns whether this configuration will instruct the lazy DFA to enter
+    /// a quit state whenever the given byte is seen during a search. When at
+    /// least one byte has this enabled, it is possible for a search to return
+    /// an error.
+    pub fn get_quit(&self, byte: u8) -> bool {
+        self.quitset.map_or(false, |q| q.contains(byte))
+    }
+
+    /// Returns whether this configuration will instruct the lazy DFA to
+    /// "specialize" start states. When enabled, the lazy DFA will tag start
+    /// states so that search routines using the lazy DFA can detect when
+    /// it's in a start state and do some kind of optimization (like run a
+    /// prefilter).
+    pub fn get_specialize_start_states(&self) -> bool {
+        self.specialize_start_states.unwrap_or(false)
+    }
+
+    /// Returns the cache capacity set on this configuration.
+    pub fn get_cache_capacity(&self) -> usize {
+        self.cache_capacity.unwrap_or(2 * (1 << 20))
+    }
+
+    /// Returns whether the cache capacity check should be skipped.
+    pub fn get_skip_cache_capacity_check(&self) -> bool {
+        self.skip_cache_capacity_check.unwrap_or(false)
+    }
+
+    /// Returns, if set, the minimum number of times the cache must be cleared
+    /// before a lazy DFA search can give up. When no minimum is set, then a
+    /// search will never quit and will always clear the cache whenever it
+    /// fills up.
+    pub fn get_minimum_cache_clear_count(&self) -> Option<usize> {
+        self.minimum_cache_clear_count.unwrap_or(None)
+    }
+
+    /// Returns, if set, the minimum number of bytes per state that need to be
+    /// processed in order for the lazy DFA to keep going. If the actual number
+    /// of bytes per state falls below this minimum (and the cache has been
+    /// cleared a minimum number of times), then the lazy DFA will return a
+    /// "gave up" error.
+    pub fn get_minimum_bytes_per_state(&self) -> Option<usize> {
+        self.minimum_bytes_per_state.unwrap_or(None)
+    }
+
+    /// Returns the minimum lazy DFA cache capacity required for the given NFA.
+    ///
+    /// The cache capacity required for a particular NFA may change without
+    /// notice. Callers should not rely on it being stable.
+    ///
+    /// This is useful for informational purposes, but can also be useful for
+    /// other reasons. For example, if one wants to check the minimum cache
+    /// capacity themselves or if one wants to set the capacity based on the
+    /// minimum.
+    ///
+    /// This may return an error if this configuration does not support all of
+    /// the instructions used in the given NFA. For example, if the NFA has a
+    /// Unicode word boundary but this configuration does not enable heuristic
+    /// support for Unicode word boundaries.
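+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of one way to use the computed minimum: derive the
+    /// configured cache capacity from it with some headroom. (The pattern and
+    /// multiplier here are illustrative; any will do.)
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, nfa::thompson::NFA, HalfMatch, Input};
+    ///
+    /// let nfa = NFA::new(r"[a-z]+")?;
+    /// let config = DFA::config();
+    /// let min = config.get_minimum_cache_capacity(&nfa)?;
+    /// // Give the cache an order of magnitude more room than the minimum.
+    /// let dfa = DFA::builder()
+    ///     .configure(config.cache_capacity(10 * min))
+    ///     .build_from_nfa(nfa)?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let expected = Some(HalfMatch::must(0, 6));
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new("foobar"))?;
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```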
+    pub fn get_minimum_cache_capacity(
+        &self,
+        nfa: &thompson::NFA,
+    ) -> Result<usize, BuildError> {
+        let quitset = self.quit_set_from_nfa(nfa)?;
+        let classes = self.byte_classes_from_nfa(nfa, &quitset);
+        let starts = self.get_starts_for_each_pattern();
+        Ok(minimum_cache_capacity(nfa, &classes, starts))
+    }
+
+    /// Returns the byte class map used during search from the given NFA.
+    ///
+    /// If byte classes are disabled on this configuration, then a map is
+    /// returned that puts each byte in its own equivalent class.
+    fn byte_classes_from_nfa(
+        &self,
+        nfa: &thompson::NFA,
+        quit: &ByteSet,
+    ) -> ByteClasses {
+        if !self.get_byte_classes() {
+            // The lazy DFA will always use the equivalence class map, but
+            // enabling this option is useful for debugging. Namely, this will
+            // cause all transitions to be defined over their actual bytes
+            // instead of an opaque equivalence class identifier. The former is
+            // much easier to grok as a human.
+            ByteClasses::singletons()
+        } else {
+            let mut set = nfa.byte_class_set().clone();
+            // It is important to distinguish any "quit" bytes from all other
+            // bytes. Otherwise, a non-quit byte may end up in the same class
+            // as a quit byte, and thus cause the DFA to stop when it
+            // shouldn't.
+            //
+            // Test case:
+            //
+            //   regex-cli find match hybrid --unicode-word-boundary \
+            //     -p '^#' -p '\b10\.55\.182\.100\b' -y @conn.json.1000x.log
+            if !quit.is_empty() {
+                set.add_set(&quit);
+            }
+            set.byte_classes()
+        }
+    }
+
+    /// Return the quit set for this configuration and the given NFA.
+    ///
+    /// This may return an error if the NFA is incompatible with this
+    /// configuration's quit set. For example, if the NFA has a Unicode word
+    /// boundary and the quit set doesn't include non-ASCII bytes.
+    fn quit_set_from_nfa(
+        &self,
+        nfa: &thompson::NFA,
+    ) -> Result<ByteSet, BuildError> {
+        let mut quit = self.quitset.unwrap_or(ByteSet::empty());
+        if nfa.look_set_any().contains_word_unicode() {
+            if self.get_unicode_word_boundary() {
+                for b in 0x80..=0xFF {
+                    quit.add(b);
+                }
+            } else {
+                // If heuristic support for Unicode word boundaries wasn't
+                // enabled, then we can still check if our quit set is correct.
+                // If the caller set their quit bytes in a way that causes the
+                // DFA to quit on at least all non-ASCII bytes, then that's all
+                // we need for heuristic support to work.
+                if !quit.contains_range(0x80, 0xFF) {
+                    return Err(
+                        BuildError::unsupported_dfa_word_boundary_unicode(),
+                    );
+                }
+            }
+        }
+        Ok(quit)
+    }
+
+    /// Overwrite the default configuration such that the options in `o` are
+    /// always used. If an option in `o` is not set, then the corresponding
+    /// option in `self` is used. If it's not set in `self` either, then it
+    /// remains not set.
+ fn overwrite(&self, o: Config) -> Config { + Config { + match_kind: o.match_kind.or(self.match_kind), + pre: o.pre.or_else(|| self.pre.clone()), + starts_for_each_pattern: o + .starts_for_each_pattern + .or(self.starts_for_each_pattern), + byte_classes: o.byte_classes.or(self.byte_classes), + unicode_word_boundary: o + .unicode_word_boundary + .or(self.unicode_word_boundary), + quitset: o.quitset.or(self.quitset), + specialize_start_states: o + .specialize_start_states + .or(self.specialize_start_states), + cache_capacity: o.cache_capacity.or(self.cache_capacity), + skip_cache_capacity_check: o + .skip_cache_capacity_check + .or(self.skip_cache_capacity_check), + minimum_cache_clear_count: o + .minimum_cache_clear_count + .or(self.minimum_cache_clear_count), + minimum_bytes_per_state: o + .minimum_bytes_per_state + .or(self.minimum_bytes_per_state), + } + } +} + +/// A builder for constructing a lazy deterministic finite automaton from +/// regular expressions. +/// +/// As a convenience, [`DFA::builder`] is an alias for [`Builder::new`]. The +/// advantage of the former is that it often lets you avoid importing the +/// `Builder` type directly. +/// +/// This builder provides two main things: +/// +/// 1. It provides a few different `build` routines for actually constructing +/// a DFA from different kinds of inputs. The most convenient is +/// [`Builder::build`], which builds a DFA directly from a pattern string. The +/// most flexible is [`Builder::build_from_nfa`], which builds a DFA straight +/// from an NFA. +/// 2. The builder permits configuring a number of things. +/// [`Builder::configure`] is used with [`Config`] to configure aspects of +/// the DFA and the construction process itself. [`Builder::syntax`] and +/// [`Builder::thompson`] permit configuring the regex parser and Thompson NFA +/// construction, respectively. The syntax and thompson configurations only +/// apply when building from a pattern string. +/// +/// This builder always constructs a *single* lazy DFA. As such, this builder +/// can only be used to construct regexes that either detect the presence +/// of a match or find the end location of a match. A single DFA cannot +/// produce both the start and end of a match. For that information, use a +/// [`Regex`](crate::hybrid::regex::Regex), which can be similarly configured +/// using [`regex::Builder`](crate::hybrid::regex::Builder). The main reason +/// to use a DFA directly is if the end location of a match is enough for your +/// use case. Namely, a `Regex` will construct two lazy DFAs instead of one, +/// since a second reverse DFA is needed to find the start of a match. +/// +/// # Example +/// +/// This example shows how to build a lazy DFA that uses a tiny cache capacity +/// and completely disables Unicode. That is: +/// +/// * Things such as `\w`, `.` and `\b` are no longer Unicode-aware. `\w` +/// and `\b` are ASCII-only while `.` matches any byte except for `\n` +/// (instead of any UTF-8 encoding of a Unicode scalar value except for +/// `\n`). Things that are Unicode only, such as `\pL`, are not allowed. +/// * The pattern itself is permitted to match invalid UTF-8. For example, +/// things like `[^a]` that match any byte except for `a` are permitted. 
+///
+/// ```
+/// use regex_automata::{
+///     hybrid::dfa::DFA,
+///     nfa::thompson,
+///     util::syntax,
+///     HalfMatch, Input,
+/// };
+///
+/// let dfa = DFA::builder()
+///     .configure(DFA::config().cache_capacity(5_000))
+///     .thompson(thompson::Config::new().utf8(false))
+///     .syntax(syntax::Config::new().unicode(false).utf8(false))
+///     .build(r"foo[^b]ar.*")?;
+/// let mut cache = dfa.create_cache();
+///
+/// let haystack = b"\xFEfoo\xFFar\xE2\x98\xFF\n";
+/// let expected = Some(HalfMatch::must(0, 10));
+/// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?;
+/// assert_eq!(expected, got);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Builder {
+    config: Config,
+    #[cfg(feature = "syntax")]
+    thompson: thompson::Compiler,
+}
+
+impl Builder {
+    /// Create a new lazy DFA builder with the default configuration.
+    pub fn new() -> Builder {
+        Builder {
+            config: Config::default(),
+            #[cfg(feature = "syntax")]
+            thompson: thompson::Compiler::new(),
+        }
+    }
+
+    /// Build a lazy DFA from the given pattern.
+    ///
+    /// If there was a problem parsing or compiling the pattern, then an error
+    /// is returned.
+    #[cfg(feature = "syntax")]
+    pub fn build(&self, pattern: &str) -> Result<DFA, BuildError> {
+        self.build_many(&[pattern])
+    }
+
+    /// Build a lazy DFA from the given patterns.
+    ///
+    /// When matches are returned, the pattern ID corresponds to the index of
+    /// the pattern in the slice given.
+    #[cfg(feature = "syntax")]
+    pub fn build_many<P: AsRef<str>>(
+        &self,
+        patterns: &[P],
+    ) -> Result<DFA, BuildError> {
+        let nfa = self
+            .thompson
+            .clone()
+            // We can always forcefully disable captures because DFAs do not
+            // support them.
+            .configure(
+                thompson::Config::new()
+                    .which_captures(thompson::WhichCaptures::None),
+            )
+            .build_many(patterns)
+            .map_err(BuildError::nfa)?;
+        self.build_from_nfa(nfa)
+    }
+
+    /// Build a DFA from the given NFA.
+    ///
+    /// Note that this requires owning a `thompson::NFA`. While this may force
+    /// you to clone the NFA, such a clone is not a deep clone. Namely, NFAs
+    /// are defined internally to support shared ownership such that cloning is
+    /// very cheap.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a lazy DFA if you already have an NFA
+    /// in hand.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     nfa::thompson,
+    ///     HalfMatch, Input,
+    /// };
+    ///
+    /// let haystack = "foo123bar";
+    ///
+    /// // This shows how to set non-default options for building an NFA.
+    /// let nfa = thompson::Compiler::new()
+    ///     .configure(thompson::Config::new().shrink(true))
+    ///     .build(r"[0-9]+")?;
+    /// let dfa = DFA::builder().build_from_nfa(nfa)?;
+    /// let mut cache = dfa.create_cache();
+    /// let expected = Some(HalfMatch::must(0, 6));
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?;
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn build_from_nfa(
+        &self,
+        nfa: thompson::NFA,
+    ) -> Result<DFA, BuildError> {
+        let quitset = self.config.quit_set_from_nfa(&nfa)?;
+        let classes = self.config.byte_classes_from_nfa(&nfa, &quitset);
+        // Check that we can fit at least a few states into our cache,
+        // otherwise it's pretty senseless to use the lazy DFA. This does have
+        // a possible failure mode though. This assumes the maximum size of a
+        // state in powerset space (so, the total number of NFA states), which
+        // may never actually materialize, and could be quite a bit larger
+        // than the actual biggest state.
If this turns out to be a problem, + // we could expose a knob that disables this check. But if so, we have + // to be careful not to panic in other areas of the code (the cache + // clearing and init code) that tend to assume some minimum useful + // cache capacity. + let min_cache = minimum_cache_capacity( + &nfa, + &classes, + self.config.get_starts_for_each_pattern(), + ); + let mut cache_capacity = self.config.get_cache_capacity(); + if cache_capacity < min_cache { + // When the caller has asked us to skip the cache capacity check, + // then we simply force the cache capacity to its minimum amount + // and mush on. + if self.config.get_skip_cache_capacity_check() { + debug!( + "given capacity ({}) is too small, \ + since skip_cache_capacity_check is enabled, \ + setting cache capacity to minimum ({})", + cache_capacity, min_cache, + ); + cache_capacity = min_cache; + } else { + return Err(BuildError::insufficient_cache_capacity( + min_cache, + cache_capacity, + )); + } + } + // We also need to check that we can fit at least some small number + // of states in our state ID space. This is unlikely to trigger in + // >=32-bit systems, but 16-bit systems have a pretty small state ID + // space since a number of bits are used up as sentinels. + if let Err(err) = minimum_lazy_state_id(&classes) { + return Err(BuildError::insufficient_state_id_capacity(err)); + } + let stride2 = classes.stride2(); + let start_map = StartByteMap::new(nfa.look_matcher()); + Ok(DFA { + config: self.config.clone(), + nfa, + stride2, + start_map, + classes, + quitset, + cache_capacity, + }) + } + + /// Apply the given lazy DFA configuration options to this builder. + pub fn configure(&mut self, config: Config) -> &mut Builder { + self.config = self.config.overwrite(config); + self + } + + /// Set the syntax configuration for this builder using + /// [`syntax::Config`](crate::util::syntax::Config). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + /// + /// These settings only apply when constructing a lazy DFA directly from a + /// pattern. + #[cfg(feature = "syntax")] + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Builder { + self.thompson.syntax(config); + self + } + + /// Set the Thompson NFA configuration for this builder using + /// [`nfa::thompson::Config`](crate::nfa::thompson::Config). + /// + /// This permits setting things like whether the DFA should match the regex + /// in reverse or if additional time should be spent shrinking the size of + /// the NFA. + /// + /// These settings only apply when constructing a DFA directly from a + /// pattern. + #[cfg(feature = "syntax")] + pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder { + self.thompson.configure(config); + self + } +} + +/// Represents the current state of an overlapping search. +/// +/// This is used for overlapping searches since they need to know something +/// about the previous search. For example, when multiple patterns match at the +/// same position, this state tracks the last reported pattern so that the next +/// search knows whether to report another matching pattern or continue with +/// the search at the next position. Additionally, it also tracks which state +/// the last search call terminated in. +/// +/// This type provides little introspection capabilities. 
The only thing a
+/// caller can do is construct it and pass it around to permit search routines
+/// to use it to track state, and also ask whether a match has been found.
+///
+/// Callers should always provide a fresh state constructed via
+/// [`OverlappingState::start`] when starting a new search. Reusing state from
+/// a previous search may result in incorrect results.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct OverlappingState {
+    /// The match reported by the most recent overlapping search to use this
+    /// state.
+    ///
+    /// If a search does not find any matches, then it is expected to clear
+    /// this value.
+    pub(crate) mat: Option<HalfMatch>,
+    /// The state ID of the state that the search was in when the call
+    /// terminated. When this is a match state, `mat` must be set to a
+    /// non-None value.
+    ///
+    /// A `None` value indicates the start state of the corresponding
+    /// automaton. We cannot use the actual ID, since any one automaton may
+    /// have many start states, and which one is in use depends on several
+    /// search-time factors.
+    pub(crate) id: Option<LazyStateID>,
+    /// The position of the search.
+    ///
+    /// When `id` is None (i.e., we are starting a search), this is set to
+    /// the beginning of the search as given by the caller regardless of its
+    /// current value. Subsequent calls to an overlapping search pick up at
+    /// this offset.
+    pub(crate) at: usize,
+    /// The index into the matching patterns of the next match to report if the
+    /// current state is a match state. Note that this may be 1 greater than
+    /// the total number of matches to report for the current match state. (In
+    /// which case, no more matches should be reported at the current position
+    /// and the search should advance to the next position.)
+    pub(crate) next_match_index: Option<usize>,
+    /// This is set to true when a reverse overlapping search has entered its
+    /// EOI transitions.
+    ///
+    /// This isn't used in a forward search because it knows to stop once the
+    /// position exceeds the end of the search range. In a reverse search,
+    /// since we use unsigned offsets, we don't "know" once we've gone past
+    /// `0`. So the only way to detect it is with this extra flag. The reverse
+    /// overlapping search knows to terminate specifically after it has
+    /// reported all matches after following the EOI transition.
+    pub(crate) rev_eoi: bool,
+}
+
+impl OverlappingState {
+    /// Create a new overlapping state that begins at the start state of any
+    /// automaton.
+    pub fn start() -> OverlappingState {
+        OverlappingState {
+            mat: None,
+            id: None,
+            at: 0,
+            next_match_index: None,
+            rev_eoi: false,
+        }
+    }
+
+    /// Return the match result of the most recent search to execute with this
+    /// state.
+    ///
+    /// A search will clear this result automatically, such that if no
+    /// match is found, this will correctly report `None`.
+    pub fn get_match(&self) -> Option<HalfMatch> {
+        self.mat
+    }
+}
+
+/// Runs the given overlapping `search` function (forwards or backwards) until
+/// a match is found whose offset does not split a codepoint.
+///
+/// This is *not* always correct to call. It should only be called when the
+/// underlying NFA has UTF-8 mode enabled *and* it can produce zero-width
+/// matches. Calling this when both of those things aren't true might result
+/// in legitimate matches getting skipped.
+#[cold]
+#[inline(never)]
+fn skip_empty_utf8_splits_overlapping<F>(
+    input: &Input<'_>,
+    state: &mut OverlappingState,
+    mut search: F,
+) -> Result<(), MatchError>
+where
+    F: FnMut(&Input<'_>, &mut OverlappingState) -> Result<(), MatchError>,
+{
+    // Note that this routine works for forwards and reverse searches
+    // even though there's no code here to handle those cases. That's
+    // because overlapping searches drive themselves to completion via
+    // `OverlappingState`. So all we have to do is push it until no matches are
+    // found.
+
+    let mut hm = match state.get_match() {
+        None => return Ok(()),
+        Some(hm) => hm,
+    };
+    if input.get_anchored().is_anchored() {
+        if !input.is_char_boundary(hm.offset()) {
+            state.mat = None;
+        }
+        return Ok(());
+    }
+    while !input.is_char_boundary(hm.offset()) {
+        search(input, state)?;
+        hm = match state.get_match() {
+            None => return Ok(()),
+            Some(hm) => hm,
+        };
+    }
+    Ok(())
+}
+
+/// Based on the minimum number of states required for a useful lazy DFA cache,
+/// this returns the minimum lazy state ID that must be representable.
+///
+/// It's not likely for this to have any impact on 32-bit systems (or higher),
+/// but on 16-bit systems, the lazy state ID space is quite constrained and
+/// thus may be insufficient if our MIN_STATES value is (for some reason) too
+/// high.
+fn minimum_lazy_state_id(
+    classes: &ByteClasses,
+) -> Result<LazyStateID, LazyStateIDError> {
+    let stride = 1 << classes.stride2();
+    let min_state_index = MIN_STATES.checked_sub(1).unwrap();
+    LazyStateID::new(min_state_index * stride)
+}
+
+/// Based on the minimum number of states required for a useful lazy DFA cache,
+/// this returns a heuristic minimum number of bytes of heap space required.
+///
+/// This is a "heuristic" because the minimum it returns is likely bigger than
+/// the true minimum. Namely, it assumes that each powerset NFA/DFA state uses
+/// the maximum number of NFA states (all of them). This is likely bigger
+/// than what is required in practice. Computing the true minimum effectively
+/// requires determinization, which is probably too much work to do for a
+/// simple check like this.
+///
+/// One of the issues with this approach IMO is that it requires that this
+/// be in sync with the calculation above for computing how much heap memory
+/// the DFA cache uses. If we get it wrong, it's possible for example for the
+/// minimum to be smaller than the computed heap memory, and thus, it may be
+/// the case that we can't add the required minimum number of states. That in
+/// turn will make the lazy DFA panic because we assume that we can add at
+/// least a minimum number of states.
+///
+/// Another approach would be to always allow the minimum number of states to
+/// be added to the lazy DFA cache, even if it exceeds the configured cache
+/// limit. This does mean that the limit isn't really a limit in all cases,
+/// which is unfortunate. But it does at least guarantee that the lazy DFA can
+/// always make progress, even if it is slow. (This approach is very similar to
+/// enabling the 'skip_cache_capacity_check' config knob, except it wouldn't
+/// rely on cache size calculation. Instead, it would just always permit a
+/// minimum number of states to be added.)
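+///
+/// In rough terms, the estimate computed below is the sum of: the transition
+/// table for a minimum number of states, the start state table, the (worst
+/// case) heap usage of the states themselves, the state-to-ID map, the sparse
+/// sets and stack used during determinization, and one scratch state builder.
+/// (This is just a summary of the terms added up in the body below.)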
+fn minimum_cache_capacity(
+    nfa: &thompson::NFA,
+    classes: &ByteClasses,
+    starts_for_each_pattern: bool,
+) -> usize {
+    const ID_SIZE: usize = size_of::<LazyStateID>();
+    const STATE_SIZE: usize = size_of::<State>();
+
+    let stride = 1 << classes.stride2();
+    let states_len = nfa.states().len();
+    let sparses = 2 * states_len * NFAStateID::SIZE;
+    let trans = MIN_STATES * stride * ID_SIZE;
+
+    let mut starts = Start::len() * ID_SIZE;
+    if starts_for_each_pattern {
+        starts += (Start::len() * nfa.pattern_len()) * ID_SIZE;
+    }
+
+    // The min number of states HAS to be at least 4: we have 3 sentinel states
+    // and then we need space for one more when we save a state after clearing
+    // the cache. We also need space for one more, otherwise we get stuck in a
+    // loop where we try to add a 5th state, which gets rejected, which clears
+    // the cache, which adds back a saved state (4th total state) which then
+    // tries to add the 5th state again.
+    assert!(MIN_STATES >= 5, "minimum number of states has to be at least 5");
+    // The minimum number of non-sentinel states. We consider this separately
+    // because sentinel states are much smaller in that they contain no NFA
+    // states. Given our aggressive calculation here, it's worth being more
+    // precise with the number of states we need.
+    let non_sentinel = MIN_STATES.checked_sub(SENTINEL_STATES).unwrap();
+
+    // Every `State` has 5 bytes for flags, 4 bytes (max) for the number of
+    // patterns, followed by 32-bit encodings of patterns and then delta
+    // varint encodings of NFA state IDs. We use the worst case (which isn't
+    // technically possible) of 5 bytes for each NFA state ID.
+    //
+    // HOWEVER, three of the states needed by a lazy DFA are just the sentinel
+    // unknown, dead and quit states. Those states have a known size and it is
+    // small.
+    let dead_state_size = State::dead().memory_usage();
+    let max_state_size = 5 + 4 + (nfa.pattern_len() * 4) + (states_len * 5);
+    let states = (SENTINEL_STATES * (STATE_SIZE + dead_state_size))
+        + (non_sentinel * (STATE_SIZE + max_state_size));
+    // NOTE: We don't double count heap memory used by State for this map since
+    // we use reference counting to avoid doubling memory usage. (This tends to
+    // be where most memory is allocated in the cache.)
+    let states_to_sid = (MIN_STATES * STATE_SIZE) + (MIN_STATES * ID_SIZE);
+    let stack = states_len * NFAStateID::SIZE;
+    let scratch_state_builder = max_state_size;
+
+    trans
+        + starts
+        + states
+        + states_to_sid
+        + sparses
+        + stack
+        + scratch_state_builder
+}
+
+#[cfg(all(test, feature = "syntax"))]
+mod tests {
+    use super::*;
+
+    // Tests that we handle heuristic Unicode word boundary support in reverse
+    // DFAs in the specific case of contextual searches.
+    //
+    // I wrote this test when I discovered a bug in how heuristic word
+    // boundaries were handled. Namely, that the starting state selection
+    // didn't consider the DFA's quit byte set when looking at the byte
+    // immediately before the start of the search (or immediately after the
+    // end of the search in the case of a reverse search). As a result, it was
+    // possible for '\bfoo\b' to match 'β123' because the trailing \xB2 byte
+    // in the 'β' codepoint would be treated as a non-word character. But of
+    // course, this search should trigger the DFA to quit, since there is a
+    // non-ASCII byte in consideration.
+    //
+    // Thus, I fixed 'start_state_{forward,reverse}' to check the quit byte set
+    // if it wasn't empty.
The forward case is tested in the doc test for the + // Config::unicode_word_boundary API. We test the reverse case here, which + // is sufficiently niche that it doesn't really belong in a doc test. + #[test] + fn heuristic_unicode_reverse() { + let dfa = DFA::builder() + .configure(DFA::config().unicode_word_boundary(true)) + .thompson(thompson::Config::new().reverse(true)) + .build(r"\b[0-9]+\b") + .unwrap(); + let mut cache = dfa.create_cache(); + + let input = Input::new("β123").range(2..); + let expected = MatchError::quit(0xB2, 1); + let got = dfa.try_search_rev(&mut cache, &input); + assert_eq!(Err(expected), got); + + let input = Input::new("123β").range(..3); + let expected = MatchError::quit(0xCE, 3); + let got = dfa.try_search_rev(&mut cache, &input); + assert_eq!(Err(expected), got); + } +} diff --git a/vendor/regex-automata/src/hybrid/error.rs b/vendor/regex-automata/src/hybrid/error.rs new file mode 100644 index 0000000..d134e7e --- /dev/null +++ b/vendor/regex-automata/src/hybrid/error.rs @@ -0,0 +1,242 @@ +use crate::{hybrid::id::LazyStateIDError, nfa, util::search::Anchored}; + +/// An error that occurs when initial construction of a lazy DFA fails. +/// +/// A build error can occur when insufficient cache capacity is configured or +/// if something about the NFA is unsupported. (For example, if one attempts +/// to build a lazy DFA without heuristic Unicode support but with an NFA that +/// contains a Unicode word boundary.) +/// +/// This error does not provide many introspection capabilities. There are +/// generally only two things you can do with it: +/// +/// * Obtain a human readable message via its `std::fmt::Display` impl. +/// * Access an underlying +/// [`nfa::thompson::BuildError`](crate::nfa::thompson::BuildError) +/// type from its `source` method via the `std::error::Error` trait. This error +/// only occurs when using convenience routines for building a lazy DFA +/// directly from a pattern string. +/// +/// When the `std` feature is enabled, this implements the `std::error::Error` +/// trait. 
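+///
+/// # Example
+///
+/// A minimal sketch of one way such an error can arise: a configured cache
+/// capacity below the required minimum (with the capacity check enabled, as
+/// it is by default).
+///
+/// ```
+/// use regex_automata::hybrid::dfa::DFA;
+///
+/// let err = DFA::builder()
+///     .configure(DFA::config().cache_capacity(0))
+///     .build(r"\w+")
+///     .unwrap_err();
+/// // The message reports the given and minimum required capacities.
+/// assert!(err.to_string().contains("cache capacity"));
+/// ```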
+#[derive(Clone, Debug)] +pub struct BuildError { + kind: BuildErrorKind, +} + +#[derive(Clone, Debug)] +enum BuildErrorKind { + NFA(nfa::thompson::BuildError), + InsufficientCacheCapacity { minimum: usize, given: usize }, + InsufficientStateIDCapacity { err: LazyStateIDError }, + Unsupported(&'static str), +} + +impl BuildError { + pub(crate) fn nfa(err: nfa::thompson::BuildError) -> BuildError { + BuildError { kind: BuildErrorKind::NFA(err) } + } + + pub(crate) fn insufficient_cache_capacity( + minimum: usize, + given: usize, + ) -> BuildError { + BuildError { + kind: BuildErrorKind::InsufficientCacheCapacity { minimum, given }, + } + } + + pub(crate) fn insufficient_state_id_capacity( + err: LazyStateIDError, + ) -> BuildError { + BuildError { + kind: BuildErrorKind::InsufficientStateIDCapacity { err }, + } + } + + pub(crate) fn unsupported_dfa_word_boundary_unicode() -> BuildError { + let msg = "cannot build lazy DFAs for regexes with Unicode word \ + boundaries; switch to ASCII word boundaries, or \ + heuristically enable Unicode word boundaries or use a \ + different regex engine"; + BuildError { kind: BuildErrorKind::Unsupported(msg) } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for BuildError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self.kind { + BuildErrorKind::NFA(ref err) => Some(err), + _ => None, + } + } +} + +impl core::fmt::Display for BuildError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.kind { + BuildErrorKind::NFA(_) => write!(f, "error building NFA"), + BuildErrorKind::InsufficientCacheCapacity { minimum, given } => { + write!( + f, + "given cache capacity ({}) is smaller than \ + minimum required ({})", + given, minimum, + ) + } + BuildErrorKind::InsufficientStateIDCapacity { ref err } => { + err.fmt(f) + } + BuildErrorKind::Unsupported(ref msg) => { + write!(f, "unsupported regex feature for DFAs: {}", msg) + } + } + } +} + +/// An error that can occur when computing the start state for a search. +/// +/// Computing a start state can fail for a few reasons, either +/// based on incorrect configuration or even based on whether +/// the look-behind byte triggers a quit state. Typically +/// one does not need to handle this error if you're using +/// [`DFA::start_state_forward`](crate::hybrid::dfa::DFA::start_state_forward) +/// (or its reverse counterpart), as that routine automatically converts +/// `StartError` to a [`MatchError`](crate::MatchError) for you. +/// +/// This error may be returned by the +/// [`DFA::start_state`](crate::hybrid::dfa::DFA::start_state) routine. +/// +/// This error implements the `std::error::Error` trait when the `std` feature +/// is enabled. +/// +/// This error is marked as non-exhaustive. New variants may be added in a +/// semver compatible release. +#[non_exhaustive] +#[derive(Clone, Debug)] +pub enum StartError { + /// An error that occurs when cache inefficiency has dropped below the + /// configured heuristic thresholds. + Cache { + /// The underlying cache error that occurred. + err: CacheError, + }, + /// An error that occurs when a starting configuration's look-behind byte + /// is in this DFA's quit set. + Quit { + /// The quit byte that was found. + byte: u8, + }, + /// An error that occurs when the caller requests an anchored mode that + /// isn't supported by the DFA. + UnsupportedAnchored { + /// The anchored mode given that is unsupported. 
+ mode: Anchored, + }, +} + +impl StartError { + pub(crate) fn cache(err: CacheError) -> StartError { + StartError::Cache { err } + } + + pub(crate) fn quit(byte: u8) -> StartError { + StartError::Quit { byte } + } + + pub(crate) fn unsupported_anchored(mode: Anchored) -> StartError { + StartError::UnsupportedAnchored { mode } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for StartError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match *self { + StartError::Cache { ref err } => Some(err), + _ => None, + } + } +} + +impl core::fmt::Display for StartError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match *self { + StartError::Cache { .. } => write!( + f, + "error computing start state because of cache inefficiency" + ), + StartError::Quit { byte } => write!( + f, + "error computing start state because the look-behind byte \ + {:?} triggered a quit state", + crate::util::escape::DebugByte(byte), + ), + StartError::UnsupportedAnchored { mode: Anchored::Yes } => { + write!( + f, + "error computing start state because \ + anchored searches are not supported or enabled" + ) + } + StartError::UnsupportedAnchored { mode: Anchored::No } => { + write!( + f, + "error computing start state because \ + unanchored searches are not supported or enabled" + ) + } + StartError::UnsupportedAnchored { + mode: Anchored::Pattern(pid), + } => { + write!( + f, + "error computing start state because \ + anchored searches for a specific pattern ({}) \ + are not supported or enabled", + pid.as_usize(), + ) + } + } + } +} + +/// An error that occurs when cache usage has become inefficient. +/// +/// One of the weaknesses of a lazy DFA is that it may need to clear its +/// cache repeatedly if it's not big enough. If this happens too much, then it +/// can slow searching down significantly. A mitigation to this is to use +/// heuristics to detect whether the cache is being used efficiently or not. +/// If not, then a lazy DFA can return a `CacheError`. +/// +/// The default configuration of a lazy DFA in this crate is +/// set such that a `CacheError` will never occur. Instead, +/// callers must opt into this behavior with settings like +/// [`dfa::Config::minimum_cache_clear_count`](crate::hybrid::dfa::Config::minimum_cache_clear_count) +/// and +/// [`dfa::Config::minimum_bytes_per_state`](crate::hybrid::dfa::Config::minimum_bytes_per_state). +/// +/// When the `std` feature is enabled, this implements the `std::error::Error` +/// trait. +#[derive(Clone, Debug)] +pub struct CacheError(()); + +impl CacheError { + pub(crate) fn too_many_cache_clears() -> CacheError { + CacheError(()) + } + + pub(crate) fn bad_efficiency() -> CacheError { + CacheError(()) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for CacheError {} + +impl core::fmt::Display for CacheError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "lazy DFA cache has been cleared too many times") + } +} diff --git a/vendor/regex-automata/src/hybrid/id.rs b/vendor/regex-automata/src/hybrid/id.rs new file mode 100644 index 0000000..662e3c9 --- /dev/null +++ b/vendor/regex-automata/src/hybrid/id.rs @@ -0,0 +1,354 @@ +/// A state identifier specifically tailored for lazy DFAs. +/// +/// A lazy state ID logically represents a pointer to a DFA state. In practice, +/// by limiting the number of DFA states it can address, it reserves some +/// bits of its representation to encode some additional information. 
That
+/// additional information is called a "tag." That tag is used to record
+/// whether the state it points to is an unknown, dead, quit, start or match
+/// state.
+///
+/// When implementing a low level search routine with a lazy DFA, it is
+/// necessary to query the type of the current state to know what to do:
+///
+/// * **Unknown** - The state has not yet been computed. The
+/// parameters used to get this state ID must be re-passed to
+/// [`DFA::next_state`](crate::hybrid::dfa::DFA::next_state), which will never
+/// return an unknown state ID.
+/// * **Dead** - A dead state only has transitions to itself. It indicates that
+/// the search cannot do anything else and should stop with whatever result it
+/// has.
+/// * **Quit** - A quit state indicates that the automaton could not answer
+/// whether a match exists or not. Correct search implementations must return a
+/// [`MatchError::quit`](crate::MatchError::quit) when a DFA enters a quit
+/// state.
+/// * **Start** - A start state is a state in which a search can begin.
+/// Lazy DFAs usually have more than one start state. Branching on
+/// this isn't required for correctness, but a common optimization is
+/// to run a prefilter when a search enters a start state. Note that
+/// start states are *not* tagged automatically, and one must enable the
+/// [`Config::specialize_start_states`](crate::hybrid::dfa::Config::specialize_start_states)
+/// setting for start states to be tagged. The reason for this is
+/// that a DFA search loop is usually written to execute a prefilter once it
+/// enters a start state. But if there is no prefilter, this handling can be
+/// quite disastrous as the DFA may ping-pong between the special handling code
+/// and a possible optimized hot path for handling untagged states. When start
+/// states aren't specialized, then they are untagged and remain in the hot
+/// path.
+/// * **Match** - A match state indicates that a match has been found.
+/// Depending on the semantics of your search implementation, it may either
+/// continue until the end of the haystack or a dead state, or it might quit
+/// and return the match immediately.
+///
+/// As an optimization, the [`is_tagged`](LazyStateID::is_tagged) predicate
+/// can be used to determine if a tag exists at all. This is useful to avoid
+/// branching on all of the above types for every byte searched.
+///
+/// # Example
+///
+/// This example shows how `LazyStateID` can be used to implement a correct
+/// search routine with minimal branching. In particular, this search routine
+/// implements "leftmost" matching, which means that it doesn't immediately
+/// stop once a match is found. Instead, it continues until it reaches a dead
+/// state.
+///
+/// Notice also how a correct search implementation deals with
+/// [`CacheError`](crate::hybrid::CacheError)s returned by some of
+/// the lazy DFA routines. When a `CacheError` occurs, it returns
+/// [`MatchError::gave_up`](crate::MatchError::gave_up).
+///
+/// ```
+/// use regex_automata::{
+///     hybrid::dfa::{Cache, DFA},
+///     HalfMatch, MatchError, Input,
+/// };
+///
+/// fn find_leftmost_first(
+///     dfa: &DFA,
+///     cache: &mut Cache,
+///     haystack: &[u8],
+/// ) -> Result<Option<HalfMatch>, MatchError> {
+///     // The start state is determined by inspecting the position and the
+///     // initial bytes of the haystack. Note that start states can never
+///     // be match states (since DFAs in this crate delay matches by 1
+///     // byte), so we don't need to check if the start state is a match.
+/// let mut sid = dfa.start_state_forward( +/// cache, +/// &Input::new(haystack), +/// )?; +/// let mut last_match = None; +/// // Walk all the bytes in the haystack. We can quit early if we see +/// // a dead or a quit state. The former means the automaton will +/// // never transition to any other state. The latter means that the +/// // automaton entered a condition in which its search failed. +/// for (i, &b) in haystack.iter().enumerate() { +/// sid = dfa +/// .next_state(cache, sid, b) +/// .map_err(|_| MatchError::gave_up(i))?; +/// if sid.is_tagged() { +/// if sid.is_match() { +/// last_match = Some(HalfMatch::new( +/// dfa.match_pattern(cache, sid, 0), +/// i, +/// )); +/// } else if sid.is_dead() { +/// return Ok(last_match); +/// } else if sid.is_quit() { +/// // It is possible to enter into a quit state after +/// // observing a match has occurred. In that case, we +/// // should return the match instead of an error. +/// if last_match.is_some() { +/// return Ok(last_match); +/// } +/// return Err(MatchError::quit(b, i)); +/// } +/// // Implementors may also want to check for start states and +/// // handle them differently for performance reasons. But it is +/// // not necessary for correctness. Note that in order to check +/// // for start states, you'll need to enable the +/// // 'specialize_start_states' config knob, otherwise start +/// // states will not be tagged. +/// } +/// } +/// // Matches are always delayed by 1 byte, so we must explicitly walk +/// // the special "EOI" transition at the end of the search. +/// sid = dfa +/// .next_eoi_state(cache, sid) +/// .map_err(|_| MatchError::gave_up(haystack.len()))?; +/// if sid.is_match() { +/// last_match = Some(HalfMatch::new( +/// dfa.match_pattern(cache, sid, 0), +/// haystack.len(), +/// )); +/// } +/// Ok(last_match) +/// } +/// +/// // We use a greedy '+' operator to show how the search doesn't just stop +/// // once a match is detected. It continues extending the match. Using +/// // '[a-z]+?' would also work as expected and stop the search early. +/// // Greediness is built into the automaton. +/// let dfa = DFA::new(r"[a-z]+")?; +/// let mut cache = dfa.create_cache(); +/// let haystack = "123 foobar 4567".as_bytes(); +/// let mat = find_leftmost_first(&dfa, &mut cache, haystack)?.unwrap(); +/// assert_eq!(mat.pattern().as_usize(), 0); +/// assert_eq!(mat.offset(), 10); +/// +/// // Here's another example that tests our handling of the special +/// // EOI transition. This will fail to find a match if we don't call +/// // 'next_eoi_state' at the end of the search since the match isn't found +/// // until the final byte in the haystack. +/// let dfa = DFA::new(r"[0-9]{4}")?; +/// let mut cache = dfa.create_cache(); +/// let haystack = "123 foobar 4567".as_bytes(); +/// let mat = find_leftmost_first(&dfa, &mut cache, haystack)?.unwrap(); +/// assert_eq!(mat.pattern().as_usize(), 0); +/// assert_eq!(mat.offset(), 15); +/// +/// // And note that our search implementation above automatically works +/// // with multi-DFAs. Namely, `dfa.match_pattern(match_state, 0)` selects +/// // the appropriate pattern ID for us. 
+/// let dfa = DFA::new_many(&[r"[a-z]+", r"[0-9]+"])?;
+/// let mut cache = dfa.create_cache();
+/// let haystack = "123 foobar 4567".as_bytes();
+/// let mat = find_leftmost_first(&dfa, &mut cache, haystack)?.unwrap();
+/// assert_eq!(mat.pattern().as_usize(), 1);
+/// assert_eq!(mat.offset(), 3);
+/// let mat = find_leftmost_first(&dfa, &mut cache, &haystack[3..])?.unwrap();
+/// assert_eq!(mat.pattern().as_usize(), 0);
+/// assert_eq!(mat.offset(), 7);
+/// let mat = find_leftmost_first(&dfa, &mut cache, &haystack[10..])?.unwrap();
+/// assert_eq!(mat.pattern().as_usize(), 1);
+/// assert_eq!(mat.offset(), 5);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(
+    Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
+)]
+pub struct LazyStateID(u32);
+
+impl LazyStateID {
+    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
+    const MAX_BIT: usize = 31;
+
+    #[cfg(target_pointer_width = "16")]
+    const MAX_BIT: usize = 15;
+
+    const MASK_UNKNOWN: usize = 1 << (LazyStateID::MAX_BIT);
+    const MASK_DEAD: usize = 1 << (LazyStateID::MAX_BIT - 1);
+    const MASK_QUIT: usize = 1 << (LazyStateID::MAX_BIT - 2);
+    const MASK_START: usize = 1 << (LazyStateID::MAX_BIT - 3);
+    const MASK_MATCH: usize = 1 << (LazyStateID::MAX_BIT - 4);
+    const MAX: usize = LazyStateID::MASK_MATCH - 1;
+
+    /// Create a new lazy state ID.
+    ///
+    /// If the given identifier exceeds [`LazyStateID::MAX`], then this returns
+    /// an error.
+    #[inline]
+    pub(crate) fn new(id: usize) -> Result<LazyStateID, LazyStateIDError> {
+        if id > LazyStateID::MAX {
+            let attempted = u64::try_from(id).unwrap();
+            return Err(LazyStateIDError { attempted });
+        }
+        Ok(LazyStateID::new_unchecked(id))
+    }
+
+    /// Create a new lazy state ID without checking whether the given value
+    /// exceeds [`LazyStateID::MAX`].
+    ///
+    /// While this is unchecked, providing an incorrect value must never
+    /// sacrifice memory safety.
+    #[inline]
+    const fn new_unchecked(id: usize) -> LazyStateID {
+        // FIXME: Use as_u32() once const functions in traits are stable.
+        LazyStateID(id as u32)
+    }
+
+    /// Return this lazy state ID as an untagged `usize`.
+    ///
+    /// If this lazy state ID is tagged, then the usize returned is the state
+    /// ID without the tag. If the ID was not tagged, then the usize returned
+    /// is equivalent to the state ID.
+    #[inline]
+    pub(crate) fn as_usize_untagged(&self) -> usize {
+        self.as_usize_unchecked() & LazyStateID::MAX
+    }
+
+    /// Return this lazy state ID as its raw internal `usize` value, which may
+    /// be tagged (and thus greater than LazyStateID::MAX).
+    #[inline]
+    pub(crate) const fn as_usize_unchecked(&self) -> usize {
+        // FIXME: Use as_usize() once const functions in traits are stable.
+        self.0 as usize
+    }
+
+    #[inline]
+    pub(crate) const fn to_unknown(&self) -> LazyStateID {
+        LazyStateID::new_unchecked(
+            self.as_usize_unchecked() | LazyStateID::MASK_UNKNOWN,
+        )
+    }
+
+    #[inline]
+    pub(crate) const fn to_dead(&self) -> LazyStateID {
+        LazyStateID::new_unchecked(
+            self.as_usize_unchecked() | LazyStateID::MASK_DEAD,
+        )
+    }
+
+    #[inline]
+    pub(crate) const fn to_quit(&self) -> LazyStateID {
+        LazyStateID::new_unchecked(
+            self.as_usize_unchecked() | LazyStateID::MASK_QUIT,
+        )
+    }
+
+    /// Return this lazy state ID as a state ID that is tagged as a start
+    /// state.
+    #[inline]
+    pub(crate) const fn to_start(&self) -> LazyStateID {
+        LazyStateID::new_unchecked(
+            self.as_usize_unchecked() | LazyStateID::MASK_START,
+        )
+    }
+
+    /// Return this lazy state ID as a lazy state ID that is tagged as a match
+    /// state.
+    #[inline]
+    pub(crate) const fn to_match(&self) -> LazyStateID {
+        LazyStateID::new_unchecked(
+            self.as_usize_unchecked() | LazyStateID::MASK_MATCH,
+        )
+    }
+
+    /// Return true if and only if this lazy state ID is tagged.
+    ///
+    /// When a lazy state ID is tagged, then one can conclude that it is one
+    /// of a match, start, dead, quit or unknown state.
+    #[inline]
+    pub const fn is_tagged(&self) -> bool {
+        self.as_usize_unchecked() > LazyStateID::MAX
+    }
+
+    /// Return true if and only if this represents a lazy state ID that is
+    /// "unknown." That is, the state has not yet been created. When a caller
+    /// sees this state ID, it generally means that a state has to be computed
+    /// in order to proceed.
+    #[inline]
+    pub const fn is_unknown(&self) -> bool {
+        self.as_usize_unchecked() & LazyStateID::MASK_UNKNOWN > 0
+    }
+
+    /// Return true if and only if this represents a dead state. A dead state
+    /// is a state that can never transition to any other state except the
+    /// dead state. When a dead state is seen, it generally indicates that a
+    /// search should stop.
+    #[inline]
+    pub const fn is_dead(&self) -> bool {
+        self.as_usize_unchecked() & LazyStateID::MASK_DEAD > 0
+    }
+
+    /// Return true if and only if this represents a quit state. A quit state
+    /// is a state that is representationally equivalent to a dead state,
+    /// except it indicates the automaton has reached a point at which it can
+    /// no longer determine whether a match exists or not. In general, this
+    /// indicates an error during search and the caller must either pass this
+    /// error up or use a different search technique.
+    #[inline]
+    pub const fn is_quit(&self) -> bool {
+        self.as_usize_unchecked() & LazyStateID::MASK_QUIT > 0
+    }
+
+    /// Return true if and only if this lazy state ID has been tagged as a
+    /// start state.
+    ///
+    /// Note that if
+    /// [`Config::specialize_start_states`](crate::hybrid::dfa::Config) is
+    /// disabled (which is the default), then this will always return false
+    /// since start states won't be tagged.
+    #[inline]
+    pub const fn is_start(&self) -> bool {
+        self.as_usize_unchecked() & LazyStateID::MASK_START > 0
+    }
+
+    /// Return true if and only if this lazy state ID has been tagged as a
+    /// match state.
+    #[inline]
+    pub const fn is_match(&self) -> bool {
+        self.as_usize_unchecked() & LazyStateID::MASK_MATCH > 0
+    }
+}
+
+/// This error occurs when a lazy state ID could not be constructed.
+///
+/// This occurs when given an integer exceeding the maximum lazy state ID
+/// value.
+///
+/// When the `std` feature is enabled, this implements the `Error` trait.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub(crate) struct LazyStateIDError {
+    attempted: u64,
+}
+
+impl LazyStateIDError {
+    /// Returns the value that failed to construct a lazy state ID.
+    pub(crate) fn attempted(&self) -> u64 {
+        self.attempted
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for LazyStateIDError {}
+
+impl core::fmt::Display for LazyStateIDError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(
+            f,
+            "failed to create LazyStateID from {:?}, which exceeds {:?}",
+            self.attempted(),
+            LazyStateID::MAX,
+        )
+    }
+}
diff --git a/vendor/regex-automata/src/hybrid/mod.rs b/vendor/regex-automata/src/hybrid/mod.rs
new file mode 100644
index 0000000..2feb839
--- /dev/null
+++ b/vendor/regex-automata/src/hybrid/mod.rs
@@ -0,0 +1,144 @@
+/*!
+A module for building and searching with lazy deterministic finite automata
+(DFAs).
+
+Like other modules in this crate, lazy DFAs support a rich regex syntax with
+Unicode features. The key feature of a lazy DFA is that it builds itself
+incrementally during search, and never uses more than a configured capacity of
+memory. Thus, when searching with a lazy DFA, one must supply a mutable "cache"
+in which the actual DFA's transition table is stored.
+
+If you're looking for fully compiled DFAs, then please see the top-level
+[`dfa` module](crate::dfa).
+
+# Overview
+
+This section gives a brief overview of the primary types in this module:
+
+* A [`regex::Regex`] provides a way to search for matches of a regular
+expression using lazy DFAs. This includes iterating over matches with both the
+start and end positions of each match.
+* A [`dfa::DFA`] provides direct low level access to a lazy DFA.
+
+# Example: basic regex searching
+
+This example shows how to compile a regex using the default configuration
+and then use it to find matches in a byte string:
+
+```
+use regex_automata::{hybrid::regex::Regex, Match};
+
+let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?;
+let mut cache = re.create_cache();
+
+let haystack = "2018-12-24 2016-10-08";
+let matches: Vec<Match> = re.find_iter(&mut cache, haystack).collect();
+assert_eq!(matches, vec![
+    Match::must(0, 0..10),
+    Match::must(0, 11..21),
+]);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+# Example: searching with multiple regexes
+
+The lazy DFAs in this module all fully support searching with multiple regexes
+simultaneously. You can use this support with standard leftmost-first style
+searching to find non-overlapping matches:
+
+```
+# if cfg!(miri) { return Ok(()); } // miri takes too long
+use regex_automata::{hybrid::regex::Regex, Match};
+
+let re = Regex::new_many(&[r"\w+", r"\S+"])?;
+let mut cache = re.create_cache();
+
+let haystack = "@foo bar";
+let matches: Vec<Match> = re.find_iter(&mut cache, haystack).collect();
+assert_eq!(matches, vec![
+    Match::must(1, 0..4),
+    Match::must(0, 5..8),
+]);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+# When should I use this?
+
+Generally speaking, if you can abide the use of mutable state during search,
+and you don't need things like capturing groups or Unicode word boundary
+support in non-ASCII text, then a lazy DFA is likely a robust choice with
+respect to both search speed and memory usage. Note however that its speed
+may be worse than a general purpose regex engine if you don't select a good
+[prefilter](crate::util::prefilter).
+
+If you know ahead of time that your pattern would result in a very large DFA
+if it was fully compiled, it may be better to use an NFA simulation instead
+of a lazy DFA. Either that, or increase the cache capacity of your lazy DFA
+to something that is big enough to hold the state machine (likely through
+experimentation). The issue here is that if the cache is too small, then it
+could wind up being reset too frequently and this might decrease searching
+speed significantly.
+
+# Differences with fully compiled DFAs
+
+A [`hybrid::regex::Regex`](crate::hybrid::regex::Regex) and a
+[`dfa::regex::Regex`](crate::dfa::regex::Regex) both have the same capabilities
+(and similarly for their underlying DFAs), but they achieve them through
+different means. The main difference is that a hybrid or "lazy" regex builds
+its DFA lazily during search, whereas a fully compiled regex will build its
+DFA at construction time.
While building a DFA at search time might sound like
+it's slow, it tends to work out because most bytes seen during a search will
+reuse pre-built parts of the DFA and thus can be almost as fast as a fully
+compiled DFA. The main downside is that searching requires mutable space to
+store the DFA, and, in the worst case, a search can result in a new state being
+created for each byte seen, which would make searching quite a bit slower.
+
+A fully compiled DFA never has to worry about searches being slower once
+it's built. (Aside from, say, the transition table being so large that it
+is subject to harsh CPU cache effects.) However, of course, building a full
+DFA can be quite time consuming and memory hungry, particularly when large
+Unicode character classes are used, since they tend to translate into very
+large DFAs.
+
+A lazy DFA strikes a nice balance _in practice_, particularly in the
+presence of Unicode mode, by only building what is needed. It avoids the
+worst case exponential time complexity of DFA compilation by guaranteeing that
+it will only build at most one state per byte searched. While the worst
+case here can lead to a very high constant, it will never be exponential.
+
+# Syntax
+
+This module supports the same syntax as the `regex` crate, since they share the
+same parser. You can find an exhaustive list of supported syntax in the
+[documentation for the `regex` crate](https://docs.rs/regex/1/regex/#syntax).
+
+There are two things that are not supported by the lazy DFAs in this module:
+
+* Capturing groups. The DFAs (and [`Regex`](regex::Regex)es built on top
+of them) can only find the offsets of an entire match, but cannot resolve
+the offsets of each capturing group. This is because DFAs do not have the
+expressive power necessary. Note that it is okay to build a lazy DFA from an
+NFA that contains capture groups. The capture groups will simply be ignored.
+* Unicode word boundaries. These present particularly difficult challenges for
+DFA construction and would result in an explosion in the number of states.
+One can enable [`dfa::Config::unicode_word_boundary`] though, which provides
+heuristic support for Unicode word boundaries that only works on ASCII text.
+Otherwise, one can use `(?-u:\b)` for an ASCII word boundary, which will work
+on any input.
+
+There are no plans to lift either of these limitations.
+
+Note that these restrictions are identical to the restrictions on fully
+compiled DFAs.
+*/
+
+pub use self::{
+    error::{BuildError, CacheError, StartError},
+    id::LazyStateID,
+};
+
+pub mod dfa;
+mod error;
+mod id;
+pub mod regex;
+mod search;
diff --git a/vendor/regex-automata/src/hybrid/regex.rs b/vendor/regex-automata/src/hybrid/regex.rs
new file mode 100644
index 0000000..b3b1fe3
--- /dev/null
+++ b/vendor/regex-automata/src/hybrid/regex.rs
@@ -0,0 +1,895 @@
+/*!
+A lazy DFA backed `Regex`.
+
+This module provides a [`Regex`] backed by a lazy DFA. A `Regex` implements
+convenience routines you might have come to expect, such as finding a match
+and iterating over all non-overlapping matches. This `Regex` type is limited
+in its capabilities to what a lazy DFA can provide. Therefore, APIs involving
+capturing groups, for example, are not provided.
+
+Internally, a `Regex` is composed of two DFAs. One is a "forward" DFA that
+finds the end offset of a match, whereas the other is a "reverse" DFA that
+finds the start offset of a match.
+
+See the [parent module](crate::hybrid) for examples.
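+
+As a quick sketch of the two-pass idea described above, the underlying DFAs
+can also be driven by hand. This is illustrative only; the `Regex` search
+routines in this module encapsulate it, including subtleties around empty
+matches and anchored searches:
+
+```
+use regex_automata::{hybrid::regex::Regex, Anchored, Input};
+
+let re = Regex::new(r"[0-9]+")?;
+let mut cache = re.create_cache();
+let input = Input::new("abc123xyz");
+
+// First, the forward DFA finds the end offset of the leftmost match...
+let (fcache, rcache) = cache.as_parts_mut();
+let end = re.forward().try_search_fwd(fcache, &input)?.unwrap();
+assert_eq!(6, end.offset());
+// ...then the reverse DFA, run anchored over the prefix that ends where the
+// forward search stopped, finds the start offset.
+let revinput = input.clone().span(0..end.offset()).anchored(Anchored::Yes);
+let start = re.reverse().try_search_rev(rcache, &revinput)?.unwrap();
+assert_eq!(3, start.offset());
+# Ok::<(), Box<dyn std::error::Error>>(())
+```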
+*/
+
+use crate::{
+    hybrid::{
+        dfa::{self, DFA},
+        error::BuildError,
+    },
+    nfa::thompson,
+    util::{
+        iter,
+        search::{Anchored, Input, Match, MatchError, MatchKind},
+    },
+};
+
+/// A regular expression that uses hybrid NFA/DFAs (also called "lazy DFAs")
+/// for searching.
+///
+/// A regular expression is composed of two lazy DFAs, a "forward" DFA and a
+/// "reverse" DFA. The forward DFA is responsible for detecting the end of
+/// a match while the reverse DFA is responsible for detecting the start
+/// of a match. Thus, in order to find the bounds of any given match, a
+/// forward search must first be run followed by a reverse search. A match
+/// found by the forward DFA guarantees that the reverse DFA will also find
+/// a match.
+///
+/// # Fallibility
+///
+/// Most of the search routines defined on this type will _panic_ when the
+/// underlying search fails. This might be because the DFA gave up because it
+/// saw a quit byte, whether configured explicitly or via heuristic Unicode
+/// word boundary support, although neither is enabled by default. It might
+/// also fail if the underlying DFA determines it isn't making effective use of
+/// the cache (which also never happens by default). Or it might fail because
+/// an invalid `Input` configuration is given, for example, with an unsupported
+/// [`Anchored`] mode.
+///
+/// If you need to handle these error cases instead of allowing them to trigger
+/// a panic, then the lower level [`Regex::try_search`] provides a fallible API
+/// that never panics.
+///
+/// # Example
+///
+/// This example shows how to cause a search to terminate if it sees a
+/// `\n` byte, and handle the error returned. This could be useful if, for
+/// example, you wanted to prevent a user supplied pattern from matching
+/// across a line boundary.
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::{hybrid::{dfa, regex::Regex}, Input, MatchError};
+///
+/// let re = Regex::builder()
+///     .dfa(dfa::Config::new().quit(b'\n', true))
+///     .build(r"foo\p{any}+bar")?;
+/// let mut cache = re.create_cache();
+///
+/// let input = Input::new("foo\nbar");
+/// // Normally this would produce a match, since \p{any} contains '\n'.
+/// // But since we instructed the automaton to enter a quit state if a
+/// // '\n' is observed, this produces a match error instead.
+/// let expected = MatchError::quit(b'\n', 3);
+/// let got = re.try_search(&mut cache, &input).unwrap_err();
+/// assert_eq!(expected, got);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Debug)]
+pub struct Regex {
+    /// The forward lazy DFA. This can only find the end of a match.
+    forward: DFA,
+    /// The reverse lazy DFA. This can only find the start of a match.
+    ///
+    /// This is built with 'all' match semantics (instead of leftmost-first)
+    /// so that it always finds the longest possible match (which corresponds
+    /// to the leftmost starting position). It is also compiled as an anchored
+    /// matcher and has 'starts_for_each_pattern' enabled. Including starting
+    /// states for each pattern is necessary to ensure that we only look for
+    /// matches of a pattern that matched in the forward direction. Otherwise,
+    /// we might wind up finding the "leftmost" starting position of a totally
+    /// different pattern!
+    reverse: DFA,
+}
+
+/// Convenience routines for regex and cache construction.
+impl Regex {
+    /// Parse the given regular expression using the default configuration and
+    /// return the corresponding regex.
+    ///
+    /// If you want a non-default configuration, then use the [`Builder`] to
+    /// set your own configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::regex::Regex, Match};
+    ///
+    /// let re = Regex::new("foo[0-9]+bar")?;
+    /// let mut cache = re.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 3..14)),
+    ///     re.find(&mut cache, "zzzfoo12345barzzz"),
+    /// );
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new(pattern: &str) -> Result<Regex, BuildError> {
+        Regex::builder().build(pattern)
+    }
+
+    /// Like `new`, but parses multiple patterns into a single "multi regex."
+    /// This similarly uses the default regex configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::regex::Regex, Match};
+    ///
+    /// let re = Regex::new_many(&["[a-z]+", "[0-9]+"])?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let mut it = re.find_iter(&mut cache, "abc 1 foo 4567 0 quux");
+    /// assert_eq!(Some(Match::must(0, 0..3)), it.next());
+    /// assert_eq!(Some(Match::must(1, 4..5)), it.next());
+    /// assert_eq!(Some(Match::must(0, 6..9)), it.next());
+    /// assert_eq!(Some(Match::must(1, 10..14)), it.next());
+    /// assert_eq!(Some(Match::must(1, 15..16)), it.next());
+    /// assert_eq!(Some(Match::must(0, 17..21)), it.next());
+    /// assert_eq!(None, it.next());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new_many<P: AsRef<str>>(
+        patterns: &[P],
+    ) -> Result<Regex, BuildError> {
+        Regex::builder().build_many(patterns)
+    }
+
+    /// Return a builder for configuring the construction of a `Regex`.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Builder`] type in common cases.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use the builder to disable UTF-8 mode
+    /// everywhere.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     hybrid::regex::Regex, nfa::thompson, util::syntax, Match,
+    /// };
+    ///
+    /// let re = Regex::builder()
+    ///     .syntax(syntax::Config::new().utf8(false))
+    ///     .thompson(thompson::Config::new().utf8(false))
+    ///     .build(r"foo(?-u:[^b])ar.*")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
+    /// let expected = Some(Match::must(0, 1..9));
+    /// let got = re.find(&mut cache, haystack);
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn builder() -> Builder {
+        Builder::new()
+    }
+
+    /// Create a new cache for this `Regex`.
+    ///
+    /// The cache returned should only be used for searches for this
+    /// `Regex`. If you want to reuse the cache for another `Regex`, then
+    /// you must call [`Cache::reset`] with that `Regex` (or, equivalently,
+    /// [`Regex::reset_cache`]).
+    pub fn create_cache(&self) -> Cache {
+        Cache::new(self)
+    }
+
+    /// Reset the given cache such that it can be used for searching with
+    /// this `Regex` (and only this `Regex`).
+    ///
+    /// A cache reset permits reusing memory already allocated in this cache
+    /// with a different `Regex`.
+    ///
+    /// Resetting a cache sets its "clear count" to 0. This is relevant if the
+    /// `Regex` has been configured to "give up" after it has cleared the cache
+    /// a certain number of times.
+    ///
+    /// # Example
+    ///
+    /// This shows how to re-purpose a cache for use with a different `Regex`.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::regex::Regex, Match};
+    ///
+    /// let re1 = Regex::new(r"\w")?;
+    /// let re2 = Regex::new(r"\W")?;
+    ///
+    /// let mut cache = re1.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..2)),
+    ///     re1.find(&mut cache, "Δ"),
+    /// );
+    ///
+    /// // Using 'cache' with re2 is not allowed. It may result in panics or
+    /// // incorrect results. In order to re-purpose the cache, we must reset
+    /// // it with the Regex we'd like to use it with.
+    /// //
+    /// // Similarly, after this reset, using the cache with 're1' is also not
+    /// // allowed.
+    /// re2.reset_cache(&mut cache);
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..3)),
+    ///     re2.find(&mut cache, "☃"),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn reset_cache(&self, cache: &mut Cache) {
+        self.forward().reset_cache(&mut cache.forward);
+        self.reverse().reset_cache(&mut cache.reverse);
+    }
+}
+
+/// Standard infallible search routines for finding and iterating over matches.
+impl Regex {
+    /// Returns true if and only if this regex matches the given haystack.
+    ///
+    /// This routine may short circuit if it knows that scanning future input
+    /// will never lead to a different result. In particular, if the underlying
+    /// DFA enters a match state or a dead state, then this routine will return
+    /// `true` or `false`, respectively, without inspecting any future input.
+    ///
+    /// # Panics
+    ///
+    /// This routine panics if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this, although it is
+    /// typically a good idea to do so.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search panics, callers cannot know whether a match exists or
+    /// not.
+    ///
+    /// Use [`Regex::try_search`] if you want to handle these error conditions.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::hybrid::regex::Regex;
+    ///
+    /// let re = Regex::new("foo[0-9]+bar")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(re.is_match(&mut cache, "foo12345bar"));
+    /// assert!(!re.is_match(&mut cache, "foobar"));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn is_match<'h, I: Into<Input<'h>>>(
+        &self,
+        cache: &mut Cache,
+        input: I,
+    ) -> bool {
+        // Not only can we do an "earliest" search, but we can avoid doing a
+        // reverse scan too.
+        self.forward()
+            .try_search_fwd(&mut cache.forward, &input.into().earliest(true))
+            .unwrap()
+            .is_some()
+    }
+
+    /// Returns the start and end offset of the leftmost match. If no match
+    /// exists, then `None` is returned.
+    ///
+    /// # Panics
+    ///
+    /// This routine panics if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries.
The default configuration does not enable any
+    /// option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this, although it is
+    /// typically a good idea to do so.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search panics, callers cannot know whether a match exists or
+    /// not.
+    ///
+    /// Use [`Regex::try_search`] if you want to handle these error conditions.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{Match, hybrid::regex::Regex};
+    ///
+    /// let re = Regex::new("foo[0-9]+")?;
+    /// let mut cache = re.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 3..11)),
+    ///     re.find(&mut cache, "zzzfoo12345zzz"),
+    /// );
+    ///
+    /// // Even though a match is found after reading the first byte (`a`),
+    /// // the default leftmost-first match semantics demand that we find the
+    /// // earliest match that prefers earlier parts of the pattern over later
+    /// // parts.
+    /// let re = Regex::new("abc|a")?;
+    /// let mut cache = re.create_cache();
+    /// assert_eq!(Some(Match::must(0, 0..3)), re.find(&mut cache, "abc"));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find<'h, I: Into<Input<'h>>>(
+        &self,
+        cache: &mut Cache,
+        input: I,
+    ) -> Option<Match> {
+        self.try_search(cache, &input.into()).unwrap()
+    }
+
+    /// Returns an iterator over all non-overlapping leftmost matches in the
+    /// given bytes. If no match exists, then the iterator yields no elements.
+    ///
+    /// # Panics
+    ///
+    /// This routine panics if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this, although it is
+    /// typically a good idea to do so.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search panics, callers cannot know whether a match exists or
+    /// not.
+    ///
+    /// The above conditions also apply to the iterator returned as well. For
+    /// example, if the lazy DFA gives up or quits during a search using this
+    /// method, then a panic will occur during iteration.
+    ///
+    /// Use [`Regex::try_search`] with [`util::iter::Searcher`](iter::Searcher)
+    /// if you want to handle these error conditions.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::regex::Regex, Match};
+    ///
+    /// let re = Regex::new("foo[0-9]+")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let text = "foo1 foo12 foo123";
+    /// let matches: Vec<Match> = re.find_iter(&mut cache, text).collect();
+    /// assert_eq!(matches, vec![
+    ///     Match::must(0, 0..4),
+    ///     Match::must(0, 5..10),
+    ///     Match::must(0, 11..17),
+    /// ]);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find_iter<'r, 'c, 'h, I: Into<Input<'h>>>(
+        &'r self,
+        cache: &'c mut Cache,
+        input: I,
+    ) -> FindMatches<'r, 'c, 'h> {
+        let it = iter::Searcher::new(input.into());
+        FindMatches { re: self, cache, it }
+    }
+}
+
+/// Lower level "search" primitives that accept a `&Input` for cheap reuse
+/// and return an error if one occurs instead of panicking.
+impl Regex {
+    /// Returns the start and end offset of the leftmost match. If no match
+    /// exists, then `None` is returned.
+    ///
+    /// This is like [`Regex::find`] but with two differences:
+    ///
+    /// 1. It is not generic over `Into<Input>` and instead accepts a
+    /// `&Input`. This permits reusing the same `Input` for multiple searches
+    /// without needing to create a new one. This _may_ help with latency.
+    /// 2. It returns an error if the search could not complete, whereas
+    /// [`Regex::find`] will panic.
+    ///
+    /// # Errors
+    ///
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this, although it is
+    /// typically a good idea to do so.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
+    /// exists or not.
+    #[inline]
+    pub fn try_search(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+    ) -> Result<Option<Match>, MatchError> {
+        let (fcache, rcache) = (&mut cache.forward, &mut cache.reverse);
+        let end = match self.forward().try_search_fwd(fcache, input)? {
+            None => return Ok(None),
+            Some(end) => end,
+        };
+        // This special cases an empty match at the beginning of the search. If
+        // our end matches our start, then since a reverse DFA can't match past
+        // the start, it must follow that our starting position is also our end
+        // position. So short circuit and skip the reverse search.
+        if input.start() == end.offset() {
+            return Ok(Some(Match::new(
+                end.pattern(),
+                end.offset()..end.offset(),
+            )));
+        }
+        // We can also skip the reverse search if we know our search was
+        // anchored. This occurs either when the input config is anchored or
+        // when we know the regex itself is anchored. In this case, we know the
+        // start of the match, if one is found, must be the start of the
+        // search.
+        if self.is_anchored(input) {
+            return Ok(Some(Match::new(
+                end.pattern(),
+                input.start()..end.offset(),
+            )));
+        }
+        // N.B.
I have tentatively convinced myself that it isn't necessary
+        // to specify the specific pattern for the reverse search since the
+        // reverse search will always find the same pattern to match as the
+        // forward search. But I lack a rigorous proof. Why not just provide
+        // the pattern anyway? Well, if it is needed, then leaving it out
+        // gives us a chance to find a witness. (Also, if we don't need to
+        // specify the pattern, then we don't need to build the reverse DFA
+        // with 'starts_for_each_pattern' enabled. It doesn't matter too much
+        // for the lazy DFA, but does make the overall DFA bigger.)
+        //
+        // We also need to be careful to disable 'earliest' for the reverse
+        // search, since it could be enabled for the forward search. In the
+        // reverse case, to satisfy "leftmost" criteria, we need to match as
+        // much as we can. We also need to be careful to make the search
+        // anchored. We don't want the reverse search to report any matches
+        // other than the one beginning at the end of our forward search.
+        let revsearch = input
+            .clone()
+            .span(input.start()..end.offset())
+            .anchored(Anchored::Yes)
+            .earliest(false);
+        let start = self
+            .reverse()
+            .try_search_rev(rcache, &revsearch)?
+            .expect("reverse search must match if forward search does");
+        debug_assert_eq!(
+            start.pattern(),
+            end.pattern(),
+            "forward and reverse search must match same pattern",
+        );
+        debug_assert!(start.offset() <= end.offset());
+        Ok(Some(Match::new(end.pattern(), start.offset()..end.offset())))
+    }
+
+    /// Returns true if either the given input specifies an anchored search
+    /// or if the underlying NFA is always anchored.
+    fn is_anchored(&self, input: &Input<'_>) -> bool {
+        match input.get_anchored() {
+            Anchored::No => {
+                self.forward().get_nfa().is_always_start_anchored()
+            }
+            Anchored::Yes | Anchored::Pattern(_) => true,
+        }
+    }
+}
+
+/// Non-search APIs for querying information about the regex and setting a
+/// prefilter.
+impl Regex {
+    /// Return the underlying lazy DFA responsible for forward matching.
+    ///
+    /// This is useful for accessing the underlying lazy DFA and using it
+    /// directly if the situation calls for it.
+    pub fn forward(&self) -> &DFA {
+        &self.forward
+    }
+
+    /// Return the underlying lazy DFA responsible for reverse matching.
+    ///
+    /// This is useful for accessing the underlying lazy DFA and using it
+    /// directly if the situation calls for it.
+    pub fn reverse(&self) -> &DFA {
+        &self.reverse
+    }
+
+    /// Returns the total number of patterns matched by this regex.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::hybrid::regex::Regex;
+    ///
+    /// let re = Regex::new_many(&[r"[a-z]+", r"[0-9]+", r"\w+"])?;
+    /// assert_eq!(3, re.pattern_len());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn pattern_len(&self) -> usize {
+        assert_eq!(self.forward().pattern_len(), self.reverse().pattern_len());
+        self.forward().pattern_len()
+    }
+}
+
+/// An iterator over all non-overlapping matches for an infallible search.
+///
+/// The iterator yields a [`Match`] value until no more matches could be found.
+/// If the underlying regex engine returns an error, then a panic occurs.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'r` represents the lifetime of the regex object.
+/// * `'h` represents the lifetime of the haystack being searched.
+/// * `'c` represents the lifetime of the regex cache.
+///
+/// This iterator can be created with the [`Regex::find_iter`] method.
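+///
+/// A brief sketch of driving the iterator by hand (illustrative only; a
+/// `for` loop over the iterator is equivalent):
+///
+/// ```
+/// use regex_automata::{hybrid::regex::Regex, Match};
+///
+/// let re = Regex::new(r"[0-9]+")?;
+/// let mut cache = re.create_cache();
+/// let mut it = re.find_iter(&mut cache, "1 22 333");
+/// assert_eq!(Some(Match::must(0, 0..1)), it.next());
+/// assert_eq!(Some(Match::must(0, 2..4)), it.next());
+/// assert_eq!(Some(Match::must(0, 5..8)), it.next());
+/// assert_eq!(None, it.next());
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```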
+#[derive(Debug)]
+pub struct FindMatches<'r, 'c, 'h> {
+    re: &'r Regex,
+    cache: &'c mut Cache,
+    it: iter::Searcher<'h>,
+}
+
+impl<'r, 'c, 'h> Iterator for FindMatches<'r, 'c, 'h> {
+    type Item = Match;
+
+    #[inline]
+    fn next(&mut self) -> Option<Match> {
+        let FindMatches { re, ref mut cache, ref mut it } = *self;
+        it.advance(|input| re.try_search(cache, input))
+    }
+}
+
+/// A cache represents a partially computed forward and reverse DFA.
+///
+/// A cache is the key component that differentiates a classical DFA and a
+/// hybrid NFA/DFA (also called a "lazy DFA"). Where a classical DFA builds a
+/// complete transition table that can handle all possible inputs, a hybrid
+/// NFA/DFA starts with an empty transition table and builds only the parts
+/// required during search. The parts that are built are stored in a cache. For
+/// this reason, a cache is a required parameter for nearly every operation on
+/// a [`Regex`].
+///
+/// Caches can be created from their corresponding `Regex` via
+/// [`Regex::create_cache`]. A cache can only be used with either the `Regex`
+/// that created it, or the `Regex` that was most recently used to reset it
+/// with [`Cache::reset`]. Using a cache with any other `Regex` may result in
+/// panics or incorrect results.
+#[derive(Debug, Clone)]
+pub struct Cache {
+    forward: dfa::Cache,
+    reverse: dfa::Cache,
+}
+
+impl Cache {
+    /// Create a new cache for the given `Regex`.
+    ///
+    /// The cache returned should only be used for searches for the given
+    /// `Regex`. If you want to reuse the cache for another `Regex`, then you
+    /// must call [`Cache::reset`] with that `Regex`.
+    pub fn new(re: &Regex) -> Cache {
+        let forward = dfa::Cache::new(re.forward());
+        let reverse = dfa::Cache::new(re.reverse());
+        Cache { forward, reverse }
+    }
+
+    /// Reset this cache such that it can be used for searching with the given
+    /// `Regex` (and only that `Regex`).
+    ///
+    /// A cache reset permits reusing memory already allocated in this cache
+    /// with a different `Regex`.
+    ///
+    /// Resetting a cache sets its "clear count" to 0. This is relevant if the
+    /// `Regex` has been configured to "give up" after it has cleared the cache
+    /// a certain number of times.
+    ///
+    /// # Example
+    ///
+    /// This shows how to re-purpose a cache for use with a different `Regex`.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::regex::Regex, Match};
+    ///
+    /// let re1 = Regex::new(r"\w")?;
+    /// let re2 = Regex::new(r"\W")?;
+    ///
+    /// let mut cache = re1.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..2)),
+    ///     re1.find(&mut cache, "Δ"),
+    /// );
+    ///
+    /// // Using 'cache' with re2 is not allowed. It may result in panics or
+    /// // incorrect results. In order to re-purpose the cache, we must reset
+    /// // it with the Regex we'd like to use it with.
+    /// //
+    /// // Similarly, after this reset, using the cache with 're1' is also not
+    /// // allowed.
+    /// cache.reset(&re2);
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..3)),
+    ///     re2.find(&mut cache, "☃"),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn reset(&mut self, re: &Regex) {
+        self.forward.reset(re.forward());
+        self.reverse.reset(re.reverse());
+    }
+
+    /// Return a reference to the forward cache.
+    pub fn forward(&mut self) -> &dfa::Cache {
+        &self.forward
+    }
+
+    /// Return a reference to the reverse cache.
+    pub fn reverse(&mut self) -> &dfa::Cache {
+        &self.reverse
+    }
+
+    /// Return a mutable reference to the forward cache.
+    ///
+    /// If you need mutable references to both the forward and reverse caches,
+    /// then use [`Cache::as_parts_mut`].
+    pub fn forward_mut(&mut self) -> &mut dfa::Cache {
+        &mut self.forward
+    }
+
+    /// Return a mutable reference to the reverse cache.
+    ///
+    /// If you need mutable references to both the forward and reverse caches,
+    /// then use [`Cache::as_parts_mut`].
+    pub fn reverse_mut(&mut self) -> &mut dfa::Cache {
+        &mut self.reverse
+    }
+
+    /// Return references to the forward and reverse caches, respectively.
+    pub fn as_parts(&self) -> (&dfa::Cache, &dfa::Cache) {
+        (&self.forward, &self.reverse)
+    }
+
+    /// Return mutable references to the forward and reverse caches,
+    /// respectively.
+    pub fn as_parts_mut(&mut self) -> (&mut dfa::Cache, &mut dfa::Cache) {
+        (&mut self.forward, &mut self.reverse)
+    }
+
+    /// Returns the heap memory usage, in bytes, as a sum of the forward and
+    /// reverse lazy DFA caches.
+    ///
+    /// This does **not** include the stack size used up by this cache. To
+    /// compute that, use `std::mem::size_of::<Cache>()`.
+    pub fn memory_usage(&self) -> usize {
+        self.forward.memory_usage() + self.reverse.memory_usage()
+    }
+}
+
+/// A builder for a regex based on a hybrid NFA/DFA.
+///
+/// This builder permits configuring options for the syntax of a pattern, the
+/// NFA construction, the lazy DFA construction and finally the regex searching
+/// itself. This builder is different from a general purpose regex builder
+/// in that it permits fine-grained configuration of the construction process.
+/// The trade-off for this is complexity, and the possibility of setting a
+/// configuration that might not make sense. For example, there are two
+/// different UTF-8 modes:
+///
+/// * [`syntax::Config::utf8`](crate::util::syntax::Config::utf8) controls
+/// whether the pattern itself can contain sub-expressions that match invalid
+/// UTF-8.
+/// * [`thompson::Config::utf8`] controls how the regex iterators themselves
+/// advance the starting position of the next search when a match with zero
+/// length is found.
+///
+/// Generally speaking, callers will want to either enable all of these or
+/// disable all of these.
+///
+/// Internally, building a regex requires building two hybrid NFA/DFAs,
+/// where one is responsible for finding the end of a match and the other is
+/// responsible for finding the start of a match. If you only need to detect
+/// whether something matched, or only the end of a match, then you should use
+/// a [`dfa::Builder`] to construct a single hybrid NFA/DFA, which is cheaper
+/// than building two of them.
+///
+/// # Example
+///
+/// This example shows how to disable UTF-8 mode in the syntax and the regex
+/// itself. This is generally what you want for matching on arbitrary bytes.
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::{
+///     hybrid::regex::Regex, nfa::thompson, util::syntax, Match,
+/// };
+///
+/// let re = Regex::builder()
+///     .syntax(syntax::Config::new().utf8(false))
+///     .thompson(thompson::Config::new().utf8(false))
+///     .build(r"foo(?-u:[^b])ar.*")?;
+/// let mut cache = re.create_cache();
+///
+/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
+/// let expected = Some(Match::must(0, 1..9));
+/// let got = re.find(&mut cache, haystack);
+/// assert_eq!(expected, got);
+/// // Notice that `(?-u:[^b])` matches invalid UTF-8,
+/// // but the subsequent `.*` does not! Disabling UTF-8
+/// // on the syntax permits this.
+/// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap().range()]);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Builder {
+    dfa: dfa::Builder,
+}
+
+impl Builder {
+    /// Create a new regex builder with the default configuration.
+    pub fn new() -> Builder {
+        Builder { dfa: DFA::builder() }
+    }
+
+    /// Build a regex from the given pattern.
+    ///
+    /// If there was a problem parsing or compiling the pattern, then an error
+    /// is returned.
+    #[cfg(feature = "syntax")]
+    pub fn build(&self, pattern: &str) -> Result<Regex, BuildError> {
+        self.build_many(&[pattern])
+    }
+
+    /// Build a regex from the given patterns.
+    #[cfg(feature = "syntax")]
+    pub fn build_many<P: AsRef<str>>(
+        &self,
+        patterns: &[P],
+    ) -> Result<Regex, BuildError> {
+        let forward = self.dfa.build_many(patterns)?;
+        let reverse = self
+            .dfa
+            .clone()
+            .configure(
+                DFA::config()
+                    .prefilter(None)
+                    .specialize_start_states(false)
+                    .match_kind(MatchKind::All),
+            )
+            .thompson(thompson::Config::new().reverse(true))
+            .build_many(patterns)?;
+        Ok(self.build_from_dfas(forward, reverse))
+    }
+
+    /// Build a regex from its component forward and reverse hybrid NFA/DFAs.
+    ///
+    /// This is useful when you've built a forward and reverse lazy DFA
+    /// separately, and want to combine them into a single regex. Once built,
+    /// the individual DFAs given can still be accessed via [`Regex::forward`]
+    /// and [`Regex::reverse`].
+    ///
+    /// It is important that the reverse lazy DFA be compiled under the
+    /// following conditions:
+    ///
+    /// * It should use [`MatchKind::All`] semantics.
+    /// * It should match in reverse.
+    /// * Otherwise, its configuration should match the forward DFA.
+    ///
+    /// If these conditions aren't satisfied, then the behavior of searches is
+    /// unspecified.
+    ///
+    /// Note that when using this constructor, no configuration is applied.
+    /// Since this routine provides the DFAs to the builder, there is no
+    /// opportunity to apply other configuration options.
+    ///
+    /// # Example
+    ///
+    /// This shows how to build individual lazy forward and reverse DFAs, and
+    /// then combine them into a single `Regex`.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::{dfa::DFA, regex::Regex},
+    ///     nfa::thompson,
+    ///     MatchKind,
+    /// };
+    ///
+    /// let fwd = DFA::new(r"foo[0-9]+")?;
+    /// let rev = DFA::builder()
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .thompson(thompson::Config::new().reverse(true))
+    ///     .build(r"foo[0-9]+")?;
+    ///
+    /// let re = Regex::builder().build_from_dfas(fwd, rev);
+    /// let mut cache = re.create_cache();
+    /// assert_eq!(true, re.is_match(&mut cache, "foo123"));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn build_from_dfas(&self, forward: DFA, reverse: DFA) -> Regex {
+        Regex { forward, reverse }
+    }
+
+    /// Set the syntax configuration for this builder using
+    /// [`syntax::Config`](crate::util::syntax::Config).
+    ///
+    /// This permits setting things like case insensitivity, Unicode and multi
+    /// line mode.
+    #[cfg(feature = "syntax")]
+    pub fn syntax(
+        &mut self,
+        config: crate::util::syntax::Config,
+    ) -> &mut Builder {
+        self.dfa.syntax(config);
+        self
+    }
+
+    /// Set the Thompson NFA configuration for this builder using
+    /// [`nfa::thompson::Config`](thompson::Config).
+    ///
+    /// This permits setting things like whether additional time should be
+    /// spent shrinking the size of the NFA.
+    #[cfg(feature = "syntax")]
+    pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder {
+        self.dfa.thompson(config);
+        self
+    }
+
+    /// Set the lazy DFA compilation configuration for this builder using
+    /// [`dfa::Config`].
+    ///
+    /// This permits setting things like whether Unicode word boundaries should
+    /// be heuristically supported, or how the cache should behave.
+    pub fn dfa(&mut self, config: dfa::Config) -> &mut Builder {
+        self.dfa.configure(config);
+        self
+    }
+}
+
+impl Default for Builder {
+    fn default() -> Builder {
+        Builder::new()
+    }
+}
diff --git a/vendor/regex-automata/src/hybrid/search.rs b/vendor/regex-automata/src/hybrid/search.rs
new file mode 100644
index 0000000..1f4a505
--- /dev/null
+++ b/vendor/regex-automata/src/hybrid/search.rs
@@ -0,0 +1,802 @@
+use crate::{
+    hybrid::{
+        dfa::{Cache, OverlappingState, DFA},
+        id::LazyStateID,
+    },
+    util::{
+        prefilter::Prefilter,
+        search::{HalfMatch, Input, MatchError, Span},
+    },
+};
+
+#[inline(never)]
+pub(crate) fn find_fwd(
+    dfa: &DFA,
+    cache: &mut Cache,
+    input: &Input<'_>,
+) -> Result<Option<HalfMatch>, MatchError> {
+    if input.is_done() {
+        return Ok(None);
+    }
+    let pre = if input.get_anchored().is_anchored() {
+        None
+    } else {
+        dfa.get_config().get_prefilter()
+    };
+    // So what we do here is specialize four different versions of 'find_fwd':
+    // one for each of the combinations for 'has prefilter' and 'is earliest
+    // search'. The reason for doing this is that both of these things require
+    // branches and special handling in some code that can be very hot,
+    // and shaving off as much as we can when we don't need it tends to be
+    // beneficial in ad hoc benchmarks. To see these differences, you often
+    // need a query with a high match count. In other words, specializing these
+    // four routines *tends* to help latency more than throughput.
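+    //
+    // (Each call below passes 'pre' and 'earliest' as per-call-site
+    // constants, so with inlining the compiler can drop the corresponding
+    // branches from the hot loop in each specialized copy.)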
+    if pre.is_some() {
+        if input.get_earliest() {
+            find_fwd_imp(dfa, cache, input, pre, true)
+        } else {
+            find_fwd_imp(dfa, cache, input, pre, false)
+        }
+    } else {
+        if input.get_earliest() {
+            find_fwd_imp(dfa, cache, input, None, true)
+        } else {
+            find_fwd_imp(dfa, cache, input, None, false)
+        }
+    }
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn find_fwd_imp(
+    dfa: &DFA,
+    cache: &mut Cache,
+    input: &Input<'_>,
+    pre: Option<&'_ Prefilter>,
+    earliest: bool,
+) -> Result<Option<HalfMatch>, MatchError> {
+    // See 'prefilter_restart' docs for explanation.
+    let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty();
+    let mut mat = None;
+    let mut sid = init_fwd(dfa, cache, input)?;
+    let mut at = input.start();
+    // This could just be a closure, but then I think it would be unsound
+    // because it would need to be safe to invoke. This way, the lack of safety
+    // is clearer in the code below.
+    macro_rules! next_unchecked {
+        ($sid:expr, $at:expr) => {{
+            let byte = *input.haystack().get_unchecked($at);
+            dfa.next_state_untagged_unchecked(cache, $sid, byte)
+        }};
+    }
+
+    if let Some(ref pre) = pre {
+        let span = Span::from(at..input.end());
+        match pre.find(input.haystack(), span) {
+            None => return Ok(mat),
+            Some(ref span) => {
+                at = span.start;
+                if !universal_start {
+                    sid = prefilter_restart(dfa, cache, &input, at)?;
+                }
+            }
+        }
+    }
+    cache.search_start(at);
+    while at < input.end() {
+        if sid.is_tagged() {
+            cache.search_update(at);
+            sid = dfa
+                .next_state(cache, sid, input.haystack()[at])
+                .map_err(|_| gave_up(at))?;
+        } else {
+            // SAFETY: There are two safety invariants we need to uphold
+            // here in the loops below: that 'sid' and 'prev_sid' are valid
+            // state IDs for this DFA, and that 'at' is a valid index into
+            // 'haystack'. For the former, we rely on the invariant that
+            // next_state* and start_state_forward always returns a valid state
+            // ID (given a valid state ID in the former case), and that we are
+            // only at this place in the code if 'sid' is untagged. Moreover,
+            // every call to next_state_untagged_unchecked below is guarded by
+            // a check that sid is untagged. For the latter safety invariant,
+            // we always guard unchecked access with a check that 'at' is less
+            // than 'end', where 'end <= haystack.len()'. In the unrolled loop
+            // below, we ensure that 'at' is always in bounds.
+            //
+            // PERF: For justification of omitting bounds checks, it gives us a
+            // ~10% bump in search time. This was used for a benchmark:
+            //
+            //     regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile
+            //
+            // PERF: For justification for the loop unrolling, we use a few
+            // different tests:
+            //
+            //     regex-cli find half hybrid -p '\w{50}' -UBb bigfile
+            //     regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile
+            //     regex-cli find half hybrid -p 'ZQZQZQZQ' -UBb bigfile
+            //
+            // And there are four different configurations:
+            //
+            //     nounroll: this entire 'else' block vanishes and we just
+            //               always use 'dfa.next_state(..)'.
+ // unroll1: just the outer loop below + // unroll2: just the inner loop below + // unroll3: both the outer and inner loops below + // + // This results in a matrix of timings for each of the above + // regexes with each of the above unrolling configurations: + // + // '\w{50}' '(?m)^.+$' 'ZQZQZQZQ' + // nounroll 1.51s 2.34s 1.51s + // unroll1 1.53s 2.32s 1.56s + // unroll2 2.22s 1.50s 0.61s + // unroll3 1.67s 1.45s 0.61s + // + // Ideally we'd be able to find a configuration that yields the + // best time for all regexes, but alas we settle for unroll3 that + // gives us *almost* the best for '\w{50}' and the best for the + // other two regexes. + // + // So what exactly is going on here? The first unrolling (grouping + // together runs of untagged transitions) specifically targets + // our choice of representation. The second unrolling (grouping + // together runs of self-transitions) specifically targets a common + // DFA topology. Let's dig in a little bit by looking at our + // regexes: + // + // '\w{50}': This regex spends a lot of time outside of the DFA's + // start state matching some part of the '\w' repetition. This + // means that it's a bit of a worst case for loop unrolling that + // targets self-transitions since the self-transitions in '\w{50}' + // are not particularly active for this haystack. However, the + // first unrolling (grouping together untagged transitions) + // does apply quite well here since very few transitions hit + // match/dead/quit/unknown states. It is however worth mentioning + // that if start states are configured to be tagged (which you + // typically want to do if you have a prefilter), then this regex + // actually slows way down because it is constantly ping-ponging + // out of the unrolled loop and into the handling of a tagged start + // state below. But when start states aren't tagged, the unrolled + // loop stays hot. (This is why it's imperative that start state + // tagging be disabled when there isn't a prefilter!) + // + // '(?m)^.+$': There are two important aspects of this regex: 1) + // on this haystack, its match count is very high, much higher + // than the other two regex and 2) it spends the vast majority + // of its time matching '.+'. Since Unicode mode is disabled, + // this corresponds to repeatedly following self transitions for + // the vast majority of the input. This does benefit from the + // untagged unrolling since most of the transitions will be to + // untagged states, but the untagged unrolling does more work than + // what is actually required. Namely, it has to keep track of the + // previous and next state IDs, which I guess requires a bit more + // shuffling. This is supported by the fact that nounroll+unroll1 + // are both slower than unroll2+unroll3, where the latter has a + // loop unrolling that specifically targets self-transitions. + // + // 'ZQZQZQZQ': This one is very similar to '(?m)^.+$' because it + // spends the vast majority of its time in self-transitions for + // the (implicit) unanchored prefix. The main difference with + // '(?m)^.+$' is that it has a much lower match count. So there + // isn't much time spent in the overhead of reporting matches. This + // is the primary explainer in the perf difference here. We include + // this regex and the former to make sure we have comparison points + // with high and low match counts. + // + // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'. 
+            //
+            // NOTE: In a follow-up, it turns out that the "inner" loop
+            // mentioned above was a pretty big pessimization in some other
+            // cases. Namely, it resulted in too much ping-ponging into and out
+            // of the loop, which resulted in nearly ~2x regressions in search
+            // time when compared to the original lazy DFA in the regex crate.
+            // So I've removed the second loop unrolling that targets the
+            // self-transition case.
+            let mut prev_sid = sid;
+            while at < input.end() {
+                prev_sid = unsafe { next_unchecked!(sid, at) };
+                if prev_sid.is_tagged() || at + 3 >= input.end() {
+                    core::mem::swap(&mut prev_sid, &mut sid);
+                    break;
+                }
+                at += 1;
+
+                sid = unsafe { next_unchecked!(prev_sid, at) };
+                if sid.is_tagged() {
+                    break;
+                }
+                at += 1;
+
+                prev_sid = unsafe { next_unchecked!(sid, at) };
+                if prev_sid.is_tagged() {
+                    core::mem::swap(&mut prev_sid, &mut sid);
+                    break;
+                }
+                at += 1;
+
+                sid = unsafe { next_unchecked!(prev_sid, at) };
+                if sid.is_tagged() {
+                    break;
+                }
+                at += 1;
+            }
+            // If we quit out of the code above with an unknown state ID at
+            // any point, then we need to re-compute that transition using
+            // 'next_state', which will do NFA powerset construction for us.
+            if sid.is_unknown() {
+                cache.search_update(at);
+                sid = dfa
+                    .next_state(cache, prev_sid, input.haystack()[at])
+                    .map_err(|_| gave_up(at))?;
+            }
+        }
+        if sid.is_tagged() {
+            if sid.is_start() {
+                if let Some(ref pre) = pre {
+                    let span = Span::from(at..input.end());
+                    match pre.find(input.haystack(), span) {
+                        None => {
+                            cache.search_finish(span.end);
+                            return Ok(mat);
+                        }
+                        Some(ref span) => {
+                            // We want to skip any update to 'at' below
+                            // at the end of this iteration and just
+                            // jump immediately back to the next state
+                            // transition at the leading position of the
+                            // candidate match.
+                            //
+                            // ... but only if we actually made progress
+                            // with our prefilter, otherwise if the start
+                            // state has a self-loop, we can get stuck.
+                            if span.start > at {
+                                at = span.start;
+                                if !universal_start {
+                                    sid = prefilter_restart(
+                                        dfa, cache, &input, at,
+                                    )?;
+                                }
+                                continue;
+                            }
+                        }
+                    }
+                }
+            } else if sid.is_match() {
+                let pattern = dfa.match_pattern(cache, sid, 0);
+                // Since slice ranges are inclusive at the beginning and
+                // exclusive at the end, and since forward searches report
+                // the end, we can return 'at' as-is. This only works because
+                // matches are delayed by 1 byte. So by the time we observe a
+                // match, 'at' has already been set to 1 byte past the actual
+                // match location, which is precisely the exclusive ending
+                // bound of the match.
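+                //
+                // (Illustration: with pattern 'a' and haystack "ab", the
+                // match state is entered while transitioning on 'b' at
+                // at == 1, so reporting 'at' yields the exclusive end
+                // offset 1 for the match at 0..1.)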
+                mat = Some(HalfMatch::new(pattern, at));
+                if earliest {
+                    cache.search_finish(at);
+                    return Ok(mat);
+                }
+            } else if sid.is_dead() {
+                cache.search_finish(at);
+                return Ok(mat);
+            } else if sid.is_quit() {
+                cache.search_finish(at);
+                return Err(MatchError::quit(input.haystack()[at], at));
+            } else {
+                debug_assert!(sid.is_unknown());
+                unreachable!("sid being unknown is a bug");
+            }
+        }
+        at += 1;
+    }
+    eoi_fwd(dfa, cache, input, &mut sid, &mut mat)?;
+    cache.search_finish(input.end());
+    Ok(mat)
+}
+
+#[inline(never)]
+pub(crate) fn find_rev(
+    dfa: &DFA,
+    cache: &mut Cache,
+    input: &Input<'_>,
+) -> Result<Option<HalfMatch>, MatchError> {
+    if input.is_done() {
+        return Ok(None);
+    }
+    if input.get_earliest() {
+        find_rev_imp(dfa, cache, input, true)
+    } else {
+        find_rev_imp(dfa, cache, input, false)
+    }
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn find_rev_imp(
+    dfa: &DFA,
+    cache: &mut Cache,
+    input: &Input<'_>,
+    earliest: bool,
+) -> Result<Option<HalfMatch>, MatchError> {
+    let mut mat = None;
+    let mut sid = init_rev(dfa, cache, input)?;
+    // In reverse search, the loop below can't handle the case of searching an
+    // empty slice. Ideally we could write something congruent to the forward
+    // search, i.e., 'while at >= start', but 'start' might be 0. Since we use
+    // an unsigned offset, 'at >= 0' is trivially always true. We could avoid
+    // this extra case handling by using a signed offset, but Rust makes it
+    // annoying to do. So... We just handle the empty case separately.
+    if input.start() == input.end() {
+        eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
+        return Ok(mat);
+    }
+
+    let mut at = input.end() - 1;
+    macro_rules! next_unchecked {
+        ($sid:expr, $at:expr) => {{
+            let byte = *input.haystack().get_unchecked($at);
+            dfa.next_state_untagged_unchecked(cache, $sid, byte)
+        }};
+    }
+    cache.search_start(at);
+    loop {
+        if sid.is_tagged() {
+            cache.search_update(at);
+            sid = dfa
+                .next_state(cache, sid, input.haystack()[at])
+                .map_err(|_| gave_up(at))?;
+        } else {
+            // SAFETY: See comments in 'find_fwd' for a safety argument.
+            //
+            // PERF: The comments in 'find_fwd' also provide a justification
+            // from a performance perspective as to 1) why we elide bounds
+            // checks and 2) why we do a specialized version of unrolling
+            // below. The reverse search does have a slightly different
+            // consideration in that most reverse searches tend to be
+            // anchored and on shorter haystacks. However, this still makes a
+            // difference. Take this command for example:
+            //
+            //     regex-cli find match hybrid -p '(?m)^.+$' -UBb bigfile
+            //
+            // (Notice that we use 'find hybrid regex', not 'find hybrid dfa'
+            // like in the justification for the forward direction. The 'regex'
+            // sub-command will find start-of-match and thus run the reverse
+            // direction.)
+            //
+            // Without unrolling below, the above command takes around 3.76s.
+            // But with the unrolling below, we get down to 2.55s. If we keep
+            // the unrolling but add in bounds checks, then we get 2.86s.
+            //
+            // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'.
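+            //
+            // (The unrolled loop below mirrors 'find_fwd_imp', but walks
+            // 'at' backwards; the 'at <= start + 3' guard plays the same
+            // role as the forward version's 'at + 3 >= end' bound check.)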
+ let mut prev_sid = sid; + while at >= input.start() { + prev_sid = unsafe { next_unchecked!(sid, at) }; + if prev_sid.is_tagged() + || at <= input.start().saturating_add(3) + { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at -= 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if sid.is_tagged() { + break; + } + at -= 1; + + prev_sid = unsafe { next_unchecked!(sid, at) }; + if prev_sid.is_tagged() { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at -= 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if sid.is_tagged() { + break; + } + at -= 1; + } + // If we quit out of the code above with an unknown state ID at + // any point, then we need to re-compute that transition using + // 'next_state', which will do NFA powerset construction for us. + if sid.is_unknown() { + cache.search_update(at); + sid = dfa + .next_state(cache, prev_sid, input.haystack()[at]) + .map_err(|_| gave_up(at))?; + } + } + if sid.is_tagged() { + if sid.is_start() { + // do nothing + } else if sid.is_match() { + let pattern = dfa.match_pattern(cache, sid, 0); + // Since reverse searches report the beginning of a match + // and the beginning is inclusive (not exclusive like the + // end of a match), we add 1 to make it inclusive. + mat = Some(HalfMatch::new(pattern, at + 1)); + if earliest { + cache.search_finish(at); + return Ok(mat); + } + } else if sid.is_dead() { + cache.search_finish(at); + return Ok(mat); + } else if sid.is_quit() { + cache.search_finish(at); + return Err(MatchError::quit(input.haystack()[at], at)); + } else { + debug_assert!(sid.is_unknown()); + unreachable!("sid being unknown is a bug"); + } + } + if at == input.start() { + break; + } + at -= 1; + } + cache.search_finish(input.start()); + eoi_rev(dfa, cache, input, &mut sid, &mut mat)?; + Ok(mat) +} + +#[inline(never)] +pub(crate) fn find_overlapping_fwd( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + state.mat = None; + if input.is_done() { + return Ok(()); + } + let pre = if input.get_anchored().is_anchored() { + None + } else { + dfa.get_config().get_prefilter() + }; + if pre.is_some() { + find_overlapping_fwd_imp(dfa, cache, input, pre, state) + } else { + find_overlapping_fwd_imp(dfa, cache, input, None, state) + } +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn find_overlapping_fwd_imp( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, + pre: Option<&'_ Prefilter>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + // See 'prefilter_restart' docs for explanation. + let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty(); + let mut sid = match state.id { + None => { + state.at = input.start(); + init_fwd(dfa, cache, input)? + } + Some(sid) => { + if let Some(match_index) = state.next_match_index { + let match_len = dfa.match_len(cache, sid); + if match_index < match_len { + state.next_match_index = Some(match_index + 1); + let pattern = dfa.match_pattern(cache, sid, match_index); + state.mat = Some(HalfMatch::new(pattern, state.at)); + return Ok(()); + } + } + // Once we've reported all matches at a given position, we need to + // advance the search to the next position. + state.at += 1; + if state.at > input.end() { + return Ok(()); + } + sid + } + }; + + // NOTE: We don't optimize the crap out of this routine primarily because + // it seems like most overlapping searches will have higher match counts, + // and thus, throughput is perhaps not as important. 
But if you have a use + // case for something faster, feel free to file an issue. + cache.search_start(state.at); + while state.at < input.end() { + sid = dfa + .next_state(cache, sid, input.haystack()[state.at]) + .map_err(|_| gave_up(state.at))?; + if sid.is_tagged() { + state.id = Some(sid); + if sid.is_start() { + if let Some(ref pre) = pre { + let span = Span::from(state.at..input.end()); + match pre.find(input.haystack(), span) { + None => return Ok(()), + Some(ref span) => { + if span.start > state.at { + state.at = span.start; + if !universal_start { + sid = prefilter_restart( + dfa, cache, &input, state.at, + )?; + } + continue; + } + } + } + } + } else if sid.is_match() { + state.next_match_index = Some(1); + let pattern = dfa.match_pattern(cache, sid, 0); + state.mat = Some(HalfMatch::new(pattern, state.at)); + cache.search_finish(state.at); + return Ok(()); + } else if sid.is_dead() { + cache.search_finish(state.at); + return Ok(()); + } else if sid.is_quit() { + cache.search_finish(state.at); + return Err(MatchError::quit( + input.haystack()[state.at], + state.at, + )); + } else { + debug_assert!(sid.is_unknown()); + unreachable!("sid being unknown is a bug"); + } + } + state.at += 1; + cache.search_update(state.at); + } + + let result = eoi_fwd(dfa, cache, input, &mut sid, &mut state.mat); + state.id = Some(sid); + if state.mat.is_some() { + // '1' is always correct here since if we get to this point, this + // always corresponds to the first (index '0') match discovered at + // this position. So the next match to report at this position (if + // it exists) is at index '1'. + state.next_match_index = Some(1); + } + cache.search_finish(input.end()); + result +} + +#[inline(never)] +pub(crate) fn find_overlapping_rev( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + state.mat = None; + if input.is_done() { + return Ok(()); + } + let mut sid = match state.id { + None => { + let sid = init_rev(dfa, cache, input)?; + state.id = Some(sid); + if input.start() == input.end() { + state.rev_eoi = true; + } else { + state.at = input.end() - 1; + } + sid + } + Some(sid) => { + if let Some(match_index) = state.next_match_index { + let match_len = dfa.match_len(cache, sid); + if match_index < match_len { + state.next_match_index = Some(match_index + 1); + let pattern = dfa.match_pattern(cache, sid, match_index); + state.mat = Some(HalfMatch::new(pattern, state.at)); + return Ok(()); + } + } + // Once we've reported all matches at a given position, we need + // to advance the search to the next position. However, if we've + // already followed the EOI transition, then we know we're done + // with the search and there cannot be any more matches to report. + if state.rev_eoi { + return Ok(()); + } else if state.at == input.start() { + // At this point, we should follow the EOI transition. This + // will cause us the skip the main loop below and fall through + // to the final 'eoi_rev' transition. + state.rev_eoi = true; + } else { + // We haven't hit the end of the search yet, so move on. 
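+                // (It is in bounds to decrement here: in this branch,
+                // 'state.at > input.start()', so 'state.at' is at least 1.)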
+                state.at -= 1;
+            }
+            sid
+        }
+    };
+    cache.search_start(state.at);
+    while !state.rev_eoi {
+        sid = dfa
+            .next_state(cache, sid, input.haystack()[state.at])
+            .map_err(|_| gave_up(state.at))?;
+        if sid.is_tagged() {
+            state.id = Some(sid);
+            if sid.is_start() {
+                // do nothing
+            } else if sid.is_match() {
+                state.next_match_index = Some(1);
+                let pattern = dfa.match_pattern(cache, sid, 0);
+                state.mat = Some(HalfMatch::new(pattern, state.at + 1));
+                cache.search_finish(state.at);
+                return Ok(());
+            } else if sid.is_dead() {
+                cache.search_finish(state.at);
+                return Ok(());
+            } else if sid.is_quit() {
+                cache.search_finish(state.at);
+                return Err(MatchError::quit(
+                    input.haystack()[state.at],
+                    state.at,
+                ));
+            } else {
+                debug_assert!(sid.is_unknown());
+                unreachable!("sid being unknown is a bug");
+            }
+        }
+        if state.at == input.start() {
+            break;
+        }
+        state.at -= 1;
+        cache.search_update(state.at);
+    }
+
+    let result = eoi_rev(dfa, cache, input, &mut sid, &mut state.mat);
+    state.rev_eoi = true;
+    state.id = Some(sid);
+    if state.mat.is_some() {
+        // '1' is always correct here since if we get to this point, this
+        // always corresponds to the first (index '0') match discovered at
+        // this position. So the next match to report at this position (if
+        // it exists) is at index '1'.
+        state.next_match_index = Some(1);
+    }
+    cache.search_finish(input.start());
+    result
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn init_fwd(
+    dfa: &DFA,
+    cache: &mut Cache,
+    input: &Input<'_>,
+) -> Result<LazyStateID, MatchError> {
+    let sid = dfa.start_state_forward(cache, input)?;
+    // Start states can never be match states, since all matches are delayed
+    // by 1 byte.
+    debug_assert!(!sid.is_match());
+    Ok(sid)
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn init_rev(
+    dfa: &DFA,
+    cache: &mut Cache,
+    input: &Input<'_>,
+) -> Result<LazyStateID, MatchError> {
+    let sid = dfa.start_state_reverse(cache, input)?;
+    // Start states can never be match states, since all matches are delayed
+    // by 1 byte.
+    debug_assert!(!sid.is_match());
+    Ok(sid)
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn eoi_fwd(
+    dfa: &DFA,
+    cache: &mut Cache,
+    input: &Input<'_>,
+    sid: &mut LazyStateID,
+    mat: &mut Option<HalfMatch>,
+) -> Result<(), MatchError> {
+    let sp = input.get_span();
+    match input.haystack().get(sp.end) {
+        Some(&b) => {
+            *sid =
+                dfa.next_state(cache, *sid, b).map_err(|_| gave_up(sp.end))?;
+            if sid.is_match() {
+                let pattern = dfa.match_pattern(cache, *sid, 0);
+                *mat = Some(HalfMatch::new(pattern, sp.end));
+            } else if sid.is_quit() {
+                return Err(MatchError::quit(b, sp.end));
+            }
+        }
+        None => {
+            *sid = dfa
+                .next_eoi_state(cache, *sid)
+                .map_err(|_| gave_up(input.haystack().len()))?;
+            if sid.is_match() {
+                let pattern = dfa.match_pattern(cache, *sid, 0);
+                *mat = Some(HalfMatch::new(pattern, input.haystack().len()));
+            }
+            // N.B. We don't have to check 'is_quit' here because the EOI
+            // transition can never lead to a quit state.
+            debug_assert!(!sid.is_quit());
+        }
+    }
+    Ok(())
+}
+
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn eoi_rev(
+    dfa: &DFA,
+    cache: &mut Cache,
+    input: &Input<'_>,
+    sid: &mut LazyStateID,
+    mat: &mut Option<HalfMatch>,
+) -> Result<(), MatchError> {
+    let sp = input.get_span();
+    if sp.start > 0 {
+        let byte = input.haystack()[sp.start - 1];
+        *sid = dfa
+            .next_state(cache, *sid, byte)
+            .map_err(|_| gave_up(sp.start))?;
+        if sid.is_match() {
+            let pattern = dfa.match_pattern(cache, *sid, 0);
+            *mat = Some(HalfMatch::new(pattern, sp.start));
+        } else if sid.is_quit() {
+            return Err(MatchError::quit(byte, sp.start - 1));
+        }
+    } else {
+        *sid =
+            dfa.next_eoi_state(cache, *sid).map_err(|_| gave_up(sp.start))?;
+        if sid.is_match() {
+            let pattern = dfa.match_pattern(cache, *sid, 0);
+            *mat = Some(HalfMatch::new(pattern, 0));
+        }
+        // N.B. We don't have to check 'is_quit' here because the EOI
+        // transition can never lead to a quit state.
+        debug_assert!(!sid.is_quit());
+    }
+    Ok(())
+}
+
+/// Re-compute the starting state that a DFA should be in after finding a
+/// prefilter candidate match at the position `at`.
+///
+/// It is always correct to call this, but not always necessary. Namely,
+/// whenever the DFA has a universal start state, the DFA can remain in the
+/// start state that it was in when it ran the prefilter. Why? Because in that
+/// case, there is only one start state.
+///
+/// When does a DFA have a universal start state? In precisely the cases
+/// where it has no look-around assertions in its prefix. So for example,
+/// `\bfoo` does not have a universal start state because the start state
+/// depends on whether the byte immediately before the start position is a
+/// word byte or not. However, `foo\b` does have a universal start state
+/// because the word boundary does not appear in the pattern's prefix.
+///
+/// So... most cases don't need this, but when a pattern doesn't have a
+/// universal start state, then after a prefilter candidate has been found,
+/// the current state *must* be re-litigated as if computing the start state
+/// at the beginning of the search, because it might change. That is, not all
+/// start states are created equal.
+///
+/// Why avoid it? Because while it's not super expensive, it isn't a trivial
+/// operation to compute the start state. It is much better to avoid it and
+/// just stay in the current state if you know it to be correct.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn prefilter_restart(
+    dfa: &DFA,
+    cache: &mut Cache,
+    input: &Input<'_>,
+    at: usize,
+) -> Result<LazyStateID, MatchError> {
+    let mut input = input.clone();
+    input.set_start(at);
+    init_fwd(dfa, cache, &input)
+}
+
+/// A convenience routine for constructing a "gave up" match error.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn gave_up(offset: usize) -> MatchError {
+    MatchError::gave_up(offset)
+}
diff --git a/vendor/regex-automata/src/lib.rs b/vendor/regex-automata/src/lib.rs
new file mode 100644
index 0000000..62260a5
--- /dev/null
+++ b/vendor/regex-automata/src/lib.rs
@@ -0,0 +1,648 @@
+/*!
+This crate exposes a variety of regex engines used by the `regex` crate.
+It provides a vast, sprawling and "expert" level API to each regex engine.
+The regex engines provided by this crate focus heavily on finite automata
+implementations and specifically guarantee worst case `O(m * n)` time
+complexity for all searches. (Where `m ~ len(regex)` and `n ~ len(haystack)`.)
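+
+As a quick taste of the API (a hedged sketch; the pattern, haystack and
+offsets below are illustrative only), here is the highest level engine in
+this crate performing a simple search:
+
+```
+use regex_automata::{meta::Regex, Match};
+
+let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?;
+// Matches are reported as offsets into the haystack.
+let m = re.find("created on 2024-11-08");
+assert_eq!(Some(Match::must(0, 11..21)), m);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```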
+
+The primary goal of this crate is to serve as an implementation detail for
+the `regex` crate. A secondary goal is to make its internals available for
+use by others.
+
+# Table of contents
+
+* [Should I be using this crate?](#should-i-be-using-this-crate) gives some
+reasons for and against using this crate.
+* [Examples](#examples) provides a small selection of things you can do with
+this crate.
+* [Available regex engines](#available-regex-engines) provides a hyperlinked
+list of all regex engines in this crate.
+* [API themes](#api-themes) discusses common elements used throughout this
+crate.
+* [Crate features](#crate-features) documents the extensive list of Cargo
+features available.
+
+# Should I be using this crate?
+
+If you find yourself here because you just want to use regexes, then you
+should first check out whether the [`regex` crate](https://docs.rs/regex)
+meets your needs. It provides a streamlined and difficult-to-misuse API for
+regex searching.
+
+If you're here because there is something specific you want to do that can't
+be easily done with the `regex` crate, then you are perhaps in the right
+place. It's most likely that the first stop you'll want to make is to explore
+the [`meta` regex APIs](meta). Namely, the `regex` crate is just a light
+wrapper over a [`meta::Regex`], so its API will probably be the easiest to
+transition to. In contrast to the `regex` crate, the `meta::Regex` API
+supports more search parameters and does multi-pattern searches. However, it
+isn't quite as ergonomic.
+
+Otherwise, the following is an inexhaustive list of reasons to use this crate:
+
+* You want to analyze or use a [Thompson `NFA`](nfa::thompson::NFA) directly.
+* You want more powerful multi-pattern search than what is provided by
+`RegexSet` in the `regex` crate. All regex engines in this crate support
+multi-pattern searches.
+* You want to use one of the `regex` crate's internal engines directly
+because of some interesting configuration that isn't possible via the `regex`
+crate. For example, a [lazy DFA's configuration](hybrid::dfa::Config) exposes
+a dizzying number of options for controlling its execution.
+* You want to use the lower level search APIs. For example, both the [lazy
+DFA](hybrid::dfa) and [fully compiled DFAs](dfa) support searching by
+exploring the automaton one state at a time. This might be useful, for
+example, for stream searches or searches of strings stored non-contiguously
+in memory.
+* You want to build a fully compiled DFA and then [use zero-copy
+deserialization](dfa::dense::DFA::from_bytes) to load it into memory and use
+it for searching. This use case is supported in core-only no-std/no-alloc
+environments.
+* You want to run [anchored searches](Input::anchored) without using the `^`
+anchor in your regex pattern.
+* You need to work around contention issues with
+sharing a regex across multiple threads. The
+[`meta::Regex::search_with`](meta::Regex::search_with) API permits bypassing
+any kind of synchronization at all by requiring the caller to provide the
+mutable scratch space needed during a search.
+* You want to build your own regex engine on top of the `regex` crate's
+infrastructure.
+
+# Examples
+
+This section tries to identify a few interesting things you can do with this
+crate and demonstrates them.
+
+### Multi-pattern searches with capture groups
+
+One of the more frustrating limitations of `RegexSet` in the `regex` crate
+(at the time of writing) is that it doesn't report match positions. With this
+crate, multi-pattern support was intentionally designed in from the
+beginning, which means it works in all regex engines and even for capture
+groups as well.
+
+This example shows how to search for matches of multiple regexes, where each
+regex uses the same capture group names to parse different key-value formats.
+
+```
+use regex_automata::{meta::Regex, PatternID};
+
+let re = Regex::new_many(&[
+    r#"(?m)^(?<key>[[:word:]]+)=(?<val>[[:word:]]+)$"#,
+    r#"(?m)^(?<key>[[:word:]]+)="(?<val>[^"]+)"$"#,
+    r#"(?m)^(?<key>[[:word:]]+)='(?<val>[^']+)'$"#,
+    r#"(?m)^(?<key>[[:word:]]+):\s*(?<val>[[:word:]]+)$"#,
+])?;
+let hay = r#"
+best_album="Blow Your Face Out"
+best_quote='"then as it was, then again it will be"'
+best_year=1973
+best_simpsons_episode: HOMR
+"#;
+let mut kvs = vec![];
+for caps in re.captures_iter(hay) {
+    // N.B. One could use capture indices '1' and '2' here
+    // as well. Capture indices are local to each pattern.
+    // (Just like names are.)
+    let key = &hay[caps.get_group_by_name("key").unwrap()];
+    let val = &hay[caps.get_group_by_name("val").unwrap()];
+    kvs.push((key, val));
+}
+assert_eq!(kvs, vec![
+    ("best_album", "Blow Your Face Out"),
+    ("best_quote", "\"then as it was, then again it will be\""),
+    ("best_year", "1973"),
+    ("best_simpsons_episode", "HOMR"),
+]);
+
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+### Build a full DFA and walk it manually
+
+One of the regex engines in this crate is a fully compiled DFA. It takes
+worst case exponential time to build, but once built, it can be easily
+explored and used for searches. Here's a simple example that uses its lower
+level APIs to implement a simple anchored search by hand.
+
+```
+use regex_automata::{dfa::{Automaton, dense}, Input};
+
+let dfa = dense::DFA::new(r"(?-u)\b[A-Z]\w+z\b")?;
+let haystack = "Quartz";
+
+// The start state is determined by inspecting the position and the
+// initial bytes of the haystack.
+let mut state = dfa.start_state_forward(&Input::new(haystack))?;
+// Walk all the bytes in the haystack.
+for &b in haystack.as_bytes().iter() {
+    state = dfa.next_state(state, b);
+}
+// DFAs in this crate require an explicit
+// end-of-input transition if a search reaches
+// the end of a haystack.
+state = dfa.next_eoi_state(state);
+assert!(dfa.is_match_state(state));
+
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+Or do the same with a lazy DFA that avoids exponential worst case compile
+time, but requires mutable scratch space to lazily build the DFA during the
+search.
+
+```
+use regex_automata::{hybrid::dfa::DFA, Input};
+
+let dfa = DFA::new(r"(?-u)\b[A-Z]\w+z\b")?;
+let mut cache = dfa.create_cache();
+let hay = "Quartz";
+
+// The start state is determined by inspecting the position and the
+// initial bytes of the haystack.
+let mut state = dfa.start_state_forward(&mut cache, &Input::new(hay))?;
+// Walk all the bytes in the haystack.
+for &b in hay.as_bytes().iter() {
+    state = dfa.next_state(&mut cache, state, b)?;
+}
+// DFAs in this crate require an explicit
+// end-of-input transition if a search reaches
+// the end of a haystack.
+state = dfa.next_eoi_state(&mut cache, state)?;
+assert!(state.is_match());
+
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+### Find all overlapping matches
+
+This example shows how to build a DFA and use it to find all possible
+matches, including overlapping matches. A similar example will work with a
+lazy DFA as well. This also works with multiple patterns and will report all
+matches at the same position where multiple patterns match.
+
+```
+use regex_automata::{
+    dfa::{dense, Automaton, OverlappingState},
+    Input, MatchKind,
+};
+
+let dfa = dense::DFA::builder()
+    .configure(dense::DFA::config().match_kind(MatchKind::All))
+    .build(r"(?-u)\w{3,}")?;
+let input = Input::new("homer marge bart lisa maggie");
+let mut state = OverlappingState::start();
+
+let mut matches = vec![];
+while let Some(hm) = {
+    dfa.try_search_overlapping_fwd(&input, &mut state)?;
+    state.get_match()
+} {
+    matches.push(hm.offset());
+}
+assert_eq!(matches, vec![
+    3, 4, 5, // hom, home, homer
+    9, 10, 11, // mar, marg, marge
+    15, 16, // bar, bart
+    20, 21, // lis, lisa
+    25, 26, 27, 28, // mag, magg, maggi, maggie
+]);
+
+# Ok::<(), Box<dyn std::error::Error>>(())
+```
+
+# Available regex engines
+
+The following is a complete list of all regex engines provided by this crate,
+along with a very brief description of each one and why you might want to
+use it.
+
+* [`dfa::regex::Regex`] is a regex engine that works on top of either
+[dense](dfa::dense) or [sparse](dfa::sparse) fully compiled DFAs. You might
+use a DFA if you need the fastest possible regex engine in this crate and can
+afford the exorbitant memory usage usually required by DFAs. Low level APIs
+on fully compiled DFAs are provided by the [`Automaton` trait](dfa::Automaton).
+Fully compiled dense DFAs can handle all regexes except for searching a regex
+with a Unicode word boundary on non-ASCII haystacks. A fully compiled DFA
+based regex can only report the start and end of each match.
+* [`hybrid::regex::Regex`] is a regex engine that works on top of a lazily
+built DFA. Its performance profile is very similar to that of fully compiled
+DFAs, but can be slower in some pathological cases. Fully compiled DFAs are
+also amenable to more optimizations, such as state acceleration, that aren't
+available in a lazy DFA. You might use this lazy DFA if you can't abide the
+worst case exponential compile time of a full DFA, but still want the DFA
+search performance in the vast majority of cases. A lazy DFA based regex can
+only report the start and end of each match.
+* [`dfa::onepass::DFA`] is a regex engine that is implemented as a DFA, but
+can report the matches of each capture group in addition to the start and
+end of each match. The catch is that it only works on a somewhat small subset
+of regexes known as "one-pass." You'll want to use this for cases when you
+need capture group matches and the regex is one-pass since it is likely to be
+faster than any alternative. Within that subset, a one-pass DFA supports all
+of this crate's regex features, but it does have some reasonable limits on
+the number of capture groups it can handle.
+* [`nfa::thompson::backtrack::BoundedBacktracker`] is a regex engine that
+uses backtracking, but keeps track of the work it has done to avoid
+catastrophic backtracking. Like the one-pass DFA, it provides the matches of
+each capture group. It retains the `O(m * n)` worst case time bound. This
+tends to be slower than the one-pass DFA regex engine, but faster than the
+PikeVM. It can handle all types of regexes, but usually only works well with
+small haystacks and small regexes due to the memory required to avoid
+redoing work.
+* [`nfa::thompson::pikevm::PikeVM`] is a regex engine that can handle all
+regexes, of all sizes, and provides capture group matches. It tends to be a
+tool of last resort because it is also usually the slowest regex engine.
+* [`meta::Regex`] is the meta regex engine that combines *all* of the above
+engines into one. The reason for this is that each of the engines above has
+its own caveats such as, "only handles a subset of regexes" or "is generally
+slow." The meta regex engine accounts for all of these caveats and composes
+the engines in a way that attempts to mitigate each engine's weaknesses while
+emphasizing its strengths. For example, it will attempt to run a lazy DFA
+even if it might fail. In which case, it will restart the search with a
+likely slower but more capable regex engine. The meta regex engine is what
+you should default to. Use one of the above engines directly only if you have
+a specific reason to.
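+
+For instance, here is a brief sketch of reaching for one of the lower level
+engines above directly, the PikeVM, when capture groups are needed (the
+pattern and haystack are illustrative):
+
+```
+use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+
+let re = PikeVM::new(r"(\w+)@(\w+)")?;
+// The PikeVM needs explicit mutable scratch space and a place to store
+// capture group offsets.
+let mut cache = re.create_cache();
+let mut caps = re.create_captures();
+re.captures(&mut cache, "user@example", &mut caps);
+assert_eq!(Some(Match::must(0, 0..12)), caps.get_match());
+# Ok::<(), Box<dyn std::error::Error>>(())
+```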
+
+# API themes
+
+While each regex engine has its own APIs and configuration options, there are
+some general themes followed by all of them.
+
+### The `Input` abstraction
+
+Most search routines in this crate accept anything that implements
+`Into<Input>`. Both `&str` and `&[u8]` haystacks satisfy this constraint,
+which means that things like `engine.search("foo")` will work as you would
+expect.
+
+By virtue of accepting an `Into<Input>` though, callers can provide more than
+just a haystack. Indeed, the [`Input`] type has more details, but briefly,
+callers can use it to configure various aspects of the search:
+
+* The span of the haystack to search via [`Input::span`] or [`Input::range`],
+which might be a substring of the haystack.
+* Whether to run an anchored search or not via [`Input::anchored`]. This
+permits one to require matches to start at the same offset that the search
+started.
+* Whether to ask the regex engine to stop as soon as a match is seen via
+[`Input::earliest`]. This can be used to find the offset of a match as soon
+as it is known without waiting for the full leftmost-first match to be found.
+This can also be used to avoid the worst case `O(m * n^2)` time complexity
+of iteration.
+
+Some lower level search routines accept an `&Input` for performance reasons.
+In which case, `&Input::new("haystack")` can be used for a simple search.
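+
+For example, a small sketch combining a couple of these knobs (the offsets
+are illustrative):
+
+```
+use regex_automata::{meta::Regex, Anchored, Input, Match};
+
+let re = Regex::new(r"\w+")?;
+let hay = "homer marge";
+// Search only part of the haystack and require the match to start exactly
+// where the search begins.
+let input = Input::new(hay).range(6..).anchored(Anchored::Yes);
+assert_eq!(Some(Match::must(0, 6..11)), re.search(&input));
+# Ok::<(), Box<dyn std::error::Error>>(())
+```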
+
+### Error reporting
+
+Most, but not all, regex engines in this crate can fail to execute a search.
+When a search fails, callers cannot determine whether or not a match exists.
+That is, the result is indeterminate.
+
+Search failure, in all cases in this crate, is represented by a
+[`MatchError`]. Routines that can fail start with the `try_` prefix in their
+name. For example, [`hybrid::regex::Regex::try_search`] can fail for a number
+of reasons. Conversely, routines that either can't fail or can panic on
+failure lack the `try_` prefix. For example, [`hybrid::regex::Regex::find`]
+will panic in cases where [`hybrid::regex::Regex::try_search`] would return
+an error, and [`meta::Regex::find`] will never panic. Therefore, callers need
+to pay close attention to the panicking conditions in the documentation.
+
+In most cases, the reasons that a search fails are either predictable or
+configurable, albeit at some additional cost.
+
+An example of predictable failure is
+[`BoundedBacktracker::try_search`](nfa::thompson::backtrack::BoundedBacktracker::try_search).
+Namely, it fails whenever the product of the haystack length and the regex
+size (times some small constant) exceeds the
+[configured visited capacity](nfa::thompson::backtrack::Config::visited_capacity).
+Callers can predict the failure in terms of haystack length via the
+[`BoundedBacktracker::max_haystack_len`](nfa::thompson::backtrack::BoundedBacktracker::max_haystack_len)
+method. While this form of failure is technically avoidable by increasing the
+visited capacity, it isn't practical to do so for all inputs because the
+memory usage required for larger haystacks becomes impractically large. So in
+practice, if you use the bounded backtracker, you really do have to deal with
+the failure.
+
+An example of configurable failure happens when one enables heuristic support
+for Unicode word boundaries in a DFA. Namely, since the DFAs in this crate
+(except for the one-pass DFA) do not support Unicode word boundaries on
+non-ASCII haystacks, building a DFA from an NFA that contains a Unicode word
+boundary will itself fail. However, one can configure DFAs to still be built
+in this case by
+[configuring heuristic support for Unicode word boundaries](hybrid::dfa::Config::unicode_word_boundary).
+If the NFA the DFA is built from contains a Unicode word boundary, then the
+DFA will still be built, but special transitions will be added to every state
+that cause the DFA to fail if any non-ASCII byte is seen. This failure
+happens at search time, and the caller must opt into it.
+
+There are other ways for regex engines to fail in this crate, but the above
+two should represent the general theme of failures one can find. Dealing
+with these failures is, in part, one of the responsibilities of the [meta
+regex engine](meta). Notice, for example, that the meta regex engine exposes
+an API that never returns an error nor panics. It carefully manages all of
+the ways in which the regex engines can fail and either avoids the
+predictable ones entirely (e.g., the bounded backtracker) or reacts to
+configured failures by falling back to a different engine (e.g., the lazy DFA
+quitting because it saw a non-ASCII byte).
+
+### Configuration and Builders
+
+Most of the regex engines in this crate come with two types to facilitate
+building the regex engine: a `Config` and a `Builder`. A `Config` is usually
+specific to that particular regex engine, but other objects such as parsing
+and NFA compilation have `Config` types too. A `Builder` is the thing
+responsible for taking inputs (either pattern strings or already-parsed
+patterns or even NFAs directly) and turning them into an actual regex engine
+that can be used for searching.
+
+The main reason why building a regex engine is a bit complicated is because
+of the desire to permit composition with de-coupled components. For example,
+you might want to [manually construct a Thompson NFA](nfa::thompson::Builder)
+and then build a regex engine from it without ever using a regex parser
+at all. On the other hand, you might also want to build a regex engine
+directly from the concrete syntax. This demonstrates why regex engine
+construction is so flexible: it needs to support not just convenient
+construction, but also construction from parts built elsewhere.
+
+This is also in turn why there are many different `Config` structs in this
+crate. Let's look more closely at an example: [`hybrid::regex::Builder`]. It
+accepts three different `Config` types for configuring construction of a lazy
+DFA regex:
+
+* [`hybrid::regex::Builder::syntax`] accepts a
+[`util::syntax::Config`] for configuring the options found in the
+[`regex-syntax`](regex_syntax) crate. For example, whether to match
+case insensitively.
+* [`hybrid::regex::Builder::thompson`] accepts a [`nfa::thompson::Config`] for
+configuring construction of a [Thompson NFA](nfa::thompson::NFA).
+For example, whether to build an NFA that matches the reverse language
+described by the regex.
+* [`hybrid::regex::Builder::dfa`] accepts a [`hybrid::dfa::Config`] for
+configuring construction of the pair of underlying lazy DFAs that make up the
+lazy DFA regex engine. For example, changing the capacity of the cache used
+to store the transition table.
+
+The lazy DFA regex engine uses all three of those configuration objects for
+methods like [`hybrid::regex::Builder::build`], which accepts a pattern
+string containing the concrete syntax of your regex. It uses the syntax
+configuration to parse it into an AST and translate it into an HIR. It then
+uses the NFA configuration when compiling the HIR into an NFA, and finally
+the DFA configuration when lazily determinizing the NFA into a DFA.
+
+Notice though that the builder also has a
+[`hybrid::regex::Builder::build_from_dfas`] constructor. This permits callers
+to build the underlying pair of lazy DFAs themselves (one for the forward
+searching to find the end of a match and one for the reverse searching to
+find the start of a match), and then build the regex engine from them. The
+lazy DFAs, in turn, have their own builder that permits [construction
+directly from a Thompson NFA](hybrid::dfa::Builder::build_from_nfa).
+Continuing down the rabbit hole, a Thompson NFA has its own compiler that
+permits [construction directly from an
+HIR](nfa::thompson::Compiler::build_from_hir). The lazy DFA regex engine
+builder lets you follow this rabbit hole all the way down, but also provides
+convenience routines that do it for you when you don't need precise control
+over every component.
+
+The [meta regex engine](meta) is a good example of something that utilizes
+the full flexibility of these builders. It often needs not only precise
+control over each component, but also shares them across multiple regex
+engines. (Most sharing is done by internal reference accounting. For example,
+an [`NFA`](nfa::thompson::NFA) is reference counted internally which makes
+cloning cheap.)
+
+### Size limits
+
+Unlike the `regex` crate, the `regex-automata` crate specifically does not
+enable any size limits by default. That means users of this crate need to
+be quite careful when using untrusted patterns. Namely, because bounded
+repetitions can grow exponentially by stacking them, it is possible to build
+a very large internal regex object from just a small pattern string. For
+example, the NFA built from the pattern `a{10}{10}{10}{10}{10}{10}{10}` is
+over 240MB.
+
+There are multiple size limit options in this crate. If one or more size
+limits are relevant for the object you're building, they will be configurable
+via methods on a corresponding `Config` type.
+
+# Crate features
+
+This crate has a dizzying number of features. The main idea is to be able to
+control how much stuff you pull in for your specific use case, since the full
+crate is quite large and can dramatically increase compile times and binary
+size.
+
+The most barebones but useful configuration is to disable all default
+features and enable only `dfa-search`. This will bring in just the DFA
+deserialization and search routines without any dependency on `std` or
+`alloc`. This does require generating and serializing a DFA, and then storing
+it somewhere, but it permits regex searches in freestanding or embedded
+environments.
+
+Because there are so many features, they are split into a few groups.
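+
+As a concrete illustration, the barebones `dfa-search`-only configuration
+mentioned above would look something like this in a consumer's `Cargo.toml`
+(a sketch; the version number is illustrative):
+
+```toml
+[dependencies]
+regex-automata = { version = "0.4", default-features = false, features = ["dfa-search"] }
+```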
+
+The default set of features is: `std`, `syntax`, `perf`, `unicode`, `meta`,
+`nfa`, `dfa` and `hybrid`. Basically, the default is to enable everything
+except for development related features like `logging`.
+
+### Ecosystem features
+
+* **std** - Enables use of the standard library. In terms of APIs, this
+usually just means that error types implement the `std::error::Error` trait.
+Otherwise, `std` sometimes enables the code to be faster, for example, using
+a `HashMap` instead of a `BTreeMap`. (The `std` feature matters more for
+dependencies like `aho-corasick` and `memchr`, where `std` is required to
+enable certain classes of SIMD optimizations.) Enabling `std` automatically
+enables `alloc`.
+* **alloc** - Enables use of the `alloc` library. This is required for most
+APIs in this crate. The main exception is deserializing and searching with
+fully compiled DFAs.
+* **logging** - Adds a dependency on the `log` crate and makes this crate
+emit log messages of varying degrees of utility. The log messages are
+especially useful in trying to understand what the meta regex engine is
+doing.
+
+### Performance features
+
+* **perf** - Enables all of the below features.
+* **perf-inline** - When enabled, `inline(always)` is used in (many)
+strategic locations to help performance at the expense of longer compile
+times and increased binary size.
+* **perf-literal** - Enables all literal related optimizations.
+    * **perf-literal-substring** - Enables all single substring literal
+      optimizations. This includes adding a dependency on the `memchr` crate.
+    * **perf-literal-multisubstring** - Enables all multiple substring
+      literal optimizations. This includes adding a dependency on the
+      `aho-corasick` crate.
+
+### Unicode features
+
+* **unicode** -
+  Enables all Unicode features. This feature is enabled by default, and will
+  always cover all Unicode features, even if more are added in the future.
+* **unicode-age** -
+  Provide the data for the
+  [Unicode `Age` property](https://www.unicode.org/reports/tr44/tr44-24.html#Character_Age).
+  This makes it possible to use classes like `\p{Age:6.0}` to refer to all
+  codepoints first introduced in Unicode 6.0.
+* **unicode-bool** -
+  Provide the data for numerous Unicode boolean properties. The full list
+  is not included here, but contains properties like `Alphabetic`, `Emoji`,
+  `Lowercase`, `Math`, `Uppercase` and `White_Space`.
+* **unicode-case** -
+  Provide the data for case insensitive matching using
+  [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches).
+* **unicode-gencat** -
+  Provide the data for
+  [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
+  This includes, but is not limited to, `Decimal_Number`, `Letter`,
+  `Math_Symbol`, `Number` and `Punctuation`.
+* **unicode-perl** -
+  Provide the data for supporting the Unicode-aware Perl character classes,
+  corresponding to `\w`, `\s` and `\d`. This is also necessary for using
+  Unicode-aware word boundary assertions. Note that if this feature is
+  disabled, the `\s` and `\d` character classes are still available if the
+  `unicode-bool` and `unicode-gencat` features are enabled, respectively.
+* **unicode-script** -
+  Provide the data for
+  [Unicode scripts and script extensions](https://www.unicode.org/reports/tr24/).
+  This includes, but is not limited to, `Arabic`, `Cyrillic`, `Hebrew`,
+  `Latin` and `Thai`.
+* **unicode-segment** -
+  Provide the data necessary to provide the properties used to implement the
+  [Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/).
+  This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and
+  `\p{sb=ATerm}`.
+* **unicode-word-boundary** -
+  Enables support for Unicode word boundaries, i.e., `\b`, in regexes. When
+  this and `unicode-perl` are enabled, then data tables from `regex-syntax`
+  are used to implement Unicode word boundaries. However, if `regex-syntax`
+  isn't enabled as a dependency then one can still enable this feature. It
+  will cause `regex-automata` to bundle its own data table that would
+  otherwise be redundant with `regex-syntax`'s table.
+
+### Regex engine features
+
+* **syntax** - Enables a dependency on `regex-syntax`. This makes APIs
+for building regex engines from pattern strings available. Without the
+`regex-syntax` dependency, the only way to build a regex engine is generally
+to deserialize a previously built DFA or to hand assemble an NFA using its
+[builder API](nfa::thompson::Builder). Once you have an NFA, you can build
+any of the regex engines in this crate. The `syntax` feature also enables
+`alloc`.
+* **meta** - Enables the meta regex engine. This also enables the `syntax`
+and `nfa-pikevm` features, as both are the minimal requirements needed. The
+meta regex engine benefits from enabling any of the other regex engines and
+will use them automatically when appropriate.
+* **nfa** - Enables all NFA related features below.
+    * **nfa-thompson** - Enables the Thompson NFA APIs. This enables `alloc`.
+    * **nfa-pikevm** - Enables the PikeVM regex engine. This enables
+      `nfa-thompson`.
+    * **nfa-backtrack** - Enables the bounded backtracker regex engine. This
+      enables `nfa-thompson`.
+* **dfa** - Enables all DFA related features below.
+    * **dfa-build** - Enables APIs for determinizing DFAs from NFAs. This
+      enables `nfa-thompson` and `dfa-search`.
+    * **dfa-search** - Enables APIs for searching with DFAs.
+    * **dfa-onepass** - Enables the one-pass DFA API. This enables
+      `nfa-thompson`.
+* **hybrid** - Enables the hybrid NFA/DFA or "lazy DFA" regex engine. This
+enables `alloc` and `nfa-thompson`.
+
+*/
+
+// We are no_std.
+#![no_std]
+// All APIs need docs!
+#![deny(missing_docs)]
+// Some intra-doc links are broken when certain features are disabled, so we
+// only bleat about it when most (all?) features are enabled. But when we do,
+// we block the build. Links need to work.
+#![cfg_attr(
+    all(
+        feature = "std",
+        feature = "nfa",
+        feature = "dfa",
+        feature = "hybrid"
+    ),
+    deny(rustdoc::broken_intra_doc_links)
+)]
+// Broken rustdoc links are very easy to come by when you start disabling
+// features. Namely, features tend to change imports, and imports change
+// what's available to link to.
+//
+// Basically, we just don't support rustdoc for anything other than the
+// maximal feature configuration. Other configurations will work, they just
+// won't be perfect.
+//
+// So here, we specifically allow them so we don't even get warned about
+// them.
+#![cfg_attr(
+    not(all(
+        feature = "std",
+        feature = "nfa",
+        feature = "dfa",
+        feature = "hybrid"
+    )),
+    allow(rustdoc::broken_intra_doc_links)
+)]
+// Kinda similar, but eliminating all of the dead code and unused import
+// warnings for every feature combo is a fool's errand. Instead, we just
+// suppress those, but still let them through in a common configuration when
+// we build most of everything.
+//
+// This does actually suggest that when features are disabled, we are
+// actually compiling more code than we need to be. And this is perhaps not
+// so great because disabling features is usually done in order to reduce
+// compile times by reducing the amount of code one compiles... However,
+// usually, most of the time this dead code is a relatively small amount from
+// the 'util' module. But... I confess... There isn't a ton of visibility on
+// this.
+//
+// I'm happy to try to address this in a different way, but "let's annotate
+// every function in 'util' with some non-local combination of features" just
+// cannot be the way forward.
+#![cfg_attr(
+    not(all(
+        feature = "std",
+        feature = "nfa",
+        feature = "dfa",
+        feature = "hybrid",
+        feature = "perf-literal-substring",
+        feature = "perf-literal-multisubstring",
+    )),
+    allow(dead_code, unused_imports, unused_variables)
+)]
+// We generally want all types to impl Debug.
+#![warn(missing_debug_implementations)]
+// No clue why this thing is still unstable because it's pretty amazing. This
+// adds Cargo feature annotations to items in the rustdoc output. Which is
+// sadly hugely beneficial for this crate due to the number of features.
+#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+
+// I have literally never tested this crate on 16-bit, so it is quite
+// suspicious to advertise support for it. But... the regex crate, at time
+// of writing, at least claims to support it by not doing any conditional
+// compilation based on the target pointer width. So I guess I remain
+// consistent with that here.
+//
+// If you are here because you're on a 16-bit system and you were somehow
+// using the regex crate previously, please file an issue. Please be prepared
+// to provide some kind of reproduction or carve out some path to getting
+// 16-bit working in CI. (Via qemu?)
+#[cfg(not(any(
+    target_pointer_width = "16",
+    target_pointer_width = "32",
+    target_pointer_width = "64"
+)))]
+compile_error!("not supported on non-{16,32,64}, please file an issue");
+
+#[cfg(any(test, feature = "std"))]
+extern crate std;
+
+#[cfg(feature = "alloc")]
+extern crate alloc;
+
+#[cfg(doctest)]
+doc_comment::doctest!("../README.md");
+
+#[doc(inline)]
+pub use crate::util::primitives::PatternID;
+pub use crate::util::search::*;
+
+#[macro_use]
+mod macros;
+
+#[cfg(any(feature = "dfa-search", feature = "dfa-onepass"))]
+pub mod dfa;
+#[cfg(feature = "hybrid")]
+pub mod hybrid;
+#[cfg(feature = "meta")]
+pub mod meta;
+#[cfg(feature = "nfa-thompson")]
+pub mod nfa;
+pub mod util;
diff --git a/vendor/regex-automata/src/macros.rs b/vendor/regex-automata/src/macros.rs
new file mode 100644
index 0000000..31b4ca3
--- /dev/null
+++ b/vendor/regex-automata/src/macros.rs
@@ -0,0 +1,20 @@
+// Some feature combinations result in some of these macros never being used.
+// Which is fine. Just squash the warnings.
+#![allow(unused_macros)]
+
+macro_rules! log {
+    ($($tt:tt)*) => {
+        #[cfg(feature = "logging")]
+        {
+            $($tt)*
+        }
+    }
+}
+
+macro_rules! debug {
+    ($($tt:tt)*) => { log!(log::debug!($($tt)*)) }
+}
+
+macro_rules! trace {
+    ($($tt:tt)*) => { log!(log::trace!($($tt)*)) }
+}
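+
+// For example (an illustrative call site, not one from this crate), a
+// function can write:
+//
+//     debug!("cleared lazy DFA cache {} times", count);
+//
+// and the call compiles away entirely unless the 'logging' feature is
+// enabled.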
diff --git a/vendor/regex-automata/src/meta/error.rs b/vendor/regex-automata/src/meta/error.rs
new file mode 100644
index 0000000..ea9a316
--- /dev/null
+++ b/vendor/regex-automata/src/meta/error.rs
@@ -0,0 +1,241 @@
+use regex_syntax::{ast, hir};
+
+use crate::{nfa, util::search::MatchError, PatternID};
+
+/// An error that occurs when construction of a `Regex` fails.
+///
+/// A build error is generally a result of one of two possible failure
+/// modes. First is a parse or syntax error in the concrete syntax of a
+/// pattern. Second is that the construction of the underlying regex matcher
+/// fails, usually because it gets too big with respect to limits like
+/// [`Config::nfa_size_limit`](crate::meta::Config::nfa_size_limit).
+///
+/// This error provides very little introspection capabilities. You can:
+///
+/// * Ask for the [`PatternID`] of the pattern that caused an error, if one
+/// is available. This is available for things like syntax errors, but not for
+/// cases where build limits are exceeded.
+/// * Ask for the underlying syntax error, but only if the error is a syntax
+/// error.
+/// * Ask for a human readable message corresponding to the underlying error.
+/// * The `BuildError::source` method (from the `std::error::Error`
+/// trait implementation) may be used to query for an underlying error if one
+/// exists. There are no API guarantees about which error is returned.
+///
+/// When the `std` feature is enabled, this implements `std::error::Error`.
+#[derive(Clone, Debug)]
+pub struct BuildError {
+    kind: BuildErrorKind,
+}
+
+#[derive(Clone, Debug)]
+enum BuildErrorKind {
+    Syntax { pid: PatternID, err: regex_syntax::Error },
+    NFA(nfa::thompson::BuildError),
+}
+
+impl BuildError {
+    /// If it is known which pattern ID caused this build error to occur, then
+    /// this method returns it.
+    ///
+    /// Some errors are not associated with a particular pattern. However, any
+    /// errors that occur as part of parsing a pattern are guaranteed to be
+    /// associated with a pattern ID.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, PatternID};
+    ///
+    /// let err = Regex::new_many(&["a", "b", r"\p{Foo}", "c"]).unwrap_err();
+    /// assert_eq!(Some(PatternID::must(2)), err.pattern());
+    /// ```
+    pub fn pattern(&self) -> Option<PatternID> {
+        match self.kind {
+            BuildErrorKind::Syntax { pid, .. } => Some(pid),
+            _ => None,
+        }
+    }
+
+    /// If this error occurred because the regex exceeded the configured size
+    /// limit before being built, then this returns the configured size limit.
+    ///
+    /// The limit returned is what was configured, and corresponds to the
+    /// maximum amount of heap usage in bytes.
+    pub fn size_limit(&self) -> Option<usize> {
+        match self.kind {
+            BuildErrorKind::NFA(ref err) => err.size_limit(),
+            _ => None,
+        }
+    }
+
+    /// If this error corresponds to a syntax error, then a reference to it is
+    /// returned by this method.
+    pub fn syntax_error(&self) -> Option<&regex_syntax::Error> {
+        match self.kind {
+            BuildErrorKind::Syntax { ref err, .. } => Some(err),
+            _ => None,
+        }
+    }
+
+    pub(crate) fn ast(pid: PatternID, err: ast::Error) -> BuildError {
+        let err = regex_syntax::Error::from(err);
+        BuildError { kind: BuildErrorKind::Syntax { pid, err } }
+    }
+
+    pub(crate) fn hir(pid: PatternID, err: hir::Error) -> BuildError {
+        let err = regex_syntax::Error::from(err);
+        BuildError { kind: BuildErrorKind::Syntax { pid, err } }
+    }
+
+    pub(crate) fn nfa(err: nfa::thompson::BuildError) -> BuildError {
+        BuildError { kind: BuildErrorKind::NFA(err) }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for BuildError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self.kind {
+            BuildErrorKind::Syntax { ref err, .. } => Some(err),
+            BuildErrorKind::NFA(ref err) => Some(err),
+        }
+    }
+}
+
+impl core::fmt::Display for BuildError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self.kind {
+            BuildErrorKind::Syntax { pid, .. } => {
+                write!(f, "error parsing pattern {}", pid.as_usize())
+            }
+            BuildErrorKind::NFA(_) => write!(f, "error building NFA"),
+        }
+    }
+}
+
+/// An error that occurs when a search should be retried.
+///
+/// This retry error distinguishes between two different failure modes.
+///
+/// The first is one where potential quadratic behavior has been detected.
+/// In this case, whatever optimization that led to this behavior should be
+/// stopped, and the next best strategy should be used.
+///
+/// The second indicates that the underlying regex engine has failed for some
+/// reason. This usually occurs because either a lazy DFA's cache has become
+/// ineffective or because a non-ASCII byte has been seen *and* a Unicode word
+/// boundary was used in one of the patterns. In this failure case, a
+/// different regex engine that won't fail in these ways (PikeVM, backtracker
+/// or the one-pass DFA) should be used.
+///
+/// This is an internal error only and should never bleed into the public
+/// API.
+#[derive(Debug)]
+pub(crate) enum RetryError {
+    Quadratic(RetryQuadraticError),
+    Fail(RetryFailError),
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for RetryError {}
+
+impl core::fmt::Display for RetryError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match *self {
+            RetryError::Quadratic(ref err) => err.fmt(f),
+            RetryError::Fail(ref err) => err.fmt(f),
+        }
+    }
+}
+
+impl From<MatchError> for RetryError {
+    fn from(merr: MatchError) -> RetryError {
+        RetryError::Fail(RetryFailError::from(merr))
+    }
+}
+
+/// An error that occurs when potential quadratic behavior has been detected
+/// when applying either the "reverse suffix" or "reverse inner"
+/// optimizations.
+///
+/// When this error occurs, callers should abandon the "reverse" optimization
+/// and use a normal forward search.
+#[derive(Debug)]
+pub(crate) struct RetryQuadraticError(());
+
+impl RetryQuadraticError {
+    pub(crate) fn new() -> RetryQuadraticError {
+        RetryQuadraticError(())
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for RetryQuadraticError {}
+
+impl core::fmt::Display for RetryQuadraticError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "regex engine gave up to avoid quadratic behavior")
+    }
+}
+
+impl From<RetryQuadraticError> for RetryError {
+    fn from(err: RetryQuadraticError) -> RetryError {
+        RetryError::Quadratic(err)
+    }
+}
+
+/// An error that occurs when a regex engine "gives up" for some reason before
+/// finishing a search. Usually this occurs because of heuristic Unicode word
+/// boundary support or because of ineffective cache usage in the lazy DFA.
+///
+/// When this error occurs, callers should retry the regex search with a
+/// different regex engine.
+///
+/// Note that this has convenient `From` impls that will automatically
+/// convert a `MatchError` into this error. This works because the meta
+/// regex engine internals guarantee that errors like `HaystackTooLong` and
+/// `UnsupportedAnchored` will never occur. The only errors left are `Quit`
+/// and `GaveUp`, which both correspond to this "failure" error.
+#[derive(Debug)]
+pub(crate) struct RetryFailError {
+    offset: usize,
+}
+
+impl RetryFailError {
+    pub(crate) fn from_offset(offset: usize) -> RetryFailError {
+        RetryFailError { offset }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for RetryFailError {}
+
+impl core::fmt::Display for RetryFailError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "regex engine failed at offset {:?}", self.offset)
+    }
+}
+
+impl From<RetryFailError> for RetryError {
+    fn from(err: RetryFailError) -> RetryError {
+        RetryError::Fail(err)
+    }
+}
+
+impl From<MatchError> for RetryFailError {
+    fn from(merr: MatchError) -> RetryFailError {
+        use crate::util::search::MatchErrorKind::*;
+
+        match *merr.kind() {
+            Quit { offset, .. } => RetryFailError::from_offset(offset),
+            GaveUp { offset } => RetryFailError::from_offset(offset),
+            // These can never occur because we avoid them by construction
+            // or with higher level control flow logic. For example, the
+            // backtracker's wrapper will never hand out a backtracker engine
+            // when the haystack would be too long.
+            HaystackTooLong { .. } | UnsupportedAnchored { .. } => {
+                unreachable!("found impossible error in meta engine: {}", merr)
+            }
+        }
+    }
+}
diff --git a/vendor/regex-automata/src/meta/limited.rs b/vendor/regex-automata/src/meta/limited.rs
new file mode 100644
index 0000000..5653adc
--- /dev/null
+++ b/vendor/regex-automata/src/meta/limited.rs
@@ -0,0 +1,255 @@
+/*!
+This module defines two bespoke reverse DFA searching routines. (One for the
+lazy DFA and one for the fully compiled DFA.) These routines differ from the
+usual ones by permitting the caller to specify a minimum starting position.
+That is, the search will begin at `input.end()` and will usually stop at
+`input.start()`, unless `min_start > input.start()`, in which case, the search
+will stop at `min_start`.
+
+In other words, this lets you say, "no, the search must not extend past this
+point, even if it's within the bounds of the given `Input`." And if the search
+*does* want to go past that point, it stops and returns a "may be quadratic"
+error, which indicates that the caller should retry using some other
+technique.
+
+These routines specifically exist to protect against quadratic behavior when
+employing the "reverse suffix" and "reverse inner" optimizations. Without the
+backstop these routines provide, it is possible for parts of the haystack to
+get re-scanned over and over again. The backstop not only prevents this, but
+*tells you when it is happening* so that you can change the strategy.
+
+Why can't we just use the normal search routines? We could use the normal
+search routines and just set the start bound on the provided `Input` to our
+`min_start` position. The problem here is that it's impossible to distinguish
+between "no match because we reached the end of input" and "determined there
+was no match well before the end of input." The former case is what we care
+about with respect to quadratic behavior. The latter case is totally fine.
+
+Why don't we modify the normal search routines to report the position at
+which the search stops? I considered this, and I still wonder if it is indeed
+the right thing to do. However, I think the straight-forward thing to do
+there would be to complicate the return type signature of almost every search
+routine in this crate, which I really do not want to do.
+It therefore might make more sense to provide a richer way for search
+routines to report metadata, but that was beyond my bandwidth to work on at
+the time of writing.
+
+See the 'opt/reverse-inner' and 'opt/reverse-suffix' benchmarks in rebar for
+a real demonstration of how quadratic behavior is mitigated.
+*/
+
+use crate::{
+    meta::error::{RetryError, RetryQuadraticError},
+    HalfMatch, Input, MatchError,
+};
+
+#[cfg(feature = "dfa-build")]
+pub(crate) fn dfa_try_search_half_rev(
+    dfa: &crate::dfa::dense::DFA<alloc::vec::Vec<u32>>,
+    input: &Input<'_>,
+    min_start: usize,
+) -> Result<Option<HalfMatch>, RetryError> {
+    use crate::dfa::Automaton;
+
+    let mut mat = None;
+    let mut sid = dfa.start_state_reverse(input)?;
+    if input.start() == input.end() {
+        dfa_eoi_rev(dfa, input, &mut sid, &mut mat)?;
+        return Ok(mat);
+    }
+    let mut at = input.end() - 1;
+    loop {
+        sid = dfa.next_state(sid, input.haystack()[at]);
+        if dfa.is_special_state(sid) {
+            if dfa.is_match_state(sid) {
+                let pattern = dfa.match_pattern(sid, 0);
+                // Since reverse searches report the beginning of a
+                // match and the beginning is inclusive (not exclusive
+                // like the end of a match), we add 1 to make it
+                // inclusive.
+                mat = Some(HalfMatch::new(pattern, at + 1));
+            } else if dfa.is_dead_state(sid) {
+                return Ok(mat);
+            } else if dfa.is_quit_state(sid) {
+                return Err(MatchError::quit(input.haystack()[at], at).into());
+            }
+        }
+        if at == input.start() {
+            break;
+        }
+        at -= 1;
+        if at < min_start {
+            trace!(
+                "reached position {} which is before the previous literal \
+                 match, quitting to avoid quadratic behavior",
+                at,
+            );
+            return Err(RetryError::Quadratic(RetryQuadraticError::new()));
+        }
+    }
+    let was_dead = dfa.is_dead_state(sid);
+    dfa_eoi_rev(dfa, input, &mut sid, &mut mat)?;
+    // If we reach the beginning of the search and we could otherwise still
+    // potentially keep matching if there was more to match, then we actually
+    // return an error to indicate giving up on this optimization. Why?
+    // Because we can't prove that the real match begins at where we would
+    // report it.
+    //
+    // This only happens when all of the following are true:
+    //
+    // 1) We reach the starting point of our search span.
+    // 2) The match we found begins after the starting point.
+    // 3) The FSM reports we could possibly find a longer match.
+    //
+    // We need (1) because otherwise the search stopped before the starting
+    // point and there is no possible way to find a more leftmost position.
+    //
+    // We need (2) because if the match found has an offset equal to the
+    // minimum possible offset, then there is no possible more leftmost match.
+    //
+    // We need (3) because if the FSM couldn't continue anyway (i.e., it's in
+    // a dead state), then we know we couldn't find anything more leftmost
+    // than what we have. (We have to check the state we were in prior to the
+    // EOI transition since the EOI transition will usually bring us to a dead
+    // state by virtue of it representing the end of input.)
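+    // The three conjuncts below check conditions (1), (2) and (3) above, in
+    // that order.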
+    if at == input.start()
+        && mat.map_or(false, |m| m.offset() > input.start())
+        && !was_dead
+    {
+        trace!(
+            "reached beginning of search at offset {} without hitting \
+             a dead state, quitting to avoid potential false positive match",
+            at,
+        );
+        return Err(RetryError::Quadratic(RetryQuadraticError::new()));
+    }
+    Ok(mat)
+}
+
+#[cfg(feature = "hybrid")]
+pub(crate) fn hybrid_try_search_half_rev(
+    dfa: &crate::hybrid::dfa::DFA,
+    cache: &mut crate::hybrid::dfa::Cache,
+    input: &Input<'_>,
+    min_start: usize,
+) -> Result<Option<HalfMatch>, RetryError> {
+    let mut mat = None;
+    let mut sid = dfa.start_state_reverse(cache, input)?;
+    if input.start() == input.end() {
+        hybrid_eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
+        return Ok(mat);
+    }
+    let mut at = input.end() - 1;
+    loop {
+        sid = dfa
+            .next_state(cache, sid, input.haystack()[at])
+            .map_err(|_| MatchError::gave_up(at))?;
+        if sid.is_tagged() {
+            if sid.is_match() {
+                let pattern = dfa.match_pattern(cache, sid, 0);
+                // Since reverse searches report the beginning of a
+                // match and the beginning is inclusive (not exclusive
+                // like the end of a match), we add 1 to make it
+                // inclusive.
+                mat = Some(HalfMatch::new(pattern, at + 1));
+            } else if sid.is_dead() {
+                return Ok(mat);
+            } else if sid.is_quit() {
+                return Err(MatchError::quit(input.haystack()[at], at).into());
+            }
+        }
+        if at == input.start() {
+            break;
+        }
+        at -= 1;
+        if at < min_start {
+            trace!(
+                "reached position {} which is before the previous literal \
+                 match, quitting to avoid quadratic behavior",
+                at,
+            );
+            return Err(RetryError::Quadratic(RetryQuadraticError::new()));
+        }
+    }
+    let was_dead = sid.is_dead();
+    hybrid_eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
+    // See the comments in the full DFA routine above for why we need this.
+    if at == input.start()
+        && mat.map_or(false, |m| m.offset() > input.start())
+        && !was_dead
+    {
+        trace!(
+            "reached beginning of search at offset {} without hitting \
+             a dead state, quitting to avoid potential false positive match",
+            at,
+        );
+        return Err(RetryError::Quadratic(RetryQuadraticError::new()));
+    }
+    Ok(mat)
+}
+
+#[cfg(feature = "dfa-build")]
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn dfa_eoi_rev(
+    dfa: &crate::dfa::dense::DFA<alloc::vec::Vec<u32>>,
+    input: &Input<'_>,
+    sid: &mut crate::util::primitives::StateID,
+    mat: &mut Option<HalfMatch>,
+) -> Result<(), MatchError> {
+    use crate::dfa::Automaton;
+
+    let sp = input.get_span();
+    if sp.start > 0 {
+        let byte = input.haystack()[sp.start - 1];
+        *sid = dfa.next_state(*sid, byte);
+        if dfa.is_match_state(*sid) {
+            let pattern = dfa.match_pattern(*sid, 0);
+            *mat = Some(HalfMatch::new(pattern, sp.start));
+        } else if dfa.is_quit_state(*sid) {
+            return Err(MatchError::quit(byte, sp.start - 1));
+        }
+    } else {
+        *sid = dfa.next_eoi_state(*sid);
+        if dfa.is_match_state(*sid) {
+            let pattern = dfa.match_pattern(*sid, 0);
+            *mat = Some(HalfMatch::new(pattern, 0));
+        }
+        // N.B. We don't have to check 'is_quit' here because the EOI
+        // transition can never lead to a quit state.
+        debug_assert!(!dfa.is_quit_state(*sid));
+    }
+    Ok(())
+}
+
+#[cfg(feature = "hybrid")]
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn hybrid_eoi_rev(
+    dfa: &crate::hybrid::dfa::DFA,
+    cache: &mut crate::hybrid::dfa::Cache,
+    input: &Input<'_>,
+    sid: &mut crate::hybrid::LazyStateID,
+    mat: &mut Option<HalfMatch>,
+) -> Result<(), MatchError> {
+    let sp = input.get_span();
+    if sp.start > 0 {
+        let byte = input.haystack()[sp.start - 1];
+        *sid = dfa
+            .next_state(cache, *sid, byte)
+            .map_err(|_| MatchError::gave_up(sp.start))?;
+        if sid.is_match() {
+            let pattern = dfa.match_pattern(cache, *sid, 0);
+            *mat = Some(HalfMatch::new(pattern, sp.start));
+        } else if sid.is_quit() {
+            return Err(MatchError::quit(byte, sp.start - 1));
+        }
+    } else {
+        *sid = dfa
+            .next_eoi_state(cache, *sid)
+            .map_err(|_| MatchError::gave_up(sp.start))?;
+        if sid.is_match() {
+            let pattern = dfa.match_pattern(cache, *sid, 0);
+            *mat = Some(HalfMatch::new(pattern, 0));
+        }
+        // N.B. We don't have to check 'is_quit' here because the EOI
+        // transition can never lead to a quit state.
+        debug_assert!(!sid.is_quit());
+    }
+    Ok(())
+}
diff --git a/vendor/regex-automata/src/meta/literal.rs b/vendor/regex-automata/src/meta/literal.rs
new file mode 100644
index 0000000..a68b93b
--- /dev/null
+++ b/vendor/regex-automata/src/meta/literal.rs
@@ -0,0 +1,81 @@
+use alloc::{vec, vec::Vec};
+
+use regex_syntax::hir::Hir;
+
+use crate::{meta::regex::RegexInfo, util::search::MatchKind};
+
+/// Pull out an alternation of literals from the given sequence of HIR
+/// expressions.
+///
+/// There are numerous ways for this to fail. Generally, this only applies
+/// to regexes of the form 'foo|bar|baz|...|quux'. It can also fail if there
+/// are "too few" alternates, in which case, the regex engine is likely
+/// faster.
+///
+/// And currently, this only returns something when 'hirs.len() == 1'.
+pub(crate) fn alternation_literals(
+    info: &RegexInfo,
+    hirs: &[&Hir],
+) -> Option<Vec<Vec<u8>>> {
+    use regex_syntax::hir::{HirKind, Literal};
+
+    // Might as well skip the work below if we know we can't build an
+    // Aho-Corasick searcher.
+    if !cfg!(feature = "perf-literal-multisubstring") {
+        return None;
+    }
+    // This is pretty hacky, but basically, if `is_alternation_literal` is
+    // true, then we can make several assumptions about the structure of our
+    // HIR. This is what justifies the `unreachable!` statements below.
+    if hirs.len() != 1
+        || !info.props()[0].look_set().is_empty()
+        || info.props()[0].explicit_captures_len() > 0
+        || !info.props()[0].is_alternation_literal()
+        || info.config().get_match_kind() != MatchKind::LeftmostFirst
+    {
+        return None;
+    }
+    let hir = &hirs[0];
+    let alts = match *hir.kind() {
+        HirKind::Alternation(ref alts) => alts,
+        _ => return None, // one literal isn't worth it
+    };
+
+    let mut lits = vec![];
+    for alt in alts {
+        let mut lit = vec![];
+        match *alt.kind() {
+            HirKind::Literal(Literal(ref bytes)) => {
+                lit.extend_from_slice(bytes)
+            }
+            HirKind::Concat(ref exprs) => {
+                for e in exprs {
+                    match *e.kind() {
+                        HirKind::Literal(Literal(ref bytes)) => {
+                            lit.extend_from_slice(bytes);
+                        }
+                        _ => unreachable!("expected literal, got {:?}", e),
+                    }
+                }
+            }
+            _ => unreachable!("expected literal or concat, got {:?}", alt),
+        }
+        lits.push(lit);
+    }
+    // Primarily because Aho-Corasick doesn't have a "lazy DFA" but either
+    // a contiguous NFA or a full DFA. We rarely use the latter because it
+    // is so hungry (in time and space), and the former is decently fast,
+    // but not as fast as a well oiled lazy DFA.
+    //
+    // However, once the number starts getting large, the lazy DFA is likely
+    // to start thrashing because of the modest default cache size. When
+    // exactly does this happen? Dunno. But at whatever point that is (we
+    // make a guess below based on ad hoc benchmarking), we'll want to cut
+    // over to Aho-Corasick, where even the contiguous NFA is likely to do
+    // much better.
+    if lits.len() < 3000 {
+        debug!("skipping Aho-Corasick because there are too few literals");
+        return None;
+    }
+    Some(lits)
+}
diff --git a/vendor/regex-automata/src/meta/mod.rs b/vendor/regex-automata/src/meta/mod.rs
new file mode 100644
index 0000000..01f430f
--- /dev/null
+++ b/vendor/regex-automata/src/meta/mod.rs
@@ -0,0 +1,62 @@
+/*!
+Provides a regex matcher that composes several other regex matchers
+automatically.
+
+This module is home to a meta [`Regex`], which provides a convenient high
+level API for executing regular expressions in linear time.
+
+# Comparison with the `regex` crate
+
+A meta `Regex` is the implementation used directly by the `regex` crate.
+Indeed, the `regex` crate API is essentially just a light wrapper over a meta
+`Regex`. This means that if you need the full flexibility offered by this
+API, then you should be able to switch to using this API directly without
+any changes in match semantics or syntax. However, there are some API level
+differences:
+
+* The `regex` crate API returns match objects that include references to the
+haystack itself, which in turn makes it easy to access the matching strings
+without having to slice the haystack yourself. In contrast, a meta `Regex`
+returns match objects that only have offsets in them.
+* At time of writing, a meta `Regex` doesn't have some of the convenience
+routines that the `regex` crate has, such as replacements. Note though that
+[`Captures::interpolate_string`](crate::util::captures::Captures::interpolate_string)
+will handle the replacement string interpolation for you.
+* A meta `Regex` supports the [`Input`](crate::Input) abstraction, which
+provides a way to configure a search in more ways than is supported by the
+`regex` crate. For example, [`Input::anchored`](crate::Input::anchored) can
+be used to run an anchored search, regardless of whether the pattern is itself
+anchored with a `^`.
+* A meta `Regex` supports multi-pattern searching everywhere. Indeed, every
+[`Match`](crate::Match) returned by the search APIs includes a
+[`PatternID`](crate::PatternID) indicating which pattern matched. (See the
+short example following this list.) In the single pattern case, all matches
+correspond to [`PatternID::ZERO`](crate::PatternID::ZERO). In contrast, the
+`regex` crate has distinct `Regex` and `RegexSet` APIs. The former only
+supports a single pattern, while the latter supports multiple patterns but
+cannot report the offsets of a match.
+* A meta `Regex` provides the explicit capability of bypassing its internal
+memory pool for automatically acquiring mutable scratch space required by its
+internal regex engines. Namely, a [`Cache`] can be explicitly provided to lower
+level routines such as [`Regex::search_with`].
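+
+For example, here is the multi-pattern point above made concrete (a minimal
+sketch that uses only the APIs mentioned in this list):
+
+```
+use regex_automata::meta::Regex;
+
+let re = Regex::new_many(&[r"[0-9]+", r"[a-z]+"])?;
+// Every match reports which pattern produced it.
+let m = re.find("abc123").unwrap();
+assert_eq!(1, m.pattern().as_usize());
+assert_eq!(0..3, m.range());
+# Ok::<(), Box<dyn std::error::Error>>(())
+```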
+
+*/
+
+pub use self::{
+    error::BuildError,
+    regex::{
+        Builder, Cache, CapturesMatches, Config, FindMatches, Regex, Split,
+        SplitN,
+    },
+};
+
+mod error;
+#[cfg(any(feature = "dfa-build", feature = "hybrid"))]
+mod limited;
+mod literal;
+mod regex;
+mod reverse_inner;
+#[cfg(any(feature = "dfa-build", feature = "hybrid"))]
+mod stopat;
+mod strategy;
+mod wrappers;
diff --git a/vendor/regex-automata/src/meta/regex.rs b/vendor/regex-automata/src/meta/regex.rs
new file mode 100644
index 0000000..a06d2bb
--- /dev/null
+++ b/vendor/regex-automata/src/meta/regex.rs
@@ -0,0 +1,3649 @@
+use core::{
+    borrow::Borrow,
+    panic::{RefUnwindSafe, UnwindSafe},
+};
+
+use alloc::{boxed::Box, sync::Arc, vec, vec::Vec};
+
+use regex_syntax::{
+    ast,
+    hir::{self, Hir},
+};
+
+use crate::{
+    meta::{
+        error::BuildError,
+        strategy::{self, Strategy},
+        wrappers,
+    },
+    nfa::thompson::WhichCaptures,
+    util::{
+        captures::{Captures, GroupInfo},
+        iter,
+        pool::{Pool, PoolGuard},
+        prefilter::Prefilter,
+        primitives::{NonMaxUsize, PatternID},
+        search::{HalfMatch, Input, Match, MatchKind, PatternSet, Span},
+    },
+};
+
+/// A type alias for our pool of meta::Cache that fixes the type parameters to
+/// what we use for the meta regex below.
+type CachePool = Pool<Cache, CachePoolFn>;
+
+/// Same as above, but for the guard returned by a pool.
+type CachePoolGuard<'a> = PoolGuard<'a, Cache, CachePoolFn>;
+
+/// The type of the closure we use to create new caches. We need to spell out
+/// all of the marker traits or else we risk leaking !MARKER impls.
+type CachePoolFn =
+    Box<dyn Fn() -> Cache + Send + Sync + UnwindSafe + RefUnwindSafe>;
+
+/// A regex matcher that works by composing several other regex matchers
+/// automatically.
+///
+/// In effect, a meta regex papers over a lot of the quirks or performance
+/// problems in each of the regex engines in this crate. Its goal is to
+/// provide an infallible and simple API that "just does the right thing" in
+/// the common case.
+///
+/// A meta regex is the implementation of a `Regex` in the `regex` crate.
+/// Indeed, the `regex` crate API is essentially just a light wrapper over
+/// this type. This includes the `regex` crate's `RegexSet` API!
+///
+/// # Composition
+///
+/// This is called a "meta" matcher precisely because it uses other regex
+/// matchers to provide a convenient high level regex API. Here are some
+/// examples of how other regex matchers are composed:
+///
+/// * When calling [`Regex::captures`], instead of immediately
+/// running a slower but more capable regex engine like the
+/// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM), the meta regex engine
+/// will usually first look for the bounds of a match with a higher throughput
+/// regex engine like a [lazy DFA](crate::hybrid). Only when a match is found
+/// is a slower engine like `PikeVM` used to find the matching span for each
+/// capture group.
+/// * While higher throughput engines like the lazy DFA cannot handle
+/// Unicode word boundaries in general, they can still be used on pure ASCII
+/// haystacks by pretending that Unicode word boundaries are just plain ASCII
+/// word boundaries. However, if a haystack is not ASCII, the meta regex engine
+/// will automatically switch to a (possibly slower) regex engine that supports
+/// Unicode word boundaries in general.
+/// * In some cases where a regex pattern is just a simple literal or a small
+/// set of literals, an actual regex engine won't be used at all. Instead,
+/// substring or multi-substring search algorithms will be employed.
+///
+/// There are many other forms of composition happening too, but the above
+/// should give a general idea. In particular, it may perhaps be surprising
+/// that *multiple* regex engines might get executed for a single search. That
+/// is, the decision of what regex engine to use is not _just_ based on the
+/// pattern, but also based on the dynamic execution of the search itself.
+///
+/// The primary reason for this composition is performance. The fundamental
+/// tension is that the faster engines tend to be less capable, and the more
+/// capable engines tend to be slower.
+///
+/// Note that the forms of composition that are allowed are determined by
+/// compile time crate features and configuration. For example, if the
+/// `hybrid` feature isn't enabled, or if [`Config::hybrid`] has been
+/// disabled, then the meta regex engine will never use a lazy DFA.
+///
+/// # Synchronization and cloning
+///
+/// Most of the regex engines in this crate require some kind of mutable
+/// "scratch" space to read and write from while performing a search. Since
+/// a meta regex composes these regex engines, a meta regex also requires
+/// mutable scratch space. This scratch space is called a [`Cache`].
+///
+/// Most regex engines _also_ usually have a read-only component, typically
+/// a [Thompson `NFA`](crate::nfa::thompson::NFA).
+///
+/// In order to make the `Regex` API convenient, most of the routines hide
+/// the fact that a `Cache` is needed at all. To achieve this, a [memory
+/// pool](crate::util::pool::Pool) is used internally to retrieve `Cache`
+/// values in a thread safe way that also permits reuse. This in turn implies
+/// that every such search call requires some form of synchronization. Usually
+/// this synchronization is fast enough to not notice, but in some cases, it
+/// can be a bottleneck. This typically occurs when all of the following are
+/// true:
+///
+/// * The same `Regex` is shared across multiple threads simultaneously,
+/// usually via a [`util::lazy::Lazy`](crate::util::lazy::Lazy) or something
+/// similar from the `once_cell` or `lazy_static` crates.
+/// * The primary unit of work in each thread is a regex search.
+/// * Searches are run on very short haystacks.
+///
+/// This particular case can lead to high contention on the pool used by a
+/// `Regex` internally, which can in turn increase latency to a noticeable
+/// effect. This cost can be mitigated in one of the following ways:
+///
+/// * Use a distinct copy of a `Regex` in each thread, usually by cloning it.
+/// Cloning a `Regex` _does not_ do a deep copy of its read-only component.
+/// But it does lead to each `Regex` having its own memory pool, which in
+/// turn eliminates the problem of contention. In general, this technique
+/// should not result in any additional memory usage when compared to sharing
+/// the same `Regex` across multiple threads simultaneously.
+/// * Use lower level APIs, like [`Regex::search_with`], which permit passing
+/// a `Cache` explicitly. In this case, it is up to you to determine how best
+/// to provide a `Cache`. For example, you might put a `Cache` in thread-local
+/// storage if your use case allows for it. (A short sketch of this second
+/// approach follows this list.)
+///
+/// Overall, this is an issue that happens rarely in practice, but it can
+/// happen.
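+///
+/// As a small sketch of the second mitigation (only public APIs shown
+/// elsewhere in these docs are used):
+///
+/// ```
+/// use regex_automata::{meta::Regex, Input};
+///
+/// let re = Regex::new(r"[0-9]+")?;
+/// // Create a cache once and reuse it for a batch of searches. This
+/// // bypasses the internal pool (and its synchronization) entirely.
+/// let mut cache = re.create_cache();
+/// let mut count = 0;
+/// for hay in ["a1", "b22", "ccc"] {
+///     if re.search_with(&mut cache, &Input::new(hay)).is_some() {
+///         count += 1;
+///     }
+/// }
+/// assert_eq!(2, count);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```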
+///
+/// # Warning: spin-locks may be used in alloc-only mode
+///
+/// When this crate is built without the `std` feature and the high level APIs
+/// on a `Regex` are used, then a spin-lock will be used to synchronize access
+/// to an internal pool of `Cache` values. This may be undesirable because
+/// a spin-lock is [effectively impossible to implement correctly in user
+/// space][spinlocks-are-bad]. That is, more concretely, the spin-lock could
+/// result in a deadlock.
+///
+/// [spinlocks-are-bad]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
+///
+/// If you want to avoid the use of spin-locks when the `std` feature is
+/// disabled, then you must use APIs that accept a `Cache` value explicitly.
+/// For example, [`Regex::search_with`].
+///
+/// # Example
+///
+/// ```
+/// use regex_automata::meta::Regex;
+///
+/// let re = Regex::new(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$")?;
+/// assert!(re.is_match("2010-03-14"));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// # Example: anchored search
+///
+/// This example shows how to use [`Input::anchored`] to run an anchored
+/// search, even when the regex pattern itself isn't anchored. An anchored
+/// search guarantees that if a match is found, then the start offset of the
+/// match corresponds to the offset at which the search was started.
+///
+/// ```
+/// use regex_automata::{meta::Regex, Anchored, Input, Match};
+///
+/// let re = Regex::new(r"\bfoo\b")?;
+/// let input = Input::new("xx foo xx").range(3..).anchored(Anchored::Yes);
+/// // The offsets are in terms of the original haystack.
+/// assert_eq!(Some(Match::must(0, 3..6)), re.find(input));
+///
+/// // Notice that no match occurs here, because \b still takes the
+/// // surrounding context into account, even if it means looking back
+/// // before the start of your search.
+/// let hay = "xxfoo xx";
+/// let input = Input::new(hay).range(2..).anchored(Anchored::Yes);
+/// assert_eq!(None, re.find(input));
+/// // Indeed, you cannot achieve the above by simply slicing the
+/// // haystack itself, since the regex engine can't see the
+/// // surrounding context. This is why 'Input' permits setting
+/// // the bounds of a search!
+/// let input = Input::new(&hay[2..]).anchored(Anchored::Yes);
+/// // WRONG!
+/// assert_eq!(Some(Match::must(0, 0..3)), re.find(input));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// # Example: earliest search
+///
+/// This example shows how to use [`Input::earliest`] to run a search that
+/// might stop before finding the typical leftmost match.
+///
+/// ```
+/// use regex_automata::{meta::Regex, Anchored, Input, Match};
+///
+/// let re = Regex::new(r"[a-z]{3}|b")?;
+/// let input = Input::new("abc").earliest(true);
+/// assert_eq!(Some(Match::must(0, 1..2)), re.find(input));
+///
+/// // Note that "earliest" isn't really a match semantic unto itself.
+/// // Instead, it is merely an instruction to whatever regex engine
+/// // gets used internally to quit as soon as it can. For example,
+/// // this regex uses a different search technique, and winds up
+/// // producing a different (but valid) match!
+/// let re = Regex::new(r"abc|b")?; +/// let input = Input::new("abc").earliest(true); +/// assert_eq!(Some(Match::must(0, 0..3)), re.find(input)); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: change the line terminator +/// +/// This example shows how to enable multi-line mode by default and change +/// the line terminator to the NUL byte: +/// +/// ``` +/// use regex_automata::{meta::Regex, util::syntax, Match}; +/// +/// let re = Regex::builder() +/// .syntax(syntax::Config::new().multi_line(true)) +/// .configure(Regex::config().line_terminator(b'\x00')) +/// .build(r"^foo$")?; +/// let hay = "\x00foo\x00"; +/// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay)); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Debug)] +pub struct Regex { + /// The actual regex implementation. + imp: Arc, + /// A thread safe pool of caches. + /// + /// For the higher level search APIs, a `Cache` is automatically plucked + /// from this pool before running a search. The lower level `with` methods + /// permit the caller to provide their own cache, thereby bypassing + /// accesses to this pool. + /// + /// Note that we put this outside the `Arc` so that cloning a `Regex` + /// results in creating a fresh `CachePool`. This in turn permits callers + /// to clone regexes into separate threads where each such regex gets + /// the pool's "thread owner" optimization. Otherwise, if one shares the + /// `Regex` directly, then the pool will go through a slower mutex path for + /// all threads except for the "owner." + pool: CachePool, +} + +/// The internal implementation of `Regex`, split out so that it can be wrapped +/// in an `Arc`. +#[derive(Debug)] +struct RegexI { + /// The core matching engine. + /// + /// Why is this reference counted when RegexI is already wrapped in an Arc? + /// Well, we need to capture this in a closure to our `Pool` below in order + /// to create new `Cache` values when needed. So since it needs to be in + /// two places, we make it reference counted. + /// + /// We make `RegexI` itself reference counted too so that `Regex` itself + /// stays extremely small and very cheap to clone. + strat: Arc, + /// Metadata about the regexes driving the strategy. The metadata is also + /// usually stored inside the strategy too, but we put it here as well + /// so that we can get quick access to it (without virtual calls) before + /// executing the regex engine. For example, we use this metadata to + /// detect a subset of cases where we know a match is impossible, and can + /// thus avoid calling into the strategy at all. + /// + /// Since `RegexInfo` is stored in multiple places, it is also reference + /// counted. + info: RegexInfo, +} + +/// Convenience constructors for a `Regex` using the default configuration. +impl Regex { + /// Builds a `Regex` from a single pattern string using the default + /// configuration. + /// + /// If there was a problem parsing the pattern or a problem turning it into + /// a regex matcher, then an error is returned. + /// + /// If you want to change the configuration of a `Regex`, use a [`Builder`] + /// with a [`Config`]. 
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Match};
+    ///
+    /// let re = Regex::new(r"(?Rm)^foo$")?;
+    /// let hay = "\r\nfoo\r\n";
+    /// assert_eq!(Some(Match::must(0, 2..5)), re.find(hay));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn new(pattern: &str) -> Result<Regex, BuildError> {
+        Regex::builder().build(pattern)
+    }
+
+    /// Builds a `Regex` from many pattern strings using the default
+    /// configuration.
+    ///
+    /// If there was a problem parsing any of the patterns or a problem
+    /// turning them into a regex matcher, then an error is returned.
+    ///
+    /// If you want to change the configuration of a `Regex`, use a
+    /// [`Builder`] with a [`Config`].
+    ///
+    /// # Example: simple lexer
+    ///
+    /// This simplistic example leverages the multi-pattern support to build
+    /// a simple little lexer. The pattern ID in the match tells you which
+    /// regex matched, which in turn might be used to map back to the "type"
+    /// of the token returned by the lexer.
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Match};
+    ///
+    /// let re = Regex::new_many(&[
+    ///     r"[[:space:]]",
+    ///     r"[A-Za-z0-9][A-Za-z0-9_]+",
+    ///     r"->",
+    ///     r".",
+    /// ])?;
+    /// let haystack = "fn is_boss(bruce: i32, springsteen: String) -> bool;";
+    /// let matches: Vec<Match> = re.find_iter(haystack).collect();
+    /// assert_eq!(matches, vec![
+    ///     Match::must(1, 0..2),   // 'fn'
+    ///     Match::must(0, 2..3),   // ' '
+    ///     Match::must(1, 3..10),  // 'is_boss'
+    ///     Match::must(3, 10..11), // '('
+    ///     Match::must(1, 11..16), // 'bruce'
+    ///     Match::must(3, 16..17), // ':'
+    ///     Match::must(0, 17..18), // ' '
+    ///     Match::must(1, 18..21), // 'i32'
+    ///     Match::must(3, 21..22), // ','
+    ///     Match::must(0, 22..23), // ' '
+    ///     Match::must(1, 23..34), // 'springsteen'
+    ///     Match::must(3, 34..35), // ':'
+    ///     Match::must(0, 35..36), // ' '
+    ///     Match::must(1, 36..42), // 'String'
+    ///     Match::must(3, 42..43), // ')'
+    ///     Match::must(0, 43..44), // ' '
+    ///     Match::must(2, 44..46), // '->'
+    ///     Match::must(0, 46..47), // ' '
+    ///     Match::must(1, 47..51), // 'bool'
+    ///     Match::must(3, 51..52), // ';'
+    /// ]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// One can write a lexer like the above using a regex like
+    /// `(?P<space>[[:space:]])|(?P<word>[A-Za-z0-9][A-Za-z0-9_]+)|...`,
+    /// but then you need to ask which capture group matched to determine
+    /// which branch in the regex matched, and thus, which token the match
+    /// corresponds to. In contrast, the above example includes the pattern
+    /// ID in the match. There's no need to use capture groups at all.
+    ///
+    /// # Example: finding the pattern that caused an error
+    ///
+    /// When a syntax error occurs, it is possible to ask which pattern
+    /// caused the syntax error.
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, PatternID};
+    ///
+    /// let err = Regex::new_many(&["a", "b", r"\p{Foo}", "c"]).unwrap_err();
+    /// assert_eq!(Some(PatternID::must(2)), err.pattern());
+    /// ```
+    ///
+    /// # Example: zero patterns is valid
+    ///
+    /// Building a regex with zero patterns results in a regex that never
+    /// matches anything. Because this routine is generic, passing an empty
+    /// slice usually requires a turbo-fish (or something else to help type
+    /// inference).
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, util::syntax, Match};
+    ///
+    /// let re = Regex::new_many::<&str>(&[])?;
+    /// assert_eq!(None, re.find(""));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn new_many<P: AsRef<str>>(
+        patterns: &[P],
+    ) -> Result<Regex, BuildError> {
+        Regex::builder().build_many(patterns)
+    }
+
+    /// Return a default configuration for a `Regex`.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Config`] type when customizing the construction of a `Regex`.
+    ///
+    /// # Example: lower the NFA size limit
+    ///
+    /// In some cases, the default size limit might be too big. The size
+    /// limit can be lowered, which will prevent large regex patterns from
+    /// compiling.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let result = Regex::builder()
+    ///     .configure(Regex::config().nfa_size_limit(Some(20 * (1<<10))))
+    ///     // Not even 20KB is enough to build a single large Unicode class!
+    ///     .build(r"\pL");
+    /// assert!(result.is_err());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn config() -> Config {
+        Config::new()
+    }
+
+    /// Return a builder for configuring the construction of a `Regex`.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Builder`] type in common cases.
+    ///
+    /// # Example: change the line terminator
+    ///
+    /// This example shows how to enable multi-line mode by default and
+    /// change the line terminator to the NUL byte:
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, util::syntax, Match};
+    ///
+    /// let re = Regex::builder()
+    ///     .syntax(syntax::Config::new().multi_line(true))
+    ///     .configure(Regex::config().line_terminator(b'\x00'))
+    ///     .build(r"^foo$")?;
+    /// let hay = "\x00foo\x00";
+    /// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn builder() -> Builder {
+        Builder::new()
+    }
+}
+
+/// High level convenience routines for using a regex to search a haystack.
+impl Regex {
+    /// Returns true if and only if this regex matches the given haystack.
+    ///
+    /// This routine may short circuit if it knows that scanning future input
+    /// will never lead to a different result. (Consider how this might make
+    /// a difference given the regex `a+` on the haystack `aaaaaaaaaaaaaaa`.
+    /// This routine _may_ stop after it sees the first `a`, but routines
+    /// like `find` need to continue searching because `+` is greedy by
+    /// default.)
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::new("foo[0-9]+bar")?;
+    ///
+    /// assert!(re.is_match("foo12345bar"));
+    /// assert!(!re.is_match("foobar"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: consistency with search APIs
+    ///
+    /// `is_match` is guaranteed to return `true` whenever `find` returns a
+    /// match. This includes searches that are executed entirely within a
+    /// codepoint:
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Input};
+    ///
+    /// let re = Regex::new("a*")?;
+    ///
+    /// // This doesn't match because the default configuration bans empty
+    /// // matches from splitting a codepoint.
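+    /// // (The span 1..2 lies strictly inside the 3-byte UTF-8 encoding of
+    /// // '☃', so an empty match there would split the codepoint.)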
+    /// assert!(!re.is_match(Input::new("☃").span(1..2)));
+    /// assert_eq!(None, re.find(Input::new("☃").span(1..2)));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Notice that when UTF-8 mode is disabled, then the above reports a
+    /// match because the restriction against zero-width matches that split a
+    /// codepoint has been lifted:
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Input, Match};
+    ///
+    /// let re = Regex::builder()
+    ///     .configure(Regex::config().utf8_empty(false))
+    ///     .build("a*")?;
+    ///
+    /// assert!(re.is_match(Input::new("☃").span(1..2)));
+    /// assert_eq!(
+    ///     Some(Match::must(0, 1..1)),
+    ///     re.find(Input::new("☃").span(1..2)),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// A similar idea applies when using line anchors with CRLF mode enabled,
+    /// which prevents them from matching between a `\r` and a `\n`.
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Input, Match};
+    ///
+    /// let re = Regex::new(r"(?Rm:$)")?;
+    /// assert!(!re.is_match(Input::new("\r\n").span(1..1)));
+    /// // A regular line anchor, which only considers \n as a
+    /// // line terminator, will match.
+    /// let re = Regex::new(r"(?m:$)")?;
+    /// assert!(re.is_match(Input::new("\r\n").span(1..1)));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn is_match<'h, I: Into<Input<'h>>>(&self, input: I) -> bool {
+        let input = input.into().earliest(true);
+        if self.imp.info.is_impossible(&input) {
+            return false;
+        }
+        let mut guard = self.pool.get();
+        let result = self.imp.strat.is_match(&mut guard, &input);
+        // See 'Regex::search' for why we put the guard back explicitly.
+        PoolGuard::put(guard);
+        result
+    }
+
+    /// Executes a leftmost search and returns the first match that is found,
+    /// if one exists.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Match};
+    ///
+    /// let re = Regex::new("foo[0-9]+")?;
+    /// assert_eq!(Some(Match::must(0, 0..8)), re.find("foo12345"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find<'h, I: Into<Input<'h>>>(&self, input: I) -> Option<Match> {
+        self.search(&input.into())
+    }
+
+    /// Executes a leftmost forward search and writes the spans of capturing
+    /// groups that participated in a match into the provided [`Captures`]
+    /// value. If no match was found, then [`Captures::is_match`] is
+    /// guaranteed to return `false`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Span};
+    ///
+    /// let re = Regex::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?;
+    /// let mut caps = re.create_captures();
+    ///
+    /// re.captures("2010-03-14", &mut caps);
+    /// assert!(caps.is_match());
+    /// assert_eq!(Some(Span::from(0..4)), caps.get_group(1));
+    /// assert_eq!(Some(Span::from(5..7)), caps.get_group(2));
+    /// assert_eq!(Some(Span::from(8..10)), caps.get_group(3));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn captures<'h, I: Into<Input<'h>>>(
+        &self,
+        input: I,
+        caps: &mut Captures,
+    ) {
+        self.search_captures(&input.into(), caps)
+    }
+
+    /// Returns an iterator over all non-overlapping leftmost matches in
+    /// the given haystack. If no match exists, then the iterator yields no
+    /// elements.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Match};
+    ///
+    /// let re = Regex::new("foo[0-9]+")?;
+    /// let haystack = "foo1 foo12 foo123";
+    /// let matches: Vec<Match> = re.find_iter(haystack).collect();
+    /// assert_eq!(matches, vec![
+    ///     Match::must(0, 0..4),
+    ///     Match::must(0, 5..10),
+    ///     Match::must(0, 11..17),
+    /// ]);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find_iter<'r, 'h, I: Into<Input<'h>>>(
+        &'r self,
+        input: I,
+    ) -> FindMatches<'r, 'h> {
+        let cache = self.pool.get();
+        let it = iter::Searcher::new(input.into());
+        FindMatches { re: self, cache, it }
+    }
+
+    /// Returns an iterator over all non-overlapping `Captures` values. If no
+    /// match exists, then the iterator yields no elements.
+    ///
+    /// This yields the same matches as [`Regex::find_iter`], but it includes
+    /// the spans of all capturing groups that participate in each match.
+    ///
+    /// **Tip:** See [`util::iter::Searcher`](crate::util::iter::Searcher) for
+    /// how to correctly iterate over all matches in a haystack while avoiding
+    /// the creation of a new `Captures` value for every match. (Which you are
+    /// forced to do with an `Iterator`.)
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Span};
+    ///
+    /// let re = Regex::new("foo(?P<numbers>[0-9]+)")?;
+    ///
+    /// let haystack = "foo1 foo12 foo123";
+    /// let matches: Vec<Span> = re
+    ///     .captures_iter(haystack)
+    ///     // The unwrap is OK since 'numbers' matches if the pattern matches.
+    ///     .map(|caps| caps.get_group_by_name("numbers").unwrap())
+    ///     .collect();
+    /// assert_eq!(matches, vec![
+    ///     Span::from(3..4),
+    ///     Span::from(8..10),
+    ///     Span::from(14..17),
+    /// ]);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn captures_iter<'r, 'h, I: Into<Input<'h>>>(
+        &'r self,
+        input: I,
+    ) -> CapturesMatches<'r, 'h> {
+        let cache = self.pool.get();
+        let caps = self.create_captures();
+        let it = iter::Searcher::new(input.into());
+        CapturesMatches { re: self, cache, caps, it }
+    }
+
+    /// Returns an iterator of spans of the haystack given, delimited by a
+    /// match of the regex. Namely, each element of the iterator corresponds
+    /// to a part of the haystack that *isn't* matched by the regular
+    /// expression.
+    ///
+    /// # Example
+    ///
+    /// To split a string delimited by arbitrary amounts of spaces or tabs:
+    ///
+    /// ```
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::new(r"[ \t]+")?;
+    /// let hay = "a b \t c\td e";
+    /// let fields: Vec<&str> = re.split(hay).map(|span| &hay[span]).collect();
+    /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: more cases
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::new(r" ")?;
+    /// let hay = "Mary had a little lamb";
+    /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["Mary", "had", "a", "little", "lamb"]);
+    ///
+    /// let re = Regex::new(r"X")?;
+    /// let hay = "";
+    /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec![""]);
+    ///
+    /// let re = Regex::new(r"X")?;
+    /// let hay = "lionXXtigerXleopard";
+    /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["lion", "", "tiger", "leopard"]);
+    ///
+    /// let re = Regex::new(r"::")?;
+    /// let hay = "lion::tiger::leopard";
+    /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["lion", "tiger", "leopard"]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// If a haystack contains multiple contiguous matches, you will end up
+    /// with empty spans yielded by the iterator:
+    ///
+    /// ```
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::new(r"X")?;
+    /// let hay = "XXXXaXXbXc";
+    /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]);
+    ///
+    /// let re = Regex::new(r"/")?;
+    /// let hay = "(///)";
+    /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["(", "", "", ")"]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Separators at the start or end of a haystack are neighbored by empty
+    /// spans.
+    ///
+    /// ```
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::new(r"0")?;
+    /// let hay = "010";
+    /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["", "1", ""]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// When the empty string is used as a regex, it splits at every valid
+    /// UTF-8 boundary by default (which includes the beginning and end of
+    /// the haystack):
+    ///
+    /// ```
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::new(r"")?;
+    /// let hay = "rust";
+    /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["", "r", "u", "s", "t", ""]);
+    ///
+    /// // Splitting by an empty string is UTF-8 aware by default!
+    /// let re = Regex::new(r"")?;
+    /// let hay = "☃";
+    /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["", "☃", ""]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// But note that UTF-8 mode for empty strings can be disabled, which will
+    /// then result in a match at every byte offset in the haystack,
+    /// including between every UTF-8 code unit.
+    ///
+    /// ```
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::builder()
+    ///     .configure(Regex::config().utf8_empty(false))
+    ///     .build(r"")?;
+    /// let hay = "☃".as_bytes();
+    /// let got: Vec<&[u8]> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec![
+    ///     // Writing byte string slices is just brutal. The problem is that
+    ///     // b"foo" has type &[u8; 3] instead of &[u8].
+    ///     &[][..], &[b'\xE2'][..], &[b'\x98'][..], &[b'\x83'][..], &[][..],
+    /// ]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Contiguous separators (which commonly show up with whitespace) can
+    /// lead to possibly surprising behavior. For example, this code is
+    /// correct:
+    ///
+    /// ```
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::new(r" ")?;
+    /// let hay = "    a  b c";
+    /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// It does *not* give you `["a", "b", "c"]`. For that behavior, you'd
+    /// want to match contiguous space characters:
+    ///
+    /// ```
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::new(r" +")?;
+    /// let hay = "    a  b c";
+    /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
+    /// // N.B. This does still include a leading empty span because ' +'
+    /// // matches at the beginning of the haystack.
+    /// assert_eq!(got, vec!["", "a", "b", "c"]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn split<'r, 'h, I: Into<Input<'h>>>(
+        &'r self,
+        input: I,
+    ) -> Split<'r, 'h> {
+        Split { finder: self.find_iter(input), last: 0 }
+    }
+
+    /// Returns an iterator of at most `limit` spans of the haystack given,
+    /// delimited by a match of the regex. (A `limit` of `0` will return no
+    /// spans.) Namely, each element of the iterator corresponds to a part
+    /// of the haystack that *isn't* matched by the regular expression. The
+    /// remainder of the haystack that is not split will be the last element
+    /// in the iterator.
+    ///
+    /// # Example
+    ///
+    /// Get the first two words in some haystack:
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::new(r"\W+").unwrap();
+    /// let hay = "Hey! How are you?";
+    /// let fields: Vec<&str> =
+    ///     re.splitn(hay, 3).map(|span| &hay[span]).collect();
+    /// assert_eq!(fields, vec!["Hey", "How", "are you?"]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Examples: more cases
+    ///
+    /// ```
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::new(r" ")?;
+    /// let hay = "Mary had a little lamb";
+    /// let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["Mary", "had", "a little lamb"]);
+    ///
+    /// let re = Regex::new(r"X")?;
+    /// let hay = "";
+    /// let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec![""]);
+    ///
+    /// let re = Regex::new(r"X")?;
+    /// let hay = "lionXXtigerXleopard";
+    /// let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["lion", "", "tigerXleopard"]);
+    ///
+    /// let re = Regex::new(r"::")?;
+    /// let hay = "lion::tiger::leopard";
+    /// let got: Vec<&str> = re.splitn(hay, 2).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["lion", "tiger::leopard"]);
+    ///
+    /// let re = Regex::new(r"X")?;
+    /// let hay = "abcXdef";
+    /// let got: Vec<&str> = re.splitn(hay, 1).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["abcXdef"]);
+    ///
+    /// let re = Regex::new(r"X")?;
+    /// let hay = "abcdef";
+    /// let got: Vec<&str> = re.splitn(hay, 2).map(|sp| &hay[sp]).collect();
+    /// assert_eq!(got, vec!["abcdef"]);
+    ///
+    /// let re = Regex::new(r"X")?;
+    /// let hay = "abcXdef";
+    /// let got: Vec<&str> = re.splitn(hay, 0).map(|sp| &hay[sp]).collect();
+    /// assert!(got.is_empty());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn splitn<'r, 'h, I: Into<Input<'h>>>(
+        &'r self,
+        input: I,
+        limit: usize,
+    ) -> SplitN<'r, 'h> {
+        SplitN { splits: self.split(input), limit }
+    }
+}
+
+/// Lower level search routines that give more control.
+impl Regex {
+    /// Returns the start and end offset of the leftmost match. If no match
+    /// exists, then `None` is returned.
+    ///
+    /// This is like [`Regex::find`], but it accepts a concrete `&Input`
+    /// instead of an `Into<Input>`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Input, Match};
+    ///
+    /// let re = Regex::new(r"Samwise|Sam")?;
+    /// let input = Input::new(
+    ///     "one of the chief characters, Samwise the Brave",
+    /// );
+    /// assert_eq!(Some(Match::must(0, 29..36)), re.search(&input));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search(&self, input: &Input<'_>) -> Option<Match> {
+        if self.imp.info.is_impossible(input) {
+            return None;
+        }
+        let mut guard = self.pool.get();
+        let result = self.imp.strat.search(&mut guard, input);
+        // We do this dance with the guard and explicitly put it back in the
+        // pool because it seems to result in better codegen. If we let the
+        // guard's Drop impl put it back in the pool, then functions like
+        // ptr::drop_in_place get called and they *don't* get inlined. This
+        // isn't usually a big deal, but in latency sensitive benchmarks the
+        // extra function call can matter.
+        //
+        // I used `rebar measure -f '^grep/every-line$' -e meta` to measure
+        // the effects here.
+        //
+        // Note that this doesn't eliminate the latency effects of using the
+        // pool. There is still some (minor) cost for the "thread owner" of
+        // the pool. (i.e., The thread that first calls a regex search
+        // routine.) However, for other threads using the regex, the pool
+        // access can be quite expensive as it goes through a mutex.
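+        // (See the docs on the 'pool' field of 'Regex' above for more on
+        // the thread owner optimization and the mutex fallback.)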
+        // Callers can avoid this by either cloning the Regex (which creates
+        // a distinct copy of the pool), or callers can use the lower level
+        // APIs that accept a 'Cache' directly and do their own handling.
+        PoolGuard::put(guard);
+        result
+    }
+
+    /// Returns the end offset of the leftmost match. If no match exists,
+    /// then `None` is returned.
+    ///
+    /// This is distinct from [`Regex::search`] in that it only returns the
+    /// end of a match and not the start of the match. Depending on a variety
+    /// of implementation details, this _may_ permit the regex engine to do
+    /// less overall work. For example, if a DFA is being used to execute a
+    /// search, then the start of a match usually requires running a separate
+    /// DFA in reverse to find the start of a match. If one only needs the
+    /// end of a match, then the separate reverse scan to find the start of
+    /// a match can be skipped. (Note that the reverse scan is avoided even
+    /// when using `Regex::search` when possible, for example, in the case of
+    /// an anchored search.)
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Input, HalfMatch};
+    ///
+    /// let re = Regex::new(r"Samwise|Sam")?;
+    /// let input = Input::new(
+    ///     "one of the chief characters, Samwise the Brave",
+    /// );
+    /// assert_eq!(Some(HalfMatch::must(0, 36)), re.search_half(&input));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search_half(&self, input: &Input<'_>) -> Option<HalfMatch> {
+        if self.imp.info.is_impossible(input) {
+            return None;
+        }
+        let mut guard = self.pool.get();
+        let result = self.imp.strat.search_half(&mut guard, input);
+        // See 'Regex::search' for why we put the guard back explicitly.
+        PoolGuard::put(guard);
+        result
+    }
+
+    /// Executes a leftmost forward search and writes the spans of capturing
+    /// groups that participated in a match into the provided [`Captures`]
+    /// value. If no match was found, then [`Captures::is_match`] is
+    /// guaranteed to return `false`.
+    ///
+    /// This is like [`Regex::captures`], but it accepts a concrete `&Input`
+    /// instead of an `Into<Input>`.
+    ///
+    /// # Example: specific pattern search
+    ///
+    /// This example shows how to build a multi-pattern `Regex` that permits
+    /// searching for specific patterns.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     meta::Regex,
+    ///     Anchored, Match, PatternID, Input,
+    /// };
+    ///
+    /// let re = Regex::new_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
+    /// let mut caps = re.create_captures();
+    /// let haystack = "foo123";
+    ///
+    /// // Since we are using the default leftmost-first match and both
+    /// // patterns match at the same starting position, only the first
+    /// // pattern will be returned in this case when doing a search for any
+    /// // of the patterns.
+    /// let expected = Some(Match::must(0, 0..6));
+    /// re.search_captures(&Input::new(haystack), &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// // But if we want to check whether some other pattern matches, then
+    /// // we can provide its pattern ID.
+    /// let expected = Some(Match::must(1, 0..6));
+    /// let input = Input::new(haystack)
+    ///     .anchored(Anchored::Pattern(PatternID::must(1)));
+    /// re.search_captures(&input, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: specifying the bounds of a search
+    ///
+    /// This example shows how providing the bounds of a search can produce
+    /// different results than simply sub-slicing the haystack.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{meta::Regex, Match, Input};
+    ///
+    /// let re = Regex::new(r"\b[0-9]{3}\b")?;
+    /// let mut caps = re.create_captures();
+    /// let haystack = "foo123bar";
+    ///
+    /// // Since we sub-slice the haystack, the search doesn't know about
+    /// // the larger context and assumes that `123` is surrounded by word
+    /// // boundaries. And of course, the match position is reported relative
+    /// // to the sub-slice as well, which means we get `0..3` instead of
+    /// // `3..6`.
+    /// let expected = Some(Match::must(0, 0..3));
+    /// let input = Input::new(&haystack[3..6]);
+    /// re.search_captures(&input, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// // But if we provide the bounds of the search within the context of
+    /// // the entire haystack, then the search can take the surrounding
+    /// // context into account. (And if we did find a match, it would be
+    /// // reported as a valid offset into `haystack` instead of its
+    /// // sub-slice.)
+    /// let expected = None;
+    /// let input = Input::new(haystack).range(3..6);
+    /// re.search_captures(&input, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search_captures(&self, input: &Input<'_>, caps: &mut Captures) {
+        caps.set_pattern(None);
+        let pid = self.search_slots(input, caps.slots_mut());
+        caps.set_pattern(pid);
+    }
+
+    /// Executes a leftmost forward search and writes the spans of capturing
+    /// groups that participated in a match into the provided `slots`, and
+    /// returns the matching pattern ID. The contents of the slots for
+    /// patterns other than the matching pattern are unspecified. If no match
+    /// was found, then `None` is returned and the contents of `slots` is
+    /// unspecified.
+    ///
+    /// This is like [`Regex::search`], but it accepts a raw slots slice
+    /// instead of a `Captures` value. This is useful in contexts where you
+    /// don't want or need to allocate a `Captures`.
+    ///
+    /// It is legal to pass _any_ number of slots to this routine. If the
+    /// regex engine would otherwise write a slot offset that doesn't fit in
+    /// the provided slice, then it is simply skipped. In general though,
+    /// there are usually three slice lengths you might want to use:
+    ///
+    /// * An empty slice, if you only care about which pattern matched.
+    /// * A slice with [`pattern_len() * 2`](Regex::pattern_len) slots, if
+    /// you only care about the overall match spans for each matching
+    /// pattern.
+    /// * A slice with
+    /// [`slot_len()`](crate::util::captures::GroupInfo::slot_len) slots,
+    /// which permits recording match offsets for every capturing group in
+    /// every pattern.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find the overall match offsets in a
+    /// multi-pattern search without allocating a `Captures` value. Indeed,
+    /// we can put our slots right on the stack.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{meta::Regex, PatternID, Input};
+    ///
+    /// let re = Regex::new_many(&[
+    ///     r"\pL+",
+    ///     r"\d+",
+    /// ])?;
+    /// let input = Input::new("!@#123");
+    ///
+    /// // We only care about the overall match offsets here, so we just
+    /// // allocate two slots for each pattern. Each slot records the start
+    /// // and end of the match.
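+    /// // (Two patterns with two slots each gives a slice of length 4.)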
+    /// let mut slots = [None; 4];
+    /// let pid = re.search_slots(&input, &mut slots);
+    /// assert_eq!(Some(PatternID::must(1)), pid);
+    ///
+    /// // The overall match offsets are always at 'pid * 2' and
+    /// // 'pid * 2 + 1'. See 'GroupInfo' for more details on the mapping
+    /// // between groups and slot indices.
+    /// let slot_start = pid.unwrap().as_usize() * 2;
+    /// let slot_end = slot_start + 1;
+    /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get()));
+    /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get()));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search_slots(
+        &self,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<PatternID> {
+        if self.imp.info.is_impossible(input) {
+            return None;
+        }
+        let mut guard = self.pool.get();
+        let result = self.imp.strat.search_slots(&mut guard, input, slots);
+        // See 'Regex::search' for why we put the guard back explicitly.
+        PoolGuard::put(guard);
+        result
+    }
+
+    /// Writes the set of patterns that match anywhere in the given search
+    /// configuration to `patset`. If multiple patterns match at the same
+    /// position and this `Regex` was configured with [`MatchKind::All`]
+    /// semantics, then all matching patterns are written to the given set.
+    ///
+    /// Unless all of the patterns in this `Regex` are anchored, then
+    /// generally speaking, this will scan the entire haystack.
+    ///
+    /// This search routine *does not* clear the pattern set. This gives some
+    /// flexibility to the caller (e.g., running multiple searches with the
+    /// same pattern set), but does make the API bug-prone if you're reusing
+    /// the same pattern set for multiple searches but intended them to be
+    /// independent.
+    ///
+    /// If a pattern ID matched but the given `PatternSet` does not have
+    /// sufficient capacity to store it, then it is not inserted and silently
+    /// dropped.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find all matching patterns in a haystack,
+    /// even when some patterns match at the same position as other patterns.
+    /// It is important that we configure the `Regex` with [`MatchKind::All`]
+    /// semantics here, or else overlapping matches will not be reported.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{meta::Regex, Input, MatchKind, PatternSet};
+    ///
+    /// let patterns = &[
+    ///     r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar",
+    /// ];
+    /// let re = Regex::builder()
+    ///     .configure(Regex::config().match_kind(MatchKind::All))
+    ///     .build_many(patterns)?;
+    ///
+    /// let input = Input::new("foobar");
+    /// let mut patset = PatternSet::new(re.pattern_len());
+    /// re.which_overlapping_matches(&input, &mut patset);
+    /// let expected = vec![0, 2, 3, 4, 6];
+    /// let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect();
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn which_overlapping_matches(
+        &self,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) {
+        if self.imp.info.is_impossible(input) {
+            return;
+        }
+        let mut guard = self.pool.get();
+        let result = self
+            .imp
+            .strat
+            .which_overlapping_matches(&mut guard, input, patset);
+        // See 'Regex::search' for why we put the guard back explicitly.
+        PoolGuard::put(guard);
+        result
+    }
+}
+
+/// Lower level search routines that give more control, and require the
+/// caller to provide an explicit [`Cache`] parameter.
+impl Regex {
+    /// This is like [`Regex::search`], but requires the caller to
+    /// explicitly pass a [`Cache`].
+    ///
+    /// # Why pass a `Cache` explicitly?
+    ///
+    /// Passing a `Cache` explicitly will bypass the use of an internal
+    /// memory pool used by `Regex` to get a `Cache` for a search. The use of
+    /// this pool can be slower in some cases when a `Regex` is used from
+    /// multiple threads simultaneously. Typically, performance only becomes
+    /// an issue when there is heavy contention, which in turn usually only
+    /// occurs when each thread's primary unit of work is a regex search on a
+    /// small haystack.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Input, Match};
+    ///
+    /// let re = Regex::new(r"Samwise|Sam")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new(
+    ///     "one of the chief characters, Samwise the Brave",
+    /// );
+    /// assert_eq!(
+    ///     Some(Match::must(0, 29..36)),
+    ///     re.search_with(&mut cache, &input),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search_with(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+    ) -> Option<Match> {
+        if self.imp.info.is_impossible(input) {
+            return None;
+        }
+        self.imp.strat.search(cache, input)
+    }
+
+    /// This is like [`Regex::search_half`], but requires the caller to
+    /// explicitly pass a [`Cache`].
+    ///
+    /// # Why pass a `Cache` explicitly?
+    ///
+    /// Passing a `Cache` explicitly will bypass the use of an internal
+    /// memory pool used by `Regex` to get a `Cache` for a search. The use of
+    /// this pool can be slower in some cases when a `Regex` is used from
+    /// multiple threads simultaneously. Typically, performance only becomes
+    /// an issue when there is heavy contention, which in turn usually only
+    /// occurs when each thread's primary unit of work is a regex search on a
+    /// small haystack.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, Input, HalfMatch};
+    ///
+    /// let re = Regex::new(r"Samwise|Sam")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new(
+    ///     "one of the chief characters, Samwise the Brave",
+    /// );
+    /// assert_eq!(
+    ///     Some(HalfMatch::must(0, 36)),
+    ///     re.search_half_with(&mut cache, &input),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search_half_with(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+    ) -> Option<HalfMatch> {
+        if self.imp.info.is_impossible(input) {
+            return None;
+        }
+        self.imp.strat.search_half(cache, input)
+    }
+
+    /// This is like [`Regex::search_captures`], but requires the caller to
+    /// explicitly pass a [`Cache`].
+    ///
+    /// # Why pass a `Cache` explicitly?
+    ///
+    /// Passing a `Cache` explicitly will bypass the use of an internal
+    /// memory pool used by `Regex` to get a `Cache` for a search. The use of
+    /// this pool can be slower in some cases when a `Regex` is used from
+    /// multiple threads simultaneously. Typically, performance only becomes
+    /// an issue when there is heavy contention, which in turn usually only
+    /// occurs when each thread's primary unit of work is a regex search on a
+    /// small haystack.
+    ///
+    /// # Example: specific pattern search
+    ///
+    /// This example shows how to build a multi-pattern `Regex` that permits
+    /// searching for specific patterns.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     meta::Regex,
+    ///     Anchored, Match, PatternID, Input,
+    /// };
+    ///
+    /// let re = Regex::new_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let haystack = "foo123";
+    ///
+    /// // Since we are using the default leftmost-first match and both
+    /// // patterns match at the same starting position, only the first
+    /// // pattern will be returned in this case when doing a search for any
+    /// // of the patterns.
+    /// let expected = Some(Match::must(0, 0..6));
+    /// re.search_captures_with(&mut cache, &Input::new(haystack), &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// // But if we want to check whether some other pattern matches, then
+    /// // we can provide its pattern ID.
+    /// let expected = Some(Match::must(1, 0..6));
+    /// let input = Input::new(haystack)
+    ///     .anchored(Anchored::Pattern(PatternID::must(1)));
+    /// re.search_captures_with(&mut cache, &input, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: specifying the bounds of a search
+    ///
+    /// This example shows how providing the bounds of a search can produce
+    /// different results than simply sub-slicing the haystack.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{meta::Regex, Match, Input};
+    ///
+    /// let re = Regex::new(r"\b[0-9]{3}\b")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let haystack = "foo123bar";
+    ///
+    /// // Since we sub-slice the haystack, the search doesn't know about
+    /// // the larger context and assumes that `123` is surrounded by word
+    /// // boundaries. And of course, the match position is reported relative
+    /// // to the sub-slice as well, which means we get `0..3` instead of
+    /// // `3..6`.
+    /// let expected = Some(Match::must(0, 0..3));
+    /// let input = Input::new(&haystack[3..6]);
+    /// re.search_captures_with(&mut cache, &input, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// // But if we provide the bounds of the search within the context of
+    /// // the entire haystack, then the search can take the surrounding
+    /// // context into account. (And if we did find a match, it would be
+    /// // reported as a valid offset into `haystack` instead of its
+    /// // sub-slice.)
+    /// let expected = None;
+    /// let input = Input::new(haystack).range(3..6);
+    /// re.search_captures_with(&mut cache, &input, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search_captures_with(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        caps: &mut Captures,
+    ) {
+        caps.set_pattern(None);
+        let pid = self.search_slots_with(cache, input, caps.slots_mut());
+        caps.set_pattern(pid);
+    }
+
+    /// This is like [`Regex::search_slots`], but requires the caller to
+    /// explicitly pass a [`Cache`].
+    ///
+    /// # Why pass a `Cache` explicitly?
+    ///
+    /// Passing a `Cache` explicitly will bypass the use of an internal
+    /// memory pool used by `Regex` to get a `Cache` for a search. The use of
+    /// this pool can be slower in some cases when a `Regex` is used from
+    /// multiple threads simultaneously. Typically, performance only becomes
+    /// an issue when there is heavy contention, which in turn usually only
+    /// occurs when each thread's primary unit of work is a regex search on a
+    /// small haystack.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find the overall match offsets in a
+    /// multi-pattern search without allocating a `Captures` value. Indeed,
+    /// we can put our slots right on the stack.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{meta::Regex, PatternID, Input};
+    ///
+    /// let re = Regex::new_many(&[
+    ///     r"\pL+",
+    ///     r"\d+",
+    /// ])?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("!@#123");
+    ///
+    /// // We only care about the overall match offsets here, so we just
+    /// // allocate two slots for each pattern. Each slot records the start
+    /// // and end of the match.
+    /// let mut slots = [None; 4];
+    /// let pid = re.search_slots_with(&mut cache, &input, &mut slots);
+    /// assert_eq!(Some(PatternID::must(1)), pid);
+    ///
+    /// // The overall match offsets are always at 'pid * 2' and
+    /// // 'pid * 2 + 1'. See 'GroupInfo' for more details on the mapping
+    /// // between groups and slot indices.
+    /// let slot_start = pid.unwrap().as_usize() * 2;
+    /// let slot_end = slot_start + 1;
+    /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get()));
+    /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get()));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search_slots_with(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<PatternID> {
+        if self.imp.info.is_impossible(input) {
+            return None;
+        }
+        self.imp.strat.search_slots(cache, input, slots)
+    }
+
+    /// This is like [`Regex::which_overlapping_matches`], but requires the
+    /// caller to explicitly pass a [`Cache`].
+    ///
+    /// # Why pass a `Cache` explicitly?
+    ///
+    /// Passing a `Cache` explicitly will bypass the use of an internal
+    /// memory pool used by `Regex` to get a `Cache` for a search. The use of
+    /// this pool can be slower in some cases when a `Regex` is used from
+    /// multiple threads simultaneously. Typically, performance only becomes
+    /// an issue when there is heavy contention, which in turn usually only
+    /// occurs when each thread's primary unit of work is a regex search on a
+    /// small haystack.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{meta::Regex, Input, MatchKind, PatternSet};
+    ///
+    /// let patterns = &[
+    ///     r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar",
+    /// ];
+    /// let re = Regex::builder()
+    ///     .configure(Regex::config().match_kind(MatchKind::All))
+    ///     .build_many(patterns)?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let input = Input::new("foobar");
+    /// let mut patset = PatternSet::new(re.pattern_len());
+    /// re.which_overlapping_matches_with(&mut cache, &input, &mut patset);
+    /// let expected = vec![0, 2, 3, 4, 6];
+    /// let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect();
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn which_overlapping_matches_with(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) {
+        if self.imp.info.is_impossible(input) {
+            return;
+        }
+        self.imp.strat.which_overlapping_matches(cache, input, patset)
+    }
+}
+
+/// Various non-search routines for querying properties of a `Regex` and
+/// convenience routines for creating [`Captures`] and [`Cache`] values.
+impl Regex {
+    /// Creates a new object for recording capture group offsets. This is
+ /// in search APIs like [`Regex::captures`] and [`Regex::search_captures`].
+ ///
+ /// This is a convenience routine for
+ /// `Captures::all(re.group_info().clone())`. Callers may build other types
+ /// of `Captures` values that record less information (and thus require
+ /// less work from the regex engine) using [`Captures::matches`] and
+ /// [`Captures::empty`].
+ ///
+ /// # Example
+ ///
+ /// This shows some alternatives to [`Regex::create_captures`]:
+ ///
+ /// ```
+ /// use regex_automata::{
+ /// meta::Regex,
+ /// util::captures::Captures,
+ /// Match, PatternID, Span,
+ /// };
+ ///
+ /// let re = Regex::new(r"(?<first>[A-Z][a-z]+) (?<last>[A-Z][a-z]+)")?;
+ ///
+ /// // This is equivalent to Regex::create_captures. It stores matching
+ /// // offsets for all groups in the regex.
+ /// let mut all = Captures::all(re.group_info().clone());
+ /// re.captures("Bruce Springsteen", &mut all);
+ /// assert_eq!(Some(Match::must(0, 0..17)), all.get_match());
+ /// assert_eq!(Some(Span::from(0..5)), all.get_group_by_name("first"));
+ /// assert_eq!(Some(Span::from(6..17)), all.get_group_by_name("last"));
+ ///
+ /// // In this version, we only care about the implicit groups, which
+ /// // means offsets for the explicit groups will be unavailable. It can
+ /// // sometimes be faster to ask for fewer groups, since the underlying
+ /// // regex engine needs to do less work to keep track of them.
+ /// let mut matches = Captures::matches(re.group_info().clone());
+ /// re.captures("Bruce Springsteen", &mut matches);
+ /// // We still get the overall match info.
+ /// assert_eq!(Some(Match::must(0, 0..17)), matches.get_match());
+ /// // But now the explicit groups are unavailable.
+ /// assert_eq!(None, matches.get_group_by_name("first"));
+ /// assert_eq!(None, matches.get_group_by_name("last"));
+ ///
+ /// // Finally, in this version, we don't ask to keep track of offsets for
+ /// // *any* groups. All we get back is whether a match occurred, and if
+ /// // so, the ID of the pattern that matched.
+ /// let mut empty = Captures::empty(re.group_info().clone());
+ /// re.captures("Bruce Springsteen", &mut empty);
+ /// // it's a match!
+ /// assert!(empty.is_match());
+ /// // for pattern ID 0
+ /// assert_eq!(Some(PatternID::ZERO), empty.pattern());
+ /// // Match offsets are unavailable.
+ /// assert_eq!(None, empty.get_match());
+ /// // And of course, explicit groups are unavailable too.
+ /// assert_eq!(None, empty.get_group_by_name("first"));
+ /// assert_eq!(None, empty.get_group_by_name("last"));
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn create_captures(&self) -> Captures {
+ Captures::all(self.group_info().clone())
+ }
+
+ /// Creates a new cache for use with lower level search APIs like
+ /// [`Regex::search_with`].
+ ///
+ /// The cache returned should only be used for searches for this `Regex`.
+ /// If you want to reuse the cache for another `Regex`, then you must call
+ /// [`Cache::reset`] with that `Regex`.
+ ///
+ /// This is a convenience routine for [`Cache::new`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex_automata::{meta::Regex, Input, Match};
+ ///
+ /// let re = Regex::new(r"(?-u)m\w+\s+m\w+")?;
+ /// let mut cache = re.create_cache();
+ /// let input = Input::new("crazy janey and her mission man");
+ /// assert_eq!(
+ /// Some(Match::must(0, 20..31)),
+ /// re.search_with(&mut cache, &input),
+ /// );
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn create_cache(&self) -> Cache {
+ self.imp.strat.create_cache()
+ }
+
+ /// Returns the total number of patterns in this regex.
+ ///
+ /// The standard [`Regex::new`] constructor always results in a `Regex`
+ /// with a single pattern, but [`Regex::new_many`] permits building a
+ /// multi-pattern regex.
+ ///
+ /// A `Regex` guarantees that the maximum possible `PatternID` returned in
+ /// any match is `Regex::pattern_len() - 1`. In the case where the number
+ /// of patterns is `0`, a match is impossible.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex_automata::meta::Regex;
+ ///
+ /// let re = Regex::new(r"(?m)^[a-z]$")?;
+ /// assert_eq!(1, re.pattern_len());
+ ///
+ /// let re = Regex::new_many::<&str>(&[])?;
+ /// assert_eq!(0, re.pattern_len());
+ ///
+ /// let re = Regex::new_many(&["a", "b", "c"])?;
+ /// assert_eq!(3, re.pattern_len());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn pattern_len(&self) -> usize {
+ self.imp.info.pattern_len()
+ }
+
+ /// Returns the total number of capturing groups.
+ ///
+ /// This includes the implicit capturing group corresponding to the
+ /// entire match. Therefore, the minimum value returned is `1`.
+ ///
+ /// # Example
+ ///
+ /// This shows a few patterns and how many capture groups they have.
+ ///
+ /// ```
+ /// use regex_automata::meta::Regex;
+ ///
+ /// let len = |pattern| {
+ /// Regex::new(pattern).map(|re| re.captures_len())
+ /// };
+ ///
+ /// assert_eq!(1, len("a")?);
+ /// assert_eq!(2, len("(a)")?);
+ /// assert_eq!(3, len("(a)|(b)")?);
+ /// assert_eq!(5, len("(a)(b)|(c)(d)")?);
+ /// assert_eq!(2, len("(a)|b")?);
+ /// assert_eq!(2, len("a|(b)")?);
+ /// assert_eq!(2, len("(b)*")?);
+ /// assert_eq!(2, len("(b)+")?);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// # Example: multiple patterns
+ ///
+ /// This routine also works for multiple patterns. The total number is
+ /// the sum of the capture groups of each pattern.
+ ///
+ /// ```
+ /// use regex_automata::meta::Regex;
+ ///
+ /// let len = |patterns| {
+ /// Regex::new_many(patterns).map(|re| re.captures_len())
+ /// };
+ ///
+ /// assert_eq!(2, len(&["a", "b"])?);
+ /// assert_eq!(4, len(&["(a)", "(b)"])?);
+ /// assert_eq!(6, len(&["(a)|(b)", "(c)|(d)"])?);
+ /// assert_eq!(8, len(&["(a)(b)|(c)(d)", "(x)(y)"])?);
+ /// assert_eq!(3, len(&["(a)", "b"])?);
+ /// assert_eq!(3, len(&["a", "(b)"])?);
+ /// assert_eq!(4, len(&["(a)", "(b)*"])?);
+ /// assert_eq!(4, len(&["(a)+", "(b)+"])?);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn captures_len(&self) -> usize {
+ self.imp
+ .info
+ .props_union()
+ .explicit_captures_len()
+ .saturating_add(self.pattern_len())
+ }
+
+ /// Returns the total number of capturing groups that appear in every
+ /// possible match.
+ ///
+ /// If the number of capture groups can vary depending on the match, then
+ /// this returns `None`. That is, a value is only returned when the number
+ /// of matching groups is invariant or "static."
+ ///
+ /// Note that like [`Regex::captures_len`], this **does** include the
+ /// implicit capturing group corresponding to the entire match.
+ /// Therefore, when a non-None value is returned, it is guaranteed to be
+ /// at least `1`. Stated differently, a return value of `Some(0)` is
+ /// impossible.
+ ///
+ /// # Example
+ ///
+ /// This shows a few cases where a static number of capture groups is
+ /// available and a few cases where it is not.
+ ///
+ /// ```
+ /// use regex_automata::meta::Regex;
+ ///
+ /// let len = |pattern| {
+ /// Regex::new(pattern).map(|re| re.static_captures_len())
+ /// };
+ ///
+ /// assert_eq!(Some(1), len("a")?);
+ /// assert_eq!(Some(2), len("(a)")?);
+ /// assert_eq!(Some(2), len("(a)|(b)")?);
+ /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?);
+ /// assert_eq!(None, len("(a)|b")?);
+ /// assert_eq!(None, len("a|(b)")?);
+ /// assert_eq!(None, len("(b)*")?);
+ /// assert_eq!(Some(2), len("(b)+")?);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// # Example: multiple patterns
+ ///
+ /// This property extends to regexes with multiple patterns as well. In
+ /// order for there to be a static number of capture groups in this case,
+ /// every pattern must have the same static number.
+ ///
+ /// ```
+ /// use regex_automata::meta::Regex;
+ ///
+ /// let len = |patterns| {
+ /// Regex::new_many(patterns).map(|re| re.static_captures_len())
+ /// };
+ ///
+ /// assert_eq!(Some(1), len(&["a", "b"])?);
+ /// assert_eq!(Some(2), len(&["(a)", "(b)"])?);
+ /// assert_eq!(Some(2), len(&["(a)|(b)", "(c)|(d)"])?);
+ /// assert_eq!(Some(3), len(&["(a)(b)|(c)(d)", "(x)(y)"])?);
+ /// assert_eq!(None, len(&["(a)", "b"])?);
+ /// assert_eq!(None, len(&["a", "(b)"])?);
+ /// assert_eq!(None, len(&["(a)", "(b)*"])?);
+ /// assert_eq!(Some(2), len(&["(a)+", "(b)+"])?);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[inline]
+ pub fn static_captures_len(&self) -> Option<usize> {
+ self.imp
+ .info
+ .props_union()
+ .static_explicit_captures_len()
+ .map(|len| len.saturating_add(1))
+ }
+
+ /// Return information about the capture groups in this `Regex`.
+ ///
+ /// A `GroupInfo` is an immutable object that can be cheaply cloned. It
+ /// is responsible for maintaining a mapping between the capture groups
+ /// in the concrete syntax of zero or more regex patterns and their
+ /// internal representation used by some of the regex matchers. It is also
+ /// responsible for maintaining a mapping between the name of each group
+ /// (if one exists) and its corresponding group index.
+ ///
+ /// A `GroupInfo` is ultimately what is used to build a [`Captures`] value,
+ /// which is some mutable space where group offsets are stored as a result
+ /// of a search.
+ ///
+ /// # Example
+ ///
+ /// This shows some alternatives to [`Regex::create_captures`]:
+ ///
+ /// ```
+ /// use regex_automata::{
+ /// meta::Regex,
+ /// util::captures::Captures,
+ /// Match, PatternID, Span,
+ /// };
+ ///
+ /// let re = Regex::new(r"(?<first>[A-Z][a-z]+) (?<last>[A-Z][a-z]+)")?;
+ ///
+ /// // This is equivalent to Regex::create_captures. It stores matching
+ /// // offsets for all groups in the regex.
+ /// let mut all = Captures::all(re.group_info().clone());
+ /// re.captures("Bruce Springsteen", &mut all);
+ /// assert_eq!(Some(Match::must(0, 0..17)), all.get_match());
+ /// assert_eq!(Some(Span::from(0..5)), all.get_group_by_name("first"));
+ /// assert_eq!(Some(Span::from(6..17)), all.get_group_by_name("last"));
+ ///
+ /// // In this version, we only care about the implicit groups, which
+ /// // means offsets for the explicit groups will be unavailable.
+ /// // It can sometimes be faster to ask for fewer groups, since the
+ /// // underlying regex engine needs to do less work to keep track of them.
+ /// let mut matches = Captures::matches(re.group_info().clone());
+ /// re.captures("Bruce Springsteen", &mut matches);
+ /// // We still get the overall match info.
+ /// assert_eq!(Some(Match::must(0, 0..17)), matches.get_match());
+ /// // But now the explicit groups are unavailable.
+ /// assert_eq!(None, matches.get_group_by_name("first"));
+ /// assert_eq!(None, matches.get_group_by_name("last"));
+ ///
+ /// // Finally, in this version, we don't ask to keep track of offsets for
+ /// // *any* groups. All we get back is whether a match occurred, and if
+ /// // so, the ID of the pattern that matched.
+ /// let mut empty = Captures::empty(re.group_info().clone());
+ /// re.captures("Bruce Springsteen", &mut empty);
+ /// // it's a match!
+ /// assert!(empty.is_match());
+ /// // for pattern ID 0
+ /// assert_eq!(Some(PatternID::ZERO), empty.pattern());
+ /// // Match offsets are unavailable.
+ /// assert_eq!(None, empty.get_match());
+ /// // And of course, explicit groups are unavailable too.
+ /// assert_eq!(None, empty.get_group_by_name("first"));
+ /// assert_eq!(None, empty.get_group_by_name("last"));
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[inline]
+ pub fn group_info(&self) -> &GroupInfo {
+ self.imp.strat.group_info()
+ }
+
+ /// Returns the configuration object used to build this `Regex`.
+ ///
+ /// If no configuration object was explicitly passed, then the
+ /// configuration returned represents the default.
+ #[inline]
+ pub fn get_config(&self) -> &Config {
+ self.imp.info.config()
+ }
+
+ /// Returns true if this regex has a high chance of being "accelerated."
+ ///
+ /// The precise meaning of "accelerated" is specifically left unspecified,
+ /// but the general meaning is that the search has a high likelihood of
+ /// running faster than a character-at-a-time loop inside a standard
+ /// regex engine.
+ ///
+ /// When a regex is accelerated, it is only a *probabilistic* claim. That
+ /// is, just because the regex is believed to be accelerated, that doesn't
+ /// mean it will definitely execute searches very fast. Similarly, if a
+ /// regex is *not* accelerated, that is also a probabilistic claim. That
+ /// is, a regex for which `is_accelerated` returns `false` could still run
+ /// searches more quickly than a regex for which `is_accelerated` returns
+ /// `true`.
+ ///
+ /// Whether a regex is marked as accelerated or not is dependent on
+ /// implementation details that may change in a semver compatible release.
+ /// That is, a regex that is accelerated in a `x.y.1` release might not be
+ /// accelerated in a `x.y.2` release.
+ ///
+ /// Basically, the value of acceleration boils down to a hedge: a hodge
+ /// podge of internal heuristics combine to make a probabilistic guess
+ /// that this regex search may run "fast." The value in knowing this from
+ /// a caller's perspective is that it may act as a signal that no further
+ /// work should be done to accelerate a search. For example, a grep-like
+ /// tool might try to do some extra work extracting literals from a regex
+ /// to create its own heuristic acceleration strategies. But it might
+ /// choose to defer to this crate's acceleration strategy if one exists.
+ /// This routine permits querying whether such a strategy is active for a
+ /// particular regex.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex_automata::meta::Regex;
+ ///
+ /// // A simple literal is very likely to be accelerated.
+ /// let re = Regex::new(r"foo")?;
+ /// assert!(re.is_accelerated());
+ ///
+ /// // A regex with no literals is likely to not be accelerated.
+ /// let re = Regex::new(r"\w")?;
+ /// assert!(!re.is_accelerated());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[inline]
+ pub fn is_accelerated(&self) -> bool {
+ self.imp.strat.is_accelerated()
+ }
+
+ /// Return the total approximate heap memory, in bytes, used by this `Regex`.
+ ///
+ /// Note that currently, there is no high level configuration for setting
+ /// a limit on the specific value returned by this routine. Instead, the
+ /// following routines can be used to control heap memory at a bit of a
+ /// lower level:
+ ///
+ /// * [`Config::nfa_size_limit`] controls how big _any_ of the NFAs are
+ /// allowed to be.
+ /// * [`Config::onepass_size_limit`] controls how big the one-pass DFA is
+ /// allowed to be.
+ /// * [`Config::hybrid_cache_capacity`] controls how much memory the lazy
+ /// DFA is permitted to allocate to store its transition table.
+ /// * [`Config::dfa_size_limit`] controls how big a fully compiled DFA is
+ /// allowed to be.
+ /// * [`Config::dfa_state_limit`] controls the conditions under which the
+ /// meta regex engine will even attempt to build a fully compiled DFA.
+ #[inline]
+ pub fn memory_usage(&self) -> usize {
+ self.imp.strat.memory_usage()
+ }
+}
+
+impl Clone for Regex {
+ fn clone(&self) -> Regex {
+ let imp = Arc::clone(&self.imp);
+ let pool = {
+ let strat = Arc::clone(&imp.strat);
+ let create: CachePoolFn = Box::new(move || strat.create_cache());
+ Pool::new(create)
+ };
+ Regex { imp, pool }
+ }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct RegexInfo(Arc<RegexInfoI>);
+
+#[derive(Clone, Debug)]
+struct RegexInfoI {
+ config: Config,
+ props: Vec<hir::Properties>,
+ props_union: hir::Properties,
+}
+
+impl RegexInfo {
+ fn new(config: Config, hirs: &[&Hir]) -> RegexInfo {
+ // Collect all of the properties from each of the HIRs, and also
+ // union them into one big set of properties representing all HIRs
+ // as if they were in one big alternation.
+ let mut props = vec![];
+ for hir in hirs.iter() {
+ props.push(hir.properties().clone());
+ }
+ let props_union = hir::Properties::union(&props);
+
+ RegexInfo(Arc::new(RegexInfoI { config, props, props_union }))
+ }
+
+ pub(crate) fn config(&self) -> &Config {
+ &self.0.config
+ }
+
+ pub(crate) fn props(&self) -> &[hir::Properties] {
+ &self.0.props
+ }
+
+ pub(crate) fn props_union(&self) -> &hir::Properties {
+ &self.0.props_union
+ }
+
+ pub(crate) fn pattern_len(&self) -> usize {
+ self.props().len()
+ }
+
+ pub(crate) fn memory_usage(&self) -> usize {
+ self.props().iter().map(|p| p.memory_usage()).sum::<usize>()
+ + self.props_union().memory_usage()
+ }
+
+ /// Returns true when the search is guaranteed to be anchored. That is,
+ /// when a match is reported, its offset is guaranteed to correspond to
+ /// the start of the search.
+ ///
+ /// This includes returning true when `input` _isn't_ anchored but the
+ /// underlying regex is.
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn is_anchored_start(&self, input: &Input<'_>) -> bool {
+ input.get_anchored().is_anchored() || self.is_always_anchored_start()
+ }
+
+ /// Returns true when this regex is always anchored to the start of a
+ /// search.
+ /// And in particular, that regardless of an `Input` configuration, if
+ /// any match is reported it must start at `0`.
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn is_always_anchored_start(&self) -> bool {
+ use regex_syntax::hir::Look;
+ self.props_union().look_set_prefix().contains(Look::Start)
+ }
+
+ /// Returns true when this regex is always anchored to the end of a
+ /// search. And in particular, that regardless of an `Input` configuration,
+ /// if any match is reported it must end at the end of the haystack.
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ pub(crate) fn is_always_anchored_end(&self) -> bool {
+ use regex_syntax::hir::Look;
+ self.props_union().look_set_suffix().contains(Look::End)
+ }
+
+ /// Returns true if and only if it is known that a match is impossible
+ /// for the given input. This is useful for short-circuiting and avoiding
+ /// running the regex engine if it's known no match can be reported.
+ ///
+ /// Note that this doesn't necessarily detect every possible case. For
+ /// example, when `pattern_len() == 0`, a match is impossible, but that
+ /// case is so rare that it's fine to be handled by the regex engine
+ /// itself. That is, it's not worth the cost of adding it here in order to
+ /// make it a little faster. The reason is that this is called for every
+ /// search, so there is some cost to adding checks here. Arguably, some of
+ /// the checks that are here already probably shouldn't be here...
+ #[cfg_attr(feature = "perf-inline", inline(always))]
+ fn is_impossible(&self, input: &Input<'_>) -> bool {
+ // The underlying regex is anchored, so if we don't start the search
+ // at position 0, a match is impossible, because the anchor can only
+ // match at position 0.
+ if input.start() > 0 && self.is_always_anchored_start() {
+ return true;
+ }
+ // Same idea, but for the end anchor.
+ if input.end() < input.haystack().len()
+ && self.is_always_anchored_end()
+ {
+ return true;
+ }
+ // If the haystack is smaller than the minimum length required, then
+ // we know there can be no match.
+ let minlen = match self.props_union().minimum_len() {
+ None => return false,
+ Some(minlen) => minlen,
+ };
+ if input.get_span().len() < minlen {
+ return true;
+ }
+ // Same idea as minimum, but for maximum. This is trickier. We can
+ // only apply the maximum when we know the entire span that we're
+ // searching *has* to match according to the regex (and possibly the
+ // input configuration). If we know there is too much for the regex
+ // to match, we can bail early.
+ //
+ // I don't think we can apply the maximum otherwise unfortunately.
+ if self.is_anchored_start(input) && self.is_always_anchored_end() {
+ let maxlen = match self.props_union().maximum_len() {
+ None => return false,
+ Some(maxlen) => maxlen,
+ };
+ if input.get_span().len() > maxlen {
+ return true;
+ }
+ }
+ false
+ }
+}
+
+/// An iterator over all non-overlapping matches.
+///
+/// The iterator yields a [`Match`] value until no more matches could be found.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'r` represents the lifetime of the `Regex` that produced this iterator.
+/// * `'h` represents the lifetime of the haystack being searched.
+///
+/// This iterator can be created with the [`Regex::find_iter`] method.
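+///
+/// # Example
+///
+/// A minimal sketch of driving this iterator; the pattern and haystack here
+/// are illustrative choices, not taken from elsewhere in this crate:
+///
+/// ```
+/// use regex_automata::{meta::Regex, Match};
+///
+/// let re = Regex::new(r"[0-9]+")?;
+/// // Each call to `next` resumes where the previous match left off.
+/// let matches: Vec<Match> = re.find_iter("a1b22c333").collect();
+/// assert_eq!(matches, vec![
+/// Match::must(0, 1..2),
+/// Match::must(0, 3..5),
+/// Match::must(0, 6..9),
+/// ]);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```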
+#[derive(Debug)]
+pub struct FindMatches<'r, 'h> {
+ re: &'r Regex,
+ cache: CachePoolGuard<'r>,
+ it: iter::Searcher<'h>,
+}
+
+impl<'r, 'h> FindMatches<'r, 'h> {
+ /// Returns the `Regex` value that created this iterator.
+ #[inline]
+ pub fn regex(&self) -> &'r Regex {
+ self.re
+ }
+
+ /// Returns the current `Input` associated with this iterator.
+ ///
+ /// The `start` position on the given `Input` may change during iteration,
+ /// but all other values are guaranteed to remain invariant.
+ #[inline]
+ pub fn input<'s>(&'s self) -> &'s Input<'h> {
+ self.it.input()
+ }
+}
+
+impl<'r, 'h> Iterator for FindMatches<'r, 'h> {
+ type Item = Match;
+
+ #[inline]
+ fn next(&mut self) -> Option<Match> {
+ let FindMatches { re, ref mut cache, ref mut it } = *self;
+ it.advance(|input| Ok(re.search_with(cache, input)))
+ }
+
+ #[inline]
+ fn count(self) -> usize {
+ // If all we care about is a count of matches, then we only need to
+ // find the end position of each match. This can give us a 2x perf
+ // boost in some cases, because it avoids needing to do a reverse scan
+ // to find the start of a match.
+ let FindMatches { re, mut cache, it } = self;
+ // This does the deref for PoolGuard once instead of every iter.
+ let cache = &mut *cache;
+ it.into_half_matches_iter(
+ |input| Ok(re.search_half_with(cache, input)),
+ )
+ .count()
+ }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for FindMatches<'r, 'h> {}
+
+/// An iterator over all non-overlapping leftmost matches with their capturing
+/// groups.
+///
+/// The iterator yields a [`Captures`] value until no more matches could be
+/// found.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'r` represents the lifetime of the `Regex` that produced this iterator.
+/// * `'h` represents the lifetime of the haystack being searched.
+///
+/// This iterator can be created with the [`Regex::captures_iter`] method.
+#[derive(Debug)]
+pub struct CapturesMatches<'r, 'h> {
+ re: &'r Regex,
+ cache: CachePoolGuard<'r>,
+ caps: Captures,
+ it: iter::Searcher<'h>,
+}
+
+impl<'r, 'h> CapturesMatches<'r, 'h> {
+ /// Returns the `Regex` value that created this iterator.
+ #[inline]
+ pub fn regex(&self) -> &'r Regex {
+ self.re
+ }
+
+ /// Returns the current `Input` associated with this iterator.
+ ///
+ /// The `start` position on the given `Input` may change during iteration,
+ /// but all other values are guaranteed to remain invariant.
+ #[inline]
+ pub fn input<'s>(&'s self) -> &'s Input<'h> {
+ self.it.input()
+ }
+}
+
+impl<'r, 'h> Iterator for CapturesMatches<'r, 'h> {
+ type Item = Captures;
+
+ #[inline]
+ fn next(&mut self) -> Option<Captures> {
+ // Splitting 'self' apart seems necessary to appease borrowck.
+ let CapturesMatches { re, ref mut cache, ref mut caps, ref mut it } =
+ *self;
+ let _ = it.advance(|input| {
+ re.search_captures_with(cache, input, caps);
+ Ok(caps.get_match())
+ });
+ if caps.is_match() {
+ Some(caps.clone())
+ } else {
+ None
+ }
+ }
+
+ #[inline]
+ fn count(self) -> usize {
+ let CapturesMatches { re, mut cache, it, .. } = self;
+ // This does the deref for PoolGuard once instead of every iter.
+ let cache = &mut *cache;
+ it.into_half_matches_iter(
+ |input| Ok(re.search_half_with(cache, input)),
+ )
+ .count()
+ }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for CapturesMatches<'r, 'h> {}
+
+/// Yields all substrings delimited by a regular expression match.
+///
+/// The spans correspond to the offsets between matches.
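+///
+/// For instance, a minimal sketch of the spans this iterator yields (the
+/// pattern and haystack are illustrative choices, not taken from elsewhere
+/// in this crate):
+///
+/// ```
+/// use regex_automata::{meta::Regex, Span};
+///
+/// let re = Regex::new(r",")?;
+/// // The three spans cover `a`, `b` and `c`; the delimiter matches
+/// // themselves are not included.
+/// let spans: Vec<Span> = re.split("a,b,c").collect();
+/// assert_eq!(spans, vec![
+/// Span::from(0..1),
+/// Span::from(2..3),
+/// Span::from(4..5),
+/// ]);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```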
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'r` represents the lifetime of the `Regex` that produced this iterator.
+/// * `'h` represents the lifetime of the haystack being searched.
+///
+/// This iterator can be created with the [`Regex::split`] method.
+#[derive(Debug)]
+pub struct Split<'r, 'h> {
+ finder: FindMatches<'r, 'h>,
+ last: usize,
+}
+
+impl<'r, 'h> Split<'r, 'h> {
+ /// Returns the current `Input` associated with this iterator.
+ ///
+ /// The `start` position on the given `Input` may change during iteration,
+ /// but all other values are guaranteed to remain invariant.
+ #[inline]
+ pub fn input<'s>(&'s self) -> &'s Input<'h> {
+ self.finder.input()
+ }
+}
+
+impl<'r, 'h> Iterator for Split<'r, 'h> {
+ type Item = Span;
+
+ fn next(&mut self) -> Option<Span> {
+ match self.finder.next() {
+ None => {
+ let len = self.finder.it.input().haystack().len();
+ if self.last > len {
+ None
+ } else {
+ let span = Span::from(self.last..len);
+ self.last = len + 1; // Next call will return None
+ Some(span)
+ }
+ }
+ Some(m) => {
+ let span = Span::from(self.last..m.start());
+ self.last = m.end();
+ Some(span)
+ }
+ }
+ }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {}
+
+/// Yields at most `N` spans delimited by a regular expression match.
+///
+/// The spans correspond to the offsets between matches. The last span will be
+/// whatever remains after splitting.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'r` represents the lifetime of the `Regex` that produced this iterator.
+/// * `'h` represents the lifetime of the haystack being searched.
+///
+/// This iterator can be created with the [`Regex::splitn`] method.
+#[derive(Debug)]
+pub struct SplitN<'r, 'h> {
+ splits: Split<'r, 'h>,
+ limit: usize,
+}
+
+impl<'r, 'h> SplitN<'r, 'h> {
+ /// Returns the current `Input` associated with this iterator.
+ ///
+ /// The `start` position on the given `Input` may change during iteration,
+ /// but all other values are guaranteed to remain invariant.
+ #[inline]
+ pub fn input<'s>(&'s self) -> &'s Input<'h> {
+ self.splits.input()
+ }
+}
+
+impl<'r, 'h> Iterator for SplitN<'r, 'h> {
+ type Item = Span;
+
+ fn next(&mut self) -> Option<Span> {
+ if self.limit == 0 {
+ return None;
+ }
+
+ self.limit -= 1;
+ if self.limit > 0 {
+ return self.splits.next();
+ }
+
+ let len = self.splits.finder.it.input().haystack().len();
+ if self.splits.last > len {
+ // We've already returned all substrings.
+ None
+ } else {
+ // self.limit == 0, so future calls will return None immediately
+ Some(Span::from(self.splits.last..len))
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.limit))
+ }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {}
+
+/// Represents mutable scratch space used by regex engines during a search.
+///
+/// Most of the regex engines in this crate require some kind of
+/// mutable state in order to execute a search. This mutable state is
+/// explicitly separated from the core regex object (such as a
+/// [`thompson::NFA`](crate::nfa::thompson::NFA)) so that the read-only regex
+/// object can be shared across multiple threads simultaneously without any
+/// synchronization. Conversely, a `Cache` must either be duplicated if using
+/// the same `Regex` from multiple threads, or else there must be some kind of
+/// synchronization that guarantees exclusive access while it's in use by one
+/// thread.
+///
+/// A `Regex` attempts to do this synchronization for you by using a thread
+/// pool internally.
+/// Its size scales roughly with the number of simultaneous regex searches.
+///
+/// For cases where one does not want to rely on a `Regex`'s internal thread
+/// pool, lower level routines such as [`Regex::search_with`] are provided
+/// that permit callers to pass a `Cache` into the search routine explicitly.
+///
+/// General advice is that the thread pool is often more than good enough.
+/// However, it may be possible to observe the effects of its latency,
+/// especially when searching many small haystacks from many threads
+/// simultaneously.
+///
+/// Caches can be created from their corresponding `Regex` via
+/// [`Regex::create_cache`]. A cache can only be used with either the `Regex`
+/// that created it, or the `Regex` that was most recently used to reset it
+/// with [`Cache::reset`]. Using a cache with any other `Regex` may result in
+/// panics or incorrect results.
+///
+/// # Example
+///
+/// ```
+/// use regex_automata::{meta::Regex, Input, Match};
+///
+/// let re = Regex::new(r"(?-u)m\w+\s+m\w+")?;
+/// let mut cache = re.create_cache();
+/// let input = Input::new("crazy janey and her mission man");
+/// assert_eq!(
+/// Some(Match::must(0, 20..31)),
+/// re.search_with(&mut cache, &input),
+/// );
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Debug, Clone)]
+pub struct Cache {
+ pub(crate) capmatches: Captures,
+ pub(crate) pikevm: wrappers::PikeVMCache,
+ pub(crate) backtrack: wrappers::BoundedBacktrackerCache,
+ pub(crate) onepass: wrappers::OnePassCache,
+ pub(crate) hybrid: wrappers::HybridCache,
+ pub(crate) revhybrid: wrappers::ReverseHybridCache,
+}
+
+impl Cache {
+ /// Creates a new `Cache` for use with this regex.
+ ///
+ /// The cache returned should only be used for searches for the given
+ /// `Regex`. If you want to reuse the cache for another `Regex`, then you
+ /// must call [`Cache::reset`] with that `Regex`.
+ pub fn new(re: &Regex) -> Cache {
+ re.create_cache()
+ }
+
+ /// Reset this cache such that it can be used for searching with the given
+ /// `Regex` (and only that `Regex`).
+ ///
+ /// A cache reset permits potentially reusing memory already allocated in
+ /// this cache with a different `Regex`.
+ ///
+ /// # Example
+ ///
+ /// This shows how to re-purpose a cache for use with a different `Regex`.
+ ///
+ /// ```
+ /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+ /// use regex_automata::{meta::Regex, Match, Input};
+ ///
+ /// let re1 = Regex::new(r"\w")?;
+ /// let re2 = Regex::new(r"\W")?;
+ ///
+ /// let mut cache = re1.create_cache();
+ /// assert_eq!(
+ /// Some(Match::must(0, 0..2)),
+ /// re1.search_with(&mut cache, &Input::new("Δ")),
+ /// );
+ ///
+ /// // Using 'cache' with re2 is not allowed. It may result in panics or
+ /// // incorrect results. In order to re-purpose the cache, we must reset
+ /// // it with the Regex we'd like to use it with.
+ /// //
+ /// // Similarly, after this reset, using the cache with 're1' is also not
+ /// // allowed.
+ /// cache.reset(&re2);
+ /// assert_eq!(
+ /// Some(Match::must(0, 0..3)),
+ /// re2.search_with(&mut cache, &Input::new("☃")),
+ /// );
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn reset(&mut self, re: &Regex) {
+ re.imp.strat.reset_cache(self)
+ }
+
+ /// Returns the heap memory usage, in bytes, of this cache.
+ ///
+ /// This does **not** include the stack size used up by this cache. To
+ /// compute that, use `std::mem::size_of::<Cache>()`.
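+ ///
+ /// # Example
+ ///
+ /// A minimal sketch, assuming one wants the combined heap and stack
+ /// footprint of a cache (the pattern here is an illustrative choice):
+ ///
+ /// ```
+ /// use regex_automata::meta::{Cache, Regex};
+ ///
+ /// let re = Regex::new(r"(?m)^[a-z]{5}$")?;
+ /// let cache = re.create_cache();
+ /// // Heap memory reported by this routine, plus the stack size of the
+ /// // value itself.
+ /// let total = cache.memory_usage() + core::mem::size_of::<Cache>();
+ /// assert!(total >= core::mem::size_of::<Cache>());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```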
+ pub fn memory_usage(&self) -> usize {
+ let mut bytes = 0;
+ bytes += self.pikevm.memory_usage();
+ bytes += self.backtrack.memory_usage();
+ bytes += self.onepass.memory_usage();
+ bytes += self.hybrid.memory_usage();
+ bytes += self.revhybrid.memory_usage();
+ bytes
+ }
+}
+
+/// An object describing the configuration of a `Regex`.
+///
+/// This configuration only includes options for the
+/// non-syntax behavior of a `Regex`, and can be applied via the
+/// [`Builder::configure`] method. For configuring the syntax options, see
+/// [`util::syntax::Config`](crate::util::syntax::Config).
+///
+/// # Example: lower the NFA size limit
+///
+/// In some cases, the default size limit might be too big. The size limit can
+/// be lowered, which will prevent large regex patterns from compiling.
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::meta::Regex;
+///
+/// let result = Regex::builder()
+/// .configure(Regex::config().nfa_size_limit(Some(20 * (1<<10))))
+/// // Not even 20KB is enough to build a single large Unicode class!
+/// .build(r"\pL");
+/// assert!(result.is_err());
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug, Default)]
+pub struct Config {
+ // As with other configuration types in this crate, we put all our knobs
+ // in options so that we can distinguish between "default" and "not set."
+ // This makes it possible to easily combine multiple configurations
+ // without default values overwriting explicitly specified values. See the
+ // 'overwrite' method.
+ //
+ // For docs on the fields below, see the corresponding method setters.
+ match_kind: Option<MatchKind>,
+ utf8_empty: Option<bool>,
+ autopre: Option<bool>,
+ pre: Option<Option<Prefilter>>,
+ which_captures: Option<WhichCaptures>,
+ nfa_size_limit: Option<Option<usize>>,
+ onepass_size_limit: Option<Option<usize>>,
+ hybrid_cache_capacity: Option<usize>,
+ hybrid: Option<bool>,
+ dfa: Option<bool>,
+ dfa_size_limit: Option<Option<usize>>,
+ dfa_state_limit: Option<Option<usize>>,
+ onepass: Option<bool>,
+ backtrack: Option<bool>,
+ byte_classes: Option<bool>,
+ line_terminator: Option<u8>,
+}
+
+impl Config {
+ /// Create a new configuration object for a `Regex`.
+ pub fn new() -> Config {
+ Config::default()
+ }
+
+ /// Set the match semantics for a `Regex`.
+ ///
+ /// The default value is [`MatchKind::LeftmostFirst`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex_automata::{meta::Regex, Match, MatchKind};
+ ///
+ /// // By default, leftmost-first semantics are used, which
+ /// // disambiguates matches at the same position by selecting
+ /// // the one that corresponds earlier in the pattern.
+ /// let re = Regex::new("sam|samwise")?;
+ /// assert_eq!(Some(Match::must(0, 0..3)), re.find("samwise"));
+ ///
+ /// // But with 'all' semantics, match priority is ignored
+ /// // and all match states are included. When coupled with
+ /// // a leftmost search, the search will report the last
+ /// // possible match.
+ /// let re = Regex::builder()
+ /// .configure(Regex::config().match_kind(MatchKind::All))
+ /// .build("sam|samwise")?;
+ /// assert_eq!(Some(Match::must(0, 0..7)), re.find("samwise"));
+ /// // Beware that this can lead to skipping matches!
+ /// // Usually 'all' is used for anchored reverse searches
+ /// // only, or for overlapping searches.
+ /// assert_eq!(Some(Match::must(0, 4..11)), re.find("sam samwise"));
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn match_kind(self, kind: MatchKind) -> Config {
+ Config { match_kind: Some(kind), ..self }
+ }
+
+ /// Toggles whether empty matches are permitted to occur between the code
+ /// units of a UTF-8 encoded codepoint.
+ ///
+ /// This should generally be enabled when searching a `&str` or anything
+ /// that you otherwise know is valid UTF-8. It should be disabled in all
+ /// other cases. Namely, if the haystack is not valid UTF-8 and this is
+ /// enabled, then behavior is unspecified.
+ ///
+ /// By default, this is enabled.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex_automata::{meta::Regex, Match};
+ ///
+ /// let re = Regex::new("")?;
+ /// let got: Vec<Match> = re.find_iter("☃").collect();
+ /// // Matches only occur at the beginning and end of the snowman.
+ /// assert_eq!(got, vec![
+ /// Match::must(0, 0..0),
+ /// Match::must(0, 3..3),
+ /// ]);
+ ///
+ /// let re = Regex::builder()
+ /// .configure(Regex::config().utf8_empty(false))
+ /// .build("")?;
+ /// let got: Vec<Match> = re.find_iter("☃").collect();
+ /// // Matches now occur at every position!
+ /// assert_eq!(got, vec![
+ /// Match::must(0, 0..0),
+ /// Match::must(0, 1..1),
+ /// Match::must(0, 2..2),
+ /// Match::must(0, 3..3),
+ /// ]);
+ ///
+ /// Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn utf8_empty(self, yes: bool) -> Config {
+ Config { utf8_empty: Some(yes), ..self }
+ }
+
+ /// Toggles whether automatic prefilter support is enabled.
+ ///
+ /// If this is disabled and [`Config::prefilter`] is not set, then the
+ /// meta regex engine will not use any prefilters. This can sometimes
+ /// be beneficial in cases where you know (or have measured) that the
+ /// prefilter leads to overall worse search performance.
+ ///
+ /// By default, this is enabled.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+ /// use regex_automata::{meta::Regex, Match};
+ ///
+ /// let re = Regex::builder()
+ /// .configure(Regex::config().auto_prefilter(false))
+ /// .build(r"Bruce \w+")?;
+ /// let hay = "Hello Bruce Springsteen!";
+ /// assert_eq!(Some(Match::must(0, 6..23)), re.find(hay));
+ ///
+ /// Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn auto_prefilter(self, yes: bool) -> Config {
+ Config { autopre: Some(yes), ..self }
+ }
+
+ /// Overrides and sets the prefilter to use inside a `Regex`.
+ ///
+ /// This permits one to forcefully set a prefilter in cases where the
+ /// caller knows better than whatever the automatic prefilter logic is
+ /// capable of.
+ ///
+ /// By default, this is set to `None` and an automatic prefilter will be
+ /// used if one could be built. (Assuming [`Config::auto_prefilter`] is
+ /// enabled, which it is by default.)
+ ///
+ /// # Example
+ ///
+ /// This example shows how to set your own prefilter. In the case of a
+ /// pattern like `Bruce \w+`, the automatic prefilter is likely to be
+ /// constructed in a way that it will look for occurrences of `Bruce `.
+ /// In most cases, this is the best choice. But in some cases, it may be
+ /// the case that running `memchr` on `B` is the best choice. One can
+ /// achieve that behavior by overriding the automatic prefilter logic
+ /// and providing a prefilter that just matches `B`.
+ ///
+ /// ```
+ /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+ /// use regex_automata::{
+ /// meta::Regex,
+ /// util::prefilter::Prefilter,
+ /// Match, MatchKind,
+ /// };
+ ///
+ /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["B"])
+ /// .expect("a prefilter");
+ /// let re = Regex::builder()
+ /// .configure(Regex::config().prefilter(Some(pre)))
+ /// .build(r"Bruce \w+")?;
+ /// let hay = "Hello Bruce Springsteen!";
+ /// assert_eq!(Some(Match::must(0, 6..23)), re.find(hay));
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// # Example: incorrect prefilters can lead to incorrect results!
+ ///
+ /// Be warned that setting an incorrect prefilter can lead to missed
+ /// matches. So if you use this option, ensure your prefilter can _never_
+ /// report false negatives. (A false positive is, on the other hand, quite
+ /// okay and generally unavoidable.)
+ ///
+ /// ```
+ /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+ /// use regex_automata::{
+ /// meta::Regex,
+ /// util::prefilter::Prefilter,
+ /// Match, MatchKind,
+ /// };
+ ///
+ /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Z"])
+ /// .expect("a prefilter");
+ /// let re = Regex::builder()
+ /// .configure(Regex::config().prefilter(Some(pre)))
+ /// .build(r"Bruce \w+")?;
+ /// let hay = "Hello Bruce Springsteen!";
+ /// // Oops! No match found, but there should be one!
+ /// assert_eq!(None, re.find(hay));
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn prefilter(self, pre: Option<Prefilter>) -> Config {
+ Config { pre: Some(pre), ..self }
+ }
+
+ /// Configures what kinds of groups are compiled as "capturing" in the
+ /// underlying regex engine.
+ ///
+ /// This is set to [`WhichCaptures::All`] by default. Callers may wish to
+ /// use [`WhichCaptures::Implicit`] in cases where one wants to avoid the
+ /// overhead of capture states for explicit groups.
+ ///
+ /// Note that another approach to avoiding the overhead of capture groups
+ /// is by using non-capturing groups in the regex pattern. That is,
+ /// `(?:a)` instead of `(a)`. This option is useful when you can't control
+ /// the concrete syntax but know that you don't need the underlying capture
+ /// states. For example, using `WhichCaptures::Implicit` will behave as if
+ /// all explicit capturing groups in the pattern were non-capturing.
+ ///
+ /// Setting this to `WhichCaptures::None` is usually not the right thing to
+ /// do. When no capture states are compiled, some regex engines (such as
+ /// the `PikeVM`) won't be able to report match offsets. This will manifest
+ /// as no match being found.
+ ///
+ /// # Example
+ ///
+ /// This example demonstrates how the results of capture groups can change
+ /// based on this option. First we show the default (all capture groups in
+ /// the pattern are capturing):
+ ///
+ /// ```
+ /// use regex_automata::{meta::Regex, Match, Span};
+ ///
+ /// let re = Regex::new(r"foo([0-9]+)bar")?;
+ /// let hay = "foo123bar";
+ ///
+ /// let mut caps = re.create_captures();
+ /// re.captures(hay, &mut caps);
+ /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0));
+ /// assert_eq!(Some(Span::from(3..6)), caps.get_group(1));
+ ///
+ /// Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// And now we show the behavior when we only include implicit capture
+ /// groups. In this case, we can only find the overall match span, but the
+ /// spans of any other explicit group don't exist because they are treated
+ /// as non-capturing.
+ /// (In effect, when `WhichCaptures::Implicit` is used, there is no real
+ /// point in using [`Regex::captures`] since it will never be able to
+ /// report more information than [`Regex::find`].)
+ ///
+ /// ```
+ /// use regex_automata::{
+ /// meta::Regex,
+ /// nfa::thompson::WhichCaptures,
+ /// Match,
+ /// Span,
+ /// };
+ ///
+ /// let re = Regex::builder()
+ /// .configure(Regex::config().which_captures(WhichCaptures::Implicit))
+ /// .build(r"foo([0-9]+)bar")?;
+ /// let hay = "foo123bar";
+ ///
+ /// let mut caps = re.create_captures();
+ /// re.captures(hay, &mut caps);
+ /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0));
+ /// assert_eq!(None, caps.get_group(1));
+ ///
+ /// Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn which_captures(mut self, which_captures: WhichCaptures) -> Config {
+ self.which_captures = Some(which_captures);
+ self
+ }
+
+ /// Sets the size limit, in bytes, to enforce on the construction of every
+ /// NFA built by the meta regex engine.
+ ///
+ /// Setting it to `None` disables the limit. This is not recommended if
+ /// you're compiling untrusted patterns.
+ ///
+ /// Note that this limit is applied to _each_ NFA built, and if any of
+ /// them exceed the limit, then construction will fail. This limit does
+ /// _not_ correspond to the total memory used by all NFAs in the meta regex
+ /// engine.
+ ///
+ /// This defaults to some reasonable number that permits most reasonable
+ /// patterns.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+ /// use regex_automata::meta::Regex;
+ ///
+ /// let result = Regex::builder()
+ /// .configure(Regex::config().nfa_size_limit(Some(20 * (1<<10))))
+ /// // Not even 20KB is enough to build a single large Unicode class!
+ /// .build(r"\pL");
+ /// assert!(result.is_err());
+ ///
+ /// // But notice that building such a regex with the exact same limit
+ /// // can succeed depending on other aspects of the configuration. For
+ /// // example, a single *forward* NFA will (at time of writing) fit into
+ /// // the 20KB limit, but a *reverse* NFA of the same pattern will not.
+ /// // So if one configures a meta regex such that a reverse NFA is never
+ /// // needed and thus never built, then the 20KB limit will be enough for
+ /// // a pattern like \pL!
+ /// let result = Regex::builder()
+ /// .configure(Regex::config()
+ /// .nfa_size_limit(Some(20 * (1<<10)))
+ /// // The DFAs are the only thing that (currently) need a reverse
+ /// // NFA. So if both are disabled, the meta regex engine will
+ /// // skip building the reverse NFA. Note that this isn't an API
+ /// // guarantee. A future semver compatible version may introduce
+ /// // new use cases for a reverse NFA.
+ /// .hybrid(false)
+ /// .dfa(false)
+ /// )
+ /// // With the reverse NFA out of the picture, 20KB is now enough!
+ /// .build(r"\pL");
+ /// assert!(result.is_ok());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn nfa_size_limit(self, limit: Option<usize>) -> Config {
+ Config { nfa_size_limit: Some(limit), ..self }
+ }
+
+ /// Sets the size limit, in bytes, for the one-pass DFA.
+ ///
+ /// Setting it to `None` disables the limit. Disabling the limit is
+ /// strongly discouraged when compiling untrusted patterns. Even if the
+ /// patterns are trusted, it still may not be a good idea, since a one-pass
+ /// DFA can use a lot of memory. With that said, as the size of a regex
+ /// increases, the likelihood of it being one-pass decreases.
+ ///
+ /// This defaults to some reasonable number that permits most reasonable
+ /// one-pass patterns.
+ ///
+ /// # Example
+ ///
+ /// This shows how to set the one-pass DFA size limit. Note that since
+ /// a one-pass DFA is an optional component of the meta regex engine,
+ /// this size limit only impacts what is built internally and will never
+ /// determine whether a `Regex` itself fails to build.
+ ///
+ /// ```
+ /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+ /// use regex_automata::meta::Regex;
+ ///
+ /// let result = Regex::builder()
+ /// .configure(Regex::config().onepass_size_limit(Some(2 * (1<<20))))
+ /// .build(r"\pL{5}");
+ /// assert!(result.is_ok());
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn onepass_size_limit(self, limit: Option<usize>) -> Config {
+ Config { onepass_size_limit: Some(limit), ..self }
+ }
+
+ /// Set the cache capacity, in bytes, for the lazy DFA.
+ ///
+ /// The cache capacity of the lazy DFA determines approximately how much
+ /// heap memory it is allowed to use to store its state transitions. The
+ /// state transitions are computed at search time, and if the cache fills
+ /// up, it is cleared. At this point, any previously generated state
+ /// transitions are lost and are re-generated if they're needed again.
+ ///
+ /// This sort of cache filling and clearing works quite well _so long as
+ /// cache clearing happens infrequently_. If it happens too often, then the
+ /// meta regex engine will stop using the lazy DFA and switch over to a
+ /// different regex engine.
+ ///
+ /// In cases where the cache is cleared too often, it may be possible to
+ /// give the cache more space and reduce (or eliminate) how often it is
+ /// cleared. Similarly, sometimes a regex is so big that the lazy DFA isn't
+ /// used at all if its cache capacity isn't big enough.
+ ///
+ /// The capacity set here is a _limit_ on how much memory is used. The
+ /// actual memory used is only allocated as it's needed.
+ ///
+ /// Determining the right value for this is a little tricky and will likely
+ /// require some profiling. Enabling the `logging` feature and setting the
+ /// log level to `trace` will also tell you how often the cache is being
+ /// cleared.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+ /// use regex_automata::meta::Regex;
+ ///
+ /// let result = Regex::builder()
+ /// .configure(Regex::config().hybrid_cache_capacity(20 * (1<<20)))
+ /// .build(r"\pL{5}");
+ /// assert!(result.is_ok());
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn hybrid_cache_capacity(self, limit: usize) -> Config {
+ Config { hybrid_cache_capacity: Some(limit), ..self }
+ }
+
+ /// Sets the size limit, in bytes, for heap memory used for a fully
+ /// compiled DFA.
+ ///
+ /// **NOTE:** If you increase this, you'll likely also need to increase
+ /// [`Config::dfa_state_limit`].
+ ///
+ /// In contrast to the lazy DFA, building a full DFA requires computing
+ /// all of its state transitions up front. This can be a very expensive
+ /// process, and runs in worst case `2^n` time and space (where `n` is
+ /// proportional to the size of the regex). However, a full DFA unlocks
+ /// some additional optimization opportunities.
+ ///
+ /// Because full DFAs can be so expensive, the default limits for them are
+ /// incredibly small.
+ /// Generally speaking, if your regex is moderately big or if you're
+ /// using Unicode features (`\w` is Unicode-aware by default for
+ /// example), then you can expect that the meta regex engine won't even
+ /// attempt to build a DFA for it.
+ ///
+ /// If this and [`Config::dfa_state_limit`] are set to `None`, then the
+ /// meta regex will not use any sort of limits when deciding whether to
+ /// build a DFA. This in turn makes construction of a `Regex` take
+ /// worst case exponential time and space. Even short patterns can result
+ /// in huge space blow ups. So it is strongly recommended to keep some kind
+ /// of limit set!
+ ///
+ /// The default is set to a small number that permits some simple regexes
+ /// to get compiled into DFAs in reasonable time.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+ /// use regex_automata::meta::Regex;
+ ///
+ /// let result = Regex::builder()
+ /// // 100MB is much bigger than the default.
+ /// .configure(Regex::config()
+ /// .dfa_size_limit(Some(100 * (1<<20)))
+ /// // We don't care about size too much here, so just
+ /// // remove the NFA state limit altogether.
+ /// .dfa_state_limit(None))
+ /// .build(r"\pL{5}");
+ /// assert!(result.is_ok());
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn dfa_size_limit(self, limit: Option<usize>) -> Config {
+ Config { dfa_size_limit: Some(limit), ..self }
+ }
+
+ /// Sets a limit on the total number of NFA states, beyond which, a full
+ /// DFA is not attempted to be compiled.
+ ///
+ /// This limit works in concert with [`Config::dfa_size_limit`]. Namely,
+ /// whereas `Config::dfa_size_limit` is applied by attempting to construct
+ /// a DFA, this limit is used to avoid the attempt in the first place. This
+ /// is useful to avoid hefty initialization costs associated with building
+ /// a DFA for cases where it is obvious the DFA will ultimately be too big.
+ ///
+ /// By default, this is set to a very small number.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+ /// use regex_automata::meta::Regex;
+ ///
+ /// let result = Regex::builder()
+ /// .configure(Regex::config()
+ /// // Sometimes the default state limit rejects DFAs even
+ /// // if they would fit in the size limit. Here, we disable
+ /// // the check on the number of NFA states and just rely on
+ /// // the size limit.
+ /// .dfa_state_limit(None))
+ /// .build(r"(?-u)\w{30}");
+ /// assert!(result.is_ok());
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn dfa_state_limit(self, limit: Option<usize>) -> Config {
+ Config { dfa_state_limit: Some(limit), ..self }
+ }
+
+ /// Whether to attempt to shrink the size of the alphabet for the regex
+ /// pattern or not. When enabled, the alphabet is shrunk into a set of
+ /// equivalence classes, where every byte in the same equivalence class
+ /// cannot discriminate between a match or non-match.
+ ///
+ /// **WARNING:** This is only useful for debugging DFAs. Disabling this
+ /// does not yield any speed advantages. Indeed, disabling it can result
+ /// in much higher memory usage. Disabling byte classes is useful for
+ /// debugging the actual generated transitions because it lets one see the
+ /// transitions defined on actual bytes instead of the equivalence classes.
+ ///
+ /// This option is enabled by default and should never be disabled unless
+ /// one is debugging the meta regex engine's internals.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex_automata::{meta::Regex, Match};
+ ///
+ /// let re = Regex::builder()
+ /// .configure(Regex::config().byte_classes(false))
+ /// .build(r"[a-z]+")?;
+ /// let hay = "!!quux!!";
+ /// assert_eq!(Some(Match::must(0, 2..6)), re.find(hay));
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn byte_classes(self, yes: bool) -> Config {
+ Config { byte_classes: Some(yes), ..self }
+ }
+
+ /// Set the line terminator to be used by the `^` and `$` anchors in
+ /// multi-line mode.
+ ///
+ /// This option has no effect when CRLF mode is enabled. That is,
+ /// regardless of this setting, `(?Rm:^)` and `(?Rm:$)` will always treat
+ /// `\r` and `\n` as line terminators (and will never match between a `\r`
+ /// and a `\n`).
+ ///
+ /// By default, `\n` is the line terminator.
+ ///
+ /// **Warning**: This does not change the behavior of `.`. To do that,
+ /// you'll need to configure the syntax option
+ /// [`syntax::Config::line_terminator`](crate::util::syntax::Config::line_terminator)
+ /// in addition to this. Otherwise, `.` will continue to match any
+ /// character other than `\n`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex_automata::{meta::Regex, util::syntax, Match};
+ ///
+ /// let re = Regex::builder()
+ /// .syntax(syntax::Config::new().multi_line(true))
+ /// .configure(Regex::config().line_terminator(b'\x00'))
+ /// .build(r"^foo$")?;
+ /// let hay = "\x00foo\x00";
+ /// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay));
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn line_terminator(self, byte: u8) -> Config {
+ Config { line_terminator: Some(byte), ..self }
+ }
+
+ /// Toggle whether the hybrid NFA/DFA (also known as the "lazy DFA") should
+ /// be available for use by the meta regex engine.
+ ///
+ /// Enabling this does not necessarily mean that the lazy DFA will
+ /// definitely be used. It just means that it will be _available_ for use
+ /// if the meta regex engine thinks it will be useful.
+ ///
+ /// When the `hybrid` crate feature is enabled, then this is enabled by
+ /// default. Otherwise, if the crate feature is disabled, then this is
+ /// always disabled, regardless of its setting by the caller.
+ pub fn hybrid(self, yes: bool) -> Config {
+ Config { hybrid: Some(yes), ..self }
+ }
+
+ /// Toggle whether a fully compiled DFA should be available for use by the
+ /// meta regex engine.
+ ///
+ /// Enabling this does not necessarily mean that a DFA will definitely be
+ /// used. It just means that it will be _available_ for use if the meta
+ /// regex engine thinks it will be useful.
+ ///
+ /// When the `dfa-build` crate feature is enabled, then this is enabled by
+ /// default. Otherwise, if the crate feature is disabled, then this is
+ /// always disabled, regardless of its setting by the caller.
+ pub fn dfa(self, yes: bool) -> Config {
+ Config { dfa: Some(yes), ..self }
+ }
+
+ /// Toggle whether a one-pass DFA should be available for use by the meta
+ /// regex engine.
+ ///
+ /// Enabling this does not necessarily mean that a one-pass DFA will
+ /// definitely be used. It just means that it will be _available_ for
+ /// use if the meta regex engine thinks it will be useful. (Indeed, a
+ /// one-pass DFA can only be used when the regex is one-pass. See the
+ /// [`dfa::onepass`](crate::dfa::onepass) module for more details.)
+ ///
+ /// When the `dfa-onepass` crate feature is enabled, then this is enabled
+ /// by default.
+ /// Otherwise, if the crate feature is disabled, then this is always
+ /// disabled, regardless of its setting by the caller.
+ pub fn onepass(self, yes: bool) -> Config {
+ Config { onepass: Some(yes), ..self }
+ }
+
+ /// Toggle whether a bounded backtracking regex engine should be available
+ /// for use by the meta regex engine.
+ ///
+ /// Enabling this does not necessarily mean that a bounded backtracker will
+ /// definitely be used. It just means that it will be _available_ for use
+ /// if the meta regex engine thinks it will be useful.
+ ///
+ /// When the `nfa-backtrack` crate feature is enabled, then this is enabled
+ /// by default. Otherwise, if the crate feature is disabled, then this is
+ /// always disabled, regardless of its setting by the caller.
+ pub fn backtrack(self, yes: bool) -> Config {
+ Config { backtrack: Some(yes), ..self }
+ }
+
+ /// Returns the match kind on this configuration, as set by
+ /// [`Config::match_kind`].
+ ///
+ /// If it was not explicitly set, then a default value is returned.
+ pub fn get_match_kind(&self) -> MatchKind {
+ self.match_kind.unwrap_or(MatchKind::LeftmostFirst)
+ }
+
+ /// Returns whether empty matches must fall on valid UTF-8 boundaries, as
+ /// set by [`Config::utf8_empty`].
+ ///
+ /// If it was not explicitly set, then a default value is returned.
+ pub fn get_utf8_empty(&self) -> bool {
+ self.utf8_empty.unwrap_or(true)
+ }
+
+ /// Returns whether automatic prefilters are enabled, as set by
+ /// [`Config::auto_prefilter`].
+ ///
+ /// If it was not explicitly set, then a default value is returned.
+ pub fn get_auto_prefilter(&self) -> bool {
+ self.autopre.unwrap_or(true)
+ }
+
+ /// Returns a manually set prefilter, if one was set by
+ /// [`Config::prefilter`].
+ ///
+ /// If it was not explicitly set, then a default value is returned.
+ pub fn get_prefilter(&self) -> Option<&Prefilter> {
+ self.pre.as_ref().unwrap_or(&None).as_ref()
+ }
+
+ /// Returns the capture configuration, as set by
+ /// [`Config::which_captures`].
+ ///
+ /// If it was not explicitly set, then a default value is returned.
+ pub fn get_which_captures(&self) -> WhichCaptures {
+ self.which_captures.unwrap_or(WhichCaptures::All)
+ }
+
+ /// Returns NFA size limit, as set by [`Config::nfa_size_limit`].
+ ///
+ /// If it was not explicitly set, then a default value is returned.
+ pub fn get_nfa_size_limit(&self) -> Option<usize> {
+ self.nfa_size_limit.unwrap_or(Some(10 * (1 << 20)))
+ }
+
+ /// Returns one-pass DFA size limit, as set by
+ /// [`Config::onepass_size_limit`].
+ ///
+ /// If it was not explicitly set, then a default value is returned.
+ pub fn get_onepass_size_limit(&self) -> Option<usize> {
+ self.onepass_size_limit.unwrap_or(Some(1 * (1 << 20)))
+ }
+
+ /// Returns hybrid NFA/DFA cache capacity, as set by
+ /// [`Config::hybrid_cache_capacity`].
+ ///
+ /// If it was not explicitly set, then a default value is returned.
+ pub fn get_hybrid_cache_capacity(&self) -> usize {
+ self.hybrid_cache_capacity.unwrap_or(2 * (1 << 20))
+ }
+
+ /// Returns DFA size limit, as set by [`Config::dfa_size_limit`].
+ ///
+ /// If it was not explicitly set, then a default value is returned.
+ pub fn get_dfa_size_limit(&self) -> Option<usize> {
+ // The default for this is VERY small because building a full DFA is
+ // ridiculously costly. But for regexes that are very small, it can be
+ // beneficial to use a full DFA. In particular, a full DFA can enable
+ // additional optimizations via something called "accelerated" states.
+ // Namely, when there's a state with only a few outgoing transitions, + // we can temporary suspend walking the transition table and use memchr + // for just those outgoing transitions to skip ahead very quickly. + // + // Generally speaking, if Unicode is enabled in your regex and you're + // using some kind of Unicode feature, then it's going to blow this + // size limit. Moreover, Unicode tends to defeat the "accelerated" + // state optimization too, so it's a double whammy. + // + // We also use a limit on the number of NFA states to avoid even + // starting the DFA construction process. Namely, DFA construction + // itself could make lots of initial allocs proportional to the size + // of the NFA, and if the NFA is large, it doesn't make sense to pay + // that cost if we know it's likely to be blown by a large margin. + self.dfa_size_limit.unwrap_or(Some(40 * (1 << 10))) + } + + /// Returns DFA size limit in terms of the number of states in the NFA, as + /// set by [`Config::dfa_state_limit`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_dfa_state_limit(&self) -> Option { + // Again, as with the size limit, we keep this very small. + self.dfa_state_limit.unwrap_or(Some(30)) + } + + /// Returns whether byte classes are enabled, as set by + /// [`Config::byte_classes`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_byte_classes(&self) -> bool { + self.byte_classes.unwrap_or(true) + } + + /// Returns the line terminator for this configuration, as set by + /// [`Config::line_terminator`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_line_terminator(&self) -> u8 { + self.line_terminator.unwrap_or(b'\n') + } + + /// Returns whether the hybrid NFA/DFA regex engine may be used, as set by + /// [`Config::hybrid`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_hybrid(&self) -> bool { + #[cfg(feature = "hybrid")] + { + self.hybrid.unwrap_or(true) + } + #[cfg(not(feature = "hybrid"))] + { + false + } + } + + /// Returns whether the DFA regex engine may be used, as set by + /// [`Config::dfa`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_dfa(&self) -> bool { + #[cfg(feature = "dfa-build")] + { + self.dfa.unwrap_or(true) + } + #[cfg(not(feature = "dfa-build"))] + { + false + } + } + + /// Returns whether the one-pass DFA regex engine may be used, as set by + /// [`Config::onepass`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_onepass(&self) -> bool { + #[cfg(feature = "dfa-onepass")] + { + self.onepass.unwrap_or(true) + } + #[cfg(not(feature = "dfa-onepass"))] + { + false + } + } + + /// Returns whether the bounded backtracking regex engine may be used, as + /// set by [`Config::backtrack`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_backtrack(&self) -> bool { + #[cfg(feature = "nfa-backtrack")] + { + self.backtrack.unwrap_or(true) + } + #[cfg(not(feature = "nfa-backtrack"))] + { + false + } + } + + /// Overwrite the default configuration such that the options in `o` are + /// always used. If an option in `o` is not set, then the corresponding + /// option in `self` is used. If it's not set in `self` either, then it + /// remains not set. 
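As an editorial aside: the pattern above, where every option is stored as an `Option` and the default is resolved only in the `get_*` accessor, is directly observable through the public API. A small sketch (the defaults shown are the ones documented above; nothing here is crate-internal):

```rust
use regex_automata::meta::Regex;

fn main() {
    // Unset options resolve to their documented defaults via the getters.
    let config = Regex::config();
    assert_eq!(true, config.get_byte_classes());
    assert_eq!(b'\n', config.get_line_terminator());

    // Engine toggles are advisory: setting one to false removes that
    // engine from the set the meta engine may choose from. (When set to
    // false, the getters report false regardless of crate features.)
    let config = Regex::config().hybrid(false).dfa(false);
    assert_eq!(false, config.get_hybrid());
    assert_eq!(false, config.get_dfa());
}
```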
+ pub(crate) fn overwrite(&self, o: Config) -> Config { + Config { + match_kind: o.match_kind.or(self.match_kind), + utf8_empty: o.utf8_empty.or(self.utf8_empty), + autopre: o.autopre.or(self.autopre), + pre: o.pre.or_else(|| self.pre.clone()), + which_captures: o.which_captures.or(self.which_captures), + nfa_size_limit: o.nfa_size_limit.or(self.nfa_size_limit), + onepass_size_limit: o + .onepass_size_limit + .or(self.onepass_size_limit), + hybrid_cache_capacity: o + .hybrid_cache_capacity + .or(self.hybrid_cache_capacity), + hybrid: o.hybrid.or(self.hybrid), + dfa: o.dfa.or(self.dfa), + dfa_size_limit: o.dfa_size_limit.or(self.dfa_size_limit), + dfa_state_limit: o.dfa_state_limit.or(self.dfa_state_limit), + onepass: o.onepass.or(self.onepass), + backtrack: o.backtrack.or(self.backtrack), + byte_classes: o.byte_classes.or(self.byte_classes), + line_terminator: o.line_terminator.or(self.line_terminator), + } + } +} + +/// A builder for configuring and constructing a `Regex`. +/// +/// The builder permits configuring two different aspects of a `Regex`: +/// +/// * [`Builder::configure`] will set high-level configuration options as +/// described by a [`Config`]. +/// * [`Builder::syntax`] will set the syntax level configuration options +/// as described by a [`util::syntax::Config`](crate::util::syntax::Config). +/// This only applies when building a `Regex` from pattern strings. +/// +/// Once configured, the builder can then be used to construct a `Regex` from +/// one of 4 different inputs: +/// +/// * [`Builder::build`] creates a regex from a single pattern string. +/// * [`Builder::build_many`] creates a regex from many pattern strings. +/// * [`Builder::build_from_hir`] creates a regex from a +/// [`regex-syntax::Hir`](Hir) expression. +/// * [`Builder::build_many_from_hir`] creates a regex from many +/// [`regex-syntax::Hir`](Hir) expressions. +/// +/// The latter two methods in particular provide a way to construct a fully +/// feature regular expression matcher directly from an `Hir` expression +/// without having to first convert it to a string. (This is in contrast to the +/// top-level `regex` crate which intentionally provides no such API in order +/// to avoid making `regex-syntax` a public dependency.) +/// +/// As a convenience, this builder may be created via [`Regex::builder`], which +/// may help avoid an extra import. +/// +/// # Example: change the line terminator +/// +/// This example shows how to enable multi-line mode by default and change the +/// line terminator to the NUL byte: +/// +/// ``` +/// use regex_automata::{meta::Regex, util::syntax, Match}; +/// +/// let re = Regex::builder() +/// .syntax(syntax::Config::new().multi_line(true)) +/// .configure(Regex::config().line_terminator(b'\x00')) +/// .build(r"^foo$")?; +/// let hay = "\x00foo\x00"; +/// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay)); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: disable UTF-8 requirement +/// +/// By default, regex patterns are required to match UTF-8. This includes +/// regex patterns that can produce matches of length zero. In the case of an +/// empty match, by default, matches will not appear between the code units of +/// a UTF-8 encoded codepoint. +/// +/// However, it can be useful to disable this requirement, particularly if +/// you're searching things like `&[u8]` that are not known to be valid UTF-8. 
+/// +/// ``` +/// use regex_automata::{meta::Regex, util::syntax, Match}; +/// +/// let mut builder = Regex::builder(); +/// // Disables the requirement that non-empty matches match UTF-8. +/// builder.syntax(syntax::Config::new().utf8(false)); +/// // Disables the requirement that empty matches match UTF-8 boundaries. +/// builder.configure(Regex::config().utf8_empty(false)); +/// +/// // We can match raw bytes via \xZZ syntax, but we need to disable +/// // Unicode mode to do that. We could disable it everywhere, or just +/// // selectively, as shown here. +/// let re = builder.build(r"(?-u:\xFF)foo(?-u:\xFF)")?; +/// let hay = b"\xFFfoo\xFF"; +/// assert_eq!(Some(Match::must(0, 0..5)), re.find(hay)); +/// +/// // We can also match between code units. +/// let re = builder.build(r"")?; +/// let hay = "☃"; +/// assert_eq!(re.find_iter(hay).collect::>(), vec![ +/// Match::must(0, 0..0), +/// Match::must(0, 1..1), +/// Match::must(0, 2..2), +/// Match::must(0, 3..3), +/// ]); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Builder { + config: Config, + ast: ast::parse::ParserBuilder, + hir: hir::translate::TranslatorBuilder, +} + +impl Builder { + /// Creates a new builder for configuring and constructing a [`Regex`]. + pub fn new() -> Builder { + Builder { + config: Config::default(), + ast: ast::parse::ParserBuilder::new(), + hir: hir::translate::TranslatorBuilder::new(), + } + } + + /// Builds a `Regex` from a single pattern string. + /// + /// If there was a problem parsing the pattern or a problem turning it into + /// a regex matcher, then an error is returned. + /// + /// # Example + /// + /// This example shows how to configure syntax options. + /// + /// ``` + /// use regex_automata::{meta::Regex, util::syntax, Match}; + /// + /// let re = Regex::builder() + /// .syntax(syntax::Config::new().crlf(true).multi_line(true)) + /// .build(r"^foo$")?; + /// let hay = "\r\nfoo\r\n"; + /// assert_eq!(Some(Match::must(0, 2..5)), re.find(hay)); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build(&self, pattern: &str) -> Result { + self.build_many(&[pattern]) + } + + /// Builds a `Regex` from many pattern strings. + /// + /// If there was a problem parsing any of the patterns or a problem turning + /// them into a regex matcher, then an error is returned. + /// + /// # Example: finding the pattern that caused an error + /// + /// When a syntax error occurs, it is possible to ask which pattern + /// caused the syntax error. + /// + /// ``` + /// use regex_automata::{meta::Regex, PatternID}; + /// + /// let err = Regex::builder() + /// .build_many(&["a", "b", r"\p{Foo}", "c"]) + /// .unwrap_err(); + /// assert_eq!(Some(PatternID::must(2)), err.pattern()); + /// ``` + /// + /// # Example: zero patterns is valid + /// + /// Building a regex with zero patterns results in a regex that never + /// matches anything. Because this routine is generic, passing an empty + /// slice usually requires a turbo-fish (or something else to help type + /// inference). + /// + /// ``` + /// use regex_automata::{meta::Regex, util::syntax, Match}; + /// + /// let re = Regex::builder() + /// .build_many::<&str>(&[])?; + /// assert_eq!(None, re.find("")); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + use crate::util::primitives::IteratorIndexExt; + log! 
{ + debug!("building meta regex with {} patterns:", patterns.len()); + for (pid, p) in patterns.iter().with_pattern_ids() { + let p = p.as_ref(); + // We might split a grapheme with this truncation logic, but + // that's fine. We at least avoid splitting a codepoint. + let maxoff = p + .char_indices() + .map(|(i, ch)| i + ch.len_utf8()) + .take(1000) + .last() + .unwrap_or(0); + if maxoff < p.len() { + debug!("{:?}: {}[... snip ...]", pid, &p[..maxoff]); + } else { + debug!("{:?}: {}", pid, p); + } + } + } + let (mut asts, mut hirs) = (vec![], vec![]); + for (pid, p) in patterns.iter().with_pattern_ids() { + let ast = self + .ast + .build() + .parse(p.as_ref()) + .map_err(|err| BuildError::ast(pid, err))?; + asts.push(ast); + } + for ((pid, p), ast) in + patterns.iter().with_pattern_ids().zip(asts.iter()) + { + let hir = self + .hir + .build() + .translate(p.as_ref(), ast) + .map_err(|err| BuildError::hir(pid, err))?; + hirs.push(hir); + } + self.build_many_from_hir(&hirs) + } + + /// Builds a `Regex` directly from an `Hir` expression. + /// + /// This is useful if you needed to parse a pattern string into an `Hir` + /// for other reasons (such as analysis or transformations). This routine + /// permits building a `Regex` directly from the `Hir` expression instead + /// of first converting the `Hir` back to a pattern string. + /// + /// When using this method, any options set via [`Builder::syntax`] are + /// ignored. Namely, the syntax options only apply when parsing a pattern + /// string, which isn't relevant here. + /// + /// If there was a problem building the underlying regex matcher for the + /// given `Hir`, then an error is returned. + /// + /// # Example + /// + /// This example shows how one can hand-construct an `Hir` expression and + /// build a regex from it without doing any parsing at all. + /// + /// ``` + /// use { + /// regex_automata::{meta::Regex, Match}, + /// regex_syntax::hir::{Hir, Look}, + /// }; + /// + /// // (?Rm)^foo$ + /// let hir = Hir::concat(vec![ + /// Hir::look(Look::StartCRLF), + /// Hir::literal("foo".as_bytes()), + /// Hir::look(Look::EndCRLF), + /// ]); + /// let re = Regex::builder() + /// .build_from_hir(&hir)?; + /// let hay = "\r\nfoo\r\n"; + /// assert_eq!(Some(Match::must(0, 2..5)), re.find(hay)); + /// + /// Ok::<(), Box>(()) + /// ``` + pub fn build_from_hir(&self, hir: &Hir) -> Result { + self.build_many_from_hir(&[hir]) + } + + /// Builds a `Regex` directly from many `Hir` expressions. + /// + /// This is useful if you needed to parse pattern strings into `Hir` + /// expressions for other reasons (such as analysis or transformations). + /// This routine permits building a `Regex` directly from the `Hir` + /// expressions instead of first converting the `Hir` expressions back to + /// pattern strings. + /// + /// When using this method, any options set via [`Builder::syntax`] are + /// ignored. Namely, the syntax options only apply when parsing a pattern + /// string, which isn't relevant here. + /// + /// If there was a problem building the underlying regex matcher for the + /// given `Hir` expressions, then an error is returned. + /// + /// Note that unlike [`Builder::build_many`], this can only fail as a + /// result of building the underlying matcher. In that case, there is + /// no single `Hir` expression that can be isolated as a reason for the + /// failure. So if this routine fails, it's not possible to determine which + /// `Hir` expression caused the failure. 
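An editorial sketch complementing the hand-constructed example below: patterns can also be parsed into `Hir` values up front with `regex_syntax::parse` (say, to inspect or transform them first) and then handed to the builder in bulk. The patterns and haystack here are illustrative:

```rust
use regex_automata::{meta::Regex, Match};
use regex_syntax::parse;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Parse each pattern yourself, then build one multi-pattern regex
    // from the resulting Hir expressions.
    let hir1 = parse(r"[a-z]+")?;
    let hir2 = parse(r"[0-9]+")?;
    let re = Regex::builder().build_many_from_hir(&[&hir1, &hir2])?;

    // Pattern IDs correspond to the order the Hirs were given in.
    let got: Vec<Match> = re.find_iter("abc123").collect();
    assert_eq!(got, vec![Match::must(0, 0..3), Match::must(1, 3..6)]);
    Ok(())
}
```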
+ /// + /// # Example + /// + /// This example shows how one can hand-construct multiple `Hir` + /// expressions and build a single regex from them without doing any + /// parsing at all. + /// + /// ``` + /// use { + /// regex_automata::{meta::Regex, Match}, + /// regex_syntax::hir::{Hir, Look}, + /// }; + /// + /// // (?Rm)^foo$ + /// let hir1 = Hir::concat(vec![ + /// Hir::look(Look::StartCRLF), + /// Hir::literal("foo".as_bytes()), + /// Hir::look(Look::EndCRLF), + /// ]); + /// // (?Rm)^bar$ + /// let hir2 = Hir::concat(vec![ + /// Hir::look(Look::StartCRLF), + /// Hir::literal("bar".as_bytes()), + /// Hir::look(Look::EndCRLF), + /// ]); + /// let re = Regex::builder() + /// .build_many_from_hir(&[&hir1, &hir2])?; + /// let hay = "\r\nfoo\r\nbar"; + /// let got: Vec = re.find_iter(hay).collect(); + /// let expected = vec![ + /// Match::must(0, 2..5), + /// Match::must(1, 7..10), + /// ]; + /// assert_eq!(expected, got); + /// + /// Ok::<(), Box>(()) + /// ``` + pub fn build_many_from_hir>( + &self, + hirs: &[H], + ) -> Result { + let config = self.config.clone(); + // We collect the HIRs into a vec so we can write internal routines + // with '&[&Hir]'. i.e., Don't use generics everywhere to keep code + // bloat down.. + let hirs: Vec<&Hir> = hirs.iter().map(|hir| hir.borrow()).collect(); + let info = RegexInfo::new(config, &hirs); + let strat = strategy::new(&info, &hirs)?; + let pool = { + let strat = Arc::clone(&strat); + let create: CachePoolFn = Box::new(move || strat.create_cache()); + Pool::new(create) + }; + Ok(Regex { imp: Arc::new(RegexI { strat, info }), pool }) + } + + /// Configure the behavior of a `Regex`. + /// + /// This configuration controls non-syntax options related to the behavior + /// of a `Regex`. This includes things like whether empty matches can split + /// a codepoint, prefilters, line terminators and a long list of options + /// for configuring which regex engines the meta regex engine will be able + /// to use internally. + /// + /// # Example + /// + /// This example shows how to disable UTF-8 empty mode. This will permit + /// empty matches to occur between the UTF-8 encoding of a codepoint. + /// + /// ``` + /// use regex_automata::{meta::Regex, Match}; + /// + /// let re = Regex::new("")?; + /// let got: Vec = re.find_iter("☃").collect(); + /// // Matches only occur at the beginning and end of the snowman. + /// assert_eq!(got, vec![ + /// Match::must(0, 0..0), + /// Match::must(0, 3..3), + /// ]); + /// + /// let re = Regex::builder() + /// .configure(Regex::config().utf8_empty(false)) + /// .build("")?; + /// let got: Vec = re.find_iter("☃").collect(); + /// // Matches now occur at every position! + /// assert_eq!(got, vec![ + /// Match::must(0, 0..0), + /// Match::must(0, 1..1), + /// Match::must(0, 2..2), + /// Match::must(0, 3..3), + /// ]); + /// + /// Ok::<(), Box>(()) + /// ``` + pub fn configure(&mut self, config: Config) -> &mut Builder { + self.config = self.config.overwrite(config); + self + } + + /// Configure the syntax options when parsing a pattern string while + /// building a `Regex`. + /// + /// These options _only_ apply when [`Builder::build`] or [`Builder::build_many`] + /// are used. The other build methods accept `Hir` values, which have + /// already been parsed. + /// + /// # Example + /// + /// This example shows how to enable case insensitive mode. 
+ /// + /// ``` + /// use regex_automata::{meta::Regex, util::syntax, Match}; + /// + /// let re = Regex::builder() + /// .syntax(syntax::Config::new().case_insensitive(true)) + /// .build(r"δ")?; + /// assert_eq!(Some(Match::must(0, 0..2)), re.find(r"Δ")); + /// + /// Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Builder { + config.apply_ast(&mut self.ast); + config.apply_hir(&mut self.hir); + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // I found this in the course of building out the benchmark suite for + // rebar. + #[test] + fn regression_suffix_literal_count() { + let _ = env_logger::try_init(); + + let re = Regex::new(r"[a-zA-Z]+ing").unwrap(); + assert_eq!(1, re.find_iter("tingling").count()); + } +} diff --git a/vendor/regex-automata/src/meta/reverse_inner.rs b/vendor/regex-automata/src/meta/reverse_inner.rs new file mode 100644 index 0000000..3d78779 --- /dev/null +++ b/vendor/regex-automata/src/meta/reverse_inner.rs @@ -0,0 +1,220 @@ +/*! +A module dedicated to plucking inner literals out of a regex pattern, and +then constructing a prefilter for them. We also include a regex pattern +"prefix" that corresponds to the bits of the regex that need to match before +the literals do. The reverse inner optimization then proceeds by looking for +matches of the inner literal(s), and then doing a reverse search of the prefix +from the start of the literal match to find the overall start position of the +match. + +The essential invariant we want to uphold here is that the literals we return +reflect a set where *at least* one of them must match in order for the overall +regex to match. We also need to maintain the invariant that the regex prefix +returned corresponds to the entirety of the regex up until the literals we +return. + +This somewhat limits what we can do. That is, if we have a regex like +`\w+(@!|%%)\w+`, then we can pluck the `{@!, %%}` out and build a prefilter +from it. Then we just need to compile `\w+` in reverse. No fuss no muss. But if +we have a regex like `\d+@!|\w+%%`, then we get kind of stymied. Technically, +we could still extract `{@!, %%}`, and it is true that at least one of them +must match. But then, what is our regex prefix? Again, in theory, that could be +`\d+|\w+`, but that's not quite right, because the `\d+` only matches when `@!` +matches, and `\w+` only matches when `%%` matches. + +All of that is technically possible to do, but it seemingly requires a lot of +sophistication and machinery. Probably the way to tackle that is with some kind +of formalism and approach this problem more generally. + +For now, the code below basically just looks for a top-level concatenation. +And if it can find one, it looks for literals in each of the direct child +sub-expressions of that concatenation. If some good ones are found, we return +those and a concatenation of the Hir expressions seen up to that point. +*/ + +use alloc::vec::Vec; + +use regex_syntax::hir::{self, literal, Hir, HirKind}; + +use crate::{util::prefilter::Prefilter, MatchKind}; + +/// Attempts to extract an "inner" prefilter from the given HIR expressions. If +/// one was found, then a concatenation of the HIR expressions that precede it +/// is returned. +/// +/// The idea here is that the prefilter returned can be used to find candidate +/// matches. And then the HIR returned can be used to build a reverse regex +/// matcher, which will find the start of the candidate match.
Finally, the +/// match still has to be confirmed with a normal anchored forward scan to find +/// the end position of the match. +/// +/// Note that this assumes leftmost-first match semantics, so callers must +/// not call this otherwise. +pub(crate) fn extract(hirs: &[&Hir]) -> Option<(Hir, Prefilter)> { + if hirs.len() != 1 { + debug!( + "skipping reverse inner optimization since it only \ + supports 1 pattern, {} were given", + hirs.len(), + ); + return None; + } + let mut concat = match top_concat(hirs[0]) { + Some(concat) => concat, + None => { + debug!( + "skipping reverse inner optimization because a top-level \ + concatenation could not be found", + ); + return None; + } + }; + // We skip the first HIR because if it did have a prefix prefilter in it, + // we probably wouldn't be here looking for an inner prefilter. + for i in 1..concat.len() { + let hir = &concat[i]; + let pre = match prefilter(hir) { + None => continue, + Some(pre) => pre, + }; + // Even if we got a prefilter, if it isn't considered "fast," then we + // probably don't want to bother with it. Namely, since the reverse + // inner optimization requires some overhead, it likely only makes + // sense if the prefilter scan itself is (believed) to be much faster + // than the regex engine. + if !pre.is_fast() { + debug!( + "skipping extracted inner prefilter because \ + it probably isn't fast" + ); + continue; + } + let concat_suffix = Hir::concat(concat.split_off(i)); + let concat_prefix = Hir::concat(concat); + // Look for a prefilter again. Why? Because above we only looked for + // a prefilter on the individual 'hir', but we might be able to find + // something better and more discriminatory by looking at the entire + // suffix. We don't do this above to avoid making this loop worst case + // quadratic in the length of 'concat'. + let pre2 = match prefilter(&concat_suffix) { + None => pre, + Some(pre2) => { + if pre2.is_fast() { + pre2 + } else { + pre + } + } + }; + return Some((concat_prefix, pre2)); + } + debug!( + "skipping reverse inner optimization because a top-level \ + sub-expression with a fast prefilter could not be found" + ); + None +} + +/// Attempt to extract a prefilter from an HIR expression. +/// +/// We do a little massaging here to do our best to ensure that the prefilter +/// we get out of this is *probably* fast. Basically, the false positive rate +/// has a much higher impact for things like the reverse inner optimization +/// because more work needs to potentially be done for each candidate match. +/// +/// Note that this assumes leftmost-first match semantics, so callers must +/// not call this otherwise. +fn prefilter(hir: &Hir) -> Option<Prefilter> { + let mut extractor = literal::Extractor::new(); + extractor.kind(literal::ExtractKind::Prefix); + let mut prefixes = extractor.extract(hir); + debug!( + "inner prefixes (len={:?}) extracted before optimization: {:?}", + prefixes.len(), + prefixes + ); + // Since these are inner literals, we know they cannot be exact. But the + // extractor doesn't know this. We mark them as inexact because this might + // impact literal optimization. Namely, optimization weights "all literals + // are exact" as very high, because it presumes that any match results in + // an overall match. But of course, that is not the case here. + // + // In practice, this avoids plucking out an ASCII-only \s as an alternation + // of single-byte whitespace characters.
+ prefixes.make_inexact(); + prefixes.optimize_for_prefix_by_preference(); + debug!( + "inner prefixes (len={:?}) extracted after optimization: {:?}", + prefixes.len(), + prefixes + ); + prefixes + .literals() + .and_then(|lits| Prefilter::new(MatchKind::LeftmostFirst, lits)) +} + +/// Looks for a "top level" HirKind::Concat item in the given HIR. This will +/// try to return one even if it's embedded in a capturing group, but is +/// otherwise pretty conservative in what is returned. +/// +/// The HIR returned is a complete copy of the concat with all capturing +/// groups removed. In effect, the concat returned is "flattened" with respect +/// to capturing groups. This makes the detection logic above for prefixes +/// a bit simpler, and it works because 1) capturing groups never influence +/// whether a match occurs or not and 2) capturing groups are not used when +/// doing the reverse inner search to find the start of the match. +fn top_concat(mut hir: &Hir) -> Option> { + loop { + hir = match hir.kind() { + HirKind::Empty + | HirKind::Literal(_) + | HirKind::Class(_) + | HirKind::Look(_) + | HirKind::Repetition(_) + | HirKind::Alternation(_) => return None, + HirKind::Capture(hir::Capture { ref sub, .. }) => sub, + HirKind::Concat(ref subs) => { + // We are careful to only do the flattening/copy when we know + // we have a "top level" concat we can inspect. This avoids + // doing extra work in cases where we definitely won't use it. + // (This might still be wasted work if we can't go on to find + // some literals to extract.) + let concat = + Hir::concat(subs.iter().map(|h| flatten(h)).collect()); + return match concat.into_kind() { + HirKind::Concat(xs) => Some(xs), + // It is actually possible for this case to occur, because + // 'Hir::concat' might simplify the expression to the point + // that concatenations are actually removed. One wonders + // whether this leads to other cases where we should be + // extracting literals, but in theory, I believe if we do + // get here, then it means that a "real" prefilter failed + // to be extracted and we should probably leave well enough + // alone. (A "real" prefilter is unbothered by "top-level + // concats" and "capturing groups.") + _ => return None, + }; + } + }; + } +} + +/// Returns a copy of the given HIR but with all capturing groups removed. +fn flatten(hir: &Hir) -> Hir { + match hir.kind() { + HirKind::Empty => Hir::empty(), + HirKind::Literal(hir::Literal(ref x)) => Hir::literal(x.clone()), + HirKind::Class(ref x) => Hir::class(x.clone()), + HirKind::Look(ref x) => Hir::look(x.clone()), + HirKind::Repetition(ref x) => Hir::repetition(x.with(flatten(&x.sub))), + // This is the interesting case. We just drop the group information + // entirely and use the child HIR itself. + HirKind::Capture(hir::Capture { ref sub, .. }) => flatten(sub), + HirKind::Alternation(ref xs) => { + Hir::alternation(xs.iter().map(|x| flatten(x)).collect()) + } + HirKind::Concat(ref xs) => { + Hir::concat(xs.iter().map(|x| flatten(x)).collect()) + } + } +} diff --git a/vendor/regex-automata/src/meta/stopat.rs b/vendor/regex-automata/src/meta/stopat.rs new file mode 100644 index 0000000..c4dcd79 --- /dev/null +++ b/vendor/regex-automata/src/meta/stopat.rs @@ -0,0 +1,212 @@ +/*! +This module defines two bespoke forward DFA search routines. One for the lazy +DFA and one for the fully compiled DFA. These routines differ from the normal +ones by reporting the position at which the search terminates when a match +*isn't* found. 
+ +This position at which a search terminates is useful in contexts where the meta +regex engine runs optimizations that could go quadratic if we aren't careful. +Namely, a regex search *could* scan to the end of the haystack only to report a +non-match. If the caller doesn't know that the search scanned to the end of the +haystack, it might restart the search at the next literal candidate it finds +and repeat the process. + +Providing the caller with the position at which the search stopped provides a +way for the caller to determine the point at which subsequent scans should not +pass. This is principally used in the "reverse inner" optimization, which works +like this: + +1. Look for a match of an inner literal. Say, 'Z' in '\w+Z\d+'. +2. At the spot where 'Z' matches, do a reverse anchored search from there for +'\w+'. +3. If the reverse search matches, it corresponds to the start position of a +(possible) match. At this point, do a forward anchored search to find the end +position. If an end position is found, then we have a match and we know its +bounds. + +If the forward anchored search in (3) searches the entire rest of the haystack +but reports a non-match, then a naive implementation of the above will continue +back at step 1 looking for more candidates. There might still be a match to be +found! It's possible. But we already scanned the whole haystack. So if we keep +repeating the process, then we might wind up taking quadratic time in the size +of the haystack, which is not great. + +So if the forward anchored search in (3) reports the position at which it +stops, then we can detect whether quadratic behavior might be occurring in +steps (1) and (2). For (1), it occurs if the literal candidate found occurs +*before* the end of the previous search in (3), since that means we're now +going to look for another match in a place where the forward search has already +scanned. It is *correct* to do so, but our technique has become inefficient. +For (2), quadratic behavior occurs similarly when its reverse search extends +past the point where the previous forward search in (3) terminated. Indeed, to +implement (2), we use the sibling 'limited' module for ensuring our reverse +scan doesn't go further than we want. + +See the 'opt/reverse-inner' benchmarks in rebar for a real demonstration of +how quadratic behavior is mitigated. 
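As an editorial aside, the contract described above can be distilled into a tiny standalone sketch. The helper below is hypothetical (not part of this module); it only mirrors the shape the routines that follow use via `mat.ok_or(at)`: `Ok` carries a match position, `Err` carries the offset where scanning stopped.

```rust
// Hypothetical stand-in for the routines below: report the match position
// on success, and the offset where the scan terminated on failure.
fn find_even_digit(haystack: &[u8], start: usize) -> Result<usize, usize> {
    let mut at = start;
    while at < haystack.len() {
        if haystack[at].is_ascii_digit() && (haystack[at] - b'0') % 2 == 0 {
            return Ok(at);
        }
        at += 1;
    }
    // No match: tell the caller how far we scanned, so a candidate-driven
    // outer loop can avoid restarting inside an already-searched region.
    Err(at)
}

fn main() {
    let hay = b"abc4def";
    assert_eq!(Ok(3), find_even_digit(hay, 0));
    // The failed scan reports that it already looked at everything up to
    // hay.len(), so no candidate before that offset warrants a rescan.
    assert_eq!(Err(hay.len()), find_even_digit(hay, 4));
}
```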
+*/ + +use crate::{meta::error::RetryFailError, HalfMatch, Input, MatchError}; + +#[cfg(feature = "dfa-build")] +pub(crate) fn dfa_try_search_half_fwd( + dfa: &crate::dfa::dense::DFA>, + input: &Input<'_>, +) -> Result, RetryFailError> { + use crate::dfa::{accel, Automaton}; + + let mut mat = None; + let mut sid = dfa.start_state_forward(input)?; + let mut at = input.start(); + while at < input.end() { + sid = dfa.next_state(sid, input.haystack()[at]); + if dfa.is_special_state(sid) { + if dfa.is_match_state(sid) { + let pattern = dfa.match_pattern(sid, 0); + mat = Some(HalfMatch::new(pattern, at)); + if input.get_earliest() { + return Ok(mat.ok_or(at)); + } + if dfa.is_accel_state(sid) { + let needs = dfa.accelerator(sid); + at = accel::find_fwd(needs, input.haystack(), at) + .unwrap_or(input.end()); + continue; + } + } else if dfa.is_accel_state(sid) { + let needs = dfa.accelerator(sid); + at = accel::find_fwd(needs, input.haystack(), at) + .unwrap_or(input.end()); + continue; + } else if dfa.is_dead_state(sid) { + return Ok(mat.ok_or(at)); + } else if dfa.is_quit_state(sid) { + return Err(MatchError::quit(input.haystack()[at], at).into()); + } else { + // Ideally we wouldn't use a DFA that specialized start states + // and thus 'is_start_state()' could never be true here, but in + // practice we reuse the DFA created for the full regex which + // will specialize start states whenever there is a prefilter. + debug_assert!(dfa.is_start_state(sid)); + } + } + at += 1; + } + dfa_eoi_fwd(dfa, input, &mut sid, &mut mat)?; + Ok(mat.ok_or(at)) +} + +#[cfg(feature = "hybrid")] +pub(crate) fn hybrid_try_search_half_fwd( + dfa: &crate::hybrid::dfa::DFA, + cache: &mut crate::hybrid::dfa::Cache, + input: &Input<'_>, +) -> Result, RetryFailError> { + let mut mat = None; + let mut sid = dfa.start_state_forward(cache, input)?; + let mut at = input.start(); + while at < input.end() { + sid = dfa + .next_state(cache, sid, input.haystack()[at]) + .map_err(|_| MatchError::gave_up(at))?; + if sid.is_tagged() { + if sid.is_match() { + let pattern = dfa.match_pattern(cache, sid, 0); + mat = Some(HalfMatch::new(pattern, at)); + if input.get_earliest() { + return Ok(mat.ok_or(at)); + } + } else if sid.is_dead() { + return Ok(mat.ok_or(at)); + } else if sid.is_quit() { + return Err(MatchError::quit(input.haystack()[at], at).into()); + } else { + // We should NEVER get an unknown state ID back from + // dfa.next_state(). + debug_assert!(!sid.is_unknown()); + // Ideally we wouldn't use a lazy DFA that specialized start + // states and thus 'sid.is_start()' could never be true here, + // but in practice we reuse the lazy DFA created for the full + // regex which will specialize start states whenever there is + // a prefilter. 
+ debug_assert!(sid.is_start()); + } + } + at += 1; + } + hybrid_eoi_fwd(dfa, cache, input, &mut sid, &mut mat)?; + Ok(mat.ok_or(at)) +} + +#[cfg(feature = "dfa-build")] +#[cfg_attr(feature = "perf-inline", inline(always))] +fn dfa_eoi_fwd( + dfa: &crate::dfa::dense::DFA>, + input: &Input<'_>, + sid: &mut crate::util::primitives::StateID, + mat: &mut Option, +) -> Result<(), MatchError> { + use crate::dfa::Automaton; + + let sp = input.get_span(); + match input.haystack().get(sp.end) { + Some(&b) => { + *sid = dfa.next_state(*sid, b); + if dfa.is_match_state(*sid) { + let pattern = dfa.match_pattern(*sid, 0); + *mat = Some(HalfMatch::new(pattern, sp.end)); + } else if dfa.is_quit_state(*sid) { + return Err(MatchError::quit(b, sp.end)); + } + } + None => { + *sid = dfa.next_eoi_state(*sid); + if dfa.is_match_state(*sid) { + let pattern = dfa.match_pattern(*sid, 0); + *mat = Some(HalfMatch::new(pattern, input.haystack().len())); + } + // N.B. We don't have to check 'is_quit' here because the EOI + // transition can never lead to a quit state. + debug_assert!(!dfa.is_quit_state(*sid)); + } + } + Ok(()) +} + +#[cfg(feature = "hybrid")] +#[cfg_attr(feature = "perf-inline", inline(always))] +fn hybrid_eoi_fwd( + dfa: &crate::hybrid::dfa::DFA, + cache: &mut crate::hybrid::dfa::Cache, + input: &Input<'_>, + sid: &mut crate::hybrid::LazyStateID, + mat: &mut Option, +) -> Result<(), MatchError> { + let sp = input.get_span(); + match input.haystack().get(sp.end) { + Some(&b) => { + *sid = dfa + .next_state(cache, *sid, b) + .map_err(|_| MatchError::gave_up(sp.end))?; + if sid.is_match() { + let pattern = dfa.match_pattern(cache, *sid, 0); + *mat = Some(HalfMatch::new(pattern, sp.end)); + } else if sid.is_quit() { + return Err(MatchError::quit(b, sp.end)); + } + } + None => { + *sid = dfa + .next_eoi_state(cache, *sid) + .map_err(|_| MatchError::gave_up(input.haystack().len()))?; + if sid.is_match() { + let pattern = dfa.match_pattern(cache, *sid, 0); + *mat = Some(HalfMatch::new(pattern, input.haystack().len())); + } + // N.B. We don't have to check 'is_quit' here because the EOI + // transition can never lead to a quit state. + debug_assert!(!sid.is_quit()); + } + } + Ok(()) +} diff --git a/vendor/regex-automata/src/meta/strategy.rs b/vendor/regex-automata/src/meta/strategy.rs new file mode 100644 index 0000000..04f2ba3 --- /dev/null +++ b/vendor/regex-automata/src/meta/strategy.rs @@ -0,0 +1,1914 @@ +use core::{ + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, +}; + +use alloc::sync::Arc; + +use regex_syntax::hir::{literal, Hir}; + +use crate::{ + meta::{ + error::{BuildError, RetryError, RetryFailError, RetryQuadraticError}, + regex::{Cache, RegexInfo}, + reverse_inner, wrappers, + }, + nfa::thompson::{self, WhichCaptures, NFA}, + util::{ + captures::{Captures, GroupInfo}, + look::LookMatcher, + prefilter::{self, Prefilter, PrefilterI}, + primitives::{NonMaxUsize, PatternID}, + search::{Anchored, HalfMatch, Input, Match, MatchKind, PatternSet}, + }, +}; + +/// A trait that represents a single meta strategy. Its main utility is in +/// providing a way to do dynamic dispatch over a few choices. +/// +/// Why dynamic dispatch? I actually don't have a super compelling reason, and +/// importantly, I have not benchmarked it with the main alternative: an enum. +/// I went with dynamic dispatch initially because the regex engine search code +/// really can't be inlined into caller code in most cases because it's just +/// too big. 
In other words, it is already expected that every regex search +/// will entail at least the cost of a function call. +/// +/// I do wonder whether using enums would result in better codegen overall +/// though. It's a worthwhile experiment to try. Probably the most interesting +/// benchmark to run in such a case would be one with a high match count. That +/// is, a benchmark to test the overall latency of a search call. +pub(super) trait Strategy: + Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static +{ + fn group_info(&self) -> &GroupInfo; + + fn create_cache(&self) -> Cache; + + fn reset_cache(&self, cache: &mut Cache); + + fn is_accelerated(&self) -> bool; + + fn memory_usage(&self) -> usize; + + fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option; + + fn search_half( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option; + + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool; + + fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option; + + fn which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ); +} + +pub(super) fn new( + info: &RegexInfo, + hirs: &[&Hir], +) -> Result, BuildError> { + // At this point, we're committed to a regex engine of some kind. So pull + // out a prefilter if we can, which will feed to each of the constituent + // regex engines. + let pre = if info.is_always_anchored_start() { + // PERF: I'm not sure we necessarily want to do this... We may want to + // run a prefilter for quickly rejecting in some cases. The problem + // is that anchored searches overlap quite a bit with the use case + // of "run a regex on every line to extract data." In that case, the + // regex always matches, so running a prefilter doesn't really help us + // there. The main place where a prefilter helps in an anchored search + // is if the anchored search is not expected to match frequently. That + // is, the prefilter gives us a way to possibly reject a haystack very + // quickly. + // + // Maybe we should do use a prefilter, but only for longer haystacks? + // Or maybe we should only use a prefilter when we think it's "fast"? + // + // Interestingly, I think we currently lack the infrastructure for + // disabling a prefilter based on haystack length. That would probably + // need to be a new 'Input' option. (Interestingly, an 'Input' used to + // carry a 'Prefilter' with it, but I moved away from that.) + debug!("skipping literal extraction since regex is anchored"); + None + } else if let Some(pre) = info.config().get_prefilter() { + debug!( + "skipping literal extraction since the caller provided a prefilter" + ); + Some(pre.clone()) + } else if info.config().get_auto_prefilter() { + let kind = info.config().get_match_kind(); + let prefixes = crate::util::prefilter::prefixes(kind, hirs); + // If we can build a full `Strategy` from just the extracted prefixes, + // then we can short-circuit and avoid building a regex engine at all. + if let Some(pre) = Pre::from_prefixes(info, &prefixes) { + debug!( + "found that the regex can be broken down to a literal \ + search, avoiding the regex engine entirely", + ); + return Ok(pre); + } + // This now attempts another short-circuit of the regex engine: if we + // have a huge alternation of just plain literals, then we can just use + // Aho-Corasick for that and avoid the regex engine entirely. 
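An editorial note on the bypass just described: whether the extracted prefixes are "exact", meaning they enumerate the entire language of the regex, is the key precondition. The sketch below uses regex-syntax's public literal extractor to show the distinction; it is illustrative, not the crate-internal call path, and the patterns are made up:

```rust
use regex_syntax::{hir::literal, parse};

fn main() {
    // A plain alternation of literals extracts an exact sequence: the
    // literals enumerate every string the regex can match, so a literal
    // searcher alone can stand in for the regex engine.
    let hir = parse(r"foo|bar|quux").unwrap();
    let seq = literal::Extractor::new().extract(&hir);
    assert!(seq.is_exact());

    // Add a tail the extractor cannot fully enumerate and the prefixes
    // become inexact; they can still serve as a prefilter, but every
    // candidate must then be confirmed by a real regex engine.
    let hir = parse(r"(foo|bar)[0-9]+").unwrap();
    let seq = literal::Extractor::new().extract(&hir);
    assert!(!seq.is_exact());
}
```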
+ // + // You might think this case would just be handled by + // `Pre::from_prefixes`, but that technique relies on heuristic literal + // extraction from the corresponding `Hir`. That works, but part of + // heuristics limit the size and number of literals returned. This case + // will specifically handle patterns with very large alternations. + // + // One wonders if we should just roll this our heuristic literal + // extraction, and then I think this case could disappear entirely. + if let Some(pre) = Pre::from_alternation_literals(info, hirs) { + debug!( + "found plain alternation of literals, \ + avoiding regex engine entirely and using Aho-Corasick" + ); + return Ok(pre); + } + prefixes.literals().and_then(|strings| { + debug!( + "creating prefilter from {} literals: {:?}", + strings.len(), + strings, + ); + Prefilter::new(kind, strings) + }) + } else { + debug!("skipping literal extraction since prefilters were disabled"); + None + }; + let mut core = Core::new(info.clone(), pre.clone(), hirs)?; + // Now that we have our core regex engines built, there are a few cases + // where we can do a little bit better than just a normal "search forward + // and maybe use a prefilter when in a start state." However, these cases + // may not always work or otherwise build on top of the Core searcher. + // For example, the reverse anchored optimization seems like it might + // always work, but only the DFAs support reverse searching and the DFAs + // might give up or quit for reasons. If we had, e.g., a PikeVM that + // supported reverse searching, then we could avoid building a full Core + // engine for this case. + core = match ReverseAnchored::new(core) { + Err(core) => core, + Ok(ra) => { + debug!("using reverse anchored strategy"); + return Ok(Arc::new(ra)); + } + }; + core = match ReverseSuffix::new(core, hirs) { + Err(core) => core, + Ok(rs) => { + debug!("using reverse suffix strategy"); + return Ok(Arc::new(rs)); + } + }; + core = match ReverseInner::new(core, hirs) { + Err(core) => core, + Ok(ri) => { + debug!("using reverse inner strategy"); + return Ok(Arc::new(ri)); + } + }; + debug!("using core strategy"); + Ok(Arc::new(core)) +} + +#[derive(Clone, Debug)] +struct Pre
<P> { + pre: P, + group_info: GroupInfo, +} + +impl<P: PrefilterI> Pre<P>
{ + fn new(pre: P) -> Arc { + // The only thing we support when we use prefilters directly as a + // strategy is the start and end of the overall match for a single + // pattern. In other words, exactly one implicit capturing group. Which + // is exactly what we use here for a GroupInfo. + let group_info = GroupInfo::new([[None::<&str>]]).unwrap(); + Arc::new(Pre { pre, group_info }) + } +} + +// This is a little weird, but we don't actually care about the type parameter +// here because we're selecting which underlying prefilter to use. So we just +// define it on an arbitrary type. +impl Pre<()> { + /// Given a sequence of prefixes, attempt to return a full `Strategy` using + /// just the prefixes. + /// + /// Basically, this occurs when the prefixes given not just prefixes, + /// but an enumeration of the entire language matched by the regular + /// expression. + /// + /// A number of other conditions need to be true too. For example, there + /// can be only one pattern, the number of explicit capture groups is 0, no + /// look-around assertions and so on. + /// + /// Note that this ignores `Config::get_auto_prefilter` because if this + /// returns something, then it isn't a prefilter but a matcher itself. + /// Therefore, it shouldn't suffer from the problems typical to prefilters + /// (such as a high false positive rate). + fn from_prefixes( + info: &RegexInfo, + prefixes: &literal::Seq, + ) -> Option> { + let kind = info.config().get_match_kind(); + // Check to see if our prefixes are exact, which means we might be + // able to bypass the regex engine entirely and just rely on literal + // searches. + if !prefixes.is_exact() { + return None; + } + // We also require that we have a single regex pattern. Namely, + // we reuse the prefilter infrastructure to implement search and + // prefilters only report spans. Prefilters don't know about pattern + // IDs. The multi-regex case isn't a lost cause, we might still use + // Aho-Corasick and we might still just use a regular prefilter, but + // that's done below. + if info.pattern_len() != 1 { + return None; + } + // We can't have any capture groups either. The literal engines don't + // know how to deal with things like '(foo)(bar)'. In that case, a + // prefilter will just be used and then the regex engine will resolve + // the capture groups. + if info.props()[0].explicit_captures_len() != 0 { + return None; + } + // We also require that it has zero look-around assertions. Namely, + // literal extraction treats look-around assertions as if they match + // *every* empty string. But of course, that isn't true. So for + // example, 'foo\bquux' never matches anything, but 'fooquux' is + // extracted from that as an exact literal. Such cases should just run + // the regex engine. 'fooquux' will be used as a normal prefilter, and + // then the regex engine will try to look for an actual match. + if !info.props()[0].look_set().is_empty() { + return None; + } + // Finally, currently, our prefilters are all oriented around + // leftmost-first match semantics, so don't try to use them if the + // caller asked for anything else. + if kind != MatchKind::LeftmostFirst { + return None; + } + // The above seems like a lot of requirements to meet, but it applies + // to a lot of cases. 'foo', '[abc][123]' and 'foo|bar|quux' all meet + // the above criteria, for example. + // + // Note that this is effectively a latency optimization. 
If we didn't + // do this, then the extracted literals would still get bundled into + // a prefilter, and every regex engine capable of running unanchored + // searches supports prefilters. So this optimization merely sidesteps + // having to run the regex engine at all to confirm the match. Thus, it + // decreases the latency of a match. + + // OK because we know the set is exact and thus finite. + let prefixes = prefixes.literals().unwrap(); + debug!( + "trying to bypass regex engine by creating \ + prefilter from {} literals: {:?}", + prefixes.len(), + prefixes, + ); + let choice = match prefilter::Choice::new(kind, prefixes) { + Some(choice) => choice, + None => { + debug!( + "regex bypass failed because no prefilter could be built" + ); + return None; + } + }; + let strat: Arc = match choice { + prefilter::Choice::Memchr(pre) => Pre::new(pre), + prefilter::Choice::Memchr2(pre) => Pre::new(pre), + prefilter::Choice::Memchr3(pre) => Pre::new(pre), + prefilter::Choice::Memmem(pre) => Pre::new(pre), + prefilter::Choice::Teddy(pre) => Pre::new(pre), + prefilter::Choice::ByteSet(pre) => Pre::new(pre), + prefilter::Choice::AhoCorasick(pre) => Pre::new(pre), + }; + Some(strat) + } + + /// Attempts to extract an alternation of literals, and if it's deemed + /// worth doing, returns an Aho-Corasick prefilter as a strategy. + /// + /// And currently, this only returns something when 'hirs.len() == 1'. This + /// could in theory do something if there are multiple HIRs where all of + /// them are alternation of literals, but I haven't had the time to go down + /// that path yet. + fn from_alternation_literals( + info: &RegexInfo, + hirs: &[&Hir], + ) -> Option> { + use crate::util::prefilter::AhoCorasick; + + let lits = crate::meta::literal::alternation_literals(info, hirs)?; + let ac = AhoCorasick::new(MatchKind::LeftmostFirst, &lits)?; + Some(Pre::new(ac)) + } +} + +// This implements Strategy for anything that implements PrefilterI. +// +// Note that this must only be used for regexes of length 1. Multi-regexes +// don't work here. The prefilter interface only provides the span of a match +// and not the pattern ID. (I did consider making it more expressive, but I +// couldn't figure out how to tie everything together elegantly.) Thus, so long +// as the regex only contains one pattern, we can simply assume that a match +// corresponds to PatternID::ZERO. And indeed, that's what we do here. +// +// In practice, since this impl is used to report matches directly and thus +// completely bypasses the regex engine, we only wind up using this under the +// following restrictions: +// +// * There must be only one pattern. As explained above. +// * The literal sequence must be finite and only contain exact literals. +// * There must not be any look-around assertions. If there are, the literals +// extracted might be exact, but a match doesn't necessarily imply an overall +// match. As a trivial example, 'foo\bbar' does not match 'foobar'. +// * The pattern must not have any explicit capturing groups. If it does, the +// caller might expect them to be resolved. e.g., 'foo(bar)'. +// +// So when all of those things are true, we use a prefilter directly as a +// strategy. +// +// In the case where the number of patterns is more than 1, we don't use this +// but do use a special Aho-Corasick strategy if all of the regexes are just +// simple literals or alternations of literals. (We also use the Aho-Corasick +// strategy when len(patterns)==1 if the number of literals is large. 
In that +// case, literal extraction gives up and will return an infinite set.) +impl<P: PrefilterI> Strategy for Pre<P>
{ + #[cfg_attr(feature = "perf-inline", inline(always))] + fn group_info(&self) -> &GroupInfo { + &self.group_info + } + + fn create_cache(&self) -> Cache { + Cache { + capmatches: Captures::all(self.group_info().clone()), + pikevm: wrappers::PikeVMCache::none(), + backtrack: wrappers::BoundedBacktrackerCache::none(), + onepass: wrappers::OnePassCache::none(), + hybrid: wrappers::HybridCache::none(), + revhybrid: wrappers::ReverseHybridCache::none(), + } + } + + fn reset_cache(&self, _cache: &mut Cache) {} + + fn is_accelerated(&self) -> bool { + self.pre.is_fast() + } + + fn memory_usage(&self) -> usize { + self.pre.memory_usage() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option { + if input.is_done() { + return None; + } + if input.get_anchored().is_anchored() { + return self + .pre + .prefix(input.haystack(), input.get_span()) + .map(|sp| Match::new(PatternID::ZERO, sp)); + } + self.pre + .find(input.haystack(), input.get_span()) + .map(|sp| Match::new(PatternID::ZERO, sp)) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_half( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + self.search(cache, input).map(|m| HalfMatch::new(m.pattern(), m.end())) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + self.search(cache, input).is_some() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + let m = self.search(cache, input)?; + if let Some(slot) = slots.get_mut(0) { + *slot = NonMaxUsize::new(m.start()); + } + if let Some(slot) = slots.get_mut(1) { + *slot = NonMaxUsize::new(m.end()); + } + Some(m.pattern()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + if self.search(cache, input).is_some() { + patset.insert(PatternID::ZERO); + } + } +} + +#[derive(Debug)] +struct Core { + info: RegexInfo, + pre: Option, + nfa: NFA, + nfarev: Option, + pikevm: wrappers::PikeVM, + backtrack: wrappers::BoundedBacktracker, + onepass: wrappers::OnePass, + hybrid: wrappers::Hybrid, + dfa: wrappers::DFA, +} + +impl Core { + fn new( + info: RegexInfo, + pre: Option, + hirs: &[&Hir], + ) -> Result { + let mut lookm = LookMatcher::new(); + lookm.set_line_terminator(info.config().get_line_terminator()); + let thompson_config = thompson::Config::new() + .utf8(info.config().get_utf8_empty()) + .nfa_size_limit(info.config().get_nfa_size_limit()) + .shrink(false) + .which_captures(info.config().get_which_captures()) + .look_matcher(lookm); + let nfa = thompson::Compiler::new() + .configure(thompson_config.clone()) + .build_many_from_hir(hirs) + .map_err(BuildError::nfa)?; + // It's possible for the PikeVM or the BB to fail to build, even though + // at this point, we already have a full NFA in hand. They can fail + // when a Unicode word boundary is used but where Unicode word boundary + // support is disabled at compile time, thus making it impossible to + // match. (Construction can also fail if the NFA was compiled without + // captures, but we always enable that above.) 
+ let pikevm = wrappers::PikeVM::new(&info, pre.clone(), &nfa)?; + let backtrack = + wrappers::BoundedBacktracker::new(&info, pre.clone(), &nfa)?; + // The onepass engine can of course fail to build, but we expect it to + // fail in many cases because it is an optimization that doesn't apply + // to all regexes. The 'OnePass' wrapper encapsulates this failure (and + // logs a message if it occurs). + let onepass = wrappers::OnePass::new(&info, &nfa); + // We try to encapsulate whether a particular regex engine should be + // used within each respective wrapper, but the DFAs need a reverse NFA + // to build itself, and we really do not want to build a reverse NFA if + // we know we aren't going to use the lazy DFA. So we do a config check + // up front, which is in practice the only way we won't try to use the + // DFA. + let (nfarev, hybrid, dfa) = + if !info.config().get_hybrid() && !info.config().get_dfa() { + (None, wrappers::Hybrid::none(), wrappers::DFA::none()) + } else { + // FIXME: Technically, we don't quite yet KNOW that we need + // a reverse NFA. It's possible for the DFAs below to both + // fail to build just based on the forward NFA. In which case, + // building the reverse NFA was totally wasted work. But... + // fixing this requires breaking DFA construction apart into + // two pieces: one for the forward part and another for the + // reverse part. Quite annoying. Making it worse, when building + // both DFAs fails, it's quite likely that the NFA is large and + // that it will take quite some time to build the reverse NFA + // too. So... it's really probably worth it to do this! + let nfarev = thompson::Compiler::new() + // Currently, reverse NFAs don't support capturing groups, + // so we MUST disable them. But even if we didn't have to, + // we would, because nothing in this crate does anything + // useful with capturing groups in reverse. And of course, + // the lazy DFA ignores capturing groups in all cases. + .configure( + thompson_config + .clone() + .which_captures(WhichCaptures::None) + .reverse(true), + ) + .build_many_from_hir(hirs) + .map_err(BuildError::nfa)?; + let dfa = if !info.config().get_dfa() { + wrappers::DFA::none() + } else { + wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev) + }; + let hybrid = if !info.config().get_hybrid() { + wrappers::Hybrid::none() + } else if dfa.is_some() { + debug!("skipping lazy DFA because we have a full DFA"); + wrappers::Hybrid::none() + } else { + wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev) + }; + (Some(nfarev), hybrid, dfa) + }; + Ok(Core { + info, + pre, + nfa, + nfarev, + pikevm, + backtrack, + onepass, + hybrid, + dfa, + }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_mayfail( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option, RetryFailError>> { + if let Some(e) = self.dfa.get(input) { + trace!("using full DFA for search at {:?}", input.get_span()); + Some(e.try_search(input)) + } else if let Some(e) = self.hybrid.get(input) { + trace!("using lazy DFA for search at {:?}", input.get_span()); + Some(e.try_search(&mut cache.hybrid, input)) + } else { + None + } + } + + fn search_nofail( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + let caps = &mut cache.capmatches; + caps.set_pattern(None); + // We manually inline 'try_search_slots_nofail' here because we need to + // borrow from 'cache.capmatches' in this method, but if we do, then + // we can't pass 'cache' wholesale to to 'try_slots_no_hybrid'. 
It's a + // classic example of how the borrow checker inhibits decomposition. + // There are of course work-arounds (more types and/or interior + // mutability), but that's more annoying than this IMO. + let pid = if let Some(ref e) = self.onepass.get(input) { + trace!("using OnePass for search at {:?}", input.get_span()); + e.search_slots(&mut cache.onepass, input, caps.slots_mut()) + } else if let Some(ref e) = self.backtrack.get(input) { + trace!( + "using BoundedBacktracker for search at {:?}", + input.get_span() + ); + e.search_slots(&mut cache.backtrack, input, caps.slots_mut()) + } else { + trace!("using PikeVM for search at {:?}", input.get_span()); + let e = self.pikevm.get(); + e.search_slots(&mut cache.pikevm, input, caps.slots_mut()) + }; + caps.set_pattern(pid); + caps.get_match() + } + + fn search_half_nofail( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + // Only the lazy/full DFA returns half-matches, since the DFA requires + // a reverse scan to find the start position. These fallback regex + // engines can find the start and end in a single pass, so we just do + // that and throw away the start offset to conform to the API. + let m = self.search_nofail(cache, input)?; + Some(HalfMatch::new(m.pattern(), m.end())) + } + + fn search_slots_nofail( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + if let Some(ref e) = self.onepass.get(input) { + trace!( + "using OnePass for capture search at {:?}", + input.get_span() + ); + e.search_slots(&mut cache.onepass, input, slots) + } else if let Some(ref e) = self.backtrack.get(input) { + trace!( + "using BoundedBacktracker for capture search at {:?}", + input.get_span() + ); + e.search_slots(&mut cache.backtrack, input, slots) + } else { + trace!( + "using PikeVM for capture search at {:?}", + input.get_span() + ); + let e = self.pikevm.get(); + e.search_slots(&mut cache.pikevm, input, slots) + } + } + + fn is_match_nofail(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if let Some(ref e) = self.onepass.get(input) { + trace!( + "using OnePass for is-match search at {:?}", + input.get_span() + ); + e.search_slots(&mut cache.onepass, input, &mut []).is_some() + } else if let Some(ref e) = self.backtrack.get(input) { + trace!( + "using BoundedBacktracker for is-match search at {:?}", + input.get_span() + ); + e.is_match(&mut cache.backtrack, input) + } else { + trace!( + "using PikeVM for is-match search at {:?}", + input.get_span() + ); + let e = self.pikevm.get(); + e.is_match(&mut cache.pikevm, input) + } + } + + fn is_capture_search_needed(&self, slots_len: usize) -> bool { + slots_len > self.nfa.group_info().implicit_slot_len() + } +} + +impl Strategy for Core { + #[cfg_attr(feature = "perf-inline", inline(always))] + fn group_info(&self) -> &GroupInfo { + self.nfa.group_info() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn create_cache(&self) -> Cache { + Cache { + capmatches: Captures::all(self.group_info().clone()), + pikevm: self.pikevm.create_cache(), + backtrack: self.backtrack.create_cache(), + onepass: self.onepass.create_cache(), + hybrid: self.hybrid.create_cache(), + revhybrid: wrappers::ReverseHybridCache::none(), + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn reset_cache(&self, cache: &mut Cache) { + cache.pikevm.reset(&self.pikevm); + cache.backtrack.reset(&self.backtrack); + cache.onepass.reset(&self.onepass); + cache.hybrid.reset(&self.hybrid); + } + + fn is_accelerated(&self) -> bool { + 
self.pre.as_ref().map_or(false, |pre| pre.is_fast()) + } + + fn memory_usage(&self) -> usize { + self.info.memory_usage() + + self.pre.as_ref().map_or(0, |pre| pre.memory_usage()) + + self.nfa.memory_usage() + + self.nfarev.as_ref().map_or(0, |nfa| nfa.memory_usage()) + + self.onepass.memory_usage() + + self.dfa.memory_usage() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option { + // We manually inline try_search_mayfail here because letting the + // compiler do it seems to produce pretty crappy codegen. + return if let Some(e) = self.dfa.get(input) { + trace!("using full DFA for full search at {:?}", input.get_span()); + match e.try_search(input) { + Ok(x) => x, + Err(_err) => { + trace!("full DFA search failed: {}", _err); + self.search_nofail(cache, input) + } + } + } else if let Some(e) = self.hybrid.get(input) { + trace!("using lazy DFA for full search at {:?}", input.get_span()); + match e.try_search(&mut cache.hybrid, input) { + Ok(x) => x, + Err(_err) => { + trace!("lazy DFA search failed: {}", _err); + self.search_nofail(cache, input) + } + } + } else { + self.search_nofail(cache, input) + }; + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_half( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + // The main difference with 'search' is that if we're using a DFA, we + // can use a single forward scan without needing to run the reverse + // DFA. + if let Some(e) = self.dfa.get(input) { + trace!("using full DFA for half search at {:?}", input.get_span()); + match e.try_search_half_fwd(input) { + Ok(x) => x, + Err(_err) => { + trace!("full DFA half search failed: {}", _err); + self.search_half_nofail(cache, input) + } + } + } else if let Some(e) = self.hybrid.get(input) { + trace!("using lazy DFA for half search at {:?}", input.get_span()); + match e.try_search_half_fwd(&mut cache.hybrid, input) { + Ok(x) => x, + Err(_err) => { + trace!("lazy DFA half search failed: {}", _err); + self.search_half_nofail(cache, input) + } + } + } else { + self.search_half_nofail(cache, input) + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if let Some(e) = self.dfa.get(input) { + trace!( + "using full DFA for is-match search at {:?}", + input.get_span() + ); + match e.try_search_half_fwd(input) { + Ok(x) => x.is_some(), + Err(_err) => { + trace!("full DFA half search failed: {}", _err); + self.is_match_nofail(cache, input) + } + } + } else if let Some(e) = self.hybrid.get(input) { + trace!( + "using lazy DFA for is-match search at {:?}", + input.get_span() + ); + match e.try_search_half_fwd(&mut cache.hybrid, input) { + Ok(x) => x.is_some(), + Err(_err) => { + trace!("lazy DFA half search failed: {}", _err); + self.is_match_nofail(cache, input) + } + } + } else { + self.is_match_nofail(cache, input) + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + // Even if the regex has explicit capture groups, if the caller didn't + // provide any explicit slots, then it doesn't make sense to try and do + // extra work to get offsets for those slots. Ideally the caller should + // realize this and not call this routine in the first place, but alas, + // we try to save the caller from themselves if they do. 
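+        //
+        // As a refresher on the slot layout relied on here: every pattern
+        // owns two implicit slots holding the bounds of its overall match,
+        // at indices 2*pid and 2*pid+1, and slots for explicit capture
+        // groups only come after those. So the fast path below, for a match
+        // of pattern 0 at 3..7, amounts to (modulo bounds checks; see
+        // 'copy_match_to_slots' at the bottom of this file):
+        //
+        //     let m = Match::must(0, 3..7);
+        //     slots[0] = NonMaxUsize::new(3); // implicit start slot
+        //     slots[1] = NonMaxUsize::new(7); // implicit end slot
+        //     return Some(m.pattern());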
+        if !self.is_capture_search_needed(slots.len()) {
+            trace!("asked for slots unnecessarily, trying fast path");
+            let m = self.search(cache, input)?;
+            copy_match_to_slots(m, slots);
+            return Some(m.pattern());
+        }
+        // If the onepass DFA is available for this search (which only happens
+        // when it's anchored), then skip running a fallible DFA. The onepass
+        // DFA isn't as fast as a full or lazy DFA, but it is typically quite
+        // a bit faster than the backtracker or the PikeVM. So it isn't as
+        // advantageous to try and do a full/lazy DFA scan first.
+        //
+        // We still theorize that it's better to do a full/lazy DFA scan, even
+        // when it's anchored, because it's usually much faster and permits us
+        // to say "no match" much more quickly. This does hurt the case of,
+        // say, parsing each line in a log file into capture groups, because
+        // in that case, the line always matches. So the lazy DFA scan is
+        // usually just wasted work. But, the lazy DFA is usually quite fast
+        // and doesn't cost too much here.
+        if self.onepass.get(&input).is_some() {
+            return self.search_slots_nofail(cache, &input, slots);
+        }
+        let m = match self.try_search_mayfail(cache, input) {
+            Some(Ok(Some(m))) => m,
+            Some(Ok(None)) => return None,
+            Some(Err(_err)) => {
+                trace!("fast capture search failed: {}", _err);
+                return self.search_slots_nofail(cache, input, slots);
+            }
+            None => {
+                return self.search_slots_nofail(cache, input, slots);
+            }
+        };
+        // At this point, now that we've found the bounds of the
+        // match, we need to re-run something that can resolve
+        // capturing groups. But we only need to run it on the
+        // match bounds and not the entire haystack.
+        trace!(
+            "match found at {}..{} in capture search, \
+             using another engine to find captures",
+            m.start(),
+            m.end(),
+        );
+        let input = input
+            .clone()
+            .span(m.start()..m.end())
+            .anchored(Anchored::Pattern(m.pattern()));
+        Some(
+            self.search_slots_nofail(cache, &input, slots)
+                .expect("should find a match"),
+        )
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn which_overlapping_matches(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) {
+        if let Some(e) = self.dfa.get(input) {
+            trace!(
+                "using full DFA for overlapping search at {:?}",
+                input.get_span()
+            );
+            let _err = match e.try_which_overlapping_matches(input, patset) {
+                Ok(()) => return,
+                Err(err) => err,
+            };
+            trace!("fast overlapping search failed: {}", _err);
+        } else if let Some(e) = self.hybrid.get(input) {
+            trace!(
+                "using lazy DFA for overlapping search at {:?}",
+                input.get_span()
+            );
+            let _err = match e.try_which_overlapping_matches(
+                &mut cache.hybrid,
+                input,
+                patset,
+            ) {
+                Ok(()) => {
+                    return;
+                }
+                Err(err) => err,
+            };
+            trace!("fast overlapping search failed: {}", _err);
+        }
+        trace!(
+            "using PikeVM for overlapping search at {:?}",
+            input.get_span()
+        );
+        let e = self.pikevm.get();
+        e.which_overlapping_matches(&mut cache.pikevm, input, patset)
+    }
+}
+
+#[derive(Debug)]
+struct ReverseAnchored {
+    core: Core,
+}
+
+impl ReverseAnchored {
+    fn new(core: Core) -> Result<ReverseAnchored, Core> {
+        if !core.info.is_always_anchored_end() {
+            debug!(
+                "skipping reverse anchored optimization because \
+                 the regex is not always anchored at the end"
+            );
+            return Err(core);
+        }
+        // Note that the caller can still request an anchored search even when
+        // the regex isn't anchored at the start. We detect that case in the
+        // search routines below and just fallback to the core engine.
This + // is fine because both searches are anchored. It's just a matter of + // picking one. Falling back to the core engine is a little simpler, + // since if we used the reverse anchored approach, we'd have to add an + // extra check to ensure the match reported starts at the place where + // the caller requested the search to start. + if core.info.is_always_anchored_start() { + debug!( + "skipping reverse anchored optimization because \ + the regex is also anchored at the start" + ); + return Err(core); + } + // Only DFAs can do reverse searches (currently), so we need one of + // them in order to do this optimization. It's possible (although + // pretty unlikely) that we have neither and need to give up. + if !core.hybrid.is_some() && !core.dfa.is_some() { + debug!( + "skipping reverse anchored optimization because \ + we don't have a lazy DFA or a full DFA" + ); + return Err(core); + } + Ok(ReverseAnchored { core }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_anchored_rev( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + // We of course always want an anchored search. In theory, the + // underlying regex engines should automatically enable anchored + // searches since the regex is itself anchored, but this more clearly + // expresses intent and is always correct. + let input = input.clone().anchored(Anchored::Yes); + if let Some(e) = self.core.dfa.get(&input) { + trace!( + "using full DFA for reverse anchored search at {:?}", + input.get_span() + ); + e.try_search_half_rev(&input) + } else if let Some(e) = self.core.hybrid.get(&input) { + trace!( + "using lazy DFA for reverse anchored search at {:?}", + input.get_span() + ); + e.try_search_half_rev(&mut cache.hybrid, &input) + } else { + unreachable!("ReverseAnchored always has a DFA") + } + } +} + +// Note that in this impl, we don't check that 'input.end() == +// input.haystack().len()'. In particular, when that condition is false, a +// match is always impossible because we know that the regex is always anchored +// at the end (or else 'ReverseAnchored' won't be built). We don't check that +// here because the 'Regex' wrapper actually does that for us in all cases. +// Thus, in this impl, we can actually assume that the end position in 'input' +// is equivalent to the length of the haystack. +impl Strategy for ReverseAnchored { + #[cfg_attr(feature = "perf-inline", inline(always))] + fn group_info(&self) -> &GroupInfo { + self.core.group_info() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn create_cache(&self) -> Cache { + self.core.create_cache() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn reset_cache(&self, cache: &mut Cache) { + self.core.reset_cache(cache); + } + + fn is_accelerated(&self) -> bool { + // Since this is anchored at the end, a reverse anchored search is + // almost certainly guaranteed to result in a much faster search than + // a standard forward search. 
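+        //
+        // For intuition, consider r"[a-z]+$" on a long haystack: a forward
+        // scan must consider every starting position, while this strategy
+        // runs one anchored reverse scan from the end. A rough sketch of
+        // the caller-visible behavior (the leftmost match still wins, it is
+        // just found from the back):
+        //
+        //     use regex_automata::meta::Regex;
+        //
+        //     let re = Regex::new(r"[a-z]+$").unwrap();
+        //     let hay = "xyz ".repeat(1_000) + "abc";
+        //     assert_eq!(Some(4000..4003), re.find(&hay).map(|m| m.range()));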
+        true
+    }
+
+    fn memory_usage(&self) -> usize {
+        self.core.memory_usage()
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
+        if input.get_anchored().is_anchored() {
+            return self.core.search(cache, input);
+        }
+        match self.try_search_half_anchored_rev(cache, input) {
+            Err(_err) => {
+                trace!("fast reverse anchored search failed: {}", _err);
+                self.core.search_nofail(cache, input)
+            }
+            Ok(None) => None,
+            Ok(Some(hm)) => {
+                Some(Match::new(hm.pattern(), hm.offset()..input.end()))
+            }
+        }
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn search_half(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+    ) -> Option<HalfMatch> {
+        if input.get_anchored().is_anchored() {
+            return self.core.search_half(cache, input);
+        }
+        match self.try_search_half_anchored_rev(cache, input) {
+            Err(_err) => {
+                trace!("fast reverse anchored search failed: {}", _err);
+                self.core.search_half_nofail(cache, input)
+            }
+            Ok(None) => None,
+            Ok(Some(hm)) => {
+                // Careful here! 'try_search_half' is a *forward* search that
+                // only cares about the *end* position of a match. But
+                // 'hm.offset()' is actually the start of the match. So we
+                // actually just throw that away here and, since we know we
+                // have a match, return the only possible position at which a
+                // match can occur: input.end().
+                Some(HalfMatch::new(hm.pattern(), input.end()))
+            }
+        }
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
+        if input.get_anchored().is_anchored() {
+            return self.core.is_match(cache, input);
+        }
+        match self.try_search_half_anchored_rev(cache, input) {
+            Err(_err) => {
+                trace!("fast reverse anchored search failed: {}", _err);
+                self.core.is_match_nofail(cache, input)
+            }
+            Ok(None) => false,
+            Ok(Some(_)) => true,
+        }
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn search_slots(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<PatternID> {
+        if input.get_anchored().is_anchored() {
+            return self.core.search_slots(cache, input, slots);
+        }
+        match self.try_search_half_anchored_rev(cache, input) {
+            Err(_err) => {
+                trace!("fast reverse anchored search failed: {}", _err);
+                self.core.search_slots_nofail(cache, input, slots)
+            }
+            Ok(None) => None,
+            Ok(Some(hm)) => {
+                if !self.core.is_capture_search_needed(slots.len()) {
+                    trace!("asked for slots unnecessarily, skipping captures");
+                    let m = Match::new(hm.pattern(), hm.offset()..input.end());
+                    copy_match_to_slots(m, slots);
+                    return Some(m.pattern());
+                }
+                let start = hm.offset();
+                let input = input
+                    .clone()
+                    .span(start..input.end())
+                    .anchored(Anchored::Pattern(hm.pattern()));
+                self.core.search_slots_nofail(cache, &input, slots)
+            }
+        }
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn which_overlapping_matches(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) {
+        // It seems like this could probably benefit from a reverse anchored
+        // optimization, perhaps by doing an overlapping reverse search (which
+        // the DFAs do support). I haven't given it much thought though, and
+        // I'm currently focused more on the single pattern case.
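+        //
+        // For reference, this serves the caller-visible API below, which
+        // collects every pattern that matches anywhere in the haystack (a
+        // rough sketch):
+        //
+        //     use regex_automata::{meta::Regex, Input, PatternSet};
+        //
+        //     let re = Regex::new_many(&[r"\w+$", r"[a-z]+$"]).unwrap();
+        //     let mut patset = PatternSet::new(re.pattern_len());
+        //     re.which_overlapping_matches(&Input::new("abc"), &mut patset);
+        //     // both patterns match here, so patset contains {0, 1}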
+ self.core.which_overlapping_matches(cache, input, patset) + } +} + +#[derive(Debug)] +struct ReverseSuffix { + core: Core, + pre: Prefilter, +} + +impl ReverseSuffix { + fn new(core: Core, hirs: &[&Hir]) -> Result { + if !core.info.config().get_auto_prefilter() { + debug!( + "skipping reverse suffix optimization because \ + automatic prefilters are disabled" + ); + return Err(core); + } + // Like the reverse inner optimization, we don't do this for regexes + // that are always anchored. It could lead to scanning too much, but + // could say "no match" much more quickly than running the regex + // engine if the initial literal scan doesn't match. With that said, + // the reverse suffix optimization has lower overhead, since it only + // requires a reverse scan after a literal match to confirm or reject + // the match. (Although, in the case of confirmation, it then needs to + // do another forward scan to find the end position.) + // + // Note that the caller can still request an anchored search even + // when the regex isn't anchored. We detect that case in the search + // routines below and just fallback to the core engine. Currently this + // optimization assumes all searches are unanchored, so if we do want + // to enable this optimization for anchored searches, it will need a + // little work to support it. + if core.info.is_always_anchored_start() { + debug!( + "skipping reverse suffix optimization because \ + the regex is always anchored at the start", + ); + return Err(core); + } + // Only DFAs can do reverse searches (currently), so we need one of + // them in order to do this optimization. It's possible (although + // pretty unlikely) that we have neither and need to give up. + if !core.hybrid.is_some() && !core.dfa.is_some() { + debug!( + "skipping reverse suffix optimization because \ + we don't have a lazy DFA or a full DFA" + ); + return Err(core); + } + if core.pre.as_ref().map_or(false, |p| p.is_fast()) { + debug!( + "skipping reverse suffix optimization because \ + we already have a prefilter that we think is fast" + ); + return Err(core); + } + let kind = core.info.config().get_match_kind(); + let suffixes = crate::util::prefilter::suffixes(kind, hirs); + let lcs = match suffixes.longest_common_suffix() { + None => { + debug!( + "skipping reverse suffix optimization because \ + a longest common suffix could not be found", + ); + return Err(core); + } + Some(lcs) if lcs.is_empty() => { + debug!( + "skipping reverse suffix optimization because \ + the longest common suffix is the empty string", + ); + return Err(core); + } + Some(lcs) => lcs, + }; + let pre = match Prefilter::new(kind, &[lcs]) { + Some(pre) => pre, + None => { + debug!( + "skipping reverse suffix optimization because \ + a prefilter could not be constructed from the \ + longest common suffix", + ); + return Err(core); + } + }; + if !pre.is_fast() { + debug!( + "skipping reverse suffix optimization because \ + while we have a suffix prefilter, it is not \ + believed to be 'fast'" + ); + return Err(core); + } + Ok(ReverseSuffix { core, pre }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_start( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, RetryError> { + let mut span = input.get_span(); + let mut min_start = 0; + loop { + let litmatch = match self.pre.find(input.haystack(), span) { + None => return Ok(None), + Some(span) => span, + }; + trace!("reverse suffix scan found suffix match at {:?}", litmatch); + let revinput = input + .clone() + 
.anchored(Anchored::Yes) + .span(input.start()..litmatch.end); + match self + .try_search_half_rev_limited(cache, &revinput, min_start)? + { + None => { + if span.start >= span.end { + break; + } + span.start = litmatch.start.checked_add(1).unwrap(); + } + Some(hm) => return Ok(Some(hm)), + } + min_start = litmatch.end; + } + Ok(None) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_fwd( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + if let Some(e) = self.core.dfa.get(&input) { + trace!( + "using full DFA for forward reverse suffix search at {:?}", + input.get_span() + ); + e.try_search_half_fwd(&input) + } else if let Some(e) = self.core.hybrid.get(&input) { + trace!( + "using lazy DFA for forward reverse suffix search at {:?}", + input.get_span() + ); + e.try_search_half_fwd(&mut cache.hybrid, &input) + } else { + unreachable!("ReverseSuffix always has a DFA") + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_rev_limited( + &self, + cache: &mut Cache, + input: &Input<'_>, + min_start: usize, + ) -> Result, RetryError> { + if let Some(e) = self.core.dfa.get(&input) { + trace!( + "using full DFA for reverse suffix search at {:?}, \ + but will be stopped at {} to avoid quadratic behavior", + input.get_span(), + min_start, + ); + e.try_search_half_rev_limited(&input, min_start) + } else if let Some(e) = self.core.hybrid.get(&input) { + trace!( + "using lazy DFA for reverse suffix search at {:?}, \ + but will be stopped at {} to avoid quadratic behavior", + input.get_span(), + min_start, + ); + e.try_search_half_rev_limited(&mut cache.hybrid, &input, min_start) + } else { + unreachable!("ReverseSuffix always has a DFA") + } + } +} + +impl Strategy for ReverseSuffix { + #[cfg_attr(feature = "perf-inline", inline(always))] + fn group_info(&self) -> &GroupInfo { + self.core.group_info() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn create_cache(&self) -> Cache { + self.core.create_cache() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn reset_cache(&self, cache: &mut Cache) { + self.core.reset_cache(cache); + } + + fn is_accelerated(&self) -> bool { + self.pre.is_fast() + } + + fn memory_usage(&self) -> usize { + self.core.memory_usage() + self.pre.memory_usage() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search(cache, input); + } + match self.try_search_half_start(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse suffix optimization failed: {}", _err); + self.core.search(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!("reverse suffix reverse fast search failed: {}", _err); + self.core.search_nofail(cache, input) + } + Ok(None) => None, + Ok(Some(hm_start)) => { + let fwdinput = input + .clone() + .anchored(Anchored::Pattern(hm_start.pattern())) + .span(hm_start.offset()..input.end()); + match self.try_search_half_fwd(cache, &fwdinput) { + Err(_err) => { + trace!( + "reverse suffix forward fast search failed: {}", + _err + ); + self.core.search_nofail(cache, input) + } + Ok(None) => { + unreachable!( + "suffix match plus reverse match implies \ + there must be a match", + ) + } + Ok(Some(hm_end)) => Some(Match::new( + hm_start.pattern(), + hm_start.offset()..hm_end.offset(), + )), + } + } + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_half( 
+ &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_half(cache, input); + } + match self.try_search_half_start(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse suffix half optimization failed: {}", _err); + self.core.search_half(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!( + "reverse suffix reverse fast half search failed: {}", + _err + ); + self.core.search_half_nofail(cache, input) + } + Ok(None) => None, + Ok(Some(hm_start)) => { + // This is a bit subtle. It is tempting to just stop searching + // at this point and return a half-match with an offset + // corresponding to where the suffix was found. But the suffix + // match does not necessarily correspond to the end of the + // proper leftmost-first match. Consider /[a-z]+ing/ against + // 'tingling'. The first suffix match is the first 'ing', and + // the /[a-z]+/ matches the 't'. So if we stopped here, then + // we'd report 'ting' as the match. But 'tingling' is the + // correct match because of greediness. + let fwdinput = input + .clone() + .anchored(Anchored::Pattern(hm_start.pattern())) + .span(hm_start.offset()..input.end()); + match self.try_search_half_fwd(cache, &fwdinput) { + Err(_err) => { + trace!( + "reverse suffix forward fast search failed: {}", + _err + ); + self.core.search_half_nofail(cache, input) + } + Ok(None) => { + unreachable!( + "suffix match plus reverse match implies \ + there must be a match", + ) + } + Ok(Some(hm_end)) => Some(hm_end), + } + } + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if input.get_anchored().is_anchored() { + return self.core.is_match(cache, input); + } + match self.try_search_half_start(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse suffix half optimization failed: {}", _err); + self.core.is_match_nofail(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!( + "reverse suffix reverse fast half search failed: {}", + _err + ); + self.core.is_match_nofail(cache, input) + } + Ok(None) => false, + Ok(Some(_)) => true, + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_slots(cache, input, slots); + } + if !self.core.is_capture_search_needed(slots.len()) { + trace!("asked for slots unnecessarily, trying fast path"); + let m = self.search(cache, input)?; + copy_match_to_slots(m, slots); + return Some(m.pattern()); + } + let hm_start = match self.try_search_half_start(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!( + "reverse suffix captures optimization failed: {}", + _err + ); + return self.core.search_slots(cache, input, slots); + } + Err(RetryError::Fail(_err)) => { + trace!( + "reverse suffix reverse fast captures search failed: {}", + _err + ); + return self.core.search_slots_nofail(cache, input, slots); + } + Ok(None) => return None, + Ok(Some(hm_start)) => hm_start, + }; + trace!( + "match found at {}..{} in capture search, \ + using another engine to find captures", + hm_start.offset(), + input.end(), + ); + let start = hm_start.offset(); + let input = input + .clone() + .span(start..input.end()) + .anchored(Anchored::Pattern(hm_start.pattern())); + self.core.search_slots_nofail(cache, &input, slots) + } + + #[cfg_attr(feature = 
"perf-inline", inline(always))] + fn which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + self.core.which_overlapping_matches(cache, input, patset) + } +} + +#[derive(Debug)] +struct ReverseInner { + core: Core, + preinner: Prefilter, + nfarev: NFA, + hybrid: wrappers::ReverseHybrid, + dfa: wrappers::ReverseDFA, +} + +impl ReverseInner { + fn new(core: Core, hirs: &[&Hir]) -> Result { + if !core.info.config().get_auto_prefilter() { + debug!( + "skipping reverse inner optimization because \ + automatic prefilters are disabled" + ); + return Err(core); + } + // Currently we hard-code the assumption of leftmost-first match + // semantics. This isn't a huge deal because 'all' semantics tend to + // only be used for forward overlapping searches with multiple regexes, + // and this optimization only supports a single pattern at the moment. + if core.info.config().get_match_kind() != MatchKind::LeftmostFirst { + debug!( + "skipping reverse inner optimization because \ + match kind is {:?} but this only supports leftmost-first", + core.info.config().get_match_kind(), + ); + return Err(core); + } + // It's likely that a reverse inner scan has too much overhead for it + // to be worth it when the regex is anchored at the start. It is + // possible for it to be quite a bit faster if the initial literal + // scan fails to detect a match, in which case, we can say "no match" + // very quickly. But this could be undesirable, e.g., scanning too far + // or when the literal scan matches. If it matches, then confirming the + // match requires a reverse scan followed by a forward scan to confirm + // or reject, which is a fair bit of work. + // + // Note that the caller can still request an anchored search even + // when the regex isn't anchored. We detect that case in the search + // routines below and just fallback to the core engine. Currently this + // optimization assumes all searches are unanchored, so if we do want + // to enable this optimization for anchored searches, it will need a + // little work to support it. + if core.info.is_always_anchored_start() { + debug!( + "skipping reverse inner optimization because \ + the regex is always anchored at the start", + ); + return Err(core); + } + // Only DFAs can do reverse searches (currently), so we need one of + // them in order to do this optimization. It's possible (although + // pretty unlikely) that we have neither and need to give up. + if !core.hybrid.is_some() && !core.dfa.is_some() { + debug!( + "skipping reverse inner optimization because \ + we don't have a lazy DFA or a full DFA" + ); + return Err(core); + } + if core.pre.as_ref().map_or(false, |p| p.is_fast()) { + debug!( + "skipping reverse inner optimization because \ + we already have a prefilter that we think is fast" + ); + return Err(core); + } else if core.pre.is_some() { + debug!( + "core engine has a prefix prefilter, but it is \ + probably not fast, so continuing with attempt to \ + use reverse inner prefilter" + ); + } + let (concat_prefix, preinner) = match reverse_inner::extract(hirs) { + Some(x) => x, + // N.B. the 'extract' function emits debug messages explaining + // why we bailed out here. 
+ None => return Err(core), + }; + debug!("building reverse NFA for prefix before inner literal"); + let mut lookm = LookMatcher::new(); + lookm.set_line_terminator(core.info.config().get_line_terminator()); + let thompson_config = thompson::Config::new() + .reverse(true) + .utf8(core.info.config().get_utf8_empty()) + .nfa_size_limit(core.info.config().get_nfa_size_limit()) + .shrink(false) + .which_captures(WhichCaptures::None) + .look_matcher(lookm); + let result = thompson::Compiler::new() + .configure(thompson_config) + .build_from_hir(&concat_prefix); + let nfarev = match result { + Ok(nfarev) => nfarev, + Err(_err) => { + debug!( + "skipping reverse inner optimization because the \ + reverse NFA failed to build: {}", + _err, + ); + return Err(core); + } + }; + debug!("building reverse DFA for prefix before inner literal"); + let dfa = if !core.info.config().get_dfa() { + wrappers::ReverseDFA::none() + } else { + wrappers::ReverseDFA::new(&core.info, &nfarev) + }; + let hybrid = if !core.info.config().get_hybrid() { + wrappers::ReverseHybrid::none() + } else if dfa.is_some() { + debug!( + "skipping lazy DFA for reverse inner optimization \ + because we have a full DFA" + ); + wrappers::ReverseHybrid::none() + } else { + wrappers::ReverseHybrid::new(&core.info, &nfarev) + }; + Ok(ReverseInner { core, preinner, nfarev, hybrid, dfa }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_full( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, RetryError> { + let mut span = input.get_span(); + let mut min_match_start = 0; + let mut min_pre_start = 0; + loop { + let litmatch = match self.preinner.find(input.haystack(), span) { + None => return Ok(None), + Some(span) => span, + }; + if litmatch.start < min_pre_start { + trace!( + "found inner prefilter match at {:?}, which starts \ + before the end of the last forward scan at {}, \ + quitting to avoid quadratic behavior", + litmatch, + min_pre_start, + ); + return Err(RetryError::Quadratic(RetryQuadraticError::new())); + } + trace!("reverse inner scan found inner match at {:?}", litmatch); + let revinput = input + .clone() + .anchored(Anchored::Yes) + .span(input.start()..litmatch.start); + // Note that in addition to the literal search above scanning past + // our minimum start point, this routine can also return an error + // as a result of detecting possible quadratic behavior if the + // reverse scan goes past the minimum start point. That is, the + // literal search might not, but the reverse regex search for the + // prefix might! + match self.try_search_half_rev_limited( + cache, + &revinput, + min_match_start, + )? { + None => { + if span.start >= span.end { + break; + } + span.start = litmatch.start.checked_add(1).unwrap(); + } + Some(hm_start) => { + let fwdinput = input + .clone() + .anchored(Anchored::Pattern(hm_start.pattern())) + .span(hm_start.offset()..input.end()); + match self.try_search_half_fwd_stopat(cache, &fwdinput)? 
{ + Err(stopat) => { + min_pre_start = stopat; + span.start = + litmatch.start.checked_add(1).unwrap(); + } + Ok(hm_end) => { + return Ok(Some(Match::new( + hm_start.pattern(), + hm_start.offset()..hm_end.offset(), + ))) + } + } + } + } + min_match_start = litmatch.end; + } + Ok(None) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_fwd_stopat( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + if let Some(e) = self.core.dfa.get(&input) { + trace!( + "using full DFA for forward reverse inner search at {:?}", + input.get_span() + ); + e.try_search_half_fwd_stopat(&input) + } else if let Some(e) = self.core.hybrid.get(&input) { + trace!( + "using lazy DFA for forward reverse inner search at {:?}", + input.get_span() + ); + e.try_search_half_fwd_stopat(&mut cache.hybrid, &input) + } else { + unreachable!("ReverseInner always has a DFA") + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_rev_limited( + &self, + cache: &mut Cache, + input: &Input<'_>, + min_start: usize, + ) -> Result, RetryError> { + if let Some(e) = self.dfa.get(&input) { + trace!( + "using full DFA for reverse inner search at {:?}, \ + but will be stopped at {} to avoid quadratic behavior", + input.get_span(), + min_start, + ); + e.try_search_half_rev_limited(&input, min_start) + } else if let Some(e) = self.hybrid.get(&input) { + trace!( + "using lazy DFA for reverse inner search at {:?}, \ + but will be stopped at {} to avoid quadratic behavior", + input.get_span(), + min_start, + ); + e.try_search_half_rev_limited( + &mut cache.revhybrid, + &input, + min_start, + ) + } else { + unreachable!("ReverseInner always has a DFA") + } + } +} + +impl Strategy for ReverseInner { + #[cfg_attr(feature = "perf-inline", inline(always))] + fn group_info(&self) -> &GroupInfo { + self.core.group_info() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn create_cache(&self) -> Cache { + let mut cache = self.core.create_cache(); + cache.revhybrid = self.hybrid.create_cache(); + cache + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn reset_cache(&self, cache: &mut Cache) { + self.core.reset_cache(cache); + cache.revhybrid.reset(&self.hybrid); + } + + fn is_accelerated(&self) -> bool { + self.preinner.is_fast() + } + + fn memory_usage(&self) -> usize { + self.core.memory_usage() + + self.preinner.memory_usage() + + self.nfarev.memory_usage() + + self.dfa.memory_usage() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search(cache, input); + } + match self.try_search_full(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse inner optimization failed: {}", _err); + self.core.search(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!("reverse inner fast search failed: {}", _err); + self.core.search_nofail(cache, input) + } + Ok(matornot) => matornot, + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_half( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_half(cache, input); + } + match self.try_search_full(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse inner half optimization failed: {}", _err); + self.core.search_half(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!("reverse inner fast half 
search failed: {}", _err); + self.core.search_half_nofail(cache, input) + } + Ok(None) => None, + Ok(Some(m)) => Some(HalfMatch::new(m.pattern(), m.end())), + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if input.get_anchored().is_anchored() { + return self.core.is_match(cache, input); + } + match self.try_search_full(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse inner half optimization failed: {}", _err); + self.core.is_match_nofail(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!("reverse inner fast half search failed: {}", _err); + self.core.is_match_nofail(cache, input) + } + Ok(None) => false, + Ok(Some(_)) => true, + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_slots(cache, input, slots); + } + if !self.core.is_capture_search_needed(slots.len()) { + trace!("asked for slots unnecessarily, trying fast path"); + let m = self.search(cache, input)?; + copy_match_to_slots(m, slots); + return Some(m.pattern()); + } + let m = match self.try_search_full(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse inner captures optimization failed: {}", _err); + return self.core.search_slots(cache, input, slots); + } + Err(RetryError::Fail(_err)) => { + trace!("reverse inner fast captures search failed: {}", _err); + return self.core.search_slots_nofail(cache, input, slots); + } + Ok(None) => return None, + Ok(Some(m)) => m, + }; + trace!( + "match found at {}..{} in capture search, \ + using another engine to find captures", + m.start(), + m.end(), + ); + let input = input + .clone() + .span(m.start()..m.end()) + .anchored(Anchored::Pattern(m.pattern())); + self.core.search_slots_nofail(cache, &input, slots) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + self.core.which_overlapping_matches(cache, input, patset) + } +} + +/// Copies the offsets in the given match to the corresponding positions in +/// `slots`. +/// +/// In effect, this sets the slots corresponding to the implicit group for the +/// pattern in the given match. If the indices for the corresponding slots do +/// not exist, then no slots are set. +/// +/// This is useful when the caller provides slots (or captures), but you use a +/// regex engine that doesn't operate on slots (like a lazy DFA). This function +/// lets you map the match you get back to the slots provided by the caller. +#[cfg_attr(feature = "perf-inline", inline(always))] +fn copy_match_to_slots(m: Match, slots: &mut [Option]) { + let slot_start = m.pattern().as_usize() * 2; + let slot_end = slot_start + 1; + if let Some(slot) = slots.get_mut(slot_start) { + *slot = NonMaxUsize::new(m.start()); + } + if let Some(slot) = slots.get_mut(slot_end) { + *slot = NonMaxUsize::new(m.end()); + } +} diff --git a/vendor/regex-automata/src/meta/wrappers.rs b/vendor/regex-automata/src/meta/wrappers.rs new file mode 100644 index 0000000..6cb19ba --- /dev/null +++ b/vendor/regex-automata/src/meta/wrappers.rs @@ -0,0 +1,1351 @@ +/*! +This module contains a boat load of wrappers around each of our internal regex +engines. They encapsulate a few things: + +1. 
The wrappers manage the conditional existence of the regex engine. Namely,
+the PikeVM is the only required regex engine. The rest are optional. These
+wrappers present a uniform API regardless of which engines are available. And
+availability might be determined by compile time features or by dynamic
+configuration via `meta::Config`. Encapsulating the conditional compilation
+features is in particular a huge simplification for the higher level code that
+composes these engines.
+2. The wrappers manage construction of each engine, including skipping it if
+the engine is unavailable or configured to not be used.
+3. The wrappers manage whether an engine *can* be used for a particular
+search configuration. For example, `BoundedBacktracker::get` only returns a
+backtracking engine when the haystack is no bigger than the maximum supported
+length. The wrappers also sometimes take a position on when an engine *ought*
+to be used, but only in cases where the logic is extremely local to the engine
+itself. Otherwise, things like "choose between the backtracker and the one-pass
+DFA" are managed by the higher level meta strategy code.
+
+There are also corresponding wrappers for the various `Cache` types for each
+regex engine that needs them. If an engine is unavailable or not used, then a
+cache for it will *not* actually be allocated.
+*/
+
+use alloc::vec::Vec;
+
+use crate::{
+    meta::{
+        error::{BuildError, RetryError, RetryFailError},
+        regex::RegexInfo,
+    },
+    nfa::thompson::{pikevm, NFA},
+    util::{prefilter::Prefilter, primitives::NonMaxUsize},
+    HalfMatch, Input, Match, MatchKind, PatternID, PatternSet,
+};
+
+#[cfg(feature = "dfa-build")]
+use crate::dfa;
+#[cfg(feature = "dfa-onepass")]
+use crate::dfa::onepass;
+#[cfg(feature = "hybrid")]
+use crate::hybrid;
+#[cfg(feature = "nfa-backtrack")]
+use crate::nfa::thompson::backtrack;
+
+#[derive(Debug)]
+pub(crate) struct PikeVM(PikeVMEngine);
+
+impl PikeVM {
+    pub(crate) fn new(
+        info: &RegexInfo,
+        pre: Option<Prefilter>,
+        nfa: &NFA,
+    ) -> Result<PikeVM, BuildError> {
+        PikeVMEngine::new(info, pre, nfa).map(PikeVM)
+    }
+
+    pub(crate) fn create_cache(&self) -> PikeVMCache {
+        PikeVMCache::new(self)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn get(&self) -> &PikeVMEngine {
+        &self.0
+    }
+}
+
+#[derive(Debug)]
+pub(crate) struct PikeVMEngine(pikevm::PikeVM);
+
+impl PikeVMEngine {
+    pub(crate) fn new(
+        info: &RegexInfo,
+        pre: Option<Prefilter>,
+        nfa: &NFA,
+    ) -> Result<PikeVMEngine, BuildError> {
+        let pikevm_config = pikevm::Config::new()
+            .match_kind(info.config().get_match_kind())
+            .prefilter(pre);
+        let engine = pikevm::Builder::new()
+            .configure(pikevm_config)
+            .build_from_nfa(nfa.clone())
+            .map_err(BuildError::nfa)?;
+        debug!("PikeVM built");
+        Ok(PikeVMEngine(engine))
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn is_match(
+        &self,
+        cache: &mut PikeVMCache,
+        input: &Input<'_>,
+    ) -> bool {
+        self.0.is_match(cache.0.as_mut().unwrap(), input.clone())
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn search_slots(
+        &self,
+        cache: &mut PikeVMCache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<PatternID> {
+        self.0.search_slots(cache.0.as_mut().unwrap(), input, slots)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn which_overlapping_matches(
+        &self,
+        cache: &mut PikeVMCache,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) {
+        self.0.which_overlapping_matches(
+            cache.0.as_mut().unwrap(),
+            input,
+            patset,
+        )
+    }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct
PikeVMCache(Option); + +impl PikeVMCache { + pub(crate) fn none() -> PikeVMCache { + PikeVMCache(None) + } + + pub(crate) fn new(builder: &PikeVM) -> PikeVMCache { + PikeVMCache(Some(builder.get().0.create_cache())) + } + + pub(crate) fn reset(&mut self, builder: &PikeVM) { + self.0.as_mut().unwrap().reset(&builder.get().0); + } + + pub(crate) fn memory_usage(&self) -> usize { + self.0.as_ref().map_or(0, |c| c.memory_usage()) + } +} + +#[derive(Debug)] +pub(crate) struct BoundedBacktracker(Option); + +impl BoundedBacktracker { + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + ) -> Result { + BoundedBacktrackerEngine::new(info, pre, nfa).map(BoundedBacktracker) + } + + pub(crate) fn create_cache(&self) -> BoundedBacktrackerCache { + BoundedBacktrackerCache::new(self) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get( + &self, + input: &Input<'_>, + ) -> Option<&BoundedBacktrackerEngine> { + let engine = self.0.as_ref()?; + // It is difficult to make the backtracker give up early if it is + // guaranteed to eventually wind up in a match state. This is because + // of the greedy nature of a backtracker: it just blindly mushes + // forward. Every other regex engine is able to give up more quickly, + // so even if the backtracker might be able to zip through faster than + // (say) the PikeVM, we prefer the theoretical benefit that some other + // engine might be able to scan much less of the haystack than the + // backtracker. + // + // Now, if the haystack is really short already, then we allow the + // backtracker to run. (This hasn't been litigated quantitatively with + // benchmarks. Just a hunch.) + if input.get_earliest() && input.haystack().len() > 128 { + return None; + } + // If the backtracker is just going to return an error because the + // haystack is too long, then obviously do not use it. + if input.get_span().len() > engine.max_haystack_len() { + return None; + } + Some(engine) + } +} + +#[derive(Debug)] +pub(crate) struct BoundedBacktrackerEngine( + #[cfg(feature = "nfa-backtrack")] backtrack::BoundedBacktracker, + #[cfg(not(feature = "nfa-backtrack"))] (), +); + +impl BoundedBacktrackerEngine { + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + ) -> Result, BuildError> { + #[cfg(feature = "nfa-backtrack")] + { + if !info.config().get_backtrack() + || info.config().get_match_kind() != MatchKind::LeftmostFirst + { + return Ok(None); + } + let backtrack_config = backtrack::Config::new().prefilter(pre); + let engine = backtrack::Builder::new() + .configure(backtrack_config) + .build_from_nfa(nfa.clone()) + .map_err(BuildError::nfa)?; + debug!( + "BoundedBacktracker built (max haystack length: {:?})", + engine.max_haystack_len() + ); + Ok(Some(BoundedBacktrackerEngine(engine))) + } + #[cfg(not(feature = "nfa-backtrack"))] + { + Ok(None) + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn is_match( + &self, + cache: &mut BoundedBacktrackerCache, + input: &Input<'_>, + ) -> bool { + #[cfg(feature = "nfa-backtrack")] + { + // OK because we only permit access to this engine when we know + // the haystack is short enough for the backtracker to run without + // reporting an error. + self.0 + .try_is_match(cache.0.as_mut().unwrap(), input.clone()) + .unwrap() + } + #[cfg(not(feature = "nfa-backtrack"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. 
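+            // (More precisely: under cfg(not(feature = "nfa-backtrack")),
+            // 'BoundedBacktrackerEngine::new' above always returns
+            // 'Ok(None)', so 'BoundedBacktracker::get' can never hand out
+            // an engine and this arm is dead code in such builds.)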
+ unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn search_slots( + &self, + cache: &mut BoundedBacktrackerCache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + #[cfg(feature = "nfa-backtrack")] + { + // OK because we only permit access to this engine when we know + // the haystack is short enough for the backtracker to run without + // reporting an error. + self.0 + .try_search_slots(cache.0.as_mut().unwrap(), input, slots) + .unwrap() + } + #[cfg(not(feature = "nfa-backtrack"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn max_haystack_len(&self) -> usize { + #[cfg(feature = "nfa-backtrack")] + { + self.0.max_haystack_len() + } + #[cfg(not(feature = "nfa-backtrack"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct BoundedBacktrackerCache( + #[cfg(feature = "nfa-backtrack")] Option, + #[cfg(not(feature = "nfa-backtrack"))] (), +); + +impl BoundedBacktrackerCache { + pub(crate) fn none() -> BoundedBacktrackerCache { + #[cfg(feature = "nfa-backtrack")] + { + BoundedBacktrackerCache(None) + } + #[cfg(not(feature = "nfa-backtrack"))] + { + BoundedBacktrackerCache(()) + } + } + + pub(crate) fn new( + builder: &BoundedBacktracker, + ) -> BoundedBacktrackerCache { + #[cfg(feature = "nfa-backtrack")] + { + BoundedBacktrackerCache( + builder.0.as_ref().map(|e| e.0.create_cache()), + ) + } + #[cfg(not(feature = "nfa-backtrack"))] + { + BoundedBacktrackerCache(()) + } + } + + pub(crate) fn reset(&mut self, builder: &BoundedBacktracker) { + #[cfg(feature = "nfa-backtrack")] + if let Some(ref e) = builder.0 { + self.0.as_mut().unwrap().reset(&e.0); + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "nfa-backtrack")] + { + self.0.as_ref().map_or(0, |c| c.memory_usage()) + } + #[cfg(not(feature = "nfa-backtrack"))] + { + 0 + } + } +} + +#[derive(Debug)] +pub(crate) struct OnePass(Option); + +impl OnePass { + pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> OnePass { + OnePass(OnePassEngine::new(info, nfa)) + } + + pub(crate) fn create_cache(&self) -> OnePassCache { + OnePassCache::new(self) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get(&self, input: &Input<'_>) -> Option<&OnePassEngine> { + let engine = self.0.as_ref()?; + if !input.get_anchored().is_anchored() + && !engine.get_nfa().is_always_start_anchored() + { + return None; + } + Some(engine) + } + + pub(crate) fn memory_usage(&self) -> usize { + self.0.as_ref().map_or(0, |e| e.memory_usage()) + } +} + +#[derive(Debug)] +pub(crate) struct OnePassEngine( + #[cfg(feature = "dfa-onepass")] onepass::DFA, + #[cfg(not(feature = "dfa-onepass"))] (), +); + +impl OnePassEngine { + pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> Option { + #[cfg(feature = "dfa-onepass")] + { + if !info.config().get_onepass() { + return None; + } + // In order to even attempt building a one-pass DFA, we require + // that we either have at least one explicit capturing group or + // there's a Unicode word boundary somewhere. If we don't have + // either of these things, then the lazy DFA will almost certainly + // be useable and be much faster. 
The only case where it might + // not is if the lazy DFA isn't utilizing its cache effectively, + // but in those cases, the underlying regex is almost certainly + // not one-pass or is too big to fit within the current one-pass + // implementation limits. + if info.props_union().explicit_captures_len() == 0 + && !info.props_union().look_set().contains_word_unicode() + { + debug!("not building OnePass because it isn't worth it"); + return None; + } + let onepass_config = onepass::Config::new() + .match_kind(info.config().get_match_kind()) + // Like for the lazy DFA, we unconditionally enable this + // because it doesn't cost much and makes the API more + // flexible. + .starts_for_each_pattern(true) + .byte_classes(info.config().get_byte_classes()) + .size_limit(info.config().get_onepass_size_limit()); + let result = onepass::Builder::new() + .configure(onepass_config) + .build_from_nfa(nfa.clone()); + let engine = match result { + Ok(engine) => engine, + Err(_err) => { + debug!("OnePass failed to build: {}", _err); + return None; + } + }; + debug!("OnePass built, {} bytes", engine.memory_usage()); + Some(OnePassEngine(engine)) + } + #[cfg(not(feature = "dfa-onepass"))] + { + None + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn search_slots( + &self, + cache: &mut OnePassCache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + #[cfg(feature = "dfa-onepass")] + { + // OK because we only permit getting a OnePassEngine when we know + // the search is anchored and thus an error cannot occur. + self.0 + .try_search_slots(cache.0.as_mut().unwrap(), input, slots) + .unwrap() + } + #[cfg(not(feature = "dfa-onepass"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "dfa-onepass")] + { + self.0.memory_usage() + } + #[cfg(not(feature = "dfa-onepass"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn get_nfa(&self) -> &NFA { + #[cfg(feature = "dfa-onepass")] + { + self.0.get_nfa() + } + #[cfg(not(feature = "dfa-onepass"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. 
+ unreachable!() + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct OnePassCache( + #[cfg(feature = "dfa-onepass")] Option, + #[cfg(not(feature = "dfa-onepass"))] (), +); + +impl OnePassCache { + pub(crate) fn none() -> OnePassCache { + #[cfg(feature = "dfa-onepass")] + { + OnePassCache(None) + } + #[cfg(not(feature = "dfa-onepass"))] + { + OnePassCache(()) + } + } + + pub(crate) fn new(builder: &OnePass) -> OnePassCache { + #[cfg(feature = "dfa-onepass")] + { + OnePassCache(builder.0.as_ref().map(|e| e.0.create_cache())) + } + #[cfg(not(feature = "dfa-onepass"))] + { + OnePassCache(()) + } + } + + pub(crate) fn reset(&mut self, builder: &OnePass) { + #[cfg(feature = "dfa-onepass")] + if let Some(ref e) = builder.0 { + self.0.as_mut().unwrap().reset(&e.0); + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "dfa-onepass")] + { + self.0.as_ref().map_or(0, |c| c.memory_usage()) + } + #[cfg(not(feature = "dfa-onepass"))] + { + 0 + } + } +} + +#[derive(Debug)] +pub(crate) struct Hybrid(Option); + +impl Hybrid { + pub(crate) fn none() -> Hybrid { + Hybrid(None) + } + + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + nfarev: &NFA, + ) -> Hybrid { + Hybrid(HybridEngine::new(info, pre, nfa, nfarev)) + } + + pub(crate) fn create_cache(&self) -> HybridCache { + HybridCache::new(self) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&HybridEngine> { + let engine = self.0.as_ref()?; + Some(engine) + } + + pub(crate) fn is_some(&self) -> bool { + self.0.is_some() + } +} + +#[derive(Debug)] +pub(crate) struct HybridEngine( + #[cfg(feature = "hybrid")] hybrid::regex::Regex, + #[cfg(not(feature = "hybrid"))] (), +); + +impl HybridEngine { + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + nfarev: &NFA, + ) -> Option { + #[cfg(feature = "hybrid")] + { + if !info.config().get_hybrid() { + return None; + } + let dfa_config = hybrid::dfa::Config::new() + .match_kind(info.config().get_match_kind()) + .prefilter(pre.clone()) + // Enabling this is necessary for ensuring we can service any + // kind of 'Input' search without error. For the lazy DFA, + // this is not particularly costly, since the start states are + // generated lazily. + .starts_for_each_pattern(true) + .byte_classes(info.config().get_byte_classes()) + .unicode_word_boundary(true) + .specialize_start_states(pre.is_some()) + .cache_capacity(info.config().get_hybrid_cache_capacity()) + // This makes it possible for building a lazy DFA to + // fail even though the NFA has already been built. Namely, + // if the cache capacity is too small to fit some minimum + // number of states (which is small, like 4 or 5), then the + // DFA will refuse to build. + // + // We shouldn't enable this to make building always work, since + // this could cause the allocation of a cache bigger than the + // provided capacity amount. + // + // This is effectively the only reason why building a lazy DFA + // could fail. If it does, then we simply suppress the error + // and return None. + .skip_cache_capacity_check(false) + // This and enabling heuristic Unicode word boundary support + // above make it so the lazy DFA can quit at match time. 
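+                //
+                // Concretely: once the cache has been cleared more than 3
+                // times and the search is averaging fewer than 10 haystack
+                // bytes per new state, the lazy DFA gives up with an error
+                // rather than thrash. The meta engine then retries with an
+                // infallible engine, roughly as 'Core::search' does:
+                //
+                //     match e.try_search(&mut cache.hybrid, input) {
+                //         Ok(x) => x,
+                //         // covers both "gave up" and heuristic Unicode
+                //         // word boundary errors
+                //         Err(_err) => self.search_nofail(cache, input),
+                //     }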
+ .minimum_cache_clear_count(Some(3)) + .minimum_bytes_per_state(Some(10)); + let result = hybrid::dfa::Builder::new() + .configure(dfa_config.clone()) + .build_from_nfa(nfa.clone()); + let fwd = match result { + Ok(fwd) => fwd, + Err(_err) => { + debug!("forward lazy DFA failed to build: {}", _err); + return None; + } + }; + let result = hybrid::dfa::Builder::new() + .configure( + dfa_config + .clone() + .match_kind(MatchKind::All) + .prefilter(None) + .specialize_start_states(false), + ) + .build_from_nfa(nfarev.clone()); + let rev = match result { + Ok(rev) => rev, + Err(_err) => { + debug!("reverse lazy DFA failed to build: {}", _err); + return None; + } + }; + let engine = + hybrid::regex::Builder::new().build_from_dfas(fwd, rev); + debug!("lazy DFA built"); + Some(HybridEngine(engine)) + } + #[cfg(not(feature = "hybrid"))] + { + None + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "hybrid")] + { + let cache = cache.0.as_mut().unwrap(); + self.0.try_search(cache, input).map_err(|e| e.into()) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_fwd( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "hybrid")] + { + let fwd = self.0.forward(); + let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; + fwd.try_search_fwd(&mut fwdcache, input).map_err(|e| e.into()) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_fwd_stopat( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "hybrid")] + { + let dfa = self.0.forward(); + let mut cache = cache.0.as_mut().unwrap().as_parts_mut().0; + crate::meta::stopat::hybrid_try_search_half_fwd( + dfa, &mut cache, input, + ) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_rev( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "hybrid")] + { + let rev = self.0.reverse(); + let mut revcache = cache.0.as_mut().unwrap().as_parts_mut().1; + rev.try_search_rev(&mut revcache, input).map_err(|e| e.into()) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. 
+ unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_rev_limited( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + min_start: usize, + ) -> Result, RetryError> { + #[cfg(feature = "hybrid")] + { + let dfa = self.0.reverse(); + let mut cache = cache.0.as_mut().unwrap().as_parts_mut().1; + crate::meta::limited::hybrid_try_search_half_rev( + dfa, &mut cache, input, min_start, + ) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[inline] + pub(crate) fn try_which_overlapping_matches( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + patset: &mut PatternSet, + ) -> Result<(), RetryFailError> { + #[cfg(feature = "hybrid")] + { + let fwd = self.0.forward(); + let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; + fwd.try_which_overlapping_matches(&mut fwdcache, input, patset) + .map_err(|e| e.into()) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct HybridCache( + #[cfg(feature = "hybrid")] Option, + #[cfg(not(feature = "hybrid"))] (), +); + +impl HybridCache { + pub(crate) fn none() -> HybridCache { + #[cfg(feature = "hybrid")] + { + HybridCache(None) + } + #[cfg(not(feature = "hybrid"))] + { + HybridCache(()) + } + } + + pub(crate) fn new(builder: &Hybrid) -> HybridCache { + #[cfg(feature = "hybrid")] + { + HybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) + } + #[cfg(not(feature = "hybrid"))] + { + HybridCache(()) + } + } + + pub(crate) fn reset(&mut self, builder: &Hybrid) { + #[cfg(feature = "hybrid")] + if let Some(ref e) = builder.0 { + self.0.as_mut().unwrap().reset(&e.0); + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "hybrid")] + { + self.0.as_ref().map_or(0, |c| c.memory_usage()) + } + #[cfg(not(feature = "hybrid"))] + { + 0 + } + } +} + +#[derive(Debug)] +pub(crate) struct DFA(Option); + +impl DFA { + pub(crate) fn none() -> DFA { + DFA(None) + } + + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + nfarev: &NFA, + ) -> DFA { + DFA(DFAEngine::new(info, pre, nfa, nfarev)) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&DFAEngine> { + let engine = self.0.as_ref()?; + Some(engine) + } + + pub(crate) fn is_some(&self) -> bool { + self.0.is_some() + } + + pub(crate) fn memory_usage(&self) -> usize { + self.0.as_ref().map_or(0, |e| e.memory_usage()) + } +} + +#[derive(Debug)] +pub(crate) struct DFAEngine( + #[cfg(feature = "dfa-build")] dfa::regex::Regex, + #[cfg(not(feature = "dfa-build"))] (), +); + +impl DFAEngine { + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + nfarev: &NFA, + ) -> Option { + #[cfg(feature = "dfa-build")] + { + if !info.config().get_dfa() { + return None; + } + // If our NFA is anything but small, don't even bother with a DFA. 
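+            //
+            // Determinization can be exponential in the number of NFA
+            // states in the worst case, so this is a cheap up-front guard.
+            // For a rough sense of scale (assuming a hypothetical limit of
+            // a few dozen states): a short literal like r"abc" compiles to
+            // a handful of NFA states and passes, while a Unicode-aware
+            // r"\w+" compiles to far more than that and skips the full DFA,
+            // leaving the lazy DFA (or the NFA engines) to handle it.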
+            if let Some(state_limit) = info.config().get_dfa_state_limit() {
+                if nfa.states().len() > state_limit {
+                    debug!(
+                        "skipping full DFA because NFA has {} states, \
+                        which exceeds the heuristic limit of {}",
+                        nfa.states().len(),
+                        state_limit,
+                    );
+                    return None;
+                }
+            }
+            // We cut the size limit in four because the total heap used by
+            // DFA construction is determinization aux memory and the DFA
+            // itself, and those things are configured independently in the
+            // lower level DFA builder API. And then split that in two because
+            // of forward and reverse DFAs.
+            let size_limit = info.config().get_dfa_size_limit().map(|n| n / 4);
+            let dfa_config = dfa::dense::Config::new()
+                .match_kind(info.config().get_match_kind())
+                .prefilter(pre.clone())
+                // Enabling this is necessary for ensuring we can service any
+                // kind of 'Input' search without error. For the full DFA, this
+                // can be quite costly. But since we have such a small bound
+                // on the size of the DFA, in practice, any multi-regexes are
+                // probably going to blow the limit anyway.
+                .starts_for_each_pattern(true)
+                .byte_classes(info.config().get_byte_classes())
+                .unicode_word_boundary(true)
+                .specialize_start_states(pre.is_some())
+                .determinize_size_limit(size_limit)
+                .dfa_size_limit(size_limit);
+            let result = dfa::dense::Builder::new()
+                .configure(dfa_config.clone())
+                .build_from_nfa(&nfa);
+            let fwd = match result {
+                Ok(fwd) => fwd,
+                Err(_err) => {
+                    debug!("forward full DFA failed to build: {}", _err);
+                    return None;
+                }
+            };
+            let result = dfa::dense::Builder::new()
+                .configure(
+                    dfa_config
+                        .clone()
+                        // We never need unanchored reverse searches, so
+                        // there's no point in building it into the DFA, which
+                        // WILL take more space. (This isn't done for the lazy
+                        // DFA because the DFA is, well, lazy. It doesn't pay
+                        // the cost for supporting unanchored searches unless
+                        // you actually do an unanchored search, which we
+                        // don't.)
+                        .start_kind(dfa::StartKind::Anchored)
+                        .match_kind(MatchKind::All)
+                        .prefilter(None)
+                        .specialize_start_states(false),
+                )
+                .build_from_nfa(&nfarev);
+            let rev = match result {
+                Ok(rev) => rev,
+                Err(_err) => {
+                    debug!("reverse full DFA failed to build: {}", _err);
+                    return None;
+                }
+            };
+            let engine = dfa::regex::Builder::new().build_from_dfas(fwd, rev);
+            debug!(
+                "fully compiled forward and reverse DFAs built, {} bytes",
+                engine.forward().memory_usage()
+                    + engine.reverse().memory_usage(),
+            );
+            Some(DFAEngine(engine))
+        }
+        #[cfg(not(feature = "dfa-build"))]
+        {
+            None
+        }
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn try_search(
+        &self,
+        input: &Input<'_>,
+    ) -> Result<Option<Match>, RetryFailError> {
+        #[cfg(feature = "dfa-build")]
+        {
+            self.0.try_search(input).map_err(|e| e.into())
+        }
+        #[cfg(not(feature = "dfa-build"))]
+        {
+            // Impossible to reach because this engine is never constructed
+            // if the requisite features aren't enabled.
+            unreachable!()
+        }
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn try_search_half_fwd(
+        &self,
+        input: &Input<'_>,
+    ) -> Result<Option<HalfMatch>, RetryFailError> {
+        #[cfg(feature = "dfa-build")]
+        {
+            use crate::dfa::Automaton;
+            self.0.forward().try_search_fwd(input).map_err(|e| e.into())
+        }
+        #[cfg(not(feature = "dfa-build"))]
+        {
+            // Impossible to reach because this engine is never constructed
+            // if the requisite features aren't enabled.
+ unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_fwd_stopat( + &self, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "dfa-build")] + { + let dfa = self.0.forward(); + crate::meta::stopat::dfa_try_search_half_fwd(dfa, input) + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_rev( + &self, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "dfa-build")] + { + use crate::dfa::Automaton; + self.0.reverse().try_search_rev(&input).map_err(|e| e.into()) + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_rev_limited( + &self, + input: &Input<'_>, + min_start: usize, + ) -> Result, RetryError> { + #[cfg(feature = "dfa-build")] + { + let dfa = self.0.reverse(); + crate::meta::limited::dfa_try_search_half_rev( + dfa, input, min_start, + ) + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[inline] + pub(crate) fn try_which_overlapping_matches( + &self, + input: &Input<'_>, + patset: &mut PatternSet, + ) -> Result<(), RetryFailError> { + #[cfg(feature = "dfa-build")] + { + use crate::dfa::Automaton; + self.0 + .forward() + .try_which_overlapping_matches(input, patset) + .map_err(|e| e.into()) + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "dfa-build")] + { + self.0.forward().memory_usage() + self.0.reverse().memory_usage() + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } +} + +#[derive(Debug)] +pub(crate) struct ReverseHybrid(Option); + +impl ReverseHybrid { + pub(crate) fn none() -> ReverseHybrid { + ReverseHybrid(None) + } + + pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseHybrid { + ReverseHybrid(ReverseHybridEngine::new(info, nfarev)) + } + + pub(crate) fn create_cache(&self) -> ReverseHybridCache { + ReverseHybridCache::new(self) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get( + &self, + _input: &Input<'_>, + ) -> Option<&ReverseHybridEngine> { + let engine = self.0.as_ref()?; + Some(engine) + } +} + +#[derive(Debug)] +pub(crate) struct ReverseHybridEngine( + #[cfg(feature = "hybrid")] hybrid::dfa::DFA, + #[cfg(not(feature = "hybrid"))] (), +); + +impl ReverseHybridEngine { + pub(crate) fn new( + info: &RegexInfo, + nfarev: &NFA, + ) -> Option { + #[cfg(feature = "hybrid")] + { + if !info.config().get_hybrid() { + return None; + } + // Since we only use this for reverse searches, we can hard-code + // a number of things like match semantics, prefilters, starts + // for each pattern and so on. 
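+            // (The reverse scan always starts from the known end of a match
+            // and looks for where that match begins, so leftmost-first
+            // semantics are never needed. That is why 'MatchKind::All' is
+            // hard-coded below.)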
+ let dfa_config = hybrid::dfa::Config::new() + .match_kind(MatchKind::All) + .prefilter(None) + .starts_for_each_pattern(false) + .byte_classes(info.config().get_byte_classes()) + .unicode_word_boundary(true) + .specialize_start_states(false) + .cache_capacity(info.config().get_hybrid_cache_capacity()) + .skip_cache_capacity_check(false) + .minimum_cache_clear_count(Some(3)) + .minimum_bytes_per_state(Some(10)); + let result = hybrid::dfa::Builder::new() + .configure(dfa_config) + .build_from_nfa(nfarev.clone()); + let rev = match result { + Ok(rev) => rev, + Err(_err) => { + debug!("lazy reverse DFA failed to build: {}", _err); + return None; + } + }; + debug!("lazy reverse DFA built"); + Some(ReverseHybridEngine(rev)) + } + #[cfg(not(feature = "hybrid"))] + { + None + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_rev_limited( + &self, + cache: &mut ReverseHybridCache, + input: &Input<'_>, + min_start: usize, + ) -> Result, RetryError> { + #[cfg(feature = "hybrid")] + { + let dfa = &self.0; + let mut cache = cache.0.as_mut().unwrap(); + crate::meta::limited::hybrid_try_search_half_rev( + dfa, &mut cache, input, min_start, + ) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct ReverseHybridCache( + #[cfg(feature = "hybrid")] Option, + #[cfg(not(feature = "hybrid"))] (), +); + +impl ReverseHybridCache { + pub(crate) fn none() -> ReverseHybridCache { + #[cfg(feature = "hybrid")] + { + ReverseHybridCache(None) + } + #[cfg(not(feature = "hybrid"))] + { + ReverseHybridCache(()) + } + } + + pub(crate) fn new(builder: &ReverseHybrid) -> ReverseHybridCache { + #[cfg(feature = "hybrid")] + { + ReverseHybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) + } + #[cfg(not(feature = "hybrid"))] + { + ReverseHybridCache(()) + } + } + + pub(crate) fn reset(&mut self, builder: &ReverseHybrid) { + #[cfg(feature = "hybrid")] + if let Some(ref e) = builder.0 { + self.0.as_mut().unwrap().reset(&e.0); + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "hybrid")] + { + self.0.as_ref().map_or(0, |c| c.memory_usage()) + } + #[cfg(not(feature = "hybrid"))] + { + 0 + } + } +} + +#[derive(Debug)] +pub(crate) struct ReverseDFA(Option); + +impl ReverseDFA { + pub(crate) fn none() -> ReverseDFA { + ReverseDFA(None) + } + + pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseDFA { + ReverseDFA(ReverseDFAEngine::new(info, nfarev)) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&ReverseDFAEngine> { + let engine = self.0.as_ref()?; + Some(engine) + } + + pub(crate) fn is_some(&self) -> bool { + self.0.is_some() + } + + pub(crate) fn memory_usage(&self) -> usize { + self.0.as_ref().map_or(0, |e| e.memory_usage()) + } +} + +#[derive(Debug)] +pub(crate) struct ReverseDFAEngine( + #[cfg(feature = "dfa-build")] dfa::dense::DFA>, + #[cfg(not(feature = "dfa-build"))] (), +); + +impl ReverseDFAEngine { + pub(crate) fn new( + info: &RegexInfo, + nfarev: &NFA, + ) -> Option { + #[cfg(feature = "dfa-build")] + { + if !info.config().get_dfa() { + return None; + } + // If our NFA is anything but small, don't even bother with a DFA. 
+            if let Some(state_limit) = info.config().get_dfa_state_limit() {
+                if nfarev.states().len() > state_limit {
+                    debug!(
+                        "skipping full reverse DFA because NFA has {} states, \
+                        which exceeds the heuristic limit of {}",
+                        nfarev.states().len(),
+                        state_limit,
+                    );
+                    return None;
+                }
+            }
+            // We cut the size limit in two because the total heap used by DFA
+            // construction is determinization aux memory and the DFA itself,
+            // and those things are configured independently in the lower level
+            // DFA builder API.
+            let size_limit = info.config().get_dfa_size_limit().map(|n| n / 2);
+            // Since we only use this for reverse searches, we can hard-code
+            // a number of things like match semantics, prefilters, starts
+            // for each pattern and so on. We also disable acceleration since
+            // it's incompatible with limited searches (which is the only
+            // operation we support for this kind of engine at the moment).
+            let dfa_config = dfa::dense::Config::new()
+                .match_kind(MatchKind::All)
+                .prefilter(None)
+                .accelerate(false)
+                .start_kind(dfa::StartKind::Anchored)
+                .starts_for_each_pattern(false)
+                .byte_classes(info.config().get_byte_classes())
+                .unicode_word_boundary(true)
+                .specialize_start_states(false)
+                .determinize_size_limit(size_limit)
+                .dfa_size_limit(size_limit);
+            let result = dfa::dense::Builder::new()
+                .configure(dfa_config)
+                .build_from_nfa(&nfarev);
+            let rev = match result {
+                Ok(rev) => rev,
+                Err(_err) => {
+                    debug!("full reverse DFA failed to build: {}", _err);
+                    return None;
+                }
+            };
+            debug!(
+                "fully compiled reverse DFA built, {} bytes",
+                rev.memory_usage()
+            );
+            Some(ReverseDFAEngine(rev))
+        }
+        #[cfg(not(feature = "dfa-build"))]
+        {
+            None
+        }
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn try_search_half_rev_limited(
+        &self,
+        input: &Input<'_>,
+        min_start: usize,
+    ) -> Result<Option<HalfMatch>, RetryError> {
+        #[cfg(feature = "dfa-build")]
+        {
+            let dfa = &self.0;
+            crate::meta::limited::dfa_try_search_half_rev(
+                dfa, input, min_start,
+            )
+        }
+        #[cfg(not(feature = "dfa-build"))]
+        {
+            // Impossible to reach because this engine is never constructed
+            // if the requisite features aren't enabled.
+            unreachable!()
+        }
+    }
+
+    pub(crate) fn memory_usage(&self) -> usize {
+        #[cfg(feature = "dfa-build")]
+        {
+            self.0.memory_usage()
+        }
+        #[cfg(not(feature = "dfa-build"))]
+        {
+            // Impossible to reach because this engine is never constructed
+            // if the requisite features aren't enabled.
+            unreachable!()
+        }
+    }
+}
diff --git a/vendor/regex-automata/src/nfa/mod.rs b/vendor/regex-automata/src/nfa/mod.rs
new file mode 100644
index 0000000..0c36f59
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/mod.rs
@@ -0,0 +1,55 @@
+/*!
+Provides non-deterministic finite automata (NFA) and regex engines that use
+them.
+
+While NFAs and DFAs (deterministic finite automata) have equivalent
+*theoretical* power, their usage in practice tends to result in different
+engineering trade-offs. While this isn't meant to be a comprehensive treatment
+of the topic, here are a few key trade-offs that are, at minimum, true for
+this crate:
+
+* NFAs tend to be represented sparsely whereas DFAs are represented densely.
+Sparse representations use less memory, but are slower to traverse.
+Conversely, dense representations use more memory, but are faster to
+traverse. (Sometimes these lines are blurred.
+For example, an `NFA` might choose to represent a
+particular state in a dense fashion, and a DFA can be built using a sparse
+representation via [`sparse::DFA`](crate::dfa::sparse::DFA).)
+* NFAs have epsilon transitions and DFAs don't. In practice, this means that
+handling a single byte in a haystack with an NFA at search time may require
+visiting multiple NFA states. In a DFA, each byte only requires visiting
+a single state. Stated differently, NFAs require a variable number of CPU
+instructions to process one byte in a haystack whereas a DFA uses a constant
+number of CPU instructions to process one byte.
+* NFAs are generally easier to amend with secondary storage. For example, the
+[`thompson::pikevm::PikeVM`] uses an NFA to match, but also uses additional
+memory beyond the model of a finite state machine to track offsets for matching
+capturing groups. Conversely, the most a DFA can do is report the offset (and
+pattern ID) at which a match occurred. This is generally why we also compile
+DFAs in reverse, so that we can run them after finding the end of a match to
+also find the start of a match.
+* NFAs take worst case linear time to build, but DFAs take worst case
+exponential time to build. The [hybrid NFA/DFA](crate::hybrid) mitigates this
+challenge for DFAs in many practical cases.
+
+There are likely other differences, but the bottom line is that NFAs tend to
+be more memory efficient and give easier opportunities for increasing
+expressive power, whereas DFAs are faster to search with.
+
+# Why only a Thompson NFA?
+
+Currently, the only kind of NFA we support in this crate is a [Thompson
+NFA](https://en.wikipedia.org/wiki/Thompson%27s_construction). This refers
+to a specific construction algorithm that takes the syntax of a regex
+pattern and converts it to an NFA. Specifically, it makes gratuitous use of
+epsilon transitions in order to keep its structure simple. In exchange, its
+construction time is linear in the size of the regex. A Thompson NFA also makes
+the guarantee that given any state and a character in a haystack, there is at
+most one transition defined for it. (Although there may be many epsilon
+transitions.)
+
+It is possible that other types of NFAs will be added in the future, such as a
+[Glushkov NFA](https://en.wikipedia.org/wiki/Glushkov%27s_construction_algorithm).
+But currently, this crate only provides a Thompson NFA.
+*/
+
+#[cfg(feature = "nfa-thompson")]
+pub mod thompson;
diff --git a/vendor/regex-automata/src/nfa/thompson/backtrack.rs b/vendor/regex-automata/src/nfa/thompson/backtrack.rs
new file mode 100644
index 0000000..df99e45
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/thompson/backtrack.rs
@@ -0,0 +1,1908 @@
+/*!
+An NFA-backed bounded backtracker for executing regex searches with capturing
+groups.
+
+This module provides a [`BoundedBacktracker`] that works by simulating an NFA
+using the classical backtracking algorithm with a twist: it avoids redoing
+work that it has done before and thereby avoids worst case exponential time.
+In exchange, it can only be used on "short" haystacks. Its advantage is that
+it can be faster than the [`PikeVM`](thompson::pikevm::PikeVM) in many cases
+because it does less book-keeping.
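+
+For example, a basic fallible search, shown here as a minimal sketch that
+only uses the `BoundedBacktracker` APIs documented below, looks like this:
+
+```
+use regex_automata::{nfa::thompson::backtrack::BoundedBacktracker, Match};
+
+let re = BoundedBacktracker::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?;
+let mut cache = re.create_cache();
+// Every search is fallible, since the haystack might be too long.
+let got = re.try_find(&mut cache, "launch date: 2010-03-14")?;
+assert_eq!(Some(Match::must(0, 13..23)), got);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```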
+*/
+
+use alloc::{vec, vec::Vec};
+
+use crate::{
+    nfa::thompson::{self, BuildError, State, NFA},
+    util::{
+        captures::Captures,
+        empty, iter,
+        prefilter::Prefilter,
+        primitives::{NonMaxUsize, PatternID, SmallIndex, StateID},
+        search::{Anchored, HalfMatch, Input, Match, MatchError, Span},
+    },
+};
+
+/// Returns the minimum visited capacity for the given haystack.
+///
+/// This function can be used as the argument to [`Config::visited_capacity`]
+/// in order to guarantee that a backtracking search for the given `input`
+/// won't return an error when using a [`BoundedBacktracker`] built from the
+/// given `NFA`.
+///
+/// This routine exists primarily as a way to test that the bounded backtracker
+/// works correctly when its capacity is set to the smallest possible amount.
+/// Still, it may be useful in cases where you know you want to use the bounded
+/// backtracker for a specific input, and just need to know what visited
+/// capacity to provide to make it work.
+///
+/// Be warned that this number could be quite large as it is multiplicative in
+/// the size of the given NFA and haystack.
+pub fn min_visited_capacity(nfa: &NFA, input: &Input<'_>) -> usize {
+    div_ceil(nfa.states().len() * (input.get_span().len() + 1), 8)
+}
+
+/// The configuration used for building a bounded backtracker.
+///
+/// A bounded backtracker configuration is a simple data object that is
+/// typically used with [`Builder::configure`].
+#[derive(Clone, Debug, Default)]
+pub struct Config {
+    pre: Option<Option<Prefilter>>,
+    visited_capacity: Option<usize>,
+}
+
+impl Config {
+    /// Return a new default regex configuration.
+    pub fn new() -> Config {
+        Config::default()
+    }
+
+    /// Set a prefilter to be used whenever a start state is entered.
+    ///
+    /// A [`Prefilter`] in this context is meant to accelerate searches by
+    /// looking for literal prefixes that every match for the corresponding
+    /// pattern (or patterns) must start with. Once a prefilter produces a
+    /// match, the underlying search routine continues on to try and confirm
+    /// the match.
+    ///
+    /// Be warned that setting a prefilter does not guarantee that the search
+    /// will be faster. While it's usually a good bet, if the prefilter
+    /// produces a lot of false positive candidates (i.e., positions matched
+    /// by the prefilter but not by the regex), then the overall result can
+    /// be slower than if you had just executed the regex engine without any
+    /// prefilters.
+    ///
+    /// By default no prefilter is set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::backtrack::BoundedBacktracker,
+    ///     util::prefilter::Prefilter,
+    ///     Input, Match, MatchKind,
+    /// };
+    ///
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "bar"]);
+    /// let re = BoundedBacktracker::builder()
+    ///     .configure(BoundedBacktracker::config().prefilter(pre))
+    ///     .build(r"(foo|bar)[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("foo1 barfox bar");
+    /// assert_eq!(
+    ///     Some(Match::must(0, 5..11)),
+    ///     re.try_find(&mut cache, input)?,
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Be warned though that an incorrect prefilter can lead to incorrect
+    /// results!
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::backtrack::BoundedBacktracker,
+    ///     util::prefilter::Prefilter,
+    ///     Input, HalfMatch, MatchKind,
+    /// };
+    ///
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "car"]);
+    /// let re = BoundedBacktracker::builder()
+    ///     .configure(BoundedBacktracker::config().prefilter(pre))
+    ///     .build(r"(foo|bar)[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("foo1 barfox bar");
+    /// // No match reported even though there clearly is one!
+    /// assert_eq!(None, re.try_find(&mut cache, input)?);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn prefilter(mut self, pre: Option<Prefilter>) -> Config {
+        self.pre = Some(pre);
+        self
+    }
+
+    /// Set the visited capacity used to bound backtracking.
+    ///
+    /// The visited capacity represents the amount of heap memory (in bytes) to
+    /// allocate toward tracking which parts of the backtracking search have
+    /// been done before. The heap memory needed for any particular search is
+    /// proportional to `haystack.len() * nfa.states().len()`, which can be
+    /// quite large. Therefore, the bounded backtracker is typically only able
+    /// to run on shorter haystacks.
+    ///
+    /// For a given regex, increasing the visited capacity means that the
+    /// maximum haystack length that can be searched is increased. The
+    /// [`BoundedBacktracker::max_haystack_len`] method returns that maximum.
+    ///
+    /// The default capacity is a reasonable but empirically chosen size.
+    ///
+    /// # Example
+    ///
+    /// As with other regex engines, Unicode is what tends to make the bounded
+    /// backtracker less useful by making the maximum haystack length quite
+    /// small. If necessary, increasing the visited capacity using this routine
+    /// will increase the maximum haystack length at the cost of using more
+    /// memory.
+    ///
+    /// Note though that the specific maximum values here are not an API
+    /// guarantee. The default visited capacity is subject to change and not
+    /// covered by semver.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker;
+    ///
+    /// // Unicode inflates the size of the underlying NFA quite a bit, and
+    /// // thus means that the backtracker can only handle smaller haystacks,
+    /// // assuming that the visited capacity remains unchanged.
+    /// let re = BoundedBacktracker::new(r"\w+")?;
+    /// assert!(re.max_haystack_len() <= 7_000);
+    /// // But we can increase the visited capacity to handle bigger haystacks!
+    /// let re = BoundedBacktracker::builder()
+    ///     .configure(BoundedBacktracker::config().visited_capacity(1<<20))
+    ///     .build(r"\w+")?;
+    /// assert!(re.max_haystack_len() >= 25_000);
+    /// assert!(re.max_haystack_len() <= 28_000);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn visited_capacity(mut self, capacity: usize) -> Config {
+        self.visited_capacity = Some(capacity);
+        self
+    }
+
+    /// Returns the prefilter set in this configuration, if one at all.
+    pub fn get_prefilter(&self) -> Option<&Prefilter> {
+        self.pre.as_ref().unwrap_or(&None).as_ref()
+    }
+
+    /// Returns the configured visited capacity.
+    ///
+    /// Note that the actual capacity used may be slightly bigger than the
+    /// configured capacity.
+    pub fn get_visited_capacity(&self) -> usize {
+        const DEFAULT: usize = 256 * (1 << 10); // 256 KB
+        self.visited_capacity.unwrap_or(DEFAULT)
+    }
+
+    /// Overwrite the default configuration such that the options in `o` are
+    /// always used.
If an option in `o` is not set, then the corresponding + /// option in `self` is used. If it's not set in `self` either, then it + /// remains not set. + pub(crate) fn overwrite(&self, o: Config) -> Config { + Config { + pre: o.pre.or_else(|| self.pre.clone()), + visited_capacity: o.visited_capacity.or(self.visited_capacity), + } + } +} + +/// A builder for a bounded backtracker. +/// +/// This builder permits configuring options for the syntax of a pattern, the +/// NFA construction and the `BoundedBacktracker` construction. This builder +/// is different from a general purpose regex builder in that it permits fine +/// grain configuration of the construction process. The trade off for this is +/// complexity, and the possibility of setting a configuration that might not +/// make sense. For example, there are two different UTF-8 modes: +/// +/// * [`syntax::Config::utf8`](crate::util::syntax::Config::utf8) controls +/// whether the pattern itself can contain sub-expressions that match invalid +/// UTF-8. +/// * [`thompson::Config::utf8`] controls how the regex iterators themselves +/// advance the starting position of the next search when a match with zero +/// length is found. +/// +/// Generally speaking, callers will want to either enable all of these or +/// disable all of these. +/// +/// # Example +/// +/// This example shows how to disable UTF-8 mode in the syntax and the regex +/// itself. This is generally what you want for matching on arbitrary bytes. +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::{self, backtrack::BoundedBacktracker}, +/// util::syntax, +/// Match, +/// }; +/// +/// let re = BoundedBacktracker::builder() +/// .syntax(syntax::Config::new().utf8(false)) +/// .thompson(thompson::Config::new().utf8(false)) +/// .build(r"foo(?-u:[^b])ar.*")?; +/// let mut cache = re.create_cache(); +/// +/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n"; +/// let expected = Some(Ok(Match::must(0, 1..9))); +/// let got = re.try_find_iter(&mut cache, haystack).next(); +/// assert_eq!(expected, got); +/// // Notice that `(?-u:[^b])` matches invalid UTF-8, +/// // but the subsequent `.*` does not! Disabling UTF-8 +/// // on the syntax permits this. +/// // +/// // N.B. This example does not show the impact of +/// // disabling UTF-8 mode on a BoundedBacktracker Config, since that +/// // only impacts regexes that can produce matches of +/// // length 0. +/// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap()?.range()]); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Builder { + config: Config, + #[cfg(feature = "syntax")] + thompson: thompson::Compiler, +} + +impl Builder { + /// Create a new BoundedBacktracker builder with its default configuration. + pub fn new() -> Builder { + Builder { + config: Config::default(), + #[cfg(feature = "syntax")] + thompson: thompson::Compiler::new(), + } + } + + /// Build a `BoundedBacktracker` from the given pattern. + /// + /// If there was a problem parsing or compiling the pattern, then an error + /// is returned. + #[cfg(feature = "syntax")] + pub fn build( + &self, + pattern: &str, + ) -> Result { + self.build_many(&[pattern]) + } + + /// Build a `BoundedBacktracker` from the given patterns. + #[cfg(feature = "syntax")] + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + let nfa = self.thompson.build_many(patterns)?; + self.build_from_nfa(nfa) + } + + /// Build a `BoundedBacktracker` directly from its NFA. 
+ /// + /// Note that when using this method, any configuration that applies to the + /// construction of the NFA itself will of course be ignored, since the NFA + /// given here is already built. + pub fn build_from_nfa( + &self, + nfa: NFA, + ) -> Result { + nfa.look_set_any().available().map_err(BuildError::word)?; + Ok(BoundedBacktracker { config: self.config.clone(), nfa }) + } + + /// Apply the given `BoundedBacktracker` configuration options to this + /// builder. + pub fn configure(&mut self, config: Config) -> &mut Builder { + self.config = self.config.overwrite(config); + self + } + + /// Set the syntax configuration for this builder using + /// [`syntax::Config`](crate::util::syntax::Config). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + /// + /// These settings only apply when constructing a `BoundedBacktracker` + /// directly from a pattern. + #[cfg(feature = "syntax")] + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Builder { + self.thompson.syntax(config); + self + } + + /// Set the Thompson NFA configuration for this builder using + /// [`nfa::thompson::Config`](crate::nfa::thompson::Config). + /// + /// This permits setting things like if additional time should be spent + /// shrinking the size of the NFA. + /// + /// These settings only apply when constructing a `BoundedBacktracker` + /// directly from a pattern. + #[cfg(feature = "syntax")] + pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder { + self.thompson.configure(config); + self + } +} + +/// A backtracking regex engine that bounds its execution to avoid exponential +/// blow-up. +/// +/// This regex engine only implements leftmost-first match semantics and +/// only supports leftmost searches. It effectively does the same thing as a +/// [`PikeVM`](thompson::pikevm::PikeVM), but typically does it faster because +/// it doesn't have to worry about copying capturing group spans for most NFA +/// states. Instead, the backtracker can maintain one set of captures (provided +/// by the caller) and never needs to copy them. In exchange, the backtracker +/// bounds itself to ensure it doesn't exhibit worst case exponential time. +/// This results in the backtracker only being able to handle short haystacks +/// given reasonable memory usage. +/// +/// # Searches may return an error! +/// +/// By design, this backtracking regex engine is bounded. This bound is +/// implemented by not visiting any combination of NFA state ID and position +/// in a haystack more than once. Thus, the total memory required to bound +/// backtracking is proportional to `haystack.len() * nfa.states().len()`. +/// This can obviously get quite large, since large haystacks aren't terribly +/// uncommon. To avoid using exorbitant memory, the capacity is bounded by +/// a fixed limit set via [`Config::visited_capacity`]. Thus, if the total +/// capacity required for a particular regex and a haystack exceeds this +/// capacity, then the search routine will return an error. +/// +/// Unlike other regex engines that may return an error at search time (like +/// the DFA or the hybrid NFA/DFA), there is no way to guarantee that a bounded +/// backtracker will work for every haystack. Therefore, this regex engine +/// _only_ exposes fallible search routines to avoid the footgun of panicking +/// when running a search on a haystack that is too big. 
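+///
+/// For example, here is a minimal sketch (using only APIs documented on this
+/// type) showing such an error when the haystack exceeds the maximum
+/// supported length:
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker;
+///
+/// let re = BoundedBacktracker::new(r"\w+")?;
+/// let mut cache = re.create_cache();
+/// // One byte longer than this backtracker can handle.
+/// let haystack = "a".repeat(re.max_haystack_len() + 1);
+/// // The fallible API surfaces the problem instead of panicking.
+/// assert!(re.try_find(&mut cache, &haystack).is_err());
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```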
+/// +/// If one wants to use the fallible search APIs without handling the +/// error, the only way to guarantee an error won't occur from the +/// haystack length is to ensure the haystack length does not exceed +/// [`BoundedBacktracker::max_haystack_len`]. +/// +/// # Example: Unicode word boundaries +/// +/// This example shows that the bounded backtracker implements Unicode word +/// boundaries correctly by default. +/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{nfa::thompson::backtrack::BoundedBacktracker, Match}; +/// +/// let re = BoundedBacktracker::new(r"\b\w+\b")?; +/// let mut cache = re.create_cache(); +/// +/// let mut it = re.try_find_iter(&mut cache, "Шерлок Холмс"); +/// assert_eq!(Some(Ok(Match::must(0, 0..12))), it.next()); +/// assert_eq!(Some(Ok(Match::must(0, 13..23))), it.next()); +/// assert_eq!(None, it.next()); +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: multiple regex patterns +/// +/// The bounded backtracker supports searching for multiple patterns +/// simultaneously, just like other regex engines. Note though that because it +/// uses a backtracking strategy, this regex engine is unlikely to scale well +/// as more patterns are added. But then again, as more patterns are added, the +/// maximum haystack length allowed will also shorten (assuming the visited +/// capacity remains invariant). +/// +/// ``` +/// use regex_automata::{nfa::thompson::backtrack::BoundedBacktracker, Match}; +/// +/// let re = BoundedBacktracker::new_many(&["[a-z]+", "[0-9]+"])?; +/// let mut cache = re.create_cache(); +/// +/// let mut it = re.try_find_iter(&mut cache, "abc 1 foo 4567 0 quux"); +/// assert_eq!(Some(Ok(Match::must(0, 0..3))), it.next()); +/// assert_eq!(Some(Ok(Match::must(1, 4..5))), it.next()); +/// assert_eq!(Some(Ok(Match::must(0, 6..9))), it.next()); +/// assert_eq!(Some(Ok(Match::must(1, 10..14))), it.next()); +/// assert_eq!(Some(Ok(Match::must(1, 15..16))), it.next()); +/// assert_eq!(Some(Ok(Match::must(0, 17..21))), it.next()); +/// assert_eq!(None, it.next()); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct BoundedBacktracker { + config: Config, + nfa: NFA, +} + +impl BoundedBacktracker { + /// Parse the given regular expression using the default configuration and + /// return the corresponding `BoundedBacktracker`. + /// + /// If you want a non-default configuration, then use the [`Builder`] to + /// set your own configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::new("foo[0-9]+bar")?; + /// let mut cache = re.create_cache(); + /// assert_eq!( + /// Some(Ok(Match::must(0, 3..14))), + /// re.try_find_iter(&mut cache, "zzzfoo12345barzzz").next(), + /// ); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new(pattern: &str) -> Result { + BoundedBacktracker::builder().build(pattern) + } + + /// Like `new`, but parses multiple patterns into a single "multi regex." + /// This similarly uses the default regex configuration. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::new_many(&["[a-z]+", "[0-9]+"])?; + /// let mut cache = re.create_cache(); + /// + /// let mut it = re.try_find_iter(&mut cache, "abc 1 foo 4567 0 quux"); + /// assert_eq!(Some(Ok(Match::must(0, 0..3))), it.next()); + /// assert_eq!(Some(Ok(Match::must(1, 4..5))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 6..9))), it.next()); + /// assert_eq!(Some(Ok(Match::must(1, 10..14))), it.next()); + /// assert_eq!(Some(Ok(Match::must(1, 15..16))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 17..21))), it.next()); + /// assert_eq!(None, it.next()); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new_many>( + patterns: &[P], + ) -> Result { + BoundedBacktracker::builder().build_many(patterns) + } + + /// # Example + /// + /// This shows how to hand assemble a regular expression via its HIR, + /// compile an NFA from it and build a BoundedBacktracker from the NFA. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{NFA, backtrack::BoundedBacktracker}, + /// Match, + /// }; + /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange}; + /// + /// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![ + /// ClassBytesRange::new(b'0', b'9'), + /// ClassBytesRange::new(b'A', b'Z'), + /// ClassBytesRange::new(b'_', b'_'), + /// ClassBytesRange::new(b'a', b'z'), + /// ]))); + /// + /// let config = NFA::config().nfa_size_limit(Some(1_000)); + /// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?; + /// + /// let re = BoundedBacktracker::new_from_nfa(nfa)?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let expected = Some(Match::must(0, 3..4)); + /// re.try_captures(&mut cache, "!@#A#@!", &mut caps)?; + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn new_from_nfa(nfa: NFA) -> Result { + BoundedBacktracker::builder().build_from_nfa(nfa) + } + + /// Create a new `BoundedBacktracker` that matches every input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::always_match()?; + /// let mut cache = re.create_cache(); + /// + /// let expected = Some(Ok(Match::must(0, 0..0))); + /// assert_eq!(expected, re.try_find_iter(&mut cache, "").next()); + /// assert_eq!(expected, re.try_find_iter(&mut cache, "foo").next()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn always_match() -> Result { + let nfa = thompson::NFA::always_match(); + BoundedBacktracker::new_from_nfa(nfa) + } + + /// Create a new `BoundedBacktracker` that never matches any input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker; + /// + /// let re = BoundedBacktracker::never_match()?; + /// let mut cache = re.create_cache(); + /// + /// assert_eq!(None, re.try_find_iter(&mut cache, "").next()); + /// assert_eq!(None, re.try_find_iter(&mut cache, "foo").next()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn never_match() -> Result { + let nfa = thompson::NFA::never_match(); + BoundedBacktracker::new_from_nfa(nfa) + } + + /// Return a default configuration for a `BoundedBacktracker`. 
+ /// + /// This is a convenience routine to avoid needing to import the `Config` + /// type when customizing the construction of a `BoundedBacktracker`. + /// + /// # Example + /// + /// This example shows how to disable UTF-8 mode. When UTF-8 mode is + /// disabled, zero-width matches that split a codepoint are allowed. + /// Otherwise they are never reported. + /// + /// In the code below, notice that `""` is permitted to match positions + /// that split the encoding of a codepoint. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{self, backtrack::BoundedBacktracker}, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::builder() + /// .thompson(thompson::Config::new().utf8(false)) + /// .build(r"")?; + /// let mut cache = re.create_cache(); + /// + /// let haystack = "a☃z"; + /// let mut it = re.try_find_iter(&mut cache, haystack); + /// assert_eq!(Some(Ok(Match::must(0, 0..0))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 1..1))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 2..2))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 3..3))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 4..4))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 5..5))), it.next()); + /// assert_eq!(None, it.next()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn config() -> Config { + Config::new() + } + + /// Return a builder for configuring the construction of a + /// `BoundedBacktracker`. + /// + /// This is a convenience routine to avoid needing to import the + /// [`Builder`] type in common cases. + /// + /// # Example + /// + /// This example shows how to use the builder to disable UTF-8 mode + /// everywhere. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::{self, backtrack::BoundedBacktracker}, + /// util::syntax, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::builder() + /// .syntax(syntax::Config::new().utf8(false)) + /// .thompson(thompson::Config::new().utf8(false)) + /// .build(r"foo(?-u:[^b])ar.*")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n"; + /// let expected = Some(Match::must(0, 1..9)); + /// re.try_captures(&mut cache, haystack, &mut caps)?; + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn builder() -> Builder { + Builder::new() + } + + /// Create a new cache for this regex. + /// + /// The cache returned should only be used for searches for this + /// regex. If you want to reuse the cache for another regex, then you + /// must call [`Cache::reset`] with that regex (or, equivalently, + /// [`BoundedBacktracker::reset_cache`]). + pub fn create_cache(&self) -> Cache { + Cache::new(self) + } + + /// Create a new empty set of capturing groups that is guaranteed to be + /// valid for the search APIs on this `BoundedBacktracker`. + /// + /// A `Captures` value created for a specific `BoundedBacktracker` cannot + /// be used with any other `BoundedBacktracker`. + /// + /// This is a convenience function for [`Captures::all`]. See the + /// [`Captures`] documentation for an explanation of its alternative + /// constructors that permit the `BoundedBacktracker` to do less work + /// during a search, and thus might make it faster. 
+ pub fn create_captures(&self) -> Captures { + Captures::all(self.get_nfa().group_info().clone()) + } + + /// Reset the given cache such that it can be used for searching with the + /// this `BoundedBacktracker` (and only this `BoundedBacktracker`). + /// + /// A cache reset permits reusing memory already allocated in this cache + /// with a different `BoundedBacktracker`. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different + /// `BoundedBacktracker`. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, + /// }; + /// + /// let re1 = BoundedBacktracker::new(r"\w")?; + /// let re2 = BoundedBacktracker::new(r"\W")?; + /// + /// let mut cache = re1.create_cache(); + /// assert_eq!( + /// Some(Ok(Match::must(0, 0..2))), + /// re1.try_find_iter(&mut cache, "Δ").next(), + /// ); + /// + /// // Using 'cache' with re2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the BoundedBacktracker we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 're1' is also not + /// // allowed. + /// cache.reset(&re2); + /// assert_eq!( + /// Some(Ok(Match::must(0, 0..3))), + /// re2.try_find_iter(&mut cache, "☃").next(), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reset_cache(&self, cache: &mut Cache) { + cache.reset(self); + } + + /// Returns the total number of patterns compiled into this + /// `BoundedBacktracker`. + /// + /// In the case of a `BoundedBacktracker` that contains no patterns, this + /// returns `0`. + /// + /// # Example + /// + /// This example shows the pattern length for a `BoundedBacktracker` that + /// never matches: + /// + /// ``` + /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker; + /// + /// let re = BoundedBacktracker::never_match()?; + /// assert_eq!(re.pattern_len(), 0); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And another example for a `BoundedBacktracker` that matches at every + /// position: + /// + /// ``` + /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker; + /// + /// let re = BoundedBacktracker::always_match()?; + /// assert_eq!(re.pattern_len(), 1); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And finally, a `BoundedBacktracker` that was constructed from multiple + /// patterns: + /// + /// ``` + /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker; + /// + /// let re = BoundedBacktracker::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?; + /// assert_eq!(re.pattern_len(), 3); + /// # Ok::<(), Box>(()) + /// ``` + pub fn pattern_len(&self) -> usize { + self.nfa.pattern_len() + } + + /// Return the config for this `BoundedBacktracker`. + #[inline] + pub fn get_config(&self) -> &Config { + &self.config + } + + /// Returns a reference to the underlying NFA. + #[inline] + pub fn get_nfa(&self) -> &NFA { + &self.nfa + } + + /// Returns the maximum haystack length supported by this backtracker. + /// + /// This routine is a function of both [`Config::visited_capacity`] and the + /// internal size of the backtracker's NFA. + /// + /// # Example + /// + /// This example shows how the maximum haystack length can vary depending + /// on the size of the regex itself. Note though that the specific maximum + /// values here are not an API guarantee. 
The default visited capacity is + /// subject to change and not covered by semver. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, MatchError, + /// }; + /// + /// // If you're only using ASCII, you get a big budget. + /// let re = BoundedBacktracker::new(r"(?-u)\w+")?; + /// let mut cache = re.create_cache(); + /// assert_eq!(re.max_haystack_len(), 299_592); + /// // Things work up to the max. + /// let mut haystack = "a".repeat(299_592); + /// let expected = Some(Ok(Match::must(0, 0..299_592))); + /// assert_eq!(expected, re.try_find_iter(&mut cache, &haystack).next()); + /// // But you'll get an error if you provide a haystack that's too big. + /// // Notice that we use the 'try_find_iter' routine instead, which + /// // yields Result instead of Match. + /// haystack.push('a'); + /// let expected = Some(Err(MatchError::haystack_too_long(299_593))); + /// assert_eq!(expected, re.try_find_iter(&mut cache, &haystack).next()); + /// + /// // Unicode inflates the size of the underlying NFA quite a bit, and + /// // thus means that the backtracker can only handle smaller haystacks, + /// // assuming that the visited capacity remains unchanged. + /// let re = BoundedBacktracker::new(r"\w+")?; + /// assert!(re.max_haystack_len() <= 7_000); + /// // But we can increase the visited capacity to handle bigger haystacks! + /// let re = BoundedBacktracker::builder() + /// .configure(BoundedBacktracker::config().visited_capacity(1<<20)) + /// .build(r"\w+")?; + /// assert!(re.max_haystack_len() >= 25_000); + /// assert!(re.max_haystack_len() <= 28_000); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn max_haystack_len(&self) -> usize { + // The capacity given in the config is "bytes of heap memory," but the + // capacity we use here is "number of bits." So convert the capacity in + // bytes to the capacity in bits. + let capacity = 8 * self.get_config().get_visited_capacity(); + let blocks = div_ceil(capacity, Visited::BLOCK_SIZE); + let real_capacity = blocks.saturating_mul(Visited::BLOCK_SIZE); + // It's possible for `real_capacity` to be smaller than the number of + // NFA states for particularly large regexes, so we saturate towards + // zero. + (real_capacity / self.nfa.states().len()).saturating_sub(1) + } +} + +impl BoundedBacktracker { + /// Returns true if and only if this regex matches the given haystack. + /// + /// In the case of a backtracking regex engine, and unlike most other + /// regex engines in this crate, short circuiting isn't practical. However, + /// this routine may still be faster because it instructs backtracking to + /// not keep track of any capturing groups. + /// + /// # Errors + /// + /// This routine only errors if the search could not complete. For this + /// backtracking regex engine, this only occurs when the haystack length + /// exceeds [`BoundedBacktracker::max_haystack_len`]. + /// + /// When a search cannot complete, callers cannot know whether a match + /// exists or not. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker; + /// + /// let re = BoundedBacktracker::new("foo[0-9]+bar")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.try_is_match(&mut cache, "foo12345bar")?); + /// assert!(!re.try_is_match(&mut cache, "foobar")?); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: consistency with search APIs + /// + /// `is_match` is guaranteed to return `true` whenever `find` returns a + /// match. This includes searches that are executed entirely within a + /// codepoint: + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Input, + /// }; + /// + /// let re = BoundedBacktracker::new("a*")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(!re.try_is_match(&mut cache, Input::new("☃").span(1..2))?); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Notice that when UTF-8 mode is disabled, then the above reports a + /// match because the restriction against zero-width matches that split a + /// codepoint has been lifted: + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{backtrack::BoundedBacktracker, NFA}, + /// Input, + /// }; + /// + /// let re = BoundedBacktracker::builder() + /// .thompson(NFA::config().utf8(false)) + /// .build("a*")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.try_is_match(&mut cache, Input::new("☃").span(1..2))?); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_is_match<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + ) -> Result { + let input = input.into().earliest(true); + self.try_search_slots(cache, &input, &mut []).map(|pid| pid.is_some()) + } + + /// Executes a leftmost forward search and returns a `Match` if one exists. + /// + /// This routine only includes the overall match span. To get + /// access to the individual spans of each capturing group, use + /// [`BoundedBacktracker::try_captures`]. + /// + /// # Errors + /// + /// This routine only errors if the search could not complete. For this + /// backtracking regex engine, this only occurs when the haystack length + /// exceeds [`BoundedBacktracker::max_haystack_len`]. + /// + /// When a search cannot complete, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::new("foo[0-9]+")?; + /// let mut cache = re.create_cache(); + /// let expected = Match::must(0, 0..8); + /// assert_eq!(Some(expected), re.try_find(&mut cache, "foo12345")?); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_find<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + ) -> Result, MatchError> { + let input = input.into(); + if self.get_nfa().pattern_len() == 1 { + let mut slots = [None, None]; + let pid = match self.try_search_slots(cache, &input, &mut slots)? { + None => return Ok(None), + Some(pid) => pid, + }; + let start = match slots[0] { + None => return Ok(None), + Some(s) => s.get(), + }; + let end = match slots[1] { + None => return Ok(None), + Some(s) => s.get(), + }; + return Ok(Some(Match::new(pid, Span { start, end }))); + } + let ginfo = self.get_nfa().group_info(); + let slots_len = ginfo.implicit_slot_len(); + let mut slots = vec![None; slots_len]; + let pid = match self.try_search_slots(cache, &input, &mut slots)? 
{ + None => return Ok(None), + Some(pid) => pid, + }; + let start = match slots[pid.as_usize() * 2] { + None => return Ok(None), + Some(s) => s.get(), + }; + let end = match slots[pid.as_usize() * 2 + 1] { + None => return Ok(None), + Some(s) => s.get(), + }; + Ok(Some(Match::new(pid, Span { start, end }))) + } + + /// Executes a leftmost forward search and writes the spans of capturing + /// groups that participated in a match into the provided [`Captures`] + /// value. If no match was found, then [`Captures::is_match`] is guaranteed + /// to return `false`. + /// + /// # Errors + /// + /// This routine only errors if the search could not complete. For this + /// backtracking regex engine, this only occurs when the haystack length + /// exceeds [`BoundedBacktracker::max_haystack_len`]. + /// + /// When a search cannot complete, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Span, + /// }; + /// + /// let re = BoundedBacktracker::new( + /// r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$", + /// )?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.try_captures(&mut cache, "2010-03-14", &mut caps)?; + /// assert!(caps.is_match()); + /// assert_eq!(Some(Span::from(0..4)), caps.get_group(1)); + /// assert_eq!(Some(Span::from(5..7)), caps.get_group(2)); + /// assert_eq!(Some(Span::from(8..10)), caps.get_group(3)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_captures<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + caps: &mut Captures, + ) -> Result<(), MatchError> { + self.try_search(cache, &input.into(), caps) + } + + /// Returns an iterator over all non-overlapping leftmost matches in the + /// given bytes. If no match exists, then the iterator yields no elements. + /// + /// If the regex engine returns an error at any point, then the iterator + /// will yield that error. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, MatchError, + /// }; + /// + /// let re = BoundedBacktracker::new("foo[0-9]+")?; + /// let mut cache = re.create_cache(); + /// + /// let text = "foo1 foo12 foo123"; + /// let result: Result, MatchError> = re + /// .try_find_iter(&mut cache, text) + /// .collect(); + /// let matches = result?; + /// assert_eq!(matches, vec![ + /// Match::must(0, 0..4), + /// Match::must(0, 5..10), + /// Match::must(0, 11..17), + /// ]); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_find_iter<'r, 'c, 'h, I: Into>>( + &'r self, + cache: &'c mut Cache, + input: I, + ) -> TryFindMatches<'r, 'c, 'h> { + let caps = Captures::matches(self.get_nfa().group_info().clone()); + let it = iter::Searcher::new(input.into()); + TryFindMatches { re: self, cache, caps, it } + } + + /// Returns an iterator over all non-overlapping `Captures` values. If no + /// match exists, then the iterator yields no elements. + /// + /// This yields the same matches as [`BoundedBacktracker::try_find_iter`], + /// but it includes the spans of all capturing groups that participate in + /// each match. + /// + /// If the regex engine returns an error at any point, then the iterator + /// will yield that error. + /// + /// **Tip:** See [`util::iter::Searcher`](crate::util::iter::Searcher) for + /// how to correctly iterate over all matches in a haystack while avoiding + /// the creation of a new `Captures` value for every match. 
(Which you are + /// forced to do with an `Iterator`.) + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Span, + /// }; + /// + /// let re = BoundedBacktracker::new("foo(?P[0-9]+)")?; + /// let mut cache = re.create_cache(); + /// + /// let text = "foo1 foo12 foo123"; + /// let mut spans = vec![]; + /// for result in re.try_captures_iter(&mut cache, text) { + /// let caps = result?; + /// // The unwrap is OK since 'numbers' matches if the pattern matches. + /// spans.push(caps.get_group_by_name("numbers").unwrap()); + /// } + /// assert_eq!(spans, vec![ + /// Span::from(3..4), + /// Span::from(8..10), + /// Span::from(14..17), + /// ]); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_captures_iter<'r, 'c, 'h, I: Into>>( + &'r self, + cache: &'c mut Cache, + input: I, + ) -> TryCapturesMatches<'r, 'c, 'h> { + let caps = self.create_captures(); + let it = iter::Searcher::new(input.into()); + TryCapturesMatches { re: self, cache, caps, it } + } +} + +impl BoundedBacktracker { + /// Executes a leftmost forward search and writes the spans of capturing + /// groups that participated in a match into the provided [`Captures`] + /// value. If no match was found, then [`Captures::is_match`] is guaranteed + /// to return `false`. + /// + /// This is like [`BoundedBacktracker::try_captures`], but it accepts a + /// concrete `&Input` instead of an `Into`. + /// + /// # Errors + /// + /// This routine only errors if the search could not complete. For this + /// backtracking regex engine, this only occurs when the haystack length + /// exceeds [`BoundedBacktracker::max_haystack_len`]. + /// + /// When a search cannot complete, callers cannot know whether a match + /// exists or not. + /// + /// # Example: specific pattern search + /// + /// This example shows how to build a multi bounded backtracker that + /// permits searching for specific patterns. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Anchored, Input, Match, PatternID, + /// }; + /// + /// let re = BoundedBacktracker::new_many(&[ + /// "[a-z0-9]{6}", + /// "[a-z][a-z0-9]{5}", + /// ])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "foo123"; + /// + /// // Since we are using the default leftmost-first match and both + /// // patterns match at the same starting position, only the first pattern + /// // will be returned in this case when doing a search for any of the + /// // patterns. + /// let expected = Some(Match::must(0, 0..6)); + /// re.try_search(&mut cache, &Input::new(haystack), &mut caps)?; + /// assert_eq!(expected, caps.get_match()); + /// + /// // But if we want to check whether some other pattern matches, then we + /// // can provide its pattern ID. + /// let expected = Some(Match::must(1, 0..6)); + /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(1))); + /// re.try_search(&mut cache, &input, &mut caps)?; + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specifying the bounds of a search + /// + /// This example shows how providing the bounds of a search can produce + /// different results than simply sub-slicing the haystack. 
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     nfa::thompson::backtrack::BoundedBacktracker,
+    ///     Match, Input,
+    /// };
+    ///
+    /// let re = BoundedBacktracker::new(r"\b[0-9]{3}\b")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let haystack = "foo123bar";
+    ///
+    /// // Since we sub-slice the haystack, the search doesn't know about
+    /// // the larger context and assumes that `123` is surrounded by word
+    /// // boundaries. And of course, the match position is reported relative
+    /// // to the sub-slice as well, which means we get `0..3` instead of
+    /// // `3..6`.
+    /// let expected = Some(Match::must(0, 0..3));
+    /// re.try_search(&mut cache, &Input::new(&haystack[3..6]), &mut caps)?;
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// // But if we provide the bounds of the search within the context of the
+    /// // entire haystack, then the search can take the surrounding context
+    /// // into account. (And if we did find a match, it would be reported
+    /// // as a valid offset into `haystack` instead of its sub-slice.)
+    /// let expected = None;
+    /// re.try_search(
+    ///     &mut cache, &Input::new(haystack).range(3..6), &mut caps,
+    /// )?;
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn try_search(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        caps: &mut Captures,
+    ) -> Result<(), MatchError> {
+        caps.set_pattern(None);
+        let pid = self.try_search_slots(cache, input, caps.slots_mut())?;
+        caps.set_pattern(pid);
+        Ok(())
+    }
+
+    /// Executes a leftmost forward search and writes the spans of capturing
+    /// groups that participated in a match into the provided `slots`, and
+    /// returns the matching pattern ID. The contents of the slots for patterns
+    /// other than the matching pattern are unspecified. If no match was found,
+    /// then `None` is returned and the contents of all `slots` are
+    /// unspecified.
+    ///
+    /// This is like [`BoundedBacktracker::try_search`], but it accepts a raw
+    /// slots slice instead of a `Captures` value. This is useful in contexts
+    /// where you don't want or need to allocate a `Captures`.
+    ///
+    /// It is legal to pass _any_ number of slots to this routine. If the regex
+    /// engine would otherwise write a slot offset that doesn't fit in the
+    /// provided slice, then it is simply skipped. In general though, there are
+    /// usually three slice lengths you might want to use:
+    ///
+    /// * An empty slice, if you only care about which pattern matched.
+    /// * A slice with
+    /// [`pattern_len() * 2`](crate::nfa::thompson::NFA::pattern_len)
+    /// slots, if you only care about the overall match spans for each matching
+    /// pattern.
+    /// * A slice with
+    /// [`slot_len()`](crate::util::captures::GroupInfo::slot_len) slots, which
+    /// permits recording match offsets for every capturing group in every
+    /// pattern.
+    ///
+    /// # Errors
+    ///
+    /// This routine only errors if the search could not complete. For this
+    /// backtracking regex engine, this only occurs when the haystack length
+    /// exceeds [`BoundedBacktracker::max_haystack_len`].
+    ///
+    /// When a search cannot complete, callers cannot know whether a match
+    /// exists or not.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find the overall match offsets in a
+    /// multi-pattern search without allocating a `Captures` value. Indeed, we
+    /// can put our slots right on the stack.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     nfa::thompson::backtrack::BoundedBacktracker,
+    ///     PatternID, Input,
+    /// };
+    ///
+    /// let re = BoundedBacktracker::new_many(&[
+    ///     r"\pL+",
+    ///     r"\d+",
+    /// ])?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("!@#123");
+    ///
+    /// // We only care about the overall match offsets here, so we just
+    /// // allocate two slots for each pattern. Each slot records the start
+    /// // and end of the match.
+    /// let mut slots = [None; 4];
+    /// let pid = re.try_search_slots(&mut cache, &input, &mut slots)?;
+    /// assert_eq!(Some(PatternID::must(1)), pid);
+    ///
+    /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'.
+    /// // See 'GroupInfo' for more details on the mapping between groups and
+    /// // slot indices.
+    /// let slot_start = pid.unwrap().as_usize() * 2;
+    /// let slot_end = slot_start + 1;
+    /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get()));
+    /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get()));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn try_search_slots(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Result<Option<PatternID>, MatchError> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        if !utf8empty {
+            let maybe_hm = self.try_search_slots_imp(cache, input, slots)?;
+            return Ok(maybe_hm.map(|hm| hm.pattern()));
+        }
+        // See PikeVM::try_search_slots for why we do this.
+        let min = self.get_nfa().group_info().implicit_slot_len();
+        if slots.len() >= min {
+            let maybe_hm = self.try_search_slots_imp(cache, input, slots)?;
+            return Ok(maybe_hm.map(|hm| hm.pattern()));
+        }
+        if self.get_nfa().pattern_len() == 1 {
+            let mut enough = [None, None];
+            let got = self.try_search_slots_imp(cache, input, &mut enough)?;
+            // This is OK because we know `enough` is strictly bigger
+            // than `slots`, otherwise this special case isn't reached.
+            slots.copy_from_slice(&enough[..slots.len()]);
+            return Ok(got.map(|hm| hm.pattern()));
+        }
+        let mut enough = vec![None; min];
+        let got = self.try_search_slots_imp(cache, input, &mut enough)?;
+        // This is OK because we know `enough` is strictly bigger than
+        // `slots`, otherwise this special case isn't reached.
+        slots.copy_from_slice(&enough[..slots.len()]);
+        Ok(got.map(|hm| hm.pattern()))
+    }
+
+    /// This is the actual implementation of `try_search_slots` that
+    /// doesn't account for the special case when 1) the NFA has UTF-8 mode
+    /// enabled, 2) the NFA can match the empty string and 3) the caller has
+    /// provided an insufficient number of slots to record match offsets.
+    #[inline(never)]
+    fn try_search_slots_imp(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Result<Option<HalfMatch>, MatchError> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        let hm = match self.search_imp(cache, input, slots)? {
+            None => return Ok(None),
+            Some(hm) if !utf8empty => return Ok(Some(hm)),
+            Some(hm) => hm,
+        };
+        empty::skip_splits_fwd(input, hm, hm.offset(), |input| {
+            Ok(self
+                .search_imp(cache, input, slots)?
+                .map(|hm| (hm, hm.offset())))
+        })
+    }
+
+    /// The implementation of standard leftmost backtracking search.
+    ///
+    /// Capturing group spans are written to 'caps', but only if requested.
+    /// 'caps' can be one of three things: 1) totally empty, in which case we
+    /// only report the pattern that matched, 2) only has slots for recording
+    /// the overall match offsets for any pattern, or 3) has all slots
+    /// available for recording the spans of any groups participating in a
+    /// match.
+    fn search_imp(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Result<Option<HalfMatch>, MatchError> {
+        // Unlike in the PikeVM, we write our capturing group spans directly
+        // into the caller's captures groups. So we have to make sure we're
+        // starting with a blank slate first. In the PikeVM, we avoid this
+        // by construction: the spans that are copied to every slot in the
+        // 'Captures' value already account for presence/absence. In this
+        // backtracker, we write directly into the caller provided slots,
+        // whereas in the PikeVM, we write into scratch space first and only
+        // copy them to the caller provided slots when a match is found.
+        for slot in slots.iter_mut() {
+            *slot = None;
+        }
+        cache.setup_search(&self, input)?;
+        if input.is_done() {
+            return Ok(None);
+        }
+        let (anchored, start_id) = match input.get_anchored() {
+            // Only way we're unanchored is if both the caller asked for an
+            // unanchored search *and* the pattern is itself not anchored.
+            Anchored::No => (
+                self.nfa.is_always_start_anchored(),
+                // We always use the anchored starting state here, even if
+                // doing an unanchored search. The "unanchored" part of it is
+                // implemented in the loop below, by simply trying the next
+                // byte offset if the previous backtracking exploration failed.
+                self.nfa.start_anchored(),
+            ),
+            Anchored::Yes => (true, self.nfa.start_anchored()),
+            Anchored::Pattern(pid) => match self.nfa.start_pattern(pid) {
+                None => return Ok(None),
+                Some(sid) => (true, sid),
+            },
+        };
+        if anchored {
+            let at = input.start();
+            return Ok(self.backtrack(cache, input, at, start_id, slots));
+        }
+        let pre = self.get_config().get_prefilter();
+        let mut at = input.start();
+        while at <= input.end() {
+            if let Some(ref pre) = pre {
+                let span = Span::from(at..input.end());
+                match pre.find(input.haystack(), span) {
+                    None => break,
+                    Some(ref span) => at = span.start,
+                }
+            }
+            if let Some(hm) = self.backtrack(cache, input, at, start_id, slots)
+            {
+                return Ok(Some(hm));
+            }
+            at += 1;
+        }
+        Ok(None)
+    }
+
+    /// Look for a match starting at `at` in `input` and write the matching
+    /// pattern ID and group spans to `caps`. The search uses `start_id` as its
+    /// starting state in the underlying NFA.
+    ///
+    /// If no match was found, then the caller should increment `at` and try
+    /// at the next position.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn backtrack(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        at: usize,
+        start_id: StateID,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<HalfMatch> {
+        cache.stack.push(Frame::Step { sid: start_id, at });
+        while let Some(frame) = cache.stack.pop() {
+            match frame {
+                Frame::Step { sid, at } => {
+                    if let Some(hm) = self.step(cache, input, sid, at, slots) {
+                        return Some(hm);
+                    }
+                }
+                Frame::RestoreCapture { slot, offset } => {
+                    slots[slot] = offset;
+                }
+            }
+        }
+        None
+    }
+
+    // LAMENTATION: The actual backtracking search is implemented in about
+    // 75 lines below. Yet this file is over 2,000 lines long. What have I
+    // done?
+
+    /// Execute a "step" in the backtracking algorithm.
+    ///
+    /// A "step" is somewhat of a misnomer, because this routine keeps going
+    /// until it either runs out of things to try or finds a match.
+    /// In the former case, it may have pushed some things on to the
+    /// backtracking stack, in which case those will be tried next as part of
+    /// the 'backtrack' routine above.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn step(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        mut sid: StateID,
+        mut at: usize,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<HalfMatch> {
+        loop {
+            if !cache.visited.insert(sid, at - input.start()) {
+                return None;
+            }
+            match *self.nfa.state(sid) {
+                State::ByteRange { ref trans } => {
+                    // Why do we need this? Unlike other regex engines in this
+                    // crate, the backtracker can steam roll ahead in the
+                    // haystack outside of the main loop over the bytes in the
+                    // haystack. While 'trans.matches()' below handles the case
+                    // of 'at' being out of bounds of 'input.haystack()', we
+                    // also need to handle the case of 'at' going out of bounds
+                    // of the span the caller asked to search.
+                    //
+                    // We should perhaps make the 'trans.matches()' API accept
+                    // an '&Input' instead of a '&[u8]'. Or at least, add a new
+                    // API that does it.
+                    if at >= input.end() {
+                        return None;
+                    }
+                    if !trans.matches(input.haystack(), at) {
+                        return None;
+                    }
+                    sid = trans.next;
+                    at += 1;
+                }
+                State::Sparse(ref sparse) => {
+                    if at >= input.end() {
+                        return None;
+                    }
+                    sid = sparse.matches(input.haystack(), at)?;
+                    at += 1;
+                }
+                State::Dense(ref dense) => {
+                    if at >= input.end() {
+                        return None;
+                    }
+                    sid = dense.matches(input.haystack(), at)?;
+                    at += 1;
+                }
+                State::Look { look, next } => {
+                    // OK because we don't permit building a searcher with a
+                    // Unicode word boundary if the requisite Unicode data is
+                    // unavailable.
+                    if !self.nfa.look_matcher().matches_inline(
+                        look,
+                        input.haystack(),
+                        at,
+                    ) {
+                        return None;
+                    }
+                    sid = next;
+                }
+                State::Union { ref alternates } => {
+                    sid = match alternates.get(0) {
+                        None => return None,
+                        Some(&sid) => sid,
+                    };
+                    cache.stack.extend(
+                        alternates[1..]
+                            .iter()
+                            .copied()
+                            .rev()
+                            .map(|sid| Frame::Step { sid, at }),
+                    );
+                }
+                State::BinaryUnion { alt1, alt2 } => {
+                    sid = alt1;
+                    cache.stack.push(Frame::Step { sid: alt2, at });
+                }
+                State::Capture { next, slot, .. } => {
+                    if slot.as_usize() < slots.len() {
+                        cache.stack.push(Frame::RestoreCapture {
+                            slot,
+                            offset: slots[slot],
+                        });
+                        slots[slot] = NonMaxUsize::new(at);
+                    }
+                    sid = next;
+                }
+                State::Fail => return None,
+                State::Match { pattern_id } => {
+                    return Some(HalfMatch::new(pattern_id, at));
+                }
+            }
+        }
+    }
+}
+
+/// An iterator over all non-overlapping matches for a fallible search.
+///
+/// The iterator yields a `Result<Match, MatchError>` value until no more
+/// matches could be found.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'r` represents the lifetime of the BoundedBacktracker.
+/// * `'c` represents the lifetime of the BoundedBacktracker's cache.
+/// * `'h` represents the lifetime of the haystack being searched.
+///
+/// This iterator can be created with the
+/// [`BoundedBacktracker::try_find_iter`] method.
+#[derive(Debug)]
+pub struct TryFindMatches<'r, 'c, 'h> {
+    re: &'r BoundedBacktracker,
+    cache: &'c mut Cache,
+    caps: Captures,
+    it: iter::Searcher<'h>,
+}
+
+impl<'r, 'c, 'h> Iterator for TryFindMatches<'r, 'c, 'h> {
+    type Item = Result<Match, MatchError>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Result<Match, MatchError>> {
+        // Splitting 'self' apart seems necessary to appease borrowck.
+        let TryFindMatches { re, ref mut cache, ref mut caps, ref mut it } =
+            *self;
+        it.try_advance(|input| {
+            re.try_search(cache, input, caps)?;
+            Ok(caps.get_match())
+        })
+        .transpose()
+    }
+}
+
+/// An iterator over all non-overlapping leftmost matches, with their capturing
+/// groups, for a fallible search.
+///
+/// The iterator yields a `Result<Captures, MatchError>` value until no more
+/// matches could be found.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'r` represents the lifetime of the BoundedBacktracker.
+/// * `'c` represents the lifetime of the BoundedBacktracker's cache.
+/// * `'h` represents the lifetime of the haystack being searched.
+///
+/// This iterator can be created with the
+/// [`BoundedBacktracker::try_captures_iter`] method.
+#[derive(Debug)]
+pub struct TryCapturesMatches<'r, 'c, 'h> {
+    re: &'r BoundedBacktracker,
+    cache: &'c mut Cache,
+    caps: Captures,
+    it: iter::Searcher<'h>,
+}
+
+impl<'r, 'c, 'h> Iterator for TryCapturesMatches<'r, 'c, 'h> {
+    type Item = Result<Captures, MatchError>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Result<Captures, MatchError>> {
+        // Splitting 'self' apart seems necessary to appease borrowck.
+        let TryCapturesMatches { re, ref mut cache, ref mut caps, ref mut it } =
+            *self;
+        let _ = it
+            .try_advance(|input| {
+                re.try_search(cache, input, caps)?;
+                Ok(caps.get_match())
+            })
+            .transpose()?;
+        if caps.is_match() {
+            Some(Ok(caps.clone()))
+        } else {
+            None
+        }
+    }
+}
+
+/// A cache represents mutable state that a [`BoundedBacktracker`] requires
+/// during a search.
+///
+/// For a given [`BoundedBacktracker`], its corresponding cache may be created
+/// either via [`BoundedBacktracker::create_cache`], or via [`Cache::new`].
+/// They are equivalent in every way, except the former does not require
+/// explicitly importing `Cache`.
+///
+/// A particular `Cache` is coupled with the [`BoundedBacktracker`] from which
+/// it was created. It may only be used with that `BoundedBacktracker`. A cache
+/// and its allocations may be re-purposed via [`Cache::reset`], in which case,
+/// it can only be used with the new `BoundedBacktracker` (and not the old
+/// one).
+#[derive(Clone, Debug)]
+pub struct Cache {
+    /// Stack used on the heap for doing backtracking instead of the
+    /// traditional recursive approach. We don't want recursion because then
+    /// we're likely to hit a stack overflow for bigger regexes.
+    stack: Vec<Frame>,
+    /// The set of (StateID, HaystackOffset) pairs that have been visited
+    /// by the backtracker within a single search. If such a pair has been
+    /// visited, then we avoid doing the work for that pair again. This is
+    /// what "bounds" the backtracking and prevents it from having worst case
+    /// exponential time.
+    visited: Visited,
+}
+
+impl Cache {
+    /// Create a new [`BoundedBacktracker`] cache.
+    ///
+    /// A potentially more convenient routine to create a cache is
+    /// [`BoundedBacktracker::create_cache`], as it does not require also
+    /// importing the `Cache` type.
+    ///
+    /// If you want to reuse the returned `Cache` with some other
+    /// `BoundedBacktracker`, then you must call [`Cache::reset`] with the
+    /// desired `BoundedBacktracker`.
+    pub fn new(re: &BoundedBacktracker) -> Cache {
+        Cache { stack: vec![], visited: Visited::new(re) }
+    }
+
+    /// Reset this cache such that it can be used for searching with a
+    /// different [`BoundedBacktracker`].
+    ///
+    /// A cache reset permits reusing memory already allocated in this cache
+    /// with a different `BoundedBacktracker`.
+    ///
+    /// # Example
+    ///
+    /// This shows how to re-purpose a cache for use with a different
+    /// `BoundedBacktracker`.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     nfa::thompson::backtrack::BoundedBacktracker,
+    ///     Match,
+    /// };
+    ///
+    /// let re1 = BoundedBacktracker::new(r"\w")?;
+    /// let re2 = BoundedBacktracker::new(r"\W")?;
+    ///
+    /// let mut cache = re1.create_cache();
+    /// assert_eq!(
+    ///     Some(Ok(Match::must(0, 0..2))),
+    ///     re1.try_find_iter(&mut cache, "Δ").next(),
+    /// );
+    ///
+    /// // Using 'cache' with re2 is not allowed.
+    /// // It may result in panics or incorrect results. In order to
+    /// // re-purpose the cache, we must reset it with the BoundedBacktracker
+    /// // we'd like to use it with.
+    /// //
+    /// // Similarly, after this reset, using the cache with 're1' is also not
+    /// // allowed.
+    /// cache.reset(&re2);
+    /// assert_eq!(
+    ///     Some(Ok(Match::must(0, 0..3))),
+    ///     re2.try_find_iter(&mut cache, "☃").next(),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn reset(&mut self, re: &BoundedBacktracker) {
+        self.visited.reset(re);
+    }
+
+    /// Returns the heap memory usage, in bytes, of this cache.
+    ///
+    /// This does **not** include the stack size used up by this cache. To
+    /// compute that, use `std::mem::size_of::<Cache>()`.
+    pub fn memory_usage(&self) -> usize {
+        self.stack.len() * core::mem::size_of::<Frame>()
+            + self.visited.memory_usage()
+    }
+
+    /// Clears this cache. This should be called at the start of every search
+    /// to ensure we start with a clean slate.
+    ///
+    /// This also sets the length of the capturing groups used in the current
+    /// search. This permits an optimization whereby 'SlotTable::for_state'
+    /// only returns the number of slots equivalent to the number of slots
+    /// given in the 'Captures' value. This may be less than the total number
+    /// of possible slots, e.g., when one only wants to track overall match
+    /// offsets. This in turn permits less copying of capturing group spans
+    /// in the BoundedBacktracker.
+    fn setup_search(
+        &mut self,
+        re: &BoundedBacktracker,
+        input: &Input<'_>,
+    ) -> Result<(), MatchError> {
+        self.stack.clear();
+        self.visited.setup_search(re, input)?;
+        Ok(())
+    }
+}
+
+/// Represents a stack frame on the heap while doing backtracking.
+///
+/// Instead of using explicit recursion for backtracking, we use a stack on
+/// the heap to keep track of things that we want to explore if the current
+/// backtracking branch turns out to not lead to a match.
+#[derive(Clone, Debug)]
+enum Frame {
+    /// Look for a match starting at `sid` and the given position in the
+    /// haystack.
+    Step { sid: StateID, at: usize },
+    /// Reset the given `slot` to the given `offset` (which might be `None`).
+    /// This effectively gives a "scope" to capturing groups, such that an
+    /// offset for a particular group only gets returned if the match goes
+    /// through that capturing group. If backtracking ends up going down a
+    /// different branch that results in a different offset (or perhaps none at
+    /// all), then this "restore capture" frame will cause the offset to get
+    /// reset.
+    RestoreCapture { slot: SmallIndex, offset: Option<NonMaxUsize> },
+}
+
+/// A bitset that keeps track of whether a particular (StateID, offset) has
+/// been considered during backtracking. If it has already been visited, then
+/// backtracking skips it. This is what gives backtracking its "bound."
+#[derive(Clone, Debug)]
+struct Visited {
+    /// The actual underlying bitset. Each element in the bitset corresponds
+    /// to a particular (StateID, offset) pair. States correspond to the rows
+    /// and the offsets correspond to the columns.
+    ///
+    /// If our underlying NFA has N states and the haystack we're searching
+    /// has M bytes, then we have N*(M+1) entries in our bitset table. The
+    /// M+1 occurs because our matches are delayed by one byte (to support
+    /// look-around), and so we need to handle the end position itself rather
+    /// than stopping just before the end. (If there is no end position, then
+    /// it's treated as "end-of-input," which is matched by things like '$'.)
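+    ///
+    /// For example (illustrative numbers, mirroring the arithmetic in
+    /// 'Visited::insert' below): with N=4 states and an M=3 byte haystack,
+    /// the stride is M+1=4, so the pair `(sid, at)` occupies bit
+    /// `sid * 4 + at`. The pair `(2, 3)` thus maps to bit 11 of the 16 bits
+    /// total.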
+    ///
+    /// Given BITS=N*(M+1), we wind up with div_ceil(BITS, sizeof(usize))
+    /// blocks.
+    ///
+    /// We use 'usize' to represent our blocks because it makes some of the
+    /// arithmetic in 'insert' a bit nicer. For example, if we used 'u32' for
+    /// our block, we'd either need to cast u32s to usizes or usizes to u32s.
+    bitset: Vec<usize>,
+    /// The stride represents one plus the length of the haystack we're
+    /// searching (as described above). The stride must be initialized for
+    /// each search.
+    stride: usize,
+}
+
+impl Visited {
+    /// The size of each block, in bits.
+    const BLOCK_SIZE: usize = 8 * core::mem::size_of::<usize>();
+
+    /// Create a new visited set for the given backtracker.
+    ///
+    /// The set is ready to use, but must be setup at the beginning of each
+    /// search by calling `setup_search`.
+    fn new(re: &BoundedBacktracker) -> Visited {
+        let mut visited = Visited { bitset: vec![], stride: 0 };
+        visited.reset(re);
+        visited
+    }
+
+    /// Insert the given (StateID, offset) pair into this set. If it already
+    /// exists, then this is a no-op and it returns false. Otherwise this
+    /// returns true.
+    fn insert(&mut self, sid: StateID, at: usize) -> bool {
+        let table_index = sid.as_usize() * self.stride + at;
+        let block_index = table_index / Visited::BLOCK_SIZE;
+        let bit = table_index % Visited::BLOCK_SIZE;
+        let block_with_bit = 1 << bit;
+        if self.bitset[block_index] & block_with_bit != 0 {
+            return false;
+        }
+        self.bitset[block_index] |= block_with_bit;
+        true
+    }
+
+    /// Reset this visited set to work with the given bounded backtracker.
+    fn reset(&mut self, _: &BoundedBacktracker) {
+        self.bitset.truncate(0);
+    }
+
+    /// Setup this visited set to work for a search using the given NFA
+    /// and input configuration. The NFA must be the same NFA used by the
+    /// BoundedBacktracker given to Visited::reset. Failing to call this might
+    /// result in panics or silently incorrect search behavior.
+    fn setup_search(
+        &mut self,
+        re: &BoundedBacktracker,
+        input: &Input<'_>,
+    ) -> Result<(), MatchError> {
+        // Our haystack length is only the length of the span of the entire
+        // haystack that we'll be searching.
+        let haylen = input.get_span().len();
+        let err = || MatchError::haystack_too_long(haylen);
+        // Our stride is one more than the length of the input because our main
+        // search loop includes the position at input.end(). (And it does this
+        // because matches are delayed by one byte to account for look-around.)
+        self.stride = haylen + 1;
+        let needed_capacity =
+            match re.get_nfa().states().len().checked_mul(self.stride) {
+                None => return Err(err()),
+                Some(capacity) => capacity,
+            };
+        let max_capacity = 8 * re.get_config().get_visited_capacity();
+        if needed_capacity > max_capacity {
+            return Err(err());
+        }
+        let needed_blocks = div_ceil(needed_capacity, Visited::BLOCK_SIZE);
+        self.bitset.truncate(needed_blocks);
+        for block in self.bitset.iter_mut() {
+            *block = 0;
+        }
+        if needed_blocks > self.bitset.len() {
+            self.bitset.resize(needed_blocks, 0);
+        }
+        Ok(())
+    }
+
+    /// Return the heap memory usage, in bytes, of this visited set.
+    fn memory_usage(&self) -> usize {
+        self.bitset.len() * core::mem::size_of::<usize>()
+    }
+}
+
+/// Integer division, but rounds up instead of down.
+fn div_ceil(lhs: usize, rhs: usize) -> usize {
+    if lhs % rhs == 0 {
+        lhs / rhs
+    } else {
+        (lhs / rhs) + 1
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // This is a regression test for the maximum haystack length computation.
+    // Previously, it assumed that the total capacity of the backtracker's
+    // bitset would always be greater than the number of NFA states. But there
+    // is of course no guarantee that this is true. This regression test
+    // ensures that not only does `max_haystack_len` not panic, but that it
+    // should return `0`.
+    #[cfg(feature = "syntax")]
+    #[test]
+    fn max_haystack_len_overflow() {
+        let re = BoundedBacktracker::builder()
+            .configure(BoundedBacktracker::config().visited_capacity(10))
+            .build(r"[0-9A-Za-z]{100}")
+            .unwrap();
+        assert_eq!(0, re.max_haystack_len());
+    }
+}
diff --git a/vendor/regex-automata/src/nfa/thompson/builder.rs b/vendor/regex-automata/src/nfa/thompson/builder.rs
new file mode 100644
index 0000000..6b69e87
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/thompson/builder.rs
@@ -0,0 +1,1337 @@
+use core::mem;
+
+use alloc::{sync::Arc, vec, vec::Vec};
+
+use crate::{
+    nfa::thompson::{
+        error::BuildError,
+        nfa::{self, SparseTransitions, Transition, NFA},
+    },
+    util::{
+        look::{Look, LookMatcher},
+        primitives::{IteratorIndexExt, PatternID, SmallIndex, StateID},
+    },
+};
+
+/// An intermediate NFA state used during construction.
+///
+/// During construction of an NFA, it is often convenient to work with states
+/// that are amenable to mutation and that carry more information than we
+/// otherwise need once an NFA has been built. This type represents those
+/// needs.
+///
+/// Once construction is finished, the builder will convert these states to a
+/// [`nfa::thompson::State`](crate::nfa::thompson::State). This conversion not
+/// only results in a simpler representation, but in some cases, entire classes
+/// of states are completely removed (such as [`State::Empty`]).
+#[derive(Clone, Debug, Eq, PartialEq)]
+enum State {
+    /// An empty state whose only purpose is to forward the automaton to
+    /// another state via an unconditional epsilon transition.
+    ///
+    /// Unconditional epsilon transitions are quite useful during the
+    /// construction of an NFA, as they permit the insertion of no-op
+    /// placeholders that make it easier to compose NFA sub-graphs. When
+    /// the Thompson NFA builder produces a final NFA, all unconditional
+    /// epsilon transitions are removed, and state identifiers are remapped
+    /// accordingly.
+    Empty {
+        /// The next state that this state should transition to.
+        next: StateID,
+    },
+    /// A state that only transitions to another state if the current input
+    /// byte is in a particular range of bytes.
+    ByteRange { trans: Transition },
+    /// A state with possibly many transitions, represented in a sparse
+    /// fashion. Transitions must be ordered lexicographically by input range
+    /// and be non-overlapping. As such, this may only be used when every
+    /// transition has equal priority. (In practice, this is only used for
+    /// encoding large UTF-8 automata.) In contrast, a `Union` state has each
+    /// alternate in order of priority. Priority is used to implement greedy
+    /// matching and also alternations themselves, e.g., `abc|a` where `abc`
+    /// has priority over `a`.
+    ///
+    /// To clarify, it is possible to remove `Sparse` and represent all things
+    /// that `Sparse` is used for via `Union`. But this creates a more bloated
+    /// NFA with more epsilon transitions than is necessary in the special case
+    /// of character classes.
+    Sparse { transitions: Vec<Transition> },
+    /// A conditional epsilon transition satisfied via some sort of
+    /// look-around.
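+    ///
+    /// For example (an illustrative mapping): a multi-line `^` is typically
+    /// compiled to a state like `Look { look: Look::StartLF, .. }`, whose
+    /// transition may only be followed when the current position sits at the
+    /// start of a line.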
+    Look { look: Look, next: StateID },
+    /// An empty state that records the start of a capture location. This is an
+    /// unconditional epsilon transition like `Empty`, except it can be used to
+    /// record position information for a capture group when using the NFA for
+    /// search.
+    CaptureStart {
+        /// The ID of the pattern in which this capture was defined.
+        pattern_id: PatternID,
+        /// The capture group index that this capture state corresponds to.
+        /// The capture group index is always relative to its corresponding
+        /// pattern. Therefore, in the presence of multiple patterns, both the
+        /// pattern ID and the capture group index are required to uniquely
+        /// identify a capturing group.
+        group_index: SmallIndex,
+        /// The next state that this state should transition to.
+        next: StateID,
+    },
+    /// An empty state that records the end of a capture location. This is an
+    /// unconditional epsilon transition like `Empty`, except it can be used to
+    /// record position information for a capture group when using the NFA for
+    /// search.
+    CaptureEnd {
+        /// The ID of the pattern in which this capture was defined.
+        pattern_id: PatternID,
+        /// The capture group index that this capture state corresponds to.
+        /// The capture group index is always relative to its corresponding
+        /// pattern. Therefore, in the presence of multiple patterns, both the
+        /// pattern ID and the capture group index are required to uniquely
+        /// identify a capturing group.
+        group_index: SmallIndex,
+        /// The next state that this state should transition to.
+        next: StateID,
+    },
+    /// An alternation such that there exists an epsilon transition to all
+    /// states in `alternates`, where matches found via earlier transitions
+    /// are preferred over later transitions.
+    Union { alternates: Vec<StateID> },
+    /// An alternation such that there exists an epsilon transition to all
+    /// states in `alternates`, where matches found via later transitions are
+    /// preferred over earlier transitions.
+    ///
+    /// This "reverse" state exists for convenience during compilation that
+    /// permits easy construction of non-greedy combinations of NFA states. At
+    /// the end of compilation, Union and UnionReverse states are merged into
+    /// one Union type of state, where the latter has its epsilon transitions
+    /// reversed to reflect the priority inversion.
+    ///
+    /// The "convenience" here arises from the fact that as new states are
+    /// added to the list of `alternates`, we would like that add operation
+    /// to be amortized constant time. But if we used a `Union`, we'd need to
+    /// prepend the state, which takes O(n) time. There are other approaches we
+    /// could use to solve this, but this seems simple enough.
+    UnionReverse { alternates: Vec<StateID> },
+    /// A state that cannot be transitioned out of. This is useful for cases
+    /// where you want to prevent matching from occurring. For example, if your
+    /// regex parser permits empty character classes, then one could choose a
+    /// `Fail` state to represent it.
+    Fail,
+    /// A match state. There is at most one such occurrence of this state in
+    /// an NFA for each pattern compiled into the NFA. At time of writing, a
+    /// match state is always produced for every pattern given, but in theory,
+    /// if a pattern can never lead to a match, then the match state could be
+    /// omitted.
+    ///
+    /// `pattern_id` refers to the ID of the pattern itself, which corresponds
+    /// to the pattern's index (starting at 0).
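+    /// (This is the same identifier handed back by `start_pattern` and
+    /// `finish_pattern` while the pattern was being assembled.)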
+    Match { pattern_id: PatternID },
+}
+
+impl State {
+    /// If this state is an unconditional epsilon transition, then this returns
+    /// the target of the transition.
+    fn goto(&self) -> Option<StateID> {
+        match *self {
+            State::Empty { next } => Some(next),
+            State::Union { ref alternates } if alternates.len() == 1 => {
+                Some(alternates[0])
+            }
+            State::UnionReverse { ref alternates }
+                if alternates.len() == 1 =>
+            {
+                Some(alternates[0])
+            }
+            _ => None,
+        }
+    }
+
+    /// Returns the heap memory usage, in bytes, of this state.
+    fn memory_usage(&self) -> usize {
+        match *self {
+            State::Empty { .. }
+            | State::ByteRange { .. }
+            | State::Look { .. }
+            | State::CaptureStart { .. }
+            | State::CaptureEnd { .. }
+            | State::Fail
+            | State::Match { .. } => 0,
+            State::Sparse { ref transitions } => {
+                transitions.len() * mem::size_of::<Transition>()
+            }
+            State::Union { ref alternates } => {
+                alternates.len() * mem::size_of::<StateID>()
+            }
+            State::UnionReverse { ref alternates } => {
+                alternates.len() * mem::size_of::<StateID>()
+            }
+        }
+    }
+}
+
+/// An abstraction for building Thompson NFAs by hand.
+///
+/// A builder is what a [`thompson::Compiler`](crate::nfa::thompson::Compiler)
+/// uses internally to translate a regex's high-level intermediate
+/// representation into an [`NFA`].
+///
+/// The primary function of this builder is to abstract away the internal
+/// representation of an NFA and make it difficult to produce NFAs that are
+/// internally invalid or inconsistent. This builder also provides a way to
+/// add "empty" states (which can be thought of as unconditional epsilon
+/// transitions), despite the fact that [`thompson::State`](nfa::State) does
+/// not have any "empty" representation. The advantage of "empty" states is
+/// that they make the code for constructing a Thompson NFA logically simpler.
+///
+/// Many of the routines on this builder may panic or return errors. Generally
+/// speaking, panics occur when an invalid sequence of method calls were made,
+/// whereas an error occurs if things get too big. (Where "too big" might mean
+/// exhausting identifier space or using up too much heap memory in accordance
+/// with the configured [`size_limit`](Builder::set_size_limit).)
+///
+/// # Overview
+///
+/// ## Adding multiple patterns
+///
+/// Each pattern you add to an NFA should correspond to a pair of
+/// [`Builder::start_pattern`] and [`Builder::finish_pattern`] calls, with
+/// calls in between that add NFA states for that pattern. NFA states may be
+/// added without first calling `start_pattern`, with the exception of adding
+/// capturing states.
+///
+/// ## Adding NFA states
+///
+/// Here is a very brief overview of each of the methods that add NFA states.
+/// Every method adds a single state.
+///
+/// * [`add_empty`](Builder::add_empty): Add a state with a single
+/// unconditional epsilon transition to another state.
+/// * [`add_union`](Builder::add_union): Adds a state with unconditional
+/// epsilon transitions to two or more states, with earlier transitions
+/// preferred over later ones.
+/// * [`add_union_reverse`](Builder::add_union_reverse): Adds a state with
+/// unconditional epsilon transitions to two or more states, with later
+/// transitions preferred over earlier ones.
+/// * [`add_range`](Builder::add_range): Adds a state with a single transition
+/// to another state that can only be followed if the current input byte is
+/// within the range given.
+/// * [`add_sparse`](Builder::add_sparse): Adds a state with two or more
+/// range transitions to other states, where a transition is only followed
+/// if the current input byte is within one of the ranges. All transitions
+/// in this state have equal priority, and the corresponding ranges must be
+/// non-overlapping.
+/// * [`add_look`](Builder::add_look): Adds a state with a single *conditional*
+/// epsilon transition to another state, where the condition depends on a
+/// limited look-around property.
+/// * [`add_capture_start`](Builder::add_capture_start): Adds a state with
+/// a single unconditional epsilon transition that also instructs an NFA
+/// simulation to record the current input position to a specific location in
+/// memory. This is intended to represent the starting location of a capturing
+/// group.
+/// * [`add_capture_end`](Builder::add_capture_end): Adds a state with
+/// a single unconditional epsilon transition that also instructs an NFA
+/// simulation to record the current input position to a specific location in
+/// memory. This is intended to represent the ending location of a capturing
+/// group.
+/// * [`add_fail`](Builder::add_fail): Adds a state that never transitions to
+/// another state.
+/// * [`add_match`](Builder::add_match): Add a state that indicates a match has
+/// been found for a particular pattern. A match state is a final state with
+/// no outgoing transitions.
+///
+/// ## Setting transitions between NFA states
+///
+/// The [`Builder::patch`] method creates a transition from one state to the
+/// next. If the `from` state corresponds to a state that supports multiple
+/// outgoing transitions (such as "union"), then this adds the corresponding
+/// transition. Otherwise, it sets the single transition. (This routine panics
+/// if `from` corresponds to a state added by `add_sparse`, since sparse states
+/// need more specialized handling.)
+///
+/// # Example
+///
+/// This annotated example shows how to hand construct the regex `[a-z]+`
+/// (without an unanchored prefix).
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::{pikevm::PikeVM, Builder, Transition},
+///     util::primitives::StateID,
+///     Match,
+/// };
+///
+/// let mut builder = Builder::new();
+/// // Before adding NFA states for our pattern, we need to tell the builder
+/// // that we are starting the pattern.
+/// builder.start_pattern()?;
+/// // Since we use the Pike VM below for searching, we need to add capturing
+/// // states. If you're just going to build a DFA from the NFA, then capturing
+/// // states do not need to be added.
+/// let start = builder.add_capture_start(StateID::ZERO, 0, None)?;
+/// let range = builder.add_range(Transition {
+///     // We don't know the state ID of the 'next' state yet, so we just fill
+///     // in a dummy 'ZERO' value.
+///     start: b'a', end: b'z', next: StateID::ZERO,
+/// })?;
+/// // This state will point back to 'range', but also enable us to move ahead.
+/// // That is, this implements the '+' repetition operator. We add 'range' and
+/// // then 'end' below to this alternation.
+/// let alt = builder.add_union(vec![])?;
+/// // The final state before the match state, which serves to capture the
+/// // end location of the match.
+/// let end = builder.add_capture_end(StateID::ZERO, 0)?;
+/// // The match state for our pattern.
+/// let mat = builder.add_match()?;
+/// // Now we fill in the transitions between states.
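+/// // (An informal sketch of the wiring done below: start -> range -> alt;
+/// // alt -> range loops back and is tried first, which makes the repetition
+/// // greedy; alt -> end -> mat completes the match path.)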
+/// builder.patch(start, range)?;
+/// builder.patch(range, alt)?;
+/// // If we added 'end' before 'range', then we'd implement non-greedy
+/// // matching, i.e., '+?'.
+/// builder.patch(alt, range)?;
+/// builder.patch(alt, end)?;
+/// builder.patch(end, mat)?;
+/// // We must explicitly finish pattern and provide the starting state ID for
+/// // this particular pattern.
+/// builder.finish_pattern(start)?;
+/// // Finally, when we build the NFA, we provide the anchored and unanchored
+/// // starting state IDs. Since we didn't bother with an unanchored prefix
+/// // here, we only support anchored searching. Thus, both starting states are
+/// // the same.
+/// let nfa = builder.build(start, start)?;
+///
+/// // Now build a Pike VM from our NFA, and use it for searching. This shows
+/// // how we can use a regex engine without ever worrying about syntax!
+/// let re = PikeVM::new_from_nfa(nfa)?;
+/// let mut cache = re.create_cache();
+/// let mut caps = re.create_captures();
+/// let expected = Some(Match::must(0, 0..3));
+/// re.captures(&mut cache, "foo0", &mut caps);
+/// assert_eq!(expected, caps.get_match());
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug, Default)]
+pub struct Builder {
+    /// The ID of the pattern that we're currently building.
+    ///
+    /// Callers are required to set (and unset) this by calling
+    /// {start,finish}_pattern. Otherwise, most methods will panic.
+    pattern_id: Option<PatternID>,
+    /// A sequence of intermediate NFA states. Once a state is added to this
+    /// sequence, it is assigned a state ID equivalent to its index. Once a
+    /// state is added, it is still expected to be mutated, e.g., to set its
+    /// transition to a state that didn't exist at the time it was added.
+    states: Vec<State>,
+    /// The starting states for each individual pattern. Starting at any
+    /// of these states will result in only an anchored search for the
+    /// corresponding pattern. The vec is indexed by pattern ID. When the NFA
+    /// contains a single regex, then `start_pattern[0]` and `start_anchored`
+    /// are always equivalent.
+    start_pattern: Vec<StateID>,
+    /// A map from pattern ID to capture group index to name. (If no name
+    /// exists, then a None entry is present. Thus, all capturing groups are
+    /// present in this mapping.)
+    ///
+    /// The outer vec is indexed by pattern ID, while the inner vec is indexed
+    /// by capture index offset for the corresponding pattern.
+    ///
+    /// The first capture group for each pattern is always unnamed and is thus
+    /// always None.
+    captures: Vec<Vec<Option<Arc<str>>>>,
+    /// The combined memory used by each of the 'State's in 'states'. This
+    /// only includes heap usage by each state, and not the size of the state
+    /// itself. In other words, this tracks heap memory used that isn't
+    /// captured via `size_of::<State>() * states.len()`.
+    memory_states: usize,
+    /// Whether this NFA only matches UTF-8 and whether regex engines using
+    /// this NFA for searching should report empty matches that split a
+    /// codepoint.
+    utf8: bool,
+    /// Whether this NFA should be matched in reverse or not.
+    reverse: bool,
+    /// The matcher to use for look-around assertions.
+    look_matcher: LookMatcher,
+    /// A size limit to respect when building an NFA. If the total heap memory
+    /// of the intermediate NFA states exceeds (or would exceed) this amount,
+    /// then an error is returned.
+    size_limit: Option<usize>,
+}
+
+impl Builder {
+    /// Create a new builder for hand-assembling NFAs.
+    pub fn new() -> Builder {
+        Builder::default()
+    }
+
+    /// Clear this builder.
+    ///
+    /// Clearing removes all state associated with building an NFA, but does
+    /// not reset configuration (such as size limits and whether the NFA
+    /// should only match UTF-8). After clearing, the builder can be reused to
+    /// assemble an entirely new NFA.
+    pub fn clear(&mut self) {
+        self.pattern_id = None;
+        self.states.clear();
+        self.start_pattern.clear();
+        self.captures.clear();
+        self.memory_states = 0;
+    }
+
+    /// Assemble a [`NFA`] from the states added so far.
+    ///
+    /// After building an NFA, more states may be added and `build` may be
+    /// called again. To reuse a builder to produce an entirely new NFA from
+    /// scratch, call the [`clear`](Builder::clear) method first.
+    ///
+    /// `start_anchored` refers to the ID of the starting state that anchored
+    /// searches should use. That is, searches whose matches are limited to the
+    /// starting position of the search.
+    ///
+    /// `start_unanchored` refers to the ID of the starting state that
+    /// unanchored searches should use. This permits searches to report matches
+    /// that start after the beginning of the search. In cases where unanchored
+    /// searches are not supported, the unanchored starting state ID must be
+    /// the same as the anchored starting state ID.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if there was a problem producing the final NFA.
+    /// In particular, this might include an error if the capturing groups
+    /// added to this builder violate any of the invariants documented on
+    /// [`GroupInfo`](crate::util::captures::GroupInfo).
+    ///
+    /// # Panics
+    ///
+    /// If `start_pattern` was called, then `finish_pattern` must be called
+    /// before `build`, otherwise this panics.
+    ///
+    /// This may panic for other invalid uses of a builder. For example, if
+    /// a "start capture" state was added without a corresponding "end capture"
+    /// state.
+    pub fn build(
+        &self,
+        start_anchored: StateID,
+        start_unanchored: StateID,
+    ) -> Result<NFA, BuildError> {
+        assert!(self.pattern_id.is_none(), "must call 'finish_pattern' first");
+        debug!(
+            "intermediate NFA compilation via builder is complete, \
+             intermediate NFA size: {} states, {} bytes on heap",
+            self.states.len(),
+            self.memory_usage(),
+        );
+
+        let mut nfa = nfa::Inner::default();
+        nfa.set_utf8(self.utf8);
+        nfa.set_reverse(self.reverse);
+        nfa.set_look_matcher(self.look_matcher.clone());
+        // A set of compiler internal state IDs that correspond to states
+        // that are exclusively epsilon transitions, i.e., goto instructions,
+        // combined with the state that they point to. This is used to
+        // record said states while transforming the compiler's internal NFA
+        // representation to the external form.
+        let mut empties = vec![];
+        // A map used to re-map state IDs when translating this builder's
+        // internal NFA state representation to the final NFA representation.
+        let mut remap = vec![];
+        remap.resize(self.states.len(), StateID::ZERO);
+
+        nfa.set_starts(start_anchored, start_unanchored, &self.start_pattern);
+        nfa.set_captures(&self.captures).map_err(BuildError::captures)?;
+        // The idea here is to convert our intermediate states to their final
+        // form. The only real complexity here is the process of converting
+        // transitions, which are expressed in terms of state IDs. The new
+        // set of states will be smaller because of partial epsilon removal,
+        // so the state IDs will not be the same.
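+        //
+        // To illustrate the remapping (a hypothetical three-state example):
+        // an intermediate sequence
+        //
+        //   0: Empty(goto: 1), 1: ByteRange(a-z, next: 2), 2: Match(0)
+        //
+        // becomes the final sequence
+        //
+        //   0: ByteRange(a-z, next: 2), 1: Match(0)
+        //
+        // with remap = [0, 0, 1]. The `nfa.remap(&remap)` call at the end
+        // then rewrites the stale `next: 2` to `next: 1`.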
+        for (sid, state) in self.states.iter().with_state_ids() {
+            match *state {
+                State::Empty { next } => {
+                    // Since we're removing empty states, we need to handle
+                    // them later since we don't yet know which new state this
+                    // empty state will be mapped to.
+                    empties.push((sid, next));
+                }
+                State::ByteRange { trans } => {
+                    remap[sid] = nfa.add(nfa::State::ByteRange { trans });
+                }
+                State::Sparse { ref transitions } => {
+                    remap[sid] = match transitions.len() {
+                        0 => nfa.add(nfa::State::Fail),
+                        1 => nfa.add(nfa::State::ByteRange {
+                            trans: transitions[0],
+                        }),
+                        _ => {
+                            let transitions =
+                                transitions.to_vec().into_boxed_slice();
+                            let sparse = SparseTransitions { transitions };
+                            nfa.add(nfa::State::Sparse(sparse))
+                        }
+                    }
+                }
+                State::Look { look, next } => {
+                    remap[sid] = nfa.add(nfa::State::Look { look, next });
+                }
+                State::CaptureStart { pattern_id, group_index, next } => {
+                    // We can't remove this empty state because of the side
+                    // effect of capturing an offset for this capture slot.
+                    let slot = nfa
+                        .group_info()
+                        .slot(pattern_id, group_index.as_usize())
+                        .expect("invalid capture index");
+                    let slot =
+                        SmallIndex::new(slot).expect("a small enough slot");
+                    remap[sid] = nfa.add(nfa::State::Capture {
+                        next,
+                        pattern_id,
+                        group_index,
+                        slot,
+                    });
+                }
+                State::CaptureEnd { pattern_id, group_index, next } => {
+                    // We can't remove this empty state because of the side
+                    // effect of capturing an offset for this capture slot.
+                    // Also, this always succeeds because we check that all
+                    // slot indices are valid for all capture indices when they
+                    // are initially added.
+                    let slot = nfa
+                        .group_info()
+                        .slot(pattern_id, group_index.as_usize())
+                        .expect("invalid capture index")
+                        .checked_add(1)
+                        .unwrap();
+                    let slot =
+                        SmallIndex::new(slot).expect("a small enough slot");
+                    remap[sid] = nfa.add(nfa::State::Capture {
+                        next,
+                        pattern_id,
+                        group_index,
+                        slot,
+                    });
+                }
+                State::Union { ref alternates } => {
+                    if alternates.is_empty() {
+                        remap[sid] = nfa.add(nfa::State::Fail);
+                    } else if alternates.len() == 1 {
+                        empties.push((sid, alternates[0]));
+                        remap[sid] = alternates[0];
+                    } else if alternates.len() == 2 {
+                        remap[sid] = nfa.add(nfa::State::BinaryUnion {
+                            alt1: alternates[0],
+                            alt2: alternates[1],
+                        });
+                    } else {
+                        let alternates =
+                            alternates.to_vec().into_boxed_slice();
+                        remap[sid] = nfa.add(nfa::State::Union { alternates });
+                    }
+                }
+                State::UnionReverse { ref alternates } => {
+                    if alternates.is_empty() {
+                        remap[sid] = nfa.add(nfa::State::Fail);
+                    } else if alternates.len() == 1 {
+                        empties.push((sid, alternates[0]));
+                        remap[sid] = alternates[0];
+                    } else if alternates.len() == 2 {
+                        remap[sid] = nfa.add(nfa::State::BinaryUnion {
+                            alt1: alternates[1],
+                            alt2: alternates[0],
+                        });
+                    } else {
+                        let mut alternates =
+                            alternates.to_vec().into_boxed_slice();
+                        alternates.reverse();
+                        remap[sid] = nfa.add(nfa::State::Union { alternates });
+                    }
+                }
+                State::Fail => {
+                    remap[sid] = nfa.add(nfa::State::Fail);
+                }
+                State::Match { pattern_id } => {
+                    remap[sid] = nfa.add(nfa::State::Match { pattern_id });
+                }
+            }
+        }
+        // Some of the new states still point to empty state IDs, so we need to
+        // follow each of them and remap the empty state IDs to their non-empty
+        // state IDs.
+        //
+        // We also keep track of which states we've already mapped. This helps
+        // avoid quadratic behavior in a long chain of empty states. For
+        // example, in 'a{0}{50000}'.
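+        //
+        // As a made-up illustration: if empty state 4 points to empty state
+        // 9, and 9 points to a non-empty state 2, then the loop below sets
+        // remap[4] = remap[9] = remap[2] and marks both 4 and 9 as remapped,
+        // so a later entry for 9 in 'empties' is skipped outright.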
+        let mut remapped = vec![false; self.states.len()];
+        for &(empty_id, empty_next) in empties.iter() {
+            if remapped[empty_id] {
+                continue;
+            }
+            // empty states can point to other empty states, forming a chain.
+            // So we must follow the chain until the end, which must end at
+            // a non-empty state, and therefore, a state that is correctly
+            // remapped. We are guaranteed to terminate because our compiler
+            // never builds a loop among only empty states.
+            let mut new_next = empty_next;
+            while let Some(next) = self.states[new_next].goto() {
+                new_next = next;
+            }
+            remap[empty_id] = remap[new_next];
+            remapped[empty_id] = true;
+
+            // Now that we've remapped the main 'empty_id' above, we re-follow
+            // the chain from above and remap every empty state we found along
+            // the way to our ultimate non-empty target. We are careful to set
+            // 'remapped' to true for each such state. We thus will not need
+            // to re-compute this chain for any subsequent empty states in
+            // 'empties' that are part of this chain.
+            let mut next2 = empty_next;
+            while let Some(next) = self.states[next2].goto() {
+                remap[next2] = remap[new_next];
+                remapped[next2] = true;
+                next2 = next;
+            }
+        }
+        // Finally remap all of the state IDs.
+        nfa.remap(&remap);
+        let final_nfa = nfa.into_nfa();
+        debug!(
+            "NFA compilation via builder complete, \
+             final NFA size: {} states, {} bytes on heap, \
+             has empty? {:?}, utf8? {:?}",
+            final_nfa.states().len(),
+            final_nfa.memory_usage(),
+            final_nfa.has_empty(),
+            final_nfa.is_utf8(),
+        );
+        Ok(final_nfa)
+    }
+
+    /// Start the assembly of a pattern in this NFA.
+    ///
+    /// Upon success, this returns the identifier for the new pattern.
+    /// Identifiers start at `0` and are incremented by 1 for each new pattern.
+    ///
+    /// It is necessary to call this routine before adding capturing states.
+    /// Otherwise, any other NFA state may be added before starting a pattern.
+    ///
+    /// # Errors
+    ///
+    /// If the pattern identifier space is exhausted, then this returns an
+    /// error.
+    ///
+    /// # Panics
+    ///
+    /// If this is called while assembling another pattern (i.e., before
+    /// `finish_pattern` is called), then this panics.
+    pub fn start_pattern(&mut self) -> Result<PatternID, BuildError> {
+        assert!(self.pattern_id.is_none(), "must call 'finish_pattern' first");
+
+        let proposed = self.start_pattern.len();
+        let pid = PatternID::new(proposed)
+            .map_err(|_| BuildError::too_many_patterns(proposed))?;
+        self.pattern_id = Some(pid);
+        // This gets filled in when 'finish_pattern' is called.
+        self.start_pattern.push(StateID::ZERO);
+        Ok(pid)
+    }
+
+    /// Finish the assembly of a pattern in this NFA.
+    ///
+    /// Upon success, this returns the identifier for the new pattern.
+    /// Identifiers start at `0` and are incremented by 1 for each new
+    /// pattern. This is the same identifier returned by the corresponding
+    /// `start_pattern` call.
+    ///
+    /// Note that `start_pattern` and `finish_pattern` pairs cannot be
+    /// interleaved or nested. A correct `finish_pattern` call _always_
+    /// corresponds to the most recently called `start_pattern` routine.
+    ///
+    /// # Errors
+    ///
+    /// This currently never returns an error, but this is subject to change.
+    ///
+    /// # Panics
+    ///
+    /// If this is called without a corresponding `start_pattern` call, then
+    /// this panics.
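+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of the pairing, assuming a trivial pattern consisting
+    /// of nothing but a match state:
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::Builder;
+    ///
+    /// let mut builder = Builder::new();
+    /// let pid = builder.start_pattern()?;
+    /// // Add states for the pattern here; a bare match state suffices for
+    /// // this sketch.
+    /// let start = builder.add_match()?;
+    /// // 'finish_pattern' hands back the same ID that 'start_pattern' gave.
+    /// assert_eq!(pid, builder.finish_pattern(start)?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```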
+    pub fn finish_pattern(
+        &mut self,
+        start_id: StateID,
+    ) -> Result<PatternID, BuildError> {
+        let pid = self.current_pattern_id();
+        self.start_pattern[pid] = start_id;
+        self.pattern_id = None;
+        Ok(pid)
+    }
+
+    /// Returns the pattern identifier of the current pattern.
+    ///
+    /// # Panics
+    ///
+    /// If this doesn't occur after a `start_pattern` call and before the
+    /// corresponding `finish_pattern` call, then this panics.
+    pub fn current_pattern_id(&self) -> PatternID {
+        self.pattern_id.expect("must call 'start_pattern' first")
+    }
+
+    /// Returns the number of patterns added to this builder so far.
+    ///
+    /// This only includes patterns that have had `finish_pattern` called
+    /// for them.
+    pub fn pattern_len(&self) -> usize {
+        self.start_pattern.len()
+    }
+
+    /// Add an "empty" NFA state.
+    ///
+    /// An "empty" NFA state is a state with a single unconditional epsilon
+    /// transition to another NFA state. Such empty states are removed before
+    /// building the final [`NFA`] (which has no such "empty" states), but they
+    /// can be quite useful in the construction process of an NFA.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the state identifier space is exhausted, or if
+    /// the configured heap size limit has been exceeded.
+    pub fn add_empty(&mut self) -> Result<StateID, BuildError> {
+        self.add(State::Empty { next: StateID::ZERO })
+    }
+
+    /// Add a "union" NFA state.
+    ///
+    /// A "union" NFA state contains zero or more unconditional epsilon
+    /// transitions to other NFA states. The order of these transitions
+    /// reflects a priority order where earlier transitions are preferred over
+    /// later transitions.
+    ///
+    /// Callers may provide an empty set of alternates to this method call, and
+    /// then later add transitions via `patch`. At final build time, a "union"
+    /// state with no alternates is converted to a "fail" state, and a "union"
+    /// state with exactly one alternate is treated as if it were an "empty"
+    /// state.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the state identifier space is exhausted, or if
+    /// the configured heap size limit has been exceeded.
+    pub fn add_union(
+        &mut self,
+        alternates: Vec<StateID>,
+    ) -> Result<StateID, BuildError> {
+        self.add(State::Union { alternates })
+    }
+
+    /// Add a "reverse union" NFA state.
+    ///
+    /// A "reverse union" NFA state contains zero or more unconditional epsilon
+    /// transitions to other NFA states. The order of these transitions
+    /// reflects a priority order where later transitions are preferred
+    /// over earlier transitions. This is an inverted priority order when
+    /// compared to `add_union`. This is useful, for example, for implementing
+    /// non-greedy repetition operators.
+    ///
+    /// Callers may provide an empty set of alternates to this method call, and
+    /// then later add transitions via `patch`. At final build time, a "reverse
+    /// union" state with no alternates is converted to a "fail" state, and a
+    /// "reverse union" state with exactly one alternate is treated as if it
+    /// were an "empty" state.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the state identifier space is exhausted, or if
+    /// the configured heap size limit has been exceeded.
+    pub fn add_union_reverse(
+        &mut self,
+        alternates: Vec<StateID>,
+    ) -> Result<StateID, BuildError> {
+        self.add(State::UnionReverse { alternates })
+    }
+
+    /// Add a "range" NFA state.
+    ///
+    /// A "range" NFA state is a state with one outgoing transition to another
+    /// state, where that transition may only be followed if the current input
+    /// byte falls within the range of bytes given.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the state identifier space is exhausted, or if
+    /// the configured heap size limit has been exceeded.
+    pub fn add_range(
+        &mut self,
+        trans: Transition,
+    ) -> Result<StateID, BuildError> {
+        self.add(State::ByteRange { trans })
+    }
+
+    /// Add a "sparse" NFA state.
+    ///
+    /// A "sparse" NFA state contains zero or more outgoing transitions, where
+    /// the transition to be followed (if any) is chosen based on whether the
+    /// current input byte falls in the range of one such transition. The
+    /// transitions given *must* be non-overlapping and in ascending order. (A
+    /// "sparse" state with no transitions is equivalent to a "fail" state.)
+    ///
+    /// A "sparse" state is like adding a "union" state and pointing it at a
+    /// bunch of "range" states, except that the different alternates have
+    /// equal priority.
+    ///
+    /// Note that a "sparse" state is the only state that cannot be patched.
+    /// This is because a "sparse" state has many transitions, each of which
+    /// may point to a different NFA state. Moreover, adding more such
+    /// transitions requires more than just an NFA state ID to point to. It
+    /// also requires a byte range. The `patch` routine does not support the
+    /// additional information required. Therefore, callers must ensure that
+    /// all outgoing transitions for this state are included when `add_sparse`
+    /// is called. There is no way to add more later.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the state identifier space is exhausted, or if
+    /// the configured heap size limit has been exceeded.
+    ///
+    /// # Panics
+    ///
+    /// This routine _may_ panic if the transitions given overlap or are not
+    /// in ascending order.
+    pub fn add_sparse(
+        &mut self,
+        transitions: Vec<Transition>,
+    ) -> Result<StateID, BuildError> {
+        self.add(State::Sparse { transitions })
+    }
+
+    /// Add a "look" NFA state.
+    ///
+    /// A "look" NFA state corresponds to a state with exactly one
+    /// *conditional* epsilon transition to another NFA state. Namely, it
+    /// represents one of a small set of simplistic look-around operators.
+    ///
+    /// Callers may provide a "dummy" state ID (typically [`StateID::ZERO`]),
+    /// and then change it later with [`patch`](Builder::patch).
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the state identifier space is exhausted, or if
+    /// the configured heap size limit has been exceeded.
+    pub fn add_look(
+        &mut self,
+        next: StateID,
+        look: Look,
+    ) -> Result<StateID, BuildError> {
+        self.add(State::Look { look, next })
+    }
+
+    /// Add a "start capture" NFA state.
+    ///
+    /// A "start capture" NFA state corresponds to a state with exactly one
+    /// outgoing unconditional epsilon transition to another state. Unlike
+    /// "empty" states, a "start capture" state also carries with it an
+    /// instruction for saving the current position of input to a particular
+    /// location in memory. NFA simulations, like the Pike VM, may use this
+    /// information to report the match locations of capturing groups in a
+    /// regex pattern.
+    ///
+    /// If the corresponding capturing group has a name, then callers should
+    /// include it here.
+    ///
+    /// Callers may provide a "dummy" state ID (typically [`StateID::ZERO`]),
+    /// and then change it later with [`patch`](Builder::patch).
+    ///
+    /// Note that unlike `start_pattern`/`finish_pattern`, capturing start and
+    /// end states may be interleaved. Indeed, it is typical for many "start
+    /// capture" NFA states to appear before the first "end capture" state.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the state identifier space is exhausted, or if
+    /// the configured heap size limit has been exceeded, or if the given
+    /// capture index overflows `usize`.
+    ///
+    /// While the above are the only conditions in which this routine can
+    /// currently return an error, it is possible to call this method with
+    /// inputs that result in the final `build()` step failing to produce an
+    /// NFA. For example, if one adds two distinct capturing groups with the
+    /// same name, then that will result in `build()` failing with an error.
+    ///
+    /// See the [`GroupInfo`](crate::util::captures::GroupInfo) type for
+    /// more information on what qualifies as valid capturing groups.
+    ///
+    /// # Example
+    ///
+    /// This example shows that an error occurs when one tries to add multiple
+    /// capturing groups with the same name to the same pattern.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::Builder,
+    ///     util::primitives::StateID,
+    /// };
+    ///
+    /// let name = Some(std::sync::Arc::from("foo"));
+    /// let mut builder = Builder::new();
+    /// builder.start_pattern()?;
+    /// // 0th capture group should always be unnamed.
+    /// let start = builder.add_capture_start(StateID::ZERO, 0, None)?;
+    /// // OK
+    /// builder.add_capture_start(StateID::ZERO, 1, name.clone())?;
+    /// // This is not OK, but 'add_capture_start' still succeeds. We don't
+    /// // get an error until we call 'build' below. Without this call, the
+    /// // call to 'build' below would succeed.
+    /// builder.add_capture_start(StateID::ZERO, 2, name.clone())?;
+    /// // Finish our pattern so we can try to build the NFA.
+    /// builder.finish_pattern(start)?;
+    /// let result = builder.build(start, start);
+    /// assert!(result.is_err());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// However, adding multiple capturing groups with the same name to
+    /// distinct patterns is okay:
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    ///
+    /// use regex_automata::{
+    ///     nfa::thompson::{pikevm::PikeVM, Builder, Transition},
+    ///     util::{
+    ///         captures::Captures,
+    ///         primitives::{PatternID, StateID},
+    ///     },
+    ///     Span,
+    /// };
+    ///
+    /// // Hand-compile the patterns '(?P<foo>[a-z])' and '(?P<foo>[A-Z])'.
+    /// let mut builder = Builder::new();
+    /// // We compile them to support an unanchored search, which requires
+    /// // adding an implicit '(?s-u:.)*?' prefix before adding either pattern.
+    /// let unanchored_prefix = builder.add_union_reverse(vec![])?;
+    /// let any = builder.add_range(Transition {
+    ///     start: b'\x00', end: b'\xFF', next: StateID::ZERO,
+    /// })?;
+    /// builder.patch(unanchored_prefix, any)?;
+    /// builder.patch(any, unanchored_prefix)?;
+    ///
+    /// // Compile an alternation that permits matching multiple patterns.
+    /// let alt = builder.add_union(vec![])?;
+    /// builder.patch(unanchored_prefix, alt)?;
+    ///
+    /// // Compile '(?P<foo>[a-z]+)'.
+    /// builder.start_pattern()?;
+    /// let start0 = builder.add_capture_start(StateID::ZERO, 0, None)?;
+    /// // N.B. 0th capture group must always be unnamed.
+    /// let foo_start0 = builder.add_capture_start(
+    ///     StateID::ZERO, 1, Some(Arc::from("foo")),
+    /// )?;
+    /// let lowercase = builder.add_range(Transition {
+    ///     start: b'a', end: b'z', next: StateID::ZERO,
+    /// })?;
+    /// let foo_end0 = builder.add_capture_end(StateID::ZERO, 1)?;
+    /// let end0 = builder.add_capture_end(StateID::ZERO, 0)?;
+    /// let match0 = builder.add_match()?;
+    /// builder.patch(start0, foo_start0)?;
+    /// builder.patch(foo_start0, lowercase)?;
+    /// builder.patch(lowercase, foo_end0)?;
+    /// builder.patch(foo_end0, end0)?;
+    /// builder.patch(end0, match0)?;
+    /// builder.finish_pattern(start0)?;
+    ///
+    /// // Compile '(?P<foo>[A-Z]+)'.
+    /// builder.start_pattern()?;
+    /// let start1 = builder.add_capture_start(StateID::ZERO, 0, None)?;
+    /// // N.B. 0th capture group must always be unnamed.
+    /// let foo_start1 = builder.add_capture_start(
+    ///     StateID::ZERO, 1, Some(Arc::from("foo")),
+    /// )?;
+    /// let uppercase = builder.add_range(Transition {
+    ///     start: b'A', end: b'Z', next: StateID::ZERO,
+    /// })?;
+    /// let foo_end1 = builder.add_capture_end(StateID::ZERO, 1)?;
+    /// let end1 = builder.add_capture_end(StateID::ZERO, 0)?;
+    /// let match1 = builder.add_match()?;
+    /// builder.patch(start1, foo_start1)?;
+    /// builder.patch(foo_start1, uppercase)?;
+    /// builder.patch(uppercase, foo_end1)?;
+    /// builder.patch(foo_end1, end1)?;
+    /// builder.patch(end1, match1)?;
+    /// builder.finish_pattern(start1)?;
+    ///
+    /// // Now add the patterns to our alternation that we started above.
+    /// builder.patch(alt, start0)?;
+    /// builder.patch(alt, start1)?;
+    ///
+    /// // Finally build the NFA. The first argument is the anchored starting
+    /// // state (the pattern alternation), whereas the second is the
+    /// // unanchored starting state (the unanchored prefix).
+    /// let nfa = builder.build(alt, unanchored_prefix)?;
+    ///
+    /// // Now build a Pike VM from our NFA and access the 'foo' capture
+    /// // group regardless of which pattern matched, since it is defined
+    /// // for both patterns.
+    /// let vm = PikeVM::new_from_nfa(nfa)?;
+    /// let mut cache = vm.create_cache();
+    /// let caps: Vec<Captures> =
+    ///     vm.captures_iter(&mut cache, "0123aAaAA").collect();
+    /// assert_eq!(5, caps.len());
+    ///
+    /// assert_eq!(Some(PatternID::must(0)), caps[0].pattern());
+    /// assert_eq!(Some(Span::from(4..5)), caps[0].get_group_by_name("foo"));
+    ///
+    /// assert_eq!(Some(PatternID::must(1)), caps[1].pattern());
+    /// assert_eq!(Some(Span::from(5..6)), caps[1].get_group_by_name("foo"));
+    ///
+    /// assert_eq!(Some(PatternID::must(0)), caps[2].pattern());
+    /// assert_eq!(Some(Span::from(6..7)), caps[2].get_group_by_name("foo"));
+    ///
+    /// assert_eq!(Some(PatternID::must(1)), caps[3].pattern());
+    /// assert_eq!(Some(Span::from(7..8)), caps[3].get_group_by_name("foo"));
+    ///
+    /// assert_eq!(Some(PatternID::must(1)), caps[4].pattern());
+    /// assert_eq!(Some(Span::from(8..9)), caps[4].get_group_by_name("foo"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn add_capture_start(
+        &mut self,
+        next: StateID,
+        group_index: u32,
+        name: Option<Arc<str>>,
+    ) -> Result<StateID, BuildError> {
+        let pid = self.current_pattern_id();
+        let group_index = match SmallIndex::try_from(group_index) {
+            Err(_) => {
+                return Err(BuildError::invalid_capture_index(group_index))
+            }
+            Ok(group_index) => group_index,
+        };
+        // Make sure we have space to insert our (pid,index)|-->name mapping.
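+        // Illustrative note (commentary added here): 'captures' is one inner
+        // Vec per pattern, indexed by group index. E.g., after adding group 0
+        // (unnamed) and group 1 named "foo" for pattern 0, it looks like:
+        //
+        //     captures[0] == vec![None, Some("foo")]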
+        if pid.as_usize() >= self.captures.len() {
+            for _ in 0..=(pid.as_usize() - self.captures.len()) {
+                self.captures.push(vec![]);
+            }
+        }
+        // In the case where 'group_index < self.captures[pid].len()', it means
+        // that we are adding a duplicate capture group. This is somewhat
+        // weird, but permissible because the capture group itself can be
+        // repeated in the syntax. For example, '([a-z]){4}' will produce 4
+        // capture groups. In practice, only the last will be set at search
+        // time when a match occurs. For duplicates, we don't need to push
+        // anything other than a CaptureStart NFA state.
+        if group_index.as_usize() >= self.captures[pid].len() {
+            // For discontiguous indices, push placeholders for earlier
+            // capture groups that weren't explicitly added.
+            for _ in 0..(group_index.as_usize() - self.captures[pid].len()) {
+                self.captures[pid].push(None);
+            }
+            self.captures[pid].push(name);
+        }
+        self.add(State::CaptureStart { pattern_id: pid, group_index, next })
+    }
+
+    /// Add an "end capture" NFA state.
+    ///
+    /// An "end capture" NFA state corresponds to a state with exactly one
+    /// outgoing unconditional epsilon transition to another state. Unlike
+    /// "empty" states, an "end capture" state also carries with it an
+    /// instruction for saving the current position of input to a particular
+    /// location in memory. NFA simulations, like the Pike VM, may use this
+    /// information to report the match locations of capturing groups in a
+    /// regex pattern.
+    ///
+    /// Callers may provide a "dummy" state ID (typically [`StateID::ZERO`]),
+    /// and then change it later with [`patch`](Builder::patch).
+    ///
+    /// Note that unlike `start_pattern`/`finish_pattern`, capturing start and
+    /// end states may be interleaved. Indeed, it is typical for many "start
+    /// capture" NFA states to appear before the first "end capture" state.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the state identifier space is exhausted, or if
+    /// the configured heap size limit has been exceeded, or if the given
+    /// capture index overflows `usize`.
+    ///
+    /// While the above are the only conditions in which this routine can
+    /// currently return an error, it is possible to call this method with
+    /// inputs that result in the final `build()` step failing to produce an
+    /// NFA. For example, if one adds two distinct capturing groups with the
+    /// same name, then that will result in `build()` failing with an error.
+    ///
+    /// See the [`GroupInfo`](crate::util::captures::GroupInfo) type for
+    /// more information on what qualifies as valid capturing groups.
+    pub fn add_capture_end(
+        &mut self,
+        next: StateID,
+        group_index: u32,
+    ) -> Result<StateID, BuildError> {
+        let pid = self.current_pattern_id();
+        let group_index = match SmallIndex::try_from(group_index) {
+            Err(_) => {
+                return Err(BuildError::invalid_capture_index(group_index))
+            }
+            Ok(group_index) => group_index,
+        };
+        self.add(State::CaptureEnd { pattern_id: pid, group_index, next })
+    }
+
+    /// Adds a "fail" NFA state.
+    ///
+    /// A "fail" state is simply a state that has no outgoing transitions. It
+    /// acts as a way to cause a search to stop without reporting a match.
+    /// For example, one way to represent an NFA with zero patterns is with a
+    /// single "fail" state.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the state identifier space is exhausted, or if
+    /// the configured heap size limit has been exceeded.
+    pub fn add_fail(&mut self) -> Result<StateID, BuildError> {
+        self.add(State::Fail)
+    }
+
+    /// Adds a "match" NFA state.
+    ///
+    /// A "match" state has no outgoing transitions (just like a "fail"
+    /// state), but it has special significance in that if a search enters
+    /// this state, then a match has been found. The match state that is added
+    /// automatically has the current pattern ID associated with it. This is
+    /// used to report the matching pattern ID at search time.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if the state identifier space is exhausted, or if
+    /// the configured heap size limit has been exceeded.
+    ///
+    /// # Panics
+    ///
+    /// This must be called after a `start_pattern` call but before the
+    /// corresponding `finish_pattern` call. Otherwise, it panics.
+    pub fn add_match(&mut self) -> Result<StateID, BuildError> {
+        let pattern_id = self.current_pattern_id();
+        let sid = self.add(State::Match { pattern_id })?;
+        Ok(sid)
+    }
+
+    /// The common implementation of "add a state." It handles the common
+    /// error cases of state ID exhaustion (by owning state ID allocation) and
+    /// checks whether the size limit has been exceeded.
+    fn add(&mut self, state: State) -> Result<StateID, BuildError> {
+        let id = StateID::new(self.states.len())
+            .map_err(|_| BuildError::too_many_states(self.states.len()))?;
+        self.memory_states += state.memory_usage();
+        self.states.push(state);
+        self.check_size_limit()?;
+        Ok(id)
+    }
+
+    /// Add a transition from one state to another.
+    ///
+    /// This routine is called "patch" since it is very common to add the
+    /// states you want, typically with "dummy" state ID transitions, and then
+    /// "patch" in the real state IDs later. This is because you don't always
+    /// know all of the necessary state IDs to add because they might not
+    /// exist yet.
+    ///
+    /// # Errors
+    ///
+    /// This may error if patching leads to an increase in heap usage beyond
+    /// the configured size limit. Heap usage only grows when patching adds a
+    /// new transition (as in the case of a "union" state).
+    ///
+    /// # Panics
+    ///
+    /// This panics if `from` corresponds to a "sparse" state. When "sparse"
+    /// states are added, there is no way to patch them after-the-fact. (If you
+    /// have a use case where this would be helpful, please file an issue. It
+    /// will likely require a new API.)
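+    ///
+    /// # Example
+    ///
+    /// A small sketch (an illustration added here, not upstream prose) of the
+    /// add-then-patch idiom using an initially empty "union" state:
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::{Builder, Transition},
+    ///     util::primitives::StateID,
+    /// };
+    ///
+    /// let mut builder = Builder::new();
+    /// // Start with no alternates; patch them in afterwards.
+    /// let union = builder.add_union(vec![])?;
+    /// let a = builder.add_range(Transition {
+    ///     start: b'a', end: b'a', next: StateID::ZERO,
+    /// })?;
+    /// let b = builder.add_range(Transition {
+    ///     start: b'b', end: b'b', next: StateID::ZERO,
+    /// })?;
+    /// // Each patch appends one alternate, in priority order.
+    /// builder.patch(union, a)?;
+    /// builder.patch(union, b)?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```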
+    pub fn patch(
+        &mut self,
+        from: StateID,
+        to: StateID,
+    ) -> Result<(), BuildError> {
+        let old_memory_states = self.memory_states;
+        match self.states[from] {
+            State::Empty { ref mut next } => {
+                *next = to;
+            }
+            State::ByteRange { ref mut trans } => {
+                trans.next = to;
+            }
+            State::Sparse { .. } => {
+                panic!("cannot patch from a sparse NFA state")
+            }
+            State::Look { ref mut next, .. } => {
+                *next = to;
+            }
+            State::Union { ref mut alternates } => {
+                alternates.push(to);
+                self.memory_states += mem::size_of::<StateID>();
+            }
+            State::UnionReverse { ref mut alternates } => {
+                alternates.push(to);
+                self.memory_states += mem::size_of::<StateID>();
+            }
+            State::CaptureStart { ref mut next, .. } => {
+                *next = to;
+            }
+            State::CaptureEnd { ref mut next, .. } => {
+                *next = to;
+            }
+            State::Fail => {}
+            State::Match { .. } => {}
+        }
+        if old_memory_states != self.memory_states {
+            self.check_size_limit()?;
+        }
+        Ok(())
+    }
+
+    /// Set whether the NFA produced by this builder should only match UTF-8.
+    ///
+    /// This should be set when both of the following are true:
+    ///
+    /// 1. The caller guarantees that the NFA created by this builder will only
+    /// report non-empty matches with spans that are valid UTF-8.
+    /// 2. The caller desires regex engines using this NFA to avoid reporting
+    /// empty matches with a span that splits a valid UTF-8 encoded codepoint.
+    ///
+    /// Property (1) is not checked. Instead, this requires the caller to
+    /// promise that it is true. Property (2) corresponds to the behavior of
+    /// regex engines using the NFA created by this builder. Namely, there
+    /// is no way in the NFA's graph itself to say that empty matches found
+    /// by, for example, the regex `a*` will fall on valid UTF-8 boundaries.
+    /// Instead, this option is used to communicate the UTF-8 semantic to regex
+    /// engines that will typically implement it as a post-processing step by
+    /// filtering out empty matches that don't fall on UTF-8 boundaries.
+    ///
+    /// If you're building an NFA from an HIR (and not using a
+    /// [`thompson::Compiler`](crate::nfa::thompson::Compiler)), then you can
+    /// use the [`syntax::Config::utf8`](crate::util::syntax::Config::utf8)
+    /// option to guarantee that if the HIR detects a non-empty match, then it
+    /// is guaranteed to be valid UTF-8.
+    ///
+    /// Note that property (2) does *not* specify the behavior of executing
+    /// a search on a haystack that is not valid UTF-8. Therefore, if you're
+    /// *not* running this NFA on strings that are guaranteed to be valid
+    /// UTF-8, you almost certainly do not want to enable this option.
+    /// Similarly, if you are running the NFA on strings that *are* guaranteed
+    /// to be valid UTF-8, then you almost certainly want to enable this option
+    /// unless you can guarantee that your NFA will never produce a zero-width
+    /// match.
+    ///
+    /// It is disabled by default.
+    pub fn set_utf8(&mut self, yes: bool) {
+        self.utf8 = yes;
+    }
+
+    /// Returns whether UTF-8 mode is enabled for this builder.
+    ///
+    /// See [`Builder::set_utf8`] for more details about what "UTF-8 mode" is.
+    pub fn get_utf8(&self) -> bool {
+        self.utf8
+    }
+
+    /// Sets whether the NFA produced by this builder should be matched in
+    /// reverse or not. Generally speaking, when enabled, the NFA produced
+    /// should be matched by moving backwards through a haystack, from a higher
+    /// memory address to a lower memory address.
+    ///
+    /// See also [`NFA::is_reverse`] for more details.
+    ///
+    /// This is disabled by default, which means NFAs are by default matched
+    /// in the forward direction.
+    pub fn set_reverse(&mut self, yes: bool) {
+        self.reverse = yes;
+    }
+
+    /// Returns whether reverse mode is enabled for this builder.
+    ///
+    /// See [`Builder::set_reverse`] for more details about what "reverse mode"
+    /// is.
+    pub fn get_reverse(&self) -> bool {
+        self.reverse
+    }
+
+    /// Sets the look-around matcher that should be used for the resulting NFA.
+    ///
+    /// A look-around matcher can be used to configure how look-around
+    /// assertions are matched. For example, a matcher might carry
+    /// configuration that changes the line terminator used for `(?m:^)` and
+    /// `(?m:$)` assertions.
+    pub fn set_look_matcher(&mut self, m: LookMatcher) {
+        self.look_matcher = m;
+    }
+
+    /// Returns the look-around matcher used for this builder.
+    ///
+    /// If a matcher was not explicitly set, then `LookMatcher::default()` is
+    /// returned.
+    pub fn get_look_matcher(&self) -> &LookMatcher {
+        &self.look_matcher
+    }
+
+    /// Set the size limit on this builder.
+    ///
+    /// Setting the size limit will also check whether the NFA built so far
+    /// fits within the given size limit. If it doesn't, then an error is
+    /// returned.
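+    ///
+    /// # Example
+    ///
+    /// An illustrative sketch (added here, not upstream prose): once any
+    /// state has been added, a zero limit is immediately exceeded.
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::Builder;
+    ///
+    /// let mut builder = Builder::new();
+    /// builder.add_union(vec![])?;
+    /// // The builder has already used some heap, so a zero limit fails.
+    /// assert!(builder.set_size_limit(Some(0)).is_err());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```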
+    ///
+    /// By default, there is no configured size limit.
+    pub fn set_size_limit(
+        &mut self,
+        limit: Option<usize>,
+    ) -> Result<(), BuildError> {
+        self.size_limit = limit;
+        self.check_size_limit()
+    }
+
+    /// Return the currently configured size limit.
+    ///
+    /// By default, this returns `None`, which corresponds to no configured
+    /// size limit.
+    pub fn get_size_limit(&self) -> Option<usize> {
+        self.size_limit
+    }
+
+    /// Returns the heap memory usage, in bytes, used by the NFA states added
+    /// so far.
+    ///
+    /// Note that this is an approximation of how big the final NFA will be.
+    /// In practice, the final NFA will likely be a bit smaller because of
+    /// its simpler state representation. (For example, using things like
+    /// `Box<[StateID]>` instead of `Vec<StateID>`.)
+    pub fn memory_usage(&self) -> usize {
+        self.states.len() * mem::size_of::<State>() + self.memory_states
+    }
+
+    fn check_size_limit(&self) -> Result<(), BuildError> {
+        if let Some(limit) = self.size_limit {
+            if self.memory_usage() > limit {
+                return Err(BuildError::exceeded_size_limit(limit));
+            }
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // This asserts that a builder state doesn't have its size changed. It is
+    // *really* easy to accidentally increase the size, and thus potentially
+    // dramatically increase the memory usage of the NFA builder.
+    //
+    // This assert doesn't mean we absolutely cannot increase the size of a
+    // builder state. We can. It's just here to make sure we do it knowingly
+    // and intentionally.
+    //
+    // A builder state is unfortunately a little bigger than an NFA state,
+    // since we really want to support adding things to a pre-existing state.
+    // i.e., we use `Vec<thing>` instead of `Box<[thing]>`. So we end up using
+    // an extra 8 bytes per state. Sad, but at least it gets freed once the
+    // NFA is built.
+    #[test]
+    fn state_has_small_size() {
+        #[cfg(target_pointer_width = "64")]
+        assert_eq!(32, core::mem::size_of::<State>());
+        #[cfg(target_pointer_width = "32")]
+        assert_eq!(16, core::mem::size_of::<State>());
+    }
+}
diff --git a/vendor/regex-automata/src/nfa/thompson/compiler.rs b/vendor/regex-automata/src/nfa/thompson/compiler.rs
new file mode 100644
index 0000000..2d21729
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/thompson/compiler.rs
@@ -0,0 +1,2265 @@
+use core::{borrow::Borrow, cell::RefCell};
+
+use alloc::{sync::Arc, vec, vec::Vec};
+
+use regex_syntax::{
+    hir::{self, Hir},
+    utf8::{Utf8Range, Utf8Sequences},
+    ParserBuilder,
+};
+
+use crate::{
+    nfa::thompson::{
+        builder::Builder,
+        error::BuildError,
+        literal_trie::LiteralTrie,
+        map::{Utf8BoundedMap, Utf8SuffixKey, Utf8SuffixMap},
+        nfa::{Transition, NFA},
+        range_trie::RangeTrie,
+    },
+    util::{
+        look::{Look, LookMatcher},
+        primitives::{PatternID, StateID},
+    },
+};
+
+/// The configuration used for a Thompson NFA compiler.
+#[derive(Clone, Debug, Default)]
+pub struct Config {
+    utf8: Option<bool>,
+    reverse: Option<bool>,
+    nfa_size_limit: Option<Option<usize>>,
+    shrink: Option<bool>,
+    which_captures: Option<WhichCaptures>,
+    look_matcher: Option<LookMatcher>,
+    #[cfg(test)]
+    unanchored_prefix: Option<bool>,
+}
+
+impl Config {
+    /// Return a new default Thompson NFA compiler configuration.
+    pub fn new() -> Config {
+        Config::default()
+    }
+
+    /// Whether to enable UTF-8 mode during search or not.
+    ///
+    /// A regex engine is said to be in UTF-8 mode when it guarantees that
+    /// all matches returned by it have spans consisting of only valid UTF-8.
+    /// That is, it is impossible for a match span to be returned that
+    /// contains any invalid UTF-8.
+    ///
+    /// UTF-8 mode generally consists of two things:
+    ///
+    /// 1. Whether the NFA's states are constructed such that all paths to a
+    /// match state that consume at least one byte always correspond to valid
+    /// UTF-8.
+    /// 2. Whether all paths to a match state that do _not_ consume any bytes
+    /// should always correspond to valid UTF-8 boundaries.
+    ///
+    /// (1) is a guarantee made by whoever constructs the NFA.
+    /// If you're parsing a regex from its concrete syntax, then
+    /// [`syntax::Config::utf8`](crate::util::syntax::Config::utf8) can make
+    /// this guarantee for you. It does it by returning an error if the regex
+    /// pattern could ever report a non-empty match span that contains invalid
+    /// UTF-8. So long as `syntax::Config::utf8` mode is enabled and your regex
+    /// successfully parses, then you're guaranteed that the corresponding NFA
+    /// will only ever report non-empty match spans containing valid UTF-8.
+    ///
+    /// (2) is a trickier guarantee because it cannot be enforced by the NFA
+    /// state graph itself. Consider, for example, the regex `a*`. It matches
+    /// the empty string in `☃` at positions `0`, `1`, `2` and `3`, where
+    /// positions `1` and `2` occur within the UTF-8 encoding of a codepoint,
+    /// and thus correspond to invalid UTF-8 boundaries. Therefore, this
+    /// guarantee must be made at a higher level than the NFA state graph
+    /// itself. This crate deals with this case in each regex engine. Namely,
+    /// when a zero-width match that splits a codepoint is found and UTF-8
+    /// mode is enabled, then it is ignored and the engine moves on looking
+    /// for the next match.
+    ///
+    /// Thus, UTF-8 mode is both a promise that the NFA built only reports
+    /// non-empty matches that are valid UTF-8, and an *instruction* to regex
+    /// engines that empty matches that split codepoints should be banned.
+    ///
+    /// Because UTF-8 mode is fundamentally about avoiding invalid UTF-8 spans,
+    /// it only makes sense to enable this option when you *know* your haystack
+    /// is valid UTF-8. (For example, a `&str`.) Enabling UTF-8 mode and
+    /// searching a haystack that contains invalid UTF-8 leads to **unspecified
+    /// behavior**.
+    ///
+    /// Therefore, it may make sense to enable `syntax::Config::utf8` while
+    /// simultaneously *disabling* this option. That would ensure all non-empty
+    /// match spans are valid UTF-8, but that empty match spans may still split
+    /// a codepoint or match at other places that aren't valid UTF-8.
+    ///
+    /// In general, this mode is only relevant if your regex can match the
+    /// empty string. Most regexes don't.
+    ///
+    /// This is enabled by default.
+    ///
+    /// # Example
+    ///
+    /// This example shows how UTF-8 mode can impact the match spans that may
+    /// be reported in certain cases.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::{self, pikevm::PikeVM},
+    ///     Match, Input,
+    /// };
+    ///
+    /// let re = PikeVM::new("")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// // UTF-8 mode is enabled by default.
+    /// let mut input = Input::new("☃");
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 0..0)), caps.get_match());
+    ///
+    /// // Even though an empty regex matches at 1..1, our next match is
+    /// // 3..3 because 1..1 and 2..2 split the snowman codepoint (which is
+    /// // three bytes long).
+    /// input.set_start(1);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 3..3)), caps.get_match());
+    ///
+    /// // But if we disable UTF-8, then we'll get matches at 1..1 and 2..2:
+    /// let re = PikeVM::builder()
+    ///     .thompson(thompson::Config::new().utf8(false))
+    ///     .build("")?;
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 1..1)), caps.get_match());
+    ///
+    /// input.set_start(2);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 2..2)), caps.get_match());
+    ///
+    /// input.set_start(3);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 3..3)), caps.get_match());
+    ///
+    /// input.set_start(4);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(None, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn utf8(mut self, yes: bool) -> Config {
+        self.utf8 = Some(yes);
+        self
+    }
+
+    /// Reverse the NFA.
+    ///
+    /// An NFA reversal is performed by reversing all of the concatenated
+    /// sub-expressions in the original pattern, recursively. (Look-around
+    /// operators are also inverted.) The resulting NFA can be used to match
+    /// the pattern starting from the end of a string instead of the beginning
+    /// of a string.
+    ///
+    /// Reversing the NFA is useful for building a reverse DFA, which is most
+    /// useful for finding the start of a match after its ending position has
+    /// been found. NFA execution engines typically do not work on reverse
+    /// NFAs. For example, currently, the Pike VM reports the starting location
+    /// of matches without a reverse NFA.
+    ///
+    /// Currently, enabling this setting requires disabling the
+    /// [`captures`](Config::captures) setting. If both are enabled, then the
+    /// compiler will return an error. It is expected that this limitation will
+    /// be lifted in the future.
+    ///
+    /// This is disabled by default.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a DFA from a reverse NFA, and then use
+    /// the DFA to search backwards.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::{self, Automaton},
+    ///     nfa::thompson::{NFA, WhichCaptures},
+    ///     HalfMatch, Input,
+    /// };
+    ///
+    /// let dfa = dfa::dense::Builder::new()
+    ///     .thompson(NFA::config()
+    ///         .which_captures(WhichCaptures::None)
+    ///         .reverse(true)
+    ///     )
+    ///     .build("baz[0-9]+")?;
+    /// let expected = Some(HalfMatch::must(0, 3));
+    /// assert_eq!(
+    ///     expected,
+    ///     dfa.try_search_rev(&Input::new("foobaz12345bar"))?,
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn reverse(mut self, yes: bool) -> Config {
+        self.reverse = Some(yes);
+        self
+    }
+
+    /// Sets an approximate size limit on the total heap used by the NFA being
+    /// compiled.
+    ///
+    /// This permits imposing constraints on the size of a compiled NFA. This
+    /// may be useful in contexts where the regex pattern is untrusted and one
+    /// wants to avoid using too much memory.
+    ///
+    /// This size limit does not apply to auxiliary heap used during
+    /// compilation that is not part of the built NFA.
+    ///
+    /// Note that this size limit is applied during compilation in order for
+    /// the limit to prevent too much heap from being used. However, the
+    /// implementation may use an intermediate NFA representation that is
+    /// otherwise slightly bigger than the final public form. Since the size
+    /// limit may be applied to an intermediate representation, there is not
+    /// necessarily a precise correspondence between the configured size limit
+    /// and the heap usage of the final NFA.
+    ///
+    /// There is no size limit by default.
+    ///
+    /// # Example
+    ///
+    /// This example demonstrates how Unicode mode can greatly increase the
+    /// size of the NFA.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::nfa::thompson::NFA;
+    ///
+    /// // 300KB isn't enough!
+    /// NFA::compiler()
+    ///     .configure(NFA::config().nfa_size_limit(Some(300_000)))
+    ///     .build(r"\w{20}")
+    ///     .unwrap_err();
+    ///
+    /// // ... but 400KB probably is.
+    /// let nfa = NFA::compiler()
+    ///     .configure(NFA::config().nfa_size_limit(Some(400_000)))
+    ///     .build(r"\w{20}")?;
+    ///
+    /// assert_eq!(nfa.pattern_len(), 1);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn nfa_size_limit(mut self, bytes: Option<usize>) -> Config {
+        self.nfa_size_limit = Some(bytes);
+        self
+    }
+
+    /// Apply best effort heuristics to shrink the NFA at the expense of more
+    /// time/memory.
+    ///
+    /// Generally speaking, if one is using an NFA to compile a DFA, then the
+    /// extra time used to shrink the NFA will be more than made up for during
+    /// DFA construction (potentially by a lot). In other words, enabling this
+    /// can substantially decrease the overall amount of time it takes to build
+    /// a DFA.
+    ///
+    /// A reason to keep this disabled is if you want to compile an NFA and
+    /// start using it as quickly as possible without needing to build a DFA,
+    /// and you don't mind using a bit of extra memory for the NFA. e.g., for
+    /// an NFA simulation or for a lazy DFA.
+    ///
+    /// NFA shrinking is currently most useful when compiling a reverse
+    /// NFA with large Unicode character classes. In particular, it trades
+    /// additional CPU time during NFA compilation in favor of generating fewer
+    /// NFA states.
+    ///
+    /// This is disabled by default because it can increase compile times
+    /// quite a bit if you aren't building a full DFA.
+    ///
+    /// # Example
+    ///
+    /// This example shows that NFA shrinking can lead to substantial space
+    /// savings in some cases. Notice that, as noted above, we build a reverse
+    /// DFA and use a pattern with a large Unicode character class.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::nfa::thompson::{NFA, WhichCaptures};
+    ///
+    /// // Currently we have to disable captures when enabling reverse NFA.
+    /// let config = NFA::config()
+    ///     .which_captures(WhichCaptures::None)
+    ///     .reverse(true);
+    /// let not_shrunk = NFA::compiler()
+    ///     .configure(config.clone().shrink(false))
+    ///     .build(r"\w")?;
+    /// let shrunk = NFA::compiler()
+    ///     .configure(config.clone().shrink(true))
+    ///     .build(r"\w")?;
+    ///
+    /// // While a specific shrink factor is not guaranteed, the savings can be
+    /// // considerable in some cases.
+    /// assert!(shrunk.states().len() * 2 < not_shrunk.states().len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn shrink(mut self, yes: bool) -> Config {
+        self.shrink = Some(yes);
+        self
+    }
+
+    /// Whether to include 'Capture' states in the NFA.
+    ///
+    /// Currently, enabling this setting requires disabling the
+    /// [`reverse`](Config::reverse) setting. If both are enabled, then the
+    /// compiler will return an error. It is expected that this limitation will
+    /// be lifted in the future.
+    ///
+    /// This is enabled by default.
+    ///
+    /// # Example
+    ///
+    /// This example demonstrates that some regex engines, like the Pike VM,
+    /// require capturing states to be present in the NFA to report match
+    /// offsets.
+    ///
+    /// (Note that since this method is deprecated, the example below uses
+    /// [`Config::which_captures`] to disable capture states.)
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::{
+    ///     pikevm::PikeVM,
+    ///     NFA,
+    ///     WhichCaptures,
+    /// };
+    ///
+    /// let re = PikeVM::builder()
+    ///     .thompson(NFA::config().which_captures(WhichCaptures::None))
+    ///     .build(r"[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(re.is_match(&mut cache, "abc"));
+    /// assert_eq!(None, re.find(&mut cache, "abc"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[deprecated(since = "0.3.5", note = "use which_captures instead")]
+    pub fn captures(self, yes: bool) -> Config {
+        self.which_captures(if yes {
+            WhichCaptures::All
+        } else {
+            WhichCaptures::None
+        })
+    }
+
+    /// Configures what kinds of capture groups are compiled into
+    /// [`State::Capture`](crate::nfa::thompson::State::Capture) states in a
+    /// Thompson NFA.
+    ///
+    /// Currently, using any option except for [`WhichCaptures::None`] requires
+    /// disabling the [`reverse`](Config::reverse) setting. If both are
+    /// enabled, then the compiler will return an error. It is expected that
+    /// this limitation will be lifted in the future.
+    ///
+    /// This is set to [`WhichCaptures::All`] by default. Callers may wish to
+    /// use [`WhichCaptures::Implicit`] in cases where one wants to avoid the
+    /// overhead of capture states for explicit groups. Usually this occurs
+    /// when one wants to use the `PikeVM` only for determining the overall
+    /// match. Otherwise, the `PikeVM` could use much more memory than is
+    /// necessary.
+    ///
+    /// # Example
+    ///
+    /// This example demonstrates that some regex engines, like the Pike VM,
+    /// require capturing states to be present in the NFA to report match
+    /// offsets.
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::{
+    ///     pikevm::PikeVM,
+    ///     NFA,
+    ///     WhichCaptures,
+    /// };
+    ///
+    /// let re = PikeVM::builder()
+    ///     .thompson(NFA::config().which_captures(WhichCaptures::None))
+    ///     .build(r"[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(re.is_match(&mut cache, "abc"));
+    /// assert_eq!(None, re.find(&mut cache, "abc"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// The same applies to the bounded backtracker:
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::{
+    ///     backtrack::BoundedBacktracker,
+    ///     NFA,
+    ///     WhichCaptures,
+    /// };
+    ///
+    /// let re = BoundedBacktracker::builder()
+    ///     .thompson(NFA::config().which_captures(WhichCaptures::None))
+    ///     .build(r"[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(re.try_is_match(&mut cache, "abc")?);
+    /// assert_eq!(None, re.try_find(&mut cache, "abc")?);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn which_captures(mut self, which_captures: WhichCaptures) -> Config {
+        self.which_captures = Some(which_captures);
+        self
+    }
+
+    /// Sets the look-around matcher that should be used with this NFA.
+    ///
+    /// A look-around matcher determines how to match look-around assertions.
+    /// In particular, some assertions are configurable. For example, the
+    /// `(?m:^)` and `(?m:$)` assertions can have their line terminator changed
+    /// from the default of `\n` to any other byte.
+    ///
+    /// # Example
+    ///
+    /// This shows how to change the line terminator for multi-line assertions.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::{self, pikevm::PikeVM},
+    ///     util::look::LookMatcher,
+    ///     Match, Input,
+    /// };
+    ///
+    /// let mut lookm = LookMatcher::new();
+    /// lookm.set_line_terminator(b'\x00');
+    ///
+    /// let re = PikeVM::builder()
+    ///     .thompson(thompson::Config::new().look_matcher(lookm))
+    ///     .build(r"(?m)^[a-z]+$")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// // Multi-line assertions now use NUL as a terminator.
+    /// assert_eq!(
+    ///     Some(Match::must(0, 1..4)),
+    ///     re.find(&mut cache, b"\x00abc\x00"),
+    /// );
+    /// // ... and \n is no longer recognized as a terminator.
+    /// assert_eq!(
+    ///     None,
+    ///     re.find(&mut cache, b"\nabc\n"),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn look_matcher(mut self, m: LookMatcher) -> Config {
+        self.look_matcher = Some(m);
+        self
+    }
+
+    /// Whether to compile an unanchored prefix into this NFA.
+    ///
+    /// This is enabled by default. It is made available for tests only to make
+    /// it easier to unit test the output of the compiler.
+    #[cfg(test)]
+    fn unanchored_prefix(mut self, yes: bool) -> Config {
+        self.unanchored_prefix = Some(yes);
+        self
+    }
+
+    /// Returns whether this configuration has enabled UTF-8 mode.
+    pub fn get_utf8(&self) -> bool {
+        self.utf8.unwrap_or(true)
+    }
+
+    /// Returns whether this configuration has enabled reverse NFA compilation.
+    pub fn get_reverse(&self) -> bool {
+        self.reverse.unwrap_or(false)
+    }
+
+    /// Return the configured NFA size limit, if it exists, in the number of
+    /// bytes of heap used.
+    pub fn get_nfa_size_limit(&self) -> Option<usize> {
+        self.nfa_size_limit.unwrap_or(None)
+    }
+
+    /// Return whether NFA shrinking is enabled.
+    pub fn get_shrink(&self) -> bool {
+        self.shrink.unwrap_or(false)
+    }
+
+    /// Return whether NFA compilation is configured to produce capture states.
+    #[deprecated(since = "0.3.5", note = "use get_which_captures instead")]
+    pub fn get_captures(&self) -> bool {
+        self.get_which_captures().is_any()
+    }
+
+    /// Return what kinds of capture states will be compiled into an NFA.
+    pub fn get_which_captures(&self) -> WhichCaptures {
+        self.which_captures.unwrap_or(WhichCaptures::All)
+    }
+
+    /// Return the look-around matcher for this NFA.
+    pub fn get_look_matcher(&self) -> LookMatcher {
+        self.look_matcher.clone().unwrap_or(LookMatcher::default())
+    }
+
+    /// Return whether NFA compilation is configured to include an unanchored
+    /// prefix.
+    ///
+    /// This is always true when not in test mode.
+    fn get_unanchored_prefix(&self) -> bool {
+        #[cfg(test)]
+        {
+            self.unanchored_prefix.unwrap_or(true)
+        }
+        #[cfg(not(test))]
+        {
+            true
+        }
+    }
+
+    /// Overwrite the default configuration such that the options in `o` are
+    /// always used. If an option in `o` is not set, then the corresponding
+    /// option in `self` is used. If it's not set in `self` either, then it
+    /// remains not set.
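+    ///
+    /// An illustrative sketch (added here; marked `ignore` since this method
+    /// is crate-private and not runnable as a doctest):
+    ///
+    /// ```ignore
+    /// let base = Config::new().utf8(false);
+    /// let over = Config::new().reverse(true);
+    /// let merged = base.overwrite(over);
+    /// assert_eq!(false, merged.get_utf8());   // kept from 'base'
+    /// assert_eq!(true, merged.get_reverse()); // taken from 'o'
+    /// ```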
+    pub(crate) fn overwrite(&self, o: Config) -> Config {
+        Config {
+            utf8: o.utf8.or(self.utf8),
+            reverse: o.reverse.or(self.reverse),
+            nfa_size_limit: o.nfa_size_limit.or(self.nfa_size_limit),
+            shrink: o.shrink.or(self.shrink),
+            which_captures: o.which_captures.or(self.which_captures),
+            look_matcher: o.look_matcher.or_else(|| self.look_matcher.clone()),
+            #[cfg(test)]
+            unanchored_prefix: o.unanchored_prefix.or(self.unanchored_prefix),
+        }
+    }
+}
+
+/// A configuration indicating which kinds of
+/// [`State::Capture`](crate::nfa::thompson::State::Capture) states to include.
+///
+/// This configuration can be used with [`Config::which_captures`] to control
+/// which capture states are compiled into a Thompson NFA.
+///
+/// The default configuration is [`WhichCaptures::All`].
+#[derive(Clone, Copy, Debug)]
+pub enum WhichCaptures {
+    /// All capture states, including those corresponding to both implicit and
+    /// explicit capture groups, are included in the Thompson NFA.
+    All,
+    /// Only capture states corresponding to implicit capture groups are
+    /// included. Implicit capture groups appear in every pattern implicitly
+    /// and correspond to the overall match of a pattern.
+    ///
+    /// This is useful when one only cares about the overall match of a
+    /// pattern. By excluding capture states from explicit capture groups,
+    /// one might be able to reduce the memory usage of a multi-pattern regex
+    /// substantially if it was otherwise written to have many explicit capture
+    /// groups.
+    Implicit,
+    /// No capture states are compiled into the Thompson NFA.
+    ///
+    /// This is useful when capture states are either not needed (for example,
+    /// if one is only trying to build a DFA) or if they aren't supported (for
+    /// example, a reverse NFA).
+    None,
+}
+
+impl Default for WhichCaptures {
+    fn default() -> WhichCaptures {
+        WhichCaptures::All
+    }
+}
+
+impl WhichCaptures {
+    /// Returns true if this configuration indicates that no capture states
+    /// should be produced in an NFA.
+    pub fn is_none(&self) -> bool {
+        matches!(*self, WhichCaptures::None)
+    }
+
+    /// Returns true if this configuration indicates that some capture states
+    /// should be added to an NFA. Note that this might only include capture
+    /// states for implicit capture groups.
+    pub fn is_any(&self) -> bool {
+        !self.is_none()
+    }
+}
+
+/*
+The compiler below uses Thompson's construction algorithm. The compiler takes
+a regex-syntax::Hir as input and emits an NFA graph as output. The NFA graph
+is structured in a way that permits it to be executed by a virtual machine and
+also used to efficiently build a DFA.
+
+The compiler deals with a slightly larger set of NFA states than what appears
+in a final NFA (as exhibited by builder::State and nfa::State). Notably, a
+compiler state includes an empty node that has exactly one unconditional
+epsilon transition to the next state. In other words, it's a "goto" instruction
+if one views Thompson's NFA as a set of bytecode instructions. These goto
+instructions are removed in a subsequent phase before returning the NFA to the
+caller. The purpose of these empty nodes is that they make the construction
+algorithm substantially simpler to implement. We remove them before returning
+to the caller because they can represent substantial overhead when traversing
+the NFA graph (either while searching using the NFA directly or while building
+a DFA).
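+
+As an illustration (a sketch added here, not upstream text; the exact state
+numbering will differ), the regex 'a|b' might first be compiled with "goto"
+(empty) states joining both arms to a single exit:
+
+    >000000: union(01, 03)
+     000001: 61 => 02        # 'a'
+     000002: goto => 05
+     000003: 62 => 04        # 'b'
+     000004: goto => 05
+     000005: MATCH
+
+The goto states at 2 and 4 are then removed before the NFA is returned, with
+any transitions into them rewritten to point directly at state 5.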
+
+In the future, it would be nice to provide a Glushkov compiler as well, as it
+would work well as a bit-parallel NFA for smaller regexes. But the Thompson
+construction is one I'm more familiar with and seems more straightforward to
+deal with when it comes to large Unicode character classes.
+
+Internally, the compiler uses interior mutability to improve composition in the
+face of the borrow checker. In particular, we'd really like to be able to write
+things like this:
+
+    self.c_concat(exprs.iter().map(|e| self.c(e)))
+
+which elegantly uses iterators to build up a sequence of compiled regex
+sub-expressions and then hands it off to the concatenating compiler routine.
+Without interior mutability, the borrow checker won't let us borrow `self`
+mutably both inside and outside the closure at the same time.
+*/
+
+/// A builder for compiling an NFA from a regex's high-level intermediate
+/// representation (HIR).
+///
+/// This compiler provides a way to translate a parsed regex pattern into an
+/// NFA state graph. The NFA state graph can either be used directly to execute
+/// a search (e.g., with a Pike VM), or it can be further used to build a DFA.
+///
+/// This compiler provides APIs both for compiling regex patterns directly from
+/// their concrete syntax and for compiling from a [`regex_syntax::hir::Hir`].
+///
+/// This compiler has various options that may be configured via
+/// [`thompson::Config`](Config).
+///
+/// Note that a compiler is not the same as a [`thompson::Builder`](Builder).
+/// A `Builder` provides a lower level API that is uncoupled from a regex
+/// pattern's concrete syntax or even its HIR. Instead, it permits stitching
+/// together an NFA by hand. See its docs for examples.
+///
+/// # Example: compilation from concrete syntax
+///
+/// This shows how to compile an NFA from a pattern string while setting a size
+/// limit on how big the NFA is allowed to be (in terms of bytes of heap used).
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::{NFA, pikevm::PikeVM},
+///     Match,
+/// };
+///
+/// let config = NFA::config().nfa_size_limit(Some(1_000));
+/// let nfa = NFA::compiler().configure(config).build(r"(?-u)\w")?;
+///
+/// let re = PikeVM::new_from_nfa(nfa)?;
+/// let mut cache = re.create_cache();
+/// let mut caps = re.create_captures();
+/// let expected = Some(Match::must(0, 3..4));
+/// re.captures(&mut cache, "!@#A#@!", &mut caps);
+/// assert_eq!(expected, caps.get_match());
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// # Example: compilation from HIR
+///
+/// This shows how to hand assemble a regular expression via its HIR, and then
+/// compile an NFA directly from it.
+///
+/// ```
+/// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match};
+/// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange};
+///
+/// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![
+///     ClassBytesRange::new(b'0', b'9'),
+///     ClassBytesRange::new(b'A', b'Z'),
+///     ClassBytesRange::new(b'_', b'_'),
+///     ClassBytesRange::new(b'a', b'z'),
+/// ])));
+///
+/// let config = NFA::config().nfa_size_limit(Some(1_000));
+/// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?;
+///
+/// let re = PikeVM::new_from_nfa(nfa)?;
+/// let mut cache = re.create_cache();
+/// let mut caps = re.create_captures();
+/// let expected = Some(Match::must(0, 3..4));
+/// re.captures(&mut cache, "!@#A#@!", &mut caps);
+/// assert_eq!(expected, caps.get_match());
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Compiler {
+    /// A regex parser, used when compiling an NFA directly from a pattern
+    /// string.
+    parser: ParserBuilder,
+    /// The compiler configuration.
+    config: Config,
+    /// The builder for actually constructing an NFA. This provides a
+    /// convenient abstraction for writing a compiler.
+    builder: RefCell<Builder>,
+    /// State used for compiling character classes to UTF-8 byte automata.
+    /// State is not retained between character class compilations. This just
+    /// serves to amortize allocation to the extent possible.
+    utf8_state: RefCell<Utf8State>,
+    /// State used for arranging character classes in reverse into a trie.
+    trie_state: RefCell<RangeTrie>,
+    /// State used for caching common suffixes when compiling reverse UTF-8
+    /// automata (for Unicode character classes).
+    utf8_suffix: RefCell<Utf8SuffixMap>,
+}
+
+impl Compiler {
+    /// Create a new NFA compiler with its default configuration.
+    pub fn new() -> Compiler {
+        Compiler {
+            parser: ParserBuilder::new(),
+            config: Config::default(),
+            builder: RefCell::new(Builder::new()),
+            utf8_state: RefCell::new(Utf8State::new()),
+            trie_state: RefCell::new(RangeTrie::new()),
+            utf8_suffix: RefCell::new(Utf8SuffixMap::new(1000)),
+        }
+    }
+
+    /// Compile the given regular expression pattern into an NFA.
+    ///
+    /// If there was a problem parsing the regex, then that error is returned.
+    ///
+    /// Otherwise, if there was a problem building the NFA, then an error is
+    /// returned. The only error that can occur is if the compiled regex would
+    /// exceed the size limits configured on this builder, or if any part of
+    /// the NFA would exceed the integer representations used. (For example,
+    /// too many states might plausibly occur on a 16-bit target.)
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match};
+    ///
+    /// let config = NFA::config().nfa_size_limit(Some(1_000));
+    /// let nfa = NFA::compiler().configure(config).build(r"(?-u)\w")?;
+    ///
+    /// let re = PikeVM::new_from_nfa(nfa)?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = re.create_captures();
+    /// let expected = Some(Match::must(0, 3..4));
+    /// re.captures(&mut cache, "!@#A#@!", &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn build(&self, pattern: &str) -> Result<NFA, BuildError> {
+        self.build_many(&[pattern])
+    }
+
+    /// Compile the given regular expression patterns into a single NFA.
+    ///
+    /// When matches are returned, the pattern ID corresponds to the index of
+    /// the pattern in the slice given.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match};
+    ///
+    /// let config = NFA::config().nfa_size_limit(Some(1_000));
+    /// let nfa = NFA::compiler().configure(config).build_many(&[
+    ///     r"(?-u)\s",
+    ///     r"(?-u)\w",
+    /// ])?;
+    ///
+    /// let re = PikeVM::new_from_nfa(nfa)?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = re.create_captures();
+    /// let expected = Some(Match::must(1, 1..2));
+    /// re.captures(&mut cache, "!A! !A!", &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn build_many<P: AsRef<str>>(
+        &self,
+        patterns: &[P],
+    ) -> Result<NFA, BuildError> {
+        let mut hirs = vec![];
+        for p in patterns {
+            hirs.push(
+                self.parser
+                    .build()
+                    .parse(p.as_ref())
+                    .map_err(BuildError::syntax)?,
+            );
+            debug!("parsed: {:?}", p.as_ref());
+        }
+        self.build_many_from_hir(&hirs)
+    }
+
+    /// Compile the given high level intermediate representation of a regular
+    /// expression into an NFA.
+    ///
+    /// If there was a problem building the NFA, then an error is returned. The
+    /// only error that can occur is if the compiled regex would exceed the
+    /// size limits configured on this builder, or if any part of the NFA would
+    /// exceed the integer representations used. (For example, too many states
+    /// might plausibly occur on a 16-bit target.)
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match};
+    /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange};
+    ///
+    /// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![
+    ///     ClassBytesRange::new(b'0', b'9'),
+    ///     ClassBytesRange::new(b'A', b'Z'),
+    ///     ClassBytesRange::new(b'_', b'_'),
+    ///     ClassBytesRange::new(b'a', b'z'),
+    /// ])));
+    ///
+    /// let config = NFA::config().nfa_size_limit(Some(1_000));
+    /// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?;
+    ///
+    /// let re = PikeVM::new_from_nfa(nfa)?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = re.create_captures();
+    /// let expected = Some(Match::must(0, 3..4));
+    /// re.captures(&mut cache, "!@#A#@!", &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn build_from_hir(&self, expr: &Hir) -> Result<NFA, BuildError> {
+        self.build_many_from_hir(&[expr])
+    }
+
+    /// Compile the given high level intermediate representations of regular
+    /// expressions into a single NFA.
+    ///
+    /// When matches are returned, the pattern ID corresponds to the index of
+    /// the pattern in the slice given.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match};
+    /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange};
+    ///
+    /// let hirs = &[
+    ///     Hir::class(Class::Bytes(ClassBytes::new(vec![
+    ///         ClassBytesRange::new(b'\t', b'\r'),
+    ///         ClassBytesRange::new(b' ', b' '),
+    ///     ]))),
+    ///     Hir::class(Class::Bytes(ClassBytes::new(vec![
+    ///         ClassBytesRange::new(b'0', b'9'),
+    ///         ClassBytesRange::new(b'A', b'Z'),
+    ///         ClassBytesRange::new(b'_', b'_'),
+    ///         ClassBytesRange::new(b'a', b'z'),
+    ///     ]))),
+    /// ];
+    ///
+    /// let config = NFA::config().nfa_size_limit(Some(1_000));
+    /// let nfa = NFA::compiler().configure(config).build_many_from_hir(hirs)?;
+    ///
+    /// let re = PikeVM::new_from_nfa(nfa)?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = re.create_captures();
+    /// let expected = Some(Match::must(1, 1..2));
+    /// re.captures(&mut cache, "!A! !A!", &mut caps);
!A!", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build_many_from_hir>( + &self, + exprs: &[H], + ) -> Result { + self.compile(exprs) + } + + /// Apply the given NFA configuration options to this builder. + /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::NFA; + /// + /// let config = NFA::config().nfa_size_limit(Some(1_000)); + /// let nfa = NFA::compiler().configure(config).build(r"(?-u)\w")?; + /// assert_eq!(nfa.pattern_len(), 1); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn configure(&mut self, config: Config) -> &mut Compiler { + self.config = self.config.overwrite(config); + self + } + + /// Set the syntax configuration for this builder using + /// [`syntax::Config`](crate::util::syntax::Config). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + /// + /// This syntax configuration only applies when an NFA is built directly + /// from a pattern string. If an NFA is built from an HIR, then all syntax + /// settings are ignored. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::syntax}; + /// + /// let syntax_config = syntax::Config::new().unicode(false); + /// let nfa = NFA::compiler().syntax(syntax_config).build(r"\w")?; + /// // If Unicode were enabled, the number of states would be much bigger. + /// assert!(nfa.states().len() < 15); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Compiler { + config.apply(&mut self.parser); + self + } +} + +impl Compiler { + /// Compile the sequence of HIR expressions given. Pattern IDs are + /// allocated starting from 0, in correspondence with the slice given. + /// + /// It is legal to provide an empty slice. In that case, the NFA returned + /// has no patterns and will never match anything. + fn compile>(&self, exprs: &[H]) -> Result { + if exprs.len() > PatternID::LIMIT { + return Err(BuildError::too_many_patterns(exprs.len())); + } + if self.config.get_reverse() + && self.config.get_which_captures().is_any() + { + return Err(BuildError::unsupported_captures()); + } + + self.builder.borrow_mut().clear(); + self.builder.borrow_mut().set_utf8(self.config.get_utf8()); + self.builder.borrow_mut().set_reverse(self.config.get_reverse()); + self.builder + .borrow_mut() + .set_look_matcher(self.config.get_look_matcher()); + self.builder + .borrow_mut() + .set_size_limit(self.config.get_nfa_size_limit())?; + + // We always add an unanchored prefix unless we were specifically told + // not to (for tests only), or if we know that the regex is anchored + // for all matches. When an unanchored prefix is not added, then the + // NFA's anchored and unanchored start states are equivalent. + let all_anchored = exprs.iter().all(|e| { + e.borrow() + .properties() + .look_set_prefix() + .contains(hir::Look::Start) + }); + let anchored = !self.config.get_unanchored_prefix() || all_anchored; + let unanchored_prefix = if anchored { + self.c_empty()? + } else { + self.c_at_least(&Hir::dot(hir::Dot::AnyByte), false, 0)? 
+        };
+
+        let compiled = self.c_alt_iter(exprs.iter().map(|e| {
+            let _ = self.start_pattern()?;
+            let one = self.c_cap(0, None, e.borrow())?;
+            let match_state_id = self.add_match()?;
+            self.patch(one.end, match_state_id)?;
+            let _ = self.finish_pattern(one.start)?;
+            Ok(ThompsonRef { start: one.start, end: match_state_id })
+        }))?;
+        self.patch(unanchored_prefix.end, compiled.start)?;
+        let nfa = self
+            .builder
+            .borrow_mut()
+            .build(compiled.start, unanchored_prefix.start)?;
+
+        debug!("HIR-to-NFA compilation complete, config: {:?}", self.config);
+        Ok(nfa)
+    }
+
+    /// Compile an arbitrary HIR expression.
+    fn c(&self, expr: &Hir) -> Result<ThompsonRef, BuildError> {
+        use regex_syntax::hir::{Class, HirKind::*};
+
+        match *expr.kind() {
+            Empty => self.c_empty(),
+            Literal(hir::Literal(ref bytes)) => self.c_literal(bytes),
+            Class(Class::Bytes(ref c)) => self.c_byte_class(c),
+            Class(Class::Unicode(ref c)) => self.c_unicode_class(c),
+            Look(ref look) => self.c_look(look),
+            Repetition(ref rep) => self.c_repetition(rep),
+            Capture(ref c) => self.c_cap(c.index, c.name.as_deref(), &c.sub),
+            Concat(ref es) => self.c_concat(es.iter().map(|e| self.c(e))),
+            Alternation(ref es) => self.c_alt_slice(es),
+        }
+    }
+
+    /// Compile a concatenation of the sub-expressions yielded by the given
+    /// iterator. If the iterator yields no elements, then this compiles down
+    /// to an "empty" state that always matches.
+    ///
+    /// If the compiler is in reverse mode, then the expressions given are
+    /// automatically compiled in reverse.
+    fn c_concat<I>(&self, mut it: I) -> Result<ThompsonRef, BuildError>
+    where
+        I: DoubleEndedIterator<Item = Result<ThompsonRef, BuildError>>,
+    {
+        let first = if self.is_reverse() { it.next_back() } else { it.next() };
+        let ThompsonRef { start, mut end } = match first {
+            Some(result) => result?,
+            None => return self.c_empty(),
+        };
+        loop {
+            let next =
+                if self.is_reverse() { it.next_back() } else { it.next() };
+            let compiled = match next {
+                Some(result) => result?,
+                None => break,
+            };
+            self.patch(end, compiled.start)?;
+            end = compiled.end;
+        }
+        Ok(ThompsonRef { start, end })
+    }
+
+    /// Compile an alternation of the given HIR values.
+    ///
+    /// This is like 'c_alt_iter', but it accepts a slice of HIR values instead
+    /// of an iterator of compiled NFA subgraphs. The point of accepting a
+    /// slice here is that it opens up some optimization opportunities. For
+    /// example, if all of the HIR values are literals, then this routine might
+    /// re-shuffle them to make NFA epsilon closures substantially faster.
+    fn c_alt_slice(&self, exprs: &[Hir]) -> Result<ThompsonRef, BuildError> {
+        // self.c_alt_iter(exprs.iter().map(|e| self.c(e)))
+        let literal_count = exprs
+            .iter()
+            .filter(|e| {
+                matches!(*e.kind(), hir::HirKind::Literal(hir::Literal(_)))
+            })
+            .count();
+        if literal_count <= 1 || literal_count < exprs.len() {
+            return self.c_alt_iter(exprs.iter().map(|e| self.c(e)));
+        }
+
+        let mut trie = if self.is_reverse() {
+            LiteralTrie::reverse()
+        } else {
+            LiteralTrie::forward()
+        };
+        for expr in exprs.iter() {
+            let literal = match *expr.kind() {
+                hir::HirKind::Literal(hir::Literal(ref bytes)) => bytes,
+                _ => unreachable!(),
+            };
+            trie.add(literal)?;
+        }
+        trie.compile(&mut self.builder.borrow_mut())
+    }
+
+    /// Compile an alternation, where each element yielded by the given
+    /// iterator represents an item in the alternation. If the iterator yields
+    /// no elements, then this compiles down to a "fail" state.
+    ///
+    /// In an alternation, expressions appearing earlier are "preferred" at
+    /// match time over expressions appearing later.
At least, this is true + /// when using "leftmost first" match semantics. (If "leftmost longest" are + /// ever added in the future, then this preference order of priority would + /// not apply in that mode.) + fn c_alt_iter(&self, mut it: I) -> Result + where + I: Iterator>, + { + let first = match it.next() { + None => return self.c_fail(), + Some(result) => result?, + }; + let second = match it.next() { + None => return Ok(first), + Some(result) => result?, + }; + + let union = self.add_union()?; + let end = self.add_empty()?; + self.patch(union, first.start)?; + self.patch(first.end, end)?; + self.patch(union, second.start)?; + self.patch(second.end, end)?; + for result in it { + let compiled = result?; + self.patch(union, compiled.start)?; + self.patch(compiled.end, end)?; + } + Ok(ThompsonRef { start: union, end }) + } + + /// Compile the given capture sub-expression. `expr` should be the + /// sub-expression contained inside the capture. If "capture" states are + /// enabled, then they are added as appropriate. + /// + /// This accepts the pieces of a capture instead of a `hir::Capture` so + /// that it's easy to manufacture a "fake" group when necessary, e.g., for + /// adding the entire pattern as if it were a group in order to create + /// appropriate "capture" states in the NFA. + fn c_cap( + &self, + index: u32, + name: Option<&str>, + expr: &Hir, + ) -> Result { + match self.config.get_which_captures() { + // No capture states means we always skip them. + WhichCaptures::None => return self.c(expr), + // Implicit captures states means we only add when index==0 since + // index==0 implies the group is implicit. + WhichCaptures::Implicit if index > 0 => return self.c(expr), + _ => {} + } + + let start = self.add_capture_start(index, name)?; + let inner = self.c(expr)?; + let end = self.add_capture_end(index)?; + self.patch(start, inner.start)?; + self.patch(inner.end, end)?; + Ok(ThompsonRef { start, end }) + } + + /// Compile the given repetition expression. This handles all types of + /// repetitions and greediness. + fn c_repetition( + &self, + rep: &hir::Repetition, + ) -> Result { + match (rep.min, rep.max) { + (0, Some(1)) => self.c_zero_or_one(&rep.sub, rep.greedy), + (min, None) => self.c_at_least(&rep.sub, rep.greedy, min), + (min, Some(max)) if min == max => self.c_exactly(&rep.sub, min), + (min, Some(max)) => self.c_bounded(&rep.sub, rep.greedy, min, max), + } + } + + /// Compile the given expression such that it matches at least `min` times, + /// but no more than `max` times. + /// + /// When `greedy` is true, then the preference is for the expression to + /// match as much as possible. Otherwise, it will match as little as + /// possible. + fn c_bounded( + &self, + expr: &Hir, + greedy: bool, + min: u32, + max: u32, + ) -> Result { + let prefix = self.c_exactly(expr, min)?; + if min == max { + return Ok(prefix); + } + + // It is tempting here to compile the rest here as a concatenation + // of zero-or-one matches. i.e., for `a{2,5}`, compile it as if it + // were `aaa?a?a?`. The problem here is that it leads to this program: + // + // >000000: 61 => 01 + // 000001: 61 => 02 + // 000002: union(03, 04) + // 000003: 61 => 04 + // 000004: union(05, 06) + // 000005: 61 => 06 + // 000006: union(07, 08) + // 000007: 61 => 08 + // 000008: MATCH + // + // And effectively, once you hit state 2, the epsilon closure will + // include states 3, 5, 6, 7 and 8, which is quite a bit. 
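+        // In other words, each union state epsilon-chains into the next
+        // one, so the closure grows with the number of optional copies.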
It is better + // to instead compile it like so: + // + // >000000: 61 => 01 + // 000001: 61 => 02 + // 000002: union(03, 08) + // 000003: 61 => 04 + // 000004: union(05, 08) + // 000005: 61 => 06 + // 000006: union(07, 08) + // 000007: 61 => 08 + // 000008: MATCH + // + // So that the epsilon closure of state 2 is now just 3 and 8. + let empty = self.add_empty()?; + let mut prev_end = prefix.end; + for _ in min..max { + let union = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + let compiled = self.c(expr)?; + self.patch(prev_end, union)?; + self.patch(union, compiled.start)?; + self.patch(union, empty)?; + prev_end = compiled.end; + } + self.patch(prev_end, empty)?; + Ok(ThompsonRef { start: prefix.start, end: empty }) + } + + /// Compile the given expression such that it may be matched `n` or more + /// times, where `n` can be any integer. (Although a particularly large + /// integer is likely to run afoul of any configured size limits.) + /// + /// When `greedy` is true, then the preference is for the expression to + /// match as much as possible. Otherwise, it will match as little as + /// possible. + fn c_at_least( + &self, + expr: &Hir, + greedy: bool, + n: u32, + ) -> Result { + if n == 0 { + // When the expression cannot match the empty string, then we + // can get away with something much simpler: just one 'alt' + // instruction that optionally repeats itself. But if the expr + // can match the empty string... see below. + if expr.properties().minimum_len().map_or(false, |len| len > 0) { + let union = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + let compiled = self.c(expr)?; + self.patch(union, compiled.start)?; + self.patch(compiled.end, union)?; + return Ok(ThompsonRef { start: union, end: union }); + } + + // What's going on here? Shouldn't x* be simpler than this? It + // turns out that when implementing leftmost-first (Perl-like) + // match semantics, x* results in an incorrect preference order + // when computing the transitive closure of states if and only if + // 'x' can match the empty string. So instead, we compile x* as + // (x+)?, which preserves the correct preference order. + // + // See: https://github.com/rust-lang/regex/issues/779 + let compiled = self.c(expr)?; + let plus = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + self.patch(compiled.end, plus)?; + self.patch(plus, compiled.start)?; + + let question = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + let empty = self.add_empty()?; + self.patch(question, compiled.start)?; + self.patch(question, empty)?; + self.patch(plus, empty)?; + Ok(ThompsonRef { start: question, end: empty }) + } else if n == 1 { + let compiled = self.c(expr)?; + let union = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + self.patch(compiled.end, union)?; + self.patch(union, compiled.start)?; + Ok(ThompsonRef { start: compiled.start, end: union }) + } else { + let prefix = self.c_exactly(expr, n - 1)?; + let last = self.c(expr)?; + let union = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + self.patch(prefix.end, last.start)?; + self.patch(last.end, union)?; + self.patch(union, last.start)?; + Ok(ThompsonRef { start: prefix.start, end: union }) + } + } + + /// Compile the given expression such that it may be matched zero or one + /// times. + /// + /// When `greedy` is true, then the preference is for the expression to + /// match as much as possible. 
Otherwise, it will match as little as
+    /// possible.
+    fn c_zero_or_one(
+        &self,
+        expr: &Hir,
+        greedy: bool,
+    ) -> Result<ThompsonRef, BuildError> {
+        let union =
+            if greedy { self.add_union() } else { self.add_union_reverse() }?;
+        let compiled = self.c(expr)?;
+        let empty = self.add_empty()?;
+        self.patch(union, compiled.start)?;
+        self.patch(union, empty)?;
+        self.patch(compiled.end, empty)?;
+        Ok(ThompsonRef { start: union, end: empty })
+    }
+
+    /// Compile the given HIR expression exactly `n` times.
+    fn c_exactly(
+        &self,
+        expr: &Hir,
+        n: u32,
+    ) -> Result<ThompsonRef, BuildError> {
+        let it = (0..n).map(|_| self.c(expr));
+        self.c_concat(it)
+    }
+
+    /// Compile the given byte oriented character class.
+    ///
+    /// This uses "sparse" states to represent an alternation between ranges
+    /// in this character class. We can use "sparse" states instead of
+    /// stitching together a "union" state because all ranges in a character
+    /// class have equal priority *and* are non-overlapping (thus, only one
+    /// can match, so there's never a question of priority in the first
+    /// place). This saves a fair bit of overhead when traversing an NFA.
+    ///
+    /// This routine compiles an empty character class into a "fail" state.
+    fn c_byte_class(
+        &self,
+        cls: &hir::ClassBytes,
+    ) -> Result<ThompsonRef, BuildError> {
+        let end = self.add_empty()?;
+        let mut trans = Vec::with_capacity(cls.ranges().len());
+        for r in cls.iter() {
+            trans.push(Transition {
+                start: r.start(),
+                end: r.end(),
+                next: end,
+            });
+        }
+        Ok(ThompsonRef { start: self.add_sparse(trans)?, end })
+    }
+
+    /// Compile the given Unicode character class.
+    ///
+    /// This routine specifically tries to use various types of compression,
+    /// since UTF-8 automata of large classes can get quite large. The
+    /// specific type of compression used depends on forward vs reverse
+    /// compilation, and whether NFA shrinking is enabled or not.
+    ///
+    /// Aside from repetitions, which can cause a sub-expression to be
+    /// compiled many times over, this is likely the single most expensive
+    /// part of regex compilation. Therefore, a large part of the expense of
+    /// compilation may be reduced by disabling Unicode in the pattern.
+    ///
+    /// This routine compiles an empty character class into a "fail" state.
+    fn c_unicode_class(
+        &self,
+        cls: &hir::ClassUnicode,
+    ) -> Result<ThompsonRef, BuildError> {
+        // If all we have are ASCII ranges wrapped in a Unicode package, then
+        // there is zero reason to bring out the big guns. We can fit all
+        // ASCII ranges within a single sparse state.
+        if cls.is_ascii() {
+            let end = self.add_empty()?;
+            let mut trans = Vec::with_capacity(cls.ranges().len());
+            for r in cls.iter() {
+                // The unwraps below are OK because we've verified that this
+                // class only contains ASCII codepoints.
+                trans.push(Transition {
+                    // FIXME(1.59): use the 'TryFrom<char> for u8' impl.
+                    start: u8::try_from(u32::from(r.start())).unwrap(),
+                    end: u8::try_from(u32::from(r.end())).unwrap(),
+                    next: end,
+                });
+            }
+            Ok(ThompsonRef { start: self.add_sparse(trans)?, end })
+        } else if self.is_reverse() {
+            if !self.config.get_shrink() {
+                // When we don't want to spend the extra time shrinking, we
+                // compile the UTF-8 automaton in reverse using something
+                // like the "naive" approach, but will attempt to re-use
+                // common suffixes.
+                self.c_unicode_class_reverse_with_suffix(cls)
+            } else {
+                // When we want to shrink our NFA for reverse UTF-8
+                // automata, we cannot feed UTF-8 sequences directly to the
+                // UTF-8 compiler, since the UTF-8 compiler requires all
+                // sequences to be lexicographically sorted.
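+                // (Reversing each UTF-8 sequence individually scrambles
+                // that lexicographic order, which is why the sorted-input
+                // requirement fails in the reverse direction.)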
Instead, we organize our + // sequences into a range trie, which can then output our + // sequences in the correct order. Unfortunately, building the + // range trie is fairly expensive (but not nearly as expensive + // as building a DFA). Hence the reason why the 'shrink' option + // exists, so that this path can be toggled off. For example, + // we might want to turn this off if we know we won't be + // compiling a DFA. + let mut trie = self.trie_state.borrow_mut(); + trie.clear(); + + for rng in cls.iter() { + for mut seq in Utf8Sequences::new(rng.start(), rng.end()) { + seq.reverse(); + trie.insert(seq.as_slice()); + } + } + let mut builder = self.builder.borrow_mut(); + let mut utf8_state = self.utf8_state.borrow_mut(); + let mut utf8c = + Utf8Compiler::new(&mut *builder, &mut *utf8_state)?; + trie.iter(|seq| { + utf8c.add(&seq)?; + Ok(()) + })?; + utf8c.finish() + } + } else { + // In the forward direction, we always shrink our UTF-8 automata + // because we can stream it right into the UTF-8 compiler. There + // is almost no downside (in either memory or time) to using this + // approach. + let mut builder = self.builder.borrow_mut(); + let mut utf8_state = self.utf8_state.borrow_mut(); + let mut utf8c = + Utf8Compiler::new(&mut *builder, &mut *utf8_state)?; + for rng in cls.iter() { + for seq in Utf8Sequences::new(rng.start(), rng.end()) { + utf8c.add(seq.as_slice())?; + } + } + utf8c.finish() + } + + // For reference, the code below is the "naive" version of compiling a + // UTF-8 automaton. It is deliciously simple (and works for both the + // forward and reverse cases), but will unfortunately produce very + // large NFAs. When compiling a forward automaton, the size difference + // can sometimes be an order of magnitude. For example, the '\w' regex + // will generate about ~3000 NFA states using the naive approach below, + // but only 283 states when using the approach above. This is because + // the approach above actually compiles a *minimal* (or near minimal, + // because of the bounded hashmap for reusing equivalent states) UTF-8 + // automaton. + // + // The code below is kept as a reference point in order to make it + // easier to understand the higher level goal here. Although, it will + // almost certainly bit-rot, so keep that in mind. Also, if you try to + // use it, some of the tests in this module will fail because they look + // for terser byte code produce by the more optimized handling above. + // But the integration test suite should still pass. + // + // One good example of the substantial difference this can make is to + // compare and contrast performance of the Pike VM when the code below + // is active vs the code above. Here's an example to try: + // + // regex-cli find match pikevm -b -p '(?m)^\w{20}' non-ascii-file + // + // With Unicode classes generated below, this search takes about 45s on + // my machine. But with the compressed version above, the search takes + // only around 1.4s. The NFA is also 20% smaller. This is in part due + // to the compression, but also because of the utilization of 'sparse' + // NFA states. They lead to much less state shuffling during the NFA + // search. + /* + let it = cls + .iter() + .flat_map(|rng| Utf8Sequences::new(rng.start(), rng.end())) + .map(|seq| { + let it = seq + .as_slice() + .iter() + .map(|rng| self.c_range(rng.start, rng.end)); + self.c_concat(it) + }); + self.c_alt_iter(it) + */ + } + + /// Compile the given Unicode character class in reverse with suffix + /// caching. 
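+    ///
+    /// For example, when compiling `[αβ]` in reverse, the reversed UTF-8
+    /// sequences `\xB1\xCE` and `\xB2\xCE` share the final `\xCE` transition
+    /// into the match state, so that transition is created once and reused
+    /// for both codepoints.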
+    ///
+    /// This is a "quick" way to compile large Unicode classes into reverse
+    /// UTF-8 automata while doing a small amount of compression on those
+    /// automata by reusing common suffixes.
+    ///
+    /// A more comprehensive compression scheme can be accomplished by using
+    /// a range trie to efficiently sort a reverse sequence of UTF-8 byte
+    /// ranges, and then using Daciuk's algorithm via `Utf8Compiler`.
+    ///
+    /// The suffix caching done here is the technique used when "NFA
+    /// shrinking" is disabled.
+    ///
+    /// (This also tries to use "sparse" states where possible, just like
+    /// `c_byte_class` does.)
+    fn c_unicode_class_reverse_with_suffix(
+        &self,
+        cls: &hir::ClassUnicode,
+    ) -> Result<ThompsonRef, BuildError> {
+        // N.B. It would likely be better to cache common *prefixes* in the
+        // reverse direction, but it's not quite clear how to do that. The
+        // advantage of caching suffixes is that it does give us a win, and
+        // has a very small additional overhead.
+        let mut cache = self.utf8_suffix.borrow_mut();
+        cache.clear();
+
+        let union = self.add_union()?;
+        let alt_end = self.add_empty()?;
+        for urng in cls.iter() {
+            for seq in Utf8Sequences::new(urng.start(), urng.end()) {
+                let mut end = alt_end;
+                for brng in seq.as_slice() {
+                    let key = Utf8SuffixKey {
+                        from: end,
+                        start: brng.start,
+                        end: brng.end,
+                    };
+                    let hash = cache.hash(&key);
+                    if let Some(id) = cache.get(&key, hash) {
+                        end = id;
+                        continue;
+                    }
+
+                    let compiled = self.c_range(brng.start, brng.end)?;
+                    self.patch(compiled.end, end)?;
+                    end = compiled.start;
+                    cache.set(key, hash, end);
+                }
+                self.patch(union, end)?;
+            }
+        }
+        Ok(ThompsonRef { start: union, end: alt_end })
+    }
+
+    /// Compile the given HIR look-around assertion to an NFA look-around
+    /// assertion.
+    fn c_look(&self, anchor: &hir::Look) -> Result<ThompsonRef, BuildError> {
+        let look = match *anchor {
+            hir::Look::Start => Look::Start,
+            hir::Look::End => Look::End,
+            hir::Look::StartLF => Look::StartLF,
+            hir::Look::EndLF => Look::EndLF,
+            hir::Look::StartCRLF => Look::StartCRLF,
+            hir::Look::EndCRLF => Look::EndCRLF,
+            hir::Look::WordAscii => Look::WordAscii,
+            hir::Look::WordAsciiNegate => Look::WordAsciiNegate,
+            hir::Look::WordUnicode => Look::WordUnicode,
+            hir::Look::WordUnicodeNegate => Look::WordUnicodeNegate,
+            hir::Look::WordStartAscii => Look::WordStartAscii,
+            hir::Look::WordEndAscii => Look::WordEndAscii,
+            hir::Look::WordStartUnicode => Look::WordStartUnicode,
+            hir::Look::WordEndUnicode => Look::WordEndUnicode,
+            hir::Look::WordStartHalfAscii => Look::WordStartHalfAscii,
+            hir::Look::WordEndHalfAscii => Look::WordEndHalfAscii,
+            hir::Look::WordStartHalfUnicode => Look::WordStartHalfUnicode,
+            hir::Look::WordEndHalfUnicode => Look::WordEndHalfUnicode,
+        };
+        let id = self.add_look(look)?;
+        Ok(ThompsonRef { start: id, end: id })
+    }
+
+    /// Compile the given byte string to a concatenation of bytes.
+    fn c_literal(&self, bytes: &[u8]) -> Result<ThompsonRef, BuildError> {
+        self.c_concat(bytes.iter().copied().map(|b| self.c_range(b, b)))
+    }
+
+    /// Compile a "range" state with one transition that may only be
+    /// followed if the input byte is in the (inclusive) range given.
+    ///
+    /// Both the `start` and `end` locations point to the state created.
+    /// Callers will likely want to keep the `start`, but patch the `end` to
+    /// point to some other state.
+    fn c_range(&self, start: u8, end: u8) -> Result<ThompsonRef, BuildError> {
+        let id = self.add_range(start, end)?;
+        Ok(ThompsonRef { start: id, end: id })
+    }
+
+    /// Compile an "empty" state with one unconditional epsilon transition.
+    ///
+    /// Both the `start` and `end` locations point to the state created.
+    /// Callers will likely want to keep the `start`, but patch the `end` to
+    /// point to some other state.
+    fn c_empty(&self) -> Result<ThompsonRef, BuildError> {
+        let id = self.add_empty()?;
+        Ok(ThompsonRef { start: id, end: id })
+    }
+
+    /// Compile a "fail" state that can never have any outgoing transitions.
+    fn c_fail(&self) -> Result<ThompsonRef, BuildError> {
+        let id = self.add_fail()?;
+        Ok(ThompsonRef { start: id, end: id })
+    }
+
+    // The below helpers are meant to be simple wrappers around the
+    // corresponding Builder methods. For the most part, they let us write
+    // 'self.add_foo()' instead of 'self.builder.borrow_mut().add_foo()',
+    // where the latter is a mouthful. Some of the methods do inject a
+    // little bit of extra logic, e.g., flipping look-around operators when
+    // compiling in reverse mode.
+
+    fn patch(&self, from: StateID, to: StateID) -> Result<(), BuildError> {
+        self.builder.borrow_mut().patch(from, to)
+    }
+
+    fn start_pattern(&self) -> Result<PatternID, BuildError> {
+        self.builder.borrow_mut().start_pattern()
+    }
+
+    fn finish_pattern(
+        &self,
+        start_id: StateID,
+    ) -> Result<PatternID, BuildError> {
+        self.builder.borrow_mut().finish_pattern(start_id)
+    }
+
+    fn add_empty(&self) -> Result<StateID, BuildError> {
+        self.builder.borrow_mut().add_empty()
+    }
+
+    fn add_range(&self, start: u8, end: u8) -> Result<StateID, BuildError> {
+        self.builder.borrow_mut().add_range(Transition {
+            start,
+            end,
+            next: StateID::ZERO,
+        })
+    }
+
+    fn add_sparse(
+        &self,
+        ranges: Vec<Transition>,
+    ) -> Result<StateID, BuildError> {
+        self.builder.borrow_mut().add_sparse(ranges)
+    }
+
+    fn add_look(&self, mut look: Look) -> Result<StateID, BuildError> {
+        if self.is_reverse() {
+            look = look.reversed();
+        }
+        self.builder.borrow_mut().add_look(StateID::ZERO, look)
+    }
+
+    fn add_union(&self) -> Result<StateID, BuildError> {
+        self.builder.borrow_mut().add_union(vec![])
+    }
+
+    fn add_union_reverse(&self) -> Result<StateID, BuildError> {
+        self.builder.borrow_mut().add_union_reverse(vec![])
+    }
+
+    fn add_capture_start(
+        &self,
+        capture_index: u32,
+        name: Option<&str>,
+    ) -> Result<StateID, BuildError> {
+        let name = name.map(|n| Arc::from(n));
+        self.builder.borrow_mut().add_capture_start(
+            StateID::ZERO,
+            capture_index,
+            name,
+        )
+    }
+
+    fn add_capture_end(
+        &self,
+        capture_index: u32,
+    ) -> Result<StateID, BuildError> {
+        self.builder.borrow_mut().add_capture_end(StateID::ZERO, capture_index)
+    }
+
+    fn add_fail(&self) -> Result<StateID, BuildError> {
+        self.builder.borrow_mut().add_fail()
+    }
+
+    fn add_match(&self) -> Result<StateID, BuildError> {
+        self.builder.borrow_mut().add_match()
+    }
+
+    fn is_reverse(&self) -> bool {
+        self.config.get_reverse()
+    }
+}
+
+/// A value that represents the result of compiling a sub-expression of a
+/// regex's HIR. Specifically, this represents a sub-graph of the NFA that
+/// has an initial state at `start` and a final state at `end`.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct ThompsonRef {
+    pub(crate) start: StateID,
+    pub(crate) end: StateID,
+}
+
+/// A UTF-8 compiler based on Daciuk's algorithm for compiling minimal DFAs
+/// from a lexicographically sorted sequence of strings in linear time.
+///
+/// The trick here is that any Unicode codepoint range can be converted to
+/// a sequence of byte ranges that form a UTF-8 automaton. Connecting them
+/// together via an alternation is trivial, and indeed, it works. However,
+/// there is a lot of redundant structure in many UTF-8 automatons. Since
+/// our UTF-8 ranges are in lexicographic order, we can use Daciuk's
+/// algorithm to build nearly minimal DFAs in linear time. (They are not
+/// guaranteed to be minimal because we use a bounded cache of previously
+/// built DFA states.)
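+///
+/// To get a sense of the savings: per the notes in 'c_unicode_class', the
+/// '\w' regex compiles to roughly 283 NFA states with this approach, versus
+/// around 3,000 with the naive alternation-of-sequences construction.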
+/// +/// The drawback is that this sadly doesn't work for reverse automata, since +/// the ranges are no longer in lexicographic order. For that, we invented the +/// range trie (which gets its own module). Once a range trie is built, we then +/// use this same Utf8Compiler to build a reverse UTF-8 automaton. +/// +/// The high level idea is described here: +/// https://blog.burntsushi.net/transducers/#finite-state-machines-as-data-structures +/// +/// There is also another implementation of this in the `fst` crate. +#[derive(Debug)] +struct Utf8Compiler<'a> { + builder: &'a mut Builder, + state: &'a mut Utf8State, + target: StateID, +} + +#[derive(Clone, Debug)] +struct Utf8State { + compiled: Utf8BoundedMap, + uncompiled: Vec, +} + +#[derive(Clone, Debug)] +struct Utf8Node { + trans: Vec, + last: Option, +} + +#[derive(Clone, Debug)] +struct Utf8LastTransition { + start: u8, + end: u8, +} + +impl Utf8State { + fn new() -> Utf8State { + Utf8State { compiled: Utf8BoundedMap::new(10_000), uncompiled: vec![] } + } + + fn clear(&mut self) { + self.compiled.clear(); + self.uncompiled.clear(); + } +} + +impl<'a> Utf8Compiler<'a> { + fn new( + builder: &'a mut Builder, + state: &'a mut Utf8State, + ) -> Result, BuildError> { + let target = builder.add_empty()?; + state.clear(); + let mut utf8c = Utf8Compiler { builder, state, target }; + utf8c.add_empty(); + Ok(utf8c) + } + + fn finish(&mut self) -> Result { + self.compile_from(0)?; + let node = self.pop_root(); + let start = self.compile(node)?; + Ok(ThompsonRef { start, end: self.target }) + } + + fn add(&mut self, ranges: &[Utf8Range]) -> Result<(), BuildError> { + let prefix_len = ranges + .iter() + .zip(&self.state.uncompiled) + .take_while(|&(range, node)| { + node.last.as_ref().map_or(false, |t| { + (t.start, t.end) == (range.start, range.end) + }) + }) + .count(); + assert!(prefix_len < ranges.len()); + self.compile_from(prefix_len)?; + self.add_suffix(&ranges[prefix_len..]); + Ok(()) + } + + fn compile_from(&mut self, from: usize) -> Result<(), BuildError> { + let mut next = self.target; + while from + 1 < self.state.uncompiled.len() { + let node = self.pop_freeze(next); + next = self.compile(node)?; + } + self.top_last_freeze(next); + Ok(()) + } + + fn compile( + &mut self, + node: Vec, + ) -> Result { + let hash = self.state.compiled.hash(&node); + if let Some(id) = self.state.compiled.get(&node, hash) { + return Ok(id); + } + let id = self.builder.add_sparse(node.clone())?; + self.state.compiled.set(node, hash, id); + Ok(id) + } + + fn add_suffix(&mut self, ranges: &[Utf8Range]) { + assert!(!ranges.is_empty()); + let last = self + .state + .uncompiled + .len() + .checked_sub(1) + .expect("non-empty nodes"); + assert!(self.state.uncompiled[last].last.is_none()); + self.state.uncompiled[last].last = Some(Utf8LastTransition { + start: ranges[0].start, + end: ranges[0].end, + }); + for r in &ranges[1..] 
{ + self.state.uncompiled.push(Utf8Node { + trans: vec![], + last: Some(Utf8LastTransition { start: r.start, end: r.end }), + }); + } + } + + fn add_empty(&mut self) { + self.state.uncompiled.push(Utf8Node { trans: vec![], last: None }); + } + + fn pop_freeze(&mut self, next: StateID) -> Vec { + let mut uncompiled = self.state.uncompiled.pop().unwrap(); + uncompiled.set_last_transition(next); + uncompiled.trans + } + + fn pop_root(&mut self) -> Vec { + assert_eq!(self.state.uncompiled.len(), 1); + assert!(self.state.uncompiled[0].last.is_none()); + self.state.uncompiled.pop().expect("non-empty nodes").trans + } + + fn top_last_freeze(&mut self, next: StateID) { + let last = self + .state + .uncompiled + .len() + .checked_sub(1) + .expect("non-empty nodes"); + self.state.uncompiled[last].set_last_transition(next); + } +} + +impl Utf8Node { + fn set_last_transition(&mut self, next: StateID) { + if let Some(last) = self.last.take() { + self.trans.push(Transition { + start: last.start, + end: last.end, + next, + }); + } + } +} + +#[cfg(test)] +mod tests { + use alloc::{vec, vec::Vec}; + + use crate::{ + nfa::thompson::{SparseTransitions, State, Transition, NFA}, + util::primitives::{PatternID, SmallIndex, StateID}, + }; + + use super::*; + + fn build(pattern: &str) -> NFA { + NFA::compiler() + .configure( + NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false), + ) + .build(pattern) + .unwrap() + } + + fn pid(id: usize) -> PatternID { + PatternID::new(id).unwrap() + } + + fn sid(id: usize) -> StateID { + StateID::new(id).unwrap() + } + + fn s_byte(byte: u8, next: usize) -> State { + let next = sid(next); + let trans = Transition { start: byte, end: byte, next }; + State::ByteRange { trans } + } + + fn s_range(start: u8, end: u8, next: usize) -> State { + let next = sid(next); + let trans = Transition { start, end, next }; + State::ByteRange { trans } + } + + fn s_sparse(transitions: &[(u8, u8, usize)]) -> State { + let transitions = transitions + .iter() + .map(|&(start, end, next)| Transition { + start, + end, + next: sid(next), + }) + .collect(); + State::Sparse(SparseTransitions { transitions }) + } + + fn s_bin_union(alt1: usize, alt2: usize) -> State { + State::BinaryUnion { alt1: sid(alt1), alt2: sid(alt2) } + } + + fn s_union(alts: &[usize]) -> State { + State::Union { + alternates: alts + .iter() + .map(|&id| sid(id)) + .collect::>() + .into_boxed_slice(), + } + } + + fn s_cap(next: usize, pattern: usize, index: usize, slot: usize) -> State { + State::Capture { + next: sid(next), + pattern_id: pid(pattern), + group_index: SmallIndex::new(index).unwrap(), + slot: SmallIndex::new(slot).unwrap(), + } + } + + fn s_fail() -> State { + State::Fail + } + + fn s_match(id: usize) -> State { + State::Match { pattern_id: pid(id) } + } + + // Test that building an unanchored NFA has an appropriate `(?s:.)*?` + // prefix. 
+ #[test] + fn compile_unanchored_prefix() { + let nfa = NFA::compiler() + .configure(NFA::config().which_captures(WhichCaptures::None)) + .build(r"a") + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_bin_union(2, 1), + s_range(0, 255, 0), + s_byte(b'a', 3), + s_match(0), + ] + ); + } + + #[test] + fn compile_empty() { + assert_eq!(build("").states(), &[s_match(0),]); + } + + #[test] + fn compile_literal() { + assert_eq!(build("a").states(), &[s_byte(b'a', 1), s_match(0),]); + assert_eq!( + build("ab").states(), + &[s_byte(b'a', 1), s_byte(b'b', 2), s_match(0),] + ); + assert_eq!( + build("☃").states(), + &[s_byte(0xE2, 1), s_byte(0x98, 2), s_byte(0x83, 3), s_match(0)] + ); + + // Check that non-UTF-8 literals work. + let nfa = NFA::compiler() + .configure( + NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false), + ) + .syntax(crate::util::syntax::Config::new().utf8(false)) + .build(r"(?-u)\xFF") + .unwrap(); + assert_eq!(nfa.states(), &[s_byte(b'\xFF', 1), s_match(0),]); + } + + #[test] + fn compile_class_ascii() { + assert_eq!( + build(r"[a-z]").states(), + &[s_range(b'a', b'z', 1), s_match(0),] + ); + assert_eq!( + build(r"[x-za-c]").states(), + &[s_sparse(&[(b'a', b'c', 1), (b'x', b'z', 1)]), s_match(0)] + ); + } + + #[test] + #[cfg(not(miri))] + fn compile_class_unicode() { + assert_eq!( + build(r"[\u03B1-\u03B4]").states(), + &[s_range(0xB1, 0xB4, 2), s_byte(0xCE, 0), s_match(0)] + ); + assert_eq!( + build(r"[\u03B1-\u03B4\u{1F919}-\u{1F91E}]").states(), + &[ + s_range(0xB1, 0xB4, 5), + s_range(0x99, 0x9E, 5), + s_byte(0xA4, 1), + s_byte(0x9F, 2), + s_sparse(&[(0xCE, 0xCE, 0), (0xF0, 0xF0, 3)]), + s_match(0), + ] + ); + assert_eq!( + build(r"[a-z☃]").states(), + &[ + s_byte(0x83, 3), + s_byte(0x98, 0), + s_sparse(&[(b'a', b'z', 3), (0xE2, 0xE2, 1)]), + s_match(0), + ] + ); + } + + #[test] + fn compile_repetition() { + assert_eq!( + build(r"a?").states(), + &[s_bin_union(1, 2), s_byte(b'a', 2), s_match(0),] + ); + assert_eq!( + build(r"a??").states(), + &[s_bin_union(2, 1), s_byte(b'a', 2), s_match(0),] + ); + } + + #[test] + fn compile_group() { + assert_eq!( + build(r"ab+").states(), + &[s_byte(b'a', 1), s_byte(b'b', 2), s_bin_union(1, 3), s_match(0)] + ); + assert_eq!( + build(r"(ab)").states(), + &[s_byte(b'a', 1), s_byte(b'b', 2), s_match(0)] + ); + assert_eq!( + build(r"(ab)+").states(), + &[s_byte(b'a', 1), s_byte(b'b', 2), s_bin_union(0, 3), s_match(0)] + ); + } + + #[test] + fn compile_alternation() { + assert_eq!( + build(r"a|b").states(), + &[s_range(b'a', b'b', 1), s_match(0)] + ); + assert_eq!( + build(r"ab|cd").states(), + &[ + s_byte(b'b', 3), + s_byte(b'd', 3), + s_sparse(&[(b'a', b'a', 0), (b'c', b'c', 1)]), + s_match(0) + ], + ); + assert_eq!( + build(r"|b").states(), + &[s_byte(b'b', 2), s_bin_union(2, 0), s_match(0)] + ); + assert_eq!( + build(r"a|").states(), + &[s_byte(b'a', 2), s_bin_union(0, 2), s_match(0)] + ); + } + + // This tests the use of a non-binary union, i.e., a state with more than + // 2 unconditional epsilon transitions. The only place they tend to appear + // is in reverse NFAs when shrinking is disabled. Otherwise, 'binary-union' + // and 'sparse' tend to cover all other cases of alternation. 
+ #[test] + fn compile_non_binary_union() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .which_captures(WhichCaptures::None) + .reverse(true) + .shrink(false) + .unanchored_prefix(false), + ) + .build(r"[\u1000\u2000\u3000]") + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_union(&[3, 6, 9]), + s_byte(0xE1, 10), + s_byte(0x80, 1), + s_byte(0x80, 2), + s_byte(0xE2, 10), + s_byte(0x80, 4), + s_byte(0x80, 5), + s_byte(0xE3, 10), + s_byte(0x80, 7), + s_byte(0x80, 8), + s_match(0), + ] + ); + } + + #[test] + fn compile_many_start_pattern() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false), + ) + .build_many(&["a", "b"]) + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_byte(b'a', 1), + s_match(0), + s_byte(b'b', 3), + s_match(1), + s_bin_union(0, 2), + ] + ); + assert_eq!(nfa.start_anchored().as_usize(), 4); + assert_eq!(nfa.start_unanchored().as_usize(), 4); + // Test that the start states for each individual pattern are correct. + assert_eq!(nfa.start_pattern(pid(0)).unwrap(), sid(0)); + assert_eq!(nfa.start_pattern(pid(1)).unwrap(), sid(2)); + } + + // This tests that our compiler can handle an empty character class. At the + // time of writing, the regex parser forbids it, so the only way to test it + // is to provide a hand written HIR. + #[test] + fn empty_class_bytes() { + use regex_syntax::hir::{Class, ClassBytes, Hir}; + + let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![]))); + let config = NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false); + let nfa = + NFA::compiler().configure(config).build_from_hir(&hir).unwrap(); + assert_eq!(nfa.states(), &[s_fail(), s_match(0)]); + } + + // Like empty_class_bytes, but for a Unicode class. 
+    #[test]
+    fn empty_class_unicode() {
+        use regex_syntax::hir::{Class, ClassUnicode, Hir};
+
+        let hir = Hir::class(Class::Unicode(ClassUnicode::new(vec![])));
+        let config = NFA::config()
+            .which_captures(WhichCaptures::None)
+            .unanchored_prefix(false);
+        let nfa =
+            NFA::compiler().configure(config).build_from_hir(&hir).unwrap();
+        assert_eq!(nfa.states(), &[s_fail(), s_match(0)]);
+    }
+
+    #[test]
+    fn compile_captures_all() {
+        let nfa = NFA::compiler()
+            .configure(
+                NFA::config()
+                    .unanchored_prefix(false)
+                    .which_captures(WhichCaptures::All),
+            )
+            .build("a(b)c")
+            .unwrap();
+        assert_eq!(
+            nfa.states(),
+            &[
+                s_cap(1, 0, 0, 0),
+                s_byte(b'a', 2),
+                s_cap(3, 0, 1, 2),
+                s_byte(b'b', 4),
+                s_cap(5, 0, 1, 3),
+                s_byte(b'c', 6),
+                s_cap(7, 0, 0, 1),
+                s_match(0)
+            ]
+        );
+        let ginfo = nfa.group_info();
+        assert_eq!(2, ginfo.all_group_len());
+    }
+
+    #[test]
+    fn compile_captures_implicit() {
+        let nfa = NFA::compiler()
+            .configure(
+                NFA::config()
+                    .unanchored_prefix(false)
+                    .which_captures(WhichCaptures::Implicit),
+            )
+            .build("a(b)c")
+            .unwrap();
+        assert_eq!(
+            nfa.states(),
+            &[
+                s_cap(1, 0, 0, 0),
+                s_byte(b'a', 2),
+                s_byte(b'b', 3),
+                s_byte(b'c', 4),
+                s_cap(5, 0, 0, 1),
+                s_match(0)
+            ]
+        );
+        let ginfo = nfa.group_info();
+        assert_eq!(1, ginfo.all_group_len());
+    }
+
+    #[test]
+    fn compile_captures_none() {
+        let nfa = NFA::compiler()
+            .configure(
+                NFA::config()
+                    .unanchored_prefix(false)
+                    .which_captures(WhichCaptures::None),
+            )
+            .build("a(b)c")
+            .unwrap();
+        assert_eq!(
+            nfa.states(),
+            &[s_byte(b'a', 1), s_byte(b'b', 2), s_byte(b'c', 3), s_match(0)]
+        );
+        let ginfo = nfa.group_info();
+        assert_eq!(0, ginfo.all_group_len());
+    }
+}
diff --git a/vendor/regex-automata/src/nfa/thompson/error.rs b/vendor/regex-automata/src/nfa/thompson/error.rs
new file mode 100644
index 0000000..3c2fa8a
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/thompson/error.rs
@@ -0,0 +1,185 @@
+use crate::util::{
+    captures, look,
+    primitives::{PatternID, StateID},
+};
+
+/// An error that can occur during the construction of a Thompson NFA.
+///
+/// This error does not provide many introspection capabilities. There are
+/// generally only two things you can do with it:
+///
+/// * Obtain a human readable message via its `std::fmt::Display` impl.
+/// * Access an underlying [`regex_syntax::Error`] type from its `source`
+/// method via the `std::error::Error` trait. This error only occurs when
+/// using convenience routines for building an NFA directly from a pattern
+/// string.
+///
+/// Otherwise, errors typically occur when a limit has been breached. For
+/// example, if the total heap usage of the compiled NFA exceeds the limit
+/// set by [`Config::nfa_size_limit`](crate::nfa::thompson::Config), then
+/// building the NFA will fail.
+#[derive(Clone, Debug)]
+pub struct BuildError {
+    kind: BuildErrorKind,
+}
+
+/// The kind of error that occurred during the construction of a Thompson
+/// NFA.
+#[derive(Clone, Debug)]
+enum BuildErrorKind {
+    /// An error that occurred while parsing a regular expression. Note that
+    /// this error may be printed over multiple lines, and is generally
+    /// intended to be end user readable on its own.
+    #[cfg(feature = "syntax")]
+    Syntax(regex_syntax::Error),
+    /// An error that occurs if the capturing groups provided to an NFA
+    /// builder do not satisfy the documented invariants. For example, things
+    /// like too many groups, missing groups, having the first (zeroth) group
+    /// be named or duplicate group names within the same pattern.
+    Captures(captures::GroupInfoError),
+    /// An error that occurs when an NFA contains a Unicode word boundary,
+    /// but where the crate was compiled without the necessary data for
+    /// dealing with Unicode word boundaries.
+    Word(look::UnicodeWordBoundaryError),
+    /// An error that occurs if too many patterns were given to the NFA
+    /// compiler.
+    TooManyPatterns {
+        /// The number of patterns given, which exceeds the limit.
+        given: usize,
+        /// The limit on the number of patterns.
+        limit: usize,
+    },
+    /// An error that occurs if too many states are produced while building
+    /// an NFA.
+    TooManyStates {
+        /// The minimum number of states that are desired, which exceeds the
+        /// limit.
+        given: usize,
+        /// The limit on the number of states.
+        limit: usize,
+    },
+    /// An error that occurs when NFA compilation exceeds a configured heap
+    /// limit.
+    ExceededSizeLimit {
+        /// The configured limit, in bytes.
+        limit: usize,
+    },
+    /// An error that occurs when an invalid capture group index is added to
+    /// the NFA. An "invalid" index can be one that would otherwise overflow
+    /// a `usize` on the current target.
+    InvalidCaptureIndex {
+        /// The invalid index that was given.
+        index: u32,
+    },
+    /// An error that occurs when one tries to build a reverse NFA with
+    /// captures enabled. Currently, this isn't supported, but we probably
+    /// should support it at some point.
+    #[cfg(feature = "syntax")]
+    UnsupportedCaptures,
+}
+
+impl BuildError {
+    /// If this error occurred because the NFA exceeded the configured size
+    /// limit before being built, then this returns the configured size
+    /// limit.
+    ///
+    /// The limit returned is what was configured, and corresponds to the
+    /// maximum amount of heap usage in bytes.
+    pub fn size_limit(&self) -> Option<usize> {
+        match self.kind {
+            BuildErrorKind::ExceededSizeLimit { limit } => Some(limit),
+            _ => None,
+        }
+    }
+
+    fn kind(&self) -> &BuildErrorKind {
+        &self.kind
+    }
+
+    #[cfg(feature = "syntax")]
+    pub(crate) fn syntax(err: regex_syntax::Error) -> BuildError {
+        BuildError { kind: BuildErrorKind::Syntax(err) }
+    }
+
+    pub(crate) fn captures(err: captures::GroupInfoError) -> BuildError {
+        BuildError { kind: BuildErrorKind::Captures(err) }
+    }
+
+    pub(crate) fn word(err: look::UnicodeWordBoundaryError) -> BuildError {
+        BuildError { kind: BuildErrorKind::Word(err) }
+    }
+
+    pub(crate) fn too_many_patterns(given: usize) -> BuildError {
+        let limit = PatternID::LIMIT;
+        BuildError { kind: BuildErrorKind::TooManyPatterns { given, limit } }
+    }
+
+    pub(crate) fn too_many_states(given: usize) -> BuildError {
+        let limit = StateID::LIMIT;
+        BuildError { kind: BuildErrorKind::TooManyStates { given, limit } }
+    }
+
+    pub(crate) fn exceeded_size_limit(limit: usize) -> BuildError {
+        BuildError { kind: BuildErrorKind::ExceededSizeLimit { limit } }
+    }
+
+    pub(crate) fn invalid_capture_index(index: u32) -> BuildError {
+        BuildError { kind: BuildErrorKind::InvalidCaptureIndex { index } }
+    }
+
+    #[cfg(feature = "syntax")]
+    pub(crate) fn unsupported_captures() -> BuildError {
+        BuildError { kind: BuildErrorKind::UnsupportedCaptures }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for BuildError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self.kind() {
+            #[cfg(feature = "syntax")]
+            BuildErrorKind::Syntax(ref err) => Some(err),
+            BuildErrorKind::Captures(ref err) => Some(err),
+            _ => None,
+        }
+    }
+}
+
+impl core::fmt::Display for BuildError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
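+        // The messages here are deliberately terse: the detailed cause
+        // (e.g., the underlying syntax error) is reported through the
+        // 'source' method on the 'std::error::Error' impl above.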
+        match self.kind() {
+            #[cfg(feature = "syntax")]
+            BuildErrorKind::Syntax(_) => write!(f, "error parsing regex"),
+            BuildErrorKind::Captures(_) => {
+                write!(f, "error with capture groups")
+            }
+            BuildErrorKind::Word(_) => {
+                write!(f, "NFA contains Unicode word boundary")
+            }
+            BuildErrorKind::TooManyPatterns { given, limit } => write!(
+                f,
+                "attempted to compile {} patterns, \
+                 which exceeds the limit of {}",
+                given, limit,
+            ),
+            BuildErrorKind::TooManyStates { given, limit } => write!(
+                f,
+                "attempted to compile {} NFA states, \
+                 which exceeds the limit of {}",
+                given, limit,
+            ),
+            BuildErrorKind::ExceededSizeLimit { limit } => write!(
+                f,
+                "heap usage during NFA compilation exceeded limit of {}",
+                limit,
+            ),
+            BuildErrorKind::InvalidCaptureIndex { index } => write!(
+                f,
+                "capture group index {} is invalid (too big or discontinuous)",
+                index,
+            ),
+            #[cfg(feature = "syntax")]
+            BuildErrorKind::UnsupportedCaptures => write!(
+                f,
+                "currently captures must be disabled when compiling \
+                 a reverse NFA",
+            ),
+        }
+    }
+}
diff --git a/vendor/regex-automata/src/nfa/thompson/literal_trie.rs b/vendor/regex-automata/src/nfa/thompson/literal_trie.rs
new file mode 100644
index 0000000..7ed129a
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/thompson/literal_trie.rs
@@ -0,0 +1,528 @@
+use core::mem;
+
+use alloc::{vec, vec::Vec};
+
+use crate::{
+    nfa::thompson::{self, compiler::ThompsonRef, BuildError, Builder},
+    util::primitives::{IteratorIndexExt, StateID},
+};
+
+/// A trie that preserves leftmost-first match semantics.
+///
+/// This is a purpose-built data structure for optimizing
+/// 'lit1|lit2|..|litN' patterns. It can *only* handle alternations of
+/// literals, which makes it somewhat restricted in its scope, but literal
+/// alternations are fairly common.
+///
+/// At a 5,000 foot level, the main idea of this trie is to make an
+/// alternation of literals look more like a DFA than an NFA via epsilon
+/// removal.
+///
+/// More precisely, the main issue is in how alternations are compiled into
+/// a Thompson NFA. Namely, each alternation gets a single NFA "union" state
+/// with an epsilon transition for every branch of the alternation pointing
+/// to an NFA state corresponding to the start of that branch. The main
+/// problem with this representation is the cost of computing an epsilon
+/// closure. Once you hit the alternation's start state, it acts as a sort
+/// of "clog" that requires you to traverse all of the epsilon transitions
+/// to compute the full closure.
+///
+/// Fixing such clogs in the general case is pretty tricky without going to
+/// a DFA (or perhaps a Glushkov NFA, but that comes with other problems).
+/// But at least in the case of an alternation of literals, we can convert
+/// that to a prefix trie without too much cost. In theory, that's all you
+/// really need to do: build the trie and then compile it to a Thompson NFA.
+/// For example, if you have the pattern 'bar|baz|foo', then using a trie,
+/// it is transformed to something like 'b(a(r|z))|f(oo)'. This reduces the
+/// clog by reducing the number of epsilon transitions out of the
+/// alternation's start state from 3 to 2 (it actually gets down to 1 when
+/// you use a sparse state, which we do below). It's a small effect here,
+/// but when your alternation is huge, the savings are also huge.
+///
+/// And that is... essentially what a LiteralTrie does. But there is one
+/// hiccup. Consider a regex like 'sam|samwise'.
+/// How does a prefix trie compile that when leftmost-first semantics are
+/// used? If 'sam|samwise' was the entire regex, then you could just drop
+/// the 'samwise' branch entirely since it is impossible to match ('sam'
+/// will always take priority, and since it is a prefix of 'samwise',
+/// 'samwise' will never match). But what about the regex
+/// '\b(sam|samwise)\b'? In that case, you can't remove 'samwise' because
+/// it might match when 'sam' doesn't fall on a word boundary.
+///
+/// The main idea is that 'sam|samwise' can be translated to 'sam(?:|wise)',
+/// which is a precisely equivalent regex that also gets rid of the clog.
+///
+/// Another example is 'zapper|z|zap'. That gets translated to
+/// 'z(?:apper||ap)'.
+///
+/// We accomplish this by giving each state in the trie multiple "chunks" of
+/// transitions. Each chunk barrier represents a match. The idea is that
+/// once you know a match occurs, none of the transitions after the match
+/// can be re-ordered and mixed in with the transitions before the match.
+/// Otherwise, the match semantics could be changed.
+///
+/// See the 'State' data type for a bit more detail.
+///
+/// Future work:
+///
+/// * In theory, it would be nice to generalize the idea of removing clogs
+/// and apply it to the NFA graph itself. Then this could in theory work for
+/// case insensitive alternations of literals, or even just alternations
+/// where each branch starts with a non-epsilon transition.
+/// * Could we instead use the Aho-Corasick algorithm here? The aho-corasick
+/// crate deals with leftmost-first matches correctly, but I think this
+/// implies encoding failure transitions into a Thompson NFA somehow. Which
+/// seems fine, because failure transitions are just unconditional epsilon
+/// transitions?
+/// * Or perhaps even better, could we use an aho_corasick::AhoCorasick
+/// directly? At time of writing, 0.7 is the current version of the
+/// aho-corasick crate, and that definitely cannot be used as-is. But if we
+/// expose the underlying finite state machine API, then could we use it?
+/// That would be super. If we could figure that out, it might also lend
+/// itself to more general composition of finite state machines.
+#[derive(Clone)]
+pub(crate) struct LiteralTrie {
+    /// The set of trie states. Each state contains one or more chunks,
+    /// where each chunk is a sparse set of transitions to other states. A
+    /// leaf state is always a match state that contains only empty chunks
+    /// (i.e., no transitions).
+    states: Vec<State>,
+    /// Whether to add literals in reverse to the trie. Useful when building
+    /// a reverse NFA automaton.
+    rev: bool,
+}
+
+impl LiteralTrie {
+    /// Create a new literal trie that adds literals in the forward
+    /// direction.
+    pub(crate) fn forward() -> LiteralTrie {
+        let root = State::default();
+        LiteralTrie { states: vec![root], rev: false }
+    }
+
+    /// Create a new literal trie that adds literals in reverse.
+    pub(crate) fn reverse() -> LiteralTrie {
+        let root = State::default();
+        LiteralTrie { states: vec![root], rev: true }
+    }
+
+    /// Add the given literal to this trie.
+    ///
+    /// If the literal could not be added because the `StateID` space was
+    /// exhausted, then an error is returned. If an error is returned, the
+    /// trie is left in an unspecified state.
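+    ///
+    /// For example, adding `zapper`, then `z` and then `zap` produces a
+    /// trie equivalent to `z(?:apper||ap)`, preserving the leftmost-first
+    /// priority of the original alternation. (See the `zap` test below.)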
+    pub(crate) fn add(&mut self, bytes: &[u8]) -> Result<(), BuildError> {
+        let mut prev = StateID::ZERO;
+        let mut it = bytes.iter().copied();
+        while let Some(b) = if self.rev { it.next_back() } else { it.next() } {
+            prev = self.get_or_add_state(prev, b)?;
+        }
+        self.states[prev].add_match();
+        Ok(())
+    }
+
+    /// If the given transition is defined, then return the next state ID.
+    /// Otherwise, add the transition to `from` and point it to a new state.
+    ///
+    /// If a new state ID could not be allocated, then an error is returned.
+    fn get_or_add_state(
+        &mut self,
+        from: StateID,
+        byte: u8,
+    ) -> Result<StateID, BuildError> {
+        let active = self.states[from].active_chunk();
+        match active.binary_search_by_key(&byte, |t| t.byte) {
+            Ok(i) => Ok(active[i].next),
+            Err(i) => {
+                // Add a new state and get its ID.
+                let next = StateID::new(self.states.len()).map_err(|_| {
+                    BuildError::too_many_states(self.states.len())
+                })?;
+                self.states.push(State::default());
+                // Offset our position to account for all transitions and
+                // not just the ones in the active chunk.
+                let i = self.states[from].active_chunk_start() + i;
+                let t = Transition { byte, next };
+                self.states[from].transitions.insert(i, t);
+                Ok(next)
+            }
+        }
+    }
+
+    /// Compile this literal trie to the NFA builder given.
+    ///
+    /// This forwards any errors that may occur while using the given
+    /// builder.
+    pub(crate) fn compile(
+        &self,
+        builder: &mut Builder,
+    ) -> Result<ThompsonRef, BuildError> {
+        // Compilation proceeds via depth-first traversal of the trie.
+        //
+        // This is overall pretty brutal. The recursive version of this is
+        // deliciously simple. (See 'compile_to_hir' below for what it might
+        // look like.) But recursion on a trie means your call stack grows
+        // in accordance with the longest literal, which just does not seem
+        // appropriate. So we push the call stack to the heap. But as a
+        // result, the trie traversal becomes considerably more complicated,
+        // because we essentially have to encode the state of a double
+        // for-loop into an explicit call frame. If someone can simplify
+        // this without using recursion, that'd be great.
+
+        // 'end' is our match state for this trie, but represented in the
+        // NFA. Any time we see a match in the trie, we insert a transition
+        // from the current state we're in to 'end'.
+        let end = builder.add_empty()?;
+        let mut stack = vec![];
+        let mut f = Frame::new(&self.states[StateID::ZERO]);
+        loop {
+            if let Some(t) = f.transitions.next() {
+                if self.states[t.next].is_leaf() {
+                    f.sparse.push(thompson::Transition {
+                        start: t.byte,
+                        end: t.byte,
+                        next: end,
+                    });
+                } else {
+                    f.sparse.push(thompson::Transition {
+                        start: t.byte,
+                        end: t.byte,
+                        // This is a little funny, but when the frame we
+                        // create below completes, it will pop this parent
+                        // frame off and modify this transition to point to
+                        // the correct state.
+                        next: StateID::ZERO,
+                    });
+                    stack.push(f);
+                    f = Frame::new(&self.states[t.next]);
+                }
+                continue;
+            }
+            // At this point, we have visited all transitions in f.chunk, so
+            // add it as a sparse NFA state. Unless the chunk was empty, in
+            // which case, we don't do anything.
+            if !f.sparse.is_empty() {
+                let chunk_id = if f.sparse.len() == 1 {
+                    builder.add_range(f.sparse.pop().unwrap())?
+                } else {
+                    let sparse = mem::replace(&mut f.sparse, vec![]);
+                    builder.add_sparse(sparse)?
+                };
+                f.union.push(chunk_id);
+            }
+            // Now we need to see if there are other chunks to visit.
+ if let Some(chunk) = f.chunks.next() { + // If we're here, it means we're on the second (or greater) + // chunk, which implies there is a match at this point. So + // connect this state to the final end state. + f.union.push(end); + // Advance to the next chunk. + f.transitions = chunk.iter(); + continue; + } + // Now that we are out of chunks, we have completely visited + // this state. So turn our union of chunks into an NFA union + // state, and add that union state to the parent state's current + // sparse state. (If there is no parent, we're done.) + let start = builder.add_union(f.union)?; + match stack.pop() { + None => { + return Ok(ThompsonRef { start, end }); + } + Some(mut parent) => { + // OK because the only way a frame gets pushed on to the + // stack (aside from the root) is when a transition has + // been added to 'sparse'. + parent.sparse.last_mut().unwrap().next = start; + f = parent; + } + } + } + } + + /// Converts this trie to an equivalent HIR expression. + /// + /// We don't actually use this, but it's useful for tests. In particular, + /// it provides a (somewhat) human readable representation of the trie + /// itself. + #[cfg(test)] + fn compile_to_hir(&self) -> regex_syntax::hir::Hir { + self.compile_state_to_hir(StateID::ZERO) + } + + /// The recursive implementation of 'to_hir'. + /// + /// Notice how simple this is compared to 'compile' above. 'compile' could + /// be similarly simple, but we opt to not use recursion in order to avoid + /// overflowing the stack in the case of a longer literal. + #[cfg(test)] + fn compile_state_to_hir(&self, sid: StateID) -> regex_syntax::hir::Hir { + use regex_syntax::hir::Hir; + + let mut alt = vec![]; + for (i, chunk) in self.states[sid].chunks().enumerate() { + if i > 0 { + alt.push(Hir::empty()); + } + if chunk.is_empty() { + continue; + } + let mut chunk_alt = vec![]; + for t in chunk.iter() { + chunk_alt.push(Hir::concat(vec![ + Hir::literal(vec![t.byte]), + self.compile_state_to_hir(t.next), + ])); + } + alt.push(Hir::alternation(chunk_alt)); + } + Hir::alternation(alt) + } +} + +impl core::fmt::Debug for LiteralTrie { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + writeln!(f, "LiteralTrie(")?; + for (sid, state) in self.states.iter().with_state_ids() { + writeln!(f, "{:06?}: {:?}", sid.as_usize(), state)?; + } + writeln!(f, ")")?; + Ok(()) + } +} + +/// An explicit stack frame used for traversing the trie without using +/// recursion. +/// +/// Each frame is tied to the traversal of a single trie state. The frame is +/// dropped once the entire state (and all of its children) have been visited. +/// The "output" of compiling a state is the 'union' vector, which is turn +/// converted to a NFA union state. Each branch of the union corresponds to a +/// chunk in the trie state. +/// +/// 'sparse' corresponds to the set of transitions for a particular chunk in a +/// trie state. It is ultimately converted to an NFA sparse state. The 'sparse' +/// field, after being converted to a sparse NFA state, is reused for any +/// subsequent chunks in the trie state, if any exist. +#[derive(Debug)] +struct Frame<'a> { + /// The remaining chunks to visit for a trie state. + chunks: StateChunksIter<'a>, + /// The transitions of the current chunk that we're iterating over. Since + /// every trie state has at least one chunk, every frame is initialized + /// with the first chunk's transitions ready to be consumed. 
transitions: core::slice::Iter<'a, Transition>,
+    /// The NFA state IDs pointing to the start of each chunk compiled by
+    /// this trie state. This ultimately gets converted to an NFA union once
+    /// the entire trie state (and all of its children) have been compiled.
+    /// The order of these matters for leftmost-first match semantics, since
+    /// earlier matches in the union are preferred over later ones.
+    union: Vec<StateID>,
+    /// The actual NFA transitions for a single chunk in a trie state. This
+    /// gets converted to an NFA sparse state, and its corresponding NFA
+    /// state ID should get added to 'union'.
+    sparse: Vec<thompson::Transition>,
+}
+
+impl<'a> Frame<'a> {
+    /// Create a new stack frame for trie traversal. This initializes the
+    /// 'transitions' iterator to the transitions for the first chunk, with
+    /// the 'chunks' iterator being every chunk after the first one.
+    fn new(state: &'a State) -> Frame<'a> {
+        let mut chunks = state.chunks();
+        // every state has at least 1 chunk
+        let chunk = chunks.next().unwrap();
+        let transitions = chunk.iter();
+        Frame { chunks, transitions, union: vec![], sparse: vec![] }
+    }
+}
+
+/// A state in a trie.
+///
+/// This uses a sparse representation. Since we don't use literal tries for
+/// searching, and since compilation requires visiting every transition
+/// anyway, a sparse representation works well for transitions. It means we
+/// save on memory, at the expense of 'LiteralTrie::add' being perhaps a bit
+/// slower.
+///
+/// While 'transitions' is pretty standard as far as tries go, the 'chunks'
+/// piece here is more unusual. In effect, 'chunks' defines a partitioning
+/// of 'transitions', where each chunk corresponds to a distinct set of
+/// transitions. The key invariant is that a transition in one chunk cannot
+/// be moved to another chunk. This is the secret sauce that preserves
+/// leftmost-first match semantics.
+///
+/// A new chunk is added whenever we mark a state as a match state. Once a
+/// new chunk is added, the old active chunk is frozen and is never mutated
+/// again. The new chunk becomes the active chunk, which is defined as
+/// '&transitions[chunks.last().map_or(0, |c| c.1)..]'. A state where
+/// 'chunks' is empty actually contains one chunk, so every state contains
+/// at least one (possibly empty) chunk.
+///
+/// A "leaf" state is a state that has no outgoing transitions (so
+/// 'transitions' is empty). Note that there is no way for a leaf state to
+/// be a non-matching state. (Although while building the trie, within
+/// 'add', a leaf state may exist while not containing any matches. But this
+/// invariant is only broken within 'add'. Once 'add' returns, the invariant
+/// is upheld.)
+#[derive(Clone, Default)]
+struct State {
+    transitions: Vec<Transition>,
+    chunks: Vec<(usize, usize)>,
+}
+
+impl State {
+    /// Mark this state as a match state and freeze the active chunk such
+    /// that it cannot be further mutated.
+    fn add_match(&mut self) {
+        // This is not strictly necessary, but there's no point in recording
+        // another match by adding another chunk if the state has no
+        // transitions. Note though that we only skip this if we already
+        // know this is a match state, which is only true if 'chunks' is not
+        // empty. Basically, if we didn't do this, nothing semantically
+        // would change, but we'd end up pushing another chunk and
+        // potentially triggering an alloc.
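+        //
+        // As an illustration: after adding "sam" and then "samwise", the
+        // state reached by "sam" holds one frozen empty chunk (recording
+        // the "sam" match) followed by an active chunk containing the 'w'
+        // transition for "samwise".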
+ if self.transitions.is_empty() && !self.chunks.is_empty() { + return; + } + let chunk_start = self.active_chunk_start(); + let chunk_end = self.transitions.len(); + self.chunks.push((chunk_start, chunk_end)); + } + + /// Returns true if and only if this state is a leaf state. That is, a + /// state that has no outgoing transitions. + fn is_leaf(&self) -> bool { + self.transitions.is_empty() + } + + /// Returns an iterator over all of the chunks (including the currently + /// active chunk) in this state. Since the active chunk is included, the + /// iterator is guaranteed to always yield at least one chunk (although the + /// chunk may be empty). + fn chunks(&self) -> StateChunksIter<'_> { + StateChunksIter { + transitions: &*self.transitions, + chunks: self.chunks.iter(), + active: Some(self.active_chunk()), + } + } + + /// Returns the active chunk as a slice of transitions. + fn active_chunk(&self) -> &[Transition] { + let start = self.active_chunk_start(); + &self.transitions[start..] + } + + /// Returns the index into 'transitions' where the active chunk starts. + fn active_chunk_start(&self) -> usize { + self.chunks.last().map_or(0, |&(_, end)| end) + } +} + +impl core::fmt::Debug for State { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut spacing = " "; + for (i, chunk) in self.chunks().enumerate() { + if i > 0 { + write!(f, "{}MATCH", spacing)?; + } + spacing = ""; + for (j, t) in chunk.iter().enumerate() { + spacing = " "; + if j == 0 && i > 0 { + write!(f, " ")?; + } else if j > 0 { + write!(f, ", ")?; + } + write!(f, "{:?}", t)?; + } + } + Ok(()) + } +} + +/// An iterator over all of the chunks in a state, including the active chunk. +/// +/// This iterator is created by `State::chunks`. We name this iterator so that +/// we can include it in the `Frame` type for non-recursive trie traversal. +#[derive(Debug)] +struct StateChunksIter<'a> { + transitions: &'a [Transition], + chunks: core::slice::Iter<'a, (usize, usize)>, + active: Option<&'a [Transition]>, +} + +impl<'a> Iterator for StateChunksIter<'a> { + type Item = &'a [Transition]; + + fn next(&mut self) -> Option<&'a [Transition]> { + if let Some(&(start, end)) = self.chunks.next() { + return Some(&self.transitions[start..end]); + } + if let Some(chunk) = self.active.take() { + return Some(chunk); + } + None + } +} + +/// A single transition in a trie to another state. 
+#[derive(Clone, Copy)]
+struct Transition {
+    byte: u8,
+    next: StateID,
+}
+
+impl core::fmt::Debug for Transition {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(
+            f,
+            "{:?} => {}",
+            crate::util::escape::DebugByte(self.byte),
+            self.next.as_usize()
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use bstr::B;
+    use regex_syntax::hir::Hir;
+
+    use super::*;
+
+    #[test]
+    fn zap() {
+        let mut trie = LiteralTrie::forward();
+        trie.add(b"zapper").unwrap();
+        trie.add(b"z").unwrap();
+        trie.add(b"zap").unwrap();
+
+        let got = trie.compile_to_hir();
+        let expected = Hir::concat(vec![
+            Hir::literal(B("z")),
+            Hir::alternation(vec![
+                Hir::literal(B("apper")),
+                Hir::empty(),
+                Hir::literal(B("ap")),
+            ]),
+        ]);
+        assert_eq!(expected, got);
+    }
+
+    #[test]
+    fn maker() {
+        let mut trie = LiteralTrie::forward();
+        trie.add(b"make").unwrap();
+        trie.add(b"maple").unwrap();
+        trie.add(b"maker").unwrap();
+
+        let got = trie.compile_to_hir();
+        let expected = Hir::concat(vec![
+            Hir::literal(B("ma")),
+            Hir::alternation(vec![
+                Hir::concat(vec![
+                    Hir::literal(B("ke")),
+                    Hir::alternation(vec![Hir::empty(), Hir::literal(B("r"))]),
+                ]),
+                Hir::literal(B("ple")),
+            ]),
+        ]);
+        assert_eq!(expected, got);
+    }
+}
diff --git a/vendor/regex-automata/src/nfa/thompson/map.rs b/vendor/regex-automata/src/nfa/thompson/map.rs
new file mode 100644
index 0000000..c92d4c0
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/thompson/map.rs
@@ -0,0 +1,296 @@
+// This module contains a couple of simple, purpose-built hash maps. The key
+// trade-off they make is that they serve as caches rather than true maps.
+// That is, inserting a new entry may cause eviction of another entry. This
+// gives us two things. First, there's less overhead associated with inserts
+// and lookups. Second, it lets us control our memory usage.
+//
+// These maps are used in some fairly hot code when generating NFA states for
+// large Unicode character classes.
+//
+// Instead of exposing a rich hashmap entry API, we just permit the caller to
+// produce a hash of the key directly. The hash can then be reused for both
+// lookups and insertions at the cost of leaking abstraction a bit. But these
+// are for internal use only, so it's fine.
+//
+// The Utf8BoundedMap is used for Daciuk's algorithm for constructing an
+// (almost) minimal DFA for large Unicode character classes in linear time.
+// (Daciuk's algorithm is always used when compiling forward NFAs. For
+// reverse NFAs, it's only used when the compiler is configured to 'shrink'
+// the NFA, since there's a bit more expense in the reverse direction.)
+//
+// The Utf8SuffixMap is used when compiling large Unicode character classes
+// for reverse NFAs when 'shrink' is disabled. Specifically, it augments the
+// naive construction of UTF-8 automata by caching common suffixes. This
+// doesn't get the same space savings as Daciuk's algorithm, but it's
+// basically as fast as the naive approach and typically winds up using less
+// memory (since it generates smaller NFAs) despite the presence of the
+// cache.
+//
+// These maps effectively represent caching mechanisms for sparse and
+// byte-range NFA states, respectively. The former represents a single NFA
+// state with many transitions of equivalent priority while the latter
+// represents a single NFA state with a single transition. (Neither state
+// ever has or is an epsilon transition.) Thus, they have different key
+// types.
+// It's likely we could make one generic map, but the machinery didn't seem
+// worth it. They are simple enough.
+
+use alloc::{vec, vec::Vec};
+
+use crate::{
+    nfa::thompson::Transition,
+    util::{
+        int::{Usize, U64},
+        primitives::StateID,
+    },
+};
+
+// Basic FNV-1a hash constants as described in:
+// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+const PRIME: u64 = 1099511628211;
+const INIT: u64 = 14695981039346656037;
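+
+// (Editor's sketch, not part of the upstream source: the constants above
+// are the standard 64-bit FNV offset basis and prime, and both 'hash'
+// methods below apply the usual FNV-1a update rule. Over a plain byte
+// slice, the same rule would look like this:
+//
+//     fn fnv1a(bytes: &[u8]) -> u64 {
+//         let mut h = INIT;
+//         for &b in bytes {
+//             h = (h ^ u64::from(b)).wrapping_mul(PRIME);
+//         }
+//         h
+//     }
+//
+// The maps below hash transition fields instead of raw bytes, but the
+// structure is identical.)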
+
+/// A bounded hash map where the key is a sequence of NFA transitions and the
+/// value is a pre-existing NFA state ID.
+///
+/// std's hashmap can be used for this. However, this map has two important
+/// advantages. Firstly, it has lower overhead. Secondly, it permits us to
+/// control our memory usage by limiting the number of slots. In general, the
+/// cost here is that this map acts as a cache. That is, inserting a new
+/// entry may remove an old entry. We are okay with this, since it does not
+/// impact correctness in the cases where it is used. The only effect that
+/// dropping states from the cache has is that the resulting NFA generated
+/// may be bigger than it otherwise would be.
+///
+/// This improves benchmarks that compile large Unicode character classes,
+/// since it makes the generation of (almost) minimal UTF-8 automata faster.
+/// Specifically, one could observe the difference with std's hashmap via
+/// something like the following benchmark:
+///
+///   hyperfine "regex-cli debug thompson -qr --no-captures '\w{90} ecurB'"
+///
+/// But to observe that difference, you'd have to modify the code to use
+/// std's hashmap.
+///
+/// It is quite possible that there is a better way to approach this problem.
+/// For example, if there happens to be a very common state that collides
+/// with a lot of less frequent states, then we could wind up with very poor
+/// caching behavior. Alas, the effectiveness of this cache has not been
+/// measured. Instead, ad hoc experiments suggest that it is "good enough."
+/// Additional smarts (such as an LRU eviction policy) have to be weighed
+/// against the amount of extra time they cost.
+#[derive(Clone, Debug)]
+pub struct Utf8BoundedMap {
+    /// The current version of this map. Only entries with matching versions
+    /// are considered during lookups. If an entry is found with a mismatched
+    /// version, then the map behaves as if the entry does not exist.
+    ///
+    /// This makes it possible to clear the map by simply incrementing the
+    /// version number instead of actually deallocating any storage.
+    version: u16,
+    /// The total number of entries this map can store.
+    capacity: usize,
+    /// The actual entries, keyed by hash. Collisions between different
+    /// states result in the old state being dropped.
+    map: Vec<Utf8BoundedEntry>,
+}
+
+/// An entry in this map.
+#[derive(Clone, Debug, Default)]
+struct Utf8BoundedEntry {
+    /// The version of the map used to produce this entry. If this entry's
+    /// version does not match the current version of the map, then the map
+    /// should behave as if this entry does not exist.
+    version: u16,
+    /// The key, which is a sorted sequence of non-overlapping NFA
+    /// transitions.
+    key: Vec<Transition>,
+    /// The state ID corresponding to the state containing the transitions in
+    /// this entry.
+    val: StateID,
+}
+
+impl Utf8BoundedMap {
+    /// Create a new bounded map with the given capacity. The map will never
+    /// grow beyond the given size.
+    ///
+    /// Note that this does not allocate. Instead, callers must call `clear`
+    /// before using this map. `clear` will allocate space if necessary.
+    ///
+    /// This avoids the need to pay for the allocation of this map when
+    /// compiling regexes that lack large Unicode character classes.
+    pub fn new(capacity: usize) -> Utf8BoundedMap {
+        assert!(capacity > 0);
+        Utf8BoundedMap { version: 0, capacity, map: vec![] }
+    }
+
+    /// Clear this map of all entries, but permit the reuse of allocation
+    /// if possible.
+    ///
+    /// This must be called before the map can be used.
+    pub fn clear(&mut self) {
+        if self.map.is_empty() {
+            self.map = vec![Utf8BoundedEntry::default(); self.capacity];
+        } else {
+            self.version = self.version.wrapping_add(1);
+            // If we loop back to version 0, then we forcefully clear the
+            // entire map. Otherwise, it might be possible to incorrectly
+            // match entries used to generate other NFAs.
+            if self.version == 0 {
+                self.map = vec![Utf8BoundedEntry::default(); self.capacity];
+            }
+        }
+    }
+
+    /// Return a hash of the given transitions.
+    pub fn hash(&self, key: &[Transition]) -> usize {
+        let mut h = INIT;
+        for t in key {
+            h = (h ^ u64::from(t.start)).wrapping_mul(PRIME);
+            h = (h ^ u64::from(t.end)).wrapping_mul(PRIME);
+            h = (h ^ t.next.as_u64()).wrapping_mul(PRIME);
+        }
+        (h % self.map.len().as_u64()).as_usize()
+    }
+
+    /// Retrieve the cached state ID corresponding to the given key. The hash
+    /// given must have been computed with `hash` using the same key value.
+    ///
+    /// If there is no cached state with the given transitions, then None is
+    /// returned.
+    pub fn get(
+        &mut self,
+        key: &[Transition],
+        hash: usize,
+    ) -> Option<StateID> {
+        let entry = &self.map[hash];
+        if entry.version != self.version {
+            return None;
+        }
+        // There may be a hash collision, so we need to confirm real
+        // equality.
+        if entry.key != key {
+            return None;
+        }
+        Some(entry.val)
+    }
+
+    /// Add a cached state to this map with the given key. Callers should
+    /// ensure that `state_id` points to a state that contains precisely the
+    /// NFA transitions given.
+    ///
+    /// `hash` must have been computed using the `hash` method with the same
+    /// key.
+    pub fn set(
+        &mut self,
+        key: Vec<Transition>,
+        hash: usize,
+        state_id: StateID,
+    ) {
+        self.map[hash] =
+            Utf8BoundedEntry { version: self.version, key, val: state_id };
+    }
+}
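+
+// (Editor's note, not part of the upstream source: both maps share the same
+// usage pattern, and 'clear' is O(1) in the common case because it just
+// bumps 'version' rather than touching the entries. A typical caller looks
+// something like the following sketch, where 'compile_new_state' stands in
+// for whatever routine actually builds the NFA state:
+//
+//     let hash = map.hash(&key);
+//     if let Some(sid) = map.get(&key, hash) {
+//         return sid; // cache hit: reuse the previously built state
+//     }
+//     let sid = compile_new_state(&key); // hypothetical caller routine
+//     map.set(key, hash, sid);
+// )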
+
+/// A cache of suffixes used to modestly compress UTF-8 automata for large
+/// Unicode character classes.
+#[derive(Clone, Debug)]
+pub struct Utf8SuffixMap {
+    /// The current version of this map. Only entries with matching versions
+    /// are considered during lookups. If an entry is found with a mismatched
+    /// version, then the map behaves as if the entry does not exist.
+    version: u16,
+    /// The total number of entries this map can store.
+    capacity: usize,
+    /// The actual entries, keyed by hash. Collisions between different
+    /// states result in the old state being dropped.
+    map: Vec<Utf8SuffixEntry>,
+}
+
+/// A key that uniquely identifies an NFA state. It is a triple that
+/// represents a transition from one state for a particular byte range.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct Utf8SuffixKey {
+    pub from: StateID,
+    pub start: u8,
+    pub end: u8,
+}
+
+/// An entry in this map.
+#[derive(Clone, Debug, Default)]
+struct Utf8SuffixEntry {
+    /// The version of the map used to produce this entry. If this entry's
+    /// version does not match the current version of the map, then the map
+    /// should behave as if this entry does not exist.
+    version: u16,
+    /// The key, which consists of a transition in a particular state.
+    key: Utf8SuffixKey,
+    /// The identifier that the transition in the key maps to.
+    val: StateID,
+}
+
+impl Utf8SuffixMap {
+    /// Create a new bounded map with the given capacity. The map will never
+    /// grow beyond the given size.
+    ///
+    /// Note that this does not allocate. Instead, callers must call `clear`
+    /// before using this map. `clear` will allocate space if necessary.
+    ///
+    /// This avoids the need to pay for the allocation of this map when
+    /// compiling regexes that lack large Unicode character classes.
+    pub fn new(capacity: usize) -> Utf8SuffixMap {
+        assert!(capacity > 0);
+        Utf8SuffixMap { version: 0, capacity, map: vec![] }
+    }
+
+    /// Clear this map of all entries, but permit the reuse of allocation
+    /// if possible.
+    ///
+    /// This must be called before the map can be used.
+    pub fn clear(&mut self) {
+        if self.map.is_empty() {
+            self.map = vec![Utf8SuffixEntry::default(); self.capacity];
+        } else {
+            self.version = self.version.wrapping_add(1);
+            if self.version == 0 {
+                self.map = vec![Utf8SuffixEntry::default(); self.capacity];
+            }
+        }
+    }
+
+    /// Return a hash of the given transition.
+    pub fn hash(&self, key: &Utf8SuffixKey) -> usize {
+        // Basic FNV-1a hash as described:
+        // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+        const PRIME: u64 = 1099511628211;
+        const INIT: u64 = 14695981039346656037;
+
+        let mut h = INIT;
+        h = (h ^ key.from.as_u64()).wrapping_mul(PRIME);
+        h = (h ^ u64::from(key.start)).wrapping_mul(PRIME);
+        h = (h ^ u64::from(key.end)).wrapping_mul(PRIME);
+        (h % self.map.len().as_u64()).as_usize()
+    }
+
+    /// Retrieve the cached state ID corresponding to the given key. The hash
+    /// given must have been computed with `hash` using the same key value.
+    ///
+    /// If there is no cached state with the given key, then None is
+    /// returned.
+    pub fn get(
+        &mut self,
+        key: &Utf8SuffixKey,
+        hash: usize,
+    ) -> Option<StateID> {
+        let entry = &self.map[hash];
+        if entry.version != self.version {
+            return None;
+        }
+        if key != &entry.key {
+            return None;
+        }
+        Some(entry.val)
+    }
+
+    /// Add a cached state to this map with the given key. Callers should
+    /// ensure that `state_id` points to a state that contains precisely the
+    /// NFA transition given.
+    ///
+    /// `hash` must have been computed using the `hash` method with the same
+    /// key.
+    pub fn set(&mut self, key: Utf8SuffixKey, hash: usize, state_id: StateID) {
+        self.map[hash] =
+            Utf8SuffixEntry { version: self.version, key, val: state_id };
+    }
+}
diff --git a/vendor/regex-automata/src/nfa/thompson/mod.rs b/vendor/regex-automata/src/nfa/thompson/mod.rs
new file mode 100644
index 0000000..cf42673
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/thompson/mod.rs
@@ -0,0 +1,81 @@
+/*!
+Defines a Thompson NFA and provides the [`PikeVM`](pikevm::PikeVM) and
+[`BoundedBacktracker`](backtrack::BoundedBacktracker) regex engines.
+
+A Thompson NFA (non-deterministic finite automaton) is arguably _the_ central
+data type in this library. It is the result of what is commonly referred to
+as "regex compilation." That is, turning a regex pattern from its concrete
+syntax string into something that can run a search looks roughly like this:
+
+* A `&str` is parsed into a [`regex-syntax::ast::Ast`](regex_syntax::ast::Ast).
+* An `Ast` is translated into a [`regex-syntax::hir::Hir`](regex_syntax::hir::Hir).
+* An `Hir` is compiled into a [`NFA`].
+* The `NFA` is then used to build one of a few different regex engines: + * An `NFA` is used directly in the `PikeVM` and `BoundedBacktracker` engines. + * An `NFA` is used by a [hybrid NFA/DFA](crate::hybrid) to build out a DFA's + transition table at search time. + * An `NFA`, assuming it is one-pass, is used to build a full + [one-pass DFA](crate::dfa::onepass) ahead of time. + * An `NFA` is used to build a [full DFA](crate::dfa) ahead of time. + +The [`meta`](crate::meta) regex engine makes all of these choices for you based +on various criteria. However, if you have a lower level use case, _you_ can +build any of the above regex engines and use them directly. But you must start +here by building an `NFA`. + +# Details + +It is perhaps worth expanding a bit more on what it means to go through the +`&str`->`Ast`->`Hir`->`NFA` process. + +* Parsing a string into an `Ast` gives it a structured representation. +Crucially, the size and amount of work done in this step is proportional to the +size of the original string. No optimization or Unicode handling is done at +this point. This means that parsing into an `Ast` has very predictable costs. +Moreover, an `Ast` can be roundtripped back to its original pattern string as +written. +* Translating an `Ast` into an `Hir` is a process by which the structured +representation is simplified down to its most fundamental components. +Translation deals with flags such as case insensitivity by converting things +like `(?i:a)` to `[Aa]`. Translation is also where Unicode tables are consulted +to resolve things like `\p{Emoji}` and `\p{Greek}`. It also flattens each +character class, regardless of how deeply nested it is, into a single sequence +of non-overlapping ranges. All the various literal forms are thrown out in +favor of one common representation. Overall, the `Hir` is small enough to fit +into your head and makes analysis and other tasks much simpler. +* Compiling an `Hir` into an `NFA` formulates the regex into a finite state +machine whose transitions are defined over bytes. For example, an `Hir` might +have a Unicode character class corresponding to a sequence of ranges defined +in terms of `char`. Compilation is then responsible for turning those ranges +into a UTF-8 automaton. That is, an automaton that matches the UTF-8 encoding +of just the codepoints specified by those ranges. Otherwise, the main job of +an `NFA` is to serve as a byte-code of sorts for a virtual machine. It can be +seen as a sequence of instructions for how to match a regex. 
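+
+As a quick editorial illustration of the pipeline above (the pattern is
+arbitrary; only APIs documented in this module are used):
+
+```
+use regex_automata::nfa::thompson::{pikevm::PikeVM, NFA};
+
+// &str -> Ast -> Hir -> NFA, all via the default configuration.
+let nfa = NFA::new(r"[a-z]+[0-9]")?;
+// Use the NFA to build one of the regex engines mentioned above.
+let re = PikeVM::new_from_nfa(nfa)?;
+let mut cache = re.create_cache();
+assert!(re.is_match(&mut cache, "abc9"));
+# Ok::<(), Box<dyn std::error::Error>>(())
+```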
+*/
+
+#[cfg(feature = "nfa-backtrack")]
+pub mod backtrack;
+mod builder;
+#[cfg(feature = "syntax")]
+mod compiler;
+mod error;
+#[cfg(feature = "syntax")]
+mod literal_trie;
+#[cfg(feature = "syntax")]
+mod map;
+mod nfa;
+#[cfg(feature = "nfa-pikevm")]
+pub mod pikevm;
+#[cfg(feature = "syntax")]
+mod range_trie;
+
+pub use self::{
+    builder::Builder,
+    error::BuildError,
+    nfa::{
+        DenseTransitions, PatternIter, SparseTransitions, State, Transition,
+        NFA,
+    },
+};
+#[cfg(feature = "syntax")]
+pub use compiler::{Compiler, Config, WhichCaptures};
diff --git a/vendor/regex-automata/src/nfa/thompson/nfa.rs b/vendor/regex-automata/src/nfa/thompson/nfa.rs
new file mode 100644
index 0000000..1f57f8e
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/thompson/nfa.rs
@@ -0,0 +1,2099 @@
+use core::{fmt, mem};
+
+use alloc::{boxed::Box, format, string::String, sync::Arc, vec, vec::Vec};
+
+#[cfg(feature = "syntax")]
+use crate::nfa::thompson::{
+    compiler::{Compiler, Config},
+    error::BuildError,
+};
+use crate::{
+    nfa::thompson::builder::Builder,
+    util::{
+        alphabet::{self, ByteClassSet, ByteClasses},
+        captures::{GroupInfo, GroupInfoError},
+        look::{Look, LookMatcher, LookSet},
+        primitives::{
+            IteratorIndexExt, PatternID, PatternIDIter, SmallIndex, StateID,
+        },
+        sparse_set::SparseSet,
+    },
+};
+
+/// A byte oriented Thompson non-deterministic finite automaton (NFA).
+///
+/// A Thompson NFA is a finite state machine that permits unconditional
+/// epsilon transitions, but guarantees that there exists at most one
+/// non-epsilon transition for each element in the alphabet for each state.
+///
+/// An NFA may be used directly for searching, for analysis, or to build a
+/// deterministic finite automaton (DFA).
+///
+/// # Cheap clones
+///
+/// Since an NFA is a core data type in this crate that many other regex
+/// engines are based on top of, it is convenient to give ownership of an NFA
+/// to said regex engines. Because of this, an NFA uses reference counting
+/// internally. Therefore, it is cheap to clone and it is encouraged to do so.
+///
+/// # Capabilities
+///
+/// Using an NFA for searching via the
+/// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) provides the most
+/// "power" of any regex engine in this crate. Namely, it supports the
+/// following in all cases:
+///
+/// 1. Detection of a match.
+/// 2. Location of a match, including both the start and end offset, in a
+/// single pass of the haystack.
+/// 3. Location of matching capturing groups.
+/// 4. Handles multiple patterns, including (1)-(3) when multiple patterns
+/// are present.
+///
+/// # Capturing Groups
+///
+/// Groups refer to parenthesized expressions inside a regex pattern. They
+/// look like this, where `exp` is an arbitrary regex:
+///
+/// * `(exp)` - An unnamed capturing group.
+/// * `(?P<name>exp)` or `(?<name>exp)` - A named capturing group.
+/// * `(?:exp)` - A non-capturing group.
+/// * `(?i:exp)` - A non-capturing group that sets flags.
+///
+/// Only the first two forms are said to be _capturing_. Capturing
+/// means that the last position at which they match is reportable. The
+/// [`Captures`](crate::util::captures::Captures) type provides convenient
+/// access to the match positions of capturing groups, which includes looking
+/// up capturing groups by their name.
+///
+/// # Byte oriented
+///
+/// This NFA is byte oriented, which means that all of its transitions are
+/// defined on bytes. In other words, the alphabet of an NFA consists of the
+/// 256 different byte values.
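+///
+/// For example (an editor's illustration, not upstream documentation): the
+/// pattern `☃` compiles to a chain of three byte transitions for the UTF-8
+/// encoding `\xE2\x98\x83`, rather than to a single transition on the
+/// codepoint U+2603.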
+///
+/// While DFAs nearly demand that they be byte oriented for performance
+/// reasons, an NFA could conceivably be *Unicode codepoint* oriented.
+/// Indeed, a previous version of this NFA supported both byte and codepoint
+/// oriented modes. A codepoint oriented mode can work because an NFA
+/// fundamentally uses a sparse representation of transitions, which works
+/// well with the large sparse space of Unicode codepoints.
+///
+/// Nevertheless, this NFA is only byte oriented. This choice is primarily
+/// driven by implementation simplicity, and in part by memory usage. In
+/// practice, performance between the two is roughly comparable. However,
+/// building a DFA (including a hybrid DFA) really wants a byte oriented NFA.
+/// So if we do have a codepoint oriented NFA, then we also need to generate
+/// a byte oriented NFA in order to build a hybrid NFA/DFA. Thus, by only
+/// generating byte oriented NFAs, we can produce one less NFA. In other
+/// words, if we made our NFA codepoint oriented, we'd need to *also* make it
+/// support a byte oriented mode, which is more complicated. But a byte
+/// oriented mode can support everything.
+///
+/// # Differences with DFAs
+///
+/// At the theoretical level, the precise difference between an NFA and a DFA
+/// is that, in a DFA, for every state, an input symbol unambiguously refers
+/// to a single transition, _and_ an input symbol is required for each
+/// transition. At a practical level, this permits DFA implementations to be
+/// implemented at their core with a small constant number of CPU
+/// instructions for each byte of input searched. In practice, this makes
+/// them quite a bit faster than NFAs _in general_. Namely, in order to
+/// execute a search for any Thompson NFA, one needs to keep track of a
+/// _set_ of states, and execute the possible transitions on all of those
+/// states for each input symbol. Overall, this results in much more
+/// overhead. To a first approximation, one can expect DFA searches to be
+/// about an order of magnitude faster.
+///
+/// So why use an NFA at all? The main advantage of an NFA is that it takes
+/// linear time (in the size of the pattern string after repetitions have
+/// been expanded) to build and uses a linear amount of memory. A DFA, on the
+/// other hand, may take exponential time and/or space to build. Even in
+/// non-pathological cases, DFAs often take quite a bit more memory than
+/// their NFA counterparts, _especially_ if large Unicode character classes
+/// are involved. Of course, an NFA also provides additional capabilities.
+/// For example, it can match Unicode word boundaries on non-ASCII text and
+/// resolve the positions of capturing groups.
+///
+/// Note that a [`hybrid::regex::Regex`](crate::hybrid::regex::Regex) strikes
+/// a good balance between an NFA and a DFA. It avoids the exponential build
+/// time of a DFA while maintaining its fast search time. The downside of a
+/// hybrid NFA/DFA is that in some cases it can be slower at search time than
+/// the NFA. (It also has less functionality than a pure NFA. It cannot
+/// handle Unicode word boundaries on non-ASCII text and cannot resolve
+/// capturing groups.)
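+///
+/// An editor's sketch (pseudocode, not from the upstream docs) of the
+/// set-based simulation described above:
+///
+/// ```text
+/// states = epsilon_closure({start})
+/// for each byte b of the haystack:
+///     next = {}
+///     for each state s in states:
+///         if s has a transition on b to state t:
+///             next = next | epsilon_closure({t})
+///     states = next
+///     record a match if any state in states is a match state
+/// ```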
+///
+/// # Example
+///
+/// This shows how to build an NFA with the default configuration and execute
+/// a search using the Pike VM.
+///
+/// ```
+/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+///
+/// let re = PikeVM::new(r"foo[0-9]+")?;
+/// let mut cache = re.create_cache();
+/// let mut caps = re.create_captures();
+///
+/// let expected = Some(Match::must(0, 0..8));
+/// re.captures(&mut cache, b"foo12345", &mut caps);
+/// assert_eq!(expected, caps.get_match());
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// # Example: resolving capturing groups
+///
+/// This example shows how to parse some simple dates and extract the
+/// components of each date via capturing groups.
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::{
+///     nfa::thompson::pikevm::PikeVM,
+///     util::captures::Captures,
+/// };
+///
+/// let vm = PikeVM::new(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})")?;
+/// let mut cache = vm.create_cache();
+///
+/// let haystack = "2012-03-14, 2013-01-01 and 2014-07-05";
+/// let all: Vec<Captures> = vm.captures_iter(
+///     &mut cache, haystack.as_bytes()
+/// ).collect();
+/// // There should be a total of 3 matches.
+/// assert_eq!(3, all.len());
+/// // The year from the second match is '2013'.
+/// let span = all[1].get_group_by_name("y").unwrap();
+/// assert_eq!("2013", &haystack[span]);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// This example shows that only the last match of a capturing group is
+/// reported, even if it had to match multiple times for an overall match
+/// to occur.
+///
+/// ```
+/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
+///
+/// let re = PikeVM::new(r"([a-z]){4}")?;
+/// let mut cache = re.create_cache();
+/// let mut caps = re.create_captures();
+///
+/// let haystack = b"quux";
+/// re.captures(&mut cache, haystack, &mut caps);
+/// assert!(caps.is_match());
+/// assert_eq!(Some(Span::from(3..4)), caps.get_group(1));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone)]
+pub struct NFA(
+    // We make NFAs reference counted primarily for two reasons. First is
+    // that the NFA type itself is quite large (at least 0.5KB), and so it
+    // makes sense to put it on the heap by default anyway. Second is that,
+    // for Arc specifically, this enables cheap clones. This tends to be
+    // useful because several structures (the backtracker, the Pike VM, the
+    // hybrid NFA/DFA) all want to hang on to an NFA for use during search
+    // time. We could provide the NFA at search time via a function argument,
+    // but this makes for an unnecessarily annoying API. Instead, we just let
+    // each structure share ownership of the NFA. Using a deep clone would
+    // not be smart, since the NFA can use quite a bit of heap space.
+    Arc<Inner>,
+);
+
+impl NFA {
+    /// Parse the given regular expression using a default configuration and
+    /// build an NFA from it.
+    ///
+    /// If you want a non-default configuration, then use the NFA
+    /// [`Compiler`] with a [`Config`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::new(r"foo[0-9]+")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// let expected = Some(Match::must(0, 0..8));
+    /// re.captures(&mut cache, b"foo12345", &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new(pattern: &str) -> Result<NFA, BuildError> {
+        NFA::compiler().build(pattern)
+    }
+
+    /// Parse the given regular expressions using a default configuration and
+    /// build a multi-NFA from them.
+    ///
+    /// If you want a non-default configuration, then use the NFA
+    /// [`Compiler`] with a [`Config`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::new_many(&["[0-9]+", "[a-z]+"])?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// let expected = Some(Match::must(1, 0..3));
+    /// re.captures(&mut cache, b"foo12345bar", &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new_many<P: AsRef<str>>(
+        patterns: &[P],
+    ) -> Result<NFA, BuildError> {
+        NFA::compiler().build_many(patterns)
+    }
+
+    /// Returns an NFA with a single regex pattern that always matches at
+    /// every position.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match};
+    ///
+    /// let re = PikeVM::new_from_nfa(NFA::always_match())?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// let expected = Some(Match::must(0, 0..0));
+    /// re.captures(&mut cache, b"", &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    /// re.captures(&mut cache, b"foo", &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn always_match() -> NFA {
+        // We could use NFA::new("") here and we'd get the same semantics,
+        // but hand-assembling the NFA (as below) does the same thing with
+        // fewer states. It also avoids needing the 'syntax' feature enabled.
+        //
+        // Technically all we need is the "match" state, but we add the
+        // "capture" states so that the PikeVM can use this NFA.
+        //
+        // The unwraps below are OK because we add so few states that they
+        // will never exhaust any default limits in any environment.
+        let mut builder = Builder::new();
+        let pid = builder.start_pattern().unwrap();
+        assert_eq!(pid.as_usize(), 0);
+        let start_id =
+            builder.add_capture_start(StateID::ZERO, 0, None).unwrap();
+        let end_id = builder.add_capture_end(StateID::ZERO, 0).unwrap();
+        let match_id = builder.add_match().unwrap();
+        builder.patch(start_id, end_id).unwrap();
+        builder.patch(end_id, match_id).unwrap();
+        let pid = builder.finish_pattern(start_id).unwrap();
+        assert_eq!(pid.as_usize(), 0);
+        builder.build(start_id, start_id).unwrap()
+    }
+
+    /// Returns an NFA that never matches at any position.
+    ///
+    /// This is a convenience routine for creating an NFA with zero patterns.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::{NFA, pikevm::PikeVM};
+    ///
+    /// let re = PikeVM::new_from_nfa(NFA::never_match())?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, b"", &mut caps);
+    /// assert!(!caps.is_match());
+    /// re.captures(&mut cache, b"foo", &mut caps);
+    /// assert!(!caps.is_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn never_match() -> NFA {
+        // This always succeeds because it only requires one NFA state, which
+        // will never exhaust any (default) limits.
+        let mut builder = Builder::new();
+        let sid = builder.add_fail().unwrap();
+        builder.build(sid, sid).unwrap()
+    }
+
+    /// Return a default configuration for an `NFA`.
+    ///
+    /// This is a convenience routine to avoid needing to import the `Config`
+    /// type when customizing the construction of an NFA.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build an NFA with a small size limit that
+    /// results in a compilation error for any regex that tries to use more
+    /// heap memory than the configured limit.
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::{NFA, pikevm::PikeVM};
+    ///
+    /// let result = PikeVM::builder()
+    ///     .thompson(NFA::config().nfa_size_limit(Some(1_000)))
+    ///     // Remember, \w is Unicode-aware by default and thus huge.
+    ///     .build(r"\w+");
+    /// assert!(result.is_err());
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn config() -> Config {
+        Config::new()
+    }
+
+    /// Return a compiler for configuring the construction of an `NFA`.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Compiler`] type in common cases.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build an NFA that is permitted to match
+    /// invalid UTF-8. Without the additional syntax configuration here,
+    /// compilation of `(?-u:.)` would fail because it is permitted to match
+    /// invalid UTF-8.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::syntax,
+    ///     Match,
+    /// };
+    ///
+    /// let re = PikeVM::builder()
+    ///     .syntax(syntax::Config::new().utf8(false))
+    ///     .build(r"[a-z]+(?-u:.)")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// let expected = Some(Match::must(0, 1..5));
+    /// re.captures(&mut cache, b"\xFFabc\xFF", &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn compiler() -> Compiler {
+        Compiler::new()
+    }
+
+    /// Returns an iterator over all pattern identifiers in this NFA.
+    ///
+    /// Pattern IDs are allocated in sequential order starting from zero,
+    /// where the order corresponds to the order of patterns provided to the
+    /// [`NFA::new_many`] constructor.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, PatternID};
+    ///
+    /// let nfa = NFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
+    /// let pids: Vec<PatternID> = nfa.patterns().collect();
+    /// assert_eq!(pids, vec![
+    ///     PatternID::must(0),
+    ///     PatternID::must(1),
+    ///     PatternID::must(2),
+    /// ]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn patterns(&self) -> PatternIter<'_> {
+        PatternIter {
+            it: PatternID::iter(self.pattern_len()),
+            _marker: core::marker::PhantomData,
+        }
+    }
+
+    /// Returns the total number of regex patterns in this NFA.
+    ///
+    /// This may return zero if the NFA was constructed with no patterns. In
+    /// this case, the NFA can never produce a match for any input.
+    ///
+    /// This is guaranteed to be no bigger than [`PatternID::LIMIT`] because
+    /// NFA construction will fail if too many patterns are added.
+    ///
+    /// It is always true that `nfa.patterns().count() == nfa.pattern_len()`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::NFA;
+    ///
+    /// let nfa = NFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
+    /// assert_eq!(3, nfa.pattern_len());
+    ///
+    /// let nfa = NFA::never_match();
+    /// assert_eq!(0, nfa.pattern_len());
+    ///
+    /// let nfa = NFA::always_match();
+    /// assert_eq!(1, nfa.pattern_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn pattern_len(&self) -> usize {
+        self.0.start_pattern.len()
+    }
+
+    /// Return the state identifier of the initial anchored state of this
+    /// NFA.
+    ///
+    /// The returned identifier is guaranteed to be a valid index into the
+    /// slice returned by [`NFA::states`], and is also a valid argument to
+    /// [`NFA::state`].
+    ///
+    /// # Example
+    ///
+    /// This example shows a somewhat contrived example where we can easily
+    /// predict the anchored starting state.
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::{NFA, State, WhichCaptures};
+    ///
+    /// let nfa = NFA::compiler()
+    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
+    ///     .build("a")?;
+    /// let state = nfa.state(nfa.start_anchored());
+    /// match *state {
+    ///     State::ByteRange { trans } => {
+    ///         assert_eq!(b'a', trans.start);
+    ///         assert_eq!(b'a', trans.end);
+    ///     }
+    ///     _ => unreachable!("unexpected state"),
+    /// }
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn start_anchored(&self) -> StateID {
+        self.0.start_anchored
+    }
+
+    /// Return the state identifier of the initial unanchored state of this
+    /// NFA.
+    ///
+    /// This is equivalent to the identifier returned by
+    /// [`NFA::start_anchored`] when the NFA has no unanchored starting
+    /// state.
+    ///
+    /// The returned identifier is guaranteed to be a valid index into the
+    /// slice returned by [`NFA::states`], and is also a valid argument to
+    /// [`NFA::state`].
+    ///
+    /// # Example
+    ///
+    /// This example shows that the anchored and unanchored starting states
+    /// are equivalent when an anchored NFA is built.
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::NFA;
+    ///
+    /// let nfa = NFA::new("^a")?;
+    /// assert_eq!(nfa.start_anchored(), nfa.start_unanchored());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn start_unanchored(&self) -> StateID {
+        self.0.start_unanchored
+    }
+
+    /// Return the state identifier of the initial anchored state for the
+    /// given pattern, or `None` if there is no pattern corresponding to the
+    /// given identifier. This occurs precisely when
+    /// `pid.as_usize() >= nfa.pattern_len()`.
+    ///
+    /// If one uses the starting state for a particular pattern, then the
+    /// only match that can be returned is for the corresponding pattern.
+    ///
+    /// The returned identifier is guaranteed to be a valid index into the
+    /// slice returned by [`NFA::states`], and is also a valid argument to
+    /// [`NFA::state`].
+    ///
+    /// # Example
+    ///
+    /// This example shows that the anchored starting state for each pattern
+    /// is distinct from the NFA-wide anchored starting state.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, PatternID};
+    ///
+    /// let nfa = NFA::new_many(&["^a", "^b"])?;
+    /// // The anchored and unanchored states for the entire NFA are the
+    /// // same, since all of the patterns are anchored.
+    /// assert_eq!(nfa.start_anchored(), nfa.start_unanchored());
+    /// // But the anchored starting states for each pattern are distinct,
+    /// // because these starting states can only lead to matches for the
+    /// // corresponding pattern.
+    /// let anchored = Some(nfa.start_anchored());
+    /// assert_ne!(anchored, nfa.start_pattern(PatternID::must(0)));
+    /// assert_ne!(anchored, nfa.start_pattern(PatternID::must(1)));
+    /// // Requesting a pattern not in the NFA will result in None:
+    /// assert_eq!(None, nfa.start_pattern(PatternID::must(2)));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn start_pattern(&self, pid: PatternID) -> Option<StateID> {
+        self.0.start_pattern.get(pid.as_usize()).copied()
+    }
+
+    /// Get the byte class set for this NFA.
+    ///
+    /// A byte class set is a partitioning of this NFA's alphabet into
+    /// equivalence classes. Any two bytes in the same equivalence class are
+    /// guaranteed to never discriminate between a match or a non-match.
+    /// (The partitioning may not be minimal.)
+    ///
+    /// Byte classes are used internally by this crate when building DFAs.
+    /// Namely, among other optimizations, they enable a space optimization
+    /// where the DFA's internal alphabet is defined over the equivalence
+    /// classes of bytes instead of all possible byte values. The former is
+    /// often quite a bit smaller than the latter, which permits the DFA to
+    /// use less space for its transition table.
+    #[inline]
+    pub(crate) fn byte_class_set(&self) -> &ByteClassSet {
+        &self.0.byte_class_set
+    }
+
+    /// Get the byte classes for this NFA.
+    ///
+    /// Byte classes represent a partitioning of this NFA's alphabet into
+    /// equivalence classes. Any two bytes in the same equivalence class are
+    /// guaranteed to never discriminate between a match or a non-match.
+    /// (The partitioning may not be minimal.)
+    ///
+    /// Byte classes are used internally by this crate when building DFAs.
+    /// Namely, among other optimizations, they enable a space optimization
+    /// where the DFA's internal alphabet is defined over the equivalence
+    /// classes of bytes instead of all possible byte values. The former is
+    /// often quite a bit smaller than the latter, which permits the DFA to
+    /// use less space for its transition table.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to query the class of various bytes.
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::NFA;
+    ///
+    /// let nfa = NFA::new("[a-z]+")?;
+    /// let classes = nfa.byte_classes();
+    /// // 'a' and 'z' are in the same class for this regex.
+    /// assert_eq!(classes.get(b'a'), classes.get(b'z'));
+    /// // But 'a' and 'A' are not.
+    /// assert_ne!(classes.get(b'a'), classes.get(b'A'));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn byte_classes(&self) -> &ByteClasses {
+        &self.0.byte_classes
+    }
+
+    /// Return a reference to the NFA state corresponding to the given ID.
+    ///
+    /// This is a convenience routine for `nfa.states()[id]`.
+    ///
+    /// # Panics
+    ///
+    /// This panics when the given identifier does not reference a valid
+    /// state. That is, when `id.as_usize() >= nfa.states().len()`.
+    ///
+    /// # Example
+    ///
+    /// The anchored state for a pattern will typically correspond to a
+    /// capturing state for that pattern. (Although, this is not an API
+    /// guarantee!)
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{NFA, State}, PatternID};
+    ///
+    /// let nfa = NFA::new("a")?;
+    /// let state = nfa.state(nfa.start_pattern(PatternID::ZERO).unwrap());
+    /// match *state {
+    ///     State::Capture { slot, .. } => {
+    ///         assert_eq!(0, slot.as_usize());
+    ///     }
+    ///     _ => unreachable!("unexpected state"),
+    /// }
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn state(&self, id: StateID) -> &State {
+        &self.states()[id]
+    }
+
+    /// Returns a slice of all states in this NFA.
+    ///
+    /// The slice returned is indexed by `StateID`. This provides a
+    /// convenient way to access states while following transitions among
+    /// those states.
+    ///
+    /// # Example
+    ///
+    /// This demonstrates that disabling UTF-8 mode can shrink the size of
+    /// the NFA considerably in some cases, especially when using Unicode
+    /// character classes.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::nfa::thompson::NFA;
+    ///
+    /// let nfa_unicode = NFA::new(r"\w")?;
+    /// let nfa_ascii = NFA::new(r"(?-u)\w")?;
+    /// // Yes, a factor of 45 difference. No lie.
+    /// assert!(40 * nfa_ascii.states().len() < nfa_unicode.states().len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn states(&self) -> &[State] {
+        &self.0.states
+    }
+
+    /// Returns the capturing group info for this NFA.
+    ///
+    /// The [`GroupInfo`] provides a way to map to and from capture index
+    /// and capture name for each pattern. It also provides a mapping from
+    /// each of the capturing groups in every pattern to their corresponding
+    /// slot offsets encoded in [`State::Capture`] states.
+    ///
+    /// Note that `GroupInfo` uses reference counting internally, such that
+    /// cloning a `GroupInfo` is very cheap.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to get a list of all capture group names for
+    /// a particular pattern.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, PatternID};
+    ///
+    /// let nfa = NFA::new(r"(a)(?P<foo>b)(c)(d)(?P<bar>e)")?;
+    /// // The first is the implicit group that is always unnamed. The next
+    /// // 5 groups are the explicit groups found in the concrete syntax
+    /// // above.
+    /// let expected = vec![None, None, Some("foo"), None, None, Some("bar")];
+    /// let got: Vec<Option<&str>> =
+    ///     nfa.group_info().pattern_names(PatternID::ZERO).collect();
+    /// assert_eq!(expected, got);
+    ///
+    /// // Using an invalid pattern ID will result in nothing yielded.
+    /// let got = nfa.group_info().pattern_names(PatternID::must(999)).count();
+    /// assert_eq!(0, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn group_info(&self) -> &GroupInfo {
+        &self.0.group_info()
+    }
+
+    /// Returns true if and only if this NFA has at least one
+    /// [`Capture`](State::Capture) in its sequence of states.
+    ///
+    /// This is useful as a way to perform a quick test before attempting
+    /// something that does or does not require capture states. For example,
+    /// some regex engines (like the PikeVM) require capture states in order
+    /// to work at all.
+    ///
+    /// # Example
+    ///
+    /// This example shows a few different NFAs and whether they have
+    /// captures or not.
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::{NFA, WhichCaptures};
+    ///
+    /// // Obviously has capture states.
+    /// let nfa = NFA::new("(a)")?;
+    /// assert!(nfa.has_capture());
+    ///
+    /// // Less obviously has capture states, because every pattern has at
+    /// // least one anonymous capture group corresponding to the match for
+    /// // the entire pattern.
+    /// let nfa = NFA::new("a")?;
+    /// assert!(nfa.has_capture());
+    ///
+    /// // Other than hand building your own NFA, this is the only way to
+    /// // build an NFA without capturing groups. In general, you should
+    /// // only do this if you don't intend to use any of the NFA-oriented
+    /// // regex engines. Overall, capturing groups don't have many
+    /// // downsides, although they can add a bit of noise to simple NFAs,
+    /// // so it can be nice to disable them for debugging purposes.
+    /// //
+    /// // Notice that 'has_capture' is false here even when we have an
+    /// // explicit capture group in the pattern.
+    /// let nfa = NFA::compiler()
+    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
+    ///     .build("(a)")?;
+    /// assert!(!nfa.has_capture());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn has_capture(&self) -> bool {
+        self.0.has_capture
+    }
+
+    /// Returns true if and only if this NFA can match the empty string.
+    /// When it returns false, all possible matches are guaranteed to have a
+    /// non-zero length.
+    ///
+    /// This is useful as a cheap way to know whether code needs to handle
+    /// the case of a zero length match. This is particularly important when
+    /// UTF-8 modes are enabled, as when UTF-8 mode is enabled, empty matches
+    /// that split a codepoint must never be reported. This extra handling
+    /// can sometimes be costly, and since regexes matching an empty string
+    /// are somewhat rare, it can be beneficial to treat such regexes
+    /// specially.
+    ///
+    /// # Example
+    ///
+    /// This example shows a few different NFAs and whether they match the
+    /// empty string or not. Notice the empty string isn't merely a matter
+    /// of a string of length literally `0`, but rather, whether a match can
+    /// occur between specific pairs of bytes.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, util::syntax};
+    ///
+    /// // The empty regex matches the empty string.
+    /// let nfa = NFA::new("")?;
+    /// assert!(nfa.has_empty(), "empty matches empty");
+    /// // The '+' repetition operator requires at least one match, and so
+    /// // does not match the empty string.
+    /// let nfa = NFA::new("a+")?;
+    /// assert!(!nfa.has_empty(), "+ does not match empty");
+    /// // But the '*' repetition operator does.
+    /// let nfa = NFA::new("a*")?;
+    /// assert!(nfa.has_empty(), "* does match empty");
+    /// // And wrapping '+' in an operator that can match an empty string
+    /// // also causes it to match the empty string too.
+    /// let nfa = NFA::new("(a+)*")?;
+    /// assert!(nfa.has_empty(), "+ inside of * matches empty");
+    ///
+    /// // If a regex is just made of a look-around assertion, even if the
+    /// // assertion requires some kind of non-empty string around it (such
+    /// // as \b), then it is still treated as if it matches the empty
+    /// // string. Namely, if a match occurs of just a look-around assertion,
+    /// // then the match returned is empty.
+    /// let nfa = NFA::compiler()
+    ///     .syntax(syntax::Config::new().utf8(false))
+    ///     .build(r"^$\A\z\b\B(?-u:\b\B)")?;
+    /// assert!(nfa.has_empty(), "assertions match empty");
+    /// // Even when an assertion is wrapped in a '+', it still matches the
+    /// // empty string.
+    /// let nfa = NFA::new(r"\b+")?;
+    /// assert!(nfa.has_empty(), "+ of an assertion matches empty");
+    ///
+    /// // An alternation with even one branch that can match the empty
+    /// // string is also said to match the empty string overall.
+    /// let nfa = NFA::new("foo|(bar)?|quux")?;
+    /// assert!(nfa.has_empty(), "alternations can match empty");
+    ///
+    /// // An NFA that matches nothing does not match the empty string.
+    /// let nfa = NFA::new("[a&&b]")?;
+    /// assert!(!nfa.has_empty(), "never matching means not matching empty");
+    /// // But if it's wrapped in something that doesn't require a match at
+    /// // all, then it can match the empty string!
+    /// let nfa = NFA::new("[a&&b]*")?;
+    /// assert!(nfa.has_empty(), "* on never-match still matches empty");
+    /// // Since a '+' requires a match, using it on something that can
+    /// // never match will itself produce a regex that can never match
+    /// // anything, and thus does not match the empty string.
+    /// let nfa = NFA::new("[a&&b]+")?;
+    /// assert!(!nfa.has_empty(), "+ on never-match still matches nothing");
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn has_empty(&self) -> bool {
+        self.0.has_empty
+    }
+
+    /// Whether UTF-8 mode is enabled for this NFA or not.
+    ///
+    /// When UTF-8 mode is enabled, all matches reported by a regex engine
+    /// derived from this NFA are guaranteed to correspond to spans of valid
+    /// UTF-8. This includes zero-width matches. For example, the regex
+    /// engine must guarantee that the empty regex will not match at the
+    /// positions between code units in the UTF-8 encoding of a single
+    /// codepoint.
+    ///
+    /// See [`Config::utf8`] for more information.
+    ///
+    /// This is enabled by default.
+    ///
+    /// # Example
+    ///
+    /// This example shows how UTF-8 mode can impact the match spans that
+    /// may be reported in certain cases.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::{self, pikevm::PikeVM},
+    ///     Match, Input,
+    /// };
+    ///
+    /// let re = PikeVM::new("")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// // UTF-8 mode is enabled by default.
+    /// let mut input = Input::new("☃");
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 0..0)), caps.get_match());
+    ///
+    /// // Even though an empty regex matches at 1..1, our next match is
+    /// // 3..3 because 1..1 and 2..2 split the snowman codepoint (which is
+    /// // three bytes long).
+    /// input.set_start(1);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 3..3)), caps.get_match());
+    ///
+    /// // But if we disable UTF-8, then we'll get matches at 1..1 and 2..2:
+    /// let re = PikeVM::builder()
+    ///     .thompson(thompson::Config::new().utf8(false))
+    ///     .build("")?;
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 1..1)), caps.get_match());
+    ///
+    /// input.set_start(2);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 2..2)), caps.get_match());
+    ///
+    /// input.set_start(3);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 3..3)), caps.get_match());
+    ///
+    /// input.set_start(4);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(None, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn is_utf8(&self) -> bool {
+        self.0.utf8
+    }
+
+    /// Returns true when this NFA is meant to be matched in reverse.
+    ///
+    /// Generally speaking, when this is true, it means the NFA is supposed
+    /// to be used in conjunction with moving backwards through the haystack.
+    /// That is, from a higher memory address to a lower memory address.
+    ///
+    /// It is often the case that lower level routines dealing with an NFA
+    /// don't need to care about whether it is "meant" to be matched in
+    /// reverse or not. However, there are some specific cases where it
+    /// matters.
+    /// For example, the implementation of CRLF-aware `^` and `$` line
+    /// anchors needs to know whether the search is in the forward or reverse
+    /// direction. In the forward direction, neither `^` nor `$` should match
+    /// when a `\r` has been seen previously and a `\n` is next. However, in
+    /// the reverse direction, neither `^` nor `$` should match when a `\n`
+    /// has been seen previously and a `\r` is next. This fundamentally
+    /// changes how the state machine is constructed, and thus needs to be
+    /// altered based on the direction of the search.
+    ///
+    /// This is automatically set when using a [`Compiler`] with a
+    /// configuration where [`Config::reverse`] is enabled. If you're
+    /// building your own NFA by hand via a [`Builder`], then it is set via
+    /// [`Builder::set_reverse`].
+    #[inline]
+    pub fn is_reverse(&self) -> bool {
+        self.0.reverse
+    }
+
+    /// Returns true if and only if all starting states for this NFA
+    /// correspond to the beginning of an anchored search.
+    ///
+    /// Typically, an NFA will have both an anchored and an unanchored
+    /// starting state. This is because it tends to be useful to have both,
+    /// and the cost of having an unanchored starting state is almost zero
+    /// (for an NFA). However, if all patterns in the NFA are themselves
+    /// anchored, then even the unanchored starting state will correspond to
+    /// an anchored search, since the pattern doesn't permit anything else.
+    ///
+    /// # Example
+    ///
+    /// This example shows a few different scenarios where this method's
+    /// return value varies.
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::NFA;
+    ///
+    /// // The unanchored starting state permits matching this pattern
+    /// // anywhere in a haystack, instead of just at the beginning.
+    /// let nfa = NFA::new("a")?;
+    /// assert!(!nfa.is_always_start_anchored());
+    ///
+    /// // In this case, the pattern is itself anchored, so there is no way
+    /// // to run an unanchored search.
+    /// let nfa = NFA::new("^a")?;
+    /// assert!(nfa.is_always_start_anchored());
+    ///
+    /// // When multiline mode is enabled, '^' can match at the start of a
+    /// // line in addition to the start of a haystack, so an unanchored
+    /// // search is actually possible.
+    /// let nfa = NFA::new("(?m)^a")?;
+    /// assert!(!nfa.is_always_start_anchored());
+    ///
+    /// // Weird cases also work. A pattern is only considered anchored if
+    /// // all matches may only occur at the start of a haystack.
+    /// let nfa = NFA::new("(^a)|a")?;
+    /// assert!(!nfa.is_always_start_anchored());
+    ///
+    /// // When multiple patterns are present, if they are all anchored,
+    /// // then the NFA is always anchored too.
+    /// let nfa = NFA::new_many(&["^a", "^b", "^c"])?;
+    /// assert!(nfa.is_always_start_anchored());
+    ///
+    /// // But if one pattern is unanchored, then the NFA must permit an
+    /// // unanchored search.
+    /// let nfa = NFA::new_many(&["^a", "b", "^c"])?;
+    /// assert!(!nfa.is_always_start_anchored());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn is_always_start_anchored(&self) -> bool {
+        self.start_anchored() == self.start_unanchored()
+    }
+
+    /// Returns the look-around matcher associated with this NFA.
+    ///
+    /// A look-around matcher determines how to match look-around assertions.
+    /// In particular, some assertions are configurable. For example, the
+    /// `(?m:^)` and `(?m:$)` assertions can have their line terminator
+    /// changed from the default of `\n` to any other byte.
+    ///
+    /// If the NFA was built using a [`Compiler`], then this matcher
+    /// can be set via the [`Config::look_matcher`] configuration
+    /// knob. Otherwise, if you've built an NFA by hand, it is set via
+    /// [`Builder::set_look_matcher`].
+    ///
+    /// # Example
+    ///
+    /// This shows how to change the line terminator for multi-line
+    /// assertions.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::{self, pikevm::PikeVM},
+    ///     util::look::LookMatcher,
+    ///     Match, Input,
+    /// };
+    ///
+    /// let mut lookm = LookMatcher::new();
+    /// lookm.set_line_terminator(b'\x00');
+    ///
+    /// let re = PikeVM::builder()
+    ///     .thompson(thompson::Config::new().look_matcher(lookm))
+    ///     .build(r"(?m)^[a-z]+$")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// // Multi-line assertions now use NUL as a terminator.
+    /// assert_eq!(
+    ///     Some(Match::must(0, 1..4)),
+    ///     re.find(&mut cache, b"\x00abc\x00"),
+    /// );
+    /// // ... and \n is no longer recognized as a terminator.
+    /// assert_eq!(
+    ///     None,
+    ///     re.find(&mut cache, b"\nabc\n"),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn look_matcher(&self) -> &LookMatcher {
+        &self.0.look_matcher
+    }
+
+    /// Returns the union of all look-around assertions used throughout this
+    /// NFA. When the returned set is empty, it implies that the NFA has no
+    /// look-around assertions and thus zero conditional epsilon transitions.
+    ///
+    /// This is useful in some cases for enabling optimizations. It is not
+    /// unusual, for example, for optimizations to be of the form, "for any
+    /// regex with zero conditional epsilon transitions, do ..." where "..."
+    /// is some kind of optimization.
+    ///
+    /// This isn't only helpful for optimizations either. Sometimes
+    /// look-around assertions are difficult to support. For example, many of
+    /// the DFAs in this crate don't support Unicode word boundaries or
+    /// handle them using heuristics. Handling that correctly typically
+    /// requires some kind of cheap check of whether the NFA has a Unicode
+    /// word boundary in the first place.
+    ///
+    /// # Example
+    ///
+    /// This example shows how this routine varies based on the regex
+    /// pattern:
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, util::look::Look};
+    ///
+    /// // No look-around at all.
+    /// let nfa = NFA::new("a")?;
+    /// assert!(nfa.look_set_any().is_empty());
+    ///
+    /// // When multiple patterns are present, since this returns the union,
+    /// // it will include look-around assertions that only appear in one
+    /// // pattern.
+    /// let nfa = NFA::new_many(&["a", "b", "a^b", "c"])?;
+    /// assert!(nfa.look_set_any().contains(Look::Start));
+    ///
+    /// // Some groups of assertions have various shortcuts. For example:
+    /// let nfa = NFA::new(r"(?-u:\b)")?;
+    /// assert!(nfa.look_set_any().contains_word());
+    /// assert!(!nfa.look_set_any().contains_word_unicode());
+    /// assert!(nfa.look_set_any().contains_word_ascii());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn look_set_any(&self) -> LookSet {
+        self.0.look_set_any
+    }
+
+    /// Returns the union of all prefix look-around assertions for every
+    /// pattern in this NFA. When the returned set is empty, it implies none
+    /// of the patterns require moving through a conditional epsilon
+    /// transition before inspecting the first byte in the haystack.
+    ///
+    /// This can be useful for determining what kinds of assertions need to
+    /// be satisfied at the beginning of a search.
+    /// For example, typically DFAs in this crate will build a distinct
+    /// starting state for each possible starting configuration that might
+    /// result in look-around assertions being satisfied differently.
+    /// However, if the set returned here is empty, then you know that the
+    /// start state is invariant because there are no conditional epsilon
+    /// transitions to consider.
+    ///
+    /// # Example
+    ///
+    /// This example shows how this routine varies based on the regex pattern:
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, util::look::Look};
+    ///
+    /// // No look-around at all.
+    /// let nfa = NFA::new("a")?;
+    /// assert!(nfa.look_set_prefix_any().is_empty());
+    ///
+    /// // When multiple patterns are present, since this returns the union,
+    /// // it will include look-around assertions that only appear in one
+    /// // pattern. But it will only include assertions that are in the prefix
+    /// // of a pattern. For example, this includes '^' but not '$' even
+    /// // though '$' does appear.
+    /// let nfa = NFA::new_many(&["a", "b", "^ab$", "c"])?;
+    /// assert!(nfa.look_set_prefix_any().contains(Look::Start));
+    /// assert!(!nfa.look_set_prefix_any().contains(Look::End));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn look_set_prefix_any(&self) -> LookSet {
+        self.0.look_set_prefix_any
+    }
+
+    // FIXME: The `look_set_prefix_all` computation was not correct, and it
+    // seemed a little tricky to fix it. Since I wasn't actually using it for
+    // anything, I just decided to remove it in the run up to the regex 1.9
+    // release. If you need this, please file an issue.
+    /*
+    /// Returns the intersection of all prefix look-around assertions for
+    /// every pattern in this NFA. When the returned set is empty, it implies
+    /// at least one of the patterns does not require moving through a
+    /// conditional epsilon transition before inspecting the first byte in
+    /// the haystack. Conversely, when the set contains an assertion, it
+    /// implies that every pattern in the NFA also contains that assertion
+    /// in its prefix.
+    ///
+    /// This can be useful for determining what kinds of assertions need to
+    /// be satisfied at the beginning of a search. For example, if you know
+    /// that [`Look::Start`] is in the prefix intersection set returned here,
+    /// then you know that all searches, regardless of input configuration,
+    /// will be anchored.
+    ///
+    /// # Example
+    ///
+    /// This example shows how this routine varies based on the regex pattern:
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, util::look::Look};
+    ///
+    /// // No look-around at all.
+    /// let nfa = NFA::new("a")?;
+    /// assert!(nfa.look_set_prefix_all().is_empty());
+    ///
+    /// // When multiple patterns are present, since this returns the
+    /// // intersection, it will only include assertions present in every
+    /// // prefix, and only the prefix.
+    /// let nfa = NFA::new_many(&["^a$", "^b$", "$^ab$", "^c$"])?;
+    /// assert!(nfa.look_set_prefix_all().contains(Look::Start));
+    /// assert!(!nfa.look_set_prefix_all().contains(Look::End));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn look_set_prefix_all(&self) -> LookSet {
+        self.0.look_set_prefix_all
+    }
+    */
+
+    /// Returns the memory usage, in bytes, of this NFA.
+    ///
+    /// This does **not** include the stack size used up by this NFA. To
+    /// compute that, use `std::mem::size_of::<NFA>()`.
+    ///
+    /// # Example
+    ///
+    /// This example shows that large Unicode character classes can use quite
+    /// a bit of memory.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::nfa::thompson::NFA;
+    ///
+    /// let nfa_unicode = NFA::new(r"\w")?;
+    /// let nfa_ascii = NFA::new(r"(?-u:\w)")?;
+    ///
+    /// assert!(10 * nfa_ascii.memory_usage() < nfa_unicode.memory_usage());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn memory_usage(&self) -> usize {
+        use core::mem::size_of;
+
+        size_of::<Inner>() // allocated on the heap via Arc
+            + self.0.states.len() * size_of::<State>()
+            + self.0.start_pattern.len() * size_of::<StateID>()
+            + self.0.group_info.memory_usage()
+            + self.0.memory_extra
+    }
+}
+
+impl fmt::Debug for NFA {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+/// The "inner" part of the NFA. We split this part out so that we can easily
+/// wrap it in an `Arc` above in the definition of `NFA`.
+///
+/// See builder.rs for the code that actually builds this type. This module
+/// does provide (internal) mutable methods for adding things to this
+/// NFA before finalizing it, but the high level construction process is
+/// controlled by the builder abstraction. (Which is complicated enough to
+/// get its own module.)
+#[derive(Default)]
+pub(super) struct Inner {
+    /// The state sequence. This sequence is guaranteed to be indexable by
+    /// all starting state IDs, and it is also guaranteed to contain at most
+    /// one `Match` state for each pattern compiled into this NFA. (A pattern
+    /// may not have a corresponding `Match` state if a `Match` state is
+    /// impossible to reach.)
+    states: Vec<State>,
+    /// The anchored starting state of this NFA.
+    start_anchored: StateID,
+    /// The unanchored starting state of this NFA.
+    start_unanchored: StateID,
+    /// The starting states for each individual pattern. Starting at any
+    /// of these states will result in only an anchored search for the
+    /// corresponding pattern. The vec is indexed by pattern ID. When the NFA
+    /// contains a single regex, then `start_pattern[0]` and `start_anchored`
+    /// are always equivalent.
+    start_pattern: Vec<StateID>,
+    /// Info about the capturing groups in this NFA. This is responsible for
+    /// mapping groups to slots, mapping groups to names and names to groups.
+    group_info: GroupInfo,
+    /// A representation of equivalence classes over the transitions in this
+    /// NFA. Two bytes in the same equivalence class must not discriminate
+    /// between a match or a non-match. This map can be used to shrink the
+    /// total size of a DFA's transition table with a small match-time cost.
+    ///
+    /// Note that the NFA's transitions are *not* defined in terms of these
+    /// equivalence classes. The NFA's transitions are defined on the
+    /// original byte values. For the most part, this is because they
+    /// wouldn't really help the NFA much since the NFA already uses a sparse
+    /// representation to represent transitions. Byte classes are most
+    /// effective in a dense representation.
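+    ///
+    /// For example, for the single pattern `[a-z]+`, the bytes `a` through
+    /// `z` can never discriminate between a match and a non-match relative
+    /// to one another, so they can share one equivalence class, with the
+    /// remaining bytes partitioned separately. (This is a sketch of the
+    /// idea; the exact classes are computed from the transitions that are
+    /// actually added.)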
+    byte_class_set: ByteClassSet,
+    /// This is generated from `byte_class_set`, and essentially represents
+    /// the same thing but supports different access patterns. Namely, this
+    /// permits looking up the equivalence class of a byte very cheaply.
+    ///
+    /// Ideally we would just store this, but because of annoying code
+    /// structure reasons, we keep both this and `byte_class_set` around for
+    /// now. I think I would prefer that `byte_class_set` were computed in
+    /// the `Builder`, but right now, we compute it as states are added to
+    /// the `NFA`.
+    byte_classes: ByteClasses,
+    /// Whether this NFA has a `Capture` state anywhere.
+    has_capture: bool,
+    /// When the empty string is in the language matched by this NFA.
+    has_empty: bool,
+    /// Whether UTF-8 mode is enabled for this NFA. Briefly, this means that
+    /// all non-empty matches produced by this NFA correspond to spans of
+    /// valid UTF-8, and any empty matches produced by this NFA that split a
+    /// UTF-8 encoded codepoint should be filtered out by the corresponding
+    /// regex engine.
+    utf8: bool,
+    /// Whether this NFA is meant to be matched in reverse or not.
+    reverse: bool,
+    /// The matcher to be used for look-around assertions.
+    look_matcher: LookMatcher,
+    /// The union of all look-around assertions that occur anywhere within
+    /// this NFA. If this set is empty, then it means there are precisely
+    /// zero conditional epsilon transitions in the NFA.
+    look_set_any: LookSet,
+    /// The union of all look-around assertions that occur as a zero-length
+    /// prefix for any of the patterns in this NFA.
+    look_set_prefix_any: LookSet,
+    /*
+    /// The intersection of all look-around assertions that occur as a
+    /// zero-length prefix for any of the patterns in this NFA.
+    look_set_prefix_all: LookSet,
+    */
+    /// Heap memory used indirectly by NFA states and other things (like the
+    /// various capturing group representations above). Since each state
+    /// might use a different amount of heap, we need to keep track of this
+    /// incrementally.
+    memory_extra: usize,
+}
+
+impl Inner {
+    /// Runs any last finalization bits and turns this into a full NFA.
+    pub(super) fn into_nfa(mut self) -> NFA {
+        self.byte_classes = self.byte_class_set.byte_classes();
+        // Do epsilon closure from the start state of every pattern in order
+        // to compute various properties such as look-around assertions and
+        // whether the empty string can be matched.
+        let mut stack = vec![];
+        let mut seen = SparseSet::new(self.states.len());
+        for &start_id in self.start_pattern.iter() {
+            stack.push(start_id);
+            seen.clear();
+            // let mut prefix_all = LookSet::full();
+            let mut prefix_any = LookSet::empty();
+            while let Some(sid) = stack.pop() {
+                if !seen.insert(sid) {
+                    continue;
+                }
+                match self.states[sid] {
+                    State::ByteRange { .. }
+                    | State::Dense { .. }
+                    | State::Fail => continue,
+                    State::Sparse(_) => {
+                        // This snippet below will rewrite this sparse state
+                        // as a dense state. By doing it here, we apply this
+                        // optimization to all hot "sparse" states since these
+                        // are the states that are reachable from the start
+                        // state via an epsilon closure.
+                        //
+                        // Unfortunately, this optimization did not seem to
+                        // help much in some very limited ad hoc benchmarking.
+                        //
+                        // I left the 'Dense' state type in place in case we
+                        // want to revisit this, but I suspect the real way
+                        // to make forward progress is a more fundamental
+                        // rearchitecting of how data in the NFA is laid out.
+                        // I think we should consider a single contiguous
+                        // allocation instead of all this indirection and
+                        // potential heap allocations for every state. But
+                        // this is a large re-design and will require API
+                        // breaking changes.
+                        // self.memory_extra -= self.states[sid].memory_usage();
+                        // let trans = DenseTransitions::from_sparse(sparse);
+                        // self.states[sid] = State::Dense(trans);
+                        // self.memory_extra += self.states[sid].memory_usage();
+                        continue;
+                    }
+                    State::Match { .. } => self.has_empty = true,
+                    State::Look { look, next } => {
+                        prefix_any = prefix_any.insert(look);
+                        stack.push(next);
+                    }
+                    State::Union { ref alternates } => {
+                        // Order doesn't matter here, since we're just
+                        // dealing with look-around sets. But if we do richer
+                        // analysis here that needs to care about preference
+                        // order, then this should be done in reverse.
+                        stack.extend(alternates.iter());
+                    }
+                    State::BinaryUnion { alt1, alt2 } => {
+                        stack.push(alt2);
+                        stack.push(alt1);
+                    }
+                    State::Capture { next, .. } => {
+                        stack.push(next);
+                    }
+                }
+            }
+            self.look_set_prefix_any =
+                self.look_set_prefix_any.union(prefix_any);
+        }
+        NFA(Arc::new(self))
+    }
+
+    /// Returns the capturing group info for this NFA.
+    pub(super) fn group_info(&self) -> &GroupInfo {
+        &self.group_info
+    }
+
+    /// Add the given state to this NFA after allocating a fresh identifier
+    /// for it.
+    ///
+    /// This panics if too many states are added such that a fresh identifier
+    /// could not be created. (Currently, the only caller of this routine is
+    /// a `Builder`, and it upholds this invariant.)
+    pub(super) fn add(&mut self, state: State) -> StateID {
+        match state {
+            State::ByteRange { ref trans } => {
+                self.byte_class_set.set_range(trans.start, trans.end);
+            }
+            State::Sparse(ref sparse) => {
+                for trans in sparse.transitions.iter() {
+                    self.byte_class_set.set_range(trans.start, trans.end);
+                }
+            }
+            State::Dense { .. } => unreachable!(),
+            State::Look { look, .. } => {
+                self.look_matcher
+                    .add_to_byteset(look, &mut self.byte_class_set);
+                self.look_set_any = self.look_set_any.insert(look);
+            }
+            State::Capture { .. } => {
+                self.has_capture = true;
+            }
+            State::Union { .. }
+            | State::BinaryUnion { .. }
+            | State::Fail
+            | State::Match { .. } => {}
+        }
+
+        let id = StateID::new(self.states.len()).unwrap();
+        self.memory_extra += state.memory_usage();
+        self.states.push(state);
+        id
+    }
+
+    /// Set the starting state identifiers for this NFA.
+    ///
+    /// `start_anchored` and `start_unanchored` may be equivalent. When they
+    /// are, then the NFA can only execute anchored searches. This might
+    /// occur, for example, for patterns that are unconditionally anchored,
+    /// e.g., `^foo`.
+    pub(super) fn set_starts(
+        &mut self,
+        start_anchored: StateID,
+        start_unanchored: StateID,
+        start_pattern: &[StateID],
+    ) {
+        self.start_anchored = start_anchored;
+        self.start_unanchored = start_unanchored;
+        self.start_pattern = start_pattern.to_vec();
+    }
+
+    /// Sets the UTF-8 mode of this NFA.
+    pub(super) fn set_utf8(&mut self, yes: bool) {
+        self.utf8 = yes;
+    }
+
+    /// Sets the reverse mode of this NFA.
+    pub(super) fn set_reverse(&mut self, yes: bool) {
+        self.reverse = yes;
+    }
+
+    /// Sets the look-around assertion matcher for this NFA.
+    pub(super) fn set_look_matcher(&mut self, m: LookMatcher) {
+        self.look_matcher = m;
+    }
+
+    /// Set the capturing groups for this NFA.
+    ///
+    /// The given slice should contain the capturing groups for each pattern.
+    /// The capturing groups in turn should correspond to the total number of
+    /// capturing groups in the pattern, including the anonymous first
+    /// capture group for each pattern. If a capturing group does have a
+    /// name, then it should be provided as an `Arc<str>`.
+    ///
+    /// This returns an error if a corresponding `GroupInfo` could not be
+    /// built.
+    pub(super) fn set_captures(
+        &mut self,
+        captures: &[Vec<Option<Arc<str>>>],
+    ) -> Result<(), GroupInfoError> {
+        self.group_info = GroupInfo::new(
+            captures.iter().map(|x| x.iter().map(|y| y.as_ref())),
+        )?;
+        Ok(())
+    }
+
+    /// Remap the transitions in every state of this NFA using the given map.
+    /// The given map should be indexed according to the state ID namespace
+    /// used by the transitions of the states currently in this NFA.
+    ///
+    /// This is particularly useful to the NFA builder, since it is
+    /// convenient to add NFA states in order to produce their final IDs.
+    /// Then, after all of the intermediate "empty" states (unconditional
+    /// epsilon transitions) have been removed from the builder's
+    /// representation, we can re-map all of the transitions in the states
+    /// already added to their final IDs.
+    pub(super) fn remap(&mut self, old_to_new: &[StateID]) {
+        for state in &mut self.states {
+            state.remap(old_to_new);
+        }
+        self.start_anchored = old_to_new[self.start_anchored];
+        self.start_unanchored = old_to_new[self.start_unanchored];
+        for id in self.start_pattern.iter_mut() {
+            *id = old_to_new[*id];
+        }
+    }
+}
+
+impl fmt::Debug for Inner {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        writeln!(f, "thompson::NFA(")?;
+        for (sid, state) in self.states.iter().with_state_ids() {
+            let status = if sid == self.start_anchored {
+                '^'
+            } else if sid == self.start_unanchored {
+                '>'
+            } else {
+                ' '
+            };
+            writeln!(f, "{}{:06?}: {:?}", status, sid.as_usize(), state)?;
+        }
+        let pattern_len = self.start_pattern.len();
+        if pattern_len > 1 {
+            writeln!(f, "")?;
+            for pid in 0..pattern_len {
+                let sid = self.start_pattern[pid];
+                writeln!(f, "START({:06?}): {:?}", pid, sid.as_usize())?;
+            }
+        }
+        writeln!(f, "")?;
+        writeln!(
+            f,
+            "transition equivalence classes: {:?}",
+            self.byte_classes,
+        )?;
+        writeln!(f, ")")?;
+        Ok(())
+    }
+}
+
+/// A state in an NFA.
+///
+/// In theory, it can help to conceptualize an `NFA` as a graph consisting of
+/// `State`s. Each `State` contains its complete set of outgoing transitions.
+///
+/// In practice, it can help to conceptualize an `NFA` as a sequence of
+/// instructions for a virtual machine. Each `State` says what to do and
+/// where to go next.
+///
+/// Strictly speaking, the practical interpretation is the most correct one,
+/// because of the [`Capture`](State::Capture) state. Namely, a `Capture`
+/// state always forwards execution to another state unconditionally. Its
+/// only purpose is to cause a side effect: the recording of the current
+/// input position at a particular location in memory. In this sense, an
+/// `NFA` has more power than a theoretical non-deterministic finite
+/// automaton.
+///
+/// For most uses of this crate, it is likely that one may never even need to
+/// be aware of this type at all. The main use cases for looking at `State`s
+/// directly are if you need to write your own search implementation or if
+/// you need to do some kind of analysis on the NFA.
+#[derive(Clone, Eq, PartialEq)]
+pub enum State {
+    /// A state with a single transition that can only be taken if the
+    /// current input symbol is in a particular range of bytes.
+    ByteRange {
+        /// The transition from this state to the next.
+        trans: Transition,
+    },
+    /// A state with possibly many transitions represented in a sparse
+    /// fashion. Transitions are non-overlapping and ordered
+    /// lexicographically by input range.
+    ///
+    /// In practice, this is used for encoding UTF-8 automata.
+    /// Its presence is primarily an optimization that avoids many additional
+    /// unconditional epsilon transitions (via [`Union`](State::Union)
+    /// states), and thus decreases the overhead of traversing the NFA. This
+    /// can improve both matching time and DFA construction time.
+    Sparse(SparseTransitions),
+    /// A dense representation of a state with multiple transitions.
+    Dense(DenseTransitions),
+    /// A conditional epsilon transition satisfied via some sort of
+    /// look-around. Look-around is limited to anchor and word boundary
+    /// assertions.
+    ///
+    /// Look-around states are meant to be evaluated while performing epsilon
+    /// closure (computing the set of states reachable from a particular
+    /// state via only epsilon transitions). If the current position in the
+    /// haystack satisfies the look-around assertion, then you're permitted
+    /// to follow that epsilon transition.
+    Look {
+        /// The look-around assertion that must be satisfied before moving
+        /// to `next`.
+        look: Look,
+        /// The state to transition to if the look-around assertion is
+        /// satisfied.
+        next: StateID,
+    },
+    /// An alternation such that there exists an epsilon transition to all
+    /// states in `alternates`, where matches found via earlier transitions
+    /// are preferred over later transitions.
+    Union {
+        /// An ordered sequence of unconditional epsilon transitions to other
+        /// states. Transitions earlier in the sequence are preferred over
+        /// transitions later in the sequence.
+        alternates: Box<[StateID]>,
+    },
+    /// An alternation such that there exist precisely two unconditional
+    /// epsilon transitions, where matches found via `alt1` are preferred
+    /// over matches found via `alt2`.
+    ///
+    /// This state exists as a common special case of Union where there are
+    /// only two alternates. In this case, we don't need any allocations to
+    /// represent the state. This saves a bit of memory and also saves an
+    /// additional memory access when traversing the NFA.
+    BinaryUnion {
+        /// An unconditional epsilon transition to another NFA state. This
+        /// is preferred over `alt2`.
+        alt1: StateID,
+        /// An unconditional epsilon transition to another NFA state. Matches
+        /// reported via this transition should only be reported if no
+        /// matches were found by following `alt1`.
+        alt2: StateID,
+    },
+    /// An empty state that records a capture location.
+    ///
+    /// From the perspective of finite automata, this is precisely equivalent
+    /// to an unconditional epsilon transition, but serves the purpose of
+    /// instructing NFA simulations to record additional state when the
+    /// finite state machine passes through this epsilon transition.
+    ///
+    /// `slot` in this context refers to the specific capture group slot
+    /// offset that is being recorded. Each capturing group has two slots
+    /// corresponding to the start and end of the matching portion of that
+    /// group.
+    ///
+    /// The pattern ID and capture group index are also included in this
+    /// state in case they are useful. But mostly, all you'll need is `next`
+    /// and `slot`.
+    Capture {
+        /// The state to transition to, unconditionally.
+        next: StateID,
+        /// The pattern ID that this capture belongs to.
+        pattern_id: PatternID,
+        /// The capture group index that this capture belongs to. Capture
+        /// group indices are local to each pattern. For example, when
+        /// capturing groups are enabled, every pattern has a capture group
+        /// at index `0`.
+        group_index: SmallIndex,
+        /// The slot index for this capture.
+        /// Every capturing group has two slots: one for the start haystack
+        /// offset and one for the end haystack offset. Unlike capture group
+        /// indices, slot indices are global across all patterns in this NFA.
+        /// That is, each slot belongs to a single pattern, but there is only
+        /// one slot at index `i`.
+        slot: SmallIndex,
+    },
+    /// A state that cannot be transitioned out of. This is useful for cases
+    /// where you want to prevent matching from occurring. For example, if
+    /// your regex parser permits empty character classes, then one could
+    /// choose a `Fail` state to represent them. (An empty character class
+    /// can be thought of as an empty set. Since nothing is in an empty set,
+    /// they can never match anything.)
+    Fail,
+    /// A match state. There is at least one such occurrence of this state
+    /// for each regex that can match that is in this NFA.
+    Match {
+        /// The matching pattern ID.
+        pattern_id: PatternID,
+    },
+}
+
+impl State {
+    /// Returns true if and only if this state contains one or more epsilon
+    /// transitions.
+    ///
+    /// In practice, a state has no outgoing transitions (like `Match`), has
+    /// only non-epsilon transitions (like `ByteRange`) or has only epsilon
+    /// transitions (like `Union`).
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::{State, Transition},
+    ///     util::primitives::{PatternID, StateID, SmallIndex},
+    /// };
+    ///
+    /// // Capture states are epsilon transitions.
+    /// let state = State::Capture {
+    ///     next: StateID::ZERO,
+    ///     pattern_id: PatternID::ZERO,
+    ///     group_index: SmallIndex::ZERO,
+    ///     slot: SmallIndex::ZERO,
+    /// };
+    /// assert!(state.is_epsilon());
+    ///
+    /// // ByteRange states are not.
+    /// let state = State::ByteRange {
+    ///     trans: Transition { start: b'a', end: b'z', next: StateID::ZERO },
+    /// };
+    /// assert!(!state.is_epsilon());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn is_epsilon(&self) -> bool {
+        match *self {
+            State::ByteRange { .. }
+            | State::Sparse { .. }
+            | State::Dense { .. }
+            | State::Fail
+            | State::Match { .. } => false,
+            State::Look { .. }
+            | State::Union { .. }
+            | State::BinaryUnion { .. }
+            | State::Capture { .. } => true,
+        }
+    }
+
+    /// Returns the heap memory usage of this NFA state in bytes.
+    fn memory_usage(&self) -> usize {
+        match *self {
+            State::ByteRange { .. }
+            | State::Look { .. }
+            | State::BinaryUnion { .. }
+            | State::Capture { .. }
+            | State::Match { .. }
+            | State::Fail => 0,
+            State::Sparse(SparseTransitions { ref transitions }) => {
+                transitions.len() * mem::size_of::<Transition>()
+            }
+            State::Dense { .. } => 256 * mem::size_of::<StateID>(),
+            State::Union { ref alternates } => {
+                alternates.len() * mem::size_of::<StateID>()
+            }
+        }
+    }
+
+    /// Remap the transitions in this state using the given map. Namely, the
+    /// given map should be indexed according to the transitions currently
+    /// in this state.
+    ///
+    /// This is used during the final phase of the NFA compiler, which turns
+    /// its intermediate NFA into the final NFA.
+    fn remap(&mut self, remap: &[StateID]) {
+        match *self {
+            State::ByteRange { ref mut trans } => {
+                trans.next = remap[trans.next]
+            }
+            State::Sparse(SparseTransitions { ref mut transitions }) => {
+                for t in transitions.iter_mut() {
+                    t.next = remap[t.next];
+                }
+            }
+            State::Dense(DenseTransitions { ref mut transitions }) => {
+                for sid in transitions.iter_mut() {
+                    *sid = remap[*sid];
+                }
+            }
+            State::Look { ref mut next, .. } => *next = remap[*next],
+            State::Union { ref mut alternates } => {
+                for alt in alternates.iter_mut() {
+                    *alt = remap[*alt];
+                }
+            }
+            State::BinaryUnion { ref mut alt1, ref mut alt2 } => {
+                *alt1 = remap[*alt1];
+                *alt2 = remap[*alt2];
+            }
+            State::Capture { ref mut next, .. } => *next = remap[*next],
+            State::Fail => {}
+            State::Match { .. } => {}
+        }
+    }
+}
+
+impl fmt::Debug for State {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match *self {
+            State::ByteRange { ref trans } => trans.fmt(f),
+            State::Sparse(SparseTransitions { ref transitions }) => {
+                let rs = transitions
+                    .iter()
+                    .map(|t| format!("{:?}", t))
+                    .collect::<Vec<String>>()
+                    .join(", ");
+                write!(f, "sparse({})", rs)
+            }
+            State::Dense(ref dense) => {
+                write!(f, "dense(")?;
+                for (i, t) in dense.iter().enumerate() {
+                    if i > 0 {
+                        write!(f, ", ")?;
+                    }
+                    write!(f, "{:?}", t)?;
+                }
+                write!(f, ")")
+            }
+            State::Look { ref look, next } => {
+                write!(f, "{:?} => {:?}", look, next.as_usize())
+            }
+            State::Union { ref alternates } => {
+                let alts = alternates
+                    .iter()
+                    .map(|id| format!("{:?}", id.as_usize()))
+                    .collect::<Vec<String>>()
+                    .join(", ");
+                write!(f, "union({})", alts)
+            }
+            State::BinaryUnion { alt1, alt2 } => {
+                write!(
+                    f,
+                    "binary-union({}, {})",
+                    alt1.as_usize(),
+                    alt2.as_usize()
+                )
+            }
+            State::Capture { next, pattern_id, group_index, slot } => {
+                write!(
+                    f,
+                    "capture(pid={:?}, group={:?}, slot={:?}) => {:?}",
+                    pattern_id.as_usize(),
+                    group_index.as_usize(),
+                    slot.as_usize(),
+                    next.as_usize(),
+                )
+            }
+            State::Fail => write!(f, "FAIL"),
+            State::Match { pattern_id } => {
+                write!(f, "MATCH({:?})", pattern_id.as_usize())
+            }
+        }
+    }
+}
+
+/// A sequence of transitions used to represent a sparse state.
+///
+/// This is the primary representation of a [`Sparse`](State::Sparse) state.
+/// It corresponds to a sorted sequence of transitions with non-overlapping
+/// byte ranges. If the byte at the current position in the haystack matches
+/// one of the byte ranges, then the finite state machine should take the
+/// corresponding transition.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct SparseTransitions {
+    /// The sorted sequence of non-overlapping transitions.
+    pub transitions: Box<[Transition]>,
+}
+
+impl SparseTransitions {
+    /// This follows the matching transition for a particular byte.
+    ///
+    /// The matching transition is found by looking for a matching byte
+    /// range (there is at most one) corresponding to the position `at` in
+    /// `haystack`.
+    ///
+    /// If `at >= haystack.len()`, then this returns `None`.
+    #[inline]
+    pub fn matches(&self, haystack: &[u8], at: usize) -> Option<StateID> {
+        haystack.get(at).and_then(|&b| self.matches_byte(b))
+    }
+
+    /// This follows the matching transition for any member of the alphabet.
+    ///
+    /// The matching transition is found by looking for a matching byte
+    /// range (there is at most one) corresponding to the given alphabet
+    /// unit. If the given alphabet unit is [`EOI`](alphabet::Unit::eoi),
+    /// then this always returns `None`.
+    #[inline]
+    pub(crate) fn matches_unit(
+        &self,
+        unit: alphabet::Unit,
+    ) -> Option<StateID> {
+        unit.as_u8().map_or(None, |byte| self.matches_byte(byte))
+    }
+
+    /// This follows the matching transition for a particular byte.
+    ///
+    /// The matching transition is found by looking for a matching byte range
+    /// (there is at most one) corresponding to the byte given.
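+    ///
+    /// # Example
+    ///
+    /// A small sketch (constructing the state by hand, which the field
+    /// visibility permits):
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::{SparseTransitions, Transition},
+    ///     util::primitives::StateID,
+    /// };
+    ///
+    /// let trans = SparseTransitions {
+    ///     transitions: vec![
+    ///         Transition { start: b'a', end: b'z', next: StateID::ZERO },
+    ///     ].into_boxed_slice(),
+    /// };
+    /// assert_eq!(Some(StateID::ZERO), trans.matches_byte(b'q'));
+    /// assert_eq!(None, trans.matches_byte(b'A'));
+    /// ```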
+    #[inline]
+    pub fn matches_byte(&self, byte: u8) -> Option<StateID> {
+        for t in self.transitions.iter() {
+            if t.start > byte {
+                break;
+            } else if t.matches_byte(byte) {
+                return Some(t.next);
+            }
+        }
+        None
+
+        /*
+        // This is an alternative implementation that uses binary search. In
+        // some ad hoc experiments, like
+        //
+        //     regex-cli find match pikevm -b -p '\b\w+\b' non-ascii-file
+        //
+        // I could not observe any improvement, and in fact, things seemed
+        // to be a bit slower. I can see an improvement in at least one
+        // benchmark:
+        //
+        //     regex-cli find match pikevm -b -p '\pL{100}' all-codepoints-utf8
+        //
+        // Where total search time goes from 3.2s to 2.4s when using binary
+        // search.
+        self.transitions
+            .binary_search_by(|t| {
+                if t.end < byte {
+                    core::cmp::Ordering::Less
+                } else if t.start > byte {
+                    core::cmp::Ordering::Greater
+                } else {
+                    core::cmp::Ordering::Equal
+                }
+            })
+            .ok()
+            .map(|i| self.transitions[i].next)
+        */
+    }
+}
+
+/// A sequence of transitions used to represent a dense state.
+///
+/// This is the primary representation of a [`Dense`](State::Dense) state. It
+/// provides constant time matching. That is, given a byte in a haystack and
+/// a `DenseTransitions`, one can determine if the state matches in constant
+/// time.
+///
+/// This is in contrast to `SparseTransitions`, whose time complexity is
+/// necessarily bigger than constant time. Also in contrast,
+/// `DenseTransitions` usually requires (much) more heap memory.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct DenseTransitions {
+    /// A dense representation of this state's transitions on the heap. This
+    /// always has length 256.
+    pub transitions: Box<[StateID]>,
+}
+
+impl DenseTransitions {
+    /// This follows the matching transition for a particular byte.
+    ///
+    /// The matching transition is found by looking for a transition that
+    /// doesn't correspond to `StateID::ZERO` for the byte at the given
+    /// position `at` in `haystack`.
+    ///
+    /// If `at >= haystack.len()`, then this returns `None`.
+    #[inline]
+    pub fn matches(&self, haystack: &[u8], at: usize) -> Option<StateID> {
+        haystack.get(at).and_then(|&b| self.matches_byte(b))
+    }
+
+    /// This follows the matching transition for any member of the alphabet.
+    ///
+    /// The matching transition is found by looking for a transition that
+    /// doesn't correspond to `StateID::ZERO` for the given alphabet unit.
+    ///
+    /// If the given alphabet unit is [`EOI`](alphabet::Unit::eoi), then this
+    /// returns `None`.
+    #[inline]
+    pub(crate) fn matches_unit(
+        &self,
+        unit: alphabet::Unit,
+    ) -> Option<StateID> {
+        unit.as_u8().map_or(None, |byte| self.matches_byte(byte))
+    }
+
+    /// This follows the matching transition for a particular byte.
+    ///
+    /// The matching transition is found by looking for a transition that
+    /// doesn't correspond to `StateID::ZERO` for the given `byte`.
+    #[inline]
+    pub fn matches_byte(&self, byte: u8) -> Option<StateID> {
+        let next = self.transitions[usize::from(byte)];
+        if next == StateID::ZERO {
+            None
+        } else {
+            Some(next)
+        }
+    }
+
+    /*
+    /// The dense state optimization isn't currently enabled, so permit a
+    /// little bit of dead code.
+    pub(crate) fn from_sparse(sparse: &SparseTransitions) -> DenseTransitions {
+        let mut dense = vec![StateID::ZERO; 256];
+        for t in sparse.transitions.iter() {
+            for b in t.start..=t.end {
+                dense[usize::from(b)] = t.next;
+            }
+        }
+        DenseTransitions { transitions: dense.into_boxed_slice() }
+    }
+    */
+
+    /// Returns an iterator over all transitions that don't point to
+    /// `StateID::ZERO`.
+    pub(crate) fn iter(&self) -> impl Iterator<Item = Transition> + '_ {
+        use crate::util::int::Usize;
+        self.transitions
+            .iter()
+            .enumerate()
+            .filter(|&(_, &sid)| sid != StateID::ZERO)
+            .map(|(byte, &next)| Transition {
+                start: byte.as_u8(),
+                end: byte.as_u8(),
+                next,
+            })
+    }
+}
+
+/// A single transition to another state.
+///
+/// This transition may only be followed if the current byte in the haystack
+/// falls in the inclusive range of bytes specified.
+#[derive(Clone, Copy, Eq, Hash, PartialEq)]
+pub struct Transition {
+    /// The inclusive start of the byte range.
+    pub start: u8,
+    /// The inclusive end of the byte range.
+    pub end: u8,
+    /// The identifier of the state to transition to.
+    pub next: StateID,
+}
+
+impl Transition {
+    /// Returns true if the position `at` in `haystack` falls in this
+    /// transition's range of bytes.
+    ///
+    /// If `at >= haystack.len()`, then this returns `false`.
+    pub fn matches(&self, haystack: &[u8], at: usize) -> bool {
+        haystack.get(at).map_or(false, |&b| self.matches_byte(b))
+    }
+
+    /// Returns true if the given alphabet unit falls in this transition's
+    /// range of bytes. If the given unit is [`EOI`](alphabet::Unit::eoi),
+    /// then this returns `false`.
+    pub fn matches_unit(&self, unit: alphabet::Unit) -> bool {
+        unit.as_u8().map_or(false, |byte| self.matches_byte(byte))
+    }
+
+    /// Returns true if the given byte falls in this transition's range of
+    /// bytes.
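+    ///
+    /// # Example
+    ///
+    /// A brief sketch of checking a transition by hand (the field
+    /// visibility permits constructing one directly):
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::Transition,
+    ///     util::primitives::StateID,
+    /// };
+    ///
+    /// let t = Transition { start: b'a', end: b'z', next: StateID::ZERO };
+    /// assert!(t.matches_byte(b'm'));
+    /// assert!(!t.matches_byte(b'A'));
+    /// ```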
+    pub fn matches_byte(&self, byte: u8) -> bool {
+        self.start <= byte && byte <= self.end
+    }
+}
+
+impl fmt::Debug for Transition {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use crate::util::escape::DebugByte;
+
+        let Transition { start, end, next } = *self;
+        if self.start == self.end {
+            write!(f, "{:?} => {:?}", DebugByte(start), next.as_usize())
+        } else {
+            write!(
+                f,
+                "{:?}-{:?} => {:?}",
+                DebugByte(start),
+                DebugByte(end),
+                next.as_usize(),
+            )
+        }
+    }
+}
+
+/// An iterator over all pattern IDs in an NFA.
+///
+/// This iterator is created by [`NFA::patterns`].
+///
+/// The lifetime parameter `'a` refers to the lifetime of the NFA from which
+/// this pattern iterator was created.
+#[derive(Debug)]
+pub struct PatternIter<'a> {
+    it: PatternIDIter,
+    /// We explicitly associate a lifetime with this iterator even though we
+    /// don't actually borrow anything from the NFA. We do this for backward
+    /// compatibility purposes. If we ever do need to borrow something from
+    /// the NFA, then we can and just get rid of this marker without breaking
+    /// the public API.
+    _marker: core::marker::PhantomData<&'a ()>,
+}
+
+impl<'a> Iterator for PatternIter<'a> {
+    type Item = PatternID;
+
+    fn next(&mut self) -> Option<PatternID> {
+        self.it.next()
+    }
+}
+
+#[cfg(all(test, feature = "nfa-pikevm"))]
+mod tests {
+    use super::*;
+    use crate::{nfa::thompson::pikevm::PikeVM, Input};
+
+    // This asserts that an NFA state doesn't have its size changed. It is
+    // *really* easy to accidentally increase the size, and thus potentially
+    // dramatically increase the memory usage of every NFA.
+    //
+    // This assert doesn't mean we absolutely cannot increase the size of an
+    // NFA state. We can. It's just here to make sure we do it knowingly and
+    // intentionally.
+    #[test]
+    fn state_has_small_size() {
+        #[cfg(target_pointer_width = "64")]
+        assert_eq!(24, core::mem::size_of::<State>());
+        #[cfg(target_pointer_width = "32")]
+        assert_eq!(20, core::mem::size_of::<State>());
+    }
+
+    #[test]
+    fn always_match() {
+        let re = PikeVM::new_from_nfa(NFA::always_match()).unwrap();
+        let mut cache = re.create_cache();
+        let mut caps = re.create_captures();
+        let mut find = |haystack, start, end| {
+            let input = Input::new(haystack).range(start..end);
+            re.search(&mut cache, &input, &mut caps);
+            caps.get_match().map(|m| m.end())
+        };
+
+        assert_eq!(Some(0), find("", 0, 0));
+        assert_eq!(Some(0), find("a", 0, 1));
+        assert_eq!(Some(1), find("a", 1, 1));
+        assert_eq!(Some(0), find("ab", 0, 2));
+        assert_eq!(Some(1), find("ab", 1, 2));
+        assert_eq!(Some(2), find("ab", 2, 2));
+    }
+
+    #[test]
+    fn never_match() {
+        let re = PikeVM::new_from_nfa(NFA::never_match()).unwrap();
+        let mut cache = re.create_cache();
+        let mut caps = re.create_captures();
+        let mut find = |haystack, start, end| {
+            let input = Input::new(haystack).range(start..end);
+            re.search(&mut cache, &input, &mut caps);
+            caps.get_match().map(|m| m.end())
+        };
+
+        assert_eq!(None, find("", 0, 0));
+        assert_eq!(None, find("a", 0, 1));
+        assert_eq!(None, find("a", 1, 1));
+        assert_eq!(None, find("ab", 0, 2));
+        assert_eq!(None, find("ab", 1, 2));
+        assert_eq!(None, find("ab", 2, 2));
+    }
+}
diff --git a/vendor/regex-automata/src/nfa/thompson/pikevm.rs b/vendor/regex-automata/src/nfa/thompson/pikevm.rs
new file mode 100644
index 0000000..0128c15
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/thompson/pikevm.rs
@@ -0,0 +1,2359 @@
+/*!
+An NFA backed Pike VM for executing regex searches with capturing groups.
+
+This module provides a [`PikeVM`] that works by simulating an NFA and
+resolving all spans of capturing groups that participate in a match.
+*/
+
+#[cfg(feature = "internal-instrument-pikevm")]
+use core::cell::RefCell;
+
+use alloc::{vec, vec::Vec};
+
+use crate::{
+    nfa::thompson::{self, BuildError, State, NFA},
+    util::{
+        captures::Captures,
+        empty, iter,
+        prefilter::Prefilter,
+        primitives::{NonMaxUsize, PatternID, SmallIndex, StateID},
+        search::{
+            Anchored, HalfMatch, Input, Match, MatchKind, PatternSet, Span,
+        },
+        sparse_set::SparseSet,
+    },
+};
+
+/// A simple macro for conditionally executing instrumentation logic when
+/// the 'trace' log level is enabled. This is a compile-time no-op when the
+/// 'internal-instrument-pikevm' feature isn't enabled. The intent here is
+/// that this makes it easier to avoid doing extra work when instrumentation
+/// isn't enabled.
+///
+/// This macro accepts a closure of type `|&mut Counters|`. The closure can
+/// then increment counters (or whatever) in accordance with what one wants
+/// to track.
+macro_rules! instrument {
+    ($fun:expr) => {
+        #[cfg(feature = "internal-instrument-pikevm")]
+        {
+            let fun: &mut dyn FnMut(&mut Counters) = &mut $fun;
+            COUNTERS.with(|c: &RefCell<Counters>| fun(&mut *c.borrow_mut()));
+        }
+    };
+}
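+
+// For example, a search routine can bump a counter on each visited state
+// like so (a sketch: `record_state_visit` is an illustrative name, not
+// necessarily a method that the instrumentation code defines):
+//
+//     instrument!(|c| c.record_state_visit(sid));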
+
+#[cfg(feature = "internal-instrument-pikevm")]
+std::thread_local! {
+    /// Effectively global state used to keep track of instrumentation
+    /// counters. The "proper" way to do this is to thread it through the
+    /// PikeVM, but it makes the code quite icky. Since this is just a
+    /// debugging feature, we're content to relegate it to thread local
+    /// state. When instrumentation is enabled, the counters are reset at the
+    /// beginning of every search and printed (with the 'trace' log level) at
+    /// the end of every search.
+    static COUNTERS: RefCell<Counters> = RefCell::new(Counters::empty());
+}
+
+/// The configuration used for building a [`PikeVM`].
+///
+/// A PikeVM configuration is a simple data object that is typically used
+/// with [`Builder::configure`]. It can be cheaply cloned.
+///
+/// A default configuration can be created either with `Config::new`, or
+/// perhaps more conveniently, with [`PikeVM::config`].
+#[derive(Clone, Debug, Default)]
+pub struct Config {
+    match_kind: Option<MatchKind>,
+    pre: Option<Option<Prefilter>>,
+}
+
+impl Config {
+    /// Return a new default PikeVM configuration.
+    pub fn new() -> Config {
+        Config::default()
+    }
+
+    /// Set the desired match semantics.
+    ///
+    /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the
+    /// match semantics of Perl-like regex engines. That is, when multiple
+    /// patterns would match at the same leftmost position, the pattern that
+    /// appears first in the concrete syntax is chosen.
+    ///
+    /// Currently, the only other kind of match semantics supported is
+    /// [`MatchKind::All`]. This corresponds to "classical DFA" construction
+    /// where all possible matches are visited in the NFA by the `PikeVM`.
+    ///
+    /// Typically, `All` is used when one wants to execute an overlapping
+    /// search and `LeftmostFirst` otherwise. In particular, it rarely makes
+    /// sense to use `All` with the various "leftmost" find routines, since
+    /// the leftmost routines depend on the `LeftmostFirst` automata
+    /// construction strategy. Specifically, `LeftmostFirst` results in the
+    /// `PikeVM` simulating dead states as a way to terminate the search and
+    /// report a match. `LeftmostFirst` also supports non-greedy matches
+    /// using this strategy whereas `All` does not.
+    pub fn match_kind(mut self, kind: MatchKind) -> Config {
+        self.match_kind = Some(kind);
+        self
+    }
+
+    /// Set a prefilter to be used whenever a start state is entered.
+    ///
+    /// A [`Prefilter`] in this context is meant to accelerate searches by
+    /// looking for literal prefixes that every match for the corresponding
+    /// pattern (or patterns) must start with. Once a prefilter produces a
+    /// match, the underlying search routine continues on to try and confirm
+    /// the match.
+    ///
+    /// Be warned that setting a prefilter does not guarantee that the search
+    /// will be faster. While it's usually a good bet, if the prefilter
+    /// produces a lot of false positive candidates (i.e., positions matched
+    /// by the prefilter but not by the regex), then the overall result can
+    /// be slower than if you had just executed the regex engine without any
+    /// prefilters.
+    ///
+    /// By default no prefilter is set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::prefilter::Prefilter,
+    ///     Input, Match, MatchKind,
+    /// };
+    ///
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "bar"]);
+    /// let re = PikeVM::builder()
+    ///     .configure(PikeVM::config().prefilter(pre))
+    ///     .build(r"(foo|bar)[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("foo1 barfox bar");
+    /// assert_eq!(Some(Match::must(0, 5..11)), re.find(&mut cache, input));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Be warned though that an incorrect prefilter can lead to incorrect
+    /// results!
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::prefilter::Prefilter,
+    ///     Input, HalfMatch, MatchKind,
+    /// };
+    ///
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "car"]);
+    /// let re = PikeVM::builder()
+    ///     .configure(PikeVM::config().prefilter(pre))
+    ///     .build(r"(foo|bar)[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("foo1 barfox bar");
+    /// // No match reported even though there clearly is one!
+    /// assert_eq!(None, re.find(&mut cache, input));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn prefilter(mut self, pre: Option<Prefilter>) -> Config {
+        self.pre = Some(pre);
+        self
+    }
+
+    /// Returns the match semantics set in this configuration.
+    pub fn get_match_kind(&self) -> MatchKind {
+        self.match_kind.unwrap_or(MatchKind::LeftmostFirst)
+    }
+
+    /// Returns the prefilter set in this configuration, if one is set at
+    /// all.
+    pub fn get_prefilter(&self) -> Option<&Prefilter> {
+        self.pre.as_ref().unwrap_or(&None).as_ref()
+    }
+
+    /// Overwrite the default configuration such that the options in `o` are
+    /// always used. If an option in `o` is not set, then the corresponding
+    /// option in `self` is used. If it's not set in `self` either, then it
+    /// remains not set.
+    pub(crate) fn overwrite(&self, o: Config) -> Config {
+        Config {
+            match_kind: o.match_kind.or(self.match_kind),
+            pre: o.pre.or_else(|| self.pre.clone()),
+        }
+    }
+}
+
+/// A builder for a `PikeVM`.
+///
+/// This builder permits configuring options for the syntax of a pattern,
+/// the NFA construction and the `PikeVM` construction. This builder is
+/// different from a general purpose regex builder in that it permits fine
+/// grain configuration of the construction process. The trade off for this
+/// is complexity, and the possibility of setting a configuration that might
+/// not make sense. For example, there are two different UTF-8 modes:
+///
+/// * [`util::syntax::Config::utf8`](crate::util::syntax::Config::utf8)
+/// controls whether the pattern itself can contain sub-expressions that
+/// match invalid UTF-8.
+/// * [`thompson::Config::utf8`] controls whether empty matches that split a
+/// Unicode codepoint are reported or not.
+///
+/// Generally speaking, callers will want to either enable all of these or
+/// disable all of these.
+///
+/// # Example
+///
+/// This example shows how to disable UTF-8 mode in the syntax and the regex
+/// itself. This is generally what you want for matching on arbitrary bytes.
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::{self, pikevm::PikeVM},
+///     util::syntax,
+///     Match,
+/// };
+///
+/// let re = PikeVM::builder()
+///     .syntax(syntax::Config::new().utf8(false))
+///     .thompson(thompson::Config::new().utf8(false))
+///     .build(r"foo(?-u:[^b])ar.*")?;
+/// let mut cache = re.create_cache();
+///
+/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
+/// let expected = Some(Match::must(0, 1..9));
+/// let got = re.find_iter(&mut cache, haystack).next();
+/// assert_eq!(expected, got);
+/// // Notice that `(?-u:[^b])` matches invalid UTF-8,
+/// // but the subsequent `.*` does not! Disabling UTF-8
+/// // on the syntax permits this.
+/// //
+/// // N.B. This example does not show the impact of
+/// // disabling UTF-8 mode on a PikeVM Config, since that
+/// // only impacts regexes that can produce matches of
+/// // length 0.
+/// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap().range()]);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Builder {
+    config: Config,
+    #[cfg(feature = "syntax")]
+    thompson: thompson::Compiler,
+}
+
+impl Builder {
+    /// Create a new PikeVM builder with its default configuration.
+    pub fn new() -> Builder {
+        Builder {
+            config: Config::default(),
+            #[cfg(feature = "syntax")]
+            thompson: thompson::Compiler::new(),
+        }
+    }
+
+    /// Build a `PikeVM` from the given pattern.
+    ///
+    /// If there was a problem parsing or compiling the pattern, then an
+    /// error is returned.
+    #[cfg(feature = "syntax")]
+    pub fn build(&self, pattern: &str) -> Result<PikeVM, BuildError> {
+        self.build_many(&[pattern])
+    }
+
+    /// Build a `PikeVM` from the given patterns.
+    #[cfg(feature = "syntax")]
+    pub fn build_many<P: AsRef<str>>(
+        &self,
+        patterns: &[P],
+    ) -> Result<PikeVM, BuildError> {
+        let nfa = self.thompson.build_many(patterns)?;
+        self.build_from_nfa(nfa)
+    }
+
+    /// Build a `PikeVM` directly from its NFA.
+    ///
+    /// Note that when using this method, any configuration that applies to
+    /// the construction of the NFA itself will of course be ignored, since
+    /// the NFA given here is already built.
+    pub fn build_from_nfa(&self, nfa: NFA) -> Result<PikeVM, BuildError> {
+        nfa.look_set_any().available().map_err(BuildError::word)?;
+        Ok(PikeVM { config: self.config.clone(), nfa })
+    }
+
+    /// Apply the given `PikeVM` configuration options to this builder.
+    pub fn configure(&mut self, config: Config) -> &mut Builder {
+        self.config = self.config.overwrite(config);
+        self
+    }
+
+    /// Set the syntax configuration for this builder using
+    /// [`syntax::Config`](crate::util::syntax::Config).
+    ///
+    /// This permits setting things like case insensitivity, Unicode and
+    /// multi line mode.
+    ///
+    /// These settings only apply when constructing a PikeVM directly from a
+    /// pattern.
+    #[cfg(feature = "syntax")]
+    pub fn syntax(
+        &mut self,
+        config: crate::util::syntax::Config,
+    ) -> &mut Builder {
+        self.thompson.syntax(config);
+        self
+    }
+
+    /// Set the Thompson NFA configuration for this builder using
+    /// [`nfa::thompson::Config`](crate::nfa::thompson::Config).
+    ///
+    /// This permits setting things like whether additional time should be
+    /// spent shrinking the size of the NFA.
+    ///
+    /// These settings only apply when constructing a PikeVM directly from a
+    /// pattern.
+    #[cfg(feature = "syntax")]
+    pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder {
+        self.thompson.configure(config);
+        self
+    }
+}
+
+/// A virtual machine for executing regex searches with capturing groups.
+///
+/// # Infallible APIs
+///
+/// Unlike most other regex engines in this crate, a `PikeVM` never returns
+/// an error at search time. It supports all [`Anchored`] configurations,
+/// never quits and works on haystacks of arbitrary length.
+///
+/// There are two caveats to mention though:
+///
+/// * If an invalid pattern ID is given to a search via
+/// [`Anchored::Pattern`], then the PikeVM will report "no match." This is
+/// consistent with all other regex engines in this crate.
+/// * When using [`PikeVM::which_overlapping_matches`] with a [`PatternSet`]
+/// that has insufficient capacity to store all valid pattern IDs, then if a
+/// match occurs for a `PatternID` that cannot be inserted, it is silently
+/// dropped as if it did not match.
+///
+/// # Advice
+///
+/// The `PikeVM` is generally the most "powerful" regex engine in this crate.
+/// "Powerful" in this context means that it can handle any regular expression +/// that is parseable by `regex-syntax` and any size haystack. Regretably, +/// the `PikeVM` is also simultaneously often the _slowest_ regex engine in +/// practice. This results in an annoying situation where one generally tries +/// to pick any other regex engine (or perhaps none at all) before being +/// forced to fall back to a `PikeVM`. +/// +/// For example, a common strategy for dealing with capturing groups is to +/// actually look for the overall match of the regex using a faster regex +/// engine, like a [lazy DFA](crate::hybrid::regex::Regex). Once the overall +/// match is found, one can then run the `PikeVM` on just the match span to +/// find the spans of the capturing groups. In this way, the faster regex +/// engine does the majority of the work, while the `PikeVM` only lends its +/// power in a more limited role. +/// +/// Unfortunately, this isn't always possible because the faster regex engines +/// don't support all of the regex features in `regex-syntax`. This notably +/// includes (and is currently limited to) Unicode word boundaries. So if +/// your pattern has Unicode word boundaries, you typically can't use a +/// DFA-based regex engine at all (unless you [enable heuristic support for +/// it](crate::hybrid::dfa::Config::unicode_word_boundary)). (The [one-pass +/// DFA](crate::dfa::onepass::DFA) can handle Unicode word boundaries for +/// anchored searches only, but in a cruel sort of joke, many Unicode features +/// tend to result in making the regex _not_ one-pass.) +/// +/// # Example +/// +/// This example shows that the `PikeVM` implements Unicode word boundaries +/// correctly by default. +/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; +/// +/// let re = PikeVM::new(r"\b\w+\b")?; +/// let mut cache = re.create_cache(); +/// +/// let mut it = re.find_iter(&mut cache, "Шерлок Холмс"); +/// assert_eq!(Some(Match::must(0, 0..12)), it.next()); +/// assert_eq!(Some(Match::must(0, 13..23)), it.next()); +/// assert_eq!(None, it.next()); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct PikeVM { + config: Config, + nfa: NFA, +} + +impl PikeVM { + /// Parse the given regular expression using the default configuration and + /// return the corresponding `PikeVM`. + /// + /// If you want a non-default configuration, then use the [`Builder`] to + /// set your own configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re = PikeVM::new("foo[0-9]+bar")?; + /// let mut cache = re.create_cache(); + /// assert_eq!( + /// Some(Match::must(0, 3..14)), + /// re.find_iter(&mut cache, "zzzfoo12345barzzz").next(), + /// ); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new(pattern: &str) -> Result { + PikeVM::builder().build(pattern) + } + + /// Like `new`, but parses multiple patterns into a single "multi regex." + /// This similarly uses the default regex configuration. 
+///
+/// # Example
+///
+/// This example shows that the `PikeVM` implements Unicode word boundaries
+/// correctly by default.
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+///
+/// let re = PikeVM::new(r"\b\w+\b")?;
+/// let mut cache = re.create_cache();
+///
+/// let mut it = re.find_iter(&mut cache, "Шерлок Холмс");
+/// assert_eq!(Some(Match::must(0, 0..12)), it.next());
+/// assert_eq!(Some(Match::must(0, 13..23)), it.next());
+/// assert_eq!(None, it.next());
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct PikeVM {
+    config: Config,
+    nfa: NFA,
+}
+
+impl PikeVM {
+    /// Parse the given regular expression using the default configuration
+    /// and return the corresponding `PikeVM`.
+    ///
+    /// If you want a non-default configuration, then use the [`Builder`] to
+    /// set your own configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::new("foo[0-9]+bar")?;
+    /// let mut cache = re.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 3..14)),
+    ///     re.find_iter(&mut cache, "zzzfoo12345barzzz").next(),
+    /// );
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new(pattern: &str) -> Result<PikeVM, BuildError> {
+        PikeVM::builder().build(pattern)
+    }
+
+    /// Like `new`, but parses multiple patterns into a single "multi regex."
+    /// This similarly uses the default regex configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::new_many(&["[a-z]+", "[0-9]+"])?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let mut it = re.find_iter(&mut cache, "abc 1 foo 4567 0 quux");
+    /// assert_eq!(Some(Match::must(0, 0..3)), it.next());
+    /// assert_eq!(Some(Match::must(1, 4..5)), it.next());
+    /// assert_eq!(Some(Match::must(0, 6..9)), it.next());
+    /// assert_eq!(Some(Match::must(1, 10..14)), it.next());
+    /// assert_eq!(Some(Match::must(1, 15..16)), it.next());
+    /// assert_eq!(Some(Match::must(0, 17..21)), it.next());
+    /// assert_eq!(None, it.next());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new_many<P: AsRef<str>>(
+        patterns: &[P],
+    ) -> Result<PikeVM, BuildError> {
+        PikeVM::builder().build_many(patterns)
+    }
+
+    /// Like `new`, but builds a PikeVM directly from an NFA. This is useful
+    /// if you already have an NFA, or even if you hand-assembled the NFA.
+    ///
+    /// # Example
+    ///
+    /// This shows how to hand assemble a regular expression via its HIR,
+    /// compile an NFA from it and build a PikeVM from the NFA.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match};
+    /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange};
+    ///
+    /// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![
+    ///     ClassBytesRange::new(b'0', b'9'),
+    ///     ClassBytesRange::new(b'A', b'Z'),
+    ///     ClassBytesRange::new(b'_', b'_'),
+    ///     ClassBytesRange::new(b'a', b'z'),
+    /// ])));
+    ///
+    /// let config = NFA::config().nfa_size_limit(Some(1_000));
+    /// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?;
+    ///
+    /// let re = PikeVM::new_from_nfa(nfa)?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let expected = Some(Match::must(0, 3..4));
+    /// re.captures(&mut cache, "!@#A#@!", &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn new_from_nfa(nfa: NFA) -> Result<PikeVM, BuildError> {
+        PikeVM::builder().build_from_nfa(nfa)
+    }
+
+    /// Create a new `PikeVM` that matches every input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::always_match()?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let expected = Match::must(0, 0..0);
+    /// assert_eq!(Some(expected), re.find_iter(&mut cache, "").next());
+    /// assert_eq!(Some(expected), re.find_iter(&mut cache, "foo").next());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn always_match() -> Result<PikeVM, BuildError> {
+        let nfa = thompson::NFA::always_match();
+        PikeVM::new_from_nfa(nfa)
+    }
+
+    /// Create a new `PikeVM` that never matches any input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::never_match()?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert_eq!(None, re.find_iter(&mut cache, "").next());
+    /// assert_eq!(None, re.find_iter(&mut cache, "foo").next());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn never_match() -> Result<PikeVM, BuildError> {
+        let nfa = thompson::NFA::never_match();
+        PikeVM::new_from_nfa(nfa)
+    }
+
+    /// Return a default configuration for a `PikeVM`.
+    ///
+    /// This is a convenience routine to avoid needing to import the `Config`
+    /// type when customizing the construction of a `PikeVM`.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to disable UTF-8 mode.
+    /// When UTF-8 mode is disabled, zero-width matches that split a
+    /// codepoint are allowed. Otherwise they are never reported.
+    ///
+    /// In the code below, notice that `""` is permitted to match positions
+    /// that split the encoding of a codepoint.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{self, pikevm::PikeVM}, Match};
+    ///
+    /// let re = PikeVM::builder()
+    ///     .thompson(thompson::Config::new().utf8(false))
+    ///     .build(r"")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let haystack = "a☃z";
+    /// let mut it = re.find_iter(&mut cache, haystack);
+    /// assert_eq!(Some(Match::must(0, 0..0)), it.next());
+    /// assert_eq!(Some(Match::must(0, 1..1)), it.next());
+    /// assert_eq!(Some(Match::must(0, 2..2)), it.next());
+    /// assert_eq!(Some(Match::must(0, 3..3)), it.next());
+    /// assert_eq!(Some(Match::must(0, 4..4)), it.next());
+    /// assert_eq!(Some(Match::must(0, 5..5)), it.next());
+    /// assert_eq!(None, it.next());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn config() -> Config {
+        Config::new()
+    }
+
+    /// Return a builder for configuring the construction of a `PikeVM`.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Builder`] type in common cases.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use the builder to disable UTF-8 mode
+    /// everywhere.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::{self, pikevm::PikeVM},
+    ///     util::syntax,
+    ///     Match,
+    /// };
+    ///
+    /// let re = PikeVM::builder()
+    ///     .syntax(syntax::Config::new().utf8(false))
+    ///     .thompson(thompson::Config::new().utf8(false))
+    ///     .build(r"foo(?-u:[^b])ar.*")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
+    /// let expected = Some(Match::must(0, 1..9));
+    /// re.captures(&mut cache, haystack, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn builder() -> Builder {
+        Builder::new()
+    }
+
+    /// Create a new empty set of capturing groups that is guaranteed to be
+    /// valid for the search APIs on this `PikeVM`.
+    ///
+    /// A `Captures` value created for a specific `PikeVM` cannot be used
+    /// with any other `PikeVM`.
+    ///
+    /// This is a convenience function for [`Captures::all`]. See the
+    /// [`Captures`] documentation for an explanation of its alternative
+    /// constructors that permit the `PikeVM` to do less work during a
+    /// search, and thus might make it faster.
+    pub fn create_captures(&self) -> Captures {
+        Captures::all(self.get_nfa().group_info().clone())
+    }
+
+    /// Create a new cache for this `PikeVM`.
+    ///
+    /// The cache returned should only be used for searches for this
+    /// `PikeVM`. If you want to reuse the cache for another `PikeVM`, then
+    /// you must call [`Cache::reset`] with that `PikeVM` (or, equivalently,
+    /// [`PikeVM::reset_cache`]).
+    pub fn create_cache(&self) -> Cache {
+        Cache::new(self)
+    }
+
+    /// Reset the given cache such that it can be used for searching with
+    /// this `PikeVM` (and only this `PikeVM`).
+    ///
+    /// A cache reset permits reusing memory already allocated in this cache
+    /// with a different `PikeVM`.
+    ///
+    /// # Example
+    ///
+    /// This shows how to re-purpose a cache for use with a different
+    /// `PikeVM`.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re1 = PikeVM::new(r"\w")?;
+    /// let re2 = PikeVM::new(r"\W")?;
+    ///
+    /// let mut cache = re1.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..2)),
+    ///     re1.find_iter(&mut cache, "Δ").next(),
+    /// );
+    ///
+    /// // Using 'cache' with re2 is not allowed. It may result in panics or
+    /// // incorrect results. In order to re-purpose the cache, we must reset
+    /// // it with the PikeVM we'd like to use it with.
+    /// //
+    /// // Similarly, after this reset, using the cache with 're1' is also not
+    /// // allowed.
+    /// re2.reset_cache(&mut cache);
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..3)),
+    ///     re2.find_iter(&mut cache, "☃").next(),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn reset_cache(&self, cache: &mut Cache) {
+        cache.reset(self);
+    }
+
+    /// Returns the total number of patterns compiled into this `PikeVM`.
+    ///
+    /// In the case of a `PikeVM` that contains no patterns, this returns `0`.
+    ///
+    /// # Example
+    ///
+    /// This example shows the pattern length for a `PikeVM` that never
+    /// matches:
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::never_match()?;
+    /// assert_eq!(re.pattern_len(), 0);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And another example for a `PikeVM` that matches at every position:
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::always_match()?;
+    /// assert_eq!(re.pattern_len(), 1);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And finally, a `PikeVM` that was constructed from multiple patterns:
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
+    /// assert_eq!(re.pattern_len(), 3);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn pattern_len(&self) -> usize {
+        self.nfa.pattern_len()
+    }
+
+    /// Return the config for this `PikeVM`.
+    #[inline]
+    pub fn get_config(&self) -> &Config {
+        &self.config
+    }
+
+    /// Returns a reference to the underlying NFA.
+    #[inline]
+    pub fn get_nfa(&self) -> &NFA {
+        &self.nfa
+    }
+}
+
+impl PikeVM {
+    /// Returns true if and only if this `PikeVM` matches the given haystack.
+    ///
+    /// This routine may short circuit if it knows that scanning future
+    /// input will never lead to a different result. In particular, if the
+    /// underlying NFA enters a match state, then this routine will return
+    /// `true` immediately without inspecting any future input. (Consider how
+    /// this might make a difference given the regex `a+` on the haystack
+    /// `aaaaaaaaaaaaaaa`. This routine can stop after it sees the first `a`,
+    /// but routines like `find` need to continue searching because `+` is
+    /// greedy by default.)
+    ///
+    /// # Example
+    ///
+    /// This shows basic usage:
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::new("foo[0-9]+bar")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(re.is_match(&mut cache, "foo12345bar"));
+    /// assert!(!re.is_match(&mut cache, "foobar"));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: consistency with search APIs
+    ///
+    /// `is_match` is guaranteed to return `true` whenever `find` returns a
+    /// match.
+    /// This includes searches that are executed entirely within a
+    /// codepoint:
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Input};
+    ///
+    /// let re = PikeVM::new("a*")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(!re.is_match(&mut cache, Input::new("☃").span(1..2)));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Notice that when UTF-8 mode is disabled, then the above reports a
+    /// match because the restriction against zero-width matches that split a
+    /// codepoint has been lifted:
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{pikevm::PikeVM, NFA}, Input};
+    ///
+    /// let re = PikeVM::builder()
+    ///     .thompson(NFA::config().utf8(false))
+    ///     .build("a*")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(re.is_match(&mut cache, Input::new("☃").span(1..2)));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn is_match<'h, I: Into<Input<'h>>>(
+        &self,
+        cache: &mut Cache,
+        input: I,
+    ) -> bool {
+        let input = input.into().earliest(true);
+        self.search_slots(cache, &input, &mut []).is_some()
+    }
+
+    /// Executes a leftmost forward search and returns a `Match` if one
+    /// exists.
+    ///
+    /// This routine only includes the overall match span. To get access to
+    /// the individual spans of each capturing group, use [`PikeVM::captures`].
+    ///
+    /// # Example
+    ///
+    /// Leftmost first match semantics corresponds to the match with the
+    /// smallest starting offset, but where the end offset is determined by
+    /// preferring earlier branches in the original regular expression. For
+    /// example, `Sam|Samwise` will match `Sam` in `Samwise`, but `Samwise|Sam`
+    /// will match `Samwise` in `Samwise`.
+    ///
+    /// Generally speaking, the "leftmost first" match is how most backtracking
+    /// regular expressions tend to work. This is in contrast to POSIX-style
+    /// regular expressions that yield "leftmost longest" matches. Namely,
+    /// both `Sam|Samwise` and `Samwise|Sam` match `Samwise` when using
+    /// leftmost longest semantics. (This crate does not currently support
+    /// leftmost longest semantics.)
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::new("foo[0-9]+")?;
+    /// let mut cache = re.create_cache();
+    /// let expected = Match::must(0, 0..8);
+    /// assert_eq!(Some(expected), re.find(&mut cache, "foo12345"));
+    ///
+    /// // Even though a match is found after reading the first byte (`a`),
+    /// // the leftmost first match semantics demand that we find the earliest
+    /// // match that prefers earlier parts of the pattern over later parts.
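+    /// // (That is, 'abc|a' reports 'abc' and not just 'a', because the
+    /// // alternation prefers its earlier branch.)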
+    /// let re = PikeVM::new("abc|a")?;
+    /// let mut cache = re.create_cache();
+    /// let expected = Match::must(0, 0..3);
+    /// assert_eq!(Some(expected), re.find(&mut cache, "abc"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find<'h, I: Into<Input<'h>>>(
+        &self,
+        cache: &mut Cache,
+        input: I,
+    ) -> Option<Match> {
+        let input = input.into();
+        if self.get_nfa().pattern_len() == 1 {
+            let mut slots = [None, None];
+            let pid = self.search_slots(cache, &input, &mut slots)?;
+            let start = slots[0]?.get();
+            let end = slots[1]?.get();
+            return Some(Match::new(pid, Span { start, end }));
+        }
+        let ginfo = self.get_nfa().group_info();
+        let slots_len = ginfo.implicit_slot_len();
+        let mut slots = vec![None; slots_len];
+        let pid = self.search_slots(cache, &input, &mut slots)?;
+        let start = slots[pid.as_usize() * 2]?.get();
+        let end = slots[pid.as_usize() * 2 + 1]?.get();
+        Some(Match::new(pid, Span { start, end }))
+    }
+
+    /// Executes a leftmost forward search and writes the spans of capturing
+    /// groups that participated in a match into the provided [`Captures`]
+    /// value. If no match was found, then [`Captures::is_match`] is
+    /// guaranteed to return `false`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
+    ///
+    /// let re = PikeVM::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "2010-03-14", &mut caps);
+    /// assert!(caps.is_match());
+    /// assert_eq!(Some(Span::from(0..4)), caps.get_group(1));
+    /// assert_eq!(Some(Span::from(5..7)), caps.get_group(2));
+    /// assert_eq!(Some(Span::from(8..10)), caps.get_group(3));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn captures<'h, I: Into<Input<'h>>>(
+        &self,
+        cache: &mut Cache,
+        input: I,
+        caps: &mut Captures,
+    ) {
+        self.search(cache, &input.into(), caps)
+    }
+
+    /// Returns an iterator over all non-overlapping leftmost matches in the
+    /// given bytes. If no match exists, then the iterator yields no elements.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::new("foo[0-9]+")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let text = "foo1 foo12 foo123";
+    /// let matches: Vec<Match> = re.find_iter(&mut cache, text).collect();
+    /// assert_eq!(matches, vec![
+    ///     Match::must(0, 0..4),
+    ///     Match::must(0, 5..10),
+    ///     Match::must(0, 11..17),
+    /// ]);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find_iter<'r, 'c, 'h, I: Into<Input<'h>>>(
+        &'r self,
+        cache: &'c mut Cache,
+        input: I,
+    ) -> FindMatches<'r, 'c, 'h> {
+        let caps = Captures::matches(self.get_nfa().group_info().clone());
+        let it = iter::Searcher::new(input.into());
+        FindMatches { re: self, cache, caps, it }
+    }
+
+    /// Returns an iterator over all non-overlapping `Captures` values. If no
+    /// match exists, then the iterator yields no elements.
+    ///
+    /// This yields the same matches as [`PikeVM::find_iter`], but it includes
+    /// the spans of all capturing groups that participate in each match.
+    ///
+    /// **Tip:** See [`util::iter::Searcher`](crate::util::iter::Searcher) for
+    /// how to correctly iterate over all matches in a haystack while avoiding
+    /// the creation of a new `Captures` value for every match. (Which you are
+    /// forced to do with an `Iterator`.)
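+    ///
+    /// As a rough sketch of that approach (using the same
+    /// `Searcher::advance` pattern that the iterators in this module use
+    /// internally), one might write:
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, util::iter, Input};
+    ///
+    /// let re = PikeVM::new("foo[0-9]+")?;
+    /// // One cache and one 'Captures' value, reused across all matches.
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let mut it = iter::Searcher::new(Input::new("foo1 foo12"));
+    /// let mut spans = vec![];
+    /// while let Some(m) = it.advance(|input| {
+    ///     re.search(&mut cache, input, &mut caps);
+    ///     Ok(caps.get_match())
+    /// }) {
+    ///     // Inspect 'caps' here before the next iteration overwrites it.
+    ///     spans.push(m.range());
+    /// }
+    /// assert_eq!(spans, vec![0..4, 5..10]);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```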
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
+    ///
+    /// let re = PikeVM::new("foo(?P<numbers>[0-9]+)")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let text = "foo1 foo12 foo123";
+    /// let matches: Vec<Span> = re
+    ///     .captures_iter(&mut cache, text)
+    ///     // The unwrap is OK since 'numbers' matches if the pattern matches.
+    ///     .map(|caps| caps.get_group_by_name("numbers").unwrap())
+    ///     .collect();
+    /// assert_eq!(matches, vec![
+    ///     Span::from(3..4),
+    ///     Span::from(8..10),
+    ///     Span::from(14..17),
+    /// ]);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn captures_iter<'r, 'c, 'h, I: Into<Input<'h>>>(
+        &'r self,
+        cache: &'c mut Cache,
+        input: I,
+    ) -> CapturesMatches<'r, 'c, 'h> {
+        let caps = self.create_captures();
+        let it = iter::Searcher::new(input.into());
+        CapturesMatches { re: self, cache, caps, it }
+    }
+}
+
+impl PikeVM {
+    /// Executes a leftmost forward search and writes the spans of capturing
+    /// groups that participated in a match into the provided [`Captures`]
+    /// value. If no match was found, then [`Captures::is_match`] is
+    /// guaranteed to return `false`.
+    ///
+    /// This is like [`PikeVM::captures`], but it accepts a concrete `&Input`
+    /// instead of an `Into<Input>`.
+    ///
+    /// # Example: specific pattern search
+    ///
+    /// This example shows how to build a multi-PikeVM that permits searching
+    /// for specific patterns.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     Anchored, Match, PatternID, Input,
+    /// };
+    ///
+    /// let re = PikeVM::new_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let haystack = "foo123";
+    ///
+    /// // Since we are using the default leftmost-first match and both
+    /// // patterns match at the same starting position, only the first pattern
+    /// // will be returned in this case when doing a search for any of the
+    /// // patterns.
+    /// let expected = Some(Match::must(0, 0..6));
+    /// re.search(&mut cache, &Input::new(haystack), &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// // But if we want to check whether some other pattern matches, then we
+    /// // can provide its pattern ID.
+    /// let expected = Some(Match::must(1, 0..6));
+    /// let input = Input::new(haystack)
+    ///     .anchored(Anchored::Pattern(PatternID::must(1)));
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: specifying the bounds of a search
+    ///
+    /// This example shows how providing the bounds of a search can produce
+    /// different results than simply sub-slicing the haystack.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match, Input};
+    ///
+    /// let re = PikeVM::new(r"\b[0-9]{3}\b")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let haystack = "foo123bar";
+    ///
+    /// // Since we sub-slice the haystack, the search doesn't know about
+    /// // the larger context and assumes that `123` is surrounded by word
+    /// // boundaries. And of course, the match position is reported relative
+    /// // to the sub-slice as well, which means we get `0..3` instead of
+    /// // `3..6`.
+    /// let expected = Some(Match::must(0, 0..3));
+    /// re.search(&mut cache, &Input::new(&haystack[3..6]), &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// // But if we provide the bounds of the search within the context of the
+    /// // entire haystack, then the search can take the surrounding context
+    /// // into account. (And if we did find a match, it would be reported
+    /// // as a valid offset into `haystack` instead of its sub-slice.)
+    /// let expected = None;
+    /// let input = Input::new(haystack).range(3..6);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        caps: &mut Captures,
+    ) {
+        caps.set_pattern(None);
+        let pid = self.search_slots(cache, input, caps.slots_mut());
+        caps.set_pattern(pid);
+    }
+
+    /// Executes a leftmost forward search and writes the spans of capturing
+    /// groups that participated in a match into the provided `slots`, and
+    /// returns the matching pattern ID. The contents of the slots for patterns
+    /// other than the matching pattern are unspecified. If no match was found,
+    /// then `None` is returned and the contents of `slots` is unspecified.
+    ///
+    /// This is like [`PikeVM::search`], but it accepts a raw slots slice
+    /// instead of a `Captures` value. This is useful in contexts where you
+    /// don't want or need to allocate a `Captures`.
+    ///
+    /// It is legal to pass _any_ number of slots to this routine. If the regex
+    /// engine would otherwise write a slot offset that doesn't fit in the
+    /// provided slice, then it is simply skipped. In general though, there are
+    /// usually three slice lengths you might want to use:
+    ///
+    /// * An empty slice, if you only care about which pattern matched.
+    /// * A slice with
+    /// [`pattern_len() * 2`](crate::nfa::thompson::NFA::pattern_len)
+    /// slots, if you only care about the overall match spans for each matching
+    /// pattern.
+    /// * A slice with
+    /// [`slot_len()`](crate::util::captures::GroupInfo::slot_len) slots, which
+    /// permits recording match offsets for every capturing group in every
+    /// pattern.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find the overall match offsets in a
+    /// multi-pattern search without allocating a `Captures` value. Indeed, we
+    /// can put our slots right on the stack.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID, Input};
+    ///
+    /// let re = PikeVM::new_many(&[
+    ///     r"\pL+",
+    ///     r"\d+",
+    /// ])?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("!@#123");
+    ///
+    /// // We only care about the overall match offsets here, so we just
+    /// // allocate two slots for each pattern. Each slot records the start
+    /// // and end of the match.
+    /// let mut slots = [None; 4];
+    /// let pid = re.search_slots(&mut cache, &input, &mut slots);
+    /// assert_eq!(Some(PatternID::must(1)), pid);
+    ///
+    /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'.
+    /// // See 'GroupInfo' for more details on the mapping between groups and
+    /// // slot indices.
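+    /// // (Pattern 1 matched here, so its start/end pair lives at slot
+    /// // indices 2 and 3.)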
+    /// let slot_start = pid.unwrap().as_usize() * 2;
+    /// let slot_end = slot_start + 1;
+    /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get()));
+    /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get()));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search_slots(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<PatternID> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        if !utf8empty {
+            let hm = self.search_slots_imp(cache, input, slots)?;
+            return Some(hm.pattern());
+        }
+        // There is an unfortunate special case where if the regex can
+        // match the empty string and UTF-8 mode is enabled, the search
+        // implementation requires that the slots have at least as much space
+        // as needed to report the bounds of any match. This is so zero-width
+        // matches that split a codepoint can be filtered out.
+        //
+        // Note that if utf8empty is true, we specialize the case for when
+        // the number of patterns is 1. In that case, we can just use a stack
+        // allocation. Otherwise we resort to a heap allocation, which we
+        // convince ourselves we're fine with due to the pathological nature
+        // of this case.
+        let min = self.get_nfa().group_info().implicit_slot_len();
+        if slots.len() >= min {
+            let hm = self.search_slots_imp(cache, input, slots)?;
+            return Some(hm.pattern());
+        }
+        if self.get_nfa().pattern_len() == 1 {
+            let mut enough = [None, None];
+            let got = self.search_slots_imp(cache, input, &mut enough);
+            // This is OK because we know `enough` is strictly bigger than
+            // `slots`, otherwise this special case isn't reached.
+            slots.copy_from_slice(&enough[..slots.len()]);
+            return got.map(|hm| hm.pattern());
+        }
+        let mut enough = vec![None; min];
+        let got = self.search_slots_imp(cache, input, &mut enough);
+        // This is OK because we know `enough` is strictly bigger than `slots`,
+        // otherwise this special case isn't reached.
+        slots.copy_from_slice(&enough[..slots.len()]);
+        got.map(|hm| hm.pattern())
+    }
+
+    /// This is the actual implementation of `search_slots` that
+    /// doesn't account for the special case when 1) the NFA has UTF-8 mode
+    /// enabled, 2) the NFA can match the empty string and 3) the caller has
+    /// provided an insufficient number of slots to record match offsets.
+    #[inline(never)]
+    fn search_slots_imp(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<HalfMatch> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        let hm = match self.search_imp(cache, input, slots) {
+            None => return None,
+            Some(hm) if !utf8empty => return Some(hm),
+            Some(hm) => hm,
+        };
+        empty::skip_splits_fwd(input, hm, hm.offset(), |input| {
+            Ok(self
+                .search_imp(cache, input, slots)
+                .map(|hm| (hm, hm.offset())))
+        })
+        // OK because the PikeVM never errors.
+        .unwrap()
+    }
+
+    /// Writes the set of patterns that match anywhere in the given search
+    /// configuration to `patset`. If multiple patterns match at the same
+    /// position and this `PikeVM` was configured with [`MatchKind::All`]
+    /// semantics, then all matching patterns are written to the given set.
+    ///
+    /// Unless all of the patterns in this `PikeVM` are anchored, this will,
+    /// generally speaking, visit every byte in the haystack.
+    ///
+    /// This search routine *does not* clear the pattern set.
+    /// This gives some flexibility to the caller (e.g., running multiple
+    /// searches with the same pattern set), but does make the API bug-prone
+    /// if you're reusing the same pattern set for multiple searches but
+    /// intended them to be independent.
+    ///
+    /// If a pattern ID matched but the given `PatternSet` does not have
+    /// sufficient capacity to store it, then it is not inserted and silently
+    /// dropped.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find all matching patterns in a haystack,
+    /// even when some patterns match at the same position as other patterns.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     Input, MatchKind, PatternSet,
+    /// };
+    ///
+    /// let patterns = &[
+    ///     r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar",
+    /// ];
+    /// let re = PikeVM::builder()
+    ///     .configure(PikeVM::config().match_kind(MatchKind::All))
+    ///     .build_many(patterns)?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let input = Input::new("foobar");
+    /// let mut patset = PatternSet::new(re.pattern_len());
+    /// re.which_overlapping_matches(&mut cache, &input, &mut patset);
+    /// let expected = vec![0, 2, 3, 4, 6];
+    /// let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect();
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn which_overlapping_matches(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) {
+        self.which_overlapping_imp(cache, input, patset)
+    }
+}
+
+impl PikeVM {
+    /// The implementation of standard leftmost search.
+    ///
+    /// Capturing group spans are written to `slots`, but only if requested.
+    /// `slots` can be any length. Any slot in the NFA that is activated but
+    /// which is out of bounds for the given `slots` is ignored.
+    fn search_imp(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<HalfMatch> {
+        cache.setup_search(slots.len());
+        if input.is_done() {
+            return None;
+        }
+        // Why do we even care about this? Well, in our 'Captures'
+        // representation, we use usize::MAX as a sentinel to indicate "no
+        // match." This isn't problematic so long as our haystack doesn't have
+        // a maximal length. Byte slices are guaranteed by Rust to have a
+        // length that fits into isize, and so this assert should always pass.
+        // But we put it here to make our assumption explicit.
+        assert!(
+            input.haystack().len() < core::usize::MAX,
+            "byte slice lengths must be less than usize MAX",
+        );
+        instrument!(|c| c.reset(&self.nfa));
+
+        // Whether we want to visit all match states instead of emulating the
+        // 'leftmost' semantics of typical backtracking regex engines.
+        let allmatches =
+            self.config.get_match_kind().continue_past_first_match();
+        let (anchored, start_id) = match self.start_config(input) {
+            None => return None,
+            Some(config) => config,
+        };
+
+        let pre =
+            if anchored { None } else { self.get_config().get_prefilter() };
+        let Cache { ref mut stack, ref mut curr, ref mut next } = cache;
+        let mut hm = None;
+        // Yes, our search doesn't end at input.end(), but includes it. This
+        // is necessary because matches are delayed by one byte, just like
+        // how the DFA engines work. The delay is used to handle look-behind
+        // assertions. In the case of the PikeVM, the delay is implemented
+        // by not considering a match to exist until it is visited in
+        // 'steps'.
+        // Technically, we know a match exists in the previous iteration via
+        // 'epsilon_closure'. (It's the same thing in NFA-to-DFA
+        // determinization. We don't mark a DFA state as a match state if it
+        // contains an NFA match state, but rather, whether the DFA state was
+        // generated by a transition from a DFA state that contains an NFA
+        // match state.)
+        let mut at = input.start();
+        while at <= input.end() {
+            // If we have no states left to visit, then there are some cases
+            // where we know we can quit early or even skip ahead.
+            if curr.set.is_empty() {
+                // We have a match and we haven't been instructed to continue
+                // on even after finding a match, so we can quit.
+                if hm.is_some() && !allmatches {
+                    break;
+                }
+                // If we're running an anchored search and we've advanced
+                // beyond the start position with no other states to try, then
+                // we will never observe a match and thus can stop.
+                if anchored && at > input.start() {
+                    break;
+                }
+                // If there are no states left to explore at this position and
+                // we know we can't terminate early, then we are effectively at
+                // the starting state of the NFA. If we fell through here,
+                // we'd end up adding our '(?s-u:.)*?' prefix and it would be
+                // the only thing in 'curr'. So we might as well just skip
+                // ahead until we find something that we know might advance us
+                // forward.
+                if let Some(ref pre) = pre {
+                    let span = Span::from(at..input.end());
+                    match pre.find(input.haystack(), span) {
+                        None => break,
+                        Some(ref span) => at = span.start,
+                    }
+                }
+            }
+            // Instead of using the NFA's unanchored start state, we actually
+            // always use its anchored starting state. As a result, when doing
+            // an unanchored search, we need to simulate our own '(?s-u:.)*?'
+            // prefix, to permit a match to appear anywhere.
+            //
+            // Now, we don't *have* to do things this way. We could use the
+            // NFA's unanchored starting state and do one 'epsilon_closure'
+            // call from that starting state before the main loop here. And
+            // that is just as correct. However, it turns out to be slower
+            // than our approach here because it slightly increases the cost
+            // of processing each byte by requiring us to visit more NFA
+            // states to deal with the additional NFA states in the unanchored
+            // prefix. By simulating it explicitly here, we lower those costs
+            // substantially. The cost is itself small, but it adds up for
+            // large haystacks.
+            //
+            // In order to simulate the '(?s-u:.)*?' prefix---which is not
+            // greedy---we are careful not to perform an epsilon closure on
+            // the start state if we already have a match. Namely, if we
+            // did otherwise, we would never reach a terminating condition
+            // because there would always be additional states to process.
+            // In effect, the exclusion of running 'epsilon_closure' when
+            // we have a match corresponds to the "dead" states we have in
+            // our DFA regex engines. Namely, in a DFA, match states merely
+            // instruct the search execution to record the current offset as
+            // the most recently seen match. It is the dead state that actually
+            // indicates when to stop the search (other than EOF or quit
+            // states).
+            //
+            // However, when 'allmatches' is true, the caller has asked us to
+            // leave in every possible match state. This tends not to make a
+            // whole lot of sense in unanchored searches, because it means the
+            // search really cannot terminate until EOF. And often, in that
+            // case, you wind up skipping over a bunch of matches and are left
+            // with the "last" match.
+            // Arguably, it just doesn't make a lot of sense to run a
+            // 'leftmost' search (which is what this routine is) with
+            // 'allmatches' set to true. But the DFAs support it and this
+            // matches their behavior. (Generally, 'allmatches' is useful for
+            // overlapping searches or leftmost anchored searches to find the
+            // longest possible match by ignoring match priority.)
+            //
+            // Additionally, when we're running an anchored search, this
+            // epsilon closure should only be computed at the beginning of the
+            // search. If we re-computed it at every position, we would be
+            // simulating an unanchored search when we were tasked to perform
+            // an anchored search.
+            if (!hm.is_some() || allmatches)
+                && (!anchored || at == input.start())
+            {
+                // Since we are adding to the 'curr' active states and since
+                // this is for the start ID, we use a slots slice that is
+                // guaranteed to have the right length but where every element
+                // is absent. This is exactly what we want, because this
+                // epsilon closure is responsible for simulating an unanchored
+                // '(?s:.)*?' prefix. It is specifically outside of any
+                // capturing groups, and thus, using slots that are always
+                // absent is correct.
+                //
+                // Note though that we can't just use '&mut []' here, since
+                // this epsilon closure may traverse through 'Captures' epsilon
+                // transitions, and thus must be able to write offsets to the
+                // slots given which are later copied to slot values in 'curr'.
+                let slots = next.slot_table.all_absent();
+                self.epsilon_closure(stack, slots, curr, input, at, start_id);
+            }
+            if let Some(pid) = self.nexts(stack, curr, next, input, at, slots)
+            {
+                hm = Some(HalfMatch::new(pid, at));
+            }
+            // Unless the caller asked us to return early, we need to mush on
+            // to see if we can extend our match. (But note that 'nexts' will
+            // quit right after seeing a match when match_kind==LeftmostFirst,
+            // as is consistent with leftmost-first match priority.)
+            if input.get_earliest() && hm.is_some() {
+                break;
+            }
+            core::mem::swap(curr, next);
+            next.set.clear();
+            at += 1;
+        }
+        instrument!(|c| c.eprint(&self.nfa));
+        hm
+    }
+
+    /// The implementation for the 'which_overlapping_matches' API. Basically,
+    /// we do a single scan through the entire haystack (unless our regex
+    /// or search is anchored) and record every pattern that matched. In
+    /// particular, when MatchKind::All is used, this supports overlapping
+    /// matches. So if we have the regexes 'sam' and 'samwise', they will
+    /// *both* be reported in the pattern set when searching the haystack
+    /// 'samwise'.
+    fn which_overlapping_imp(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) {
+        // NOTE: This is effectively a copy of 'search_imp' above, but with no
+        // captures support and instead writes patterns that matched directly
+        // to 'patset'. See that routine for better commentary about what's
+        // going on in this routine. We probably could unify the routines using
+        // generics or more helper routines, but I'm not sure it's worth it.
+        //
+        // NOTE: We somewhat go out of our way here to support things like
+        // 'input.get_earliest()' and 'leftmost-first' match semantics. Neither
+        // of those seem particularly relevant to this routine, but they are
+        // both supported by the DFA analogs of this routine by construction
+        // and composition, so it seems like good sense to have the PikeVM
+        // match that behavior.
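+        //
+        // Otherwise, the loop below mirrors the shape of 'search_imp': seed
+        // the anchored start state's epsilon closure at each position (unless
+        // the search is anchored or a match has already been found), step all
+        // current states, and record matching pattern IDs into 'patset'
+        // instead of capture slots.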
+
+        cache.setup_search(0);
+        if input.is_done() {
+            return;
+        }
+        assert!(
+            input.haystack().len() < core::usize::MAX,
+            "byte slice lengths must be less than usize MAX",
+        );
+        instrument!(|c| c.reset(&self.nfa));
+
+        let allmatches =
+            self.config.get_match_kind().continue_past_first_match();
+        let (anchored, start_id) = match self.start_config(input) {
+            None => return,
+            Some(config) => config,
+        };
+
+        let Cache { ref mut stack, ref mut curr, ref mut next } = cache;
+        for at in input.start()..=input.end() {
+            let any_matches = !patset.is_empty();
+            if curr.set.is_empty() {
+                if any_matches && !allmatches {
+                    break;
+                }
+                if anchored && at > input.start() {
+                    break;
+                }
+            }
+            if !any_matches || allmatches {
+                let slots = &mut [];
+                self.epsilon_closure(stack, slots, curr, input, at, start_id);
+            }
+            self.nexts_overlapping(stack, curr, next, input, at, patset);
+            // If we found a match and filled our set, then there is no more
+            // additional info that we can provide. Thus, we can quit. We also
+            // quit if the caller asked us to stop at the earliest point that
+            // we know a match exists.
+            if patset.is_full() || input.get_earliest() {
+                break;
+            }
+            core::mem::swap(curr, next);
+            next.set.clear();
+        }
+        instrument!(|c| c.eprint(&self.nfa));
+    }
+
+    /// Process the active states in 'curr' to find the states (written to
+    /// 'next') we should process for the next byte in the haystack.
+    ///
+    /// 'stack' is used to perform a depth first traversal of the NFA when
+    /// computing an epsilon closure.
+    ///
+    /// When a match is found, the slots for that match state (in 'curr') are
+    /// copied to 'slots'. Moreover, once a match is seen, processing for
+    /// 'curr' stops (unless the PikeVM was configured with MatchKind::All
+    /// semantics).
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn nexts(
+        &self,
+        stack: &mut Vec<FollowEpsilon>,
+        curr: &mut ActiveStates,
+        next: &mut ActiveStates,
+        input: &Input<'_>,
+        at: usize,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<PatternID> {
+        instrument!(|c| c.record_state_set(&curr.set));
+        let mut pid = None;
+        let ActiveStates { ref set, ref mut slot_table } = *curr;
+        for sid in set.iter() {
+            pid = match self.next(stack, slot_table, next, input, at, sid) {
+                None => continue,
+                Some(pid) => Some(pid),
+            };
+            slots.copy_from_slice(slot_table.for_state(sid));
+            if !self.config.get_match_kind().continue_past_first_match() {
+                break;
+            }
+        }
+        pid
+    }
+
+    /// Like 'nexts', but for the overlapping case. This doesn't write any
+    /// slots, and instead just writes which pattern matched in 'patset'.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn nexts_overlapping(
+        &self,
+        stack: &mut Vec<FollowEpsilon>,
+        curr: &mut ActiveStates,
+        next: &mut ActiveStates,
+        input: &Input<'_>,
+        at: usize,
+        patset: &mut PatternSet,
+    ) {
+        instrument!(|c| c.record_state_set(&curr.set));
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        let ActiveStates { ref set, ref mut slot_table } = *curr;
+        for sid in set.iter() {
+            let pid = match self.next(stack, slot_table, next, input, at, sid)
+            {
+                None => continue,
+                Some(pid) => pid,
+            };
+            // This handles the case of finding a zero-width match that splits
+            // a codepoint. Namely, if we're in UTF-8 mode AND we know we can
+            // match the empty string, then the only valid way of getting to
+            // this point with an offset that splits a codepoint is when we
+            // have an empty match. Such matches, in UTF-8 mode, must not be
+            // reported. So we just skip them here and pretend as if we did
+            // not see a match.
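+            // (For example, in UTF-8 mode, a pattern that can match the
+            // empty string must not report a match at offsets 1 or 2 within
+            // the three bytes encoding '☃'.)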
+            if utf8empty && !input.is_char_boundary(at) {
+                continue;
+            }
+            let _ = patset.try_insert(pid);
+            if !self.config.get_match_kind().continue_past_first_match() {
+                break;
+            }
+        }
+    }
+
+    /// Starting from 'sid', if the position 'at' in the 'input' haystack has a
+    /// transition defined out of 'sid', then add the state transitioned to and
+    /// its epsilon closure to the 'next' set of states to explore.
+    ///
+    /// 'stack' is used by the epsilon closure computation to perform a depth
+    /// first traversal of the NFA.
+    ///
+    /// 'curr_slot_table' should be the table of slots for the current set of
+    /// states being explored. If there is a transition out of 'sid', then
+    /// sid's row in the slot table is used to perform the epsilon closure.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn next(
+        &self,
+        stack: &mut Vec<FollowEpsilon>,
+        curr_slot_table: &mut SlotTable,
+        next: &mut ActiveStates,
+        input: &Input<'_>,
+        at: usize,
+        sid: StateID,
+    ) -> Option<PatternID> {
+        instrument!(|c| c.record_step(sid));
+        match *self.nfa.state(sid) {
+            State::Fail
+            | State::Look { .. }
+            | State::Union { .. }
+            | State::BinaryUnion { .. }
+            | State::Capture { .. } => None,
+            State::ByteRange { ref trans } => {
+                if trans.matches(input.haystack(), at) {
+                    let slots = curr_slot_table.for_state(sid);
+                    // OK because 'at <= haystack.len() < usize::MAX', so
+                    // adding 1 will never wrap.
+                    let at = at.wrapping_add(1);
+                    self.epsilon_closure(
+                        stack, slots, next, input, at, trans.next,
+                    );
+                }
+                None
+            }
+            State::Sparse(ref sparse) => {
+                if let Some(next_sid) = sparse.matches(input.haystack(), at) {
+                    let slots = curr_slot_table.for_state(sid);
+                    // OK because 'at <= haystack.len() < usize::MAX', so
+                    // adding 1 will never wrap.
+                    let at = at.wrapping_add(1);
+                    self.epsilon_closure(
+                        stack, slots, next, input, at, next_sid,
+                    );
+                }
+                None
+            }
+            State::Dense(ref dense) => {
+                if let Some(next_sid) = dense.matches(input.haystack(), at) {
+                    let slots = curr_slot_table.for_state(sid);
+                    // OK because 'at <= haystack.len() < usize::MAX', so
+                    // adding 1 will never wrap.
+                    let at = at.wrapping_add(1);
+                    self.epsilon_closure(
+                        stack, slots, next, input, at, next_sid,
+                    );
+                }
+                None
+            }
+            State::Match { pattern_id } => Some(pattern_id),
+        }
+    }
+
+    /// Compute the epsilon closure of 'sid', writing the closure into 'next'
+    /// while copying slot values from 'curr_slots' into corresponding states
+    /// in 'next'. 'curr_slots' should be the slot values corresponding to
+    /// 'sid'.
+    ///
+    /// The given 'stack' is used to perform a depth first traversal of the
+    /// NFA by recursively following all epsilon transitions out of 'sid'.
+    /// Conditional epsilon transitions are followed if and only if they are
+    /// satisfied for the position 'at' in the 'input' haystack.
+    ///
+    /// While this routine may write to 'curr_slots', once it returns, any
+    /// writes are undone and the original values (even if absent) are
+    /// restored.
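+    ///
+    /// Schematically: when the closure passes through a 'Capture' state for
+    /// some slot at position 'at', that slot in 'curr_slots' is overwritten
+    /// with 'at' and a 'RestoreCapture' frame holding the old value is
+    /// pushed, so that sibling alternations explored after the frame is
+    /// popped see the original slot contents again.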
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn epsilon_closure(
+        &self,
+        stack: &mut Vec<FollowEpsilon>,
+        curr_slots: &mut [Option<NonMaxUsize>],
+        next: &mut ActiveStates,
+        input: &Input<'_>,
+        at: usize,
+        sid: StateID,
+    ) {
+        instrument!(|c| {
+            c.record_closure(sid);
+            c.record_stack_push(sid);
+        });
+        stack.push(FollowEpsilon::Explore(sid));
+        while let Some(frame) = stack.pop() {
+            match frame {
+                FollowEpsilon::RestoreCapture { slot, offset: pos } => {
+                    curr_slots[slot] = pos;
+                }
+                FollowEpsilon::Explore(sid) => {
+                    self.epsilon_closure_explore(
+                        stack, curr_slots, next, input, at, sid,
+                    );
+                }
+            }
+        }
+    }
+
+    /// Explore all of the epsilon transitions out of 'sid'. This is mostly
+    /// split out from 'epsilon_closure' in order to clearly delineate
+    /// the actual work of computing an epsilon closure from the stack
+    /// book-keeping.
+    ///
+    /// This will push any additional explorations needed on to 'stack'.
+    ///
+    /// 'curr_slots' should refer to the slots for the currently active NFA
+    /// state. That is, the current state we are stepping through. These
+    /// slots are mutated in place as new 'Captures' states are traversed
+    /// during epsilon closure, but the slots are restored to their original
+    /// values once the full epsilon closure is completed. The ultimate use of
+    /// 'curr_slots' is to copy them to the corresponding 'next_slots', so that
+    /// the capturing group spans are forwarded from the currently active state
+    /// to the next.
+    ///
+    /// 'next' refers to the next set of active states. Computing an epsilon
+    /// closure may increase the next set of active states.
+    ///
+    /// 'input' refers to the caller's input configuration and 'at' refers to
+    /// the current position in the haystack. These are used to check whether
+    /// conditional epsilon transitions (like look-around) are satisfied at
+    /// the current position. If they aren't, then the epsilon closure won't
+    /// include them.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn epsilon_closure_explore(
+        &self,
+        stack: &mut Vec<FollowEpsilon>,
+        curr_slots: &mut [Option<NonMaxUsize>],
+        next: &mut ActiveStates,
+        input: &Input<'_>,
+        at: usize,
+        mut sid: StateID,
+    ) {
+        // We can avoid pushing some state IDs on to our stack in precisely
+        // the cases where a 'push(x)' would be immediately followed by a 'x
+        // = pop()'. This is achieved by this outer-loop. We simply set 'sid'
+        // to be the next state ID we want to explore once we're done with
+        // our initial exploration. In practice, this avoids a lot of stack
+        // thrashing.
+        loop {
+            instrument!(|c| c.record_set_insert(sid));
+            // Record this state as part of our next set of active states. If
+            // we've already explored it, then no need to do it again.
+            if !next.set.insert(sid) {
+                return;
+            }
+            match *self.nfa.state(sid) {
+                State::Fail
+                | State::Match { .. }
+                | State::ByteRange { .. }
+                | State::Sparse { .. }
+                | State::Dense { .. } => {
+                    next.slot_table.for_state(sid).copy_from_slice(curr_slots);
+                    return;
+                }
+                State::Look { look, next } => {
+                    // OK because we don't permit building a searcher with a
+                    // Unicode word boundary if the requisite Unicode data is
+                    // unavailable.
+                    if !self.nfa.look_matcher().matches_inline(
+                        look,
+                        input.haystack(),
+                        at,
+                    ) {
+                        return;
+                    }
+                    sid = next;
+                }
+                State::Union { ref alternates } => {
+                    sid = match alternates.get(0) {
+                        None => return,
+                        Some(&sid) => sid,
+                    };
+                    instrument!(|c| {
+                        for &alt in &alternates[1..] {
+                            c.record_stack_push(alt);
+                        }
+                    });
+                    stack.extend(
+                        alternates[1..]
+                            .iter()
+                            .copied()
+                            .rev()
+                            .map(FollowEpsilon::Explore),
+                    );
+                }
+                State::BinaryUnion { alt1, alt2 } => {
+                    sid = alt1;
+                    instrument!(|c| c.record_stack_push(sid));
+                    stack.push(FollowEpsilon::Explore(alt2));
+                }
+                State::Capture { next, slot, .. } => {
+                    // There's no need to do anything with slots that
+                    // ultimately won't be copied into the caller-provided
+                    // 'Captures' value. So we just skip dealing with them at
+                    // all.
+                    if slot.as_usize() < curr_slots.len() {
+                        instrument!(|c| c.record_stack_push(sid));
+                        stack.push(FollowEpsilon::RestoreCapture {
+                            slot,
+                            offset: curr_slots[slot],
+                        });
+                        // OK because length of a slice must fit into an isize.
+                        curr_slots[slot] = Some(NonMaxUsize::new(at).unwrap());
+                    }
+                    sid = next;
+                }
+            }
+        }
+    }
+
+    /// Return the starting configuration of a PikeVM search.
+    ///
+    /// The "start config" is basically whether the search should be anchored
+    /// or not and the NFA state ID at which to begin the search. The state ID
+    /// returned always corresponds to an anchored starting state even when the
+    /// search is unanchored. This is because the PikeVM search loop deals with
+    /// unanchored searches with an explicit epsilon closure out of the start
+    /// state.
+    ///
+    /// This routine accounts for both the caller's `Input` configuration
+    /// and the pattern itself. For example, even if the caller asks for an
+    /// unanchored search, if the pattern itself is anchored, then this will
+    /// always return `true` for the anchored flag because implementing an
+    /// unanchored search in that case would be incorrect.
+    ///
+    /// Similarly, if the caller requests an anchored search for a particular
+    /// pattern, then the starting state ID returned will reflect that.
+    ///
+    /// If a pattern ID is given in the input configuration that is not in
+    /// this regex, then `None` is returned.
+    fn start_config(&self, input: &Input<'_>) -> Option<(bool, StateID)> {
+        match input.get_anchored() {
+            // Only way we're unanchored is if both the caller asked for an
+            // unanchored search *and* the pattern is itself not anchored.
+            Anchored::No => Some((
+                self.nfa.is_always_start_anchored(),
+                self.nfa.start_anchored(),
+            )),
+            Anchored::Yes => Some((true, self.nfa.start_anchored())),
+            Anchored::Pattern(pid) => {
+                Some((true, self.nfa.start_pattern(pid)?))
+            }
+        }
+    }
+}
+
+/// An iterator over all non-overlapping matches for a particular search.
+///
+/// The iterator yields a [`Match`] value until no more matches could be found.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'r` represents the lifetime of the PikeVM.
+/// * `'c` represents the lifetime of the PikeVM's cache.
+/// * `'h` represents the lifetime of the haystack being searched.
+///
+/// This iterator can be created with the [`PikeVM::find_iter`] method.
+#[derive(Debug)]
+pub struct FindMatches<'r, 'c, 'h> {
+    re: &'r PikeVM,
+    cache: &'c mut Cache,
+    caps: Captures,
+    it: iter::Searcher<'h>,
+}
+
+impl<'r, 'c, 'h> Iterator for FindMatches<'r, 'c, 'h> {
+    type Item = Match;
+
+    #[inline]
+    fn next(&mut self) -> Option<Match> {
+        // Splitting 'self' apart seems necessary to appease borrowck.
+        let FindMatches { re, ref mut cache, ref mut caps, ref mut it } =
+            *self;
+        // 'advance' converts errors into panics, which is OK here because
+        // the PikeVM can never return an error.
+        it.advance(|input| {
+            re.search(cache, input, caps);
+            Ok(caps.get_match())
+        })
+    }
+}
+
+/// An iterator over all non-overlapping leftmost matches, with their capturing
+/// groups, for a particular search.
+///
+/// The iterator yields a [`Captures`] value until no more matches could be
+/// found.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'r` represents the lifetime of the PikeVM.
+/// * `'c` represents the lifetime of the PikeVM's cache.
+/// * `'h` represents the lifetime of the haystack being searched.
+///
+/// This iterator can be created with the [`PikeVM::captures_iter`] method.
+#[derive(Debug)]
+pub struct CapturesMatches<'r, 'c, 'h> {
+    re: &'r PikeVM,
+    cache: &'c mut Cache,
+    caps: Captures,
+    it: iter::Searcher<'h>,
+}
+
+impl<'r, 'c, 'h> Iterator for CapturesMatches<'r, 'c, 'h> {
+    type Item = Captures;
+
+    #[inline]
+    fn next(&mut self) -> Option<Captures> {
+        // Splitting 'self' apart seems necessary to appease borrowck.
+        let CapturesMatches { re, ref mut cache, ref mut caps, ref mut it } =
+            *self;
+        // 'advance' converts errors into panics, which is OK here because
+        // the PikeVM can never return an error.
+        it.advance(|input| {
+            re.search(cache, input, caps);
+            Ok(caps.get_match())
+        });
+        if caps.is_match() {
+            Some(caps.clone())
+        } else {
+            None
+        }
+    }
+}
+
+/// A cache represents mutable state that a [`PikeVM`] requires during a
+/// search.
+///
+/// For a given [`PikeVM`], its corresponding cache may be created either via
+/// [`PikeVM::create_cache`], or via [`Cache::new`]. They are equivalent in
+/// every way, except the former does not require explicitly importing `Cache`.
+///
+/// A particular `Cache` is coupled with the [`PikeVM`] from which it
+/// was created. It may only be used with that `PikeVM`. A cache and its
+/// allocations may be re-purposed via [`Cache::reset`], in which case, it can
+/// only be used with the new `PikeVM` (and not the old one).
+#[derive(Clone, Debug)]
+pub struct Cache {
+    /// Stack used while computing epsilon closure. This effectively lets us
+    /// move what is more naturally expressed through recursion to a stack
+    /// on the heap.
+    stack: Vec<FollowEpsilon>,
+    /// The current active states being explored for the current byte in the
+    /// haystack.
+    curr: ActiveStates,
+    /// The next set of states we're building that will be explored for the
+    /// next byte in the haystack.
+    next: ActiveStates,
+}
+
+impl Cache {
+    /// Create a new [`PikeVM`] cache.
+    ///
+    /// A potentially more convenient routine to create a cache is
+    /// [`PikeVM::create_cache`], as it does not require also importing the
+    /// `Cache` type.
+    ///
+    /// If you want to reuse the returned `Cache` with some other `PikeVM`,
+    /// then you must call [`Cache::reset`] with the desired `PikeVM`.
+    pub fn new(re: &PikeVM) -> Cache {
+        Cache {
+            stack: vec![],
+            curr: ActiveStates::new(re),
+            next: ActiveStates::new(re),
+        }
+    }
+
+    /// Reset this cache such that it can be used for searching with a
+    /// different [`PikeVM`].
+    ///
+    /// A cache reset permits reusing memory already allocated in this cache
+    /// with a different `PikeVM`.
+    ///
+    /// # Example
+    ///
+    /// This shows how to re-purpose a cache for use with a different `PikeVM`.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re1 = PikeVM::new(r"\w")?;
+    /// let re2 = PikeVM::new(r"\W")?;
+    ///
+    /// let mut cache = re1.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..2)),
+    ///     re1.find_iter(&mut cache, "Δ").next(),
+    /// );
+    ///
+    /// // Using 'cache' with re2 is not allowed. It may result in panics or
+    /// // incorrect results.
+    /// // In order to re-purpose the cache, we must reset
+    /// // it with the PikeVM we'd like to use it with.
+    /// //
+    /// // Similarly, after this reset, using the cache with 're1' is also not
+    /// // allowed.
+    /// cache.reset(&re2);
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..3)),
+    ///     re2.find_iter(&mut cache, "☃").next(),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn reset(&mut self, re: &PikeVM) {
+        self.curr.reset(re);
+        self.next.reset(re);
+    }
+
+    /// Returns the heap memory usage, in bytes, of this cache.
+    ///
+    /// This does **not** include the stack size used up by this cache. To
+    /// compute that, use `std::mem::size_of::<Cache>()`.
+    pub fn memory_usage(&self) -> usize {
+        use core::mem::size_of;
+        (self.stack.len() * size_of::<FollowEpsilon>())
+            + self.curr.memory_usage()
+            + self.next.memory_usage()
+    }
+
+    /// Clears this cache. This should be called at the start of every search
+    /// to ensure we start with a clean slate.
+    ///
+    /// This also sets the length of the capturing groups used in the current
+    /// search. This permits an optimization whereby 'SlotTable::for_state'
+    /// only returns the number of slots equivalent to the number of slots
+    /// given in the 'Captures' value. This may be less than the total number
+    /// of possible slots, e.g., when one only wants to track overall match
+    /// offsets. This in turn permits less copying of capturing group spans
+    /// in the PikeVM.
+    fn setup_search(&mut self, captures_slot_len: usize) {
+        self.stack.clear();
+        self.curr.setup_search(captures_slot_len);
+        self.next.setup_search(captures_slot_len);
+    }
+}
+
+/// A set of active states used to "simulate" the execution of an NFA via the
+/// PikeVM.
+///
+/// There are two sets of these used during NFA simulation. One set corresponds
+/// to the "current" set of states being traversed for the current position
+/// in a haystack. The other set corresponds to the "next" set of states being
+/// built, which will become the new "current" set for the next position in the
+/// haystack. These two sets correspond to CLIST and NLIST in Thompson's
+/// original paper on regexes: https://dl.acm.org/doi/pdf/10.1145/363347.363387
+///
+/// In addition to representing a set of NFA states, this also maintains slot
+/// values for each state. These slot values are what turn the NFA simulation
+/// into the "Pike VM." Namely, they track capturing group values for each
+/// state. During the computation of epsilon closure, we copy slot values from
+/// states in the "current" set to the "next" set. Eventually, once a match
+/// is found, the slot values for that match state are what we write to the
+/// caller provided 'Captures' value.
+#[derive(Clone, Debug)]
+struct ActiveStates {
+    /// The set of active NFA states. This set preserves insertion order, which
+    /// is critical for simulating the match semantics of backtracking regex
+    /// engines.
+    set: SparseSet,
+    /// The slots for every NFA state, where each slot stores a (possibly
+    /// absent) offset. Every capturing group has two slots. One for a start
+    /// offset and one for an end offset.
+    slot_table: SlotTable,
+}
+
+impl ActiveStates {
+    /// Create a new set of active states for the given PikeVM. The active
+    /// states returned may only be used with the given PikeVM. (Use 'reset'
+    /// to re-purpose the allocation for a different PikeVM.)
+    fn new(re: &PikeVM) -> ActiveStates {
+        let mut active = ActiveStates {
+            set: SparseSet::new(0),
+            slot_table: SlotTable::new(),
+        };
+        active.reset(re);
+        active
+    }
+
+    /// Reset this set of active states such that it can be used with the
+    /// given PikeVM (and only that PikeVM).
+    fn reset(&mut self, re: &PikeVM) {
+        self.set.resize(re.get_nfa().states().len());
+        self.slot_table.reset(re);
+    }
+
+    /// Return the heap memory usage, in bytes, used by this set of active
+    /// states.
+    ///
+    /// This does not include the stack size of this value.
+    fn memory_usage(&self) -> usize {
+        self.set.memory_usage() + self.slot_table.memory_usage()
+    }
+
+    /// Setup this set of active states for a new search. The given slot
+    /// length should be the number of slots in a caller provided 'Captures'
+    /// (and may be zero).
+    fn setup_search(&mut self, captures_slot_len: usize) {
+        self.set.clear();
+        self.slot_table.setup_search(captures_slot_len);
+    }
+}
+
+/// A table of slots, where each row represents a state in an NFA. Thus, the
+/// table has room for storing slots for every single state in an NFA.
+///
+/// This table is represented with a single contiguous allocation. In general,
+/// the notion of "capturing group" doesn't really exist at this level of
+/// abstraction, hence the name "slot" instead. (Indeed, every capturing group
+/// maps to a pair of slots, one for the start offset and one for the end
+/// offset.) Slots are indexed by the 'Captures' NFA state.
+///
+/// N.B. Not every state actually needs a row of slots. Namely, states that
+/// only have epsilon transitions currently never have anything written to
+/// their rows in this table. Thus, the table is somewhat wasteful in its heap
+/// usage. However, it is important to maintain fast random access by state
+/// ID, which means one giant table tends to work well. RE2 takes a different
+/// approach here and allocates each row as its own reference counted thing.
+/// I explored such a strategy at one point here, but couldn't get it to work
+/// well using entirely safe code. (To the ambitious reader: I encourage you to
+/// re-litigate that experiment.) I very much wanted to stick to safe code, but
+/// could be convinced otherwise if there was a solid argument and the safety
+/// was encapsulated well.
+#[derive(Clone, Debug)]
+struct SlotTable {
+    /// The actual table of offsets.
+    table: Vec<Option<NonMaxUsize>>,
+    /// The number of slots per state, i.e., the table's stride or the length
+    /// of each row.
+    slots_per_state: usize,
+    /// The number of slots in the caller-provided 'Captures' value for the
+    /// current search. Setting this to 'slots_per_state' is always correct,
+    /// but may be wasteful.
+    slots_for_captures: usize,
+}
+
+impl SlotTable {
+    /// Create a new slot table.
+    ///
+    /// One should call 'reset' with the corresponding PikeVM before use.
+    fn new() -> SlotTable {
+        SlotTable { table: vec![], slots_for_captures: 0, slots_per_state: 0 }
+    }
+
+    /// Reset this slot table such that it can be used with the given PikeVM
+    /// (and only that PikeVM).
+    fn reset(&mut self, re: &PikeVM) {
+        let nfa = re.get_nfa();
+        self.slots_per_state = nfa.group_info().slot_len();
+        // This is always correct, but may be reduced for a particular search
+        // if a 'Captures' has fewer slots, e.g., none at all or only slots
+        // for tracking the overall match instead of all slots for every
+        // group.
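+        // ('pattern_len() * 2' is the number of implicit slots: one
+        // start/end pair per pattern, which is what a 'Captures' value
+        // built with 'Captures::matches' stores.)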
+        self.slots_for_captures = core::cmp::max(
+            self.slots_per_state,
+            nfa.pattern_len().checked_mul(2).unwrap(),
+        );
+        let len = nfa
+            .states()
+            .len()
+            .checked_mul(self.slots_per_state)
+            // Add space to account for scratch space used during a search.
+            .and_then(|x| x.checked_add(self.slots_for_captures))
+            // It seems like this could actually panic on legitimate inputs
+            // on 32-bit targets, and is very likely to panic on 16-bit.
+            // Should we somehow convert this to an error? What about
+            // something similar for the lazy DFA cache? If you're tripping
+            // this assert, please file a bug.
+            .expect("slot table length doesn't overflow");
+        // This happens about as often as a regex is compiled, so it probably
+        // should be at debug level, but I found it quite distracting and not
+        // particularly useful.
+        trace!(
+            "resizing PikeVM active states table to {} entries \
+             (slots_per_state={})",
+            len,
+            self.slots_per_state,
+        );
+        self.table.resize(len, None);
+    }
+
+    /// Return the heap memory usage, in bytes, used by this slot table.
+    ///
+    /// This does not include the stack size of this value.
+    fn memory_usage(&self) -> usize {
+        self.table.len() * core::mem::size_of::<Option<NonMaxUsize>>()
+    }
+
+    /// Perform any per-search setup for this slot table.
+    ///
+    /// In particular, this sets the length of the number of slots used in the
+    /// 'Captures' given by the caller (if any at all). This number may be
+    /// smaller than the total number of slots available, e.g., when the caller
+    /// is only interested in tracking the overall match and not the spans of
+    /// every matching capturing group. Only tracking the overall match can
+    /// save a substantial amount of time copying capturing spans during a
+    /// search.
+    fn setup_search(&mut self, captures_slot_len: usize) {
+        self.slots_for_captures = captures_slot_len;
+    }
+
+    /// Return a mutable slice of the slots for the given state.
+    ///
+    /// Note that the length of the slice returned may be less than the total
+    /// number of slots available for this state. In particular, the length
+    /// always matches the number of slots indicated via 'setup_search'.
+    fn for_state(&mut self, sid: StateID) -> &mut [Option<NonMaxUsize>] {
+        let i = sid.as_usize() * self.slots_per_state;
+        &mut self.table[i..i + self.slots_for_captures]
+    }
+
+    /// Return a slice of slots of appropriate length where every slot offset
+    /// is guaranteed to be absent. This is useful in cases where you need to
+    /// compute an epsilon closure outside of the user supplied regex, and thus
+    /// never want it to have any capturing slots set.
+    fn all_absent(&mut self) -> &mut [Option<NonMaxUsize>] {
+        let i = self.table.len() - self.slots_for_captures;
+        &mut self.table[i..i + self.slots_for_captures]
+    }
+}
+
+/// Represents a stack frame for use while computing an epsilon closure.
+///
+/// (An "epsilon closure" refers to the set of reachable NFA states from a
+/// single state without consuming any input. That is, the set of all epsilon
+/// transitions not only from that single state, but from every other state
+/// reachable by an epsilon transition as well. This is why it's called a
+/// "closure." Computing an epsilon closure is also done during DFA
+/// determinization! Compare and contrast the epsilon closure here in this
+/// PikeVM and the one used for determinization in crate::util::determinize.)
+///
+/// Computing the epsilon closure in a Thompson NFA proceeds via a depth
+/// first traversal over all epsilon transitions from a particular state.
+/// (A depth first traversal is important because it emulates the same priority
+/// of matches that is typically found in backtracking regex engines.) This
+/// depth first traversal is naturally expressed using recursion, but to avoid
+/// a call stack size proportional to the size of a regex, we put our stack on
+/// the heap instead.
+///
+/// This stack thus consists of call frames. The typical call frame is
+/// `Explore`, which instructs epsilon closure to explore the epsilon
+/// transitions from that state. (Subsequent epsilon transitions are then
+/// pushed on to the stack as more `Explore` frames.) If the state ID being
+/// explored has no epsilon transitions, then the capturing group slots are
+/// copied from the original state that sparked the epsilon closure (from the
+/// 'step' routine) to the state ID being explored. This way, capturing group
+/// slots are forwarded from the previous state to the next.
+///
+/// The other stack frame, `RestoreCapture`, instructs the epsilon closure to
+/// set the position for a particular slot back to some particular offset. This
+/// frame is pushed when `Explore` sees a `Capture` transition. `Explore` will
+/// set the offset of the slot indicated in `Capture` to the current offset,
+/// and then push the old offset on to the stack as a `RestoreCapture` frame.
+/// Thus, the new offset is only used until the epsilon closure reverts back to
+/// the `RestoreCapture` frame. In effect, this gives the `Capture` epsilon
+/// transition its "scope" to only states that come "after" it during depth
+/// first traversal.
+#[derive(Clone, Debug)]
+enum FollowEpsilon {
+    /// Explore the epsilon transitions from a state ID.
+    Explore(StateID),
+    /// Reset the given `slot` to the given `offset` (which might be `None`).
+    RestoreCapture { slot: SmallIndex, offset: Option<NonMaxUsize> },
+}
+
+/// A set of counters that "instruments" a PikeVM search. To enable this, you
+/// must enable the 'internal-instrument-pikevm' feature. Then run your Rust
+/// program with RUST_LOG=regex_automata::nfa::thompson::pikevm=trace set in
+/// the environment. The metrics collected will be dumped automatically for
+/// every search executed by the PikeVM.
+///
+/// NOTE: When 'internal-instrument-pikevm' is enabled, it will likely cause an
+/// absolute decrease in wall-clock performance, even if the 'trace' log level
+/// isn't enabled. (Although, we do try to avoid extra costs when 'trace' isn't
+/// enabled.) The main point of instrumentation is to get counts of various
+/// events that occur during the PikeVM's execution.
+///
+/// This is a somewhat hacked together collection of metrics that are useful
+/// to gather from a PikeVM search. In particular, it lets us scrutinize the
+/// performance profile of a search beyond what general purpose profiling tools
+/// give us. Namely, we orient the profiling data around the specific states of
+/// the NFA.
+///
+/// In other words, this lets us see which parts of the NFA graph are most
+/// frequently activated. This then provides direction for optimization
+/// opportunities.
+///
+/// The really sad part about this is that it absolutely clutters up the PikeVM
+/// implementation. :'( Another approach would be to just manually add this
+/// code in whenever I want this kind of profiling data, but it's complicated
+/// and tedious enough that I went with this approach... for now.
+///
+/// When instrumentation is enabled (which also turns on 'logging'), then a
+/// `Counters` is initialized for every search and `trace`'d just before the
+/// search returns to the caller.
+///
+/// Tip: When debugging performance problems with the PikeVM, it's best to try
+/// to work with an NFA that is as small as possible. Otherwise the state graph
+/// is likely to be too big to digest.
+#[cfg(feature = "internal-instrument-pikevm")]
+#[derive(Clone, Debug)]
+struct Counters {
+    /// The number of times the NFA is in a particular permutation of states.
+    state_sets: alloc::collections::BTreeMap<Vec<StateID>, u64>,
+    /// The number of times 'step' is called for a particular state ID (which
+    /// indexes this array).
+    steps: Vec<u64>,
+    /// The number of times an epsilon closure was computed for a state.
+    closures: Vec<u64>,
+    /// The number of times a particular state ID is pushed on to a stack while
+    /// computing an epsilon closure.
+    stack_pushes: Vec<u64>,
+    /// The number of times a particular state ID is inserted into a sparse set
+    /// while computing an epsilon closure.
+    set_inserts: Vec<u64>,
+}
+
+#[cfg(feature = "internal-instrument-pikevm")]
+impl Counters {
+    fn empty() -> Counters {
+        Counters {
+            state_sets: alloc::collections::BTreeMap::new(),
+            steps: vec![],
+            closures: vec![],
+            stack_pushes: vec![],
+            set_inserts: vec![],
+        }
+    }
+
+    fn reset(&mut self, nfa: &NFA) {
+        let len = nfa.states().len();
+
+        self.state_sets.clear();
+
+        self.steps.clear();
+        self.steps.resize(len, 0);
+
+        self.closures.clear();
+        self.closures.resize(len, 0);
+
+        self.stack_pushes.clear();
+        self.stack_pushes.resize(len, 0);
+
+        self.set_inserts.clear();
+        self.set_inserts.resize(len, 0);
+    }
+
+    fn eprint(&self, nfa: &NFA) {
+        trace!("===== START PikeVM Instrumentation Output =====");
+        // We take the top-K most occurring state sets. Otherwise the output
+        // is likely to be overwhelming. And we probably only care about the
+        // most frequently occurring ones anyway.
+        const LIMIT: usize = 20;
+        let mut set_counts =
+            self.state_sets.iter().collect::<Vec<(&Vec<StateID>, &u64)>>();
+        set_counts.sort_by_key(|(_, &count)| core::cmp::Reverse(count));
+        trace!("## PikeVM frequency of state sets (top {})", LIMIT);
+        for (set, count) in set_counts.iter().take(LIMIT) {
+            trace!("{:?}: {}", set, count);
+        }
+        if set_counts.len() > LIMIT {
+            trace!(
+                "... {} sets omitted (out of {} total)",
+                set_counts.len() - LIMIT,
+                set_counts.len(),
+            );
+        }
+
+        trace!("");
+        trace!("## PikeVM total frequency of events");
+        trace!(
+            "steps: {}, closures: {}, stack-pushes: {}, set-inserts: {}",
+            self.steps.iter().copied().sum::<u64>(),
+            self.closures.iter().copied().sum::<u64>(),
+            self.stack_pushes.iter().copied().sum::<u64>(),
+            self.set_inserts.iter().copied().sum::<u64>(),
+        );
+
+        trace!("");
+        trace!("## PikeVM frequency of events broken down by state");
+        for sid in 0..self.steps.len() {
+            trace!(
+                "{:06}: steps: {}, closures: {}, \
+                 stack-pushes: {}, set-inserts: {}",
+                sid,
+                self.steps[sid],
+                self.closures[sid],
+                self.stack_pushes[sid],
+                self.set_inserts[sid],
+            );
+        }
+
+        trace!("");
+        trace!("## NFA debug display");
+        trace!("{:?}", nfa);
+        trace!("===== END PikeVM Instrumentation Output =====");
+    }
+
+    fn record_state_set(&mut self, set: &SparseSet) {
+        let set = set.iter().collect::<Vec<StateID>>();
+        *self.state_sets.entry(set).or_insert(0) += 1;
+    }
+
+    fn record_step(&mut self, sid: StateID) {
+        self.steps[sid] += 1;
+    }
+
+    fn record_closure(&mut self, sid: StateID) {
+        self.closures[sid] += 1;
+    }
+
+    fn record_stack_push(&mut self, sid: StateID) {
+        self.stack_pushes[sid] += 1;
+    }
+
+    fn record_set_insert(&mut self, sid: StateID) {
+        self.set_inserts[sid] += 1;
+    }
+}
diff --git a/vendor/regex-automata/src/nfa/thompson/range_trie.rs b/vendor/regex-automata/src/nfa/thompson/range_trie.rs
new file mode 100644
index 0000000..75c9b79
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/thompson/range_trie.rs
@@ -0,0 +1,1055 @@
+/*
+I've called the primary data structure in this module a "range trie." As far
+as I can tell, there is no prior art on a data structure like this, however,
+it's likely someone somewhere has built something like it. Searching for
+"range trie" turns up the paper "Range Tries for Scalable Address Lookup,"
+but it does not appear relevant.
+
+The range trie is just like a trie in that it is a special case of a
+deterministic finite state machine. It has states and each state has a set
+of transitions to other states. It is acyclic, and, like a normal trie,
+it makes no attempt to reuse common suffixes among its elements. The key
+difference between a normal trie and a range trie below is that a range trie
+operates on *contiguous sequences* of bytes instead of singleton bytes.
+One could say that our alphabet is ranges of bytes instead of bytes
+themselves, except a key part of range trie construction is splitting ranges
+apart to ensure there is at most one transition that can be taken for any
+byte in a given state.
+
+I've tried to explain the details of how the range trie works below, so
+for now, we are left with trying to understand what problem we're trying to
+solve. Which is itself fairly involved!
+
+At the highest level, here's what we want to do. We want to convert a
+sequence of Unicode codepoints into a finite state machine whose transitions
+are over *bytes* and *not* Unicode codepoints. We want this because it makes
+said finite state machines much smaller and much faster to execute. As a
+simple example, consider a byte oriented automaton for all Unicode scalar
+values (0x00 through 0x10FFFF, not including surrogate codepoints):
+
+    [00-7F]
+    [C2-DF][80-BF]
+    [E0-E0][A0-BF][80-BF]
+    [E1-EC][80-BF][80-BF]
+    [ED-ED][80-9F][80-BF]
+    [EE-EF][80-BF][80-BF]
+    [F0-F0][90-BF][80-BF][80-BF]
+    [F1-F3][80-BF][80-BF][80-BF]
+    [F4-F4][80-8F][80-BF][80-BF]
+
+(These byte ranges are generated via the regex-syntax::utf8 module, which
+was based on Russ Cox's code in RE2, which was in turn based on Ken
+Thompson's implementation of the same idea in his Plan9 implementation of
+grep.)
+
+It should be fairly straight-forward to see how one could compile this into
+a DFA. The sequences are sorted and non-overlapping. Essentially, you could
+build a trie from this fairly easily. The problem comes when your initial
+range (in this case, 0x00-0x10FFFF) isn't so nice. For example, the class
+represented by '\w' contains only a tenth of the codepoints that
+0x00-0x10FFFF contains, but if we were to write out the byte based ranges
+as we did above, the list would stretch to 892 entries! This turns into
+quite a large NFA with a few thousand states. Turning this beast into a DFA
+takes quite a bit of time. We are thus left with trying to trim down the
+number of states we produce as early as possible.
+
+One approach (used by RE2 and still by the regex crate, at time of writing)
+is to try to find common suffixes while building NFA states for the above
+and reuse them. This is very cheap to do and one can control precisely how
+much extra memory you want to use for the cache.
+
+Another approach, however, is to reuse an algorithm for constructing a
+*minimal* DFA from a sorted sequence of inputs. I don't want to go into
+the full details here, but I explain it in more depth in my blog post on
+FSTs[1]. Note that the algorithm was not invented by me, but was published
+in a paper by Daciuk et al. in 2000 called "Incremental Construction of
+Minimal Acyclic Finite-State Automata." Like the suffix cache approach above,
+it is also possible to control the amount of extra memory one uses, although
+this usually comes with the cost of sacrificing true minimality. (But it's
+typically close enough with a reasonably sized cache of states.)
+
+The catch is that Daciuk's algorithm only works if you add your keys in
+lexicographic ascending order. In our case, since we're dealing with ranges,
+we also need the additional requirement that ranges are either equivalent
+or do not overlap at all. For example, if one were given the following byte
+ranges:
+
+    [BC-BF][80-BF]
+    [BC-BF][90-BF]
+
+Then Daciuk's algorithm would not work, since there is nothing to handle the
+fact that the ranges overlap. They would need to be split apart. Thankfully,
+Thompson's algorithm for producing byte ranges for Unicode codepoint ranges
+meets both of our requirements. (A proof for this eludes me, but it appears
+true.)
+
+... however, we would also like to be able to compile UTF-8 automata in
+reverse. We want this because in order to find the starting location of a
+match using a DFA, we need to run a second DFA---a reversed version of the
+forward DFA---backwards to discover the match location. Unfortunately, if
+we reverse our byte sequences for 0x00-0x10FFFF, we get sequences that can
+overlap, even if they are sorted:
+
+    [00-7F]
+    [80-BF][80-9F][ED-ED]
+    [80-BF][80-BF][80-8F][F4-F4]
+    [80-BF][80-BF][80-BF][F1-F3]
+    [80-BF][80-BF][90-BF][F0-F0]
+    [80-BF][80-BF][E1-EC]
+    [80-BF][80-BF][EE-EF]
+    [80-BF][A0-BF][E0-E0]
+    [80-BF][C2-DF]
+
+For example, '[80-BF][80-BF][EE-EF]' and '[80-BF][A0-BF][E0-E0]' have
+overlapping ranges between '[80-BF]' and '[A0-BF]'. Thus, there is no
+simple way to apply Daciuk's algorithm.
+
+And thus, the range trie was born. The range trie's only purpose is to take
+sequences of byte ranges like the ones above, collect them into a trie and then
+spit them out in a sorted fashion with no overlapping ranges. For example,
+0x00-0x10FFFF gets translated to:
+
+    [0-7F]
+    [80-BF][80-9F][80-8F][F1-F3]
+    [80-BF][80-9F][80-8F][F4]
+    [80-BF][80-9F][90-BF][F0]
+    [80-BF][80-9F][90-BF][F1-F3]
+    [80-BF][80-9F][E1-EC]
+    [80-BF][80-9F][ED]
+    [80-BF][80-9F][EE-EF]
+    [80-BF][A0-BF][80-8F][F1-F3]
+    [80-BF][A0-BF][80-8F][F4]
+    [80-BF][A0-BF][90-BF][F0]
+    [80-BF][A0-BF][90-BF][F1-F3]
+    [80-BF][A0-BF][E0]
+    [80-BF][A0-BF][E1-EC]
+    [80-BF][A0-BF][EE-EF]
+    [80-BF][C2-DF]
+
+We've thus satisfied our requirements for running Daciuk's algorithm. All
+sequences of ranges are sorted, and any corresponding ranges are either
+exactly equivalent or non-overlapping.
+
+In effect, a range trie is building a DFA from a sequence of arbitrary byte
+ranges. But it uses an algorithm custom tailored to its input, so it is not as
+costly as traditional DFA construction. While it is still quite a bit more
+costly than the forward case (which only needs Daciuk's algorithm), it winds
+up saving a substantial amount of time if one is doing a full DFA powerset
+construction later by virtue of producing a much much smaller NFA.
+
+[1] - https://blog.burntsushi.net/transducers/
+[2] - https://www.mitpressjournals.org/doi/pdfplus/10.1162/089120100561601
+*/
+
+use core::{cell::RefCell, convert::TryFrom, fmt, mem, ops::RangeInclusive};
+
+use alloc::{format, string::String, vec, vec::Vec};
+
+use regex_syntax::utf8::Utf8Range;
+
+use crate::util::primitives::StateID;
+
+/// There is only one final state in this trie. Every sequence of byte ranges
+/// added shares the same final state.
+const FINAL: StateID = StateID::ZERO;
+
+/// The root state of the trie.
+const ROOT: StateID = StateID::new_unchecked(1);
+
+/// A range trie represents an ordered set of sequences of bytes.
+///
+/// A range trie accepts as input a sequence of byte ranges and merges
+/// them into the existing set such that the trie can produce a sorted
+/// non-overlapping sequence of byte ranges. The sequence emitted corresponds
+/// precisely to the sequence of bytes matched by the given keys, although the
+/// byte ranges themselves may be split at different boundaries.
+///
+/// The order complexity of this data structure seems difficult to analyze.
+/// If the size of a byte is held as a constant, then insertion is clearly
+/// O(n) where n is the number of byte ranges in the input key. However, if
+/// k=256 is our alphabet size, then insertion could be O(k^2 * n). In
+/// particular it seems possible for pathological inputs to cause insertion
+/// to do a lot of work. However, for what we use this data structure for,
+/// there should be no pathological inputs since the ultimate source is always
+/// a sorted set of Unicode scalar value ranges.
+///
+/// Internally, this trie is set up like a finite state machine. Note though
+/// that it is acyclic.
+#[derive(Clone)]
+pub struct RangeTrie {
+    /// The states in this trie. The first is always the shared final state.
+    /// The second is always the root state. Otherwise, there is no
+    /// particular order.
+    states: Vec<State>,
+    /// A free-list of states. When a range trie is cleared, all of its states
+    /// are added to this list. Creating a new state reuses states from this
+    /// list before allocating a new one.
+    free: Vec<State>,
+    /// A stack for traversing this trie to yield sequences of byte ranges in
+    /// lexicographic order.
+    iter_stack: RefCell<Vec<NextIter>>,
+    /// A buffer that stores the current sequence during iteration.
+    iter_ranges: RefCell<Vec<Utf8Range>>,
+    /// A stack used for traversing the trie in order to (deeply) duplicate
+    /// a state. States are recursively duplicated when ranges are split.
+    dupe_stack: Vec<NextDupe>,
+    /// A stack used for traversing the trie during insertion of a new
+    /// sequence of byte ranges.
+    insert_stack: Vec<NextInsert>,
+}
+
+/// A single state in this trie.
+#[derive(Clone)]
+struct State {
+    /// A sorted sequence of non-overlapping transitions to other states. Each
+    /// transition corresponds to a single range of bytes.
+    transitions: Vec<Transition>,
+}
+
+/// A transition is a single range of bytes. If a particular byte is in this
+/// range, then the corresponding machine may transition to the state pointed
+/// to by `next_id`.
+#[derive(Clone)]
+struct Transition {
+    /// The byte range.
+    range: Utf8Range,
+    /// The next state to transition to.
+    next_id: StateID,
+}
+
+impl RangeTrie {
+    /// Create a new empty range trie.
+    pub fn new() -> RangeTrie {
+        let mut trie = RangeTrie {
+            states: vec![],
+            free: vec![],
+            iter_stack: RefCell::new(vec![]),
+            iter_ranges: RefCell::new(vec![]),
+            dupe_stack: vec![],
+            insert_stack: vec![],
+        };
+        trie.clear();
+        trie
+    }
+
+    /// Clear this range trie such that it is empty. Clearing a range trie
+    /// and reusing it can be beneficial because this may reuse allocations.
+    pub fn clear(&mut self) {
+        self.free.extend(self.states.drain(..));
+        self.add_empty(); // final
+        self.add_empty(); // root
+    }
+
+    /// Iterate over all of the sequences of byte ranges in this trie, and
+    /// call the provided function for each sequence. Iteration occurs in
+    /// lexicographic order.
+    pub fn iter<E, F: FnMut(&[Utf8Range]) -> Result<(), E>>(
+        &self,
+        mut f: F,
+    ) -> Result<(), E> {
+        let mut stack = self.iter_stack.borrow_mut();
+        stack.clear();
+        let mut ranges = self.iter_ranges.borrow_mut();
+        ranges.clear();
+
+        // We do iteration in a way that permits us to use a single buffer
+        // for our keys. We iterate in a depth first fashion, while being
+        // careful to expand our frontier as we move deeper in the trie.
+        stack.push(NextIter { state_id: ROOT, tidx: 0 });
+        while let Some(NextIter { mut state_id, mut tidx }) = stack.pop() {
+            // This could be implemented more simply without an inner loop
+            // here, but at the cost of more stack pushes.
+            loop {
+                let state = self.state(state_id);
+                // If we've visited all transitions in this state, then pop
+                // back to the parent state.
+                if tidx >= state.transitions.len() {
+                    ranges.pop();
+                    break;
+                }
+
+                let t = &state.transitions[tidx];
+                ranges.push(t.range);
+                if t.next_id == FINAL {
+                    f(&ranges)?;
+                    ranges.pop();
+                    tidx += 1;
+                } else {
+                    // Expand our frontier. Once we come back to this state
+                    // via the stack, start in on the next transition.
+                    stack.push(NextIter { state_id, tidx: tidx + 1 });
+                    // Otherwise, move to the first transition of the next
+                    // state.
+                    state_id = t.next_id;
+                    tidx = 0;
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Inserts a new sequence of ranges into this trie.
+    ///
+    /// The sequence given must be non-empty and must not have a length
+    /// exceeding 4.
+    pub fn insert(&mut self, ranges: &[Utf8Range]) {
+        assert!(!ranges.is_empty());
+        assert!(ranges.len() <= 4);
+
+        let mut stack = mem::replace(&mut self.insert_stack, vec![]);
+        stack.clear();
+
+        stack.push(NextInsert::new(ROOT, ranges));
+        while let Some(next) = stack.pop() {
+            let (state_id, ranges) = (next.state_id(), next.ranges());
+            assert!(!ranges.is_empty());
+
+            let (mut new, rest) = (ranges[0], &ranges[1..]);
+
+            // i corresponds to the position of the existing transition on
+            // which we are operating. Typically, the result is to remove the
+            // transition and replace it with two or more new transitions
+            // corresponding to the partitions generated by splitting the
+            // 'new' with the ith transition's range.
+            let mut i = self.state(state_id).find(new);
+
+            // In this case, there is no overlap *and* the new range is greater
+            // than all existing ranges. So we can just add it to the end.
+            if i == self.state(state_id).transitions.len() {
+                let next_id = NextInsert::push(self, &mut stack, rest);
+                self.add_transition(state_id, new, next_id);
+                continue;
+            }
+
+            // The need for this loop is a bit subtle, but basically, after
+            // we've handled the partitions from our initial split, it's
+            // possible that there will be a partition leftover that overlaps
+            // with a subsequent transition. If so, then we have to repeat
+            // the split process again with the leftovers and that subsequent
+            // transition.
+            'OUTER: loop {
+                let old = self.state(state_id).transitions[i].clone();
+                let split = match Split::new(old.range, new) {
+                    Some(split) => split,
+                    None => {
+                        let next_id = NextInsert::push(self, &mut stack, rest);
+                        self.add_transition_at(i, state_id, new, next_id);
+                        continue;
+                    }
+                };
+                let splits = split.as_slice();
+                // If we only have one partition, then the ranges must be
+                // equivalent. There's nothing to do here for this state, so
+                // just move on to the next one.
+                if splits.len() == 1 {
+                    // ... but only if we have anything left to do.
+                    if !rest.is_empty() {
+                        stack.push(NextInsert::new(old.next_id, rest));
+                    }
+                    break;
+                }
+                // At this point, we know that 'split' is non-empty and there
+                // must be some overlap AND that the two ranges are not
+                // equivalent. Therefore, the existing range MUST be removed
+                // and split up somehow. Instead of actually doing the removal
+                // and then a subsequent insertion---with all the memory
+                // shuffling that entails---we simply overwrite the transition
+                // at position `i` for the first new transition we want to
+                // insert. After that, we're forced to do expensive inserts.
+                let mut first = true;
+                let mut add_trans =
+                    |trie: &mut RangeTrie, pos, from, range, to| {
+                        if first {
+                            trie.set_transition_at(pos, from, range, to);
+                            first = false;
+                        } else {
+                            trie.add_transition_at(pos, from, range, to);
+                        }
+                    };
+                for (j, &srange) in splits.iter().enumerate() {
+                    match srange {
+                        SplitRange::Old(r) => {
+                            // Deep clone the state pointed to by the ith
+                            // transition. This is always necessary since 'old'
+                            // is always coupled with at least a 'both'
+                            // partition. We don't want any new changes made
+                            // via the 'both' partition to impact the part of
+                            // the transition that doesn't overlap with the
+                            // new range.
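+                            //
+                            // As an illustrative sketch (reusing ranges from
+                            // the module comment above): if the old
+                            // transition is [80-BF] and the new range is
+                            // [A0-BF], the split yields Old([80-9F]) and
+                            // Both([A0-BF]). The Old partition [80-9F] gets
+                            // a duplicated copy of the old subtree here,
+                            // while the Both partition continues to share
+                            // 'old.next_id'.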
+                            let dup_id = self.duplicate(old.next_id);
+                            add_trans(self, i, state_id, r, dup_id);
+                        }
+                        SplitRange::New(r) => {
+                            // This is a bit subtle, but if this happens to be
+                            // the last partition in our split, it is possible
+                            // that this overlaps with a subsequent transition.
+                            // If it does, then we must repeat the whole
+                            // splitting process over again with `r` and the
+                            // subsequent transition.
+                            {
+                                let trans = &self.state(state_id).transitions;
+                                if j + 1 == splits.len()
+                                    && i < trans.len()
+                                    && intersects(r, trans[i].range)
+                                {
+                                    new = r;
+                                    continue 'OUTER;
+                                }
+                            }
+
+                            // ... otherwise, set up exploration for a new
+                            // empty state and add a brand new transition for
+                            // this new range.
+                            let next_id =
+                                NextInsert::push(self, &mut stack, rest);
+                            add_trans(self, i, state_id, r, next_id);
+                        }
+                        SplitRange::Both(r) => {
+                            // Continue adding the remaining ranges on this
+                            // path and update the transition with the new
+                            // range.
+                            if !rest.is_empty() {
+                                stack.push(NextInsert::new(old.next_id, rest));
+                            }
+                            add_trans(self, i, state_id, r, old.next_id);
+                        }
+                    }
+                    i += 1;
+                }
+                // If we've reached this point, then we know that there are
+                // no subsequent transitions with any overlap. Therefore, we
+                // can stop processing this range and move on to the next one.
+                break;
+            }
+        }
+        self.insert_stack = stack;
+    }
+
+    pub fn add_empty(&mut self) -> StateID {
+        let id = match StateID::try_from(self.states.len()) {
+            Ok(id) => id,
+            Err(_) => {
+                // This generally should not happen since a range trie is
+                // only ever used to compile a single sequence of Unicode
+                // scalar values. If we ever got to this point, we would, at
+                // *minimum*, be using 96GB in just the range trie alone.
+                panic!("too many sequences added to range trie");
+            }
+        };
+        // If we have some free states available, then use them to avoid
+        // more allocations.
+        if let Some(mut state) = self.free.pop() {
+            state.clear();
+            self.states.push(state);
+        } else {
+            self.states.push(State { transitions: vec![] });
+        }
+        id
+    }
+
+    /// Performs a deep clone of the given state and returns the duplicate's
+    /// state ID.
+    ///
+    /// A "deep clone" in this context means that the state given along with
+    /// recursively all states that it points to are copied. Once complete,
+    /// the given state ID and the returned state ID share nothing.
+    ///
+    /// This is useful during range trie insertion when a new range overlaps
+    /// with an existing range that is bigger than the new one. The part
+    /// of the existing range that does *not* overlap with the new one is
+    /// duplicated so that adding the new range to the overlap doesn't disturb
+    /// the non-overlapping portion.
+    ///
+    /// There's one exception: if old_id is the final state, then it is not
+    /// duplicated and the same final state is returned. This is because all
+    /// final states in this trie are equivalent.
+    fn duplicate(&mut self, old_id: StateID) -> StateID {
+        if old_id == FINAL {
+            return FINAL;
+        }
+
+        let mut stack = mem::replace(&mut self.dupe_stack, vec![]);
+        stack.clear();
+
+        let new_id = self.add_empty();
+        // old_id is the state we're cloning and new_id is the ID of the
+        // duplicated state for old_id.
+        stack.push(NextDupe { old_id, new_id });
+        while let Some(NextDupe { old_id, new_id }) = stack.pop() {
+            for i in 0..self.state(old_id).transitions.len() {
+                let t = self.state(old_id).transitions[i].clone();
+                if t.next_id == FINAL {
+                    // All final states are the same, so there's no need to
+                    // duplicate it.
+                    self.add_transition(new_id, t.range, FINAL);
+                    continue;
+                }
+
+                let new_child_id = self.add_empty();
+                self.add_transition(new_id, t.range, new_child_id);
+                stack.push(NextDupe {
+                    old_id: t.next_id,
+                    new_id: new_child_id,
+                });
+            }
+        }
+        self.dupe_stack = stack;
+        new_id
+    }
+
+    /// Adds the given transition to the given state.
+    ///
+    /// Callers must ensure that all previous transitions in this state
+    /// are lexicographically smaller than the given range.
+    fn add_transition(
+        &mut self,
+        from_id: StateID,
+        range: Utf8Range,
+        next_id: StateID,
+    ) {
+        self.state_mut(from_id)
+            .transitions
+            .push(Transition { range, next_id });
+    }
+
+    /// Like `add_transition`, except this inserts the transition just before
+    /// the ith transition.
+    fn add_transition_at(
+        &mut self,
+        i: usize,
+        from_id: StateID,
+        range: Utf8Range,
+        next_id: StateID,
+    ) {
+        self.state_mut(from_id)
+            .transitions
+            .insert(i, Transition { range, next_id });
+    }
+
+    /// Overwrites the transition at position i with the given transition.
+    fn set_transition_at(
+        &mut self,
+        i: usize,
+        from_id: StateID,
+        range: Utf8Range,
+        next_id: StateID,
+    ) {
+        self.state_mut(from_id).transitions[i] = Transition { range, next_id };
+    }
+
+    /// Return an immutable borrow for the state with the given ID.
+    fn state(&self, id: StateID) -> &State {
+        &self.states[id]
+    }
+
+    /// Return a mutable borrow for the state with the given ID.
+    fn state_mut(&mut self, id: StateID) -> &mut State {
+        &mut self.states[id]
+    }
+}
+
+impl State {
+    /// Find the position at which the given range should be inserted in this
+    /// state.
+    ///
+    /// The position returned is always in the inclusive range
+    /// [0, transitions.len()]. If 'transitions.len()' is returned, then the
+    /// given range overlaps with no other range in this state *and* is greater
+    /// than all of them.
+    ///
+    /// For all other possible positions, the given range either overlaps
+    /// with the transition at that position or is otherwise less than it
+    /// with no overlap (and is greater than the previous transition). In the
+    /// former case, careful attention must be paid to inserting this range
+    /// as a new transition. In the latter case, the range can be inserted as
+    /// a new transition at the given position without disrupting any other
+    /// transitions.
+    fn find(&self, range: Utf8Range) -> usize {
+        /// Returns the position `i` at which `pred(xs[i])` first returns true
+        /// such that for all `j >= i`, `pred(xs[j]) == true`. If `pred` never
+        /// returns true, then `xs.len()` is returned.
+        ///
+        /// We roll our own binary search because it doesn't seem like the
+        /// standard library's binary search can be used here. Namely, if
+        /// there is an overlapping range, then we want to find the first such
+        /// occurrence, but there may be many. Or at least, it's not quite
+        /// clear to me how to do it.
+        fn binary_search<T, F>(xs: &[T], mut pred: F) -> usize
+        where
+            F: FnMut(&T) -> bool,
+        {
+            let (mut left, mut right) = (0, xs.len());
+            while left < right {
+                // Overflow is impossible because xs.len() <= 256.
+                let mid = (left + right) / 2;
+                if pred(&xs[mid]) {
+                    right = mid;
+                } else {
+                    left = mid + 1;
+                }
+            }
+            left
+        }
+
+        // Benchmarks suggest that binary search is just a bit faster than
+        // straight linear search. Specifically when using the debug tool:
+        //
+        //   hyperfine "regex-cli debug thompson -qr --no-captures '\w{90} ecurB'"
+        binary_search(&self.transitions, |t| range.start <= t.range.end)
+    }
+
+    /// Clear this state such that it has zero transitions.
+    fn clear(&mut self) {
+        self.transitions.clear();
+    }
+}
+
+/// The next state to process during duplication.
+#[derive(Clone, Debug)]
+struct NextDupe {
+    /// The state we want to duplicate.
+    old_id: StateID,
+    /// The ID of the new state that is a duplicate of old_id.
+    new_id: StateID,
+}
+
+/// The next state (and its corresponding transition) that we want to visit
+/// during iteration in lexicographic order.
+#[derive(Clone, Debug)]
+struct NextIter {
+    state_id: StateID,
+    tidx: usize,
+}
+
+/// The next state to process during insertion and any remaining ranges that we
+/// want to add for a particular sequence of ranges. The first such instance
+/// is always the root state along with all ranges given.
+#[derive(Clone, Debug)]
+struct NextInsert {
+    /// The next state to begin inserting ranges. This state should be the
+    /// state at which `ranges[0]` should be inserted.
+    state_id: StateID,
+    /// The ranges to insert. We use a fixed-size array here to avoid an
+    /// allocation.
+    ranges: [Utf8Range; 4],
+    /// The number of valid ranges in the above array.
+    len: u8,
+}
+
+impl NextInsert {
+    /// Create the next item to visit. The given state ID should correspond
+    /// to the state at which the first range in the given slice should be
+    /// inserted. The slice given must not be empty and it must be no longer
+    /// than 4.
+    fn new(state_id: StateID, ranges: &[Utf8Range]) -> NextInsert {
+        let len = ranges.len();
+        assert!(len > 0);
+        assert!(len <= 4);
+
+        let mut tmp = [Utf8Range { start: 0, end: 0 }; 4];
+        tmp[..len].copy_from_slice(ranges);
+        NextInsert { state_id, ranges: tmp, len: u8::try_from(len).unwrap() }
+    }
+
+    /// Push a new empty state to visit along with any remaining ranges that
+    /// still need to be inserted. The ID of the new empty state is returned.
+    ///
+    /// If ranges is empty, then no new state is created and FINAL is returned.
+    fn push(
+        trie: &mut RangeTrie,
+        stack: &mut Vec<NextInsert>,
+        ranges: &[Utf8Range],
+    ) -> StateID {
+        if ranges.is_empty() {
+            FINAL
+        } else {
+            let next_id = trie.add_empty();
+            stack.push(NextInsert::new(next_id, ranges));
+            next_id
+        }
+    }
+
+    /// Return the ID of the state to visit.
+    fn state_id(&self) -> StateID {
+        self.state_id
+    }
+
+    /// Return the remaining ranges to insert.
+    fn ranges(&self) -> &[Utf8Range] {
+        &self.ranges[..usize::try_from(self.len).unwrap()]
+    }
+}
+
+/// Split represents a partitioning of two ranges into one or more ranges. This
+/// is the secret sauce that makes a range trie work, as it's what tells us
+/// how to deal with two overlapping but unequal ranges during insertion.
+///
+/// Essentially, either two ranges overlap or they don't. If they don't, then
+/// handling insertion is easy: just insert the new range into its
+/// lexicographically correct position. Since it does not overlap with anything
+/// else, no other transitions are impacted by the new range.
+///
+/// If they do overlap though, there are generally three possible cases to
+/// handle:
+///
+/// 1. The part where the two ranges actually overlap. i.e., The intersection.
+/// 2. The part of the existing range that is not in the new range.
+/// 3. The part of the new range that is not in the old range.
+///
+/// (1) is guaranteed to always occur since all overlapping ranges have a
+/// non-empty intersection. If the two ranges are not equivalent, then at
+/// least one of (2) or (3) is guaranteed to occur as well. In some cases,
+/// e.g., `[0-4]` and `[4-9]`, all three cases will occur.
+///
+/// This `Split` type is responsible for providing (1), (2) and (3) for any
+/// possible pair of byte ranges.
+///
+/// As for insertion, for the overlap in (1), the remaining ranges to insert
+/// should be added by following the corresponding transition. However, this
+/// should only be done for the overlapping parts of the range. If there was
+/// a part of the existing range that was not in the new range, then that
+/// existing part must be split off from the transition and duplicated. The
+/// remaining parts of the overlap can then be added to using the new ranges
+/// without disturbing the existing range.
+///
+/// Handling the case for the part of a new range that is not in an existing
+/// range is seemingly easy. Just treat it as if it were a non-overlapping
+/// range. The problem here is that if this new non-overlapping range occurs
+/// after both (1) and (2), then it's possible that it can overlap with the
+/// next transition in the current state. If it does, then the whole process
+/// must be repeated!
+///
+/// # Details of the 3 cases
+///
+/// The following details the various cases that are implemented in code
+/// below. It's plausible that the number of cases is not actually minimal,
+/// but it's important for this code to remain at least somewhat readable.
+///
+/// Given [a,b] and [x,y], where a <= b, x <= y, b < 256 and y < 256, we define
+/// the following distinct relationships where at least one must apply. The
+/// order of these matters, since multiple can match. The first to match
+/// applies.
+///
+/// 1. b < x <=> [a,b] < [x,y]
+/// 2. y < a <=> [x,y] < [a,b]
+///
+/// In the case of (1) and (2), these are the only cases where there is no
+/// overlap. Or otherwise, the intersection of [a,b] and [x,y] is empty. In
+/// order to compute the intersection, one can do [max(a,x), min(b,y)]. The
+/// intersection in all of the following cases is non-empty.
+///
+/// 3. a = x && b = y <=> [a,b] == [x,y]
+/// 4. a = x && b < y <=> [x,y] right-extends [a,b]
+/// 5. b = y && a > x <=> [x,y] left-extends [a,b]
+/// 6. x = a && y < b <=> [a,b] right-extends [x,y]
+/// 7. y = b && x > a <=> [a,b] left-extends [x,y]
+/// 8. a > x && b < y <=> [x,y] covers [a,b]
+/// 9. x > a && y < b <=> [a,b] covers [x,y]
+/// 10. b = x && a < y <=> [a,b] is left-adjacent to [x,y]
+/// 11. y = a && x < b <=> [x,y] is left-adjacent to [a,b]
+/// 12. b > x && b < y <=> [a,b] left-overlaps [x,y]
+/// 13. y > a && y < b <=> [x,y] left-overlaps [a,b]
+///
+/// In cases 3-13, we can form rules that partition the ranges into a
+/// non-overlapping ordered sequence of ranges:
+///
+/// 3. [a,b]
+/// 4. [a,b], [b+1,y]
+/// 5. [x,a-1], [a,b]
+/// 6. [x,y], [y+1,b]
+/// 7. [a,x-1], [x,y]
+/// 8. [x,a-1], [a,b], [b+1,y]
+/// 9. [a,x-1], [x,y], [y+1,b]
+/// 10. [a,b-1], [b,b], [b+1,y]
+/// 11. [x,y-1], [y,y], [y+1,b]
+/// 12. [a,x-1], [x,b], [b+1,y]
+/// 13. [x,a-1], [a,y], [y+1,b]
+///
+/// In the code below, we go a step further and identify each of the above
+/// outputs as belonging either to the overlap of the two ranges or to one
+/// of [a,b] or [x,y] exclusively.
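+///
+/// As a concrete illustration (mirroring one of the unit tests below):
+/// splitting the old range [3,6] with the new range [6,7] matches case 10
+/// (b = x and a < y), which partitions into:
+///
+///   Old([3,5]), Both([6,6]), New([7,7])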
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct Split {
+    partitions: [SplitRange; 3],
+    len: usize,
+}
+
+/// A tagged range indicating how it was derived from a pair of ranges.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum SplitRange {
+    Old(Utf8Range),
+    New(Utf8Range),
+    Both(Utf8Range),
+}
+
+impl Split {
+    /// Create a partitioning of the given ranges.
+    ///
+    /// If the given ranges have an empty intersection, then None is returned.
+    fn new(o: Utf8Range, n: Utf8Range) -> Option<Split> {
+        let range = |r: RangeInclusive<u8>| Utf8Range {
+            start: *r.start(),
+            end: *r.end(),
+        };
+        let old = |r| SplitRange::Old(range(r));
+        let new = |r| SplitRange::New(range(r));
+        let both = |r| SplitRange::Both(range(r));
+
+        // Use same names as the comment above to make it easier to compare.
+        let (a, b, x, y) = (o.start, o.end, n.start, n.end);
+
+        if b < x || y < a {
+            // case 1, case 2
+            None
+        } else if a == x && b == y {
+            // case 3
+            Some(Split::parts1(both(a..=b)))
+        } else if a == x && b < y {
+            // case 4
+            Some(Split::parts2(both(a..=b), new(b + 1..=y)))
+        } else if b == y && a > x {
+            // case 5
+            Some(Split::parts2(new(x..=a - 1), both(a..=b)))
+        } else if x == a && y < b {
+            // case 6
+            Some(Split::parts2(both(x..=y), old(y + 1..=b)))
+        } else if y == b && x > a {
+            // case 7
+            Some(Split::parts2(old(a..=x - 1), both(x..=y)))
+        } else if a > x && b < y {
+            // case 8
+            Some(Split::parts3(new(x..=a - 1), both(a..=b), new(b + 1..=y)))
+        } else if x > a && y < b {
+            // case 9
+            Some(Split::parts3(old(a..=x - 1), both(x..=y), old(y + 1..=b)))
+        } else if b == x && a < y {
+            // case 10
+            Some(Split::parts3(old(a..=b - 1), both(b..=b), new(b + 1..=y)))
+        } else if y == a && x < b {
+            // case 11
+            Some(Split::parts3(new(x..=y - 1), both(y..=y), old(y + 1..=b)))
+        } else if b > x && b < y {
+            // case 12
+            Some(Split::parts3(old(a..=x - 1), both(x..=b), new(b + 1..=y)))
+        } else if y > a && y < b {
+            // case 13
+            Some(Split::parts3(new(x..=a - 1), both(a..=y), old(y + 1..=b)))
+        } else {
+            unreachable!()
+        }
+    }
+
+    /// Create a new split with a single partition. This only occurs when two
+    /// ranges are equivalent.
+    fn parts1(r1: SplitRange) -> Split {
+        // This value doesn't matter since it is never accessed.
+        let nada = SplitRange::Old(Utf8Range { start: 0, end: 0 });
+        Split { partitions: [r1, nada, nada], len: 1 }
+    }
+
+    /// Create a new split with two partitions.
+    fn parts2(r1: SplitRange, r2: SplitRange) -> Split {
+        // This value doesn't matter since it is never accessed.
+        let nada = SplitRange::Old(Utf8Range { start: 0, end: 0 });
+        Split { partitions: [r1, r2, nada], len: 2 }
+    }
+
+    /// Create a new split with three partitions.
+    fn parts3(r1: SplitRange, r2: SplitRange, r3: SplitRange) -> Split {
+        Split { partitions: [r1, r2, r3], len: 3 }
+    }
+
+    /// Return the partitions in this split as a slice.
+    fn as_slice(&self) -> &[SplitRange] {
+        &self.partitions[..self.len]
+    }
+}
+
+impl fmt::Debug for RangeTrie {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        writeln!(f, "")?;
+        for (i, state) in self.states.iter().enumerate() {
+            let status = if i == FINAL.as_usize() { '*' } else { ' ' };
+            writeln!(f, "{}{:06}: {:?}", status, i, state)?;
+        }
+        Ok(())
+    }
+}
+
+impl fmt::Debug for State {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let rs = self
+            .transitions
+            .iter()
+            .map(|t| format!("{:?}", t))
+            .collect::<Vec<String>>()
+            .join(", ");
+        write!(f, "{}", rs)
+    }
+}
+
+impl fmt::Debug for Transition {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if self.range.start == self.range.end {
+            write!(
+                f,
+                "{:02X} => {:02X}",
+                self.range.start,
+                self.next_id.as_usize(),
+            )
+        } else {
+            write!(
+                f,
+                "{:02X}-{:02X} => {:02X}",
+                self.range.start,
+                self.range.end,
+                self.next_id.as_usize(),
+            )
+        }
+    }
+}
+
+/// Returns true if and only if the given ranges intersect.
+fn intersects(r1: Utf8Range, r2: Utf8Range) -> bool {
+    !(r1.end < r2.start || r2.end < r1.start)
+}
+
+#[cfg(test)]
+mod tests {
+    use core::ops::RangeInclusive;
+
+    use regex_syntax::utf8::Utf8Range;
+
+    use super::*;
+
+    fn r(range: RangeInclusive<u8>) -> Utf8Range {
+        Utf8Range { start: *range.start(), end: *range.end() }
+    }
+
+    fn split_maybe(
+        old: RangeInclusive<u8>,
+        new: RangeInclusive<u8>,
+    ) -> Option<Split> {
+        Split::new(r(old), r(new))
+    }
+
+    fn split(
+        old: RangeInclusive<u8>,
+        new: RangeInclusive<u8>,
+    ) -> Vec<SplitRange> {
+        split_maybe(old, new).unwrap().as_slice().to_vec()
+    }
+
+    #[test]
+    fn no_splits() {
+        // case 1
+        assert_eq!(None, split_maybe(0..=1, 2..=3));
+        // case 2
+        assert_eq!(None, split_maybe(2..=3, 0..=1));
+    }
+
+    #[test]
+    fn splits() {
+        let range = |r: RangeInclusive<u8>| Utf8Range {
+            start: *r.start(),
+            end: *r.end(),
+        };
+        let old = |r| SplitRange::Old(range(r));
+        let new = |r| SplitRange::New(range(r));
+        let both = |r| SplitRange::Both(range(r));
+
+        // case 3
+        assert_eq!(split(0..=0, 0..=0), vec![both(0..=0)]);
+        assert_eq!(split(9..=9, 9..=9), vec![both(9..=9)]);
+
+        // case 4
+        assert_eq!(split(0..=5, 0..=6), vec![both(0..=5), new(6..=6)]);
+        assert_eq!(split(0..=5, 0..=8), vec![both(0..=5), new(6..=8)]);
+        assert_eq!(split(5..=5, 5..=8), vec![both(5..=5), new(6..=8)]);
+
+        // case 5
+        assert_eq!(split(1..=5, 0..=5), vec![new(0..=0), both(1..=5)]);
+        assert_eq!(split(3..=5, 0..=5), vec![new(0..=2), both(3..=5)]);
+        assert_eq!(split(5..=5, 0..=5), vec![new(0..=4), both(5..=5)]);
+
+        // case 6
+        assert_eq!(split(0..=6, 0..=5), vec![both(0..=5), old(6..=6)]);
+        assert_eq!(split(0..=8, 0..=5), vec![both(0..=5), old(6..=8)]);
+        assert_eq!(split(5..=8, 5..=5), vec![both(5..=5), old(6..=8)]);
+
+        // case 7
+        assert_eq!(split(0..=5, 1..=5), vec![old(0..=0), both(1..=5)]);
+        assert_eq!(split(0..=5, 3..=5), vec![old(0..=2), both(3..=5)]);
+        assert_eq!(split(0..=5, 5..=5), vec![old(0..=4), both(5..=5)]);
+
+        // case 8
+        assert_eq!(
+            split(3..=6, 2..=7),
+            vec![new(2..=2), both(3..=6), new(7..=7)],
+        );
+        assert_eq!(
+            split(3..=6, 1..=8),
+            vec![new(1..=2), both(3..=6), new(7..=8)],
+        );
+
+        // case 9
+        assert_eq!(
+            split(2..=7, 3..=6),
+            vec![old(2..=2), both(3..=6), old(7..=7)],
+        );
+        assert_eq!(
+            split(1..=8, 3..=6),
+            vec![old(1..=2), both(3..=6), old(7..=8)],
+        );
+
+        // case 10
+        assert_eq!(
+            split(3..=6, 6..=7),
+            vec![old(3..=5), both(6..=6), new(7..=7)],
+        );
+        assert_eq!(
+            split(3..=6, 6..=8),
+            vec![old(3..=5), both(6..=6), new(7..=8)],
+        );
+        assert_eq!(
+            split(5..=6, 6..=7),
+            vec![old(5..=5), both(6..=6), new(7..=7)],
+        );
+
+        // case 11
+        assert_eq!(
+            split(6..=7, 3..=6),
+            vec![new(3..=5), both(6..=6), old(7..=7)],
+        );
+        assert_eq!(
+            split(6..=8, 3..=6),
+            vec![new(3..=5), both(6..=6), old(7..=8)],
+        );
+        assert_eq!(
+            split(6..=7, 5..=6),
+            vec![new(5..=5), both(6..=6), old(7..=7)],
+        );
+
+        // case 12
+        assert_eq!(
+            split(3..=7, 5..=9),
+            vec![old(3..=4), both(5..=7), new(8..=9)],
+        );
+        assert_eq!(
+            split(3..=5, 4..=6),
+            vec![old(3..=3), both(4..=5), new(6..=6)],
+        );
+
+        // case 13
+        assert_eq!(
+            split(5..=9, 3..=7),
+            vec![new(3..=4), both(5..=7), old(8..=9)],
+        );
+        assert_eq!(
+            split(4..=6, 3..=5),
+            vec![new(3..=3), both(4..=5), old(6..=6)],
+        );
+    }
+
+    // Arguably there should be more tests here, but in practice, this data
+    // structure is well covered by the huge number of regex tests.
+}
diff --git a/vendor/regex-automata/src/util/alphabet.rs b/vendor/regex-automata/src/util/alphabet.rs
new file mode 100644
index 0000000..22b5a76
--- /dev/null
+++ b/vendor/regex-automata/src/util/alphabet.rs
@@ -0,0 +1,1139 @@
+/*!
+This module provides APIs for dealing with the alphabets of finite state
+machines.
+
+There are two principal types in this module, [`ByteClasses`] and [`Unit`].
+The former defines the alphabet of a finite state machine while the latter
+represents an element of that alphabet.
+
+To a first approximation, the alphabet of all automata in this crate is just
+a `u8`. Namely, every distinct byte value. All 256 of them. In practice, this
+can be quite wasteful when building a transition table for a DFA, since it
+requires storing a state identifier for each element in the alphabet. Instead,
+we collapse the alphabet of an automaton down into equivalence classes, where
+every byte in the same equivalence class never discriminates between a match or
+a non-match from any other byte in the same class. For example, in the regex
+`[a-z]+`, you could consider it as having an alphabet consisting of two
+equivalence classes: `a-z` and everything else. In terms of the transitions on
+an automaton, it doesn't actually require representing every distinct byte.
+Just the equivalence classes.
+
+The downside of equivalence classes is that, of course, searching a haystack
+deals with individual byte values. Those byte values need to be mapped to
+their corresponding equivalence class. This is what `ByteClasses` does. In
+practice, doing this for every state transition has negligible impact on modern
+CPUs. Moreover, it helps make more efficient use of the CPU cache by (possibly
+considerably) shrinking the size of the transition table.
+
+One last hiccup concerns `Unit`. Namely, because of look-around and how the
+DFAs in this crate work, we need to add a sentinel value to our alphabet
+of equivalence classes that represents the "end" of a search. We call that
+sentinel [`Unit::eoi`] or "end of input." Thus, a `Unit` is either an
+equivalence class corresponding to a set of bytes, or it is a special "end of
+input" sentinel.
+
+In general, you should not expect to need either of these types unless you're
+doing lower level shenanigans with DFAs, or even building your own DFAs.
+(Although, you don't have to use these types to build your own DFAs of course.)
+For example, if you're walking a DFA's state graph, it's probably useful to
+make use of [`ByteClasses`] to visit each element in the DFA's alphabet instead
+of just visiting every distinct `u8` value. The latter isn't necessarily wrong,
+but it could be potentially very wasteful.
+*/
+use crate::util::{
+    escape::DebugByte,
+    wire::{self, DeserializeError, SerializeError},
+};
+
+/// Unit represents a single unit of haystack for DFA based regex engines.
+///
+/// It is not expected for consumers of this crate to need to use this type
+/// unless they are implementing their own DFA. And even then, it's not
+/// required: implementors may use other techniques to handle haystack units.
+///
+/// Typically, a single unit of haystack for a DFA would be a single byte.
+/// However, for the DFAs in this crate, matches are delayed by a single byte
+/// in order to handle look-ahead assertions (`\b`, `$` and `\z`). Thus, once
+/// we have consumed the haystack, we must run the DFA through one additional
+/// transition using a unit that indicates the haystack has ended.
+///
+/// There is no way to represent a sentinel with a `u8` since all possible
+/// values *may* be valid haystack units to a DFA, therefore this type
+/// explicitly adds room for a sentinel value.
+///
+/// The sentinel EOI value is always its own equivalence class and is
+/// ultimately represented by adding 1 to the maximum equivalence class value.
+/// So for example, the regex `^[a-z]+$` might be split into the following
+/// equivalence classes:
+///
+/// ```text
+/// 0 => [\x00-`]
+/// 1 => [a-z]
+/// 2 => [{-\xFF]
+/// 3 => [EOI]
+/// ```
+///
+/// Where EOI is the special sentinel value that is always in its own
+/// singleton equivalence class.
+#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)]
+pub struct Unit(UnitKind);
+
+#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)]
+enum UnitKind {
+    /// Represents a byte value, or more typically, an equivalence class
+    /// represented as a byte value.
+    U8(u8),
+    /// Represents the "end of input" sentinel. We regrettably use a `u16`
+    /// here since the maximum sentinel value is `256`. Thankfully, we don't
+    /// actually store a `Unit` anywhere, so this extra space shouldn't be too
+    /// bad.
+    EOI(u16),
+}
+
+impl Unit {
+    /// Create a new haystack unit from a byte value.
+    ///
+    /// All possible byte values are legal. However, when creating a haystack
+    /// unit for a specific DFA, one should be careful to only construct units
+    /// that are in that DFA's alphabet. Namely, one way to compact a DFA's
+    /// in-memory representation is to collapse its transitions to a set of
+    /// equivalence classes instead of the set of all possible byte values.
+    /// If a DFA uses equivalence classes instead of byte values, then the
+    /// byte given here should be the equivalence class.
+    pub fn u8(byte: u8) -> Unit {
+        Unit(UnitKind::U8(byte))
+    }
+
+    /// Create a new "end of input" haystack unit.
+    ///
+    /// The value given is the sentinel value used by this unit to represent
+    /// the "end of input." The value should be the total number of equivalence
+    /// classes in the corresponding alphabet. Its maximum value is `256`,
+    /// which occurs when every byte is its own equivalence class.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `num_byte_equiv_classes` is greater than `256`.
+    pub fn eoi(num_byte_equiv_classes: usize) -> Unit {
+        assert!(
+            num_byte_equiv_classes <= 256,
+            "max number of byte-based equivalent classes is 256, but got {}",
+            num_byte_equiv_classes,
+        );
+        Unit(UnitKind::EOI(u16::try_from(num_byte_equiv_classes).unwrap()))
+    }
+
+    /// If this unit is not an "end of input" sentinel, then returns its
+    /// underlying byte value. Otherwise return `None`.
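+    ///
+    /// A small illustrative example, following directly from the definitions
+    /// above:
+    ///
+    /// ```
+    /// use regex_automata::util::alphabet::Unit;
+    ///
+    /// assert_eq!(Some(b'a'), Unit::u8(b'a').as_u8());
+    /// assert_eq!(None, Unit::eoi(256).as_u8());
+    /// ```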
+    pub fn as_u8(self) -> Option<u8> {
+        match self.0 {
+            UnitKind::U8(b) => Some(b),
+            UnitKind::EOI(_) => None,
+        }
+    }
+
+    /// If this unit is an "end of input" sentinel, then return the underlying
+    /// sentinel value that was given to [`Unit::eoi`]. Otherwise return
+    /// `None`.
+    pub fn as_eoi(self) -> Option<u16> {
+        match self.0 {
+            UnitKind::U8(_) => None,
+            UnitKind::EOI(sentinel) => Some(sentinel),
+        }
+    }
+
+    /// Return this unit as a `usize`, regardless of whether it is a byte value
+    /// or an "end of input" sentinel. In the latter case, the underlying
+    /// sentinel value given to [`Unit::eoi`] is returned.
+    pub fn as_usize(self) -> usize {
+        match self.0 {
+            UnitKind::U8(b) => usize::from(b),
+            UnitKind::EOI(eoi) => usize::from(eoi),
+        }
+    }
+
+    /// Returns true if and only if this unit is a byte value equivalent to the
+    /// byte given. This always returns false when this is an "end of input"
+    /// sentinel.
+    pub fn is_byte(self, byte: u8) -> bool {
+        self.as_u8().map_or(false, |b| b == byte)
+    }
+
+    /// Returns true when this unit represents an "end of input" sentinel.
+    pub fn is_eoi(self) -> bool {
+        self.as_eoi().is_some()
+    }
+
+    /// Returns true when this unit corresponds to an ASCII word byte.
+    ///
+    /// This always returns false when this unit represents an "end of input"
+    /// sentinel.
+    pub fn is_word_byte(self) -> bool {
+        self.as_u8().map_or(false, crate::util::utf8::is_word_byte)
+    }
+}
+
+impl core::fmt::Debug for Unit {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        match self.0 {
+            UnitKind::U8(b) => write!(f, "{:?}", DebugByte(b)),
+            UnitKind::EOI(_) => write!(f, "EOI"),
+        }
+    }
+}
+
+/// A representation of byte oriented equivalence classes.
+///
+/// This is used in a DFA to reduce the size of the transition table. This can
+/// have a particularly large impact not only on the total size of a dense DFA,
+/// but also on compile times.
+///
+/// The essential idea here is that the alphabet of a DFA is shrunk from the
+/// usual 256 distinct byte values down to a set of equivalence classes. The
+/// guarantee you get is that any byte belonging to the same equivalence class
+/// can be treated as if it were any other byte in the same class, and the
+/// result of a search wouldn't change.
+///
+/// # Example
+///
+/// This example shows how to get byte classes from an
+/// [`NFA`](crate::nfa::thompson::NFA) and ask for the class of various bytes.
+///
+/// ```
+/// use regex_automata::nfa::thompson::NFA;
+///
+/// let nfa = NFA::new("[a-z]+")?;
+/// let classes = nfa.byte_classes();
+/// // 'a' and 'z' are in the same class for this regex.
+/// assert_eq!(classes.get(b'a'), classes.get(b'z'));
+/// // But 'a' and 'A' are not.
+/// assert_ne!(classes.get(b'a'), classes.get(b'A'));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Copy)]
+pub struct ByteClasses([u8; 256]);
+
+impl ByteClasses {
+    /// Creates a new set of equivalence classes where all bytes are mapped to
+    /// the same class.
+    #[inline]
+    pub fn empty() -> ByteClasses {
+        ByteClasses([0; 256])
+    }
+
+    /// Creates a new set of equivalence classes where each byte belongs to
+    /// its own equivalence class.
+    #[inline]
+    pub fn singletons() -> ByteClasses {
+        let mut classes = ByteClasses::empty();
+        for b in 0..=255 {
+            classes.set(b, b);
+        }
+        classes
+    }
+
+    /// Deserializes a byte class map from the given slice. If the slice is of
+    /// insufficient length or otherwise contains an impossible mapping, then
+    /// an error is returned. Upon success, the number of bytes read along with
+    /// the map are returned. The number of bytes read is always a multiple
+    /// of 8.
+    pub(crate) fn from_bytes(
+        slice: &[u8],
+    ) -> Result<(ByteClasses, usize), DeserializeError> {
+        wire::check_slice_len(slice, 256, "byte class map")?;
+        let mut classes = ByteClasses::empty();
+        for (b, &class) in slice[..256].iter().enumerate() {
+            classes.set(u8::try_from(b).unwrap(), class);
+        }
+        // We specifically don't use 'classes.iter()' here because that
+        // iterator depends on 'classes.alphabet_len()' being correct. But that
+        // is precisely the thing we're trying to verify below!
+        for &b in classes.0.iter() {
+            if usize::from(b) >= classes.alphabet_len() {
+                return Err(DeserializeError::generic(
+                    "found equivalence class greater than alphabet len",
+                ));
+            }
+        }
+        Ok((classes, 256))
+    }
+
+    /// Writes this byte class map to the given byte buffer. If the given
+    /// buffer is too small, then an error is returned. Upon success, the total
+    /// number of bytes written is returned. The number of bytes written is
+    /// guaranteed to be a multiple of 8.
+    pub(crate) fn write_to(
+        &self,
+        mut dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        let nwrite = self.write_to_len();
+        if dst.len() < nwrite {
+            return Err(SerializeError::buffer_too_small("byte class map"));
+        }
+        for b in 0..=255 {
+            dst[0] = self.get(b);
+            dst = &mut dst[1..];
+        }
+        Ok(nwrite)
+    }
+
+    /// Returns the total number of bytes written by `write_to`.
+    pub(crate) fn write_to_len(&self) -> usize {
+        256
+    }
+
+    /// Set the equivalence class for the given byte.
+    #[inline]
+    pub fn set(&mut self, byte: u8, class: u8) {
+        self.0[usize::from(byte)] = class;
+    }
+
+    /// Get the equivalence class for the given byte.
+    #[inline]
+    pub fn get(&self, byte: u8) -> u8 {
+        self.0[usize::from(byte)]
+    }
+
+    /// Get the equivalence class for the given haystack unit and return the
+    /// class as a `usize`.
+    #[inline]
+    pub fn get_by_unit(&self, unit: Unit) -> usize {
+        match unit.0 {
+            UnitKind::U8(b) => usize::from(self.get(b)),
+            UnitKind::EOI(b) => usize::from(b),
+        }
+    }
+
+    /// Create a unit that represents the "end of input" sentinel based on the
+    /// number of equivalence classes.
+    #[inline]
+    pub fn eoi(&self) -> Unit {
+        // The alphabet length already includes the EOI sentinel, hence why
+        // we subtract 1.
+        Unit::eoi(self.alphabet_len().checked_sub(1).unwrap())
+    }
+
+    /// Return the total number of elements in the alphabet represented by
+    /// these equivalence classes. Equivalently, this returns the total number
+    /// of equivalence classes.
+    #[inline]
+    pub fn alphabet_len(&self) -> usize {
+        // Add one since the number of equivalence classes is one bigger than
+        // the last one. But add another to account for the final EOI class
+        // that isn't explicitly represented.
+        usize::from(self.0[255]) + 1 + 1
+    }
+
+    /// Returns the stride, as a base-2 exponent, required for these
+    /// equivalence classes.
+    ///
+    /// The stride is always the smallest power of 2 that is greater than or
+    /// equal to the alphabet length, and the `stride2` returned here is the
+    /// exponent applied to `2` to get the smallest power. This is done so that
+    /// converting between premultiplied state IDs and indices can be done with
+    /// shifts alone, which is much faster than integer division.
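+    ///
+    /// For example (purely illustrative): an alphabet length of 5 rounds up
+    /// to a stride of 8, so `stride2` would return 3, since 2^3 = 8.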
+    #[inline]
+    pub fn stride2(&self) -> usize {
+        let zeros = self.alphabet_len().next_power_of_two().trailing_zeros();
+        usize::try_from(zeros).unwrap()
+    }
+
+    /// Returns true if and only if every byte in this class maps to its own
+    /// equivalence class. Equivalently, there are 257 equivalence classes
+    /// and each class contains either exactly one byte or corresponds to the
+    /// singleton class containing the "end of input" sentinel.
+    #[inline]
+    pub fn is_singleton(&self) -> bool {
+        self.alphabet_len() == 257
+    }
+
+    /// Returns an iterator over all equivalence classes in this set.
+    #[inline]
+    pub fn iter(&self) -> ByteClassIter<'_> {
+        ByteClassIter { classes: self, i: 0 }
+    }
+
+    /// Returns an iterator over a sequence of representative bytes from each
+    /// equivalence class within the range of bytes given.
+    ///
+    /// When the given range is unbounded on both sides, the iterator yields
+    /// exactly N items, where N is equivalent to the number of equivalence
+    /// classes. Each item is an arbitrary byte drawn from each equivalence
+    /// class.
+    ///
+    /// This is useful when one is determinizing an NFA and the NFA's alphabet
+    /// hasn't been converted to equivalence classes. Picking an arbitrary byte
+    /// from each equivalence class then permits a full exploration of the NFA
+    /// instead of using every possible byte value and thus potentially saves
+    /// quite a lot of redundant work.
+    ///
+    /// # Example
+    ///
+    /// This shows an example of what a complete sequence of representatives
+    /// might look like from a real example.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, util::alphabet::Unit};
+    ///
+    /// let nfa = NFA::new("[a-z]+")?;
+    /// let classes = nfa.byte_classes();
+    /// let reps: Vec<Unit> = classes.representatives(..).collect();
+    /// // Note that the specific byte values yielded are not guaranteed!
+    /// let expected = vec![
+    ///     Unit::u8(b'\x00'),
+    ///     Unit::u8(b'a'),
+    ///     Unit::u8(b'{'),
+    ///     Unit::eoi(3),
+    /// ];
+    /// assert_eq!(expected, reps);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Note though, that you can ask for an arbitrary range of bytes, and only
+    /// representatives for that range will be returned:
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, util::alphabet::Unit};
+    ///
+    /// let nfa = NFA::new("[a-z]+")?;
+    /// let classes = nfa.byte_classes();
+    /// let reps: Vec<Unit> = classes.representatives(b'A'..=b'z').collect();
+    /// // Note that the specific byte values yielded are not guaranteed!
+    /// let expected = vec![
+    ///     Unit::u8(b'A'),
+    ///     Unit::u8(b'a'),
+    /// ];
+    /// assert_eq!(expected, reps);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn representatives<R: core::ops::RangeBounds<u8>>(
+        &self,
+        range: R,
+    ) -> ByteClassRepresentatives<'_> {
+        use core::ops::Bound;
+
+        let cur_byte = match range.start_bound() {
+            Bound::Included(&i) => usize::from(i),
+            Bound::Excluded(&i) => usize::from(i).checked_add(1).unwrap(),
+            Bound::Unbounded => 0,
+        };
+        let end_byte = match range.end_bound() {
+            Bound::Included(&i) => {
+                Some(usize::from(i).checked_add(1).unwrap())
+            }
+            Bound::Excluded(&i) => Some(usize::from(i)),
+            Bound::Unbounded => None,
+        };
+        assert_ne!(
+            cur_byte,
+            usize::MAX,
+            "start range must be less than usize::MAX",
+        );
+        ByteClassRepresentatives {
+            classes: self,
+            cur_byte,
+            end_byte,
+            last_class: None,
+        }
+    }
+
+    /// Returns an iterator of the bytes in the given equivalence class.
+    ///
+    /// This is useful when one needs to know the actual bytes that belong to
+    /// an equivalence class. For example, conceptually speaking, accelerating
+    /// an equivalence class. For example, conceptually speaking, accelerating
+    /// a DFA state occurs when a state only has a few outgoing transitions.
+    /// But in reality, what is required is that there are only a small
+    /// number of distinct bytes that can lead to an outgoing transition. The
+    /// difference is that any one transition can correspond to an equivalence
+    /// class which may contain many bytes. Therefore, DFA state acceleration
+    /// considers the actual elements in each equivalence class of each
+    /// outgoing transition.
+    ///
+    /// # Example
+    ///
+    /// This shows an example of how to get all of the elements in an
+    /// equivalence class.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, util::alphabet::Unit};
+    ///
+    /// let nfa = NFA::new("[a-z]+")?;
+    /// let classes = nfa.byte_classes();
+    /// let elements: Vec<Unit> = classes.elements(Unit::u8(1)).collect();
+    /// let expected: Vec<Unit> = (b'a'..=b'z').map(Unit::u8).collect();
+    /// assert_eq!(expected, elements);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn elements(&self, class: Unit) -> ByteClassElements {
+        ByteClassElements { classes: self, class, byte: 0 }
+    }
+
+    /// Returns an iterator of byte ranges in the given equivalence class.
+    ///
+    /// That is, a sequence of contiguous ranges are returned. Typically, every
+    /// class maps to a single contiguous range.
+    fn element_ranges(&self, class: Unit) -> ByteClassElementRanges {
+        ByteClassElementRanges { elements: self.elements(class), range: None }
+    }
+}
+
+impl Default for ByteClasses {
+    fn default() -> ByteClasses {
+        ByteClasses::singletons()
+    }
+}
+
+impl core::fmt::Debug for ByteClasses {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        if self.is_singleton() {
+            write!(f, "ByteClasses({{singletons}})")
+        } else {
+            write!(f, "ByteClasses(")?;
+            for (i, class) in self.iter().enumerate() {
+                if i > 0 {
+                    write!(f, ", ")?;
+                }
+                write!(f, "{:?} => [", class.as_usize())?;
+                for (start, end) in self.element_ranges(class) {
+                    if start == end {
+                        write!(f, "{:?}", start)?;
+                    } else {
+                        write!(f, "{:?}-{:?}", start, end)?;
+                    }
+                }
+                write!(f, "]")?;
+            }
+            write!(f, ")")
+        }
+    }
+}
+
+/// An iterator over each equivalence class.
+///
+/// The last element in this iterator always corresponds to [`Unit::eoi`].
+///
+/// This is created by the [`ByteClasses::iter`] method.
+///
+/// The lifetime `'a` refers to the lifetime of the byte classes that this
+/// iterator was created from.
+#[derive(Debug)]
+pub struct ByteClassIter<'a> {
+    classes: &'a ByteClasses,
+    i: usize,
+}
+
+impl<'a> Iterator for ByteClassIter<'a> {
+    type Item = Unit;
+
+    fn next(&mut self) -> Option<Unit> {
+        if self.i + 1 == self.classes.alphabet_len() {
+            self.i += 1;
+            Some(self.classes.eoi())
+        } else if self.i < self.classes.alphabet_len() {
+            let class = u8::try_from(self.i).unwrap();
+            self.i += 1;
+            Some(Unit::u8(class))
+        } else {
+            None
+        }
+    }
+}
+
+/// An iterator over representative bytes from each equivalence class.
+///
+/// This is created by the [`ByteClasses::representatives`] method.
+///
+/// The lifetime `'a` refers to the lifetime of the byte classes that this
+/// iterator was created from.
+#[derive(Debug)]
+pub struct ByteClassRepresentatives<'a> {
+    classes: &'a ByteClasses,
+    cur_byte: usize,
+    end_byte: Option<usize>,
+    last_class: Option<u8>,
+}
+
+impl<'a> Iterator for ByteClassRepresentatives<'a> {
+    type Item = Unit;
+
+    fn next(&mut self) -> Option<Unit> {
+        while self.cur_byte < self.end_byte.unwrap_or(256) {
+            let byte = u8::try_from(self.cur_byte).unwrap();
+            let class = self.classes.get(byte);
+            self.cur_byte += 1;
+
+            if self.last_class != Some(class) {
+                self.last_class = Some(class);
+                return Some(Unit::u8(byte));
+            }
+        }
+        if self.cur_byte != usize::MAX && self.end_byte.is_none() {
+            // Using usize::MAX as a sentinel is OK because we ban usize::MAX
+            // from appearing as a start bound in iterator construction. But
+            // why do it this way? Well, we want to return the EOI class
+            // whenever the end of the given range is unbounded because EOI
+            // isn't really a "byte" per se, so the only way it should be
+            // excluded is if there is a bounded end to the range. Therefore,
+            // when the end is unbounded, we just need to know whether we've
+            // reported EOI or not. When we do, we set cur_byte to a value it
+            // can never otherwise be.
+            self.cur_byte = usize::MAX;
+            return Some(self.classes.eoi());
+        }
+        None
+    }
+}
+
+/// An iterator over all elements in an equivalence class.
+///
+/// This is created by the [`ByteClasses::elements`] method.
+///
+/// The lifetime `'a` refers to the lifetime of the byte classes that this
+/// iterator was created from.
+#[derive(Debug)]
+pub struct ByteClassElements<'a> {
+    classes: &'a ByteClasses,
+    class: Unit,
+    byte: usize,
+}
+
+impl<'a> Iterator for ByteClassElements<'a> {
+    type Item = Unit;
+
+    fn next(&mut self) -> Option<Unit> {
+        while self.byte < 256 {
+            let byte = u8::try_from(self.byte).unwrap();
+            self.byte += 1;
+            if self.class.is_byte(self.classes.get(byte)) {
+                return Some(Unit::u8(byte));
+            }
+        }
+        if self.byte < 257 {
+            self.byte += 1;
+            if self.class.is_eoi() {
+                return Some(Unit::eoi(256));
+            }
+        }
+        None
+    }
+}
+
+/// An iterator over all elements in an equivalence class expressed as a
+/// sequence of contiguous ranges.
+#[derive(Debug)]
+struct ByteClassElementRanges<'a> {
+    elements: ByteClassElements<'a>,
+    range: Option<(Unit, Unit)>,
+}
+
+impl<'a> Iterator for ByteClassElementRanges<'a> {
+    type Item = (Unit, Unit);
+
+    fn next(&mut self) -> Option<(Unit, Unit)> {
+        loop {
+            let element = match self.elements.next() {
+                None => return self.range.take(),
+                Some(element) => element,
+            };
+            match self.range.take() {
+                None => {
+                    self.range = Some((element, element));
+                }
+                Some((start, end)) => {
+                    if end.as_usize() + 1 != element.as_usize()
+                        || element.is_eoi()
+                    {
+                        self.range = Some((element, element));
+                        return Some((start, end));
+                    }
+                    self.range = Some((start, element));
+                }
+            }
+        }
+    }
+}
+
+/// A partitioning of bytes into equivalence classes.
+///
+/// A byte class set keeps track of an *approximation* of equivalence classes
+/// of bytes during NFA construction. That is, every byte in an equivalence
+/// class cannot discriminate between a match and a non-match.
+///
+/// For example, in the regex `[ab]+`, the bytes `a` and `b` would be in the
+/// same equivalence class because it never matters whether an `a` or a `b` is
+/// seen, and no combination of `a`s and `b`s in the text can discriminate a
+/// match.
+///
+/// Note though that this does not compute the minimal set of equivalence
+/// classes. For example, in the regex `[ac]+`, both `a` and `c` are in the
+/// same equivalence class for the same reason that `a` and `b` are in the
+/// same equivalence class in the aforementioned regex. However, in this
+/// implementation, `a` and `c` are put into distinct equivalence classes. The
+/// reason for this is implementation complexity. In the future, we should
+/// endeavor to compute the minimal equivalence classes since they can have a
+/// rather large impact on the size of the DFA. (Doing this will likely require
+/// rethinking how equivalence classes are computed, including changing the
+/// representation here, which is only able to group contiguous bytes into the
+/// same equivalence class.)
+#[cfg(feature = "alloc")]
+#[derive(Clone, Debug)]
+pub(crate) struct ByteClassSet(ByteSet);
+
+#[cfg(feature = "alloc")]
+impl Default for ByteClassSet {
+    fn default() -> ByteClassSet {
+        ByteClassSet::empty()
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl ByteClassSet {
+    /// Create a new set of byte classes where all bytes are part of the same
+    /// equivalence class.
+    pub(crate) fn empty() -> Self {
+        ByteClassSet(ByteSet::empty())
+    }
+
+    /// Indicate that the given inclusive range of bytes can discriminate a
+    /// match between it and all other bytes outside of the range.
+    pub(crate) fn set_range(&mut self, start: u8, end: u8) {
+        debug_assert!(start <= end);
+        if start > 0 {
+            self.0.add(start - 1);
+        }
+        self.0.add(end);
+    }
+
+    /// Add the contiguous ranges in the set given to this byte class set.
+    pub(crate) fn add_set(&mut self, set: &ByteSet) {
+        for (start, end) in set.iter_ranges() {
+            self.set_range(start, end);
+        }
+    }
+
+    /// Convert this boolean set to a map that maps all byte values to their
+    /// corresponding equivalence class. The last mapping indicates the largest
+    /// equivalence class identifier (which is never bigger than 255).
+    pub(crate) fn byte_classes(&self) -> ByteClasses {
+        let mut classes = ByteClasses::empty();
+        let mut class = 0u8;
+        let mut b = 0u8;
+        loop {
+            classes.set(b, class);
+            if b == 255 {
+                break;
+            }
+            if self.0.contains(b) {
+                class = class.checked_add(1).unwrap();
+            }
+            b = b.checked_add(1).unwrap();
+        }
+        classes
+    }
+}
+
+/// A simple set of bytes that is reasonably cheap to copy and allocation free.
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
+pub(crate) struct ByteSet {
+    bits: BitSet,
+}
+
+/// The representation of a byte set. Split out so that we can define a
+/// convenient Debug impl for it while keeping "ByteSet" in the output.
+#[derive(Clone, Copy, Default, Eq, PartialEq)]
+struct BitSet([u128; 2]);
+
+impl ByteSet {
+    /// Create an empty set of bytes.
+    pub(crate) fn empty() -> ByteSet {
+        ByteSet { bits: BitSet([0; 2]) }
+    }
+
+    /// Add a byte to this set.
+    ///
+    /// If the given byte already belongs to this set, then this is a no-op.
+    pub(crate) fn add(&mut self, byte: u8) {
+        let bucket = byte / 128;
+        let bit = byte % 128;
+        self.bits.0[usize::from(bucket)] |= 1 << bit;
+    }
+
+    /// Remove a byte from this set.
+    ///
+    /// If the given byte is not in this set, then this is a no-op.
+    pub(crate) fn remove(&mut self, byte: u8) {
+        let bucket = byte / 128;
+        let bit = byte % 128;
+        self.bits.0[usize::from(bucket)] &= !(1 << bit);
+    }
+
+    /// Return true if and only if the given byte is in this set.
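+    // Editor's note: a quick worked example of the bucket/bit arithmetic used
+    // above and below. For byte 200, bucket = 200 / 128 = 1 and
+    // bit = 200 % 128 = 72, so membership for byte 200 is tracked by bit 72
+    // of `bits.0[1]`.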
+    pub(crate) fn contains(&self, byte: u8) -> bool {
+        let bucket = byte / 128;
+        let bit = byte % 128;
+        self.bits.0[usize::from(bucket)] & (1 << bit) > 0
+    }
+
+    /// Return true if and only if the given inclusive range of bytes is in
+    /// this set.
+    pub(crate) fn contains_range(&self, start: u8, end: u8) -> bool {
+        (start..=end).all(|b| self.contains(b))
+    }
+
+    /// Returns an iterator over all bytes in this set.
+    pub(crate) fn iter(&self) -> ByteSetIter {
+        ByteSetIter { set: self, b: 0 }
+    }
+
+    /// Returns an iterator over all contiguous ranges of bytes in this set.
+    pub(crate) fn iter_ranges(&self) -> ByteSetRangeIter {
+        ByteSetRangeIter { set: self, b: 0 }
+    }
+
+    /// Return true if and only if this set is empty.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn is_empty(&self) -> bool {
+        self.bits.0 == [0, 0]
+    }
+
+    /// Deserializes a byte set from the given slice. If the slice is of
+    /// incorrect length or is otherwise malformed, then an error is returned.
+    /// Upon success, the number of bytes read along with the set are returned.
+    /// The number of bytes read is always a multiple of 8.
+    pub(crate) fn from_bytes(
+        slice: &[u8],
+    ) -> Result<(ByteSet, usize), DeserializeError> {
+        use core::mem::size_of;
+
+        wire::check_slice_len(slice, 2 * size_of::<u128>(), "byte set")?;
+        let mut nread = 0;
+        let (low, nr) = wire::try_read_u128(slice, "byte set low bucket")?;
+        nread += nr;
+        let (high, nr) =
+            wire::try_read_u128(&slice[nread..], "byte set high bucket")?;
+        nread += nr;
+        Ok((ByteSet { bits: BitSet([low, high]) }, nread))
+    }
+
+    /// Writes this byte set to the given byte buffer. If the given buffer is
+    /// too small, then an error is returned. Upon success, the total number of
+    /// bytes written is returned. The number of bytes written is guaranteed to
+    /// be a multiple of 8.
+    pub(crate) fn write_to<E: Endian>(
+        &self,
+        dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        use core::mem::size_of;
+
+        let nwrite = self.write_to_len();
+        if dst.len() < nwrite {
+            return Err(SerializeError::buffer_too_small("byte set"));
+        }
+        let mut nw = 0;
+        E::write_u128(self.bits.0[0], &mut dst[nw..]);
+        nw += size_of::<u128>();
+        E::write_u128(self.bits.0[1], &mut dst[nw..]);
+        nw += size_of::<u128>();
+        assert_eq!(nwrite, nw, "expected to write certain number of bytes");
+        assert_eq!(
+            nw % 8,
+            0,
+            "expected to write multiple of 8 bytes for byte set",
+        );
+        Ok(nw)
+    }
+
+    /// Returns the total number of bytes written by `write_to`.
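+    /// (Editor's note: this is `2 * size_of::<u128>()`, i.e. 32 bytes, which
+    /// satisfies the "multiple of 8" guarantee documented on `write_to`.)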
+    pub(crate) fn write_to_len(&self) -> usize {
+        2 * core::mem::size_of::<u128>()
+    }
+}
+
+impl core::fmt::Debug for BitSet {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let mut fmtd = f.debug_set();
+        for b in 0u8..=255 {
+            if (ByteSet { bits: *self }).contains(b) {
+                fmtd.entry(&b);
+            }
+        }
+        fmtd.finish()
+    }
+}
+
+#[derive(Debug)]
+pub(crate) struct ByteSetIter<'a> {
+    set: &'a ByteSet,
+    b: usize,
+}
+
+impl<'a> Iterator for ByteSetIter<'a> {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<u8> {
+        while self.b <= 255 {
+            let b = u8::try_from(self.b).unwrap();
+            self.b += 1;
+            if self.set.contains(b) {
+                return Some(b);
+            }
+        }
+        None
+    }
+}
+
+#[derive(Debug)]
+pub(crate) struct ByteSetRangeIter<'a> {
+    set: &'a ByteSet,
+    b: usize,
+}
+
+impl<'a> Iterator for ByteSetRangeIter<'a> {
+    type Item = (u8, u8);
+
+    fn next(&mut self) -> Option<(u8, u8)> {
+        let asu8 = |n: usize| u8::try_from(n).unwrap();
+        while self.b <= 255 {
+            let start = asu8(self.b);
+            self.b += 1;
+            if !self.set.contains(start) {
+                continue;
+            }
+
+            let mut end = start;
+            while self.b <= 255 && self.set.contains(asu8(self.b)) {
+                end = asu8(self.b);
+                self.b += 1;
+            }
+            return Some((start, end));
+        }
+        None
+    }
+}
+
+#[cfg(all(test, feature = "alloc"))]
+mod tests {
+    use alloc::{vec, vec::Vec};
+
+    use super::*;
+
+    #[test]
+    fn byte_classes() {
+        let mut set = ByteClassSet::empty();
+        set.set_range(b'a', b'z');
+
+        let classes = set.byte_classes();
+        assert_eq!(classes.get(0), 0);
+        assert_eq!(classes.get(1), 0);
+        assert_eq!(classes.get(2), 0);
+        assert_eq!(classes.get(b'a' - 1), 0);
+        assert_eq!(classes.get(b'a'), 1);
+        assert_eq!(classes.get(b'm'), 1);
+        assert_eq!(classes.get(b'z'), 1);
+        assert_eq!(classes.get(b'z' + 1), 2);
+        assert_eq!(classes.get(254), 2);
+        assert_eq!(classes.get(255), 2);
+
+        let mut set = ByteClassSet::empty();
+        set.set_range(0, 2);
+        set.set_range(4, 6);
+        let classes = set.byte_classes();
+        assert_eq!(classes.get(0), 0);
+        assert_eq!(classes.get(1), 0);
+        assert_eq!(classes.get(2), 0);
+        assert_eq!(classes.get(3), 1);
+        assert_eq!(classes.get(4), 2);
+        assert_eq!(classes.get(5), 2);
+        assert_eq!(classes.get(6), 2);
+        assert_eq!(classes.get(7), 3);
+        assert_eq!(classes.get(255), 3);
+    }
+
+    #[test]
+    fn full_byte_classes() {
+        let mut set = ByteClassSet::empty();
+        for b in 0u8..=255 {
+            set.set_range(b, b);
+        }
+        assert_eq!(set.byte_classes().alphabet_len(), 257);
+    }
+
+    #[test]
+    fn elements_typical() {
+        let mut set = ByteClassSet::empty();
+        set.set_range(b'b', b'd');
+        set.set_range(b'g', b'm');
+        set.set_range(b'z', b'z');
+        let classes = set.byte_classes();
+        // class 0: \x00-a
+        // class 1: b-d
+        // class 2: e-f
+        // class 3: g-m
+        // class 4: n-y
+        // class 5: z-z
+        // class 6: \x7B-\xFF
+        // class 7: EOI
+        assert_eq!(classes.alphabet_len(), 8);
+
+        let elements = classes.elements(Unit::u8(0)).collect::<Vec<_>>();
+        assert_eq!(elements.len(), 98);
+        assert_eq!(elements[0], Unit::u8(b'\x00'));
+        assert_eq!(elements[97], Unit::u8(b'a'));
+
+        let elements = classes.elements(Unit::u8(1)).collect::<Vec<_>>();
+        assert_eq!(
+            elements,
+            vec![Unit::u8(b'b'), Unit::u8(b'c'), Unit::u8(b'd')],
+        );
+
+        let elements = classes.elements(Unit::u8(2)).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::u8(b'e'), Unit::u8(b'f')]);
+
+        let elements = classes.elements(Unit::u8(3)).collect::<Vec<_>>();
+        assert_eq!(
+            elements,
+            vec![
+                Unit::u8(b'g'),
+                Unit::u8(b'h'),
+                Unit::u8(b'i'),
+                Unit::u8(b'j'),
+                Unit::u8(b'k'),
+                Unit::u8(b'l'),
+                Unit::u8(b'm'),
+            ],
+        );
+        let elements = classes.elements(Unit::u8(4)).collect::<Vec<_>>();
+        assert_eq!(elements.len(), 12);
+        assert_eq!(elements[0], Unit::u8(b'n'));
+        assert_eq!(elements[11], Unit::u8(b'y'));
+
+        let elements = classes.elements(Unit::u8(5)).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::u8(b'z')]);
+
+        let elements = classes.elements(Unit::u8(6)).collect::<Vec<_>>();
+        assert_eq!(elements.len(), 133);
+        assert_eq!(elements[0], Unit::u8(b'\x7B'));
+        assert_eq!(elements[132], Unit::u8(b'\xFF'));
+
+        let elements = classes.elements(Unit::eoi(7)).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::eoi(256)]);
+    }
+
+    #[test]
+    fn elements_singletons() {
+        let classes = ByteClasses::singletons();
+        assert_eq!(classes.alphabet_len(), 257);
+
+        let elements = classes.elements(Unit::u8(b'a')).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::u8(b'a')]);
+
+        let elements = classes.elements(Unit::eoi(5)).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::eoi(256)]);
+    }
+
+    #[test]
+    fn elements_empty() {
+        let classes = ByteClasses::empty();
+        assert_eq!(classes.alphabet_len(), 2);
+
+        let elements = classes.elements(Unit::u8(0)).collect::<Vec<_>>();
+        assert_eq!(elements.len(), 256);
+        assert_eq!(elements[0], Unit::u8(b'\x00'));
+        assert_eq!(elements[255], Unit::u8(b'\xFF'));
+
+        let elements = classes.elements(Unit::eoi(1)).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::eoi(256)]);
+    }
+
+    #[test]
+    fn representatives() {
+        let mut set = ByteClassSet::empty();
+        set.set_range(b'b', b'd');
+        set.set_range(b'g', b'm');
+        set.set_range(b'z', b'z');
+        let classes = set.byte_classes();
+
+        let got: Vec<Unit> = classes.representatives(..).collect();
+        let expected = vec![
+            Unit::u8(b'\x00'),
+            Unit::u8(b'b'),
+            Unit::u8(b'e'),
+            Unit::u8(b'g'),
+            Unit::u8(b'n'),
+            Unit::u8(b'z'),
+            Unit::u8(b'\x7B'),
+            Unit::eoi(7),
+        ];
+        assert_eq!(expected, got);
+
+        let got: Vec<Unit> = classes.representatives(..0).collect();
+        assert!(got.is_empty());
+        let got: Vec<Unit> = classes.representatives(1..1).collect();
+        assert!(got.is_empty());
+        let got: Vec<Unit> = classes.representatives(255..255).collect();
+        assert!(got.is_empty());
+
+        // A weird case that is the only guaranteed way to get an iterator
+        // of just the EOI class by excluding all possible byte values.
+        let got: Vec<Unit> = classes
+            .representatives((
+                core::ops::Bound::Excluded(255),
+                core::ops::Bound::Unbounded,
+            ))
+            .collect();
+        let expected = vec![Unit::eoi(7)];
+        assert_eq!(expected, got);
+
+        let got: Vec<Unit> = classes.representatives(..=255).collect();
+        let expected = vec![
+            Unit::u8(b'\x00'),
+            Unit::u8(b'b'),
+            Unit::u8(b'e'),
+            Unit::u8(b'g'),
+            Unit::u8(b'n'),
+            Unit::u8(b'z'),
+            Unit::u8(b'\x7B'),
+        ];
+        assert_eq!(expected, got);
+
+        let got: Vec<Unit> = classes.representatives(b'b'..=b'd').collect();
+        let expected = vec![Unit::u8(b'b')];
+        assert_eq!(expected, got);
+
+        let got: Vec<Unit> = classes.representatives(b'a'..=b'd').collect();
+        let expected = vec![Unit::u8(b'a'), Unit::u8(b'b')];
+        assert_eq!(expected, got);
+
+        let got: Vec<Unit> = classes.representatives(b'b'..=b'e').collect();
+        let expected = vec![Unit::u8(b'b'), Unit::u8(b'e')];
+        assert_eq!(expected, got);
+
+        let got: Vec<Unit> = classes.representatives(b'A'..=b'Z').collect();
+        let expected = vec![Unit::u8(b'A')];
+        assert_eq!(expected, got);
+
+        let got: Vec<Unit> = classes.representatives(b'A'..=b'z').collect();
+        let expected = vec![
+            Unit::u8(b'A'),
+            Unit::u8(b'b'),
+            Unit::u8(b'e'),
+            Unit::u8(b'g'),
+            Unit::u8(b'n'),
+            Unit::u8(b'z'),
+        ];
+        assert_eq!(expected, got);
+
+        let got: Vec<Unit> = classes.representatives(b'z'..).collect();
+        let expected = vec![Unit::u8(b'z'), Unit::u8(b'\x7B'), Unit::eoi(7)];
+        assert_eq!(expected, got);
+
+        let got: Vec<Unit> = classes.representatives(b'z'..=0xFF).collect();
+        let expected = vec![Unit::u8(b'z'), Unit::u8(b'\x7B')];
+        assert_eq!(expected, got);
+    }
+}
diff --git a/vendor/regex-automata/src/util/captures.rs b/vendor/regex-automata/src/util/captures.rs
new file mode 100644
index 0000000..05db6a9
--- /dev/null
+++ b/vendor/regex-automata/src/util/captures.rs
@@ -0,0 +1,2548 @@
+/*!
+Provides types for dealing with capturing groups.
+
+Capturing groups refer to sub-patterns of regexes that some regex engines can
+report matching offsets for. For example, matching `[a-z]([0-9]+)` against
+`a789` would give `a789` as the overall match (for the implicit capturing group
+at index `0`) and `789` as the match for the capturing group `([0-9]+)` (an
+explicit capturing group at index `1`).
+
+Not all regex engines can report match offsets for capturing groups. Indeed,
+to a first approximation, regex engines that can report capturing group offsets
+tend to be quite a bit slower than regex engines that can't. This is because
+tracking capturing groups at search time usually requires more "power" that
+in turn adds overhead.
+
+Other regex implementations might call capturing groups "submatches."
+
+# Overview
+
+The main types in this module are:
+
+* [`Captures`] records the capturing group offsets found during a search. It
+provides convenience routines for looking up capturing group offsets by either
+index or name.
+* [`GroupInfo`] records the mapping between capturing groups and "slots,"
+where the latter are how capturing groups are recorded during a regex search.
+This also keeps a mapping from capturing group name to index, and capture
+group index to name. A `GroupInfo` is used by `Captures` internally to
+provide a convenient API. It is unlikely that you'll use a `GroupInfo`
+directly, but for example, if you've compiled a Thompson NFA, then you can use
+[`thompson::NFA::group_info`](crate::nfa::thompson::NFA::group_info) to get its
+underlying `GroupInfo`.
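+
+For a quick taste of the API (an editor's sketch based on the `PikeVM`
+examples used throughout this module, not an upstream example), here is the
+`[a-z]([0-9]+)` search described above:
+
+```
+use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
+
+let re = PikeVM::new(r"[a-z]([0-9]+)")?;
+let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+
+re.captures(&mut cache, "a789", &mut caps);
+assert!(caps.is_match());
+// Group 0 is the implicit group for the overall match.
+assert_eq!(Some(Span::from(0..4)), caps.get_group(0));
+// Group 1 is the explicit capturing group `([0-9]+)`.
+assert_eq!(Some(Span::from(1..4)), caps.get_group(1));
+# Ok::<(), Box<dyn std::error::Error>>(())
+```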
+*/
+
+use alloc::{string::String, sync::Arc, vec, vec::Vec};
+
+use crate::util::{
+    interpolate,
+    primitives::{
+        NonMaxUsize, PatternID, PatternIDError, PatternIDIter, SmallIndex,
+    },
+    search::{Match, Span},
+};
+
+/// The span offsets of capturing groups after a match has been found.
+///
+/// This type represents the output of regex engines that can report the
+/// offsets at which capturing group matches or "submatches" occur. For
+/// example, the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM). When a match
+/// occurs, it will at minimum contain the [`PatternID`] of the pattern that
+/// matched. Depending upon how it was constructed, it may also contain the
+/// start/end offsets of the entire match of the pattern and the start/end
+/// offsets of each capturing group that participated in the match.
+///
+/// Values of this type are always created for a specific [`GroupInfo`]. It is
+/// unspecified behavior to use a `Captures` value in a search with any regex
+/// engine that has a different `GroupInfo` than the one the `Captures` were
+/// created with.
+///
+/// # Constructors
+///
+/// There are three constructors for this type that control what kind of
+/// information is available upon a match:
+///
+/// * [`Captures::all`]: Will store overall pattern match offsets in addition
+/// to the offsets of capturing groups that participated in the match.
+/// * [`Captures::matches`]: Will store only the overall pattern
+/// match offsets. The offsets of capturing groups (even ones that participated
+/// in the match) are not available.
+/// * [`Captures::empty`]: Will only store the pattern ID that matched. No
+/// match offsets are available at all.
+///
+/// If you aren't sure which to choose, then pick the first one. The first one
+/// is what convenience routines like
+/// [`PikeVM::create_captures`](crate::nfa::thompson::pikevm::PikeVM::create_captures)
+/// will use automatically.
+///
+/// The main difference between these choices is performance. Namely, if you
+/// ask for _less_ information, then the execution of regex search may be able
+/// to run more quickly.
+///
+/// # Notes
+///
+/// It is worth pointing out that this type is not coupled to any one specific
+/// regex engine. Instead, its coupling is with [`GroupInfo`], which is the
+/// thing that is responsible for mapping capturing groups to "slot" offsets.
+/// Slot offsets are indices into a single sequence of memory at which matching
+/// haystack offsets for the corresponding group are written by regex engines.
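+///
+/// (Editor's note: concretely, for a single-pattern regex, group `i` uses
+/// slots `2*i` and `2*i + 1` for its start and end offsets. This matches the
+/// slot layout asserted in the examples for `Captures::slots` further down.)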
+///
+/// # Example
+///
+/// This example shows how to parse a simple date and extract the components of
+/// the date via capturing groups:
+///
+/// ```
+/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
+///
+/// let re = PikeVM::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?;
+/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+///
+/// re.captures(&mut cache, "2010-03-14", &mut caps);
+/// assert!(caps.is_match());
+/// assert_eq!(Some(Span::from(0..4)), caps.get_group(1));
+/// assert_eq!(Some(Span::from(5..7)), caps.get_group(2));
+/// assert_eq!(Some(Span::from(8..10)), caps.get_group(3));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// # Example: named capturing groups
+///
+/// This example is like the one above, but leverages the ability to name
+/// capturing groups in order to make the code a bit clearer:
+///
+/// ```
+/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
+///
+/// let re = PikeVM::new(r"^(?P<y>[0-9]{4})-(?P<m>[0-9]{2})-(?P<d>[0-9]{2})$")?;
+/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+///
+/// re.captures(&mut cache, "2010-03-14", &mut caps);
+/// assert!(caps.is_match());
+/// assert_eq!(Some(Span::from(0..4)), caps.get_group_by_name("y"));
+/// assert_eq!(Some(Span::from(5..7)), caps.get_group_by_name("m"));
+/// assert_eq!(Some(Span::from(8..10)), caps.get_group_by_name("d"));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone)]
+pub struct Captures {
+    /// The group info that these capture groups are coupled to. This is what
+    /// gives the "convenience" of the `Captures` API. Namely, it provides the
+    /// slot mapping and the name|-->index mapping for capture lookups by name.
+    group_info: GroupInfo,
+    /// The ID of the pattern that matched. Regex engines must set this to
+    /// None when no match occurs.
+    pid: Option<PatternID>,
+    /// The slot values, i.e., submatch offsets.
+    ///
+    /// In theory, the smallest sequence of slots would be something like
+    /// `max(groups(pattern) for pattern in regex) * 2`, but instead, we use
+    /// `sum(groups(pattern) for pattern in regex) * 2`. Why?
+    ///
+    /// Well, the former could be used in theory, because we don't generally
+    /// have any overlapping APIs that involve capturing groups. Therefore,
+    /// there's technically never any need to have slots set for multiple
+    /// patterns. However, this might change some day, in which case, we would
+    /// need to have slots available.
+    ///
+    /// The other reason is that during the execution of some regex engines,
+    /// there exists a point in time where multiple slots for different
+    /// patterns may be written to before knowing which pattern has matched.
+    /// Therefore, the regex engines themselves, in order to support multiple
+    /// patterns correctly, must have all slots available. If `Captures`
+    /// doesn't have all slots available, then regex engines can't write
+    /// directly into the caller provided `Captures` and must instead write
+    /// into some other storage and then copy the slots involved in the match
+    /// at the end of the search.
+    ///
+    /// So overall, at least as of the time of writing, it seems like the path
+    /// of least resistance is to just require allocating all possible slots
+    /// instead of the conceptual minimum. Another way to justify this is that
+    /// the most common case is a single pattern, in which case, there is no
+    /// inefficiency here since the 'max' and 'sum' calculations above are
+    /// equivalent in that case.
+    ///
+    /// N.B. The mapping from group index to slot is maintained by `GroupInfo`
+    /// and is considered an API guarantee. See `GroupInfo` for more details on
+    /// that mapping.
+    ///
+    /// N.B. `Option<NonMaxUsize>` has the same size as a `usize`.
+    slots: Vec<Option<NonMaxUsize>>,
+}
+
+impl Captures {
+    /// Create new storage for the offsets of all matching capturing groups.
+    ///
+    /// This routine provides the most information for matches---namely, the
+    /// spans of matching capturing groups---but also requires the regex search
+    /// routines to do the most work.
+    ///
+    /// It is unspecified behavior to use the returned `Captures` value in a
+    /// search with a `GroupInfo` other than the one that is provided to this
+    /// constructor.
+    ///
+    /// # Example
+    ///
+    /// This example shows that all capturing groups---but only ones that
+    /// participated in a match---are available to query after a match has
+    /// been found:
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::captures::Captures,
+    ///     Span, Match,
+    /// };
+    ///
+    /// let re = PikeVM::new(
+    ///     r"^(?:(?P<lower>[a-z]+)|(?P<upper>[A-Z]+))(?P<digits>[0-9]+)$",
+    /// )?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = Captures::all(re.get_nfa().group_info().clone());
+    ///
+    /// re.captures(&mut cache, "ABC123", &mut caps);
+    /// assert!(caps.is_match());
+    /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match());
+    /// // The 'lower' group didn't match, so it won't have any offsets.
+    /// assert_eq!(None, caps.get_group_by_name("lower"));
+    /// assert_eq!(Some(Span::from(0..3)), caps.get_group_by_name("upper"));
+    /// assert_eq!(Some(Span::from(3..6)), caps.get_group_by_name("digits"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn all(group_info: GroupInfo) -> Captures {
+        let slots = group_info.slot_len();
+        Captures { group_info, pid: None, slots: vec![None; slots] }
+    }
+
+    /// Create new storage for only the full match spans of a pattern. This
+    /// does not include any capturing group offsets.
+    ///
+    /// It is unspecified behavior to use the returned `Captures` value in a
+    /// search with a `GroupInfo` other than the one that is provided to this
+    /// constructor.
+    ///
+    /// # Example
+    ///
+    /// This example shows that only overall match offsets are reported when
+    /// this constructor is used. Accessing any capturing groups other than
+    /// the 0th will always return `None`.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::captures::Captures,
+    ///     Match,
+    /// };
+    ///
+    /// let re = PikeVM::new(
+    ///     r"^(?:(?P<lower>[a-z]+)|(?P<upper>[A-Z]+))(?P<digits>[0-9]+)$",
+    /// )?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = Captures::matches(re.get_nfa().group_info().clone());
+    ///
+    /// re.captures(&mut cache, "ABC123", &mut caps);
+    /// assert!(caps.is_match());
+    /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match());
+    /// // We didn't ask for capturing group offsets, so they aren't available.
+    /// assert_eq!(None, caps.get_group_by_name("lower"));
+    /// assert_eq!(None, caps.get_group_by_name("upper"));
+    /// assert_eq!(None, caps.get_group_by_name("digits"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn matches(group_info: GroupInfo) -> Captures {
+        // This is OK because we know there are at least this many slots,
+        // and GroupInfo construction guarantees that the number of slots fits
+        // into a usize.
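+        // (Editor's note: "this many slots" is two per pattern, i.e. the
+        // start and end offsets for the implicit group 0 of each pattern.)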
+        let slots = group_info.pattern_len().checked_mul(2).unwrap();
+        Captures { group_info, pid: None, slots: vec![None; slots] }
+    }
+
+    /// Create new storage for only tracking which pattern matched. No offsets
+    /// are stored at all.
+    ///
+    /// It is unspecified behavior to use the returned `Captures` value in a
+    /// search with a `GroupInfo` other than the one that is provided to this
+    /// constructor.
+    ///
+    /// # Example
+    ///
+    /// This example shows that only the pattern that matched can be accessed
+    /// from a `Captures` value created via this constructor.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::captures::Captures,
+    ///     PatternID,
+    /// };
+    ///
+    /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
+    ///
+    /// re.captures(&mut cache, "aABCz", &mut caps);
+    /// assert!(caps.is_match());
+    /// assert_eq!(Some(PatternID::must(0)), caps.pattern());
+    /// // We didn't ask for any offsets, so they aren't available.
+    /// assert_eq!(None, caps.get_match());
+    ///
+    /// re.captures(&mut cache, &"aABCz"[1..], &mut caps);
+    /// assert!(caps.is_match());
+    /// assert_eq!(Some(PatternID::must(1)), caps.pattern());
+    /// // We didn't ask for any offsets, so they aren't available.
+    /// assert_eq!(None, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn empty(group_info: GroupInfo) -> Captures {
+        Captures { group_info, pid: None, slots: vec![] }
+    }
+
+    /// Returns true if and only if this capturing group represents a match.
+    ///
+    /// This is a convenience routine for `caps.pattern().is_some()`.
+    ///
+    /// # Example
+    ///
+    /// When using the PikeVM (for example), the lightest weight way of
+    /// detecting whether a match exists is to create capturing groups that
+    /// only track the ID of the pattern that matched (if any):
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::captures::Captures,
+    /// };
+    ///
+    /// let re = PikeVM::new(r"[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
+    ///
+    /// re.captures(&mut cache, "aABCz", &mut caps);
+    /// assert!(caps.is_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn is_match(&self) -> bool {
+        self.pid.is_some()
+    }
+
+    /// Returns the identifier of the pattern that matched when this
+    /// capturing group represents a match. If no match was found, then this
+    /// always returns `None`.
+    ///
+    /// This returns a pattern ID in precisely the cases in which `is_match`
+    /// returns `true`. Similarly, the pattern ID returned is always the
+    /// same pattern ID found in the `Match` returned by `get_match`.
+    ///
+    /// # Example
+    ///
+    /// When using the PikeVM (for example), the lightest weight way of
+    /// detecting which pattern matched is to create capturing groups that only
+    /// track the ID of the pattern that matched (if any):
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::captures::Captures,
+    ///     PatternID,
+    /// };
+    ///
+    /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
+    ///
+    /// re.captures(&mut cache, "ABC", &mut caps);
+    /// assert_eq!(Some(PatternID::must(1)), caps.pattern());
+    /// // Recall that offsets are only available when using a non-empty
+    /// // Captures value. So even though a match occurred, this returns None!
+    /// assert_eq!(None, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn pattern(&self) -> Option<PatternID> {
+        self.pid
+    }
+
+    /// Returns the pattern ID and the span of the match, if one occurred.
+    ///
+    /// This always returns `None` when `Captures` was created with
+    /// [`Captures::empty`], even if a match was found.
+    ///
+    /// If this routine returns a non-`None` value, then `is_match` is
+    /// guaranteed to return `true` and `pattern` is also guaranteed to return
+    /// a non-`None` value.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to get the full match from a search:
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "ABC", &mut caps);
+    /// assert_eq!(Some(Match::must(1, 0..3)), caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn get_match(&self) -> Option<Match> {
+        Some(Match::new(self.pattern()?, self.get_group(0)?))
+    }
+
+    /// Returns the span of a capturing group match corresponding to the group
+    /// index given, only if both the overall pattern matched and the capturing
+    /// group participated in that match.
+    ///
+    /// This returns `None` if `index` is invalid. `index` is valid if and only
+    /// if it's less than [`Captures::group_len`] for the matching pattern.
+    ///
+    /// This always returns `None` when `Captures` was created with
+    /// [`Captures::empty`], even if a match was found. This also always
+    /// returns `None` for any `index > 0` when `Captures` was created with
+    /// [`Captures::matches`].
+    ///
+    /// If this routine returns a non-`None` value, then `is_match` is
+    /// guaranteed to return `true`, `pattern` is guaranteed to return a
+    /// non-`None` value and `get_match` is guaranteed to return a non-`None`
+    /// value.
+    ///
+    /// By convention, the 0th capture group will always return the same
+    /// span as the span returned by `get_match`. This is because the 0th
+    /// capture group always corresponds to the entirety of the pattern's
+    /// match. (It is similarly always unnamed because it is implicit.) This
+    /// isn't necessarily true of all regex engines. For example, one can
+    /// hand-compile a [`thompson::NFA`](crate::nfa::thompson::NFA) via a
+    /// [`thompson::Builder`](crate::nfa::thompson::Builder), which isn't
+    /// technically forced to make the 0th capturing group always correspond to
+    /// the entire match.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to get the capturing groups, by index, from a
+    /// match:
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match};
+    ///
+    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
+    /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match());
+    /// assert_eq!(Some(Span::from(0..5)), caps.get_group(1));
+    /// assert_eq!(Some(Span::from(6..17)), caps.get_group(2));
+    /// // Looking for a non-existent capturing group will return None:
+    /// assert_eq!(None, caps.get_group(3));
+    /// # // literals are too big for 32-bit usize: #1039
+    /// # #[cfg(target_pointer_width = "64")]
+    /// assert_eq!(None, caps.get_group(9944060567225171988));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn get_group(&self, index: usize) -> Option<Span> {
+        let pid = self.pattern()?;
+        // There's a little bit of work needed to map captures to slots in the
+        // fully general case. But in the overwhelming common case of a single
+        // pattern, we can just do some simple arithmetic.
+        let (slot_start, slot_end) = if self.group_info().pattern_len() == 1 {
+            (index.checked_mul(2)?, index.checked_mul(2)?.checked_add(1)?)
+        } else {
+            self.group_info().slots(pid, index)?
+        };
+        let start = self.slots.get(slot_start).copied()??;
+        let end = self.slots.get(slot_end).copied()??;
+        Some(Span { start: start.get(), end: end.get() })
+    }
+
+    /// Returns the span of a capturing group match corresponding to the group
+    /// name given, only if both the overall pattern matched and the capturing
+    /// group participated in that match.
+    ///
+    /// This returns `None` if `name` does not correspond to a valid capturing
+    /// group for the pattern that matched.
+    ///
+    /// This always returns `None` when `Captures` was created with
+    /// [`Captures::empty`], even if a match was found. This also always
+    /// returns `None` for any `index > 0` when `Captures` was created with
+    /// [`Captures::matches`].
+    ///
+    /// If this routine returns a non-`None` value, then `is_match` is
+    /// guaranteed to return `true`, `pattern` is guaranteed to return a
+    /// non-`None` value and `get_match` is guaranteed to return a non-`None`
+    /// value.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to get the capturing groups, by name, from a
+    /// match:
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match};
+    ///
+    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
+    /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match());
+    /// assert_eq!(Some(Span::from(0..5)), caps.get_group_by_name("first"));
+    /// assert_eq!(Some(Span::from(6..17)), caps.get_group_by_name("last"));
+    /// // Looking for a non-existent capturing group will return None:
+    /// assert_eq!(None, caps.get_group_by_name("middle"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn get_group_by_name(&self, name: &str) -> Option<Span> {
+        let index = self.group_info().to_index(self.pattern()?, name)?;
+        self.get_group(index)
+    }
+
+    /// Returns an iterator of possible spans for every capturing group in the
+    /// matching pattern.
+    ///
+    /// If this `Captures` value does not correspond to a match, then the
+    /// iterator returned yields no elements.
+    ///
+    /// Note that the iterator returned yields elements of type `Option<Span>`.
+    /// A span is present if and only if it corresponds to a capturing group
+    /// that participated in a match.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to collect all capturing groups:
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
+    ///
+    /// let re = PikeVM::new(
+    ///     // Matches first/last names, with an optional middle name.
+    ///     r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$",
+    /// )?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "Harry James Potter", &mut caps);
+    /// assert!(caps.is_match());
+    /// let groups: Vec<Option<Span>> = caps.iter().collect();
+    /// assert_eq!(groups, vec![
+    ///     Some(Span::from(0..18)),
+    ///     Some(Span::from(0..5)),
+    ///     Some(Span::from(6..11)),
+    ///     Some(Span::from(12..18)),
+    /// ]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// This example uses the same regex as the previous example, but with a
+    /// haystack that omits the middle name. This results in a capturing group
+    /// that is present in the elements yielded by the iterator but without a
+    /// match:
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
+    ///
+    /// let re = PikeVM::new(
+    ///     // Matches first/last names, with an optional middle name.
+    ///     r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$",
+    /// )?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "Harry Potter", &mut caps);
+    /// assert!(caps.is_match());
+    /// let groups: Vec<Option<Span>> = caps.iter().collect();
+    /// assert_eq!(groups, vec![
+    ///     Some(Span::from(0..12)),
+    ///     Some(Span::from(0..5)),
+    ///     None,
+    ///     Some(Span::from(6..12)),
+    /// ]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn iter(&self) -> CapturesPatternIter<'_> {
+        let names = self
+            .pattern()
+            .map_or(GroupInfoPatternNames::empty().enumerate(), |pid| {
+                self.group_info().pattern_names(pid).enumerate()
+            });
+        CapturesPatternIter { caps: self, names }
+    }
+
+    /// Return the total number of capturing groups for the matching pattern.
+    ///
+    /// If this `Captures` value does not correspond to a match, then this
+    /// always returns `0`.
+    ///
+    /// This always returns the same number of elements yielded by
+    /// [`Captures::iter`]. That is, the number includes capturing groups even
+    /// if they don't participate in the match.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to count the total number of capturing groups
+    /// associated with a pattern. Notice that it includes groups that did not
+    /// participate in a match (just like `Captures::iter` does).
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::new(
+    ///     // Matches first/last names, with an optional middle name.
+ /// r"^(?P\pL+)\s+(?:(?P\pL+)\s+)?(?P\pL+)$", + /// )?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Harry Potter", &mut caps); + /// assert_eq!(4, caps.group_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn group_len(&self) -> usize { + let pid = match self.pattern() { + None => return 0, + Some(pid) => pid, + }; + self.group_info().group_len(pid) + } + + /// Returns a reference to the underlying group info on which these + /// captures are based. + /// + /// The difference between `GroupInfo` and `Captures` is that the former + /// defines the structure of capturing groups where as the latter is what + /// stores the actual match information. So where as `Captures` only gives + /// you access to the current match, `GroupInfo` lets you query any + /// information about all capturing groups, even ones for patterns that + /// weren't involved in a match. + /// + /// Note that a `GroupInfo` uses reference counting internally, so it may + /// be cloned cheaply. + /// + /// # Example + /// + /// This example shows how to get all capturing group names from the + /// underlying `GroupInfo`. Notice that we don't even need to run a + /// search. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?Pa)", + /// r"(a)(b)", + /// r"ab", + /// r"(?Pa)(?Pa)", + /// r"(?Pz)", + /// ])?; + /// let caps = re.create_captures(); + /// + /// let expected = vec![ + /// (PatternID::must(0), 0, None), + /// (PatternID::must(0), 1, Some("foo")), + /// (PatternID::must(1), 0, None), + /// (PatternID::must(1), 1, None), + /// (PatternID::must(1), 2, None), + /// (PatternID::must(2), 0, None), + /// (PatternID::must(3), 0, None), + /// (PatternID::must(3), 1, Some("bar")), + /// (PatternID::must(3), 2, Some("quux")), + /// (PatternID::must(4), 0, None), + /// (PatternID::must(4), 1, Some("foo")), + /// ]; + /// // We could also just use 're.get_nfa().group_info()'. + /// let got: Vec<(PatternID, usize, Option<&str>)> = + /// caps.group_info().all_names().collect(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn group_info(&self) -> &GroupInfo { + &self.group_info + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated string is returned. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. + /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?[0-9]{2})-(?[0-9]{2})-(?[0-9]{4})", + /// r"(?[0-9]{4})-(?[0-9]{2})-(?[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = "year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. + /// let hay = "On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_string(hay, replacement); + /// assert_eq!("year=2010, month=03, day=14", result); + /// + /// // And this matches the second pattern. 
+ /// let hay = "On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_string(hay, replacement); + /// assert_eq!("year=2010, month=03, day=14", result); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn interpolate_string( + &self, + haystack: &str, + replacement: &str, + ) -> String { + let mut dst = String::new(); + self.interpolate_string_into(haystack, replacement, &mut dst); + dst + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated string is written to `dst`. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. + /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?[0-9]{2})-(?[0-9]{2})-(?[0-9]{4})", + /// r"(?[0-9]{4})-(?[0-9]{2})-(?[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = "year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. + /// let hay = "On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = String::new(); + /// caps.interpolate_string_into(hay, replacement, &mut dst); + /// assert_eq!("year=2010, month=03, day=14", dst); + /// + /// // And this matches the second pattern. + /// let hay = "On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = String::new(); + /// caps.interpolate_string_into(hay, replacement, &mut dst); + /// assert_eq!("year=2010, month=03, day=14", dst); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn interpolate_string_into( + &self, + haystack: &str, + replacement: &str, + dst: &mut String, + ) { + interpolate::string( + replacement, + |index, dst| { + let span = match self.get_group(index) { + None => return, + Some(span) => span, + }; + dst.push_str(&haystack[span]); + }, + |name| self.group_info().to_index(self.pattern()?, name), + dst, + ); + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated byte string is returned. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. + /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?[0-9]{2})-(?[0-9]{2})-(?[0-9]{4})", + /// r"(?[0-9]{4})-(?[0-9]{2})-(?[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = b"year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. + /// let hay = b"On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_bytes(hay, replacement); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], result); + /// + /// // And this matches the second pattern. 
+ /// let hay = b"On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_bytes(hay, replacement); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], result); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn interpolate_bytes( + &self, + haystack: &[u8], + replacement: &[u8], + ) -> Vec { + let mut dst = vec![]; + self.interpolate_bytes_into(haystack, replacement, &mut dst); + dst + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated byte string is written to `dst`. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. + /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?[0-9]{2})-(?[0-9]{2})-(?[0-9]{4})", + /// r"(?[0-9]{4})-(?[0-9]{2})-(?[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = b"year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. + /// let hay = b"On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = vec![]; + /// caps.interpolate_bytes_into(hay, replacement, &mut dst); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], dst); + /// + /// // And this matches the second pattern. + /// let hay = b"On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = vec![]; + /// caps.interpolate_bytes_into(hay, replacement, &mut dst); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], dst); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn interpolate_bytes_into( + &self, + haystack: &[u8], + replacement: &[u8], + dst: &mut Vec, + ) { + interpolate::bytes( + replacement, + |index, dst| { + let span = match self.get_group(index) { + None => return, + Some(span) => span, + }; + dst.extend_from_slice(&haystack[span]); + }, + |name| self.group_info().to_index(self.pattern()?, name), + dst, + ); + } + + /// This is a convenience routine for extracting the substrings + /// corresponding to matching capture groups in the given `haystack`. The + /// `haystack` should be the same substring used to find the match spans in + /// this `Captures` value. + /// + /// This is identical to [`Captures::extract_bytes`], except it works with + /// `&str` instead of `&[u8]`. + /// + /// # Panics + /// + /// This panics if the number of explicit matching groups in this + /// `Captures` value is less than `N`. This also panics if this `Captures` + /// value does not correspond to a match. + /// + /// Note that this does *not* panic if the number of explicit matching + /// groups is bigger than `N`. In that case, only the first `N` matching + /// groups are extracted. 
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = re.create_captures();
+    ///
+    /// let hay = "On 2010-03-14, I became a Tennessee lamb.";
+    /// re.captures(&mut cache, hay, &mut caps);
+    /// assert!(caps.is_match());
+    /// let (full, [year, month, day]) = caps.extract(hay);
+    /// assert_eq!("2010-03-14", full);
+    /// assert_eq!("2010", year);
+    /// assert_eq!("03", month);
+    /// assert_eq!("14", day);
+    ///
+    /// // We can also ask for fewer than all capture groups.
+    /// let (full, [year]) = caps.extract(hay);
+    /// assert_eq!("2010-03-14", full);
+    /// assert_eq!("2010", year);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn extract<'h, const N: usize>(
+        &self,
+        haystack: &'h str,
+    ) -> (&'h str, [&'h str; N]) {
+        let mut matched = self.iter().flatten();
+        let whole_match = &haystack[matched.next().expect("a match")];
+        let group_matches = [0; N].map(|_| {
+            let sp = matched.next().expect("too few matching groups");
+            &haystack[sp]
+        });
+        (whole_match, group_matches)
+    }
+
+    /// This is a convenience routine for extracting the substrings
+    /// corresponding to matching capture groups in the given `haystack`. The
+    /// `haystack` should be the same substring used to find the match spans in
+    /// this `Captures` value.
+    ///
+    /// This is identical to [`Captures::extract`], except it works with
+    /// `&[u8]` instead of `&str`.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the number of explicit matching groups in this
+    /// `Captures` value is less than `N`. This also panics if this `Captures`
+    /// value does not correspond to a match.
+    ///
+    /// Note that this does *not* panic if the number of explicit matching
+    /// groups is bigger than `N`. In that case, only the first `N` matching
+    /// groups are extracted.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = re.create_captures();
+    ///
+    /// let hay = b"On 2010-03-14, I became a Tennessee lamb.";
+    /// re.captures(&mut cache, hay, &mut caps);
+    /// assert!(caps.is_match());
+    /// let (full, [year, month, day]) = caps.extract_bytes(hay);
+    /// assert_eq!(b"2010-03-14", full);
+    /// assert_eq!(b"2010", year);
+    /// assert_eq!(b"03", month);
+    /// assert_eq!(b"14", day);
+    ///
+    /// // We can also ask for fewer than all capture groups.
+    /// let (full, [year]) = caps.extract_bytes(hay);
+    /// assert_eq!(b"2010-03-14", full);
+    /// assert_eq!(b"2010", year);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn extract_bytes<'h, const N: usize>(
+        &self,
+        haystack: &'h [u8],
+    ) -> (&'h [u8], [&'h [u8]; N]) {
+        let mut matched = self.iter().flatten();
+        let whole_match = &haystack[matched.next().expect("a match")];
+        let group_matches = [0; N].map(|_| {
+            let sp = matched.next().expect("too few matching groups");
+            &haystack[sp]
+        });
+        (whole_match, group_matches)
+    }
+}
+
+/// Lower level "slot" oriented APIs. One does not typically need to use these
+/// when executing a search. They are instead mostly intended for folks that
+/// are writing their own regex engine while reusing this `Captures` type.
+impl Captures {
+    /// Clear this `Captures` value.
+    ///
+    /// After clearing, all slots inside this `Captures` value will be set to
+    /// Similarly, any pattern ID that it was previously associated with
+    /// (for a match) is erased.
+    ///
+    /// It is not usually necessary to call this routine. Namely, a
+    /// `Captures` value only provides high level access to the capturing
+    /// groups of the pattern that matched, and only low level access to
+    /// individual slots. Thus, even if slots corresponding to groups that
+    /// aren't associated with the matching pattern are set, then it won't
+    /// impact the higher level APIs. Namely, higher level APIs like
+    /// [`Captures::get_group`] will return `None` if no pattern ID is
+    /// present, even if there are spans set in the underlying slots.
+    ///
+    /// Thus, to "clear" a `Captures` value of a match, it is usually only
+    /// necessary to call [`Captures::set_pattern`] with `None`.
+    ///
+    /// # Example
+    ///
+    /// This example shows what happens when a `Captures` value is cleared.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
+    /// assert!(caps.is_match());
+    /// let slots: Vec<Option<usize>> =
+    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
+    /// // Note that the following ordering is considered an API guarantee.
+    /// assert_eq!(slots, vec![
+    ///     Some(0),
+    ///     Some(17),
+    ///     Some(0),
+    ///     Some(5),
+    ///     Some(6),
+    ///     Some(17),
+    /// ]);
+    ///
+    /// // Now clear the slots. Everything is gone and it is no longer a
+    /// // match.
+    /// caps.clear();
+    /// assert!(!caps.is_match());
+    /// let slots: Vec<Option<usize>> =
+    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
+    /// assert_eq!(slots, vec![
+    ///     None,
+    ///     None,
+    ///     None,
+    ///     None,
+    ///     None,
+    ///     None,
+    /// ]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn clear(&mut self) {
+        self.pid = None;
+        for slot in self.slots.iter_mut() {
+            *slot = None;
+        }
+    }
+
+    /// Set the pattern on this `Captures` value.
+    ///
+    /// When the pattern ID is `None`, then this `Captures` value does not
+    /// correspond to a match (`is_match` will return `false`). Otherwise,
+    /// it corresponds to a match.
+    ///
+    /// This is useful in search implementations where you might want to
+    /// initially call `set_pattern(None)` in order to avoid the cost of
+    /// calling `clear()` if it turns out to not be necessary.
+    ///
+    /// # Example
+    ///
+    /// This example shows that `set_pattern` merely overwrites the pattern
+    /// ID. It does not actually change the underlying slot values.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
+    /// assert!(caps.is_match());
+    /// assert!(caps.pattern().is_some());
+    /// let slots: Vec<Option<usize>> =
+    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
+    /// // Note that the following ordering is considered an API guarantee.
+    /// assert_eq!(slots, vec![
+    ///     Some(0),
+    ///     Some(17),
+    ///     Some(0),
+    ///     Some(5),
+    ///     Some(6),
+    ///     Some(17),
+    /// ]);
+    ///
+    /// // Now set the pattern to None. Note that the slot values remain.
+    /// caps.set_pattern(None);
+    /// assert!(!caps.is_match());
+    /// assert!(!caps.pattern().is_some());
+    /// let slots: Vec<Option<usize>> =
+    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
+    /// // Note that the following ordering is considered an API guarantee.
+    /// assert_eq!(slots, vec![
+    ///     Some(0),
+    ///     Some(17),
+    ///     Some(0),
+    ///     Some(5),
+    ///     Some(6),
+    ///     Some(17),
+    /// ]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn set_pattern(&mut self, pid: Option<PatternID>) {
+        self.pid = pid;
+    }
+
+    /// Returns the underlying slots, where each slot stores a single offset.
+    ///
+    /// Every matching capturing group generally corresponds to two slots:
+    /// one slot for the starting position and another for the ending
+    /// position. Typically, either both are present or neither are. (The
+    /// weasel word "typically" is used here because it really depends on
+    /// the regex engine implementation. Every sensible regex engine likely
+    /// adheres to this invariant, and every regex engine in this crate is
+    /// sensible.)
+    ///
+    /// Generally speaking, callers should prefer to use higher level
+    /// routines like [`Captures::get_match`] or [`Captures::get_group`].
+    ///
+    /// An important note here is that a regex engine may not reset all of
+    /// the slots to `None` values when no match occurs, or even when a
+    /// match of a different pattern occurs. But this depends on how the
+    /// regex engine implementation deals with slots.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to get the underlying slots from a regex
+    /// match.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::primitives::{PatternID, NonMaxUsize},
+    /// };
+    ///
+    /// let re = PikeVM::new_many(&[
+    ///     r"[a-z]+",
+    ///     r"[0-9]+",
+    /// ])?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "123", &mut caps);
+    /// assert_eq!(Some(PatternID::must(1)), caps.pattern());
+    /// // Note that the only guarantee we have here is that slots 2 and 3
+    /// // are set to correct values. The contents of the first two slots are
+    /// // unspecified since the 0th pattern did not match.
+    /// let expected = &[
+    ///     None,
+    ///     None,
+    ///     NonMaxUsize::new(0),
+    ///     NonMaxUsize::new(3),
+    /// ];
+    /// assert_eq!(expected, caps.slots());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn slots(&self) -> &[Option<NonMaxUsize>] {
+        &self.slots
+    }
+
+    /// Returns the underlying slots as a mutable slice, where each slot
+    /// stores a single offset.
+    ///
+    /// This tends to be most useful for regex engine implementations for
+    /// writing offsets for matching capturing groups to slots.
+    ///
+    /// See [`Captures::slots`] for more information about slots.
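+    ///
+    /// # Example
+    ///
+    /// This is a minimal sketch of how a hand-rolled "regex engine" might
+    /// record offsets through this API. The pattern, the offsets and the
+    /// use of `PikeVM` purely to allocate a `Captures` value are contrived
+    /// for illustration.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::primitives::{NonMaxUsize, PatternID},
+    /// };
+    ///
+    /// let re = PikeVM::new(r"(a)")?;
+    /// let mut caps = re.create_captures();
+    /// // Pretend we just found a match of pattern 0 spanning offsets 0..1,
+    /// // with its one explicit group also spanning 0..1. Slots 0 and 1 are
+    /// // the implicit group and slots 2 and 3 are the explicit group.
+    /// caps.slots_mut()[0] = NonMaxUsize::new(0);
+    /// caps.slots_mut()[1] = NonMaxUsize::new(1);
+    /// caps.slots_mut()[2] = NonMaxUsize::new(0);
+    /// caps.slots_mut()[3] = NonMaxUsize::new(1);
+    /// // A `Captures` value only reports a match once a pattern ID is set.
+    /// caps.set_pattern(Some(PatternID::ZERO));
+    /// assert_eq!(Some(0..1), caps.get_match().map(|m| m.range()));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```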
+    #[inline]
+    pub fn slots_mut(&mut self) -> &mut [Option<NonMaxUsize>] {
+        &mut self.slots
+    }
+}
+
+impl core::fmt::Debug for Captures {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let mut dstruct = f.debug_struct("Captures");
+        dstruct.field("pid", &self.pid);
+        if let Some(pid) = self.pid {
+            dstruct.field("spans", &CapturesDebugMap { pid, caps: self });
+        }
+        dstruct.finish()
+    }
+}
+
+/// A little helper type to provide a nice map-like debug representation for
+/// our capturing group spans.
+struct CapturesDebugMap<'a> {
+    pid: PatternID,
+    caps: &'a Captures,
+}
+
+impl<'a> core::fmt::Debug for CapturesDebugMap<'a> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        struct Key<'a>(usize, Option<&'a str>);
+
+        impl<'a> core::fmt::Debug for Key<'a> {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                write!(f, "{}", self.0)?;
+                if let Some(name) = self.1 {
+                    write!(f, "/{:?}", name)?;
+                }
+                Ok(())
+            }
+        }
+
+        let mut map = f.debug_map();
+        let names = self.caps.group_info().pattern_names(self.pid);
+        for (group_index, maybe_name) in names.enumerate() {
+            let key = Key(group_index, maybe_name);
+            match self.caps.get_group(group_index) {
+                None => map.entry(&key, &None::<()>),
+                Some(span) => map.entry(&key, &span),
+            };
+        }
+        map.finish()
+    }
+}
+
+/// An iterator over all capturing groups in a `Captures` value.
+///
+/// This iterator includes capturing groups that did not participate in a
+/// match. See the [`Captures::iter`] method documentation for more details
+/// and examples.
+///
+/// The lifetime parameter `'a` refers to the lifetime of the underlying
+/// `Captures` value.
+#[derive(Clone, Debug)]
+pub struct CapturesPatternIter<'a> {
+    caps: &'a Captures,
+    names: core::iter::Enumerate<GroupInfoPatternNames<'a>>,
+}
+
+impl<'a> Iterator for CapturesPatternIter<'a> {
+    type Item = Option<Span>;
+
+    fn next(&mut self) -> Option<Option<Span>> {
+        let (group_index, _) = self.names.next()?;
+        Some(self.caps.get_group(group_index))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.names.size_hint()
+    }
+
+    fn count(self) -> usize {
+        self.names.count()
+    }
+}
+
+impl<'a> ExactSizeIterator for CapturesPatternIter<'a> {}
+impl<'a> core::iter::FusedIterator for CapturesPatternIter<'a> {}
+
+/// Represents information about capturing groups in a compiled regex.
+///
+/// The information encapsulated by this type consists of the following. For
+/// each pattern:
+///
+/// * A map from every capture group name to its corresponding capture group
+/// index.
+/// * A map from every capture group index to its corresponding capture group
+/// name.
+/// * A map from capture group index to its corresponding slot index. A slot
+/// refers to one half of a capturing group. That is, a capture slot is
+/// either the start or end of a capturing group. A slot is usually the
+/// mechanism by which a regex engine records offsets for each capturing
+/// group during a search.
+///
+/// A `GroupInfo` uses reference counting internally and is thus cheap to
+/// clone.
+///
+/// # Mapping from capture groups to slots
+///
+/// One of the main responsibilities of a `GroupInfo` is to build a mapping
+/// from `(PatternID, u32)` (where the `u32` is a capture index) to something
+/// called a "slot." As mentioned above, a slot refers to one half of a
+/// capturing group. Both combined provide the start and end offsets of
+/// a capturing group that participated in a match.
+///
+/// **The mapping between group indices and slots is an API guarantee.** That
+/// is, the mapping won't change within a semver compatible release.
+///
+/// Slots exist primarily because this is a convenient mechanism by which
+/// regex engines report group offsets at search time. For example, the
+/// [`nfa::thompson::State::Capture`](crate::nfa::thompson::State::Capture)
+/// NFA state includes the slot index. When a regex engine transitions
+/// through this state, it will likely use the slot index to write the
+/// current haystack offset to some region of memory.
+/// When a match is found, those slots are then reported to the caller,
+/// typically via a convenient abstraction like a [`Captures`] value.
+///
+/// Because this crate provides first class support for multi-pattern
+/// regexes, and because of some performance related reasons, the mapping
+/// between capturing groups and slots is a little complex. However, in the
+/// case of a single pattern, the mapping can be described very simply: for
+/// all capture group indices `i`, its corresponding slots are at `i * 2`
+/// and `i * 2 + 1`. Notice that the pattern ID isn't involved at all here:
+/// this case only applies to a single-pattern regex, so the pattern ID is
+/// always `0`.
+///
+/// In the multi-pattern case, the mapping is a bit more complicated. To
+/// talk about it, we must define what we mean by "implicit" vs "explicit"
+/// capturing groups:
+///
+/// * An **implicit** capturing group refers to the capturing group that is
+/// present for every pattern automatically, and corresponds to the overall
+/// match of a pattern. Every pattern has precisely one implicit capturing
+/// group. It is always unnamed and it always corresponds to the capture
+/// group index `0`.
+/// * An **explicit** capturing group refers to any capturing group that
+/// appears in the concrete syntax of the pattern. (Or, if an NFA was hand
+/// built without any concrete syntax, it refers to any capturing group with
+/// an index greater than `0`.)
+///
+/// Some examples:
+///
+/// * `\w+` has one implicit capturing group and zero explicit capturing
+/// groups.
+/// * `(\w+)` has one implicit group and one explicit group.
+/// * `foo(\d+)(?:\pL+)(\d+)` has one implicit group and two explicit
+/// groups.
+///
+/// Turning back to the slot mapping, we can now state it as follows:
+///
+/// * Given a pattern ID `pid`, the slots for its implicit group are always
+/// at `pid * 2` and `pid * 2 + 1`.
+/// * Given a pattern ID `0`, the slots for its explicit groups start
+/// at `group_info.pattern_len() * 2`.
+/// * Given a pattern ID `pid > 0`, the slots for its explicit groups start
+/// immediately following where the slots for the explicit groups of
+/// `pid - 1` end.
+///
+/// In particular, while there is a concrete formula one can use to
+/// determine where the slots for the implicit group of any pattern are,
+/// there is no general formula for determining where the slots for explicit
+/// capturing groups are. This is because each pattern can contain a
+/// different number of groups.
+///
+/// The intended way of getting the slots for a particular capturing group
+/// (whether implicit or explicit) is via the [`GroupInfo::slot`] or
+/// [`GroupInfo::slots`] method.
+///
+/// See below for a concrete example of how capturing groups get mapped to
+/// slots.
+///
+/// # Example
+///
+/// This example shows how to build a new `GroupInfo` and query it for
+/// information.
+///
+/// ```
+/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID};
+///
+/// let info = GroupInfo::new(vec![
+///     vec![None, Some("foo")],
+///     vec![None],
+///     vec![None, None, None, Some("bar"), None],
+///     vec![None, None, Some("foo")],
+/// ])?;
+/// // The number of patterns being tracked.
+/// assert_eq!(4, info.pattern_len());
+/// // We can query the number of groups for any pattern.
+/// assert_eq!(2, info.group_len(PatternID::must(0)));
+/// assert_eq!(1, info.group_len(PatternID::must(1)));
+/// assert_eq!(5, info.group_len(PatternID::must(2)));
+/// assert_eq!(3, info.group_len(PatternID::must(3)));
+/// // An invalid pattern always has zero groups.
+/// assert_eq!(0, info.group_len(PatternID::must(999)));
+/// // 2 slots per group
+/// assert_eq!(22, info.slot_len());
+///
+/// // We can map a group index for a particular pattern to its name, if
+/// // one exists.
+/// assert_eq!(Some("foo"), info.to_name(PatternID::must(3), 2));
+/// assert_eq!(None, info.to_name(PatternID::must(2), 4));
+/// // Or map a name to its group index.
+/// assert_eq!(Some(1), info.to_index(PatternID::must(0), "foo"));
+/// assert_eq!(Some(2), info.to_index(PatternID::must(3), "foo"));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// # Example: mapping from capture groups to slots
+///
+/// This example shows the specific mapping from capture group indices for
+/// each pattern to their corresponding slots. The slot values shown in this
+/// example are considered an API guarantee.
+///
+/// ```
+/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID};
+///
+/// let info = GroupInfo::new(vec![
+///     vec![None, Some("foo")],
+///     vec![None],
+///     vec![None, None, None, Some("bar"), None],
+///     vec![None, None, Some("foo")],
+/// ])?;
+///
+/// // We first show the slots for each pattern's implicit group.
+/// assert_eq!(Some((0, 1)), info.slots(PatternID::must(0), 0));
+/// assert_eq!(Some((2, 3)), info.slots(PatternID::must(1), 0));
+/// assert_eq!(Some((4, 5)), info.slots(PatternID::must(2), 0));
+/// assert_eq!(Some((6, 7)), info.slots(PatternID::must(3), 0));
+///
+/// // And now we show the slots for each pattern's explicit group.
+/// assert_eq!(Some((8, 9)), info.slots(PatternID::must(0), 1));
+/// assert_eq!(Some((10, 11)), info.slots(PatternID::must(2), 1));
+/// assert_eq!(Some((12, 13)), info.slots(PatternID::must(2), 2));
+/// assert_eq!(Some((14, 15)), info.slots(PatternID::must(2), 3));
+/// assert_eq!(Some((16, 17)), info.slots(PatternID::must(2), 4));
+/// assert_eq!(Some((18, 19)), info.slots(PatternID::must(3), 1));
+/// assert_eq!(Some((20, 21)), info.slots(PatternID::must(3), 2));
+///
+/// // Asking for the slots for an invalid pattern ID or even for an invalid
+/// // group index for a specific pattern will return None. So for example,
+/// // you're guaranteed to not get the slots for a different pattern than
+/// // the one requested.
+/// assert_eq!(None, info.slots(PatternID::must(5), 0));
+/// assert_eq!(None, info.slots(PatternID::must(1), 1));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug, Default)]
+pub struct GroupInfo(Arc<GroupInfoInner>);
+
+impl GroupInfo {
+    /// Creates a new group info from a sequence of patterns, where each
+    /// sequence of patterns yields a sequence of possible group names. The
+    /// index of each pattern in the sequence corresponds to its `PatternID`,
+    /// and the index of each group in each pattern's sequence corresponds to
+    /// its corresponding group index.
+    ///
+    /// While this constructor is very generic and therefore perhaps hard to
+    /// chew on, an example of a valid concrete type that can be passed to
+    /// this constructor is `Vec<Vec<Option<String>>>`. The outer `Vec`
+    /// corresponds to the patterns, i.e., one `Vec<Option<String>>` per
+    /// pattern. The inner `Vec` corresponds to the capturing groups for
+    /// each pattern. The `Option<String>` corresponds to the name of the
+    /// capturing group, if present.
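+    ///
+    /// To make that shape concrete, here is a small sketch using that exact
+    /// type (the group name is made up purely for illustration):
+    ///
+    /// ```
+    /// use regex_automata::util::captures::GroupInfo;
+    ///
+    /// let groups: Vec<Vec<Option<String>>> = vec![
+    ///     // Pattern 0: the implicit group plus one named group.
+    ///     vec![None, Some("year".to_string())],
+    ///     // Pattern 1: just the implicit group.
+    ///     vec![None],
+    /// ];
+    /// let info = GroupInfo::new(groups)?;
+    /// assert_eq!(2, info.pattern_len());
+    /// // 3 groups total, so 6 slots.
+    /// assert_eq!(6, info.slot_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```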
+    ///
+    /// It is legal to pass an empty iterator to this constructor. It will
+    /// return an empty group info with zero slots. An empty group info is
+    /// useful for cases where you have no patterns or for cases where
+    /// slots aren't being used at all (e.g., for most DFAs in this crate).
+    ///
+    /// # Errors
+    ///
+    /// This constructor returns an error if the given capturing groups are
+    /// invalid in some way. Those reasons include, but are not necessarily
+    /// limited to:
+    ///
+    /// * Too many patterns (i.e., `PatternID` would overflow).
+    /// * Too many capturing groups (e.g., `u32` would overflow).
+    /// * A pattern is given that has no capturing groups. (All patterns
+    /// must have at least an implicit capturing group at index `0`.)
+    /// * The capturing group at index `0` has a name. It must be unnamed.
+    /// * There are duplicate capturing group names within the same pattern.
+    /// (Multiple capturing groups with the same name may exist, but they
+    /// must be in different patterns.)
+    ///
+    /// An example below shows how to trigger some of the above error
+    /// conditions.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a new `GroupInfo` and query it for
+    /// information.
+    ///
+    /// ```
+    /// use regex_automata::util::captures::GroupInfo;
+    ///
+    /// let info = GroupInfo::new(vec![
+    ///     vec![None, Some("foo")],
+    ///     vec![None],
+    ///     vec![None, None, None, Some("bar"), None],
+    ///     vec![None, None, Some("foo")],
+    /// ])?;
+    /// // The number of patterns being tracked.
+    /// assert_eq!(4, info.pattern_len());
+    /// // 2 slots per group
+    /// assert_eq!(22, info.slot_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: empty `GroupInfo`
+    ///
+    /// This example shows how to build a new empty `GroupInfo` and query it
+    /// for information.
+    ///
+    /// ```
+    /// use regex_automata::util::captures::GroupInfo;
+    ///
+    /// let info = GroupInfo::empty();
+    /// // Everything is zero.
+    /// assert_eq!(0, info.pattern_len());
+    /// assert_eq!(0, info.slot_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: error conditions
+    ///
+    /// This example shows how to provoke some of the ways in which building
+    /// a `GroupInfo` can fail.
+    ///
+    /// ```
+    /// use regex_automata::util::captures::GroupInfo;
+    ///
+    /// // Either the group info is empty, or all patterns must have at
+    /// // least one capturing group.
+    /// assert!(GroupInfo::new(vec![
+    ///     vec![None, Some("a")], // ok
+    ///     vec![None], // ok
+    ///     vec![], // not ok
+    /// ]).is_err());
+    /// // Note that building an empty group info is OK.
+    /// assert!(GroupInfo::new(Vec::<Vec<Option<&str>>>::new()).is_ok());
+    ///
+    /// // The first group in each pattern must correspond to an implicit
+    /// // anonymous group. i.e., One that is not named. By convention, this
+    /// // group corresponds to the overall match of a regex. Every other
+    /// // group in a pattern is explicit and optional.
+    /// assert!(GroupInfo::new(vec![vec![Some("foo")]]).is_err());
+    ///
+    /// // There must not be duplicate group names within the same pattern.
+    /// assert!(GroupInfo::new(vec![
+    ///     vec![None, Some("foo"), Some("foo")],
+    /// ]).is_err());
+    /// // But duplicate names across distinct patterns is OK.
+    /// assert!(GroupInfo::new(vec![
+    ///     vec![None, Some("foo")],
+    ///     vec![None, Some("foo")],
+    /// ]).is_ok());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// There are other ways for building a `GroupInfo` to fail, but they
+    /// are difficult to show.
+    /// For example, if the number of patterns given would overflow
+    /// `PatternID`.
+    pub fn new<P, G, N>(pattern_groups: P) -> Result<GroupInfo, GroupInfoError>
+    where
+        P: IntoIterator<Item = G>,
+        G: IntoIterator<Item = Option<N>>,
+        N: AsRef<str>,
+    {
+        let mut group_info = GroupInfoInner {
+            slot_ranges: vec![],
+            name_to_index: vec![],
+            index_to_name: vec![],
+            memory_extra: 0,
+        };
+        for (pattern_index, groups) in pattern_groups.into_iter().enumerate()
+        {
+            // If we can't convert the pattern index to an ID, then the
+            // caller tried to build capture info for too many patterns.
+            let pid = PatternID::new(pattern_index)
+                .map_err(GroupInfoError::too_many_patterns)?;
+
+            let mut groups_iter = groups.into_iter().enumerate();
+            match groups_iter.next() {
+                None => return Err(GroupInfoError::missing_groups(pid)),
+                Some((_, Some(_))) => {
+                    return Err(GroupInfoError::first_must_be_unnamed(pid))
+                }
+                Some((_, None)) => {}
+            }
+            group_info.add_first_group(pid);
+            // Now iterate over the rest, which correspond to all of the
+            // (conventionally) explicit capture groups in a regex pattern.
+            for (group_index, maybe_name) in groups_iter {
+                // Just like for patterns, if the group index can't be
+                // converted to a "small" index, then the caller has given
+                // too many groups for a particular pattern.
+                let group = SmallIndex::new(group_index).map_err(|_| {
+                    GroupInfoError::too_many_groups(pid, group_index)
+                })?;
+                group_info.add_explicit_group(pid, group, maybe_name)?;
+            }
+        }
+        group_info.fixup_slot_ranges()?;
+        Ok(GroupInfo(Arc::new(group_info)))
+    }
+
+    /// This creates an empty `GroupInfo`.
+    ///
+    /// This is a convenience routine for calling `GroupInfo::new` with an
+    /// iterator that yields no elements.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a new empty `GroupInfo` and query it
+    /// for information.
+    ///
+    /// ```
+    /// use regex_automata::util::captures::GroupInfo;
+    ///
+    /// let info = GroupInfo::empty();
+    /// // Everything is zero.
+    /// assert_eq!(0, info.pattern_len());
+    /// assert_eq!(0, info.all_group_len());
+    /// assert_eq!(0, info.slot_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn empty() -> GroupInfo {
+        GroupInfo::new(core::iter::empty::<[Option<&str>; 0]>())
+            .expect("empty group info is always valid")
+    }
+
+    /// Return the capture group index corresponding to the given name in
+    /// the given pattern. If no such capture group name exists in the given
+    /// pattern, then this returns `None`.
+    ///
+    /// If the given pattern ID is invalid, then this returns `None`.
+    ///
+    /// This also returns `None` for all inputs if these captures are empty
+    /// (e.g., built from an empty [`GroupInfo`]). To check whether captures
+    /// are present for a specific pattern, use [`GroupInfo::group_len`].
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find the capture index for the given
+    /// pattern and group name.
+    ///
+    /// Remember that capture indices are relative to the pattern, such that
+    /// the same capture index value may refer to different capturing groups
+    /// for distinct patterns.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::NFA, PatternID};
+    ///
+    /// let (pid0, pid1) = (PatternID::must(0), PatternID::must(1));
+    ///
+    /// let nfa = NFA::new_many(&[
+    ///     r"a(?P<quux>\w+)z(?P<foo>\s+)",
+    ///     r"a(?P<foo>\d+)z",
+    /// ])?;
+    /// let groups = nfa.group_info();
+    /// assert_eq!(Some(2), groups.to_index(pid0, "foo"));
+    /// // Recall that capture index 0 is always unnamed and refers to the
+    /// // entire pattern.
+    /// // So the first capturing group present in the pattern itself always
+    /// // starts at index 1.
+    /// assert_eq!(Some(1), groups.to_index(pid1, "foo"));
+    ///
+    /// // And if a name does not exist for a particular pattern, None is
+    /// // returned.
+    /// assert!(groups.to_index(pid0, "quux").is_some());
+    /// assert!(groups.to_index(pid1, "quux").is_none());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn to_index(&self, pid: PatternID, name: &str) -> Option<usize> {
+        let indices = self.0.name_to_index.get(pid.as_usize())?;
+        indices.get(name).cloned().map(|i| i.as_usize())
+    }
+
+    /// Return the capture name for the given index and given pattern. If
+    /// the corresponding group does not have a name, then this returns
+    /// `None`.
+    ///
+    /// If the pattern ID is invalid, then this returns `None`.
+    ///
+    /// If the group index is invalid for the given pattern, then this
+    /// returns `None`. A group `index` is valid for a pattern `pid` in an
+    /// `nfa` if and only if `index < nfa.pattern_capture_len(pid)`.
+    ///
+    /// This also returns `None` for all inputs if these captures are empty
+    /// (e.g., built from an empty [`GroupInfo`]). To check whether captures
+    /// are present for a specific pattern, use [`GroupInfo::group_len`].
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find the capture group name for the given
+    /// pattern and group index.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::NFA, PatternID};
+    ///
+    /// let (pid0, pid1) = (PatternID::must(0), PatternID::must(1));
+    ///
+    /// let nfa = NFA::new_many(&[
+    ///     r"a(?P<foo>\w+)z(\s+)x(\d+)",
+    ///     r"a(\d+)z(?P<foo>\s+)",
+    /// ])?;
+    /// let groups = nfa.group_info();
+    /// assert_eq!(None, groups.to_name(pid0, 0));
+    /// assert_eq!(Some("foo"), groups.to_name(pid0, 1));
+    /// assert_eq!(None, groups.to_name(pid0, 2));
+    /// assert_eq!(None, groups.to_name(pid0, 3));
+    ///
+    /// assert_eq!(None, groups.to_name(pid1, 0));
+    /// assert_eq!(None, groups.to_name(pid1, 1));
+    /// assert_eq!(Some("foo"), groups.to_name(pid1, 2));
+    /// // '3' is not a valid capture index for the second pattern.
+    /// assert_eq!(None, groups.to_name(pid1, 3));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn to_name(&self, pid: PatternID, group_index: usize) -> Option<&str> {
+        let pattern_names = self.0.index_to_name.get(pid.as_usize())?;
+        pattern_names.get(group_index)?.as_deref()
+    }
+
+    /// Return an iterator of all capture groups and their names (if
+    /// present) for a particular pattern.
+    ///
+    /// If the given pattern ID is invalid or if this `GroupInfo` is empty,
+    /// then the iterator yields no elements.
+    ///
+    /// The number of elements yielded by this iterator is always equal to
+    /// the result of calling [`GroupInfo::group_len`] with the same
+    /// `PatternID`.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to get a list of all capture group names for
+    /// a particular pattern.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, PatternID};
+    ///
+    /// let nfa = NFA::new(r"(a)(?P<foo>b)(c)(d)(?P<bar>e)")?;
+    /// // The first is the implicit group that is always unnamed. The next
+    /// // 5 groups are the explicit groups found in the concrete syntax
+    /// // above.
+    /// let expected = vec![None, None, Some("foo"), None, None, Some("bar")];
+    /// let got: Vec<Option<&str>> =
+    ///     nfa.group_info().pattern_names(PatternID::ZERO).collect();
+    /// assert_eq!(expected, got);
+    ///
+    /// // Using an invalid pattern ID will result in nothing yielded.
+    /// let got = nfa.group_info().pattern_names(PatternID::must(999)).count();
+    /// assert_eq!(0, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn pattern_names(&self, pid: PatternID) -> GroupInfoPatternNames<'_> {
+        GroupInfoPatternNames {
+            it: self
+                .0
+                .index_to_name
+                .get(pid.as_usize())
+                .map(|indices| indices.iter())
+                .unwrap_or([].iter()),
+        }
+    }
+
+    /// Return an iterator of all capture groups for all patterns supported
+    /// by this `GroupInfo`. Each item yielded is a triple of the group's
+    /// pattern ID, index in the pattern and the group's name, if present.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to get a list of all capture groups found in
+    /// one NFA, potentially spanning multiple patterns.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, PatternID};
+    ///
+    /// let nfa = NFA::new_many(&[
+    ///     r"(?P<foo>a)",
+    ///     r"a",
+    ///     r"(a)",
+    /// ])?;
+    /// let expected = vec![
+    ///     (PatternID::must(0), 0, None),
+    ///     (PatternID::must(0), 1, Some("foo")),
+    ///     (PatternID::must(1), 0, None),
+    ///     (PatternID::must(2), 0, None),
+    ///     (PatternID::must(2), 1, None),
+    /// ];
+    /// let got: Vec<(PatternID, usize, Option<&str>)> =
+    ///     nfa.group_info().all_names().collect();
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Unlike other capturing group related routines, this routine doesn't
+    /// panic even if captures aren't enabled on this NFA:
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::{NFA, WhichCaptures};
+    ///
+    /// let nfa = NFA::compiler()
+    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
+    ///     .build_many(&[
+    ///         r"(?P<foo>a)",
+    ///         r"a",
+    ///         r"(a)",
+    ///     ])?;
+    /// // When captures aren't enabled, there's nothing to return.
+    /// assert_eq!(0, nfa.group_info().all_names().count());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn all_names(&self) -> GroupInfoAllNames<'_> {
+        GroupInfoAllNames {
+            group_info: self,
+            pids: PatternID::iter(self.pattern_len()),
+            current_pid: None,
+            names: None,
+        }
+    }
+
+    /// Returns the starting and ending slot corresponding to the given
+    /// capturing group for the given pattern. The ending slot is always one
+    /// more than the starting slot returned.
+    ///
+    /// Note that this is like [`GroupInfo::slot`], except that it also
+    /// returns the ending slot value for convenience.
+    ///
+    /// If either the pattern ID or the capture index is invalid, then this
+    /// returns None.
+    ///
+    /// # Example
+    ///
+    /// This example shows that the starting slots for the first capturing
+    /// group of each pattern are distinct.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, PatternID};
+    ///
+    /// let nfa = NFA::new_many(&["a", "b"])?;
+    /// assert_ne!(
+    ///     nfa.group_info().slots(PatternID::must(0), 0),
+    ///     nfa.group_info().slots(PatternID::must(1), 0),
+    /// );
+    ///
+    /// // Also, the start and end slot values are never equivalent.
+    /// let (start, end) = nfa.group_info().slots(PatternID::ZERO, 0).unwrap();
+    /// assert_ne!(start, end);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn slots(
+        &self,
+        pid: PatternID,
+        group_index: usize,
+    ) -> Option<(usize, usize)> {
+        // Since 'slot' only ever returns valid starting slots, we know that
+        // there must also be an end slot and that end slot is always one
+        // more than the start slot.
+        self.slot(pid, group_index).map(|start| (start, start + 1))
+    }
+
+    /// Returns the starting slot corresponding to the given capturing group
+    /// for the given pattern. The ending slot is always one more than the
+    /// value returned.
+    ///
+    /// If either the pattern ID or the capture index is invalid, then this
+    /// returns None.
+    ///
+    /// # Example
+    ///
+    /// This example shows that the starting slots for the first capturing
+    /// group of each pattern are distinct.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, PatternID};
+    ///
+    /// let nfa = NFA::new_many(&["a", "b"])?;
+    /// assert_ne!(
+    ///     nfa.group_info().slot(PatternID::must(0), 0),
+    ///     nfa.group_info().slot(PatternID::must(1), 0),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
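+    ///
+    /// # Example: the documented slot formula
+    ///
+    /// This is a brief sketch checking the mapping documented on
+    /// [`GroupInfo`] against this method (the specific patterns here are
+    /// made up purely for illustration):
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::NFA, PatternID};
+    ///
+    /// let nfa = NFA::new_many(&["(a)", "b"])?;
+    /// let info = nfa.group_info();
+    /// // Implicit groups live at `pid * 2`.
+    /// assert_eq!(Some(0), info.slot(PatternID::must(0), 0));
+    /// assert_eq!(Some(2), info.slot(PatternID::must(1), 0));
+    /// // Explicit groups start after all implicit slots, i.e., at
+    /// // `pattern_len() * 2 = 4`.
+    /// assert_eq!(Some(4), info.slot(PatternID::must(0), 1));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```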
+    #[inline]
+    pub fn slot(&self, pid: PatternID, group_index: usize) -> Option<usize> {
+        if group_index >= self.group_len(pid) {
+            return None;
+        }
+        // At this point, we know that 'pid' refers to a real pattern and
+        // that 'group_index' refers to a real group. We therefore also know
+        // that the pattern and group can be combined to return a correct
+        // slot. That's why we don't need to use checked arithmetic below.
+        if group_index == 0 {
+            Some(pid.as_usize() * 2)
+        } else {
+            // As above, we don't need to check that our slot is less than
+            // the end of our range since we already know the group index is
+            // a valid index for the given pattern.
+            let (start, _) = self.0.slot_ranges[pid];
+            Some(start.as_usize() + ((group_index - 1) * 2))
+        }
+    }
+
+    /// Returns the total number of patterns in this `GroupInfo`.
+    ///
+    /// This may return zero if the `GroupInfo` was constructed with no
+    /// patterns.
+    ///
+    /// This is guaranteed to be no bigger than [`PatternID::LIMIT`] because
+    /// `GroupInfo` construction will fail if too many patterns are added.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::NFA;
+    ///
+    /// let nfa = NFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
+    /// assert_eq!(3, nfa.group_info().pattern_len());
+    ///
+    /// let nfa = NFA::never_match();
+    /// assert_eq!(0, nfa.group_info().pattern_len());
+    ///
+    /// let nfa = NFA::always_match();
+    /// assert_eq!(1, nfa.group_info().pattern_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn pattern_len(&self) -> usize {
+        self.0.pattern_len()
+    }
+
+    /// Return the number of capture groups in a pattern.
+    ///
+    /// If the pattern ID is invalid, then this returns `0`.
+    ///
+    /// # Example
+    ///
+    /// This example shows how the values returned by this routine may vary
+    /// for different patterns and NFA configurations.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID};
+    ///
+    /// let nfa = NFA::new(r"(a)(b)(c)")?;
+    /// // There are 3 explicit groups in the pattern's concrete syntax and
+    /// // 1 unnamed and implicit group spanning the entire pattern.
+    /// assert_eq!(4, nfa.group_info().group_len(PatternID::ZERO));
+    ///
+    /// let nfa = NFA::new(r"abc")?;
+    /// // There is just the unnamed implicit group.
+    /// assert_eq!(1, nfa.group_info().group_len(PatternID::ZERO));
+    ///
+    /// let nfa = NFA::compiler()
+    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
+    ///     .build(r"abc")?;
+    /// // We disabled capturing groups, so there are none.
+    /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO));
+    ///
+    /// let nfa = NFA::compiler()
+    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
+    ///     .build(r"(a)(b)(c)")?;
+    /// // We disabled capturing groups, so there are none, even if there
+    /// // are explicit groups in the concrete syntax.
+    /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn group_len(&self, pid: PatternID) -> usize {
+        self.0.group_len(pid)
+    }
+
+    /// Return the total number of capture groups across all patterns.
+    ///
+    /// This includes implicit groups that represent the entire match of a
+    /// pattern.
+    ///
+    /// # Example
+    ///
+    /// This example shows how the values returned by this routine may vary
+    /// for different patterns and NFA configurations.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID};
+    ///
+    /// let nfa = NFA::new(r"(a)(b)(c)")?;
+    /// // There are 3 explicit groups in the pattern's concrete syntax and
+    /// // 1 unnamed and implicit group spanning the entire pattern.
+    /// assert_eq!(4, nfa.group_info().all_group_len());
+    ///
+    /// let nfa = NFA::new(r"abc")?;
+    /// // There is just the unnamed implicit group.
+    /// assert_eq!(1, nfa.group_info().all_group_len());
+    ///
+    /// let nfa = NFA::new_many(&["(a)", "b", "(c)"])?;
+    /// // Each pattern has one implicit group, and two of the patterns have
+    /// // one explicit group each.
+    /// assert_eq!(5, nfa.group_info().all_group_len());
+    ///
+    /// let nfa = NFA::compiler()
+    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
+    ///     .build(r"abc")?;
+    /// // We disabled capturing groups, so there are none.
+    /// assert_eq!(0, nfa.group_info().all_group_len());
+    ///
+    /// let nfa = NFA::compiler()
+    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
+    ///     .build(r"(a)(b)(c)")?;
+    /// // We disabled capturing groups, so there are none, even if there
+    /// // are explicit groups in the concrete syntax.
+    /// assert_eq!(0, nfa.group_info().all_group_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn all_group_len(&self) -> usize {
+        self.slot_len() / 2
+    }
+
+    /// Returns the total number of slots in this `GroupInfo` across all
+    /// patterns.
+    ///
+    /// The total number of slots is always twice the total number of
+    /// capturing groups, including both implicit and explicit groups.
+    ///
+    /// # Example
+    ///
+    /// This example shows the relationship between the number of capturing
+    /// groups and slots.
+    ///
+    /// ```
+    /// use regex_automata::util::captures::GroupInfo;
+    ///
+    /// // There are 11 total groups here.
+    /// let info = GroupInfo::new(vec![
+    ///     vec![None, Some("foo")],
+    ///     vec![None],
+    ///     vec![None, None, None, Some("bar"), None],
+    ///     vec![None, None, Some("foo")],
+    /// ])?;
+    /// // 2 slots per group gives us 11*2=22 slots.
+    /// assert_eq!(22, info.slot_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn slot_len(&self) -> usize {
+        self.0.small_slot_len().as_usize()
+    }
+
+    /// Returns the total number of slots for implicit capturing groups.
+    ///
+    /// This is like [`GroupInfo::slot_len`], except it doesn't include the
+    /// explicit slots for each pattern. Since there are always exactly 2
+    /// implicit slots for each pattern, the number of implicit slots is
+    /// always equal to twice the number of patterns.
+    ///
+    /// # Example
+    ///
+    /// This example shows the relationship between the number of capturing
+    /// groups, implicit slots and explicit slots.
+    ///
+    /// ```
+    /// use regex_automata::util::captures::GroupInfo;
+    ///
+    /// // There are 3 total groups here.
+    /// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?;
+    /// // 2 slots per group gives us 3*2=6 slots.
+    /// assert_eq!(6, info.slot_len());
+    /// // 2 implicit slots per pattern gives us 2 implicit slots since
+    /// // there is 1 pattern.
+    /// assert_eq!(2, info.implicit_slot_len());
+    /// // 2 explicit capturing groups gives us 2*2=4 explicit slots.
+    /// assert_eq!(4, info.explicit_slot_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn implicit_slot_len(&self) -> usize {
+        self.pattern_len() * 2
+    }
+
+    /// Returns the total number of slots for explicit capturing groups.
+    ///
+    /// This is like [`GroupInfo::slot_len`], except it doesn't include the
+    /// implicit slots for each pattern. (There are always 2 implicit slots
+    /// for each pattern.)
+    ///
+    /// For a non-empty `GroupInfo`, it is always the case that `slot_len`
+    /// is strictly greater than `explicit_slot_len`. For an empty
+    /// `GroupInfo`, both the total number of slots and the number of
+    /// explicit slots is `0`.
+    ///
+    /// # Example
+    ///
+    /// This example shows the relationship between the number of capturing
+    /// groups, implicit slots and explicit slots.
+    ///
+    /// ```
+    /// use regex_automata::util::captures::GroupInfo;
+    ///
+    /// // There are 3 total groups here.
+    /// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?;
+    /// // 2 slots per group gives us 3*2=6 slots.
+    /// assert_eq!(6, info.slot_len());
+    /// // 2 implicit slots per pattern gives us 2 implicit slots since
+    /// // there is 1 pattern.
+    /// assert_eq!(2, info.implicit_slot_len());
+    /// // 2 explicit capturing groups gives us 2*2=4 explicit slots.
+    /// assert_eq!(4, info.explicit_slot_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn explicit_slot_len(&self) -> usize {
+        self.slot_len().saturating_sub(self.implicit_slot_len())
+    }
+
+    /// Returns the memory usage, in bytes, of this `GroupInfo`.
+    ///
+    /// This does **not** include the stack size used up by this
+    /// `GroupInfo`. To compute that, use
+    /// `std::mem::size_of::<GroupInfo>()`.
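+    ///
+    /// # Example
+    ///
+    /// The exact byte count is an implementation detail rather than an API
+    /// guarantee, so this sketch only checks that a non-empty `GroupInfo`
+    /// reports some non-zero heap usage:
+    ///
+    /// ```
+    /// use regex_automata::util::captures::GroupInfo;
+    ///
+    /// let info = GroupInfo::new(vec![vec![None, Some("foo")]])?;
+    /// assert!(info.memory_usage() > 0);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```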
+    #[inline]
+    pub fn memory_usage(&self) -> usize {
+        use core::mem::size_of as s;
+
+        s::<GroupInfoInner>()
+            + self.0.slot_ranges.len() * s::<(SmallIndex, SmallIndex)>()
+            + self.0.name_to_index.len() * s::<CaptureNameMap>()
+            + self.0.index_to_name.len() * s::<Vec<Option<Arc<str>>>>()
+            + self.0.memory_extra
+    }
+}
+
+/// A map from capture group name to its corresponding capture group index.
+///
+/// This type is actually wrapped inside a Vec indexed by pattern ID on a
+/// `GroupInfo`, since multiple patterns may have the same capture group
+/// name. That is, each pattern gets its own namespace of capture group
+/// names.
+///
+/// Perhaps a more memory efficient representation would be
+/// HashMap<(PatternID, Arc<str>), usize>, but this makes it difficult to
+/// look up a capture index by name without producing an `Arc<str>`, which
+/// requires an allocation. To fix this, I think we'd need to define our own
+/// unsized type or something? Anyway, I didn't give this much thought since
+/// it probably doesn't matter much in the grand scheme of things. But it
+/// did stand out to me as mildly wasteful.
+#[cfg(feature = "std")]
+type CaptureNameMap = std::collections::HashMap<Arc<str>, SmallIndex>;
+#[cfg(not(feature = "std"))]
+type CaptureNameMap = alloc::collections::BTreeMap<Arc<str>, SmallIndex>;
+
+/// The inner guts of `GroupInfo`. This type only exists so that it can
+/// be wrapped in an `Arc` to make `GroupInfo` reference counted.
+#[derive(Debug, Default)]
+struct GroupInfoInner {
+    slot_ranges: Vec<(SmallIndex, SmallIndex)>,
+    name_to_index: Vec<CaptureNameMap>,
+    index_to_name: Vec<Vec<Option<Arc<str>>>>,
+    memory_extra: usize,
+}
+
+impl GroupInfoInner {
+    /// This adds the first unnamed group for the given pattern ID. The
+    /// given pattern ID must be zero if this is the first time this method
+    /// is called, or must be exactly one more than the pattern ID supplied
+    /// to the previous call to this method. (This method panics if this
+    /// rule is violated.)
+    ///
+    /// This can be thought of as initializing the GroupInfo state for the
+    /// given pattern and closing off the state for any previous pattern.
+    fn add_first_group(&mut self, pid: PatternID) {
+        assert_eq!(pid.as_usize(), self.slot_ranges.len());
+        assert_eq!(pid.as_usize(), self.name_to_index.len());
+        assert_eq!(pid.as_usize(), self.index_to_name.len());
+        // This is the start of our slots for the explicit capturing groups.
+        // Note that since the slots for the 0th group for every pattern
+        // appear before any slots for the nth group (where n > 0) in any
+        // pattern, we will have to fix up the slot ranges once we know how
+        // many patterns we've added capture groups for.
+        let slot_start = self.small_slot_len();
+        self.slot_ranges.push((slot_start, slot_start));
+        self.name_to_index.push(CaptureNameMap::new());
+        self.index_to_name.push(vec![None]);
+        self.memory_extra += core::mem::size_of::<Option<Arc<str>>>();
+    }
+
+    /// Add an explicit capturing group for the given pattern with the given
+    /// index. If the group has a name, then that must be given as well.
+    ///
+    /// Note that every capturing group except for the first or zeroth group
+    /// is explicit.
+    ///
+    /// This returns an error if adding this group would result in
+    /// overflowing slot indices or if a capturing group with the same name
+    /// for this pattern has already been added.
+    fn add_explicit_group<N: AsRef<str>>(
+        &mut self,
+        pid: PatternID,
+        group: SmallIndex,
+        maybe_name: Option<N>,
+    ) -> Result<(), GroupInfoError> {
+        // We also need to check that the slot index generated for
+        // this group is also valid. Although, this is a little weird
+        // because we offset these indices below, at which point, we'll
+        // have to recheck them. Gosh this is annoying. Note that
+        // the '+2' below is OK because 'end' is guaranteed to be less
+        // than isize::MAX.
+        let end = &mut self.slot_ranges[pid].1;
+        *end = SmallIndex::new(end.as_usize() + 2).map_err(|_| {
+            GroupInfoError::too_many_groups(pid, group.as_usize())
+        })?;
+        if let Some(name) = maybe_name {
+            let name = Arc::<str>::from(name.as_ref());
+            if self.name_to_index[pid].contains_key(&*name) {
+                return Err(GroupInfoError::duplicate(pid, &name));
+            }
+            let len = name.len();
+            self.name_to_index[pid].insert(Arc::clone(&name), group);
+            self.index_to_name[pid].push(Some(name));
+            // Adds the memory used by the Arc<str> in both maps.
+            self.memory_extra +=
+                2 * (len + core::mem::size_of::<Option<Arc<str>>>());
+            // And also the value entry for the 'name_to_index' map.
+            // This is probably an underestimate for 'name_to_index' since
+            // hashmaps/btrees likely have some non-zero overhead, but we
+            // assume here that they have zero overhead.
+            self.memory_extra += core::mem::size_of::<SmallIndex>();
+        } else {
+            self.index_to_name[pid].push(None);
+            self.memory_extra += core::mem::size_of::<Option<Arc<str>>>();
+        }
+        // This is a sanity assert that checks that our group index
+        // is in line with the number of groups added so far for this
+        // pattern.
+        assert_eq!(group.one_more(), self.group_len(pid));
+        // And is also in line with the 'index_to_name' map.
+        assert_eq!(group.one_more(), self.index_to_name[pid].len());
+        Ok(())
+    }
+
+    /// This corrects the slot ranges to account for the slots corresponding
+    /// to the zeroth group of each pattern. That is, every slot range is
+    /// offset by 'pattern_len() * 2', since each pattern uses two slots to
+    /// represent the zeroth group.
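+    ///
+    /// As a small worked example of that offsetting: with 3 patterns where
+    /// only pattern 0 has one explicit group, pattern 0's explicit range is
+    /// first recorded as `(0, 2)`. The offset is `3 * 2 = 6`, so the range
+    /// is fixed up to `(6, 8)`, i.e., the explicit group occupies slots `6`
+    /// and `7`, directly after the six implicit slots.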
+    fn fixup_slot_ranges(&mut self) -> Result<(), GroupInfoError> {
+        use crate::util::primitives::IteratorIndexExt;
+        // Since we know the number of patterns fits in PatternID and
+        // PatternID::MAX < isize::MAX, it follows that multiplying by 2
+        // will never overflow usize.
+        let offset = self.pattern_len().checked_mul(2).unwrap();
+        for (pid, &mut (ref mut start, ref mut end)) in
+            self.slot_ranges.iter_mut().with_pattern_ids()
+        {
+            let group_len = 1 + ((end.as_usize() - start.as_usize()) / 2);
+            let new_end = match end.as_usize().checked_add(offset) {
+                Some(new_end) => new_end,
+                None => {
+                    return Err(GroupInfoError::too_many_groups(
+                        pid, group_len,
+                    ))
+                }
+            };
+            *end = SmallIndex::new(new_end).map_err(|_| {
+                GroupInfoError::too_many_groups(pid, group_len)
+            })?;
+            // Since start <= end, if end is valid then start must be too.
+            *start = SmallIndex::new(start.as_usize() + offset).unwrap();
+        }
+        Ok(())
+    }
+
+    /// Return the total number of patterns represented by this capture
+    /// slot info.
+    fn pattern_len(&self) -> usize {
+        self.slot_ranges.len()
+    }
+
+    /// Return the total number of capturing groups for the given pattern.
+    /// If the given pattern isn't valid for this capture slot info, then 0
+    /// is returned.
+    fn group_len(&self, pid: PatternID) -> usize {
+        let (start, end) = match self.slot_ranges.get(pid.as_usize()) {
+            None => return 0,
+            Some(range) => range,
+        };
+        // The difference between any two SmallIndex values always fits in
+        // a usize since we know that SmallIndex::MAX <= isize::MAX-1. We
+        // also know that start<=end by construction and that the number of
+        // groups never exceeds SmallIndex and thus never overflows usize.
+        1 + ((end.as_usize() - start.as_usize()) / 2)
+    }
+
+    /// Return the total number of slots in this capture slot info as a
+    /// "small index."
+    fn small_slot_len(&self) -> SmallIndex {
+        // Since slots are allocated in order of pattern (starting at 0) and
+        // then in order of capture group, it follows that the number of
+        // slots is the end of the range of slots for the last pattern. This
+        // is true even when the last pattern has no capturing groups, since
+        // 'slot_ranges' will still represent it explicitly with an empty
+        // range.
+        self.slot_ranges.last().map_or(SmallIndex::ZERO, |&(_, end)| end)
+    }
+}
+
+/// An error that may occur when building a `GroupInfo`.
+///
+/// Building a `GroupInfo` does a variety of checks to make sure the
+/// capturing groups satisfy a number of invariants. This includes, but is
+/// not limited to, ensuring that the first capturing group is unnamed and
+/// that there are no duplicate capture groups for a specific pattern.
+#[derive(Clone, Debug)]
+pub struct GroupInfoError {
+    kind: GroupInfoErrorKind,
+}
+
+/// The kind of error that occurs when building a `GroupInfo` fails.
+///
+/// We keep this un-exported because it's not clear how useful it is to
+/// export it.
+#[derive(Clone, Debug)]
+enum GroupInfoErrorKind {
+    /// This occurs when too many patterns have been added, i.e., it would
+    /// otherwise overflow a `PatternID`.
+    TooManyPatterns { err: PatternIDError },
+    /// This occurs when too many capturing groups have been added for a
+    /// particular pattern.
+    TooManyGroups {
+        /// The ID of the pattern that had too many groups.
+        pattern: PatternID,
+        /// The minimum number of groups that the caller has tried to add
+        /// for a pattern.
+        minimum: usize,
+    },
+    /// An error that occurs when a pattern has no capture groups. Either
+    /// the group info must be empty, or all patterns must have at least one
+    /// group (corresponding to the unnamed group for the entire pattern).
+    MissingGroups {
+        /// The ID of the pattern that had no capturing groups.
+        pattern: PatternID,
+    },
+    /// An error that occurs when one tries to provide a name for the
+    /// capture group at index 0. This capturing group must currently always
+    /// be unnamed.
+    FirstMustBeUnnamed {
+        /// The ID of the pattern that was found to have a named first
+        /// capturing group.
+        pattern: PatternID,
+    },
+    /// An error that occurs when duplicate capture group names for the same
+    /// pattern are added.
+    ///
+    /// NOTE: At time of writing, this error can never occur if you're using
+    /// regex-syntax, since the parser itself will reject patterns with
+    /// duplicate capture group names. This error can only occur when the
+    /// builder is used to hand construct NFAs.
+    Duplicate {
+        /// The pattern in which the duplicate capture group name was found.
+        pattern: PatternID,
+        /// The duplicate name.
+        name: String,
+    },
+}
+
+impl GroupInfoError {
+    fn too_many_patterns(err: PatternIDError) -> GroupInfoError {
+        GroupInfoError { kind: GroupInfoErrorKind::TooManyPatterns { err } }
+    }
+
+    fn too_many_groups(pattern: PatternID, minimum: usize) -> GroupInfoError {
+        GroupInfoError {
+            kind: GroupInfoErrorKind::TooManyGroups { pattern, minimum },
+        }
+    }
+
+    fn missing_groups(pattern: PatternID) -> GroupInfoError {
+        GroupInfoError { kind: GroupInfoErrorKind::MissingGroups { pattern } }
+    }
+
+    fn first_must_be_unnamed(pattern: PatternID) -> GroupInfoError {
+        GroupInfoError {
+            kind: GroupInfoErrorKind::FirstMustBeUnnamed { pattern },
+        }
+    }
+
+    fn duplicate(pattern: PatternID, name: &str) -> GroupInfoError {
+        GroupInfoError {
+            kind: GroupInfoErrorKind::Duplicate {
+                pattern,
+                name: String::from(name),
+            },
+        }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for GroupInfoError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self.kind {
+            GroupInfoErrorKind::TooManyPatterns { .. }
+            | GroupInfoErrorKind::TooManyGroups { .. }
+            | GroupInfoErrorKind::MissingGroups { .. }
+            | GroupInfoErrorKind::FirstMustBeUnnamed { .. }
+            | GroupInfoErrorKind::Duplicate { ..
} => None,
+        }
+    }
+}
+
+impl core::fmt::Display for GroupInfoError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        use self::GroupInfoErrorKind::*;
+
+        match self.kind {
+            TooManyPatterns { ref err } => {
+                write!(f, "too many patterns to build capture info: {}", err)
+            }
+            TooManyGroups { pattern, minimum } => {
+                write!(
+                    f,
+                    "too many capture groups (at least {}) were \
+                     found for pattern {}",
+                    minimum,
+                    pattern.as_usize()
+                )
+            }
+            MissingGroups { pattern } => write!(
+                f,
+                "no capturing groups found for pattern {} \
+                 (either all patterns have zero groups or all patterns have \
+                 at least one group)",
+                pattern.as_usize(),
+            ),
+            FirstMustBeUnnamed { pattern } => write!(
+                f,
+                "first capture group (at index 0) for pattern {} has a name \
+                 (it must be unnamed)",
+                pattern.as_usize(),
+            ),
+            Duplicate { pattern, ref name } => write!(
+                f,
+                "duplicate capture group name '{}' found for pattern {}",
+                name,
+                pattern.as_usize(),
+            ),
+        }
+    }
+}
+
+/// An iterator over capturing groups and their names for a specific
+/// pattern.
+///
+/// This iterator is created by [`GroupInfo::pattern_names`].
+///
+/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo`
+/// from which this iterator was created.
+#[derive(Clone, Debug)]
+pub struct GroupInfoPatternNames<'a> {
+    it: core::slice::Iter<'a, Option<Arc<str>>>,
+}
+
+impl GroupInfoPatternNames<'static> {
+    fn empty() -> GroupInfoPatternNames<'static> {
+        GroupInfoPatternNames { it: [].iter() }
+    }
+}
+
+impl<'a> Iterator for GroupInfoPatternNames<'a> {
+    type Item = Option<&'a str>;
+
+    fn next(&mut self) -> Option<Option<&'a str>> {
+        self.it.next().map(|x| x.as_deref())
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+
+    fn count(self) -> usize {
+        self.it.count()
+    }
+}
+
+impl<'a> ExactSizeIterator for GroupInfoPatternNames<'a> {}
+impl<'a> core::iter::FusedIterator for GroupInfoPatternNames<'a> {}
+
+/// An iterator over capturing groups and their names for a `GroupInfo`.
+///
+/// This iterator is created by [`GroupInfo::all_names`].
+///
+/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo`
+/// from which this iterator was created.
+#[derive(Debug)]
+pub struct GroupInfoAllNames<'a> {
+    group_info: &'a GroupInfo,
+    pids: PatternIDIter,
+    current_pid: Option<PatternID>,
+    names: Option<core::iter::Enumerate<GroupInfoPatternNames<'a>>>,
+}
+
+impl<'a> Iterator for GroupInfoAllNames<'a> {
+    type Item = (PatternID, usize, Option<&'a str>);
+
+    fn next(&mut self) -> Option<(PatternID, usize, Option<&'a str>)> {
+        // If the group info has no captures, then we never have anything
+        // to yield. We need to consider this case explicitly (at time of
+        // writing) because 'pattern_capture_names' will panic if captures
+        // aren't enabled.
+        if self.group_info.0.index_to_name.is_empty() {
+            return None;
+        }
+        if self.current_pid.is_none() {
+            self.current_pid = Some(self.pids.next()?);
+        }
+        let pid = self.current_pid.unwrap();
+        if self.names.is_none() {
+            self.names = Some(self.group_info.pattern_names(pid).enumerate());
+        }
+        let (group_index, name) = match self.names.as_mut().unwrap().next() {
+            Some((group_index, name)) => (group_index, name),
+            None => {
+                self.current_pid = None;
+                self.names = None;
+                return self.next();
+            }
+        };
+        Some((pid, group_index, name))
+    }
+}
diff --git a/vendor/regex-automata/src/util/determinize/mod.rs b/vendor/regex-automata/src/util/determinize/mod.rs
new file mode 100644
index 0000000..ba32991
--- /dev/null
+++ b/vendor/regex-automata/src/util/determinize/mod.rs
@@ -0,0 +1,682 @@
+/*!
+This module contains types and routines for implementing determinization.
+
+In this crate, there are at least two places where we implement
+determinization: fully ahead-of-time compiled DFAs in the `dfa` module and
+lazily compiled DFAs in the `hybrid` module. The stuff in this module
+corresponds to the things that are in common between these implementations.
+
+There are three broad things that our implementations of determinization
+have in common, as defined by this module:
+
+* The classification of start states. That is, whether we're dealing with
+word boundaries, line boundaries, etc., is all the same. This also includes
+the look-behind assertions that are satisfied by each starting state
+classification.
+* The representation of DFA states as sets of NFA states, including
+convenience types for building these DFA states that are amenable to reusing
+allocations.
+* Routines for the "classical" parts of determinization: computing the
+epsilon closure, tracking match states (with corresponding pattern IDs,
+since we support multi-pattern finite automata) and, of course, computing
+the transition function between states for units of input.
+
+I did consider a couple of alternatives to this particular form of code
+reuse:
+
+1. Don't do any code reuse. The problem here is that we *really* want both
+forms of determinization to do exactly identical things when it comes to
+their handling of NFA states. While our tests generally ensure this, the
+code is tricky and large enough where not reusing code is a pretty big
+bummer.
+
+2. Implement all of determinization once and make it generic over fully
+compiled DFAs and lazily compiled DFAs. While I didn't actually try this
+approach, my instinct is that it would be more complex than is needed here.
+And the interface required would be pretty hairy. Instead, I think splitting
+it into logical sub-components works better.
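+
+The "do exactly identical things" requirement above is observable through
+the public API. As a hedged illustration (the specific pattern and haystack
+here are made up, and this is not part of this module's own API), a fully
+compiled dense DFA and a lazy DFA built from the same pattern must report
+the same result:
+
+```
+use regex_automata::{dfa::{dense, Automaton}, hybrid, Input};
+
+let ahead_of_time = dense::DFA::new(r"[a-z]+[0-9]")?;
+let lazy = hybrid::dfa::DFA::new(r"[a-z]+[0-9]")?;
+let mut cache = lazy.create_cache();
+
+let input = Input::new("determinize7 me");
+// Both determinizers must agree on the match they find.
+assert_eq!(
+    ahead_of_time.try_search_fwd(&input)?,
+    lazy.try_search_fwd(&mut cache, &input)?,
+);
+
+# Ok::<(), Box<dyn std::error::Error>>(())
+```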
It is used as scratch space for depth first
+/// traversal. After returning, it is guaranteed that `stack` will have length
+/// 0.
+///
+/// `state` corresponds to the current DFA state on which one wants to compute
+/// the transition for the input `unit`.
+///
+/// `empty_builder` corresponds to the builder allocation to use to produce a
+/// complete `StateBuilderNFA` state. If the state is not needed (or is already
+/// cached), then it can be cleared and reused without needing to create a new
+/// `State`. The `StateBuilderNFA` state returned is final and ready to be
+/// turned into a `State` if necessary.
+pub(crate) fn next(
+    nfa: &thompson::NFA,
+    match_kind: MatchKind,
+    sparses: &mut SparseSets,
+    stack: &mut Vec<StateID>,
+    state: &State,
+    unit: alphabet::Unit,
+    empty_builder: StateBuilderEmpty,
+) -> StateBuilderNFA {
+    sparses.clear();
+
+    // Whether the NFA is matched in reverse or not. We use this in some
+    // conditional logic for dealing with the exceptionally annoying CRLF-aware
+    // line anchors.
+    let rev = nfa.is_reverse();
+    // The look-around matcher that our NFA is configured with. We don't
+    // actually use it to match look-around assertions, but we do need its
+    // configuration for constructing states consistent with how it matches.
+    let lookm = nfa.look_matcher();
+
+    // Put the NFA state IDs into a sparse set in case we need to
+    // re-compute their epsilon closure.
+    //
+    // Doing this state shuffling is technically not necessary unless some
+    // kind of look-around is used in the DFA. Some ad hoc experiments
+    // suggested that avoiding this didn't lead to much of an improvement,
+    // but perhaps more rigorous experimentation should be done. And in
+    // particular, avoiding this check requires some light refactoring of
+    // the code below.
+    state.iter_nfa_state_ids(|nfa_id| {
+        sparses.set1.insert(nfa_id);
+    });
+
+    // Compute look-ahead assertions originating from the current state. Based
+    // on the input unit we're transitioning over, some additional set of
+    // assertions may be true. Thus, we re-compute this state's epsilon closure
+    // (but only if necessary). Notably, when we build a DFA state initially,
+    // we don't enable any look-ahead assertions because we don't know whether
+    // they're true or not at that point.
+    if !state.look_need().is_empty() {
+        // Add look-ahead assertions that are now true based on the current
+        // input unit.
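+        //
+        // (For example: if `unit` is the special end-of-input sentinel, then
+        // End, EndLF and EndCRLF all become satisfied below, and if `unit` is
+        // the configured line terminator byte, then EndLF becomes satisfied.)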
+ let mut look_have = state.look_have().clone(); + match unit.as_u8() { + Some(b'\r') => { + if !rev || !state.is_half_crlf() { + look_have = look_have.insert(Look::EndCRLF); + } + } + Some(b'\n') => { + if rev || !state.is_half_crlf() { + look_have = look_have.insert(Look::EndCRLF); + } + } + Some(_) => {} + None => { + look_have = look_have + .insert(Look::End) + .insert(Look::EndLF) + .insert(Look::EndCRLF); + } + } + if unit.is_byte(lookm.get_line_terminator()) { + look_have = look_have.insert(Look::EndLF); + } + if state.is_half_crlf() + && ((rev && !unit.is_byte(b'\r')) + || (!rev && !unit.is_byte(b'\n'))) + { + look_have = look_have.insert(Look::StartCRLF); + } + if state.is_from_word() == unit.is_word_byte() { + look_have = look_have + .insert(Look::WordAsciiNegate) + .insert(Look::WordUnicodeNegate); + } else { + look_have = + look_have.insert(Look::WordAscii).insert(Look::WordUnicode); + } + if !unit.is_word_byte() { + look_have = look_have + .insert(Look::WordEndHalfAscii) + .insert(Look::WordEndHalfUnicode); + } + if state.is_from_word() && !unit.is_word_byte() { + look_have = look_have + .insert(Look::WordEndAscii) + .insert(Look::WordEndUnicode); + } else if !state.is_from_word() && unit.is_word_byte() { + look_have = look_have + .insert(Look::WordStartAscii) + .insert(Look::WordStartUnicode); + } + // If we have new assertions satisfied that are among the set of + // assertions that exist in this state (that is, just because we added + // an EndLF assertion above doesn't mean there is an EndLF conditional + // epsilon transition in this state), then we re-compute this state's + // epsilon closure using the updated set of assertions. + // + // Note that since our DFA states omit unconditional epsilon + // transitions, this check is necessary for correctness. If we re-did + // the epsilon closure below needlessly, it could change based on the + // fact that we omitted epsilon states originally. + if !look_have + .subtract(state.look_have()) + .intersect(state.look_need()) + .is_empty() + { + for nfa_id in sparses.set1.iter() { + epsilon_closure( + nfa, + nfa_id, + look_have, + stack, + &mut sparses.set2, + ); + } + sparses.swap(); + sparses.set2.clear(); + } + } + + // Convert our empty builder into one that can record assertions and match + // pattern IDs. + let mut builder = empty_builder.into_matches(); + // Set whether the StartLF look-behind assertion is true for this + // transition or not. The look-behind assertion for ASCII word boundaries + // is handled below. + if nfa.look_set_any().contains_anchor_line() + && unit.is_byte(lookm.get_line_terminator()) + { + // Why only handle StartLF here and not Start? That's because Start + // can only impact the starting state, which is special cased in + // start state handling. + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + // We also need to add StartCRLF to our assertions too, if we can. This + // is unfortunately a bit more complicated, because it depends on the + // direction of the search. In the forward direction, ^ matches after a + // \n, but in the reverse direction, ^ only matches after a \r. (This is + // further complicated by the fact that reverse a regex means changing a ^ + // to a $ and vice versa.) + if nfa.look_set_any().contains_anchor_crlf() + && ((rev && unit.is_byte(b'\r')) || (!rev && unit.is_byte(b'\n'))) + { + builder.set_look_have(|have| have.insert(Look::StartCRLF)); + } + // And also for the start-half word boundary assertions. 
As long as the
+    // look-behind byte is not a word char, then the assertions are satisfied.
+    if nfa.look_set_any().contains_word() && !unit.is_word_byte() {
+        builder.set_look_have(|have| {
+            have.insert(Look::WordStartHalfAscii)
+                .insert(Look::WordStartHalfUnicode)
+        });
+    }
+    for nfa_id in sparses.set1.iter() {
+        match *nfa.state(nfa_id) {
+            thompson::State::Union { .. }
+            | thompson::State::BinaryUnion { .. }
+            | thompson::State::Fail
+            | thompson::State::Look { .. }
+            | thompson::State::Capture { .. } => {}
+            thompson::State::Match { pattern_id } => {
+                // Notice here that we are calling the NEW state a match
+                // state if the OLD state we are transitioning from
+                // contains an NFA match state. This is precisely how we
+                // delay all matches by one byte and also what therefore
+                // guarantees that starting states cannot be match states.
+                //
+                // If we didn't delay matches by one byte, then whether
+                // a DFA is a matching state or not would be determined
+                // by whether one of its own constituent NFA states
+                // was a match state. (And that would be done in
+                // 'add_nfa_states'.)
+                //
+                // Also, 'add_match_pattern_id' requires that callers never
+                // pass duplicative pattern IDs. We do in fact uphold that
+                // guarantee here, but it's subtle. In particular, a Thompson
+                // NFA guarantees that each pattern has exactly one match
+                // state. Moreover, since we're iterating over the NFA state
+                // IDs in a set, we are guaranteed not to have any duplicative
+                // match states. Thus, it is impossible to add the same pattern
+                // ID more than once.
+                //
+                // N.B. We delay matches by 1 byte as a way to hack 1-byte
+                // look-around into DFA searches. This lets us support ^, $
+                // and ASCII-only \b. The delay is also why we need a special
+                // "end-of-input" (EOI) sentinel and why we need to follow the
+                // EOI sentinel at the end of every search. This final EOI
+                // transition is necessary to report matches found at the end
+                // of a haystack.
+                builder.add_match_pattern_id(pattern_id);
+                if !match_kind.continue_past_first_match() {
+                    break;
+                }
+            }
+            thompson::State::ByteRange { ref trans } => {
+                if trans.matches_unit(unit) {
+                    epsilon_closure(
+                        nfa,
+                        trans.next,
+                        builder.look_have(),
+                        stack,
+                        &mut sparses.set2,
+                    );
+                }
+            }
+            thompson::State::Sparse(ref sparse) => {
+                if let Some(next) = sparse.matches_unit(unit) {
+                    epsilon_closure(
+                        nfa,
+                        next,
+                        builder.look_have(),
+                        stack,
+                        &mut sparses.set2,
+                    );
+                }
+            }
+            thompson::State::Dense(ref dense) => {
+                if let Some(next) = dense.matches_unit(unit) {
+                    epsilon_closure(
+                        nfa,
+                        next,
+                        builder.look_have(),
+                        stack,
+                        &mut sparses.set2,
+                    );
+                }
+            }
+        }
+    }
+    // We only set the word byte if there's a word boundary look-around
+    // anywhere in this regex. Otherwise, there's no point in bloating the
+    // number of states if we don't have one.
+    //
+    // We also only set it when the state has a non-zero number of NFA states.
+    // Otherwise, we could wind up with states that *should* be DEAD states
+    // but are otherwise distinct from DEAD states because of this look-behind
+    // assertion being set. While this can't technically impact correctness *in
+    // theory*, it can create pathological DFAs that consume input until EOI or
+    // a quit byte is seen. Consuming until EOI isn't a correctness problem,
+    // but a (serious) perf problem. Hitting a quit byte, however, could be a
+    // correctness problem since it could cause search routines to report an
+    // error instead of a detected match once the quit state is entered.
(The
+    // search routine could be made to be a bit smarter by reporting a match
+    // if one was detected once it enters a quit state (and indeed, the search
+    // routines in this crate do just that), but it seems better to prevent
+    // these things by construction if possible.)
+    if !sparses.set2.is_empty() {
+        if nfa.look_set_any().contains_word() && unit.is_word_byte() {
+            builder.set_is_from_word();
+        }
+        if nfa.look_set_any().contains_anchor_crlf()
+            && ((rev && unit.is_byte(b'\n')) || (!rev && unit.is_byte(b'\r')))
+        {
+            builder.set_is_half_crlf();
+        }
+    }
+    let mut builder_nfa = builder.into_nfa();
+    add_nfa_states(nfa, &sparses.set2, &mut builder_nfa);
+    builder_nfa
+}
+
+/// Compute the epsilon closure for the given NFA state. The epsilon closure
+/// consists of all NFA state IDs, including `start_nfa_id`, that can be
+/// reached from `start_nfa_id` without consuming any input. These state IDs
+/// are written to `set` in the order they are visited, but only if they are
+/// not already in `set`. `start_nfa_id` must be a valid state ID for the NFA
+/// given.
+///
+/// `look_have` consists of the satisfied assertions at the current
+/// position. For conditional look-around epsilon transitions, these are
+/// only followed if they are satisfied by `look_have`.
+///
+/// `stack` must have length 0. It is used as scratch space for depth first
+/// traversal. After returning, it is guaranteed that `stack` will have length
+/// 0.
+pub(crate) fn epsilon_closure(
+    nfa: &thompson::NFA,
+    start_nfa_id: StateID,
+    look_have: LookSet,
+    stack: &mut Vec<StateID>,
+    set: &mut SparseSet,
+) {
+    assert!(stack.is_empty());
+    // If this isn't an epsilon state, then the epsilon closure is always just
+    // itself, so there's no need to spin up the machinery below to handle it.
+    if !nfa.state(start_nfa_id).is_epsilon() {
+        set.insert(start_nfa_id);
+        return;
+    }
+
+    stack.push(start_nfa_id);
+    while let Some(mut id) = stack.pop() {
+        // In many cases, we can avoid stack operations when an NFA state only
+        // adds one new state to visit. In that case, we just set our ID to
+        // that state and mush on. We only use the stack when an NFA state
+        // introduces multiple new states to visit.
+        loop {
+            // Insert this NFA state, and if it's already in the set and thus
+            // already visited, then we can move on to the next one.
+            if !set.insert(id) {
+                break;
+            }
+            match *nfa.state(id) {
+                thompson::State::ByteRange { .. }
+                | thompson::State::Sparse { .. }
+                | thompson::State::Dense { .. }
+                | thompson::State::Fail
+                | thompson::State::Match { .. } => break,
+                thompson::State::Look { look, next } => {
+                    if !look_have.contains(look) {
+                        break;
+                    }
+                    id = next;
+                }
+                thompson::State::Union { ref alternates } => {
+                    id = match alternates.get(0) {
+                        None => break,
+                        Some(&id) => id,
+                    };
+                    // We need to process our alternates in order to preserve
+                    // match preferences, so put the earliest alternates closer
+                    // to the top of the stack.
+                    stack.extend(alternates[1..].iter().rev());
+                }
+                thompson::State::BinaryUnion { alt1, alt2 } => {
+                    id = alt1;
+                    stack.push(alt2);
+                }
+                thompson::State::Capture { next, .. } => {
+                    id = next;
+                }
+            }
+        }
+    }
+}
+
+/// Add the NFA state IDs in the given `set` to the given DFA builder state.
+/// The order in which states are added corresponds to the order in which they
+/// were added to `set`.
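+///
+/// For instance (an illustrative sketch, not a doctest): if `set` was filled
+/// by an epsilon closure that visited NFA states 4, 2, 6 and 3 in that order,
+/// then the builder records them in exactly that order (minus any states,
+/// like `Capture` states, that don't need to be tracked), since leftmost-first
+/// match priority depends on that order.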
+/// +/// The DFA builder state given should already have its complete set of match +/// pattern IDs added (if any) and any look-behind assertions (StartLF, Start +/// and whether this state is being generated for a transition over a word byte +/// when applicable) that are true immediately prior to transitioning into this +/// state (via `builder.look_have()`). The match pattern IDs should correspond +/// to matches that occurred on the previous transition, since all matches are +/// delayed by one byte. The things that should _not_ be set are look-ahead +/// assertions (EndLF, End and whether the next byte is a word byte or not). +/// The builder state should also not have anything in `look_need` set, as this +/// routine will compute that for you. +/// +/// The given NFA should be able to resolve all identifiers in `set` to a +/// particular NFA state. Additionally, `set` must have capacity equivalent +/// to `nfa.len()`. +pub(crate) fn add_nfa_states( + nfa: &thompson::NFA, + set: &SparseSet, + builder: &mut StateBuilderNFA, +) { + for nfa_id in set.iter() { + match *nfa.state(nfa_id) { + thompson::State::ByteRange { .. } => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Sparse { .. } => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Dense { .. } => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Look { look, .. } => { + builder.add_nfa_state_id(nfa_id); + builder.set_look_need(|need| need.insert(look)); + } + thompson::State::Union { .. } + | thompson::State::BinaryUnion { .. } => { + // Pure epsilon transitions don't need to be tracked as part + // of the DFA state. Tracking them is actually superfluous; + // they won't cause any harm other than making determinization + // slower. + // + // Why aren't these needed? Well, in an NFA, epsilon + // transitions are really just jumping points to other states. + // So once you hit an epsilon transition, the same set of + // resulting states always appears. Therefore, putting them in + // a DFA's set of ordered NFA states is strictly redundant. + // + // Look-around states are also epsilon transitions, but + // they are *conditional*. So their presence could be + // discriminatory, and thus, they are tracked above. + // + // But wait... why are epsilon states in our `set` in the first + // place? Why not just leave them out? They're in our `set` + // because it was generated by computing an epsilon closure, + // and we want to keep track of all states we visited to avoid + // re-visiting them. In exchange, we have to do this second + // iteration over our collected states to finalize our DFA + // state. In theory, we could avoid this second iteration if + // we maintained two sets during epsilon closure: the set of + // visited states (to avoid cycles) and the set of states that + // will actually be used to construct the next DFA state. + // + // Note that this optimization requires that we re-compute the + // epsilon closure to account for look-ahead in 'next' *only + // when necessary*. Namely, only when the set of look-around + // assertions changes and only when those changes are within + // the set of assertions that are needed in order to step + // through the closure correctly. Otherwise, if we re-do the + // epsilon closure needlessly, it could change based on the + // fact that we are omitting epsilon states here. + // + // ----- + // + // Welp, scratch the above. 
It turns out that recording these + // is in fact necessary to seemingly handle one particularly + // annoying case: when a conditional epsilon transition is + // put inside of a repetition operator. One specific case I + // ran into was the regex `(?:\b|%)+` on the haystack `z%`. + // The correct leftmost first matches are: [0, 0] and [1, 1]. + // But the DFA was reporting [0, 0] and [1, 2]. To understand + // why this happens, consider the NFA for the aforementioned + // regex: + // + // >000000: binary-union(4, 1) + // 000001: \x00-\xFF => 0 + // 000002: WordAscii => 5 + // 000003: % => 5 + // ^000004: binary-union(2, 3) + // 000005: binary-union(4, 6) + // 000006: MATCH(0) + // + // The problem here is that one of the DFA start states is + // going to consist of the NFA states [2, 3] by computing the + // epsilon closure of state 4. State 4 isn't included because + // we previously were not keeping track of union states. But + // only a subset of transitions out of this state will be able + // to follow WordAscii, and in those cases, the epsilon closure + // is redone. The only problem is that computing the epsilon + // closure from [2, 3] is different than computing the epsilon + // closure from [4]. In the former case, assuming the WordAscii + // assertion is satisfied, you get: [2, 3, 6]. In the latter + // case, you get: [2, 6, 3]. Notice that '6' is the match state + // and appears AFTER '3' in the former case. This leads to a + // preferential but incorrect match of '%' before returning + // a match. In the latter case, the match is preferred over + // continuing to accept the '%'. + // + // It almost feels like we might be able to fix the NFA states + // to avoid this, or to at least only keep track of union + // states where this actually matters, since in the vast + // majority of cases, this doesn't matter. + // + // Another alternative would be to define a new HIR property + // called "assertion is repeated anywhere" and compute it + // inductively over the entire pattern. If it happens anywhere, + // which is probably pretty rare, then we record union states. + // Otherwise we don't. + builder.add_nfa_state_id(nfa_id); + } + // Capture states we definitely do not need to record, since they + // are unconditional epsilon transitions with no branching. + thompson::State::Capture { .. } => {} + // It's not totally clear whether we need to record fail states or + // not, but we do so out of an abundance of caution. Since they are + // quite rare in practice, there isn't much cost to recording them. + thompson::State::Fail => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Match { .. } => { + // Normally, the NFA match state doesn't actually need to + // be inside the DFA state. But since we delay matches by + // one byte, the matching DFA state corresponds to states + // that transition from the one we're building here. And + // the way we detect those cases is by looking for an NFA + // match state. See 'next' for how this is handled. + builder.add_nfa_state_id(nfa_id); + } + } + } + // If we know this state contains no look-around assertions, then + // there's no reason to track which look-around assertions were + // satisfied when this state was created. + if builder.look_need().is_empty() { + builder.set_look_have(|_| LookSet::empty()); + } +} + +/// Sets the appropriate look-behind assertions on the given state based on +/// this starting configuration. 
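+///
+/// As a rough illustration (a sketch, not a doctest): for a forward search
+/// whose start position falls immediately after a `\n`, the start
+/// configuration is `Start::LineLF`. If the NFA contains a multi-line anchor
+/// and the line terminator is the default (`\n`), this routine inserts
+/// `Look::StartLF` into the builder's look-behind set:
+///
+/// ```text
+/// set_lookbehind_from_start(&nfa, &Start::LineLF, &mut builder);
+/// assert!(builder.look_have().contains(Look::StartLF));
+/// ```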
+pub(crate) fn set_lookbehind_from_start( + nfa: &thompson::NFA, + start: &Start, + builder: &mut StateBuilderMatches, +) { + let rev = nfa.is_reverse(); + let lineterm = nfa.look_matcher().get_line_terminator(); + let lookset = nfa.look_set_any(); + match *start { + Start::NonWordByte => { + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + Start::WordByte => { + if lookset.contains_word() { + builder.set_is_from_word(); + } + } + Start::Text => { + if lookset.contains_anchor_haystack() { + builder.set_look_have(|have| have.insert(Look::Start)); + } + if lookset.contains_anchor_line() { + builder.set_look_have(|have| { + have.insert(Look::StartLF).insert(Look::StartCRLF) + }); + } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + Start::LineLF => { + if rev { + if lookset.contains_anchor_crlf() { + builder.set_is_half_crlf(); + } + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + } else { + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartCRLF)); + } + } + if lookset.contains_anchor_line() && lineterm == b'\n' { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + Start::LineCR => { + if lookset.contains_anchor_crlf() { + if rev { + builder.set_look_have(|have| have.insert(Look::StartCRLF)); + } else { + builder.set_is_half_crlf(); + } + } + if lookset.contains_anchor_line() && lineterm == b'\r' { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + Start::CustomLineTerminator => { + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + // This is a bit of a tricky case, but if the line terminator was + // set to a word byte, then we also need to behave as if the start + // configuration is Start::WordByte. That is, we need to mark our + // state as having come from a word byte. + if lookset.contains_word() { + if utf8::is_word_byte(lineterm) { + builder.set_is_from_word(); + } else { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + } + } +} diff --git a/vendor/regex-automata/src/util/determinize/state.rs b/vendor/regex-automata/src/util/determinize/state.rs new file mode 100644 index 0000000..effa6f4 --- /dev/null +++ b/vendor/regex-automata/src/util/determinize/state.rs @@ -0,0 +1,907 @@ +/*! +This module defines a DFA state representation and builders for constructing +DFA states. + +This representation is specifically for use in implementations of NFA-to-DFA +conversion via powerset construction. (Also called "determinization" in this +crate.) + +The term "DFA state" is somewhat overloaded in this crate. In some cases, it +refers to the set of transitions over an alphabet for a particular state. In +other cases, it refers to a set of NFA states. 
The former is really about the
+final representation of a state in a DFA's transition table, whereas the
+latter---what this module is focused on---is closer to an intermediate form
+that is used to help eventually build the transition table.
+
+This module exports four types. All four types represent the same idea: an
+ordered set of NFA states. This ordered set represents the epsilon closure of a
+particular NFA state, where the "epsilon closure" is the set of NFA states that
+can be transitioned to without consuming any input. That is, follow all of the
+NFA state's epsilon transitions. In addition, this implementation of DFA states
+cares about two other things: the ordered set of pattern IDs corresponding
+to the patterns that match if the state is a match state, and the set of
+look-behind assertions that were true when the state was created.
+
+The first, `State`, is a frozen representation of a state that cannot be
+modified. It may be cheaply cloned without copying the state itself and can be
+accessed safely from multiple threads simultaneously. This type is useful for
+when one knows that the DFA state being constructed is distinct from any other
+previously constructed states. Namely, powerset construction, in practice,
+requires one to keep a cache of previously created DFA states. Otherwise,
+the number of DFA states created in memory balloons to an impractically
+large number. For this reason, equivalent states should endeavor to have an
+equivalent byte-level representation. (In general, "equivalency" here means,
+"equivalent assertions, pattern IDs and NFA state IDs." We do not require that
+full DFA minimization be implemented here. This form of equivalency is only
+surface deep and is more-or-less a practical necessity.)
+
+The other three types represent different phases in the construction of a
+DFA state. Internally, these three types (and `State`) all use the same
+byte-oriented representation. That means one can use any of the builder types
+to check whether the state it represents already exists or not. If it does,
+then there is no need to freeze it into a `State` (which requires an alloc and
+a copy). Here are the three types described succinctly:
+
+* `StateBuilderEmpty` represents a state with no pattern IDs, no assertions
+and no NFA states. Creating a `StateBuilderEmpty` performs no allocs. A
+`StateBuilderEmpty` can only be used to query its underlying memory capacity,
+or to convert into a builder for recording pattern IDs and/or assertions.
+
+* `StateBuilderMatches` represents a state with zero or more pattern IDs, zero
+or more satisfied assertions and zero NFA state IDs. A `StateBuilderMatches`
+can only be used for adding pattern IDs and recording assertions.
+
+* `StateBuilderNFA` represents a state with zero or more pattern IDs, zero or
+more satisfied assertions and zero or more NFA state IDs. A `StateBuilderNFA`
+can only be used for adding NFA state IDs and recording some assertions.
+
+The expected flow here is to use the above builders to construct a candidate
+DFA state to check if it already exists. If it does, then there's no need to
+freeze it into a `State`. If it doesn't exist, then `StateBuilderNFA::to_state`
+can be called to freeze the builder into an immutable `State`. In either
+case, `clear` should be called on the builder to turn it back into a
+`StateBuilderEmpty` that reuses the underlying memory.
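+
+In code, the expected flow looks roughly like this (a hedged sketch against
+the crate-internal API described above, not a doctest; `cache` stands in for
+whatever map of previously built `State`s the determinizer maintains):
+
+```text
+let mut empty = StateBuilderEmpty::new();
+loop {
+    let mut matches = empty.into_matches();
+    // ... record look-behind assertions and match pattern IDs ...
+    let mut nfa = matches.into_nfa();
+    // ... add the epsilon closed set of NFA state IDs ...
+    let state = match cache.get(nfa.as_bytes()) {
+        // Already cached: no need to freeze the builder.
+        Some(existing) => existing.clone(),
+        // Otherwise freeze it, which performs the alloc and copy.
+        None => nfa.to_state(),
+    };
+    // Turn the builder back into an empty one, reusing its memory.
+    empty = nfa.clear();
+}
+```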
+ +The main purpose for splitting the builder into these distinct types is to +make it impossible to do things like adding a pattern ID after adding an NFA +state ID. Namely, this makes it simpler to use a space-and-time efficient +binary representation for the state. (The format is documented on the `Repr` +type below.) If we just used one type for everything, it would be possible for +callers to use an incorrect interleaving of calls and thus result in a corrupt +representation. I chose to use more type machinery to make this impossible to +do because 1) determinization is itself pretty complex and it wouldn't be too +hard to foul this up and 2) there isn't too much machinery involved and it's +well contained. + +As an optimization, sometimes states won't have certain things set. For +example, if the underlying NFA has no word boundary assertions, then there is +no reason to set a state's look-behind assertion as to whether it was generated +from a word byte or not. Similarly, if a state has no NFA states corresponding +to look-around assertions, then there is no reason to set `look_have` to a +non-empty set. Finally, callers usually omit unconditional epsilon transitions +when adding NFA state IDs since they aren't discriminatory. + +Finally, the binary representation used by these states is, thankfully, not +serialized anywhere. So any kind of change can be made with reckless abandon, +as long as everything in this module agrees. +*/ + +use core::{convert::TryFrom, mem}; + +use alloc::{sync::Arc, vec::Vec}; + +use crate::util::{ + int::{I32, U32}, + look::LookSet, + primitives::{PatternID, StateID}, + wire::{self, Endian}, +}; + +/// A DFA state that, at its core, is represented by an ordered set of NFA +/// states. +/// +/// This type is intended to be used only in NFA-to-DFA conversion via powerset +/// construction. +/// +/// It may be cheaply cloned and accessed safely from multiple threads +/// simultaneously. +#[derive(Clone, Eq, Hash, PartialEq, PartialOrd, Ord)] +pub(crate) struct State(Arc<[u8]>); + +/// This Borrow impl permits us to lookup any state in a map by its byte +/// representation. This is particularly convenient when one has a StateBuilder +/// and we want to see if a correspondingly equivalent state already exists. If +/// one does exist, then we can reuse the allocation required by StateBuilder +/// without having to convert it into a State first. +impl core::borrow::Borrow<[u8]> for State { + fn borrow(&self) -> &[u8] { + &*self.0 + } +} + +impl core::fmt::Debug for State { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("State").field(&self.repr()).finish() + } +} + +/// For docs on these routines, see the internal Repr and ReprVec types below. 
+impl State {
+    pub(crate) fn dead() -> State {
+        StateBuilderEmpty::new().into_matches().into_nfa().to_state()
+    }
+
+    pub(crate) fn is_match(&self) -> bool {
+        self.repr().is_match()
+    }
+
+    pub(crate) fn is_from_word(&self) -> bool {
+        self.repr().is_from_word()
+    }
+
+    pub(crate) fn is_half_crlf(&self) -> bool {
+        self.repr().is_half_crlf()
+    }
+
+    pub(crate) fn look_have(&self) -> LookSet {
+        self.repr().look_have()
+    }
+
+    pub(crate) fn look_need(&self) -> LookSet {
+        self.repr().look_need()
+    }
+
+    pub(crate) fn match_len(&self) -> usize {
+        self.repr().match_len()
+    }
+
+    pub(crate) fn match_pattern(&self, index: usize) -> PatternID {
+        self.repr().match_pattern(index)
+    }
+
+    pub(crate) fn match_pattern_ids(&self) -> Option<Vec<PatternID>> {
+        self.repr().match_pattern_ids()
+    }
+
+    #[cfg(all(test, not(miri)))]
+    pub(crate) fn iter_match_pattern_ids<F: FnMut(PatternID)>(&self, f: F) {
+        self.repr().iter_match_pattern_ids(f)
+    }
+
+    pub(crate) fn iter_nfa_state_ids<F: FnMut(StateID)>(&self, f: F) {
+        self.repr().iter_nfa_state_ids(f)
+    }
+
+    pub(crate) fn memory_usage(&self) -> usize {
+        self.0.len()
+    }
+
+    fn repr(&self) -> Repr<'_> {
+        Repr(&*self.0)
+    }
+}
+
+/// A state builder that represents an empty state.
+///
+/// This is a useful "initial condition" for state construction. It has no
+/// NFA state IDs, no assertions set and no pattern IDs. No allocations are
+/// made when new() is called. Its main use is for being converted into a
+/// builder that can capture assertions and pattern IDs.
+#[derive(Clone, Debug)]
+pub(crate) struct StateBuilderEmpty(Vec<u8>);
+
+/// For docs on these routines, see the internal Repr and ReprVec types below.
+impl StateBuilderEmpty {
+    pub(crate) fn new() -> StateBuilderEmpty {
+        StateBuilderEmpty(alloc::vec![])
+    }
+
+    pub(crate) fn into_matches(mut self) -> StateBuilderMatches {
+        self.0.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0]);
+        StateBuilderMatches(self.0)
+    }
+
+    fn clear(&mut self) {
+        self.0.clear();
+    }
+
+    pub(crate) fn capacity(&self) -> usize {
+        self.0.capacity()
+    }
+}
+
+/// A state builder that collects assertions and pattern IDs.
+///
+/// When collecting pattern IDs is finished, this can be converted into a
+/// builder that collects NFA state IDs.
+#[derive(Clone)]
+pub(crate) struct StateBuilderMatches(Vec<u8>);
+
+impl core::fmt::Debug for StateBuilderMatches {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.debug_tuple("StateBuilderMatches").field(&self.repr()).finish()
+    }
+}
+
+/// For docs on these routines, see the internal Repr and ReprVec types below.
+impl StateBuilderMatches {
+    pub(crate) fn into_nfa(mut self) -> StateBuilderNFA {
+        self.repr_vec().close_match_pattern_ids();
+        StateBuilderNFA { repr: self.0, prev_nfa_state_id: StateID::ZERO }
+    }
+
+    pub(crate) fn set_is_from_word(&mut self) {
+        self.repr_vec().set_is_from_word()
+    }
+
+    pub(crate) fn set_is_half_crlf(&mut self) {
+        self.repr_vec().set_is_half_crlf()
+    }
+
+    pub(crate) fn look_have(&self) -> LookSet {
+        LookSet::read_repr(&self.0[1..])
+    }
+
+    pub(crate) fn set_look_have(
+        &mut self,
+        set: impl FnMut(LookSet) -> LookSet,
+    ) {
+        self.repr_vec().set_look_have(set)
+    }
+
+    pub(crate) fn add_match_pattern_id(&mut self, pid: PatternID) {
+        self.repr_vec().add_match_pattern_id(pid)
+    }
+
+    fn repr(&self) -> Repr<'_> {
+        Repr(&self.0)
+    }
+
+    fn repr_vec(&mut self) -> ReprVec<'_> {
+        ReprVec(&mut self.0)
+    }
+}
+
+/// A state builder that collects some assertions and NFA state IDs.
+///
+/// When collecting NFA state IDs is finished, this can be used to build a
+/// `State` if necessary.
+///
+/// When done with building a state (regardless of whether it got kept or not),
+/// it's usually a good idea to call `clear` to get an empty builder back so
+/// that it can be reused to build the next state.
+#[derive(Clone)]
+pub(crate) struct StateBuilderNFA {
+    repr: Vec<u8>,
+    prev_nfa_state_id: StateID,
+}
+
+impl core::fmt::Debug for StateBuilderNFA {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.debug_tuple("StateBuilderNFA").field(&self.repr()).finish()
+    }
+}
+
+/// For docs on these routines, see the internal Repr and ReprVec types below.
+impl StateBuilderNFA {
+    pub(crate) fn to_state(&self) -> State {
+        State(Arc::from(&*self.repr))
+    }
+
+    pub(crate) fn clear(self) -> StateBuilderEmpty {
+        let mut builder = StateBuilderEmpty(self.repr);
+        builder.clear();
+        builder
+    }
+
+    pub(crate) fn look_need(&self) -> LookSet {
+        self.repr().look_need()
+    }
+
+    pub(crate) fn set_look_have(
+        &mut self,
+        set: impl FnMut(LookSet) -> LookSet,
+    ) {
+        self.repr_vec().set_look_have(set)
+    }
+
+    pub(crate) fn set_look_need(
+        &mut self,
+        set: impl FnMut(LookSet) -> LookSet,
+    ) {
+        self.repr_vec().set_look_need(set)
+    }
+
+    pub(crate) fn add_nfa_state_id(&mut self, sid: StateID) {
+        ReprVec(&mut self.repr)
+            .add_nfa_state_id(&mut self.prev_nfa_state_id, sid)
+    }
+
+    pub(crate) fn as_bytes(&self) -> &[u8] {
+        &self.repr
+    }
+
+    fn repr(&self) -> Repr<'_> {
+        Repr(&self.repr)
+    }
+
+    fn repr_vec(&mut self) -> ReprVec<'_> {
+        ReprVec(&mut self.repr)
+    }
+}
+
+/// Repr is a read-only view into the representation of a DFA state.
+///
+/// Primarily, a Repr is how we achieve DRY: we implement decoding the format
+/// in one place, and then use a Repr to implement the various methods on the
+/// public state types.
+///
+/// The format is as follows:
+///
+/// The first 9 bytes correspond to three bitsets.
+///
+/// Byte 0 is a bitset corresponding to miscellaneous flags associated with the
+/// state. Bit 0 is set to 1 if the state is a match state. Bit 1 is set to 1
+/// if the state has pattern IDs explicitly written to it. (This is a flag that
+/// is not meant to be set by determinization, but rather, is used as part of
+/// an internal space-saving optimization.) Bit 2 is set to 1 if the state was
+/// generated by a transition over a "word" byte. (Callers may not always set
+/// this. For example, if the NFA has no word boundary assertion, then needing
+/// to track whether a state came from a word byte or not is superfluous and
+/// wasteful.) Bit 3 is set to 1 if the state was generated by a transition
+/// from a `\r` (forward search) or a `\n` (reverse search) when CRLF mode is
+/// enabled.
+///
+/// Bytes 1..5 correspond to the look-behind assertions that were satisfied
+/// by the transition that created this state. (Look-ahead assertions are not
+/// tracked as part of states. Instead, these are applied by re-computing the
+/// epsilon closure of a state when computing the transition function. See
+/// `next` in the parent module.)
+///
+/// Bytes 5..9 correspond to the set of look-around assertions (including both
+/// look-behind and look-ahead) that appear somewhere in this state's set of
+/// NFA state IDs. This is used to determine whether this state's epsilon
+/// closure should be re-computed when computing the transition function.
+/// Namely, look-around assertions are "just" conditional epsilon transitions,
+/// so if there are new assertions available when computing the transition
+/// function, we should only re-compute the epsilon closure if those new
+/// assertions are relevant to this particular state.
+///
+/// Bytes 9..13 correspond to a 32-bit native-endian encoded integer
+/// corresponding to the number of patterns encoded in this state. If the state
+/// is not a match state (byte 0 bit 0 is 0) or if its only pattern ID is
+/// PatternID::ZERO, then no integer is encoded at this position. Instead, byte
+/// offset 9 is the position at which the first NFA state ID is encoded.
+///
+/// For a match state with at least one non-ZERO pattern ID, the next bytes
+/// correspond to a sequence of 32-bit native endian encoded integers that
+/// represent each pattern ID, in order, that this match state represents.
+///
+/// After the pattern IDs (if any), NFA state IDs are delta encoded as
+/// varints.[1] The first NFA state ID is encoded as itself, and each
+/// subsequent NFA state ID is encoded as the difference between itself and the
+/// previous NFA state ID.
+///
+/// [1] - https://developers.google.com/protocol-buffers/docs/encoding#varints
+struct Repr<'a>(&'a [u8]);
+
+impl<'a> Repr<'a> {
+    /// Returns true if and only if this is a match state.
+    ///
+    /// If callers have added pattern IDs to this state, then callers MUST set
+    /// this state as a match state explicitly. However, as a special case, if
+    /// a state is marked as a match state but has no pattern IDs, then it is
+    /// treated as if it had a single pattern ID equivalent to PatternID::ZERO.
+    fn is_match(&self) -> bool {
+        self.0[0] & (1 << 0) > 0
+    }
+
+    /// Returns true if and only if this state has had at least one pattern
+    /// ID added to it.
+    ///
+    /// This is an internal-only flag that permits the representation to save
+    /// space in the common case of an NFA with one pattern in it. In that
+    /// case, a match state can only ever have exactly one pattern ID:
+    /// PatternID::ZERO. So there's no need to represent it.
+    fn has_pattern_ids(&self) -> bool {
+        self.0[0] & (1 << 1) > 0
+    }
+
+    /// Returns true if and only if this state is marked as having been created
+    /// from a transition over a word byte. This is useful for checking whether
+    /// a word boundary assertion is true or not, which requires look-behind
+    /// (whether the current state came from a word byte or not) and look-ahead
+    /// (whether the transition byte is a word byte or not).
+    ///
+    /// Since states with this set are distinct from states that don't have
+    /// this set (even if they are otherwise equivalent), callers should not
+    /// set this assertion unless the underlying NFA has at least one word
+    /// boundary assertion somewhere. Otherwise, a superfluous number of states
+    /// may be created.
+    fn is_from_word(&self) -> bool {
+        self.0[0] & (1 << 2) > 0
+    }
+
+    /// Returns true if and only if this state is marked as being inside of a
+    /// CRLF terminator. In the forward direction, this means the state was
+    /// created after seeing a `\r`. In the reverse direction, this means the
+    /// state was created after seeing a `\n`.
+    fn is_half_crlf(&self) -> bool {
+        self.0[0] & (1 << 3) > 0
+    }
+
+    /// The set of look-behind assertions that were true in the transition that
+    /// created this state.
+    ///
+    /// Generally, this should be empty if 'look_need' is empty, since there is
+    /// no reason to track which look-behind assertions are true if the state
+    /// has no conditional epsilon transitions.
+    ///
+    /// Satisfied look-ahead assertions are not tracked in states. Instead,
+    /// these are re-computed on demand via epsilon closure when computing the
+    /// transition function.
+    fn look_have(&self) -> LookSet {
+        LookSet::read_repr(&self.0[1..])
+    }
+
+    /// The set of look-around (both behind and ahead) assertions that appear
+    /// at least once in this state's set of NFA states.
+    ///
+    /// This is used to determine whether the epsilon closure needs to be
+    /// re-computed when computing the transition function. Namely, if the
+    /// state has no conditional epsilon transitions, then there is no need
+    /// to re-compute the epsilon closure.
+    fn look_need(&self) -> LookSet {
+        LookSet::read_repr(&self.0[5..])
+    }
+
+    /// Returns the total number of match pattern IDs in this state.
+    ///
+    /// If this state is not a match state, then this always returns 0.
+    fn match_len(&self) -> usize {
+        if !self.is_match() {
+            return 0;
+        } else if !self.has_pattern_ids() {
+            1
+        } else {
+            self.encoded_pattern_len()
+        }
+    }
+
+    /// Returns the pattern ID for this match state at the given index.
+    ///
+    /// If the given index is greater than or equal to `match_len()` for this
+    /// state, then this could panic or return incorrect results.
+    fn match_pattern(&self, index: usize) -> PatternID {
+        if !self.has_pattern_ids() {
+            PatternID::ZERO
+        } else {
+            let offset = 13 + index * PatternID::SIZE;
+            // This is OK since we only ever serialize valid PatternIDs to
+            // states.
+            wire::read_pattern_id_unchecked(&self.0[offset..]).0
+        }
+    }
+
+    /// Returns a copy of all match pattern IDs in this state. If this state
+    /// is not a match state, then this returns None.
+    fn match_pattern_ids(&self) -> Option<Vec<PatternID>> {
+        if !self.is_match() {
+            return None;
+        }
+        let mut pids = alloc::vec![];
+        self.iter_match_pattern_ids(|pid| pids.push(pid));
+        Some(pids)
+    }
+
+    /// Calls the given function on every pattern ID in this state.
+    fn iter_match_pattern_ids<F: FnMut(PatternID)>(&self, mut f: F) {
+        if !self.is_match() {
+            return;
+        }
+        // As an optimization for a very common case, when this is a match
+        // state for an NFA with only one pattern, we don't actually write the
+        // pattern ID to the state representation. Instead, we know it must
+        // be there since it is the only possible choice.
+        if !self.has_pattern_ids() {
+            f(PatternID::ZERO);
+            return;
+        }
+        let mut pids = &self.0[13..self.pattern_offset_end()];
+        while !pids.is_empty() {
+            let pid = wire::read_u32(pids);
+            pids = &pids[PatternID::SIZE..];
+            // This is OK since we only ever serialize valid PatternIDs to
+            // states. And since pattern IDs can never exceed a usize, the
+            // unwrap is OK.
+            f(PatternID::new_unchecked(usize::try_from(pid).unwrap()));
+        }
+    }
+
+    /// Calls the given function on every NFA state ID in this state.
+    fn iter_nfa_state_ids<F: FnMut(StateID)>(&self, mut f: F) {
+        let mut sids = &self.0[self.pattern_offset_end()..];
+        let mut prev = 0i32;
+        while !sids.is_empty() {
+            let (delta, nr) = read_vari32(sids);
+            sids = &sids[nr..];
+            let sid = prev + delta;
+            prev = sid;
+            // This is OK since we only ever serialize valid StateIDs to
+            // states. And since state IDs can never exceed an isize, they must
+            // always be able to fit into a usize, and thus cast is OK.
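+            //
+            // (Worked example of the delta decoding, for illustration: NFA
+            // state IDs [2, 5, 3] are stored as the zig-zag varint deltas
+            // [2, 3, -2], and this loop recovers them as 0+2=2, 2+3=5 and
+            // 5-2=3.)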
+            f(StateID::new_unchecked(sid.as_usize()))
+        }
+    }
+
+    /// Returns the offset into this state's representation where the pattern
+    /// IDs end and the NFA state IDs begin.
+    fn pattern_offset_end(&self) -> usize {
+        let encoded = self.encoded_pattern_len();
+        if encoded == 0 {
+            return 9;
+        }
+        // This arithmetic is OK since we were able to address this many bytes
+        // when writing to the state, thus, it must fit into a usize.
+        encoded.checked_mul(4).unwrap().checked_add(13).unwrap()
+    }
+
+    /// Returns the total number of *encoded* pattern IDs in this state.
+    ///
+    /// This may return 0 even when this is a match state, since the pattern
+    /// ID `PatternID::ZERO` is not encoded when it's the only pattern ID in
+    /// the match state (the overwhelming common case).
+    fn encoded_pattern_len(&self) -> usize {
+        if !self.has_pattern_ids() {
+            return 0;
+        }
+        // This unwrap is OK since the total number of patterns is always
+        // guaranteed to fit into a usize.
+        usize::try_from(wire::read_u32(&self.0[9..13])).unwrap()
+    }
+}
+
+impl<'a> core::fmt::Debug for Repr<'a> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let mut nfa_ids = alloc::vec![];
+        self.iter_nfa_state_ids(|sid| nfa_ids.push(sid));
+        f.debug_struct("Repr")
+            .field("is_match", &self.is_match())
+            .field("is_from_word", &self.is_from_word())
+            .field("is_half_crlf", &self.is_half_crlf())
+            .field("look_have", &self.look_have())
+            .field("look_need", &self.look_need())
+            .field("match_pattern_ids", &self.match_pattern_ids())
+            .field("nfa_state_ids", &nfa_ids)
+            .finish()
+    }
+}
+
+/// ReprVec is a write-only view into the representation of a DFA state.
+///
+/// See Repr for more details on the purpose of this type and also the format.
+///
+/// Note that not all possible combinations of methods may be called. This is
+/// precisely what the various StateBuilder types encapsulate: they only
+/// permit valid combinations via Rust's linear typing.
+struct ReprVec<'a>(&'a mut Vec<u8>);
+
+impl<'a> ReprVec<'a> {
+    /// Set this state as a match state.
+    ///
+    /// This should not be exposed explicitly outside of this module. It is
+    /// set automatically when a pattern ID is added.
+    fn set_is_match(&mut self) {
+        self.0[0] |= 1 << 0;
+    }
+
+    /// Set that this state has pattern IDs explicitly written to it.
+    ///
+    /// This should not be exposed explicitly outside of this module. This is
+    /// used internally as a space saving optimization. Namely, if the state
+    /// is a match state but does not have any pattern IDs written to it,
+    /// then it is automatically inferred to have a pattern ID of ZERO.
+    fn set_has_pattern_ids(&mut self) {
+        self.0[0] |= 1 << 1;
+    }
+
+    /// Set this state as being built from a transition over a word byte.
+    ///
+    /// Setting this is only necessary when one needs to deal with word
+    /// boundary assertions. Therefore, if the underlying NFA has no word
+    /// boundary assertions, callers should not set this.
+    fn set_is_from_word(&mut self) {
+        self.0[0] |= 1 << 2;
+    }
+
+    /// Set this state as having seen half of a CRLF terminator.
+    ///
+    /// In the forward direction, this should be set when a `\r` has been seen.
+    /// In the reverse direction, this should be set when a `\n` has been seen.
+    fn set_is_half_crlf(&mut self) {
+        self.0[0] |= 1 << 3;
+    }
+
+    /// The set of look-behind assertions that were true in the transition that
+    /// created this state.
+ fn look_have(&self) -> LookSet { + self.repr().look_have() + } + + /// The set of look-around (both behind and ahead) assertions that appear + /// at least once in this state's set of NFA states. + fn look_need(&self) -> LookSet { + self.repr().look_need() + } + + /// Mutate the set of look-behind assertions that were true in the + /// transition that created this state. + fn set_look_have(&mut self, mut set: impl FnMut(LookSet) -> LookSet) { + set(self.look_have()).write_repr(&mut self.0[1..]); + } + + /// Mutate the set of look-around (both behind and ahead) assertions that + /// appear at least once in this state's set of NFA states. + fn set_look_need(&mut self, mut set: impl FnMut(LookSet) -> LookSet) { + set(self.look_need()).write_repr(&mut self.0[5..]); + } + + /// Add a pattern ID to this state. All match states must have at least + /// one pattern ID associated with it. + /// + /// Callers must never add duplicative pattern IDs. + /// + /// The order in which patterns are added must correspond to the order + /// in which patterns are reported as matches. + fn add_match_pattern_id(&mut self, pid: PatternID) { + // As a (somewhat small) space saving optimization, in the case where + // a matching state has exactly one pattern ID, PatternID::ZERO, we do + // not write either the pattern ID or the number of patterns encoded. + // Instead, all we do is set the 'is_match' bit on this state. Overall, + // this saves 8 bytes per match state for the overwhelming majority of + // match states. + // + // In order to know whether pattern IDs need to be explicitly read or + // not, we use another internal-only bit, 'has_pattern_ids', to + // indicate whether they have been explicitly written or not. + if !self.repr().has_pattern_ids() { + if pid == PatternID::ZERO { + self.set_is_match(); + return; + } + // Make room for 'close_match_pattern_ids' to write the total + // number of pattern IDs written. + self.0.extend(core::iter::repeat(0).take(PatternID::SIZE)); + self.set_has_pattern_ids(); + // If this was already a match state, then the only way that's + // possible when the state doesn't have pattern IDs is if + // PatternID::ZERO was added by the caller previously. In this + // case, we are now adding a non-ZERO pattern ID after it, in + // which case, we want to make sure to represent ZERO explicitly + // now. + if self.repr().is_match() { + write_u32(self.0, 0) + } else { + // Otherwise, just make sure the 'is_match' bit is set. + self.set_is_match(); + } + } + write_u32(self.0, pid.as_u32()); + } + + /// Indicate that no more pattern IDs will be added to this state. + /// + /// Once this is called, callers must not call it or 'add_match_pattern_id' + /// again. + /// + /// This should not be exposed explicitly outside of this module. It + /// should be called only when converting a StateBuilderMatches into a + /// StateBuilderNFA. + fn close_match_pattern_ids(&mut self) { + // If we never wrote any pattern IDs, then there's nothing to do here. + if !self.repr().has_pattern_ids() { + return; + } + let patsize = PatternID::SIZE; + let pattern_bytes = self.0.len() - 13; + // Every pattern ID uses 4 bytes, so number of bytes should be + // divisible by 4. + assert_eq!(pattern_bytes % patsize, 0); + // This unwrap is OK since we are guaranteed that the maximum number + // of possible patterns fits into a u32. + let count32 = u32::try_from(pattern_bytes / patsize).unwrap(); + wire::NE::write_u32(count32, &mut self.0[9..13]); + } + + /// Add an NFA state ID to this state. 
The order in which NFA states are
+    /// added matters. It is the caller's responsibility to ensure that
+    /// duplicate NFA state IDs are not added.
+    fn add_nfa_state_id(&mut self, prev: &mut StateID, sid: StateID) {
+        let delta = sid.as_i32() - prev.as_i32();
+        write_vari32(self.0, delta);
+        *prev = sid;
+    }
+
+    /// Return a read-only view of this state's representation.
+    fn repr(&self) -> Repr<'_> {
+        Repr(self.0.as_slice())
+    }
+}
+
+/// Write a signed 32-bit integer using zig-zag encoding.
+///
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn write_vari32(data: &mut Vec<u8>, n: i32) {
+    let mut un = n.to_bits() << 1;
+    if n < 0 {
+        un = !un;
+    }
+    write_varu32(data, un)
+}
+
+/// Read a signed 32-bit integer using zig-zag encoding. Also, return the
+/// number of bytes read.
+///
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn read_vari32(data: &[u8]) -> (i32, usize) {
+    let (un, i) = read_varu32(data);
+    let mut n = i32::from_bits(un >> 1);
+    if un & 1 != 0 {
+        n = !n;
+    }
+    (n, i)
+}
+
+/// Write an unsigned 32-bit integer as a varint. In essence, `n` is written
+/// as a sequence of bytes where all bytes except for the last one have the
+/// most significant bit set. The least significant 7 bits correspond to the
+/// actual bits of `n`. So in the worst case, a varint uses 5 bytes, but in
+/// very common cases, it uses fewer than 4.
+///
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn write_varu32(data: &mut Vec<u8>, mut n: u32) {
+    while n >= 0b1000_0000 {
+        data.push(n.low_u8() | 0b1000_0000);
+        n >>= 7;
+    }
+    data.push(n.low_u8());
+}
+
+/// Read an unsigned 32-bit varint. Also, return the number of bytes read.
+///
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn read_varu32(data: &[u8]) -> (u32, usize) {
+    // N.B. We can assume correctness here since we know that all varuints are
+    // written with write_varu32. Hence, the 'as' uses and unchecked arithmetic
+    // is all okay.
+    let mut n: u32 = 0;
+    let mut shift: u32 = 0;
+    for (i, &b) in data.iter().enumerate() {
+        if b < 0b1000_0000 {
+            return (n | (u32::from(b) << shift), i + 1);
+        }
+        n |= (u32::from(b) & 0b0111_1111) << shift;
+        shift += 7;
+    }
+    (0, 0)
+}
+
+/// Push a native-endian encoded `n` on to `dst`.
+fn write_u32(dst: &mut Vec<u8>, n: u32) {
+    use crate::util::wire::NE;
+
+    let start = dst.len();
+    dst.extend(core::iter::repeat(0).take(mem::size_of::<u32>()));
+    NE::write_u32(n, &mut dst[start..]);
+}
+
+#[cfg(test)]
+mod tests {
+    use alloc::vec;
+
+    use quickcheck::quickcheck;
+
+    use super::*;
+
+    #[cfg(not(miri))]
+    quickcheck! {
+        fn prop_state_read_write_nfa_state_ids(sids: Vec<StateID>) -> bool {
+            // Builder states do not permit duplicate IDs.
+            let sids = dedup_state_ids(sids);
+
+            let mut b = StateBuilderEmpty::new().into_matches().into_nfa();
+            for &sid in &sids {
+                b.add_nfa_state_id(sid);
+            }
+            let s = b.to_state();
+            let mut got = vec![];
+            s.iter_nfa_state_ids(|sid| got.push(sid));
+            got == sids
+        }
+
+        fn prop_state_read_write_pattern_ids(pids: Vec<PatternID>) -> bool {
+            // Builder states do not permit duplicate IDs.
+            let pids = dedup_pattern_ids(pids);
+
+            let mut b = StateBuilderEmpty::new().into_matches();
+            for &pid in &pids {
+                b.add_match_pattern_id(pid);
+            }
+            let s = b.into_nfa().to_state();
+            let mut got = vec![];
+            s.iter_match_pattern_ids(|pid| got.push(pid));
+            got == pids
+        }
+
+        fn prop_state_read_write_nfa_state_and_pattern_ids(
+            sids: Vec<StateID>,
+            pids: Vec<PatternID>
+        ) -> bool {
+            // Builder states do not permit duplicate IDs.
+            let sids = dedup_state_ids(sids);
+            let pids = dedup_pattern_ids(pids);
+
+            let mut b = StateBuilderEmpty::new().into_matches();
+            for &pid in &pids {
+                b.add_match_pattern_id(pid);
+            }
+
+            let mut b = b.into_nfa();
+            for &sid in &sids {
+                b.add_nfa_state_id(sid);
+            }
+
+            let s = b.to_state();
+            let mut got_pids = vec![];
+            s.iter_match_pattern_ids(|pid| got_pids.push(pid));
+            let mut got_sids = vec![];
+            s.iter_nfa_state_ids(|sid| got_sids.push(sid));
+            got_pids == pids && got_sids == sids
+        }
+    }
+
+    quickcheck! {
+        fn prop_read_write_varu32(n: u32) -> bool {
+            let mut buf = vec![];
+            write_varu32(&mut buf, n);
+            let (got, nread) = read_varu32(&buf);
+            nread == buf.len() && got == n
+        }
+
+        fn prop_read_write_vari32(n: i32) -> bool {
+            let mut buf = vec![];
+            write_vari32(&mut buf, n);
+            let (got, nread) = read_vari32(&buf);
+            nread == buf.len() && got == n
+        }
+    }
+
+    #[cfg(not(miri))]
+    fn dedup_state_ids(sids: Vec<StateID>) -> Vec<StateID> {
+        let mut set = alloc::collections::BTreeSet::new();
+        let mut deduped = vec![];
+        for sid in sids {
+            if set.contains(&sid) {
+                continue;
+            }
+            set.insert(sid);
+            deduped.push(sid);
+        }
+        deduped
+    }
+
+    #[cfg(not(miri))]
+    fn dedup_pattern_ids(pids: Vec<PatternID>) -> Vec<PatternID> {
+        let mut set = alloc::collections::BTreeSet::new();
+        let mut deduped = vec![];
+        for pid in pids {
+            if set.contains(&pid) {
+                continue;
+            }
+            set.insert(pid);
+            deduped.push(pid);
+        }
+        deduped
+    }
+}
diff --git a/vendor/regex-automata/src/util/empty.rs b/vendor/regex-automata/src/util/empty.rs
new file mode 100644
index 0000000..e16af3b
--- /dev/null
+++ b/vendor/regex-automata/src/util/empty.rs
@@ -0,0 +1,265 @@
+/*!
+This module provides helper routines for dealing with zero-width matches.
+
+The main problem being solved here is this:
+
+1. The caller wants to search something that they know is valid UTF-8, such
+as a Rust `&str`.
+2. The regex used by the caller can match the empty string. For example, `a*`.
+3. The caller should never get match offsets returned that occur within the
+encoding of a UTF-8 codepoint. It is logically incorrect, and also means that,
+e.g., slicing the `&str` at those offsets will lead to a panic.
+
+So the question here is, how do we prevent the caller from getting match
+offsets that split a codepoint? For example, strictly speaking, the regex `a*`
+matches `☃` at the positions `[0, 0]`, `[1, 1]`, `[2, 2]` and `[3, 3]` since
+the UTF-8 encoding of `☃` is `\xE2\x98\x83`. In particular, the `NFA` that
+underlies all of the matching engines in this crate doesn't have anything in
+its state graph that prevents matching between UTF-8 code units. Indeed, any
+engine derived from the `NFA` will match at those positions by virtue of the
+fact that the `NFA` is byte oriented. That is, its transitions are defined over
+bytes and the matching engines work by proceeding one byte at a time.
+
+(An alternative architecture would be to define the transitions in an `NFA`
+over codepoints, or `char`. And then make the matching engines proceed by
+decoding one codepoint at a time.
This is a viable strategy, but it doesn't +work for DFA matching engines because designing a fast and memory efficient +transition table for an alphabet as large as Unicode is quite difficult. More +to the point, the top-level `regex` crate supports matching on arbitrary bytes +when Unicode mode is disabled and one is searching a `&[u8]`. So in that case, +you can't just limit yourself to decoding codepoints and matching those. You +really do need to be able to follow byte oriented transitions on the `NFA`.) + +In an older version of the regex crate, we handled this case not in the regex +engine, but in the iterators over matches. Namely, since this case only arises +when the match is empty, we "just" incremented the next starting position +of the search by `N`, where `N` is the length of the codepoint encoded at +the current position. The alternative or more "natural" solution of just +incrementing by `1` would result in executing a search of `a*` on `☃` like +this: + +* Start search at `0`. +* Found match at `[0, 0]`. +* Next start position is `0`. +* To avoid an infinite loop, since it's an empty match, increment by `1`. +* Start search at `1`. +* Found match at `[1, 1]`. Oops. + +But if we instead incremented by `3` (the length in bytes of `☃`), then we get +the following: + +* Start search at `0`. +* Found match at `[0, 0]`. +* Next start position is `0`. +* To avoid an infinite loop, since it's an empty match, increment by `3`. +* Start search at `3`. +* Found match at `[3, 3]`. + +And we get the correct result. But does this technique work in all cases? +Crucially, it requires that a zero-width match that splits a codepoint never +occurs beyond the starting position of the search. Because if it did, merely +incrementing the start position by the number of bytes in the codepoint at +the current position wouldn't be enough. A zero-width match could just occur +anywhere. It turns out that it is _almost_ true. We can convince ourselves by +looking at all possible patterns that can match the empty string: + +* Patterns like `a*`, `a{0}`, `(?:)`, `a|` and `|a` all unconditionally match +the empty string. That is, assuming there isn't an `a` at the current position, +they will all match the empty string at the start of a search. There is no way +to move past it because any other match would not be "leftmost." +* `^` only matches at the beginning of the haystack, where the start position +is `0`. Since we know we're searching valid UTF-8 (if it isn't valid UTF-8, +then this entire problem goes away because it implies your string type supports +invalid UTF-8 and thus must deal with offsets that not only split a codepoint +but occur in entirely invalid UTF-8 somehow), it follows that `^` never matches +between the code units of a codepoint because the start of a valid UTF-8 string +is never within the encoding of a codepoint. +* `$` basically the same logic as `^`, but for the end of a string. A valid +UTF-8 string can't have an incomplete codepoint at the end of it. +* `(?m:^)` follows similarly to `^`, but it can match immediately following +a `\n`. However, since a `\n` is always a codepoint itself and can never +appear within a codepoint, it follows that the position immediately following +a `\n` in a string that is valid UTF-8 is guaranteed to not be between the +code units of another codepoint. (One caveat here is that the line terminator +for multi-line anchors can now be changed to any arbitrary byte, including +things like `\x98` which might occur within a codepoint. 
+However, this wasn't supported by the old regex crate. If it was, it would
+pose the same problems as `(?-u:\B)`, as we'll discuss below.)
+* `(?m:$)` follows a similar argument as for `(?m:^)`. The only difference is
+that `(?m:$)` matches just before a `\n`. But the same argument applies.
+* `(?Rm:^)` and `(?Rm:$)` weren't supported by the old regex crate, but the
+CRLF aware line anchors follow a similar argument as for `(?m:^)` and `(?m:$)`.
+Namely, they only ever match at a boundary where one side is either a `\r` or
+a `\n`, neither of which can occur within a codepoint.
+* `\b` only matches at positions where both sides are valid codepoints, so
+this cannot split a codepoint.
+* `\B`, like `\b`, also only matches at positions where both sides are valid
+codepoints. So this cannot split a codepoint either.
+* `(?-u:\b)` matches only at positions where at least one side of it is an
+ASCII word byte. Since ASCII bytes cannot appear as code units in non-ASCII
+codepoints (one of the many amazing qualities of UTF-8), it follows that this
+too cannot split a codepoint.
+* `(?-u:\B)` finally represents a problem. It can match between *any* two
+bytes that are either both word bytes or non-word bytes. Since code units like
+`\xE2` and `\x98` (from the UTF-8 encoding of `☃`) are both non-word bytes,
+`(?-u:\B)` will match at the position between them.
+
+Thus, our approach of incrementing one codepoint at a time after seeing an
+empty match is flawed because `(?-u:\B)` can result in an empty match that
+splits a codepoint at a position past the starting point of a search. For
+example, searching `(?-u:\B)` on `a☃` would produce the following matches:
+`[2, 2]`, `[3, 3]` and `[4, 4]`. The positions at `0` and `1` don't match
+because they correspond to word boundaries since `a` is an ASCII word byte.
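+
+(Those offsets are easy to reproduce with the top-level `regex` crate's byte
+oriented API, which permits empty matches that split codepoints. This is an
+external illustration that assumes a dependency on `regex`; it is not code
+in this module.)
+
+```text
+use regex::bytes::Regex;
+
+let re = Regex::new(r"(?-u)\B").unwrap();
+let hay = "a☃".as_bytes();
+let offsets: Vec<usize> = re.find_iter(hay).map(|m| m.start()).collect();
+// Positions 0 and 1 are ASCII word boundaries, so `\B` rejects them. The
+// empty matches at 2 and 3 fall between the bytes of `☃`.
+assert_eq!(offsets, vec![2, 3, 4]);
+```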
+
+So what did the old regex crate do to avoid this? It banned `(?-u:\B)` from
+regexes that could match `&str`. That might sound extreme, but a lot of other
+things were banned too. For example, all of `(?-u:.)`, `(?-u:[^a])` and
+`(?-u:\W)` can match invalid UTF-8 too, including individual code units within
+a codepoint. The key difference is that those expressions could never produce
+an empty match. That ban happens when translating an `Ast` to an `Hir`, because
+that process can reason about whether an `Hir` can produce *non-empty* matches
+at invalid UTF-8 boundaries. The bottom line, though, is that we side-stepped
+the `(?-u:\B)` issue by banning it.
+
+If banning `(?-u:\B)` were the only issue with the old regex crate's approach,
+then I probably would have kept it. `\B` is rarely used, so it's not such a big
+deal to have to work around it. However, the problem with the above approach
+is that it doesn't compose. The logic for avoiding splitting a codepoint only
+lived in the iterator, which means if anyone wants to implement their own
+iterator over regex matches, they have to deal with this extremely subtle edge
+case to get full correctness.
+
+Instead, in this crate, we take the approach of pushing this complexity down
+to the lowest layers of each regex engine. The approach is pretty simple:
+
+* If this corner case doesn't apply, don't do anything. (For example, if UTF-8
+mode isn't enabled or if the regex cannot match the empty string.)
+* If an empty match is reported, explicitly check if it splits a codepoint.
+* If it doesn't, we're done, return the match.
+* If it does, then ignore the match and re-run the search.
+* Repeat the above process until the end of the haystack is reached or a match
+is found that doesn't split a codepoint or isn't zero width.
+
+And that's pretty much what this module provides. Every regex engine uses these
+methods in their lowest level public APIs, but just above the layer where
+their internal engine is used. That way, all regex engines can be arbitrarily
+composed without worrying about handling this case, and iterators don't need to
+handle it explicitly.
+
+(It turns out that a new feature I added, support for changing the line
+terminator in a regex to any arbitrary byte, also provokes the above problem.
+Namely, the byte could be invalid UTF-8 or a UTF-8 continuation byte. So that
+support would need to be limited or banned when UTF-8 mode is enabled, just
+like we did for `(?-u:\B)`. But thankfully our more robust approach in this
+crate handles that case just fine too.)
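+
+(As a way to visualize the loop above, here is a minimal, std-only sketch of
+the forward case. The `find_at` closure is a hypothetical stand-in for a call
+into a real regex engine; the actual implementation is `skip_splits_fwd`
+below.)
+
+```
+fn skip_splits_fwd_sketch(
+    hay: &str,
+    mut find_at: impl FnMut(usize) -> Option<(usize, usize)>,
+    mut at: usize,
+) -> Option<(usize, usize)> {
+    let mut m = find_at(at)?;
+    // If the reported match ends inside a codepoint, bump the search start
+    // by one byte and search again, until we either run out of haystack or
+    // find a match that doesn't split a codepoint.
+    while !hay.is_char_boundary(m.1) {
+        at = at.checked_add(1)?;
+        if at > hay.len() {
+            return None;
+        }
+        m = find_at(at)?;
+    }
+    Some(m)
+}
+```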
+*/
+
+use crate::util::search::{Input, MatchError};
+
+#[cold]
+#[inline(never)]
+pub(crate) fn skip_splits_fwd<T, F>(
+    input: &Input<'_>,
+    init_value: T,
+    match_offset: usize,
+    find: F,
+) -> Result<Option<T>, MatchError>
+where
+    F: FnMut(&Input<'_>) -> Result<Option<(T, usize)>, MatchError>,
+{
+    skip_splits(true, input, init_value, match_offset, find)
+}
+
+#[cold]
+#[inline(never)]
+pub(crate) fn skip_splits_rev<T, F>(
+    input: &Input<'_>,
+    init_value: T,
+    match_offset: usize,
+    find: F,
+) -> Result<Option<T>, MatchError>
+where
+    F: FnMut(&Input<'_>) -> Result<Option<(T, usize)>, MatchError>,
+{
+    skip_splits(false, input, init_value, match_offset, find)
+}
+
+fn skip_splits<T, F>(
+    forward: bool,
+    input: &Input<'_>,
+    init_value: T,
+    mut match_offset: usize,
+    mut find: F,
+) -> Result<Option<T>, MatchError>
+where
+    F: FnMut(&Input<'_>) -> Result<Option<(T, usize)>, MatchError>,
+{
+    // If our config says to do an anchored search, then we're definitely
+    // done. We just need to determine whether we have a valid match or
+    // not. If we don't, then we're not allowed to continue, so we report
+    // no match.
+    //
+    // This is actually quite a subtle correctness thing. The key here is
+    // that if we got an empty match that splits a codepoint after doing an
+    // anchored search in UTF-8 mode, then that implies that we must have
+    // *started* the search at a location that splits a codepoint. This
+    // follows from the fact that if a match is reported from an anchored
+    // search, then the start offset of the match *must* match the start
+    // offset of the search.
+    //
+    // It also follows that no other non-empty match is possible. For
+    // example, you might write a regex like '(?:)|SOMETHING' and start its
+    // search in the middle of a codepoint. The first branch is an empty
+    // regex that will bubble up a match at the first position, and then
+    // get rejected here and report no match. But what if 'SOMETHING' could
+    // have matched? We reason that such a thing is impossible, because
+    // if it does, it must report a match that starts in the middle of a
+    // codepoint. This in turn implies that a match is reported whose span
+    // does not correspond to valid UTF-8, and this breaks the promise
+    // made when UTF-8 mode is enabled. (That promise *can* be broken, for
+    // example, by enabling UTF-8 mode but building an NFA by hand that
+    // produces non-empty matches that span invalid UTF-8. This is an
+    // unchecked but documented precondition violation of UTF-8 mode, and is
+    // documented to have unspecified behavior.)
+    //
+    // I believe this actually means that if an anchored search is run, and
+    // UTF-8 mode is enabled and the start position splits a codepoint,
+    // then it is correct to immediately report no match without even
+    // executing the regex engine. But it doesn't really seem worth writing
+    // out that case in every regex engine to save a tiny bit of work in an
+    // extremely pathological case, so we just handle it here.
+    if input.get_anchored().is_anchored() {
+        return Ok(if input.is_char_boundary(match_offset) {
+            Some(init_value)
+        } else {
+            None
+        });
+    }
+    // Otherwise, we have an unanchored search, so just keep looking for
+    // matches until we have one that does not split a codepoint or we hit
+    // EOI.
+    let mut value = init_value;
+    let mut input = input.clone();
+    while !input.is_char_boundary(match_offset) {
+        if forward {
+            // The unwrap is OK here because overflowing usize while
+            // iterating over a slice is impossible, as it would require
+            // a slice of length greater than isize::MAX, which is itself
+            // impossible.
+            input.set_start(input.start().checked_add(1).unwrap());
+        } else {
+            input.set_end(match input.end().checked_sub(1) {
+                None => return Ok(None),
+                Some(end) => end,
+            });
+        }
+        match find(&input)? {
+            None => return Ok(None),
+            Some((new_value, new_match_end)) => {
+                value = new_value;
+                match_offset = new_match_end;
+            }
+        }
+    }
+    Ok(Some(value))
+}
diff --git a/vendor/regex-automata/src/util/escape.rs b/vendor/regex-automata/src/util/escape.rs
new file mode 100644
index 0000000..7f6aa15
--- /dev/null
+++ b/vendor/regex-automata/src/util/escape.rs
@@ -0,0 +1,84 @@
+/*!
+Provides convenience routines for escaping raw bytes.
+
+Since this crate tends to deal with `&[u8]` everywhere and the default
+`Debug` implementation just shows decimal integers, it makes debugging those
+representations quite difficult. This module provides types that show `&[u8]`
+as if it were a string, with invalid UTF-8 escaped into its byte-by-byte hex
+representation.
+*/
+
+use crate::util::utf8;
+
+/// Provides a convenient `Debug` implementation for a `u8`.
+///
+/// The `Debug` impl treats the byte as an ASCII character, and emits a human
+/// readable representation of it. If the byte isn't ASCII, then it's emitted
+/// as a hex escape sequence.
+#[derive(Clone, Copy)]
+pub struct DebugByte(pub u8);
+
+impl core::fmt::Debug for DebugByte {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        // Special case ASCII space. It's too hard to read otherwise, so
+        // put quotes around it. I sometimes wonder whether just '\x20' would
+        // be better...
+        if self.0 == b' ' {
+            return write!(f, "' '");
+        }
+        // 10 bytes is enough to cover any output from ascii::escape_default.
+        let mut bytes = [0u8; 10];
+        let mut len = 0;
+        for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
+            // capitalize \xab to \xAB
+            if i >= 2 && b'a' <= b && b <= b'f' {
+                b -= 32;
+            }
+            bytes[len] = b;
+            len += 1;
+        }
+        write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap())
+    }
+}
+
+/// Provides a convenient `Debug` implementation for `&[u8]`.
+///
+/// This generally works best when the bytes are presumed to be mostly UTF-8,
+/// but will work for anything. For any bytes that aren't UTF-8, they are
+/// emitted as hex escape sequences.
+pub struct DebugHaystack<'a>(pub &'a [u8]);
+
+impl<'a> core::fmt::Debug for DebugHaystack<'a> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "\"")?;
+        // This is a sad re-implementation of a similar impl found in bstr.
+        let mut bytes = self.0;
+        while let Some(result) = utf8::decode(bytes) {
+            let ch = match result {
+                Ok(ch) => ch,
+                Err(byte) => {
+                    write!(f, r"\x{:02x}", byte)?;
+                    bytes = &bytes[1..];
+                    continue;
+                }
+            };
+            bytes = &bytes[ch.len_utf8()..];
+            match ch {
+                '\0' => write!(f, "\\0")?,
+                // ASCII control characters except \0, \n, \r, \t
+                '\x01'..='\x08'
+                | '\x0b'
+                | '\x0c'
+                | '\x0e'..='\x19'
+                | '\x7f' => {
+                    write!(f, "\\x{:02x}", u32::from(ch))?;
+                }
+                '\n' | '\r' | '\t' | _ => {
+                    write!(f, "{}", ch.escape_debug())?;
+                }
+            }
+        }
+        write!(f, "\"")?;
+        Ok(())
+    }
+}
diff --git a/vendor/regex-automata/src/util/int.rs b/vendor/regex-automata/src/util/int.rs
new file mode 100644
index 0000000..e6b13bf
--- /dev/null
+++ b/vendor/regex-automata/src/util/int.rs
@@ -0,0 +1,252 @@
+/*!
+This module provides several integer oriented traits for converting between
+both fixed size integers and integers whose size varies based on the target
+(like `usize`).
+
+The driving design principle of this module is to attempt to centralize as many
+`as` casts as possible here. And in particular, we separate casts into two
+buckets:
+
+* Casts that we use for their truncating behavior. In this case, we use more
+descriptive names, like `low_u32` and `high_u32`.
+* Casts that we use for converting back-and-forth between `usize`. These
+conversions are generally necessary because we often store indices in different
+formats to save on memory, which requires converting to and from `usize`. In
+this case, we very specifically do not want to overflow, and so the methods
+defined here will panic if the `as` cast would be lossy in debug mode. (A
+normal `as` cast will never panic!)
+
+For `as` casts between raw pointers, we use `cast`, so `as` isn't needed there.
+
+For regex engines, floating point is just never used, so we don't have to worry
+about `as` casts for those.
+
+Otherwise, this module pretty much covers all of our `as` needs except for one
+thing: const contexts. There are a select few places in this crate where we
+still need to use `as` because const functions on traits aren't stable yet.
+If we wind up significantly expanding our const footprint in this crate, it
+might be worth defining free functions to handle those cases. But at the time
+of writing, that just seemed like too much ceremony. Instead, I comment each
+such use of `as` in a const context with a "fixme" notice.
+
+NOTE: for simplicity, we don't take target pointer width into account here for
+`usize` conversions. Since we currently only panic in debug mode, skipping the
+check when it can be proven it isn't needed at compile time doesn't really
+matter. Now, if we wind up wanting to do as many checks as possible in release
+mode, then we would want to skip those when we know the conversions are always
+non-lossy.
+
+NOTE: this module isn't an exhaustive API. For example, we still use things
+like `u64::from` where possible, or even `usize::try_from()` for when we do
+explicitly want to panic or when we want to return an error for overflow.
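+
+As a quick illustration of those conventions (a sketch only; it assumes the
+traits defined below are in scope):
+
+```ignore
+let n: u32 = 0xABCD_1234;
+assert_eq!(n.low_u16(), 0x1234);  // deliberate truncation, named as such
+assert_eq!(n.high_u16(), 0xABCD); // the upper 16 bits
+// Index-style conversions panic in debug mode if they would be lossy.
+let i: usize = n.as_usize();
+assert_eq!(i.as_u32(), n);
+```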
+*/
+
+pub(crate) trait U8 {
+    fn as_usize(self) -> usize;
+}
+
+impl U8 for u8 {
+    fn as_usize(self) -> usize {
+        usize::from(self)
+    }
+}
+
+pub(crate) trait U16 {
+    fn as_usize(self) -> usize;
+    fn low_u8(self) -> u8;
+    fn high_u8(self) -> u8;
+}
+
+impl U16 for u16 {
+    fn as_usize(self) -> usize {
+        usize::from(self)
+    }
+
+    fn low_u8(self) -> u8 {
+        self as u8
+    }
+
+    fn high_u8(self) -> u8 {
+        (self >> 8) as u8
+    }
+}
+
+pub(crate) trait U32 {
+    fn as_usize(self) -> usize;
+    fn low_u8(self) -> u8;
+    fn low_u16(self) -> u16;
+    fn high_u16(self) -> u16;
+}
+
+impl U32 for u32 {
+    fn as_usize(self) -> usize {
+        #[cfg(debug_assertions)]
+        {
+            usize::try_from(self).expect("u32 overflowed usize")
+        }
+        #[cfg(not(debug_assertions))]
+        {
+            self as usize
+        }
+    }
+
+    fn low_u8(self) -> u8 {
+        self as u8
+    }
+
+    fn low_u16(self) -> u16 {
+        self as u16
+    }
+
+    fn high_u16(self) -> u16 {
+        (self >> 16) as u16
+    }
+}
+
+pub(crate) trait U64 {
+    fn as_usize(self) -> usize;
+    fn low_u8(self) -> u8;
+    fn low_u16(self) -> u16;
+    fn low_u32(self) -> u32;
+    fn high_u32(self) -> u32;
+}
+
+impl U64 for u64 {
+    fn as_usize(self) -> usize {
+        #[cfg(debug_assertions)]
+        {
+            usize::try_from(self).expect("u64 overflowed usize")
+        }
+        #[cfg(not(debug_assertions))]
+        {
+            self as usize
+        }
+    }
+
+    fn low_u8(self) -> u8 {
+        self as u8
+    }
+
+    fn low_u16(self) -> u16 {
+        self as u16
+    }
+
+    fn low_u32(self) -> u32 {
+        self as u32
+    }
+
+    fn high_u32(self) -> u32 {
+        (self >> 32) as u32
+    }
+}
+
+pub(crate) trait I32 {
+    fn as_usize(self) -> usize;
+    fn to_bits(self) -> u32;
+    fn from_bits(n: u32) -> i32;
+}
+
+impl I32 for i32 {
+    fn as_usize(self) -> usize {
+        #[cfg(debug_assertions)]
+        {
+            usize::try_from(self).expect("i32 overflowed usize")
+        }
+        #[cfg(not(debug_assertions))]
+        {
+            self as usize
+        }
+    }
+
+    fn to_bits(self) -> u32 {
+        self as u32
+    }
+
+    fn from_bits(n: u32) -> i32 {
+        n as i32
+    }
+}
+
+pub(crate) trait Usize {
+    fn as_u8(self) -> u8;
+    fn as_u16(self) -> u16;
+    fn as_u32(self) -> u32;
+    fn as_u64(self) -> u64;
+}
+
+impl Usize for usize {
+    fn as_u8(self) -> u8 {
+        #[cfg(debug_assertions)]
+        {
+            u8::try_from(self).expect("usize overflowed u8")
+        }
+        #[cfg(not(debug_assertions))]
+        {
+            self as u8
+        }
+    }
+
+    fn as_u16(self) -> u16 {
+        #[cfg(debug_assertions)]
+        {
+            u16::try_from(self).expect("usize overflowed u16")
+        }
+        #[cfg(not(debug_assertions))]
+        {
+            self as u16
+        }
+    }
+
+    fn as_u32(self) -> u32 {
+        #[cfg(debug_assertions)]
+        {
+            u32::try_from(self).expect("usize overflowed u32")
+        }
+        #[cfg(not(debug_assertions))]
+        {
+            self as u32
+        }
+    }
+
+    fn as_u64(self) -> u64 {
+        #[cfg(debug_assertions)]
+        {
+            u64::try_from(self).expect("usize overflowed u64")
+        }
+        #[cfg(not(debug_assertions))]
+        {
+            self as u64
+        }
+    }
+}
+
+// Pointers aren't integers, but we convert pointers to integers to perform
+// offset arithmetic in some places. (And no, we don't convert the integers
+// back to pointers.) So add 'as_usize' conversions here too for completeness.
+//
+// These 'as' casts are actually okay because they're always non-lossy. But the
+// idea here is to just try and remove as much 'as' as possible, particularly
+// in this crate where we are being really paranoid about offsets and making
+// sure we don't panic on inputs that might be untrusted. This way, the 'as'
+// casts become easier to audit if they're all in one place, even when some of
+// them are actually okay 100% of the time.
+
+pub(crate) trait Pointer {
+    fn as_usize(self) -> usize;
+}
+
+impl<T> Pointer for *const T {
+    fn as_usize(self) -> usize {
+        self as usize
+    }
+}
+
+pub(crate) trait PointerMut {
+    fn as_usize(self) -> usize;
+}
+
+impl<T> PointerMut for *mut T {
+    fn as_usize(self) -> usize {
+        self as usize
+    }
+}
diff --git a/vendor/regex-automata/src/util/interpolate.rs b/vendor/regex-automata/src/util/interpolate.rs
new file mode 100644
index 0000000..f274629
--- /dev/null
+++ b/vendor/regex-automata/src/util/interpolate.rs
@@ -0,0 +1,579 @@
+/*!
+Provides routines for interpolating capture group references.
+
+That is, if a replacement string contains references like `$foo` or `${foo1}`,
+then they are replaced with the corresponding capture values for the groups
+named `foo` and `foo1`, respectively. Similarly, syntax like `$1` and `${1}`
+is supported as well, with `1` corresponding to a capture group index and not
+a name.
+
+This module provides the free functions [`string`] and [`bytes`], which
+interpolate Rust Unicode strings and byte strings, respectively.
+
+# Format
+
+These routines support two different kinds of capture references: unbraced and
+braced.
+
+For the unbraced format, the format supported is `$ref` where `ref` can be any
+sequence of characters in the class `[0-9A-Za-z_]`. `ref` is always the longest
+possible parse. So for example, `$1a` corresponds to the capture group named
+`1a` and not the capture group at index `1`. If `ref` matches `^[0-9]+$`, then
+it is treated as a capture group index itself and not a name.
+
+For the braced format, the format supported is `${ref}` where `ref` can be any
+sequence of bytes except for `}`. If no closing brace occurs, then it is not
+considered a capture reference. As with the unbraced format, if `ref` matches
+`^[0-9]+$`, then it is treated as a capture group index and not a name.
+
+The braced format is useful for exerting precise control over the name of the
+capture reference. For example, `${1}a` corresponds to the capture group
+reference `1` followed by the letter `a`, whereas `$1a` (as mentioned above)
+corresponds to the capture group reference `1a`. The braced format is also
+useful for expressing capture group names that use characters not supported by
+the unbraced format. For example, `${foo[bar].baz}` refers to the capture group
+named `foo[bar].baz`.
+
+If a capture group reference is found and it does not refer to a valid capture
+group, then it will be replaced with the empty string.
+
+To write a literal `$`, use `$$`.
+
+To be clear, and as exhibited via the type signatures in the routines in this
+module, it is impossible for a replacement string to be invalid. A replacement
+string may not have the intended semantics, but the interpolation procedure
+itself can never fail.
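+
+For example, the longest-parse rule and the braced form can be seen directly
+(a small demonstration of the [`string`] routine below; the closures are
+stand-ins for a real capture lookup):
+
+```
+use regex_automata::util::interpolate;
+
+let mut dst = String::new();
+interpolate::string(
+    "$1a vs ${1}a",
+    |index, dst| {
+        if index == 1 {
+            dst.push_str("Z");
+        }
+    },
+    // No group is named `1a`, so the unbraced reference interpolates to
+    // the empty string.
+    |_name| None,
+    &mut dst,
+);
+// `$1a` is the (unknown) *name* `1a`; `${1}a` is group 1 then a literal `a`.
+assert_eq!(" vs Za", dst);
+```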
+*/
+
+use alloc::{string::String, vec::Vec};
+
+use crate::util::memchr::memchr;
+
+/// Accepts a replacement string and interpolates capture references with their
+/// corresponding values.
+///
+/// `append` should be a function that appends the string value of a capture
+/// group at a particular index to the string given. If the capture group
+/// index is invalid, then nothing should be appended.
+///
+/// `name_to_index` should be a function that maps a capture group name to a
+/// capture group index. If the given name doesn't exist, then `None` should
+/// be returned.
+///
+/// Finally, `dst` is where the final interpolated contents should be written.
+/// If `replacement` contains no capture group references, then `dst` will be
+/// equivalent to `replacement`.
+///
+/// See the [module documentation](self) for details about the format
+/// supported.
+///
+/// # Example
+///
+/// ```
+/// use regex_automata::util::interpolate;
+///
+/// let mut dst = String::new();
+/// interpolate::string(
+///     "foo $bar baz",
+///     |index, dst| {
+///         if index == 0 {
+///             dst.push_str("BAR");
+///         }
+///     },
+///     |name| {
+///         if name == "bar" {
+///             Some(0)
+///         } else {
+///             None
+///         }
+///     },
+///     &mut dst,
+/// );
+/// assert_eq!("foo BAR baz", dst);
+/// ```
+pub fn string(
+    mut replacement: &str,
+    mut append: impl FnMut(usize, &mut String),
+    mut name_to_index: impl FnMut(&str) -> Option<usize>,
+    dst: &mut String,
+) {
+    while !replacement.is_empty() {
+        match memchr(b'$', replacement.as_bytes()) {
+            None => break,
+            Some(i) => {
+                dst.push_str(&replacement[..i]);
+                replacement = &replacement[i..];
+            }
+        }
+        // Handle escaping of '$'.
+        if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') {
+            dst.push_str("$");
+            replacement = &replacement[2..];
+            continue;
+        }
+        debug_assert!(!replacement.is_empty());
+        let cap_ref = match find_cap_ref(replacement.as_bytes()) {
+            Some(cap_ref) => cap_ref,
+            None => {
+                dst.push_str("$");
+                replacement = &replacement[1..];
+                continue;
+            }
+        };
+        replacement = &replacement[cap_ref.end..];
+        match cap_ref.cap {
+            Ref::Number(i) => append(i, dst),
+            Ref::Named(name) => {
+                if let Some(i) = name_to_index(name) {
+                    append(i, dst);
+                }
+            }
+        }
+    }
+    dst.push_str(replacement);
+}
+
+/// Accepts a replacement byte string and interpolates capture references with
+/// their corresponding values.
+///
+/// `append` should be a function that appends the byte string value of a
+/// capture group at a particular index to the byte string given. If the
+/// capture group index is invalid, then nothing should be appended.
+///
+/// `name_to_index` should be a function that maps a capture group name to a
+/// capture group index. If the given name doesn't exist, then `None` should
+/// be returned.
+///
+/// Finally, `dst` is where the final interpolated contents should be written.
+/// If `replacement` contains no capture group references, then `dst` will be
+/// equivalent to `replacement`.
+///
+/// See the [module documentation](self) for details about the format
+/// supported.
+///
+/// # Example
+///
+/// ```
+/// use regex_automata::util::interpolate;
+///
+/// let mut dst = vec![];
+/// interpolate::bytes(
+///     b"foo $bar baz",
+///     |index, dst| {
+///         if index == 0 {
+///             dst.extend_from_slice(b"BAR");
+///         }
+///     },
+///     |name| {
+///         if name == "bar" {
+///             Some(0)
+///         } else {
+///             None
+///         }
+///     },
+///     &mut dst,
+/// );
+/// assert_eq!(&b"foo BAR baz"[..], dst);
+/// ```
+pub fn bytes(
+    mut replacement: &[u8],
+    mut append: impl FnMut(usize, &mut Vec<u8>),
+    mut name_to_index: impl FnMut(&str) -> Option<usize>,
+    dst: &mut Vec<u8>,
+) {
+    while !replacement.is_empty() {
+        match memchr(b'$', replacement) {
+            None => break,
+            Some(i) => {
+                dst.extend_from_slice(&replacement[..i]);
+                replacement = &replacement[i..];
+            }
+        }
+        // Handle escaping of '$'.
+        if replacement.get(1).map_or(false, |&b| b == b'$') {
+            dst.push(b'$');
+            replacement = &replacement[2..];
+            continue;
+        }
+        debug_assert!(!replacement.is_empty());
+        let cap_ref = match find_cap_ref(replacement) {
+            Some(cap_ref) => cap_ref,
+            None => {
+                dst.push(b'$');
+                replacement = &replacement[1..];
+                continue;
+            }
+        };
+        replacement = &replacement[cap_ref.end..];
+        match cap_ref.cap {
+            Ref::Number(i) => append(i, dst),
+            Ref::Named(name) => {
+                if let Some(i) = name_to_index(name) {
+                    append(i, dst);
+                }
+            }
+        }
+    }
+    dst.extend_from_slice(replacement);
+}
+
+/// `CaptureRef` represents a reference to a capture group inside some text.
+/// The reference is either a capture group name or a number.
+///
+/// It is also tagged with the position in the text following the
+/// capture reference.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+struct CaptureRef<'a> {
+    cap: Ref<'a>,
+    end: usize,
+}
+
+/// A reference to a capture group in some text.
+///
+/// e.g., `$2`, `$foo`, `${foo}`.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum Ref<'a> {
+    Named(&'a str),
+    Number(usize),
+}
+
+impl<'a> From<&'a str> for Ref<'a> {
+    fn from(x: &'a str) -> Ref<'a> {
+        Ref::Named(x)
+    }
+}
+
+impl From<usize> for Ref<'static> {
+    fn from(x: usize) -> Ref<'static> {
+        Ref::Number(x)
+    }
+}
+
+/// Parses a possible reference to a capture group name in the given text,
+/// starting at the beginning of `replacement`.
+///
+/// If no such valid reference could be found, None is returned.
+///
+/// Note that this returns a "possible" reference because this routine doesn't
+/// know whether the reference is to a valid group or not. If it winds up not
+/// being a valid reference, then it should be replaced with the empty string.
+fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
+    let mut i = 0;
+    let rep: &[u8] = replacement;
+    if rep.len() <= 1 || rep[0] != b'$' {
+        return None;
+    }
+    i += 1;
+    if rep[i] == b'{' {
+        return find_cap_ref_braced(rep, i + 1);
+    }
+    let mut cap_end = i;
+    while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) {
+        cap_end += 1;
+    }
+    if cap_end == i {
+        return None;
+    }
+    // We just verified that the range 0..cap_end is valid ASCII, so it must
+    // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
+    // check via an unchecked conversion or by parsing the number straight from
+    // &[u8].
+    let cap = core::str::from_utf8(&rep[i..cap_end])
+        .expect("valid UTF-8 capture name");
+    Some(CaptureRef {
+        cap: match cap.parse::<usize>() {
+            Ok(i) => Ref::Number(i),
+            Err(_) => Ref::Named(cap),
+        },
+        end: cap_end,
+    })
+}
+
+/// Looks for a braced reference, e.g., `${foo1}`. This assumes that an opening
+/// brace has been found at `i-1` in `rep`. This then looks for a closing
+/// brace and returns the capture reference within the brace.
+fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> {
+    assert_eq!(b'{', rep[i.checked_sub(1).unwrap()]);
+    let start = i;
+    while rep.get(i).map_or(false, |&b| b != b'}') {
+        i += 1;
+    }
+    if !rep.get(i).map_or(false, |&b| b == b'}') {
+        return None;
+    }
+    // When looking at braced names, we don't put any restrictions on the name,
+    // so it's possible it could be invalid UTF-8. But a capture group name
+    // can never be invalid UTF-8, so if we have invalid UTF-8, then we can
+    // safely return None.
+    let cap = match core::str::from_utf8(&rep[start..i]) {
+        Err(_) => return None,
+        Ok(cap) => cap,
+    };
+    Some(CaptureRef {
+        cap: match cap.parse::<usize>() {
+            Ok(i) => Ref::Number(i),
+            Err(_) => Ref::Named(cap),
+        },
+        end: i + 1,
+    })
+}
+
+/// Returns true if and only if the given byte is allowed in a capture name
+/// written in non-brace form.
+fn is_valid_cap_letter(b: u8) -> bool {
+    match b {
+        b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true,
+        _ => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use alloc::{string::String, vec, vec::Vec};
+
+    use super::{find_cap_ref, CaptureRef};
+
+    macro_rules! find {
+        ($name:ident, $text:expr) => {
+            #[test]
+            fn $name() {
+                assert_eq!(None, find_cap_ref($text.as_bytes()));
+            }
+        };
+        ($name:ident, $text:expr, $capref:expr) => {
+            #[test]
+            fn $name() {
+                assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
+            }
+        };
+    }
+
+    macro_rules! c {
+        ($name_or_number:expr, $pos:expr) => {
+            CaptureRef { cap: $name_or_number.into(), end: $pos }
+        };
+    }
+
+    find!(find_cap_ref1, "$foo", c!("foo", 4));
+    find!(find_cap_ref2, "${foo}", c!("foo", 6));
+    find!(find_cap_ref3, "$0", c!(0, 2));
+    find!(find_cap_ref4, "$5", c!(5, 2));
+    find!(find_cap_ref5, "$10", c!(10, 3));
+    // See https://github.com/rust-lang/regex/pull/585
+    // for more on characters following numbers
+    find!(find_cap_ref6, "$42a", c!("42a", 4));
+    find!(find_cap_ref7, "${42}a", c!(42, 5));
+    find!(find_cap_ref8, "${42");
+    find!(find_cap_ref9, "${42 ");
+    find!(find_cap_ref10, " $0 ");
+    find!(find_cap_ref11, "$");
+    find!(find_cap_ref12, " ");
+    find!(find_cap_ref13, "");
+    find!(find_cap_ref14, "$1-$2", c!(1, 2));
+    find!(find_cap_ref15, "$1_$2", c!("1_", 3));
+    find!(find_cap_ref16, "$x-$y", c!("x", 2));
+    find!(find_cap_ref17, "$x_$y", c!("x_", 3));
+    find!(find_cap_ref18, "${#}", c!("#", 4));
+    find!(find_cap_ref19, "${Z[}", c!("Z[", 5));
+    find!(find_cap_ref20, "${¾}", c!("¾", 5));
+    find!(find_cap_ref21, "${¾a}", c!("¾a", 6));
+    find!(find_cap_ref22, "${a¾}", c!("a¾", 6));
+    find!(find_cap_ref23, "${☃}", c!("☃", 6));
+    find!(find_cap_ref24, "${a☃}", c!("a☃", 7));
+    find!(find_cap_ref25, "${☃a}", c!("☃a", 7));
+    find!(find_cap_ref26, "${名字}", c!("名字", 9));
+
+    fn interpolate_string(
+        mut name_to_index: Vec<(&'static str, usize)>,
+        caps: Vec<&'static str>,
+        replacement: &str,
+    ) -> String {
+        name_to_index.sort_by_key(|x| x.0);
+
+        let mut dst = String::new();
+        super::string(
+            replacement,
+            |i, dst| {
+                if let Some(&s) = caps.get(i) {
+                    dst.push_str(s);
+                }
+            },
+            |name| -> Option<usize> {
+                name_to_index
+                    .binary_search_by_key(&name, |x| x.0)
+                    .ok()
+                    .map(|i| name_to_index[i].1)
+            },
+            &mut dst,
+        );
+        dst
+    }
+
+    fn interpolate_bytes(
+        mut name_to_index: Vec<(&'static str, usize)>,
+        caps: Vec<&'static str>,
+        replacement: &str,
+    ) -> String {
+        name_to_index.sort_by_key(|x| x.0);
+
+        let mut dst = vec![];
+        super::bytes(
+            replacement.as_bytes(),
+            |i, dst| {
+                if let Some(&s) = caps.get(i) {
+                    dst.extend_from_slice(s.as_bytes());
+                }
+            },
+            |name| -> Option<usize> {
+                name_to_index
+                    .binary_search_by_key(&name, |x| x.0)
+                    .ok()
+                    .map(|i| name_to_index[i].1)
+            },
+            &mut dst,
+        );
+        String::from_utf8(dst).unwrap()
+    }
+
+    macro_rules! interp {
interp { + ($name:ident, $map:expr, $caps:expr, $hay:expr, $expected:expr $(,)*) => { + #[test] + fn $name() { + assert_eq!( + $expected, + interpolate_string($map, $caps, $hay), + "interpolate::string failed", + ); + assert_eq!( + $expected, + interpolate_bytes($map, $caps, $hay), + "interpolate::bytes failed", + ); + } + }; + } + + interp!( + interp1, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $foo test", + "test xxx test", + ); + + interp!( + interp2, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test$footest", + "test", + ); + + interp!( + interp3, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test${foo}test", + "testxxxtest", + ); + + interp!( + interp4, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test$2test", + "test", + ); + + interp!( + interp5, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test${2}test", + "testxxxtest", + ); + + interp!( + interp6, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $$foo test", + "test $foo test", + ); + + interp!( + interp7, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $foo", + "test xxx", + ); + + interp!( + interp8, + vec![("foo", 2)], + vec!["", "", "xxx"], + "$foo test", + "xxx test", + ); + + interp!( + interp9, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test $bar$foo", + "test yyyxxx", + ); + + interp!( + interp10, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test $ test", + "test $ test", + ); + + interp!( + interp11, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${} test", + "test test", + ); + + interp!( + interp12, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${ } test", + "test test", + ); + + interp!( + interp13, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${a b} test", + "test test", + ); + + interp!( + interp14, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${a} test", + "test test", + ); + + // This is a funny case where a braced reference is never closed, but + // within the unclosed braced reference, there is an unbraced reference. + // In this case, the braced reference is just treated literally and the + // unbraced reference is found. + interp!( + interp15, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${wat $bar ok", + "test ${wat yyy ok", + ); +} diff --git a/vendor/regex-automata/src/util/iter.rs b/vendor/regex-automata/src/util/iter.rs new file mode 100644 index 0000000..a789fa0 --- /dev/null +++ b/vendor/regex-automata/src/util/iter.rs @@ -0,0 +1,1027 @@ +/*! +Generic helpers for iteration of matches from a regex engine in a haystack. + +The principle type in this module is a [`Searcher`]. A `Searcher` provides +its own lower level iterator-like API in addition to methods for constructing +types that implement `Iterator`. The documentation for `Searcher` explains a +bit more about why these different APIs exist. + +Currently, this module supports iteration over any regex engine that works +with the [`HalfMatch`], [`Match`] or [`Captures`] types. +*/ + +#[cfg(feature = "alloc")] +use crate::util::captures::Captures; +use crate::util::search::{HalfMatch, Input, Match, MatchError}; + +/// A searcher for creating iterators and performing lower level iteration. +/// +/// This searcher encapsulates the logic required for finding all successive +/// non-overlapping matches in a haystack. In theory, iteration would look +/// something like this: +/// +/// 1. Setting the start position to `0`. +/// 2. Execute a regex search. If no match, end iteration. +/// 3. 
+*/
+
+#[cfg(feature = "alloc")]
+use crate::util::captures::Captures;
+use crate::util::search::{HalfMatch, Input, Match, MatchError};
+
+/// A searcher for creating iterators and performing lower level iteration.
+///
+/// This searcher encapsulates the logic required for finding all successive
+/// non-overlapping matches in a haystack. In theory, iteration would look
+/// something like this:
+///
+/// 1. Set the start position to `0`.
+/// 2. Execute a regex search. If no match, end iteration.
+/// 3. Report the match and set the start position to the end of the match.
+/// 4. Go back to (2).
+///
+/// And if this were indeed the case, it's likely that `Searcher` wouldn't
+/// exist. Unfortunately, because a regex may match the empty string, the above
+/// logic won't work for all possible regexes. Namely, if an empty match is
+/// found, then step (3) would set the start position of the search to the
+/// position it was already at. Thus, iteration would never end.
+///
+/// Instead, a `Searcher` knows how to detect these cases and forcefully
+/// advance iteration in the case of an empty match that overlaps with a
+/// previous match.
+///
+/// If you know that your regex cannot match any empty string, then the simple
+/// algorithm described above will work correctly.
+///
+/// When possible, prefer the iterators defined on the regex engine you're
+/// using. This tries to abstract over the regex engine and is thus a bit more
+/// unwieldy to use.
+///
+/// In particular, a `Searcher` is not itself an iterator. Instead, it provides
+/// `advance` routines that permit moving the search along explicitly. It also
+/// provides various routines, like [`Searcher::into_matches_iter`], that
+/// accept a closure (representing how a regex engine executes a search) and
+/// returns a conventional iterator.
+///
+/// The lifetime parameters come from the [`Input`] type passed to
+/// [`Searcher::new`]:
+///
+/// * `'h` is the lifetime of the underlying haystack.
+///
+/// # Searcher vs Iterator
+///
+/// Why does a search type with "advance" APIs exist at all when we also have
+/// iterators? Unfortunately, the reasoning behind this split is a complex
+/// combination of the following things:
+///
+/// 1. While many of the regex engines expose their own iterators, it is also
+/// nice to expose this lower level iteration helper because it permits callers
+/// to provide their own `Input` configuration. Moreover, a `Searcher` can work
+/// with _any_ regex engine instead of only the ones defined in this crate.
+/// This way, everyone benefits from a shared iteration implementation.
+/// 2. There are many different regex engines that, while they have the same
+/// match semantics, have slightly different APIs. Iteration is just complex
+/// enough to want to share code, and so we need a way of abstracting over
+/// those different regex engines. While we could define a new trait that
+/// describes any regex engine search API, it would wind up looking very close
+/// to a closure. While there may still be reasons for the more generic trait
+/// to exist, for now and for the purposes of iteration, we use a closure.
+/// Closures also provide a lot of easy flexibility at the call site, in that
+/// they permit the caller to borrow any kind of state they want for use during
+/// each search call.
+/// 3. As a result of using closures, and because closures are anonymous types
+/// that cannot be named, it is difficult to encapsulate them without both
+/// costs to speed and added complexity to the public API. For example, in
+/// defining an iterator type like
+/// [`dfa::regex::FindMatches`](crate::dfa::regex::FindMatches),
+/// if we use a closure internally, it's not possible to name this type in the
+/// return type of the iterator constructor. Thus, the only way around it is
+/// to erase the type by boxing it and turning it into a `Box<dyn FnMut(..)>`.
+/// This boxed closure is unlikely to be inlined _and_ it infects the public
+/// API in subtle ways.
+/// Namely, unless you declare the closure as implementing `Send` and `Sync`,
+/// then the resulting iterator type won't implement it either. But there are
+/// practical issues with requiring the closure to implement `Send` and `Sync`
+/// that result in other API complexities that are beyond the scope of this
+/// already long exposition.
+/// 4. Some regex engines expose more complex match information than just
+/// "which pattern matched" and "at what offsets." For example, the PikeVM
+/// exposes match spans for each capturing group that participated in the
+/// match. In such cases, it can be quite beneficial to reuse the capturing
+/// group allocation on subsequent searches. A proper iterator doesn't permit
+/// this API due to its interface, so it's useful to have something a bit lower
+/// level that permits callers to amortize allocations while also reusing a
+/// shared implementation of iteration. (See the documentation for
+/// [`Searcher::advance`] for an example of using the "advance" API with the
+/// PikeVM.)
+///
+/// What this boils down to is that there are "advance" APIs which require
+/// handing a closure to it for every call, and there are also APIs to create
+/// iterators from a closure. The former are useful for _implementing_
+/// iterators or when you need more flexibility, while the latter are useful
+/// for conveniently writing custom iterators on-the-fly.
+///
+/// # Example: iterating with captures
+///
+/// Several regex engines in this crate offer convenient iterator APIs over
+/// [`Captures`] values. To do so, this requires allocating a new `Captures`
+/// value for each iteration step. This can perhaps be more costly than you
+/// might want. Instead of implementing your own iterator to avoid that
+/// cost (which can be a little subtle if you want to handle empty matches
+/// correctly), you can use this `Searcher` to do it for you:
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::pikevm::PikeVM,
+///     util::iter::Searcher,
+///     Input, Span,
+/// };
+///
+/// let re = PikeVM::new("foo(?P<numbers>[0-9]+)")?;
+/// let haystack = "foo1 foo12 foo123";
+///
+/// let mut caps = re.create_captures();
+/// let mut cache = re.create_cache();
+/// let mut matches = vec![];
+/// let mut searcher = Searcher::new(Input::new(haystack));
+/// while let Some(_) = searcher.advance(|input| {
+///     re.search(&mut cache, input, &mut caps);
+///     Ok(caps.get_match())
+/// }) {
+///     // The unwrap is OK since 'numbers' matches if the pattern matches.
+///     matches.push(caps.get_group_by_name("numbers").unwrap());
+/// }
+/// assert_eq!(matches, vec![
+///     Span::from(3..4),
+///     Span::from(8..10),
+///     Span::from(14..17),
+/// ]);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Searcher<'h> {
+    /// The input parameters to give to each regex engine call.
+    ///
+    /// The start position of the search is mutated during iteration.
+    input: Input<'h>,
+    /// Records the end offset of the most recent match. This is necessary to
+    /// handle a corner case for preventing empty matches from overlapping with
+    /// the ending bounds of a prior match.
+    last_match_end: Option<usize>,
+}
+
+impl<'h> Searcher<'h> {
+    /// Create a new fallible non-overlapping matches iterator.
+    ///
+    /// The given `input` provides the parameters (including the haystack),
+    /// while the `finder` represents a closure that calls the underlying regex
+    /// engine. The closure may borrow any additional state that is needed,
+    /// such as a prefilter scanner.
+    pub fn new(input: Input<'h>) -> Searcher<'h> {
+        Searcher { input, last_match_end: None }
+    }
+
+    /// Returns the current `Input` used by this searcher.
+    ///
+    /// The `Input` returned is generally equivalent to the one given to
+    /// [`Searcher::new`], but its start position may be different to reflect
+    /// the start of the next search to be executed.
+    pub fn input<'s>(&'s self) -> &'s Input<'h> {
+        &self.input
+    }
+
+    /// Return the next half match for an infallible search if one exists, and
+    /// advance to the next position.
+    ///
+    /// This is like `try_advance_half`, except errors are converted into
+    /// panics.
+    ///
+    /// # Panics
+    ///
+    /// If the given closure returns an error, then this panics. This is useful
+    /// when you know your underlying regex engine has been configured to not
+    /// return an error.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use a `Searcher` to iterate over all matches
+    /// when using a DFA, which only provides "half" matches.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     util::iter::Searcher,
+    ///     HalfMatch, Input,
+    /// };
+    ///
+    /// let re = DFA::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22");
+    /// let mut it = Searcher::new(input);
+    ///
+    /// let expected = Some(HalfMatch::must(0, 10));
+    /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = Some(HalfMatch::must(0, 21));
+    /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = Some(HalfMatch::must(0, 32));
+    /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = None;
+    /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// This correctly moves iteration forward even when an empty match occurs:
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     util::iter::Searcher,
+    ///     HalfMatch, Input,
+    /// };
+    ///
+    /// let re = DFA::new(r"a|")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let input = Input::new("abba");
+    /// let mut it = Searcher::new(input);
+    ///
+    /// let expected = Some(HalfMatch::must(0, 1));
+    /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = Some(HalfMatch::must(0, 2));
+    /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = Some(HalfMatch::must(0, 4));
+    /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = None;
+    /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn advance_half<F>(&mut self, finder: F) -> Option<HalfMatch>
+    where
+        F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>,
+    {
+        match self.try_advance_half(finder) {
+            Ok(m) => m,
+            Err(err) => panic!(
+                "unexpected regex half find error: {}\n\
+                 to handle find errors, use 'try' or 'search' methods",
+                err,
+            ),
+        }
+    }
+
+    /// Return the next match for an infallible search if one exists, and
+    /// advance to the next position.
+    ///
+    /// The search is advanced even in the presence of empty matches by
+    /// forbidding empty matches from overlapping with any other match.
+    ///
+    /// This is like `try_advance`, except errors are converted into panics.
+    ///
+    /// # Panics
+    ///
+    /// If the given closure returns an error, then this panics. This is useful
+    /// when you know your underlying regex engine has been configured to not
+    /// return an error.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use a `Searcher` to iterate over all matches
+    /// when using a regex based on lazy DFAs:
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::regex::Regex,
+    ///     util::iter::Searcher,
+    ///     Match, Input,
+    /// };
+    ///
+    /// let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22");
+    /// let mut it = Searcher::new(input);
+    ///
+    /// let expected = Some(Match::must(0, 0..10));
+    /// let got = it.advance(|input| re.try_search(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = Some(Match::must(0, 11..21));
+    /// let got = it.advance(|input| re.try_search(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = Some(Match::must(0, 22..32));
+    /// let got = it.advance(|input| re.try_search(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = None;
+    /// let got = it.advance(|input| re.try_search(&mut cache, input));
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// This example shows the same as above, but with the PikeVM. This example
+    /// is useful because it shows how to use this API even when the regex
+    /// engine doesn't directly return a `Match`.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::iter::Searcher,
+    ///     Match, Input,
+    /// };
+    ///
+    /// let re = PikeVM::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22");
+    /// let mut it = Searcher::new(input);
+    ///
+    /// let expected = Some(Match::must(0, 0..10));
+    /// let got = it.advance(|input| {
+    ///     re.search(&mut cache, input, &mut caps);
+    ///     Ok(caps.get_match())
+    /// });
+    /// // Note that if we wanted to extract capturing group spans, we could
+    /// // do that here with 'caps'.
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = Some(Match::must(0, 11..21));
+    /// let got = it.advance(|input| {
+    ///     re.search(&mut cache, input, &mut caps);
+    ///     Ok(caps.get_match())
+    /// });
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = Some(Match::must(0, 22..32));
+    /// let got = it.advance(|input| {
+    ///     re.search(&mut cache, input, &mut caps);
+    ///     Ok(caps.get_match())
+    /// });
+    /// assert_eq!(expected, got);
+    ///
+    /// let expected = None;
+    /// let got = it.advance(|input| {
+    ///     re.search(&mut cache, input, &mut caps);
+    ///     Ok(caps.get_match())
+    /// });
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn advance<F>(&mut self, finder: F) -> Option<Match>
+    where
+        F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>,
+    {
+        match self.try_advance(finder) {
+            Ok(m) => m,
+            Err(err) => panic!(
+                "unexpected regex find error: {}\n\
+                 to handle find errors, use 'try' or 'search' methods",
+                err,
+            ),
+        }
+    }
+
+    /// Return the next half match for a fallible search if one exists, and
+    /// advance to the next position.
+    ///
+    /// This is like `advance_half`, except it permits callers to handle errors
+    /// during iteration.
+    #[inline]
+    pub fn try_advance_half<F>(
+        &mut self,
+        mut finder: F,
+    ) -> Result<Option<HalfMatch>, MatchError>
+    where
+        F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>,
+    {
+        let mut m = match finder(&self.input)? {
+            None => return Ok(None),
+            Some(m) => m,
+        };
+        if Some(m.offset()) == self.last_match_end {
+            m = match self.handle_overlapping_empty_half_match(m, finder)? {
+                None => return Ok(None),
+                Some(m) => m,
+            };
+        }
+        self.input.set_start(m.offset());
+        self.last_match_end = Some(m.offset());
+        Ok(Some(m))
+    }
+
+    /// Return the next match for a fallible search if one exists, and advance
+    /// to the next position.
+    ///
+    /// This is like `advance`, except it permits callers to handle errors
+    /// during iteration.
+    #[inline]
+    pub fn try_advance<F>(
+        &mut self,
+        mut finder: F,
+    ) -> Result<Option<Match>, MatchError>
+    where
+        F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>,
+    {
+        let mut m = match finder(&self.input)? {
+            None => return Ok(None),
+            Some(m) => m,
+        };
+        if m.is_empty() && Some(m.end()) == self.last_match_end {
+            m = match self.handle_overlapping_empty_match(m, finder)? {
+                None => return Ok(None),
+                Some(m) => m,
+            };
+        }
+        self.input.set_start(m.end());
+        self.last_match_end = Some(m.end());
+        Ok(Some(m))
+    }
+
+    /// Given a closure that executes a single search, return an iterator over
+    /// all successive non-overlapping half matches.
+    ///
+    /// The iterator returned yields result values. If the underlying regex
+    /// engine is configured to never return an error, consider calling
+    /// [`TryHalfMatchesIter::infallible`] to convert errors into panics.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use a `Searcher` to create a proper
+    /// iterator over half matches.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     util::iter::Searcher,
+    ///     HalfMatch, Input,
+    /// };
+    ///
+    /// let re = DFA::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22");
+    /// let mut it = Searcher::new(input).into_half_matches_iter(|input| {
+    ///     re.try_search_fwd(&mut cache, input)
+    /// });
+    ///
+    /// let expected = Some(Ok(HalfMatch::must(0, 10)));
+    /// assert_eq!(expected, it.next());
+    ///
+    /// let expected = Some(Ok(HalfMatch::must(0, 21)));
+    /// assert_eq!(expected, it.next());
+    ///
+    /// let expected = Some(Ok(HalfMatch::must(0, 32)));
+    /// assert_eq!(expected, it.next());
+    ///
+    /// let expected = None;
+    /// assert_eq!(expected, it.next());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn into_half_matches_iter<F>(
+        self,
+        finder: F,
+    ) -> TryHalfMatchesIter<'h, F>
+    where
+        F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>,
+    {
+        TryHalfMatchesIter { it: self, finder }
+    }
+
+    /// Given a closure that executes a single search, return an iterator over
+    /// all successive non-overlapping matches.
+    ///
+    /// The iterator returned yields result values. If the underlying regex
+    /// engine is configured to never return an error, consider calling
+    /// [`TryMatchesIter::infallible`] to convert errors into panics.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use a `Searcher` to create a proper
+    /// iterator over matches.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::regex::Regex,
+    ///     util::iter::Searcher,
+    ///     Match, Input,
+    /// };
+    ///
+    /// let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22");
+    /// let mut it = Searcher::new(input).into_matches_iter(|input| {
+    ///     re.try_search(&mut cache, input)
+    /// });
+    ///
+    /// let expected = Some(Ok(Match::must(0, 0..10)));
+    /// assert_eq!(expected, it.next());
+    ///
+    /// let expected = Some(Ok(Match::must(0, 11..21)));
+    /// assert_eq!(expected, it.next());
+    ///
+    /// let expected = Some(Ok(Match::must(0, 22..32)));
+    /// assert_eq!(expected, it.next());
+    ///
+    /// let expected = None;
+    /// assert_eq!(expected, it.next());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn into_matches_iter<F>(self, finder: F) -> TryMatchesIter<'h, F>
+    where
+        F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>,
+    {
+        TryMatchesIter { it: self, finder }
+    }
+
+    /// Given a closure that executes a single search, return an iterator over
+    /// all successive non-overlapping `Captures` values.
+    ///
+    /// The iterator returned yields result values. If the underlying regex
+    /// engine is configured to never return an error, consider calling
+    /// [`TryCapturesIter::infallible`] to convert errors into panics.
+    ///
+    /// Unlike the other iterator constructors, this accepts an initial
+    /// `Captures` value. This `Captures` value is reused for each search, and
+    /// the iterator implementation clones it before returning it. The caller
+    /// must provide this value because the iterator is purposely ignorant
+    /// of the underlying regex engine and thus doesn't know how to create
+    /// one itself. More to the point, a `Captures` value itself has a few
+    /// different constructors, which change which kind of information is
+    /// available to query in exchange for search performance.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use a `Searcher` to create a proper iterator
+    /// over `Captures` values, which provides access to all capturing group
+    /// spans for each match.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::iter::Searcher,
+    ///     Input,
+    /// };
+    ///
+    /// let re = PikeVM::new(
+    ///     r"(?P<y>[0-9]{4})-(?P<m>[0-9]{2})-(?P<d>[0-9]{2})",
+    /// )?;
+    /// let (mut cache, caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// let haystack = "2010-03-14 2016-10-08 2020-10-22";
+    /// let input = Input::new(haystack);
+    /// let mut it = Searcher::new(input)
+    ///     .into_captures_iter(caps, |input, caps| {
+    ///         re.search(&mut cache, input, caps);
+    ///         Ok(())
+    ///     });
+    ///
+    /// let got = it.next().expect("first date")?;
+    /// let year = got.get_group_by_name("y").expect("must match");
+    /// assert_eq!("2010", &haystack[year]);
+    ///
+    /// let got = it.next().expect("second date")?;
+    /// let month = got.get_group_by_name("m").expect("must match");
+    /// assert_eq!("10", &haystack[month]);
+    ///
+    /// let got = it.next().expect("third date")?;
+    /// let day = got.get_group_by_name("d").expect("must match");
+    /// assert_eq!("22", &haystack[day]);
+    ///
+    /// assert!(it.next().is_none());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "alloc")]
+    #[inline]
+    pub fn into_captures_iter<F>(
+        self,
+        caps: Captures,
+        finder: F,
+    ) -> TryCapturesIter<'h, F>
+    where
+        F: FnMut(&Input<'_>, &mut Captures) -> Result<(), MatchError>,
+    {
+        TryCapturesIter { it: self, caps, finder }
+    }
+
+    /// Handles the special case of a match that begins where the previous
+    /// match ended. Without this special handling, it'd be possible to get
+    /// stuck where an empty match never results in forward progress. This
+    /// also makes it more consistent with how prevailing general purpose
+    /// regex engines work.
+    #[cold]
+    #[inline(never)]
+    fn handle_overlapping_empty_half_match<F>(
+        &mut self,
+        _: HalfMatch,
+        mut finder: F,
+    ) -> Result<Option<HalfMatch>, MatchError>
+    where
+        F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>,
+    {
+        // Since we are only here when 'm.offset()' matches the offset of the
+        // last match, it follows that this must have been an empty match.
+        // Since we both need to make progress *and* prevent overlapping
+        // matches, we discard this match and advance the search by 1.
+        //
+        // Note that this may start a search in the middle of a codepoint. The
+        // regex engines themselves are expected to deal with that and not
+        // report any matches within a codepoint if they are configured in
+        // UTF-8 mode.
+        self.input.set_start(self.input.start().checked_add(1).unwrap());
+        finder(&self.input)
+    }
+
+    /// Handles the special case of an empty match by ensuring that 1) the
+    /// iterator always advances and 2) empty matches never overlap with other
+    /// matches.
+    ///
+    /// (1) is necessary because we principally make progress by setting the
+    /// starting location of the next search to the ending location of the last
+    /// match. But if a match is empty, then this results in a search that does
+    /// not advance and thus does not terminate.
+    ///
+    /// (2) is not strictly necessary, but makes intuitive sense and matches
+    /// the prevailing behavior of most general purpose regex engines. The
+    /// "intuitive sense" here is that we want to report NON-overlapping
+    /// matches.
+    /// So for example, given the regex 'a|(?:)' against the haystack 'a',
+    /// without the special handling, you'd get the matches [0, 1) and
+    /// [1, 1), where the latter overlaps with the end bounds of the former.
+    ///
+    /// Note that we mark this cold and forcefully prevent inlining because
+    /// handling empty matches like this is extremely rare and does require
+    /// quite a bit of code, comparatively. Keeping this code out of the main
+    /// iterator function keeps it smaller and more amenable to inlining
+    /// itself.
+    #[cold]
+    #[inline(never)]
+    fn handle_overlapping_empty_match<F>(
+        &mut self,
+        m: Match,
+        mut finder: F,
+    ) -> Result<Option<Match>, MatchError>
+    where
+        F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>,
+    {
+        assert!(m.is_empty());
+        self.input.set_start(self.input.start().checked_add(1).unwrap());
+        finder(&self.input)
+    }
+}
+
+/// An iterator over all non-overlapping half matches for a fallible search.
+///
+/// The iterator yields a `Result<HalfMatch, MatchError>` value until no more
+/// matches could be found.
+///
+/// The type parameters are as follows:
+///
+/// * `F` represents the type of a closure that executes the search.
+///
+/// The lifetime parameters come from the [`Input`] type:
+///
+/// * `'h` is the lifetime of the underlying haystack.
+///
+/// When possible, prefer the iterators defined on the regex engine you're
+/// using. This tries to abstract over the regex engine and is thus a bit more
+/// unwieldy to use.
+///
+/// This iterator is created by [`Searcher::into_half_matches_iter`].
+pub struct TryHalfMatchesIter<'h, F> {
+    it: Searcher<'h>,
+    finder: F,
+}
+
+impl<'h, F> TryHalfMatchesIter<'h, F> {
+    /// Return an infallible version of this iterator.
+    ///
+    /// Any item yielded that corresponds to an error results in a panic. This
+    /// is useful if your underlying regex engine is configured in a way that
+    /// it is guaranteed to never return an error.
+    pub fn infallible(self) -> HalfMatchesIter<'h, F> {
+        HalfMatchesIter(self)
+    }
+
+    /// Returns the current `Input` used by this iterator.
+    ///
+    /// The `Input` returned is generally equivalent to the one used to
+    /// construct this iterator, but its start position may be different to
+    /// reflect the start of the next search to be executed.
+    pub fn input<'i>(&'i self) -> &'i Input<'h> {
+        self.it.input()
+    }
+}
+
+impl<'h, F> Iterator for TryHalfMatchesIter<'h, F>
+where
+    F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>,
+{
+    type Item = Result<HalfMatch, MatchError>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Result<HalfMatch, MatchError>> {
+        self.it.try_advance_half(&mut self.finder).transpose()
+    }
+}
+
+impl<'h, F> core::fmt::Debug for TryHalfMatchesIter<'h, F> {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.debug_struct("TryHalfMatchesIter")
+            .field("it", &self.it)
+            .field("finder", &"<closure>")
+            .finish()
+    }
+}
+
+/// An iterator over all non-overlapping half matches for an infallible search.
+///
+/// The iterator yields a [`HalfMatch`] value until no more matches could be
+/// found.
+///
+/// The type parameters are as follows:
+///
+/// * `F` represents the type of a closure that executes the search.
+///
+/// The lifetime parameters come from the [`Input`] type:
+///
+/// * `'h` is the lifetime of the underlying haystack.
+///
+/// When possible, prefer the iterators defined on the regex engine you're
+/// using. This tries to abstract over the regex engine and is thus a bit more
+/// unwieldy to use.
+///
+/// This iterator is created by [`Searcher::into_half_matches_iter`] and
+/// then calling [`TryHalfMatchesIter::infallible`].
+#[derive(Debug)]
+pub struct HalfMatchesIter<'h, F>(TryHalfMatchesIter<'h, F>);
+
+impl<'h, F> HalfMatchesIter<'h, F> {
+    /// Returns the current `Input` used by this iterator.
+    ///
+    /// The `Input` returned is generally equivalent to the one used to
+    /// construct this iterator, but its start position may be different to
+    /// reflect the start of the next search to be executed.
+    pub fn input<'i>(&'i self) -> &'i Input<'h> {
+        self.0.it.input()
+    }
+}
+
+impl<'h, F> Iterator for HalfMatchesIter<'h, F>
+where
+    F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>,
+{
+    type Item = HalfMatch;
+
+    #[inline]
+    fn next(&mut self) -> Option<HalfMatch> {
+        match self.0.next()? {
+            Ok(m) => Some(m),
+            Err(err) => panic!(
+                "unexpected regex half find error: {}\n\
+                 to handle find errors, use 'try' or 'search' methods",
+                err,
+            ),
+        }
+    }
+}
+
+/// An iterator over all non-overlapping matches for a fallible search.
+///
+/// The iterator yields a `Result<Match, MatchError>` value until no more
+/// matches could be found.
+///
+/// The type parameters are as follows:
+///
+/// * `F` represents the type of a closure that executes the search.
+///
+/// The lifetime parameters come from the [`Input`] type:
+///
+/// * `'h` is the lifetime of the underlying haystack.
+///
+/// When possible, prefer the iterators defined on the regex engine you're
+/// using. This tries to abstract over the regex engine and is thus a bit
+/// more unwieldy to use.
+///
+/// This iterator is created by [`Searcher::into_matches_iter`].
+pub struct TryMatchesIter<'h, F> {
+    it: Searcher<'h>,
+    finder: F,
+}
+
+impl<'h, F> TryMatchesIter<'h, F> {
+    /// Return an infallible version of this iterator.
+    ///
+    /// Any item yielded that corresponds to an error results in a panic.
+    /// This is useful if your underlying regex engine is configured in a way
+    /// that it is guaranteed to never return an error.
+    pub fn infallible(self) -> MatchesIter<'h, F> {
+        MatchesIter(self)
+    }
+
+    /// Returns the current `Input` used by this iterator.
+    ///
+    /// The `Input` returned is generally equivalent to the one used to
+    /// construct this iterator, but its start position may be different to
+    /// reflect the start of the next search to be executed.
+    pub fn input<'i>(&'i self) -> &'i Input<'h> {
+        self.it.input()
+    }
+}
+
+impl<'h, F> Iterator for TryMatchesIter<'h, F>
+where
+    F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>,
+{
+    type Item = Result<Match, MatchError>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Result<Match, MatchError>> {
+        self.it.try_advance(&mut self.finder).transpose()
+    }
+}
+
+impl<'h, F> core::fmt::Debug for TryMatchesIter<'h, F> {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.debug_struct("TryMatchesIter")
+            .field("it", &self.it)
+            .field("finder", &"<closure>")
+            .finish()
+    }
+}
+
+/// An iterator over all non-overlapping matches for an infallible search.
+///
+/// The iterator yields a [`Match`] value until no more matches could be
+/// found.
+///
+/// The type parameters are as follows:
+///
+/// * `F` represents the type of a closure that executes the search.
+///
+/// The lifetime parameters come from the [`Input`] type:
+///
+/// * `'h` is the lifetime of the underlying haystack.
+///
+/// When possible, prefer the iterators defined on the regex engine you're
+/// using. This tries to abstract over the regex engine and is thus a bit
+/// more unwieldy to use.
+///
+/// This iterator is created by [`Searcher::into_matches_iter`] and
+/// then calling [`TryMatchesIter::infallible`].
+#[derive(Debug)]
+pub struct MatchesIter<'h, F>(TryMatchesIter<'h, F>);
+
+impl<'h, F> MatchesIter<'h, F> {
+    /// Returns the current `Input` used by this iterator.
+    ///
+    /// The `Input` returned is generally equivalent to the one used to
+    /// construct this iterator, but its start position may be different to
+    /// reflect the start of the next search to be executed.
+    pub fn input<'i>(&'i self) -> &'i Input<'h> {
+        self.0.it.input()
+    }
+}
+
+impl<'h, F> Iterator for MatchesIter<'h, F>
+where
+    F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>,
+{
+    type Item = Match;
+
+    #[inline]
+    fn next(&mut self) -> Option<Match> {
+        match self.0.next()? {
+            Ok(m) => Some(m),
+            Err(err) => panic!(
+                "unexpected regex find error: {}\n\
+                 to handle find errors, use 'try' or 'search' methods",
+                err,
+            ),
+        }
+    }
+}
+
+/// An iterator over all non-overlapping captures for a fallible search.
+///
+/// The iterator yields a `Result<Captures, MatchError>` value until no more
+/// matches could be found.
+///
+/// The type parameters are as follows:
+///
+/// * `F` represents the type of a closure that executes the search.
+///
+/// The lifetime parameters come from the [`Input`] type:
+///
+/// * `'h` is the lifetime of the underlying haystack.
+///
+/// When possible, prefer the iterators defined on the regex engine you're
+/// using. This tries to abstract over the regex engine and is thus a bit
+/// more unwieldy to use.
+///
+/// This iterator is created by [`Searcher::into_captures_iter`].
+#[cfg(feature = "alloc")]
+pub struct TryCapturesIter<'h, F> {
+    it: Searcher<'h>,
+    caps: Captures,
+    finder: F,
+}
+
+#[cfg(feature = "alloc")]
+impl<'h, F> TryCapturesIter<'h, F> {
+    /// Return an infallible version of this iterator.
+    ///
+    /// Any item yielded that corresponds to an error results in a panic.
+    /// This is useful if your underlying regex engine is configured in a way
+    /// that it is guaranteed to never return an error.
+    pub fn infallible(self) -> CapturesIter<'h, F> {
+        CapturesIter(self)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<'h, F> Iterator for TryCapturesIter<'h, F>
+where
+    F: FnMut(&Input<'_>, &mut Captures) -> Result<(), MatchError>,
+{
+    type Item = Result<Captures, MatchError>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Result<Captures, MatchError>> {
+        let TryCapturesIter { ref mut it, ref mut caps, ref mut finder } =
+            *self;
+        let result = it
+            .try_advance(|input| {
+                (finder)(input, caps)?;
+                Ok(caps.get_match())
+            })
+            .transpose()?;
+        match result {
+            Ok(_) => Some(Ok(caps.clone())),
+            Err(err) => Some(Err(err)),
+        }
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<'h, F> core::fmt::Debug for TryCapturesIter<'h, F> {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.debug_struct("TryCapturesIter")
+            .field("it", &self.it)
+            .field("caps", &self.caps)
+            .field("finder", &"<closure>")
+            .finish()
+    }
+}
+
+/// An iterator over all non-overlapping captures for an infallible search.
+///
+/// The iterator yields a [`Captures`] value until no more matches could be
+/// found.
+///
+/// The type parameters are as follows:
+///
+/// * `F` represents the type of a closure that executes the search.
+///
+/// The lifetime parameters come from the [`Input`] type:
+///
+/// * `'h` is the lifetime of the underlying haystack.
+///
+/// When possible, prefer the iterators defined on the regex engine you're
+/// using. This tries to abstract over the regex engine and is thus a bit
+/// more unwieldy to use.
+///
+/// This iterator is created by [`Searcher::into_captures_iter`] and then
+/// calling [`TryCapturesIter::infallible`].
+#[cfg(feature = "alloc")]
+#[derive(Debug)]
+pub struct CapturesIter<'h, F>(TryCapturesIter<'h, F>);
+
+#[cfg(feature = "alloc")]
+impl<'h, F> Iterator for CapturesIter<'h, F>
+where
+    F: FnMut(&Input<'_>, &mut Captures) -> Result<(), MatchError>,
+{
+    type Item = Captures;
+
+    #[inline]
+    fn next(&mut self) -> Option<Captures> {
+        match self.0.next()? {
+            Ok(m) => Some(m),
+            Err(err) => panic!(
+                "unexpected regex captures error: {}\n\
+                 to handle find errors, use 'try' or 'search' methods",
+                err,
+            ),
+        }
+    }
+}
diff --git a/vendor/regex-automata/src/util/lazy.rs b/vendor/regex-automata/src/util/lazy.rs
new file mode 100644
index 0000000..0d0b4fb
--- /dev/null
+++ b/vendor/regex-automata/src/util/lazy.rs
@@ -0,0 +1,461 @@
+/*!
+A lazily initialized value for safe sharing between threads.
+
+The principal type in this module is `Lazy`, which makes it easy to construct
+values that are shared safely across multiple threads simultaneously.
+*/
+
+use core::fmt;
+
+/// A lazily initialized value that implements `Deref` for `T`.
+///
+/// A `Lazy` takes an initialization function and permits callers from any
+/// thread to access the result of that initialization function in a safe
+/// manner. In effect, this permits one-time initialization of global
+/// resources in a (possibly) multi-threaded program.
+///
+/// This type and its functionality are available even when neither the
+/// `alloc` nor the `std` features are enabled. In exchange, a `Lazy` does
+/// **not** guarantee that the given `create` function is called at most
+/// once. It might be called multiple times. Moreover, a call to `Lazy::get`
+/// (either explicitly or implicitly via `Lazy`'s `Deref` impl) may block
+/// until a `T` is available.
+///
+/// This is very similar to `lazy_static` or `once_cell`, except it doesn't
+/// guarantee that the initialization function will be run once and it works
+/// in no-alloc no-std environments. With that said, if you need stronger
+/// guarantees or a more flexible API, then it is recommended to use either
+/// `lazy_static` or `once_cell`.
+///
+/// # Warning: may use a spin lock
+///
+/// When this crate is compiled _without_ the `alloc` feature, then this type
+/// may use a spin lock internally. This can have subtle effects that may
+/// be undesirable. See [Spinlocks Considered Harmful][spinharm] for a more
+/// thorough treatment of this topic.
+///
+/// [spinharm]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
+///
+/// # Example
+///
+/// This type is useful for creating regexes once, and then using them from
+/// multiple threads simultaneously without worrying about synchronization.
+///
+/// ```
+/// use regex_automata::{dfa::regex::Regex, util::lazy::Lazy, Match};
+///
+/// static RE: Lazy<Regex> = Lazy::new(|| Regex::new("foo[0-9]+bar").unwrap());
+///
+/// let expected = Some(Match::must(0, 3..14));
+/// assert_eq!(expected, RE.find(b"zzzfoo12345barzzz"));
+/// ```
+pub struct Lazy<T, F = fn() -> T>(lazy::Lazy<T, F>);
+
+impl<T, F> Lazy<T, F> {
+    /// Create a new `Lazy` value that is initialized via the given function.
+    ///
+    /// The `T` type is automatically inferred from the return type of the
+    /// `create` function given.
+    pub const fn new(create: F) -> Lazy<T, F> {
+        Lazy(lazy::Lazy::new(create))
+    }
+}
+
+impl<T, F: Fn() -> T> Lazy<T, F> {
+    /// Return a reference to the lazily initialized value.
+    ///
+    /// This routine may block if another thread is initializing a `T`.
+    ///
+    /// Note that given a `x` which has type `Lazy`, this must be called via
+    /// `Lazy::get(x)` and not `x.get()`.
+    /// This routine is defined this way because `Lazy` impls `Deref` with a
+    /// target of `T`.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the `create` function inside this lazy value panics.
+    /// If the panic occurred in another thread, then this routine _may_ also
+    /// panic (but is not guaranteed to do so).
+    pub fn get(this: &Lazy<T, F>) -> &T {
+        this.0.get()
+    }
+}
+
+impl<T, F: Fn() -> T> core::ops::Deref for Lazy<T, F> {
+    type Target = T;
+
+    fn deref(&self) -> &T {
+        Lazy::get(self)
+    }
+}
+
+impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+#[cfg(feature = "alloc")]
+mod lazy {
+    use core::{
+        fmt,
+        marker::PhantomData,
+        sync::atomic::{AtomicPtr, Ordering},
+    };
+
+    use alloc::boxed::Box;
+
+    /// A non-std lazy initialized value.
+    ///
+    /// This might run the initialization function more than once, but will
+    /// never block.
+    ///
+    /// I wish I could get these semantics into the non-alloc non-std Lazy
+    /// type below, but I'm not sure how to do it. If you can do an alloc,
+    /// then the implementation becomes very simple if you don't care about
+    /// redundant work precisely because a pointer can be atomically swapped.
+    ///
+    /// Perhaps making this approach work in the non-alloc non-std case
+    /// requires asking the caller for a pointer? It would make the API less
+    /// convenient I think.
+    pub(super) struct Lazy<T, F> {
+        data: AtomicPtr<T>,
+        create: F,
+        // This indicates to the compiler that this type can drop T. It's
+        // not totally clear how the absence of this marker could lead to
+        // trouble, but putting it here doesn't have any downsides, so we
+        // hedge until someone from the Unsafe Working Group can tell us
+        // definitively that we don't need it.
+        //
+        // See: https://github.com/BurntSushi/regex-automata/issues/30
+        owned: PhantomData<Box<T>>,
+    }
+
+    // SAFETY: So long as T and &T (and F and &F) can themselves be safely
+    // shared among threads, so too can a Lazy. Namely, the Lazy API only
+    // permits accessing a &T and initialization is free of data races. So if
+    // T is thread safe, then so too is Lazy.
+    //
+    // We specifically require that T: Send in order for Lazy to be Sync.
+    // Without that requirement, it's possible to send a T from one thread to
+    // another via Lazy's destructor.
+    //
+    // It's not clear whether we need F: Send+Sync for Lazy to be Sync. But
+    // we're conservative for now and keep both.
+    unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {}
+
+    impl<T, F> Lazy<T, F> {
+        /// Create a new alloc but non-std lazy value that is racily
+        /// initialized. That is, the 'create' function may be called more
+        /// than once.
+        pub(super) const fn new(create: F) -> Lazy<T, F> {
+            Lazy {
+                data: AtomicPtr::new(core::ptr::null_mut()),
+                create,
+                owned: PhantomData,
+            }
+        }
+    }
+
+    impl<T, F: Fn() -> T> Lazy<T, F> {
+        /// Get the underlying lazy value. If it hasn't been initialized
+        /// yet, then always attempt to initialize it (even if some other
+        /// thread is initializing it) and atomically attach it to this lazy
+        /// value before returning it.
+        pub(super) fn get(&self) -> &T {
+            if let Some(data) = self.poll() {
+                return data;
+            }
+            let data = (self.create)();
+            let mut ptr = Box::into_raw(Box::new(data));
+            // We attempt to stuff our initialized value into our atomic
+            // pointer. Upon success, we don't need to do anything. But if
+            // someone else beat us to the punch, then we need to make sure
+            // our newly created value is dropped.
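+            //
+            // A note on the orderings below (a sketch of the reasoning):
+            // AcqRel on success publishes the fully initialized T behind
+            // 'ptr' to other threads, while Acquire on failure synchronizes
+            // with whichever thread published the pointer we observe in
+            // 'old'.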
+            let result = self.data.compare_exchange(
+                core::ptr::null_mut(),
+                ptr,
+                Ordering::AcqRel,
+                Ordering::Acquire,
+            );
+            if let Err(old) = result {
+                // SAFETY: We created 'ptr' via Box::into_raw above, so
+                // turning it back into a Box via from_raw is safe.
+                drop(unsafe { Box::from_raw(ptr) });
+                ptr = old;
+            }
+            // SAFETY: We just set the pointer above to a non-null value,
+            // even in the error case, and set it to a fully initialized
+            // value returned by 'create'.
+            unsafe { &*ptr }
+        }
+
+        /// If this lazy value has been initialized successfully, then return
+        /// that value. Otherwise return None immediately. This never
+        /// attempts to run initialization itself.
+        fn poll(&self) -> Option<&T> {
+            let ptr = self.data.load(Ordering::Acquire);
+            if ptr.is_null() {
+                return None;
+            }
+            // SAFETY: We just checked that the pointer is not null. Since
+            // it's not null, it must have been fully initialized by 'get'
+            // at some point.
+            Some(unsafe { &*ptr })
+        }
+    }
+
+    impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            f.debug_struct("Lazy").field("data", &self.poll()).finish()
+        }
+    }
+
+    impl<T, F> Drop for Lazy<T, F> {
+        fn drop(&mut self) {
+            let ptr = *self.data.get_mut();
+            if !ptr.is_null() {
+                // SAFETY: We just checked that 'ptr' is not null. And since
+                // we have exclusive access, there are no races to worry
+                // about.
+                drop(unsafe { Box::from_raw(ptr) });
+            }
+        }
+    }
+}
+
+#[cfg(not(feature = "alloc"))]
+mod lazy {
+    use core::{
+        cell::Cell,
+        fmt,
+        mem::MaybeUninit,
+        panic::{RefUnwindSafe, UnwindSafe},
+        sync::atomic::{AtomicU8, Ordering},
+    };
+
+    /// Our 'Lazy' value can be in one of three states:
+    ///
+    /// * INIT is where it starts, and also ends up back here if the
+    /// 'create' routine panics.
+    /// * BUSY is where it sits while initialization is running in exactly
+    /// one thread.
+    /// * DONE is where it sits after 'create' has completed and 'data' has
+    /// been fully initialized.
+    const LAZY_STATE_INIT: u8 = 0;
+    const LAZY_STATE_BUSY: u8 = 1;
+    const LAZY_STATE_DONE: u8 = 2;
+
+    /// A non-alloc non-std lazy initialized value.
+    ///
+    /// This guarantees initialization only happens once, but uses a spinlock
+    /// to block in the case of simultaneous access. Blocking occurs so that
+    /// one thread waits while another thread initializes the value.
+    ///
+    /// I would much rather have the semantics of the 'alloc' Lazy type
+    /// above. Namely, that we might run the initialization function more
+    /// than once, but we never otherwise block. However, I don't know how
+    /// to do that in a non-alloc non-std context.
+    pub(super) struct Lazy<T, F> {
+        state: AtomicU8,
+        create: Cell<Option<F>>,
+        data: Cell<MaybeUninit<T>>,
+    }
+
+    // SAFETY: So long as T and &T (and F and &F) can themselves be safely
+    // shared among threads, so too can a Lazy. Namely, the Lazy API only
+    // permits accessing a &T and initialization is free of data races. So if
+    // T is thread safe, then so too is Lazy.
+    unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {}
+    // A reference to a Lazy is unwind safe because we specifically take
+    // precautions to poison all accesses to a Lazy if the caller-provided
+    // 'create' function panics.
+    impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> RefUnwindSafe
+        for Lazy<T, F>
+    {
+    }
+
+    impl<T, F> Lazy<T, F> {
+        /// Create a new non-alloc non-std lazy value that is initialized
+        /// exactly once on first use using the given function.
+        pub(super) const fn new(create: F) -> Lazy<T, F> {
+            Lazy {
+                state: AtomicU8::new(LAZY_STATE_INIT),
+                create: Cell::new(Some(create)),
+                data: Cell::new(MaybeUninit::uninit()),
+            }
+        }
+    }
+
+    impl<T, F: Fn() -> T> Lazy<T, F> {
+        /// Get the underlying lazy value. If it hasn't been initialized
+        /// yet, then either initialize it or block until some other thread
+        /// initializes it. If the 'create' function given to Lazy::new
+        /// panics (even in another thread), then this panics too.
+        pub(super) fn get(&self) -> &T {
+            // This is effectively a spinlock. We loop until we enter a DONE
+            // state, and if possible, initialize it ourselves. The only way
+            // we exit the loop is if 'create' panics, we initialize 'data'
+            // or some other thread initializes 'data'.
+            //
+            // Yes, I have read spinlocks considered harmful[1]. And that
+            // article is why this spinlock is only active when 'alloc' isn't
+            // enabled. I did this because I don't think there is really
+            // another choice without 'alloc', other than not providing this
+            // at all. But I think that's a big bummer.
+            //
+            // [1]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
+            while self.state.load(Ordering::Acquire) != LAZY_STATE_DONE {
+                // Check if we're the first ones to get here. If so, we'll
+                // be the ones who initialize.
+                let result = self.state.compare_exchange(
+                    LAZY_STATE_INIT,
+                    LAZY_STATE_BUSY,
+                    Ordering::AcqRel,
+                    Ordering::Acquire,
+                );
+                // This means we saw the INIT state and nobody else can. So
+                // we must take responsibility for initializing. And by
+                // virtue of observing INIT, we have also told anyone else
+                // trying to get here that we are BUSY. If someone else sees
+                // BUSY, then they will spin until we finish initialization.
+                if let Ok(_) = result {
+                    // Since we are guaranteed to be the only ones here, we
+                    // know that 'create' is there... Unless someone else got
+                    // here before us and 'create' panicked. In which case,
+                    // 'self.create' is now 'None' and we forward the panic
+                    // to the caller. (i.e., We implement poisoning.)
+                    //
+                    // SAFETY: Our use of 'self.state' guarantees that we are
+                    // the only thread executing this line, and thus there
+                    // are no races.
+                    let create = unsafe {
+                        (*self.create.as_ptr()).take().expect(
+                            "Lazy's create function panicked, \
+                             preventing initialization, \
+                             poisoning current thread",
+                        )
+                    };
+                    let guard = Guard { state: &self.state };
+                    // SAFETY: Our use of 'self.state' guarantees that we are
+                    // the only thread executing this line, and thus there
+                    // are no races.
+                    unsafe {
+                        (*self.data.as_ptr()).as_mut_ptr().write(create());
+                    }
+                    // All is well. 'self.create' ran successfully, so we
+                    // forget the guard.
+                    core::mem::forget(guard);
+                    // Everything is initialized, so we can declare success.
+                    self.state.store(LAZY_STATE_DONE, Ordering::Release);
+                    break;
+                }
+                core::hint::spin_loop();
+            }
+            // We only get here if data is fully initialized, and thus poll
+            // will always return something.
+            self.poll().unwrap()
+        }
+
+        /// If this lazy value has been initialized successfully, then return
+        /// that value. Otherwise return None immediately. This never blocks.
+        fn poll(&self) -> Option<&T> {
+            if self.state.load(Ordering::Acquire) == LAZY_STATE_DONE {
+                // SAFETY: The DONE state only occurs when data has been
+                // fully initialized.
+                Some(unsafe { &*(*self.data.as_ptr()).as_ptr() })
+            } else {
+                None
+            }
+        }
+    }
+
+    impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            f.debug_struct("Lazy")
+                .field("state", &self.state.load(Ordering::Acquire))
+                .field("create", &"<closure>")
+                .field("data", &self.poll())
+                .finish()
+        }
+    }
+
+    impl<T, F> Drop for Lazy<T, F> {
+        fn drop(&mut self) {
+            if *self.state.get_mut() == LAZY_STATE_DONE {
+                // SAFETY: state is DONE if and only if data has been fully
+                // initialized. At which point, it is safe to drop.
+                unsafe {
+                    self.data.get_mut().assume_init_drop();
+                }
+            }
+        }
+    }
+
+    /// A guard that will reset a Lazy's state back to INIT when dropped.
+    /// The idea here is to 'forget' this guard on success. On failure (when
+    /// a panic occurs), the Drop impl runs and causes all in-progress and
+    /// future 'get' calls to panic. Without this guard, all in-progress and
+    /// future 'get' calls would spin forever. Crashing is much better than
+    /// getting stuck in an infinite loop.
+    struct Guard<'a> {
+        state: &'a AtomicU8,
+    }
+
+    impl<'a> Drop for Guard<'a> {
+        fn drop(&mut self) {
+            // We force ourselves back into an INIT state. This will in turn
+            // cause any future 'get' calls to attempt calling 'self.create'
+            // again, which will in turn panic because 'self.create' will
+            // now be 'None'.
+            self.state.store(LAZY_STATE_INIT, Ordering::Release);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn assert_send<T: Send>() {}
+    fn assert_sync<T: Sync>() {}
+    fn assert_unwind<T: core::panic::UnwindSafe>() {}
+    fn assert_refunwind<T: core::panic::RefUnwindSafe>() {}
+
+    #[test]
+    fn oibits() {
+        assert_send::<Lazy<u64>>();
+        assert_sync::<Lazy<u64>>();
+        assert_unwind::<Lazy<u64>>();
+        assert_refunwind::<Lazy<u64>>();
+    }
+
+    // This is a regression test because we used to rely on the inferred
+    // Sync impl for the Lazy type defined above (for 'alloc' mode). In the
+    // inferred impl, it only requires that T: Sync for Lazy<T>: Sync. But
+    // if we have that, we can actually make use of the fact that Lazy<T>
+    // drops T to create a value on one thread and drop it on another. This
+    // *should* require T: Send, but our missing bounds before let it sneak
+    // by.
+    //
+    // Basically, this test should not compile, so we... comment it out. We
+    // don't have a great way of testing compile-fail tests right now.
+    //
+    // See: https://github.com/BurntSushi/regex-automata/issues/30
+    /*
+    #[test]
+    fn sync_not_send() {
+        #[allow(dead_code)]
+        fn inner<T: Sync + Default>() {
+            let lazy = Lazy::new(move || T::default());
+            std::thread::scope(|scope| {
+                scope.spawn(|| {
+                    Lazy::get(&lazy); // We create T in this thread
+                });
+            });
+            // And drop it in this thread.
+            drop(lazy);
+            // So we have sent a !Send type across threads. (With some more
+            // legwork, it's possible to even sneak the value out of drop
+            // through a thread local.)
+        }
+    }
+    */
+}
diff --git a/vendor/regex-automata/src/util/look.rs b/vendor/regex-automata/src/util/look.rs
new file mode 100644
index 0000000..73e51c0
--- /dev/null
+++ b/vendor/regex-automata/src/util/look.rs
@@ -0,0 +1,2547 @@
+/*!
+Types and routines for working with look-around assertions.
+
+This module principally defines three types:
+
+* [`Look`] enumerates all of the assertions supported by this crate.
+* [`LookSet`] provides a way to efficiently store a set of [`Look`] values.
+* [`LookMatcher`] provides routines for checking whether a `Look` or a
+`LookSet` matches at a particular position in a haystack.
+*/
+
+// LAMENTATION: Sadly, a lot of the API of `Look` and `LookSet` were basically
+// copied verbatim from the regex-syntax crate.
+// I would have no problems using the regex-syntax types and defining the
+// matching routines (only found in this crate) as free functions, except the
+// `Look` and `LookSet` types are used in lots of places. Including in places
+// we expect to work when regex-syntax is *not* enabled, such as in the
+// definition of the NFA itself.
+//
+// Thankfully the code we copy is pretty simple and there isn't much of it.
+// Otherwise, the rest of this module deals with *matching* the assertions,
+// which is not something that regex-syntax handles.
+
+use crate::util::{escape::DebugByte, utf8};
+
+/// A look-around assertion.
+///
+/// An assertion matches at a position between characters in a haystack.
+/// Namely, it does not actually "consume" any input as most parts of a
+/// regular expression do. Assertions are a way of stating that some property
+/// must be true at a particular point during matching.
+///
+/// For example, `(?m)^[a-z]+$` is a pattern that:
+///
+/// * Scans the haystack for a position at which `(?m:^)` is satisfied. That
+/// occurs at either the beginning of the haystack, or immediately following
+/// a `\n` character.
+/// * Looks for one or more occurrences of `[a-z]`.
+/// * Once `[a-z]+` has matched as much as it can, an overall match is only
+/// reported when `[a-z]+` stops just before a `\n`.
+///
+/// So in this case, `abc` and `\nabc\n` match, but `\nabc1\n` does not.
+///
+/// Assertions are also called "look-around," "look-behind" and "look-ahead."
+/// Specifically, some assertions are look-behind (like `^`), other
+/// assertions are look-ahead (like `$`) and yet other assertions are both
+/// look-ahead and look-behind (like `\b`).
+///
+/// # Assertions in an NFA
+///
+/// An assertion in a [`thompson::NFA`](crate::nfa::thompson::NFA) can be
+/// thought of as a conditional epsilon transition. That is, a matching
+/// engine like the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) only
+/// permits moving through conditional epsilon transitions when their
+/// condition is satisfied at whatever position the `PikeVM` is currently at
+/// in the haystack.
+///
+/// How assertions are handled in a `DFA` is trickier, since a DFA does not
+/// have epsilon transitions at all. In this case, they are compiled into the
+/// automaton itself, at the expense of more states than what would be
+/// required without an assertion.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Look {
+    /// Match the beginning of text. Specifically, this matches at the
+    /// starting position of the input.
+    Start = 1 << 0,
+    /// Match the end of text. Specifically, this matches at the ending
+    /// position of the input.
+    End = 1 << 1,
+    /// Match the beginning of a line or the beginning of text. Specifically,
+    /// this matches at the starting position of the input, or at the
+    /// position immediately following a `\n` character.
+    StartLF = 1 << 2,
+    /// Match the end of a line or the end of text. Specifically, this
+    /// matches at the end position of the input, or at the position
+    /// immediately preceding a `\n` character.
+    EndLF = 1 << 3,
+    /// Match the beginning of a line or the beginning of text. Specifically,
+    /// this matches at the starting position of the input, or at the
+    /// position immediately following either a `\r` or `\n` character, but
+    /// never after a `\r` when a `\n` follows.
+    StartCRLF = 1 << 4,
+    /// Match the end of a line or the end of text.
+    /// Specifically, this matches at the end position of the input, or at
+    /// the position immediately preceding a `\r` or `\n` character, but
+    /// never before a `\n` when a `\r` precedes it.
+    EndCRLF = 1 << 5,
+    /// Match an ASCII-only word boundary. That is, this matches a position
+    /// where the left adjacent character and right adjacent character
+    /// correspond to a word and non-word or a non-word and word character.
+    WordAscii = 1 << 6,
+    /// Match an ASCII-only negation of a word boundary.
+    WordAsciiNegate = 1 << 7,
+    /// Match a Unicode-aware word boundary. That is, this matches a position
+    /// where the left adjacent character and right adjacent character
+    /// correspond to a word and non-word or a non-word and word character.
+    WordUnicode = 1 << 8,
+    /// Match a Unicode-aware negation of a word boundary.
+    WordUnicodeNegate = 1 << 9,
+    /// Match the start of an ASCII-only word boundary. That is, this matches
+    /// a position at either the beginning of the haystack or where the
+    /// previous character is not a word character and the following
+    /// character is a word character.
+    WordStartAscii = 1 << 10,
+    /// Match the end of an ASCII-only word boundary. That is, this matches
+    /// a position at either the end of the haystack or where the previous
+    /// character is a word character and the following character is not a
+    /// word character.
+    WordEndAscii = 1 << 11,
+    /// Match the start of a Unicode word boundary. That is, this matches a
+    /// position at either the beginning of the haystack or where the
+    /// previous character is not a word character and the following
+    /// character is a word character.
+    WordStartUnicode = 1 << 12,
+    /// Match the end of a Unicode word boundary. That is, this matches a
+    /// position at either the end of the haystack or where the previous
+    /// character is a word character and the following character is not a
+    /// word character.
+    WordEndUnicode = 1 << 13,
+    /// Match the start half of an ASCII-only word boundary. That is, this
+    /// matches a position at either the beginning of the haystack or where
+    /// the previous character is not a word character.
+    WordStartHalfAscii = 1 << 14,
+    /// Match the end half of an ASCII-only word boundary. That is, this
+    /// matches a position at either the end of the haystack or where the
+    /// following character is not a word character.
+    WordEndHalfAscii = 1 << 15,
+    /// Match the start half of a Unicode word boundary. That is, this
+    /// matches a position at either the beginning of the haystack or where
+    /// the previous character is not a word character.
+    WordStartHalfUnicode = 1 << 16,
+    /// Match the end half of a Unicode word boundary. That is, this matches
+    /// a position at either the end of the haystack or where the following
+    /// character is not a word character.
+    WordEndHalfUnicode = 1 << 17,
+}
+
+impl Look {
+    /// Flip the look-around assertion to its equivalent for reverse
+    /// searches. For example, `StartLF` gets translated to `EndLF`.
+    ///
+    /// Some assertions, such as `WordUnicode`, remain the same since they
+    /// match the same positions regardless of the direction of the search.
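+    ///
+    /// # Example
+    ///
+    /// A quick sketch of both cases, a flipped pair and a self-symmetric
+    /// assertion:
+    ///
+    /// ```
+    /// use regex_automata::util::look::Look;
+    ///
+    /// assert_eq!(Look::End, Look::Start.reversed());
+    /// assert_eq!(Look::WordUnicode, Look::WordUnicode.reversed());
+    /// ```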
+    #[inline]
+    pub const fn reversed(self) -> Look {
+        match self {
+            Look::Start => Look::End,
+            Look::End => Look::Start,
+            Look::StartLF => Look::EndLF,
+            Look::EndLF => Look::StartLF,
+            Look::StartCRLF => Look::EndCRLF,
+            Look::EndCRLF => Look::StartCRLF,
+            Look::WordAscii => Look::WordAscii,
+            Look::WordAsciiNegate => Look::WordAsciiNegate,
+            Look::WordUnicode => Look::WordUnicode,
+            Look::WordUnicodeNegate => Look::WordUnicodeNegate,
+            Look::WordStartAscii => Look::WordEndAscii,
+            Look::WordEndAscii => Look::WordStartAscii,
+            Look::WordStartUnicode => Look::WordEndUnicode,
+            Look::WordEndUnicode => Look::WordStartUnicode,
+            Look::WordStartHalfAscii => Look::WordEndHalfAscii,
+            Look::WordEndHalfAscii => Look::WordStartHalfAscii,
+            Look::WordStartHalfUnicode => Look::WordEndHalfUnicode,
+            Look::WordEndHalfUnicode => Look::WordStartHalfUnicode,
+        }
+    }
+
+    /// Return the underlying representation of this look-around enumeration
+    /// as an integer. Giving the return value to the [`Look::from_repr`]
+    /// constructor is guaranteed to return the same look-around variant that
+    /// one started with within a semver compatible release of this crate.
+    #[inline]
+    pub const fn as_repr(self) -> u32 {
+        // AFAIK, 'as' is the only way to zero-cost convert an int enum to an
+        // actual int.
+        self as u32
+    }
+
+    /// Given the underlying representation of a `Look` value, return the
+    /// corresponding `Look` value if the representation is valid. Otherwise
+    /// `None` is returned.
+    #[inline]
+    pub const fn from_repr(repr: u32) -> Option<Look> {
+        match repr {
+            0b00_0000_0000_0000_0001 => Some(Look::Start),
+            0b00_0000_0000_0000_0010 => Some(Look::End),
+            0b00_0000_0000_0000_0100 => Some(Look::StartLF),
+            0b00_0000_0000_0000_1000 => Some(Look::EndLF),
+            0b00_0000_0000_0001_0000 => Some(Look::StartCRLF),
+            0b00_0000_0000_0010_0000 => Some(Look::EndCRLF),
+            0b00_0000_0000_0100_0000 => Some(Look::WordAscii),
+            0b00_0000_0000_1000_0000 => Some(Look::WordAsciiNegate),
+            0b00_0000_0001_0000_0000 => Some(Look::WordUnicode),
+            0b00_0000_0010_0000_0000 => Some(Look::WordUnicodeNegate),
+            0b00_0000_0100_0000_0000 => Some(Look::WordStartAscii),
+            0b00_0000_1000_0000_0000 => Some(Look::WordEndAscii),
+            0b00_0001_0000_0000_0000 => Some(Look::WordStartUnicode),
+            0b00_0010_0000_0000_0000 => Some(Look::WordEndUnicode),
+            0b00_0100_0000_0000_0000 => Some(Look::WordStartHalfAscii),
+            0b00_1000_0000_0000_0000 => Some(Look::WordEndHalfAscii),
+            0b01_0000_0000_0000_0000 => Some(Look::WordStartHalfUnicode),
+            0b10_0000_0000_0000_0000 => Some(Look::WordEndHalfUnicode),
+            _ => None,
+        }
+    }
+
+    /// Returns a convenient single codepoint representation of this
+    /// look-around assertion. Each assertion is guaranteed to be represented
+    /// by a distinct character.
+    ///
+    /// This is useful for representing a look-around assertion compactly in
+    /// human friendly output intended for a programmer working on regex
+    /// internals.
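+    ///
+    /// # Example
+    ///
+    /// A brief sketch of the mapping, using the variants defined above:
+    ///
+    /// ```
+    /// use regex_automata::util::look::Look;
+    ///
+    /// assert_eq!('A', Look::Start.as_char());
+    /// assert_eq!('b', Look::WordAscii.as_char());
+    /// ```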
+    #[inline]
+    pub const fn as_char(self) -> char {
+        match self {
+            Look::Start => 'A',
+            Look::End => 'z',
+            Look::StartLF => '^',
+            Look::EndLF => '$',
+            Look::StartCRLF => 'r',
+            Look::EndCRLF => 'R',
+            Look::WordAscii => 'b',
+            Look::WordAsciiNegate => 'B',
+            Look::WordUnicode => '𝛃',
+            Look::WordUnicodeNegate => '𝚩',
+            Look::WordStartAscii => '<',
+            Look::WordEndAscii => '>',
+            Look::WordStartUnicode => '〈',
+            Look::WordEndUnicode => '〉',
+            Look::WordStartHalfAscii => '◁',
+            Look::WordEndHalfAscii => '▷',
+            Look::WordStartHalfUnicode => '◀',
+            Look::WordEndHalfUnicode => '▶',
+        }
+    }
+}
+
+/// LookSet is a memory-efficient set of look-around assertions.
+///
+/// This is useful for efficiently tracking look-around assertions. For
+/// example, a [`thompson::NFA`](crate::nfa::thompson::NFA) provides
+/// properties that return `LookSet`s.
+#[derive(Clone, Copy, Default, Eq, PartialEq)]
+pub struct LookSet {
+    /// The underlying representation of this set, exposed to make it
+    /// possible to store it somewhere efficiently. The representation is
+    /// that of a bitset, where each assertion occupies bit `i` where
+    /// `i = Look::as_repr()`.
+    ///
+    /// Note that users of this internal representation must permit the full
+    /// range of `u32` values to be represented. For example, even if the
+    /// current implementation only makes use of the 18 least significant
+    /// bits, it may use more bits in a future semver compatible release.
+    pub bits: u32,
+}
+
+impl LookSet {
+    /// Create an empty set of look-around assertions.
+    #[inline]
+    pub fn empty() -> LookSet {
+        LookSet { bits: 0 }
+    }
+
+    /// Create a full set of look-around assertions.
+    ///
+    /// This set contains all possible look-around assertions.
+    #[inline]
+    pub fn full() -> LookSet {
+        LookSet { bits: !0 }
+    }
+
+    /// Create a look-around set containing the look-around assertion given.
+    ///
+    /// This is a convenience routine for creating an empty set and inserting
+    /// one look-around assertion.
+    #[inline]
+    pub fn singleton(look: Look) -> LookSet {
+        LookSet::empty().insert(look)
+    }
+
+    /// Returns the total number of look-around assertions in this set.
+    #[inline]
+    pub fn len(self) -> usize {
+        // OK because the number of set bits always fits in a u8, which in
+        // turn always fits in a usize, regardless of target.
+        usize::try_from(self.bits.count_ones()).unwrap()
+    }
+
+    /// Returns true if and only if this set is empty.
+    #[inline]
+    pub fn is_empty(self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns true if and only if the given look-around assertion is in
+    /// this set.
+    #[inline]
+    pub fn contains(self, look: Look) -> bool {
+        self.bits & look.as_repr() != 0
+    }
+
+    /// Returns true if and only if this set contains any anchor assertions.
+    /// This includes both "start/end of haystack" and "start/end of line."
+    #[inline]
+    pub fn contains_anchor(&self) -> bool {
+        self.contains_anchor_haystack() || self.contains_anchor_line()
+    }
+
+    /// Returns true if and only if this set contains any "start/end of
+    /// haystack" anchors. This doesn't include "start/end of line" anchors.
+    #[inline]
+    pub fn contains_anchor_haystack(&self) -> bool {
+        self.contains(Look::Start) || self.contains(Look::End)
+    }
+
+    /// Returns true if and only if this set contains any "start/end of line"
+    /// anchors. This doesn't include "start/end of haystack" anchors. This
+    /// includes both `\n` line anchors and CRLF (`\r\n`) aware line anchors.
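+    ///
+    /// # Example
+    ///
+    /// A brief sketch distinguishing line anchors from haystack anchors:
+    ///
+    /// ```
+    /// use regex_automata::util::look::{Look, LookSet};
+    ///
+    /// assert!(LookSet::singleton(Look::StartLF).contains_anchor_line());
+    /// assert!(!LookSet::singleton(Look::Start).contains_anchor_line());
+    /// ```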
+    #[inline]
+    pub fn contains_anchor_line(&self) -> bool {
+        self.contains(Look::StartLF)
+            || self.contains(Look::EndLF)
+            || self.contains(Look::StartCRLF)
+            || self.contains(Look::EndCRLF)
+    }
+
+    /// Returns true if and only if this set contains any "start/end of line"
+    /// anchors that only treat `\n` as line terminators. This does not
+    /// include haystack anchors or CRLF aware line anchors.
+    #[inline]
+    pub fn contains_anchor_lf(&self) -> bool {
+        self.contains(Look::StartLF) || self.contains(Look::EndLF)
+    }
+
+    /// Returns true if and only if this set contains any "start/end of line"
+    /// anchors that are CRLF-aware. This doesn't include "start/end of
+    /// haystack" or "start/end of line-feed" anchors.
+    #[inline]
+    pub fn contains_anchor_crlf(&self) -> bool {
+        self.contains(Look::StartCRLF) || self.contains(Look::EndCRLF)
+    }
+
+    /// Returns true if and only if this set contains any word boundary or
+    /// negated word boundary assertions. This includes both Unicode and
+    /// ASCII word boundaries.
+    #[inline]
+    pub fn contains_word(self) -> bool {
+        self.contains_word_unicode() || self.contains_word_ascii()
+    }
+
+    /// Returns true if and only if this set contains any Unicode word
+    /// boundary or negated Unicode word boundary assertions.
+    #[inline]
+    pub fn contains_word_unicode(self) -> bool {
+        self.contains(Look::WordUnicode)
+            || self.contains(Look::WordUnicodeNegate)
+            || self.contains(Look::WordStartUnicode)
+            || self.contains(Look::WordEndUnicode)
+            || self.contains(Look::WordStartHalfUnicode)
+            || self.contains(Look::WordEndHalfUnicode)
+    }
+
+    /// Returns true if and only if this set contains any ASCII word boundary
+    /// or negated ASCII word boundary assertions.
+    #[inline]
+    pub fn contains_word_ascii(self) -> bool {
+        self.contains(Look::WordAscii)
+            || self.contains(Look::WordAsciiNegate)
+            || self.contains(Look::WordStartAscii)
+            || self.contains(Look::WordEndAscii)
+            || self.contains(Look::WordStartHalfAscii)
+            || self.contains(Look::WordEndHalfAscii)
+    }
+
+    /// Returns an iterator over all of the look-around assertions in this
+    /// set.
+    #[inline]
+    pub fn iter(self) -> LookSetIter {
+        LookSetIter { set: self }
+    }
+
+    /// Return a new set that is equivalent to the original, but with the
+    /// given assertion added to it. If the assertion is already in the set,
+    /// then the returned set is equivalent to the original.
+    #[inline]
+    pub fn insert(self, look: Look) -> LookSet {
+        LookSet { bits: self.bits | look.as_repr() }
+    }
+
+    /// Updates this set in place with the result of inserting the given
+    /// assertion into this set.
+    #[inline]
+    pub fn set_insert(&mut self, look: Look) {
+        *self = self.insert(look);
+    }
+
+    /// Return a new set that is equivalent to the original, but with the
+    /// given assertion removed from it. If the assertion is not in the set,
+    /// then the returned set is equivalent to the original.
+    #[inline]
+    pub fn remove(self, look: Look) -> LookSet {
+        LookSet { bits: self.bits & !look.as_repr() }
+    }
+
+    /// Updates this set in place with the result of removing the given
+    /// assertion from this set.
+    #[inline]
+    pub fn set_remove(&mut self, look: Look) {
+        *self = self.remove(look);
+    }
+
+    /// Returns a new set that is the result of subtracting the given set
+    /// from this set.
+    #[inline]
+    pub fn subtract(self, other: LookSet) -> LookSet {
+        LookSet { bits: self.bits & !other.bits }
+    }
+
+    /// Updates this set in place with the result of subtracting the given
+    /// set from this set.
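+    ///
+    /// # Example
+    ///
+    /// A brief sketch of in-place subtraction:
+    ///
+    /// ```
+    /// use regex_automata::util::look::{Look, LookSet};
+    ///
+    /// let mut set = LookSet::singleton(Look::Start).insert(Look::End);
+    /// set.set_subtract(LookSet::singleton(Look::End));
+    /// assert_eq!(LookSet::singleton(Look::Start), set);
+    /// ```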
+    #[inline]
+    pub fn set_subtract(&mut self, other: LookSet) {
+        *self = self.subtract(other);
+    }
+
+    /// Returns a new set that is the union of this and the one given.
+    #[inline]
+    pub fn union(self, other: LookSet) -> LookSet {
+        LookSet { bits: self.bits | other.bits }
+    }
+
+    /// Updates this set in place with the result of unioning it with the
+    /// one given.
+    #[inline]
+    pub fn set_union(&mut self, other: LookSet) {
+        *self = self.union(other);
+    }
+
+    /// Returns a new set that is the intersection of this and the one given.
+    #[inline]
+    pub fn intersect(self, other: LookSet) -> LookSet {
+        LookSet { bits: self.bits & other.bits }
+    }
+
+    /// Updates this set in place with the result of intersecting it with
+    /// the one given.
+    #[inline]
+    pub fn set_intersect(&mut self, other: LookSet) {
+        *self = self.intersect(other);
+    }
+
+    /// Return a `LookSet` from the slice given as a native endian 32-bit
+    /// integer.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `slice.len() < 4`.
+    #[inline]
+    pub fn read_repr(slice: &[u8]) -> LookSet {
+        let bits = u32::from_ne_bytes(slice[..4].try_into().unwrap());
+        LookSet { bits }
+    }
+
+    /// Write a `LookSet` as a native endian 32-bit integer to the beginning
+    /// of the slice given.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `slice.len() < 4`.
+    #[inline]
+    pub fn write_repr(self, slice: &mut [u8]) {
+        let raw = self.bits.to_ne_bytes();
+        slice[0] = raw[0];
+        slice[1] = raw[1];
+        slice[2] = raw[2];
+        slice[3] = raw[3];
+    }
+
+    /// Checks that all assertions in this set can be matched.
+    ///
+    /// Some assertions, such as Unicode word boundaries, require optional
+    /// (but enabled by default) tables that may not be available. If there
+    /// are assertions in this set that require tables that are not
+    /// available, then this will return an error.
+    ///
+    /// Specifically, this returns an error when the
+    /// `unicode-word-boundary` feature is _not_ enabled _and_ this set
+    /// contains a Unicode word boundary assertion.
+    ///
+    /// It can be useful to use this on the result of
+    /// [`NFA::look_set_any`](crate::nfa::thompson::NFA::look_set_any)
+    /// when building a matcher engine to ensure methods like
+    /// [`LookMatcher::matches_set`] do not panic at search time.
+    pub fn available(self) -> Result<(), UnicodeWordBoundaryError> {
+        if self.contains_word_unicode() {
+            UnicodeWordBoundaryError::check()?;
+        }
+        Ok(())
+    }
+}
+
+impl core::fmt::Debug for LookSet {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        if self.is_empty() {
+            return write!(f, "∅");
+        }
+        for look in self.iter() {
+            write!(f, "{}", look.as_char())?;
+        }
+        Ok(())
+    }
+}
+
+/// An iterator over all look-around assertions in a [`LookSet`].
+///
+/// This iterator is created by [`LookSet::iter`].
+#[derive(Clone, Debug)]
+pub struct LookSetIter {
+    set: LookSet,
+}
+
+impl Iterator for LookSetIter {
+    type Item = Look;
+
+    #[inline]
+    fn next(&mut self) -> Option<Look> {
+        if self.set.is_empty() {
+            return None;
+        }
+        // We'll never have more than u8::MAX distinct look-around
+        // assertions, so 'bit' will always fit into a u16.
+        let bit = u16::try_from(self.set.bits.trailing_zeros()).unwrap();
+        let look = Look::from_repr(1 << bit)?;
+        self.set = self.set.remove(look);
+        Some(look)
+    }
+}
+
+/// A matcher for look-around assertions.
+///
+/// This matcher permits configuring aspects of how look-around assertions
+/// are matched.
+///
+/// # Example
+///
+/// A `LookMatcher` can change the line terminator used for matching
+/// multi-line anchors such as `(?m:^)` and `(?m:$)`.
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::{self, pikevm::PikeVM},
+///     util::look::LookMatcher,
+///     Match, Input,
+/// };
+///
+/// let mut lookm = LookMatcher::new();
+/// lookm.set_line_terminator(b'\x00');
+///
+/// let re = PikeVM::builder()
+///     .thompson(thompson::Config::new().look_matcher(lookm))
+///     .build(r"(?m)^[a-z]+$")?;
+/// let mut cache = re.create_cache();
+///
+/// // Multi-line assertions now use NUL as a terminator.
+/// assert_eq!(
+///     Some(Match::must(0, 1..4)),
+///     re.find(&mut cache, b"\x00abc\x00"),
+/// );
+/// // ... and \n is no longer recognized as a terminator.
+/// assert_eq!(
+///     None,
+///     re.find(&mut cache, b"\nabc\n"),
+/// );
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct LookMatcher {
+    lineterm: DebugByte,
+}
+
+impl LookMatcher {
+    /// Creates a new default matcher for look-around assertions.
+    pub fn new() -> LookMatcher {
+        LookMatcher { lineterm: DebugByte(b'\n') }
+    }
+
+    /// Sets the line terminator for use with `(?m:^)` and `(?m:$)`.
+    ///
+    /// Namely, instead of `^` matching after `\n` and `$` matching
+    /// immediately before a `\n`, this will cause it to match after and
+    /// before the byte given.
+    ///
+    /// It can occasionally be useful to use this to configure the line
+    /// terminator to the NUL byte when searching binary data.
+    ///
+    /// Note that this does not apply to CRLF-aware line anchors such as
+    /// `(?Rm:^)` and `(?Rm:$)`. CRLF-aware line anchors are hard-coded to
+    /// use `\r` and `\n`.
+    pub fn set_line_terminator(&mut self, byte: u8) -> &mut LookMatcher {
+        self.lineterm.0 = byte;
+        self
+    }
+
+    /// Returns the line terminator that was configured for this matcher.
+    ///
+    /// If no line terminator was configured, then this returns `\n`.
+    ///
+    /// Note that the line terminator should only be used for matching
+    /// `(?m:^)` and `(?m:$)` assertions. It specifically should _not_ be
+    /// used for matching the CRLF aware assertions `(?Rm:^)` and `(?Rm:$)`.
+    pub fn get_line_terminator(&self) -> u8 {
+        self.lineterm.0
+    }
+
+    /// Returns true when the position `at` in `haystack` satisfies the
+    /// given look-around assertion.
+    ///
+    /// # Panics
+    ///
+    /// This panics when testing any Unicode word boundary assertion in this
+    /// set and when the Unicode word data is not available. Specifically,
+    /// this only occurs when the `unicode-word-boundary` feature is not
+    /// enabled.
+    ///
+    /// Since it's generally expected that this routine is called inside of
+    /// a matching engine, callers should check the error condition when
+    /// building the matching engine. If there is a Unicode word boundary
+    /// in the matcher and the data isn't available, then the matcher should
+    /// fail to build.
+    ///
+    /// Callers can check the error condition with [`LookSet::available`].
+    ///
+    /// This also may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn matches(&self, look: Look, haystack: &[u8], at: usize) -> bool {
+        self.matches_inline(look, haystack, at)
+    }
+
+    /// Like `matches`, but forcefully inlined.
+    ///
+    /// # Panics
+    ///
+    /// This panics when testing any Unicode word boundary assertion in this
+    /// set and when the Unicode word data is not available.
+    /// Specifically, this only occurs when the `unicode-word-boundary`
+    /// feature is not enabled.
+    ///
+    /// Since it's generally expected that this routine is called inside of
+    /// a matching engine, callers should check the error condition when
+    /// building the matching engine. If there is a Unicode word boundary
+    /// in the matcher and the data isn't available, then the matcher should
+    /// fail to build.
+    ///
+    /// Callers can check the error condition with [`LookSet::available`].
+    ///
+    /// This also may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn matches_inline(
+        &self,
+        look: Look,
+        haystack: &[u8],
+        at: usize,
+    ) -> bool {
+        match look {
+            Look::Start => self.is_start(haystack, at),
+            Look::End => self.is_end(haystack, at),
+            Look::StartLF => self.is_start_lf(haystack, at),
+            Look::EndLF => self.is_end_lf(haystack, at),
+            Look::StartCRLF => self.is_start_crlf(haystack, at),
+            Look::EndCRLF => self.is_end_crlf(haystack, at),
+            Look::WordAscii => self.is_word_ascii(haystack, at),
+            Look::WordAsciiNegate => self.is_word_ascii_negate(haystack, at),
+            Look::WordUnicode => self.is_word_unicode(haystack, at).unwrap(),
+            Look::WordUnicodeNegate => {
+                self.is_word_unicode_negate(haystack, at).unwrap()
+            }
+            Look::WordStartAscii => self.is_word_start_ascii(haystack, at),
+            Look::WordEndAscii => self.is_word_end_ascii(haystack, at),
+            Look::WordStartUnicode => {
+                self.is_word_start_unicode(haystack, at).unwrap()
+            }
+            Look::WordEndUnicode => {
+                self.is_word_end_unicode(haystack, at).unwrap()
+            }
+            Look::WordStartHalfAscii => {
+                self.is_word_start_half_ascii(haystack, at)
+            }
+            Look::WordEndHalfAscii => {
+                self.is_word_end_half_ascii(haystack, at)
+            }
+            Look::WordStartHalfUnicode => {
+                self.is_word_start_half_unicode(haystack, at).unwrap()
+            }
+            Look::WordEndHalfUnicode => {
+                self.is_word_end_half_unicode(haystack, at).unwrap()
+            }
+        }
+    }
+
+    /// Returns true when _all_ of the assertions in the given set match at
+    /// the given position in the haystack.
+    ///
+    /// # Panics
+    ///
+    /// This panics when testing any Unicode word boundary assertion in this
+    /// set and when the Unicode word data is not available. Specifically,
+    /// this only occurs when the `unicode-word-boundary` feature is not
+    /// enabled.
+    ///
+    /// Since it's generally expected that this routine is called inside of
+    /// a matching engine, callers should check the error condition when
+    /// building the matching engine. If there is a Unicode word boundary
+    /// in the matcher and the data isn't available, then the matcher should
+    /// fail to build.
+    ///
+    /// Callers can check the error condition with [`LookSet::available`].
+    ///
+    /// This also may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn matches_set(
+        &self,
+        set: LookSet,
+        haystack: &[u8],
+        at: usize,
+    ) -> bool {
+        self.matches_set_inline(set, haystack, at)
+    }
+
+    /// Like `LookSet::matches`, but forcefully inlined for perf.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn matches_set_inline(
+        &self,
+        set: LookSet,
+        haystack: &[u8],
+        at: usize,
+    ) -> bool {
+        // This used to use LookSet::iter with Look::matches on each
+        // element, but that proved to be quite disastrous for perf. The
+        // manual "if the set has this assertion, check it" turns out to be
+        // quite a bit faster.
+        if set.contains(Look::Start) {
+            if !self.is_start(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::End) {
+            if !self.is_end(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::StartLF) {
+            if !self.is_start_lf(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::EndLF) {
+            if !self.is_end_lf(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::StartCRLF) {
+            if !self.is_start_crlf(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::EndCRLF) {
+            if !self.is_end_crlf(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::WordAscii) {
+            if !self.is_word_ascii(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::WordAsciiNegate) {
+            if !self.is_word_ascii_negate(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::WordUnicode) {
+            if !self.is_word_unicode(haystack, at).unwrap() {
+                return false;
+            }
+        }
+        if set.contains(Look::WordUnicodeNegate) {
+            if !self.is_word_unicode_negate(haystack, at).unwrap() {
+                return false;
+            }
+        }
+        if set.contains(Look::WordStartAscii) {
+            if !self.is_word_start_ascii(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::WordEndAscii) {
+            if !self.is_word_end_ascii(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::WordStartUnicode) {
+            if !self.is_word_start_unicode(haystack, at).unwrap() {
+                return false;
+            }
+        }
+        if set.contains(Look::WordEndUnicode) {
+            if !self.is_word_end_unicode(haystack, at).unwrap() {
+                return false;
+            }
+        }
+        if set.contains(Look::WordStartHalfAscii) {
+            if !self.is_word_start_half_ascii(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::WordEndHalfAscii) {
+            if !self.is_word_end_half_ascii(haystack, at) {
+                return false;
+            }
+        }
+        if set.contains(Look::WordStartHalfUnicode) {
+            if !self.is_word_start_half_unicode(haystack, at).unwrap() {
+                return false;
+            }
+        }
+        if set.contains(Look::WordEndHalfUnicode) {
+            if !self.is_word_end_half_unicode(haystack, at).unwrap() {
+                return false;
+            }
+        }
+        true
+    }
+
+    /// Split up the given byte classes into equivalence classes in a way
+    /// that is consistent with this look-around assertion.
+    #[cfg(feature = "alloc")]
+    pub(crate) fn add_to_byteset(
+        &self,
+        look: Look,
+        set: &mut crate::util::alphabet::ByteClassSet,
+    ) {
+        match look {
+            Look::Start | Look::End => {}
+            Look::StartLF | Look::EndLF => {
+                set.set_range(self.lineterm.0, self.lineterm.0);
+            }
+            Look::StartCRLF | Look::EndCRLF => {
+                set.set_range(b'\r', b'\r');
+                set.set_range(b'\n', b'\n');
+            }
+            Look::WordAscii
+            | Look::WordAsciiNegate
+            | Look::WordUnicode
+            | Look::WordUnicodeNegate
+            | Look::WordStartAscii
+            | Look::WordEndAscii
+            | Look::WordStartUnicode
+            | Look::WordEndUnicode
+            | Look::WordStartHalfAscii
+            | Look::WordEndHalfAscii
+            | Look::WordStartHalfUnicode
+            | Look::WordEndHalfUnicode => {
+                // We need to mark all ranges of bytes whose pairs result in
+                // evaluating \b differently. This isn't technically correct
+                // for Unicode word boundaries, but DFAs can't handle those
+                // anyway, and thus, the byte classes don't need to either
+                // since they are themselves only used in DFAs.
+                //
+                // FIXME: It seems like the calls to 'set_range' here are
+                // completely invariant, which means we could just hard-code
+                // them here without needing to write a loop. And we only
+                // need to do this dance at most once per regex.
+                //
+                // FIXME: Is this correct for \B?
+                let iswb = utf8::is_word_byte;
+                // This unwrap is OK because we guard every use of 'asu8'
+                // with a check that the input is <= 255.
+                let asu8 = |b: u16| u8::try_from(b).unwrap();
+                let mut b1: u16 = 0;
+                let mut b2: u16;
+                while b1 <= 255 {
+                    b2 = b1 + 1;
+                    while b2 <= 255 && iswb(asu8(b1)) == iswb(asu8(b2)) {
+                        b2 += 1;
+                    }
+                    // The guards above guarantee that b2 can never get any
+                    // bigger.
+                    assert!(b2 <= 256);
+                    // Subtracting 1 from b2 is always OK because it is
+                    // always at least 1 greater than b1, and the assert
+                    // above guarantees that the asu8 conversion will
+                    // succeed.
+                    set.set_range(asu8(b1), asu8(b2.checked_sub(1).unwrap()));
+                    b1 = b2;
+                }
+            }
+        }
+    }
+
+    /// Returns true when [`Look::Start`] is satisfied `at` the given
+    /// position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn is_start(&self, _haystack: &[u8], at: usize) -> bool {
+        at == 0
+    }
+
+    /// Returns true when [`Look::End`] is satisfied `at` the given position
+    /// in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn is_end(&self, haystack: &[u8], at: usize) -> bool {
+        at == haystack.len()
+    }
+
+    /// Returns true when [`Look::StartLF`] is satisfied `at` the given
+    /// position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn is_start_lf(&self, haystack: &[u8], at: usize) -> bool {
+        self.is_start(haystack, at) || haystack[at - 1] == self.lineterm.0
+    }
+
+    /// Returns true when [`Look::EndLF`] is satisfied `at` the given
+    /// position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn is_end_lf(&self, haystack: &[u8], at: usize) -> bool {
+        self.is_end(haystack, at) || haystack[at] == self.lineterm.0
+    }
+
+    /// Returns true when [`Look::StartCRLF`] is satisfied `at` the given
+    /// position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn is_start_crlf(&self, haystack: &[u8], at: usize) -> bool {
+        self.is_start(haystack, at)
+            || haystack[at - 1] == b'\n'
+            || (haystack[at - 1] == b'\r'
+                && (at >= haystack.len() || haystack[at] != b'\n'))
+    }
+
+    /// Returns true when [`Look::EndCRLF`] is satisfied `at` the given
+    /// position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn is_end_crlf(&self, haystack: &[u8], at: usize) -> bool {
+        self.is_end(haystack, at)
+            || haystack[at] == b'\r'
+            || (haystack[at] == b'\n'
+                && (at == 0 || haystack[at - 1] != b'\r'))
+    }
+
+    /// Returns true when [`Look::WordAscii`] is satisfied `at` the given
+    /// position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
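+    ///
+    /// # Example
+    ///
+    /// A brief sketch: position 2 sits between a word byte and a space,
+    /// while position 1 sits between two word bytes:
+    ///
+    /// ```
+    /// use regex_automata::util::look::LookMatcher;
+    ///
+    /// let lookm = LookMatcher::new();
+    /// assert!(lookm.is_word_ascii(b"ab cd", 2));
+    /// assert!(!lookm.is_word_ascii(b"ab cd", 1));
+    /// ```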
+    #[inline]
+    pub fn is_word_ascii(&self, haystack: &[u8], at: usize) -> bool {
+        let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]);
+        let word_after =
+            at < haystack.len() && utf8::is_word_byte(haystack[at]);
+        word_before != word_after
+    }
+
+    /// Returns true when [`Look::WordAsciiNegate`] is satisfied `at` the given
+    /// position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn is_word_ascii_negate(&self, haystack: &[u8], at: usize) -> bool {
+        !self.is_word_ascii(haystack, at)
+    }
+
+    /// Returns true when [`Look::WordUnicode`] is satisfied `at` the given
+    /// position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when Unicode word boundary tables
+    /// are not available. Specifically, this only occurs when the
+    /// `unicode-word-boundary` feature is not enabled.
+    #[inline]
+    pub fn is_word_unicode(
+        &self,
+        haystack: &[u8],
+        at: usize,
+    ) -> Result<bool, UnicodeWordBoundaryError> {
+        let word_before = is_word_char::rev(haystack, at)?;
+        let word_after = is_word_char::fwd(haystack, at)?;
+        Ok(word_before != word_after)
+    }
+
+    /// Returns true when [`Look::WordUnicodeNegate`] is satisfied `at` the
+    /// given position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when Unicode word boundary tables
+    /// are not available. Specifically, this only occurs when the
+    /// `unicode-word-boundary` feature is not enabled.
+    #[inline]
+    pub fn is_word_unicode_negate(
+        &self,
+        haystack: &[u8],
+        at: usize,
+    ) -> Result<bool, UnicodeWordBoundaryError> {
+        // This is pretty subtle. Why do we need to do UTF-8 decoding here?
+        // Well... at time of writing, the is_word_char_{fwd,rev} routines will
+        // only return true if there is a valid UTF-8 encoding of a "word"
+        // codepoint, and false in every other case (including invalid UTF-8).
+        // This means that in regions of invalid UTF-8 (which might be a
+        // subset of valid UTF-8!), it would result in \B matching. While this
+        // would be questionable in the context of truly invalid UTF-8, it is
+        // *certainly* wrong to report match boundaries that split the encoding
+        // of a codepoint. So to work around this, we ensure that we can decode
+        // a codepoint on either side of `at`. If either direction fails, then
+        // we don't permit \B to match at all.
+        //
+        // Now, this isn't exactly optimal from a perf perspective. We could
+        // try and detect this in is_word_char::{fwd,rev}, but it's not clear
+        // if it's worth it. \B is, after all, rarely used. Even worse,
+        // is_word_char::{fwd,rev} could do its own UTF-8 decoding, and so this
+        // will wind up doing UTF-8 decoding twice. Owch. We could fix this
+        // with more code complexity, but it just doesn't feel worth it for \B.
+        //
+        // And in particular, we do *not* have to do this with \b, because \b
+        // *requires* that at least one side of `at` be a "word" codepoint,
+        // which in turn implies one side of `at` must be valid UTF-8. This in
+        // turn implies that \b can never split a valid UTF-8 encoding of a
+        // codepoint.
+        // In the case where one side of `at` is truly invalid UTF-8 and the
+        // other side IS a word codepoint, then we want \b to match since it
+        // represents a valid UTF-8 boundary. It also makes sense. For
+        // example, you'd want \b\w+\b to match 'abc' in '\xFFabc\xFF'.
+        //
+        // Note also that this is not just '!is_word_unicode(..)' like it is
+        // for the ASCII case. For example, neither \b nor \B is satisfied
+        // within invalid UTF-8 sequences.
+        let word_before = at > 0
+            && match utf8::decode_last(&haystack[..at]) {
+                None | Some(Err(_)) => return Ok(false),
+                Some(Ok(_)) => is_word_char::rev(haystack, at)?,
+            };
+        let word_after = at < haystack.len()
+            && match utf8::decode(&haystack[at..]) {
+                None | Some(Err(_)) => return Ok(false),
+                Some(Ok(_)) => is_word_char::fwd(haystack, at)?,
+            };
+        Ok(word_before == word_after)
+    }
+
+    /// Returns true when [`Look::WordStartAscii`] is satisfied `at` the given
+    /// position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn is_word_start_ascii(&self, haystack: &[u8], at: usize) -> bool {
+        let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]);
+        let word_after =
+            at < haystack.len() && utf8::is_word_byte(haystack[at]);
+        !word_before && word_after
+    }
+
+    /// Returns true when [`Look::WordEndAscii`] is satisfied `at` the given
+    /// position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn is_word_end_ascii(&self, haystack: &[u8], at: usize) -> bool {
+        let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]);
+        let word_after =
+            at < haystack.len() && utf8::is_word_byte(haystack[at]);
+        word_before && !word_after
+    }
+
+    /// Returns true when [`Look::WordStartUnicode`] is satisfied `at` the
+    /// given position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when Unicode word boundary tables
+    /// are not available. Specifically, this only occurs when the
+    /// `unicode-word-boundary` feature is not enabled.
+    #[inline]
+    pub fn is_word_start_unicode(
+        &self,
+        haystack: &[u8],
+        at: usize,
+    ) -> Result<bool, UnicodeWordBoundaryError> {
+        let word_before = is_word_char::rev(haystack, at)?;
+        let word_after = is_word_char::fwd(haystack, at)?;
+        Ok(!word_before && word_after)
+    }
+
+    /// Returns true when [`Look::WordEndUnicode`] is satisfied `at` the
+    /// given position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when Unicode word boundary tables
+    /// are not available. Specifically, this only occurs when the
+    /// `unicode-word-boundary` feature is not enabled.
+    #[inline]
+    pub fn is_word_end_unicode(
+        &self,
+        haystack: &[u8],
+        at: usize,
+    ) -> Result<bool, UnicodeWordBoundaryError> {
+        let word_before = is_word_char::rev(haystack, at)?;
+        let word_after = is_word_char::fwd(haystack, at)?;
+        Ok(word_before && !word_after)
+    }
+
+    /// Returns true when [`Look::WordStartHalfAscii`] is satisfied `at` the
+    /// given position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn is_word_start_half_ascii(
+        &self,
+        haystack: &[u8],
+        at: usize,
+    ) -> bool {
+        let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]);
+        !word_before
+    }
+
+    /// Returns true when [`Look::WordEndHalfAscii`] is satisfied `at` the
+    /// given position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    #[inline]
+    pub fn is_word_end_half_ascii(&self, haystack: &[u8], at: usize) -> bool {
+        let word_after =
+            at < haystack.len() && utf8::is_word_byte(haystack[at]);
+        !word_after
+    }
+
+    /// Returns true when [`Look::WordStartHalfUnicode`] is satisfied `at` the
+    /// given position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when Unicode word boundary tables
+    /// are not available. Specifically, this only occurs when the
+    /// `unicode-word-boundary` feature is not enabled.
+    #[inline]
+    pub fn is_word_start_half_unicode(
+        &self,
+        haystack: &[u8],
+        at: usize,
+    ) -> Result<bool, UnicodeWordBoundaryError> {
+        // See `is_word_unicode_negate` for why we need to do this. We don't
+        // need to do it for `is_word_start_unicode` because that guarantees
+        // that the position matched falls on a valid UTF-8 boundary given
+        // that the right side must be in \w.
+        let word_before = at > 0
+            && match utf8::decode_last(&haystack[..at]) {
+                None | Some(Err(_)) => return Ok(false),
+                Some(Ok(_)) => is_word_char::rev(haystack, at)?,
+            };
+        Ok(!word_before)
+    }
+
+    /// Returns true when [`Look::WordEndHalfUnicode`] is satisfied `at` the
+    /// given position in `haystack`.
+    ///
+    /// # Panics
+    ///
+    /// This may panic when `at > haystack.len()`. Note that `at ==
+    /// haystack.len()` is legal and guaranteed not to panic.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error when Unicode word boundary tables
+    /// are not available. Specifically, this only occurs when the
+    /// `unicode-word-boundary` feature is not enabled.
+    #[inline]
+    pub fn is_word_end_half_unicode(
+        &self,
+        haystack: &[u8],
+        at: usize,
+    ) -> Result<bool, UnicodeWordBoundaryError> {
+        // See `is_word_unicode_negate` for why we need to do this. We don't
+        // need to do it for `is_word_end_unicode` because that guarantees
+        // that the position matched falls on a valid UTF-8 boundary given
+        // that the left side must be in \w.
+        let word_after = at < haystack.len()
+            && match utf8::decode(&haystack[at..]) {
+                None | Some(Err(_)) => return Ok(false),
+                Some(Ok(_)) => is_word_char::fwd(haystack, at)?,
+            };
+        Ok(!word_after)
+    }
+}
+
+impl Default for LookMatcher {
+    fn default() -> LookMatcher {
+        LookMatcher::new()
+    }
+}
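+
+// Illustrative sketch (not from the vendored source): how the assertion
+// predicates above are typically queried, assuming the `LookMatcher` API in
+// this module:
+//
+//     let m = LookMatcher::new();
+//     assert!(m.is_word_ascii(b"ab cd", 2));  // 'b' then ' ' => boundary
+//     assert!(m.is_start_lf(b"ab\ncd", 3));   // position just after '\n'
+//     // Unicode word boundaries are fallible, since they need data tables:
+//     assert!(m.is_word_unicode(b"abc", 0).unwrap());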
+
+/// An error that occurs when the Unicode-aware `\w` class is unavailable.
+///
+/// This error can occur when the data tables necessary for the Unicode aware
+/// Perl character class `\w` are unavailable. The `\w` class is used to
+/// determine whether a codepoint is considered a word character or not when
+/// determining whether a Unicode aware `\b` (or `\B`) matches at a particular
+/// position.
+///
+/// This error can only occur when the `unicode-word-boundary` feature is
+/// disabled.
+#[derive(Clone, Debug)]
+pub struct UnicodeWordBoundaryError(());
+
+impl UnicodeWordBoundaryError {
+    #[cfg(not(feature = "unicode-word-boundary"))]
+    pub(crate) fn new() -> UnicodeWordBoundaryError {
+        UnicodeWordBoundaryError(())
+    }
+
+    /// Returns an error if and only if Unicode word boundary data is
+    /// unavailable.
+    pub fn check() -> Result<(), UnicodeWordBoundaryError> {
+        is_word_char::check()
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for UnicodeWordBoundaryError {}
+
+impl core::fmt::Display for UnicodeWordBoundaryError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(
+            f,
+            "Unicode-aware \\b and \\B are unavailable because the \
+             requisite data tables are missing, please enable the \
+             unicode-word-boundary feature"
+        )
+    }
+}
+
+// Below are FOUR different ways for checking whether a "word" codepoint
+// exists at a particular position in the haystack. The four different
+// approaches are, in order of preference:
+//
+// 1. Parse '\w', convert to an NFA, convert to a fully compiled DFA on the
+// first call, and then use that DFA for all subsequent calls.
+// 2. Do UTF-8 decoding and use regex_syntax::is_word_character if available.
+// 3. Do UTF-8 decoding and use our own 'perl_word' table.
+// 4. Return an error.
+//
+// The reason for all of these approaches is a combination of perf and
+// permitting one to build regex-automata without the Unicode data necessary
+// for handling Unicode-aware word boundaries. (In which case, '(?-u:\b)' would
+// still work.)
+//
+// The DFA approach is the fastest, but it requires the regex parser, the
+// NFA compiler, the DFA builder and the DFA search runtime. That's a lot to
+// bring in, but if it's available, it's (probably) the best we can do.
+//
+// Approaches (2) and (3) are effectively equivalent, but (2) reuses the
+// data in regex-syntax and avoids duplicating it in regex-automata.
+//
+// Finally, (4) unconditionally returns an error since the requisite data isn't
+// available anywhere.
+//
+// There are actually more approaches possible that we didn't implement. For
+// example, if the DFA builder is available but the syntax parser is not, we
+// could technically hand construct our own NFA from the 'perl_word' data
+// table. But to avoid some pretty hairy code duplication, we would in turn
+// need to pull the UTF-8 compiler out of the NFA compiler. Yikes.
+//
+// A possibly more sensible alternative is to use a lazy DFA when the full
+// DFA builder isn't available...
+//
+// Yet another choice would be to build the full DFA and then embed it into the
+// source. Then we'd only need to bring in the DFA search runtime, which is
+// considerably smaller than the DFA builder code. The problem here is that the
+// Debian people have spooked me[1] into avoiding cyclic dependencies. Namely,
+// we'd need to build regex-cli, which depends on regex-automata in order to
+// build some part of regex-automata. But to be honest, something like this has
+// to be allowed somehow? I just don't know what the right process is.
+//
+// There are perhaps other choices as well. Why did I stop at these 4? Because
+// I wanted to preserve my sanity. I suspect I'll wind up adding the lazy DFA
+// approach eventually, as the benefits of the DFA approach are somewhat
+// compelling. The 'boundary-words-holmes' benchmark tests this. (Note that
+// the commands below no longer work. If necessary, we should re-capitulate
+// the benchmark from whole cloth in rebar.)
+//
+// $ regex-cli bench measure -f boundary-words-holmes -e pikevm > dfa.csv
+//
+// Then I changed the code below so that the util/unicode_data/perl_word table
+// was used and re-ran the benchmark:
+//
+// $ regex-cli bench measure -f boundary-words-holmes -e pikevm > table.csv
+//
+// And compared them:
+//
+// $ regex-cli bench diff dfa.csv table.csv
+// benchmark                             engine                 dfa        table
+// ---------                             ------                 ---        -----
+// internal/count/boundary-words-holmes  regex/automata/pikevm  18.6 MB/s  12.9 MB/s
+//
+// Which is a nice improvement.
+//
+// UPDATE: It turns out that it takes approximately 22ms to build the reverse
+// DFA for \w. (And about 3ms for the forward DFA.) It's probably not much in
+// the grand scheme things, but that is a significant latency cost. So I'm not
+// sure that's a good idea. I then tried using a lazy DFA instead, and that
+// eliminated the overhead, but since the lazy DFA requires mutable working
+// memory, that requires introducing a 'Cache' for every simultaneous call.
+//
+// I ended up deciding for now to just keep the "UTF-8 decode and check the
+// table." The DFA and lazy DFA approaches are still below, but commented out.
+//
+// [1]: https://github.com/BurntSushi/ucd-generate/issues/11
+
+/*
+/// A module that looks for word codepoints using lazy DFAs.
+#[cfg(all(
+    feature = "unicode-word-boundary",
+    feature = "syntax",
+    feature = "unicode-perl",
+    feature = "hybrid"
+))]
+mod is_word_char {
+    use alloc::vec::Vec;
+
+    use crate::{
+        hybrid::dfa::{Cache, DFA},
+        nfa::thompson::NFA,
+        util::{lazy::Lazy, pool::Pool, primitives::StateID},
+        Anchored, Input,
+    };
+
+    pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> {
+        Ok(())
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(super) fn fwd(
+        haystack: &[u8],
+        mut at: usize,
+    ) -> Result<bool, super::UnicodeWordBoundaryError> {
+        static WORD: Lazy<DFA> = Lazy::new(|| DFA::new(r"\w").unwrap());
+        static CACHE: Lazy<Pool<Cache>> =
+            Lazy::new(|| Pool::new(|| WORD.create_cache()));
+        let dfa = Lazy::get(&WORD);
+        let mut cache = Lazy::get(&CACHE).get();
+        let mut sid = dfa
+            .start_state_forward(
+                &mut cache,
+                &Input::new("").anchored(Anchored::Yes),
+            )
+            .unwrap();
+        while at < haystack.len() {
+            let byte = haystack[at];
+            sid = dfa.next_state(&mut cache, sid, byte).unwrap();
+            at += 1;
+            if sid.is_tagged() {
+                if sid.is_match() {
+                    return Ok(true);
+                } else if sid.is_dead() {
+                    return Ok(false);
+                }
+            }
+        }
+        Ok(dfa.next_eoi_state(&mut cache, sid).unwrap().is_match())
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(super) fn rev(
+        haystack: &[u8],
+        mut at: usize,
+    ) -> Result<bool, super::UnicodeWordBoundaryError> {
+        static WORD: Lazy<DFA> = Lazy::new(|| {
+            DFA::builder()
+                .thompson(NFA::config().reverse(true))
+                .build(r"\w")
+                .unwrap()
+        });
+        static CACHE: Lazy<Pool<Cache>> =
+            Lazy::new(|| Pool::new(|| WORD.create_cache()));
+        let dfa = Lazy::get(&WORD);
+        let mut cache = Lazy::get(&CACHE).get();
+        let mut sid = dfa
+            .start_state_reverse(
+                &mut cache,
+                &Input::new("").anchored(Anchored::Yes),
+            )
+            .unwrap();
+        while at > 0 {
+            at -= 1;
+            let byte = haystack[at];
+            sid = dfa.next_state(&mut cache, sid, byte).unwrap();
+            if sid.is_tagged() {
+                if sid.is_match() {
+                    return Ok(true);
+                } else if sid.is_dead() {
+                    return Ok(false);
+                }
+            }
+        }
+        Ok(dfa.next_eoi_state(&mut cache, sid).unwrap().is_match())
+    }
+}
+*/
+
+/*
+/// A module that looks for word codepoints using fully compiled DFAs.
+#[cfg(all(
+    feature = "unicode-word-boundary",
+    feature = "syntax",
+    feature = "unicode-perl",
+    feature = "dfa-build"
+))]
+mod is_word_char {
+    use alloc::vec::Vec;
+
+    use crate::{
+        dfa::{dense::DFA, Automaton, StartKind},
+        nfa::thompson::NFA,
+        util::{lazy::Lazy, primitives::StateID},
+        Anchored, Input,
+    };
+
+    pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> {
+        Ok(())
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(super) fn fwd(
+        haystack: &[u8],
+        mut at: usize,
+    ) -> Result<bool, super::UnicodeWordBoundaryError> {
+        static WORD: Lazy<(DFA<Vec<u32>>, StateID)> = Lazy::new(|| {
+            let dfa = DFA::builder()
+                .configure(DFA::config().start_kind(StartKind::Anchored))
+                .build(r"\w")
+                .unwrap();
+            // OK because our regex has no look-around.
+            let start_id = dfa.universal_start_state(Anchored::Yes).unwrap();
+            (dfa, start_id)
+        });
+        let &(ref dfa, mut sid) = Lazy::get(&WORD);
+        while at < haystack.len() {
+            let byte = haystack[at];
+            sid = dfa.next_state(sid, byte);
+            at += 1;
+            if dfa.is_special_state(sid) {
+                if dfa.is_match_state(sid) {
+                    return Ok(true);
+                } else if dfa.is_dead_state(sid) {
+                    return Ok(false);
+                }
+            }
+        }
+        Ok(dfa.is_match_state(dfa.next_eoi_state(sid)))
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(super) fn rev(
+        haystack: &[u8],
+        mut at: usize,
+    ) -> Result<bool, super::UnicodeWordBoundaryError> {
+        static WORD: Lazy<(DFA<Vec<u32>>, StateID)> = Lazy::new(|| {
+            let dfa = DFA::builder()
+                .configure(DFA::config().start_kind(StartKind::Anchored))
+                // From ad hoc measurements, it looks like setting
+                // shrink==false is slightly faster than shrink==true. I kind
+                // of feel like this indicates that shrinking is probably a
+                // failure, although it can help in some cases. Sigh.
+                .thompson(NFA::config().reverse(true).shrink(false))
+                .build(r"\w")
+                .unwrap();
+            // OK because our regex has no look-around.
+            let start_id = dfa.universal_start_state(Anchored::Yes).unwrap();
+            (dfa, start_id)
+        });
+        let &(ref dfa, mut sid) = Lazy::get(&WORD);
+        while at > 0 {
+            at -= 1;
+            let byte = haystack[at];
+            sid = dfa.next_state(sid, byte);
+            if dfa.is_special_state(sid) {
+                if dfa.is_match_state(sid) {
+                    return Ok(true);
+                } else if dfa.is_dead_state(sid) {
+                    return Ok(false);
+                }
+            }
+        }
+        Ok(dfa.is_match_state(dfa.next_eoi_state(sid)))
+    }
+}
+*/
+
+/// A module that looks for word codepoints using regex-syntax's data tables.
+#[cfg(all(
+    feature = "unicode-word-boundary",
+    feature = "syntax",
+    feature = "unicode-perl",
+))]
+mod is_word_char {
+    use regex_syntax::try_is_word_character;
+
+    use crate::util::utf8;
+
+    pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> {
+        Ok(())
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(super) fn fwd(
+        haystack: &[u8],
+        at: usize,
+    ) -> Result<bool, super::UnicodeWordBoundaryError> {
+        Ok(match utf8::decode(&haystack[at..]) {
+            None | Some(Err(_)) => false,
+            Some(Ok(ch)) => try_is_word_character(ch).expect(
+                "since unicode-word-boundary, syntax and unicode-perl \
+                 are all enabled, it is expected that \
+                 try_is_word_character succeeds",
+            ),
+        })
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(super) fn rev(
+        haystack: &[u8],
+        at: usize,
+    ) -> Result<bool, super::UnicodeWordBoundaryError> {
+        Ok(match utf8::decode_last(&haystack[..at]) {
+            None | Some(Err(_)) => false,
+            Some(Ok(ch)) => try_is_word_character(ch).expect(
+                "since unicode-word-boundary, syntax and unicode-perl \
+                 are all enabled, it is expected that \
+                 try_is_word_character succeeds",
+            ),
+        })
+    }
+}
+
+/// A module that looks for word codepoints using regex-automata's data tables
+/// (which are only compiled when regex-syntax's tables aren't available).
+///
+/// Note that the cfg should match the one in src/util/unicode_data/mod.rs for
+/// perl_word.
+#[cfg(all(
+    feature = "unicode-word-boundary",
+    not(all(feature = "syntax", feature = "unicode-perl")),
+))]
+mod is_word_char {
+    use crate::util::utf8;
+
+    pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> {
+        Ok(())
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(super) fn fwd(
+        haystack: &[u8],
+        at: usize,
+    ) -> Result<bool, super::UnicodeWordBoundaryError> {
+        Ok(match utf8::decode(&haystack[at..]) {
+            None | Some(Err(_)) => false,
+            Some(Ok(ch)) => is_word_character(ch),
+        })
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(super) fn rev(
+        haystack: &[u8],
+        at: usize,
+    ) -> Result<bool, super::UnicodeWordBoundaryError> {
+        Ok(match utf8::decode_last(&haystack[..at]) {
+            None | Some(Err(_)) => false,
+            Some(Ok(ch)) => is_word_character(ch),
+        })
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_word_character(c: char) -> bool {
+        use crate::util::{unicode_data::perl_word::PERL_WORD, utf8};
+
+        if u8::try_from(c).map_or(false, utf8::is_word_byte) {
+            return true;
+        }
+        PERL_WORD
+            .binary_search_by(|&(start, end)| {
+                use core::cmp::Ordering;
+
+                if start <= c && c <= end {
+                    Ordering::Equal
+                } else if start > c {
+                    Ordering::Greater
+                } else {
+                    Ordering::Less
+                }
+            })
+            .is_ok()
+    }
+}
+
+/// A module that always returns an error if Unicode word boundaries are
+/// disabled. When this feature is disabled, then regex-automata will not
+/// include its own data tables even if regex-syntax is disabled.
+#[cfg(not(feature = "unicode-word-boundary"))]
+mod is_word_char {
+    pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> {
+        Err(super::UnicodeWordBoundaryError::new())
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(super) fn fwd(
+        _bytes: &[u8],
+        _at: usize,
+    ) -> Result<bool, super::UnicodeWordBoundaryError> {
+        Err(super::UnicodeWordBoundaryError::new())
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(super) fn rev(
+        _bytes: &[u8],
+        _at: usize,
+    ) -> Result<bool, super::UnicodeWordBoundaryError> {
+        Err(super::UnicodeWordBoundaryError::new())
+    }
+}
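+
+// Illustrative sketch (not from the vendored source): what the table lookup
+// in the fallback module above amounts to, assuming PERL_WORD is a sorted
+// slice of inclusive codepoint ranges:
+//
+//     assert!(is_word_character('z'));  // ASCII fast path via is_word_byte
+//     assert!(is_word_character('𝛃'));  // found by binary search over ranges
+//     assert!(!is_word_character(' ')); // in no range, so not in \w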
testlook { + ($look:expr, $haystack:expr, $at:expr) => { + LookMatcher::default().matches($look, $haystack.as_bytes(), $at) + }; + } + + #[test] + fn look_matches_start_line() { + let look = Look::StartLF; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 0)); + assert!(testlook!(look, "\n", 1)); + assert!(testlook!(look, "a", 0)); + assert!(testlook!(look, "\na", 1)); + + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a\na", 1)); + } + + #[test] + fn look_matches_end_line() { + let look = Look::EndLF; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 1)); + assert!(testlook!(look, "\na", 0)); + assert!(testlook!(look, "\na", 2)); + assert!(testlook!(look, "a\na", 1)); + + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "\na", 1)); + assert!(!testlook!(look, "a\na", 0)); + assert!(!testlook!(look, "a\na", 2)); + } + + #[test] + fn look_matches_start_text() { + let look = Look::Start; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 0)); + assert!(testlook!(look, "a", 0)); + + assert!(!testlook!(look, "\n", 1)); + assert!(!testlook!(look, "\na", 1)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a\na", 1)); + } + + #[test] + fn look_matches_end_text() { + let look = Look::End; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 1)); + assert!(testlook!(look, "\na", 2)); + + assert!(!testlook!(look, "\na", 0)); + assert!(!testlook!(look, "a\na", 1)); + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "\na", 1)); + assert!(!testlook!(look, "a\na", 0)); + assert!(!testlook!(look, "a\na", 2)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_unicode() { + let look = Look::WordUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_ascii() { + let look = Look::WordAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. 
+ assert!(testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_unicode_negate() { + let look = Look::WordUnicodeNegate; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + // These don't match because they could otherwise return an offset that + // splits the UTF-8 encoding of a codepoint. + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. These also don't + // match because they could otherwise return an offset that splits the + // UTF-8 encoding of a codepoint. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + // But this one does, since 𐆀 isn't a word codepoint, and 8 is the end + // of the haystack. So the "end" of the haystack isn't a word and 𐆀 + // isn't a word, thus, \B matches. + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_ascii_negate() { + let look = Look::WordAsciiNegate; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(testlook!(look, "𝛃", 1)); + assert!(testlook!(look, "𝛃", 2)); + assert!(testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 1)); + assert!(testlook!(look, "𝛃𐆀", 2)); + assert!(testlook!(look, "𝛃𐆀", 3)); + assert!(testlook!(look, "𝛃𐆀", 5)); + assert!(testlook!(look, "𝛃𐆀", 6)); + assert!(testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_start_ascii() { + let look = Look::WordStartAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. 
+ assert!(testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_end_ascii() { + let look = Look::WordEndAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_start_unicode() { + let look = Look::WordStartUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. 
+ assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_end_unicode() { + let look = Look::WordEndUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_start_half_ascii() { + let look = Look::WordStartHalfAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. 
+ assert!(testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(testlook!(look, "𝛃", 1)); + assert!(testlook!(look, "𝛃", 2)); + assert!(testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 1)); + assert!(testlook!(look, "𝛃𐆀", 2)); + assert!(testlook!(look, "𝛃𐆀", 3)); + assert!(testlook!(look, "𝛃𐆀", 5)); + assert!(testlook!(look, "𝛃𐆀", 6)); + assert!(testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_end_half_ascii() { + let look = Look::WordEndHalfAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(testlook!(look, "𝛃", 1)); + assert!(testlook!(look, "𝛃", 2)); + assert!(testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 1)); + assert!(testlook!(look, "𝛃𐆀", 2)); + assert!(testlook!(look, "𝛃𐆀", 3)); + assert!(testlook!(look, "𝛃𐆀", 5)); + assert!(testlook!(look, "𝛃𐆀", 6)); + assert!(testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_start_half_unicode() { + let look = Look::WordStartHalfUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. 
+ assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_end_half_unicode() { + let look = Look::WordEndHalfUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_set() { + let mut f = LookSet::default(); + assert!(!f.contains(Look::Start)); + assert!(!f.contains(Look::End)); + assert!(!f.contains(Look::StartLF)); + assert!(!f.contains(Look::EndLF)); + assert!(!f.contains(Look::WordUnicode)); + assert!(!f.contains(Look::WordUnicodeNegate)); + assert!(!f.contains(Look::WordAscii)); + assert!(!f.contains(Look::WordAsciiNegate)); + + f = f.insert(Look::Start); + assert!(f.contains(Look::Start)); + f = f.remove(Look::Start); + assert!(!f.contains(Look::Start)); + + f = f.insert(Look::End); + assert!(f.contains(Look::End)); + f = f.remove(Look::End); + assert!(!f.contains(Look::End)); + + f = f.insert(Look::StartLF); + assert!(f.contains(Look::StartLF)); + f = f.remove(Look::StartLF); + assert!(!f.contains(Look::StartLF)); + + f = f.insert(Look::EndLF); + assert!(f.contains(Look::EndLF)); + f = f.remove(Look::EndLF); + assert!(!f.contains(Look::EndLF)); + + f = f.insert(Look::StartCRLF); + assert!(f.contains(Look::StartCRLF)); + f = f.remove(Look::StartCRLF); + assert!(!f.contains(Look::StartCRLF)); + + f = f.insert(Look::EndCRLF); + assert!(f.contains(Look::EndCRLF)); + f = f.remove(Look::EndCRLF); + assert!(!f.contains(Look::EndCRLF)); + + f = f.insert(Look::WordUnicode); + assert!(f.contains(Look::WordUnicode)); + f = f.remove(Look::WordUnicode); + assert!(!f.contains(Look::WordUnicode)); + + f = f.insert(Look::WordUnicodeNegate); + assert!(f.contains(Look::WordUnicodeNegate)); + f = f.remove(Look::WordUnicodeNegate); + assert!(!f.contains(Look::WordUnicodeNegate)); + + f = f.insert(Look::WordAscii); + assert!(f.contains(Look::WordAscii)); + f = f.remove(Look::WordAscii); + assert!(!f.contains(Look::WordAscii)); + + f = f.insert(Look::WordAsciiNegate); + assert!(f.contains(Look::WordAsciiNegate)); + f = f.remove(Look::WordAsciiNegate); + assert!(!f.contains(Look::WordAsciiNegate)); + + f = f.insert(Look::WordStartAscii); + assert!(f.contains(Look::WordStartAscii)); + f = f.remove(Look::WordStartAscii); + assert!(!f.contains(Look::WordStartAscii)); + + f = f.insert(Look::WordEndAscii); + assert!(f.contains(Look::WordEndAscii)); + f = f.remove(Look::WordEndAscii); + assert!(!f.contains(Look::WordEndAscii)); + + f = f.insert(Look::WordStartUnicode); + assert!(f.contains(Look::WordStartUnicode)); + f = f.remove(Look::WordStartUnicode); + assert!(!f.contains(Look::WordStartUnicode)); + + f = f.insert(Look::WordEndUnicode); + assert!(f.contains(Look::WordEndUnicode)); + f = f.remove(Look::WordEndUnicode); + assert!(!f.contains(Look::WordEndUnicode)); + + f = f.insert(Look::WordStartHalfAscii); + assert!(f.contains(Look::WordStartHalfAscii)); + f = f.remove(Look::WordStartHalfAscii); + assert!(!f.contains(Look::WordStartHalfAscii)); + + f = f.insert(Look::WordEndHalfAscii); + assert!(f.contains(Look::WordEndHalfAscii)); + f = f.remove(Look::WordEndHalfAscii); + assert!(!f.contains(Look::WordEndHalfAscii)); + + f = f.insert(Look::WordStartHalfUnicode); + assert!(f.contains(Look::WordStartHalfUnicode)); + f = f.remove(Look::WordStartHalfUnicode); + assert!(!f.contains(Look::WordStartHalfUnicode)); + + f = f.insert(Look::WordEndHalfUnicode); + assert!(f.contains(Look::WordEndHalfUnicode)); + f = f.remove(Look::WordEndHalfUnicode); + 
+        assert!(!f.contains(Look::WordEndHalfUnicode));
+    }
+
+    #[test]
+    fn look_set_iter() {
+        let set = LookSet::empty();
+        assert_eq!(0, set.iter().count());
+
+        let set = LookSet::full();
+        assert_eq!(18, set.iter().count());
+
+        let set =
+            LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode);
+        assert_eq!(2, set.iter().count());
+
+        let set = LookSet::empty().insert(Look::StartLF);
+        assert_eq!(1, set.iter().count());
+
+        let set = LookSet::empty().insert(Look::WordAsciiNegate);
+        assert_eq!(1, set.iter().count());
+
+        let set = LookSet::empty().insert(Look::WordEndHalfUnicode);
+        assert_eq!(1, set.iter().count());
+    }
+
+    #[test]
+    #[cfg(feature = "alloc")]
+    fn look_set_debug() {
+        let res = alloc::format!("{:?}", LookSet::empty());
+        assert_eq!("∅", res);
+        let res = alloc::format!("{:?}", LookSet::full());
+        assert_eq!("Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶", res);
+    }
+}
diff --git a/vendor/regex-automata/src/util/memchr.rs b/vendor/regex-automata/src/util/memchr.rs
new file mode 100644
index 0000000..a2cbb07
--- /dev/null
+++ b/vendor/regex-automata/src/util/memchr.rs
@@ -0,0 +1,93 @@
+/*!
+This module defines simple wrapper routines for the memchr functions from the
+`memchr` crate. Basically, when the `memchr` crate is available, we use it,
+otherwise we use a naive implementation which is still pretty fast.
+*/
+
+pub(crate) use self::inner::*;
+
+#[cfg(feature = "perf-literal-substring")]
+pub(super) mod inner {
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+        memchr::memchr(n1, haystack)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+        memchr::memchr2(n1, n2, haystack)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memchr3(
+        n1: u8,
+        n2: u8,
+        n3: u8,
+        haystack: &[u8],
+    ) -> Option<usize> {
+        memchr::memchr3(n1, n2, n3, haystack)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+        memchr::memrchr(n1, haystack)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+        memchr::memrchr2(n1, n2, haystack)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memrchr3(
+        n1: u8,
+        n2: u8,
+        n3: u8,
+        haystack: &[u8],
+    ) -> Option<usize> {
+        memchr::memrchr3(n1, n2, n3, haystack)
+    }
+}
+
+#[cfg(not(feature = "perf-literal-substring"))]
+pub(super) mod inner {
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+        haystack.iter().position(|&b| b == n1)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+        haystack.iter().position(|&b| b == n1 || b == n2)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memchr3(
+        n1: u8,
+        n2: u8,
+        n3: u8,
+        haystack: &[u8],
+    ) -> Option<usize> {
+        haystack.iter().position(|&b| b == n1 || b == n2 || b == n3)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+        haystack.iter().rposition(|&b| b == n1)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+        haystack.iter().rposition(|&b| b == n1 || b == n2)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn memrchr3(
+        n1: u8,
+        n2: u8,
+        n3: u8,
+        haystack: &[u8],
+    ) -> Option<usize> {
+        haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3)
+    }
+}
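+
+// Illustrative sketch (not from the vendored source): the two `inner` modules
+// above are drop-in equivalents, e.g., assuming the `memchr` crate is enabled:
+//
+//     assert_eq!(Some(3), memchr(b'd', b"abcd"));
+//     assert_eq!(Some(3), b"abcd".iter().position(|&b| b == b'd'));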
diff --git a/vendor/regex-automata/src/util/mod.rs b/vendor/regex-automata/src/util/mod.rs
new file mode 100644
index 0000000..b3eef64
--- /dev/null
+++ b/vendor/regex-automata/src/util/mod.rs
@@ -0,0 +1,57 @@
+/*!
+A collection of modules that provide APIs that are useful across many regex
+engines.
+
+While one should explore the sub-modules directly to get a sense of what's
+there, here are some highlights that tie the sub-modules to higher level
+use cases:
+
+* `alphabet` contains APIs that are useful if you're doing low level things
+with the DFAs in this crate. For example, implementing determinization or
+walking its state graph directly.
+* `captures` contains APIs for dealing with capture group matches and their
+mapping to "slots" used inside an NFA graph. This is also where you can find
+iterators over capture group names.
+* `escape` contains types for pretty-printing raw byte slices as strings.
+* `iter` contains API helpers for writing regex iterators.
+* `lazy` contains a no-std and no-alloc variant of `lazy_static!` and
+`once_cell`.
+* `look` contains APIs for matching and configuring look-around assertions.
+* `pool` provides a way to reuse mutable memory allocated in a thread safe
+manner.
+* `prefilter` provides APIs for building prefilters and using them in searches.
+* `primitives` are what you might use if you're doing lower level work on
+automata, such as walking an NFA state graph.
+* `syntax` provides some higher level convenience functions for interacting
+with the `regex-syntax` crate.
+* `wire` is useful if you're working with DFA serialization.
+*/
+
+pub mod alphabet;
+#[cfg(feature = "alloc")]
+pub mod captures;
+pub mod escape;
+#[cfg(feature = "alloc")]
+pub mod interpolate;
+pub mod iter;
+pub mod lazy;
+pub mod look;
+#[cfg(feature = "alloc")]
+pub mod pool;
+pub mod prefilter;
+pub mod primitives;
+pub mod start;
+#[cfg(feature = "syntax")]
+pub mod syntax;
+pub mod wire;
+
+#[cfg(any(feature = "dfa-build", feature = "hybrid"))]
+pub(crate) mod determinize;
+pub(crate) mod empty;
+pub(crate) mod int;
+pub(crate) mod memchr;
+pub(crate) mod search;
+#[cfg(feature = "alloc")]
+pub(crate) mod sparse_set;
+pub(crate) mod unicode_data;
+pub(crate) mod utf8;
diff --git a/vendor/regex-automata/src/util/pool.rs b/vendor/regex-automata/src/util/pool.rs
new file mode 100644
index 0000000..d90d4ec
--- /dev/null
+++ b/vendor/regex-automata/src/util/pool.rs
@@ -0,0 +1,1199 @@
+// This module provides a relatively simple thread-safe pool of reusable
+// objects. For the most part, it's implemented by a stack represented by a
+// Mutex<Vec<T>>. It has one small trick: because unlocking a mutex is somewhat
+// costly, in the case where a pool is accessed by the first thread that tried
+// to get a value, we bypass the mutex. Here are some benchmarks showing the
+// difference.
+//
+// 2022-10-15: These benchmarks are from the old regex crate and they aren't
+// easy to reproduce because some rely on older implementations of Pool that
+// are no longer around. I've left the results here for posterity, but any
+// enterprising individual should feel encouraged to re-litigate the way Pool
+// works. I am not at all certain it is the best approach.
+//
+// 1) misc::anchored_literal_long_non_match    21 (18571 MB/s)
+// 2) misc::anchored_literal_long_non_match   107 (3644 MB/s)
+// 3) misc::anchored_literal_long_non_match    45 (8666 MB/s)
+// 4) misc::anchored_literal_long_non_match    19 (20526 MB/s)
+//
+// (1) represents our baseline: the master branch at the time of writing when
+// using the 'thread_local' crate to implement the pool below.
+//
+// (2) represents a naive pool implemented completely via Mutex<Vec<T>>. There
+// is no special trick for bypassing the mutex.
+//
+// (3) is the same as (2), except it uses Mutex<Vec<Box<T>>>. It is twice as
+// fast because a Box<T> is much smaller than the T we use with a Pool in this
+// crate. So pushing and popping a Box<T> from a Vec is quite a bit faster
+// than for T.
+//
+// (4) is the same as (3), but with the trick for bypassing the mutex in the
+// case of the first-to-get thread.
+//
+// Why move off of thread_local? Even though (4) is a hair faster than (1)
+// above, this was not the main goal. The main goal was to move off of
+// thread_local and find a way to *simply* re-capture some of its speed for
+// regex's specific case. So again, why move off of it? The *primary* reason is
+// because of memory leaks. See https://github.com/rust-lang/regex/issues/362
+// for example. (Why do I want it to be simple? Well, I suppose what I mean is,
+// "use as much safe code as possible to minimize risk and be as sure as I can
+// be that it is correct.")
+//
+// My guess is that the thread_local design is probably not appropriate for
+// regex since its memory usage scales to the number of active threads that
+// have used a regex, whereas the pool below scales to the number of threads
+// that simultaneously use a regex. While neither case permits contraction,
+// since we own the pool data structure below, we can add contraction if a
+// clear use case pops up in the wild. More pressingly though, it seems that
+// there are at least some use case patterns where one might have many threads
+// sitting around that might have used a regex at one point. While thread_local
+// does try to reuse space previously used by a thread that has since stopped,
+// its maximal memory usage still scales with the total number of active
+// threads. In contrast, the pool below scales with the total number of threads
+// *simultaneously* using the pool. The hope is that this uses less memory
+// overall. And if it doesn't, we can hopefully tune it somehow.
+//
+// It seems that these sort of conditions happen frequently
+// in FFI inside of other more "managed" languages. This was
+// mentioned in the issue linked above, and also mentioned here:
+// https://github.com/BurntSushi/rure-go/issues/3. And in particular, users
+// confirm that disabling the use of thread_local resolves the leak.
+//
+// There were other weaker reasons for moving off of thread_local as well.
+// Namely, at the time, I was looking to reduce dependencies. And for something
+// like regex, maintenance can be simpler when we own the full dependency tree.
+//
+// Note that I am not entirely happy with this pool. It has some subtle
+// implementation details and is overall still observable (even with the
+// thread owner optimization) in benchmarks. If someone wants to take a crack
+// at building something better, please file an issue. Even if it means a
+// different API. The API exposed by this pool is not the minimal thing that
+// something like a 'Regex' actually needs. It could adapt to, for example,
+// an API more like what is found in the 'thread_local' crate. However, we do
+// really need to support the no-std alloc-only context, or else the regex
+// crate wouldn't be able to support no-std alloc-only. However, I'm generally
+// okay with making the alloc-only context slower (as it is here), although I
+// do find it unfortunate.
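+
+// Illustrative sketch (not from the vendored source): a minimal version of
+// the "naive" design (3) above, i.e. a plain Mutex<Vec<Box<T>>> with no
+// mutex-bypass trick. Hypothetical code for comparison only:
+//
+//     struct NaivePool<T, F> {
+//         create: F,
+//         stack: std::sync::Mutex<Vec<Box<T>>>,
+//     }
+//
+//     impl<T, F: Fn() -> T> NaivePool<T, F> {
+//         fn get(&self) -> Box<T> {
+//             // Pop a cached value, or create one if the pool is empty.
+//             // Every get/put pays for a lock here; that is what the
+//             // first-to-get-thread trick below avoids.
+//             self.stack
+//                 .lock()
+//                 .unwrap()
+//                 .pop()
+//                 .unwrap_or_else(|| Box::new((self.create)()))
+//         }
+//         fn put(&self, value: Box<T>) {
+//             self.stack.lock().unwrap().push(value);
+//         }
+//     }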
It could adapt to, for example, +// an API more like what is found in the 'thread_local' crate. However, we do +// really need to support the no-std alloc-only context, or else the regex +// crate wouldn't be able to support no-std alloc-only. However, I'm generally +// okay with making the alloc-only context slower (as it is here), although I +// do find it unfortunate. + +/*! +A thread safe memory pool. + +The principal type in this module is a [`Pool`]. It main use case is for +holding a thread safe collection of mutable scratch spaces (usually called +`Cache` in this crate) that regex engines need to execute a search. This then +permits sharing the same read-only regex object across multiple threads while +having a quick way of reusing scratch space in a thread safe way. This avoids +needing to re-create the scratch space for every search, which could wind up +being quite expensive. +*/ + +/// A thread safe pool that works in an `alloc`-only context. +/// +/// Getting a value out comes with a guard. When that guard is dropped, the +/// value is automatically put back in the pool. The guard provides both a +/// `Deref` and a `DerefMut` implementation for easy access to an underlying +/// `T`. +/// +/// A `Pool` impls `Sync` when `T` is `Send` (even if `T` is not `Sync`). This +/// is possible because a pool is guaranteed to provide a value to exactly one +/// thread at any time. +/// +/// Currently, a pool never contracts in size. Its size is proportional to the +/// maximum number of simultaneous uses. This may change in the future. +/// +/// A `Pool` is a particularly useful data structure for this crate because +/// many of the regex engines require a mutable "cache" in order to execute +/// a search. Since regexes themselves tend to be global, the problem is then: +/// how do you get a mutable cache to execute a search? You could: +/// +/// 1. Use a `thread_local!`, which requires the standard library and requires +/// that the regex pattern be statically known. +/// 2. Use a `Pool`. +/// 3. Make the cache an explicit dependency in your code and pass it around. +/// 4. Put the cache state in a `Mutex`, but this means only one search can +/// execute at a time. +/// 5. Create a new cache for every search. +/// +/// A `thread_local!` is perhaps the best choice if it works for your use case. +/// Putting the cache in a mutex or creating a new cache for every search are +/// perhaps the worst choices. Of the remaining two choices, whether you use +/// this `Pool` or thread through a cache explicitly in your code is a matter +/// of taste and depends on your code architecture. +/// +/// # Warning: may use a spin lock +/// +/// When this crate is compiled _without_ the `std` feature, then this type +/// may used a spin lock internally. This can have subtle effects that may +/// be undesirable. See [Spinlocks Considered Harmful][spinharm] for a more +/// thorough treatment of this topic. +/// +/// [spinharm]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html +/// +/// # Example +/// +/// This example shows how to share a single hybrid regex among multiple +/// threads, while also safely getting exclusive access to a hybrid's +/// [`Cache`](crate::hybrid::regex::Cache) without preventing other searches +/// from running while your thread uses the `Cache`. 
+/// +/// ``` +/// use regex_automata::{ +/// hybrid::regex::{Cache, Regex}, +/// util::{lazy::Lazy, pool::Pool}, +/// Match, +/// }; +/// +/// static RE: Lazy = +/// Lazy::new(|| Regex::new("foo[0-9]+bar").unwrap()); +/// static CACHE: Lazy> = +/// Lazy::new(|| Pool::new(|| RE.create_cache())); +/// +/// let expected = Some(Match::must(0, 3..14)); +/// assert_eq!(expected, RE.find(&mut CACHE.get(), b"zzzfoo12345barzzz")); +/// ``` +pub struct Pool T>(alloc::boxed::Box>); + +impl Pool { + /// Create a new pool. The given closure is used to create values in + /// the pool when necessary. + pub fn new(create: F) -> Pool { + Pool(alloc::boxed::Box::new(inner::Pool::new(create))) + } +} + +impl T> Pool { + /// Get a value from the pool. The caller is guaranteed to have + /// exclusive access to the given value. Namely, it is guaranteed that + /// this will never return a value that was returned by another call to + /// `get` but was not put back into the pool. + /// + /// When the guard goes out of scope and its destructor is called, then + /// it will automatically be put back into the pool. Alternatively, + /// [`PoolGuard::put`] may be used to explicitly put it back in the pool + /// without relying on its destructor. + /// + /// Note that there is no guarantee provided about which value in the + /// pool is returned. That is, calling get, dropping the guard (causing + /// the value to go back into the pool) and then calling get again is + /// *not* guaranteed to return the same value received in the first `get` + /// call. + #[inline] + pub fn get(&self) -> PoolGuard<'_, T, F> { + PoolGuard(self.0.get()) + } +} + +impl core::fmt::Debug for Pool { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("Pool").field(&self.0).finish() + } +} + +/// A guard that is returned when a caller requests a value from the pool. +/// +/// The purpose of the guard is to use RAII to automatically put the value +/// back in the pool once it's dropped. +pub struct PoolGuard<'a, T: Send, F: Fn() -> T>(inner::PoolGuard<'a, T, F>); + +impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { + /// Consumes this guard and puts it back into the pool. + /// + /// This circumvents the guard's `Drop` implementation. This can be useful + /// in circumstances where the automatic `Drop` results in poorer codegen, + /// such as calling non-inlined functions. + #[inline] + pub fn put(this: PoolGuard<'_, T, F>) { + inner::PoolGuard::put(this.0); + } +} + +impl<'a, T: Send, F: Fn() -> T> core::ops::Deref for PoolGuard<'a, T, F> { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + self.0.value() + } +} + +impl<'a, T: Send, F: Fn() -> T> core::ops::DerefMut for PoolGuard<'a, T, F> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + self.0.value_mut() + } +} + +impl<'a, T: Send + core::fmt::Debug, F: Fn() -> T> core::fmt::Debug + for PoolGuard<'a, T, F> +{ + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("PoolGuard").field(&self.0).finish() + } +} + +#[cfg(feature = "std")] +mod inner { + use core::{ + cell::UnsafeCell, + panic::{RefUnwindSafe, UnwindSafe}, + sync::atomic::{AtomicUsize, Ordering}, + }; + + use alloc::{boxed::Box, vec, vec::Vec}; + + use std::{sync::Mutex, thread_local}; + + /// An atomic counter used to allocate thread IDs. + /// + /// We specifically start our counter at 3 so that we can use the values + /// less than it as sentinels. 
+ static COUNTER: AtomicUsize = AtomicUsize::new(3); + + /// A thread ID indicating that there is no owner. This is the initial + /// state of a pool. Once a pool has an owner, there is no way to change + /// it. + static THREAD_ID_UNOWNED: usize = 0; + + /// A thread ID indicating that the special owner value is in use and not + /// available. This state is useful for avoiding a case where the owner + /// of a pool calls `get` before putting the result of a previous `get` + /// call back into the pool. + static THREAD_ID_INUSE: usize = 1; + + /// This sentinel is used to indicate that a guard has already been dropped + /// and should not be re-dropped. We use this because our drop code can be + /// called outside of Drop and thus there could be a bug in the internal + /// implementation that results in trying to put the same guard back into + /// the same pool multiple times, and *that* could result in UB if we + /// didn't mark the guard as already having been put back in the pool. + /// + /// So this isn't strictly necessary, but this let's us define some + /// routines as safe (like PoolGuard::put_imp) that we couldn't otherwise + /// do. + static THREAD_ID_DROPPED: usize = 2; + + /// The number of stacks we use inside of the pool. These are only used for + /// non-owners. That is, these represent the "slow" path. + /// + /// In the original implementation of this pool, we only used a single + /// stack. While this might be okay for a couple threads, the prevalence of + /// 32, 64 and even 128 core CPUs has made it untenable. The contention + /// such an environment introduces when threads are doing a lot of searches + /// on short haystacks (a not uncommon use case) is palpable and leads to + /// huge slowdowns. + /// + /// This constant reflects a change from using one stack to the number of + /// stacks that this constant is set to. The stack for a particular thread + /// is simply chosen by `thread_id % MAX_POOL_STACKS`. The idea behind + /// this setup is that there should be a good chance that accesses to the + /// pool will be distributed over several stacks instead of all of them + /// converging to one. + /// + /// This is not a particularly smart or dynamic strategy. Fixing this to a + /// specific number has at least two downsides. First is that it will help, + /// say, an 8 core CPU more than it will a 128 core CPU. (But, crucially, + /// it will still help the 128 core case.) Second is that this may wind + /// up being a little wasteful with respect to memory usage. Namely, if a + /// regex is used on one thread and then moved to another thread, then it + /// could result in creating a new copy of the data in the pool even though + /// only one is actually needed. + /// + /// And that memory usage bit is why this is set to 8 and not, say, 64. + /// Keeping it at 8 limits, to an extent, how much unnecessary memory can + /// be allocated. + /// + /// In an ideal world, we'd be able to have something like this: + /// + /// * Grow the number of stacks as the number of concurrent callers + /// increases. I spent a little time trying this, but even just adding an + /// atomic addition/subtraction for each pop/push for tracking concurrent + /// callers led to a big perf hit. Since even more work would seemingly be + /// required than just an addition/subtraction, I abandoned this approach. + /// * The maximum amount of memory used should scale with respect to the + /// number of concurrent callers and *not* the total number of existing + /// threads. 
This is primarily why the `thread_local` crate isn't used, as
+    /// some environments spin up a lot of threads. This led to multiple
+    /// reports of extremely high memory usage (often described as memory
+    /// leaks).
+    /// * Even more ideally, the pool should contract in size. That is, it
+    /// should grow with bursts and then shrink. But this is a pretty thorny
+    /// issue to tackle and it might be better to just not.
+    /// * It would be nice to explore the use of, say, a lock-free stack
+    /// instead of using a mutex to guard a `Vec` that is ultimately just
+    /// treated as a stack. The main thing preventing me from exploring this
+    /// is the ABA problem. The `crossbeam` crate has tools for dealing with
+    /// this sort of problem (via its epoch based memory reclamation strategy),
+    /// but I can't justify bringing in all of `crossbeam` as a dependency of
+    /// `regex` for this.
+    ///
+    /// See this issue for more context and discussion:
+    /// https://github.com/rust-lang/regex/issues/934
+    const MAX_POOL_STACKS: usize = 8;
+
+    thread_local!(
+        /// A thread local used to assign an ID to a thread.
+        static THREAD_ID: usize = {
+            let next = COUNTER.fetch_add(1, Ordering::Relaxed);
+            // SAFETY: We cannot permit the reuse of thread IDs since reusing a
+            // thread ID might result in more than one thread "owning" a pool,
+            // and thus, permit accessing a mutable value from multiple threads
+            // simultaneously without synchronization. The intent of this panic
+            // is to be a sanity check. It is not expected that the thread ID
+            // space will actually be exhausted in practice. Even on a 32-bit
+            // system, it would require spawning 2^32 threads (although they
+            // wouldn't all need to run simultaneously, so it is in theory
+            // possible).
+            //
+            // This checks that the counter never wraps around, since atomic
+            // addition wraps around on overflow.
+            if next == 0 {
+                panic!("regex: thread ID allocation space exhausted");
+            }
+            next
+        };
+    );
+
+    /// This puts each stack in the pool below into its own cache line. This is
+    /// an absolutely critical optimization that tends to have the most impact
+    /// in high contention workloads. Without forcing each mutex-protected
+    /// stack into its own cache line, high contention exacerbates the
+    /// performance problem by causing "false sharing." By putting each mutex
+    /// in its own cache line, we avoid the false sharing problem and the
+    /// effects of contention are greatly reduced.
+    #[derive(Debug)]
+    #[repr(C, align(64))]
+    struct CacheLine<T>(T);
+
+    /// A thread safe pool utilizing std-only features.
+    ///
+    /// The main difference between this and the simplistic alloc-only pool is
+    /// the use of std::sync::Mutex and an "owner thread" optimization that
+    /// makes accesses by the owner of a pool faster than all other threads.
+    /// This makes the common case of running a regex within a single thread
+    /// faster by avoiding mutex unlocking.
+    pub(super) struct Pool<T, F> {
+        /// A function to create more T values when stack is empty and a caller
+        /// has requested a T.
+        create: F,
+        /// Multiple stacks of T values to hand out. These are used when a Pool
+        /// is accessed by a thread that didn't create it.
+        ///
+        /// Conceptually this is `Mutex<Vec<Box<T>>>`, but sharded out to make
+        /// it scale better under high contention work-loads. We index into
+        /// this sequence via `thread_id % stacks.len()`.
+        stacks: Vec<CacheLine<Mutex<Vec<Box<T>>>>>,
+        /// The ID of the thread that owns this pool. The owner is the thread
+        /// that makes the first call to 'get'. When the owner calls 'get', it
+        /// gets 'owner_val' directly instead of returning a T from 'stack'.
+        /// See comments elsewhere for details, but this is intended to be an
+        /// optimization for the common case that makes getting a T faster.
+        ///
+        /// It is initialized to a value of zero (an impossible thread ID) as a
+        /// sentinel to indicate that it is unowned.
+        owner: AtomicUsize,
+        /// A value to return when the caller is in the same thread that
+        /// first called `Pool::get`.
+        ///
+        /// This is set to None when a Pool is first created, and set to Some
+        /// once the first thread calls Pool::get.
+        owner_val: UnsafeCell<Option<T>>,
+    }
+
+    // SAFETY: Since we want to use a Pool from multiple threads simultaneously
+    // behind an Arc, we need for it to be Sync. In cases where T is Sync,
+    // Pool would be Sync. However, since we use a Pool to store mutable
+    // scratch space, we wind up using a T that has interior mutability and is
+    // thus itself not Sync. So what we *really* want is for our Pool to be
+    // Sync even when T is not Sync (but is at least Send).
+    //
+    // The only non-sync aspect of a Pool is its 'owner_val' field, which is
+    // used to implement faster access to a pool value in the common case of
+    // a pool being accessed in the same thread in which it was created. The
+    // 'stack' field is also shared, but a Mutex where T: Send is already
+    // Sync. So we only need to worry about 'owner_val'.
+    //
+    // The key is to guarantee that 'owner_val' can only ever be accessed from
+    // one thread. In our implementation below, we guarantee this by only
+    // returning the 'owner_val' when the ID of the current thread matches the
+    // ID of the thread that first called 'Pool::get'. Since this can only ever
+    // be one thread, it follows that only one thread can access 'owner_val' at
+    // any point in time. Thus, it is safe to declare that Pool is Sync when
+    // T is Send.
+    //
+    // If there is a way to achieve our performance goals using safe code, then
+    // I would very much welcome a patch. As it stands, the implementation
+    // below tries to balance safety with performance. The case where a Regex
+    // is used from multiple threads simultaneously will suffer a bit since
+    // getting a value out of the pool will require unlocking a mutex.
+    //
+    // We require `F: Send + Sync` because we call `F` at any point on demand,
+    // potentially from multiple threads simultaneously.
+    unsafe impl<T: Send, F: Send + Sync> Sync for Pool<T, F> {}
+
+    // If T is UnwindSafe, then since we provide exclusive access to any
+    // particular value in the pool, the pool should therefore also be
+    // considered UnwindSafe.
+    //
+    // We require `F: UnwindSafe + RefUnwindSafe` because we call `F` at any
+    // point on demand, so it needs to be unwind safe on both dimensions for
+    // the entire Pool to be unwind safe.
+    impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> UnwindSafe
+        for Pool<T, F>
+    {
+    }
+
+    // If T is UnwindSafe, then since we provide exclusive access to any
+    // particular value in the pool, the pool should therefore also be
+    // considered RefUnwindSafe.
+    //
+    // We require `F: UnwindSafe + RefUnwindSafe` because we call `F` at any
+    // point on demand, so it needs to be unwind safe on both dimensions for
+    // the entire Pool to be unwind safe.
+    impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> RefUnwindSafe
+        for Pool<T, F>
+    {
+    }
+
+    impl<T, F> Pool<T, F> {
+        /// Create a new pool. The given closure is used to create values in
+        /// the pool when necessary.
+        pub(super) fn new(create: F) -> Pool<T, F> {
+            // FIXME: Now that we require 1.65+, Mutex::new is available as
+            // const... So we can almost mark this function as const. But of
+            // course, we're creating a Vec of stacks below (we didn't when I
+            // originally wrote this code). It seems like the best way to work
+            // around this would be to use a `[Stack; MAX_POOL_STACKS]` instead
+            // of a `Vec`. I refrained from making this change at time
+            // of writing (2023/10/08) because I was making a lot of other
+            // changes at the same time and wanted to do this more carefully.
+            // Namely, because of the cache line optimization, that `[Stack;
+            // MAX_POOL_STACKS]` would be quite big. It's unclear how bad (if
+            // at all) that would be.
+            //
+            // Another choice would be to lazily allocate the stacks, but...
+            // I'm not so sure about that. Seems like a fair bit of complexity?
+            //
+            // Maybe there's a simple solution I'm missing.
+            //
+            // ... OK, I tried to fix this. First, I did it by putting `stacks`
+            // in an `UnsafeCell` and using a `Once` to lazily initialize it.
+            // I benchmarked it and everything looked okay. I then made this
+            // function `const` and thought I was just about done. But the
+            // public pool type wraps its inner pool in a `Box` to keep its
+            // size down. Blech.
+            //
+            // So then I thought that I could push the box down into this
+            // type (and leave the non-std version unboxed) and use the same
+            // `UnsafeCell` technique to lazily initialize it. This has the
+            // downside of the `Once` now needing to get hit in the owner fast
+            // path, but maybe that's OK? However, I then realized that we can
+            // only lazily initialize `stacks`, `owner` and `owner_val`. The
+            // `create` function needs to be put somewhere outside of the box.
+            // So now the pool is a `Box`, `Once` and a function. Now we're
+            // starting to defeat the point of boxing in the first place. So I
+            // backed out that change too.
+            //
+            // Back to square one. Maybe we just don't make a pool's
+            // constructor const and live with it. It's probably not a huge
+            // deal.
+            let mut stacks = Vec::with_capacity(MAX_POOL_STACKS);
+            for _ in 0..stacks.capacity() {
+                stacks.push(CacheLine(Mutex::new(vec![])));
+            }
+            let owner = AtomicUsize::new(THREAD_ID_UNOWNED);
+            let owner_val = UnsafeCell::new(None); // init'd on first access
+            Pool { create, stacks, owner, owner_val }
+        }
+    }
+
+    impl<T: Send, F: Fn() -> T> Pool<T, F> {
+        /// Get a value from the pool. This may block if another thread is also
+        /// attempting to retrieve a value from the pool.
+        #[inline]
+        pub(super) fn get(&self) -> PoolGuard<'_, T, F> {
+            // Our fast path checks if the caller is the thread that "owns"
+            // this pool. Or stated differently, whether it is the first thread
+            // that tried to extract a value from the pool. If it is, then we
+            // can return a T to the caller without going through a mutex.
+            //
+            // SAFETY: We must guarantee that only one thread gets access
+            // to this value. Since a thread is uniquely identified by the
+            // THREAD_ID thread local, it follows that if the caller's thread
+            // ID is equal to the owner, then only one thread may receive this
+            // value. This is also why we can get away with what looks like a
+            // racy load and a store. We know that if 'owner == caller', then
+            // only one thread can be here, so we don't need to worry about any
+            // other thread setting the owner to something else.
+            let caller = THREAD_ID.with(|id| *id);
+            let owner = self.owner.load(Ordering::Acquire);
+            if caller == owner {
+                // N.B. We could also do a CAS here instead of a load/store,
+                // but ad hoc benchmarking suggests it is slower. And a lot
+                // slower in the case where `get_slow` is common.
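+                //
+                // For reference, the CAS alternative mentioned above would
+                // look roughly like this sketch (illustrative only, not what
+                // this code does):
+                //
+                //     if self.owner.compare_exchange(
+                //         caller,
+                //         THREAD_ID_INUSE,
+                //         Ordering::AcqRel,
+                //         Ordering::Acquire,
+                //     ).is_ok() {
+                //         return self.guard_owned(caller);
+                //     }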
+                self.owner.store(THREAD_ID_INUSE, Ordering::Release);
+                return self.guard_owned(caller);
+            }
+            self.get_slow(caller, owner)
+        }
+
+        /// This is the "slow" version that goes through a mutex to pop an
+        /// allocated value off a stack to return to the caller. (Or, if the
+        /// stack is empty, a new value is created.)
+        ///
+        /// If the pool has no owner, then this will set the owner.
+        #[cold]
+        fn get_slow(
+            &self,
+            caller: usize,
+            owner: usize,
+        ) -> PoolGuard<'_, T, F> {
+            if owner == THREAD_ID_UNOWNED {
+                // This sentinel means this pool is not yet owned. We try to
+                // atomically set the owner. If we do, then this thread becomes
+                // the owner and we can return a guard that represents the
+                // special T for the owner.
+                //
+                // Note that we set the owner to a different sentinel that
+                // indicates that the owned value is in use. The owner ID will
+                // get updated to the actual ID of this thread once the guard
+                // returned by this function is put back into the pool.
+                let res = self.owner.compare_exchange(
+                    THREAD_ID_UNOWNED,
+                    THREAD_ID_INUSE,
+                    Ordering::AcqRel,
+                    Ordering::Acquire,
+                );
+                if res.is_ok() {
+                    // SAFETY: A successful CAS above implies this thread is
+                    // the owner and that this is the only such thread that
+                    // can reach here. Thus, there is no data race.
+                    unsafe {
+                        *self.owner_val.get() = Some((self.create)());
+                    }
+                    return self.guard_owned(caller);
+                }
+            }
+            let stack_id = caller % self.stacks.len();
+            // We try to acquire exclusive access to this thread's stack, and
+            // if so, grab a value from it if we can. We put this in a loop so
+            // that it's easy to tweak and experiment with a different number
+            // of tries. In the end, I couldn't see anything obviously better
+            // than one attempt in ad hoc testing.
+            for _ in 0..1 {
+                let mut stack = match self.stacks[stack_id].0.try_lock() {
+                    Err(_) => continue,
+                    Ok(stack) => stack,
+                };
+                if let Some(value) = stack.pop() {
+                    return self.guard_stack(value);
+                }
+                // Unlock the mutex guarding the stack before creating a fresh
+                // value since we no longer need the stack.
+                drop(stack);
+                let value = Box::new((self.create)());
+                return self.guard_stack(value);
+            }
+            // We're only here if we couldn't get access to our stack, so just
+            // create a new value. This seems like it could be wasteful, but
+            // waiting for exclusive access to a stack when there's high
+            // contention is brutal for perf.
+            self.guard_stack_transient(Box::new((self.create)()))
+        }
+
+        /// Puts a value back into the pool. Callers don't need to call this.
+        /// Once the guard that's returned by 'get' is dropped, it is put back
+        /// into the pool automatically.
+        #[inline]
+        fn put_value(&self, value: Box<T>) {
+            let caller = THREAD_ID.with(|id| *id);
+            let stack_id = caller % self.stacks.len();
+            // As with trying to pop a value from this thread's stack, we
+            // merely attempt to get access to push this value back on the
+            // stack. If there's too much contention, we just give up and throw
+            // the value away.
+            //
+            // Interestingly, in ad hoc benchmarking, it is beneficial to
+            // attempt to push the value back more than once, unlike when
+            // popping the value. I don't have a good theory for why this is.
+            // I guess if we drop too many values then that winds up forcing
+            // the pop operation to create new fresh values and thus leads to
+            // less reuse. There's definitely a balancing act here.
+ for _ in 0..10 { + let mut stack = match self.stacks[stack_id].0.try_lock() { + Err(_) => continue, + Ok(stack) => stack, + }; + stack.push(value); + return; + } + } + + /// Create a guard that represents the special owned T. + #[inline] + fn guard_owned(&self, caller: usize) -> PoolGuard<'_, T, F> { + PoolGuard { pool: self, value: Err(caller), discard: false } + } + + /// Create a guard that contains a value from the pool's stack. + #[inline] + fn guard_stack(&self, value: Box) -> PoolGuard<'_, T, F> { + PoolGuard { pool: self, value: Ok(value), discard: false } + } + + /// Create a guard that contains a value from the pool's stack with an + /// instruction to throw away the value instead of putting it back + /// into the pool. + #[inline] + fn guard_stack_transient(&self, value: Box) -> PoolGuard<'_, T, F> { + PoolGuard { pool: self, value: Ok(value), discard: true } + } + } + + impl core::fmt::Debug for Pool { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("Pool") + .field("stacks", &self.stacks) + .field("owner", &self.owner) + .field("owner_val", &self.owner_val) + .finish() + } + } + + /// A guard that is returned when a caller requests a value from the pool. + pub(super) struct PoolGuard<'a, T: Send, F: Fn() -> T> { + /// The pool that this guard is attached to. + pool: &'a Pool, + /// This is Err when the guard represents the special "owned" value. + /// In which case, the value is retrieved from 'pool.owner_val'. And + /// in the special case of `Err(THREAD_ID_DROPPED)`, it means the + /// guard has been put back into the pool and should no longer be used. + value: Result, usize>, + /// When true, the value should be discarded instead of being pushed + /// back into the pool. We tend to use this under high contention, and + /// this allows us to avoid inflating the size of the pool. (Because + /// under contention, we tend to create more values instead of waiting + /// for access to a stack of existing values.) + discard: bool, + } + + impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { + /// Return the underlying value. + #[inline] + pub(super) fn value(&self) -> &T { + match self.value { + Ok(ref v) => &**v, + // SAFETY: This is safe because the only way a PoolGuard gets + // created for self.value=Err is when the current thread + // corresponds to the owning thread, of which there can only + // be one. Thus, we are guaranteed to be providing exclusive + // access here which makes this safe. + // + // Also, since 'owner_val' is guaranteed to be initialized + // before an owned PoolGuard is created, the unchecked unwrap + // is safe. + Err(id) => unsafe { + // This assert is *not* necessary for safety, since we + // should never be here if the guard had been put back into + // the pool. This is a sanity check to make sure we didn't + // break an internal invariant. + debug_assert_ne!(THREAD_ID_DROPPED, id); + (*self.pool.owner_val.get()).as_ref().unwrap_unchecked() + }, + } + } + + /// Return the underlying value as a mutable borrow. + #[inline] + pub(super) fn value_mut(&mut self) -> &mut T { + match self.value { + Ok(ref mut v) => &mut **v, + // SAFETY: This is safe because the only way a PoolGuard gets + // created for self.value=None is when the current thread + // corresponds to the owning thread, of which there can only + // be one. Thus, we are guaranteed to be providing exclusive + // access here which makes this safe. 
+ // + // Also, since 'owner_val' is guaranteed to be initialized + // before an owned PoolGuard is created, the unwrap_unchecked + // is safe. + Err(id) => unsafe { + // This assert is *not* necessary for safety, since we + // should never be here if the guard had been put back into + // the pool. This is a sanity check to make sure we didn't + // break an internal invariant. + debug_assert_ne!(THREAD_ID_DROPPED, id); + (*self.pool.owner_val.get()).as_mut().unwrap_unchecked() + }, + } + } + + /// Consumes this guard and puts it back into the pool. + #[inline] + pub(super) fn put(this: PoolGuard<'_, T, F>) { + // Since this is effectively consuming the guard and putting the + // value back into the pool, there's no reason to run its Drop + // impl after doing this. I don't believe there is a correctness + // problem with doing so, but there's definitely a perf problem + // by redoing this work. So we avoid it. + let mut this = core::mem::ManuallyDrop::new(this); + this.put_imp(); + } + + /// Puts this guard back into the pool by only borrowing the guard as + /// mutable. This should be called at most once. + #[inline(always)] + fn put_imp(&mut self) { + match core::mem::replace(&mut self.value, Err(THREAD_ID_DROPPED)) { + Ok(value) => { + // If we were told to discard this value then don't bother + // trying to put it back into the pool. This occurs when + // the pop operation failed to acquire a lock and we + // decided to create a new value in lieu of contending for + // the lock. + if self.discard { + return; + } + self.pool.put_value(value); + } + // If this guard has a value "owned" by the thread, then + // the Pool guarantees that this is the ONLY such guard. + // Therefore, in order to place it back into the pool and make + // it available, we need to change the owner back to the owning + // thread's ID. But note that we use the ID that was stored in + // the guard, since a guard can be moved to another thread and + // dropped. (A previous iteration of this code read from the + // THREAD_ID thread local, which uses the ID of the current + // thread which may not be the ID of the owning thread! This + // also avoids the TLS access, which is likely a hair faster.) + Err(owner) => { + // If we hit this point, it implies 'put_imp' has been + // called multiple times for the same guard which in turn + // corresponds to a bug in this implementation. + assert_ne!(THREAD_ID_DROPPED, owner); + self.pool.owner.store(owner, Ordering::Release); + } + } + } + } + + impl<'a, T: Send, F: Fn() -> T> Drop for PoolGuard<'a, T, F> { + #[inline] + fn drop(&mut self) { + self.put_imp(); + } + } + + impl<'a, T: Send + core::fmt::Debug, F: Fn() -> T> core::fmt::Debug + for PoolGuard<'a, T, F> + { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("PoolGuard") + .field("pool", &self.pool) + .field("value", &self.value) + .finish() + } + } +} + +// FUTURE: We should consider using Mara Bos's nearly-lock-free version of this +// here: https://gist.github.com/m-ou-se/5fdcbdf7dcf4585199ce2de697f367a4. +// +// One reason why I did things with a "mutex" below is that it isolates the +// safety concerns to just the Mutex, where as the safety of Mara's pool is a +// bit more sprawling. I also expect this code to not be used that much, and +// so is unlikely to get as much real world usage with which to test it. That +// means the "obviously correct" lever is an important one. 
+// +// The specific reason to use Mara's pool is that it is likely faster and also +// less likely to hit problems with spin-locks, although it is not completely +// impervious to them. +// +// The best solution to this problem, probably, is a truly lock free pool. That +// could be done with a lock free linked list. The issue is the ABA problem. It +// is difficult to avoid, and doing so is complex. BUT, the upshot of that is +// that if we had a truly lock free pool, then we could also use it above in +// the 'std' pool instead of a Mutex because it should be completely free the +// problems that come from spin-locks. +#[cfg(not(feature = "std"))] +mod inner { + use core::{ + cell::UnsafeCell, + panic::{RefUnwindSafe, UnwindSafe}, + sync::atomic::{AtomicBool, Ordering}, + }; + + use alloc::{boxed::Box, vec, vec::Vec}; + + /// A thread safe pool utilizing alloc-only features. + /// + /// Unlike the std version, it doesn't seem possible(?) to implement the + /// "thread owner" optimization because alloc-only doesn't have any concept + /// of threads. So the best we can do is just a normal stack. This will + /// increase latency in alloc-only environments. + pub(super) struct Pool { + /// A stack of T values to hand out. These are used when a Pool is + /// accessed by a thread that didn't create it. + stack: Mutex>>, + /// A function to create more T values when stack is empty and a caller + /// has requested a T. + create: F, + } + + // If T is UnwindSafe, then since we provide exclusive access to any + // particular value in the pool, it should therefore also be considered + // RefUnwindSafe. + impl RefUnwindSafe for Pool {} + + impl Pool { + /// Create a new pool. The given closure is used to create values in + /// the pool when necessary. + pub(super) const fn new(create: F) -> Pool { + Pool { stack: Mutex::new(vec![]), create } + } + } + + impl T> Pool { + /// Get a value from the pool. This may block if another thread is also + /// attempting to retrieve a value from the pool. + #[inline] + pub(super) fn get(&self) -> PoolGuard<'_, T, F> { + let mut stack = self.stack.lock(); + let value = match stack.pop() { + None => Box::new((self.create)()), + Some(value) => value, + }; + PoolGuard { pool: self, value: Some(value) } + } + + #[inline] + fn put(&self, guard: PoolGuard<'_, T, F>) { + let mut guard = core::mem::ManuallyDrop::new(guard); + if let Some(value) = guard.value.take() { + self.put_value(value); + } + } + + /// Puts a value back into the pool. Callers don't need to call this. + /// Once the guard that's returned by 'get' is dropped, it is put back + /// into the pool automatically. + #[inline] + fn put_value(&self, value: Box) { + let mut stack = self.stack.lock(); + stack.push(value); + } + } + + impl core::fmt::Debug for Pool { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("Pool").field("stack", &self.stack).finish() + } + } + + /// A guard that is returned when a caller requests a value from the pool. + pub(super) struct PoolGuard<'a, T: Send, F: Fn() -> T> { + /// The pool that this guard is attached to. + pool: &'a Pool, + /// This is None after the guard has been put back into the pool. + value: Option>, + } + + impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { + /// Return the underlying value. + #[inline] + pub(super) fn value(&self) -> &T { + self.value.as_deref().unwrap() + } + + /// Return the underlying value as a mutable borrow. 
+        #[inline]
+        pub(super) fn value_mut(&mut self) -> &mut T {
+            self.value.as_deref_mut().unwrap()
+        }
+
+        /// Consumes this guard and puts it back into the pool.
+        #[inline]
+        pub(super) fn put(this: PoolGuard<'_, T, F>) {
+            // Since this is effectively consuming the guard and putting the
+            // value back into the pool, there's no reason to run its Drop
+            // impl after doing this. I don't believe there is a correctness
+            // problem with doing so, but there's definitely a perf problem
+            // by redoing this work. So we avoid it.
+            let mut this = core::mem::ManuallyDrop::new(this);
+            this.put_imp();
+        }
+
+        /// Puts this guard back into the pool by only borrowing the guard as
+        /// mutable. This should be called at most once.
+        #[inline(always)]
+        fn put_imp(&mut self) {
+            if let Some(value) = self.value.take() {
+                self.pool.put_value(value);
+            }
+        }
+    }
+
+    impl<'a, T: Send, F: Fn() -> T> Drop for PoolGuard<'a, T, F> {
+        #[inline]
+        fn drop(&mut self) {
+            self.put_imp();
+        }
+    }
+
+    impl<'a, T: Send + core::fmt::Debug, F: Fn() -> T> core::fmt::Debug
+        for PoolGuard<'a, T, F>
+    {
+        fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+            f.debug_struct("PoolGuard")
+                .field("pool", &self.pool)
+                .field("value", &self.value)
+                .finish()
+        }
+    }
+
+    /// A spin-lock based mutex. Yes, I have read spinlocks considered
+    /// harmful[1], and if there's a reasonable alternative choice, I'll
+    /// happily take it.
+    ///
+    /// I suspect the most likely alternative here is a Treiber stack, but
+    /// implementing one correctly in a way that avoids the ABA problem looks
+    /// subtle enough that I'm not sure I want to attempt that. But otherwise,
+    /// we only need a mutex in order to implement our pool, so if there's
+    /// something simpler we can use that works for our `Pool` use case, then
+    /// that would be great.
+    ///
+    /// Note that this mutex does not do poisoning.
+    ///
+    /// [1]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
+    #[derive(Debug)]
+    struct Mutex<T> {
+        locked: AtomicBool,
+        data: UnsafeCell<T>,
+    }
+
+    // SAFETY: Since a Mutex guarantees exclusive access, as long as we can
+    // send it across threads, it must also be Sync.
+    unsafe impl<T: Send> Sync for Mutex<T> {}
+
+    impl<T> Mutex<T> {
+        /// Create a new mutex for protecting access to the given value across
+        /// multiple threads simultaneously.
+        const fn new(value: T) -> Mutex<T> {
+            Mutex {
+                locked: AtomicBool::new(false),
+                data: UnsafeCell::new(value),
+            }
+        }
+
+        /// Lock this mutex and return a guard providing exclusive access to
+        /// `T`. This blocks if some other thread has already locked this
+        /// mutex.
+        #[inline]
+        fn lock(&self) -> MutexGuard<'_, T> {
+            while self
+                .locked
+                .compare_exchange(
+                    false,
+                    true,
+                    Ordering::AcqRel,
+                    Ordering::Acquire,
+                )
+                .is_err()
+            {
+                core::hint::spin_loop();
+            }
+            // SAFETY: The only way we're here is if we successfully set
+            // 'locked' to true, which implies we must be the only thread here
+            // and thus have exclusive access to 'data'.
+            let data = unsafe { &mut *self.data.get() };
+            MutexGuard { locked: &self.locked, data }
+        }
+    }
+
+    /// A guard that derefs to &T and &mut T. When it's dropped, the lock is
+    /// released.
+ #[derive(Debug)] + struct MutexGuard<'a, T> { + locked: &'a AtomicBool, + data: &'a mut T, + } + + impl<'a, T> core::ops::Deref for MutexGuard<'a, T> { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + self.data + } + } + + impl<'a, T> core::ops::DerefMut for MutexGuard<'a, T> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + self.data + } + } + + impl<'a, T> Drop for MutexGuard<'a, T> { + #[inline] + fn drop(&mut self) { + // Drop means 'data' is no longer accessible, so we can unlock + // the mutex. + self.locked.store(false, Ordering::Release); + } + } +} + +#[cfg(test)] +mod tests { + use core::panic::{RefUnwindSafe, UnwindSafe}; + + use alloc::{boxed::Box, vec, vec::Vec}; + + use super::*; + + #[test] + fn oibits() { + fn assert_oitbits() {} + assert_oitbits::>>(); + assert_oitbits::>>>(); + assert_oitbits::< + Pool< + Vec, + Box< + dyn Fn() -> Vec + + Send + + Sync + + UnwindSafe + + RefUnwindSafe, + >, + >, + >(); + } + + // Tests that Pool implements the "single owner" optimization. That is, the + // thread that first accesses the pool gets its own copy, while all other + // threads get distinct copies. + #[cfg(feature = "std")] + #[test] + fn thread_owner_optimization() { + use std::{cell::RefCell, sync::Arc, vec}; + + let pool: Arc>>> = + Arc::new(Pool::new(|| RefCell::new(vec!['a']))); + pool.get().borrow_mut().push('x'); + + let pool1 = pool.clone(); + let t1 = std::thread::spawn(move || { + let guard = pool1.get(); + guard.borrow_mut().push('y'); + }); + + let pool2 = pool.clone(); + let t2 = std::thread::spawn(move || { + let guard = pool2.get(); + guard.borrow_mut().push('z'); + }); + + t1.join().unwrap(); + t2.join().unwrap(); + + // If we didn't implement the single owner optimization, then one of + // the threads above is likely to have mutated the [a, x] vec that + // we stuffed in the pool before spawning the threads. But since + // neither thread was first to access the pool, and because of the + // optimization, we should be guaranteed that neither thread mutates + // the special owned pool value. + // + // (Technically this is an implementation detail and not a contract of + // Pool's API.) + assert_eq!(vec!['a', 'x'], *pool.get().borrow()); + } + + // This tests that if the "owner" of a pool asks for two values, then it + // gets two distinct values and not the same one. This test failed in the + // course of developing the pool, which in turn resulted in UB because it + // permitted getting aliasing &mut borrows to the same place in memory. + #[test] + fn thread_owner_distinct() { + let pool = Pool::new(|| vec!['a']); + + { + let mut g1 = pool.get(); + let v1 = &mut *g1; + let mut g2 = pool.get(); + let v2 = &mut *g2; + v1.push('b'); + v2.push('c'); + assert_eq!(&mut vec!['a', 'b'], v1); + assert_eq!(&mut vec!['a', 'c'], v2); + } + // This isn't technically guaranteed, but we + // expect to now get the "owned" value (the first + // call to 'get()' above) now that it's back in + // the pool. + assert_eq!(&mut vec!['a', 'b'], &mut *pool.get()); + } + + // This tests that we can share a guard with another thread, mutate the + // underlying value and everything works. This failed in the course of + // developing a pool since the pool permitted 'get()' to return the same + // value to the owner thread, even before the previous value was put back + // into the pool. This in turn resulted in this test producing a data race. 
+ #[cfg(feature = "std")] + #[test] + fn thread_owner_sync() { + let pool = Pool::new(|| vec!['a']); + { + let mut g1 = pool.get(); + let mut g2 = pool.get(); + std::thread::scope(|s| { + s.spawn(|| { + g1.push('b'); + }); + s.spawn(|| { + g2.push('c'); + }); + }); + + let v1 = &mut *g1; + let v2 = &mut *g2; + assert_eq!(&mut vec!['a', 'b'], v1); + assert_eq!(&mut vec!['a', 'c'], v2); + } + + // This isn't technically guaranteed, but we + // expect to now get the "owned" value (the first + // call to 'get()' above) now that it's back in + // the pool. + assert_eq!(&mut vec!['a', 'b'], &mut *pool.get()); + } + + // This tests that if we move a PoolGuard that is owned by the current + // thread to another thread and drop it, then the thread owner doesn't + // change. During development of the pool, this test failed because the + // PoolGuard assumed it was dropped in the same thread from which it was + // created, and thus used the current thread's ID as the owner, which could + // be different than the actual owner of the pool. + #[cfg(feature = "std")] + #[test] + fn thread_owner_send_drop() { + let pool = Pool::new(|| vec!['a']); + // Establishes this thread as the owner. + { + pool.get().push('b'); + } + std::thread::scope(|s| { + // Sanity check that we get the same value back. + // (Not technically guaranteed.) + let mut g = pool.get(); + assert_eq!(&vec!['a', 'b'], &*g); + // Now push it to another thread and drop it. + s.spawn(move || { + g.push('c'); + }) + .join() + .unwrap(); + }); + // Now check that we're still the owner. This is not technically + // guaranteed by the API, but is true in practice given the thread + // owner optimization. + assert_eq!(&vec!['a', 'b', 'c'], &*pool.get()); + } +} diff --git a/vendor/regex-automata/src/util/prefilter/aho_corasick.rs b/vendor/regex-automata/src/util/prefilter/aho_corasick.rs new file mode 100644 index 0000000..50cce82 --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/aho_corasick.rs @@ -0,0 +1,149 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct AhoCorasick { + #[cfg(not(feature = "perf-literal-multisubstring"))] + _unused: (), + #[cfg(feature = "perf-literal-multisubstring")] + ac: aho_corasick::AhoCorasick, +} + +impl AhoCorasick { + pub(crate) fn new>( + kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + None + } + #[cfg(feature = "perf-literal-multisubstring")] + { + // We used to use `aho_corasick::MatchKind::Standard` here when + // `kind` was `MatchKind::All`, but this is not correct. The + // "standard" Aho-Corasick match semantics are to report a match + // immediately as soon as it is seen, but `All` isn't like that. + // In particular, with "standard" semantics, given the needles + // "abc" and "b" and the haystack "abc," it would report a match + // at offset 1 before a match at offset 0. This is never what we + // want in the context of the regex engine, regardless of whether + // we have leftmost-first or 'all' semantics. Namely, we always + // want the leftmost match. + let ac_match_kind = match kind { + MatchKind::LeftmostFirst | MatchKind::All => { + aho_corasick::MatchKind::LeftmostFirst + } + }; + // This is kind of just an arbitrary number, but basically, if we + // have a small enough set of literals, then we try to use the VERY + // memory hungry DFA. Otherwise, we whimp out and use an NFA. The + // upshot is that the NFA is quite lean and decently fast. 
Faster + // than a naive Aho-Corasick NFA anyway. + let ac_kind = if needles.len() <= 500 { + aho_corasick::AhoCorasickKind::DFA + } else { + aho_corasick::AhoCorasickKind::ContiguousNFA + }; + let result = aho_corasick::AhoCorasick::builder() + .kind(Some(ac_kind)) + .match_kind(ac_match_kind) + .start_kind(aho_corasick::StartKind::Both) + // We try to handle all of the prefilter cases in the super + // module, and only use Aho-Corasick for the actual automaton. + // The aho-corasick crate does have some extra prefilters, + // namely, looking for rare bytes to feed to memchr{,2,3} + // instead of just the first byte. If we end up wanting + // those---and they are somewhat tricky to implement---then + // we could port them to this crate. + // + // The main reason for doing things this way is so we have a + // complete and easy to understand picture of which prefilters + // are available and how they work. Otherwise it seems too + // easy to get into a situation where we have a prefilter + // layered on top of prefilter, and that might have unintended + // consequences. + .prefilter(false) + .build(needles); + let ac = match result { + Ok(ac) => ac, + Err(_err) => { + debug!("aho-corasick prefilter failed to build: {}", _err); + return None; + } + }; + Some(AhoCorasick { ac }) + } + } +} + +impl PrefilterI for AhoCorasick { + fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + let input = + aho_corasick::Input::new(haystack).span(span.start..span.end); + self.ac + .find(input) + .map(|m| Span { start: m.start(), end: m.end() }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + let input = aho_corasick::Input::new(haystack) + .anchored(aho_corasick::Anchored::Yes) + .span(span.start..span.end); + self.ac + .find(input) + .map(|m| Span { start: m.start(), end: m.end() }) + } + } + + fn memory_usage(&self) -> usize { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + self.ac.memory_usage() + } + } + + fn is_fast(&self) -> bool { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + // Aho-Corasick is never considered "fast" because it's never + // going to be even close to an order of magnitude faster than the + // regex engine itself (assuming a DFA is used). In fact, it is + // usually slower. The magic of Aho-Corasick is that it can search + // a *large* number of literals with a relatively small amount of + // memory. The regex engines are far more wasteful. + // + // Aho-Corasick may be "fast" when the regex engine corresponds + // to, say, the PikeVM. That happens when the lazy DFA couldn't be + // built or used for some reason. But in these cases, the regex + // itself is likely quite big and we're probably hosed no matter + // what we do. (In this case, the best bet is for the caller to + // increase some of the memory limits on the hybrid cache capacity + // and hope that's enough.) 
+ false + } + } +} diff --git a/vendor/regex-automata/src/util/prefilter/byteset.rs b/vendor/regex-automata/src/util/prefilter/byteset.rs new file mode 100644 index 0000000..a669d6c --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/byteset.rs @@ -0,0 +1,58 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct ByteSet([bool; 256]); + +impl ByteSet { + pub(crate) fn new>( + _kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + None + } + #[cfg(feature = "perf-literal-multisubstring")] + { + let mut set = [false; 256]; + for needle in needles.iter() { + let needle = needle.as_ref(); + if needle.len() != 1 { + return None; + } + set[usize::from(needle[0])] = true; + } + Some(ByteSet(set)) + } + } +} + +impl PrefilterI for ByteSet { + fn find(&self, haystack: &[u8], span: Span) -> Option { + haystack[span].iter().position(|&b| self.0[usize::from(b)]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + let b = *haystack.get(span.start)?; + if self.0[usize::from(b)] { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + false + } +} diff --git a/vendor/regex-automata/src/util/prefilter/memchr.rs b/vendor/regex-automata/src/util/prefilter/memchr.rs new file mode 100644 index 0000000..3d44b83 --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/memchr.rs @@ -0,0 +1,186 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct Memchr(u8); + +impl Memchr { + pub(crate) fn new>( + _kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + None + } + #[cfg(feature = "perf-literal-substring")] + { + if needles.len() != 1 { + return None; + } + if needles[0].as_ref().len() != 1 { + return None; + } + Some(Memchr(needles[0].as_ref()[0])) + } + } +} + +impl PrefilterI for Memchr { + fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-substring")] + { + memchr::memchr(self.0, &haystack[span]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + let b = *haystack.get(span.start)?; + if self.0 == b { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + true + } +} + +#[derive(Clone, Debug)] +pub(crate) struct Memchr2(u8, u8); + +impl Memchr2 { + pub(crate) fn new>( + _kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + None + } + #[cfg(feature = "perf-literal-substring")] + { + if needles.len() != 2 { + return None; + } + if !needles.iter().all(|n| n.as_ref().len() == 1) { + return None; + } + let b1 = needles[0].as_ref()[0]; + let b2 = needles[1].as_ref()[0]; + Some(Memchr2(b1, b2)) + } + } +} + +impl PrefilterI for Memchr2 { + fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-substring")] + { + memchr::memchr2(self.0, self.1, 
&haystack[span]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + let b = *haystack.get(span.start)?; + if self.0 == b || self.1 == b { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + true + } +} + +#[derive(Clone, Debug)] +pub(crate) struct Memchr3(u8, u8, u8); + +impl Memchr3 { + pub(crate) fn new>( + _kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + None + } + #[cfg(feature = "perf-literal-substring")] + { + if needles.len() != 3 { + return None; + } + if !needles.iter().all(|n| n.as_ref().len() == 1) { + return None; + } + let b1 = needles[0].as_ref()[0]; + let b2 = needles[1].as_ref()[0]; + let b3 = needles[2].as_ref()[0]; + Some(Memchr3(b1, b2, b3)) + } + } +} + +impl PrefilterI for Memchr3 { + fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-substring")] + { + memchr::memchr3(self.0, self.1, self.2, &haystack[span]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + let b = *haystack.get(span.start)?; + if self.0 == b || self.1 == b || self.2 == b { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + true + } +} diff --git a/vendor/regex-automata/src/util/prefilter/memmem.rs b/vendor/regex-automata/src/util/prefilter/memmem.rs new file mode 100644 index 0000000..deea17b --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/memmem.rs @@ -0,0 +1,88 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct Memmem { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + _unused: (), + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + finder: memchr::memmem::Finder<'static>, +} + +impl Memmem { + pub(crate) fn new>( + _kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + None + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + if needles.len() != 1 { + return None; + } + let needle = needles[0].as_ref(); + let finder = memchr::memmem::Finder::new(needle).into_owned(); + Some(Memmem { finder }) + } + } +} + +impl PrefilterI for Memmem { + fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + self.finder.find(&haystack[span]).map(|i| { + let start = span.start + i; + let end = start + self.finder.needle().len(); + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + let needle = self.finder.needle(); + if haystack[span].starts_with(needle) { + Some(Span { end: span.start + needle.len(), ..span }) + } else { + None + } + } + } + + fn memory_usage(&self) -> usize { + #[cfg(not(all(feature = "std", 
feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + self.finder.needle().len() + } + } + + fn is_fast(&self) -> bool { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + true + } + } +} diff --git a/vendor/regex-automata/src/util/prefilter/mod.rs b/vendor/regex-automata/src/util/prefilter/mod.rs new file mode 100644 index 0000000..51fc922 --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/mod.rs @@ -0,0 +1,696 @@ +/*! +Defines a prefilter for accelerating regex searches. + +A prefilter can be created by building a [`Prefilter`] value. + +A prefilter represents one of the most important optimizations available for +accelerating regex searches. The idea of a prefilter is to very quickly find +candidate locations in a haystack where a regex _could_ match. Once a candidate +is found, it is then intended for the regex engine to run at that position to +determine whether the candidate is a match or a false positive. + +In the aforementioned description of the prefilter optimization also lay its +demise. Namely, if a prefilter has a high false positive rate and it produces +lots of candidates, then a prefilter can overall make a regex search slower. +It can run more slowly because more time is spent ping-ponging between the +prefilter search and the regex engine attempting to confirm each candidate as +a match. This ping-ponging has overhead that adds up, and is exacerbated by +a high false positive rate. + +Nevertheless, the optimization is still generally worth performing in most +cases. Particularly given just how much throughput can be improved. (It is not +uncommon for prefilter optimizations to improve throughput by one or two orders +of magnitude.) + +Typically a prefilter is used to find occurrences of literal prefixes from a +regex pattern, but this isn't required. A prefilter can be used to look for +suffixes or even inner literals. + +Note that as of now, prefilters throw away information about which pattern +each literal comes from. In other words, when a prefilter finds a match, +there's no way to know which pattern (or patterns) it came from. Therefore, +in order to confirm a match, you'll have to check all of the patterns by +running the full regex engine. +*/ + +mod aho_corasick; +mod byteset; +mod memchr; +mod memmem; +mod teddy; + +use core::{ + borrow::Borrow, + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, +}; + +#[cfg(feature = "alloc")] +use alloc::sync::Arc; + +#[cfg(feature = "syntax")] +use regex_syntax::hir::{literal, Hir}; + +use crate::util::search::{MatchKind, Span}; + +pub(crate) use crate::util::prefilter::{ + aho_corasick::AhoCorasick, + byteset::ByteSet, + memchr::{Memchr, Memchr2, Memchr3}, + memmem::Memmem, + teddy::Teddy, +}; + +/// A prefilter for accelerating regex searches. +/// +/// If you already have your literals that you want to search with, +/// then the vanilla [`Prefilter::new`] constructor is for you. But +/// if you have an [`Hir`] value from the `regex-syntax` crate, then +/// [`Prefilter::from_hir_prefix`] might be more convenient. Namely, it uses +/// the [`regex-syntax::hir::literal`](regex_syntax::hir::literal) module to +/// extract literal prefixes for you, optimize them and then select and build a +/// prefilter matcher. +/// +/// A prefilter must have **zero false negatives**. 
However, by its very
+/// nature, it may produce false positives. That is, a prefilter will never
+/// skip over a position in the haystack that corresponds to a match of the
+/// original regex pattern, but it *may* produce a match for a position
+/// in the haystack that does *not* correspond to a match of the original
+/// regex pattern. If you use either the [`Prefilter::from_hir_prefix`] or
+/// [`Prefilter::from_hirs_prefix`] constructors, then this guarantee is
+/// upheld for you automatically. This guarantee is not preserved if you use
+/// [`Prefilter::new`] though, since it is up to the caller to provide correct
+/// literal strings with respect to the original regex pattern.
+///
+/// # Cloning
+///
+/// It is an API guarantee that cloning a prefilter is cheap. That is, cloning
+/// it will not duplicate whatever heap memory is used to represent the
+/// underlying matcher.
+///
+/// # Example
+///
+/// This example shows how to attach a `Prefilter` to the
+/// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) in order to accelerate
+/// searches.
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::pikevm::PikeVM,
+///     util::prefilter::Prefilter,
+///     Match, MatchKind,
+/// };
+///
+/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Bruce "])
+///     .expect("a prefilter");
+/// let re = PikeVM::builder()
+///     .configure(PikeVM::config().prefilter(Some(pre)))
+///     .build(r"Bruce \w+")?;
+/// let mut cache = re.create_cache();
+/// assert_eq!(
+///     Some(Match::must(0, 6..23)),
+///     re.find(&mut cache, "Hello Bruce Springsteen!"),
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// But note that if you get your prefilter incorrect, it could lead to an
+/// incorrect result!
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::pikevm::PikeVM,
+///     util::prefilter::Prefilter,
+///     Match, MatchKind,
+/// };
+///
+/// // This prefilter is wrong!
+/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Patti "])
+///     .expect("a prefilter");
+/// let re = PikeVM::builder()
+///     .configure(PikeVM::config().prefilter(Some(pre)))
+///     .build(r"Bruce \w+")?;
+/// let mut cache = re.create_cache();
+/// // We find no match even though the regex does match.
+/// assert_eq!(
+///     None,
+///     re.find(&mut cache, "Hello Bruce Springsteen!"),
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Prefilter {
+    #[cfg(not(feature = "alloc"))]
+    _unused: (),
+    #[cfg(feature = "alloc")]
+    pre: Arc<dyn PrefilterI>,
+    #[cfg(feature = "alloc")]
+    is_fast: bool,
+}
+
+impl Prefilter {
+    /// Create a new prefilter from a sequence of needles and a corresponding
+    /// match semantics.
+    ///
+    /// This may return `None` for a variety of reasons, for example, if
+    /// a suitable prefilter could not be constructed. That might occur
+    /// if they are unavailable (e.g., the `perf-literal-substring` and
+    /// `perf-literal-multisubstring` features aren't enabled), or it might
+    /// occur because of heuristics or other artifacts of how the prefilter
+    /// works.
+    ///
+    /// Note that if you have an [`Hir`] expression, it may be more convenient
+    /// to use [`Prefilter::from_hir_prefix`]. It will automatically handle the
+    /// task of extracting prefix literals for you.
+    ///
+    /// # Example
+    ///
+    /// This example shows how match semantics can impact the matching
+    /// algorithm used by the prefilter. For this reason, it is important to
+    /// ensure that the match semantics given here are consistent with the
+    /// match semantics intended for the regular expression that the literals
+    /// were extracted from.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     util::{prefilter::Prefilter, syntax},
+    ///     MatchKind, Span,
+    /// };
+    ///
+    /// let hay = "Hello samwise";
+    ///
+    /// // With leftmost-first, we find 'samwise' here because it comes
+    /// // before 'sam' in the sequence we give it.
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["samwise", "sam"])
+    ///     .expect("a prefilter");
+    /// assert_eq!(
+    ///     Some(Span::from(6..13)),
+    ///     pre.find(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    /// // Still with leftmost-first but with the literals reversed, now 'sam'
+    /// // will match instead!
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["sam", "samwise"])
+    ///     .expect("a prefilter");
+    /// assert_eq!(
+    ///     Some(Span::from(6..9)),
+    ///     pre.find(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn new<B: AsRef<[u8]>>(
+        kind: MatchKind,
+        needles: &[B],
+    ) -> Option<Prefilter> {
+        Choice::new(kind, needles).and_then(Prefilter::from_choice)
+    }
+
+    /// This turns a prefilter selection into a `Prefilter`. That is, it turns
+    /// the enum given into a trait object.
+    fn from_choice(choice: Choice) -> Option<Prefilter> {
+        #[cfg(not(feature = "alloc"))]
+        {
+            None
+        }
+        #[cfg(feature = "alloc")]
+        {
+            let pre: Arc<dyn PrefilterI> = match choice {
+                Choice::Memchr(p) => Arc::new(p),
+                Choice::Memchr2(p) => Arc::new(p),
+                Choice::Memchr3(p) => Arc::new(p),
+                Choice::Memmem(p) => Arc::new(p),
+                Choice::Teddy(p) => Arc::new(p),
+                Choice::ByteSet(p) => Arc::new(p),
+                Choice::AhoCorasick(p) => Arc::new(p),
+            };
+            let is_fast = pre.is_fast();
+            Some(Prefilter { pre, is_fast })
+        }
+    }
+
+    /// This attempts to extract prefixes from the given `Hir` expression for
+    /// the given match semantics, and if possible, builds a prefilter for
+    /// them.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a prefilter directly from an [`Hir`]
+    /// expression, and use it to find an occurrence of a prefix from the regex
+    /// pattern.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     util::{prefilter::Prefilter, syntax},
+    ///     MatchKind, Span,
+    /// };
+    ///
+    /// let hir = syntax::parse(r"(Bruce|Patti) \w+")?;
+    /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
+    ///     .expect("a prefilter");
+    /// let hay = "Hello Patti Scialfa!";
+    /// assert_eq!(
+    ///     Some(Span::from(6..12)),
+    ///     pre.find(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn from_hir_prefix(kind: MatchKind, hir: &Hir) -> Option<Prefilter> {
+        Prefilter::from_hirs_prefix(kind, &[hir])
+    }
+
+    /// This attempts to extract prefixes from the given `Hir` expressions for
+    /// the given match semantics, and if possible, builds a prefilter for
+    /// them.
+    ///
+    /// Note that as of now, prefilters throw away information about which
+    /// pattern each literal comes from. In other words, when a prefilter finds
+    /// a match, there's no way to know which pattern (or patterns) it came
+    /// from. Therefore, in order to confirm a match, you'll have to check all
+    /// of the patterns by running the full regex engine.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a prefilter directly from multiple
+    /// `Hir` expressions, and use it to find an occurrence of a
+    /// prefix from the regex patterns.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     util::{prefilter::Prefilter, syntax},
+    ///     MatchKind, Span,
+    /// };
+    ///
+    /// let hirs = syntax::parse_many(&[
+    ///     r"(Bruce|Patti) \w+",
+    ///     r"Mrs?\. Doubtfire",
+    /// ])?;
+    /// let pre = Prefilter::from_hirs_prefix(MatchKind::LeftmostFirst, &hirs)
+    ///     .expect("a prefilter");
+    /// let hay = "Hello Mrs. Doubtfire";
+    /// assert_eq!(
+    ///     Some(Span::from(6..20)),
+    ///     pre.find(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn from_hirs_prefix<H: Borrow<Hir>>(
+        kind: MatchKind,
+        hirs: &[H],
+    ) -> Option<Prefilter> {
+        prefixes(kind, hirs)
+            .literals()
+            .and_then(|lits| Prefilter::new(kind, lits))
+    }
+
+    /// Run this prefilter on `haystack[span.start..end]` and return a matching
+    /// span if one exists.
+    ///
+    /// The span returned is guaranteed to have a start position greater than
+    /// or equal to the one given, and an end position less than or equal to
+    /// the one given.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a prefilter directly from an [`Hir`]
+    /// expression, and use it to find an occurrence of a prefix from the regex
+    /// pattern.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     util::{prefilter::Prefilter, syntax},
+    ///     MatchKind, Span,
+    /// };
+    ///
+    /// let hir = syntax::parse(r"Bruce \w+")?;
+    /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
+    ///     .expect("a prefilter");
+    /// let hay = "Hello Bruce Springsteen!";
+    /// assert_eq!(
+    ///     Some(Span::from(6..12)),
+    ///     pre.find(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        #[cfg(not(feature = "alloc"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "alloc")]
+        {
+            self.pre.find(haystack, span)
+        }
+    }
+
+    /// Returns the span of a prefix of `haystack[span.start..span.end]` if
+    /// the prefilter matches.
+    ///
+    /// The span returned is guaranteed to have a start position equivalent to
+    /// the one given, and an end position less than or equal to the one given.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a prefilter directly from an [`Hir`]
+    /// expression, and use it to find an occurrence of a prefix from the regex
+    /// pattern that begins at the start of a haystack only.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     util::{prefilter::Prefilter, syntax},
+    ///     MatchKind, Span,
+    /// };
+    ///
+    /// let hir = syntax::parse(r"Bruce \w+")?;
+    /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
+    ///     .expect("a prefilter");
+    /// let hay = "Hello Bruce Springsteen!";
+    /// // Nothing is found here because 'Bruce' does
+    /// // not occur at the beginning of our search.
+    /// assert_eq!(
+    ///     None,
+    ///     pre.prefix(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    /// // But if we change where we start the search
+    /// // to begin where 'Bruce ' begins, then a
+    /// // match will be found.
+    /// assert_eq!(
+    ///     Some(Span::from(6..12)),
+    ///     pre.prefix(hay.as_bytes(), Span::from(6..hay.len())),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        #[cfg(not(feature = "alloc"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "alloc")]
+        {
+            self.pre.prefix(haystack, span)
+        }
+    }
+
+    /// Returns the heap memory, in bytes, used by the underlying prefilter.
+    #[inline]
+    pub fn memory_usage(&self) -> usize {
+        #[cfg(not(feature = "alloc"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "alloc")]
+        {
+            self.pre.memory_usage()
+        }
+    }
+
+    /// Implementations might return true here if they believe themselves to
+    /// be "fast." The concept of "fast" is deliberately left vague, but in
+    /// practice this usually corresponds to whether it's believed that SIMD
+    /// will be used.
+    ///
+    /// Why do we care about this? Well, some prefilter tricks tend to come
+    /// with their own bits of overhead, and so might only make sense if we
+    /// know that a scan will be *much* faster than the regex engine itself.
+    /// Otherwise, the trick may not be worth doing. Whether something is
+    /// "much" faster than the regex engine generally boils down to whether
+    /// SIMD is used. (But not always. Even a SIMD matcher with a high false
+    /// positive rate can become quite slow.)
+    ///
+    /// Even if this returns true, it is still possible for the prefilter to
+    /// be "slow." Remember, prefilters are just heuristics. We can't really
+    /// *know* a prefilter will be fast without actually trying the prefilter.
+    /// (Which of course we cannot afford to do.)
+    #[inline]
+    pub(crate) fn is_fast(&self) -> bool {
+        #[cfg(not(feature = "alloc"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "alloc")]
+        {
+            self.is_fast
+        }
+    }
+}
+
+/// A trait for abstracting over prefilters. Basically, a prefilter is
+/// something that can do an unanchored *and* an anchored search in a haystack
+/// within a given span.
+///
+/// This exists pretty much only so that we can use prefilters as a trait
+/// object (which is what `Prefilter` is). If we ever move off of trait objects
+/// and to an enum, then it's likely this trait could be removed.
+pub(crate) trait PrefilterI:
+    Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static
+{
+    /// Run this prefilter on `haystack[span.start..end]` and return a matching
+    /// span if one exists.
+    ///
+    /// The span returned is guaranteed to have a start position greater than
+    /// or equal to the one given, and an end position less than or equal to
+    /// the one given.
+    fn find(&self, haystack: &[u8], span: Span) -> Option<Span>;
+
+    /// Returns the span of a prefix of `haystack[span.start..span.end]` if
+    /// the prefilter matches.
+    ///
+    /// The span returned is guaranteed to have a start position equivalent to
+    /// the one given, and an end position less than or equal to the one given.
+    fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span>;
+
+    /// Returns the heap memory, in bytes, used by the underlying prefilter.
+    fn memory_usage(&self) -> usize;
+
+    /// Implementations might return true here if they believe themselves to
+    /// be "fast." See [`Prefilter::is_fast`] for more details.
+    fn is_fast(&self) -> bool;
+}
+
+#[cfg(feature = "alloc")]
+impl<P: PrefilterI + ?Sized> PrefilterI for Arc<P>

 .             any character except new line (includes new line with s flag)
+[0-9]         any ASCII digit
 \d            digit (\p{Nd})
 \D            not digit
-\pN           One-letter name Unicode character class
+\pX           Unicode character class identified by a one-letter name
 \p{Greek}     Unicode character class (general category or script)
-\PN           Negated one-letter name Unicode character class
+\PX           Negated Unicode character class identified by a one-letter name
 \P{Greek}     negated Unicode character class (general category or script)
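
To make the class syntax above concrete, here is a minimal sketch using the top-level `Regex` API (the pattern and haystack are purely illustrative):

```rust
use regex::Regex;

// \p{Greek} matches any codepoint in the Greek script; \d would similarly
// match any Unicode decimal digit by default.
let re = Regex::new(r"\p{Greek}+").unwrap();
let m = re.find("alpha βγδ omega").unwrap();
assert_eq!(m.as_str(), "βγδ");
```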
 
@@ -305,27 +663,49 @@ a separate crate, [`regex-syntax`](https://docs.rs/regex-syntax). [0-9--4] Direct subtraction (matching 0-9 except 4) [a-g~~b-h] Symmetric difference (matching `a` and `h` only) [\[\]] Escaping in character classes (matching [ or ]) +[a&&b] An empty character class matching nothing Any named character class may appear inside a bracketed `[...]` character -class. For example, `[\p{Greek}[:digit:]]` matches any Greek or ASCII -digit. `[\p{Greek}&&\pL]` matches Greek letters. +class. For example, `[\p{Greek}[:digit:]]` matches any ASCII digit or any +codepoint in the `Greek` script. `[\p{Greek}&&\pL]` matches Greek letters. Precedence in character classes, from most binding to least: -1. Ranges: `a-cd` == `[a-c]d` -2. Union: `ab&&bc` == `[ab]&&[bc]` -3. Intersection: `^a-z&&b` == `^[a-z&&b]` -4. Negation +1. Ranges: `[a-cd]` == `[[a-c]d]` +2. Union: `[ab&&bc]` == `[[ab]&&[bc]]` +3. Intersection, difference, symmetric difference. All three have equivalent +precedence, and are evaluated in left-to-right order. For example, +`[\pL--\p{Greek}&&\p{Uppercase}]` == `[[\pL--\p{Greek}]&&\p{Uppercase}]`. +4. Negation: `[^a-z&&b]` == `[^[a-z&&b]]`. -## Composites +### Composites
 xy    concatenation (x followed by y)
 x|y   alternation (x or y, prefer x)
 
-## Repetitions +This example shows how an alternation works, and what it means to prefer a +branch in the alternation over subsequent branches. + +``` +use regex::Regex; + +let haystack = "samwise"; +// If 'samwise' comes first in our alternation, then it is +// preferred as a match, even if the regex engine could +// technically detect that 'sam' led to a match earlier. +let re = Regex::new(r"samwise|sam").unwrap(); +assert_eq!("samwise", re.find(haystack).unwrap().as_str()); +// But if 'sam' comes first, then it will match instead. +// In this case, it is impossible for 'samwise' to match +// because 'sam' is a prefix of it. +let re = Regex::new(r"sam|samwise").unwrap(); +assert_eq!("sam", re.find(haystack).unwrap().as_str()); +``` + +### Repetitions
 x*        zero or more of x (greedy)
@@ -342,30 +722,60 @@ x{n,}?    at least n x (ungreedy/lazy)
 x{n}?     exactly n x
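
The greedy/ungreedy distinction in the table above is easiest to see side by side; a small sketch (the patterns are chosen purely for illustration):

```rust
use regex::Regex;

// Greedy '+' consumes as much as possible; lazy '+?' stops as early as it can.
let greedy = Regex::new(r"<.+>").unwrap();
assert_eq!(greedy.find("<a><b>").unwrap().as_str(), "<a><b>");
let lazy = Regex::new(r"<.+?>").unwrap();
assert_eq!(lazy.find("<a><b>").unwrap().as_str(), "<a>");
```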
 
-## Empty matches +### Empty matches
-^     the beginning of text (or start-of-line with multi-line mode)
-$     the end of text (or end-of-line with multi-line mode)
-\A    only the beginning of text (even with multi-line mode enabled)
-\z    only the end of text (even with multi-line mode enabled)
-\b    a Unicode word boundary (\w on one side and \W, \A, or \z on other)
-\B    not a Unicode word boundary
+^               the beginning of a haystack (or start-of-line with multi-line mode)
+$               the end of a haystack (or end-of-line with multi-line mode)
+\A              only the beginning of a haystack (even with multi-line mode enabled)
+\z              only the end of a haystack (even with multi-line mode enabled)
+\b              a Unicode word boundary (\w on one side and \W, \A, or \z on other)
+\B              not a Unicode word boundary
+\b{start}, \<   a Unicode start-of-word boundary (\W|\A on the left, \w on the right)
+\b{end}, \>     a Unicode end-of-word boundary (\w on the left, \W|\z on the right)
+\b{start-half}  half of a Unicode start-of-word boundary (\W|\A on the left)
+\b{end-half}    half of a Unicode end-of-word boundary (\W|\z on the right)
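
A quick sketch of the word-boundary assertions above (this assumes a release of the crate that supports the `\b{start}`/`\b{end}` syntax documented here; the haystack is invented):

```rust
use regex::Regex;

// \b{start} rules out the 'cat' inside 'concatenate', since that occurrence
// does not begin at a word boundary.
let re = Regex::new(r"\b{start}cat\w*\b{end}").unwrap();
let m = re.find("concatenate cats").unwrap();
assert_eq!(m.as_str(), "cats");
```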
 
-The empty regex is valid and matches the empty string. For example, the empty -regex matches `abc` at positions `0`, `1`, `2` and `3`. +The empty regex is valid and matches the empty string. For example, the +empty regex matches `abc` at positions `0`, `1`, `2` and `3`. When using the +top-level [`Regex`] on `&str` haystacks, an empty match that splits a codepoint +is guaranteed to never be returned. However, such matches are permitted when +using a [`bytes::Regex`]. For example: -## Grouping and flags +```rust +let re = regex::Regex::new(r"").unwrap(); +let ranges: Vec<_> = re.find_iter("💩").map(|m| m.range()).collect(); +assert_eq!(ranges, vec![0..0, 4..4]); + +let re = regex::bytes::Regex::new(r"").unwrap(); +let ranges: Vec<_> = re.find_iter("💩".as_bytes()).map(|m| m.range()).collect(); +assert_eq!(ranges, vec![0..0, 1..1, 2..2, 3..3, 4..4]); +``` + +Note that an empty regex is distinct from a regex that can never match. +For example, the regex `[a&&b]` is a character class that represents the +intersection of `a` and `b`. That intersection is empty, which means the +character class is empty. Since nothing is in the empty set, `[a&&b]` matches +nothing, not even the empty string. + +### Grouping and flags
 (exp)          numbered capture group (indexed by opening parenthesis)
-(?P<name>exp)  named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]])
+(?P<name>exp)  named (also numbered) capture group (names must be alpha-numeric)
+(?<name>exp)   named (also numbered) capture group (names must be alpha-numeric)
 (?:exp)        non-capturing group
 (?flags)       set flags within current group
 (?flags:exp)   set flags for exp (non-capturing)
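
For instance, a brief sketch of named capture groups (the pattern and group names here are invented for illustration):

```rust
use regex::Regex;

// (?<name>...) and (?P<name>...) are interchangeable spellings.
let re = Regex::new(r"(?<y>\d{4})-(?P<m>\d{2})").unwrap();
let caps = re.captures("date: 2024-11-08").unwrap();
assert_eq!(&caps["y"], "2024");
assert_eq!(&caps["m"], "11");
```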
 
+Capture group names must be any sequence of alpha-numeric Unicode codepoints,
+in addition to `.`, `_`, `[` and `]`. Names must start with either an `_` or
+an alphabetic codepoint. Alphabetic codepoints correspond to the `Alphabetic`
+Unicode property, while numeric codepoints correspond to the union of the
+`Decimal_Number`, `Letter_Number` and `Other_Number` general categories.
+
 Flags are each a single character. For example, `(?x)` sets the flag `x` and `(?-x)` clears the flag `x`. Multiple flags can be set or cleared at the same time: `(?xy)` sets both the `x` and `y` flags and `(?x-y)` sets
@@ -377,31 +787,36 @@ All flags are by default disabled unless stated otherwise. They are:
 i     case-insensitive: letters match both upper and lower case
 m     multi-line mode: ^ and $ match begin/end of line
 s     allow . to match \n
+R     enables CRLF mode: when multi-line mode is enabled, \r\n is used
 U     swap the meaning of x* and x*?
 u     Unicode support (enabled by default)
-x     ignore whitespace and allow line comments (starting with `#`)
+x     verbose mode, ignores whitespace and allows line comments (starting with `#`)
+
+Note that in verbose mode, whitespace is ignored everywhere, including within
+character classes. To insert whitespace, use its escaped form or a hex literal.
+For example, `\ ` or `\x20` for an ASCII space.
+
 Flags can be toggled within a pattern. Here's an example that matches case-insensitively for the first part but case-sensitively for the second part:
 ```rust
-# use regex::Regex;
-# fn main() {
+use regex::Regex;
+
 let re = Regex::new(r"(?i)a+(?-i)b+").unwrap();
-let cap = re.captures("AaAaAbbBBBb").unwrap();
-assert_eq!(&cap[0], "AaAaAbb");
-# }
+let m = re.find("AaAaAbbBBBb").unwrap();
+assert_eq!(m.as_str(), "AaAaAbb");
 ```
 Notice that the `a+` matches either `a` or `A`, but the `b+` only matches `b`.
 Multi-line mode means `^` and `$` no longer match just at the beginning/end of
-the input, but at the beginning/end of lines:
+the input, but also at the beginning/end of lines:
 ```
-# use regex::Regex;
+use regex::Regex;
+
 let re = Regex::new(r"(?m)^line \d+").unwrap();
 let m = re.find("line one\nline 2\n").unwrap();
 assert_eq!(m.as_str(), "line 2");
@@ -410,44 +825,72 @@ assert_eq!(m.as_str(), "line 2");
 Note that `^` matches after new lines, even at the end of input:
 ```
-# use regex::Regex;
+use regex::Regex;
+
 let re = Regex::new(r"(?m)^").unwrap();
 let m = re.find_iter("test\n").last().unwrap();
 assert_eq!((m.start(), m.end()), (5, 5));
 ```
-Here is an example that uses an ASCII word boundary instead of a Unicode
-word boundary:
+When both CRLF mode and multi-line mode are enabled, then `^` and `$` will
+match either `\r` or `\n`, but never in the middle of a `\r\n`:
+
+```
+use regex::Regex;
+
+let re = Regex::new(r"(?mR)^foo$").unwrap();
+let m = re.find("\r\nfoo\r\n").unwrap();
+assert_eq!(m.as_str(), "foo");
+```
+
+Unicode mode can also be selectively disabled, although only when the result
+*would not* match invalid UTF-8. One good example of this is using an ASCII
+word boundary instead of a Unicode word boundary, which might make some regex
+searches run faster:
 ```rust
-# use regex::Regex;
-# fn main() {
+use regex::Regex;
+
 let re = Regex::new(r"(?-u:\b).+(?-u:\b)").unwrap();
-let cap = re.captures("$$abc$$").unwrap();
-assert_eq!(&cap[0], "abc");
-# }
+let m = re.find("$$abc$$").unwrap();
+assert_eq!(m.as_str(), "abc");
 ```
-## Escape sequences
+### Escape sequences
+
+Note that this includes all possible escape sequences, even ones that are
+documented elsewhere.
-\*          literal *, works for any punctuation character: \.+*?()|[]{}^$
-\a          bell (\x07)
-\f          form feed (\x0C)
-\t          horizontal tab
-\n          new line
-\r          carriage return
-\v          vertical tab (\x0B)
-\123        octal character code (up to three digits) (when enabled)
-\x7F        hex character code (exactly two digits)
-\x{10FFFF}  any hex character code corresponding to a Unicode code point
-\u007F      hex character code (exactly four digits)
-\u{7F}      any hex character code corresponding to a Unicode code point
-\U0000007F  hex character code (exactly eight digits)
-\U{7F}      any hex character code corresponding to a Unicode code point
+\*              literal *, applies to all ASCII except [0-9A-Za-z<>]
+\a              bell (\x07)
+\f              form feed (\x0C)
+\t              horizontal tab
+\n              new line
+\r              carriage return
+\v              vertical tab (\x0B)
+\A              matches at the beginning of a haystack
+\z              matches at the end of a haystack
+\b              word boundary assertion
+\B              negated word boundary assertion
+\b{start}, \<   start-of-word boundary assertion
+\b{end}, \>     end-of-word boundary assertion
+\b{start-half}  half of a start-of-word boundary assertion
+\b{end-half}    half of an end-of-word boundary assertion
+\123            octal character code, up to three digits (when enabled)
+\x7F            hex character code (exactly two digits)
+\x{10FFFF}      any hex character code corresponding to a Unicode code point
+\u007F          hex character code (exactly four digits)
+\u{7F}          any hex character code corresponding to a Unicode code point
+\U0000007F      hex character code (exactly eight digits)
+\U{7F}          any hex character code corresponding to a Unicode code point
+\p{Letter}      Unicode character class
+\P{Letter}      negated Unicode character class
+\d, \s, \w      Perl character class
+\D, \S, \W      negated Perl character class
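
As a small illustration of the escapes above (the codepoint is chosen arbitrarily):

```rust
use regex::Regex;

// \u{2603} names U+2603 (the snowman) by its hex value, and \* makes the
// meta character '*' match literally.
let re = Regex::new(r"\u{2603}\*").unwrap();
assert!(re.is_match("☃*"));
```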
 
-## Perl character classes (Unicode friendly) +### Perl character classes (Unicode friendly) These classes are based on the definitions provided in [UTS#18](https://www.unicode.org/reports/tr18/#Compatibility_Properties): @@ -461,7 +904,10 @@ These classes are based on the definitions provided in \W not word character -## ASCII character classes +### ASCII character classes + +These classes are based on the definitions provided in +[UTS#18](https://www.unicode.org/reports/tr18/#Compatibility_Properties):
 [[:alnum:]]    alphanumeric ([0-9A-Za-z])
@@ -480,16 +926,228 @@ These classes are based on the definitions provided in
 [[:xdigit:]]   hex digit ([0-9A-Fa-f])
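
A short sketch contrasting an ASCII class with its Unicode-aware counterpart (the haystacks are illustrative only):

```rust
use regex::Regex;

// [[:xdigit:]] is ASCII-only, unlike \d, which is Unicode-aware by default.
let re = Regex::new(r"^[[:xdigit:]]+$").unwrap();
assert!(re.is_match("DEADbeef42"));
assert!(!re.is_match("xyz"));
```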
 
+# Untrusted input + +This crate is meant to be able to run regex searches on untrusted haystacks +without fear of [ReDoS]. This crate also, to a certain extent, supports +untrusted patterns. + +[ReDoS]: https://en.wikipedia.org/wiki/ReDoS + +This crate differs from most (but not all) other regex engines in that it +doesn't use unbounded backtracking to run a regex search. In those cases, +one generally cannot use untrusted patterns *or* untrusted haystacks because +it can be very difficult to know whether a particular pattern will result in +catastrophic backtracking or not. + +We'll first discuss how this crate deals with untrusted inputs and then wrap +it up with a realistic discussion about what practice really looks like. + +### Panics + +Outside of clearly documented cases, most APIs in this crate are intended to +never panic regardless of the inputs given to them. For example, `Regex::new`, +`Regex::is_match`, `Regex::find` and `Regex::captures` should never panic. That +is, it is an API promise that those APIs will never panic no matter what inputs +are given to them. With that said, regex engines are complicated beasts, and +providing a rock solid guarantee that these APIs literally never panic is +essentially equivalent to saying, "there are no bugs in this library." That is +a bold claim, and not really one that can be feasibly made with a straight +face. + +Don't get the wrong impression here. This crate is extensively tested, not just +with unit and integration tests, but also via fuzz testing. For example, this +crate is part of the [OSS-fuzz project]. Panics should be incredibly rare, but +it is possible for bugs to exist, and thus possible for a panic to occur. If +you need a rock solid guarantee against panics, then you should wrap calls into +this library with [`std::panic::catch_unwind`]. + +It's also worth pointing out that this library will *generally* panic when +other regex engines would commit undefined behavior. When undefined behavior +occurs, your program might continue as if nothing bad has happened, but it also +might mean your program is open to the worst kinds of exploits. In contrast, +the worst thing a panic can do is a denial of service. + +[OSS-fuzz project]: https://android.googlesource.com/platform/external/oss-fuzz/+/refs/tags/android-t-preview-1/projects/rust-regex/ +[`std::panic::catch_unwind`]: https://doc.rust-lang.org/std/panic/fn.catch_unwind.html + +### Untrusted patterns + +The principal way this crate deals with them is by limiting their size by +default. The size limit can be configured via [`RegexBuilder::size_limit`]. The +idea of a size limit is that compiling a pattern into a `Regex` will fail if it +becomes "too big." Namely, while *most* resources consumed by compiling a regex +are approximately proportional (albeit with some high constant factors in some +cases, such as with Unicode character classes) to the length of the pattern +itself, there is one particular exception to this: counted repetitions. Namely, +this pattern: + +```text +a{5}{5}{5}{5}{5}{5} +``` + +Is equivalent to this pattern: + +```text +a{15625} +``` + +In both of these cases, the actual pattern string is quite small, but the +resulting `Regex` value is quite large. Indeed, as the first pattern shows, +it isn't enough to locally limit the size of each repetition because they can +be stacked in a way that results in exponential growth. 
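
A sketch of how the size limit interacts with the pattern above (the particular limit value is arbitrary; the point is only that a tiny pattern string can exceed a compiled-size budget):

```rust
use regex::RegexBuilder;

// The pattern string is small, but it expands to a{15625} during
// compilation, which blows well past a deliberately small size limit.
let result = RegexBuilder::new(r"a{5}{5}{5}{5}{5}{5}")
    .size_limit(1 << 10) // about 1KB
    .build();
assert!(result.is_err());
```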
+ +To provide a bit more context, a simplified view of regex compilation looks +like this: + +* The pattern string is parsed into a structured representation called an AST. +Counted repetitions are not expanded and Unicode character classes are not +looked up in this stage. That is, the size of the AST is proportional to the +size of the pattern with "reasonable" constant factors. In other words, one +can reasonably limit the memory used by an AST by limiting the length of the +pattern string. +* The AST is translated into an HIR. Counted repetitions are still *not* +expanded at this stage, but Unicode character classes are embedded into the +HIR. The memory usage of a HIR is still proportional to the length of the +original pattern string, but the constant factors---mostly as a result of +Unicode character classes---can be quite high. Still though, the memory used by +an HIR can be reasonably limited by limiting the length of the pattern string. +* The HIR is compiled into a [Thompson NFA]. This is the stage at which +something like `\w{5}` is rewritten to `\w\w\w\w\w`. Thus, this is the stage +at which [`RegexBuilder::size_limit`] is enforced. If the NFA exceeds the +configured size, then this stage will fail. + +[Thompson NFA]: https://en.wikipedia.org/wiki/Thompson%27s_construction + +The size limit helps avoid two different kinds of exorbitant resource usage: + +* It avoids permitting exponential memory usage based on the size of the +pattern string. +* It avoids long search times. This will be discussed in more detail in the +next section, but worst case search time *is* dependent on the size of the +regex. So keeping regexes limited to a reasonable size is also a way of keeping +search times reasonable. + +Finally, it's worth pointing out that regex compilation is guaranteed to take +worst case `O(m)` time, where `m` is proportional to the size of regex. The +size of the regex here is *after* the counted repetitions have been expanded. + +**Advice for those using untrusted regexes**: limit the pattern length to +something small and expand it as needed. Configure [`RegexBuilder::size_limit`] +to something small and then expand it as needed. + +### Untrusted haystacks + +The main way this crate guards against searches from taking a long time is by +using algorithms that guarantee a `O(m * n)` worst case time and space bound. +Namely: + +* `m` is proportional to the size of the regex, where the size of the regex +includes the expansion of all counted repetitions. (See the previous section on +untrusted patterns.) +* `n` is proportional to the length, in bytes, of the haystack. + +In other words, if you consider `m` to be a constant (for example, the regex +pattern is a literal in the source code), then the search can be said to run +in "linear time." Or equivalently, "linear time with respect to the size of the +haystack." + +But the `m` factor here is important not to ignore. If a regex is +particularly big, the search times can get quite slow. This is why, in part, +[`RegexBuilder::size_limit`] exists. + +**Advice for those searching untrusted haystacks**: As long as your regexes +are not enormous, you should expect to be able to search untrusted haystacks +without fear. If you aren't sure, you should benchmark it. Unlike backtracking +engines, if your regex is so big that it's likely to result in slow searches, +this is probably something you'll be able to observe regardless of what the +haystack is made up of. 
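
Returning to the untrusted-pattern advice above, one way to act on it is a small gatekeeping helper; the limits and the helper itself are hypothetical, not part of this crate's API:

```rust
use regex::{Regex, RegexBuilder};

// Hypothetical policy: refuse long pattern strings outright, and cap the
// compiled size of whatever remains. Widen both limits only as needed.
fn compile_untrusted(pattern: &str) -> Option<Regex> {
    if pattern.len() > 1_000 {
        return None;
    }
    RegexBuilder::new(pattern).size_limit(1 << 20).build().ok()
}

assert!(compile_untrusted("[a-z]+").is_some());
```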
+ +### Iterating over matches + +One thing that is perhaps easy to miss is that the worst case time +complexity bound of `O(m * n)` applies to methods like [`Regex::is_match`], +[`Regex::find`] and [`Regex::captures`]. It does **not** apply to +[`Regex::find_iter`] or [`Regex::captures_iter`]. Namely, since iterating over +all matches can execute many searches, and each search can scan the entire +haystack, the worst case time complexity for iterators is `O(m * n^2)`. + +One example of where this occurs is when a pattern consists of an alternation, +where an earlier branch of the alternation requires scanning the entire +haystack only to discover that there is no match. It also requires a later +branch of the alternation to have matched at the beginning of the search. For +example, consider the pattern `.*[^A-Z]|[A-Z]` and the haystack `AAAAA`. The +first search will scan to the end looking for matches of `.*[^A-Z]` even though +a finite automata engine (as in this crate) knows that `[A-Z]` has already +matched the first character of the haystack. This is due to the greedy nature +of regex searching. That first search will report a match at the first `A` only +after scanning to the end to discover that no other match exists. The next +search then begins at the second `A` and the behavior repeats. + +There is no way to avoid this. This means that if both patterns and haystacks +are untrusted and you're iterating over all matches, you're susceptible to +worst case quadratic time complexity. One possible way to mitigate this +is to drop down to the lower level `regex-automata` crate and use its +`meta::Regex` iterator APIs. There, you can configure the search to operate +in "earliest" mode by passing a `Input::new(haystack).earliest(true)` to +`meta::Regex::find_iter` (for example). By enabling this mode, you give up +the normal greedy match semantics of regex searches and instead ask the regex +engine to immediately stop as soon as a match has been found. Enabling this +mode will thus restore the worst case `O(m * n)` time complexity bound, but at +the cost of different semantics. + +### Untrusted inputs in practice + +While providing a `O(m * n)` worst case time bound on all searches goes a long +way toward preventing [ReDoS], that doesn't mean every search you can possibly +run will complete without burning CPU time. In general, there are a few ways +for the `m * n` time bound to still bite you: + +* You are searching an exceptionally long haystack. No matter how you slice +it, a longer haystack will take more time to search. This crate may often make +very quick work of even long haystacks because of its literal optimizations, +but those aren't available for all regexes. +* Unicode character classes can cause searches to be quite slow in some cases. +This is especially true when they are combined with counted repetitions. While +the regex size limit above will protect you from the most egregious cases, +the default size limit still permits pretty big regexes that can execute more +slowly than one might expect. +* While routines like [`Regex::find`] and [`Regex::captures`] guarantee +worst case `O(m * n)` search time, routines like [`Regex::find_iter`] and +[`Regex::captures_iter`] actually have worst case `O(m * n^2)` search time. +This is because `find_iter` runs many searches, and each search takes worst +case `O(m * n)` time. Thus, iteration of all matches in a haystack has +worst case `O(m * n^2)`. 
A good example of a pattern that exhibits this is
+`(?:A+){1000}|` or even `.*[^A-Z]|[A-Z]`.
+
+In general, untrusted haystacks are easier to stomach than untrusted patterns.
+Untrusted patterns give a lot more control to the caller to impact the
+performance of a search. In many cases, a regex search will actually execute in
+average case `O(n)` time (i.e., not dependent on the size of the regex), but
+this can't be guaranteed in general. Therefore, permitting untrusted patterns
+means that your only line of defense is to put a limit on how big `m` (and
+perhaps also `n`) can be in `O(m * n)`. `n` is limited by simply inspecting
+the length of the haystack while `m` is limited by *both* applying a limit to
+the length of the pattern *and* a limit on the compiled size of the regex via
+[`RegexBuilder::size_limit`].
+
+It bears repeating: if you're accepting untrusted patterns, it would be a good
+idea to start with conservative limits on `m` and `n`, and then carefully
+increase them as needed.
+
 # Crate features
 
 By default, this crate tries pretty hard to make regex matching both as fast
-as possible and as correct as it can be, within reason. This means that there
-is a lot of code dedicated to performance, the handling of Unicode data and the
-Unicode data itself. Overall, this leads to more dependencies, larger binaries
-and longer compile times. This trade off may not be appropriate in all cases,
-and indeed, even when all Unicode and performance features are disabled, one
-is still left with a perfectly serviceable regex engine that will work well
-in many cases.
+as possible and as correct as it can be. This means that there is a lot of
+code dedicated to performance, the handling of Unicode data and the Unicode
+data itself. Overall, this leads to more dependencies, larger binaries and
+longer compile times. This trade off may not be appropriate in all cases, and
+indeed, even when all Unicode and performance features are disabled, one is
+still left with a perfectly serviceable regex engine that will work well in
+many cases. (Note that code is not arbitrarily reducible, and for this reason,
+the [`regex-lite`](https://docs.rs/regex-lite) crate exists to provide an even
+more minimal experience by cutting out Unicode and performance, but still
+maintaining the linear search time bound.)
 
 This crate exposes a number of features for controlling that trade off. Some of these features are strictly performance oriented, such that disabling them
@@ -498,32 +1156,61 @@ Other features, such as the ones controlling the presence or absence of
 Unicode data, can result in a loss of functionality. For example, if one disables the `unicode-case` feature (described below), then compiling the regex `(?i)a` will fail since Unicode case insensitivity is enabled by default. Instead,
-callers must use `(?i-u)a` instead to disable Unicode case folding. Stated
-differently, enabling or disabling any of the features below can only add or
-subtract from the total set of valid regular expressions. Enabling or disabling
-a feature will never modify the match semantics of a regular expression.
+callers must use `(?i-u)a` to disable Unicode case folding. Stated differently,
+enabling or disabling any of the features below can only add or subtract from
+the total set of valid regular expressions. Enabling or disabling a feature
+will never modify the match semantics of a regular expression.
 
-All features below are enabled by default.
+Most features below are enabled by default. Features that aren't enabled by
+default are noted.
+
+### Ecosystem features
+
+* **std** -
-  When enabled, this will cause `regex` to use the standard library. Currently,
-  disabling this feature will always result in a compilation error. It is
-  intended to add `alloc`-only support to regex in the future.
+  When enabled, this will cause `regex` to use the standard library. In terms
+  of APIs, `std` causes error types to implement the `std::error::Error`
+  trait. Enabling `std` will also result in performance optimizations,
+  including SIMD and faster synchronization primitives. Notably, **disabling
+  the `std` feature will result in the use of spin locks**. To use a regex
+  engine without `std` and without spin locks, you'll need to drop down to
+  the [`regex-automata`](https://docs.rs/regex-automata) crate.
+* **logging** -
+  When enabled, the `log` crate is used to emit messages about regex
+  compilation and search strategies. This is **disabled by default**. This is
+  typically only useful to someone working on this crate's internals, but might
+  be useful if you're doing some rabbit hole performance hacking. Or if you're
+  just interested in the kinds of decisions being made by the regex engine.
 
 ### Performance features
 
 * **perf** -
-  Enables all performance related features. This feature is enabled by default
-  and will always cover all features that improve performance, even if more
-  are added in the future.
+  Enables all performance related features except for `perf-dfa-full`. This
+  feature is enabled by default and is intended to cover all reasonable
+  features that improve performance, even if more are added in the future.
 * **perf-dfa** -
   Enables the use of a lazy DFA for matching. The lazy DFA is used to compile portions of a regex to a very fast DFA on an as-needed basis. This can result in substantial speedups, usually by an order of magnitude on large haystacks. The lazy DFA does not bring in any new dependencies, but it can make compile times longer.
+* **perf-dfa-full** -
+  Enables the use of a full DFA for matching. Full DFAs are problematic because
+  they have worst case `O(2^n)` construction time. For this reason, when this
+  feature is enabled, full DFAs are only used for very small regexes and a
+  very small space bound is used during determinization to prevent the DFA
+  from blowing up. This feature is not enabled by default, even as part of
+  `perf`, because it results in fairly sizeable increases in binary size and
+  compilation time. It can result in faster search times, but they tend to be
+  more modest and limited to non-Unicode regexes.
+* **perf-onepass** -
+  Enables the use of a one-pass DFA for extracting the positions of capture
+  groups. This optimization applies to a subset of certain types of NFAs and
+  represents the fastest engine in this crate for dealing with capture groups.
+* **perf-backtrack** -
+  Enables the use of a bounded backtracking algorithm for extracting the
+  positions of capture groups. This usually sits between the slowest engine
+  (the PikeVM) and the fastest engine (one-pass DFA) for extracting capture
  groups. It's used whenever the regex is not one-pass and is small enough.
 * **perf-inline** -
   Enables the use of aggressive inlining inside match routines. This reduces the overhead of each match. The aggressive inlining, however, increases
@@ -577,193 +1264,83 @@ All features below are enabled by default.
   This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and `\p{sb=ATerm}`.
-
-# Untrusted input
-
-This crate can handle both untrusted regular expressions and untrusted
-search text.
-
-Untrusted regular expressions are handled by capping the size of a compiled
-regular expression.
-(See [`RegexBuilder::size_limit`](struct.RegexBuilder.html#method.size_limit).)
-Without this, it would be trivial for an attacker to exhaust your system's
-memory with expressions like `a{100}{100}{100}`.
-
-Untrusted search text is allowed because the matching engine(s) in this
-crate have time complexity `O(mn)` (with `m ~ regex` and `n ~ search
-text`), which means there's no way to cause exponential blow-up like with
-some other regular expression engines. (We pay for this by disallowing
-features like arbitrary look-ahead and backreferences.)
-
-When a DFA is used, pathological cases with exponential state blow-up are
-avoided by constructing the DFA lazily or in an "online" manner. Therefore,
-at most one new state can be created for each byte of input. This satisfies
-our time complexity guarantees, but can lead to memory growth
-proportional to the size of the input. As a stopgap, the DFA is only
-allowed to store a fixed number of states. When the limit is reached, its
-states are wiped and continues on, possibly duplicating previous work. If
-the limit is reached too frequently, it gives up and hands control off to
-another matching engine with fixed memory requirements.
-(The DFA size limit can also be tweaked. See
-[`RegexBuilder::dfa_size_limit`](struct.RegexBuilder.html#method.dfa_size_limit).)
+# Other crates
+
+This crate has two required dependencies and several optional dependencies.
+This section briefly describes them with the goal of raising awareness of how
+different components of this crate may be used independently.
+
+It is somewhat unusual for a regex engine to have dependencies, as most regex
+libraries are self contained units with no dependencies other than a particular
+environment's standard library. Indeed, for other similarly optimized regex
+engines, most or all of the code in the dependencies of this crate would
+normally just be inseparable or coupled parts of the crate itself. But since
+Rust and its tooling ecosystem make the use of dependencies so easy, it made
+sense to spend some effort de-coupling parts of this crate and making them
+independently useful.
+
+We only briefly describe each crate here.
+
+* [`regex-lite`](https://docs.rs/regex-lite) is not a dependency of `regex`,
+but rather, a standalone zero-dependency simpler version of `regex` that
+prioritizes compile times and binary size. In exchange, it eschews Unicode
+support and performance. Its match semantics are as identical as possible to
+the `regex` crate, and for the things it supports, its APIs are identical to
+the APIs in this crate. In other words, for a lot of use cases, it is a drop-in
+replacement.
+* [`regex-syntax`](https://docs.rs/regex-syntax) provides a regular expression
+parser via `Ast` and `Hir` types. It also provides routines for extracting
+literals from a pattern. Folks can use this crate to do analysis, or even to
+build their own regex engine without having to worry about writing a parser.
+* [`regex-automata`](https://docs.rs/regex-automata) provides the regex engines
+themselves. One of the downsides of finite automata based regex engines is that
+they often need multiple internal engines in order to have similar or better
+performance than an unbounded backtracking engine in practice. `regex-automata`
+in particular provides public APIs for a PikeVM, a bounded backtracker, a
+one-pass DFA, a lazy DFA, a fully compiled DFA and a meta regex engine that
+combines them all together. It also has native multi-pattern support and
+provides a way to compile and serialize full DFAs such that they can be loaded
+and searched in a no-std no-alloc environment. `regex-automata` itself doesn't
+even have a required dependency on `regex-syntax`!
+* [`memchr`](https://docs.rs/memchr) provides low level SIMD vectorized
+routines for quickly finding the location of single bytes or even substrings
+in a haystack. In other words, it provides fast `memchr` and `memmem` routines.
+These are used by this crate in literal optimizations.
+* [`aho-corasick`](https://docs.rs/aho-corasick) provides multi-substring
+search. It also provides SIMD vectorized routines in the case where the number
+of substrings to search for is relatively small. The `regex` crate also uses
+this for literal optimizations.
 */
 
+#![no_std]
 #![deny(missing_docs)]
 #![cfg_attr(feature = "pattern", feature(pattern))]
 #![warn(missing_debug_implementations)]
 
-#[cfg(not(feature = "std"))]
-compile_error!("`std` feature is currently required to build this crate");
+#[cfg(doctest)]
+doc_comment::doctest!("../README.md");
 
-// To check README's example
-// TODO: Re-enable this once the MSRV is 1.43 or greater.
-// See: https://github.com/rust-lang/regex/issues/684
-// See: https://github.com/rust-lang/regex/issues/685
-// #[cfg(doctest)]
-// doc_comment::doctest!("../README.md");
+extern crate alloc;
+#[cfg(any(test, feature = "std"))]
+extern crate std;
 
-#[cfg(feature = "std")]
 pub use crate::error::Error;
-#[cfg(feature = "std")]
-pub use crate::re_builder::set_unicode::*;
-#[cfg(feature = "std")]
-pub use crate::re_builder::unicode::*;
-#[cfg(feature = "std")]
-pub use crate::re_set::unicode::*;
-#[cfg(feature = "std")]
-pub use crate::re_unicode::{
-    escape, CaptureLocations, CaptureMatches, CaptureNames, Captures,
-    Locations, Match, Matches, NoExpand, Regex, Replacer, ReplacerRef, Split,
-    SplitN, SubCaptureMatches,
-};
-
-/**
-Match regular expressions on arbitrary bytes.
-
-This module provides a nearly identical API to the one found in the
-top-level of this crate. There are two important differences:
-
-1. Matching is done on `&[u8]` instead of `&str`. Additionally, `Vec<u8>`
-is used where `String` would have been used.
-2. Unicode support can be disabled even when disabling it would result in
-matching invalid UTF-8 bytes.
-
-# Example: match null terminated string
-This shows how to find all null-terminated strings in a slice of bytes:
-
-```rust
-# use regex::bytes::Regex;
-let re = Regex::new(r"(?-u)(?P<cstr>[^\x00]+)\x00").unwrap();
-let text = b"foo\x00bar\x00baz\x00";
-
-// Extract all of the strings without the null terminator from each match.
-// The unwrap is OK here since a match requires the `cstr` capture to match.
-let cstrs: Vec<&[u8]> = - re.captures_iter(text) - .map(|c| c.name("cstr").unwrap().as_bytes()) - .collect(); -assert_eq!(vec![&b"foo"[..], &b"bar"[..], &b"baz"[..]], cstrs); -``` - -# Example: selectively enable Unicode support - -This shows how to match an arbitrary byte pattern followed by a UTF-8 encoded -string (e.g., to extract a title from a Matroska file): - -```rust -# use std::str; -# use regex::bytes::Regex; -let re = Regex::new( - r"(?-u)\x7b\xa9(?:[\x80-\xfe]|[\x40-\xff].)(?u:(.*))" -).unwrap(); -let text = b"\x12\xd0\x3b\x5f\x7b\xa9\x85\xe2\x98\x83\x80\x98\x54\x76\x68\x65"; -let caps = re.captures(text).unwrap(); - -// Notice that despite the `.*` at the end, it will only match valid UTF-8 -// because Unicode mode was enabled with the `u` flag. Without the `u` flag, -// the `.*` would match the rest of the bytes. -let mat = caps.get(1).unwrap(); -assert_eq!((7, 10), (mat.start(), mat.end())); - -// If there was a match, Unicode mode guarantees that `title` is valid UTF-8. -let title = str::from_utf8(&caps[1]).unwrap(); -assert_eq!("☃", title); -``` - -In general, if the Unicode flag is enabled in a capture group and that capture -is part of the overall match, then the capture is *guaranteed* to be valid -UTF-8. - -# Syntax - -The supported syntax is pretty much the same as the syntax for Unicode -regular expressions with a few changes that make sense for matching arbitrary -bytes: - -1. The `u` flag can be disabled even when disabling it might cause the regex to -match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in -"ASCII compatible" mode. -2. In ASCII compatible mode, neither Unicode scalar values nor Unicode -character classes are allowed. -3. In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`) -revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps -to `[[:digit:]]` and `\s` maps to `[[:space:]]`. -4. In ASCII compatible mode, word boundaries use the ASCII compatible `\w` to -determine whether a byte is a word byte or not. -5. Hexadecimal notation can be used to specify arbitrary bytes instead of -Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the -literal byte `\xFF`, while in Unicode mode, `\xFF` is a Unicode codepoint that -matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation when -enabled. -6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the -`s` flag is additionally enabled, `.` matches any byte. - -# Performance - -In general, one should expect performance on `&[u8]` to be roughly similar to -performance on `&str`. -*/ -#[cfg(feature = "std")] -pub mod bytes { - pub use crate::re_builder::bytes::*; - pub use crate::re_builder::set_bytes::*; - pub use crate::re_bytes::*; - pub use crate::re_set::bytes::*; -} +pub use crate::{builders::string::*, regex::string::*, regexset::string::*}; -mod backtrack; -mod compile; -#[cfg(feature = "perf-dfa")] -mod dfa; +mod builders; +pub mod bytes; mod error; -mod exec; -mod expand; mod find_byte; -mod input; -mod literal; #[cfg(feature = "pattern")] mod pattern; -mod pikevm; -mod pool; -mod prog; -mod re_builder; -mod re_bytes; -mod re_set; -mod re_trait; -mod re_unicode; -mod sparse; -mod utf8; - -/// The `internal` module exists to support suspicious activity, such as -/// testing different matching engines and supporting the `regex-debug` CLI -/// utility. 
-#[doc(hidden)] -#[cfg(feature = "std")] -pub mod internal { - pub use crate::compile::Compiler; - pub use crate::exec::{Exec, ExecBuilder}; - pub use crate::input::{Char, CharInput, Input, InputAt}; - pub use crate::literal::LiteralSearcher; - pub use crate::prog::{EmptyLook, Inst, InstRanges, Program}; +mod regex; +mod regexset; + +/// Escapes all regular expression meta characters in `pattern`. +/// +/// The string returned may be safely used as a literal in a regular +/// expression. +pub fn escape(pattern: &str) -> alloc::string::String { + regex_syntax::escape(pattern) } diff --git a/vendor/regex/src/literal/imp.rs b/vendor/regex/src/literal/imp.rs deleted file mode 100644 index 90b2f11..0000000 --- a/vendor/regex/src/literal/imp.rs +++ /dev/null @@ -1,402 +0,0 @@ -use std::mem; - -use aho_corasick::{self, packed, AhoCorasick, AhoCorasickBuilder}; -use memchr::{memchr, memchr2, memchr3, memmem}; -use regex_syntax::hir::literal::{Literal, Literals}; - -/// A prefix extracted from a compiled regular expression. -/// -/// A regex prefix is a set of literal strings that *must* be matched at the -/// beginning of a regex in order for the entire regex to match. Similarly -/// for a regex suffix. -#[derive(Clone, Debug)] -pub struct LiteralSearcher { - complete: bool, - lcp: Memmem, - lcs: Memmem, - matcher: Matcher, -} - -#[derive(Clone, Debug)] -enum Matcher { - /// No literals. (Never advances through the input.) - Empty, - /// A set of four or more single byte literals. - Bytes(SingleByteSet), - /// A single substring, using vector accelerated routines when available. - Memmem(Memmem), - /// An Aho-Corasick automaton. - AC { ac: AhoCorasick, lits: Vec }, - /// A packed multiple substring searcher, using SIMD. - /// - /// Note that Aho-Corasick will actually use this packed searcher - /// internally automatically, however, there is some overhead associated - /// with going through the Aho-Corasick machinery. So using the packed - /// searcher directly results in some gains. - Packed { s: packed::Searcher, lits: Vec }, -} - -impl LiteralSearcher { - /// Returns a matcher that never matches and never advances the input. - pub fn empty() -> Self { - Self::new(Literals::empty(), Matcher::Empty) - } - - /// Returns a matcher for literal prefixes from the given set. - pub fn prefixes(lits: Literals) -> Self { - let matcher = Matcher::prefixes(&lits); - Self::new(lits, matcher) - } - - /// Returns a matcher for literal suffixes from the given set. - pub fn suffixes(lits: Literals) -> Self { - let matcher = Matcher::suffixes(&lits); - Self::new(lits, matcher) - } - - fn new(lits: Literals, matcher: Matcher) -> Self { - let complete = lits.all_complete(); - LiteralSearcher { - complete, - lcp: Memmem::new(lits.longest_common_prefix()), - lcs: Memmem::new(lits.longest_common_suffix()), - matcher, - } - } - - /// Returns true if all matches comprise the entire regular expression. - /// - /// This does not necessarily mean that a literal match implies a match - /// of the regular expression. For example, the regular expression `^a` - /// is comprised of a single complete literal `a`, but the regular - /// expression demands that it only match at the beginning of a string. - pub fn complete(&self) -> bool { - self.complete && !self.is_empty() - } - - /// Find the position of a literal in `haystack` if it exists. 
- #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn find(&self, haystack: &[u8]) -> Option<(usize, usize)> { - use self::Matcher::*; - match self.matcher { - Empty => Some((0, 0)), - Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)), - Memmem(ref s) => s.find(haystack).map(|i| (i, i + s.len())), - AC { ref ac, .. } => { - ac.find(haystack).map(|m| (m.start(), m.end())) - } - Packed { ref s, .. } => { - s.find(haystack).map(|m| (m.start(), m.end())) - } - } - } - - /// Like find, except matches must start at index `0`. - pub fn find_start(&self, haystack: &[u8]) -> Option<(usize, usize)> { - for lit in self.iter() { - if lit.len() > haystack.len() { - continue; - } - if lit == &haystack[0..lit.len()] { - return Some((0, lit.len())); - } - } - None - } - - /// Like find, except matches must end at index `haystack.len()`. - pub fn find_end(&self, haystack: &[u8]) -> Option<(usize, usize)> { - for lit in self.iter() { - if lit.len() > haystack.len() { - continue; - } - if lit == &haystack[haystack.len() - lit.len()..] { - return Some((haystack.len() - lit.len(), haystack.len())); - } - } - None - } - - /// Returns an iterator over all literals to be matched. - pub fn iter(&self) -> LiteralIter<'_> { - match self.matcher { - Matcher::Empty => LiteralIter::Empty, - Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense), - Matcher::Memmem(ref s) => LiteralIter::Single(&s.finder.needle()), - Matcher::AC { ref lits, .. } => LiteralIter::AC(lits), - Matcher::Packed { ref lits, .. } => LiteralIter::Packed(lits), - } - } - - /// Returns a matcher for the longest common prefix of this matcher. - pub fn lcp(&self) -> &Memmem { - &self.lcp - } - - /// Returns a matcher for the longest common suffix of this matcher. - pub fn lcs(&self) -> &Memmem { - &self.lcs - } - - /// Returns true iff this prefix is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns the number of prefixes in this machine. - pub fn len(&self) -> usize { - use self::Matcher::*; - match self.matcher { - Empty => 0, - Bytes(ref sset) => sset.dense.len(), - Memmem(_) => 1, - AC { ref ac, .. } => ac.pattern_count(), - Packed { ref lits, .. } => lits.len(), - } - } - - /// Return the approximate heap usage of literals in bytes. - pub fn approximate_size(&self) -> usize { - use self::Matcher::*; - match self.matcher { - Empty => 0, - Bytes(ref sset) => sset.approximate_size(), - Memmem(ref single) => single.approximate_size(), - AC { ref ac, .. } => ac.heap_bytes(), - Packed { ref s, .. } => s.heap_bytes(), - } - } -} - -impl Matcher { - fn prefixes(lits: &Literals) -> Self { - let sset = SingleByteSet::prefixes(lits); - Matcher::new(lits, sset) - } - - fn suffixes(lits: &Literals) -> Self { - let sset = SingleByteSet::suffixes(lits); - Matcher::new(lits, sset) - } - - fn new(lits: &Literals, sset: SingleByteSet) -> Self { - if lits.literals().is_empty() { - return Matcher::Empty; - } - if sset.dense.len() >= 26 { - // Avoid trying to match a large number of single bytes. - // This is *very* sensitive to a frequency analysis comparison - // between the bytes in sset and the composition of the haystack. - // No matter the size of sset, if its members all are rare in the - // haystack, then it'd be worth using it. How to tune this... IDK. 
- // ---AG - return Matcher::Empty; - } - if sset.complete { - return Matcher::Bytes(sset); - } - if lits.literals().len() == 1 { - return Matcher::Memmem(Memmem::new(&lits.literals()[0])); - } - - let pats = lits.literals().to_owned(); - let is_aho_corasick_fast = sset.dense.len() <= 1 && sset.all_ascii; - if lits.literals().len() <= 100 && !is_aho_corasick_fast { - let mut builder = packed::Config::new() - .match_kind(packed::MatchKind::LeftmostFirst) - .builder(); - if let Some(s) = builder.extend(&pats).build() { - return Matcher::Packed { s, lits: pats }; - } - } - let ac = AhoCorasickBuilder::new() - .match_kind(aho_corasick::MatchKind::LeftmostFirst) - .dfa(true) - .build_with_size::(&pats) - .unwrap(); - Matcher::AC { ac, lits: pats } - } -} - -#[derive(Debug)] -pub enum LiteralIter<'a> { - Empty, - Bytes(&'a [u8]), - Single(&'a [u8]), - AC(&'a [Literal]), - Packed(&'a [Literal]), -} - -impl<'a> Iterator for LiteralIter<'a> { - type Item = &'a [u8]; - - fn next(&mut self) -> Option { - match *self { - LiteralIter::Empty => None, - LiteralIter::Bytes(ref mut many) => { - if many.is_empty() { - None - } else { - let next = &many[0..1]; - *many = &many[1..]; - Some(next) - } - } - LiteralIter::Single(ref mut one) => { - if one.is_empty() { - None - } else { - let next = &one[..]; - *one = &[]; - Some(next) - } - } - LiteralIter::AC(ref mut lits) => { - if lits.is_empty() { - None - } else { - let next = &lits[0]; - *lits = &lits[1..]; - Some(&**next) - } - } - LiteralIter::Packed(ref mut lits) => { - if lits.is_empty() { - None - } else { - let next = &lits[0]; - *lits = &lits[1..]; - Some(&**next) - } - } - } - } -} - -#[derive(Clone, Debug)] -struct SingleByteSet { - sparse: Vec, - dense: Vec, - complete: bool, - all_ascii: bool, -} - -impl SingleByteSet { - fn new() -> SingleByteSet { - SingleByteSet { - sparse: vec![false; 256], - dense: vec![], - complete: true, - all_ascii: true, - } - } - - fn prefixes(lits: &Literals) -> SingleByteSet { - let mut sset = SingleByteSet::new(); - for lit in lits.literals() { - sset.complete = sset.complete && lit.len() == 1; - if let Some(&b) = lit.get(0) { - if !sset.sparse[b as usize] { - if b > 0x7F { - sset.all_ascii = false; - } - sset.dense.push(b); - sset.sparse[b as usize] = true; - } - } - } - sset - } - - fn suffixes(lits: &Literals) -> SingleByteSet { - let mut sset = SingleByteSet::new(); - for lit in lits.literals() { - sset.complete = sset.complete && lit.len() == 1; - if let Some(&b) = lit.get(lit.len().checked_sub(1).unwrap()) { - if !sset.sparse[b as usize] { - if b > 0x7F { - sset.all_ascii = false; - } - sset.dense.push(b); - sset.sparse[b as usize] = true; - } - } - } - sset - } - - /// Faster find that special cases certain sizes to use memchr. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find(&self, text: &[u8]) -> Option { - match self.dense.len() { - 0 => None, - 1 => memchr(self.dense[0], text), - 2 => memchr2(self.dense[0], self.dense[1], text), - 3 => memchr3(self.dense[0], self.dense[1], self.dense[2], text), - _ => self._find(text), - } - } - - /// Generic find that works on any sized set. - fn _find(&self, haystack: &[u8]) -> Option { - for (i, &b) in haystack.iter().enumerate() { - if self.sparse[b as usize] { - return Some(i); - } - } - None - } - - fn approximate_size(&self) -> usize { - (self.dense.len() * mem::size_of::()) - + (self.sparse.len() * mem::size_of::()) - } -} - -/// A simple wrapper around the memchr crate's memmem implementation. 
-///
-/// The API this exposes mirrors the API of previous substring searchers that
-/// this supplanted.
-#[derive(Clone, Debug)]
-pub struct Memmem {
-    finder: memmem::Finder<'static>,
-    char_len: usize,
-}
-
-impl Memmem {
-    fn new(pat: &[u8]) -> Memmem {
-        Memmem {
-            finder: memmem::Finder::new(pat).into_owned(),
-            char_len: char_len_lossy(pat),
-        }
-    }
-
-    #[cfg_attr(feature = "perf-inline", inline(always))]
-    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
-        self.finder.find(haystack)
-    }
-
-    #[cfg_attr(feature = "perf-inline", inline(always))]
-    pub fn is_suffix(&self, text: &[u8]) -> bool {
-        if text.len() < self.len() {
-            return false;
-        }
-        &text[text.len() - self.len()..] == self.finder.needle()
-    }
-
-    pub fn len(&self) -> usize {
-        self.finder.needle().len()
-    }
-
-    pub fn char_len(&self) -> usize {
-        self.char_len
-    }
-
-    fn approximate_size(&self) -> usize {
-        self.finder.needle().len() * mem::size_of::<u8>()
-    }
-}
-
-fn char_len_lossy(bytes: &[u8]) -> usize {
-    String::from_utf8_lossy(bytes).chars().count()
-}
diff --git a/vendor/regex/src/literal/mod.rs b/vendor/regex/src/literal/mod.rs
deleted file mode 100644
index 980f523..0000000
--- a/vendor/regex/src/literal/mod.rs
+++ /dev/null
@@ -1,55 +0,0 @@
-pub use self::imp::*;
-
-#[cfg(feature = "perf-literal")]
-mod imp;
-
-#[allow(missing_docs)]
-#[cfg(not(feature = "perf-literal"))]
-mod imp {
-    use regex_syntax::hir::literal::Literals;
-
-    #[derive(Clone, Debug)]
-    pub struct LiteralSearcher(());
-
-    impl LiteralSearcher {
-        pub fn empty() -> Self {
-            LiteralSearcher(())
-        }
-
-        pub fn prefixes(_: Literals) -> Self {
-            LiteralSearcher(())
-        }
-
-        pub fn suffixes(_: Literals) -> Self {
-            LiteralSearcher(())
-        }
-
-        pub fn complete(&self) -> bool {
-            false
-        }
-
-        pub fn find(&self, _: &[u8]) -> Option<(usize, usize)> {
-            unreachable!()
-        }
-
-        pub fn find_start(&self, _: &[u8]) -> Option<(usize, usize)> {
-            unreachable!()
-        }
-
-        pub fn find_end(&self, _: &[u8]) -> Option<(usize, usize)> {
-            unreachable!()
-        }
-
-        pub fn is_empty(&self) -> bool {
-            true
-        }
-
-        pub fn len(&self) -> usize {
-            0
-        }
-
-        pub fn approximate_size(&self) -> usize {
-            0
-        }
-    }
-}
diff --git a/vendor/regex/src/pattern.rs b/vendor/regex/src/pattern.rs
index 00549e5..2db04d8 100644
--- a/vendor/regex/src/pattern.rs
+++ b/vendor/regex/src/pattern.rs
@@ -1,6 +1,6 @@
-use std::str::pattern::{Pattern, SearchStep, Searcher};
+use core::str::pattern::{Pattern, SearchStep, Searcher};
 
-use crate::re_unicode::{Matches, Regex};
+use crate::{Matches, Regex};
 
 #[derive(Debug)]
 pub struct RegexSearcher<'r, 't> {
diff --git a/vendor/regex/src/pikevm.rs b/vendor/regex/src/pikevm.rs
deleted file mode 100644
index 8c9eac2..0000000
--- a/vendor/regex/src/pikevm.rs
+++ /dev/null
@@ -1,360 +0,0 @@
-// This module implements the Pike VM. That is, it guarantees linear time
-// search of a regex on any text with memory use proportional to the size of
-// the regex.
-//
-// It is equal in power to the backtracking engine in this crate, except the
-// backtracking engine is typically faster on small regexes/texts at the
-// expense of a bigger memory footprint.
-//
-// It can do more than the DFA can (specifically, record capture locations
-// and execute Unicode word boundary assertions), but at a slower speed.
-// Specifically, the Pike VM executes a DFA implicitly by repeatedly expanding
-// epsilon transitions. That is, the Pike VM engine can be in multiple states
-// at once, whereas the DFA is only ever in one state at a time.
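The multi-state search that the comment above describes is easy to see in miniature. Here is a toy sketch, not the deleted pikevm.rs itself: it hand-codes the NFA for the pattern `a*b` as an anchored, whole-input match and tracks the set of live states per position, whereas a DFA would carry exactly one state. All names are illustrative.

```rust
use std::collections::BTreeSet;

// Hand-rolled NFA for `a*b`. State 0 sits inside `a*`; state 1 means the
// final `b` was just consumed. A Pike-VM-style engine advances a whole
// *set* of such states in lock step over the input.
fn matches_a_star_b(haystack: &[u8]) -> bool {
    let mut live: BTreeSet<u8> = BTreeSet::from([0]);
    for &byte in haystack {
        let mut next = BTreeSet::new();
        for &state in &live {
            match (state, byte) {
                (0, b'a') => {
                    next.insert(0u8); // loop on `a*`
                }
                (0, b'b') => {
                    next.insert(1u8); // consume the `b`
                }
                _ => {} // this thread dies
            }
        }
        live = next;
    }
    live.contains(&1)
}

fn main() {
    assert!(matches_a_star_b(b"aaab"));
    assert!(!matches_a_star_b(b"ba"));
}
```

The `live`/`next` pair plays the role of the `clist`/`nlist` sets in the deleted module below; epsilon transitions and capture slots are what the real engine layers on top.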
-// -// Therefore, the Pike VM is generally treated as the fallback when the other -// matching engines either aren't feasible to run or are insufficient. - -use std::mem; - -use crate::exec::ProgramCache; -use crate::input::{Input, InputAt}; -use crate::prog::{InstPtr, Program}; -use crate::re_trait::Slot; -use crate::sparse::SparseSet; - -/// An NFA simulation matching engine. -#[derive(Debug)] -pub struct Fsm<'r, I> { - /// The sequence of opcodes (among other things) that is actually executed. - /// - /// The program may be byte oriented or Unicode codepoint oriented. - prog: &'r Program, - /// An explicit stack used for following epsilon transitions. (This is - /// borrowed from the cache.) - stack: &'r mut Vec, - /// The input to search. - input: I, -} - -/// A cached allocation that can be reused on each execution. -#[derive(Clone, Debug)] -pub struct Cache { - /// A pair of ordered sets for tracking NFA states. - clist: Threads, - nlist: Threads, - /// An explicit stack used for following epsilon transitions. - stack: Vec, -} - -/// An ordered set of NFA states and their captures. -#[derive(Clone, Debug)] -struct Threads { - /// An ordered set of opcodes (each opcode is an NFA state). - set: SparseSet, - /// Captures for every NFA state. - /// - /// It is stored in row-major order, where the columns are the capture - /// slots and the rows are the states. - caps: Vec, - /// The number of capture slots stored per thread. (Every capture has - /// two slots.) - slots_per_thread: usize, -} - -/// A representation of an explicit stack frame when following epsilon -/// transitions. This is used to avoid recursion. -#[derive(Clone, Debug)] -enum FollowEpsilon { - /// Follow transitions at the given instruction pointer. - IP(InstPtr), - /// Restore the capture slot with the given position in the input. - Capture { slot: usize, pos: Slot }, -} - -impl Cache { - /// Create a new allocation used by the NFA machine to record execution - /// and captures. - pub fn new(_prog: &Program) -> Self { - Cache { clist: Threads::new(), nlist: Threads::new(), stack: vec![] } - } -} - -impl<'r, I: Input> Fsm<'r, I> { - /// Execute the NFA matching engine. - /// - /// If there's a match, `exec` returns `true` and populates the given - /// captures accordingly. - pub fn exec( - prog: &'r Program, - cache: &ProgramCache, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - input: I, - start: usize, - end: usize, - ) -> bool { - let mut cache = cache.borrow_mut(); - let cache = &mut cache.pikevm; - cache.clist.resize(prog.len(), prog.captures.len()); - cache.nlist.resize(prog.len(), prog.captures.len()); - let at = input.at(start); - Fsm { prog, stack: &mut cache.stack, input }.exec_( - &mut cache.clist, - &mut cache.nlist, - matches, - slots, - quit_after_match, - at, - end, - ) - } - - fn exec_( - &mut self, - mut clist: &mut Threads, - mut nlist: &mut Threads, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - mut at: InputAt, - end: usize, - ) -> bool { - let mut matched = false; - let mut all_matched = false; - clist.set.clear(); - nlist.set.clear(); - 'LOOP: loop { - if clist.set.is_empty() { - // Three ways to bail out when our current set of threads is - // empty. - // - // 1. We have a match---so we're done exploring any possible - // alternatives. Time to quit. (We can't do this if we're - // looking for matches for multiple regexes, unless we know - // they all matched.) - // - // 2. 
If the expression starts with a '^' we can terminate as - // soon as the last thread dies. - if (matched && matches.len() <= 1) - || all_matched - || (!at.is_start() && self.prog.is_anchored_start) - { - break; - } - - // 3. If there's a literal prefix for the program, try to - // jump ahead quickly. If it can't be found, then we can - // bail out early. - if !self.prog.prefixes.is_empty() { - at = match self.input.prefix_at(&self.prog.prefixes, at) { - None => break, - Some(at) => at, - }; - } - } - - // This simulates a preceding '.*?' for every regex by adding - // a state starting at the current position in the input for the - // beginning of the program only if we don't already have a match. - if clist.set.is_empty() - || (!self.prog.is_anchored_start && !all_matched) - { - self.add(&mut clist, slots, 0, at); - } - // The previous call to "add" actually inspects the position just - // before the current character. For stepping through the machine, - // we can to look at the current character, so we advance the - // input. - let at_next = self.input.at(at.next_pos()); - for i in 0..clist.set.len() { - let ip = clist.set[i]; - if self.step( - &mut nlist, - matches, - slots, - clist.caps(ip), - ip, - at, - at_next, - ) { - matched = true; - all_matched = all_matched || matches.iter().all(|&b| b); - if quit_after_match { - // If we only care if a match occurs (not its - // position), then we can quit right now. - break 'LOOP; - } - if self.prog.matches.len() == 1 { - // We don't need to check the rest of the threads - // in this set because we've matched something - // ("leftmost-first"). However, we still need to check - // threads in the next set to support things like - // greedy matching. - // - // This is only true on normal regexes. For regex sets, - // we need to mush on to observe other matches. - break; - } - } - } - if at.pos() >= end { - break; - } - at = at_next; - mem::swap(clist, nlist); - nlist.set.clear(); - } - matched - } - - /// Step through the input, one token (byte or codepoint) at a time. - /// - /// nlist is the set of states that will be processed on the next token - /// in the input. - /// - /// caps is the set of captures passed by the caller of the NFA. They are - /// written to only when a match state is visited. - /// - /// thread_caps is the set of captures set for the current NFA state, ip. - /// - /// at and at_next are the current and next positions in the input. at or - /// at_next may be EOF. - fn step( - &mut self, - nlist: &mut Threads, - matches: &mut [bool], - slots: &mut [Slot], - thread_caps: &mut [Option], - ip: usize, - at: InputAt, - at_next: InputAt, - ) -> bool { - use crate::prog::Inst::*; - match self.prog[ip] { - Match(match_slot) => { - if match_slot < matches.len() { - matches[match_slot] = true; - } - for (slot, val) in slots.iter_mut().zip(thread_caps.iter()) { - *slot = *val; - } - true - } - Char(ref inst) => { - if inst.c == at.char() { - self.add(nlist, thread_caps, inst.goto, at_next); - } - false - } - Ranges(ref inst) => { - if inst.matches(at.char()) { - self.add(nlist, thread_caps, inst.goto, at_next); - } - false - } - Bytes(ref inst) => { - if let Some(b) = at.byte() { - if inst.matches(b) { - self.add(nlist, thread_caps, inst.goto, at_next); - } - } - false - } - EmptyLook(_) | Save(_) | Split(_) => false, - } - } - - /// Follows epsilon transitions and adds them for processing to nlist, - /// starting at and including ip. 
- fn add( - &mut self, - nlist: &mut Threads, - thread_caps: &mut [Option], - ip: usize, - at: InputAt, - ) { - self.stack.push(FollowEpsilon::IP(ip)); - while let Some(frame) = self.stack.pop() { - match frame { - FollowEpsilon::IP(ip) => { - self.add_step(nlist, thread_caps, ip, at); - } - FollowEpsilon::Capture { slot, pos } => { - thread_caps[slot] = pos; - } - } - } - } - - /// A helper function for add that avoids excessive pushing to the stack. - fn add_step( - &mut self, - nlist: &mut Threads, - thread_caps: &mut [Option], - mut ip: usize, - at: InputAt, - ) { - // Instead of pushing and popping to the stack, we mutate ip as we - // traverse the set of states. We only push to the stack when we - // absolutely need recursion (restoring captures or following a - // branch). - use crate::prog::Inst::*; - loop { - // Don't visit states we've already added. - if nlist.set.contains(ip) { - return; - } - nlist.set.insert(ip); - match self.prog[ip] { - EmptyLook(ref inst) => { - if self.input.is_empty_match(at, inst) { - ip = inst.goto; - } - } - Save(ref inst) => { - if inst.slot < thread_caps.len() { - self.stack.push(FollowEpsilon::Capture { - slot: inst.slot, - pos: thread_caps[inst.slot], - }); - thread_caps[inst.slot] = Some(at.pos()); - } - ip = inst.goto; - } - Split(ref inst) => { - self.stack.push(FollowEpsilon::IP(inst.goto2)); - ip = inst.goto1; - } - Match(_) | Char(_) | Ranges(_) | Bytes(_) => { - let t = &mut nlist.caps(ip); - for (slot, val) in t.iter_mut().zip(thread_caps.iter()) { - *slot = *val; - } - return; - } - } - } - } -} - -impl Threads { - fn new() -> Self { - Threads { set: SparseSet::new(0), caps: vec![], slots_per_thread: 0 } - } - - fn resize(&mut self, num_insts: usize, ncaps: usize) { - if num_insts == self.set.capacity() { - return; - } - self.slots_per_thread = ncaps * 2; - self.set = SparseSet::new(num_insts); - self.caps = vec![None; self.slots_per_thread * num_insts]; - } - - fn caps(&mut self, pc: usize) -> &mut [Option] { - let i = pc * self.slots_per_thread; - &mut self.caps[i..i + self.slots_per_thread] - } -} diff --git a/vendor/regex/src/pool.rs b/vendor/regex/src/pool.rs deleted file mode 100644 index 6a6f15b..0000000 --- a/vendor/regex/src/pool.rs +++ /dev/null @@ -1,333 +0,0 @@ -// This module provides a relatively simple thread-safe pool of reusable -// objects. For the most part, it's implemented by a stack represented by a -// Mutex>. It has one small trick: because unlocking a mutex is somewhat -// costly, in the case where a pool is accessed by the first thread that tried -// to get a value, we bypass the mutex. Here are some benchmarks showing the -// difference. -// -// 1) misc::anchored_literal_long_non_match 21 (18571 MB/s) -// 2) misc::anchored_literal_long_non_match 107 (3644 MB/s) -// 3) misc::anchored_literal_long_non_match 45 (8666 MB/s) -// 4) misc::anchored_literal_long_non_match 19 (20526 MB/s) -// -// (1) represents our baseline: the master branch at the time of writing when -// using the 'thread_local' crate to implement the pool below. -// -// (2) represents a naive pool implemented completely via Mutex>. There -// is no special trick for bypassing the mutex. -// -// (3) is the same as (2), except it uses Mutex>>. It is twice as -// fast because a Box is much smaller than the T we use with a Pool in this -// crate. So pushing and popping a Box from a Vec is quite a bit faster -// than for T. -// -// (4) is the same as (3), but with the trick for bypassing the mutex in the -// case of the first-to-get thread. 
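To make the numbered designs above concrete, here is a minimal sketch of design (3): a pool that is nothing more than a `Mutex<Vec<Box<T>>>`, taking the lock on every access. It is illustrative only (the type and its names are invented for the sketch), not the code in this module:

```rust
use std::sync::Mutex;

// Design (3) in miniature: values are boxed so the Vec only shuffles thin
// pointers, but every get/put still pays for the mutex. Design (4) adds an
// owner-thread fast path on top of this to skip the lock in the common
// single-threaded case.
struct NaivePool<T> {
    stack: Mutex<Vec<Box<T>>>,
    create: fn() -> T,
}

impl<T> NaivePool<T> {
    fn new(create: fn() -> T) -> NaivePool<T> {
        NaivePool { stack: Mutex::new(vec![]), create }
    }

    fn get(&self) -> Box<T> {
        self.stack
            .lock()
            .unwrap()
            .pop()
            .unwrap_or_else(|| Box::new((self.create)()))
    }

    fn put(&self, value: Box<T>) {
        self.stack.lock().unwrap().push(value);
    }
}

fn main() {
    let pool: NaivePool<Vec<u8>> = NaivePool::new(|| vec![0u8; 64]);
    let scratch = pool.get();
    assert_eq!(scratch.len(), 64);
    pool.put(scratch); // callers must remember to put the value back
}
```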
-//
-// Why move off of thread_local? Even though (4) is a hair faster than (1)
-// above, this was not the main goal. The main goal was to move off of
-// thread_local and find a way to *simply* re-capture some of its speed for
-// regex's specific case. So again, why move off of it? The *primary* reason
-// is because of memory leaks. See https://github.com/rust-lang/regex/issues/362
-// for example. (Why do I want it to be simple? Well, I suppose what I mean
-// is, "use as much safe code as possible to minimize risk and be as sure as
-// I can be that it is correct.")
-//
-// My guess is that the thread_local design is probably not appropriate for
-// regex since its memory usage scales to the number of active threads that
-// have used a regex, whereas the pool below scales to the number of threads
-// that simultaneously use a regex. While neither case permits contraction,
-// since we own the pool data structure below, we can add contraction if a
-// clear use case pops up in the wild. More pressingly though, it seems that
-// there are at least some use case patterns where one might have many
-// threads sitting around that might have used a regex at one point. While
-// thread_local does try to reuse space previously used by a thread that has
-// since stopped, its maximal memory usage still scales with the total number
-// of active threads. In contrast, the pool below scales with the total
-// number of threads *simultaneously* using the pool. The hope is that this
-// uses less memory overall. And if it doesn't, we can hopefully tune it
-// somehow.
-//
-// It seems that these sorts of conditions happen frequently
-// in FFI inside of other more "managed" languages. This was
-// mentioned in the issue linked above, and also mentioned here:
-// https://github.com/BurntSushi/rure-go/issues/3. And in particular, users
-// confirm that disabling the use of thread_local resolves the leak.
-//
-// There were other weaker reasons for moving off of thread_local as well.
-// Namely, at the time, I was looking to reduce dependencies. And for
-// something like regex, maintenance can be simpler when we own the full
-// dependency tree.
-
-use std::panic::{RefUnwindSafe, UnwindSafe};
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::sync::Mutex;
-
-/// An atomic counter used to allocate thread IDs.
-static COUNTER: AtomicUsize = AtomicUsize::new(1);
-
-thread_local!(
-    /// A thread local used to assign an ID to a thread.
-    static THREAD_ID: usize = {
-        let next = COUNTER.fetch_add(1, Ordering::Relaxed);
-        // SAFETY: We cannot permit the reuse of thread IDs since reusing a
-        // thread ID might result in more than one thread "owning" a pool,
-        // and thus, permit accessing a mutable value from multiple threads
-        // simultaneously without synchronization. The intent of this panic
-        // is to be a sanity check. It is not expected that the thread ID
-        // space will actually be exhausted in practice.
-        //
-        // This checks that the counter never wraps around, since atomic
-        // addition wraps around on overflow.
-        if next == 0 {
-            panic!("regex: thread ID allocation space exhausted");
-        }
-        next
-    };
-);
-
-/// The type of the function used to create values in a pool when the pool
-/// is empty and the caller requests one.
-type CreateFn<T> =
-    Box<dyn Fn() -> T + Send + Sync + UnwindSafe + RefUnwindSafe + 'static>;
-
-/// A simple thread safe pool for reusing values.
-///
-/// Getting a value out comes with a guard. When that guard is dropped, the
-/// value is automatically put back in the pool.
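The guard described in the doc comment above is ordinary RAII. A minimal sketch of the put-back-on-drop pattern follows, with the owner-thread fast path left out and all names invented:

```rust
use std::sync::Mutex;

struct StringPool {
    stack: Mutex<Vec<String>>,
}

struct StringGuard<'a> {
    pool: &'a StringPool,
    value: Option<String>,
}

impl StringPool {
    fn get(&self) -> StringGuard<'_> {
        let value = self.stack.lock().unwrap().pop().unwrap_or_default();
        StringGuard { pool: self, value: Some(value) }
    }
}

impl<'a> Drop for StringGuard<'a> {
    // Dropping the guard is what returns the value to the pool; callers
    // never invoke a `put` themselves.
    fn drop(&mut self) {
        if let Some(value) = self.value.take() {
            self.pool.stack.lock().unwrap().push(value);
        }
    }
}

fn main() {
    let pool = StringPool { stack: Mutex::new(vec![String::from("scratch")]) };
    {
        let guard = pool.get();
        assert_eq!(guard.value.as_deref(), Some("scratch"));
    } // guard dropped here; the value is back in the pool
    assert_eq!(pool.stack.lock().unwrap().len(), 1);
}
```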
-///
-/// A Pool impls Sync when T is Send (even if it's not Sync). This means
-/// that T can use interior mutability. This is possible because a pool is
-/// guaranteed to provide a value to exactly one thread at any time.
-///
-/// Currently, a pool never contracts in size. Its size is proportional to
-/// the number of simultaneous uses.
-pub struct Pool<T> {
-    /// A stack of T values to hand out. These are used when a Pool is
-    /// accessed by a thread that didn't create it.
-    stack: Mutex<Vec<Box<T>>>,
-    /// A function to create more T values when stack is empty and a caller
-    /// has requested a T.
-    create: CreateFn<T>,
-    /// The ID of the thread that owns this pool. The owner is the thread
-    /// that makes the first call to 'get'. When the owner calls 'get', it
-    /// gets 'owner_val' directly instead of returning a T from 'stack'.
-    /// See comments elsewhere for details, but this is intended to be an
-    /// optimization for the common case that makes getting a T faster.
-    ///
-    /// It is initialized to a value of zero (an impossible thread ID) as a
-    /// sentinel to indicate that it is unowned.
-    owner: AtomicUsize,
-    /// A value to return when the caller is in the same thread that
-    /// created the Pool.
-    owner_val: T,
-}
-
-// SAFETY: Since we want to use a Pool from multiple threads simultaneously
-// behind an Arc, we need for it to be Sync. In cases where T is itself
-// Sync, Pool would be Sync. However, since we use a Pool to store mutable
-// scratch space, we wind up using a T that has interior mutability and is
-// thus itself not Sync. So what we *really* want is for our Pool to be Sync
-// even when T is not Sync (but is at least Send).
-//
-// The only non-sync aspect of a Pool is its 'owner_val' field, which is
-// used to implement faster access to a pool value in the common case of a
-// pool being accessed in the same thread in which it was created. The
-// 'stack' field is also shared, but a Mutex<T> where T: Send is already
-// Sync. So we only need to worry about 'owner_val'.
-//
-// The key is to guarantee that 'owner_val' can only ever be accessed from
-// one thread. In our implementation below, we guarantee this by only
-// returning the 'owner_val' when the ID of the current thread matches the
-// ID of the thread that created the Pool. Since this can only ever be one
-// thread, it follows that only one thread can access 'owner_val' at any
-// point in time. Thus, it is safe to declare that Pool is Sync when T is
-// Send.
-//
-// NOTE: It would also be possible to make the owning thread be the *first*
-// thread that tries to get a value out of a Pool. However, the current
-// implementation is a little simpler and it's not clear if making the
-// first thread (rather than the creating thread) the owner is meaningfully
-// better.
-//
-// If there is a way to achieve our performance goals using safe code, then
-// I would very much welcome a patch. As it stands, the implementation
-// below tries to balance safety with performance. The case where a Regex
-// is used from multiple threads simultaneously will suffer a bit since
-// getting a cache will require unlocking a mutex.
-unsafe impl<T: Send> Sync for Pool<T> {}
-
-impl<T: ::std::fmt::Debug + Send> ::std::fmt::Debug for Pool<T> {
-    fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
-        f.debug_struct("Pool")
-            .field("stack", &self.stack)
-            .field("owner", &self.owner)
-            .field("owner_val", &self.owner_val)
-            .finish()
-    }
-}
-
-/// A guard that is returned when a caller requests a value from the pool.
-///
-/// The purpose of the guard is to use RAII to automatically put the value
-/// back in the pool once it's dropped.
-#[derive(Debug)]
-pub struct PoolGuard<'a, T: Send> {
-    /// The pool that this guard is attached to.
-    pool: &'a Pool<T>,
-    /// This is None when the guard represents the special "owned" value.
-    /// In which case, the value is retrieved from 'pool.owner_val'.
-    value: Option<Box<T>>,
-}
-
-impl<T: Send> Pool<T> {
-    /// Create a new pool. The given closure is used to create values in
-    /// the pool when necessary.
-    pub fn new(create: CreateFn<T>) -> Pool<T> {
-        let owner = AtomicUsize::new(0);
-        let owner_val = create();
-        Pool { stack: Mutex::new(vec![]), create, owner, owner_val }
-    }
-
-    /// Get a value from the pool. The caller is guaranteed to have
-    /// exclusive access to the given value.
-    ///
-    /// Note that there is no guarantee provided about which value in the
-    /// pool is returned. That is, calling get, dropping the guard (causing
-    /// the value to go back into the pool) and then calling get again is
-    /// NOT guaranteed to return the same value received in the first get
-    /// call.
-    #[cfg_attr(feature = "perf-inline", inline(always))]
-    pub fn get(&self) -> PoolGuard<'_, T> {
-        // Our fast path checks if the caller is the thread that "owns"
-        // this pool. Or stated differently, whether it is the first thread
-        // that tried to extract a value from the pool. If it is, then we
-        // can return a T to the caller without going through a mutex.
-        //
-        // SAFETY: We must guarantee that only one thread gets access to
-        // this value. Since a thread is uniquely identified by the
-        // THREAD_ID thread local, it follows that if the caller's thread
-        // ID is equal to the owner, then only one thread may receive this
-        // value.
-        let caller = THREAD_ID.with(|id| *id);
-        let owner = self.owner.load(Ordering::Relaxed);
-        if caller == owner {
-            return self.guard_owned();
-        }
-        self.get_slow(caller, owner)
-    }
-
-    /// This is the "slow" version that goes through a mutex to pop an
-    /// allocated value off a stack to return to the caller. (Or, if the
-    /// stack is empty, a new value is created.)
-    ///
-    /// If the pool has no owner, then this will set the owner.
-    #[cold]
-    fn get_slow(&self, caller: usize, owner: usize) -> PoolGuard<'_, T> {
-        use std::sync::atomic::Ordering::Relaxed;
-
-        if owner == 0 {
-            // The sentinel 0 value means this pool is not yet owned. We
-            // try to atomically set the owner. If we do, then this thread
-            // becomes the owner and we can return a guard that represents
-            // the special T for the owner.
-            let res = self.owner.compare_exchange(0, caller, Relaxed, Relaxed);
-            if res.is_ok() {
-                return self.guard_owned();
-            }
-        }
-        let mut stack = self.stack.lock().unwrap();
-        let value = match stack.pop() {
-            None => Box::new((self.create)()),
-            Some(value) => value,
-        };
-        self.guard_stack(value)
-    }
-
-    /// Puts a value back into the pool. Callers don't need to call this.
-    /// Once the guard that's returned by 'get' is dropped, it is put back
-    /// into the pool automatically.
-    fn put(&self, value: Box<T>) {
-        let mut stack = self.stack.lock().unwrap();
-        stack.push(value);
-    }
-
-    /// Create a guard that represents the special owned T.
-    fn guard_owned(&self) -> PoolGuard<'_, T> {
-        PoolGuard { pool: self, value: None }
-    }
-
-    /// Create a guard that contains a value from the pool's stack.
-    fn guard_stack(&self, value: Box<T>) -> PoolGuard<'_, T> {
-        PoolGuard { pool: self, value: Some(value) }
-    }
-}
-
-impl<'a, T: Send> PoolGuard<'a, T> {
-    /// Return the underlying value.
-    pub fn value(&self) -> &T {
-        match self.value {
-            None => &self.pool.owner_val,
-            Some(ref v) => &**v,
-        }
-    }
-}
-
-impl<'a, T: Send> Drop for PoolGuard<'a, T> {
-    #[cfg_attr(feature = "perf-inline", inline(always))]
-    fn drop(&mut self) {
-        if let Some(value) = self.value.take() {
-            self.pool.put(value);
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::panic::{RefUnwindSafe, UnwindSafe};
-
-    use super::*;
-
-    #[test]
-    fn oibits() {
-        use crate::exec::ProgramCache;
-
-        fn has_oibits<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}
-        has_oibits::<Pool<ProgramCache>>();
-    }
-
-    // Tests that Pool implements the "single owner" optimization. That is,
-    // the thread that first accesses the pool gets its own copy, while all
-    // other threads get distinct copies.
-    #[test]
-    fn thread_owner_optimization() {
-        use std::cell::RefCell;
-        use std::sync::Arc;
-
-        let pool: Arc<Pool<RefCell<Vec<char>>>> =
-            Arc::new(Pool::new(Box::new(|| RefCell::new(vec!['a']))));
-        pool.get().value().borrow_mut().push('x');
-
-        let pool1 = pool.clone();
-        let t1 = std::thread::spawn(move || {
-            let guard = pool1.get();
-            let v = guard.value();
-            v.borrow_mut().push('y');
-        });
-
-        let pool2 = pool.clone();
-        let t2 = std::thread::spawn(move || {
-            let guard = pool2.get();
-            let v = guard.value();
-            v.borrow_mut().push('z');
-        });
-
-        t1.join().unwrap();
-        t2.join().unwrap();
-
-        // If we didn't implement the single owner optimization, then one of
-        // the threads above is likely to have mutated the [a, x] vec that
-        // we stuffed in the pool before spawning the threads. But since
-        // neither thread was first to access the pool, and because of the
-        // optimization, we should be guaranteed that neither thread mutates
-        // the special owned pool value.
-        //
-        // (Technically this is an implementation detail and not a contract
-        // of Pool's API.)
-        assert_eq!(vec!['a', 'x'], *pool.get().value().borrow());
-    }
-}
diff --git a/vendor/regex/src/prog.rs b/vendor/regex/src/prog.rs
deleted file mode 100644
index c211f71..0000000
--- a/vendor/regex/src/prog.rs
+++ /dev/null
@@ -1,447 +0,0 @@
-use std::cmp::Ordering;
-use std::collections::HashMap;
-use std::fmt;
-use std::mem;
-use std::ops::Deref;
-use std::slice;
-use std::sync::Arc;
-
-use crate::input::Char;
-use crate::literal::LiteralSearcher;
-
-/// `InstPtr` represents the index of an instruction in a regex program.
-pub type InstPtr = usize;
-
-/// Program is a sequence of instructions and various facts about those
-/// instructions.
-#[derive(Clone)]
-pub struct Program {
-    /// A sequence of instructions that represents an NFA.
-    pub insts: Vec<Inst>,
-    /// Pointers to each Match instruction in the sequence.
-    ///
-    /// This is always length 1 unless this program represents a regex set.
-    pub matches: Vec<InstPtr>,
-    /// The ordered sequence of all capture groups extracted from the AST.
-    /// Unnamed groups are `None`.
-    pub captures: Vec<Option<String>>,
-    /// Pointers to all named capture groups into `captures`.
-    pub capture_name_idx: Arc<HashMap<String, usize>>,
-    /// A pointer to the start instruction. This can vary depending on how
-    /// the program was compiled. For example, programs for use with the DFA
-    /// engine have a `.*?` inserted at the beginning of unanchored regular
-    /// expressions. The actual starting point of the program is after the
-    /// `.*?`.
-    pub start: InstPtr,
-    /// A set of equivalence classes for discriminating bytes in the
-    /// compiled program.
-    pub byte_classes: Vec<u8>,
-    /// When true, this program can only match valid UTF-8.
-    pub only_utf8: bool,
-    /// When true, this program uses byte range instructions instead of
-    /// Unicode range instructions.
- pub is_bytes: bool, - /// When true, the program is compiled for DFA matching. For example, this - /// implies `is_bytes` and also inserts a preceding `.*?` for unanchored - /// regexes. - pub is_dfa: bool, - /// When true, the program matches text in reverse (for use only in the - /// DFA). - pub is_reverse: bool, - /// Whether the regex must match from the start of the input. - pub is_anchored_start: bool, - /// Whether the regex must match at the end of the input. - pub is_anchored_end: bool, - /// Whether this program contains a Unicode word boundary instruction. - pub has_unicode_word_boundary: bool, - /// A possibly empty machine for very quickly matching prefix literals. - pub prefixes: LiteralSearcher, - /// A limit on the size of the cache that the DFA is allowed to use while - /// matching. - /// - /// The cache limit specifies approximately how much space we're willing to - /// give to the state cache. Once the state cache exceeds the size, it is - /// wiped and all states must be re-computed. - /// - /// Note that this value does not impact correctness. It can be set to 0 - /// and the DFA will run just fine. (It will only ever store exactly one - /// state in the cache, and will likely run very slowly, but it will work.) - /// - /// Also note that this limit is *per thread of execution*. That is, - /// if the same regex is used to search text across multiple threads - /// simultaneously, then the DFA cache is not shared. Instead, copies are - /// made. - pub dfa_size_limit: usize, -} - -impl Program { - /// Creates an empty instruction sequence. Fields are given default - /// values. - pub fn new() -> Self { - Program { - insts: vec![], - matches: vec![], - captures: vec![], - capture_name_idx: Arc::new(HashMap::new()), - start: 0, - byte_classes: vec![0; 256], - only_utf8: true, - is_bytes: false, - is_dfa: false, - is_reverse: false, - is_anchored_start: false, - is_anchored_end: false, - has_unicode_word_boundary: false, - prefixes: LiteralSearcher::empty(), - dfa_size_limit: 2 * (1 << 20), - } - } - - /// If pc is an index to a no-op instruction (like Save), then return the - /// next pc that is not a no-op instruction. - pub fn skip(&self, mut pc: usize) -> usize { - loop { - match self[pc] { - Inst::Save(ref i) => pc = i.goto, - _ => return pc, - } - } - } - - /// Return true if and only if an execution engine at instruction `pc` will - /// always lead to a match. - pub fn leads_to_match(&self, pc: usize) -> bool { - if self.matches.len() > 1 { - // If we have a regex set, then we have more than one ending - // state, so leading to one of those states is generally - // meaningless. - return false; - } - match self[self.skip(pc)] { - Inst::Match(_) => true, - _ => false, - } - } - - /// Returns true if the current configuration demands that an implicit - /// `.*?` be prepended to the instruction sequence. - pub fn needs_dotstar(&self) -> bool { - self.is_dfa && !self.is_reverse && !self.is_anchored_start - } - - /// Returns true if this program uses Byte instructions instead of - /// Char/Range instructions. - pub fn uses_bytes(&self) -> bool { - self.is_bytes || self.is_dfa - } - - /// Returns true if this program exclusively matches valid UTF-8 bytes. - /// - /// That is, if an invalid UTF-8 byte is seen, then no match is possible. - pub fn only_utf8(&self) -> bool { - self.only_utf8 - } - - /// Return the approximate heap usage of this instruction sequence in - /// bytes. 
- pub fn approximate_size(&self) -> usize { - // The only instruction that uses heap space is Ranges (for - // Unicode codepoint programs) to store non-overlapping codepoint - // ranges. To keep this operation constant time, we ignore them. - (self.len() * mem::size_of::()) - + (self.matches.len() * mem::size_of::()) - + (self.captures.len() * mem::size_of::>()) - + (self.capture_name_idx.len() - * (mem::size_of::() + mem::size_of::())) - + (self.byte_classes.len() * mem::size_of::()) - + self.prefixes.approximate_size() - } -} - -impl Deref for Program { - type Target = [Inst]; - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn deref(&self) -> &Self::Target { - &*self.insts - } -} - -impl fmt::Debug for Program { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use self::Inst::*; - - fn with_goto(cur: usize, goto: usize, fmtd: String) -> String { - if goto == cur + 1 { - fmtd - } else { - format!("{} (goto: {})", fmtd, goto) - } - } - - fn visible_byte(b: u8) -> String { - use std::ascii::escape_default; - let escaped = escape_default(b).collect::>(); - String::from_utf8_lossy(&escaped).into_owned() - } - - for (pc, inst) in self.iter().enumerate() { - match *inst { - Match(slot) => write!(f, "{:04} Match({:?})", pc, slot)?, - Save(ref inst) => { - let s = format!("{:04} Save({})", pc, inst.slot); - write!(f, "{}", with_goto(pc, inst.goto, s))?; - } - Split(ref inst) => { - write!( - f, - "{:04} Split({}, {})", - pc, inst.goto1, inst.goto2 - )?; - } - EmptyLook(ref inst) => { - let s = format!("{:?}", inst.look); - write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; - } - Char(ref inst) => { - let s = format!("{:?}", inst.c); - write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; - } - Ranges(ref inst) => { - let ranges = inst - .ranges - .iter() - .map(|r| format!("{:?}-{:?}", r.0, r.1)) - .collect::>() - .join(", "); - write!( - f, - "{:04} {}", - pc, - with_goto(pc, inst.goto, ranges) - )?; - } - Bytes(ref inst) => { - let s = format!( - "Bytes({}, {})", - visible_byte(inst.start), - visible_byte(inst.end) - ); - write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; - } - } - if pc == self.start { - write!(f, " (start)")?; - } - writeln!(f)?; - } - Ok(()) - } -} - -impl<'a> IntoIterator for &'a Program { - type Item = &'a Inst; - type IntoIter = slice::Iter<'a, Inst>; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -/// Inst is an instruction code in a Regex program. -/// -/// Regrettably, a regex program either contains Unicode codepoint -/// instructions (Char and Ranges) or it contains byte instructions (Bytes). -/// A regex program can never contain both. -/// -/// It would be worth investigating splitting this into two distinct types and -/// then figuring out how to make the matching engines polymorphic over those -/// types without sacrificing performance. -/// -/// Other than the benefit of moving invariants into the type system, another -/// benefit is the decreased size. If we remove the `Char` and `Ranges` -/// instructions from the `Inst` enum, then its size shrinks from 32 bytes to -/// 24 bytes. (This is because of the removal of a `Box<[]>` in the `Ranges` -/// variant.) Given that byte based machines are typically much bigger than -/// their Unicode analogues (because they can decode UTF-8 directly), this ends -/// up being a pretty significant savings. -#[derive(Clone, Debug)] -pub enum Inst { - /// Match indicates that the program has reached a match state. 
- /// - /// The number in the match corresponds to the Nth logical regular - /// expression in this program. This index is always 0 for normal regex - /// programs. Values greater than 0 appear when compiling regex sets, and - /// each match instruction gets its own unique value. The value corresponds - /// to the Nth regex in the set. - Match(usize), - /// Save causes the program to save the current location of the input in - /// the slot indicated by InstSave. - Save(InstSave), - /// Split causes the program to diverge to one of two paths in the - /// program, preferring goto1 in InstSplit. - Split(InstSplit), - /// EmptyLook represents a zero-width assertion in a regex program. A - /// zero-width assertion does not consume any of the input text. - EmptyLook(InstEmptyLook), - /// Char requires the regex program to match the character in InstChar at - /// the current position in the input. - Char(InstChar), - /// Ranges requires the regex program to match the character at the current - /// position in the input with one of the ranges specified in InstRanges. - Ranges(InstRanges), - /// Bytes is like Ranges, except it expresses a single byte range. It is - /// used in conjunction with Split instructions to implement multi-byte - /// character classes. - Bytes(InstBytes), -} - -impl Inst { - /// Returns true if and only if this is a match instruction. - pub fn is_match(&self) -> bool { - match *self { - Inst::Match(_) => true, - _ => false, - } - } -} - -/// Representation of the Save instruction. -#[derive(Clone, Debug)] -pub struct InstSave { - /// The next location to execute in the program. - pub goto: InstPtr, - /// The capture slot (there are two slots for every capture in a regex, - /// including the zeroth capture for the entire match). - pub slot: usize, -} - -/// Representation of the Split instruction. -#[derive(Clone, Debug)] -pub struct InstSplit { - /// The first instruction to try. A match resulting from following goto1 - /// has precedence over a match resulting from following goto2. - pub goto1: InstPtr, - /// The second instruction to try. A match resulting from following goto1 - /// has precedence over a match resulting from following goto2. - pub goto2: InstPtr, -} - -/// Representation of the `EmptyLook` instruction. -#[derive(Clone, Debug)] -pub struct InstEmptyLook { - /// The next location to execute in the program if this instruction - /// succeeds. - pub goto: InstPtr, - /// The type of zero-width assertion to check. - pub look: EmptyLook, -} - -/// The set of zero-width match instructions. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum EmptyLook { - /// Start of line or input. - StartLine, - /// End of line or input. - EndLine, - /// Start of input. - StartText, - /// End of input. - EndText, - /// Word character on one side and non-word character on other. - WordBoundary, - /// Word character on both sides or non-word character on both sides. - NotWordBoundary, - /// ASCII word boundary. - WordBoundaryAscii, - /// Not ASCII word boundary. - NotWordBoundaryAscii, -} - -/// Representation of the Char instruction. -#[derive(Clone, Debug)] -pub struct InstChar { - /// The next location to execute in the program if this instruction - /// succeeds. - pub goto: InstPtr, - /// The character to test. - pub c: char, -} - -/// Representation of the Ranges instruction. -#[derive(Clone, Debug)] -pub struct InstRanges { - /// The next location to execute in the program if this instruction - /// succeeds. 
- pub goto: InstPtr, - /// The set of Unicode scalar value ranges to test. - pub ranges: Box<[(char, char)]>, -} - -impl InstRanges { - /// Tests whether the given input character matches this instruction. - pub fn matches(&self, c: Char) -> bool { - // This speeds up the `match_class_unicode` benchmark by checking - // some common cases quickly without binary search. e.g., Matching - // a Unicode class on predominantly ASCII text. - for r in self.ranges.iter().take(4) { - if c < r.0 { - return false; - } - if c <= r.1 { - return true; - } - } - self.ranges - .binary_search_by(|r| { - if r.1 < c { - Ordering::Less - } else if r.0 > c { - Ordering::Greater - } else { - Ordering::Equal - } - }) - .is_ok() - } - - /// Return the number of distinct characters represented by all of the - /// ranges. - pub fn num_chars(&self) -> usize { - self.ranges - .iter() - .map(|&(s, e)| 1 + (e as u32) - (s as u32)) - .sum::() as usize - } -} - -/// Representation of the Bytes instruction. -#[derive(Clone, Debug)] -pub struct InstBytes { - /// The next location to execute in the program if this instruction - /// succeeds. - pub goto: InstPtr, - /// The start (inclusive) of this byte range. - pub start: u8, - /// The end (inclusive) of this byte range. - pub end: u8, -} - -impl InstBytes { - /// Returns true if and only if the given byte is in this range. - pub fn matches(&self, byte: u8) -> bool { - self.start <= byte && byte <= self.end - } -} - -#[cfg(test)] -mod test { - #[test] - #[cfg(target_pointer_width = "64")] - fn test_size_of_inst() { - use std::mem::size_of; - - use super::Inst; - - assert_eq!(32, size_of::()); - } -} diff --git a/vendor/regex/src/re_builder.rs b/vendor/regex/src/re_builder.rs deleted file mode 100644 index ee63836..0000000 --- a/vendor/regex/src/re_builder.rs +++ /dev/null @@ -1,421 +0,0 @@ -/// The set of user configurable options for compiling zero or more regexes. -#[derive(Clone, Debug)] -#[allow(missing_docs)] -pub struct RegexOptions { - pub pats: Vec, - pub size_limit: usize, - pub dfa_size_limit: usize, - pub nest_limit: u32, - pub case_insensitive: bool, - pub multi_line: bool, - pub dot_matches_new_line: bool, - pub swap_greed: bool, - pub ignore_whitespace: bool, - pub unicode: bool, - pub octal: bool, -} - -impl Default for RegexOptions { - fn default() -> Self { - RegexOptions { - pats: vec![], - size_limit: 10 * (1 << 20), - dfa_size_limit: 2 * (1 << 20), - nest_limit: 250, - case_insensitive: false, - multi_line: false, - dot_matches_new_line: false, - swap_greed: false, - ignore_whitespace: false, - unicode: true, - octal: false, - } - } -} - -macro_rules! define_builder { - ($name:ident, $regex_mod:ident, $only_utf8:expr) => { - pub mod $name { - use super::RegexOptions; - use crate::error::Error; - use crate::exec::ExecBuilder; - - use crate::$regex_mod::Regex; - - /// A configurable builder for a regular expression. - /// - /// A builder can be used to configure how the regex is built, for example, by - /// setting the default flags (which can be overridden in the expression - /// itself) or setting various limits. - #[derive(Debug)] - pub struct RegexBuilder(RegexOptions); - - impl RegexBuilder { - /// Create a new regular expression builder with the given pattern. - /// - /// If the pattern is invalid, then an error will be returned when - /// `build` is called. 
- pub fn new(pattern: &str) -> RegexBuilder { - let mut builder = RegexBuilder(RegexOptions::default()); - builder.0.pats.push(pattern.to_owned()); - builder - } - - /// Consume the builder and compile the regular expression. - /// - /// Note that calling `as_str` on the resulting `Regex` will produce the - /// pattern given to `new` verbatim. Notably, it will not incorporate any - /// of the flags set on this builder. - pub fn build(&self) -> Result { - ExecBuilder::new_options(self.0.clone()) - .only_utf8($only_utf8) - .build() - .map(Regex::from) - } - - /// Set the value for the case insensitive (`i`) flag. - /// - /// When enabled, letters in the pattern will match both upper case and - /// lower case variants. - pub fn case_insensitive( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.case_insensitive = yes; - self - } - - /// Set the value for the multi-line matching (`m`) flag. - /// - /// When enabled, `^` matches the beginning of lines and `$` matches the - /// end of lines. - /// - /// By default, they match beginning/end of the input. - pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.multi_line = yes; - self - } - - /// Set the value for the any character (`s`) flag, where in `.` matches - /// anything when `s` is set and matches anything except for new line when - /// it is not set (the default). - /// - /// N.B. "matches anything" means "any byte" when Unicode is disabled and - /// means "any valid UTF-8 encoding of any Unicode scalar value" when - /// Unicode is enabled. - pub fn dot_matches_new_line( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.dot_matches_new_line = yes; - self - } - - /// Set the value for the greedy swap (`U`) flag. - /// - /// When enabled, a pattern like `a*` is lazy (tries to find shortest - /// match) and `a*?` is greedy (tries to find longest match). - /// - /// By default, `a*` is greedy and `a*?` is lazy. - pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.swap_greed = yes; - self - } - - /// Set the value for the ignore whitespace (`x`) flag. - /// - /// When enabled, whitespace such as new lines and spaces will be ignored - /// between expressions of the pattern, and `#` can be used to start a - /// comment until the next new line. - pub fn ignore_whitespace( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.ignore_whitespace = yes; - self - } - - /// Set the value for the Unicode (`u`) flag. - /// - /// Enabled by default. When disabled, character classes such as `\w` only - /// match ASCII word characters instead of all Unicode word characters. - pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.unicode = yes; - self - } - - /// Whether to support octal syntax or not. - /// - /// Octal syntax is a little-known way of uttering Unicode codepoints in - /// a regular expression. For example, `a`, `\x61`, `\u0061` and - /// `\141` are all equivalent regular expressions, where the last example - /// shows octal syntax. - /// - /// While supporting octal syntax isn't in and of itself a problem, it does - /// make good error messages harder. That is, in PCRE based regex engines, - /// syntax like `\0` invokes a backreference, which is explicitly - /// unsupported in Rust's regex engine. However, many users expect it to - /// be supported. Therefore, when octal support is disabled, the error - /// message will explicitly mention that backreferences aren't supported. - /// - /// Octal syntax is disabled by default. 
- pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.octal = yes; - self - } - - /// Set the approximate size limit of the compiled regular expression. - /// - /// This roughly corresponds to the number of bytes occupied by a single - /// compiled program. If the program exceeds this number, then a - /// compilation error is returned. - pub fn size_limit( - &mut self, - limit: usize, - ) -> &mut RegexBuilder { - self.0.size_limit = limit; - self - } - - /// Set the approximate size of the cache used by the DFA. - /// - /// This roughly corresponds to the number of bytes that the DFA will - /// use while searching. - /// - /// Note that this is a *per thread* limit. There is no way to set a global - /// limit. In particular, if a regex is used from multiple threads - /// simultaneously, then each thread may use up to the number of bytes - /// specified here. - pub fn dfa_size_limit( - &mut self, - limit: usize, - ) -> &mut RegexBuilder { - self.0.dfa_size_limit = limit; - self - } - - /// Set the nesting limit for this parser. - /// - /// The nesting limit controls how deep the abstract syntax tree is allowed - /// to be. If the AST exceeds the given limit (e.g., with too many nested - /// groups), then an error is returned by the parser. - /// - /// The purpose of this limit is to act as a heuristic to prevent stack - /// overflow for consumers that do structural induction on an `Ast` using - /// explicit recursion. While this crate never does this (instead using - /// constant stack space and moving the call stack to the heap), other - /// crates may. - /// - /// This limit is not checked until the entire Ast is parsed. Therefore, - /// if callers want to put a limit on the amount of heap space used, then - /// they should impose a limit on the length, in bytes, of the concrete - /// pattern string. In particular, this is viable since this parser - /// implementation will limit itself to heap space proportional to the - /// length of the pattern string. - /// - /// Note that a nest limit of `0` will return a nest limit error for most - /// patterns but not all. For example, a nest limit of `0` permits `a` but - /// not `ab`, since `ab` requires a concatenation, which results in a nest - /// depth of `1`. In general, a nest limit is not something that manifests - /// in an obvious way in the concrete syntax, therefore, it should not be - /// used in a granular way. - pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { - self.0.nest_limit = limit; - self - } - } - } - }; -} - -define_builder!(bytes, re_bytes, false); -define_builder!(unicode, re_unicode, true); - -macro_rules! define_set_builder { - ($name:ident, $regex_mod:ident, $only_utf8:expr) => { - pub mod $name { - use super::RegexOptions; - use crate::error::Error; - use crate::exec::ExecBuilder; - - use crate::re_set::$regex_mod::RegexSet; - - /// A configurable builder for a set of regular expressions. - /// - /// A builder can be used to configure how the regexes are built, for example, - /// by setting the default flags (which can be overridden in the expression - /// itself) or setting various limits. - #[derive(Debug)] - pub struct RegexSetBuilder(RegexOptions); - - impl RegexSetBuilder { - /// Create a new regular expression builder with the given pattern. - /// - /// If the pattern is invalid, then an error will be returned when - /// `build` is called. 
- pub fn new(patterns: I) -> RegexSetBuilder - where - S: AsRef, - I: IntoIterator, - { - let mut builder = RegexSetBuilder(RegexOptions::default()); - for pat in patterns { - builder.0.pats.push(pat.as_ref().to_owned()); - } - builder - } - - /// Consume the builder and compile the regular expressions into a set. - pub fn build(&self) -> Result { - ExecBuilder::new_options(self.0.clone()) - .only_utf8($only_utf8) - .build() - .map(RegexSet::from) - } - - /// Set the value for the case insensitive (`i`) flag. - pub fn case_insensitive( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.case_insensitive = yes; - self - } - - /// Set the value for the multi-line matching (`m`) flag. - pub fn multi_line( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.multi_line = yes; - self - } - - /// Set the value for the any character (`s`) flag, where in `.` matches - /// anything when `s` is set and matches anything except for new line when - /// it is not set (the default). - /// - /// N.B. "matches anything" means "any byte" for `regex::bytes::RegexSet` - /// expressions and means "any Unicode scalar value" for `regex::RegexSet` - /// expressions. - pub fn dot_matches_new_line( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.dot_matches_new_line = yes; - self - } - - /// Set the value for the greedy swap (`U`) flag. - pub fn swap_greed( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.swap_greed = yes; - self - } - - /// Set the value for the ignore whitespace (`x`) flag. - pub fn ignore_whitespace( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.ignore_whitespace = yes; - self - } - - /// Set the value for the Unicode (`u`) flag. - pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { - self.0.unicode = yes; - self - } - - /// Whether to support octal syntax or not. - /// - /// Octal syntax is a little-known way of uttering Unicode codepoints in - /// a regular expression. For example, `a`, `\x61`, `\u0061` and - /// `\141` are all equivalent regular expressions, where the last example - /// shows octal syntax. - /// - /// While supporting octal syntax isn't in and of itself a problem, it does - /// make good error messages harder. That is, in PCRE based regex engines, - /// syntax like `\0` invokes a backreference, which is explicitly - /// unsupported in Rust's regex engine. However, many users expect it to - /// be supported. Therefore, when octal support is disabled, the error - /// message will explicitly mention that backreferences aren't supported. - /// - /// Octal syntax is disabled by default. - pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { - self.0.octal = yes; - self - } - - /// Set the approximate size limit of the compiled regular expression. - /// - /// This roughly corresponds to the number of bytes occupied by a single - /// compiled program. If the program exceeds this number, then a - /// compilation error is returned. - pub fn size_limit( - &mut self, - limit: usize, - ) -> &mut RegexSetBuilder { - self.0.size_limit = limit; - self - } - - /// Set the approximate size of the cache used by the DFA. - /// - /// This roughly corresponds to the number of bytes that the DFA will - /// use while searching. - /// - /// Note that this is a *per thread* limit. There is no way to set a global - /// limit. In particular, if a regex is used from multiple threads - /// simultaneously, then each thread may use up to the number of bytes - /// specified here. 
- pub fn dfa_size_limit( - &mut self, - limit: usize, - ) -> &mut RegexSetBuilder { - self.0.dfa_size_limit = limit; - self - } - - /// Set the nesting limit for this parser. - /// - /// The nesting limit controls how deep the abstract syntax tree is allowed - /// to be. If the AST exceeds the given limit (e.g., with too many nested - /// groups), then an error is returned by the parser. - /// - /// The purpose of this limit is to act as a heuristic to prevent stack - /// overflow for consumers that do structural induction on an `Ast` using - /// explicit recursion. While this crate never does this (instead using - /// constant stack space and moving the call stack to the heap), other - /// crates may. - /// - /// This limit is not checked until the entire Ast is parsed. Therefore, - /// if callers want to put a limit on the amount of heap space used, then - /// they should impose a limit on the length, in bytes, of the concrete - /// pattern string. In particular, this is viable since this parser - /// implementation will limit itself to heap space proportional to the - /// length of the pattern string. - /// - /// Note that a nest limit of `0` will return a nest limit error for most - /// patterns but not all. For example, a nest limit of `0` permits `a` but - /// not `ab`, since `ab` requires a concatenation, which results in a nest - /// depth of `1`. In general, a nest limit is not something that manifests - /// in an obvious way in the concrete syntax, therefore, it should not be - /// used in a granular way. - pub fn nest_limit( - &mut self, - limit: u32, - ) -> &mut RegexSetBuilder { - self.0.nest_limit = limit; - self - } - } - } - }; -} - -define_set_builder!(set_bytes, bytes, false); -define_set_builder!(set_unicode, unicode, true); diff --git a/vendor/regex/src/re_bytes.rs b/vendor/regex/src/re_bytes.rs deleted file mode 100644 index 07e9f98..0000000 --- a/vendor/regex/src/re_bytes.rs +++ /dev/null @@ -1,1260 +0,0 @@ -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt; -use std::iter::FusedIterator; -use std::ops::{Index, Range}; -use std::str::FromStr; -use std::sync::Arc; - -use crate::find_byte::find_byte; - -use crate::error::Error; -use crate::exec::{Exec, ExecNoSync}; -use crate::expand::expand_bytes; -use crate::re_builder::bytes::RegexBuilder; -use crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; - -/// Match represents a single match of a regex in a haystack. -/// -/// The lifetime parameter `'t` refers to the lifetime of the matched text. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub struct Match<'t> { - text: &'t [u8], - start: usize, - end: usize, -} - -impl<'t> Match<'t> { - /// Returns the starting byte offset of the match in the haystack. - #[inline] - pub fn start(&self) -> usize { - self.start - } - - /// Returns the ending byte offset of the match in the haystack. - #[inline] - pub fn end(&self) -> usize { - self.end - } - - /// Returns the range over the starting and ending byte offsets of the - /// match in the haystack. - #[inline] - pub fn range(&self) -> Range { - self.start..self.end - } - - /// Returns the matched text. - #[inline] - pub fn as_bytes(&self) -> &'t [u8] { - &self.text[self.range()] - } - - /// Creates a new match from the given haystack and byte offsets. 
- #[inline] - fn new(haystack: &'t [u8], start: usize, end: usize) -> Match<'t> { - Match { text: haystack, start, end } - } -} - -impl<'t> From<Match<'t>> for Range<usize> { - fn from(m: Match<'t>) -> Range<usize> { - m.range() - } -} - -/// A compiled regular expression for matching arbitrary bytes. -/// -/// It can be used to search, split or replace text. All searching is done with -/// an implicit `.*?` at the beginning and end of an expression. To force an -/// expression to match the whole string (or a prefix or a suffix), you must -/// use an anchor like `^` or `$` (or `\A` and `\z`). -/// -/// Like the `Regex` type in the parent module, matches with this regex return -/// byte offsets into the search text. **Unlike** the parent `Regex` type, -/// these byte offsets may not correspond to UTF-8 sequence boundaries since -/// the regexes in this module can match arbitrary bytes. -#[derive(Clone)] -pub struct Regex(Exec); - -impl fmt::Display for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -impl fmt::Debug for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -/// A constructor for Regex from an Exec. -/// -/// This is hidden because Exec isn't actually part of the public API. -#[doc(hidden)] -impl From<Exec> for Regex { - fn from(exec: Exec) -> Regex { - Regex(exec) - } -} - -impl FromStr for Regex { - type Err = Error; - - /// Attempts to parse a string into a regular expression - fn from_str(s: &str) -> Result<Regex, Error> { - Regex::new(s) - } -} - -/// Core regular expression methods. -impl Regex { - /// Compiles a regular expression. Once compiled, it can be used repeatedly - /// to search, split or replace text in a string. - /// - /// If an invalid expression is given, then an error is returned. - pub fn new(re: &str) -> Result<Regex, Error> { - RegexBuilder::new(re).build() - } - - /// Returns true if and only if there is a match for the regex in the - /// string given. - /// - /// It is recommended to use this method if all you need to do is test - /// a match, since the underlying matching engine may be able to do less - /// work. - /// - /// # Example - /// - /// Test if some text contains at least one word with exactly 13 ASCII word - /// bytes: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"I categorically deny having triskaidekaphobia."; - /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text)); - /// # } - /// ``` - pub fn is_match(&self, text: &[u8]) -> bool { - self.is_match_at(text, 0) - } - - /// Returns the start and end byte range of the leftmost-first match in - /// `text`. If no match exists, then `None` is returned. - /// - /// Note that this should only be used if you want to discover the position - /// of the match. Testing the existence of a match is faster if you use - /// `is_match`.
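A short sketch of the core methods just introduced, using `regex::bytes::Regex` on a haystack that is deliberately not valid UTF-8, which is the whole point of this module (pattern and haystack are illustrative):

```rust
use regex::bytes::Regex;

fn main() {
    let re = Regex::new(r"\w+").unwrap();
    // The haystack contains the invalid UTF-8 byte 0xFF; a bytes regex
    // searches it anyway, and offsets are plain byte offsets.
    let hay = b"hello \xFF world";
    assert!(re.is_match(hay));
    let m = re.find(hay).unwrap();
    assert_eq!((m.start(), m.end()), (0, 5));
    assert_eq!(m.as_bytes(), b"hello");
}
```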
- /// - /// # Example - /// - /// Find the start and end location of the first word with exactly 13 - /// ASCII word bytes: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"I categorically deny having triskaidekaphobia."; - /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap(); - /// assert_eq!((mat.start(), mat.end()), (2, 15)); - /// # } - /// ``` - pub fn find<'t>(&self, text: &'t [u8]) -> Option<Match<'t>> { - self.find_at(text, 0) - } - - /// Returns an iterator for each successive non-overlapping match in - /// `text`, returning the start and end byte indices with respect to - /// `text`. - /// - /// # Example - /// - /// Find the start and end location of every word with exactly 13 ASCII - /// word bytes: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"Retroactively relinquishing remunerations is reprehensible."; - /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) { - /// println!("{:?}", mat); - /// } - /// # } - /// ``` - pub fn find_iter<'r, 't>(&'r self, text: &'t [u8]) -> Matches<'r, 't> { - Matches(self.0.searcher().find_iter(text)) - } - - /// Returns the capture groups corresponding to the leftmost-first - /// match in `text`. Capture group `0` always corresponds to the entire - /// match. If no match is found, then `None` is returned. - /// - /// You should only use `captures` if you need access to the location of - /// capturing group matches. Otherwise, `find` is faster for discovering - /// the location of the overall match. - /// - /// # Examples - /// - /// Say you have some text with movie names and their release years, - /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text - /// looking like that, while also extracting the movie name and its release - /// year separately. - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); - /// let text = b"Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.get(1).unwrap().as_bytes(), &b"Citizen Kane"[..]); - /// assert_eq!(caps.get(2).unwrap().as_bytes(), &b"1941"[..]); - /// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]); - /// // You can also access the groups by index using the Index notation. - /// // Note that this will panic on an invalid index. - /// assert_eq!(&caps[1], b"Citizen Kane"); - /// assert_eq!(&caps[2], b"1941"); - /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); - /// # } - /// ``` - /// - /// Note that the full match is at capture group `0`. Each subsequent - /// capture group is indexed by the order of its opening `(`. - /// - /// We can make this example a bit clearer by using *named* capture groups: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = b"Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.name("title").unwrap().as_bytes(), b"Citizen Kane"); - /// assert_eq!(caps.name("year").unwrap().as_bytes(), b"1941"); - /// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]); - /// // You can also access the groups by name using the Index notation. - /// // Note that this will panic on an invalid group name.
- /// assert_eq!(&caps["title"], b"Citizen Kane"); - /// assert_eq!(&caps["year"], b"1941"); - /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); - /// - /// # } - /// ``` - /// - /// Here we name the capture groups, which we can access with the `name` - /// method or the `Index` notation with a `&str`. Note that the named - /// capture groups are still accessible with `get` or the `Index` notation - /// with a `usize`. - /// - /// The `0`th capture group is always unnamed, so it must always be - /// accessed with `get(0)` or `[0]`. - pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> { - let mut locs = self.capture_locations(); - self.captures_read_at(&mut locs, text, 0).map(move |_| Captures { - text, - locs: locs.0, - named_groups: self.0.capture_name_idx().clone(), - }) - } - - /// Returns an iterator over all the non-overlapping capture groups matched - /// in `text`. This is operationally the same as `find_iter`, except it - /// yields information about capturing group matches. - /// - /// # Example - /// - /// We can use this to find all movie titles and their release years in - /// some text, where the movie is formatted like "'Title' (xxxx)": - /// - /// ```rust - /// # use std::str; use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; - /// for caps in re.captures_iter(text) { - /// let title = str::from_utf8(&caps["title"]).unwrap(); - /// let year = str::from_utf8(&caps["year"]).unwrap(); - /// println!("Movie: {:?}, Released: {:?}", title, year); - /// } - /// // Output: - /// // Movie: Citizen Kane, Released: 1941 - /// // Movie: The Wizard of Oz, Released: 1939 - /// // Movie: M, Released: 1931 - /// # } - /// ``` - pub fn captures_iter<'r, 't>( - &'r self, - text: &'t [u8], - ) -> CaptureMatches<'r, 't> { - CaptureMatches(self.0.searcher().captures_iter(text)) - } - - /// Returns an iterator of substrings of `text` delimited by a match of the - /// regular expression. Namely, each element of the iterator corresponds to - /// text that *isn't* matched by the regular expression. - /// - /// This method will *not* copy the text given. - /// - /// # Example - /// - /// To split a string delimited by arbitrary amounts of spaces or tabs: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"[ \t]+").unwrap(); - /// let fields: Vec<&[u8]> = re.split(b"a b \t c\td e").collect(); - /// assert_eq!(fields, vec![ - /// &b"a"[..], &b"b"[..], &b"c"[..], &b"d"[..], &b"e"[..], - /// ]); - /// # } - /// ``` - pub fn split<'r, 't>(&'r self, text: &'t [u8]) -> Split<'r, 't> { - Split { finder: self.find_iter(text), last: 0 } - } - - /// Returns an iterator of at most `limit` substrings of `text` delimited - /// by a match of the regular expression. (A `limit` of `0` will return no - /// substrings.) Namely, each element of the iterator corresponds to text - /// that *isn't* matched by the regular expression. The remainder of the - /// string that is not split will be the last element in the iterator. - /// - /// This method will *not* copy the text given. - /// - /// # Example - /// - /// Get the first two words in some text: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"\W+").unwrap(); - /// let fields: Vec<&[u8]> = re.splitn(b"Hey! 
How are you?", 3).collect(); - /// assert_eq!(fields, vec![&b"Hey"[..], &b"How"[..], &b"are you?"[..]]); - /// # } - /// ``` - pub fn splitn<'r, 't>( - &'r self, - text: &'t [u8], - limit: usize, - ) -> SplitN<'r, 't> { - SplitN { splits: self.split(text), n: limit } - } - - /// Replaces the leftmost-first match with the replacement provided. The - /// replacement can be a regular byte string (where `$N` and `$name` are - /// expanded to match capture groups) or a function that takes the matches' - /// `Captures` and returns the replaced byte string. - /// - /// If no match is found, then a copy of the byte string is returned - /// unchanged. - /// - /// # Replacement string syntax - /// - /// All instances of `$name` in the replacement text is replaced with the - /// corresponding capture group `name`. - /// - /// `name` may be an integer corresponding to the index of the - /// capture group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. - /// - /// The longest possible name is used. e.g., `$1a` looks up the capture - /// group named `1a` and not the capture group at index `1`. To exert more - /// precise control over the name, use braces, e.g., `${1}a`. - /// - /// To write a literal `$` use `$$`. - /// - /// # Examples - /// - /// Note that this function is polymorphic with respect to the replacement. - /// In typical usage, this can just be a normal byte string: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new("[^01]+").unwrap(); - /// assert_eq!(re.replace(b"1078910", &b""[..]), &b"1010"[..]); - /// # } - /// ``` - /// - /// But anything satisfying the `Replacer` trait will work. For example, a - /// closure of type `|&Captures| -> Vec<u8>` provides direct access to the - /// captures corresponding to a match. This allows one to access capturing - /// group matches easily: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # use regex::bytes::Captures; fn main() { - /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| { - /// let mut replacement = caps[2].to_owned(); - /// replacement.push(b' '); - /// replacement.extend(&caps[1]); - /// replacement - /// }); - /// assert_eq!(result, &b"Bruce Springsteen"[..]); - /// # } - /// ``` - /// - /// But this is a bit cumbersome to use all the time. Instead, a simple - /// syntax is supported that expands `$name` into the corresponding capture - /// group. Here's the last example, but using this expansion technique - /// with named capture groups: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap(); - /// let result = re.replace(b"Springsteen, Bruce", &b"$first $last"[..]); - /// assert_eq!(result, &b"Bruce Springsteen"[..]); - /// # } - /// ``` - /// - /// Note that using `$2` instead of `$first` or `$1` instead of `$last` - /// would produce the same result. To write a literal `$` use `$$`. - /// - /// Sometimes the replacement string requires use of curly braces to - /// delineate a capture group replacement and surrounding literal text. 
- /// For example, if we wanted to join two words together with an - /// underscore: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap(); - /// let result = re.replace(b"deep fried", &b"${first}_$second"[..]); - /// assert_eq!(result, &b"deep_fried"[..]); - /// # } - /// ``` - /// - /// Without the curly braces, the capture group name `first_` would be - /// used, and since it doesn't exist, it would be replaced with the empty - /// string. - /// - /// Finally, sometimes you just want to replace a literal string with no - /// regard for capturing group expansion. This can be done by wrapping a - /// byte string with `NoExpand`: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// use regex::bytes::NoExpand; - /// - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last")); - /// assert_eq!(result, &b"$2 $last"[..]); - /// # } - /// ``` - pub fn replace<'t, R: Replacer>( - &self, - text: &'t [u8], - rep: R, - ) -> Cow<'t, [u8]> { - self.replacen(text, 1, rep) - } - - /// Replaces all non-overlapping matches in `text` with the replacement - /// provided. This is the same as calling `replacen` with `limit` set to - /// `0`. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement text. - pub fn replace_all<'t, R: Replacer>( - &self, - text: &'t [u8], - rep: R, - ) -> Cow<'t, [u8]> { - self.replacen(text, 0, rep) - } - - /// Replaces at most `limit` non-overlapping matches in `text` with the - /// replacement provided. If `limit` is 0, then all non-overlapping matches - /// are replaced. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement text. - pub fn replacen<'t, R: Replacer>( - &self, - text: &'t [u8], - limit: usize, - mut rep: R, - ) -> Cow<'t, [u8]> { - if let Some(rep) = rep.no_expansion() { - let mut it = self.find_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = Vec::with_capacity(text.len()); - let mut last_match = 0; - for (i, m) in it { - new.extend_from_slice(&text[last_match..m.start()]); - new.extend_from_slice(&rep); - last_match = m.end(); - if limit > 0 && i >= limit - 1 { - break; - } - } - new.extend_from_slice(&text[last_match..]); - return Cow::Owned(new); - } - - // The slower path, which we use if the replacement needs access to - // capture groups. - let mut it = self.captures_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = Vec::with_capacity(text.len()); - let mut last_match = 0; - for (i, cap) in it { - // unwrap on 0 is OK because captures only reports matches - let m = cap.get(0).unwrap(); - new.extend_from_slice(&text[last_match..m.start()]); - rep.replace_append(&cap, &mut new); - last_match = m.end(); - if limit > 0 && i >= limit - 1 { - break; - } - } - new.extend_from_slice(&text[last_match..]); - Cow::Owned(new) - } -} - -/// Advanced or "lower level" search methods. -impl Regex { - /// Returns the end location of a match in the text given. - /// - /// This method may have the same performance characteristics as - /// `is_match`, except it provides an end location for a match. In - /// particular, the location returned *may be shorter* than the proper end - /// of the leftmost-first match. 
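Two details of the replacement API above that are easy to trip on, sketched here rather than taken from the vendored sources: `$1a` is parsed as a reference to a group *named* `1a`, and `replacen` treats a limit of `0` as "replace everything".

```rust
use regex::bytes::Regex;

fn main() {
    let re = Regex::new(r"(\w+)").unwrap();
    // `$1a` names a nonexistent group and expands to the empty string;
    // `${1}a` delimits the group reference explicitly.
    assert_eq!(re.replace(b"cat", &b"$1a"[..]), &b""[..]);
    assert_eq!(re.replace(b"cat", &b"${1}a"[..]), &b"cata"[..]);

    // A limit of 2 stops after two replacements; a limit of 0 replaces
    // all matches, which is what `replace_all` delegates to.
    let digit = Regex::new(r"\d").unwrap();
    assert_eq!(digit.replacen(b"1a2b3", 2, &b"-"[..]), &b"-a-b3"[..]);
    assert_eq!(digit.replacen(b"1a2b3", 0, &b"-"[..]), &b"-a-b-"[..]);
}
```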
- /// - /// # Example - /// - /// Typically, `a+` would match the entire first sequence of `a` in some - /// text, but `shortest_match` can give up as soon as it sees the first - /// `a`. - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"aaaaa"; - /// let pos = Regex::new(r"a+").unwrap().shortest_match(text); - /// assert_eq!(pos, Some(1)); - /// # } - /// ``` - pub fn shortest_match(&self, text: &[u8]) -> Option<usize> { - self.shortest_match_at(text, 0) - } - - /// Returns the same as shortest_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn shortest_match_at( - &self, - text: &[u8], - start: usize, - ) -> Option<usize> { - self.0.searcher().shortest_match_at(text, start) - } - - /// Returns the same as is_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn is_match_at(&self, text: &[u8], start: usize) -> bool { - self.0.searcher().is_match_at(text, start) - } - - /// Returns the same as find, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn find_at<'t>( - &self, - text: &'t [u8], - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher() - .find_at(text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// This is like `captures`, but uses - /// [`CaptureLocations`](struct.CaptureLocations.html) - /// instead of - /// [`Captures`](struct.Captures.html) in order to amortize allocations. - /// - /// To create a `CaptureLocations` value, use the - /// `Regex::capture_locations` method. - /// - /// This returns the overall match if this was successful, which is always - /// equivalence to the `0`th capture group. - pub fn captures_read<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t [u8], - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, 0) - } - - /// Returns the same as `captures_read`, but starts the search at the given - /// offset and populates the capture locations given. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn captures_read_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t [u8], - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher() - .captures_read_at(&mut locs.0, text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// An undocumented alias for `captures_read_at`. - /// - /// The `regex-capi` crate previously used this routine, so to avoid - /// breaking that crate, we continue to provide the name as an undocumented - /// alias. - #[doc(hidden)] - pub fn read_captures_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t [u8], - start: usize, - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, start) - } -} - -/// Auxiliary methods. -impl Regex { - /// Returns the original string of this regex. - pub fn as_str(&self) -> &str { - &self.0.regex_strings()[0] - } - - /// Returns an iterator over the capture names. 
- pub fn capture_names(&self) -> CaptureNames<'_> { - CaptureNames(self.0.capture_names().iter()) - } - - /// Returns the number of captures. - pub fn captures_len(&self) -> usize { - self.0.capture_names().len() - } - - /// Returns an empty set of capture locations that can be reused in - /// multiple calls to `captures_read` or `captures_read_at`. - pub fn capture_locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher().locations()) - } - - /// An alias for `capture_locations` to preserve backward compatibility. - /// - /// The `regex-capi` crate uses this method, so to avoid breaking that - /// crate, we continue to export it as an undocumented API. - #[doc(hidden)] - pub fn locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher().locations()) - } -} - -/// An iterator over all non-overlapping matches for a particular string. -/// -/// The iterator yields a tuple of integers corresponding to the start and end -/// of the match. The indices are byte offsets. The iterator stops when no more -/// matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched byte string. -#[derive(Debug)] -pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSync<'r>>); - -impl<'r, 't> Iterator for Matches<'r, 't> { - type Item = Match<'t>; - - fn next(&mut self) -> Option<Match<'t>> { - let text = self.0.text(); - self.0.next().map(|(s, e)| Match::new(text, s, e)) - } -} - -impl<'r, 't> FusedIterator for Matches<'r, 't> {} - -/// An iterator that yields all non-overlapping capture groups matching a -/// particular regular expression. -/// -/// The iterator stops when no more matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched byte string. -#[derive(Debug)] -pub struct CaptureMatches<'r, 't>( - re_trait::CaptureMatches<'t, ExecNoSync<'r>>, -); - -impl<'r, 't> Iterator for CaptureMatches<'r, 't> { - type Item = Captures<'t>; - - fn next(&mut self) -> Option<Captures<'t>> { - self.0.next().map(|locs| Captures { - text: self.0.text(), - locs, - named_groups: self.0.regex().capture_name_idx().clone(), - }) - } -} - -impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {} - -/// Yields all substrings delimited by a regular expression match. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the byte string being split. -#[derive(Debug)] -pub struct Split<'r, 't> { - finder: Matches<'r, 't>, - last: usize, -} - -impl<'r, 't> Iterator for Split<'r, 't> { - type Item = &'t [u8]; - - fn next(&mut self) -> Option<&'t [u8]> { - let text = self.finder.0.text(); - match self.finder.next() { - None => { - if self.last > text.len() { - None - } else { - let s = &text[self.last..]; - self.last = text.len() + 1; // Next call will return None - Some(s) - } - } - Some(m) => { - let matched = &text[self.last..m.start()]; - self.last = m.end(); - Some(matched) - } - } - } -} - -impl<'r, 't> FusedIterator for Split<'r, 't> {} - -/// Yields at most `N` substrings delimited by a regular expression match. -/// -/// The last substring will be whatever remains after splitting. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the byte string being split. 
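The `capture_locations` method above exists to amortize allocation across many searches; a sketch of that pattern against the API shown in this file (the `(\w)(\w)` pattern and haystacks are illustrative only):

```rust
use regex::bytes::Regex;

fn main() {
    let re = Regex::new(r"(\w)(\w)").unwrap();
    // Allocate the capture slots once, then reuse them for every search
    // instead of building a fresh `Captures` value each time.
    let mut locs = re.capture_locations();
    for hay in [&b"ab"[..], &b"cd"[..]] {
        if let Some(m) = re.captures_read(&mut locs, hay) {
            // Index 0 always mirrors the overall match.
            assert_eq!(locs.get(0), Some((m.start(), m.end())));
            assert_eq!(locs.len(), 3); // group 0 plus the two explicit groups
        }
    }
}
```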
-#[derive(Debug)] -pub struct SplitN<'r, 't> { - splits: Split<'r, 't>, - n: usize, -} - -impl<'r, 't> Iterator for SplitN<'r, 't> { - type Item = &'t [u8]; - - fn next(&mut self) -> Option<&'t [u8]> { - if self.n == 0 { - return None; - } - - self.n -= 1; - if self.n > 0 { - return self.splits.next(); - } - - let text = self.splits.finder.0.text(); - if self.splits.last > text.len() { - // We've already returned all substrings. - None - } else { - // self.n == 0, so future calls will return None immediately - Some(&text[self.splits.last..]) - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - (0, Some(self.n)) - } -} - -impl<'r, 't> FusedIterator for SplitN<'r, 't> {} - -/// An iterator over the names of all possible captures. -/// -/// `None` indicates an unnamed capture; the first element (capture 0, the -/// whole matched region) is always unnamed. -/// -/// `'r` is the lifetime of the compiled regular expression. -#[derive(Clone, Debug)] -pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>); - -impl<'r> Iterator for CaptureNames<'r> { - type Item = Option<&'r str>; - - fn next(&mut self) -> Option<Option<&'r str>> { - self.0 - .next() - .as_ref() - .map(|slot| slot.as_ref().map(|name| name.as_ref())) - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } - - fn count(self) -> usize { - self.0.count() - } -} - -impl<'r> ExactSizeIterator for CaptureNames<'r> {} - -impl<'r> FusedIterator for CaptureNames<'r> {} - -/// CaptureLocations is a low level representation of the raw offsets of each -/// submatch. -/// -/// You can think of this as a lower level -/// [`Captures`](struct.Captures.html), where this type does not support -/// named capturing groups directly and it does not borrow the text that these -/// offsets were matched on. -/// -/// Primarily, this type is useful when using the lower level `Regex` APIs -/// such as `read_captures`, which permits amortizing the allocation in which -/// capture match locations are stored. -/// -/// In order to build a value of this type, you'll need to call the -/// `capture_locations` method on the `Regex` being used to execute the search. -/// The value returned can then be reused in subsequent searches. -#[derive(Clone, Debug)] -pub struct CaptureLocations(re_trait::Locations); - -/// A type alias for `CaptureLocations` for backwards compatibility. -/// -/// Previously, we exported `CaptureLocations` as `Locations` in an -/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), -/// we continue re-exporting the same undocumented API. -#[doc(hidden)] -pub type Locations = CaptureLocations; - -impl CaptureLocations { - /// Returns the start and end positions of the Nth capture group. Returns - /// `None` if `i` is not a valid capture group or if the capture group did - /// not match anything. The positions returned are *always* byte indices - /// with respect to the original string matched. - #[inline] - pub fn get(&self, i: usize) -> Option<(usize, usize)> { - self.0.pos(i) - } - - /// Returns the total number of capture groups (even if they didn't match). - /// - /// This is always at least `1` since every regex has at least `1` - /// capturing group that corresponds to the entire match. - #[inline] - pub fn len(&self) -> usize { - self.0.len() - } - - /// An alias for the `get` method for backwards compatibility. - /// - /// Previously, we exported `get` as `pos` in an undocumented API. 
To - /// prevent breaking that code (e.g., in `regex-capi`), we continue - /// re-exporting the same undocumented API. - #[doc(hidden)] - #[inline] - pub fn pos(&self, i: usize) -> Option<(usize, usize)> { - self.get(i) - } -} - -/// Captures represents a group of captured byte strings for a single match. -/// -/// The 0th capture always corresponds to the entire match. Each subsequent -/// index corresponds to the next capture group in the regex. If a capture -/// group is named, then the matched byte string is *also* available via the -/// `name` method. (Note that the 0th capture is always unnamed and so must be -/// accessed with the `get` method.) -/// -/// Positions returned from a capture group are always byte indices. -/// -/// `'t` is the lifetime of the matched text. -pub struct Captures<'t> { - text: &'t [u8], - locs: re_trait::Locations, - named_groups: Arc<HashMap<String, usize>>, -} - -impl<'t> Captures<'t> { - /// Returns the match associated with the capture group at index `i`. If - /// `i` does not correspond to a capture group, or if the capture group - /// did not participate in the match, then `None` is returned. - /// - /// # Examples - /// - /// Get the text of the match with a default of an empty string if this - /// group didn't participate in the match: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); - /// let caps = re.captures(b"abc123").unwrap(); - /// - /// let text1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes()); - /// let text2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes()); - /// assert_eq!(text1, &b"123"[..]); - /// assert_eq!(text2, &b""[..]); - /// ``` - pub fn get(&self, i: usize) -> Option<Match<'t>> { - self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e)) - } - - /// Returns the match for the capture group named `name`. If `name` isn't a - /// valid capture group or didn't match anything, then `None` is returned. - pub fn name(&self, name: &str) -> Option<Match<'t>> { - self.named_groups.get(name).and_then(|&i| self.get(i)) - } - - /// An iterator that yields all capturing matches in the order in which - /// they appear in the regex. If a particular capture group didn't - /// participate in the match, then `None` is yielded for that capture. - /// - /// The first match always corresponds to the overall match of the regex. - pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> { - SubCaptureMatches { caps: self, it: self.locs.iter() } - } - - /// Expands all instances of `$name` in `replacement` to the corresponding - /// capture group `name`, and writes them to the `dst` buffer given. - /// - /// `name` may be an integer corresponding to the index of the capture - /// group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. - /// - /// The longest possible name consisting of the characters `[_0-9A-Za-z]` - /// is used. e.g., `$1a` looks up the capture group named `1a` and not the - /// capture group at index `1`. To exert more precise control over the - /// name, or to refer to a capture group name that uses characters outside - /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When - /// using braces, any sequence of valid UTF-8 bytes is permitted. 
If the - /// sequence does not refer to a capture group name in the corresponding - /// regex, then it is replaced with an empty string. - /// - /// To write a literal `$` use `$$`. - pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) { - expand_bytes(self, replacement, dst) - } - - /// Returns the total number of capture groups (even if they didn't match). - /// - /// This is always at least `1`, since every regex has at least one capture - /// group that corresponds to the full match. - #[inline] - pub fn len(&self) -> usize { - self.locs.len() - } -} - -impl<'t> fmt::Debug for Captures<'t> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_tuple("Captures").field(&CapturesDebug(self)).finish() - } -} - -struct CapturesDebug<'c, 't>(&'c Captures<'t>); - -impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fn escape_bytes(bytes: &[u8]) -> String { - let mut s = String::new(); - for &b in bytes { - s.push_str(&escape_byte(b)); - } - s - } - - fn escape_byte(byte: u8) -> String { - use std::ascii::escape_default; - - let escaped: Vec<u8> = escape_default(byte).collect(); - String::from_utf8_lossy(&escaped).into_owned() - } - - // We'd like to show something nice here, even if it means an - // allocation to build a reverse index. - let slot_to_name: HashMap<&usize, &String> = - self.0.named_groups.iter().map(|(a, b)| (b, a)).collect(); - let mut map = f.debug_map(); - for (slot, m) in self.0.locs.iter().enumerate() { - let m = m.map(|(s, e)| escape_bytes(&self.0.text[s..e])); - if let Some(name) = slot_to_name.get(&slot) { - map.entry(&name, &m); - } else { - map.entry(&slot, &m); - } - } - map.finish() - } -} - -/// Get a group by index. -/// -/// `'t` is the lifetime of the matched text. -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `get()` instead. -/// -/// # Panics -/// -/// If there is no group at the given index. -impl<'t> Index<usize> for Captures<'t> { - type Output = [u8]; - - fn index(&self, i: usize) -> &[u8] { - self.get(i) - .map(|m| m.as_bytes()) - .unwrap_or_else(|| panic!("no group at index '{}'", i)) - } -} - -/// Get a group by name. -/// -/// `'t` is the lifetime of the matched text and `'i` is the lifetime -/// of the group name (the index). -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `name` instead. -/// -/// # Panics -/// -/// If there is no group named by the given value. -impl<'t, 'i> Index<&'i str> for Captures<'t> { - type Output = [u8]; - - fn index<'a>(&'a self, name: &'i str) -> &'a [u8] { - self.name(name) - .map(|m| m.as_bytes()) - .unwrap_or_else(|| panic!("no group named '{}'", name)) - } -} - -/// An iterator that yields all capturing matches in the order in which they -/// appear in the regex. -/// -/// If a particular capture group didn't participate in the match, then `None` -/// is yielded for that capture. The first match always corresponds to the -/// overall match of the regex. -/// -/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and -/// the lifetime `'t` corresponds to the originally matched text. 
-#[derive(Clone, Debug)] -pub struct SubCaptureMatches<'c, 't> { - caps: &'c Captures<'t>, - it: SubCapturesPosIter<'c>, -} - -impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> { - type Item = Option<Match<'t>>; - - fn next(&mut self) -> Option<Option<Match<'t>>> { - self.it - .next() - .map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e))) - } -} - -impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {} - -/// Replacer describes types that can be used to replace matches in a byte -/// string. -/// -/// In general, users of this crate shouldn't need to implement this trait, -/// since implementations are already provided for `&[u8]` along with other -/// variants of bytes types and `FnMut(&Captures) -> Vec<u8>` (or any -/// `FnMut(&Captures) -> T` where `T: AsRef<[u8]>`), which covers most use cases. -pub trait Replacer { - /// Appends text to `dst` to replace the current match. - /// - /// The current match is represented by `caps`, which is guaranteed to - /// have a match at capture group `0`. - /// - /// For example, a no-op replacement would be - /// `dst.extend(&caps[0])`. - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>); - - /// Return a fixed unchanging replacement byte string. - /// - /// When doing replacements, if access to `Captures` is not needed (e.g., - /// the replacement byte string does not need `$` expansion), then it can - /// be beneficial to avoid finding sub-captures. - /// - /// In general, this is called once for every call to `replacen`. - fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { - None - } - - /// Return a `Replacer` that borrows and wraps this `Replacer`. - /// - /// This is useful when you want to take a generic `Replacer` (which might - /// not be cloneable) and use it without consuming it, so it can be used - /// more than once. - /// - /// # Example - /// - /// ``` - /// use regex::bytes::{Regex, Replacer}; - /// - /// fn replace_all_twice<R: Replacer>( - /// re: Regex, - /// src: &[u8], - /// mut rep: R, - /// ) -> Vec<u8> { - /// let dst = re.replace_all(src, rep.by_ref()); - /// let dst = re.replace_all(&dst, rep.by_ref()); - /// dst.into_owned() - /// } - /// ``` - fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { - ReplacerRef(self) - } -} - -/// By-reference adaptor for a `Replacer` -/// -/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref). 
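Since only `replace_append` is required, implementing `Replacer` by hand is small. A hypothetical implementation that upper-cases each match (the `Upper` type is illustrative, not part of the crate):

```rust
use regex::bytes::{Captures, Regex, Replacer};

// Hypothetical replacer: append the upper-cased text of each match.
struct Upper;

impl Replacer for Upper {
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
        dst.extend(caps[0].to_ascii_uppercase());
    }
}

fn main() {
    let re = Regex::new(r"[a-z]+").unwrap();
    assert_eq!(re.replace_all(b"ab 12 cd", Upper), &b"AB 12 CD"[..]);
}
```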
-#[derive(Debug)] -pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); - -impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - self.0.replace_append(caps, dst) - } - fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { - self.0.no_expansion() - } -} - -impl<'a> Replacer for &'a [u8] { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(*self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a Vec<u8> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(*self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl Replacer for Vec<u8> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl<'a> Replacer for Cow<'a, [u8]> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(self.as_ref(), dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a Cow<'a, [u8]> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(self.as_ref(), dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -fn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<'_, [u8]>> { - let s = t.as_ref(); - match find_byte(b'$', s) { - Some(_) => None, - None => Some(Cow::Borrowed(s)), - } -} - -impl<F, T> Replacer for F -where - F: FnMut(&Captures<'_>) -> T, - T: AsRef<[u8]>, -{ - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - dst.extend_from_slice((*self)(caps).as_ref()); - } -} - -/// `NoExpand` indicates literal byte string replacement. -/// -/// It can be used with `replace` and `replace_all` to do a literal byte string -/// replacement without expanding `$name` to their corresponding capture -/// groups. This can be both convenient (to avoid escaping `$`, for example) -/// and performant (since capture groups don't need to be found). -/// -/// `'t` is the lifetime of the literal text. -#[derive(Clone, Debug)] -pub struct NoExpand<'t>(pub &'t [u8]); - -impl<'t> Replacer for NoExpand<'t> { - fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) { - dst.extend_from_slice(self.0); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - Some(Cow::Borrowed(self.0)) - } -} diff --git a/vendor/regex/src/re_set.rs b/vendor/regex/src/re_set.rs deleted file mode 100644 index a6d886d..0000000 --- a/vendor/regex/src/re_set.rs +++ /dev/null @@ -1,507 +0,0 @@ -macro_rules! define_set { - ($name:ident, $builder_mod:ident, $text_ty:ty, $as_bytes:expr, - $(#[$doc_regexset_example:meta])* ) => { - pub mod $name { - use std::fmt; - use std::iter; - use std::slice; - use std::vec; - - use crate::error::Error; - use crate::exec::Exec; - use crate::re_builder::$builder_mod::RegexSetBuilder; - use crate::re_trait::RegularExpression; - -/// Match multiple (possibly overlapping) regular expressions in a single scan. -/// -/// A regex set corresponds to the union of two or more regular expressions. -/// That is, a regex set will match text where at least one of its -/// constituent regular expressions matches. 
A regex set as it's formulated here -/// provides a touch more power: it will also report *which* regular -/// expressions in the set match. Indeed, this is the key difference between -/// regex sets and a single `Regex` with many alternates, since only one -/// alternate can match at a time. -/// -/// For example, consider regular expressions to match email addresses and -/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a -/// regex set is constructed from those regexes, then searching the text -/// `foo@example.com` will report both regexes as matching. Of course, one -/// could accomplish this by compiling each regex on its own and doing two -/// searches over the text. The key advantage of using a regex set is that it -/// will report the matching regexes using a *single pass through the text*. -/// If one has hundreds or thousands of regexes to match repeatedly (like a URL -/// router for a complex web application or a user agent matcher), then a regex -/// set can realize huge performance gains. -/// -/// # Example -/// -/// This shows how the above two regexes (for matching email addresses and -/// domains) might work: -/// -$(#[$doc_regexset_example])* -/// -/// Note that it would be possible to adapt the above example to using `Regex` -/// with an expression like: -/// -/// ```text -/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net)) -/// ``` -/// -/// After a match, one could then inspect the capture groups to figure out -/// which alternates matched. The problem is that it is hard to make this -/// approach scale when there are many regexes since the overlap between each -/// alternate isn't always obvious to reason about. -/// -/// # Limitations -/// -/// Regex sets are limited to answering the following two questions: -/// -/// 1. Does any regex in the set match? -/// 2. If so, which regexes in the set match? -/// -/// As with the main [`Regex`][crate::Regex] type, it is cheaper to ask (1) -/// instead of (2) since the matching engines can stop after the first match -/// is found. -/// -/// You cannot directly extract [`Match`][crate::Match] or -/// [`Captures`][crate::Captures] objects from a regex set. If you need these -/// operations, the recommended approach is to compile each pattern in the set -/// independently and scan the exact same input a second time with those -/// independently compiled patterns: -/// -/// ```rust -/// use regex::{Regex, RegexSet};
/// -/// let patterns = ["foo", "bar"]; -/// // Both patterns will match different ranges of this string. -/// let text = "barfoo"; -/// -/// // Compile a set matching any of our patterns. -/// let set = RegexSet::new(&patterns).unwrap(); -/// // Compile each pattern independently. -/// let regexes: Vec<_> = set.patterns().iter() -/// .map(|pat| Regex::new(pat).unwrap()) -/// .collect(); -/// -/// // Match against the whole set first and identify the individual -/// // matching patterns. -/// let matches: Vec<&str> = set.matches(text).into_iter() -/// // Dereference the match index to get the corresponding -/// // compiled pattern. -/// .map(|match_idx| &regexes[match_idx]) -/// // To get match locations or any other info, we then have to search -/// // the exact same text again, using our separately-compiled pattern. -/// .map(|pat| pat.find(text).unwrap().as_str()) -/// .collect(); -/// -/// // Matches arrive in the order the constituent patterns were declared, -/// // not the order they appear in the input.
-/// assert_eq!(vec!["foo", "bar"], matches); -/// ``` -/// -/// # Performance -/// -/// A `RegexSet` has the same performance characteristics as `Regex`. Namely, -/// search takes `O(mn)` time, where `m` is proportional to the size of the -/// regex set and `n` is proportional to the length of the search text. -#[derive(Clone)] -pub struct RegexSet(Exec); - -impl RegexSet { - /// Create a new regex set with the given regular expressions. - /// - /// This takes an iterator of `S`, where `S` is something that can produce - /// a `&str`. If any of the strings in the iterator are not valid regular - /// expressions, then an error is returned. - /// - /// # Example - /// - /// Create a new regex set from an iterator of strings: - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap(); - /// assert!(set.is_match("foo")); - /// ``` - pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error> - where S: AsRef<str>, I: IntoIterator<Item=S> { - RegexSetBuilder::new(exprs).build() - } - - /// Create a new empty regex set. - /// - /// # Example - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::empty(); - /// assert!(set.is_empty()); - /// ``` - pub fn empty() -> RegexSet { - RegexSetBuilder::new(&[""; 0]).build().unwrap() - } - - /// Returns true if and only if one of the regexes in this set matches - /// the text given. - /// - /// This method should be preferred if you only need to test whether any - /// of the regexes in the set should match, but don't care about *which* - /// regexes matched. This is because the underlying matching engine will - /// quit immediately after seeing the first match instead of continuing to - /// find all matches. - /// - /// Note that as with searches using `Regex`, the expression is unanchored - /// by default. That is, if the regex does not start with `^` or `\A`, or - /// end with `$` or `\z`, then it is permitted to match anywhere in the - /// text. - /// - /// # Example - /// - /// Tests whether a set matches some text: - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap(); - /// assert!(set.is_match("foo")); - /// assert!(!set.is_match("☃")); - /// ``` - pub fn is_match(&self, text: $text_ty) -> bool { - self.is_match_at(text, 0) - } - - /// Returns the same as is_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - #[doc(hidden)] - pub fn is_match_at(&self, text: $text_ty, start: usize) -> bool { - self.0.searcher().is_match_at($as_bytes(text), start) - } - - /// Returns the set of regular expressions that match in the given text. - /// - /// The set returned contains the index of each regular expression that - /// matches in the given text. The index is in correspondence with the - /// order of regular expressions given to `RegexSet`'s constructor. - /// - /// The set can also be used to iterate over the matched indices. - /// - /// Note that as with searches using `Regex`, the expression is unanchored - /// by default. That is, if the regex does not start with `^` or `\A`, or - /// end with `$` or `\z`, then it is permitted to match anywhere in the - /// text. 
- /// - /// # Example - /// - /// Tests which regular expressions match the given text: - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[ - /// r"\w+", - /// r"\d+", - /// r"\pL+", - /// r"foo", - /// r"bar", - /// r"barfoo", - /// r"foobar", - /// ]).unwrap(); - /// let matches: Vec<_> = set.matches("foobar").into_iter().collect(); - /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); - /// - /// // You can also test whether a particular regex matched: - /// let matches = set.matches("foobar"); - /// assert!(!matches.matched(5)); - /// assert!(matches.matched(6)); - /// ``` - pub fn matches(&self, text: $text_ty) -> SetMatches { - let mut matches = vec![false; self.0.regex_strings().len()]; - let any = self.read_matches_at(&mut matches, text, 0); - SetMatches { - matched_any: any, - matches: matches, - } - } - - /// Returns the same as matches, but starts the search at the given - /// offset and stores the matches into the slice given. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - /// - /// `matches` must have a length that is at least the number of regexes - /// in this set. - /// - /// This method returns true if and only if at least one member of - /// `matches` is true after executing the set against `text`. - #[doc(hidden)] - pub fn read_matches_at( - &self, - matches: &mut [bool], - text: $text_ty, - start: usize, - ) -> bool { - self.0.searcher().many_matches_at(matches, $as_bytes(text), start) - } - - /// Returns the total number of regular expressions in this set. - pub fn len(&self) -> usize { - self.0.regex_strings().len() - } - - /// Returns `true` if this set contains no regular expressions. - pub fn is_empty(&self) -> bool { - self.0.regex_strings().is_empty() - } - - /// Returns the patterns that this set will match on. - /// - /// This function can be used to determine the pattern for a match. The - /// slice returned has exactly as many patterns givens to this regex set, - /// and the order of the slice is the same as the order of the patterns - /// provided to the set. - /// - /// # Example - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[ - /// r"\w+", - /// r"\d+", - /// r"\pL+", - /// r"foo", - /// r"bar", - /// r"barfoo", - /// r"foobar", - /// ]).unwrap(); - /// let matches: Vec<_> = set - /// .matches("foobar") - /// .into_iter() - /// .map(|match_idx| &set.patterns()[match_idx]) - /// .collect(); - /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]); - /// ``` - pub fn patterns(&self) -> &[String] { - self.0.regex_strings() - } -} - -/// A set of matches returned by a regex set. -#[derive(Clone, Debug)] -pub struct SetMatches { - matched_any: bool, - matches: Vec<bool>, -} - -impl SetMatches { - /// Whether this set contains any matches. - pub fn matched_any(&self) -> bool { - self.matched_any - } - - /// Whether the regex at the given index matched. - /// - /// The index for a regex is determined by its insertion order upon the - /// initial construction of a `RegexSet`, starting at `0`. - /// - /// # Panics - /// - /// If `regex_index` is greater than or equal to `self.len()`. - pub fn matched(&self, regex_index: usize) -> bool { - self.matches[regex_index] - } - - /// The total number of regexes in the set that created these matches. 
- pub fn len(&self) -> usize { - self.matches.len() - } - - /// Returns an iterator over indexes in the regex that matched. - /// - /// This will always produces matches in ascending order of index, where - /// the index corresponds to the index of the regex that matched with - /// respect to its position when initially building the set. - pub fn iter(&self) -> SetMatchesIter<'_> { - SetMatchesIter((&*self.matches).into_iter().enumerate()) - } -} - -impl IntoIterator for SetMatches { - type IntoIter = SetMatchesIntoIter; - type Item = usize; - - fn into_iter(self) -> Self::IntoIter { - SetMatchesIntoIter(self.matches.into_iter().enumerate()) - } -} - -impl<'a> IntoIterator for &'a SetMatches { - type IntoIter = SetMatchesIter<'a>; - type Item = usize; - - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -/// An owned iterator over the set of matches from a regex set. -/// -/// This will always produces matches in ascending order of index, where the -/// index corresponds to the index of the regex that matched with respect to -/// its position when initially building the set. -#[derive(Debug)] -pub struct SetMatchesIntoIter(iter::Enumerate<vec::IntoIter<bool>>); - -impl Iterator for SetMatchesIntoIter { - type Item = usize; - - fn next(&mut self) -> Option<usize> { - loop { - match self.0.next() { - None => return None, - Some((_, false)) => {} - Some((i, true)) => return Some(i), - } - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } -} - -impl DoubleEndedIterator for SetMatchesIntoIter { - fn next_back(&mut self) -> Option<usize> { - loop { - match self.0.next_back() { - None => return None, - Some((_, false)) => {} - Some((i, true)) => return Some(i), - } - } - } -} - -impl iter::FusedIterator for SetMatchesIntoIter {} - -/// A borrowed iterator over the set of matches from a regex set. -/// -/// The lifetime `'a` refers to the lifetime of a `SetMatches` value. -/// -/// This will always produces matches in ascending order of index, where the -/// index corresponds to the index of the regex that matched with respect to -/// its position when initially building the set. -#[derive(Clone, Debug)] -pub struct SetMatchesIter<'a>(iter::Enumerate<slice::Iter<'a, bool>>); - -impl<'a> Iterator for SetMatchesIter<'a> { - type Item = usize; - - fn next(&mut self) -> Option<usize> { - loop { - match self.0.next() { - None => return None, - Some((_, &false)) => {} - Some((i, &true)) => return Some(i), - } - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } -} - -impl<'a> DoubleEndedIterator for SetMatchesIter<'a> { - fn next_back(&mut self) -> Option<usize> { - loop { - match self.0.next_back() { - None => return None, - Some((_, &false)) => {} - Some((i, &true)) => return Some(i), - } - } - } -} - -impl<'a> iter::FusedIterator for SetMatchesIter<'a> {} - -#[doc(hidden)] -impl From<Exec> for RegexSet { - fn from(exec: Exec) -> Self { - RegexSet(exec) - } -} - -impl fmt::Debug for RegexSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "RegexSet({:?})", self.0.regex_strings()) - } -} - -#[allow(dead_code)] fn as_bytes_str(text: &str) -> &[u8] { text.as_bytes() } -#[allow(dead_code)] fn as_bytes_bytes(text: &[u8]) -> &[u8] { text } - } - } -} - -define_set! 
{ - unicode, - set_unicode, - &str, - as_bytes_str, -/// ```rust -/// # use regex::RegexSet; -/// let set = RegexSet::new(&[ -/// r"[a-z]+@[a-z]+\.(com|org|net)", -/// r"[a-z]+\.(com|org|net)", -/// ]).unwrap(); -/// -/// // Ask whether any regexes in the set match. -/// assert!(set.is_match("foo@example.com")); -/// -/// // Identify which regexes in the set match. -/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect(); -/// assert_eq!(vec![0, 1], matches); -/// -/// // Try again, but with text that only matches one of the regexes. -/// let matches: Vec<_> = set.matches("example.com").into_iter().collect(); -/// assert_eq!(vec![1], matches); -/// -/// // Try again, but with text that doesn't match any regex in the set. -/// let matches: Vec<_> = set.matches("example").into_iter().collect(); -/// assert!(matches.is_empty()); -/// ``` -} - -define_set! { - bytes, - set_bytes, - &[u8], - as_bytes_bytes, -/// ```rust -/// # use regex::bytes::RegexSet; -/// let set = RegexSet::new(&[ -/// r"[a-z]+@[a-z]+\.(com|org|net)", -/// r"[a-z]+\.(com|org|net)", -/// ]).unwrap(); -/// -/// // Ask whether any regexes in the set match. -/// assert!(set.is_match(b"foo@example.com")); -/// -/// // Identify which regexes in the set match. -/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect(); -/// assert_eq!(vec![0, 1], matches); -/// -/// // Try again, but with text that only matches one of the regexes. -/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect(); -/// assert_eq!(vec![1], matches); -/// -/// // Try again, but with text that doesn't match any regex in the set. -/// let matches: Vec<_> = set.matches(b"example").into_iter().collect(); -/// assert!(matches.is_empty()); -/// ``` -} diff --git a/vendor/regex/src/re_trait.rs b/vendor/regex/src/re_trait.rs deleted file mode 100644 index d0c717d..0000000 --- a/vendor/regex/src/re_trait.rs +++ /dev/null @@ -1,294 +0,0 @@ -use std::fmt; -use std::iter::FusedIterator; - -/// Slot is a single saved capture location. Note that there are two slots for -/// every capture in a regular expression (one slot each for the start and end -/// of the capture). -pub type Slot = Option<usize>; - -/// Locations represents the offsets of each capturing group in a regex for -/// a single match. -/// -/// Unlike `Captures`, a `Locations` value only stores offsets. -#[doc(hidden)] -#[derive(Clone, Debug)] -pub struct Locations(Vec<Slot>); - -impl Locations { - /// Returns the start and end positions of the Nth capture group. Returns - /// `None` if `i` is not a valid capture group or if the capture group did - /// not match anything. The positions returned are *always* byte indices - /// with respect to the original string matched. - pub fn pos(&self, i: usize) -> Option<(usize, usize)> { - let (s, e) = (i * 2, i * 2 + 1); - match (self.0.get(s), self.0.get(e)) { - (Some(&Some(s)), Some(&Some(e))) => Some((s, e)), - _ => None, - } - } - - /// Creates an iterator of all the capture group positions in order of - /// appearance in the regular expression. Positions are byte indices - /// in terms of the original string matched. - pub fn iter(&self) -> SubCapturesPosIter<'_> { - SubCapturesPosIter { idx: 0, locs: self } - } - - /// Returns the total number of capturing groups. - /// - /// This is always at least `1` since every regex has at least `1` - /// capturing group that corresponds to the entire match. - pub fn len(&self) -> usize { - self.0.len() / 2 - } - - /// Return the individual slots as a slice. 
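The slot layout behind `Locations` is easy to restate: each capture group owns a `(start, end)` slot pair at indices `2*i` and `2*i + 1`, and a group counts as matched only when both slots are set. A standalone sketch of the same `pos` logic (the helper below is illustrative, not the crate's API):

```rust
type Slot = Option<usize>;

// Mirror of `Locations::pos` above: group i matched only if both of
// its slots are populated.
fn pos(slots: &[Slot], i: usize) -> Option<(usize, usize)> {
    match (slots.get(i * 2)?, slots.get(i * 2 + 1)?) {
        (&Some(s), &Some(e)) => Some((s, e)),
        _ => None,
    }
}

fn main() {
    // Overall match at 0..3; explicit group 1 did not participate.
    let slots = [Some(0), Some(3), None, None];
    assert_eq!(pos(&slots, 0), Some((0, 3)));
    assert_eq!(pos(&slots, 1), None);
}
```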
- pub(crate) fn as_slots(&mut self) -> &mut [Slot] { - &mut self.0 - } -} - -/// An iterator over capture group positions for a particular match of a -/// regular expression. -/// -/// Positions are byte indices in terms of the original string matched. -/// -/// `'c` is the lifetime of the captures. -#[derive(Clone, Debug)] -pub struct SubCapturesPosIter<'c> { - idx: usize, - locs: &'c Locations, -} - -impl<'c> Iterator for SubCapturesPosIter<'c> { - type Item = Option<(usize, usize)>; - - fn next(&mut self) -> Option<Option<(usize, usize)>> { - if self.idx >= self.locs.len() { - return None; - } - let x = match self.locs.pos(self.idx) { - None => Some(None), - Some((s, e)) => Some(Some((s, e))), - }; - self.idx += 1; - x - } - - fn size_hint(&self) -> (usize, Option<usize>) { - let len = self.locs.len() - self.idx; - (len, Some(len)) - } - - fn count(self) -> usize { - self.len() - } -} - -impl<'c> ExactSizeIterator for SubCapturesPosIter<'c> {} - -impl<'c> FusedIterator for SubCapturesPosIter<'c> {} - -/// `RegularExpression` describes types that can implement regex searching. -/// -/// This trait is my attempt to reduce code duplication and to standardize -/// the internal API. Specific duplication that is avoided includes the `find` -/// and `capture` iterators, which are slightly tricky. -/// -/// It's not clear whether this trait is worth it, and it also isn't -/// clear whether it's useful as a public trait or not. Methods like -/// `next_after_empty` reek of bad design, but the rest of the methods seem -/// somewhat reasonable. One particular thing this trait would expose would be -/// the ability to start the search of a regex anywhere in a haystack, which -/// isn't possible in the current public API. -pub trait RegularExpression: Sized + fmt::Debug { - /// The type of the haystack. - type Text: ?Sized + fmt::Debug; - - /// The number of capture slots in the compiled regular expression. This is - /// always two times the number of capture groups (two slots per group). - fn slots_len(&self) -> usize; - - /// Allocates fresh space for all capturing groups in this regex. - fn locations(&self) -> Locations { - Locations(vec![None; self.slots_len()]) - } - - /// Returns the position of the next character after `i`. - /// - /// For example, a haystack with type `&[u8]` probably returns `i+1`, - /// whereas a haystack with type `&str` probably returns `i` plus the - /// length of the next UTF-8 sequence. - fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize; - - /// Returns the location of the shortest match. - fn shortest_match_at( - &self, - text: &Self::Text, - start: usize, - ) -> Option<usize>; - - /// Returns whether the regex matches the text given. - fn is_match_at(&self, text: &Self::Text, start: usize) -> bool; - - /// Returns the leftmost-first match location if one exists. - fn find_at( - &self, - text: &Self::Text, - start: usize, - ) -> Option<(usize, usize)>; - - /// Returns the leftmost-first match location if one exists, and also - /// fills in any matching capture slot locations. - fn captures_read_at( - &self, - locs: &mut Locations, - text: &Self::Text, - start: usize, - ) -> Option<(usize, usize)>; - - /// Returns an iterator over all non-overlapping successive leftmost-first - /// matches. - fn find_iter(self, text: &Self::Text) -> Matches<'_, Self> { - Matches { re: self, text, last_end: 0, last_match: None } - } - - /// Returns an iterator over all non-overlapping successive leftmost-first - /// matches with captures.
- fn captures_iter(self, text: &Self::Text) -> CaptureMatches<'_, Self> { - CaptureMatches(self.find_iter(text)) - } -} - -/// An iterator over all non-overlapping successive leftmost-first matches. -#[derive(Debug)] -pub struct Matches<'t, R> -where - R: RegularExpression, - R::Text: 't, -{ - re: R, - text: &'t R::Text, - last_end: usize, - last_match: Option<usize>, -} - -impl<'t, R> Matches<'t, R> -where - R: RegularExpression, - R::Text: 't, -{ - /// Return the text being searched. - pub fn text(&self) -> &'t R::Text { - self.text - } - - /// Return the underlying regex. - pub fn regex(&self) -> &R { - &self.re - } -} - -impl<'t, R> Iterator for Matches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ - type Item = (usize, usize); - - fn next(&mut self) -> Option<(usize, usize)> { - if self.last_end > self.text.as_ref().len() { - return None; - } - let (s, e) = match self.re.find_at(self.text, self.last_end) { - None => return None, - Some((s, e)) => (s, e), - }; - if s == e { - // This is an empty match. To ensure we make progress, start - // the next search at the smallest possible starting position - // of the next match following this one. - self.last_end = self.re.next_after_empty(self.text, e); - // Don't accept empty matches immediately following a match. - // Just move on to the next match. - if Some(e) == self.last_match { - return self.next(); - } - } else { - self.last_end = e; - } - self.last_match = Some(e); - Some((s, e)) - } -} - -impl<'t, R> FusedIterator for Matches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ -} - -/// An iterator over all non-overlapping successive leftmost-first matches with -/// captures. -#[derive(Debug)] -pub struct CaptureMatches<'t, R>(Matches<'t, R>) -where - R: RegularExpression, - R::Text: 't; - -impl<'t, R> CaptureMatches<'t, R> -where - R: RegularExpression, - R::Text: 't, -{ - /// Return the text being searched. - pub fn text(&self) -> &'t R::Text { - self.0.text() - } - - /// Return the underlying regex. 
- pub fn regex(&self) -> &R { - self.0.regex() - } -} - -impl<'t, R> Iterator for CaptureMatches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ - type Item = Locations; - - fn next(&mut self) -> Option<Locations> { - if self.0.last_end > self.0.text.as_ref().len() { - return None; - } - let mut locs = self.0.re.locations(); - let (s, e) = match self.0.re.captures_read_at( - &mut locs, - self.0.text, - self.0.last_end, - ) { - None => return None, - Some((s, e)) => (s, e), - }; - if s == e { - self.0.last_end = self.0.re.next_after_empty(self.0.text, e); - if Some(e) == self.0.last_match { - return self.next(); - } - } else { - self.0.last_end = e; - } - self.0.last_match = Some(e); - Some(locs) - } -} - -impl<'t, R> FusedIterator for CaptureMatches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ -} diff --git a/vendor/regex/src/re_unicode.rs b/vendor/regex/src/re_unicode.rs deleted file mode 100644 index 197510e..0000000 --- a/vendor/regex/src/re_unicode.rs +++ /dev/null @@ -1,1311 +0,0 @@ -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt; -use std::iter::FusedIterator; -use std::ops::{Index, Range}; -use std::str::FromStr; -use std::sync::Arc; - -use crate::find_byte::find_byte; - -use crate::error::Error; -use crate::exec::{Exec, ExecNoSyncStr}; -use crate::expand::expand_str; -use crate::re_builder::unicode::RegexBuilder; -use crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; - -/// Escapes all regular expression meta characters in `text`. -/// -/// The string returned may be safely used as a literal in a regular -/// expression. -pub fn escape(text: &str) -> String { - regex_syntax::escape(text) -} - -/// Match represents a single match of a regex in a haystack. -/// -/// The lifetime parameter `'t` refers to the lifetime of the matched text. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub struct Match<'t> { - text: &'t str, - start: usize, - end: usize, -} - -impl<'t> Match<'t> { - /// Returns the starting byte offset of the match in the haystack. - #[inline] - pub fn start(&self) -> usize { - self.start - } - - /// Returns the ending byte offset of the match in the haystack. - #[inline] - pub fn end(&self) -> usize { - self.end - } - - /// Returns the range over the starting and ending byte offsets of the - /// match in the haystack. - #[inline] - pub fn range(&self) -> Range<usize> { - self.start..self.end - } - - /// Returns the matched text. - #[inline] - pub fn as_str(&self) -> &'t str { - &self.text[self.range()] - } - - /// Creates a new match from the given haystack and byte offsets. - #[inline] - fn new(haystack: &'t str, start: usize, end: usize) -> Match<'t> { - Match { text: haystack, start, end } - } -} - -impl<'t> From<Match<'t>> for &'t str { - fn from(m: Match<'t>) -> &'t str { - m.as_str() - } -} - -impl<'t> From<Match<'t>> for Range<usize> { - fn from(m: Match<'t>) -> Range<usize> { - m.range() - } -} - -/// A compiled regular expression for matching Unicode strings. -/// -/// It is represented as either a sequence of bytecode instructions (dynamic) -/// or as a specialized Rust function (native). It can be used to search, split -/// or replace text. All searching is done with an implicit `.*?` at the -/// beginning and end of an expression. To force an expression to match the -/// whole string (or a prefix or a suffix), you must use an anchor like `^` or -/// `$` (or `\A` and `\z`). 
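A minimal sketch of the anchoring behavior described above, assuming the `regex` 1.x API that this deleted file implements (the patterns here are illustrative only):

```rust
use regex::Regex;

fn main() {
    // Unanchored: the implicit `.*?` lets the pattern match anywhere.
    let re = Regex::new(r"\d+").unwrap();
    assert!(re.is_match("abc123"));

    // Anchored with `^` and `$`: the pattern must span the whole string.
    let anchored = Regex::new(r"^\d+$").unwrap();
    assert!(!anchored.is_match("abc123"));
    assert!(anchored.is_match("123"));
}
```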
-/// -/// While this crate will handle Unicode strings (whether in the regular -/// expression or in the search text), all positions returned are **byte -/// indices**. Every byte index is guaranteed to be at a Unicode code point -/// boundary. -/// -/// The lifetimes `'r` and `'t` in this crate correspond to the lifetime of a -/// compiled regular expression and text to search, respectively. -/// -/// The only methods that allocate new strings are the string replacement -/// methods. All other methods (searching and splitting) return borrowed -/// pointers into the string given. -/// -/// # Examples -/// -/// Find the location of a US phone number: -/// -/// ```rust -/// # use regex::Regex; -/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap(); -/// let mat = re.find("phone: 111-222-3333").unwrap(); -/// assert_eq!((mat.start(), mat.end()), (7, 19)); -/// ``` -/// -/// # Using the `std::str::pattern` methods with `Regex` -/// -/// > **Note**: This section requires that this crate is compiled with the -/// > `pattern` Cargo feature enabled, which **requires nightly Rust**. -/// -/// Since `Regex` implements `Pattern`, you can use regexes with methods -/// defined on `&str`. For example, `is_match`, `find`, `find_iter` -/// and `split` can be replaced with `str::contains`, `str::find`, -/// `str::match_indices` and `str::split`. -/// -/// Here are some examples: -/// -/// ```rust,ignore -/// # use regex::Regex; -/// let re = Regex::new(r"\d+").unwrap(); -/// let haystack = "a111b222c"; -/// -/// assert!(haystack.contains(&re)); -/// assert_eq!(haystack.find(&re), Some(1)); -/// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(), -/// vec![(1, "111"), (5, "222")]); -/// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]); -/// ``` -#[derive(Clone)] -pub struct Regex(Exec); - -impl fmt::Display for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -impl fmt::Debug for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -#[doc(hidden)] -impl From<Exec> for Regex { - fn from(exec: Exec) -> Regex { - Regex(exec) - } -} - -impl FromStr for Regex { - type Err = Error; - - /// Attempts to parse a string into a regular expression - fn from_str(s: &str) -> Result<Regex, Error> { - Regex::new(s) - } -} - -/// Core regular expression methods. -impl Regex { - /// Compiles a regular expression. Once compiled, it can be used repeatedly - /// to search, split or replace text in a string. - /// - /// If an invalid expression is given, then an error is returned. - pub fn new(re: &str) -> Result<Regex, Error> { - RegexBuilder::new(re).build() - } - - /// Returns true if and only if there is a match for the regex in the - /// string given. - /// - /// It is recommended to use this method if all you need to do is test - /// a match, since the underlying matching engine may be able to do less - /// work. 
- /// - /// # Example - /// - /// Test if some text contains at least one word with exactly 13 - /// Unicode word characters: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "I categorically deny having triskaidekaphobia."; - /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text)); - /// # } - /// ``` - pub fn is_match(&self, text: &str) -> bool { - self.is_match_at(text, 0) - } - - /// Returns the start and end byte range of the leftmost-first match in - /// `text`. If no match exists, then `None` is returned. - /// - /// Note that this should only be used if you want to discover the position - /// of the match. Testing the existence of a match is faster if you use - /// `is_match`. - /// - /// # Example - /// - /// Find the start and end location of the first word with exactly 13 - /// Unicode word characters: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "I categorically deny having triskaidekaphobia."; - /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap(); - /// assert_eq!(mat.start(), 2); - /// assert_eq!(mat.end(), 15); - /// # } - /// ``` - pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>> { - self.find_at(text, 0) - } - - /// Returns an iterator for each successive non-overlapping match in - /// `text`, returning the start and end byte indices with respect to - /// `text`. - /// - /// # Example - /// - /// Find the start and end location of every word with exactly 13 Unicode - /// word characters: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "Retroactively relinquishing remunerations is reprehensible."; - /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) { - /// println!("{:?}", mat); - /// } - /// # } - /// ``` - pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> { - Matches(self.0.searcher_str().find_iter(text)) - } - - /// Returns the capture groups corresponding to the leftmost-first - /// match in `text`. Capture group `0` always corresponds to the entire - /// match. If no match is found, then `None` is returned. - /// - /// You should only use `captures` if you need access to the location of - /// capturing group matches. Otherwise, `find` is faster for discovering - /// the location of the overall match. - /// - /// # Examples - /// - /// Say you have some text with movie names and their release years, - /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text - /// looking like that, while also extracting the movie name and its release - /// year separately. - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); - /// let text = "Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane"); - /// assert_eq!(caps.get(2).unwrap().as_str(), "1941"); - /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); - /// // You can also access the groups by index using the Index notation. - /// // Note that this will panic on an invalid index. - /// assert_eq!(&caps[1], "Citizen Kane"); - /// assert_eq!(&caps[2], "1941"); - /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); - /// # } - /// ``` - /// - /// Note that the full match is at capture group `0`. Each subsequent - /// capture group is indexed by the order of its opening `(`. 
- /// - /// We can make this example a bit clearer by using *named* capture groups: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = "Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.name("title").unwrap().as_str(), "Citizen Kane"); - /// assert_eq!(caps.name("year").unwrap().as_str(), "1941"); - /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); - /// // You can also access the groups by name using the Index notation. - /// // Note that this will panic on an invalid group name. - /// assert_eq!(&caps["title"], "Citizen Kane"); - /// assert_eq!(&caps["year"], "1941"); - /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); - /// - /// # } - /// ``` - /// - /// Here we name the capture groups, which we can access with the `name` - /// method or the `Index` notation with a `&str`. Note that the named - /// capture groups are still accessible with `get` or the `Index` notation - /// with a `usize`. - /// - /// The `0`th capture group is always unnamed, so it must always be - /// accessed with `get(0)` or `[0]`. - pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> { - let mut locs = self.capture_locations(); - self.captures_read_at(&mut locs, text, 0).map(move |_| Captures { - text, - locs: locs.0, - named_groups: self.0.capture_name_idx().clone(), - }) - } - - /// Returns an iterator over all the non-overlapping capture groups matched - /// in `text`. This is operationally the same as `find_iter`, except it - /// yields information about capturing group matches. - /// - /// # Example - /// - /// We can use this to find all movie titles and their release years in - /// some text, where the movie is formatted like "'Title' (xxxx)": - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; - /// for caps in re.captures_iter(text) { - /// println!("Movie: {:?}, Released: {:?}", - /// &caps["title"], &caps["year"]); - /// } - /// // Output: - /// // Movie: Citizen Kane, Released: 1941 - /// // Movie: The Wizard of Oz, Released: 1939 - /// // Movie: M, Released: 1931 - /// # } - /// ``` - pub fn captures_iter<'r, 't>( - &'r self, - text: &'t str, - ) -> CaptureMatches<'r, 't> { - CaptureMatches(self.0.searcher_str().captures_iter(text)) - } - - /// Returns an iterator of substrings of `text` delimited by a match of the - /// regular expression. Namely, each element of the iterator corresponds to - /// text that *isn't* matched by the regular expression. - /// - /// This method will *not* copy the text given. - /// - /// # Example - /// - /// To split a string delimited by arbitrary amounts of spaces or tabs: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"[ \t]+").unwrap(); - /// let fields: Vec<&str> = re.split("a b \t c\td e").collect(); - /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]); - /// # } - /// ``` - pub fn split<'r, 't>(&'r self, text: &'t str) -> Split<'r, 't> { - Split { finder: self.find_iter(text), last: 0 } - } - - /// Returns an iterator of at most `limit` substrings of `text` delimited - /// by a match of the regular expression. (A `limit` of `0` will return no - /// substrings.) 
Namely, each element of the iterator corresponds to text - /// that *isn't* matched by the regular expression. The remainder of the - /// string that is not split will be the last element in the iterator. - /// - /// This method will *not* copy the text given. - /// - /// # Example - /// - /// Get the first two words in some text: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"\W+").unwrap(); - /// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect(); - /// assert_eq!(fields, vec!("Hey", "How", "are you?")); - /// # } - /// ``` - pub fn splitn<'r, 't>( - &'r self, - text: &'t str, - limit: usize, - ) -> SplitN<'r, 't> { - SplitN { splits: self.split(text), n: limit } - } - - /// Replaces the leftmost-first match with the replacement provided. - /// The replacement can be a regular string (where `$N` and `$name` are - /// expanded to match capture groups) or a function that takes the matches' - /// `Captures` and returns the replaced string. - /// - /// If no match is found, then a copy of the string is returned unchanged. - /// - /// # Replacement string syntax - /// - /// All instances of `$name` in the replacement text are replaced with the - /// corresponding capture group `name`. - /// - /// `name` may be an integer corresponding to the index of the - /// capture group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. - /// - /// The longest possible name is used. e.g., `$1a` looks up the capture - /// group named `1a` and not the capture group at index `1`. To exert more - /// precise control over the name, use braces, e.g., `${1}a`. - /// - /// To write a literal `$` use `$$`. - /// - /// # Examples - /// - /// Note that this function is polymorphic with respect to the replacement. - /// In typical usage, this can just be a normal string: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new("[^01]+").unwrap(); - /// assert_eq!(re.replace("1078910", ""), "1010"); - /// # } - /// ``` - /// - /// But anything satisfying the `Replacer` trait will work. For example, - /// a closure of type `|&Captures| -> String` provides direct access to the - /// captures corresponding to a match. This allows one to access - /// capturing group matches easily: - /// - /// ```rust - /// # use regex::Regex; - /// # use regex::Captures; fn main() { - /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| { - /// format!("{} {}", &caps[2], &caps[1]) - /// }); - /// assert_eq!(result, "Bruce Springsteen"); - /// # } - /// ``` - /// - /// But this is a bit cumbersome to use all the time. Instead, a simple - /// syntax is supported that expands `$name` into the corresponding capture - /// group.
Here's the last example, but using this expansion technique - /// with named capture groups: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap(); - /// let result = re.replace("Springsteen, Bruce", "$first $last"); - /// assert_eq!(result, "Bruce Springsteen"); - /// # } - /// ``` - /// - /// Note that using `$2` instead of `$first` or `$1` instead of `$last` - /// would produce the same result. To write a literal `$` use `$$`. - /// - /// Sometimes the replacement string requires use of curly braces to - /// delineate a capture group replacement and surrounding literal text. - /// For example, if we wanted to join two words together with an - /// underscore: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap(); - /// let result = re.replace("deep fried", "${first}_$second"); - /// assert_eq!(result, "deep_fried"); - /// # } - /// ``` - /// - /// Without the curly braces, the capture group name `first_` would be - /// used, and since it doesn't exist, it would be replaced with the empty - /// string. - /// - /// Finally, sometimes you just want to replace a literal string with no - /// regard for capturing group expansion. This can be done by wrapping a - /// byte string with `NoExpand`: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// use regex::NoExpand; - /// - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last")); - /// assert_eq!(result, "$2 $last"); - /// # } - /// ``` - pub fn replace<'t, R: Replacer>( - &self, - text: &'t str, - rep: R, - ) -> Cow<'t, str> { - self.replacen(text, 1, rep) - } - - /// Replaces all non-overlapping matches in `text` with the replacement - /// provided. This is the same as calling `replacen` with `limit` set to - /// `0`. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement string. - pub fn replace_all<'t, R: Replacer>( - &self, - text: &'t str, - rep: R, - ) -> Cow<'t, str> { - self.replacen(text, 0, rep) - } - - /// Replaces at most `limit` non-overlapping matches in `text` with the - /// replacement provided. If `limit` is 0, then all non-overlapping matches - /// are replaced. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement string. - pub fn replacen<'t, R: Replacer>( - &self, - text: &'t str, - limit: usize, - mut rep: R, - ) -> Cow<'t, str> { - // If we know that the replacement doesn't have any capture expansions, - // then we can use the fast path. The fast path can make a tremendous - // difference: - // - // 1) We use `find_iter` instead of `captures_iter`. Not asking for - // captures generally makes the regex engines faster. - // 2) We don't need to look up all of the capture groups and do - // replacements inside the replacement string. We just push it - // at each match and be done with it. 
- if let Some(rep) = rep.no_expansion() { - let mut it = self.find_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for (i, m) in it { - new.push_str(&text[last_match..m.start()]); - new.push_str(&rep); - last_match = m.end(); - if limit > 0 && i >= limit - 1 { - break; - } - } - new.push_str(&text[last_match..]); - return Cow::Owned(new); - } - - // The slower path, which we use if the replacement needs access to - // capture groups. - let mut it = self.captures_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for (i, cap) in it { - // unwrap on 0 is OK because captures only reports matches - let m = cap.get(0).unwrap(); - new.push_str(&text[last_match..m.start()]); - rep.replace_append(&cap, &mut new); - last_match = m.end(); - if limit > 0 && i >= limit - 1 { - break; - } - } - new.push_str(&text[last_match..]); - Cow::Owned(new) - } -} - -/// Advanced or "lower level" search methods. -impl Regex { - /// Returns the end location of a match in the text given. - /// - /// This method may have the same performance characteristics as - /// `is_match`, except it provides an end location for a match. In - /// particular, the location returned *may be shorter* than the proper end - /// of the leftmost-first match. - /// - /// # Example - /// - /// Typically, `a+` would match the entire first sequence of `a` in some - /// text, but `shortest_match` can give up as soon as it sees the first - /// `a`. - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "aaaaa"; - /// let pos = Regex::new(r"a+").unwrap().shortest_match(text); - /// assert_eq!(pos, Some(1)); - /// # } - /// ``` - pub fn shortest_match(&self, text: &str) -> Option<usize> { - self.shortest_match_at(text, 0) - } - - /// Returns the same as shortest_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn shortest_match_at( - &self, - text: &str, - start: usize, - ) -> Option<usize> { - self.0.searcher_str().shortest_match_at(text, start) - } - - /// Returns the same as is_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn is_match_at(&self, text: &str, start: usize) -> bool { - self.0.searcher_str().is_match_at(text, start) - } - - /// Returns the same as find, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn find_at<'t>( - &self, - text: &'t str, - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher_str() - .find_at(text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// This is like `captures`, but uses - /// [`CaptureLocations`](struct.CaptureLocations.html) - /// instead of - /// [`Captures`](struct.Captures.html) in order to amortize allocations. - /// - /// To create a `CaptureLocations` value, use the - /// `Regex::capture_locations` method. 
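A small sketch of the amortization that `CaptureLocations` enables, assuming the `capture_locations`/`captures_read` API shown in this file (the pattern, haystacks, and offsets below are illustrative):

```rust
use regex::Regex;

fn main() {
    let re = Regex::new(r"([0-9]{4})-([0-9]{2})").unwrap();
    // Allocate the capture slots once...
    let mut locs = re.capture_locations();
    for hay in ["2024-11", "1999-01"] {
        // ...and reuse them for every search, avoiding a fresh
        // `Captures` allocation per call.
        let m = re.captures_read(&mut locs, hay).unwrap();
        assert_eq!(0..7, m.range());
        // Group 1 is the four-digit year.
        assert_eq!(Some((0, 4)), locs.get(1));
    }
}
```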
- /// - /// This returns the overall match if this was successful, which is always - /// equivalent to the `0`th capture group. - pub fn captures_read<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t str, - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, 0) - } - - /// Returns the same as captures, but starts the search at the given - /// offset and populates the capture locations given. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn captures_read_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t str, - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher_str() - .captures_read_at(&mut locs.0, text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// An undocumented alias for `captures_read_at`. - /// - /// The `regex-capi` crate previously used this routine, so to avoid - /// breaking that crate, we continue to provide the name as an undocumented - /// alias. - #[doc(hidden)] - pub fn read_captures_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t str, - start: usize, - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, start) - } -} - -/// Auxiliary methods. -impl Regex { - /// Returns the original string of this regex. - pub fn as_str(&self) -> &str { - &self.0.regex_strings()[0] - } - - /// Returns an iterator over the capture names. - pub fn capture_names(&self) -> CaptureNames<'_> { - CaptureNames(self.0.capture_names().iter()) - } - - /// Returns the number of captures. - pub fn captures_len(&self) -> usize { - self.0.capture_names().len() - } - - /// Returns an empty set of capture locations that can be reused in - /// multiple calls to `captures_read` or `captures_read_at`. - pub fn capture_locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher_str().locations()) - } - - /// An alias for `capture_locations` to preserve backward compatibility. - /// - /// The `regex-capi` crate uses this method, so to avoid breaking that - /// crate, we continue to export it as an undocumented API. - #[doc(hidden)] - pub fn locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher_str().locations()) - } -} - -/// An iterator over the names of all possible captures. -/// -/// `None` indicates an unnamed capture; the first element (capture 0, the -/// whole matched region) is always unnamed. -/// -/// `'r` is the lifetime of the compiled regular expression. -#[derive(Clone, Debug)] -pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>); - -impl<'r> Iterator for CaptureNames<'r> { - type Item = Option<&'r str>; - - fn next(&mut self) -> Option<Option<&'r str>> { - self.0 - .next() - .as_ref() - .map(|slot| slot.as_ref().map(|name| name.as_ref())) - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } - - fn count(self) -> usize { - self.0.count() - } -} - -impl<'r> ExactSizeIterator for CaptureNames<'r> {} - -impl<'r> FusedIterator for CaptureNames<'r> {} - -/// Yields all substrings delimited by a regular expression match. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the string being split.
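A brief sketch of the capture-name accessors above (`capture_names` and `captures_len`); the pattern is illustrative, and group 0 is always unnamed:

```rust
use regex::Regex;

fn main() {
    let re = Regex::new(r"(?P<year>\d{4})-(\d{2})").unwrap();
    // Group 0 (the whole match) is always unnamed; unnamed groups
    // show up as `None`.
    let names: Vec<Option<&str>> = re.capture_names().collect();
    assert_eq!(names, vec![None, Some("year"), None]);
    // `captures_len` counts group 0 too.
    assert_eq!(3, re.captures_len());
}
```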
-#[derive(Debug)] -pub struct Split<'r, 't> { - finder: Matches<'r, 't>, - last: usize, -} - -impl<'r, 't> Iterator for Split<'r, 't> { - type Item = &'t str; - - fn next(&mut self) -> Option<&'t str> { - let text = self.finder.0.text(); - match self.finder.next() { - None => { - if self.last > text.len() { - None - } else { - let s = &text[self.last..]; - self.last = text.len() + 1; // Next call will return None - Some(s) - } - } - Some(m) => { - let matched = &text[self.last..m.start()]; - self.last = m.end(); - Some(matched) - } - } - } -} - -impl<'r, 't> FusedIterator for Split<'r, 't> {} - -/// Yields at most `N` substrings delimited by a regular expression match. -/// -/// The last substring will be whatever remains after splitting. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the string being split. -#[derive(Debug)] -pub struct SplitN<'r, 't> { - splits: Split<'r, 't>, - n: usize, -} - -impl<'r, 't> Iterator for SplitN<'r, 't> { - type Item = &'t str; - - fn next(&mut self) -> Option<&'t str> { - if self.n == 0 { - return None; - } - - self.n -= 1; - if self.n > 0 { - return self.splits.next(); - } - - let text = self.splits.finder.0.text(); - if self.splits.last > text.len() { - // We've already returned all substrings. - None - } else { - // self.n == 0, so future calls will return None immediately - Some(&text[self.splits.last..]) - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - (0, Some(self.n)) - } -} - -impl<'r, 't> FusedIterator for SplitN<'r, 't> {} - -/// CaptureLocations is a low level representation of the raw offsets of each -/// submatch. -/// -/// You can think of this as a lower level -/// [`Captures`](struct.Captures.html), where this type does not support -/// named capturing groups directly and it does not borrow the text that these -/// offsets were matched on. -/// -/// Primarily, this type is useful when using the lower level `Regex` APIs -/// such as `read_captures`, which permits amortizing the allocation in which -/// capture match locations are stored. -/// -/// In order to build a value of this type, you'll need to call the -/// `capture_locations` method on the `Regex` being used to execute the search. -/// The value returned can then be reused in subsequent searches. -#[derive(Clone, Debug)] -pub struct CaptureLocations(re_trait::Locations); - -/// A type alias for `CaptureLocations` for backwards compatibility. -/// -/// Previously, we exported `CaptureLocations` as `Locations` in an -/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), -/// we continue re-exporting the same undocumented API. -#[doc(hidden)] -pub type Locations = CaptureLocations; - -impl CaptureLocations { - /// Returns the start and end positions of the Nth capture group. Returns - /// `None` if `i` is not a valid capture group or if the capture group did - /// not match anything. The positions returned are *always* byte indices - /// with respect to the original string matched. - #[inline] - pub fn get(&self, i: usize) -> Option<(usize, usize)> { - self.0.pos(i) - } - - /// Returns the total number of capture groups (even if they didn't match). - /// - /// This is always at least `1` since every regex has at least `1` - /// capturing group that corresponds to the entire match. - #[inline] - pub fn len(&self) -> usize { - self.0.len() - } - - /// An alias for the `get` method for backwards compatibility. - /// - /// Previously, we exported `get` as `pos` in an undocumented API. 
To - /// prevent breaking that code (e.g., in `regex-capi`), we continue - /// re-exporting the same undocumented API. - #[doc(hidden)] - #[inline] - pub fn pos(&self, i: usize) -> Option<(usize, usize)> { - self.get(i) - } -} - -/// Captures represents a group of captured strings for a single match. -/// -/// The 0th capture always corresponds to the entire match. Each subsequent -/// index corresponds to the next capture group in the regex. If a capture -/// group is named, then the matched string is *also* available via the `name` -/// method. (Note that the 0th capture is always unnamed and so must be -/// accessed with the `get` method.) -/// -/// Positions returned from a capture group are always byte indices. -/// -/// `'t` is the lifetime of the matched text. -pub struct Captures<'t> { - text: &'t str, - locs: re_trait::Locations, - named_groups: Arc<HashMap<String, usize>>, -} - -impl<'t> Captures<'t> { - /// Returns the match associated with the capture group at index `i`. If - /// `i` does not correspond to a capture group, or if the capture group - /// did not participate in the match, then `None` is returned. - /// - /// # Examples - /// - /// Get the text of the match with a default of an empty string if this - /// group didn't participate in the match: - /// - /// ```rust - /// # use regex::Regex; - /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); - /// let caps = re.captures("abc123").unwrap(); - /// - /// let text1 = caps.get(1).map_or("", |m| m.as_str()); - /// let text2 = caps.get(2).map_or("", |m| m.as_str()); - /// assert_eq!(text1, "123"); - /// assert_eq!(text2, ""); - /// ``` - pub fn get(&self, i: usize) -> Option<Match<'t>> { - self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e)) - } - - /// Returns the match for the capture group named `name`. If `name` isn't a - /// valid capture group or didn't match anything, then `None` is returned. - pub fn name(&self, name: &str) -> Option<Match<'t>> { - self.named_groups.get(name).and_then(|&i| self.get(i)) - } - - /// An iterator that yields all capturing matches in the order in which - /// they appear in the regex. If a particular capture group didn't - /// participate in the match, then `None` is yielded for that capture. - /// - /// The first match always corresponds to the overall match of the regex. - pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> { - SubCaptureMatches { caps: self, it: self.locs.iter() } - } - - /// Expands all instances of `$name` in `replacement` to the corresponding - /// capture group `name`, and writes them to the `dst` buffer given. - /// - /// `name` may be an integer corresponding to the index of the capture - /// group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. - /// - /// The longest possible name consisting of the characters `[_0-9A-Za-z]` - /// is used. e.g., `$1a` looks up the capture group named `1a` and not the - /// capture group at index `1`. To exert more precise control over the - /// name, or to refer to a capture group name that uses characters outside - /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When - /// using braces, any sequence of characters is permitted. 
If the sequence - /// does not refer to a capture group name in the corresponding regex, then - /// it is replaced with an empty string. - /// - /// To write a literal `$` use `$$`. - pub fn expand(&self, replacement: &str, dst: &mut String) { - expand_str(self, replacement, dst) - } - - /// Returns the total number of capture groups (even if they didn't match). - /// - /// This is always at least `1`, since every regex has at least one capture - /// group that corresponds to the full match. - #[inline] - pub fn len(&self) -> usize { - self.locs.len() - } -} - -impl<'t> fmt::Debug for Captures<'t> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_tuple("Captures").field(&CapturesDebug(self)).finish() - } -} - -struct CapturesDebug<'c, 't>(&'c Captures<'t>); - -impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // We'd like to show something nice here, even if it means an - // allocation to build a reverse index. - let slot_to_name: HashMap<&usize, &String> = - self.0.named_groups.iter().map(|(a, b)| (b, a)).collect(); - let mut map = f.debug_map(); - for (slot, m) in self.0.locs.iter().enumerate() { - let m = m.map(|(s, e)| &self.0.text[s..e]); - if let Some(name) = slot_to_name.get(&slot) { - map.entry(&name, &m); - } else { - map.entry(&slot, &m); - } - } - map.finish() - } -} - -/// Get a group by index. -/// -/// `'t` is the lifetime of the matched text. -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `get()` instead. -/// -/// # Panics -/// -/// If there is no group at the given index. -impl<'t> Index<usize> for Captures<'t> { - type Output = str; - - fn index(&self, i: usize) -> &str { - self.get(i) - .map(|m| m.as_str()) - .unwrap_or_else(|| panic!("no group at index '{}'", i)) - } -} - -/// Get a group by name. -/// -/// `'t` is the lifetime of the matched text and `'i` is the lifetime -/// of the group name (the index). -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `name` instead. -/// -/// # Panics -/// -/// If there is no group named by the given value. -impl<'t, 'i> Index<&'i str> for Captures<'t> { - type Output = str; - - fn index<'a>(&'a self, name: &'i str) -> &'a str { - self.name(name) - .map(|m| m.as_str()) - .unwrap_or_else(|| panic!("no group named '{}'", name)) - } -} - -/// An iterator that yields all capturing matches in the order in which they -/// appear in the regex. -/// -/// If a particular capture group didn't participate in the match, then `None` -/// is yielded for that capture. The first match always corresponds to the -/// overall match of the regex. -/// -/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and -/// the lifetime `'t` corresponds to the originally matched text. 
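To make the `$name`/`${name}` expansion rules above concrete, here is a minimal sketch assuming the `expand` API defined in this file (the pattern and haystack are illustrative):

```rust
use regex::Regex;

fn main() {
    let re = Regex::new(r"(?P<last>\w+), (?P<first>\w+)").unwrap();
    let caps = re.captures("Springsteen, Bruce").unwrap();
    let mut dst = String::new();
    // Braces separate the group name from adjacent literal text;
    // a bare `$first_` would look up a (nonexistent) group `first_`.
    caps.expand("${first}_${last}", &mut dst);
    assert_eq!(dst, "Bruce_Springsteen");
}
```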
-#[derive(Clone, Debug)] -pub struct SubCaptureMatches<'c, 't> { - caps: &'c Captures<'t>, - it: SubCapturesPosIter<'c>, -} - -impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> { - type Item = Option<Match<'t>>; - - fn next(&mut self) -> Option<Option<Match<'t>>> { - self.it - .next() - .map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e))) - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.it.size_hint() - } - - fn count(self) -> usize { - self.it.count() - } -} - -impl<'c, 't> ExactSizeIterator for SubCaptureMatches<'c, 't> {} - -impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {} - -/// An iterator that yields all non-overlapping capture groups matching a -/// particular regular expression. -/// -/// The iterator stops when no more matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched string. -#[derive(Debug)] -pub struct CaptureMatches<'r, 't>( - re_trait::CaptureMatches<'t, ExecNoSyncStr<'r>>, -); - -impl<'r, 't> Iterator for CaptureMatches<'r, 't> { - type Item = Captures<'t>; - - fn next(&mut self) -> Option<Captures<'t>> { - self.0.next().map(|locs| Captures { - text: self.0.text(), - locs, - named_groups: self.0.regex().capture_name_idx().clone(), - }) - } -} - -impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {} - -/// An iterator over all non-overlapping matches for a particular string. -/// -/// The iterator yields a `Match` value. The iterator stops when no more -/// matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched string. -#[derive(Debug)] -pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSyncStr<'r>>); - -impl<'r, 't> Iterator for Matches<'r, 't> { - type Item = Match<'t>; - - fn next(&mut self) -> Option<Match<'t>> { - let text = self.0.text(); - self.0.next().map(|(s, e)| Match::new(text, s, e)) - } -} - -impl<'r, 't> FusedIterator for Matches<'r, 't> {} - -/// Replacer describes types that can be used to replace matches in a string. -/// -/// In general, users of this crate shouldn't need to implement this trait, -/// since implementations are already provided for `&str` along with other -/// variants of string types and `FnMut(&Captures) -> String` (or any -/// `FnMut(&Captures) -> T` where `T: AsRef<str>`), which covers most use cases. -pub trait Replacer { - /// Appends text to `dst` to replace the current match. - /// - /// The current match is represented by `caps`, which is guaranteed to - /// have a match at capture group `0`. - /// - /// For example, a no-op replacement would be - /// `dst.push_str(caps.get(0).unwrap().as_str())`. - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String); - - /// Return a fixed unchanging replacement string. - /// - /// When doing replacements, if access to `Captures` is not needed (e.g., - /// the replacement byte string does not need `$` expansion), then it can - /// be beneficial to avoid finding sub-captures. - /// - /// In general, this is called once for every call to `replacen`. - fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> { - None - } - - /// Return a `Replacer` that borrows and wraps this `Replacer`. - /// - /// This is useful when you want to take a generic `Replacer` (which might - /// not be cloneable) and use it without consuming it, so it can be used - /// more than once. 
- /// - /// # Example - /// - /// ``` - /// use regex::{Regex, Replacer}; - /// - /// fn replace_all_twice<R: Replacer>( - /// re: Regex, - /// src: &str, - /// mut rep: R, - /// ) -> String { - /// let dst = re.replace_all(src, rep.by_ref()); - /// let dst = re.replace_all(&dst, rep.by_ref()); - /// dst.into_owned() - /// } - /// ``` - fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { - ReplacerRef(self) - } -} - -/// By-reference adaptor for a `Replacer` -/// -/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref). -#[derive(Debug)] -pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); - -impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.0.replace_append(caps, dst) - } - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - self.0.no_expansion() - } -} - -impl<'a> Replacer for &'a str { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - caps.expand(*self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a String { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_str().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl Replacer for String { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_str().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl<'a> Replacer for Cow<'a, str> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_ref().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a Cow<'a, str> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_ref().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<'_, str>> { - let s = t.as_ref(); - match find_byte(b'$', s.as_bytes()) { - Some(_) => None, - None => Some(Cow::Borrowed(s)), - } -} - -impl<F, T> Replacer for F -where - F: FnMut(&Captures<'_>) -> T, - T: AsRef<str>, -{ - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - dst.push_str((*self)(caps).as_ref()); - } -} - -/// `NoExpand` indicates literal string replacement. -/// -/// It can be used with `replace` and `replace_all` to do a literal string -/// replacement without expanding `$name` to their corresponding capture -/// groups. This can be both convenient (to avoid escaping `$`, for example) -/// and performant (since capture groups don't need to be found). -/// -/// `'t` is the lifetime of the literal text. 
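A short sketch of the two `Replacer` paths described above: a closure used through the blanket impl (which must inspect `Captures`), and a literal replacement via `NoExpand` (which takes the `no_expansion` fast path). The haystack and patterns are illustrative only:

```rust
use regex::{Captures, NoExpand, Regex};

fn main() {
    let re = Regex::new(r"\d+").unwrap();

    // A closure is a `Replacer` through the blanket impl above.
    let doubled = re.replace_all("3 and 4", |caps: &Captures<'_>| {
        let n: i64 = caps[0].parse().unwrap();
        (n * 2).to_string()
    });
    assert_eq!(doubled, "6 and 8");

    // `NoExpand` takes the `no_expansion` fast path: `$` is literal
    // and no capture groups need to be resolved.
    assert_eq!(re.replace_all("3 and 4", NoExpand("$0")), "$0 and $0");
}
```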
-#[derive(Clone, Debug)] -pub struct NoExpand<'t>(pub &'t str); - -impl<'t> Replacer for NoExpand<'t> { - fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) { - dst.push_str(self.0); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - Some(Cow::Borrowed(self.0)) - } -} diff --git a/vendor/regex/src/regex/bytes.rs b/vendor/regex/src/regex/bytes.rs new file mode 100644 index 0000000..19f5701 --- /dev/null +++ b/vendor/regex/src/regex/bytes.rs @@ -0,0 +1,2600 @@ +use alloc::{borrow::Cow, string::String, sync::Arc, vec::Vec}; + +use regex_automata::{meta, util::captures, Input, PatternID}; + +use crate::{bytes::RegexBuilder, error::Error}; + +/// A compiled regular expression for searching Unicode haystacks. +/// +/// A `Regex` can be used to search haystacks, split haystacks into substrings +/// or replace substrings in a haystack with a different substring. All +/// searching is done with an implicit `(?s:.)*?` at the beginning and end of +/// a pattern. To force an expression to match the whole string (or a prefix +/// or a suffix), you must use an anchor like `^` or `$` (or `\A` and `\z`). +/// +/// Like the `Regex` type in the parent module, matches with this regex return +/// byte offsets into the haystack. **Unlike** the parent `Regex` type, these +/// byte offsets may not correspond to UTF-8 sequence boundaries since the +/// regexes in this module can match arbitrary bytes. +/// +/// The only methods that allocate new byte strings are the string replacement +/// methods. All other methods (searching and splitting) return borrowed +/// references into the haystack given. +/// +/// # Example +/// +/// Find the offsets of a US phone number: +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap(); +/// let m = re.find(b"phone: 111-222-3333").unwrap(); +/// assert_eq!(7..19, m.range()); +/// ``` +/// +/// # Example: extracting capture groups +/// +/// A common way to use regexes is with capture groups. That is, instead of +/// just looking for matches of an entire regex, parentheses are used to create +/// groups that represent part of the match. +/// +/// For example, consider a haystack with multiple lines, and each line has +/// three whitespace delimited fields where the second field is expected to be +/// a number and the third field a boolean. To make this convenient, we use +/// the [`Captures::extract`] API to put the strings that match each group +/// into a fixed size array: +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let hay = b" +/// rabbit 54 true +/// groundhog 2 true +/// does not match +/// fox 109 false +/// "; +/// let re = Regex::new(r"(?m)^\s*(\S+)\s+([0-9]+)\s+(true|false)\s*$").unwrap(); +/// let mut fields: Vec<(&[u8], i64, bool)> = vec![]; +/// for (_, [f1, f2, f3]) in re.captures_iter(hay).map(|caps| caps.extract()) { +/// // These unwraps are OK because our pattern is written in a way where +/// // all matches for f2 and f3 will be valid UTF-8. +/// let f2 = std::str::from_utf8(f2).unwrap(); +/// let f3 = std::str::from_utf8(f3).unwrap(); +/// fields.push((f1, f2.parse()?, f3.parse()?)); +/// } +/// assert_eq!(fields, vec![ +/// (&b"rabbit"[..], 54, true), +/// (&b"groundhog"[..], 2, true), +/// (&b"fox"[..], 109, false), +/// ]); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// # Example: matching invalid UTF-8 +/// +/// One of the reasons for searching `&[u8]` haystacks is that the `&[u8]` +/// might not be valid UTF-8.
Indeed, with a `bytes::Regex`, patterns that +/// match invalid UTF-8 are explicitly allowed. Here's one example that looks +/// for valid UTF-8 fields that might be separated by invalid UTF-8. In this +/// case, we use `(?s-u:.)`, which matches any byte. Attempting to use it in a +/// top-level `Regex` will result in the regex failing to compile. Notice also +/// that we use `.` with Unicode mode enabled, in which case, only valid UTF-8 +/// is matched. In this way, we can build one pattern where some parts only +/// match valid UTF-8 while other parts are more permissive. +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// // F0 9F 92 A9 is the UTF-8 encoding for a Pile of Poo. +/// let hay = b"\xFF\xFFfoo\xFF\xFF\xFF\xF0\x9F\x92\xA9\xFF"; +/// // An equivalent to '(?s-u:.)' is '(?-u:[\x00-\xFF])'. +/// let re = Regex::new(r"(?s)(?-u:.)*?(?<f1>.+)(?-u:.)*?(?<f2>.+)").unwrap(); +/// let caps = re.captures(hay).unwrap(); +/// assert_eq!(&caps["f1"], &b"foo"[..]); +/// assert_eq!(&caps["f2"], "💩".as_bytes()); +/// ``` +#[derive(Clone)] +pub struct Regex { + pub(crate) meta: meta::Regex, + pub(crate) pattern: Arc<str>, +} + +impl core::fmt::Display for Regex { + /// Shows the original regular expression. + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl core::fmt::Debug for Regex { + /// Shows the original regular expression. + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("Regex").field(&self.as_str()).finish() + } +} + +impl core::str::FromStr for Regex { + type Err = Error; + + /// Attempts to parse a string into a regular expression + fn from_str(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<&str> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<String> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: String) -> Result<Regex, Error> { + Regex::new(&s) + } +} + +/// Core regular expression methods. +impl Regex { + /// Compiles a regular expression. Once compiled, it can be used repeatedly + /// to search, split or replace substrings in a haystack. + /// + /// Note that regex compilation tends to be a somewhat expensive process, + /// and unlike higher level environments, compilation is not automatically + /// cached for you. One should endeavor to compile a regex once and then + /// reuse it. For example, it's a bad idea to compile the same regex + /// repeatedly in a loop. + /// + /// # Errors + /// + /// If an invalid pattern is given, then an error is returned. + /// An error is also returned if the pattern is valid, but would + /// produce a regex that is bigger than the configured size limit via + /// [`RegexBuilder::size_limit`]. (A reasonable size limit is enabled by + /// default.) + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// // An invalid pattern because of an unclosed parenthesis + /// assert!(Regex::new(r"foo(bar").is_err()); + /// // An invalid pattern because the regex would be too big + /// // because Unicode tends to inflate things. + /// assert!(Regex::new(r"\w{1000}").is_err()); + /// // Disabling Unicode can make the regex much smaller, + /// // potentially by up to or more than an order of magnitude.
+ /// assert!(Regex::new(r"(?-u:\w){1000}").is_ok()); + /// ``` + pub fn new(re: &str) -> Result<Regex, Error> { + RegexBuilder::new(re).build() + } + + /// Returns true if and only if there is a match for the regex anywhere + /// in the haystack given. + /// + /// It is recommended to use this method if all you need to do is test + /// whether a match exists, since the underlying matching engine may be + /// able to do less work. + /// + /// # Example + /// + /// Test if some haystack contains at least one word with exactly 13 + /// Unicode word characters: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = b"I categorically deny having triskaidekaphobia."; + /// assert!(re.is_match(hay)); + /// ``` + #[inline] + pub fn is_match(&self, haystack: &[u8]) -> bool { + self.is_match_at(haystack, 0) + } + + /// This routine searches for the first match of this regex in the + /// haystack given, and if found, returns a [`Match`]. The `Match` + /// provides access to both the byte offsets of the match and the actual + /// substring that matched. + /// + /// Note that this should only be used if you want to find the entire + /// match. If instead you just want to test the existence of a match, + /// it's potentially faster to use `Regex::is_match(hay)` instead of + /// `Regex::find(hay).is_some()`. + /// + /// # Example + /// + /// Find the first word with exactly 13 Unicode word characters: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = b"I categorically deny having triskaidekaphobia."; + /// let mat = re.find(hay).unwrap(); + /// assert_eq!(2..15, mat.range()); + /// assert_eq!(b"categorically", mat.as_bytes()); + /// ``` + #[inline] + pub fn find<'h>(&self, haystack: &'h [u8]) -> Option<Match<'h>> { + self.find_at(haystack, 0) + } + + /// Returns an iterator that yields successive non-overlapping matches in + /// the given haystack. The iterator yields values of type [`Match`]. + /// + /// # Time complexity + /// + /// Note that since `find_iter` runs potentially many searches on the + /// haystack and since each search has worst case `O(m * n)` time + /// complexity, the overall worst case time complexity for iteration is + /// `O(m * n^2)`. + /// + /// # Example + /// + /// Find every word with exactly 13 Unicode word characters: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = b"Retroactively relinquishing remunerations is reprehensible."; + /// let matches: Vec<_> = re.find_iter(hay).map(|m| m.as_bytes()).collect(); + /// assert_eq!(matches, vec![ + /// &b"Retroactively"[..], + /// &b"relinquishing"[..], + /// &b"remunerations"[..], + /// &b"reprehensible"[..], + /// ]); + /// ``` + #[inline] + pub fn find_iter<'r, 'h>(&'r self, haystack: &'h [u8]) -> Matches<'r, 'h> { + Matches { haystack, it: self.meta.find_iter(haystack) } + } + + /// This routine searches for the first match of this regex in the haystack + /// given, and if found, returns not only the overall match but also the + /// matches of each capture group in the regex. If no match is found, then + /// `None` is returned. + /// + /// Capture group `0` always corresponds to an implicit unnamed group that + /// includes the entire match. If a match is found, this group is always + /// present. 
Subsequent groups may be named and are numbered, starting + /// at 1, by the order in which the opening parenthesis appears in the + /// pattern. For example, in the pattern `(?<a>.(?<b>.))(?<c>.)`, `a`, + /// `b` and `c` correspond to capture group indices `1`, `2` and `3`, + /// respectively. + /// + /// You should only use `captures` if you need access to the capture group + /// matches. Otherwise, [`Regex::find`] is generally faster for discovering + /// just the overall match. + /// + /// # Example + /// + /// Say you have some haystack with movie names and their release years, + /// like "'Citizen Kane' (1941)". It'd be nice if we could search for + /// strings looking like that, while also extracting the movie name and its + /// release year separately. The example below shows how to do that. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); + /// let hay = b"Not my favorite movie: 'Citizen Kane' (1941)."; + /// let caps = re.captures(hay).unwrap(); + /// assert_eq!(caps.get(0).unwrap().as_bytes(), b"'Citizen Kane' (1941)"); + /// assert_eq!(caps.get(1).unwrap().as_bytes(), b"Citizen Kane"); + /// assert_eq!(caps.get(2).unwrap().as_bytes(), b"1941"); + /// // You can also access the groups by index using the Index notation. + /// // Note that this will panic on an invalid index. In this case, these + /// // accesses are always correct because the overall regex will only + /// // match when these capture groups match. + /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); + /// assert_eq!(&caps[1], b"Citizen Kane"); + /// assert_eq!(&caps[2], b"1941"); + /// ``` + /// + /// Note that the full match is at capture group `0`. Each subsequent + /// capture group is indexed by the order of its opening `(`. + /// + /// We can make this example a bit clearer by using *named* capture groups: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>\d{4})\)").unwrap(); + /// let hay = b"Not my favorite movie: 'Citizen Kane' (1941)."; + /// let caps = re.captures(hay).unwrap(); + /// assert_eq!(caps.get(0).unwrap().as_bytes(), b"'Citizen Kane' (1941)"); + /// assert_eq!(caps.name("title").unwrap().as_bytes(), b"Citizen Kane"); + /// assert_eq!(caps.name("year").unwrap().as_bytes(), b"1941"); + /// // You can also access the groups by name using the Index notation. + /// // Note that this will panic on an invalid group name. In this case, + /// // these accesses are always correct because the overall regex will + /// // only match when these capture groups match. + /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); + /// assert_eq!(&caps["title"], b"Citizen Kane"); + /// assert_eq!(&caps["year"], b"1941"); + /// ``` + /// + /// Here we name the capture groups, which we can access with the `name` + /// method or the `Index` notation with a `&str`. Note that the named + /// capture groups are still accessible with `get` or the `Index` notation + /// with a `usize`. + /// + /// The `0`th capture group is always unnamed, so it must always be + /// accessed with `get(0)` or `[0]`. 
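+    ///
+    /// As an aside (an illustrative sketch, not from the upstream docs):
+    /// when a group might not participate in a match, prefer
+    /// [`Captures::get`] over the `Index` notation, since `get` returns
+    /// `None` instead of panicking:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"(a)(b)?").unwrap();
+    /// let caps = re.captures(b"a").unwrap();
+    /// // Group 2 exists in the pattern but did not participate in the
+    /// // match, so `get` returns `None` where `&caps[2]` would panic.
+    /// assert!(caps.get(2).is_none());
+    /// ```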
+    ///
+    /// Finally, one other way to get the matched substrings is with the
+    /// [`Captures::extract`] API:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
+    /// let hay = b"Not my favorite movie: 'Citizen Kane' (1941).";
+    /// let (full, [title, year]) = re.captures(hay).unwrap().extract();
+    /// assert_eq!(full, b"'Citizen Kane' (1941)");
+    /// assert_eq!(title, b"Citizen Kane");
+    /// assert_eq!(year, b"1941");
+    /// ```
+    #[inline]
+    pub fn captures<'h>(&self, haystack: &'h [u8]) -> Option<Captures<'h>> {
+        self.captures_at(haystack, 0)
+    }
+
+    /// Returns an iterator that yields successive non-overlapping matches in
+    /// the given haystack. The iterator yields values of type [`Captures`].
+    ///
+    /// This is the same as [`Regex::find_iter`], but instead of only providing
+    /// access to the overall match, each value yielded includes access to the
+    /// matches of all capture groups in the regex. Reporting this extra match
+    /// data is potentially costly, so callers should only use `captures_iter`
+    /// over `find_iter` when they actually need access to the capture group
+    /// matches.
+    ///
+    /// # Time complexity
+    ///
+    /// Note that since `captures_iter` runs potentially many searches on the
+    /// haystack and since each search has worst case `O(m * n)` time
+    /// complexity, the overall worst case time complexity for iteration is
+    /// `O(m * n^2)`.
+    ///
+    /// # Example
+    ///
+    /// We can use this to find all movie titles and their release years in
+    /// some haystack, where the movie is formatted like "'Title' (xxxx)":
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"'([^']+)'\s+\(([0-9]{4})\)").unwrap();
+    /// let hay = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
+    /// let mut movies = vec![];
+    /// for (_, [title, year]) in re.captures_iter(hay).map(|c| c.extract()) {
+    ///     // OK because [0-9]{4} can only match valid UTF-8.
+    ///     let year = std::str::from_utf8(year).unwrap();
+    ///     movies.push((title, year.parse::<i64>()?));
+    /// }
+    /// assert_eq!(movies, vec![
+    ///     (&b"Citizen Kane"[..], 1941),
+    ///     (&b"The Wizard of Oz"[..], 1939),
+    ///     (&b"M"[..], 1931),
+    /// ]);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Or with named groups:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>[0-9]{4})\)").unwrap();
+    /// let hay = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
+    /// let mut it = re.captures_iter(hay);
+    ///
+    /// let caps = it.next().unwrap();
+    /// assert_eq!(&caps["title"], b"Citizen Kane");
+    /// assert_eq!(&caps["year"], b"1941");
+    ///
+    /// let caps = it.next().unwrap();
+    /// assert_eq!(&caps["title"], b"The Wizard of Oz");
+    /// assert_eq!(&caps["year"], b"1939");
+    ///
+    /// let caps = it.next().unwrap();
+    /// assert_eq!(&caps["title"], b"M");
+    /// assert_eq!(&caps["year"], b"1931");
+    /// ```
+    #[inline]
+    pub fn captures_iter<'r, 'h>(
+        &'r self,
+        haystack: &'h [u8],
+    ) -> CaptureMatches<'r, 'h> {
+        CaptureMatches { haystack, it: self.meta.captures_iter(haystack) }
+    }
+
+    /// Returns an iterator of substrings of the haystack given, delimited by a
+    /// match of the regex. Namely, each element of the iterator corresponds to
+    /// a part of the haystack that *isn't* matched by the regular expression.
+    ///
+    /// # Time complexity
+    ///
+    /// Since iterating over all matches requires running potentially many
+    /// searches on the haystack, and since each search has worst case
+    /// `O(m * n)` time complexity, the overall worst case time complexity for
+    /// this routine is `O(m * n^2)`.
+    ///
+    /// # Example
+    ///
+    /// To split a string delimited by arbitrary amounts of spaces or tabs:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"[ \t]+").unwrap();
+    /// let hay = b"a b \t  c\td    e";
+    /// let fields: Vec<&[u8]> = re.split(hay).collect();
+    /// assert_eq!(fields, vec![
+    ///     &b"a"[..], &b"b"[..], &b"c"[..], &b"d"[..], &b"e"[..],
+    /// ]);
+    /// ```
+    ///
+    /// # Example: more cases
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r" ").unwrap();
+    /// let hay = b"Mary had a little lamb";
+    /// let got: Vec<&[u8]> = re.split(hay).collect();
+    /// assert_eq!(got, vec![
+    ///     &b"Mary"[..], &b"had"[..], &b"a"[..], &b"little"[..], &b"lamb"[..],
+    /// ]);
+    ///
+    /// let re = Regex::new(r"X").unwrap();
+    /// let hay = b"";
+    /// let got: Vec<&[u8]> = re.split(hay).collect();
+    /// assert_eq!(got, vec![&b""[..]]);
+    ///
+    /// let re = Regex::new(r"X").unwrap();
+    /// let hay = b"lionXXtigerXleopard";
+    /// let got: Vec<&[u8]> = re.split(hay).collect();
+    /// assert_eq!(got, vec![
+    ///     &b"lion"[..], &b""[..], &b"tiger"[..], &b"leopard"[..],
+    /// ]);
+    ///
+    /// let re = Regex::new(r"::").unwrap();
+    /// let hay = b"lion::tiger::leopard";
+    /// let got: Vec<&[u8]> = re.split(hay).collect();
+    /// assert_eq!(got, vec![&b"lion"[..], &b"tiger"[..], &b"leopard"[..]]);
+    /// ```
+    ///
+    /// If a haystack contains multiple contiguous matches, you will end up
+    /// with empty spans yielded by the iterator:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"X").unwrap();
+    /// let hay = b"XXXXaXXbXc";
+    /// let got: Vec<&[u8]> = re.split(hay).collect();
+    /// assert_eq!(got, vec![
+    ///     &b""[..], &b""[..], &b""[..], &b""[..],
+    ///     &b"a"[..], &b""[..], &b"b"[..], &b"c"[..],
+    /// ]);
+    ///
+    /// let re = Regex::new(r"/").unwrap();
+    /// let hay = b"(///)";
+    /// let got: Vec<&[u8]> = re.split(hay).collect();
+    /// assert_eq!(got, vec![&b"("[..], &b""[..], &b""[..], &b")"[..]]);
+    /// ```
+    ///
+    /// Separators at the start or end of a haystack are neighbored by empty
+    /// substrings.
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"0").unwrap();
+    /// let hay = b"010";
+    /// let got: Vec<&[u8]> = re.split(hay).collect();
+    /// assert_eq!(got, vec![&b""[..], &b"1"[..], &b""[..]]);
+    /// ```
+    ///
+    /// When the regex can match the empty string, it splits at every byte
+    /// position in the haystack. This includes between all UTF-8 code units.
+    /// (The top-level [`Regex::split`](crate::Regex::split) will only split
+    /// at valid UTF-8 boundaries.)
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"").unwrap();
+    /// let hay = "☃".as_bytes();
+    /// let got: Vec<&[u8]> = re.split(hay).collect();
+    /// assert_eq!(got, vec![
+    ///     &[][..], &[b'\xE2'][..], &[b'\x98'][..], &[b'\x83'][..], &[][..],
+    /// ]);
+    /// ```
+    ///
+    /// Contiguous separators (which commonly show up with whitespace) can
+    /// lead to possibly surprising behavior.
+    /// For example, this code is correct:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r" ").unwrap();
+    /// let hay = b"    a  b c";
+    /// let got: Vec<&[u8]> = re.split(hay).collect();
+    /// assert_eq!(got, vec![
+    ///     &b""[..], &b""[..], &b""[..], &b""[..],
+    ///     &b"a"[..], &b""[..], &b"b"[..], &b"c"[..],
+    /// ]);
+    /// ```
+    ///
+    /// It does *not* give you `["a", "b", "c"]`. For that behavior, you'd want
+    /// to match contiguous space characters:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r" +").unwrap();
+    /// let hay = b"    a  b c";
+    /// let got: Vec<&[u8]> = re.split(hay).collect();
+    /// // N.B. This does still include a leading empty span because ' +'
+    /// // matches at the beginning of the haystack.
+    /// assert_eq!(got, vec![&b""[..], &b"a"[..], &b"b"[..], &b"c"[..]]);
+    /// ```
+    #[inline]
+    pub fn split<'r, 'h>(&'r self, haystack: &'h [u8]) -> Split<'r, 'h> {
+        Split { haystack, it: self.meta.split(haystack) }
+    }
+
+    /// Returns an iterator of at most `limit` substrings of the haystack
+    /// given, delimited by a match of the regex. (A `limit` of `0` will return
+    /// no substrings.) Namely, each element of the iterator corresponds to a
+    /// part of the haystack that *isn't* matched by the regular expression.
+    /// The remainder of the haystack that is not split will be the last
+    /// element in the iterator.
+    ///
+    /// # Time complexity
+    ///
+    /// Since iterating over all matches requires running potentially many
+    /// searches on the haystack, and since each search has worst case
+    /// `O(m * n)` time complexity, the overall worst case time complexity for
+    /// this routine is `O(m * n^2)`.
+    ///
+    /// Although note that the worst case time here has an upper bound given
+    /// by the `limit` parameter.
+    ///
+    /// # Example
+    ///
+    /// Get the first two words in some haystack:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"\W+").unwrap();
+    /// let hay = b"Hey! 
How are you?"; + /// let fields: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(fields, vec![&b"Hey"[..], &b"How"[..], &b"are you?"[..]]); + /// ``` + /// + /// # Examples: more cases + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = b"Mary had a little lamb"; + /// let got: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![&b"Mary"[..], &b"had"[..], &b"a little lamb"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b""; + /// let got: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![&b""[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"lionXXtigerXleopard"; + /// let got: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![&b"lion"[..], &b""[..], &b"tigerXleopard"[..]]); + /// + /// let re = Regex::new(r"::").unwrap(); + /// let hay = b"lion::tiger::leopard"; + /// let got: Vec<&[u8]> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec![&b"lion"[..], &b"tiger::leopard"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"abcXdef"; + /// let got: Vec<&[u8]> = re.splitn(hay, 1).collect(); + /// assert_eq!(got, vec![&b"abcXdef"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"abcdef"; + /// let got: Vec<&[u8]> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec![&b"abcdef"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"abcXdef"; + /// let got: Vec<&[u8]> = re.splitn(hay, 0).collect(); + /// assert!(got.is_empty()); + /// ``` + #[inline] + pub fn splitn<'r, 'h>( + &'r self, + haystack: &'h [u8], + limit: usize, + ) -> SplitN<'r, 'h> { + SplitN { haystack, it: self.meta.splitn(haystack, limit) } + } + + /// Replaces the leftmost-first match in the given haystack with the + /// replacement provided. The replacement can be a regular string (where + /// `$N` and `$name` are expanded to match capture groups) or a function + /// that takes a [`Captures`] and returns the replaced string. + /// + /// If no match is found, then the haystack is returned unchanged. In that + /// case, this implementation will likely return a `Cow::Borrowed` value + /// such that no allocation is performed. + /// + /// # Replacement string syntax + /// + /// All instances of `$ref` in the replacement string are replaced with + /// the substring corresponding to the capture group identified by `ref`. + /// + /// `ref` may be an integer corresponding to the index of the capture group + /// (counted by order of opening parenthesis where `0` is the entire match) + /// or it can be a name (consisting of letters, digits or underscores) + /// corresponding to a named capture group. + /// + /// If `ref` isn't a valid capture group (whether the name doesn't exist or + /// isn't a valid index), then it is replaced with the empty string. + /// + /// The longest possible name is used. For example, `$1a` looks up the + /// capture group named `1a` and not the capture group at index `1`. To + /// exert more precise control over the name, use braces, e.g., `${1}a`. + /// + /// To write a literal `$` use `$$`. + /// + /// # Example + /// + /// Note that this function is polymorphic with respect to the replacement. 
+    /// In typical usage, this can just be a normal string:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"[^01]+").unwrap();
+    /// assert_eq!(re.replace(b"1078910", b""), &b"1010"[..]);
+    /// ```
+    ///
+    /// But anything satisfying the [`Replacer`] trait will work. For example,
+    /// a closure of type `|&Captures| -> Vec<u8>` provides direct access to
+    /// the captures corresponding to a match. This allows one to access
+    /// capturing group matches easily:
+    ///
+    /// ```
+    /// use regex::bytes::{Captures, Regex};
+    ///
+    /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
+    /// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| {
+    ///     let mut buf = vec![];
+    ///     buf.extend_from_slice(&caps[2]);
+    ///     buf.push(b' ');
+    ///     buf.extend_from_slice(&caps[1]);
+    ///     buf
+    /// });
+    /// assert_eq!(result, &b"Bruce Springsteen"[..]);
+    /// ```
+    ///
+    /// But this is a bit cumbersome to use all the time. Instead, a simple
+    /// syntax is supported (as described above) that expands `$name` into the
+    /// corresponding capture group. Here's the last example, but using this
+    /// expansion technique with named capture groups:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap();
+    /// let result = re.replace(b"Springsteen, Bruce", b"$first $last");
+    /// assert_eq!(result, &b"Bruce Springsteen"[..]);
+    /// ```
+    ///
+    /// Note that using `$2` instead of `$first` or `$1` instead of `$last`
+    /// would produce the same result. To write a literal `$` use `$$`.
+    ///
+    /// Sometimes the replacement string requires use of curly braces to
+    /// delineate a capture group replacement when it is adjacent to some other
+    /// literal text. For example, if we wanted to join two words together with
+    /// an underscore:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"(?<first>\w+)\s+(?<second>\w+)").unwrap();
+    /// let result = re.replace(b"deep fried", b"${first}_$second");
+    /// assert_eq!(result, &b"deep_fried"[..]);
+    /// ```
+    ///
+    /// Without the curly braces, the capture group name `first_` would be
+    /// used, and since it doesn't exist, it would be replaced with the empty
+    /// string.
+    ///
+    /// Finally, sometimes you just want to replace a literal string with no
+    /// regard for capturing group expansion. This can be done by wrapping a
+    /// string with [`NoExpand`]:
+    ///
+    /// ```
+    /// use regex::bytes::{NoExpand, Regex};
+    ///
+    /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap();
+    /// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last"));
+    /// assert_eq!(result, &b"$2 $last"[..]);
+    /// ```
+    ///
+    /// Using `NoExpand` may also be faster, since the replacement string won't
+    /// need to be parsed for the `$` syntax.
+    #[inline]
+    pub fn replace<'h, R: Replacer>(
+        &self,
+        haystack: &'h [u8],
+        rep: R,
+    ) -> Cow<'h, [u8]> {
+        self.replacen(haystack, 1, rep)
+    }
+
+    /// Replaces all non-overlapping matches in the haystack with the
+    /// replacement provided. This is the same as calling `replacen` with
+    /// `limit` set to `0`.
+    ///
+    /// The documentation for [`Regex::replace`] goes into more detail about
+    /// what kinds of replacement strings are supported.
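+    ///
+    /// As a quick illustration (a minimal sketch, not from the upstream
+    /// docs), every non-overlapping match is replaced, not just the first:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"[0-9]+").unwrap();
+    /// // All three runs of digits are replaced.
+    /// assert_eq!(re.replace_all(b"a1b22c333", b"#"), &b"a#b#c#"[..]);
+    /// ```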
+    ///
+    /// # Time complexity
+    ///
+    /// Since iterating over all matches requires running potentially many
+    /// searches on the haystack, and since each search has worst case
+    /// `O(m * n)` time complexity, the overall worst case time complexity for
+    /// this routine is `O(m * n^2)`.
+    ///
+    /// # Fallibility
+    ///
+    /// If you need to write a replacement routine where any individual
+    /// replacement might "fail," doing so with this API isn't really feasible
+    /// because there's no way to stop the search process if a replacement
+    /// fails. Instead, if you need this functionality, you should consider
+    /// implementing your own replacement routine:
+    ///
+    /// ```
+    /// use regex::bytes::{Captures, Regex};
+    ///
+    /// fn replace_all<E>(
+    ///     re: &Regex,
+    ///     haystack: &[u8],
+    ///     replacement: impl Fn(&Captures) -> Result<Vec<u8>, E>,
+    /// ) -> Result<Vec<u8>, E> {
+    ///     let mut new = Vec::with_capacity(haystack.len());
+    ///     let mut last_match = 0;
+    ///     for caps in re.captures_iter(haystack) {
+    ///         let m = caps.get(0).unwrap();
+    ///         new.extend_from_slice(&haystack[last_match..m.start()]);
+    ///         new.extend_from_slice(&replacement(&caps)?);
+    ///         last_match = m.end();
+    ///     }
+    ///     new.extend_from_slice(&haystack[last_match..]);
+    ///     Ok(new)
+    /// }
+    ///
+    /// // Let's replace each word with the number of bytes in that word.
+    /// // But if we see a word that is "too long," we'll give up.
+    /// let re = Regex::new(r"\w+").unwrap();
+    /// let replacement = |caps: &Captures| -> Result<Vec<u8>, &'static str> {
+    ///     if caps[0].len() >= 5 {
+    ///         return Err("word too long");
+    ///     }
+    ///     Ok(caps[0].len().to_string().into_bytes())
+    /// };
+    /// assert_eq!(
+    ///     Ok(b"2 3 3 3?".to_vec()),
+    ///     replace_all(&re, b"hi how are you?", &replacement),
+    /// );
+    /// assert!(replace_all(&re, b"hi there", &replacement).is_err());
+    /// ```
+    ///
+    /// # Example
+    ///
+    /// This example shows how to flip the order of whitespace (excluding line
+    /// terminators) delimited fields, and normalize the whitespace that
+    /// delimits the fields:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap();
+    /// let hay = b"
+    /// Greetings 1973
+    /// Wild\t1973
+    /// BornToRun\t\t\t\t1975
+    /// Darkness 1978
+    /// TheRiver 1980
+    /// ";
+    /// let new = re.replace_all(hay, b"$2 $1");
+    /// assert_eq!(new, &b"
+    /// 1973 Greetings
+    /// 1973 Wild
+    /// 1975 BornToRun
+    /// 1978 Darkness
+    /// 1980 TheRiver
+    /// "[..]);
+    /// ```
+    #[inline]
+    pub fn replace_all<'h, R: Replacer>(
+        &self,
+        haystack: &'h [u8],
+        rep: R,
+    ) -> Cow<'h, [u8]> {
+        self.replacen(haystack, 0, rep)
+    }
+
+    /// Replaces at most `limit` non-overlapping matches in the haystack with
+    /// the replacement provided. If `limit` is `0`, then all non-overlapping
+    /// matches are replaced. That is, `Regex::replace_all(hay, rep)` is
+    /// equivalent to `Regex::replacen(hay, 0, rep)`.
+    ///
+    /// The documentation for [`Regex::replace`] goes into more detail about
+    /// what kinds of replacement strings are supported.
+    ///
+    /// # Time complexity
+    ///
+    /// Since iterating over all matches requires running potentially many
+    /// searches on the haystack, and since each search has worst case
+    /// `O(m * n)` time complexity, the overall worst case time complexity for
+    /// this routine is `O(m * n^2)`.
+    ///
+    /// Although note that the worst case time here has an upper bound given
+    /// by the `limit` parameter.
+    ///
+    /// # Fallibility
+    ///
+    /// See the corresponding section in the docs for [`Regex::replace_all`]
+    /// for tips on how to deal with a replacement routine that can fail.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to flip the order of whitespace (excluding line
+    /// terminators) delimited fields, and normalize the whitespace that
+    /// delimits the fields. But we only do it for the first two matches.
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap();
+    /// let hay = b"
+    /// Greetings 1973
+    /// Wild\t1973
+    /// BornToRun\t\t\t\t1975
+    /// Darkness 1978
+    /// TheRiver 1980
+    /// ";
+    /// let new = re.replacen(hay, 2, b"$2 $1");
+    /// assert_eq!(new, &b"
+    /// 1973 Greetings
+    /// 1973 Wild
+    /// BornToRun\t\t\t\t1975
+    /// Darkness 1978
+    /// TheRiver 1980
+    /// "[..]);
+    /// ```
+    #[inline]
+    pub fn replacen<'h, R: Replacer>(
+        &self,
+        haystack: &'h [u8],
+        limit: usize,
+        mut rep: R,
+    ) -> Cow<'h, [u8]> {
+        // If we know that the replacement doesn't have any capture expansions,
+        // then we can use the fast path. The fast path can make a tremendous
+        // difference:
+        //
+        // 1) We use `find_iter` instead of `captures_iter`. Not asking for
+        //    captures generally makes the regex engines faster.
+        // 2) We don't need to look up all of the capture groups and do
+        //    replacements inside the replacement string. We just push it
+        //    at each match and be done with it.
+        if let Some(rep) = rep.no_expansion() {
+            let mut it = self.find_iter(haystack).enumerate().peekable();
+            if it.peek().is_none() {
+                return Cow::Borrowed(haystack);
+            }
+            let mut new = Vec::with_capacity(haystack.len());
+            let mut last_match = 0;
+            for (i, m) in it {
+                new.extend_from_slice(&haystack[last_match..m.start()]);
+                new.extend_from_slice(&rep);
+                last_match = m.end();
+                if limit > 0 && i >= limit - 1 {
+                    break;
+                }
+            }
+            new.extend_from_slice(&haystack[last_match..]);
+            return Cow::Owned(new);
+        }
+
+        // The slower path, which we use if the replacement needs access to
+        // capture groups.
+        let mut it = self.captures_iter(haystack).enumerate().peekable();
+        if it.peek().is_none() {
+            return Cow::Borrowed(haystack);
+        }
+        let mut new = Vec::with_capacity(haystack.len());
+        let mut last_match = 0;
+        for (i, cap) in it {
+            // unwrap on 0 is OK because captures only reports matches
+            let m = cap.get(0).unwrap();
+            new.extend_from_slice(&haystack[last_match..m.start()]);
+            rep.replace_append(&cap, &mut new);
+            last_match = m.end();
+            if limit > 0 && i >= limit - 1 {
+                break;
+            }
+        }
+        new.extend_from_slice(&haystack[last_match..]);
+        Cow::Owned(new)
+    }
+}
+
+/// A group of advanced or "lower level" search methods. Some methods permit
+/// starting the search at a position greater than `0` in the haystack. Other
+/// methods permit reusing allocations, for example, when extracting the
+/// matches for capture groups.
+impl Regex {
+    /// Returns the end byte offset of the first match in the haystack given.
+    ///
+    /// This method may have the same performance characteristics as
+    /// `is_match`. Behaviorally, it doesn't just report whether a match
+    /// occurs, but also the end offset for a match. In particular, the offset
+    /// returned *may be shorter* than the proper end of the leftmost-first
+    /// match that you would find via [`Regex::find`].
+    ///
+    /// Note that it is not guaranteed that this routine finds the shortest or
+    /// "earliest" possible match.
Instead, the main idea of this API is that + /// it returns the offset at the point at which the internal regex engine + /// has determined that a match has occurred. This may vary depending on + /// which internal regex engine is used, and thus, the offset itself may + /// change based on internal heuristics. + /// + /// # Example + /// + /// Typically, `a+` would match the entire first sequence of `a` in some + /// haystack, but `shortest_match` *may* give up as soon as it sees the + /// first `a`. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"a+").unwrap(); + /// let offset = re.shortest_match(b"aaaaa").unwrap(); + /// assert_eq!(offset, 1); + /// ``` + #[inline] + pub fn shortest_match(&self, haystack: &[u8]) -> Option<usize> { + self.shortest_match_at(haystack, 0) + } + + /// Returns the same as `shortest_match`, but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only match + /// when `start == 0`. + /// + /// If a match is found, the offset returned is relative to the beginning + /// of the haystack, not the beginning of the search. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(re.shortest_match(&hay[2..]), Some(4)); + /// // No match because the assertions take the context into account. + /// assert_eq!(re.shortest_match_at(hay, 2), None); + /// ``` + #[inline] + pub fn shortest_match_at( + &self, + haystack: &[u8], + start: usize, + ) -> Option<usize> { + let input = + Input::new(haystack).earliest(true).span(start..haystack.len()); + self.meta.search_half(&input).map(|hm| hm.offset()) + } + + /// Returns the same as [`Regex::is_match`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert!(re.is_match(&hay[2..])); + /// // No match because the assertions take the context into account. + /// assert!(!re.is_match_at(hay, 2)); + /// ``` + #[inline] + pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool { + self.meta.is_match(Input::new(haystack).span(start..haystack.len())) + } + + /// Returns the same as [`Regex::find`], but starts the search at the given + /// offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. 
+ /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(re.find(&hay[2..]).map(|m| m.range()), Some(0..4)); + /// // No match because the assertions take the context into account. + /// assert_eq!(re.find_at(hay, 2), None); + /// ``` + #[inline] + pub fn find_at<'h>( + &self, + haystack: &'h [u8], + start: usize, + ) -> Option<Match<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + self.meta.find(input).map(|m| Match::new(haystack, m.start(), m.end())) + } + + /// Returns the same as [`Regex::captures`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(&re.captures(&hay[2..]).unwrap()[0], b"chew"); + /// // No match because the assertions take the context into account. + /// assert!(re.captures_at(hay, 2).is_none()); + /// ``` + #[inline] + pub fn captures_at<'h>( + &self, + haystack: &'h [u8], + start: usize, + ) -> Option<Captures<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + let mut caps = self.meta.create_captures(); + self.meta.captures(input, &mut caps); + if caps.is_match() { + let static_captures_len = self.static_captures_len(); + Some(Captures { haystack, caps, static_captures_len }) + } else { + None + } + } + + /// This is like [`Regex::captures`], but writes the byte offsets of each + /// capture group match into the locations given. + /// + /// A [`CaptureLocations`] stores the same byte offsets as a [`Captures`], + /// but does *not* store a reference to the haystack. This makes its API + /// a bit lower level and less convenient. But in exchange, callers + /// may allocate their own `CaptureLocations` and reuse it for multiple + /// searches. This may be helpful if allocating a `Captures` shows up in a + /// profile as too costly. + /// + /// To create a `CaptureLocations` value, use the + /// [`Regex::capture_locations`] method. + /// + /// This also returns the overall match if one was found. When a match is + /// found, its offsets are also always stored in `locs` at index `0`. 
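+    ///
+    /// As a minimal sketch (illustrative only, not from the upstream docs),
+    /// one `CaptureLocations` can be reused across many searches so that the
+    /// capture buffer is allocated just once:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"([0-9]+)").unwrap();
+    /// // Allocated once, outside the loop.
+    /// let mut locs = re.capture_locations();
+    /// for hay in [&b"x1"[..], &b"y22"[..], &b"z333"[..]] {
+    ///     if re.captures_read(&mut locs, hay).is_some() {
+    ///         assert!(locs.get(1).is_some());
+    ///     }
+    /// }
+    /// ```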
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"^([a-z]+)=(\S*)$").unwrap();
+    /// let mut locs = re.capture_locations();
+    /// assert!(re.captures_read(&mut locs, b"id=foo123").is_some());
+    /// assert_eq!(Some((0, 9)), locs.get(0));
+    /// assert_eq!(Some((0, 2)), locs.get(1));
+    /// assert_eq!(Some((3, 9)), locs.get(2));
+    /// ```
+    #[inline]
+    pub fn captures_read<'h>(
+        &self,
+        locs: &mut CaptureLocations,
+        haystack: &'h [u8],
+    ) -> Option<Match<'h>> {
+        self.captures_read_at(locs, haystack, 0)
+    }
+
+    /// Returns the same as [`Regex::captures_read`], but starts the search at
+    /// the given offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `start >= haystack.len() + 1`.
+    ///
+    /// # Example
+    ///
+    /// This example shows the significance of `start` by demonstrating how it
+    /// can be used to permit look-around assertions in a regex to take the
+    /// surrounding context into account.
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"\bchew\b").unwrap();
+    /// let hay = b"eschew";
+    /// let mut locs = re.capture_locations();
+    /// // We get a match here, but it's probably not intended.
+    /// assert!(re.captures_read(&mut locs, &hay[2..]).is_some());
+    /// // No match because the assertions take the context into account.
+    /// assert!(re.captures_read_at(&mut locs, hay, 2).is_none());
+    /// ```
+    #[inline]
+    pub fn captures_read_at<'h>(
+        &self,
+        locs: &mut CaptureLocations,
+        haystack: &'h [u8],
+        start: usize,
+    ) -> Option<Match<'h>> {
+        let input = Input::new(haystack).span(start..haystack.len());
+        self.meta.search_captures(&input, &mut locs.0);
+        locs.0.get_match().map(|m| Match::new(haystack, m.start(), m.end()))
+    }
+
+    /// An undocumented alias for `captures_read_at`.
+    ///
+    /// The `regex-capi` crate previously used this routine, so to avoid
+    /// breaking that crate, we continue to provide the name as an undocumented
+    /// alias.
+    #[doc(hidden)]
+    #[inline]
+    pub fn read_captures_at<'h>(
+        &self,
+        locs: &mut CaptureLocations,
+        haystack: &'h [u8],
+        start: usize,
+    ) -> Option<Match<'h>> {
+        self.captures_read_at(locs, haystack, start)
+    }
+}
+
+/// Auxiliary methods.
+impl Regex {
+    /// Returns the original string of this regex.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"foo\w+bar").unwrap();
+    /// assert_eq!(re.as_str(), r"foo\w+bar");
+    /// ```
+    #[inline]
+    pub fn as_str(&self) -> &str {
+        &self.pattern
+    }
+
+    /// Returns an iterator over the capture names in this regex.
+    ///
+    /// The iterator returned yields elements of type `Option<&str>`. That is,
+    /// the iterator yields values for all capture groups, even ones that are
+    /// unnamed. The order of the groups corresponds to the order in which each
+    /// group's opening parenthesis appears in the pattern.
+    ///
+    /// The first element of the iterator always yields the group corresponding
+    /// to the overall match, and this group is always unnamed. Therefore, the
+    /// iterator always yields at least one group.
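+    ///
+    /// As a small sketch (illustrative only, not from the upstream docs),
+    /// pairing the names with their positions via `enumerate` yields the
+    /// index of each named group:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"(?<y>[0-9]{4})-(?<m>[0-9]{2})").unwrap();
+    /// let named: Vec<(usize, &str)> = re
+    ///     .capture_names()
+    ///     .enumerate()
+    ///     .filter_map(|(i, name)| name.map(|n| (i, n)))
+    ///     .collect();
+    /// assert_eq!(named, vec![(1, "y"), (2, "m")]);
+    /// ```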
+    ///
+    /// # Example
+    ///
+    /// This shows basic usage with a mix of named and unnamed capture groups:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap();
+    /// let mut names = re.capture_names();
+    /// assert_eq!(names.next(), Some(None));
+    /// assert_eq!(names.next(), Some(Some("a")));
+    /// assert_eq!(names.next(), Some(Some("b")));
+    /// assert_eq!(names.next(), Some(None));
+    /// // the '(?:.)' group is non-capturing and so doesn't appear here!
+    /// assert_eq!(names.next(), Some(Some("c")));
+    /// assert_eq!(names.next(), None);
+    /// ```
+    ///
+    /// The iterator always yields at least one element, even for regexes with
+    /// no capture groups and even for regexes that can never match:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"").unwrap();
+    /// let mut names = re.capture_names();
+    /// assert_eq!(names.next(), Some(None));
+    /// assert_eq!(names.next(), None);
+    ///
+    /// let re = Regex::new(r"[a&&b]").unwrap();
+    /// let mut names = re.capture_names();
+    /// assert_eq!(names.next(), Some(None));
+    /// assert_eq!(names.next(), None);
+    /// ```
+    #[inline]
+    pub fn capture_names(&self) -> CaptureNames<'_> {
+        CaptureNames(self.meta.group_info().pattern_names(PatternID::ZERO))
+    }
+
+    /// Returns the number of capture groups in this regex.
+    ///
+    /// This includes all named and unnamed groups, including the implicit
+    /// unnamed group that is always present and corresponds to the entire
+    /// match.
+    ///
+    /// Since the implicit unnamed group is always included in this length, the
+    /// length returned is guaranteed to be greater than zero.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"foo").unwrap();
+    /// assert_eq!(1, re.captures_len());
+    ///
+    /// let re = Regex::new(r"(foo)").unwrap();
+    /// assert_eq!(2, re.captures_len());
+    ///
+    /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap();
+    /// assert_eq!(5, re.captures_len());
+    ///
+    /// let re = Regex::new(r"[a&&b]").unwrap();
+    /// assert_eq!(1, re.captures_len());
+    /// ```
+    #[inline]
+    pub fn captures_len(&self) -> usize {
+        self.meta.group_info().group_len(PatternID::ZERO)
+    }
+
+    /// Returns the total number of capturing groups that appear in every
+    /// possible match.
+    ///
+    /// If the number of capture groups can vary depending on the match, then
+    /// this returns `None`. That is, a value is only returned when the number
+    /// of matching groups is invariant or "static."
+    ///
+    /// Note that like [`Regex::captures_len`], this **does** include the
+    /// implicit capturing group corresponding to the entire match. Therefore,
+    /// when a non-None value is returned, it is guaranteed to be at least `1`.
+    /// Stated differently, a return value of `Some(0)` is impossible.
+    ///
+    /// # Example
+    ///
+    /// This shows a few cases where a static number of capture groups is
+    /// available and a few cases where it is not.
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let len = |pattern| {
+    ///     Regex::new(pattern).map(|re| re.static_captures_len())
+    /// };
+    ///
+    /// assert_eq!(Some(1), len("a")?);
+    /// assert_eq!(Some(2), len("(a)")?);
+    /// assert_eq!(Some(2), len("(a)|(b)")?);
+    /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?);
+    /// assert_eq!(None, len("(a)|b")?);
+    /// assert_eq!(None, len("a|(b)")?);
+    /// assert_eq!(None, len("(b)*")?);
+    /// assert_eq!(Some(2), len("(b)+")?);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn static_captures_len(&self) -> Option<usize> {
+        self.meta.static_captures_len()
+    }
+
+    /// Returns a freshly allocated set of capture locations that can
+    /// be reused in multiple calls to [`Regex::captures_read`] or
+    /// [`Regex::captures_read_at`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"(.)(.)(\w+)").unwrap();
+    /// let mut locs = re.capture_locations();
+    /// assert!(re.captures_read(&mut locs, b"Padron").is_some());
+    /// assert_eq!(locs.get(0), Some((0, 6)));
+    /// assert_eq!(locs.get(1), Some((0, 1)));
+    /// assert_eq!(locs.get(2), Some((1, 2)));
+    /// assert_eq!(locs.get(3), Some((2, 6)));
+    /// ```
+    #[inline]
+    pub fn capture_locations(&self) -> CaptureLocations {
+        CaptureLocations(self.meta.create_captures())
+    }
+
+    /// An alias for `capture_locations` to preserve backward compatibility.
+    ///
+    /// The `regex-capi` crate uses this method, so to avoid breaking that
+    /// crate, we continue to export it as an undocumented API.
+    #[doc(hidden)]
+    #[inline]
+    pub fn locations(&self) -> CaptureLocations {
+        self.capture_locations()
+    }
+}
+
+/// Represents a single match of a regex in a haystack.
+///
+/// A `Match` contains both the start and end byte offsets of the match and the
+/// actual substring corresponding to the range of those byte offsets. It is
+/// guaranteed that `start <= end`. When `start == end`, the match is empty.
+///
+/// Unlike the top-level `Match` type, this `Match` type is produced by APIs
+/// that search `&[u8]` haystacks. This means that the offsets in a `Match` can
+/// point to anywhere in the haystack, including in a place that splits the
+/// UTF-8 encoding of a Unicode scalar value.
+///
+/// The lifetime parameter `'h` refers to the lifetime of the haystack that
+/// this match was produced from.
+///
+/// # Numbering
+///
+/// The byte offsets in a `Match` form a half-open interval. That is, the
+/// start of the range is inclusive and the end of the range is exclusive.
+/// For example, given a haystack `abcFOOxyz` and a match of `FOO`, its byte
+/// offset range starts at `3` and ends at `6`. `3` corresponds to `F` and
+/// `6` corresponds to `x`, which is one past the end of the match. This
+/// corresponds to the same kind of slicing that Rust uses.
+///
+/// For more on why this was chosen over other schemes (aside from being
+/// consistent with how Rust the language works), see [this discussion] and
+/// [Dijkstra's note on a related topic][note].
+///
+/// [this discussion]: https://github.com/rust-lang/regex/discussions/866
+/// [note]: https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html
+///
+/// # Example
+///
+/// This example shows the value of each of the methods on `Match` for a
+/// particular search.
+///
+/// ```
+/// use regex::bytes::Regex;
+///
+/// let re = Regex::new(r"\p{Greek}+").unwrap();
+/// let hay = "Greek: αβγδ".as_bytes();
+/// let m = re.find(hay).unwrap();
+/// assert_eq!(7, m.start());
+/// assert_eq!(15, m.end());
+/// assert!(!m.is_empty());
+/// assert_eq!(8, m.len());
+/// assert_eq!(7..15, m.range());
+/// assert_eq!("αβγδ".as_bytes(), m.as_bytes());
+/// ```
+#[derive(Copy, Clone, Eq, PartialEq)]
+pub struct Match<'h> {
+    haystack: &'h [u8],
+    start: usize,
+    end: usize,
+}
+
+impl<'h> Match<'h> {
+    /// Returns the byte offset of the start of the match in the haystack. The
+    /// start of the match corresponds to the position where the match begins
+    /// and includes the first byte in the match.
+    ///
+    /// It is guaranteed that `Match::start() <= Match::end()`.
+    ///
+    /// Unlike the top-level `Match` type, the start offset may appear anywhere
+    /// in the haystack. This includes between the code units of a UTF-8
+    /// encoded Unicode scalar value.
+    #[inline]
+    pub fn start(&self) -> usize {
+        self.start
+    }
+
+    /// Returns the byte offset of the end of the match in the haystack. The
+    /// end of the match corresponds to the byte immediately following the last
+    /// byte in the match. This means that `&slice[start..end]` works as one
+    /// would expect.
+    ///
+    /// It is guaranteed that `Match::start() <= Match::end()`.
+    ///
+    /// Unlike the top-level `Match` type, the end offset may appear anywhere
+    /// in the haystack. This includes between the code units of a UTF-8
+    /// encoded Unicode scalar value.
+    #[inline]
+    pub fn end(&self) -> usize {
+        self.end
+    }
+
+    /// Returns true if and only if this match has a length of zero.
+    ///
+    /// Note that an empty match can only occur when the regex itself can
+    /// match the empty string. Here are some examples of regexes that can
+    /// all match the empty string: `^`, `^$`, `\b`, `a?`, `a*`, `a{0}`,
+    /// `(foo|\d+|quux)?`.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.start == self.end
+    }
+
+    /// Returns the length, in bytes, of this match.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.end - self.start
+    }
+
+    /// Returns the range over the starting and ending byte offsets of the
+    /// match in the haystack.
+    #[inline]
+    pub fn range(&self) -> core::ops::Range<usize> {
+        self.start..self.end
+    }
+
+    /// Returns the substring of the haystack that matched.
+    #[inline]
+    pub fn as_bytes(&self) -> &'h [u8] {
+        &self.haystack[self.range()]
+    }
+
+    /// Creates a new match from the given haystack and byte offsets.
+    #[inline]
+    fn new(haystack: &'h [u8], start: usize, end: usize) -> Match<'h> {
+        Match { haystack, start, end }
+    }
+}
+
+impl<'h> core::fmt::Debug for Match<'h> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let mut fmt = f.debug_struct("Match");
+        fmt.field("start", &self.start).field("end", &self.end);
+        if let Ok(s) = core::str::from_utf8(self.as_bytes()) {
+            fmt.field("bytes", &s);
+        } else {
+            // FIXME: It would be nice if this could be printed as a string
+            // with invalid UTF-8 replaced with hex escapes. An alloc would
+            // probably be okay if that makes it easier, but regex-automata
+            // does (at time of writing) have internal routines that do this.
+            // So maybe we should expose them.
+ fmt.field("bytes", &self.as_bytes()); + } + fmt.finish() + } +} + +impl<'h> From<Match<'h>> for &'h [u8] { + fn from(m: Match<'h>) -> &'h [u8] { + m.as_bytes() + } +} + +impl<'h> From<Match<'h>> for core::ops::Range<usize> { + fn from(m: Match<'h>) -> core::ops::Range<usize> { + m.range() + } +} + +/// Represents the capture groups for a single match. +/// +/// Capture groups refer to parts of a regex enclosed in parentheses. They can +/// be optionally named. The purpose of capture groups is to be able to +/// reference different parts of a match based on the original pattern. For +/// example, say you want to match the individual letters in a 5-letter word: +/// +/// ```text +/// (?<first>\w)(\w)(?:\w)\w(?<last>\w) +/// ``` +/// +/// This regex has 4 capture groups: +/// +/// * The group at index `0` corresponds to the overall match. It is always +/// present in every match and never has a name. +/// * The group at index `1` with name `first` corresponding to the first +/// letter. +/// * The group at index `2` with no name corresponding to the second letter. +/// * The group at index `3` with name `last` corresponding to the fifth and +/// last letter. +/// +/// Notice that `(?:\w)` was not listed above as a capture group despite it +/// being enclosed in parentheses. That's because `(?:pattern)` is a special +/// syntax that permits grouping but *without* capturing. The reason for not +/// treating it as a capture is that tracking and reporting capture groups +/// requires additional state that may lead to slower searches. So using as few +/// capture groups as possible can help performance. (Although the difference +/// in performance of a couple of capture groups is likely immaterial.) +/// +/// Values with this type are created by [`Regex::captures`] or +/// [`Regex::captures_iter`]. +/// +/// `'h` is the lifetime of the haystack that these captures were matched from. +/// +/// # Example +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new(r"(?<first>\w)(\w)(?:\w)\w(?<last>\w)").unwrap(); +/// let caps = re.captures(b"toady").unwrap(); +/// assert_eq!(b"toady", &caps[0]); +/// assert_eq!(b"t", &caps["first"]); +/// assert_eq!(b"o", &caps[2]); +/// assert_eq!(b"y", &caps["last"]); +/// ``` +pub struct Captures<'h> { + haystack: &'h [u8], + caps: captures::Captures, + static_captures_len: Option<usize>, +} + +impl<'h> Captures<'h> { + /// Returns the `Match` associated with the capture group at index `i`. If + /// `i` does not correspond to a capture group, or if the capture group did + /// not participate in the match, then `None` is returned. + /// + /// When `i == 0`, this is guaranteed to return a non-`None` value. + /// + /// # Examples + /// + /// Get the substring that matched with a default of an empty string if the + /// group didn't participate in the match: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); + /// let caps = re.captures(b"abc123").unwrap(); + /// + /// let substr1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes()); + /// let substr2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes()); + /// assert_eq!(substr1, b"123"); + /// assert_eq!(substr2, b""); + /// ``` + #[inline] + pub fn get(&self, i: usize) -> Option<Match<'h>> { + self.caps + .get_group(i) + .map(|sp| Match::new(self.haystack, sp.start, sp.end)) + } + + /// Returns the `Match` associated with the capture group named `name`. 
+    /// If `name` isn't a valid capture group or it refers to a group that
+    /// didn't match, then `None` is returned.
+    ///
+    /// Note that unlike `caps["name"]`, this returns a `Match` whose lifetime
+    /// matches the lifetime of the haystack in this `Captures` value.
+    /// Conversely, the substring returned by `caps["name"]` has a lifetime
+    /// of the `Captures` value, which is likely shorter than the lifetime of
+    /// the haystack. In some cases, it may be necessary to use this method to
+    /// access the matching substring instead of the `caps["name"]` notation.
+    ///
+    /// # Examples
+    ///
+    /// Get the substring that matched with a default of an empty string if the
+    /// group didn't participate in the match:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(
+    ///     r"[a-z]+(?:(?<numbers>[0-9]+)|(?<letters>[A-Z]+))",
+    /// ).unwrap();
+    /// let caps = re.captures(b"abc123").unwrap();
+    ///
+    /// let numbers = caps.name("numbers").map_or(&b""[..], |m| m.as_bytes());
+    /// let letters = caps.name("letters").map_or(&b""[..], |m| m.as_bytes());
+    /// assert_eq!(numbers, b"123");
+    /// assert_eq!(letters, b"");
+    /// ```
+    #[inline]
+    pub fn name(&self, name: &str) -> Option<Match<'h>> {
+        self.caps
+            .get_group_by_name(name)
+            .map(|sp| Match::new(self.haystack, sp.start, sp.end))
+    }
+
+    /// This is a convenience routine for extracting the substrings
+    /// corresponding to matching capture groups.
+    ///
+    /// This returns a tuple where the first element corresponds to the full
+    /// substring of the haystack that matched the regex. The second element is
+    /// an array of substrings, with each corresponding to the substring that
+    /// matched for a particular capture group.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the number of possible matching groups in this
+    /// `Captures` value is not fixed to `N` in all circumstances.
+    /// More precisely, this routine only works when `N` is equivalent to
+    /// [`Regex::static_captures_len`].
+    ///
+    /// Stated more plainly, if the number of matching capture groups in a
+    /// regex can vary from match to match, then this function always panics.
+    ///
+    /// For example, `(a)(b)|(c)` could produce two matching capture groups
+    /// or one matching capture group for any given match. Therefore, one
+    /// cannot use `extract` with such a pattern.
+    ///
+    /// But a pattern like `(a)(b)|(c)(d)` can be used with `extract` because
+    /// the number of capture groups in every match is always equivalent,
+    /// even if the capture _indices_ in each match are not.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap();
+    /// let hay = b"On 2010-03-14, I became a Tennessee lamb.";
+    /// let Some((full, [year, month, day])) =
+    ///     re.captures(hay).map(|caps| caps.extract()) else { return };
+    /// assert_eq!(b"2010-03-14", full);
+    /// assert_eq!(b"2010", year);
+    /// assert_eq!(b"03", month);
+    /// assert_eq!(b"14", day);
+    /// ```
+    ///
+    /// # Example: iteration
+    ///
+    /// This example shows how to use this method when iterating over all
+    /// `Captures` matches in a haystack.
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap();
+    /// let hay = b"1973-01-05, 1975-08-25 and 1980-10-18";
+    ///
+    /// let mut dates: Vec<(&[u8], &[u8], &[u8])> = vec![];
+    /// for (_, [y, m, d]) in re.captures_iter(hay).map(|c| c.extract()) {
+    ///     dates.push((y, m, d));
+    /// }
+    /// assert_eq!(dates, vec![
+    ///     (&b"1973"[..], &b"01"[..], &b"05"[..]),
+    ///     (&b"1975"[..], &b"08"[..], &b"25"[..]),
+    ///     (&b"1980"[..], &b"10"[..], &b"18"[..]),
+    /// ]);
+    /// ```
+    ///
+    /// # Example: parsing different formats
+    ///
+    /// This API is particularly useful when you need to extract a particular
+    /// value that might occur in a different format. Consider, for example,
+    /// an identifier that might be in double quotes or single quotes:
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r#"id:(?:"([^"]+)"|'([^']+)')"#).unwrap();
+    /// let hay = br#"The first is id:"foo" and the second is id:'bar'."#;
+    /// let mut ids = vec![];
+    /// for (_, [id]) in re.captures_iter(hay).map(|c| c.extract()) {
+    ///     ids.push(id);
+    /// }
+    /// assert_eq!(ids, vec![b"foo", b"bar"]);
+    /// ```
+    pub fn extract<const N: usize>(&self) -> (&'h [u8], [&'h [u8]; N]) {
+        let len = self
+            .static_captures_len
+            .expect("number of capture groups can vary in a match")
+            .checked_sub(1)
+            .expect("number of groups is always greater than zero");
+        assert_eq!(N, len, "asked for {} groups, but must ask for {}", N, len);
+        // The regex-automata variant of extract is a bit more permissive.
+        // It doesn't require the number of matching capturing groups to be
+        // static, and you can even request fewer groups than what's there. So
+        // this is guaranteed to never panic because we've asserted above that
+        // the user has requested precisely the number of groups that must be
+        // present in any match for this regex.
+        self.caps.extract_bytes(self.haystack)
+    }
+
+    /// Expands all instances of `$ref` in `replacement` to the corresponding
+    /// capture group, and writes them to the `dst` buffer given. A `ref` can
+    /// be a capture group index or a name. If `ref` doesn't refer to a capture
+    /// group that participated in the match, then it is replaced with the
+    /// empty string.
+    ///
+    /// # Format
+    ///
+    /// The format of the replacement string supports two different kinds of
+    /// capture references: unbraced and braced.
+    ///
+    /// For the unbraced format, the format supported is `$ref` where `ref`
+    /// can be any sequence of characters in the class `[0-9A-Za-z_]`. `ref`
+    /// is always the longest possible parse. So for example, `$1a` corresponds
+    /// to the capture group named `1a` and not the capture group at index `1`.
+    /// If `ref` matches `^[0-9]+$`, then it is treated as a capture group
+    /// index itself and not a name.
+    ///
+    /// For the braced format, the format supported is `${ref}` where `ref` can
+    /// be any sequence of bytes except for `}`. If no closing brace occurs,
+    /// then it is not considered a capture reference. As with the unbraced
+    /// format, if `ref` matches `^[0-9]+$`, then it is treated as a capture
+    /// group index and not a name.
+    ///
+    /// The braced format is useful for exerting precise control over the name
+    /// of the capture reference. For example, `${1}a` corresponds to the
+    /// capture group reference `1` followed by the letter `a`, whereas `$1a`
+    /// (as mentioned above) corresponds to the capture group reference `1a`.
+    /// The braced format is also useful for expressing capture group names
+    /// that use characters not supported by the unbraced format. For example,
+    /// `${foo[bar].baz}` refers to the capture group named `foo[bar].baz`.
+    ///
+    /// If a capture group reference is found and it does not refer to a valid
+    /// capture group, then it will be replaced with the empty string.
+    ///
+    /// To write a literal `$`, use `$$`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(
+    ///     r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
+    /// ).unwrap();
+    /// let hay = b"On 14-03-2010, I became a Tennessee lamb.";
+    /// let caps = re.captures(hay).unwrap();
+    ///
+    /// let mut dst = vec![];
+    /// caps.expand(b"year=$year, month=$month, day=$day", &mut dst);
+    /// assert_eq!(dst, b"year=2010, month=03, day=14");
+    /// ```
+    #[inline]
+    pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) {
+        self.caps.interpolate_bytes_into(self.haystack, replacement, dst);
+    }
+
+    /// Returns an iterator over all capture groups. This includes both
+    /// matching and non-matching groups.
+    ///
+    /// The iterator always yields at least one matching group: the first group
+    /// (at index `0`) with no name. Subsequent groups are returned in the
+    /// order of their opening parenthesis in the regex.
+    ///
+    /// The elements yielded have type `Option<Match<'h>>`, where a non-`None`
+    /// value is present if the capture group matches.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap();
+    /// let caps = re.captures(b"AZ").unwrap();
+    ///
+    /// let mut it = caps.iter();
+    /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), Some(&b"AZ"[..]));
+    /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), Some(&b"A"[..]));
+    /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), None);
+    /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), Some(&b"Z"[..]));
+    /// assert_eq!(it.next(), None);
+    /// ```
+    #[inline]
+    pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 'h> {
+        SubCaptureMatches { haystack: self.haystack, it: self.caps.iter() }
+    }
+
+    /// Returns the total number of capture groups. This includes both
+    /// matching and non-matching groups.
+    ///
+    /// The length returned is always equivalent to the number of elements
+    /// yielded by [`Captures::iter`]. Consequently, the length is always
+    /// greater than zero since every `Captures` value always includes the
+    /// match for the entire regex.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap();
+    /// let caps = re.captures(b"AZ").unwrap();
+    /// assert_eq!(caps.len(), 4);
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.caps.group_len()
+    }
+}
+
+impl<'h> core::fmt::Debug for Captures<'h> {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        /// A little helper type to provide a nice map-like debug
+        /// representation for our capturing group spans.
+        ///
+        /// regex-automata has something similar, but it includes the pattern
+        /// ID in its debug output, which is confusing. It also doesn't include
+        /// the strings that match, because a regex-automata `Captures` doesn't
+        /// borrow the haystack.
+        struct CapturesDebugMap<'a> {
+            caps: &'a Captures<'a>,
+        }
+
+        impl<'a> core::fmt::Debug for CapturesDebugMap<'a> {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                let mut map = f.debug_map();
+                let names =
+                    self.caps.caps.group_info().pattern_names(PatternID::ZERO);
+                for (group_index, maybe_name) in names.enumerate() {
+                    let key = Key(group_index, maybe_name);
+                    match self.caps.get(group_index) {
+                        None => map.entry(&key, &None::<()>),
+                        Some(mat) => map.entry(&key, &Value(mat)),
+                    };
+                }
+                map.finish()
+            }
+        }
+
+        struct Key<'a>(usize, Option<&'a str>);
+
+        impl<'a> core::fmt::Debug for Key<'a> {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                write!(f, "{}", self.0)?;
+                if let Some(name) = self.1 {
+                    write!(f, "/{:?}", name)?;
+                }
+                Ok(())
+            }
+        }
+
+        struct Value<'a>(Match<'a>);
+
+        impl<'a> core::fmt::Debug for Value<'a> {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                use regex_automata::util::escape::DebugHaystack;
+
+                write!(
+                    f,
+                    "{}..{}/{:?}",
+                    self.0.start(),
+                    self.0.end(),
+                    DebugHaystack(self.0.as_bytes())
+                )
+            }
+        }
+
+        f.debug_tuple("Captures")
+            .field(&CapturesDebugMap { caps: self })
+            .finish()
+    }
+}
+
+/// Get a matching capture group's haystack substring by index.
+///
+/// The haystack substring returned can't outlive the `Captures` object if this
+/// method is used, because of how `Index` is defined (normally `a[i]` is part
+/// of `a` and can't outlive it). To work around this limitation, use
+/// [`Captures::get`] instead.
+///
+/// `'h` is the lifetime of the matched haystack, but the lifetime of the
+/// `&[u8]` returned by this implementation is the lifetime of the `Captures`
+/// value itself.
+///
+/// # Panics
+///
+/// If there is no matching group at the given index.
+impl<'h> core::ops::Index<usize> for Captures<'h> {
+    type Output = [u8];
+
+    // The lifetime is written out to make it clear that the &[u8] returned
+    // does NOT have a lifetime equivalent to 'h.
+    fn index<'a>(&'a self, i: usize) -> &'a [u8] {
+        self.get(i)
+            .map(|m| m.as_bytes())
+            .unwrap_or_else(|| panic!("no group at index '{}'", i))
+    }
+}
+
+/// Get a matching capture group's haystack substring by name.
+///
+/// The haystack substring returned can't outlive the `Captures` object if this
+/// method is used, because of how `Index` is defined (normally `a[i]` is part
+/// of `a` and can't outlive it). To work around this limitation, use
+/// [`Captures::get`] instead.
+///
+/// `'h` is the lifetime of the matched haystack, but the lifetime of the
+/// `&[u8]` returned by this implementation is the lifetime of the `Captures`
+/// value itself.
+///
+/// `'n` is the lifetime of the group name used to index the `Captures` value.
+///
+/// # Panics
+///
+/// If there is no matching group with the given name.
+impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> {
+    type Output = [u8];
+
+    fn index<'a>(&'a self, name: &'n str) -> &'a [u8] {
+        self.name(name)
+            .map(|m| m.as_bytes())
+            .unwrap_or_else(|| panic!("no group named '{}'", name))
+    }
+}
+
+/// A low level representation of the byte offsets of each capture group.
+///
+/// You can think of this as a lower level [`Captures`], where this type does
+/// not support named capturing groups directly and it does not borrow the
+/// haystack that these offsets were matched on.
+/// +/// Primarily, this type is useful when using the lower level `Regex` APIs such +/// as [`Regex::captures_read`], which permits amortizing the allocation in +/// which capture match offsets are stored. +/// +/// In order to build a value of this type, you'll need to call the +/// [`Regex::capture_locations`] method. The value returned can then be reused +/// in subsequent searches for that regex. Using it for other regexes may +/// result in a panic or otherwise incorrect results. +/// +/// # Example +/// +/// This example shows how to create and use `CaptureLocations` in a search. +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); +/// let mut locs = re.capture_locations(); +/// let m = re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); +/// assert_eq!(0..17, m.range()); +/// assert_eq!(Some((0, 17)), locs.get(0)); +/// assert_eq!(Some((0, 5)), locs.get(1)); +/// assert_eq!(Some((6, 17)), locs.get(2)); +/// +/// // Asking for an invalid capture group always returns None. +/// assert_eq!(None, locs.get(3)); +/// # // literals are too big for 32-bit usize: #1041 +/// # #[cfg(target_pointer_width = "64")] +/// assert_eq!(None, locs.get(34973498648)); +/// # #[cfg(target_pointer_width = "64")] +/// assert_eq!(None, locs.get(9944060567225171988)); +/// ``` +#[derive(Clone, Debug)] +pub struct CaptureLocations(captures::Captures); + +/// A type alias for `CaptureLocations` for backwards compatibility. +/// +/// Previously, we exported `CaptureLocations` as `Locations` in an +/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), +/// we continue re-exporting the same undocumented API. +#[doc(hidden)] +pub type Locations = CaptureLocations; + +impl CaptureLocations { + /// Returns the start and end byte offsets of the capture group at index + /// `i`. This returns `None` if `i` is not a valid capture group or if the + /// capture group did not match. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); + /// assert_eq!(Some((0, 17)), locs.get(0)); + /// assert_eq!(Some((0, 5)), locs.get(1)); + /// assert_eq!(Some((6, 17)), locs.get(2)); + /// ``` + #[inline] + pub fn get(&self, i: usize) -> Option<(usize, usize)> { + self.0.get_group(i).map(|sp| (sp.start, sp.end)) + } + + /// Returns the total number of capture groups (even if they didn't match). + /// That is, the length returned is unaffected by the result of a search. + /// + /// This is always at least `1` since every regex has at least `1` + /// capturing group that corresponds to the entire match. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert_eq!(3, locs.len()); + /// re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); + /// assert_eq!(3, locs.len()); + /// ``` + /// + /// Notice that the length is always at least `1`, regardless of the regex: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let locs = re.capture_locations(); + /// assert_eq!(1, locs.len()); + /// + /// // [a&&b] is a regex that never matches anything. 
+    /// let re = Regex::new(r"[a&&b]").unwrap();
+    /// let locs = re.capture_locations();
+    /// assert_eq!(1, locs.len());
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        // self.0.group_len() returns 0 if the underlying captures doesn't
+        // represent a match, but the behavior guaranteed for this method is
+        // that the length doesn't change based on a match or not.
+        self.0.group_info().group_len(PatternID::ZERO)
+    }
+
+    /// An alias for the `get` method for backwards compatibility.
+    ///
+    /// Previously, we exported `get` as `pos` in an undocumented API. To
+    /// prevent breaking that code (e.g., in `regex-capi`), we continue
+    /// re-exporting the same undocumented API.
+    #[doc(hidden)]
+    #[inline]
+    pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
+        self.get(i)
+    }
+}
+
+/// An iterator over all non-overlapping matches in a haystack.
+///
+/// This iterator yields [`Match`] values. The iterator stops when no more
+/// matches can be found.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'h` is the
+/// lifetime of the haystack.
+///
+/// This iterator is created by [`Regex::find_iter`].
+///
+/// # Time complexity
+///
+/// Note that since an iterator runs potentially many searches on the haystack
+/// and since each search has worst case `O(m * n)` time complexity, the
+/// overall worst case time complexity for iteration is `O(m * n^2)`.
+#[derive(Debug)]
+pub struct Matches<'r, 'h> {
+    haystack: &'h [u8],
+    it: meta::FindMatches<'r, 'h>,
+}
+
+impl<'r, 'h> Iterator for Matches<'r, 'h> {
+    type Item = Match<'h>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Match<'h>> {
+        self.it
+            .next()
+            .map(|sp| Match::new(self.haystack, sp.start(), sp.end()))
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        // This can actually be up to 2x faster than calling `next()` until
+        // completion, because counting matches when using a DFA only requires
+        // finding the end of each match. But returning a `Match` via `next()`
+        // requires the start of each match which, with a DFA, requires a
+        // reverse scan to find it.
+        self.it.count()
+    }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for Matches<'r, 'h> {}
+
+/// An iterator over all non-overlapping capture matches in a haystack.
+///
+/// This iterator yields [`Captures`] values. The iterator stops when no more
+/// matches can be found.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'h` is the
+/// lifetime of the matched string.
+///
+/// This iterator is created by [`Regex::captures_iter`].
+///
+/// # Time complexity
+///
+/// Note that since an iterator runs potentially many searches on the haystack
+/// and since each search has worst case `O(m * n)` time complexity, the
+/// overall worst case time complexity for iteration is `O(m * n^2)`.
+#[derive(Debug)]
+pub struct CaptureMatches<'r, 'h> {
+    haystack: &'h [u8],
+    it: meta::CapturesMatches<'r, 'h>,
+}
+
+impl<'r, 'h> Iterator for CaptureMatches<'r, 'h> {
+    type Item = Captures<'h>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Captures<'h>> {
+        let static_captures_len = self.it.regex().static_captures_len();
+        self.it.next().map(|caps| Captures {
+            haystack: self.haystack,
+            caps,
+            static_captures_len,
+        })
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        // This can actually be up to 2x faster than calling `next()` until
+        // completion, because counting matches when using a DFA only requires
+        // finding the end of each match. But returning a `Match` via `next()`
+        // requires the start of each match which, with a DFA, requires a
+        // reverse scan to find it.
+        self.it.count()
+    }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for CaptureMatches<'r, 'h> {}
+
+/// An iterator over all substrings delimited by a regex match.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'h` is the
+/// lifetime of the byte string being split.
+///
+/// This iterator is created by [`Regex::split`].
+///
+/// # Time complexity
+///
+/// Note that since an iterator runs potentially many searches on the haystack
+/// and since each search has worst case `O(m * n)` time complexity, the
+/// overall worst case time complexity for iteration is `O(m * n^2)`.
+#[derive(Debug)]
+pub struct Split<'r, 'h> {
+    haystack: &'h [u8],
+    it: meta::Split<'r, 'h>,
+}
+
+impl<'r, 'h> Iterator for Split<'r, 'h> {
+    type Item = &'h [u8];
+
+    #[inline]
+    fn next(&mut self) -> Option<&'h [u8]> {
+        self.it.next().map(|span| &self.haystack[span])
+    }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {}
+
+/// An iterator over at most `N` substrings delimited by a regex match.
+///
+/// The last substring yielded by this iterator will be whatever remains after
+/// `N-1` splits.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'h` is the
+/// lifetime of the byte string being split.
+///
+/// This iterator is created by [`Regex::splitn`].
+///
+/// # Time complexity
+///
+/// Note that since an iterator runs potentially many searches on the haystack
+/// and since each search has worst case `O(m * n)` time complexity, the
+/// overall worst case time complexity for iteration is `O(m * n^2)`.
+///
+/// Although note that the worst case time here has an upper bound given
+/// by the `limit` parameter to [`Regex::splitn`].
+#[derive(Debug)]
+pub struct SplitN<'r, 'h> {
+    haystack: &'h [u8],
+    it: meta::SplitN<'r, 'h>,
+}
+
+impl<'r, 'h> Iterator for SplitN<'r, 'h> {
+    type Item = &'h [u8];
+
+    #[inline]
+    fn next(&mut self) -> Option<&'h [u8]> {
+        self.it.next().map(|span| &self.haystack[span])
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {}
+
+/// An iterator over the names of all capture groups in a regex.
+///
+/// This iterator yields values of type `Option<&str>` in order of the opening
+/// capture group parenthesis in the regex pattern. `None` is yielded for
+/// groups with no name. The first element always corresponds to the implicit
+/// and unnamed group for the overall match.
+///
+/// `'r` is the lifetime of the compiled regular expression.
+///
+/// This iterator is created by [`Regex::capture_names`].
+#[derive(Clone, Debug)]
+pub struct CaptureNames<'r>(captures::GroupInfoPatternNames<'r>);
+
+impl<'r> Iterator for CaptureNames<'r> {
+    type Item = Option<&'r str>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Option<&'r str>> {
+        self.0.next()
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.0.size_hint()
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        self.0.count()
+    }
+}
+
+impl<'r> ExactSizeIterator for CaptureNames<'r> {}
+
+impl<'r> core::iter::FusedIterator for CaptureNames<'r> {}
+
+/// An iterator over all group matches in a [`Captures`] value.
+///
+/// This iterator yields values of type `Option<Match<'h>>`, where `'h` is the
+/// lifetime of the haystack that the matches are for.
The order of elements +/// yielded corresponds to the order of the opening parenthesis for the group +/// in the regex pattern. `None` is yielded for groups that did not participate +/// in the match. +/// +/// The first element always corresponds to the implicit group for the overall +/// match. Since this iterator is created by a [`Captures`] value, and a +/// `Captures` value is only created when a match occurs, it follows that the +/// first element yielded by this iterator is guaranteed to be non-`None`. +/// +/// The lifetime `'c` corresponds to the lifetime of the `Captures` value that +/// created this iterator, and the lifetime `'h` corresponds to the originally +/// matched haystack. +#[derive(Clone, Debug)] +pub struct SubCaptureMatches<'c, 'h> { + haystack: &'h [u8], + it: captures::CapturesPatternIter<'c>, +} + +impl<'c, 'h> Iterator for SubCaptureMatches<'c, 'h> { + type Item = Option<Match<'h>>; + + #[inline] + fn next(&mut self) -> Option<Option<Match<'h>>> { + self.it.next().map(|group| { + group.map(|sp| Match::new(self.haystack, sp.start, sp.end)) + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.it.count() + } +} + +impl<'c, 'h> ExactSizeIterator for SubCaptureMatches<'c, 'h> {} + +impl<'c, 'h> core::iter::FusedIterator for SubCaptureMatches<'c, 'h> {} + +/// A trait for types that can be used to replace matches in a haystack. +/// +/// In general, users of this crate shouldn't need to implement this trait, +/// since implementations are already provided for `&[u8]` along with other +/// variants of byte string types, as well as `FnMut(&Captures) -> Vec<u8>` (or +/// any `FnMut(&Captures) -> T` where `T: AsRef<[u8]>`). Those cover most use +/// cases, but callers can implement this trait directly if necessary. +/// +/// # Example +/// +/// This example shows a basic implementation of the `Replacer` trait. This can +/// be done much more simply using the replacement byte string interpolation +/// support (e.g., `$first $last`), but this approach avoids needing to parse +/// the replacement byte string at all. +/// +/// ``` +/// use regex::bytes::{Captures, Regex, Replacer}; +/// +/// struct NameSwapper; +/// +/// impl Replacer for NameSwapper { +/// fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { +/// dst.extend_from_slice(&caps["first"]); +/// dst.extend_from_slice(b" "); +/// dst.extend_from_slice(&caps["last"]); +/// } +/// } +/// +/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap(); +/// let result = re.replace(b"Springsteen, Bruce", NameSwapper); +/// assert_eq!(result, &b"Bruce Springsteen"[..]); +/// ``` +pub trait Replacer { + /// Appends possibly empty data to `dst` to replace the current match. + /// + /// The current match is represented by `caps`, which is guaranteed to have + /// a match at capture group `0`. + /// + /// For example, a no-op replacement would be + /// `dst.extend_from_slice(&caps[0])`. + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>); + + /// Return a fixed unchanging replacement byte string. + /// + /// When doing replacements, if access to [`Captures`] is not needed (e.g., + /// the replacement byte string does not need `$` expansion), then it can + /// be beneficial to avoid finding sub-captures. + /// + /// In general, this is called once for every call to a replacement routine + /// such as [`Regex::replace_all`]. 
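+    ///
+    /// As a small sketch of how the provided impls behave, the byte string
+    /// impls in this module report a fixed replacement exactly when the
+    /// replacement contains no `$`:
+    ///
+    /// ```
+    /// use regex::bytes::Replacer;
+    ///
+    /// // No `$` anywhere, so no expansion is needed.
+    /// let mut rep: &[u8] = b"literal";
+    /// assert!(rep.no_expansion().is_some());
+    ///
+    /// // A `$` means the replacement may need to be expanded.
+    /// let mut rep: &[u8] = b"$first $last";
+    /// assert!(rep.no_expansion().is_none());
+    /// ```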
+ fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { + None + } + + /// Returns a type that implements `Replacer`, but that borrows and wraps + /// this `Replacer`. + /// + /// This is useful when you want to take a generic `Replacer` (which might + /// not be cloneable) and use it without consuming it, so it can be used + /// more than once. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::{Regex, Replacer}; + /// + /// fn replace_all_twice<R: Replacer>( + /// re: Regex, + /// src: &[u8], + /// mut rep: R, + /// ) -> Vec<u8> { + /// let dst = re.replace_all(src, rep.by_ref()); + /// let dst = re.replace_all(&dst, rep.by_ref()); + /// dst.into_owned() + /// } + /// ``` + fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { + ReplacerRef(self) + } +} + +impl<'a, const N: usize> Replacer for &'a [u8; N] { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(&**self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<const N: usize> Replacer for [u8; N] { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(&*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a [u8] { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a Vec<u8> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl Replacer for Vec<u8> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for Cow<'a, [u8]> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(self.as_ref(), dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a Cow<'a, [u8]> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(self.as_ref(), dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<F, T> Replacer for F +where + F: FnMut(&Captures<'_>) -> T, + T: AsRef<[u8]>, +{ + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + dst.extend_from_slice((*self)(caps).as_ref()); + } +} + +/// A by-reference adaptor for a [`Replacer`]. +/// +/// This permits reusing the same `Replacer` value in multiple calls to a +/// replacement routine like [`Regex::replace_all`]. +/// +/// This type is created by [`Replacer::by_ref`]. +#[derive(Debug)] +pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); + +impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + self.0.replace_append(caps, dst) + } + + fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { + self.0.no_expansion() + } +} + +/// A helper type for forcing literal string replacement. +/// +/// It can be used with routines like [`Regex::replace`] and +/// [`Regex::replace_all`] to do a literal string replacement without expanding +/// `$name` to their corresponding capture groups. 
This can be both convenient
+/// (to avoid escaping `$`, for example) and faster (since capture groups
+/// don't need to be found).
+///
+/// `'s` is the lifetime of the literal string to use.
+///
+/// # Example
+///
+/// ```
+/// use regex::bytes::{NoExpand, Regex};
+///
+/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap();
+/// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last"));
+/// assert_eq!(result, &b"$2 $last"[..]);
+/// ```
+#[derive(Clone, Debug)]
+pub struct NoExpand<'s>(pub &'s [u8]);
+
+impl<'s> Replacer for NoExpand<'s> {
+    fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) {
+        dst.extend_from_slice(self.0);
+    }
+
+    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+        Some(Cow::Borrowed(self.0))
+    }
+}
+
+/// Quickly checks the given replacement string for whether interpolation
+/// should be done on it. It returns `None` if a `$` was found anywhere in the
+/// given string, which suggests interpolation needs to be done. But if there's
+/// no `$` anywhere, then interpolation definitely does not need to be done. In
+/// that case, the given string is returned as a borrowed `Cow`.
+///
+/// This is meant to be used to implement the `Replacer::no_expansion` method
+/// in its various trait impls.
+fn no_expansion<T: AsRef<[u8]>>(replacement: &T) -> Option<Cow<'_, [u8]>> {
+    let replacement = replacement.as_ref();
+    match crate::find_byte::find_byte(b'$', replacement) {
+        Some(_) => None,
+        None => Some(Cow::Borrowed(replacement)),
+    }
+}
diff --git a/vendor/regex/src/regex/mod.rs b/vendor/regex/src/regex/mod.rs
new file mode 100644
index 0000000..93fadec
--- /dev/null
+++ b/vendor/regex/src/regex/mod.rs
@@ -0,0 +1,2 @@
+pub(crate) mod bytes;
+pub(crate) mod string;
diff --git a/vendor/regex/src/regex/string.rs b/vendor/regex/src/regex/string.rs
new file mode 100644
index 0000000..880d608
--- /dev/null
+++ b/vendor/regex/src/regex/string.rs
@@ -0,0 +1,2582 @@
+use alloc::{borrow::Cow, string::String, sync::Arc};
+
+use regex_automata::{meta, util::captures, Input, PatternID};
+
+use crate::{error::Error, RegexBuilder};
+
+/// A compiled regular expression for searching Unicode haystacks.
+///
+/// A `Regex` can be used to search haystacks, split haystacks into substrings
+/// or replace substrings in a haystack with a different substring. All
+/// searching is done with an implicit `(?s:.)*?` at the beginning and end of
+/// a pattern. To force an expression to match the whole string (or a prefix
+/// or a suffix), you must use an anchor like `^` or `$` (or `\A` and `\z`).
+///
+/// While this crate will handle Unicode strings (whether in the regular
+/// expression or in the haystack), all positions returned are **byte
+/// offsets**. Every byte offset is guaranteed to be at a Unicode code point
+/// boundary. That is, all offsets returned by the `Regex` API are guaranteed
+/// to be ranges that can slice a `&str` without panicking. If you want to
+/// relax this requirement, then you must search `&[u8]` haystacks with a
+/// [`bytes::Regex`](crate::bytes::Regex).
+///
+/// The only methods that allocate new strings are the string replacement
+/// methods. All other methods (searching and splitting) return borrowed
+/// references into the haystack given.
+/// +/// # Example +/// +/// Find the offsets of a US phone number: +/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap(); +/// let m = re.find("phone: 111-222-3333").unwrap(); +/// assert_eq!(7..19, m.range()); +/// ``` +/// +/// # Example: extracting capture groups +/// +/// A common way to use regexes is with capture groups. That is, instead of +/// just looking for matches of an entire regex, parentheses are used to create +/// groups that represent part of the match. +/// +/// For example, consider a haystack with multiple lines, and each line has +/// three whitespace delimited fields where the second field is expected to be +/// a number and the third field a boolean. To make this convenient, we use +/// the [`Captures::extract`] API to put the strings that match each group +/// into a fixed size array: +/// +/// ``` +/// use regex::Regex; +/// +/// let hay = " +/// rabbit 54 true +/// groundhog 2 true +/// does not match +/// fox 109 false +/// "; +/// let re = Regex::new(r"(?m)^\s*(\S+)\s+([0-9]+)\s+(true|false)\s*$").unwrap(); +/// let mut fields: Vec<(&str, i64, bool)> = vec![]; +/// for (_, [f1, f2, f3]) in re.captures_iter(hay).map(|caps| caps.extract()) { +/// fields.push((f1, f2.parse()?, f3.parse()?)); +/// } +/// assert_eq!(fields, vec![ +/// ("rabbit", 54, true), +/// ("groundhog", 2, true), +/// ("fox", 109, false), +/// ]); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// # Example: searching with the `Pattern` trait +/// +/// **Note**: This section requires that this crate is compiled with the +/// `pattern` Cargo feature enabled, which **requires nightly Rust**. +/// +/// Since `Regex` implements `Pattern` from the standard library, one can +/// use regexes with methods defined on `&str`. For example, `is_match`, +/// `find`, `find_iter` and `split` can, in some cases, be replaced with +/// `str::contains`, `str::find`, `str::match_indices` and `str::split`. +/// +/// Here are some examples: +/// +/// ```ignore +/// use regex::Regex; +/// +/// let re = Regex::new(r"\d+").unwrap(); +/// let hay = "a111b222c"; +/// +/// assert!(hay.contains(&re)); +/// assert_eq!(hay.find(&re), Some(1)); +/// assert_eq!(hay.match_indices(&re).collect::<Vec<_>>(), vec![ +/// (1, "111"), +/// (5, "222"), +/// ]); +/// assert_eq!(hay.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]); +/// ``` +#[derive(Clone)] +pub struct Regex { + pub(crate) meta: meta::Regex, + pub(crate) pattern: Arc<str>, +} + +impl core::fmt::Display for Regex { + /// Shows the original regular expression. + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl core::fmt::Debug for Regex { + /// Shows the original regular expression. + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("Regex").field(&self.as_str()).finish() + } +} + +impl core::str::FromStr for Regex { + type Err = Error; + + /// Attempts to parse a string into a regular expression + fn from_str(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<&str> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<String> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: String) -> Result<Regex, Error> { + Regex::new(&s) + } +} + +/// Core regular expression methods. 
+impl Regex {
+    /// Compiles a regular expression. Once compiled, it can be used repeatedly
+    /// to search, split or replace substrings in a haystack.
+    ///
+    /// Note that regex compilation tends to be a somewhat expensive process,
+    /// and unlike higher level environments, compilation is not automatically
+    /// cached for you. One should endeavor to compile a regex once and then
+    /// reuse it. For example, it's a bad idea to compile the same regex
+    /// repeatedly in a loop.
+    ///
+    /// # Errors
+    ///
+    /// If an invalid pattern is given, then an error is returned.
+    /// An error is also returned if the pattern is valid, but would
+    /// produce a regex that is bigger than the configured size limit via
+    /// [`RegexBuilder::size_limit`]. (A reasonable size limit is enabled by
+    /// default.)
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// // An invalid pattern because of an unclosed parenthesis
+    /// assert!(Regex::new(r"foo(bar").is_err());
+    /// // An invalid pattern because the regex would be too big
+    /// // because Unicode tends to inflate things.
+    /// assert!(Regex::new(r"\w{1000}").is_err());
+    /// // Disabling Unicode can make the regex much smaller,
+    /// // potentially by up to or more than an order of magnitude.
+    /// assert!(Regex::new(r"(?-u:\w){1000}").is_ok());
+    /// ```
+    pub fn new(re: &str) -> Result<Regex, Error> {
+        RegexBuilder::new(re).build()
+    }
+
+    /// Returns true if and only if there is a match for the regex anywhere
+    /// in the haystack given.
+    ///
+    /// It is recommended to use this method if all you need to do is test
+    /// whether a match exists, since the underlying matching engine may be
+    /// able to do less work.
+    ///
+    /// # Example
+    ///
+    /// Test if some haystack contains at least one word with exactly 13
+    /// Unicode word characters:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"\b\w{13}\b").unwrap();
+    /// let hay = "I categorically deny having triskaidekaphobia.";
+    /// assert!(re.is_match(hay));
+    /// ```
+    #[inline]
+    pub fn is_match(&self, haystack: &str) -> bool {
+        self.is_match_at(haystack, 0)
+    }
+
+    /// This routine searches for the first match of this regex in the
+    /// haystack given, and if found, returns a [`Match`]. The `Match`
+    /// provides access to both the byte offsets of the match and the actual
+    /// substring that matched.
+    ///
+    /// Note that this should only be used if you want to find the entire
+    /// match. If instead you just want to test the existence of a match,
+    /// it's potentially faster to use `Regex::is_match(hay)` instead of
+    /// `Regex::find(hay).is_some()`.
+    ///
+    /// # Example
+    ///
+    /// Find the first word with exactly 13 Unicode word characters:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"\b\w{13}\b").unwrap();
+    /// let hay = "I categorically deny having triskaidekaphobia.";
+    /// let mat = re.find(hay).unwrap();
+    /// assert_eq!(2..15, mat.range());
+    /// assert_eq!("categorically", mat.as_str());
+    /// ```
+    #[inline]
+    pub fn find<'h>(&self, haystack: &'h str) -> Option<Match<'h>> {
+        self.find_at(haystack, 0)
+    }
+
+    /// Returns an iterator that yields successive non-overlapping matches in
+    /// the given haystack. The iterator yields values of type [`Match`].
+ /// + /// # Time complexity + /// + /// Note that since `find_iter` runs potentially many searches on the + /// haystack and since each search has worst case `O(m * n)` time + /// complexity, the overall worst case time complexity for iteration is + /// `O(m * n^2)`. + /// + /// # Example + /// + /// Find every word with exactly 13 Unicode word characters: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = "Retroactively relinquishing remunerations is reprehensible."; + /// let matches: Vec<_> = re.find_iter(hay).map(|m| m.as_str()).collect(); + /// assert_eq!(matches, vec![ + /// "Retroactively", + /// "relinquishing", + /// "remunerations", + /// "reprehensible", + /// ]); + /// ``` + #[inline] + pub fn find_iter<'r, 'h>(&'r self, haystack: &'h str) -> Matches<'r, 'h> { + Matches { haystack, it: self.meta.find_iter(haystack) } + } + + /// This routine searches for the first match of this regex in the haystack + /// given, and if found, returns not only the overall match but also the + /// matches of each capture group in the regex. If no match is found, then + /// `None` is returned. + /// + /// Capture group `0` always corresponds to an implicit unnamed group that + /// includes the entire match. If a match is found, this group is always + /// present. Subsequent groups may be named and are numbered, starting + /// at 1, by the order in which the opening parenthesis appears in the + /// pattern. For example, in the pattern `(?<a>.(?<b>.))(?<c>.)`, `a`, + /// `b` and `c` correspond to capture group indices `1`, `2` and `3`, + /// respectively. + /// + /// You should only use `captures` if you need access to the capture group + /// matches. Otherwise, [`Regex::find`] is generally faster for discovering + /// just the overall match. + /// + /// # Example + /// + /// Say you have some haystack with movie names and their release years, + /// like "'Citizen Kane' (1941)". It'd be nice if we could search for + /// substrings looking like that, while also extracting the movie name and + /// its release year separately. The example below shows how to do that. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); + /// let hay = "Not my favorite movie: 'Citizen Kane' (1941)."; + /// let caps = re.captures(hay).unwrap(); + /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); + /// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane"); + /// assert_eq!(caps.get(2).unwrap().as_str(), "1941"); + /// // You can also access the groups by index using the Index notation. + /// // Note that this will panic on an invalid index. In this case, these + /// // accesses are always correct because the overall regex will only + /// // match when these capture groups match. + /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); + /// assert_eq!(&caps[1], "Citizen Kane"); + /// assert_eq!(&caps[2], "1941"); + /// ``` + /// + /// Note that the full match is at capture group `0`. Each subsequent + /// capture group is indexed by the order of its opening `(`. 
+    ///
+    /// We can make this example a bit clearer by using *named* capture groups:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>\d{4})\)").unwrap();
+    /// let hay = "Not my favorite movie: 'Citizen Kane' (1941).";
+    /// let caps = re.captures(hay).unwrap();
+    /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)");
+    /// assert_eq!(caps.name("title").unwrap().as_str(), "Citizen Kane");
+    /// assert_eq!(caps.name("year").unwrap().as_str(), "1941");
+    /// // You can also access the groups by name using the Index notation.
+    /// // Note that this will panic on an invalid group name. In this case,
+    /// // these accesses are always correct because the overall regex will
+    /// // only match when these capture groups match.
+    /// assert_eq!(&caps[0], "'Citizen Kane' (1941)");
+    /// assert_eq!(&caps["title"], "Citizen Kane");
+    /// assert_eq!(&caps["year"], "1941");
+    /// ```
+    ///
+    /// Here we name the capture groups, which we can access with the `name`
+    /// method or the `Index` notation with a `&str`. Note that the named
+    /// capture groups are still accessible with `get` or the `Index` notation
+    /// with a `usize`.
+    ///
+    /// The `0`th capture group is always unnamed, so it must always be
+    /// accessed with `get(0)` or `[0]`.
+    ///
+    /// Finally, one other way to get the matched substrings is with the
+    /// [`Captures::extract`] API:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
+    /// let hay = "Not my favorite movie: 'Citizen Kane' (1941).";
+    /// let (full, [title, year]) = re.captures(hay).unwrap().extract();
+    /// assert_eq!(full, "'Citizen Kane' (1941)");
+    /// assert_eq!(title, "Citizen Kane");
+    /// assert_eq!(year, "1941");
+    /// ```
+    #[inline]
+    pub fn captures<'h>(&self, haystack: &'h str) -> Option<Captures<'h>> {
+        self.captures_at(haystack, 0)
+    }
+
+    /// Returns an iterator that yields successive non-overlapping matches in
+    /// the given haystack. The iterator yields values of type [`Captures`].
+    ///
+    /// This is the same as [`Regex::find_iter`], but instead of only providing
+    /// access to the overall match, each value yielded includes access to the
+    /// matches of all capture groups in the regex. Reporting this extra match
+    /// data is potentially costly, so callers should only use `captures_iter`
+    /// over `find_iter` when they actually need access to the capture group
+    /// matches.
+    ///
+    /// # Time complexity
+    ///
+    /// Note that since `captures_iter` runs potentially many searches on the
+    /// haystack and since each search has worst case `O(m * n)` time
+    /// complexity, the overall worst case time complexity for iteration is
+    /// `O(m * n^2)`.
+ /// + /// # Example + /// + /// We can use this to find all movie titles and their release years in + /// some haystack, where the movie is formatted like "'Title' (xxxx)": + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\(([0-9]{4})\)").unwrap(); + /// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; + /// let mut movies = vec![]; + /// for (_, [title, year]) in re.captures_iter(hay).map(|c| c.extract()) { + /// movies.push((title, year.parse::<i64>()?)); + /// } + /// assert_eq!(movies, vec![ + /// ("Citizen Kane", 1941), + /// ("The Wizard of Oz", 1939), + /// ("M", 1931), + /// ]); + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// Or with named groups: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>[0-9]{4})\)").unwrap(); + /// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; + /// let mut it = re.captures_iter(hay); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], "Citizen Kane"); + /// assert_eq!(&caps["year"], "1941"); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], "The Wizard of Oz"); + /// assert_eq!(&caps["year"], "1939"); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], "M"); + /// assert_eq!(&caps["year"], "1931"); + /// ``` + #[inline] + pub fn captures_iter<'r, 'h>( + &'r self, + haystack: &'h str, + ) -> CaptureMatches<'r, 'h> { + CaptureMatches { haystack, it: self.meta.captures_iter(haystack) } + } + + /// Returns an iterator of substrings of the haystack given, delimited by a + /// match of the regex. Namely, each element of the iterator corresponds to + /// a part of the haystack that *isn't* matched by the regular expression. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. 
+    ///
+    /// # Example
+    ///
+    /// To split a string delimited by arbitrary amounts of spaces or tabs:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"[ \t]+").unwrap();
+    /// let hay = "a b \t c\td e";
+    /// let fields: Vec<&str> = re.split(hay).collect();
+    /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]);
+    /// ```
+    ///
+    /// # Example: more cases
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r" ").unwrap();
+    /// let hay = "Mary had a little lamb";
+    /// let got: Vec<&str> = re.split(hay).collect();
+    /// assert_eq!(got, vec!["Mary", "had", "a", "little", "lamb"]);
+    ///
+    /// let re = Regex::new(r"X").unwrap();
+    /// let hay = "";
+    /// let got: Vec<&str> = re.split(hay).collect();
+    /// assert_eq!(got, vec![""]);
+    ///
+    /// let re = Regex::new(r"X").unwrap();
+    /// let hay = "lionXXtigerXleopard";
+    /// let got: Vec<&str> = re.split(hay).collect();
+    /// assert_eq!(got, vec!["lion", "", "tiger", "leopard"]);
+    ///
+    /// let re = Regex::new(r"::").unwrap();
+    /// let hay = "lion::tiger::leopard";
+    /// let got: Vec<&str> = re.split(hay).collect();
+    /// assert_eq!(got, vec!["lion", "tiger", "leopard"]);
+    /// ```
+    ///
+    /// If a haystack contains multiple contiguous matches, you will end up
+    /// with empty spans yielded by the iterator:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"X").unwrap();
+    /// let hay = "XXXXaXXbXc";
+    /// let got: Vec<&str> = re.split(hay).collect();
+    /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]);
+    ///
+    /// let re = Regex::new(r"/").unwrap();
+    /// let hay = "(///)";
+    /// let got: Vec<&str> = re.split(hay).collect();
+    /// assert_eq!(got, vec!["(", "", "", ")"]);
+    /// ```
+    ///
+    /// Separators at the start or end of a haystack are neighbored by an
+    /// empty substring.
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"0").unwrap();
+    /// let hay = "010";
+    /// let got: Vec<&str> = re.split(hay).collect();
+    /// assert_eq!(got, vec!["", "1", ""]);
+    /// ```
+    ///
+    /// When the empty string is used as a regex, it splits at every valid
+    /// UTF-8 boundary by default (which includes the beginning and end of the
+    /// haystack):
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"").unwrap();
+    /// let hay = "rust";
+    /// let got: Vec<&str> = re.split(hay).collect();
+    /// assert_eq!(got, vec!["", "r", "u", "s", "t", ""]);
+    ///
+    /// // Splitting by an empty string is UTF-8 aware by default!
+    /// let re = Regex::new(r"").unwrap();
+    /// let hay = "☃";
+    /// let got: Vec<&str> = re.split(hay).collect();
+    /// assert_eq!(got, vec!["", "☃", ""]);
+    /// ```
+    ///
+    /// Contiguous separators (which commonly show up with whitespace) can
+    /// lead to possibly surprising behavior. For example, this code is
+    /// correct:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r" ").unwrap();
+    /// let hay = "    a  b c";
+    /// let got: Vec<&str> = re.split(hay).collect();
+    /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]);
+    /// ```
+    ///
+    /// It does *not* give you `["a", "b", "c"]`. For that behavior, you'd want
+    /// to match contiguous space characters:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r" +").unwrap();
+    /// let hay = "    a  b c";
+    /// let got: Vec<&str> = re.split(hay).collect();
+    /// // N.B. This does still include a leading empty span because ' +'
+    /// // matches at the beginning of the haystack.
+ /// assert_eq!(got, vec!["", "a", "b", "c"]); + /// ``` + #[inline] + pub fn split<'r, 'h>(&'r self, haystack: &'h str) -> Split<'r, 'h> { + Split { haystack, it: self.meta.split(haystack) } + } + + /// Returns an iterator of at most `limit` substrings of the haystack + /// given, delimited by a match of the regex. (A `limit` of `0` will return + /// no substrings.) Namely, each element of the iterator corresponds to a + /// part of the haystack that *isn't* matched by the regular expression. + /// The remainder of the haystack that is not split will be the last + /// element in the iterator. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// Although note that the worst case time here has an upper bound given + /// by the `limit` parameter. + /// + /// # Example + /// + /// Get the first two words in some haystack: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\W+").unwrap(); + /// let hay = "Hey! How are you?"; + /// let fields: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(fields, vec!["Hey", "How", "are you?"]); + /// ``` + /// + /// # Examples: more cases + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = "Mary had a little lamb"; + /// let got: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec!["Mary", "had", "a little lamb"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = ""; + /// let got: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![""]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "lionXXtigerXleopard"; + /// let got: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec!["lion", "", "tigerXleopard"]); + /// + /// let re = Regex::new(r"::").unwrap(); + /// let hay = "lion::tiger::leopard"; + /// let got: Vec<&str> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec!["lion", "tiger::leopard"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "abcXdef"; + /// let got: Vec<&str> = re.splitn(hay, 1).collect(); + /// assert_eq!(got, vec!["abcXdef"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "abcdef"; + /// let got: Vec<&str> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec!["abcdef"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "abcXdef"; + /// let got: Vec<&str> = re.splitn(hay, 0).collect(); + /// assert!(got.is_empty()); + /// ``` + #[inline] + pub fn splitn<'r, 'h>( + &'r self, + haystack: &'h str, + limit: usize, + ) -> SplitN<'r, 'h> { + SplitN { haystack, it: self.meta.splitn(haystack, limit) } + } + + /// Replaces the leftmost-first match in the given haystack with the + /// replacement provided. The replacement can be a regular string (where + /// `$N` and `$name` are expanded to match capture groups) or a function + /// that takes a [`Captures`] and returns the replaced string. + /// + /// If no match is found, then the haystack is returned unchanged. In that + /// case, this implementation will likely return a `Cow::Borrowed` value + /// such that no allocation is performed. + /// + /// # Replacement string syntax + /// + /// All instances of `$ref` in the replacement string are replaced with + /// the substring corresponding to the capture group identified by `ref`. 
+ /// + /// `ref` may be an integer corresponding to the index of the capture group + /// (counted by order of opening parenthesis where `0` is the entire match) + /// or it can be a name (consisting of letters, digits or underscores) + /// corresponding to a named capture group. + /// + /// If `ref` isn't a valid capture group (whether the name doesn't exist or + /// isn't a valid index), then it is replaced with the empty string. + /// + /// The longest possible name is used. For example, `$1a` looks up the + /// capture group named `1a` and not the capture group at index `1`. To + /// exert more precise control over the name, use braces, e.g., `${1}a`. + /// + /// To write a literal `$` use `$$`. + /// + /// # Example + /// + /// Note that this function is polymorphic with respect to the replacement. + /// In typical usage, this can just be a normal string: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"[^01]+").unwrap(); + /// assert_eq!(re.replace("1078910", ""), "1010"); + /// ``` + /// + /// But anything satisfying the [`Replacer`] trait will work. For example, + /// a closure of type `|&Captures| -> String` provides direct access to the + /// captures corresponding to a match. This allows one to access capturing + /// group matches easily: + /// + /// ``` + /// use regex::{Captures, Regex}; + /// + /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); + /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| { + /// format!("{} {}", &caps[2], &caps[1]) + /// }); + /// assert_eq!(result, "Bruce Springsteen"); + /// ``` + /// + /// But this is a bit cumbersome to use all the time. Instead, a simple + /// syntax is supported (as described above) that expands `$name` into the + /// corresponding capture group. Here's the last example, but using this + /// expansion technique with named capture groups: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap(); + /// let result = re.replace("Springsteen, Bruce", "$first $last"); + /// assert_eq!(result, "Bruce Springsteen"); + /// ``` + /// + /// Note that using `$2` instead of `$first` or `$1` instead of `$last` + /// would produce the same result. To write a literal `$` use `$$`. + /// + /// Sometimes the replacement string requires use of curly braces to + /// delineate a capture group replacement when it is adjacent to some other + /// literal text. For example, if we wanted to join two words together with + /// an underscore: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<second>\w+)").unwrap(); + /// let result = re.replace("deep fried", "${first}_$second"); + /// assert_eq!(result, "deep_fried"); + /// ``` + /// + /// Without the curly braces, the capture group name `first_` would be + /// used, and since it doesn't exist, it would be replaced with the empty + /// string. + /// + /// Finally, sometimes you just want to replace a literal string with no + /// regard for capturing group expansion. This can be done by wrapping a + /// string with [`NoExpand`]: + /// + /// ``` + /// use regex::{NoExpand, Regex}; + /// + /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap(); + /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last")); + /// assert_eq!(result, "$2 $last"); + /// ``` + /// + /// Using `NoExpand` may also be faster, since the replacement string won't + /// need to be parsed for the `$` syntax. 
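+    ///
+    /// And one more small example: to include a literal `$` in the
+    /// replacement, write `$$`:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"(\w+)").unwrap();
+    /// let result = re.replace("price", "$$$1");
+    /// assert_eq!(result, "$price");
+    /// ```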
+ #[inline] + pub fn replace<'h, R: Replacer>( + &self, + haystack: &'h str, + rep: R, + ) -> Cow<'h, str> { + self.replacen(haystack, 1, rep) + } + + /// Replaces all non-overlapping matches in the haystack with the + /// replacement provided. This is the same as calling `replacen` with + /// `limit` set to `0`. + /// + /// The documentation for [`Regex::replace`] goes into more detail about + /// what kinds of replacement strings are supported. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// # Fallibility + /// + /// If you need to write a replacement routine where any individual + /// replacement might "fail," doing so with this API isn't really feasible + /// because there's no way to stop the search process if a replacement + /// fails. Instead, if you need this functionality, you should consider + /// implementing your own replacement routine: + /// + /// ``` + /// use regex::{Captures, Regex}; + /// + /// fn replace_all<E>( + /// re: &Regex, + /// haystack: &str, + /// replacement: impl Fn(&Captures) -> Result<String, E>, + /// ) -> Result<String, E> { + /// let mut new = String::with_capacity(haystack.len()); + /// let mut last_match = 0; + /// for caps in re.captures_iter(haystack) { + /// let m = caps.get(0).unwrap(); + /// new.push_str(&haystack[last_match..m.start()]); + /// new.push_str(&replacement(&caps)?); + /// last_match = m.end(); + /// } + /// new.push_str(&haystack[last_match..]); + /// Ok(new) + /// } + /// + /// // Let's replace each word with the number of bytes in that word. + /// // But if we see a word that is "too long," we'll give up. + /// let re = Regex::new(r"\w+").unwrap(); + /// let replacement = |caps: &Captures| -> Result<String, &'static str> { + /// if caps[0].len() >= 5 { + /// return Err("word too long"); + /// } + /// Ok(caps[0].len().to_string()) + /// }; + /// assert_eq!( + /// Ok("2 3 3 3?".to_string()), + /// replace_all(&re, "hi how are you?", &replacement), + /// ); + /// assert!(replace_all(&re, "hi there", &replacement).is_err()); + /// ``` + /// + /// # Example + /// + /// This example shows how to flip the order of whitespace (excluding line + /// terminators) delimited fields, and normalizes the whitespace that + /// delimits the fields: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap(); + /// let hay = " + /// Greetings 1973 + /// Wild\t1973 + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "; + /// let new = re.replace_all(hay, "$2 $1"); + /// assert_eq!(new, " + /// 1973 Greetings + /// 1973 Wild + /// 1975 BornToRun + /// 1978 Darkness + /// 1980 TheRiver + /// "); + /// ``` + #[inline] + pub fn replace_all<'h, R: Replacer>( + &self, + haystack: &'h str, + rep: R, + ) -> Cow<'h, str> { + self.replacen(haystack, 0, rep) + } + + /// Replaces at most `limit` non-overlapping matches in the haystack with + /// the replacement provided. If `limit` is `0`, then all non-overlapping + /// matches are replaced. That is, `Regex::replace_all(hay, rep)` is + /// equivalent to `Regex::replacen(hay, 0, rep)`. + /// + /// The documentation for [`Regex::replace`] goes into more detail about + /// what kinds of replacement strings are supported. 
+ /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// Although note that the worst case time here has an upper bound given + /// by the `limit` parameter. + /// + /// # Fallibility + /// + /// See the corresponding section in the docs for [`Regex::replace_all`] + /// for tips on how to deal with a replacement routine that can fail. + /// + /// # Example + /// + /// This example shows how to flip the order of whitespace (excluding line + /// terminators) delimited fields, and normalizes the whitespace that + /// delimits the fields. But we only do it for the first two matches. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap(); + /// let hay = " + /// Greetings 1973 + /// Wild\t1973 + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "; + /// let new = re.replacen(hay, 2, "$2 $1"); + /// assert_eq!(new, " + /// 1973 Greetings + /// 1973 Wild + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "); + /// ``` + #[inline] + pub fn replacen<'h, R: Replacer>( + &self, + haystack: &'h str, + limit: usize, + mut rep: R, + ) -> Cow<'h, str> { + // If we know that the replacement doesn't have any capture expansions, + // then we can use the fast path. The fast path can make a tremendous + // difference: + // + // 1) We use `find_iter` instead of `captures_iter`. Not asking for + // captures generally makes the regex engines faster. + // 2) We don't need to look up all of the capture groups and do + // replacements inside the replacement string. We just push it + // at each match and be done with it. + if let Some(rep) = rep.no_expansion() { + let mut it = self.find_iter(haystack).enumerate().peekable(); + if it.peek().is_none() { + return Cow::Borrowed(haystack); + } + let mut new = String::with_capacity(haystack.len()); + let mut last_match = 0; + for (i, m) in it { + new.push_str(&haystack[last_match..m.start()]); + new.push_str(&rep); + last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } + } + new.push_str(&haystack[last_match..]); + return Cow::Owned(new); + } + + // The slower path, which we use if the replacement may need access to + // capture groups. + let mut it = self.captures_iter(haystack).enumerate().peekable(); + if it.peek().is_none() { + return Cow::Borrowed(haystack); + } + let mut new = String::with_capacity(haystack.len()); + let mut last_match = 0; + for (i, cap) in it { + // unwrap on 0 is OK because captures only reports matches + let m = cap.get(0).unwrap(); + new.push_str(&haystack[last_match..m.start()]); + rep.replace_append(&cap, &mut new); + last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } + } + new.push_str(&haystack[last_match..]); + Cow::Owned(new) + } +} + +/// A group of advanced or "lower level" search methods. Some methods permit +/// starting the search at a position greater than `0` in the haystack. Other +/// methods permit reusing allocations, for example, when extracting the +/// matches for capture groups. +impl Regex { + /// Returns the end byte offset of the first match in the haystack given. + /// + /// This method may have the same performance characteristics as + /// `is_match`. 
Behaviorally, it doesn't just report whether a match
+    /// occurs, but also the end offset for a match. In particular, the offset
+    /// returned *may be shorter* than the proper end of the leftmost-first
+    /// match that you would find via [`Regex::find`].
+    ///
+    /// Note that it is not guaranteed that this routine finds the shortest or
+    /// "earliest" possible match. Instead, the main idea of this API is that
+    /// it returns the offset at the point at which the internal regex engine
+    /// has determined that a match has occurred. This may vary depending on
+    /// which internal regex engine is used, and thus, the offset itself may
+    /// change based on internal heuristics.
+    ///
+    /// # Example
+    ///
+    /// Typically, `a+` would match the entire first sequence of `a` in some
+    /// haystack, but `shortest_match` *may* give up as soon as it sees the
+    /// first `a`.
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"a+").unwrap();
+    /// let offset = re.shortest_match("aaaaa").unwrap();
+    /// assert_eq!(offset, 1);
+    /// ```
+    #[inline]
+    pub fn shortest_match(&self, haystack: &str) -> Option<usize> {
+        self.shortest_match_at(haystack, 0)
+    }
+
+    /// Returns the same as [`Regex::shortest_match`], but starts the search at
+    /// the given offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only match
+    /// when `start == 0`.
+    ///
+    /// If a match is found, the offset returned is relative to the beginning
+    /// of the haystack, not the beginning of the search.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `start >= haystack.len() + 1`.
+    ///
+    /// # Example
+    ///
+    /// This example shows the significance of `start` by demonstrating how it
+    /// can be used to permit look-around assertions in a regex to take the
+    /// surrounding context into account.
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"\bchew\b").unwrap();
+    /// let hay = "eschew";
+    /// // We get a match here, but it's probably not intended.
+    /// assert_eq!(re.shortest_match(&hay[2..]), Some(4));
+    /// // No match because the assertions take the context into account.
+    /// assert_eq!(re.shortest_match_at(hay, 2), None);
+    /// ```
+    #[inline]
+    pub fn shortest_match_at(
+        &self,
+        haystack: &str,
+        start: usize,
+    ) -> Option<usize> {
+        let input =
+            Input::new(haystack).earliest(true).span(start..haystack.len());
+        self.meta.search_half(&input).map(|hm| hm.offset())
+    }
+
+    /// Returns the same as [`Regex::is_match`], but starts the search at the
+    /// given offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `start >= haystack.len() + 1`.
+    ///
+    /// # Example
+    ///
+    /// This example shows the significance of `start` by demonstrating how it
+    /// can be used to permit look-around assertions in a regex to take the
+    /// surrounding context into account.
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"\bchew\b").unwrap();
+    /// let hay = "eschew";
+    /// // We get a match here, but it's probably not intended.
+    /// assert!(re.is_match(&hay[2..]));
+    /// // No match because the assertions take the context into account.
+ /// assert!(!re.is_match_at(hay, 2)); + /// ``` + #[inline] + pub fn is_match_at(&self, haystack: &str, start: usize) -> bool { + let input = + Input::new(haystack).earliest(true).span(start..haystack.len()); + self.meta.search_half(&input).is_some() + } + + /// Returns the same as [`Regex::find`], but starts the search at the given + /// offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(re.find(&hay[2..]).map(|m| m.range()), Some(0..4)); + /// // No match because the assertions take the context into account. + /// assert_eq!(re.find_at(hay, 2), None); + /// ``` + #[inline] + pub fn find_at<'h>( + &self, + haystack: &'h str, + start: usize, + ) -> Option<Match<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + self.meta + .search(&input) + .map(|m| Match::new(haystack, m.start(), m.end())) + } + + /// Returns the same as [`Regex::captures`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(&re.captures(&hay[2..]).unwrap()[0], "chew"); + /// // No match because the assertions take the context into account. + /// assert!(re.captures_at(hay, 2).is_none()); + /// ``` + #[inline] + pub fn captures_at<'h>( + &self, + haystack: &'h str, + start: usize, + ) -> Option<Captures<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + let mut caps = self.meta.create_captures(); + self.meta.search_captures(&input, &mut caps); + if caps.is_match() { + let static_captures_len = self.static_captures_len(); + Some(Captures { haystack, caps, static_captures_len }) + } else { + None + } + } + + /// This is like [`Regex::captures`], but writes the byte offsets of each + /// capture group match into the locations given. + /// + /// A [`CaptureLocations`] stores the same byte offsets as a [`Captures`], + /// but does *not* store a reference to the haystack. This makes its API + /// a bit lower level and less convenient. But in exchange, callers + /// may allocate their own `CaptureLocations` and reuse it for multiple + /// searches. This may be helpful if allocating a `Captures` shows up in a + /// profile as too costly. + /// + /// To create a `CaptureLocations` value, use the + /// [`Regex::capture_locations`] method. 
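+    ///
+    /// As a rough editorial sketch of the intended reuse pattern (the
+    /// pattern and haystacks here are made up, not part of the upstream
+    /// docs):
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"[0-9]+").unwrap();
+    /// let mut locs = re.capture_locations();
+    /// // One `CaptureLocations` allocation services both searches.
+    /// for hay in ["abc123", "xyz456"] {
+    ///     assert!(re.captures_read(&mut locs, hay).is_some());
+    /// }
+    /// ```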
+ /// + /// This also returns the overall match if one was found. When a match is + /// found, its offsets are also always stored in `locs` at index `0`. + /// + /// # Panics + /// + /// This routine may panic if the given `CaptureLocations` was not created + /// by this regex. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"^([a-z]+)=(\S*)$").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert!(re.captures_read(&mut locs, "id=foo123").is_some()); + /// assert_eq!(Some((0, 9)), locs.get(0)); + /// assert_eq!(Some((0, 2)), locs.get(1)); + /// assert_eq!(Some((3, 9)), locs.get(2)); + /// ``` + #[inline] + pub fn captures_read<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h str, + ) -> Option<Match<'h>> { + self.captures_read_at(locs, haystack, 0) + } + + /// Returns the same as [`Regex::captures_read`], but starts the search at + /// the given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// This routine may also panic if the given `CaptureLocations` was not + /// created by this regex. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// let mut locs = re.capture_locations(); + /// // We get a match here, but it's probably not intended. + /// assert!(re.captures_read(&mut locs, &hay[2..]).is_some()); + /// // No match because the assertions take the context into account. + /// assert!(re.captures_read_at(&mut locs, hay, 2).is_none()); + /// ``` + #[inline] + pub fn captures_read_at<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h str, + start: usize, + ) -> Option<Match<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + self.meta.search_captures(&input, &mut locs.0); + locs.0.get_match().map(|m| Match::new(haystack, m.start(), m.end())) + } + + /// An undocumented alias for `captures_read_at`. + /// + /// The `regex-capi` crate previously used this routine, so to avoid + /// breaking that crate, we continue to provide the name as an undocumented + /// alias. + #[doc(hidden)] + #[inline] + pub fn read_captures_at<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h str, + start: usize, + ) -> Option<Match<'h>> { + self.captures_read_at(locs, haystack, start) + } +} + +/// Auxiliary methods. +impl Regex { + /// Returns the original string of this regex. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"foo\w+bar").unwrap(); + /// assert_eq!(re.as_str(), r"foo\w+bar"); + /// ``` + #[inline] + pub fn as_str(&self) -> &str { + &self.pattern + } + + /// Returns an iterator over the capture names in this regex. + /// + /// The iterator returned yields elements of type `Option<&str>`. That is, + /// the iterator yields values for all capture groups, even ones that are + /// unnamed. The order of the groups corresponds to the order of the group's + /// corresponding opening parenthesis. 
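+    ///
+    /// As a quick sketch (an editorial example; the pattern is made up), the
+    /// names can be collected to inspect a pattern's groups:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"(?<y>[0-9]{4})-([0-9]{2})").unwrap();
+    /// let names: Vec<Option<&str>> = re.capture_names().collect();
+    /// assert_eq!(names, vec![None, Some("y"), None]);
+    /// ```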
+    ///
+    /// The first element of the iterator always yields the group corresponding
+    /// to the overall match, and this group is always unnamed. Therefore, the
+    /// iterator always yields at least one group.
+    ///
+    /// # Example
+    ///
+    /// This shows basic usage with a mix of named and unnamed capture groups:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap();
+    /// let mut names = re.capture_names();
+    /// assert_eq!(names.next(), Some(None));
+    /// assert_eq!(names.next(), Some(Some("a")));
+    /// assert_eq!(names.next(), Some(Some("b")));
+    /// assert_eq!(names.next(), Some(None));
+    /// // the '(?:.)' group is non-capturing and so doesn't appear here!
+    /// assert_eq!(names.next(), Some(Some("c")));
+    /// assert_eq!(names.next(), None);
+    /// ```
+    ///
+    /// The iterator always yields at least one element, even for regexes with
+    /// no capture groups and even for regexes that can never match:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"").unwrap();
+    /// let mut names = re.capture_names();
+    /// assert_eq!(names.next(), Some(None));
+    /// assert_eq!(names.next(), None);
+    ///
+    /// let re = Regex::new(r"[a&&b]").unwrap();
+    /// let mut names = re.capture_names();
+    /// assert_eq!(names.next(), Some(None));
+    /// assert_eq!(names.next(), None);
+    /// ```
+    #[inline]
+    pub fn capture_names(&self) -> CaptureNames<'_> {
+        CaptureNames(self.meta.group_info().pattern_names(PatternID::ZERO))
+    }
+
+    /// Returns the number of capture groups in this regex.
+    ///
+    /// This includes all named and unnamed groups, including the implicit
+    /// unnamed group that is always present and corresponds to the entire
+    /// match.
+    ///
+    /// Since the implicit unnamed group is always included in this length, the
+    /// length returned is guaranteed to be greater than zero.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"foo").unwrap();
+    /// assert_eq!(1, re.captures_len());
+    ///
+    /// let re = Regex::new(r"(foo)").unwrap();
+    /// assert_eq!(2, re.captures_len());
+    ///
+    /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap();
+    /// assert_eq!(5, re.captures_len());
+    ///
+    /// let re = Regex::new(r"[a&&b]").unwrap();
+    /// assert_eq!(1, re.captures_len());
+    /// ```
+    #[inline]
+    pub fn captures_len(&self) -> usize {
+        self.meta.group_info().group_len(PatternID::ZERO)
+    }
+
+    /// Returns the total number of capturing groups that appear in every
+    /// possible match.
+    ///
+    /// If the number of capture groups can vary depending on the match, then
+    /// this returns `None`. That is, a value is only returned when the number
+    /// of matching groups is invariant or "static."
+    ///
+    /// Note that like [`Regex::captures_len`], this **does** include the
+    /// implicit capturing group corresponding to the entire match. Therefore,
+    /// when a non-None value is returned, it is guaranteed to be at least `1`.
+    /// Stated differently, a return value of `Some(0)` is impossible.
+    ///
+    /// # Example
+    ///
+    /// This shows a few cases where a static number of capture groups is
+    /// available and a few cases where it is not.
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let len = |pattern| {
+    ///     Regex::new(pattern).map(|re| re.static_captures_len())
+    /// };
+    ///
+    /// assert_eq!(Some(1), len("a")?);
+    /// assert_eq!(Some(2), len("(a)")?);
+    /// assert_eq!(Some(2), len("(a)|(b)")?);
+    /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?);
+    /// assert_eq!(None, len("(a)|b")?);
+    /// assert_eq!(None, len("a|(b)")?);
+    /// assert_eq!(None, len("(b)*")?);
+    /// assert_eq!(Some(2), len("(b)+")?);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn static_captures_len(&self) -> Option<usize> {
+        self.meta.static_captures_len()
+    }
+
+    /// Returns a freshly allocated set of capture locations that can
+    /// be reused in multiple calls to [`Regex::captures_read`] or
+    /// [`Regex::captures_read_at`].
+    ///
+    /// The returned locations can be used for any subsequent search for this
+    /// particular regex. There is no guarantee that it is correct to use for
+    /// other regexes, even if they have the same number of capture groups.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"(.)(.)(\w+)").unwrap();
+    /// let mut locs = re.capture_locations();
+    /// assert!(re.captures_read(&mut locs, "Padron").is_some());
+    /// assert_eq!(locs.get(0), Some((0, 6)));
+    /// assert_eq!(locs.get(1), Some((0, 1)));
+    /// assert_eq!(locs.get(2), Some((1, 2)));
+    /// assert_eq!(locs.get(3), Some((2, 6)));
+    /// ```
+    #[inline]
+    pub fn capture_locations(&self) -> CaptureLocations {
+        CaptureLocations(self.meta.create_captures())
+    }
+
+    /// An alias for `capture_locations` to preserve backward compatibility.
+    ///
+    /// The `regex-capi` crate used this method, so to avoid breaking that
+    /// crate, we continue to export it as an undocumented API.
+    #[doc(hidden)]
+    #[inline]
+    pub fn locations(&self) -> CaptureLocations {
+        self.capture_locations()
+    }
+}
+
+/// Represents a single match of a regex in a haystack.
+///
+/// A `Match` contains both the start and end byte offsets of the match and the
+/// actual substring corresponding to the range of those byte offsets. It is
+/// guaranteed that `start <= end`. When `start == end`, the match is empty.
+///
+/// Since this `Match` can only be produced by the top-level `Regex` APIs
+/// that only support searching UTF-8 encoded strings, the byte offsets for a
+/// `Match` are guaranteed to fall on valid UTF-8 codepoint boundaries. That
+/// is, slicing a `&str` with [`Match::range`] is guaranteed to never panic.
+///
+/// Values with this type are created by [`Regex::find`] or
+/// [`Regex::find_iter`]. Other APIs can create `Match` values too. For
+/// example, [`Captures::get`].
+///
+/// The lifetime parameter `'h` refers to the lifetime of the haystack that
+/// this match was produced from.
+///
+/// # Numbering
+///
+/// The byte offsets in a `Match` form a half-open interval. That is, the
+/// start of the range is inclusive and the end of the range is exclusive.
+/// For example, given a haystack `abcFOOxyz` and a match of `FOO`, its byte
+/// offset range starts at `3` and ends at `6`. `3` corresponds to `F` and
+/// `6` corresponds to `x`, which is one past the end of the match. This
+/// corresponds to the same kind of slicing that Rust uses.
+///
+/// For more on why this was chosen over other schemes (aside from being
+/// consistent with how Rust the language works), see [this discussion] and
+/// [Dijkstra's note on a related topic][note].
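+///
+/// As a small editorial illustration of that convention (the pattern and
+/// haystack here are made up):
+///
+/// ```
+/// use regex::Regex;
+///
+/// let re = Regex::new(r"FOO").unwrap();
+/// let m = re.find("abcFOOxyz").unwrap();
+/// // Half-open: 3 is included and 6 is one past the last byte of the match.
+/// assert_eq!((m.start(), m.end()), (3, 6));
+/// assert_eq!(&"abcFOOxyz"[m.range()], "FOO");
+/// ```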
+/// +/// [this discussion]: https://github.com/rust-lang/regex/discussions/866 +/// [note]: https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html +/// +/// # Example +/// +/// This example shows the value of each of the methods on `Match` for a +/// particular search. +/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new(r"\p{Greek}+").unwrap(); +/// let hay = "Greek: αβγδ"; +/// let m = re.find(hay).unwrap(); +/// assert_eq!(7, m.start()); +/// assert_eq!(15, m.end()); +/// assert!(!m.is_empty()); +/// assert_eq!(8, m.len()); +/// assert_eq!(7..15, m.range()); +/// assert_eq!("αβγδ", m.as_str()); +/// ``` +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct Match<'h> { + haystack: &'h str, + start: usize, + end: usize, +} + +impl<'h> Match<'h> { + /// Returns the byte offset of the start of the match in the haystack. The + /// start of the match corresponds to the position where the match begins + /// and includes the first byte in the match. + /// + /// It is guaranteed that `Match::start() <= Match::end()`. + /// + /// This is guaranteed to fall on a valid UTF-8 codepoint boundary. That + /// is, it will never be an offset that appears between the UTF-8 code + /// units of a UTF-8 encoded Unicode scalar value. Consequently, it is + /// always safe to slice the corresponding haystack using this offset. + #[inline] + pub fn start(&self) -> usize { + self.start + } + + /// Returns the byte offset of the end of the match in the haystack. The + /// end of the match corresponds to the byte immediately following the last + /// byte in the match. This means that `&slice[start..end]` works as one + /// would expect. + /// + /// It is guaranteed that `Match::start() <= Match::end()`. + /// + /// This is guaranteed to fall on a valid UTF-8 codepoint boundary. That + /// is, it will never be an offset that appears between the UTF-8 code + /// units of a UTF-8 encoded Unicode scalar value. Consequently, it is + /// always safe to slice the corresponding haystack using this offset. + #[inline] + pub fn end(&self) -> usize { + self.end + } + + /// Returns true if and only if this match has a length of zero. + /// + /// Note that an empty match can only occur when the regex itself can + /// match the empty string. Here are some examples of regexes that can + /// all match the empty string: `^`, `^$`, `\b`, `a?`, `a*`, `a{0}`, + /// `(foo|\d+|quux)?`. + #[inline] + pub fn is_empty(&self) -> bool { + self.start == self.end + } + + /// Returns the length, in bytes, of this match. + #[inline] + pub fn len(&self) -> usize { + self.end - self.start + } + + /// Returns the range over the starting and ending byte offsets of the + /// match in the haystack. + /// + /// It is always correct to slice the original haystack searched with this + /// range. That is, because the offsets are guaranteed to fall on valid + /// UTF-8 boundaries, the range returned is always valid. + #[inline] + pub fn range(&self) -> core::ops::Range<usize> { + self.start..self.end + } + + /// Returns the substring of the haystack that matched. + #[inline] + pub fn as_str(&self) -> &'h str { + &self.haystack[self.range()] + } + + /// Creates a new match from the given haystack and byte offsets. 
+ #[inline] + fn new(haystack: &'h str, start: usize, end: usize) -> Match<'h> { + Match { haystack, start, end } + } +} + +impl<'h> core::fmt::Debug for Match<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("Match") + .field("start", &self.start) + .field("end", &self.end) + .field("string", &self.as_str()) + .finish() + } +} + +impl<'h> From<Match<'h>> for &'h str { + fn from(m: Match<'h>) -> &'h str { + m.as_str() + } +} + +impl<'h> From<Match<'h>> for core::ops::Range<usize> { + fn from(m: Match<'h>) -> core::ops::Range<usize> { + m.range() + } +} + +/// Represents the capture groups for a single match. +/// +/// Capture groups refer to parts of a regex enclosed in parentheses. They can +/// be optionally named. The purpose of capture groups is to be able to +/// reference different parts of a match based on the original pattern. For +/// example, say you want to match the individual letters in a 5-letter word: +/// +/// ```text +/// (?<first>\w)(\w)(?:\w)\w(?<last>\w) +/// ``` +/// +/// This regex has 4 capture groups: +/// +/// * The group at index `0` corresponds to the overall match. It is always +/// present in every match and never has a name. +/// * The group at index `1` with name `first` corresponding to the first +/// letter. +/// * The group at index `2` with no name corresponding to the second letter. +/// * The group at index `3` with name `last` corresponding to the fifth and +/// last letter. +/// +/// Notice that `(?:\w)` was not listed above as a capture group despite it +/// being enclosed in parentheses. That's because `(?:pattern)` is a special +/// syntax that permits grouping but *without* capturing. The reason for not +/// treating it as a capture is that tracking and reporting capture groups +/// requires additional state that may lead to slower searches. So using as few +/// capture groups as possible can help performance. (Although the difference +/// in performance of a couple of capture groups is likely immaterial.) +/// +/// Values with this type are created by [`Regex::captures`] or +/// [`Regex::captures_iter`]. +/// +/// `'h` is the lifetime of the haystack that these captures were matched from. +/// +/// # Example +/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new(r"(?<first>\w)(\w)(?:\w)\w(?<last>\w)").unwrap(); +/// let caps = re.captures("toady").unwrap(); +/// assert_eq!("toady", &caps[0]); +/// assert_eq!("t", &caps["first"]); +/// assert_eq!("o", &caps[2]); +/// assert_eq!("y", &caps["last"]); +/// ``` +pub struct Captures<'h> { + haystack: &'h str, + caps: captures::Captures, + static_captures_len: Option<usize>, +} + +impl<'h> Captures<'h> { + /// Returns the `Match` associated with the capture group at index `i`. If + /// `i` does not correspond to a capture group, or if the capture group did + /// not participate in the match, then `None` is returned. + /// + /// When `i == 0`, this is guaranteed to return a non-`None` value. 
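+    ///
+    /// For instance (an illustrative editorial sketch; the pattern and
+    /// haystack are made up), index `0` always yields the overall match:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"[0-9]+").unwrap();
+    /// let caps = re.captures("abc123").unwrap();
+    /// assert_eq!(caps.get(0).unwrap().as_str(), "123");
+    /// ```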
+    ///
+    /// # Examples
+    ///
+    /// Get the substring that matched with a default of an empty string if the
+    /// group didn't participate in the match:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
+    /// let caps = re.captures("abc123").unwrap();
+    ///
+    /// let substr1 = caps.get(1).map_or("", |m| m.as_str());
+    /// let substr2 = caps.get(2).map_or("", |m| m.as_str());
+    /// assert_eq!(substr1, "123");
+    /// assert_eq!(substr2, "");
+    /// ```
+    #[inline]
+    pub fn get(&self, i: usize) -> Option<Match<'h>> {
+        self.caps
+            .get_group(i)
+            .map(|sp| Match::new(self.haystack, sp.start, sp.end))
+    }
+
+    /// Returns the `Match` associated with the capture group named `name`. If
+    /// `name` isn't a valid capture group or it refers to a group that didn't
+    /// match, then `None` is returned.
+    ///
+    /// Note that unlike `caps["name"]`, this returns a `Match` whose lifetime
+    /// matches the lifetime of the haystack in this `Captures` value.
+    /// Conversely, the substring returned by `caps["name"]` has a lifetime
+    /// of the `Captures` value, which is likely shorter than the lifetime of
+    /// the haystack. In some cases, it may be necessary to use this method to
+    /// access the matching substring instead of the `caps["name"]` notation.
+    ///
+    /// # Examples
+    ///
+    /// Get the substring that matched with a default of an empty string if the
+    /// group didn't participate in the match:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(
+    ///     r"[a-z]+(?:(?<numbers>[0-9]+)|(?<letters>[A-Z]+))",
+    /// ).unwrap();
+    /// let caps = re.captures("abc123").unwrap();
+    ///
+    /// let numbers = caps.name("numbers").map_or("", |m| m.as_str());
+    /// let letters = caps.name("letters").map_or("", |m| m.as_str());
+    /// assert_eq!(numbers, "123");
+    /// assert_eq!(letters, "");
+    /// ```
+    #[inline]
+    pub fn name(&self, name: &str) -> Option<Match<'h>> {
+        self.caps
+            .get_group_by_name(name)
+            .map(|sp| Match::new(self.haystack, sp.start, sp.end))
+    }
+
+    /// This is a convenience routine for extracting the substrings
+    /// corresponding to matching capture groups.
+    ///
+    /// This returns a tuple where the first element corresponds to the full
+    /// substring of the haystack that matched the regex. The second element is
+    /// an array of substrings, with each corresponding to the substring
+    /// that matched for a particular capture group.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the number of possible matching groups in this
+    /// `Captures` value is not fixed to `N` in all circumstances.
+    /// More precisely, this routine only works when `N` is equivalent to
+    /// [`Regex::static_captures_len`] minus `1`, since `N` does not count the
+    /// implicit group for the overall match.
+    ///
+    /// Stated more plainly, if the number of matching capture groups in a
+    /// regex can vary from match to match, then this function always panics.
+    ///
+    /// For example, `(a)(b)|(c)` could produce two matching capture groups
+    /// or one matching capture group for any given match. Therefore, one
+    /// cannot use `extract` with such a pattern.
+    ///
+    /// But a pattern like `(a)(b)|(c)(d)` can be used with `extract` because
+    /// the number of capture groups in every match is always equivalent,
+    /// even if the capture _indices_ in each match are not.
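+    ///
+    /// A short editorial sketch of that last point (the pattern is made up):
+    /// either branch of the alternation yields exactly two matching groups,
+    /// so `extract` works for both.
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"(a)(b)|(c)(d)").unwrap();
+    /// let (_, [x, y]) = re.captures("cd").map(|c| c.extract()).unwrap();
+    /// assert_eq!((x, y), ("c", "d"));
+    /// ```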
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap();
+    /// let hay = "On 2010-03-14, I became a Tennessee lamb.";
+    /// let Some((full, [year, month, day])) =
+    ///     re.captures(hay).map(|caps| caps.extract()) else { return };
+    /// assert_eq!("2010-03-14", full);
+    /// assert_eq!("2010", year);
+    /// assert_eq!("03", month);
+    /// assert_eq!("14", day);
+    /// ```
+    ///
+    /// # Example: iteration
+    ///
+    /// This example shows how to use this method when iterating over all
+    /// `Captures` matches in a haystack.
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap();
+    /// let hay = "1973-01-05, 1975-08-25 and 1980-10-18";
+    ///
+    /// let mut dates: Vec<(&str, &str, &str)> = vec![];
+    /// for (_, [y, m, d]) in re.captures_iter(hay).map(|c| c.extract()) {
+    ///     dates.push((y, m, d));
+    /// }
+    /// assert_eq!(dates, vec![
+    ///     ("1973", "01", "05"),
+    ///     ("1975", "08", "25"),
+    ///     ("1980", "10", "18"),
+    /// ]);
+    /// ```
+    ///
+    /// # Example: parsing different formats
+    ///
+    /// This API is particularly useful when you need to extract a particular
+    /// value that might occur in a different format. Consider, for example,
+    /// an identifier that might be in double quotes or single quotes:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r#"id:(?:"([^"]+)"|'([^']+)')"#).unwrap();
+    /// let hay = r#"The first is id:"foo" and the second is id:'bar'."#;
+    /// let mut ids = vec![];
+    /// for (_, [id]) in re.captures_iter(hay).map(|c| c.extract()) {
+    ///     ids.push(id);
+    /// }
+    /// assert_eq!(ids, vec!["foo", "bar"]);
+    /// ```
+    pub fn extract<const N: usize>(&self) -> (&'h str, [&'h str; N]) {
+        let len = self
+            .static_captures_len
+            .expect("number of capture groups can vary in a match")
+            .checked_sub(1)
+            .expect("number of groups is always greater than zero");
+        assert_eq!(N, len, "asked for {} groups, but must ask for {}", N, len);
+        // The regex-automata variant of extract is a bit more permissive.
+        // It doesn't require the number of matching capturing groups to be
+        // static, and you can even request fewer groups than what's there. So
+        // this is guaranteed to never panic because we've asserted above that
+        // the user has requested precisely the number of groups that must be
+        // present in any match for this regex.
+        self.caps.extract(self.haystack)
+    }
+
+    /// Expands all instances of `$ref` in `replacement` to the corresponding
+    /// capture group, and writes them to the `dst` buffer given. A `ref` can
+    /// be a capture group index or a name. If `ref` doesn't refer to a capture
+    /// group that participated in the match, then it is replaced with the
+    /// empty string.
+    ///
+    /// # Format
+    ///
+    /// The format of the replacement string supports two different kinds of
+    /// capture references: unbraced and braced.
+    ///
+    /// For the unbraced format, the format supported is `$ref` where `ref`
+    /// can be any sequence of characters in the class `[0-9A-Za-z_]`. `ref`
+    /// is always the longest possible parse. So for example, `$1a` corresponds
+    /// to the capture group named `1a` and not the capture group at index `1`.
+    /// If `ref` matches `^[0-9]+$`, then it is treated as a capture group
+    /// index itself and not a name.
+    ///
+    /// For the braced format, the format supported is `${ref}` where `ref` can
+    /// be any sequence of bytes except for `}`. If no closing brace occurs,
+    /// then it is not considered a capture reference. As with the unbraced
+    /// format, if `ref` matches `^[0-9]+$`, then it is treated as a capture
+    /// group index and not a name.
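+    ///
+    /// As a brief editorial sketch of the two parses (the pattern and
+    /// replacement are made up):
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"([0-9]+)").unwrap();
+    /// let caps = re.captures("42").unwrap();
+    /// let mut dst = String::new();
+    /// // `$1a` parses as the group *named* `1a`, which doesn't exist, so it
+    /// // expands to the empty string. `${1}a` is group `1` followed by `a`.
+    /// caps.expand("$1a versus ${1}a", &mut dst);
+    /// assert_eq!(dst, " versus 42a");
+    /// ```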
+    ///
+    /// The braced format is useful for exerting precise control over the name
+    /// of the capture reference. For example, `${1}a` corresponds to the
+    /// capture group reference `1` followed by the letter `a`, whereas `$1a`
+    /// (as mentioned above) corresponds to the capture group reference `1a`.
+    /// The braced format is also useful for expressing capture group names
+    /// that use characters not supported by the unbraced format. For example,
+    /// `${foo[bar].baz}` refers to the capture group named `foo[bar].baz`.
+    ///
+    /// If a capture group reference is found and it does not refer to a valid
+    /// capture group, then it will be replaced with the empty string.
+    ///
+    /// To write a literal `$`, use `$$`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(
+    ///     r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
+    /// ).unwrap();
+    /// let hay = "On 14-03-2010, I became a Tennessee lamb.";
+    /// let caps = re.captures(hay).unwrap();
+    ///
+    /// let mut dst = String::new();
+    /// caps.expand("year=$year, month=$month, day=$day", &mut dst);
+    /// assert_eq!(dst, "year=2010, month=03, day=14");
+    /// ```
+    #[inline]
+    pub fn expand(&self, replacement: &str, dst: &mut String) {
+        self.caps.interpolate_string_into(self.haystack, replacement, dst);
+    }
+
+    /// Returns an iterator over all capture groups. This includes both
+    /// matching and non-matching groups.
+    ///
+    /// The iterator always yields at least one matching group: the first group
+    /// (at index `0`) with no name. Subsequent groups are returned in the order
+    /// of their opening parenthesis in the regex.
+    ///
+    /// The elements yielded have type `Option<Match<'h>>`, where a non-`None`
+    /// value is present if the capture group matches.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap();
+    /// let caps = re.captures("AZ").unwrap();
+    ///
+    /// let mut it = caps.iter();
+    /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("AZ"));
+    /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("A"));
+    /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), None);
+    /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("Z"));
+    /// assert_eq!(it.next(), None);
+    /// ```
+    #[inline]
+    pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 'h> {
+        SubCaptureMatches { haystack: self.haystack, it: self.caps.iter() }
+    }
+
+    /// Returns the total number of capture groups. This includes both
+    /// matching and non-matching groups.
+    ///
+    /// The length returned is always equivalent to the number of elements
+    /// yielded by [`Captures::iter`]. Consequently, the length is always
+    /// greater than zero since every `Captures` value always includes the
+    /// match for the entire regex.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap();
+    /// let caps = re.captures("AZ").unwrap();
+    /// assert_eq!(caps.len(), 4);
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.caps.group_len()
+    }
+}
+
+impl<'h> core::fmt::Debug for Captures<'h> {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        /// A little helper type to provide a nice map-like debug
+        /// representation for our capturing group spans.
+        ///
+        /// regex-automata has something similar, but it includes the pattern
+        /// ID in its debug output, which is confusing. It also doesn't include
+        /// the strings that match, because a regex-automata `Captures` doesn't
+        /// borrow the haystack.
+        struct CapturesDebugMap<'a> {
+            caps: &'a Captures<'a>,
+        }
+
+        impl<'a> core::fmt::Debug for CapturesDebugMap<'a> {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                let mut map = f.debug_map();
+                let names =
+                    self.caps.caps.group_info().pattern_names(PatternID::ZERO);
+                for (group_index, maybe_name) in names.enumerate() {
+                    let key = Key(group_index, maybe_name);
+                    match self.caps.get(group_index) {
+                        None => map.entry(&key, &None::<()>),
+                        Some(mat) => map.entry(&key, &Value(mat)),
+                    };
+                }
+                map.finish()
+            }
+        }
+
+        struct Key<'a>(usize, Option<&'a str>);
+
+        impl<'a> core::fmt::Debug for Key<'a> {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                write!(f, "{}", self.0)?;
+                if let Some(name) = self.1 {
+                    write!(f, "/{:?}", name)?;
+                }
+                Ok(())
+            }
+        }
+
+        struct Value<'a>(Match<'a>);
+
+        impl<'a> core::fmt::Debug for Value<'a> {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                write!(
+                    f,
+                    "{}..{}/{:?}",
+                    self.0.start(),
+                    self.0.end(),
+                    self.0.as_str()
+                )
+            }
+        }
+
+        f.debug_tuple("Captures")
+            .field(&CapturesDebugMap { caps: self })
+            .finish()
+    }
+}
+
+/// Get a matching capture group's haystack substring by index.
+///
+/// The haystack substring returned can't outlive the `Captures` object if this
+/// method is used, because of how `Index` is defined (normally `a[i]` is part
+/// of `a` and can't outlive it). To work around this limitation, use
+/// [`Captures::get`] instead.
+///
+/// `'h` is the lifetime of the matched haystack, but the lifetime of the
+/// `&str` returned by this implementation is the lifetime of the `Captures`
+/// value itself.
+///
+/// # Panics
+///
+/// If there is no matching group at the given index.
+impl<'h> core::ops::Index<usize> for Captures<'h> {
+    type Output = str;
+
+    // The lifetime is written out to make it clear that the &str returned
+    // does NOT have a lifetime equivalent to 'h.
+    fn index<'a>(&'a self, i: usize) -> &'a str {
+        self.get(i)
+            .map(|m| m.as_str())
+            .unwrap_or_else(|| panic!("no group at index '{}'", i))
+    }
+}
+
+/// Get a matching capture group's haystack substring by name.
+///
+/// The haystack substring returned can't outlive the `Captures` object if this
+/// method is used, because of how `Index` is defined (normally `a[i]` is part
+/// of `a` and can't outlive it). To work around this limitation, use
+/// [`Captures::name`] instead.
+///
+/// `'h` is the lifetime of the matched haystack, but the lifetime of the
+/// `&str` returned by this implementation is the lifetime of the `Captures`
+/// value itself.
+///
+/// `'n` is the lifetime of the group name used to index the `Captures` value.
+///
+/// # Panics
+///
+/// If there is no matching group with the given name.
+impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> { + type Output = str; + + fn index<'a>(&'a self, name: &'n str) -> &'a str { + self.name(name) + .map(|m| m.as_str()) + .unwrap_or_else(|| panic!("no group named '{}'", name)) + } +} + +/// A low level representation of the byte offsets of each capture group. +/// +/// You can think of this as a lower level [`Captures`], where this type does +/// not support named capturing groups directly and it does not borrow the +/// haystack that these offsets were matched on. +/// +/// Primarily, this type is useful when using the lower level `Regex` APIs such +/// as [`Regex::captures_read`], which permits amortizing the allocation in +/// which capture match offsets are stored. +/// +/// In order to build a value of this type, you'll need to call the +/// [`Regex::capture_locations`] method. The value returned can then be reused +/// in subsequent searches for that regex. Using it for other regexes may +/// result in a panic or otherwise incorrect results. +/// +/// # Example +/// +/// This example shows how to create and use `CaptureLocations` in a search. +/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); +/// let mut locs = re.capture_locations(); +/// let m = re.captures_read(&mut locs, "Bruce Springsteen").unwrap(); +/// assert_eq!(0..17, m.range()); +/// assert_eq!(Some((0, 17)), locs.get(0)); +/// assert_eq!(Some((0, 5)), locs.get(1)); +/// assert_eq!(Some((6, 17)), locs.get(2)); +/// +/// // Asking for an invalid capture group always returns None. +/// assert_eq!(None, locs.get(3)); +/// # // literals are too big for 32-bit usize: #1041 +/// # #[cfg(target_pointer_width = "64")] +/// assert_eq!(None, locs.get(34973498648)); +/// # #[cfg(target_pointer_width = "64")] +/// assert_eq!(None, locs.get(9944060567225171988)); +/// ``` +#[derive(Clone, Debug)] +pub struct CaptureLocations(captures::Captures); + +/// A type alias for `CaptureLocations` for backwards compatibility. +/// +/// Previously, we exported `CaptureLocations` as `Locations` in an +/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), +/// we continue re-exporting the same undocumented API. +#[doc(hidden)] +pub type Locations = CaptureLocations; + +impl CaptureLocations { + /// Returns the start and end byte offsets of the capture group at index + /// `i`. This returns `None` if `i` is not a valid capture group or if the + /// capture group did not match. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// re.captures_read(&mut locs, "Bruce Springsteen").unwrap(); + /// assert_eq!(Some((0, 17)), locs.get(0)); + /// assert_eq!(Some((0, 5)), locs.get(1)); + /// assert_eq!(Some((6, 17)), locs.get(2)); + /// ``` + #[inline] + pub fn get(&self, i: usize) -> Option<(usize, usize)> { + self.0.get_group(i).map(|sp| (sp.start, sp.end)) + } + + /// Returns the total number of capture groups (even if they didn't match). + /// That is, the length returned is unaffected by the result of a search. + /// + /// This is always at least `1` since every regex has at least `1` + /// capturing group that corresponds to the entire match. 
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap();
+    /// let mut locs = re.capture_locations();
+    /// assert_eq!(3, locs.len());
+    /// re.captures_read(&mut locs, "Bruce Springsteen").unwrap();
+    /// assert_eq!(3, locs.len());
+    /// ```
+    ///
+    /// Notice that the length is always at least `1`, regardless of the regex:
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let re = Regex::new(r"").unwrap();
+    /// let locs = re.capture_locations();
+    /// assert_eq!(1, locs.len());
+    ///
+    /// // [a&&b] is a regex that never matches anything.
+    /// let re = Regex::new(r"[a&&b]").unwrap();
+    /// let locs = re.capture_locations();
+    /// assert_eq!(1, locs.len());
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        // self.0.group_len() returns 0 if the underlying captures doesn't
+        // represent a match, but the behavior guaranteed for this method is
+        // that the length doesn't change based on a match or not.
+        self.0.group_info().group_len(PatternID::ZERO)
+    }
+
+    /// An alias for the `get` method for backwards compatibility.
+    ///
+    /// Previously, we exported `get` as `pos` in an undocumented API. To
+    /// prevent breaking that code (e.g., in `regex-capi`), we continue
+    /// re-exporting the same undocumented API.
+    #[doc(hidden)]
+    #[inline]
+    pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
+        self.get(i)
+    }
+}
+
+/// An iterator over all non-overlapping matches in a haystack.
+///
+/// This iterator yields [`Match`] values. The iterator stops when no more
+/// matches can be found.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'h` is the
+/// lifetime of the haystack.
+///
+/// This iterator is created by [`Regex::find_iter`].
+///
+/// # Time complexity
+///
+/// Note that since an iterator runs potentially many searches on the haystack
+/// and since each search has worst case `O(m * n)` time complexity, the
+/// overall worst case time complexity for iteration is `O(m * n^2)`.
+#[derive(Debug)]
+pub struct Matches<'r, 'h> {
+    haystack: &'h str,
+    it: meta::FindMatches<'r, 'h>,
+}
+
+impl<'r, 'h> Iterator for Matches<'r, 'h> {
+    type Item = Match<'h>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Match<'h>> {
+        self.it
+            .next()
+            .map(|sp| Match::new(self.haystack, sp.start(), sp.end()))
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        // This can actually be up to 2x faster than calling `next()` until
+        // completion, because counting matches when using a DFA only requires
+        // finding the end of each match. But returning a `Match` via `next()`
+        // requires the start of each match which, with a DFA, requires a
+        // separate reverse scan to find it.
+        self.it.count()
+    }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for Matches<'r, 'h> {}
+
+/// An iterator over all non-overlapping capture matches in a haystack.
+///
+/// This iterator yields [`Captures`] values. The iterator stops when no more
+/// matches can be found.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'h` is the
+/// lifetime of the haystack.
+///
+/// This iterator is created by [`Regex::captures_iter`].
+///
+/// # Time complexity
+///
+/// Note that since an iterator runs potentially many searches on the haystack
+/// and since each search has worst case `O(m * n)` time complexity, the
+/// overall worst case time complexity for iteration is `O(m * n^2)`.
+#[derive(Debug)]
+pub struct CaptureMatches<'r, 'h> {
+    haystack: &'h str,
+    it: meta::CapturesMatches<'r, 'h>,
+}
+
+impl<'r, 'h> Iterator for CaptureMatches<'r, 'h> {
+    type Item = Captures<'h>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Captures<'h>> {
+        let static_captures_len = self.it.regex().static_captures_len();
+        self.it.next().map(|caps| Captures {
+            haystack: self.haystack,
+            caps,
+            static_captures_len,
+        })
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        // This can actually be up to 2x faster than calling `next()` until
+        // completion, because counting matches when using a DFA only requires
+        // finding the end of each match. But returning a `Match` via `next()`
+        // requires the start of each match which, with a DFA, requires a
+        // separate reverse scan to find it.
+        self.it.count()
+    }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for CaptureMatches<'r, 'h> {}
+
+/// An iterator over all substrings delimited by a regex match.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'h` is the
+/// lifetime of the string being split.
+///
+/// This iterator is created by [`Regex::split`].
+///
+/// # Time complexity
+///
+/// Note that since an iterator runs potentially many searches on the haystack
+/// and since each search has worst case `O(m * n)` time complexity, the
+/// overall worst case time complexity for iteration is `O(m * n^2)`.
+#[derive(Debug)]
+pub struct Split<'r, 'h> {
+    haystack: &'h str,
+    it: meta::Split<'r, 'h>,
+}
+
+impl<'r, 'h> Iterator for Split<'r, 'h> {
+    type Item = &'h str;
+
+    #[inline]
+    fn next(&mut self) -> Option<&'h str> {
+        self.it.next().map(|span| &self.haystack[span])
+    }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {}
+
+/// An iterator over at most `N` substrings delimited by a regex match.
+///
+/// The last substring yielded by this iterator will be whatever remains after
+/// `N-1` splits.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'h` is the
+/// lifetime of the string being split.
+///
+/// This iterator is created by [`Regex::splitn`].
+///
+/// # Time complexity
+///
+/// Note that since an iterator runs potentially many searches on the haystack
+/// and since each search has worst case `O(m * n)` time complexity, the
+/// overall worst case time complexity for iteration is `O(m * n^2)`.
+///
+/// Although note that the worst case time here has an upper bound given
+/// by the `limit` parameter to [`Regex::splitn`].
+#[derive(Debug)]
+pub struct SplitN<'r, 'h> {
+    haystack: &'h str,
+    it: meta::SplitN<'r, 'h>,
+}
+
+impl<'r, 'h> Iterator for SplitN<'r, 'h> {
+    type Item = &'h str;
+
+    #[inline]
+    fn next(&mut self) -> Option<&'h str> {
+        self.it.next().map(|span| &self.haystack[span])
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {}
+
+/// An iterator over the names of all capture groups in a regex.
+///
+/// This iterator yields values of type `Option<&str>` in order of the opening
+/// capture group parenthesis in the regex pattern. `None` is yielded for
+/// groups with no name. The first element always corresponds to the implicit
+/// and unnamed group for the overall match.
+///
+/// `'r` is the lifetime of the compiled regular expression.
+///
+/// This iterator is created by [`Regex::capture_names`].
+#[derive(Clone, Debug)] +pub struct CaptureNames<'r>(captures::GroupInfoPatternNames<'r>); + +impl<'r> Iterator for CaptureNames<'r> { + type Item = Option<&'r str>; + + #[inline] + fn next(&mut self) -> Option<Option<&'r str>> { + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.0.count() + } +} + +impl<'r> ExactSizeIterator for CaptureNames<'r> {} + +impl<'r> core::iter::FusedIterator for CaptureNames<'r> {} + +/// An iterator over all group matches in a [`Captures`] value. +/// +/// This iterator yields values of type `Option<Match<'h>>`, where `'h` is the +/// lifetime of the haystack that the matches are for. The order of elements +/// yielded corresponds to the order of the opening parenthesis for the group +/// in the regex pattern. `None` is yielded for groups that did not participate +/// in the match. +/// +/// The first element always corresponds to the implicit group for the overall +/// match. Since this iterator is created by a [`Captures`] value, and a +/// `Captures` value is only created when a match occurs, it follows that the +/// first element yielded by this iterator is guaranteed to be non-`None`. +/// +/// The lifetime `'c` corresponds to the lifetime of the `Captures` value that +/// created this iterator, and the lifetime `'h` corresponds to the originally +/// matched haystack. +#[derive(Clone, Debug)] +pub struct SubCaptureMatches<'c, 'h> { + haystack: &'h str, + it: captures::CapturesPatternIter<'c>, +} + +impl<'c, 'h> Iterator for SubCaptureMatches<'c, 'h> { + type Item = Option<Match<'h>>; + + #[inline] + fn next(&mut self) -> Option<Option<Match<'h>>> { + self.it.next().map(|group| { + group.map(|sp| Match::new(self.haystack, sp.start, sp.end)) + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.it.count() + } +} + +impl<'c, 'h> ExactSizeIterator for SubCaptureMatches<'c, 'h> {} + +impl<'c, 'h> core::iter::FusedIterator for SubCaptureMatches<'c, 'h> {} + +/// A trait for types that can be used to replace matches in a haystack. +/// +/// In general, users of this crate shouldn't need to implement this trait, +/// since implementations are already provided for `&str` along with other +/// variants of string types, as well as `FnMut(&Captures) -> String` (or any +/// `FnMut(&Captures) -> T` where `T: AsRef<str>`). Those cover most use cases, +/// but callers can implement this trait directly if necessary. +/// +/// # Example +/// +/// This example shows a basic implementation of the `Replacer` trait. This +/// can be done much more simply using the replacement string interpolation +/// support (e.g., `$first $last`), but this approach avoids needing to parse +/// the replacement string at all. +/// +/// ``` +/// use regex::{Captures, Regex, Replacer}; +/// +/// struct NameSwapper; +/// +/// impl Replacer for NameSwapper { +/// fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { +/// dst.push_str(&caps["first"]); +/// dst.push_str(" "); +/// dst.push_str(&caps["last"]); +/// } +/// } +/// +/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap(); +/// let result = re.replace("Springsteen, Bruce", NameSwapper); +/// assert_eq!(result, "Bruce Springsteen"); +/// ``` +pub trait Replacer { + /// Appends possibly empty data to `dst` to replace the current match. 
+ /// + /// The current match is represented by `caps`, which is guaranteed to + /// have a match at capture group `0`. + /// + /// For example, a no-op replacement would be `dst.push_str(&caps[0])`. + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String); + + /// Return a fixed unchanging replacement string. + /// + /// When doing replacements, if access to [`Captures`] is not needed (e.g., + /// the replacement string does not need `$` expansion), then it can be + /// beneficial to avoid finding sub-captures. + /// + /// In general, this is called once for every call to a replacement routine + /// such as [`Regex::replace_all`]. + fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> { + None + } + + /// Returns a type that implements `Replacer`, but that borrows and wraps + /// this `Replacer`. + /// + /// This is useful when you want to take a generic `Replacer` (which might + /// not be cloneable) and use it without consuming it, so it can be used + /// more than once. + /// + /// # Example + /// + /// ``` + /// use regex::{Regex, Replacer}; + /// + /// fn replace_all_twice<R: Replacer>( + /// re: Regex, + /// src: &str, + /// mut rep: R, + /// ) -> String { + /// let dst = re.replace_all(src, rep.by_ref()); + /// let dst = re.replace_all(&dst, rep.by_ref()); + /// dst.into_owned() + /// } + /// ``` + fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { + ReplacerRef(self) + } +} + +impl<'a> Replacer for &'a str { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + caps.expand(*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a String { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_str().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl Replacer for String { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_str().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<'a> Replacer for Cow<'a, str> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_ref().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a Cow<'a, str> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_ref().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<F, T> Replacer for F +where + F: FnMut(&Captures<'_>) -> T, + T: AsRef<str>, +{ + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + dst.push_str((*self)(caps).as_ref()); + } +} + +/// A by-reference adaptor for a [`Replacer`]. +/// +/// This permits reusing the same `Replacer` value in multiple calls to a +/// replacement routine like [`Regex::replace_all`]. +/// +/// This type is created by [`Replacer::by_ref`]. +#[derive(Debug)] +pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); + +impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.0.replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + self.0.no_expansion() + } +} + +/// A helper type for forcing literal string replacement. 
+///
+/// It can be used with routines like [`Regex::replace`] and
+/// [`Regex::replace_all`] to do a literal string replacement without expanding
+/// `$name` to their corresponding capture groups. This can be both convenient
+/// (to avoid escaping `$`, for example) and faster (since capture groups
+/// don't need to be found).
+///
+/// `'s` is the lifetime of the literal string to use.
+///
+/// # Example
+///
+/// ```
+/// use regex::{NoExpand, Regex};
+///
+/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap();
+/// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
+/// assert_eq!(result, "$2 $last");
+/// ```
+#[derive(Clone, Debug)]
+pub struct NoExpand<'s>(pub &'s str);
+
+impl<'s> Replacer for NoExpand<'s> {
+    fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) {
+        dst.push_str(self.0);
+    }
+
+    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
+        Some(Cow::Borrowed(self.0))
+    }
+}
+
+/// Quickly checks the given replacement string for whether interpolation
+/// should be done on it. It returns `None` if a `$` was found anywhere in the
+/// given string, which suggests interpolation needs to be done. But if there's
+/// no `$` anywhere, then interpolation definitely does not need to be done. In
+/// that case, the given string is returned as a borrowed `Cow`.
+///
+/// This is meant to be used to implement the `Replacer::no_expansion` method
+/// in its various trait impls.
+fn no_expansion<T: AsRef<str>>(replacement: &T) -> Option<Cow<'_, str>> {
+    let replacement = replacement.as_ref();
+    match crate::find_byte::find_byte(b'$', replacement.as_bytes()) {
+        Some(_) => None,
+        None => Some(Cow::Borrowed(replacement)),
+    }
+}
diff --git a/vendor/regex/src/regexset/bytes.rs b/vendor/regex/src/regexset/bytes.rs
new file mode 100644
index 0000000..1220a14
--- /dev/null
+++ b/vendor/regex/src/regexset/bytes.rs
@@ -0,0 +1,710 @@
+use alloc::string::String;
+
+use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter};
+
+use crate::{bytes::RegexSetBuilder, Error};
+
+/// Match multiple, possibly overlapping, regexes in a single search.
+///
+/// A regex set corresponds to the union of zero or more regular expressions.
+/// That is, a regex set will match a haystack when at least one of its
+/// constituent regexes matches. A regex set as it's formulated here provides a
+/// touch more power: it will also report *which* regular expressions in the
+/// set match. Indeed, this is the key difference between regex sets and a
+/// single `Regex` with many alternates, since only one alternate can match at
+/// a time.
+///
+/// For example, consider regular expressions to match email addresses and
+/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
+/// regex set is constructed from those regexes, then searching the haystack
+/// `foo@example.com` will report both regexes as matching. Of course, one
+/// could accomplish this by compiling each regex on its own and doing two
+/// searches over the haystack. The key advantage of using a regex set is
+/// that it will report the matching regexes using a *single pass through the
+/// haystack*. If one has hundreds or thousands of regexes to match repeatedly
+/// (like a URL router for a complex web application or a user agent matcher),
+/// then a regex set *can* realize huge performance gains.
+///
+/// Unlike the top-level [`RegexSet`](crate::RegexSet), this `RegexSet`
+/// searches haystacks with type `&[u8]` instead of `&str`. Consequently, this
+/// `RegexSet` is permitted to match invalid UTF-8.
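+///
+/// As a small editorial sketch of that point (the patterns here are made
+/// up), a haystack need not be valid UTF-8:
+///
+/// ```
+/// use regex::bytes::RegexSet;
+///
+/// let set = RegexSet::new([r"(?-u)\xFF", r"abc"]).unwrap();
+/// // The byte `0xFF` can never appear in valid UTF-8.
+/// assert!(set.is_match(b"\xFFabc"));
+/// ```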
+///
+/// # Limitations
+///
+/// Regex sets are limited to answering the following two questions:
+///
+/// 1. Does any regex in the set match?
+/// 2. If so, which regexes in the set match?
+///
+/// As with the main [`Regex`][crate::bytes::Regex] type, it is cheaper to ask
+/// (1) instead of (2) since the matching engines can stop after the first
+/// match is found.
+///
+/// You cannot directly extract [`Match`][crate::bytes::Match] or
+/// [`Captures`][crate::bytes::Captures] objects from a regex set. If you need
+/// these operations, the recommended approach is to compile each pattern in
+/// the set independently and scan the exact same haystack a second time with
+/// those independently compiled patterns:
+///
+/// ```
+/// use regex::bytes::{Regex, RegexSet};
+///
+/// let patterns = ["foo", "bar"];
+/// // Both patterns will match different ranges of this string.
+/// let hay = b"barfoo";
+///
+/// // Compile a set matching any of our patterns.
+/// let set = RegexSet::new(patterns).unwrap();
+/// // Compile each pattern independently.
+/// let regexes: Vec<_> = set
+///     .patterns()
+///     .iter()
+///     .map(|pat| Regex::new(pat).unwrap())
+///     .collect();
+///
+/// // Match against the whole set first and identify the individual
+/// // matching patterns.
+/// let matches: Vec<&[u8]> = set
+///     .matches(hay)
+///     .into_iter()
+///     // Dereference the match index to get the corresponding
+///     // compiled pattern.
+///     .map(|index| &regexes[index])
+///     // To get match locations or any other info, we then have to search the
+///     // exact same haystack again, using our separately-compiled pattern.
+///     .map(|re| re.find(hay).unwrap().as_bytes())
+///     .collect();
+///
+/// // Matches arrive in the order the constituent patterns were declared,
+/// // not the order they appear in the haystack.
+/// assert_eq!(vec![&b"foo"[..], &b"bar"[..]], matches);
+/// ```
+///
+/// # Performance
+///
+/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
+/// search takes `O(m * n)` time, where `m` is proportional to the size of the
+/// regex set and `n` is proportional to the length of the haystack.
+///
+/// # Trait implementations
+///
+/// The `Default` trait is implemented for `RegexSet`. The default value
+/// is an empty set. An empty set can also be explicitly constructed via
+/// [`RegexSet::empty`].
+///
+/// # Example
+///
+/// This shows how the above two regexes (for matching email addresses and
+/// domains) might work:
+///
+/// ```
+/// use regex::bytes::RegexSet;
+///
+/// let set = RegexSet::new(&[
+///     r"[a-z]+@[a-z]+\.(com|org|net)",
+///     r"[a-z]+\.(com|org|net)",
+/// ]).unwrap();
+///
+/// // Ask whether any regexes in the set match.
+/// assert!(set.is_match(b"foo@example.com"));
+///
+/// // Identify which regexes in the set match.
+/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect();
+/// assert_eq!(vec![0, 1], matches);
+///
+/// // Try again, but with a haystack that only matches one of the regexes.
+/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect();
+/// assert_eq!(vec![1], matches);
+///
+/// // Try again, but with a haystack that doesn't match any regex in the set.
+/// let matches: Vec<_> = set.matches(b"example").into_iter().collect();
+/// assert!(matches.is_empty());
+/// ```
+///
+/// Note that it would be possible to adapt the above example to using `Regex`
+/// with an expression like:
+///
+/// ```text
+/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
+/// ```
+///
+/// After a match, one could then inspect the capture groups to figure out
+/// which alternates matched. The problem is that it is hard to make this
+/// approach scale when there are many regexes since the overlap between each
+/// alternate isn't always obvious to reason about.
+#[derive(Clone)]
+pub struct RegexSet {
+    pub(crate) meta: meta::Regex,
+    pub(crate) patterns: alloc::sync::Arc<[String]>,
+}
+
+impl RegexSet {
+    /// Create a new regex set with the given regular expressions.
+    ///
+    /// This takes an iterator of `S`, where `S` is something that can produce
+    /// a `&str`. If any of the strings in the iterator are not valid regular
+    /// expressions, then an error is returned.
+    ///
+    /// # Example
+    ///
+    /// Create a new regex set from an iterator of strings:
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
+    /// assert!(set.is_match(b"foo"));
+    /// ```
+    pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
+    where
+        S: AsRef<str>,
+        I: IntoIterator<Item = S>,
+    {
+        RegexSetBuilder::new(exprs).build()
+    }
+
+    /// Create a new empty regex set.
+    ///
+    /// An empty regex set never matches anything.
+    ///
+    /// This is a convenience function for `RegexSet::new([])`, but doesn't
+    /// require one to specify the type of the input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::empty();
+    /// assert!(set.is_empty());
+    /// // an empty set matches nothing
+    /// assert!(!set.is_match(b""));
+    /// ```
+    pub fn empty() -> RegexSet {
+        let empty: [&str; 0] = [];
+        RegexSetBuilder::new(empty).build().unwrap()
+    }
+
+    /// Returns true if and only if one of the regexes in this set matches
+    /// the haystack given.
+    ///
+    /// This method should be preferred if you only need to test whether any
+    /// of the regexes in the set match, but don't care about *which*
+    /// regexes matched. This is because the underlying matching engine will
+    /// quit immediately after seeing the first match instead of continuing to
+    /// find all matches.
+    ///
+    /// Note that as with searches using [`Regex`](crate::bytes::Regex), the
+    /// expression is unanchored by default. That is, if the regex does not
+    /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
+    /// to match anywhere in the haystack.
+    ///
+    /// # Example
+    ///
+    /// Tests whether a set matches somewhere in a haystack:
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
+    /// assert!(set.is_match(b"foo"));
+    /// assert!(!set.is_match("☃".as_bytes()));
+    /// ```
+    #[inline]
+    pub fn is_match(&self, haystack: &[u8]) -> bool {
+        self.is_match_at(haystack, 0)
+    }
+
+    /// Returns true if and only if one of the regexes in this set matches the
+    /// haystack given, with the search starting at the offset given.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `start >= haystack.len() + 1`.
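[Editor's note: the panic condition above allows `start == haystack.len()`, i.e. an empty search at the very end of the haystack; only `start > haystack.len()` panics. A minimal sketch of that boundary, assuming only the documented `is_match_at` behavior:]

```rust
use regex::bytes::RegexSet;

fn main() {
    let set = RegexSet::new([r"a*"]).unwrap();
    let hay = b"abc";
    // start == haystack.len() is allowed: the empty suffix is searched,
    // and `a*` matches the empty string there.
    assert!(set.is_match_at(hay, 3));
    // start == haystack.len() + 1 is out of bounds and would panic:
    // set.is_match_at(hay, 4);
}
```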
+ /// + /// # Example + /// + /// This example shows the significance of `start`. Namely, consider a + /// haystack `foobar` and a desire to execute a search starting at offset + /// `3`. You could search a substring explicitly, but then the look-around + /// assertions won't work correctly. Instead, you can use this method to + /// specify the start position of a search. + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); + /// let hay = b"foobar"; + /// // We get a match here, but it's probably not intended. + /// assert!(set.is_match(&hay[3..])); + /// // No match because the assertions take the context into account. + /// assert!(!set.is_match_at(hay, 3)); + /// ``` + #[inline] + pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool { + self.meta.is_match(Input::new(haystack).span(start..haystack.len())) + } + + /// Returns the set of regexes that match in the given haystack. + /// + /// The set returned contains the index of each regex that matches in + /// the given haystack. The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// Note that as with searches using [`Regex`](crate::bytes::Regex), the + /// expression is unanchored by default. That is, if the regex does not + /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted + /// to match anywhere in the haystack. + /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"\w+", + /// r"\d+", + /// r"\pL+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]).unwrap(); + /// let matches: Vec<_> = set.matches(b"foobar").into_iter().collect(); + /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); + /// + /// // You can also test whether a particular regex matched: + /// let matches = set.matches(b"foobar"); + /// assert!(!matches.matched(5)); + /// assert!(matches.matched(6)); + /// ``` + #[inline] + pub fn matches(&self, haystack: &[u8]) -> SetMatches { + self.matches_at(haystack, 0) + } + + /// Returns the set of regexes that match in the given haystack. + /// + /// The set returned contains the index of each regex that matches in + /// the given haystack. The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); + /// let hay = b"foobar"; + /// // We get matches here, but it's probably not intended. + /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect(); + /// assert_eq!(matches, vec![0, 1]); + /// // No matches because the assertions take the context into account. 
+    /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect();
+    /// assert_eq!(matches, vec![]);
+    /// ```
+    #[inline]
+    pub fn matches_at(&self, haystack: &[u8], start: usize) -> SetMatches {
+        let input = Input::new(haystack).span(start..haystack.len());
+        let mut patset = PatternSet::new(self.meta.pattern_len());
+        self.meta.which_overlapping_matches(&input, &mut patset);
+        SetMatches(patset)
+    }
+
+    /// Returns the same as matches, but starts the search at the given
+    /// offset and stores the matches into the slice given.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// `matches` must have a length that is at least the number of regexes
+    /// in this set.
+    ///
+    /// This method returns true if and only if at least one member of
+    /// `matches` is true after executing the set against `haystack`.
+    #[doc(hidden)]
+    #[inline]
+    pub fn matches_read_at(
+        &self,
+        matches: &mut [bool],
+        haystack: &[u8],
+        start: usize,
+    ) -> bool {
+        // This is pretty dumb. We should try to fix this, but the
+        // regex-automata API doesn't provide a way to store matches in an
+        // arbitrary &mut [bool]. Thankfully, this API is doc(hidden) and
+        // thus not public... But regex-capi currently uses it. We should
+        // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet
+        // is in regex-automata, not regex. So maybe we should just accept a
+        // 'SetMatches', which is basically just a newtype around PatternSet.
+        let mut patset = PatternSet::new(self.meta.pattern_len());
+        let mut input = Input::new(haystack);
+        input.set_start(start);
+        self.meta.which_overlapping_matches(&input, &mut patset);
+        for pid in patset.iter() {
+            matches[pid] = true;
+        }
+        !patset.is_empty()
+    }
+
+    /// An alias for `matches_read_at` to preserve backward compatibility.
+    ///
+    /// The `regex-capi` crate used this method, so to avoid breaking that
+    /// crate, we continue to export it as an undocumented API.
+    #[doc(hidden)]
+    #[inline]
+    pub fn read_matches_at(
+        &self,
+        matches: &mut [bool],
+        haystack: &[u8],
+        start: usize,
+    ) -> bool {
+        self.matches_read_at(matches, haystack, start)
+    }
+
+    /// Returns the total number of regexes in this set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// assert_eq!(0, RegexSet::empty().len());
+    /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len());
+    /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len());
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.meta.pattern_len()
+    }
+
+    /// Returns `true` if this set contains no regexes.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// assert!(RegexSet::empty().is_empty());
+    /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty());
+    /// ```
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.meta.pattern_len() == 0
+    }
+
+    /// Returns the regex patterns that this regex set was constructed from.
+    ///
+    /// This function can be used to determine the pattern for a match. The
+    /// slice returned has exactly as many patterns as were given to this
+    /// regex set, and the order of the slice is the same as the order of
+    /// the patterns provided to the set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new(&[
+    ///     r"\w+",
+    ///     r"\d+",
+    ///     r"\pL+",
+    ///     r"foo",
+    ///     r"bar",
+    ///     r"barfoo",
+    ///     r"foobar",
+    /// ]).unwrap();
+    /// let matches: Vec<_> = set
+    ///     .matches(b"foobar")
+    ///     .into_iter()
+    ///     .map(|index| &set.patterns()[index])
+    ///     .collect();
+    /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]);
+    /// ```
+    #[inline]
+    pub fn patterns(&self) -> &[String] {
+        &self.patterns
+    }
+}
+
+impl Default for RegexSet {
+    fn default() -> Self {
+        RegexSet::empty()
+    }
+}
+
+/// A set of matches returned by a regex set.
+///
+/// Values of this type are constructed by [`RegexSet::matches`].
+#[derive(Clone, Debug)]
+pub struct SetMatches(PatternSet);
+
+impl SetMatches {
+    /// Whether this set contains any matches.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new(&[
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches(b"foo@example.com");
+    /// assert!(matches.matched_any());
+    /// ```
+    #[inline]
+    pub fn matched_any(&self) -> bool {
+        !self.0.is_empty()
+    }
+
+    /// Whether the regex at the given index matched.
+    ///
+    /// The index for a regex is determined by its insertion order upon the
+    /// initial construction of a `RegexSet`, starting at `0`.
+    ///
+    /// # Panics
+    ///
+    /// If `index` is greater than or equal to the number of regexes in the
+    /// original set that produced these matches. Equivalently, when `index`
+    /// is greater than or equal to [`SetMatches::len`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches(b"example.com");
+    /// assert!(!matches.matched(0));
+    /// assert!(matches.matched(1));
+    /// ```
+    #[inline]
+    pub fn matched(&self, index: usize) -> bool {
+        self.0.contains(PatternID::new_unchecked(index))
+    }
+
+    /// The total number of regexes in the set that created these matches.
+    ///
+    /// **WARNING:** This always returns the same value as [`RegexSet::len`].
+    /// In particular, it does *not* return the number of elements yielded by
+    /// [`SetMatches::iter`]. The only way to determine the total number of
+    /// matched regexes is to iterate over them.
+    ///
+    /// # Example
+    ///
+    /// Notice that this method returns the total number of regexes in the
+    /// original set, and *not* the total number of regexes that matched.
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches(b"example.com");
+    /// // Total number of patterns that matched.
+    /// assert_eq!(1, matches.iter().count());
+    /// // Total number of patterns in the set.
+    /// assert_eq!(2, matches.len());
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.0.capacity()
+    }
+
+    /// Returns an iterator over the indices of the regexes that matched.
+    ///
+    /// This will always produce matches in ascending order, where the index
+    /// yielded corresponds to the index of the regex that matched with respect
+    /// to its position when initially building the set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[0-9]",
+    ///     r"[a-z]",
+    ///     r"[A-Z]",
+    ///     r"\p{Greek}",
+    /// ]).unwrap();
+    /// let hay = "βa1".as_bytes();
+    /// let matches: Vec<_> = set.matches(hay).iter().collect();
+    /// assert_eq!(matches, vec![0, 1, 3]);
+    /// ```
+    ///
+    /// Note that `SetMatches` also implements the `IntoIterator` trait, so
+    /// this method is not always needed. For example:
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[0-9]",
+    ///     r"[a-z]",
+    ///     r"[A-Z]",
+    ///     r"\p{Greek}",
+    /// ]).unwrap();
+    /// let hay = "βa1".as_bytes();
+    /// let mut matches = vec![];
+    /// for index in set.matches(hay) {
+    ///     matches.push(index);
+    /// }
+    /// assert_eq!(matches, vec![0, 1, 3]);
+    /// ```
+    #[inline]
+    pub fn iter(&self) -> SetMatchesIter<'_> {
+        SetMatchesIter(self.0.iter())
+    }
+}
+
+impl IntoIterator for SetMatches {
+    type IntoIter = SetMatchesIntoIter;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        let it = 0..self.0.capacity();
+        SetMatchesIntoIter { patset: self.0, it }
+    }
+}
+
+impl<'a> IntoIterator for &'a SetMatches {
+    type IntoIter = SetMatchesIter<'a>;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+/// An owned iterator over the set of matches from a regex set.
+///
+/// This will always produce matches in ascending order of index, where the
+/// index corresponds to the index of the regex that matched with respect to
+/// its position when initially building the set.
+///
+/// This iterator is created by calling `SetMatches::into_iter` via the
+/// `IntoIterator` trait. This is automatically done in `for` loops.
+///
+/// # Example
+///
+/// ```
+/// use regex::bytes::RegexSet;
+///
+/// let set = RegexSet::new([
+///     r"[0-9]",
+///     r"[a-z]",
+///     r"[A-Z]",
+///     r"\p{Greek}",
+/// ]).unwrap();
+/// let hay = "βa1".as_bytes();
+/// let mut matches = vec![];
+/// for index in set.matches(hay) {
+///     matches.push(index);
+/// }
+/// assert_eq!(matches, vec![0, 1, 3]);
+/// ```
+#[derive(Debug)]
+pub struct SetMatchesIntoIter {
+    patset: PatternSet,
+    it: core::ops::Range<usize>,
+}
+
+impl Iterator for SetMatchesIntoIter {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        loop {
+            let id = self.it.next()?;
+            if self.patset.contains(PatternID::new_unchecked(id)) {
+                return Some(id);
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl DoubleEndedIterator for SetMatchesIntoIter {
+    fn next_back(&mut self) -> Option<usize> {
+        loop {
+            let id = self.it.next_back()?;
+            if self.patset.contains(PatternID::new_unchecked(id)) {
+                return Some(id);
+            }
+        }
+    }
+}
+
+impl core::iter::FusedIterator for SetMatchesIntoIter {}
+
+/// A borrowed iterator over the set of matches from a regex set.
+///
+/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that
+/// created this iterator.
+///
+/// This will always produce matches in ascending order, where the index
+/// corresponds to the index of the regex that matched with respect to its
+/// position when initially building the set.
+///
+/// This iterator is created by the [`SetMatches::iter`] method.
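[Editor's aside: both iterators above are, conceptually, an ascending filter over pattern indices. A standalone sketch of the same idea, using a plain `bool` slice as a stand-in for the real `PatternSet`; the names here are illustrative, not part of the crate:]

```rust
// Sketch: yield ascending indices whose slot is marked as matched.
// `matched` stands in for the PatternSet used by the real iterators.
fn matched_indices(matched: &[bool]) -> impl Iterator<Item = usize> + '_ {
    (0..matched.len()).filter(move |&i| matched[i])
}

fn main() {
    let matched = [true, false, true, true];
    let ids: Vec<usize> = matched_indices(&matched).collect();
    assert_eq!(ids, vec![0, 2, 3]);
}
```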
+#[derive(Clone, Debug)]
+pub struct SetMatchesIter<'a>(PatternSetIter<'a>);
+
+impl<'a> Iterator for SetMatchesIter<'a> {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        self.0.next().map(|pid| pid.as_usize())
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.0.size_hint()
+    }
+}
+
+impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
+    fn next_back(&mut self) -> Option<usize> {
+        self.0.next_back().map(|pid| pid.as_usize())
+    }
+}
+
+impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {}
+
+impl core::fmt::Debug for RegexSet {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "RegexSet({:?})", self.patterns())
+    }
+}
diff --git a/vendor/regex/src/regexset/mod.rs b/vendor/regex/src/regexset/mod.rs
new file mode 100644
index 0000000..93fadec
--- /dev/null
+++ b/vendor/regex/src/regexset/mod.rs
@@ -0,0 +1,2 @@
+pub(crate) mod bytes;
+pub(crate) mod string;
diff --git a/vendor/regex/src/regexset/string.rs b/vendor/regex/src/regexset/string.rs
new file mode 100644
index 0000000..2a3e7b8
--- /dev/null
+++ b/vendor/regex/src/regexset/string.rs
@@ -0,0 +1,706 @@
+use alloc::string::String;
+
+use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter};
+
+use crate::{Error, RegexSetBuilder};
+
+/// Match multiple, possibly overlapping, regexes in a single search.
+///
+/// A regex set corresponds to the union of zero or more regular expressions.
+/// That is, a regex set will match a haystack when at least one of its
+/// constituent regexes matches. A regex set as it's formulated here provides a
+/// touch more power: it will also report *which* regular expressions in the
+/// set match. Indeed, this is the key difference between regex sets and a
+/// single `Regex` with many alternates, since only one alternate can match at
+/// a time.
+///
+/// For example, consider regular expressions to match email addresses and
+/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
+/// regex set is constructed from those regexes, then searching the haystack
+/// `foo@example.com` will report both regexes as matching. Of course, one
+/// could accomplish this by compiling each regex on its own and doing two
+/// searches over the haystack. The key advantage of using a regex set is
+/// that it will report the matching regexes using a *single pass through the
+/// haystack*. If one has hundreds or thousands of regexes to match repeatedly
+/// (like a URL router for a complex web application or a user agent matcher),
+/// then a regex set *can* realize huge performance gains.
+///
+/// # Limitations
+///
+/// Regex sets are limited to answering the following two questions:
+///
+/// 1. Does any regex in the set match?
+/// 2. If so, which regexes in the set match?
+///
+/// As with the main [`Regex`][crate::Regex] type, it is cheaper to ask (1)
+/// instead of (2) since the matching engines can stop after the first match
+/// is found.
+///
+/// You cannot directly extract [`Match`][crate::Match] or
+/// [`Captures`][crate::Captures] objects from a regex set. If you need these
+/// operations, the recommended approach is to compile each pattern in the set
+/// independently and scan the exact same haystack a second time with those
+/// independently compiled patterns:
+///
+/// ```
+/// use regex::{Regex, RegexSet};
+///
+/// let patterns = ["foo", "bar"];
+/// // Both patterns will match different ranges of this string.
+/// let hay = "barfoo";
+///
+/// // Compile a set matching any of our patterns.
+/// let set = RegexSet::new(patterns).unwrap();
+/// // Compile each pattern independently.
+/// let regexes: Vec<_> = set
+///     .patterns()
+///     .iter()
+///     .map(|pat| Regex::new(pat).unwrap())
+///     .collect();
+///
+/// // Match against the whole set first and identify the individual
+/// // matching patterns.
+/// let matches: Vec<&str> = set
+///     .matches(hay)
+///     .into_iter()
+///     // Dereference the match index to get the corresponding
+///     // compiled pattern.
+///     .map(|index| &regexes[index])
+///     // To get match locations or any other info, we then have to search the
+///     // exact same haystack again, using our separately-compiled pattern.
+///     .map(|re| re.find(hay).unwrap().as_str())
+///     .collect();
+///
+/// // Matches arrive in the order the constituent patterns were declared,
+/// // not the order they appear in the haystack.
+/// assert_eq!(vec!["foo", "bar"], matches);
+/// ```
+///
+/// # Performance
+///
+/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
+/// search takes `O(m * n)` time, where `m` is proportional to the size of the
+/// regex set and `n` is proportional to the length of the haystack.
+///
+/// # Trait implementations
+///
+/// The `Default` trait is implemented for `RegexSet`. The default value
+/// is an empty set. An empty set can also be explicitly constructed via
+/// [`RegexSet::empty`].
+///
+/// # Example
+///
+/// This shows how the above two regexes (for matching email addresses and
+/// domains) might work:
+///
+/// ```
+/// use regex::RegexSet;
+///
+/// let set = RegexSet::new(&[
+///     r"[a-z]+@[a-z]+\.(com|org|net)",
+///     r"[a-z]+\.(com|org|net)",
+/// ]).unwrap();
+///
+/// // Ask whether any regexes in the set match.
+/// assert!(set.is_match("foo@example.com"));
+///
+/// // Identify which regexes in the set match.
+/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect();
+/// assert_eq!(vec![0, 1], matches);
+///
+/// // Try again, but with a haystack that only matches one of the regexes.
+/// let matches: Vec<_> = set.matches("example.com").into_iter().collect();
+/// assert_eq!(vec![1], matches);
+///
+/// // Try again, but with a haystack that doesn't match any regex in the set.
+/// let matches: Vec<_> = set.matches("example").into_iter().collect();
+/// assert!(matches.is_empty());
+/// ```
+///
+/// Note that it would be possible to adapt the above example to using `Regex`
+/// with an expression like:
+///
+/// ```text
+/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
+/// ```
+///
+/// After a match, one could then inspect the capture groups to figure out
+/// which alternates matched. The problem is that it is hard to make this
+/// approach scale when there are many regexes since the overlap between each
+/// alternate isn't always obvious to reason about.
+#[derive(Clone)]
+pub struct RegexSet {
+    pub(crate) meta: meta::Regex,
+    pub(crate) patterns: alloc::sync::Arc<[String]>,
+}
+
+impl RegexSet {
+    /// Create a new regex set with the given regular expressions.
+    ///
+    /// This takes an iterator of `S`, where `S` is something that can produce
+    /// a `&str`. If any of the strings in the iterator are not valid regular
+    /// expressions, then an error is returned.
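[Editor's aside: to make the error path just described concrete, a minimal sketch; the invalid pattern is invented for illustration:]

```rust
use regex::RegexSet;

fn main() {
    // A single invalid pattern causes the entire set construction to fail.
    let result = RegexSet::new([r"\w+", r"(unclosed"]);
    assert!(result.is_err());
}
```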
+    ///
+    /// # Example
+    ///
+    /// Create a new regex set from an iterator of strings:
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
+    /// assert!(set.is_match("foo"));
+    /// ```
+    pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
+    where
+        S: AsRef<str>,
+        I: IntoIterator<Item = S>,
+    {
+        RegexSetBuilder::new(exprs).build()
+    }
+
+    /// Create a new empty regex set.
+    ///
+    /// An empty regex set never matches anything.
+    ///
+    /// This is a convenience function for `RegexSet::new([])`, but doesn't
+    /// require one to specify the type of the input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::empty();
+    /// assert!(set.is_empty());
+    /// // an empty set matches nothing
+    /// assert!(!set.is_match(""));
+    /// ```
+    pub fn empty() -> RegexSet {
+        let empty: [&str; 0] = [];
+        RegexSetBuilder::new(empty).build().unwrap()
+    }
+
+    /// Returns true if and only if one of the regexes in this set matches
+    /// the haystack given.
+    ///
+    /// This method should be preferred if you only need to test whether any
+    /// of the regexes in the set match, but don't care about *which*
+    /// regexes matched. This is because the underlying matching engine will
+    /// quit immediately after seeing the first match instead of continuing to
+    /// find all matches.
+    ///
+    /// Note that as with searches using [`Regex`](crate::Regex), the
+    /// expression is unanchored by default. That is, if the regex does not
+    /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
+    /// to match anywhere in the haystack.
+    ///
+    /// # Example
+    ///
+    /// Tests whether a set matches somewhere in a haystack:
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
+    /// assert!(set.is_match("foo"));
+    /// assert!(!set.is_match("☃"));
+    /// ```
+    #[inline]
+    pub fn is_match(&self, haystack: &str) -> bool {
+        self.is_match_at(haystack, 0)
+    }
+
+    /// Returns true if and only if one of the regexes in this set matches the
+    /// haystack given, with the search starting at the offset given.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `start >= haystack.len() + 1`.
+    ///
+    /// # Example
+    ///
+    /// This example shows the significance of `start`. Namely, consider a
+    /// haystack `foobar` and a desire to execute a search starting at offset
+    /// `3`. You could search a substring explicitly, but then the look-around
+    /// assertions won't work correctly. Instead, you can use this method to
+    /// specify the start position of a search.
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
+    /// let hay = "foobar";
+    /// // We get a match here, but it's probably not intended.
+    /// assert!(set.is_match(&hay[3..]));
+    /// // No match because the assertions take the context into account.
+    /// assert!(!set.is_match_at(hay, 3));
+    /// ```
+    #[inline]
+    pub fn is_match_at(&self, haystack: &str, start: usize) -> bool {
+        self.meta.is_match(Input::new(haystack).span(start..haystack.len()))
+    }
+
+    /// Returns the set of regexes that match in the given haystack.
+    ///
+    /// The set returned contains the index of each regex that matches in
+    /// the given haystack.
The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// Note that as with searches using [`Regex`](crate::Regex), the + /// expression is unanchored by default. That is, if the regex does not + /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted + /// to match anywhere in the haystack. + /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"\w+", + /// r"\d+", + /// r"\pL+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]).unwrap(); + /// let matches: Vec<_> = set.matches("foobar").into_iter().collect(); + /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); + /// + /// // You can also test whether a particular regex matched: + /// let matches = set.matches("foobar"); + /// assert!(!matches.matched(5)); + /// assert!(matches.matched(6)); + /// ``` + #[inline] + pub fn matches(&self, haystack: &str) -> SetMatches { + self.matches_at(haystack, 0) + } + + /// Returns the set of regexes that match in the given haystack. + /// + /// The set returned contains the index of each regex that matches in + /// the given haystack. The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); + /// let hay = "foobar"; + /// // We get matches here, but it's probably not intended. + /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect(); + /// assert_eq!(matches, vec![0, 1]); + /// // No matches because the assertions take the context into account. + /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect(); + /// assert_eq!(matches, vec![]); + /// ``` + #[inline] + pub fn matches_at(&self, haystack: &str, start: usize) -> SetMatches { + let input = Input::new(haystack).span(start..haystack.len()); + let mut patset = PatternSet::new(self.meta.pattern_len()); + self.meta.which_overlapping_matches(&input, &mut patset); + SetMatches(patset) + } + + /// Returns the same as matches, but starts the search at the given + /// offset and stores the matches into the slice given. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// `matches` must have a length that is at least the number of regexes + /// in this set. + /// + /// This method returns true if and only if at least one member of + /// `matches` is true after executing the set against `haystack`. 
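[Editor's aside: a minimal sketch of the `matches_read_at` contract just described; note it is a `doc(hidden)` API kept around for `regex-capi`, so it may change:]

```rust
use regex::RegexSet;

fn main() {
    let set = RegexSet::new([r"\d+", r"[a-z]+"]).unwrap();
    // The slice must be at least as long as the number of regexes in the set.
    let mut slots = vec![false; set.len()];
    let any = set.matches_read_at(&mut slots, "abc123", 0);
    // Both patterns match somewhere in "abc123", so both slots are set.
    assert!(any);
    assert_eq!(slots, vec![true, true]);
}
```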
+    #[doc(hidden)]
+    #[inline]
+    pub fn matches_read_at(
+        &self,
+        matches: &mut [bool],
+        haystack: &str,
+        start: usize,
+    ) -> bool {
+        // This is pretty dumb. We should try to fix this, but the
+        // regex-automata API doesn't provide a way to store matches in an
+        // arbitrary &mut [bool]. Thankfully, this API is doc(hidden) and
+        // thus not public... But regex-capi currently uses it. We should
+        // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet
+        // is in regex-automata, not regex. So maybe we should just accept a
+        // 'SetMatches', which is basically just a newtype around PatternSet.
+        let mut patset = PatternSet::new(self.meta.pattern_len());
+        let mut input = Input::new(haystack);
+        input.set_start(start);
+        self.meta.which_overlapping_matches(&input, &mut patset);
+        for pid in patset.iter() {
+            matches[pid] = true;
+        }
+        !patset.is_empty()
+    }
+
+    /// An alias for `matches_read_at` to preserve backward compatibility.
+    ///
+    /// The `regex-capi` crate used this method, so to avoid breaking that
+    /// crate, we continue to export it as an undocumented API.
+    #[doc(hidden)]
+    #[inline]
+    pub fn read_matches_at(
+        &self,
+        matches: &mut [bool],
+        haystack: &str,
+        start: usize,
+    ) -> bool {
+        self.matches_read_at(matches, haystack, start)
+    }
+
+    /// Returns the total number of regexes in this set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// assert_eq!(0, RegexSet::empty().len());
+    /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len());
+    /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len());
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.meta.pattern_len()
+    }
+
+    /// Returns `true` if this set contains no regexes.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// assert!(RegexSet::empty().is_empty());
+    /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty());
+    /// ```
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.meta.pattern_len() == 0
+    }
+
+    /// Returns the regex patterns that this regex set was constructed from.
+    ///
+    /// This function can be used to determine the pattern for a match. The
+    /// slice returned has exactly as many patterns as were given to this
+    /// regex set, and the order of the slice is the same as the order of
+    /// the patterns provided to the set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new(&[
+    ///     r"\w+",
+    ///     r"\d+",
+    ///     r"\pL+",
+    ///     r"foo",
+    ///     r"bar",
+    ///     r"barfoo",
+    ///     r"foobar",
+    /// ]).unwrap();
+    /// let matches: Vec<_> = set
+    ///     .matches("foobar")
+    ///     .into_iter()
+    ///     .map(|index| &set.patterns()[index])
+    ///     .collect();
+    /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]);
+    /// ```
+    #[inline]
+    pub fn patterns(&self) -> &[String] {
+        &self.patterns
+    }
+}
+
+impl Default for RegexSet {
+    fn default() -> Self {
+        RegexSet::empty()
+    }
+}
+
+/// A set of matches returned by a regex set.
+///
+/// Values of this type are constructed by [`RegexSet::matches`].
+#[derive(Clone, Debug)]
+pub struct SetMatches(PatternSet);
+
+impl SetMatches {
+    /// Whether this set contains any matches.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new(&[
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches("foo@example.com");
+    /// assert!(matches.matched_any());
+    /// ```
+    #[inline]
+    pub fn matched_any(&self) -> bool {
+        !self.0.is_empty()
+    }
+
+    /// Whether the regex at the given index matched.
+    ///
+    /// The index for a regex is determined by its insertion order upon the
+    /// initial construction of a `RegexSet`, starting at `0`.
+    ///
+    /// # Panics
+    ///
+    /// If `index` is greater than or equal to the number of regexes in the
+    /// original set that produced these matches. Equivalently, when `index`
+    /// is greater than or equal to [`SetMatches::len`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches("example.com");
+    /// assert!(!matches.matched(0));
+    /// assert!(matches.matched(1));
+    /// ```
+    #[inline]
+    pub fn matched(&self, index: usize) -> bool {
+        self.0.contains(PatternID::new_unchecked(index))
+    }
+
+    /// The total number of regexes in the set that created these matches.
+    ///
+    /// **WARNING:** This always returns the same value as [`RegexSet::len`].
+    /// In particular, it does *not* return the number of elements yielded by
+    /// [`SetMatches::iter`]. The only way to determine the total number of
+    /// matched regexes is to iterate over them.
+    ///
+    /// # Example
+    ///
+    /// Notice that this method returns the total number of regexes in the
+    /// original set, and *not* the total number of regexes that matched.
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches("example.com");
+    /// // Total number of patterns that matched.
+    /// assert_eq!(1, matches.iter().count());
+    /// // Total number of patterns in the set.
+    /// assert_eq!(2, matches.len());
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.0.capacity()
+    }
+
+    /// Returns an iterator over the indices of the regexes that matched.
+    ///
+    /// This will always produce matches in ascending order, where the index
+    /// yielded corresponds to the index of the regex that matched with respect
+    /// to its position when initially building the set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[0-9]",
+    ///     r"[a-z]",
+    ///     r"[A-Z]",
+    ///     r"\p{Greek}",
+    /// ]).unwrap();
+    /// let hay = "βa1";
+    /// let matches: Vec<_> = set.matches(hay).iter().collect();
+    /// assert_eq!(matches, vec![0, 1, 3]);
+    /// ```
+    ///
+    /// Note that `SetMatches` also implements the `IntoIterator` trait, so
+    /// this method is not always needed. For example:
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[0-9]",
+    ///     r"[a-z]",
+    ///     r"[A-Z]",
+    ///     r"\p{Greek}",
+    /// ]).unwrap();
+    /// let hay = "βa1";
+    /// let mut matches = vec![];
+    /// for index in set.matches(hay) {
+    ///     matches.push(index);
+    /// }
+    /// assert_eq!(matches, vec![0, 1, 3]);
+    /// ```
+    #[inline]
+    pub fn iter(&self) -> SetMatchesIter<'_> {
+        SetMatchesIter(self.0.iter())
+    }
+}
+
+impl IntoIterator for SetMatches {
+    type IntoIter = SetMatchesIntoIter;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        let it = 0..self.0.capacity();
+        SetMatchesIntoIter { patset: self.0, it }
+    }
+}
+
+impl<'a> IntoIterator for &'a SetMatches {
+    type IntoIter = SetMatchesIter<'a>;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+/// An owned iterator over the set of matches from a regex set.
+///
+/// This will always produce matches in ascending order of index, where the
+/// index corresponds to the index of the regex that matched with respect to
+/// its position when initially building the set.
+///
+/// This iterator is created by calling `SetMatches::into_iter` via the
+/// `IntoIterator` trait. This is automatically done in `for` loops.
+///
+/// # Example
+///
+/// ```
+/// use regex::RegexSet;
+///
+/// let set = RegexSet::new([
+///     r"[0-9]",
+///     r"[a-z]",
+///     r"[A-Z]",
+///     r"\p{Greek}",
+/// ]).unwrap();
+/// let hay = "βa1";
+/// let mut matches = vec![];
+/// for index in set.matches(hay) {
+///     matches.push(index);
+/// }
+/// assert_eq!(matches, vec![0, 1, 3]);
+/// ```
+#[derive(Debug)]
+pub struct SetMatchesIntoIter {
+    patset: PatternSet,
+    it: core::ops::Range<usize>,
+}
+
+impl Iterator for SetMatchesIntoIter {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        loop {
+            let id = self.it.next()?;
+            if self.patset.contains(PatternID::new_unchecked(id)) {
+                return Some(id);
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl DoubleEndedIterator for SetMatchesIntoIter {
+    fn next_back(&mut self) -> Option<usize> {
+        loop {
+            let id = self.it.next_back()?;
+            if self.patset.contains(PatternID::new_unchecked(id)) {
+                return Some(id);
+            }
+        }
+    }
+}
+
+impl core::iter::FusedIterator for SetMatchesIntoIter {}
+
+/// A borrowed iterator over the set of matches from a regex set.
+///
+/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that
+/// created this iterator.
+///
+/// This will always produce matches in ascending order, where the index
+/// corresponds to the index of the regex that matched with respect to its
+/// position when initially building the set.
+///
+/// This iterator is created by the [`SetMatches::iter`] method.
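[Editor's aside: to ground the "URL router" motivation from the type-level docs above, a minimal sketch; the route patterns are invented for illustration:]

```rust
use regex::RegexSet;

fn main() {
    // Hypothetical route table; one compiled set answers "which routes
    // match?" in a single pass over the path.
    let set = RegexSet::new([
        r"^/users/[0-9]+$",
        r"^/posts/[a-z0-9-]+$",
        r"^/health$",
    ]).unwrap();

    let hits: Vec<usize> = set.matches("/users/42").into_iter().collect();
    assert_eq!(hits, vec![0]);
}
```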
+#[derive(Clone, Debug)] +pub struct SetMatchesIter<'a>(PatternSetIter<'a>); + +impl<'a> Iterator for SetMatchesIter<'a> { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + self.0.next().map(|pid| pid.as_usize()) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } +} + +impl<'a> DoubleEndedIterator for SetMatchesIter<'a> { + fn next_back(&mut self) -> Option<usize> { + self.0.next_back().map(|pid| pid.as_usize()) + } +} + +impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {} + +impl core::fmt::Debug for RegexSet { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "RegexSet({:?})", self.patterns()) + } +} diff --git a/vendor/regex/src/sparse.rs b/vendor/regex/src/sparse.rs deleted file mode 100644 index 98b7266..0000000 --- a/vendor/regex/src/sparse.rs +++ /dev/null @@ -1,84 +0,0 @@ -use std::fmt; -use std::ops::Deref; -use std::slice; - -/// A sparse set used for representing ordered NFA states. -/// -/// This supports constant time addition and membership testing. Clearing an -/// entire set can also be done in constant time. Iteration yields elements -/// in the order in which they were inserted. -/// -/// The data structure is based on: https://research.swtch.com/sparse -/// Note though that we don't actually use uninitialized memory. We generally -/// reuse allocations, so the initial allocation cost is bareable. However, -/// its other properties listed above are extremely useful. -#[derive(Clone)] -pub struct SparseSet { - /// Dense contains the instruction pointers in the order in which they - /// were inserted. - dense: Vec<usize>, - /// Sparse maps instruction pointers to their location in dense. - /// - /// An instruction pointer is in the set if and only if - /// sparse[ip] < dense.len() && ip == dense[sparse[ip]]. - sparse: Box<[usize]>, -} - -impl SparseSet { - pub fn new(size: usize) -> SparseSet { - SparseSet { - dense: Vec::with_capacity(size), - sparse: vec![0; size].into_boxed_slice(), - } - } - - pub fn len(&self) -> usize { - self.dense.len() - } - - pub fn is_empty(&self) -> bool { - self.dense.is_empty() - } - - pub fn capacity(&self) -> usize { - self.dense.capacity() - } - - pub fn insert(&mut self, value: usize) { - let i = self.len(); - assert!(i < self.capacity()); - self.dense.push(value); - self.sparse[value] = i; - } - - pub fn contains(&self, value: usize) -> bool { - let i = self.sparse[value]; - self.dense.get(i) == Some(&value) - } - - pub fn clear(&mut self) { - self.dense.clear(); - } -} - -impl fmt::Debug for SparseSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "SparseSet({:?})", self.dense) - } -} - -impl Deref for SparseSet { - type Target = [usize]; - - fn deref(&self) -> &Self::Target { - &self.dense - } -} - -impl<'a> IntoIterator for &'a SparseSet { - type Item = &'a usize; - type IntoIter = slice::Iter<'a, usize>; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} diff --git a/vendor/regex/src/testdata/LICENSE b/vendor/regex/src/testdata/LICENSE deleted file mode 100644 index f47dbf4..0000000 --- a/vendor/regex/src/testdata/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -The following license covers testregex.c and all associated test data. 
- -Permission is hereby granted, free of charge, to any person obtaining a -copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software -without restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, and/or sell copies of the -Software, and to permit persons to whom the Software is furnished to do -so, subject to the following disclaimer: - -THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/regex/src/testdata/README b/vendor/regex/src/testdata/README deleted file mode 100644 index 6efc2da..0000000 --- a/vendor/regex/src/testdata/README +++ /dev/null @@ -1,17 +0,0 @@ -Test data was taken from the Go distribution, which was in turn taken from the -testregex test suite: - - http://www2.research.att.com/~astopen/testregex/testregex.html - -The LICENSE in this directory corresponds to the LICENSE that the data was -released under. - -The tests themselves were modified for RE2/Go. A couple were modified further -by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them. -(Yes, it seems like RE2/Go includes failing test cases.) This may or may not -have been a bad idea, but I think being consistent with an established Regex -library is worth something. - -Note that these files are read by 'scripts/regex-match-tests.py' and turned -into Rust tests found in 'regex_macros/tests/matches.rs'. - diff --git a/vendor/regex/src/utf8.rs b/vendor/regex/src/utf8.rs deleted file mode 100644 index 2dfd2c0..0000000 --- a/vendor/regex/src/utf8.rs +++ /dev/null @@ -1,264 +0,0 @@ -/// A few elementary UTF-8 encoding and decoding functions used by the matching -/// engines. -/// -/// In an ideal world, the matching engines operate on `&str` and we can just -/// lean on the standard library for all our UTF-8 needs. However, to support -/// byte based regexes (that can match on arbitrary bytes which may contain -/// UTF-8), we need to be capable of searching and decoding UTF-8 on a `&[u8]`. -/// The standard library doesn't really recognize this use case, so we have -/// to build it out ourselves. -/// -/// Should this be factored out into a separate crate? It seems independently -/// useful. There are other crates that already exist (e.g., `utf-8`) that have -/// overlapping use cases. Not sure what to do. -use std::char; - -const TAG_CONT: u8 = 0b1000_0000; -const TAG_TWO: u8 = 0b1100_0000; -const TAG_THREE: u8 = 0b1110_0000; -const TAG_FOUR: u8 = 0b1111_0000; - -/// Returns the smallest possible index of the next valid UTF-8 sequence -/// starting after `i`. 
-pub fn next_utf8(text: &[u8], i: usize) -> usize { - let b = match text.get(i) { - None => return i + 1, - Some(&b) => b, - }; - let inc = if b <= 0x7F { - 1 - } else if b <= 0b110_11111 { - 2 - } else if b <= 0b1110_1111 { - 3 - } else { - 4 - }; - i + inc -} - -/// Decode a single UTF-8 sequence into a single Unicode codepoint from `src`. -/// -/// If no valid UTF-8 sequence could be found, then `None` is returned. -/// Otherwise, the decoded codepoint and the number of bytes read is returned. -/// The number of bytes read (for a valid UTF-8 sequence) is guaranteed to be -/// 1, 2, 3 or 4. -/// -/// Note that a UTF-8 sequence is invalid if it is incorrect UTF-8, encodes a -/// codepoint that is out of range (surrogate codepoints are out of range) or -/// is not the shortest possible UTF-8 sequence for that codepoint. -#[inline] -pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> { - let b0 = match src.get(0) { - None => return None, - Some(&b) if b <= 0x7F => return Some((b as char, 1)), - Some(&b) => b, - }; - match b0 { - 0b110_00000..=0b110_11111 => { - if src.len() < 2 { - return None; - } - let b1 = src[1]; - if 0b11_000000 & b1 != TAG_CONT { - return None; - } - let cp = ((b0 & !TAG_TWO) as u32) << 6 | ((b1 & !TAG_CONT) as u32); - match cp { - 0x80..=0x7FF => char::from_u32(cp).map(|cp| (cp, 2)), - _ => None, - } - } - 0b1110_0000..=0b1110_1111 => { - if src.len() < 3 { - return None; - } - let (b1, b2) = (src[1], src[2]); - if 0b11_000000 & b1 != TAG_CONT { - return None; - } - if 0b11_000000 & b2 != TAG_CONT { - return None; - } - let cp = ((b0 & !TAG_THREE) as u32) << 12 - | ((b1 & !TAG_CONT) as u32) << 6 - | ((b2 & !TAG_CONT) as u32); - match cp { - // char::from_u32 will disallow surrogate codepoints. - 0x800..=0xFFFF => char::from_u32(cp).map(|cp| (cp, 3)), - _ => None, - } - } - 0b11110_000..=0b11110_111 => { - if src.len() < 4 { - return None; - } - let (b1, b2, b3) = (src[1], src[2], src[3]); - if 0b11_000000 & b1 != TAG_CONT { - return None; - } - if 0b11_000000 & b2 != TAG_CONT { - return None; - } - if 0b11_000000 & b3 != TAG_CONT { - return None; - } - let cp = ((b0 & !TAG_FOUR) as u32) << 18 - | ((b1 & !TAG_CONT) as u32) << 12 - | ((b2 & !TAG_CONT) as u32) << 6 - | ((b3 & !TAG_CONT) as u32); - match cp { - 0x10000..=0x0010_FFFF => char::from_u32(cp).map(|cp| (cp, 4)), - _ => None, - } - } - _ => None, - } -} - -/// Like `decode_utf8`, but decodes the last UTF-8 sequence in `src` instead -/// of the first. 
-pub fn decode_last_utf8(src: &[u8]) -> Option<(char, usize)> { - if src.is_empty() { - return None; - } - let mut start = src.len() - 1; - if src[start] <= 0x7F { - return Some((src[start] as char, 1)); - } - while start > src.len().saturating_sub(4) { - start -= 1; - if is_start_byte(src[start]) { - break; - } - } - match decode_utf8(&src[start..]) { - None => None, - Some((_, n)) if n < src.len() - start => None, - Some((cp, n)) => Some((cp, n)), - } -} - -fn is_start_byte(b: u8) -> bool { - b & 0b11_000000 != 0b1_0000000 -} - -#[cfg(test)] -mod tests { - use std::str; - - use quickcheck::quickcheck; - - use super::{ - decode_last_utf8, decode_utf8, TAG_CONT, TAG_FOUR, TAG_THREE, TAG_TWO, - }; - - #[test] - fn prop_roundtrip() { - fn p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let encoded_len = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, got_len) = decode_utf8(&tmp[..encoded_len]).unwrap(); - encoded_len == got_len && given_cp == got_cp - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_roundtrip_last() { - fn p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let encoded_len = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, got_len) = - decode_last_utf8(&tmp[..encoded_len]).unwrap(); - encoded_len == got_len && given_cp == got_cp - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_encode_matches_std() { - fn p(cp: char) -> bool { - let mut got = [0; 4]; - let n = cp.encode_utf8(&mut got).len(); - let expected = cp.to_string(); - &got[..n] == expected.as_bytes() - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_decode_matches_std() { - fn p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let n = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, _) = decode_utf8(&tmp[..n]).unwrap(); - let expected_cp = - str::from_utf8(&tmp[..n]).unwrap().chars().next().unwrap(); - got_cp == expected_cp - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_decode_last_matches_std() { - fn p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let n = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, _) = decode_last_utf8(&tmp[..n]).unwrap(); - let expected_cp = str::from_utf8(&tmp[..n]) - .unwrap() - .chars() - .rev() - .next() - .unwrap(); - got_cp == expected_cp - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn reject_invalid() { - // Invalid start byte - assert_eq!(decode_utf8(&[0xFF]), None); - // Surrogate pair - assert_eq!(decode_utf8(&[0xED, 0xA0, 0x81]), None); - // Invalid continuation byte. 
- assert_eq!(decode_utf8(&[0xD4, 0xC2]), None); - // Bad lengths - assert_eq!(decode_utf8(&[0xC3]), None); // 2 bytes - assert_eq!(decode_utf8(&[0xEF, 0xBF]), None); // 3 bytes - assert_eq!(decode_utf8(&[0xF4, 0x8F, 0xBF]), None); // 4 bytes - // Not a minimal UTF-8 sequence - assert_eq!(decode_utf8(&[TAG_TWO, TAG_CONT | b'a']), None); - assert_eq!(decode_utf8(&[TAG_THREE, TAG_CONT, TAG_CONT | b'a']), None); - assert_eq!( - decode_utf8(&[TAG_FOUR, TAG_CONT, TAG_CONT, TAG_CONT | b'a',]), - None - ); - } - - #[test] - fn reject_invalid_last() { - // Invalid start byte - assert_eq!(decode_last_utf8(&[0xFF]), None); - // Surrogate pair - assert_eq!(decode_last_utf8(&[0xED, 0xA0, 0x81]), None); - // Bad lengths - assert_eq!(decode_last_utf8(&[0xC3]), None); // 2 bytes - assert_eq!(decode_last_utf8(&[0xEF, 0xBF]), None); // 3 bytes - assert_eq!(decode_last_utf8(&[0xF4, 0x8F, 0xBF]), None); // 4 bytes - // Not a minimal UTF-8 sequence - assert_eq!(decode_last_utf8(&[TAG_TWO, TAG_CONT | b'a']), None); - assert_eq!( - decode_last_utf8(&[TAG_THREE, TAG_CONT, TAG_CONT | b'a',]), - None - ); - assert_eq!( - decode_last_utf8( - &[TAG_FOUR, TAG_CONT, TAG_CONT, TAG_CONT | b'a',] - ), - None - ); - } -} diff --git a/vendor/regex/test b/vendor/regex/test index b10564f..48224c6 100755 --- a/vendor/regex/test +++ b/vendor/regex/test @@ -2,14 +2,23 @@ set -e +# cd to the directory containing this crate's Cargo.toml so that we don't need +# to pass --manifest-path to every `cargo` command. +cd "$(dirname "$0")" + # This is a convenience script for running a broad swath of tests across # features. We don't test the complete space, since the complete space is quite # large. Hopefully once we migrate the test suite to better infrastructure # (like regex-automata), we'll be able to test more of the space. -echo "===== DEFAULT FEATURES ===" +echo "===== DEFAULT FEATURES =====" cargo test -echo "===== DOC TESTS ===" +# no-std mode is annoyingly difficult to test. Currently, the integration tests +# don't run. So for now, we just test that library tests run. (There aren't +# many because `regex` is just a wrapper crate.) +cargo test --no-default-features --lib + +echo "===== DOC TESTS =====" cargo test --doc features=( @@ -21,10 +30,17 @@ features=( "std perf-dfa" "std perf-inline" "std perf-literal" + "std perf-dfa-full" + "std perf-onepass" + "std perf-backtrack" ) for f in "${features[@]}"; do - echo "===== FEATURE: $f (default) ===" - cargo test --test default --no-default-features --features "$f" - echo "===== FEATURE: $f (default-bytes) ===" - cargo test --test default-bytes --no-default-features --features "$f" + echo "===== FEATURE: $f =====" + cargo test --test integration --no-default-features --features "$f" done + +# And test the probably-forever-nightly-only 'pattern' feature... +if rustc --version | grep -q nightly; then + echo "===== FEATURE: std,pattern,unicode-perl =====" + cargo test --test integration --no-default-features --features std,pattern,unicode-perl +fi diff --git a/vendor/regex/testdata/README.md b/vendor/regex/testdata/README.md new file mode 100644 index 0000000..c3bc1ac --- /dev/null +++ b/vendor/regex/testdata/README.md @@ -0,0 +1,22 @@ +This directory contains a large suite of regex tests defined in a TOML format. +They are used to drive tests in `tests/lib.rs`, `regex-automata/tests/lib.rs` +and `regex-lite/tests/lib.rs`. + +See the [`regex-test`][regex-test] crate documentation for an explanation of +the format and how it generates tests. 
+
+The basic idea here is that we have many different regex engines but generally
+one set of tests. We want to be able to run those tests (or most of them) on
+every engine. Prior to `regex 1.9`, we used to do this with a hodgepodge of
+macros and a different test executable for each engine. Overall, it took
+longer to compile, was harder to maintain, and made the test definitions
+themselves less clear.
+
+In `regex 1.9`, when we moved over to `regex-automata`, the situation got a lot
+worse because of an increase in the number of engines. So I devised an
+engine-independent format for testing regex patterns and their semantics.
+
+Note: the naming scheme used in these tests isn't terribly consistent. It would
+be great to fix that.
+
+[regex-test]: https://docs.rs/regex-test
diff --git a/vendor/regex/testdata/anchored.toml b/vendor/regex/testdata/anchored.toml
new file mode 100644
index 0000000..0f2248d
--- /dev/null
+++ b/vendor/regex/testdata/anchored.toml
@@ -0,0 +1,127 @@
+# These tests are specifically geared toward searches with 'anchored = true'.
+# While they are interesting in their own right, they are particularly
+# important for testing the one-pass DFA since the one-pass DFA can't work in
+# unanchored contexts.
+#
+# Note that "anchored" in this context does not mean "^". Anchored searches are
+# searches whose matches must begin at the start of the search, which may not
+# be at the start of the haystack. That's why anchored searches---and there are
+# some examples below---can still report multiple matches. This occurs when the
+# matches are adjacent to one another.
+
+[[test]]
+name = "greedy"
+regex = '(abc)+'
+haystack = "abcabcabc"
+matches = [
+    [[0, 9], [6, 9]],
+]
+anchored = true
+
+# When an "earliest" search is used, greediness doesn't really exist because
+# matches are reported as soon as they are known.
+[[test]]
+name = "greedy-earliest"
+regex = '(abc)+'
+haystack = "abcabcabc"
+matches = [
+    [[0, 3], [0, 3]],
+    [[3, 6], [3, 6]],
+    [[6, 9], [6, 9]],
+]
+anchored = true
+search-kind = "earliest"
+
+[[test]]
+name = "nongreedy"
+regex = '(abc)+?'
+haystack = "abcabcabc"
+matches = [
+    [[0, 3], [0, 3]],
+    [[3, 6], [3, 6]],
+    [[6, 9], [6, 9]],
+]
+anchored = true
+
+# When "all" semantics are used, non-greediness doesn't exist since the longest
+# possible match is always taken.
+[[test]]
+name = "nongreedy-all"
+regex = '(abc)+?'
+haystack = "abcabcabc"
+matches = [
+    [[0, 9], [6, 9]],
+]
+anchored = true
+match-kind = "all"
+
+[[test]]
+name = "word-boundary-unicode-01"
+regex = '\b\w+\b'
+haystack = 'βββ☃'
+matches = [[0, 6]]
+anchored = true
+
+[[test]]
+name = "word-boundary-nounicode-01"
+regex = '\b\w+\b'
+haystack = 'abcβ'
+matches = [[0, 3]]
+anchored = true
+unicode = false
+
+# Tests that '.c' doesn't match 'abc' when performing an anchored search from
+# the beginning of the haystack. This test found two different bugs in the
+# PikeVM and the meta engine.
+[[test]]
+name = "no-match-at-start"
+regex = '.c'
+haystack = 'abc'
+matches = []
+anchored = true
+
+# Like above, but at a non-zero start offset.
+[[test]]
+name = "no-match-at-start-bounds"
+regex = '.c'
+haystack = 'aabc'
+bounds = [1, 4]
+matches = []
+anchored = true
+
+# This is like no-match-at-start, but hits the "reverse inner" optimization
+# inside the meta engine. (no-match-at-start hits the "reverse suffix"
+# optimization.)
+[[test]]
+name = "no-match-at-start-reverse-inner"
+regex = '.c[a-z]'
+haystack = 'abcz'
+matches = []
+anchored = true
+
+# Like above, but at a non-zero start offset.
+[[test]]
+name = "no-match-at-start-reverse-inner-bounds"
+regex = '.c[a-z]'
+haystack = 'aabcz'
+bounds = [1, 5]
+matches = []
+anchored = true
+
+# Same as no-match-at-start, but applies to the meta engine's "reverse
+# anchored" optimization.
+[[test]]
+name = "no-match-at-start-reverse-anchored"
+regex = '.c[a-z]$'
+haystack = 'abcz'
+matches = []
+anchored = true
+
+# Like above, but at a non-zero start offset.
+[[test]]
+name = "no-match-at-start-reverse-anchored-bounds"
+regex = '.c[a-z]$'
+haystack = 'aabcz'
+bounds = [1, 5]
+matches = []
+anchored = true
diff --git a/vendor/regex/testdata/bytes.toml b/vendor/regex/testdata/bytes.toml
new file mode 100644
index 0000000..346e369
--- /dev/null
+++ b/vendor/regex/testdata/bytes.toml
@@ -0,0 +1,235 @@
+# These are tests specifically crafted for regexes that can match arbitrary
+# bytes. In some cases, we test the Unicode variant as well, just because it's
+# good sense to do so. That said, these tests aren't really about Unicode, but
+# about whether matches are only reported at valid UTF-8 boundaries. For most
+# tests in this entire collection, utf8 = true. But for these tests, we use
+# utf8 = false.
+
+[[test]]
+name = "word-boundary-ascii"
+regex = ' \b'
+haystack = " δ"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "word-boundary-unicode"
+regex = ' \b'
+haystack = " δ"
+matches = [[0, 1]]
+unicode = true
+utf8 = false
+
+[[test]]
+name = "word-boundary-ascii-not"
+regex = ' \B'
+haystack = " δ"
+matches = [[0, 1]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "word-boundary-unicode-not"
+regex = ' \B'
+haystack = " δ"
+matches = []
+unicode = true
+utf8 = false
+
+[[test]]
+name = "perl-word-ascii"
+regex = '\w+'
+haystack = "aδ"
+matches = [[0, 1]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "perl-word-unicode"
+regex = '\w+'
+haystack = "aδ"
+matches = [[0, 3]]
+unicode = true
+utf8 = false
+
+[[test]]
+name = "perl-decimal-ascii"
+regex = '\d+'
+haystack = "1२३9"
+matches = [[0, 1], [7, 8]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "perl-decimal-unicode"
+regex = '\d+'
+haystack = "1२३9"
+matches = [[0, 8]]
+unicode = true
+utf8 = false
+
+[[test]]
+name = "perl-whitespace-ascii"
+regex = '\s+'
+haystack = " \u1680"
+matches = [[0, 1]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "perl-whitespace-unicode"
+regex = '\s+'
+haystack = " \u1680"
+matches = [[0, 4]]
+unicode = true
+utf8 = false
+
+# The first `(.+)` matches two Unicode codepoints, but can't match the 5th
+# byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and
+# matches.
+[[test]] +name = "mixed-dot" +regex = '(.+)(?-u)(.+)' +haystack = '\xCE\x93\xCE\x94\xFF' +matches = [ + [[0, 5], [0, 4], [4, 5]], +] +unescape = true +unicode = true +utf8 = false + +[[test]] +name = "case-one-ascii" +regex = 'a' +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-one-unicode" +regex = 'a' +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = true +utf8 = false + +[[test]] +name = "case-class-simple-ascii" +regex = '[a-z]+' +haystack = "AaAaA" +matches = [[0, 5]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-class-ascii" +regex = '[a-z]+' +haystack = "aA\u212AaA" +matches = [[0, 2], [5, 7]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-class-unicode" +regex = '[a-z]+' +haystack = "aA\u212AaA" +matches = [[0, 7]] +case-insensitive = true +unicode = true +utf8 = false + +[[test]] +name = "negate-ascii" +regex = '[^a]' +haystack = "δ" +matches = [[0, 1], [1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "negate-unicode" +regex = '[^a]' +haystack = "δ" +matches = [[0, 2]] +unicode = true +utf8 = false + +# When utf8=true, this won't match, because the implicit '.*?' prefix is +# Unicode aware and will refuse to match through invalid UTF-8 bytes. +[[test]] +name = "dotstar-prefix-ascii" +regex = 'a' +haystack = '\xFFa' +matches = [[1, 2]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "dotstar-prefix-unicode" +regex = 'a' +haystack = '\xFFa' +matches = [[1, 2]] +unescape = true +unicode = true +utf8 = false + +[[test]] +name = "null-bytes" +regex = '(?P<cstr>[^\x00]+)\x00' +haystack = 'foo\x00' +matches = [ + [[0, 4], [0, 3]], +] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-100" +regex = '\xCC?^' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[0, 0]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-200" +regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[22, 22]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-300" +regex = '^|ddp\xff\xffdddddlQd@\x80' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[0, 0]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-ascii-100" +regex = '\Bx\B' +haystack = "áxβ" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-ascii-200" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false +utf8 = false diff --git a/vendor/regex/testdata/crazy.toml b/vendor/regex/testdata/crazy.toml new file mode 100644 index 0000000..aed46ea --- /dev/null +++ b/vendor/regex/testdata/crazy.toml @@ -0,0 +1,315 @@ +[[test]] +name = "nothing-empty" +regex = [] +haystack = "" +matches = [] + +[[test]] +name = "nothing-something" +regex = [] +haystack = "wat" +matches = [] + +[[test]] +name = "ranges" +regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' +haystack = "num: 255" +matches = [[5, 8]] + +[[test]] +name = "ranges-not" +regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' +haystack = "num: 256" +matches = [] + +[[test]] +name = "float1" +regex = '[-+]?[0-9]*\.?[0-9]+' 
+haystack = "0.1" +matches = [[0, 3]] + +[[test]] +name = "float2" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "0.1.2" +matches = [[0, 3]] +match-limit = 1 + +[[test]] +name = "float3" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "a1.2" +matches = [[1, 4]] + +[[test]] +name = "float4" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "1.a" +matches = [[0, 1]] + +[[test]] +name = "float5" +regex = '^[-+]?[0-9]*\.?[0-9]+$' +haystack = "1.a" +matches = [] + +[[test]] +name = "email" +regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' +haystack = "mine is jam.slam@gmail.com " +matches = [[8, 26]] + +[[test]] +name = "email-not" +regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' +haystack = "mine is jam.slam@gmail " +matches = [] + +[[test]] +name = "email-big" +regex = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?''' +haystack = "mine is jam.slam@gmail.com " +matches = [[8, 26]] + +[[test]] +name = "date1" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-01-01" +matches = [[0, 10]] +unicode = false + +[[test]] +name = "date2" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-00-01" +matches = [] +unicode = false + +[[test]] +name = "date3" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-13-01" +matches = [] +unicode = false + +[[test]] +name = "start-end-empty" +regex = '^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-rev" +regex = '$^' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-many-1" +regex = '^$^$^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-many-2" +regex = '^^^$$$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-rep" +regex = '(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "start-end-empty-rep-rev" +regex = '(?:$^)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "neg-class-letter" +regex = '[^ac]' +haystack = "acx" +matches = [[2, 3]] + +[[test]] +name = "neg-class-letter-comma" +regex = '[^a,]' +haystack = "a,x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-letter-space" +regex = '[^a[:space:]]' +haystack = "a x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-comma" +regex = '[^,]' +haystack = ",,x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-space" +regex = '[^[:space:]]' +haystack = " a" +matches = [[1, 2]] + +[[test]] +name = "neg-class-space-comma" +regex = '[^,[:space:]]' +haystack = ", a" +matches = [[2, 3]] + +[[test]] +name = "neg-class-comma-space" +regex = '[^[:space:],]' +haystack = " ,a" +matches = [[2, 3]] + +[[test]] +name = "neg-class-ascii" +regex = '[^[:alpha:]Z]' +haystack = "A1" +matches = [[1, 2]] + +[[test]] +name = "lazy-many-many" +regex = '(?:(?:.*)*?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-many-optional" +regex = '(?:(?:.?)*?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-one-many-many" +regex = '(?:(?:.*)+?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-one-many-optional" +regex = '(?:(?:.?)+?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-range-min-many" +regex = '(?:(?:.*){1,}?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-range-many" +regex = 
'(?:(?:.*){1,2}?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-many-many" +regex = '(?:(?:.*)*)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-many-optional" +regex = '(?:(?:.?)*)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-one-many-many" +regex = '(?:(?:.*)+)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-one-many-optional" +regex = '(?:(?:.?)+)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-range-min-many" +regex = '(?:(?:.*){1,})=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-range-many" +regex = '(?:(?:.*){1,2})=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "empty1" +regex = '' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "empty2" +regex = '' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty3" +regex = '(?:)' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty4" +regex = '(?:)*' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty5" +regex = '(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty6" +regex = '(?:)?' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty7" +regex = '(?:)(?:)' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty8" +regex = '(?:)+|z' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty9" +regex = 'z|(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty10" +regex = '(?:)+|b' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty11" +regex = 'b|(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] diff --git a/vendor/regex/testdata/crlf.toml b/vendor/regex/testdata/crlf.toml new file mode 100644 index 0000000..9e2d376 --- /dev/null +++ b/vendor/regex/testdata/crlf.toml @@ -0,0 +1,117 @@ +# This is a basic test that checks ^ and $ treat \r\n as a single line +# terminator. If ^ and $ only treated \n as a line terminator, then this would +# only match 'xyz' at the end of the haystack. +[[test]] +name = "basic" +regex = '(?mR)^[a-z]+$' +haystack = "abc\r\ndef\r\nxyz" +matches = [[0, 3], [5, 8], [10, 13]] + +# Tests that a CRLF-aware '^$' assertion does not match between CR and LF. +[[test]] +name = "start-end-non-empty" +regex = '(?mR)^$' +haystack = "abc\r\ndef\r\nxyz" +matches = [] + +# Tests that a CRLF-aware '^$' assertion matches the empty string, just like +# a non-CRLF-aware '^$' assertion. +[[test]] +name = "start-end-empty" +regex = '(?mR)^$' +haystack = "" +matches = [[0, 0]] + +# Tests that a CRLF-aware '^$' assertion matches the empty string preceding +# and following a line terminator. +[[test]] +name = "start-end-before-after" +regex = '(?mR)^$' +haystack = "\r\n" +matches = [[0, 0], [2, 2]] + +# Tests that a CRLF-aware '^' assertion does not split a line terminator. +[[test]] +name = "start-no-split" +regex = '(?mR)^' +haystack = "abc\r\ndef\r\nxyz" +matches = [[0, 0], [5, 5], [10, 10]] + +# Same as above, but with adjacent runs of line terminators. +[[test]] +name = "start-no-split-adjacent" +regex = '(?mR)^' +haystack = "\r\n\r\n\r\n" +matches = [[0, 0], [2, 2], [4, 4], [6, 6]] + +# Same as above, but with adjacent runs of just carriage returns. 
+[[test]] +name = "start-no-split-adjacent-cr" +regex = '(?mR)^' +haystack = "\r\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Same as above, but with adjacent runs of just line feeds. +[[test]] +name = "start-no-split-adjacent-lf" +regex = '(?mR)^' +haystack = "\n\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Tests that a CRLF-aware '$' assertion does not split a line terminator. +[[test]] +name = "end-no-split" +regex = '(?mR)$' +haystack = "abc\r\ndef\r\nxyz" +matches = [[3, 3], [8, 8], [13, 13]] + +# Same as above, but with adjacent runs of line terminators. +[[test]] +name = "end-no-split-adjacent" +regex = '(?mR)$' +haystack = "\r\n\r\n\r\n" +matches = [[0, 0], [2, 2], [4, 4], [6, 6]] + +# Same as above, but with adjacent runs of just carriage returns. +[[test]] +name = "end-no-split-adjacent-cr" +regex = '(?mR)$' +haystack = "\r\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Same as above, but with adjacent runs of just line feeds. +[[test]] +name = "end-no-split-adjacent-lf" +regex = '(?mR)$' +haystack = "\n\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Tests that '.' does not match either \r or \n when CRLF mode is enabled. Note +# that this doesn't require multi-line mode to be enabled. +[[test]] +name = "dot-no-crlf" +regex = '(?R).' +haystack = "\r\n\r\n\r\n" +matches = [] + +# This is a test that caught a bug in the one-pass DFA where it (amazingly) was +# using 'is_end_lf' instead of 'is_end_crlf' here. It was probably a copy & +# paste bug. We insert an empty capture group here because it provokes the meta +# regex engine to first find a match and then trip over a panic because the +# one-pass DFA erroneously says there is no match. +[[test]] +name = "onepass-wrong-crlf-with-capture" +regex = '(?Rm:().$)' +haystack = "ZZ\r" +matches = [[[1, 2], [1, 1]]] + +# This is like onepass-wrong-crlf-with-capture above, except it sets up the +# test so that it can be run by the one-pass DFA directly. (i.e., Make it +# anchored and start the search at the right place.) +[[test]] +name = "onepass-wrong-crlf-anchored" +regex = '(?Rm:.$)' +haystack = "ZZ\r" +matches = [[1, 2]] +anchored = true +bounds = [1, 3] diff --git a/vendor/regex/testdata/earliest.toml b/vendor/regex/testdata/earliest.toml new file mode 100644 index 0000000..9516893 --- /dev/null +++ b/vendor/regex/testdata/earliest.toml @@ -0,0 +1,52 @@ +[[test]] +name = "no-greedy-100" +regex = 'a+' +haystack = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] +search-kind = "earliest" + +[[test]] +name = "no-greedy-200" +regex = 'abc+' +haystack = "zzzabccc" +matches = [[3, 6]] +search-kind = "earliest" + +[[test]] +name = "is-ungreedy" +regex = 'a+?' 
+haystack = "aaa"
+matches = [[0, 1], [1, 2], [2, 3]]
+search-kind = "earliest"
+
+[[test]]
+name = "look-start-test"
+regex = '^(abc|a)'
+haystack = "abc"
+matches = [
+    [[0, 1], [0, 1]],
+]
+search-kind = "earliest"
+
+[[test]]
+name = "look-end-test"
+regex = '(abc|a)$'
+haystack = "abc"
+matches = [
+    [[0, 3], [0, 3]],
+]
+search-kind = "earliest"
+
+[[test]]
+name = "no-leftmost-first-100"
+regex = 'abc|a'
+haystack = "abc"
+matches = [[0, 1]]
+search-kind = "earliest"
+
+[[test]]
+name = "no-leftmost-first-200"
+regex = 'aba|a'
+haystack = "aba"
+matches = [[0, 1], [2, 3]]
+search-kind = "earliest"
diff --git a/vendor/regex/testdata/empty.toml b/vendor/regex/testdata/empty.toml
new file mode 100644
index 0000000..7dfd802
--- /dev/null
+++ b/vendor/regex/testdata/empty.toml
@@ -0,0 +1,113 @@
+[[test]]
+name = "100"
+regex = "|b"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "110"
+regex = "b|"
+haystack = "abc"
+matches = [[0, 0], [1, 2], [3, 3]]
+
+[[test]]
+name = "120"
+regex = "|z"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "130"
+regex = "z|"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "200"
+regex = "|"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "210"
+regex = "||"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "220"
+regex = "||b"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "230"
+regex = "b||"
+haystack = "abc"
+matches = [[0, 0], [1, 2], [3, 3]]
+
+[[test]]
+name = "240"
+regex = "||z"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "300"
+regex = "(?:)|b"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "310"
+regex = "b|(?:)"
+haystack = "abc"
+matches = [[0, 0], [1, 2], [3, 3]]
+
+[[test]]
+name = "320"
+regex = "(?:|)"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "330"
+regex = "(?:|)|z"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "400"
+regex = "a(?:)|b"
+haystack = "abc"
+matches = [[0, 1], [1, 2]]
+
+[[test]]
+name = "500"
+regex = ""
+haystack = ""
+matches = [[0, 0]]
+
+[[test]]
+name = "510"
+regex = ""
+haystack = "a"
+matches = [[0, 0], [1, 1]]
+
+[[test]]
+name = "520"
+regex = ""
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "600"
+regex = '(?:|a)*'
+haystack = "aaa"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "610"
+regex = '(?:|a)+'
+haystack = "aaa"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
diff --git a/vendor/regex/testdata/expensive.toml b/vendor/regex/testdata/expensive.toml
new file mode 100644
index 0000000..b70e42f
--- /dev/null
+++ b/vendor/regex/testdata/expensive.toml
@@ -0,0 +1,23 @@
+# This file contains tests that may be expensive to run on some regex engines.
+# For example, tests that build a full DFA ahead of time and minimize it can
+# take a horrendously long time on regexes that are large (or result in an
+# explosion in the number of states). We group these tests together so that
+# such engines can simply skip them.
+
+# See: https://github.com/rust-lang/regex/issues/98
+[[test]]
+name = "regression-many-repeat-no-stack-overflow"
+regex = '^.{1,2500}'
+haystack = "a"
+matches = [[0, 1]]
+
+# This test is meant to blow the bounded backtracker's visited capacity.
In +# order to do that, we need a somewhat sizeable regex. The purpose of this +# is to make sure there's at least one test that exercises this path in the +# backtracker. All other tests (at time of writing) are small enough that the +# backtracker can handle them fine. +[[test]] +name = "backtrack-blow-visited-capacity" +regex = '\pL{50}' +haystack = "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyZZ" +matches = [[0, 50], [50, 100], [100, 150]] diff --git a/vendor/regex/testdata/flags.toml b/vendor/regex/testdata/flags.toml new file mode 100644 index 0000000..30b412c --- /dev/null +++ b/vendor/regex/testdata/flags.toml @@ -0,0 +1,68 @@ +[[test]] +name = "1" +regex = "(?i)abc" +haystack = "ABC" +matches = [[0, 3]] + +[[test]] +name = "2" +regex = "(?i)a(?-i)bc" +haystack = "Abc" +matches = [[0, 3]] + +[[test]] +name = "3" +regex = "(?i)a(?-i)bc" +haystack = "ABC" +matches = [] + +[[test]] +name = "4" +regex = "(?is)a." +haystack = "A\n" +matches = [[0, 2]] + +[[test]] +name = "5" +regex = "(?is)a.(?-is)a." +haystack = "A\nab" +matches = [[0, 4]] + +[[test]] +name = "6" +regex = "(?is)a.(?-is)a." +haystack = "A\na\n" +matches = [] + +[[test]] +name = "7" +regex = "(?is)a.(?-is:a.)?" +haystack = "A\na\n" +matches = [[0, 2]] +match-limit = 1 + +[[test]] +name = "8" +regex = "(?U)a+" +haystack = "aa" +matches = [[0, 1]] +match-limit = 1 + +[[test]] +name = "9" +regex = "(?U)a+?" +haystack = "aa" +matches = [[0, 2]] + +[[test]] +name = "10" +regex = "(?U)(?-U)a+" +haystack = "aa" +matches = [[0, 2]] + +[[test]] +name = "11" +regex = '(?m)(?:^\d+$\n?)+' +haystack = "123\n456\n789" +matches = [[0, 11]] +unicode = false diff --git a/vendor/regex/testdata/fowler/basic.toml b/vendor/regex/testdata/fowler/basic.toml new file mode 100644 index 0000000..92b4e4c --- /dev/null +++ b/vendor/regex/testdata/fowler/basic.toml @@ -0,0 +1,1611 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
+ +[[test]] +name = "basic3" +regex = '''abracadabra$''' +haystack = '''abracadabracadabra''' +matches = [[[7, 18]]] +match-limit = 1 + +[[test]] +name = "basic4" +regex = '''a...b''' +haystack = '''abababbb''' +matches = [[[2, 7]]] +match-limit = 1 + +[[test]] +name = "basic5" +regex = '''XXXXXX''' +haystack = '''..XXXXXX''' +matches = [[[2, 8]]] +match-limit = 1 + +[[test]] +name = "basic6" +regex = '''\)''' +haystack = '''()''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic7" +regex = '''a]''' +haystack = '''a]a''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic9" +regex = '''\}''' +haystack = '''}''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic10" +regex = '''\]''' +haystack = ''']''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic12" +regex = ''']''' +haystack = ''']''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic15" +regex = '''^a''' +haystack = '''ax''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic16" +regex = '''\^a''' +haystack = '''a^a''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic17" +regex = '''a\^''' +haystack = '''a^''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic18" +regex = '''a$''' +haystack = '''aa''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic19" +regex = '''a\$''' +haystack = '''a$''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic20" +regex = '''^$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic21" +regex = '''$^''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic22" +regex = '''a($)''' +haystack = '''aa''' +matches = [[[1, 2], [2, 2]]] +match-limit = 1 + +[[test]] +name = "basic23" +regex = '''a*(^a)''' +haystack = '''aa''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic24" +regex = '''(..)*(...)*''' +haystack = '''a''' +matches = [[[0, 0], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic25" +regex = '''(..)*(...)*''' +haystack = '''abcd''' +matches = [[[0, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic26" +regex = '''(ab|a)(bc|c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic27" +regex = '''(ab)c|abc''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic28" +regex = '''a{0}b''' +haystack = '''ab''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic29" +regex = '''(a*)(b?)(b+)b{3}''' +haystack = '''aaabbbbbbb''' +matches = [[[0, 10], [0, 3], [3, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic30" +regex = '''(a*)(b{0,1})(b{1,})b{3}''' +haystack = '''aaabbbbbbb''' +matches = [[[0, 10], [0, 3], [3, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic32" +regex = '''((a|a)|a)''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic33" +regex = '''(a*)(a|aa)''' +haystack = '''aaaa''' +matches = [[[0, 4], [0, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic34" +regex = '''a*(a.|aa)''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4]]] +match-limit = 1 +anchored = true + 
+[[test]] +name = "basic35" +regex = '''a(b)|c(d)|a(e)f''' +haystack = '''aef''' +matches = [[[0, 3], [], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic36" +regex = '''(a|b)?.*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic37" +regex = '''(a|b)c|a(b|c)''' +haystack = '''ac''' +matches = [[[0, 2], [0, 1], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic38" +regex = '''(a|b)c|a(b|c)''' +haystack = '''ab''' +matches = [[[0, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic39" +regex = '''(a|b)*c|(a|ab)*c''' +haystack = '''abc''' +matches = [[[0, 3], [1, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic40" +regex = '''(a|b)*c|(a|ab)*c''' +haystack = '''xc''' +matches = [[[1, 2], [], []]] +match-limit = 1 + +[[test]] +name = "basic41" +regex = '''(.a|.b).*|.*(.a|.b)''' +haystack = '''xa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic42" +regex = '''a?(ab|ba)ab''' +haystack = '''abab''' +matches = [[[0, 4], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic43" +regex = '''a?(ac{0}b|ba)ab''' +haystack = '''abab''' +matches = [[[0, 4], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic44" +regex = '''ab|abab''' +haystack = '''abbabab''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic45" +regex = '''aba|bab|bba''' +haystack = '''baaabbbaba''' +matches = [[[5, 8]]] +match-limit = 1 + +[[test]] +name = "basic46" +regex = '''aba|bab''' +haystack = '''baaabbbaba''' +matches = [[[6, 9]]] +match-limit = 1 + +[[test]] +name = "basic47" +regex = '''(aa|aaa)*|(a|aaaaa)''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic48" +regex = '''(a.|.a.)*|(a|.a...)''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic49" +regex = '''ab|a''' +haystack = '''xabc''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic50" +regex = '''ab|a''' +haystack = '''xxabc''' +matches = [[[2, 4]]] +match-limit = 1 + +[[test]] +name = "basic51" +regex = '''(Ab|cD)*''' +haystack = '''aBcD''' +matches = [[[0, 4], [2, 4]]] +match-limit = 1 +anchored = true +case-insensitive = true + +[[test]] +name = "basic52" +regex = '''[^-]''' +haystack = '''--a''' +matches = [[[2, 3]]] +match-limit = 1 + +[[test]] +name = "basic53" +regex = '''[a-]*''' +haystack = '''--a''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic54" +regex = '''[a-m-]*''' +haystack = '''--amoma--''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic55" +regex = ''':::1:::0:|:::1:1:0:''' +haystack = ''':::0:::1:::1:::0:''' +matches = [[[8, 17]]] +match-limit = 1 + +[[test]] +name = "basic56" +regex = ''':::1:::0:|:::1:1:1:''' +haystack = ''':::0:::1:::1:::0:''' +matches = [[[8, 17]]] +match-limit = 1 + +[[test]] +name = "basic57" +regex = '''[[:upper:]]''' +haystack = '''A''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic58" +regex = '''[[:lower:]]+''' +haystack = '''`az{''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic59" +regex = '''[[:upper:]]+''' +haystack = '''@AZ[''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic65" +regex = '''\n''' +haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true 
+unescape = true + +[[test]] +name = "basic66" +regex = '''\n''' +haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic67" +regex = '''[^a]''' +haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic68" +regex = '''\na''' +haystack = '''\na''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic69" +regex = '''(a)(b)(c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1], [1, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic70" +regex = '''xxx''' +haystack = '''xxx''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic72" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''feb 6,''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic74" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''2/7''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic76" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''feb 1,Feb 6''' +matches = [[[5, 11]]] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "basic78" +regex = '''(((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))''' +haystack = '''x''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic80" +regex = '''(((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))*''' +haystack = '''xx''' +matches = [[[0, 2], [1, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic81" +regex = '''a?(ab|ba)*''' +haystack = '''ababababababababababababababababababababababababababababababababababababababababa''' +matches = [[[0, 81], [79, 81]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic82" +regex = '''abaa|abbaa|abbbaa|abbbbaa''' +haystack = '''ababbabbbabbbabbbbabbbbaa''' +matches = [[[18, 25]]] +match-limit = 1 + +[[test]] +name = "basic83" +regex = '''abaa|abbaa|abbbaa|abbbbaa''' +haystack = '''ababbabbbabbbabbbbabaa''' +matches = [[[18, 22]]] +match-limit = 1 + +[[test]] +name = "basic84" +regex = '''aaac|aabc|abac|abbc|baac|babc|bbac|bbbc''' +haystack = '''baaabbbabac''' +matches = [[[7, 11]]] +match-limit = 1 + +# Test added by Rust regex project. 
+[[test]] +name = "basic86" +regex = '''.*''' +haystack = '''\x01\x7f''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic87" +regex = '''aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll''' +haystack = '''XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa''' +matches = [[[53, 57]]] +match-limit = 1 + +[[test]] +name = "basic89" +regex = '''a*a*a*a*a*b''' +haystack = '''aaaaaaaaab''' +matches = [[[0, 10]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic90" +regex = '''^''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic91" +regex = '''$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic92" +regex = '''^$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic93" +regex = '''^a$''' +haystack = '''a''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic94" +regex = '''abc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic95" +regex = '''abc''' +haystack = '''xabcy''' +matches = [[[1, 4]]] +match-limit = 1 + +[[test]] +name = "basic96" +regex = '''abc''' +haystack = '''ababc''' +matches = [[[2, 5]]] +match-limit = 1 + +[[test]] +name = "basic97" +regex = '''ab*c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic98" +regex = '''ab*bc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic99" +regex = '''ab*bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic100" +regex = '''ab*bc''' +haystack = '''abbbbc''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic101" +regex = '''ab+bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic102" +regex = '''ab+bc''' +haystack = '''abbbbc''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic103" +regex = '''ab?bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic104" +regex = '''ab?bc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic105" +regex = '''ab?c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic106" +regex = '''^abc$''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic107" +regex = '''^abc''' +haystack = '''abcc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic108" +regex = '''abc$''' +haystack = '''aabc''' +matches = [[[1, 4]]] +match-limit = 1 + +[[test]] +name = "basic109" +regex = '''^''' +haystack = '''abc''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic110" +regex = '''$''' +haystack = '''abc''' +matches = [[[3, 3]]] +match-limit = 1 + +[[test]] +name = "basic111" +regex = '''a.c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic112" +regex = '''a.c''' +haystack = '''axc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic113" +regex = '''a.*c''' +haystack = '''axyzc''' +matches = [[[0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = 
"basic114" +regex = '''a[bc]d''' +haystack = '''abd''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic115" +regex = '''a[b-d]e''' +haystack = '''ace''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic116" +regex = '''a[b-d]''' +haystack = '''aac''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic117" +regex = '''a[-b]''' +haystack = '''a-''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic118" +regex = '''a[b-]''' +haystack = '''a-''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic119" +regex = '''a]''' +haystack = '''a]''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic120" +regex = '''a[]]b''' +haystack = '''a]b''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic121" +regex = '''a[^bc]d''' +haystack = '''aed''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic122" +regex = '''a[^-b]c''' +haystack = '''adc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic123" +regex = '''a[^]b]c''' +haystack = '''adc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic124" +regex = '''ab|cd''' +haystack = '''abc''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic125" +regex = '''ab|cd''' +haystack = '''abcd''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic126" +regex = '''a\(b''' +haystack = '''a(b''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic127" +regex = '''a\(*b''' +haystack = '''ab''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic128" +regex = '''a\(*b''' +haystack = '''a((b''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic129" +regex = '''((a))''' +haystack = '''abc''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic130" +regex = '''(a)b(c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic131" +regex = '''a+b+c''' +haystack = '''aabbabc''' +matches = [[[4, 7]]] +match-limit = 1 + +[[test]] +name = "basic132" +regex = '''a*''' +haystack = '''aaa''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic133" +regex = '''(a*)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic134" +regex = '''(a*)+''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic135" +regex = '''(a*|b)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic136" +regex = '''(a+|b)*''' +haystack = '''ab''' +matches = [[[0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic137" +regex = '''(a+|b)+''' +haystack = '''ab''' +matches = [[[0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic138" +regex = '''(a+|b)?''' +haystack = '''ab''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic139" +regex = '''[^ab]*''' +haystack = '''cde''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic140" +regex = '''(^)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + 
+[[test]] +name = "basic141" +regex = '''a*''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic142" +regex = '''([abc])*d''' +haystack = '''abbbcd''' +matches = [[[0, 6], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic143" +regex = '''([abc])*bcd''' +haystack = '''abcd''' +matches = [[[0, 4], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic144" +regex = '''a|b|c|d|e''' +haystack = '''e''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic145" +regex = '''(a|b|c|d|e)f''' +haystack = '''ef''' +matches = [[[0, 2], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic146" +regex = '''((a*|b))*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic147" +regex = '''abcd*efg''' +haystack = '''abcdefg''' +matches = [[[0, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic148" +regex = '''ab*''' +haystack = '''xabyabbbz''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic149" +regex = '''ab*''' +haystack = '''xayabbbz''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic150" +regex = '''(ab|cd)e''' +haystack = '''abcde''' +matches = [[[2, 5], [2, 4]]] +match-limit = 1 + +[[test]] +name = "basic151" +regex = '''[abhgefdc]ij''' +haystack = '''hij''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic152" +regex = '''(a|b)c*d''' +haystack = '''abcd''' +matches = [[[1, 4], [1, 2]]] +match-limit = 1 + +[[test]] +name = "basic153" +regex = '''(ab|ab*)bc''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic154" +regex = '''a([bc]*)c*''' +haystack = '''abc''' +matches = [[[0, 3], [1, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic155" +regex = '''a([bc]*)(c*d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic156" +regex = '''a([bc]+)(c*d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic157" +regex = '''a([bc]*)(c+d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 2], [2, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic158" +regex = '''a[bcd]*dcdcde''' +haystack = '''adcdcde''' +matches = [[[0, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic159" +regex = '''(ab|a)b*c''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic160" +regex = '''((a)(b)c)(d)''' +haystack = '''abcd''' +matches = [[[0, 4], [0, 3], [0, 1], [1, 2], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic161" +regex = '''[A-Za-z_][A-Za-z0-9_]*''' +haystack = '''alpha''' +matches = [[[0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic162" +regex = '''^a(bc+|b[eh])g|.h$''' +haystack = '''abh''' +matches = [[[1, 3], []]] +match-limit = 1 + +[[test]] +name = "basic163" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''effgz''' +matches = [[[0, 5], [0, 5], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic164" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''ij''' +matches = [[[0, 2], [0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic165" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''reffgz''' +matches = [[[1, 6], [1, 6], []]] +match-limit = 1 + 
+[[test]] +name = "basic166" +regex = '''(((((((((a)))))))))''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic167" +regex = '''multiple words''' +haystack = '''multiple words yeah''' +matches = [[[0, 14]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic168" +regex = '''(.*)c(.*)''' +haystack = '''abcde''' +matches = [[[0, 5], [0, 2], [3, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic169" +regex = '''abcd''' +haystack = '''abcd''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic170" +regex = '''a(bc)d''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic171" +regex = '''a[\x01-\x03]?c''' +haystack = '''a\x02c''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic172" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Qaddafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic173" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mo'ammar Gadhafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic174" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Kaddafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic175" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Qadhafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic176" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Gadafi''' +matches = [[[0, 14], [], [10, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic177" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mu'ammar Qadafi''' +matches = [[[0, 15], [], [11, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic178" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Moamar Gaddafi''' +matches = [[[0, 14], [], [9, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic179" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mu'ammar Qadhdhafi''' +matches = [[[0, 18], [], [13, 15]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic180" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Khaddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic181" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghaddafy''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic182" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghadafi''' +matches = [[[0, 15], [], [11, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic183" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghaddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = 
true + +[[test]] +name = "basic184" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muamar Kaddafi''' +matches = [[[0, 14], [], [9, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic185" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Quathafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic186" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Gheddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic187" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Moammar Khadafy''' +matches = [[[0, 15], [], [11, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic188" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Moammar Qudhafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic189" +regex = '''a+(b|c)*d+''' +haystack = '''aabcdd''' +matches = [[[0, 6], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic190" +regex = '''^.+$''' +haystack = '''vivi''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic191" +regex = '''^(.+)$''' +haystack = '''vivi''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic192" +regex = '''^([^!.]+).att.com!(.+)$''' +haystack = '''gryphon.att.com!eby''' +matches = [[[0, 19], [0, 7], [16, 19]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic193" +regex = '''^([^!]+!)?([^!]+)$''' +haystack = '''bas''' +matches = [[[0, 3], [], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic194" +regex = '''^([^!]+!)?([^!]+)$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic195" +regex = '''^([^!]+!)?([^!]+)$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic196" +regex = '''^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic197" +regex = '''((foo)|(bar))!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], [], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic198" +regex = '''((foo)|(bar))!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7], [], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic199" +regex = '''((foo)|(bar))!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3], [0, 3], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic200" +regex = '''((foo)|bar)!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic201" +regex = '''((foo)|bar)!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7], []]] +match-limit = 1 + +[[test]] +name = "basic202" +regex = '''((foo)|bar)!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic203" +regex = '''(foo|(bar))!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic204" +regex = '''(foo|(bar))!bas''' +haystack = '''foo!bar!bas''' +matches = 
[[[4, 11], [4, 7], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic205" +regex = '''(foo|(bar))!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic206" +regex = '''(foo|bar)!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic207" +regex = '''(foo|bar)!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic208" +regex = '''(foo|bar)!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic209" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [0, 11], [], [], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic210" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''bas''' +matches = [[[0, 3], [], [0, 3], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic211" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic212" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [], [], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic213" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic214" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''bas''' +matches = [[[0, 3], [0, 3], [], [0, 3], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic215" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic216" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [0, 11], [], [], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic217" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic218" +regex = '''.*(/XXX).*''' +haystack = '''/XXX''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic219" +regex = '''.*(\\XXX).*''' +haystack = '''\XXX''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic220" +regex = '''\\XXX''' +haystack = '''\XXX''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic221" +regex = '''.*(/000).*''' +haystack = '''/000''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic222" +regex = '''.*(\\000).*''' +haystack = '''\000''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic223" +regex = '''\\000''' +haystack = '''\000''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + diff --git a/vendor/regex/testdata/fowler/dat/README b/vendor/regex/testdata/fowler/dat/README new file mode 100644 index 0000000..242a0e6 --- /dev/null +++ b/vendor/regex/testdata/fowler/dat/README @@ -0,0 +1,25 @@ +Test data was taken from the Go distribution, which was in turn taken from the 
+testregex test suite: + + http://web.archive.org/web/20150925124103/http://www2.research.att.com/~astopen/testregex/testregex.html + +Unfortunately, the original web site now appears dead, but the test data lives +on. + +The LICENSE in this directory corresponds to the LICENSE that the data was +originally released under. + +The tests themselves were modified for RE2/Go (and marked as such). A +couple were modified further by me (Andrew Gallant) and marked with 'Rust'. + +After some number of years, these tests were transformed into a TOML format +using the 'regex-cli generate fowler' command. To re-generate the +TOML files, run the following from the root of this repository: + + regex-cli generate fowler tests/data/fowler tests/data/fowler/dat/*.dat + +This assumes that you have 'regex-cli' installed. See 'regex-cli/README.md' +from the root of the repository for more information. + +This brings the Fowler tests into a more "sensible" structured format in which +other tests can be written such that they aren't write-only. diff --git a/vendor/regex/src/testdata/basic.dat b/vendor/regex/testdata/fowler/dat/basic.dat similarity index 87% rename from vendor/regex/src/testdata/basic.dat rename to vendor/regex/testdata/fowler/dat/basic.dat index 632e1bb..654a72b 100644 --- a/vendor/regex/src/testdata/basic.dat +++ b/vendor/regex/testdata/fowler/dat/basic.dat @@ -48,7 +48,7 @@ E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2) E (a.|.a.)*|(a|.a...) aa (0,2)(0,2) E ab|a xabc (1,3) E ab|a xxabc (2,4) -Ei (?-u)(Ab|cD)* aBcD (0,4)(2,4) +Ei (Ab|cD)* aBcD (0,4)(2,4) BE [^-] --a (2,3) BE [a-]* --a (0,3) BE [a-m-]* --amoma-- (0,4) @@ -68,16 +68,22 @@ BE$ [^a] \n (0,1) BE$ \na \na (0,2) E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3) BE xxx xxx (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11) -E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) -E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 6, (0,6) Rust +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) 2/7 (0,3) Rust +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 1,Feb 6 (5,11) Rust +#E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) +E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) Rust +#E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) +E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) Rust E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81) E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25) E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22) E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11) -BE$ .* \x01\x7f (0,2) +#BE$ .* \x01\xff (0,2) +BE$ .* \x01\x7f (0,2) Rust E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll 
XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57) L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH E a*a*a*a*a*b aaaaaaaaab (0,10) @@ -124,24 +130,20 @@ E ((a)) abc (0,1)(0,1)(0,1) E (a)b(c) abc (0,3)(0,1)(2,3) E a+b+c aabbabc (4,7) E a* aaa (0,3) -#E (a*)* - (0,0)(0,0) -E (a*)* - (0,0)(?,?) RE2/Go +E (a*)* - (0,0)(0,0) E (a*)+ - (0,0)(0,0) -#E (a*|b)* - (0,0)(0,0) -E (a*|b)* - (0,0)(?,?) RE2/Go +E (a*|b)* - (0,0)(0,0) E (a+|b)* ab (0,2)(1,2) E (a+|b)+ ab (0,2)(1,2) E (a+|b)? ab (0,1)(0,1) BE [^ab]* cde (0,3) -#E (^)* - (0,0)(0,0) -E (^)* - (0,0)(?,?) RE2/Go +E (^)* - (0,0)(0,0) BE a* NULL (0,0) E ([abc])*d abbbcd (0,6)(4,5) E ([abc])*bcd abcd (0,4)(0,1) E a|b|c|d|e e (0,1) E (a|b|c|d|e)f ef (0,2)(0,1) -#E ((a*|b))* - (0,0)(0,0)(0,0) -E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go +E ((a*|b))* - (0,0)(0,0)(0,0) BE abcd*efg abcdefg (0,7) BE ab* xabyabbbz (1,3) BE ab* xayabbbz (1,2) diff --git a/vendor/regex/src/testdata/nullsubexpr.dat b/vendor/regex/testdata/fowler/dat/nullsubexpr.dat similarity index 81% rename from vendor/regex/src/testdata/nullsubexpr.dat rename to vendor/regex/testdata/fowler/dat/nullsubexpr.dat index 2e18fbb..a944306 100644 --- a/vendor/regex/src/testdata/nullsubexpr.dat +++ b/vendor/regex/testdata/fowler/dat/nullsubexpr.dat @@ -1,8 +1,7 @@ NOTE null subexpression matches : 2002-06-06 E (a*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) RE2/Go +E SAME x (0,0)(0,0) E SAME aaaaaa (0,6)(0,6) E SAME aaaaaax (0,6)(0,6) E (a*)+ a (0,1)(0,1) @@ -19,8 +18,7 @@ E SAME aaaaaa (0,6)(0,6) E SAME aaaaaax (0,6)(0,6) E ([a]*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) RE2/Go +E SAME x (0,0)(0,0) E SAME aaaaaa (0,6)(0,6) E SAME aaaaaax (0,6)(0,6) E ([a]*)+ a (0,1)(0,1) @@ -28,8 +26,7 @@ E SAME x (0,0)(0,0) E SAME aaaaaa (0,6)(0,6) E SAME aaaaaax (0,6)(0,6) E ([^b]*)* a (0,1)(0,1) -#E SAME b (0,0)(0,0) -E SAME b (0,0)(?,?) RE2/Go +E SAME b (0,0)(0,0) E SAME aaaaaa (0,6)(0,6) E SAME aaaaaab (0,6)(0,6) E ([ab]*)* a (0,1)(0,1) @@ -41,13 +38,12 @@ E SAME bbbbbb (0,6)(0,6) E SAME aaaabcde (0,5)(0,5) E ([^a]*)* b (0,1)(0,1) E SAME bbbbbb (0,6)(0,6) -#E SAME aaaaaa (0,0)(0,0) -E SAME aaaaaa (0,0)(?,?) RE2/Go +E SAME aaaaaa (0,0)(0,0) E ([^ab]*)* ccccxx (0,6)(0,6) -#E SAME ababab (0,0)(0,0) -E SAME ababab (0,0)(?,?) RE2/Go +E SAME ababab (0,0)(0,0) -E ((z)+|a)* zabcde (0,2)(1,2) +#E ((z)+|a)* zabcde (0,2)(1,2) +E ((z)+|a)* zabcde (0,2)(1,2)(0,1) Rust #{E a+? aaaaaa (0,1) no *? +? mimimal match ops #E (a) aaa (0,1)(0,1) @@ -65,8 +61,7 @@ B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3) B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4) B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3) -#E (a*)*(x) x (0,1)(0,0)(0,1) -E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go +E (a*)*(x) x (0,1)(0,0)(0,1) E (a*)*(x) ax (0,2)(0,1)(1,2) E (a*)*(x) axa (0,2)(0,1)(1,2) diff --git a/vendor/regex/src/testdata/repetition.dat b/vendor/regex/testdata/fowler/dat/repetition.dat similarity index 89% rename from vendor/regex/src/testdata/repetition.dat rename to vendor/regex/testdata/fowler/dat/repetition.dat index 3bb2121..cf0d838 100644 --- a/vendor/regex/src/testdata/repetition.dat +++ b/vendor/regex/testdata/fowler/dat/repetition.dat @@ -84,7 +84,7 @@ E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?) NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02 -# These test a bug in OS X / FreeBSD / NetBSD, and libtree. 
+# These test a bug in OS X / FreeBSD / NetBSD, and libtree. # Linux/GLIBC gets the {8,} and {8,8} wrong. :HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8) @@ -123,18 +123,24 @@ NOTE additional repetition tests graciously provided by Chris Kuklewicz www.hask # OS X / FreeBSD / NetBSD badly fail many of these, with impossible # results like (0,6)(4,5)(6,6). -:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) -:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) +#:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) Rust :HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) :HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) :HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH -:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) -:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) +#:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) Rust :HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) :HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) :HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH -:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) -:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) +#:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) +:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) +:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) Rust # The above worked on Linux/GLIBC but the following often fail. # They also trip up OS X / FreeBSD / NetBSD: diff --git a/vendor/regex/testdata/fowler/nullsubexpr.toml b/vendor/regex/testdata/fowler/nullsubexpr.toml new file mode 100644 index 0000000..2f1f018 --- /dev/null +++ b/vendor/regex/testdata/fowler/nullsubexpr.toml @@ -0,0 +1,405 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
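A note on reading the generated entries: each element of `matches` is either a plain span or a list of spans, one per capture group, with group 0 (the overall match) first and `[]` marking a group that did not participate. Below is a minimal sketch of how the `nullsubexpr3` entry just after this maps onto the `regex` crate's captures API; the sketch is an editorial illustration and not part of the generated file.

    use regex::Regex;

    fn main() {
        // `(a*)*` against "a": group 0 (the whole match) and capture group 1
        // both span 0..1, mirroring `matches = [[[0, 1], [0, 1]]]` in the
        // `nullsubexpr3` entry below.
        let re = Regex::new(r"(a*)*").unwrap();
        let caps = re.captures("a").unwrap();
        assert_eq!(caps.get(0).unwrap().range(), 0..1);
        assert_eq!(caps.get(1).unwrap().range(), 0..1);
    }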
+ +[[test]] +name = "nullsubexpr3" +regex = '''(a*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr4" +regex = '''(a*)*''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr5" +regex = '''(a*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr6" +regex = '''(a*)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr7" +regex = '''(a*)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr8" +regex = '''(a*)+''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr9" +regex = '''(a*)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr10" +regex = '''(a*)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr11" +regex = '''(a+)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr12" +regex = '''(a+)*''' +haystack = '''x''' +matches = [[[0, 0], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr13" +regex = '''(a+)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr14" +regex = '''(a+)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr15" +regex = '''(a+)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr16" +regex = '''(a+)+''' +haystack = '''x''' +matches = [] +match-limit = 1 + +[[test]] +name = "nullsubexpr17" +regex = '''(a+)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr18" +regex = '''(a+)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr20" +regex = '''([a]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr21" +regex = '''([a]*)*''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr22" +regex = '''([a]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr23" +regex = '''([a]*)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr24" +regex = '''([a]*)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr25" +regex = '''([a]*)+''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr26" +regex = '''([a]*)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr27" +regex = '''([a]*)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr28" +regex = '''([^b]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 
+anchored = true + +[[test]] +name = "nullsubexpr29" +regex = '''([^b]*)*''' +haystack = '''b''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr30" +regex = '''([^b]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr31" +regex = '''([^b]*)*''' +haystack = '''aaaaaab''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr32" +regex = '''([ab]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr33" +regex = '''([ab]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr34" +regex = '''([ab]*)*''' +haystack = '''ababab''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr35" +regex = '''([ab]*)*''' +haystack = '''bababa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr36" +regex = '''([ab]*)*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr37" +regex = '''([ab]*)*''' +haystack = '''bbbbbb''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr38" +regex = '''([ab]*)*''' +haystack = '''aaaabcde''' +matches = [[[0, 5], [0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr39" +regex = '''([^a]*)*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr40" +regex = '''([^a]*)*''' +haystack = '''bbbbbb''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr41" +regex = '''([^a]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr42" +regex = '''([^ab]*)*''' +haystack = '''ccccxx''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr43" +regex = '''([^ab]*)*''' +haystack = '''ababab''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. 
+[[test]] +name = "nullsubexpr46" +regex = '''((z)+|a)*''' +haystack = '''zabcde''' +matches = [[[0, 2], [1, 2], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr64" +regex = '''(a*)*(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr65" +regex = '''(a*)*(x)''' +haystack = '''ax''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr66" +regex = '''(a*)*(x)''' +haystack = '''axa''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr68" +regex = '''(a*)+(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr69" +regex = '''(a*)+(x)''' +haystack = '''ax''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr70" +regex = '''(a*)+(x)''' +haystack = '''axa''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr72" +regex = '''(a*){2}(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr73" +regex = '''(a*){2}(x)''' +haystack = '''ax''' +matches = [[[0, 2], [1, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr74" +regex = '''(a*){2}(x)''' +haystack = '''axa''' +matches = [[[0, 2], [1, 1], [1, 2]]] +match-limit = 1 +anchored = true + diff --git a/vendor/regex/testdata/fowler/repetition.toml b/vendor/regex/testdata/fowler/repetition.toml new file mode 100644 index 0000000..d6a7112 --- /dev/null +++ b/vendor/regex/testdata/fowler/repetition.toml @@ -0,0 +1,746 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
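One convention worth knowing for the entries below: an empty list `[]` as a group span means the group did not participate in the match at all, which is distinct from an empty match such as `[0, 0]`. A minimal sketch of that convention against the `regex` crate, mirroring the `repetition20` entry below (illustrative only, not part of the generated file):

    use regex::Regex;

    fn main() {
        // `((..)|(.))` against "a": the two-character branch (group 2) cannot
        // match, so it does not participate (the `[]` in the TOML), while
        // groups 1 and 3 both span 0..1.
        let re = Regex::new(r"((..)|(.))").unwrap();
        let caps = re.captures("a").unwrap();
        assert_eq!(caps.get(0).unwrap().range(), 0..1);
        assert_eq!(caps.get(1).unwrap().range(), 0..1);
        assert!(caps.get(2).is_none());
        assert_eq!(caps.get(3).unwrap().range(), 0..1);
    }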
+ +[[test]] +name = "repetition10" +regex = '''((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition11" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition12" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition14" +regex = '''((..)|(.)){1}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition15" +regex = '''((..)|(.)){2}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition16" +regex = '''((..)|(.)){3}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition18" +regex = '''((..)|(.))*''' +haystack = '''''' +matches = [[[0, 0], [], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition20" +regex = '''((..)|(.))''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition21" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition22" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition24" +regex = '''((..)|(.)){1}''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition25" +regex = '''((..)|(.)){2}''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition26" +regex = '''((..)|(.)){3}''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition28" +regex = '''((..)|(.))*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition30" +regex = '''((..)|(.))''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition31" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aa''' +matches = [[[0, 2], [0, 1], [], [0, 1], [1, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition32" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aa''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition34" +regex = '''((..)|(.)){1}''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition35" +regex = '''((..)|(.)){2}''' +haystack = '''aa''' +matches = [[[0, 2], [1, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition36" +regex = '''((..)|(.)){3}''' +haystack = '''aa''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition38" +regex = '''((..)|(.))*''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition40" +regex = '''((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition41" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 3], [0, 2], [0, 2], [], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition42" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 3], [0, 1], [], [0, 1], [1, 2], [], [1, 2], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition44" +regex = '''((..)|(.)){1}''' +haystack = '''aaa''' +matches = 
[[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition46" +regex = '''((..)|(.)){2}''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition47" +regex = '''((..)|(.)){3}''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition50" +regex = '''((..)|(.))*''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition52" +regex = '''((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition53" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition54" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 3], [], [2, 3], [3, 4], [], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition56" +regex = '''((..)|(.)){1}''' +haystack = '''aaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition57" +regex = '''((..)|(.)){2}''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition59" +regex = '''((..)|(.)){3}''' +haystack = '''aaaa''' +matches = [[[0, 4], [3, 4], [0, 2], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition61" +regex = '''((..)|(.))*''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition63" +regex = '''((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition64" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition65" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 5], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 5], [], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition67" +regex = '''((..)|(.)){1}''' +haystack = '''aaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition68" +regex = '''((..)|(.)){2}''' +haystack = '''aaaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition70" +regex = '''((..)|(.)){3}''' +haystack = '''aaaaa''' +matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition73" +regex = '''((..)|(.))*''' +haystack = '''aaaaa''' +matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition75" +regex = '''((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition76" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition77" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition79" +regex = '''((..)|(.)){1}''' +haystack = '''aaaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition80" +regex = '''((..)|(.)){2}''' +haystack = '''aaaaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition81" +regex = '''((..)|(.)){3}''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition83" +regex = '''((..)|(.))*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive90" +regex = '''X(.?){0,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive91" +regex = '''X(.?){1,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive92" +regex = '''X(.?){2,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive93" +regex = '''X(.?){3,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive94" +regex = '''X(.?){4,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive95" +regex = '''X(.?){5,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive96" +regex = '''X(.?){6,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive97" +regex = '''X(.?){7,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive98" +regex = '''X(.?){8,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive100" +regex = '''X(.?){0,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive102" +regex = '''X(.?){1,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive104" +regex = '''X(.?){2,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition-expensive106" +regex = '''X(.?){3,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive108" +regex = '''X(.?){4,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive110" +regex = '''X(.?){5,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive112" +regex = '''X(.?){6,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive114" +regex = '''X(.?){7,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive115" +regex = '''X(.?){8,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive127" +regex = '''(a|ab|c|bcd){0,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive129" +regex = '''(a|ab|c|bcd){1,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive130" +regex = '''(a|ab|c|bcd){2,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive131" +regex = '''(a|ab|c|bcd){3,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive132" +regex = '''(a|ab|c|bcd){4,}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive134" +regex = '''(a|ab|c|bcd){0,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive136" +regex = '''(a|ab|c|bcd){1,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive137" +regex = '''(a|ab|c|bcd){2,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive138" +regex = '''(a|ab|c|bcd){3,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive139" +regex = '''(a|ab|c|bcd){4,10}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive141" +regex = '''(a|ab|c|bcd)*(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive143" +regex = '''(a|ab|c|bcd)+(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition-expensive149" +regex = '''(ab|a|c|bcd){0,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive151" +regex = '''(ab|a|c|bcd){1,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive153" +regex = '''(ab|a|c|bcd){2,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive155" +regex = '''(ab|a|c|bcd){3,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive156" +regex = '''(ab|a|c|bcd){4,}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive158" +regex = '''(ab|a|c|bcd){0,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive160" +regex = '''(ab|a|c|bcd){1,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive162" +regex = '''(ab|a|c|bcd){2,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive164" +regex = '''(ab|a|c|bcd){3,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive165" +regex = '''(ab|a|c|bcd){4,10}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive167" +regex = '''(ab|a|c|bcd)*(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive169" +regex = '''(ab|a|c|bcd)+(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + diff --git a/vendor/regex/testdata/iter.toml b/vendor/regex/testdata/iter.toml new file mode 100644 index 0000000..329b9f0 --- /dev/null +++ b/vendor/regex/testdata/iter.toml @@ -0,0 +1,143 @@ +[[test]] +name = "1" +regex = "a" +haystack = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] + +[[test]] +name = "2" +regex = "a" +haystack = "aba" +matches = [[0, 1], [2, 3]] + +[[test]] +name = "empty1" +regex = '' +haystack = '' +matches = [[0, 0]] + +[[test]] +name = "empty2" +regex = '' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty3" +regex = '(?:)' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty4" +regex = '(?:)*' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty5" +regex = '(?:)+' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty6" +regex = '(?:)?' 
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty7"
+regex = '(?:)(?:)'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty8"
+regex = '(?:)+|z'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty9"
+regex = 'z|(?:)+'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty10"
+regex = '(?:)+|b'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty11"
+regex = 'b|(?:)+'
+haystack = 'abc'
+matches = [[0, 0], [1, 2], [3, 3]]
+
+[[test]]
+name = "start1"
+regex = "^a"
+haystack = "a"
+matches = [[0, 1]]
+
+[[test]]
+name = "start2"
+regex = "^a"
+haystack = "aa"
+matches = [[0, 1]]
+
+[[test]]
+name = "anchored1"
+regex = "a"
+haystack = "a"
+matches = [[0, 1]]
+anchored = true
+
+# This test is pretty subtle. It demonstrates the crucial difference between
+# '^a' and 'a' compiled in 'anchored' mode. The former regex matches only at
+# the start of a haystack. The latter regex has no such restriction, but its
+# automaton is constructed such that it lacks a `.*?` prefix. So it can
+# actually produce matches at multiple locations. The anchored3 test drives
+# this point home.
+[[test]]
+name = "anchored2"
+regex = "a"
+haystack = "aa"
+matches = [[0, 1], [1, 2]]
+anchored = true
+
+# Unlike anchored2, this test stops matching anything after it sees `b`
+# since it lacks a `.*?` prefix. Since it is looking for 'a' but sees 'b', it
+# determines that there are no remaining matches.
+[[test]]
+name = "anchored3"
+regex = "a"
+haystack = "aaba"
+matches = [[0, 1], [1, 2]]
+anchored = true
+
+[[test]]
+name = "nonempty-followedby-empty"
+regex = 'abc|.*?'
+haystack = "abczzz"
+matches = [[0, 3], [4, 4], [5, 5], [6, 6]]
+
+[[test]]
+name = "nonempty-followedby-oneempty"
+regex = 'abc|.*?'
+haystack = "abcz"
+matches = [[0, 3], [4, 4]]
+
+[[test]]
+name = "nonempty-followedby-onemixed"
+regex = 'abc|.*?'
+haystack = "abczabc"
+matches = [[0, 3], [4, 7]]
+
+[[test]]
+name = "nonempty-followedby-twomixed"
+regex = 'abc|.*?'
+haystack = "abczzabc"
+matches = [[0, 3], [4, 4], [5, 8]]
diff --git a/vendor/regex/testdata/leftmost-all.toml b/vendor/regex/testdata/leftmost-all.toml
new file mode 100644
index 0000000..e3fd950
--- /dev/null
+++ b/vendor/regex/testdata/leftmost-all.toml
@@ -0,0 +1,25 @@
+[[test]]
+name = "alt"
+regex = 'foo|foobar'
+haystack = "foobar"
+matches = [[0, 6]]
+match-kind = "all"
+search-kind = "leftmost"
+
+[[test]]
+name = "multi"
+regex = ['foo', 'foobar']
+haystack = "foobar"
+matches = [
+  { id = 1, span = [0, 6] },
+]
+match-kind = "all"
+search-kind = "leftmost"
+
+[[test]]
+name = "dotall"
+regex = '(?s:.)'
+haystack = "foobar"
+matches = [[5, 6]]
+match-kind = "all"
+search-kind = "leftmost"
diff --git a/vendor/regex/testdata/line-terminator.toml b/vendor/regex/testdata/line-terminator.toml
new file mode 100644
index 0000000..a398daf
--- /dev/null
+++ b/vendor/regex/testdata/line-terminator.toml
@@ -0,0 +1,109 @@
+# This tests that we can switch the line terminator to the NUL byte.
+[[test]]
+name = "nul"
+regex = '(?m)^[a-z]+$'
+haystack = '\x00abc\x00'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '\x00'
+
+# This tests that '.' will not match the configured line terminator, but will
+# match \n.
+[[test]]
+name = "dot-changes-with-line-terminator"
+regex = '.'
+haystack = '\x00\n'
+matches = [[1, 2]]
+unescape = true
+line-terminator = '\x00'
+
+# This tests that when we switch the line terminator, \n is no longer
+# recognized as the terminator.
+[[test]]
+name = "not-line-feed"
+regex = '(?m)^[a-z]+$'
+haystack = '\nabc\n'
+matches = []
+unescape = true
+line-terminator = '\x00'
+
+# This tests that we can set the line terminator to a non-ASCII byte and have
+# it behave as expected.
+[[test]]
+name = "non-ascii"
+regex = '(?m)^[a-z]+$'
+haystack = '\xFFabc\xFF'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '\xFF'
+utf8 = false
+
+# This tests a tricky case where the line terminator is set to \r. This ensures
+# that the StartLF look-behind assertion is tracked when computing the start
+# state.
+[[test]]
+name = "carriage"
+regex = '(?m)^[a-z]+'
+haystack = 'ABC\rabc'
+matches = [[4, 7]]
+bounds = [4, 7]
+unescape = true
+line-terminator = '\r'
+
+# This tests that we can set the line terminator to a byte corresponding to a
+# word character, and things work as expected.
+[[test]]
+name = "word-byte"
+regex = '(?m)^[a-z]+$'
+haystack = 'ZabcZ'
+matches = [[1, 4]]
+unescape = true
+line-terminator = 'Z'
+
+# This tests that we can set the line terminator to a byte corresponding to a
+# non-word character, and things work as expected.
+[[test]]
+name = "non-word-byte"
+regex = '(?m)^[a-z]+$'
+haystack = '%abc%'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '%'
+
+# This combines "set line terminator to a word byte" with a word boundary
+# assertion, which should result in no match even though ^/$ matches.
+[[test]]
+name = "word-boundary"
+regex = '(?m)^\b[a-z]+\b$'
+haystack = 'ZabcZ'
+matches = []
+unescape = true
+line-terminator = 'Z'
+
+# Like 'word-boundary', but does an anchored search at the point where ^
+# matches, but where \b should not.
+[[test]]
+name = "word-boundary-at"
+regex = '(?m)^\b[a-z]+\b$'
+haystack = 'ZabcZ'
+matches = []
+bounds = [1, 4]
+anchored = true
+unescape = true
+line-terminator = 'Z'
+
+# Like 'word-boundary-at', but flips the word boundary to a negation. This
+# in particular tests a tricky case in DFA engines, where they must consider
+# explicitly that a starting configuration from a custom line terminator may
+# also require setting the "is from word byte" flag on a state. Otherwise,
+# it's treated as "not from a word byte," which would result in \B not matching
+# here when it should.
+[[test]] +name = "not-word-boundary-at" +regex = '(?m)^\B[a-z]+\B$' +haystack = 'ZabcZ' +matches = [[1, 4]] +bounds = [1, 4] +anchored = true +unescape = true +line-terminator = 'Z' diff --git a/vendor/regex/testdata/misc.toml b/vendor/regex/testdata/misc.toml new file mode 100644 index 0000000..c65531f --- /dev/null +++ b/vendor/regex/testdata/misc.toml @@ -0,0 +1,99 @@ +[[test]] +name = "ascii-literal" +regex = "a" +haystack = "a" +matches = [[0, 1]] + +[[test]] +name = "ascii-literal-not" +regex = "a" +haystack = "z" +matches = [] + +[[test]] +name = "ascii-literal-anchored" +regex = "a" +haystack = "a" +matches = [[0, 1]] +anchored = true + +[[test]] +name = "ascii-literal-anchored-not" +regex = "a" +haystack = "z" +matches = [] +anchored = true + +[[test]] +name = "anchor-start-end-line" +regex = '(?m)^bar$' +haystack = "foo\nbar\nbaz" +matches = [[4, 7]] + +[[test]] +name = "prefix-literal-match" +regex = '^abc' +haystack = "abc" +matches = [[0, 3]] + +[[test]] +name = "prefix-literal-match-ascii" +regex = '^abc' +haystack = "abc" +matches = [[0, 3]] +unicode = false +utf8 = false + +[[test]] +name = "prefix-literal-no-match" +regex = '^abc' +haystack = "zabc" +matches = [] + +[[test]] +name = "one-literal-edge" +regex = 'abc' +haystack = "xxxxxab" +matches = [] + +[[test]] +name = "terminates" +regex = 'a$' +haystack = "a" +matches = [[0, 1]] + +[[test]] +name = "suffix-100" +regex = '.*abcd' +haystack = "abcd" +matches = [[0, 4]] + +[[test]] +name = "suffix-200" +regex = '.*(?:abcd)+' +haystack = "abcd" +matches = [[0, 4]] + +[[test]] +name = "suffix-300" +regex = '.*(?:abcd)+' +haystack = "abcdabcd" +matches = [[0, 8]] + +[[test]] +name = "suffix-400" +regex = '.*(?:abcd)+' +haystack = "abcdxabcd" +matches = [[0, 9]] + +[[test]] +name = "suffix-500" +regex = '.*x(?:abcd)+' +haystack = "abcdxabcd" +matches = [[0, 9]] + +[[test]] +name = "suffix-600" +regex = '[^abcd]*x(?:abcd)+' +haystack = "abcdxabcd" +matches = [[4, 9]] diff --git a/vendor/regex/testdata/multiline.toml b/vendor/regex/testdata/multiline.toml new file mode 100644 index 0000000..3acc901 --- /dev/null +++ b/vendor/regex/testdata/multiline.toml @@ -0,0 +1,845 @@ +[[test]] +name = "basic1" +regex = '(?m)^[a-z]+$' +haystack = "abc\ndef\nxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic1-crlf" +regex = '(?Rm)^[a-z]+$' +haystack = "abc\ndef\nxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic1-crlf-cr" +regex = '(?Rm)^[a-z]+$' +haystack = "abc\rdef\rxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic2" +regex = '(?m)^$' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic2-crlf" +regex = '(?Rm)^$' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic2-crlf-cr" +regex = '(?Rm)^$' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic3" +regex = '(?m)^' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic3-crlf" +regex = '(?Rm)^' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic3-crlf-cr" +regex = '(?Rm)^' +haystack = "abc\rdef\rxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic4" +regex = '(?m)$' +haystack = "abc\ndef\nxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic4-crlf" +regex = '(?Rm)$' +haystack = "abc\ndef\nxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic4-crlf-cr" +regex = '(?Rm)$' +haystack = "abc\rdef\rxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic5" +regex 
= '(?m)^[a-z]' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic5-crlf" +regex = '(?Rm)^[a-z]' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic5-crlf-cr" +regex = '(?Rm)^[a-z]' +haystack = "abc\rdef\rxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic6" +regex = '(?m)[a-z]^' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic6-crlf" +regex = '(?Rm)[a-z]^' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic6-crlf-cr" +regex = '(?Rm)[a-z]^' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic7" +regex = '(?m)[a-z]$' +haystack = "abc\ndef\nxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic7-crlf" +regex = '(?Rm)[a-z]$' +haystack = "abc\ndef\nxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic7-crlf-cr" +regex = '(?Rm)[a-z]$' +haystack = "abc\rdef\rxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic8" +regex = '(?m)$[a-z]' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic8-crlf" +regex = '(?Rm)$[a-z]' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic8-crlf-cr" +regex = '(?Rm)$[a-z]' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic9" +regex = '(?m)^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "basic9-crlf" +regex = '(?Rm)^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "repeat1" +regex = '(?m)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-crlf" +regex = '(?Rm)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-crlf-cr" +regex = '(?Rm)(?:^$)*' +haystack = "a\rb\rc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi" +regex = '(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi-crlf" +regex = '(?R)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi-crlf-cr" +regex = '(?R)(?:^$)*' +haystack = "a\rb\rc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat2" +regex = '(?m)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat2-crlf" +regex = '(?Rm)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat2-crlf-cr" +regex = '(?Rm)(?:^|a)+' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat2-no-multi" +regex = '(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat2-no-multi-crlf" +regex = '(?R)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat2-no-multi-crlf-cr" +regex = '(?R)(?:^|a)+' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat3" +regex = '(?m)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-crlf" +regex = '(?Rm)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-crlf-cr" +regex = '(?Rm)(?:^|a)*' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-no-multi" +regex = '(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 
5], [6, 6]] + +[[test]] +name = "repeat3-no-multi-crlf" +regex = '(?R)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat3-no-multi-crlf-cr" +regex = '(?R)(?:^|a)*' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat4" +regex = '(?m)(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-crlf" +regex = '(?Rm)(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-crlf-cr" +regex = '(?Rm)(?:^|a+)' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-no-multi" +regex = '(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat4-no-multi-crlf" +regex = '(?R)(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat4-no-multi-crlf-cr" +regex = '(?R)(?:^|a+)' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat5" +regex = '(?m)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-crlf" +regex = '(?Rm)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-crlf-cr" +regex = '(?Rm)(?:^|a*)' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi" +regex = '(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi-crlf" +regex = '(?R)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi-crlf-cr" +regex = '(?R)(?:^|a*)' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat6" +regex = '(?m)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-crlf" +regex = '(?Rm)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-crlf-cr" +regex = '(?Rm)(?:^[a-z])+' +haystack = "abc\rdef\rxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-no-multi" +regex = '(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat6-no-multi-crlf" +regex = '(?R)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat6-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z])+' +haystack = "abc\rdef\rxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat7" +regex = '(?m)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat7-crlf" +regex = '(?Rm)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat7-crlf-cr" +regex = '(?Rm)(?:^[a-z]{3}\r?)+' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat7-no-multi" +regex = '(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat7-no-multi-crlf" +regex = '(?R)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat7-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z]{3}\r?)+' +haystack = "abc\rdef\rxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat8" +regex = '(?m)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-crlf" +regex = '(?Rm)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-crlf-cr" +regex = 
'(?Rm)(?:^[a-z]{3}\r?)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-no-multi" +regex = '(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat8-no-multi-crlf" +regex = '(?R)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat8-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z]{3}\r?)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat9" +regex = '(?m)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-crlf" +regex = '(?Rm)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-crlf-cr" +regex = '(?Rm)(?:\r?[a-z]{3}$)+' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-no-multi" +regex = '(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[7, 11]] + +[[test]] +name = "repeat9-no-multi-crlf" +regex = '(?R)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[7, 11]] + +[[test]] +name = "repeat9-no-multi-crlf-cr" +regex = '(?R)(?:\r?[a-z]{3}$)+' +haystack = "abc\rdef\rxyz" +matches = [[7, 11]] + +[[test]] +name = "repeat10" +regex = '(?m)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-crlf" +regex = '(?Rm)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-crlf-cr" +regex = '(?Rm)(?:\r?[a-z]{3}$)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-no-multi" +regex = '(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat10-no-multi-crlf" +regex = '(?R)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat10-no-multi-crlf-cr" +regex = '(?R)(?:\r?[a-z]{3}$)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat11" +regex = '(?m)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-crlf" +regex = '(?Rm)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-crlf-cr" +regex = '(?Rm)^*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi" +regex = '^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi-crlf" +regex = '(?R)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi-crlf-cr" +regex = '(?R)^*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat12" +regex = '(?m)^+' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [4, 4]] + +[[test]] +name = "repeat12-crlf" +regex = '(?Rm)^+' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [4, 4]] + +[[test]] +name = "repeat12-crlf-cr" +regex = '(?Rm)^+' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [4, 4]] + +[[test]] +name = "repeat12-no-multi" +regex = '^+' +haystack = "\naa\n" +matches = [[0, 0]] + +[[test]] +name = "repeat12-no-multi-crlf" +regex = '(?R)^+' +haystack = "\naa\n" +matches = [[0, 
0]] + +[[test]] +name = "repeat12-no-multi-crlf-cr" +regex = '(?R)^+' +haystack = "\raa\r" +matches = [[0, 0]] + +[[test]] +name = "repeat13" +regex = '(?m)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-crlf" +regex = '(?Rm)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-crlf-cr" +regex = '(?Rm)$*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi" +regex = '$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi-crlf" +regex = '(?R)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi-crlf-cr" +regex = '(?R)$*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat14" +regex = '(?m)$+' +haystack = "\naa\n" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-crlf" +regex = '(?Rm)$+' +haystack = "\naa\n" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-crlf-cr" +regex = '(?Rm)$+' +haystack = "\raa\r" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-no-multi" +regex = '$+' +haystack = "\naa\n" +matches = [[4, 4]] + +[[test]] +name = "repeat14-no-multi-crlf" +regex = '(?R)$+' +haystack = "\naa\n" +matches = [[4, 4]] + +[[test]] +name = "repeat14-no-multi-crlf-cr" +regex = '(?R)$+' +haystack = "\raa\r" +matches = [[4, 4]] + +[[test]] +name = "repeat15" +regex = '(?m)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat15-crlf" +regex = '(?Rm)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat15-crlf-cr" +regex = '(?Rm)(?:$\r)+' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat15-no-multi" +regex = '(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat15-no-multi-crlf" +regex = '(?R)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat15-no-multi-crlf-cr" +regex = '(?R)(?:$\r)+' +haystack = "\r\raaa\r\r" +matches = [] + +[[test]] +name = "repeat16" +regex = '(?m)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-crlf" +regex = '(?Rm)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-crlf-cr" +regex = '(?Rm)(?:$\r)*' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-no-multi" +regex = '(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat16-no-multi-crlf" +regex = '(?R)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat16-no-multi-crlf-cr" +regex = '(?R)(?:$\r)*' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat17" +regex = '(?m)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-crlf" +regex = '(?Rm)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-crlf-cr" +regex = '(?Rm)(?:$\r^)+' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-no-multi" +regex = '(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] 
+name = "repeat17-no-multi-crlf" +regex = '(?R)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat17-no-multi-crlf-cr" +regex = '(?R)(?:$\r^)+' +haystack = "\r\raaa\r\r" +matches = [] + +[[test]] +name = "repeat18" +regex = '(?m)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-crlf" +regex = '(?Rm)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-crlf-cr" +regex = '(?Rm)(?:^|$)+' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-no-multi" +regex = '(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "repeat18-no-multi-crlf" +regex = '(?R)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "repeat18-no-multi-crlf-cr" +regex = '(?R)(?:^|$)+' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "match-line-100" +regex = '(?m)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-100-crlf" +regex = '(?Rm)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-100-crlf-cr" +regex = '(?Rm)^.+$' +haystack = "aa\raaaaaaaaaaaaaaaaaaa\r" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-200" +regex = '(?m)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false + +[[test]] +name = "match-line-200-crlf" +regex = '(?Rm)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false + +[[test]] +name = "match-line-200-crlf-cr" +regex = '(?Rm)^.+$' +haystack = "aa\raaaaaaaaaaaaaaaaaaa\r" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false diff --git a/vendor/regex/testdata/no-unicode.toml b/vendor/regex/testdata/no-unicode.toml new file mode 100644 index 0000000..0ddac4c --- /dev/null +++ b/vendor/regex/testdata/no-unicode.toml @@ -0,0 +1,222 @@ +[[test]] +name = "invalid-utf8-literal1" +regex = '\xFF' +haystack = '\xFF' +matches = [[0, 1]] +unicode = false +utf8 = false +unescape = true + + +[[test]] +name = "mixed" +regex = '(?:.+)(?-u)(?:.+)' +haystack = '\xCE\x93\xCE\x94\xFF' +matches = [[0, 5]] +utf8 = false +unescape = true + + +[[test]] +name = "case1" +regex = "a" +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = false + +[[test]] +name = "case2" +regex = "[a-z]+" +haystack = "AaAaA" +matches = [[0, 5]] +case-insensitive = true +unicode = false + +[[test]] +name = "case3" +regex = "[a-z]+" +haystack = "aA\u212AaA" +matches = [[0, 7]] +case-insensitive = true + +[[test]] +name = "case4" +regex = "[a-z]+" +haystack = "aA\u212AaA" +matches = [[0, 2], [5, 7]] +case-insensitive = true +unicode = false + + +[[test]] +name = "negate1" +regex = "[^a]" +haystack = "δ" +matches = [[0, 2]] + +[[test]] +name = "negate2" +regex = "[^a]" +haystack = "δ" +matches = [[0, 1], [1, 2]] +unicode = false +utf8 = false + + +[[test]] +name = "dotstar-prefix1" +regex = "a" +haystack = '\xFFa' +matches = [[1, 2]] +unicode = false +utf8 = false +unescape = true + +[[test]] +name = "dotstar-prefix2" +regex = "a" +haystack = '\xFFa' +matches = [[1, 2]] +utf8 = false +unescape = true + + +[[test]] +name = "null-bytes1" +regex = '[^\x00]+\x00' +haystack = 'foo\x00' +matches = [[0, 4]] +unicode = false +utf8 = false +unescape = true + + +[[test]] +name = "word-ascii" 
+regex = '\w+' +haystack = "aδ" +matches = [[0, 1]] +unicode = false + +[[test]] +name = "word-unicode" +regex = '\w+' +haystack = "aδ" +matches = [[0, 3]] + +[[test]] +name = "decimal-ascii" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 1], [7, 8]] +unicode = false + +[[test]] +name = "decimal-unicode" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 8]] + +[[test]] +name = "space-ascii" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 1]] +unicode = false + +[[test]] +name = "space-unicode" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 4]] + + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-bytes" +regex = '' +haystack = "☃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +utf8 = false + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-utf8" +regex = '' +haystack = "☃" +matches = [[0, 0], [3, 3]] + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +# Note that iter2-utf8 doesn't make sense here, since the input isn't UTF-8. +name = "iter2-bytes" +regex = '' +haystack = 'b\xFFr' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unescape = true +utf8 = false + + +# These test that unanchored prefixes can munch through invalid UTF-8 even when +# utf8 is enabled. +# +# This test actually reflects an interesting simplification in how the Thompson +# NFA is constructed. It used to be that the NFA could be built with an +# unanchored prefix that either matched any byte or _only_ matched valid UTF-8. +# But the latter turns out to be pretty precarious when it comes to prefilters, +# because if you search a haystack that contains invalid UTF-8 but have an +# unanchored prefix that requires UTF-8, then prefilters are no longer a valid +# optimization because you actually have to check that everything is valid +# UTF-8. +# +# Originally, I had thought that we needed a valid UTF-8 unanchored prefix in +# order to guarantee that we only match at valid UTF-8 boundaries. But this +# isn't actually true! There are really only two things to consider here: +# +# 1) Will a regex match split an encoded codepoint? No. Because by construction, +# we ensure that a MATCH state can only be reached by following valid UTF-8 (assuming +# all of the UTF-8 modes are enabled). +# +# 2) Will a regex match arbitrary bytes that aren't valid UTF-8? Again, no, +# assuming all of the UTF-8 modes are enabled. +[[test]] +name = "unanchored-invalid-utf8-match-100" +regex = '[a-z]' +haystack = '\xFFa\xFF' +matches = [[1, 2]] +unescape = true +utf8 = false + +# This test shows that we can still prevent a match from occurring by requiring +# that valid UTF-8 match by inserting our own unanchored prefix. Thus, if the +# behavior of not munching through invalid UTF-8 anywhere is needed, then it +# can be achieved thusly. +[[test]] +name = "unanchored-invalid-utf8-nomatch" +regex = '^(?s:.)*?[a-z]' +haystack = '\xFFa\xFF' +matches = [] +unescape = true +utf8 = false + +# This is a tricky test that makes sure we don't accidentally do a kind of +# unanchored search when we've requested that a regex engine not report +# empty matches that split a codepoint. This test caught a regression during +# development where the code for skipping over bad empty matches would do so +# even if the search should have been anchored. This is ultimately what led to +# making 'anchored' an 'Input' option, so that it was always clear what kind +# of search was being performed. (Before that, whether a search was anchored +# or not was a config knob on the regex engine.) 
This did wind up making DFAs a little more complex to configure (with
+# their 'StartKind' knob), but it generally smoothed out everything else.
+#
+# This is a great example of a test whose failure motivated a sweeping API
+# refactoring.
+[[test]]
+name = "anchored-iter-empty-utf8"
+regex = ''
+haystack = 'a☃z'
+matches = [[0, 0], [1, 1]]
+unescape = false
+utf8 = true
+anchored = true
diff --git a/vendor/regex/testdata/overlapping.toml b/vendor/regex/testdata/overlapping.toml
new file mode 100644
index 0000000..7bcd45a
--- /dev/null
+++ b/vendor/regex/testdata/overlapping.toml
@@ -0,0 +1,280 @@
+# NOTE: We define a number of tests where the *match* kind is 'leftmost-first'
+# but the *search* kind is 'overlapping'. This is a somewhat nonsensical
+# combination and can produce odd results. Nevertheless, those results should
+# be consistent, so we test them here. (At the time of writing this note, I
+# hadn't yet decided whether to make 'leftmost-first' with 'overlapping'
+# result in unspecified behavior.)
+
+# This demonstrates how a full overlapping search is obviously quadratic. This
+# regex reports a match for every substring in the haystack.
+[[test]]
+name = "ungreedy-dotstar-matches-everything-100"
+regex = [".*?"]
+haystack = "zzz"
+matches = [
+    { id = 0, span = [0, 0] },
+    { id = 0, span = [1, 1] },
+    { id = 0, span = [0, 1] },
+    { id = 0, span = [2, 2] },
+    { id = 0, span = [1, 2] },
+    { id = 0, span = [0, 2] },
+    { id = 0, span = [3, 3] },
+    { id = 0, span = [2, 3] },
+    { id = 0, span = [1, 3] },
+    { id = 0, span = [0, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "greedy-dotstar-matches-everything-100"
+regex = [".*"]
+haystack = "zzz"
+matches = [
+    { id = 0, span = [0, 0] },
+    { id = 0, span = [1, 1] },
+    { id = 0, span = [0, 1] },
+    { id = 0, span = [2, 2] },
+    { id = 0, span = [1, 2] },
+    { id = 0, span = [0, 2] },
+    { id = 0, span = [3, 3] },
+    { id = 0, span = [2, 3] },
+    { id = 0, span = [1, 3] },
+    { id = 0, span = [0, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-leftmost-first-100"
+regex = 'a+'
+haystack = "aaa"
+matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]]
+match-kind = "leftmost-first"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-leftmost-first-110"
+regex = '☃+'
+haystack = "☃☃☃"
+matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]]
+match-kind = "leftmost-first"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-all-100"
+regex = 'a+'
+haystack = "aaa"
+matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-all-110"
+regex = '☃+'
+haystack = "☃☃☃"
+matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-leftmost-first-200"
+regex = '(abc)+'
+haystack = "zzabcabczzabc"
+matches = [
+    [[2, 5], [2, 5]],
+    [[5, 8], [5, 8]],
+    [[2, 8], [5, 8]],
+]
+match-kind = "leftmost-first"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-all-200"
+regex = '(abc)+'
+haystack = "zzabcabczzabc"
+matches = [
+    [[2, 5], [2, 5]],
+    [[5, 8], [5, 8]],
+    [[2, 8], [5, 8]],
+    [[10, 13], [10, 13]],
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-star-leftmost-first-100"
+regex = 'a*'
+haystack = "aaa"
+matches = [
+    [0, 0],
+    [1, 1],
+    [0, 1],
+    [2, 2],
+    [1, 2],
+    [0, 2],
+    [3, 3],
+    [2, 3],
+    [1, 3],
+    [0, 3],
+]
+match-kind = 
"leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-all-100" +regex = 'a*' +haystack = "aaa" +matches = [ + [0, 0], + [1, 1], + [0, 1], + [2, 2], + [1, 2], + [0, 2], + [3, 3], + [2, 3], + [1, 3], + [0, 3], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-leftmost-first-200" +regex = '(abc)*' +haystack = "zzabcabczzabc" +matches = [ + [[0, 0], []], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-all-200" +regex = '(abc)*' +haystack = "zzabcabczzabc" +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 2], []], + [[3, 3], []], + [[4, 4], []], + [[5, 5], []], + [[2, 5], [2, 5]], + [[6, 6], []], + [[7, 7], []], + [[8, 8], []], + [[5, 8], [5, 8]], + [[2, 8], [5, 8]], + [[9, 9], []], + [[10, 10], []], + [[11, 11], []], + [[12, 12], []], + [[13, 13], []], + [[10, 13], [10, 13]], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "start-end-rep-leftmost-first" +regex = '(^$)*' +haystack = "abc" +matches = [ + [[0, 0], []], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "start-end-rep-all" +regex = '(^$)*' +haystack = "abc" +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 2], []], + [[3, 3], []], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "alt-leftmost-first-100" +regex = 'abc|a' +haystack = "zzabcazzaabc" +matches = [[2, 3], [2, 5]] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "alt-all-100" +regex = 'abc|a' +haystack = "zzabcazzaabc" +matches = [[2, 3], [2, 5], [5, 6], [8, 9], [9, 10], [9, 12]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-000" +regex = "" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-alt-000" +regex = "|b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-alt-010" +regex = "b|" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-bytes" +regex = '' +haystack = "☃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-utf8" +regex = '' +haystack = "☃" +matches = [[0, 0], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "iter1-incomplete-utf8" +regex = '' +haystack = '\xE2\x98' # incomplete snowman +matches = [[0, 0], [1, 1], [2, 2]] +match-kind = "all" +search-kind = "overlapping" +unescape = true +utf8 = false + +[[test]] +name = "scratch" +regex = ['sam', 'samwise'] +haystack = "samwise" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = "overlapping" diff --git a/vendor/regex/testdata/regex-lite.toml b/vendor/regex/testdata/regex-lite.toml new file mode 100644 index 0000000..1769d80 --- /dev/null +++ b/vendor/regex/testdata/regex-lite.toml @@ -0,0 +1,98 @@ +# These tests are specifically written to test the regex-lite crate. While it +# largely has the same semantics as the regex crate, there are some differences +# around Unicode support and UTF-8. 
+# +# To be clear, regex-lite supports far fewer patterns because of its lack of +# Unicode support, nested character classes and character class set operations. +# What we're talking about here are the patterns that both crates support but +# where the semantics might differ. + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-decimal" +regex = '\d' +haystack = '᠕' +matches = [] +unicode = true + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-space" +regex = '\s' +haystack = "\u2000" +matches = [] +unicode = true + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-word" +regex = '\w' +haystack = 'δ' +matches = [] +unicode = true + +# regex-lite uses the ASCII definition of word for word boundary assertions. +[[test]] +name = "word-boundary" +regex = '\b' +haystack = 'δ' +matches = [] +unicode = true + +# regex-lite uses the ASCII definition of word for negated word boundary +# assertions. But note that it should still not split codepoints! +[[test]] +name = "word-boundary-negated" +regex = '\B' +haystack = 'δ' +matches = [[0, 0], [2, 2]] +unicode = true + +# While we're here, the empty regex---which matches at every +# position---shouldn't split a codepoint either. +[[test]] +name = "empty-no-split-codepoint" +regex = '' +haystack = '💩' +matches = [[0, 0], [4, 4]] +unicode = true + +# A dot always matches a full codepoint. +[[test]] +name = "dot-always-matches-codepoint" +regex = '.' +haystack = '💩' +matches = [[0, 4]] +unicode = false + +# A negated character class also always matches a full codepoint. +[[test]] +name = "negated-class-always-matches-codepoint" +regex = '[^a]' +haystack = '💩' +matches = [[0, 4]] +unicode = false + +# regex-lite only supports ASCII-aware case insensitive matching. +[[test]] +name = "case-insensitive-is-ascii-only" +regex = 's' +haystack = 'ſ' +matches = [] +unicode = true +case-insensitive = true + +# Negated word boundaries shouldn't split a codepoint, but they will match +# between invalid UTF-8. +# +# This test is only valid for a 'bytes' API, but that doesn't (yet) exist in +# regex-lite. This can't happen in the main API because &str can't contain +# invalid UTF-8. 
+# [[test]] +# name = "word-boundary-invalid-utf8" +# regex = '\B' +# haystack = '\xFF\xFF\xFF\xFF' +# unescape = true +# matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +# unicode = true +# utf8 = false diff --git a/vendor/regex/testdata/regression.toml b/vendor/regex/testdata/regression.toml new file mode 100644 index 0000000..53b0701 --- /dev/null +++ b/vendor/regex/testdata/regression.toml @@ -0,0 +1,830 @@ +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-100" +regex = '(*)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-200" +regex = '(?:?)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-300" +regex = '(?)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-400" +regex = '*' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/75 +[[test]] +name = "unsorted-binary-search-100" +regex = '(?i-u)[a_]+' +haystack = "A_" +matches = [[0, 2]] + +# See: https://github.com/rust-lang/regex/issues/75 +[[test]] +name = "unsorted-binary-search-200" +regex = '(?i-u)[A_]+' +haystack = "a_" +matches = [[0, 2]] + +# See: https://github.com/rust-lang/regex/issues/76 +[[test]] +name = "unicode-case-lower-nocase-flag" +regex = '(?i)\p{Ll}+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] + +# See: https://github.com/rust-lang/regex/issues/99 +[[test]] +name = "negated-char-class-100" +regex = '(?i)[^x]' +haystack = "x" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/99 +[[test]] +name = "negated-char-class-200" +regex = '(?i)[^x]' +haystack = "X" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/101 +[[test]] +name = "ascii-word-underscore" +regex = '[[:word:]]' +haystack = "_" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/129 +[[test]] +name = "captures-repeat" +regex = '([a-f]){2}(?P<foo>[x-z])' +haystack = "abx" +matches = [ + [[0, 3], [1, 2], [2, 3]], +] + +# See: https://github.com/rust-lang/regex/issues/153 +[[test]] +name = "alt-in-alt-100" +regex = 'ab?|$' +haystack = "az" +matches = [[0, 1], [2, 2]] + +# See: https://github.com/rust-lang/regex/issues/153 +[[test]] +name = "alt-in-alt-200" +regex = '^(?:.*?)(?:\n|\r\n?|$)' +haystack = "ab\rcd" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/169 +[[test]] +name = "leftmost-first-prefix" +regex = 'z*azb' +haystack = "azb" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/191 +[[test]] +name = "many-alternates" +regex = '1|2|3|4|5|6|7|8|9|10|int' +haystack = "int" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/204 +[[test]] +name = "word-boundary-alone-100" +regex = '\b' +haystack = "Should this (work?)" +matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]] + +# See: https://github.com/rust-lang/regex/issues/204 +[[test]] +name = "word-boundary-alone-200" +regex = '\b' +haystack = "a b c" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +# See: https://github.com/rust-lang/regex/issues/264 +[[test]] +name = "word-boundary-ascii-no-capture" +regex = '\B' +haystack = "\U00028F3E" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/264 +[[test]] +name 
= "word-boundary-ascii-capture" +regex = '(?:\B)' +haystack = "\U00028F3E" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/268 +[[test]] +name = "partial-anchor" +regex = '^a|b' +haystack = "ba" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "endl-or-word-boundary" +regex = '(?m:$)|(?-u:\b)' +haystack = "\U0006084E" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "zero-or-end" +regex = '(?i-u:\x00)|$' +haystack = "\U000E682F" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "y-or-endl" +regex = '(?i-u:y)|(?m:$)' +haystack = "\U000B4331" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "word-boundary-start-x" +regex = '(?u:\b)^(?-u:X)' +haystack = "X" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "word-boundary-ascii-start-x" +regex = '(?-u:\b)^(?-u:X)' +haystack = "X" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "end-not-word-boundary" +regex = '$\B' +haystack = "\U0005C124\U000B576C" +matches = [[8, 8]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/280 +[[test]] +name = "partial-anchor-alternate-begin" +regex = '^a|z' +haystack = "yyyyya" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/280 +[[test]] +name = "partial-anchor-alternate-end" +regex = 'a$|z' +haystack = "ayyyyy" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/289 +[[test]] +name = "lits-unambiguous-100" +regex = '(?:ABC|CDA|BC)X' +haystack = "CDAX" +matches = [[0, 4]] + +# See: https://github.com/rust-lang/regex/issues/291 +[[test]] +name = "lits-unambiguous-200" +regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$' +haystack = "CIMG2341" +matches = [ + [[0, 8], [0, 4], [], [0, 4], [4, 8]], +] + +# See: https://github.com/rust-lang/regex/issues/303 +# +# 2022-09-19: This has now been "properly" fixed in that empty character +# classes are fully supported as something that can never match. This test +# used to be marked as 'compiles = false', but now it works. +[[test]] +name = "negated-full-byte-range" +regex = '[^\x00-\xFF]' +haystack = "" +matches = [] +compiles = true +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/321 +[[test]] +name = "strange-anchor-non-complete-prefix" +regex = 'a^{2}' +haystack = "" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/321 +[[test]] +name = "strange-anchor-non-complete-suffix" +regex = '${2}a' +haystack = "" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-100" +regex = 'a(b*(X|$))?' +haystack = "abcbX" +matches = [ + [[0, 1], [], []], +] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-200" +regex = 'a(bc*(X|$))?' +haystack = "abcbX" +matches = [ + [[0, 1], [], []], +] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-300" +regex = '(aa$)?' 
+haystack = "aaz" +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 2], []], + [[3, 3], []], +] + +# Plucked from "Why aren’t regular expressions a lingua franca? an empirical +# study on the re-use and portability of regular expressions", The ACM Joint +# European Software Engineering Conference and Symposium on the Foundations of +# Software Engineering (ESEC/FSE), 2019. +# +# Link: https://dl.acm.org/doi/pdf/10.1145/3338906.3338909 +[[test]] +name = "captures-after-dfa-premature-end-400" +regex = '(a)\d*\.?\d+\b' +haystack = "a0.0c" +matches = [ + [[0, 2], [0, 1]], +] + +# See: https://github.com/rust-lang/regex/issues/437 +[[test]] +name = "literal-panic" +regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+' +haystack = "test" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/527 +[[test]] +name = "empty-flag-expr" +regex = '(?:(?:(?x)))' +haystack = "" +matches = [[0, 0]] + +# See: https://github.com/rust-lang/regex/issues/533 +#[[tests]] +#name = "blank-matches-nothing-between-space-and-tab" +#regex = '[[:blank:]]' +#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' +#match = false +#unescape = true + +# See: https://github.com/rust-lang/regex/issues/533 +#[[tests]] +#name = "blank-matches-nothing-between-space-and-tab-inverted" +#regex = '^[[:^blank:]]+$' +#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' +#match = true +#unescape = true + +# See: https://github.com/rust-lang/regex/issues/555 +[[test]] +name = "invalid-repetition" +regex = '(?m){1,1}' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/640 +[[test]] +name = "flags-are-unset" +regex = '(?:(?i)foo)|Bar' +haystack = "foo Foo bar Bar" +matches = [[0, 3], [4, 7], [12, 15]] + +# Note that 'Ј' is not 'j', but cyrillic Je +# https://en.wikipedia.org/wiki/Je_(Cyrillic) +# +# See: https://github.com/rust-lang/regex/issues/659 +[[test]] +name = "empty-group-with-unicode" +regex = '(?:)Ј01' +haystack = 'zЈ01' +matches = [[1, 5]] + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = "word-boundary-weird" +regex = '\b..\b' +haystack = "I have 12, he has 2!" +matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = "word-boundary-weird-ascii" +regex = '\b..\b' +haystack = "I have 12, he has 2!" +matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = "word-boundary-weird-minimal-ascii" +regex = '\b..\b' +haystack = "az,,b" +matches = [[0, 2], [2, 4]] +unicode = false +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1203 +[[test]] +name = "reverse-suffix-100" +regex = '[0-4][0-4][0-4]000' +haystack = "153.230000" +matches = [[4, 10]] + +# See: https://github.com/BurntSushi/ripgrep/issues/1203 +[[test]] +name = "reverse-suffix-200" +regex = '[0-9][0-9][0-9]000' +haystack = "153.230000\n" +matches = [[4, 10]] + +# This is a tricky case for the reverse suffix optimization, because it +# finds the 'foobar' match but the reverse scan must fail to find a match by +# correctly dealing with the word boundary following the 'foobar' literal when +# computing the start state. 
+# +# This test exists because I tried to break the following assumption that +# is currently in the code: that if a suffix is found and the reverse scan +# succeeds, then it's guaranteed that there is an overall match. Namely, the +# 'is_match' routine does *not* do another forward scan in this case because of +# this assumption. +[[test]] +name = "reverse-suffix-300" +regex = '\w+foobar\b' +haystack = "xyzfoobarZ" +matches = [] +unicode = false +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1247 +[[test]] +name = "stops" +regex = '\bs(?:[ab])' +haystack = 's\xE4' +matches = [] +unescape = true +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1247 +[[test]] +name = "stops-ascii" +regex = '(?-u:\b)s(?:[ab])' +haystack = 's\xE4' +matches = [] +unescape = true +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/850 +[[test]] +name = "adjacent-line-boundary-100" +regex = '(?m)^(?:[^ ]+?)$' +haystack = "line1\nline2" +matches = [[0, 5], [6, 11]] + +# Continued. +[[test]] +name = "adjacent-line-boundary-200" +regex = '(?m)^(?:[^ ]+?)$' +haystack = "A\nB" +matches = [[0, 1], [2, 3]] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-100" +regex = '^a[[:^space:]]' +haystack = "a " +matches = [] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-200" +regex = '^a[[:^space:]]' +haystack = "foo boo a" +matches = [] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-300" +regex = '^-[a-z]' +haystack = "r-f" +matches = [] + +# Tests that a possible Aho-Corasick optimization works correctly. It only +# kicks in when we have a lot of literals. By "works correctly," we mean that +# leftmost-first match semantics are properly respected. That is, samwise +# should match, not sam. +# +# There is no issue for this bug. +[[test]] +name = "aho-corasick-100" +regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z' +haystack = "samwise" +matches = [[0, 7]] + +# See: https://github.com/rust-lang/regex/issues/921 +[[test]] +name = "interior-anchor-capture" +regex = '(a$)b$' +haystack = 'ab' +matches = [] + +# I found this bug in the course of adding some of the regexes that Ruff uses +# to rebar. It turns out that the lazy DFA was finding a match that was being +# rejected by the one-pass DFA. Yikes. I then minimized the regex and haystack. 
+# +# Source: https://github.com/charliermarsh/ruff/blob/a919041ddaa64cdf6f216f90dd0480dab69fd3ba/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs#L52 +[[test]] +name = "ruff-whitespace-around-keywords" +regex = '^(a|ab)$' +haystack = "ab" +anchored = true +unicode = false +utf8 = true +matches = [[[0, 2], [0, 2]]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-0" +regex = '(?:(?-u:\b)|(?u:h))+' +haystack = "h" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-1" +regex = '(?u:\B)' +haystack = "鋸" +unicode = true +utf8 = false +matches = [] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-2" +regex = '(?:(?u:\b)|(?s-u:.))+' +haystack = "oB" +unicode = true +utf8 = false +matches = [[0, 0], [1, 2]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-3" +regex = '(?:(?-u:\B)|(?su:.))+' +haystack = "\U000FEF80" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-3-utf8" +regex = '(?:(?-u:\B)|(?su:.))+' +haystack = "\U000FEF80" +unicode = true +utf8 = true +matches = [[0, 0], [4, 4]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-4" +regex = '(?m:$)(?m:^)(?su:.)' +haystack = "\n‣" +unicode = true +utf8 = false +matches = [[0, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-5" +regex = '(?m:$)^(?m:^)' +haystack = "\n" +unicode = true +utf8 = false +matches = [[0, 0]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-6" +regex = '(?P<kp>(?iu:do)(?m:$))*' +haystack = "dodo" +unicode = true +utf8 = false +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 4], [2, 4]], +] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-7" +regex = '(?u:\B)' +haystack = "䡁" +unicode = true +utf8 = false +matches = [] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-8" +regex = '(?:(?-u:\b)|(?u:[\u{0}-W]))+' +haystack = "0" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-9" +regex = '((?m:$)(?-u:\B)(?s-u:.)(?-u:\B)$)' +haystack = "\n\n" +unicode = true +utf8 = false +matches = [ + [[1, 2], [1, 2]], +] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-10" +regex = '(?m:$)(?m:$)^(?su:.)' +haystack = "\n\u0081¨\u200a" +unicode = true +utf8 = false +matches = [[0, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-11" +regex = '(?-u:\B)(?m:^)' +haystack = "0\n" +unicode = true +utf8 = false +matches = [[2, 2]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-12" +regex = '(?:(?u:\b)|(?-u:.))+' +haystack = "0" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/969 +[[test]] +name = "i969" +regex = 'c.*d\z' +haystack = "ababcd" +bounds = [4, 6] +search-kind = "earliest" +matches = [[4, 6]] + +# I found this during the regex-automata migration. This is the fowler basic +# 154 test, but without anchored = true and without a match limit. +# +# This test caught a subtle bug in the hybrid reverse DFA search, where it +# would skip over the termination condition if it entered a start state. 
This +# was a double bug. Firstly, the reverse DFA shouldn't have had start states +# specialized in the first place, and thus it shouldn't have possible to detect +# that the DFA had entered a start state. The second bug was that the start +# state handling was incorrect by jumping over the termination condition. +[[test]] +name = "fowler-basic154-unanchored" +regex = '''a([bc]*)c*''' +haystack = '''abc''' +matches = [[[0, 3], [1, 3]]] + +# From: https://github.com/rust-lang/regex/issues/981 +# +# This was never really a problem in the new architecture because the +# regex-automata engines are far more principled about how they deal with +# look-around. (This was one of the many reasons I wanted to re-work the +# original regex crate engines.) +[[test]] +name = "word-boundary-interact-poorly-with-literal-optimizations" +regex = '(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))' +haystack = 'ubi-Darwin-x86_64.tar.gz' +matches = [] + +# This was found during fuzz testing of regex. It provoked a panic in the meta +# engine as a result of the reverse suffix optimization. Namely, it hit a case +# where a suffix match was found, a corresponding reverse match was found, but +# the forward search turned up no match. The forward search should always match +# if the suffix and reverse search match. +# +# This in turn uncovered an inconsistency between the PikeVM and the DFA (lazy +# and fully compiled) engines. It was caused by a mishandling of the collection +# of NFA state IDs in the generic determinization code (which is why both types +# of DFA were impacted). Namely, when a fail state was encountered (that's the +# `[^\s\S]` in the pattern below), then it would just stop collecting states. +# But that's not correct since a later state could lead to a match. +[[test]] +name = "impossible-branch" +regex = '.*[^\s\S]A|B' +haystack = "B" +matches = [[0, 1]] + +# This was found during fuzz testing in regex-lite. The regex crate never +# suffered from this bug, but it causes regex-lite to incorrectly compile +# captures. +[[test]] +name = "captures-wrong-order" +regex = '(a){0}(a)' +haystack = 'a' +matches = [[[0, 1], [], [0, 1]]] + +# This tests a bug in how quit states are handled in the DFA. At some point +# during development, the DFAs were tweaked slightly such that if they hit +# a quit state (which means, they hit a byte that the caller configured should +# stop the search), then it might not return an error necessarily. Namely, if a +# match had already been found, then it would be returned instead of an error. +# +# But this is actually wrong! Why? Because even though a match had been found, +# it wouldn't be fully correct to return it once a quit state has been seen +# because you can't determine whether the match offset returned is the correct +# greedy/leftmost-first match. Since you can't complete the search as requested +# by the caller, the DFA should just stop and return an error. +# +# Interestingly, this does seem to produce an unavoidable difference between +# 'try_is_match().unwrap()' and 'try_find().unwrap().is_some()' for the DFAs. +# The former will stop immediately once a match is known to occur and return +# 'Ok(true)', where as the latter could find the match but quit with an +# 'Err(..)' first. +# +# Thankfully, I believe this inconsistency between 'is_match()' and 'find()' +# cannot be observed in the higher level meta regex API because it specifically +# will try another engine that won't fail in the case of a DFA failing. 
+# +# This regression happened in the regex crate rewrite, but before anything got +# released. +[[test]] +name = "negated-unicode-word-boundary-dfa-fail" +regex = '\B.*' +haystack = "!\u02D7" +matches = [[0, 3]] + +# This failure was found in the *old* regex crate (prior to regex 1.9), but +# I didn't investigate why. My best guess is that it's a literal optimization +# bug. It didn't occur in the rewrite. +[[test]] +name = "missed-match" +regex = 'e..+e.ee>' +haystack = 'Zeee.eZZZZZZZZeee>eeeeeee>' +matches = [[1, 26]] + +# This test came from the 'ignore' crate and tripped a bug in how accelerated +# DFA states were handled in an overlapping search. +[[test]] +name = "regex-to-glob" +regex = ['(?-u)^path1/[^/]*$'] +haystack = "path1/foo" +matches = [[0, 9]] +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# See: https://github.com/rust-lang/regex/issues/1060 +[[test]] +name = "reverse-inner-plus-shorter-than-expected" +regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})' +haystack = '102:12:39' +matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]] + +# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex +# to demonstrate the extent of the rot. Sigh. +# +# See: https://github.com/rust-lang/regex/issues/1060 +[[test]] +name = "reverse-inner-short" +regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])' +haystack = '102:12:39' +matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]] + +# This regression test was found via the RegexSet APIs. It triggered a +# particular code path where a regex was compiled with 'All' match semantics +# (to support overlapping search), but got funneled down into a standard +# leftmost search when calling 'is_match'. This is fine on its own, but the +# leftmost search will use a prefilter and that's where this went awry. +# +# Namely, since 'All' semantics were used, the aho-corasick prefilter was +# incorrectly compiled with 'Standard' semantics. This was wrong because +# 'Standard' immediately attempts to report a match at every position, even if +# that would mean reporting a match past the leftmost match before reporting +# the leftmost match. This breaks the prefilter contract of never having false +# negatives and leads overall to the engine not finding a match. +# +# See: https://github.com/rust-lang/regex/issues/1070 +[[test]] +name = "prefilter-with-aho-corasick-standard-semantics" +regex = '(?m)^ *v [0-9]' +haystack = 'v 0' +matches = [ + { id = 0, spans = [[0, 3]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = true +utf8 = true + +# This tests that the PikeVM and the meta regex agree on a particular regex. +# This test previously failed when the ad hoc engines inside the meta engine +# did not handle quit states correctly. Namely, the Unicode word boundary here +# combined with a non-ASCII codepoint provokes the quit state. The ad hoc +# engines were previously returning a match even after entering the quit state +# if a match had been previously detected, but this is incorrect. The reason +# is that if a quit state is found, then the search must give up *immediately* +# because it prevents the search from finding the "proper" leftmost-first +# match. If it instead returns a match that has been found, it risks reporting +# an improper match, as it did in this case. 
+# +# See: https://github.com/rust-lang/regex/issues/1046 +[[test]] +name = "non-prefix-literal-quit-state" +regex = '.+\b\n' +haystack = "β77\n" +matches = [[0, 5]] + +# This is a regression test for some errant HIR interval set operations that +# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The +# issue here is that the HIR produced from the regex had out-of-order ranges. +# +# See: https://github.com/rust-lang/regex/issues/1103 +# Ref: https://github.com/rust-lang/regex/pull/1051 +# Ref: https://github.com/rust-lang/regex/pull/1102 +[[test]] +name = "hir-optimization-out-of-order-class" +regex = '^[[:alnum:]./-]+$' +haystack = "a-b" +matches = [[0, 3]] + +# This is a regression test for an improper reverse suffix optimization. This +# occurred when I "broadened" the applicability of the optimization to include +# multiple possible literal suffixes instead of only sticking to a non-empty +# longest common suffix. It turns out that, at least given how the reverse +# suffix optimization works, we need to stick to the longest common suffix for +# now. +# +# See: https://github.com/rust-lang/regex/issues/1110 +# See also: https://github.com/astral-sh/ruff/pull/7980 +[[test]] +name = 'improper-reverse-suffix-optimization' +regex = '(\\N\{[^}]+})|([{}])' +haystack = 'hiya \N{snowman} bye' +matches = [[[5, 16], [5, 16], []]] diff --git a/vendor/regex/testdata/set.toml b/vendor/regex/testdata/set.toml new file mode 100644 index 0000000..049e8a8 --- /dev/null +++ b/vendor/regex/testdata/set.toml @@ -0,0 +1,641 @@ +# Basic multi-regex tests. + +[[test]] +name = "basic10" +regex = ["a", "a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic10-leftmost-first" +regex = ["a", "a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic20" +regex = ["a", "a"] +haystack = "ba" +matches = [ + { id = 0, span = [1, 2] }, + { id = 1, span = [1, 2] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic30" +regex = ["a", "b"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic40" +regex = ["a", "b"] +haystack = "b" +matches = [ + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic50" +regex = ["a|b", "b|a"] +haystack = "b" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic60" +regex = ["foo", "oo"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, + { id = 1, span = [1, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic60-leftmost-first" +regex = ["foo", "oo"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic61" +regex = ["oo", "foo"] +haystack = "foo" +matches = [ + { id = 1, span = [0, 3] }, + { id = 0, span = [1, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic61-leftmost-first" +regex = ["oo", "foo"] +haystack = "foo" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic70" +regex = ["abcd", "bcd", "cd", "d"] +haystack = "abcd" +matches = [ + { id = 0, span = [0, 4] }, + 
{ id = 1, span = [1, 4] }, + { id = 2, span = [2, 4] }, + { id = 3, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic71" +regex = ["bcd", "cd", "d", "abcd"] +haystack = "abcd" +matches = [ + { id = 3, span = [0, 4] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic80" +regex = ["^foo", "bar$"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic81" +regex = ["^foo", "bar$"] +haystack = "foo bar" +matches = [ + { id = 0, span = [0, 3] }, + { id = 1, span = [4, 7] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic82" +regex = ["^foo", "bar$"] +haystack = "bar" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic90" +regex = ["[a-z]+$", "foo"] +haystack = "01234 foo" +matches = [ + { id = 0, span = [8, 9] }, + { id = 0, span = [7, 9] }, + { id = 0, span = [6, 9] }, + { id = 1, span = [6, 9] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic91" +regex = ["[a-z]+$", "foo"] +haystack = "foo 01234" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic100" +regex = [".*?", "a"] +haystack = "zzza" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, + { id = 0, span = [4, 4] }, + { id = 0, span = [3, 4] }, + { id = 0, span = [2, 4] }, + { id = 0, span = [1, 4] }, + { id = 0, span = [0, 4] }, + { id = 1, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic101" +regex = [".*", "a"] +haystack = "zzza" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, + { id = 0, span = [4, 4] }, + { id = 0, span = [3, 4] }, + { id = 0, span = [2, 4] }, + { id = 0, span = [1, 4] }, + { id = 0, span = [0, 4] }, + { id = 1, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic102" +regex = [".*", "a"] +haystack = "zzz" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic110" +regex = ['\ba\b'] +haystack = "hello a bye" +matches = [ + { id = 0, span = [6, 7] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic111" +regex = ['\ba\b', '\be\b'] +haystack = "hello a bye e" +matches = [ + { id = 0, span = [6, 7] }, + { id = 1, span = [12, 13] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic120" +regex = ["a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic121" +regex = [".*a"] +haystack = "a" +matches = [ + { id = 0, 
span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic122" +regex = [".*a", "β"] +haystack = "β" +matches = [ + { id = 1, span = [0, 2] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic130" +regex = ["ab", "b"] +haystack = "ba" +matches = [ + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +# These test cases where one of the regexes matches the empty string. + +[[test]] +name = "empty10" +regex = ["", "a"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 1, span = [0, 1] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty10-leftmost-first" +regex = ["", "a"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty11" +regex = ["a", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 0, span = [0, 1] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty11-leftmost-first" +regex = ["a", ""] +haystack = "abc" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty20" +regex = ["", "b"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty20-leftmost-first" +regex = ["", "b"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty21" +regex = ["b", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty21-leftmost-first" +regex = ["b", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty22" +regex = ["(?:)", "b"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty23" +regex = ["b", "(?:)"] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty30" +regex = ["", "z"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty30-leftmost-first" +regex = ["", "z"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = 
[1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty31" +regex = ["z", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty31-leftmost-first" +regex = ["z", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty40" +regex = ["c(?:)", "b"] +haystack = "abc" +matches = [ + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty40-leftmost-first" +regex = ["c(?:)", "b"] +haystack = "abc" +matches = [ + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +# These test cases where there are no matches. + +[[test]] +name = "nomatch10" +regex = ["a", "a"] +haystack = "b" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch20" +regex = ["^foo", "bar$"] +haystack = "bar foo" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch30" +regex = [] +haystack = "a" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch40" +regex = ["^rooted$", '\.log$'] +haystack = "notrooted" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +# These test multi-regex searches with capture groups. +# +# NOTE: I wrote these tests in the course of developing a first class API for +# overlapping capturing group matches, but ultimately removed that API because +# the semantics for overlapping matches aren't totally clear. However, I've +# left the tests because I believe the semantics for these patterns are clear +# and because we can still test our "which patterns matched" APIs with them. 
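+#
+# As a rough sketch of the user-facing side of "which patterns matched" (an
+# illustration only, not the test harness itself), the regex crate's RegexSet
+# reports the indices of the patterns that matched somewhere in the haystack.
+# The caps-010 test below expects both of its patterns to match, which
+# corresponds to something like:
+#
+#     use regex::RegexSet;
+#
+#     fn main() {
+#         let set = RegexSet::new([r"^(\w+) (\w+)$", r"^(\S+) (\S+)$"]).unwrap();
+#         let which: Vec<usize> = set.matches("Bruce Springsteen").iter().collect();
+#         assert_eq!(which, vec![0, 1]); // pattern ids, in ascending order
+#     }
+#
+# (caps-010 runs with unicode = false, but for this ASCII haystack the result
+# is the same either way.)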
+ +[[test]] +name = "caps-010" +regex = ['^(\w+) (\w+)$', '^(\S+) (\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-020" +regex = ['^(\w+) (\w+)$', '^[A-Z](\S+) [A-Z](\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [1, 5], [7, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-030" +regex = ['^(\w+) (\w+)$', '^([A-Z])(\S+) ([A-Z])(\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [0, 1], [1, 5], [6, 7], [7, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-110" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false + +[[test]] +name = "caps-120" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "&ruce $pringsteen" +matches = [ + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false + +[[test]] +name = "caps-121" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "&ruce $pringsteen Foo Bar" +matches = [ + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 0, spans = [[18, 25], [18, 21], [22, 25]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false diff --git a/vendor/regex/testdata/substring.toml b/vendor/regex/testdata/substring.toml new file mode 100644 index 0000000..69595ce --- /dev/null +++ b/vendor/regex/testdata/substring.toml @@ -0,0 +1,36 @@ +# These tests check that regex engines perform as expected when the search is +# instructed to only search a substring of a haystack instead of the entire +# haystack. This tends to exercise interesting edge cases that are otherwise +# difficult to provoke. (But not necessarily impossible. Regex search iterators +# for example, make use of the "search just a substring" APIs by changing the +# starting position of a search to the end position of the previous match.) + +[[test]] +name = "unicode-word-start" +regex = '\b[0-9]+\b' +haystack = "β123" +bounds = { start = 2, end = 5 } +matches = [] + +[[test]] +name = "unicode-word-end" +regex = '\b[0-9]+\b' +haystack = "123β" +bounds = { start = 0, end = 3 } +matches = [] + +[[test]] +name = "ascii-word-start" +regex = '\b[0-9]+\b' +haystack = "β123" +bounds = { start = 2, end = 5 } +matches = [[2, 5]] +unicode = false + +[[test]] +name = "ascii-word-end" +regex = '\b[0-9]+\b' +haystack = "123β" +bounds = { start = 0, end = 3 } +matches = [[0, 3]] +unicode = false diff --git a/vendor/regex/testdata/unicode.toml b/vendor/regex/testdata/unicode.toml new file mode 100644 index 0000000..f4ac76b --- /dev/null +++ b/vendor/regex/testdata/unicode.toml @@ -0,0 +1,517 @@ +# Basic Unicode literal support. 
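+#
+# All spans in these tests are byte offsets, not codepoint counts. For
+# example, the snowman (U+2603) encodes to three UTF-8 bytes, which is why
+# literal1 below expects the span [0, 3]. A minimal sketch of the same check
+# through the public regex API:
+#
+#     use regex::Regex;
+#
+#     fn main() {
+#         let m = Regex::new("☃").unwrap().find("☃").unwrap();
+#         assert_eq!((m.start(), m.end()), (0, 3)); // "☃".len() == 3
+#     }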
+[[test]] +name = "literal1" +regex = '☃' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "literal2" +regex = '☃+' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "literal3" +regex = '☃+' +haystack = "☃" +matches = [[0, 3]] +case-insensitive = true + +[[test]] +name = "literal4" +regex = 'Δ' +haystack = "δ" +matches = [[0, 2]] +case-insensitive = true + +# Unicode word boundaries. +[[test]] +name = "wb-100" +regex = '\d\b' +haystack = "6δ" +matches = [] + +[[test]] +name = "wb-200" +regex = '\d\b' +haystack = "6 " +matches = [[0, 1]] + +[[test]] +name = "wb-300" +regex = '\d\B' +haystack = "6δ" +matches = [[0, 1]] + +[[test]] +name = "wb-400" +regex = '\d\B' +haystack = "6 " +matches = [] + +# Unicode character class support. +[[test]] +name = "class1" +regex = '[☃Ⅰ]+' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "class2" +regex = '\pN' +haystack = "Ⅰ" +matches = [[0, 3]] + +[[test]] +name = "class3" +regex = '\pN+' +haystack = "Ⅰ1Ⅱ2" +matches = [[0, 8]] + +[[test]] +name = "class4" +regex = '\PN+' +haystack = "abⅠ" +matches = [[0, 2]] + +[[test]] +name = "class5" +regex = '[\PN]+' +haystack = "abⅠ" +matches = [[0, 2]] + +[[test]] +name = "class6" +regex = '[^\PN]+' +haystack = "abⅠ" +matches = [[2, 5]] + +[[test]] +name = "class7" +regex = '\p{Lu}+' +haystack = "ΛΘΓΔα" +matches = [[0, 8]] + +[[test]] +name = "class8" +regex = '\p{Lu}+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] +case-insensitive = true + +[[test]] +name = "class9" +regex = '\pL+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] + +[[test]] +name = "class10" +regex = '\p{Ll}+' +haystack = "ΛΘΓΔα" +matches = [[8, 10]] + +# Unicode aware "Perl" character classes. +[[test]] +name = "perl1" +regex = '\w+' +haystack = "dδd" +matches = [[0, 4]] + +[[test]] +name = "perl2" +regex = '\w+' +haystack = "⥡" +matches = [] + +[[test]] +name = "perl3" +regex = '\W+' +haystack = "⥡" +matches = [[0, 3]] + +[[test]] +name = "perl4" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 8]] + +[[test]] +name = "perl5" +regex = '\d+' +haystack = "Ⅱ" +matches = [] + +[[test]] +name = "perl6" +regex = '\D+' +haystack = "Ⅱ" +matches = [[0, 3]] + +[[test]] +name = "perl7" +regex = '\s+' +haystack = " " +matches = [[0, 3]] + +[[test]] +name = "perl8" +regex = '\s+' +haystack = "☃" +matches = [] + +[[test]] +name = "perl9" +regex = '\S+' +haystack = "☃" +matches = [[0, 3]] + +# Specific tests for Unicode general category classes. 
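+#
+# Note that \p{...} accepts both the long general category names used below
+# and their two-letter abbreviations (as in \p{Lu} above). As a sketch of
+# class7's expectation via the public regex API:
+#
+#     use regex::Regex;
+#
+#     fn main() {
+#         let m = Regex::new(r"\p{Lu}+").unwrap().find("ΛΘΓΔα").unwrap();
+#         // Four uppercase Greek letters at two bytes each; the lowercase α
+#         // is excluded by Lu (uppercase letter).
+#         assert_eq!((m.start(), m.end()), (0, 8));
+#     }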
+[[test]] +name = "class-gencat1" +regex = '\p{Cased_Letter}' +haystack = "A" +matches = [[0, 3]] + +[[test]] +name = "class-gencat2" +regex = '\p{Close_Punctuation}' +haystack = "❯" +matches = [[0, 3]] + +[[test]] +name = "class-gencat3" +regex = '\p{Connector_Punctuation}' +haystack = "⁀" +matches = [[0, 3]] + +[[test]] +name = "class-gencat4" +regex = '\p{Control}' +haystack = "\u009F" +matches = [[0, 2]] + +[[test]] +name = "class-gencat5" +regex = '\p{Currency_Symbol}' +haystack = "£" +matches = [[0, 3]] + +[[test]] +name = "class-gencat6" +regex = '\p{Dash_Punctuation}' +haystack = "〰" +matches = [[0, 3]] + +[[test]] +name = "class-gencat7" +regex = '\p{Decimal_Number}' +haystack = "𑓙" +matches = [[0, 4]] + +[[test]] +name = "class-gencat8" +regex = '\p{Enclosing_Mark}' +haystack = "\uA672" +matches = [[0, 3]] + +[[test]] +name = "class-gencat9" +regex = '\p{Final_Punctuation}' +haystack = "⸡" +matches = [[0, 3]] + +[[test]] +name = "class-gencat10" +regex = '\p{Format}' +haystack = "\U000E007F" +matches = [[0, 4]] + +[[test]] +name = "class-gencat11" +regex = '\p{Initial_Punctuation}' +haystack = "⸜" +matches = [[0, 3]] + +[[test]] +name = "class-gencat12" +regex = '\p{Letter}' +haystack = "Έ" +matches = [[0, 2]] + +[[test]] +name = "class-gencat13" +regex = '\p{Letter_Number}' +haystack = "ↂ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat14" +regex = '\p{Line_Separator}' +haystack = "\u2028" +matches = [[0, 3]] + +[[test]] +name = "class-gencat15" +regex = '\p{Lowercase_Letter}' +haystack = "ϛ" +matches = [[0, 2]] + +[[test]] +name = "class-gencat16" +regex = '\p{Mark}' +haystack = "\U000E01EF" +matches = [[0, 4]] + +[[test]] +name = "class-gencat17" +regex = '\p{Math}' +haystack = "⋿" +matches = [[0, 3]] + +[[test]] +name = "class-gencat18" +regex = '\p{Modifier_Letter}' +haystack = "𖭃" +matches = [[0, 4]] + +[[test]] +name = "class-gencat19" +regex = '\p{Modifier_Symbol}' +haystack = "🏿" +matches = [[0, 4]] + +[[test]] +name = "class-gencat20" +regex = '\p{Nonspacing_Mark}' +haystack = "\U0001E94A" +matches = [[0, 4]] + +[[test]] +name = "class-gencat21" +regex = '\p{Number}' +haystack = "⓿" +matches = [[0, 3]] + +[[test]] +name = "class-gencat22" +regex = '\p{Open_Punctuation}' +haystack = "⦅" +matches = [[0, 3]] + +[[test]] +name = "class-gencat23" +regex = '\p{Other}' +haystack = "\u0BC9" +matches = [[0, 3]] + +[[test]] +name = "class-gencat24" +regex = '\p{Other_Letter}' +haystack = "ꓷ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat25" +regex = '\p{Other_Number}' +haystack = "㉏" +matches = [[0, 3]] + +[[test]] +name = "class-gencat26" +regex = '\p{Other_Punctuation}' +haystack = "𞥞" +matches = [[0, 4]] + +[[test]] +name = "class-gencat27" +regex = '\p{Other_Symbol}' +haystack = "⅌" +matches = [[0, 3]] + +[[test]] +name = "class-gencat28" +regex = '\p{Paragraph_Separator}' +haystack = "\u2029" +matches = [[0, 3]] + +[[test]] +name = "class-gencat29" +regex = '\p{Private_Use}' +haystack = "\U0010FFFD" +matches = [[0, 4]] + +[[test]] +name = "class-gencat30" +regex = '\p{Punctuation}' +haystack = "𑁍" +matches = [[0, 4]] + +[[test]] +name = "class-gencat31" +regex = '\p{Separator}' +haystack = "\u3000" +matches = [[0, 3]] + +[[test]] +name = "class-gencat32" +regex = '\p{Space_Separator}' +haystack = "\u205F" +matches = [[0, 3]] + +[[test]] +name = "class-gencat33" +regex = '\p{Spacing_Mark}' +haystack = "\U00016F7E" +matches = [[0, 4]] + +[[test]] +name = "class-gencat34" +regex = '\p{Symbol}' +haystack = "⯈" +matches = [[0, 3]] + +[[test]] +name = "class-gencat35" 
+regex = '\p{Titlecase_Letter}' +haystack = "ῼ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat36" +regex = '\p{Unassigned}' +haystack = "\U0010FFFF" +matches = [[0, 4]] + +[[test]] +name = "class-gencat37" +regex = '\p{Uppercase_Letter}' +haystack = "Ꝋ" +matches = [[0, 3]] + + +# Tests for Unicode emoji properties. +[[test]] +name = "class-emoji1" +regex = '\p{Emoji}' +haystack = "\u23E9" +matches = [[0, 3]] + +[[test]] +name = "class-emoji2" +regex = '\p{emoji}' +haystack = "\U0001F21A" +matches = [[0, 4]] + +[[test]] +name = "class-emoji3" +regex = '\p{extendedpictographic}' +haystack = "\U0001FA6E" +matches = [[0, 4]] + +[[test]] +name = "class-emoji4" +regex = '\p{extendedpictographic}' +haystack = "\U0001FFFD" +matches = [[0, 4]] + + +# Tests for Unicode grapheme cluster properties. +[[test]] +name = "class-gcb1" +regex = '\p{grapheme_cluster_break=prepend}' +haystack = "\U00011D46" +matches = [[0, 4]] + +[[test]] +name = "class-gcb2" +regex = '\p{gcb=regional_indicator}' +haystack = "\U0001F1E6" +matches = [[0, 4]] + +[[test]] +name = "class-gcb3" +regex = '\p{gcb=ri}' +haystack = "\U0001F1E7" +matches = [[0, 4]] + +[[test]] +name = "class-gcb4" +regex = '\p{regionalindicator}' +haystack = "\U0001F1FF" +matches = [[0, 4]] + +[[test]] +name = "class-gcb5" +regex = '\p{gcb=lvt}' +haystack = "\uC989" +matches = [[0, 3]] + +[[test]] +name = "class-gcb6" +regex = '\p{gcb=zwj}' +haystack = "\u200D" +matches = [[0, 3]] + +# Tests for Unicode word boundary properties. +[[test]] +name = "class-word-break1" +regex = '\p{word_break=Hebrew_Letter}' +haystack = "\uFB46" +matches = [[0, 3]] + +[[test]] +name = "class-word-break2" +regex = '\p{wb=hebrewletter}' +haystack = "\uFB46" +matches = [[0, 3]] + +[[test]] +name = "class-word-break3" +regex = '\p{wb=ExtendNumLet}' +haystack = "\uFF3F" +matches = [[0, 3]] + +[[test]] +name = "class-word-break4" +regex = '\p{wb=WSegSpace}' +haystack = "\u3000" +matches = [[0, 3]] + +[[test]] +name = "class-word-break5" +regex = '\p{wb=numeric}' +haystack = "\U0001E950" +matches = [[0, 4]] + +# Tests for Unicode sentence boundary properties. +[[test]] +name = "class-sentence-break1" +regex = '\p{sentence_break=Lower}' +haystack = "\u0469" +matches = [[0, 2]] + +[[test]] +name = "class-sentence-break2" +regex = '\p{sb=lower}' +haystack = "\u0469" +matches = [[0, 2]] + +[[test]] +name = "class-sentence-break3" +regex = '\p{sb=Close}' +haystack = "\uFF60" +matches = [[0, 3]] + +[[test]] +name = "class-sentence-break4" +regex = '\p{sb=Close}' +haystack = "\U0001F677" +matches = [[0, 4]] + +[[test]] +name = "class-sentence-break5" +regex = '\p{sb=SContinue}' +haystack = "\uFF64" +matches = [[0, 3]] diff --git a/vendor/regex/testdata/utf8.toml b/vendor/regex/testdata/utf8.toml new file mode 100644 index 0000000..39e284b --- /dev/null +++ b/vendor/regex/testdata/utf8.toml @@ -0,0 +1,399 @@ +# These test the UTF-8 modes expose by regex-automata. Namely, when utf8 is +# true, then we promise that the haystack is valid UTF-8. (Otherwise behavior +# is unspecified.) This also corresponds to building the regex engine with the +# following two guarantees: +# +# 1) For any non-empty match reported, its span is guaranteed to correspond to +# valid UTF-8. +# 2) All empty or zero-width matches reported must never split a UTF-8 +# encoded codepoint. If the haystack has invalid UTF-8, then this results in +# unspecified behavior. 
+# +# (2) in particular is what we focus our testing on, since (1) is generally +# guaranteed by regex-syntax's AST-to-HIR translator and is well tested there. +# The thing with (2) is that it can't be described in the HIR, so the regex +# engines have to handle that case. Thus, we test it here. +# +# Note that it is possible to build a regex that has property (1) but not +# (2), and vice versa. This is done by building the HIR with 'utf8=true' but +# building the Thompson NFA with 'utf8=false'. We don't test that here because +# the harness doesn't expose a way to enable or disable UTF-8 mode with that +# granularity. Instead, those combinations are lightly tested via doc examples. +# That's not to say that (1) without (2) is uncommon. Indeed, ripgrep uses it +# because it cannot guarantee that its haystack is valid UTF-8. + +# This tests that an empty regex doesn't split a codepoint. +[[test]] +name = "empty-utf8yes" +regex = '' +haystack = '☃' +matches = [[0, 0], [3, 3]] +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-overlapping" +regex = '' +haystack = '☃' +matches = [[0, 0], [3, 3]] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex DOES split a codepoint when utf8=false. +[[test]] +name = "empty-utf8no" +regex = '' +haystack = '☃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-overlapping" +regex = '' +haystack = '☃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex doesn't split a codepoint, even if we give +# it bounds entirely within the codepoint. +# +# This is one of the trickier cases and is what motivated the current UTF-8 +# mode design. In particular, at one point, this test failed the 'is_match' +# variant of the test but not 'find'. This is because the 'is_match' code path +# is specifically optimized for "was a match found" rather than "where is the +# match." In the former case, you don't really care about distinguishing +# empty from non-empty matches, and thus, the codepoint splitting filtering +# logic wasn't getting applied. (In multiple ways across multiple regex +# engines.) In this way, you can wind up with a situation where 'is_match' +# says "yes," but 'find' says, "I didn't find anything." Which is... not +# great. +# +# I could have decided to say that providing boundaries that themselves split +# a codepoint would have unspecified behavior. But I couldn't quite convince +# myself that such boundaries were the only way to get an inconsistency between +# 'is_match' and 'find'. +# +# Note that I also tried to come up with a test like this that fails without +# using `bounds`. Specifically, a test where 'is_match' and 'find' disagree. +# But I couldn't do it, and I'm tempted to conclude it is impossible. The +# fundamental problem is that you need to simultaneously produce an empty match +# that splits a codepoint while *not* matching before or after the codepoint. +[[test]] +name = "empty-utf8yes-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [] +unicode = true +utf8 = true + +# Tests the overlapping case of the above.
+[[test]] +name = "empty-utf8yes-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex splits a codepoint when the bounds are +# entirely within the codepoint. +[[test]] +name = "empty-utf8no-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# In this test, we anchor the search. Since the start position is also a UTF-8 +# boundary, we get a match. +[[test]] +name = "empty-utf8yes-anchored" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-overlapping" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except with UTF-8 mode disabled. It almost doesn't change the +# result, except for the fact that since this is an anchored search and we +# always find all matches, the test harness will keep reporting matches until +# none are found. Because it's anchored, matches will be reported so long as +# they are directly adjacent. Since with UTF-8 mode the next anchored search +# after the match at [0, 0] fails, iteration stops (and doesn't find the last +# match at [4, 4]). +[[test]] +name = "empty-utf8no-anchored" +regex = '' +haystack = '𝛃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search. (This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-overlapping" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# In this test, we anchor the search, but also set bounds. The bounds start the +# search in the middle of a codepoint, so there should never be a match. +[[test]] +name = "empty-utf8yes-anchored-bounds" +regex = '' +haystack = '𝛃' +matches = [] +bounds = [1, 3] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-bounds-overlapping" +regex = '' +haystack = '𝛃' +matches = [] +bounds = [1, 3] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except with UTF-8 mode disabled. Without UTF-8 mode enabled, +# matching within a codepoint is allowed. And remember, as in the anchored test +# above with UTF-8 mode disabled, iteration will report all adjacent matches. +# The matches at [0, 0] and [4, 4] are not included because of the bounds of +# the search. 
+[[test]] +name = "empty-utf8no-anchored-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search. (This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we find the match at the end of the string when the bounds +# exclude the first match. +[[test]] +name = "empty-utf8yes-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[4, 4]] +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[4, 4]] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except since UTF-8 mode is disabled, we also find the matches +# inbetween that split the codepoint. +[[test]] +name = "empty-utf8no-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we don't find any matches in an anchored search, even when +# the bounds include a match (at the end). +[[test]] +name = "empty-utf8yes-anchored-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except since UTF-8 mode is disabled, we also find the matches +# inbetween that split the codepoint. Even though this is an anchored search, +# since the matches are adjacent, we find all of them. +[[test]] +name = "empty-utf8no-anchored-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search. (This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we find the match at the end of the haystack in UTF-8 mode +# when our bounds only include the empty string at the end of the haystack. 
+[[test]] +name = "empty-utf8yes-anchored-endbound" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-endbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, but with UTF-8 mode disabled. Results remain the same since +# the only possible match does not split a codepoint. +[[test]] +name = "empty-utf8no-anchored-endbound" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-anchored-endbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" diff --git a/vendor/regex/testdata/word-boundary-special.toml b/vendor/regex/testdata/word-boundary-special.toml new file mode 100644 index 0000000..2b5a2a0 --- /dev/null +++ b/vendor/regex/testdata/word-boundary-special.toml @@ -0,0 +1,687 @@ +# These tests are for the "special" word boundary assertions. That is, +# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty +# assertions for more niche use cases, but hitting those cases without these +# assertions is difficult. For example, \b{start-half} and \b{end-half} are +# used to implement the -w/--word-regexp flag in a grep program. + +# Tests for (?-u:\b{start}) + +[[test]] +name = "word-start-ascii-010" +regex = '\b{start}' +haystack = "a" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-020" +regex = '\b{start}' +haystack = "a " +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-030" +regex = '\b{start}' +haystack = " a " +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-start-ascii-040" +regex = '\b{start}' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-050" +regex = '\b{start}' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-060" +regex = '\b{start}' +haystack = "𝛃" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-060-bounds" +regex = '\b{start}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-070" +regex = '\b{start}' +haystack = " 𝛃 " +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-080" +regex = '\b{start}' +haystack = "𝛃𐆀" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-090" +regex = '\b{start}' +haystack = "𝛃b" +matches = [[4, 4]] +unicode = false + +[[test]] +name = "word-start-ascii-110" +regex = '\b{start}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = false + +# Tests for (?-u:\b{end}) + +[[test]] +name = "word-end-ascii-010" +regex = '\b{end}' +haystack = "a" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-ascii-020" +regex = '\b{end}' +haystack = "a " +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-ascii-030" +regex = '\b{end}' +haystack = " a " +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-ascii-040" +regex = '\b{end}' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-050" +regex = '\b{end}' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = 
"word-end-ascii-060" +regex = '\b{end}' +haystack = "𝛃" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-060-bounds" +regex = '\b{end}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-070" +regex = '\b{end}' +haystack = " 𝛃 " +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-080" +regex = '\b{end}' +haystack = "𝛃𐆀" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-090" +regex = '\b{end}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = false + +[[test]] +name = "word-end-ascii-110" +regex = '\b{end}' +haystack = "b𝛃" +matches = [[1, 1]] +unicode = false + +# Tests for \b{start} + +[[test]] +name = "word-start-unicode-010" +regex = '\b{start}' +haystack = "a" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-020" +regex = '\b{start}' +haystack = "a " +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-030" +regex = '\b{start}' +haystack = " a " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-start-unicode-040" +regex = '\b{start}' +haystack = "" +matches = [] +unicode = true + +[[test]] +name = "word-start-unicode-050" +regex = '\b{start}' +haystack = "ab" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-060" +regex = '\b{start}' +haystack = "𝛃" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-060-bounds" +regex = '\b{start}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-start-unicode-070" +regex = '\b{start}' +haystack = " 𝛃 " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-start-unicode-080" +regex = '\b{start}' +haystack = "𝛃𐆀" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-090" +regex = '\b{start}' +haystack = "𝛃b" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-110" +regex = '\b{start}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = true + +# Tests for \b{end} + +[[test]] +name = "word-end-unicode-010" +regex = '\b{end}' +haystack = "a" +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-unicode-020" +regex = '\b{end}' +haystack = "a " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-unicode-030" +regex = '\b{end}' +haystack = " a " +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-unicode-040" +regex = '\b{end}' +haystack = "" +matches = [] +unicode = true + +[[test]] +name = "word-end-unicode-050" +regex = '\b{end}' +haystack = "ab" +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-unicode-060" +regex = '\b{end}' +haystack = "𝛃" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-unicode-060-bounds" +regex = '\b{end}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-end-unicode-070" +regex = '\b{end}' +haystack = " 𝛃 " +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-unicode-080" +regex = '\b{end}' +haystack = "𝛃𐆀" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-unicode-090" +regex = '\b{end}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-unicode-110" +regex = '\b{end}' +haystack = "b𝛃" +matches = [[5, 5]] +unicode = true + +# Tests for (?-u:\b{start-half}) + +[[test]] +name = "word-start-half-ascii-010" +regex = '\b{start-half}' +haystack = "a" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-020" +regex = '\b{start-half}' +haystack = "a 
" +matches = [[0, 0], [2, 2]] +unicode = false + +[[test]] +name = "word-start-half-ascii-030" +regex = '\b{start-half}' +haystack = " a " +matches = [[0, 0], [1, 1], [3, 3]] +unicode = false + +[[test]] +name = "word-start-half-ascii-040" +regex = '\b{start-half}' +haystack = "" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-050" +regex = '\b{start-half}' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-060" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-start-half-ascii-060-noutf8" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +[[test]] +name = "word-start-half-ascii-060-bounds" +regex = '\b{start-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-start-half-ascii-070" +regex = '\b{start-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [5, 5], [6, 6]] +unicode = false + +[[test]] +name = "word-start-half-ascii-080" +regex = '\b{start-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [4, 4], [8, 8]] +unicode = false + +[[test]] +name = "word-start-half-ascii-090" +regex = '\b{start-half}' +haystack = "𝛃b" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-start-half-ascii-110" +regex = '\b{start-half}' +haystack = "b𝛃" +matches = [[0, 0], [5, 5]] +unicode = false + +# Tests for (?-u:\b{end-half}) + +[[test]] +name = "word-end-half-ascii-010" +regex = '\b{end-half}' +haystack = "a" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-half-ascii-020" +regex = '\b{end-half}' +haystack = "a " +matches = [[1, 1], [2, 2]] +unicode = false + +[[test]] +name = "word-end-half-ascii-030" +regex = '\b{end-half}' +haystack = " a " +matches = [[0, 0], [2, 2], [3, 3]] +unicode = false + +[[test]] +name = "word-end-half-ascii-040" +regex = '\b{end-half}' +haystack = "" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-end-half-ascii-050" +regex = '\b{end-half}' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-half-ascii-060" +regex = '\b{end-half}' +haystack = "𝛃" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-end-half-ascii-060-bounds" +regex = '\b{end-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-end-half-ascii-070" +regex = '\b{end-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [5, 5], [6, 6]] +unicode = false + +[[test]] +name = "word-end-half-ascii-080" +regex = '\b{end-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [4, 4], [8, 8]] +unicode = false + +[[test]] +name = "word-end-half-ascii-090" +regex = '\b{end-half}' +haystack = "𝛃b" +matches = [[0, 0], [5, 5]] +unicode = false + +[[test]] +name = "word-end-half-ascii-110" +regex = '\b{end-half}' +haystack = "b𝛃" +matches = [[1, 1], [5, 5]] +unicode = false + +# Tests for \b{start-half} + +[[test]] +name = "word-start-half-unicode-010" +regex = '\b{start-half}' +haystack = "a" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-020" +regex = '\b{start-half}' +haystack = "a " +matches = [[0, 0], [2, 2]] +unicode = true + +[[test]] +name = "word-start-half-unicode-030" +regex = '\b{start-half}' +haystack = " a " +matches = [[0, 0], [1, 1], [3, 3]] +unicode = true + +[[test]] +name = "word-start-half-unicode-040" +regex = '\b{start-half}' +haystack = "" +matches = [[0, 0]] +unicode = true + 
+[[test]] +name = "word-start-half-unicode-050" +regex = '\b{start-half}' +haystack = "ab" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-060" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-060-bounds" +regex = '\b{start-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-start-half-unicode-070" +regex = '\b{start-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [6, 6]] +unicode = true + +[[test]] +name = "word-start-half-unicode-080" +regex = '\b{start-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [8, 8]] +unicode = true + +[[test]] +name = "word-start-half-unicode-090" +regex = '\b{start-half}' +haystack = "𝛃b" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-110" +regex = '\b{start-half}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = true + +# Tests for \b{end-half} + +[[test]] +name = "word-end-half-unicode-010" +regex = '\b{end-half}' +haystack = "a" +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-half-unicode-020" +regex = '\b{end-half}' +haystack = "a " +matches = [[1, 1], [2, 2]] +unicode = true + +[[test]] +name = "word-end-half-unicode-030" +regex = '\b{end-half}' +haystack = " a " +matches = [[0, 0], [2, 2], [3, 3]] +unicode = true + +[[test]] +name = "word-end-half-unicode-040" +regex = '\b{end-half}' +haystack = "" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-end-half-unicode-050" +regex = '\b{end-half}' +haystack = "ab" +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-half-unicode-060" +regex = '\b{end-half}' +haystack = "𝛃" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-half-unicode-060-bounds" +regex = '\b{end-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-end-half-unicode-070" +regex = '\b{end-half}' +haystack = " 𝛃 " +matches = [[0, 0], [5, 5], [6, 6]] +unicode = true + +[[test]] +name = "word-end-half-unicode-080" +regex = '\b{end-half}' +haystack = "𝛃𐆀" +matches = [[4, 4], [8, 8]] +unicode = true + +[[test]] +name = "word-end-half-unicode-090" +regex = '\b{end-half}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-half-unicode-110" +regex = '\b{end-half}' +haystack = "b𝛃" +matches = [[5, 5]] +unicode = true + +# Specialty tests. + +# Since \r is special cased in the start state computation (to deal with CRLF +# mode), this test ensures that the correct start state is computed when the +# pattern starts with a half word boundary assertion. +[[test]] +name = "word-start-half-ascii-carriage" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC\rabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true + +# Since \n is also special cased in the start state computation, this test +# ensures that the correct start state is computed when the pattern starts with +# a half word boundary assertion. +[[test]] +name = "word-start-half-ascii-linefeed" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC\nabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true + +# Like the carriage return test above, but with a custom line terminator. +[[test]] +name = "word-start-half-ascii-customlineterm" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC!abc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true +line-terminator = '!' 
diff --git a/vendor/regex/testdata/word-boundary.toml b/vendor/regex/testdata/word-boundary.toml new file mode 100644 index 0000000..1d86fc9 --- /dev/null +++ b/vendor/regex/testdata/word-boundary.toml @@ -0,0 +1,781 @@ +# Some of these are cribbed from RE2's test suite. + +# These test \b. Below are tests for \B. +[[test]] +name = "wb1" +regex = '\b' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb2" +regex = '\b' +haystack = "a" +matches = [[0, 0], [1, 1]] +unicode = false + +[[test]] +name = "wb3" +regex = '\b' +haystack = "ab" +matches = [[0, 0], [2, 2]] +unicode = false + +[[test]] +name = "wb4" +regex = '^\b' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "wb5" +regex = '\b$' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "wb6" +regex = '^\b$' +haystack = "ab" +matches = [] +unicode = false + +[[test]] +name = "wb7" +regex = '\bbar\b' +haystack = "nobar bar foo bar" +matches = [[6, 9], [14, 17]] +unicode = false + +[[test]] +name = "wb8" +regex = 'a\b' +haystack = "faoa x" +matches = [[3, 4]] +unicode = false + +[[test]] +name = "wb9" +regex = '\bbar' +haystack = "bar x" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb10" +regex = '\bbar' +haystack = "foo\nbar x" +matches = [[4, 7]] +unicode = false + +[[test]] +name = "wb11" +regex = 'bar\b' +haystack = "foobar" +matches = [[3, 6]] +unicode = false + +[[test]] +name = "wb12" +regex = 'bar\b' +haystack = "foobar\nxxx" +matches = [[3, 6]] +unicode = false + +[[test]] +name = "wb13" +regex = '(?:foo|bar|[A-Z])\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb14" +regex = '(?:foo|bar|[A-Z])\b' +haystack = "foo\n" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb15" +regex = '\b(?:foo|bar|[A-Z])' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb16" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "X" +matches = [[0, 1]] +unicode = false + +[[test]] +name = "wb17" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "XY" +matches = [] +unicode = false + +[[test]] +name = "wb18" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "bar" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb19" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb20" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "foo\n" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb21" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "ffoo bbar N x" +matches = [[10, 11]] +unicode = false + +[[test]] +name = "wb22" +regex = '\b(?:fo|foo)\b' +haystack = "fo" +matches = [[0, 2]] +unicode = false + +[[test]] +name = "wb23" +regex = '\b(?:fo|foo)\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb24" +regex = '\b\b' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb25" +regex = '\b\b' +haystack = "a" +matches = [[0, 0], [1, 1]] +unicode = false + +[[test]] +name = "wb26" +regex = '\b$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb27" +regex = '\b$' +haystack = "x" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "wb28" +regex = '\b$' +haystack = "y x" +matches = [[3, 3]] +unicode = false + +[[test]] +name = "wb29" +regex = '(?-u:\b).$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb30" +regex = '^\b(?:fo|foo)\b' +haystack = "fo" +matches = [[0, 2]] +unicode = false + +[[test]] +name = "wb31" +regex = '^\b(?:fo|foo)\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + 
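+# The '(?:fo|foo)' tests above deserve a second look: leftmost-first semantics +# would normally prefer the 'fo' branch, but inside "foo" the trailing \b +# fails right after 'fo', which forces the 'foo' branch. A small illustrative +# sketch using the `regex` crate (not part of this test suite): +# +# let re = regex::Regex::new(r"\b(?:fo|foo)\b").unwrap(); +# assert_eq!(re.find("fo").map(|m| m.range()), Some(0..2)); +# assert_eq!(re.find("foo").map(|m| m.range()), Some(0..3)); +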
+[[test]] +name = "wb32" +regex = '^\b$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb33" +regex = '^\b$' +haystack = "x" +matches = [] +unicode = false + +[[test]] +name = "wb34" +regex = '^(?-u:\b).$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb35" +regex = '^(?-u:\b).(?-u:\b)$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb36" +regex = '^^^^^\b$$$$$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb37" +regex = '^^^^^(?-u:\b).$$$$$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb38" +regex = '^^^^^\b$$$$$' +haystack = "x" +matches = [] +unicode = false + +[[test]] +name = "wb39" +regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb40" +regex = '(?-u:\b).+(?-u:\b)' +haystack = "$$abc$$" +matches = [[2, 5]] + +[[test]] +name = "wb41" +regex = '\b' +haystack = "a b c" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false + +[[test]] +name = "wb42" +regex = '\bfoo\b' +haystack = "zzz foo zzz" +matches = [[4, 7]] +unicode = false + +[[test]] +name = "wb43" +regex = '\b^' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "wb44" +regex = '$\b' +haystack = "ab" +matches = [[2, 2]] +unicode = false + + +# Tests for \B. Note that \B is not allowed if UTF-8 mode is enabled, so we +# have to disable it for most of these tests. This is because \B can match at +# non-UTF-8 boundaries. +[[test]] +name = "nb1" +regex = '\Bfoo\B' +haystack = "n foo xfoox that" +matches = [[7, 10]] +unicode = false +utf8 = false + +[[test]] +name = "nb2" +regex = 'a\B' +haystack = "faoa x" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb3" +regex = '\Bbar' +haystack = "bar x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb4" +regex = '\Bbar' +haystack = "foo\nbar x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb5" +regex = 'bar\B' +haystack = "foobar" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb6" +regex = 'bar\B' +haystack = "foobar\nxxx" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb7" +regex = '(?:foo|bar|[A-Z])\B' +haystack = "foox" +matches = [[0, 3]] +unicode = false +utf8 = false + +[[test]] +name = "nb8" +regex = '(?:foo|bar|[A-Z])\B' +haystack = "foo\n" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb9" +regex = '\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb10" +regex = '\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb11" +regex = '\B(?:foo|bar|[A-Z])' +haystack = "foo" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb12" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xXy" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb13" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "XY" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb14" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "XYZ" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb15" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "abara" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb16" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xfoo_" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb17" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xfoo\n" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb18" +regex = 
'\B(?:foo|bar|[A-Z])\B' +haystack = "foo bar vNX" +matches = [[9, 10]] +unicode = false +utf8 = false + +[[test]] +name = "nb19" +regex = '\B(?:fo|foo)\B' +haystack = "xfoo" +matches = [[1, 3]] +unicode = false +utf8 = false + +[[test]] +name = "nb20" +regex = '\B(?:foo|fo)\B' +haystack = "xfooo" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb21" +regex = '\B\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb22" +regex = '\B\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb23" +regex = '\B$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb24" +regex = '\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb25" +regex = '\B$' +haystack = "y x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb26" +regex = '\B.$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb27" +regex = '^\B(?:fo|foo)\B' +haystack = "fo" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb28" +regex = '^\B(?:fo|foo)\B' +haystack = "fo" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb29" +regex = '^\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb30" +regex = '^\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb31" +regex = '^\B\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb32" +regex = '^\B\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb33" +regex = '^\B$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb34" +regex = '^\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb35" +regex = '^\B.$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb36" +regex = '^\B.\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb37" +regex = '^^^^^\B$$$$$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb38" +regex = '^^^^^\B.$$$$$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb39" +regex = '^^^^^\B$$$$$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + + +# unicode1* and unicode2* work for both Unicode and ASCII because all matches +# are reported as byte offsets, and « and » do not correspond to word +# boundaries at either the character or byte level. +[[test]] +name = "unicode1" +regex = '\bx\b' +haystack = "«x" +matches = [[2, 3]] + +[[test]] +name = "unicode1-only-ascii" +regex = '\bx\b' +haystack = "«x" +matches = [[2, 3]] +unicode = false + +[[test]] +name = "unicode2" +regex = '\bx\b' +haystack = "x»" +matches = [[0, 1]] + +[[test]] +name = "unicode2-only-ascii" +regex = '\bx\b' +haystack = "x»" +matches = [[0, 1]] +unicode = false + +# ASCII word boundaries are completely oblivious to Unicode characters, so +# even though β is a character, an ASCII \b treats it as a word boundary +# when it is adjacent to another ASCII character. (The ASCII \b only looks +# at the leading byte of β.) For Unicode \b, the tests are precisely inverted. 
+[[test]] +name = "unicode3" +regex = '\bx\b' +haystack = 'áxβ' +matches = [] + +[[test]] +name = "unicode3-only-ascii" +regex = '\bx\b' +haystack = 'áxβ' +matches = [[2, 3]] +unicode = false + +[[test]] +name = "unicode4" +regex = '\Bx\B' +haystack = 'áxβ' +matches = [[2, 3]] + +[[test]] +name = "unicode4-only-ascii" +regex = '\Bx\B' +haystack = 'áxβ' +matches = [] +unicode = false +utf8 = false + +# The same as above, but with \b instead of \B as a sanity check. +[[test]] +name = "unicode5" +regex = '\b' +haystack = "0\U0007EF5E" +matches = [[0, 0], [1, 1]] + +[[test]] +name = "unicode5-only-ascii" +regex = '\b' +haystack = "0\U0007EF5E" +matches = [[0, 0], [1, 1]] +unicode = false +utf8 = false + +[[test]] +name = "unicode5-noutf8" +regex = '\b' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[0, 0], [1, 1]] +unescape = true +utf8 = false + +[[test]] +name = "unicode5-noutf8-only-ascii" +regex = '\b' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[0, 0], [1, 1]] +unescape = true +unicode = false +utf8 = false + +# Weird special case to ensure that ASCII \B treats each individual code unit +# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary +# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the +# \w character class.) +[[test]] +name = "unicode5-not" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[5, 5]] + +[[test]] +name = "unicode5-not-only-ascii" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false +utf8 = false + +# This gets no matches since \B only matches in the presence of valid UTF-8 +# when Unicode is enabled, even when UTF-8 mode is disabled. +[[test]] +name = "unicode5-not-noutf8" +regex = '\B' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [] +unescape = true +utf8 = false + +# But this DOES get matches since \B in ASCII mode only looks at individual +# bytes. +[[test]] +name = "unicode5-not-noutf8-only-ascii" +regex = '\B' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unescape = true +unicode = false +utf8 = false + +# Some tests of no particular significance. 
+[[test]] +name = "unicode6" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar 456 quux 789" +matches = [[4, 7], [12, 15], [21, 24]] + +[[test]] +name = "unicode7" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar a456 quux 789" +matches = [[4, 7], [22, 25]] + +[[test]] +name = "unicode8" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar 456a quux 789" +matches = [[4, 7], [22, 25]] + +# A variant of the problem described here: +# https://github.com/google/re2/blob/89567f5de5b23bb5ad0c26cbafc10bdc7389d1fa/re2/dfa.cc#L658-L667 +[[test]] +name = "alt-with-assertion-repetition" +regex = '(?:\b|%)+' +haystack = "z%" +bounds = [1, 2] +anchored = true +matches = [[1, 1]] diff --git a/vendor/regex/tests/api.rs b/vendor/regex/tests/api.rs deleted file mode 100644 index c7250a8..0000000 --- a/vendor/regex/tests/api.rs +++ /dev/null @@ -1,234 +0,0 @@ -#[test] -fn empty_regex_empty_match() { - let re = regex!(""); - assert_eq!(vec![(0, 0)], findall!(re, "")); -} - -#[test] -fn empty_regex_nonempty_match() { - let re = regex!(""); - assert_eq!(vec![(0, 0), (1, 1), (2, 2), (3, 3)], findall!(re, "abc")); -} - -#[test] -fn one_zero_length_match() { - let re = regex!(r"[0-9]*"); - assert_eq!(vec![(0, 0), (1, 2), (3, 4)], findall!(re, "a1b2")); -} - -#[test] -fn many_zero_length_match() { - let re = regex!(r"[0-9]*"); - assert_eq!( - vec![(0, 0), (1, 2), (3, 3), (4, 4), (5, 6)], - findall!(re, "a1bbb2") - ); -} - -#[test] -fn many_sequential_zero_length_match() { - let re = regex!(r"[0-9]?"); - assert_eq!( - vec![(0, 0), (1, 2), (2, 3), (4, 5), (6, 6)], - findall!(re, "a12b3c") - ); -} - -#[test] -fn quoted_bracket_set() { - let re = regex!(r"([\x{5b}\x{5d}])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); - let re = regex!(r"([\[\]])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); -} - -#[test] -fn first_range_starts_with_left_bracket() { - let re = regex!(r"([\[-z])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); -} - -#[test] -fn range_ends_with_escape() { - let re = regex!(r"([\[-\x{5d}])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); -} - -#[test] -fn empty_match_find_iter() { - let re = regex!(r".*?"); - assert_eq!(vec![(0, 0), (1, 1), (2, 2), (3, 3)], findall!(re, "abc")); -} - -#[test] -fn empty_match_captures_iter() { - let re = regex!(r".*?"); - let ms: Vec<_> = re - .captures_iter(text!("abc")) - .map(|c| c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]); -} - -#[test] -fn capture_names() { - let re = regex!(r"(.)(?P<a>.)"); - assert_eq!(3, re.captures_len()); - assert_eq!((3, Some(3)), re.capture_names().size_hint()); - assert_eq!( - vec![None, None, Some("a")], - re.capture_names().collect::<Vec<_>>() - ); -} - -#[test] -fn regex_string() { - assert_eq!(r"[a-zA-Z0-9]+", regex!(r"[a-zA-Z0-9]+").as_str()); - assert_eq!(r"[a-zA-Z0-9]+", &format!("{}", regex!(r"[a-zA-Z0-9]+"))); - assert_eq!(r"[a-zA-Z0-9]+", &format!("{:?}", regex!(r"[a-zA-Z0-9]+"))); -} - -#[test] -fn capture_index() { - let re = regex!(r"^(?P<name>.+)$"); - let cap = re.captures(t!("abc")).unwrap(); - assert_eq!(&cap[0], t!("abc")); - assert_eq!(&cap[1], t!("abc")); - assert_eq!(&cap["name"], t!("abc")); -} - -#[test] -#[should_panic] -#[cfg_attr(all(target_env = "msvc", target_pointer_width = "32"), ignore)] -fn capture_index_panic_usize() { - let re = regex!(r"^(?P<name>.+)$"); - let cap = re.captures(t!("abc")).unwrap(); - let _ = cap[2]; -} - -#[test] -#[should_panic] -#[cfg_attr(all(target_env = "msvc", 
target_pointer_width = "32"), ignore)] -fn capture_index_panic_name() { - let re = regex!(r"^(?P<name>.+)$"); - let cap = re.captures(t!("abc")).unwrap(); - let _ = cap["bad name"]; -} - -#[test] -fn capture_index_lifetime() { - // This is a test of whether the types on `caps["..."]` are general - // enough. If not, this will fail to typecheck. - fn inner(s: &str) -> usize { - let re = regex!(r"(?P<number>[0-9]+)"); - let caps = re.captures(t!(s)).unwrap(); - caps["number"].len() - } - assert_eq!(3, inner("123")); -} - -#[test] -fn capture_misc() { - let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)"); - let cap = re.captures(t!("abc")).unwrap(); - - assert_eq!(5, cap.len()); - - assert_eq!((0, 3), { - let m = cap.get(0).unwrap(); - (m.start(), m.end()) - }); - assert_eq!(None, cap.get(2)); - assert_eq!((2, 3), { - let m = cap.get(4).unwrap(); - (m.start(), m.end()) - }); - - assert_eq!(t!("abc"), match_text!(cap.get(0).unwrap())); - assert_eq!(None, cap.get(2)); - assert_eq!(t!("c"), match_text!(cap.get(4).unwrap())); - - assert_eq!(None, cap.name("a")); - assert_eq!(t!("c"), match_text!(cap.name("b").unwrap())); -} - -#[test] -fn sub_capture_matches() { - let re = regex!(r"([a-z])(([a-z])|([0-9]))"); - let cap = re.captures(t!("a5")).unwrap(); - let subs: Vec<_> = cap.iter().collect(); - - assert_eq!(5, subs.len()); - assert!(subs[0].is_some()); - assert!(subs[1].is_some()); - assert!(subs[2].is_some()); - assert!(subs[3].is_none()); - assert!(subs[4].is_some()); - - assert_eq!(t!("a5"), match_text!(subs[0].unwrap())); - assert_eq!(t!("a"), match_text!(subs[1].unwrap())); - assert_eq!(t!("5"), match_text!(subs[2].unwrap())); - assert_eq!(t!("5"), match_text!(subs[4].unwrap())); -} - -expand!(expand1, r"(?-u)(?P<foo>\w+)", "abc", "$foo", "abc"); -expand!(expand2, r"(?-u)(?P<foo>\w+)", "abc", "$0", "abc"); -expand!(expand3, r"(?-u)(?P<foo>\w+)", "abc", "$1", "abc"); -expand!(expand4, r"(?-u)(?P<foo>\w+)", "abc", "$$1", "$1"); -expand!(expand5, r"(?-u)(?P<foo>\w+)", "abc", "$$foo", "$foo"); -expand!(expand6, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$b$a", "123abc"); -expand!(expand7, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "z$bz$az", "z"); -expand!( - expand8, - r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", - "abc 123", - ".$b.$a.", - ".123.abc." 
-); -expand!( - expand9, - r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", - "abc 123", - " $b $a ", - " 123 abc " -); -expand!(expand10, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$bz$az", ""); - -expand!(expand_name1, r"%(?P<Z>[a-z]+)", "%abc", "$Z%", "abc%"); -expand!(expand_name2, r"\[(?P<Z>[a-z]+)", "[abc", "$Z[", "abc["); -expand!(expand_name3, r"\{(?P<Z>[a-z]+)", "{abc", "$Z{", "abc{"); -expand!(expand_name4, r"\}(?P<Z>[a-z]+)", "}abc", "$Z}", "abc}"); -expand!(expand_name5, r"%([a-z]+)", "%abc", "$1a%", "%"); -expand!(expand_name6, r"%([a-z]+)", "%abc", "${1}a%", "abca%"); -expand!(expand_name7, r"\[(?P<Z[>[a-z]+)", "[abc", "${Z[}[", "abc["); -expand!(expand_name8, r"\[(?P<Z[>[a-z]+)", "[abc", "${foo}[", "["); -expand!(expand_name9, r"\[(?P<Z[>[a-z]+)", "[abc", "${1a}[", "["); -expand!(expand_name10, r"\[(?P<Z[>[a-z]+)", "[abc", "${#}[", "["); -expand!(expand_name11, r"\[(?P<Z[>[a-z]+)", "[abc", "${$$}[", "["); - -split!( - split1, - r"(?-u)\s+", - "a b\nc\td\n\t e", - &[t!("a"), t!("b"), t!("c"), t!("d"), t!("e")] -); -split!( - split2, - r"(?-u)\b", - "a b c", - &[t!(""), t!("a"), t!(" "), t!("b"), t!(" "), t!("c"), t!("")] -); -split!(split3, r"a$", "a", &[t!(""), t!("")]); -split!(split_none, r"-", r"a", &[t!("a")]); -split!(split_trailing_blank, r"-", r"a-", &[t!("a"), t!("")]); -split!(split_trailing_blanks, r"-", r"a--", &[t!("a"), t!(""), t!("")]); -split!(split_empty, r"-", r"", &[t!("")]); - -splitn!(splitn_below_limit, r"-", r"a", 2, &[t!("a")]); -splitn!(splitn_at_limit, r"-", r"a-b", 2, &[t!("a"), t!("b")]); -splitn!(splitn_above_limit, r"-", r"a-b-c", 2, &[t!("a"), t!("b-c")]); -splitn!(splitn_zero_limit, r"-", r"a-b", 0, empty_vec!()); -splitn!(splitn_trailing_blank, r"-", r"a-", 2, &[t!("a"), t!("")]); -splitn!(splitn_trailing_separator, r"-", r"a--", 2, &[t!("a"), t!("-")]); -splitn!(splitn_empty, r"-", r"", 1, &[t!("")]); diff --git a/vendor/regex/tests/api_str.rs b/vendor/regex/tests/api_str.rs deleted file mode 100644 index 480116d..0000000 --- a/vendor/regex/tests/api_str.rs +++ /dev/null @@ -1,34 +0,0 @@ -// These tests don't really make sense with the bytes API, so we only test them -// on the Unicode API. - -#[test] -fn empty_match_unicode_find_iter() { - // Tests that we still yield byte ranges at valid UTF-8 sequence boundaries - // even when we're susceptible to empty width matches. - let re = regex!(r".*?"); - assert_eq!( - vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)], - findall!(re, "Ⅰ1Ⅱ2") - ); -} - -#[test] -fn empty_match_unicode_captures_iter() { - // Same as empty_match_unicode_find_iter, but tests capture iteration. - let re = regex!(r".*?"); - let ms: Vec<_> = re - .captures_iter(text!("Ⅰ1Ⅱ2")) - .map(|c| c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - assert_eq!(vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)], ms); -} - -#[test] -fn match_as_str() { - let re = regex!(r"fo+"); - let caps = re.captures("barfoobar").unwrap(); - assert_eq!(caps.get(0).map(|m| m.as_str()), Some("foo")); - assert_eq!(caps.get(0).map(From::from), Some("foo")); - assert_eq!(caps.get(0).map(Into::into), Some("foo")); -} diff --git a/vendor/regex/tests/bytes.rs b/vendor/regex/tests/bytes.rs deleted file mode 100644 index d05f138..0000000 --- a/vendor/regex/tests/bytes.rs +++ /dev/null @@ -1,107 +0,0 @@ -// These are tests specifically crafted for regexes that can match arbitrary -// bytes. - -// A silly wrapper to make it possible to write and match raw bytes. 
-struct R<'a>(&'a [u8]); -impl<'a> R<'a> { - fn as_bytes(&self) -> &'a [u8] { - self.0 - } -} - -mat!(word_boundary, r"(?-u) \b", " δ", None); -#[cfg(feature = "unicode-perl")] -mat!(word_boundary_unicode, r" \b", " δ", Some((0, 1))); -mat!(word_not_boundary, r"(?-u) \B", " δ", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(word_not_boundary_unicode, r" \B", " δ", None); - -mat!(perl_w_ascii, r"(?-u)\w+", "aδ", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(perl_w_unicode, r"\w+", "aδ", Some((0, 3))); -mat!(perl_d_ascii, r"(?-u)\d+", "1२३9", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(perl_d_unicode, r"\d+", "1२३9", Some((0, 8))); -mat!(perl_s_ascii, r"(?-u)\s+", " \u{1680}", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(perl_s_unicode, r"\s+", " \u{1680}", Some((0, 4))); - -// The first `(.+)` matches two Unicode codepoints, but can't match the 5th -// byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and -// matches. -mat!( - mixed1, - r"(.+)(?-u)(.+)", - R(b"\xCE\x93\xCE\x94\xFF"), - Some((0, 5)), - Some((0, 4)), - Some((4, 5)) -); - -mat!(case_ascii_one, r"(?i-u)a", "A", Some((0, 1))); -mat!(case_ascii_class, r"(?i-u)[a-z]+", "AaAaA", Some((0, 5))); -#[cfg(feature = "unicode-case")] -mat!(case_unicode, r"(?i)[a-z]+", "aA\u{212A}aA", Some((0, 7))); -mat!(case_not_unicode, r"(?i-u)[a-z]+", "aA\u{212A}aA", Some((0, 2))); - -mat!(negate_unicode, r"[^a]", "δ", Some((0, 2))); -mat!(negate_not_unicode, r"(?-u)[^a]", "δ", Some((0, 1))); - -// This doesn't match in a normal Unicode regex because the implicit preceding -// `.*?` is Unicode aware. -mat!(dotstar_prefix_not_unicode1, r"(?-u)a", R(b"\xFFa"), Some((1, 2))); -mat!(dotstar_prefix_not_unicode2, r"a", R(b"\xFFa"), Some((1, 2))); - -// Have fun with null bytes. -mat!( - null_bytes, - r"(?-u)(?P<cstr>[^\x00]+)\x00", - R(b"foo\x00"), - Some((0, 4)), - Some((0, 3)) -); - -// Test that lookahead operators work properly in the face of invalid UTF-8. 
-// See: https://github.com/rust-lang/regex/issues/277 -matiter!( - invalidutf8_anchor1, - r"(?-u)\xcc?^", - R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"), - (0, 0) -); -matiter!( - invalidutf8_anchor2, - r"(?-u)^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$", - R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"), - (22, 22) -); -matiter!( - invalidutf8_anchor3, - r"(?-u)^|ddp\xff\xffdddddlQd@\x80", - R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"), - (0, 0) -); - -// See https://github.com/rust-lang/regex/issues/303 -#[test] -fn negated_full_byte_range() { - assert!(::regex::bytes::Regex::new(r#"(?-u)[^\x00-\xff]"#).is_err()); -} - -matiter!(word_boundary_ascii1, r"(?-u:\B)x(?-u:\B)", "áxβ"); -matiter!( - word_boundary_ascii2, - r"(?-u:\B)", - "0\u{7EF5E}", - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); - -// See: https://github.com/rust-lang/regex/issues/264 -mat!(ascii_boundary_no_capture, r"(?-u)\B", "\u{28f3e}", Some((0, 0))); -mat!(ascii_boundary_capture, r"(?-u)(\B)", "\u{28f3e}", Some((0, 0))); - -// See: https://github.com/rust-lang/regex/issues/271 -mat!(end_not_wb, r"$(?-u:\B)", "\u{5c124}\u{b576c}", Some((8, 8))); diff --git a/vendor/regex/tests/consistent.rs b/vendor/regex/tests/consistent.rs deleted file mode 100644 index 722f2a5..0000000 --- a/vendor/regex/tests/consistent.rs +++ /dev/null @@ -1,238 +0,0 @@ -use regex::internal::ExecBuilder; - -/// Given a regex, check if all of the backends produce the same -/// results on a number of different inputs. -/// -/// For now this just throws quickcheck at the problem, which -/// is not very good because it only really tests half of the -/// problem space. It is pretty unlikely that a random string -/// will match any given regex, so this will probably just -/// be checking that the different backends fail in the same -/// way. This is still worthwhile to test, but is definitely not -/// the whole story. -/// -/// TODO(ethan): In order to cover the other half of the problem -/// space, we should generate a random matching string by inspecting -/// the AST of the input regex. The right way to do this probably -/// involves adding a custom Arbitrary instance around a couple -/// of newtypes. That way we can respect the quickcheck size hinting -/// and shrinking and whatnot. 
-pub fn backends_are_consistent(re: &str) -> Result<u64, String> { - let standard_backends = vec![ - ( - "bounded_backtracking_re", - ExecBuilder::new(re) - .bounded_backtracking() - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "pikevm_re", - ExecBuilder::new(re) - .nfa() - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "default_re", - ExecBuilder::new(re) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ]; - - let utf8bytes_backends = vec![ - ( - "bounded_backtracking_utf8bytes_re", - ExecBuilder::new(re) - .bounded_backtracking() - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "pikevm_utf8bytes_re", - ExecBuilder::new(re) - .nfa() - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "default_utf8bytes_re", - ExecBuilder::new(re) - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ]; - - let bytes_backends = vec![ - ( - "bounded_backtracking_bytes_re", - ExecBuilder::new(re) - .bounded_backtracking() - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "pikevm_bytes_re", - ExecBuilder::new(re) - .nfa() - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "default_bytes_re", - ExecBuilder::new(re) - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err))?, - ), - ]; - - Ok(string_checker::check_backends(&standard_backends)? - + string_checker::check_backends(&utf8bytes_backends)? - + bytes_checker::check_backends(&bytes_backends)?) -} - -// -// A consistency checker parameterized by the input type (&str or &[u8]). -// - -macro_rules! 
checker { - ($module_name:ident, $regex_type:path, $mk_input:expr) => { - mod $module_name { - use quickcheck; - use quickcheck::{Arbitrary, TestResult}; - - pub fn check_backends( - backends: &[(&str, $regex_type)], - ) -> Result<u64, String> { - let mut total_passed = 0; - for regex in backends[1..].iter() { - total_passed += quickcheck_regex_eq(&backends[0], regex)?; - } - - Ok(total_passed) - } - - fn quickcheck_regex_eq( - &(name1, ref re1): &(&str, $regex_type), - &(name2, ref re2): &(&str, $regex_type), - ) -> Result<u64, String> { - quickcheck::QuickCheck::new() - .quicktest(RegexEqualityTest::new( - re1.clone(), - re2.clone(), - )) - .map_err(|err| { - format!( - "{}(/{}/) and {}(/{}/) are inconsistent.\ - QuickCheck Err: {:?}", - name1, re1, name2, re2, err - ) - }) - } - - struct RegexEqualityTest { - re1: $regex_type, - re2: $regex_type, - } - impl RegexEqualityTest { - fn new(re1: $regex_type, re2: $regex_type) -> Self { - RegexEqualityTest { re1: re1, re2: re2 } - } - } - - impl quickcheck::Testable for RegexEqualityTest { - fn result(&self, gen: &mut quickcheck::Gen) -> TestResult { - let input = $mk_input(gen); - let input = &input; - - if self.re1.find(&input) != self.re2.find(input) { - return TestResult::error(format!( - "find mismatch input={:?}", - input - )); - } - - let cap1 = self.re1.captures(input); - let cap2 = self.re2.captures(input); - match (cap1, cap2) { - (None, None) => {} - (Some(cap1), Some(cap2)) => { - for (c1, c2) in cap1.iter().zip(cap2.iter()) { - if c1 != c2 { - return TestResult::error(format!( - "captures mismatch input={:?}", - input - )); - } - } - } - _ => { - return TestResult::error(format!( - "captures mismatch input={:?}", - input - )) - } - } - - let fi1 = self.re1.find_iter(input); - let fi2 = self.re2.find_iter(input); - for (m1, m2) in fi1.zip(fi2) { - if m1 != m2 { - return TestResult::error(format!( - "find_iter mismatch input={:?}", - input - )); - } - } - - let ci1 = self.re1.captures_iter(input); - let ci2 = self.re2.captures_iter(input); - for (cap1, cap2) in ci1.zip(ci2) { - for (c1, c2) in cap1.iter().zip(cap2.iter()) { - if c1 != c2 { - return TestResult::error(format!( - "captures_iter mismatch input={:?}", - input - )); - } - } - } - - let s1 = self.re1.split(input); - let s2 = self.re2.split(input); - for (chunk1, chunk2) in s1.zip(s2) { - if chunk1 != chunk2 { - return TestResult::error(format!( - "split mismatch input={:?}", - input - )); - } - } - - TestResult::from_bool(true) - } - } - } // mod - }; // rule case -} // macro_rules! - -checker!(string_checker, ::regex::Regex, |gen| String::arbitrary(gen)); -checker!(bytes_checker, ::regex::bytes::Regex, |gen| Vec::<u8>::arbitrary( - gen -)); diff --git a/vendor/regex/tests/crates_regex.rs b/vendor/regex/tests/crates_regex.rs deleted file mode 100644 index 200ec27..0000000 --- a/vendor/regex/tests/crates_regex.rs +++ /dev/null @@ -1,3287 +0,0 @@ -// DO NOT EDIT. Automatically generated by 'scripts/scrape_crates_io.py' -// on 2018-06-20 09:56:32.820354. 
- -// autoshutdown-0.1.0: r"\s*(\d+)(\w)\s*" -consistent!(autoshutdown_0, r"\s*(\d+)(\w)\s*"); - -// epub-1.1.1: r"/" -consistent!(epub_0, r"/"); - -// rpi-info-0.2.0: "^Revision\t+: ([0-9a-fA-F]+)" -consistent!(rpi_info_0, "^Revision\t+: ([0-9a-fA-F]+)"); - -// rpi-info-0.2.0: "Serial\t+: ([0-9a-fA-F]+)" -consistent!(rpi_info_1, "Serial\t+: ([0-9a-fA-F]+)"); - -// pnet_macros-0.21.0: r"^u([0-9]+)(be|le|he)?$" -consistent!(pnet_macros_0, r"^u([0-9]+)(be|le|he)?$"); - -// iban_validate-1.0.3: r"^[A-Z]{2}\d{2}[A-Z\d]{1,30}$" -consistent!(iban_validate_0, r"^[A-Z]{2}\d{2}[A-Z\d]{1,30}$"); - -// markifier-0.1.0: r".*\[(?P<percent>.+)%.*\].*" -consistent!(markifier_0, r".*\[(?P<percent>.+)%.*\].*"); - -// mallumo-0.3.0: r"(#include) (\S*)(.*)" -consistent!(mallumo_0, r"(#include) (\S*)(.*)"); - -// mallumo-0.3.0: r"(ERROR: \d+:)(\d+)(: )(.+)" -consistent!(mallumo_1, r"(ERROR: \d+:)(\d+)(: )(.+)"); - -// mallumo-0.3.0: r"(\d+\()(\d+)(?:\) : )(.+)" -consistent!(mallumo_2, r"(\d+\()(\d+)(?:\) : )(.+)"); - -// magnet_more-0.0.1: r"(.+?)(\[.*?\])?" -consistent!(magnet_more_0, r"(.+?)(\[.*?\])?"); - -// magnet_app-0.0.1: r":(?P<k>[a-zA-Z_]+)" -consistent!(magnet_app_0, r":(?P<k>[a-zA-Z_]+)"); - -// yubibomb-0.2.0: r"^\d{6}(?:\s*,\s*\d{6})*$" -consistent!(yubibomb_0, r"^\d{6}(?:\s*,\s*\d{6})*$"); - -// multirust-rs-0.0.4: r"[\\/]([^\\/?]+)(\?.*)?$" -consistent!(multirust_rs_0, r"[\\/]([^\\/?]+)(\?.*)?$"); - -// hueclient-0.3.2: "\"[a-z]*\":null" -consistent!(hueclient_0, "\"[a-z]*\":null"); - -// hueclient-0.3.2: ",+" -consistent!(hueclient_1, ",+"); - -// hueclient-0.3.2: ",\\}" -consistent!(hueclient_2, ",\\}"); - -// hueclient-0.3.2: "\\{," -consistent!(hueclient_3, "\\{,"); - -// aerial-0.1.0: r"[a-zA-Z_\$][a-zA-Z_0-9]*" -consistent!(aerial_0, r"[a-zA-Z_\$][a-zA-Z_0-9]*"); - -// aerial-0.1.0: r"thi[sng]+" -consistent!(aerial_1, r"thi[sng]+"); - -// rvue-0.1.0: r"(.+)\s+\((.+?)\)" -consistent!(rvue_0, r"(.+)\s+\((.+?)\)"); - -// rvue-0.1.0: r"([\d\.]+)\s*out\s*of\s*([\d\.]+)" -consistent!(rvue_1, r"([\d\.]+)\s*out\s*of\s*([\d\.]+)"); - -// rvue-0.1.0: r"^([\d\.]+)\s*(?:\(\))?$" -consistent!(rvue_2, r"^([\d\.]+)\s*(?:\(\))?$"); - -// rvue-0.1.0: r"([\d\.]+)\s*Points\s*Possible" -consistent!(rvue_3, r"([\d\.]+)\s*Points\s*Possible"); - -// rvue-0.1.0: r"([\d\.]+)\s*/\s*([\d\.]+)" -consistent!(rvue_4, r"([\d\.]+)\s*/\s*([\d\.]+)"); - -// rvsim-0.1.0: r"_?([_a-z0-9]+)\s*:\s*([_a-z0-9]+)\s*[,)]" -consistent!(rvsim_0, r"_?([_a-z0-9]+)\s*:\s*([_a-z0-9]+)\s*[,)]"); - -// nereon-0.1.4: "(.*[^\\\\])\\{\\}(.*)" -consistent!(nereon_0, "(.*[^\\\\])\\{\\}(.*)"); - -// next_episode-0.3.0: r"((?i)^(.+).s(\d+)e(\d+).*)$" -consistent!(next_episode_0, r"((?i)^(.+).s(\d+)e(\d+).*)$"); - -// migrant_lib-0.19.2: r"[^a-z0-9-]+" -consistent!(migrant_lib_0, r"[^a-z0-9-]+"); - -// migrant_lib-0.19.2: r"[0-9]{14}_[a-z0-9-]+" -consistent!(migrant_lib_1, r"[0-9]{14}_[a-z0-9-]+"); - -// migrant_lib-0.19.2: r"([0-9]{14}_)?[a-z0-9-]+" -consistent!(migrant_lib_2, r"([0-9]{14}_)?[a-z0-9-]+"); - -// minipre-0.2.0: "$_" -consistent!(minipre_0, "$_"); - -// minifier-0.0.13: r">\s+<" -consistent!(minifier_0, r">\s+<"); - -// minifier-0.0.13: r"\s{2,}|[\r\n]" -consistent!(minifier_1, r"\s{2,}|[\r\n]"); - -// minifier-0.0.13: r"<(style|script)[\w|\s].*?>" -consistent!(minifier_2, r"<(style|script)[\w|\s].*?>"); - -// minifier-0.0.13: "<!--(.|\n)*?-->" -consistent!(minifier_3, "<!--(.|\n)*?-->"); - -// minifier-0.0.13: r"<\w.*?>" -consistent!(minifier_4, r"<\w.*?>"); - -// minifier-0.0.13: r" \s+|\s +" -consistent!(minifier_5, r" \s+|\s 
+"); - -// minifier-0.0.13: r"\w\s+\w" -consistent!(minifier_6, r"\w\s+\w"); - -// minifier-0.0.13: r"'\s+>" -consistent!(minifier_7, r"'\s+>"); - -// minifier-0.0.13: r"\d\s+>" -consistent!(minifier_8, r"\d\s+>"); - -// ggp-rs-0.1.2: r"(?P<relation>\([^)]+\))|(?P<prop>[a-zA-Z0-9_]+)" -consistent!(ggp_rs_0, r"(?P<relation>\([^)]+\))|(?P<prop>[a-zA-Z0-9_]+)"); - -// ggp-rs-0.1.2: r"\((.*)\)." -consistent!(ggp_rs_1, r"\((.*)\)."); - -// poe-superfilter-0.2.0: "[A-Za-z0-9_]" -consistent!(poe_superfilter_0, "[A-Za-z0-9_]"); - -// poke-a-mango-0.5.0: r"(\d+)x(\d+)" -consistent!(poke_a_mango_0, r"(\d+)x(\d+)"); - -// pop3-rs-0.1.0: r"(?P<nmsg>\d+) (?P<size>\d+)" -consistent!(pop3_rs_0, r"(?P<nmsg>\d+) (?P<size>\d+)"); - -// pop3-rs-0.1.0: r"(?P<msgid>\d+) (?P<uidl>[\x21-\x7E]{1,70})" -consistent!(pop3_rs_1, r"(?P<msgid>\d+) (?P<uidl>[\x21-\x7E]{1,70})"); - -// pop3-rs-0.1.0: r"(<.*>)\r\n$" -consistent!(pop3_rs_2, r"(<.*>)\r\n$"); - -// pop3-rs-0.1.0: r"^(?P<status>\+OK|-ERR) (?P<statustext>.*)" -consistent!(pop3_rs_3, r"^(?P<status>\+OK|-ERR) (?P<statustext>.*)"); - -// pop3-1.0.6: r"^\.\r\n$" -consistent!(pop3_0, r"^\.\r\n$"); - -// pop3-1.0.6: r"\+OK(.*)" -consistent!(pop3_1, r"\+OK(.*)"); - -// pop3-1.0.6: r"-ERR(.*)" -consistent!(pop3_2, r"-ERR(.*)"); - -// pop3-1.0.6: r"\+OK (\d+) (\d+)\r\n" -consistent!(pop3_3, r"\+OK (\d+) (\d+)\r\n"); - -// pop3-1.0.6: r"(\d+) ([\x21-\x7e]+)\r\n" -consistent!(pop3_4, r"(\d+) ([\x21-\x7e]+)\r\n"); - -// pop3-1.0.6: r"\+OK (\d+) ([\x21-\x7e]+)\r\n" -consistent!(pop3_5, r"\+OK (\d+) ([\x21-\x7e]+)\r\n"); - -// pop3-1.0.6: r"(\d+) (\d+)\r\n" -consistent!(pop3_6, r"(\d+) (\d+)\r\n"); - -// pop3-1.0.6: r"\+OK (\d+) (\d+)\r\n" -consistent!(pop3_7, r"\+OK (\d+) (\d+)\r\n"); - -// polk-1.1.3: "github:(\\w+)/?(\\w+)?" -consistent!(polk_0, "github:(\\w+)/?(\\w+)?"); - -// geochunk-0.1.5: "^[0-9]{5}" -consistent!(geochunk_0, "^[0-9]{5}"); - -// generic-dns-update-1.1.4: r"((?:(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?)\.){3}(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?))" -consistent!(generic_dns_update_0, r"((?:(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?)\.){3}(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?))"); - -// generic-dns-update-1.1.4: r"((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(([0-9A-Fa-f]{1,4}:){0,5}:((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(::([0-9A-Fa-f]{1,4}:){0,5}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))" -consistent!(generic_dns_update_1, 
r"((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(([0-9A-Fa-f]{1,4}:){0,5}:((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(::([0-9A-Fa-f]{1,4}:){0,5}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))"); - -// generic-dns-update-1.1.4: r"<value><string>([0-9.]*)</string></value>" -consistent!( - generic_dns_update_2, - r"<value><string>([0-9.]*)</string></value>" -); - -// generic-dns-update-1.1.4: r"<int>([0-9]+)</int>" -consistent!(generic_dns_update_3, r"<int>([0-9]+)</int>"); - -// generic-dns-update-1.1.4: r"<int>([0-9]+)</int>" -consistent!(generic_dns_update_4, r"<int>([0-9]+)</int>"); - -// generic-dns-update-1.1.4: r"<boolean>([0-1]*)</boolean>" -consistent!(generic_dns_update_5, r"<boolean>([0-1]*)</boolean>"); - -// generate-nix-pkg-0.3.0: r"(\d*)\.(\d*)\.(\d*)(-(\S*))?" -consistent!(generate_nix_pkg_0, r"(\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// generate-nix-pkg-0.3.0: r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?" -consistent!(generate_nix_pkg_1, r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// genact-0.6.0: r"arch/([a-z0-9_])+/" -consistent!(genact_0, r"arch/([a-z0-9_])+/"); - -// genact-0.6.0: r"arch/([a-z0-9_])+/" -consistent!(genact_1, r"arch/([a-z0-9_])+/"); - -// cron_rs-0.1.6: r"^\s*((\*(/\d+)?)|[0-9-,/]+)(\s+((\*(/\d+)?)|[0-9-,/]+)){4,5}\s*$" -consistent!( - cron_rs_0, - r"^\s*((\*(/\d+)?)|[0-9-,/]+)(\s+((\*(/\d+)?)|[0-9-,/]+)){4,5}\s*$" -); - -// systemfd-0.3.0: r"^([a-zA-Z]+)::(.+)$" -consistent!(systemfd_0, r"^([a-zA-Z]+)::(.+)$"); - -// symbolic-debuginfo-5.0.2: "__?hidden#\\d+_" -consistent!(symbolic_debuginfo_0, "__?hidden#\\d+_"); - -// symbolic-minidump-5.0.2: r"^Linux ([^ ]+) (.*) \w+(?: GNU/Linux)?$" -consistent!(symbolic_minidump_0, r"^Linux ([^ ]+) (.*) \w+(?: GNU/Linux)?$"); - -// graphql-idl-parser-0.1.1: "^(?u:\\#)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+" -consistent!(graphql_idl_parser_0, "^(?u:\\#)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+"); - -// graphql-idl-parser-0.1.1: "^(?u:=)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+" -consistent!(graphql_idl_parser_1, "^(?u:=)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+"); - -// graphql-idl-parser-0.1.1: "^(?u:[A-Z_-_a-z])(?u:[0-9A-Z_-_a-z])*" -consistent!(graphql_idl_parser_2, "^(?u:[A-Z_-_a-z])(?u:[0-9A-Z_-_a-z])*"); - -// graphql-idl-parser-0.1.1: "^(?u:!)" -consistent!(graphql_idl_parser_3, "^(?u:!)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\()" -consistent!(graphql_idl_parser_4, "^(?u:\\()"); - -// graphql-idl-parser-0.1.1: "^(?u:\\))" 
-consistent!(graphql_idl_parser_5, "^(?u:\\))"); - -// graphql-idl-parser-0.1.1: "^(?u:,)" -consistent!(graphql_idl_parser_6, "^(?u:,)"); - -// graphql-idl-parser-0.1.1: "^(?u::)" -consistent!(graphql_idl_parser_7, "^(?u::)"); - -// graphql-idl-parser-0.1.1: "^(?u:@)" -consistent!(graphql_idl_parser_8, "^(?u:@)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\[)" -consistent!(graphql_idl_parser_9, "^(?u:\\[)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\])" -consistent!(graphql_idl_parser_10, "^(?u:\\])"); - -// graphql-idl-parser-0.1.1: "^(?u:enum)" -consistent!(graphql_idl_parser_11, "^(?u:enum)"); - -// graphql-idl-parser-0.1.1: "^(?u:implements)" -consistent!(graphql_idl_parser_12, "^(?u:implements)"); - -// graphql-idl-parser-0.1.1: "^(?u:input)" -consistent!(graphql_idl_parser_13, "^(?u:input)"); - -// graphql-idl-parser-0.1.1: "^(?u:interface)" -consistent!(graphql_idl_parser_14, "^(?u:interface)"); - -// graphql-idl-parser-0.1.1: "^(?u:scalar)" -consistent!(graphql_idl_parser_15, "^(?u:scalar)"); - -// graphql-idl-parser-0.1.1: "^(?u:type)" -consistent!(graphql_idl_parser_16, "^(?u:type)"); - -// graphql-idl-parser-0.1.1: "^(?u:union)" -consistent!(graphql_idl_parser_17, "^(?u:union)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\{)" -consistent!(graphql_idl_parser_18, "^(?u:\\{)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\})" -consistent!(graphql_idl_parser_19, "^(?u:\\})"); - -// grimoire-0.1.0: r"(?s)/\*(?P<config>.*?)\*/" -consistent!(grimoire_0, r"(?s)/\*(?P<config>.*?)\*/"); - -// phonenumber-0.2.0+8.9.0: r"[\d]+(?:[~\x{2053}\x{223C}\x{FF5E}][\d]+)?" -consistent!(phonenumber_0, r"[\d]+(?:[~\x{2053}\x{223C}\x{FF5E}][\d]+)?"); - -// phonenumber-0.2.0+8.9.0: r"[, \[\]]" -consistent!(phonenumber_1, r"[, \[\]]"); - -// phonenumber-0.2.0+8.9.0: r"[\\/] *x" -consistent!(phonenumber_2, r"[\\/] *x"); - -// phonenumber-0.2.0+8.9.0: r"[[\P{N}&&\P{L}]&&[^#]]+$" -consistent!(phonenumber_3, r"[[\P{N}&&\P{L}]&&[^#]]+$"); - -// phonenumber-0.2.0+8.9.0: r"(?:.*?[A-Za-z]){3}.*" -consistent!(phonenumber_4, r"(?:.*?[A-Za-z]){3}.*"); - -// phonenumber-0.2.0+8.9.0: r"(\D+)" -consistent!(phonenumber_5, r"(\D+)"); - -// phonenumber-0.2.0+8.9.0: r"(\$\d)" -consistent!(phonenumber_6, r"(\$\d)"); - -// phonenumber-0.2.0+8.9.0: r"\(?\$1\)?" 
-consistent!(phonenumber_7, r"\(?\$1\)?"); - -// phone_number-0.1.0: r"\D" -consistent!(phone_number_0, r"\D"); - -// phone_number-0.1.0: r"^0+" -consistent!(phone_number_1, r"^0+"); - -// phone_number-0.1.0: r"^89" -consistent!(phone_number_2, r"^89"); - -// phone_number-0.1.0: r"^8+" -consistent!(phone_number_3, r"^8+"); - -// phile-0.1.4: r"^ *(\^_*\^) *$" -consistent!(phile_0, r"^ *(\^_*\^) *$"); - -// phile-0.1.4: r"^[_\p{XID_Start}]$" -consistent!(phile_1, r"^[_\p{XID_Start}]$"); - -// phile-0.1.4: r"^\p{XID_Continue}$" -consistent!(phile_2, r"^\p{XID_Continue}$"); - -// uritemplate-0.1.2: "%25(?P<hex>[0-9a-fA-F][0-9a-fA-F])" -consistent!(uritemplate_0, "%25(?P<hex>[0-9a-fA-F][0-9a-fA-F])"); - -// urdf-rs-0.4.2: "^package://(\\w+)/" -consistent!(urdf_rs_0, "^package://(\\w+)/"); - -// url-match-0.1.7: r"(?P<key>[?&.])" -consistent!(url_match_0, r"(?P<key>[?&.])"); - -// url-match-0.1.7: r":(?P<key>[a-zA-Z0-9_-]+)" -consistent!(url_match_1, r":(?P<key>[a-zA-Z0-9_-]+)"); - -// tsm-sys-0.1.0: r"hello world" -consistent!(tsm_sys_0, r"hello world"); - -// deb-version-0.1.0: "^(?:(?:(?:\\d+:).+)|(?:[^:]+))$" -consistent!(deb_version_0, "^(?:(?:(?:\\d+:).+)|(?:[^:]+))$"); - -// debcargo-2.1.0: r"^(?i)(a|an|the)\s+" -consistent!(debcargo_0, r"^(?i)(a|an|the)\s+"); - -// debcargo-2.1.0: r"^(?i)(rust\s+)?(implementation|library|tool|crate)\s+(of|to|for)\s+" -consistent!( - debcargo_1, - r"^(?i)(rust\s+)?(implementation|library|tool|crate)\s+(of|to|for)\s+" -); - -// feaders-0.2.0: r"^.*\.h$" -consistent!(feaders_0, r"^.*\.h$"); - -// feaders-0.2.0: r"^.*\.c$" -consistent!(feaders_1, r"^.*\.c$"); - -// feaders-0.2.0: r"^.*\.hpp$" -consistent!(feaders_2, r"^.*\.hpp$"); - -// feaders-0.2.0: r"^.*\.cc$" -consistent!(feaders_3, r"^.*\.cc$"); - -// feaders-0.2.0: r"^.*\.cpp$" -consistent!(feaders_4, r"^.*\.cpp$"); - -// hyperscan-0.1.6: r"CPtr\(\w+\)" -consistent!(hyperscan_0, r"CPtr\(\w+\)"); - -// hyperscan-0.1.6: r"^Version:\s(\d\.\d\.\d)\sFeatures:\s+(\w+)?\sMode:\s(\w+)$" -consistent!( - hyperscan_1, - r"^Version:\s(\d\.\d\.\d)\sFeatures:\s+(\w+)?\sMode:\s(\w+)$" -); - -// hyperscan-0.1.6: r"RawDatabase<Block>\{db: \w+\}" -consistent!(hyperscan_2, r"RawDatabase<Block>\{db: \w+\}"); - -// hyperscan-0.1.6: r"RawSerializedDatabase\{p: \w+, len: \d+\}" -consistent!(hyperscan_3, r"RawSerializedDatabase\{p: \w+, len: \d+\}"); - -// ucd-parse-0.1.1: r"[0-9A-F]+" -consistent!(ucd_parse_0, r"[0-9A-F]+"); - -// afsort-0.2.0: r".*" -consistent!(afsort_0, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_1, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_2, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_3, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_4, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_5, r".*"); - -// afsort-0.2.0: r"^[a-z]+$" -consistent!(afsort_6, r"^[a-z]+$"); - -// afsort-0.2.0: r"^[a-z]+$" -consistent!(afsort_7, r"^[a-z]+$"); - -// tin-summer-1.21.4: r"(\.git|\.pijul|_darcs|\.hg)$" -consistent!(tin_summer_0, r"(\.git|\.pijul|_darcs|\.hg)$"); - -// tin-drummer-1.0.1: r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" -consistent!(tin_drummer_0, r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// 
tin-drummer-1.0.1: r".*?\.(stats|conf|h|out|cache.*|dat|pc|info|\.js)$" -consistent!( - tin_drummer_1, - r".*?\.(stats|conf|h|out|cache.*|dat|pc|info|\.js)$" -); - -// tin-drummer-1.0.1: r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" -consistent!(tin_drummer_2, r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// tin-drummer-1.0.1: r".*?\.(stats|conf|h|out|cache.*|\.js)$" -consistent!(tin_drummer_3, r".*?\.(stats|conf|h|out|cache.*|\.js)$"); - -// tin-drummer-1.0.1: r"(\.git|\.pijul|_darcs|\.hg)$" -consistent!(tin_drummer_4, r"(\.git|\.pijul|_darcs|\.hg)$"); - -// tin-drummer-1.0.1: r".*?\.(dyn_o|out|d|hi|dyn_hi|dump-.*|p_hi|p_o|prof|tix)$" -consistent!( - tin_drummer_5, - r".*?\.(dyn_o|out|d|hi|dyn_hi|dump-.*|p_hi|p_o|prof|tix)$" -); - -// tin-drummer-1.0.1: r".*?\.(ibc)$" -consistent!(tin_drummer_6, r".*?\.(ibc)$"); - -// tin-drummer-1.0.1: r"\.stack-work|dist-newstyle" -consistent!(tin_drummer_7, r"\.stack-work|dist-newstyle"); - -// timmy-0.3.0: r"_NET_WM_PID\(CARDINAL\) = (\d+)" -consistent!(timmy_0, r"_NET_WM_PID\(CARDINAL\) = (\d+)"); - -// timmy-0.3.0: r"today|yesterday|now" -consistent!(timmy_1, r"today|yesterday|now"); - -// timmy-0.3.0: r"(?P<day>\d{1,2})/(?P<month>\d{1,2})(/(?P<year>\d{4}|\d{2}))?" -consistent!( - timmy_2, - r"(?P<day>\d{1,2})/(?P<month>\d{1,2})(/(?P<year>\d{4}|\d{2}))?" -); - -// timmy-0.3.0: r"(?P<n>\d+) (days?|ds?)(?P<ago>( ago)?)" -consistent!(timmy_3, r"(?P<n>\d+) (days?|ds?)(?P<ago>( ago)?)"); - -// timmy-0.3.0: r"(?P<hr>\d{2}):(?P<mins>\d{2})" -consistent!(timmy_4, r"(?P<hr>\d{2}):(?P<mins>\d{2})"); - -// tinfo-0.5.0: r"^(\d+): \d+ windows \(.*\) \[\d+x\d+\]( \(attached\))?" -consistent!( - tinfo_0, - r"^(\d+): \d+ windows \(.*\) \[\d+x\d+\]( \(attached\))?" 
-); - -// tinfo-0.5.0: r"^(\d+):(\d+): (.*) \((\d+) panes\) \[(\d+)x(\d+)\]" -consistent!(tinfo_1, r"^(\d+):(\d+): (.*) \((\d+) panes\) \[(\d+)x(\d+)\]"); - -// timespan-0.0.4: r"(?:\\\{start\\\}|\\\{end\\\})" -consistent!(timespan_0, r"(?:\\\{start\\\}|\\\{end\\\})"); - -// timespan-0.0.4: r"(.*)\s+-\s+(.*)" -consistent!(timespan_1, r"(.*)\s+-\s+(.*)"); - -// timespan-0.0.4: r"(.*)\s+(\w+)$" -consistent!(timespan_2, r"(.*)\s+(\w+)$"); - -// timespan-0.0.4: r"(.*)\s+(\w+)$" -consistent!(timespan_3, r"(.*)\s+(\w+)$"); - -// timespan-0.0.4: r"(.*)\s+-\s+(.*)" -consistent!(timespan_4, r"(.*)\s+-\s+(.*)"); - -// titlecase-0.10.0: r"[[:lower:]]" -consistent!(titlecase_0, r"[[:lower:]]"); - -// tight-0.1.3: r"^\d+ (day|week|month|year)s?$" -consistent!(tight_0, r"^\d+ (day|week|month|year)s?$"); - -// tight-0.1.3: r"^\d+ (day|week|month|year)s?$" -consistent!(tight_1, r"^\d+ (day|week|month|year)s?$"); - -// yaml-0.2.1: r"^[-+]?(0|[1-9][0-9_]*)$" -consistent!(yaml_0, r"^[-+]?(0|[1-9][0-9_]*)$"); - -// yaml-0.2.1: r"^([-+]?)0o?([0-7_]+)$" -consistent!(yaml_1, r"^([-+]?)0o?([0-7_]+)$"); - -// yaml-0.2.1: r"^([-+]?)0x([0-9a-fA-F_]+)$" -consistent!(yaml_2, r"^([-+]?)0x([0-9a-fA-F_]+)$"); - -// yaml-0.2.1: r"^([-+]?)0b([0-1_]+)$" -consistent!(yaml_3, r"^([-+]?)0b([0-1_]+)$"); - -// yaml-0.2.1: r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$" -consistent!( - yaml_4, - r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$" -); - -// yaml-0.2.1: r"^[+]?(\.inf|\.Inf|\.INF)$" -consistent!(yaml_5, r"^[+]?(\.inf|\.Inf|\.INF)$"); - -// yaml-0.2.1: r"^-(\.inf|\.Inf|\.INF)$" -consistent!(yaml_6, r"^-(\.inf|\.Inf|\.INF)$"); - -// yaml-0.2.1: r"^(\.nan|\.NaN|\.NAN)$" -consistent!(yaml_7, r"^(\.nan|\.NaN|\.NAN)$"); - -// yaml-0.2.1: r"^(null|Null|NULL|~)$" -consistent!(yaml_8, r"^(null|Null|NULL|~)$"); - -// yaml-0.2.1: r"^(true|True|TRUE|yes|Yes|YES)$" -consistent!(yaml_9, r"^(true|True|TRUE|yes|Yes|YES)$"); - -// yaml-0.2.1: r"^(false|False|FALSE|no|No|NO)$" -consistent!(yaml_10, r"^(false|False|FALSE|no|No|NO)$"); - -// kefia-0.1.0: r"(?m)^(\S+)/(\S+) (\S+)(?: \((.*)\))?$" -consistent!(kefia_0, r"(?m)^(\S+)/(\S+) (\S+)(?: \((.*)\))?$"); - -// risp-0.7.0: "^(\\s+|;.*?(\n|$))+" -consistent!(risp_0, "^(\\s+|;.*?(\n|$))+"); - -// risp-0.7.0: "^\".*?\"" -consistent!(risp_1, "^\".*?\""); - -// risp-0.7.0: r"^[^\s\{\}()\[\]]+" -consistent!(risp_2, r"^[^\s\{\}()\[\]]+"); - -// risp-0.7.0: r"^-?\d+" -consistent!(risp_3, r"^-?\d+"); - -// ripgrep-0.8.1: "^([0-9]+)([KMG])?$" -consistent!(ripgrep_0, "^([0-9]+)([KMG])?$"); - -// riquid-0.0.1: r"^\w+" -consistent!(riquid_0, r"^\w+"); - -// riquid-0.0.1: r"^\d+" -consistent!(riquid_1, r"^\d+"); - -// recursive_disassembler-2.1.2: r"\A(0x)?([a-fA-F0-9]+)\z" -consistent!(recursive_disassembler_0, r"\A(0x)?([a-fA-F0-9]+)\z"); - -// remake-0.1.0: r"^[a-zA-Z_][a-zA-Z0-9_]*" -consistent!(remake_0, r"^[a-zA-Z_][a-zA-Z0-9_]*"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_0, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_1, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_2, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_3, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" 
-consistent!(regex_decode_4, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_5, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{2})\)" -consistent!(regex_decode_6, r"'(?P<title>[^']+)'\s+\((?P<year>\d{2})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_7, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_8, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_9, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_10, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_11, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_12, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_13, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-cache-0.2.0: "[0-9]{3}-[0-9]{3}-[0-9]{4}" -consistent!(regex_cache_0, "[0-9]{3}-[0-9]{3}-[0-9]{4}"); - -// regex-cache-0.2.0: r"^\d+$" -consistent!(regex_cache_1, r"^\d+$"); - -// regex-cache-0.2.0: r"^[a-z]+$" -consistent!(regex_cache_2, r"^[a-z]+$"); - -// regex-cache-0.2.0: r"^\d+$" -consistent!(regex_cache_3, r"^\d+$"); - -// regex-cache-0.2.0: r"^\d+$" -consistent!(regex_cache_4, r"^\d+$"); - -// regex_dfa-0.5.0: r"\d{4}-\d{2}-\d{2}" -consistent!(regex_dfa_0, r"\d{4}-\d{2}-\d{2}"); - -// reaper-2.0.0: r"^[0-9\p{L} _\\.]{3,16}$" -consistent!(reaper_0, r"^[0-9\p{L} _\\.]{3,16}$"); - -// retdec-0.1.0: r"^attachment; filename=(.+)$" -consistent!(retdec_0, r"^attachment; filename=(.+)$"); - -// renvsubst-0.1.2: r"(\\)(?P<head>\$[0-9A-Za-z_{])" -consistent!(renvsubst_0, r"(\\)(?P<head>\$[0-9A-Za-z_{])"); - -// renvsubst-0.1.2: r"\$([[:word:]]+)" -consistent!(renvsubst_1, r"\$([[:word:]]+)"); - -// renvsubst-0.1.2: r"\$\{([[:word:]]+)\}" -consistent!(renvsubst_2, r"\$\{([[:word:]]+)\}"); - -// rexpect-0.3.0: r"'[a-z]+'" -consistent!(rexpect_0, r"'[a-z]+'"); - -// rexpect-0.3.0: r"^\d{4}-\d{2}-\d{2}$" -consistent!(rexpect_1, r"^\d{4}-\d{2}-\d{2}$"); - -// rexpect-0.3.0: r"-\d{2}-" -consistent!(rexpect_2, r"-\d{2}-"); - -// luther-0.1.0: "^a(b|c)c*$" -consistent!(luther_0, "^a(b|c)c*$"); - -// little_boxes-1.6.0: r"(\x9B|\x1B\[)[0-?]*[ -/]*[@-~]" -consistent!(little_boxes_0, r"(\x9B|\x1B\[)[0-?]*[ -/]*[@-~]"); - -// libimagentrytag-0.8.0: "^[a-zA-Z]([a-zA-Z0-9_-]*)$" -consistent!(libimagentrytag_0, "^[a-zA-Z]([a-zA-Z0-9_-]*)$"); - -// libimaginteraction-0.8.0: r"^[Yy](\n?)$" -consistent!(libimaginteraction_0, r"^[Yy](\n?)$"); - -// libimaginteraction-0.8.0: r"^[Nn](\n?)$" -consistent!(libimaginteraction_1, r"^[Nn](\n?)$"); - -// libimagutil-0.8.0: "^(?P<KEY>([^=]*))=(.*)$" -consistent!(libimagutil_0, "^(?P<KEY>([^=]*))=(.*)$"); - -// libimagutil-0.8.0: "(.*)=(\"(?P<QVALUE>([^\"]*))\"|(?P<VALUE>(.*)))$" -consistent!(libimagutil_1, "(.*)=(\"(?P<QVALUE>([^\"]*))\"|(?P<VALUE>(.*)))$"); - -// linux_ip-0.1.0: r"\s+" -consistent!(linux_ip_0, r"\s+"); - -// linux_ip-0.1.0: r"\s*[\n\r]+\s*" -consistent!(linux_ip_1, 
r"\s*[\n\r]+\s*"); - -// linux_ip-0.1.0: r"^([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$" -consistent!(linux_ip_2, r"^([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$"); - -// linux_ip-0.1.0: r"^([0-9a-fA-F\.:/]+|default)\s+via\s+([a-z0-9\.:]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$" -consistent!(linux_ip_3, r"^([0-9a-fA-F\.:/]+|default)\s+via\s+([a-z0-9\.:]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$"); - -// linux_ip-0.1.0: r"^(blackhole)\s+([0-9a-fA-F\.:/]+)$" -consistent!(linux_ip_4, r"^(blackhole)\s+([0-9a-fA-F\.:/]+)$"); - -// linux_ip-0.1.0: r"^(unreachable)\s+([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s+(.*)$" -consistent!( - linux_ip_5, - r"^(unreachable)\s+([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s+(.*)$" -); - -// linux_ip-0.1.0: r"\s*[\n\r]+\s*" -consistent!(linux_ip_6, r"\s*[\n\r]+\s*"); - -// linux_ip-0.1.0: r"^\d+:\s+([a-zA-Z0-9\.-]+)(@\S+)*:\s+(.*)$" -consistent!(linux_ip_7, r"^\d+:\s+([a-zA-Z0-9\.-]+)(@\S+)*:\s+(.*)$"); - -// linux_ip-0.1.0: r"\s*link/ether\s+([a-f0-9:]+)\s+.*" -consistent!(linux_ip_8, r"\s*link/ether\s+([a-f0-9:]+)\s+.*"); - -// linux_ip-0.1.0: r"\s*inet[6]*\s+([0-9a-f:\./]+)\s+.*" -consistent!(linux_ip_9, r"\s*inet[6]*\s+([0-9a-f:\./]+)\s+.*"); - -// linky-0.1.4: r"[^\w -]" -consistent!(linky_0, r"[^\w -]"); - -// linky-0.1.4: r"^(.*):(\d+): [^ ]* ([^ ]*)$" -consistent!(linky_1, r"^(.*):(\d+): [^ ]* ([^ ]*)$"); - -// limonite-0.2.1: r"^(\d{4}-\d{2}-\d{2})-(\d{3})-(.+)$" -consistent!(limonite_0, r"^(\d{4}-\d{2}-\d{2})-(\d{3})-(.+)$"); - -// process-queue-0.1.1: r"^[a-zA-Z]+$" -consistent!(process_queue_0, r"^[a-zA-Z]+$"); - -// pronghorn-0.1.2: r"^\{([a-zA-Z_]+)\}$" -consistent!(pronghorn_0, r"^\{([a-zA-Z_]+)\}$"); - -// protocol-ftp-client-0.1.1: "(?m:^(\\d{3}) (.+)\r$)" -consistent!(protocol_ftp_client_0, "(?m:^(\\d{3}) (.+)\r$)"); - -// protocol-ftp-client-0.1.1: "\"(.+)\"" -consistent!(protocol_ftp_client_1, "\"(.+)\""); - -// protocol-ftp-client-0.1.1: "(\\w+) [Tt]ype: (\\w+)" -consistent!(protocol_ftp_client_2, "(\\w+) [Tt]ype: (\\w+)"); - -// protocol-ftp-client-0.1.1: "(?m:^(\\d{3})-.+\r$)" -consistent!(protocol_ftp_client_3, "(?m:^(\\d{3})-.+\r$)"); - -// protocol-ftp-client-0.1.1: "Entering Passive Mode \\((\\d+),(\\d+),(\\d+),(\\d+),(\\d+),(\\d+)\\)" -consistent!( - protocol_ftp_client_4, - "Entering Passive Mode \\((\\d+),(\\d+),(\\d+),(\\d+),(\\d+),(\\d+)\\)" -); - -// protocol-ftp-client-0.1.1: "(?m:^(.+)\r$)" -consistent!(protocol_ftp_client_5, "(?m:^(.+)\r$)"); - -// protocol-ftp-client-0.1.1: "^([d-])(?:[rwx-]{3}){3} +\\d+ +\\w+ +\\w+ +(\\d+) +(.+) +(.+)$" -consistent!( - protocol_ftp_client_6, - "^([d-])(?:[rwx-]{3}){3} +\\d+ +\\w+ +\\w+ +(\\d+) +(.+) +(.+)$" -); - -// article-date-extractor-0.1.1: r"([\./\-_]{0,1}(19|20)\d{2})[\./\-_]{0,1}(([0-3]{0,1}[0-9][\./\-_])|(\w{3,5}[\./\-_]))([0-3]{0,1}[0-9][\./\-]{0,1})" -consistent!(article_date_extractor_0, r"([\./\-_]{0,1}(19|20)\d{2})[\./\-_]{0,1}(([0-3]{0,1}[0-9][\./\-_])|(\w{3,5}[\./\-_]))([0-3]{0,1}[0-9][\./\-]{0,1})"); - -// article-date-extractor-0.1.1: r"(?i)publishdate|pubdate|timestamp|article_date|articledate|date" -consistent!( - article_date_extractor_1, - r"(?i)publishdate|pubdate|timestamp|article_date|articledate|date" -); - -// arthas_plugin-0.1.1: r"type\((.*)\)" -consistent!(arthas_plugin_0, r"type\((.*)\)"); - -// arthas_plugin-0.1.1: r"Vec<(.*)>" -consistent!(arthas_plugin_1, r"Vec<(.*)>"); - -// arthas_plugin-0.1.1: r"Option<(.*)>" -consistent!(arthas_plugin_2, r"Option<(.*)>"); - -// arthas_plugin-0.1.1: r"HashMap<[a-z0-9A-Z]+, *(.*)>" -consistent!(arthas_plugin_3, r"HashMap<[a-z0-9A-Z]+, 
*(.*)>"); - -// arthas_derive-0.1.0: "Vec *< *(.*) *>" -consistent!(arthas_derive_0, "Vec *< *(.*) *>"); - -// arthas_derive-0.1.0: r"Option *< *(.*) *>" -consistent!(arthas_derive_1, r"Option *< *(.*) *>"); - -// arthas_derive-0.1.0: r"HashMap *< *[a-z0-9A-Z]+ *, *(.*) *>" -consistent!(arthas_derive_2, r"HashMap *< *[a-z0-9A-Z]+ *, *(.*) *>"); - -// arpabet-0.2.0: r"^([\w\-\(\)\.']+)\s+([^\s].*)\s*$" -consistent!(arpabet_0, r"^([\w\-\(\)\.']+)\s+([^\s].*)\s*$"); - -// arpabet-0.2.0: r"^;;;\s+" -consistent!(arpabet_1, r"^;;;\s+"); - -// glossy_codegen-0.2.0: r"/\*.*?\*/|//.*" -consistent!(glossy_codegen_0, r"/\*.*?\*/|//.*"); - -// glossy_codegen-0.2.0: "^\\s*#\\s*include\\s+<([:print:]+)>\\s*$" -consistent!(glossy_codegen_1, "^\\s*#\\s*include\\s+<([:print:]+)>\\s*$"); - -// glossy_codegen-0.2.0: "^\\s*#\\s*include\\s+\"([:print:]+)\"\\s*$" -consistent!(glossy_codegen_2, "^\\s*#\\s*include\\s+\"([:print:]+)\"\\s*$"); - -// glossy_codegen-0.2.0: r"^\s*#\s*version\s+(\d+)" -consistent!(glossy_codegen_3, r"^\s*#\s*version\s+(\d+)"); - -// glossy_codegen-0.2.0: r"^\s*$" -consistent!(glossy_codegen_4, r"^\s*$"); - -// gluster-1.0.1: r"(?P<addr>via \S+)" -consistent!(gluster_0, r"(?P<addr>via \S+)"); - -// gluster-1.0.1: r"(?P<src>src \S+)" -consistent!(gluster_1, r"(?P<src>src \S+)"); - -// gl_helpers-0.1.7: r"(.*)\[\d+\]" -consistent!(gl_helpers_0, r"(.*)\[\d+\]"); - -// gl_helpers-0.1.7: r"(\d+).(\d+)" -consistent!(gl_helpers_1, r"(\d+).(\d+)"); - -// glr-parser-0.0.1: r"(?P<c>[\\\.\+\*\?\(\)\|\[\]\{\}\^\$])" -consistent!(glr_parser_0, r"(?P<c>[\\\.\+\*\?\(\)\|\[\]\{\}\^\$])"); - -// glr-parser-0.0.1: r"^\w+$" -consistent!(glr_parser_1, r"^\w+$"); - -// glr-parser-0.0.1: "'[^']+'" -consistent!(glr_parser_2, "'[^']+'"); - -// hoodlum-0.5.0: r"(?m)//.*" -consistent!(hoodlum_0, r"(?m)//.*"); - -// form-checker-0.2.2: r"^1\d{10}$" -consistent!(form_checker_0, r"^1\d{10}$"); - -// form-checker-0.2.2: r"(?i)^[\w.%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4}$" -consistent!(form_checker_1, r"(?i)^[\w.%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4}$"); - -// wikibase-0.2.0: r"(?P<user_agent>[a-zA-Z0-9-_]+/[0-9\.]+)" -consistent!(wikibase_0, r"(?P<user_agent>[a-zA-Z0-9-_]+/[0-9\.]+)"); - -// wifiscanner-0.3.6: r"Cell [0-9]{2,} - Address:" -consistent!(wifiscanner_0, r"Cell [0-9]{2,} - Address:"); - -// wifiscanner-0.3.6: r"([0-9a-zA-Z]{1}[0-9a-zA-Z]{1}[:]{1}){5}[0-9a-zA-Z]{1}[0-9a-zA-Z]{1}" -consistent!( - wifiscanner_1, - r"([0-9a-zA-Z]{1}[0-9a-zA-Z]{1}[:]{1}){5}[0-9a-zA-Z]{1}[0-9a-zA-Z]{1}" -); - -// wifiscanner-0.3.6: r"Signal level=(\d+)/100" -consistent!(wifiscanner_2, r"Signal level=(\d+)/100"); - -// bbcode-1.0.2: r"(?s)\[b\](.*?)\[/b\]" -consistent!(bbcode_0, r"(?s)\[b\](.*?)\[/b\]"); - -// bbcode-1.0.2: r"(?s)\[i\](.*?)\[/i\]" -consistent!(bbcode_1, r"(?s)\[i\](.*?)\[/i\]"); - -// bbcode-1.0.2: r"(?s)\[u\](.*?)\[/u\]" -consistent!(bbcode_2, r"(?s)\[u\](.*?)\[/u\]"); - -// bbcode-1.0.2: r"(?s)\[s\](.*?)\[/s\]" -consistent!(bbcode_3, r"(?s)\[s\](.*?)\[/s\]"); - -// bbcode-1.0.2: r"(?s)\[size=(\d+)](.*?)\[/size\]" -consistent!(bbcode_4, r"(?s)\[size=(\d+)](.*?)\[/size\]"); - -// bbcode-1.0.2: r"(?s)\[color=(.+)](.*?)\[/color\]" -consistent!(bbcode_5, r"(?s)\[color=(.+)](.*?)\[/color\]"); - -// bbcode-1.0.2: r"(?s)\[center\](.*?)\[/center\]" -consistent!(bbcode_6, r"(?s)\[center\](.*?)\[/center\]"); - -// bbcode-1.0.2: r"(?s)\[left\](.*?)\[/left\]" -consistent!(bbcode_7, r"(?s)\[left\](.*?)\[/left\]"); - -// bbcode-1.0.2: r"(?s)\[right\](.*?)\[/right\]" -consistent!(bbcode_8, r"(?s)\[right\](.*?)\[/right\]"); - -// 
bbcode-1.0.2: r"(?s)\[table\](.*?)\[/table\]" -consistent!(bbcode_9, r"(?s)\[table\](.*?)\[/table\]"); - -// bbcode-1.0.2: r"(?s)\[td\](.*?)\[/td\]" -consistent!(bbcode_10, r"(?s)\[td\](.*?)\[/td\]"); - -// bbcode-1.0.2: r"(?s)\[tr\](.*?)\[/tr\]" -consistent!(bbcode_11, r"(?s)\[tr\](.*?)\[/tr\]"); - -// bbcode-1.0.2: r"(?s)\[th\](.*?)\[/th\]" -consistent!(bbcode_12, r"(?s)\[th\](.*?)\[/th\]"); - -// bbcode-1.0.2: r"(?s)\[url\](.*?)\[/url\]" -consistent!(bbcode_13, r"(?s)\[url\](.*?)\[/url\]"); - -// bbcode-1.0.2: r"(?s)\[url=(.+)\](.*?)\[/url\]" -consistent!(bbcode_14, r"(?s)\[url=(.+)\](.*?)\[/url\]"); - -// bbcode-1.0.2: r"(?s)\[quote\](.*?)\[/quote\]" -consistent!(bbcode_15, r"(?s)\[quote\](.*?)\[/quote\]"); - -// bbcode-1.0.2: r"(?s)\[quote=(.+)\](.*?)\[/quote\]" -consistent!(bbcode_16, r"(?s)\[quote=(.+)\](.*?)\[/quote\]"); - -// bbcode-1.0.2: r"(?s)\[img=(\d+)x(\d+)(\b.*)?\](.*?)\[/img\]" -consistent!(bbcode_17, r"(?s)\[img=(\d+)x(\d+)(\b.*)?\](.*?)\[/img\]"); - -// bbcode-1.0.2: r"(?s)\[img=(.+)(\b.*)?\](.*?)\[/img\]" -consistent!(bbcode_18, r"(?s)\[img=(.+)(\b.*)?\](.*?)\[/img\]"); - -// bbcode-1.0.2: r"(?s)\[img(\b.*)?\](.*?)\[/img\]" -consistent!(bbcode_19, r"(?s)\[img(\b.*)?\](.*?)\[/img\]"); - -// bbcode-1.0.2: r"(?s)\[ol\](.*?)\[/ol\]" -consistent!(bbcode_20, r"(?s)\[ol\](.*?)\[/ol\]"); - -// bbcode-1.0.2: r"(?s)\[ul\](.*?)\[/ul\]" -consistent!(bbcode_21, r"(?s)\[ul\](.*?)\[/ul\]"); - -// bbcode-1.0.2: r"(?s)\[list\](.*?)\[/list\]" -consistent!(bbcode_22, r"(?s)\[list\](.*?)\[/list\]"); - -// bbcode-1.0.2: r"(?s)\[youtube\](.*?)\[/youtube\]" -consistent!(bbcode_23, r"(?s)\[youtube\](.*?)\[/youtube\]"); - -// bbcode-1.0.2: r"(?s)\[youtube=(\d+)x(\d+)\](.*?)\[/youtube\]" -consistent!(bbcode_24, r"(?s)\[youtube=(\d+)x(\d+)\](.*?)\[/youtube\]"); - -// bbcode-1.0.2: r"(?s)\[li\](.*?)\[/li\]" -consistent!(bbcode_25, r"(?s)\[li\](.*?)\[/li\]"); - -// block-utils-0.5.0: r"loop\d+" -consistent!(block_utils_0, r"loop\d+"); - -// block-utils-0.5.0: r"ram\d+" -consistent!(block_utils_1, r"ram\d+"); - -// block-utils-0.5.0: r"md\d+" -consistent!(block_utils_2, r"md\d+"); - -// kvvliveapi-0.1.0: r"^([1-9]) min$" -consistent!(kvvliveapi_0, r"^([1-9]) min$"); - -// rfc822_sanitizer-0.3.3: r"(\d{2}):(\d{2}):(\d{2})" -consistent!(rfc822_sanitizer_0, r"(\d{2}):(\d{2}):(\d{2})"); - -// rfc822_sanitizer-0.3.3: r"(\d{1,2}):(\d{1,2}):(\d{1,2})" -consistent!(rfc822_sanitizer_1, r"(\d{1,2}):(\d{1,2}):(\d{1,2})"); - -// faker-0.0.4: r"[2-9]" -consistent!(faker_0, r"[2-9]"); - -// faker-0.0.4: r"[1-9]" -consistent!(faker_1, r"[1-9]"); - -// faker-0.0.4: r"[0-9]" -consistent!(faker_2, r"[0-9]"); - -// faker-0.0.4: r"\d{10}" -consistent!(faker_3, r"\d{10}"); - -// faker-0.0.4: r"\d{1}" -consistent!(faker_4, r"\d{1}"); - -// faker-0.0.4: r"^\w+" -consistent!(faker_5, r"^\w+"); - -// faker-0.0.4: r"^\w+" -consistent!(faker_6, r"^\w+"); - -// faker-0.0.4: r"^(\w+\.? ?){2,3}$" -consistent!(faker_7, r"^(\w+\.? 
?){2,3}$"); - -// faker-0.0.4: r"^[A-Z][a-z]+\.?$" -consistent!(faker_8, r"^[A-Z][a-z]+\.?$"); - -// faker-0.0.4: r"^[A-Z][A-Za-z]*\.?$" -consistent!(faker_9, r"^[A-Z][A-Za-z]*\.?$"); - -// faker-0.0.4: r"http://lorempixel.com/100/100/\w+" -consistent!(faker_10, r"http://lorempixel.com/100/100/\w+"); - -// faker-0.0.4: r"http://lorempixel.com/100/100/cats" -consistent!(faker_11, r"http://lorempixel.com/100/100/cats"); - -// fancy-regex-0.1.0: "(?i:ß)" -consistent!(fancy_regex_0, "(?i:ß)"); - -// fancy-regex-0.1.0: "(?i:\\x{0587})" -consistent!(fancy_regex_1, "(?i:\\x{0587})"); - -// fancy-regex-0.1.0: "^\\\\([!-/:-@\\[-`\\{-~aftnrv]|[0-7]{1,3}|x[0-9a-fA-F]{2}|x\\{[0-9a-fA-F]{1,6}\\})" -consistent!(fancy_regex_2, "^\\\\([!-/:-@\\[-`\\{-~aftnrv]|[0-7]{1,3}|x[0-9a-fA-F]{2}|x\\{[0-9a-fA-F]{1,6}\\})"); - -// fancy-prompt-0.1.5: r"/([^/])[^/]+/" -consistent!(fancy_prompt_0, r"/([^/])[^/]+/"); - -// fancy-prompt-0.1.5: r"^([^:]+):.*?(?::([^:]+))?$" -consistent!(fancy_prompt_1, r"^([^:]+):.*?(?::([^:]+))?$"); - -// fanta-0.2.0: r"^(/?__\w+__)/(.*)" -consistent!(fanta_0, r"^(/?__\w+__)/(.*)"); - -// fanta-cli-0.1.1: r"(.)([A-Z])" -consistent!(fanta_cli_0, r"(.)([A-Z])"); - -// fanta-cli-0.1.1: "\\{:[^\\s]+\\}" -consistent!(fanta_cli_1, "\\{:[^\\s]+\\}"); - -// amethyst_tools-0.7.1: "(?P<last>[^\r])\n" -consistent!(amethyst_tools_0, "(?P<last>[^\r])\n"); - -// amigo-0.3.1: r"^-?\d+(\.\d)?" -consistent!(amigo_0, r"^-?\d+(\.\d)?"); - -// amigo-0.3.1: r"^[a-zA-Z_]+[\w-]*[!?_]?" -consistent!(amigo_1, r"^[a-zA-Z_]+[\w-]*[!?_]?"); - -// amigo-0.3.1: r"^\(" -consistent!(amigo_2, r"^\("); - -// amigo-0.3.1: r"^\)" -consistent!(amigo_3, r"^\)"); - -// amigo-0.3.1: r"^\s+" -consistent!(amigo_4, r"^\s+"); - -// ethcore-logger-1.12.0: "\x1b\\[[^m]+m" -consistent!(ethcore_logger_0, "\x1b\\[[^m]+m"); - -// dash2html-1.0.1: r"__.*?__" -consistent!(dash2html_0, r"__.*?__"); - -// dash2html-1.0.1: r"(?i)@(?:time|clipboard|cursor|date)" -consistent!(dash2html_1, r"(?i)@(?:time|clipboard|cursor|date)"); - -// os_type-2.0.0: r"^Microsoft Windows \[Version\s(\d+\.\d+\.\d+)\]$" -consistent!(os_type_0, r"^Microsoft Windows \[Version\s(\d+\.\d+\.\d+)\]$"); - -// os_type-2.0.0: r"ProductName:\s([\w\s]+)\n" -consistent!(os_type_1, r"ProductName:\s([\w\s]+)\n"); - -// os_type-2.0.0: r"ProductVersion:\s(\w+\.\w+\.\w+)" -consistent!(os_type_2, r"ProductVersion:\s(\w+\.\w+\.\w+)"); - -// os_type-2.0.0: r"BuildVersion:\s(\w+)" -consistent!(os_type_3, r"BuildVersion:\s(\w+)"); - -// os_type-2.0.0: r"(\w+) Linux release" -consistent!(os_type_4, r"(\w+) Linux release"); - -// os_type-2.0.0: r"release\s([\w\.]+)" -consistent!(os_type_5, r"release\s([\w\.]+)"); - -// os_type-2.0.0: r"Distributor ID:\s(\w+)" -consistent!(os_type_6, r"Distributor ID:\s(\w+)"); - -// os_type-2.0.0: r"Release:\s([\w\.]+)" -consistent!(os_type_7, r"Release:\s([\w\.]+)"); - -// bindgen-0.37.0: r"typename type\-parameter\-\d+\-\d+::.+" -consistent!(bindgen_0, r"typename type\-parameter\-\d+\-\d+::.+"); - -// imap-0.8.1: "^+(.*)\r\n" -consistent!(imap_0, "^+(.*)\r\n"); - -// image-base64-0.1.0: r"^ffd8ffe0" -consistent!(image_base64_0, r"^ffd8ffe0"); - -// image-base64-0.1.0: r"^89504e47" -consistent!(image_base64_1, r"^89504e47"); - -// image-base64-0.1.0: r"^47494638" -consistent!(image_base64_2, r"^47494638"); - -// json-pointer-0.3.2: "^(/([^/~]|~[01])*)*$" -consistent!(json_pointer_0, "^(/([^/~]|~[01])*)*$"); - -// json-pointer-0.3.2: "^#(/([^/~%]|~[01]|%[0-9a-fA-F]{2})*)*$" -consistent!(json_pointer_1, "^#(/([^/~%]|~[01]|%[0-9a-fA-F]{2})*)*$"); - -// 
mysql_common-0.7.0: r"^5.5.5-(\d{1,2})\.(\d{1,2})\.(\d{1,3})-MariaDB" -consistent!(mysql_common_0, r"^5.5.5-(\d{1,2})\.(\d{1,2})\.(\d{1,3})-MariaDB"); - -// mysql_common-0.7.0: r"^(\d{1,2})\.(\d{1,2})\.(\d{1,3})(.*)" -consistent!(mysql_common_1, r"^(\d{1,2})\.(\d{1,2})\.(\d{1,3})(.*)"); - -// government_id-0.1.0: r"^[0-9]{4}[0-9A-Z]{2}[0-9]{3}$" -consistent!(government_id_0, r"^[0-9]{4}[0-9A-Z]{2}[0-9]{3}$"); - -// ohmers-0.1.1: r"UniqueIndexViolation: (\w+)" -consistent!(ohmers_0, r"UniqueIndexViolation: (\w+)"); - -// eliza-1.0.0: r"(.*) you are (.*)" -consistent!(eliza_0, r"(.*) you are (.*)"); - -// eliza-1.0.0: r"(.*) you are (.*)" -consistent!(eliza_1, r"(.*) you are (.*)"); - -// eliza-1.0.0: r"(.*) you are (.*)" -consistent!(eliza_2, r"(.*) you are (.*)"); - -// chema-0.0.5: "^\\s*\\*" -consistent!(chema_0, "^\\s*\\*"); - -// chema-0.0.5: "^\\s*@(\\w+)\\s+(.*)" -consistent!(chema_1, "^\\s*@(\\w+)\\s+(.*)"); - -// chord3-0.3.0: r"^\s*#" -consistent!(chord3_0, r"^\s*#"); - -// chord3-0.3.0: r"\{(?P<cmd>\w+)(?::?\s*(?P<arg>.*))?\}" -consistent!(chord3_1, r"\{(?P<cmd>\w+)(?::?\s*(?P<arg>.*))?\}"); - -// chord3-0.3.0: r"\{(eot|end_of_tab):?\s*" -consistent!(chord3_2, r"\{(eot|end_of_tab):?\s*"); - -// chord3-0.3.0: r"([^\[]*)(?:\[([^\]]*)\])?" -consistent!(chord3_3, r"([^\[]*)(?:\[([^\]]*)\])?"); - -// checkmail-0.1.1: "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$" -consistent!(checkmail_0, "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"); - -// cntk-0.2.1: r"\b\w\w+\b" -consistent!(cntk_0, r"\b\w\w+\b"); - -// cntk-0.2.1: r"\b\w\w+\b" -consistent!(cntk_1, r"\b\w\w+\b"); - -// cniguru-0.1.0: r"\(id: (\d+)\)" -consistent!(cniguru_0, r"\(id: (\d+)\)"); - -// upm_lib-0.3.0: r"^(\d+)\.(\d+)\.(\d+)(?:-([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?(?:\+([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?$" -consistent!(upm_lib_0, r"^(\d+)\.(\d+)\.(\d+)(?:-([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?(?:\+([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?$"); - -// avro-0.2.1: r"^\s*(\*+(\s+))?" -consistent!(avro_0, r"^\s*(\*+(\s+))?"); - -// avro-0.2.1: r"^\s*(\*+)?" -consistent!(avro_1, r"^\s*(\*+)?"); - -// nomi-0.0.2: "[0-9]+" -consistent!(nomi_0, "[0-9]+"); - -// nodes-0.1.0: "([0-9]+)@(?:nodes|n)?:([^@]+)?" -consistent!(nodes_0, "([0-9]+)@(?:nodes|n)?:([^@]+)?"); - -// not-stakkr-1.0.0: r"(?i)in (\d+) (second|minute|hour|day|week)s?" 
-consistent!(not_stakkr_0, r"(?i)in (\d+) (second|minute|hour|day|week)s?"); - -// notetxt-0.0.1: "^([A-Za-z0-9 -_:]+)\n-+\n" -consistent!(notetxt_0, "^([A-Za-z0-9 -_:]+)\n-+\n"); - -// nail-0.1.0-pre.0: r"^-?[0-9]+(\.[0-9]+)?([eE]-?[0-9]+)?$" -consistent!(nail_0, r"^-?[0-9]+(\.[0-9]+)?([eE]-?[0-9]+)?$"); - -// nail-0.1.0-pre.0: r"^-?[0-9]+$" -consistent!(nail_1, r"^-?[0-9]+$"); - -// askalono-0.2.0: r"[^\w\s\pP]+" -consistent!(askalono_0, r"[^\w\s\pP]+"); - -// askalono-0.2.0: r"(?x)[ \t\p{Zs} \\ / \| \x2044 ]+" -consistent!(askalono_1, r"(?x)[ \t\p{Zs} \\ / \| \x2044 ]+"); - -// askalono-0.2.0: r"\p{Pd}+" -consistent!(askalono_2, r"\p{Pd}+"); - -// askalono-0.2.0: r"\p{Ps}+" -consistent!(askalono_3, r"\p{Ps}+"); - -// askalono-0.2.0: r"\p{Pe}+" -consistent!(askalono_4, r"\p{Pe}+"); - -// askalono-0.2.0: r"\p{Pc}+" -consistent!(askalono_5, r"\p{Pc}+"); - -// askalono-0.2.0: r"[©Ⓒⓒ]" -consistent!(askalono_6, r"[©Ⓒⓒ]"); - -// askalono-0.2.0: r"[\r\n\v\f]" -consistent!(askalono_7, r"[\r\n\v\f]"); - -// askalono-0.2.0: r"\n{3,}" -consistent!(askalono_8, r"\n{3,}"); - -// askalono-0.2.0: r"[^\w\s]+" -consistent!(askalono_9, r"[^\w\s]+"); - -// askalono-0.2.0: r"\s+" -consistent!(askalono_10, r"\s+"); - -// assembunny_plus-0.0.3: r"[^0-9a-zA-Z_]" -consistent!(assembunny_plus_0, r"[^0-9a-zA-Z_]"); - -// assembunny_plus-0.0.3: r"[0-9]" -consistent!(assembunny_plus_1, r"[0-9]"); - -// salt-compressor-0.4.0: r"(?m)^Minion (\S*) did not respond\. No job will be sent\.$" -consistent!( - salt_compressor_0, - r"(?m)^Minion (\S*) did not respond\. No job will be sent\.$" -); - -// sabisabi-0.4.1: r"</?[^>]+?>" -consistent!(sabisabi_0, r"</?[^>]+?>"); - -// sabisabi-0.4.1: r"\([^)]*\)" -consistent!(sabisabi_1, r"\([^)]*\)"); - -// sassers-0.13.5-h28: "@import \"([^\"]*)\";" -consistent!(sassers_0, "@import \"([^\"]*)\";"); - -// shadowsocks-0.6.2: r"[A-Za-z\d-]{1,63}$" -consistent!(shadowsocks_0, r"[A-Za-z\d-]{1,63}$"); - -// shkeleton-0.1.5: "[abc]+" -consistent!(shkeleton_0, "[abc]+"); - -// shellwords-0.1.0: r"([^A-Za-z0-9_\-.,:/@\n])" -consistent!(shellwords_0, r"([^A-Za-z0-9_\-.,:/@\n])"); - -// shellwords-0.1.0: r"\n" -consistent!(shellwords_1, r"\n"); - -// shush-0.1.5: "(?P<num>[0-9]+)(?P<units>[dhms])" -consistent!(shush_0, "(?P<num>[0-9]+)(?P<units>[dhms])"); - -// woothee-0.8.0: r"(?:Chrome|CrMo|CriOS)/([.0-9]+)" -consistent!(woothee_0, r"(?:Chrome|CrMo|CriOS)/([.0-9]+)"); - -// woothee-0.8.0: r"Vivaldi/([.0-9]+)" -consistent!(woothee_1, r"Vivaldi/([.0-9]+)"); - -// woothee-0.8.0: r"Firefox/([.0-9]+)" -consistent!(woothee_2, r"Firefox/([.0-9]+)"); - -// woothee-0.8.0: r"^Mozilla/[.0-9]+ \((?:Mobile|Tablet);(?:.*;)? rv:([.0-9]+)\) Gecko/[.0-9]+ Firefox/[.0-9]+$" -consistent!(woothee_3, r"^Mozilla/[.0-9]+ \((?:Mobile|Tablet);(?:.*;)? 
rv:([.0-9]+)\) Gecko/[.0-9]+ Firefox/[.0-9]+$"); - -// woothee-0.8.0: r"FxiOS/([.0-9]+)" -consistent!(woothee_4, r"FxiOS/([.0-9]+)"); - -// woothee-0.8.0: r"\(([^;)]+);FOMA;" -consistent!(woothee_5, r"\(([^;)]+);FOMA;"); - -// woothee-0.8.0: r"jig browser[^;]+; ([^);]+)" -consistent!(woothee_6, r"jig browser[^;]+; ([^);]+)"); - -// woothee-0.8.0: r"(?i)rss(?:reader|bar|[-_ /;()]|[ +]*/)" -consistent!(woothee_7, r"(?i)rss(?:reader|bar|[-_ /;()]|[ +]*/)"); - -// woothee-0.8.0: r"(?i)(?:bot|crawler|spider)(?:[-_ ./;@()]|$)" -consistent!(woothee_8, r"(?i)(?:bot|crawler|spider)(?:[-_ ./;@()]|$)"); - -// woothee-0.8.0: r"(?i)(?:feed|web) ?parser" -consistent!(woothee_9, r"(?i)(?:feed|web) ?parser"); - -// woothee-0.8.0: r"(?i)watch ?dog" -consistent!(woothee_10, r"(?i)watch ?dog"); - -// woothee-0.8.0: r"Edge/([.0-9]+)" -consistent!(woothee_11, r"Edge/([.0-9]+)"); - -// woothee-0.8.0: r"MSIE ([.0-9]+);" -consistent!(woothee_12, r"MSIE ([.0-9]+);"); - -// woothee-0.8.0: r"Version/([.0-9]+)" -consistent!(woothee_13, r"Version/([.0-9]+)"); - -// woothee-0.8.0: r"Opera[/ ]([.0-9]+)" -consistent!(woothee_14, r"Opera[/ ]([.0-9]+)"); - -// woothee-0.8.0: r"OPR/([.0-9]+)" -consistent!(woothee_15, r"OPR/([.0-9]+)"); - -// woothee-0.8.0: r"Version/([.0-9]+)" -consistent!(woothee_16, r"Version/([.0-9]+)"); - -// woothee-0.8.0: r"(?:SoftBank|Vodafone|J-PHONE)/[.0-9]+/([^ /;()]+)" -consistent!(woothee_17, r"(?:SoftBank|Vodafone|J-PHONE)/[.0-9]+/([^ /;()]+)"); - -// woothee-0.8.0: r"Trident/([.0-9]+);" -consistent!(woothee_18, r"Trident/([.0-9]+);"); - -// woothee-0.8.0: r" rv:([.0-9]+)" -consistent!(woothee_19, r" rv:([.0-9]+)"); - -// woothee-0.8.0: r"IEMobile/([.0-9]+);" -consistent!(woothee_20, r"IEMobile/([.0-9]+);"); - -// woothee-0.8.0: r"(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)" -consistent!(woothee_21, r"(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)"); - -// woothee-0.8.0: r"Windows ([ .a-zA-Z0-9]+)[;\\)]" -consistent!(woothee_22, r"Windows ([ .a-zA-Z0-9]+)[;\\)]"); - -// woothee-0.8.0: r"^Phone(?: OS)? ([.0-9]+)" -consistent!(woothee_23, r"^Phone(?: OS)? ([.0-9]+)"); - -// woothee-0.8.0: r"iP(hone;|ad;|od) .*like Mac OS X" -consistent!(woothee_24, r"iP(hone;|ad;|od) .*like Mac OS X"); - -// woothee-0.8.0: r"Version/([.0-9]+)" -consistent!(woothee_25, r"Version/([.0-9]+)"); - -// woothee-0.8.0: r"rv:(\d+\.\d+\.\d+)" -consistent!(woothee_26, r"rv:(\d+\.\d+\.\d+)"); - -// woothee-0.8.0: r"FreeBSD ([^;\)]+);" -consistent!(woothee_27, r"FreeBSD ([^;\)]+);"); - -// woothee-0.8.0: r"CrOS ([^\)]+)\)" -consistent!(woothee_28, r"CrOS ([^\)]+)\)"); - -// woothee-0.8.0: r"Android[- ](\d+\.\d+(?:\.\d+)?)" -consistent!(woothee_29, r"Android[- ](\d+\.\d+(?:\.\d+)?)"); - -// woothee-0.8.0: r"PSP \(PlayStation Portable\); ([.0-9]+)\)" -consistent!(woothee_30, r"PSP \(PlayStation Portable\); ([.0-9]+)\)"); - -// woothee-0.8.0: r"PLAYSTATION 3;? ([.0-9]+)\)" -consistent!(woothee_31, r"PLAYSTATION 3;? ([.0-9]+)\)"); - -// woothee-0.8.0: r"PlayStation Vita ([.0-9]+)\)" -consistent!(woothee_32, r"PlayStation Vita ([.0-9]+)\)"); - -// woothee-0.8.0: r"PlayStation 4 ([.0-9]+)\)" -consistent!(woothee_33, r"PlayStation 4 ([.0-9]+)\)"); - -// woothee-0.8.0: r"BB10(?:.+)Version/([.0-9]+) " -consistent!(woothee_34, r"BB10(?:.+)Version/([.0-9]+) "); - -// woothee-0.8.0: r"BlackBerry(?:\d+)/([.0-9]+) " -consistent!(woothee_35, r"BlackBerry(?:\d+)/([.0-9]+) "); - -// woothee-0.8.0: r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) like Mac OS X" -consistent!( - woothee_36, - r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) 
like Mac OS X" -); - -// woothee-0.8.0: r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)" -consistent!(woothee_37, r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)"); - -// woothee-0.8.0: r"^(?:Apache-HttpClient/|Jakarta Commons-HttpClient/|Java/)" -consistent!( - woothee_38, - r"^(?:Apache-HttpClient/|Jakarta Commons-HttpClient/|Java/)" -); - -// woothee-0.8.0: r"[- ]HttpClient(/|$)" -consistent!(woothee_39, r"[- ]HttpClient(/|$)"); - -// woothee-0.8.0: r"^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:/| |$)" -consistent!( - woothee_40, - r"^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:/| |$)" -); - -// woothee-0.8.0: r"(?:PEAR HTTP_Request|HTTP_Request)(?: class|2)" -consistent!(woothee_41, r"(?:PEAR HTTP_Request|HTTP_Request)(?: class|2)"); - -// woothee-0.8.0: r"(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)" -consistent!( - woothee_42, - r"(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)" -); - -// woothee-0.8.0: r"Sleipnir/([.0-9]+)" -consistent!(woothee_43, r"Sleipnir/([.0-9]+)"); - -// word_replace-0.0.3: r"@@[a-z|A-Z|\d]+@@" -consistent!(word_replace_0, r"@@[a-z|A-Z|\d]+@@"); - -// wordcount-0.1.0: r"\w+" -consistent!(wordcount_0, r"\w+"); - -// just-0.3.12: "^([^=]+)=(.*)$" -consistent!(just_0, "^([^=]+)=(.*)$"); - -// emote-0.1.0: r":[a-zA-Z_]+?:" -consistent!(emote_0, r":[a-zA-Z_]+?:"); - -// emojicons-1.0.1: r":([a-zA-Z0-9_+-]+):" -consistent!(emojicons_0, r":([a-zA-Z0-9_+-]+):"); - -// git2_codecommit-0.1.2: r"git-codecommit\.([a-z0-9-]+)\.amazonaws\.com" -consistent!( - git2_codecommit_0, - r"git-codecommit\.([a-z0-9-]+)\.amazonaws\.com" -); - -// git-workarea-3.1.2: r"^submodule\.(?P<name>.*)\.(?P<key>[^=]*)=(?P<value>.*)$" -consistent!( - git_workarea_0, - r"^submodule\.(?P<name>.*)\.(?P<key>[^=]*)=(?P<value>.*)$" -); - -// git-shell-enforce-directory-1.0.0: r"^(?P<command>git-(?:receive|upload)-pack) '(?P<path>.+)'$" -consistent!( - git_shell_enforce_directory_0, - r"^(?P<command>git-(?:receive|upload)-pack) '(?P<path>.+)'$" -); - -// git-journal-1.6.3: r"[ \n]:(.*?):" -consistent!(git_journal_0, r"[ \n]:(.*?):"); - -// git-find-0.3.2: r"^git@(?P<host>[[:alnum:]\._-]+):(?P<path>[[:alnum:]\._\-/]+).git$" -consistent!( - git_find_0, - r"^git@(?P<host>[[:alnum:]\._-]+):(?P<path>[[:alnum:]\._\-/]+).git$" -); - -// gitlab-api-0.6.0: r"private_token=\w{20}" -consistent!(gitlab_api_0, r"private_token=\w{20}"); - -// td-client-0.7.0: "^(http://|https://)" -consistent!(td_client_0, "^(http://|https://)"); - -// karaconv-0.3.0: r"--(?P<type>[a-zA-Z]+)-- (?P<contents>.*)" -consistent!(karaconv_0, r"--(?P<type>[a-zA-Z]+)-- (?P<contents>.*)"); - -// katana-1.0.2: r"(?P<comp>et al\.)(?:\.)" -consistent!(katana_0, r"(?P<comp>et al\.)(?:\.)"); - -// katana-1.0.2: r"\.{3}" -consistent!(katana_1, r"\.{3}"); - -// katana-1.0.2: r"(?P<number>[0-9]+)\.(?P<decimal>[0-9]+)" -consistent!(katana_2, r"(?P<number>[0-9]+)\.(?P<decimal>[0-9]+)"); - -// katana-1.0.2: r"\s\.(?P<nums>[0-9]+)" -consistent!(katana_3, r"\s\.(?P<nums>[0-9]+)"); - -// katana-1.0.2: r"(?:[A-Za-z]\.){2,}" -consistent!(katana_4, r"(?:[A-Za-z]\.){2,}"); - -// katana-1.0.2: r"(?P<init>[A-Z])(?P<point>\.)" -consistent!(katana_5, r"(?P<init>[A-Z])(?P<point>\.)"); - -// katana-1.0.2: r"(?P<title>[A-Z][a-z]{1,3})(\.)" -consistent!(katana_6, r"(?P<title>[A-Z][a-z]{1,3})(\.)"); - -// katana-1.0.2: r"&==&(?P<p>[.!?])" -consistent!(katana_7, r"&==&(?P<p>[.!?])"); - -// katana-1.0.2: r"&\^&(?P<p>[.!?])" -consistent!(katana_8, r"&\^&(?P<p>[.!?])"); - -// katana-1.0.2: r"&\*\*&(?P<p>[.!?])" -consistent!(katana_9, 
r"&\*\*&(?P<p>[.!?])"); - -// katana-1.0.2: r"&=&(?P<p>[.!?])" -consistent!(katana_10, r"&=&(?P<p>[.!?])"); - -// katana-1.0.2: r"&##&(?P<p>[.!?])" -consistent!(katana_11, r"&##&(?P<p>[.!?])"); - -// katana-1.0.2: r"&\$&(?P<p>[.!?])" -consistent!(katana_12, r"&\$&(?P<p>[.!?])"); - -// kailua_syntax-1.1.0: r"@(?:_|\d+(?:/\d+(?:-\d+)?)?)" -consistent!(kailua_syntax_0, r"@(?:_|\d+(?:/\d+(?:-\d+)?)?)"); - -// kailua_syntax-1.1.0: r"<(\d+)>" -consistent!(kailua_syntax_1, r"<(\d+)>"); - -// ftp-3.0.1: r"\((\d+),(\d+),(\d+),(\d+),(\d+),(\d+)\)" -consistent!(ftp_0, r"\((\d+),(\d+),(\d+),(\d+),(\d+),(\d+)\)"); - -// ftp-3.0.1: r"\b(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\b" -consistent!(ftp_1, r"\b(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\b"); - -// ftp-3.0.1: r"\s+(\d+)\s*$" -consistent!(ftp_2, r"\s+(\d+)\s*$"); - -// vat-0.1.0: r"<countryCode>(.*?)</countryCode>" -consistent!(vat_0, r"<countryCode>(.*?)</countryCode>"); - -// vat-0.1.0: r"<vatNumber>(.*?)</vatNumber>" -consistent!(vat_1, r"<vatNumber>(.*?)</vatNumber>"); - -// vat-0.1.0: r"<name>(.*?)</name>" -consistent!(vat_2, r"<name>(.*?)</name>"); - -// vat-0.1.0: r"<address>(?s)(.*?)(?-s)</address>" -consistent!(vat_3, r"<address>(?s)(.*?)(?-s)</address>"); - -// vat-0.1.0: r"<valid>(true|false)</valid>" -consistent!(vat_4, r"<valid>(true|false)</valid>"); - -// vat-0.1.0: r"^ATU\d{8}$" -consistent!(vat_5, r"^ATU\d{8}$"); - -// vat-0.1.0: r"^BE0?\d{9, 10}$" -consistent!(vat_6, r"^BE0?\d{9, 10}$"); - -// vat-0.1.0: r"^BG\d{9,10}$" -consistent!(vat_7, r"^BG\d{9,10}$"); - -// vat-0.1.0: r"^HR\d{11}$" -consistent!(vat_8, r"^HR\d{11}$"); - -// vat-0.1.0: r"^CY\d{8}[A-Z]$" -consistent!(vat_9, r"^CY\d{8}[A-Z]$"); - -// vat-0.1.0: r"^CZ\d{8,10}$" -consistent!(vat_10, r"^CZ\d{8,10}$"); - -// vat-0.1.0: r"^DK\d{8}$" -consistent!(vat_11, r"^DK\d{8}$"); - -// vat-0.1.0: r"^EE\d{9}$" -consistent!(vat_12, r"^EE\d{9}$"); - -// vat-0.1.0: r"^FI\d{8}$" -consistent!(vat_13, r"^FI\d{8}$"); - -// vat-0.1.0: r"^FR[A-HJ-NP-Z0-9][A-HJ-NP-Z0-9]\d{9}$" -consistent!(vat_14, r"^FR[A-HJ-NP-Z0-9][A-HJ-NP-Z0-9]\d{9}$"); - -// vat-0.1.0: r"^DE\d{9}$" -consistent!(vat_15, r"^DE\d{9}$"); - -// vat-0.1.0: r"^EL\d{9}$" -consistent!(vat_16, r"^EL\d{9}$"); - -// vat-0.1.0: r"^HU\d{8}$" -consistent!(vat_17, r"^HU\d{8}$"); - -// vat-0.1.0: r"^IE\d[A-Z0-9\+\*]\d{5}[A-Z]{1,2}$" -consistent!(vat_18, r"^IE\d[A-Z0-9\+\*]\d{5}[A-Z]{1,2}$"); - -// vat-0.1.0: r"^IT\d{11}$" -consistent!(vat_19, r"^IT\d{11}$"); - -// vat-0.1.0: r"^LV\d{11}$" -consistent!(vat_20, r"^LV\d{11}$"); - -// vat-0.1.0: r"^LT(\d{9}|\d{12})$" -consistent!(vat_21, r"^LT(\d{9}|\d{12})$"); - -// vat-0.1.0: r"^LU\d{8}$" -consistent!(vat_22, r"^LU\d{8}$"); - -// vat-0.1.0: r"^MT\d{8}$" -consistent!(vat_23, r"^MT\d{8}$"); - -// vat-0.1.0: r"^NL\d{9}B\d{2}$" -consistent!(vat_24, r"^NL\d{9}B\d{2}$"); - -// vat-0.1.0: r"^PL\d{10}$" -consistent!(vat_25, r"^PL\d{10}$"); - -// vat-0.1.0: r"^PT\d{9}$" -consistent!(vat_26, r"^PT\d{9}$"); - -// vat-0.1.0: r"^RO\d{2,10}$" -consistent!(vat_27, r"^RO\d{2,10}$"); - -// vat-0.1.0: r"^SK\d{10}$" -consistent!(vat_28, r"^SK\d{10}$"); - -// vat-0.1.0: r"^SI\d{8}$" -consistent!(vat_29, r"^SI\d{8}$"); - -// vat-0.1.0: r"^ES[A-Z0-9]\d{7}[A-Z0-9]$" -consistent!(vat_30, r"^ES[A-Z0-9]\d{7}[A-Z0-9]$"); - -// vat-0.1.0: r"^SE\d{10}01$" -consistent!(vat_31, r"^SE\d{10}01$"); - -// vat-0.1.0: r"^(GB(GD|HA)\d{3}|GB\d{9}|GB\d{12})$" -consistent!(vat_32, r"^(GB(GD|HA)\d{3}|GB\d{9}|GB\d{12})$"); - -// eve-0.1.1: r"\{\{(.*)\}\}" -consistent!(eve_0, r"\{\{(.*)\}\}"); - -// egc-0.1.2: "^mio" 
-consistent!(egc_0, "^mio"); - -// pew-0.2.3: "" -consistent!(pew_0, ""); - -// pew-0.2.3: "" -consistent!(pew_1, ""); - -// mob-0.4.3: "y" -consistent!(mob_0, "y"); - -// lit-0.2.8: "@([a-z]+)" -consistent!(lit_0, "@([a-z]+)"); - -// lit-0.2.8: "([A-Z-]+):(.*)" -consistent!(lit_1, "([A-Z-]+):(.*)"); - -// lit-0.2.8: "^[a-zA-Z_][a-zA-Z0-9_]*$" -consistent!(lit_2, "^[a-zA-Z_][a-zA-Z0-9_]*$"); - -// avm-1.0.1: r"\d+\.\d+\.\d+" -consistent!(avm_0, r"\d+\.\d+\.\d+"); - -// avm-1.0.1: r"\d+\.\d+\.\d+" -consistent!(avm_1, r"\d+\.\d+\.\d+"); - -// orm-0.2.0: r"^Vec<(.+)>$" -consistent!(orm_0, r"^Vec<(.+)>$"); - -// sgf-0.1.5: r"\\(\r\n|\n\r|\n|\r)" -consistent!(sgf_0, r"\\(\r\n|\n\r|\n|\r)"); - -// sgf-0.1.5: r"\\(.)" -consistent!(sgf_1, r"\\(.)"); - -// sgf-0.1.5: r"\r\n|\n\r|\n|\r" -consistent!(sgf_2, r"\r\n|\n\r|\n|\r"); - -// sgf-0.1.5: r"([\]\\:])" -consistent!(sgf_3, r"([\]\\:])"); - -// dok-0.2.0: "^Bearer realm=\"(.+?)\",service=\"(.+?)\",scope=\"(.+?)\"$" -consistent!( - dok_0, - "^Bearer realm=\"(.+?)\",service=\"(.+?)\",scope=\"(.+?)\"$" -); - -// d20-0.1.0: r"([+-]?\s*\d+[dD]\d+|[+-]?\s*\d+)" -consistent!(d20_0, r"([+-]?\s*\d+[dD]\d+|[+-]?\s*\d+)"); - -// dvb-0.3.0: "E" -consistent!(dvb_0, "E"); - -// dvb-0.3.0: "^F" -consistent!(dvb_1, "^F"); - -// dvb-0.3.0: "^S" -consistent!(dvb_2, "^S"); - -// ger-0.2.0: r"Change-Id: (I[a-f0-9]{40})$" -consistent!(ger_0, r"Change-Id: (I[a-f0-9]{40})$"); - -// ger-0.2.0: r"(refs|ref|fix|fixes|close|closes)\s+([A-Z]{2,5}-[0-9]{1,5})$" -consistent!( - ger_1, - r"(refs|ref|fix|fixes|close|closes)\s+([A-Z]{2,5}-[0-9]{1,5})$" -); - -// n5-0.2.1: r"(\d+)(\.(\d+))?(\.(\d+))?(.*)" -consistent!(n5_0, r"(\d+)(\.(\d+))?(\.(\d+))?(.*)"); - -// po-0.1.4: r"[A-Za-z0-9]" -consistent!(po_0, r"[A-Za-z0-9]"); - -// carnix-0.8.5: "path is (‘|')?([^’'\n]*)(’|')?" -consistent!(carnix_0, "path is (‘|')?([^’'\n]*)(’|')?"); - -// carnix-0.8.5: r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?(.*)?" -consistent!(carnix_1, r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?(.*)?"); - -// carnix-0.8.5: r"(\d*)\.(\d*)\.(\d*)(-(\S*))?" -consistent!(carnix_2, r"(\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// carnix-0.8.5: r"(\S*)-(\d*)\.(\d*)\.(\d*)(-(\S*))?" 
-consistent!(carnix_3, r"(\S*)-(\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// caseless-0.2.1: r"^# CaseFolding-(\d+)\.(\d+)\.(\d+).txt$" -consistent!(caseless_0, r"^# CaseFolding-(\d+)\.(\d+)\.(\d+).txt$"); - -// caseless-0.2.1: r"^([0-9A-F]+); [CF]; ([0-9A-F ]+);" -consistent!(caseless_1, r"^([0-9A-F]+); [CF]; ([0-9A-F ]+);"); - -// cabot-0.2.0: "\r?\n\r?\n" -consistent!(cabot_0, "\r?\n\r?\n"); - -// cabot-0.2.0: "\r?\n" -consistent!(cabot_1, "\r?\n"); - -// card-validate-2.2.1: r"^600" -consistent!(card_validate_0, r"^600"); - -// card-validate-2.2.1: r"^5019" -consistent!(card_validate_1, r"^5019"); - -// card-validate-2.2.1: r"^4" -consistent!(card_validate_2, r"^4"); - -// card-validate-2.2.1: r"^(5[1-5]|2[2-7])" -consistent!(card_validate_3, r"^(5[1-5]|2[2-7])"); - -// card-validate-2.2.1: r"^3[47]" -consistent!(card_validate_4, r"^3[47]"); - -// card-validate-2.2.1: r"^3[0689]" -consistent!(card_validate_5, r"^3[0689]"); - -// card-validate-2.2.1: r"^6([045]|22)" -consistent!(card_validate_6, r"^6([045]|22)"); - -// card-validate-2.2.1: r"^(62|88)" -consistent!(card_validate_7, r"^(62|88)"); - -// card-validate-2.2.1: r"^35" -consistent!(card_validate_8, r"^35"); - -// card-validate-2.2.1: r"^[0-9]+$" -consistent!(card_validate_9, r"^[0-9]+$"); - -// cargo-testify-0.3.0: r"\d{1,} passed.*filtered out" -consistent!(cargo_testify_0, r"\d{1,} passed.*filtered out"); - -// cargo-testify-0.3.0: r"error(:|\[).*" -consistent!(cargo_testify_1, r"error(:|\[).*"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_0, r"<(.*?)>"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_1, r"<(.*?)>"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_2, r"<(.*?)>"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_3, r"<(.*?)>"); - -// cargo-incremental-0.1.23: r"(?m)^incremental: re-using (\d+) out of (\d+) modules$" -consistent!( - cargo_incremental_0, - r"(?m)^incremental: re-using (\d+) out of (\d+) modules$" -); - -// cargo-incremental-0.1.23: "(?m)(warning|error): (.*)\n --> ([^:]:\\d+:\\d+)$" -consistent!( - cargo_incremental_1, - "(?m)(warning|error): (.*)\n --> ([^:]:\\d+:\\d+)$" -); - -// cargo-incremental-0.1.23: r"(?m)^test (.*) \.\.\. (\w+)" -consistent!(cargo_incremental_2, r"(?m)^test (.*) \.\.\. 
(\w+)"); - -// cargo-incremental-0.1.23: r"(?m)(\d+) passed; (\d+) failed; (\d+) ignored; \d+ measured" -consistent!( - cargo_incremental_3, - r"(?m)(\d+) passed; (\d+) failed; (\d+) ignored; \d+ measured" -); - -// cargo-testjs-0.1.2: r"^[^-]+-[0-9a-f]+\.js$" -consistent!(cargo_testjs_0, r"^[^-]+-[0-9a-f]+\.js$"); - -// cargo-tarpaulin-0.6.2: r"\s*//" -consistent!(cargo_tarpaulin_0, r"\s*//"); - -// cargo-tarpaulin-0.6.2: r"/\*" -consistent!(cargo_tarpaulin_1, r"/\*"); - -// cargo-tarpaulin-0.6.2: r"\*/" -consistent!(cargo_tarpaulin_2, r"\*/"); - -// cargo-culture-kit-0.1.0: r"^fo" -consistent!(cargo_culture_kit_0, r"^fo"); - -// cargo-screeps-0.1.3: "\\s+" -consistent!(cargo_screeps_0, "\\s+"); - -// cargo-brew-0.1.4: r"`(\S+) v([0-9.]+)" -consistent!(cargo_brew_0, r"`(\S+) v([0-9.]+)"); - -// cargo-release-0.10.2: "^\\[.+\\]" -consistent!(cargo_release_0, "^\\[.+\\]"); - -// cargo-release-0.10.2: "^\\[\\[.+\\]\\]" -consistent!(cargo_release_1, "^\\[\\[.+\\]\\]"); - -// cargo-edit-0.3.0-beta.1: r"^https://github.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -consistent!( - cargo_edit_0, - r"^https://github.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -); - -// cargo-edit-0.3.0-beta.1: r"^https://gitlab.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -consistent!( - cargo_edit_1, - r"^https://gitlab.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -); - -// cargo-disassemble-0.1.1: ".*" -consistent!(cargo_disassemble_0, ".*"); - -// cargo-demangle-0.1.2: r"(?m)(?P<symbol>_ZN[0-9]+.*E)" -consistent!(cargo_demangle_0, r"(?m)(?P<symbol>_ZN[0-9]+.*E)"); - -// cargo-coverage-annotations-0.1.5: r"^\s*\}(?:\)*;?|\s*else\s*\{)$" -consistent!(cargo_coverage_annotations_0, r"^\s*\}(?:\)*;?|\s*else\s*\{)$"); - -// cargo-urlcrate-1.0.1: "[\u{001b}\u{009b}][\\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><]" -consistent!(cargo_urlcrate_0, "[\u{001b}\u{009b}][\\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><]"); - -// cargo-script-0.2.8: r"^\s*\*( |$)" -consistent!(cargo_script_0, r"^\s*\*( |$)"); - -// cargo-script-0.2.8: r"^(\s+)" -consistent!(cargo_script_1, r"^(\s+)"); - -// cargo-script-0.2.8: r"/\*|\*/" -consistent!(cargo_script_2, r"/\*|\*/"); - -// cargo-script-0.2.8: r"^\s*//!" -consistent!(cargo_script_3, r"^\s*//!"); - -// cargo-script-0.2.8: r"^#![^\[].*?(\r\n|\n)" -consistent!(cargo_script_4, r"^#![^\[].*?(\r\n|\n)"); - -// cargo-update-1.5.2: r"cargo-install-update\.exe-v.+" -consistent!(cargo_update_0, r"cargo-install-update\.exe-v.+"); - -// canteen-0.4.1: r"^<(?:(int|uint|str|float|path):)?([\w_][a-zA-Z0-9_]*)>$" -consistent!( - canteen_0, - r"^<(?:(int|uint|str|float|path):)?([\w_][a-zA-Z0-9_]*)>$" -); - -// thruster-cli-0.1.3: r"(.)([A-Z])" -consistent!(thruster_cli_0, r"(.)([A-Z])"); - -// thieves-cant-0.1.0: "([Z]+)$" -consistent!(thieves_cant_0, "([Z]+)$"); - -// codeowners-0.1.3: r"^@\S+/\S+" -consistent!(codeowners_0, r"^@\S+/\S+"); - -// codeowners-0.1.3: r"^@\S+" -consistent!(codeowners_1, r"^@\S+"); - -// codeowners-0.1.3: r"^\S+@\S+" -consistent!(codeowners_2, r"^\S+@\S+"); - -// conserve-0.4.2: r"^b0000 {21} complete 20[-0-9T:+]+\s +\d+s\n$" -consistent!(conserve_0, r"^b0000 {21} complete 20[-0-9T:+]+\s +\d+s\n$"); - -// commodore-0.3.0: r"(?P<greeting>\S+?) (?P<name>\S+?)$" -consistent!(commodore_0, r"(?P<greeting>\S+?) 
(?P<name>\S+?)$"); - -// corollary-0.3.0: r"([ \t]*)```haskell([\s\S]*?)```" -consistent!(corollary_0, r"([ \t]*)```haskell([\s\S]*?)```"); - -// corollary-0.3.0: r"\b((?:a|b|t)\d*)\b" -consistent!(corollary_1, r"\b((?:a|b|t)\d*)\b"); - -// colorizex-0.1.3: "NB" -consistent!(colorizex_0, "NB"); - -// colorstring-0.0.1: r"(?i)\[[a-z0-9_-]+\]" -consistent!(colorstring_0, r"(?i)\[[a-z0-9_-]+\]"); - -// colorstring-0.0.1: r"^(?i)(\[[a-z0-9_-]+\])+" -consistent!(colorstring_1, r"^(?i)(\[[a-z0-9_-]+\])+"); - -// cosmogony-0.3.0: "name:(.+)" -consistent!(cosmogony_0, "name:(.+)"); - -// cobalt-bin-0.12.1: r"(?m:^ {0,3}\[[^\]]+\]:.+$)" -consistent!(cobalt_bin_0, r"(?m:^ {0,3}\[[^\]]+\]:.+$)"); - -// comrak-0.2.12: r"[^\p{L}\p{M}\p{N}\p{Pc} -]" -consistent!(comrak_0, r"[^\p{L}\p{M}\p{N}\p{Pc} -]"); - -// content-blocker-0.2.3: "" -consistent!(content_blocker_0, ""); - -// content-blocker-0.2.3: "(?i)hi" -consistent!(content_blocker_1, "(?i)hi"); - -// content-blocker-0.2.3: "http[s]?://domain.org" -consistent!(content_blocker_2, "http[s]?://domain.org"); - -// content-blocker-0.2.3: "(?i)http[s]?://domain.org" -consistent!(content_blocker_3, "(?i)http[s]?://domain.org"); - -// content-blocker-0.2.3: "http://domain.org" -consistent!(content_blocker_4, "http://domain.org"); - -// content-blocker-0.2.3: "http://domain.org" -consistent!(content_blocker_5, "http://domain.org"); - -// content-blocker-0.2.3: "ad.html" -consistent!(content_blocker_6, "ad.html"); - -// content-blocker-0.2.3: "ad.html" -consistent!(content_blocker_7, "ad.html"); - -// content-blocker-0.2.3: "http://domain.org" -consistent!(content_blocker_8, "http://domain.org"); - -// content-blocker-0.2.3: "http://domain.org/nocookies.sjs" -consistent!(content_blocker_9, "http://domain.org/nocookies.sjs"); - -// content-blocker-0.2.3: "http://domain.org/nocookies.sjs" -consistent!(content_blocker_10, "http://domain.org/nocookies.sjs"); - -// content-blocker-0.2.3: "http://domain.org/hideme.jpg" -consistent!(content_blocker_11, "http://domain.org/hideme.jpg"); - -// content-blocker-0.2.3: "http://domain.org/ok.html" -consistent!(content_blocker_12, "http://domain.org/ok.html"); - -// content-blocker-0.2.3: "http://domain.org/ok.html\\?except_this=1" -consistent!(content_blocker_13, "http://domain.org/ok.html\\?except_this=1"); - -// victoria-dom-0.1.2: "[A-Za-z0-9=]" -consistent!(victoria_dom_0, "[A-Za-z0-9=]"); - -// numbat-1.0.0: r"^nsq://" -consistent!(numbat_0, r"^nsq://"); - -// airkorea-0.1.2: r"[\s\t\r\n]" -consistent!(airkorea_0, r"[\s\t\r\n]"); - -// airkorea-0.1.2: r"([\{\[,])|([\}\]])" -consistent!(airkorea_1, r"([\{\[,])|([\}\]])"); - -// airkorea-0.1.2: r"[^.\d]+$" -consistent!(airkorea_2, r"[^.\d]+$"); - -// rofl-0.0.1: r"\b" -// consistent!(rofl_0, r"\b"); - -// rogcat-0.2.15: r"--------- beginning of.*" -consistent!(rogcat_0, r"--------- beginning of.*"); - -// rogcat-0.2.15: r"a|e|i|o|u" -consistent!(rogcat_1, r"a|e|i|o|u"); - -// rogcat-0.2.15: r"^(\d+)([kMG])$" -consistent!(rogcat_2, r"^(\d+)([kMG])$"); - -// media_filename-0.1.4: "\\.([A-Za-z0-9]{2,4})$" -consistent!(media_filename_0, "\\.([A-Za-z0-9]{2,4})$"); - -// media_filename-0.1.4: "([0-9]{3,4}p|[0-9]{3,4}x[0-9]{3,4})" -consistent!(media_filename_1, "([0-9]{3,4}p|[0-9]{3,4}x[0-9]{3,4})"); - -// media_filename-0.1.4: "(?:^\\[([^]]+)\\]|- ?([^-]+)$)" -consistent!(media_filename_2, "(?:^\\[([^]]+)\\]|- ?([^-]+)$)"); - -// media_filename-0.1.4: "(?:[eE]([0-9]{2,3})|[^0-9A-Za-z]([0-9]{2,3})(?:v[0-9])?[^0-9A-Za-z])" -consistent!( - media_filename_3, - 
"(?:[eE]([0-9]{2,3})|[^0-9A-Za-z]([0-9]{2,3})(?:v[0-9])?[^0-9A-Za-z])" -); - -// media_filename-0.1.4: "[sS]([0-9]{1,2})" -consistent!(media_filename_4, "[sS]([0-9]{1,2})"); - -// media_filename-0.1.4: "((?i)(?:PPV.)?[HP]DTV|(?:HD)?CAM|BRRIP|[^a-z]TS[^a-z]|(?:PPV )?WEB.?DL(?: DVDRip)?|HDRip|DVDRip|CamRip|W[EB]BRip|BluRay|BD|DVD|DvDScr|hdtv)" -consistent!(media_filename_5, "((?i)(?:PPV.)?[HP]DTV|(?:HD)?CAM|BRRIP|[^a-z]TS[^a-z]|(?:PPV )?WEB.?DL(?: DVDRip)?|HDRip|DVDRip|CamRip|W[EB]BRip|BluRay|BD|DVD|DvDScr|hdtv)"); - -// media_filename-0.1.4: "((19[0-9]|20[01])[0-9])" -consistent!(media_filename_6, "((19[0-9]|20[01])[0-9])"); - -// media_filename-0.1.4: "((?i)xvid|x264|h\\.?264)" -consistent!(media_filename_7, "((?i)xvid|x264|h\\.?264)"); - -// media_filename-0.1.4: "((?i)MP3|DD5\\.?1|Dual[- ]Audio|LiNE|DTS|AAC(?:\\.?2\\.0)?|AC3(?:\\.5\\.1)?)" -consistent!(media_filename_8, "((?i)MP3|DD5\\.?1|Dual[- ]Audio|LiNE|DTS|AAC(?:\\.?2\\.0)?|AC3(?:\\.5\\.1)?)"); - -// media_filename-0.1.4: "\\[([0-9A-F]{8})\\]" -consistent!(media_filename_9, "\\[([0-9A-F]{8})\\]"); - -// termimage-0.3.2: r"(\d+)[xX](\d+)" -consistent!(termimage_0, r"(\d+)[xX](\d+)"); - -// teensy-0.1.0: r".*(\d{4}-\d{2}-\d{2}).*" -consistent!(teensy_0, r".*(\d{4}-\d{2}-\d{2}).*"); - -// telescreen-0.1.3: r"<@(.+)>" -consistent!(telescreen_0, r"<@(.+)>"); - -// tempus_fugit-0.4.4: r"^(\d+)" -consistent!(tempus_fugit_0, r"^(\d+)"); - -// fselect-0.4.1: "(\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)" -consistent!(fselect_0, "(\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)"); - -// fselect-0.4.1: "(%|_|\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)" -consistent!(fselect_1, "(%|_|\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)"); - -// fs_eventbridge-0.1.0: r"^([A-Z]+)(?:\s(.+))?\s*" -consistent!(fs_eventbridge_0, r"^([A-Z]+)(?:\s(.+))?\s*"); - -// joseki-0.0.1: r"(\w{1,2})\[(.+?)\]" -consistent!(joseki_0, r"(\w{1,2})\[(.+?)\]"); - -// tweetr-0.2.1: r"(?i)in (\d+) (second|minute|hour|day|week)s?" 
-consistent!(tweetr_0, r"(?i)in (\d+) (second|minute|hour|day|week)s?"); - -// bullet_core-0.1.1: "^(?u:[0-9])+" -consistent!(bullet_core_0, "^(?u:[0-9])+"); - -// bullet_core-0.1.1: "^(?u:[0-9])+(?u:\\.)(?u:[0-9])+" -consistent!(bullet_core_1, "^(?u:[0-9])+(?u:\\.)(?u:[0-9])+"); - -// bullet_core-0.1.1: "^(?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+" -consistent!(bullet_core_2, 
"^(?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+"); - -// bullet_core-0.1.1: "^(?u:d/d)((?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+)" -consistent!(bullet_core_3, 
"^(?u:d/d)((?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+)"); - -// bullet_core-0.1.1: "^(?u:\\()" -consistent!(bullet_core_4, "^(?u:\\()"); - -// bullet_core-0.1.1: "^(?u:\\))" -consistent!(bullet_core_5, "^(?u:\\))"); - -// bullet_core-0.1.1: "^(?u:\\*)" -consistent!(bullet_core_6, "^(?u:\\*)"); - -// bullet_core-0.1.1: "^(?u:\\+)" -consistent!(bullet_core_7, "^(?u:\\+)"); - -// bullet_core-0.1.1: "^(?u:,)" -consistent!(bullet_core_8, "^(?u:,)"); - -// bullet_core-0.1.1: "^(?u:\\-)" -consistent!(bullet_core_9, "^(?u:\\-)"); - -// bullet_core-0.1.1: "^(?u:/)" -consistent!(bullet_core_10, "^(?u:/)"); - -// bullet_core-0.1.1: "^(?u:\\[)" -consistent!(bullet_core_11, "^(?u:\\[)"); - -// bullet_core-0.1.1: "^(?u:\\])" -consistent!(bullet_core_12, "^(?u:\\])"); - -// bullet_core-0.1.1: "^(?u:\\^)" -consistent!(bullet_core_13, "^(?u:\\^)"); - -// bullet_core-0.1.1: "^(?u:·)" -consistent!(bullet_core_14, "^(?u:·)"); - -// actix-web-0.6.13: "//+" -consistent!(actix_web_0, "//+"); - -// actix-web-0.6.13: "//+" -consistent!(actix_web_1, "//+"); - -// althea_kernel_interface-0.1.0: r"(\S*) .* (\S*) (REACHABLE|STALE|DELAY)" -consistent!( - althea_kernel_interface_0, - r"(\S*) .* (\S*) (REACHABLE|STALE|DELAY)" -); - -// althea_kernel_interface-0.1.0: r"-s (.*) --ip6-dst (.*)/.* bcnt = (.*)" -consistent!( - althea_kernel_interface_1, - r"-s (.*) --ip6-dst (.*)/.* bcnt = (.*)" -); - -// alcibiades-0.3.0: r"\buci(?:\s|$)" -consistent!(alcibiades_0, r"\buci(?:\s|$)"); - -// ruma-identifiers-0.11.0: r"\A[a-z0-9._=-]+\z" -consistent!(ruma_identifiers_0, r"\A[a-z0-9._=-]+\z"); - -// rusqbin-0.2.3: r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})$" -consistent!(rusqbin_0, r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})$"); - -// rusqbin-0.2.3: 
r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})/requests/?$" -consistent!(rusqbin_1, r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})/requests/?$"); - -// rust-install-0.0.4: r"^(nightly|beta|stable)(?:-(\d{4}-\d{2}-\d{2}))?$" -consistent!( - rust_install_0, - r"^(nightly|beta|stable)(?:-(\d{4}-\d{2}-\d{2}))?$" -); - -// rust_inbox-0.0.5: "^+(.*)\r\n" -consistent!(rust_inbox_0, "^+(.*)\r\n"); - -// rust_inbox-0.0.5: r"^\* CAPABILITY (.*)\r\n" -consistent!(rust_inbox_1, r"^\* CAPABILITY (.*)\r\n"); - -// rust_inbox-0.0.5: r"^([a-zA-Z0-9]+) (OK|NO|BAD)(.*)" -consistent!(rust_inbox_2, r"^([a-zA-Z0-9]+) (OK|NO|BAD)(.*)"); - -// rust_inbox-0.0.5: r"^\* (\d+) EXISTS\r\n" -consistent!(rust_inbox_3, r"^\* (\d+) EXISTS\r\n"); - -// rust_inbox-0.0.5: r"^\* (\d+) RECENT\r\n" -consistent!(rust_inbox_4, r"^\* (\d+) RECENT\r\n"); - -// rust_inbox-0.0.5: r"^\* FLAGS (.+)\r\n" -consistent!(rust_inbox_5, r"^\* FLAGS (.+)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[UNSEEN (\d+)\](.*)\r\n" -consistent!(rust_inbox_6, r"^\* OK \[UNSEEN (\d+)\](.*)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[UIDVALIDITY (\d+)\](.*)\r\n" -consistent!(rust_inbox_7, r"^\* OK \[UIDVALIDITY (\d+)\](.*)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[UIDNEXT (\d+)\](.*)\r\n" -consistent!(rust_inbox_8, r"^\* OK \[UIDNEXT (\d+)\](.*)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[PERMANENTFLAGS (.+)\](.*)\r\n" -consistent!(rust_inbox_9, r"^\* OK \[PERMANENTFLAGS (.+)\](.*)\r\n"); - -// rustml-0.0.7: r"^[a-z]+ (\d+)$" -consistent!(rustml_0, r"^[a-z]+ (\d+)$"); - -// rustml-0.0.7: r"^[a-z]+ (\d+)$" -consistent!(rustml_1, r"^[a-z]+ (\d+)$"); - -// rustml-0.0.7: r"^[a-z]+ (\d+)$" -consistent!(rustml_2, r"^[a-z]+ (\d+)$"); - -// rustfmt-0.10.0: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" -consistent!(rustfmt_0, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"); - -// rustfmt-core-0.4.0: r"(^\s*$)|(^\s*//\s*rustfmt-[^:]+:\s*\S+)" -consistent!(rustfmt_core_0, r"(^\s*$)|(^\s*//\s*rustfmt-[^:]+:\s*\S+)"); - -// rustfmt-core-0.4.0: r"^## `([^`]+)`" -consistent!(rustfmt_core_1, r"^## `([^`]+)`"); - -// rustfmt-core-0.4.0: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" -consistent!(rustfmt_core_2, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"); - -// rustfmt-core-0.4.0: r"\s;" -consistent!(rustfmt_core_3, r"\s;"); - -// rust-enum-derive-0.4.0: r"^(0x)?([:digit:]+)$" -consistent!(rust_enum_derive_0, r"^(0x)?([:digit:]+)$"); - -// rust-enum-derive-0.4.0: r"^([:digit:]+)[:space:]*<<[:space:]*([:digit:]+)$" -consistent!( - rust_enum_derive_1, - r"^([:digit:]+)[:space:]*<<[:space:]*([:digit:]+)$" -); - -// rust-enum-derive-0.4.0: r"^[:space:]*([[:alnum:]_]+)([:space:]*=[:space:]*([:graph:]+))?[:space:]*," -consistent!(rust_enum_derive_2, r"^[:space:]*([[:alnum:]_]+)([:space:]*=[:space:]*([:graph:]+))?[:space:]*,"); - -// rust-enum-derive-0.4.0: r"^#define[:space:]+([:graph:]+)[:space:]+([:graph:]+)" -consistent!( - rust_enum_derive_3, - r"^#define[:space:]+([:graph:]+)[:space:]+([:graph:]+)" -); - -// rustsourcebundler-0.2.0: r"^\s*pub mod (.+);$" -consistent!(rustsourcebundler_0, r"^\s*pub mod (.+);$"); - -// rustsourcebundler-0.2.0: r"^\s*pub mod (.+);$" -consistent!(rustsourcebundler_1, r"^\s*pub mod (.+);$"); - -// rustfmt-nightly-0.8.2: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" -consistent!(rustfmt_nightly_0, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"); - -// rustfmt-nightly-0.8.2: r"\s;" -consistent!(rustfmt_nightly_1, r"\s;"); - -// rustache-0.1.0: r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ 
\t\r\n]*)" -consistent!(rustache_0, r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)"); - -// rustfilt-0.2.0: r"_ZN[\$\._[:alnum:]]*" -consistent!(rustfilt_0, r"_ZN[\$\._[:alnum:]]*"); - -// rustache-lists-0.1.2: r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)" -consistent!(rustache_lists_0, r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)"); - -// rural-0.7.3: "(.+)=(.+)" -consistent!(rural_0, "(.+)=(.+)"); - -// rural-0.7.3: "(.*):(.+)" -consistent!(rural_1, "(.*):(.+)"); - -// rural-0.7.3: "(.+):=(.+)" -consistent!(rural_2, "(.+):=(.+)"); - -// rural-0.7.3: "(.*)==(.+)" -consistent!(rural_3, "(.*)==(.+)"); - -// rusoto_credential-0.11.0: r"^\[([^\]]+)\]$" -consistent!(rusoto_credential_0, r"^\[([^\]]+)\]$"); - -// rumblebars-0.3.0: "([:blank:]*)$" -consistent!(rumblebars_0, "([:blank:]*)$"); - -// rumblebars-0.3.0: "(\r?\n)[:blank:]*(\\{\\{~?[#!/](?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z" -consistent!(rumblebars_1, "(\r?\n)[:blank:]*(\\{\\{~?[#!/](?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z"); - -// rumblebars-0.3.0: "(\r?\n[:blank:]*)(\\{\\{~?>(?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z" -consistent!( - rumblebars_2, - "(\r?\n[:blank:]*)(\\{\\{~?>(?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z" -); - -// rumblebars-0.3.0: "((?:[:blank:]|\r?\n)*)(\r?\n)[:blank:]*$" -consistent!(rumblebars_3, "((?:[:blank:]|\r?\n)*)(\r?\n)[:blank:]*$"); - -// rumblebars-0.3.0: "^([:blank:]*\r?\n)(.*)" -consistent!(rumblebars_4, "^([:blank:]*\r?\n)(.*)"); - -// diesel_cli-1.3.1: r"(?P<stamp>[\d-]*)_hello" -consistent!(diesel_cli_0, r"(?P<stamp>[\d-]*)_hello"); - -// dishub-0.1.1: r"(\d+)s" -consistent!(dishub_0, r"(\d+)s"); - -// spreadsheet_textconv-0.1.0: r"\n" -consistent!(spreadsheet_textconv_0, r"\n"); - -// spreadsheet_textconv-0.1.0: r"\r" -consistent!(spreadsheet_textconv_1, r"\r"); - -// spreadsheet_textconv-0.1.0: r"\t" -consistent!(spreadsheet_textconv_2, r"\t"); - -// split_aud-0.1.0: r"DELAY (-?\d+)ms" -consistent!(split_aud_0, r"DELAY (-?\d+)ms"); - -// split_aud-0.1.0: r"Trim\((\d+), ?(\d+)\)" -consistent!(split_aud_1, r"Trim\((\d+), ?(\d+)\)"); - -// spotrust-0.0.5: r"spotify:[a-z]+:[a-zA-Z0-9]+" -consistent!(spotrust_0, r"spotify:[a-z]+:[a-zA-Z0-9]+"); - -// spaceslugs-0.1.0: r"[^\x00-\x7F]" -consistent!(spaceslugs_0, r"[^\x00-\x7F]"); - -// spaceslugs-0.1.0: r"[']+" -consistent!(spaceslugs_1, r"[']+"); - -// spaceslugs-0.1.0: r"\W+" -consistent!(spaceslugs_2, r"\W+"); - -// spaceslugs-0.1.0: r"[ ]+" -consistent!(spaceslugs_3, r"[ ]+"); - -// space_email_api-0.1.1: "PHPSESSID=([0-9a-f]+)" -consistent!(space_email_api_0, "PHPSESSID=([0-9a-f]+)"); - -// lorikeet-0.7.0: "[^0-9.,]" -consistent!(lorikeet_0, "[^0-9.,]"); - -// claude-0.3.0: r"^(?:\b|(-)?)(\p{Currency_Symbol})?((?:(?:\d{1,3}[\.,])+\d{3})|\d+)(?:[\.,](\d{2}))?\b$" -consistent!(claude_0, r"^(?:\b|(-)?)(\p{Currency_Symbol})?((?:(?:\d{1,3}[\.,])+\d{3})|\d+)(?:[\.,](\d{2}))?\b$"); - -// clam-0.1.6: r"<%=\s*(.+?)\s*%>" -consistent!(clam_0, r"<%=\s*(.+?)\s*%>"); - -// classifier-0.0.3: r"(\s)" -consistent!(classifier_0, r"(\s)"); - -// click-0.3.2: r"(-----BEGIN .*-----\n)((?:(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)*\n)+)(-----END .*-----)" -consistent!(click_0, r"(-----BEGIN .*-----\n)((?:(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)*\n)+)(-----END .*-----)"); - -// click-0.3.2: r"-----BEGIN PRIVATE KEY-----" -consistent!(click_1, r"-----BEGIN PRIVATE KEY-----"); - -// ultrastar-txt-0.1.2: r"#([A-Z3a-z]*):(.*)" 
-consistent!(ultrastar_txt_0, r"#([A-Z3a-z]*):(.*)"); - -// ultrastar-txt-0.1.2: "^-\\s?(-?[0-9]+)\\s*$" -consistent!(ultrastar_txt_1, "^-\\s?(-?[0-9]+)\\s*$"); - -// ultrastar-txt-0.1.2: "^-\\s?(-?[0-9]+)\\s+(-?[0-9]+)" -consistent!(ultrastar_txt_2, "^-\\s?(-?[0-9]+)\\s+(-?[0-9]+)"); - -// ultrastar-txt-0.1.2: "^(.)\\s*(-?[0-9]+)\\s+(-?[0-9]+)\\s+(-?[0-9]+)\\s?(.*)" -consistent!( - ultrastar_txt_3, - "^(.)\\s*(-?[0-9]+)\\s+(-?[0-9]+)\\s+(-?[0-9]+)\\s?(.*)" -); - -// ultrastar-txt-0.1.2: "^P\\s?(-?[0-9]+)" -consistent!(ultrastar_txt_4, "^P\\s?(-?[0-9]+)"); - -// db-accelerate-2.0.0: r"^template\.add($|\..+$)" -consistent!(db_accelerate_0, r"^template\.add($|\..+$)"); - -// db-accelerate-2.0.0: r"^template\.sub($|\..+$)" -consistent!(db_accelerate_1, r"^template\.sub($|\..+$)"); - -// sterling-0.3.0: r"(\d+)([cegps])" -consistent!(sterling_0, r"(\d+)([cegps])"); - -// stache-0.2.0: r"[^\w]" -consistent!(stache_0, r"[^\w]"); - -// strukt-0.1.0: "\"([<>]?)([xcbB\\?hHiIlLqQfdspP]*)\"" -consistent!(strukt_0, "\"([<>]?)([xcbB\\?hHiIlLqQfdspP]*)\""); - -// steamid-ng-0.3.1: r"^STEAM_([0-4]):([0-1]):([0-9]{1,10})$" -consistent!(steamid_ng_0, r"^STEAM_([0-4]):([0-1]):([0-9]{1,10})$"); - -// steamid-ng-0.3.1: r"^\[([AGMPCgcLTIUai]):([0-4]):([0-9]{1,10})(:([0-9]+))?\]$" -consistent!( - steamid_ng_1, - r"^\[([AGMPCgcLTIUai]):([0-4]):([0-9]{1,10})(:([0-9]+))?\]$" -); - -// strscan-0.1.1: r"^\w+" -consistent!(strscan_0, r"^\w+"); - -// strscan-0.1.1: r"^\s+" -consistent!(strscan_1, r"^\s+"); - -// strscan-0.1.1: r"^\w+" -consistent!(strscan_2, r"^\w+"); - -// strscan-0.1.1: r"^\s+" -consistent!(strscan_3, r"^\s+"); - -// strscan-0.1.1: r"^(\w+)\s+" -consistent!(strscan_4, r"^(\w+)\s+"); - -// tk-carbon-0.2.0: r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$" -consistent!(tk_carbon_0, r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$"); - -// tk-carbon-0.2.0: r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$" -consistent!(tk_carbon_1, r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$"); - -// evalrs-0.0.10: r"extern\s+crate\s+([a-z0-9_]+)\s*;(\s*//(.+))?" 
-consistent!(evalrs_0, r"extern\s+crate\s+([a-z0-9_]+)\s*;(\s*//(.+))?"); - -// evalrs-0.0.10: r"(?m)^# " -consistent!(evalrs_1, r"(?m)^# "); - -// evalrs-0.0.10: r"(?m)^\s*fn +main *\( *\)" -consistent!(evalrs_2, r"(?m)^\s*fn +main *\( *\)"); - -// evalrs-0.0.10: r"(extern\s+crate\s+[a-z0-9_]+\s*;)" -consistent!(evalrs_3, r"(extern\s+crate\s+[a-z0-9_]+\s*;)"); - -// gate_build-0.5.0: "(.*)_t([0-9]+)" -consistent!(gate_build_0, "(.*)_t([0-9]+)"); - -// rake-0.1.1: r"[^\P{P}-]|\s+-\s+" -consistent!(rake_0, r"[^\P{P}-]|\s+-\s+"); - -// rafy-0.2.1: r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*" -consistent!(rafy_0, r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*"); - -// raven-0.2.1: r"^(?P<protocol>.*?)://(?P<public_key>.*?):(?P<secret_key>.*?)@(?P<host>.*?)/(?P<path>.*/)?(?P<project_id>.*)$" -consistent!(raven_0, r"^(?P<protocol>.*?)://(?P<public_key>.*?):(?P<secret_key>.*?)@(?P<host>.*?)/(?P<path>.*/)?(?P<project_id>.*)$"); - -// rargs-0.2.0: r"\{[[:space:]]*[^{}]*[[:space:]]*\}" -consistent!(rargs_0, r"\{[[:space:]]*[^{}]*[[:space:]]*\}"); - -// rargs-0.2.0: r"^\{[[:space:]]*(?P<name>[[:word:]]*)[[:space:]]*\}$" -consistent!(rargs_1, r"^\{[[:space:]]*(?P<name>[[:word:]]*)[[:space:]]*\}$"); - -// rargs-0.2.0: r"^\{[[:space:]]*(?P<num>-?\d+)[[:space:]]*\}$" -consistent!(rargs_2, r"^\{[[:space:]]*(?P<num>-?\d+)[[:space:]]*\}$"); - -// rargs-0.2.0: r"^\{(?P<left>-?\d*)?\.\.(?P<right>-?\d*)?(?::(?P<sep>.*))?\}$" -consistent!( - rargs_3, - r"^\{(?P<left>-?\d*)?\.\.(?P<right>-?\d*)?(?::(?P<sep>.*))?\}$" -); - -// rargs-0.2.0: r"(.*?)[[:space:]]+|(.*?)$" -consistent!(rargs_4, r"(.*?)[[:space:]]+|(.*?)$"); - -// indradb-lib-0.15.0: r"[a-zA-Z0-9]{8}" -consistent!(indradb_lib_0, r"[a-zA-Z0-9]{8}"); - -// fungi-lang-0.1.50: r"::" -consistent!(fungi_lang_0, r"::"); - -// nickel-0.10.1: "/hello/(?P<name>[a-zA-Z]+)" -consistent!(nickel_0, "/hello/(?P<name>[a-zA-Z]+)"); - -// nickel-0.10.1: "/hello/(?P<name>[a-zA-Z]+)" -consistent!(nickel_1, "/hello/(?P<name>[a-zA-Z]+)"); - -// pact_verifier-0.4.0: r"\{(\w+)\}" -consistent!(pact_verifier_0, r"\{(\w+)\}"); - -// pact_matching-0.4.1: "application/.*json" -consistent!(pact_matching_0, "application/.*json"); - -// pact_matching-0.4.1: "application/json.*" -consistent!(pact_matching_1, "application/json.*"); - -// pact_matching-0.4.1: "application/.*xml" -consistent!(pact_matching_2, "application/.*xml"); - -// pangu-0.2.0: "([\"'\\(\\[\\{{<\u{201c}])(\\s*)(.+?)(\\s*)([\"'\\)\\]\\}}>\u{201d}])" -consistent!( - pangu_0, - "([\"'\\(\\[\\{{<\u{201c}])(\\s*)(.+?)(\\s*)([\"'\\)\\]\\}}>\u{201d}])" -); - -// pangu-0.2.0: "([\\(\\[\\{{<\u{201c}]+)(\\s*)(.+?)(\\s*)([\\)\\]\\}}>\u{201d}]+)" -consistent!( - pangu_1, - "([\\(\\[\\{{<\u{201c}]+)(\\s*)(.+?)(\\s*)([\\)\\]\\}}>\u{201d}]+)" -); - -// parser-haskell-0.2.0: r"\{-[\s\S]*?-\}" -consistent!(parser_haskell_0, r"\{-[\s\S]*?-\}"); - -// parser-haskell-0.2.0: r"(?m);+\s*$" -consistent!(parser_haskell_1, r"(?m);+\s*$"); - -// parser-haskell-0.2.0: r"(?m)^#(if|ifn?def|endif|else|include|elif).*" -consistent!(parser_haskell_2, r"(?m)^#(if|ifn?def|endif|else|include|elif).*"); - -// parser-haskell-0.2.0: r"'([^'\\]|\\[A-Z]{1,3}|\\.)'" -consistent!(parser_haskell_3, r"'([^'\\]|\\[A-Z]{1,3}|\\.)'"); - -// parser-haskell-0.2.0: r"forall\s+(.*?)\." 
-consistent!(parser_haskell_4, r"forall\s+(.*?)\."); - -// html2md-0.2.1: "\\s{2,}" -consistent!(html2md_0, "\\s{2,}"); - -// html2md-0.2.1: "\\n{2,}" -consistent!(html2md_1, "\\n{2,}"); - -// html2md-0.2.1: "(?m)(\\S) $" -consistent!(html2md_2, "(?m)(\\S) $"); - -// html2md-0.2.1: "(?m)^[-*] " -consistent!(html2md_3, "(?m)^[-*] "); - -// ovpnfile-0.1.2: r"#.*$" -consistent!(ovpnfile_0, r"#.*$"); - -// ovpnfile-0.1.2: r"^<(\S+)>" -consistent!(ovpnfile_1, r"^<(\S+)>"); - -// ovpnfile-0.1.2: r"^</(\S+)>" -consistent!(ovpnfile_2, r"^</(\S+)>"); - -// screenruster-saver-fractal-0.1.1: r"#([:xdigit:]{2})([:xdigit:]{2})([:xdigit:]{2})" -consistent!( - screenruster_saver_fractal_0, - r"#([:xdigit:]{2})([:xdigit:]{2})([:xdigit:]{2})" -); - -// scarlet-0.2.2: r"rgb\((?: *(\d{1,3}),)(?: *(\d{1,3}),)(?: *(\d{1,3}))\)" -consistent!( - scarlet_0, - r"rgb\((?: *(\d{1,3}),)(?: *(\d{1,3}),)(?: *(\d{1,3}))\)" -); - -// cpp_to_rust_generator-0.2.0: r"^([\w:]+)<(.+)>$" -consistent!(cpp_to_rust_generator_0, r"^([\w:]+)<(.+)>$"); - -// cpp_to_rust_generator-0.2.0: r"^type-parameter-(\d+)-(\d+)$" -consistent!(cpp_to_rust_generator_1, r"^type-parameter-(\d+)-(\d+)$"); - -// cpp_to_rust_generator-0.2.0: r"^([\w~]+)<[^<>]+>$" -consistent!(cpp_to_rust_generator_2, r"^([\w~]+)<[^<>]+>$"); - -// cpp_to_rust_generator-0.2.0: r"(signals|Q_SIGNALS)\s*:" -consistent!(cpp_to_rust_generator_3, r"(signals|Q_SIGNALS)\s*:"); - -// cpp_to_rust_generator-0.2.0: r"(slots|Q_SLOTS)\s*:" -consistent!(cpp_to_rust_generator_4, r"(slots|Q_SLOTS)\s*:"); - -// cpp_to_rust_generator-0.2.0: r"(public|protected|private)\s*:" -consistent!(cpp_to_rust_generator_5, r"(public|protected|private)\s*:"); - -// cpp_to_rust-0.5.3: r"^([\w:]+)<(.+)>$" -consistent!(cpp_to_rust_0, r"^([\w:]+)<(.+)>$"); - -// cpp_to_rust-0.5.3: r"^type-parameter-(\d+)-(\d+)$" -consistent!(cpp_to_rust_1, r"^type-parameter-(\d+)-(\d+)$"); - -// cpp_to_rust-0.5.3: r"^([\w~]+)<[^<>]+>$" -consistent!(cpp_to_rust_2, r"^([\w~]+)<[^<>]+>$"); - -// cpp_to_rust-0.5.3: r"(signals|Q_SIGNALS)\s*:" -consistent!(cpp_to_rust_3, r"(signals|Q_SIGNALS)\s*:"); - -// cpp_to_rust-0.5.3: r"(slots|Q_SLOTS)\s*:" -consistent!(cpp_to_rust_4, r"(slots|Q_SLOTS)\s*:"); - -// cpp_to_rust-0.5.3: r"(public|protected|private)\s*:" -consistent!(cpp_to_rust_5, r"(public|protected|private)\s*:"); - -// fritzbox_logs-0.2.0: "(\\d{2}\\.\\d{2}\\.\\d{2}) (\\d{2}:\\d{2}:\\d{2}) (.*)" -consistent!( - fritzbox_logs_0, - "(\\d{2}\\.\\d{2}\\.\\d{2}) (\\d{2}:\\d{2}:\\d{2}) (.*)" -); - -// fractal-matrix-api-3.29.0: r"mxc://(?P<server>[^/]+)/(?P<media>.+)" -consistent!(fractal_matrix_api_0, r"mxc://(?P<server>[^/]+)/(?P<media>.+)"); - -// smtp2go-0.1.4: r"^api-[a-zA-Z0-9]{32}$" -consistent!(smtp2go_0, r"^api-[a-zA-Z0-9]{32}$"); - -// pusher-0.3.1: r"^[-a-zA-Z0-9_=@,.;]+$" -consistent!(pusher_0, r"^[-a-zA-Z0-9_=@,.;]+$"); - -// pusher-0.3.1: r"\A\d+\.\d+\z" -consistent!(pusher_1, r"\A\d+\.\d+\z"); - -// bakervm-0.9.0: r"^\.(.+?) +?(.+)$" -consistent!(bakervm_0, r"^\.(.+?) +?(.+)$"); - -// bakervm-0.9.0: r"^\.([^\s]+)$" -consistent!(bakervm_1, r"^\.([^\s]+)$"); - -// bakervm-0.9.0: r"^include! +([^\s]+)$" -consistent!(bakervm_2, r"^include! 
+([^\s]+)$"); - -// bakervm-0.9.0: r"^@(\d+)$" -consistent!(bakervm_3, r"^@(\d+)$"); - -// bakervm-0.9.0: r"^true|false$" -consistent!(bakervm_4, r"^true|false$"); - -// bakervm-0.9.0: r"^(-?\d+)?\.[0-9]+$" -consistent!(bakervm_5, r"^(-?\d+)?\.[0-9]+$"); - -// bakervm-0.9.0: r"^(-?\d+)?$" -consistent!(bakervm_6, r"^(-?\d+)?$"); - -// bakervm-0.9.0: r"^#([0-9abcdefABCDEF]{6})$" -consistent!(bakervm_7, r"^#([0-9abcdefABCDEF]{6})$"); - -// bakervm-0.9.0: r"^'(.)'$" -consistent!(bakervm_8, r"^'(.)'$"); - -// bakervm-0.9.0: r"^\$vi\((\d+)\)$" -consistent!(bakervm_9, r"^\$vi\((\d+)\)$"); - -// bakervm-0.9.0: r"^\$key\((\d+)\)$" -consistent!(bakervm_10, r"^\$key\((\d+)\)$"); - -// banana-0.0.2: "(?P<type>[A-Z^']+) (?P<route>[^']+) HTTP/(?P<http>[^']+)" -consistent!( - banana_0, - "(?P<type>[A-Z^']+) (?P<route>[^']+) HTTP/(?P<http>[^']+)" -); - -// serial-key-2.0.0: r"[A-F0-9]{8}" -consistent!(serial_key_0, r"[A-F0-9]{8}"); - -// serde-hjson-0.8.1: "[\\\\\"\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]" -consistent!(serde_hjson_0, "[\\\\\"\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"); - -// serde-hjson-0.8.1: "[\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]" -consistent!(serde_hjson_1, "[\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"); - -// serde-hjson-0.8.1: "'''|[\x00-\x09\x0b\x0c\x0e-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]" -consistent!(serde_hjson_2, "'''|[\x00-\x09\x0b\x0c\x0e-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"); - -// serde-odbc-0.1.0: r"/todos/(?P<id>\d+)" -consistent!(serde_odbc_0, r"/todos/(?P<id>\d+)"); - -// sentry-0.6.0: r"^(?:_<)?([a-zA-Z0-9_]+?)(?:\.\.|::)" -consistent!(sentry_0, r"^(?:_<)?([a-zA-Z0-9_]+?)(?:\.\.|::)"); - -// sentiment-0.1.1: r"[^a-zA-Z0 -]+" -consistent!(sentiment_0, r"[^a-zA-Z0 -]+"); - -// sentiment-0.1.1: r" {2,}" -consistent!(sentiment_1, r" {2,}"); - -// verilog-0.0.1: r"(?m)//.*" -consistent!(verilog_0, r"(?m)//.*"); - -// verex-0.2.2: "(?P<robot>C3PO)" -consistent!(verex_0, "(?P<robot>C3PO)"); - -// handlebars-0.32.4: ">|<|\"|&" -consistent!(handlebars_0, ">|<|\"|&"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789]{4}$" -consistent!(haikunator_0, r"^\w+-\w+-[0123456789]{4}$"); - -// haikunator-0.1.2: r"^\w+@\w+@[0123456789]{4}$" -consistent!(haikunator_1, r"^\w+@\w+@[0123456789]{4}$"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789abcdef]{4}$" -consistent!(haikunator_2, r"^\w+-\w+-[0123456789abcdef]{4}$"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789忠犬ハチ公]{10}$" -consistent!(haikunator_3, r"^\w+-\w+-[0123456789忠犬ハチ公]{10}$"); - -// haikunator-0.1.2: r"^\w+-\w+$" -consistent!(haikunator_4, r"^\w+-\w+$"); - -// haikunator-0.1.2: r"^\w+-\w+-[foo]{4}$" -consistent!(haikunator_5, r"^\w+-\w+-[foo]{4}$"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789忠犬ハチ公]{5}$" -consistent!(haikunator_6, r"^\w+-\w+-[0123456789忠犬ハチ公]{5}$"); - -// bobbin-cli-0.8.3: r"(.*)" -consistent!(bobbin_cli_0, r"(.*)"); - -// bobbin-cli-0.8.3: r"rustc (.*)" -consistent!(bobbin_cli_1, 
r"rustc (.*)"); - -// bobbin-cli-0.8.3: r"cargo (.*)" -consistent!(bobbin_cli_2, r"cargo (.*)"); - -// bobbin-cli-0.8.3: r"xargo (.*)\n" -consistent!(bobbin_cli_3, r"xargo (.*)\n"); - -// bobbin-cli-0.8.3: r"Open On-Chip Debugger (.*)" -consistent!(bobbin_cli_4, r"Open On-Chip Debugger (.*)"); - -// bobbin-cli-0.8.3: r"arm-none-eabi-gcc \(GNU Tools for ARM Embedded Processors[^\)]*\) (.*)" -consistent!( - bobbin_cli_5, - r"arm-none-eabi-gcc \(GNU Tools for ARM Embedded Processors[^\)]*\) (.*)" -); - -// bobbin-cli-0.8.3: r"(?m).*\nBasic Open Source SAM-BA Application \(BOSSA\) Version (.*)\n" -consistent!( - bobbin_cli_6, - r"(?m).*\nBasic Open Source SAM-BA Application \(BOSSA\) Version (.*)\n" -); - -// bobbin-cli-0.8.3: r"(?m)SEGGER J-Link Commander (.*)\n" -consistent!(bobbin_cli_7, r"(?m)SEGGER J-Link Commander (.*)\n"); - -// bobbin-cli-0.8.3: r"(?m)Teensy Loader, Command Line, Version (.*)\n" -consistent!(bobbin_cli_8, r"(?m)Teensy Loader, Command Line, Version (.*)\n"); - -// bobbin-cli-0.8.3: r"dfu-util (.*)\n" -consistent!(bobbin_cli_9, r"dfu-util (.*)\n"); - -// borsholder-0.9.1: r"^/static/[\w.]+$" -consistent!(borsholder_0, r"^/static/[\w.]+$"); - -// borsholder-0.9.1: r"^/timeline/([0-9]+)$" -consistent!(borsholder_1, r"^/timeline/([0-9]+)$"); - -// fblog-1.0.1: "\u{001B}\\[[\\d;]*[^\\d;]" -consistent!(fblog_0, "\u{001B}\\[[\\d;]*[^\\d;]"); - -// fblog-1.0.1: "\u{001B}\\[[\\d;]*[^\\d;]" -consistent!(fblog_1, "\u{001B}\\[[\\d;]*[^\\d;]"); - -// toml-query-0.6.0: r"^\[\d+\]$" -consistent!(toml_query_0, r"^\[\d+\]$"); - -// todo-txt-1.1.0: r" (?P<key>[^\s]+):(?P<value>[^\s^/]+)" -consistent!(todo_txt_0, r" (?P<key>[^\s]+):(?P<value>[^\s^/]+)"); - -// findr-0.1.5: r"\band\b" -consistent!(findr_0, r"\band\b"); - -// findr-0.1.5: r"\bor\b" -consistent!(findr_1, r"\bor\b"); - -// findr-0.1.5: r"\bnot\b" -consistent!(findr_2, r"\bnot\b"); - -// file-sniffer-3.0.1: r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" -consistent!(file_sniffer_0, r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// file-sniffer-3.0.1: r".*?\.(stats|conf|h|cache.*|dat|pc|info)$" -consistent!(file_sniffer_1, r".*?\.(stats|conf|h|cache.*|dat|pc|info)$"); - -// file-sniffer-3.0.1: r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" -consistent!(file_sniffer_2, r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// file-sniffer-3.0.1: r".*?\.(stats|conf|h|cache.*)$" -consistent!(file_sniffer_3, r".*?\.(stats|conf|h|cache.*)$"); - -// file-sniffer-3.0.1: r"(\.git|\.pijul|_darcs|\.hg)$" -consistent!(file_sniffer_4, r"(\.git|\.pijul|_darcs|\.hg)$"); - -// file_logger-0.1.0: "test" -consistent!(file_logger_0, "test"); - -// file_scanner-0.2.0: r"foo" -consistent!(file_scanner_0, r"foo"); - -// file_scanner-0.2.0: r"a+b" -consistent!(file_scanner_1, r"a+b"); - -// file_scanner-0.2.0: r"a[ab]*b" 
-consistent!(file_scanner_2, r"a[ab]*b"); - -// file_scanner-0.2.0: r"\s+" -consistent!(file_scanner_3, r"\s+"); - -// file_scanner-0.2.0: r"\s+" -consistent!(file_scanner_4, r"\s+"); - -// cellsplit-0.2.1: r"^\s*([^\s]+) %cellsplit<\d+>$" -consistent!(cellsplit_0, r"^\s*([^\s]+) %cellsplit<\d+>$"); - -// cellsplit-0.2.1: r"^\s*([^\s]+) %cellsplit<\d+>$" -consistent!(cellsplit_1, r"^\s*([^\s]+) %cellsplit<\d+>$"); - -// aterm-0.20.0: r"^[+\-]?[0-9]+" -consistent!(aterm_0, r"^[+\-]?[0-9]+"); - -// aterm-0.20.0: r"^[+\-]?[0-9]+\.[0-9]*([eE][+\-]?[0-9]+)?" -consistent!(aterm_1, r"^[+\-]?[0-9]+\.[0-9]*([eE][+\-]?[0-9]+)?"); - -// atarashii_imap-0.3.0: r"^[*] OK" -consistent!(atarashii_imap_0, r"^[*] OK"); - -// atarashii_imap-0.3.0: r"FLAGS\s\((.+)\)" -consistent!(atarashii_imap_1, r"FLAGS\s\((.+)\)"); - -// atarashii_imap-0.3.0: r"\[PERMANENTFLAGS\s\((.+)\)\]" -consistent!(atarashii_imap_2, r"\[PERMANENTFLAGS\s\((.+)\)\]"); - -// atarashii_imap-0.3.0: r"\[UIDVALIDITY\s(\d+)\]" -consistent!(atarashii_imap_3, r"\[UIDVALIDITY\s(\d+)\]"); - -// atarashii_imap-0.3.0: r"(\d+)\sEXISTS" -consistent!(atarashii_imap_4, r"(\d+)\sEXISTS"); - -// atarashii_imap-0.3.0: r"(\d+)\sRECENT" -consistent!(atarashii_imap_5, r"(\d+)\sRECENT"); - -// atarashii_imap-0.3.0: r"\[UNSEEN\s(\d+)\]" -consistent!(atarashii_imap_6, r"\[UNSEEN\s(\d+)\]"); - -// atarashii_imap-0.3.0: r"\[UIDNEXT\s(\d+)\]" -consistent!(atarashii_imap_7, r"\[UIDNEXT\s(\d+)\]"); - -// editorconfig-1.0.0: r"\\(\{|\})" -consistent!(editorconfig_0, r"\\(\{|\})"); - -// editorconfig-1.0.0: r"(^|[^\\])\\\|" -consistent!(editorconfig_1, r"(^|[^\\])\\\|"); - -// editorconfig-1.0.0: r"\[([^\]]*)$" -consistent!(editorconfig_2, r"\[([^\]]*)$"); - -// editorconfig-1.0.0: r"\[(.*/.*)\]" -consistent!(editorconfig_3, r"\[(.*/.*)\]"); - -// editorconfig-1.0.0: r"\{(-?\d+\\\.\\\.-?\d+)\}" -consistent!(editorconfig_4, r"\{(-?\d+\\\.\\\.-?\d+)\}"); - -// editorconfig-1.0.0: r"\{([^,]+)\}" -consistent!(editorconfig_5, r"\{([^,]+)\}"); - -// editorconfig-1.0.0: r"\{(([^\}].*)?(,|\|)(.*[^\\])?)\}" -consistent!(editorconfig_6, r"\{(([^\}].*)?(,|\|)(.*[^\\])?)\}"); - -// editorconfig-1.0.0: r"^/" -consistent!(editorconfig_7, r"^/"); - -// editorconfig-1.0.0: r"(^|[^\\])(\{|\})" -consistent!(editorconfig_8, r"(^|[^\\])(\{|\})"); - -// edmunge-1.0.0: "^#!.*\n" -consistent!(edmunge_0, "^#!.*\n"); - -// unicode_names2_macros-0.2.0: r"\\N\{(.*?)(?:\}|$)" -consistent!(unicode_names2_macros_0, r"\\N\{(.*?)(?:\}|$)"); - -// unidiff-0.2.1: r"^--- (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" -consistent!( - unidiff_0, - r"^--- (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" -); - -// unidiff-0.2.1: r"^\+\+\+ (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" -consistent!( - unidiff_1, - r"^\+\+\+ (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" -); - -// unidiff-0.2.1: r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)" -consistent!(unidiff_2, r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)"); - -// unidiff-0.2.1: r"^(?P<line_type>[- \n\+\\]?)(?P<value>.*)" -consistent!(unidiff_3, r"^(?P<line_type>[- \n\+\\]?)(?P<value>.*)"); - -// slippy-map-tiles-0.13.1: "/?(?P<zoom>[0-9]?[0-9])/(?P<x>[0-9]{1,10})/(?P<y>[0-9]{1,10})(\\.[a-zA-Z]{3,4})?$" -consistent!(slippy_map_tiles_0, "/?(?P<zoom>[0-9]?[0-9])/(?P<x>[0-9]{1,10})/(?P<y>[0-9]{1,10})(\\.[a-zA-Z]{3,4})?$"); - -// slippy-map-tiles-0.13.1: r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) 
(?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$" -consistent!(slippy_map_tiles_1, r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$"); - -// slippy-map-tiles-0.13.1: r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$" -consistent!(slippy_map_tiles_2, r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$"); - -// sonos-0.1.2: r"^https?://(.+?):1400/xml" -consistent!(sonos_0, r"^https?://(.+?):1400/xml"); - -// validator_derive-0.7.0: r"^[a-z]{2}$" -consistent!(validator_derive_0, r"^[a-z]{2}$"); - -// validator_derive-0.7.0: r"[a-z]{2}" -consistent!(validator_derive_1, r"[a-z]{2}"); - -// validator_derive-0.7.0: r"[a-z]{2}" -consistent!(validator_derive_2, r"[a-z]{2}"); - -// nginx-config-0.8.0: r"one of \d+ options" -consistent!(nginx_config_0, r"one of \d+ options"); - -// waltz-0.4.0: r"[\s,]" -consistent!(waltz_0, r"[\s,]"); - -// warheadhateus-0.2.1: r"^aws_access_key_id = (.*)" -consistent!(warheadhateus_0, r"^aws_access_key_id = (.*)"); - -// warheadhateus-0.2.1: r"^aws_secret_access_key = (.*)" -consistent!(warheadhateus_1, r"^aws_secret_access_key = (.*)"); - -// warheadhateus-0.2.1: r"^aws_access_key_id = (.*)" -consistent!(warheadhateus_2, r"^aws_access_key_id = (.*)"); - -// warheadhateus-0.2.1: r"^aws_secret_access_key = (.*)" -consistent!(warheadhateus_3, r"^aws_secret_access_key = (.*)"); - -// jieba-rs-0.2.2: r"([\u{4E00}-\u{9FD5}a-zA-Z0-9+#&\._%]+)" -consistent!(jieba_rs_0, r"([\u{4E00}-\u{9FD5}a-zA-Z0-9+#&\._%]+)"); - -// jieba-rs-0.2.2: r"(\r\n|\s)" -consistent!(jieba_rs_1, r"(\r\n|\s)"); - -// jieba-rs-0.2.2: "([\u{4E00}-\u{9FD5}]+)" -consistent!(jieba_rs_2, "([\u{4E00}-\u{9FD5}]+)"); - -// jieba-rs-0.2.2: r"[^a-zA-Z0-9+#\n]" -consistent!(jieba_rs_3, r"[^a-zA-Z0-9+#\n]"); - -// jieba-rs-0.2.2: r"([\u{4E00}-\u{9FD5}]+)" -consistent!(jieba_rs_4, r"([\u{4E00}-\u{9FD5}]+)"); - -// jieba-rs-0.2.2: r"([a-zA-Z0-9]+(?:.\d+)?%?)" -consistent!(jieba_rs_5, r"([a-zA-Z0-9]+(?:.\d+)?%?)"); - -// lalrpop-0.15.2: r"Span\([0-9 ,]*\)" -consistent!(lalrpop_0, r"Span\([0-9 ,]*\)"); - -// lalrpop-snap-0.15.2: r"Span\([0-9 ,]*\)" -consistent!(lalrpop_snap_0, r"Span\([0-9 ,]*\)"); - -// nlp-tokenize-0.1.0: r"[\S]+" -consistent!(nlp_tokenize_0, r"[\S]+"); - -// kbgpg-0.1.2: "[[:xdigit:]][70]" -consistent!(kbgpg_0, "[[:xdigit:]][70]"); - -// cdbd-0.1.1: r"^((?P<address>.*):)?(?P<port>\d+)$" -consistent!(cdbd_0, r"^((?P<address>.*):)?(?P<port>\d+)$"); - -// mbutiles-0.1.1: r"[\w\s=+-/]+\((\{(.|\n)*\})\);?" 
-consistent!(mbutiles_0, r"[\w\s=+-/]+\((\{(.|\n)*\})\);?"); - -// extrahop-0.2.5: r"^-\d+(?:ms|s|m|h|d|w|y)?$" -consistent!(extrahop_0, r"^-\d+(?:ms|s|m|h|d|w|y)?$"); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$" -consistent!(pippin_0, "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$"); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -consistent!( - pippin_1, - "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$" -consistent!(pippin_2, "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$"); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -consistent!( - pippin_3, - "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -); - -// pippin-0.1.0: "^.*pn(0|[1-9][0-9]*)(-ss(0|[1-9][0-9]*)(\\.pip|-cl(0|[1-9][0-9]*)\\.piplog))?$" -consistent!(pippin_4, "^.*pn(0|[1-9][0-9]*)(-ss(0|[1-9][0-9]*)(\\.pip|-cl(0|[1-9][0-9]*)\\.piplog))?$"); - -// pippin-0.1.0: "^(.*)-ss(?:0|[1-9][0-9]*)(?:\\.pip|-cl(?:0|[1-9][0-9]*)\\.piplog)$" -consistent!( - pippin_5, - "^(.*)-ss(?:0|[1-9][0-9]*)(?:\\.pip|-cl(?:0|[1-9][0-9]*)\\.piplog)$" -); - -// pinyin-0.3.0: r"(?i)[āáǎàēéěèōóǒòīíǐìūúǔùüǘǚǜńň]" -consistent!( - pinyin_0, - r"(?i)[āáǎàēéěèōóǒòīíǐìūúǔùüǘǚǜńň]" -); - -// pinyin-0.3.0: r"([aeoiuvnm])([0-4])$" -consistent!(pinyin_1, r"([aeoiuvnm])([0-4])$"); - -// duration-parser-0.2.0: r"(?P<value>\d+)(?P<units>[a-z])" -consistent!(duration_parser_0, r"(?P<value>\d+)(?P<units>[a-z])"); - -// dutree-0.2.7: r"^\d+\D?$" -consistent!(dutree_0, r"^\d+\D?$"); - -// djangohashers-0.3.0: r"^[A-Za-z0-9]*$" -consistent!(djangohashers_0, r"^[A-Za-z0-9]*$"); - -// rtag-0.3.5: r"^[A-Z][A-Z0-9]{2,}$" -consistent!(rtag_0, r"^[A-Z][A-Z0-9]{2,}$"); - -// rtag-0.3.5: r"^http://www\.emusic\.com" -consistent!(rtag_1, r"^http://www\.emusic\.com"); - -// rtag-0.3.5: r"^[A-Z][A-Z0-9]{2,}" -consistent!(rtag_2, r"^[A-Z][A-Z0-9]{2,}"); - -// rtag-0.3.5: r"(^[\x{0}|\x{feff}|\x{fffe}]*|[\x{0}|\x{feff}|\x{fffe}]*$)" -consistent!( - rtag_3, - r"(^[\x{0}|\x{feff}|\x{fffe}]*|[\x{0}|\x{feff}|\x{fffe}]*$)" -); - -// rtow-0.1.0: r"(\d+)[xX](\d+)" -consistent!(rtow_0, r"(\d+)[xX](\d+)"); - -// pleingres-sql-plugin-0.1.0: r"\$([a-zA-Z0-9_]+)" -consistent!(pleingres_sql_plugin_0, r"\$([a-zA-Z0-9_]+)"); - -// dono-2.0.0: "[\\n]+" -consistent!(dono_0, "[\\n]+"); - -// dono-2.0.0: "(?m)^\\n" -consistent!(dono_1, "(?m)^\\n"); - -// dono-2.0.0: "(?m)^\\n" -consistent!(dono_2, "(?m)^\\n"); - -// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{43}=\.ed25519$" -consistent!(ssb_common_0, r"^[0-9A-Za-z\+/]{43}=\.ed25519$"); - -// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{86}==\.ed25519$" -consistent!(ssb_common_1, r"^[0-9A-Za-z\+/]{86}==\.ed25519$"); - -// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{43}=\.sha256$" -consistent!(ssb_common_2, r"^[0-9A-Za-z\+/]{43}=\.sha256$"); - -// mozversion-0.1.3: r"^(?P<major>\d+)\.(?P<minor>\d+)(?:\.(?P<patch>\d+))?(?:(?P<pre0>[a-z]+)(?P<pre1>\d*))?$" -consistent!(mozversion_0, r"^(?P<major>\d+)\.(?P<minor>\d+)(?:\.(?P<patch>\d+))?(?:(?P<pre0>[a-z]+)(?P<pre1>\d*))?$"); - -// monger-0.5.6: r"^(\d+)\.(\d+)$" -consistent!(monger_0, r"^(\d+)\.(\d+)$"); - -// mongo_rub-0.0.2: r"^[rv]2\.6" -consistent!(mongo_rub_0, r"^[rv]2\.6"); - -// flow-0.3.5: "body value" -consistent!(flow_0, "body value"); - -// flow-0.3.5: "start marker" -consistent!(flow_1, "start marker"); - -// flow-0.3.5: "end marker" -consistent!(flow_2, "end marker"); - -// flow-0.3.5: "body value" -consistent!(flow_3, "body value"); - -// vobsub-0.2.3: "^([A-Za-z/ ]+): (.*)" 
-consistent!(vobsub_0, "^([A-Za-z/ ]+): (.*)"); - -// voidmap-1.1.2: r"#([^\s=]+)*" -consistent!(voidmap_0, r"#([^\s=]+)*"); - -// voidmap-1.1.2: r"#(\S+)*" -consistent!(voidmap_1, r"#(\S+)*"); - -// voidmap-1.1.2: r"#prio=(\d+)" -consistent!(voidmap_2, r"#prio=(\d+)"); - -// voidmap-1.1.2: r"\[(\S+)\]" -consistent!(voidmap_3, r"\[(\S+)\]"); - -// voidmap-1.1.2: r"#limit=(\d+)" -consistent!(voidmap_4, r"#limit=(\d+)"); - -// voidmap-1.1.2: r"#tagged=(\S+)" -consistent!(voidmap_5, r"#tagged=(\S+)"); - -// voidmap-1.1.2: r"#rev\b" -consistent!(voidmap_6, r"#rev\b"); - -// voidmap-1.1.2: r"#done\b" -consistent!(voidmap_7, r"#done\b"); - -// voidmap-1.1.2: r"#open\b" -consistent!(voidmap_8, r"#open\b"); - -// voidmap-1.1.2: r"#since=(\S+)" -consistent!(voidmap_9, r"#since=(\S+)"); - -// voidmap-1.1.2: r"#until=(\S+)" -consistent!(voidmap_10, r"#until=(\S+)"); - -// voidmap-1.1.2: r"#plot=(\S+)" -consistent!(voidmap_11, r"#plot=(\S+)"); - -// voidmap-1.1.2: r"#n=(\d+)" -consistent!(voidmap_12, r"#n=(\d+)"); - -// voidmap-1.1.2: r"(\S+)" -consistent!(voidmap_13, r"(\S+)"); - -// voidmap-1.1.2: r"(?P<y>\d+)y" -consistent!(voidmap_14, r"(?P<y>\d+)y"); - -// voidmap-1.1.2: r"(?P<m>\d+)m" -consistent!(voidmap_15, r"(?P<m>\d+)m"); - -// voidmap-1.1.2: r"(?P<w>\d+)w" -consistent!(voidmap_16, r"(?P<w>\d+)w"); - -// voidmap-1.1.2: r"(?P<d>\d+)d" -consistent!(voidmap_17, r"(?P<d>\d+)d"); - -// voidmap-1.1.2: r"(?P<h>\d+)h" -consistent!(voidmap_18, r"(?P<h>\d+)h"); - -// voidmap-1.1.2: r"C-(.)" -consistent!(voidmap_19, r"C-(.)"); - -// qt_generator-0.2.0: r"^\.\./qt[^/]+/" -consistent!(qt_generator_0, r"^\.\./qt[^/]+/"); - -// qt_generator-0.2.0: "(href|src)=\"([^\"]*)\"" -consistent!(qt_generator_1, "(href|src)=\"([^\"]*)\""); - -// kryptos-0.6.1: r"[01]{5}" -consistent!(kryptos_0, r"[01]{5}"); - -// cifar_10_loader-0.2.0: "data_batch_[1-5].bin" -consistent!(cifar_10_loader_0, "data_batch_[1-5].bin"); - -// cifar_10_loader-0.2.0: "test_batch.bin" -consistent!(cifar_10_loader_1, "test_batch.bin"); - -// circadian-0.6.0: r"^\d+.\d+s$" -consistent!(circadian_0, r"^\d+.\d+s$"); - -// circadian-0.6.0: r"^\d+:\d+$" -consistent!(circadian_1, r"^\d+:\d+$"); - -// circadian-0.6.0: r"^\d+:\d+m$" -consistent!(circadian_2, r"^\d+:\d+m$"); - -// cicada-0.8.1: r"!!" 
-consistent!(cicada_0, r"!!"); - -// cicada-0.8.1: r"^([^`]*)`([^`]+)`(.*)$" -consistent!(cicada_1, r"^([^`]*)`([^`]+)`(.*)$"); - -// cicada-0.8.1: r"\*+" -consistent!(cicada_2, r"\*+"); - -// cicada-0.8.1: r"([^\$]*)\$\{?([A-Za-z0-9\?\$_]+)\}?(.*)" -consistent!(cicada_3, r"([^\$]*)\$\{?([A-Za-z0-9\?\$_]+)\}?(.*)"); - -// cicada-0.8.1: r"^ *alias +([a-zA-Z0-9_\.-]+)=(.*)$" -consistent!(cicada_4, r"^ *alias +([a-zA-Z0-9_\.-]+)=(.*)$"); - -// vterm-sys-0.1.0: r"hi" -consistent!(vterm_sys_0, r"hi"); - -// skim-0.5.0: r".*?\t" -consistent!(skim_0, r".*?\t"); - -// skim-0.5.0: r".*?[\t ]" -consistent!(skim_1, r".*?[\t ]"); - -// skim-0.5.0: r"(\{-?[0-9.,q]*?})" -consistent!(skim_2, r"(\{-?[0-9.,q]*?})"); - -// skim-0.5.0: r"[ \t\n]+" -consistent!(skim_3, r"[ \t\n]+"); - -// skim-0.5.0: r"[ \t\n]+" -consistent!(skim_4, r"[ \t\n]+"); - -// skim-0.5.0: r"([^ |]+( +\| +[^ |]*)+)|( +)" -consistent!(skim_5, r"([^ |]+( +\| +[^ |]*)+)|( +)"); - -// skim-0.5.0: r" +\| +" -consistent!(skim_6, r" +\| +"); - -// skim-0.5.0: r"^(?P<left>-?\d+)?(?P<sep>\.\.)?(?P<right>-?\d+)?$" -consistent!(skim_7, r"^(?P<left>-?\d+)?(?P<sep>\.\.)?(?P<right>-?\d+)?$"); - -// skim-0.5.0: "," -consistent!(skim_8, ","); - -// skim-0.5.0: ".*?," -consistent!(skim_9, ".*?,"); - -// skim-0.5.0: ".*?," -consistent!(skim_10, ".*?,"); - -// skim-0.5.0: "," -consistent!(skim_11, ","); - -// skim-0.5.0: r"\x1B\[(?:([0-9]+;[0-9]+[Hf])|([0-9]+[ABCD])|(s|u|2J|K)|([0-9;]*m)|(=[0-9]+[hI]))" -consistent!(skim_12, r"\x1B\[(?:([0-9]+;[0-9]+[Hf])|([0-9]+[ABCD])|(s|u|2J|K)|([0-9;]*m)|(=[0-9]+[hI]))"); - -// egg-mode-text-1.14.7: r"[-_./]\z" -consistent!(egg_mode_text_0, r"[-_./]\z"); - -// java-properties-1.1.1: "^[ \t\r\n\x0c]*[#!]" -consistent!(java_properties_0, "^[ \t\r\n\x0c]*[#!]"); - -// java-properties-1.1.1: r"^[ \t\x0c]*[#!][^\r\n]*$" -consistent!(java_properties_1, r"^[ \t\x0c]*[#!][^\r\n]*$"); - -// java-properties-1.1.1: r"^([ \t\x0c]*[:=][ \t\x0c]*|[ \t\x0c]+)$" -consistent!(java_properties_2, r"^([ \t\x0c]*[:=][ \t\x0c]*|[ \t\x0c]+)$"); - -// ipaddress-0.1.2: r":.+\." -consistent!(ipaddress_0, r":.+\."); - -// ipaddress-0.1.2: r"\." 
-consistent!(ipaddress_1, r"\."); - -// ipaddress-0.1.2: r":" -consistent!(ipaddress_2, r":"); - -// iptables-0.2.2: r"v(\d+)\.(\d+)\.(\d+)" -consistent!(iptables_0, r"v(\d+)\.(\d+)\.(\d+)"); - -// rsure-0.8.1: r"^([^-]+)-(.*)\.dat\.gz$" -consistent!(rsure_0, r"^([^-]+)-(.*)\.dat\.gz$"); - -// rs-jsonpath-0.1.0: "^(.*?)(<=|<|==|>=|>)(.*?)$" -consistent!(rs_jsonpath_0, "^(.*?)(<=|<|==|>=|>)(.*?)$"); - -// oatie-0.3.0: r"(\n|^)(\w+):([\n\w\W]+?)(\n(?:\w)|(\n\]))" -consistent!(oatie_0, r"(\n|^)(\w+):([\n\w\W]+?)(\n(?:\w)|(\n\]))"); - -// weld-0.2.0: "#.*$" -consistent!(weld_0, "#.*$"); - -// weld-0.2.0: r"^[A-Za-z$_][A-Za-z0-9$_]*$" -consistent!(weld_1, r"^[A-Za-z$_][A-Za-z0-9$_]*$"); - -// weld-0.2.0: r"^[0-9]+[cC]$" -consistent!(weld_2, r"^[0-9]+[cC]$"); - -// weld-0.2.0: r"^0b[0-1]+[cC]$" -consistent!(weld_3, r"^0b[0-1]+[cC]$"); - -// weld-0.2.0: r"^0x[0-9a-fA-F]+[cC]$" -consistent!(weld_4, r"^0x[0-9a-fA-F]+[cC]$"); - -// weld-0.2.0: r"^[0-9]+$" -consistent!(weld_5, r"^[0-9]+$"); - -// weld-0.2.0: r"^0b[0-1]+$" -consistent!(weld_6, r"^0b[0-1]+$"); - -// weld-0.2.0: r"^0x[0-9a-fA-F]+$" -consistent!(weld_7, r"^0x[0-9a-fA-F]+$"); - -// weld-0.2.0: r"^[0-9]+[lL]$" -consistent!(weld_8, r"^[0-9]+[lL]$"); - -// weld-0.2.0: r"^0b[0-1]+[lL]$" -consistent!(weld_9, r"^0b[0-1]+[lL]$"); - -// weld-0.2.0: r"^0x[0-9a-fA-F]+[lL]$" -consistent!(weld_10, r"^0x[0-9a-fA-F]+[lL]$"); - -// webgl_generator-0.1.0: "([(, ])enum\\b" -consistent!(webgl_generator_0, "([(, ])enum\\b"); - -// webgl_generator-0.1.0: "\\bAcquireResourcesCallback\\b" -consistent!(webgl_generator_1, "\\bAcquireResourcesCallback\\b"); - -// weave-0.2.0: r"^(\d+)(,(\d+))?([acd]).*$" -consistent!(weave_0, r"^(\d+)(,(\d+))?([acd]).*$"); - -// wemo-0.0.12: r"<BinaryState>(\d)(\|-?\d+)*</BinaryState>" -consistent!(wemo_0, r"<BinaryState>(\d)(\|-?\d+)*</BinaryState>"); - -// webscale-0.9.4: r"(http[s]?://[^\s]+)" -consistent!(webscale_0, r"(http[s]?://[^\s]+)"); - -// svgrep-1.1.0: r"^\d+.*$" -consistent!(svgrep_0, r"^\d+.*$"); - -// ignore-0.4.2: r"^[\pL\pN]+$" -consistent!(ignore_0, r"^[\pL\pN]+$"); - -// ommui_string_patterns-0.1.2: r"^([A-Za-z][0-9A-Za-z_]*)?$" -consistent!(ommui_string_patterns_0, r"^([A-Za-z][0-9A-Za-z_]*)?$"); - -// ommui_string_patterns-0.1.2: r"^(\S+(?:.*\S)?)?$" -consistent!(ommui_string_patterns_1, r"^(\S+(?:.*\S)?)?$"); - -// opcua-types-0.3.0: "^(?P<min>[0-9]{1,10})(:(?P<max>[0-9]{1,10}))?$" -consistent!(opcua_types_0, "^(?P<min>[0-9]{1,10})(:(?P<max>[0-9]{1,10}))?$"); - -// opcua-types-0.3.0: r"^(ns=(?P<ns>[0-9]+);)?(?P<t>[isgb])=(?P<v>.+)$" -consistent!(opcua_types_1, r"^(ns=(?P<ns>[0-9]+);)?(?P<t>[isgb])=(?P<v>.+)$"); - -// open_read_later-1.1.1: r"^(.+?)\s*:\s*(.+)$" -consistent!(open_read_later_0, r"^(.+?)\s*:\s*(.+)$"); - -// youtube-downloader-0.1.0: r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*" -consistent!(youtube_downloader_0, r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*"); - -// yobot-0.1.1: "." -consistent!(yobot_0, "."); - -// yobot-0.1.1: r"." -consistent!(yobot_1, r"."); - -// yobot-0.1.1: r".+" -consistent!(yobot_2, r".+"); - -// yobot-0.1.1: r"." 
-consistent!(yobot_3, r"."); - -// ubiquity-0.1.5: r"foo" -consistent!(ubiquity_0, r"foo"); - -// ubiquity-0.1.5: r"/target/" -consistent!(ubiquity_1, r"/target/"); - -// ubiquity-0.1.5: r".DS_Store" -consistent!(ubiquity_2, r".DS_Store"); - -// qasm-1.0.0: r"//.*" -consistent!(qasm_0, r"//.*"); - -// drill-0.3.5: r"\{\{ *([a-z\._]+) *\}\}" -consistent!(drill_0, r"\{\{ *([a-z\._]+) *\}\}"); - -// queryst-2.0.0: r"^([^\]\[]+)" -consistent!(queryst_0, r"^([^\]\[]+)"); - -// queryst-2.0.0: r"(\[[^\]\[]*\])" -consistent!(queryst_1, r"(\[[^\]\[]*\])"); - -// qui-vive-0.1.0: r"^/(\w+)$" -consistent!(qui_vive_0, r"^/(\w+)$"); - -// qui-vive-0.1.0: r"^/key$" -consistent!(qui_vive_1, r"^/key$"); - -// qui-vive-0.1.0: r"^/key/(\w+)$" -consistent!(qui_vive_2, r"^/key/(\w+)$"); - -// qui-vive-0.1.0: r"^/url$" -consistent!(qui_vive_3, r"^/url$"); - -// qui-vive-0.1.0: r"^/url/(\w+)$" -consistent!(qui_vive_4, r"^/url/(\w+)$"); - -// qui-vive-0.1.0: r"^/inv$" -consistent!(qui_vive_5, r"^/inv$"); - -// qui-vive-0.1.0: r"^/inv/(\w+)$" -consistent!(qui_vive_6, r"^/inv/(\w+)$"); - -// subdiff-0.1.0: r"\b" -// consistent!(subdiff_0, r"\b"); - -// substudy-0.4.5: r"^(\d+)/(\d+)$" -consistent!(substudy_0, r"^(\d+)/(\d+)$"); - -// substudy-0.4.5: r"\s+" -consistent!(substudy_1, r"\s+"); - -// substudy-0.4.5: r"<[a-z/][^>]*>" -consistent!(substudy_2, r"<[a-z/][^>]*>"); - -// substudy-0.4.5: r"(\([^)]*\)|♪[^♪]*♪|[A-Z]{2,} ?:)" -consistent!(substudy_3, r"(\([^)]*\)|♪[^♪]*♪|[A-Z]{2,} ?:)"); - -// substudy-0.4.5: r"\s+" -consistent!(substudy_4, r"\s+"); - -// isbnid-0.1.3: r"^(\d(-| )?){9}(x|X|\d|(\d(-| )?){3}\d)$" -consistent!(isbnid_0, r"^(\d(-| )?){9}(x|X|\d|(\d(-| )?){3}\d)$"); - -// isbnid-0.1.3: r"[^0-9X]" -consistent!(isbnid_1, r"[^0-9X]"); - -// ispc-0.3.5: r"Intel\(r\) SPMD Program Compiler \(ispc\), (\d+\.\d+\.\d+)" -consistent!( - ispc_0, - r"Intel\(r\) SPMD Program Compiler \(ispc\), (\d+\.\d+\.\d+)" -); diff --git a/vendor/regex/tests/crazy.rs b/vendor/regex/tests/crazy.rs deleted file mode 100644 index 293ac1a..0000000 --- a/vendor/regex/tests/crazy.rs +++ /dev/null @@ -1,459 +0,0 @@ -mat!(ascii_literal, r"a", "a", Some((0, 1))); - -// Some crazy expressions from regular-expressions.info. 
-mat!( - match_ranges, - r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", - "num: 255", - Some((5, 8)) -); -mat!( - match_ranges_not, - r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", - "num: 256", - None -); -mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))); -mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))); -mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))); -mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None); -mat!( - match_email, - r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", - "mine is jam.slam@gmail.com ", - Some((8, 26)) -); -mat!( - match_email_not, - r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", - "mine is jam.slam@gmail ", - None -); -mat!( - match_email_big, - r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?", - "mine is jam.slam@gmail.com ", - Some((8, 26)) -); -mat!( - match_date1, - r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-01-01", - Some((0, 10)) -); -mat!( - match_date2, - r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-00-01", - None -); -mat!( - match_date3, - r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-13-01", - None -); - -// Do some crazy dancing with the start/end assertions. -matiter!(match_start_end_empty, r"^$", "", (0, 0)); -matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0)); -matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0)); -matiter!(match_start_end_empty_rev, r"$^", "", (0, 0)); -matiter!( - match_start_end_empty_rep, - r"(?:^$)*", - "a\nb\nc", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); -matiter!( - match_start_end_empty_rep_rev, - r"(?:$^)*", - "a\nb\nc", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); - -// Test negated character classes. -mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3))); -mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3))); -mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3))); -mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3))); -mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2))); -mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3))); -mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3))); -mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2))); - -// Test that repeated empty expressions don't loop forever. -mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2))); -mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2))); -mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2))); -mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2))); -mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2))); -mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2))); -mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2))); -mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2))); -mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2))); -mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2))); -mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2))); -mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2))); - -// Test that we handle various flavors of empty expressions. 
-matiter!(match_empty1, r"", "", (0, 0)); -matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3)); -matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3)); -matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3)); -matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2)); - -// Test that the DFA can handle pathological cases. -// (This should result in the DFA's cache being flushed too frequently, which -// should cause it to quit and fall back to the NFA algorithm.) -#[test] -fn dfa_handles_pathological_case() { - fn ones_and_zeroes(count: usize) -> String { - use rand::rngs::SmallRng; - use rand::{Rng, SeedableRng}; - - let mut rng = SmallRng::from_entropy(); - let mut s = String::new(); - for _ in 0..count { - if rng.gen() { - s.push('1'); - } else { - s.push('0'); - } - } - s - } - - let re = regex!(r"[01]*1[01]{20}$"); - let text = { - let mut pieces = ones_and_zeroes(100_000); - pieces.push('1'); - pieces.push_str(&ones_and_zeroes(20)); - pieces - }; - assert!(re.is_match(text!(&*text))); -} - -#[test] -fn nest_limit_makes_it_parse() { - use regex::RegexBuilder; - - RegexBuilder::new( - r#"(?-u) - 2(?: - [45]\d{3}| - 7(?: - 1[0-267]| - 2[0-289]| - 3[0-29]| - 4[01]| - 5[1-3]| - 6[013]| - 7[0178]| - 91 - )| - 8(?: - 0[125]| - [139][1-6]| - 2[0157-9]| - 41| - 6[1-35]| - 7[1-5]| - 8[1-8]| - 90 - )| - 9(?: - 0[0-2]| - 1[0-4]| - 2[568]| - 3[3-6]| - 5[5-7]| - 6[0167]| - 7[15]| - 8[0146-9] - ) - )\d{4}| - 3(?: - 12?[5-7]\d{2}| - 0(?: - 2(?: - [025-79]\d| - [348]\d{1,2} - )| - 3(?: - [2-4]\d| - [56]\d? - ) - )| - 2(?: - 1\d{2}| - 2(?: - [12]\d| - [35]\d{1,2}| - 4\d? 
- ) - )| - 3(?: - 1\d{2}| - 2(?: - [2356]\d| - 4\d{1,2} - ) - )| - 4(?: - 1\d{2}| - 2(?: - 2\d{1,2}| - [47]| - 5\d{2} - ) - )| - 5(?: - 1\d{2}| - 29 - )| - [67]1\d{2}| - 8(?: - 1\d{2}| - 2(?: - 2\d{2}| - 3| - 4\d - ) - ) - )\d{3}| - 4(?: - 0(?: - 2(?: - [09]\d| - 7 - )| - 33\d{2} - )| - 1\d{3}| - 2(?: - 1\d{2}| - 2(?: - [25]\d?| - [348]\d| - [67]\d{1,2} - ) - )| - 3(?: - 1\d{2}(?: - \d{2} - )?| - 2(?: - [045]\d| - [236-9]\d{1,2} - )| - 32\d{2} - )| - 4(?: - [18]\d{2}| - 2(?: - [2-46]\d{2}| - 3 - )| - 5[25]\d{2} - )| - 5(?: - 1\d{2}| - 2(?: - 3\d| - 5 - ) - )| - 6(?: - [18]\d{2}| - 2(?: - 3(?: - \d{2} - )?| - [46]\d{1,2}| - 5\d{2}| - 7\d - )| - 5(?: - 3\d?| - 4\d| - [57]\d{1,2}| - 6\d{2}| - 8 - ) - )| - 71\d{2}| - 8(?: - [18]\d{2}| - 23\d{2}| - 54\d{2} - )| - 9(?: - [18]\d{2}| - 2[2-5]\d{2}| - 53\d{1,2} - ) - )\d{3}| - 5(?: - 02[03489]\d{2}| - 1\d{2}| - 2(?: - 1\d{2}| - 2(?: - 2(?: - \d{2} - )?| - [457]\d{2} - ) - )| - 3(?: - 1\d{2}| - 2(?: - [37](?: - \d{2} - )?| - [569]\d{2} - ) - )| - 4(?: - 1\d{2}| - 2[46]\d{2} - )| - 5(?: - 1\d{2}| - 26\d{1,2} - )| - 6(?: - [18]\d{2}| - 2| - 53\d{2} - )| - 7(?: - 1| - 24 - )\d{2}| - 8(?: - 1| - 26 - )\d{2}| - 91\d{2} - )\d{3}| - 6(?: - 0(?: - 1\d{2}| - 2(?: - 3\d{2}| - 4\d{1,2} - ) - )| - 2(?: - 2[2-5]\d{2}| - 5(?: - [3-5]\d{2}| - 7 - )| - 8\d{2} - )| - 3(?: - 1| - 2[3478] - )\d{2}| - 4(?: - 1| - 2[34] - )\d{2}| - 5(?: - 1| - 2[47] - )\d{2}| - 6(?: - [18]\d{2}| - 6(?: - 2(?: - 2\d| - [34]\d{2} - )| - 5(?: - [24]\d{2}| - 3\d| - 5\d{1,2} - ) - ) - )| - 72[2-5]\d{2}| - 8(?: - 1\d{2}| - 2[2-5]\d{2} - )| - 9(?: - 1\d{2}| - 2[2-6]\d{2} - ) - )\d{3}| - 7(?: - (?: - 02| - [3-589]1| - 6[12]| - 72[24] - )\d{2}| - 21\d{3}| - 32 - )\d{3}| - 8(?: - (?: - 4[12]| - [5-7]2| - 1\d? - )| - (?: - 0| - 3[12]| - [5-7]1| - 217 - )\d - )\d{4}| - 9(?: - [35]1| - (?: - [024]2| - 81 - )\d| - (?: - 1| - [24]1 - )\d{2} - )\d{3} - "#, - ) - .build() - .unwrap(); -} diff --git a/vendor/regex/tests/flags.rs b/vendor/regex/tests/flags.rs deleted file mode 100644 index c33b82d..0000000 --- a/vendor/regex/tests/flags.rs +++ /dev/null @@ -1,31 +0,0 @@ -mat!(match_flag_case, "(?-u)(?i)abc", "ABC", Some((0, 3))); -mat!(match_flag_weird_case, "(?-u)(?i)a(?-i)bc", "Abc", Some((0, 3))); -mat!(match_flag_weird_case_not, "(?-u)(?i)a(?-i)bc", "ABC", None); -mat!(match_flag_case_dotnl, "(?-u)(?is)a(?u:.)", "A\n", Some((0, 2))); -mat!( - match_flag_case_dotnl_toggle, - "(?-u)(?is)a(?u:.)(?-is)a(?u:.)", - "A\nab", - Some((0, 4)) -); -mat!( - match_flag_case_dotnl_toggle_not, - "(?-u)(?is)a(?u:.)(?-is)a(?u:.)", - "A\na\n", - None -); -mat!( - match_flag_case_dotnl_toggle_ok, - "(?-u)(?is)a(?u:.)(?-is:a(?u:.))?", - "A\na\n", - Some((0, 2)) -); -mat!( - match_flag_multi, - r"(?-u)(?m)(?:^\d+$\n?)+", - "123\n456\n789", - Some((0, 11)) -); -mat!(match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1))); -mat!(match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2))); -mat!(match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2))); diff --git a/vendor/regex/tests/fowler.rs b/vendor/regex/tests/fowler.rs deleted file mode 100644 index 7f56a75..0000000 --- a/vendor/regex/tests/fowler.rs +++ /dev/null @@ -1,1588 +0,0 @@ -// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py' -// on 2019-09-02 11:07:37.849994. 
- -// Tests from basic.dat -mat!(match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18))); -mat!(match_basic_4, r"a...b", r"abababbb", Some((2, 7))); -mat!(match_basic_5, r"XXXXXX", r"..XXXXXX", Some((2, 8))); -mat!(match_basic_6, r"\)", r"()", Some((1, 2))); -mat!(match_basic_7, r"a]", r"a]a", Some((0, 2))); -mat!(match_basic_9, r"\}", r"}", Some((0, 1))); -mat!(match_basic_10, r"\]", r"]", Some((0, 1))); -mat!(match_basic_12, r"]", r"]", Some((0, 1))); -mat!(match_basic_15, r"^a", r"ax", Some((0, 1))); -mat!(match_basic_16, r"\^a", r"a^a", Some((1, 3))); -mat!(match_basic_17, r"a\^", r"a^", Some((0, 2))); -mat!(match_basic_18, r"a$", r"aa", Some((1, 2))); -mat!(match_basic_19, r"a\$", r"a$", Some((0, 2))); -mat!(match_basic_20, r"^$", r"", Some((0, 0))); -mat!(match_basic_21, r"$^", r"", Some((0, 0))); -mat!(match_basic_22, r"a($)", r"aa", Some((1, 2)), Some((2, 2))); -mat!(match_basic_23, r"a*(^a)", r"aa", Some((0, 1)), Some((0, 1))); -mat!(match_basic_24, r"(..)*(...)*", r"a", Some((0, 0))); -mat!(match_basic_25, r"(..)*(...)*", r"abcd", Some((0, 4)), Some((2, 4))); -mat!( - match_basic_26, - r"(ab|a)(bc|c)", - r"abc", - Some((0, 3)), - Some((0, 2)), - Some((2, 3)) -); -mat!(match_basic_27, r"(ab)c|abc", r"abc", Some((0, 3)), Some((0, 2))); -mat!(match_basic_28, r"a{0}b", r"ab", Some((1, 2))); -mat!( - match_basic_29, - r"(a*)(b?)(b+)b{3}", - r"aaabbbbbbb", - Some((0, 10)), - Some((0, 3)), - Some((3, 4)), - Some((4, 7)) -); -mat!( - match_basic_30, - r"(a*)(b{0,1})(b{1,})b{3}", - r"aaabbbbbbb", - Some((0, 10)), - Some((0, 3)), - Some((3, 4)), - Some((4, 7)) -); -mat!( - match_basic_32, - r"((a|a)|a)", - r"a", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_33, - r"(a*)(a|aa)", - r"aaaa", - Some((0, 4)), - Some((0, 3)), - Some((3, 4)) -); -mat!(match_basic_34, r"a*(a.|aa)", r"aaaa", Some((0, 4)), Some((2, 4))); -mat!( - match_basic_35, - r"a(b)|c(d)|a(e)f", - r"aef", - Some((0, 3)), - None, - None, - Some((1, 2)) -); -mat!(match_basic_36, r"(a|b)?.*", r"b", Some((0, 1)), Some((0, 1))); -mat!(match_basic_37, r"(a|b)c|a(b|c)", r"ac", Some((0, 2)), Some((0, 1))); -mat!( - match_basic_38, - r"(a|b)c|a(b|c)", - r"ab", - Some((0, 2)), - None, - Some((1, 2)) -); -mat!(match_basic_39, r"(a|b)*c|(a|ab)*c", r"abc", Some((0, 3)), Some((1, 2))); -mat!(match_basic_40, r"(a|b)*c|(a|ab)*c", r"xc", Some((1, 2))); -mat!( - match_basic_41, - r"(.a|.b).*|.*(.a|.b)", - r"xa", - Some((0, 2)), - Some((0, 2)) -); -mat!(match_basic_42, r"a?(ab|ba)ab", r"abab", Some((0, 4)), Some((0, 2))); -mat!(match_basic_43, r"a?(ac{0}b|ba)ab", r"abab", Some((0, 4)), Some((0, 2))); -mat!(match_basic_44, r"ab|abab", r"abbabab", Some((0, 2))); -mat!(match_basic_45, r"aba|bab|bba", r"baaabbbaba", Some((5, 8))); -mat!(match_basic_46, r"aba|bab", r"baaabbbaba", Some((6, 9))); -mat!( - match_basic_47, - r"(aa|aaa)*|(a|aaaaa)", - r"aa", - Some((0, 2)), - Some((0, 2)) -); -mat!( - match_basic_48, - r"(a.|.a.)*|(a|.a...)", - r"aa", - Some((0, 2)), - Some((0, 2)) -); -mat!(match_basic_49, r"ab|a", r"xabc", Some((1, 3))); -mat!(match_basic_50, r"ab|a", r"xxabc", Some((2, 4))); -mat!( - match_basic_51, - r"(?i)(?-u)(Ab|cD)*", - r"aBcD", - Some((0, 4)), - Some((2, 4)) -); -mat!(match_basic_52, r"[^-]", r"--a", Some((2, 3))); -mat!(match_basic_53, r"[a-]*", r"--a", Some((0, 3))); -mat!(match_basic_54, r"[a-m-]*", r"--amoma--", Some((0, 4))); -mat!( - match_basic_55, - r":::1:::0:|:::1:1:0:", - r":::0:::1:::1:::0:", - Some((8, 17)) -); -mat!( - match_basic_56, - r":::1:::0:|:::1:1:1:", - 
r":::0:::1:::1:::0:", - Some((8, 17)) -); -mat!(match_basic_57, r"[[:upper:]]", r"A", Some((0, 1))); -mat!(match_basic_58, r"[[:lower:]]+", r"`az{", Some((1, 3))); -mat!(match_basic_59, r"[[:upper:]]+", r"@AZ[", Some((1, 3))); -mat!( - match_basic_65, - r" -", - r" -", - Some((0, 1)) -); -mat!( - match_basic_66, - r" -", - r" -", - Some((0, 1)) -); -mat!( - match_basic_67, - r"[^a]", - r" -", - Some((0, 1)) -); -mat!( - match_basic_68, - r" -a", - r" -a", - Some((0, 2)) -); -mat!( - match_basic_69, - r"(a)(b)(c)", - r"abc", - Some((0, 3)), - Some((0, 1)), - Some((1, 2)), - Some((2, 3)) -); -mat!(match_basic_70, r"xxx", r"xxx", Some((0, 3))); -mat!( - match_basic_71, - r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", - r"feb 6,", - Some((0, 6)) -); -mat!( - match_basic_72, - r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", - r"2/7", - Some((0, 3)) -); -mat!( - match_basic_73, - r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", - r"feb 1,Feb 6", - Some((5, 11)) -); -mat!( - match_basic_74, - r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", - r"x", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_75, - r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", - r"xx", - Some((0, 2)), - Some((1, 2)), - Some((1, 2)) -); -mat!( - match_basic_76, - r"a?(ab|ba)*", - r"ababababababababababababababababababababababababababababababababababababababababa", - Some((0, 81)), - Some((79, 81)) -); -mat!( - match_basic_77, - r"abaa|abbaa|abbbaa|abbbbaa", - r"ababbabbbabbbabbbbabbbbaa", - Some((18, 25)) -); -mat!( - match_basic_78, - r"abaa|abbaa|abbbaa|abbbbaa", - r"ababbabbbabbbabbbbabaa", - Some((18, 22)) -); -mat!( - match_basic_79, - r"aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", - r"baaabbbabac", - Some((7, 11)) -); -mat!(match_basic_80, r".*", r"", Some((0, 2))); -mat!( - match_basic_81, - r"aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", - r"XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", - Some((53, 57)) -); -mat!(match_basic_83, r"a*a*a*a*a*b", r"aaaaaaaaab", Some((0, 10))); -mat!(match_basic_84, r"^", r"", Some((0, 0))); -mat!(match_basic_85, r"$", r"", Some((0, 0))); -mat!(match_basic_86, r"^$", r"", Some((0, 0))); -mat!(match_basic_87, r"^a$", r"a", Some((0, 1))); -mat!(match_basic_88, r"abc", r"abc", Some((0, 3))); -mat!(match_basic_89, r"abc", r"xabcy", Some((1, 4))); -mat!(match_basic_90, r"abc", r"ababc", Some((2, 5))); -mat!(match_basic_91, r"ab*c", r"abc", Some((0, 3))); -mat!(match_basic_92, r"ab*bc", r"abc", Some((0, 3))); -mat!(match_basic_93, r"ab*bc", r"abbc", Some((0, 4))); -mat!(match_basic_94, r"ab*bc", r"abbbbc", Some((0, 6))); -mat!(match_basic_95, r"ab+bc", r"abbc", Some((0, 4))); -mat!(match_basic_96, r"ab+bc", r"abbbbc", Some((0, 6))); -mat!(match_basic_97, r"ab?bc", r"abbc", Some((0, 4))); -mat!(match_basic_98, r"ab?bc", r"abc", Some((0, 3))); -mat!(match_basic_99, r"ab?c", r"abc", Some((0, 3))); -mat!(match_basic_100, r"^abc$", r"abc", Some((0, 3))); -mat!(match_basic_101, r"^abc", r"abcc", Some((0, 3))); -mat!(match_basic_102, r"abc$", r"aabc", Some((1, 4))); -mat!(match_basic_103, r"^", r"abc", Some((0, 0))); -mat!(match_basic_104, r"$", r"abc", Some((3, 3))); -mat!(match_basic_105, r"a.c", r"abc", Some((0, 3))); -mat!(match_basic_106, r"a.c", r"axc", Some((0, 3))); -mat!(match_basic_107, r"a.*c", r"axyzc", Some((0, 5))); -mat!(match_basic_108, r"a[bc]d", r"abd", Some((0, 3))); -mat!(match_basic_109, r"a[b-d]e", r"ace", Some((0, 3))); 
-mat!(match_basic_110, r"a[b-d]", r"aac", Some((1, 3))); -mat!(match_basic_111, r"a[-b]", r"a-", Some((0, 2))); -mat!(match_basic_112, r"a[b-]", r"a-", Some((0, 2))); -mat!(match_basic_113, r"a]", r"a]", Some((0, 2))); -mat!(match_basic_114, r"a[]]b", r"a]b", Some((0, 3))); -mat!(match_basic_115, r"a[^bc]d", r"aed", Some((0, 3))); -mat!(match_basic_116, r"a[^-b]c", r"adc", Some((0, 3))); -mat!(match_basic_117, r"a[^]b]c", r"adc", Some((0, 3))); -mat!(match_basic_118, r"ab|cd", r"abc", Some((0, 2))); -mat!(match_basic_119, r"ab|cd", r"abcd", Some((0, 2))); -mat!(match_basic_120, r"a\(b", r"a(b", Some((0, 3))); -mat!(match_basic_121, r"a\(*b", r"ab", Some((0, 2))); -mat!(match_basic_122, r"a\(*b", r"a((b", Some((0, 4))); -mat!( - match_basic_123, - r"((a))", - r"abc", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_124, - r"(a)b(c)", - r"abc", - Some((0, 3)), - Some((0, 1)), - Some((2, 3)) -); -mat!(match_basic_125, r"a+b+c", r"aabbabc", Some((4, 7))); -mat!(match_basic_126, r"a*", r"aaa", Some((0, 3))); -mat!(match_basic_128, r"(a*)*", r"-", Some((0, 0)), None); -mat!(match_basic_129, r"(a*)+", r"-", Some((0, 0)), Some((0, 0))); -mat!(match_basic_131, r"(a*|b)*", r"-", Some((0, 0)), None); -mat!(match_basic_132, r"(a+|b)*", r"ab", Some((0, 2)), Some((1, 2))); -mat!(match_basic_133, r"(a+|b)+", r"ab", Some((0, 2)), Some((1, 2))); -mat!(match_basic_134, r"(a+|b)?", r"ab", Some((0, 1)), Some((0, 1))); -mat!(match_basic_135, r"[^ab]*", r"cde", Some((0, 3))); -mat!(match_basic_137, r"(^)*", r"-", Some((0, 0)), None); -mat!(match_basic_138, r"a*", r"", Some((0, 0))); -mat!(match_basic_139, r"([abc])*d", r"abbbcd", Some((0, 6)), Some((4, 5))); -mat!(match_basic_140, r"([abc])*bcd", r"abcd", Some((0, 4)), Some((0, 1))); -mat!(match_basic_141, r"a|b|c|d|e", r"e", Some((0, 1))); -mat!(match_basic_142, r"(a|b|c|d|e)f", r"ef", Some((0, 2)), Some((0, 1))); -mat!(match_basic_144, r"((a*|b))*", r"-", Some((0, 0)), None, None); -mat!(match_basic_145, r"abcd*efg", r"abcdefg", Some((0, 7))); -mat!(match_basic_146, r"ab*", r"xabyabbbz", Some((1, 3))); -mat!(match_basic_147, r"ab*", r"xayabbbz", Some((1, 2))); -mat!(match_basic_148, r"(ab|cd)e", r"abcde", Some((2, 5)), Some((2, 4))); -mat!(match_basic_149, r"[abhgefdc]ij", r"hij", Some((0, 3))); -mat!(match_basic_150, r"(a|b)c*d", r"abcd", Some((1, 4)), Some((1, 2))); -mat!(match_basic_151, r"(ab|ab*)bc", r"abc", Some((0, 3)), Some((0, 1))); -mat!(match_basic_152, r"a([bc]*)c*", r"abc", Some((0, 3)), Some((1, 3))); -mat!( - match_basic_153, - r"a([bc]*)(c*d)", - r"abcd", - Some((0, 4)), - Some((1, 3)), - Some((3, 4)) -); -mat!( - match_basic_154, - r"a([bc]+)(c*d)", - r"abcd", - Some((0, 4)), - Some((1, 3)), - Some((3, 4)) -); -mat!( - match_basic_155, - r"a([bc]*)(c+d)", - r"abcd", - Some((0, 4)), - Some((1, 2)), - Some((2, 4)) -); -mat!(match_basic_156, r"a[bcd]*dcdcde", r"adcdcde", Some((0, 7))); -mat!(match_basic_157, r"(ab|a)b*c", r"abc", Some((0, 3)), Some((0, 2))); -mat!( - match_basic_158, - r"((a)(b)c)(d)", - r"abcd", - Some((0, 4)), - Some((0, 3)), - Some((0, 1)), - Some((1, 2)), - Some((3, 4)) -); -mat!(match_basic_159, r"[A-Za-z_][A-Za-z0-9_]*", r"alpha", Some((0, 5))); -mat!(match_basic_160, r"^a(bc+|b[eh])g|.h$", r"abh", Some((1, 3))); -mat!( - match_basic_161, - r"(bc+d$|ef*g.|h?i(j|k))", - r"effgz", - Some((0, 5)), - Some((0, 5)) -); -mat!( - match_basic_162, - r"(bc+d$|ef*g.|h?i(j|k))", - r"ij", - Some((0, 2)), - Some((0, 2)), - Some((1, 2)) -); -mat!( - match_basic_163, - r"(bc+d$|ef*g.|h?i(j|k))", - r"reffgz", - 
Some((1, 6)), - Some((1, 6)) -); -mat!( - match_basic_164, - r"(((((((((a)))))))))", - r"a", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_165, - r"multiple words", - r"multiple words yeah", - Some((0, 14)) -); -mat!( - match_basic_166, - r"(.*)c(.*)", - r"abcde", - Some((0, 5)), - Some((0, 2)), - Some((3, 5)) -); -mat!(match_basic_167, r"abcd", r"abcd", Some((0, 4))); -mat!(match_basic_168, r"a(bc)d", r"abcd", Some((0, 4)), Some((1, 3))); -mat!(match_basic_169, r"a[-]?c", r"ac", Some((0, 3))); -mat!( - match_basic_170, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Qaddafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!( - match_basic_171, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Mo'ammar Gadhafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_172, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Kaddafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!( - match_basic_173, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Qadhafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!( - match_basic_174, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Gadafi", - Some((0, 14)), - None, - Some((10, 11)) -); -mat!( - match_basic_175, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Mu'ammar Qadafi", - Some((0, 15)), - None, - Some((11, 12)) -); -mat!( - match_basic_176, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Moamar Gaddafi", - Some((0, 14)), - None, - Some((9, 11)) -); -mat!( - match_basic_177, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Mu'ammar Qadhdhafi", - Some((0, 18)), - None, - Some((13, 15)) -); -mat!( - match_basic_178, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Khaddafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_179, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Ghaddafy", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_180, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Ghadafi", - Some((0, 15)), - None, - Some((11, 12)) -); -mat!( - match_basic_181, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Ghaddafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_182, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muamar Kaddafi", - Some((0, 14)), - None, - Some((9, 11)) -); -mat!( - match_basic_183, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Quathafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_184, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Gheddafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_185, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Moammar Khadafy", - Some((0, 15)), - None, - Some((11, 12)) -); -mat!( - match_basic_186, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Moammar Qudhafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!(match_basic_187, r"a+(b|c)*d+", r"aabcdd", Some((0, 6)), Some((3, 4))); 
-mat!(match_basic_188, r"^.+$", r"vivi", Some((0, 4))); -mat!(match_basic_189, r"^(.+)$", r"vivi", Some((0, 4)), Some((0, 4))); -mat!( - match_basic_190, - r"^([^!.]+).att.com!(.+)$", - r"gryphon.att.com!eby", - Some((0, 19)), - Some((0, 7)), - Some((16, 19)) -); -mat!( - match_basic_191, - r"^([^!]+!)?([^!]+)$", - r"bas", - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_192, - r"^([^!]+!)?([^!]+)$", - r"bar!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_193, - r"^([^!]+!)?([^!]+)$", - r"foo!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_194, - r"^.+!([^!]+!)([^!]+)$", - r"foo!bar!bas", - Some((0, 11)), - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_195, - r"((foo)|(bar))!bas", - r"bar!bas", - Some((0, 7)), - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_196, - r"((foo)|(bar))!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)), - None, - Some((4, 7)) -); -mat!( - match_basic_197, - r"((foo)|(bar))!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)), - Some((0, 3)) -); -mat!( - match_basic_198, - r"((foo)|bar)!bas", - r"bar!bas", - Some((0, 7)), - Some((0, 3)) -); -mat!( - match_basic_199, - r"((foo)|bar)!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)) -); -mat!( - match_basic_200, - r"((foo)|bar)!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)), - Some((0, 3)) -); -mat!( - match_basic_201, - r"(foo|(bar))!bas", - r"bar!bas", - Some((0, 7)), - Some((0, 3)), - Some((0, 3)) -); -mat!( - match_basic_202, - r"(foo|(bar))!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)), - Some((4, 7)) -); -mat!( - match_basic_203, - r"(foo|(bar))!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)) -); -mat!( - match_basic_204, - r"(foo|bar)!bas", - r"bar!bas", - Some((0, 7)), - Some((0, 3)) -); -mat!( - match_basic_205, - r"(foo|bar)!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)) -); -mat!( - match_basic_206, - r"(foo|bar)!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)) -); -mat!( - match_basic_207, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"foo!bar!bas", - Some((0, 11)), - Some((0, 11)), - None, - None, - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_208, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"bas", - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_209, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"bar!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_210, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"foo!bar!bas", - Some((0, 11)), - None, - None, - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_211, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"foo!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_212, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"bas", - Some((0, 3)), - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_213, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"bar!bas", - Some((0, 7)), - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_214, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"foo!bar!bas", - Some((0, 11)), - Some((0, 11)), - None, - None, - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_215, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"foo!bas", - Some((0, 7)), - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!(match_basic_216, r".*(/XXX).*", r"/XXX", Some((0, 4)), Some((0, 4))); -mat!(match_basic_217, r".*(\\XXX).*", r"\XXX", Some((0, 4)), Some((0, 4))); -mat!(match_basic_218, 
r"\\XXX", r"\XXX", Some((0, 4))); -mat!(match_basic_219, r".*(/000).*", r"/000", Some((0, 4)), Some((0, 4))); -mat!(match_basic_220, r".*(\\000).*", r"\000", Some((0, 4)), Some((0, 4))); -mat!(match_basic_221, r"\\000", r"\000", Some((0, 4))); - -// Tests from nullsubexpr.dat -mat!(match_nullsubexpr_3, r"(a*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_5, r"(a*)*", r"x", Some((0, 0)), None); -mat!(match_nullsubexpr_6, r"(a*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_7, r"(a*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_8, r"(a*)+", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_9, r"(a*)+", r"x", Some((0, 0)), Some((0, 0))); -mat!(match_nullsubexpr_10, r"(a*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_11, r"(a*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_12, r"(a+)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_13, r"(a+)*", r"x", Some((0, 0))); -mat!(match_nullsubexpr_14, r"(a+)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_15, r"(a+)*", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_16, r"(a+)+", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_17, r"(a+)+", r"x", None); -mat!(match_nullsubexpr_18, r"(a+)+", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_19, r"(a+)+", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_21, r"([a]*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_23, r"([a]*)*", r"x", Some((0, 0)), None); -mat!(match_nullsubexpr_24, r"([a]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_25, r"([a]*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_26, r"([a]*)+", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_27, r"([a]*)+", r"x", Some((0, 0)), Some((0, 0))); -mat!(match_nullsubexpr_28, r"([a]*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_29, r"([a]*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_30, r"([^b]*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_32, r"([^b]*)*", r"b", Some((0, 0)), None); -mat!(match_nullsubexpr_33, r"([^b]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!( - match_nullsubexpr_34, - r"([^b]*)*", - r"aaaaaab", - Some((0, 6)), - Some((0, 6)) -); -mat!(match_nullsubexpr_35, r"([ab]*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_36, r"([ab]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_37, r"([ab]*)*", r"ababab", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_38, r"([ab]*)*", r"bababa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_39, r"([ab]*)*", r"b", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_40, r"([ab]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))); -mat!( - match_nullsubexpr_41, - r"([ab]*)*", - r"aaaabcde", - Some((0, 5)), - Some((0, 5)) -); -mat!(match_nullsubexpr_42, r"([^a]*)*", r"b", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_43, r"([^a]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_45, r"([^a]*)*", r"aaaaaa", Some((0, 0)), None); -mat!( - match_nullsubexpr_46, - r"([^ab]*)*", - r"ccccxx", - Some((0, 6)), - Some((0, 6)) -); -mat!(match_nullsubexpr_48, r"([^ab]*)*", r"ababab", Some((0, 0)), None); -mat!( - match_nullsubexpr_50, - r"((z)+|a)*", - r"zabcde", - Some((0, 2)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_69, - r"(a*)*(x)", - r"x", - Some((0, 1)), - None, - Some((0, 1)) -); -mat!( - 
match_nullsubexpr_70, - r"(a*)*(x)", - r"ax", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_71, - r"(a*)*(x)", - r"axa", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_73, - r"(a*)+(x)", - r"x", - Some((0, 1)), - Some((0, 0)), - Some((0, 1)) -); -mat!( - match_nullsubexpr_74, - r"(a*)+(x)", - r"ax", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_75, - r"(a*)+(x)", - r"axa", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_77, - r"(a*){2}(x)", - r"x", - Some((0, 1)), - Some((0, 0)), - Some((0, 1)) -); -mat!( - match_nullsubexpr_78, - r"(a*){2}(x)", - r"ax", - Some((0, 2)), - Some((1, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_79, - r"(a*){2}(x)", - r"axa", - Some((0, 2)), - Some((1, 1)), - Some((1, 2)) -); - -// Tests from repetition.dat -mat!(match_repetition_10, r"((..)|(.))", r"", None); -mat!(match_repetition_11, r"((..)|(.))((..)|(.))", r"", None); -mat!(match_repetition_12, r"((..)|(.))((..)|(.))((..)|(.))", r"", None); -mat!(match_repetition_14, r"((..)|(.)){1}", r"", None); -mat!(match_repetition_15, r"((..)|(.)){2}", r"", None); -mat!(match_repetition_16, r"((..)|(.)){3}", r"", None); -mat!(match_repetition_18, r"((..)|(.))*", r"", Some((0, 0))); -mat!( - match_repetition_20, - r"((..)|(.))", - r"a", - Some((0, 1)), - Some((0, 1)), - None, - Some((0, 1)) -); -mat!(match_repetition_21, r"((..)|(.))((..)|(.))", r"a", None); -mat!(match_repetition_22, r"((..)|(.))((..)|(.))((..)|(.))", r"a", None); -mat!( - match_repetition_24, - r"((..)|(.)){1}", - r"a", - Some((0, 1)), - Some((0, 1)), - None, - Some((0, 1)) -); -mat!(match_repetition_25, r"((..)|(.)){2}", r"a", None); -mat!(match_repetition_26, r"((..)|(.)){3}", r"a", None); -mat!( - match_repetition_28, - r"((..)|(.))*", - r"a", - Some((0, 1)), - Some((0, 1)), - None, - Some((0, 1)) -); -mat!( - match_repetition_30, - r"((..)|(.))", - r"aa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_31, - r"((..)|(.))((..)|(.))", - r"aa", - Some((0, 2)), - Some((0, 1)), - None, - Some((0, 1)), - Some((1, 2)), - None, - Some((1, 2)) -); -mat!(match_repetition_32, r"((..)|(.))((..)|(.))((..)|(.))", r"aa", None); -mat!( - match_repetition_34, - r"((..)|(.)){1}", - r"aa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_35, - r"((..)|(.)){2}", - r"aa", - Some((0, 2)), - Some((1, 2)), - None, - Some((1, 2)) -); -mat!(match_repetition_36, r"((..)|(.)){3}", r"aa", None); -mat!( - match_repetition_38, - r"((..)|(.))*", - r"aa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_40, - r"((..)|(.))", - r"aaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_41, - r"((..)|(.))((..)|(.))", - r"aaa", - Some((0, 3)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 3)), - None, - Some((2, 3)) -); -mat!( - match_repetition_42, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaa", - Some((0, 3)), - Some((0, 1)), - None, - Some((0, 1)), - Some((1, 2)), - None, - Some((1, 2)), - Some((2, 3)), - None, - Some((2, 3)) -); -mat!( - match_repetition_44, - r"((..)|(.)){1}", - r"aaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_46, - r"((..)|(.)){2}", - r"aaa", - Some((0, 3)), - Some((2, 3)), - Some((0, 2)), - Some((2, 3)) -); -mat!( - match_repetition_47, - r"((..)|(.)){3}", - r"aaa", - Some((0, 3)), - Some((2, 3)), - None, - Some((2, 3)) -); -mat!( - 
match_repetition_50, - r"((..)|(.))*", - r"aaa", - Some((0, 3)), - Some((2, 3)), - Some((0, 2)), - Some((2, 3)) -); -mat!( - match_repetition_52, - r"((..)|(.))", - r"aaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_53, - r"((..)|(.))((..)|(.))", - r"aaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_54, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 3)), - None, - Some((2, 3)), - Some((3, 4)), - None, - Some((3, 4)) -); -mat!( - match_repetition_56, - r"((..)|(.)){1}", - r"aaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_57, - r"((..)|(.)){2}", - r"aaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_59, - r"((..)|(.)){3}", - r"aaaa", - Some((0, 4)), - Some((3, 4)), - Some((0, 2)), - Some((3, 4)) -); -mat!( - match_repetition_61, - r"((..)|(.))*", - r"aaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_63, - r"((..)|(.))", - r"aaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_64, - r"((..)|(.))((..)|(.))", - r"aaaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_65, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaaaa", - Some((0, 5)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None, - Some((4, 5)), - None, - Some((4, 5)) -); -mat!( - match_repetition_67, - r"((..)|(.)){1}", - r"aaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_68, - r"((..)|(.)){2}", - r"aaaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_70, - r"((..)|(.)){3}", - r"aaaaa", - Some((0, 5)), - Some((4, 5)), - Some((2, 4)), - Some((4, 5)) -); -mat!( - match_repetition_73, - r"((..)|(.))*", - r"aaaaa", - Some((0, 5)), - Some((4, 5)), - Some((2, 4)), - Some((4, 5)) -); -mat!( - match_repetition_75, - r"((..)|(.))", - r"aaaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_76, - r"((..)|(.))((..)|(.))", - r"aaaaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_77, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaaaaa", - Some((0, 6)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None, - Some((4, 6)), - Some((4, 6)), - None -); -mat!( - match_repetition_79, - r"((..)|(.)){1}", - r"aaaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_80, - r"((..)|(.)){2}", - r"aaaaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_81, - r"((..)|(.)){3}", - r"aaaaaa", - Some((0, 6)), - Some((4, 6)), - Some((4, 6)), - None -); -mat!( - match_repetition_83, - r"((..)|(.))*", - r"aaaaaa", - Some((0, 6)), - Some((4, 6)), - Some((4, 6)), - None -); -mat!( - match_repetition_90, - r"X(.?){0,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_91, - r"X(.?){1,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_92, - r"X(.?){2,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_93, - r"X(.?){3,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_94, - r"X(.?){4,}Y", - r"X1234567Y", - 
Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_95, - r"X(.?){5,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_96, - r"X(.?){6,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_97, - r"X(.?){7,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_98, - r"X(.?){8,}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_100, - r"X(.?){0,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_102, - r"X(.?){1,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_104, - r"X(.?){2,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_106, - r"X(.?){3,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_108, - r"X(.?){4,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_110, - r"X(.?){5,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_112, - r"X(.?){6,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_114, - r"X(.?){7,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_115, - r"X(.?){8,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_126, - r"(a|ab|c|bcd){0,}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_127, - r"(a|ab|c|bcd){1,}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_128, - r"(a|ab|c|bcd){2,}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!( - match_repetition_129, - r"(a|ab|c|bcd){3,}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!(match_repetition_130, r"(a|ab|c|bcd){4,}(d*)", r"ababcd", None); -mat!( - match_repetition_131, - r"(a|ab|c|bcd){0,10}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_132, - r"(a|ab|c|bcd){1,10}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_133, - r"(a|ab|c|bcd){2,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!( - match_repetition_134, - r"(a|ab|c|bcd){3,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!(match_repetition_135, r"(a|ab|c|bcd){4,10}(d*)", r"ababcd", None); -mat!( - match_repetition_136, - r"(a|ab|c|bcd)*(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_137, - r"(a|ab|c|bcd)+(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_143, - r"(ab|a|c|bcd){0,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_145, - r"(ab|a|c|bcd){1,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_147, - r"(ab|a|c|bcd){2,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_149, - r"(ab|a|c|bcd){3,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!(match_repetition_150, r"(ab|a|c|bcd){4,}(d*)", r"ababcd", None); -mat!( - match_repetition_152, - r"(ab|a|c|bcd){0,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_154, - r"(ab|a|c|bcd){1,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_156, - 
r"(ab|a|c|bcd){2,10}(d*)",
-    r"ababcd",
-    Some((0, 6)),
-    Some((4, 5)),
-    Some((5, 6))
-);
-mat!(
-    match_repetition_158,
-    r"(ab|a|c|bcd){3,10}(d*)",
-    r"ababcd",
-    Some((0, 6)),
-    Some((4, 5)),
-    Some((5, 6))
-);
-mat!(match_repetition_159, r"(ab|a|c|bcd){4,10}(d*)", r"ababcd", None);
-mat!(
-    match_repetition_161,
-    r"(ab|a|c|bcd)*(d*)",
-    r"ababcd",
-    Some((0, 6)),
-    Some((4, 5)),
-    Some((5, 6))
-);
-mat!(
-    match_repetition_163,
-    r"(ab|a|c|bcd)+(d*)",
-    r"ababcd",
-    Some((0, 6)),
-    Some((4, 5)),
-    Some((5, 6))
-);
diff --git a/vendor/regex/tests/fuzz/mod.rs b/vendor/regex/tests/fuzz/mod.rs
new file mode 100644
index 0000000..88c196a
--- /dev/null
+++ b/vendor/regex/tests/fuzz/mod.rs
@@ -0,0 +1,166 @@
+// This set of tests is different from regression_fuzz in that the tests start
+// from the fuzzer data directly. The test essentially duplicates the fuzz
+// target. I wonder if there's a better way to set this up... Hmmm. I bet
+// `cargo fuzz` has something where it can run a target against crash files
+// and verify that they pass.
+
+// This case, found by the fuzzer, causes the meta engine to use the "reverse
+// inner" literal strategy. That in turn uses a specialized search routine
+// for the lazy DFA in order to avoid worst-case quadratic behavior. That
+// specialized search routine had a bug where it assumed that start state
+// specialization was disabled. But that assumption does not hold here, since
+// it reuses the "general" lazy DFA for the full regex created as part of the
+// core strategy, which might very well have start states specialized due to
+// the existence of a prefilter.
+//
+// This is a somewhat weird case, because if the core engine has a prefilter,
+// then the "reverse inner" optimization usually won't be pursued. But there
+// are some heuristics that try to detect whether a prefilter is "fast" or
+// not. If it's not, then the meta engine will attempt the reverse inner
+// optimization. And indeed, that's what happens here. So the reverse inner
+// optimization ends up with a lazy DFA that has start states specialized.
+// Ideally this wouldn't happen, because specializing start states without a
+// prefilter inside the DFA can be disastrous for performance, causing the
+// DFA to ping-pong in and out of the special state handling. In this case
+// it's probably not a huge deal, because the lazy DFA is only used for part
+// of the matching, whereas the workhorse is the prefilter found by the
+// reverse inner optimization.
+//
+// We could maybe fix this by refactoring the meta engine to be a little more
+// careful. For example, by attempting the optimizations before building the
+// core engine. But this is perhaps a little tricky.
+#[test]
+fn meta_stopat_specialize_start_states() {
+    let data = include_bytes!(
+        "testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9",
+    );
+    let _ = run(data);
+}
+
+// Same bug as meta_stopat_specialize_start_states, but minimized by the
+// fuzzer.
+#[test]
+fn meta_stopat_specialize_start_states_min() {
+    let data = include_bytes!(
+        "testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9",
+    );
+    let _ = run(data);
+}
+
+// This input generated a pattern with a fail state (e.g., \P{any}, [^\s\S]
+// or [a&&b]). But the fail state was in a branch, where a subsequent branch
+// should have led to an overall match, but handling of the fail state
+// prevented it from doing so. A hand-minimized version of this is
+// '[^\s\S]A|B' on the haystack 'B'.
That should yield a match of 'B'.
+//
+// The underlying cause was an issue in how DFA determinization handled fail
+// states. The bug didn't impact the PikeVM or the bounded backtracker.
+#[test]
+fn fail_branch_prevents_match() {
+    let data = include_bytes!(
+        "testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04",
+    );
+    let _ = run(data);
+}
+
+// This input generated a pattern that contained a sub-expression like this:
+//
+//     a{0}{50000}
+//
+// This turned out to provoke quadratic behavior in the NFA compiler.
+// Basically, the NFA compiler works in two phases. The first phase builds
+// a more complicated-but-simpler-to-construct sequence of NFA states that
+// includes unconditional epsilon transitions. As part of converting this
+// sequence to the "final" NFA, we remove those unconditional epsilon
+// transitions. The code responsible for doing this follows every chain of
+// these transitions and remaps the state IDs. The way we were doing this
+// before resulted in re-following every subsequent part of the chain for
+// each state in the chain, which ended up being quadratic behavior. We
+// effectively memoized this, which fixed the performance bug.
+#[test]
+fn slow_big_empty_chain() {
+    let data = include_bytes!(
+        "testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a",
+    );
+    let _ = run(data);
+}
+
+// A different case of slow_big_empty_chain.
+#[test]
+fn slow_big_empty_chain2() {
+    let data = include_bytes!(
+        "testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e",
+    );
+    let _ = run(data);
+}
+
+// A different case of slow_big_empty_chain.
+#[test]
+fn slow_big_empty_chain3() {
+    let data = include_bytes!(
+        "testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6",
+    );
+    let _ = run(data);
+}
+
+// A different case of slow_big_empty_chain.
+#[test]
+fn slow_big_empty_chain4() {
+    let data = include_bytes!(
+        "testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0",
+    );
+    let _ = run(data);
+}
+
+// A different case of slow_big_empty_chain.
+#[test]
+fn slow_big_empty_chain5() {
+    let data = include_bytes!(
+        "testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c",
+    );
+    let _ = run(data);
+}
+
+// A different case of slow_big_empty_chain.
+#[test]
+fn slow_big_empty_chain6() {
+    let data = include_bytes!(
+        "testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085",
+    );
+    let _ = run(data);
+}
+
+// This fuzz input generated a pattern with a large repetition that would
+// fail NFA compilation, but its HIR was small. (HIR doesn't expand
+// repetitions.) But the bounds were high enough that the minimum length
+// calculation overflowed. We fixed this by using saturating arithmetic (and
+// also checked arithmetic for the maximum length calculation).
+//
+// Incidentally, this was the only unguarded arithmetic operation performed
+// in the HIR smart constructors. And the fuzzer found it. Hah. Nice.
+#[test]
+fn minimum_len_overflow() {
+    let data = include_bytes!(
+        "testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff",
+    );
+    let _ = run(data);
+}
+
+// This is the fuzz target function. We duplicate it here since this is the
+// thing we use to interpret the data. It is ultimately what we want to
+// succeed.
+fn run(data: &[u8]) -> Option<()> {
+    if data.len() < 2 {
+        return None;
+    }
+    let mut split_at = usize::from(data[0]);
+    let data = std::str::from_utf8(&data[1..]).ok()?;
+    // Split data into a regex and haystack to search.
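+    // The split point comes from the first input byte: clamp it to at
+    // least 1, reduce it modulo the character count, and then map the
+    // resulting character index to its byte offset, so that the
+    // `split_at` call below always lands on a UTF-8 boundary and cannot
+    // panic.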
+ let len = usize::try_from(data.chars().count()).ok()?; + split_at = std::cmp::max(split_at, 1) % len; + let char_index = data.char_indices().nth(split_at)?.0; + let (pattern, input) = data.split_at(char_index); + let re = regex::Regex::new(pattern).ok()?; + re.is_match(input); + Some(()) +} diff --git a/vendor/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff b/vendor/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff new file mode 100644 index 0000000000000000000000000000000000000000..f7ffbc97402d80d6160a9498522ab44bc2ac9fb6 GIT binary patch literal 77 zcmXSC&`PprfB@-gbMxA2a}x*vF;Tb-JRoJl_OY>CPz?+WGWHC5dJGCRdU`qxDm4H( CVGU~l literal 0 HcmV?d00001 diff --git a/vendor/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 b/vendor/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 new file mode 100644 index 0000000000000000000000000000000000000000..86748199b38c4191448fa37e71afd272daf8b334 GIT binary patch literal 21 YcmZ>D(qd5KHP_Mu14i2zCx#dX04Fj7k^lez literal 0 HcmV?d00001 diff --git a/vendor/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 b/vendor/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 new file mode 100644 index 0000000000000000000000000000000000000000..152769d2da2ec10d9f6145cd5da285ce172c17c4 GIT binary patch literal 63 zcma!!R&cONiq!&95kSgLO^tyikv-O0OQO7nnORSRv4$B{&YC?|OH0eaDpt!)-v9v7 CUJdC0 literal 0 HcmV?d00001 diff --git a/vendor/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 b/vendor/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 new file mode 100644 index 0000000000000000000000000000000000000000..69663d5c73247087c03a53984351b0256dcc358d GIT binary patch literal 16 XcmZ>D(qd5KHP_P9(~EIph+zN#7t;dD literal 0 HcmV?d00001 diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e b/vendor/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e new file mode 100644 index 0000000000000000000000000000000000000000..6c228035381bab5c3902011505941ecc1bf541b9 GIT binary patch literal 68 ycmZ=%VPLSgW?*E10x3NqAj?1mNZ120h&BW<sx&mKEe#9|%xewwfm{%P>jD70T?L>3 literal 0 HcmV?d00001 diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c b/vendor/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c new file mode 100644 index 0000000000000000000000000000000000000000..0570f328c37c45361599d42e6430209535a349d5 GIT binary patch literal 84 zcmZ=%VPLSgW?*E10x3Nq1_l-b5g^wdh(WX=h*71XS#4=xU|?QrpbzANfF4wrwSp*P Kq>2hJ+!O#Ov<C73 literal 0 HcmV?d00001 diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 b/vendor/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 new file mode 100644 index 0000000000000000000000000000000000000000..182bc7fa136d8e1c8caf68b89c8914b9e8f072fc GIT binary patch literal 105 zcmcDPv1hQhXRueZX8-|4289}y5C;3)N(KfNkboY8UPMHNN|ur+W0HNfL2b1e5SyEu k)v6^X1GPBC#Hxao0To5qL$m<-2@HH7Q6OLgllHb20IYxzLI3~& literal 0 HcmV?d00001 diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 b/vendor/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 new file mode 100644 index 0000000000000000000000000000000000000000..f939c33ab737500503aea4872577a46975016864 GIT binary patch literal 22 acmYe-v#&NVsI9gHfm()Yi&_T;1_l61HwCf) literal 0 HcmV?d00001 diff 
--git a/vendor/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a b/vendor/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a new file mode 100644 index 0000000000000000000000000000000000000000..a87de230ff84d8463d9fa2460c5295f1eff1ddda GIT binary patch literal 83 zcmYe-v#&NVsI9gHg4${$i&};nwwlN!Ew&mktyjYa6t=`7?7+ZK!@$S@2G!=~wbcxo KKz0=aLpcE8DHPrS literal 0 HcmV?d00001 diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 b/vendor/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 new file mode 100644 index 0000000000000000000000000000000000000000..dc3329339ea0242c15150380779cbd1333c207ff GIT binary patch literal 102 zcmcC!U}OLT4I|BJLyKC53JuL_Gt*j-uv!(2Z)9Wy5wcEVV6d;M0;&P2WlU15LWn?h M0Zq$B0Sq?L0RPVx;Q#;t literal 0 HcmV?d00001 diff --git a/vendor/regex/tests/lib.rs b/vendor/regex/tests/lib.rs new file mode 100644 index 0000000..b3f6942 --- /dev/null +++ b/vendor/regex/tests/lib.rs @@ -0,0 +1,58 @@ +#![cfg_attr(feature = "pattern", feature(pattern))] + +mod fuzz; +mod misc; +mod regression; +mod regression_fuzz; +mod replace; +#[cfg(feature = "pattern")] +mod searcher; +mod suite_bytes; +mod suite_bytes_set; +mod suite_string; +mod suite_string_set; + +const BLACKLIST: &[&str] = &[ + // Nothing to blacklist yet! +]; + +fn suite() -> anyhow::Result<regex_test::RegexTests> { + let _ = env_logger::try_init(); + + let mut tests = regex_test::RegexTests::new(); + macro_rules! load { + ($name:expr) => {{ + const DATA: &[u8] = + include_bytes!(concat!("../testdata/", $name, ".toml")); + tests.load_slice($name, DATA)?; + }}; + } + + load!("anchored"); + load!("bytes"); + load!("crazy"); + load!("crlf"); + load!("earliest"); + load!("empty"); + load!("expensive"); + load!("flags"); + load!("iter"); + load!("leftmost-all"); + load!("line-terminator"); + load!("misc"); + load!("multiline"); + load!("no-unicode"); + load!("overlapping"); + load!("regression"); + load!("set"); + load!("substring"); + load!("unicode"); + load!("utf8"); + load!("word-boundary"); + load!("word-boundary-special"); + load!("fowler/basic"); + load!("fowler/nullsubexpr"); + load!("fowler/repetition"); + + Ok(tests) +} diff --git a/vendor/regex/tests/macros.rs b/vendor/regex/tests/macros.rs deleted file mode 100644 index e70e948..0000000 --- a/vendor/regex/tests/macros.rs +++ /dev/null @@ -1,160 +0,0 @@ -// Convenience macros. - -macro_rules! findall { - ($re:expr, $text:expr) => {{ - $re.find_iter(text!($text)) - .map(|m| (m.start(), m.end())).collect::<Vec<_>>() - }} -} - -// Macros for automatically producing tests. - -macro_rules! ismatch { - ($name:ident, $re:expr, $text:expr, $ismatch:expr) => { - #[test] - fn $name() { - let re = regex!($re); - assert_eq!($ismatch, re.is_match(text!($text))); - } - }; -} - -macro_rules! mat( - ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => ( - #[test] - fn $name() { - let text = text!($text); - let expected: Vec<Option<_>> = vec![$($loc)+]; - let r = regex!($re); - let got: Vec<Option<_>> = match r.captures(text) { - Some(c) => { - assert!(r.is_match(text)); - assert!(r.shortest_match(text).is_some()); - r.capture_names() - .enumerate() - .map(|(i, _)| c.get(i).map(|m| (m.start(), m.end()))) - .collect() - } - None => vec![None], - }; - // The test set sometimes leave out capture groups, so truncate - // actual capture groups to match test set. 
- let mut sgot = &got[..]; - if sgot.len() > expected.len() { - sgot = &sgot[0..expected.len()] - } - if expected != sgot { - panic!("For RE '{}' against '{:?}', \ - expected '{:?}' but got '{:?}'", - $re, text, expected, sgot); - } - } - ); -); - -macro_rules! matiter( - ($name:ident, $re:expr, $text:expr) => ( - #[test] - fn $name() { - let text = text!($text); - let expected: Vec<(usize, usize)> = vec![]; - let r = regex!($re); - let got: Vec<_> = - r.find_iter(text).map(|m| (m.start(), m.end())).collect(); - if expected != got { - panic!("For RE '{}' against '{:?}', \ - expected '{:?}' but got '{:?}'", - $re, text, expected, got); - } - let captures_got: Vec<_> = - r.captures_iter(text) - .map(|c| c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - if captures_got != got { - panic!("For RE '{}' against '{:?}', \ - got '{:?}' using find_iter but got '{:?}' \ - using captures_iter", - $re, text, got, captures_got); - } - } - ); - ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => ( - #[test] - fn $name() { - let text = text!($text); - let expected: Vec<_> = vec![$($loc)+]; - let r = regex!($re); - let got: Vec<_> = - r.find_iter(text).map(|m| (m.start(), m.end())).collect(); - if expected != got { - panic!("For RE '{}' against '{:?}', \ - expected '{:?}' but got '{:?}'", - $re, text, expected, got); - } - let captures_got: Vec<_> = - r.captures_iter(text) - .map(|c| c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - if captures_got != got { - panic!("For RE '{}' against '{:?}', \ - got '{:?}' using find_iter but got '{:?}' \ - using captures_iter", - $re, text, got, captures_got); - } - } - ); -); - -macro_rules! matset { - ($name:ident, $res:expr, $text:expr, $($match_index:expr),*) => { - #[test] - fn $name() { - let text = text!($text); - let set = regex_set!($res); - assert!(set.is_match(text)); - let expected = vec![$($match_index),*]; - let matches = set.matches(text); - assert!(matches.matched_any()); - let got: Vec<_> = matches.into_iter().collect(); - assert_eq!(expected, got); - } - } -} - -macro_rules! nomatset { - ($name:ident, $res:expr, $text:expr) => { - #[test] - fn $name() { - let text = text!($text); - let set = regex_set!($res); - assert!(!set.is_match(text)); - let matches = set.matches(text); - assert!(!matches.matched_any()); - assert_eq!(0, matches.into_iter().count()); - } - } -} - -macro_rules! split { - ($name:ident, $re:expr, $text:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let splitted: Vec<_> = re.split(t!($text)).collect(); - assert_eq!($expected, &*splitted); - } - } -} - -macro_rules! splitn { - ($name:ident, $re:expr, $text:expr, $limit:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let splitted: Vec<_> = re.splitn(t!($text), $limit).collect(); - assert_eq!($expected, &*splitted); - } - } -} diff --git a/vendor/regex/tests/macros_bytes.rs b/vendor/regex/tests/macros_bytes.rs deleted file mode 100644 index 3d6c8c3..0000000 --- a/vendor/regex/tests/macros_bytes.rs +++ /dev/null @@ -1,39 +0,0 @@ -// Macros for use in writing tests generic over &str/&[u8]. -macro_rules! text { ($text:expr) => { $text.as_bytes() } } -macro_rules! t { ($re:expr) => { text!($re) } } -macro_rules! match_text { ($text:expr) => { $text.as_bytes() } } -macro_rules! use_ { ($($path: tt)*) => { use regex::bytes::$($path)*; } } -macro_rules! empty_vec { () => { <Vec<&[u8]>>::new() } } -macro_rules! bytes { ($text:expr) => { $text } } - -macro_rules! 
no_expand { - ($text:expr) => {{ - use regex::bytes::NoExpand; - NoExpand(text!($text)) - }} -} - -macro_rules! show { - ($text:expr) => {{ - use std::ascii::escape_default; - let mut s = vec![]; - for &b in bytes!($text) { - s.extend(escape_default(b)); - } - String::from_utf8(s).unwrap() - }} -} - -macro_rules! expand { - ($name:ident, $re:expr, $text:expr, $expand:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let cap = re.captures(t!($text)).unwrap(); - - let mut got = vec![]; - cap.expand(t!($expand), &mut got); - assert_eq!(show!(t!($expected)), show!(&*got)); - } - } -} diff --git a/vendor/regex/tests/macros_str.rs b/vendor/regex/tests/macros_str.rs deleted file mode 100644 index 7b7eb11..0000000 --- a/vendor/regex/tests/macros_str.rs +++ /dev/null @@ -1,38 +0,0 @@ -// Macros for use in writing tests generic over &str/&[u8]. -macro_rules! text { ($text:expr) => { $text } } -macro_rules! t { ($text:expr) => { text!($text) } } -macro_rules! match_text { ($text:expr) => { $text.as_str() } } -macro_rules! use_ { ($($path: tt)*) => { use regex::$($path)*; } } -macro_rules! empty_vec { () => { <Vec<&str>>::new() } } -macro_rules! bytes { ($text:expr) => { std::str::from_utf8($text.as_ref()).unwrap() } } - -macro_rules! no_expand { - ($text:expr) => {{ - use regex::NoExpand; - NoExpand(text!($text)) - }} -} - -macro_rules! show { ($text:expr) => { $text } } - -// N.B. The expansion API for &str and &[u8] APIs differs slightly for now, -// but they should be unified in 1.0. Then we can move this macro back into -// tests/api.rs where it is used. ---AG -macro_rules! expand { - ($name:ident, $re:expr, $text:expr, $expand:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let cap = re.captures(t!($text)).unwrap(); - - let mut got = String::new(); - cap.expand(t!($expand), &mut got); - assert_eq!(show!(t!($expected)), show!(&*got)); - } - } -} - -#[cfg(feature = "pattern")] -macro_rules! searcher_expr { ($e:expr) => ($e) } -#[cfg(not(feature = "pattern"))] -macro_rules! searcher_expr { ($e:expr) => ({}) } diff --git a/vendor/regex/tests/misc.rs b/vendor/regex/tests/misc.rs index 314811e..91e7d28 100644 --- a/vendor/regex/tests/misc.rs +++ b/vendor/regex/tests/misc.rs @@ -1,4 +1,143 @@ -mat!(prefix_literal_match, r"^abc", r"abc", Some((0, 3))); -mat!(prefix_literal_nomatch, r"^abc", r"zabc", None); -mat!(one_literal_edge, r"abc", r"xxxxxab", None); -matiter!(terminates, r"a$", r"a", (0, 1)); +use regex::Regex; + +macro_rules! 
regex { + ($pattern:expr) => { + regex::Regex::new($pattern).unwrap() + }; +} + +#[test] +fn unclosed_group_error() { + let err = Regex::new(r"(").unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("unclosed group"), "error message: {:?}", msg); +} + +#[test] +fn regex_string() { + assert_eq!(r"[a-zA-Z0-9]+", regex!(r"[a-zA-Z0-9]+").as_str()); + assert_eq!(r"[a-zA-Z0-9]+", &format!("{}", regex!(r"[a-zA-Z0-9]+"))); + assert_eq!( + r#"Regex("[a-zA-Z0-9]+")"#, + &format!("{:?}", regex!(r"[a-zA-Z0-9]+")) + ); +} + +#[test] +fn capture_names() { + let re = regex!(r"(.)(?P<a>.)"); + assert_eq!(3, re.captures_len()); + assert_eq!((3, Some(3)), re.capture_names().size_hint()); + assert_eq!( + vec![None, None, Some("a")], + re.capture_names().collect::<Vec<_>>() + ); +} + +#[test] +fn capture_index() { + let re = regex!(r"^(?P<name>.+)$"); + let cap = re.captures("abc").unwrap(); + assert_eq!(&cap[0], "abc"); + assert_eq!(&cap[1], "abc"); + assert_eq!(&cap["name"], "abc"); +} + +#[test] +#[should_panic] +fn capture_index_panic_usize() { + let re = regex!(r"^(?P<name>.+)$"); + let cap = re.captures("abc").unwrap(); + let _ = cap[2]; +} + +#[test] +#[should_panic] +fn capture_index_panic_name() { + let re = regex!(r"^(?P<name>.+)$"); + let cap = re.captures("abc").unwrap(); + let _ = cap["bad name"]; +} + +#[test] +fn capture_index_lifetime() { + // This is a test of whether the types on `caps["..."]` are general + // enough. If not, this will fail to typecheck. + fn inner(s: &str) -> usize { + let re = regex!(r"(?P<number>[0-9]+)"); + let caps = re.captures(s).unwrap(); + caps["number"].len() + } + assert_eq!(3, inner("123")); +} + +#[test] +fn capture_misc() { + let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)"); + let cap = re.captures("abc").unwrap(); + + assert_eq!(5, cap.len()); + + assert_eq!((0, 3), { + let m = cap.get(0).unwrap(); + (m.start(), m.end()) + }); + assert_eq!(None, cap.get(2)); + assert_eq!((2, 3), { + let m = cap.get(4).unwrap(); + (m.start(), m.end()) + }); + + assert_eq!("abc", cap.get(0).unwrap().as_str()); + assert_eq!(None, cap.get(2)); + assert_eq!("c", cap.get(4).unwrap().as_str()); + + assert_eq!(None, cap.name("a")); + assert_eq!("c", cap.name("b").unwrap().as_str()); +} + +#[test] +fn sub_capture_matches() { + let re = regex!(r"([a-z])(([a-z])|([0-9]))"); + let cap = re.captures("a5").unwrap(); + let subs: Vec<_> = cap.iter().collect(); + + assert_eq!(5, subs.len()); + assert!(subs[0].is_some()); + assert!(subs[1].is_some()); + assert!(subs[2].is_some()); + assert!(subs[3].is_none()); + assert!(subs[4].is_some()); + + assert_eq!("a5", subs[0].unwrap().as_str()); + assert_eq!("a", subs[1].unwrap().as_str()); + assert_eq!("5", subs[2].unwrap().as_str()); + assert_eq!("5", subs[4].unwrap().as_str()); +} + +// Test that the DFA can handle pathological cases. (This should result in the +// DFA's cache being flushed too frequently, which should cause it to quit and +// fall back to the NFA algorithm.) 
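// (Editorial sketch, not from the upstream suite.) The test below stresses
// the lazy DFA with a roughly 100,000-character haystack to force the cache
// flushing described above; the fallback it exercises affects performance
// only, never the match outcome, so the same pattern can also be
// sanity-checked on a tiny haystack that fits in any cache. The test name
// here is invented for this illustration.
#[test]
fn dfa_pathological_pattern_small_input_sketch() {
    // 21 ones satisfy `[01]*1[01]{20}$`: an empty `[01]*`, one `1`, then
    // exactly twenty `[01]` characters before the end of the haystack.
    let re = regex::Regex::new(r"[01]*1[01]{20}$").unwrap();
    assert!(re.is_match(&"1".repeat(21)));
}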
+#[test] +fn dfa_handles_pathological_case() { + fn ones_and_zeroes(count: usize) -> String { + let mut s = String::new(); + for i in 0..count { + if i % 3 == 0 { + s.push('1'); + } else { + s.push('0'); + } + } + s + } + + let re = regex!(r"[01]*1[01]{20}$"); + let text = { + let mut pieces = ones_and_zeroes(100_000); + pieces.push('1'); + pieces.push_str(&ones_and_zeroes(20)); + pieces + }; + assert!(re.is_match(&text)); +} diff --git a/vendor/regex/tests/multiline.rs b/vendor/regex/tests/multiline.rs deleted file mode 100644 index 62ee47b..0000000 --- a/vendor/regex/tests/multiline.rs +++ /dev/null @@ -1,144 +0,0 @@ -matiter!( - match_multi_1, - r"(?m)^[a-z]+$", - "abc\ndef\nxyz", - (0, 3), - (4, 7), - (8, 11) -); -matiter!(match_multi_2, r"(?m)^$", "abc\ndef\nxyz"); -matiter!(match_multi_3, r"(?m)^", "abc\ndef\nxyz", (0, 0), (4, 4), (8, 8)); -matiter!(match_multi_4, r"(?m)$", "abc\ndef\nxyz", (3, 3), (7, 7), (11, 11)); -matiter!( - match_multi_5, - r"(?m)^[a-z]", - "abc\ndef\nxyz", - (0, 1), - (4, 5), - (8, 9) -); -matiter!(match_multi_6, r"(?m)[a-z]^", "abc\ndef\nxyz"); -matiter!( - match_multi_7, - r"(?m)[a-z]$", - "abc\ndef\nxyz", - (2, 3), - (6, 7), - (10, 11) -); -matiter!(match_multi_8, r"(?m)$[a-z]", "abc\ndef\nxyz"); -matiter!(match_multi_9, r"(?m)^$", "", (0, 0)); - -matiter!( - match_multi_rep_1, - r"(?m)(?:^$)*", - "a\nb\nc", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); -matiter!( - match_multi_rep_2, - r"(?m)(?:^|a)+", - "a\naaa\n", - (0, 0), - (2, 2), - (3, 5), - (6, 6) -); -matiter!( - match_multi_rep_3, - r"(?m)(?:^|a)*", - "a\naaa\n", - (0, 1), - (2, 5), - (6, 6) -); -matiter!( - match_multi_rep_4, - r"(?m)(?:^[a-z])+", - "abc\ndef\nxyz", - (0, 1), - (4, 5), - (8, 9) -); -matiter!( - match_multi_rep_5, - r"(?m)(?:^[a-z]{3}\n?)+", - "abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_6, - r"(?m)(?:^[a-z]{3}\n?)*", - "abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_7, - r"(?m)(?:\n?[a-z]{3}$)+", - "abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_8, - r"(?m)(?:\n?[a-z]{3}$)*", - "abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_9, - r"(?m)^*", - "\naa\n", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4) -); -matiter!(match_multi_rep_10, r"(?m)^+", "\naa\n", (0, 0), (1, 1), (4, 4)); -matiter!( - match_multi_rep_11, - r"(?m)$*", - "\naa\n", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4) -); -matiter!(match_multi_rep_12, r"(?m)$+", "\naa\n", (0, 0), (3, 3), (4, 4)); -matiter!(match_multi_rep_13, r"(?m)(?:$\n)+", "\n\naaa\n\n", (0, 2), (5, 7)); -matiter!( - match_multi_rep_14, - r"(?m)(?:$\n)*", - "\n\naaa\n\n", - (0, 2), - (3, 3), - (4, 4), - (5, 7) -); -matiter!(match_multi_rep_15, r"(?m)(?:$\n^)+", "\n\naaa\n\n", (0, 2), (5, 7)); -matiter!( - match_multi_rep_16, - r"(?m)(?:^|$)+", - "\n\naaa\n\n", - (0, 0), - (1, 1), - (2, 2), - (5, 5), - (6, 6), - (7, 7) -); -matiter!( - match_multi_rep_17, - r"(?m)(?:$\n)*", - "\n\naaa\n\n", - (0, 2), - (3, 3), - (4, 4), - (5, 7) -); diff --git a/vendor/regex/tests/noparse.rs b/vendor/regex/tests/noparse.rs deleted file mode 100644 index 8ded1dc..0000000 --- a/vendor/regex/tests/noparse.rs +++ /dev/null @@ -1,45 +0,0 @@ -macro_rules! 
noparse( - ($name:ident, $re:expr) => ( - #[test] - fn $name() { - let re = $re; - match regex_new!(re) { - Err(_) => {}, - Ok(_) => panic!("Regex '{}' should cause a parse error.", re), - } - } - ); -); - -noparse!(fail_no_repeat_arg, "*"); -noparse!(fail_incomplete_escape, "\\"); -noparse!(fail_class_incomplete, "[A-"); -noparse!(fail_class_not_closed, "[A"); -noparse!(fail_class_no_begin, r"[\A]"); -noparse!(fail_class_no_end, r"[\z]"); -noparse!(fail_class_no_boundary, r"[\b]"); -noparse!(fail_open_paren, "("); -noparse!(fail_close_paren, ")"); -noparse!(fail_invalid_range, "[a-Z]"); -noparse!(fail_empty_capture_name, "(?P<>a)"); -noparse!(fail_bad_capture_name, "(?P<na-me>)"); -noparse!(fail_bad_flag, "(?a)a"); -noparse!(fail_too_big, "a{10000000}"); -noparse!(fail_counted_no_close, "a{1001"); -noparse!(fail_counted_decreasing, "a{2,1}"); -noparse!(fail_counted_nonnegative, "a{-1,1}"); -noparse!(fail_unfinished_cap, "(?"); -noparse!(fail_unfinished_escape, "\\"); -noparse!(fail_octal_digit, r"\8"); -noparse!(fail_hex_digit, r"\xG0"); -noparse!(fail_hex_short, r"\xF"); -noparse!(fail_hex_long_digits, r"\x{fffg}"); -noparse!(fail_flag_bad, "(?a)"); -noparse!(fail_flag_empty, "(?)"); -noparse!(fail_double_neg, "(?-i-i)"); -noparse!(fail_neg_empty, "(?i-)"); -noparse!(fail_dupe_named, "(?P<a>.)(?P<a>.)"); -noparse!(fail_range_end_no_class, "[a-[:lower:]]"); -noparse!(fail_range_end_no_begin, r"[a-\A]"); -noparse!(fail_range_end_no_end, r"[a-\z]"); -noparse!(fail_range_end_no_boundary, r"[a-\b]"); diff --git a/vendor/regex/tests/regression.rs b/vendor/regex/tests/regression.rs index e8b2525..a586701 100644 --- a/vendor/regex/tests/regression.rs +++ b/vendor/regex/tests/regression.rs @@ -1,222 +1,94 @@ +use regex::Regex; + +macro_rules! regex { + ($pattern:expr) => { + regex::Regex::new($pattern).unwrap() + }; +} + // See: https://github.com/rust-lang/regex/issues/48 #[test] fn invalid_regexes_no_crash() { - assert!(regex_new!("(*)").is_err()); - assert!(regex_new!("(?:?)").is_err()); - assert!(regex_new!("(?)").is_err()); - assert!(regex_new!("*").is_err()); + assert!(Regex::new("(*)").is_err()); + assert!(Regex::new("(?:?)").is_err()); + assert!(Regex::new("(?)").is_err()); + assert!(Regex::new("*").is_err()); } // See: https://github.com/rust-lang/regex/issues/98 #[test] fn regression_many_repeat_stack_overflow() { let re = regex!("^.{1,2500}"); - assert_eq!(vec![(0, 1)], findall!(re, "a")); + assert_eq!( + vec![0..1], + re.find_iter("a").map(|m| m.range()).collect::<Vec<_>>() + ); } // See: https://github.com/rust-lang/regex/issues/555 #[test] fn regression_invalid_repetition_expr() { - assert!(regex_new!("(?m){1,1}").is_err()); + assert!(Regex::new("(?m){1,1}").is_err()); } // See: https://github.com/rust-lang/regex/issues/527 #[test] fn regression_invalid_flags_expression() { - assert!(regex_new!("(((?x)))").is_ok()); + assert!(Regex::new("(((?x)))").is_ok()); } -// See: https://github.com/rust-lang/regex/issues/75 -mat!(regression_unsorted_binary_search_1, r"(?i-u)[a_]+", "A_", Some((0, 2))); -mat!(regression_unsorted_binary_search_2, r"(?i-u)[A_]+", "a_", Some((0, 2))); - -// See: https://github.com/rust-lang/regex/issues/99 -#[cfg(feature = "unicode-case")] -mat!(regression_negated_char_class_1, r"(?i)[^x]", "x", None); -#[cfg(feature = "unicode-case")] -mat!(regression_negated_char_class_2, r"(?i)[^x]", "X", None); - -// See: https://github.com/rust-lang/regex/issues/101 -mat!(regression_ascii_word_underscore, r"[[:word:]]", "_", Some((0, 1))); - // See: 
https://github.com/rust-lang/regex/issues/129 #[test] fn regression_captures_rep() { let re = regex!(r"([a-f]){2}(?P<foo>[x-z])"); - let caps = re.captures(text!("abx")).unwrap(); - assert_eq!(match_text!(caps.name("foo").unwrap()), text!("x")); + let caps = re.captures("abx").unwrap(); + assert_eq!(&caps["foo"], "x"); } -// See: https://github.com/rust-lang/regex/issues/153 -mat!(regression_alt_in_alt1, r"ab?|$", "az", Some((0, 1))); -mat!(regression_alt_in_alt2, r"^(.*?)(\n|\r\n?|$)", "ab\rcd", Some((0, 3))); - -// See: https://github.com/rust-lang/regex/issues/169 -mat!(regression_leftmost_first_prefix, r"z*azb", "azb", Some((0, 3))); - -// See: https://github.com/rust-lang/regex/issues/76 -#[cfg(all(feature = "unicode-case", feature = "unicode-gencat"))] -mat!(uni_case_lower_nocase_flag, r"(?i)\p{Ll}+", "ΛΘΓΔα", Some((0, 10))); - -// See: https://github.com/rust-lang/regex/issues/191 -mat!(many_alternates, r"1|2|3|4|5|6|7|8|9|10|int", "int", Some((0, 3))); - -// burntsushi was bad and didn't create an issue for this bug. -mat!(anchored_prefix1, r"^a[[:^space:]]", "a ", None); -mat!(anchored_prefix2, r"^a[[:^space:]]", "foo boo a ", None); -mat!(anchored_prefix3, r"^-[a-z]", "r-f", None); - -// See: https://github.com/rust-lang/regex/issues/204 -#[cfg(feature = "unicode-perl")] -split!( - split_on_word_boundary, - r"\b", - r"Should this (work?)", - &[ - t!(""), - t!("Should"), - t!(" "), - t!("this"), - t!(" ("), - t!("work"), - t!("?)") - ] -); -#[cfg(feature = "unicode-perl")] -matiter!( - word_boundary_dfa, - r"\b", - "a b c", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); - -// See: https://github.com/rust-lang/regex/issues/268 -matiter!(partial_anchor, r"^a|b", "ba", (0, 1)); - -// See: https://github.com/rust-lang/regex/issues/280 -ismatch!(partial_anchor_alternate_begin, r"^a|z", "yyyyya", false); -ismatch!(partial_anchor_alternate_end, r"a$|z", "ayyyyy", false); - -// See: https://github.com/rust-lang/regex/issues/289 -mat!(lits_unambiguous1, r"(ABC|CDA|BC)X", "CDAX", Some((0, 4))); - -// See: https://github.com/rust-lang/regex/issues/291 -mat!( - lits_unambiguous2, - r"((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$", - "CIMG2341", - Some((0, 8)), - Some((0, 4)), - None, - Some((0, 4)), - Some((4, 8)) -); - -// See: https://github.com/rust-lang/regex/issues/271 -mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4))); -mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4))); -mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4))); -#[cfg(feature = "unicode-perl")] -mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1))); - -// See: https://github.com/rust-lang/regex/issues/321 -ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false); -ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false); - -// See: https://github.com/BurntSushi/ripgrep/issues/1203 -ismatch!(reverse_suffix1, r"[0-4][0-4][0-4]000", "153.230000", true); -ismatch!(reverse_suffix2, r"[0-9][0-9][0-9]000", "153.230000\n", true); -matiter!(reverse_suffix3, r"[0-9][0-9][0-9]000", "153.230000\n", (4, 10)); - -// See: https://github.com/rust-lang/regex/issues/334 -// See: https://github.com/rust-lang/regex/issues/557 -mat!( - captures_after_dfa_premature_end1, - r"a(b*(X|$))?", - "abcbX", - Some((0, 1)), - None, - None -); -mat!( - captures_after_dfa_premature_end2, - r"a(bc*(X|$))?", - "abcbX", - Some((0, 1)), - None, - None -); -mat!(captures_after_dfa_premature_end3, r"(aa$)?", "aaz", Some((0, 0))); - -// See: https://github.com/rust-lang/regex/issues/437 
-ismatch!( - literal_panic, - r"typename type\-parameter\-[0-9]+\-[0-9]+::.+", - "test", - false -); - -// See: https://github.com/rust-lang/regex/issues/533 -ismatch!( - blank_matches_nothing_between_space_and_tab, - r"[[:blank:]]", - "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\ - \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\ - \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}", - false -); - -ismatch!( - inverted_blank_matches_everything_between_space_and_tab, - r"^[[:^blank:]]+$", - "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\ - \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\ - \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}", - true -); - -// Tests that our Aho-Corasick optimization works correctly. It only -// kicks in when we have >32 literals. By "works correctly," we mean that -// leftmost-first match semantics are properly respected. That is, samwise -// should match, not sam. -mat!( - ahocorasick1, - "samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|\ - A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z", - "samwise", - Some((0, 7)) -); - // See: https://github.com/BurntSushi/ripgrep/issues/1247 -#[test] #[cfg(feature = "unicode-perl")] +#[test] fn regression_nfa_stops1() { - let re = ::regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap(); + let re = regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap(); assert_eq!(0, re.find_iter(b"s\xE4").count()); } -// See: https://github.com/rust-lang/regex/issues/640 -#[cfg(feature = "unicode-case")] -matiter!( - flags_are_unset, - r"((?i)foo)|Bar", - "foo Foo bar Bar", - (0, 3), - (4, 7), - (12, 15) -); +// See: https://github.com/rust-lang/regex/issues/981 +#[cfg(feature = "unicode")] +#[test] +fn regression_bad_word_boundary() { + let re = regex!(r#"(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))"#); + let hay = "ubi-Darwin-x86_64.tar.gz"; + assert!(!re.is_match(hay)); + let hay = "ubi-Windows-x86_64.zip"; + assert!(re.is_match(hay)); +} -// See: https://github.com/rust-lang/regex/issues/659 -// -// Note that 'Ј' is not 'j', but cyrillic Je -// https://en.wikipedia.org/wiki/Je_(Cyrillic) -ismatch!(empty_group_match, r"()Ј01", "zЈ01", true); -matiter!(empty_group_find, r"()Ј01", "zЈ01", (1, 5)); +// See: https://github.com/rust-lang/regex/issues/982 +#[cfg(feature = "unicode-perl")] +#[test] +fn regression_unicode_perl_not_enabled() { + let pat = r"(\d+\s?(years|year|y))?\s?(\d+\s?(months|month|m))?\s?(\d+\s?(weeks|week|w))?\s?(\d+\s?(days|day|d))?\s?(\d+\s?(hours|hour|h))?"; + assert!(Regex::new(pat).is_ok()); +} -// See: https://github.com/rust-lang/regex/issues/862 -mat!(non_greedy_question_literal, r"ab??", "ab", Some((0, 1))); +// See: https://github.com/rust-lang/regex/issues/995 +#[test] +fn regression_big_regex_overflow() { + let pat = r" {2147483516}{2147483416}{5}"; + assert!(Regex::new(pat).is_err()); +} + +// See: https://github.com/rust-lang/regex/issues/999 +#[test] +fn regression_complete_literals_suffix_incorrect() { + let needles = vec![ + "aA", "bA", "cA", "dA", "eA", "fA", "gA", "hA", "iA", "jA", "kA", + "lA", "mA", "nA", "oA", "pA", "qA", "rA", "sA", "tA", "uA", "vA", + "wA", "xA", "yA", "zA", + ]; + let pattern = needles.join("|"); + let re = regex!(&pattern); + let hay = "FUBAR"; + assert_eq!(0, re.find_iter(hay).count()); +} diff --git a/vendor/regex/tests/regression_fuzz.rs b/vendor/regex/tests/regression_fuzz.rs index 4e76704..f90ad4c 100644 --- a/vendor/regex/tests/regression_fuzz.rs +++ b/vendor/regex/tests/regression_fuzz.rs @@ -2,6 +2,14 @@ // can take quite a long time. 
Some of them take long enough that it's not // practical to run them in debug mode. :-/ +use regex::Regex; + +macro_rules! regex { + ($pattern:expr) => { + regex::Regex::new($pattern).unwrap() + }; +} + // See: https://oss-fuzz.com/testcase-detail/5673225499181056 // // Ignored by default since it takes too long in debug mode (almost a minute). @@ -14,8 +22,9 @@ fn fuzz1() { // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26505 // See: https://github.com/rust-lang/regex/issues/722 #[test] +#[cfg(feature = "unicode")] fn empty_any_errors_no_panic() { - assert!(regex_new!(r"\P{any}").is_err()); + assert!(Regex::new(r"\P{any}").is_ok()); } // This tests that a very large regex errors during compilation instead of @@ -27,5 +36,26 @@ fn empty_any_errors_no_panic() { #[test] fn big_regex_fails_to_compile() { let pat = "[\u{0}\u{e}\u{2}\\w~~>[l\t\u{0}]p?<]{971158}"; - assert!(regex_new!(pat).is_err()); + assert!(Regex::new(pat).is_err()); +} + +// This was caught while on master but before a release went out(!). +// +// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58173 +#[test] +fn todo() { + let pat = "(?:z|xx)@|xx"; + assert!(Regex::new(pat).is_ok()); +} + +// This was caused by the fuzzer, and then minimized by hand. +// +// This was caused by a bug in DFA determinization that mishandled NFA fail +// states. +#[test] +fn fail_branch_prevents_match() { + let pat = r".*[a&&b]A|B"; + let hay = "B"; + let re = Regex::new(pat).unwrap(); + assert!(re.is_match(hay)); } diff --git a/vendor/regex/tests/replace.rs b/vendor/regex/tests/replace.rs index d65be07..f26ae46 100644 --- a/vendor/regex/tests/replace.rs +++ b/vendor/regex/tests/replace.rs @@ -3,39 +3,32 @@ macro_rules! replace( $search:expr, $replace:expr, $result:expr) => ( #[test] fn $name() { - let re = regex!($re); - assert_eq!(re.$which(text!($search), $replace), text!($result)); + let re = regex::Regex::new($re).unwrap(); + assert_eq!(re.$which($search, $replace), $result); } ); ); -replace!(first, replace, r"[0-9]", "age: 26", t!("Z"), "age: Z6"); -replace!(plus, replace, r"[0-9]+", "age: 26", t!("Z"), "age: Z"); -replace!(all, replace_all, r"[0-9]", "age: 26", t!("Z"), "age: ZZ"); -replace!( - groups, - replace, - r"(?-u)(\S+)\s+(\S+)", - "w1 w2", - t!("$2 $1"), - "w2 w1" -); +replace!(first, replace, r"[0-9]", "age: 26", "Z", "age: Z6"); +replace!(plus, replace, r"[0-9]+", "age: 26", "Z", "age: Z"); +replace!(all, replace_all, r"[0-9]", "age: 26", "Z", "age: ZZ"); +replace!(groups, replace, r"([^ ]+)[ ]+([^ ]+)", "w1 w2", "$2 $1", "w2 w1"); replace!( double_dollar, replace, - r"(?-u)(\S+)\s+(\S+)", + r"([^ ]+)[ ]+([^ ]+)", "w1 w2", - t!("$2 $$1"), + "$2 $$1", "w2 $1" ); // replace!(adjacent_index, replace, -// r"([^aeiouy])ies$", "skies", t!("$1y"), "sky"); +// r"([^aeiouy])ies$", "skies", "$1y", "sky"); replace!( named, replace_all, - r"(?-u)(?P<first>\S+)\s+(?P<last>\S+)(?P<space>\s*)", + r"(?P<first>[^ ]+)[ ]+(?P<last>[^ ]+)(?P<space>[ ]*)", "w1 w2 w3 w4", - t!("$last $first$space"), + "$last $first$space", "w2 w1 w4 w3" ); replace!( @@ -43,60 +36,57 @@ replace!( replace_all, "^[ \t]+|[ \t]+$", " \t trim me\t \t", - t!(""), + "", "trim me" ); -replace!(number_hypen, replace, r"(.)(.)", "ab", t!("$1-$2"), "a-b"); -// replace!(number_underscore, replace, r"(.)(.)", "ab", t!("$1_$2"), "a_b"); +replace!(number_hyphen, replace, r"(.)(.)", "ab", "$1-$2", "a-b"); +// replace!(number_underscore, replace, r"(.)(.)", "ab", "$1_$2", "a_b"); replace!( simple_expand, replace_all, - r"(?-u)(\w) (\w)", + r"([a-z]) 
([a-z])", "a b", - t!("$2 $1"), + "$2 $1", "b a" ); replace!( literal_dollar1, replace_all, - r"(?-u)(\w+) (\w+)", + r"([a-z]+) ([a-z]+)", "a b", - t!("$$1"), + "$$1", "$1" ); replace!( literal_dollar2, replace_all, - r"(?-u)(\w+) (\w+)", + r"([a-z]+) ([a-z]+)", "a b", - t!("$2 $$c $1"), + "$2 $$c $1", "b $c a" ); replace!( no_expand1, replace, - r"(?-u)(\S+)\s+(\S+)", + r"([^ ]+)[ ]+([^ ]+)", "w1 w2", - no_expand!("$2 $1"), + regex::NoExpand("$2 $1"), "$2 $1" ); replace!( no_expand2, replace, - r"(?-u)(\S+)\s+(\S+)", + r"([^ ]+)[ ]+([^ ]+)", "w1 w2", - no_expand!("$$1"), + regex::NoExpand("$$1"), "$$1" ); -use_!(Captures); replace!( closure_returning_reference, replace, r"([0-9]+)", "age: 26", - |captures: &Captures<'_>| { - match_text!(captures.get(1).unwrap())[0..1].to_owned() - }, + |captures: ®ex::Captures<'_>| { captures[1][0..1].to_owned() }, "age: 2" ); replace!( @@ -104,7 +94,7 @@ replace!( replace, r"[0-9]+", "age: 26", - |_captures: &Captures<'_>| t!("Z").to_owned(), + |_captures: ®ex::Captures<'_>| "Z".to_owned(), "age: Z" ); @@ -114,12 +104,12 @@ replace!( replace_all, r"foo", "foobar", - t!(""), + "", "bar" ); // See https://github.com/rust-lang/regex/issues/393 -replace!(single_empty_match, replace, r"^", "bar", t!("foo"), "foobar"); +replace!(single_empty_match, replace, r"^", "bar", "foo", "foobar"); // See https://github.com/rust-lang/regex/issues/399 replace!( @@ -127,7 +117,7 @@ replace!( replace_all, r"(.)", "b", - t!("${1}a $1a"), + "${1}a $1a", "ba " ); @@ -136,7 +126,7 @@ replace!( replace, r"[0-9]", "age: 26", - t!("Z".to_string()), + "Z".to_string(), "age: Z6" ); replace!( @@ -144,7 +134,7 @@ replace!( replace, r"[0-9]", "age: 26", - t!(&"Z".to_string()), + &"Z".to_string(), "age: Z6" ); replace!( @@ -152,7 +142,7 @@ replace!( replace, r"[0-9]", "age: 26", - t!(std::borrow::Cow::<'_, str>::Borrowed("Z")), + std::borrow::Cow::<'_, str>::Borrowed("Z"), "age: Z6" ); replace!( @@ -160,7 +150,7 @@ replace!( replace, r"[0-9]", "age: 26", - t!(&std::borrow::Cow::<'_, str>::Borrowed("Z")), + &std::borrow::Cow::<'_, str>::Borrowed("Z"), "age: Z6" ); replace!( @@ -168,7 +158,7 @@ replace!( replace, r"[0-9]", "age: 26", - t!(std::borrow::Cow::<'_, str>::Owned("Z".to_string())), + std::borrow::Cow::<'_, str>::Owned("Z".to_string()), "age: Z6" ); replace!( @@ -176,73 +166,18 @@ replace!( replace, r"[0-9]", "age: 26", - t!(&std::borrow::Cow::<'_, str>::Owned("Z".to_string())), - "age: Z6" -); - -replace!( - impl_vec_u8, - replace, - r"[0-9]", - "age: 26", - bytes!(vec![b'Z']), - "age: Z6" -); -replace!( - impl_vec_u8_ref, - replace, - r"[0-9]", - "age: 26", - bytes!(&vec![b'Z']), - "age: Z6" -); -replace!( - impl_cow_slice_borrowed, - replace, - r"[0-9]", - "age: 26", - bytes!(std::borrow::Cow::<'_, [u8]>::Borrowed(&[b'Z'])), - "age: Z6" -); -replace!( - impl_cow_slice_borrowed_ref, - replace, - r"[0-9]", - "age: 26", - bytes!(&std::borrow::Cow::<'_, [u8]>::Borrowed(&[b'Z'])), - "age: Z6" -); -replace!( - impl_cow_slice_owned, - replace, - r"[0-9]", - "age: 26", - bytes!(std::borrow::Cow::<'_, [u8]>::Owned(vec![b'Z'])), - "age: Z6" -); -replace!( - impl_cow_slice_owned_ref, - replace, - r"[0-9]", - "age: 26", - bytes!(&std::borrow::Cow::<'_, [u8]>::Owned(vec![b'Z'])), + &std::borrow::Cow::<'_, str>::Owned("Z".to_string()), "age: Z6" ); #[test] fn replacen_no_captures() { - let re = regex!(r"[0-9]"); - assert_eq!( - re.replacen(text!("age: 1234"), 2, t!("Z")), - text!("age: ZZ34") - ); + let re = regex::Regex::new(r"[0-9]").unwrap(); + assert_eq!(re.replacen("age: 1234", 2, 
"Z"), "age: ZZ34"); } #[test] fn replacen_with_captures() { - let re = regex!(r"([0-9])"); - assert_eq!( - re.replacen(text!("age: 1234"), 2, t!("${1}Z")), - text!("age: 1Z2Z34") - ); + let re = regex::Regex::new(r"([0-9])").unwrap(); + assert_eq!(re.replacen("age: 1234", 2, "${1}Z"), "age: 1Z2Z34"); } diff --git a/vendor/regex/tests/searcher.rs b/vendor/regex/tests/searcher.rs index 3779f54..f6dae13 100644 --- a/vendor/regex/tests/searcher.rs +++ b/vendor/regex/tests/searcher.rs @@ -12,20 +12,18 @@ macro_rules! searcher { #[test] #[allow(unused_imports)] fn $name() { - searcher_expr! {{ - use std::str::pattern::{Pattern, Searcher}; - use std::str::pattern::SearchStep::{Match, Reject, Done}; - let re = regex!($re); - let mut se = re.into_searcher($haystack); - let mut got_steps = vec![]; - loop { - match se.next() { - Done => break, - step => { got_steps.push(step); } - } + use std::str::pattern::{Pattern, Searcher}; + use std::str::pattern::SearchStep::{Match, Reject, Done}; + let re = regex::Regex::new($re).unwrap(); + let mut se = re.into_searcher($haystack); + let mut got_steps = vec![]; + loop { + match se.next() { + Done => break, + step => { got_steps.push(step); } } - assert_eq!(got_steps, $expect_steps); - }} + } + assert_eq!(got_steps, $expect_steps); } ); } diff --git a/vendor/regex/tests/set.rs b/vendor/regex/tests/set.rs deleted file mode 100644 index 37fcf87..0000000 --- a/vendor/regex/tests/set.rs +++ /dev/null @@ -1,67 +0,0 @@ -matset!(set1, &["a", "a"], "a", 0, 1); -matset!(set2, &["a", "a"], "ba", 0, 1); -matset!(set3, &["a", "b"], "a", 0); -matset!(set4, &["a", "b"], "b", 1); -matset!(set5, &["a|b", "b|a"], "b", 0, 1); -matset!(set6, &["foo", "oo"], "foo", 0, 1); -matset!(set7, &["^foo", "bar$"], "foo", 0); -matset!(set8, &["^foo", "bar$"], "foo bar", 0, 1); -matset!(set9, &["^foo", "bar$"], "bar", 1); -matset!(set10, &[r"[a-z]+$", "foo"], "01234 foo", 0, 1); -matset!(set11, &[r"[a-z]+$", "foo"], "foo 01234", 1); -matset!(set12, &[r".*?", "a"], "zzzzzza", 0, 1); -matset!(set13, &[r".*", "a"], "zzzzzza", 0, 1); -matset!(set14, &[r".*", "a"], "zzzzzz", 0); -matset!(set15, &[r"(?-u)\ba\b"], "hello a bye", 0); -matset!(set16, &["a"], "a", 0); -matset!(set17, &[".*a"], "a", 0); -matset!(set18, &["a", "β"], "β", 1); - -// regexes that match the empty string -matset!(setempty1, &["", "a"], "abc", 0, 1); -matset!(setempty2, &["", "b"], "abc", 0, 1); -matset!(setempty3, &["", "z"], "abc", 0); -matset!(setempty4, &["a", ""], "abc", 0, 1); -matset!(setempty5, &["b", ""], "abc", 0, 1); -matset!(setempty6, &["z", ""], "abc", 1); -matset!(setempty7, &["b", "(?:)"], "abc", 0, 1); -matset!(setempty8, &["(?:)", "b"], "abc", 0, 1); -matset!(setempty9, &["c(?:)", "b"], "abc", 0, 1); - -nomatset!(nset1, &["a", "a"], "b"); -nomatset!(nset2, &["^foo", "bar$"], "bar foo"); -nomatset!( - nset3, - { - let xs: &[&str] = &[]; - xs - }, - "a" -); -nomatset!(nset4, &[r"^rooted$", r"\.log$"], "notrooted"); - -// See: https://github.com/rust-lang/regex/issues/187 -#[test] -fn regression_subsequent_matches() { - let set = regex_set!(&["ab", "b"]); - let text = text!("ba"); - assert!(set.matches(text).matched(1)); - assert!(set.matches(text).matched(1)); -} - -#[test] -fn get_set_patterns() { - let set = regex_set!(&["a", "b"]); - assert_eq!(vec!["a", "b"], set.patterns()); -} - -#[test] -fn len_and_empty() { - let empty = regex_set!(&[""; 0]); - assert_eq!(empty.len(), 0); - assert!(empty.is_empty()); - - let not_empty = regex_set!(&["ab", "b"]); - assert_eq!(not_empty.len(), 2); - 
assert!(!not_empty.is_empty()); -} diff --git a/vendor/regex/tests/shortest_match.rs b/vendor/regex/tests/shortest_match.rs deleted file mode 100644 index f8b4fed..0000000 --- a/vendor/regex/tests/shortest_match.rs +++ /dev/null @@ -1,14 +0,0 @@ -macro_rules! shortmat { - ($name:ident, $re:expr, $text:expr, $shortest_match:expr) => { - #[test] - fn $name() { - let text = text!($text); - let re = regex!($re); - assert_eq!($shortest_match, re.shortest_match(text)); - } - }; -} - -shortmat!(t01, r"a+", r"aa", Some(1)); -// Test that the reverse suffix optimization gets it right. -shortmat!(t02, r".*(?:abcd)+", r"abcdabcd", Some(4)); diff --git a/vendor/regex/tests/suffix_reverse.rs b/vendor/regex/tests/suffix_reverse.rs deleted file mode 100644 index 774c9e8..0000000 --- a/vendor/regex/tests/suffix_reverse.rs +++ /dev/null @@ -1,6 +0,0 @@ -mat!(t01, r".*abcd", r"abcd", Some((0, 4))); -mat!(t02, r".*(?:abcd)+", r"abcd", Some((0, 4))); -mat!(t03, r".*(?:abcd)+", r"abcdabcd", Some((0, 8))); -mat!(t04, r".*(?:abcd)+", r"abcdxabcd", Some((0, 9))); -mat!(t05, r".*x(?:abcd)+", r"abcdxabcd", Some((0, 9))); -mat!(t06, r"[^abcd]*x(?:abcd)+", r"abcdxabcd", Some((4, 9))); diff --git a/vendor/regex/tests/suite_bytes.rs b/vendor/regex/tests/suite_bytes.rs new file mode 100644 index 0000000..106d998 --- /dev/null +++ b/vendor/regex/tests/suite_bytes.rs @@ -0,0 +1,108 @@ +use { + anyhow::Result, + regex::bytes::{Regex, RegexBuilder}, + regex_test::{ + CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner, + }, +}; + +/// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &Regex, test: &RegexTest) -> TestResult { + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(test.haystack())), + "find" => TestResult::matches( + re.find_iter(test.haystack()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: 0, + span: Span { start: m.start(), end: m.end() }, + }), + ), + "captures" => { + let it = re + .captures_iter(test.haystack()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. +fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // We're only testing bytes::Regex here, which supports one pattern only. + let pattern = match test.regexes().len() { + 1 => &test.regexes()[0], + _ => return skip, + }; + // We only test is_match, find_iter and captures_iter. All of those are + // leftmost searches. + if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) { + return skip; + } + // The top-level single-pattern regex API always uses leftmost-first. + if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) { + return skip; + } + // The top-level regex API always runs unanchored searches. ... But we can + // handle tests that are anchored but have only one match. 
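// (Editorial sketch; the pattern, haystack, and function name are invented.)
// Why the single-match anchored case seems safe to route through the
// unanchored API: when the one expected match starts where the search
// starts, an unanchored leftmost search reports exactly the span an
// anchored search would, so comparing against a single expected match
// cannot diverge.
#[allow(dead_code)]
fn anchored_single_match_sketch() {
    let re = regex::bytes::Regex::new(r"s[ab]").unwrap();
    // The leftmost match begins at offset 0, which is also what an
    // anchored search at position 0 would report.
    let m = re.find(b"sab").unwrap();
    assert_eq!((0, 2), (m.start(), m.end()));
}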
+ if test.anchored() && test.match_limit() != Some(1) { + return skip; + } + // We don't support tests with explicit search bounds. We could probably + // support this by using the 'find_at' (and such) APIs. + let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The bytes::Regex API specifically does not support enabling UTF-8 mode. + // It could I suppose, but currently it does not. That is, it permits + // matches to have offsets that split codepoints. + if test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. + if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexBuilder::new(pattern) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} + +/// Convert `Captures` into the test suite's capture values. +fn testify_captures( + caps: ®ex::bytes::Captures<'_>, +) -> regex_test::Captures { + let spans = caps.iter().map(|group| { + group.map(|m| regex_test::Span { start: m.start(), end: m.end() }) + }); + // This unwrap is OK because we assume our 'caps' represents a match, and + // a match always gives a non-zero number of groups with the first group + // being non-None. + regex_test::Captures::new(0, spans).unwrap() +} diff --git a/vendor/regex/tests/suite_bytes_set.rs b/vendor/regex/tests/suite_bytes_set.rs new file mode 100644 index 0000000..899d24c --- /dev/null +++ b/vendor/regex/tests/suite_bytes_set.rs @@ -0,0 +1,71 @@ +use { + anyhow::Result, + regex::bytes::{RegexSet, RegexSetBuilder}, + regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner}, +}; + +/// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "which"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult { + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(test.haystack())), + "which" => TestResult::which(re.matches(test.haystack()).iter()), + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. +fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // The top-level RegexSet API only supports "overlapping" semantics. + if !matches!(test.search_kind(), regex_test::SearchKind::Overlapping) { + return skip; + } + // The top-level RegexSet API only supports "all" semantics. + if !matches!(test.match_kind(), regex_test::MatchKind::All) { + return skip; + } + // The top-level RegexSet API always runs unanchored searches. + if test.anchored() { + return skip; + } + // We don't support tests with explicit search bounds. + let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The bytes::Regex API specifically does not support enabling UTF-8 mode. 
+ // It could I suppose, but currently it does not. That is, it permits + // matches to have offsets that split codepoints. + if test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. + if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexSetBuilder::new(test.regexes()) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} diff --git a/vendor/regex/tests/suite_string.rs b/vendor/regex/tests/suite_string.rs new file mode 100644 index 0000000..1e5bf0b --- /dev/null +++ b/vendor/regex/tests/suite_string.rs @@ -0,0 +1,114 @@ +use { + anyhow::Result, + regex::{Regex, RegexBuilder}, + regex_test::{ + CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner, + }, +}; + +/// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &Regex, test: &RegexTest) -> TestResult { + let hay = match std::str::from_utf8(test.haystack()) { + Ok(hay) => hay, + Err(err) => { + return TestResult::fail(&format!( + "haystack is not valid UTF-8: {}", + err + )); + } + }; + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(hay)), + "find" => TestResult::matches( + re.find_iter(hay) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: 0, + span: Span { start: m.start(), end: m.end() }, + }), + ), + "captures" => { + let it = re + .captures_iter(hay) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. +fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // We're only testing bytes::Regex here, which supports one pattern only. + let pattern = match test.regexes().len() { + 1 => &test.regexes()[0], + _ => return skip, + }; + // We only test is_match, find_iter and captures_iter. All of those are + // leftmost searches. + if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) { + return skip; + } + // The top-level single-pattern regex API always uses leftmost-first. + if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) { + return skip; + } + // The top-level regex API always runs unanchored searches. ... But we can + // handle tests that are anchored but have only one match. + if test.anchored() && test.match_limit() != Some(1) { + return skip; + } + // We don't support tests with explicit search bounds. We could probably + // support this by using the 'find_at' (and such) APIs. 
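// (Editorial sketch; the helper below is invented, not a suite API.) What
// emulating bounds with 'find_at' might look like: the start bound maps
// directly onto the 'start' parameter, but 'find_at' takes no end
// position, so an end bound needs separate enforcement. Note that naively
// rejecting a match that spills past 'end' can hide a later match lying
// entirely within bounds, which is part of why bounded tests are skipped
// rather than emulated.
#[allow(dead_code)]
fn find_within(
    re: &Regex,
    hay: &str,
    start: usize,
    end: usize,
) -> Option<(usize, usize)> {
    re.find_at(hay, start)
        .filter(|m| m.end() <= end)
        .map(|m| (m.start(), m.end()))
}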
+ let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The Regex API specifically does not support disabling UTF-8 mode because + // it can only search &str which is always valid UTF-8. + if !test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. + if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexBuilder::new(pattern) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} + +/// Convert `Captures` into the test suite's capture values. +fn testify_captures(caps: ®ex::Captures<'_>) -> regex_test::Captures { + let spans = caps.iter().map(|group| { + group.map(|m| regex_test::Span { start: m.start(), end: m.end() }) + }); + // This unwrap is OK because we assume our 'caps' represents a match, and + // a match always gives a non-zero number of groups with the first group + // being non-None. + regex_test::Captures::new(0, spans).unwrap() +} diff --git a/vendor/regex/tests/suite_string_set.rs b/vendor/regex/tests/suite_string_set.rs new file mode 100644 index 0000000..dffdc70 --- /dev/null +++ b/vendor/regex/tests/suite_string_set.rs @@ -0,0 +1,79 @@ +use { + anyhow::Result, + regex::{RegexSet, RegexSetBuilder}, + regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner}, +}; + +/// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "which"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult { + let hay = match std::str::from_utf8(test.haystack()) { + Ok(hay) => hay, + Err(err) => { + return TestResult::fail(&format!( + "haystack is not valid UTF-8: {}", + err + )); + } + }; + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(hay)), + "which" => TestResult::which(re.matches(hay).iter()), + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. +fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // The top-level RegexSet API only supports "overlapping" semantics. + if !matches!(test.search_kind(), regex_test::SearchKind::Overlapping) { + return skip; + } + // The top-level RegexSet API only supports "all" semantics. + if !matches!(test.match_kind(), regex_test::MatchKind::All) { + return skip; + } + // The top-level RegexSet API always runs unanchored searches. + if test.anchored() { + return skip; + } + // We don't support tests with explicit search bounds. + let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The Regex API specifically does not support disabling UTF-8 mode because + // it can only search &str which is always valid UTF-8. 
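// (Editorial sketch; the function name is invented.) The same invariant is
// enforced at pattern compile time: a pattern that could match invalid
// UTF-8 is rejected by the string API but accepted by the bytes API.
#[allow(dead_code)]
fn utf8_mode_compile_sketch() {
    // `(?-u)\xFF` matches a lone 0xFF byte, which never occurs in valid
    // UTF-8, so only the bytes API can compile it.
    assert!(regex::RegexSet::new([r"(?-u)\xFF"]).is_err());
    assert!(regex::bytes::RegexSet::new([r"(?-u)\xFF"]).is_ok());
}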
+ if !test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. + if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexSetBuilder::new(test.regexes()) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} diff --git a/vendor/regex/tests/test_backtrack.rs b/vendor/regex/tests/test_backtrack.rs deleted file mode 100644 index fb934e2..0000000 --- a/vendor/regex/tests/test_backtrack.rs +++ /dev/null @@ -1,56 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .bounded_backtracking() - .build() - .map(|e| e.into_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .bounded_backtracking() - .build() - .map(|e| e.into_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/test_backtrack_bytes.rs b/vendor/regex/tests/test_backtrack_bytes.rs deleted file mode 100644 index a59426c..0000000 --- a/vendor/regex/tests/test_backtrack_bytes.rs +++ /dev/null @@ -1,55 +0,0 @@ -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .bounded_backtracking() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .bounded_backtracking() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_bytes.rs"); -include!("macros.rs"); - -mod api; -mod bytes; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_ascii; diff --git a/vendor/regex/tests/test_backtrack_utf8bytes.rs b/vendor/regex/tests/test_backtrack_utf8bytes.rs deleted file mode 100644 index 6d308e9..0000000 --- a/vendor/regex/tests/test_backtrack_utf8bytes.rs +++ /dev/null @@ -1,58 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .bounded_backtracking() - .bytes(true) - .build() - .map(|e| e.into_regex()) - }}; -} - -macro_rules! 
regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .bounded_backtracking() - .bytes(true) - .build() - .map(|e| e.into_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/test_crates_regex.rs b/vendor/regex/tests/test_crates_regex.rs deleted file mode 100644 index a681604..0000000 --- a/vendor/regex/tests/test_crates_regex.rs +++ /dev/null @@ -1,54 +0,0 @@ -/* - * This test is a minimal version of <rofl_0> and <subdiff_0> - * - * Once this bug gets fixed, uncomment rofl_0 and subdiff_0 - * (in `tests/crates_regex.rs`). -#[test] -fn word_boundary_backtracking_default_mismatch() { - use regex::internal::ExecBuilder; - - let backtrack_re = ExecBuilder::new(r"\b") - .bounded_backtracking() - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let default_re = ExecBuilder::new(r"\b") - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let input = "䅅\\u{a0}"; - - let fi1 = backtrack_re.find_iter(input); - let fi2 = default_re.find_iter(input); - for (m1, m2) in fi1.zip(fi2) { - assert_eq!(m1, m2); - } -} -*/ - -mod consistent; - -mod crates_regex { - - macro_rules! consistent { - ($test_name:ident, $regex_src:expr) => { - #[test] - fn $test_name() { - use super::consistent::backends_are_consistent; - - if option_env!("RUST_REGEX_RANDOM_TEST").is_some() { - match backends_are_consistent($regex_src) { - Ok(_) => {} - Err(err) => panic!("{}", err), - } - } - } - }; - } - - include!("crates_regex.rs"); -} diff --git a/vendor/regex/tests/test_default.rs b/vendor/regex/tests/test_default.rs deleted file mode 100644 index 19a319a..0000000 --- a/vendor/regex/tests/test_default.rs +++ /dev/null @@ -1,232 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -use regex; - -// Due to macro scoping rules, this definition only applies for the modules -// defined below. Effectively, it allows us to use the same tests for both -// native and dynamic regexes. -// -// This is also used to test the various matching engines. This one exercises -// the normal code path which automatically chooses the engine based on the -// regex and the input. Other dynamic tests explicitly set the engine to use. -macro_rules! regex_new { - ($re:expr) => {{ - use regex::Regex; - Regex::new($re) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::RegexSet; - RegexSet::new($re) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. 
-include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod misc; -mod multiline; -mod noparse; -mod regression; -mod regression_fuzz; -mod replace; -mod searcher; -mod set; -mod shortest_match; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; - -#[test] -fn disallow_non_utf8() { - assert!(regex::Regex::new(r"(?-u)\xFF").is_err()); - assert!(regex::Regex::new(r"(?-u).").is_err()); - assert!(regex::Regex::new(r"(?-u)[\xFF]").is_err()); - assert!(regex::Regex::new(r"(?-u)☃").is_err()); -} - -#[test] -fn disallow_octal() { - assert!(regex::Regex::new(r"\0").is_err()); -} - -#[test] -fn allow_octal() { - assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok()); -} - -#[test] -fn oibits() { - use regex::bytes; - use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder}; - use std::panic::{RefUnwindSafe, UnwindSafe}; - - fn assert_send<T: Send>() {} - fn assert_sync<T: Sync>() {} - fn assert_unwind_safe<T: UnwindSafe>() {} - fn assert_ref_unwind_safe<T: RefUnwindSafe>() {} - - assert_send::<Regex>(); - assert_sync::<Regex>(); - assert_unwind_safe::<Regex>(); - assert_ref_unwind_safe::<Regex>(); - assert_send::<RegexBuilder>(); - assert_sync::<RegexBuilder>(); - assert_unwind_safe::<RegexBuilder>(); - assert_ref_unwind_safe::<RegexBuilder>(); - - assert_send::<bytes::Regex>(); - assert_sync::<bytes::Regex>(); - assert_unwind_safe::<bytes::Regex>(); - assert_ref_unwind_safe::<bytes::Regex>(); - assert_send::<bytes::RegexBuilder>(); - assert_sync::<bytes::RegexBuilder>(); - assert_unwind_safe::<bytes::RegexBuilder>(); - assert_ref_unwind_safe::<bytes::RegexBuilder>(); - - assert_send::<RegexSet>(); - assert_sync::<RegexSet>(); - assert_unwind_safe::<RegexSet>(); - assert_ref_unwind_safe::<RegexSet>(); - assert_send::<RegexSetBuilder>(); - assert_sync::<RegexSetBuilder>(); - assert_unwind_safe::<RegexSetBuilder>(); - assert_ref_unwind_safe::<RegexSetBuilder>(); - - assert_send::<bytes::RegexSet>(); - assert_sync::<bytes::RegexSet>(); - assert_unwind_safe::<bytes::RegexSet>(); - assert_ref_unwind_safe::<bytes::RegexSet>(); - assert_send::<bytes::RegexSetBuilder>(); - assert_sync::<bytes::RegexSetBuilder>(); - assert_unwind_safe::<bytes::RegexSetBuilder>(); - assert_ref_unwind_safe::<bytes::RegexSetBuilder>(); -} - -// See: https://github.com/rust-lang/regex/issues/568 -#[test] -fn oibits_regression() { - use regex::Regex; - use std::panic; - - let _ = panic::catch_unwind(|| Regex::new("a").unwrap()); -} - -// See: https://github.com/rust-lang/regex/issues/750 -#[test] -#[cfg(target_pointer_width = "64")] -fn regex_is_reasonably_small() { - use std::mem::size_of; - - use regex::bytes; - use regex::{Regex, RegexSet}; - - assert_eq!(16, size_of::<Regex>()); - assert_eq!(16, size_of::<RegexSet>()); - assert_eq!(16, size_of::<bytes::Regex>()); - assert_eq!(16, size_of::<bytes::RegexSet>()); -} - -// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 -// See: CVE-2022-24713 -// -// We test that our regex compiler will correctly return a "too big" error when -// we try to use a very large repetition on an *empty* sub-expression. -// -// At the time this test was written, the regex compiler does not represent -// empty sub-expressions with any bytecode instructions. 
In effect, it's an -// "optimization" to leave them out, since they would otherwise correspond -// to an unconditional JUMP in the regex bytecode (i.e., an unconditional -// epsilon transition in the NFA graph). Therefore, an empty sub-expression -// represents an interesting case for the compiler's size limits. Since it -// doesn't actually contribute any additional memory to the compiled regex -// instructions, the size limit machinery never detects it. Instead, it just -// dumbly tries to compile the empty sub-expression N times, where N is the -// repetition size. -// -// When N is very large, this will cause the compiler to essentially spin and -// do nothing for a decently large amount of time. It causes the regex to take -// quite a bit of time to compile, despite the concrete syntax of the regex -// being quite small. -// -// The degree to which this is actually a problem is somewhat of a judgment -// call. Some regexes simply take a long time to compile. But in general, you -// should be able to reasonably control this by setting lower or higher size -// limits on the compiled object size. But this mitigation doesn't work at all -// for this case. -// -// This particular test is somewhat narrow. It merely checks that regex -// compilation will, at some point, return a "too big" error. Before the -// fix landed, this test would eventually fail because the regex would be -// successfully compiled (after enough time elapsed). So while this test -// doesn't check that we exit in a reasonable amount of time, it does at least -// check that we are properly returning an error at some point. -#[test] -fn big_empty_regex_fails() { - use regex::Regex; - - let result = Regex::new("(?:){4294967295}"); - assert!(result.is_err()); -} - -// Below is a "billion laughs" variant of the previous test case. -#[test] -fn big_empty_reps_chain_regex_fails() { - use regex::Regex; - - let result = Regex::new("(?:){64}{64}{64}{64}{64}{64}"); - assert!(result.is_err()); -} - -// Below is another situation where a zero-length sub-expression can be -// introduced. -#[test] -fn big_zero_reps_regex_fails() { - use regex::Regex; - - let result = Regex::new(r"x{0}{4294967295}"); - assert!(result.is_err()); -} - -// Testing another case for completeness. -#[test] -fn empty_alt_regex_fails() { - use regex::Regex; - - let result = Regex::new(r"(?:|){4294967295}"); - assert!(result.is_err()); -} - -// Regression test for: https://github.com/rust-lang/regex/issues/969 -#[test] -fn regression_i969() { - use regex::Regex; - - let re = Regex::new(r"c.*d\z").unwrap(); - assert_eq!(Some(6), re.shortest_match_at("ababcd", 4)); - assert_eq!(Some(6), re.find_at("ababcd", 4).map(|m| m.end())); -} diff --git a/vendor/regex/tests/test_default_bytes.rs b/vendor/regex/tests/test_default_bytes.rs deleted file mode 100644 index f200596..0000000 --- a/vendor/regex/tests/test_default_bytes.rs +++ /dev/null @@ -1,75 +0,0 @@ -macro_rules! regex_new { - ($re:expr) => {{ - use regex::bytes::Regex; - Regex::new($re) - }}; -} - -macro_rules! regex_set_new { - ($res:expr) => {{ - use regex::bytes::RegexSet; - RegexSet::new($res) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_bytes.rs"); -include!("macros.rs"); - -// A silly wrapper to make it possible to write and match raw bytes. 
-struct R<'a>(&'a [u8]); -impl<'a> R<'a> { - fn as_bytes(&self) -> &'a [u8] { - self.0 - } -} - -// See: https://github.com/rust-lang/regex/issues/321 -// -// These tests are here because they do not have the same behavior in every -// regex engine. -mat!(invalid_utf8_nfa1, r".", R(b"\xD4\xC2\x65\x2B\x0E\xFE"), Some((2, 3))); -mat!(invalid_utf8_nfa2, r"${2}ä", R(b"\xD4\xC2\x65\x2B\x0E\xFE"), None); -mat!( - invalid_utf8_nfa3, - r".", - R(b"\x0A\xDB\x82\x6E\x33\x01\xDD\x33\xCD"), - Some((1, 3)) -); -mat!( - invalid_utf8_nfa4, - r"${2}ä", - R(b"\x0A\xDB\x82\x6E\x33\x01\xDD\x33\xCD"), - None -); - -mod api; -mod bytes; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod set; -mod shortest_match; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/test_nfa.rs b/vendor/regex/tests/test_nfa.rs deleted file mode 100644 index e5a67d1..0000000 --- a/vendor/regex/tests/test_nfa.rs +++ /dev/null @@ -1,50 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re).nfa().build().map(|e| e.into_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re).nfa().build().map(|e| e.into_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/test_nfa_bytes.rs b/vendor/regex/tests/test_nfa_bytes.rs deleted file mode 100644 index 0a10e03..0000000 --- a/vendor/regex/tests/test_nfa_bytes.rs +++ /dev/null @@ -1,55 +0,0 @@ -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .nfa() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .nfa() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. 
-include!("macros_bytes.rs"); -include!("macros.rs"); - -mod api; -mod bytes; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/test_nfa_utf8bytes.rs b/vendor/regex/tests/test_nfa_utf8bytes.rs deleted file mode 100644 index 36a572b..0000000 --- a/vendor/regex/tests/test_nfa_utf8bytes.rs +++ /dev/null @@ -1,54 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re).nfa().bytes(true).build().map(|e| e.into_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .nfa() - .bytes(true) - .build() - .map(|e| e.into_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/unicode.rs b/vendor/regex/tests/unicode.rs deleted file mode 100644 index 9b32286..0000000 --- a/vendor/regex/tests/unicode.rs +++ /dev/null @@ -1,251 +0,0 @@ -mat!(uni_literal, r"☃", "☃", Some((0, 3))); -mat!(uni_literal_plus, r"☃+", "☃", Some((0, 3))); -mat!(uni_literal_casei_plus, r"(?i)☃+", "☃", Some((0, 3))); -mat!(uni_class_plus, r"[☃Ⅰ]+", "☃", Some((0, 3))); -mat!(uni_one, r"\pN", "Ⅰ", Some((0, 3))); -mat!(uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8))); -mat!(uni_not, r"\PN+", "abⅠ", Some((0, 2))); -mat!(uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2))); -mat!(uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5))); -mat!(uni_case, r"(?i)Δ", "δ", Some((0, 2))); -mat!(uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8))); -mat!(uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10))); -mat!(uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10))); -mat!(uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10))); - -// Test the Unicode friendliness of Perl character classes. -mat!(uni_perl_w, r"\w+", "dδd", Some((0, 4))); -mat!(uni_perl_w_not, r"\w+", "⥡", None); -mat!(uni_perl_w_neg, r"\W+", "⥡", Some((0, 3))); -mat!(uni_perl_d, r"\d+", "1२३9", Some((0, 8))); -mat!(uni_perl_d_not, r"\d+", "Ⅱ", None); -mat!(uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3))); -mat!(uni_perl_s, r"\s+", " ", Some((0, 3))); -mat!(uni_perl_s_not, r"\s+", "☃", None); -mat!(uni_perl_s_neg, r"\S+", "☃", Some((0, 3))); - -// And do the same for word boundaries. -mat!(uni_boundary_none, r"\d\b", "6δ", None); -mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1))); -mat!(uni_not_boundary_none, r"\d\B", "6δ", Some((0, 1))); -mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None); - -// Test general categories. -// -// We should test more, but there's a lot. Write a script to generate more of -// these tests. 
-mat!(uni_class_gencat_cased_letter, r"\p{Cased_Letter}", "A", Some((0, 3))); -mat!( - uni_class_gencat_close_punctuation, - r"\p{Close_Punctuation}", - "❯", - Some((0, 3)) -); -mat!( - uni_class_gencat_connector_punctuation, - r"\p{Connector_Punctuation}", - "⁀", - Some((0, 3)) -); -mat!(uni_class_gencat_control, r"\p{Control}", "\u{9f}", Some((0, 2))); -mat!( - uni_class_gencat_currency_symbol, - r"\p{Currency_Symbol}", - "£", - Some((0, 3)) -); -mat!( - uni_class_gencat_dash_punctuation, - r"\p{Dash_Punctuation}", - "〰", - Some((0, 3)) -); -mat!(uni_class_gencat_decimal_numer, r"\p{Decimal_Number}", "𑓙", Some((0, 4))); -mat!( - uni_class_gencat_enclosing_mark, - r"\p{Enclosing_Mark}", - "\u{A672}", - Some((0, 3)) -); -mat!( - uni_class_gencat_final_punctuation, - r"\p{Final_Punctuation}", - "⸡", - Some((0, 3)) -); -mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4))); -// See: https://github.com/rust-lang/regex/issues/719 -mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4))); -mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4))); -mat!( - uni_class_gencat_initial_punctuation, - r"\p{Initial_Punctuation}", - "⸜", - Some((0, 3)) -); -mat!(uni_class_gencat_letter, r"\p{Letter}", "Έ", Some((0, 2))); -mat!(uni_class_gencat_letter_number, r"\p{Letter_Number}", "ↂ", Some((0, 3))); -mat!( - uni_class_gencat_line_separator, - r"\p{Line_Separator}", - "\u{2028}", - Some((0, 3)) -); -mat!( - uni_class_gencat_lowercase_letter, - r"\p{Lowercase_Letter}", - "ϛ", - Some((0, 2)) -); -mat!(uni_class_gencat_mark, r"\p{Mark}", "\u{E01EF}", Some((0, 4))); -mat!(uni_class_gencat_math, r"\p{Math}", "⋿", Some((0, 3))); -mat!( - uni_class_gencat_modifier_letter, - r"\p{Modifier_Letter}", - "𖭃", - Some((0, 4)) -); -mat!( - uni_class_gencat_modifier_symbol, - r"\p{Modifier_Symbol}", - "🏿", - Some((0, 4)) -); -mat!( - uni_class_gencat_nonspacing_mark, - r"\p{Nonspacing_Mark}", - "\u{1E94A}", - Some((0, 4)) -); -mat!(uni_class_gencat_number, r"\p{Number}", "⓿", Some((0, 3))); -mat!( - uni_class_gencat_open_punctuation, - r"\p{Open_Punctuation}", - "⦅", - Some((0, 3)) -); -mat!(uni_class_gencat_other, r"\p{Other}", "\u{bc9}", Some((0, 3))); -mat!(uni_class_gencat_other_letter, r"\p{Other_Letter}", "ꓷ", Some((0, 3))); -mat!(uni_class_gencat_other_number, r"\p{Other_Number}", "㉏", Some((0, 3))); -mat!( - uni_class_gencat_other_punctuation, - r"\p{Other_Punctuation}", - "𞥞", - Some((0, 4)) -); -mat!(uni_class_gencat_other_symbol, r"\p{Other_Symbol}", "⅌", Some((0, 3))); -mat!( - uni_class_gencat_paragraph_separator, - r"\p{Paragraph_Separator}", - "\u{2029}", - Some((0, 3)) -); -mat!( - uni_class_gencat_private_use, - r"\p{Private_Use}", - "\u{10FFFD}", - Some((0, 4)) -); -mat!(uni_class_gencat_punctuation, r"\p{Punctuation}", "𑁍", Some((0, 4))); -mat!(uni_class_gencat_separator, r"\p{Separator}", "\u{3000}", Some((0, 3))); -mat!( - uni_class_gencat_space_separator, - r"\p{Space_Separator}", - "\u{205F}", - Some((0, 3)) -); -mat!( - uni_class_gencat_spacing_mark, - r"\p{Spacing_Mark}", - "\u{16F7E}", - Some((0, 4)) -); -mat!(uni_class_gencat_symbol, r"\p{Symbol}", "⯈", Some((0, 3))); -mat!( - uni_class_gencat_titlecase_letter, - r"\p{Titlecase_Letter}", - "ῼ", - Some((0, 3)) -); -mat!( - uni_class_gencat_unassigned, - r"\p{Unassigned}", - "\u{10FFFF}", - Some((0, 4)) -); -mat!( - uni_class_gencat_uppercase_letter, - r"\p{Uppercase_Letter}", - "Ꝋ", - Some((0, 3)) -); - -// Test a smattering of properties. 
-mat!(uni_class_prop_emoji1, r"\p{Emoji}", "\u{23E9}", Some((0, 3))); -mat!(uni_class_prop_emoji2, r"\p{emoji}", "\u{1F21A}", Some((0, 4))); -mat!( - uni_class_prop_picto1, - r"\p{extendedpictographic}", - "\u{1FA6E}", - Some((0, 4)) -); -mat!( - uni_class_prop_picto2, - r"\p{extendedpictographic}", - "\u{1FFFD}", - Some((0, 4)) -); - -// grapheme_cluster_break -mat!( - uni_class_gcb_prepend, - r"\p{grapheme_cluster_break=prepend}", - "\u{11D46}", - Some((0, 4)) -); -mat!( - uni_class_gcb_ri1, - r"\p{gcb=regional_indicator}", - "\u{1F1E6}", - Some((0, 4)) -); -mat!(uni_class_gcb_ri2, r"\p{gcb=ri}", "\u{1F1E7}", Some((0, 4))); -mat!( - uni_class_gcb_ri3, - r"\p{gcb=regionalindicator}", - "\u{1F1FF}", - Some((0, 4)) -); -mat!(uni_class_gcb_lvt, r"\p{gcb=lvt}", "\u{C989}", Some((0, 3))); -mat!(uni_class_gcb_zwj, r"\p{gcb=zwj}", "\u{200D}", Some((0, 3))); - -// word_break -mat!(uni_class_wb1, r"\p{word_break=Hebrew_Letter}", "\u{FB46}", Some((0, 3))); -mat!(uni_class_wb2, r"\p{wb=hebrewletter}", "\u{FB46}", Some((0, 3))); -mat!(uni_class_wb3, r"\p{wb=ExtendNumLet}", "\u{FF3F}", Some((0, 3))); -mat!(uni_class_wb4, r"\p{wb=WSegSpace}", "\u{3000}", Some((0, 3))); -mat!(uni_class_wb5, r"\p{wb=numeric}", "\u{1E950}", Some((0, 4))); - -// sentence_break -mat!(uni_class_sb1, r"\p{sentence_break=Lower}", "\u{0469}", Some((0, 2))); -mat!(uni_class_sb2, r"\p{sb=lower}", "\u{0469}", Some((0, 2))); -mat!(uni_class_sb3, r"\p{sb=Close}", "\u{FF60}", Some((0, 3))); -mat!(uni_class_sb4, r"\p{sb=Close}", "\u{1F677}", Some((0, 4))); -mat!(uni_class_sb5, r"\p{sb=SContinue}", "\u{FF64}", Some((0, 3))); - -// Test 'Vithkuqi' support, which was added in Unicode 14. -// See: https://github.com/rust-lang/regex/issues/877 -mat!( - uni_vithkuqi_literal_upper, - r"(?i)^\u{10570}$", - "\u{10570}", - Some((0, 4)) -); -mat!( - uni_vithkuqi_literal_lower, - r"(?i)^\u{10570}$", - "\u{10597}", - Some((0, 4)) -); -mat!(uni_vithkuqi_word_upper, r"^\w$", "\u{10570}", Some((0, 4))); -mat!(uni_vithkuqi_word_lower, r"^\w$", "\u{10597}", Some((0, 4))); diff --git a/vendor/regex/tests/word_boundary.rs b/vendor/regex/tests/word_boundary.rs deleted file mode 100644 index 7fe97a2..0000000 --- a/vendor/regex/tests/word_boundary.rs +++ /dev/null @@ -1,89 +0,0 @@ -// Many of these are cribbed from RE2's test suite. 
- -matiter!(wb1, r"\b", ""); -matiter!(wb2, r"\b", "a", (0, 0), (1, 1)); -matiter!(wb3, r"\b", "ab", (0, 0), (2, 2)); -matiter!(wb4, r"^\b", "ab", (0, 0)); -matiter!(wb5, r"\b$", "ab", (2, 2)); -matiter!(wb6, r"^\b$", "ab"); -matiter!(wb7, r"\bbar\b", "nobar bar foo bar", (6, 9), (14, 17)); -matiter!(wb8, r"a\b", "faoa x", (3, 4)); -matiter!(wb9, r"\bbar", "bar x", (0, 3)); -matiter!(wb10, r"\bbar", "foo\nbar x", (4, 7)); -matiter!(wb11, r"bar\b", "foobar", (3, 6)); -matiter!(wb12, r"bar\b", "foobar\nxxx", (3, 6)); -matiter!(wb13, r"(foo|bar|[A-Z])\b", "foo", (0, 3)); -matiter!(wb14, r"(foo|bar|[A-Z])\b", "foo\n", (0, 3)); -matiter!(wb15, r"\b(foo|bar|[A-Z])", "foo", (0, 3)); -matiter!(wb16, r"\b(foo|bar|[A-Z])\b", "X", (0, 1)); -matiter!(wb17, r"\b(foo|bar|[A-Z])\b", "XY"); -matiter!(wb18, r"\b(foo|bar|[A-Z])\b", "bar", (0, 3)); -matiter!(wb19, r"\b(foo|bar|[A-Z])\b", "foo", (0, 3)); -matiter!(wb20, r"\b(foo|bar|[A-Z])\b", "foo\n", (0, 3)); -matiter!(wb21, r"\b(foo|bar|[A-Z])\b", "ffoo bbar N x", (10, 11)); -matiter!(wb22, r"\b(fo|foo)\b", "fo", (0, 2)); -matiter!(wb23, r"\b(fo|foo)\b", "foo", (0, 3)); -matiter!(wb24, r"\b\b", ""); -matiter!(wb25, r"\b\b", "a", (0, 0), (1, 1)); -matiter!(wb26, r"\b$", ""); -matiter!(wb27, r"\b$", "x", (1, 1)); -matiter!(wb28, r"\b$", "y x", (3, 3)); -matiter!(wb29, r"\b.$", "x", (0, 1)); -matiter!(wb30, r"^\b(fo|foo)\b", "fo", (0, 2)); -matiter!(wb31, r"^\b(fo|foo)\b", "foo", (0, 3)); -matiter!(wb32, r"^\b$", ""); -matiter!(wb33, r"^\b$", "x"); -matiter!(wb34, r"^\b.$", "x", (0, 1)); -matiter!(wb35, r"^\b.\b$", "x", (0, 1)); -matiter!(wb36, r"^^^^^\b$$$$$", ""); -matiter!(wb37, r"^^^^^\b.$$$$$", "x", (0, 1)); -matiter!(wb38, r"^^^^^\b$$$$$", "x"); -matiter!(wb39, r"^^^^^\b\b\b.\b\b\b$$$$$", "x", (0, 1)); -matiter!(wb40, r"\b.+\b", "$$abc$$", (2, 5)); -matiter!(wb41, r"\b", "a b c", (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); - -matiter!(nb1, r"\Bfoo\B", "n foo xfoox that", (7, 10)); -matiter!(nb2, r"a\B", "faoa x", (1, 2)); -matiter!(nb3, r"\Bbar", "bar x"); -matiter!(nb4, r"\Bbar", "foo\nbar x"); -matiter!(nb5, r"bar\B", "foobar"); -matiter!(nb6, r"bar\B", "foobar\nxxx"); -matiter!(nb7, r"(foo|bar|[A-Z])\B", "foox", (0, 3)); -matiter!(nb8, r"(foo|bar|[A-Z])\B", "foo\n"); -matiter!(nb9, r"\B", "", (0, 0)); -matiter!(nb10, r"\B", "x"); -matiter!(nb11, r"\B(foo|bar|[A-Z])", "foo"); -matiter!(nb12, r"\B(foo|bar|[A-Z])\B", "xXy", (1, 2)); -matiter!(nb13, r"\B(foo|bar|[A-Z])\B", "XY"); -matiter!(nb14, r"\B(foo|bar|[A-Z])\B", "XYZ", (1, 2)); -matiter!(nb15, r"\B(foo|bar|[A-Z])\B", "abara", (1, 4)); -matiter!(nb16, r"\B(foo|bar|[A-Z])\B", "xfoo_", (1, 4)); -matiter!(nb17, r"\B(foo|bar|[A-Z])\B", "xfoo\n"); -matiter!(nb18, r"\B(foo|bar|[A-Z])\B", "foo bar vNX", (9, 10)); -matiter!(nb19, r"\B(fo|foo)\B", "xfoo", (1, 3)); -matiter!(nb20, r"\B(foo|fo)\B", "xfooo", (1, 4)); -matiter!(nb21, r"\B\B", "", (0, 0)); -matiter!(nb22, r"\B\B", "x"); -matiter!(nb23, r"\B$", "", (0, 0)); -matiter!(nb24, r"\B$", "x"); -matiter!(nb25, r"\B$", "y x"); -matiter!(nb26, r"\B.$", "x"); -matiter!(nb27, r"^\B(fo|foo)\B", "fo"); -matiter!(nb28, r"^\B(fo|foo)\B", "foo"); -matiter!(nb29, r"^\B", "", (0, 0)); -matiter!(nb30, r"^\B", "x"); -matiter!(nb31, r"^\B\B", "", (0, 0)); -matiter!(nb32, r"^\B\B", "x"); -matiter!(nb33, r"^\B$", "", (0, 0)); -matiter!(nb34, r"^\B$", "x"); -matiter!(nb35, r"^\B.$", "x"); -matiter!(nb36, r"^\B.\B$", "x"); -matiter!(nb37, r"^^^^^\B$$$$$", "", (0, 0)); -matiter!(nb38, r"^^^^^\B.$$$$$", "x"); -matiter!(nb39, r"^^^^^\B$$$$$", "x"); - -// These work for 
both Unicode and ASCII because all matches are reported as -// byte offsets, and « and » do not correspond to word boundaries at either -// the character or byte level. -matiter!(unicode1, r"\bx\b", "«x", (2, 3)); -matiter!(unicode2, r"\bx\b", "x»", (0, 1)); diff --git a/vendor/regex/tests/word_boundary_ascii.rs b/vendor/regex/tests/word_boundary_ascii.rs deleted file mode 100644 index 5a3cf11..0000000 --- a/vendor/regex/tests/word_boundary_ascii.rs +++ /dev/null @@ -1,9 +0,0 @@ -// ASCII word boundaries are completely oblivious to Unicode characters. -// For Unicode word boundaries, the tests are precisely inverted. -matiter!(ascii1, r"(?-u:\b)x(?-u:\b)", "áxβ", (2, 3)); -matiter!(ascii2, r"(?-u:\B)x(?-u:\B)", "áxβ"); -matiter!(ascii3, r"(?-u:\B)", "0\u{7EF5E}", (2, 2), (3, 3), (4, 4), (5, 5)); - -// We still get Unicode word boundaries by default in byte regexes. -matiter!(unicode1, r"\bx\b", "áxβ"); -matiter!(unicode2, r"\Bx\B", "áxβ", (2, 3)); diff --git a/vendor/regex/tests/word_boundary_unicode.rs b/vendor/regex/tests/word_boundary_unicode.rs deleted file mode 100644 index c41355f..0000000 --- a/vendor/regex/tests/word_boundary_unicode.rs +++ /dev/null @@ -1,6 +0,0 @@ -// Unicode word boundaries know about Unicode characters. -// For ASCII word boundaries, the tests are precisely inverted. -matiter!(unicode1, r"\bx\b", "áxβ"); -matiter!(unicode2, r"\Bx\B", "áxβ", (2, 3)); - -matiter!(ascii1, r"(?-u:\b)x(?-u:\b)", "áxβ", (2, 3)); diff --git a/vendor/serde/.cargo-checksum.json b/vendor/serde/.cargo-checksum.json index 015fd0a..b8aaa17 100644 --- a/vendor/serde/.cargo-checksum.json +++ b/vendor/serde/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"16a89586e6a6f1e8acdc0d2e4b11d055764d085962a2725e6ee1a1b86ebf9ef3","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"13c66875efb67f64fdec817725f34ceb07913e1ebea4adc240868d2ed581d3da","build.rs":"976e089a5f74fc03e23465744bcc02b3b600a59f1c098da60c29d3979c5b35df","crates-io.md":"ee22254ee64c3189eef3e707c8d75dc66a8df2a7ee9e518d95238950780ec387","src/de/format.rs":"84f902fd4c3be66e81ac01d5b21cd876113c16f9890ff8bab5faa0d085386294","src/de/ignored_any.rs":"967184c86707c99b77a1cfb218dfc823f560fae227b6635aee6af19ee82962f5","src/de/impls.rs":"60c481b12dc3bb41810302f6979d7d9a8fa47f4467617d511e283a6a889a7428","src/de/mod.rs":"71198e80e4c64aa686b5ceb6e8bce10db20845a87a30fa14227ecbe365a046d5","src/de/seed.rs":"e8cf0233afe0af5b8fb9e4c94f301c92729c5ba417280af9e2201b732e374a72","src/de/utf8.rs":"f17524ee0af98ec3abcfd7d0b812fbd1033263bd8e2ce2f57c1e1999ce153558","src/de/value.rs":"aa5055923e2c3fd1c1f1abdfb380a1d63d07cf4d602ef62d2df2b7da33dd8c81","src/integer128.rs":"ca49591abde2d8c4f582174533fee28f0fa9139e5d71bf22b25a6b175f8abccc","src/lib.rs":"a22386ab9ba972cb91b6613e1eb3c7d82aa1355950fc96e045e165be90197beb","src/macros.rs":"3d695a51f0a07f9f719dcb5620012c21a1b084c06a6283349cabf574ceba8123","src/private/de.rs":"a85efe9af4f5629ac7d946af56e20fbc184df6ac40a6cfe47bf3997a95b2ea20","src/private/doc.rs":"e9801a43c3088fccd5f1fac76416698f948e65b647024aa9da17d673e1e8c217","src/private/mod.rs":"37b204775e572396515477b393ce793b2579de48e5971e6f596ba3723c489fd6","src/private/ser.rs":"57fbff98429e870da86edcf61c0831caaa3b708c0c32e3038c4b2179e8dff73e","src/private/size_hint.rs":"605521227e9ba3100fbb9d5ea7fd5853385097c35015ce6908bd5f1ea20d59ad","src/ser/fmt.rs":"7827ed07fd8897e6324f75625ba0c926a4c4e7ec2914cd067391ce54d942ac7b","src/ser/impls.rs":
"46229722b7f0d8c4f01c43567c765608bf2c1974a5f24ce2525815c5bfd42ff5","src/ser/impossible.rs":"db17913522c1c27389c5a085113911353b9813c1b116518681362e7c8b692c3a","src/ser/mod.rs":"e1e6c764837c70b6410dcf1949a0dae1b4b4ffce65b87607d3d173b612e9bccf","src/std_error.rs":"3aac687856c035517fae44ed2906dd4a1e3184bae4bf613adcdeb73f74126c57"},"package":"3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065"} \ No newline at end of file +{"files":{"Cargo.toml":"3c553cd5adc065a5e6412e518b55ef85f7ffa307171ad4369d016de03a8f163f","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"c3ece10a36d19b4e857a770eaf74a2164d220f55fa11947065a3898c1697ecef","build.rs":"f9ba30324b9ce085c903595fb55a5293f8c2348ff36bfe870521b935ae6d105c","crates-io.md":"56e988ac4944c45f5bf5051e3827892ed8fb817853d99d9df1fff6621108e270","src/de/format.rs":"c85071b016df643b161859682d21ce34fa0ebf2a3bdbeeea69859da48f5d934f","src/de/ignored_any.rs":"6480f2b2a83dc4764d01b2eec7309729eef2492eede2e5ee98d23a60b05198eb","src/de/impls.rs":"2857d734176a0b78a41c9358354b0b0b83c6b2d948590be072d98606a8cae9d6","src/de/mod.rs":"7fb7427de1981bfa13af06c898d213a6bc34697148e96cef08d3c447c1999527","src/de/seed.rs":"045d890712a04eb33ffc5a021e5d948a63c89402b8ffeea749df2171b7484f8f","src/de/size_hint.rs":"fff83dc39d30e75e8e611991f9c5399188a1aad23a6462dbca2c8b62655cfedb","src/de/value.rs":"5d8dcae3a98a2203f2c0934adb84dbf741f120f246dfc02aa6d0d10673dc39c7","src/integer128.rs":"29ef30b7d94507b34807090e68173767cdc7aff62edccd38affe69e75338dddc","src/lib.rs":"b16783d3e69a1e3b499d28810823aac2fd92d696c0b511f7bcda0d011399167c","src/macros.rs":"e3486ef4a9a4ed1b27234aa1817ccb25ec0eb026ffc95e2c71c7b917f1f45629","src/private/de.rs":"6557a124fdaf61f9c7cd80163e40f4a453354e45b63a4eb55dafdfe0159f6881","src/private/doc.rs":"9ad740e9ea2eedf861b77116eda9a6fb74bc8553541cd17d1bc5791a3ef3271a","src/private/mod.rs":"b8f0c348621d91dd9da3db83d8877e70bc61ad0a2dc2d6fb57c6fc2c2cbafa26","src/private/ser.rs":"656613691bd8d40cb70a52d4ebe3ee96a993c8a1292d50822d9ca5bdad84426b","src/ser/fmt.rs":"77a5583e5c227ea1982b097ed6378af5c899d43761d71e33440262fd35944695","src/ser/impls.rs":"850619164b399c37cd373d24f5a2c83453f40b34bb978c5722d2c1ae226775b5","src/ser/impossible.rs":"e11b37689ec1395378d546fce74221ca9046d0761744301f12029102fd07e30e","src/ser/mod.rs":"a7fd082203d63cbe4f0fe86d9be16bf4f3b2444653dac6bb61d82e0f4f6b4214","src/std_error.rs":"25a07149e2e468747ffa5a58051c7f93d7b3c0fa0372f012a96c97ec8ab03b97"},"package":"63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02"} \ No newline at end of file diff --git a/vendor/serde/Cargo.toml b/vendor/serde/Cargo.toml index 37a2cbe..a11aad0 100644 --- a/vendor/serde/Cargo.toml +++ b/vendor/serde/Cargo.toml @@ -10,22 +10,15 @@ # See Cargo.toml.orig for the original contents. 
[package] -rust-version = "1.19" +edition = "2018" +rust-version = "1.31" name = "serde" -version = "1.0.159" +version = "1.0.195" authors = [ "Erick Tryzelaar <erick.tryzelaar@gmail.com>", "David Tolnay <dtolnay@gmail.com>", ] build = "build.rs" -include = [ - "build.rs", - "src/**/*.rs", - "crates-io.md", - "README.md", - "LICENSE-APACHE", - "LICENSE-MIT", -] description = "A generic serialization/deserialization framework" homepage = "https://serde.rs" documentation = "https://docs.rs/serde" @@ -38,12 +31,22 @@ keywords = [ categories = [ "encoding", "no-std", + "no-std::no-alloc", ] license = "MIT OR Apache-2.0" repository = "https://github.com/serde-rs/serde" [package.metadata.docs.rs] -features = ["derive"] +features = [ + "derive", + "rc", + "unstable", +] +rustdoc-args = [ + "--cfg", + "doc_cfg", + "--generate-link-to-definition", +] targets = ["x86_64-unknown-linux-gnu"] [package.metadata.playground] @@ -56,11 +59,11 @@ features = [ doc-scrape-examples = false [dependencies.serde_derive] -version = "=1.0.159" +version = "1" optional = true [dev-dependencies.serde_derive] -version = "1.0" +version = "1" [features] alloc = [] @@ -69,3 +72,6 @@ derive = ["serde_derive"] rc = [] std = [] unstable = [] + +[target."cfg(any())".dependencies.serde_derive] +version = "=1.0.195" diff --git a/vendor/serde/README.md b/vendor/serde/README.md index d53e572..477fd36 100644 --- a/vendor/serde/README.md +++ b/vendor/serde/README.md @@ -1,12 +1,12 @@ -# Serde   [![Build Status]][actions] [![Latest Version]][crates.io] [![serde: rustc 1.19+]][Rust 1.19] [![serde_derive: rustc 1.56+]][Rust 1.56] +# Serde   [![Build Status]][actions] [![Latest Version]][crates.io] [![serde: rustc 1.31+]][Rust 1.31] [![serde_derive: rustc 1.56+]][Rust 1.56] [Build Status]: https://img.shields.io/github/actions/workflow/status/serde-rs/serde/ci.yml?branch=master [actions]: https://github.com/serde-rs/serde/actions?query=branch%3Amaster [Latest Version]: https://img.shields.io/crates/v/serde.svg [crates.io]: https://crates.io/crates/serde -[serde: rustc 1.19+]: https://img.shields.io/badge/serde-rustc_1.19+-lightgray.svg +[serde: rustc 1.31+]: https://img.shields.io/badge/serde-rustc_1.31+-lightgray.svg [serde_derive: rustc 1.56+]: https://img.shields.io/badge/serde_derive-rustc_1.56+-lightgray.svg -[Rust 1.19]: https://blog.rust-lang.org/2017/07/20/Rust-1.19.html +[Rust 1.31]: https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html [Rust 1.56]: https://blog.rust-lang.org/2021/10/21/Rust-1.56.0.html **Serde is a framework for *ser*ializing and *de*serializing Rust data structures efficiently and generically.** @@ -48,7 +48,7 @@ serde_json = "1.0" <p></p> ```rust -use serde::{Serialize, Deserialize}; +use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug)] struct Point { diff --git a/vendor/serde/build.rs b/vendor/serde/build.rs index 929d8e1..fe5486a 100644 --- a/vendor/serde/build.rs +++ b/vendor/serde/build.rs @@ -16,68 +16,6 @@ fn main() { let target = env::var("TARGET").unwrap(); let emscripten = target == "asmjs-unknown-emscripten" || target == "wasm32-unknown-emscripten"; - // std::collections::Bound was stabilized in Rust 1.17 - // but it was moved to core::ops later in Rust 1.26: - // https://doc.rust-lang.org/core/ops/enum.Bound.html - if minor < 26 { - println!("cargo:rustc-cfg=no_ops_bound"); - if minor < 17 { - println!("cargo:rustc-cfg=no_collections_bound"); - } - } - - // core::cmp::Reverse stabilized in Rust 1.19: - // 
https://doc.rust-lang.org/stable/core/cmp/struct.Reverse.html - if minor < 19 { - println!("cargo:rustc-cfg=no_core_reverse"); - } - - // CString::into_boxed_c_str and PathBuf::into_boxed_path stabilized in Rust 1.20: - // https://doc.rust-lang.org/std/ffi/struct.CString.html#method.into_boxed_c_str - // https://doc.rust-lang.org/std/path/struct.PathBuf.html#method.into_boxed_path - if minor < 20 { - println!("cargo:rustc-cfg=no_de_boxed_c_str"); - println!("cargo:rustc-cfg=no_de_boxed_path"); - } - - // From<Box<T>> for Rc<T> / Arc<T> stabilized in Rust 1.21: - // https://doc.rust-lang.org/std/rc/struct.Rc.html#impl-From<Box<T>> - // https://doc.rust-lang.org/std/sync/struct.Arc.html#impl-From<Box<T>> - if minor < 21 { - println!("cargo:rustc-cfg=no_de_rc_dst"); - } - - // Duration available in core since Rust 1.25: - // https://blog.rust-lang.org/2018/03/29/Rust-1.25.html#library-stabilizations - if minor < 25 { - println!("cargo:rustc-cfg=no_core_duration"); - } - - // 128-bit integers stabilized in Rust 1.26: - // https://blog.rust-lang.org/2018/05/10/Rust-1.26.html - // - // Disabled on Emscripten targets before Rust 1.40 since - // Emscripten did not support 128-bit integers until Rust 1.40 - // (https://github.com/rust-lang/rust/pull/65251) - if minor < 26 || emscripten && minor < 40 { - println!("cargo:rustc-cfg=no_integer128"); - } - - // Inclusive ranges methods stabilized in Rust 1.27: - // https://github.com/rust-lang/rust/pull/50758 - // Also Iterator::try_for_each: - // https://blog.rust-lang.org/2018/06/21/Rust-1.27.html#library-stabilizations - if minor < 27 { - println!("cargo:rustc-cfg=no_range_inclusive"); - println!("cargo:rustc-cfg=no_iterator_try_fold"); - } - - // Non-zero integers stabilized in Rust 1.28: - // https://blog.rust-lang.org/2018/08/02/Rust-1.28.html#library-stabilizations - if minor < 28 { - println!("cargo:rustc-cfg=no_num_nonzero"); - } - // TryFrom, Atomic types, non-zero signed integers, and SystemTime::checked_add // stabilized in Rust 1.34: // https://blog.rust-lang.org/2019/04/11/Rust-1.34.0.html#tryfrom-and-tryinto @@ -89,6 +27,12 @@ fn main() { println!("cargo:rustc-cfg=no_relaxed_trait_bounds"); } + // f32::copysign and f64::copysign stabilized in Rust 1.35. + // https://blog.rust-lang.org/2019/05/23/Rust-1.35.0.html#copy-the-sign-of-a-floating-point-number-onto-another + if minor < 35 { + println!("cargo:rustc-cfg=no_float_copysign"); + } + // Current minimum supported version of serde_derive crate is Rust 1.56. 
if minor < 56 { println!("cargo:rustc-cfg=no_serde_derive"); diff --git a/vendor/serde/crates-io.md b/vendor/serde/crates-io.md index 6e0ec28..1871003 100644 --- a/vendor/serde/crates-io.md +++ b/vendor/serde/crates-io.md @@ -16,7 +16,7 @@ You may be looking for: ## Serde in action ```rust -use serde::{Serialize, Deserialize}; +use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug)] struct Point { diff --git a/vendor/serde/src/de/format.rs b/vendor/serde/src/de/format.rs index f14580b..9053cc0 100644 --- a/vendor/serde/src/de/format.rs +++ b/vendor/serde/src/de/format.rs @@ -1,5 +1,5 @@ -use lib::fmt::{self, Write}; -use lib::str; +use crate::lib::fmt::{self, Write}; +use crate::lib::str; pub(super) struct Buf<'a> { bytes: &'a mut [u8], diff --git a/vendor/serde/src/de/ignored_any.rs b/vendor/serde/src/de/ignored_any.rs index 9ed438e..2360a17 100644 --- a/vendor/serde/src/de/ignored_any.rs +++ b/vendor/serde/src/de/ignored_any.rs @@ -1,6 +1,6 @@ -use lib::*; +use crate::lib::*; -use de::{ +use crate::de::{ Deserialize, Deserializer, EnumAccess, Error, MapAccess, SeqAccess, VariantAccess, Visitor, }; @@ -10,13 +10,12 @@ use de::{ /// any type, except that it does not store any information about the data that /// gets deserialized. /// -/// ```edition2018 -/// use std::fmt; -/// use std::marker::PhantomData; -/// +/// ```edition2021 /// use serde::de::{ /// self, Deserialize, DeserializeSeed, Deserializer, IgnoredAny, SeqAccess, Visitor, /// }; +/// use std::fmt; +/// use std::marker::PhantomData; /// /// /// A seed that can be used to deserialize only the `n`th element of a sequence /// /// while efficiently discarding elements of any type before or after index `n`. @@ -108,7 +107,7 @@ use de::{ /// # Ok(()) /// # } /// ``` -#[derive(Copy, Clone, Debug, Default)] +#[derive(Copy, Clone, Debug, Default, PartialEq)] pub struct IgnoredAny; impl<'de> Visitor<'de> for IgnoredAny { @@ -130,12 +129,10 @@ impl<'de> Visitor<'de> for IgnoredAny { Ok(IgnoredAny) } - serde_if_integer128! { - #[inline] - fn visit_i128<E>(self, x: i128) -> Result<Self::Value, E> { - let _ = x; - Ok(IgnoredAny) - } + #[inline] + fn visit_i128<E>(self, x: i128) -> Result<Self::Value, E> { + let _ = x; + Ok(IgnoredAny) } #[inline] @@ -144,12 +141,10 @@ impl<'de> Visitor<'de> for IgnoredAny { Ok(IgnoredAny) } - serde_if_integer128! 
{ - #[inline] - fn visit_u128<E>(self, x: u128) -> Result<Self::Value, E> { - let _ = x; - Ok(IgnoredAny) - } + #[inline] + fn visit_u128<E>(self, x: u128) -> Result<Self::Value, E> { + let _ = x; + Ok(IgnoredAny) } #[inline] @@ -198,7 +193,7 @@ impl<'de> Visitor<'de> for IgnoredAny { where A: SeqAccess<'de>, { - while let Some(IgnoredAny) = try!(seq.next_element()) { + while let Some(IgnoredAny) = tri!(seq.next_element()) { // Gobble } Ok(IgnoredAny) @@ -209,7 +204,7 @@ impl<'de> Visitor<'de> for IgnoredAny { where A: MapAccess<'de>, { - while let Some((IgnoredAny, IgnoredAny)) = try!(map.next_entry()) { + while let Some((IgnoredAny, IgnoredAny)) = tri!(map.next_entry()) { // Gobble } Ok(IgnoredAny) @@ -228,7 +223,7 @@ impl<'de> Visitor<'de> for IgnoredAny { where A: EnumAccess<'de>, { - try!(data.variant::<IgnoredAny>()).1.newtype_variant() + tri!(data.variant::<IgnoredAny>()).1.newtype_variant() } } diff --git a/vendor/serde/src/de/impls.rs b/vendor/serde/src/de/impls.rs index a2e2c48..413c997 100644 --- a/vendor/serde/src/de/impls.rs +++ b/vendor/serde/src/de/impls.rs @@ -1,16 +1,14 @@ -use lib::*; +use crate::lib::*; -use de::{ - Deserialize, Deserializer, EnumAccess, Error, SeqAccess, Unexpected, VariantAccess, Visitor, +use crate::de::{ + Deserialize, Deserializer, EnumAccess, Error, MapAccess, SeqAccess, Unexpected, VariantAccess, + Visitor, }; -#[cfg(any(feature = "std", feature = "alloc", not(no_core_duration)))] -use de::MapAccess; - -use seed::InPlaceSeed; +use crate::seed::InPlaceSeed; #[cfg(any(feature = "std", feature = "alloc"))] -use __private::size_hint; +use crate::de::size_hint; //////////////////////////////////////////////////////////////////////////////// @@ -41,6 +39,7 @@ impl<'de> Deserialize<'de> for () { } #[cfg(feature = "unstable")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "unstable")))] impl<'de> Deserialize<'de> for ! { fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error> where @@ -84,7 +83,7 @@ macro_rules! impl_deserialize_num { ($primitive:ident, $nonzero:ident $(cfg($($cfg:tt)*))*, $deserialize:ident $($method:ident!($($val:ident : $visit:ident)*);)*) => { impl_deserialize_num!($primitive, $deserialize $($method!($($val : $visit)*);)*); - #[cfg(all(not(no_num_nonzero), $($($cfg)*)*))] + $(#[cfg($($cfg)*)])* impl<'de> Deserialize<'de> for num::$nonzero { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -182,6 +181,28 @@ macro_rules! num_as_self { }; } +macro_rules! num_as_copysign_self { + ($ty:ident : $visit:ident) => { + #[inline] + fn $visit<E>(self, v: $ty) -> Result<Self::Value, E> + where + E: Error, + { + #[cfg(any(no_float_copysign, not(feature = "std")))] + { + Ok(v as Self::Value) + } + + #[cfg(all(not(no_float_copysign), feature = "std"))] + { + // Preserve sign of NaN. The `as` produces a nondeterministic sign. + let sign = if v.is_sign_positive() { 1.0 } else { -1.0 }; + Ok((v as Self::Value).copysign(sign)) + } + } + }; +} + macro_rules! int_to_int { ($ty:ident : $visit:ident) => { #[inline] @@ -353,7 +374,7 @@ impl_deserialize_num! { impl_deserialize_num! { f32, deserialize_f32 num_self!(f32:visit_f32); - num_as_self!(f64:visit_f64); + num_as_copysign_self!(f64:visit_f64); num_as_self!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); } @@ -361,69 +382,67 @@ impl_deserialize_num! { impl_deserialize_num! 
{ f64, deserialize_f64 num_self!(f64:visit_f64); - num_as_self!(f32:visit_f32); + num_as_copysign_self!(f32:visit_f32); num_as_self!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); } -serde_if_integer128! { - macro_rules! num_128 { - ($ty:ident : $visit:ident) => { - fn $visit<E>(self, v: $ty) -> Result<Self::Value, E> - where - E: Error, +macro_rules! num_128 { + ($ty:ident : $visit:ident) => { + fn $visit<E>(self, v: $ty) -> Result<Self::Value, E> + where + E: Error, + { + if v as i128 >= Self::Value::min_value() as i128 + && v as u128 <= Self::Value::max_value() as u128 { - if v as i128 >= Self::Value::min_value() as i128 - && v as u128 <= Self::Value::max_value() as u128 - { - Ok(v as Self::Value) - } else { - Err(Error::invalid_value( - Unexpected::Other(stringify!($ty)), - &self, - )) - } + Ok(v as Self::Value) + } else { + Err(Error::invalid_value( + Unexpected::Other(stringify!($ty)), + &self, + )) } - }; + } + }; - (nonzero $primitive:ident $ty:ident : $visit:ident) => { - fn $visit<E>(self, v: $ty) -> Result<Self::Value, E> - where - E: Error, + (nonzero $primitive:ident $ty:ident : $visit:ident) => { + fn $visit<E>(self, v: $ty) -> Result<Self::Value, E> + where + E: Error, + { + if v as i128 >= $primitive::min_value() as i128 + && v as u128 <= $primitive::max_value() as u128 { - if v as i128 >= $primitive::min_value() as i128 - && v as u128 <= $primitive::max_value() as u128 - { - if let Some(nonzero) = Self::Value::new(v as $primitive) { - Ok(nonzero) - } else { - Err(Error::invalid_value(Unexpected::Unsigned(0), &self)) - } + if let Some(nonzero) = Self::Value::new(v as $primitive) { + Ok(nonzero) } else { - Err(Error::invalid_value( - Unexpected::Other(stringify!($ty)), - &self, - )) + Err(Error::invalid_value(Unexpected::Unsigned(0), &self)) } + } else { + Err(Error::invalid_value( + Unexpected::Other(stringify!($ty)), + &self, + )) } - }; - } + } + }; +} - impl_deserialize_num! { - i128, NonZeroI128 cfg(not(no_num_nonzero_signed)), deserialize_i128 - num_self!(i128:visit_i128); - num_as_self!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); - num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); - num_128!(u128:visit_u128); - } +impl_deserialize_num! { + i128, NonZeroI128 cfg(not(no_num_nonzero_signed)), deserialize_i128 + num_self!(i128:visit_i128); + num_as_self!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); + num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); + num_128!(u128:visit_u128); +} - impl_deserialize_num! { - u128, NonZeroU128, deserialize_u128 - num_self!(u128:visit_u128); - num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); - int_to_uint!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); - num_128!(i128:visit_i128); - } +impl_deserialize_num! 
{ + u128, NonZeroU128, deserialize_u128 + num_self!(u128:visit_u128); + num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); + int_to_uint!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); + num_128!(i128:visit_i128); } //////////////////////////////////////////////////////////////////////////////// @@ -578,6 +597,7 @@ impl<'a, 'de> Visitor<'de> for StringInPlaceVisitor<'a> { } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de> Deserialize<'de> for String { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -681,10 +701,10 @@ impl<'de> Visitor<'de> for CStringVisitor { where A: SeqAccess<'de>, { - let len = size_hint::cautious(seq.size_hint()); - let mut values = Vec::with_capacity(len); + let capacity = size_hint::cautious::<u8>(seq.size_hint()); + let mut values = Vec::<u8>::with_capacity(capacity); - while let Some(value) = try!(seq.next_element()) { + while let Some(value) = tri!(seq.next_element()) { values.push(value); } @@ -721,6 +741,7 @@ impl<'de> Visitor<'de> for CStringVisitor { } #[cfg(any(feature = "std", all(not(no_core_cstr), feature = "alloc")))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de> Deserialize<'de> for CString { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -732,10 +753,10 @@ impl<'de> Deserialize<'de> for CString { macro_rules! forwarded_impl { ( - $(#[doc = $doc:tt])* + $(#[$attr:meta])* ($($id:ident),*), $ty:ty, $func:expr ) => { - $(#[doc = $doc])* + $(#[$attr])* impl<'de $(, $id : Deserialize<'de>,)*> Deserialize<'de> for $ty { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -747,14 +768,15 @@ macro_rules! forwarded_impl { } } -#[cfg(all( - any(feature = "std", all(not(no_core_cstr), feature = "alloc")), - not(no_de_boxed_c_str) -))] -forwarded_impl!((), Box<CStr>, CString::into_boxed_c_str); +forwarded_impl! { + #[cfg(any(feature = "std", all(not(no_core_cstr), feature = "alloc")))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + (), Box<CStr>, CString::into_boxed_c_str +} -#[cfg(not(no_core_reverse))] -forwarded_impl!((T), Reverse<T>, Reverse); +forwarded_impl! { + (T), Reverse<T>, Reverse +} //////////////////////////////////////////////////////////////////////////////// @@ -860,9 +882,9 @@ impl<'de, T: ?Sized> Deserialize<'de> for PhantomData<T> { //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(feature = "std", feature = "alloc"))] macro_rules! seq_impl { ( + $(#[$attr:meta])* $ty:ident <T $(: $tbound1:ident $(+ $tbound2:ident)*)* $(, $typaram:ident : $bound1:ident $(+ $bound2:ident)*)*>, $access:ident, $clear:expr, @@ -870,6 +892,7 @@ macro_rules! seq_impl { $reserve:expr, $insert:expr ) => { + $(#[$attr])* impl<'de, T $(, $typaram)*> Deserialize<'de> for $ty<T $(, $typaram)*> where T: Deserialize<'de> $(+ $tbound1 $(+ $tbound2)*)*, @@ -901,7 +924,7 @@ macro_rules! seq_impl { { let mut values = $with_capacity; - while let Some(value) = try!($access.next_element()) { + while let Some(value) = tri!($access.next_element()) { $insert(&mut values, value); } @@ -936,10 +959,10 @@ macro_rules! seq_impl { A: SeqAccess<'de>, { $clear(&mut self.0); - $reserve(&mut self.0, size_hint::cautious($access.size_hint())); + $reserve(&mut self.0, size_hint::cautious::<T>($access.size_hint())); // FIXME: try to overwrite old values here? 
(Vec, VecDeque, LinkedList) - while let Some(value) = try!($access.next_element()) { + while let Some(value) = tri!($access.next_element()) { $insert(&mut self.0, value); } @@ -957,18 +980,20 @@ macro_rules! seq_impl { #[cfg(any(feature = "std", feature = "alloc"))] fn nop_reserve<T>(_seq: T, _n: usize) {} -#[cfg(any(feature = "std", feature = "alloc"))] seq_impl!( + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] BinaryHeap<T: Ord>, seq, BinaryHeap::clear, - BinaryHeap::with_capacity(size_hint::cautious(seq.size_hint())), + BinaryHeap::with_capacity(size_hint::cautious::<T>(seq.size_hint())), BinaryHeap::reserve, BinaryHeap::push ); -#[cfg(any(feature = "std", feature = "alloc"))] seq_impl!( + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] BTreeSet<T: Eq + Ord>, seq, BTreeSet::clear, @@ -977,8 +1002,9 @@ seq_impl!( BTreeSet::insert ); -#[cfg(any(feature = "std", feature = "alloc"))] seq_impl!( + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] LinkedList<T>, seq, LinkedList::clear, @@ -987,21 +1013,24 @@ seq_impl!( LinkedList::push_back ); -#[cfg(feature = "std")] seq_impl!( + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] HashSet<T: Eq + Hash, S: BuildHasher + Default>, seq, HashSet::clear, - HashSet::with_capacity_and_hasher(size_hint::cautious(seq.size_hint()), S::default()), + HashSet::with_capacity_and_hasher(size_hint::cautious::<T>(seq.size_hint()), S::default()), HashSet::reserve, - HashSet::insert); + HashSet::insert +); -#[cfg(any(feature = "std", feature = "alloc"))] seq_impl!( + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] VecDeque<T>, seq, VecDeque::clear, - VecDeque::with_capacity(size_hint::cautious(seq.size_hint())), + VecDeque::with_capacity(size_hint::cautious::<T>(seq.size_hint())), VecDeque::reserve, VecDeque::push_back ); @@ -1009,6 +1038,7 @@ seq_impl!( //////////////////////////////////////////////////////////////////////////////// #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, T> Deserialize<'de> for Vec<T> where T: Deserialize<'de>, @@ -1035,9 +1065,10 @@ where where A: SeqAccess<'de>, { - let mut values = Vec::with_capacity(size_hint::cautious(seq.size_hint())); + let capacity = size_hint::cautious::<T>(seq.size_hint()); + let mut values = Vec::<T>::with_capacity(capacity); - while let Some(value) = try!(seq.next_element()) { + while let Some(value) = tri!(seq.next_element()) { values.push(value); } @@ -1071,7 +1102,7 @@ where where A: SeqAccess<'de>, { - let hint = size_hint::cautious(seq.size_hint()); + let hint = size_hint::cautious::<T>(seq.size_hint()); if let Some(additional) = hint.checked_sub(self.0.len()) { self.0.reserve(additional); } @@ -1079,7 +1110,7 @@ where for i in 0..self.0.len() { let next = { let next_place = InPlaceSeed(&mut self.0[i]); - try!(seq.next_element_seed(next_place)) + tri!(seq.next_element_seed(next_place)) }; if next.is_none() { self.0.truncate(i); @@ -1087,7 +1118,7 @@ where } } - while let Some(value) = try!(seq.next_element()) { + while let Some(value) = tri!(seq.next_element()) { self.0.push(value); } @@ -1159,7 +1190,7 @@ macro_rules! 
array_impls { A: SeqAccess<'de>, { Ok([$( - match try!(seq.next_element()) { + match tri!(seq.next_element()) { Some(val) => val, None => return Err(Error::invalid_length($n, &self)), } @@ -1184,7 +1215,7 @@ macro_rules! array_impls { { let mut fail_idx = None; for (idx, dest) in self.0[..].iter_mut().enumerate() { - if try!(seq.next_element_seed(InPlaceSeed(dest))).is_none() { + if tri!(seq.next_element_seed(InPlaceSeed(dest))).is_none() { fail_idx = Some(idx); break; } @@ -1282,7 +1313,7 @@ macro_rules! tuple_impls { A: SeqAccess<'de>, { $( - let $name = match try!(seq.next_element()) { + let $name = match tri!(seq.next_element()) { Some(value) => value, None => return Err(Error::invalid_length($n, &self)), }; @@ -1316,7 +1347,7 @@ macro_rules! tuple_impls { A: SeqAccess<'de>, { $( - if try!(seq.next_element_seed(InPlaceSeed(&mut (self.0).$n))).is_none() { + if tri!(seq.next_element_seed(InPlaceSeed(&mut (self.0).$n))).is_none() { return Err(Error::invalid_length($n, &self)); } )+ @@ -1353,13 +1384,14 @@ tuple_impls! { //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(feature = "std", feature = "alloc"))] macro_rules! map_impl { ( + $(#[$attr:meta])* $ty:ident <K $(: $kbound1:ident $(+ $kbound2:ident)*)*, V $(, $typaram:ident : $bound1:ident $(+ $bound2:ident)*)*>, $access:ident, - $with_capacity:expr + $with_capacity:expr, ) => { + $(#[$attr])* impl<'de, K, V $(, $typaram)*> Deserialize<'de> for $ty<K, V $(, $typaram)*> where K: Deserialize<'de> $(+ $kbound1 $(+ $kbound2)*)*, @@ -1393,7 +1425,7 @@ macro_rules! map_impl { { let mut values = $with_capacity; - while let Some((key, value)) = try!($access.next_entry()) { + while let Some((key, value)) = tri!($access.next_entry()) { values.insert(key, value); } @@ -1408,23 +1440,30 @@ macro_rules! map_impl { } } -#[cfg(any(feature = "std", feature = "alloc"))] -map_impl!( +map_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] BTreeMap<K: Ord, V>, map, - BTreeMap::new()); + BTreeMap::new(), +} -#[cfg(feature = "std")] -map_impl!( +map_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] HashMap<K: Eq + Hash, V, S: BuildHasher + Default>, map, - HashMap::with_capacity_and_hasher(size_hint::cautious(map.size_hint()), S::default())); + HashMap::with_capacity_and_hasher(size_hint::cautious::<(K, V)>(map.size_hint()), S::default()), +} //////////////////////////////////////////////////////////////////////////////// -#[cfg(feature = "std")] macro_rules! parse_ip_impl { - ($expecting:tt $ty:ty; $size:tt) => { + ( + $(#[$attr:meta])* + $ty:ty, $expecting:expr, $size:tt + ) => { + $(#[$attr])* impl<'de> Deserialize<'de> for $ty { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1451,7 +1490,7 @@ macro_rules! variant_identifier { $($variant),* } - static $variants_name: &'static [&'static str] = &[$(stringify!($variant)),*]; + static $variants_name: &[&str] = &[$(stringify!($variant)),*]; impl<'de> Deserialize<'de> for $name_kind { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> @@ -1541,7 +1580,7 @@ macro_rules! deserialize_enum { where A: EnumAccess<'de>, { - match try!(data.variant()) { + match tri!(data.variant()) { $( ($name_kind :: $variant, v) => v.newtype_variant().map($name :: $variant), )* @@ -1553,6 +1592,7 @@ macro_rules! 
deserialize_enum { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de> Deserialize<'de> for net::IpAddr { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1561,7 +1601,7 @@ impl<'de> Deserialize<'de> for net::IpAddr { if deserializer.is_human_readable() { deserializer.deserialize_str(FromStrVisitor::new("IP address")) } else { - use lib::net::IpAddr; + use crate::lib::net::IpAddr; deserialize_enum! { IpAddr IpAddrKind (V4; b"V4"; 0, V6; b"V6"; 1) "`V4` or `V6`", @@ -1571,15 +1611,25 @@ impl<'de> Deserialize<'de> for net::IpAddr { } } -#[cfg(feature = "std")] -parse_ip_impl!("IPv4 address" net::Ipv4Addr; 4); +parse_ip_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + net::Ipv4Addr, "IPv4 address", 4 +} -#[cfg(feature = "std")] -parse_ip_impl!("IPv6 address" net::Ipv6Addr; 16); +parse_ip_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + net::Ipv6Addr, "IPv6 address", 16 +} -#[cfg(feature = "std")] macro_rules! parse_socket_impl { - ($expecting:tt $ty:ty, $new:expr) => { + ( + $(#[$attr:meta])* + $ty:ty, $expecting:tt, + $new:expr, + ) => { + $(#[$attr])* impl<'de> Deserialize<'de> for $ty { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1588,7 +1638,7 @@ macro_rules! parse_socket_impl { if deserializer.is_human_readable() { deserializer.deserialize_str(FromStrVisitor::new($expecting)) } else { - <(_, u16)>::deserialize(deserializer).map(|(ip, port)| $new(ip, port)) + <(_, u16)>::deserialize(deserializer).map($new) } } } @@ -1596,6 +1646,7 @@ macro_rules! parse_socket_impl { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de> Deserialize<'de> for net::SocketAddr { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1604,7 +1655,7 @@ impl<'de> Deserialize<'de> for net::SocketAddr { if deserializer.is_human_readable() { deserializer.deserialize_str(FromStrVisitor::new("socket address")) } else { - use lib::net::SocketAddr; + use crate::lib::net::SocketAddr; deserialize_enum! { SocketAddr SocketAddrKind (V4; b"V4"; 0, V6; b"V6"; 1) "`V4` or `V6`", @@ -1614,13 +1665,19 @@ impl<'de> Deserialize<'de> for net::SocketAddr { } } -#[cfg(feature = "std")] -parse_socket_impl!("IPv4 socket address" net::SocketAddrV4, net::SocketAddrV4::new); +parse_socket_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + net::SocketAddrV4, "IPv4 socket address", + |(ip, port)| net::SocketAddrV4::new(ip, port), +} -#[cfg(feature = "std")] -parse_socket_impl!("IPv6 socket address" net::SocketAddrV6, |ip, port| net::SocketAddrV6::new( - ip, port, 0, 0 -)); +parse_socket_impl! 
{ + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + net::SocketAddrV6, "IPv6 socket address", + |(ip, port)| net::SocketAddrV6::new(ip, port, 0, 0), +} //////////////////////////////////////////////////////////////////////////////// @@ -1653,6 +1710,7 @@ impl<'a> Visitor<'a> for PathVisitor { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de: 'a, 'a> Deserialize<'de> for &'a Path { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1707,6 +1765,7 @@ impl<'de> Visitor<'de> for PathBufVisitor { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de> Deserialize<'de> for PathBuf { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1716,8 +1775,11 @@ impl<'de> Deserialize<'de> for PathBuf { } } -#[cfg(all(feature = "std", not(no_de_boxed_path)))] -forwarded_impl!((), Box<Path>, PathBuf::into_boxed_path); +forwarded_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + (), Box<Path>, PathBuf::into_boxed_path +} //////////////////////////////////////////////////////////////////////////////// @@ -1750,7 +1812,7 @@ impl<'de> Visitor<'de> for OsStringVisitor { { use std::os::unix::ffi::OsStringExt; - match try!(data.variant()) { + match tri!(data.variant()) { (OsStringKind::Unix, v) => v.newtype_variant().map(OsString::from_vec), (OsStringKind::Windows, _) => Err(Error::custom( "cannot deserialize Windows OS string on Unix", @@ -1765,7 +1827,7 @@ impl<'de> Visitor<'de> for OsStringVisitor { { use std::os::windows::ffi::OsStringExt; - match try!(data.variant()) { + match tri!(data.variant()) { (OsStringKind::Windows, v) => v .newtype_variant::<Vec<u16>>() .map(|vec| OsString::from_wide(&vec)), @@ -1777,6 +1839,7 @@ impl<'de> Visitor<'de> for OsStringVisitor { } #[cfg(all(feature = "std", any(unix, windows)))] +#[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", any(unix, windows)))))] impl<'de> Deserialize<'de> for OsString { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1788,40 +1851,32 @@ impl<'de> Deserialize<'de> for OsString { //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(feature = "std", feature = "alloc"))] -forwarded_impl!((T), Box<T>, Box::new); - -#[cfg(any(feature = "std", feature = "alloc"))] -forwarded_impl!((T), Box<[T]>, Vec::into_boxed_slice); +forwarded_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + (T), Box<T>, Box::new +} -#[cfg(any(feature = "std", feature = "alloc"))] -forwarded_impl!((), Box<str>, String::into_boxed_str); +forwarded_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + (T), Box<[T]>, Vec::into_boxed_slice +} -#[cfg(all(no_de_rc_dst, feature = "rc", any(feature = "std", feature = "alloc")))] forwarded_impl! { - /// This impl requires the [`"rc"`] Cargo feature of Serde. - /// - /// Deserializing a data structure containing `Arc` will not attempt to - /// deduplicate `Arc` references to the same data. Every deserialized `Arc` - /// will end up with a strong count of 1. 
- /// - /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc - (T), Arc<T>, Arc::new + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + (), Box<str>, String::into_boxed_str } -#[cfg(all(no_de_rc_dst, feature = "rc", any(feature = "std", feature = "alloc")))] forwarded_impl! { - /// This impl requires the [`"rc"`] Cargo feature of Serde. - /// - /// Deserializing a data structure containing `Rc` will not attempt to - /// deduplicate `Rc` references to the same data. Every deserialized `Rc` - /// will end up with a strong count of 1. - /// - /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc - (T), Rc<T>, Rc::new + #[cfg(all(feature = "std", any(unix, windows)))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", any(unix, windows)))))] + (), Box<OsStr>, OsString::into_boxed_os_str } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, 'a, T: ?Sized> Deserialize<'de> for Cow<'a, T> where T: ToOwned, @@ -1843,6 +1898,10 @@ where /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] +#[cfg_attr( + doc_cfg, + doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))) +)] impl<'de, T: ?Sized> Deserialize<'de> for RcWeak<T> where T: Deserialize<'de>, @@ -1851,7 +1910,7 @@ where where D: Deserializer<'de>, { - try!(Option::<T>::deserialize(deserializer)); + tri!(Option::<T>::deserialize(deserializer)); Ok(RcWeak::new()) } } @@ -1861,6 +1920,10 @@ where /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] +#[cfg_attr( + doc_cfg, + doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))) +)] impl<'de, T: ?Sized> Deserialize<'de> for ArcWeak<T> where T: Deserialize<'de>, @@ -1869,24 +1932,19 @@ where where D: Deserializer<'de>, { - try!(Option::<T>::deserialize(deserializer)); + tri!(Option::<T>::deserialize(deserializer)); Ok(ArcWeak::new()) } } //////////////////////////////////////////////////////////////////////////////// -#[cfg(all( - not(no_de_rc_dst), - feature = "rc", - any(feature = "std", feature = "alloc") -))] macro_rules! box_forwarded_impl { ( - $(#[doc = $doc:tt])* + $(#[$attr:meta])* $t:ident ) => { - $(#[doc = $doc])* + $(#[$attr])* impl<'de, T: ?Sized> Deserialize<'de> for $t<T> where Box<T>: Deserialize<'de>, @@ -1901,11 +1959,6 @@ macro_rules! box_forwarded_impl { }; } -#[cfg(all( - not(no_de_rc_dst), - feature = "rc", - any(feature = "std", feature = "alloc") -))] box_forwarded_impl! { /// This impl requires the [`"rc"`] Cargo feature of Serde. /// @@ -1914,14 +1967,11 @@ box_forwarded_impl! { /// will end up with a strong count of 1. /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc + #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))))] Rc } -#[cfg(all( - not(no_de_rc_dst), - feature = "rc", - any(feature = "std", feature = "alloc") -))] box_forwarded_impl! { /// This impl requires the [`"rc"`] Cargo feature of Serde. /// @@ -1930,6 +1980,8 @@ box_forwarded_impl! { /// will end up with a strong count of 1. 
/// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc + #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))))] Arc } @@ -1947,13 +1999,21 @@ where } } -forwarded_impl!((T), RefCell<T>, RefCell::new); +forwarded_impl! { + (T), RefCell<T>, RefCell::new +} -#[cfg(feature = "std")] -forwarded_impl!((T), Mutex<T>, Mutex::new); +forwarded_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + (T), Mutex<T>, Mutex::new +} -#[cfg(feature = "std")] -forwarded_impl!((T), RwLock<T>, RwLock::new); +forwarded_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + (T), RwLock<T>, RwLock::new +} //////////////////////////////////////////////////////////////////////////////// @@ -1965,7 +2025,6 @@ forwarded_impl!((T), RwLock<T>, RwLock::new); // secs: u64, // nanos: u32, // } -#[cfg(any(feature = "std", not(no_core_duration)))] impl<'de> Deserialize<'de> for Duration { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -2013,7 +2072,7 @@ impl<'de> Deserialize<'de> for Duration { b"secs" => Ok(Field::Secs), b"nanos" => Ok(Field::Nanos), _ => { - let value = ::__private::from_utf8_lossy(value); + let value = crate::__private::from_utf8_lossy(value); Err(Error::unknown_field(&*value, FIELDS)) } } @@ -2048,19 +2107,19 @@ impl<'de> Deserialize<'de> for Duration { where A: SeqAccess<'de>, { - let secs: u64 = match try!(seq.next_element()) { + let secs: u64 = match tri!(seq.next_element()) { Some(value) => value, None => { return Err(Error::invalid_length(0, &self)); } }; - let nanos: u32 = match try!(seq.next_element()) { + let nanos: u32 = match tri!(seq.next_element()) { Some(value) => value, None => { return Err(Error::invalid_length(1, &self)); } }; - try!(check_overflow(secs, nanos)); + tri!(check_overflow(secs, nanos)); Ok(Duration::new(secs, nanos)) } @@ -2070,19 +2129,19 @@ impl<'de> Deserialize<'de> for Duration { { let mut secs: Option<u64> = None; let mut nanos: Option<u32> = None; - while let Some(key) = try!(map.next_key()) { + while let Some(key) = tri!(map.next_key()) { match key { Field::Secs => { if secs.is_some() { return Err(<A::Error as Error>::duplicate_field("secs")); } - secs = Some(try!(map.next_value())); + secs = Some(tri!(map.next_value())); } Field::Nanos => { if nanos.is_some() { return Err(<A::Error as Error>::duplicate_field("nanos")); } - nanos = Some(try!(map.next_value())); + nanos = Some(tri!(map.next_value())); } } } @@ -2094,12 +2153,12 @@ impl<'de> Deserialize<'de> for Duration { Some(nanos) => nanos, None => return Err(<A::Error as Error>::missing_field("nanos")), }; - try!(check_overflow(secs, nanos)); + tri!(check_overflow(secs, nanos)); Ok(Duration::new(secs, nanos)) } } - const FIELDS: &'static [&'static str] = &["secs", "nanos"]; + const FIELDS: &[&str] = &["secs", "nanos"]; deserializer.deserialize_struct("Duration", FIELDS, DurationVisitor) } } @@ -2107,6 +2166,7 @@ impl<'de> Deserialize<'de> for Duration { //////////////////////////////////////////////////////////////////////////////// #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de> Deserialize<'de> for SystemTime { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -2186,19 +2246,19 @@ impl<'de> Deserialize<'de> for SystemTime { where A: SeqAccess<'de>, { - let secs: u64 = match try!(seq.next_element()) { + let secs: u64 = match tri!(seq.next_element()) { 
Some(value) => value, None => { return Err(Error::invalid_length(0, &self)); } }; - let nanos: u32 = match try!(seq.next_element()) { + let nanos: u32 = match tri!(seq.next_element()) { Some(value) => value, None => { return Err(Error::invalid_length(1, &self)); } }; - try!(check_overflow(secs, nanos)); + tri!(check_overflow(secs, nanos)); Ok(Duration::new(secs, nanos)) } @@ -2208,7 +2268,7 @@ impl<'de> Deserialize<'de> for SystemTime { { let mut secs: Option<u64> = None; let mut nanos: Option<u32> = None; - while let Some(key) = try!(map.next_key()) { + while let Some(key) = tri!(map.next_key()) { match key { Field::Secs => { if secs.is_some() { @@ -2216,7 +2276,7 @@ impl<'de> Deserialize<'de> for SystemTime { "secs_since_epoch", )); } - secs = Some(try!(map.next_value())); + secs = Some(tri!(map.next_value())); } Field::Nanos => { if nanos.is_some() { @@ -2224,7 +2284,7 @@ impl<'de> Deserialize<'de> for SystemTime { "nanos_since_epoch", )); } - nanos = Some(try!(map.next_value())); + nanos = Some(tri!(map.next_value())); } } } @@ -2236,13 +2296,13 @@ impl<'de> Deserialize<'de> for SystemTime { Some(nanos) => nanos, None => return Err(<A::Error as Error>::missing_field("nanos_since_epoch")), }; - try!(check_overflow(secs, nanos)); + tri!(check_overflow(secs, nanos)); Ok(Duration::new(secs, nanos)) } } - const FIELDS: &'static [&'static str] = &["secs_since_epoch", "nanos_since_epoch"]; - let duration = try!(deserializer.deserialize_struct("SystemTime", FIELDS, DurationVisitor)); + const FIELDS: &[&str] = &["secs_since_epoch", "nanos_since_epoch"]; + let duration = tri!(deserializer.deserialize_struct("SystemTime", FIELDS, DurationVisitor)); #[cfg(not(no_systemtime_checked_add))] let ret = UNIX_EPOCH .checked_add(duration) @@ -2259,9 +2319,9 @@ impl<'de> Deserialize<'de> for SystemTime { // // #[derive(Deserialize)] // #[serde(deny_unknown_fields)] -// struct Range { -// start: u64, -// end: u32, +// struct Range<Idx> { +// start: Idx, +// end: Idx, // } impl<'de, Idx> Deserialize<'de> for Range<Idx> where @@ -2271,7 +2331,7 @@ where where D: Deserializer<'de>, { - let (start, end) = try!(deserializer.deserialize_struct( + let (start, end) = tri!(deserializer.deserialize_struct( "Range", range::FIELDS, range::RangeVisitor { @@ -2283,7 +2343,6 @@ where } } -#[cfg(not(no_range_inclusive))] impl<'de, Idx> Deserialize<'de> for RangeInclusive<Idx> where Idx: Deserialize<'de>, @@ -2292,7 +2351,7 @@ where where D: Deserializer<'de>, { - let (start, end) = try!(deserializer.deserialize_struct( + let (start, end) = tri!(deserializer.deserialize_struct( "RangeInclusive", range::FIELDS, range::RangeVisitor { @@ -2305,11 +2364,11 @@ where } mod range { - use lib::*; + use crate::lib::*; - use de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Visitor}; + use crate::de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Visitor}; - pub const FIELDS: &'static [&'static str] = &["start", "end"]; + pub const FIELDS: &[&str] = &["start", "end"]; // If this were outside of the serde crate, it would just use: // @@ -2353,7 +2412,7 @@ mod range { b"start" => Ok(Field::Start), b"end" => Ok(Field::End), _ => { - let value = ::__private::from_utf8_lossy(value); + let value = crate::__private::from_utf8_lossy(value); Err(Error::unknown_field(&*value, FIELDS)) } } @@ -2383,13 +2442,13 @@ mod range { where A: SeqAccess<'de>, { - let start: Idx = match try!(seq.next_element()) { + let start: Idx = match tri!(seq.next_element()) { Some(value) => value, None => { return Err(Error::invalid_length(0, 
&self)); } }; - let end: Idx = match try!(seq.next_element()) { + let end: Idx = match tri!(seq.next_element()) { Some(value) => value, None => { return Err(Error::invalid_length(1, &self)); @@ -2404,19 +2463,19 @@ mod range { { let mut start: Option<Idx> = None; let mut end: Option<Idx> = None; - while let Some(key) = try!(map.next_key()) { + while let Some(key) = tri!(map.next_key()) { match key { Field::Start => { if start.is_some() { return Err(<A::Error as Error>::duplicate_field("start")); } - start = Some(try!(map.next_value())); + start = Some(tri!(map.next_value())); } Field::End => { if end.is_some() { return Err(<A::Error as Error>::duplicate_field("end")); } - end = Some(try!(map.next_value())); + end = Some(tri!(map.next_value())); } } } @@ -2435,7 +2494,282 @@ mod range { //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(not(no_ops_bound), all(feature = "std", not(no_collections_bound))))] +// Similar to: +// +// #[derive(Deserialize)] +// #[serde(deny_unknown_fields)] +// struct RangeFrom<Idx> { +// start: Idx, +// } +impl<'de, Idx> Deserialize<'de> for RangeFrom<Idx> +where + Idx: Deserialize<'de>, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let start = tri!(deserializer.deserialize_struct( + "RangeFrom", + range_from::FIELDS, + range_from::RangeFromVisitor { + expecting: "struct RangeFrom", + phantom: PhantomData, + }, + )); + Ok(start..) + } +} + +mod range_from { + use crate::lib::*; + + use crate::de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Visitor}; + + pub const FIELDS: &[&str] = &["start"]; + + // If this were outside of the serde crate, it would just use: + // + // #[derive(Deserialize)] + // #[serde(field_identifier, rename_all = "lowercase")] + enum Field { + Start, + } + + impl<'de> Deserialize<'de> for Field { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + struct FieldVisitor; + + impl<'de> Visitor<'de> for FieldVisitor { + type Value = Field; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("`start`") + } + + fn visit_str<E>(self, value: &str) -> Result<Self::Value, E> + where + E: Error, + { + match value { + "start" => Ok(Field::Start), + _ => Err(Error::unknown_field(value, FIELDS)), + } + } + + fn visit_bytes<E>(self, value: &[u8]) -> Result<Self::Value, E> + where + E: Error, + { + match value { + b"start" => Ok(Field::Start), + _ => { + let value = crate::__private::from_utf8_lossy(value); + Err(Error::unknown_field(&*value, FIELDS)) + } + } + } + } + + deserializer.deserialize_identifier(FieldVisitor) + } + } + + pub struct RangeFromVisitor<Idx> { + pub expecting: &'static str, + pub phantom: PhantomData<Idx>, + } + + impl<'de, Idx> Visitor<'de> for RangeFromVisitor<Idx> + where + Idx: Deserialize<'de>, + { + type Value = Idx; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str(self.expecting) + } + + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> + where + A: SeqAccess<'de>, + { + let start: Idx = match tri!(seq.next_element()) { + Some(value) => value, + None => { + return Err(Error::invalid_length(0, &self)); + } + }; + Ok(start) + } + + fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> + where + A: MapAccess<'de>, + { + let mut start: Option<Idx> = None; + while let Some(key) = tri!(map.next_key()) { + match key { + Field::Start => { + if start.is_some() { 
+ return Err(<A::Error as Error>::duplicate_field("start")); + } + start = Some(tri!(map.next_value())); + } + } + } + let start = match start { + Some(start) => start, + None => return Err(<A::Error as Error>::missing_field("start")), + }; + Ok(start) + } + } +} + +//////////////////////////////////////////////////////////////////////////////// + +// Similar to: +// +// #[derive(Deserialize)] +// #[serde(deny_unknown_fields)] +// struct RangeTo<Idx> { +// end: Idx, +// } +impl<'de, Idx> Deserialize<'de> for RangeTo<Idx> +where + Idx: Deserialize<'de>, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let end = tri!(deserializer.deserialize_struct( + "RangeTo", + range_to::FIELDS, + range_to::RangeToVisitor { + expecting: "struct RangeTo", + phantom: PhantomData, + }, + )); + Ok(..end) + } +} + +mod range_to { + use crate::lib::*; + + use crate::de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Visitor}; + + pub const FIELDS: &[&str] = &["end"]; + + // If this were outside of the serde crate, it would just use: + // + // #[derive(Deserialize)] + // #[serde(field_identifier, rename_all = "lowercase")] + enum Field { + End, + } + + impl<'de> Deserialize<'de> for Field { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + struct FieldVisitor; + + impl<'de> Visitor<'de> for FieldVisitor { + type Value = Field; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("`end`") + } + + fn visit_str<E>(self, value: &str) -> Result<Self::Value, E> + where + E: Error, + { + match value { + "end" => Ok(Field::End), + _ => Err(Error::unknown_field(value, FIELDS)), + } + } + + fn visit_bytes<E>(self, value: &[u8]) -> Result<Self::Value, E> + where + E: Error, + { + match value { + b"end" => Ok(Field::End), + _ => { + let value = crate::__private::from_utf8_lossy(value); + Err(Error::unknown_field(&*value, FIELDS)) + } + } + } + } + + deserializer.deserialize_identifier(FieldVisitor) + } + } + + pub struct RangeToVisitor<Idx> { + pub expecting: &'static str, + pub phantom: PhantomData<Idx>, + } + + impl<'de, Idx> Visitor<'de> for RangeToVisitor<Idx> + where + Idx: Deserialize<'de>, + { + type Value = Idx; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str(self.expecting) + } + + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> + where + A: SeqAccess<'de>, + { + let end: Idx = match tri!(seq.next_element()) { + Some(value) => value, + None => { + return Err(Error::invalid_length(0, &self)); + } + }; + Ok(end) + } + + fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> + where + A: MapAccess<'de>, + { + let mut end: Option<Idx> = None; + while let Some(key) = tri!(map.next_key()) { + match key { + Field::End => { + if end.is_some() { + return Err(<A::Error as Error>::duplicate_field("end")); + } + end = Some(tri!(map.next_value())); + } + } + } + let end = match end { + Some(end) => end, + None => return Err(<A::Error as Error>::missing_field("end")), + }; + Ok(end) + } + } +} + +//////////////////////////////////////////////////////////////////////////////// + impl<'de, T> Deserialize<'de> for Bound<T> where T: Deserialize<'de>, @@ -2527,7 +2861,7 @@ where where A: EnumAccess<'de>, { - match try!(data.variant()) { + match tri!(data.variant()) { (Field::Unbounded, v) => v.unit_variant().map(|()| Bound::Unbounded), (Field::Included, v) => v.newtype_variant().map(Bound::Included), 
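
// A minimal sketch of the new RangeFrom/RangeTo impls introduced above,
// assuming serde_json as the data format (an illustration-only assumption,
// not part of this patch).
use std::ops::{RangeFrom, RangeTo};

fn range_shapes() -> Result<(), serde_json::Error> {
    // RangeFrom deserializes from a single-field {start} struct...
    let from: RangeFrom<u32> = serde_json::from_str(r#"{"start":3}"#)?;
    assert_eq!(from, 3..);
    // ...and RangeTo from a single-field {end} struct.
    let to: RangeTo<u32> = serde_json::from_str(r#"{"end":9}"#)?;
    assert_eq!(to, ..9);
    Ok(())
}
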
(Field::Excluded, v) => v.newtype_variant().map(Bound::Excluded), @@ -2535,7 +2869,7 @@ where } } - const VARIANTS: &'static [&'static str] = &["Unbounded", "Included", "Excluded"]; + const VARIANTS: &[&str] = &["Unbounded", "Included", "Excluded"]; deserializer.deserialize_enum("Bound", VARIANTS, BoundVisitor(PhantomData)) } @@ -2636,14 +2970,14 @@ where where A: EnumAccess<'de>, { - match try!(data.variant()) { + match tri!(data.variant()) { (Field::Ok, v) => v.newtype_variant().map(Ok), (Field::Err, v) => v.newtype_variant().map(Err), } } } - const VARIANTS: &'static [&'static str] = &["Ok", "Err"]; + const VARIANTS: &[&str] = &["Ok", "Err"]; deserializer.deserialize_enum("Result", VARIANTS, ResultVisitor(PhantomData)) } @@ -2668,6 +3002,7 @@ macro_rules! atomic_impl { ($($ty:ident $size:expr)*) => { $( #[cfg(any(no_target_has_atomic, target_has_atomic = $size))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", target_has_atomic = $size))))] impl<'de> Deserialize<'de> for $ty { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -2709,7 +3044,7 @@ struct FromStrVisitor<T> { impl<T> FromStrVisitor<T> { fn new(expecting: &'static str) -> Self { FromStrVisitor { - expecting: expecting, + expecting, ty: PhantomData, } } diff --git a/vendor/serde/src/de/mod.rs b/vendor/serde/src/de/mod.rs index ca29ec6..c9919d9 100644 --- a/vendor/serde/src/de/mod.rs +++ b/vendor/serde/src/de/mod.rs @@ -64,8 +64,8 @@ //! - RefCell\<T\> //! - Mutex\<T\> //! - RwLock\<T\> -//! - Rc\<T\> *(if* features = ["rc"] *is enabled)* -//! - Arc\<T\> *(if* features = ["rc"] *is enabled)* +//! - Rc\<T\> *(if* features = \["rc"\] *is enabled)* +//! - Arc\<T\> *(if* features = \["rc"\] *is enabled)* //! - **Collection types**: //! - BTreeMap\<K, V\> //! - BTreeSet\<T\> @@ -112,26 +112,28 @@ //! [derive section of the manual]: https://serde.rs/derive.html //! [data formats]: https://serde.rs/#data-formats -use lib::*; +use crate::lib::*; //////////////////////////////////////////////////////////////////////////////// pub mod value; -#[cfg(not(no_integer128))] mod format; mod ignored_any; mod impls; -mod utf8; +pub(crate) mod size_hint; pub use self::ignored_any::IgnoredAny; +#[cfg(not(any(feature = "std", feature = "unstable")))] +#[doc(no_inline)] +pub use crate::std_error::Error as StdError; +#[cfg(all(feature = "unstable", not(feature = "std")))] +#[doc(no_inline)] +pub use core::error::Error as StdError; #[cfg(feature = "std")] #[doc(no_inline)] pub use std::error::Error as StdError; -#[cfg(not(feature = "std"))] -#[doc(no_inline)] -pub use std_error::Error as StdError; //////////////////////////////////////////////////////////////////////////////// @@ -162,7 +164,7 @@ macro_rules! declare_error_trait { /// /// The message should not be capitalized and should not end with a period. /// - /// ```edition2018 + /// ```edition2021 /// # use std::str::FromStr; /// # /// # struct IpAddr; @@ -307,7 +309,7 @@ declare_error_trait!(Error: Sized + Debug + Display); /// This is used as an argument to the `invalid_type`, `invalid_value`, and /// `invalid_length` methods of the `Error` trait to build error messages. /// -/// ```edition2018 +/// ```edition2021 /// # use std::fmt; /// # /// # use serde::de::{self, Unexpected, Visitor}; @@ -432,10 +434,9 @@ impl<'a> fmt::Display for Unexpected<'a> { /// Within the context of a `Visitor` implementation, the `Visitor` itself /// (`&self`) is an implementation of this trait. 
/// -/// ```edition2018 -/// # use std::fmt; -/// # +/// ```edition2021 /// # use serde::de::{self, Unexpected, Visitor}; +/// # use std::fmt; /// # /// # struct Example; /// # @@ -457,7 +458,7 @@ impl<'a> fmt::Display for Unexpected<'a> { /// /// Outside of a `Visitor`, `&"..."` can be used. /// -/// ```edition2018 +/// ```edition2021 /// # use serde::de::{self, Unexpected}; /// # /// # fn example<E>() -> Result<(), E> @@ -465,7 +466,10 @@ impl<'a> fmt::Display for Unexpected<'a> { /// # E: de::Error, /// # { /// # let v = true; -/// return Err(de::Error::invalid_type(Unexpected::Bool(v), &"a negative integer")); +/// return Err(de::Error::invalid_type( +/// Unexpected::Bool(v), +/// &"a negative integer", +/// )); /// # } /// ``` pub trait Expected { @@ -564,7 +568,7 @@ pub trait Deserialize<'de>: Sized { D: Deserializer<'de>, { // Default implementation just delegates to `deserialize` impl. - *place = try!(Deserialize::deserialize(deserializer)); + *place = tri!(Deserialize::deserialize(deserializer)); Ok(()) } } @@ -577,7 +581,7 @@ pub trait Deserialize<'de>: Sized { /// from the input string, but a `from_reader` function may only deserialize /// owned data. /// -/// ```edition2018 +/// ```edition2021 /// # use serde::de::{Deserialize, DeserializeOwned}; /// # use std::io::{Read, Result}; /// # @@ -616,7 +620,7 @@ impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de> {} /// /// The canonical API for stateless deserialization looks like this: /// -/// ```edition2018 +/// ```edition2021 /// # use serde::Deserialize; /// # /// # enum Error {} @@ -630,7 +634,7 @@ impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de> {} /// Adjusting an API like this to support stateful deserialization is a matter /// of accepting a seed as input: /// -/// ```edition2018 +/// ```edition2021 /// # use serde::de::DeserializeSeed; /// # /// # enum Error {} @@ -663,12 +667,11 @@ impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de> {} /// into it. This requires stateful deserialization using the `DeserializeSeed` /// trait. /// -/// ```edition2018 +/// ```edition2021 +/// use serde::de::{Deserialize, DeserializeSeed, Deserializer, SeqAccess, Visitor}; /// use std::fmt; /// use std::marker::PhantomData; /// -/// use serde::de::{Deserialize, DeserializeSeed, Deserializer, SeqAccess, Visitor}; -/// /// // A DeserializeSeed implementation that uses stateful deserialization to /// // append array elements onto the end of an existing vector. The preexisting /// // state ("seed") in this case is the Vec<T>. The `deserialize` method of @@ -709,7 +712,7 @@ impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de> {} /// { /// // Decrease the number of reallocations if there are many elements /// if let Some(size_hint) = seq.size_hint() { -/// self.0.reserve(size_hint); +/// self.0.reserve(size_hint); /// } /// /// // Visit each element in the inner array and push it onto @@ -945,18 +948,15 @@ pub trait Deserializer<'de>: Sized { where V: Visitor<'de>; - serde_if_integer128! { - /// Hint that the `Deserialize` type is expecting an `i128` value. - /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default behavior unconditionally returns an error. - fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value, Self::Error> - where - V: Visitor<'de> - { - let _ = visitor; - Err(Error::custom("i128 is not supported")) - } + /// Hint that the `Deserialize` type is expecting an `i128` value. 
+ /// + /// The default behavior unconditionally returns an error. + fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value, Self::Error> + where + V: Visitor<'de>, + { + let _ = visitor; + Err(Error::custom("i128 is not supported")) } /// Hint that the `Deserialize` type is expecting a `u8` value. @@ -979,18 +979,15 @@ pub trait Deserializer<'de>: Sized { where V: Visitor<'de>; - serde_if_integer128! { - /// Hint that the `Deserialize` type is expecting an `u128` value. - /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default behavior unconditionally returns an error. - fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value, Self::Error> - where - V: Visitor<'de> - { - let _ = visitor; - Err(Error::custom("u128 is not supported")) - } + /// Hint that the `Deserialize` type is expecting an `u128` value. + /// + /// The default behavior unconditionally returns an error. + fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value, Self::Error> + where + V: Visitor<'de>, + { + let _ = visitor; + Err(Error::custom("u128 is not supported")) } /// Hint that the `Deserialize` type is expecting a `f32` value. @@ -1158,7 +1155,7 @@ pub trait Deserializer<'de>: Sized { /// human-readable one and binary formats like Postcard will prefer the /// compact one. /// - /// ```edition2018 + /// ```edition2021 /// # use std::ops::Add; /// # use std::str::FromStr; /// # @@ -1225,11 +1222,11 @@ pub trait Deserializer<'de>: Sized { #[doc(hidden)] fn __deserialize_content<V>( self, - _: ::actually_private::T, + _: crate::actually_private::T, visitor: V, - ) -> Result<::private::de::Content<'de>, Self::Error> + ) -> Result<crate::__private::de::Content<'de>, Self::Error> where - V: Visitor<'de, Value = ::private::de::Content<'de>>, + V: Visitor<'de, Value = crate::__private::de::Content<'de>>, { self.deserialize_any(visitor) } @@ -1249,10 +1246,9 @@ pub trait Deserializer<'de>: Sized { /// /// # Example /// -/// ```edition2018 -/// # use std::fmt; -/// # +/// ```edition2021 /// # use serde::de::{self, Unexpected, Visitor}; +/// # use std::fmt; /// # /// /// A visitor that deserializes a long string - a string containing at least /// /// some minimum number of bytes. @@ -1290,7 +1286,7 @@ pub trait Visitor<'de>: Sized { /// "an integer between 0 and 64". The message should not be capitalized and /// should not end with a period. /// - /// ```edition2018 + /// ```edition2021 /// # use std::fmt; /// # /// # struct S { @@ -1363,20 +1359,20 @@ pub trait Visitor<'de>: Sized { Err(Error::invalid_type(Unexpected::Signed(v), &self)) } - serde_if_integer128! { - /// The input contains a `i128`. - /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default implementation fails with a type error. - fn visit_i128<E>(self, v: i128) -> Result<Self::Value, E> - where - E: Error, - { - let mut buf = [0u8; 58]; - let mut writer = format::Buf::new(&mut buf); - fmt::Write::write_fmt(&mut writer, format_args!("integer `{}` as i128", v)).unwrap(); - Err(Error::invalid_type(Unexpected::Other(writer.as_str()), &self)) - } + /// The input contains a `i128`. + /// + /// The default implementation fails with a type error. 
+ fn visit_i128<E>(self, v: i128) -> Result<Self::Value, E> + where + E: Error, + { + let mut buf = [0u8; 58]; + let mut writer = format::Buf::new(&mut buf); + fmt::Write::write_fmt(&mut writer, format_args!("integer `{}` as i128", v)).unwrap(); + Err(Error::invalid_type( + Unexpected::Other(writer.as_str()), + &self, + )) } /// The input contains a `u8`. @@ -1425,20 +1421,20 @@ pub trait Visitor<'de>: Sized { Err(Error::invalid_type(Unexpected::Unsigned(v), &self)) } - serde_if_integer128! { - /// The input contains a `u128`. - /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default implementation fails with a type error. - fn visit_u128<E>(self, v: u128) -> Result<Self::Value, E> - where - E: Error, - { - let mut buf = [0u8; 57]; - let mut writer = format::Buf::new(&mut buf); - fmt::Write::write_fmt(&mut writer, format_args!("integer `{}` as u128", v)).unwrap(); - Err(Error::invalid_type(Unexpected::Other(writer.as_str()), &self)) - } + /// The input contains a `u128`. + /// + /// The default implementation fails with a type error. + fn visit_u128<E>(self, v: u128) -> Result<Self::Value, E> + where + E: Error, + { + let mut buf = [0u8; 57]; + let mut writer = format::Buf::new(&mut buf); + fmt::Write::write_fmt(&mut writer, format_args!("integer `{}` as u128", v)).unwrap(); + Err(Error::invalid_type( + Unexpected::Other(writer.as_str()), + &self, + )) } /// The input contains an `f32`. @@ -1474,7 +1470,7 @@ pub trait Visitor<'de>: Sized { where E: Error, { - self.visit_str(utf8::encode(v).as_str()) + self.visit_str(v.encode_utf8(&mut [0u8; 4])) } /// The input contains a string. The lifetime of the string is ephemeral and @@ -1529,6 +1525,7 @@ pub trait Visitor<'de>: Sized { /// `String`. #[inline] #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] fn visit_string<E>(self, v: String) -> Result<Self::Value, E> where E: Error, @@ -1551,7 +1548,6 @@ pub trait Visitor<'de>: Sized { where E: Error, { - let _ = v; Err(Error::invalid_type(Unexpected::Bytes(v), &self)) } @@ -1588,6 +1584,7 @@ pub trait Visitor<'de>: Sized { /// The default implementation forwards to `visit_bytes` and then drops the /// `Vec<u8>`. 
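
// A minimal sketch of the std replacement used by visit_char above:
// char::encode_utf8 with a 4-byte stack buffer now does what the
// hand-rolled de/utf8.rs module (deleted later in this patch) did by hand.
fn char_to_str() {
    let mut buf = [0u8; 4];
    let s: &str = '€'.encode_utf8(&mut buf);
    assert_eq!(s, "€");
    assert_eq!(s.len(), 3); // '€' occupies three UTF-8 bytes
}
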
#[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E> where E: Error, @@ -1831,9 +1828,9 @@ pub trait MapAccess<'de> { K: DeserializeSeed<'de>, V: DeserializeSeed<'de>, { - match try!(self.next_key_seed(kseed)) { + match tri!(self.next_key_seed(kseed)) { Some(key) => { - let value = try!(self.next_value_seed(vseed)); + let value = tri!(self.next_value_seed(vseed)); Ok(Some((key, value))) } None => Ok(None), @@ -2035,7 +2032,7 @@ pub trait VariantAccess<'de>: Sized { /// If the data contains a different type of variant, the following /// `invalid_type` error should be constructed: /// - /// ```edition2018 + /// ```edition2021 /// # use serde::de::{self, value, DeserializeSeed, Visitor, VariantAccess, Unexpected}; /// # /// # struct X; @@ -2075,7 +2072,7 @@ pub trait VariantAccess<'de>: Sized { /// If the data contains a different type of variant, the following /// `invalid_type` error should be constructed: /// - /// ```edition2018 + /// ```edition2021 /// # use serde::de::{self, value, DeserializeSeed, Visitor, VariantAccess, Unexpected}; /// # /// # struct X; @@ -2131,7 +2128,7 @@ pub trait VariantAccess<'de>: Sized { /// If the data contains a different type of variant, the following /// `invalid_type` error should be constructed: /// - /// ```edition2018 + /// ```edition2021 /// # use serde::de::{self, value, DeserializeSeed, Visitor, VariantAccess, Unexpected}; /// # /// # struct X; @@ -2148,11 +2145,7 @@ pub trait VariantAccess<'de>: Sized { /// # T: DeserializeSeed<'de>, /// # { unimplemented!() } /// # - /// fn tuple_variant<V>( - /// self, - /// _len: usize, - /// _visitor: V, - /// ) -> Result<V::Value, Self::Error> + /// fn tuple_variant<V>(self, _len: usize, _visitor: V) -> Result<V::Value, Self::Error> /// where /// V: Visitor<'de>, /// { @@ -2178,7 +2171,7 @@ pub trait VariantAccess<'de>: Sized { /// If the data contains a different type of variant, the following /// `invalid_type` error should be constructed: /// - /// ```edition2018 + /// ```edition2021 /// # use serde::de::{self, value, DeserializeSeed, Visitor, VariantAccess, Unexpected}; /// # /// # struct X; @@ -2238,10 +2231,10 @@ pub trait VariantAccess<'de>: Sized { /// /// # Example /// -/// ```edition2018 +/// ```edition2021 +/// use serde::de::{value, Deserialize, IntoDeserializer}; +/// use serde_derive::Deserialize; /// use std::str::FromStr; -/// use serde::Deserialize; -/// use serde::de::{value, IntoDeserializer}; /// /// #[derive(Deserialize)] /// enum Setting { @@ -2285,12 +2278,12 @@ impl Display for OneOf { 1 => write!(formatter, "`{}`", self.names[0]), 2 => write!(formatter, "`{}` or `{}`", self.names[0], self.names[1]), _ => { - try!(write!(formatter, "one of ")); + tri!(write!(formatter, "one of ")); for (i, alt) in self.names.iter().enumerate() { if i > 0 { - try!(write!(formatter, ", ")); + tri!(write!(formatter, ", ")); } - try!(write!(formatter, "`{}`", alt)); + tri!(write!(formatter, "`{}`", alt)); } Ok(()) } diff --git a/vendor/serde/src/de/seed.rs b/vendor/serde/src/de/seed.rs index 13b7ea4..52fb89d 100644 --- a/vendor/serde/src/de/seed.rs +++ b/vendor/serde/src/de/seed.rs @@ -1,4 +1,4 @@ -use de::{Deserialize, DeserializeSeed, Deserializer}; +use crate::de::{Deserialize, DeserializeSeed, Deserializer}; /// A DeserializeSeed helper for implementing deserialize_in_place Visitors. 
/// diff --git a/vendor/serde/src/private/size_hint.rs b/vendor/serde/src/de/size_hint.rs similarity index 50% rename from vendor/serde/src/private/size_hint.rs rename to vendor/serde/src/de/size_hint.rs index ca71e61..4a4fe25 100644 --- a/vendor/serde/src/private/size_hint.rs +++ b/vendor/serde/src/de/size_hint.rs @@ -1,4 +1,4 @@ -use lib::*; +use crate::lib::*; pub fn from_bounds<I>(iter: &I) -> Option<usize> where @@ -8,9 +8,17 @@ where } #[cfg(any(feature = "std", feature = "alloc"))] -#[inline] -pub fn cautious(hint: Option<usize>) -> usize { - cmp::min(hint.unwrap_or(0), 4096) +pub fn cautious<Element>(hint: Option<usize>) -> usize { + const MAX_PREALLOC_BYTES: usize = 1024 * 1024; + + if mem::size_of::<Element>() == 0 { + 0 + } else { + cmp::min( + hint.unwrap_or(0), + MAX_PREALLOC_BYTES / mem::size_of::<Element>(), + ) + } } fn helper(bounds: (usize, Option<usize>)) -> Option<usize> { diff --git a/vendor/serde/src/de/utf8.rs b/vendor/serde/src/de/utf8.rs deleted file mode 100644 index 576fd03..0000000 --- a/vendor/serde/src/de/utf8.rs +++ /dev/null @@ -1,46 +0,0 @@ -use lib::*; - -const TAG_CONT: u8 = 0b1000_0000; -const TAG_TWO_B: u8 = 0b1100_0000; -const TAG_THREE_B: u8 = 0b1110_0000; -const TAG_FOUR_B: u8 = 0b1111_0000; -const MAX_ONE_B: u32 = 0x80; -const MAX_TWO_B: u32 = 0x800; -const MAX_THREE_B: u32 = 0x10000; - -#[inline] -pub fn encode(c: char) -> Encode { - let code = c as u32; - let mut buf = [0; 4]; - let pos = if code < MAX_ONE_B { - buf[3] = code as u8; - 3 - } else if code < MAX_TWO_B { - buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; - buf[3] = (code & 0x3F) as u8 | TAG_CONT; - 2 - } else if code < MAX_THREE_B { - buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; - buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; - buf[3] = (code & 0x3F) as u8 | TAG_CONT; - 1 - } else { - buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; - buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; - buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; - buf[3] = (code & 0x3F) as u8 | TAG_CONT; - 0 - }; - Encode { buf: buf, pos: pos } -} - -pub struct Encode { - buf: [u8; 4], - pos: usize, -} - -impl Encode { - pub fn as_str(&self) -> &str { - str::from_utf8(&self.buf[self.pos..]).unwrap() - } -} diff --git a/vendor/serde/src/de/value.rs b/vendor/serde/src/de/value.rs index 5d88862..b229eba 100644 --- a/vendor/serde/src/de/value.rs +++ b/vendor/serde/src/de/value.rs @@ -1,10 +1,10 @@ //! Building blocks for deserializing basic values using the `IntoDeserializer` //! trait. //! -//! ```edition2018 +//! ```edition2021 +//! use serde::de::{value, Deserialize, IntoDeserializer}; +//! use serde_derive::Deserialize; //! use std::str::FromStr; -//! use serde::Deserialize; -//! use serde::de::{value, IntoDeserializer}; //! //! #[derive(Deserialize)] //! enum Setting { @@ -21,12 +21,11 @@ //! } //! ``` -use lib::*; +use crate::lib::*; use self::private::{First, Second}; -use __private::size_hint; -use de::{self, Deserializer, Expected, IntoDeserializer, SeqAccess, Visitor}; -use ser; +use crate::de::{self, size_hint, Deserializer, Expected, IntoDeserializer, SeqAccess, Visitor}; +use crate::ser; //////////////////////////////////////////////////////////////////////////////// @@ -113,6 +112,7 @@ impl Debug for Error { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl error::Error for Error { fn description(&self) -> &str { &self.err @@ -185,12 +185,14 @@ impl<E> Debug for UnitDeserializer<E> { /// A deserializer that cannot be instantiated. 
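
// A minimal sketch of the rewritten size_hint::cautious above: instead of a
// flat 4096-element cap, it bounds preallocation at 1 MiB worth of elements
// and refuses to preallocate zero-sized types, so a hostile length claim in
// the input cannot force a huge up-front allocation.
use std::{cmp, mem};

fn cautious<Element>(hint: Option<usize>) -> usize {
    const MAX_PREALLOC_BYTES: usize = 1024 * 1024;
    if mem::size_of::<Element>() == 0 {
        0
    } else {
        cmp::min(
            hint.unwrap_or(0),
            MAX_PREALLOC_BYTES / mem::size_of::<Element>(),
        )
    }
}

fn cautious_caps() {
    assert_eq!(cautious::<u64>(Some(usize::MAX)), 131_072); // 1 MiB / 8 bytes
    assert_eq!(cautious::<()>(Some(usize::MAX)), 0);
}
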
#[cfg(feature = "unstable")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "unstable")))] pub struct NeverDeserializer<E> { never: !, marker: PhantomData<E>, } #[cfg(feature = "unstable")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "unstable")))] impl<'de, E> IntoDeserializer<'de, E> for ! where E: de::Error, @@ -251,7 +253,7 @@ macro_rules! primitive_deserializer { #[allow(missing_docs)] pub fn new(value: $ty) -> Self { $name { - value: value, + value, marker: PhantomData, } } @@ -293,20 +295,17 @@ primitive_deserializer!(i8, "an `i8`.", I8Deserializer, visit_i8); primitive_deserializer!(i16, "an `i16`.", I16Deserializer, visit_i16); primitive_deserializer!(i32, "an `i32`.", I32Deserializer, visit_i32); primitive_deserializer!(i64, "an `i64`.", I64Deserializer, visit_i64); +primitive_deserializer!(i128, "an `i128`.", I128Deserializer, visit_i128); primitive_deserializer!(isize, "an `isize`.", IsizeDeserializer, visit_i64 as i64); primitive_deserializer!(u8, "a `u8`.", U8Deserializer, visit_u8); primitive_deserializer!(u16, "a `u16`.", U16Deserializer, visit_u16); primitive_deserializer!(u64, "a `u64`.", U64Deserializer, visit_u64); +primitive_deserializer!(u128, "a `u128`.", U128Deserializer, visit_u128); primitive_deserializer!(usize, "a `usize`.", UsizeDeserializer, visit_u64 as u64); primitive_deserializer!(f32, "an `f32`.", F32Deserializer, visit_f32); primitive_deserializer!(f64, "an `f64`.", F64Deserializer, visit_f64); primitive_deserializer!(char, "a `char`.", CharDeserializer, visit_char); -serde_if_integer128! { - primitive_deserializer!(i128, "an `i128`.", I128Deserializer, visit_i128); - primitive_deserializer!(u128, "a `u128`.", U128Deserializer, visit_u128); -} - /// A deserializer holding a `u32`. pub struct U32Deserializer<E> { value: u32, @@ -330,7 +329,7 @@ impl<E> U32Deserializer<E> { #[allow(missing_docs)] pub fn new(value: u32) -> Self { U32Deserializer { - value: value, + value, marker: PhantomData, } } @@ -419,7 +418,7 @@ impl<'a, E> StrDeserializer<'a, E> { #[allow(missing_docs)] pub fn new(value: &'a str) -> Self { StrDeserializer { - value: value, + value, marker: PhantomData, } } @@ -498,7 +497,7 @@ impl<'de, E> BorrowedStrDeserializer<'de, E> { /// Create a new borrowed deserializer from the given string. pub fn new(value: &'de str) -> BorrowedStrDeserializer<'de, E> { BorrowedStrDeserializer { - value: value, + value, marker: PhantomData, } } @@ -566,6 +565,7 @@ impl<'de, E> Debug for BorrowedStrDeserializer<'de, E> { /// A deserializer holding a `String`. #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] pub struct StringDeserializer<E> { value: String, marker: PhantomData<E>, @@ -582,6 +582,7 @@ impl<E> Clone for StringDeserializer<E> { } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, E> IntoDeserializer<'de, E> for String where E: de::Error, @@ -598,7 +599,7 @@ impl<E> StringDeserializer<E> { #[allow(missing_docs)] pub fn new(value: String) -> Self { StringDeserializer { - value: value, + value, marker: PhantomData, } } @@ -669,6 +670,7 @@ impl<E> Debug for StringDeserializer<E> { /// A deserializer holding a `Cow<str>`. 
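
// A minimal sketch of the now-unconditional 128-bit support: the
// I128Deserializer/U128Deserializer added above behave like the other
// primitive deserializers and no longer hide behind serde_if_integer128!.
use serde::de::value::{Error, I128Deserializer};
use serde::de::{Deserialize, IntoDeserializer};

fn i128_roundtrip() {
    let de: I128Deserializer<Error> = i128::MAX.into_deserializer();
    assert_eq!(i128::deserialize(de).unwrap(), i128::MAX);
}
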
#[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] pub struct CowStrDeserializer<'a, E> { value: Cow<'a, str>, marker: PhantomData<E>, @@ -685,6 +687,7 @@ impl<'a, E> Clone for CowStrDeserializer<'a, E> { } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, 'a, E> IntoDeserializer<'de, E> for Cow<'a, str> where E: de::Error, @@ -701,7 +704,7 @@ impl<'a, E> CowStrDeserializer<'a, E> { #[allow(missing_docs)] pub fn new(value: Cow<'a, str>) -> Self { CowStrDeserializer { - value: value, + value, marker: PhantomData, } } @@ -783,7 +786,7 @@ impl<'a, E> BytesDeserializer<'a, E> { /// Create a new deserializer from the given bytes. pub fn new(value: &'a [u8]) -> Self { BytesDeserializer { - value: value, + value, marker: PhantomData, } } @@ -842,7 +845,7 @@ impl<'de, E> BorrowedBytesDeserializer<'de, E> { /// Create a new borrowed deserializer from the given borrowed bytes. pub fn new(value: &'de [u8]) -> Self { BorrowedBytesDeserializer { - value: value, + value, marker: PhantomData, } } @@ -937,8 +940,8 @@ where where V: de::Visitor<'de>, { - let v = try!(visitor.visit_seq(&mut self)); - try!(self.end()); + let v = tri!(visitor.visit_seq(&mut self)); + tri!(self.end()); Ok(v) } @@ -1003,6 +1006,7 @@ where //////////////////////////////////////////////////////////////////////////////// #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, T, E> IntoDeserializer<'de, E> for Vec<T> where T: IntoDeserializer<'de, E>, @@ -1016,6 +1020,7 @@ where } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, T, E> IntoDeserializer<'de, E> for BTreeSet<T> where T: IntoDeserializer<'de, E> + Eq + Ord, @@ -1029,6 +1034,7 @@ where } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de, T, S, E> IntoDeserializer<'de, E> for HashSet<T, S> where T: IntoDeserializer<'de, E> + Eq + Hash, @@ -1053,7 +1059,7 @@ pub struct SeqAccessDeserializer<A> { impl<A> SeqAccessDeserializer<A> { /// Construct a new `SeqAccessDeserializer<A>`. 
pub fn new(seq: A) -> Self { - SeqAccessDeserializer { seq: seq } + SeqAccessDeserializer { seq } } } @@ -1162,8 +1168,8 @@ where where V: de::Visitor<'de>, { - let value = try!(visitor.visit_map(&mut self)); - try!(self.end()); + let value = tri!(visitor.visit_map(&mut self)); + tri!(self.end()); Ok(value) } @@ -1171,8 +1177,8 @@ where where V: de::Visitor<'de>, { - let value = try!(visitor.visit_seq(&mut self)); - try!(self.end()); + let value = tri!(visitor.visit_seq(&mut self)); + tri!(self.end()); Ok(value) } @@ -1236,8 +1242,8 @@ where { match self.next_pair() { Some((key, value)) => { - let key = try!(kseed.deserialize(key.into_deserializer())); - let value = try!(vseed.deserialize(value.into_deserializer())); + let key = tri!(kseed.deserialize(key.into_deserializer())); + let value = tri!(vseed.deserialize(value.into_deserializer())); Ok(Some((key, value))) } None => Ok(None), @@ -1341,7 +1347,7 @@ where V: de::Visitor<'de>, { let mut pair_visitor = PairVisitor(Some(self.0), Some(self.1), PhantomData); - let pair = try!(visitor.visit_seq(&mut pair_visitor)); + let pair = tri!(visitor.visit_seq(&mut pair_visitor)); if pair_visitor.1.is_none() { Ok(pair) } else { @@ -1415,6 +1421,7 @@ impl Expected for ExpectedInMap { //////////////////////////////////////////////////////////////////////////////// #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, K, V, E> IntoDeserializer<'de, E> for BTreeMap<K, V> where K: IntoDeserializer<'de, E> + Eq + Ord, @@ -1429,6 +1436,7 @@ where } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de, K, V, S, E> IntoDeserializer<'de, E> for HashMap<K, V, S> where K: IntoDeserializer<'de, E> + Eq + Hash, @@ -1454,7 +1462,7 @@ pub struct MapAccessDeserializer<A> { impl<A> MapAccessDeserializer<A> { /// Construct a new `MapAccessDeserializer<A>`. pub fn new(map: A) -> Self { - MapAccessDeserializer { map: map } + MapAccessDeserializer { map } } } @@ -1501,7 +1509,7 @@ where where T: de::DeserializeSeed<'de>, { - match try!(self.map.next_key_seed(seed)) { + match tri!(self.map.next_key_seed(seed)) { Some(key) => Ok((key, private::map_as_enum(self.map))), None => Err(de::Error::invalid_type(de::Unexpected::Map, &"enum")), } @@ -1519,7 +1527,7 @@ pub struct EnumAccessDeserializer<A> { impl<A> EnumAccessDeserializer<A> { /// Construct a new `EnumAccessDeserializer<A>`. 
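
// A minimal sketch of driving an enum from a MapAccess, which the
// MapAccessDeserializer above (together with the private map_as_enum below)
// makes possible; serde_derive is assumed for the derive, as in the doc
// examples elsewhere in this file.
use serde::de::value::{Error, MapAccessDeserializer, MapDeserializer};
use serde::de::Deserialize;
use serde_derive::Deserialize;

#[derive(Deserialize, Debug, PartialEq)]
enum Shape {
    Circle(u32),
}

fn enum_from_map() {
    // A single-entry map acts as an externally tagged enum.
    let access = MapDeserializer::new(std::iter::once(("Circle", 5u32)));
    let shape: Result<Shape, Error> = Shape::deserialize(MapAccessDeserializer::new(access));
    assert_eq!(shape.unwrap(), Shape::Circle(5));
}
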
pub fn new(access: A) -> Self { - EnumAccessDeserializer { access: access } + EnumAccessDeserializer { access } } } @@ -1546,9 +1554,11 @@ where //////////////////////////////////////////////////////////////////////////////// mod private { - use lib::*; + use crate::lib::*; - use de::{self, DeserializeSeed, Deserializer, MapAccess, Unexpected, VariantAccess, Visitor}; + use crate::de::{ + self, DeserializeSeed, Deserializer, MapAccess, Unexpected, VariantAccess, Visitor, + }; pub struct UnitOnly<E> { marker: PhantomData<E>, @@ -1613,7 +1623,7 @@ mod private { } pub fn map_as_enum<A>(map: A) -> MapAsEnum<A> { - MapAsEnum { map: map } + MapAsEnum { map } } impl<'de, A> VariantAccess<'de> for MapAsEnum<A> @@ -1637,10 +1647,7 @@ mod private { where V: Visitor<'de>, { - self.map.next_value_seed(SeedTupleVariant { - len: len, - visitor: visitor, - }) + self.map.next_value_seed(SeedTupleVariant { len, visitor }) } fn struct_variant<V>( @@ -1651,8 +1658,7 @@ mod private { where V: Visitor<'de>, { - self.map - .next_value_seed(SeedStructVariant { visitor: visitor }) + self.map.next_value_seed(SeedStructVariant { visitor }) } } diff --git a/vendor/serde/src/integer128.rs b/vendor/serde/src/integer128.rs index 904c2a2..2f94a64 100644 --- a/vendor/serde/src/integer128.rs +++ b/vendor/serde/src/integer128.rs @@ -1,82 +1,9 @@ -/// Conditional compilation depending on whether Serde is built with support for -/// 128-bit integers. -/// -/// Data formats that wish to support Rust compiler versions older than 1.26 -/// (or targets that lack 128-bit integers) may place the i128 / u128 methods -/// of their Serializer and Deserializer behind this macro. -/// -/// Data formats that require a minimum Rust compiler version of at least 1.26, -/// or do not target platforms that lack 128-bit integers, do not need to -/// bother with this macro and may assume support for 128-bit integers. -/// -/// ```edition2018 -/// # use serde::__private::doc::Error; -/// # -/// # struct MySerializer; -/// # -/// use serde::{serde_if_integer128, Serializer}; -/// -/// impl Serializer for MySerializer { -/// type Ok = (); -/// type Error = Error; -/// -/// fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> { -/// /* ... */ -/// # unimplemented!() -/// } -/// -/// /* ... */ -/// -/// serde_if_integer128! { -/// fn serialize_i128(self, v: i128) -> Result<Self::Ok, Self::Error> { -/// /* ... */ -/// # unimplemented!() -/// } -/// -/// fn serialize_u128(self, v: u128) -> Result<Self::Ok, Self::Error> { -/// /* ... */ -/// # unimplemented!() -/// } -/// } -/// # -/// # serde::__serialize_unimplemented! { -/// # bool i8 i16 i32 u8 u16 u32 u64 f32 f64 char str bytes none some -/// # unit unit_struct unit_variant newtype_struct newtype_variant seq -/// # tuple tuple_struct tuple_variant map struct struct_variant -/// # } -/// } -/// ``` -/// -/// When Serde is built with support for 128-bit integers, this macro expands -/// transparently into just the input tokens. -/// -/// ```edition2018 -/// macro_rules! serde_if_integer128 { -/// ($($tt:tt)*) => { -/// $($tt)* -/// }; -/// } -/// ``` -/// -/// When built without support for 128-bit integers, this macro expands to -/// nothing. -/// -/// ```edition2018 -/// macro_rules! serde_if_integer128 { -/// ($($tt:tt)*) => {}; -/// } -/// ``` -#[cfg(not(no_integer128))] +// No longer used. Old versions of serde used this macro for supporting targets +// that did not yet have 128-bit integer support. #[macro_export] +#[doc(hidden)] macro_rules! 
serde_if_integer128 { ($($tt:tt)*) => { $($tt)* }; } - -#[cfg(no_integer128)] -#[macro_export] -#[doc(hidden)] -macro_rules! serde_if_integer128 { - ($($tt:tt)*) => {}; -} diff --git a/vendor/serde/src/lib.rs b/vendor/serde/src/lib.rs index 82d42de..b0756af 100644 --- a/vendor/serde/src/lib.rs +++ b/vendor/serde/src/lib.rs @@ -63,6 +63,7 @@ //! and from DynamoDB. //! - [Hjson], a syntax extension to JSON designed around human reading and //! editing. *(deserialization only)* +//! - [CSV], Comma-separated values is a tabular text file format. //! //! [JSON]: https://github.com/serde-rs/json //! [Postcard]: https://github.com/jamesmunns/postcard @@ -89,66 +90,66 @@ //! [DynamoDB Items]: https://docs.rs/serde_dynamo //! [rusoto_dynamodb]: https://docs.rs/rusoto_dynamodb //! [Hjson]: https://github.com/Canop/deser-hjson +//! [CSV]: https://docs.rs/csv //////////////////////////////////////////////////////////////////////////////// // Serde types in rustdoc of other crates get linked to here. -#![doc(html_root_url = "https://docs.rs/serde/1.0.159")] +#![doc(html_root_url = "https://docs.rs/serde/1.0.195")] // Support using Serde without the standard library! #![cfg_attr(not(feature = "std"), no_std)] +// Show which crate feature enables conditionally compiled APIs in documentation. +#![cfg_attr(doc_cfg, feature(doc_cfg))] // Unstable functionality only if the user asks for it. For tracking and // discussion of these features please refer to this issue: // // https://github.com/serde-rs/serde/issues/812 #![cfg_attr(feature = "unstable", feature(error_in_core, never_type))] #![allow(unknown_lints, bare_trait_objects, deprecated)] -#![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] // Ignored clippy and clippy_pedantic lints -#![cfg_attr( - feature = "cargo-clippy", - allow( - // clippy bug: https://github.com/rust-lang/rust-clippy/issues/5704 - unnested_or_patterns, - // clippy bug: https://github.com/rust-lang/rust-clippy/issues/7768 - semicolon_if_nothing_returned, - // not available in our oldest supported compiler - empty_enum, - type_repetition_in_bounds, // https://github.com/rust-lang/rust-clippy/issues/8772 - // integer and float ser/de requires these sorts of casts - cast_possible_truncation, - cast_possible_wrap, - cast_sign_loss, - // things are often more readable this way - cast_lossless, - module_name_repetitions, - option_if_let_else, - single_match_else, - type_complexity, - use_self, - zero_prefixed_literal, - // correctly used - derive_partial_eq_without_eq, - enum_glob_use, - explicit_auto_deref, - let_underscore_untyped, - map_err_ignore, - new_without_default, - result_unit_err, - wildcard_imports, - // not practical - needless_pass_by_value, - similar_names, - too_many_lines, - // preference - doc_markdown, - unseparated_literal_suffix, - // false positive - needless_doctest_main, - // noisy - missing_errors_doc, - must_use_candidate, - ) +#![allow( + // clippy bug: https://github.com/rust-lang/rust-clippy/issues/5704 + clippy::unnested_or_patterns, + // clippy bug: https://github.com/rust-lang/rust-clippy/issues/7768 + clippy::semicolon_if_nothing_returned, + // not available in our oldest supported compiler + clippy::empty_enum, + clippy::type_repetition_in_bounds, // https://github.com/rust-lang/rust-clippy/issues/8772 + // integer and float ser/de requires these sorts of casts + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + // things are often more readable this way + clippy::cast_lossless, + 
clippy::module_name_repetitions, + clippy::single_match_else, + clippy::type_complexity, + clippy::use_self, + clippy::zero_prefixed_literal, + // correctly used + clippy::derive_partial_eq_without_eq, + clippy::enum_glob_use, + clippy::explicit_auto_deref, + clippy::let_underscore_untyped, + clippy::map_err_ignore, + clippy::new_without_default, + clippy::result_unit_err, + clippy::wildcard_imports, + // not practical + clippy::needless_pass_by_value, + clippy::similar_names, + clippy::too_many_lines, + // preference + clippy::doc_markdown, + clippy::unseparated_literal_suffix, + // false positive + clippy::needless_doctest_main, + // noisy + clippy::missing_errors_doc, + clippy::must_use_candidate, )] +// Restrictions +#![deny(clippy::question_mark_used)] // Rustc lints. #![deny(missing_docs, unused_imports)] @@ -168,21 +169,26 @@ mod lib { pub use std::*; } - pub use self::core::{cmp, iter, mem, num, ptr, slice, str}; pub use self::core::{f32, f64}; pub use self::core::{i16, i32, i64, i8, isize}; + pub use self::core::{iter, num, ptr, str}; pub use self::core::{u16, u32, u64, u8, usize}; + #[cfg(any(feature = "std", feature = "alloc"))] + pub use self::core::{cmp, mem, slice}; + pub use self::core::cell::{Cell, RefCell}; pub use self::core::clone::{self, Clone}; + pub use self::core::cmp::Reverse; pub use self::core::convert::{self, From, Into}; pub use self::core::default::{self, Default}; pub use self::core::fmt::{self, Debug, Display}; pub use self::core::marker::{self, PhantomData}; pub use self::core::num::Wrapping; - pub use self::core::ops::Range; + pub use self::core::ops::{Bound, Range, RangeFrom, RangeInclusive, RangeTo}; pub use self::core::option::{self, Option}; pub use self::core::result::{self, Result}; + pub use self::core::time::Duration; #[cfg(all(feature = "alloc", not(feature = "std")))] pub use alloc::borrow::{Cow, ToOwned}; @@ -220,7 +226,7 @@ mod lib { pub use std::collections::{BTreeMap, BTreeSet, BinaryHeap, LinkedList, VecDeque}; #[cfg(all(not(no_core_cstr), not(feature = "std")))] - pub use core::ffi::CStr; + pub use self::core::ffi::CStr; #[cfg(feature = "std")] pub use std::ffi::CStr; @@ -247,18 +253,6 @@ mod lib { #[cfg(feature = "std")] pub use std::time::{SystemTime, UNIX_EPOCH}; - #[cfg(all(feature = "std", not(no_collections_bound), no_ops_bound))] - pub use std::collections::Bound; - - #[cfg(not(no_core_reverse))] - pub use self::core::cmp::Reverse; - - #[cfg(not(no_ops_bound))] - pub use self::core::ops::Bound; - - #[cfg(not(no_range_inclusive))] - pub use self::core::ops::RangeInclusive; - #[cfg(all(feature = "std", no_target_has_atomic, not(no_std_atomic)))] pub use std::sync::atomic::{ AtomicBool, AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicU16, AtomicU32, AtomicU8, @@ -279,16 +273,13 @@ mod lib { pub use std::sync::atomic::{AtomicI64, AtomicU64}; #[cfg(all(feature = "std", not(no_target_has_atomic), target_has_atomic = "ptr"))] pub use std::sync::atomic::{AtomicIsize, AtomicUsize}; - - #[cfg(any(feature = "std", not(no_core_duration)))] - pub use self::core::time::Duration; } // None of this crate's error handling needs the `From::from` error conversion // performed implicitly by the `?` operator or the standard library's `try!` // macro. This simplified macro gives a 5.5% improvement in compile time // compared to standard `try!`, and 9% improvement compared to `?`. -macro_rules! try { +macro_rules! 
tri { ($expr:expr) => { match $expr { Ok(val) => val, @@ -309,20 +300,15 @@ pub mod de; pub mod ser; #[doc(inline)] -pub use de::{Deserialize, Deserializer}; +pub use crate::de::{Deserialize, Deserializer}; #[doc(inline)] -pub use ser::{Serialize, Serializer}; +pub use crate::ser::{Serialize, Serializer}; // Used by generated code and doc tests. Not public API. #[doc(hidden)] #[path = "private/mod.rs"] pub mod __private; -#[allow(unused_imports)] -use self::__private as export; -#[allow(unused_imports)] -use self::__private as private; - #[path = "de/seed.rs"] mod seed; @@ -335,12 +321,11 @@ mod std_error; // be annoying for crates that provide handwritten impls or data formats. They // would need to disable default features and then explicitly re-enable std. #[cfg(feature = "serde_derive")] -#[allow(unused_imports)] -#[macro_use] extern crate serde_derive; /// Derive macro available if serde is built with `features = ["derive"]`. #[cfg(feature = "serde_derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] pub use serde_derive::{Deserialize, Serialize}; #[cfg(all(not(no_serde_derive), any(feature = "std", feature = "alloc")))] diff --git a/vendor/serde/src/macros.rs b/vendor/serde/src/macros.rs index 6502a23..a8fd85a 100644 --- a/vendor/serde/src/macros.rs +++ b/vendor/serde/src/macros.rs @@ -11,7 +11,7 @@ /// input. This requires repetitive implementations of all the [`Deserializer`] /// trait methods. /// -/// ```edition2018 +/// ```edition2021 /// # use serde::forward_to_deserialize_any; /// # use serde::de::{value, Deserializer, Visitor}; /// # @@ -47,7 +47,7 @@ /// methods so that they forward directly to [`Deserializer::deserialize_any`]. /// You can choose which methods to forward. /// -/// ```edition2018 +/// ```edition2021 /// # use serde::forward_to_deserialize_any; /// # use serde::de::{value, Deserializer, Visitor}; /// # @@ -78,11 +78,10 @@ /// called `V`. A different type parameter and a different lifetime can be /// specified explicitly if necessary. /// -/// ```edition2018 -/// # use std::marker::PhantomData; -/// # +/// ```edition2021 /// # use serde::forward_to_deserialize_any; /// # use serde::de::{value, Deserializer, Visitor}; +/// # use std::marker::PhantomData; /// # /// # struct MyDeserializer<V>(PhantomData<V>); /// # @@ -155,9 +154,7 @@ macro_rules! forward_to_deserialize_any_helper { forward_to_deserialize_any_method!{deserialize_i64<$l, $v>()} }; (i128<$l:tt, $v:ident>) => { - serde_if_integer128! { - forward_to_deserialize_any_method!{deserialize_i128<$l, $v>()} - } + forward_to_deserialize_any_method!{deserialize_i128<$l, $v>()} }; (u8<$l:tt, $v:ident>) => { forward_to_deserialize_any_method!{deserialize_u8<$l, $v>()} @@ -172,9 +169,7 @@ macro_rules! forward_to_deserialize_any_helper { forward_to_deserialize_any_method!{deserialize_u64<$l, $v>()} }; (u128<$l:tt, $v:ident>) => { - serde_if_integer128! 
{ - forward_to_deserialize_any_method!{deserialize_u128<$l, $v>()} - } + forward_to_deserialize_any_method!{deserialize_u128<$l, $v>()} }; (f32<$l:tt, $v:ident>) => { forward_to_deserialize_any_method!{deserialize_f32<$l, $v>()} diff --git a/vendor/serde/src/private/de.rs b/vendor/serde/src/private/de.rs index e9c693d..883e690 100644 --- a/vendor/serde/src/private/de.rs +++ b/vendor/serde/src/private/de.rs @@ -1,10 +1,13 @@ -use lib::*; +use crate::lib::*; -use de::value::{BorrowedBytesDeserializer, BytesDeserializer}; -use de::{Deserialize, Deserializer, Error, IntoDeserializer, Visitor}; +use crate::de::value::{BorrowedBytesDeserializer, BytesDeserializer}; +use crate::de::{ + Deserialize, DeserializeSeed, Deserializer, EnumAccess, Error, IntoDeserializer, VariantAccess, + Visitor, +}; #[cfg(any(feature = "std", feature = "alloc"))] -use de::{DeserializeSeed, MapAccess, Unexpected}; +use crate::de::{MapAccess, Unexpected}; #[cfg(any(feature = "std", feature = "alloc"))] pub use self::content::{ @@ -13,7 +16,7 @@ pub use self::content::{ TagOrContentField, TagOrContentFieldVisitor, TaggedContentVisitor, UntaggedUnitVisitor, }; -pub use seed::InPlaceSeed; +pub use crate::seed::InPlaceSeed; /// If the missing field is of type `Option<T>` then treat is as `None`, /// otherwise it is an error. @@ -203,13 +206,13 @@ mod content { // This issue is tracking making some of this stuff public: // https://github.com/serde-rs/serde/issues/741 - use lib::*; + use crate::lib::*; - use __private::size_hint; - use actually_private; - use de::{ - self, Deserialize, DeserializeSeed, Deserializer, EnumAccess, Expected, IgnoredAny, - MapAccess, SeqAccess, Unexpected, Visitor, + use crate::actually_private; + use crate::de::value::{MapDeserializer, SeqDeserializer}; + use crate::de::{ + self, size_hint, Deserialize, DeserializeSeed, Deserializer, EnumAccess, Expected, + IgnoredAny, MapAccess, SeqAccess, Unexpected, Visitor, }; /// Used from generated code to buffer the contents of the Deserializer when @@ -299,6 +302,17 @@ mod content { } } + impl<'de, E> de::IntoDeserializer<'de, E> for Content<'de> + where + E: de::Error, + { + type Deserializer = ContentDeserializer<'de, E>; + + fn into_deserializer(self) -> Self::Deserializer { + ContentDeserializer::new(self) + } + } + struct ContentVisitor<'de> { value: PhantomData<Content<'de>>, } @@ -474,8 +488,9 @@ mod content { where V: SeqAccess<'de>, { - let mut vec = Vec::with_capacity(size_hint::cautious(visitor.size_hint())); - while let Some(e) = try!(visitor.next_element()) { + let mut vec = + Vec::<Content>::with_capacity(size_hint::cautious::<Content>(visitor.size_hint())); + while let Some(e) = tri!(visitor.next_element()) { vec.push(e); } Ok(Content::Seq(vec)) @@ -485,8 +500,11 @@ mod content { where V: MapAccess<'de>, { - let mut vec = Vec::with_capacity(size_hint::cautious(visitor.size_hint())); - while let Some(kv) = try!(visitor.next_entry()) { + let mut vec = + Vec::<(Content, Content)>::with_capacity( + size_hint::cautious::<(Content, Content)>(visitor.size_hint()), + ); + while let Some(kv) = tri!(visitor.next_entry()) { vec.push(kv); } Ok(Content::Map(vec)) @@ -518,7 +536,7 @@ mod content { impl<'de> TagOrContentVisitor<'de> { fn new(name: &'static str) -> Self { TagOrContentVisitor { - name: name, + name, value: PhantomData, } } @@ -797,51 +815,29 @@ mod content { /// Used by generated code to deserialize an internally tagged enum. /// /// Not public API. 
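
// A minimal sketch of the feature this private machinery implements:
// internally tagged enums buffer the input into Content (capped by the
// cautious size hint above) and then re-deserialize the variant. Assumes
// serde_json as the data format and serde_derive for the derive (both
// illustration-only assumptions, not part of this patch).
use serde_derive::Deserialize;

#[derive(Deserialize, Debug, PartialEq)]
#[serde(tag = "type")]
enum Event {
    Click { x: i32, y: i32 },
}

fn internally_tagged() -> Result<(), serde_json::Error> {
    let e: Event = serde_json::from_str(r#"{"type":"Click","x":1,"y":2}"#)?;
    assert_eq!(e, Event::Click { x: 1, y: 2 });
    Ok(())
}
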
- pub struct TaggedContent<'de, T> { - pub tag: T, - pub content: Content<'de>, - } - - /// Not public API. - pub struct TaggedContentVisitor<'de, T> { + pub struct TaggedContentVisitor<T> { tag_name: &'static str, expecting: &'static str, - value: PhantomData<TaggedContent<'de, T>>, + value: PhantomData<T>, } - impl<'de, T> TaggedContentVisitor<'de, T> { + impl<T> TaggedContentVisitor<T> { /// Visitor for the content of an internally tagged enum with the given /// tag name. pub fn new(name: &'static str, expecting: &'static str) -> Self { TaggedContentVisitor { tag_name: name, - expecting: expecting, + expecting, value: PhantomData, } } } - impl<'de, T> DeserializeSeed<'de> for TaggedContentVisitor<'de, T> + impl<'de, T> Visitor<'de> for TaggedContentVisitor<T> where T: Deserialize<'de>, { - type Value = TaggedContent<'de, T>; - - fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error> - where - D: Deserializer<'de>, - { - // Internally tagged enums are only supported in self-describing - // formats. - deserializer.deserialize_any(self) - } - } - - impl<'de, T> Visitor<'de> for TaggedContentVisitor<'de, T> - where - T: Deserialize<'de>, - { - type Value = TaggedContent<'de, T>; + type Value = (T, Content<'de>); fn expecting(&self, fmt: &mut fmt::Formatter) -> fmt::Result { fmt.write_str(self.expecting) @@ -851,17 +847,14 @@ mod content { where S: SeqAccess<'de>, { - let tag = match try!(seq.next_element()) { + let tag = match tri!(seq.next_element()) { Some(tag) => tag, None => { return Err(de::Error::missing_field(self.tag_name)); } }; let rest = de::value::SeqAccessDeserializer::new(seq); - Ok(TaggedContent { - tag: tag, - content: try!(Content::deserialize(rest)), - }) + Ok((tag, tri!(Content::deserialize(rest)))) } fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error> @@ -869,27 +862,27 @@ mod content { M: MapAccess<'de>, { let mut tag = None; - let mut vec = Vec::with_capacity(size_hint::cautious(map.size_hint())); - while let Some(k) = try!(map.next_key_seed(TagOrContentVisitor::new(self.tag_name))) { + let mut vec = Vec::<(Content, Content)>::with_capacity(size_hint::cautious::<( + Content, + Content, + )>(map.size_hint())); + while let Some(k) = tri!(map.next_key_seed(TagOrContentVisitor::new(self.tag_name))) { match k { TagOrContent::Tag => { if tag.is_some() { return Err(de::Error::duplicate_field(self.tag_name)); } - tag = Some(try!(map.next_value())); + tag = Some(tri!(map.next_value())); } TagOrContent::Content(k) => { - let v = try!(map.next_value()); + let v = tri!(map.next_value()); vec.push((k, v)); } } } match tag { None => Err(de::Error::missing_field(self.tag_name)), - Some(tag) => Ok(TaggedContent { - tag: tag, - content: Content::Map(vec), - }), + Some(tag) => Ok((tag, Content::Map(vec))), } } } @@ -915,7 +908,7 @@ mod content { where D: Deserializer<'de>, { - deserializer.deserialize_str(self) + deserializer.deserialize_identifier(self) } } @@ -926,6 +919,20 @@ mod content { write!(formatter, "{:?} or {:?}", self.tag, self.content) } + fn visit_u64<E>(self, field_index: u64) -> Result<Self::Value, E> + where + E: de::Error, + { + match field_index { + 0 => Ok(TagOrContentField::Tag), + 1 => Ok(TagOrContentField::Content), + _ => Err(de::Error::invalid_value( + Unexpected::Unsigned(field_index), + &self, + )), + } + } + fn visit_str<E>(self, field: &str) -> Result<Self::Value, E> where E: de::Error, @@ -938,6 +945,19 @@ mod content { Err(de::Error::invalid_value(Unexpected::Str(field), &self)) } } + + fn visit_bytes<E>(self, field: 
&[u8]) -> Result<Self::Value, E> + where + E: de::Error, + { + if field == self.tag.as_bytes() { + Ok(TagOrContentField::Tag) + } else if field == self.content.as_bytes() { + Ok(TagOrContentField::Content) + } else { + Err(de::Error::invalid_value(Unexpected::Bytes(field), &self)) + } + } } /// Used by generated code to deserialize an adjacently tagged enum when @@ -963,7 +983,7 @@ mod content { where D: Deserializer<'de>, { - deserializer.deserialize_str(self) + deserializer.deserialize_identifier(self) } } @@ -978,13 +998,31 @@ mod content { ) } + fn visit_u64<E>(self, field_index: u64) -> Result<Self::Value, E> + where + E: de::Error, + { + match field_index { + 0 => Ok(TagContentOtherField::Tag), + 1 => Ok(TagContentOtherField::Content), + _ => Ok(TagContentOtherField::Other), + } + } + fn visit_str<E>(self, field: &str) -> Result<Self::Value, E> where E: de::Error, { - if field == self.tag { + self.visit_bytes(field.as_bytes()) + } + + fn visit_bytes<E>(self, field: &[u8]) -> Result<Self::Value, E> + where + E: de::Error, + { + if field == self.tag.as_bytes() { Ok(TagContentOtherField::Tag) - } else if field == self.content { + } else if field == self.content.as_bytes() { Ok(TagContentOtherField::Content) } else { Ok(TagContentOtherField::Other) @@ -1050,9 +1088,9 @@ mod content { E: de::Error, { let seq = content.into_iter().map(ContentDeserializer::new); - let mut seq_visitor = de::value::SeqDeserializer::new(seq); - let value = try!(visitor.visit_seq(&mut seq_visitor)); - try!(seq_visitor.end()); + let mut seq_visitor = SeqDeserializer::new(seq); + let value = tri!(visitor.visit_seq(&mut seq_visitor)); + tri!(seq_visitor.end()); Ok(value) } @@ -1067,9 +1105,9 @@ mod content { let map = content .into_iter() .map(|(k, v)| (ContentDeserializer::new(k), ContentDeserializer::new(v))); - let mut map_visitor = de::value::MapDeserializer::new(map); - let value = try!(visitor.visit_map(&mut map_visitor)); - try!(map_visitor.end()); + let mut map_visitor = MapDeserializer::new(map); + let value = tri!(visitor.visit_map(&mut map_visitor)); + tri!(map_visitor.end()); Ok(value) } @@ -1457,7 +1495,7 @@ mod content { /// private API, don't use pub fn new(content: Content<'de>) -> Self { ContentDeserializer { - content: content, + content, err: PhantomData, } } @@ -1478,8 +1516,8 @@ mod content { { pub fn new(variant: Content<'de>, value: Option<Content<'de>>) -> EnumDeserializer<'de, E> { EnumDeserializer { - variant: variant, - value: value, + variant, + value, err: PhantomData, } } @@ -1545,7 +1583,7 @@ mod content { { match self.value { Some(Content::Seq(v)) => { - de::Deserializer::deserialize_any(SeqDeserializer::new(v), visitor) + de::Deserializer::deserialize_any(SeqDeserializer::new(v.into_iter()), visitor) } Some(other) => Err(de::Error::invalid_type( other.unexpected(), @@ -1568,10 +1606,10 @@ mod content { { match self.value { Some(Content::Map(v)) => { - de::Deserializer::deserialize_any(MapDeserializer::new(v), visitor) + de::Deserializer::deserialize_any(MapDeserializer::new(v.into_iter()), visitor) } Some(Content::Seq(v)) => { - de::Deserializer::deserialize_any(SeqDeserializer::new(v), visitor) + de::Deserializer::deserialize_any(SeqDeserializer::new(v.into_iter()), visitor) } Some(other) => Err(de::Error::invalid_type( other.unexpected(), @@ -1585,156 +1623,6 @@ mod content { } } - struct SeqDeserializer<'de, E> - where - E: de::Error, - { - iter: <Vec<Content<'de>> as IntoIterator>::IntoIter, - err: PhantomData<E>, - } - - impl<'de, E> SeqDeserializer<'de, E> - where - E: 
de::Error, - { - fn new(vec: Vec<Content<'de>>) -> Self { - SeqDeserializer { - iter: vec.into_iter(), - err: PhantomData, - } - } - } - - impl<'de, E> de::Deserializer<'de> for SeqDeserializer<'de, E> - where - E: de::Error, - { - type Error = E; - - #[inline] - fn deserialize_any<V>(mut self, visitor: V) -> Result<V::Value, Self::Error> - where - V: de::Visitor<'de>, - { - let len = self.iter.len(); - if len == 0 { - visitor.visit_unit() - } else { - let ret = try!(visitor.visit_seq(&mut self)); - let remaining = self.iter.len(); - if remaining == 0 { - Ok(ret) - } else { - Err(de::Error::invalid_length(len, &"fewer elements in array")) - } - } - } - - forward_to_deserialize_any! { - bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string - bytes byte_buf option unit unit_struct newtype_struct seq tuple - tuple_struct map struct enum identifier ignored_any - } - } - - impl<'de, E> de::SeqAccess<'de> for SeqDeserializer<'de, E> - where - E: de::Error, - { - type Error = E; - - fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error> - where - T: de::DeserializeSeed<'de>, - { - match self.iter.next() { - Some(value) => seed.deserialize(ContentDeserializer::new(value)).map(Some), - None => Ok(None), - } - } - - fn size_hint(&self) -> Option<usize> { - size_hint::from_bounds(&self.iter) - } - } - - struct MapDeserializer<'de, E> - where - E: de::Error, - { - iter: <Vec<(Content<'de>, Content<'de>)> as IntoIterator>::IntoIter, - value: Option<Content<'de>>, - err: PhantomData<E>, - } - - impl<'de, E> MapDeserializer<'de, E> - where - E: de::Error, - { - fn new(map: Vec<(Content<'de>, Content<'de>)>) -> Self { - MapDeserializer { - iter: map.into_iter(), - value: None, - err: PhantomData, - } - } - } - - impl<'de, E> de::MapAccess<'de> for MapDeserializer<'de, E> - where - E: de::Error, - { - type Error = E; - - fn next_key_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error> - where - T: de::DeserializeSeed<'de>, - { - match self.iter.next() { - Some((key, value)) => { - self.value = Some(value); - seed.deserialize(ContentDeserializer::new(key)).map(Some) - } - None => Ok(None), - } - } - - fn next_value_seed<T>(&mut self, seed: T) -> Result<T::Value, Self::Error> - where - T: de::DeserializeSeed<'de>, - { - match self.value.take() { - Some(value) => seed.deserialize(ContentDeserializer::new(value)), - None => Err(de::Error::custom("value is missing")), - } - } - - fn size_hint(&self) -> Option<usize> { - size_hint::from_bounds(&self.iter) - } - } - - impl<'de, E> de::Deserializer<'de> for MapDeserializer<'de, E> - where - E: de::Error, - { - type Error = E; - - #[inline] - fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error> - where - V: de::Visitor<'de>, - { - visitor.visit_map(self) - } - - forward_to_deserialize_any! { - bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string - bytes byte_buf option unit unit_struct newtype_struct seq tuple - tuple_struct map struct enum identifier ignored_any - } - } - /// Not public API. 
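// ----------------------------------------------------------------------
// The private `SeqDeserializer`/`MapDeserializer` deleted above duplicated
// serde's public value deserializers; the patched module imports
// `crate::de::value::{MapDeserializer, SeqDeserializer}` instead. An
// illustrative sketch of that public API (the demo fn is not part of the
// patch):

use serde::de::value::{Error as ValueError, SeqDeserializer};
use serde::Deserialize;

fn redeserialize_demo() -> Result<(), ValueError> {
    // Drive `Deserialize` from an in-memory iterator; each item is turned
    // into a deserializer via `IntoDeserializer`, the same role the buffered
    // `Content` values play in the surrounding hunks.
    let seq = SeqDeserializer::<_, ValueError>::new(vec![1u32, 2, 3].into_iter());
    let v: Vec<u32> = Deserialize::deserialize(seq)?;
    assert_eq!(v, [1, 2, 3]);
    Ok(())
}
// ----------------------------------------------------------------------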
pub struct ContentRefDeserializer<'a, 'de: 'a, E> { content: &'a Content<'de>, @@ -1796,9 +1684,9 @@ mod content { E: de::Error, { let seq = content.iter().map(ContentRefDeserializer::new); - let mut seq_visitor = de::value::SeqDeserializer::new(seq); - let value = try!(visitor.visit_seq(&mut seq_visitor)); - try!(seq_visitor.end()); + let mut seq_visitor = SeqDeserializer::new(seq); + let value = tri!(visitor.visit_seq(&mut seq_visitor)); + tri!(seq_visitor.end()); Ok(value) } @@ -1816,9 +1704,9 @@ mod content { ContentRefDeserializer::new(v), ) }); - let mut map_visitor = de::value::MapDeserializer::new(map); - let value = try!(visitor.visit_map(&mut map_visitor)); - try!(map_visitor.end()); + let mut map_visitor = MapDeserializer::new(map); + let value = tri!(visitor.visit_map(&mut map_visitor)); + tri!(map_visitor.end()); Ok(value) } @@ -2135,8 +2023,8 @@ mod content { }; visitor.visit_enum(EnumRefDeserializer { - variant: variant, - value: value, + variant, + value, err: PhantomData, }) } @@ -2180,12 +2068,20 @@ mod content { /// private API, don't use pub fn new(content: &'a Content<'de>) -> Self { ContentRefDeserializer { - content: content, + content, err: PhantomData, } } } + impl<'a, 'de: 'a, E> Copy for ContentRefDeserializer<'a, 'de, E> {} + + impl<'a, 'de: 'a, E> Clone for ContentRefDeserializer<'a, 'de, E> { + fn clone(&self) -> Self { + *self + } + } + struct EnumRefDeserializer<'a, 'de: 'a, E> where E: de::Error, @@ -2330,7 +2226,7 @@ mod content { if len == 0 { visitor.visit_unit() } else { - let ret = try!(visitor.visit_seq(&mut self)); + let ret = tri!(visitor.visit_seq(&mut self)); let remaining = self.iter.len(); if remaining == 0 { Ok(ret) @@ -2481,8 +2377,8 @@ mod content { /// Not public API. pub fn new(type_name: &'a str, variant_name: &'a str) -> Self { InternallyTaggedUnitVisitor { - type_name: type_name, - variant_name: variant_name, + type_name, + variant_name, } } } @@ -2509,7 +2405,7 @@ mod content { where M: MapAccess<'de>, { - while try!(access.next_entry::<IgnoredAny, IgnoredAny>()).is_some() {} + while tri!(access.next_entry::<IgnoredAny, IgnoredAny>()).is_some() {} Ok(()) } } @@ -2526,8 +2422,8 @@ mod content { /// Not public API. pub fn new(type_name: &'a str, variant_name: &'a str) -> Self { UntaggedUnitVisitor { - type_name: type_name, - variant_name: variant_name, + type_name, + variant_name, } } } @@ -2731,11 +2627,7 @@ where where V: Visitor<'de>, { - visitor.visit_map(FlatInternallyTaggedAccess { - iter: self.0.iter_mut(), - pending: None, - _marker: PhantomData, - }) + self.deserialize_map(visitor) } fn deserialize_enum<V>( @@ -2747,17 +2639,8 @@ where where V: Visitor<'de>, { - for item in self.0.iter_mut() { - // items in the vector are nulled out when used. So we can only use - // an item if it's still filled in and if the field is one we care - // about. 
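// ----------------------------------------------------------------------
// An illustrative sketch, not part of the vendored patch, of the derive
// feature behind this `deserialize_enum`: an externally tagged enum nested
// under `#[serde(flatten)]`. The rewritten loop below claims the buffered
// entry whose key names one of the enum's variants. Assumes `serde_derive`
// and `serde_json` are available.

use serde_derive::Deserialize;

#[derive(Deserialize, Debug)]
struct Command {
    seq: u64,
    #[serde(flatten)] // leftover keys are buffered, then claimed by `Op`
    op: Op,
}

#[derive(Deserialize, Debug)]
enum Op {
    Add { amount: i64 },
    Clear,
}

fn flatten_demo() -> Result<(), serde_json::Error> {
    let c: Command = serde_json::from_str(r#"{"seq":1,"Add":{"amount":5}}"#)?;
    println!("{:?}", c);
    Ok(())
}
// ----------------------------------------------------------------------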
- let use_item = match *item { - None => false, - Some((ref c, _)) => c.as_str().map_or(false, |x| variants.contains(&x)), - }; - - if use_item { - let (key, value) = item.take().unwrap(); + for entry in self.0 { + if let Some((key, value)) = flat_map_take_entry(entry, variants) { return visitor.visit_enum(EnumDeserializer::new(key, Some(value))); } } @@ -2772,7 +2655,11 @@ where where V: Visitor<'de>, { - visitor.visit_map(FlatMapAccess::new(self.0.iter())) + visitor.visit_map(FlatMapAccess { + iter: self.0.iter(), + pending_content: None, + _marker: PhantomData, + }) } fn deserialize_struct<V>( @@ -2784,7 +2671,12 @@ where where V: Visitor<'de>, { - visitor.visit_map(FlatStructAccess::new(self.0.iter_mut(), fields)) + visitor.visit_map(FlatStructAccess { + iter: self.0.iter_mut(), + pending_content: None, + fields, + _marker: PhantomData, + }) } fn deserialize_newtype_struct<V>(self, _name: &str, visitor: V) -> Result<V::Value, Self::Error> @@ -2811,6 +2703,13 @@ where visitor.visit_unit() } + fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, Self::Error> + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + forward_to_deserialize_other! { deserialize_bool() deserialize_i8() @@ -2833,30 +2732,16 @@ where deserialize_tuple(usize) deserialize_tuple_struct(&'static str, usize) deserialize_identifier() - deserialize_ignored_any() } } #[cfg(any(feature = "std", feature = "alloc"))] -pub struct FlatMapAccess<'a, 'de: 'a, E> { +struct FlatMapAccess<'a, 'de: 'a, E> { iter: slice::Iter<'a, Option<(Content<'de>, Content<'de>)>>, pending_content: Option<&'a Content<'de>>, _marker: PhantomData<E>, } -#[cfg(any(feature = "std", feature = "alloc"))] -impl<'a, 'de, E> FlatMapAccess<'a, 'de, E> { - fn new( - iter: slice::Iter<'a, Option<(Content<'de>, Content<'de>)>>, - ) -> FlatMapAccess<'a, 'de, E> { - FlatMapAccess { - iter: iter, - pending_content: None, - _marker: PhantomData, - } - } -} - #[cfg(any(feature = "std", feature = "alloc"))] impl<'a, 'de, E> MapAccess<'de> for FlatMapAccess<'a, 'de, E> where @@ -2871,6 +2756,10 @@ where for item in &mut self.iter { // Items in the vector are nulled out when used by a struct. if let Some((ref key, ref content)) = *item { + // Do not take(), instead borrow this entry. The internally tagged + // enum does its own buffering so we can't tell whether this entry + // is going to be consumed. Borrowing here leaves the entry + // available for later flattened fields. self.pending_content = Some(content); return seed.deserialize(ContentRefDeserializer::new(key)).map(Some); } @@ -2890,28 +2779,13 @@ where } #[cfg(any(feature = "std", feature = "alloc"))] -pub struct FlatStructAccess<'a, 'de: 'a, E> { +struct FlatStructAccess<'a, 'de: 'a, E> { iter: slice::IterMut<'a, Option<(Content<'de>, Content<'de>)>>, pending_content: Option<Content<'de>>, fields: &'static [&'static str], _marker: PhantomData<E>, } -#[cfg(any(feature = "std", feature = "alloc"))] -impl<'a, 'de, E> FlatStructAccess<'a, 'de, E> { - fn new( - iter: slice::IterMut<'a, Option<(Content<'de>, Content<'de>)>>, - fields: &'static [&'static str], - ) -> FlatStructAccess<'a, 'de, E> { - FlatStructAccess { - iter: iter, - pending_content: None, - fields: fields, - _marker: PhantomData, - } - } -} - #[cfg(any(feature = "std", feature = "alloc"))] impl<'a, 'de, E> MapAccess<'de> for FlatStructAccess<'a, 'de, E> where @@ -2923,17 +2797,8 @@ where where T: DeserializeSeed<'de>, { - while let Some(item) = self.iter.next() { - // items in the vector are nulled out when used. 
So we can only use - // an item if it's still filled in and if the field is one we care - // about. In case we do not know which fields we want, we take them all. - let use_item = match *item { - None => false, - Some((ref c, _)) => c.as_str().map_or(false, |key| self.fields.contains(&key)), - }; - - if use_item { - let (key, content) = item.take().unwrap(); + for entry in self.iter.by_ref() { + if let Some((key, content)) = flat_map_take_entry(entry, self.fields) { self.pending_content = Some(content); return seed.deserialize(ContentDeserializer::new(key)).map(Some); } @@ -2952,44 +2817,76 @@ where } } +/// Claims one key-value pair from a FlatMapDeserializer's field buffer if the +/// field name matches any of the recognized ones. #[cfg(any(feature = "std", feature = "alloc"))] -pub struct FlatInternallyTaggedAccess<'a, 'de: 'a, E> { - iter: slice::IterMut<'a, Option<(Content<'de>, Content<'de>)>>, - pending: Option<&'a Content<'de>>, - _marker: PhantomData<E>, +fn flat_map_take_entry<'de>( + entry: &mut Option<(Content<'de>, Content<'de>)>, + recognized: &[&str], +) -> Option<(Content<'de>, Content<'de>)> { + // Entries in the FlatMapDeserializer buffer are nulled out as they get + // claimed for deserialization. We only use an entry if it is still present + // and if the field is one recognized by the current data structure. + let is_recognized = match entry { + None => false, + Some((k, _v)) => k.as_str().map_or(false, |name| recognized.contains(&name)), + }; + + if is_recognized { + entry.take() + } else { + None + } } -#[cfg(any(feature = "std", feature = "alloc"))] -impl<'a, 'de, E> MapAccess<'de> for FlatInternallyTaggedAccess<'a, 'de, E> +pub struct AdjacentlyTaggedEnumVariantSeed<F> { + pub enum_name: &'static str, + pub variants: &'static [&'static str], + pub fields_enum: PhantomData<F>, +} + +pub struct AdjacentlyTaggedEnumVariantVisitor<F> { + enum_name: &'static str, + fields_enum: PhantomData<F>, +} + +impl<'de, F> Visitor<'de> for AdjacentlyTaggedEnumVariantVisitor<F> where - E: Error, + F: Deserialize<'de>, { - type Error = E; + type Value = F; - fn next_key_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error> + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "variant of enum {}", self.enum_name) + } + + fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error> where - T: DeserializeSeed<'de>, + A: EnumAccess<'de>, { - for item in &mut self.iter { - if let Some((ref key, ref content)) = *item { - // Do not take(), instead borrow this entry. The internally tagged - // enum does its own buffering so we can't tell whether this entry - // is going to be consumed. Borrowing here leaves the entry - // available for later flattened fields. 
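// ----------------------------------------------------------------------
// An illustrative sketch, not part of the vendored patch, of the feature the
// new `AdjacentlyTaggedEnumVariantSeed` above serves, and of the tag/content
// key resolution done by `TagOrContentFieldVisitor`, which with this patch
// also accepts integer keys (0 = tag, 1 = content) and byte-string keys from
// non-self-describing formats. Assumes `serde_derive` and `serde_json`.

use serde_derive::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, PartialEq)]
#[serde(tag = "t", content = "c")] // adjacently tagged: two sibling keys
enum Msg {
    Ping(u32),
    Text(String),
}

fn adjacent_demo() -> Result<(), serde_json::Error> {
    let json = serde_json::to_string(&Msg::Ping(7))?;
    assert_eq!(json, r#"{"t":"Ping","c":7}"#);
    assert_eq!(serde_json::from_str::<Msg>(&json)?, Msg::Ping(7));
    Ok(())
}
// ----------------------------------------------------------------------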
- self.pending = Some(content); - return seed.deserialize(ContentRefDeserializer::new(key)).map(Some); - } - } - Ok(None) + let (variant, variant_access) = tri!(data.variant()); + tri!(variant_access.unit_variant()); + Ok(variant) } +} - fn next_value_seed<T>(&mut self, seed: T) -> Result<T::Value, Self::Error> +impl<'de, F> DeserializeSeed<'de> for AdjacentlyTaggedEnumVariantSeed<F> +where + F: Deserialize<'de>, +{ + type Value = F; + + fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error> where - T: DeserializeSeed<'de>, + D: Deserializer<'de>, { - match self.pending.take() { - Some(value) => seed.deserialize(ContentRefDeserializer::new(value)), - None => panic!("value is missing"), - } + deserializer.deserialize_enum( + self.enum_name, + self.variants, + AdjacentlyTaggedEnumVariantVisitor { + enum_name: self.enum_name, + fields_enum: PhantomData, + }, + ) } } diff --git a/vendor/serde/src/private/doc.rs b/vendor/serde/src/private/doc.rs index f597af8..1b18fe6 100644 --- a/vendor/serde/src/private/doc.rs +++ b/vendor/serde/src/private/doc.rs @@ -1,8 +1,8 @@ // Used only by Serde doc tests. Not public API. -use lib::*; +use crate::lib::*; -use ser; +use crate::ser; #[doc(hidden)] #[derive(Debug)] diff --git a/vendor/serde/src/private/mod.rs b/vendor/serde/src/private/mod.rs index e896902..177f850 100644 --- a/vendor/serde/src/private/mod.rs +++ b/vendor/serde/src/private/mod.rs @@ -3,30 +3,28 @@ pub mod de; #[cfg(not(no_serde_derive))] pub mod ser; -pub mod size_hint; - // FIXME: #[cfg(doctest)] once https://github.com/rust-lang/rust/issues/67295 is fixed. pub mod doc; -pub use lib::clone::Clone; -pub use lib::convert::{From, Into}; -pub use lib::default::Default; -pub use lib::fmt::{self, Formatter}; -pub use lib::marker::PhantomData; -pub use lib::option::Option::{self, None, Some}; -pub use lib::ptr; -pub use lib::result::Result::{self, Err, Ok}; +pub use crate::lib::clone::Clone; +pub use crate::lib::convert::{From, Into}; +pub use crate::lib::default::Default; +pub use crate::lib::fmt::{self, Formatter}; +pub use crate::lib::marker::PhantomData; +pub use crate::lib::option::Option::{self, None, Some}; +pub use crate::lib::ptr; +pub use crate::lib::result::Result::{self, Err, Ok}; pub use self::string::from_utf8_lossy; #[cfg(any(feature = "alloc", feature = "std"))] -pub use lib::{ToString, Vec}; +pub use crate::lib::{ToString, Vec}; #[cfg(not(no_core_try_from))] -pub use lib::convert::TryFrom; +pub use crate::lib::convert::TryFrom; mod string { - use lib::*; + use crate::lib::*; #[cfg(any(feature = "std", feature = "alloc"))] pub fn from_utf8_lossy(bytes: &[u8]) -> Cow<str> { diff --git a/vendor/serde/src/private/ser.rs b/vendor/serde/src/private/ser.rs index 528e8c1..50bcb25 100644 --- a/vendor/serde/src/private/ser.rs +++ b/vendor/serde/src/private/ser.rs @@ -1,6 +1,6 @@ -use lib::*; +use crate::lib::*; -use ser::{self, Impossible, Serialize, SerializeMap, SerializeStruct, Serializer}; +use crate::ser::{self, Impossible, Serialize, SerializeMap, SerializeStruct, Serializer}; #[cfg(any(feature = "std", feature = "alloc"))] use self::content::{ @@ -27,10 +27,10 @@ where T: Serialize, { value.serialize(TaggedSerializer { - type_ident: type_ident, - variant_ident: variant_ident, - tag: tag, - variant_name: variant_name, + type_ident, + variant_ident, + tag, + variant_name, delegate: serializer, }) } @@ -182,14 +182,14 @@ where } fn serialize_unit(self) -> Result<Self::Ok, Self::Error> { - let mut map = try!(self.delegate.serialize_map(Some(1))); - 
try!(map.serialize_entry(self.tag, self.variant_name)); + let mut map = tri!(self.delegate.serialize_map(Some(1))); + tri!(map.serialize_entry(self.tag, self.variant_name)); map.end() } fn serialize_unit_struct(self, _: &'static str) -> Result<Self::Ok, Self::Error> { - let mut map = try!(self.delegate.serialize_map(Some(1))); - try!(map.serialize_entry(self.tag, self.variant_name)); + let mut map = tri!(self.delegate.serialize_map(Some(1))); + tri!(map.serialize_entry(self.tag, self.variant_name)); map.end() } @@ -199,9 +199,9 @@ where _: u32, inner_variant: &'static str, ) -> Result<Self::Ok, Self::Error> { - let mut map = try!(self.delegate.serialize_map(Some(2))); - try!(map.serialize_entry(self.tag, self.variant_name)); - try!(map.serialize_entry(inner_variant, &())); + let mut map = tri!(self.delegate.serialize_map(Some(2))); + tri!(map.serialize_entry(self.tag, self.variant_name)); + tri!(map.serialize_entry(inner_variant, &())); map.end() } @@ -226,9 +226,9 @@ where where T: Serialize, { - let mut map = try!(self.delegate.serialize_map(Some(2))); - try!(map.serialize_entry(self.tag, self.variant_name)); - try!(map.serialize_entry(inner_variant, inner_value)); + let mut map = tri!(self.delegate.serialize_map(Some(2))); + tri!(map.serialize_entry(self.tag, self.variant_name)); + tri!(map.serialize_entry(inner_variant, inner_value)); map.end() } @@ -269,9 +269,9 @@ where inner_variant: &'static str, len: usize, ) -> Result<Self::SerializeTupleVariant, Self::Error> { - let mut map = try!(self.delegate.serialize_map(Some(2))); - try!(map.serialize_entry(self.tag, self.variant_name)); - try!(map.serialize_key(inner_variant)); + let mut map = tri!(self.delegate.serialize_map(Some(2))); + tri!(map.serialize_entry(self.tag, self.variant_name)); + tri!(map.serialize_key(inner_variant)); Ok(SerializeTupleVariantAsMapValue::new( map, inner_variant, @@ -280,8 +280,8 @@ where } fn serialize_map(self, len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { - let mut map = try!(self.delegate.serialize_map(len.map(|len| len + 1))); - try!(map.serialize_entry(self.tag, self.variant_name)); + let mut map = tri!(self.delegate.serialize_map(len.map(|len| len + 1))); + tri!(map.serialize_entry(self.tag, self.variant_name)); Ok(map) } @@ -290,8 +290,8 @@ where name: &'static str, len: usize, ) -> Result<Self::SerializeStruct, Self::Error> { - let mut state = try!(self.delegate.serialize_struct(name, len + 1)); - try!(state.serialize_field(self.tag, self.variant_name)); + let mut state = tri!(self.delegate.serialize_struct(name, len + 1)); + tri!(state.serialize_field(self.tag, self.variant_name)); Ok(state) } @@ -316,9 +316,9 @@ where inner_variant: &'static str, len: usize, ) -> Result<Self::SerializeStructVariant, Self::Error> { - let mut map = try!(self.delegate.serialize_map(Some(2))); - try!(map.serialize_entry(self.tag, self.variant_name)); - try!(map.serialize_key(inner_variant)); + let mut map = tri!(self.delegate.serialize_map(Some(2))); + tri!(map.serialize_entry(self.tag, self.variant_name)); + tri!(map.serialize_key(inner_variant)); Ok(SerializeStructVariantAsMapValue::new( map, inner_variant, @@ -337,9 +337,9 @@ where #[cfg(any(feature = "std", feature = "alloc"))] mod content { - use lib::*; + use crate::lib::*; - use ser::{self, Serialize, Serializer}; + use crate::ser::{self, Serialize, Serializer}; pub struct SerializeTupleVariantAsMapValue<M> { map: M, @@ -350,8 +350,8 @@ mod content { impl<M> SerializeTupleVariantAsMapValue<M> { pub fn new(map: M, name: &'static str, len: 
usize) -> Self { SerializeTupleVariantAsMapValue { - map: map, - name: name, + map, + name, fields: Vec::with_capacity(len), } } @@ -368,13 +368,13 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<M::Error>::new())); + let value = tri!(value.serialize(ContentSerializer::<M::Error>::new())); self.fields.push(value); Ok(()) } fn end(mut self) -> Result<M::Ok, M::Error> { - try!(self + tri!(self .map .serialize_value(&Content::TupleStruct(self.name, self.fields))); self.map.end() @@ -390,8 +390,8 @@ mod content { impl<M> SerializeStructVariantAsMapValue<M> { pub fn new(map: M, name: &'static str, len: usize) -> Self { SerializeStructVariantAsMapValue { - map: map, - name: name, + map, + name, fields: Vec::with_capacity(len), } } @@ -412,13 +412,13 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<M::Error>::new())); + let value = tri!(value.serialize(ContentSerializer::<M::Error>::new())); self.fields.push((key, value)); Ok(()) } fn end(mut self) -> Result<M::Ok, M::Error> { - try!(self + tri!(self .map .serialize_value(&Content::Struct(self.name, self.fields))); self.map.end() @@ -499,50 +499,50 @@ mod content { } Content::Seq(ref elements) => elements.serialize(serializer), Content::Tuple(ref elements) => { - use ser::SerializeTuple; - let mut tuple = try!(serializer.serialize_tuple(elements.len())); + use crate::ser::SerializeTuple; + let mut tuple = tri!(serializer.serialize_tuple(elements.len())); for e in elements { - try!(tuple.serialize_element(e)); + tri!(tuple.serialize_element(e)); } tuple.end() } Content::TupleStruct(n, ref fields) => { - use ser::SerializeTupleStruct; - let mut ts = try!(serializer.serialize_tuple_struct(n, fields.len())); + use crate::ser::SerializeTupleStruct; + let mut ts = tri!(serializer.serialize_tuple_struct(n, fields.len())); for f in fields { - try!(ts.serialize_field(f)); + tri!(ts.serialize_field(f)); } ts.end() } Content::TupleVariant(n, i, v, ref fields) => { - use ser::SerializeTupleVariant; - let mut tv = try!(serializer.serialize_tuple_variant(n, i, v, fields.len())); + use crate::ser::SerializeTupleVariant; + let mut tv = tri!(serializer.serialize_tuple_variant(n, i, v, fields.len())); for f in fields { - try!(tv.serialize_field(f)); + tri!(tv.serialize_field(f)); } tv.end() } Content::Map(ref entries) => { - use ser::SerializeMap; - let mut map = try!(serializer.serialize_map(Some(entries.len()))); + use crate::ser::SerializeMap; + let mut map = tri!(serializer.serialize_map(Some(entries.len()))); for (k, v) in entries { - try!(map.serialize_entry(k, v)); + tri!(map.serialize_entry(k, v)); } map.end() } Content::Struct(n, ref fields) => { - use ser::SerializeStruct; - let mut s = try!(serializer.serialize_struct(n, fields.len())); + use crate::ser::SerializeStruct; + let mut s = tri!(serializer.serialize_struct(n, fields.len())); for &(k, ref v) in fields { - try!(s.serialize_field(k, v)); + tri!(s.serialize_field(k, v)); } s.end() } Content::StructVariant(n, i, v, ref fields) => { - use ser::SerializeStructVariant; - let mut sv = try!(serializer.serialize_struct_variant(n, i, v, fields.len())); + use crate::ser::SerializeStructVariant; + let mut sv = tri!(serializer.serialize_struct_variant(n, i, v, fields.len())); for &(k, ref v) in fields { - try!(sv.serialize_field(k, v)); + tri!(sv.serialize_field(k, v)); } sv.end() } @@ -639,7 +639,7 @@ mod content { where T: Serialize, { - Ok(Content::Some(Box::new(try!(value.serialize(self))))) + 
Ok(Content::Some(Box::new(tri!(value.serialize(self))))) } fn serialize_unit(self) -> Result<Content, E> { @@ -669,7 +669,7 @@ mod content { { Ok(Content::NewtypeStruct( name, - Box::new(try!(value.serialize(self))), + Box::new(tri!(value.serialize(self))), )) } @@ -687,7 +687,7 @@ mod content { name, variant_index, variant, - Box::new(try!(value.serialize(self))), + Box::new(tri!(value.serialize(self))), )) } @@ -711,7 +711,7 @@ mod content { len: usize, ) -> Result<Self::SerializeTupleStruct, E> { Ok(SerializeTupleStruct { - name: name, + name, fields: Vec::with_capacity(len), error: PhantomData, }) @@ -725,9 +725,9 @@ mod content { len: usize, ) -> Result<Self::SerializeTupleVariant, E> { Ok(SerializeTupleVariant { - name: name, - variant_index: variant_index, - variant: variant, + name, + variant_index, + variant, fields: Vec::with_capacity(len), error: PhantomData, }) @@ -747,7 +747,7 @@ mod content { len: usize, ) -> Result<Self::SerializeStruct, E> { Ok(SerializeStruct { - name: name, + name, fields: Vec::with_capacity(len), error: PhantomData, }) @@ -761,9 +761,9 @@ mod content { len: usize, ) -> Result<Self::SerializeStructVariant, E> { Ok(SerializeStructVariant { - name: name, - variant_index: variant_index, - variant: variant, + name, + variant_index, + variant, fields: Vec::with_capacity(len), error: PhantomData, }) @@ -786,7 +786,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.elements.push(value); Ok(()) } @@ -812,7 +812,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.elements.push(value); Ok(()) } @@ -839,7 +839,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.fields.push(value); Ok(()) } @@ -868,7 +868,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.fields.push(value); Ok(()) } @@ -900,7 +900,7 @@ mod content { where T: Serialize, { - let key = try!(key.serialize(ContentSerializer::<E>::new())); + let key = tri!(key.serialize(ContentSerializer::<E>::new())); self.key = Some(key); Ok(()) } @@ -913,7 +913,7 @@ mod content { .key .take() .expect("serialize_value called before serialize_key"); - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.entries.push((key, value)); Ok(()) } @@ -927,8 +927,8 @@ mod content { K: Serialize, V: Serialize, { - let key = try!(key.serialize(ContentSerializer::<E>::new())); - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let key = tri!(key.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.entries.push((key, value)); Ok(()) } @@ -951,7 +951,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.fields.push((key, value)); Ok(()) } @@ -980,7 +980,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); 
self.fields.push((key, value)); Ok(()) } @@ -1025,7 +1025,7 @@ where type SerializeTupleStruct = Impossible<Self::Ok, M::Error>; type SerializeMap = FlatMapSerializeMap<'a, M>; type SerializeStruct = FlatMapSerializeStruct<'a, M>; - type SerializeTupleVariant = Impossible<Self::Ok, M::Error>; + type SerializeTupleVariant = FlatMapSerializeTupleVariantAsMapValue<'a, M>; type SerializeStructVariant = FlatMapSerializeStructVariantAsMapValue<'a, M>; fn serialize_bool(self, _: bool) -> Result<Self::Ok, Self::Error> { @@ -1133,7 +1133,7 @@ where where T: Serialize, { - try!(self.0.serialize_key(variant)); + tri!(self.0.serialize_key(variant)); self.0.serialize_value(value) } @@ -1157,10 +1157,11 @@ where self, _: &'static str, _: u32, - _: &'static str, + variant: &'static str, _: usize, ) -> Result<Self::SerializeTupleVariant, Self::Error> { - Err(Self::bad_type(Unsupported::Enum)) + tri!(self.0.serialize_key(variant)); + Ok(FlatMapSerializeTupleVariantAsMapValue::new(self.0)) } fn serialize_map(self, _: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { @@ -1182,7 +1183,7 @@ where inner_variant: &'static str, _: usize, ) -> Result<Self::SerializeStructVariant, Self::Error> { - try!(self.0.serialize_key(inner_variant)); + tri!(self.0.serialize_key(inner_variant)); Ok(FlatMapSerializeStructVariantAsMapValue::new( self.0, inner_variant, @@ -1259,6 +1260,52 @@ where } } +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#[cfg(any(feature = "std", feature = "alloc"))] +pub struct FlatMapSerializeTupleVariantAsMapValue<'a, M: 'a> { + map: &'a mut M, + fields: Vec<Content>, +} + +#[cfg(any(feature = "std", feature = "alloc"))] +impl<'a, M> FlatMapSerializeTupleVariantAsMapValue<'a, M> +where + M: SerializeMap + 'a, +{ + fn new(map: &'a mut M) -> Self { + FlatMapSerializeTupleVariantAsMapValue { + map, + fields: Vec::new(), + } + } +} + +#[cfg(any(feature = "std", feature = "alloc"))] +impl<'a, M> ser::SerializeTupleVariant for FlatMapSerializeTupleVariantAsMapValue<'a, M> +where + M: SerializeMap + 'a, +{ + type Ok = (); + type Error = M::Error; + + fn serialize_field<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + let value = tri!(value.serialize(ContentSerializer::<M::Error>::new())); + self.fields.push(value); + Ok(()) + } + + fn end(self) -> Result<(), Self::Error> { + tri!(self.map.serialize_value(&Content::Seq(self.fields))); + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + #[cfg(any(feature = "std", feature = "alloc"))] pub struct FlatMapSerializeStructVariantAsMapValue<'a, M: 'a> { map: &'a mut M, @@ -1273,8 +1320,8 @@ where { fn new(map: &'a mut M, name: &'static str) -> FlatMapSerializeStructVariantAsMapValue<'a, M> { FlatMapSerializeStructVariantAsMapValue { - map: map, - name: name, + map, + name, fields: Vec::new(), } } @@ -1296,15 +1343,43 @@ where where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<M::Error>::new())); + let value = tri!(value.serialize(ContentSerializer::<M::Error>::new())); self.fields.push((key, value)); Ok(()) } fn end(self) -> Result<(), Self::Error> { - try!(self + tri!(self .map .serialize_value(&Content::Struct(self.name, self.fields))); Ok(()) } } + +pub struct AdjacentlyTaggedEnumVariant { + pub enum_name: &'static str, + pub variant_index: u32, + pub variant_name: &'static str, +} + +impl Serialize for AdjacentlyTaggedEnumVariant { + fn serialize<S>(&self, 
serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_unit_variant(self.enum_name, self.variant_index, self.variant_name) + } +} + +// Error when Serialize for a non_exhaustive remote enum encounters a variant +// that is not recognized. +pub struct CannotSerializeVariant<T>(pub T); + +impl<T> Display for CannotSerializeVariant<T> +where + T: Debug, +{ + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "enum variant cannot be serialized: {:?}", self.0) + } +} diff --git a/vendor/serde/src/ser/fmt.rs b/vendor/serde/src/ser/fmt.rs index e7e09a1..0650ab6 100644 --- a/vendor/serde/src/ser/fmt.rs +++ b/vendor/serde/src/ser/fmt.rs @@ -1,5 +1,5 @@ -use lib::*; -use ser::{Error, Impossible, Serialize, Serializer}; +use crate::lib::*; +use crate::ser::{Error, Impossible, Serialize, Serializer}; impl Error for fmt::Error { fn custom<T: Display>(_msg: T) -> Self { @@ -17,8 +17,9 @@ macro_rules! fmt_primitives { }; } -/// ```edition2018 -/// use serde::Serialize; +/// ```edition2021 +/// use serde::ser::Serialize; +/// use serde_derive::Serialize; /// use std::fmt::{self, Display}; /// /// #[derive(Serialize)] @@ -51,10 +52,12 @@ impl<'a, 'b> Serializer for &'a mut fmt::Formatter<'b> { serialize_i16: i16, serialize_i32: i32, serialize_i64: i64, + serialize_i128: i128, serialize_u8: u8, serialize_u16: u16, serialize_u32: u32, serialize_u64: u64, + serialize_u128: u128, serialize_f32: f32, serialize_f64: f64, serialize_char: char, @@ -62,13 +65,6 @@ impl<'a, 'b> Serializer for &'a mut fmt::Formatter<'b> { serialize_unit_struct: &'static str, } - serde_if_integer128! { - fmt_primitives! { - serialize_i128: i128, - serialize_u128: u128, - } - } - fn serialize_unit_variant( self, _name: &'static str, diff --git a/vendor/serde/src/ser/impls.rs b/vendor/serde/src/ser/impls.rs index a79326e..8c70634 100644 --- a/vendor/serde/src/ser/impls.rs +++ b/vendor/serde/src/ser/impls.rs @@ -1,6 +1,6 @@ -use lib::*; +use crate::lib::*; -use ser::{Error, Serialize, SerializeTuple, Serializer}; +use crate::ser::{Error, Serialize, SerializeTuple, Serializer}; //////////////////////////////////////////////////////////////////////////////// @@ -24,20 +24,17 @@ primitive_impl!(i8, serialize_i8); primitive_impl!(i16, serialize_i16); primitive_impl!(i32, serialize_i32); primitive_impl!(i64, serialize_i64); +primitive_impl!(i128, serialize_i128); primitive_impl!(usize, serialize_u64 as u64); primitive_impl!(u8, serialize_u8); primitive_impl!(u16, serialize_u16); primitive_impl!(u32, serialize_u32); primitive_impl!(u64, serialize_u64); +primitive_impl!(u128, serialize_u128); primitive_impl!(f32, serialize_f32); primitive_impl!(f64, serialize_f64); primitive_impl!(char, serialize_char); -serde_if_integer128! 
{ - primitive_impl!(i128, serialize_i128); - primitive_impl!(u128, serialize_u128); -} - //////////////////////////////////////////////////////////////////////////////// impl Serialize for str { @@ -51,6 +48,7 @@ impl Serialize for str { } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl Serialize for String { #[inline] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> @@ -73,6 +71,7 @@ impl<'a> Serialize for fmt::Arguments<'a> { //////////////////////////////////////////////////////////////////////////////// #[cfg(any(feature = "std", not(no_core_cstr)))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for CStr { #[inline] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> @@ -84,6 +83,7 @@ impl Serialize for CStr { } #[cfg(any(feature = "std", all(not(no_core_cstr), feature = "alloc")))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl Serialize for CString { #[inline] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> @@ -133,7 +133,7 @@ impl<T> Serialize for [T; 0] { where S: Serializer, { - try!(serializer.serialize_tuple(0)).end() + tri!(serializer.serialize_tuple(0)).end() } } @@ -149,9 +149,9 @@ macro_rules! array_impls { where S: Serializer, { - let mut seq = try!(serializer.serialize_tuple($len)); + let mut seq = tri!(serializer.serialize_tuple($len)); for e in self { - try!(seq.serialize_element(e)); + tri!(seq.serialize_element(e)); } seq.end() } @@ -182,9 +182,13 @@ where } } -#[cfg(all(any(feature = "std", feature = "alloc"), not(no_relaxed_trait_bounds)))] +#[cfg(not(no_relaxed_trait_bounds))] macro_rules! seq_impl { - ($ty:ident <T $(: $tbound1:ident $(+ $tbound2:ident)*)* $(, $typaram:ident : $bound:ident)*>) => { + ( + $(#[$attr:meta])* + $ty:ident <T $(: $tbound1:ident $(+ $tbound2:ident)*)* $(, $typaram:ident : $bound:ident)*> + ) => { + $(#[$attr])* impl<T $(, $typaram)*> Serialize for $ty<T $(, $typaram)*> where T: Serialize, @@ -200,9 +204,13 @@ macro_rules! seq_impl { } } -#[cfg(all(any(feature = "std", feature = "alloc"), no_relaxed_trait_bounds))] +#[cfg(no_relaxed_trait_bounds)] macro_rules! seq_impl { - ($ty:ident <T $(: $tbound1:ident $(+ $tbound2:ident)*)* $(, $typaram:ident : $bound:ident)*>) => { + ( + $(#[$attr:meta])* + $ty:ident <T $(: $tbound1:ident $(+ $tbound2:ident)*)* $(, $typaram:ident : $bound:ident)*> + ) => { + $(#[$attr])* impl<T $(, $typaram)*> Serialize for $ty<T $(, $typaram)*> where T: Serialize $(+ $tbound1 $(+ $tbound2)*)*, @@ -219,23 +227,41 @@ macro_rules! seq_impl { } } -#[cfg(any(feature = "std", feature = "alloc"))] -seq_impl!(BinaryHeap<T: Ord>); +seq_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + BinaryHeap<T: Ord> +} -#[cfg(any(feature = "std", feature = "alloc"))] -seq_impl!(BTreeSet<T: Ord>); +seq_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + BTreeSet<T: Ord> +} -#[cfg(feature = "std")] -seq_impl!(HashSet<T: Eq + Hash, H: BuildHasher>); +seq_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + HashSet<T: Eq + Hash, H: BuildHasher> +} -#[cfg(any(feature = "std", feature = "alloc"))] -seq_impl!(LinkedList<T>); +seq_impl! 
{ + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + LinkedList<T> +} -#[cfg(any(feature = "std", feature = "alloc"))] -seq_impl!(Vec<T>); +seq_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + Vec<T> +} -#[cfg(any(feature = "std", feature = "alloc"))] -seq_impl!(VecDeque<T>); +seq_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + VecDeque<T> +} //////////////////////////////////////////////////////////////////////////////// @@ -248,16 +274,32 @@ where S: Serializer, { use super::SerializeStruct; - let mut state = try!(serializer.serialize_struct("Range", 2)); - try!(state.serialize_field("start", &self.start)); - try!(state.serialize_field("end", &self.end)); + let mut state = tri!(serializer.serialize_struct("Range", 2)); + tri!(state.serialize_field("start", &self.start)); + tri!(state.serialize_field("end", &self.end)); + state.end() + } +} + +//////////////////////////////////////////////////////////////////////////////// + +impl<Idx> Serialize for RangeFrom<Idx> +where + Idx: Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + use super::SerializeStruct; + let mut state = tri!(serializer.serialize_struct("RangeFrom", 1)); + tri!(state.serialize_field("start", &self.start)); state.end() } } //////////////////////////////////////////////////////////////////////////////// -#[cfg(not(no_range_inclusive))] impl<Idx> Serialize for RangeInclusive<Idx> where Idx: Serialize, @@ -267,16 +309,32 @@ where S: Serializer, { use super::SerializeStruct; - let mut state = try!(serializer.serialize_struct("RangeInclusive", 2)); - try!(state.serialize_field("start", &self.start())); - try!(state.serialize_field("end", &self.end())); + let mut state = tri!(serializer.serialize_struct("RangeInclusive", 2)); + tri!(state.serialize_field("start", &self.start())); + tri!(state.serialize_field("end", &self.end())); + state.end() + } +} + +//////////////////////////////////////////////////////////////////////////////// + +impl<Idx> Serialize for RangeTo<Idx> +where + Idx: Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + use super::SerializeStruct; + let mut state = tri!(serializer.serialize_struct("RangeTo", 1)); + tri!(state.serialize_field("end", &self.end)); state.end() } } //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(not(no_ops_bound), all(feature = "std", not(no_collections_bound))))] impl<T> Serialize for Bound<T> where T: Serialize, @@ -310,6 +368,7 @@ impl Serialize for () { } #[cfg(feature = "unstable")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "unstable")))] impl Serialize for ! { fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error> where @@ -333,9 +392,9 @@ macro_rules! tuple_impls { where S: Serializer, { - let mut tuple = try!(serializer.serialize_tuple($len)); + let mut tuple = tri!(serializer.serialize_tuple($len)); $( - try!(tuple.serialize_element(&self.$n)); + tri!(tuple.serialize_element(&self.$n)); )+ tuple.end() } @@ -365,9 +424,13 @@ tuple_impls! { //////////////////////////////////////////////////////////////////////////////// -#[cfg(all(any(feature = "std", feature = "alloc"), not(no_relaxed_trait_bounds)))] +#[cfg(not(no_relaxed_trait_bounds))] macro_rules! 
map_impl { - ($ty:ident <K $(: $kbound1:ident $(+ $kbound2:ident)*)*, V $(, $typaram:ident : $bound:ident)*>) => { + ( + $(#[$attr:meta])* + $ty:ident <K $(: $kbound1:ident $(+ $kbound2:ident)*)*, V $(, $typaram:ident : $bound:ident)*> + ) => { + $(#[$attr])* impl<K, V $(, $typaram)*> Serialize for $ty<K, V $(, $typaram)*> where K: Serialize, @@ -384,9 +447,13 @@ macro_rules! map_impl { } } -#[cfg(all(any(feature = "std", feature = "alloc"), no_relaxed_trait_bounds))] +#[cfg(no_relaxed_trait_bounds)] macro_rules! map_impl { - ($ty:ident <K $(: $kbound1:ident $(+ $kbound2:ident)*)*, V $(, $typaram:ident : $bound:ident)*>) => { + ( + $(#[$attr:meta])* + $ty:ident <K $(: $kbound1:ident $(+ $kbound2:ident)*)*, V $(, $typaram:ident : $bound:ident)*> + ) => { + $(#[$attr])* impl<K, V $(, $typaram)*> Serialize for $ty<K, V $(, $typaram)*> where K: Serialize $(+ $kbound1 $(+ $kbound2)*)*, @@ -404,20 +471,26 @@ macro_rules! map_impl { } } -#[cfg(any(feature = "std", feature = "alloc"))] -map_impl!(BTreeMap<K: Ord, V>); +map_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + BTreeMap<K: Ord, V> +} -#[cfg(feature = "std")] -map_impl!(HashMap<K: Eq + Hash, V, H: BuildHasher>); +map_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + HashMap<K: Eq + Hash, V, H: BuildHasher> +} //////////////////////////////////////////////////////////////////////////////// macro_rules! deref_impl { ( - $(#[doc = $doc:tt])* + $(#[$attr:meta])* <$($desc:tt)+ ) => { - $(#[doc = $doc])* + $(#[$attr])* impl <$($desc)+ { #[inline] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> @@ -430,13 +503,20 @@ macro_rules! deref_impl { }; } -deref_impl!(<'a, T: ?Sized> Serialize for &'a T where T: Serialize); -deref_impl!(<'a, T: ?Sized> Serialize for &'a mut T where T: Serialize); +deref_impl! { + <'a, T: ?Sized> Serialize for &'a T where T: Serialize +} -#[cfg(any(feature = "std", feature = "alloc"))] -deref_impl!(<T: ?Sized> Serialize for Box<T> where T: Serialize); +deref_impl! { + <'a, T: ?Sized> Serialize for &'a mut T where T: Serialize +} + +deref_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + <T: ?Sized> Serialize for Box<T> where T: Serialize +} -#[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] deref_impl! { /// This impl requires the [`"rc"`] Cargo feature of Serde. /// @@ -446,10 +526,11 @@ deref_impl! { /// repeated data. /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc + #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))))] <T: ?Sized> Serialize for Rc<T> where T: Serialize } -#[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] deref_impl! { /// This impl requires the [`"rc"`] Cargo feature of Serde. /// @@ -459,11 +540,16 @@ deref_impl! { /// repeated data. /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc + #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))))] <T: ?Sized> Serialize for Arc<T> where T: Serialize } -#[cfg(any(feature = "std", feature = "alloc"))] -deref_impl!(<'a, T: ?Sized> Serialize for Cow<'a, T> where T: Serialize + ToOwned); +deref_impl! 
{ + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + <'a, T: ?Sized> Serialize for Cow<'a, T> where T: Serialize + ToOwned +} //////////////////////////////////////////////////////////////////////////////// @@ -471,6 +557,10 @@ deref_impl!(<'a, T: ?Sized> Serialize for Cow<'a, T> where T: Serialize + ToOwne /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] +#[cfg_attr( + doc_cfg, + doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))) +)] impl<T: ?Sized> Serialize for RcWeak<T> where T: Serialize, @@ -487,6 +577,10 @@ where /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] +#[cfg_attr( + doc_cfg, + doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))) +)] impl<T: ?Sized> Serialize for ArcWeak<T> where T: Serialize, @@ -504,7 +598,6 @@ where macro_rules! nonzero_integers { ($($T:ident,)+) => { $( - #[cfg(not(no_num_nonzero))] impl Serialize for num::$T { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -522,6 +615,7 @@ nonzero_integers! { NonZeroU16, NonZeroU32, NonZeroU64, + NonZeroU128, NonZeroUsize, } @@ -531,22 +625,10 @@ nonzero_integers! { NonZeroI16, NonZeroI32, NonZeroI64, + NonZeroI128, NonZeroIsize, } -// Currently 128-bit integers do not work on Emscripten targets so we need an -// additional `#[cfg]` -serde_if_integer128! { - nonzero_integers! { - NonZeroU128, - } - - #[cfg(not(no_num_nonzero_signed))] - nonzero_integers! { - NonZeroI128, - } -} - impl<T> Serialize for Cell<T> where T: Serialize + Copy, @@ -575,6 +657,7 @@ where } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<T: ?Sized> Serialize for Mutex<T> where T: Serialize, @@ -591,6 +674,7 @@ where } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<T: ?Sized> Serialize for RwLock<T> where T: Serialize, @@ -628,16 +712,15 @@ where //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(feature = "std", not(no_core_duration)))] impl Serialize for Duration { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, { use super::SerializeStruct; - let mut state = try!(serializer.serialize_struct("Duration", 2)); - try!(state.serialize_field("secs", &self.as_secs())); - try!(state.serialize_field("nanos", &self.subsec_nanos())); + let mut state = tri!(serializer.serialize_struct("Duration", 2)); + tri!(state.serialize_field("secs", &self.as_secs())); + tri!(state.serialize_field("nanos", &self.subsec_nanos())); state.end() } } @@ -645,6 +728,7 @@ impl Serialize for Duration { //////////////////////////////////////////////////////////////////////////////// #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for SystemTime { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -655,9 +739,9 @@ impl Serialize for SystemTime { Ok(duration_since_epoch) => duration_since_epoch, Err(_) => return Err(S::Error::custom("SystemTime must be later than UNIX_EPOCH")), }; - let mut state = try!(serializer.serialize_struct("SystemTime", 2)); - try!(state.serialize_field("secs_since_epoch", &duration_since_epoch.as_secs())); - try!(state.serialize_field("nanos_since_epoch", &duration_since_epoch.subsec_nanos())); + let mut state = 
tri!(serializer.serialize_struct("SystemTime", 2)); + tri!(state.serialize_field("secs_since_epoch", &duration_since_epoch.as_secs())); + tri!(state.serialize_field("nanos_since_epoch", &duration_since_epoch.subsec_nanos())); state.end() } } @@ -689,6 +773,7 @@ macro_rules! serialize_display_bounded_length { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::IpAddr { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -713,7 +798,7 @@ impl Serialize for net::IpAddr { } #[cfg(feature = "std")] -const DEC_DIGITS_LUT: &'static [u8] = b"\ +const DEC_DIGITS_LUT: &[u8] = b"\ 0001020304050607080910111213141516171819\ 2021222324252627282930313233343536373839\ 4041424344454647484950515253545556575859\ @@ -759,6 +844,7 @@ fn test_format_u8() { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::Ipv4Addr { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -783,6 +869,7 @@ impl Serialize for net::Ipv4Addr { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::Ipv6Addr { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -799,6 +886,7 @@ impl Serialize for net::Ipv6Addr { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::SocketAddr { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -823,6 +911,7 @@ impl Serialize for net::SocketAddr { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::SocketAddrV4 { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -839,6 +928,7 @@ impl Serialize for net::SocketAddrV4 { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::SocketAddrV6 { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -860,6 +950,7 @@ impl Serialize for net::SocketAddrV6 { //////////////////////////////////////////////////////////////////////////////// #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for Path { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -873,6 +964,7 @@ impl Serialize for Path { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for PathBuf { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -883,6 +975,7 @@ impl Serialize for PathBuf { } #[cfg(all(feature = "std", any(unix, windows)))] +#[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", any(unix, windows)))))] impl Serialize for OsStr { #[cfg(unix)] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> @@ -905,6 +998,7 @@ impl Serialize for OsStr { } #[cfg(all(feature = "std", any(unix, windows)))] +#[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", any(unix, windows)))))] impl Serialize for OsString { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -929,7 +1023,6 @@ where } } -#[cfg(not(no_core_reverse))] impl<T> Serialize for Reverse<T> where T: Serialize, @@ -950,6 +1043,7 @@ macro_rules! 
atomic_impl { ($($ty:ident $size:expr)*) => { $( #[cfg(any(no_target_has_atomic, target_has_atomic = $size))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", target_has_atomic = $size))))] impl Serialize for $ty { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where diff --git a/vendor/serde/src/ser/impossible.rs b/vendor/serde/src/ser/impossible.rs index e8df9ca..479be94 100644 --- a/vendor/serde/src/ser/impossible.rs +++ b/vendor/serde/src/ser/impossible.rs @@ -1,8 +1,8 @@ //! This module contains `Impossible` serializer and its implementations. -use lib::*; +use crate::lib::*; -use ser::{ +use crate::ser::{ self, Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, SerializeTuple, SerializeTupleStruct, SerializeTupleVariant, }; @@ -15,7 +15,7 @@ use ser::{ /// [`SerializeTuple`], [`SerializeTupleStruct`], [`SerializeTupleVariant`], /// [`SerializeMap`], [`SerializeStruct`], and [`SerializeStructVariant`]. /// -/// ```edition2018 +/// ```edition2021 /// # use serde::ser::{Serializer, Impossible}; /// # use serde::__private::doc::Error; /// # diff --git a/vendor/serde/src/ser/mod.rs b/vendor/serde/src/ser/mod.rs index 5c45426..75c4514 100644 --- a/vendor/serde/src/ser/mod.rs +++ b/vendor/serde/src/ser/mod.rs @@ -61,8 +61,8 @@ //! - RefCell\<T\> //! - Mutex\<T\> //! - RwLock\<T\> -//! - Rc\<T\> *(if* features = ["rc"] *is enabled)* -//! - Arc\<T\> *(if* features = ["rc"] *is enabled)* +//! - Rc\<T\> *(if* features = \["rc"\] *is enabled)* +//! - Arc\<T\> *(if* features = \["rc"\] *is enabled)* //! - **Collection types**: //! - BTreeMap\<K, V\> //! - BTreeSet\<T\> @@ -107,7 +107,7 @@ //! [derive section of the manual]: https://serde.rs/derive.html //! [data formats]: https://serde.rs/#data-formats -use lib::*; +use crate::lib::*; mod fmt; mod impls; @@ -115,15 +115,15 @@ mod impossible; pub use self::impossible::Impossible; +#[cfg(not(any(feature = "std", feature = "unstable")))] +#[doc(no_inline)] +pub use crate::std_error::Error as StdError; #[cfg(all(feature = "unstable", not(feature = "std")))] -#[doc(inline)] +#[doc(no_inline)] pub use core::error::Error as StdError; #[cfg(feature = "std")] #[doc(no_inline)] pub use std::error::Error as StdError; -#[cfg(not(any(feature = "std", feature = "unstable")))] -#[doc(no_inline)] -pub use std_error::Error as StdError; //////////////////////////////////////////////////////////////////////////////// @@ -149,7 +149,7 @@ macro_rules! declare_error_trait { /// For example, a filesystem [`Path`] may refuse to serialize /// itself if it contains invalid UTF-8 data. /// - /// ```edition2018 + /// ```edition2021 /// # struct Path; /// # /// # impl Path { @@ -221,7 +221,7 @@ pub trait Serialize { /// See the [Implementing `Serialize`] section of the manual for more /// information about how to implement this method. /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeStruct, Serializer}; /// /// struct Person { @@ -388,7 +388,7 @@ pub trait Serializer: Sized { /// Serialize a `bool` value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -410,7 +410,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `i64` and /// forward to `serialize_i64`. 
/// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -432,7 +432,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `i64` and /// forward to `serialize_i64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -454,7 +454,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `i64` and /// forward to `serialize_i64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -472,7 +472,7 @@ pub trait Serializer: Sized { /// Serialize an `i64` value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -488,30 +488,27 @@ pub trait Serializer: Sized { /// ``` fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error>; - serde_if_integer128! { - /// Serialize an `i128` value. - /// - /// ```edition2018 - /// # use serde::Serializer; - /// # - /// # serde::__private_serialize!(); - /// # - /// impl Serialize for i128 { - /// fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - /// where - /// S: Serializer, - /// { - /// serializer.serialize_i128(*self) - /// } - /// } - /// ``` - /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default behavior unconditionally returns an error. - fn serialize_i128(self, v: i128) -> Result<Self::Ok, Self::Error> { - let _ = v; - Err(Error::custom("i128 is not supported")) - } + /// Serialize an `i128` value. + /// + /// ```edition2021 + /// # use serde::Serializer; + /// # + /// # serde::__private_serialize!(); + /// # + /// impl Serialize for i128 { + /// fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + /// where + /// S: Serializer, + /// { + /// serializer.serialize_i128(*self) + /// } + /// } + /// ``` + /// + /// The default behavior unconditionally returns an error. + fn serialize_i128(self, v: i128) -> Result<Self::Ok, Self::Error> { + let _ = v; + Err(Error::custom("i128 is not supported")) } /// Serialize a `u8` value. @@ -520,7 +517,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `u64` and /// forward to `serialize_u64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -542,7 +539,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `u64` and /// forward to `serialize_u64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -564,7 +561,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `u64` and /// forward to `serialize_u64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -582,7 +579,7 @@ pub trait Serializer: Sized { /// Serialize a `u64` value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -598,30 +595,27 @@ pub trait Serializer: Sized { /// ``` fn serialize_u64(self, v: u64) -> Result<Self::Ok, Self::Error>; - serde_if_integer128! { - /// Serialize a `u128` value. 
- /// - /// ```edition2018 - /// # use serde::Serializer; - /// # - /// # serde::__private_serialize!(); - /// # - /// impl Serialize for u128 { - /// fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - /// where - /// S: Serializer, - /// { - /// serializer.serialize_u128(*self) - /// } - /// } - /// ``` - /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default behavior unconditionally returns an error. - fn serialize_u128(self, v: u128) -> Result<Self::Ok, Self::Error> { - let _ = v; - Err(Error::custom("u128 is not supported")) - } + /// Serialize a `u128` value. + /// + /// ```edition2021 + /// # use serde::Serializer; + /// # + /// # serde::__private_serialize!(); + /// # + /// impl Serialize for u128 { + /// fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + /// where + /// S: Serializer, + /// { + /// serializer.serialize_u128(*self) + /// } + /// } + /// ``` + /// + /// The default behavior unconditionally returns an error. + fn serialize_u128(self, v: u128) -> Result<Self::Ok, Self::Error> { + let _ = v; + Err(Error::custom("u128 is not supported")) } /// Serialize an `f32` value. @@ -630,7 +624,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `f64` and /// forward to `serialize_f64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -648,7 +642,7 @@ pub trait Serializer: Sized { /// Serialize an `f64` value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -669,7 +663,7 @@ pub trait Serializer: Sized { /// If the format does not support characters, it is reasonable to serialize /// it as a single element `str` or a `u32`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -687,7 +681,7 @@ pub trait Serializer: Sized { /// Serialize a `&str`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -711,7 +705,7 @@ pub trait Serializer: Sized { /// `serialize_seq`. If forwarded, the implementation looks usually just /// like this: /// - /// ```edition2018 + /// ```edition2021 /// # use serde::ser::{Serializer, SerializeSeq}; /// # use serde::__private::doc::Error; /// # @@ -740,7 +734,7 @@ pub trait Serializer: Sized { /// Serialize a [`None`] value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::{Serialize, Serializer}; /// # /// # enum Option<T> { @@ -773,7 +767,7 @@ pub trait Serializer: Sized { /// Serialize a [`Some(T)`] value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::{Serialize, Serializer}; /// # /// # enum Option<T> { @@ -808,7 +802,7 @@ pub trait Serializer: Sized { /// Serialize a `()` value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -828,7 +822,7 @@ pub trait Serializer: Sized { /// /// A reasonable implementation would be to forward to `serialize_unit`. /// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// /// struct Nothing; @@ -850,7 +844,7 @@ pub trait Serializer: Sized { /// this variant within the enum, and the `variant` is the name of the /// variant. 
/// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// /// enum E { @@ -883,7 +877,7 @@ pub trait Serializer: Sized { /// wrappers around the data they contain. A reasonable implementation would /// be to forward to `value.serialize(self)`. /// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// /// struct Millimeters(u8); @@ -911,7 +905,7 @@ pub trait Serializer: Sized { /// this variant within the enum, and the `variant` is the name of the /// variant. The `value` is the data contained within this newtype variant. /// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// /// enum E { @@ -949,7 +943,7 @@ pub trait Serializer: Sized { /// not be computable before the sequence is iterated. Some serializers only /// support sequences whose length is known up front. /// - /// ```edition2018 + /// ```edition2021 /// # use std::marker::PhantomData; /// # /// # struct Vec<T>(PhantomData<T>); @@ -962,14 +956,14 @@ pub trait Serializer: Sized { /// # /// # impl<'a, T> IntoIterator for &'a Vec<T> { /// # type Item = &'a T; - /// # type IntoIter = Box<Iterator<Item = &'a T>>; + /// # type IntoIter = Box<dyn Iterator<Item = &'a T>>; /// # /// # fn into_iter(self) -> Self::IntoIter { /// # unimplemented!() /// # } /// # } /// # - /// use serde::ser::{Serialize, Serializer, SerializeSeq}; + /// use serde::ser::{Serialize, SerializeSeq, Serializer}; /// /// impl<T> Serialize for Vec<T> /// where @@ -994,8 +988,8 @@ pub trait Serializer: Sized { /// This call must be followed by zero or more calls to `serialize_element`, /// then a call to `end`. /// - /// ```edition2018 - /// use serde::ser::{Serialize, Serializer, SerializeTuple}; + /// ```edition2021 + /// use serde::ser::{Serialize, SerializeTuple, Serializer}; /// /// # mod fool { /// # trait Serialize {} @@ -1024,7 +1018,7 @@ pub trait Serializer: Sized { /// } /// ``` /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeTuple, Serializer}; /// /// const VRAM_SIZE: usize = 386; @@ -1052,7 +1046,7 @@ pub trait Serializer: Sized { /// The `name` is the name of the tuple struct and the `len` is the number /// of data fields that will be serialized. /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeTupleStruct, Serializer}; /// /// struct Rgb(u8, u8, u8); @@ -1084,7 +1078,7 @@ pub trait Serializer: Sized { /// this variant within the enum, the `variant` is the name of the variant, /// and the `len` is the number of data fields that will be serialized. /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeTupleVariant, Serializer}; /// /// enum E { @@ -1130,7 +1124,7 @@ pub trait Serializer: Sized { /// be computable before the map is iterated. Some serializers only support /// maps whose length is known up front. 
/// - /// ```edition2018 + /// ```edition2021 /// # use std::marker::PhantomData; /// # /// # struct HashMap<K, V>(PhantomData<K>, PhantomData<V>); @@ -1143,14 +1137,14 @@ pub trait Serializer: Sized { /// # /// # impl<'a, K, V> IntoIterator for &'a HashMap<K, V> { /// # type Item = (&'a K, &'a V); - /// # type IntoIter = Box<Iterator<Item = (&'a K, &'a V)>>; + /// # type IntoIter = Box<dyn Iterator<Item = (&'a K, &'a V)>>; /// # /// # fn into_iter(self) -> Self::IntoIter { /// # unimplemented!() /// # } /// # } /// # - /// use serde::ser::{Serialize, Serializer, SerializeMap}; + /// use serde::ser::{Serialize, SerializeMap, Serializer}; /// /// impl<K, V> Serialize for HashMap<K, V> /// where @@ -1178,7 +1172,7 @@ pub trait Serializer: Sized { /// The `name` is the name of the struct and the `len` is the number of /// data fields that will be serialized. /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeStruct, Serializer}; /// /// struct Rgb { @@ -1214,7 +1208,7 @@ pub trait Serializer: Sized { /// this variant within the enum, the `variant` is the name of the variant, /// and the `len` is the number of data fields that will be serialized. /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeStructVariant, Serializer}; /// /// enum E { @@ -1256,7 +1250,7 @@ pub trait Serializer: Sized { /// using [`serialize_seq`]. Implementors should not need to override this /// method. /// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// /// struct SecretlyOneHigher { @@ -1279,22 +1273,9 @@ pub trait Serializer: Sized { I: IntoIterator, <I as IntoIterator>::Item: Serialize, { - let iter = iter.into_iter(); - let mut serializer = try!(self.serialize_seq(iterator_len_hint(&iter))); - - #[cfg(not(no_iterator_try_fold))] - { - let mut iter = iter; - try!(iter.try_for_each(|item| serializer.serialize_element(&item))); - } - - #[cfg(no_iterator_try_fold)] - { - for item in iter { - try!(serializer.serialize_element(&item)); - } - } - + let mut iter = iter.into_iter(); + let mut serializer = tri!(self.serialize_seq(iterator_len_hint(&iter))); + tri!(iter.try_for_each(|item| serializer.serialize_element(&item))); serializer.end() } @@ -1304,7 +1285,7 @@ pub trait Serializer: Sized { /// using [`serialize_map`]. Implementors should not need to override this /// method. /// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// use std::collections::BTreeSet; /// @@ -1330,22 +1311,9 @@ pub trait Serializer: Sized { V: Serialize, I: IntoIterator<Item = (K, V)>, { - let iter = iter.into_iter(); - let mut serializer = try!(self.serialize_map(iterator_len_hint(&iter))); - - #[cfg(not(no_iterator_try_fold))] - { - let mut iter = iter; - try!(iter.try_for_each(|(key, value)| serializer.serialize_entry(&key, &value))); - } - - #[cfg(no_iterator_try_fold)] - { - for (key, value) in iter { - try!(serializer.serialize_entry(&key, &value)); - } - } - + let mut iter = iter.into_iter(); + let mut serializer = tri!(self.serialize_map(iterator_len_hint(&iter))); + tri!(iter.try_for_each(|(key, value)| serializer.serialize_entry(&key, &value))); serializer.end() } @@ -1355,7 +1323,7 @@ pub trait Serializer: Sized { /// delegates to [`serialize_str`]. Serializers are encouraged to provide a /// more efficient implementation if possible. 
/// - /// ```edition2018 + /// ```edition2021 /// # struct DateTime; /// # /// # impl DateTime { @@ -1370,9 +1338,7 @@ pub trait Serializer: Sized { /// where /// S: Serializer, /// { - /// serializer.collect_str(&format_args!("{:?}{:?}", - /// self.naive_local(), - /// self.offset())) + /// serializer.collect_str(&format_args!("{:?}{:?}", self.naive_local(), self.offset())) /// } /// } /// ``` @@ -1393,7 +1359,7 @@ pub trait Serializer: Sized { /// of this method. If no more sensible behavior is possible, the /// implementation is expected to return an error. /// - /// ```edition2018 + /// ```edition2021 /// # struct DateTime; /// # /// # impl DateTime { @@ -1408,9 +1374,7 @@ pub trait Serializer: Sized { /// where /// S: Serializer, /// { - /// serializer.collect_str(&format_args!("{:?}{:?}", - /// self.naive_local(), - /// self.offset())) + /// serializer.collect_str(&format_args!("{:?}{:?}", self.naive_local(), self.offset())) /// } /// } /// ``` @@ -1428,7 +1392,7 @@ pub trait Serializer: Sized { /// human-readable one and binary formats like Postcard will prefer the /// compact one. /// - /// ```edition2018 + /// ```edition2021 /// # use std::fmt::{self, Display}; /// # /// # struct Timestamp; @@ -1477,7 +1441,7 @@ pub trait Serializer: Sized { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// # use std::marker::PhantomData; /// # /// # struct Vec<T>(PhantomData<T>); @@ -1490,13 +1454,13 @@ pub trait Serializer: Sized { /// # /// # impl<'a, T> IntoIterator for &'a Vec<T> { /// # type Item = &'a T; -/// # type IntoIter = Box<Iterator<Item = &'a T>>; +/// # type IntoIter = Box<dyn Iterator<Item = &'a T>>; /// # fn into_iter(self) -> Self::IntoIter { /// # unimplemented!() /// # } /// # } /// # -/// use serde::ser::{Serialize, Serializer, SerializeSeq}; +/// use serde::ser::{Serialize, SerializeSeq, Serializer}; /// /// impl<T> Serialize for Vec<T> /// where @@ -1541,8 +1505,8 @@ pub trait SerializeSeq { /// /// # Example use /// -/// ```edition2018 -/// use serde::ser::{Serialize, Serializer, SerializeTuple}; +/// ```edition2021 +/// use serde::ser::{Serialize, SerializeTuple, Serializer}; /// /// # mod fool { /// # trait Serialize {} @@ -1571,7 +1535,7 @@ pub trait SerializeSeq { /// } /// ``` /// -/// ```edition2018 +/// ```edition2021 /// # use std::marker::PhantomData; /// # /// # struct Array<T>(PhantomData<T>); @@ -1584,13 +1548,13 @@ pub trait SerializeSeq { /// # /// # impl<'a, T> IntoIterator for &'a Array<T> { /// # type Item = &'a T; -/// # type IntoIter = Box<Iterator<Item = &'a T>>; +/// # type IntoIter = Box<dyn Iterator<Item = &'a T>>; /// # fn into_iter(self) -> Self::IntoIter { /// # unimplemented!() /// # } /// # } /// # -/// use serde::ser::{Serialize, Serializer, SerializeTuple}; +/// use serde::ser::{Serialize, SerializeTuple, Serializer}; /// /// # mod fool { /// # trait Serialize {} @@ -1641,7 +1605,7 @@ pub trait SerializeTuple { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// use serde::ser::{Serialize, SerializeTupleStruct, Serializer}; /// /// struct Rgb(u8, u8, u8); @@ -1686,7 +1650,7 @@ pub trait SerializeTupleStruct { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// use serde::ser::{Serialize, SerializeTupleVariant, Serializer}; /// /// enum E { @@ -1744,7 +1708,7 @@ pub trait SerializeTupleVariant { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// # use std::marker::PhantomData; /// # /// # struct HashMap<K, V>(PhantomData<K>, PhantomData<V>); @@ -1757,14 +1721,14 @@ pub 
trait SerializeTupleVariant { /// # /// # impl<'a, K, V> IntoIterator for &'a HashMap<K, V> { /// # type Item = (&'a K, &'a V); -/// # type IntoIter = Box<Iterator<Item = (&'a K, &'a V)>>; +/// # type IntoIter = Box<dyn Iterator<Item = (&'a K, &'a V)>>; /// # /// # fn into_iter(self) -> Self::IntoIter { /// # unimplemented!() /// # } /// # } /// # -/// use serde::ser::{Serialize, Serializer, SerializeMap}; +/// use serde::ser::{Serialize, SerializeMap, Serializer}; /// /// impl<K, V> Serialize for HashMap<K, V> /// where @@ -1843,7 +1807,7 @@ pub trait SerializeMap { K: Serialize, V: Serialize, { - try!(self.serialize_key(key)); + tri!(self.serialize_key(key)); self.serialize_value(value) } @@ -1855,7 +1819,7 @@ pub trait SerializeMap { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// use serde::ser::{Serialize, SerializeStruct, Serializer}; /// /// struct Rgb { @@ -1915,7 +1879,7 @@ pub trait SerializeStruct { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// use serde::ser::{Serialize, SerializeStructVariant, Serializer}; /// /// enum E { diff --git a/vendor/serde/src/std_error.rs b/vendor/serde/src/std_error.rs index 1055e0f..f15a4d7 100644 --- a/vendor/serde/src/std_error.rs +++ b/vendor/serde/src/std_error.rs @@ -1,4 +1,4 @@ -use lib::{Debug, Display}; +use crate::lib::{Debug, Display}; /// Either a re-export of std::error::Error or a new identical trait, depending /// on whether Serde's "std" feature is enabled. @@ -9,7 +9,7 @@ use lib::{Debug, Display}; /// generally provide their error types with a `std::error::Error` impl /// directly: /// -/// ```edition2018 +/// ```edition2021 /// #[derive(Debug)] /// struct MySerError {...} /// @@ -29,7 +29,7 @@ use lib::{Debug, Display}; /// std = ["serde/std"] /// ``` /// -/// ```edition2018 +/// ```edition2021 /// #[cfg(feature = "std")] /// impl std::error::Error for MySerError {} /// ``` @@ -37,7 +37,7 @@ use lib::{Debug, Display}; /// ... 
or else provide the std Error impl unconditionally via Serde's /// re-export: /// -/// ```edition2018 +/// ```edition2021 /// impl serde::ser::StdError for MySerError {} /// ``` pub trait Error: Debug + Display { diff --git a/vendor/serde_derive/.cargo-checksum.json b/vendor/serde_derive/.cargo-checksum.json new file mode 100644 index 0000000..3e0519d --- /dev/null +++ b/vendor/serde_derive/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"292f527e949377d3d23759746419c447bb8e93603f48d933ef1af34a0bf8b666","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"c3ece10a36d19b4e857a770eaf74a2164d220f55fa11947065a3898c1697ecef","crates-io.md":"56e988ac4944c45f5bf5051e3827892ed8fb817853d99d9df1fff6621108e270","src/bound.rs":"6c5c20785ac95af9480f8d0de35a7e844cc36a16012f6468db148acd03cb15c2","src/de.rs":"c221ab2b94a5d80dccff74a37f3448b3d695656552b452595dc289c73b12fb2b","src/dummy.rs":"9533dfee23f20d92ea75734c739022820c2787ded0d54f459feacdeb770ec912","src/fragment.rs":"6757cb4c3131d4300f093572efc273c4ab5a20e3e1efb54a311dcfa52d0bd6eb","src/internals/ast.rs":"7dc997e4090033bbd1d0bdd870e8bb87b096b7f66cfd02047f6b85ebdd569b12","src/internals/attr.rs":"6584c0a02de0d17993877303f3cc2c1bccf235257632220421f98082d82d387a","src/internals/case.rs":"10c8dda2b32d8c6c6b63cf09cdc63d02375af7e95ecefe8fecb34f93b65191bb","src/internals/check.rs":"d842eb9912fd29311060b67f3bc62c438eb7b5d86093355acb4de7eee02a0ef8","src/internals/ctxt.rs":"83a4e6fbe0e439d578478883594407e03f2f340541be479bdf0b04a202633a37","src/internals/mod.rs":"ed021ca635c18132a0e5c3d90f21b7f65def0a61e946421a30200b5b9ab6ad43","src/internals/receiver.rs":"fe8a480669511b5edcfe71f5dd290cf72ccec54c9016ec85f2ac59dce538077f","src/internals/respan.rs":"899753859c58ce5f532a3ec4584796a52f13ed5a0533191e48c953ba5c1b52ff","src/internals/symbol.rs":"d619e88caa3c7a09b03014257f2b349ee922290062d9b97b4dd19d0e64532690","src/lib.rs":"3fc5a148c35cda8a27bc0a65da41a991d63f8ea0e0d607336082dd0f528750ef","src/pretend.rs":"7facc10a5b805564dd95735ae11118ec17ca6adcc49a59764e7c920e27b9fc4a","src/ser.rs":"e3341471cea9d7e2fb4043e5d1746862beb9a4e25196170879eeac529d460920","src/this.rs":"87818dc80cbb521b51938a653d09daf10aafc220bb10425948de82ad670fcb85"},"package":"46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c"} \ No newline at end of file diff --git a/vendor/serde_derive/Cargo.toml b/vendor/serde_derive/Cargo.toml new file mode 100644 index 0000000..66d752c --- /dev/null +++ b/vendor/serde_derive/Cargo.toml @@ -0,0 +1,59 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +rust-version = "1.56" +name = "serde_derive" +version = "1.0.195" +authors = [ + "Erick Tryzelaar <erick.tryzelaar@gmail.com>", + "David Tolnay <dtolnay@gmail.com>", +] +description = "Macros 1.1 implementation of #[derive(Serialize, Deserialize)]" +homepage = "https://serde.rs" +documentation = "https://serde.rs/derive.html" +readme = "crates-io.md" +keywords = [ + "serde", + "serialization", + "no_std", + "derive", +] +categories = [ + "no-std", + "no-std::no-alloc", +] +license = "MIT OR Apache-2.0" +repository = "https://github.com/serde-rs/serde" + +[package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] +targets = ["x86_64-unknown-linux-gnu"] + +[lib] +name = "serde_derive" +proc-macro = true + +[dependencies.proc-macro2] +version = "1.0.74" + +[dependencies.quote] +version = "1.0.35" + +[dependencies.syn] +version = "2.0.46" + +[dev-dependencies.serde] +version = "1" + +[features] +default = [] +deserialize_in_place = [] diff --git a/vendor/serde_derive/LICENSE-APACHE b/vendor/serde_derive/LICENSE-APACHE new file mode 100644 index 0000000..1b5ec8b --- /dev/null +++ b/vendor/serde_derive/LICENSE-APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS diff --git a/vendor/serde_derive/LICENSE-MIT b/vendor/serde_derive/LICENSE-MIT new file mode 100644 index 0000000..31aa793 --- /dev/null +++ b/vendor/serde_derive/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/vendor/serde_derive/README.md b/vendor/serde_derive/README.md new file mode 100644 index 0000000..477fd36 --- /dev/null +++ b/vendor/serde_derive/README.md @@ -0,0 +1,114 @@ +# Serde   [![Build Status]][actions] [![Latest Version]][crates.io] [![serde: rustc 1.31+]][Rust 1.31] [![serde_derive: rustc 1.56+]][Rust 1.56] + +[Build Status]: https://img.shields.io/github/actions/workflow/status/serde-rs/serde/ci.yml?branch=master +[actions]: https://github.com/serde-rs/serde/actions?query=branch%3Amaster +[Latest Version]: https://img.shields.io/crates/v/serde.svg +[crates.io]: https://crates.io/crates/serde +[serde: rustc 1.31+]: https://img.shields.io/badge/serde-rustc_1.31+-lightgray.svg +[serde_derive: rustc 1.56+]: https://img.shields.io/badge/serde_derive-rustc_1.56+-lightgray.svg +[Rust 1.31]: https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html +[Rust 1.56]: https://blog.rust-lang.org/2021/10/21/Rust-1.56.0.html + +**Serde is a framework for *ser*ializing and *de*serializing Rust data structures efficiently and generically.** + +--- + +You may be looking for: + +- [An overview of Serde](https://serde.rs/) +- [Data formats supported by Serde](https://serde.rs/#data-formats) +- [Setting up `#[derive(Serialize, Deserialize)]`](https://serde.rs/derive.html) +- [Examples](https://serde.rs/examples.html) +- [API documentation](https://docs.rs/serde) +- [Release notes](https://github.com/serde-rs/serde/releases) + +## Serde in action + +<details> +<summary> +Click to show Cargo.toml. +<a href="https://play.rust-lang.org/?edition=2018&gist=72755f28f99afc95e01d63174b28c1f5" target="_blank">Run this code in the playground.</a> +</summary> + +```toml +[dependencies] + +# The core APIs, including the Serialize and Deserialize traits. Always +# required when using Serde. The "derive" feature is only required when +# using #[derive(Serialize, Deserialize)] to make Serde work with structs +# and enums defined in your crate. +serde = { version = "1.0", features = ["derive"] } + +# Each data format lives in its own crate; the sample code below uses JSON +# but you may be using a different one. +serde_json = "1.0" +``` + +</details> +<p></p> + +```rust +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Debug)] +struct Point { + x: i32, + y: i32, +} + +fn main() { + let point = Point { x: 1, y: 2 }; + + // Convert the Point to a JSON string. + let serialized = serde_json::to_string(&point).unwrap(); + + // Prints serialized = {"x":1,"y":2} + println!("serialized = {}", serialized); + + // Convert the JSON string back to a Point. + let deserialized: Point = serde_json::from_str(&serialized).unwrap(); + + // Prints deserialized = Point { x: 1, y: 2 } + println!("deserialized = {:?}", deserialized); +} +``` + +## Getting help + +Serde is one of the most widely used Rust libraries so any place that Rustaceans +congregate will be able to help you out. For chat, consider trying the +[#rust-questions] or [#rust-beginners] channels of the unofficial community +Discord (invite: <https://discord.gg/rust-lang-community>), the [#rust-usage] or +[#beginners] channels of the official Rust Project Discord (invite: +<https://discord.gg/rust-lang>), or the [#general][zulip] stream in Zulip. For +asynchronous, consider the [\[rust\] tag on StackOverflow][stackoverflow], the +[/r/rust] subreddit which has a pinned weekly easy questions post, or the Rust +[Discourse forum][discourse]. 
It's acceptable to file a support issue in this +repo but they tend not to get as many eyes as any of the above and may get +closed without a response after some time. + +[#rust-questions]: https://discord.com/channels/273534239310479360/274215136414400513 +[#rust-beginners]: https://discord.com/channels/273534239310479360/273541522815713281 +[#rust-usage]: https://discord.com/channels/442252698964721669/443150878111694848 +[#beginners]: https://discord.com/channels/442252698964721669/448238009733742612 +[zulip]: https://rust-lang.zulipchat.com/#narrow/stream/122651-general +[stackoverflow]: https://stackoverflow.com/questions/tagged/rust +[/r/rust]: https://www.reddit.com/r/rust +[discourse]: https://users.rust-lang.org + +<br> + +#### License + +<sup> +Licensed under either of <a href="LICENSE-APACHE">Apache License, Version +2.0</a> or <a href="LICENSE-MIT">MIT license</a> at your option. +</sup> + +<br> + +<sub> +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in Serde by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. +</sub> diff --git a/vendor/serde_derive/crates-io.md b/vendor/serde_derive/crates-io.md new file mode 100644 index 0000000..1871003 --- /dev/null +++ b/vendor/serde_derive/crates-io.md @@ -0,0 +1,65 @@ +<!-- Serde readme rendered on crates.io --> + +**Serde is a framework for *ser*ializing and *de*serializing Rust data structures efficiently and generically.** + +--- + +You may be looking for: + +- [An overview of Serde](https://serde.rs/) +- [Data formats supported by Serde](https://serde.rs/#data-formats) +- [Setting up `#[derive(Serialize, Deserialize)]`](https://serde.rs/derive.html) +- [Examples](https://serde.rs/examples.html) +- [API documentation](https://docs.rs/serde) +- [Release notes](https://github.com/serde-rs/serde/releases) + +## Serde in action + +```rust +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Debug)] +struct Point { + x: i32, + y: i32, +} + +fn main() { + let point = Point { x: 1, y: 2 }; + + // Convert the Point to a JSON string. + let serialized = serde_json::to_string(&point).unwrap(); + + // Prints serialized = {"x":1,"y":2} + println!("serialized = {}", serialized); + + // Convert the JSON string back to a Point. + let deserialized: Point = serde_json::from_str(&serialized).unwrap(); + + // Prints deserialized = Point { x: 1, y: 2 } + println!("deserialized = {:?}", deserialized); +} +``` + +## Getting help + +Serde is one of the most widely used Rust libraries so any place that Rustaceans +congregate will be able to help you out. For chat, consider trying the +[#rust-questions] or [#rust-beginners] channels of the unofficial community +Discord (invite: <https://discord.gg/rust-lang-community>), the [#rust-usage] or +[#beginners] channels of the official Rust Project Discord (invite: +<https://discord.gg/rust-lang>), or the [#general][zulip] stream in Zulip. For +asynchronous, consider the [\[rust\] tag on StackOverflow][stackoverflow], the +[/r/rust] subreddit which has a pinned weekly easy questions post, or the Rust +[Discourse forum][discourse]. It's acceptable to file a support issue in this +repo but they tend not to get as many eyes as any of the above and may get +closed without a response after some time.
+ +[#rust-questions]: https://discord.com/channels/273534239310479360/274215136414400513 +[#rust-beginners]: https://discord.com/channels/273534239310479360/273541522815713281 +[#rust-usage]: https://discord.com/channels/442252698964721669/443150878111694848 +[#beginners]: https://discord.com/channels/442252698964721669/448238009733742612 +[zulip]: https://rust-lang.zulipchat.com/#narrow/stream/122651-general +[stackoverflow]: https://stackoverflow.com/questions/tagged/rust +[/r/rust]: https://www.reddit.com/r/rust +[discourse]: https://users.rust-lang.org diff --git a/vendor/serde_derive/src/bound.rs b/vendor/serde_derive/src/bound.rs new file mode 100644 index 0000000..fe8ccff --- /dev/null +++ b/vendor/serde_derive/src/bound.rs @@ -0,0 +1,408 @@ +use crate::internals::ast::{Container, Data}; +use crate::internals::{attr, ungroup}; +use proc_macro2::Span; +use std::collections::HashSet; +use syn::punctuated::{Pair, Punctuated}; +use syn::Token; + +// Remove the default from every type parameter because in the generated impls +// they look like associated types: "error: associated type bindings are not +// allowed here". +pub fn without_defaults(generics: &syn::Generics) -> syn::Generics { + syn::Generics { + params: generics + .params + .iter() + .map(|param| match param { + syn::GenericParam::Type(param) => syn::GenericParam::Type(syn::TypeParam { + eq_token: None, + default: None, + ..param.clone() + }), + _ => param.clone(), + }) + .collect(), + ..generics.clone() + } +} + +pub fn with_where_predicates( + generics: &syn::Generics, + predicates: &[syn::WherePredicate], +) -> syn::Generics { + let mut generics = generics.clone(); + generics + .make_where_clause() + .predicates + .extend(predicates.iter().cloned()); + generics +} + +pub fn with_where_predicates_from_fields( + cont: &Container, + generics: &syn::Generics, + from_field: fn(&attr::Field) -> Option<&[syn::WherePredicate]>, +) -> syn::Generics { + let predicates = cont + .data + .all_fields() + .filter_map(|field| from_field(&field.attrs)) + .flat_map(<[syn::WherePredicate]>::to_vec); + + let mut generics = generics.clone(); + generics.make_where_clause().predicates.extend(predicates); + generics +} + +pub fn with_where_predicates_from_variants( + cont: &Container, + generics: &syn::Generics, + from_variant: fn(&attr::Variant) -> Option<&[syn::WherePredicate]>, +) -> syn::Generics { + let variants = match &cont.data { + Data::Enum(variants) => variants, + Data::Struct(_, _) => { + return generics.clone(); + } + }; + + let predicates = variants + .iter() + .filter_map(|variant| from_variant(&variant.attrs)) + .flat_map(<[syn::WherePredicate]>::to_vec); + + let mut generics = generics.clone(); + generics.make_where_clause().predicates.extend(predicates); + generics +} + +// Puts the given bound on any generic type parameters that are used in fields +// for which filter returns true. +// +// For example, the following struct needs the bound `A: Serialize, B: +// Serialize`. +// +// struct S<'b, A, B: 'b, C> { +// a: A, +// b: Option<&'b B> +// #[serde(skip_serializing)] +// c: C, +// } +pub fn with_bound( + cont: &Container, + generics: &syn::Generics, + filter: fn(&attr::Field, Option<&attr::Variant>) -> bool, + bound: &syn::Path, +) -> syn::Generics { + struct FindTyParams<'ast> { + // Set of all generic type parameters on the current struct (A, B, C in + // the example). Initialized up front. 
+ all_type_params: HashSet<syn::Ident>, + + // Set of generic type parameters used in fields for which filter + // returns true (A and B in the example). Filled in as the visitor sees + // them. + relevant_type_params: HashSet<syn::Ident>, + + // Fields whose type is an associated type of one of the generic type + // parameters. + associated_type_usage: Vec<&'ast syn::TypePath>, + } + + impl<'ast> FindTyParams<'ast> { + fn visit_field(&mut self, field: &'ast syn::Field) { + if let syn::Type::Path(ty) = ungroup(&field.ty) { + if let Some(Pair::Punctuated(t, _)) = ty.path.segments.pairs().next() { + if self.all_type_params.contains(&t.ident) { + self.associated_type_usage.push(ty); + } + } + } + self.visit_type(&field.ty); + } + + fn visit_path(&mut self, path: &'ast syn::Path) { + if let Some(seg) = path.segments.last() { + if seg.ident == "PhantomData" { + // Hardcoded exception, because PhantomData<T> implements + // Serialize and Deserialize whether or not T implements it. + return; + } + } + if path.leading_colon.is_none() && path.segments.len() == 1 { + let id = &path.segments[0].ident; + if self.all_type_params.contains(id) { + self.relevant_type_params.insert(id.clone()); + } + } + for segment in &path.segments { + self.visit_path_segment(segment); + } + } + + // Everything below is simply traversing the syntax tree. + + fn visit_type(&mut self, ty: &'ast syn::Type) { + match ty { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] + syn::Type::Array(ty) => self.visit_type(&ty.elem), + syn::Type::BareFn(ty) => { + for arg in &ty.inputs { + self.visit_type(&arg.ty); + } + self.visit_return_type(&ty.output); + } + syn::Type::Group(ty) => self.visit_type(&ty.elem), + syn::Type::ImplTrait(ty) => { + for bound in &ty.bounds { + self.visit_type_param_bound(bound); + } + } + syn::Type::Macro(ty) => self.visit_macro(&ty.mac), + syn::Type::Paren(ty) => self.visit_type(&ty.elem), + syn::Type::Path(ty) => { + if let Some(qself) = &ty.qself { + self.visit_type(&qself.ty); + } + self.visit_path(&ty.path); + } + syn::Type::Ptr(ty) => self.visit_type(&ty.elem), + syn::Type::Reference(ty) => self.visit_type(&ty.elem), + syn::Type::Slice(ty) => self.visit_type(&ty.elem), + syn::Type::TraitObject(ty) => { + for bound in &ty.bounds { + self.visit_type_param_bound(bound); + } + } + syn::Type::Tuple(ty) => { + for elem in &ty.elems { + self.visit_type(elem); + } + } + + syn::Type::Infer(_) | syn::Type::Never(_) | syn::Type::Verbatim(_) => {} + + _ => {} + } + } + + fn visit_path_segment(&mut self, segment: &'ast syn::PathSegment) { + self.visit_path_arguments(&segment.arguments); + } + + fn visit_path_arguments(&mut self, arguments: &'ast syn::PathArguments) { + match arguments { + syn::PathArguments::None => {} + syn::PathArguments::AngleBracketed(arguments) => { + for arg in &arguments.args { + match arg { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] + syn::GenericArgument::Type(arg) => self.visit_type(arg), + syn::GenericArgument::AssocType(arg) => self.visit_type(&arg.ty), + syn::GenericArgument::Lifetime(_) + | syn::GenericArgument::Const(_) + | syn::GenericArgument::AssocConst(_) + | syn::GenericArgument::Constraint(_) => {} + _ => {} + } + } + } + syn::PathArguments::Parenthesized(arguments) => { + for argument in &arguments.inputs { + self.visit_type(argument); + } + self.visit_return_type(&arguments.output); + } + } + } + + fn visit_return_type(&mut self, return_type: &'ast syn::ReturnType) { + match return_type { + 
syn::ReturnType::Default => {} + syn::ReturnType::Type(_, output) => self.visit_type(output), + } + } + + fn visit_type_param_bound(&mut self, bound: &'ast syn::TypeParamBound) { + match bound { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] + syn::TypeParamBound::Trait(bound) => self.visit_path(&bound.path), + syn::TypeParamBound::Lifetime(_) | syn::TypeParamBound::Verbatim(_) => {} + _ => {} + } + } + + // Type parameter should not be considered used by a macro path. + // + // struct TypeMacro<T> { + // mac: T!(), + // marker: PhantomData<T>, + // } + fn visit_macro(&mut self, _mac: &'ast syn::Macro) {} + } + + let all_type_params = generics + .type_params() + .map(|param| param.ident.clone()) + .collect(); + + let mut visitor = FindTyParams { + all_type_params, + relevant_type_params: HashSet::new(), + associated_type_usage: Vec::new(), + }; + match &cont.data { + Data::Enum(variants) => { + for variant in variants { + let relevant_fields = variant + .fields + .iter() + .filter(|field| filter(&field.attrs, Some(&variant.attrs))); + for field in relevant_fields { + visitor.visit_field(field.original); + } + } + } + Data::Struct(_, fields) => { + for field in fields.iter().filter(|field| filter(&field.attrs, None)) { + visitor.visit_field(field.original); + } + } + } + + let relevant_type_params = visitor.relevant_type_params; + let associated_type_usage = visitor.associated_type_usage; + let new_predicates = generics + .type_params() + .map(|param| param.ident.clone()) + .filter(|id| relevant_type_params.contains(id)) + .map(|id| syn::TypePath { + qself: None, + path: id.into(), + }) + .chain(associated_type_usage.into_iter().cloned()) + .map(|bounded_ty| { + syn::WherePredicate::Type(syn::PredicateType { + lifetimes: None, + // the type parameter that is being bounded e.g. T + bounded_ty: syn::Type::Path(bounded_ty), + colon_token: <Token![:]>::default(), + // the bound e.g. Serialize + bounds: vec![syn::TypeParamBound::Trait(syn::TraitBound { + paren_token: None, + modifier: syn::TraitBoundModifier::None, + lifetimes: None, + path: bound.clone(), + })] + .into_iter() + .collect(), + }) + }); + + let mut generics = generics.clone(); + generics + .make_where_clause() + .predicates + .extend(new_predicates); + generics +} + +pub fn with_self_bound( + cont: &Container, + generics: &syn::Generics, + bound: &syn::Path, +) -> syn::Generics { + let mut generics = generics.clone(); + generics + .make_where_clause() + .predicates + .push(syn::WherePredicate::Type(syn::PredicateType { + lifetimes: None, + // the type that is being bounded e.g. MyStruct<'a, T> + bounded_ty: type_of_item(cont), + colon_token: <Token![:]>::default(), + // the bound e.g. 
Default + bounds: vec![syn::TypeParamBound::Trait(syn::TraitBound { + paren_token: None, + modifier: syn::TraitBoundModifier::None, + lifetimes: None, + path: bound.clone(), + })] + .into_iter() + .collect(), + })); + generics +} + +pub fn with_lifetime_bound(generics: &syn::Generics, lifetime: &str) -> syn::Generics { + let bound = syn::Lifetime::new(lifetime, Span::call_site()); + let def = syn::LifetimeParam { + attrs: Vec::new(), + lifetime: bound.clone(), + colon_token: None, + bounds: Punctuated::new(), + }; + + let params = Some(syn::GenericParam::Lifetime(def)) + .into_iter() + .chain(generics.params.iter().cloned().map(|mut param| { + match &mut param { + syn::GenericParam::Lifetime(param) => { + param.bounds.push(bound.clone()); + } + syn::GenericParam::Type(param) => { + param + .bounds + .push(syn::TypeParamBound::Lifetime(bound.clone())); + } + syn::GenericParam::Const(_) => {} + } + param + })) + .collect(); + + syn::Generics { + params, + ..generics.clone() + } +} + +fn type_of_item(cont: &Container) -> syn::Type { + syn::Type::Path(syn::TypePath { + qself: None, + path: syn::Path { + leading_colon: None, + segments: vec![syn::PathSegment { + ident: cont.ident.clone(), + arguments: syn::PathArguments::AngleBracketed( + syn::AngleBracketedGenericArguments { + colon2_token: None, + lt_token: <Token![<]>::default(), + args: cont + .generics + .params + .iter() + .map(|param| match param { + syn::GenericParam::Type(param) => { + syn::GenericArgument::Type(syn::Type::Path(syn::TypePath { + qself: None, + path: param.ident.clone().into(), + })) + } + syn::GenericParam::Lifetime(param) => { + syn::GenericArgument::Lifetime(param.lifetime.clone()) + } + syn::GenericParam::Const(_) => { + panic!("Serde does not support const generics yet"); + } + }) + .collect(), + gt_token: <Token![>]>::default(), + }, + ), + }] + .into_iter() + .collect(), + }, + }) +} diff --git a/vendor/serde_derive/src/de.rs b/vendor/serde_derive/src/de.rs new file mode 100644 index 0000000..e3b737c --- /dev/null +++ b/vendor/serde_derive/src/de.rs @@ -0,0 +1,3148 @@ +use crate::fragment::{Expr, Fragment, Match, Stmts}; +use crate::internals::ast::{Container, Data, Field, Style, Variant}; +use crate::internals::{attr, replace_receiver, ungroup, Ctxt, Derive}; +use crate::{bound, dummy, pretend, this}; +use proc_macro2::{Literal, Span, TokenStream}; +use quote::{quote, quote_spanned, ToTokens}; +use std::collections::BTreeSet; +use std::ptr; +use syn::punctuated::Punctuated; +use syn::spanned::Spanned; +use syn::{parse_quote, Ident, Index, Member}; + +pub fn expand_derive_deserialize(input: &mut syn::DeriveInput) -> syn::Result<TokenStream> { + replace_receiver(input); + + let ctxt = Ctxt::new(); + let cont = match Container::from_ast(&ctxt, input, Derive::Deserialize) { + Some(cont) => cont, + None => return Err(ctxt.check().unwrap_err()), + }; + precondition(&ctxt, &cont); + ctxt.check()?; + + let ident = &cont.ident; + let params = Parameters::new(&cont); + let (de_impl_generics, _, ty_generics, where_clause) = split_with_de_lifetime(¶ms); + let body = Stmts(deserialize_body(&cont, ¶ms)); + let delife = params.borrowed.de_lifetime(); + let serde = cont.attrs.serde_path(); + + let impl_block = if let Some(remote) = cont.attrs.remote() { + let vis = &input.vis; + let used = pretend::pretend_used(&cont, params.is_packed); + quote! 
{ + impl #de_impl_generics #ident #ty_generics #where_clause { + #vis fn deserialize<__D>(__deserializer: __D) -> #serde::__private::Result<#remote #ty_generics, __D::Error> + where + __D: #serde::Deserializer<#delife>, + { + #used + #body + } + } + } + } else { + let fn_deserialize_in_place = deserialize_in_place_body(&cont, ¶ms); + + quote! { + #[automatically_derived] + impl #de_impl_generics #serde::Deserialize<#delife> for #ident #ty_generics #where_clause { + fn deserialize<__D>(__deserializer: __D) -> #serde::__private::Result<Self, __D::Error> + where + __D: #serde::Deserializer<#delife>, + { + #body + } + + #fn_deserialize_in_place + } + } + }; + + Ok(dummy::wrap_in_const( + cont.attrs.custom_serde_path(), + impl_block, + )) +} + +fn precondition(cx: &Ctxt, cont: &Container) { + precondition_sized(cx, cont); + precondition_no_de_lifetime(cx, cont); +} + +fn precondition_sized(cx: &Ctxt, cont: &Container) { + if let Data::Struct(_, fields) = &cont.data { + if let Some(last) = fields.last() { + if let syn::Type::Slice(_) = ungroup(last.ty) { + cx.error_spanned_by( + cont.original, + "cannot deserialize a dynamically sized struct", + ); + } + } + } +} + +fn precondition_no_de_lifetime(cx: &Ctxt, cont: &Container) { + if let BorrowedLifetimes::Borrowed(_) = borrowed_lifetimes(cont) { + for param in cont.generics.lifetimes() { + if param.lifetime.to_string() == "'de" { + cx.error_spanned_by( + ¶m.lifetime, + "cannot deserialize when there is a lifetime parameter called 'de", + ); + return; + } + } + } +} + +struct Parameters { + /// Name of the type the `derive` is on. + local: syn::Ident, + + /// Path to the type the impl is for. Either a single `Ident` for local + /// types (does not include generic parameters) or `some::remote::Path` for + /// remote types. + this_type: syn::Path, + + /// Same as `this_type` but using `::<T>` for generic parameters for use in + /// expression position. + this_value: syn::Path, + + /// Generics including any explicit and inferred bounds for the impl. + generics: syn::Generics, + + /// Lifetimes borrowed from the deserializer. These will become bounds on + /// the `'de` lifetime of the deserializer. + borrowed: BorrowedLifetimes, + + /// At least one field has a serde(getter) attribute, implying that the + /// remote type has a private field. + has_getter: bool, + + /// Type has a repr(packed) attribute. + is_packed: bool, +} + +impl Parameters { + fn new(cont: &Container) -> Self { + let local = cont.ident.clone(); + let this_type = this::this_type(cont); + let this_value = this::this_value(cont); + let borrowed = borrowed_lifetimes(cont); + let generics = build_generics(cont, &borrowed); + let has_getter = cont.data.has_getter(); + let is_packed = cont.attrs.is_packed(); + + Parameters { + local, + this_type, + this_value, + generics, + borrowed, + has_getter, + is_packed, + } + } + + /// Type name to use in error messages and `&'static str` arguments to + /// various Deserializer methods. + fn type_name(&self) -> String { + self.this_type.segments.last().unwrap().ident.to_string() + } +} + +// All the generics in the input, plus a bound `T: Deserialize` for each generic +// field type that will be deserialized by us, plus a bound `T: Default` for +// each generic field type that will be set to a default value. 
+fn build_generics(cont: &Container, borrowed: &BorrowedLifetimes) -> syn::Generics { + let generics = bound::without_defaults(cont.generics); + + let generics = bound::with_where_predicates_from_fields(cont, &generics, attr::Field::de_bound); + + let generics = + bound::with_where_predicates_from_variants(cont, &generics, attr::Variant::de_bound); + + match cont.attrs.de_bound() { + Some(predicates) => bound::with_where_predicates(&generics, predicates), + None => { + let generics = match *cont.attrs.default() { + attr::Default::Default => bound::with_self_bound( + cont, + &generics, + &parse_quote!(_serde::__private::Default), + ), + attr::Default::None | attr::Default::Path(_) => generics, + }; + + let delife = borrowed.de_lifetime(); + let generics = bound::with_bound( + cont, + &generics, + needs_deserialize_bound, + &parse_quote!(_serde::Deserialize<#delife>), + ); + + bound::with_bound( + cont, + &generics, + requires_default, + &parse_quote!(_serde::__private::Default), + ) + } + } +} + +// Fields with a `skip_deserializing` or `deserialize_with` attribute, or which +// belong to a variant with a `skip_deserializing` or `deserialize_with` +// attribute, are not deserialized by us so we do not generate a bound. Fields +// with a `bound` attribute specify their own bound so we do not generate one. +// All other fields may need a `T: Deserialize` bound where T is the type of the +// field. +fn needs_deserialize_bound(field: &attr::Field, variant: Option<&attr::Variant>) -> bool { + !field.skip_deserializing() + && field.deserialize_with().is_none() + && field.de_bound().is_none() + && variant.map_or(true, |variant| { + !variant.skip_deserializing() + && variant.deserialize_with().is_none() + && variant.de_bound().is_none() + }) +} + +// Fields with a `default` attribute (not `default=...`), and fields with a +// `skip_deserializing` attribute that do not also have `default=...`. +fn requires_default(field: &attr::Field, _variant: Option<&attr::Variant>) -> bool { + if let attr::Default::Default = *field.default() { + true + } else { + false + } +} + +enum BorrowedLifetimes { + Borrowed(BTreeSet<syn::Lifetime>), + Static, +} + +impl BorrowedLifetimes { + fn de_lifetime(&self) -> syn::Lifetime { + match *self { + BorrowedLifetimes::Borrowed(_) => syn::Lifetime::new("'de", Span::call_site()), + BorrowedLifetimes::Static => syn::Lifetime::new("'static", Span::call_site()), + } + } + + fn de_lifetime_param(&self) -> Option<syn::LifetimeParam> { + match self { + BorrowedLifetimes::Borrowed(bounds) => Some(syn::LifetimeParam { + attrs: Vec::new(), + lifetime: syn::Lifetime::new("'de", Span::call_site()), + colon_token: None, + bounds: bounds.iter().cloned().collect(), + }), + BorrowedLifetimes::Static => None, + } + } +} + +// The union of lifetimes borrowed by each field of the container. +// +// These turn into bounds on the `'de` lifetime of the Deserialize impl. If +// lifetimes `'a` and `'b` are borrowed but `'c` is not, the impl is: +// +// impl<'de: 'a + 'b, 'a, 'b, 'c> Deserialize<'de> for S<'a, 'b, 'c> +// +// If any borrowed lifetime is `'static`, then `'de: 'static` would be redundant +// and we use plain `'static` instead of `'de`. 
+fn borrowed_lifetimes(cont: &Container) -> BorrowedLifetimes { + let mut lifetimes = BTreeSet::new(); + for field in cont.data.all_fields() { + if !field.attrs.skip_deserializing() { + lifetimes.extend(field.attrs.borrowed_lifetimes().iter().cloned()); + } + } + if lifetimes.iter().any(|b| b.to_string() == "'static") { + BorrowedLifetimes::Static + } else { + BorrowedLifetimes::Borrowed(lifetimes) + } +} + +fn deserialize_body(cont: &Container, params: &Parameters) -> Fragment { + if cont.attrs.transparent() { + deserialize_transparent(cont, params) + } else if let Some(type_from) = cont.attrs.type_from() { + deserialize_from(type_from) + } else if let Some(type_try_from) = cont.attrs.type_try_from() { + deserialize_try_from(type_try_from) + } else if let attr::Identifier::No = cont.attrs.identifier() { + match &cont.data { + Data::Enum(variants) => deserialize_enum(params, variants, &cont.attrs), + Data::Struct(Style::Struct, fields) => { + deserialize_struct(params, fields, &cont.attrs, StructForm::Struct) + } + Data::Struct(Style::Tuple, fields) | Data::Struct(Style::Newtype, fields) => { + deserialize_tuple(params, fields, &cont.attrs, TupleForm::Tuple) + } + Data::Struct(Style::Unit, _) => deserialize_unit_struct(params, &cont.attrs), + } + } else { + match &cont.data { + Data::Enum(variants) => deserialize_custom_identifier(params, variants, &cont.attrs), + Data::Struct(_, _) => unreachable!("checked in serde_derive_internals"), + } + } +} + +#[cfg(feature = "deserialize_in_place")] +fn deserialize_in_place_body(cont: &Container, params: &Parameters) -> Option<Stmts> { + // Only remote derives have getters, and we do not generate + // deserialize_in_place for remote derives. + assert!(!params.has_getter); + + if cont.attrs.transparent() + || cont.attrs.type_from().is_some() + || cont.attrs.type_try_from().is_some() + || cont.attrs.identifier().is_some() + || cont + .data + .all_fields() + .all(|f| f.attrs.deserialize_with().is_some()) + { + return None; + } + + let code = match &cont.data { + Data::Struct(Style::Struct, fields) => { + deserialize_struct_in_place(params, fields, &cont.attrs)? + } + Data::Struct(Style::Tuple, fields) | Data::Struct(Style::Newtype, fields) => { + deserialize_tuple_in_place(params, fields, &cont.attrs) + } + Data::Enum(_) | Data::Struct(Style::Unit, _) => { + return None; + } + }; + + let delife = params.borrowed.de_lifetime(); + let stmts = Stmts(code); + + let fn_deserialize_in_place = quote_block! 
+    {
+        fn deserialize_in_place<__D>(__deserializer: __D, __place: &mut Self) -> _serde::__private::Result<(), __D::Error>
+        where
+            __D: _serde::Deserializer<#delife>,
+        {
+            #stmts
+        }
+    };
+
+    Some(Stmts(fn_deserialize_in_place))
+}
+
+#[cfg(not(feature = "deserialize_in_place"))]
+fn deserialize_in_place_body(_cont: &Container, _params: &Parameters) -> Option<Stmts> {
+    None
+}
+
+fn deserialize_transparent(cont: &Container, params: &Parameters) -> Fragment {
+    let fields = match &cont.data {
+        Data::Struct(_, fields) => fields,
+        Data::Enum(_) => unreachable!(),
+    };
+
+    let this_value = &params.this_value;
+    let transparent_field = fields.iter().find(|f| f.attrs.transparent()).unwrap();
+
+    let path = match transparent_field.attrs.deserialize_with() {
+        Some(path) => quote!(#path),
+        None => {
+            let span = transparent_field.original.span();
+            quote_spanned!(span=> _serde::Deserialize::deserialize)
+        }
+    };
+
+    let assign = fields.iter().map(|field| {
+        let member = &field.member;
+        if ptr::eq(field, transparent_field) {
+            quote!(#member: __transparent)
+        } else {
+            let value = match field.attrs.default() {
+                attr::Default::Default => quote!(_serde::__private::Default::default()),
+                attr::Default::Path(path) => quote!(#path()),
+                attr::Default::None => quote!(_serde::__private::PhantomData),
+            };
+            quote!(#member: #value)
+        }
+    });
+
+    quote_block! {
+        _serde::__private::Result::map(
+            #path(__deserializer),
+            |__transparent| #this_value { #(#assign),* })
+    }
+}
+
+fn deserialize_from(type_from: &syn::Type) -> Fragment {
+    quote_block! {
+        _serde::__private::Result::map(
+            <#type_from as _serde::Deserialize>::deserialize(__deserializer),
+            _serde::__private::From::from)
+    }
+}
+
+fn deserialize_try_from(type_try_from: &syn::Type) -> Fragment {
+    quote_block! {
+        _serde::__private::Result::and_then(
+            <#type_try_from as _serde::Deserialize>::deserialize(__deserializer),
+            |v| _serde::__private::TryFrom::try_from(v).map_err(_serde::de::Error::custom))
+    }
+}
+
+fn deserialize_unit_struct(params: &Parameters, cattrs: &attr::Container) -> Fragment {
+    let this_type = &params.this_type;
+    let this_value = &params.this_value;
+    let type_name = cattrs.name().deserialize_name();
+    let (de_impl_generics, de_ty_generics, ty_generics, where_clause) =
+        split_with_de_lifetime(params);
+    let delife = params.borrowed.de_lifetime();
+
+    let expecting = format!("unit struct {}", params.type_name());
+    let expecting = cattrs.expecting().unwrap_or(&expecting);
+
+    quote_block! {
+        #[doc(hidden)]
+        struct __Visitor #de_impl_generics #where_clause {
+            marker: _serde::__private::PhantomData<#this_type #ty_generics>,
+            lifetime: _serde::__private::PhantomData<&#delife ()>,
+        }
+
+        impl #de_impl_generics _serde::de::Visitor<#delife> for __Visitor #de_ty_generics #where_clause {
+            type Value = #this_type #ty_generics;
+
+            fn expecting(&self, __formatter: &mut _serde::__private::Formatter) -> _serde::__private::fmt::Result {
+                _serde::__private::Formatter::write_str(__formatter, #expecting)
+            }
+
+            #[inline]
+            fn visit_unit<__E>(self) -> _serde::__private::Result<Self::Value, __E>
+            where
+                __E: _serde::de::Error,
+            {
+                _serde::__private::Ok(#this_value)
+            }
+        }
+
+        _serde::Deserializer::deserialize_unit_struct(
+            __deserializer,
+            #type_name,
+            __Visitor {
+                marker: _serde::__private::PhantomData::<#this_type #ty_generics>,
+                lifetime: _serde::__private::PhantomData,
+            },
+        )
+    }
+}
+
+enum TupleForm<'a> {
+    Tuple,
+    /// Contains a variant name
+    ExternallyTagged(&'a syn::Ident),
+    /// Contains a variant name and an intermediate deserializer from which actual
+    /// deserialization will be performed
+    Untagged(&'a syn::Ident, TokenStream),
+}
+
+fn deserialize_tuple(
+    params: &Parameters,
+    fields: &[Field],
+    cattrs: &attr::Container,
+    form: TupleForm,
+) -> Fragment {
+    assert!(!cattrs.has_flatten());
+
+    let field_count = fields
+        .iter()
+        .filter(|field| !field.attrs.skip_deserializing())
+        .count();
+
+    let this_type = &params.this_type;
+    let this_value = &params.this_value;
+    let (de_impl_generics, de_ty_generics, ty_generics, where_clause) =
+        split_with_de_lifetime(params);
+    let delife = params.borrowed.de_lifetime();
+
+    // If there are getters (implying private fields), construct the local type
+    // and use an `Into` conversion to get the remote type. If there are no
+    // getters then construct the target type directly.
+    let construct = if params.has_getter {
+        let local = &params.local;
+        quote!(#local)
+    } else {
+        quote!(#this_value)
+    };
+
+    let type_path = match form {
+        TupleForm::Tuple => construct,
+        TupleForm::ExternallyTagged(variant_ident) | TupleForm::Untagged(variant_ident, _) => {
+            quote!(#construct::#variant_ident)
+        }
+    };
+    let expecting = match form {
+        TupleForm::Tuple => format!("tuple struct {}", params.type_name()),
+        TupleForm::ExternallyTagged(variant_ident) | TupleForm::Untagged(variant_ident, _) => {
+            format!("tuple variant {}::{}", params.type_name(), variant_ident)
+        }
+    };
+    let expecting = cattrs.expecting().unwrap_or(&expecting);
+
+    let nfields = fields.len();
+
+    let visit_newtype_struct = match form {
+        TupleForm::Tuple if nfields == 1 => {
+            Some(deserialize_newtype_struct(&type_path, params, &fields[0]))
+        }
+        _ => None,
+    };
+
+    let visit_seq = Stmts(deserialize_seq(
+        &type_path, params, fields, false, cattrs, expecting,
+    ));
+
+    let visitor_expr = quote! {
+        __Visitor {
+            marker: _serde::__private::PhantomData::<#this_type #ty_generics>,
+            lifetime: _serde::__private::PhantomData,
+        }
+    };
+    let dispatch = match form {
+        TupleForm::Tuple if nfields == 1 => {
+            let type_name = cattrs.name().deserialize_name();
+            quote! {
+                _serde::Deserializer::deserialize_newtype_struct(__deserializer, #type_name, #visitor_expr)
+            }
+        }
+        TupleForm::Tuple => {
+            let type_name = cattrs.name().deserialize_name();
+            quote! {
+                _serde::Deserializer::deserialize_tuple_struct(__deserializer, #type_name, #field_count, #visitor_expr)
+            }
+        }
+        TupleForm::ExternallyTagged(_) => quote! {
+            _serde::de::VariantAccess::tuple_variant(__variant, #field_count, #visitor_expr)
+        },
+        TupleForm::Untagged(_, deserializer) => quote! {
+            _serde::Deserializer::deserialize_tuple(#deserializer, #field_count, #visitor_expr)
+        },
+    };
+
+    let visitor_var = if field_count == 0 {
+        quote!(_)
+    } else {
+        quote!(mut __seq)
+    };
+
+    quote_block! {
+        #[doc(hidden)]
+        struct __Visitor #de_impl_generics #where_clause {
+            marker: _serde::__private::PhantomData<#this_type #ty_generics>,
+            lifetime: _serde::__private::PhantomData<&#delife ()>,
+        }
+
+        impl #de_impl_generics _serde::de::Visitor<#delife> for __Visitor #de_ty_generics #where_clause {
+            type Value = #this_type #ty_generics;
+
+            fn expecting(&self, __formatter: &mut _serde::__private::Formatter) -> _serde::__private::fmt::Result {
+                _serde::__private::Formatter::write_str(__formatter, #expecting)
+            }
+
+            #visit_newtype_struct
+
+            #[inline]
+            fn visit_seq<__A>(self, #visitor_var: __A) -> _serde::__private::Result<Self::Value, __A::Error>
+            where
+                __A: _serde::de::SeqAccess<#delife>,
+            {
+                #visit_seq
+            }
+        }
+
+        #dispatch
+    }
+}
+
+#[cfg(feature = "deserialize_in_place")]
+fn deserialize_tuple_in_place(
+    params: &Parameters,
+    fields: &[Field],
+    cattrs: &attr::Container,
+) -> Fragment {
+    assert!(!cattrs.has_flatten());
+
+    let field_count = fields
+        .iter()
+        .filter(|field| !field.attrs.skip_deserializing())
+        .count();
+
+    let this_type = &params.this_type;
+    let (de_impl_generics, de_ty_generics, ty_generics, where_clause) =
+        split_with_de_lifetime(params);
+    let delife = params.borrowed.de_lifetime();
+
+    let expecting = format!("tuple struct {}", params.type_name());
+    let expecting = cattrs.expecting().unwrap_or(&expecting);
+
+    let nfields = fields.len();
+
+    let visit_newtype_struct = if nfields == 1 {
+        // We do not generate deserialize_in_place if every field has a
+        // deserialize_with.
+        assert!(fields[0].attrs.deserialize_with().is_none());
+
+        Some(quote! {
+            #[inline]
+            fn visit_newtype_struct<__E>(self, __e: __E) -> _serde::__private::Result<Self::Value, __E::Error>
+            where
+                __E: _serde::Deserializer<#delife>,
+            {
+                _serde::Deserialize::deserialize_in_place(__e, &mut self.place.0)
+            }
+        })
+    } else {
+        None
+    };
+
+    let visit_seq = Stmts(deserialize_seq_in_place(params, fields, cattrs, expecting));
+
+    let visitor_expr = quote! {
+        __Visitor {
+            place: __place,
+            lifetime: _serde::__private::PhantomData,
+        }
+    };
+
+    let type_name = cattrs.name().deserialize_name();
+    let dispatch = if nfields == 1 {
+        quote!(_serde::Deserializer::deserialize_newtype_struct(__deserializer, #type_name, #visitor_expr))
+    } else {
+        quote!(_serde::Deserializer::deserialize_tuple_struct(__deserializer, #type_name, #field_count, #visitor_expr))
+    };
+
+    let visitor_var = if field_count == 0 {
+        quote!(_)
+    } else {
+        quote!(mut __seq)
+    };
+
+    let in_place_impl_generics = de_impl_generics.in_place();
+    let in_place_ty_generics = de_ty_generics.in_place();
+    let place_life = place_lifetime();
+
+    quote_block! {
+        #[doc(hidden)]
+        struct __Visitor #in_place_impl_generics #where_clause {
+            place: &#place_life mut #this_type #ty_generics,
+            lifetime: _serde::__private::PhantomData<&#delife ()>,
+        }
+
+        impl #in_place_impl_generics _serde::de::Visitor<#delife> for __Visitor #in_place_ty_generics #where_clause {
+            type Value = ();
+
+            fn expecting(&self, __formatter: &mut _serde::__private::Formatter) -> _serde::__private::fmt::Result {
+                _serde::__private::Formatter::write_str(__formatter, #expecting)
+            }
+
+            #visit_newtype_struct
+
+            #[inline]
+            fn visit_seq<__A>(self, #visitor_var: __A) -> _serde::__private::Result<Self::Value, __A::Error>
+            where
+                __A: _serde::de::SeqAccess<#delife>,
+            {
+                #visit_seq
+            }
+        }
+
+        #dispatch
+    }
+}
+
+fn deserialize_seq(
+    type_path: &TokenStream,
+    params: &Parameters,
+    fields: &[Field],
+    is_struct: bool,
+    cattrs: &attr::Container,
+    expecting: &str,
+) -> Fragment {
+    let vars = (0..fields.len()).map(field_i as fn(_) -> _);
+
+    let deserialized_count = fields
+        .iter()
+        .filter(|field| !field.attrs.skip_deserializing())
+        .count();
+    let expecting = if deserialized_count == 1 {
+        format!("{} with 1 element", expecting)
+    } else {
+        format!("{} with {} elements", expecting, deserialized_count)
+    };
+    let expecting = cattrs.expecting().unwrap_or(&expecting);
+
+    let mut index_in_seq = 0_usize;
+    let let_values = vars.clone().zip(fields).map(|(var, field)| {
+        if field.attrs.skip_deserializing() {
+            let default = Expr(expr_is_missing(field, cattrs));
+            quote! {
+                let #var = #default;
+            }
+        } else {
+            let visit = match field.attrs.deserialize_with() {
+                None => {
+                    let field_ty = field.ty;
+                    let span = field.original.span();
+                    let func =
+                        quote_spanned!(span=> _serde::de::SeqAccess::next_element::<#field_ty>);
+                    quote!(#func(&mut __seq)?)
+                }
+                Some(path) => {
+                    let (wrapper, wrapper_ty) = wrap_deserialize_field_with(params, field.ty, path);
+                    quote!({
+                        #wrapper
+                        _serde::__private::Option::map(
+                            _serde::de::SeqAccess::next_element::<#wrapper_ty>(&mut __seq)?,
+                            |__wrap| __wrap.value)
+                    })
+                }
+            };
+            let value_if_none = expr_is_missing_seq(None, index_in_seq, field, cattrs, expecting);
+            let assign = quote! {
+                let #var = match #visit {
+                    _serde::__private::Some(__value) => __value,
+                    _serde::__private::None => #value_if_none,
+                };
+            };
+            index_in_seq += 1;
+            assign
+        }
+    });
+
+    let mut result = if is_struct {
+        let names = fields.iter().map(|f| &f.member);
+        quote! {
+            #type_path { #( #names: #vars ),* }
+        }
+    } else {
+        quote! {
+            #type_path ( #(#vars),* )
+        }
+    };
+
+    if params.has_getter {
+        let this_type = &params.this_type;
+        let (_, ty_generics, _) = params.generics.split_for_impl();
+        result = quote! {
+            _serde::__private::Into::<#this_type #ty_generics>::into(#result)
+        };
+    }
+
+    let let_default = match cattrs.default() {
+        attr::Default::Default => Some(quote!(
+            let __default: Self::Value = _serde::__private::Default::default();
+        )),
+        attr::Default::Path(path) => Some(quote!(
+            let __default: Self::Value = #path();
+        )),
+        attr::Default::None => {
+            // We don't need the default value; to prevent an unused variable
+            // warning, we leave the line empty.
+            None
+        }
+    };
+
+    quote_block!
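+    // Editor's illustration (hypothetical type): for `struct Pair(u8, u8)` the
+    // block below expands to roughly
+    //
+    //     let __field0 = match SeqAccess::next_element::<u8>(&mut __seq)? { ... };
+    //     let __field1 = match SeqAccess::next_element::<u8>(&mut __seq)? { ... };
+    //     Ok(Pair(__field0, __field1))
+    //
+    // where each `...` falls back to an `invalid_length` error (or the field's
+    // default) when the sequence is too short.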
+    {
+        #let_default
+        #(#let_values)*
+        _serde::__private::Ok(#result)
+    }
+}
+
+#[cfg(feature = "deserialize_in_place")]
+fn deserialize_seq_in_place(
+    params: &Parameters,
+    fields: &[Field],
+    cattrs: &attr::Container,
+    expecting: &str,
+) -> Fragment {
+    let deserialized_count = fields
+        .iter()
+        .filter(|field| !field.attrs.skip_deserializing())
+        .count();
+    let expecting = if deserialized_count == 1 {
+        format!("{} with 1 element", expecting)
+    } else {
+        format!("{} with {} elements", expecting, deserialized_count)
+    };
+    let expecting = cattrs.expecting().unwrap_or(&expecting);
+
+    let mut index_in_seq = 0usize;
+    let write_values = fields.iter().map(|field| {
+        let member = &field.member;
+
+        if field.attrs.skip_deserializing() {
+            let default = Expr(expr_is_missing(field, cattrs));
+            quote! {
+                self.place.#member = #default;
+            }
+        } else {
+            let value_if_none = expr_is_missing_seq(Some(quote!(self.place.#member = )), index_in_seq, field, cattrs, expecting);
+            let write = match field.attrs.deserialize_with() {
+                None => {
+                    quote! {
+                        if let _serde::__private::None = _serde::de::SeqAccess::next_element_seed(&mut __seq,
+                            _serde::__private::de::InPlaceSeed(&mut self.place.#member))?
+                        {
+                            #value_if_none;
+                        }
+                    }
+                }
+                Some(path) => {
+                    let (wrapper, wrapper_ty) = wrap_deserialize_field_with(params, field.ty, path);
+                    quote!({
+                        #wrapper
+                        match _serde::de::SeqAccess::next_element::<#wrapper_ty>(&mut __seq)? {
+                            _serde::__private::Some(__wrap) => {
+                                self.place.#member = __wrap.value;
+                            }
+                            _serde::__private::None => {
+                                #value_if_none;
+                            }
+                        }
+                    })
+                }
+            };
+            index_in_seq += 1;
+            write
+        }
+    });
+
+    let this_type = &params.this_type;
+    let (_, ty_generics, _) = params.generics.split_for_impl();
+    let let_default = match cattrs.default() {
+        attr::Default::Default => Some(quote!(
+            let __default: #this_type #ty_generics = _serde::__private::Default::default();
+        )),
+        attr::Default::Path(path) => Some(quote!(
+            let __default: #this_type #ty_generics = #path();
+        )),
+        attr::Default::None => {
+            // We don't need the default value; to prevent an unused variable
+            // warning, we leave the line empty.
+            None
+        }
+    };
+
+    quote_block! {
+        #let_default
+        #(#write_values)*
+        _serde::__private::Ok(())
+    }
+}
+
+fn deserialize_newtype_struct(
+    type_path: &TokenStream,
+    params: &Parameters,
+    field: &Field,
+) -> TokenStream {
+    let delife = params.borrowed.de_lifetime();
+    let field_ty = field.ty;
+
+    let value = match field.attrs.deserialize_with() {
+        None => {
+            let span = field.original.span();
+            let func = quote_spanned!(span=> <#field_ty as _serde::Deserialize>::deserialize);
+            quote! {
+                #func(__e)?
+            }
+        }
+        Some(path) => {
+            quote! {
+                #path(__e)?
+            }
+        }
+    };
+
+    let mut result = quote!(#type_path(__field0));
+    if params.has_getter {
+        let this_type = &params.this_type;
+        let (_, ty_generics, _) = params.generics.split_for_impl();
+        result = quote! {
+            _serde::__private::Into::<#this_type #ty_generics>::into(#result)
+        };
+    }
+
+    quote! {
+        #[inline]
+        fn visit_newtype_struct<__E>(self, __e: __E) -> _serde::__private::Result<Self::Value, __E::Error>
+        where
+            __E: _serde::Deserializer<#delife>,
+        {
+            let __field0: #field_ty = #value;
+            _serde::__private::Ok(#result)
+        }
+    }
+}
+
+enum StructForm<'a> {
+    Struct,
+    /// Contains a variant name
+    ExternallyTagged(&'a syn::Ident),
+    /// Contains a variant name and an intermediate deserializer from which actual
+    /// deserialization will be performed
+    InternallyTagged(&'a syn::Ident, TokenStream),
+    /// Contains a variant name and an intermediate deserializer from which actual
+    /// deserialization will be performed
+    Untagged(&'a syn::Ident, TokenStream),
+}
+
+fn deserialize_struct(
+    params: &Parameters,
+    fields: &[Field],
+    cattrs: &attr::Container,
+    form: StructForm,
+) -> Fragment {
+    let this_type = &params.this_type;
+    let this_value = &params.this_value;
+    let (de_impl_generics, de_ty_generics, ty_generics, where_clause) =
+        split_with_de_lifetime(params);
+    let delife = params.borrowed.de_lifetime();
+
+    // If there are getters (implying private fields), construct the local type
+    // and use an `Into` conversion to get the remote type. If there are no
+    // getters then construct the target type directly.
+    let construct = if params.has_getter {
+        let local = &params.local;
+        quote!(#local)
+    } else {
+        quote!(#this_value)
+    };
+
+    let type_path = match form {
+        StructForm::Struct => construct,
+        StructForm::ExternallyTagged(variant_ident)
+        | StructForm::InternallyTagged(variant_ident, _)
+        | StructForm::Untagged(variant_ident, _) => quote!(#construct::#variant_ident),
+    };
+    let expecting = match form {
+        StructForm::Struct => format!("struct {}", params.type_name()),
+        StructForm::ExternallyTagged(variant_ident)
+        | StructForm::InternallyTagged(variant_ident, _)
+        | StructForm::Untagged(variant_ident, _) => {
+            format!("struct variant {}::{}", params.type_name(), variant_ident)
+        }
+    };
+    let expecting = cattrs.expecting().unwrap_or(&expecting);
+
+    let field_names_idents: Vec<_> = fields
+        .iter()
+        .enumerate()
+        // Skip fields that shouldn't be deserialized or that were flattened,
+        // so they don't appear in the storage in their literal form
+        .filter(|&(_, field)| !field.attrs.skip_deserializing() && !field.attrs.flatten())
+        .map(|(i, field)| {
+            (
+                field.attrs.name().deserialize_name(),
+                field_i(i),
+                field.attrs.aliases(),
+            )
+        })
+        .collect();
+    let field_visitor = deserialize_field_identifier(&field_names_idents, cattrs);
+
+    // Untagged struct variants do not get a visit_seq method. The same applies to
+    // structs that only have a map representation.
+    let visit_seq = match form {
+        StructForm::Untagged(..) => None,
+        _ if cattrs.has_flatten() => None,
+        _ => {
+            let mut_seq = if field_names_idents.is_empty() {
+                quote!(_)
+            } else {
+                quote!(mut __seq)
+            };
+
+            let visit_seq = Stmts(deserialize_seq(
+                &type_path, params, fields, true, cattrs, expecting,
+            ));
+
+            Some(quote! {
+                #[inline]
+                fn visit_seq<__A>(self, #mut_seq: __A) -> _serde::__private::Result<Self::Value, __A::Error>
+                where
+                    __A: _serde::de::SeqAccess<#delife>,
+                {
+                    #visit_seq
+                }
+            })
+        }
+    };
+    let visit_map = Stmts(deserialize_map(&type_path, params, fields, cattrs));
+
+    let visitor_seed = match form {
+        StructForm::ExternallyTagged(..) if cattrs.has_flatten() => Some(quote! {
+            impl #de_impl_generics _serde::de::DeserializeSeed<#delife> for __Visitor #de_ty_generics #where_clause {
+                type Value = #this_type #ty_generics;
+
+                fn deserialize<__D>(self, __deserializer: __D) -> _serde::__private::Result<Self::Value, __D::Error>
+                where
+                    __D: _serde::Deserializer<#delife>,
+                {
+                    _serde::Deserializer::deserialize_map(__deserializer, self)
+                }
+            }
+        }),
+        _ => None,
+    };
+
+    let fields_stmt = if cattrs.has_flatten() {
+        None
+    } else {
+        let field_names = field_names_idents
+            .iter()
+            .flat_map(|&(_, _, aliases)| aliases);
+
+        Some(quote! {
+            #[doc(hidden)]
+            const FIELDS: &'static [&'static str] = &[ #(#field_names),* ];
+        })
+    };
+
+    let visitor_expr = quote! {
+        __Visitor {
+            marker: _serde::__private::PhantomData::<#this_type #ty_generics>,
+            lifetime: _serde::__private::PhantomData,
+        }
+    };
+    let dispatch = match form {
+        StructForm::Struct if cattrs.has_flatten() => quote! {
+            _serde::Deserializer::deserialize_map(__deserializer, #visitor_expr)
+        },
+        StructForm::Struct => {
+            let type_name = cattrs.name().deserialize_name();
+            quote! {
+                _serde::Deserializer::deserialize_struct(__deserializer, #type_name, FIELDS, #visitor_expr)
+            }
+        }
+        StructForm::ExternallyTagged(_) if cattrs.has_flatten() => quote! {
+            _serde::de::VariantAccess::newtype_variant_seed(__variant, #visitor_expr)
+        },
+        StructForm::ExternallyTagged(_) => quote! {
+            _serde::de::VariantAccess::struct_variant(__variant, FIELDS, #visitor_expr)
+        },
+        StructForm::InternallyTagged(_, deserializer) => quote! {
+            _serde::Deserializer::deserialize_any(#deserializer, #visitor_expr)
+        },
+        StructForm::Untagged(_, deserializer) => quote! {
+            _serde::Deserializer::deserialize_any(#deserializer, #visitor_expr)
+        },
+    };
+
+    quote_block! {
+        #field_visitor
+
+        #[doc(hidden)]
+        struct __Visitor #de_impl_generics #where_clause {
+            marker: _serde::__private::PhantomData<#this_type #ty_generics>,
+            lifetime: _serde::__private::PhantomData<&#delife ()>,
+        }
+
+        impl #de_impl_generics _serde::de::Visitor<#delife> for __Visitor #de_ty_generics #where_clause {
+            type Value = #this_type #ty_generics;
+
+            fn expecting(&self, __formatter: &mut _serde::__private::Formatter) -> _serde::__private::fmt::Result {
+                _serde::__private::Formatter::write_str(__formatter, #expecting)
+            }
+
+            #visit_seq
+
+            #[inline]
+            fn visit_map<__A>(self, mut __map: __A) -> _serde::__private::Result<Self::Value, __A::Error>
+            where
+                __A: _serde::de::MapAccess<#delife>,
+            {
+                #visit_map
+            }
+        }
+
+        #visitor_seed
+
+        #fields_stmt
+
+        #dispatch
+    }
+}
+
+#[cfg(feature = "deserialize_in_place")]
+fn deserialize_struct_in_place(
+    params: &Parameters,
+    fields: &[Field],
+    cattrs: &attr::Container,
+) -> Option<Fragment> {
+    // For now we do not support in_place deserialization for structs that
+    // are represented as a map.
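+    // (Editor's note: e.g. a hypothetical `struct S { #[serde(flatten)] rest: Extra }`
+    // has only a map representation backed by buffered content, so we return None
+    // here and the caller falls back to the default `deserialize` + assignment path.)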
+    if cattrs.has_flatten() {
+        return None;
+    }
+
+    let this_type = &params.this_type;
+    let (de_impl_generics, de_ty_generics, ty_generics, where_clause) =
+        split_with_de_lifetime(params);
+    let delife = params.borrowed.de_lifetime();
+
+    let expecting = format!("struct {}", params.type_name());
+    let expecting = cattrs.expecting().unwrap_or(&expecting);
+
+    let field_names_idents: Vec<_> = fields
+        .iter()
+        .enumerate()
+        .filter(|&(_, field)| !field.attrs.skip_deserializing())
+        .map(|(i, field)| {
+            (
+                field.attrs.name().deserialize_name(),
+                field_i(i),
+                field.attrs.aliases(),
+            )
+        })
+        .collect();
+
+    let field_visitor = deserialize_field_identifier(&field_names_idents, cattrs);
+
+    let mut_seq = if field_names_idents.is_empty() {
+        quote!(_)
+    } else {
+        quote!(mut __seq)
+    };
+    let visit_seq = Stmts(deserialize_seq_in_place(params, fields, cattrs, expecting));
+    let visit_map = Stmts(deserialize_map_in_place(params, fields, cattrs));
+    let field_names = field_names_idents
+        .iter()
+        .flat_map(|&(_, _, aliases)| aliases);
+    let type_name = cattrs.name().deserialize_name();
+
+    let in_place_impl_generics = de_impl_generics.in_place();
+    let in_place_ty_generics = de_ty_generics.in_place();
+    let place_life = place_lifetime();
+
+    Some(quote_block! {
+        #field_visitor
+
+        #[doc(hidden)]
+        struct __Visitor #in_place_impl_generics #where_clause {
+            place: &#place_life mut #this_type #ty_generics,
+            lifetime: _serde::__private::PhantomData<&#delife ()>,
+        }
+
+        impl #in_place_impl_generics _serde::de::Visitor<#delife> for __Visitor #in_place_ty_generics #where_clause {
+            type Value = ();
+
+            fn expecting(&self, __formatter: &mut _serde::__private::Formatter) -> _serde::__private::fmt::Result {
+                _serde::__private::Formatter::write_str(__formatter, #expecting)
+            }
+
+            #[inline]
+            fn visit_seq<__A>(self, #mut_seq: __A) -> _serde::__private::Result<Self::Value, __A::Error>
+            where
+                __A: _serde::de::SeqAccess<#delife>,
+            {
+                #visit_seq
+            }
+
+            #[inline]
+            fn visit_map<__A>(self, mut __map: __A) -> _serde::__private::Result<Self::Value, __A::Error>
+            where
+                __A: _serde::de::MapAccess<#delife>,
+            {
+                #visit_map
+            }
+        }
+
+        #[doc(hidden)]
+        const FIELDS: &'static [&'static str] = &[ #(#field_names),* ];
+
+        _serde::Deserializer::deserialize_struct(__deserializer, #type_name, FIELDS, __Visitor {
+            place: __place,
+            lifetime: _serde::__private::PhantomData,
+        })
+    })
+}
+
+fn deserialize_enum(
+    params: &Parameters,
+    variants: &[Variant],
+    cattrs: &attr::Container,
+) -> Fragment {
+    // It has already been checked (in ast.rs) that all untagged variants appear
+    // at the end.
+    match variants.iter().position(|var| var.attrs.untagged()) {
+        Some(variant_idx) => {
+            let (tagged, untagged) = variants.split_at(variant_idx);
+            let tagged_frag = Expr(deserialize_homogeneous_enum(params, tagged, cattrs));
+            deserialize_untagged_enum_after(params, untagged, cattrs, Some(tagged_frag))
+        }
+        None => deserialize_homogeneous_enum(params, variants, cattrs),
+    }
+}
+
+fn deserialize_homogeneous_enum(
+    params: &Parameters,
+    variants: &[Variant],
+    cattrs: &attr::Container,
+) -> Fragment {
+    match cattrs.tag() {
+        attr::TagType::External => deserialize_externally_tagged_enum(params, variants, cattrs),
+        attr::TagType::Internal { tag } => {
+            deserialize_internally_tagged_enum(params, variants, cattrs, tag)
+        }
+        attr::TagType::Adjacent { tag, content } => {
+            deserialize_adjacently_tagged_enum(params, variants, cattrs, tag, content)
+        }
+        attr::TagType::None => deserialize_untagged_enum(params, variants, cattrs),
+    }
+}
+
+fn prepare_enum_variant_enum(
+    variants: &[Variant],
+    cattrs: &attr::Container,
+) -> (TokenStream, Stmts) {
+    let mut deserialized_variants = variants
+        .iter()
+        .enumerate()
+        .filter(|&(_, variant)| !variant.attrs.skip_deserializing());
+
+    let variant_names_idents: Vec<_> = deserialized_variants
+        .clone()
+        .map(|(i, variant)| {
+            (
+                variant.attrs.name().deserialize_name(),
+                field_i(i),
+                variant.attrs.aliases(),
+            )
+        })
+        .collect();
+
+    let fallthrough = deserialized_variants
+        .position(|(_, variant)| variant.attrs.other())
+        .map(|other_idx| {
+            let ignore_variant = variant_names_idents[other_idx].1.clone();
+            quote!(_serde::__private::Ok(__Field::#ignore_variant))
+        });
+
+    let variants_stmt = {
+        let variant_names = variant_names_idents.iter().map(|(name, _, _)| name);
+        quote! {
+            #[doc(hidden)]
+            const VARIANTS: &'static [&'static str] = &[ #(#variant_names),* ];
+        }
+    };
+
+    let variant_visitor = Stmts(deserialize_generated_identifier(
+        &variant_names_idents,
+        cattrs,
+        true,
+        None,
+        fallthrough,
+    ));
+
+    (variants_stmt, variant_visitor)
+}
+
+fn deserialize_externally_tagged_enum(
+    params: &Parameters,
+    variants: &[Variant],
+    cattrs: &attr::Container,
+) -> Fragment {
+    let this_type = &params.this_type;
+    let (de_impl_generics, de_ty_generics, ty_generics, where_clause) =
+        split_with_de_lifetime(params);
+    let delife = params.borrowed.de_lifetime();
+
+    let type_name = cattrs.name().deserialize_name();
+    let expecting = format!("enum {}", params.type_name());
+    let expecting = cattrs.expecting().unwrap_or(&expecting);
+
+    let (variants_stmt, variant_visitor) = prepare_enum_variant_enum(variants, cattrs);
+
+    // Match arms to extract a variant from a string
+    let variant_arms = variants
+        .iter()
+        .enumerate()
+        .filter(|&(_, variant)| !variant.attrs.skip_deserializing())
+        .map(|(i, variant)| {
+            let variant_name = field_i(i);
+
+            let block = Match(deserialize_externally_tagged_variant(
+                params, variant, cattrs,
+            ));
+
+            quote! {
+                (__Field::#variant_name, __variant) => #block
+            }
+        });
+
+    let all_skipped = variants
+        .iter()
+        .all(|variant| variant.attrs.skip_deserializing());
+    let match_variant = if all_skipped {
+        // This is an empty enum like `enum Impossible {}` or an enum in which
+        // all variants have `#[serde(skip_deserializing)]`.
+        quote! {
+            // FIXME: Once feature(exhaustive_patterns) is stable:
+            // let _serde::__private::Err(__err) = _serde::de::EnumAccess::variant::<__Field>(__data);
+            // _serde::__private::Err(__err)
+            _serde::__private::Result::map(
+                _serde::de::EnumAccess::variant::<__Field>(__data),
+                |(__impossible, _)| match __impossible {})
+        }
+    } else {
+        quote! {
+            match _serde::de::EnumAccess::variant(__data)? {
+                #(#variant_arms)*
+            }
+        }
+    };
+
+    quote_block!
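+    // Editor's illustration: externally tagged is serde's default enum layout,
+    // e.g. `enum E { A(u32) }` matches JSON `{"A": 42}`. The visitor below reads
+    // the variant key as a __Field and dispatches each `(__Field::__fieldN,
+    // __variant)` pair to the corresponding variant arm.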
+    {
+        #variant_visitor
+
+        #[doc(hidden)]
+        struct __Visitor #de_impl_generics #where_clause {
+            marker: _serde::__private::PhantomData<#this_type #ty_generics>,
+            lifetime: _serde::__private::PhantomData<&#delife ()>,
+        }
+
+        impl #de_impl_generics _serde::de::Visitor<#delife> for __Visitor #de_ty_generics #where_clause {
+            type Value = #this_type #ty_generics;
+
+            fn expecting(&self, __formatter: &mut _serde::__private::Formatter) -> _serde::__private::fmt::Result {
+                _serde::__private::Formatter::write_str(__formatter, #expecting)
+            }
+
+            fn visit_enum<__A>(self, __data: __A) -> _serde::__private::Result<Self::Value, __A::Error>
+            where
+                __A: _serde::de::EnumAccess<#delife>,
+            {
+                #match_variant
+            }
+        }
+
+        #variants_stmt
+
+        _serde::Deserializer::deserialize_enum(
+            __deserializer,
+            #type_name,
+            VARIANTS,
+            __Visitor {
+                marker: _serde::__private::PhantomData::<#this_type #ty_generics>,
+                lifetime: _serde::__private::PhantomData,
+            },
+        )
+    }
+}
+
+fn deserialize_internally_tagged_enum(
+    params: &Parameters,
+    variants: &[Variant],
+    cattrs: &attr::Container,
+    tag: &str,
+) -> Fragment {
+    let (variants_stmt, variant_visitor) = prepare_enum_variant_enum(variants, cattrs);
+
+    // Match arms to extract a variant from a string
+    let variant_arms = variants
+        .iter()
+        .enumerate()
+        .filter(|&(_, variant)| !variant.attrs.skip_deserializing())
+        .map(|(i, variant)| {
+            let variant_name = field_i(i);
+
+            let block = Match(deserialize_internally_tagged_variant(
+                params,
+                variant,
+                cattrs,
+                quote!(__deserializer),
+            ));
+
+            quote! {
+                __Field::#variant_name => #block
+            }
+        });
+
+    let expecting = format!("internally tagged enum {}", params.type_name());
+    let expecting = cattrs.expecting().unwrap_or(&expecting);
+
+    quote_block! {
+        #variant_visitor
+
+        #variants_stmt
+
+        let (__tag, __content) = _serde::Deserializer::deserialize_any(
+            __deserializer,
+            _serde::__private::de::TaggedContentVisitor::<__Field>::new(#tag, #expecting))?;
+        let __deserializer = _serde::__private::de::ContentDeserializer::<__D::Error>::new(__content);
+
+        match __tag {
+            #(#variant_arms)*
+        }
+    }
+}
+
+fn deserialize_adjacently_tagged_enum(
+    params: &Parameters,
+    variants: &[Variant],
+    cattrs: &attr::Container,
+    tag: &str,
+    content: &str,
+) -> Fragment {
+    let this_type = &params.this_type;
+    let this_value = &params.this_value;
+    let (de_impl_generics, de_ty_generics, ty_generics, where_clause) =
+        split_with_de_lifetime(params);
+    let delife = params.borrowed.de_lifetime();
+
+    let (variants_stmt, variant_visitor) = prepare_enum_variant_enum(variants, cattrs);
+
+    let variant_arms: &Vec<_> = &variants
+        .iter()
+        .enumerate()
+        .filter(|&(_, variant)| !variant.attrs.skip_deserializing())
+        .map(|(i, variant)| {
+            let variant_index = field_i(i);
+
+            let block = Match(deserialize_untagged_variant(
+                params,
+                variant,
+                cattrs,
+                quote!(__deserializer),
+            ));
+
+            quote! {
+                __Field::#variant_index => #block
+            }
+        })
+        .collect();
+
+    let rust_name = params.type_name();
+    let expecting = format!("adjacently tagged enum {}", rust_name);
+    let expecting = cattrs.expecting().unwrap_or(&expecting);
+    let type_name = cattrs.name().deserialize_name();
+    let deny_unknown_fields = cattrs.deny_unknown_fields();
+
+    // If unknown fields are allowed, we pick the visitor that can step over
+    // those. Otherwise we pick the visitor that fails on unknown keys.
+    let field_visitor_ty = if deny_unknown_fields {
+        quote! { _serde::__private::de::TagOrContentFieldVisitor }
+    } else {
+        quote!
{ _serde::__private::de::TagContentOtherFieldVisitor } + }; + + let tag_or_content = quote! { + #field_visitor_ty { + tag: #tag, + content: #content, + } + }; + + let variant_seed = quote! { + _serde::__private::de::AdjacentlyTaggedEnumVariantSeed::<__Field> { + enum_name: #rust_name, + variants: VARIANTS, + fields_enum: _serde::__private::PhantomData + } + }; + + let mut missing_content = quote! { + _serde::__private::Err(<__A::Error as _serde::de::Error>::missing_field(#content)) + }; + let mut missing_content_fallthrough = quote!(); + let missing_content_arms = variants + .iter() + .enumerate() + .filter(|&(_, variant)| !variant.attrs.skip_deserializing()) + .filter_map(|(i, variant)| { + let variant_index = field_i(i); + let variant_ident = &variant.ident; + + let arm = match variant.style { + Style::Unit => quote! { + _serde::__private::Ok(#this_value::#variant_ident) + }, + Style::Newtype if variant.attrs.deserialize_with().is_none() => { + let span = variant.original.span(); + let func = quote_spanned!(span=> _serde::__private::de::missing_field); + quote! { + #func(#content).map(#this_value::#variant_ident) + } + } + _ => { + missing_content_fallthrough = quote!(_ => #missing_content); + return None; + } + }; + Some(quote! { + __Field::#variant_index => #arm, + }) + }) + .collect::<Vec<_>>(); + if !missing_content_arms.is_empty() { + missing_content = quote! { + match __field { + #(#missing_content_arms)* + #missing_content_fallthrough + } + }; + } + + // Advance the map by one key, returning early in case of error. + let next_key = quote! { + _serde::de::MapAccess::next_key_seed(&mut __map, #tag_or_content)? + }; + + let variant_from_map = quote! { + _serde::de::MapAccess::next_value_seed(&mut __map, #variant_seed)? + }; + + // When allowing unknown fields, we want to transparently step through keys + // we don't care about until we find `tag`, `content`, or run out of keys. + let next_relevant_key = if deny_unknown_fields { + next_key + } else { + quote!({ + let mut __rk : _serde::__private::Option<_serde::__private::de::TagOrContentField> = _serde::__private::None; + while let _serde::__private::Some(__k) = #next_key { + match __k { + _serde::__private::de::TagContentOtherField::Other => { + let _ = _serde::de::MapAccess::next_value::<_serde::de::IgnoredAny>(&mut __map)?; + continue; + }, + _serde::__private::de::TagContentOtherField::Tag => { + __rk = _serde::__private::Some(_serde::__private::de::TagOrContentField::Tag); + break; + } + _serde::__private::de::TagContentOtherField::Content => { + __rk = _serde::__private::Some(_serde::__private::de::TagOrContentField::Content); + break; + } + } + } + + __rk + }) + }; + + // Step through remaining keys, looking for duplicates of previously-seen + // keys. When unknown fields are denied, any key that isn't a duplicate will + // at this point immediately produce an error. + let visit_remaining_keys = quote! { + match #next_relevant_key { + _serde::__private::Some(_serde::__private::de::TagOrContentField::Tag) => { + _serde::__private::Err(<__A::Error as _serde::de::Error>::duplicate_field(#tag)) + } + _serde::__private::Some(_serde::__private::de::TagOrContentField::Content) => { + _serde::__private::Err(<__A::Error as _serde::de::Error>::duplicate_field(#content)) + } + _serde::__private::None => _serde::__private::Ok(__ret), + } + }; + + let finish_content_then_tag = if variant_arms.is_empty() { + quote! { + match #variant_from_map {} + } + } else { + quote! 
+        {
+            let __ret = match #variant_from_map {
+                // Deserialize the buffered content now that we know the variant.
+                #(#variant_arms)*
+            }?;
+            // Visit remaining keys, looking for duplicates.
+            #visit_remaining_keys
+        }
+    };
+
+    quote_block! {
+        #variant_visitor
+
+        #variants_stmt
+
+        #[doc(hidden)]
+        struct __Seed #de_impl_generics #where_clause {
+            field: __Field,
+            marker: _serde::__private::PhantomData<#this_type #ty_generics>,
+            lifetime: _serde::__private::PhantomData<&#delife ()>,
+        }
+
+        impl #de_impl_generics _serde::de::DeserializeSeed<#delife> for __Seed #de_ty_generics #where_clause {
+            type Value = #this_type #ty_generics;
+
+            fn deserialize<__D>(self, __deserializer: __D) -> _serde::__private::Result<Self::Value, __D::Error>
+            where
+                __D: _serde::Deserializer<#delife>,
+            {
+                match self.field {
+                    #(#variant_arms)*
+                }
+            }
+        }
+
+        #[doc(hidden)]
+        struct __Visitor #de_impl_generics #where_clause {
+            marker: _serde::__private::PhantomData<#this_type #ty_generics>,
+            lifetime: _serde::__private::PhantomData<&#delife ()>,
+        }
+
+        impl #de_impl_generics _serde::de::Visitor<#delife> for __Visitor #de_ty_generics #where_clause {
+            type Value = #this_type #ty_generics;
+
+            fn expecting(&self, __formatter: &mut _serde::__private::Formatter) -> _serde::__private::fmt::Result {
+                _serde::__private::Formatter::write_str(__formatter, #expecting)
+            }
+
+            fn visit_map<__A>(self, mut __map: __A) -> _serde::__private::Result<Self::Value, __A::Error>
+            where
+                __A: _serde::de::MapAccess<#delife>,
+            {
+                // Visit the first relevant key.
+                match #next_relevant_key {
+                    // First key is the tag.
+                    _serde::__private::Some(_serde::__private::de::TagOrContentField::Tag) => {
+                        // Parse the tag.
+                        let __field = #variant_from_map;
+                        // Visit the second key.
+                        match #next_relevant_key {
+                            // Second key is a duplicate of the tag.
+                            _serde::__private::Some(_serde::__private::de::TagOrContentField::Tag) => {
+                                _serde::__private::Err(<__A::Error as _serde::de::Error>::duplicate_field(#tag))
+                            }
+                            // Second key is the content.
+                            _serde::__private::Some(_serde::__private::de::TagOrContentField::Content) => {
+                                let __ret = _serde::de::MapAccess::next_value_seed(&mut __map,
+                                    __Seed {
+                                        field: __field,
+                                        marker: _serde::__private::PhantomData,
+                                        lifetime: _serde::__private::PhantomData,
+                                    })?;
+                                // Visit remaining keys, looking for duplicates.
+                                #visit_remaining_keys
+                            }
+                            // There is no second key; might be okay if we have a unit variant.
+                            _serde::__private::None => #missing_content
+                        }
+                    }
+                    // First key is the content.
+                    _serde::__private::Some(_serde::__private::de::TagOrContentField::Content) => {
+                        // Buffer up the content.
+                        let __content = _serde::de::MapAccess::next_value::<_serde::__private::de::Content>(&mut __map)?;
+                        // Visit the second key.
+                        match #next_relevant_key {
+                            // Second key is the tag.
+                            _serde::__private::Some(_serde::__private::de::TagOrContentField::Tag) => {
+                                let __deserializer = _serde::__private::de::ContentDeserializer::<__A::Error>::new(__content);
+                                #finish_content_then_tag
+                            }
+                            // Second key is a duplicate of the content.
+                            _serde::__private::Some(_serde::__private::de::TagOrContentField::Content) => {
+                                _serde::__private::Err(<__A::Error as _serde::de::Error>::duplicate_field(#content))
+                            }
+                            // There is no second key.
+                            _serde::__private::None => {
+                                _serde::__private::Err(<__A::Error as _serde::de::Error>::missing_field(#tag))
+                            }
+                        }
+                    }
+                    // There is no first key.
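+                    // (Editor's note: e.g. an empty map `{}` arriving for a
+                    // hypothetical `#[serde(tag = "t", content = "c")]` enum;
+                    // the arm below reports a "missing field `t`" error.)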
+ _serde::__private::None => { + _serde::__private::Err(<__A::Error as _serde::de::Error>::missing_field(#tag)) + } + } + } + + fn visit_seq<__A>(self, mut __seq: __A) -> _serde::__private::Result<Self::Value, __A::Error> + where + __A: _serde::de::SeqAccess<#delife>, + { + // Visit the first element - the tag. + match _serde::de::SeqAccess::next_element(&mut __seq)? { + _serde::__private::Some(__field) => { + // Visit the second element - the content. + match _serde::de::SeqAccess::next_element_seed( + &mut __seq, + __Seed { + field: __field, + marker: _serde::__private::PhantomData, + lifetime: _serde::__private::PhantomData, + }, + )? { + _serde::__private::Some(__ret) => _serde::__private::Ok(__ret), + // There is no second element. + _serde::__private::None => { + _serde::__private::Err(_serde::de::Error::invalid_length(1, &self)) + } + } + } + // There is no first element. + _serde::__private::None => { + _serde::__private::Err(_serde::de::Error::invalid_length(0, &self)) + } + } + } + } + + #[doc(hidden)] + const FIELDS: &'static [&'static str] = &[#tag, #content]; + _serde::Deserializer::deserialize_struct( + __deserializer, + #type_name, + FIELDS, + __Visitor { + marker: _serde::__private::PhantomData::<#this_type #ty_generics>, + lifetime: _serde::__private::PhantomData, + }, + ) + } +} + +fn deserialize_untagged_enum( + params: &Parameters, + variants: &[Variant], + cattrs: &attr::Container, +) -> Fragment { + let first_attempt = None; + deserialize_untagged_enum_after(params, variants, cattrs, first_attempt) +} + +fn deserialize_untagged_enum_after( + params: &Parameters, + variants: &[Variant], + cattrs: &attr::Container, + first_attempt: Option<Expr>, +) -> Fragment { + let attempts = variants + .iter() + .filter(|variant| !variant.attrs.skip_deserializing()) + .map(|variant| { + Expr(deserialize_untagged_variant( + params, + variant, + cattrs, + quote!(__deserializer), + )) + }); + // TODO this message could be better by saving the errors from the failed + // attempts. The heuristic used by TOML was to count the number of fields + // processed before an error, and use the error that happened after the + // largest number of fields. I'm not sure I like that. Maybe it would be + // better to save all the errors and combine them into one message that + // explains why none of the variants matched. + let fallthrough_msg = format!( + "data did not match any variant of untagged enum {}", + params.type_name() + ); + let fallthrough_msg = cattrs.expecting().unwrap_or(&fallthrough_msg); + + // Ignore any error associated with non-untagged deserialization so that we + // can fall through to the untagged variants. This may be infallible so we + // need to provide the error type. + let first_attempt = first_attempt.map(|expr| { + quote! { + if let _serde::__private::Result::<_, __D::Error>::Ok(__ok) = (|| #expr)() { + return _serde::__private::Ok(__ok); + } + } + }); + + quote_block! 
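+    // Editor's sketch of the strategy below, for a hypothetical
+    // `#[serde(untagged)] enum E { A(u32), B(String) }`: the input is buffered
+    // once into a Content tree, then each variant is tried in declaration order
+    // through a ContentRefDeserializer; the first attempt that returns Ok wins,
+    // and if every attempt fails we emit the generic "data did not match any
+    // variant" error.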
+    {
+        let __content = <_serde::__private::de::Content as _serde::Deserialize>::deserialize(__deserializer)?;
+        let __deserializer = _serde::__private::de::ContentRefDeserializer::<__D::Error>::new(&__content);
+
+        #first_attempt
+
+        #(
+            if let _serde::__private::Ok(__ok) = #attempts {
+                return _serde::__private::Ok(__ok);
+            }
+        )*
+
+        _serde::__private::Err(_serde::de::Error::custom(#fallthrough_msg))
+    }
+}
+
+fn deserialize_externally_tagged_variant(
+    params: &Parameters,
+    variant: &Variant,
+    cattrs: &attr::Container,
+) -> Fragment {
+    if let Some(path) = variant.attrs.deserialize_with() {
+        let (wrapper, wrapper_ty, unwrap_fn) = wrap_deserialize_variant_with(params, variant, path);
+        return quote_block! {
+            #wrapper
+            _serde::__private::Result::map(
+                _serde::de::VariantAccess::newtype_variant::<#wrapper_ty>(__variant), #unwrap_fn)
+        };
+    }
+
+    let variant_ident = &variant.ident;
+
+    match variant.style {
+        Style::Unit => {
+            let this_value = &params.this_value;
+            quote_block! {
+                _serde::de::VariantAccess::unit_variant(__variant)?;
+                _serde::__private::Ok(#this_value::#variant_ident)
+            }
+        }
+        Style::Newtype => deserialize_externally_tagged_newtype_variant(
+            variant_ident,
+            params,
+            &variant.fields[0],
+            cattrs,
+        ),
+        Style::Tuple => deserialize_tuple(
+            params,
+            &variant.fields,
+            cattrs,
+            TupleForm::ExternallyTagged(variant_ident),
+        ),
+        Style::Struct => deserialize_struct(
+            params,
+            &variant.fields,
+            cattrs,
+            StructForm::ExternallyTagged(variant_ident),
+        ),
+    }
+}
+
+// Generates a significant part of the visit_seq and visit_map bodies of
+// visitors for the variants of an internally tagged enum.
+fn deserialize_internally_tagged_variant(
+    params: &Parameters,
+    variant: &Variant,
+    cattrs: &attr::Container,
+    deserializer: TokenStream,
+) -> Fragment {
+    if variant.attrs.deserialize_with().is_some() {
+        return deserialize_untagged_variant(params, variant, cattrs, deserializer);
+    }
+
+    let variant_ident = &variant.ident;
+
+    match effective_style(variant) {
+        Style::Unit => {
+            let this_value = &params.this_value;
+            let type_name = params.type_name();
+            let variant_name = variant.ident.to_string();
+            let default = variant.fields.first().map(|field| {
+                let default = Expr(expr_is_missing(field, cattrs));
+                quote!((#default))
+            });
+            quote_block! {
+                _serde::Deserializer::deserialize_any(#deserializer, _serde::__private::de::InternallyTaggedUnitVisitor::new(#type_name, #variant_name))?;
+                _serde::__private::Ok(#this_value::#variant_ident #default)
+            }
+        }
+        Style::Newtype => deserialize_untagged_newtype_variant(
+            variant_ident,
+            params,
+            &variant.fields[0],
+            &deserializer,
+        ),
+        Style::Struct => deserialize_struct(
+            params,
+            &variant.fields,
+            cattrs,
+            StructForm::InternallyTagged(variant_ident, deserializer),
+        ),
+        Style::Tuple => unreachable!("checked in serde_derive_internals"),
+    }
+}
+
+fn deserialize_untagged_variant(
+    params: &Parameters,
+    variant: &Variant,
+    cattrs: &attr::Container,
+    deserializer: TokenStream,
+) -> Fragment {
+    if let Some(path) = variant.attrs.deserialize_with() {
+        let unwrap_fn = unwrap_to_variant_closure(params, variant, false);
+        return quote_block! {
+            _serde::__private::Result::map(#path(#deserializer), #unwrap_fn)
+        };
+    }
+
+    let variant_ident = &variant.ident;
+
+    match effective_style(variant) {
+        Style::Unit => {
+            let this_value = &params.this_value;
+            let type_name = params.type_name();
+            let variant_name = variant.ident.to_string();
+            let default = variant.fields.first().map(|field| {
+                let default = Expr(expr_is_missing(field, cattrs));
+                quote!((#default))
+            });
+            quote_expr! {
+                match _serde::Deserializer::deserialize_any(
+                    #deserializer,
+                    _serde::__private::de::UntaggedUnitVisitor::new(#type_name, #variant_name)
+                ) {
+                    _serde::__private::Ok(()) => _serde::__private::Ok(#this_value::#variant_ident #default),
+                    _serde::__private::Err(__err) => _serde::__private::Err(__err),
+                }
+            }
+        }
+        Style::Newtype => deserialize_untagged_newtype_variant(
+            variant_ident,
+            params,
+            &variant.fields[0],
+            &deserializer,
+        ),
+        Style::Tuple => deserialize_tuple(
+            params,
+            &variant.fields,
+            cattrs,
+            TupleForm::Untagged(variant_ident, deserializer),
+        ),
+        Style::Struct => deserialize_struct(
+            params,
+            &variant.fields,
+            cattrs,
+            StructForm::Untagged(variant_ident, deserializer),
+        ),
+    }
+}
+
+fn deserialize_externally_tagged_newtype_variant(
+    variant_ident: &syn::Ident,
+    params: &Parameters,
+    field: &Field,
+    cattrs: &attr::Container,
+) -> Fragment {
+    let this_value = &params.this_value;
+
+    if field.attrs.skip_deserializing() {
+        let default = Expr(expr_is_missing(field, cattrs));
+        return quote_block! {
+            _serde::de::VariantAccess::unit_variant(__variant)?;
+            _serde::__private::Ok(#this_value::#variant_ident(#default))
+        };
+    }
+
+    match field.attrs.deserialize_with() {
+        None => {
+            let field_ty = field.ty;
+            let span = field.original.span();
+            let func =
+                quote_spanned!(span=> _serde::de::VariantAccess::newtype_variant::<#field_ty>);
+            quote_expr! {
+                _serde::__private::Result::map(#func(__variant), #this_value::#variant_ident)
+            }
+        }
+        Some(path) => {
+            let (wrapper, wrapper_ty) = wrap_deserialize_field_with(params, field.ty, path);
+            quote_block! {
+                #wrapper
+                _serde::__private::Result::map(
+                    _serde::de::VariantAccess::newtype_variant::<#wrapper_ty>(__variant),
+                    |__wrapper| #this_value::#variant_ident(__wrapper.value))
+            }
+        }
+    }
+}
+
+fn deserialize_untagged_newtype_variant(
+    variant_ident: &syn::Ident,
+    params: &Parameters,
+    field: &Field,
+    deserializer: &TokenStream,
+) -> Fragment {
+    let this_value = &params.this_value;
+    let field_ty = field.ty;
+    match field.attrs.deserialize_with() {
+        None => {
+            let span = field.original.span();
+            let func = quote_spanned!(span=> <#field_ty as _serde::Deserialize>::deserialize);
+            quote_expr! {
+                _serde::__private::Result::map(#func(#deserializer), #this_value::#variant_ident)
+            }
+        }
+        Some(path) => {
+            quote_block! {
+                let __value: _serde::__private::Result<#field_ty, _> = #path(#deserializer);
+                _serde::__private::Result::map(__value, #this_value::#variant_ident)
+            }
+        }
+    }
+}
+
+fn deserialize_generated_identifier(
+    fields: &[(&str, Ident, &BTreeSet<String>)],
+    cattrs: &attr::Container,
+    is_variant: bool,
+    ignore_variant: Option<TokenStream>,
+    fallthrough: Option<TokenStream>,
+) -> Fragment {
+    let this_value = quote!(__Field);
+    let field_idents: &Vec<_> = &fields.iter().map(|(_, ident, _)| ident).collect();
+
+    let visitor_impl = Stmts(deserialize_identifier(
+        &this_value,
+        fields,
+        is_variant,
+        fallthrough,
+        None,
+        !is_variant && cattrs.has_flatten(),
+        None,
+    ));
+
+    let lifetime = if !is_variant && cattrs.has_flatten() {
+        Some(quote!(<'de>))
+    } else {
+        None
+    };
+
+    quote_block! {
+        #[allow(non_camel_case_types)]
+        #[doc(hidden)]
+        enum __Field #lifetime {
+            #(#field_idents,)*
+            #ignore_variant
+        }
+
+        #[doc(hidden)]
+        struct __FieldVisitor;
+
+        impl<'de> _serde::de::Visitor<'de> for __FieldVisitor {
+            type Value = __Field #lifetime;
+
+            #visitor_impl
+        }
+
+        impl<'de> _serde::Deserialize<'de> for __Field #lifetime {
+            #[inline]
+            fn deserialize<__D>(__deserializer: __D) -> _serde::__private::Result<Self, __D::Error>
+            where
+                __D: _serde::Deserializer<'de>,
+            {
+                _serde::Deserializer::deserialize_identifier(__deserializer, __FieldVisitor)
+            }
+        }
+    }
+}
+
+/// Generates an enum, and its `Deserialize` implementation, that represents
+/// each non-skipped field of the struct.
+fn deserialize_field_identifier(
+    fields: &[(&str, Ident, &BTreeSet<String>)],
+    cattrs: &attr::Container,
+) -> Stmts {
+    let (ignore_variant, fallthrough) = if cattrs.has_flatten() {
+        let ignore_variant = quote!(__other(_serde::__private::de::Content<'de>),);
+        let fallthrough = quote!(_serde::__private::Ok(__Field::__other(__value)));
+        (Some(ignore_variant), Some(fallthrough))
+    } else if cattrs.deny_unknown_fields() {
+        (None, None)
+    } else {
+        let ignore_variant = quote!(__ignore,);
+        let fallthrough = quote!(_serde::__private::Ok(__Field::__ignore));
+        (Some(ignore_variant), Some(fallthrough))
+    };
+
+    Stmts(deserialize_generated_identifier(
+        fields,
+        cattrs,
+        false,
+        ignore_variant,
+        fallthrough,
+    ))
+}
+
+// Generates the `Deserialize::deserialize` body for an enum with a
+// `serde(field_identifier)` or `serde(variant_identifier)` attribute.
+fn deserialize_custom_identifier(
+    params: &Parameters,
+    variants: &[Variant],
+    cattrs: &attr::Container,
+) -> Fragment {
+    let is_variant = match cattrs.identifier() {
+        attr::Identifier::Variant => true,
+        attr::Identifier::Field => false,
+        attr::Identifier::No => unreachable!(),
+    };
+
+    let this_type = params.this_type.to_token_stream();
+    let this_value = params.this_value.to_token_stream();
+
+    let (ordinary, fallthrough, fallthrough_borrowed) = if let Some(last) = variants.last() {
+        let last_ident = &last.ident;
+        if last.attrs.other() {
+            // Process the `serde(other)` attribute. It is always found on the
+            // last variant (checked in `check_identifier`), so all preceding
+            // variants are ordinary ones.
+            let ordinary = &variants[..variants.len() - 1];
+            let fallthrough = quote!(_serde::__private::Ok(#this_value::#last_ident));
+            (ordinary, Some(fallthrough), None)
+        } else if let Style::Newtype = last.style {
+            let ordinary = &variants[..variants.len() - 1];
+            let fallthrough = |value| {
+                quote!
{ + _serde::__private::Result::map( + _serde::Deserialize::deserialize( + _serde::__private::de::IdentifierDeserializer::from(#value) + ), + #this_value::#last_ident) + } + }; + ( + ordinary, + Some(fallthrough(quote!(__value))), + Some(fallthrough(quote!(_serde::__private::de::Borrowed( + __value + )))), + ) + } else { + (variants, None, None) + } + } else { + (variants, None, None) + }; + + let names_idents: Vec<_> = ordinary + .iter() + .map(|variant| { + ( + variant.attrs.name().deserialize_name(), + variant.ident.clone(), + variant.attrs.aliases(), + ) + }) + .collect(); + + let names = names_idents.iter().flat_map(|&(_, _, aliases)| aliases); + + let names_const = if fallthrough.is_some() { + None + } else if is_variant { + let variants = quote! { + #[doc(hidden)] + const VARIANTS: &'static [&'static str] = &[ #(#names),* ]; + }; + Some(variants) + } else { + let fields = quote! { + #[doc(hidden)] + const FIELDS: &'static [&'static str] = &[ #(#names),* ]; + }; + Some(fields) + }; + + let (de_impl_generics, de_ty_generics, ty_generics, where_clause) = + split_with_de_lifetime(params); + let delife = params.borrowed.de_lifetime(); + let visitor_impl = Stmts(deserialize_identifier( + &this_value, + &names_idents, + is_variant, + fallthrough, + fallthrough_borrowed, + false, + cattrs.expecting(), + )); + + quote_block! { + #names_const + + #[doc(hidden)] + struct __FieldVisitor #de_impl_generics #where_clause { + marker: _serde::__private::PhantomData<#this_type #ty_generics>, + lifetime: _serde::__private::PhantomData<&#delife ()>, + } + + impl #de_impl_generics _serde::de::Visitor<#delife> for __FieldVisitor #de_ty_generics #where_clause { + type Value = #this_type #ty_generics; + + #visitor_impl + } + + let __visitor = __FieldVisitor { + marker: _serde::__private::PhantomData::<#this_type #ty_generics>, + lifetime: _serde::__private::PhantomData, + }; + _serde::Deserializer::deserialize_identifier(__deserializer, __visitor) + } +} + +fn deserialize_identifier( + this_value: &TokenStream, + fields: &[(&str, Ident, &BTreeSet<String>)], + is_variant: bool, + fallthrough: Option<TokenStream>, + fallthrough_borrowed: Option<TokenStream>, + collect_other_fields: bool, + expecting: Option<&str>, +) -> Fragment { + let str_mapping = fields.iter().map(|(_, ident, aliases)| { + // `aliases` also contains a main name + quote!(#(#aliases)|* => _serde::__private::Ok(#this_value::#ident)) + }); + let bytes_mapping = fields.iter().map(|(_, ident, aliases)| { + // `aliases` also contains a main name + let aliases = aliases + .iter() + .map(|alias| Literal::byte_string(alias.as_bytes())); + quote!(#(#aliases)|* => _serde::__private::Ok(#this_value::#ident)) + }); + + let expecting = expecting.unwrap_or(if is_variant { + "variant identifier" + } else { + "field identifier" + }); + + let bytes_to_str = if fallthrough.is_some() || collect_other_fields { + None + } else { + Some(quote! { + let __value = &_serde::__private::from_utf8_lossy(__value); + }) + }; + + let ( + value_as_str_content, + value_as_borrowed_str_content, + value_as_bytes_content, + value_as_borrowed_bytes_content, + ) = if collect_other_fields { + ( + Some(quote! { + let __value = _serde::__private::de::Content::String(_serde::__private::ToString::to_string(__value)); + }), + Some(quote! { + let __value = _serde::__private::de::Content::Str(__value); + }), + Some(quote! { + let __value = _serde::__private::de::Content::ByteBuf(__value.to_vec()); + }), + Some(quote! 
{ + let __value = _serde::__private::de::Content::Bytes(__value); + }), + ) + } else { + (None, None, None, None) + }; + + let fallthrough_arm_tokens; + let fallthrough_arm = if let Some(fallthrough) = &fallthrough { + fallthrough + } else if is_variant { + fallthrough_arm_tokens = quote! { + _serde::__private::Err(_serde::de::Error::unknown_variant(__value, VARIANTS)) + }; + &fallthrough_arm_tokens + } else { + fallthrough_arm_tokens = quote! { + _serde::__private::Err(_serde::de::Error::unknown_field(__value, FIELDS)) + }; + &fallthrough_arm_tokens + }; + + let visit_other = if collect_other_fields { + quote! { + fn visit_bool<__E>(self, __value: bool) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::Bool(__value))) + } + + fn visit_i8<__E>(self, __value: i8) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::I8(__value))) + } + + fn visit_i16<__E>(self, __value: i16) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::I16(__value))) + } + + fn visit_i32<__E>(self, __value: i32) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::I32(__value))) + } + + fn visit_i64<__E>(self, __value: i64) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::I64(__value))) + } + + fn visit_u8<__E>(self, __value: u8) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::U8(__value))) + } + + fn visit_u16<__E>(self, __value: u16) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::U16(__value))) + } + + fn visit_u32<__E>(self, __value: u32) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::U32(__value))) + } + + fn visit_u64<__E>(self, __value: u64) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::U64(__value))) + } + + fn visit_f32<__E>(self, __value: f32) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::F32(__value))) + } + + fn visit_f64<__E>(self, __value: f64) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::F64(__value))) + } + + fn visit_char<__E>(self, __value: char) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::Char(__value))) + } + + fn visit_unit<__E>(self) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + _serde::__private::Ok(__Field::__other(_serde::__private::de::Content::Unit)) + } + } + } else { + let u64_mapping = fields.iter().enumerate().map(|(i, (_, ident, _))| { + let i = i as u64; + 
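+            // (Editor's note: some formats serialize identifiers as u64 indices
+            // rather than names, so index 0 maps to the first non-skipped field
+            // or variant, 1 to the second, and so on.)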
quote!(#i => _serde::__private::Ok(#this_value::#ident)) + }); + + let u64_fallthrough_arm_tokens; + let u64_fallthrough_arm = if let Some(fallthrough) = &fallthrough { + fallthrough + } else { + let index_expecting = if is_variant { "variant" } else { "field" }; + let fallthrough_msg = format!("{} index 0 <= i < {}", index_expecting, fields.len()); + u64_fallthrough_arm_tokens = quote! { + _serde::__private::Err(_serde::de::Error::invalid_value( + _serde::de::Unexpected::Unsigned(__value), + &#fallthrough_msg, + )) + }; + &u64_fallthrough_arm_tokens + }; + + quote! { + fn visit_u64<__E>(self, __value: u64) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + match __value { + #(#u64_mapping,)* + _ => #u64_fallthrough_arm, + } + } + } + }; + + let visit_borrowed = if fallthrough_borrowed.is_some() || collect_other_fields { + let str_mapping = str_mapping.clone(); + let bytes_mapping = bytes_mapping.clone(); + let fallthrough_borrowed_arm = fallthrough_borrowed.as_ref().unwrap_or(fallthrough_arm); + Some(quote! { + fn visit_borrowed_str<__E>(self, __value: &'de str) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + match __value { + #(#str_mapping,)* + _ => { + #value_as_borrowed_str_content + #fallthrough_borrowed_arm + } + } + } + + fn visit_borrowed_bytes<__E>(self, __value: &'de [u8]) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + match __value { + #(#bytes_mapping,)* + _ => { + #bytes_to_str + #value_as_borrowed_bytes_content + #fallthrough_borrowed_arm + } + } + } + }) + } else { + None + }; + + quote_block! { + fn expecting(&self, __formatter: &mut _serde::__private::Formatter) -> _serde::__private::fmt::Result { + _serde::__private::Formatter::write_str(__formatter, #expecting) + } + + #visit_other + + fn visit_str<__E>(self, __value: &str) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + match __value { + #(#str_mapping,)* + _ => { + #value_as_str_content + #fallthrough_arm + } + } + } + + fn visit_bytes<__E>(self, __value: &[u8]) -> _serde::__private::Result<Self::Value, __E> + where + __E: _serde::de::Error, + { + match __value { + #(#bytes_mapping,)* + _ => { + #bytes_to_str + #value_as_bytes_content + #fallthrough_arm + } + } + } + + #visit_borrowed + } +} + +fn deserialize_map( + struct_path: &TokenStream, + params: &Parameters, + fields: &[Field], + cattrs: &attr::Container, +) -> Fragment { + // Create the field names for the fields. + let fields_names: Vec<_> = fields + .iter() + .enumerate() + .map(|(i, field)| (field, field_i(i))) + .collect(); + + // Declare each field that will be deserialized. + let let_values = fields_names + .iter() + .filter(|&&(field, _)| !field.attrs.skip_deserializing() && !field.attrs.flatten()) + .map(|(field, name)| { + let field_ty = field.ty; + quote! { + let mut #name: _serde::__private::Option<#field_ty> = _serde::__private::None; + } + }); + + // Collect contents for flatten fields into a buffer + let let_collect = if cattrs.has_flatten() { + Some(quote! { + let mut __collect = _serde::__private::Vec::<_serde::__private::Option<( + _serde::__private::de::Content, + _serde::__private::de::Content + )>>::new(); + }) + } else { + None + }; + + // Match arms to extract a value for a field. 
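+ // A hedged sketch of the generated output, assuming a hypothetical field + // `a: u32` at index 0 (with _serde paths shortened), each arm looks like: + // + //     __Field::__field0 => { + //         if Option::is_some(&__field0) { + //             return Err(<__A::Error as Error>::duplicate_field("a")); + //         } + //         __field0 = Some(MapAccess::next_value::<u32>(&mut __map)?); + //     }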
+ let value_arms = fields_names + .iter() + .filter(|&&(field, _)| !field.attrs.skip_deserializing() && !field.attrs.flatten()) + .map(|(field, name)| { + let deser_name = field.attrs.name().deserialize_name(); + + let visit = match field.attrs.deserialize_with() { + None => { + let field_ty = field.ty; + let span = field.original.span(); + let func = + quote_spanned!(span=> _serde::de::MapAccess::next_value::<#field_ty>); + quote! { + #func(&mut __map)? + } + } + Some(path) => { + let (wrapper, wrapper_ty) = wrap_deserialize_field_with(params, field.ty, path); + quote!({ + #wrapper + match _serde::de::MapAccess::next_value::<#wrapper_ty>(&mut __map) { + _serde::__private::Ok(__wrapper) => __wrapper.value, + _serde::__private::Err(__err) => { + return _serde::__private::Err(__err); + } + } + }) + } + }; + quote! { + __Field::#name => { + if _serde::__private::Option::is_some(&#name) { + return _serde::__private::Err(<__A::Error as _serde::de::Error>::duplicate_field(#deser_name)); + } + #name = _serde::__private::Some(#visit); + } + } + }); + + // Visit ignored values to consume them + let ignored_arm = if cattrs.has_flatten() { + Some(quote! { + __Field::__other(__name) => { + __collect.push(_serde::__private::Some(( + __name, + _serde::de::MapAccess::next_value(&mut __map)?))); + } + }) + } else if cattrs.deny_unknown_fields() { + None + } else { + Some(quote! { + _ => { let _ = _serde::de::MapAccess::next_value::<_serde::de::IgnoredAny>(&mut __map)?; } + }) + }; + + let all_skipped = fields.iter().all(|field| field.attrs.skip_deserializing()); + let match_keys = if cattrs.deny_unknown_fields() && all_skipped { + quote! { + // FIXME: Once feature(exhaustive_patterns) is stable: + // let _serde::__private::None::<__Field> = _serde::de::MapAccess::next_key(&mut __map)?; + _serde::__private::Option::map( + _serde::de::MapAccess::next_key::<__Field>(&mut __map)?, + |__impossible| match __impossible {}); + } + } else { + quote! { + while let _serde::__private::Some(__key) = _serde::de::MapAccess::next_key::<__Field>(&mut __map)? { + match __key { + #(#value_arms)* + #ignored_arm + } + } + } + }; + + let extract_values = fields_names + .iter() + .filter(|&&(field, _)| !field.attrs.skip_deserializing() && !field.attrs.flatten()) + .map(|(field, name)| { + let missing_expr = Match(expr_is_missing(field, cattrs)); + + quote! { + let #name = match #name { + _serde::__private::Some(#name) => #name, + _serde::__private::None => #missing_expr + }; + } + }); + + let extract_collected = fields_names + .iter() + .filter(|&&(field, _)| field.attrs.flatten() && !field.attrs.skip_deserializing()) + .map(|(field, name)| { + let field_ty = field.ty; + let func = match field.attrs.deserialize_with() { + None => { + let span = field.original.span(); + quote_spanned!(span=> _serde::de::Deserialize::deserialize) + } + Some(path) => quote!(#path), + }; + quote! { + let #name: #field_ty = #func( + _serde::__private::de::FlatMapDeserializer( + &mut __collect, + _serde::__private::PhantomData))?; + } + }); + + let collected_deny_unknown_fields = if cattrs.has_flatten() && cattrs.deny_unknown_fields() { + Some(quote! 
{ + if let _serde::__private::Some(_serde::__private::Some((__key, _))) = + __collect.into_iter().filter(_serde::__private::Option::is_some).next() + { + if let _serde::__private::Some(__key) = __key.as_str() { + return _serde::__private::Err( + _serde::de::Error::custom(format_args!("unknown field `{}`", &__key))); + } else { + return _serde::__private::Err( + _serde::de::Error::custom(format_args!("unexpected map key"))); + } + } + }) + } else { + None + }; + + let result = fields_names.iter().map(|(field, name)| { + let member = &field.member; + if field.attrs.skip_deserializing() { + let value = Expr(expr_is_missing(field, cattrs)); + quote!(#member: #value) + } else { + quote!(#member: #name) + } + }); + + let let_default = match cattrs.default() { + attr::Default::Default => Some(quote!( + let __default: Self::Value = _serde::__private::Default::default(); + )), + attr::Default::Path(path) => Some(quote!( + let __default: Self::Value = #path(); + )), + attr::Default::None => { + // We don't need the default value. To prevent an unused variable + // warning, we leave the line empty. + None + } + }; + + let mut result = quote!(#struct_path { #(#result),* }); + if params.has_getter { + let this_type = &params.this_type; + let (_, ty_generics, _) = params.generics.split_for_impl(); + result = quote! { + _serde::__private::Into::<#this_type #ty_generics>::into(#result) + }; + } + + quote_block! { + #(#let_values)* + + #let_collect + + #match_keys + + #let_default + + #(#extract_values)* + + #(#extract_collected)* + + #collected_deny_unknown_fields + + _serde::__private::Ok(#result) + } +} + +#[cfg(feature = "deserialize_in_place")] +fn deserialize_map_in_place( + params: &Parameters, + fields: &[Field], + cattrs: &attr::Container, +) -> Fragment { + assert!(!cattrs.has_flatten()); + + // Create the field names for the fields. + let fields_names: Vec<_> = fields + .iter() + .enumerate() + .map(|(i, field)| (field, field_i(i))) + .collect(); + + // For deserialize_in_place, declare booleans for each field that will be + // deserialized. + let let_flags = fields_names + .iter() + .filter(|&&(field, _)| !field.attrs.skip_deserializing()) + .map(|(_, name)| { + quote! { + let mut #name: bool = false; + } + }); + + // Match arms to extract a value for a field. + let value_arms_from = fields_names + .iter() + .filter(|&&(field, _)| !field.attrs.skip_deserializing()) + .map(|(field, name)| { + let deser_name = field.attrs.name().deserialize_name(); + let member = &field.member; + + let visit = match field.attrs.deserialize_with() { + None => { + quote! { + _serde::de::MapAccess::next_value_seed(&mut __map, _serde::__private::de::InPlaceSeed(&mut self.place.#member))? + } + } + Some(path) => { + let (wrapper, wrapper_ty) = wrap_deserialize_field_with(params, field.ty, path); + quote!({ + #wrapper + self.place.#member = match _serde::de::MapAccess::next_value::<#wrapper_ty>(&mut __map) { + _serde::__private::Ok(__wrapper) => __wrapper.value, + _serde::__private::Err(__err) => { + return _serde::__private::Err(__err); + } + }; + }) + } + }; + quote! { + __Field::#name => { + if #name { + return _serde::__private::Err(<__A::Error as _serde::de::Error>::duplicate_field(#deser_name)); + } + #visit; + #name = true; + } + } + }); + + // Visit ignored values to consume them + let ignored_arm = if cattrs.deny_unknown_fields() { + None + } else { + Some(quote!
{ + _ => { let _ = _serde::de::MapAccess::next_value::<_serde::de::IgnoredAny>(&mut __map)?; } + }) + }; + + let all_skipped = fields.iter().all(|field| field.attrs.skip_deserializing()); + + let match_keys = if cattrs.deny_unknown_fields() && all_skipped { + quote! { + // FIXME: Once feature(exhaustive_patterns) is stable: + // let _serde::__private::None::<__Field> = _serde::de::MapAccess::next_key(&mut __map)?; + _serde::__private::Option::map( + _serde::de::MapAccess::next_key::<__Field>(&mut __map)?, + |__impossible| match __impossible {}); + } + } else { + quote! { + while let _serde::__private::Some(__key) = _serde::de::MapAccess::next_key::<__Field>(&mut __map)? { + match __key { + #(#value_arms_from)* + #ignored_arm + } + } + } + }; + + let check_flags = fields_names + .iter() + .filter(|&&(field, _)| !field.attrs.skip_deserializing()) + .map(|(field, name)| { + let missing_expr = expr_is_missing(field, cattrs); + // If missing_expr unconditionally returns an error, don't try + // to assign its value to self.place. + if field.attrs.default().is_none() + && cattrs.default().is_none() + && field.attrs.deserialize_with().is_some() + { + let missing_expr = Stmts(missing_expr); + quote! { + if !#name { + #missing_expr; + } + } + } else { + let member = &field.member; + let missing_expr = Expr(missing_expr); + quote! { + if !#name { + self.place.#member = #missing_expr; + }; + } + } + }); + + let this_type = &params.this_type; + let (_, _, ty_generics, _) = split_with_de_lifetime(params); + + let let_default = match cattrs.default() { + attr::Default::Default => Some(quote!( + let __default: #this_type #ty_generics = _serde::__private::Default::default(); + )), + attr::Default::Path(path) => Some(quote!( + let __default: #this_type #ty_generics = #path(); + )), + attr::Default::None => { + // We don't need the default value. To prevent an unused variable + // warning, we leave the line empty. + None + } + }; + + quote_block! { + #(#let_flags)* + + #match_keys + + #let_default + + #(#check_flags)* + + _serde::__private::Ok(()) + } +} + +fn field_i(i: usize) -> Ident { + Ident::new(&format!("__field{}", i), Span::call_site()) +} + +/// This function wraps the expression in `#[serde(deserialize_with = "...")]` +/// in a trait to prevent it from accessing the internal `Deserialize` state. +fn wrap_deserialize_with( + params: &Parameters, + value_ty: &TokenStream, + deserialize_with: &syn::ExprPath, +) -> (TokenStream, TokenStream) { + let this_type = &params.this_type; + let (de_impl_generics, de_ty_generics, ty_generics, where_clause) = + split_with_de_lifetime(params); + let delife = params.borrowed.de_lifetime(); + + let wrapper = quote!
{ + #[doc(hidden)] + struct __DeserializeWith #de_impl_generics #where_clause { + value: #value_ty, + phantom: _serde::__private::PhantomData<#this_type #ty_generics>, + lifetime: _serde::__private::PhantomData<&#delife ()>, + } + + impl #de_impl_generics _serde::Deserialize<#delife> for __DeserializeWith #de_ty_generics #where_clause { + fn deserialize<__D>(__deserializer: __D) -> _serde::__private::Result<Self, __D::Error> + where + __D: _serde::Deserializer<#delife>, + { + _serde::__private::Ok(__DeserializeWith { + value: #deserialize_with(__deserializer)?, + phantom: _serde::__private::PhantomData, + lifetime: _serde::__private::PhantomData, + }) + } + } + }; + + let wrapper_ty = quote!(__DeserializeWith #de_ty_generics); + + (wrapper, wrapper_ty) +} + +fn wrap_deserialize_field_with( + params: &Parameters, + field_ty: &syn::Type, + deserialize_with: &syn::ExprPath, +) -> (TokenStream, TokenStream) { + wrap_deserialize_with(params, &quote!(#field_ty), deserialize_with) +} + +fn wrap_deserialize_variant_with( + params: &Parameters, + variant: &Variant, + deserialize_with: &syn::ExprPath, +) -> (TokenStream, TokenStream, TokenStream) { + let field_tys = variant.fields.iter().map(|field| field.ty); + let (wrapper, wrapper_ty) = + wrap_deserialize_with(params, &quote!((#(#field_tys),*)), deserialize_with); + + let unwrap_fn = unwrap_to_variant_closure(params, variant, true); + + (wrapper, wrapper_ty, unwrap_fn) +} + +// Generates a closure that converts a single input parameter to the final value. +fn unwrap_to_variant_closure( + params: &Parameters, + variant: &Variant, + with_wrapper: bool, +) -> TokenStream { + let this_value = &params.this_value; + let variant_ident = &variant.ident; + + let (arg, wrapper) = if with_wrapper { + (quote! { __wrap }, quote! { __wrap.value }) + } else { + let field_tys = variant.fields.iter().map(|field| field.ty); + (quote! { __wrap: (#(#field_tys),*) }, quote! { __wrap }) + }; + + let field_access = (0..variant.fields.len()).map(|n| { + Member::Unnamed(Index { + index: n as u32, + span: Span::call_site(), + }) + }); + + match variant.style { + Style::Struct if variant.fields.len() == 1 => { + let member = &variant.fields[0].member; + quote! { + |#arg| #this_value::#variant_ident { #member: #wrapper } + } + } + Style::Struct => { + let members = variant.fields.iter().map(|field| &field.member); + quote! { + |#arg| #this_value::#variant_ident { #(#members: #wrapper.#field_access),* } + } + } + Style::Tuple => quote! { + |#arg| #this_value::#variant_ident(#(#wrapper.#field_access),*) + }, + Style::Newtype => quote! { + |#arg| #this_value::#variant_ident(#wrapper) + }, + Style::Unit => quote! { + |#arg| #this_value::#variant_ident + }, + } +} + +fn expr_is_missing(field: &Field, cattrs: &attr::Container) -> Fragment { + match field.attrs.default() { + attr::Default::Default => { + let span = field.original.span(); + let func = quote_spanned!(span=> _serde::__private::Default::default); + return quote_expr!(#func()); + } + attr::Default::Path(path) => { + return quote_expr!(#path()); + } + attr::Default::None => { /* below */ } + } + + match *cattrs.default() { + attr::Default::Default | attr::Default::Path(_) => { + let member = &field.member; + return quote_expr!(__default.#member); + } + attr::Default::None => { /* below */ } + } + + let name = field.attrs.name().deserialize_name(); + match field.attrs.deserialize_with() { + None => { + let span = field.original.span(); + let func = quote_spanned!(span=> _serde::__private::de::missing_field); + quote_expr!
{ + #func(#name)? + } + } + Some(_) => { + quote_expr! { + return _serde::__private::Err(<__A::Error as _serde::de::Error>::missing_field(#name)) + } + } + } +} + +fn expr_is_missing_seq( + assign_to: Option<TokenStream>, + index: usize, + field: &Field, + cattrs: &attr::Container, + expecting: &str, +) -> TokenStream { + match field.attrs.default() { + attr::Default::Default => { + let span = field.original.span(); + return quote_spanned!(span=> #assign_to _serde::__private::Default::default()); + } + attr::Default::Path(path) => { + return quote_spanned!(path.span()=> #assign_to #path()); + } + attr::Default::None => { /* below */ } + } + + match *cattrs.default() { + attr::Default::Default | attr::Default::Path(_) => { + let member = &field.member; + quote!(#assign_to __default.#member) + } + attr::Default::None => quote!( + return _serde::__private::Err(_serde::de::Error::invalid_length(#index, &#expecting)) + ), + } +} + +fn effective_style(variant: &Variant) -> Style { + match variant.style { + Style::Newtype if variant.fields[0].attrs.skip_deserializing() => Style::Unit, + other => other, + } +} + +struct DeImplGenerics<'a>(&'a Parameters); +#[cfg(feature = "deserialize_in_place")] +struct InPlaceImplGenerics<'a>(&'a Parameters); + +impl<'a> ToTokens for DeImplGenerics<'a> { + fn to_tokens(&self, tokens: &mut TokenStream) { + let mut generics = self.0.generics.clone(); + if let Some(de_lifetime) = self.0.borrowed.de_lifetime_param() { + generics.params = Some(syn::GenericParam::Lifetime(de_lifetime)) + .into_iter() + .chain(generics.params) + .collect(); + } + let (impl_generics, _, _) = generics.split_for_impl(); + impl_generics.to_tokens(tokens); + } +} + +#[cfg(feature = "deserialize_in_place")] +impl<'a> ToTokens for InPlaceImplGenerics<'a> { + fn to_tokens(&self, tokens: &mut TokenStream) { + let place_lifetime = place_lifetime(); + let mut generics = self.0.generics.clone(); + + // Add lifetime for `&'place mut Self`, and `'a: 'place` bounds + for param in &mut generics.params { + match param { + syn::GenericParam::Lifetime(param) => { + param.bounds.push(place_lifetime.lifetime.clone()); + } + syn::GenericParam::Type(param) => { + param.bounds.push(syn::TypeParamBound::Lifetime( + place_lifetime.lifetime.clone(), + )); + } + syn::GenericParam::Const(_) => {} + } + } + generics.params = Some(syn::GenericParam::Lifetime(place_lifetime)) + .into_iter() + .chain(generics.params) + .collect(); + if let Some(de_lifetime) = self.0.borrowed.de_lifetime_param() { + generics.params = Some(syn::GenericParam::Lifetime(de_lifetime)) + .into_iter() + .chain(generics.params) + .collect(); + } + let (impl_generics, _, _) = generics.split_for_impl(); + impl_generics.to_tokens(tokens); + } +} + +#[cfg(feature = "deserialize_in_place")] +impl<'a> DeImplGenerics<'a> { + fn in_place(self) -> InPlaceImplGenerics<'a> { + InPlaceImplGenerics(self.0) + } +} + +struct DeTypeGenerics<'a>(&'a Parameters); +#[cfg(feature = "deserialize_in_place")] +struct InPlaceTypeGenerics<'a>(&'a Parameters); + +fn de_type_generics_to_tokens( + mut generics: syn::Generics, + borrowed: &BorrowedLifetimes, + tokens: &mut TokenStream, +) { + if borrowed.de_lifetime_param().is_some() { + let def = syn::LifetimeParam { + attrs: Vec::new(), + lifetime: syn::Lifetime::new("'de", Span::call_site()), + colon_token: None, + bounds: Punctuated::new(), + }; + // Prepend 'de lifetime to list of generics + generics.params = Some(syn::GenericParam::Lifetime(def)) + .into_iter() + .chain(generics.params) + .collect(); + } + let (_,
ty_generics, _) = generics.split_for_impl(); + ty_generics.to_tokens(tokens); +} + +impl<'a> ToTokens for DeTypeGenerics<'a> { + fn to_tokens(&self, tokens: &mut TokenStream) { + de_type_generics_to_tokens(self.0.generics.clone(), &self.0.borrowed, tokens); + } +} + +#[cfg(feature = "deserialize_in_place")] +impl<'a> ToTokens for InPlaceTypeGenerics<'a> { + fn to_tokens(&self, tokens: &mut TokenStream) { + let mut generics = self.0.generics.clone(); + generics.params = Some(syn::GenericParam::Lifetime(place_lifetime())) + .into_iter() + .chain(generics.params) + .collect(); + + de_type_generics_to_tokens(generics, &self.0.borrowed, tokens); + } +} + +#[cfg(feature = "deserialize_in_place")] +impl<'a> DeTypeGenerics<'a> { + fn in_place(self) -> InPlaceTypeGenerics<'a> { + InPlaceTypeGenerics(self.0) + } +} + +#[cfg(feature = "deserialize_in_place")] +fn place_lifetime() -> syn::LifetimeParam { + syn::LifetimeParam { + attrs: Vec::new(), + lifetime: syn::Lifetime::new("'place", Span::call_site()), + colon_token: None, + bounds: Punctuated::new(), + } +} + +fn split_with_de_lifetime( + params: &Parameters, +) -> ( + DeImplGenerics, + DeTypeGenerics, + syn::TypeGenerics, + Option<&syn::WhereClause>, +) { + let de_impl_generics = DeImplGenerics(params); + let de_ty_generics = DeTypeGenerics(params); + let (_, ty_generics, where_clause) = params.generics.split_for_impl(); + (de_impl_generics, de_ty_generics, ty_generics, where_clause) +} diff --git a/vendor/serde_derive/src/dummy.rs b/vendor/serde_derive/src/dummy.rs new file mode 100644 index 0000000..095f950 --- /dev/null +++ b/vendor/serde_derive/src/dummy.rs @@ -0,0 +1,23 @@ +use proc_macro2::TokenStream; +use quote::quote; + +pub fn wrap_in_const(serde_path: Option<&syn::Path>, code: TokenStream) -> TokenStream { + let use_serde = match serde_path { + Some(path) => quote! { + use #path as _serde; + }, + None => quote! { + #[allow(unused_extern_crates, clippy::useless_attribute)] + extern crate serde as _serde; + }, + }; + + quote! { + #[doc(hidden)] + #[allow(non_upper_case_globals, unused_attributes, unused_qualifications)] + const _: () = { + #use_serde + #code + }; + } +} diff --git a/vendor/serde_derive/src/fragment.rs b/vendor/serde_derive/src/fragment.rs new file mode 100644 index 0000000..6627c26 --- /dev/null +++ b/vendor/serde_derive/src/fragment.rs @@ -0,0 +1,74 @@ +use proc_macro2::TokenStream; +use quote::ToTokens; +use syn::{token, Token}; + +pub enum Fragment { + /// Tokens that can be used as an expression. + Expr(TokenStream), + /// Tokens that can be used inside a block. The surrounding curly braces are + /// not part of these tokens. + Block(TokenStream), +} + +macro_rules! quote_expr { + ($($tt:tt)*) => { + $crate::fragment::Fragment::Expr(quote!($($tt)*)) + } +} + +macro_rules! quote_block { + ($($tt:tt)*) => { + $crate::fragment::Fragment::Block(quote!($($tt)*)) + } +} + +/// Interpolate a fragment in place of an expression. This involves surrounding +/// Block fragments in curly braces. +pub struct Expr(pub Fragment); +impl ToTokens for Expr { + fn to_tokens(&self, out: &mut TokenStream) { + match &self.0 { + Fragment::Expr(expr) => expr.to_tokens(out), + Fragment::Block(block) => { + token::Brace::default().surround(out, |out| block.to_tokens(out)); + } + } + } +} + +/// Interpolate a fragment as the statements of a block. 
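+// A hedged usage sketch, assuming a fragment built with the macros above: + // interpolating `Expr(Fragment::Block(quote!(let x = 1; x)))` emits + // `{ let x = 1; x }`, while `Stmts` splices the same tokens without braces, + // and `Match` appends a comma after `Expr` fragments for use as a match arm.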
+pub struct Stmts(pub Fragment); +impl ToTokens for Stmts { + fn to_tokens(&self, out: &mut TokenStream) { + match &self.0 { + Fragment::Expr(expr) => expr.to_tokens(out), + Fragment::Block(block) => block.to_tokens(out), + } + } +} + +/// Interpolate a fragment as the value part of a `match` expression. This +/// involves putting a comma after expressions and curly braces around blocks. +pub struct Match(pub Fragment); +impl ToTokens for Match { + fn to_tokens(&self, out: &mut TokenStream) { + match &self.0 { + Fragment::Expr(expr) => { + expr.to_tokens(out); + <Token![,]>::default().to_tokens(out); + } + Fragment::Block(block) => { + token::Brace::default().surround(out, |out| block.to_tokens(out)); + } + } + } +} + +impl AsRef<TokenStream> for Fragment { + fn as_ref(&self) -> &TokenStream { + match self { + Fragment::Expr(expr) => expr, + Fragment::Block(block) => block, + } + } +} diff --git a/vendor/serde_derive/src/internals/ast.rs b/vendor/serde_derive/src/internals/ast.rs new file mode 100644 index 0000000..a28d3ae --- /dev/null +++ b/vendor/serde_derive/src/internals/ast.rs @@ -0,0 +1,216 @@ +//! A Serde ast, parsed from the Syn ast and ready to generate Rust code. + +use crate::internals::{attr, check, Ctxt, Derive}; +use syn::punctuated::Punctuated; +use syn::Token; + +/// A source data structure annotated with `#[derive(Serialize)]` and/or `#[derive(Deserialize)]`, +/// parsed into an internal representation. +pub struct Container<'a> { + /// The struct or enum name (without generics). + pub ident: syn::Ident, + /// Attributes on the structure, parsed for Serde. + pub attrs: attr::Container, + /// The contents of the struct or enum. + pub data: Data<'a>, + /// Any generics on the struct or enum. + pub generics: &'a syn::Generics, + /// Original input. + pub original: &'a syn::DeriveInput, +} + +/// The fields of a struct or enum. +/// +/// Analogous to `syn::Data`. +pub enum Data<'a> { + Enum(Vec<Variant<'a>>), + Struct(Style, Vec<Field<'a>>), +} + +/// A variant of an enum. +pub struct Variant<'a> { + pub ident: syn::Ident, + pub attrs: attr::Variant, + pub style: Style, + pub fields: Vec<Field<'a>>, + pub original: &'a syn::Variant, +} + +/// A field of a struct. +pub struct Field<'a> { + pub member: syn::Member, + pub attrs: attr::Field, + pub ty: &'a syn::Type, + pub original: &'a syn::Field, +} + +#[derive(Copy, Clone)] +pub enum Style { + /// Named fields. + Struct, + /// Many unnamed fields. + Tuple, + /// One unnamed field. + Newtype, + /// No fields. + Unit, +} + +impl<'a> Container<'a> { + /// Convert the raw Syn ast into a parsed container object, collecting errors in `cx`. 
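+ // A hedged note on classification (see struct_from_ast below): named fields + // become Style::Struct, a single unnamed field Style::Newtype, multiple + // unnamed fields Style::Tuple, and no fields Style::Unit; for example, a + // hypothetical `struct Wrapper(String);` parses as Data::Struct(Style::Newtype, ..).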
+ pub fn from_ast( + cx: &Ctxt, + item: &'a syn::DeriveInput, + derive: Derive, + ) -> Option<Container<'a>> { + let mut attrs = attr::Container::from_ast(cx, item); + + let mut data = match &item.data { + syn::Data::Enum(data) => Data::Enum(enum_from_ast(cx, &data.variants, attrs.default())), + syn::Data::Struct(data) => { + let (style, fields) = struct_from_ast(cx, &data.fields, None, attrs.default()); + Data::Struct(style, fields) + } + syn::Data::Union(_) => { + cx.error_spanned_by(item, "Serde does not support derive for unions"); + return None; + } + }; + + let mut has_flatten = false; + match &mut data { + Data::Enum(variants) => { + for variant in variants { + variant.attrs.rename_by_rules(attrs.rename_all_rules()); + for field in &mut variant.fields { + if field.attrs.flatten() { + has_flatten = true; + } + field.attrs.rename_by_rules( + variant + .attrs + .rename_all_rules() + .or(attrs.rename_all_fields_rules()), + ); + } + } + } + Data::Struct(_, fields) => { + for field in fields { + if field.attrs.flatten() { + has_flatten = true; + } + field.attrs.rename_by_rules(attrs.rename_all_rules()); + } + } + } + + if has_flatten { + attrs.mark_has_flatten(); + } + + let mut item = Container { + ident: item.ident.clone(), + attrs, + data, + generics: &item.generics, + original: item, + }; + check::check(cx, &mut item, derive); + Some(item) + } +} + +impl<'a> Data<'a> { + pub fn all_fields(&'a self) -> Box<dyn Iterator<Item = &'a Field<'a>> + 'a> { + match self { + Data::Enum(variants) => { + Box::new(variants.iter().flat_map(|variant| variant.fields.iter())) + } + Data::Struct(_, fields) => Box::new(fields.iter()), + } + } + + pub fn has_getter(&self) -> bool { + self.all_fields().any(|f| f.attrs.getter().is_some()) + } +} + +fn enum_from_ast<'a>( + cx: &Ctxt, + variants: &'a Punctuated<syn::Variant, Token![,]>, + container_default: &attr::Default, +) -> Vec<Variant<'a>> { + let variants: Vec<Variant> = variants + .iter() + .map(|variant| { + let attrs = attr::Variant::from_ast(cx, variant); + let (style, fields) = + struct_from_ast(cx, &variant.fields, Some(&attrs), container_default); + Variant { + ident: variant.ident.clone(), + attrs, + style, + fields, + original: variant, + } + }) + .collect(); + + let index_of_last_tagged_variant = variants + .iter() + .rposition(|variant| !variant.attrs.untagged()); + if let Some(index_of_last_tagged_variant) = index_of_last_tagged_variant { + for variant in &variants[..index_of_last_tagged_variant] { + if variant.attrs.untagged() { + cx.error_spanned_by(&variant.ident, "all variants with the #[serde(untagged)] attribute must be placed at the end of the enum"); + } + } + } + + variants +} + +fn struct_from_ast<'a>( + cx: &Ctxt, + fields: &'a syn::Fields, + attrs: Option<&attr::Variant>, + container_default: &attr::Default, +) -> (Style, Vec<Field<'a>>) { + match fields { + syn::Fields::Named(fields) => ( + Style::Struct, + fields_from_ast(cx, &fields.named, attrs, container_default), + ), + syn::Fields::Unnamed(fields) if fields.unnamed.len() == 1 => ( + Style::Newtype, + fields_from_ast(cx, &fields.unnamed, attrs, container_default), + ), + syn::Fields::Unnamed(fields) => ( + Style::Tuple, + fields_from_ast(cx, &fields.unnamed, attrs, container_default), + ), + syn::Fields::Unit => (Style::Unit, Vec::new()), + } +} + +fn fields_from_ast<'a>( + cx: &Ctxt, + fields: &'a Punctuated<syn::Field, Token![,]>, + attrs: Option<&attr::Variant>, + container_default: &attr::Default, +) -> Vec<Field<'a>> { + fields + .iter() + .enumerate() + .map(|(i, 
field)| Field { + member: match &field.ident { + Some(ident) => syn::Member::Named(ident.clone()), + None => syn::Member::Unnamed(i.into()), + }, + attrs: attr::Field::from_ast(cx, i, field, attrs, container_default), + ty: &field.ty, + original: field, + }) + .collect() +} diff --git a/vendor/serde_derive/src/internals/attr.rs b/vendor/serde_derive/src/internals/attr.rs new file mode 100644 index 0000000..bb9de32 --- /dev/null +++ b/vendor/serde_derive/src/internals/attr.rs @@ -0,0 +1,1881 @@ +use crate::internals::symbol::*; +use crate::internals::{ungroup, Ctxt}; +use proc_macro2::{Spacing, Span, TokenStream, TokenTree}; +use quote::ToTokens; +use std::borrow::Cow; +use std::collections::BTreeSet; +use std::iter::FromIterator; +use syn::meta::ParseNestedMeta; +use syn::parse::ParseStream; +use syn::punctuated::Punctuated; +use syn::{parse_quote, token, Ident, Lifetime, Token}; + +// This module handles parsing of `#[serde(...)]` attributes. The entrypoints +// are `attr::Container::from_ast`, `attr::Variant::from_ast`, and +// `attr::Field::from_ast`. Each returns an instance of the corresponding +// struct. Note that none of them return a Result. Unrecognized, malformed, or +// duplicated attributes result in a span_err but otherwise are ignored. The +// user will see errors simultaneously for all bad attributes in the crate +// rather than just the first. + +pub use crate::internals::case::RenameRule; + +struct Attr<'c, T> { + cx: &'c Ctxt, + name: Symbol, + tokens: TokenStream, + value: Option<T>, +} + +impl<'c, T> Attr<'c, T> { + fn none(cx: &'c Ctxt, name: Symbol) -> Self { + Attr { + cx, + name, + tokens: TokenStream::new(), + value: None, + } + } + + fn set<A: ToTokens>(&mut self, obj: A, value: T) { + let tokens = obj.into_token_stream(); + + if self.value.is_some() { + let msg = format!("duplicate serde attribute `{}`", self.name); + self.cx.error_spanned_by(tokens, msg); + } else { + self.tokens = tokens; + self.value = Some(value); + } + } + + fn set_opt<A: ToTokens>(&mut self, obj: A, value: Option<T>) { + if let Some(value) = value { + self.set(obj, value); + } + } + + fn set_if_none(&mut self, value: T) { + if self.value.is_none() { + self.value = Some(value); + } + } + + fn get(self) -> Option<T> { + self.value + } + + fn get_with_tokens(self) -> Option<(TokenStream, T)> { + match self.value { + Some(v) => Some((self.tokens, v)), + None => None, + } + } +} + +struct BoolAttr<'c>(Attr<'c, ()>); + +impl<'c> BoolAttr<'c> { + fn none(cx: &'c Ctxt, name: Symbol) -> Self { + BoolAttr(Attr::none(cx, name)) + } + + fn set_true<A: ToTokens>(&mut self, obj: A) { + self.0.set(obj, ()); + } + + fn get(&self) -> bool { + self.0.value.is_some() + } +} + +struct VecAttr<'c, T> { + cx: &'c Ctxt, + name: Symbol, + first_dup_tokens: TokenStream, + values: Vec<T>, +} + +impl<'c, T> VecAttr<'c, T> { + fn none(cx: &'c Ctxt, name: Symbol) -> Self { + VecAttr { + cx, + name, + first_dup_tokens: TokenStream::new(), + values: Vec::new(), + } + } + + fn insert<A: ToTokens>(&mut self, obj: A, value: T) { + if self.values.len() == 1 { + self.first_dup_tokens = obj.into_token_stream(); + } + self.values.push(value); + } + + fn at_most_one(mut self) -> Option<T> { + if self.values.len() > 1 { + let dup_token = self.first_dup_tokens; + let msg = format!("duplicate serde attribute `{}`", self.name); + self.cx.error_spanned_by(dup_token, msg); + None + } else { + self.values.pop() + } + } + + fn get(self) -> Vec<T> { + self.values + } +} + +pub struct Name { + serialize: String, + serialize_renamed: 
bool, + deserialize: String, + deserialize_renamed: bool, + deserialize_aliases: BTreeSet<String>, +} + +fn unraw(ident: &Ident) -> String { + ident.to_string().trim_start_matches("r#").to_owned() +} + +impl Name { + fn from_attrs( + source_name: String, + ser_name: Attr<String>, + de_name: Attr<String>, + de_aliases: Option<VecAttr<String>>, + ) -> Name { + let mut alias_set = BTreeSet::new(); + if let Some(de_aliases) = de_aliases { + for alias_name in de_aliases.get() { + alias_set.insert(alias_name); + } + } + + let ser_name = ser_name.get(); + let ser_renamed = ser_name.is_some(); + let de_name = de_name.get(); + let de_renamed = de_name.is_some(); + Name { + serialize: ser_name.unwrap_or_else(|| source_name.clone()), + serialize_renamed: ser_renamed, + deserialize: de_name.unwrap_or(source_name), + deserialize_renamed: de_renamed, + deserialize_aliases: alias_set, + } + } + + /// Return the container name for the container when serializing. + pub fn serialize_name(&self) -> &str { + &self.serialize + } + + /// Return the container name for the container when deserializing. + pub fn deserialize_name(&self) -> &str { + &self.deserialize + } + + fn deserialize_aliases(&self) -> &BTreeSet<String> { + &self.deserialize_aliases + } +} + +#[derive(Copy, Clone)] +pub struct RenameAllRules { + serialize: RenameRule, + deserialize: RenameRule, +} + +impl RenameAllRules { + /// Returns a new `RenameAllRules` with the individual rules of `self` and + /// `other_rules` joined by `RenameRules::or`. + pub fn or(self, other_rules: Self) -> Self { + Self { + serialize: self.serialize.or(other_rules.serialize), + deserialize: self.deserialize.or(other_rules.deserialize), + } + } +} + +/// Represents struct or enum attribute information. +pub struct Container { + name: Name, + transparent: bool, + deny_unknown_fields: bool, + default: Default, + rename_all_rules: RenameAllRules, + rename_all_fields_rules: RenameAllRules, + ser_bound: Option<Vec<syn::WherePredicate>>, + de_bound: Option<Vec<syn::WherePredicate>>, + tag: TagType, + type_from: Option<syn::Type>, + type_try_from: Option<syn::Type>, + type_into: Option<syn::Type>, + remote: Option<syn::Path>, + identifier: Identifier, + has_flatten: bool, + serde_path: Option<syn::Path>, + is_packed: bool, + /// Error message generated when type can't be deserialized + expecting: Option<String>, + non_exhaustive: bool, +} + +/// Styles of representing an enum. +pub enum TagType { + /// The default. + /// + /// ```json + /// {"variant1": {"key1": "value1", "key2": "value2"}} + /// ``` + External, + + /// `#[serde(tag = "type")]` + /// + /// ```json + /// {"type": "variant1", "key1": "value1", "key2": "value2"} + /// ``` + Internal { tag: String }, + + /// `#[serde(tag = "t", content = "c")]` + /// + /// ```json + /// {"t": "variant1", "c": {"key1": "value1", "key2": "value2"}} + /// ``` + Adjacent { tag: String, content: String }, + + /// `#[serde(untagged)]` + /// + /// ```json + /// {"key1": "value1", "key2": "value2"} + /// ``` + None, +} + +/// Whether this enum represents the fields of a struct or the variants of an +/// enum. +#[derive(Copy, Clone)] +pub enum Identifier { + /// It does not. + No, + + /// This enum represents the fields of a struct. All of the variants must be + /// unit variants, except possibly one which is annotated with + /// `#[serde(other)]` and is a newtype variant. + Field, + + /// This enum represents the variants of an enum. All of the variants must + /// be unit variants. 
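+ // A hedged example of the Field case, following serde's documented pattern: + // + //     #[derive(Deserialize)] + //     #[serde(field_identifier, rename_all = "lowercase")] + //     enum FieldName { Secs, Nanos } + // + // deserializes map keys into FieldName values; variant_identifier is the + // analogous form for matching enum variant names.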
+ Variant, +} + +impl Identifier { + #[cfg(feature = "deserialize_in_place")] + pub fn is_some(self) -> bool { + match self { + Identifier::No => false, + Identifier::Field | Identifier::Variant => true, + } + } +} + +impl Container { + /// Extract out the `#[serde(...)]` attributes from an item. + pub fn from_ast(cx: &Ctxt, item: &syn::DeriveInput) -> Self { + let mut ser_name = Attr::none(cx, RENAME); + let mut de_name = Attr::none(cx, RENAME); + let mut transparent = BoolAttr::none(cx, TRANSPARENT); + let mut deny_unknown_fields = BoolAttr::none(cx, DENY_UNKNOWN_FIELDS); + let mut default = Attr::none(cx, DEFAULT); + let mut rename_all_ser_rule = Attr::none(cx, RENAME_ALL); + let mut rename_all_de_rule = Attr::none(cx, RENAME_ALL); + let mut rename_all_fields_ser_rule = Attr::none(cx, RENAME_ALL_FIELDS); + let mut rename_all_fields_de_rule = Attr::none(cx, RENAME_ALL_FIELDS); + let mut ser_bound = Attr::none(cx, BOUND); + let mut de_bound = Attr::none(cx, BOUND); + let mut untagged = BoolAttr::none(cx, UNTAGGED); + let mut internal_tag = Attr::none(cx, TAG); + let mut content = Attr::none(cx, CONTENT); + let mut type_from = Attr::none(cx, FROM); + let mut type_try_from = Attr::none(cx, TRY_FROM); + let mut type_into = Attr::none(cx, INTO); + let mut remote = Attr::none(cx, REMOTE); + let mut field_identifier = BoolAttr::none(cx, FIELD_IDENTIFIER); + let mut variant_identifier = BoolAttr::none(cx, VARIANT_IDENTIFIER); + let mut serde_path = Attr::none(cx, CRATE); + let mut expecting = Attr::none(cx, EXPECTING); + let mut non_exhaustive = false; + + for attr in &item.attrs { + if attr.path() != SERDE { + non_exhaustive |= + matches!(&attr.meta, syn::Meta::Path(path) if path == NON_EXHAUSTIVE); + continue; + } + + if let syn::Meta::List(meta) = &attr.meta { + if meta.tokens.is_empty() { + continue; + } + } + + if let Err(err) = attr.parse_nested_meta(|meta| { + if meta.path == RENAME { + // #[serde(rename = "foo")] + // #[serde(rename(serialize = "foo", deserialize = "bar"))] + let (ser, de) = get_renames(cx, RENAME, &meta)?; + ser_name.set_opt(&meta.path, ser.as_ref().map(syn::LitStr::value)); + de_name.set_opt(&meta.path, de.as_ref().map(syn::LitStr::value)); + } else if meta.path == RENAME_ALL { + // #[serde(rename_all = "foo")] + // #[serde(rename_all(serialize = "foo", deserialize = "bar"))] + let one_name = meta.input.peek(Token![=]); + let (ser, de) = get_renames(cx, RENAME_ALL, &meta)?; + if let Some(ser) = ser { + match RenameRule::from_str(&ser.value()) { + Ok(rename_rule) => rename_all_ser_rule.set(&meta.path, rename_rule), + Err(err) => cx.error_spanned_by(ser, err), + } + } + if let Some(de) = de { + match RenameRule::from_str(&de.value()) { + Ok(rename_rule) => rename_all_de_rule.set(&meta.path, rename_rule), + Err(err) => { + if !one_name { + cx.error_spanned_by(de, err); + } + } + } + } + } else if meta.path == RENAME_ALL_FIELDS { + // #[serde(rename_all_fields = "foo")] + // #[serde(rename_all_fields(serialize = "foo", deserialize = "bar"))] + let one_name = meta.input.peek(Token![=]); + let (ser, de) = get_renames(cx, RENAME_ALL_FIELDS, &meta)?; + + match item.data { + syn::Data::Enum(_) => { + if let Some(ser) = ser { + match RenameRule::from_str(&ser.value()) { + Ok(rename_rule) => { + rename_all_fields_ser_rule.set(&meta.path, rename_rule); + } + Err(err) => cx.error_spanned_by(ser, err), + } + } + if let Some(de) = de { + match RenameRule::from_str(&de.value()) { + Ok(rename_rule) => { + rename_all_fields_de_rule.set(&meta.path, rename_rule); + } + Err(err) => { + if 
!one_name { + cx.error_spanned_by(de, err); + } + } + } + } + } + syn::Data::Struct(_) => { + let msg = "#[serde(rename_all_fields)] can only be used on enums"; + cx.syn_error(meta.error(msg)); + } + syn::Data::Union(_) => { + let msg = "#[serde(rename_all_fields)] can only be used on enums"; + cx.syn_error(meta.error(msg)); + } + } + } else if meta.path == TRANSPARENT { + // #[serde(transparent)] + transparent.set_true(meta.path); + } else if meta.path == DENY_UNKNOWN_FIELDS { + // #[serde(deny_unknown_fields)] + deny_unknown_fields.set_true(meta.path); + } else if meta.path == DEFAULT { + if meta.input.peek(Token![=]) { + // #[serde(default = "...")] + if let Some(path) = parse_lit_into_expr_path(cx, DEFAULT, &meta)? { + match &item.data { + syn::Data::Struct(syn::DataStruct { fields, .. }) => match fields { + syn::Fields::Named(_) | syn::Fields::Unnamed(_) => { + default.set(&meta.path, Default::Path(path)); + } + syn::Fields::Unit => { + let msg = "#[serde(default = \"...\")] can only be used on structs that have fields"; + cx.syn_error(meta.error(msg)); + } + }, + syn::Data::Enum(_) => { + let msg = "#[serde(default = \"...\")] can only be used on structs"; + cx.syn_error(meta.error(msg)); + } + syn::Data::Union(_) => { + let msg = "#[serde(default = \"...\")] can only be used on structs"; + cx.syn_error(meta.error(msg)); + } + } + } + } else { + // #[serde(default)] + match &item.data { + syn::Data::Struct(syn::DataStruct { fields, .. }) => match fields { + syn::Fields::Named(_) | syn::Fields::Unnamed(_) => { + default.set(meta.path, Default::Default); + } + syn::Fields::Unit => { + let msg = "#[serde(default)] can only be used on structs that have fields"; + cx.error_spanned_by(fields, msg); + } + }, + syn::Data::Enum(_) => { + let msg = "#[serde(default)] can only be used on structs"; + cx.syn_error(meta.error(msg)); + } + syn::Data::Union(_) => { + let msg = "#[serde(default)] can only be used on structs"; + cx.syn_error(meta.error(msg)); + } + } + } + } else if meta.path == BOUND { + // #[serde(bound = "T: SomeBound")] + // #[serde(bound(serialize = "...", deserialize = "..."))] + let (ser, de) = get_where_predicates(cx, &meta)?; + ser_bound.set_opt(&meta.path, ser); + de_bound.set_opt(&meta.path, de); + } else if meta.path == UNTAGGED { + // #[serde(untagged)] + match item.data { + syn::Data::Enum(_) => { + untagged.set_true(&meta.path); + } + syn::Data::Struct(_) => { + let msg = "#[serde(untagged)] can only be used on enums"; + cx.syn_error(meta.error(msg)); + } + syn::Data::Union(_) => { + let msg = "#[serde(untagged)] can only be used on enums"; + cx.syn_error(meta.error(msg)); + } + } + } else if meta.path == TAG { + // #[serde(tag = "type")] + if let Some(s) = get_lit_str(cx, TAG, &meta)? { + match &item.data { + syn::Data::Enum(_) => { + internal_tag.set(&meta.path, s.value()); + } + syn::Data::Struct(syn::DataStruct { fields, .. }) => match fields { + syn::Fields::Named(_) => { + internal_tag.set(&meta.path, s.value()); + } + syn::Fields::Unnamed(_) | syn::Fields::Unit => { + let msg = "#[serde(tag = \"...\")] can only be used on enums and structs with named fields"; + cx.syn_error(meta.error(msg)); + } + }, + syn::Data::Union(_) => { + let msg = "#[serde(tag = \"...\")] can only be used on enums and structs with named fields"; + cx.syn_error(meta.error(msg)); + } + } + } + } else if meta.path == CONTENT { + // #[serde(content = "c")] + if let Some(s) = get_lit_str(cx, CONTENT, &meta)? 
{ + match &item.data { + syn::Data::Enum(_) => { + content.set(&meta.path, s.value()); + } + syn::Data::Struct(_) => { + let msg = "#[serde(content = \"...\")] can only be used on enums"; + cx.syn_error(meta.error(msg)); + } + syn::Data::Union(_) => { + let msg = "#[serde(content = \"...\")] can only be used on enums"; + cx.syn_error(meta.error(msg)); + } + } + } + } else if meta.path == FROM { + // #[serde(from = "Type")] + if let Some(from_ty) = parse_lit_into_ty(cx, FROM, &meta)? { + type_from.set_opt(&meta.path, Some(from_ty)); + } + } else if meta.path == TRY_FROM { + // #[serde(try_from = "Type")] + if let Some(try_from_ty) = parse_lit_into_ty(cx, TRY_FROM, &meta)? { + type_try_from.set_opt(&meta.path, Some(try_from_ty)); + } + } else if meta.path == INTO { + // #[serde(into = "Type")] + if let Some(into_ty) = parse_lit_into_ty(cx, INTO, &meta)? { + type_into.set_opt(&meta.path, Some(into_ty)); + } + } else if meta.path == REMOTE { + // #[serde(remote = "...")] + if let Some(path) = parse_lit_into_path(cx, REMOTE, &meta)? { + if is_primitive_path(&path, "Self") { + remote.set(&meta.path, item.ident.clone().into()); + } else { + remote.set(&meta.path, path); + } + } + } else if meta.path == FIELD_IDENTIFIER { + // #[serde(field_identifier)] + field_identifier.set_true(&meta.path); + } else if meta.path == VARIANT_IDENTIFIER { + // #[serde(variant_identifier)] + variant_identifier.set_true(&meta.path); + } else if meta.path == CRATE { + // #[serde(crate = "foo")] + if let Some(path) = parse_lit_into_path(cx, CRATE, &meta)? { + serde_path.set(&meta.path, path); + } + } else if meta.path == EXPECTING { + // #[serde(expecting = "a message")] + if let Some(s) = get_lit_str(cx, EXPECTING, &meta)? { + expecting.set(&meta.path, s.value()); + } + } else { + let path = meta.path.to_token_stream().to_string().replace(' ', ""); + return Err( + meta.error(format_args!("unknown serde container attribute `{}`", path)) + ); + } + Ok(()) + }) { + cx.syn_error(err); + } + } + + let mut is_packed = false; + for attr in &item.attrs { + if attr.path() == REPR { + let _ = attr.parse_args_with(|input: ParseStream| { + while let Some(token) = input.parse()? 
{ + if let TokenTree::Ident(ident) = token { + is_packed |= ident == "packed"; + } + } + Ok(()) + }); + } + } + + Container { + name: Name::from_attrs(unraw(&item.ident), ser_name, de_name, None), + transparent: transparent.get(), + deny_unknown_fields: deny_unknown_fields.get(), + default: default.get().unwrap_or(Default::None), + rename_all_rules: RenameAllRules { + serialize: rename_all_ser_rule.get().unwrap_or(RenameRule::None), + deserialize: rename_all_de_rule.get().unwrap_or(RenameRule::None), + }, + rename_all_fields_rules: RenameAllRules { + serialize: rename_all_fields_ser_rule.get().unwrap_or(RenameRule::None), + deserialize: rename_all_fields_de_rule.get().unwrap_or(RenameRule::None), + }, + ser_bound: ser_bound.get(), + de_bound: de_bound.get(), + tag: decide_tag(cx, item, untagged, internal_tag, content), + type_from: type_from.get(), + type_try_from: type_try_from.get(), + type_into: type_into.get(), + remote: remote.get(), + identifier: decide_identifier(cx, item, field_identifier, variant_identifier), + has_flatten: false, + serde_path: serde_path.get(), + is_packed, + expecting: expecting.get(), + non_exhaustive, + } + } + + pub fn name(&self) -> &Name { + &self.name + } + + pub fn rename_all_rules(&self) -> RenameAllRules { + self.rename_all_rules + } + + pub fn rename_all_fields_rules(&self) -> RenameAllRules { + self.rename_all_fields_rules + } + + pub fn transparent(&self) -> bool { + self.transparent + } + + pub fn deny_unknown_fields(&self) -> bool { + self.deny_unknown_fields + } + + pub fn default(&self) -> &Default { + &self.default + } + + pub fn ser_bound(&self) -> Option<&[syn::WherePredicate]> { + self.ser_bound.as_ref().map(|vec| &vec[..]) + } + + pub fn de_bound(&self) -> Option<&[syn::WherePredicate]> { + self.de_bound.as_ref().map(|vec| &vec[..]) + } + + pub fn tag(&self) -> &TagType { + &self.tag + } + + pub fn type_from(&self) -> Option<&syn::Type> { + self.type_from.as_ref() + } + + pub fn type_try_from(&self) -> Option<&syn::Type> { + self.type_try_from.as_ref() + } + + pub fn type_into(&self) -> Option<&syn::Type> { + self.type_into.as_ref() + } + + pub fn remote(&self) -> Option<&syn::Path> { + self.remote.as_ref() + } + + pub fn is_packed(&self) -> bool { + self.is_packed + } + + pub fn identifier(&self) -> Identifier { + self.identifier + } + + pub fn has_flatten(&self) -> bool { + self.has_flatten + } + + pub fn mark_has_flatten(&mut self) { + self.has_flatten = true; + } + + pub fn custom_serde_path(&self) -> Option<&syn::Path> { + self.serde_path.as_ref() + } + + pub fn serde_path(&self) -> Cow<syn::Path> { + self.custom_serde_path() + .map_or_else(|| Cow::Owned(parse_quote!(_serde)), Cow::Borrowed) + } + + /// Error message generated when type can't be deserialized. + /// If `None`, default message will be used + pub fn expecting(&self) -> Option<&str> { + self.expecting.as_ref().map(String::as_ref) + } + + pub fn non_exhaustive(&self) -> bool { + self.non_exhaustive + } +} + +fn decide_tag( + cx: &Ctxt, + item: &syn::DeriveInput, + untagged: BoolAttr, + internal_tag: Attr<String>, + content: Attr<String>, +) -> TagType { + match ( + untagged.0.get_with_tokens(), + internal_tag.get_with_tokens(), + content.get_with_tokens(), + ) { + (None, None, None) => TagType::External, + (Some(_), None, None) => TagType::None, + (None, Some((_, tag)), None) => { + // Check that there are no tuple variants. 
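+ // A hedged illustration of why: with #[serde(tag = "t")], a tuple variant + // like `V(u8, u8)` produces no map to merge the tag into, so it is rejected + // below, while a newtype variant `V(Inner)` (unnamed len == 1) stays allowed.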
+ if let syn::Data::Enum(data) = &item.data { + for variant in &data.variants { + match &variant.fields { + syn::Fields::Named(_) | syn::Fields::Unit => {} + syn::Fields::Unnamed(fields) => { + if fields.unnamed.len() != 1 { + let msg = + "#[serde(tag = \"...\")] cannot be used with tuple variants"; + cx.error_spanned_by(variant, msg); + break; + } + } + } + } + } + TagType::Internal { tag } + } + (Some((untagged_tokens, ())), Some((tag_tokens, _)), None) => { + let msg = "enum cannot be both untagged and internally tagged"; + cx.error_spanned_by(untagged_tokens, msg); + cx.error_spanned_by(tag_tokens, msg); + TagType::External // doesn't matter, will error + } + (None, None, Some((content_tokens, _))) => { + let msg = "#[serde(tag = \"...\", content = \"...\")] must be used together"; + cx.error_spanned_by(content_tokens, msg); + TagType::External + } + (Some((untagged_tokens, ())), None, Some((content_tokens, _))) => { + let msg = "untagged enum cannot have #[serde(content = \"...\")]"; + cx.error_spanned_by(untagged_tokens, msg); + cx.error_spanned_by(content_tokens, msg); + TagType::External + } + (None, Some((_, tag)), Some((_, content))) => TagType::Adjacent { tag, content }, + (Some((untagged_tokens, ())), Some((tag_tokens, _)), Some((content_tokens, _))) => { + let msg = "untagged enum cannot have #[serde(tag = \"...\", content = \"...\")]"; + cx.error_spanned_by(untagged_tokens, msg); + cx.error_spanned_by(tag_tokens, msg); + cx.error_spanned_by(content_tokens, msg); + TagType::External + } + } +} + +fn decide_identifier( + cx: &Ctxt, + item: &syn::DeriveInput, + field_identifier: BoolAttr, + variant_identifier: BoolAttr, +) -> Identifier { + match ( + &item.data, + field_identifier.0.get_with_tokens(), + variant_identifier.0.get_with_tokens(), + ) { + (_, None, None) => Identifier::No, + (_, Some((field_identifier_tokens, ())), Some((variant_identifier_tokens, ()))) => { + let msg = + "#[serde(field_identifier)] and #[serde(variant_identifier)] cannot both be set"; + cx.error_spanned_by(field_identifier_tokens, msg); + cx.error_spanned_by(variant_identifier_tokens, msg); + Identifier::No + } + (syn::Data::Enum(_), Some(_), None) => Identifier::Field, + (syn::Data::Enum(_), None, Some(_)) => Identifier::Variant, + (syn::Data::Struct(syn::DataStruct { struct_token, .. }), Some(_), None) => { + let msg = "#[serde(field_identifier)] can only be used on an enum"; + cx.error_spanned_by(struct_token, msg); + Identifier::No + } + (syn::Data::Union(syn::DataUnion { union_token, .. }), Some(_), None) => { + let msg = "#[serde(field_identifier)] can only be used on an enum"; + cx.error_spanned_by(union_token, msg); + Identifier::No + } + (syn::Data::Struct(syn::DataStruct { struct_token, .. }), None, Some(_)) => { + let msg = "#[serde(variant_identifier)] can only be used on an enum"; + cx.error_spanned_by(struct_token, msg); + Identifier::No + } + (syn::Data::Union(syn::DataUnion { union_token, .. 
}), None, Some(_)) => { + let msg = "#[serde(variant_identifier)] can only be used on an enum"; + cx.error_spanned_by(union_token, msg); + Identifier::No + } + } +} + +/// Represents variant attribute information +pub struct Variant { + name: Name, + rename_all_rules: RenameAllRules, + ser_bound: Option<Vec<syn::WherePredicate>>, + de_bound: Option<Vec<syn::WherePredicate>>, + skip_deserializing: bool, + skip_serializing: bool, + other: bool, + serialize_with: Option<syn::ExprPath>, + deserialize_with: Option<syn::ExprPath>, + borrow: Option<BorrowAttribute>, + untagged: bool, +} + +struct BorrowAttribute { + path: syn::Path, + lifetimes: Option<BTreeSet<syn::Lifetime>>, +} + +impl Variant { + pub fn from_ast(cx: &Ctxt, variant: &syn::Variant) -> Self { + let mut ser_name = Attr::none(cx, RENAME); + let mut de_name = Attr::none(cx, RENAME); + let mut de_aliases = VecAttr::none(cx, RENAME); + let mut skip_deserializing = BoolAttr::none(cx, SKIP_DESERIALIZING); + let mut skip_serializing = BoolAttr::none(cx, SKIP_SERIALIZING); + let mut rename_all_ser_rule = Attr::none(cx, RENAME_ALL); + let mut rename_all_de_rule = Attr::none(cx, RENAME_ALL); + let mut ser_bound = Attr::none(cx, BOUND); + let mut de_bound = Attr::none(cx, BOUND); + let mut other = BoolAttr::none(cx, OTHER); + let mut serialize_with = Attr::none(cx, SERIALIZE_WITH); + let mut deserialize_with = Attr::none(cx, DESERIALIZE_WITH); + let mut borrow = Attr::none(cx, BORROW); + let mut untagged = BoolAttr::none(cx, UNTAGGED); + + for attr in &variant.attrs { + if attr.path() != SERDE { + continue; + } + + if let syn::Meta::List(meta) = &attr.meta { + if meta.tokens.is_empty() { + continue; + } + } + + if let Err(err) = attr.parse_nested_meta(|meta| { + if meta.path == RENAME { + // #[serde(rename = "foo")] + // #[serde(rename(serialize = "foo", deserialize = "bar"))] + let (ser, de) = get_multiple_renames(cx, &meta)?; + ser_name.set_opt(&meta.path, ser.as_ref().map(syn::LitStr::value)); + for de_value in de { + de_name.set_if_none(de_value.value()); + de_aliases.insert(&meta.path, de_value.value()); + } + } else if meta.path == ALIAS { + // #[serde(alias = "foo")] + if let Some(s) = get_lit_str(cx, ALIAS, &meta)? 
{ + de_aliases.insert(&meta.path, s.value()); + } + } else if meta.path == RENAME_ALL { + // #[serde(rename_all = "foo")] + // #[serde(rename_all(serialize = "foo", deserialize = "bar"))] + let one_name = meta.input.peek(Token![=]); + let (ser, de) = get_renames(cx, RENAME_ALL, &meta)?; + if let Some(ser) = ser { + match RenameRule::from_str(&ser.value()) { + Ok(rename_rule) => rename_all_ser_rule.set(&meta.path, rename_rule), + Err(err) => cx.error_spanned_by(ser, err), + } + } + if let Some(de) = de { + match RenameRule::from_str(&de.value()) { + Ok(rename_rule) => rename_all_de_rule.set(&meta.path, rename_rule), + Err(err) => { + if !one_name { + cx.error_spanned_by(de, err); + } + } + } + } + } else if meta.path == SKIP { + // #[serde(skip)] + skip_serializing.set_true(&meta.path); + skip_deserializing.set_true(&meta.path); + } else if meta.path == SKIP_DESERIALIZING { + // #[serde(skip_deserializing)] + skip_deserializing.set_true(&meta.path); + } else if meta.path == SKIP_SERIALIZING { + // #[serde(skip_serializing)] + skip_serializing.set_true(&meta.path); + } else if meta.path == OTHER { + // #[serde(other)] + other.set_true(&meta.path); + } else if meta.path == BOUND { + // #[serde(bound = "T: SomeBound")] + // #[serde(bound(serialize = "...", deserialize = "..."))] + let (ser, de) = get_where_predicates(cx, &meta)?; + ser_bound.set_opt(&meta.path, ser); + de_bound.set_opt(&meta.path, de); + } else if meta.path == WITH { + // #[serde(with = "...")] + if let Some(path) = parse_lit_into_expr_path(cx, WITH, &meta)? { + let mut ser_path = path.clone(); + ser_path + .path + .segments + .push(Ident::new("serialize", Span::call_site()).into()); + serialize_with.set(&meta.path, ser_path); + let mut de_path = path; + de_path + .path + .segments + .push(Ident::new("deserialize", Span::call_site()).into()); + deserialize_with.set(&meta.path, de_path); + } + } else if meta.path == SERIALIZE_WITH { + // #[serde(serialize_with = "...")] + if let Some(path) = parse_lit_into_expr_path(cx, SERIALIZE_WITH, &meta)? { + serialize_with.set(&meta.path, path); + } + } else if meta.path == DESERIALIZE_WITH { + // #[serde(deserialize_with = "...")] + if let Some(path) = parse_lit_into_expr_path(cx, DESERIALIZE_WITH, &meta)? 
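+ // A hedged note on the WITH arm above: #[serde(with = "m")] is rewritten by + // appending path segments, yielding serialize_with = "m::serialize" and + // deserialize_with = "m::deserialize", so it behaves as if both paths were + // spelled out by hand ("m" being a hypothetical module name). +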
{ + deserialize_with.set(&meta.path, path); + } + } else if meta.path == BORROW { + let borrow_attribute = if meta.input.peek(Token![=]) { + // #[serde(borrow = "'a + 'b")] + let lifetimes = parse_lit_into_lifetimes(cx, &meta)?; + BorrowAttribute { + path: meta.path.clone(), + lifetimes: Some(lifetimes), + } + } else { + // #[serde(borrow)] + BorrowAttribute { + path: meta.path.clone(), + lifetimes: None, + } + }; + match &variant.fields { + syn::Fields::Unnamed(fields) if fields.unnamed.len() == 1 => { + borrow.set(&meta.path, borrow_attribute); + } + _ => { + let msg = "#[serde(borrow)] may only be used on newtype variants"; + cx.error_spanned_by(variant, msg); + } + } + } else if meta.path == UNTAGGED { + untagged.set_true(&meta.path); + } else { + let path = meta.path.to_token_stream().to_string().replace(' ', ""); + return Err( + meta.error(format_args!("unknown serde variant attribute `{}`", path)) + ); + } + Ok(()) + }) { + cx.syn_error(err); + } + } + + Variant { + name: Name::from_attrs(unraw(&variant.ident), ser_name, de_name, Some(de_aliases)), + rename_all_rules: RenameAllRules { + serialize: rename_all_ser_rule.get().unwrap_or(RenameRule::None), + deserialize: rename_all_de_rule.get().unwrap_or(RenameRule::None), + }, + ser_bound: ser_bound.get(), + de_bound: de_bound.get(), + skip_deserializing: skip_deserializing.get(), + skip_serializing: skip_serializing.get(), + other: other.get(), + serialize_with: serialize_with.get(), + deserialize_with: deserialize_with.get(), + borrow: borrow.get(), + untagged: untagged.get(), + } + } + + pub fn name(&self) -> &Name { + &self.name + } + + pub fn aliases(&self) -> &BTreeSet<String> { + self.name.deserialize_aliases() + } + + pub fn rename_by_rules(&mut self, rules: RenameAllRules) { + if !self.name.serialize_renamed { + self.name.serialize = rules.serialize.apply_to_variant(&self.name.serialize); + } + if !self.name.deserialize_renamed { + self.name.deserialize = rules.deserialize.apply_to_variant(&self.name.deserialize); + } + self.name + .deserialize_aliases + .insert(self.name.deserialize.clone()); + } + + pub fn rename_all_rules(&self) -> RenameAllRules { + self.rename_all_rules + } + + pub fn ser_bound(&self) -> Option<&[syn::WherePredicate]> { + self.ser_bound.as_ref().map(|vec| &vec[..]) + } + + pub fn de_bound(&self) -> Option<&[syn::WherePredicate]> { + self.de_bound.as_ref().map(|vec| &vec[..]) + } + + pub fn skip_deserializing(&self) -> bool { + self.skip_deserializing + } + + pub fn skip_serializing(&self) -> bool { + self.skip_serializing + } + + pub fn other(&self) -> bool { + self.other + } + + pub fn serialize_with(&self) -> Option<&syn::ExprPath> { + self.serialize_with.as_ref() + } + + pub fn deserialize_with(&self) -> Option<&syn::ExprPath> { + self.deserialize_with.as_ref() + } + + pub fn untagged(&self) -> bool { + self.untagged + } +} + +/// Represents field attribute information +pub struct Field { + name: Name, + skip_serializing: bool, + skip_deserializing: bool, + skip_serializing_if: Option<syn::ExprPath>, + default: Default, + serialize_with: Option<syn::ExprPath>, + deserialize_with: Option<syn::ExprPath>, + ser_bound: Option<Vec<syn::WherePredicate>>, + de_bound: Option<Vec<syn::WherePredicate>>, + borrowed_lifetimes: BTreeSet<syn::Lifetime>, + getter: Option<syn::ExprPath>, + flatten: bool, + transparent: bool, +} + +/// Represents the default to use for a field when deserializing. +pub enum Default { + /// Field must always be specified because it does not have a default. 
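+ // Hedged examples of the mapping below: no attribute yields Default::None, + // a bare #[serde(default)] yields Default::Default, and a hypothetical + // #[serde(default = "make_default")] yields Default::Path with the parsed + // make_default expression path.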
+ None, + /// The default is given by `std::default::Default::default()`. + Default, + /// The default is given by this function. + Path(syn::ExprPath), +} + +impl Default { + pub fn is_none(&self) -> bool { + match self { + Default::None => true, + Default::Default | Default::Path(_) => false, + } + } +} + +impl Field { + /// Extract out the `#[serde(...)]` attributes from a struct field. + pub fn from_ast( + cx: &Ctxt, + index: usize, + field: &syn::Field, + attrs: Option<&Variant>, + container_default: &Default, + ) -> Self { + let mut ser_name = Attr::none(cx, RENAME); + let mut de_name = Attr::none(cx, RENAME); + let mut de_aliases = VecAttr::none(cx, RENAME); + let mut skip_serializing = BoolAttr::none(cx, SKIP_SERIALIZING); + let mut skip_deserializing = BoolAttr::none(cx, SKIP_DESERIALIZING); + let mut skip_serializing_if = Attr::none(cx, SKIP_SERIALIZING_IF); + let mut default = Attr::none(cx, DEFAULT); + let mut serialize_with = Attr::none(cx, SERIALIZE_WITH); + let mut deserialize_with = Attr::none(cx, DESERIALIZE_WITH); + let mut ser_bound = Attr::none(cx, BOUND); + let mut de_bound = Attr::none(cx, BOUND); + let mut borrowed_lifetimes = Attr::none(cx, BORROW); + let mut getter = Attr::none(cx, GETTER); + let mut flatten = BoolAttr::none(cx, FLATTEN); + + let ident = match &field.ident { + Some(ident) => unraw(ident), + None => index.to_string(), + }; + + if let Some(borrow_attribute) = attrs.and_then(|variant| variant.borrow.as_ref()) { + if let Ok(borrowable) = borrowable_lifetimes(cx, &ident, field) { + if let Some(lifetimes) = &borrow_attribute.lifetimes { + for lifetime in lifetimes { + if !borrowable.contains(lifetime) { + let msg = + format!("field `{}` does not have lifetime {}", ident, lifetime); + cx.error_spanned_by(field, msg); + } + } + borrowed_lifetimes.set(&borrow_attribute.path, lifetimes.clone()); + } else { + borrowed_lifetimes.set(&borrow_attribute.path, borrowable); + } + } + } + + for attr in &field.attrs { + if attr.path() != SERDE { + continue; + } + + if let syn::Meta::List(meta) = &attr.meta { + if meta.tokens.is_empty() { + continue; + } + } + + if let Err(err) = attr.parse_nested_meta(|meta| { + if meta.path == RENAME { + // #[serde(rename = "foo")] + // #[serde(rename(serialize = "foo", deserialize = "bar"))] + let (ser, de) = get_multiple_renames(cx, &meta)?; + ser_name.set_opt(&meta.path, ser.as_ref().map(syn::LitStr::value)); + for de_value in de { + de_name.set_if_none(de_value.value()); + de_aliases.insert(&meta.path, de_value.value()); + } + } else if meta.path == ALIAS { + // #[serde(alias = "foo")] + if let Some(s) = get_lit_str(cx, ALIAS, &meta)? { + de_aliases.insert(&meta.path, s.value()); + } + } else if meta.path == DEFAULT { + if meta.input.peek(Token![=]) { + // #[serde(default = "...")] + if let Some(path) = parse_lit_into_expr_path(cx, DEFAULT, &meta)? { + default.set(&meta.path, Default::Path(path)); + } + } else { + // #[serde(default)] + default.set(&meta.path, Default::Default); + } + } else if meta.path == SKIP_SERIALIZING { + // #[serde(skip_serializing)] + skip_serializing.set_true(&meta.path); + } else if meta.path == SKIP_DESERIALIZING { + // #[serde(skip_deserializing)] + skip_deserializing.set_true(&meta.path); + } else if meta.path == SKIP { + // #[serde(skip)] + skip_serializing.set_true(&meta.path); + skip_deserializing.set_true(&meta.path); + } else if meta.path == SKIP_SERIALIZING_IF { + // #[serde(skip_serializing_if = "...")] + if let Some(path) = parse_lit_into_expr_path(cx, SKIP_SERIALIZING_IF, &meta)? 
{ + skip_serializing_if.set(&meta.path, path); + } + } else if meta.path == SERIALIZE_WITH { + // #[serde(serialize_with = "...")] + if let Some(path) = parse_lit_into_expr_path(cx, SERIALIZE_WITH, &meta)? { + serialize_with.set(&meta.path, path); + } + } else if meta.path == DESERIALIZE_WITH { + // #[serde(deserialize_with = "...")] + if let Some(path) = parse_lit_into_expr_path(cx, DESERIALIZE_WITH, &meta)? { + deserialize_with.set(&meta.path, path); + } + } else if meta.path == WITH { + // #[serde(with = "...")] + if let Some(path) = parse_lit_into_expr_path(cx, WITH, &meta)? { + let mut ser_path = path.clone(); + ser_path + .path + .segments + .push(Ident::new("serialize", Span::call_site()).into()); + serialize_with.set(&meta.path, ser_path); + let mut de_path = path; + de_path + .path + .segments + .push(Ident::new("deserialize", Span::call_site()).into()); + deserialize_with.set(&meta.path, de_path); + } + } else if meta.path == BOUND { + // #[serde(bound = "T: SomeBound")] + // #[serde(bound(serialize = "...", deserialize = "..."))] + let (ser, de) = get_where_predicates(cx, &meta)?; + ser_bound.set_opt(&meta.path, ser); + de_bound.set_opt(&meta.path, de); + } else if meta.path == BORROW { + if meta.input.peek(Token![=]) { + // #[serde(borrow = "'a + 'b")] + let lifetimes = parse_lit_into_lifetimes(cx, &meta)?; + if let Ok(borrowable) = borrowable_lifetimes(cx, &ident, field) { + for lifetime in &lifetimes { + if !borrowable.contains(lifetime) { + let msg = format!( + "field `{}` does not have lifetime {}", + ident, lifetime, + ); + cx.error_spanned_by(field, msg); + } + } + borrowed_lifetimes.set(&meta.path, lifetimes); + } + } else { + // #[serde(borrow)] + if let Ok(borrowable) = borrowable_lifetimes(cx, &ident, field) { + borrowed_lifetimes.set(&meta.path, borrowable); + } + } + } else if meta.path == GETTER { + // #[serde(getter = "...")] + if let Some(path) = parse_lit_into_expr_path(cx, GETTER, &meta)? { + getter.set(&meta.path, path); + } + } else if meta.path == FLATTEN { + // #[serde(flatten)] + flatten.set_true(&meta.path); + } else { + let path = meta.path.to_token_stream().to_string().replace(' ', ""); + return Err( + meta.error(format_args!("unknown serde field attribute `{}`", path)) + ); + } + Ok(()) + }) { + cx.syn_error(err); + } + } + + // If skip_deserializing is set, initialize the field to Default::default() unless a + // different default is specified by `#[serde(default = "...")]` on + // ourselves or our container (e.g. the struct we are in).
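+        //
+        // For example (an illustrative sketch; `S` is a hypothetical type):
+        //
+        //     #[derive(Deserialize)]
+        //     struct S {
+        //         #[serde(skip)]
+        //         count: u32, // not read from the input; filled with u32::default()
+        //     }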
+ if let Default::None = *container_default { + if skip_deserializing.0.value.is_some() { + default.set_if_none(Default::Default); + } + } + + let mut borrowed_lifetimes = borrowed_lifetimes.get().unwrap_or_default(); + if !borrowed_lifetimes.is_empty() { + // Cow<str> and Cow<[u8]> never borrow by default: + // + // impl<'de, 'a, T: ?Sized> Deserialize<'de> for Cow<'a, T> + // + // A #[serde(borrow)] attribute enables borrowing that corresponds + // roughly to these impls: + // + // impl<'de: 'a, 'a> Deserialize<'de> for Cow<'a, str> + // impl<'de: 'a, 'a> Deserialize<'de> for Cow<'a, [u8]> + if is_cow(&field.ty, is_str) { + let mut path = syn::Path { + leading_colon: None, + segments: Punctuated::new(), + }; + let span = Span::call_site(); + path.segments.push(Ident::new("_serde", span).into()); + path.segments.push(Ident::new("__private", span).into()); + path.segments.push(Ident::new("de", span).into()); + path.segments + .push(Ident::new("borrow_cow_str", span).into()); + let expr = syn::ExprPath { + attrs: Vec::new(), + qself: None, + path, + }; + deserialize_with.set_if_none(expr); + } else if is_cow(&field.ty, is_slice_u8) { + let mut path = syn::Path { + leading_colon: None, + segments: Punctuated::new(), + }; + let span = Span::call_site(); + path.segments.push(Ident::new("_serde", span).into()); + path.segments.push(Ident::new("__private", span).into()); + path.segments.push(Ident::new("de", span).into()); + path.segments + .push(Ident::new("borrow_cow_bytes", span).into()); + let expr = syn::ExprPath { + attrs: Vec::new(), + qself: None, + path, + }; + deserialize_with.set_if_none(expr); + } + } else if is_implicitly_borrowed(&field.ty) { + // Types &str and &[u8] are always implicitly borrowed. No need for + // a #[serde(borrow)]. + collect_lifetimes(&field.ty, &mut borrowed_lifetimes); + } + + Field { + name: Name::from_attrs(ident, ser_name, de_name, Some(de_aliases)), + skip_serializing: skip_serializing.get(), + skip_deserializing: skip_deserializing.get(), + skip_serializing_if: skip_serializing_if.get(), + default: default.get().unwrap_or(Default::None), + serialize_with: serialize_with.get(), + deserialize_with: deserialize_with.get(), + ser_bound: ser_bound.get(), + de_bound: de_bound.get(), + borrowed_lifetimes, + getter: getter.get(), + flatten: flatten.get(), + transparent: false, + } + } + + pub fn name(&self) -> &Name { + &self.name + } + + pub fn aliases(&self) -> &BTreeSet<String> { + self.name.deserialize_aliases() + } + + pub fn rename_by_rules(&mut self, rules: RenameAllRules) { + if !self.name.serialize_renamed { + self.name.serialize = rules.serialize.apply_to_field(&self.name.serialize); + } + if !self.name.deserialize_renamed { + self.name.deserialize = rules.deserialize.apply_to_field(&self.name.deserialize); + } + self.name + .deserialize_aliases + .insert(self.name.deserialize.clone()); + } + + pub fn skip_serializing(&self) -> bool { + self.skip_serializing + } + + pub fn skip_deserializing(&self) -> bool { + self.skip_deserializing + } + + pub fn skip_serializing_if(&self) -> Option<&syn::ExprPath> { + self.skip_serializing_if.as_ref() + } + + pub fn default(&self) -> &Default { + &self.default + } + + pub fn serialize_with(&self) -> Option<&syn::ExprPath> { + self.serialize_with.as_ref() + } + + pub fn deserialize_with(&self) -> Option<&syn::ExprPath> { + self.deserialize_with.as_ref() + } + + pub fn ser_bound(&self) -> Option<&[syn::WherePredicate]> { + self.ser_bound.as_ref().map(|vec| &vec[..]) + } + + pub fn de_bound(&self) -> 
Option<&[syn::WherePredicate]> { + self.de_bound.as_ref().map(|vec| &vec[..]) + } + + pub fn borrowed_lifetimes(&self) -> &BTreeSet<syn::Lifetime> { + &self.borrowed_lifetimes + } + + pub fn getter(&self) -> Option<&syn::ExprPath> { + self.getter.as_ref() + } + + pub fn flatten(&self) -> bool { + self.flatten + } + + pub fn transparent(&self) -> bool { + self.transparent + } + + pub fn mark_transparent(&mut self) { + self.transparent = true; + } +} + +type SerAndDe<T> = (Option<T>, Option<T>); + +fn get_ser_and_de<'c, T, F, R>( + cx: &'c Ctxt, + attr_name: Symbol, + meta: &ParseNestedMeta, + f: F, +) -> syn::Result<(VecAttr<'c, T>, VecAttr<'c, T>)> +where + T: Clone, + F: Fn(&Ctxt, Symbol, Symbol, &ParseNestedMeta) -> syn::Result<R>, + R: Into<Option<T>>, +{ + let mut ser_meta = VecAttr::none(cx, attr_name); + let mut de_meta = VecAttr::none(cx, attr_name); + + let lookahead = meta.input.lookahead1(); + if lookahead.peek(Token![=]) { + if let Some(both) = f(cx, attr_name, attr_name, meta)?.into() { + ser_meta.insert(&meta.path, both.clone()); + de_meta.insert(&meta.path, both); + } + } else if lookahead.peek(token::Paren) { + meta.parse_nested_meta(|meta| { + if meta.path == SERIALIZE { + if let Some(v) = f(cx, attr_name, SERIALIZE, &meta)?.into() { + ser_meta.insert(&meta.path, v); + } + } else if meta.path == DESERIALIZE { + if let Some(v) = f(cx, attr_name, DESERIALIZE, &meta)?.into() { + de_meta.insert(&meta.path, v); + } + } else { + return Err(meta.error(format_args!( + "malformed {0} attribute, expected `{0}(serialize = ..., deserialize = ...)`", + attr_name, + ))); + } + Ok(()) + })?; + } else { + return Err(lookahead.error()); + } + + Ok((ser_meta, de_meta)) +} + +fn get_renames( + cx: &Ctxt, + attr_name: Symbol, + meta: &ParseNestedMeta, +) -> syn::Result<SerAndDe<syn::LitStr>> { + let (ser, de) = get_ser_and_de(cx, attr_name, meta, get_lit_str2)?; + Ok((ser.at_most_one(), de.at_most_one())) +} + +fn get_multiple_renames( + cx: &Ctxt, + meta: &ParseNestedMeta, +) -> syn::Result<(Option<syn::LitStr>, Vec<syn::LitStr>)> { + let (ser, de) = get_ser_and_de(cx, RENAME, meta, get_lit_str2)?; + Ok((ser.at_most_one(), de.get())) +} + +fn get_where_predicates( + cx: &Ctxt, + meta: &ParseNestedMeta, +) -> syn::Result<SerAndDe<Vec<syn::WherePredicate>>> { + let (ser, de) = get_ser_and_de(cx, BOUND, meta, parse_lit_into_where)?; + Ok((ser.at_most_one(), de.at_most_one())) +} + +fn get_lit_str( + cx: &Ctxt, + attr_name: Symbol, + meta: &ParseNestedMeta, +) -> syn::Result<Option<syn::LitStr>> { + get_lit_str2(cx, attr_name, attr_name, meta) +} + +fn get_lit_str2( + cx: &Ctxt, + attr_name: Symbol, + meta_item_name: Symbol, + meta: &ParseNestedMeta, +) -> syn::Result<Option<syn::LitStr>> { + let expr: syn::Expr = meta.value()?.parse()?; + let mut value = &expr; + while let syn::Expr::Group(e) = value { + value = &e.expr; + } + if let syn::Expr::Lit(syn::ExprLit { + lit: syn::Lit::Str(lit), + .. + }) = value + { + let suffix = lit.suffix(); + if !suffix.is_empty() { + cx.error_spanned_by( + lit, + format!("unexpected suffix `{}` on string literal", suffix), + ); + } + Ok(Some(lit.clone())) + } else { + cx.error_spanned_by( + expr, + format!( + "expected serde {} attribute to be a string: `{} = \"...\"`", + attr_name, meta_item_name + ), + ); + Ok(None) + } +} + +fn parse_lit_into_path( + cx: &Ctxt, + attr_name: Symbol, + meta: &ParseNestedMeta, +) -> syn::Result<Option<syn::Path>> { + let string = match get_lit_str(cx, attr_name, meta)? 
{ + Some(string) => string, + None => return Ok(None), + }; + + Ok(match string.parse() { + Ok(path) => Some(path), + Err(_) => { + cx.error_spanned_by( + &string, + format!("failed to parse path: {:?}", string.value()), + ); + None + } + }) +} + +fn parse_lit_into_expr_path( + cx: &Ctxt, + attr_name: Symbol, + meta: &ParseNestedMeta, +) -> syn::Result<Option<syn::ExprPath>> { + let string = match get_lit_str(cx, attr_name, meta)? { + Some(string) => string, + None => return Ok(None), + }; + + Ok(match string.parse() { + Ok(expr) => Some(expr), + Err(_) => { + cx.error_spanned_by( + &string, + format!("failed to parse path: {:?}", string.value()), + ); + None + } + }) +} + +fn parse_lit_into_where( + cx: &Ctxt, + attr_name: Symbol, + meta_item_name: Symbol, + meta: &ParseNestedMeta, +) -> syn::Result<Vec<syn::WherePredicate>> { + let string = match get_lit_str2(cx, attr_name, meta_item_name, meta)? { + Some(string) => string, + None => return Ok(Vec::new()), + }; + + Ok( + match string.parse_with(Punctuated::<syn::WherePredicate, Token![,]>::parse_terminated) { + Ok(predicates) => Vec::from_iter(predicates), + Err(err) => { + cx.error_spanned_by(string, err); + Vec::new() + } + }, + ) +} + +fn parse_lit_into_ty( + cx: &Ctxt, + attr_name: Symbol, + meta: &ParseNestedMeta, +) -> syn::Result<Option<syn::Type>> { + let string = match get_lit_str(cx, attr_name, meta)? { + Some(string) => string, + None => return Ok(None), + }; + + Ok(match string.parse() { + Ok(ty) => Some(ty), + Err(_) => { + cx.error_spanned_by( + &string, + format!("failed to parse type: {} = {:?}", attr_name, string.value()), + ); + None + } + }) +} + +// Parses a string literal like "'a + 'b + 'c" containing a nonempty list of +// lifetimes separated by `+`. +fn parse_lit_into_lifetimes( + cx: &Ctxt, + meta: &ParseNestedMeta, +) -> syn::Result<BTreeSet<syn::Lifetime>> { + let string = match get_lit_str(cx, BORROW, meta)? { + Some(string) => string, + None => return Ok(BTreeSet::new()), + }; + + if let Ok(lifetimes) = string.parse_with(|input: ParseStream| { + let mut set = BTreeSet::new(); + while !input.is_empty() { + let lifetime: Lifetime = input.parse()?; + if !set.insert(lifetime.clone()) { + cx.error_spanned_by( + &string, + format!("duplicate borrowed lifetime `{}`", lifetime), + ); + } + if input.is_empty() { + break; + } + input.parse::<Token![+]>()?; + } + Ok(set) + }) { + if lifetimes.is_empty() { + cx.error_spanned_by(string, "at least one lifetime must be borrowed"); + } + return Ok(lifetimes); + } + + cx.error_spanned_by( + &string, + format!("failed to parse borrowed lifetimes: {:?}", string.value()), + ); + Ok(BTreeSet::new()) +} + +fn is_implicitly_borrowed(ty: &syn::Type) -> bool { + is_implicitly_borrowed_reference(ty) || is_option(ty, is_implicitly_borrowed_reference) +} + +fn is_implicitly_borrowed_reference(ty: &syn::Type) -> bool { + is_reference(ty, is_str) || is_reference(ty, is_slice_u8) +} + +// Whether the type looks like it might be `std::borrow::Cow<T>` where elem="T". +// This can have false negatives and false positives. 
+// +// False negative: +// +// use std::borrow::Cow as Pig; +// +// #[derive(Deserialize)] +// struct S<'a> { +// #[serde(borrow)] +// pig: Pig<'a, str>, +// } +// +// False positive: +// +// type str = [i16]; +// +// #[derive(Deserialize)] +// struct S<'a> { +// #[serde(borrow)] +// cow: Cow<'a, str>, +// } +fn is_cow(ty: &syn::Type, elem: fn(&syn::Type) -> bool) -> bool { + let path = match ungroup(ty) { + syn::Type::Path(ty) => &ty.path, + _ => { + return false; + } + }; + let seg = match path.segments.last() { + Some(seg) => seg, + None => { + return false; + } + }; + let args = match &seg.arguments { + syn::PathArguments::AngleBracketed(bracketed) => &bracketed.args, + _ => { + return false; + } + }; + seg.ident == "Cow" + && args.len() == 2 + && match (&args[0], &args[1]) { + (syn::GenericArgument::Lifetime(_), syn::GenericArgument::Type(arg)) => elem(arg), + _ => false, + } +} + +fn is_option(ty: &syn::Type, elem: fn(&syn::Type) -> bool) -> bool { + let path = match ungroup(ty) { + syn::Type::Path(ty) => &ty.path, + _ => { + return false; + } + }; + let seg = match path.segments.last() { + Some(seg) => seg, + None => { + return false; + } + }; + let args = match &seg.arguments { + syn::PathArguments::AngleBracketed(bracketed) => &bracketed.args, + _ => { + return false; + } + }; + seg.ident == "Option" + && args.len() == 1 + && match &args[0] { + syn::GenericArgument::Type(arg) => elem(arg), + _ => false, + } +} + +// Whether the type looks like it might be `&T` where elem="T". This can have +// false negatives and false positives. +// +// False negative: +// +// type Yarn = str; +// +// #[derive(Deserialize)] +// struct S<'a> { +// r: &'a Yarn, +// } +// +// False positive: +// +// type str = [i16]; +// +// #[derive(Deserialize)] +// struct S<'a> { +// r: &'a str, +// } +fn is_reference(ty: &syn::Type, elem: fn(&syn::Type) -> bool) -> bool { + match ungroup(ty) { + syn::Type::Reference(ty) => ty.mutability.is_none() && elem(&ty.elem), + _ => false, + } +} + +fn is_str(ty: &syn::Type) -> bool { + is_primitive_type(ty, "str") +} + +fn is_slice_u8(ty: &syn::Type) -> bool { + match ungroup(ty) { + syn::Type::Slice(ty) => is_primitive_type(&ty.elem, "u8"), + _ => false, + } +} + +fn is_primitive_type(ty: &syn::Type, primitive: &str) -> bool { + match ungroup(ty) { + syn::Type::Path(ty) => ty.qself.is_none() && is_primitive_path(&ty.path, primitive), + _ => false, + } +} + +fn is_primitive_path(path: &syn::Path, primitive: &str) -> bool { + path.leading_colon.is_none() + && path.segments.len() == 1 + && path.segments[0].ident == primitive + && path.segments[0].arguments.is_empty() +} + +// All lifetimes that this type could borrow from a Deserializer. +// +// For example a type `S<'a, 'b>` could borrow `'a` and `'b`. On the other hand +// a type `for<'a> fn(&'a str)` could not borrow `'a` from the Deserializer. +// +// This is used when there is an explicit or implicit `#[serde(borrow)]` +// attribute on the field so there must be at least one borrowable lifetime. 
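+//
+// For example (an illustrative sketch; `S` is a hypothetical type), the field
+// below is rejected because `String` owns its data and has no lifetime to
+// borrow:
+//
+//     #[derive(Deserialize)]
+//     struct S {
+//         #[serde(borrow)]
+//         s: String, // error: field `s` has no lifetimes to borrow
+//     }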
+fn borrowable_lifetimes( + cx: &Ctxt, + name: &str, + field: &syn::Field, +) -> Result<BTreeSet<syn::Lifetime>, ()> { + let mut lifetimes = BTreeSet::new(); + collect_lifetimes(&field.ty, &mut lifetimes); + if lifetimes.is_empty() { + let msg = format!("field `{}` has no lifetimes to borrow", name); + cx.error_spanned_by(field, msg); + Err(()) + } else { + Ok(lifetimes) + } +} + +fn collect_lifetimes(ty: &syn::Type, out: &mut BTreeSet<syn::Lifetime>) { + match ty { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] + syn::Type::Slice(ty) => { + collect_lifetimes(&ty.elem, out); + } + syn::Type::Array(ty) => { + collect_lifetimes(&ty.elem, out); + } + syn::Type::Ptr(ty) => { + collect_lifetimes(&ty.elem, out); + } + syn::Type::Reference(ty) => { + out.extend(ty.lifetime.iter().cloned()); + collect_lifetimes(&ty.elem, out); + } + syn::Type::Tuple(ty) => { + for elem in &ty.elems { + collect_lifetimes(elem, out); + } + } + syn::Type::Path(ty) => { + if let Some(qself) = &ty.qself { + collect_lifetimes(&qself.ty, out); + } + for seg in &ty.path.segments { + if let syn::PathArguments::AngleBracketed(bracketed) = &seg.arguments { + for arg in &bracketed.args { + match arg { + syn::GenericArgument::Lifetime(lifetime) => { + out.insert(lifetime.clone()); + } + syn::GenericArgument::Type(ty) => { + collect_lifetimes(ty, out); + } + syn::GenericArgument::AssocType(binding) => { + collect_lifetimes(&binding.ty, out); + } + syn::GenericArgument::Const(_) + | syn::GenericArgument::AssocConst(_) + | syn::GenericArgument::Constraint(_) + | _ => {} + } + } + } + } + } + syn::Type::Paren(ty) => { + collect_lifetimes(&ty.elem, out); + } + syn::Type::Group(ty) => { + collect_lifetimes(&ty.elem, out); + } + syn::Type::Macro(ty) => { + collect_lifetimes_from_tokens(ty.mac.tokens.clone(), out); + } + syn::Type::BareFn(_) + | syn::Type::Never(_) + | syn::Type::TraitObject(_) + | syn::Type::ImplTrait(_) + | syn::Type::Infer(_) + | syn::Type::Verbatim(_) => {} + + _ => {} + } +} + +fn collect_lifetimes_from_tokens(tokens: TokenStream, out: &mut BTreeSet<syn::Lifetime>) { + let mut iter = tokens.into_iter(); + while let Some(tt) = iter.next() { + match &tt { + TokenTree::Punct(op) if op.as_char() == '\'' && op.spacing() == Spacing::Joint => { + if let Some(TokenTree::Ident(ident)) = iter.next() { + out.insert(syn::Lifetime { + apostrophe: op.span(), + ident, + }); + } + } + TokenTree::Group(group) => { + let tokens = group.stream(); + collect_lifetimes_from_tokens(tokens, out); + } + _ => {} + } + } +} diff --git a/vendor/serde_derive/src/internals/case.rs b/vendor/serde_derive/src/internals/case.rs new file mode 100644 index 0000000..8c8c02e --- /dev/null +++ b/vendor/serde_derive/src/internals/case.rs @@ -0,0 +1,200 @@ +//! Code to convert the Rust-styled field/variant (e.g. `my_field`, `MyType`) to the +//! case of the source (e.g. `my-field`, `MY_FIELD`). + +use self::RenameRule::*; +use std::fmt::{self, Debug, Display}; + +/// The different possible ways to change case of fields in a struct, or variants in an enum. +#[derive(Copy, Clone, PartialEq)] +pub enum RenameRule { + /// Don't apply a default rename rule. + None, + /// Rename direct children to "lowercase" style. + LowerCase, + /// Rename direct children to "UPPERCASE" style. + UpperCase, + /// Rename direct children to "PascalCase" style, as typically used for + /// enum variants. + PascalCase, + /// Rename direct children to "camelCase" style. 
+ CamelCase, + /// Rename direct children to "snake_case" style, as commonly used for + /// fields. + SnakeCase, + /// Rename direct children to "SCREAMING_SNAKE_CASE" style, as commonly + /// used for constants. + ScreamingSnakeCase, + /// Rename direct children to "kebab-case" style. + KebabCase, + /// Rename direct children to "SCREAMING-KEBAB-CASE" style. + ScreamingKebabCase, +} + +static RENAME_RULES: &[(&str, RenameRule)] = &[ + ("lowercase", LowerCase), + ("UPPERCASE", UpperCase), + ("PascalCase", PascalCase), + ("camelCase", CamelCase), + ("snake_case", SnakeCase), + ("SCREAMING_SNAKE_CASE", ScreamingSnakeCase), + ("kebab-case", KebabCase), + ("SCREAMING-KEBAB-CASE", ScreamingKebabCase), +]; + +impl RenameRule { + pub fn from_str(rename_all_str: &str) -> Result<Self, ParseError> { + for (name, rule) in RENAME_RULES { + if rename_all_str == *name { + return Ok(*rule); + } + } + Err(ParseError { + unknown: rename_all_str, + }) + } + + /// Apply a renaming rule to an enum variant, returning the version expected in the source. + pub fn apply_to_variant(self, variant: &str) -> String { + match self { + None | PascalCase => variant.to_owned(), + LowerCase => variant.to_ascii_lowercase(), + UpperCase => variant.to_ascii_uppercase(), + CamelCase => variant[..1].to_ascii_lowercase() + &variant[1..], + SnakeCase => { + let mut snake = String::new(); + for (i, ch) in variant.char_indices() { + if i > 0 && ch.is_uppercase() { + snake.push('_'); + } + snake.push(ch.to_ascii_lowercase()); + } + snake + } + ScreamingSnakeCase => SnakeCase.apply_to_variant(variant).to_ascii_uppercase(), + KebabCase => SnakeCase.apply_to_variant(variant).replace('_', "-"), + ScreamingKebabCase => ScreamingSnakeCase + .apply_to_variant(variant) + .replace('_', "-"), + } + } + + /// Apply a renaming rule to a struct field, returning the version expected in the source. + pub fn apply_to_field(self, field: &str) -> String { + match self { + None | LowerCase | SnakeCase => field.to_owned(), + UpperCase => field.to_ascii_uppercase(), + PascalCase => { + let mut pascal = String::new(); + let mut capitalize = true; + for ch in field.chars() { + if ch == '_' { + capitalize = true; + } else if capitalize { + pascal.push(ch.to_ascii_uppercase()); + capitalize = false; + } else { + pascal.push(ch); + } + } + pascal + } + CamelCase => { + let pascal = PascalCase.apply_to_field(field); + pascal[..1].to_ascii_lowercase() + &pascal[1..] + } + ScreamingSnakeCase => field.to_ascii_uppercase(), + KebabCase => field.replace('_', "-"), + ScreamingKebabCase => ScreamingSnakeCase.apply_to_field(field).replace('_', "-"), + } + } + + /// Returns the `RenameRule` if it is not `None`, `rule_b` otherwise. 
+ pub fn or(self, rule_b: Self) -> Self { + match self { + None => rule_b, + _ => self, + } + } +} + +pub struct ParseError<'a> { + unknown: &'a str, +} + +impl<'a> Display for ParseError<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("unknown rename rule `rename_all = ")?; + Debug::fmt(self.unknown, f)?; + f.write_str("`, expected one of ")?; + for (i, (name, _rule)) in RENAME_RULES.iter().enumerate() { + if i > 0 { + f.write_str(", ")?; + } + Debug::fmt(name, f)?; + } + Ok(()) + } +} + +#[test] +fn rename_variants() { + for &(original, lower, upper, camel, snake, screaming, kebab, screaming_kebab) in &[ + ( + "Outcome", "outcome", "OUTCOME", "outcome", "outcome", "OUTCOME", "outcome", "OUTCOME", + ), + ( + "VeryTasty", + "verytasty", + "VERYTASTY", + "veryTasty", + "very_tasty", + "VERY_TASTY", + "very-tasty", + "VERY-TASTY", + ), + ("A", "a", "A", "a", "a", "A", "a", "A"), + ("Z42", "z42", "Z42", "z42", "z42", "Z42", "z42", "Z42"), + ] { + assert_eq!(None.apply_to_variant(original), original); + assert_eq!(LowerCase.apply_to_variant(original), lower); + assert_eq!(UpperCase.apply_to_variant(original), upper); + assert_eq!(PascalCase.apply_to_variant(original), original); + assert_eq!(CamelCase.apply_to_variant(original), camel); + assert_eq!(SnakeCase.apply_to_variant(original), snake); + assert_eq!(ScreamingSnakeCase.apply_to_variant(original), screaming); + assert_eq!(KebabCase.apply_to_variant(original), kebab); + assert_eq!( + ScreamingKebabCase.apply_to_variant(original), + screaming_kebab + ); + } +} + +#[test] +fn rename_fields() { + for &(original, upper, pascal, camel, screaming, kebab, screaming_kebab) in &[ + ( + "outcome", "OUTCOME", "Outcome", "outcome", "OUTCOME", "outcome", "OUTCOME", + ), + ( + "very_tasty", + "VERY_TASTY", + "VeryTasty", + "veryTasty", + "VERY_TASTY", + "very-tasty", + "VERY-TASTY", + ), + ("a", "A", "A", "a", "A", "a", "A"), + ("z42", "Z42", "Z42", "z42", "Z42", "z42", "Z42"), + ] { + assert_eq!(None.apply_to_field(original), original); + assert_eq!(UpperCase.apply_to_field(original), upper); + assert_eq!(PascalCase.apply_to_field(original), pascal); + assert_eq!(CamelCase.apply_to_field(original), camel); + assert_eq!(SnakeCase.apply_to_field(original), original); + assert_eq!(ScreamingSnakeCase.apply_to_field(original), screaming); + assert_eq!(KebabCase.apply_to_field(original), kebab); + assert_eq!(ScreamingKebabCase.apply_to_field(original), screaming_kebab); + } +} diff --git a/vendor/serde_derive/src/internals/check.rs b/vendor/serde_derive/src/internals/check.rs new file mode 100644 index 0000000..52b0f37 --- /dev/null +++ b/vendor/serde_derive/src/internals/check.rs @@ -0,0 +1,477 @@ +use crate::internals::ast::{Container, Data, Field, Style}; +use crate::internals::attr::{Default, Identifier, TagType}; +use crate::internals::{ungroup, Ctxt, Derive}; +use syn::{Member, Type}; + +// Cross-cutting checks that require looking at more than a single attrs object. +// Simpler checks should happen when parsing and building the attrs. 
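+//
+// For example (an illustrative sketch; `E` is a hypothetical enum), the
+// conflict below involves both the container attrs (the tag name) and the
+// field attrs (the field name), so it can only be caught by a cross-cutting
+// check (check_internal_tag_field_name_conflict below):
+//
+//     #[derive(Serialize)]
+//     #[serde(tag = "type")]
+//     enum E {
+//         V { r#type: String }, // field name collides with the tag
+//     }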
+pub fn check(cx: &Ctxt, cont: &mut Container, derive: Derive) { + check_default_on_tuple(cx, cont); + check_remote_generic(cx, cont); + check_getter(cx, cont); + check_flatten(cx, cont); + check_identifier(cx, cont); + check_variant_skip_attrs(cx, cont); + check_internal_tag_field_name_conflict(cx, cont); + check_adjacent_tag_conflict(cx, cont); + check_transparent(cx, cont, derive); + check_from_and_try_from(cx, cont); +} + +// If some field of a tuple struct is marked #[serde(default)] then all fields +// after it must also be marked with that attribute, or the struct must have a +// container-level serde(default) attribute. A field's default value is only +// used for tuple fields if the sequence is exhausted at that point; that means +// all subsequent fields will fail to deserialize if they don't have their own +// default. +fn check_default_on_tuple(cx: &Ctxt, cont: &Container) { + if let Default::None = cont.attrs.default() { + if let Data::Struct(Style::Tuple, fields) = &cont.data { + let mut first_default_index = None; + for (i, field) in fields.iter().enumerate() { + // Skipped fields automatically get the #[serde(default)] + // attribute. We are interested only in non-skipped fields here. + if field.attrs.skip_deserializing() { + continue; + } + if let Default::None = field.attrs.default() { + if let Some(first) = first_default_index { + cx.error_spanned_by( + field.ty, + format!("field must have #[serde(default)] because previous field {} has #[serde(default)]", first), + ); + } + continue; + } + if first_default_index.is_none() { + first_default_index = Some(i); + } + } + } + } +} + +// Remote derive definition type must have either all of the generics of the +// remote type: +// +// #[serde(remote = "Generic")] +// struct Generic<T> {…} +// +// or none of them, i.e. defining impls for one concrete instantiation of the +// remote type only: +// +// #[serde(remote = "Generic<T>")] +// struct ConcreteDef {…} +// +fn check_remote_generic(cx: &Ctxt, cont: &Container) { + if let Some(remote) = cont.attrs.remote() { + let local_has_generic = !cont.generics.params.is_empty(); + let remote_has_generic = !remote.segments.last().unwrap().arguments.is_none(); + if local_has_generic && remote_has_generic { + cx.error_spanned_by(remote, "remove generic parameters from this path"); + } + } +} + +// Getters are only allowed inside structs (not enums) with the `remote` +// attribute. +fn check_getter(cx: &Ctxt, cont: &Container) { + match cont.data { + Data::Enum(_) => { + if cont.data.has_getter() { + cx.error_spanned_by( + cont.original, + "#[serde(getter = \"...\")] is not allowed in an enum", + ); + } + } + Data::Struct(_, _) => { + if cont.data.has_getter() && cont.attrs.remote().is_none() { + cx.error_spanned_by( + cont.original, + "#[serde(getter = \"...\")] can only be used in structs that have #[serde(remote = \"...\")]", + ); + } + } + } +} + +// Flattening has some restrictions we can test.
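+//
+// For example (an illustrative sketch; `Inner` is a hypothetical type), both
+// of these are rejected by check_flatten_field below:
+//
+//     #[derive(Serialize)]
+//     struct Tuple(#[serde(flatten)] Inner, u32); // flatten in a tuple struct
+//
+//     #[derive(Serialize)]
+//     struct Newtype(#[serde(flatten)] Inner); // flatten in a newtype struct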
+fn check_flatten(cx: &Ctxt, cont: &Container) { + match &cont.data { + Data::Enum(variants) => { + for variant in variants { + for field in &variant.fields { + check_flatten_field(cx, variant.style, field); + } + } + } + Data::Struct(style, fields) => { + for field in fields { + check_flatten_field(cx, *style, field); + } + } + } +} + +fn check_flatten_field(cx: &Ctxt, style: Style, field: &Field) { + if !field.attrs.flatten() { + return; + } + match style { + Style::Tuple => { + cx.error_spanned_by( + field.original, + "#[serde(flatten)] cannot be used on tuple structs", + ); + } + Style::Newtype => { + cx.error_spanned_by( + field.original, + "#[serde(flatten)] cannot be used on newtype structs", + ); + } + _ => {} + } +} + +// The `other` attribute must be used at most once and it must be the last +// variant of an enum. +// +// Inside a `variant_identifier` all variants must be unit variants. Inside a +// `field_identifier` all but possibly one variant must be unit variants. The +// last variant may be a newtype variant which is an implicit "other" case. +fn check_identifier(cx: &Ctxt, cont: &Container) { + let variants = match &cont.data { + Data::Enum(variants) => variants, + Data::Struct(_, _) => return, + }; + + for (i, variant) in variants.iter().enumerate() { + match ( + variant.style, + cont.attrs.identifier(), + variant.attrs.other(), + cont.attrs.tag(), + ) { + // The `other` attribute may not be used in a variant_identifier. + (_, Identifier::Variant, true, _) => { + cx.error_spanned_by( + variant.original, + "#[serde(other)] may not be used on a variant identifier", + ); + } + + // Variant with `other` attribute cannot appear in untagged enum + (_, Identifier::No, true, &TagType::None) => { + cx.error_spanned_by( + variant.original, + "#[serde(other)] cannot appear on untagged enum", + ); + } + + // Variant with `other` attribute must be the last one. + (Style::Unit, Identifier::Field, true, _) | (Style::Unit, Identifier::No, true, _) => { + if i < variants.len() - 1 { + cx.error_spanned_by( + variant.original, + "#[serde(other)] must be on the last variant", + ); + } + } + + // Variant with `other` attribute must be a unit variant. + (_, Identifier::Field, true, _) | (_, Identifier::No, true, _) => { + cx.error_spanned_by( + variant.original, + "#[serde(other)] must be on a unit variant", + ); + } + + // Any sort of variant is allowed if this is not an identifier. + (_, Identifier::No, false, _) => {} + + // Unit variant without `other` attribute is always fine. + (Style::Unit, _, false, _) => {} + + // The last field is allowed to be a newtype catch-all. + (Style::Newtype, Identifier::Field, false, _) => { + if i < variants.len() - 1 { + cx.error_spanned_by( + variant.original, + format!("`{}` must be the last variant", variant.ident), + ); + } + } + + (_, Identifier::Field, false, _) => { + cx.error_spanned_by( + variant.original, + "#[serde(field_identifier)] may only contain unit variants", + ); + } + + (_, Identifier::Variant, false, _) => { + cx.error_spanned_by( + variant.original, + "#[serde(variant_identifier)] may only contain unit variants", + ); + } + } + } +} + +// Skip-(de)serializing attributes are not allowed on variants marked +// (de)serialize_with. 
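+//
+// For example (an illustrative sketch; `E` and `ser_v` are hypothetical
+// names), this combination is rejected:
+//
+//     #[derive(Serialize)]
+//     enum E {
+//         #[serde(serialize_with = "ser_v", skip_serializing)]
+//         V(u8),
+//     }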
+fn check_variant_skip_attrs(cx: &Ctxt, cont: &Container) { + let variants = match &cont.data { + Data::Enum(variants) => variants, + Data::Struct(_, _) => return, + }; + + for variant in variants { + if variant.attrs.serialize_with().is_some() { + if variant.attrs.skip_serializing() { + cx.error_spanned_by( + variant.original, + format!( + "variant `{}` cannot have both #[serde(serialize_with)] and #[serde(skip_serializing)]", + variant.ident + ), + ); + } + + for field in &variant.fields { + let member = member_message(&field.member); + + if field.attrs.skip_serializing() { + cx.error_spanned_by( + variant.original, + format!( + "variant `{}` cannot have both #[serde(serialize_with)] and a field {} marked with #[serde(skip_serializing)]", + variant.ident, member + ), + ); + } + + if field.attrs.skip_serializing_if().is_some() { + cx.error_spanned_by( + variant.original, + format!( + "variant `{}` cannot have both #[serde(serialize_with)] and a field {} marked with #[serde(skip_serializing_if)]", + variant.ident, member + ), + ); + } + } + } + + if variant.attrs.deserialize_with().is_some() { + if variant.attrs.skip_deserializing() { + cx.error_spanned_by( + variant.original, + format!( + "variant `{}` cannot have both #[serde(deserialize_with)] and #[serde(skip_deserializing)]", + variant.ident + ), + ); + } + + for field in &variant.fields { + if field.attrs.skip_deserializing() { + let member = member_message(&field.member); + + cx.error_spanned_by( + variant.original, + format!( + "variant `{}` cannot have both #[serde(deserialize_with)] and a field {} marked with #[serde(skip_deserializing)]", + variant.ident, member + ), + ); + } + } + } + } +} + +// The tag of an internally-tagged struct variant must not be the same as either +// one of its fields, as this would result in duplicate keys in the serialized +// output and/or ambiguity in the to-be-deserialized input. +fn check_internal_tag_field_name_conflict(cx: &Ctxt, cont: &Container) { + let variants = match &cont.data { + Data::Enum(variants) => variants, + Data::Struct(_, _) => return, + }; + + let tag = match cont.attrs.tag() { + TagType::Internal { tag } => tag.as_str(), + TagType::External | TagType::Adjacent { .. } | TagType::None => return, + }; + + let diagnose_conflict = || { + cx.error_spanned_by( + cont.original, + format!("variant field name `{}` conflicts with internal tag", tag), + ); + }; + + for variant in variants { + match variant.style { + Style::Struct => { + if variant.attrs.untagged() { + continue; + } + for field in &variant.fields { + let check_ser = + !(field.attrs.skip_serializing() || variant.attrs.skip_serializing()); + let check_de = + !(field.attrs.skip_deserializing() || variant.attrs.skip_deserializing()); + let name = field.attrs.name(); + let ser_name = name.serialize_name(); + + if check_ser && ser_name == tag { + diagnose_conflict(); + return; + } + + for de_name in field.attrs.aliases() { + if check_de && de_name == tag { + diagnose_conflict(); + return; + } + } + } + } + Style::Unit | Style::Newtype | Style::Tuple => {} + } + } +} + +// In the case of adjacently-tagged enums, the type and the contents tag must +// differ, for the same reason. +fn check_adjacent_tag_conflict(cx: &Ctxt, cont: &Container) { + let (type_tag, content_tag) = match cont.attrs.tag() { + TagType::Adjacent { tag, content } => (tag, content), + TagType::Internal { .. 
} | TagType::External | TagType::None => return, + }; + + if type_tag == content_tag { + cx.error_spanned_by( + cont.original, + format!( + "enum tags `{}` for type and content conflict with each other", + type_tag + ), + ); + } +} + +// Enums and unit structs cannot be transparent. +fn check_transparent(cx: &Ctxt, cont: &mut Container, derive: Derive) { + if !cont.attrs.transparent() { + return; + } + + if cont.attrs.type_from().is_some() { + cx.error_spanned_by( + cont.original, + "#[serde(transparent)] is not allowed with #[serde(from = \"...\")]", + ); + } + + if cont.attrs.type_try_from().is_some() { + cx.error_spanned_by( + cont.original, + "#[serde(transparent)] is not allowed with #[serde(try_from = \"...\")]", + ); + } + + if cont.attrs.type_into().is_some() { + cx.error_spanned_by( + cont.original, + "#[serde(transparent)] is not allowed with #[serde(into = \"...\")]", + ); + } + + let fields = match &mut cont.data { + Data::Enum(_) => { + cx.error_spanned_by( + cont.original, + "#[serde(transparent)] is not allowed on an enum", + ); + return; + } + Data::Struct(Style::Unit, _) => { + cx.error_spanned_by( + cont.original, + "#[serde(transparent)] is not allowed on a unit struct", + ); + return; + } + Data::Struct(_, fields) => fields, + }; + + let mut transparent_field = None; + + for field in fields { + if allow_transparent(field, derive) { + if transparent_field.is_some() { + cx.error_spanned_by( + cont.original, + "#[serde(transparent)] requires struct to have at most one transparent field", + ); + return; + } + transparent_field = Some(field); + } + } + + match transparent_field { + Some(transparent_field) => transparent_field.attrs.mark_transparent(), + None => match derive { + Derive::Serialize => { + cx.error_spanned_by( + cont.original, + "#[serde(transparent)] requires at least one field that is not skipped", + ); + } + Derive::Deserialize => { + cx.error_spanned_by( + cont.original, + "#[serde(transparent)] requires at least one field that is neither skipped nor has a default", + ); + } + }, + } +} + +fn member_message(member: &Member) -> String { + match member { + Member::Named(ident) => format!("`{}`", ident), + Member::Unnamed(i) => format!("#{}", i.index), + } +} + +fn allow_transparent(field: &Field, derive: Derive) -> bool { + if let Type::Path(ty) = ungroup(field.ty) { + if let Some(seg) = ty.path.segments.last() { + if seg.ident == "PhantomData" { + return false; + } + } + } + + match derive { + Derive::Serialize => !field.attrs.skip_serializing(), + Derive::Deserialize => !field.attrs.skip_deserializing() && field.attrs.default().is_none(), + } +} + +fn check_from_and_try_from(cx: &Ctxt, cont: &mut Container) { + if cont.attrs.type_from().is_some() && cont.attrs.type_try_from().is_some() { + cx.error_spanned_by( + cont.original, + "#[serde(from = \"...\")] and #[serde(try_from = \"...\")] conflict with each other", + ); + } +} diff --git a/vendor/serde_derive/src/internals/ctxt.rs b/vendor/serde_derive/src/internals/ctxt.rs new file mode 100644 index 0000000..a47bfa4 --- /dev/null +++ b/vendor/serde_derive/src/internals/ctxt.rs @@ -0,0 +1,68 @@ +use quote::ToTokens; +use std::cell::RefCell; +use std::fmt::Display; +use std::thread; + +/// A type to collect errors together and format them. +/// +/// Dropping this object will cause a panic. It must be consumed using `check`. +/// +/// References can be shared since this type uses run-time exclusive mut checking. +#[derive(Default)] +pub struct Ctxt { + // The contents will be set to `None` during checking. 
This is so that checking can be + enforced. + errors: RefCell<Option<Vec<syn::Error>>>, +} + +impl Ctxt { + /// Create a new context object. + /// + /// This object contains no errors, but will still trigger a panic if it is not `check`ed. + pub fn new() -> Self { + Ctxt { + errors: RefCell::new(Some(Vec::new())), + } + } + + /// Add an error to the context object with a tokenizable object. + /// + /// The object is used for spanning in error messages. + pub fn error_spanned_by<A: ToTokens, T: Display>(&self, obj: A, msg: T) { + self.errors + .borrow_mut() + .as_mut() + .unwrap() + // Curb monomorphization from generating too many identical methods. + .push(syn::Error::new_spanned(obj.into_token_stream(), msg)); + } + + /// Add one of Syn's parse errors. + pub fn syn_error(&self, err: syn::Error) { + self.errors.borrow_mut().as_mut().unwrap().push(err); + } + + /// Consume this object, producing a formatted error string if there are errors. + pub fn check(self) -> syn::Result<()> { + let mut errors = self.errors.borrow_mut().take().unwrap().into_iter(); + + let mut combined = match errors.next() { + Some(first) => first, + None => return Ok(()), + }; + + for rest in errors { + combined.combine(rest); + } + + Err(combined) + } +} + +impl Drop for Ctxt { + fn drop(&mut self) { + if !thread::panicking() && self.errors.borrow().is_some() { + panic!("forgot to check for errors"); + } + } +} diff --git a/vendor/serde_derive/src/internals/mod.rs b/vendor/serde_derive/src/internals/mod.rs new file mode 100644 index 0000000..f98ef08 --- /dev/null +++ b/vendor/serde_derive/src/internals/mod.rs @@ -0,0 +1,27 @@ +pub mod ast; +pub mod attr; + +mod case; +mod check; +mod ctxt; +mod receiver; +mod respan; +mod symbol; + +use syn::Type; + +pub use self::ctxt::Ctxt; +pub use self::receiver::replace_receiver; + +#[derive(Copy, Clone)] +pub enum Derive { + Serialize, + Deserialize, +} + +pub fn ungroup(mut ty: &Type) -> &Type { + while let Type::Group(group) = ty { + ty = &group.elem; + } + ty +} diff --git a/vendor/serde_derive/src/internals/receiver.rs b/vendor/serde_derive/src/internals/receiver.rs new file mode 100644 index 0000000..fa2a77d --- /dev/null +++ b/vendor/serde_derive/src/internals/receiver.rs @@ -0,0 +1,292 @@ +use crate::internals::respan::respan; +use proc_macro2::Span; +use quote::ToTokens; +use std::mem; +use syn::punctuated::Punctuated; +use syn::{ + parse_quote, Data, DeriveInput, Expr, ExprPath, GenericArgument, GenericParam, Generics, Macro, + Path, PathArguments, QSelf, ReturnType, Token, Type, TypeParamBound, TypePath, WherePredicate, +}; + +pub fn replace_receiver(input: &mut DeriveInput) { + let self_ty = { + let ident = &input.ident; + let ty_generics = input.generics.split_for_impl().1; + parse_quote!(#ident #ty_generics) + }; + let mut visitor = ReplaceReceiver(&self_ty); + visitor.visit_generics_mut(&mut input.generics); + visitor.visit_data_mut(&mut input.data); +} + +struct ReplaceReceiver<'a>(&'a TypePath); + +impl ReplaceReceiver<'_> { + fn self_ty(&self, span: Span) -> TypePath { + let tokens = self.0.to_token_stream(); + let respanned = respan(tokens, span); + syn::parse2(respanned).unwrap() + } + + fn self_to_qself(&self, qself: &mut Option<QSelf>, path: &mut Path) { + if path.leading_colon.is_some() || path.segments[0].ident != "Self" { + return; + } + + if path.segments.len() == 1 { + self.self_to_expr_path(path); + return; + } + + let span = path.segments[0].ident.span(); + *qself = Some(QSelf { + lt_token: Token![<](span), + ty:
Box::new(Type::Path(self.self_ty(span))), + position: 0, + as_token: None, + gt_token: Token![>](span), + }); + + path.leading_colon = Some(**path.segments.pairs().next().unwrap().punct().unwrap()); + + let segments = mem::replace(&mut path.segments, Punctuated::new()); + path.segments = segments.into_pairs().skip(1).collect(); + } + + fn self_to_expr_path(&self, path: &mut Path) { + let self_ty = self.self_ty(path.segments[0].ident.span()); + let variant = mem::replace(path, self_ty.path); + for segment in &mut path.segments { + if let PathArguments::AngleBracketed(bracketed) = &mut segment.arguments { + if bracketed.colon2_token.is_none() && !bracketed.args.is_empty() { + bracketed.colon2_token = Some(<Token![::]>::default()); + } + } + } + if variant.segments.len() > 1 { + path.segments.push_punct(<Token![::]>::default()); + path.segments.extend(variant.segments.into_pairs().skip(1)); + } + } +} + +impl ReplaceReceiver<'_> { + // `Self` -> `Receiver` + fn visit_type_mut(&mut self, ty: &mut Type) { + let span = if let Type::Path(node) = ty { + if node.qself.is_none() && node.path.is_ident("Self") { + node.path.segments[0].ident.span() + } else { + self.visit_type_path_mut(node); + return; + } + } else { + self.visit_type_mut_impl(ty); + return; + }; + *ty = self.self_ty(span).into(); + } + + // `Self::Assoc` -> `<Receiver>::Assoc` + fn visit_type_path_mut(&mut self, ty: &mut TypePath) { + if ty.qself.is_none() { + self.self_to_qself(&mut ty.qself, &mut ty.path); + } + self.visit_type_path_mut_impl(ty); + } + + // `Self::method` -> `<Receiver>::method` + fn visit_expr_path_mut(&mut self, expr: &mut ExprPath) { + if expr.qself.is_none() { + self.self_to_qself(&mut expr.qself, &mut expr.path); + } + self.visit_expr_path_mut_impl(expr); + } + + // Everything below is simply traversing the syntax tree. 
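+    //
+    // The net effect, as an illustrative sketch (`ListDef` is a hypothetical
+    // remote definition type): in
+    //
+    //     struct ListDef<T> { next: Option<Box<Self>> }
+    //
+    // the field type is rewritten to `Option<Box<ListDef<T>>>`, so that it
+    // stays correct when quoted into generated impls where `Self` would refer
+    // to a different type.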
+ + fn visit_type_mut_impl(&mut self, ty: &mut Type) { + match ty { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] + Type::Array(ty) => { + self.visit_type_mut(&mut ty.elem); + self.visit_expr_mut(&mut ty.len); + } + Type::BareFn(ty) => { + for arg in &mut ty.inputs { + self.visit_type_mut(&mut arg.ty); + } + self.visit_return_type_mut(&mut ty.output); + } + Type::Group(ty) => self.visit_type_mut(&mut ty.elem), + Type::ImplTrait(ty) => { + for bound in &mut ty.bounds { + self.visit_type_param_bound_mut(bound); + } + } + Type::Macro(ty) => self.visit_macro_mut(&mut ty.mac), + Type::Paren(ty) => self.visit_type_mut(&mut ty.elem), + Type::Path(ty) => { + if let Some(qself) = &mut ty.qself { + self.visit_type_mut(&mut qself.ty); + } + self.visit_path_mut(&mut ty.path); + } + Type::Ptr(ty) => self.visit_type_mut(&mut ty.elem), + Type::Reference(ty) => self.visit_type_mut(&mut ty.elem), + Type::Slice(ty) => self.visit_type_mut(&mut ty.elem), + Type::TraitObject(ty) => { + for bound in &mut ty.bounds { + self.visit_type_param_bound_mut(bound); + } + } + Type::Tuple(ty) => { + for elem in &mut ty.elems { + self.visit_type_mut(elem); + } + } + + Type::Infer(_) | Type::Never(_) | Type::Verbatim(_) => {} + + _ => {} + } + } + + fn visit_type_path_mut_impl(&mut self, ty: &mut TypePath) { + if let Some(qself) = &mut ty.qself { + self.visit_type_mut(&mut qself.ty); + } + self.visit_path_mut(&mut ty.path); + } + + fn visit_expr_path_mut_impl(&mut self, expr: &mut ExprPath) { + if let Some(qself) = &mut expr.qself { + self.visit_type_mut(&mut qself.ty); + } + self.visit_path_mut(&mut expr.path); + } + + fn visit_path_mut(&mut self, path: &mut Path) { + for segment in &mut path.segments { + self.visit_path_arguments_mut(&mut segment.arguments); + } + } + + fn visit_path_arguments_mut(&mut self, arguments: &mut PathArguments) { + match arguments { + PathArguments::None => {} + PathArguments::AngleBracketed(arguments) => { + for arg in &mut arguments.args { + match arg { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] + GenericArgument::Type(arg) => self.visit_type_mut(arg), + GenericArgument::AssocType(arg) => self.visit_type_mut(&mut arg.ty), + GenericArgument::Lifetime(_) + | GenericArgument::Const(_) + | GenericArgument::AssocConst(_) + | GenericArgument::Constraint(_) => {} + _ => {} + } + } + } + PathArguments::Parenthesized(arguments) => { + for argument in &mut arguments.inputs { + self.visit_type_mut(argument); + } + self.visit_return_type_mut(&mut arguments.output); + } + } + } + + fn visit_return_type_mut(&mut self, return_type: &mut ReturnType) { + match return_type { + ReturnType::Default => {} + ReturnType::Type(_, output) => self.visit_type_mut(output), + } + } + + fn visit_type_param_bound_mut(&mut self, bound: &mut TypeParamBound) { + match bound { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] + TypeParamBound::Trait(bound) => self.visit_path_mut(&mut bound.path), + TypeParamBound::Lifetime(_) | TypeParamBound::Verbatim(_) => {} + _ => {} + } + } + + fn visit_generics_mut(&mut self, generics: &mut Generics) { + for param in &mut generics.params { + match param { + GenericParam::Type(param) => { + for bound in &mut param.bounds { + self.visit_type_param_bound_mut(bound); + } + } + GenericParam::Lifetime(_) | GenericParam::Const(_) => {} + } + } + if let Some(where_clause) = &mut generics.where_clause { + for predicate in &mut where_clause.predicates { + match predicate { + #![cfg_attr(all(test, 
exhaustive), deny(non_exhaustive_omitted_patterns))] + WherePredicate::Type(predicate) => { + self.visit_type_mut(&mut predicate.bounded_ty); + for bound in &mut predicate.bounds { + self.visit_type_param_bound_mut(bound); + } + } + WherePredicate::Lifetime(_) => {} + _ => {} + } + } + } + } + + fn visit_data_mut(&mut self, data: &mut Data) { + match data { + Data::Struct(data) => { + for field in &mut data.fields { + self.visit_type_mut(&mut field.ty); + } + } + Data::Enum(data) => { + for variant in &mut data.variants { + for field in &mut variant.fields { + self.visit_type_mut(&mut field.ty); + } + } + } + Data::Union(_) => {} + } + } + + fn visit_expr_mut(&mut self, expr: &mut Expr) { + match expr { + Expr::Binary(expr) => { + self.visit_expr_mut(&mut expr.left); + self.visit_expr_mut(&mut expr.right); + } + Expr::Call(expr) => { + self.visit_expr_mut(&mut expr.func); + for arg in &mut expr.args { + self.visit_expr_mut(arg); + } + } + Expr::Cast(expr) => { + self.visit_expr_mut(&mut expr.expr); + self.visit_type_mut(&mut expr.ty); + } + Expr::Field(expr) => self.visit_expr_mut(&mut expr.base), + Expr::Index(expr) => { + self.visit_expr_mut(&mut expr.expr); + self.visit_expr_mut(&mut expr.index); + } + Expr::Paren(expr) => self.visit_expr_mut(&mut expr.expr), + Expr::Path(expr) => self.visit_expr_path_mut(expr), + Expr::Unary(expr) => self.visit_expr_mut(&mut expr.expr), + _ => {} + } + } + + fn visit_macro_mut(&mut self, _mac: &mut Macro) {} +} diff --git a/vendor/serde_derive/src/internals/respan.rs b/vendor/serde_derive/src/internals/respan.rs new file mode 100644 index 0000000..dcec701 --- /dev/null +++ b/vendor/serde_derive/src/internals/respan.rs @@ -0,0 +1,16 @@ +use proc_macro2::{Group, Span, TokenStream, TokenTree}; + +pub(crate) fn respan(stream: TokenStream, span: Span) -> TokenStream { + stream + .into_iter() + .map(|token| respan_token(token, span)) + .collect() +} + +fn respan_token(mut token: TokenTree, span: Span) -> TokenTree { + if let TokenTree::Group(g) = &mut token { + *g = Group::new(g.delimiter(), respan(g.stream(), span)); + } + token.set_span(span); + token +} diff --git a/vendor/serde_derive/src/internals/symbol.rs b/vendor/serde_derive/src/internals/symbol.rs new file mode 100644 index 0000000..572391a --- /dev/null +++ b/vendor/serde_derive/src/internals/symbol.rs @@ -0,0 +1,71 @@ +use std::fmt::{self, Display}; +use syn::{Ident, Path}; + +#[derive(Copy, Clone)] +pub struct Symbol(&'static str); + +pub const ALIAS: Symbol = Symbol("alias"); +pub const BORROW: Symbol = Symbol("borrow"); +pub const BOUND: Symbol = Symbol("bound"); +pub const CONTENT: Symbol = Symbol("content"); +pub const CRATE: Symbol = Symbol("crate"); +pub const DEFAULT: Symbol = Symbol("default"); +pub const DENY_UNKNOWN_FIELDS: Symbol = Symbol("deny_unknown_fields"); +pub const DESERIALIZE: Symbol = Symbol("deserialize"); +pub const DESERIALIZE_WITH: Symbol = Symbol("deserialize_with"); +pub const EXPECTING: Symbol = Symbol("expecting"); +pub const FIELD_IDENTIFIER: Symbol = Symbol("field_identifier"); +pub const FLATTEN: Symbol = Symbol("flatten"); +pub const FROM: Symbol = Symbol("from"); +pub const GETTER: Symbol = Symbol("getter"); +pub const INTO: Symbol = Symbol("into"); +pub const NON_EXHAUSTIVE: Symbol = Symbol("non_exhaustive"); +pub const OTHER: Symbol = Symbol("other"); +pub const REMOTE: Symbol = Symbol("remote"); +pub const RENAME: Symbol = Symbol("rename"); +pub const RENAME_ALL: Symbol = Symbol("rename_all"); +pub const RENAME_ALL_FIELDS: Symbol = 
Symbol("rename_all_fields"); +pub const REPR: Symbol = Symbol("repr"); +pub const SERDE: Symbol = Symbol("serde"); +pub const SERIALIZE: Symbol = Symbol("serialize"); +pub const SERIALIZE_WITH: Symbol = Symbol("serialize_with"); +pub const SKIP: Symbol = Symbol("skip"); +pub const SKIP_DESERIALIZING: Symbol = Symbol("skip_deserializing"); +pub const SKIP_SERIALIZING: Symbol = Symbol("skip_serializing"); +pub const SKIP_SERIALIZING_IF: Symbol = Symbol("skip_serializing_if"); +pub const TAG: Symbol = Symbol("tag"); +pub const TRANSPARENT: Symbol = Symbol("transparent"); +pub const TRY_FROM: Symbol = Symbol("try_from"); +pub const UNTAGGED: Symbol = Symbol("untagged"); +pub const VARIANT_IDENTIFIER: Symbol = Symbol("variant_identifier"); +pub const WITH: Symbol = Symbol("with"); + +impl PartialEq<Symbol> for Ident { + fn eq(&self, word: &Symbol) -> bool { + self == word.0 + } +} + +impl<'a> PartialEq<Symbol> for &'a Ident { + fn eq(&self, word: &Symbol) -> bool { + *self == word.0 + } +} + +impl PartialEq<Symbol> for Path { + fn eq(&self, word: &Symbol) -> bool { + self.is_ident(word.0) + } +} + +impl<'a> PartialEq<Symbol> for &'a Path { + fn eq(&self, word: &Symbol) -> bool { + self.is_ident(word.0) + } +} + +impl Display for Symbol { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str(self.0) + } +} diff --git a/vendor/serde_derive/src/lib.rs b/vendor/serde_derive/src/lib.rs new file mode 100644 index 0000000..8c4a6ca --- /dev/null +++ b/vendor/serde_derive/src/lib.rs @@ -0,0 +1,101 @@ +//! This crate provides Serde's two derive macros. +//! +//! ```edition2021 +//! # use serde_derive::{Deserialize, Serialize}; +//! # +//! #[derive(Serialize, Deserialize)] +//! # struct S; +//! # +//! # fn main() {} +//! ``` +//! +//! Please refer to [https://serde.rs/derive.html] for how to set this up. +//! +//! 
[https://serde.rs/derive.html]: https://serde.rs/derive.html + +#![doc(html_root_url = "https://docs.rs/serde_derive/1.0.195")] +// Ignored clippy lints +#![allow( + // clippy false positive: https://github.com/rust-lang/rust-clippy/issues/7054 + clippy::branches_sharing_code, + clippy::cognitive_complexity, + // clippy bug: https://github.com/rust-lang/rust-clippy/issues/7575 + clippy::collapsible_match, + clippy::derive_partial_eq_without_eq, + clippy::enum_variant_names, + // clippy bug: https://github.com/rust-lang/rust-clippy/issues/6797 + clippy::manual_map, + clippy::match_like_matches_macro, + clippy::needless_pass_by_value, + clippy::too_many_arguments, + clippy::trivially_copy_pass_by_ref, + clippy::used_underscore_binding, + clippy::wildcard_in_or_patterns, + // clippy bug: https://github.com/rust-lang/rust-clippy/issues/5704 + clippy::unnested_or_patterns, +)] +// Ignored clippy_pedantic lints +#![allow( + clippy::cast_possible_truncation, + clippy::checked_conversions, + clippy::doc_markdown, + clippy::enum_glob_use, + clippy::indexing_slicing, + clippy::items_after_statements, + clippy::let_underscore_untyped, + clippy::manual_assert, + clippy::map_err_ignore, + clippy::match_same_arms, + // clippy bug: https://github.com/rust-lang/rust-clippy/issues/6984 + clippy::match_wildcard_for_single_variants, + clippy::module_name_repetitions, + clippy::must_use_candidate, + clippy::similar_names, + clippy::single_match_else, + clippy::struct_excessive_bools, + clippy::too_many_lines, + clippy::unseparated_literal_suffix, + clippy::unused_self, + clippy::use_self, + clippy::wildcard_imports +)] +#![cfg_attr(all(test, exhaustive), feature(non_exhaustive_omitted_patterns_lint))] + +extern crate proc_macro2; +extern crate quote; +extern crate syn; + +extern crate proc_macro; + +mod internals; + +use proc_macro::TokenStream; +use syn::parse_macro_input; +use syn::DeriveInput; + +#[macro_use] +mod bound; +#[macro_use] +mod fragment; + +mod de; +mod dummy; +mod pretend; +mod ser; +mod this; + +#[proc_macro_derive(Serialize, attributes(serde))] +pub fn derive_serialize(input: TokenStream) -> TokenStream { + let mut input = parse_macro_input!(input as DeriveInput); + ser::expand_derive_serialize(&mut input) + .unwrap_or_else(syn::Error::into_compile_error) + .into() +} + +#[proc_macro_derive(Deserialize, attributes(serde))] +pub fn derive_deserialize(input: TokenStream) -> TokenStream { + let mut input = parse_macro_input!(input as DeriveInput); + de::expand_derive_deserialize(&mut input) + .unwrap_or_else(syn::Error::into_compile_error) + .into() +} diff --git a/vendor/serde_derive/src/pretend.rs b/vendor/serde_derive/src/pretend.rs new file mode 100644 index 0000000..2c9e779 --- /dev/null +++ b/vendor/serde_derive/src/pretend.rs @@ -0,0 +1,185 @@ +use crate::internals::ast::{Container, Data, Field, Style, Variant}; +use proc_macro2::TokenStream; +use quote::{format_ident, quote}; + +// Suppress dead_code warnings that would otherwise appear when using a remote +// derive. Other than this pretend code, a struct annotated with remote derive +// never has its fields referenced and an enum annotated with remote derive +// never has its variants constructed. 
+// +// warning: field is never used: `i` +// --> src/main.rs:4:20 +// | +// 4 | struct StructDef { i: i32 } +// | ^^^^^^ +// +// warning: variant is never constructed: `V` +// --> src/main.rs:8:16 +// | +// 8 | enum EnumDef { V } +// | ^ +// +pub fn pretend_used(cont: &Container, is_packed: bool) -> TokenStream { + let pretend_fields = pretend_fields_used(cont, is_packed); + let pretend_variants = pretend_variants_used(cont); + + quote! { + #pretend_fields + #pretend_variants + } +} + +// For structs with named fields, expands to: +// +// match None::<&T> { +// Some(T { a: __v0, b: __v1 }) => {} +// _ => {} +// } +// +// For packed structs on sufficiently new rustc, expands to: +// +// match None::<&T> { +// Some(__v @ T { a: _, b: _ }) => { +// let _ = addr_of!(__v.a); +// let _ = addr_of!(__v.b); +// } +// _ => {} +// } +// +// For packed structs on older rustc, we assume Sized and !Drop, and expand to: +// +// match None::<T> { +// Some(T { a: __v0, b: __v1 }) => {} +// _ => {} +// } +// +// For enums, expands to the following but only including struct variants: +// +// match None::<&T> { +// Some(T::A { a: __v0 }) => {} +// Some(T::B { b: __v0 }) => {} +// _ => {} +// } +// +fn pretend_fields_used(cont: &Container, is_packed: bool) -> TokenStream { + match &cont.data { + Data::Enum(variants) => pretend_fields_used_enum(cont, variants), + Data::Struct(Style::Struct | Style::Tuple | Style::Newtype, fields) => { + if is_packed { + pretend_fields_used_struct_packed(cont, fields) + } else { + pretend_fields_used_struct(cont, fields) + } + } + Data::Struct(Style::Unit, _) => quote!(), + } +} + +fn pretend_fields_used_struct(cont: &Container, fields: &[Field]) -> TokenStream { + let type_ident = &cont.ident; + let (_, ty_generics, _) = cont.generics.split_for_impl(); + + let members = fields.iter().map(|field| &field.member); + let placeholders = (0usize..).map(|i| format_ident!("__v{}", i)); + + quote! { + match _serde::__private::None::<&#type_ident #ty_generics> { + _serde::__private::Some(#type_ident { #(#members: #placeholders),* }) => {} + _ => {} + } + } +} + +fn pretend_fields_used_struct_packed(cont: &Container, fields: &[Field]) -> TokenStream { + let type_ident = &cont.ident; + let (_, ty_generics, _) = cont.generics.split_for_impl(); + + let members = fields.iter().map(|field| &field.member).collect::<Vec<_>>(); + + quote! { + match _serde::__private::None::<&#type_ident #ty_generics> { + _serde::__private::Some(__v @ #type_ident { #(#members: _),* }) => { + #( + let _ = _serde::__private::ptr::addr_of!(__v.#members); + )* + } + _ => {} + } + } +} + +fn pretend_fields_used_enum(cont: &Container, variants: &[Variant]) -> TokenStream { + let type_ident = &cont.ident; + let (_, ty_generics, _) = cont.generics.split_for_impl(); + + let patterns = variants + .iter() + .filter_map(|variant| match variant.style { + Style::Struct | Style::Tuple | Style::Newtype => { + let variant_ident = &variant.ident; + let members = variant.fields.iter().map(|field| &field.member); + let placeholders = (0usize..).map(|i| format_ident!("__v{}", i)); + Some(quote!(#type_ident::#variant_ident { #(#members: #placeholders),* })) + } + Style::Unit => None, + }) + .collect::<Vec<_>>(); + + quote! 
{ + match _serde::__private::None::<&#type_ident #ty_generics> { + #( + _serde::__private::Some(#patterns) => {} + )* + _ => {} + } + } +} + +// Expands to one of these per enum variant: +// +// match None { +// Some((__v0, __v1,)) => { +// let _ = E::V { a: __v0, b: __v1 }; +// } +// _ => {} +// } +// +fn pretend_variants_used(cont: &Container) -> TokenStream { + let variants = match &cont.data { + Data::Enum(variants) => variants, + Data::Struct(_, _) => { + return quote!(); + } + }; + + let type_ident = &cont.ident; + let (_, ty_generics, _) = cont.generics.split_for_impl(); + let turbofish = ty_generics.as_turbofish(); + + let cases = variants.iter().map(|variant| { + let variant_ident = &variant.ident; + let placeholders = &(0..variant.fields.len()) + .map(|i| format_ident!("__v{}", i)) + .collect::<Vec<_>>(); + + let pat = match variant.style { + Style::Struct => { + let members = variant.fields.iter().map(|field| &field.member); + quote!({ #(#members: #placeholders),* }) + } + Style::Tuple | Style::Newtype => quote!(( #(#placeholders),* )), + Style::Unit => quote!(), + }; + + quote! { + match _serde::__private::None { + _serde::__private::Some((#(#placeholders,)*)) => { + let _ = #type_ident::#variant_ident #turbofish #pat; + } + _ => {} + } + } + }); + + quote!(#(#cases)*) +} diff --git a/vendor/serde_derive/src/ser.rs b/vendor/serde_derive/src/ser.rs new file mode 100644 index 0000000..3be51ee --- /dev/null +++ b/vendor/serde_derive/src/ser.rs @@ -0,0 +1,1359 @@ +use crate::fragment::{Fragment, Match, Stmts}; +use crate::internals::ast::{Container, Data, Field, Style, Variant}; +use crate::internals::{attr, replace_receiver, Ctxt, Derive}; +use crate::{bound, dummy, pretend, this}; +use proc_macro2::{Span, TokenStream}; +use quote::{quote, quote_spanned}; +use syn::spanned::Spanned; +use syn::{parse_quote, Ident, Index, Member}; + +pub fn expand_derive_serialize(input: &mut syn::DeriveInput) -> syn::Result<TokenStream> { + replace_receiver(input); + + let ctxt = Ctxt::new(); + let cont = match Container::from_ast(&ctxt, input, Derive::Serialize) { + Some(cont) => cont, + None => return Err(ctxt.check().unwrap_err()), + }; + precondition(&ctxt, &cont); + ctxt.check()?; + + let ident = &cont.ident; + let params = Parameters::new(&cont); + let (impl_generics, ty_generics, where_clause) = params.generics.split_for_impl(); + let body = Stmts(serialize_body(&cont, ¶ms)); + let serde = cont.attrs.serde_path(); + + let impl_block = if let Some(remote) = cont.attrs.remote() { + let vis = &input.vis; + let used = pretend::pretend_used(&cont, params.is_packed); + quote! { + impl #impl_generics #ident #ty_generics #where_clause { + #vis fn serialize<__S>(__self: &#remote #ty_generics, __serializer: __S) -> #serde::__private::Result<__S::Ok, __S::Error> + where + __S: #serde::Serializer, + { + #used + #body + } + } + } + } else { + quote! 
{
+            #[automatically_derived]
+            impl #impl_generics #serde::Serialize for #ident #ty_generics #where_clause {
+                fn serialize<__S>(&self, __serializer: __S) -> #serde::__private::Result<__S::Ok, __S::Error>
+                where
+                    __S: #serde::Serializer,
+                {
+                    #body
+                }
+            }
+        }
+    };
+
+    Ok(dummy::wrap_in_const(
+        cont.attrs.custom_serde_path(),
+        impl_block,
+    ))
+}
+
+fn precondition(cx: &Ctxt, cont: &Container) {
+    match cont.attrs.identifier() {
+        attr::Identifier::No => {}
+        attr::Identifier::Field => {
+            cx.error_spanned_by(cont.original, "field identifiers cannot be serialized");
+        }
+        attr::Identifier::Variant => {
+            cx.error_spanned_by(cont.original, "variant identifiers cannot be serialized");
+        }
+    }
+}
+
+struct Parameters {
+    /// Variable holding the value being serialized. Either `self` for local
+    /// types or `__self` for remote types.
+    self_var: Ident,
+
+    /// Path to the type the impl is for. Either a single `Ident` for local
+    /// types (does not include generic parameters) or `some::remote::Path` for
+    /// remote types.
+    this_type: syn::Path,
+
+    /// Same as `this_type` but using `::<T>` for generic parameters for use in
+    /// expression position.
+    this_value: syn::Path,
+
+    /// Generics including any explicit and inferred bounds for the impl.
+    generics: syn::Generics,
+
+    /// Type has a `serde(remote = "...")` attribute.
+    is_remote: bool,
+
+    /// Type has a repr(packed) attribute.
+    is_packed: bool,
+}
+
+impl Parameters {
+    fn new(cont: &Container) -> Self {
+        let is_remote = cont.attrs.remote().is_some();
+        let self_var = if is_remote {
+            Ident::new("__self", Span::call_site())
+        } else {
+            Ident::new("self", Span::call_site())
+        };
+
+        let this_type = this::this_type(cont);
+        let this_value = this::this_value(cont);
+        let is_packed = cont.attrs.is_packed();
+        let generics = build_generics(cont);
+
+        Parameters {
+            self_var,
+            this_type,
+            this_value,
+            generics,
+            is_remote,
+            is_packed,
+        }
+    }
+
+    /// Type name to use in error messages and `&'static str` arguments to
+    /// various Serializer methods.
+    fn type_name(&self) -> String {
+        self.this_type.segments.last().unwrap().ident.to_string()
+    }
+}
+
+// All the generics in the input, plus a bound `T: Serialize` for each generic
+// field type that will be serialized by us.
+fn build_generics(cont: &Container) -> syn::Generics {
+    let generics = bound::without_defaults(cont.generics);
+
+    let generics =
+        bound::with_where_predicates_from_fields(cont, &generics, attr::Field::ser_bound);
+
+    let generics =
+        bound::with_where_predicates_from_variants(cont, &generics, attr::Variant::ser_bound);
+
+    match cont.attrs.ser_bound() {
+        Some(predicates) => bound::with_where_predicates(&generics, predicates),
+        None => bound::with_bound(
+            cont,
+            &generics,
+            needs_serialize_bound,
+            &parse_quote!(_serde::Serialize),
+        ),
+    }
+}
+
+// Fields with a `skip_serializing` or `serialize_with` attribute, or which
+// belong to a variant with a `skip_serializing` or `serialize_with` attribute,
+// are not serialized by us so we do not generate a bound. Fields with a `bound`
+// attribute specify their own bound so we do not generate one. All other fields
+// may need a `T: Serialize` bound where T is the type of the field.
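+//
+// For example (an illustrative case; the names `S`, `T`, and `U` are
+// arbitrary, not taken from this file):
+//
+//     #[derive(Serialize)]
+//     struct S<T, U> {
+//         t: T,
+//         #[serde(skip_serializing)]
+//         u: U,
+//     }
+//
+// Only `t` is serialized by us, so the generated impl is bounded
+// `where T: Serialize` with no corresponding bound on `U`.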
+fn needs_serialize_bound(field: &attr::Field, variant: Option<&attr::Variant>) -> bool { + !field.skip_serializing() + && field.serialize_with().is_none() + && field.ser_bound().is_none() + && variant.map_or(true, |variant| { + !variant.skip_serializing() + && variant.serialize_with().is_none() + && variant.ser_bound().is_none() + }) +} + +fn serialize_body(cont: &Container, params: &Parameters) -> Fragment { + if cont.attrs.transparent() { + serialize_transparent(cont, params) + } else if let Some(type_into) = cont.attrs.type_into() { + serialize_into(params, type_into) + } else { + match &cont.data { + Data::Enum(variants) => serialize_enum(params, variants, &cont.attrs), + Data::Struct(Style::Struct, fields) => serialize_struct(params, fields, &cont.attrs), + Data::Struct(Style::Tuple, fields) => { + serialize_tuple_struct(params, fields, &cont.attrs) + } + Data::Struct(Style::Newtype, fields) => { + serialize_newtype_struct(params, &fields[0], &cont.attrs) + } + Data::Struct(Style::Unit, _) => serialize_unit_struct(&cont.attrs), + } + } +} + +fn serialize_transparent(cont: &Container, params: &Parameters) -> Fragment { + let fields = match &cont.data { + Data::Struct(_, fields) => fields, + Data::Enum(_) => unreachable!(), + }; + + let self_var = ¶ms.self_var; + let transparent_field = fields.iter().find(|f| f.attrs.transparent()).unwrap(); + let member = &transparent_field.member; + + let path = match transparent_field.attrs.serialize_with() { + Some(path) => quote!(#path), + None => { + let span = transparent_field.original.span(); + quote_spanned!(span=> _serde::Serialize::serialize) + } + }; + + quote_block! { + #path(&#self_var.#member, __serializer) + } +} + +fn serialize_into(params: &Parameters, type_into: &syn::Type) -> Fragment { + let self_var = ¶ms.self_var; + quote_block! { + _serde::Serialize::serialize( + &_serde::__private::Into::<#type_into>::into(_serde::__private::Clone::clone(#self_var)), + __serializer) + } +} + +fn serialize_unit_struct(cattrs: &attr::Container) -> Fragment { + let type_name = cattrs.name().serialize_name(); + + quote_expr! { + _serde::Serializer::serialize_unit_struct(__serializer, #type_name) + } +} + +fn serialize_newtype_struct( + params: &Parameters, + field: &Field, + cattrs: &attr::Container, +) -> Fragment { + let type_name = cattrs.name().serialize_name(); + + let mut field_expr = get_member( + params, + field, + &Member::Unnamed(Index { + index: 0, + span: Span::call_site(), + }), + ); + if let Some(path) = field.attrs.serialize_with() { + field_expr = wrap_serialize_field_with(params, field.ty, path, &field_expr); + } + + let span = field.original.span(); + let func = quote_spanned!(span=> _serde::Serializer::serialize_newtype_struct); + quote_expr! 
{ + #func(__serializer, #type_name, #field_expr) + } +} + +fn serialize_tuple_struct( + params: &Parameters, + fields: &[Field], + cattrs: &attr::Container, +) -> Fragment { + let serialize_stmts = + serialize_tuple_struct_visitor(fields, params, false, &TupleTrait::SerializeTupleStruct); + + let type_name = cattrs.name().serialize_name(); + + let mut serialized_fields = fields + .iter() + .enumerate() + .filter(|(_, field)| !field.attrs.skip_serializing()) + .peekable(); + + let let_mut = mut_if(serialized_fields.peek().is_some()); + + let len = serialized_fields + .map(|(i, field)| match field.attrs.skip_serializing_if() { + None => quote!(1), + Some(path) => { + let index = syn::Index { + index: i as u32, + span: Span::call_site(), + }; + let field_expr = get_member(params, field, &Member::Unnamed(index)); + quote!(if #path(#field_expr) { 0 } else { 1 }) + } + }) + .fold(quote!(0), |sum, expr| quote!(#sum + #expr)); + + quote_block! { + let #let_mut __serde_state = _serde::Serializer::serialize_tuple_struct(__serializer, #type_name, #len)?; + #(#serialize_stmts)* + _serde::ser::SerializeTupleStruct::end(__serde_state) + } +} + +fn serialize_struct(params: &Parameters, fields: &[Field], cattrs: &attr::Container) -> Fragment { + assert!(fields.len() as u64 <= u64::from(u32::max_value())); + + if cattrs.has_flatten() { + serialize_struct_as_map(params, fields, cattrs) + } else { + serialize_struct_as_struct(params, fields, cattrs) + } +} + +fn serialize_struct_tag_field(cattrs: &attr::Container, struct_trait: &StructTrait) -> TokenStream { + match cattrs.tag() { + attr::TagType::Internal { tag } => { + let type_name = cattrs.name().serialize_name(); + let func = struct_trait.serialize_field(Span::call_site()); + quote! { + #func(&mut __serde_state, #tag, #type_name)?; + } + } + _ => quote! {}, + } +} + +fn serialize_struct_as_struct( + params: &Parameters, + fields: &[Field], + cattrs: &attr::Container, +) -> Fragment { + let serialize_fields = + serialize_struct_visitor(fields, params, false, &StructTrait::SerializeStruct); + + let type_name = cattrs.name().serialize_name(); + + let tag_field = serialize_struct_tag_field(cattrs, &StructTrait::SerializeStruct); + let tag_field_exists = !tag_field.is_empty(); + + let mut serialized_fields = fields + .iter() + .filter(|&field| !field.attrs.skip_serializing()) + .peekable(); + + let let_mut = mut_if(serialized_fields.peek().is_some() || tag_field_exists); + + let len = serialized_fields + .map(|field| match field.attrs.skip_serializing_if() { + None => quote!(1), + Some(path) => { + let field_expr = get_member(params, field, &field.member); + quote!(if #path(#field_expr) { 0 } else { 1 }) + } + }) + .fold( + quote!(#tag_field_exists as usize), + |sum, expr| quote!(#sum + #expr), + ); + + quote_block! 
{ + let #let_mut __serde_state = _serde::Serializer::serialize_struct(__serializer, #type_name, #len)?; + #tag_field + #(#serialize_fields)* + _serde::ser::SerializeStruct::end(__serde_state) + } +} + +fn serialize_struct_as_map( + params: &Parameters, + fields: &[Field], + cattrs: &attr::Container, +) -> Fragment { + let serialize_fields = + serialize_struct_visitor(fields, params, false, &StructTrait::SerializeMap); + + let tag_field = serialize_struct_tag_field(cattrs, &StructTrait::SerializeMap); + let tag_field_exists = !tag_field.is_empty(); + + let mut serialized_fields = fields + .iter() + .filter(|&field| !field.attrs.skip_serializing()) + .peekable(); + + let let_mut = mut_if(serialized_fields.peek().is_some() || tag_field_exists); + + let len = if cattrs.has_flatten() { + quote!(_serde::__private::None) + } else { + let len = serialized_fields + .map(|field| match field.attrs.skip_serializing_if() { + None => quote!(1), + Some(path) => { + let field_expr = get_member(params, field, &field.member); + quote!(if #path(#field_expr) { 0 } else { 1 }) + } + }) + .fold( + quote!(#tag_field_exists as usize), + |sum, expr| quote!(#sum + #expr), + ); + quote!(_serde::__private::Some(#len)) + }; + + quote_block! { + let #let_mut __serde_state = _serde::Serializer::serialize_map(__serializer, #len)?; + #tag_field + #(#serialize_fields)* + _serde::ser::SerializeMap::end(__serde_state) + } +} + +fn serialize_enum(params: &Parameters, variants: &[Variant], cattrs: &attr::Container) -> Fragment { + assert!(variants.len() as u64 <= u64::from(u32::max_value())); + + let self_var = ¶ms.self_var; + + let mut arms: Vec<_> = variants + .iter() + .enumerate() + .map(|(variant_index, variant)| { + serialize_variant(params, variant, variant_index as u32, cattrs) + }) + .collect(); + + if cattrs.remote().is_some() && cattrs.non_exhaustive() { + arms.push(quote! { + ref unrecognized => _serde::__private::Err(_serde::ser::Error::custom(_serde::__private::ser::CannotSerializeVariant(unrecognized))), + }); + } + + quote_expr! { + match *#self_var { + #(#arms)* + } + } +} + +fn serialize_variant( + params: &Parameters, + variant: &Variant, + variant_index: u32, + cattrs: &attr::Container, +) -> TokenStream { + let this_value = ¶ms.this_value; + let variant_ident = &variant.ident; + + if variant.attrs.skip_serializing() { + let skipped_msg = format!( + "the enum variant {}::{} cannot be serialized", + params.type_name(), + variant_ident + ); + let skipped_err = quote! { + _serde::__private::Err(_serde::ser::Error::custom(#skipped_msg)) + }; + let fields_pat = match variant.style { + Style::Unit => quote!(), + Style::Newtype | Style::Tuple => quote!((..)), + Style::Struct => quote!({ .. }), + }; + quote! { + #this_value::#variant_ident #fields_pat => #skipped_err, + } + } else { + // variant wasn't skipped + let case = match variant.style { + Style::Unit => { + quote! { + #this_value::#variant_ident + } + } + Style::Newtype => { + quote! { + #this_value::#variant_ident(ref __field0) + } + } + Style::Tuple => { + let field_names = (0..variant.fields.len()) + .map(|i| Ident::new(&format!("__field{}", i), Span::call_site())); + quote! { + #this_value::#variant_ident(#(ref #field_names),*) + } + } + Style::Struct => { + let members = variant.fields.iter().map(|f| &f.member); + quote! 
{ + #this_value::#variant_ident { #(ref #members),* } + } + } + }; + + let body = Match(match (cattrs.tag(), variant.attrs.untagged()) { + (attr::TagType::External, false) => { + serialize_externally_tagged_variant(params, variant, variant_index, cattrs) + } + (attr::TagType::Internal { tag }, false) => { + serialize_internally_tagged_variant(params, variant, cattrs, tag) + } + (attr::TagType::Adjacent { tag, content }, false) => { + serialize_adjacently_tagged_variant( + params, + variant, + cattrs, + variant_index, + tag, + content, + ) + } + (attr::TagType::None, _) | (_, true) => { + serialize_untagged_variant(params, variant, cattrs) + } + }); + + quote! { + #case => #body + } + } +} + +fn serialize_externally_tagged_variant( + params: &Parameters, + variant: &Variant, + variant_index: u32, + cattrs: &attr::Container, +) -> Fragment { + let type_name = cattrs.name().serialize_name(); + let variant_name = variant.attrs.name().serialize_name(); + + if let Some(path) = variant.attrs.serialize_with() { + let ser = wrap_serialize_variant_with(params, path, variant); + return quote_expr! { + _serde::Serializer::serialize_newtype_variant( + __serializer, + #type_name, + #variant_index, + #variant_name, + #ser, + ) + }; + } + + match effective_style(variant) { + Style::Unit => { + quote_expr! { + _serde::Serializer::serialize_unit_variant( + __serializer, + #type_name, + #variant_index, + #variant_name, + ) + } + } + Style::Newtype => { + let field = &variant.fields[0]; + let mut field_expr = quote!(__field0); + if let Some(path) = field.attrs.serialize_with() { + field_expr = wrap_serialize_field_with(params, field.ty, path, &field_expr); + } + + let span = field.original.span(); + let func = quote_spanned!(span=> _serde::Serializer::serialize_newtype_variant); + quote_expr! { + #func( + __serializer, + #type_name, + #variant_index, + #variant_name, + #field_expr, + ) + } + } + Style::Tuple => serialize_tuple_variant( + TupleVariant::ExternallyTagged { + type_name, + variant_index, + variant_name, + }, + params, + &variant.fields, + ), + Style::Struct => serialize_struct_variant( + StructVariant::ExternallyTagged { + variant_index, + variant_name, + }, + params, + &variant.fields, + type_name, + ), + } +} + +fn serialize_internally_tagged_variant( + params: &Parameters, + variant: &Variant, + cattrs: &attr::Container, + tag: &str, +) -> Fragment { + let type_name = cattrs.name().serialize_name(); + let variant_name = variant.attrs.name().serialize_name(); + + let enum_ident_str = params.type_name(); + let variant_ident_str = variant.ident.to_string(); + + if let Some(path) = variant.attrs.serialize_with() { + let ser = wrap_serialize_variant_with(params, path, variant); + return quote_expr! { + _serde::__private::ser::serialize_tagged_newtype( + __serializer, + #enum_ident_str, + #variant_ident_str, + #tag, + #variant_name, + #ser, + ) + }; + } + + match effective_style(variant) { + Style::Unit => { + quote_block! 
{ + let mut __struct = _serde::Serializer::serialize_struct( + __serializer, #type_name, 1)?; + _serde::ser::SerializeStruct::serialize_field( + &mut __struct, #tag, #variant_name)?; + _serde::ser::SerializeStruct::end(__struct) + } + } + Style::Newtype => { + let field = &variant.fields[0]; + let mut field_expr = quote!(__field0); + if let Some(path) = field.attrs.serialize_with() { + field_expr = wrap_serialize_field_with(params, field.ty, path, &field_expr); + } + + let span = field.original.span(); + let func = quote_spanned!(span=> _serde::__private::ser::serialize_tagged_newtype); + quote_expr! { + #func( + __serializer, + #enum_ident_str, + #variant_ident_str, + #tag, + #variant_name, + #field_expr, + ) + } + } + Style::Struct => serialize_struct_variant( + StructVariant::InternallyTagged { tag, variant_name }, + params, + &variant.fields, + type_name, + ), + Style::Tuple => unreachable!("checked in serde_derive_internals"), + } +} + +fn serialize_adjacently_tagged_variant( + params: &Parameters, + variant: &Variant, + cattrs: &attr::Container, + variant_index: u32, + tag: &str, + content: &str, +) -> Fragment { + let this_type = ¶ms.this_type; + let type_name = cattrs.name().serialize_name(); + let variant_name = variant.attrs.name().serialize_name(); + let serialize_variant = quote! { + &_serde::__private::ser::AdjacentlyTaggedEnumVariant { + enum_name: #type_name, + variant_index: #variant_index, + variant_name: #variant_name, + } + }; + + let inner = Stmts(if let Some(path) = variant.attrs.serialize_with() { + let ser = wrap_serialize_variant_with(params, path, variant); + quote_expr! { + _serde::Serialize::serialize(#ser, __serializer) + } + } else { + match effective_style(variant) { + Style::Unit => { + return quote_block! { + let mut __struct = _serde::Serializer::serialize_struct( + __serializer, #type_name, 1)?; + _serde::ser::SerializeStruct::serialize_field( + &mut __struct, #tag, #serialize_variant)?; + _serde::ser::SerializeStruct::end(__struct) + }; + } + Style::Newtype => { + let field = &variant.fields[0]; + let mut field_expr = quote!(__field0); + if let Some(path) = field.attrs.serialize_with() { + field_expr = wrap_serialize_field_with(params, field.ty, path, &field_expr); + } + + let span = field.original.span(); + let func = quote_spanned!(span=> _serde::ser::SerializeStruct::serialize_field); + return quote_block! 
{ + let mut __struct = _serde::Serializer::serialize_struct( + __serializer, #type_name, 2)?; + _serde::ser::SerializeStruct::serialize_field( + &mut __struct, #tag, #serialize_variant)?; + #func( + &mut __struct, #content, #field_expr)?; + _serde::ser::SerializeStruct::end(__struct) + }; + } + Style::Tuple => { + serialize_tuple_variant(TupleVariant::Untagged, params, &variant.fields) + } + Style::Struct => serialize_struct_variant( + StructVariant::Untagged, + params, + &variant.fields, + variant_name, + ), + } + }); + + let fields_ty = variant.fields.iter().map(|f| &f.ty); + let fields_ident: &[_] = &match variant.style { + Style::Unit => { + if variant.attrs.serialize_with().is_some() { + vec![] + } else { + unreachable!() + } + } + Style::Newtype => vec![Member::Named(Ident::new("__field0", Span::call_site()))], + Style::Tuple => (0..variant.fields.len()) + .map(|i| Member::Named(Ident::new(&format!("__field{}", i), Span::call_site()))) + .collect(), + Style::Struct => variant.fields.iter().map(|f| f.member.clone()).collect(), + }; + + let (_, ty_generics, where_clause) = params.generics.split_for_impl(); + + let wrapper_generics = if fields_ident.is_empty() { + params.generics.clone() + } else { + bound::with_lifetime_bound(¶ms.generics, "'__a") + }; + let (wrapper_impl_generics, wrapper_ty_generics, _) = wrapper_generics.split_for_impl(); + + quote_block! { + #[doc(hidden)] + struct __AdjacentlyTagged #wrapper_generics #where_clause { + data: (#(&'__a #fields_ty,)*), + phantom: _serde::__private::PhantomData<#this_type #ty_generics>, + } + + impl #wrapper_impl_generics _serde::Serialize for __AdjacentlyTagged #wrapper_ty_generics #where_clause { + fn serialize<__S>(&self, __serializer: __S) -> _serde::__private::Result<__S::Ok, __S::Error> + where + __S: _serde::Serializer, + { + // Elements that have skip_serializing will be unused. + #[allow(unused_variables)] + let (#(#fields_ident,)*) = self.data; + #inner + } + } + + let mut __struct = _serde::Serializer::serialize_struct( + __serializer, #type_name, 2)?; + _serde::ser::SerializeStruct::serialize_field( + &mut __struct, #tag, #serialize_variant)?; + _serde::ser::SerializeStruct::serialize_field( + &mut __struct, #content, &__AdjacentlyTagged { + data: (#(#fields_ident,)*), + phantom: _serde::__private::PhantomData::<#this_type #ty_generics>, + })?; + _serde::ser::SerializeStruct::end(__struct) + } +} + +fn serialize_untagged_variant( + params: &Parameters, + variant: &Variant, + cattrs: &attr::Container, +) -> Fragment { + if let Some(path) = variant.attrs.serialize_with() { + let ser = wrap_serialize_variant_with(params, path, variant); + return quote_expr! { + _serde::Serialize::serialize(#ser, __serializer) + }; + } + + match effective_style(variant) { + Style::Unit => { + quote_expr! { + _serde::Serializer::serialize_unit(__serializer) + } + } + Style::Newtype => { + let field = &variant.fields[0]; + let mut field_expr = quote!(__field0); + if let Some(path) = field.attrs.serialize_with() { + field_expr = wrap_serialize_field_with(params, field.ty, path, &field_expr); + } + + let span = field.original.span(); + let func = quote_spanned!(span=> _serde::Serialize::serialize); + quote_expr! 
{ + #func(#field_expr, __serializer) + } + } + Style::Tuple => serialize_tuple_variant(TupleVariant::Untagged, params, &variant.fields), + Style::Struct => { + let type_name = cattrs.name().serialize_name(); + serialize_struct_variant(StructVariant::Untagged, params, &variant.fields, type_name) + } + } +} + +enum TupleVariant<'a> { + ExternallyTagged { + type_name: &'a str, + variant_index: u32, + variant_name: &'a str, + }, + Untagged, +} + +fn serialize_tuple_variant( + context: TupleVariant, + params: &Parameters, + fields: &[Field], +) -> Fragment { + let tuple_trait = match context { + TupleVariant::ExternallyTagged { .. } => TupleTrait::SerializeTupleVariant, + TupleVariant::Untagged => TupleTrait::SerializeTuple, + }; + + let serialize_stmts = serialize_tuple_struct_visitor(fields, params, true, &tuple_trait); + + let mut serialized_fields = fields + .iter() + .enumerate() + .filter(|(_, field)| !field.attrs.skip_serializing()) + .peekable(); + + let let_mut = mut_if(serialized_fields.peek().is_some()); + + let len = serialized_fields + .map(|(i, field)| match field.attrs.skip_serializing_if() { + None => quote!(1), + Some(path) => { + let field_expr = Ident::new(&format!("__field{}", i), Span::call_site()); + quote!(if #path(#field_expr) { 0 } else { 1 }) + } + }) + .fold(quote!(0), |sum, expr| quote!(#sum + #expr)); + + match context { + TupleVariant::ExternallyTagged { + type_name, + variant_index, + variant_name, + } => { + quote_block! { + let #let_mut __serde_state = _serde::Serializer::serialize_tuple_variant( + __serializer, + #type_name, + #variant_index, + #variant_name, + #len)?; + #(#serialize_stmts)* + _serde::ser::SerializeTupleVariant::end(__serde_state) + } + } + TupleVariant::Untagged => { + quote_block! { + let #let_mut __serde_state = _serde::Serializer::serialize_tuple( + __serializer, + #len)?; + #(#serialize_stmts)* + _serde::ser::SerializeTuple::end(__serde_state) + } + } + } +} + +enum StructVariant<'a> { + ExternallyTagged { + variant_index: u32, + variant_name: &'a str, + }, + InternallyTagged { + tag: &'a str, + variant_name: &'a str, + }, + Untagged, +} + +fn serialize_struct_variant( + context: StructVariant, + params: &Parameters, + fields: &[Field], + name: &str, +) -> Fragment { + if fields.iter().any(|field| field.attrs.flatten()) { + return serialize_struct_variant_with_flatten(context, params, fields, name); + } + + let struct_trait = match context { + StructVariant::ExternallyTagged { .. } => StructTrait::SerializeStructVariant, + StructVariant::InternallyTagged { .. } | StructVariant::Untagged => { + StructTrait::SerializeStruct + } + }; + + let serialize_fields = serialize_struct_visitor(fields, params, true, &struct_trait); + + let mut serialized_fields = fields + .iter() + .filter(|&field| !field.attrs.skip_serializing()) + .peekable(); + + let let_mut = mut_if(serialized_fields.peek().is_some()); + + let len = serialized_fields + .map(|field| { + let member = &field.member; + + match field.attrs.skip_serializing_if() { + Some(path) => quote!(if #path(#member) { 0 } else { 1 }), + None => quote!(1), + } + }) + .fold(quote!(0), |sum, expr| quote!(#sum + #expr)); + + match context { + StructVariant::ExternallyTagged { + variant_index, + variant_name, + } => { + quote_block! 
{ + let #let_mut __serde_state = _serde::Serializer::serialize_struct_variant( + __serializer, + #name, + #variant_index, + #variant_name, + #len, + )?; + #(#serialize_fields)* + _serde::ser::SerializeStructVariant::end(__serde_state) + } + } + StructVariant::InternallyTagged { tag, variant_name } => { + quote_block! { + let mut __serde_state = _serde::Serializer::serialize_struct( + __serializer, + #name, + #len + 1, + )?; + _serde::ser::SerializeStruct::serialize_field( + &mut __serde_state, + #tag, + #variant_name, + )?; + #(#serialize_fields)* + _serde::ser::SerializeStruct::end(__serde_state) + } + } + StructVariant::Untagged => { + quote_block! { + let #let_mut __serde_state = _serde::Serializer::serialize_struct( + __serializer, + #name, + #len, + )?; + #(#serialize_fields)* + _serde::ser::SerializeStruct::end(__serde_state) + } + } + } +} + +fn serialize_struct_variant_with_flatten( + context: StructVariant, + params: &Parameters, + fields: &[Field], + name: &str, +) -> Fragment { + let struct_trait = StructTrait::SerializeMap; + let serialize_fields = serialize_struct_visitor(fields, params, true, &struct_trait); + + let mut serialized_fields = fields + .iter() + .filter(|&field| !field.attrs.skip_serializing()) + .peekable(); + + let let_mut = mut_if(serialized_fields.peek().is_some()); + + match context { + StructVariant::ExternallyTagged { + variant_index, + variant_name, + } => { + let this_type = ¶ms.this_type; + let fields_ty = fields.iter().map(|f| &f.ty); + let members = &fields.iter().map(|f| &f.member).collect::<Vec<_>>(); + + let (_, ty_generics, where_clause) = params.generics.split_for_impl(); + let wrapper_generics = bound::with_lifetime_bound(¶ms.generics, "'__a"); + let (wrapper_impl_generics, wrapper_ty_generics, _) = wrapper_generics.split_for_impl(); + + quote_block! { + #[doc(hidden)] + struct __EnumFlatten #wrapper_generics #where_clause { + data: (#(&'__a #fields_ty,)*), + phantom: _serde::__private::PhantomData<#this_type #ty_generics>, + } + + impl #wrapper_impl_generics _serde::Serialize for __EnumFlatten #wrapper_ty_generics #where_clause { + fn serialize<__S>(&self, __serializer: __S) -> _serde::__private::Result<__S::Ok, __S::Error> + where + __S: _serde::Serializer, + { + let (#(#members,)*) = self.data; + let #let_mut __serde_state = _serde::Serializer::serialize_map( + __serializer, + _serde::__private::None)?; + #(#serialize_fields)* + _serde::ser::SerializeMap::end(__serde_state) + } + } + + _serde::Serializer::serialize_newtype_variant( + __serializer, + #name, + #variant_index, + #variant_name, + &__EnumFlatten { + data: (#(#members,)*), + phantom: _serde::__private::PhantomData::<#this_type #ty_generics>, + }) + } + } + StructVariant::InternallyTagged { tag, variant_name } => { + quote_block! { + let #let_mut __serde_state = _serde::Serializer::serialize_map( + __serializer, + _serde::__private::None)?; + _serde::ser::SerializeMap::serialize_entry( + &mut __serde_state, + #tag, + #variant_name, + )?; + #(#serialize_fields)* + _serde::ser::SerializeMap::end(__serde_state) + } + } + StructVariant::Untagged => { + quote_block! 
{ + let #let_mut __serde_state = _serde::Serializer::serialize_map( + __serializer, + _serde::__private::None)?; + #(#serialize_fields)* + _serde::ser::SerializeMap::end(__serde_state) + } + } + } +} + +fn serialize_tuple_struct_visitor( + fields: &[Field], + params: &Parameters, + is_enum: bool, + tuple_trait: &TupleTrait, +) -> Vec<TokenStream> { + fields + .iter() + .enumerate() + .filter(|(_, field)| !field.attrs.skip_serializing()) + .map(|(i, field)| { + let mut field_expr = if is_enum { + let id = Ident::new(&format!("__field{}", i), Span::call_site()); + quote!(#id) + } else { + get_member( + params, + field, + &Member::Unnamed(Index { + index: i as u32, + span: Span::call_site(), + }), + ) + }; + + let skip = field + .attrs + .skip_serializing_if() + .map(|path| quote!(#path(#field_expr))); + + if let Some(path) = field.attrs.serialize_with() { + field_expr = wrap_serialize_field_with(params, field.ty, path, &field_expr); + } + + let span = field.original.span(); + let func = tuple_trait.serialize_element(span); + let ser = quote! { + #func(&mut __serde_state, #field_expr)?; + }; + + match skip { + None => ser, + Some(skip) => quote!(if !#skip { #ser }), + } + }) + .collect() +} + +fn serialize_struct_visitor( + fields: &[Field], + params: &Parameters, + is_enum: bool, + struct_trait: &StructTrait, +) -> Vec<TokenStream> { + fields + .iter() + .filter(|&field| !field.attrs.skip_serializing()) + .map(|field| { + let member = &field.member; + + let mut field_expr = if is_enum { + quote!(#member) + } else { + get_member(params, field, member) + }; + + let key_expr = field.attrs.name().serialize_name(); + + let skip = field + .attrs + .skip_serializing_if() + .map(|path| quote!(#path(#field_expr))); + + if let Some(path) = field.attrs.serialize_with() { + field_expr = wrap_serialize_field_with(params, field.ty, path, &field_expr); + } + + let span = field.original.span(); + let ser = if field.attrs.flatten() { + let func = quote_spanned!(span=> _serde::Serialize::serialize); + quote! { + #func(&#field_expr, _serde::__private::ser::FlatMapSerializer(&mut __serde_state))?; + } + } else { + let func = struct_trait.serialize_field(span); + quote! { + #func(&mut __serde_state, #key_expr, #field_expr)?; + } + }; + + match skip { + None => ser, + Some(skip) => { + if let Some(skip_func) = struct_trait.skip_field(span) { + quote! { + if !#skip { + #ser + } else { + #skip_func(&mut __serde_state, #key_expr)?; + } + } + } else { + quote! 
{ + if !#skip { + #ser + } + } + } + } + } + }) + .collect() +} + +fn wrap_serialize_field_with( + params: &Parameters, + field_ty: &syn::Type, + serialize_with: &syn::ExprPath, + field_expr: &TokenStream, +) -> TokenStream { + wrap_serialize_with(params, serialize_with, &[field_ty], &[quote!(#field_expr)]) +} + +fn wrap_serialize_variant_with( + params: &Parameters, + serialize_with: &syn::ExprPath, + variant: &Variant, +) -> TokenStream { + let field_tys: Vec<_> = variant.fields.iter().map(|field| field.ty).collect(); + let field_exprs: Vec<_> = variant + .fields + .iter() + .map(|field| { + let id = match &field.member { + Member::Named(ident) => ident.clone(), + Member::Unnamed(member) => { + Ident::new(&format!("__field{}", member.index), Span::call_site()) + } + }; + quote!(#id) + }) + .collect(); + wrap_serialize_with( + params, + serialize_with, + field_tys.as_slice(), + field_exprs.as_slice(), + ) +} + +fn wrap_serialize_with( + params: &Parameters, + serialize_with: &syn::ExprPath, + field_tys: &[&syn::Type], + field_exprs: &[TokenStream], +) -> TokenStream { + let this_type = ¶ms.this_type; + let (_, ty_generics, where_clause) = params.generics.split_for_impl(); + + let wrapper_generics = if field_exprs.is_empty() { + params.generics.clone() + } else { + bound::with_lifetime_bound(¶ms.generics, "'__a") + }; + let (wrapper_impl_generics, wrapper_ty_generics, _) = wrapper_generics.split_for_impl(); + + let field_access = (0..field_exprs.len()).map(|n| { + Member::Unnamed(Index { + index: n as u32, + span: Span::call_site(), + }) + }); + + quote!({ + #[doc(hidden)] + struct __SerializeWith #wrapper_impl_generics #where_clause { + values: (#(&'__a #field_tys, )*), + phantom: _serde::__private::PhantomData<#this_type #ty_generics>, + } + + impl #wrapper_impl_generics _serde::Serialize for __SerializeWith #wrapper_ty_generics #where_clause { + fn serialize<__S>(&self, __s: __S) -> _serde::__private::Result<__S::Ok, __S::Error> + where + __S: _serde::Serializer, + { + #serialize_with(#(self.values.#field_access, )* __s) + } + } + + &__SerializeWith { + values: (#(#field_exprs, )*), + phantom: _serde::__private::PhantomData::<#this_type #ty_generics>, + } + }) +} + +// Serialization of an empty struct results in code like: +// +// let mut __serde_state = serializer.serialize_struct("S", 0)?; +// _serde::ser::SerializeStruct::end(__serde_state) +// +// where we want to omit the `mut` to avoid a warning. 
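+//
+// A minimal sketch of the call side (illustrative): `mut_if` returns an
+// `Option<TokenStream>`, and `quote!` interpolates `Some(mut)` as the `mut`
+// keyword and `None` as nothing, so the binding is only mutable when at
+// least one field will actually be serialized.
+//
+//     let let_mut = mut_if(serialized_fields.peek().is_some());
+//     quote! { let #let_mut __serde_state = /* ... */; }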
+fn mut_if(is_mut: bool) -> Option<TokenStream> { + if is_mut { + Some(quote!(mut)) + } else { + None + } +} + +fn get_member(params: &Parameters, field: &Field, member: &Member) -> TokenStream { + let self_var = ¶ms.self_var; + match (params.is_remote, field.attrs.getter()) { + (false, None) => { + if params.is_packed { + quote!(&{#self_var.#member}) + } else { + quote!(&#self_var.#member) + } + } + (true, None) => { + let inner = if params.is_packed { + quote!(&{#self_var.#member}) + } else { + quote!(&#self_var.#member) + }; + let ty = field.ty; + quote!(_serde::__private::ser::constrain::<#ty>(#inner)) + } + (true, Some(getter)) => { + let ty = field.ty; + quote!(_serde::__private::ser::constrain::<#ty>(&#getter(#self_var))) + } + (false, Some(_)) => { + unreachable!("getter is only allowed for remote impls"); + } + } +} + +fn effective_style(variant: &Variant) -> Style { + match variant.style { + Style::Newtype if variant.fields[0].attrs.skip_serializing() => Style::Unit, + other => other, + } +} + +enum StructTrait { + SerializeMap, + SerializeStruct, + SerializeStructVariant, +} + +impl StructTrait { + fn serialize_field(&self, span: Span) -> TokenStream { + match *self { + StructTrait::SerializeMap => { + quote_spanned!(span=> _serde::ser::SerializeMap::serialize_entry) + } + StructTrait::SerializeStruct => { + quote_spanned!(span=> _serde::ser::SerializeStruct::serialize_field) + } + StructTrait::SerializeStructVariant => { + quote_spanned!(span=> _serde::ser::SerializeStructVariant::serialize_field) + } + } + } + + fn skip_field(&self, span: Span) -> Option<TokenStream> { + match *self { + StructTrait::SerializeMap => None, + StructTrait::SerializeStruct => { + Some(quote_spanned!(span=> _serde::ser::SerializeStruct::skip_field)) + } + StructTrait::SerializeStructVariant => { + Some(quote_spanned!(span=> _serde::ser::SerializeStructVariant::skip_field)) + } + } + } +} + +enum TupleTrait { + SerializeTuple, + SerializeTupleStruct, + SerializeTupleVariant, +} + +impl TupleTrait { + fn serialize_element(&self, span: Span) -> TokenStream { + match *self { + TupleTrait::SerializeTuple => { + quote_spanned!(span=> _serde::ser::SerializeTuple::serialize_element) + } + TupleTrait::SerializeTupleStruct => { + quote_spanned!(span=> _serde::ser::SerializeTupleStruct::serialize_field) + } + TupleTrait::SerializeTupleVariant => { + quote_spanned!(span=> _serde::ser::SerializeTupleVariant::serialize_field) + } + } + } +} diff --git a/vendor/serde_derive/src/this.rs b/vendor/serde_derive/src/this.rs new file mode 100644 index 0000000..941cea4 --- /dev/null +++ b/vendor/serde_derive/src/this.rs @@ -0,0 +1,32 @@ +use crate::internals::ast::Container; +use syn::{Path, PathArguments, Token}; + +pub fn this_type(cont: &Container) -> Path { + if let Some(remote) = cont.attrs.remote() { + let mut this = remote.clone(); + for segment in &mut this.segments { + if let PathArguments::AngleBracketed(arguments) = &mut segment.arguments { + arguments.colon2_token = None; + } + } + this + } else { + Path::from(cont.ident.clone()) + } +} + +pub fn this_value(cont: &Container) -> Path { + if let Some(remote) = cont.attrs.remote() { + let mut this = remote.clone(); + for segment in &mut this.segments { + if let PathArguments::AngleBracketed(arguments) = &mut segment.arguments { + if arguments.colon2_token.is_none() { + arguments.colon2_token = Some(Token![::](arguments.lt_token.span)); + } + } + } + this + } else { + Path::from(cont.ident.clone()) + } +} diff --git a/vendor/serde_spanned/.cargo-checksum.json 
b/vendor/serde_spanned/.cargo-checksum.json index 1491e1f..5807092 100644 --- a/vendor/serde_spanned/.cargo-checksum.json +++ b/vendor/serde_spanned/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"bc5fae76d3e18a589e883f9bd458a00673bd5bc1819a7f251f5baf3d37de9756","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"b7f9ff28df562bb645d6466476cb8855e96c4a162f4ffbe1ecb82d814a9f19f6","src/lib.rs":"b46fc8a4d877770b9e217e824ef99db7fdbb3c5215083dc42306eaa9958d79a3","src/spanned.rs":"2ef5235ea754def23c09e138aa11eef7d7f518d772e2ab2d6d4d63eb33a20370"},"package":"0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4"} \ No newline at end of file +{"files":{"Cargo.toml":"85429aade314f881dcdfe183bdc047f50e0be27330a37a49f0282bf2fa5962dd","LICENSE-APACHE":"c6596eb7be8581c18be736c846fb9173b69eccf6ef94c5135893ec56bd92ba08","LICENSE-MIT":"6efb0476a1cc085077ed49357026d8c173bf33017278ef440f222fb9cbcb66e6","README.md":"b7f9ff28df562bb645d6466476cb8855e96c4a162f4ffbe1ecb82d814a9f19f6","src/lib.rs":"b46fc8a4d877770b9e217e824ef99db7fdbb3c5215083dc42306eaa9958d79a3","src/spanned.rs":"d188b10290af11adec2a78272eede2b7c04beb42e27b73679b1d8faa10ab0bc5"},"package":"eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1"} \ No newline at end of file diff --git a/vendor/serde_spanned/Cargo.toml b/vendor/serde_spanned/Cargo.toml index f2ab57e..73887a4 100644 --- a/vendor/serde_spanned/Cargo.toml +++ b/vendor/serde_spanned/Cargo.toml @@ -11,25 +11,22 @@ [package] edition = "2021" -rust-version = "1.60.0" +rust-version = "1.67" name = "serde_spanned" -version = "0.6.1" +version = "0.6.5" include = [ + "build.rs", "src/**/*", "Cargo.toml", + "Cargo.lock", "LICENSE*", "README.md", - "examples/**/*", "benches/**/*", + "examples/**/*", "tests/**/*", ] -description = """ -A native Rust encoder and decoder of TOML-formatted files and streams. Provides -implementations of the standard Serialize/Deserialize traits for TOML data to -facilitate deserializing and serializing Rust structures. 
-""" +description = "Serde-compatible spanned Value" homepage = "https://github.com/toml-rs/toml" -documentation = "https://docs.rs/serde_spanned" readme = "README.md" keywords = [ "serde", @@ -44,48 +41,57 @@ categories = [ license = "MIT OR Apache-2.0" repository = "https://github.com/toml-rs/toml" +[package.metadata.docs.rs] +all-features = true +rustdoc-args = [ + "--cfg", + "docsrs", +] + [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" -search = "Unreleased" -replace = "{{version}}" min = 1 +replace = "{{version}}" +search = "Unreleased" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = '\.\.\.HEAD' replace = "...{{tag_name}}" -exactly = 1 +search = '\.\.\.HEAD' [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" -search = "ReleaseDate" -replace = "{{date}}" min = 1 +replace = "{{date}}" +search = "ReleaseDate" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = "<!-- next-header -->" replace = """ <!-- next-header --> ## [Unreleased] - ReleaseDate """ -exactly = 1 +search = "<!-- next-header -->" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = "<!-- next-url -->" replace = """ <!-- next-url --> [Unreleased]: https://github.com/toml-rs/toml/compare/{{tag_name}}...HEAD""" -exactly = 1 - -[package.metadata.docs.rs] -rustdoc-args = [ - "--cfg", - "docsrs", -] -all-features = true +search = "<!-- next-url -->" [dependencies.serde] version = "1.0.145" optional = true + +[dev-dependencies.serde] +version = "1" + +[dev-dependencies.serde-untagged] +version = "0.1" + +[dev-dependencies.serde_derive] +version = "1" diff --git a/vendor/serde_spanned/LICENSE-APACHE b/vendor/serde_spanned/LICENSE-APACHE index 16fe87b..8f71f43 100644 --- a/vendor/serde_spanned/LICENSE-APACHE +++ b/vendor/serde_spanned/LICENSE-APACHE @@ -1,201 +1,202 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ diff --git a/vendor/serde_spanned/LICENSE-MIT b/vendor/serde_spanned/LICENSE-MIT index 39e0ed6..a2d0108 100644 --- a/vendor/serde_spanned/LICENSE-MIT +++ b/vendor/serde_spanned/LICENSE-MIT @@ -1,25 +1,19 @@ -Copyright (c) 2014 Alex Crichton +Copyright (c) Individual contributors -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/serde_spanned/src/spanned.rs b/vendor/serde_spanned/src/spanned.rs index ac351ca..f4042a8 100644 --- a/vendor/serde_spanned/src/spanned.rs +++ b/vendor/serde_spanned/src/spanned.rs @@ -35,6 +35,63 @@ pub struct Spanned<T> { } impl<T> Spanned<T> { + /// Create a spanned value encompassing the given byte range. 
+ /// + /// # Example + /// + /// Transposing a `Spanned<Enum<T>>` into `Enum<Spanned<T>>`: + /// + /// ``` + /// use serde::de::{Deserialize, Deserializer}; + /// use serde_untagged::UntaggedEnumVisitor; + /// use toml::Spanned; + /// + /// pub enum Dependency { + /// Simple(Spanned<String>), + /// Detailed(Spanned<DetailedDependency>), + /// } + /// + /// impl<'de> Deserialize<'de> for Dependency { + /// fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + /// where + /// D: Deserializer<'de>, + /// { + /// enum DependencyKind { + /// Simple(String), + /// Detailed(DetailedDependency), + /// } + /// + /// impl<'de> Deserialize<'de> for DependencyKind { + /// fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + /// where + /// D: Deserializer<'de>, + /// { + /// UntaggedEnumVisitor::new() + /// .expecting( + /// "a version string like \"0.9.8\" or a \ + /// detailed dependency like { version = \"0.9.8\" }", + /// ) + /// .string(|value| Ok(DependencyKind::Simple(value.to_owned()))) + /// .map(|value| value.deserialize().map(DependencyKind::Detailed)) + /// .deserialize(deserializer) + /// } + /// } + /// + /// let spanned: Spanned<DependencyKind> = Deserialize::deserialize(deserializer)?; + /// let range = spanned.span(); + /// Ok(match spanned.into_inner() { + /// DependencyKind::Simple(simple) => Dependency::Simple(Spanned::new(range, simple)), + /// DependencyKind::Detailed(detailed) => Dependency::Detailed(Spanned::new(range, detailed)), + /// }) + /// } + /// } + /// # + /// # type DetailedDependency = std::collections::BTreeMap<String, String>; + /// ``` + pub fn new(range: std::ops::Range<usize>, value: T) -> Self { + Spanned { span: range, value } + } + /// Byte range pub fn span(&self) -> std::ops::Range<usize> { self.span.clone() diff --git a/vendor/syn/.cargo-checksum.json b/vendor/syn/.cargo-checksum.json new file mode 100644 index 0000000..9746b35 --- /dev/null +++ b/vendor/syn/.cargo-checksum.json @@ -0,0 +1 @@ 
+{"files":{"Cargo.toml":"bfa0938a6819ec2386d3de70587d6738774dde0c4b9bacda780e20661d7e5602","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"f6904878f9082d7d267b6d0d737ef211ff165cfd039a4d45ad88e9861f3e217f","benches/file.rs":"0a0527c78d849148cbb6118b4d36f72da7d4add865ba1a410e0a1be9e8dbfe0e","benches/rust.rs":"84dedb2d912a19ce3b8c15cda3bbcdb2994996167fe4435f71d1b5f74590299d","src/attr.rs":"2833aadc7f0e180caeae32793f41e120467a38bb0635b0a3c33546cf29584635","src/bigint.rs":"0299829b2f7a1a798fe2f7bc1680e4a10f9b6f4a852d09af4da2deab466c4242","src/buffer.rs":"30a9fffaf1b7a4fd8ca7eed5a9cde7a13d75d0b7b92569975bee1e7083d2555e","src/custom_keyword.rs":"b2e25cbe73d237b459140a38083ee21e4836ea444859b033f400652414630290","src/custom_punctuation.rs":"c2c344c9cdbebc66a5833eb0f9ded817ce9807dbb75b616283b2b60960d1ffb0","src/data.rs":"106be8b10d62ddf4fd636cf682b72a43748b25069a32cd2ff59333b3c8478695","src/derive.rs":"3132e7f064725c7ca43f26daee93ec78037d46a935c6b0758af905cff450c15c","src/discouraged.rs":"482970b03bdee3cbc30c034f644e3293b25387db46300da5d8d8efd97dad8507","src/drops.rs":"013385f1dd95663f1afab41abc1e2eea04181998644828935ca564c74d6462ae","src/error.rs":"f0d80891d42459ae9d490f2c5a749b501f3d06acc6ae2429c3558fd5dea38d0d","src/export.rs":"b260cc49da1da3489e7755832bc8015cfad79e84f6c74e237f65ae25a2385e56","src/expr.rs":"e84faad2faca52feaa1e3046dbf1c12dd3f4720110017a19c9794c9a20570f22","src/ext.rs":"3cf2d869812e5be894aa1c48bf074da262143fb2df1c9ac1b5ee965bf2a96a1c","src/file.rs":"a4d510dd0e2756bd54983dfa747601918c801e987cbf92deab44cdca6a201aeb","src/gen/clone.rs":"837df226926225f7f084ad4deb98a379dc98d079270ea5ae5db2f74d3a23bcb8","src/gen/debug.rs":"9532acd0fe48ea06073519956cae063882f89ef6f3579c32d9c1b3efb115a499","src/gen/eq.rs":"86279869463047ab62e01f72e2a166a6bbcf2e2082bec3e4679f8876ceac4ae9","src/gen/fold.rs":"33ce8928a4ba1e66517890244ce61089cebf19142e03220b227f60a10ba35f7f","src/gen/hash.rs":"4f0ab10b7aaddea9f988b02fe1946d6f81b8e51f75b5a1777ab0be67ed278816","src/gen/visit.rs":"33251b48083c41640a5f5b1fe6c6658ba62cb76dac0b630efaa5e18471bd06c2","src/gen/visit_mut.rs":"ff9fafdda337c6bf2f2d4ca3ce75714fabe45a98e14d729fd5b7b392499a1cc8","src/gen_helper.rs":"750caab67ba0ba11a95ea28cd38026485227bb4aa114cdb497472386f60fdb35","src/generics.rs":"699923668dfb2e5a129dab0f1f333f2f49d52d0a79c9314b8806e72c802169f5","src/group.rs":"f5911e9cf2dc2dffab546590167c48de30409cb7708aa3307d22be143df720e4","src/ident.rs":"1b2d86d2e380b1fa83aee6ed113c47f0b482a229c25be428dac921deec0b4852","src/item.rs":"ff8e41ce833d24deefde92c5c539e8a75d91fa05b33d742cbfdab34ed5d0eeb5","src/lib.rs":"fee820320cf90d964930f5c66662bb7d7bfd785968c41cddc463201c9805ce66","src/lifetime.rs":"64abfd72af5dd271b2de1a8304ca0899b90da7750df9dec2ee8102bceb08a426","src/lit.rs":"b382c85f0b81faebdf2e97a198afb5904738955ce130f5393b548430c2a17f9b","src/lookahead.rs":"376092f91a1c32e1b277db0a6790fdda151c9ec51bd971fe6a6545b5b9e73b5d","src/mac.rs":"b1cf73f34a27a8f1429125e726623a524fb5dce875eb68ead3beaffa976442c3","src/macros.rs":"e68e246752d67307f9afb5ada6c1d689ddc0b0d9a51d40ba6191427d1e1b8bdc","src/meta.rs":"603dfd4fd71d9e4a5a33cb8ba8a9a1160b78898e3f41ccd5b1be503f47edea92","src/op.rs":"09a8dece03265aa1a9cd36c73f4ed7f783e311e4166c88671cf5ba833fb51d21","src/parse.rs":"1320c3b54f8c5dff87fd06d2b27e8dcd5edbe682c6946335eba30ace989d4bce","src/parse_macro_input.rs":"4a753b2a6dbfefd6dc93852d66b4f6d73ebd6b8b9be74019fc476f429b9a892d","src/parse_quote.rs":"6bf149
8244557fcecff9247091b7c701e9b2775cd061ff936c52eeffb101278b","src/pat.rs":"dd17c1ac958469f723bb4743b301f6c0ca2929b7f9e097dade5916eee949d6c5","src/path.rs":"d90644e63e8f87b09651ea09694934bdccfeb863f270a6de617828098602f8cd","src/print.rs":"22910bf0521ab868ebd7c62601c55912d12cfb400c65723e08e5cfa3a2d111c0","src/punctuated.rs":"dffaab15b9215c70d7db416e8d4c44b78c43ba2f255485e6211363f0f0fea063","src/restriction.rs":"1aed612ed2f39ab3f8edbb72a8f2aa506e5b6eb4d9f05f1b97d9bb32369a3fc9","src/sealed.rs":"6ece3b3dcb30f6bb98b93d83759ca7712ee8592bef9c0511141039c38765db0e","src/span.rs":"0a48e375e5c9768f6f64174a91ba6a255f4b021e2fb3548d8494e617f142601b","src/spanned.rs":"1bba75d73dd4dc5be6c4e11fdd72686d340fb25b5808830bd603ddc840beabdc","src/stmt.rs":"fe7d7fd60ad1d6841c7a3602ae7e5d0ea9fcba2aeba82eb6f85d41b5c722ee64","src/thread.rs":"1f1deb1272525ab2af9a36aac4bce8f65b0e315adb1656641fd7075662f49222","src/token.rs":"e6b2373fb0465158d267bd25eac16266bd4b492d7dbe8ae4b5405a841a1ac57d","src/tt.rs":"32490509abcc4a5a3c7eb5628337172b3b49d30697d2f7b7df4d8045255c13da","src/ty.rs":"e76b69ae598be6af7af0947a698fc45dbece3b479d086f8cd97a0d470e730410","src/verbatim.rs":"87cbe82a90f48efb57ffd09141042698b3e011a21d0d5412154d80324b0a5ef0","src/whitespace.rs":"718a80c12cdd145358e2690f0f68ff7779a91ec17ce9fde9bb755f635fce69ad","tests/common/eq.rs":"d87b07c88cdb890d230fff55bd713e62b3571aba7edc869664213f424de06718","tests/common/mod.rs":"432ad35577f836a20b517d8c26ed994ac25fe73ef2f461c67688b61b99762015","tests/common/parse.rs":"246ddf1d303a9dbbc380e8d0689bd851cef3c3146d09d2627175deb9203b003d","tests/debug/gen.rs":"daa5be1f3dc2379d5067137e0106874e0f3ed5eba7f2a8436fdada0bf33d7186","tests/debug/mod.rs":"b56136586267ae1812a937b69215dd053ada2c21717771d89dcd3ce52bcb27f5","tests/macros/mod.rs":"ac0ef6edc8d4a740c620eb73ac69e81ae442d3f73bcb37cc49845e483a296194","tests/regression.rs":"e9565ea0efecb4136f099164ffcfa26e1996b0a27fb9c6659e90ad9bdd42e7b6","tests/regression/issue1108.rs":"f32db35244a674e22ff824ca9e5bbec2184e287b59f022db68c418b5878a2edc","tests/regression/issue1235.rs":"a2266b10c3f7c7af5734817ab0a3e8b309b51e7d177b63f26e67e6b744d280b0","tests/repo/mod.rs":"d92f0f9afa0613e95ab8ed1e7ec9b4d863b6eff5b790371ed8eac86f44902bf1","tests/repo/progress.rs":"c08d0314a7f3ecf760d471f27da3cd2a500aeb9f1c8331bffb2aa648f9fabf3f","tests/test_asyncness.rs":"8982f6bc4e36510f924e288247473403e72697389ce9dda4e4b5ab0a8e49259f","tests/test_attribute.rs":"b35550a43bbd187bb330997ba36f90c65d8fc489135b1d32ef4547f145cb7612","tests/test_derive_input.rs":"99c4e6e45e3322ea9e269b309059c8a00fda1dcc03aed41f6e7d8c7e0a72fa2b","tests/test_expr.rs":"f7726efc959b860aa8aca6c51bc10c466cb1957774c1dfac24c3907cd8ea99a6","tests/test_generics.rs":"2fcc8575d695b568f3724b3b33d853b8fa6d9864eb816b5e3ca82420682e6155","tests/test_grouping.rs":"ecbe3324878b2e2be42640a3dec198620cff18731fcb95ee7e94eacd11d2fec1","tests/test_ident.rs":"9eb53d1e21edf23e7c9e14dc74dcc2b2538e9221e19dbcc0a44e3acc2e90f3f6","tests/test_item.rs":"9398997f2be33c89de52eb40f8c2fce86cf4ce5810fe709d2f20916ed6e2bb47","tests/test_iterators.rs":"f4dacb5f3a8e0473dfb0d27f05270d41e79eddb4759b1fad3e88e379b4731e17","tests/test_lit.rs":"8e30c2d7837673a742d77aef01212788bbd099182dd5c1d10ee474cfeb786c39","tests/test_meta.rs":"70fd75b42d1d913f05825c9e8280a4802e81de0b2343ad876850d2b7c588f0bf","tests/test_parse_buffer.rs":"92f5e898c1a6625011497f8cc8684eac0311850566ae9ab1848444305c9bdddf","tests/test_parse_quote.rs":"5bb7ec6773c3b878b3abedf17952948e707d8990b7e131605ee03d31f9ecae5b","tests/test_parse_stream.rs":"91a7ec997ea67d3c9d3028495345d89f2f
67eb01bf11af3f99a1cef42a41aa05","tests/test_pat.rs":"8467fbef7cba36e6ce105cbc1a038b13ec154505bd34c863a18cfdfeac02c0b1","tests/test_path.rs":"b202244f034e58bf17e4c39bef696b0567e0ed42a63427ed4866acd14aaa90df","tests/test_precedence.rs":"f8b57c4a10ad249d3c6984cbe5db56a0c1e1381900dbc837fdc6f0e7f19221a7","tests/test_receiver.rs":"af64117acd66fbf42edc476f731ecd20c88009d9cb641dbd7a1d6384ae99ae73","tests/test_round_trip.rs":"61345d746dc03f99ec7e9e23fa6e5fe80526175d863f12d42163f7fa11d8c5cb","tests/test_shebang.rs":"98e8a6690c04e0aad2893b747593620b51836fe704f50f5c6fe352609837138a","tests/test_should_parse.rs":"1d3535698a446e2755bfc360676bdb161841a1f454cdef6e7556c6d06a95c89d","tests/test_size.rs":"b446868d55ae820196aad03dba3aa3529b727ac0465113c6fc66706423a73350","tests/test_stmt.rs":"761946f7d020f37dcc9f3a6c4b17c8d26c30d609193ac13c0672a2833b80f6dc","tests/test_token_trees.rs":"d012da9c3c861073711b006bf6ffdc073821fb9fb0a08733628cdae57124d1f5","tests/test_ty.rs":"e0262eb0c65c06a5bd8781cf0e256d7f093182202e8160623f00de98594845d2","tests/test_visibility.rs":"cf4c93997cd88821ef7f8e2dd5d1586175cce4614407cd3bdf371ecc4d8abc44","tests/zzz_stable.rs":"2a862e59cb446235ed99aec0e6ada8e16d3ecc30229b29d825b7c0bbc2602989"},"package":"0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"} \ No newline at end of file diff --git a/vendor/syn/Cargo.toml b/vendor/syn/Cargo.toml new file mode 100644 index 0000000..a467eb8 --- /dev/null +++ b/vendor/syn/Cargo.toml @@ -0,0 +1,147 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2021" +rust-version = "1.56" +name = "syn" +version = "2.0.48" +authors = ["David Tolnay <dtolnay@gmail.com>"] +include = [ + "/benches/**", + "/Cargo.toml", + "/LICENSE-APACHE", + "/LICENSE-MIT", + "/README.md", + "/src/**", + "/tests/**", +] +description = "Parser for Rust source code" +documentation = "https://docs.rs/syn" +readme = "README.md" +keywords = [ + "macros", + "syn", +] +categories = [ + "development-tools::procedural-macro-helpers", + "parser-implementations", +] +license = "MIT OR Apache-2.0" +repository = "https://github.com/dtolnay/syn" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = [ + "--cfg", + "doc_cfg", + "--generate-link-to-definition", +] +targets = ["x86_64-unknown-linux-gnu"] + +[package.metadata.playground] +features = [ + "full", + "visit", + "visit-mut", + "fold", + "extra-traits", +] + +[lib] +doc-scrape-examples = false + +[[bench]] +name = "rust" +harness = false +required-features = [ + "full", + "parsing", +] + +[[bench]] +name = "file" +required-features = [ + "full", + "parsing", +] + +[dependencies.proc-macro2] +version = "1.0.75" +default-features = false + +[dependencies.quote] +version = "1.0.35" +optional = true +default-features = false + +[dependencies.unicode-ident] +version = "1" + +[dev-dependencies.anyhow] +version = "1" + +[dev-dependencies.automod] +version = "1" + +[dev-dependencies.flate2] +version = "1" + +[dev-dependencies.insta] +version = "1" + +[dev-dependencies.rayon] +version = "1" + +[dev-dependencies.ref-cast] +version = "1" + +[dev-dependencies.reqwest] +version = "0.11" +features = ["blocking"] + +[dev-dependencies.rustversion] +version = "1" + +[dev-dependencies.syn-test-suite] +version = "0" + +[dev-dependencies.tar] +version = "0.4.16" + +[dev-dependencies.termcolor] +version = "1" + +[dev-dependencies.walkdir] +version = "2.3.2" + +[features] +clone-impls = [] +default = [ + "derive", + "parsing", + "printing", + "clone-impls", + "proc-macro", +] +derive = [] +extra-traits = [] +fold = [] +full = [] +parsing = [] +printing = ["quote"] +proc-macro = [ + "proc-macro2/proc-macro", + "quote/proc-macro", +] +test = ["syn-test-suite/all-features"] +visit = [] +visit-mut = [] diff --git a/vendor/syn/LICENSE-APACHE b/vendor/syn/LICENSE-APACHE new file mode 100644 index 0000000..1b5ec8b --- /dev/null +++ b/vendor/syn/LICENSE-APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS diff --git a/vendor/syn/LICENSE-MIT b/vendor/syn/LICENSE-MIT new file mode 100644 index 0000000..31aa793 --- /dev/null +++ b/vendor/syn/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/vendor/syn/README.md b/vendor/syn/README.md new file mode 100644 index 0000000..e8d99ab --- /dev/null +++ b/vendor/syn/README.md @@ -0,0 +1,284 @@ +Parser for Rust source code +=========================== + +[<img alt="github" src="https://img.shields.io/badge/github-dtolnay/syn-8da0cb?style=for-the-badge&labelColor=555555&logo=github" height="20">](https://github.com/dtolnay/syn) +[<img alt="crates.io" src="https://img.shields.io/crates/v/syn.svg?style=for-the-badge&color=fc8d62&logo=rust" height="20">](https://crates.io/crates/syn) +[<img alt="docs.rs" src="https://img.shields.io/badge/docs.rs-syn-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs" height="20">](https://docs.rs/syn) +[<img alt="build status" src="https://img.shields.io/github/actions/workflow/status/dtolnay/syn/ci.yml?branch=master&style=for-the-badge" height="20">](https://github.com/dtolnay/syn/actions?query=branch%3Amaster) + +Syn is a parsing library for parsing a stream of Rust tokens into a syntax tree +of Rust source code. + +Currently this library is geared toward use in Rust procedural macros, but +contains some APIs that may be useful more generally. + +- **Data structures** — Syn provides a complete syntax tree that can represent + any valid Rust source code. The syntax tree is rooted at [`syn::File`] which + represents a full source file, but there are other entry points that may be + useful to procedural macros including [`syn::Item`], [`syn::Expr`] and + [`syn::Type`]. + +- **Derives** — Of particular interest to derive macros is [`syn::DeriveInput`] + which is any of the three legal input items to a derive macro. An example + below shows using this type in a library that can derive implementations of a + user-defined trait. + +- **Parsing** — Parsing in Syn is built around [parser functions] with the + signature `fn(ParseStream) -> Result<T>`. Every syntax tree node defined by + Syn is individually parsable and may be used as a building block for custom + syntaxes, or you may dream up your own brand new syntax without involving any + of our syntax tree types. + +- **Location information** — Every token parsed by Syn is associated with a + `Span` that tracks line and column information back to the source of that + token. These spans allow a procedural macro to display detailed error messages + pointing to all the right places in the user's code. There is an example of + this below. + +- **Feature flags** — Functionality is aggressively feature gated so your + procedural macros enable only what they need, and do not pay in compile time + for all the rest. + +[`syn::File`]: https://docs.rs/syn/2.0/syn/struct.File.html +[`syn::Item`]: https://docs.rs/syn/2.0/syn/enum.Item.html +[`syn::Expr`]: https://docs.rs/syn/2.0/syn/enum.Expr.html +[`syn::Type`]: https://docs.rs/syn/2.0/syn/enum.Type.html +[`syn::DeriveInput`]: https://docs.rs/syn/2.0/syn/struct.DeriveInput.html +[parser functions]: https://docs.rs/syn/2.0/syn/parse/index.html + +*Version requirement: Syn supports rustc 1.56 and up.* + +[*Release notes*](https://github.com/dtolnay/syn/releases) + +<br> + +## Resources + +The best way to learn about procedural macros is by writing some. Consider +working through [this procedural macro workshop][workshop] to get familiar with +the different types of procedural macros. The workshop contains relevant links +into the Syn documentation as you work through each project. 
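+
+As a first taste before the workshop, note that Syn's parsers also run in
+ordinary programs, not just inside macros. A minimal sketch, assuming a crate
+that depends on `syn` with the `full` feature enabled:
+
+```rust
+fn main() {
+    // Parse a string of Rust source into Syn's expression syntax tree.
+    let expr: syn::Expr = syn::parse_str("1 + 2 * 3").unwrap();
+
+    // Syntax tree nodes are ordinary Rust data structures to inspect.
+    if let syn::Expr::Binary(binary) = expr {
+        // `+` ends up at the root because `*` binds more tightly.
+        assert!(matches!(binary.op, syn::BinOp::Add(_)));
+    }
+}
+```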
+ +[workshop]: https://github.com/dtolnay/proc-macro-workshop + +<br> + +## Example of a derive macro + +The canonical derive macro using Syn looks like this. We write an ordinary Rust +function tagged with a `proc_macro_derive` attribute and the name of the trait +we are deriving. Any time that derive appears in the user's code, the Rust +compiler passes their data structure as tokens into our macro. We get to execute +arbitrary Rust code to figure out what to do with those tokens, then hand some +tokens back to the compiler to compile into the user's crate. + +[`TokenStream`]: https://doc.rust-lang.org/proc_macro/struct.TokenStream.html + +```toml +[dependencies] +syn = "2.0" +quote = "1.0" + +[lib] +proc-macro = true +``` + +```rust +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, DeriveInput}; + +#[proc_macro_derive(MyMacro)] +pub fn my_macro(input: TokenStream) -> TokenStream { + // Parse the input tokens into a syntax tree + let input = parse_macro_input!(input as DeriveInput); + + // Build the output, possibly using quasi-quotation + let expanded = quote! { + // ... + }; + + // Hand the output tokens back to the compiler + TokenStream::from(expanded) +} +``` + +The [`heapsize`] example directory shows a complete working implementation of a +derive macro. The example derives a `HeapSize` trait which computes an estimate +of the amount of heap memory owned by a value. + +[`heapsize`]: examples/heapsize + +```rust +pub trait HeapSize { + /// Total number of bytes of heap memory owned by `self`. + fn heap_size_of_children(&self) -> usize; +} +``` + +The derive macro allows users to write `#[derive(HeapSize)]` on data structures +in their program. + +```rust +#[derive(HeapSize)] +struct Demo<'a, T: ?Sized> { + a: Box<T>, + b: u8, + c: &'a str, + d: String, +} +``` + +<br> + +## Spans and error reporting + +The token-based procedural macro API provides great control over where the +compiler's error messages are displayed in user code. Consider the error the +user sees if one of their field types does not implement `HeapSize`. + +```rust +#[derive(HeapSize)] +struct Broken { + ok: String, + bad: std::thread::Thread, +} +``` + +By tracking span information all the way through the expansion of a procedural +macro as shown in the `heapsize` example, token-based macros in Syn are able to +trigger errors that directly pinpoint the source of the problem. + +```console +error[E0277]: the trait bound `std::thread::Thread: HeapSize` is not satisfied + --> src/main.rs:7:5 + | +7 | bad: std::thread::Thread, + | ^^^^^^^^^^^^^^^^^^^^^^^^ the trait `HeapSize` is not implemented for `std::thread::Thread` +``` + +<br> + +## Parsing a custom syntax + +The [`lazy-static`] example directory shows the implementation of a +`functionlike!(...)` procedural macro in which the input tokens are parsed using +Syn's parsing API. + +[`lazy-static`]: examples/lazy-static + +The example reimplements the popular `lazy_static` crate from crates.io as a +procedural macro. + +```rust +lazy_static! { + static ref USERNAME: Regex = Regex::new("^[a-z0-9_-]{3,16}$").unwrap(); +} +``` + +The implementation shows how to trigger custom warnings and error messages on +the macro input. 
+ +```console +warning: come on, pick a more creative name + --> src/main.rs:10:16 + | +10 | static ref FOO: String = "lazy_static".to_owned(); + | ^^^ +``` + +<br> + +## Testing + +When testing macros, we often care not just that the macro can be used +successfully but also that when the macro is provided with invalid input it +produces maximally helpful error messages. Consider using the [`trybuild`] crate +to write tests for errors that are emitted by your macro or errors detected by +the Rust compiler in the expanded code following misuse of the macro. Such tests +help avoid regressions from later refactors that mistakenly make an error no +longer trigger or be less helpful than it used to be. + +[`trybuild`]: https://github.com/dtolnay/trybuild + +<br> + +## Debugging + +When developing a procedural macro it can be helpful to look at what the +generated code looks like. Use `cargo rustc -- -Zunstable-options +--pretty=expanded` or the [`cargo expand`] subcommand. + +[`cargo expand`]: https://github.com/dtolnay/cargo-expand + +To show the expanded code for some crate that uses your procedural macro, run +`cargo expand` from that crate. To show the expanded code for one of your own +test cases, run `cargo expand --test the_test_case` where the last argument is +the name of the test file without the `.rs` extension. + +This write-up by Brandon W Maister discusses debugging in more detail: +[Debugging Rust's new Custom Derive system][debugging]. + +[debugging]: https://quodlibetor.github.io/posts/debugging-rusts-new-custom-derive-system/ + +<br> + +## Optional features + +Syn puts a lot of functionality behind optional features in order to optimize +compile time for the most common use cases. The following features are +available. + +- **`derive`** *(enabled by default)* — Data structures for representing the + possible input to a derive macro, including structs and enums and types. +- **`full`** — Data structures for representing the syntax tree of all valid + Rust source code, including items and expressions. +- **`parsing`** *(enabled by default)* — Ability to parse input tokens into a + syntax tree node of a chosen type. +- **`printing`** *(enabled by default)* — Ability to print a syntax tree node as + tokens of Rust source code. +- **`visit`** — Trait for traversing a syntax tree. +- **`visit-mut`** — Trait for traversing and mutating in place a syntax tree. +- **`fold`** — Trait for transforming an owned syntax tree. +- **`clone-impls`** *(enabled by default)* — Clone impls for all syntax tree + types. +- **`extra-traits`** — Debug, Eq, PartialEq, Hash impls for all syntax tree + types. +- **`proc-macro`** *(enabled by default)* — Runtime dependency on the dynamic + library libproc_macro from rustc toolchain. + +<br> + +## Proc macro shim + +Syn operates on the token representation provided by the [proc-macro2] crate +from crates.io rather than using the compiler's built in proc-macro crate +directly. This enables code using Syn to execute outside of the context of a +procedural macro, such as in unit tests or build.rs, and we avoid needing +incompatible ecosystems for proc macros vs non-macro use cases. + +In general all of your code should be written against proc-macro2 rather than +proc-macro. The one exception is in the signatures of procedural macro entry +points, which are required by the language to use `proc_macro::TokenStream`. + +The proc-macro2 crate will automatically detect and use the compiler's data +structures when a procedural macro is active. 
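+
+In practice this means a derive's expansion logic, once factored into a helper
+that uses only proc-macro2 types, can be exercised by a plain `#[test]`. A
+minimal sketch, where the `expand` helper and `HeapSize` trait are illustrative
+(in the spirit of the heapsize example above) rather than part of Syn's API:
+
+```rust
+use proc_macro2::TokenStream;
+use quote::quote;
+
+// Hypothetical helper: the real proc-macro entry point would delegate here.
+// Because it only touches proc-macro2 types, it runs outside of rustc too.
+fn expand(input: syn::DeriveInput) -> TokenStream {
+    let name = input.ident;
+    quote! {
+        impl HeapSize for #name {
+            fn heap_size_of_children(&self) -> usize { 0 }
+        }
+    }
+}
+
+#[test]
+fn expands_unit_struct() {
+    let input: syn::DeriveInput = syn::parse_quote!(struct Demo;);
+    let tokens = expand(input);
+    assert!(!tokens.is_empty());
+}
+```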
+ +[proc-macro2]: https://docs.rs/proc-macro2/1.0/proc_macro2/ + +<br> + +#### License + +<sup> +Licensed under either of <a href="LICENSE-APACHE">Apache License, Version +2.0</a> or <a href="LICENSE-MIT">MIT license</a> at your option. +</sup> + +<br> + +<sub> +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in this crate by you, as defined in the Apache-2.0 license, shall +be dual licensed as above, without any additional terms or conditions. +</sub> diff --git a/vendor/syn/benches/file.rs b/vendor/syn/benches/file.rs new file mode 100644 index 0000000..b424723 --- /dev/null +++ b/vendor/syn/benches/file.rs @@ -0,0 +1,57 @@ +// $ cargo bench --features full,test --bench file + +#![feature(rustc_private, test)] +#![recursion_limit = "1024"] +#![allow( + clippy::items_after_statements, + clippy::manual_let_else, + clippy::match_like_matches_macro, + clippy::missing_panics_doc, + clippy::must_use_candidate, + clippy::uninlined_format_args +)] + +extern crate test; + +#[macro_use] +#[path = "../tests/macros/mod.rs"] +mod macros; + +#[allow(dead_code)] +#[path = "../tests/repo/mod.rs"] +mod repo; + +use proc_macro2::{Span, TokenStream}; +use std::fs; +use std::str::FromStr; +use syn::parse::{ParseStream, Parser}; +use test::Bencher; + +const FILE: &str = "tests/rust/library/core/src/str/mod.rs"; + +fn get_tokens() -> TokenStream { + repo::clone_rust(); + let content = fs::read_to_string(FILE).unwrap(); + TokenStream::from_str(&content).unwrap() +} + +#[bench] +fn baseline(b: &mut Bencher) { + let tokens = get_tokens(); + b.iter(|| drop(tokens.clone())); +} + +#[bench] +fn create_token_buffer(b: &mut Bencher) { + let tokens = get_tokens(); + fn immediate_fail(_input: ParseStream) -> syn::Result<()> { + Err(syn::Error::new(Span::call_site(), "")) + } + b.iter(|| immediate_fail.parse2(tokens.clone())); +} + +#[bench] +fn parse_file(b: &mut Bencher) { + let tokens = get_tokens(); + b.iter(|| syn::parse2::<syn::File>(tokens.clone())); +} diff --git a/vendor/syn/benches/rust.rs b/vendor/syn/benches/rust.rs new file mode 100644 index 0000000..15536db --- /dev/null +++ b/vendor/syn/benches/rust.rs @@ -0,0 +1,182 @@ +// $ cargo bench --features full,test --bench rust +// +// Syn only, useful for profiling: +// $ RUSTFLAGS='--cfg syn_only' cargo build --release --features full,test --bench rust + +#![cfg_attr(not(syn_only), feature(rustc_private))] +#![recursion_limit = "1024"] +#![allow( + clippy::arc_with_non_send_sync, + clippy::cast_lossless, + clippy::let_underscore_untyped, + clippy::manual_let_else, + clippy::match_like_matches_macro, + clippy::uninlined_format_args, + clippy::unnecessary_wraps +)] + +#[macro_use] +#[path = "../tests/macros/mod.rs"] +mod macros; + +#[allow(dead_code)] +#[path = "../tests/repo/mod.rs"] +mod repo; + +use std::fs; +use std::time::{Duration, Instant}; + +#[cfg(not(syn_only))] +mod tokenstream_parse { + use proc_macro2::TokenStream; + use std::str::FromStr; + + pub fn bench(content: &str) -> Result<(), ()> { + TokenStream::from_str(content).map(drop).map_err(drop) + } +} + +mod syn_parse { + pub fn bench(content: &str) -> Result<(), ()> { + syn::parse_file(content).map(drop).map_err(drop) + } +} + +#[cfg(not(syn_only))] +mod librustc_parse { + extern crate rustc_data_structures; + extern crate rustc_driver; + extern crate rustc_error_messages; + extern crate rustc_errors; + extern crate rustc_parse; + extern crate rustc_session; + extern crate rustc_span; + + use rustc_data_structures::sync::Lrc; + use 
rustc_error_messages::FluentBundle; + use rustc_errors::{emitter::Emitter, translation::Translate, DiagCtxt, Diagnostic}; + use rustc_session::parse::ParseSess; + use rustc_span::source_map::{FilePathMapping, SourceMap}; + use rustc_span::{edition::Edition, FileName}; + + pub fn bench(content: &str) -> Result<(), ()> { + struct SilentEmitter; + + impl Emitter for SilentEmitter { + fn emit_diagnostic(&mut self, _diag: &Diagnostic) {} + fn source_map(&self) -> Option<&Lrc<SourceMap>> { + None + } + } + + impl Translate for SilentEmitter { + fn fluent_bundle(&self) -> Option<&Lrc<FluentBundle>> { + None + } + fn fallback_fluent_bundle(&self) -> &FluentBundle { + panic!("silent emitter attempted to translate a diagnostic"); + } + } + + rustc_span::create_session_if_not_set_then(Edition::Edition2018, |_| { + let source_map = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let emitter = Box::new(SilentEmitter); + let handler = DiagCtxt::with_emitter(emitter); + let sess = ParseSess::with_dcx(handler, source_map); + if let Err(diagnostic) = rustc_parse::parse_crate_from_source_str( + FileName::Custom("bench".to_owned()), + content.to_owned(), + &sess, + ) { + diagnostic.cancel(); + return Err(()); + }; + Ok(()) + }) + } +} + +#[cfg(not(syn_only))] +mod read_from_disk { + pub fn bench(content: &str) -> Result<(), ()> { + let _ = content; + Ok(()) + } +} + +fn exec(mut codepath: impl FnMut(&str) -> Result<(), ()>) -> Duration { + let begin = Instant::now(); + let mut success = 0; + let mut total = 0; + + ["tests/rust/compiler", "tests/rust/library"] + .iter() + .flat_map(|dir| { + walkdir::WalkDir::new(dir) + .into_iter() + .filter_entry(repo::base_dir_filter) + }) + .for_each(|entry| { + let entry = entry.unwrap(); + let path = entry.path(); + if path.is_dir() { + return; + } + let content = fs::read_to_string(path).unwrap(); + let ok = codepath(&content).is_ok(); + success += ok as usize; + total += 1; + if !ok { + eprintln!("FAIL {}", path.display()); + } + }); + + assert_eq!(success, total); + begin.elapsed() +} + +fn main() { + repo::clone_rust(); + + macro_rules! testcases { + ($($(#[$cfg:meta])* $name:ident,)*) => { + [ + $( + $(#[$cfg])* + (stringify!($name), $name::bench as fn(&str) -> Result<(), ()>), + )* + ] + }; + } + + #[cfg(not(syn_only))] + { + let mut lines = 0; + let mut files = 0; + exec(|content| { + lines += content.lines().count(); + files += 1; + Ok(()) + }); + eprintln!("\n{} lines in {} files", lines, files); + } + + for (name, f) in testcases!( + #[cfg(not(syn_only))] + read_from_disk, + #[cfg(not(syn_only))] + tokenstream_parse, + syn_parse, + #[cfg(not(syn_only))] + librustc_parse, + ) { + eprint!("{:20}", format!("{}:", name)); + let elapsed = exec(f); + eprintln!( + "elapsed={}.{:03}s", + elapsed.as_secs(), + elapsed.subsec_millis(), + ); + } + eprintln!(); +} diff --git a/vendor/syn/src/attr.rs b/vendor/syn/src/attr.rs new file mode 100644 index 0000000..b6c4675 --- /dev/null +++ b/vendor/syn/src/attr.rs @@ -0,0 +1,776 @@ +use super::*; +use proc_macro2::TokenStream; +use std::iter; +use std::slice; + +#[cfg(feature = "parsing")] +use crate::meta::{self, ParseNestedMeta}; +#[cfg(feature = "parsing")] +use crate::parse::{Parse, ParseStream, Parser, Result}; + +ast_struct! { + /// An attribute, like `#[repr(transparent)]`. + /// + /// <br> + /// + /// # Syntax + /// + /// Rust has six types of attributes. + /// + /// - Outer attributes like `#[repr(transparent)]`. These appear outside or + /// in front of the item they describe. 
+ /// + /// - Inner attributes like `#![feature(proc_macro)]`. These appear inside + /// of the item they describe, usually a module. + /// + /// - Outer one-line doc comments like `/// Example`. + /// + /// - Inner one-line doc comments like `//! Please file an issue`. + /// + /// - Outer documentation blocks `/** Example */`. + /// + /// - Inner documentation blocks `/*! Please file an issue */`. + /// + /// The `style` field of type `AttrStyle` distinguishes whether an attribute + /// is outer or inner. + /// + /// Every attribute has a `path` that indicates the intended interpretation + /// of the rest of the attribute's contents. The path and the optional + /// additional contents are represented together in the `meta` field of the + /// attribute in three possible varieties: + /// + /// - Meta::Path — attributes whose information content conveys just a + /// path, for example the `#[test]` attribute. + /// + /// - Meta::List — attributes that carry arbitrary tokens after the + /// path, surrounded by a delimiter (parenthesis, bracket, or brace). For + /// example `#[derive(Copy)]` or `#[precondition(x < 5)]`. + /// + /// - Meta::NameValue — attributes with an `=` sign after the path, + /// followed by a Rust expression. For example `#[path = + /// "sys/windows.rs"]`. + /// + /// All doc comments are represented in the NameValue style with a path of + /// "doc", as this is how they are processed by the compiler and by + /// `macro_rules!` macros. + /// + /// ```text + /// #[derive(Copy, Clone)] + /// ~~~~~~Path + /// ^^^^^^^^^^^^^^^^^^^Meta::List + /// + /// #[path = "sys/windows.rs"] + /// ~~~~Path + /// ^^^^^^^^^^^^^^^^^^^^^^^Meta::NameValue + /// + /// #[test] + /// ^^^^Meta::Path + /// ``` + /// + /// <br> + /// + /// # Parsing from tokens to Attribute + /// + /// This type does not implement the [`Parse`] trait and thus cannot be + /// parsed directly by [`ParseStream::parse`]. Instead use + /// [`ParseStream::call`] with one of the two parser functions + /// [`Attribute::parse_outer`] or [`Attribute::parse_inner`] depending on + /// which you intend to parse. + /// + /// [`Parse`]: parse::Parse + /// [`ParseStream::parse`]: parse::ParseBuffer::parse + /// [`ParseStream::call`]: parse::ParseBuffer::call + /// + /// ``` + /// use syn::{Attribute, Ident, Result, Token}; + /// use syn::parse::{Parse, ParseStream}; + /// + /// // Parses a unit struct with attributes. + /// // + /// // #[path = "s.tmpl"] + /// // struct S; + /// struct UnitStruct { + /// attrs: Vec<Attribute>, + /// struct_token: Token![struct], + /// name: Ident, + /// semi_token: Token![;], + /// } + /// + /// impl Parse for UnitStruct { + /// fn parse(input: ParseStream) -> Result<Self> { + /// Ok(UnitStruct { + /// attrs: input.call(Attribute::parse_outer)?, + /// struct_token: input.parse()?, + /// name: input.parse()?, + /// semi_token: input.parse()?, + /// }) + /// } + /// } + /// ``` + /// + /// <p><br></p> + /// + /// # Parsing from Attribute to structured arguments + /// + /// The grammar of attributes in Rust is very flexible, which makes the + /// syntax tree not that useful on its own. In particular, arguments of the + /// `Meta::List` variety of attribute are held in an arbitrary `tokens: + /// TokenStream`. Macros are expected to check the `path` of the attribute, + /// decide whether they recognize it, and then parse the remaining tokens + /// according to whatever grammar they wish to require for that kind of + /// attribute. 
Use [`parse_args()`] to parse those tokens into the expected + /// data structure. + /// + /// [`parse_args()`]: Attribute::parse_args + /// + /// <p><br></p> + /// + /// # Doc comments + /// + /// The compiler transforms doc comments, such as `/// comment` and `/*! + /// comment */`, into attributes before macros are expanded. Each comment is + /// expanded into an attribute of the form `#[doc = r"comment"]`. + /// + /// As an example, the following `mod` items are expanded identically: + /// + /// ``` + /// # use syn::{ItemMod, parse_quote}; + /// let doc: ItemMod = parse_quote! { + /// /// Single line doc comments + /// /// We write so many! + /// /** + /// * Multi-line comments... + /// * May span many lines + /// */ + /// mod example { + /// //! Of course, they can be inner too + /// /*! And fit in a single line */ + /// } + /// }; + /// let attr: ItemMod = parse_quote! { + /// #[doc = r" Single line doc comments"] + /// #[doc = r" We write so many!"] + /// #[doc = r" + /// * Multi-line comments... + /// * May span many lines + /// "] + /// mod example { + /// #![doc = r" Of course, they can be inner too"] + /// #![doc = r" And fit in a single line "] + /// } + /// }; + /// assert_eq!(doc, attr); + /// ``` + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct Attribute { + pub pound_token: Token![#], + pub style: AttrStyle, + pub bracket_token: token::Bracket, + pub meta: Meta, + } +} + +impl Attribute { + /// Returns the path that identifies the interpretation of this attribute. + /// + /// For example this would return the `test` in `#[test]`, the `derive` in + /// `#[derive(Copy)]`, and the `path` in `#[path = "sys/windows.rs"]`. + pub fn path(&self) -> &Path { + self.meta.path() + } + + /// Parse the arguments to the attribute as a syntax tree. + /// + /// This is similar to pulling out the `TokenStream` from `Meta::List` and + /// doing `syn::parse2::<T>(meta_list.tokens)`, except that using + /// `parse_args` the error message has a more useful span when `tokens` is + /// empty. + /// + /// The surrounding delimiters are *not* included in the input to the + /// parser. + /// + /// ```text + /// #[my_attr(value < 5)] + /// ^^^^^^^^^ what gets parsed + /// ``` + /// + /// # Example + /// + /// ``` + /// use syn::{parse_quote, Attribute, Expr}; + /// + /// let attr: Attribute = parse_quote! { + /// #[precondition(value < 5)] + /// }; + /// + /// if attr.path().is_ident("precondition") { + /// let precondition: Expr = attr.parse_args()?; + /// // ... + /// } + /// # anyhow::Ok(()) + /// ``` + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_args<T: Parse>(&self) -> Result<T> { + self.parse_args_with(T::parse) + } + + /// Parse the arguments to the attribute using the given parser. + /// + /// # Example + /// + /// ``` + /// use syn::{parse_quote, Attribute}; + /// + /// let attr: Attribute = parse_quote! 
{ + /// #[inception { #[brrrrrrraaaaawwwwrwrrrmrmrmmrmrmmmmm] }] + /// }; + /// + /// let bwom = attr.parse_args_with(Attribute::parse_outer)?; + /// + /// // Attribute does not have a Parse impl, so we couldn't directly do: + /// // let bwom: Attribute = attr.parse_args()?; + /// # anyhow::Ok(()) + /// ``` + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_args_with<F: Parser>(&self, parser: F) -> Result<F::Output> { + match &self.meta { + Meta::Path(path) => Err(crate::error::new2( + path.segments.first().unwrap().ident.span(), + path.segments.last().unwrap().ident.span(), + format!( + "expected attribute arguments in parentheses: {}[{}(...)]", + parsing::DisplayAttrStyle(&self.style), + parsing::DisplayPath(path), + ), + )), + Meta::NameValue(meta) => Err(Error::new( + meta.eq_token.span, + format_args!( + "expected parentheses: {}[{}(...)]", + parsing::DisplayAttrStyle(&self.style), + parsing::DisplayPath(&meta.path), + ), + )), + Meta::List(meta) => meta.parse_args_with(parser), + } + } + + /// Parse the arguments to the attribute, expecting it to follow the + /// conventional structure used by most of Rust's built-in attributes. + /// + /// The [*Meta Item Attribute Syntax*][syntax] section in the Rust reference + /// explains the convention in more detail. Not all attributes follow this + /// convention, so [`parse_args()`][Self::parse_args] is available if you + /// need to parse arbitrarily goofy attribute syntax. + /// + /// [syntax]: https://doc.rust-lang.org/reference/attributes.html#meta-item-attribute-syntax + /// + /// # Example + /// + /// We'll parse a struct, and then parse some of Rust's `#[repr]` attribute + /// syntax. + /// + /// ``` + /// use syn::{parenthesized, parse_quote, token, ItemStruct, LitInt}; + /// + /// let input: ItemStruct = parse_quote! { + /// #[repr(C, align(4))] + /// pub struct MyStruct(u16, u32); + /// }; + /// + /// let mut repr_c = false; + /// let mut repr_transparent = false; + /// let mut repr_align = None::<usize>; + /// let mut repr_packed = None::<usize>; + /// for attr in &input.attrs { + /// if attr.path().is_ident("repr") { + /// attr.parse_nested_meta(|meta| { + /// // #[repr(C)] + /// if meta.path.is_ident("C") { + /// repr_c = true; + /// return Ok(()); + /// } + /// + /// // #[repr(transparent)] + /// if meta.path.is_ident("transparent") { + /// repr_transparent = true; + /// return Ok(()); + /// } + /// + /// // #[repr(align(N))] + /// if meta.path.is_ident("align") { + /// let content; + /// parenthesized!(content in meta.input); + /// let lit: LitInt = content.parse()?; + /// let n: usize = lit.base10_parse()?; + /// repr_align = Some(n); + /// return Ok(()); + /// } + /// + /// // #[repr(packed)] or #[repr(packed(N))], omitted N means 1 + /// if meta.path.is_ident("packed") { + /// if meta.input.peek(token::Paren) { + /// let content; + /// parenthesized!(content in meta.input); + /// let lit: LitInt = content.parse()?; + /// let n: usize = lit.base10_parse()?; + /// repr_packed = Some(n); + /// } else { + /// repr_packed = Some(1); + /// } + /// return Ok(()); + /// } + /// + /// Err(meta.error("unrecognized repr")) + /// })?; + /// } + /// } + /// # anyhow::Ok(()) + /// ``` + /// + /// # Alternatives + /// + /// In some cases, for attributes which have nested layers of structured + /// content, the following less flexible approach might be more convenient: + /// + /// ``` + /// # use syn::{parse_quote, ItemStruct}; + /// # + /// # let input: ItemStruct = parse_quote! 
{ + /// # #[repr(C, align(4))] + /// # pub struct MyStruct(u16, u32); + /// # }; + /// # + /// use syn::punctuated::Punctuated; + /// use syn::{parenthesized, token, Error, LitInt, Meta, Token}; + /// + /// let mut repr_c = false; + /// let mut repr_transparent = false; + /// let mut repr_align = None::<usize>; + /// let mut repr_packed = None::<usize>; + /// for attr in &input.attrs { + /// if attr.path().is_ident("repr") { + /// let nested = attr.parse_args_with(Punctuated::<Meta, Token![,]>::parse_terminated)?; + /// for meta in nested { + /// match meta { + /// // #[repr(C)] + /// Meta::Path(path) if path.is_ident("C") => { + /// repr_c = true; + /// } + /// + /// // #[repr(align(N))] + /// Meta::List(meta) if meta.path.is_ident("align") => { + /// let lit: LitInt = meta.parse_args()?; + /// let n: usize = lit.base10_parse()?; + /// repr_align = Some(n); + /// } + /// + /// /* ... */ + /// + /// _ => { + /// return Err(Error::new_spanned(meta, "unrecognized repr")); + /// } + /// } + /// } + /// } + /// } + /// # Ok(()) + /// ``` + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_nested_meta( + &self, + logic: impl FnMut(ParseNestedMeta) -> Result<()>, + ) -> Result<()> { + self.parse_args_with(meta::parser(logic)) + } + + /// Parses zero or more outer attributes from the stream. + /// + /// # Example + /// + /// See + /// [*Parsing from tokens to Attribute*](#parsing-from-tokens-to-attribute). + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_outer(input: ParseStream) -> Result<Vec<Self>> { + let mut attrs = Vec::new(); + while input.peek(Token![#]) { + attrs.push(input.call(parsing::single_parse_outer)?); + } + Ok(attrs) + } + + /// Parses zero or more inner attributes from the stream. + /// + /// # Example + /// + /// See + /// [*Parsing from tokens to Attribute*](#parsing-from-tokens-to-attribute). + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_inner(input: ParseStream) -> Result<Vec<Self>> { + let mut attrs = Vec::new(); + parsing::parse_inner(input, &mut attrs)?; + Ok(attrs) + } +} + +ast_enum! { + /// Distinguishes between attributes that decorate an item and attributes + /// that are contained within an item. + /// + /// # Outer attributes + /// + /// - `#[repr(transparent)]` + /// - `/// # Example` + /// - `/** Please file an issue */` + /// + /// # Inner attributes + /// + /// - `#![feature(proc_macro)]` + /// - `//! # Example` + /// - `/*! Please file an issue */` + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub enum AttrStyle { + Outer, + Inner(Token![!]), + } +} + +ast_enum_of_structs! { + /// Content of a compile-time structured attribute. + /// + /// ## Path + /// + /// A meta path is like the `test` in `#[test]`. + /// + /// ## List + /// + /// A meta list is like the `derive(Copy)` in `#[derive(Copy)]`. + /// + /// ## NameValue + /// + /// A name-value meta is like the `path = "..."` in `#[path = + /// "sys/windows.rs"]`. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub enum Meta { + Path(Path), + + /// A structured list within an attribute, like `derive(Copy, Clone)`. + List(MetaList), + + /// A name-value pair within an attribute, like `feature = "nightly"`. + NameValue(MetaNameValue), + } +} + +ast_struct! 
{ + /// A structured list within an attribute, like `derive(Copy, Clone)`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct MetaList { + pub path: Path, + pub delimiter: MacroDelimiter, + pub tokens: TokenStream, + } +} + +ast_struct! { + /// A name-value pair within an attribute, like `feature = "nightly"`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct MetaNameValue { + pub path: Path, + pub eq_token: Token![=], + pub value: Expr, + } +} + +impl Meta { + /// Returns the path that begins this structured meta item. + /// + /// For example this would return the `test` in `#[test]`, the `derive` in + /// `#[derive(Copy)]`, and the `path` in `#[path = "sys/windows.rs"]`. + pub fn path(&self) -> &Path { + match self { + Meta::Path(path) => path, + Meta::List(meta) => &meta.path, + Meta::NameValue(meta) => &meta.path, + } + } + + /// Error if this is a `Meta::List` or `Meta::NameValue`. + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn require_path_only(&self) -> Result<&Path> { + let error_span = match self { + Meta::Path(path) => return Ok(path), + Meta::List(meta) => meta.delimiter.span().open(), + Meta::NameValue(meta) => meta.eq_token.span, + }; + Err(Error::new(error_span, "unexpected token in attribute")) + } + + /// Error if this is a `Meta::Path` or `Meta::NameValue`. + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn require_list(&self) -> Result<&MetaList> { + match self { + Meta::List(meta) => Ok(meta), + Meta::Path(path) => Err(crate::error::new2( + path.segments.first().unwrap().ident.span(), + path.segments.last().unwrap().ident.span(), + format!( + "expected attribute arguments in parentheses: `{}(...)`", + parsing::DisplayPath(path), + ), + )), + Meta::NameValue(meta) => Err(Error::new(meta.eq_token.span, "expected `(`")), + } + } + + /// Error if this is a `Meta::Path` or `Meta::List`. + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn require_name_value(&self) -> Result<&MetaNameValue> { + match self { + Meta::NameValue(meta) => Ok(meta), + Meta::Path(path) => Err(crate::error::new2( + path.segments.first().unwrap().ident.span(), + path.segments.last().unwrap().ident.span(), + format!( + "expected a value for this attribute: `{} = ...`", + parsing::DisplayPath(path), + ), + )), + Meta::List(meta) => Err(Error::new(meta.delimiter.span().open(), "expected `=`")), + } + } +} + +impl MetaList { + /// See [`Attribute::parse_args`]. + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_args<T: Parse>(&self) -> Result<T> { + self.parse_args_with(T::parse) + } + + /// See [`Attribute::parse_args_with`]. + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_args_with<F: Parser>(&self, parser: F) -> Result<F::Output> { + let scope = self.delimiter.span().close(); + crate::parse::parse_scoped(parser, scope, self.tokens.clone()) + } + + /// See [`Attribute::parse_nested_meta`]. 
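+    ///
+    /// A minimal sketch of calling this directly on a `MetaList`; the
+    /// attribute name `my_attr` and its `key = "..."` argument are invented
+    /// for illustration.
+    ///
+    /// ```
+    /// use syn::{parse_quote, LitStr, MetaList};
+    ///
+    /// let list: MetaList = parse_quote! {
+    ///     my_attr(key = "value")
+    /// };
+    ///
+    /// list.parse_nested_meta(|meta| {
+    ///     if meta.path.is_ident("key") {
+    ///         let _value: LitStr = meta.value()?.parse()?;
+    ///         Ok(())
+    ///     } else {
+    ///         Err(meta.error("unsupported argument"))
+    ///     }
+    /// })?;
+    /// # anyhow::Ok(())
+    /// ```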
+ #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_nested_meta( + &self, + logic: impl FnMut(ParseNestedMeta) -> Result<()>, + ) -> Result<()> { + self.parse_args_with(meta::parser(logic)) + } +} + +pub(crate) trait FilterAttrs<'a> { + type Ret: Iterator<Item = &'a Attribute>; + + fn outer(self) -> Self::Ret; + fn inner(self) -> Self::Ret; +} + +impl<'a> FilterAttrs<'a> for &'a [Attribute] { + type Ret = iter::Filter<slice::Iter<'a, Attribute>, fn(&&Attribute) -> bool>; + + fn outer(self) -> Self::Ret { + fn is_outer(attr: &&Attribute) -> bool { + match attr.style { + AttrStyle::Outer => true, + AttrStyle::Inner(_) => false, + } + } + self.iter().filter(is_outer) + } + + fn inner(self) -> Self::Ret { + fn is_inner(attr: &&Attribute) -> bool { + match attr.style { + AttrStyle::Inner(_) => true, + AttrStyle::Outer => false, + } + } + self.iter().filter(is_inner) + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::parse::discouraged::Speculative as _; + use crate::parse::{Parse, ParseStream, Result}; + use std::fmt::{self, Display}; + + pub(crate) fn parse_inner(input: ParseStream, attrs: &mut Vec<Attribute>) -> Result<()> { + while input.peek(Token![#]) && input.peek2(Token![!]) { + attrs.push(input.call(parsing::single_parse_inner)?); + } + Ok(()) + } + + pub(crate) fn single_parse_inner(input: ParseStream) -> Result<Attribute> { + let content; + Ok(Attribute { + pound_token: input.parse()?, + style: AttrStyle::Inner(input.parse()?), + bracket_token: bracketed!(content in input), + meta: content.parse()?, + }) + } + + pub(crate) fn single_parse_outer(input: ParseStream) -> Result<Attribute> { + let content; + Ok(Attribute { + pound_token: input.parse()?, + style: AttrStyle::Outer, + bracket_token: bracketed!(content in input), + meta: content.parse()?, + }) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Meta { + fn parse(input: ParseStream) -> Result<Self> { + let path = input.call(Path::parse_mod_style)?; + parse_meta_after_path(path, input) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for MetaList { + fn parse(input: ParseStream) -> Result<Self> { + let path = input.call(Path::parse_mod_style)?; + parse_meta_list_after_path(path, input) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for MetaNameValue { + fn parse(input: ParseStream) -> Result<Self> { + let path = input.call(Path::parse_mod_style)?; + parse_meta_name_value_after_path(path, input) + } + } + + pub(crate) fn parse_meta_after_path(path: Path, input: ParseStream) -> Result<Meta> { + if input.peek(token::Paren) || input.peek(token::Bracket) || input.peek(token::Brace) { + parse_meta_list_after_path(path, input).map(Meta::List) + } else if input.peek(Token![=]) { + parse_meta_name_value_after_path(path, input).map(Meta::NameValue) + } else { + Ok(Meta::Path(path)) + } + } + + fn parse_meta_list_after_path(path: Path, input: ParseStream) -> Result<MetaList> { + let (delimiter, tokens) = mac::parse_delimiter(input)?; + Ok(MetaList { + path, + delimiter, + tokens, + }) + } + + fn parse_meta_name_value_after_path(path: Path, input: ParseStream) -> Result<MetaNameValue> { + let eq_token: Token![=] = input.parse()?; + let ahead = input.fork(); + let lit: Option<Lit> = ahead.parse()?; + let value = if let (Some(lit), true) = (lit, ahead.is_empty()) { + input.advance_to(&ahead); + Expr::Lit(ExprLit { + attrs: Vec::new(), + lit, + }) + } else if 
input.peek(Token![#]) && input.peek2(token::Bracket) { + return Err(input.error("unexpected attribute inside of attribute")); + } else { + input.parse()? + }; + Ok(MetaNameValue { + path, + eq_token, + value, + }) + } + + pub(super) struct DisplayAttrStyle<'a>(pub &'a AttrStyle); + + impl<'a> Display for DisplayAttrStyle<'a> { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str(match self.0 { + AttrStyle::Outer => "#", + AttrStyle::Inner(_) => "#!", + }) + } + } + + pub(super) struct DisplayPath<'a>(pub &'a Path); + + impl<'a> Display for DisplayPath<'a> { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + for (i, segment) in self.0.segments.iter().enumerate() { + if i > 0 || self.0.leading_colon.is_some() { + formatter.write_str("::")?; + } + write!(formatter, "{}", segment.ident)?; + } + Ok(()) + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use proc_macro2::TokenStream; + use quote::ToTokens; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Attribute { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.pound_token.to_tokens(tokens); + if let AttrStyle::Inner(b) = &self.style { + b.to_tokens(tokens); + } + self.bracket_token.surround(tokens, |tokens| { + self.meta.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for MetaList { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.path.to_tokens(tokens); + self.delimiter.surround(tokens, self.tokens.clone()); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for MetaNameValue { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.path.to_tokens(tokens); + self.eq_token.to_tokens(tokens); + self.value.to_tokens(tokens); + } + } +} diff --git a/vendor/syn/src/bigint.rs b/vendor/syn/src/bigint.rs new file mode 100644 index 0000000..66aaa93 --- /dev/null +++ b/vendor/syn/src/bigint.rs @@ -0,0 +1,66 @@ +use std::ops::{AddAssign, MulAssign}; + +// For implementing base10_digits() accessor on LitInt. +pub(crate) struct BigInt { + digits: Vec<u8>, +} + +impl BigInt { + pub(crate) fn new() -> Self { + BigInt { digits: Vec::new() } + } + + pub(crate) fn to_string(&self) -> String { + let mut repr = String::with_capacity(self.digits.len()); + + let mut has_nonzero = false; + for digit in self.digits.iter().rev() { + has_nonzero |= *digit != 0; + if has_nonzero { + repr.push((*digit + b'0') as char); + } + } + + if repr.is_empty() { + repr.push('0'); + } + + repr + } + + fn reserve_two_digits(&mut self) { + let len = self.digits.len(); + let desired = + len + !self.digits.ends_with(&[0, 0]) as usize + !self.digits.ends_with(&[0]) as usize; + self.digits.resize(desired, 0); + } +} + +impl AddAssign<u8> for BigInt { + // Assumes increment <16. + fn add_assign(&mut self, mut increment: u8) { + self.reserve_two_digits(); + + let mut i = 0; + while increment > 0 { + let sum = self.digits[i] + increment; + self.digits[i] = sum % 10; + increment = sum / 10; + i += 1; + } + } +} + +impl MulAssign<u8> for BigInt { + // Assumes base <=16. + fn mul_assign(&mut self, base: u8) { + self.reserve_two_digits(); + + let mut carry = 0; + for digit in &mut self.digits { + let prod = *digit * base + carry; + *digit = prod % 10; + carry = prod / 10; + } + } +} diff --git a/vendor/syn/src/buffer.rs b/vendor/syn/src/buffer.rs new file mode 100644 index 0000000..86dec46 --- /dev/null +++ b/vendor/syn/src/buffer.rs @@ -0,0 +1,432 @@ +//! 
A stably addressed token buffer supporting efficient traversal based on a +//! cheaply copyable cursor. + +// This module is heavily commented as it contains most of the unsafe code in +// Syn, and caution should be used when editing it. The public-facing interface +// is 100% safe but the implementation is fragile internally. + +use crate::Lifetime; +use proc_macro2::extra::DelimSpan; +use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree}; +use std::cmp::Ordering; +use std::marker::PhantomData; + +/// Internal type which is used instead of `TokenTree` to represent a token tree +/// within a `TokenBuffer`. +enum Entry { + // Mimicking types from proc-macro. + // Group entries contain the offset to the matching End entry. + Group(Group, usize), + Ident(Ident), + Punct(Punct), + Literal(Literal), + // End entries contain the offset (negative) to the start of the buffer. + End(isize), +} + +/// A buffer that can be efficiently traversed multiple times, unlike +/// `TokenStream` which requires a deep copy in order to traverse more than +/// once. +pub struct TokenBuffer { + // NOTE: Do not implement clone on this - while the current design could be + // cloned, other designs which could be desirable may not be cloneable. + entries: Box<[Entry]>, +} + +impl TokenBuffer { + fn recursive_new(entries: &mut Vec<Entry>, stream: TokenStream) { + for tt in stream { + match tt { + TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)), + TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)), + TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)), + TokenTree::Group(group) => { + let group_start_index = entries.len(); + entries.push(Entry::End(0)); // we replace this below + Self::recursive_new(entries, group.stream()); + let group_end_index = entries.len(); + entries.push(Entry::End(-(group_end_index as isize))); + let group_end_offset = group_end_index - group_start_index; + entries[group_start_index] = Entry::Group(group, group_end_offset); + } + } + } + } + + /// Creates a `TokenBuffer` containing all the tokens from the input + /// `proc_macro::TokenStream`. + #[cfg(feature = "proc-macro")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "proc-macro")))] + pub fn new(stream: proc_macro::TokenStream) -> Self { + Self::new2(stream.into()) + } + + /// Creates a `TokenBuffer` containing all the tokens from the input + /// `proc_macro2::TokenStream`. + pub fn new2(stream: TokenStream) -> Self { + let mut entries = Vec::new(); + Self::recursive_new(&mut entries, stream); + entries.push(Entry::End(-(entries.len() as isize))); + Self { + entries: entries.into_boxed_slice(), + } + } + + /// Creates a cursor referencing the first token in the buffer and able to + /// traverse until the end of the buffer. + pub fn begin(&self) -> Cursor { + let ptr = self.entries.as_ptr(); + unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) } + } +} + +/// A cheaply copyable cursor into a `TokenBuffer`. +/// +/// This cursor holds a shared reference into the immutable data which is used +/// internally to represent a `TokenStream`, and can be efficiently manipulated +/// and copied around. +/// +/// An empty `Cursor` can be created directly, or one may create a `TokenBuffer` +/// object and get a cursor to its first token with `begin()`. +pub struct Cursor<'a> { + // The current entry which the `Cursor` is pointing at. + ptr: *const Entry, + // This is the only `Entry::End` object which this cursor is allowed to + // point at. 
All other `End` objects are skipped over in `Cursor::create`.
+    scope: *const Entry,
+    // Cursor is covariant in 'a. This field ensures that our pointers are still
+    // valid.
+    marker: PhantomData<&'a Entry>,
+}
+
+impl<'a> Cursor<'a> {
+    /// Creates a cursor referencing a static empty TokenStream.
+    pub fn empty() -> Self {
+        // It's safe in this situation for us to put an `Entry` object in global
+        // storage, despite it not actually being safe to send across threads
+        // (`Ident` is a reference into a thread-local table). This is because
+        // this entry never includes an `Ident` object.
+        //
+        // This wrapper struct allows us to break the rules and put a `Sync`
+        // object in global storage.
+        struct UnsafeSyncEntry(Entry);
+        unsafe impl Sync for UnsafeSyncEntry {}
+        static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0));
+
+        Cursor {
+            ptr: &EMPTY_ENTRY.0,
+            scope: &EMPTY_ENTRY.0,
+            marker: PhantomData,
+        }
+    }
+
+    /// This create method intelligently exits non-explicitly-entered
+    /// `None`-delimited scopes when the cursor reaches the end of them,
+    /// allowing for them to be treated transparently.
+    unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
+        // NOTE: If we're looking at an `End`, we want to advance the cursor
+        // past it, unless `ptr == scope`, which means that we're at the edge of
+        // our cursor's scope. We should only have `ptr != scope` at the exit
+        // from None-delimited groups entered with `ignore_none`.
+        while let Entry::End(_) = unsafe { &*ptr } {
+            if ptr == scope {
+                break;
+            }
+            ptr = unsafe { ptr.add(1) };
+        }
+
+        Cursor {
+            ptr,
+            scope,
+            marker: PhantomData,
+        }
+    }
+
+    /// Get the current entry.
+    fn entry(self) -> &'a Entry {
+        unsafe { &*self.ptr }
+    }
+
+    /// Bump the cursor to point at the next token after the current one. This
+    /// is undefined behavior if the cursor is currently looking at an
+    /// `Entry::End`.
+    ///
+    /// If the cursor is looking at an `Entry::Group`, the bumped cursor will
+    /// point at the first token in the group (with the same scope end).
+    unsafe fn bump_ignore_group(self) -> Cursor<'a> {
+        unsafe { Cursor::create(self.ptr.offset(1), self.scope) }
+    }
+
+    /// While the cursor is looking at a `None`-delimited group, move it to look
+    /// at the first token inside instead. If the group is empty, this will move
+    /// the cursor past the `None`-delimited group.
+    ///
+    /// WARNING: This mutates its argument.
+    fn ignore_none(&mut self) {
+        while let Entry::Group(group, _) = self.entry() {
+            if group.delimiter() == Delimiter::None {
+                unsafe { *self = self.bump_ignore_group() };
+            } else {
+                break;
+            }
+        }
+    }
+
+    /// Checks whether the cursor is currently pointing at the end of its valid
+    /// scope.
+    pub fn eof(self) -> bool {
+        // We're at eof if we're at the end of our scope.
+        self.ptr == self.scope
+    }
+
+    /// If the cursor is pointing at a `Group` with the given delimiter, returns
+    /// a cursor into that group and one pointing to the next `TokenTree`.
+    pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, DelimSpan, Cursor<'a>)> {
+        // `None`-delimited groups are skipped over transparently unless the
+        // caller is explicitly asking to enter one; skipping in that case
+        // would make such groups impossible to enter at all.
+ if delim != Delimiter::None { + self.ignore_none(); + } + + if let Entry::Group(group, end_offset) = self.entry() { + if group.delimiter() == delim { + let span = group.delim_span(); + let end_of_group = unsafe { self.ptr.add(*end_offset) }; + let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) }; + let after_group = unsafe { Cursor::create(end_of_group, self.scope) }; + return Some((inside_of_group, span, after_group)); + } + } + + None + } + + pub(crate) fn any_group(self) -> Option<(Cursor<'a>, Delimiter, DelimSpan, Cursor<'a>)> { + if let Entry::Group(group, end_offset) = self.entry() { + let delimiter = group.delimiter(); + let span = group.delim_span(); + let end_of_group = unsafe { self.ptr.add(*end_offset) }; + let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) }; + let after_group = unsafe { Cursor::create(end_of_group, self.scope) }; + return Some((inside_of_group, delimiter, span, after_group)); + } + + None + } + + pub(crate) fn any_group_token(self) -> Option<(Group, Cursor<'a>)> { + if let Entry::Group(group, end_offset) = self.entry() { + let end_of_group = unsafe { self.ptr.add(*end_offset) }; + let after_group = unsafe { Cursor::create(end_of_group, self.scope) }; + return Some((group.clone(), after_group)); + } + + None + } + + /// If the cursor is pointing at a `Ident`, returns it along with a cursor + /// pointing at the next `TokenTree`. + pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> { + self.ignore_none(); + match self.entry() { + Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })), + _ => None, + } + } + + /// If the cursor is pointing at a `Punct`, returns it along with a cursor + /// pointing at the next `TokenTree`. + pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> { + self.ignore_none(); + match self.entry() { + Entry::Punct(punct) if punct.as_char() != '\'' => { + Some((punct.clone(), unsafe { self.bump_ignore_group() })) + } + _ => None, + } + } + + /// If the cursor is pointing at a `Literal`, return it along with a cursor + /// pointing at the next `TokenTree`. + pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> { + self.ignore_none(); + match self.entry() { + Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })), + _ => None, + } + } + + /// If the cursor is pointing at a `Lifetime`, returns it along with a + /// cursor pointing at the next `TokenTree`. + pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> { + self.ignore_none(); + match self.entry() { + Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => { + let next = unsafe { self.bump_ignore_group() }; + let (ident, rest) = next.ident()?; + let lifetime = Lifetime { + apostrophe: punct.span(), + ident, + }; + Some((lifetime, rest)) + } + _ => None, + } + } + + /// Copies all remaining tokens visible from this cursor into a + /// `TokenStream`. + pub fn token_stream(self) -> TokenStream { + let mut tts = Vec::new(); + let mut cursor = self; + while let Some((tt, rest)) = cursor.token_tree() { + tts.push(tt); + cursor = rest; + } + tts.into_iter().collect() + } + + /// If the cursor is pointing at a `TokenTree`, returns it along with a + /// cursor pointing at the next `TokenTree`. + /// + /// Returns `None` if the cursor has reached the end of its stream. + /// + /// This method does not treat `None`-delimited groups as transparent, and + /// will return a `Group(None, ..)` if the cursor is looking at one. 
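+    ///
+    /// A short sketch of draining a buffer token by token with this method;
+    /// the input tokens are arbitrary.
+    ///
+    /// ```
+    /// use proc_macro2::TokenStream;
+    /// use syn::buffer::TokenBuffer;
+    ///
+    /// let stream: TokenStream = "a + (b * c)".parse().unwrap();
+    /// let buffer = TokenBuffer::new2(stream);
+    /// let mut cursor = buffer.begin();
+    /// while let Some((token, next)) = cursor.token_tree() {
+    ///     println!("{}", token);
+    ///     cursor = next;
+    /// }
+    /// assert!(cursor.eof());
+    /// ```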
+ pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> { + let (tree, len) = match self.entry() { + Entry::Group(group, end_offset) => (group.clone().into(), *end_offset), + Entry::Literal(literal) => (literal.clone().into(), 1), + Entry::Ident(ident) => (ident.clone().into(), 1), + Entry::Punct(punct) => (punct.clone().into(), 1), + Entry::End(_) => return None, + }; + + let rest = unsafe { Cursor::create(self.ptr.add(len), self.scope) }; + Some((tree, rest)) + } + + /// Returns the `Span` of the current token, or `Span::call_site()` if this + /// cursor points to eof. + pub fn span(self) -> Span { + match self.entry() { + Entry::Group(group, _) => group.span(), + Entry::Literal(literal) => literal.span(), + Entry::Ident(ident) => ident.span(), + Entry::Punct(punct) => punct.span(), + Entry::End(_) => Span::call_site(), + } + } + + /// Returns the `Span` of the token immediately prior to the position of + /// this cursor, or of the current token if there is no previous one. + #[cfg(any(feature = "full", feature = "derive"))] + pub(crate) fn prev_span(mut self) -> Span { + if start_of_buffer(self) < self.ptr { + self.ptr = unsafe { self.ptr.offset(-1) }; + if let Entry::End(_) = self.entry() { + // Locate the matching Group begin token. + let mut depth = 1; + loop { + self.ptr = unsafe { self.ptr.offset(-1) }; + match self.entry() { + Entry::Group(group, _) => { + depth -= 1; + if depth == 0 { + return group.span(); + } + } + Entry::End(_) => depth += 1, + Entry::Literal(_) | Entry::Ident(_) | Entry::Punct(_) => {} + } + } + } + } + self.span() + } + + /// Skip over the next token without cloning it. Returns `None` if this + /// cursor points to eof. + /// + /// This method treats `'lifetimes` as a single token. + pub(crate) fn skip(self) -> Option<Cursor<'a>> { + let len = match self.entry() { + Entry::End(_) => return None, + + // Treat lifetimes as a single tt for the purposes of 'skip'. 
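+            // An apostrophe joined to a following ident occupies two entries
+            // in the buffer, so skip both; a bare apostrophe is one entry.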
+ Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => { + match unsafe { &*self.ptr.add(1) } { + Entry::Ident(_) => 2, + _ => 1, + } + } + + Entry::Group(_, end_offset) => *end_offset, + _ => 1, + }; + + Some(unsafe { Cursor::create(self.ptr.add(len), self.scope) }) + } +} + +impl<'a> Copy for Cursor<'a> {} + +impl<'a> Clone for Cursor<'a> { + fn clone(&self) -> Self { + *self + } +} + +impl<'a> Eq for Cursor<'a> {} + +impl<'a> PartialEq for Cursor<'a> { + fn eq(&self, other: &Self) -> bool { + self.ptr == other.ptr + } +} + +impl<'a> PartialOrd for Cursor<'a> { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + if same_buffer(*self, *other) { + Some(cmp_assuming_same_buffer(*self, *other)) + } else { + None + } + } +} + +pub(crate) fn same_scope(a: Cursor, b: Cursor) -> bool { + a.scope == b.scope +} + +pub(crate) fn same_buffer(a: Cursor, b: Cursor) -> bool { + start_of_buffer(a) == start_of_buffer(b) +} + +fn start_of_buffer(cursor: Cursor) -> *const Entry { + unsafe { + match &*cursor.scope { + Entry::End(offset) => cursor.scope.offset(*offset), + _ => unreachable!(), + } + } +} + +pub(crate) fn cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering { + a.ptr.cmp(&b.ptr) +} + +pub(crate) fn open_span_of_group(cursor: Cursor) -> Span { + match cursor.entry() { + Entry::Group(group, _) => group.span_open(), + _ => cursor.span(), + } +} + +pub(crate) fn close_span_of_group(cursor: Cursor) -> Span { + match cursor.entry() { + Entry::Group(group, _) => group.span_close(), + _ => cursor.span(), + } +} diff --git a/vendor/syn/src/custom_keyword.rs b/vendor/syn/src/custom_keyword.rs new file mode 100644 index 0000000..6ce23db --- /dev/null +++ b/vendor/syn/src/custom_keyword.rs @@ -0,0 +1,259 @@ +/// Define a type that supports parsing and printing a given identifier as if it +/// were a keyword. +/// +/// # Usage +/// +/// As a convention, it is recommended that this macro be invoked within a +/// module called `kw` or `keyword` and that the resulting parser be invoked +/// with a `kw::` or `keyword::` prefix. +/// +/// ``` +/// mod kw { +/// syn::custom_keyword!(whatever); +/// } +/// ``` +/// +/// The generated syntax tree node supports the following operations just like +/// any built-in keyword token. +/// +/// - [Peeking] — `input.peek(kw::whatever)` +/// +/// - [Parsing] — `input.parse::<kw::whatever>()?` +/// +/// - [Printing] — `quote!( ... #whatever_token ... )` +/// +/// - Construction from a [`Span`] — `let whatever_token = kw::whatever(sp)` +/// +/// - Field access to its span — `let sp = whatever_token.span` +/// +/// [Peeking]: crate::parse::ParseBuffer::peek +/// [Parsing]: crate::parse::ParseBuffer::parse +/// [Printing]: quote::ToTokens +/// [`Span`]: proc_macro2::Span +/// +/// # Example +/// +/// This example parses input that looks like `bool = true` or `str = "value"`. +/// The key must be either the identifier `bool` or the identifier `str`. If +/// `bool`, the value may be either `true` or `false`. If `str`, the value may +/// be any string literal. +/// +/// The symbols `bool` and `str` are not reserved keywords in Rust so these are +/// not considered keywords in the `syn::token` module. Like any other +/// identifier that is not a keyword, these can be declared as custom keywords +/// by crates that need to use them as such. 
+/// +/// ``` +/// use syn::{LitBool, LitStr, Result, Token}; +/// use syn::parse::{Parse, ParseStream}; +/// +/// mod kw { +/// syn::custom_keyword!(bool); +/// syn::custom_keyword!(str); +/// } +/// +/// enum Argument { +/// Bool { +/// bool_token: kw::bool, +/// eq_token: Token![=], +/// value: LitBool, +/// }, +/// Str { +/// str_token: kw::str, +/// eq_token: Token![=], +/// value: LitStr, +/// }, +/// } +/// +/// impl Parse for Argument { +/// fn parse(input: ParseStream) -> Result<Self> { +/// let lookahead = input.lookahead1(); +/// if lookahead.peek(kw::bool) { +/// Ok(Argument::Bool { +/// bool_token: input.parse::<kw::bool>()?, +/// eq_token: input.parse()?, +/// value: input.parse()?, +/// }) +/// } else if lookahead.peek(kw::str) { +/// Ok(Argument::Str { +/// str_token: input.parse::<kw::str>()?, +/// eq_token: input.parse()?, +/// value: input.parse()?, +/// }) +/// } else { +/// Err(lookahead.error()) +/// } +/// } +/// } +/// ``` +#[macro_export] +macro_rules! custom_keyword { + ($ident:ident) => { + #[allow(non_camel_case_types)] + pub struct $ident { + pub span: $crate::__private::Span, + } + + #[doc(hidden)] + #[allow(dead_code, non_snake_case)] + pub fn $ident<__S: $crate::__private::IntoSpans<$crate::__private::Span>>( + span: __S, + ) -> $ident { + $ident { + span: $crate::__private::IntoSpans::into_spans(span), + } + } + + const _: () = { + impl $crate::__private::Default for $ident { + fn default() -> Self { + $ident { + span: $crate::__private::Span::call_site(), + } + } + } + + $crate::impl_parse_for_custom_keyword!($ident); + $crate::impl_to_tokens_for_custom_keyword!($ident); + $crate::impl_clone_for_custom_keyword!($ident); + $crate::impl_extra_traits_for_custom_keyword!($ident); + }; + }; +} + +// Not public API. +#[cfg(feature = "parsing")] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_parse_for_custom_keyword { + ($ident:ident) => { + // For peek. + impl $crate::__private::CustomToken for $ident { + fn peek(cursor: $crate::buffer::Cursor) -> $crate::__private::bool { + if let $crate::__private::Some((ident, _rest)) = cursor.ident() { + ident == $crate::__private::stringify!($ident) + } else { + false + } + } + + fn display() -> &'static $crate::__private::str { + $crate::__private::concat!("`", $crate::__private::stringify!($ident), "`") + } + } + + impl $crate::parse::Parse for $ident { + fn parse(input: $crate::parse::ParseStream) -> $crate::parse::Result<$ident> { + input.step(|cursor| { + if let $crate::__private::Some((ident, rest)) = cursor.ident() { + if ident == $crate::__private::stringify!($ident) { + return $crate::__private::Ok(($ident { span: ident.span() }, rest)); + } + } + $crate::__private::Err(cursor.error($crate::__private::concat!( + "expected `", + $crate::__private::stringify!($ident), + "`", + ))) + }) + } + } + }; +} + +// Not public API. +#[cfg(not(feature = "parsing"))] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_parse_for_custom_keyword { + ($ident:ident) => {}; +} + +// Not public API. +#[cfg(feature = "printing")] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_to_tokens_for_custom_keyword { + ($ident:ident) => { + impl $crate::__private::ToTokens for $ident { + fn to_tokens(&self, tokens: &mut $crate::__private::TokenStream2) { + let ident = $crate::Ident::new($crate::__private::stringify!($ident), self.span); + $crate::__private::TokenStreamExt::append(tokens, ident); + } + } + }; +} + +// Not public API. +#[cfg(not(feature = "printing"))] +#[doc(hidden)] +#[macro_export] +macro_rules! 
impl_to_tokens_for_custom_keyword { + ($ident:ident) => {}; +} + +// Not public API. +#[cfg(feature = "clone-impls")] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_clone_for_custom_keyword { + ($ident:ident) => { + impl $crate::__private::Copy for $ident {} + + #[allow(clippy::expl_impl_clone_on_copy)] + impl $crate::__private::Clone for $ident { + fn clone(&self) -> Self { + *self + } + } + }; +} + +// Not public API. +#[cfg(not(feature = "clone-impls"))] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_clone_for_custom_keyword { + ($ident:ident) => {}; +} + +// Not public API. +#[cfg(feature = "extra-traits")] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_extra_traits_for_custom_keyword { + ($ident:ident) => { + impl $crate::__private::Debug for $ident { + fn fmt(&self, f: &mut $crate::__private::Formatter) -> $crate::__private::FmtResult { + $crate::__private::Formatter::write_str( + f, + $crate::__private::concat!( + "Keyword [", + $crate::__private::stringify!($ident), + "]", + ), + ) + } + } + + impl $crate::__private::Eq for $ident {} + + impl $crate::__private::PartialEq for $ident { + fn eq(&self, _other: &Self) -> $crate::__private::bool { + true + } + } + + impl $crate::__private::Hash for $ident { + fn hash<__H: $crate::__private::Hasher>(&self, _state: &mut __H) {} + } + }; +} + +// Not public API. +#[cfg(not(feature = "extra-traits"))] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_extra_traits_for_custom_keyword { + ($ident:ident) => {}; +} diff --git a/vendor/syn/src/custom_punctuation.rs b/vendor/syn/src/custom_punctuation.rs new file mode 100644 index 0000000..1b2c768 --- /dev/null +++ b/vendor/syn/src/custom_punctuation.rs @@ -0,0 +1,302 @@ +/// Define a type that supports parsing and printing a multi-character symbol +/// as if it were a punctuation token. +/// +/// # Usage +/// +/// ``` +/// syn::custom_punctuation!(LeftRightArrow, <=>); +/// ``` +/// +/// The generated syntax tree node supports the following operations just like +/// any built-in punctuation token. +/// +/// - [Peeking] — `input.peek(LeftRightArrow)` +/// +/// - [Parsing] — `input.parse::<LeftRightArrow>()?` +/// +/// - [Printing] — `quote!( ... #lrarrow ... )` +/// +/// - Construction from a [`Span`] — `let lrarrow = LeftRightArrow(sp)` +/// +/// - Construction from multiple [`Span`] — `let lrarrow = LeftRightArrow([sp, sp, sp])` +/// +/// - Field access to its spans — `let spans = lrarrow.spans` +/// +/// [Peeking]: crate::parse::ParseBuffer::peek +/// [Parsing]: crate::parse::ParseBuffer::parse +/// [Printing]: quote::ToTokens +/// [`Span`]: proc_macro2::Span +/// +/// # Example +/// +/// ``` +/// use proc_macro2::{TokenStream, TokenTree}; +/// use syn::parse::{Parse, ParseStream, Peek, Result}; +/// use syn::punctuated::Punctuated; +/// use syn::Expr; +/// +/// syn::custom_punctuation!(PathSeparator, </>); +/// +/// // expr </> expr </> expr ... 
+/// struct PathSegments { +/// segments: Punctuated<Expr, PathSeparator>, +/// } +/// +/// impl Parse for PathSegments { +/// fn parse(input: ParseStream) -> Result<Self> { +/// let mut segments = Punctuated::new(); +/// +/// let first = parse_until(input, PathSeparator)?; +/// segments.push_value(syn::parse2(first)?); +/// +/// while input.peek(PathSeparator) { +/// segments.push_punct(input.parse()?); +/// +/// let next = parse_until(input, PathSeparator)?; +/// segments.push_value(syn::parse2(next)?); +/// } +/// +/// Ok(PathSegments { segments }) +/// } +/// } +/// +/// fn parse_until<E: Peek>(input: ParseStream, end: E) -> Result<TokenStream> { +/// let mut tokens = TokenStream::new(); +/// while !input.is_empty() && !input.peek(end) { +/// let next: TokenTree = input.parse()?; +/// tokens.extend(Some(next)); +/// } +/// Ok(tokens) +/// } +/// +/// fn main() { +/// let input = r#" a::b </> c::d::e "#; +/// let _: PathSegments = syn::parse_str(input).unwrap(); +/// } +/// ``` +#[macro_export] +macro_rules! custom_punctuation { + ($ident:ident, $($tt:tt)+) => { + pub struct $ident { + pub spans: $crate::custom_punctuation_repr!($($tt)+), + } + + #[doc(hidden)] + #[allow(dead_code, non_snake_case)] + pub fn $ident<__S: $crate::__private::IntoSpans<$crate::custom_punctuation_repr!($($tt)+)>>( + spans: __S, + ) -> $ident { + let _validate_len = 0 $(+ $crate::custom_punctuation_len!(strict, $tt))*; + $ident { + spans: $crate::__private::IntoSpans::into_spans(spans) + } + } + + const _: () = { + impl $crate::__private::Default for $ident { + fn default() -> Self { + $ident($crate::__private::Span::call_site()) + } + } + + $crate::impl_parse_for_custom_punctuation!($ident, $($tt)+); + $crate::impl_to_tokens_for_custom_punctuation!($ident, $($tt)+); + $crate::impl_clone_for_custom_punctuation!($ident, $($tt)+); + $crate::impl_extra_traits_for_custom_punctuation!($ident, $($tt)+); + }; + }; +} + +// Not public API. +#[cfg(feature = "parsing")] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_parse_for_custom_punctuation { + ($ident:ident, $($tt:tt)+) => { + impl $crate::__private::CustomToken for $ident { + fn peek(cursor: $crate::buffer::Cursor) -> $crate::__private::bool { + $crate::__private::peek_punct(cursor, $crate::stringify_punct!($($tt)+)) + } + + fn display() -> &'static $crate::__private::str { + $crate::__private::concat!("`", $crate::stringify_punct!($($tt)+), "`") + } + } + + impl $crate::parse::Parse for $ident { + fn parse(input: $crate::parse::ParseStream) -> $crate::parse::Result<$ident> { + let spans: $crate::custom_punctuation_repr!($($tt)+) = + $crate::__private::parse_punct(input, $crate::stringify_punct!($($tt)+))?; + Ok($ident(spans)) + } + } + }; +} + +// Not public API. +#[cfg(not(feature = "parsing"))] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_parse_for_custom_punctuation { + ($ident:ident, $($tt:tt)+) => {}; +} + +// Not public API. +#[cfg(feature = "printing")] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_to_tokens_for_custom_punctuation { + ($ident:ident, $($tt:tt)+) => { + impl $crate::__private::ToTokens for $ident { + fn to_tokens(&self, tokens: &mut $crate::__private::TokenStream2) { + $crate::__private::print_punct($crate::stringify_punct!($($tt)+), &self.spans, tokens) + } + } + }; +} + +// Not public API. +#[cfg(not(feature = "printing"))] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_to_tokens_for_custom_punctuation { + ($ident:ident, $($tt:tt)+) => {}; +} + +// Not public API. 
+#[cfg(feature = "clone-impls")] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_clone_for_custom_punctuation { + ($ident:ident, $($tt:tt)+) => { + impl $crate::__private::Copy for $ident {} + + #[allow(clippy::expl_impl_clone_on_copy)] + impl $crate::__private::Clone for $ident { + fn clone(&self) -> Self { + *self + } + } + }; +} + +// Not public API. +#[cfg(not(feature = "clone-impls"))] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_clone_for_custom_punctuation { + ($ident:ident, $($tt:tt)+) => {}; +} + +// Not public API. +#[cfg(feature = "extra-traits")] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_extra_traits_for_custom_punctuation { + ($ident:ident, $($tt:tt)+) => { + impl $crate::__private::Debug for $ident { + fn fmt(&self, f: &mut $crate::__private::Formatter) -> $crate::__private::FmtResult { + $crate::__private::Formatter::write_str(f, $crate::__private::stringify!($ident)) + } + } + + impl $crate::__private::Eq for $ident {} + + impl $crate::__private::PartialEq for $ident { + fn eq(&self, _other: &Self) -> $crate::__private::bool { + true + } + } + + impl $crate::__private::Hash for $ident { + fn hash<__H: $crate::__private::Hasher>(&self, _state: &mut __H) {} + } + }; +} + +// Not public API. +#[cfg(not(feature = "extra-traits"))] +#[doc(hidden)] +#[macro_export] +macro_rules! impl_extra_traits_for_custom_punctuation { + ($ident:ident, $($tt:tt)+) => {}; +} + +// Not public API. +#[doc(hidden)] +#[macro_export] +macro_rules! custom_punctuation_repr { + ($($tt:tt)+) => { + [$crate::__private::Span; 0 $(+ $crate::custom_punctuation_len!(lenient, $tt))+] + }; +} + +// Not public API. +#[doc(hidden)] +#[macro_export] +#[rustfmt::skip] +macro_rules! custom_punctuation_len { + ($mode:ident, +) => { 1 }; + ($mode:ident, +=) => { 2 }; + ($mode:ident, &) => { 1 }; + ($mode:ident, &&) => { 2 }; + ($mode:ident, &=) => { 2 }; + ($mode:ident, @) => { 1 }; + ($mode:ident, !) => { 1 }; + ($mode:ident, ^) => { 1 }; + ($mode:ident, ^=) => { 2 }; + ($mode:ident, :) => { 1 }; + ($mode:ident, ::) => { 2 }; + ($mode:ident, ,) => { 1 }; + ($mode:ident, /) => { 1 }; + ($mode:ident, /=) => { 2 }; + ($mode:ident, .) => { 1 }; + ($mode:ident, ..) => { 2 }; + ($mode:ident, ...) => { 3 }; + ($mode:ident, ..=) => { 3 }; + ($mode:ident, =) => { 1 }; + ($mode:ident, ==) => { 2 }; + ($mode:ident, >=) => { 2 }; + ($mode:ident, >) => { 1 }; + ($mode:ident, <=) => { 2 }; + ($mode:ident, <) => { 1 }; + ($mode:ident, *=) => { 2 }; + ($mode:ident, !=) => { 2 }; + ($mode:ident, |) => { 1 }; + ($mode:ident, |=) => { 2 }; + ($mode:ident, ||) => { 2 }; + ($mode:ident, #) => { 1 }; + ($mode:ident, ?) => { 1 }; + ($mode:ident, ->) => { 2 }; + ($mode:ident, <-) => { 2 }; + ($mode:ident, %) => { 1 }; + ($mode:ident, %=) => { 2 }; + ($mode:ident, =>) => { 2 }; + ($mode:ident, ;) => { 1 }; + ($mode:ident, <<) => { 2 }; + ($mode:ident, <<=) => { 3 }; + ($mode:ident, >>) => { 2 }; + ($mode:ident, >>=) => { 3 }; + ($mode:ident, *) => { 1 }; + ($mode:ident, -) => { 1 }; + ($mode:ident, -=) => { 2 }; + ($mode:ident, ~) => { 1 }; + (lenient, $tt:tt) => { 0 }; + (strict, $tt:tt) => {{ $crate::custom_punctuation_unexpected!($tt); 0 }}; +} + +// Not public API. +#[doc(hidden)] +#[macro_export] +macro_rules! custom_punctuation_unexpected { + () => {}; +} + +// Not public API. +#[doc(hidden)] +#[macro_export] +macro_rules! 
stringify_punct { + ($($tt:tt)+) => { + $crate::__private::concat!($($crate::__private::stringify!($tt)),+) + }; +} diff --git a/vendor/syn/src/data.rs b/vendor/syn/src/data.rs new file mode 100644 index 0000000..134b76b --- /dev/null +++ b/vendor/syn/src/data.rs @@ -0,0 +1,404 @@ +use super::*; +use crate::punctuated::Punctuated; + +ast_struct! { + /// An enum variant. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct Variant { + pub attrs: Vec<Attribute>, + + /// Name of the variant. + pub ident: Ident, + + /// Content stored in the variant. + pub fields: Fields, + + /// Explicit discriminant: `Variant = 1` + pub discriminant: Option<(Token![=], Expr)>, + } +} + +ast_enum_of_structs! { + /// Data stored within an enum variant or struct. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub enum Fields { + /// Named fields of a struct or struct variant such as `Point { x: f64, + /// y: f64 }`. + Named(FieldsNamed), + + /// Unnamed fields of a tuple struct or tuple variant such as `Some(T)`. + Unnamed(FieldsUnnamed), + + /// Unit struct or unit variant such as `None`. + Unit, + } +} + +ast_struct! { + /// Named fields of a struct or struct variant such as `Point { x: f64, + /// y: f64 }`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct FieldsNamed { + pub brace_token: token::Brace, + pub named: Punctuated<Field, Token![,]>, + } +} + +ast_struct! { + /// Unnamed fields of a tuple struct or tuple variant such as `Some(T)`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct FieldsUnnamed { + pub paren_token: token::Paren, + pub unnamed: Punctuated<Field, Token![,]>, + } +} + +impl Fields { + /// Get an iterator over the borrowed [`Field`] items in this object. This + /// iterator can be used to iterate over a named or unnamed struct or + /// variant's fields uniformly. + pub fn iter(&self) -> punctuated::Iter<Field> { + match self { + Fields::Unit => crate::punctuated::empty_punctuated_iter(), + Fields::Named(f) => f.named.iter(), + Fields::Unnamed(f) => f.unnamed.iter(), + } + } + + /// Get an iterator over the mutably borrowed [`Field`] items in this + /// object. This iterator can be used to iterate over a named or unnamed + /// struct or variant's fields uniformly. + pub fn iter_mut(&mut self) -> punctuated::IterMut<Field> { + match self { + Fields::Unit => crate::punctuated::empty_punctuated_iter_mut(), + Fields::Named(f) => f.named.iter_mut(), + Fields::Unnamed(f) => f.unnamed.iter_mut(), + } + } + + /// Returns the number of fields. + pub fn len(&self) -> usize { + match self { + Fields::Unit => 0, + Fields::Named(f) => f.named.len(), + Fields::Unnamed(f) => f.unnamed.len(), + } + } + + /// Returns `true` if there are zero fields. 
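+    ///
+    /// For example, a unit struct has no fields (the type name here is
+    /// arbitrary):
+    ///
+    /// ```
+    /// use syn::{parse_quote, ItemStruct};
+    ///
+    /// let unit: ItemStruct = parse_quote!(struct Unit;);
+    /// assert!(unit.fields.is_empty());
+    /// ```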
+ pub fn is_empty(&self) -> bool { + match self { + Fields::Unit => true, + Fields::Named(f) => f.named.is_empty(), + Fields::Unnamed(f) => f.unnamed.is_empty(), + } + } +} + +impl IntoIterator for Fields { + type Item = Field; + type IntoIter = punctuated::IntoIter<Field>; + + fn into_iter(self) -> Self::IntoIter { + match self { + Fields::Unit => Punctuated::<Field, ()>::new().into_iter(), + Fields::Named(f) => f.named.into_iter(), + Fields::Unnamed(f) => f.unnamed.into_iter(), + } + } +} + +impl<'a> IntoIterator for &'a Fields { + type Item = &'a Field; + type IntoIter = punctuated::Iter<'a, Field>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> IntoIterator for &'a mut Fields { + type Item = &'a mut Field; + type IntoIter = punctuated::IterMut<'a, Field>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +ast_struct! { + /// A field of a struct or enum variant. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct Field { + pub attrs: Vec<Attribute>, + + pub vis: Visibility, + + pub mutability: FieldMutability, + + /// Name of the field, if any. + /// + /// Fields of tuple structs have no names. + pub ident: Option<Ident>, + + pub colon_token: Option<Token![:]>, + + pub ty: Type, + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::ext::IdentExt as _; + #[cfg(not(feature = "full"))] + use crate::parse::discouraged::Speculative as _; + use crate::parse::{Parse, ParseStream, Result}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Variant { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let _visibility: Visibility = input.parse()?; + let ident: Ident = input.parse()?; + let fields = if input.peek(token::Brace) { + Fields::Named(input.parse()?) + } else if input.peek(token::Paren) { + Fields::Unnamed(input.parse()?) + } else { + Fields::Unit + }; + let discriminant = if input.peek(Token![=]) { + let eq_token: Token![=] = input.parse()?; + #[cfg(feature = "full")] + let discriminant: Expr = input.parse()?; + #[cfg(not(feature = "full"))] + let discriminant = { + let begin = input.fork(); + let ahead = input.fork(); + let mut discriminant: Result<Expr> = ahead.parse(); + if discriminant.is_ok() { + input.advance_to(&ahead); + } else if scan_lenient_discriminant(input).is_ok() { + discriminant = Ok(Expr::Verbatim(verbatim::between(&begin, input))); + } + discriminant? + }; + Some((eq_token, discriminant)) + } else { + None + }; + Ok(Variant { + attrs, + ident, + fields, + discriminant, + }) + } + } + + #[cfg(not(feature = "full"))] + pub(crate) fn scan_lenient_discriminant(input: ParseStream) -> Result<()> { + use proc_macro2::Delimiter::{self, Brace, Bracket, Parenthesis}; + + let consume = |delimiter: Delimiter| { + Result::unwrap(input.step(|cursor| match cursor.group(delimiter) { + Some((_inside, _span, rest)) => Ok((true, rest)), + None => Ok((false, *cursor)), + })) + }; + + macro_rules! 
consume { + [$token:tt] => { + input.parse::<Option<Token![$token]>>().unwrap().is_some() + }; + } + + let mut initial = true; + let mut depth = 0usize; + loop { + if initial { + if consume![&] { + input.parse::<Option<Token![mut]>>()?; + } else if consume![if] || consume![match] || consume![while] { + depth += 1; + } else if input.parse::<Option<Lit>>()?.is_some() + || (consume(Brace) || consume(Bracket) || consume(Parenthesis)) + || (consume![async] || consume![const] || consume![loop] || consume![unsafe]) + && (consume(Brace) || break) + { + initial = false; + } else if consume![let] { + while !consume![=] { + if !((consume![|] || consume![ref] || consume![mut] || consume![@]) + || (consume![!] || input.parse::<Option<Lit>>()?.is_some()) + || (consume![..=] || consume![..] || consume![&] || consume![_]) + || (consume(Brace) || consume(Bracket) || consume(Parenthesis))) + { + path::parsing::qpath(input, true)?; + } + } + } else if input.parse::<Option<Lifetime>>()?.is_some() && !consume![:] { + break; + } else if input.parse::<UnOp>().is_err() { + path::parsing::qpath(input, true)?; + initial = consume![!] || depth == 0 && input.peek(token::Brace); + } + } else if input.is_empty() || input.peek(Token![,]) { + return Ok(()); + } else if depth > 0 && consume(Brace) { + if consume![else] && !consume(Brace) { + initial = consume![if] || break; + } else { + depth -= 1; + } + } else if input.parse::<BinOp>().is_ok() || (consume![..] | consume![=]) { + initial = true; + } else if consume![.] { + if input.parse::<Option<LitFloat>>()?.is_none() + && (input.parse::<Member>()?.is_named() && consume![::]) + { + AngleBracketedGenericArguments::do_parse(None, input)?; + } + } else if consume![as] { + input.parse::<Type>()?; + } else if !(consume(Brace) || consume(Bracket) || consume(Parenthesis)) { + break; + } + } + + Err(input.error("unsupported expression")) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for FieldsNamed { + fn parse(input: ParseStream) -> Result<Self> { + let content; + Ok(FieldsNamed { + brace_token: braced!(content in input), + named: content.parse_terminated(Field::parse_named, Token![,])?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for FieldsUnnamed { + fn parse(input: ParseStream) -> Result<Self> { + let content; + Ok(FieldsUnnamed { + paren_token: parenthesized!(content in input), + unnamed: content.parse_terminated(Field::parse_unnamed, Token![,])?, + }) + } + } + + impl Field { + /// Parses a named (braced struct) field. + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_named(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + + let unnamed_field = cfg!(feature = "full") && input.peek(Token![_]); + let ident = if unnamed_field { + input.call(Ident::parse_any) + } else { + input.parse() + }?; + + let colon_token: Token![:] = input.parse()?; + + let ty: Type = if unnamed_field + && (input.peek(Token![struct]) + || input.peek(Token![union]) && input.peek2(token::Brace)) + { + let begin = input.fork(); + input.call(Ident::parse_any)?; + input.parse::<FieldsNamed>()?; + Type::Verbatim(verbatim::between(&begin, input)) + } else { + input.parse()? + }; + + Ok(Field { + attrs, + vis, + mutability: FieldMutability::None, + ident: Some(ident), + colon_token: Some(colon_token), + ty, + }) + } + + /// Parses an unnamed (tuple struct) field. 
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_unnamed(input: ParseStream) -> Result<Self> { + Ok(Field { + attrs: input.call(Attribute::parse_outer)?, + vis: input.parse()?, + mutability: FieldMutability::None, + ident: None, + colon_token: None, + ty: input.parse()?, + }) + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use crate::print::TokensOrDefault; + use proc_macro2::TokenStream; + use quote::{ToTokens, TokenStreamExt}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Variant { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(&self.attrs); + self.ident.to_tokens(tokens); + self.fields.to_tokens(tokens); + if let Some((eq_token, disc)) = &self.discriminant { + eq_token.to_tokens(tokens); + disc.to_tokens(tokens); + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for FieldsNamed { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.brace_token.surround(tokens, |tokens| { + self.named.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for FieldsUnnamed { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.paren_token.surround(tokens, |tokens| { + self.unnamed.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Field { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(&self.attrs); + self.vis.to_tokens(tokens); + if let Some(ident) = &self.ident { + ident.to_tokens(tokens); + TokensOrDefault(&self.colon_token).to_tokens(tokens); + } + self.ty.to_tokens(tokens); + } + } +} diff --git a/vendor/syn/src/derive.rs b/vendor/syn/src/derive.rs new file mode 100644 index 0000000..25fa4c9 --- /dev/null +++ b/vendor/syn/src/derive.rs @@ -0,0 +1,245 @@ +use super::*; +use crate::punctuated::Punctuated; + +ast_struct! { + /// Data structure sent to a `proc_macro_derive` macro. + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + pub struct DeriveInput { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub ident: Ident, + pub generics: Generics, + pub data: Data, + } +} + +ast_enum! { + /// The storage of a struct, enum or union data structure. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + pub enum Data { + Struct(DataStruct), + Enum(DataEnum), + Union(DataUnion), + } +} + +ast_struct! { + /// A struct input to a `proc_macro_derive` macro. + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + pub struct DataStruct { + pub struct_token: Token![struct], + pub fields: Fields, + pub semi_token: Option<Token![;]>, + } +} + +ast_struct! { + /// An enum input to a `proc_macro_derive` macro. + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + pub struct DataEnum { + pub enum_token: Token![enum], + pub brace_token: token::Brace, + pub variants: Punctuated<Variant, Token![,]>, + } +} + +ast_struct! { + /// An untagged union input to a `proc_macro_derive` macro. 
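+    ///
+    /// A minimal sketch of where this appears when a derive macro receives
+    /// a union; the `u` binding is illustrative:
+    ///
+    /// ```
+    /// use syn::{parse_quote, Data, DeriveInput};
+    ///
+    /// let input: DeriveInput = parse_quote! {
+    ///     union U {
+    ///         f: u32,
+    ///         g: f32,
+    ///     }
+    /// };
+    ///
+    /// match input.data {
+    ///     // A union always has named fields.
+    ///     Data::Union(u) => assert_eq!(u.fields.named.len(), 2),
+    ///     _ => unreachable!(),
+    /// }
+    /// ```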
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + pub struct DataUnion { + pub union_token: Token![union], + pub fields: FieldsNamed, + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::parse::{Parse, ParseStream, Result}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for DeriveInput { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis = input.parse::<Visibility>()?; + + let lookahead = input.lookahead1(); + if lookahead.peek(Token![struct]) { + let struct_token = input.parse::<Token![struct]>()?; + let ident = input.parse::<Ident>()?; + let generics = input.parse::<Generics>()?; + let (where_clause, fields, semi) = data_struct(input)?; + Ok(DeriveInput { + attrs, + vis, + ident, + generics: Generics { + where_clause, + ..generics + }, + data: Data::Struct(DataStruct { + struct_token, + fields, + semi_token: semi, + }), + }) + } else if lookahead.peek(Token![enum]) { + let enum_token = input.parse::<Token![enum]>()?; + let ident = input.parse::<Ident>()?; + let generics = input.parse::<Generics>()?; + let (where_clause, brace, variants) = data_enum(input)?; + Ok(DeriveInput { + attrs, + vis, + ident, + generics: Generics { + where_clause, + ..generics + }, + data: Data::Enum(DataEnum { + enum_token, + brace_token: brace, + variants, + }), + }) + } else if lookahead.peek(Token![union]) { + let union_token = input.parse::<Token![union]>()?; + let ident = input.parse::<Ident>()?; + let generics = input.parse::<Generics>()?; + let (where_clause, fields) = data_union(input)?; + Ok(DeriveInput { + attrs, + vis, + ident, + generics: Generics { + where_clause, + ..generics + }, + data: Data::Union(DataUnion { + union_token, + fields, + }), + }) + } else { + Err(lookahead.error()) + } + } + } + + pub(crate) fn data_struct( + input: ParseStream, + ) -> Result<(Option<WhereClause>, Fields, Option<Token![;]>)> { + let mut lookahead = input.lookahead1(); + let mut where_clause = None; + if lookahead.peek(Token![where]) { + where_clause = Some(input.parse()?); + lookahead = input.lookahead1(); + } + + if where_clause.is_none() && lookahead.peek(token::Paren) { + let fields = input.parse()?; + + lookahead = input.lookahead1(); + if lookahead.peek(Token![where]) { + where_clause = Some(input.parse()?); + lookahead = input.lookahead1(); + } + + if lookahead.peek(Token![;]) { + let semi = input.parse()?; + Ok((where_clause, Fields::Unnamed(fields), Some(semi))) + } else { + Err(lookahead.error()) + } + } else if lookahead.peek(token::Brace) { + let fields = input.parse()?; + Ok((where_clause, Fields::Named(fields), None)) + } else if lookahead.peek(Token![;]) { + let semi = input.parse()?; + Ok((where_clause, Fields::Unit, Some(semi))) + } else { + Err(lookahead.error()) + } + } + + pub(crate) fn data_enum( + input: ParseStream, + ) -> Result<( + Option<WhereClause>, + token::Brace, + Punctuated<Variant, Token![,]>, + )> { + let where_clause = input.parse()?; + + let content; + let brace = braced!(content in input); + let variants = content.parse_terminated(Variant::parse, Token![,])?; + + Ok((where_clause, brace, variants)) + } + + pub(crate) fn data_union(input: ParseStream) -> Result<(Option<WhereClause>, FieldsNamed)> { + let where_clause = input.parse()?; + let fields = input.parse()?; + Ok((where_clause, fields)) + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use crate::attr::FilterAttrs; + use crate::print::TokensOrDefault; + use 
proc_macro2::TokenStream; + use quote::ToTokens; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for DeriveInput { + fn to_tokens(&self, tokens: &mut TokenStream) { + for attr in self.attrs.outer() { + attr.to_tokens(tokens); + } + self.vis.to_tokens(tokens); + match &self.data { + Data::Struct(d) => d.struct_token.to_tokens(tokens), + Data::Enum(d) => d.enum_token.to_tokens(tokens), + Data::Union(d) => d.union_token.to_tokens(tokens), + } + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + match &self.data { + Data::Struct(data) => match &data.fields { + Fields::Named(fields) => { + self.generics.where_clause.to_tokens(tokens); + fields.to_tokens(tokens); + } + Fields::Unnamed(fields) => { + fields.to_tokens(tokens); + self.generics.where_clause.to_tokens(tokens); + TokensOrDefault(&data.semi_token).to_tokens(tokens); + } + Fields::Unit => { + self.generics.where_clause.to_tokens(tokens); + TokensOrDefault(&data.semi_token).to_tokens(tokens); + } + }, + Data::Enum(data) => { + self.generics.where_clause.to_tokens(tokens); + data.brace_token.surround(tokens, |tokens| { + data.variants.to_tokens(tokens); + }); + } + Data::Union(data) => { + self.generics.where_clause.to_tokens(tokens); + data.fields.to_tokens(tokens); + } + } + } + } +} diff --git a/vendor/syn/src/discouraged.rs b/vendor/syn/src/discouraged.rs new file mode 100644 index 0000000..fb98d63 --- /dev/null +++ b/vendor/syn/src/discouraged.rs @@ -0,0 +1,219 @@ +//! Extensions to the parsing API with niche applicability. + +use super::*; +use proc_macro2::extra::DelimSpan; + +/// Extensions to the `ParseStream` API to support speculative parsing. +pub trait Speculative { + /// Advance this parse stream to the position of a forked parse stream. + /// + /// This is the opposite operation to [`ParseStream::fork`]. You can fork a + /// parse stream, perform some speculative parsing, then join the original + /// stream to the fork to "commit" the parsing from the fork to the main + /// stream. + /// + /// If you can avoid doing this, you should, as it limits the ability to + /// generate useful errors. That said, it is often the only way to parse + /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem + /// is that when the fork fails to parse an `A`, it's impossible to tell + /// whether that was because of a syntax error and the user meant to provide + /// an `A`, or that the `A`s are finished and it's time to start parsing + /// `B`s. Use with care. + /// + /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by + /// parsing `B*` and removing the leading members of `A` from the + /// repetition, bypassing the need to involve the downsides associated with + /// speculative parsing. + /// + /// [`ParseStream::fork`]: ParseBuffer::fork + /// + /// # Example + /// + /// There has been chatter about the possibility of making the colons in the + /// turbofish syntax like `path::to::<T>` no longer required by accepting + /// `path::to<T>` in expression position. Specifically, according to [RFC + /// 2544], [`PathSegment`] parsing should always try to consume a following + /// `<` token as the start of generic arguments, and reset to the `<` if + /// that fails (e.g. the token is acting as a less-than operator). + /// + /// This is the exact kind of parsing behavior which requires the "fork, + /// try, commit" behavior that [`ParseStream::fork`] discourages. 
With
+    /// `advance_to`, we can avoid having to parse the speculatively parsed
+    /// content a second time.
+    ///
+    /// This change in behavior can be implemented in syn by replacing just the
+    /// `Parse` implementation for `PathSegment`:
+    ///
+    /// ```
+    /// # use syn::ext::IdentExt;
+    /// use syn::parse::discouraged::Speculative;
+    /// # use syn::parse::{Parse, ParseStream};
+    /// # use syn::{Ident, PathArguments, Result, Token};
+    ///
+    /// pub struct PathSegment {
+    ///     pub ident: Ident,
+    ///     pub arguments: PathArguments,
+    /// }
+    /// #
+    /// # impl<T> From<T> for PathSegment
+    /// # where
+    /// #     T: Into<Ident>,
+    /// # {
+    /// #     fn from(ident: T) -> Self {
+    /// #         PathSegment {
+    /// #             ident: ident.into(),
+    /// #             arguments: PathArguments::None,
+    /// #         }
+    /// #     }
+    /// # }
+    ///
+    /// impl Parse for PathSegment {
+    ///     fn parse(input: ParseStream) -> Result<Self> {
+    ///         if input.peek(Token![super])
+    ///             || input.peek(Token![self])
+    ///             || input.peek(Token![Self])
+    ///             || input.peek(Token![crate])
+    ///         {
+    ///             let ident = input.call(Ident::parse_any)?;
+    ///             return Ok(PathSegment::from(ident));
+    ///         }
+    ///
+    ///         let ident = input.parse()?;
+    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
+    ///             return Ok(PathSegment {
+    ///                 ident,
+    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
+    ///             });
+    ///         }
+    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
+    ///             let fork = input.fork();
+    ///             if let Ok(arguments) = fork.parse() {
+    ///                 input.advance_to(&fork);
+    ///                 return Ok(PathSegment {
+    ///                     ident,
+    ///                     arguments: PathArguments::AngleBracketed(arguments),
+    ///                 });
+    ///             }
+    ///         }
+    ///         Ok(PathSegment::from(ident))
+    ///     }
+    /// }
+    ///
+    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
+    /// ```
+    ///
+    /// # Drawbacks
+    ///
+    /// The main drawback of this style of speculative parsing is in error
+    /// presentation. Even if the lookahead is the "correct" parse, the error
+    /// that is shown is that of the "fallback" parse. To use the same example
+    /// as the turbofish above, take the following unfinished "turbofish":
+    ///
+    /// ```text
+    /// let _ = f<&'a fn(), for<'a> serde::>();
+    /// ```
+    ///
+    /// If this is parsed as generic arguments, we can provide the error message
+    ///
+    /// ```text
+    /// error: expected identifier
+    ///  --> src.rs:L:C
+    ///   |
+    /// L | let _ = f<&'a fn(), for<'a> serde::>();
+    ///   |                                     ^
+    /// ```
+    ///
+    /// but if parsed using the above speculative parsing, it falls back to
+    /// assuming that the `<` is a less-than when it fails to parse the generic
+    /// arguments, and tries to interpret the `&'a` as the start of a labeled
+    /// loop, resulting in the much less helpful error
+    ///
+    /// ```text
+    /// error: expected `:`
+    ///  --> src.rs:L:C
+    ///   |
+    /// L | let _ = f<&'a fn(), for<'a> serde::>();
+    ///   |            ^^
+    /// ```
+    ///
+    /// This can be mitigated with various heuristics (two examples: show both
+    /// forks' parse errors, or show the one that consumed more tokens), but
+    /// when you can control the grammar, sticking to something that can be
+    /// parsed LL(3) and without the LL(*) speculative parsing this makes
+    /// possible, displaying reasonable errors becomes much simpler.
+    ///
+    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
+    /// [`PathSegment`]: crate::PathSegment
+    ///
+    /// # Performance
+    ///
+    /// This method performs a cheap fixed amount of work that does not depend
+    /// on how far apart the two streams are positioned.
+ /// + /// # Panics + /// + /// The forked stream in the argument of `advance_to` must have been + /// obtained by forking `self`. Attempting to advance to any other stream + /// will cause a panic. + fn advance_to(&self, fork: &Self); +} + +impl<'a> Speculative for ParseBuffer<'a> { + fn advance_to(&self, fork: &Self) { + if !crate::buffer::same_scope(self.cursor(), fork.cursor()) { + panic!("Fork was not derived from the advancing parse stream"); + } + + let (self_unexp, self_sp) = inner_unexpected(self); + let (fork_unexp, fork_sp) = inner_unexpected(fork); + if !Rc::ptr_eq(&self_unexp, &fork_unexp) { + match (fork_sp, self_sp) { + // Unexpected set on the fork, but not on `self`, copy it over. + (Some(span), None) => { + self_unexp.set(Unexpected::Some(span)); + } + // Unexpected unset. Use chain to propagate errors from fork. + (None, None) => { + fork_unexp.set(Unexpected::Chain(self_unexp)); + + // Ensure toplevel 'unexpected' tokens from the fork don't + // bubble up the chain by replacing the root `unexpected` + // pointer, only 'unexpected' tokens from existing group + // parsers should bubble. + fork.unexpected + .set(Some(Rc::new(Cell::new(Unexpected::None)))); + } + // Unexpected has been set on `self`. No changes needed. + (_, Some(_)) => {} + } + } + + // See comment on `cell` in the struct definition. + self.cell + .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) }); + } +} + +/// Extensions to the `ParseStream` API to support manipulating invisible +/// delimiters the same as if they were visible. +pub trait AnyDelimiter { + /// Returns the delimiter, the span of the delimiter token, and the nested + /// contents for further parsing. + fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>; +} + +impl<'a> AnyDelimiter for ParseBuffer<'a> { + fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> { + self.step(|cursor| { + if let Some((content, delimiter, span, rest)) = cursor.any_group() { + let scope = crate::buffer::close_span_of_group(*cursor); + let nested = crate::parse::advance_step_cursor(cursor, content); + let unexpected = crate::parse::get_unexpected(self); + let content = crate::parse::new_parse_buffer(scope, nested, unexpected); + Ok(((delimiter, span, content), rest)) + } else { + Err(cursor.error("expected any delimiter")) + } + }) + } +} diff --git a/vendor/syn/src/drops.rs b/vendor/syn/src/drops.rs new file mode 100644 index 0000000..89b42d8 --- /dev/null +++ b/vendor/syn/src/drops.rs @@ -0,0 +1,58 @@ +use std::iter; +use std::mem::ManuallyDrop; +use std::ops::{Deref, DerefMut}; +use std::option; +use std::slice; + +#[repr(transparent)] +pub(crate) struct NoDrop<T: ?Sized>(ManuallyDrop<T>); + +impl<T> NoDrop<T> { + pub(crate) fn new(value: T) -> Self + where + T: TrivialDrop, + { + NoDrop(ManuallyDrop::new(value)) + } +} + +impl<T: ?Sized> Deref for NoDrop<T> { + type Target = T; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<T: ?Sized> DerefMut for NoDrop<T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +pub(crate) trait TrivialDrop {} + +impl<T> TrivialDrop for iter::Empty<T> {} +impl<'a, T> TrivialDrop for slice::Iter<'a, T> {} +impl<'a, T> TrivialDrop for slice::IterMut<'a, T> {} +impl<'a, T> TrivialDrop for option::IntoIter<&'a T> {} +impl<'a, T> TrivialDrop for option::IntoIter<&'a mut T> {} + +#[test] +fn test_needs_drop() { + use std::mem::needs_drop; + + struct NeedsDrop; + + impl Drop for NeedsDrop { + fn drop(&mut self) {} + } + + 
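+    // `NoDrop` is only sound if the wrapped type has no drop glue, since
+    // `ManuallyDrop` suppresses whatever `Drop` would have run. The first
+    // assertion checks that `needs_drop` can detect drop glue at all; the
+    // rest check that each type given a `TrivialDrop` impl above really is
+    // trivially droppable.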
assert!(needs_drop::<NeedsDrop>()); + + // Test each of the types with a handwritten TrivialDrop impl above. + assert!(!needs_drop::<iter::Empty<NeedsDrop>>()); + assert!(!needs_drop::<slice::Iter<NeedsDrop>>()); + assert!(!needs_drop::<slice::IterMut<NeedsDrop>>()); + assert!(!needs_drop::<option::IntoIter<&NeedsDrop>>()); + assert!(!needs_drop::<option::IntoIter<&mut NeedsDrop>>()); +} diff --git a/vendor/syn/src/error.rs b/vendor/syn/src/error.rs new file mode 100644 index 0000000..71247cd --- /dev/null +++ b/vendor/syn/src/error.rs @@ -0,0 +1,467 @@ +#[cfg(feature = "parsing")] +use crate::buffer::Cursor; +use crate::thread::ThreadBound; +use proc_macro2::{ + Delimiter, Group, Ident, LexError, Literal, Punct, Spacing, Span, TokenStream, TokenTree, +}; +#[cfg(feature = "printing")] +use quote::ToTokens; +use std::fmt::{self, Debug, Display}; +use std::slice; +use std::vec; + +/// The result of a Syn parser. +pub type Result<T> = std::result::Result<T, Error>; + +/// Error returned when a Syn parser cannot parse the input tokens. +/// +/// # Error reporting in proc macros +/// +/// The correct way to report errors back to the compiler from a procedural +/// macro is by emitting an appropriately spanned invocation of +/// [`compile_error!`] in the generated code. This produces a better diagnostic +/// message than simply panicking the macro. +/// +/// [`compile_error!`]: std::compile_error! +/// +/// When parsing macro input, the [`parse_macro_input!`] macro handles the +/// conversion to `compile_error!` automatically. +/// +/// [`parse_macro_input!`]: crate::parse_macro_input! +/// +/// ``` +/// # extern crate proc_macro; +/// # +/// use proc_macro::TokenStream; +/// use syn::parse::{Parse, ParseStream, Result}; +/// use syn::{parse_macro_input, ItemFn}; +/// +/// # const IGNORE: &str = stringify! { +/// #[proc_macro_attribute] +/// # }; +/// pub fn my_attr(args: TokenStream, input: TokenStream) -> TokenStream { +/// let args = parse_macro_input!(args as MyAttrArgs); +/// let input = parse_macro_input!(input as ItemFn); +/// +/// /* ... */ +/// # TokenStream::new() +/// } +/// +/// struct MyAttrArgs { +/// # _k: [(); { stringify! { +/// ... +/// # }; 0 }] +/// } +/// +/// impl Parse for MyAttrArgs { +/// fn parse(input: ParseStream) -> Result<Self> { +/// # stringify! { +/// ... +/// # }; +/// # unimplemented!() +/// } +/// } +/// ``` +/// +/// For errors that arise later than the initial parsing stage, the +/// [`.to_compile_error()`] or [`.into_compile_error()`] methods can be used to +/// perform an explicit conversion to `compile_error!`. +/// +/// [`.to_compile_error()`]: Error::to_compile_error +/// [`.into_compile_error()`]: Error::into_compile_error +/// +/// ``` +/// # extern crate proc_macro; +/// # +/// # use proc_macro::TokenStream; +/// # use syn::{parse_macro_input, DeriveInput}; +/// # +/// # const IGNORE: &str = stringify! 
{ +/// #[proc_macro_derive(MyDerive)] +/// # }; +/// pub fn my_derive(input: TokenStream) -> TokenStream { +/// let input = parse_macro_input!(input as DeriveInput); +/// +/// // fn(DeriveInput) -> syn::Result<proc_macro2::TokenStream> +/// expand::my_derive(input) +/// .unwrap_or_else(syn::Error::into_compile_error) +/// .into() +/// } +/// # +/// # mod expand { +/// # use proc_macro2::TokenStream; +/// # use syn::{DeriveInput, Result}; +/// # +/// # pub fn my_derive(input: DeriveInput) -> Result<TokenStream> { +/// # unimplemented!() +/// # } +/// # } +/// ``` +pub struct Error { + messages: Vec<ErrorMessage>, +} + +struct ErrorMessage { + // Span is implemented as an index into a thread-local interner to keep the + // size small. It is not safe to access from a different thread. We want + // errors to be Send and Sync to play nicely with ecosystem crates for error + // handling, so pin the span we're given to its original thread and assume + // it is Span::call_site if accessed from any other thread. + span: ThreadBound<SpanRange>, + message: String, +} + +// Cannot use std::ops::Range<Span> because that does not implement Copy, +// whereas ThreadBound<T> requires a Copy impl as a way to ensure no Drop impls +// are involved. +struct SpanRange { + start: Span, + end: Span, +} + +#[cfg(test)] +struct _Test +where + Error: Send + Sync; + +impl Error { + /// Usually the [`ParseStream::error`] method will be used instead, which + /// automatically uses the correct span from the current position of the + /// parse stream. + /// + /// Use `Error::new` when the error needs to be triggered on some span other + /// than where the parse stream is currently positioned. + /// + /// [`ParseStream::error`]: crate::parse::ParseBuffer::error + /// + /// # Example + /// + /// ``` + /// use syn::{Error, Ident, LitStr, Result, Token}; + /// use syn::parse::ParseStream; + /// + /// // Parses input that looks like `name = "string"` where the key must be + /// // the identifier `name` and the value may be any string literal. + /// // Returns the string literal. + /// fn parse_name(input: ParseStream) -> Result<LitStr> { + /// let name_token: Ident = input.parse()?; + /// if name_token != "name" { + /// // Trigger an error not on the current position of the stream, + /// // but on the position of the unexpected identifier. + /// return Err(Error::new(name_token.span(), "expected `name`")); + /// } + /// input.parse::<Token![=]>()?; + /// let s: LitStr = input.parse()?; + /// Ok(s) + /// } + /// ``` + pub fn new<T: Display>(span: Span, message: T) -> Self { + return new(span, message.to_string()); + + fn new(span: Span, message: String) -> Error { + Error { + messages: vec![ErrorMessage { + span: ThreadBound::new(SpanRange { + start: span, + end: span, + }), + message, + }], + } + } + } + + /// Creates an error with the specified message spanning the given syntax + /// tree node. + /// + /// Unlike the `Error::new` constructor, this constructor takes an argument + /// `tokens` which is a syntax tree node. This allows the resulting `Error` + /// to attempt to span all tokens inside of `tokens`. While you would + /// typically be able to use the `Spanned` trait with the above `Error::new` + /// constructor, implementation limitations today mean that + /// `Error::new_spanned` may provide a higher-quality error message on + /// stable Rust. + /// + /// When in doubt it's recommended to stick to `Error::new` (or + /// `ParseStream::error`)! 
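+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of rejecting unions in a derive macro, spanning the
+    /// error across the input's type name; the `reject_unions` helper name
+    /// is hypothetical:
+    ///
+    /// ```
+    /// use syn::{Data, DeriveInput, Error, Result};
+    ///
+    /// // Hypothetical helper called from a derive macro implementation.
+    /// fn reject_unions(input: &DeriveInput) -> Result<()> {
+    ///     if let Data::Union(_) = input.data {
+    ///         // The `Ident` of the input provides the span for the error.
+    ///         Err(Error::new_spanned(&input.ident, "unions are not supported"))
+    ///     } else {
+    ///         Ok(())
+    ///     }
+    /// }
+    /// ```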
+ #[cfg(feature = "printing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + pub fn new_spanned<T: ToTokens, U: Display>(tokens: T, message: U) -> Self { + return new_spanned(tokens.into_token_stream(), message.to_string()); + + fn new_spanned(tokens: TokenStream, message: String) -> Error { + let mut iter = tokens.into_iter(); + let start = iter.next().map_or_else(Span::call_site, |t| t.span()); + let end = iter.last().map_or(start, |t| t.span()); + Error { + messages: vec![ErrorMessage { + span: ThreadBound::new(SpanRange { start, end }), + message, + }], + } + } + } + + /// The source location of the error. + /// + /// Spans are not thread-safe so this function returns `Span::call_site()` + /// if called from a different thread than the one on which the `Error` was + /// originally created. + pub fn span(&self) -> Span { + let SpanRange { start, end } = match self.messages[0].span.get() { + Some(span) => *span, + None => return Span::call_site(), + }; + start.join(end).unwrap_or(start) + } + + /// Render the error as an invocation of [`compile_error!`]. + /// + /// The [`parse_macro_input!`] macro provides a convenient way to invoke + /// this method correctly in a procedural macro. + /// + /// [`compile_error!`]: std::compile_error! + /// [`parse_macro_input!`]: crate::parse_macro_input! + pub fn to_compile_error(&self) -> TokenStream { + self.messages + .iter() + .map(ErrorMessage::to_compile_error) + .collect() + } + + /// Render the error as an invocation of [`compile_error!`]. + /// + /// [`compile_error!`]: std::compile_error! + /// + /// # Example + /// + /// ``` + /// # extern crate proc_macro; + /// # + /// use proc_macro::TokenStream; + /// use syn::{parse_macro_input, DeriveInput, Error}; + /// + /// # const _: &str = stringify! { + /// #[proc_macro_derive(MyTrait)] + /// # }; + /// pub fn derive_my_trait(input: TokenStream) -> TokenStream { + /// let input = parse_macro_input!(input as DeriveInput); + /// my_trait::expand(input) + /// .unwrap_or_else(Error::into_compile_error) + /// .into() + /// } + /// + /// mod my_trait { + /// use proc_macro2::TokenStream; + /// use syn::{DeriveInput, Result}; + /// + /// pub(crate) fn expand(input: DeriveInput) -> Result<TokenStream> { + /// /* ... */ + /// # unimplemented!() + /// } + /// } + /// ``` + pub fn into_compile_error(self) -> TokenStream { + self.to_compile_error() + } + + /// Add another error message to self such that when `to_compile_error()` is + /// called, both errors will be emitted together. 
+ pub fn combine(&mut self, another: Error) { + self.messages.extend(another.messages); + } +} + +impl ErrorMessage { + fn to_compile_error(&self) -> TokenStream { + let (start, end) = match self.span.get() { + Some(range) => (range.start, range.end), + None => (Span::call_site(), Span::call_site()), + }; + + // ::core::compile_error!($message) + TokenStream::from_iter(vec![ + TokenTree::Punct({ + let mut punct = Punct::new(':', Spacing::Joint); + punct.set_span(start); + punct + }), + TokenTree::Punct({ + let mut punct = Punct::new(':', Spacing::Alone); + punct.set_span(start); + punct + }), + TokenTree::Ident(Ident::new("core", start)), + TokenTree::Punct({ + let mut punct = Punct::new(':', Spacing::Joint); + punct.set_span(start); + punct + }), + TokenTree::Punct({ + let mut punct = Punct::new(':', Spacing::Alone); + punct.set_span(start); + punct + }), + TokenTree::Ident(Ident::new("compile_error", start)), + TokenTree::Punct({ + let mut punct = Punct::new('!', Spacing::Alone); + punct.set_span(start); + punct + }), + TokenTree::Group({ + let mut group = Group::new(Delimiter::Brace, { + TokenStream::from_iter(vec![TokenTree::Literal({ + let mut string = Literal::string(&self.message); + string.set_span(end); + string + })]) + }); + group.set_span(end); + group + }), + ]) + } +} + +#[cfg(feature = "parsing")] +pub(crate) fn new_at<T: Display>(scope: Span, cursor: Cursor, message: T) -> Error { + if cursor.eof() { + Error::new(scope, format!("unexpected end of input, {}", message)) + } else { + let span = crate::buffer::open_span_of_group(cursor); + Error::new(span, message) + } +} + +#[cfg(all(feature = "parsing", any(feature = "full", feature = "derive")))] +pub(crate) fn new2<T: Display>(start: Span, end: Span, message: T) -> Error { + return new2(start, end, message.to_string()); + + fn new2(start: Span, end: Span, message: String) -> Error { + Error { + messages: vec![ErrorMessage { + span: ThreadBound::new(SpanRange { start, end }), + message, + }], + } + } +} + +impl Debug for Error { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + if self.messages.len() == 1 { + formatter + .debug_tuple("Error") + .field(&self.messages[0]) + .finish() + } else { + formatter + .debug_tuple("Error") + .field(&self.messages) + .finish() + } + } +} + +impl Debug for ErrorMessage { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&self.message, formatter) + } +} + +impl Display for Error { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str(&self.messages[0].message) + } +} + +impl Clone for Error { + fn clone(&self) -> Self { + Error { + messages: self.messages.clone(), + } + } +} + +impl Clone for ErrorMessage { + fn clone(&self) -> Self { + ErrorMessage { + span: self.span, + message: self.message.clone(), + } + } +} + +impl Clone for SpanRange { + fn clone(&self) -> Self { + *self + } +} + +impl Copy for SpanRange {} + +impl std::error::Error for Error {} + +impl From<LexError> for Error { + fn from(err: LexError) -> Self { + Error::new(err.span(), err) + } +} + +impl IntoIterator for Error { + type Item = Error; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter { + messages: self.messages.into_iter(), + } + } +} + +pub struct IntoIter { + messages: vec::IntoIter<ErrorMessage>, +} + +impl Iterator for IntoIter { + type Item = Error; + + fn next(&mut self) -> Option<Self::Item> { + Some(Error { + messages: vec![self.messages.next()?], + }) + } +} + +impl<'a> IntoIterator for &'a Error 
{ + type Item = Error; + type IntoIter = Iter<'a>; + + fn into_iter(self) -> Self::IntoIter { + Iter { + messages: self.messages.iter(), + } + } +} + +pub struct Iter<'a> { + messages: slice::Iter<'a, ErrorMessage>, +} + +impl<'a> Iterator for Iter<'a> { + type Item = Error; + + fn next(&mut self) -> Option<Self::Item> { + Some(Error { + messages: vec![self.messages.next()?.clone()], + }) + } +} + +impl Extend<Error> for Error { + fn extend<T: IntoIterator<Item = Error>>(&mut self, iter: T) { + for err in iter { + self.combine(err); + } + } +} diff --git a/vendor/syn/src/export.rs b/vendor/syn/src/export.rs new file mode 100644 index 0000000..b9ea5c7 --- /dev/null +++ b/vendor/syn/src/export.rs @@ -0,0 +1,73 @@ +#[doc(hidden)] +pub use std::clone::Clone; +#[doc(hidden)] +pub use std::cmp::{Eq, PartialEq}; +#[doc(hidden)] +pub use std::concat; +#[doc(hidden)] +pub use std::default::Default; +#[doc(hidden)] +pub use std::fmt::Debug; +#[doc(hidden)] +pub use std::hash::{Hash, Hasher}; +#[doc(hidden)] +pub use std::marker::Copy; +#[doc(hidden)] +pub use std::option::Option::{None, Some}; +#[doc(hidden)] +pub use std::result::Result::{Err, Ok}; +#[doc(hidden)] +pub use std::stringify; + +#[doc(hidden)] +pub type Formatter<'a> = std::fmt::Formatter<'a>; +#[doc(hidden)] +pub type FmtResult = std::fmt::Result; + +#[doc(hidden)] +pub type bool = std::primitive::bool; +#[doc(hidden)] +pub type str = std::primitive::str; + +#[cfg(feature = "printing")] +#[doc(hidden)] +pub use quote; + +#[doc(hidden)] +pub type Span = proc_macro2::Span; +#[doc(hidden)] +pub type TokenStream2 = proc_macro2::TokenStream; + +#[cfg(feature = "parsing")] +#[doc(hidden)] +pub use crate::group::{parse_braces, parse_brackets, parse_parens}; + +#[doc(hidden)] +pub use crate::span::IntoSpans; + +#[cfg(all(feature = "parsing", feature = "printing"))] +#[doc(hidden)] +pub use crate::parse_quote::parse as parse_quote; + +#[cfg(feature = "parsing")] +#[doc(hidden)] +pub use crate::token::parsing::{peek_punct, punct as parse_punct}; + +#[cfg(feature = "printing")] +#[doc(hidden)] +pub use crate::token::printing::punct as print_punct; + +#[cfg(feature = "parsing")] +#[doc(hidden)] +pub use crate::token::private::CustomToken; + +#[cfg(feature = "proc-macro")] +#[doc(hidden)] +pub type TokenStream = proc_macro::TokenStream; + +#[cfg(feature = "printing")] +#[doc(hidden)] +pub use quote::{ToTokens, TokenStreamExt}; + +#[doc(hidden)] +pub struct private(pub(crate) ()); diff --git a/vendor/syn/src/expr.rs b/vendor/syn/src/expr.rs new file mode 100644 index 0000000..7fb0f7b --- /dev/null +++ b/vendor/syn/src/expr.rs @@ -0,0 +1,3506 @@ +use super::*; +use crate::punctuated::Punctuated; +use proc_macro2::{Span, TokenStream}; +#[cfg(feature = "printing")] +use quote::IdentFragment; +#[cfg(feature = "printing")] +use std::fmt::{self, Display}; +use std::hash::{Hash, Hasher}; +#[cfg(feature = "parsing")] +use std::mem; + +ast_enum_of_structs! { + /// A Rust expression. + /// + /// *This type is available only if Syn is built with the `"derive"` or `"full"` + /// feature, but most of the variants are not available unless "full" is enabled.* + /// + /// # Syntax tree enums + /// + /// This type is a syntax tree enum. In Syn this and other syntax tree enums + /// are designed to be traversed using the following rebinding idiom. + /// + /// ``` + /// # use syn::Expr; + /// # + /// # fn example(expr: Expr) { + /// # const IGNORE: &str = stringify! { + /// let expr: Expr = /* ... 
*/; + /// # }; + /// match expr { + /// Expr::MethodCall(expr) => { + /// /* ... */ + /// } + /// Expr::Cast(expr) => { + /// /* ... */ + /// } + /// Expr::If(expr) => { + /// /* ... */ + /// } + /// + /// /* ... */ + /// # _ => {} + /// # } + /// # } + /// ``` + /// + /// We begin with a variable `expr` of type `Expr` that has no fields + /// (because it is an enum), and by matching on it and rebinding a variable + /// with the same name `expr` we effectively imbue our variable with all of + /// the data fields provided by the variant that it turned out to be. So for + /// example above if we ended up in the `MethodCall` case then we get to use + /// `expr.receiver`, `expr.args` etc; if we ended up in the `If` case we get + /// to use `expr.cond`, `expr.then_branch`, `expr.else_branch`. + /// + /// This approach avoids repeating the variant names twice on every line. + /// + /// ``` + /// # use syn::{Expr, ExprMethodCall}; + /// # + /// # fn example(expr: Expr) { + /// // Repetitive; recommend not doing this. + /// match expr { + /// Expr::MethodCall(ExprMethodCall { method, args, .. }) => { + /// # } + /// # _ => {} + /// # } + /// # } + /// ``` + /// + /// In general, the name to which a syntax tree enum variant is bound should + /// be a suitable name for the complete syntax tree enum type. + /// + /// ``` + /// # use syn::{Expr, ExprField}; + /// # + /// # fn example(discriminant: ExprField) { + /// // Binding is called `base` which is the name I would use if I were + /// // assigning `*discriminant.base` without an `if let`. + /// if let Expr::Tuple(base) = *discriminant.base { + /// # } + /// # } + /// ``` + /// + /// A sign that you may not be choosing the right variable names is if you + /// see names getting repeated in your code, like accessing + /// `receiver.receiver` or `pat.pat` or `cond.cond`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + #[non_exhaustive] + pub enum Expr { + /// A slice literal expression: `[a, b, c, d]`. + Array(ExprArray), + + /// An assignment expression: `a = compute()`. + Assign(ExprAssign), + + /// An async block: `async { ... }`. + Async(ExprAsync), + + /// An await expression: `fut.await`. + Await(ExprAwait), + + /// A binary operation: `a + b`, `a += b`. + Binary(ExprBinary), + + /// A blocked scope: `{ ... }`. + Block(ExprBlock), + + /// A `break`, with an optional label to break and an optional + /// expression. + Break(ExprBreak), + + /// A function call expression: `invoke(a, b)`. + Call(ExprCall), + + /// A cast expression: `foo as f64`. + Cast(ExprCast), + + /// A closure expression: `|a, b| a + b`. + Closure(ExprClosure), + + /// A const block: `const { ... }`. + Const(ExprConst), + + /// A `continue`, with an optional label. + Continue(ExprContinue), + + /// Access of a named struct field (`obj.k`) or unnamed tuple struct + /// field (`obj.0`). + Field(ExprField), + + /// A for loop: `for pat in expr { ... }`. + ForLoop(ExprForLoop), + + /// An expression contained within invisible delimiters. + /// + /// This variant is important for faithfully representing the precedence + /// of expressions and is related to `None`-delimited spans in a + /// `TokenStream`. + Group(ExprGroup), + + /// An `if` expression with an optional `else` block: `if expr { ... } + /// else { ... }`. + /// + /// The `else` branch expression may only be an `If` or `Block` + /// expression, not any of the other types of expression. + If(ExprIf), + + /// A square bracketed indexing expression: `vector[2]`. 
+ Index(ExprIndex), + + /// The inferred value of a const generic argument, denoted `_`. + Infer(ExprInfer), + + /// A `let` guard: `let Some(x) = opt`. + Let(ExprLet), + + /// A literal in place of an expression: `1`, `"foo"`. + Lit(ExprLit), + + /// Conditionless loop: `loop { ... }`. + Loop(ExprLoop), + + /// A macro invocation expression: `format!("{}", q)`. + Macro(ExprMacro), + + /// A `match` expression: `match n { Some(n) => {}, None => {} }`. + Match(ExprMatch), + + /// A method call expression: `x.foo::<T>(a, b)`. + MethodCall(ExprMethodCall), + + /// A parenthesized expression: `(a + b)`. + Paren(ExprParen), + + /// A path like `std::mem::replace` possibly containing generic + /// parameters and a qualified self-type. + /// + /// A plain identifier like `x` is a path of length 1. + Path(ExprPath), + + /// A range expression: `1..2`, `1..`, `..2`, `1..=2`, `..=2`. + Range(ExprRange), + + /// A referencing operation: `&a` or `&mut a`. + Reference(ExprReference), + + /// An array literal constructed from one repeated element: `[0u8; N]`. + Repeat(ExprRepeat), + + /// A `return`, with an optional value to be returned. + Return(ExprReturn), + + /// A struct literal expression: `Point { x: 1, y: 1 }`. + /// + /// The `rest` provides the value of the remaining fields as in `S { a: + /// 1, b: 1, ..rest }`. + Struct(ExprStruct), + + /// A try-expression: `expr?`. + Try(ExprTry), + + /// A try block: `try { ... }`. + TryBlock(ExprTryBlock), + + /// A tuple expression: `(a, b, c, d)`. + Tuple(ExprTuple), + + /// A unary operation: `!x`, `*x`. + Unary(ExprUnary), + + /// An unsafe block: `unsafe { ... }`. + Unsafe(ExprUnsafe), + + /// Tokens in expression position not interpreted by Syn. + Verbatim(TokenStream), + + /// A while loop: `while expr { ... }`. + While(ExprWhile), + + /// A yield expression: `yield expr`. + Yield(ExprYield), + + // For testing exhaustiveness in downstream code, use the following idiom: + // + // match expr { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // + // Expr::Array(expr) => {...} + // Expr::Assign(expr) => {...} + // ... + // Expr::Yield(expr) => {...} + // + // _ => { /* some sane fallback */ } + // } + // + // This way we fail your tests but don't break your library when adding + // a variant. You will be notified by a test failure when a variant is + // added, so that you can add code to handle it, but your library will + // continue to compile and work for downstream users in the interim. + } +} + +ast_struct! { + /// A slice literal expression: `[a, b, c, d]`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprArray #full { + pub attrs: Vec<Attribute>, + pub bracket_token: token::Bracket, + pub elems: Punctuated<Expr, Token![,]>, + } +} + +ast_struct! { + /// An assignment expression: `a = compute()`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprAssign #full { + pub attrs: Vec<Attribute>, + pub left: Box<Expr>, + pub eq_token: Token![=], + pub right: Box<Expr>, + } +} + +ast_struct! { + /// An async block: `async { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprAsync #full { + pub attrs: Vec<Attribute>, + pub async_token: Token![async], + pub capture: Option<Token![move]>, + pub block: Block, + } +} + +ast_struct! { + /// An await expression: `fut.await`. 
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprAwait #full { + pub attrs: Vec<Attribute>, + pub base: Box<Expr>, + pub dot_token: Token![.], + pub await_token: Token![await], + } +} + +ast_struct! { + /// A binary operation: `a + b`, `a += b`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprBinary { + pub attrs: Vec<Attribute>, + pub left: Box<Expr>, + pub op: BinOp, + pub right: Box<Expr>, + } +} + +ast_struct! { + /// A blocked scope: `{ ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprBlock #full { + pub attrs: Vec<Attribute>, + pub label: Option<Label>, + pub block: Block, + } +} + +ast_struct! { + /// A `break`, with an optional label to break and an optional + /// expression. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprBreak #full { + pub attrs: Vec<Attribute>, + pub break_token: Token![break], + pub label: Option<Lifetime>, + pub expr: Option<Box<Expr>>, + } +} + +ast_struct! { + /// A function call expression: `invoke(a, b)`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprCall { + pub attrs: Vec<Attribute>, + pub func: Box<Expr>, + pub paren_token: token::Paren, + pub args: Punctuated<Expr, Token![,]>, + } +} + +ast_struct! { + /// A cast expression: `foo as f64`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprCast { + pub attrs: Vec<Attribute>, + pub expr: Box<Expr>, + pub as_token: Token![as], + pub ty: Box<Type>, + } +} + +ast_struct! { + /// A closure expression: `|a, b| a + b`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprClosure #full { + pub attrs: Vec<Attribute>, + pub lifetimes: Option<BoundLifetimes>, + pub constness: Option<Token![const]>, + pub movability: Option<Token![static]>, + pub asyncness: Option<Token![async]>, + pub capture: Option<Token![move]>, + pub or1_token: Token![|], + pub inputs: Punctuated<Pat, Token![,]>, + pub or2_token: Token![|], + pub output: ReturnType, + pub body: Box<Expr>, + } +} + +ast_struct! { + /// A const block: `const { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprConst #full { + pub attrs: Vec<Attribute>, + pub const_token: Token![const], + pub block: Block, + } +} + +ast_struct! { + /// A `continue`, with an optional label. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprContinue #full { + pub attrs: Vec<Attribute>, + pub continue_token: Token![continue], + pub label: Option<Lifetime>, + } +} + +ast_struct! { + /// Access of a named struct field (`obj.k`) or unnamed tuple struct + /// field (`obj.0`). + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprField { + pub attrs: Vec<Attribute>, + pub base: Box<Expr>, + pub dot_token: Token![.], + pub member: Member, + } +} + +ast_struct! { + /// A for loop: `for pat in expr { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprForLoop #full { + pub attrs: Vec<Attribute>, + pub label: Option<Label>, + pub for_token: Token![for], + pub pat: Box<Pat>, + pub in_token: Token![in], + pub expr: Box<Expr>, + pub body: Block, + } +} + +ast_struct! { + /// An expression contained within invisible delimiters. + /// + /// This variant is important for faithfully representing the precedence + /// of expressions and is related to `None`-delimited spans in a + /// `TokenStream`. 
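+    ///
+    /// A short sketch of where invisible delimiters come from: a
+    /// `macro_rules!` metavariable expands inside a `None`-delimited group
+    /// so that it keeps its own precedence, and Syn surfaces such a group
+    /// as `Expr::Group` when parsing the expanded tokens.
+    ///
+    /// ```
+    /// macro_rules! double {
+    ///     ($e:expr) => {
+    ///         $e * 2
+    ///     };
+    /// }
+    ///
+    /// // `$e` is grouped invisibly, so this computes `(1 + 1) * 2`,
+    /// // not `1 + 1 * 2`.
+    /// assert_eq!(double!(1 + 1), 4);
+    /// ```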
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprGroup { + pub attrs: Vec<Attribute>, + pub group_token: token::Group, + pub expr: Box<Expr>, + } +} + +ast_struct! { + /// An `if` expression with an optional `else` block: `if expr { ... } + /// else { ... }`. + /// + /// The `else` branch expression may only be an `If` or `Block` + /// expression, not any of the other types of expression. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprIf #full { + pub attrs: Vec<Attribute>, + pub if_token: Token![if], + pub cond: Box<Expr>, + pub then_branch: Block, + pub else_branch: Option<(Token![else], Box<Expr>)>, + } +} + +ast_struct! { + /// A square bracketed indexing expression: `vector[2]`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprIndex { + pub attrs: Vec<Attribute>, + pub expr: Box<Expr>, + pub bracket_token: token::Bracket, + pub index: Box<Expr>, + } +} + +ast_struct! { + /// The inferred value of a const generic argument, denoted `_`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprInfer #full { + pub attrs: Vec<Attribute>, + pub underscore_token: Token![_], + } +} + +ast_struct! { + /// A `let` guard: `let Some(x) = opt`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprLet #full { + pub attrs: Vec<Attribute>, + pub let_token: Token![let], + pub pat: Box<Pat>, + pub eq_token: Token![=], + pub expr: Box<Expr>, + } +} + +ast_struct! { + /// A literal in place of an expression: `1`, `"foo"`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprLit { + pub attrs: Vec<Attribute>, + pub lit: Lit, + } +} + +ast_struct! { + /// Conditionless loop: `loop { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprLoop #full { + pub attrs: Vec<Attribute>, + pub label: Option<Label>, + pub loop_token: Token![loop], + pub body: Block, + } +} + +ast_struct! { + /// A macro invocation expression: `format!("{}", q)`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprMacro { + pub attrs: Vec<Attribute>, + pub mac: Macro, + } +} + +ast_struct! { + /// A `match` expression: `match n { Some(n) => {}, None => {} }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprMatch #full { + pub attrs: Vec<Attribute>, + pub match_token: Token![match], + pub expr: Box<Expr>, + pub brace_token: token::Brace, + pub arms: Vec<Arm>, + } +} + +ast_struct! { + /// A method call expression: `x.foo::<T>(a, b)`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprMethodCall { + pub attrs: Vec<Attribute>, + pub receiver: Box<Expr>, + pub dot_token: Token![.], + pub method: Ident, + pub turbofish: Option<AngleBracketedGenericArguments>, + pub paren_token: token::Paren, + pub args: Punctuated<Expr, Token![,]>, + } +} + +ast_struct! { + /// A parenthesized expression: `(a + b)`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprParen { + pub attrs: Vec<Attribute>, + pub paren_token: token::Paren, + pub expr: Box<Expr>, + } +} + +ast_struct! { + /// A path like `std::mem::replace` possibly containing generic + /// parameters and a qualified self-type. + /// + /// A plain identifier like `x` is a path of length 1. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprPath { + pub attrs: Vec<Attribute>, + pub qself: Option<QSelf>, + pub path: Path, + } +} + +ast_struct! 
{ + /// A range expression: `1..2`, `1..`, `..2`, `1..=2`, `..=2`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprRange #full { + pub attrs: Vec<Attribute>, + pub start: Option<Box<Expr>>, + pub limits: RangeLimits, + pub end: Option<Box<Expr>>, + } +} + +ast_struct! { + /// A referencing operation: `&a` or `&mut a`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprReference { + pub attrs: Vec<Attribute>, + pub and_token: Token![&], + pub mutability: Option<Token![mut]>, + pub expr: Box<Expr>, + } +} + +ast_struct! { + /// An array literal constructed from one repeated element: `[0u8; N]`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprRepeat #full { + pub attrs: Vec<Attribute>, + pub bracket_token: token::Bracket, + pub expr: Box<Expr>, + pub semi_token: Token![;], + pub len: Box<Expr>, + } +} + +ast_struct! { + /// A `return`, with an optional value to be returned. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprReturn #full { + pub attrs: Vec<Attribute>, + pub return_token: Token![return], + pub expr: Option<Box<Expr>>, + } +} + +ast_struct! { + /// A struct literal expression: `Point { x: 1, y: 1 }`. + /// + /// The `rest` provides the value of the remaining fields as in `S { a: + /// 1, b: 1, ..rest }`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprStruct { + pub attrs: Vec<Attribute>, + pub qself: Option<QSelf>, + pub path: Path, + pub brace_token: token::Brace, + pub fields: Punctuated<FieldValue, Token![,]>, + pub dot2_token: Option<Token![..]>, + pub rest: Option<Box<Expr>>, + } +} + +ast_struct! { + /// A try-expression: `expr?`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprTry #full { + pub attrs: Vec<Attribute>, + pub expr: Box<Expr>, + pub question_token: Token![?], + } +} + +ast_struct! { + /// A try block: `try { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprTryBlock #full { + pub attrs: Vec<Attribute>, + pub try_token: Token![try], + pub block: Block, + } +} + +ast_struct! { + /// A tuple expression: `(a, b, c, d)`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprTuple #full { + pub attrs: Vec<Attribute>, + pub paren_token: token::Paren, + pub elems: Punctuated<Expr, Token![,]>, + } +} + +ast_struct! { + /// A unary operation: `!x`, `*x`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprUnary { + pub attrs: Vec<Attribute>, + pub op: UnOp, + pub expr: Box<Expr>, + } +} + +ast_struct! { + /// An unsafe block: `unsafe { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprUnsafe #full { + pub attrs: Vec<Attribute>, + pub unsafe_token: Token![unsafe], + pub block: Block, + } +} + +ast_struct! { + /// A while loop: `while expr { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprWhile #full { + pub attrs: Vec<Attribute>, + pub label: Option<Label>, + pub while_token: Token![while], + pub cond: Box<Expr>, + pub body: Block, + } +} + +ast_struct! { + /// A yield expression: `yield expr`. 
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ExprYield #full { + pub attrs: Vec<Attribute>, + pub yield_token: Token![yield], + pub expr: Option<Box<Expr>>, + } +} + +impl Expr { + #[cfg(feature = "parsing")] + const DUMMY: Self = Expr::Path(ExprPath { + attrs: Vec::new(), + qself: None, + path: Path { + leading_colon: None, + segments: Punctuated::new(), + }, + }); + + #[cfg(all(feature = "parsing", feature = "full"))] + pub(crate) fn replace_attrs(&mut self, new: Vec<Attribute>) -> Vec<Attribute> { + match self { + Expr::Array(ExprArray { attrs, .. }) + | Expr::Assign(ExprAssign { attrs, .. }) + | Expr::Async(ExprAsync { attrs, .. }) + | Expr::Await(ExprAwait { attrs, .. }) + | Expr::Binary(ExprBinary { attrs, .. }) + | Expr::Block(ExprBlock { attrs, .. }) + | Expr::Break(ExprBreak { attrs, .. }) + | Expr::Call(ExprCall { attrs, .. }) + | Expr::Cast(ExprCast { attrs, .. }) + | Expr::Closure(ExprClosure { attrs, .. }) + | Expr::Const(ExprConst { attrs, .. }) + | Expr::Continue(ExprContinue { attrs, .. }) + | Expr::Field(ExprField { attrs, .. }) + | Expr::ForLoop(ExprForLoop { attrs, .. }) + | Expr::Group(ExprGroup { attrs, .. }) + | Expr::If(ExprIf { attrs, .. }) + | Expr::Index(ExprIndex { attrs, .. }) + | Expr::Infer(ExprInfer { attrs, .. }) + | Expr::Let(ExprLet { attrs, .. }) + | Expr::Lit(ExprLit { attrs, .. }) + | Expr::Loop(ExprLoop { attrs, .. }) + | Expr::Macro(ExprMacro { attrs, .. }) + | Expr::Match(ExprMatch { attrs, .. }) + | Expr::MethodCall(ExprMethodCall { attrs, .. }) + | Expr::Paren(ExprParen { attrs, .. }) + | Expr::Path(ExprPath { attrs, .. }) + | Expr::Range(ExprRange { attrs, .. }) + | Expr::Reference(ExprReference { attrs, .. }) + | Expr::Repeat(ExprRepeat { attrs, .. }) + | Expr::Return(ExprReturn { attrs, .. }) + | Expr::Struct(ExprStruct { attrs, .. }) + | Expr::Try(ExprTry { attrs, .. }) + | Expr::TryBlock(ExprTryBlock { attrs, .. }) + | Expr::Tuple(ExprTuple { attrs, .. }) + | Expr::Unary(ExprUnary { attrs, .. }) + | Expr::Unsafe(ExprUnsafe { attrs, .. }) + | Expr::While(ExprWhile { attrs, .. }) + | Expr::Yield(ExprYield { attrs, .. }) => mem::replace(attrs, new), + Expr::Verbatim(_) => Vec::new(), + } + } +} + +ast_enum! { + /// A struct or tuple struct field accessed in a struct literal or field + /// expression. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub enum Member { + /// A named field like `self.x`. + Named(Ident), + /// An unnamed field like `self.0`. 
+ Unnamed(Index), + } +} + +impl From<Ident> for Member { + fn from(ident: Ident) -> Member { + Member::Named(ident) + } +} + +impl From<Index> for Member { + fn from(index: Index) -> Member { + Member::Unnamed(index) + } +} + +impl From<usize> for Member { + fn from(index: usize) -> Member { + Member::Unnamed(Index::from(index)) + } +} + +impl Eq for Member {} + +impl PartialEq for Member { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Member::Named(this), Member::Named(other)) => this == other, + (Member::Unnamed(this), Member::Unnamed(other)) => this == other, + _ => false, + } + } +} + +impl Hash for Member { + fn hash<H: Hasher>(&self, state: &mut H) { + match self { + Member::Named(m) => m.hash(state), + Member::Unnamed(m) => m.hash(state), + } + } +} + +#[cfg(feature = "printing")] +impl IdentFragment for Member { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + match self { + Member::Named(m) => Display::fmt(m, formatter), + Member::Unnamed(m) => Display::fmt(&m.index, formatter), + } + } + + fn span(&self) -> Option<Span> { + match self { + Member::Named(m) => Some(m.span()), + Member::Unnamed(m) => Some(m.span), + } + } +} + +ast_struct! { + /// The index of an unnamed tuple struct field. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct Index { + pub index: u32, + pub span: Span, + } +} + +impl From<usize> for Index { + fn from(index: usize) -> Index { + assert!(index < u32::max_value() as usize); + Index { + index: index as u32, + span: Span::call_site(), + } + } +} + +impl Eq for Index {} + +impl PartialEq for Index { + fn eq(&self, other: &Self) -> bool { + self.index == other.index + } +} + +impl Hash for Index { + fn hash<H: Hasher>(&self, state: &mut H) { + self.index.hash(state); + } +} + +#[cfg(feature = "printing")] +impl IdentFragment for Index { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(&self.index, formatter) + } + + fn span(&self) -> Option<Span> { + Some(self.span) + } +} + +ast_struct! { + /// A field-value pair in a struct literal. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct FieldValue { + pub attrs: Vec<Attribute>, + pub member: Member, + + /// The colon in `Struct { x: x }`. If written in shorthand like + /// `Struct { x }`, there is no colon. + pub colon_token: Option<Token![:]>, + + pub expr: Expr, + } +} + +#[cfg(feature = "full")] +ast_struct! { + /// A lifetime labeling a `for`, `while`, or `loop`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct Label { + pub name: Lifetime, + pub colon_token: Token![:], + } +} + +#[cfg(feature = "full")] +ast_struct! { + /// One arm of a `match` expression: `0..=10 => { return true; }`. + /// + /// As in: + /// + /// ``` + /// # fn f() -> bool { + /// # let n = 0; + /// match n { + /// 0..=10 => { + /// return true; + /// } + /// // ... + /// # _ => {} + /// } + /// # false + /// # } + /// ``` + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct Arm { + pub attrs: Vec<Attribute>, + pub pat: Pat, + pub guard: Option<(Token![if], Box<Expr>)>, + pub fat_arrow_token: Token![=>], + pub body: Box<Expr>, + pub comma: Option<Token![,]>, + } +} + +#[cfg(feature = "full")] +ast_enum! { + /// Limit types of a range, inclusive or exclusive. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub enum RangeLimits { + /// Inclusive at the beginning, exclusive at the end. + HalfOpen(Token![..]), + /// Inclusive at the beginning and end. 
+ Closed(Token![..=]), + } +} + +#[cfg(any(feature = "parsing", feature = "printing"))] +#[cfg(feature = "full")] +pub(crate) fn requires_terminator(expr: &Expr) -> bool { + // see https://github.com/rust-lang/rust/blob/9a19e7604/compiler/rustc_ast/src/util/classify.rs#L7-L26 + match expr { + Expr::If(_) + | Expr::Match(_) + | Expr::Block(_) | Expr::Unsafe(_) // both under ExprKind::Block in rustc + | Expr::While(_) + | Expr::Loop(_) + | Expr::ForLoop(_) + | Expr::TryBlock(_) + | Expr::Const(_) => false, + Expr::Array(_) + | Expr::Assign(_) + | Expr::Async(_) + | Expr::Await(_) + | Expr::Binary(_) + | Expr::Break(_) + | Expr::Call(_) + | Expr::Cast(_) + | Expr::Closure(_) + | Expr::Continue(_) + | Expr::Field(_) + | Expr::Group(_) + | Expr::Index(_) + | Expr::Infer(_) + | Expr::Let(_) + | Expr::Lit(_) + | Expr::Macro(_) + | Expr::MethodCall(_) + | Expr::Paren(_) + | Expr::Path(_) + | Expr::Range(_) + | Expr::Reference(_) + | Expr::Repeat(_) + | Expr::Return(_) + | Expr::Struct(_) + | Expr::Try(_) + | Expr::Tuple(_) + | Expr::Unary(_) + | Expr::Yield(_) + | Expr::Verbatim(_) => true + } +} + +#[cfg(feature = "parsing")] +mod precedence { + use super::BinOp; + + pub(crate) enum Precedence { + Any, + Assign, + Range, + Or, + And, + Compare, + BitOr, + BitXor, + BitAnd, + Shift, + Arithmetic, + Term, + Cast, + } + + impl Precedence { + pub(crate) fn of(op: &BinOp) -> Self { + match op { + BinOp::Add(_) | BinOp::Sub(_) => Precedence::Arithmetic, + BinOp::Mul(_) | BinOp::Div(_) | BinOp::Rem(_) => Precedence::Term, + BinOp::And(_) => Precedence::And, + BinOp::Or(_) => Precedence::Or, + BinOp::BitXor(_) => Precedence::BitXor, + BinOp::BitAnd(_) => Precedence::BitAnd, + BinOp::BitOr(_) => Precedence::BitOr, + BinOp::Shl(_) | BinOp::Shr(_) => Precedence::Shift, + BinOp::Eq(_) + | BinOp::Lt(_) + | BinOp::Le(_) + | BinOp::Ne(_) + | BinOp::Ge(_) + | BinOp::Gt(_) => Precedence::Compare, + BinOp::AddAssign(_) + | BinOp::SubAssign(_) + | BinOp::MulAssign(_) + | BinOp::DivAssign(_) + | BinOp::RemAssign(_) + | BinOp::BitXorAssign(_) + | BinOp::BitAndAssign(_) + | BinOp::BitOrAssign(_) + | BinOp::ShlAssign(_) + | BinOp::ShrAssign(_) => Precedence::Assign, + } + } + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::precedence::Precedence; + use super::*; + #[cfg(feature = "full")] + use crate::ext::IdentExt as _; + use crate::parse::discouraged::Speculative as _; + #[cfg(feature = "full")] + use crate::parse::ParseBuffer; + use crate::parse::{Parse, ParseStream, Result}; + use crate::path; + use std::cmp::Ordering; + + mod kw { + crate::custom_keyword!(builtin); + crate::custom_keyword!(raw); + } + + // When we're parsing expressions which occur before blocks, like in an if + // statement's condition, we cannot parse a struct literal. + // + // Struct literals are ambiguous in certain positions + // https://github.com/rust-lang/rfcs/pull/92 + #[cfg(feature = "full")] + pub(crate) struct AllowStruct(bool); + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Expr { + fn parse(input: ParseStream) -> Result<Self> { + ambiguous_expr( + input, + #[cfg(feature = "full")] + AllowStruct(true), + ) + } + } + + impl Expr { + /// An alternative to the primary `Expr::parse` parser (from the + /// [`Parse`] trait) for ambiguous syntactic positions in which a + /// trailing brace should not be taken as part of the expression. + /// + /// Rust grammar has an ambiguity where braces sometimes turn a path + /// expression into a struct initialization and sometimes do not. 
In the
+    /// following code, the expression `S {}` is one expression. Presumably
+    /// there is an empty struct `struct S {}` defined somewhere which it is
+    /// instantiating.
+    ///
+    /// ```
+    /// # struct S;
+    /// # impl std::ops::Deref for S {
+    /// #     type Target = bool;
+    /// #     fn deref(&self) -> &Self::Target {
+    /// #         &true
+    /// #     }
+    /// # }
+    /// let _ = *S {};
+    ///
+    /// // parsed by rustc as: `*(S {})`
+    /// ```
+    ///
+    /// We would want to parse the above using `Expr::parse` after the `=`
+    /// token.
+    ///
+    /// But in the following, `S {}` is *not* a struct init expression.
+    ///
+    /// ```
+    /// # const S: &bool = &true;
+    /// if *S {} {}
+    ///
+    /// // parsed by rustc as:
+    /// //
+    /// //    if (*S) {
+    /// //        /* empty block */
+    /// //    }
+    /// //    {
+    /// //        /* another empty block */
+    /// //    }
+    /// ```
+    ///
+    /// For that reason we would want to parse if-conditions using
+    /// `Expr::parse_without_eager_brace` after the `if` token. Same for
+    /// similar syntactic positions such as the condition expr after a
+    /// `while` token or the expr at the top of a `match`.
+    ///
+    /// The Rust grammar's choices around which way this ambiguity is
+    /// resolved at various syntactic positions are fairly arbitrary. Really
+    /// either parse behavior could work in most positions, and language
+    /// designers just decide each case based on which is more likely to be
+    /// what the programmer had in mind most of the time.
+    ///
+    /// ```
+    /// # struct S;
+    /// # fn doc() -> S {
+    /// if return S {} {}
+    /// # unreachable!()
+    /// # }
+    ///
+    /// // parsed by rustc as:
+    /// //
+    /// //    if (return (S {})) {
+    /// //    }
+    /// //
+    /// // but could equally well have been this other arbitrary choice:
+    /// //
+    /// //    if (return S) {
+    /// //    }
+    /// //    {}
+    /// ```
+    ///
+    /// Note the grammar ambiguity on trailing braces is distinct from
+    /// precedence and is not captured by assigning a precedence level to
+    /// the braced struct init expr in relation to other operators. This can
+    /// be illustrated by `return 0..S {}` vs `match 0..S {}`. The former
+    /// parses as `return (0..(S {}))` implying tighter precedence for
+    /// struct init than `..`, while the latter parses as `match (0..S) {}`
+    /// implying tighter precedence for `..` than struct init, a
+    /// contradiction.
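+    ///
+    /// A minimal sketch of how a hand-written parser for an `if`-like
+    /// construct might use this entry point (`IfHead` is illustrative, not
+    /// an actual syn type):
+    ///
+    /// ```
+    /// use syn::parse::{Parse, ParseStream};
+    /// use syn::{Block, Expr, Result, Token};
+    ///
+    /// struct IfHead {
+    ///     if_token: Token![if],
+    ///     cond: Expr,
+    ///     body: Block,
+    /// }
+    ///
+    /// impl Parse for IfHead {
+    ///     fn parse(input: ParseStream) -> Result<Self> {
+    ///         Ok(IfHead {
+    ///             if_token: input.parse()?,
+    ///             // `Expr::parse` here would instead treat the `{` that
+    ///             // opens the body of `if S {}` as the start of a struct
+    ///             // literal.
+    ///             cond: Expr::parse_without_eager_brace(input)?,
+    ///             body: input.parse()?,
+    ///         })
+    ///     }
+    /// }
+    /// ```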
+ #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "full", feature = "parsing"))))] + pub fn parse_without_eager_brace(input: ParseStream) -> Result<Expr> { + ambiguous_expr(input, AllowStruct(false)) + } + } + + #[cfg(feature = "full")] + impl Copy for AllowStruct {} + + #[cfg(feature = "full")] + impl Clone for AllowStruct { + fn clone(&self) -> Self { + *self + } + } + + impl Copy for Precedence {} + + impl Clone for Precedence { + fn clone(&self) -> Self { + *self + } + } + + impl PartialEq for Precedence { + fn eq(&self, other: &Self) -> bool { + *self as u8 == *other as u8 + } + } + + impl PartialOrd for Precedence { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + let this = *self as u8; + let other = *other as u8; + Some(this.cmp(&other)) + } + } + + #[cfg(feature = "full")] + fn can_begin_expr(input: ParseStream) -> bool { + input.peek(Ident::peek_any) // value name or keyword + || input.peek(token::Paren) // tuple + || input.peek(token::Bracket) // array + || input.peek(token::Brace) // block + || input.peek(Lit) // literal + || input.peek(Token![!]) && !input.peek(Token![!=]) // operator not + || input.peek(Token![-]) && !input.peek(Token![-=]) && !input.peek(Token![->]) // unary minus + || input.peek(Token![*]) && !input.peek(Token![*=]) // dereference + || input.peek(Token![|]) && !input.peek(Token![|=]) // closure + || input.peek(Token![&]) && !input.peek(Token![&=]) // reference + || input.peek(Token![..]) // range notation + || input.peek(Token![<]) && !input.peek(Token![<=]) && !input.peek(Token![<<=]) // associated path + || input.peek(Token![::]) // global path + || input.peek(Lifetime) // labeled loop + || input.peek(Token![#]) // expression attributes + } + + #[cfg(feature = "full")] + fn parse_expr( + input: ParseStream, + mut lhs: Expr, + allow_struct: AllowStruct, + base: Precedence, + ) -> Result<Expr> { + loop { + let ahead = input.fork(); + if let Some(op) = match ahead.parse::<BinOp>() { + Ok(op) if Precedence::of(&op) >= base => Some(op), + _ => None, + } { + input.advance_to(&ahead); + let precedence = Precedence::of(&op); + let mut rhs = unary_expr(input, allow_struct)?; + loop { + let next = peek_precedence(input); + if next > precedence || next == precedence && precedence == Precedence::Assign { + rhs = parse_expr(input, rhs, allow_struct, next)?; + } else { + break; + } + } + lhs = Expr::Binary(ExprBinary { + attrs: Vec::new(), + left: Box::new(lhs), + op, + right: Box::new(rhs), + }); + } else if Precedence::Assign >= base + && input.peek(Token![=]) + && !input.peek(Token![==]) + && !input.peek(Token![=>]) + { + let eq_token: Token![=] = input.parse()?; + let mut rhs = unary_expr(input, allow_struct)?; + loop { + let next = peek_precedence(input); + if next >= Precedence::Assign { + rhs = parse_expr(input, rhs, allow_struct, next)?; + } else { + break; + } + } + lhs = Expr::Assign(ExprAssign { + attrs: Vec::new(), + left: Box::new(lhs), + eq_token, + right: Box::new(rhs), + }); + } else if Precedence::Range >= base && input.peek(Token![..]) { + let limits: RangeLimits = input.parse()?; + let rhs = if matches!(limits, RangeLimits::HalfOpen(_)) + && (input.is_empty() + || input.peek(Token![,]) + || input.peek(Token![;]) + || input.peek(Token![.]) && !input.peek(Token![..]) + || !allow_struct.0 && input.peek(token::Brace)) + { + None + } else { + let mut rhs = unary_expr(input, allow_struct)?; + loop { + let next = peek_precedence(input); + if next > Precedence::Range { + rhs = parse_expr(input, rhs, allow_struct, next)?; + } 
else { + break; + } + } + Some(rhs) + }; + lhs = Expr::Range(ExprRange { + attrs: Vec::new(), + start: Some(Box::new(lhs)), + limits, + end: rhs.map(Box::new), + }); + } else if Precedence::Cast >= base && input.peek(Token![as]) { + let as_token: Token![as] = input.parse()?; + let allow_plus = false; + let allow_group_generic = false; + let ty = ty::parsing::ambig_ty(input, allow_plus, allow_group_generic)?; + check_cast(input)?; + lhs = Expr::Cast(ExprCast { + attrs: Vec::new(), + expr: Box::new(lhs), + as_token, + ty: Box::new(ty), + }); + } else { + break; + } + } + Ok(lhs) + } + + #[cfg(not(feature = "full"))] + fn parse_expr(input: ParseStream, mut lhs: Expr, base: Precedence) -> Result<Expr> { + loop { + let ahead = input.fork(); + if let Some(op) = match ahead.parse::<BinOp>() { + Ok(op) if Precedence::of(&op) >= base => Some(op), + _ => None, + } { + input.advance_to(&ahead); + let precedence = Precedence::of(&op); + let mut rhs = unary_expr(input)?; + loop { + let next = peek_precedence(input); + if next > precedence || next == precedence && precedence == Precedence::Assign { + rhs = parse_expr(input, rhs, next)?; + } else { + break; + } + } + lhs = Expr::Binary(ExprBinary { + attrs: Vec::new(), + left: Box::new(lhs), + op, + right: Box::new(rhs), + }); + } else if Precedence::Cast >= base && input.peek(Token![as]) { + let as_token: Token![as] = input.parse()?; + let allow_plus = false; + let allow_group_generic = false; + let ty = ty::parsing::ambig_ty(input, allow_plus, allow_group_generic)?; + check_cast(input)?; + lhs = Expr::Cast(ExprCast { + attrs: Vec::new(), + expr: Box::new(lhs), + as_token, + ty: Box::new(ty), + }); + } else { + break; + } + } + Ok(lhs) + } + + fn peek_precedence(input: ParseStream) -> Precedence { + if let Ok(op) = input.fork().parse() { + Precedence::of(&op) + } else if input.peek(Token![=]) && !input.peek(Token![=>]) { + Precedence::Assign + } else if input.peek(Token![..]) { + Precedence::Range + } else if input.peek(Token![as]) { + Precedence::Cast + } else { + Precedence::Any + } + } + + // Parse an arbitrary expression. + fn ambiguous_expr( + input: ParseStream, + #[cfg(feature = "full")] allow_struct: AllowStruct, + ) -> Result<Expr> { + let lhs = unary_expr( + input, + #[cfg(feature = "full")] + allow_struct, + )?; + parse_expr( + input, + lhs, + #[cfg(feature = "full")] + allow_struct, + Precedence::Any, + ) + } + + #[cfg(feature = "full")] + fn expr_attrs(input: ParseStream) -> Result<Vec<Attribute>> { + let mut attrs = Vec::new(); + while !input.peek(token::Group) && input.peek(Token![#]) { + attrs.push(input.call(attr::parsing::single_parse_outer)?); + } + Ok(attrs) + } + + // <UnOp> <trailer> + // & <trailer> + // &mut <trailer> + // box <trailer> + #[cfg(feature = "full")] + fn unary_expr(input: ParseStream, allow_struct: AllowStruct) -> Result<Expr> { + let begin = input.fork(); + let attrs = input.call(expr_attrs)?; + if input.peek(token::Group) { + return trailer_expr(begin, attrs, input, allow_struct); + } + + if input.peek(Token![&]) { + let and_token: Token![&] = input.parse()?; + let raw: Option<kw::raw> = if input.peek(kw::raw) + && (input.peek2(Token![mut]) || input.peek2(Token![const])) + { + Some(input.parse()?) 
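+                // The contextual keyword of `&raw const <expr>` /
+                // `&raw mut <expr>`. Raw borrows have no dedicated AST node,
+                // so when `raw` is present the whole expression is emitted
+                // as `Expr::Verbatim` further down.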
+ } else { + None + }; + let mutability: Option<Token![mut]> = input.parse()?; + if raw.is_some() && mutability.is_none() { + input.parse::<Token![const]>()?; + } + let expr = Box::new(unary_expr(input, allow_struct)?); + if raw.is_some() { + Ok(Expr::Verbatim(verbatim::between(&begin, input))) + } else { + Ok(Expr::Reference(ExprReference { + attrs, + and_token, + mutability, + expr, + })) + } + } else if input.peek(Token![*]) || input.peek(Token![!]) || input.peek(Token![-]) { + expr_unary(input, attrs, allow_struct).map(Expr::Unary) + } else { + trailer_expr(begin, attrs, input, allow_struct) + } + } + + #[cfg(not(feature = "full"))] + fn unary_expr(input: ParseStream) -> Result<Expr> { + if input.peek(Token![&]) { + Ok(Expr::Reference(ExprReference { + attrs: Vec::new(), + and_token: input.parse()?, + mutability: input.parse()?, + expr: Box::new(unary_expr(input)?), + })) + } else if input.peek(Token![*]) || input.peek(Token![!]) || input.peek(Token![-]) { + Ok(Expr::Unary(ExprUnary { + attrs: Vec::new(), + op: input.parse()?, + expr: Box::new(unary_expr(input)?), + })) + } else { + trailer_expr(input) + } + } + + // <atom> (..<args>) ... + // <atom> . <ident> (..<args>) ... + // <atom> . <ident> ... + // <atom> . <lit> ... + // <atom> [ <expr> ] ... + // <atom> ? ... + #[cfg(feature = "full")] + fn trailer_expr( + begin: ParseBuffer, + mut attrs: Vec<Attribute>, + input: ParseStream, + allow_struct: AllowStruct, + ) -> Result<Expr> { + let atom = atom_expr(input, allow_struct)?; + let mut e = trailer_helper(input, atom)?; + + if let Expr::Verbatim(tokens) = &mut e { + *tokens = verbatim::between(&begin, input); + } else { + let inner_attrs = e.replace_attrs(Vec::new()); + attrs.extend(inner_attrs); + e.replace_attrs(attrs); + } + + Ok(e) + } + + #[cfg(feature = "full")] + fn trailer_helper(input: ParseStream, mut e: Expr) -> Result<Expr> { + loop { + if input.peek(token::Paren) { + let content; + e = Expr::Call(ExprCall { + attrs: Vec::new(), + func: Box::new(e), + paren_token: parenthesized!(content in input), + args: content.parse_terminated(Expr::parse, Token![,])?, + }); + } else if input.peek(Token![.]) + && !input.peek(Token![..]) + && match e { + Expr::Range(_) => false, + _ => true, + } + { + let mut dot_token: Token![.] = input.parse()?; + + let float_token: Option<LitFloat> = input.parse()?; + if let Some(float_token) = float_token { + if multi_index(&mut e, &mut dot_token, float_token)? { + continue; + } + } + + let await_token: Option<Token![await]> = input.parse()?; + if let Some(await_token) = await_token { + e = Expr::Await(ExprAwait { + attrs: Vec::new(), + base: Box::new(e), + dot_token, + await_token, + }); + continue; + } + + let member: Member = input.parse()?; + let turbofish = if member.is_named() && input.peek(Token![::]) { + Some(AngleBracketedGenericArguments::parse_turbofish(input)?) 
+ } else { + None + }; + + if turbofish.is_some() || input.peek(token::Paren) { + if let Member::Named(method) = member { + let content; + e = Expr::MethodCall(ExprMethodCall { + attrs: Vec::new(), + receiver: Box::new(e), + dot_token, + method, + turbofish, + paren_token: parenthesized!(content in input), + args: content.parse_terminated(Expr::parse, Token![,])?, + }); + continue; + } + } + + e = Expr::Field(ExprField { + attrs: Vec::new(), + base: Box::new(e), + dot_token, + member, + }); + } else if input.peek(token::Bracket) { + let content; + e = Expr::Index(ExprIndex { + attrs: Vec::new(), + expr: Box::new(e), + bracket_token: bracketed!(content in input), + index: content.parse()?, + }); + } else if input.peek(Token![?]) { + e = Expr::Try(ExprTry { + attrs: Vec::new(), + expr: Box::new(e), + question_token: input.parse()?, + }); + } else { + break; + } + } + Ok(e) + } + + #[cfg(not(feature = "full"))] + fn trailer_expr(input: ParseStream) -> Result<Expr> { + let mut e = atom_expr(input)?; + + loop { + if input.peek(token::Paren) { + let content; + e = Expr::Call(ExprCall { + attrs: Vec::new(), + func: Box::new(e), + paren_token: parenthesized!(content in input), + args: content.parse_terminated(Expr::parse, Token![,])?, + }); + } else if input.peek(Token![.]) + && !input.peek(Token![..]) + && !input.peek2(Token![await]) + { + let mut dot_token: Token![.] = input.parse()?; + + let float_token: Option<LitFloat> = input.parse()?; + if let Some(float_token) = float_token { + if multi_index(&mut e, &mut dot_token, float_token)? { + continue; + } + } + + let member: Member = input.parse()?; + let turbofish = if member.is_named() && input.peek(Token![::]) { + let colon2_token: Token![::] = input.parse()?; + let turbofish = + AngleBracketedGenericArguments::do_parse(Some(colon2_token), input)?; + Some(turbofish) + } else { + None + }; + + if turbofish.is_some() || input.peek(token::Paren) { + if let Member::Named(method) = member { + let content; + e = Expr::MethodCall(ExprMethodCall { + attrs: Vec::new(), + receiver: Box::new(e), + dot_token, + method, + turbofish, + paren_token: parenthesized!(content in input), + args: content.parse_terminated(Expr::parse, Token![,])?, + }); + continue; + } + } + + e = Expr::Field(ExprField { + attrs: Vec::new(), + base: Box::new(e), + dot_token, + member, + }); + } else if input.peek(token::Bracket) { + let content; + e = Expr::Index(ExprIndex { + attrs: Vec::new(), + expr: Box::new(e), + bracket_token: bracketed!(content in input), + index: content.parse()?, + }); + } else { + break; + } + } + + Ok(e) + } + + // Parse all atomic expressions which don't have to worry about precedence + // interactions, as they are fully contained. 
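+    // For example `1`, `"s"`, `foo::bar`, `(a, b)`, `[0u8; 4]`, closures,
+    // and block-like forms such as `if`, `match`, `loop`, and
+    // `unsafe { ... }` are all parsed here; none of them needs to consult
+    // the precedence of a surrounding operator to know where it ends.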
+ #[cfg(feature = "full")] + fn atom_expr(input: ParseStream, allow_struct: AllowStruct) -> Result<Expr> { + if input.peek(token::Group) { + expr_group(input, allow_struct) + } else if input.peek(Lit) { + input.parse().map(Expr::Lit) + } else if input.peek(Token![async]) + && (input.peek2(token::Brace) || input.peek2(Token![move]) && input.peek3(token::Brace)) + { + input.parse().map(Expr::Async) + } else if input.peek(Token![try]) && input.peek2(token::Brace) { + input.parse().map(Expr::TryBlock) + } else if input.peek(Token![|]) + || input.peek(Token![move]) + || input.peek(Token![for]) + && input.peek2(Token![<]) + && (input.peek3(Lifetime) || input.peek3(Token![>])) + || input.peek(Token![const]) && !input.peek2(token::Brace) + || input.peek(Token![static]) + || input.peek(Token![async]) && (input.peek2(Token![|]) || input.peek2(Token![move])) + { + expr_closure(input, allow_struct).map(Expr::Closure) + } else if input.peek(kw::builtin) && input.peek2(Token![#]) { + expr_builtin(input) + } else if input.peek(Ident) + || input.peek(Token![::]) + || input.peek(Token![<]) + || input.peek(Token![self]) + || input.peek(Token![Self]) + || input.peek(Token![super]) + || input.peek(Token![crate]) + || input.peek(Token![try]) && (input.peek2(Token![!]) || input.peek2(Token![::])) + { + path_or_macro_or_struct(input, allow_struct) + } else if input.peek(token::Paren) { + paren_or_tuple(input) + } else if input.peek(Token![break]) { + expr_break(input, allow_struct).map(Expr::Break) + } else if input.peek(Token![continue]) { + input.parse().map(Expr::Continue) + } else if input.peek(Token![return]) { + expr_return(input, allow_struct).map(Expr::Return) + } else if input.peek(token::Bracket) { + array_or_repeat(input) + } else if input.peek(Token![let]) { + input.parse().map(Expr::Let) + } else if input.peek(Token![if]) { + input.parse().map(Expr::If) + } else if input.peek(Token![while]) { + input.parse().map(Expr::While) + } else if input.peek(Token![for]) { + input.parse().map(Expr::ForLoop) + } else if input.peek(Token![loop]) { + input.parse().map(Expr::Loop) + } else if input.peek(Token![match]) { + input.parse().map(Expr::Match) + } else if input.peek(Token![yield]) { + input.parse().map(Expr::Yield) + } else if input.peek(Token![unsafe]) { + input.parse().map(Expr::Unsafe) + } else if input.peek(Token![const]) { + input.parse().map(Expr::Const) + } else if input.peek(token::Brace) { + input.parse().map(Expr::Block) + } else if input.peek(Token![..]) { + expr_range(input, allow_struct).map(Expr::Range) + } else if input.peek(Token![_]) { + input.parse().map(Expr::Infer) + } else if input.peek(Lifetime) { + atom_labeled(input) + } else { + Err(input.error("expected an expression")) + } + } + + #[cfg(feature = "full")] + fn atom_labeled(input: ParseStream) -> Result<Expr> { + let the_label: Label = input.parse()?; + let mut expr = if input.peek(Token![while]) { + Expr::While(input.parse()?) + } else if input.peek(Token![for]) { + Expr::ForLoop(input.parse()?) + } else if input.peek(Token![loop]) { + Expr::Loop(input.parse()?) + } else if input.peek(token::Brace) { + Expr::Block(input.parse()?) + } else { + return Err(input.error("expected loop or block expression")); + }; + match &mut expr { + Expr::While(ExprWhile { label, .. }) + | Expr::ForLoop(ExprForLoop { label, .. }) + | Expr::Loop(ExprLoop { label, .. }) + | Expr::Block(ExprBlock { label, .. 
}) => *label = Some(the_label), + _ => unreachable!(), + } + Ok(expr) + } + + #[cfg(not(feature = "full"))] + fn atom_expr(input: ParseStream) -> Result<Expr> { + if input.peek(token::Group) { + expr_group(input) + } else if input.peek(Lit) { + input.parse().map(Expr::Lit) + } else if input.peek(token::Paren) { + input.call(expr_paren).map(Expr::Paren) + } else if input.peek(Ident) + || input.peek(Token![::]) + || input.peek(Token![<]) + || input.peek(Token![self]) + || input.peek(Token![Self]) + || input.peek(Token![super]) + || input.peek(Token![crate]) + { + path_or_macro_or_struct(input) + } else if input.is_empty() { + Err(input.error("expected an expression")) + } else { + if input.peek(token::Brace) { + let scan = input.fork(); + let content; + braced!(content in scan); + if content.parse::<Expr>().is_ok() && content.is_empty() { + let expr_block = verbatim::between(input, &scan); + input.advance_to(&scan); + return Ok(Expr::Verbatim(expr_block)); + } + } + Err(input.error("unsupported expression; enable syn's features=[\"full\"]")) + } + } + + #[cfg(feature = "full")] + fn expr_builtin(input: ParseStream) -> Result<Expr> { + let begin = input.fork(); + + input.parse::<kw::builtin>()?; + input.parse::<Token![#]>()?; + input.parse::<Ident>()?; + + let args; + parenthesized!(args in input); + args.parse::<TokenStream>()?; + + Ok(Expr::Verbatim(verbatim::between(&begin, input))) + } + + fn path_or_macro_or_struct( + input: ParseStream, + #[cfg(feature = "full")] allow_struct: AllowStruct, + ) -> Result<Expr> { + let (qself, path) = path::parsing::qpath(input, true)?; + rest_of_path_or_macro_or_struct( + qself, + path, + input, + #[cfg(feature = "full")] + allow_struct, + ) + } + + fn rest_of_path_or_macro_or_struct( + qself: Option<QSelf>, + path: Path, + input: ParseStream, + #[cfg(feature = "full")] allow_struct: AllowStruct, + ) -> Result<Expr> { + if qself.is_none() + && input.peek(Token![!]) + && !input.peek(Token![!=]) + && path.is_mod_style() + { + let bang_token: Token![!] 
= input.parse()?; + let (delimiter, tokens) = mac::parse_delimiter(input)?; + return Ok(Expr::Macro(ExprMacro { + attrs: Vec::new(), + mac: Macro { + path, + bang_token, + delimiter, + tokens, + }, + })); + } + + #[cfg(not(feature = "full"))] + let allow_struct = (true,); + if allow_struct.0 && input.peek(token::Brace) { + return expr_struct_helper(input, qself, path).map(Expr::Struct); + } + + Ok(Expr::Path(ExprPath { + attrs: Vec::new(), + qself, + path, + })) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprMacro { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ExprMacro { + attrs: Vec::new(), + mac: input.parse()?, + }) + } + } + + #[cfg(feature = "full")] + fn paren_or_tuple(input: ParseStream) -> Result<Expr> { + let content; + let paren_token = parenthesized!(content in input); + if content.is_empty() { + return Ok(Expr::Tuple(ExprTuple { + attrs: Vec::new(), + paren_token, + elems: Punctuated::new(), + })); + } + + let first: Expr = content.parse()?; + if content.is_empty() { + return Ok(Expr::Paren(ExprParen { + attrs: Vec::new(), + paren_token, + expr: Box::new(first), + })); + } + + let mut elems = Punctuated::new(); + elems.push_value(first); + while !content.is_empty() { + let punct = content.parse()?; + elems.push_punct(punct); + if content.is_empty() { + break; + } + let value = content.parse()?; + elems.push_value(value); + } + Ok(Expr::Tuple(ExprTuple { + attrs: Vec::new(), + paren_token, + elems, + })) + } + + #[cfg(feature = "full")] + fn array_or_repeat(input: ParseStream) -> Result<Expr> { + let content; + let bracket_token = bracketed!(content in input); + if content.is_empty() { + return Ok(Expr::Array(ExprArray { + attrs: Vec::new(), + bracket_token, + elems: Punctuated::new(), + })); + } + + let first: Expr = content.parse()?; + if content.is_empty() || content.peek(Token![,]) { + let mut elems = Punctuated::new(); + elems.push_value(first); + while !content.is_empty() { + let punct = content.parse()?; + elems.push_punct(punct); + if content.is_empty() { + break; + } + let value = content.parse()?; + elems.push_value(value); + } + Ok(Expr::Array(ExprArray { + attrs: Vec::new(), + bracket_token, + elems, + })) + } else if content.peek(Token![;]) { + let semi_token: Token![;] = content.parse()?; + let len: Expr = content.parse()?; + Ok(Expr::Repeat(ExprRepeat { + attrs: Vec::new(), + bracket_token, + expr: Box::new(first), + semi_token, + len: Box::new(len), + })) + } else { + Err(content.error("expected `,` or `;`")) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprArray { + fn parse(input: ParseStream) -> Result<Self> { + let content; + let bracket_token = bracketed!(content in input); + let mut elems = Punctuated::new(); + + while !content.is_empty() { + let first: Expr = content.parse()?; + elems.push_value(first); + if content.is_empty() { + break; + } + let punct = content.parse()?; + elems.push_punct(punct); + } + + Ok(ExprArray { + attrs: Vec::new(), + bracket_token, + elems, + }) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprRepeat { + fn parse(input: ParseStream) -> Result<Self> { + let content; + Ok(ExprRepeat { + bracket_token: bracketed!(content in input), + attrs: Vec::new(), + expr: content.parse()?, + semi_token: content.parse()?, + len: content.parse()?, + }) + } + } + + #[cfg(feature = "full")] + pub(crate) fn expr_early(input: ParseStream) -> Result<Expr> { + let mut attrs = 
input.call(expr_attrs)?; + let mut expr = if input.peek(token::Group) { + let allow_struct = AllowStruct(true); + let atom = expr_group(input, allow_struct)?; + if continue_parsing_early(&atom) { + trailer_helper(input, atom)? + } else { + atom + } + } else if input.peek(Token![if]) { + Expr::If(input.parse()?) + } else if input.peek(Token![while]) { + Expr::While(input.parse()?) + } else if input.peek(Token![for]) + && !(input.peek2(Token![<]) && (input.peek3(Lifetime) || input.peek3(Token![>]))) + { + Expr::ForLoop(input.parse()?) + } else if input.peek(Token![loop]) { + Expr::Loop(input.parse()?) + } else if input.peek(Token![match]) { + Expr::Match(input.parse()?) + } else if input.peek(Token![try]) && input.peek2(token::Brace) { + Expr::TryBlock(input.parse()?) + } else if input.peek(Token![unsafe]) { + Expr::Unsafe(input.parse()?) + } else if input.peek(Token![const]) && input.peek2(token::Brace) { + Expr::Const(input.parse()?) + } else if input.peek(token::Brace) { + Expr::Block(input.parse()?) + } else if input.peek(Lifetime) { + atom_labeled(input)? + } else { + let allow_struct = AllowStruct(true); + unary_expr(input, allow_struct)? + }; + + if continue_parsing_early(&expr) { + attrs.extend(expr.replace_attrs(Vec::new())); + expr.replace_attrs(attrs); + + let allow_struct = AllowStruct(true); + return parse_expr(input, expr, allow_struct, Precedence::Any); + } + + if input.peek(Token![.]) && !input.peek(Token![..]) || input.peek(Token![?]) { + expr = trailer_helper(input, expr)?; + + attrs.extend(expr.replace_attrs(Vec::new())); + expr.replace_attrs(attrs); + + let allow_struct = AllowStruct(true); + return parse_expr(input, expr, allow_struct, Precedence::Any); + } + + attrs.extend(expr.replace_attrs(Vec::new())); + expr.replace_attrs(attrs); + Ok(expr) + } + + #[cfg(feature = "full")] + fn continue_parsing_early(mut expr: &Expr) -> bool { + while let Expr::Group(group) = expr { + expr = &group.expr; + } + match expr { + Expr::If(_) + | Expr::While(_) + | Expr::ForLoop(_) + | Expr::Loop(_) + | Expr::Match(_) + | Expr::TryBlock(_) + | Expr::Unsafe(_) + | Expr::Const(_) + | Expr::Block(_) => false, + _ => true, + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprLit { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ExprLit { + attrs: Vec::new(), + lit: input.parse()?, + }) + } + } + + fn expr_group( + input: ParseStream, + #[cfg(feature = "full")] allow_struct: AllowStruct, + ) -> Result<Expr> { + let group = crate::group::parse_group(input)?; + let mut inner: Expr = group.content.parse()?; + + match inner { + Expr::Path(mut expr) if expr.attrs.is_empty() => { + let grouped_len = expr.path.segments.len(); + Path::parse_rest(input, &mut expr.path, true)?; + match rest_of_path_or_macro_or_struct( + expr.qself, + expr.path, + input, + #[cfg(feature = "full")] + allow_struct, + )? 
{ + Expr::Path(expr) if expr.path.segments.len() == grouped_len => { + inner = Expr::Path(expr); + } + extended => return Ok(extended), + } + } + _ => {} + } + + Ok(Expr::Group(ExprGroup { + attrs: Vec::new(), + group_token: group.token, + expr: Box::new(inner), + })) + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprParen { + fn parse(input: ParseStream) -> Result<Self> { + expr_paren(input) + } + } + + fn expr_paren(input: ParseStream) -> Result<ExprParen> { + let content; + Ok(ExprParen { + attrs: Vec::new(), + paren_token: parenthesized!(content in input), + expr: content.parse()?, + }) + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprLet { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ExprLet { + attrs: Vec::new(), + let_token: input.parse()?, + pat: Box::new(Pat::parse_multi_with_leading_vert(input)?), + eq_token: input.parse()?, + expr: Box::new({ + let allow_struct = AllowStruct(false); + let lhs = unary_expr(input, allow_struct)?; + parse_expr(input, lhs, allow_struct, Precedence::Compare)? + }), + }) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprIf { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + Ok(ExprIf { + attrs, + if_token: input.parse()?, + cond: Box::new(input.call(Expr::parse_without_eager_brace)?), + then_branch: input.parse()?, + else_branch: { + if input.peek(Token![else]) { + Some(input.call(else_block)?) + } else { + None + } + }, + }) + } + } + + #[cfg(feature = "full")] + fn else_block(input: ParseStream) -> Result<(Token![else], Box<Expr>)> { + let else_token: Token![else] = input.parse()?; + + let lookahead = input.lookahead1(); + let else_branch = if lookahead.peek(Token![if]) { + input.parse().map(Expr::If)? 
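+            // `else if ...` is represented as a nested `Expr::If` in the
+            // else branch, not as a block containing an `if`.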
+ } else if lookahead.peek(token::Brace) { + Expr::Block(ExprBlock { + attrs: Vec::new(), + label: None, + block: input.parse()?, + }) + } else { + return Err(lookahead.error()); + }; + + Ok((else_token, Box::new(else_branch))) + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprInfer { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ExprInfer { + attrs: input.call(Attribute::parse_outer)?, + underscore_token: input.parse()?, + }) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprForLoop { + fn parse(input: ParseStream) -> Result<Self> { + let mut attrs = input.call(Attribute::parse_outer)?; + let label: Option<Label> = input.parse()?; + let for_token: Token![for] = input.parse()?; + + let pat = Pat::parse_multi_with_leading_vert(input)?; + + let in_token: Token![in] = input.parse()?; + let expr: Expr = input.call(Expr::parse_without_eager_brace)?; + + let content; + let brace_token = braced!(content in input); + attr::parsing::parse_inner(&content, &mut attrs)?; + let stmts = content.call(Block::parse_within)?; + + Ok(ExprForLoop { + attrs, + label, + for_token, + pat: Box::new(pat), + in_token, + expr: Box::new(expr), + body: Block { brace_token, stmts }, + }) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprLoop { + fn parse(input: ParseStream) -> Result<Self> { + let mut attrs = input.call(Attribute::parse_outer)?; + let label: Option<Label> = input.parse()?; + let loop_token: Token![loop] = input.parse()?; + + let content; + let brace_token = braced!(content in input); + attr::parsing::parse_inner(&content, &mut attrs)?; + let stmts = content.call(Block::parse_within)?; + + Ok(ExprLoop { + attrs, + label, + loop_token, + body: Block { brace_token, stmts }, + }) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprMatch { + fn parse(input: ParseStream) -> Result<Self> { + let mut attrs = input.call(Attribute::parse_outer)?; + let match_token: Token![match] = input.parse()?; + let expr = Expr::parse_without_eager_brace(input)?; + + let content; + let brace_token = braced!(content in input); + attr::parsing::parse_inner(&content, &mut attrs)?; + + let mut arms = Vec::new(); + while !content.is_empty() { + arms.push(content.call(Arm::parse)?); + } + + Ok(ExprMatch { + attrs, + match_token, + expr: Box::new(expr), + brace_token, + arms, + }) + } + } + + macro_rules! impl_by_parsing_expr { + ( + $( + $expr_type:ty, $variant:ident, $msg:expr, + )* + ) => { + $( + #[cfg(all(feature = "full", feature = "printing"))] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for $expr_type { + fn parse(input: ParseStream) -> Result<Self> { + let mut expr: Expr = input.parse()?; + loop { + match expr { + Expr::$variant(inner) => return Ok(inner), + Expr::Group(next) => expr = *next.expr, + _ => return Err(Error::new_spanned(expr, $msg)), + } + } + } + } + )* + }; + } + + impl_by_parsing_expr! 
{ + ExprAssign, Assign, "expected assignment expression", + ExprAwait, Await, "expected await expression", + ExprBinary, Binary, "expected binary operation", + ExprCall, Call, "expected function call expression", + ExprCast, Cast, "expected cast expression", + ExprField, Field, "expected struct field access", + ExprIndex, Index, "expected indexing expression", + ExprMethodCall, MethodCall, "expected method call expression", + ExprRange, Range, "expected range expression", + ExprTry, Try, "expected try expression", + ExprTuple, Tuple, "expected tuple expression", + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprUnary { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = Vec::new(); + let allow_struct = AllowStruct(true); + expr_unary(input, attrs, allow_struct) + } + } + + #[cfg(feature = "full")] + fn expr_unary( + input: ParseStream, + attrs: Vec<Attribute>, + allow_struct: AllowStruct, + ) -> Result<ExprUnary> { + Ok(ExprUnary { + attrs, + op: input.parse()?, + expr: Box::new(unary_expr(input, allow_struct)?), + }) + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprClosure { + fn parse(input: ParseStream) -> Result<Self> { + let allow_struct = AllowStruct(true); + expr_closure(input, allow_struct) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprReference { + fn parse(input: ParseStream) -> Result<Self> { + let allow_struct = AllowStruct(true); + Ok(ExprReference { + attrs: Vec::new(), + and_token: input.parse()?, + mutability: input.parse()?, + expr: Box::new(unary_expr(input, allow_struct)?), + }) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprBreak { + fn parse(input: ParseStream) -> Result<Self> { + let allow_struct = AllowStruct(true); + expr_break(input, allow_struct) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprReturn { + fn parse(input: ParseStream) -> Result<Self> { + let allow_struct = AllowStruct(true); + expr_return(input, allow_struct) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprTryBlock { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ExprTryBlock { + attrs: Vec::new(), + try_token: input.parse()?, + block: input.parse()?, + }) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprYield { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ExprYield { + attrs: Vec::new(), + yield_token: input.parse()?, + expr: { + if can_begin_expr(input) { + Some(input.parse()?) 
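+                        // An operand is only parsed when the next token can
+                        // begin an expression; bare `yield` has no operand.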
+ } else { + None + } + }, + }) + } + } + + #[cfg(feature = "full")] + fn expr_closure(input: ParseStream, allow_struct: AllowStruct) -> Result<ExprClosure> { + let lifetimes: Option<BoundLifetimes> = input.parse()?; + let constness: Option<Token![const]> = input.parse()?; + let movability: Option<Token![static]> = input.parse()?; + let asyncness: Option<Token![async]> = input.parse()?; + let capture: Option<Token![move]> = input.parse()?; + let or1_token: Token![|] = input.parse()?; + + let mut inputs = Punctuated::new(); + loop { + if input.peek(Token![|]) { + break; + } + let value = closure_arg(input)?; + inputs.push_value(value); + if input.peek(Token![|]) { + break; + } + let punct: Token![,] = input.parse()?; + inputs.push_punct(punct); + } + + let or2_token: Token![|] = input.parse()?; + + let (output, body) = if input.peek(Token![->]) { + let arrow_token: Token![->] = input.parse()?; + let ty: Type = input.parse()?; + let body: Block = input.parse()?; + let output = ReturnType::Type(arrow_token, Box::new(ty)); + let block = Expr::Block(ExprBlock { + attrs: Vec::new(), + label: None, + block: body, + }); + (output, block) + } else { + let body = ambiguous_expr(input, allow_struct)?; + (ReturnType::Default, body) + }; + + Ok(ExprClosure { + attrs: Vec::new(), + lifetimes, + constness, + movability, + asyncness, + capture, + or1_token, + inputs, + or2_token, + output, + body: Box::new(body), + }) + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprAsync { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ExprAsync { + attrs: Vec::new(), + async_token: input.parse()?, + capture: input.parse()?, + block: input.parse()?, + }) + } + } + + #[cfg(feature = "full")] + fn closure_arg(input: ParseStream) -> Result<Pat> { + let attrs = input.call(Attribute::parse_outer)?; + let mut pat = Pat::parse_single(input)?; + + if input.peek(Token![:]) { + Ok(Pat::Type(PatType { + attrs, + pat: Box::new(pat), + colon_token: input.parse()?, + ty: input.parse()?, + })) + } else { + match &mut pat { + Pat::Const(pat) => pat.attrs = attrs, + Pat::Ident(pat) => pat.attrs = attrs, + Pat::Lit(pat) => pat.attrs = attrs, + Pat::Macro(pat) => pat.attrs = attrs, + Pat::Or(pat) => pat.attrs = attrs, + Pat::Paren(pat) => pat.attrs = attrs, + Pat::Path(pat) => pat.attrs = attrs, + Pat::Range(pat) => pat.attrs = attrs, + Pat::Reference(pat) => pat.attrs = attrs, + Pat::Rest(pat) => pat.attrs = attrs, + Pat::Slice(pat) => pat.attrs = attrs, + Pat::Struct(pat) => pat.attrs = attrs, + Pat::Tuple(pat) => pat.attrs = attrs, + Pat::TupleStruct(pat) => pat.attrs = attrs, + Pat::Type(_) => unreachable!(), + Pat::Verbatim(_) => {} + Pat::Wild(pat) => pat.attrs = attrs, + } + Ok(pat) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprWhile { + fn parse(input: ParseStream) -> Result<Self> { + let mut attrs = input.call(Attribute::parse_outer)?; + let label: Option<Label> = input.parse()?; + let while_token: Token![while] = input.parse()?; + let cond = Expr::parse_without_eager_brace(input)?; + + let content; + let brace_token = braced!(content in input); + attr::parsing::parse_inner(&content, &mut attrs)?; + let stmts = content.call(Block::parse_within)?; + + Ok(ExprWhile { + attrs, + label, + while_token, + cond: Box::new(cond), + body: Block { brace_token, stmts }, + }) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprConst { + fn parse(input: 
ParseStream) -> Result<Self> { + let const_token: Token![const] = input.parse()?; + + let content; + let brace_token = braced!(content in input); + let inner_attrs = content.call(Attribute::parse_inner)?; + let stmts = content.call(Block::parse_within)?; + + Ok(ExprConst { + attrs: inner_attrs, + const_token, + block: Block { brace_token, stmts }, + }) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Label { + fn parse(input: ParseStream) -> Result<Self> { + Ok(Label { + name: input.parse()?, + colon_token: input.parse()?, + }) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Option<Label> { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Lifetime) { + input.parse().map(Some) + } else { + Ok(None) + } + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprContinue { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ExprContinue { + attrs: Vec::new(), + continue_token: input.parse()?, + label: input.parse()?, + }) + } + } + + #[cfg(feature = "full")] + fn expr_break(input: ParseStream, allow_struct: AllowStruct) -> Result<ExprBreak> { + let break_token: Token![break] = input.parse()?; + + let ahead = input.fork(); + let label: Option<Lifetime> = ahead.parse()?; + if label.is_some() && ahead.peek(Token![:]) { + // Not allowed: `break 'label: loop {...}` + // Parentheses are required. `break ('label: loop {...})` + let _ = ambiguous_expr(input, allow_struct)?; + let start_span = label.unwrap().apostrophe; + let end_span = input.cursor().prev_span(); + return Err(crate::error::new2( + start_span, + end_span, + "parentheses required", + )); + } + + input.advance_to(&ahead); + let expr = if can_begin_expr(input) && (allow_struct.0 || !input.peek(token::Brace)) { + let expr = ambiguous_expr(input, allow_struct)?; + Some(Box::new(expr)) + } else { + None + }; + + Ok(ExprBreak { + attrs: Vec::new(), + break_token, + label, + expr, + }) + } + + #[cfg(feature = "full")] + fn expr_return(input: ParseStream, allow_struct: AllowStruct) -> Result<ExprReturn> { + Ok(ExprReturn { + attrs: Vec::new(), + return_token: input.parse()?, + expr: { + if can_begin_expr(input) { + // NOTE: return is greedy and eats blocks after it even when in a + // position where structs are not allowed, such as in if statement + // conditions. 
For example: + // + // if return { println!("A") } {} // Prints "A" + let expr = ambiguous_expr(input, allow_struct)?; + Some(Box::new(expr)) + } else { + None + } + }, + }) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for FieldValue { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let member: Member = input.parse()?; + let (colon_token, value) = if input.peek(Token![:]) || !member.is_named() { + let colon_token: Token![:] = input.parse()?; + let value: Expr = input.parse()?; + (Some(colon_token), value) + } else if let Member::Named(ident) = &member { + let value = Expr::Path(ExprPath { + attrs: Vec::new(), + qself: None, + path: Path::from(ident.clone()), + }); + (None, value) + } else { + unreachable!() + }; + + Ok(FieldValue { + attrs, + member, + colon_token, + expr: value, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprStruct { + fn parse(input: ParseStream) -> Result<Self> { + let (qself, path) = path::parsing::qpath(input, true)?; + expr_struct_helper(input, qself, path) + } + } + + fn expr_struct_helper( + input: ParseStream, + qself: Option<QSelf>, + path: Path, + ) -> Result<ExprStruct> { + let content; + let brace_token = braced!(content in input); + + let mut fields = Punctuated::new(); + while !content.is_empty() { + if content.peek(Token![..]) { + return Ok(ExprStruct { + attrs: Vec::new(), + qself, + path, + brace_token, + fields, + dot2_token: Some(content.parse()?), + rest: if content.is_empty() { + None + } else { + Some(Box::new(content.parse()?)) + }, + }); + } + + fields.push(content.parse()?); + if content.is_empty() { + break; + } + let punct: Token![,] = content.parse()?; + fields.push_punct(punct); + } + + Ok(ExprStruct { + attrs: Vec::new(), + qself, + path, + brace_token, + fields, + dot2_token: None, + rest: None, + }) + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprUnsafe { + fn parse(input: ParseStream) -> Result<Self> { + let unsafe_token: Token![unsafe] = input.parse()?; + + let content; + let brace_token = braced!(content in input); + let inner_attrs = content.call(Attribute::parse_inner)?; + let stmts = content.call(Block::parse_within)?; + + Ok(ExprUnsafe { + attrs: inner_attrs, + unsafe_token, + block: Block { brace_token, stmts }, + }) + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprBlock { + fn parse(input: ParseStream) -> Result<Self> { + let mut attrs = input.call(Attribute::parse_outer)?; + let label: Option<Label> = input.parse()?; + + let content; + let brace_token = braced!(content in input); + attr::parsing::parse_inner(&content, &mut attrs)?; + let stmts = content.call(Block::parse_within)?; + + Ok(ExprBlock { + attrs, + label, + block: Block { brace_token, stmts }, + }) + } + } + + #[cfg(feature = "full")] + fn expr_range(input: ParseStream, allow_struct: AllowStruct) -> Result<ExprRange> { + let limits: RangeLimits = input.parse()?; + let end = if matches!(limits, RangeLimits::HalfOpen(_)) + && (input.is_empty() + || input.peek(Token![,]) + || input.peek(Token![;]) + || input.peek(Token![.]) && !input.peek(Token![..]) + || !allow_struct.0 && input.peek(token::Brace)) + { + None + } else { + let to = ambiguous_expr(input, allow_struct)?; + Some(Box::new(to)) + }; + Ok(ExprRange { + attrs: Vec::new(), + start: None, + limits, + end, + }) + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, 
doc(cfg(feature = "parsing")))] + impl Parse for RangeLimits { + fn parse(input: ParseStream) -> Result<Self> { + let lookahead = input.lookahead1(); + let dot_dot = lookahead.peek(Token![..]); + let dot_dot_eq = dot_dot && lookahead.peek(Token![..=]); + let dot_dot_dot = dot_dot && input.peek(Token![...]); + if dot_dot_eq { + input.parse().map(RangeLimits::Closed) + } else if dot_dot && !dot_dot_dot { + input.parse().map(RangeLimits::HalfOpen) + } else { + Err(lookahead.error()) + } + } + } + + #[cfg(feature = "full")] + impl RangeLimits { + pub(crate) fn parse_obsolete(input: ParseStream) -> Result<Self> { + let lookahead = input.lookahead1(); + let dot_dot = lookahead.peek(Token![..]); + let dot_dot_eq = dot_dot && lookahead.peek(Token![..=]); + let dot_dot_dot = dot_dot && input.peek(Token![...]); + if dot_dot_eq { + input.parse().map(RangeLimits::Closed) + } else if dot_dot_dot { + let dot3: Token![...] = input.parse()?; + Ok(RangeLimits::Closed(Token![..=](dot3.spans))) + } else if dot_dot { + input.parse().map(RangeLimits::HalfOpen) + } else { + Err(lookahead.error()) + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ExprPath { + fn parse(input: ParseStream) -> Result<Self> { + #[cfg(not(feature = "full"))] + let attrs = Vec::new(); + #[cfg(feature = "full")] + let attrs = input.call(Attribute::parse_outer)?; + + let (qself, path) = path::parsing::qpath(input, true)?; + + Ok(ExprPath { attrs, qself, path }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Member { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Ident) { + input.parse().map(Member::Named) + } else if input.peek(LitInt) { + input.parse().map(Member::Unnamed) + } else { + Err(input.error("expected identifier or integer")) + } + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Arm { + fn parse(input: ParseStream) -> Result<Arm> { + let requires_comma; + Ok(Arm { + attrs: input.call(Attribute::parse_outer)?, + pat: Pat::parse_multi_with_leading_vert(input)?, + guard: { + if input.peek(Token![if]) { + let if_token: Token![if] = input.parse()?; + let guard: Expr = input.parse()?; + Some((if_token, Box::new(guard))) + } else { + None + } + }, + fat_arrow_token: input.parse()?, + body: { + let body = input.call(expr_early)?; + requires_comma = requires_terminator(&body); + Box::new(body) + }, + comma: { + if requires_comma && !input.is_empty() { + Some(input.parse()?) + } else { + input.parse()? 
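+                        // Block-like bodies may omit the comma; parse one
+                        // only if it is present (`Option<Token![,]>`).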
+ } + }, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Index { + fn parse(input: ParseStream) -> Result<Self> { + let lit: LitInt = input.parse()?; + if lit.suffix().is_empty() { + Ok(Index { + index: lit + .base10_digits() + .parse() + .map_err(|err| Error::new(lit.span(), err))?, + span: lit.span(), + }) + } else { + Err(Error::new(lit.span(), "expected unsuffixed integer")) + } + } + } + + fn multi_index(e: &mut Expr, dot_token: &mut Token![.], float: LitFloat) -> Result<bool> { + let float_token = float.token(); + let float_span = float_token.span(); + let mut float_repr = float_token.to_string(); + let trailing_dot = float_repr.ends_with('.'); + if trailing_dot { + float_repr.truncate(float_repr.len() - 1); + } + + let mut offset = 0; + for part in float_repr.split('.') { + let mut index: Index = + crate::parse_str(part).map_err(|err| Error::new(float_span, err))?; + let part_end = offset + part.len(); + index.span = float_token.subspan(offset..part_end).unwrap_or(float_span); + + let base = mem::replace(e, Expr::DUMMY); + *e = Expr::Field(ExprField { + attrs: Vec::new(), + base: Box::new(base), + dot_token: Token![.](dot_token.span), + member: Member::Unnamed(index), + }); + + let dot_span = float_token + .subspan(part_end..part_end + 1) + .unwrap_or(float_span); + *dot_token = Token![.](dot_span); + offset = part_end + 1; + } + + Ok(!trailing_dot) + } + + impl Member { + pub(crate) fn is_named(&self) -> bool { + match self { + Member::Named(_) => true, + Member::Unnamed(_) => false, + } + } + } + + fn check_cast(input: ParseStream) -> Result<()> { + let kind = if input.peek(Token![.]) && !input.peek(Token![..]) { + if input.peek2(Token![await]) { + "`.await`" + } else if input.peek2(Ident) && (input.peek3(token::Paren) || input.peek3(Token![::])) { + "a method call" + } else { + "a field access" + } + } else if input.peek(Token![?]) { + "`?`" + } else if input.peek(token::Bracket) { + "indexing" + } else if input.peek(token::Paren) { + "a function call" + } else { + return Ok(()); + }; + let msg = format!("casts cannot be followed by {}", kind); + Err(input.error(msg)) + } +} + +#[cfg(feature = "printing")] +pub(crate) mod printing { + use super::*; + #[cfg(feature = "full")] + use crate::attr::FilterAttrs; + use proc_macro2::{Literal, TokenStream}; + use quote::{ToTokens, TokenStreamExt}; + + // If the given expression is a bare `ExprStruct`, wraps it in parenthesis + // before appending it to `TokenStream`. 
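+    // Without the parentheses the printed tokens would not round-trip: in
+    // `match S { x: 1 } { _ => {} }` the first `{` would be read as the
+    // start of the match body. The `if`, `while`, `for`, `match`, and `let`
+    // printers below route their condition or scrutinee expressions through
+    // this helper.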
+ #[cfg(feature = "full")] + fn wrap_bare_struct(tokens: &mut TokenStream, e: &Expr) { + if let Expr::Struct(_) = *e { + token::Paren::default().surround(tokens, |tokens| { + e.to_tokens(tokens); + }); + } else { + e.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + pub(crate) fn outer_attrs_to_tokens(attrs: &[Attribute], tokens: &mut TokenStream) { + tokens.append_all(attrs.outer()); + } + + #[cfg(feature = "full")] + fn inner_attrs_to_tokens(attrs: &[Attribute], tokens: &mut TokenStream) { + tokens.append_all(attrs.inner()); + } + + #[cfg(not(feature = "full"))] + pub(crate) fn outer_attrs_to_tokens(_attrs: &[Attribute], _tokens: &mut TokenStream) {} + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprArray { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.bracket_token.surround(tokens, |tokens| { + self.elems.to_tokens(tokens); + }); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprAssign { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.left.to_tokens(tokens); + self.eq_token.to_tokens(tokens); + self.right.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprAsync { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.async_token.to_tokens(tokens); + self.capture.to_tokens(tokens); + self.block.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprAwait { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.base.to_tokens(tokens); + self.dot_token.to_tokens(tokens); + self.await_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprBinary { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.left.to_tokens(tokens); + self.op.to_tokens(tokens); + self.right.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprBlock { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.label.to_tokens(tokens); + self.block.brace_token.surround(tokens, |tokens| { + inner_attrs_to_tokens(&self.attrs, tokens); + tokens.append_all(&self.block.stmts); + }); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprBreak { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.break_token.to_tokens(tokens); + self.label.to_tokens(tokens); + self.expr.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprCall { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.func.to_tokens(tokens); + self.paren_token.surround(tokens, |tokens| { + self.args.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprCast { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.expr.to_tokens(tokens); + self.as_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + } + } + + 
#[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprClosure { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.lifetimes.to_tokens(tokens); + self.constness.to_tokens(tokens); + self.movability.to_tokens(tokens); + self.asyncness.to_tokens(tokens); + self.capture.to_tokens(tokens); + self.or1_token.to_tokens(tokens); + self.inputs.to_tokens(tokens); + self.or2_token.to_tokens(tokens); + self.output.to_tokens(tokens); + self.body.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprConst { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.const_token.to_tokens(tokens); + self.block.brace_token.surround(tokens, |tokens| { + inner_attrs_to_tokens(&self.attrs, tokens); + tokens.append_all(&self.block.stmts); + }); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprContinue { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.continue_token.to_tokens(tokens); + self.label.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprField { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.base.to_tokens(tokens); + self.dot_token.to_tokens(tokens); + self.member.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprForLoop { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.label.to_tokens(tokens); + self.for_token.to_tokens(tokens); + self.pat.to_tokens(tokens); + self.in_token.to_tokens(tokens); + wrap_bare_struct(tokens, &self.expr); + self.body.brace_token.surround(tokens, |tokens| { + inner_attrs_to_tokens(&self.attrs, tokens); + tokens.append_all(&self.body.stmts); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprGroup { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.group_token.surround(tokens, |tokens| { + self.expr.to_tokens(tokens); + }); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprIf { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.if_token.to_tokens(tokens); + wrap_bare_struct(tokens, &self.cond); + self.then_branch.to_tokens(tokens); + if let Some((else_token, else_)) = &self.else_branch { + else_token.to_tokens(tokens); + // If we are not one of the valid expressions to exist in an else + // clause, wrap ourselves in a block. 
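+                // For example an `Expr::Call` else branch prints as
+                // `else { f() }`, since `else f()` is not valid Rust.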
+ match **else_ { + Expr::If(_) | Expr::Block(_) => else_.to_tokens(tokens), + _ => token::Brace::default().surround(tokens, |tokens| else_.to_tokens(tokens)), + } + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprIndex { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.expr.to_tokens(tokens); + self.bracket_token.surround(tokens, |tokens| { + self.index.to_tokens(tokens); + }); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprInfer { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.underscore_token.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprLet { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.let_token.to_tokens(tokens); + self.pat.to_tokens(tokens); + self.eq_token.to_tokens(tokens); + wrap_bare_struct(tokens, &self.expr); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprLit { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.lit.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprLoop { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.label.to_tokens(tokens); + self.loop_token.to_tokens(tokens); + self.body.brace_token.surround(tokens, |tokens| { + inner_attrs_to_tokens(&self.attrs, tokens); + tokens.append_all(&self.body.stmts); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprMacro { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.mac.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprMatch { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.match_token.to_tokens(tokens); + wrap_bare_struct(tokens, &self.expr); + self.brace_token.surround(tokens, |tokens| { + inner_attrs_to_tokens(&self.attrs, tokens); + for (i, arm) in self.arms.iter().enumerate() { + arm.to_tokens(tokens); + // Ensure that we have a comma after a non-block arm, except + // for the last one. 
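+                    // For example `0 => x + 1` must be followed by `,`
+                    // before the next arm, while a block body `0 => { ... }`
+                    // need not.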
+ let is_last = i == self.arms.len() - 1; + if !is_last && requires_terminator(&arm.body) && arm.comma.is_none() { + <Token![,]>::default().to_tokens(tokens); + } + } + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprMethodCall { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.receiver.to_tokens(tokens); + self.dot_token.to_tokens(tokens); + self.method.to_tokens(tokens); + self.turbofish.to_tokens(tokens); + self.paren_token.surround(tokens, |tokens| { + self.args.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprParen { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.paren_token.surround(tokens, |tokens| { + self.expr.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprPath { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + path::printing::print_path(tokens, &self.qself, &self.path); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprRange { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.start.to_tokens(tokens); + self.limits.to_tokens(tokens); + self.end.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprReference { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.and_token.to_tokens(tokens); + self.mutability.to_tokens(tokens); + self.expr.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprRepeat { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.bracket_token.surround(tokens, |tokens| { + self.expr.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + self.len.to_tokens(tokens); + }); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprReturn { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.return_token.to_tokens(tokens); + self.expr.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprStruct { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + path::printing::print_path(tokens, &self.qself, &self.path); + self.brace_token.surround(tokens, |tokens| { + self.fields.to_tokens(tokens); + if let Some(dot2_token) = &self.dot2_token { + dot2_token.to_tokens(tokens); + } else if self.rest.is_some() { + Token![..](Span::call_site()).to_tokens(tokens); + } + self.rest.to_tokens(tokens); + }); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprTry { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.expr.to_tokens(tokens); + self.question_token.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprTryBlock { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.try_token.to_tokens(tokens); + self.block.to_tokens(tokens); + } + } + + 
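Editorial sketch (not part of the vendored patch): the two printing rules commented above, wrapping a non-block `else` branch in braces in `ExprIf` and re-inserting the comma after a non-final, non-block `match` arm in `ExprMatch`, can be observed directly from user code. This assumes the `syn` crate with the "full", "parsing" and "printing" features plus the `quote` crate; the identifiers `cond`, `x`, `a`, `b` are placeholders.

```rust
use quote::quote;
use syn::{parse_quote, ExprIf, ExprMatch};

fn main() {
    // Build `if cond { 1 } else { 2 }`, then swap the else branch for a
    // bare literal, which would be invalid Rust if printed unwrapped.
    let mut e: ExprIf = parse_quote!(if cond { 1 } else { 2 });
    if let Some((_, else_expr)) = &mut e.else_branch {
        *else_expr = Box::new(parse_quote!(0));
    }
    // ExprIf::to_tokens wraps the bare `0` in a default brace pair, so
    // this prints (modulo token spacing) `if cond { 1 } else { 0 }`.
    println!("{}", quote!(#e));

    // Erase the comma on a non-final, non-block arm; printing restores it.
    let mut m: ExprMatch = parse_quote!(match x { 0 => a, _ => b });
    m.arms[0].comma = None;
    // Prints (modulo token spacing) `match x { 0 => a, _ => b }` again.
    println!("{}", quote!(#m));
}
```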
#[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprTuple { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.paren_token.surround(tokens, |tokens| { + self.elems.to_tokens(tokens); + // If we only have one argument, we need a trailing comma to + // distinguish ExprTuple from ExprParen. + if self.elems.len() == 1 && !self.elems.trailing_punct() { + <Token![,]>::default().to_tokens(tokens); + } + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprUnary { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.op.to_tokens(tokens); + self.expr.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprUnsafe { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.unsafe_token.to_tokens(tokens); + self.block.brace_token.surround(tokens, |tokens| { + inner_attrs_to_tokens(&self.attrs, tokens); + tokens.append_all(&self.block.stmts); + }); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprWhile { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.label.to_tokens(tokens); + self.while_token.to_tokens(tokens); + wrap_bare_struct(tokens, &self.cond); + self.body.brace_token.surround(tokens, |tokens| { + inner_attrs_to_tokens(&self.attrs, tokens); + tokens.append_all(&self.body.stmts); + }); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ExprYield { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.yield_token.to_tokens(tokens); + self.expr.to_tokens(tokens); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Arm { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(&self.attrs); + self.pat.to_tokens(tokens); + if let Some((if_token, guard)) = &self.guard { + if_token.to_tokens(tokens); + guard.to_tokens(tokens); + } + self.fat_arrow_token.to_tokens(tokens); + self.body.to_tokens(tokens); + self.comma.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for FieldValue { + fn to_tokens(&self, tokens: &mut TokenStream) { + outer_attrs_to_tokens(&self.attrs, tokens); + self.member.to_tokens(tokens); + if let Some(colon_token) = &self.colon_token { + colon_token.to_tokens(tokens); + self.expr.to_tokens(tokens); + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Index { + fn to_tokens(&self, tokens: &mut TokenStream) { + let mut lit = Literal::i64_unsuffixed(i64::from(self.index)); + lit.set_span(self.span); + tokens.append(lit); + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Label { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.name.to_tokens(tokens); + self.colon_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Member { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + Member::Named(ident) => ident.to_tokens(tokens), + Member::Unnamed(index) => index.to_tokens(tokens), + } + } + } + + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, 
doc(cfg(feature = "printing")))] + impl ToTokens for RangeLimits { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + RangeLimits::HalfOpen(t) => t.to_tokens(tokens), + RangeLimits::Closed(t) => t.to_tokens(tokens), + } + } + } +} diff --git a/vendor/syn/src/ext.rs b/vendor/syn/src/ext.rs new file mode 100644 index 0000000..9ee5672 --- /dev/null +++ b/vendor/syn/src/ext.rs @@ -0,0 +1,135 @@ +//! Extension traits to provide parsing methods on foreign types. + +use crate::buffer::Cursor; +use crate::parse::Peek; +use crate::parse::{ParseStream, Result}; +use crate::sealed::lookahead; +use crate::token::CustomToken; +use proc_macro2::Ident; + +/// Additional methods for `Ident` not provided by proc-macro2 or libproc_macro. +/// +/// This trait is sealed and cannot be implemented for types outside of Syn. It +/// is implemented only for `proc_macro2::Ident`. +pub trait IdentExt: Sized + private::Sealed { + /// Parses any identifier including keywords. + /// + /// This is useful when parsing macro input which allows Rust keywords as + /// identifiers. + /// + /// # Example + /// + /// ``` + /// use syn::{Error, Ident, Result, Token}; + /// use syn::ext::IdentExt; + /// use syn::parse::ParseStream; + /// + /// mod kw { + /// syn::custom_keyword!(name); + /// } + /// + /// // Parses input that looks like `name = NAME` where `NAME` can be + /// // any identifier. + /// // + /// // Examples: + /// // + /// // name = anything + /// // name = impl + /// fn parse_dsl(input: ParseStream) -> Result<Ident> { + /// input.parse::<kw::name>()?; + /// input.parse::<Token![=]>()?; + /// let name = input.call(Ident::parse_any)?; + /// Ok(name) + /// } + /// ``` + fn parse_any(input: ParseStream) -> Result<Self>; + + /// Peeks any identifier including keywords. Usage: + /// `input.peek(Ident::peek_any)` + /// + /// This is different from `input.peek(Ident)` which only returns true in + /// the case of an ident which is not a Rust keyword. + #[allow(non_upper_case_globals)] + const peek_any: private::PeekFn = private::PeekFn; + + /// Strips the raw marker `r#`, if any, from the beginning of an ident. + /// + /// - unraw(`x`) = `x` + /// - unraw(`move`) = `move` + /// - unraw(`r#move`) = `move` + /// + /// # Example + /// + /// In the case of interop with other languages like Python that have a + /// different set of keywords than Rust, we might come across macro input + /// that involves raw identifiers to refer to ordinary variables in the + /// other language with a name that happens to be a Rust keyword. + /// + /// The function below appends an identifier from the caller's input onto a + /// fixed prefix. Without using `unraw()`, this would tend to produce + /// invalid identifiers like `__pyo3_get_r#move`. 
+ /// + /// ``` + /// use proc_macro2::Span; + /// use syn::Ident; + /// use syn::ext::IdentExt; + /// + /// fn ident_for_getter(variable: &Ident) -> Ident { + /// let getter = format!("__pyo3_get_{}", variable.unraw()); + /// Ident::new(&getter, Span::call_site()) + /// } + /// ``` + fn unraw(&self) -> Ident; +} + +impl IdentExt for Ident { + fn parse_any(input: ParseStream) -> Result<Self> { + input.step(|cursor| match cursor.ident() { + Some((ident, rest)) => Ok((ident, rest)), + None => Err(cursor.error("expected ident")), + }) + } + + fn unraw(&self) -> Ident { + let string = self.to_string(); + if let Some(string) = string.strip_prefix("r#") { + Ident::new(string, self.span()) + } else { + self.clone() + } + } +} + +impl Peek for private::PeekFn { + type Token = private::IdentAny; +} + +impl CustomToken for private::IdentAny { + fn peek(cursor: Cursor) -> bool { + cursor.ident().is_some() + } + + fn display() -> &'static str { + "identifier" + } +} + +impl lookahead::Sealed for private::PeekFn {} + +mod private { + use proc_macro2::Ident; + + pub trait Sealed {} + + impl Sealed for Ident {} + + pub struct PeekFn; + pub struct IdentAny; + + impl Copy for PeekFn {} + impl Clone for PeekFn { + fn clone(&self) -> Self { + *self + } + } +} diff --git a/vendor/syn/src/file.rs b/vendor/syn/src/file.rs new file mode 100644 index 0000000..2d9f298 --- /dev/null +++ b/vendor/syn/src/file.rs @@ -0,0 +1,125 @@ +use super::*; + +ast_struct! { + /// A complete file of Rust source code. + /// + /// Typically `File` objects are created with [`parse_file`]. + /// + /// # Example + /// + /// Parse a Rust source file into a `syn::File` and print out a debug + /// representation of the syntax tree. + /// + /// ``` + /// use std::env; + /// use std::fs::File; + /// use std::io::Read; + /// use std::process; + /// + /// fn main() { + /// # } + /// # + /// # fn fake_main() { + /// let mut args = env::args(); + /// let _ = args.next(); // executable name + /// + /// let filename = match (args.next(), args.next()) { + /// (Some(filename), None) => filename, + /// _ => { + /// eprintln!("Usage: dump-syntax path/to/filename.rs"); + /// process::exit(1); + /// } + /// }; + /// + /// let mut file = File::open(&filename).expect("Unable to open file"); + /// + /// let mut src = String::new(); + /// file.read_to_string(&mut src).expect("Unable to read file"); + /// + /// let syntax = syn::parse_file(&src).expect("Unable to parse file"); + /// + /// // Debug impl is available if Syn is built with "extra-traits" feature. + /// println!("{:#?}", syntax); + /// } + /// ``` + /// + /// Running with its own source code as input, this program prints output + /// that begins with: + /// + /// ```text + /// File { + /// shebang: None, + /// attrs: [], + /// items: [ + /// Use( + /// ItemUse { + /// attrs: [], + /// vis: Inherited, + /// use_token: Use, + /// leading_colon: None, + /// tree: Path( + /// UsePath { + /// ident: Ident( + /// std, + /// ), + /// colon2_token: Colon2, + /// tree: Name( + /// UseName { + /// ident: Ident( + /// env, + /// ), + /// }, + /// ), + /// }, + /// ), + /// semi_token: Semi, + /// }, + /// ), + /// ... 
+ /// ``` + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct File { + pub shebang: Option<String>, + pub attrs: Vec<Attribute>, + pub items: Vec<Item>, + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::parse::{Parse, ParseStream, Result}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for File { + fn parse(input: ParseStream) -> Result<Self> { + Ok(File { + shebang: None, + attrs: input.call(Attribute::parse_inner)?, + items: { + let mut items = Vec::new(); + while !input.is_empty() { + items.push(input.parse()?); + } + items + }, + }) + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use crate::attr::FilterAttrs; + use proc_macro2::TokenStream; + use quote::{ToTokens, TokenStreamExt}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for File { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.inner()); + tokens.append_all(&self.items); + } + } +} diff --git a/vendor/syn/src/gen/clone.rs b/vendor/syn/src/gen/clone.rs new file mode 100644 index 0000000..9593679 --- /dev/null +++ b/vendor/syn/src/gen/clone.rs @@ -0,0 +1,2181 @@ +// This file is @generated by syn-internal-codegen. +// It is not intended for manual editing. + +#![allow(clippy::clone_on_copy, clippy::expl_impl_clone_on_copy)] +use crate::*; +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Abi { + fn clone(&self) -> Self { + Abi { + extern_token: self.extern_token.clone(), + name: self.name.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for AngleBracketedGenericArguments { + fn clone(&self) -> Self { + AngleBracketedGenericArguments { + colon2_token: self.colon2_token.clone(), + lt_token: self.lt_token.clone(), + args: self.args.clone(), + gt_token: self.gt_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Arm { + fn clone(&self) -> Self { + Arm { + attrs: self.attrs.clone(), + pat: self.pat.clone(), + guard: self.guard.clone(), + fat_arrow_token: self.fat_arrow_token.clone(), + body: self.body.clone(), + comma: self.comma.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for AssocConst { + fn clone(&self) -> Self { + AssocConst { + ident: self.ident.clone(), + generics: self.generics.clone(), + eq_token: self.eq_token.clone(), + value: self.value.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for AssocType { + fn clone(&self) -> Self { + AssocType { + ident: self.ident.clone(), + generics: self.generics.clone(), + eq_token: self.eq_token.clone(), + ty: self.ty.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Copy for AttrStyle {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for AttrStyle { + fn clone(&self) -> Self { + *self + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Attribute { + fn clone(&self) -> Self { + Attribute { + pound_token: self.pound_token.clone(), + style: self.style.clone(), + bracket_token: 
self.bracket_token.clone(), + meta: self.meta.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for BareFnArg { + fn clone(&self) -> Self { + BareFnArg { + attrs: self.attrs.clone(), + name: self.name.clone(), + ty: self.ty.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for BareVariadic { + fn clone(&self) -> Self { + BareVariadic { + attrs: self.attrs.clone(), + name: self.name.clone(), + dots: self.dots.clone(), + comma: self.comma.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Copy for BinOp {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for BinOp { + fn clone(&self) -> Self { + *self + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Block { + fn clone(&self) -> Self { + Block { + brace_token: self.brace_token.clone(), + stmts: self.stmts.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for BoundLifetimes { + fn clone(&self) -> Self { + BoundLifetimes { + for_token: self.for_token.clone(), + lt_token: self.lt_token.clone(), + lifetimes: self.lifetimes.clone(), + gt_token: self.gt_token.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ConstParam { + fn clone(&self) -> Self { + ConstParam { + attrs: self.attrs.clone(), + const_token: self.const_token.clone(), + ident: self.ident.clone(), + colon_token: self.colon_token.clone(), + ty: self.ty.clone(), + eq_token: self.eq_token.clone(), + default: self.default.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Constraint { + fn clone(&self) -> Self { + Constraint { + ident: self.ident.clone(), + generics: self.generics.clone(), + colon_token: self.colon_token.clone(), + bounds: self.bounds.clone(), + } + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Data { + fn clone(&self) -> Self { + match self { + Data::Struct(v0) => Data::Struct(v0.clone()), + Data::Enum(v0) => Data::Enum(v0.clone()), + Data::Union(v0) => Data::Union(v0.clone()), + } + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for DataEnum { + fn clone(&self) -> Self { + DataEnum { + enum_token: self.enum_token.clone(), + brace_token: self.brace_token.clone(), + variants: self.variants.clone(), + } + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for DataStruct { + fn clone(&self) -> Self { + DataStruct { + struct_token: self.struct_token.clone(), + fields: self.fields.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for DataUnion { + fn clone(&self) -> Self { + DataUnion { + union_token: self.union_token.clone(), + fields: self.fields.clone(), + } + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for DeriveInput { + fn clone(&self) -> Self { + DeriveInput { + attrs: self.attrs.clone(), + 
vis: self.vis.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + data: self.data.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Expr { + fn clone(&self) -> Self { + match self { + #[cfg(feature = "full")] + Expr::Array(v0) => Expr::Array(v0.clone()), + #[cfg(feature = "full")] + Expr::Assign(v0) => Expr::Assign(v0.clone()), + #[cfg(feature = "full")] + Expr::Async(v0) => Expr::Async(v0.clone()), + #[cfg(feature = "full")] + Expr::Await(v0) => Expr::Await(v0.clone()), + Expr::Binary(v0) => Expr::Binary(v0.clone()), + #[cfg(feature = "full")] + Expr::Block(v0) => Expr::Block(v0.clone()), + #[cfg(feature = "full")] + Expr::Break(v0) => Expr::Break(v0.clone()), + Expr::Call(v0) => Expr::Call(v0.clone()), + Expr::Cast(v0) => Expr::Cast(v0.clone()), + #[cfg(feature = "full")] + Expr::Closure(v0) => Expr::Closure(v0.clone()), + #[cfg(feature = "full")] + Expr::Const(v0) => Expr::Const(v0.clone()), + #[cfg(feature = "full")] + Expr::Continue(v0) => Expr::Continue(v0.clone()), + Expr::Field(v0) => Expr::Field(v0.clone()), + #[cfg(feature = "full")] + Expr::ForLoop(v0) => Expr::ForLoop(v0.clone()), + Expr::Group(v0) => Expr::Group(v0.clone()), + #[cfg(feature = "full")] + Expr::If(v0) => Expr::If(v0.clone()), + Expr::Index(v0) => Expr::Index(v0.clone()), + #[cfg(feature = "full")] + Expr::Infer(v0) => Expr::Infer(v0.clone()), + #[cfg(feature = "full")] + Expr::Let(v0) => Expr::Let(v0.clone()), + Expr::Lit(v0) => Expr::Lit(v0.clone()), + #[cfg(feature = "full")] + Expr::Loop(v0) => Expr::Loop(v0.clone()), + Expr::Macro(v0) => Expr::Macro(v0.clone()), + #[cfg(feature = "full")] + Expr::Match(v0) => Expr::Match(v0.clone()), + Expr::MethodCall(v0) => Expr::MethodCall(v0.clone()), + Expr::Paren(v0) => Expr::Paren(v0.clone()), + Expr::Path(v0) => Expr::Path(v0.clone()), + #[cfg(feature = "full")] + Expr::Range(v0) => Expr::Range(v0.clone()), + Expr::Reference(v0) => Expr::Reference(v0.clone()), + #[cfg(feature = "full")] + Expr::Repeat(v0) => Expr::Repeat(v0.clone()), + #[cfg(feature = "full")] + Expr::Return(v0) => Expr::Return(v0.clone()), + Expr::Struct(v0) => Expr::Struct(v0.clone()), + #[cfg(feature = "full")] + Expr::Try(v0) => Expr::Try(v0.clone()), + #[cfg(feature = "full")] + Expr::TryBlock(v0) => Expr::TryBlock(v0.clone()), + #[cfg(feature = "full")] + Expr::Tuple(v0) => Expr::Tuple(v0.clone()), + Expr::Unary(v0) => Expr::Unary(v0.clone()), + #[cfg(feature = "full")] + Expr::Unsafe(v0) => Expr::Unsafe(v0.clone()), + Expr::Verbatim(v0) => Expr::Verbatim(v0.clone()), + #[cfg(feature = "full")] + Expr::While(v0) => Expr::While(v0.clone()), + #[cfg(feature = "full")] + Expr::Yield(v0) => Expr::Yield(v0.clone()), + #[cfg(not(feature = "full"))] + _ => unreachable!(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprArray { + fn clone(&self) -> Self { + ExprArray { + attrs: self.attrs.clone(), + bracket_token: self.bracket_token.clone(), + elems: self.elems.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprAssign { + fn clone(&self) -> Self { + ExprAssign { + attrs: self.attrs.clone(), + left: self.left.clone(), + eq_token: self.eq_token.clone(), + right: self.right.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprAsync { + fn clone(&self) -> Self { + ExprAsync { + attrs: 
self.attrs.clone(), + async_token: self.async_token.clone(), + capture: self.capture.clone(), + block: self.block.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprAwait { + fn clone(&self) -> Self { + ExprAwait { + attrs: self.attrs.clone(), + base: self.base.clone(), + dot_token: self.dot_token.clone(), + await_token: self.await_token.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprBinary { + fn clone(&self) -> Self { + ExprBinary { + attrs: self.attrs.clone(), + left: self.left.clone(), + op: self.op.clone(), + right: self.right.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprBlock { + fn clone(&self) -> Self { + ExprBlock { + attrs: self.attrs.clone(), + label: self.label.clone(), + block: self.block.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprBreak { + fn clone(&self) -> Self { + ExprBreak { + attrs: self.attrs.clone(), + break_token: self.break_token.clone(), + label: self.label.clone(), + expr: self.expr.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprCall { + fn clone(&self) -> Self { + ExprCall { + attrs: self.attrs.clone(), + func: self.func.clone(), + paren_token: self.paren_token.clone(), + args: self.args.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprCast { + fn clone(&self) -> Self { + ExprCast { + attrs: self.attrs.clone(), + expr: self.expr.clone(), + as_token: self.as_token.clone(), + ty: self.ty.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprClosure { + fn clone(&self) -> Self { + ExprClosure { + attrs: self.attrs.clone(), + lifetimes: self.lifetimes.clone(), + constness: self.constness.clone(), + movability: self.movability.clone(), + asyncness: self.asyncness.clone(), + capture: self.capture.clone(), + or1_token: self.or1_token.clone(), + inputs: self.inputs.clone(), + or2_token: self.or2_token.clone(), + output: self.output.clone(), + body: self.body.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprConst { + fn clone(&self) -> Self { + ExprConst { + attrs: self.attrs.clone(), + const_token: self.const_token.clone(), + block: self.block.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprContinue { + fn clone(&self) -> Self { + ExprContinue { + attrs: self.attrs.clone(), + continue_token: self.continue_token.clone(), + label: self.label.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprField { + fn clone(&self) -> Self { + ExprField { + attrs: self.attrs.clone(), + base: self.base.clone(), + dot_token: self.dot_token.clone(), + member: self.member.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprForLoop { + fn clone(&self) -> Self { + ExprForLoop { + attrs: self.attrs.clone(), + label: self.label.clone(), + for_token: self.for_token.clone(), + pat: self.pat.clone(), 
+ in_token: self.in_token.clone(), + expr: self.expr.clone(), + body: self.body.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprGroup { + fn clone(&self) -> Self { + ExprGroup { + attrs: self.attrs.clone(), + group_token: self.group_token.clone(), + expr: self.expr.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprIf { + fn clone(&self) -> Self { + ExprIf { + attrs: self.attrs.clone(), + if_token: self.if_token.clone(), + cond: self.cond.clone(), + then_branch: self.then_branch.clone(), + else_branch: self.else_branch.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprIndex { + fn clone(&self) -> Self { + ExprIndex { + attrs: self.attrs.clone(), + expr: self.expr.clone(), + bracket_token: self.bracket_token.clone(), + index: self.index.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprInfer { + fn clone(&self) -> Self { + ExprInfer { + attrs: self.attrs.clone(), + underscore_token: self.underscore_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprLet { + fn clone(&self) -> Self { + ExprLet { + attrs: self.attrs.clone(), + let_token: self.let_token.clone(), + pat: self.pat.clone(), + eq_token: self.eq_token.clone(), + expr: self.expr.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprLit { + fn clone(&self) -> Self { + ExprLit { + attrs: self.attrs.clone(), + lit: self.lit.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprLoop { + fn clone(&self) -> Self { + ExprLoop { + attrs: self.attrs.clone(), + label: self.label.clone(), + loop_token: self.loop_token.clone(), + body: self.body.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprMacro { + fn clone(&self) -> Self { + ExprMacro { + attrs: self.attrs.clone(), + mac: self.mac.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprMatch { + fn clone(&self) -> Self { + ExprMatch { + attrs: self.attrs.clone(), + match_token: self.match_token.clone(), + expr: self.expr.clone(), + brace_token: self.brace_token.clone(), + arms: self.arms.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprMethodCall { + fn clone(&self) -> Self { + ExprMethodCall { + attrs: self.attrs.clone(), + receiver: self.receiver.clone(), + dot_token: self.dot_token.clone(), + method: self.method.clone(), + turbofish: self.turbofish.clone(), + paren_token: self.paren_token.clone(), + args: self.args.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprParen { + fn clone(&self) -> Self { + ExprParen { + attrs: self.attrs.clone(), + paren_token: self.paren_token.clone(), + expr: self.expr.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprPath { + fn 
clone(&self) -> Self { + ExprPath { + attrs: self.attrs.clone(), + qself: self.qself.clone(), + path: self.path.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprRange { + fn clone(&self) -> Self { + ExprRange { + attrs: self.attrs.clone(), + start: self.start.clone(), + limits: self.limits.clone(), + end: self.end.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprReference { + fn clone(&self) -> Self { + ExprReference { + attrs: self.attrs.clone(), + and_token: self.and_token.clone(), + mutability: self.mutability.clone(), + expr: self.expr.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprRepeat { + fn clone(&self) -> Self { + ExprRepeat { + attrs: self.attrs.clone(), + bracket_token: self.bracket_token.clone(), + expr: self.expr.clone(), + semi_token: self.semi_token.clone(), + len: self.len.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprReturn { + fn clone(&self) -> Self { + ExprReturn { + attrs: self.attrs.clone(), + return_token: self.return_token.clone(), + expr: self.expr.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprStruct { + fn clone(&self) -> Self { + ExprStruct { + attrs: self.attrs.clone(), + qself: self.qself.clone(), + path: self.path.clone(), + brace_token: self.brace_token.clone(), + fields: self.fields.clone(), + dot2_token: self.dot2_token.clone(), + rest: self.rest.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprTry { + fn clone(&self) -> Self { + ExprTry { + attrs: self.attrs.clone(), + expr: self.expr.clone(), + question_token: self.question_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprTryBlock { + fn clone(&self) -> Self { + ExprTryBlock { + attrs: self.attrs.clone(), + try_token: self.try_token.clone(), + block: self.block.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprTuple { + fn clone(&self) -> Self { + ExprTuple { + attrs: self.attrs.clone(), + paren_token: self.paren_token.clone(), + elems: self.elems.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprUnary { + fn clone(&self) -> Self { + ExprUnary { + attrs: self.attrs.clone(), + op: self.op.clone(), + expr: self.expr.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprUnsafe { + fn clone(&self) -> Self { + ExprUnsafe { + attrs: self.attrs.clone(), + unsafe_token: self.unsafe_token.clone(), + block: self.block.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprWhile { + fn clone(&self) -> Self { + ExprWhile { + attrs: self.attrs.clone(), + label: self.label.clone(), + while_token: self.while_token.clone(), + cond: self.cond.clone(), + body: self.body.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ExprYield { + fn clone(&self) -> Self { + ExprYield { + attrs: 
self.attrs.clone(), + yield_token: self.yield_token.clone(), + expr: self.expr.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Field { + fn clone(&self) -> Self { + Field { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + mutability: self.mutability.clone(), + ident: self.ident.clone(), + colon_token: self.colon_token.clone(), + ty: self.ty.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for FieldMutability { + fn clone(&self) -> Self { + match self { + FieldMutability::None => FieldMutability::None, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for FieldPat { + fn clone(&self) -> Self { + FieldPat { + attrs: self.attrs.clone(), + member: self.member.clone(), + colon_token: self.colon_token.clone(), + pat: self.pat.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for FieldValue { + fn clone(&self) -> Self { + FieldValue { + attrs: self.attrs.clone(), + member: self.member.clone(), + colon_token: self.colon_token.clone(), + expr: self.expr.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Fields { + fn clone(&self) -> Self { + match self { + Fields::Named(v0) => Fields::Named(v0.clone()), + Fields::Unnamed(v0) => Fields::Unnamed(v0.clone()), + Fields::Unit => Fields::Unit, + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for FieldsNamed { + fn clone(&self) -> Self { + FieldsNamed { + brace_token: self.brace_token.clone(), + named: self.named.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for FieldsUnnamed { + fn clone(&self) -> Self { + FieldsUnnamed { + paren_token: self.paren_token.clone(), + unnamed: self.unnamed.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for File { + fn clone(&self) -> Self { + File { + shebang: self.shebang.clone(), + attrs: self.attrs.clone(), + items: self.items.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for FnArg { + fn clone(&self) -> Self { + match self { + FnArg::Receiver(v0) => FnArg::Receiver(v0.clone()), + FnArg::Typed(v0) => FnArg::Typed(v0.clone()), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ForeignItem { + fn clone(&self) -> Self { + match self { + ForeignItem::Fn(v0) => ForeignItem::Fn(v0.clone()), + ForeignItem::Static(v0) => ForeignItem::Static(v0.clone()), + ForeignItem::Type(v0) => ForeignItem::Type(v0.clone()), + ForeignItem::Macro(v0) => ForeignItem::Macro(v0.clone()), + ForeignItem::Verbatim(v0) => ForeignItem::Verbatim(v0.clone()), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ForeignItemFn { + fn clone(&self) -> Self { + ForeignItemFn { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + sig: self.sig.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for 
ForeignItemMacro { + fn clone(&self) -> Self { + ForeignItemMacro { + attrs: self.attrs.clone(), + mac: self.mac.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ForeignItemStatic { + fn clone(&self) -> Self { + ForeignItemStatic { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + static_token: self.static_token.clone(), + mutability: self.mutability.clone(), + ident: self.ident.clone(), + colon_token: self.colon_token.clone(), + ty: self.ty.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ForeignItemType { + fn clone(&self) -> Self { + ForeignItemType { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + type_token: self.type_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for GenericArgument { + fn clone(&self) -> Self { + match self { + GenericArgument::Lifetime(v0) => GenericArgument::Lifetime(v0.clone()), + GenericArgument::Type(v0) => GenericArgument::Type(v0.clone()), + GenericArgument::Const(v0) => GenericArgument::Const(v0.clone()), + GenericArgument::AssocType(v0) => GenericArgument::AssocType(v0.clone()), + GenericArgument::AssocConst(v0) => GenericArgument::AssocConst(v0.clone()), + GenericArgument::Constraint(v0) => GenericArgument::Constraint(v0.clone()), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for GenericParam { + fn clone(&self) -> Self { + match self { + GenericParam::Lifetime(v0) => GenericParam::Lifetime(v0.clone()), + GenericParam::Type(v0) => GenericParam::Type(v0.clone()), + GenericParam::Const(v0) => GenericParam::Const(v0.clone()), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Generics { + fn clone(&self) -> Self { + Generics { + lt_token: self.lt_token.clone(), + params: self.params.clone(), + gt_token: self.gt_token.clone(), + where_clause: self.where_clause.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ImplItem { + fn clone(&self) -> Self { + match self { + ImplItem::Const(v0) => ImplItem::Const(v0.clone()), + ImplItem::Fn(v0) => ImplItem::Fn(v0.clone()), + ImplItem::Type(v0) => ImplItem::Type(v0.clone()), + ImplItem::Macro(v0) => ImplItem::Macro(v0.clone()), + ImplItem::Verbatim(v0) => ImplItem::Verbatim(v0.clone()), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ImplItemConst { + fn clone(&self) -> Self { + ImplItemConst { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + defaultness: self.defaultness.clone(), + const_token: self.const_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + colon_token: self.colon_token.clone(), + ty: self.ty.clone(), + eq_token: self.eq_token.clone(), + expr: self.expr.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ImplItemFn { + fn clone(&self) -> Self { + ImplItemFn { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + defaultness: 
self.defaultness.clone(), + sig: self.sig.clone(), + block: self.block.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ImplItemMacro { + fn clone(&self) -> Self { + ImplItemMacro { + attrs: self.attrs.clone(), + mac: self.mac.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ImplItemType { + fn clone(&self) -> Self { + ImplItemType { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + defaultness: self.defaultness.clone(), + type_token: self.type_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + eq_token: self.eq_token.clone(), + ty: self.ty.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ImplRestriction { + fn clone(&self) -> Self { + match *self {} + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Index { + fn clone(&self) -> Self { + Index { + index: self.index.clone(), + span: self.span.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Item { + fn clone(&self) -> Self { + match self { + Item::Const(v0) => Item::Const(v0.clone()), + Item::Enum(v0) => Item::Enum(v0.clone()), + Item::ExternCrate(v0) => Item::ExternCrate(v0.clone()), + Item::Fn(v0) => Item::Fn(v0.clone()), + Item::ForeignMod(v0) => Item::ForeignMod(v0.clone()), + Item::Impl(v0) => Item::Impl(v0.clone()), + Item::Macro(v0) => Item::Macro(v0.clone()), + Item::Mod(v0) => Item::Mod(v0.clone()), + Item::Static(v0) => Item::Static(v0.clone()), + Item::Struct(v0) => Item::Struct(v0.clone()), + Item::Trait(v0) => Item::Trait(v0.clone()), + Item::TraitAlias(v0) => Item::TraitAlias(v0.clone()), + Item::Type(v0) => Item::Type(v0.clone()), + Item::Union(v0) => Item::Union(v0.clone()), + Item::Use(v0) => Item::Use(v0.clone()), + Item::Verbatim(v0) => Item::Verbatim(v0.clone()), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemConst { + fn clone(&self) -> Self { + ItemConst { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + const_token: self.const_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + colon_token: self.colon_token.clone(), + ty: self.ty.clone(), + eq_token: self.eq_token.clone(), + expr: self.expr.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemEnum { + fn clone(&self) -> Self { + ItemEnum { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + enum_token: self.enum_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + brace_token: self.brace_token.clone(), + variants: self.variants.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemExternCrate { + fn clone(&self) -> Self { + ItemExternCrate { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + extern_token: self.extern_token.clone(), + crate_token: self.crate_token.clone(), + ident: self.ident.clone(), + rename: self.rename.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemFn { + fn 
clone(&self) -> Self { + ItemFn { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + sig: self.sig.clone(), + block: self.block.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemForeignMod { + fn clone(&self) -> Self { + ItemForeignMod { + attrs: self.attrs.clone(), + unsafety: self.unsafety.clone(), + abi: self.abi.clone(), + brace_token: self.brace_token.clone(), + items: self.items.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemImpl { + fn clone(&self) -> Self { + ItemImpl { + attrs: self.attrs.clone(), + defaultness: self.defaultness.clone(), + unsafety: self.unsafety.clone(), + impl_token: self.impl_token.clone(), + generics: self.generics.clone(), + trait_: self.trait_.clone(), + self_ty: self.self_ty.clone(), + brace_token: self.brace_token.clone(), + items: self.items.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemMacro { + fn clone(&self) -> Self { + ItemMacro { + attrs: self.attrs.clone(), + ident: self.ident.clone(), + mac: self.mac.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemMod { + fn clone(&self) -> Self { + ItemMod { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + unsafety: self.unsafety.clone(), + mod_token: self.mod_token.clone(), + ident: self.ident.clone(), + content: self.content.clone(), + semi: self.semi.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemStatic { + fn clone(&self) -> Self { + ItemStatic { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + static_token: self.static_token.clone(), + mutability: self.mutability.clone(), + ident: self.ident.clone(), + colon_token: self.colon_token.clone(), + ty: self.ty.clone(), + eq_token: self.eq_token.clone(), + expr: self.expr.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemStruct { + fn clone(&self) -> Self { + ItemStruct { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + struct_token: self.struct_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + fields: self.fields.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemTrait { + fn clone(&self) -> Self { + ItemTrait { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + unsafety: self.unsafety.clone(), + auto_token: self.auto_token.clone(), + restriction: self.restriction.clone(), + trait_token: self.trait_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + colon_token: self.colon_token.clone(), + supertraits: self.supertraits.clone(), + brace_token: self.brace_token.clone(), + items: self.items.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemTraitAlias { + fn clone(&self) -> Self { + ItemTraitAlias { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + trait_token: self.trait_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + eq_token: self.eq_token.clone(), + bounds: self.bounds.clone(), + semi_token: self.semi_token.clone(), + } + } +} 
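Editorial sketch (not part of the vendored patch): these generated field-by-field `Clone` impls are what allow deep-copying a whole syntax tree, so a clone can be mutated without touching the original node. Assumes `syn` with the "full", "parsing", "printing" and "clone-impls" features; `answer`/`answer_v2` are placeholder names.

```rust
use syn::{parse_quote, ItemFn};

fn main() {
    let original: ItemFn = parse_quote! {
        fn answer() -> i32 { 42 }
    };
    // Deep copy via the generated `impl Clone for ItemFn` above.
    let mut copy = original.clone();
    copy.sig.ident = syn::Ident::new("answer_v2", copy.sig.ident.span());
    // The original tree is unaffected by edits to the clone.
    assert_eq!(original.sig.ident, "answer");
    assert_eq!(copy.sig.ident, "answer_v2");
}
```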
+#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemType { + fn clone(&self) -> Self { + ItemType { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + type_token: self.type_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + eq_token: self.eq_token.clone(), + ty: self.ty.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemUnion { + fn clone(&self) -> Self { + ItemUnion { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + union_token: self.union_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + fields: self.fields.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ItemUse { + fn clone(&self) -> Self { + ItemUse { + attrs: self.attrs.clone(), + vis: self.vis.clone(), + use_token: self.use_token.clone(), + leading_colon: self.leading_colon.clone(), + tree: self.tree.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Label { + fn clone(&self) -> Self { + Label { + name: self.name.clone(), + colon_token: self.colon_token.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for LifetimeParam { + fn clone(&self) -> Self { + LifetimeParam { + attrs: self.attrs.clone(), + lifetime: self.lifetime.clone(), + colon_token: self.colon_token.clone(), + bounds: self.bounds.clone(), + } + } +} +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Lit { + fn clone(&self) -> Self { + match self { + Lit::Str(v0) => Lit::Str(v0.clone()), + Lit::ByteStr(v0) => Lit::ByteStr(v0.clone()), + Lit::Byte(v0) => Lit::Byte(v0.clone()), + Lit::Char(v0) => Lit::Char(v0.clone()), + Lit::Int(v0) => Lit::Int(v0.clone()), + Lit::Float(v0) => Lit::Float(v0.clone()), + Lit::Bool(v0) => Lit::Bool(v0.clone()), + Lit::Verbatim(v0) => Lit::Verbatim(v0.clone()), + } + } +} +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for LitBool { + fn clone(&self) -> Self { + LitBool { + value: self.value.clone(), + span: self.span.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Local { + fn clone(&self) -> Self { + Local { + attrs: self.attrs.clone(), + let_token: self.let_token.clone(), + pat: self.pat.clone(), + init: self.init.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for LocalInit { + fn clone(&self) -> Self { + LocalInit { + eq_token: self.eq_token.clone(), + expr: self.expr.clone(), + diverge: self.diverge.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Macro { + fn clone(&self) -> Self { + Macro { + path: self.path.clone(), + bang_token: self.bang_token.clone(), + delimiter: self.delimiter.clone(), + tokens: self.tokens.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for MacroDelimiter { + fn clone(&self) -> Self { + match self { + MacroDelimiter::Paren(v0) => MacroDelimiter::Paren(v0.clone()), + MacroDelimiter::Brace(v0) => 
MacroDelimiter::Brace(v0.clone()), + MacroDelimiter::Bracket(v0) => MacroDelimiter::Bracket(v0.clone()), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Member { + fn clone(&self) -> Self { + match self { + Member::Named(v0) => Member::Named(v0.clone()), + Member::Unnamed(v0) => Member::Unnamed(v0.clone()), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Meta { + fn clone(&self) -> Self { + match self { + Meta::Path(v0) => Meta::Path(v0.clone()), + Meta::List(v0) => Meta::List(v0.clone()), + Meta::NameValue(v0) => Meta::NameValue(v0.clone()), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for MetaList { + fn clone(&self) -> Self { + MetaList { + path: self.path.clone(), + delimiter: self.delimiter.clone(), + tokens: self.tokens.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for MetaNameValue { + fn clone(&self) -> Self { + MetaNameValue { + path: self.path.clone(), + eq_token: self.eq_token.clone(), + value: self.value.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ParenthesizedGenericArguments { + fn clone(&self) -> Self { + ParenthesizedGenericArguments { + paren_token: self.paren_token.clone(), + inputs: self.inputs.clone(), + output: self.output.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Pat { + fn clone(&self) -> Self { + match self { + Pat::Const(v0) => Pat::Const(v0.clone()), + Pat::Ident(v0) => Pat::Ident(v0.clone()), + Pat::Lit(v0) => Pat::Lit(v0.clone()), + Pat::Macro(v0) => Pat::Macro(v0.clone()), + Pat::Or(v0) => Pat::Or(v0.clone()), + Pat::Paren(v0) => Pat::Paren(v0.clone()), + Pat::Path(v0) => Pat::Path(v0.clone()), + Pat::Range(v0) => Pat::Range(v0.clone()), + Pat::Reference(v0) => Pat::Reference(v0.clone()), + Pat::Rest(v0) => Pat::Rest(v0.clone()), + Pat::Slice(v0) => Pat::Slice(v0.clone()), + Pat::Struct(v0) => Pat::Struct(v0.clone()), + Pat::Tuple(v0) => Pat::Tuple(v0.clone()), + Pat::TupleStruct(v0) => Pat::TupleStruct(v0.clone()), + Pat::Type(v0) => Pat::Type(v0.clone()), + Pat::Verbatim(v0) => Pat::Verbatim(v0.clone()), + Pat::Wild(v0) => Pat::Wild(v0.clone()), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PatIdent { + fn clone(&self) -> Self { + PatIdent { + attrs: self.attrs.clone(), + by_ref: self.by_ref.clone(), + mutability: self.mutability.clone(), + ident: self.ident.clone(), + subpat: self.subpat.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PatOr { + fn clone(&self) -> Self { + PatOr { + attrs: self.attrs.clone(), + leading_vert: self.leading_vert.clone(), + cases: self.cases.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PatParen { + fn clone(&self) -> Self { + PatParen { + attrs: self.attrs.clone(), + paren_token: self.paren_token.clone(), + pat: self.pat.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PatReference { + fn clone(&self) -> Self { + PatReference { + 
attrs: self.attrs.clone(), + and_token: self.and_token.clone(), + mutability: self.mutability.clone(), + pat: self.pat.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PatRest { + fn clone(&self) -> Self { + PatRest { + attrs: self.attrs.clone(), + dot2_token: self.dot2_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PatSlice { + fn clone(&self) -> Self { + PatSlice { + attrs: self.attrs.clone(), + bracket_token: self.bracket_token.clone(), + elems: self.elems.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PatStruct { + fn clone(&self) -> Self { + PatStruct { + attrs: self.attrs.clone(), + qself: self.qself.clone(), + path: self.path.clone(), + brace_token: self.brace_token.clone(), + fields: self.fields.clone(), + rest: self.rest.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PatTuple { + fn clone(&self) -> Self { + PatTuple { + attrs: self.attrs.clone(), + paren_token: self.paren_token.clone(), + elems: self.elems.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PatTupleStruct { + fn clone(&self) -> Self { + PatTupleStruct { + attrs: self.attrs.clone(), + qself: self.qself.clone(), + path: self.path.clone(), + paren_token: self.paren_token.clone(), + elems: self.elems.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PatType { + fn clone(&self) -> Self { + PatType { + attrs: self.attrs.clone(), + pat: self.pat.clone(), + colon_token: self.colon_token.clone(), + ty: self.ty.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PatWild { + fn clone(&self) -> Self { + PatWild { + attrs: self.attrs.clone(), + underscore_token: self.underscore_token.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Path { + fn clone(&self) -> Self { + Path { + leading_colon: self.leading_colon.clone(), + segments: self.segments.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PathArguments { + fn clone(&self) -> Self { + match self { + PathArguments::None => PathArguments::None, + PathArguments::AngleBracketed(v0) => { + PathArguments::AngleBracketed(v0.clone()) + } + PathArguments::Parenthesized(v0) => PathArguments::Parenthesized(v0.clone()), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PathSegment { + fn clone(&self) -> Self { + PathSegment { + ident: self.ident.clone(), + arguments: self.arguments.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PredicateLifetime { + fn clone(&self) -> Self { + PredicateLifetime { + lifetime: self.lifetime.clone(), + colon_token: self.colon_token.clone(), + bounds: self.bounds.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for PredicateType { + fn clone(&self) -> Self { + PredicateType { + lifetimes: self.lifetimes.clone(), + 
bounded_ty: self.bounded_ty.clone(), + colon_token: self.colon_token.clone(), + bounds: self.bounds.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for QSelf { + fn clone(&self) -> Self { + QSelf { + lt_token: self.lt_token.clone(), + ty: self.ty.clone(), + position: self.position.clone(), + as_token: self.as_token.clone(), + gt_token: self.gt_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Copy for RangeLimits {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for RangeLimits { + fn clone(&self) -> Self { + *self + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Receiver { + fn clone(&self) -> Self { + Receiver { + attrs: self.attrs.clone(), + reference: self.reference.clone(), + mutability: self.mutability.clone(), + self_token: self.self_token.clone(), + colon_token: self.colon_token.clone(), + ty: self.ty.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for ReturnType { + fn clone(&self) -> Self { + match self { + ReturnType::Default => ReturnType::Default, + ReturnType::Type(v0, v1) => ReturnType::Type(v0.clone(), v1.clone()), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Signature { + fn clone(&self) -> Self { + Signature { + constness: self.constness.clone(), + asyncness: self.asyncness.clone(), + unsafety: self.unsafety.clone(), + abi: self.abi.clone(), + fn_token: self.fn_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + paren_token: self.paren_token.clone(), + inputs: self.inputs.clone(), + variadic: self.variadic.clone(), + output: self.output.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for StaticMutability { + fn clone(&self) -> Self { + match self { + StaticMutability::Mut(v0) => StaticMutability::Mut(v0.clone()), + StaticMutability::None => StaticMutability::None, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Stmt { + fn clone(&self) -> Self { + match self { + Stmt::Local(v0) => Stmt::Local(v0.clone()), + Stmt::Item(v0) => Stmt::Item(v0.clone()), + Stmt::Expr(v0, v1) => Stmt::Expr(v0.clone(), v1.clone()), + Stmt::Macro(v0) => Stmt::Macro(v0.clone()), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for StmtMacro { + fn clone(&self) -> Self { + StmtMacro { + attrs: self.attrs.clone(), + mac: self.mac.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TraitBound { + fn clone(&self) -> Self { + TraitBound { + paren_token: self.paren_token.clone(), + modifier: self.modifier.clone(), + lifetimes: self.lifetimes.clone(), + path: self.path.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Copy for TraitBoundModifier {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TraitBoundModifier { + fn clone(&self) -> Self { + *self + } +} +#[cfg(feature = "full")] 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TraitItem { + fn clone(&self) -> Self { + match self { + TraitItem::Const(v0) => TraitItem::Const(v0.clone()), + TraitItem::Fn(v0) => TraitItem::Fn(v0.clone()), + TraitItem::Type(v0) => TraitItem::Type(v0.clone()), + TraitItem::Macro(v0) => TraitItem::Macro(v0.clone()), + TraitItem::Verbatim(v0) => TraitItem::Verbatim(v0.clone()), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TraitItemConst { + fn clone(&self) -> Self { + TraitItemConst { + attrs: self.attrs.clone(), + const_token: self.const_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + colon_token: self.colon_token.clone(), + ty: self.ty.clone(), + default: self.default.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TraitItemFn { + fn clone(&self) -> Self { + TraitItemFn { + attrs: self.attrs.clone(), + sig: self.sig.clone(), + default: self.default.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TraitItemMacro { + fn clone(&self) -> Self { + TraitItemMacro { + attrs: self.attrs.clone(), + mac: self.mac.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TraitItemType { + fn clone(&self) -> Self { + TraitItemType { + attrs: self.attrs.clone(), + type_token: self.type_token.clone(), + ident: self.ident.clone(), + generics: self.generics.clone(), + colon_token: self.colon_token.clone(), + bounds: self.bounds.clone(), + default: self.default.clone(), + semi_token: self.semi_token.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for Type { + fn clone(&self) -> Self { + match self { + Type::Array(v0) => Type::Array(v0.clone()), + Type::BareFn(v0) => Type::BareFn(v0.clone()), + Type::Group(v0) => Type::Group(v0.clone()), + Type::ImplTrait(v0) => Type::ImplTrait(v0.clone()), + Type::Infer(v0) => Type::Infer(v0.clone()), + Type::Macro(v0) => Type::Macro(v0.clone()), + Type::Never(v0) => Type::Never(v0.clone()), + Type::Paren(v0) => Type::Paren(v0.clone()), + Type::Path(v0) => Type::Path(v0.clone()), + Type::Ptr(v0) => Type::Ptr(v0.clone()), + Type::Reference(v0) => Type::Reference(v0.clone()), + Type::Slice(v0) => Type::Slice(v0.clone()), + Type::TraitObject(v0) => Type::TraitObject(v0.clone()), + Type::Tuple(v0) => Type::Tuple(v0.clone()), + Type::Verbatim(v0) => Type::Verbatim(v0.clone()), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeArray { + fn clone(&self) -> Self { + TypeArray { + bracket_token: self.bracket_token.clone(), + elem: self.elem.clone(), + semi_token: self.semi_token.clone(), + len: self.len.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeBareFn { + fn clone(&self) -> Self { + TypeBareFn { + lifetimes: self.lifetimes.clone(), + unsafety: self.unsafety.clone(), + abi: self.abi.clone(), + fn_token: self.fn_token.clone(), + paren_token: self.paren_token.clone(), + inputs: self.inputs.clone(), + variadic: self.variadic.clone(), + output: self.output.clone(), + } 
+ } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeGroup { + fn clone(&self) -> Self { + TypeGroup { + group_token: self.group_token.clone(), + elem: self.elem.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeImplTrait { + fn clone(&self) -> Self { + TypeImplTrait { + impl_token: self.impl_token.clone(), + bounds: self.bounds.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeInfer { + fn clone(&self) -> Self { + TypeInfer { + underscore_token: self.underscore_token.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeMacro { + fn clone(&self) -> Self { + TypeMacro { mac: self.mac.clone() } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeNever { + fn clone(&self) -> Self { + TypeNever { + bang_token: self.bang_token.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeParam { + fn clone(&self) -> Self { + TypeParam { + attrs: self.attrs.clone(), + ident: self.ident.clone(), + colon_token: self.colon_token.clone(), + bounds: self.bounds.clone(), + eq_token: self.eq_token.clone(), + default: self.default.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeParamBound { + fn clone(&self) -> Self { + match self { + TypeParamBound::Trait(v0) => TypeParamBound::Trait(v0.clone()), + TypeParamBound::Lifetime(v0) => TypeParamBound::Lifetime(v0.clone()), + TypeParamBound::Verbatim(v0) => TypeParamBound::Verbatim(v0.clone()), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeParen { + fn clone(&self) -> Self { + TypeParen { + paren_token: self.paren_token.clone(), + elem: self.elem.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypePath { + fn clone(&self) -> Self { + TypePath { + qself: self.qself.clone(), + path: self.path.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypePtr { + fn clone(&self) -> Self { + TypePtr { + star_token: self.star_token.clone(), + const_token: self.const_token.clone(), + mutability: self.mutability.clone(), + elem: self.elem.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeReference { + fn clone(&self) -> Self { + TypeReference { + and_token: self.and_token.clone(), + lifetime: self.lifetime.clone(), + mutability: self.mutability.clone(), + elem: self.elem.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for TypeSlice { + fn clone(&self) -> Self { + TypeSlice { + bracket_token: self.bracket_token.clone(), + elem: self.elem.clone(), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for 
TypeTraitObject {
+    fn clone(&self) -> Self {
+        TypeTraitObject {
+            dyn_token: self.dyn_token.clone(),
+            bounds: self.bounds.clone(),
+        }
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for TypeTuple {
+    fn clone(&self) -> Self {
+        TypeTuple {
+            paren_token: self.paren_token.clone(),
+            elems: self.elems.clone(),
+        }
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Copy for UnOp {}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for UnOp {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+#[cfg(feature = "full")]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for UseGlob {
+    fn clone(&self) -> Self {
+        UseGlob {
+            star_token: self.star_token.clone(),
+        }
+    }
+}
+#[cfg(feature = "full")]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for UseGroup {
+    fn clone(&self) -> Self {
+        UseGroup {
+            brace_token: self.brace_token.clone(),
+            items: self.items.clone(),
+        }
+    }
+}
+#[cfg(feature = "full")]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for UseName {
+    fn clone(&self) -> Self {
+        UseName {
+            ident: self.ident.clone(),
+        }
+    }
+}
+#[cfg(feature = "full")]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for UsePath {
+    fn clone(&self) -> Self {
+        UsePath {
+            ident: self.ident.clone(),
+            colon2_token: self.colon2_token.clone(),
+            tree: self.tree.clone(),
+        }
+    }
+}
+#[cfg(feature = "full")]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for UseRename {
+    fn clone(&self) -> Self {
+        UseRename {
+            ident: self.ident.clone(),
+            as_token: self.as_token.clone(),
+            rename: self.rename.clone(),
+        }
+    }
+}
+#[cfg(feature = "full")]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for UseTree {
+    fn clone(&self) -> Self {
+        match self {
+            UseTree::Path(v0) => UseTree::Path(v0.clone()),
+            UseTree::Name(v0) => UseTree::Name(v0.clone()),
+            UseTree::Rename(v0) => UseTree::Rename(v0.clone()),
+            UseTree::Glob(v0) => UseTree::Glob(v0.clone()),
+            UseTree::Group(v0) => UseTree::Group(v0.clone()),
+        }
+    }
+}
+#[cfg(feature = "full")]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for Variadic {
+    fn clone(&self) -> Self {
+        Variadic {
+            attrs: self.attrs.clone(),
+            pat: self.pat.clone(),
+            dots: self.dots.clone(),
+            comma: self.comma.clone(),
+        }
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for Variant {
+    fn clone(&self) -> Self {
+        Variant {
+            attrs: self.attrs.clone(),
+            ident: self.ident.clone(),
+            fields: self.fields.clone(),
+            discriminant: self.discriminant.clone(),
+        }
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for VisRestricted {
+    fn clone(&self) -> Self {
+        VisRestricted {
+            pub_token: self.pub_token.clone(),
+            paren_token: self.paren_token.clone(),
+            in_token: self.in_token.clone(),
+            path: self.path.clone(),
+        }
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for Visibility {
+    fn clone(&self) -> Self {
+        match self {
+            Visibility::Public(v0) => Visibility::Public(v0.clone()),
+            Visibility::Restricted(v0) => Visibility::Restricted(v0.clone()),
+            Visibility::Inherited => Visibility::Inherited,
+        }
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for WhereClause {
+    fn clone(&self) -> Self {
+        WhereClause {
+            where_token: self.where_token.clone(),
+            predicates: self.predicates.clone(),
+        }
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))]
+impl Clone for WherePredicate {
+    fn clone(&self) -> Self {
+        match self {
+            WherePredicate::Lifetime(v0) => WherePredicate::Lifetime(v0.clone()),
+            WherePredicate::Type(v0) => WherePredicate::Type(v0.clone()),
+        }
+    }
+}
diff --git a/vendor/syn/src/gen/debug.rs b/vendor/syn/src/gen/debug.rs
new file mode 100644
index 0000000..553497b
--- /dev/null
+++ b/vendor/syn/src/gen/debug.rs
@@ -0,0 +1,3052 @@
+// This file is @generated by syn-internal-codegen.
+// It is not intended for manual editing.
+
+use crate::*;
+use std::fmt::{self, Debug};
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))]
+impl Debug for Abi {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        let mut formatter = formatter.debug_struct("Abi");
+        formatter.field("extern_token", &self.extern_token);
+        formatter.field("name", &self.name);
+        formatter.finish()
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))]
+impl Debug for AngleBracketedGenericArguments {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        impl AngleBracketedGenericArguments {
+            fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result {
+                let mut formatter = formatter.debug_struct(name);
+                formatter.field("colon2_token", &self.colon2_token);
+                formatter.field("lt_token", &self.lt_token);
+                formatter.field("args", &self.args);
+                formatter.field("gt_token", &self.gt_token);
+                formatter.finish()
+            }
+        }
+        self.debug(formatter, "AngleBracketedGenericArguments")
+    }
+}
+#[cfg(feature = "full")]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))]
+impl Debug for Arm {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        let mut formatter = formatter.debug_struct("Arm");
+        formatter.field("attrs", &self.attrs);
+        formatter.field("pat", &self.pat);
+        formatter.field("guard", &self.guard);
+        formatter.field("fat_arrow_token", &self.fat_arrow_token);
+        formatter.field("body", &self.body);
+        formatter.field("comma", &self.comma);
+        formatter.finish()
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))]
+impl Debug for AssocConst {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        let mut formatter = formatter.debug_struct("AssocConst");
+        formatter.field("ident", &self.ident);
+        formatter.field("generics", &self.generics);
+        formatter.field("eq_token", &self.eq_token);
+        formatter.field("value", &self.value);
+        formatter.finish()
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))]
+impl Debug for AssocType {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        let mut formatter = formatter.debug_struct("AssocType");
+        formatter.field("ident", &self.ident);
+        formatter.field("generics", &self.generics);
+        formatter.field("eq_token", &self.eq_token);
+        formatter.field("ty", &self.ty);
+        formatter.finish()
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))]
+impl Debug for
AttrStyle {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("AttrStyle::")?;
+        match self {
+            AttrStyle::Outer => formatter.write_str("Outer"),
+            AttrStyle::Inner(v0) => {
+                let mut formatter = formatter.debug_tuple("Inner");
+                formatter.field(v0);
+                formatter.finish()
+            }
+        }
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))]
+impl Debug for Attribute {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        let mut formatter = formatter.debug_struct("Attribute");
+        formatter.field("pound_token", &self.pound_token);
+        formatter.field("style", &self.style);
+        formatter.field("bracket_token", &self.bracket_token);
+        formatter.field("meta", &self.meta);
+        formatter.finish()
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))]
+impl Debug for BareFnArg {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        let mut formatter = formatter.debug_struct("BareFnArg");
+        formatter.field("attrs", &self.attrs);
+        formatter.field("name", &self.name);
+        formatter.field("ty", &self.ty);
+        formatter.finish()
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))]
+impl Debug for BareVariadic {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        let mut formatter = formatter.debug_struct("BareVariadic");
+        formatter.field("attrs", &self.attrs);
+        formatter.field("name", &self.name);
+        formatter.field("dots", &self.dots);
+        formatter.field("comma", &self.comma);
+        formatter.finish()
+    }
+}
+#[cfg(any(feature = "derive", feature = "full"))]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))]
+impl Debug for BinOp {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("BinOp::")?;
+        match self {
+            BinOp::Add(v0) => {
+                let mut formatter = formatter.debug_tuple("Add");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::Sub(v0) => {
+                let mut formatter = formatter.debug_tuple("Sub");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::Mul(v0) => {
+                let mut formatter = formatter.debug_tuple("Mul");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::Div(v0) => {
+                let mut formatter = formatter.debug_tuple("Div");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::Rem(v0) => {
+                let mut formatter = formatter.debug_tuple("Rem");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::And(v0) => {
+                let mut formatter = formatter.debug_tuple("And");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::Or(v0) => {
+                let mut formatter = formatter.debug_tuple("Or");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::BitXor(v0) => {
+                let mut formatter = formatter.debug_tuple("BitXor");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::BitAnd(v0) => {
+                let mut formatter = formatter.debug_tuple("BitAnd");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::BitOr(v0) => {
+                let mut formatter = formatter.debug_tuple("BitOr");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::Shl(v0) => {
+                let mut formatter = formatter.debug_tuple("Shl");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::Shr(v0) => {
+                let mut formatter = formatter.debug_tuple("Shr");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::Eq(v0) => {
+                let mut formatter = formatter.debug_tuple("Eq");
+                formatter.field(v0);
+                formatter.finish()
+            }
+            BinOp::Lt(v0) => {
+                let mut formatter =
formatter.debug_tuple("Lt"); + formatter.field(v0); + formatter.finish() + } + BinOp::Le(v0) => { + let mut formatter = formatter.debug_tuple("Le"); + formatter.field(v0); + formatter.finish() + } + BinOp::Ne(v0) => { + let mut formatter = formatter.debug_tuple("Ne"); + formatter.field(v0); + formatter.finish() + } + BinOp::Ge(v0) => { + let mut formatter = formatter.debug_tuple("Ge"); + formatter.field(v0); + formatter.finish() + } + BinOp::Gt(v0) => { + let mut formatter = formatter.debug_tuple("Gt"); + formatter.field(v0); + formatter.finish() + } + BinOp::AddAssign(v0) => { + let mut formatter = formatter.debug_tuple("AddAssign"); + formatter.field(v0); + formatter.finish() + } + BinOp::SubAssign(v0) => { + let mut formatter = formatter.debug_tuple("SubAssign"); + formatter.field(v0); + formatter.finish() + } + BinOp::MulAssign(v0) => { + let mut formatter = formatter.debug_tuple("MulAssign"); + formatter.field(v0); + formatter.finish() + } + BinOp::DivAssign(v0) => { + let mut formatter = formatter.debug_tuple("DivAssign"); + formatter.field(v0); + formatter.finish() + } + BinOp::RemAssign(v0) => { + let mut formatter = formatter.debug_tuple("RemAssign"); + formatter.field(v0); + formatter.finish() + } + BinOp::BitXorAssign(v0) => { + let mut formatter = formatter.debug_tuple("BitXorAssign"); + formatter.field(v0); + formatter.finish() + } + BinOp::BitAndAssign(v0) => { + let mut formatter = formatter.debug_tuple("BitAndAssign"); + formatter.field(v0); + formatter.finish() + } + BinOp::BitOrAssign(v0) => { + let mut formatter = formatter.debug_tuple("BitOrAssign"); + formatter.field(v0); + formatter.finish() + } + BinOp::ShlAssign(v0) => { + let mut formatter = formatter.debug_tuple("ShlAssign"); + formatter.field(v0); + formatter.finish() + } + BinOp::ShrAssign(v0) => { + let mut formatter = formatter.debug_tuple("ShrAssign"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Block { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("Block"); + formatter.field("brace_token", &self.brace_token); + formatter.field("stmts", &self.stmts); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for BoundLifetimes { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("BoundLifetimes"); + formatter.field("for_token", &self.for_token); + formatter.field("lt_token", &self.lt_token); + formatter.field("lifetimes", &self.lifetimes); + formatter.field("gt_token", &self.gt_token); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ConstParam { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("ConstParam"); + formatter.field("attrs", &self.attrs); + formatter.field("const_token", &self.const_token); + formatter.field("ident", &self.ident); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.field("eq_token", &self.eq_token); + formatter.field("default", &self.default); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Constraint { + fn fmt(&self, formatter: &mut 
fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("Constraint"); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("colon_token", &self.colon_token); + formatter.field("bounds", &self.bounds); + formatter.finish() + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Data { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Data::")?; + match self { + Data::Struct(v0) => v0.debug(formatter, "Struct"), + Data::Enum(v0) => v0.debug(formatter, "Enum"), + Data::Union(v0) => v0.debug(formatter, "Union"), + } + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for DataEnum { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl DataEnum { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("enum_token", &self.enum_token); + formatter.field("brace_token", &self.brace_token); + formatter.field("variants", &self.variants); + formatter.finish() + } + } + self.debug(formatter, "DataEnum") + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for DataStruct { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl DataStruct { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("struct_token", &self.struct_token); + formatter.field("fields", &self.fields); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "DataStruct") + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for DataUnion { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl DataUnion { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("union_token", &self.union_token); + formatter.field("fields", &self.fields); + formatter.finish() + } + } + self.debug(formatter, "DataUnion") + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for DeriveInput { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("DeriveInput"); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("data", &self.data); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Expr { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Expr::")?; + match self { + #[cfg(feature = "full")] + Expr::Array(v0) => v0.debug(formatter, "Array"), + #[cfg(feature = "full")] + Expr::Assign(v0) => v0.debug(formatter, "Assign"), + #[cfg(feature = "full")] + Expr::Async(v0) => v0.debug(formatter, "Async"), + #[cfg(feature = "full")] + Expr::Await(v0) => v0.debug(formatter, "Await"), + Expr::Binary(v0) => v0.debug(formatter, "Binary"), + #[cfg(feature = "full")] + Expr::Block(v0) => v0.debug(formatter, "Block"), + #[cfg(feature = "full")] + Expr::Break(v0) => v0.debug(formatter, "Break"), + Expr::Call(v0) 
=> v0.debug(formatter, "Call"), + Expr::Cast(v0) => v0.debug(formatter, "Cast"), + #[cfg(feature = "full")] + Expr::Closure(v0) => v0.debug(formatter, "Closure"), + #[cfg(feature = "full")] + Expr::Const(v0) => v0.debug(formatter, "Const"), + #[cfg(feature = "full")] + Expr::Continue(v0) => v0.debug(formatter, "Continue"), + Expr::Field(v0) => v0.debug(formatter, "Field"), + #[cfg(feature = "full")] + Expr::ForLoop(v0) => v0.debug(formatter, "ForLoop"), + Expr::Group(v0) => v0.debug(formatter, "Group"), + #[cfg(feature = "full")] + Expr::If(v0) => v0.debug(formatter, "If"), + Expr::Index(v0) => v0.debug(formatter, "Index"), + #[cfg(feature = "full")] + Expr::Infer(v0) => v0.debug(formatter, "Infer"), + #[cfg(feature = "full")] + Expr::Let(v0) => v0.debug(formatter, "Let"), + Expr::Lit(v0) => v0.debug(formatter, "Lit"), + #[cfg(feature = "full")] + Expr::Loop(v0) => v0.debug(formatter, "Loop"), + Expr::Macro(v0) => v0.debug(formatter, "Macro"), + #[cfg(feature = "full")] + Expr::Match(v0) => v0.debug(formatter, "Match"), + Expr::MethodCall(v0) => v0.debug(formatter, "MethodCall"), + Expr::Paren(v0) => v0.debug(formatter, "Paren"), + Expr::Path(v0) => v0.debug(formatter, "Path"), + #[cfg(feature = "full")] + Expr::Range(v0) => v0.debug(formatter, "Range"), + Expr::Reference(v0) => v0.debug(formatter, "Reference"), + #[cfg(feature = "full")] + Expr::Repeat(v0) => v0.debug(formatter, "Repeat"), + #[cfg(feature = "full")] + Expr::Return(v0) => v0.debug(formatter, "Return"), + Expr::Struct(v0) => v0.debug(formatter, "Struct"), + #[cfg(feature = "full")] + Expr::Try(v0) => v0.debug(formatter, "Try"), + #[cfg(feature = "full")] + Expr::TryBlock(v0) => v0.debug(formatter, "TryBlock"), + #[cfg(feature = "full")] + Expr::Tuple(v0) => v0.debug(formatter, "Tuple"), + Expr::Unary(v0) => v0.debug(formatter, "Unary"), + #[cfg(feature = "full")] + Expr::Unsafe(v0) => v0.debug(formatter, "Unsafe"), + Expr::Verbatim(v0) => { + let mut formatter = formatter.debug_tuple("Verbatim"); + formatter.field(v0); + formatter.finish() + } + #[cfg(feature = "full")] + Expr::While(v0) => v0.debug(formatter, "While"), + #[cfg(feature = "full")] + Expr::Yield(v0) => v0.debug(formatter, "Yield"), + #[cfg(not(feature = "full"))] + _ => unreachable!(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprArray { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprArray { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("elems", &self.elems); + formatter.finish() + } + } + self.debug(formatter, "ExprArray") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprAssign { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprAssign { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("left", &self.left); + formatter.field("eq_token", &self.eq_token); + formatter.field("right", &self.right); + formatter.finish() + } + } + self.debug(formatter, "ExprAssign") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprAsync { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 
+ impl ExprAsync { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("async_token", &self.async_token); + formatter.field("capture", &self.capture); + formatter.field("block", &self.block); + formatter.finish() + } + } + self.debug(formatter, "ExprAsync") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprAwait { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprAwait { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("base", &self.base); + formatter.field("dot_token", &self.dot_token); + formatter.field("await_token", &self.await_token); + formatter.finish() + } + } + self.debug(formatter, "ExprAwait") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprBinary { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprBinary { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("left", &self.left); + formatter.field("op", &self.op); + formatter.field("right", &self.right); + formatter.finish() + } + } + self.debug(formatter, "ExprBinary") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprBlock { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprBlock { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("label", &self.label); + formatter.field("block", &self.block); + formatter.finish() + } + } + self.debug(formatter, "ExprBlock") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprBreak { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprBreak { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("break_token", &self.break_token); + formatter.field("label", &self.label); + formatter.field("expr", &self.expr); + formatter.finish() + } + } + self.debug(formatter, "ExprBreak") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprCall { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprCall { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("func", &self.func); + formatter.field("paren_token", &self.paren_token); + formatter.field("args", &self.args); + formatter.finish() + } + } + self.debug(formatter, "ExprCall") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprCast { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprCast { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> 
fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("expr", &self.expr); + formatter.field("as_token", &self.as_token); + formatter.field("ty", &self.ty); + formatter.finish() + } + } + self.debug(formatter, "ExprCast") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprClosure { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprClosure { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("lifetimes", &self.lifetimes); + formatter.field("constness", &self.constness); + formatter.field("movability", &self.movability); + formatter.field("asyncness", &self.asyncness); + formatter.field("capture", &self.capture); + formatter.field("or1_token", &self.or1_token); + formatter.field("inputs", &self.inputs); + formatter.field("or2_token", &self.or2_token); + formatter.field("output", &self.output); + formatter.field("body", &self.body); + formatter.finish() + } + } + self.debug(formatter, "ExprClosure") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprConst { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprConst { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("const_token", &self.const_token); + formatter.field("block", &self.block); + formatter.finish() + } + } + self.debug(formatter, "ExprConst") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprContinue { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprContinue { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("continue_token", &self.continue_token); + formatter.field("label", &self.label); + formatter.finish() + } + } + self.debug(formatter, "ExprContinue") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprField { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprField { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("base", &self.base); + formatter.field("dot_token", &self.dot_token); + formatter.field("member", &self.member); + formatter.finish() + } + } + self.debug(formatter, "ExprField") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprForLoop { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprForLoop { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("label", &self.label); + formatter.field("for_token", &self.for_token); + formatter.field("pat", &self.pat); + formatter.field("in_token", &self.in_token); + formatter.field("expr", &self.expr); + formatter.field("body", &self.body); + formatter.finish() + } + } + 
self.debug(formatter, "ExprForLoop") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprGroup { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprGroup { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("group_token", &self.group_token); + formatter.field("expr", &self.expr); + formatter.finish() + } + } + self.debug(formatter, "ExprGroup") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprIf { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprIf { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("if_token", &self.if_token); + formatter.field("cond", &self.cond); + formatter.field("then_branch", &self.then_branch); + formatter.field("else_branch", &self.else_branch); + formatter.finish() + } + } + self.debug(formatter, "ExprIf") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprIndex { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprIndex { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("expr", &self.expr); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("index", &self.index); + formatter.finish() + } + } + self.debug(formatter, "ExprIndex") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprInfer { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprInfer { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("underscore_token", &self.underscore_token); + formatter.finish() + } + } + self.debug(formatter, "ExprInfer") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprLet { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprLet { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("let_token", &self.let_token); + formatter.field("pat", &self.pat); + formatter.field("eq_token", &self.eq_token); + formatter.field("expr", &self.expr); + formatter.finish() + } + } + self.debug(formatter, "ExprLet") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprLit { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprLit { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("lit", &self.lit); + formatter.finish() + } + } + self.debug(formatter, "ExprLit") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprLoop { + fn 
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprLoop { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("label", &self.label); + formatter.field("loop_token", &self.loop_token); + formatter.field("body", &self.body); + formatter.finish() + } + } + self.debug(formatter, "ExprLoop") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprMacro { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("mac", &self.mac); + formatter.finish() + } + } + self.debug(formatter, "ExprMacro") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprMatch { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprMatch { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("match_token", &self.match_token); + formatter.field("expr", &self.expr); + formatter.field("brace_token", &self.brace_token); + formatter.field("arms", &self.arms); + formatter.finish() + } + } + self.debug(formatter, "ExprMatch") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprMethodCall { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprMethodCall { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("receiver", &self.receiver); + formatter.field("dot_token", &self.dot_token); + formatter.field("method", &self.method); + formatter.field("turbofish", &self.turbofish); + formatter.field("paren_token", &self.paren_token); + formatter.field("args", &self.args); + formatter.finish() + } + } + self.debug(formatter, "ExprMethodCall") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprParen { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprParen { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("paren_token", &self.paren_token); + formatter.field("expr", &self.expr); + formatter.finish() + } + } + self.debug(formatter, "ExprParen") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprPath { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprPath { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("qself", &self.qself); + formatter.field("path", &self.path); + formatter.finish() + } + } + self.debug(formatter, "ExprPath") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprRange { 
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprRange { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("start", &self.start); + formatter.field("limits", &self.limits); + formatter.field("end", &self.end); + formatter.finish() + } + } + self.debug(formatter, "ExprRange") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprReference { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprReference { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("and_token", &self.and_token); + formatter.field("mutability", &self.mutability); + formatter.field("expr", &self.expr); + formatter.finish() + } + } + self.debug(formatter, "ExprReference") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprRepeat { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprRepeat { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("expr", &self.expr); + formatter.field("semi_token", &self.semi_token); + formatter.field("len", &self.len); + formatter.finish() + } + } + self.debug(formatter, "ExprRepeat") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprReturn { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprReturn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("return_token", &self.return_token); + formatter.field("expr", &self.expr); + formatter.finish() + } + } + self.debug(formatter, "ExprReturn") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprStruct { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprStruct { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("qself", &self.qself); + formatter.field("path", &self.path); + formatter.field("brace_token", &self.brace_token); + formatter.field("fields", &self.fields); + formatter.field("dot2_token", &self.dot2_token); + formatter.field("rest", &self.rest); + formatter.finish() + } + } + self.debug(formatter, "ExprStruct") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprTry { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprTry { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("expr", &self.expr); + formatter.field("question_token", &self.question_token); + formatter.finish() + } + } + self.debug(formatter, "ExprTry") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, 
doc(cfg(feature = "extra-traits")))] +impl Debug for ExprTryBlock { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprTryBlock { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("try_token", &self.try_token); + formatter.field("block", &self.block); + formatter.finish() + } + } + self.debug(formatter, "ExprTryBlock") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprTuple { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprTuple { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("paren_token", &self.paren_token); + formatter.field("elems", &self.elems); + formatter.finish() + } + } + self.debug(formatter, "ExprTuple") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprUnary { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprUnary { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("op", &self.op); + formatter.field("expr", &self.expr); + formatter.finish() + } + } + self.debug(formatter, "ExprUnary") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprUnsafe { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprUnsafe { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("unsafe_token", &self.unsafe_token); + formatter.field("block", &self.block); + formatter.finish() + } + } + self.debug(formatter, "ExprUnsafe") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprWhile { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprWhile { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("label", &self.label); + formatter.field("while_token", &self.while_token); + formatter.field("cond", &self.cond); + formatter.field("body", &self.body); + formatter.finish() + } + } + self.debug(formatter, "ExprWhile") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ExprYield { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ExprYield { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("yield_token", &self.yield_token); + formatter.field("expr", &self.expr); + formatter.finish() + } + } + self.debug(formatter, "ExprYield") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Field { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("Field"); + formatter.field("attrs", &self.attrs); + 
formatter.field("vis", &self.vis); + formatter.field("mutability", &self.mutability); + formatter.field("ident", &self.ident); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for FieldMutability { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("FieldMutability::")?; + match self { + FieldMutability::None => formatter.write_str("None"), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for FieldPat { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("FieldPat"); + formatter.field("attrs", &self.attrs); + formatter.field("member", &self.member); + formatter.field("colon_token", &self.colon_token); + formatter.field("pat", &self.pat); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for FieldValue { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("FieldValue"); + formatter.field("attrs", &self.attrs); + formatter.field("member", &self.member); + formatter.field("colon_token", &self.colon_token); + formatter.field("expr", &self.expr); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Fields { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Fields::")?; + match self { + Fields::Named(v0) => v0.debug(formatter, "Named"), + Fields::Unnamed(v0) => v0.debug(formatter, "Unnamed"), + Fields::Unit => formatter.write_str("Unit"), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for FieldsNamed { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl FieldsNamed { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("brace_token", &self.brace_token); + formatter.field("named", &self.named); + formatter.finish() + } + } + self.debug(formatter, "FieldsNamed") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for FieldsUnnamed { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl FieldsUnnamed { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("paren_token", &self.paren_token); + formatter.field("unnamed", &self.unnamed); + formatter.finish() + } + } + self.debug(formatter, "FieldsUnnamed") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for File { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("File"); + formatter.field("shebang", &self.shebang); + formatter.field("attrs", &self.attrs); + formatter.field("items", &self.items); + formatter.finish() + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for FnArg { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("FnArg::")?; + 
match self { + FnArg::Receiver(v0) => { + let mut formatter = formatter.debug_tuple("Receiver"); + formatter.field(v0); + formatter.finish() + } + FnArg::Typed(v0) => { + let mut formatter = formatter.debug_tuple("Typed"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ForeignItem { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("ForeignItem::")?; + match self { + ForeignItem::Fn(v0) => v0.debug(formatter, "Fn"), + ForeignItem::Static(v0) => v0.debug(formatter, "Static"), + ForeignItem::Type(v0) => v0.debug(formatter, "Type"), + ForeignItem::Macro(v0) => v0.debug(formatter, "Macro"), + ForeignItem::Verbatim(v0) => { + let mut formatter = formatter.debug_tuple("Verbatim"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ForeignItemFn { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ForeignItemFn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("sig", &self.sig); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ForeignItemFn") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ForeignItemMacro { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ForeignItemMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("mac", &self.mac); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ForeignItemMacro") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ForeignItemStatic { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ForeignItemStatic { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("static_token", &self.static_token); + formatter.field("mutability", &self.mutability); + formatter.field("ident", &self.ident); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ForeignItemStatic") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ForeignItemType { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ForeignItemType { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("type_token", &self.type_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ForeignItemType") + } +} +#[cfg(any(feature = "derive", feature = "full"))] 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for GenericArgument { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("GenericArgument::")?; + match self { + GenericArgument::Lifetime(v0) => { + let mut formatter = formatter.debug_tuple("Lifetime"); + formatter.field(v0); + formatter.finish() + } + GenericArgument::Type(v0) => { + let mut formatter = formatter.debug_tuple("Type"); + formatter.field(v0); + formatter.finish() + } + GenericArgument::Const(v0) => { + let mut formatter = formatter.debug_tuple("Const"); + formatter.field(v0); + formatter.finish() + } + GenericArgument::AssocType(v0) => { + let mut formatter = formatter.debug_tuple("AssocType"); + formatter.field(v0); + formatter.finish() + } + GenericArgument::AssocConst(v0) => { + let mut formatter = formatter.debug_tuple("AssocConst"); + formatter.field(v0); + formatter.finish() + } + GenericArgument::Constraint(v0) => { + let mut formatter = formatter.debug_tuple("Constraint"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for GenericParam { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("GenericParam::")?; + match self { + GenericParam::Lifetime(v0) => { + let mut formatter = formatter.debug_tuple("Lifetime"); + formatter.field(v0); + formatter.finish() + } + GenericParam::Type(v0) => { + let mut formatter = formatter.debug_tuple("Type"); + formatter.field(v0); + formatter.finish() + } + GenericParam::Const(v0) => { + let mut formatter = formatter.debug_tuple("Const"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Generics { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("Generics"); + formatter.field("lt_token", &self.lt_token); + formatter.field("params", &self.params); + formatter.field("gt_token", &self.gt_token); + formatter.field("where_clause", &self.where_clause); + formatter.finish() + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ImplItem { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("ImplItem::")?; + match self { + ImplItem::Const(v0) => v0.debug(formatter, "Const"), + ImplItem::Fn(v0) => v0.debug(formatter, "Fn"), + ImplItem::Type(v0) => v0.debug(formatter, "Type"), + ImplItem::Macro(v0) => v0.debug(formatter, "Macro"), + ImplItem::Verbatim(v0) => { + let mut formatter = formatter.debug_tuple("Verbatim"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ImplItemConst { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ImplItemConst { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("defaultness", &self.defaultness); + formatter.field("const_token", &self.const_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.field("eq_token", 
&self.eq_token); + formatter.field("expr", &self.expr); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ImplItemConst") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ImplItemFn { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ImplItemFn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("defaultness", &self.defaultness); + formatter.field("sig", &self.sig); + formatter.field("block", &self.block); + formatter.finish() + } + } + self.debug(formatter, "ImplItemFn") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ImplItemMacro { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ImplItemMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("mac", &self.mac); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ImplItemMacro") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ImplItemType { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ImplItemType { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("defaultness", &self.defaultness); + formatter.field("type_token", &self.type_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("eq_token", &self.eq_token); + formatter.field("ty", &self.ty); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ImplItemType") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ImplRestriction { + fn fmt(&self, _formatter: &mut fmt::Formatter) -> fmt::Result { + match *self {} + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Index { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("Index"); + formatter.field("index", &self.index); + formatter.field("span", &self.span); + formatter.finish() + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Item { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Item::")?; + match self { + Item::Const(v0) => v0.debug(formatter, "Const"), + Item::Enum(v0) => v0.debug(formatter, "Enum"), + Item::ExternCrate(v0) => v0.debug(formatter, "ExternCrate"), + Item::Fn(v0) => v0.debug(formatter, "Fn"), + Item::ForeignMod(v0) => v0.debug(formatter, "ForeignMod"), + Item::Impl(v0) => v0.debug(formatter, "Impl"), + Item::Macro(v0) => v0.debug(formatter, "Macro"), + Item::Mod(v0) => v0.debug(formatter, "Mod"), + Item::Static(v0) => v0.debug(formatter, "Static"), + Item::Struct(v0) => v0.debug(formatter, "Struct"), + Item::Trait(v0) => v0.debug(formatter, "Trait"), + Item::TraitAlias(v0) => 
v0.debug(formatter, "TraitAlias"), + Item::Type(v0) => v0.debug(formatter, "Type"), + Item::Union(v0) => v0.debug(formatter, "Union"), + Item::Use(v0) => v0.debug(formatter, "Use"), + Item::Verbatim(v0) => { + let mut formatter = formatter.debug_tuple("Verbatim"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemConst { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemConst { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("const_token", &self.const_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.field("eq_token", &self.eq_token); + formatter.field("expr", &self.expr); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ItemConst") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemEnum { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemEnum { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("enum_token", &self.enum_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("brace_token", &self.brace_token); + formatter.field("variants", &self.variants); + formatter.finish() + } + } + self.debug(formatter, "ItemEnum") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemExternCrate { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemExternCrate { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("extern_token", &self.extern_token); + formatter.field("crate_token", &self.crate_token); + formatter.field("ident", &self.ident); + formatter.field("rename", &self.rename); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ItemExternCrate") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemFn { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemFn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("sig", &self.sig); + formatter.field("block", &self.block); + formatter.finish() + } + } + self.debug(formatter, "ItemFn") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemForeignMod { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemForeignMod { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + 
formatter.field("unsafety", &self.unsafety); + formatter.field("abi", &self.abi); + formatter.field("brace_token", &self.brace_token); + formatter.field("items", &self.items); + formatter.finish() + } + } + self.debug(formatter, "ItemForeignMod") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemImpl { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemImpl { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("defaultness", &self.defaultness); + formatter.field("unsafety", &self.unsafety); + formatter.field("impl_token", &self.impl_token); + formatter.field("generics", &self.generics); + formatter.field("trait_", &self.trait_); + formatter.field("self_ty", &self.self_ty); + formatter.field("brace_token", &self.brace_token); + formatter.field("items", &self.items); + formatter.finish() + } + } + self.debug(formatter, "ItemImpl") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemMacro { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("ident", &self.ident); + formatter.field("mac", &self.mac); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ItemMacro") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemMod { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemMod { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("unsafety", &self.unsafety); + formatter.field("mod_token", &self.mod_token); + formatter.field("ident", &self.ident); + formatter.field("content", &self.content); + formatter.field("semi", &self.semi); + formatter.finish() + } + } + self.debug(formatter, "ItemMod") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemStatic { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemStatic { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("static_token", &self.static_token); + formatter.field("mutability", &self.mutability); + formatter.field("ident", &self.ident); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.field("eq_token", &self.eq_token); + formatter.field("expr", &self.expr); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ItemStatic") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemStruct { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemStruct { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", 
&self.attrs); + formatter.field("vis", &self.vis); + formatter.field("struct_token", &self.struct_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("fields", &self.fields); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ItemStruct") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemTrait { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemTrait { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("unsafety", &self.unsafety); + formatter.field("auto_token", &self.auto_token); + formatter.field("restriction", &self.restriction); + formatter.field("trait_token", &self.trait_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("colon_token", &self.colon_token); + formatter.field("supertraits", &self.supertraits); + formatter.field("brace_token", &self.brace_token); + formatter.field("items", &self.items); + formatter.finish() + } + } + self.debug(formatter, "ItemTrait") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemTraitAlias { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemTraitAlias { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("trait_token", &self.trait_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("eq_token", &self.eq_token); + formatter.field("bounds", &self.bounds); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ItemTraitAlias") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemType { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemType { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("type_token", &self.type_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("eq_token", &self.eq_token); + formatter.field("ty", &self.ty); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ItemType") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemUnion { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemUnion { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("union_token", &self.union_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("fields", &self.fields); + formatter.finish() + } + } + self.debug(formatter, "ItemUnion") + } +} +#[cfg(feature = "full")] 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ItemUse { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ItemUse { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("use_token", &self.use_token); + formatter.field("leading_colon", &self.leading_colon); + formatter.field("tree", &self.tree); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "ItemUse") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Label { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("Label"); + formatter.field("name", &self.name); + formatter.field("colon_token", &self.colon_token); + formatter.finish() + } +} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Lifetime { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl Lifetime { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("apostrophe", &self.apostrophe); + formatter.field("ident", &self.ident); + formatter.finish() + } + } + self.debug(formatter, "Lifetime") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for LifetimeParam { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("LifetimeParam"); + formatter.field("attrs", &self.attrs); + formatter.field("lifetime", &self.lifetime); + formatter.field("colon_token", &self.colon_token); + formatter.field("bounds", &self.bounds); + formatter.finish() + } +} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Lit { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Lit::")?; + match self { + Lit::Str(v0) => v0.debug(formatter, "Str"), + Lit::ByteStr(v0) => v0.debug(formatter, "ByteStr"), + Lit::Byte(v0) => v0.debug(formatter, "Byte"), + Lit::Char(v0) => v0.debug(formatter, "Char"), + Lit::Int(v0) => v0.debug(formatter, "Int"), + Lit::Float(v0) => v0.debug(formatter, "Float"), + Lit::Bool(v0) => v0.debug(formatter, "Bool"), + Lit::Verbatim(v0) => { + let mut formatter = formatter.debug_tuple("Verbatim"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Local { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl Local { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("let_token", &self.let_token); + formatter.field("pat", &self.pat); + formatter.field("init", &self.init); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "Local") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for LocalInit { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("LocalInit"); + formatter.field("eq_token", &self.eq_token); + formatter.field("expr", &self.expr); + 
formatter.field("diverge", &self.diverge); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Macro { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("Macro"); + formatter.field("path", &self.path); + formatter.field("bang_token", &self.bang_token); + formatter.field("delimiter", &self.delimiter); + formatter.field("tokens", &self.tokens); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for MacroDelimiter { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("MacroDelimiter::")?; + match self { + MacroDelimiter::Paren(v0) => { + let mut formatter = formatter.debug_tuple("Paren"); + formatter.field(v0); + formatter.finish() + } + MacroDelimiter::Brace(v0) => { + let mut formatter = formatter.debug_tuple("Brace"); + formatter.field(v0); + formatter.finish() + } + MacroDelimiter::Bracket(v0) => { + let mut formatter = formatter.debug_tuple("Bracket"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Member { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Member::")?; + match self { + Member::Named(v0) => { + let mut formatter = formatter.debug_tuple("Named"); + formatter.field(v0); + formatter.finish() + } + Member::Unnamed(v0) => { + let mut formatter = formatter.debug_tuple("Unnamed"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Meta { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Meta::")?; + match self { + Meta::Path(v0) => v0.debug(formatter, "Path"), + Meta::List(v0) => v0.debug(formatter, "List"), + Meta::NameValue(v0) => v0.debug(formatter, "NameValue"), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for MetaList { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl MetaList { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("path", &self.path); + formatter.field("delimiter", &self.delimiter); + formatter.field("tokens", &self.tokens); + formatter.finish() + } + } + self.debug(formatter, "MetaList") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for MetaNameValue { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl MetaNameValue { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("path", &self.path); + formatter.field("eq_token", &self.eq_token); + formatter.field("value", &self.value); + formatter.finish() + } + } + self.debug(formatter, "MetaNameValue") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ParenthesizedGenericArguments { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl ParenthesizedGenericArguments { + fn debug(&self, 
formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("paren_token", &self.paren_token); + formatter.field("inputs", &self.inputs); + formatter.field("output", &self.output); + formatter.finish() + } + } + self.debug(formatter, "ParenthesizedGenericArguments") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Pat { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Pat::")?; + match self { + Pat::Const(v0) => v0.debug(formatter, "Const"), + Pat::Ident(v0) => v0.debug(formatter, "Ident"), + Pat::Lit(v0) => v0.debug(formatter, "Lit"), + Pat::Macro(v0) => v0.debug(formatter, "Macro"), + Pat::Or(v0) => v0.debug(formatter, "Or"), + Pat::Paren(v0) => v0.debug(formatter, "Paren"), + Pat::Path(v0) => v0.debug(formatter, "Path"), + Pat::Range(v0) => v0.debug(formatter, "Range"), + Pat::Reference(v0) => v0.debug(formatter, "Reference"), + Pat::Rest(v0) => v0.debug(formatter, "Rest"), + Pat::Slice(v0) => v0.debug(formatter, "Slice"), + Pat::Struct(v0) => v0.debug(formatter, "Struct"), + Pat::Tuple(v0) => v0.debug(formatter, "Tuple"), + Pat::TupleStruct(v0) => v0.debug(formatter, "TupleStruct"), + Pat::Type(v0) => v0.debug(formatter, "Type"), + Pat::Verbatim(v0) => { + let mut formatter = formatter.debug_tuple("Verbatim"); + formatter.field(v0); + formatter.finish() + } + Pat::Wild(v0) => v0.debug(formatter, "Wild"), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PatIdent { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl PatIdent { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("by_ref", &self.by_ref); + formatter.field("mutability", &self.mutability); + formatter.field("ident", &self.ident); + formatter.field("subpat", &self.subpat); + formatter.finish() + } + } + self.debug(formatter, "PatIdent") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PatOr { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl PatOr { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("leading_vert", &self.leading_vert); + formatter.field("cases", &self.cases); + formatter.finish() + } + } + self.debug(formatter, "PatOr") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PatParen { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl PatParen { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("paren_token", &self.paren_token); + formatter.field("pat", &self.pat); + formatter.finish() + } + } + self.debug(formatter, "PatParen") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PatReference { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl PatReference { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + 
formatter.field("and_token", &self.and_token); + formatter.field("mutability", &self.mutability); + formatter.field("pat", &self.pat); + formatter.finish() + } + } + self.debug(formatter, "PatReference") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PatRest { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl PatRest { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("dot2_token", &self.dot2_token); + formatter.finish() + } + } + self.debug(formatter, "PatRest") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PatSlice { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl PatSlice { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("elems", &self.elems); + formatter.finish() + } + } + self.debug(formatter, "PatSlice") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PatStruct { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl PatStruct { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("qself", &self.qself); + formatter.field("path", &self.path); + formatter.field("brace_token", &self.brace_token); + formatter.field("fields", &self.fields); + formatter.field("rest", &self.rest); + formatter.finish() + } + } + self.debug(formatter, "PatStruct") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PatTuple { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl PatTuple { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("paren_token", &self.paren_token); + formatter.field("elems", &self.elems); + formatter.finish() + } + } + self.debug(formatter, "PatTuple") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PatTupleStruct { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl PatTupleStruct { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("qself", &self.qself); + formatter.field("path", &self.path); + formatter.field("paren_token", &self.paren_token); + formatter.field("elems", &self.elems); + formatter.finish() + } + } + self.debug(formatter, "PatTupleStruct") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PatType { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl PatType { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("pat", &self.pat); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + 
formatter.finish() + } + } + self.debug(formatter, "PatType") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PatWild { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl PatWild { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("underscore_token", &self.underscore_token); + formatter.finish() + } + } + self.debug(formatter, "PatWild") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Path { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl Path { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("leading_colon", &self.leading_colon); + formatter.field("segments", &self.segments); + formatter.finish() + } + } + self.debug(formatter, "Path") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PathArguments { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("PathArguments::")?; + match self { + PathArguments::None => formatter.write_str("None"), + PathArguments::AngleBracketed(v0) => v0.debug(formatter, "AngleBracketed"), + PathArguments::Parenthesized(v0) => v0.debug(formatter, "Parenthesized"), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PathSegment { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("PathSegment"); + formatter.field("ident", &self.ident); + formatter.field("arguments", &self.arguments); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PredicateLifetime { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("PredicateLifetime"); + formatter.field("lifetime", &self.lifetime); + formatter.field("colon_token", &self.colon_token); + formatter.field("bounds", &self.bounds); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for PredicateType { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("PredicateType"); + formatter.field("lifetimes", &self.lifetimes); + formatter.field("bounded_ty", &self.bounded_ty); + formatter.field("colon_token", &self.colon_token); + formatter.field("bounds", &self.bounds); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for QSelf { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("QSelf"); + formatter.field("lt_token", &self.lt_token); + formatter.field("ty", &self.ty); + formatter.field("position", &self.position); + formatter.field("as_token", &self.as_token); + formatter.field("gt_token", &self.gt_token); + formatter.finish() + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for RangeLimits { + fn fmt(&self, formatter: &mut fmt::Formatter) 
-> fmt::Result { + formatter.write_str("RangeLimits::")?; + match self { + RangeLimits::HalfOpen(v0) => { + let mut formatter = formatter.debug_tuple("HalfOpen"); + formatter.field(v0); + formatter.finish() + } + RangeLimits::Closed(v0) => { + let mut formatter = formatter.debug_tuple("Closed"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Receiver { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("Receiver"); + formatter.field("attrs", &self.attrs); + formatter.field("reference", &self.reference); + formatter.field("mutability", &self.mutability); + formatter.field("self_token", &self.self_token); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for ReturnType { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("ReturnType::")?; + match self { + ReturnType::Default => formatter.write_str("Default"), + ReturnType::Type(v0, v1) => { + let mut formatter = formatter.debug_tuple("Type"); + formatter.field(v0); + formatter.field(v1); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Signature { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("Signature"); + formatter.field("constness", &self.constness); + formatter.field("asyncness", &self.asyncness); + formatter.field("unsafety", &self.unsafety); + formatter.field("abi", &self.abi); + formatter.field("fn_token", &self.fn_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("paren_token", &self.paren_token); + formatter.field("inputs", &self.inputs); + formatter.field("variadic", &self.variadic); + formatter.field("output", &self.output); + formatter.finish() + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for StaticMutability { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("StaticMutability::")?; + match self { + StaticMutability::Mut(v0) => { + let mut formatter = formatter.debug_tuple("Mut"); + formatter.field(v0); + formatter.finish() + } + StaticMutability::None => formatter.write_str("None"), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Stmt { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Stmt::")?; + match self { + Stmt::Local(v0) => v0.debug(formatter, "Local"), + Stmt::Item(v0) => { + let mut formatter = formatter.debug_tuple("Item"); + formatter.field(v0); + formatter.finish() + } + Stmt::Expr(v0, v1) => { + let mut formatter = formatter.debug_tuple("Expr"); + formatter.field(v0); + formatter.field(v1); + formatter.finish() + } + Stmt::Macro(v0) => v0.debug(formatter, "Macro"), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for StmtMacro { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl StmtMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", 
&self.attrs); + formatter.field("mac", &self.mac); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "StmtMacro") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TraitBound { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("TraitBound"); + formatter.field("paren_token", &self.paren_token); + formatter.field("modifier", &self.modifier); + formatter.field("lifetimes", &self.lifetimes); + formatter.field("path", &self.path); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TraitBoundModifier { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("TraitBoundModifier::")?; + match self { + TraitBoundModifier::None => formatter.write_str("None"), + TraitBoundModifier::Maybe(v0) => { + let mut formatter = formatter.debug_tuple("Maybe"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TraitItem { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("TraitItem::")?; + match self { + TraitItem::Const(v0) => v0.debug(formatter, "Const"), + TraitItem::Fn(v0) => v0.debug(formatter, "Fn"), + TraitItem::Type(v0) => v0.debug(formatter, "Type"), + TraitItem::Macro(v0) => v0.debug(formatter, "Macro"), + TraitItem::Verbatim(v0) => { + let mut formatter = formatter.debug_tuple("Verbatim"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TraitItemConst { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TraitItemConst { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("const_token", &self.const_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.field("default", &self.default); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "TraitItemConst") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TraitItemFn { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TraitItemFn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("sig", &self.sig); + formatter.field("default", &self.default); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "TraitItemFn") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TraitItemMacro { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TraitItemMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("mac", &self.mac); + formatter.field("semi_token", 
&self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "TraitItemMacro") + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TraitItemType { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TraitItemType { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("type_token", &self.type_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("colon_token", &self.colon_token); + formatter.field("bounds", &self.bounds); + formatter.field("default", &self.default); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } + } + self.debug(formatter, "TraitItemType") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Type { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Type::")?; + match self { + Type::Array(v0) => v0.debug(formatter, "Array"), + Type::BareFn(v0) => v0.debug(formatter, "BareFn"), + Type::Group(v0) => v0.debug(formatter, "Group"), + Type::ImplTrait(v0) => v0.debug(formatter, "ImplTrait"), + Type::Infer(v0) => v0.debug(formatter, "Infer"), + Type::Macro(v0) => v0.debug(formatter, "Macro"), + Type::Never(v0) => v0.debug(formatter, "Never"), + Type::Paren(v0) => v0.debug(formatter, "Paren"), + Type::Path(v0) => v0.debug(formatter, "Path"), + Type::Ptr(v0) => v0.debug(formatter, "Ptr"), + Type::Reference(v0) => v0.debug(formatter, "Reference"), + Type::Slice(v0) => v0.debug(formatter, "Slice"), + Type::TraitObject(v0) => v0.debug(formatter, "TraitObject"), + Type::Tuple(v0) => v0.debug(formatter, "Tuple"), + Type::Verbatim(v0) => { + let mut formatter = formatter.debug_tuple("Verbatim"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeArray { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeArray { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("elem", &self.elem); + formatter.field("semi_token", &self.semi_token); + formatter.field("len", &self.len); + formatter.finish() + } + } + self.debug(formatter, "TypeArray") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeBareFn { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeBareFn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("lifetimes", &self.lifetimes); + formatter.field("unsafety", &self.unsafety); + formatter.field("abi", &self.abi); + formatter.field("fn_token", &self.fn_token); + formatter.field("paren_token", &self.paren_token); + formatter.field("inputs", &self.inputs); + formatter.field("variadic", &self.variadic); + formatter.field("output", &self.output); + formatter.finish() + } + } + self.debug(formatter, "TypeBareFn") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeGroup { + fn 
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeGroup { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("group_token", &self.group_token); + formatter.field("elem", &self.elem); + formatter.finish() + } + } + self.debug(formatter, "TypeGroup") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeImplTrait { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeImplTrait { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("impl_token", &self.impl_token); + formatter.field("bounds", &self.bounds); + formatter.finish() + } + } + self.debug(formatter, "TypeImplTrait") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeInfer { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeInfer { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("underscore_token", &self.underscore_token); + formatter.finish() + } + } + self.debug(formatter, "TypeInfer") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeMacro { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("mac", &self.mac); + formatter.finish() + } + } + self.debug(formatter, "TypeMacro") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeNever { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeNever { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("bang_token", &self.bang_token); + formatter.finish() + } + } + self.debug(formatter, "TypeNever") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeParam { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("TypeParam"); + formatter.field("attrs", &self.attrs); + formatter.field("ident", &self.ident); + formatter.field("colon_token", &self.colon_token); + formatter.field("bounds", &self.bounds); + formatter.field("eq_token", &self.eq_token); + formatter.field("default", &self.default); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeParamBound { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("TypeParamBound::")?; + match self { + TypeParamBound::Trait(v0) => { + let mut formatter = formatter.debug_tuple("Trait"); + formatter.field(v0); + formatter.finish() + } + TypeParamBound::Lifetime(v0) => v0.debug(formatter, "Lifetime"), + TypeParamBound::Verbatim(v0) => { + let mut formatter = formatter.debug_tuple("Verbatim"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(any(feature = "derive", feature = 
"full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeParen { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeParen { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("paren_token", &self.paren_token); + formatter.field("elem", &self.elem); + formatter.finish() + } + } + self.debug(formatter, "TypeParen") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypePath { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypePath { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("qself", &self.qself); + formatter.field("path", &self.path); + formatter.finish() + } + } + self.debug(formatter, "TypePath") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypePtr { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypePtr { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("star_token", &self.star_token); + formatter.field("const_token", &self.const_token); + formatter.field("mutability", &self.mutability); + formatter.field("elem", &self.elem); + formatter.finish() + } + } + self.debug(formatter, "TypePtr") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeReference { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeReference { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("and_token", &self.and_token); + formatter.field("lifetime", &self.lifetime); + formatter.field("mutability", &self.mutability); + formatter.field("elem", &self.elem); + formatter.finish() + } + } + self.debug(formatter, "TypeReference") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeSlice { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeSlice { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("elem", &self.elem); + formatter.finish() + } + } + self.debug(formatter, "TypeSlice") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeTraitObject { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeTraitObject { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("dyn_token", &self.dyn_token); + formatter.field("bounds", &self.bounds); + formatter.finish() + } + } + self.debug(formatter, "TypeTraitObject") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for TypeTuple { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl TypeTuple { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) 
-> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("paren_token", &self.paren_token); + formatter.field("elems", &self.elems); + formatter.finish() + } + } + self.debug(formatter, "TypeTuple") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for UnOp { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("UnOp::")?; + match self { + UnOp::Deref(v0) => { + let mut formatter = formatter.debug_tuple("Deref"); + formatter.field(v0); + formatter.finish() + } + UnOp::Not(v0) => { + let mut formatter = formatter.debug_tuple("Not"); + formatter.field(v0); + formatter.finish() + } + UnOp::Neg(v0) => { + let mut formatter = formatter.debug_tuple("Neg"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for UseGlob { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("UseGlob"); + formatter.field("star_token", &self.star_token); + formatter.finish() + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for UseGroup { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("UseGroup"); + formatter.field("brace_token", &self.brace_token); + formatter.field("items", &self.items); + formatter.finish() + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for UseName { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("UseName"); + formatter.field("ident", &self.ident); + formatter.finish() + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for UsePath { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("UsePath"); + formatter.field("ident", &self.ident); + formatter.field("colon2_token", &self.colon2_token); + formatter.field("tree", &self.tree); + formatter.finish() + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for UseRename { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("UseRename"); + formatter.field("ident", &self.ident); + formatter.field("as_token", &self.as_token); + formatter.field("rename", &self.rename); + formatter.finish() + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for UseTree { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("UseTree::")?; + match self { + UseTree::Path(v0) => { + let mut formatter = formatter.debug_tuple("Path"); + formatter.field(v0); + formatter.finish() + } + UseTree::Name(v0) => { + let mut formatter = formatter.debug_tuple("Name"); + formatter.field(v0); + formatter.finish() + } + UseTree::Rename(v0) => { + let mut formatter = formatter.debug_tuple("Rename"); + formatter.field(v0); + formatter.finish() + } + UseTree::Glob(v0) => { + let mut formatter = formatter.debug_tuple("Glob"); + formatter.field(v0); + formatter.finish() + } + UseTree::Group(v0) => { + let mut formatter = formatter.debug_tuple("Group"); + formatter.field(v0); + formatter.finish() + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature 
= "extra-traits")))] +impl Debug for Variadic { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("Variadic"); + formatter.field("attrs", &self.attrs); + formatter.field("pat", &self.pat); + formatter.field("dots", &self.dots); + formatter.field("comma", &self.comma); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Variant { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("Variant"); + formatter.field("attrs", &self.attrs); + formatter.field("ident", &self.ident); + formatter.field("fields", &self.fields); + formatter.field("discriminant", &self.discriminant); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for VisRestricted { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl VisRestricted { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("pub_token", &self.pub_token); + formatter.field("paren_token", &self.paren_token); + formatter.field("in_token", &self.in_token); + formatter.field("path", &self.path); + formatter.finish() + } + } + self.debug(formatter, "VisRestricted") + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Visibility { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Visibility::")?; + match self { + Visibility::Public(v0) => { + let mut formatter = formatter.debug_tuple("Public"); + formatter.field(v0); + formatter.finish() + } + Visibility::Restricted(v0) => v0.debug(formatter, "Restricted"), + Visibility::Inherited => formatter.write_str("Inherited"), + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for WhereClause { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let mut formatter = formatter.debug_struct("WhereClause"); + formatter.field("where_token", &self.where_token); + formatter.field("predicates", &self.predicates); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for WherePredicate { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("WherePredicate::")?; + match self { + WherePredicate::Lifetime(v0) => { + let mut formatter = formatter.debug_tuple("Lifetime"); + formatter.field(v0); + formatter.finish() + } + WherePredicate::Type(v0) => { + let mut formatter = formatter.debug_tuple("Type"); + formatter.field(v0); + formatter.finish() + } + } + } +} diff --git a/vendor/syn/src/gen/eq.rs b/vendor/syn/src/gen/eq.rs new file mode 100644 index 0000000..a1fb3b4 --- /dev/null +++ b/vendor/syn/src/gen/eq.rs @@ -0,0 +1,2148 @@ +// This file is @generated by syn-internal-codegen. +// It is not intended for manual editing. 
+ +#[cfg(any(feature = "derive", feature = "full"))] +use crate::tt::TokenStreamHelper; +use crate::*; +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Abi {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Abi { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for AngleBracketedGenericArguments {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for AngleBracketedGenericArguments { + fn eq(&self, other: &Self) -> bool { + self.colon2_token == other.colon2_token && self.args == other.args + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Arm {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Arm { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.pat == other.pat && self.guard == other.guard + && self.body == other.body && self.comma == other.comma + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for AssocConst {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for AssocConst { + fn eq(&self, other: &Self) -> bool { + self.ident == other.ident && self.generics == other.generics + && self.value == other.value + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for AssocType {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for AssocType { + fn eq(&self, other: &Self) -> bool { + self.ident == other.ident && self.generics == other.generics + && self.ty == other.ty + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for AttrStyle {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for AttrStyle { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (AttrStyle::Outer, AttrStyle::Outer) => true, + (AttrStyle::Inner(_), AttrStyle::Inner(_)) => true, + _ => false, + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Attribute {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Attribute { + fn eq(&self, other: &Self) -> bool { + self.style == other.style && self.meta == other.meta + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for BareFnArg {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for BareFnArg { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.name == other.name && self.ty == other.ty + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for BareVariadic {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, 
doc(cfg(feature = "extra-traits")))] +impl PartialEq for BareVariadic { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.name == other.name && self.comma == other.comma + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for BinOp {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for BinOp { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (BinOp::Add(_), BinOp::Add(_)) => true, + (BinOp::Sub(_), BinOp::Sub(_)) => true, + (BinOp::Mul(_), BinOp::Mul(_)) => true, + (BinOp::Div(_), BinOp::Div(_)) => true, + (BinOp::Rem(_), BinOp::Rem(_)) => true, + (BinOp::And(_), BinOp::And(_)) => true, + (BinOp::Or(_), BinOp::Or(_)) => true, + (BinOp::BitXor(_), BinOp::BitXor(_)) => true, + (BinOp::BitAnd(_), BinOp::BitAnd(_)) => true, + (BinOp::BitOr(_), BinOp::BitOr(_)) => true, + (BinOp::Shl(_), BinOp::Shl(_)) => true, + (BinOp::Shr(_), BinOp::Shr(_)) => true, + (BinOp::Eq(_), BinOp::Eq(_)) => true, + (BinOp::Lt(_), BinOp::Lt(_)) => true, + (BinOp::Le(_), BinOp::Le(_)) => true, + (BinOp::Ne(_), BinOp::Ne(_)) => true, + (BinOp::Ge(_), BinOp::Ge(_)) => true, + (BinOp::Gt(_), BinOp::Gt(_)) => true, + (BinOp::AddAssign(_), BinOp::AddAssign(_)) => true, + (BinOp::SubAssign(_), BinOp::SubAssign(_)) => true, + (BinOp::MulAssign(_), BinOp::MulAssign(_)) => true, + (BinOp::DivAssign(_), BinOp::DivAssign(_)) => true, + (BinOp::RemAssign(_), BinOp::RemAssign(_)) => true, + (BinOp::BitXorAssign(_), BinOp::BitXorAssign(_)) => true, + (BinOp::BitAndAssign(_), BinOp::BitAndAssign(_)) => true, + (BinOp::BitOrAssign(_), BinOp::BitOrAssign(_)) => true, + (BinOp::ShlAssign(_), BinOp::ShlAssign(_)) => true, + (BinOp::ShrAssign(_), BinOp::ShrAssign(_)) => true, + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Block {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Block { + fn eq(&self, other: &Self) -> bool { + self.stmts == other.stmts + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for BoundLifetimes {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for BoundLifetimes { + fn eq(&self, other: &Self) -> bool { + self.lifetimes == other.lifetimes + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ConstParam {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ConstParam { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.ident == other.ident && self.ty == other.ty + && self.eq_token == other.eq_token && self.default == other.default + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Constraint {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Constraint { + fn eq(&self, other: &Self) -> bool { + self.ident == other.ident && self.generics == other.generics + && self.bounds == other.bounds + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Data {} +#[cfg(feature = 
"derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Data { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Data::Struct(self0), Data::Struct(other0)) => self0 == other0, + (Data::Enum(self0), Data::Enum(other0)) => self0 == other0, + (Data::Union(self0), Data::Union(other0)) => self0 == other0, + _ => false, + } + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for DataEnum {} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for DataEnum { + fn eq(&self, other: &Self) -> bool { + self.variants == other.variants + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for DataStruct {} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for DataStruct { + fn eq(&self, other: &Self) -> bool { + self.fields == other.fields && self.semi_token == other.semi_token + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for DataUnion {} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for DataUnion { + fn eq(&self, other: &Self) -> bool { + self.fields == other.fields + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for DeriveInput {} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for DeriveInput { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident + && self.generics == other.generics && self.data == other.data + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Expr {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Expr { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + #[cfg(feature = "full")] + (Expr::Array(self0), Expr::Array(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Assign(self0), Expr::Assign(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Async(self0), Expr::Async(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Await(self0), Expr::Await(other0)) => self0 == other0, + (Expr::Binary(self0), Expr::Binary(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Block(self0), Expr::Block(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Break(self0), Expr::Break(other0)) => self0 == other0, + (Expr::Call(self0), Expr::Call(other0)) => self0 == other0, + (Expr::Cast(self0), Expr::Cast(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Closure(self0), Expr::Closure(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Const(self0), Expr::Const(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Continue(self0), Expr::Continue(other0)) => self0 == other0, + (Expr::Field(self0), Expr::Field(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::ForLoop(self0), Expr::ForLoop(other0)) => self0 == other0, + (Expr::Group(self0), Expr::Group(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::If(self0), Expr::If(other0)) => self0 == other0, + (Expr::Index(self0), Expr::Index(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Infer(self0), 
Expr::Infer(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Let(self0), Expr::Let(other0)) => self0 == other0, + (Expr::Lit(self0), Expr::Lit(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Loop(self0), Expr::Loop(other0)) => self0 == other0, + (Expr::Macro(self0), Expr::Macro(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Match(self0), Expr::Match(other0)) => self0 == other0, + (Expr::MethodCall(self0), Expr::MethodCall(other0)) => self0 == other0, + (Expr::Paren(self0), Expr::Paren(other0)) => self0 == other0, + (Expr::Path(self0), Expr::Path(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Range(self0), Expr::Range(other0)) => self0 == other0, + (Expr::Reference(self0), Expr::Reference(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Repeat(self0), Expr::Repeat(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Return(self0), Expr::Return(other0)) => self0 == other0, + (Expr::Struct(self0), Expr::Struct(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Try(self0), Expr::Try(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::TryBlock(self0), Expr::TryBlock(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Tuple(self0), Expr::Tuple(other0)) => self0 == other0, + (Expr::Unary(self0), Expr::Unary(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Unsafe(self0), Expr::Unsafe(other0)) => self0 == other0, + (Expr::Verbatim(self0), Expr::Verbatim(other0)) => { + TokenStreamHelper(self0) == TokenStreamHelper(other0) + } + #[cfg(feature = "full")] + (Expr::While(self0), Expr::While(other0)) => self0 == other0, + #[cfg(feature = "full")] + (Expr::Yield(self0), Expr::Yield(other0)) => self0 == other0, + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprArray {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprArray { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.elems == other.elems + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprAssign {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprAssign { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.left == other.left && self.right == other.right + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprAsync {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprAsync { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.capture == other.capture + && self.block == other.block + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprAwait {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprAwait { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.base == other.base + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprBinary {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprBinary { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.left == other.left && 
self.op == other.op + && self.right == other.right + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprBlock {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprBlock { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.label == other.label + && self.block == other.block + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprBreak {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprBreak { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.label == other.label && self.expr == other.expr + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprCall {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprCall { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.func == other.func && self.args == other.args + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprCast {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprCast { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.expr == other.expr && self.ty == other.ty + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprClosure {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprClosure { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.lifetimes == other.lifetimes + && self.constness == other.constness && self.movability == other.movability + && self.asyncness == other.asyncness && self.capture == other.capture + && self.inputs == other.inputs && self.output == other.output + && self.body == other.body + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprConst {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprConst { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.block == other.block + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprContinue {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprContinue { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.label == other.label + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprField {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprField { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.base == other.base + && self.member == other.member + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprForLoop {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprForLoop { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.label == 
other.label && self.pat == other.pat + && self.expr == other.expr && self.body == other.body + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprGroup {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprGroup { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.expr == other.expr + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprIf {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprIf { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.cond == other.cond + && self.then_branch == other.then_branch + && self.else_branch == other.else_branch + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprIndex {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprIndex { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.expr == other.expr && self.index == other.index + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprInfer {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprInfer { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprLet {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprLet { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.pat == other.pat && self.expr == other.expr + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprLit {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprLit { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.lit == other.lit + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprLoop {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprLoop { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.label == other.label && self.body == other.body + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprMacro {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprMacro { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.mac == other.mac + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprMatch {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprMatch { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.expr == other.expr && self.arms == other.arms + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprMethodCall {} +#[cfg(any(feature = 
"derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprMethodCall { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.receiver == other.receiver + && self.method == other.method && self.turbofish == other.turbofish + && self.args == other.args + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprParen {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprParen { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.expr == other.expr + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprPath {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprPath { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.qself == other.qself && self.path == other.path + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprRange {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprRange { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.start == other.start + && self.limits == other.limits && self.end == other.end + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprReference {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprReference { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.mutability == other.mutability + && self.expr == other.expr + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprRepeat {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprRepeat { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.expr == other.expr && self.len == other.len + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprReturn {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprReturn { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.expr == other.expr + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprStruct {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprStruct { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.qself == other.qself && self.path == other.path + && self.fields == other.fields && self.dot2_token == other.dot2_token + && self.rest == other.rest + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprTry {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprTry { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.expr == other.expr + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprTryBlock {} 
+#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprTryBlock { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.block == other.block + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprTuple {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprTuple { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.elems == other.elems + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprUnary {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprUnary { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.op == other.op && self.expr == other.expr + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprUnsafe {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprUnsafe { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.block == other.block + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprWhile {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprWhile { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.label == other.label && self.cond == other.cond + && self.body == other.body + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ExprYield {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ExprYield { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.expr == other.expr + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Field {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Field { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis + && self.mutability == other.mutability && self.ident == other.ident + && self.colon_token == other.colon_token && self.ty == other.ty + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for FieldMutability {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for FieldMutability { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (FieldMutability::None, FieldMutability::None) => true, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for FieldPat {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for FieldPat { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.member == other.member + && self.colon_token == other.colon_token && self.pat == other.pat + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for FieldValue {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = 
"extra-traits")))] +impl PartialEq for FieldValue { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.member == other.member + && self.colon_token == other.colon_token && self.expr == other.expr + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Fields {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Fields { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Fields::Named(self0), Fields::Named(other0)) => self0 == other0, + (Fields::Unnamed(self0), Fields::Unnamed(other0)) => self0 == other0, + (Fields::Unit, Fields::Unit) => true, + _ => false, + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for FieldsNamed {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for FieldsNamed { + fn eq(&self, other: &Self) -> bool { + self.named == other.named + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for FieldsUnnamed {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for FieldsUnnamed { + fn eq(&self, other: &Self) -> bool { + self.unnamed == other.unnamed + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for File {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for File { + fn eq(&self, other: &Self) -> bool { + self.shebang == other.shebang && self.attrs == other.attrs + && self.items == other.items + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for FnArg {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for FnArg { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (FnArg::Receiver(self0), FnArg::Receiver(other0)) => self0 == other0, + (FnArg::Typed(self0), FnArg::Typed(other0)) => self0 == other0, + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ForeignItem {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ForeignItem { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (ForeignItem::Fn(self0), ForeignItem::Fn(other0)) => self0 == other0, + (ForeignItem::Static(self0), ForeignItem::Static(other0)) => self0 == other0, + (ForeignItem::Type(self0), ForeignItem::Type(other0)) => self0 == other0, + (ForeignItem::Macro(self0), ForeignItem::Macro(other0)) => self0 == other0, + (ForeignItem::Verbatim(self0), ForeignItem::Verbatim(other0)) => { + TokenStreamHelper(self0) == TokenStreamHelper(other0) + } + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ForeignItemFn {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ForeignItemFn { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis && self.sig == other.sig + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ForeignItemMacro {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, 
doc(cfg(feature = "extra-traits")))] +impl PartialEq for ForeignItemMacro { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.mac == other.mac + && self.semi_token == other.semi_token + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ForeignItemStatic {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ForeignItemStatic { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis + && self.mutability == other.mutability && self.ident == other.ident + && self.ty == other.ty + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ForeignItemType {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ForeignItemType { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident + && self.generics == other.generics + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for GenericArgument {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for GenericArgument { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (GenericArgument::Lifetime(self0), GenericArgument::Lifetime(other0)) => { + self0 == other0 + } + (GenericArgument::Type(self0), GenericArgument::Type(other0)) => { + self0 == other0 + } + (GenericArgument::Const(self0), GenericArgument::Const(other0)) => { + self0 == other0 + } + (GenericArgument::AssocType(self0), GenericArgument::AssocType(other0)) => { + self0 == other0 + } + (GenericArgument::AssocConst(self0), GenericArgument::AssocConst(other0)) => { + self0 == other0 + } + (GenericArgument::Constraint(self0), GenericArgument::Constraint(other0)) => { + self0 == other0 + } + _ => false, + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for GenericParam {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for GenericParam { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (GenericParam::Lifetime(self0), GenericParam::Lifetime(other0)) => { + self0 == other0 + } + (GenericParam::Type(self0), GenericParam::Type(other0)) => self0 == other0, + (GenericParam::Const(self0), GenericParam::Const(other0)) => self0 == other0, + _ => false, + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Generics {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Generics { + fn eq(&self, other: &Self) -> bool { + self.lt_token == other.lt_token && self.params == other.params + && self.gt_token == other.gt_token && self.where_clause == other.where_clause + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ImplItem {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ImplItem { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (ImplItem::Const(self0), ImplItem::Const(other0)) => self0 == other0, + (ImplItem::Fn(self0), ImplItem::Fn(other0)) => self0 == other0, + 
(ImplItem::Type(self0), ImplItem::Type(other0)) => self0 == other0, + (ImplItem::Macro(self0), ImplItem::Macro(other0)) => self0 == other0, + (ImplItem::Verbatim(self0), ImplItem::Verbatim(other0)) => { + TokenStreamHelper(self0) == TokenStreamHelper(other0) + } + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ImplItemConst {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ImplItemConst { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis + && self.defaultness == other.defaultness && self.ident == other.ident + && self.generics == other.generics && self.ty == other.ty + && self.expr == other.expr + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ImplItemFn {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ImplItemFn { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis + && self.defaultness == other.defaultness && self.sig == other.sig + && self.block == other.block + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ImplItemMacro {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ImplItemMacro { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.mac == other.mac + && self.semi_token == other.semi_token + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ImplItemType {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ImplItemType { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis + && self.defaultness == other.defaultness && self.ident == other.ident + && self.generics == other.generics && self.ty == other.ty + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ImplRestriction {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ImplRestriction { + fn eq(&self, _other: &Self) -> bool { + match *self {} + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Item {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Item { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Item::Const(self0), Item::Const(other0)) => self0 == other0, + (Item::Enum(self0), Item::Enum(other0)) => self0 == other0, + (Item::ExternCrate(self0), Item::ExternCrate(other0)) => self0 == other0, + (Item::Fn(self0), Item::Fn(other0)) => self0 == other0, + (Item::ForeignMod(self0), Item::ForeignMod(other0)) => self0 == other0, + (Item::Impl(self0), Item::Impl(other0)) => self0 == other0, + (Item::Macro(self0), Item::Macro(other0)) => self0 == other0, + (Item::Mod(self0), Item::Mod(other0)) => self0 == other0, + (Item::Static(self0), Item::Static(other0)) => self0 == other0, + (Item::Struct(self0), Item::Struct(other0)) => self0 == other0, + (Item::Trait(self0), Item::Trait(other0)) => self0 == other0, + (Item::TraitAlias(self0), Item::TraitAlias(other0)) => self0 == other0, + (Item::Type(self0), Item::Type(other0)) => self0 == other0, + (Item::Union(self0), Item::Union(other0)) => self0 == other0, + 
(Item::Use(self0), Item::Use(other0)) => self0 == other0, + (Item::Verbatim(self0), Item::Verbatim(other0)) => { + TokenStreamHelper(self0) == TokenStreamHelper(other0) + } + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemConst {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemConst { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident + && self.generics == other.generics && self.ty == other.ty + && self.expr == other.expr + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemEnum {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemEnum { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident + && self.generics == other.generics && self.variants == other.variants + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemExternCrate {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemExternCrate { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident + && self.rename == other.rename + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemFn {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemFn { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis && self.sig == other.sig + && self.block == other.block + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemForeignMod {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemForeignMod { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.unsafety == other.unsafety + && self.abi == other.abi && self.items == other.items + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemImpl {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemImpl { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.defaultness == other.defaultness + && self.unsafety == other.unsafety && self.generics == other.generics + && self.trait_ == other.trait_ && self.self_ty == other.self_ty + && self.items == other.items + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemMacro {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemMacro { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.ident == other.ident && self.mac == other.mac + && self.semi_token == other.semi_token + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemMod {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemMod { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis + && self.unsafety == other.unsafety && self.ident == other.ident + && self.content == 
other.content && self.semi == other.semi + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemStatic {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemStatic { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis + && self.mutability == other.mutability && self.ident == other.ident + && self.ty == other.ty && self.expr == other.expr + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemStruct {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemStruct { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident + && self.generics == other.generics && self.fields == other.fields + && self.semi_token == other.semi_token + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemTrait {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemTrait { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis + && self.unsafety == other.unsafety && self.auto_token == other.auto_token + && self.restriction == other.restriction && self.ident == other.ident + && self.generics == other.generics && self.colon_token == other.colon_token + && self.supertraits == other.supertraits && self.items == other.items + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemTraitAlias {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemTraitAlias { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident + && self.generics == other.generics && self.bounds == other.bounds + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemType {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemType { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident + && self.generics == other.generics && self.ty == other.ty + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemUnion {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemUnion { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident + && self.generics == other.generics && self.fields == other.fields + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ItemUse {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ItemUse { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.vis == other.vis + && self.leading_colon == other.leading_colon && self.tree == other.tree + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Label {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Label { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} +#[cfg(any(feature 
= "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for LifetimeParam {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for LifetimeParam { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.lifetime == other.lifetime + && self.colon_token == other.colon_token && self.bounds == other.bounds + } +} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Lit {} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Lit { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Lit::Str(self0), Lit::Str(other0)) => self0 == other0, + (Lit::ByteStr(self0), Lit::ByteStr(other0)) => self0 == other0, + (Lit::Byte(self0), Lit::Byte(other0)) => self0 == other0, + (Lit::Char(self0), Lit::Char(other0)) => self0 == other0, + (Lit::Int(self0), Lit::Int(other0)) => self0 == other0, + (Lit::Float(self0), Lit::Float(other0)) => self0 == other0, + (Lit::Bool(self0), Lit::Bool(other0)) => self0 == other0, + (Lit::Verbatim(self0), Lit::Verbatim(other0)) => { + self0.to_string() == other0.to_string() + } + _ => false, + } + } +} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for LitBool {} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for LitBool { + fn eq(&self, other: &Self) -> bool { + self.value == other.value + } +} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for LitByte {} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for LitByteStr {} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for LitChar {} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for LitFloat {} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for LitInt {} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for LitStr {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Local {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Local { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.pat == other.pat && self.init == other.init + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for LocalInit {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for LocalInit { + fn eq(&self, other: &Self) -> bool { + self.expr == other.expr && self.diverge == other.diverge + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Macro {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Macro { + fn eq(&self, other: &Self) -> bool { + self.path == other.path && self.delimiter == other.delimiter + && TokenStreamHelper(&self.tokens) == TokenStreamHelper(&other.tokens) + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for MacroDelimiter {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for MacroDelimiter { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (MacroDelimiter::Paren(_), MacroDelimiter::Paren(_)) => true, + (MacroDelimiter::Brace(_), MacroDelimiter::Brace(_)) => 
true, + (MacroDelimiter::Bracket(_), MacroDelimiter::Bracket(_)) => true, + _ => false, + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Meta {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Meta { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Meta::Path(self0), Meta::Path(other0)) => self0 == other0, + (Meta::List(self0), Meta::List(other0)) => self0 == other0, + (Meta::NameValue(self0), Meta::NameValue(other0)) => self0 == other0, + _ => false, + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for MetaList {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for MetaList { + fn eq(&self, other: &Self) -> bool { + self.path == other.path && self.delimiter == other.delimiter + && TokenStreamHelper(&self.tokens) == TokenStreamHelper(&other.tokens) + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for MetaNameValue {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for MetaNameValue { + fn eq(&self, other: &Self) -> bool { + self.path == other.path && self.value == other.value + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ParenthesizedGenericArguments {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ParenthesizedGenericArguments { + fn eq(&self, other: &Self) -> bool { + self.inputs == other.inputs && self.output == other.output + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Pat {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Pat { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Pat::Const(self0), Pat::Const(other0)) => self0 == other0, + (Pat::Ident(self0), Pat::Ident(other0)) => self0 == other0, + (Pat::Lit(self0), Pat::Lit(other0)) => self0 == other0, + (Pat::Macro(self0), Pat::Macro(other0)) => self0 == other0, + (Pat::Or(self0), Pat::Or(other0)) => self0 == other0, + (Pat::Paren(self0), Pat::Paren(other0)) => self0 == other0, + (Pat::Path(self0), Pat::Path(other0)) => self0 == other0, + (Pat::Range(self0), Pat::Range(other0)) => self0 == other0, + (Pat::Reference(self0), Pat::Reference(other0)) => self0 == other0, + (Pat::Rest(self0), Pat::Rest(other0)) => self0 == other0, + (Pat::Slice(self0), Pat::Slice(other0)) => self0 == other0, + (Pat::Struct(self0), Pat::Struct(other0)) => self0 == other0, + (Pat::Tuple(self0), Pat::Tuple(other0)) => self0 == other0, + (Pat::TupleStruct(self0), Pat::TupleStruct(other0)) => self0 == other0, + (Pat::Type(self0), Pat::Type(other0)) => self0 == other0, + (Pat::Verbatim(self0), Pat::Verbatim(other0)) => { + TokenStreamHelper(self0) == TokenStreamHelper(other0) + } + (Pat::Wild(self0), Pat::Wild(other0)) => self0 == other0, + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PatIdent {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PatIdent { + fn 
eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.by_ref == other.by_ref + && self.mutability == other.mutability && self.ident == other.ident + && self.subpat == other.subpat + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PatOr {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PatOr { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.leading_vert == other.leading_vert + && self.cases == other.cases + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PatParen {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PatParen { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.pat == other.pat + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PatReference {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PatReference { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.mutability == other.mutability + && self.pat == other.pat + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PatRest {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PatRest { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PatSlice {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PatSlice { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.elems == other.elems + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PatStruct {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PatStruct { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.qself == other.qself && self.path == other.path + && self.fields == other.fields && self.rest == other.rest + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PatTuple {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PatTuple { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.elems == other.elems + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PatTupleStruct {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PatTupleStruct { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.qself == other.qself && self.path == other.path + && self.elems == other.elems + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PatType {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PatType { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.pat == other.pat && self.ty == other.ty + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PatWild {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = 
"extra-traits")))] +impl PartialEq for PatWild { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Path {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Path { + fn eq(&self, other: &Self) -> bool { + self.leading_colon == other.leading_colon && self.segments == other.segments + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PathArguments {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PathArguments { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (PathArguments::None, PathArguments::None) => true, + ( + PathArguments::AngleBracketed(self0), + PathArguments::AngleBracketed(other0), + ) => self0 == other0, + ( + PathArguments::Parenthesized(self0), + PathArguments::Parenthesized(other0), + ) => self0 == other0, + _ => false, + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PathSegment {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PathSegment { + fn eq(&self, other: &Self) -> bool { + self.ident == other.ident && self.arguments == other.arguments + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PredicateLifetime {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PredicateLifetime { + fn eq(&self, other: &Self) -> bool { + self.lifetime == other.lifetime && self.bounds == other.bounds + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for PredicateType {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for PredicateType { + fn eq(&self, other: &Self) -> bool { + self.lifetimes == other.lifetimes && self.bounded_ty == other.bounded_ty + && self.bounds == other.bounds + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for QSelf {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for QSelf { + fn eq(&self, other: &Self) -> bool { + self.ty == other.ty && self.position == other.position + && self.as_token == other.as_token + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for RangeLimits {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for RangeLimits { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (RangeLimits::HalfOpen(_), RangeLimits::HalfOpen(_)) => true, + (RangeLimits::Closed(_), RangeLimits::Closed(_)) => true, + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Receiver {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Receiver { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.reference == 
other.reference + && self.mutability == other.mutability + && self.colon_token == other.colon_token && self.ty == other.ty + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for ReturnType {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for ReturnType { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (ReturnType::Default, ReturnType::Default) => true, + (ReturnType::Type(_, self1), ReturnType::Type(_, other1)) => self1 == other1, + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Signature {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Signature { + fn eq(&self, other: &Self) -> bool { + self.constness == other.constness && self.asyncness == other.asyncness + && self.unsafety == other.unsafety && self.abi == other.abi + && self.ident == other.ident && self.generics == other.generics + && self.inputs == other.inputs && self.variadic == other.variadic + && self.output == other.output + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for StaticMutability {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for StaticMutability { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (StaticMutability::Mut(_), StaticMutability::Mut(_)) => true, + (StaticMutability::None, StaticMutability::None) => true, + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Stmt {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Stmt { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Stmt::Local(self0), Stmt::Local(other0)) => self0 == other0, + (Stmt::Item(self0), Stmt::Item(other0)) => self0 == other0, + (Stmt::Expr(self0, self1), Stmt::Expr(other0, other1)) => { + self0 == other0 && self1 == other1 + } + (Stmt::Macro(self0), Stmt::Macro(other0)) => self0 == other0, + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for StmtMacro {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for StmtMacro { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.mac == other.mac + && self.semi_token == other.semi_token + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TraitBound {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TraitBound { + fn eq(&self, other: &Self) -> bool { + self.paren_token == other.paren_token && self.modifier == other.modifier + && self.lifetimes == other.lifetimes && self.path == other.path + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TraitBoundModifier {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TraitBoundModifier { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (TraitBoundModifier::None, TraitBoundModifier::None) => true, + (TraitBoundModifier::Maybe(_), 
TraitBoundModifier::Maybe(_)) => true, + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TraitItem {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TraitItem { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (TraitItem::Const(self0), TraitItem::Const(other0)) => self0 == other0, + (TraitItem::Fn(self0), TraitItem::Fn(other0)) => self0 == other0, + (TraitItem::Type(self0), TraitItem::Type(other0)) => self0 == other0, + (TraitItem::Macro(self0), TraitItem::Macro(other0)) => self0 == other0, + (TraitItem::Verbatim(self0), TraitItem::Verbatim(other0)) => { + TokenStreamHelper(self0) == TokenStreamHelper(other0) + } + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TraitItemConst {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TraitItemConst { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.ident == other.ident + && self.generics == other.generics && self.ty == other.ty + && self.default == other.default + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TraitItemFn {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TraitItemFn { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.sig == other.sig + && self.default == other.default && self.semi_token == other.semi_token + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TraitItemMacro {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TraitItemMacro { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.mac == other.mac + && self.semi_token == other.semi_token + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TraitItemType {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TraitItemType { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.ident == other.ident + && self.generics == other.generics && self.colon_token == other.colon_token + && self.bounds == other.bounds && self.default == other.default + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Type {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Type { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Type::Array(self0), Type::Array(other0)) => self0 == other0, + (Type::BareFn(self0), Type::BareFn(other0)) => self0 == other0, + (Type::Group(self0), Type::Group(other0)) => self0 == other0, + (Type::ImplTrait(self0), Type::ImplTrait(other0)) => self0 == other0, + (Type::Infer(self0), Type::Infer(other0)) => self0 == other0, + (Type::Macro(self0), Type::Macro(other0)) => self0 == other0, + (Type::Never(self0), Type::Never(other0)) => self0 == other0, + (Type::Paren(self0), Type::Paren(other0)) => self0 == other0, + (Type::Path(self0), Type::Path(other0)) => self0 == other0, + (Type::Ptr(self0), Type::Ptr(other0)) => self0 == other0, + (Type::Reference(self0), Type::Reference(other0)) => self0 == other0, + (Type::Slice(self0), 
Type::Slice(other0)) => self0 == other0, + (Type::TraitObject(self0), Type::TraitObject(other0)) => self0 == other0, + (Type::Tuple(self0), Type::Tuple(other0)) => self0 == other0, + (Type::Verbatim(self0), Type::Verbatim(other0)) => { + TokenStreamHelper(self0) == TokenStreamHelper(other0) + } + _ => false, + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeArray {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeArray { + fn eq(&self, other: &Self) -> bool { + self.elem == other.elem && self.len == other.len + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeBareFn {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeBareFn { + fn eq(&self, other: &Self) -> bool { + self.lifetimes == other.lifetimes && self.unsafety == other.unsafety + && self.abi == other.abi && self.inputs == other.inputs + && self.variadic == other.variadic && self.output == other.output + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeGroup {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeGroup { + fn eq(&self, other: &Self) -> bool { + self.elem == other.elem + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeImplTrait {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeImplTrait { + fn eq(&self, other: &Self) -> bool { + self.bounds == other.bounds + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeInfer {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeInfer { + fn eq(&self, _other: &Self) -> bool { + true + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeMacro {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeMacro { + fn eq(&self, other: &Self) -> bool { + self.mac == other.mac + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeNever {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeNever { + fn eq(&self, _other: &Self) -> bool { + true + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeParam {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeParam { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.ident == other.ident + && self.colon_token == other.colon_token && self.bounds == other.bounds + && self.eq_token == other.eq_token && self.default == other.default + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = 
"extra-traits")))] +impl Eq for TypeParamBound {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeParamBound { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (TypeParamBound::Trait(self0), TypeParamBound::Trait(other0)) => { + self0 == other0 + } + (TypeParamBound::Lifetime(self0), TypeParamBound::Lifetime(other0)) => { + self0 == other0 + } + (TypeParamBound::Verbatim(self0), TypeParamBound::Verbatim(other0)) => { + TokenStreamHelper(self0) == TokenStreamHelper(other0) + } + _ => false, + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeParen {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeParen { + fn eq(&self, other: &Self) -> bool { + self.elem == other.elem + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypePath {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypePath { + fn eq(&self, other: &Self) -> bool { + self.qself == other.qself && self.path == other.path + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypePtr {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypePtr { + fn eq(&self, other: &Self) -> bool { + self.const_token == other.const_token && self.mutability == other.mutability + && self.elem == other.elem + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeReference {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeReference { + fn eq(&self, other: &Self) -> bool { + self.lifetime == other.lifetime && self.mutability == other.mutability + && self.elem == other.elem + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeSlice {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeSlice { + fn eq(&self, other: &Self) -> bool { + self.elem == other.elem + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeTraitObject {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeTraitObject { + fn eq(&self, other: &Self) -> bool { + self.dyn_token == other.dyn_token && self.bounds == other.bounds + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for TypeTuple {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for TypeTuple { + fn eq(&self, other: &Self) -> bool { + self.elems == other.elems + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for UnOp {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for UnOp { + 
fn eq(&self, other: &Self) -> bool { + match (self, other) { + (UnOp::Deref(_), UnOp::Deref(_)) => true, + (UnOp::Not(_), UnOp::Not(_)) => true, + (UnOp::Neg(_), UnOp::Neg(_)) => true, + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for UseGlob {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for UseGlob { + fn eq(&self, _other: &Self) -> bool { + true + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for UseGroup {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for UseGroup { + fn eq(&self, other: &Self) -> bool { + self.items == other.items + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for UseName {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for UseName { + fn eq(&self, other: &Self) -> bool { + self.ident == other.ident + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for UsePath {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for UsePath { + fn eq(&self, other: &Self) -> bool { + self.ident == other.ident && self.tree == other.tree + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for UseRename {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for UseRename { + fn eq(&self, other: &Self) -> bool { + self.ident == other.ident && self.rename == other.rename + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for UseTree {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for UseTree { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (UseTree::Path(self0), UseTree::Path(other0)) => self0 == other0, + (UseTree::Name(self0), UseTree::Name(other0)) => self0 == other0, + (UseTree::Rename(self0), UseTree::Rename(other0)) => self0 == other0, + (UseTree::Glob(self0), UseTree::Glob(other0)) => self0 == other0, + (UseTree::Group(self0), UseTree::Group(other0)) => self0 == other0, + _ => false, + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Variadic {} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Variadic { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.pat == other.pat && self.comma == other.comma + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Variant {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Variant { + fn eq(&self, other: &Self) -> bool { + self.attrs == other.attrs && self.ident == other.ident + && self.fields == other.fields && self.discriminant == other.discriminant + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for VisRestricted {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for VisRestricted { + fn eq(&self, other: &Self) -> bool { + self.in_token == other.in_token && 
self.path == other.path + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Visibility {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Visibility { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Visibility::Public(_), Visibility::Public(_)) => true, + (Visibility::Restricted(self0), Visibility::Restricted(other0)) => { + self0 == other0 + } + (Visibility::Inherited, Visibility::Inherited) => true, + _ => false, + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for WhereClause {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for WhereClause { + fn eq(&self, other: &Self) -> bool { + self.predicates == other.predicates + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for WherePredicate {} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for WherePredicate { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (WherePredicate::Lifetime(self0), WherePredicate::Lifetime(other0)) => { + self0 == other0 + } + (WherePredicate::Type(self0), WherePredicate::Type(other0)) => { + self0 == other0 + } + _ => false, + } + } +} diff --git a/vendor/syn/src/gen/fold.rs b/vendor/syn/src/gen/fold.rs new file mode 100644 index 0000000..6bd058b --- /dev/null +++ b/vendor/syn/src/gen/fold.rs @@ -0,0 +1,3459 @@ +// This file is @generated by syn-internal-codegen. +// It is not intended for manual editing. + +#![allow(unreachable_code, unused_variables)] +#![allow( + clippy::match_wildcard_for_single_variants, + clippy::needless_match, + clippy::needless_pass_by_ref_mut, +)] +#[cfg(any(feature = "full", feature = "derive"))] +use crate::gen::helper::fold::*; +use crate::*; +use proc_macro2::Span; +#[cfg(feature = "full")] +macro_rules! full { + ($e:expr) => { + $e + }; +} +#[cfg(all(feature = "derive", not(feature = "full")))] +macro_rules! full { + ($e:expr) => { + unreachable!() + }; +} +/// Syntax tree traversal to transform the nodes of an owned syntax tree. +/// +/// See the [module documentation] for details. 
+/// +/// [module documentation]: self +pub trait Fold { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_abi(&mut self, i: Abi) -> Abi { + fold_abi(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_angle_bracketed_generic_arguments( + &mut self, + i: AngleBracketedGenericArguments, + ) -> AngleBracketedGenericArguments { + fold_angle_bracketed_generic_arguments(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_arm(&mut self, i: Arm) -> Arm { + fold_arm(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_assoc_const(&mut self, i: AssocConst) -> AssocConst { + fold_assoc_const(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_assoc_type(&mut self, i: AssocType) -> AssocType { + fold_assoc_type(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_attr_style(&mut self, i: AttrStyle) -> AttrStyle { + fold_attr_style(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_attribute(&mut self, i: Attribute) -> Attribute { + fold_attribute(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_bare_fn_arg(&mut self, i: BareFnArg) -> BareFnArg { + fold_bare_fn_arg(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_bare_variadic(&mut self, i: BareVariadic) -> BareVariadic { + fold_bare_variadic(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_bin_op(&mut self, i: BinOp) -> BinOp { + fold_bin_op(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_block(&mut self, i: Block) -> Block { + fold_block(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_bound_lifetimes(&mut self, i: BoundLifetimes) -> BoundLifetimes { + fold_bound_lifetimes(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_const_param(&mut self, i: ConstParam) -> ConstParam { + fold_const_param(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_constraint(&mut self, i: Constraint) -> Constraint { + fold_constraint(self, i) + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn fold_data(&mut self, i: Data) -> Data { + fold_data(self, i) + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn fold_data_enum(&mut self, i: DataEnum) -> DataEnum { + fold_data_enum(self, i) + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn fold_data_struct(&mut self, i: DataStruct) -> 
DataStruct { + fold_data_struct(self, i) + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn fold_data_union(&mut self, i: DataUnion) -> DataUnion { + fold_data_union(self, i) + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn fold_derive_input(&mut self, i: DeriveInput) -> DeriveInput { + fold_derive_input(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr(&mut self, i: Expr) -> Expr { + fold_expr(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_array(&mut self, i: ExprArray) -> ExprArray { + fold_expr_array(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_assign(&mut self, i: ExprAssign) -> ExprAssign { + fold_expr_assign(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_async(&mut self, i: ExprAsync) -> ExprAsync { + fold_expr_async(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_await(&mut self, i: ExprAwait) -> ExprAwait { + fold_expr_await(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_binary(&mut self, i: ExprBinary) -> ExprBinary { + fold_expr_binary(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_block(&mut self, i: ExprBlock) -> ExprBlock { + fold_expr_block(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_break(&mut self, i: ExprBreak) -> ExprBreak { + fold_expr_break(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_call(&mut self, i: ExprCall) -> ExprCall { + fold_expr_call(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_cast(&mut self, i: ExprCast) -> ExprCast { + fold_expr_cast(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_closure(&mut self, i: ExprClosure) -> ExprClosure { + fold_expr_closure(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_const(&mut self, i: ExprConst) -> ExprConst { + fold_expr_const(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_continue(&mut self, i: ExprContinue) -> ExprContinue { + fold_expr_continue(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_field(&mut self, i: ExprField) -> ExprField { + fold_expr_field(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_for_loop(&mut self, i: ExprForLoop) -> ExprForLoop { + fold_expr_for_loop(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_group(&mut self, i: ExprGroup) -> ExprGroup { + fold_expr_group(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_if(&mut self, i: ExprIf) -> 
ExprIf { + fold_expr_if(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_index(&mut self, i: ExprIndex) -> ExprIndex { + fold_expr_index(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_infer(&mut self, i: ExprInfer) -> ExprInfer { + fold_expr_infer(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_let(&mut self, i: ExprLet) -> ExprLet { + fold_expr_let(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_lit(&mut self, i: ExprLit) -> ExprLit { + fold_expr_lit(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_loop(&mut self, i: ExprLoop) -> ExprLoop { + fold_expr_loop(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_macro(&mut self, i: ExprMacro) -> ExprMacro { + fold_expr_macro(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_match(&mut self, i: ExprMatch) -> ExprMatch { + fold_expr_match(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_method_call(&mut self, i: ExprMethodCall) -> ExprMethodCall { + fold_expr_method_call(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_paren(&mut self, i: ExprParen) -> ExprParen { + fold_expr_paren(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_path(&mut self, i: ExprPath) -> ExprPath { + fold_expr_path(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_range(&mut self, i: ExprRange) -> ExprRange { + fold_expr_range(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_reference(&mut self, i: ExprReference) -> ExprReference { + fold_expr_reference(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_repeat(&mut self, i: ExprRepeat) -> ExprRepeat { + fold_expr_repeat(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_return(&mut self, i: ExprReturn) -> ExprReturn { + fold_expr_return(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_struct(&mut self, i: ExprStruct) -> ExprStruct { + fold_expr_struct(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_try(&mut self, i: ExprTry) -> ExprTry { + fold_expr_try(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_try_block(&mut self, i: ExprTryBlock) -> ExprTryBlock { + fold_expr_try_block(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_tuple(&mut self, i: ExprTuple) -> ExprTuple { + fold_expr_tuple(self, i) + } + #[cfg(any(feature = 
"derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_unary(&mut self, i: ExprUnary) -> ExprUnary { + fold_expr_unary(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_unsafe(&mut self, i: ExprUnsafe) -> ExprUnsafe { + fold_expr_unsafe(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_while(&mut self, i: ExprWhile) -> ExprWhile { + fold_expr_while(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_yield(&mut self, i: ExprYield) -> ExprYield { + fold_expr_yield(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_field(&mut self, i: Field) -> Field { + fold_field(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_field_mutability(&mut self, i: FieldMutability) -> FieldMutability { + fold_field_mutability(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_field_pat(&mut self, i: FieldPat) -> FieldPat { + fold_field_pat(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_field_value(&mut self, i: FieldValue) -> FieldValue { + fold_field_value(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_fields(&mut self, i: Fields) -> Fields { + fold_fields(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_fields_named(&mut self, i: FieldsNamed) -> FieldsNamed { + fold_fields_named(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_fields_unnamed(&mut self, i: FieldsUnnamed) -> FieldsUnnamed { + fold_fields_unnamed(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_file(&mut self, i: File) -> File { + fold_file(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_fn_arg(&mut self, i: FnArg) -> FnArg { + fold_fn_arg(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_foreign_item(&mut self, i: ForeignItem) -> ForeignItem { + fold_foreign_item(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_foreign_item_fn(&mut self, i: ForeignItemFn) -> ForeignItemFn { + fold_foreign_item_fn(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_foreign_item_macro(&mut self, i: ForeignItemMacro) -> ForeignItemMacro { + fold_foreign_item_macro(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_foreign_item_static(&mut self, i: ForeignItemStatic) -> ForeignItemStatic { + fold_foreign_item_static(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_foreign_item_type(&mut self, i: ForeignItemType) -> ForeignItemType { + fold_foreign_item_type(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + 
#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_generic_argument(&mut self, i: GenericArgument) -> GenericArgument { + fold_generic_argument(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_generic_param(&mut self, i: GenericParam) -> GenericParam { + fold_generic_param(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_generics(&mut self, i: Generics) -> Generics { + fold_generics(self, i) + } + fn fold_ident(&mut self, i: Ident) -> Ident { + fold_ident(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_item(&mut self, i: ImplItem) -> ImplItem { + fold_impl_item(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_item_const(&mut self, i: ImplItemConst) -> ImplItemConst { + fold_impl_item_const(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_item_fn(&mut self, i: ImplItemFn) -> ImplItemFn { + fold_impl_item_fn(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_item_macro(&mut self, i: ImplItemMacro) -> ImplItemMacro { + fold_impl_item_macro(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_item_type(&mut self, i: ImplItemType) -> ImplItemType { + fold_impl_item_type(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_restriction(&mut self, i: ImplRestriction) -> ImplRestriction { + fold_impl_restriction(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_index(&mut self, i: Index) -> Index { + fold_index(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item(&mut self, i: Item) -> Item { + fold_item(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_const(&mut self, i: ItemConst) -> ItemConst { + fold_item_const(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_enum(&mut self, i: ItemEnum) -> ItemEnum { + fold_item_enum(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_extern_crate(&mut self, i: ItemExternCrate) -> ItemExternCrate { + fold_item_extern_crate(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_fn(&mut self, i: ItemFn) -> ItemFn { + fold_item_fn(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_foreign_mod(&mut self, i: ItemForeignMod) -> ItemForeignMod { + fold_item_foreign_mod(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_impl(&mut self, i: ItemImpl) -> ItemImpl { + fold_item_impl(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_macro(&mut self, i: ItemMacro) -> ItemMacro { + fold_item_macro(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_mod(&mut self, i: ItemMod) -> ItemMod { + fold_item_mod(self, i) + } + #[cfg(feature = 
"full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_static(&mut self, i: ItemStatic) -> ItemStatic { + fold_item_static(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_struct(&mut self, i: ItemStruct) -> ItemStruct { + fold_item_struct(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_trait(&mut self, i: ItemTrait) -> ItemTrait { + fold_item_trait(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_trait_alias(&mut self, i: ItemTraitAlias) -> ItemTraitAlias { + fold_item_trait_alias(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_type(&mut self, i: ItemType) -> ItemType { + fold_item_type(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_union(&mut self, i: ItemUnion) -> ItemUnion { + fold_item_union(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_use(&mut self, i: ItemUse) -> ItemUse { + fold_item_use(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_label(&mut self, i: Label) -> Label { + fold_label(self, i) + } + fn fold_lifetime(&mut self, i: Lifetime) -> Lifetime { + fold_lifetime(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_lifetime_param(&mut self, i: LifetimeParam) -> LifetimeParam { + fold_lifetime_param(self, i) + } + fn fold_lit(&mut self, i: Lit) -> Lit { + fold_lit(self, i) + } + fn fold_lit_bool(&mut self, i: LitBool) -> LitBool { + fold_lit_bool(self, i) + } + fn fold_lit_byte(&mut self, i: LitByte) -> LitByte { + fold_lit_byte(self, i) + } + fn fold_lit_byte_str(&mut self, i: LitByteStr) -> LitByteStr { + fold_lit_byte_str(self, i) + } + fn fold_lit_char(&mut self, i: LitChar) -> LitChar { + fold_lit_char(self, i) + } + fn fold_lit_float(&mut self, i: LitFloat) -> LitFloat { + fold_lit_float(self, i) + } + fn fold_lit_int(&mut self, i: LitInt) -> LitInt { + fold_lit_int(self, i) + } + fn fold_lit_str(&mut self, i: LitStr) -> LitStr { + fold_lit_str(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_local(&mut self, i: Local) -> Local { + fold_local(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_local_init(&mut self, i: LocalInit) -> LocalInit { + fold_local_init(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_macro(&mut self, i: Macro) -> Macro { + fold_macro(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_macro_delimiter(&mut self, i: MacroDelimiter) -> MacroDelimiter { + fold_macro_delimiter(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_member(&mut self, i: Member) -> Member { + fold_member(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_meta(&mut self, i: Meta) -> Meta { + fold_meta(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + 
#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_meta_list(&mut self, i: MetaList) -> MetaList { + fold_meta_list(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_meta_name_value(&mut self, i: MetaNameValue) -> MetaNameValue { + fold_meta_name_value(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_parenthesized_generic_arguments( + &mut self, + i: ParenthesizedGenericArguments, + ) -> ParenthesizedGenericArguments { + fold_parenthesized_generic_arguments(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat(&mut self, i: Pat) -> Pat { + fold_pat(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_ident(&mut self, i: PatIdent) -> PatIdent { + fold_pat_ident(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_or(&mut self, i: PatOr) -> PatOr { + fold_pat_or(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_paren(&mut self, i: PatParen) -> PatParen { + fold_pat_paren(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_reference(&mut self, i: PatReference) -> PatReference { + fold_pat_reference(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_rest(&mut self, i: PatRest) -> PatRest { + fold_pat_rest(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_slice(&mut self, i: PatSlice) -> PatSlice { + fold_pat_slice(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_struct(&mut self, i: PatStruct) -> PatStruct { + fold_pat_struct(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_tuple(&mut self, i: PatTuple) -> PatTuple { + fold_pat_tuple(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_tuple_struct(&mut self, i: PatTupleStruct) -> PatTupleStruct { + fold_pat_tuple_struct(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_type(&mut self, i: PatType) -> PatType { + fold_pat_type(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_wild(&mut self, i: PatWild) -> PatWild { + fold_pat_wild(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_path(&mut self, i: Path) -> Path { + fold_path(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_path_arguments(&mut self, i: PathArguments) -> PathArguments { + fold_path_arguments(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_path_segment(&mut self, i: PathSegment) -> PathSegment { + fold_path_segment(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_predicate_lifetime(&mut self, i: PredicateLifetime) 
-> PredicateLifetime { + fold_predicate_lifetime(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_predicate_type(&mut self, i: PredicateType) -> PredicateType { + fold_predicate_type(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_qself(&mut self, i: QSelf) -> QSelf { + fold_qself(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_range_limits(&mut self, i: RangeLimits) -> RangeLimits { + fold_range_limits(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_receiver(&mut self, i: Receiver) -> Receiver { + fold_receiver(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_return_type(&mut self, i: ReturnType) -> ReturnType { + fold_return_type(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_signature(&mut self, i: Signature) -> Signature { + fold_signature(self, i) + } + fn fold_span(&mut self, i: Span) -> Span { + fold_span(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_static_mutability(&mut self, i: StaticMutability) -> StaticMutability { + fold_static_mutability(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_stmt(&mut self, i: Stmt) -> Stmt { + fold_stmt(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_stmt_macro(&mut self, i: StmtMacro) -> StmtMacro { + fold_stmt_macro(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_trait_bound(&mut self, i: TraitBound) -> TraitBound { + fold_trait_bound(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_trait_bound_modifier( + &mut self, + i: TraitBoundModifier, + ) -> TraitBoundModifier { + fold_trait_bound_modifier(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_trait_item(&mut self, i: TraitItem) -> TraitItem { + fold_trait_item(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_trait_item_const(&mut self, i: TraitItemConst) -> TraitItemConst { + fold_trait_item_const(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_trait_item_fn(&mut self, i: TraitItemFn) -> TraitItemFn { + fold_trait_item_fn(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_trait_item_macro(&mut self, i: TraitItemMacro) -> TraitItemMacro { + fold_trait_item_macro(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_trait_item_type(&mut self, i: TraitItemType) -> TraitItemType { + fold_trait_item_type(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type(&mut self, i: Type) -> Type { + fold_type(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = 
"full"))))] + fn fold_type_array(&mut self, i: TypeArray) -> TypeArray { + fold_type_array(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_bare_fn(&mut self, i: TypeBareFn) -> TypeBareFn { + fold_type_bare_fn(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_group(&mut self, i: TypeGroup) -> TypeGroup { + fold_type_group(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_impl_trait(&mut self, i: TypeImplTrait) -> TypeImplTrait { + fold_type_impl_trait(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_infer(&mut self, i: TypeInfer) -> TypeInfer { + fold_type_infer(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_macro(&mut self, i: TypeMacro) -> TypeMacro { + fold_type_macro(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_never(&mut self, i: TypeNever) -> TypeNever { + fold_type_never(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_param(&mut self, i: TypeParam) -> TypeParam { + fold_type_param(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_param_bound(&mut self, i: TypeParamBound) -> TypeParamBound { + fold_type_param_bound(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_paren(&mut self, i: TypeParen) -> TypeParen { + fold_type_paren(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_path(&mut self, i: TypePath) -> TypePath { + fold_type_path(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_ptr(&mut self, i: TypePtr) -> TypePtr { + fold_type_ptr(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_reference(&mut self, i: TypeReference) -> TypeReference { + fold_type_reference(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_slice(&mut self, i: TypeSlice) -> TypeSlice { + fold_type_slice(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_trait_object(&mut self, i: TypeTraitObject) -> TypeTraitObject { + fold_type_trait_object(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_tuple(&mut self, i: TypeTuple) -> TypeTuple { + fold_type_tuple(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, 
doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_un_op(&mut self, i: UnOp) -> UnOp { + fold_un_op(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_glob(&mut self, i: UseGlob) -> UseGlob { + fold_use_glob(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_group(&mut self, i: UseGroup) -> UseGroup { + fold_use_group(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_name(&mut self, i: UseName) -> UseName { + fold_use_name(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_path(&mut self, i: UsePath) -> UsePath { + fold_use_path(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_rename(&mut self, i: UseRename) -> UseRename { + fold_use_rename(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_tree(&mut self, i: UseTree) -> UseTree { + fold_use_tree(self, i) + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_variadic(&mut self, i: Variadic) -> Variadic { + fold_variadic(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_variant(&mut self, i: Variant) -> Variant { + fold_variant(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_vis_restricted(&mut self, i: VisRestricted) -> VisRestricted { + fold_vis_restricted(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_visibility(&mut self, i: Visibility) -> Visibility { + fold_visibility(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_where_clause(&mut self, i: WhereClause) -> WhereClause { + fold_where_clause(self, i) + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_where_predicate(&mut self, i: WherePredicate) -> WherePredicate { + fold_where_predicate(self, i) + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_abi<F>(f: &mut F, node: Abi) -> Abi +where + F: Fold + ?Sized, +{ + Abi { + extern_token: node.extern_token, + name: (node.name).map(|it| f.fold_lit_str(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_angle_bracketed_generic_arguments<F>( + f: &mut F, + node: AngleBracketedGenericArguments, +) -> AngleBracketedGenericArguments +where + F: Fold + ?Sized, +{ + AngleBracketedGenericArguments { + colon2_token: node.colon2_token, + lt_token: node.lt_token, + args: FoldHelper::lift(node.args, |it| f.fold_generic_argument(it)), + gt_token: node.gt_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_arm<F>(f: &mut F, node: Arm) -> Arm +where + F: Fold + ?Sized, +{ + Arm { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + pat: f.fold_pat(node.pat), + guard: (node.guard).map(|it| ((it).0, Box::new(f.fold_expr(*(it).1)))), + 
fat_arrow_token: node.fat_arrow_token, + body: Box::new(f.fold_expr(*node.body)), + comma: node.comma, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_assoc_const<F>(f: &mut F, node: AssocConst) -> AssocConst +where + F: Fold + ?Sized, +{ + AssocConst { + ident: f.fold_ident(node.ident), + generics: (node.generics).map(|it| f.fold_angle_bracketed_generic_arguments(it)), + eq_token: node.eq_token, + value: f.fold_expr(node.value), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_assoc_type<F>(f: &mut F, node: AssocType) -> AssocType +where + F: Fold + ?Sized, +{ + AssocType { + ident: f.fold_ident(node.ident), + generics: (node.generics).map(|it| f.fold_angle_bracketed_generic_arguments(it)), + eq_token: node.eq_token, + ty: f.fold_type(node.ty), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_attr_style<F>(f: &mut F, node: AttrStyle) -> AttrStyle +where + F: Fold + ?Sized, +{ + match node { + AttrStyle::Outer => AttrStyle::Outer, + AttrStyle::Inner(_binding_0) => AttrStyle::Inner(_binding_0), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_attribute<F>(f: &mut F, node: Attribute) -> Attribute +where + F: Fold + ?Sized, +{ + Attribute { + pound_token: node.pound_token, + style: f.fold_attr_style(node.style), + bracket_token: node.bracket_token, + meta: f.fold_meta(node.meta), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_bare_fn_arg<F>(f: &mut F, node: BareFnArg) -> BareFnArg +where + F: Fold + ?Sized, +{ + BareFnArg { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + name: (node.name).map(|it| (f.fold_ident((it).0), (it).1)), + ty: f.fold_type(node.ty), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_bare_variadic<F>(f: &mut F, node: BareVariadic) -> BareVariadic +where + F: Fold + ?Sized, +{ + BareVariadic { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + name: (node.name).map(|it| (f.fold_ident((it).0), (it).1)), + dots: node.dots, + comma: node.comma, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_bin_op<F>(f: &mut F, node: BinOp) -> BinOp +where + F: Fold + ?Sized, +{ + match node { + BinOp::Add(_binding_0) => BinOp::Add(_binding_0), + BinOp::Sub(_binding_0) => BinOp::Sub(_binding_0), + BinOp::Mul(_binding_0) => BinOp::Mul(_binding_0), + BinOp::Div(_binding_0) => BinOp::Div(_binding_0), + BinOp::Rem(_binding_0) => BinOp::Rem(_binding_0), + BinOp::And(_binding_0) => BinOp::And(_binding_0), + BinOp::Or(_binding_0) => BinOp::Or(_binding_0), + BinOp::BitXor(_binding_0) => BinOp::BitXor(_binding_0), + BinOp::BitAnd(_binding_0) => BinOp::BitAnd(_binding_0), + BinOp::BitOr(_binding_0) => BinOp::BitOr(_binding_0), + BinOp::Shl(_binding_0) => BinOp::Shl(_binding_0), + BinOp::Shr(_binding_0) => BinOp::Shr(_binding_0), + BinOp::Eq(_binding_0) => BinOp::Eq(_binding_0), + BinOp::Lt(_binding_0) => BinOp::Lt(_binding_0), + BinOp::Le(_binding_0) => BinOp::Le(_binding_0), + 
BinOp::Ne(_binding_0) => BinOp::Ne(_binding_0), + BinOp::Ge(_binding_0) => BinOp::Ge(_binding_0), + BinOp::Gt(_binding_0) => BinOp::Gt(_binding_0), + BinOp::AddAssign(_binding_0) => BinOp::AddAssign(_binding_0), + BinOp::SubAssign(_binding_0) => BinOp::SubAssign(_binding_0), + BinOp::MulAssign(_binding_0) => BinOp::MulAssign(_binding_0), + BinOp::DivAssign(_binding_0) => BinOp::DivAssign(_binding_0), + BinOp::RemAssign(_binding_0) => BinOp::RemAssign(_binding_0), + BinOp::BitXorAssign(_binding_0) => BinOp::BitXorAssign(_binding_0), + BinOp::BitAndAssign(_binding_0) => BinOp::BitAndAssign(_binding_0), + BinOp::BitOrAssign(_binding_0) => BinOp::BitOrAssign(_binding_0), + BinOp::ShlAssign(_binding_0) => BinOp::ShlAssign(_binding_0), + BinOp::ShrAssign(_binding_0) => BinOp::ShrAssign(_binding_0), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_block<F>(f: &mut F, node: Block) -> Block +where + F: Fold + ?Sized, +{ + Block { + brace_token: node.brace_token, + stmts: FoldHelper::lift(node.stmts, |it| f.fold_stmt(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_bound_lifetimes<F>(f: &mut F, node: BoundLifetimes) -> BoundLifetimes +where + F: Fold + ?Sized, +{ + BoundLifetimes { + for_token: node.for_token, + lt_token: node.lt_token, + lifetimes: FoldHelper::lift(node.lifetimes, |it| f.fold_generic_param(it)), + gt_token: node.gt_token, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_const_param<F>(f: &mut F, node: ConstParam) -> ConstParam +where + F: Fold + ?Sized, +{ + ConstParam { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + const_token: node.const_token, + ident: f.fold_ident(node.ident), + colon_token: node.colon_token, + ty: f.fold_type(node.ty), + eq_token: node.eq_token, + default: (node.default).map(|it| f.fold_expr(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_constraint<F>(f: &mut F, node: Constraint) -> Constraint +where + F: Fold + ?Sized, +{ + Constraint { + ident: f.fold_ident(node.ident), + generics: (node.generics).map(|it| f.fold_angle_bracketed_generic_arguments(it)), + colon_token: node.colon_token, + bounds: FoldHelper::lift(node.bounds, |it| f.fold_type_param_bound(it)), + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn fold_data<F>(f: &mut F, node: Data) -> Data +where + F: Fold + ?Sized, +{ + match node { + Data::Struct(_binding_0) => Data::Struct(f.fold_data_struct(_binding_0)), + Data::Enum(_binding_0) => Data::Enum(f.fold_data_enum(_binding_0)), + Data::Union(_binding_0) => Data::Union(f.fold_data_union(_binding_0)), + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn fold_data_enum<F>(f: &mut F, node: DataEnum) -> DataEnum +where + F: Fold + ?Sized, +{ + DataEnum { + enum_token: node.enum_token, + brace_token: node.brace_token, + variants: FoldHelper::lift(node.variants, |it| f.fold_variant(it)), + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn fold_data_struct<F>(f: &mut F, node: DataStruct) -> DataStruct +where + F: Fold + ?Sized, +{ + DataStruct { + struct_token: node.struct_token, + fields: f.fold_fields(node.fields), + semi_token: node.semi_token, + } +} 
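The `fold_data*` free functions above are the default, field-by-field rebuild that the corresponding trait methods delegate to. A downstream user normally overrides a single `fold_*` method and lets every other node fall through to these defaults. A minimal sketch of that pattern, assuming syn is built with its default `derive`/`parsing` features plus the `fold` feature; `SuffixIdents` and the parsed struct are illustrative only, not part of this vendored file:

    use syn::fold::Fold;
    use syn::{DeriveInput, Ident};

    // Illustrative folder: appends a suffix to every identifier it reaches.
    // All other nodes are rebuilt unchanged by the default trait methods,
    // which call the free functions generated in this file.
    struct SuffixIdents;

    impl Fold for SuffixIdents {
        fn fold_ident(&mut self, i: Ident) -> Ident {
            Ident::new(&format!("{}_x", i), i.span())
        }
    }

    fn main() {
        let input: DeriveInput =
            syn::parse_str("struct Point { x: i32, y: i32 }").unwrap();
        let mut folder = SuffixIdents;
        let folded = folder.fold_derive_input(input);
        // Every ident gains the suffix, including the `i32` path segments,
        // since `fold_ident` is reached through types and paths as well.
        let _ = folded;
    }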
+#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn fold_data_union<F>(f: &mut F, node: DataUnion) -> DataUnion +where + F: Fold + ?Sized, +{ + DataUnion { + union_token: node.union_token, + fields: f.fold_fields_named(node.fields), + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn fold_derive_input<F>(f: &mut F, node: DeriveInput) -> DeriveInput +where + F: Fold + ?Sized, +{ + DeriveInput { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + data: f.fold_data(node.data), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr<F>(f: &mut F, node: Expr) -> Expr +where + F: Fold + ?Sized, +{ + match node { + Expr::Array(_binding_0) => Expr::Array(full!(f.fold_expr_array(_binding_0))), + Expr::Assign(_binding_0) => Expr::Assign(full!(f.fold_expr_assign(_binding_0))), + Expr::Async(_binding_0) => Expr::Async(full!(f.fold_expr_async(_binding_0))), + Expr::Await(_binding_0) => Expr::Await(full!(f.fold_expr_await(_binding_0))), + Expr::Binary(_binding_0) => Expr::Binary(f.fold_expr_binary(_binding_0)), + Expr::Block(_binding_0) => Expr::Block(full!(f.fold_expr_block(_binding_0))), + Expr::Break(_binding_0) => Expr::Break(full!(f.fold_expr_break(_binding_0))), + Expr::Call(_binding_0) => Expr::Call(f.fold_expr_call(_binding_0)), + Expr::Cast(_binding_0) => Expr::Cast(f.fold_expr_cast(_binding_0)), + Expr::Closure(_binding_0) => { + Expr::Closure(full!(f.fold_expr_closure(_binding_0))) + } + Expr::Const(_binding_0) => Expr::Const(full!(f.fold_expr_const(_binding_0))), + Expr::Continue(_binding_0) => { + Expr::Continue(full!(f.fold_expr_continue(_binding_0))) + } + Expr::Field(_binding_0) => Expr::Field(f.fold_expr_field(_binding_0)), + Expr::ForLoop(_binding_0) => { + Expr::ForLoop(full!(f.fold_expr_for_loop(_binding_0))) + } + Expr::Group(_binding_0) => Expr::Group(f.fold_expr_group(_binding_0)), + Expr::If(_binding_0) => Expr::If(full!(f.fold_expr_if(_binding_0))), + Expr::Index(_binding_0) => Expr::Index(f.fold_expr_index(_binding_0)), + Expr::Infer(_binding_0) => Expr::Infer(full!(f.fold_expr_infer(_binding_0))), + Expr::Let(_binding_0) => Expr::Let(full!(f.fold_expr_let(_binding_0))), + Expr::Lit(_binding_0) => Expr::Lit(f.fold_expr_lit(_binding_0)), + Expr::Loop(_binding_0) => Expr::Loop(full!(f.fold_expr_loop(_binding_0))), + Expr::Macro(_binding_0) => Expr::Macro(f.fold_expr_macro(_binding_0)), + Expr::Match(_binding_0) => Expr::Match(full!(f.fold_expr_match(_binding_0))), + Expr::MethodCall(_binding_0) => { + Expr::MethodCall(f.fold_expr_method_call(_binding_0)) + } + Expr::Paren(_binding_0) => Expr::Paren(f.fold_expr_paren(_binding_0)), + Expr::Path(_binding_0) => Expr::Path(f.fold_expr_path(_binding_0)), + Expr::Range(_binding_0) => Expr::Range(full!(f.fold_expr_range(_binding_0))), + Expr::Reference(_binding_0) => Expr::Reference(f.fold_expr_reference(_binding_0)), + Expr::Repeat(_binding_0) => Expr::Repeat(full!(f.fold_expr_repeat(_binding_0))), + Expr::Return(_binding_0) => Expr::Return(full!(f.fold_expr_return(_binding_0))), + Expr::Struct(_binding_0) => Expr::Struct(f.fold_expr_struct(_binding_0)), + Expr::Try(_binding_0) => Expr::Try(full!(f.fold_expr_try(_binding_0))), + Expr::TryBlock(_binding_0) => { + Expr::TryBlock(full!(f.fold_expr_try_block(_binding_0))) + } + 
Expr::Tuple(_binding_0) => Expr::Tuple(full!(f.fold_expr_tuple(_binding_0))), + Expr::Unary(_binding_0) => Expr::Unary(f.fold_expr_unary(_binding_0)), + Expr::Unsafe(_binding_0) => Expr::Unsafe(full!(f.fold_expr_unsafe(_binding_0))), + Expr::Verbatim(_binding_0) => Expr::Verbatim(_binding_0), + Expr::While(_binding_0) => Expr::While(full!(f.fold_expr_while(_binding_0))), + Expr::Yield(_binding_0) => Expr::Yield(full!(f.fold_expr_yield(_binding_0))), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_array<F>(f: &mut F, node: ExprArray) -> ExprArray +where + F: Fold + ?Sized, +{ + ExprArray { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + bracket_token: node.bracket_token, + elems: FoldHelper::lift(node.elems, |it| f.fold_expr(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_assign<F>(f: &mut F, node: ExprAssign) -> ExprAssign +where + F: Fold + ?Sized, +{ + ExprAssign { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + left: Box::new(f.fold_expr(*node.left)), + eq_token: node.eq_token, + right: Box::new(f.fold_expr(*node.right)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_async<F>(f: &mut F, node: ExprAsync) -> ExprAsync +where + F: Fold + ?Sized, +{ + ExprAsync { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + async_token: node.async_token, + capture: node.capture, + block: f.fold_block(node.block), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_await<F>(f: &mut F, node: ExprAwait) -> ExprAwait +where + F: Fold + ?Sized, +{ + ExprAwait { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + base: Box::new(f.fold_expr(*node.base)), + dot_token: node.dot_token, + await_token: node.await_token, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_binary<F>(f: &mut F, node: ExprBinary) -> ExprBinary +where + F: Fold + ?Sized, +{ + ExprBinary { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + left: Box::new(f.fold_expr(*node.left)), + op: f.fold_bin_op(node.op), + right: Box::new(f.fold_expr(*node.right)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_block<F>(f: &mut F, node: ExprBlock) -> ExprBlock +where + F: Fold + ?Sized, +{ + ExprBlock { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + label: (node.label).map(|it| f.fold_label(it)), + block: f.fold_block(node.block), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_break<F>(f: &mut F, node: ExprBreak) -> ExprBreak +where + F: Fold + ?Sized, +{ + ExprBreak { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + break_token: node.break_token, + label: (node.label).map(|it| f.fold_lifetime(it)), + expr: (node.expr).map(|it| Box::new(f.fold_expr(*it))), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_call<F>(f: &mut F, node: ExprCall) -> ExprCall +where + F: Fold + ?Sized, +{ + ExprCall { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + func: Box::new(f.fold_expr(*node.func)), + paren_token: node.paren_token, + args: FoldHelper::lift(node.args, |it| f.fold_expr(it)), + } +} 
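Note the `full!` wrappers in `fold_expr` above: when only the `derive` feature is enabled, the expression variants that exist solely under `full` cannot occur, and `full!` marks those arms unreachable. With `full` enabled, the usual pattern is to intercept one case of `fold_expr` and delegate the rest to the free function so the traversal still recurses. A minimal sketch, assuming syn with the `full` and `fold` features; `DoubleInts` and the parsed expression are illustrative, not part of this vendored file:

    use syn::fold::{self, Fold};
    use syn::{Expr, ExprLit, Lit, LitInt};

    // Illustrative folder: doubles every integer literal and hands every
    // other expression kind back to the default traversal, so literals
    // nested inside parens, calls, etc. are still visited.
    struct DoubleInts;

    impl Fold for DoubleInts {
        fn fold_expr(&mut self, e: Expr) -> Expr {
            match e {
                Expr::Lit(ExprLit { attrs, lit: Lit::Int(int) }) => {
                    let doubled = 2 * int.base10_parse::<u64>().unwrap();
                    Expr::Lit(ExprLit {
                        attrs,
                        lit: Lit::Int(LitInt::new(&doubled.to_string(), int.span())),
                    })
                }
                other => fold::fold_expr(self, other),
            }
        }
    }

    fn main() {
        let e: Expr = syn::parse_str("1 + (2 * x)").unwrap();
        let mut folder = DoubleInts;
        let folded = folder.fold_expr(e);
        // `folded` now represents `2 + (4 * x)`.
        let _ = folded;
    }

Because fold takes each node by value and returns a rebuilt node, it suits transformations that replace subtrees wholesale; syn's Visit and VisitMut traits cover read-only and in-place traversal respectively.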
+#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_cast<F>(f: &mut F, node: ExprCast) -> ExprCast +where + F: Fold + ?Sized, +{ + ExprCast { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + expr: Box::new(f.fold_expr(*node.expr)), + as_token: node.as_token, + ty: Box::new(f.fold_type(*node.ty)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_closure<F>(f: &mut F, node: ExprClosure) -> ExprClosure +where + F: Fold + ?Sized, +{ + ExprClosure { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + lifetimes: (node.lifetimes).map(|it| f.fold_bound_lifetimes(it)), + constness: node.constness, + movability: node.movability, + asyncness: node.asyncness, + capture: node.capture, + or1_token: node.or1_token, + inputs: FoldHelper::lift(node.inputs, |it| f.fold_pat(it)), + or2_token: node.or2_token, + output: f.fold_return_type(node.output), + body: Box::new(f.fold_expr(*node.body)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_const<F>(f: &mut F, node: ExprConst) -> ExprConst +where + F: Fold + ?Sized, +{ + ExprConst { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + const_token: node.const_token, + block: f.fold_block(node.block), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_continue<F>(f: &mut F, node: ExprContinue) -> ExprContinue +where + F: Fold + ?Sized, +{ + ExprContinue { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + continue_token: node.continue_token, + label: (node.label).map(|it| f.fold_lifetime(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_field<F>(f: &mut F, node: ExprField) -> ExprField +where + F: Fold + ?Sized, +{ + ExprField { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + base: Box::new(f.fold_expr(*node.base)), + dot_token: node.dot_token, + member: f.fold_member(node.member), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_for_loop<F>(f: &mut F, node: ExprForLoop) -> ExprForLoop +where + F: Fold + ?Sized, +{ + ExprForLoop { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + label: (node.label).map(|it| f.fold_label(it)), + for_token: node.for_token, + pat: Box::new(f.fold_pat(*node.pat)), + in_token: node.in_token, + expr: Box::new(f.fold_expr(*node.expr)), + body: f.fold_block(node.body), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_group<F>(f: &mut F, node: ExprGroup) -> ExprGroup +where + F: Fold + ?Sized, +{ + ExprGroup { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + group_token: node.group_token, + expr: Box::new(f.fold_expr(*node.expr)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_if<F>(f: &mut F, node: ExprIf) -> ExprIf +where + F: Fold + ?Sized, +{ + ExprIf { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + if_token: node.if_token, + cond: Box::new(f.fold_expr(*node.cond)), + then_branch: f.fold_block(node.then_branch), + else_branch: (node.else_branch) + .map(|it| ((it).0, Box::new(f.fold_expr(*(it).1)))), + } +} +#[cfg(any(feature = "derive", 
feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_index<F>(f: &mut F, node: ExprIndex) -> ExprIndex +where + F: Fold + ?Sized, +{ + ExprIndex { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + expr: Box::new(f.fold_expr(*node.expr)), + bracket_token: node.bracket_token, + index: Box::new(f.fold_expr(*node.index)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_infer<F>(f: &mut F, node: ExprInfer) -> ExprInfer +where + F: Fold + ?Sized, +{ + ExprInfer { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + underscore_token: node.underscore_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_let<F>(f: &mut F, node: ExprLet) -> ExprLet +where + F: Fold + ?Sized, +{ + ExprLet { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + let_token: node.let_token, + pat: Box::new(f.fold_pat(*node.pat)), + eq_token: node.eq_token, + expr: Box::new(f.fold_expr(*node.expr)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_lit<F>(f: &mut F, node: ExprLit) -> ExprLit +where + F: Fold + ?Sized, +{ + ExprLit { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + lit: f.fold_lit(node.lit), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_loop<F>(f: &mut F, node: ExprLoop) -> ExprLoop +where + F: Fold + ?Sized, +{ + ExprLoop { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + label: (node.label).map(|it| f.fold_label(it)), + loop_token: node.loop_token, + body: f.fold_block(node.body), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_macro<F>(f: &mut F, node: ExprMacro) -> ExprMacro +where + F: Fold + ?Sized, +{ + ExprMacro { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + mac: f.fold_macro(node.mac), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_match<F>(f: &mut F, node: ExprMatch) -> ExprMatch +where + F: Fold + ?Sized, +{ + ExprMatch { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + match_token: node.match_token, + expr: Box::new(f.fold_expr(*node.expr)), + brace_token: node.brace_token, + arms: FoldHelper::lift(node.arms, |it| f.fold_arm(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_method_call<F>(f: &mut F, node: ExprMethodCall) -> ExprMethodCall +where + F: Fold + ?Sized, +{ + ExprMethodCall { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + receiver: Box::new(f.fold_expr(*node.receiver)), + dot_token: node.dot_token, + method: f.fold_ident(node.method), + turbofish: (node.turbofish) + .map(|it| f.fold_angle_bracketed_generic_arguments(it)), + paren_token: node.paren_token, + args: FoldHelper::lift(node.args, |it| f.fold_expr(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_paren<F>(f: &mut F, node: ExprParen) -> ExprParen +where + F: Fold + ?Sized, +{ + ExprParen { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + paren_token: node.paren_token, + expr: 
Box::new(f.fold_expr(*node.expr)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_path<F>(f: &mut F, node: ExprPath) -> ExprPath +where + F: Fold + ?Sized, +{ + ExprPath { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + qself: (node.qself).map(|it| f.fold_qself(it)), + path: f.fold_path(node.path), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_range<F>(f: &mut F, node: ExprRange) -> ExprRange +where + F: Fold + ?Sized, +{ + ExprRange { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + start: (node.start).map(|it| Box::new(f.fold_expr(*it))), + limits: f.fold_range_limits(node.limits), + end: (node.end).map(|it| Box::new(f.fold_expr(*it))), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_reference<F>(f: &mut F, node: ExprReference) -> ExprReference +where + F: Fold + ?Sized, +{ + ExprReference { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + and_token: node.and_token, + mutability: node.mutability, + expr: Box::new(f.fold_expr(*node.expr)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_repeat<F>(f: &mut F, node: ExprRepeat) -> ExprRepeat +where + F: Fold + ?Sized, +{ + ExprRepeat { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + bracket_token: node.bracket_token, + expr: Box::new(f.fold_expr(*node.expr)), + semi_token: node.semi_token, + len: Box::new(f.fold_expr(*node.len)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_return<F>(f: &mut F, node: ExprReturn) -> ExprReturn +where + F: Fold + ?Sized, +{ + ExprReturn { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + return_token: node.return_token, + expr: (node.expr).map(|it| Box::new(f.fold_expr(*it))), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_struct<F>(f: &mut F, node: ExprStruct) -> ExprStruct +where + F: Fold + ?Sized, +{ + ExprStruct { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + qself: (node.qself).map(|it| f.fold_qself(it)), + path: f.fold_path(node.path), + brace_token: node.brace_token, + fields: FoldHelper::lift(node.fields, |it| f.fold_field_value(it)), + dot2_token: node.dot2_token, + rest: (node.rest).map(|it| Box::new(f.fold_expr(*it))), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_try<F>(f: &mut F, node: ExprTry) -> ExprTry +where + F: Fold + ?Sized, +{ + ExprTry { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + expr: Box::new(f.fold_expr(*node.expr)), + question_token: node.question_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_try_block<F>(f: &mut F, node: ExprTryBlock) -> ExprTryBlock +where + F: Fold + ?Sized, +{ + ExprTryBlock { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + try_token: node.try_token, + block: f.fold_block(node.block), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_tuple<F>(f: &mut F, node: ExprTuple) -> ExprTuple +where + F: Fold + ?Sized, +{ + ExprTuple { + attrs: FoldHelper::lift(node.attrs, |it| 
f.fold_attribute(it)), + paren_token: node.paren_token, + elems: FoldHelper::lift(node.elems, |it| f.fold_expr(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_unary<F>(f: &mut F, node: ExprUnary) -> ExprUnary +where + F: Fold + ?Sized, +{ + ExprUnary { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + op: f.fold_un_op(node.op), + expr: Box::new(f.fold_expr(*node.expr)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_unsafe<F>(f: &mut F, node: ExprUnsafe) -> ExprUnsafe +where + F: Fold + ?Sized, +{ + ExprUnsafe { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + unsafe_token: node.unsafe_token, + block: f.fold_block(node.block), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_while<F>(f: &mut F, node: ExprWhile) -> ExprWhile +where + F: Fold + ?Sized, +{ + ExprWhile { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + label: (node.label).map(|it| f.fold_label(it)), + while_token: node.while_token, + cond: Box::new(f.fold_expr(*node.cond)), + body: f.fold_block(node.body), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_yield<F>(f: &mut F, node: ExprYield) -> ExprYield +where + F: Fold + ?Sized, +{ + ExprYield { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + yield_token: node.yield_token, + expr: (node.expr).map(|it| Box::new(f.fold_expr(*it))), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_field<F>(f: &mut F, node: Field) -> Field +where + F: Fold + ?Sized, +{ + Field { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + mutability: f.fold_field_mutability(node.mutability), + ident: (node.ident).map(|it| f.fold_ident(it)), + colon_token: node.colon_token, + ty: f.fold_type(node.ty), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_field_mutability<F>(f: &mut F, node: FieldMutability) -> FieldMutability +where + F: Fold + ?Sized, +{ + match node { + FieldMutability::None => FieldMutability::None, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_field_pat<F>(f: &mut F, node: FieldPat) -> FieldPat +where + F: Fold + ?Sized, +{ + FieldPat { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + member: f.fold_member(node.member), + colon_token: node.colon_token, + pat: Box::new(f.fold_pat(*node.pat)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_field_value<F>(f: &mut F, node: FieldValue) -> FieldValue +where + F: Fold + ?Sized, +{ + FieldValue { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + member: f.fold_member(node.member), + colon_token: node.colon_token, + expr: f.fold_expr(node.expr), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_fields<F>(f: &mut F, node: Fields) -> Fields +where + F: Fold + ?Sized, +{ + match node { + Fields::Named(_binding_0) => Fields::Named(f.fold_fields_named(_binding_0)), + Fields::Unnamed(_binding_0) => 
Fields::Unnamed(f.fold_fields_unnamed(_binding_0)), + Fields::Unit => Fields::Unit, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_fields_named<F>(f: &mut F, node: FieldsNamed) -> FieldsNamed +where + F: Fold + ?Sized, +{ + FieldsNamed { + brace_token: node.brace_token, + named: FoldHelper::lift(node.named, |it| f.fold_field(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_fields_unnamed<F>(f: &mut F, node: FieldsUnnamed) -> FieldsUnnamed +where + F: Fold + ?Sized, +{ + FieldsUnnamed { + paren_token: node.paren_token, + unnamed: FoldHelper::lift(node.unnamed, |it| f.fold_field(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_file<F>(f: &mut F, node: File) -> File +where + F: Fold + ?Sized, +{ + File { + shebang: node.shebang, + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + items: FoldHelper::lift(node.items, |it| f.fold_item(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_fn_arg<F>(f: &mut F, node: FnArg) -> FnArg +where + F: Fold + ?Sized, +{ + match node { + FnArg::Receiver(_binding_0) => FnArg::Receiver(f.fold_receiver(_binding_0)), + FnArg::Typed(_binding_0) => FnArg::Typed(f.fold_pat_type(_binding_0)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_foreign_item<F>(f: &mut F, node: ForeignItem) -> ForeignItem +where + F: Fold + ?Sized, +{ + match node { + ForeignItem::Fn(_binding_0) => { + ForeignItem::Fn(f.fold_foreign_item_fn(_binding_0)) + } + ForeignItem::Static(_binding_0) => { + ForeignItem::Static(f.fold_foreign_item_static(_binding_0)) + } + ForeignItem::Type(_binding_0) => { + ForeignItem::Type(f.fold_foreign_item_type(_binding_0)) + } + ForeignItem::Macro(_binding_0) => { + ForeignItem::Macro(f.fold_foreign_item_macro(_binding_0)) + } + ForeignItem::Verbatim(_binding_0) => ForeignItem::Verbatim(_binding_0), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_foreign_item_fn<F>(f: &mut F, node: ForeignItemFn) -> ForeignItemFn +where + F: Fold + ?Sized, +{ + ForeignItemFn { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + sig: f.fold_signature(node.sig), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_foreign_item_macro<F>(f: &mut F, node: ForeignItemMacro) -> ForeignItemMacro +where + F: Fold + ?Sized, +{ + ForeignItemMacro { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + mac: f.fold_macro(node.mac), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_foreign_item_static<F>( + f: &mut F, + node: ForeignItemStatic, +) -> ForeignItemStatic +where + F: Fold + ?Sized, +{ + ForeignItemStatic { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + static_token: node.static_token, + mutability: f.fold_static_mutability(node.mutability), + ident: f.fold_ident(node.ident), + colon_token: node.colon_token, + ty: Box::new(f.fold_type(*node.ty)), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_foreign_item_type<F>(f: &mut 
F, node: ForeignItemType) -> ForeignItemType +where + F: Fold + ?Sized, +{ + ForeignItemType { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + type_token: node.type_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + semi_token: node.semi_token, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_generic_argument<F>(f: &mut F, node: GenericArgument) -> GenericArgument +where + F: Fold + ?Sized, +{ + match node { + GenericArgument::Lifetime(_binding_0) => { + GenericArgument::Lifetime(f.fold_lifetime(_binding_0)) + } + GenericArgument::Type(_binding_0) => { + GenericArgument::Type(f.fold_type(_binding_0)) + } + GenericArgument::Const(_binding_0) => { + GenericArgument::Const(f.fold_expr(_binding_0)) + } + GenericArgument::AssocType(_binding_0) => { + GenericArgument::AssocType(f.fold_assoc_type(_binding_0)) + } + GenericArgument::AssocConst(_binding_0) => { + GenericArgument::AssocConst(f.fold_assoc_const(_binding_0)) + } + GenericArgument::Constraint(_binding_0) => { + GenericArgument::Constraint(f.fold_constraint(_binding_0)) + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_generic_param<F>(f: &mut F, node: GenericParam) -> GenericParam +where + F: Fold + ?Sized, +{ + match node { + GenericParam::Lifetime(_binding_0) => { + GenericParam::Lifetime(f.fold_lifetime_param(_binding_0)) + } + GenericParam::Type(_binding_0) => { + GenericParam::Type(f.fold_type_param(_binding_0)) + } + GenericParam::Const(_binding_0) => { + GenericParam::Const(f.fold_const_param(_binding_0)) + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_generics<F>(f: &mut F, node: Generics) -> Generics +where + F: Fold + ?Sized, +{ + Generics { + lt_token: node.lt_token, + params: FoldHelper::lift(node.params, |it| f.fold_generic_param(it)), + gt_token: node.gt_token, + where_clause: (node.where_clause).map(|it| f.fold_where_clause(it)), + } +} +pub fn fold_ident<F>(f: &mut F, node: Ident) -> Ident +where + F: Fold + ?Sized, +{ + let mut node = node; + let span = f.fold_span(node.span()); + node.set_span(span); + node +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_impl_item<F>(f: &mut F, node: ImplItem) -> ImplItem +where + F: Fold + ?Sized, +{ + match node { + ImplItem::Const(_binding_0) => { + ImplItem::Const(f.fold_impl_item_const(_binding_0)) + } + ImplItem::Fn(_binding_0) => ImplItem::Fn(f.fold_impl_item_fn(_binding_0)), + ImplItem::Type(_binding_0) => ImplItem::Type(f.fold_impl_item_type(_binding_0)), + ImplItem::Macro(_binding_0) => { + ImplItem::Macro(f.fold_impl_item_macro(_binding_0)) + } + ImplItem::Verbatim(_binding_0) => ImplItem::Verbatim(_binding_0), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_impl_item_const<F>(f: &mut F, node: ImplItemConst) -> ImplItemConst +where + F: Fold + ?Sized, +{ + ImplItemConst { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + defaultness: node.defaultness, + const_token: node.const_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + colon_token: node.colon_token, + ty: f.fold_type(node.ty), + eq_token: 
node.eq_token, + expr: f.fold_expr(node.expr), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_impl_item_fn<F>(f: &mut F, node: ImplItemFn) -> ImplItemFn +where + F: Fold + ?Sized, +{ + ImplItemFn { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + defaultness: node.defaultness, + sig: f.fold_signature(node.sig), + block: f.fold_block(node.block), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_impl_item_macro<F>(f: &mut F, node: ImplItemMacro) -> ImplItemMacro +where + F: Fold + ?Sized, +{ + ImplItemMacro { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + mac: f.fold_macro(node.mac), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_impl_item_type<F>(f: &mut F, node: ImplItemType) -> ImplItemType +where + F: Fold + ?Sized, +{ + ImplItemType { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + defaultness: node.defaultness, + type_token: node.type_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + eq_token: node.eq_token, + ty: f.fold_type(node.ty), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_impl_restriction<F>(f: &mut F, node: ImplRestriction) -> ImplRestriction +where + F: Fold + ?Sized, +{ + match node {} +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_index<F>(f: &mut F, node: Index) -> Index +where + F: Fold + ?Sized, +{ + Index { + index: node.index, + span: f.fold_span(node.span), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item<F>(f: &mut F, node: Item) -> Item +where + F: Fold + ?Sized, +{ + match node { + Item::Const(_binding_0) => Item::Const(f.fold_item_const(_binding_0)), + Item::Enum(_binding_0) => Item::Enum(f.fold_item_enum(_binding_0)), + Item::ExternCrate(_binding_0) => { + Item::ExternCrate(f.fold_item_extern_crate(_binding_0)) + } + Item::Fn(_binding_0) => Item::Fn(f.fold_item_fn(_binding_0)), + Item::ForeignMod(_binding_0) => { + Item::ForeignMod(f.fold_item_foreign_mod(_binding_0)) + } + Item::Impl(_binding_0) => Item::Impl(f.fold_item_impl(_binding_0)), + Item::Macro(_binding_0) => Item::Macro(f.fold_item_macro(_binding_0)), + Item::Mod(_binding_0) => Item::Mod(f.fold_item_mod(_binding_0)), + Item::Static(_binding_0) => Item::Static(f.fold_item_static(_binding_0)), + Item::Struct(_binding_0) => Item::Struct(f.fold_item_struct(_binding_0)), + Item::Trait(_binding_0) => Item::Trait(f.fold_item_trait(_binding_0)), + Item::TraitAlias(_binding_0) => { + Item::TraitAlias(f.fold_item_trait_alias(_binding_0)) + } + Item::Type(_binding_0) => Item::Type(f.fold_item_type(_binding_0)), + Item::Union(_binding_0) => Item::Union(f.fold_item_union(_binding_0)), + Item::Use(_binding_0) => Item::Use(f.fold_item_use(_binding_0)), + Item::Verbatim(_binding_0) => Item::Verbatim(_binding_0), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_const<F>(f: &mut F, node: ItemConst) -> ItemConst +where + F: Fold + ?Sized, +{ + ItemConst { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + const_token: 
node.const_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + colon_token: node.colon_token, + ty: Box::new(f.fold_type(*node.ty)), + eq_token: node.eq_token, + expr: Box::new(f.fold_expr(*node.expr)), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_enum<F>(f: &mut F, node: ItemEnum) -> ItemEnum +where + F: Fold + ?Sized, +{ + ItemEnum { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + enum_token: node.enum_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + brace_token: node.brace_token, + variants: FoldHelper::lift(node.variants, |it| f.fold_variant(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_extern_crate<F>(f: &mut F, node: ItemExternCrate) -> ItemExternCrate +where + F: Fold + ?Sized, +{ + ItemExternCrate { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + extern_token: node.extern_token, + crate_token: node.crate_token, + ident: f.fold_ident(node.ident), + rename: (node.rename).map(|it| ((it).0, f.fold_ident((it).1))), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_fn<F>(f: &mut F, node: ItemFn) -> ItemFn +where + F: Fold + ?Sized, +{ + ItemFn { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + sig: f.fold_signature(node.sig), + block: Box::new(f.fold_block(*node.block)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_foreign_mod<F>(f: &mut F, node: ItemForeignMod) -> ItemForeignMod +where + F: Fold + ?Sized, +{ + ItemForeignMod { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + unsafety: node.unsafety, + abi: f.fold_abi(node.abi), + brace_token: node.brace_token, + items: FoldHelper::lift(node.items, |it| f.fold_foreign_item(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_impl<F>(f: &mut F, node: ItemImpl) -> ItemImpl +where + F: Fold + ?Sized, +{ + ItemImpl { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + defaultness: node.defaultness, + unsafety: node.unsafety, + impl_token: node.impl_token, + generics: f.fold_generics(node.generics), + trait_: (node.trait_).map(|it| ((it).0, f.fold_path((it).1), (it).2)), + self_ty: Box::new(f.fold_type(*node.self_ty)), + brace_token: node.brace_token, + items: FoldHelper::lift(node.items, |it| f.fold_impl_item(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_macro<F>(f: &mut F, node: ItemMacro) -> ItemMacro +where + F: Fold + ?Sized, +{ + ItemMacro { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + ident: (node.ident).map(|it| f.fold_ident(it)), + mac: f.fold_macro(node.mac), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_mod<F>(f: &mut F, node: ItemMod) -> ItemMod +where + F: Fold + ?Sized, +{ + ItemMod { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + unsafety: node.unsafety, + mod_token: node.mod_token, + ident: f.fold_ident(node.ident), + content: (node.content) + .map(|it| ((it).0, FoldHelper::lift((it).1, |it| 
f.fold_item(it)))), + semi: node.semi, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_static<F>(f: &mut F, node: ItemStatic) -> ItemStatic +where + F: Fold + ?Sized, +{ + ItemStatic { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + static_token: node.static_token, + mutability: f.fold_static_mutability(node.mutability), + ident: f.fold_ident(node.ident), + colon_token: node.colon_token, + ty: Box::new(f.fold_type(*node.ty)), + eq_token: node.eq_token, + expr: Box::new(f.fold_expr(*node.expr)), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_struct<F>(f: &mut F, node: ItemStruct) -> ItemStruct +where + F: Fold + ?Sized, +{ + ItemStruct { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + struct_token: node.struct_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + fields: f.fold_fields(node.fields), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_trait<F>(f: &mut F, node: ItemTrait) -> ItemTrait +where + F: Fold + ?Sized, +{ + ItemTrait { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + unsafety: node.unsafety, + auto_token: node.auto_token, + restriction: (node.restriction).map(|it| f.fold_impl_restriction(it)), + trait_token: node.trait_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + colon_token: node.colon_token, + supertraits: FoldHelper::lift( + node.supertraits, + |it| f.fold_type_param_bound(it), + ), + brace_token: node.brace_token, + items: FoldHelper::lift(node.items, |it| f.fold_trait_item(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_trait_alias<F>(f: &mut F, node: ItemTraitAlias) -> ItemTraitAlias +where + F: Fold + ?Sized, +{ + ItemTraitAlias { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + trait_token: node.trait_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + eq_token: node.eq_token, + bounds: FoldHelper::lift(node.bounds, |it| f.fold_type_param_bound(it)), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_type<F>(f: &mut F, node: ItemType) -> ItemType +where + F: Fold + ?Sized, +{ + ItemType { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + type_token: node.type_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + eq_token: node.eq_token, + ty: Box::new(f.fold_type(*node.ty)), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_union<F>(f: &mut F, node: ItemUnion) -> ItemUnion +where + F: Fold + ?Sized, +{ + ItemUnion { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + union_token: node.union_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + fields: f.fold_fields_named(node.fields), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_use<F>(f: &mut F, node: ItemUse) -> ItemUse 
+where + F: Fold + ?Sized, +{ + ItemUse { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + vis: f.fold_visibility(node.vis), + use_token: node.use_token, + leading_colon: node.leading_colon, + tree: f.fold_use_tree(node.tree), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_label<F>(f: &mut F, node: Label) -> Label +where + F: Fold + ?Sized, +{ + Label { + name: f.fold_lifetime(node.name), + colon_token: node.colon_token, + } +} +pub fn fold_lifetime<F>(f: &mut F, node: Lifetime) -> Lifetime +where + F: Fold + ?Sized, +{ + Lifetime { + apostrophe: f.fold_span(node.apostrophe), + ident: f.fold_ident(node.ident), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_lifetime_param<F>(f: &mut F, node: LifetimeParam) -> LifetimeParam +where + F: Fold + ?Sized, +{ + LifetimeParam { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + lifetime: f.fold_lifetime(node.lifetime), + colon_token: node.colon_token, + bounds: FoldHelper::lift(node.bounds, |it| f.fold_lifetime(it)), + } +} +pub fn fold_lit<F>(f: &mut F, node: Lit) -> Lit +where + F: Fold + ?Sized, +{ + match node { + Lit::Str(_binding_0) => Lit::Str(f.fold_lit_str(_binding_0)), + Lit::ByteStr(_binding_0) => Lit::ByteStr(f.fold_lit_byte_str(_binding_0)), + Lit::Byte(_binding_0) => Lit::Byte(f.fold_lit_byte(_binding_0)), + Lit::Char(_binding_0) => Lit::Char(f.fold_lit_char(_binding_0)), + Lit::Int(_binding_0) => Lit::Int(f.fold_lit_int(_binding_0)), + Lit::Float(_binding_0) => Lit::Float(f.fold_lit_float(_binding_0)), + Lit::Bool(_binding_0) => Lit::Bool(f.fold_lit_bool(_binding_0)), + Lit::Verbatim(_binding_0) => Lit::Verbatim(_binding_0), + } +} +pub fn fold_lit_bool<F>(f: &mut F, node: LitBool) -> LitBool +where + F: Fold + ?Sized, +{ + LitBool { + value: node.value, + span: f.fold_span(node.span), + } +} +pub fn fold_lit_byte<F>(f: &mut F, node: LitByte) -> LitByte +where + F: Fold + ?Sized, +{ + let span = f.fold_span(node.span()); + let mut node = node; + node.set_span(span); + node +} +pub fn fold_lit_byte_str<F>(f: &mut F, node: LitByteStr) -> LitByteStr +where + F: Fold + ?Sized, +{ + let span = f.fold_span(node.span()); + let mut node = node; + node.set_span(span); + node +} +pub fn fold_lit_char<F>(f: &mut F, node: LitChar) -> LitChar +where + F: Fold + ?Sized, +{ + let span = f.fold_span(node.span()); + let mut node = node; + node.set_span(span); + node +} +pub fn fold_lit_float<F>(f: &mut F, node: LitFloat) -> LitFloat +where + F: Fold + ?Sized, +{ + let span = f.fold_span(node.span()); + let mut node = node; + node.set_span(span); + node +} +pub fn fold_lit_int<F>(f: &mut F, node: LitInt) -> LitInt +where + F: Fold + ?Sized, +{ + let span = f.fold_span(node.span()); + let mut node = node; + node.set_span(span); + node +} +pub fn fold_lit_str<F>(f: &mut F, node: LitStr) -> LitStr +where + F: Fold + ?Sized, +{ + let span = f.fold_span(node.span()); + let mut node = node; + node.set_span(span); + node +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_local<F>(f: &mut F, node: Local) -> Local +where + F: Fold + ?Sized, +{ + Local { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + let_token: node.let_token, + pat: f.fold_pat(node.pat), + init: (node.init).map(|it| f.fold_local_init(it)), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_local_init<F>(f: &mut F, node: LocalInit) -> LocalInit +where + F: Fold + ?Sized, +{ + LocalInit { + eq_token: node.eq_token, + expr: Box::new(f.fold_expr(*node.expr)), + diverge: (node.diverge).map(|it| ((it).0, Box::new(f.fold_expr(*(it).1)))), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_macro<F>(f: &mut F, node: Macro) -> Macro +where + F: Fold + ?Sized, +{ + Macro { + path: f.fold_path(node.path), + bang_token: node.bang_token, + delimiter: f.fold_macro_delimiter(node.delimiter), + tokens: node.tokens, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_macro_delimiter<F>(f: &mut F, node: MacroDelimiter) -> MacroDelimiter +where + F: Fold + ?Sized, +{ + match node { + MacroDelimiter::Paren(_binding_0) => MacroDelimiter::Paren(_binding_0), + MacroDelimiter::Brace(_binding_0) => MacroDelimiter::Brace(_binding_0), + MacroDelimiter::Bracket(_binding_0) => MacroDelimiter::Bracket(_binding_0), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_member<F>(f: &mut F, node: Member) -> Member +where + F: Fold + ?Sized, +{ + match node { + Member::Named(_binding_0) => Member::Named(f.fold_ident(_binding_0)), + Member::Unnamed(_binding_0) => Member::Unnamed(f.fold_index(_binding_0)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_meta<F>(f: &mut F, node: Meta) -> Meta +where + F: Fold + ?Sized, +{ + match node { + Meta::Path(_binding_0) => Meta::Path(f.fold_path(_binding_0)), + Meta::List(_binding_0) => Meta::List(f.fold_meta_list(_binding_0)), + Meta::NameValue(_binding_0) => { + Meta::NameValue(f.fold_meta_name_value(_binding_0)) + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_meta_list<F>(f: &mut F, node: MetaList) -> MetaList +where + F: Fold + ?Sized, +{ + MetaList { + path: f.fold_path(node.path), + delimiter: f.fold_macro_delimiter(node.delimiter), + tokens: node.tokens, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_meta_name_value<F>(f: &mut F, node: MetaNameValue) -> MetaNameValue +where + F: Fold + ?Sized, +{ + MetaNameValue { + path: f.fold_path(node.path), + eq_token: node.eq_token, + value: f.fold_expr(node.value), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_parenthesized_generic_arguments<F>( + f: &mut F, + node: ParenthesizedGenericArguments, +) -> ParenthesizedGenericArguments +where + F: Fold + ?Sized, +{ + ParenthesizedGenericArguments { + paren_token: node.paren_token, + inputs: FoldHelper::lift(node.inputs, |it| f.fold_type(it)), + output: f.fold_return_type(node.output), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat<F>(f: &mut F, node: Pat) -> Pat +where + F: Fold + ?Sized, +{ + match node { + Pat::Const(_binding_0) => Pat::Const(f.fold_expr_const(_binding_0)), + Pat::Ident(_binding_0) => Pat::Ident(f.fold_pat_ident(_binding_0)), + Pat::Lit(_binding_0) => 
Pat::Lit(f.fold_expr_lit(_binding_0)), + Pat::Macro(_binding_0) => Pat::Macro(f.fold_expr_macro(_binding_0)), + Pat::Or(_binding_0) => Pat::Or(f.fold_pat_or(_binding_0)), + Pat::Paren(_binding_0) => Pat::Paren(f.fold_pat_paren(_binding_0)), + Pat::Path(_binding_0) => Pat::Path(f.fold_expr_path(_binding_0)), + Pat::Range(_binding_0) => Pat::Range(f.fold_expr_range(_binding_0)), + Pat::Reference(_binding_0) => Pat::Reference(f.fold_pat_reference(_binding_0)), + Pat::Rest(_binding_0) => Pat::Rest(f.fold_pat_rest(_binding_0)), + Pat::Slice(_binding_0) => Pat::Slice(f.fold_pat_slice(_binding_0)), + Pat::Struct(_binding_0) => Pat::Struct(f.fold_pat_struct(_binding_0)), + Pat::Tuple(_binding_0) => Pat::Tuple(f.fold_pat_tuple(_binding_0)), + Pat::TupleStruct(_binding_0) => { + Pat::TupleStruct(f.fold_pat_tuple_struct(_binding_0)) + } + Pat::Type(_binding_0) => Pat::Type(f.fold_pat_type(_binding_0)), + Pat::Verbatim(_binding_0) => Pat::Verbatim(_binding_0), + Pat::Wild(_binding_0) => Pat::Wild(f.fold_pat_wild(_binding_0)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_ident<F>(f: &mut F, node: PatIdent) -> PatIdent +where + F: Fold + ?Sized, +{ + PatIdent { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + by_ref: node.by_ref, + mutability: node.mutability, + ident: f.fold_ident(node.ident), + subpat: (node.subpat).map(|it| ((it).0, Box::new(f.fold_pat(*(it).1)))), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_or<F>(f: &mut F, node: PatOr) -> PatOr +where + F: Fold + ?Sized, +{ + PatOr { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + leading_vert: node.leading_vert, + cases: FoldHelper::lift(node.cases, |it| f.fold_pat(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_paren<F>(f: &mut F, node: PatParen) -> PatParen +where + F: Fold + ?Sized, +{ + PatParen { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + paren_token: node.paren_token, + pat: Box::new(f.fold_pat(*node.pat)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_reference<F>(f: &mut F, node: PatReference) -> PatReference +where + F: Fold + ?Sized, +{ + PatReference { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + and_token: node.and_token, + mutability: node.mutability, + pat: Box::new(f.fold_pat(*node.pat)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_rest<F>(f: &mut F, node: PatRest) -> PatRest +where + F: Fold + ?Sized, +{ + PatRest { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + dot2_token: node.dot2_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_slice<F>(f: &mut F, node: PatSlice) -> PatSlice +where + F: Fold + ?Sized, +{ + PatSlice { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + bracket_token: node.bracket_token, + elems: FoldHelper::lift(node.elems, |it| f.fold_pat(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_struct<F>(f: &mut F, node: PatStruct) -> PatStruct +where + F: Fold + ?Sized, +{ + PatStruct { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + qself: (node.qself).map(|it| f.fold_qself(it)), + path: f.fold_path(node.path), + brace_token: node.brace_token, + fields: FoldHelper::lift(node.fields, |it| 
f.fold_field_pat(it)), + rest: (node.rest).map(|it| f.fold_pat_rest(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_tuple<F>(f: &mut F, node: PatTuple) -> PatTuple +where + F: Fold + ?Sized, +{ + PatTuple { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + paren_token: node.paren_token, + elems: FoldHelper::lift(node.elems, |it| f.fold_pat(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_tuple_struct<F>(f: &mut F, node: PatTupleStruct) -> PatTupleStruct +where + F: Fold + ?Sized, +{ + PatTupleStruct { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + qself: (node.qself).map(|it| f.fold_qself(it)), + path: f.fold_path(node.path), + paren_token: node.paren_token, + elems: FoldHelper::lift(node.elems, |it| f.fold_pat(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_type<F>(f: &mut F, node: PatType) -> PatType +where + F: Fold + ?Sized, +{ + PatType { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + pat: Box::new(f.fold_pat(*node.pat)), + colon_token: node.colon_token, + ty: Box::new(f.fold_type(*node.ty)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_wild<F>(f: &mut F, node: PatWild) -> PatWild +where + F: Fold + ?Sized, +{ + PatWild { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + underscore_token: node.underscore_token, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_path<F>(f: &mut F, node: Path) -> Path +where + F: Fold + ?Sized, +{ + Path { + leading_colon: node.leading_colon, + segments: FoldHelper::lift(node.segments, |it| f.fold_path_segment(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_path_arguments<F>(f: &mut F, node: PathArguments) -> PathArguments +where + F: Fold + ?Sized, +{ + match node { + PathArguments::None => PathArguments::None, + PathArguments::AngleBracketed(_binding_0) => { + PathArguments::AngleBracketed( + f.fold_angle_bracketed_generic_arguments(_binding_0), + ) + } + PathArguments::Parenthesized(_binding_0) => { + PathArguments::Parenthesized( + f.fold_parenthesized_generic_arguments(_binding_0), + ) + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_path_segment<F>(f: &mut F, node: PathSegment) -> PathSegment +where + F: Fold + ?Sized, +{ + PathSegment { + ident: f.fold_ident(node.ident), + arguments: f.fold_path_arguments(node.arguments), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_predicate_lifetime<F>( + f: &mut F, + node: PredicateLifetime, +) -> PredicateLifetime +where + F: Fold + ?Sized, +{ + PredicateLifetime { + lifetime: f.fold_lifetime(node.lifetime), + colon_token: node.colon_token, + bounds: FoldHelper::lift(node.bounds, |it| f.fold_lifetime(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_predicate_type<F>(f: &mut F, node: PredicateType) -> PredicateType +where + F: Fold + ?Sized, +{ + PredicateType { + lifetimes: (node.lifetimes).map(|it| 
f.fold_bound_lifetimes(it)), + bounded_ty: f.fold_type(node.bounded_ty), + colon_token: node.colon_token, + bounds: FoldHelper::lift(node.bounds, |it| f.fold_type_param_bound(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_qself<F>(f: &mut F, node: QSelf) -> QSelf +where + F: Fold + ?Sized, +{ + QSelf { + lt_token: node.lt_token, + ty: Box::new(f.fold_type(*node.ty)), + position: node.position, + as_token: node.as_token, + gt_token: node.gt_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_range_limits<F>(f: &mut F, node: RangeLimits) -> RangeLimits +where + F: Fold + ?Sized, +{ + match node { + RangeLimits::HalfOpen(_binding_0) => RangeLimits::HalfOpen(_binding_0), + RangeLimits::Closed(_binding_0) => RangeLimits::Closed(_binding_0), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_receiver<F>(f: &mut F, node: Receiver) -> Receiver +where + F: Fold + ?Sized, +{ + Receiver { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + reference: (node.reference) + .map(|it| ((it).0, ((it).1).map(|it| f.fold_lifetime(it)))), + mutability: node.mutability, + self_token: node.self_token, + colon_token: node.colon_token, + ty: Box::new(f.fold_type(*node.ty)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_return_type<F>(f: &mut F, node: ReturnType) -> ReturnType +where + F: Fold + ?Sized, +{ + match node { + ReturnType::Default => ReturnType::Default, + ReturnType::Type(_binding_0, _binding_1) => { + ReturnType::Type(_binding_0, Box::new(f.fold_type(*_binding_1))) + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_signature<F>(f: &mut F, node: Signature) -> Signature +where + F: Fold + ?Sized, +{ + Signature { + constness: node.constness, + asyncness: node.asyncness, + unsafety: node.unsafety, + abi: (node.abi).map(|it| f.fold_abi(it)), + fn_token: node.fn_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + paren_token: node.paren_token, + inputs: FoldHelper::lift(node.inputs, |it| f.fold_fn_arg(it)), + variadic: (node.variadic).map(|it| f.fold_variadic(it)), + output: f.fold_return_type(node.output), + } +} +pub fn fold_span<F>(f: &mut F, node: Span) -> Span +where + F: Fold + ?Sized, +{ + node +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_static_mutability<F>(f: &mut F, node: StaticMutability) -> StaticMutability +where + F: Fold + ?Sized, +{ + match node { + StaticMutability::Mut(_binding_0) => StaticMutability::Mut(_binding_0), + StaticMutability::None => StaticMutability::None, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_stmt<F>(f: &mut F, node: Stmt) -> Stmt +where + F: Fold + ?Sized, +{ + match node { + Stmt::Local(_binding_0) => Stmt::Local(f.fold_local(_binding_0)), + Stmt::Item(_binding_0) => Stmt::Item(f.fold_item(_binding_0)), + Stmt::Expr(_binding_0, _binding_1) => { + Stmt::Expr(f.fold_expr(_binding_0), _binding_1) + } + Stmt::Macro(_binding_0) => Stmt::Macro(f.fold_stmt_macro(_binding_0)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_stmt_macro<F>(f: &mut F, node: StmtMacro) -> StmtMacro +where + F: Fold + ?Sized, +{ + StmtMacro { + attrs: 
FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + mac: f.fold_macro(node.mac), + semi_token: node.semi_token, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_trait_bound<F>(f: &mut F, node: TraitBound) -> TraitBound +where + F: Fold + ?Sized, +{ + TraitBound { + paren_token: node.paren_token, + modifier: f.fold_trait_bound_modifier(node.modifier), + lifetimes: (node.lifetimes).map(|it| f.fold_bound_lifetimes(it)), + path: f.fold_path(node.path), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_trait_bound_modifier<F>( + f: &mut F, + node: TraitBoundModifier, +) -> TraitBoundModifier +where + F: Fold + ?Sized, +{ + match node { + TraitBoundModifier::None => TraitBoundModifier::None, + TraitBoundModifier::Maybe(_binding_0) => TraitBoundModifier::Maybe(_binding_0), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_trait_item<F>(f: &mut F, node: TraitItem) -> TraitItem +where + F: Fold + ?Sized, +{ + match node { + TraitItem::Const(_binding_0) => { + TraitItem::Const(f.fold_trait_item_const(_binding_0)) + } + TraitItem::Fn(_binding_0) => TraitItem::Fn(f.fold_trait_item_fn(_binding_0)), + TraitItem::Type(_binding_0) => { + TraitItem::Type(f.fold_trait_item_type(_binding_0)) + } + TraitItem::Macro(_binding_0) => { + TraitItem::Macro(f.fold_trait_item_macro(_binding_0)) + } + TraitItem::Verbatim(_binding_0) => TraitItem::Verbatim(_binding_0), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_trait_item_const<F>(f: &mut F, node: TraitItemConst) -> TraitItemConst +where + F: Fold + ?Sized, +{ + TraitItemConst { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + const_token: node.const_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + colon_token: node.colon_token, + ty: f.fold_type(node.ty), + default: (node.default).map(|it| ((it).0, f.fold_expr((it).1))), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_trait_item_fn<F>(f: &mut F, node: TraitItemFn) -> TraitItemFn +where + F: Fold + ?Sized, +{ + TraitItemFn { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + sig: f.fold_signature(node.sig), + default: (node.default).map(|it| f.fold_block(it)), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_trait_item_macro<F>(f: &mut F, node: TraitItemMacro) -> TraitItemMacro +where + F: Fold + ?Sized, +{ + TraitItemMacro { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + mac: f.fold_macro(node.mac), + semi_token: node.semi_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_trait_item_type<F>(f: &mut F, node: TraitItemType) -> TraitItemType +where + F: Fold + ?Sized, +{ + TraitItemType { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + type_token: node.type_token, + ident: f.fold_ident(node.ident), + generics: f.fold_generics(node.generics), + colon_token: node.colon_token, + bounds: FoldHelper::lift(node.bounds, |it| f.fold_type_param_bound(it)), + default: (node.default).map(|it| ((it).0, f.fold_type((it).1))), + semi_token: node.semi_token, + } +} +#[cfg(any(feature = "derive", feature = "full"))] 
+#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type<F>(f: &mut F, node: Type) -> Type +where + F: Fold + ?Sized, +{ + match node { + Type::Array(_binding_0) => Type::Array(f.fold_type_array(_binding_0)), + Type::BareFn(_binding_0) => Type::BareFn(f.fold_type_bare_fn(_binding_0)), + Type::Group(_binding_0) => Type::Group(f.fold_type_group(_binding_0)), + Type::ImplTrait(_binding_0) => { + Type::ImplTrait(f.fold_type_impl_trait(_binding_0)) + } + Type::Infer(_binding_0) => Type::Infer(f.fold_type_infer(_binding_0)), + Type::Macro(_binding_0) => Type::Macro(f.fold_type_macro(_binding_0)), + Type::Never(_binding_0) => Type::Never(f.fold_type_never(_binding_0)), + Type::Paren(_binding_0) => Type::Paren(f.fold_type_paren(_binding_0)), + Type::Path(_binding_0) => Type::Path(f.fold_type_path(_binding_0)), + Type::Ptr(_binding_0) => Type::Ptr(f.fold_type_ptr(_binding_0)), + Type::Reference(_binding_0) => Type::Reference(f.fold_type_reference(_binding_0)), + Type::Slice(_binding_0) => Type::Slice(f.fold_type_slice(_binding_0)), + Type::TraitObject(_binding_0) => { + Type::TraitObject(f.fold_type_trait_object(_binding_0)) + } + Type::Tuple(_binding_0) => Type::Tuple(f.fold_type_tuple(_binding_0)), + Type::Verbatim(_binding_0) => Type::Verbatim(_binding_0), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_array<F>(f: &mut F, node: TypeArray) -> TypeArray +where + F: Fold + ?Sized, +{ + TypeArray { + bracket_token: node.bracket_token, + elem: Box::new(f.fold_type(*node.elem)), + semi_token: node.semi_token, + len: f.fold_expr(node.len), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_bare_fn<F>(f: &mut F, node: TypeBareFn) -> TypeBareFn +where + F: Fold + ?Sized, +{ + TypeBareFn { + lifetimes: (node.lifetimes).map(|it| f.fold_bound_lifetimes(it)), + unsafety: node.unsafety, + abi: (node.abi).map(|it| f.fold_abi(it)), + fn_token: node.fn_token, + paren_token: node.paren_token, + inputs: FoldHelper::lift(node.inputs, |it| f.fold_bare_fn_arg(it)), + variadic: (node.variadic).map(|it| f.fold_bare_variadic(it)), + output: f.fold_return_type(node.output), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_group<F>(f: &mut F, node: TypeGroup) -> TypeGroup +where + F: Fold + ?Sized, +{ + TypeGroup { + group_token: node.group_token, + elem: Box::new(f.fold_type(*node.elem)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_impl_trait<F>(f: &mut F, node: TypeImplTrait) -> TypeImplTrait +where + F: Fold + ?Sized, +{ + TypeImplTrait { + impl_token: node.impl_token, + bounds: FoldHelper::lift(node.bounds, |it| f.fold_type_param_bound(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_infer<F>(f: &mut F, node: TypeInfer) -> TypeInfer +where + F: Fold + ?Sized, +{ + TypeInfer { + underscore_token: node.underscore_token, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_macro<F>(f: &mut F, node: TypeMacro) -> TypeMacro +where + F: Fold + ?Sized, +{ + TypeMacro { + mac: 
f.fold_macro(node.mac), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_never<F>(f: &mut F, node: TypeNever) -> TypeNever +where + F: Fold + ?Sized, +{ + TypeNever { + bang_token: node.bang_token, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_param<F>(f: &mut F, node: TypeParam) -> TypeParam +where + F: Fold + ?Sized, +{ + TypeParam { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + ident: f.fold_ident(node.ident), + colon_token: node.colon_token, + bounds: FoldHelper::lift(node.bounds, |it| f.fold_type_param_bound(it)), + eq_token: node.eq_token, + default: (node.default).map(|it| f.fold_type(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_param_bound<F>(f: &mut F, node: TypeParamBound) -> TypeParamBound +where + F: Fold + ?Sized, +{ + match node { + TypeParamBound::Trait(_binding_0) => { + TypeParamBound::Trait(f.fold_trait_bound(_binding_0)) + } + TypeParamBound::Lifetime(_binding_0) => { + TypeParamBound::Lifetime(f.fold_lifetime(_binding_0)) + } + TypeParamBound::Verbatim(_binding_0) => TypeParamBound::Verbatim(_binding_0), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_paren<F>(f: &mut F, node: TypeParen) -> TypeParen +where + F: Fold + ?Sized, +{ + TypeParen { + paren_token: node.paren_token, + elem: Box::new(f.fold_type(*node.elem)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_path<F>(f: &mut F, node: TypePath) -> TypePath +where + F: Fold + ?Sized, +{ + TypePath { + qself: (node.qself).map(|it| f.fold_qself(it)), + path: f.fold_path(node.path), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_ptr<F>(f: &mut F, node: TypePtr) -> TypePtr +where + F: Fold + ?Sized, +{ + TypePtr { + star_token: node.star_token, + const_token: node.const_token, + mutability: node.mutability, + elem: Box::new(f.fold_type(*node.elem)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_reference<F>(f: &mut F, node: TypeReference) -> TypeReference +where + F: Fold + ?Sized, +{ + TypeReference { + and_token: node.and_token, + lifetime: (node.lifetime).map(|it| f.fold_lifetime(it)), + mutability: node.mutability, + elem: Box::new(f.fold_type(*node.elem)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_slice<F>(f: &mut F, node: TypeSlice) -> TypeSlice +where + F: Fold + ?Sized, +{ + TypeSlice { + bracket_token: node.bracket_token, + elem: Box::new(f.fold_type(*node.elem)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_trait_object<F>(f: &mut F, node: TypeTraitObject) -> TypeTraitObject +where + F: Fold + ?Sized, +{ + TypeTraitObject { + dyn_token: node.dyn_token, + bounds: FoldHelper::lift(node.bounds, |it| f.fold_type_param_bound(it)), + } +} +#[cfg(any(feature = 
"derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_tuple<F>(f: &mut F, node: TypeTuple) -> TypeTuple +where + F: Fold + ?Sized, +{ + TypeTuple { + paren_token: node.paren_token, + elems: FoldHelper::lift(node.elems, |it| f.fold_type(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_un_op<F>(f: &mut F, node: UnOp) -> UnOp +where + F: Fold + ?Sized, +{ + match node { + UnOp::Deref(_binding_0) => UnOp::Deref(_binding_0), + UnOp::Not(_binding_0) => UnOp::Not(_binding_0), + UnOp::Neg(_binding_0) => UnOp::Neg(_binding_0), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_use_glob<F>(f: &mut F, node: UseGlob) -> UseGlob +where + F: Fold + ?Sized, +{ + UseGlob { + star_token: node.star_token, + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_use_group<F>(f: &mut F, node: UseGroup) -> UseGroup +where + F: Fold + ?Sized, +{ + UseGroup { + brace_token: node.brace_token, + items: FoldHelper::lift(node.items, |it| f.fold_use_tree(it)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_use_name<F>(f: &mut F, node: UseName) -> UseName +where + F: Fold + ?Sized, +{ + UseName { + ident: f.fold_ident(node.ident), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_use_path<F>(f: &mut F, node: UsePath) -> UsePath +where + F: Fold + ?Sized, +{ + UsePath { + ident: f.fold_ident(node.ident), + colon2_token: node.colon2_token, + tree: Box::new(f.fold_use_tree(*node.tree)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_use_rename<F>(f: &mut F, node: UseRename) -> UseRename +where + F: Fold + ?Sized, +{ + UseRename { + ident: f.fold_ident(node.ident), + as_token: node.as_token, + rename: f.fold_ident(node.rename), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_use_tree<F>(f: &mut F, node: UseTree) -> UseTree +where + F: Fold + ?Sized, +{ + match node { + UseTree::Path(_binding_0) => UseTree::Path(f.fold_use_path(_binding_0)), + UseTree::Name(_binding_0) => UseTree::Name(f.fold_use_name(_binding_0)), + UseTree::Rename(_binding_0) => UseTree::Rename(f.fold_use_rename(_binding_0)), + UseTree::Glob(_binding_0) => UseTree::Glob(f.fold_use_glob(_binding_0)), + UseTree::Group(_binding_0) => UseTree::Group(f.fold_use_group(_binding_0)), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_variadic<F>(f: &mut F, node: Variadic) -> Variadic +where + F: Fold + ?Sized, +{ + Variadic { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + pat: (node.pat).map(|it| (Box::new(f.fold_pat(*(it).0)), (it).1)), + dots: node.dots, + comma: node.comma, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_variant<F>(f: &mut F, node: Variant) -> Variant +where + F: Fold + ?Sized, +{ + Variant { + attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), + ident: f.fold_ident(node.ident), + fields: f.fold_fields(node.fields), + discriminant: (node.discriminant).map(|it| ((it).0, f.fold_expr((it).1))), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn 
fold_vis_restricted<F>(f: &mut F, node: VisRestricted) -> VisRestricted +where + F: Fold + ?Sized, +{ + VisRestricted { + pub_token: node.pub_token, + paren_token: node.paren_token, + in_token: node.in_token, + path: Box::new(f.fold_path(*node.path)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_visibility<F>(f: &mut F, node: Visibility) -> Visibility +where + F: Fold + ?Sized, +{ + match node { + Visibility::Public(_binding_0) => Visibility::Public(_binding_0), + Visibility::Restricted(_binding_0) => { + Visibility::Restricted(f.fold_vis_restricted(_binding_0)) + } + Visibility::Inherited => Visibility::Inherited, + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_where_clause<F>(f: &mut F, node: WhereClause) -> WhereClause +where + F: Fold + ?Sized, +{ + WhereClause { + where_token: node.where_token, + predicates: FoldHelper::lift(node.predicates, |it| f.fold_where_predicate(it)), + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_where_predicate<F>(f: &mut F, node: WherePredicate) -> WherePredicate +where + F: Fold + ?Sized, +{ + match node { + WherePredicate::Lifetime(_binding_0) => { + WherePredicate::Lifetime(f.fold_predicate_lifetime(_binding_0)) + } + WherePredicate::Type(_binding_0) => { + WherePredicate::Type(f.fold_predicate_type(_binding_0)) + } + } +} diff --git a/vendor/syn/src/gen/hash.rs b/vendor/syn/src/gen/hash.rs new file mode 100644 index 0000000..1c75341 --- /dev/null +++ b/vendor/syn/src/gen/hash.rs @@ -0,0 +1,2804 @@ +// This file is @generated by syn-internal-codegen. +// It is not intended for manual editing. 
+ +#[cfg(any(feature = "derive", feature = "full"))] +use crate::tt::TokenStreamHelper; +use crate::*; +use std::hash::{Hash, Hasher}; +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Abi { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.name.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for AngleBracketedGenericArguments { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.colon2_token.hash(state); + self.args.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Arm { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.pat.hash(state); + self.guard.hash(state); + self.body.hash(state); + self.comma.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for AssocConst { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.ident.hash(state); + self.generics.hash(state); + self.value.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for AssocType { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.ident.hash(state); + self.generics.hash(state); + self.ty.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for AttrStyle { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + AttrStyle::Outer => { + state.write_u8(0u8); + } + AttrStyle::Inner(_) => { + state.write_u8(1u8); + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Attribute { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.style.hash(state); + self.meta.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for BareFnArg { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.name.hash(state); + self.ty.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for BareVariadic { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.name.hash(state); + self.comma.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for BinOp { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + BinOp::Add(_) => { + state.write_u8(0u8); + } + BinOp::Sub(_) => { + state.write_u8(1u8); + } + BinOp::Mul(_) => { + state.write_u8(2u8); + } + BinOp::Div(_) => { + state.write_u8(3u8); + } + BinOp::Rem(_) => { + state.write_u8(4u8); + } + BinOp::And(_) => { + state.write_u8(5u8); + } + BinOp::Or(_) => { + state.write_u8(6u8); + } + BinOp::BitXor(_) => { + state.write_u8(7u8); + } + BinOp::BitAnd(_) => { + state.write_u8(8u8); + } + BinOp::BitOr(_) => { + state.write_u8(9u8); + } + BinOp::Shl(_) => { + state.write_u8(10u8); + } + BinOp::Shr(_) => { + state.write_u8(11u8); + } + BinOp::Eq(_) => { + state.write_u8(12u8); + } + BinOp::Lt(_) => { + state.write_u8(13u8); + } + BinOp::Le(_) => { + 
state.write_u8(14u8); + } + BinOp::Ne(_) => { + state.write_u8(15u8); + } + BinOp::Ge(_) => { + state.write_u8(16u8); + } + BinOp::Gt(_) => { + state.write_u8(17u8); + } + BinOp::AddAssign(_) => { + state.write_u8(18u8); + } + BinOp::SubAssign(_) => { + state.write_u8(19u8); + } + BinOp::MulAssign(_) => { + state.write_u8(20u8); + } + BinOp::DivAssign(_) => { + state.write_u8(21u8); + } + BinOp::RemAssign(_) => { + state.write_u8(22u8); + } + BinOp::BitXorAssign(_) => { + state.write_u8(23u8); + } + BinOp::BitAndAssign(_) => { + state.write_u8(24u8); + } + BinOp::BitOrAssign(_) => { + state.write_u8(25u8); + } + BinOp::ShlAssign(_) => { + state.write_u8(26u8); + } + BinOp::ShrAssign(_) => { + state.write_u8(27u8); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Block { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.stmts.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for BoundLifetimes { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.lifetimes.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ConstParam { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.ident.hash(state); + self.ty.hash(state); + self.eq_token.hash(state); + self.default.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Constraint { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.ident.hash(state); + self.generics.hash(state); + self.bounds.hash(state); + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Data { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + Data::Struct(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + Data::Enum(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + Data::Union(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + } + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for DataEnum { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.variants.hash(state); + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for DataStruct { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.fields.hash(state); + self.semi_token.hash(state); + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for DataUnion { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.fields.hash(state); + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for DeriveInput { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.data.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Expr { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + #[cfg(feature = "full")] + Expr::Array(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Assign(v0) => { + state.write_u8(1u8); + 
v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Async(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Await(v0) => { + state.write_u8(3u8); + v0.hash(state); + } + Expr::Binary(v0) => { + state.write_u8(4u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Block(v0) => { + state.write_u8(5u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Break(v0) => { + state.write_u8(6u8); + v0.hash(state); + } + Expr::Call(v0) => { + state.write_u8(7u8); + v0.hash(state); + } + Expr::Cast(v0) => { + state.write_u8(8u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Closure(v0) => { + state.write_u8(9u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Const(v0) => { + state.write_u8(10u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Continue(v0) => { + state.write_u8(11u8); + v0.hash(state); + } + Expr::Field(v0) => { + state.write_u8(12u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::ForLoop(v0) => { + state.write_u8(13u8); + v0.hash(state); + } + Expr::Group(v0) => { + state.write_u8(14u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::If(v0) => { + state.write_u8(15u8); + v0.hash(state); + } + Expr::Index(v0) => { + state.write_u8(16u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Infer(v0) => { + state.write_u8(17u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Let(v0) => { + state.write_u8(18u8); + v0.hash(state); + } + Expr::Lit(v0) => { + state.write_u8(19u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Loop(v0) => { + state.write_u8(20u8); + v0.hash(state); + } + Expr::Macro(v0) => { + state.write_u8(21u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Match(v0) => { + state.write_u8(22u8); + v0.hash(state); + } + Expr::MethodCall(v0) => { + state.write_u8(23u8); + v0.hash(state); + } + Expr::Paren(v0) => { + state.write_u8(24u8); + v0.hash(state); + } + Expr::Path(v0) => { + state.write_u8(25u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Range(v0) => { + state.write_u8(26u8); + v0.hash(state); + } + Expr::Reference(v0) => { + state.write_u8(27u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Repeat(v0) => { + state.write_u8(28u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Return(v0) => { + state.write_u8(29u8); + v0.hash(state); + } + Expr::Struct(v0) => { + state.write_u8(30u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Try(v0) => { + state.write_u8(31u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::TryBlock(v0) => { + state.write_u8(32u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Tuple(v0) => { + state.write_u8(33u8); + v0.hash(state); + } + Expr::Unary(v0) => { + state.write_u8(34u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Unsafe(v0) => { + state.write_u8(35u8); + v0.hash(state); + } + Expr::Verbatim(v0) => { + state.write_u8(36u8); + TokenStreamHelper(v0).hash(state); + } + #[cfg(feature = "full")] + Expr::While(v0) => { + state.write_u8(37u8); + v0.hash(state); + } + #[cfg(feature = "full")] + Expr::Yield(v0) => { + state.write_u8(38u8); + v0.hash(state); + } + #[cfg(not(feature = "full"))] + _ => unreachable!(), + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprArray { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.elems.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = 
"extra-traits")))] +impl Hash for ExprAssign { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.left.hash(state); + self.right.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprAsync { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.capture.hash(state); + self.block.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprAwait { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.base.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprBinary { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.left.hash(state); + self.op.hash(state); + self.right.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprBlock { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.label.hash(state); + self.block.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprBreak { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.label.hash(state); + self.expr.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprCall { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.func.hash(state); + self.args.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprCast { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.expr.hash(state); + self.ty.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprClosure { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.lifetimes.hash(state); + self.constness.hash(state); + self.movability.hash(state); + self.asyncness.hash(state); + self.capture.hash(state); + self.inputs.hash(state); + self.output.hash(state); + self.body.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprConst { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.block.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprContinue { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.label.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprField { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.base.hash(state); + self.member.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprForLoop { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.label.hash(state); + self.pat.hash(state); + self.expr.hash(state); + self.body.hash(state); + } +} +#[cfg(any(feature = 
"derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprGroup { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.expr.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprIf { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.cond.hash(state); + self.then_branch.hash(state); + self.else_branch.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprIndex { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.expr.hash(state); + self.index.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprInfer { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprLet { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.pat.hash(state); + self.expr.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprLit { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.lit.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprLoop { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.label.hash(state); + self.body.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprMacro { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.mac.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprMatch { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.expr.hash(state); + self.arms.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprMethodCall { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.receiver.hash(state); + self.method.hash(state); + self.turbofish.hash(state); + self.args.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprParen { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.expr.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprPath { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.qself.hash(state); + self.path.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprRange { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.start.hash(state); + self.limits.hash(state); + self.end.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for 
ExprReference { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.mutability.hash(state); + self.expr.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprRepeat { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.expr.hash(state); + self.len.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprReturn { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.expr.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprStruct { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.qself.hash(state); + self.path.hash(state); + self.fields.hash(state); + self.dot2_token.hash(state); + self.rest.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprTry { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.expr.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprTryBlock { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.block.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprTuple { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.elems.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprUnary { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.op.hash(state); + self.expr.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprUnsafe { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.block.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprWhile { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.label.hash(state); + self.cond.hash(state); + self.body.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ExprYield { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.expr.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Field { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.mutability.hash(state); + self.ident.hash(state); + self.colon_token.hash(state); + self.ty.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for FieldMutability { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + FieldMutability::None => { + state.write_u8(0u8); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for FieldPat { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + 
self.attrs.hash(state); + self.member.hash(state); + self.colon_token.hash(state); + self.pat.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for FieldValue { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.member.hash(state); + self.colon_token.hash(state); + self.expr.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Fields { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + Fields::Named(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + Fields::Unnamed(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + Fields::Unit => { + state.write_u8(2u8); + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for FieldsNamed { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.named.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for FieldsUnnamed { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.unnamed.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for File { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.shebang.hash(state); + self.attrs.hash(state); + self.items.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for FnArg { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + FnArg::Receiver(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + FnArg::Typed(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ForeignItem { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + ForeignItem::Fn(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + ForeignItem::Static(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + ForeignItem::Type(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + ForeignItem::Macro(v0) => { + state.write_u8(3u8); + v0.hash(state); + } + ForeignItem::Verbatim(v0) => { + state.write_u8(4u8); + TokenStreamHelper(v0).hash(state); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ForeignItemFn { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.sig.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ForeignItemMacro { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.mac.hash(state); + self.semi_token.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ForeignItemStatic { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.mutability.hash(state); + self.ident.hash(state); + self.ty.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ForeignItemType { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + 
self.ident.hash(state); + self.generics.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for GenericArgument { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + GenericArgument::Lifetime(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + GenericArgument::Type(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + GenericArgument::Const(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + GenericArgument::AssocType(v0) => { + state.write_u8(3u8); + v0.hash(state); + } + GenericArgument::AssocConst(v0) => { + state.write_u8(4u8); + v0.hash(state); + } + GenericArgument::Constraint(v0) => { + state.write_u8(5u8); + v0.hash(state); + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for GenericParam { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + GenericParam::Lifetime(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + GenericParam::Type(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + GenericParam::Const(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Generics { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.lt_token.hash(state); + self.params.hash(state); + self.gt_token.hash(state); + self.where_clause.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ImplItem { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + ImplItem::Const(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + ImplItem::Fn(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + ImplItem::Type(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + ImplItem::Macro(v0) => { + state.write_u8(3u8); + v0.hash(state); + } + ImplItem::Verbatim(v0) => { + state.write_u8(4u8); + TokenStreamHelper(v0).hash(state); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ImplItemConst { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.defaultness.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.ty.hash(state); + self.expr.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ImplItemFn { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.defaultness.hash(state); + self.sig.hash(state); + self.block.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ImplItemMacro { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.mac.hash(state); + self.semi_token.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ImplItemType { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.defaultness.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.ty.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ImplRestriction { + fn hash<H>(&self, _state: 
&mut H) + where + H: Hasher, + { + match *self {} + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Item { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + Item::Const(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + Item::Enum(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + Item::ExternCrate(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + Item::Fn(v0) => { + state.write_u8(3u8); + v0.hash(state); + } + Item::ForeignMod(v0) => { + state.write_u8(4u8); + v0.hash(state); + } + Item::Impl(v0) => { + state.write_u8(5u8); + v0.hash(state); + } + Item::Macro(v0) => { + state.write_u8(6u8); + v0.hash(state); + } + Item::Mod(v0) => { + state.write_u8(7u8); + v0.hash(state); + } + Item::Static(v0) => { + state.write_u8(8u8); + v0.hash(state); + } + Item::Struct(v0) => { + state.write_u8(9u8); + v0.hash(state); + } + Item::Trait(v0) => { + state.write_u8(10u8); + v0.hash(state); + } + Item::TraitAlias(v0) => { + state.write_u8(11u8); + v0.hash(state); + } + Item::Type(v0) => { + state.write_u8(12u8); + v0.hash(state); + } + Item::Union(v0) => { + state.write_u8(13u8); + v0.hash(state); + } + Item::Use(v0) => { + state.write_u8(14u8); + v0.hash(state); + } + Item::Verbatim(v0) => { + state.write_u8(15u8); + TokenStreamHelper(v0).hash(state); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemConst { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.ty.hash(state); + self.expr.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemEnum { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.variants.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemExternCrate { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.ident.hash(state); + self.rename.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemFn { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.sig.hash(state); + self.block.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemForeignMod { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.unsafety.hash(state); + self.abi.hash(state); + self.items.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemImpl { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.defaultness.hash(state); + self.unsafety.hash(state); + self.generics.hash(state); + self.trait_.hash(state); + self.self_ty.hash(state); + self.items.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemMacro { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.ident.hash(state); + self.mac.hash(state); + self.semi_token.hash(state); + } +} +#[cfg(feature = "full")] 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemMod { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.unsafety.hash(state); + self.ident.hash(state); + self.content.hash(state); + self.semi.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemStatic { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.mutability.hash(state); + self.ident.hash(state); + self.ty.hash(state); + self.expr.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemStruct { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.fields.hash(state); + self.semi_token.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemTrait { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.unsafety.hash(state); + self.auto_token.hash(state); + self.restriction.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.colon_token.hash(state); + self.supertraits.hash(state); + self.items.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemTraitAlias { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.bounds.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemType { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.ty.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemUnion { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.fields.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ItemUse { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.vis.hash(state); + self.leading_colon.hash(state); + self.tree.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Label { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.name.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for LifetimeParam { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.lifetime.hash(state); + self.colon_token.hash(state); + self.bounds.hash(state); + } +} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Lit { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + Lit::Str(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + Lit::ByteStr(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + Lit::Byte(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + Lit::Char(v0) 
=> { + state.write_u8(3u8); + v0.hash(state); + } + Lit::Int(v0) => { + state.write_u8(4u8); + v0.hash(state); + } + Lit::Float(v0) => { + state.write_u8(5u8); + v0.hash(state); + } + Lit::Bool(v0) => { + state.write_u8(6u8); + v0.hash(state); + } + Lit::Verbatim(v0) => { + state.write_u8(7u8); + v0.to_string().hash(state); + } + } + } +} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for LitBool { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.value.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Local { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.pat.hash(state); + self.init.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for LocalInit { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.expr.hash(state); + self.diverge.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Macro { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.path.hash(state); + self.delimiter.hash(state); + TokenStreamHelper(&self.tokens).hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for MacroDelimiter { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + MacroDelimiter::Paren(_) => { + state.write_u8(0u8); + } + MacroDelimiter::Brace(_) => { + state.write_u8(1u8); + } + MacroDelimiter::Bracket(_) => { + state.write_u8(2u8); + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Meta { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + Meta::Path(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + Meta::List(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + Meta::NameValue(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for MetaList { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.path.hash(state); + self.delimiter.hash(state); + TokenStreamHelper(&self.tokens).hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for MetaNameValue { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.path.hash(state); + self.value.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ParenthesizedGenericArguments { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.inputs.hash(state); + self.output.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Pat { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + Pat::Const(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + Pat::Ident(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + Pat::Lit(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + Pat::Macro(v0) => { + state.write_u8(3u8); + v0.hash(state); + } + Pat::Or(v0) => { + state.write_u8(4u8); + v0.hash(state); + } + Pat::Paren(v0) => { + state.write_u8(5u8); + v0.hash(state); + } + 
Pat::Path(v0) => { + state.write_u8(6u8); + v0.hash(state); + } + Pat::Range(v0) => { + state.write_u8(7u8); + v0.hash(state); + } + Pat::Reference(v0) => { + state.write_u8(8u8); + v0.hash(state); + } + Pat::Rest(v0) => { + state.write_u8(9u8); + v0.hash(state); + } + Pat::Slice(v0) => { + state.write_u8(10u8); + v0.hash(state); + } + Pat::Struct(v0) => { + state.write_u8(11u8); + v0.hash(state); + } + Pat::Tuple(v0) => { + state.write_u8(12u8); + v0.hash(state); + } + Pat::TupleStruct(v0) => { + state.write_u8(13u8); + v0.hash(state); + } + Pat::Type(v0) => { + state.write_u8(14u8); + v0.hash(state); + } + Pat::Verbatim(v0) => { + state.write_u8(15u8); + TokenStreamHelper(v0).hash(state); + } + Pat::Wild(v0) => { + state.write_u8(16u8); + v0.hash(state); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PatIdent { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.by_ref.hash(state); + self.mutability.hash(state); + self.ident.hash(state); + self.subpat.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PatOr { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.leading_vert.hash(state); + self.cases.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PatParen { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.pat.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PatReference { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.mutability.hash(state); + self.pat.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PatRest { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PatSlice { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.elems.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PatStruct { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.qself.hash(state); + self.path.hash(state); + self.fields.hash(state); + self.rest.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PatTuple { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.elems.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PatTupleStruct { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.qself.hash(state); + self.path.hash(state); + self.elems.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PatType { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.pat.hash(state); + self.ty.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PatWild { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + } +} 
+#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Path { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.leading_colon.hash(state); + self.segments.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PathArguments { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + PathArguments::None => { + state.write_u8(0u8); + } + PathArguments::AngleBracketed(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + PathArguments::Parenthesized(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PathSegment { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.ident.hash(state); + self.arguments.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PredicateLifetime { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.lifetime.hash(state); + self.bounds.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for PredicateType { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.lifetimes.hash(state); + self.bounded_ty.hash(state); + self.bounds.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for QSelf { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.ty.hash(state); + self.position.hash(state); + self.as_token.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for RangeLimits { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + RangeLimits::HalfOpen(_) => { + state.write_u8(0u8); + } + RangeLimits::Closed(_) => { + state.write_u8(1u8); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Receiver { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.reference.hash(state); + self.mutability.hash(state); + self.colon_token.hash(state); + self.ty.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for ReturnType { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + ReturnType::Default => { + state.write_u8(0u8); + } + ReturnType::Type(_, v1) => { + state.write_u8(1u8); + v1.hash(state); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Signature { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.constness.hash(state); + self.asyncness.hash(state); + self.unsafety.hash(state); + self.abi.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.inputs.hash(state); + self.variadic.hash(state); + self.output.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for StaticMutability { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + StaticMutability::Mut(_) => { + state.write_u8(0u8); + } + StaticMutability::None => { + state.write_u8(1u8); + } + } + } +} 
+#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Stmt { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + Stmt::Local(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + Stmt::Item(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + Stmt::Expr(v0, v1) => { + state.write_u8(2u8); + v0.hash(state); + v1.hash(state); + } + Stmt::Macro(v0) => { + state.write_u8(3u8); + v0.hash(state); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for StmtMacro { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.mac.hash(state); + self.semi_token.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TraitBound { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.paren_token.hash(state); + self.modifier.hash(state); + self.lifetimes.hash(state); + self.path.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TraitBoundModifier { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + TraitBoundModifier::None => { + state.write_u8(0u8); + } + TraitBoundModifier::Maybe(_) => { + state.write_u8(1u8); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TraitItem { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + TraitItem::Const(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + TraitItem::Fn(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + TraitItem::Type(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + TraitItem::Macro(v0) => { + state.write_u8(3u8); + v0.hash(state); + } + TraitItem::Verbatim(v0) => { + state.write_u8(4u8); + TokenStreamHelper(v0).hash(state); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TraitItemConst { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.ty.hash(state); + self.default.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TraitItemFn { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.sig.hash(state); + self.default.hash(state); + self.semi_token.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TraitItemMacro { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.mac.hash(state); + self.semi_token.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TraitItemType { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.ident.hash(state); + self.generics.hash(state); + self.colon_token.hash(state); + self.bounds.hash(state); + self.default.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Type { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + Type::Array(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + Type::BareFn(v0) => { + state.write_u8(1u8); + v0.hash(state); + 
} + Type::Group(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + Type::ImplTrait(v0) => { + state.write_u8(3u8); + v0.hash(state); + } + Type::Infer(v0) => { + state.write_u8(4u8); + v0.hash(state); + } + Type::Macro(v0) => { + state.write_u8(5u8); + v0.hash(state); + } + Type::Never(v0) => { + state.write_u8(6u8); + v0.hash(state); + } + Type::Paren(v0) => { + state.write_u8(7u8); + v0.hash(state); + } + Type::Path(v0) => { + state.write_u8(8u8); + v0.hash(state); + } + Type::Ptr(v0) => { + state.write_u8(9u8); + v0.hash(state); + } + Type::Reference(v0) => { + state.write_u8(10u8); + v0.hash(state); + } + Type::Slice(v0) => { + state.write_u8(11u8); + v0.hash(state); + } + Type::TraitObject(v0) => { + state.write_u8(12u8); + v0.hash(state); + } + Type::Tuple(v0) => { + state.write_u8(13u8); + v0.hash(state); + } + Type::Verbatim(v0) => { + state.write_u8(14u8); + TokenStreamHelper(v0).hash(state); + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeArray { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.elem.hash(state); + self.len.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeBareFn { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.lifetimes.hash(state); + self.unsafety.hash(state); + self.abi.hash(state); + self.inputs.hash(state); + self.variadic.hash(state); + self.output.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeGroup { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.elem.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeImplTrait { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.bounds.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeInfer { + fn hash<H>(&self, _state: &mut H) + where + H: Hasher, + {} +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeMacro { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.mac.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeNever { + fn hash<H>(&self, _state: &mut H) + where + H: Hasher, + {} +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeParam { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.ident.hash(state); + self.colon_token.hash(state); + self.bounds.hash(state); + self.eq_token.hash(state); + self.default.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeParamBound { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + TypeParamBound::Trait(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + TypeParamBound::Lifetime(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + TypeParamBound::Verbatim(v0) => { + state.write_u8(2u8); + TokenStreamHelper(v0).hash(state); + } + } + } +} +#[cfg(any(feature = "derive", feature = 
"full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeParen { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.elem.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypePath { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.qself.hash(state); + self.path.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypePtr { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.const_token.hash(state); + self.mutability.hash(state); + self.elem.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeReference { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.lifetime.hash(state); + self.mutability.hash(state); + self.elem.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeSlice { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.elem.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeTraitObject { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.dyn_token.hash(state); + self.bounds.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for TypeTuple { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.elems.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for UnOp { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + UnOp::Deref(_) => { + state.write_u8(0u8); + } + UnOp::Not(_) => { + state.write_u8(1u8); + } + UnOp::Neg(_) => { + state.write_u8(2u8); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for UseGlob { + fn hash<H>(&self, _state: &mut H) + where + H: Hasher, + {} +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for UseGroup { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.items.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for UseName { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.ident.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for UsePath { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.ident.hash(state); + self.tree.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for UseRename { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.ident.hash(state); + self.rename.hash(state); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for UseTree { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + UseTree::Path(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + UseTree::Name(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + UseTree::Rename(v0) => { + state.write_u8(2u8); + v0.hash(state); + } + 
UseTree::Glob(v0) => { + state.write_u8(3u8); + v0.hash(state); + } + UseTree::Group(v0) => { + state.write_u8(4u8); + v0.hash(state); + } + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Variadic { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.pat.hash(state); + self.comma.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Variant { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.attrs.hash(state); + self.ident.hash(state); + self.fields.hash(state); + self.discriminant.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for VisRestricted { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.in_token.hash(state); + self.path.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Visibility { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + Visibility::Public(_) => { + state.write_u8(0u8); + } + Visibility::Restricted(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + Visibility::Inherited => { + state.write_u8(2u8); + } + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for WhereClause { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.predicates.hash(state); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for WherePredicate { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + match self { + WherePredicate::Lifetime(v0) => { + state.write_u8(0u8); + v0.hash(state); + } + WherePredicate::Type(v0) => { + state.write_u8(1u8); + v0.hash(state); + } + } + } +} diff --git a/vendor/syn/src/gen/visit.rs b/vendor/syn/src/gen/visit.rs new file mode 100644 index 0000000..572e0ad --- /dev/null +++ b/vendor/syn/src/gen/visit.rs @@ -0,0 +1,3844 @@ +// This file is @generated by syn-internal-codegen. +// It is not intended for manual editing. + +#![allow(unused_variables)] +#![allow(clippy::needless_pass_by_ref_mut)] +#[cfg(any(feature = "full", feature = "derive"))] +use crate::punctuated::Punctuated; +use crate::*; +use proc_macro2::Span; +#[cfg(feature = "full")] +macro_rules! full { + ($e:expr) => { + $e + }; +} +#[cfg(all(feature = "derive", not(feature = "full")))] +macro_rules! full { + ($e:expr) => { + unreachable!() + }; +} +macro_rules! skip { + ($($tt:tt)*) => {}; +} +/// Syntax tree traversal to walk a shared borrow of a syntax tree. +/// +/// See the [module documentation] for details. 
+/// +/// [module documentation]: self +pub trait Visit<'ast> { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_abi(&mut self, i: &'ast Abi) { + visit_abi(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_angle_bracketed_generic_arguments( + &mut self, + i: &'ast AngleBracketedGenericArguments, + ) { + visit_angle_bracketed_generic_arguments(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_arm(&mut self, i: &'ast Arm) { + visit_arm(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_assoc_const(&mut self, i: &'ast AssocConst) { + visit_assoc_const(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_assoc_type(&mut self, i: &'ast AssocType) { + visit_assoc_type(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_attr_style(&mut self, i: &'ast AttrStyle) { + visit_attr_style(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_attribute(&mut self, i: &'ast Attribute) { + visit_attribute(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bare_fn_arg(&mut self, i: &'ast BareFnArg) { + visit_bare_fn_arg(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bare_variadic(&mut self, i: &'ast BareVariadic) { + visit_bare_variadic(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bin_op(&mut self, i: &'ast BinOp) { + visit_bin_op(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_block(&mut self, i: &'ast Block) { + visit_block(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bound_lifetimes(&mut self, i: &'ast BoundLifetimes) { + visit_bound_lifetimes(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_const_param(&mut self, i: &'ast ConstParam) { + visit_const_param(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_constraint(&mut self, i: &'ast Constraint) { + visit_constraint(self, i); + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data(&mut self, i: &'ast Data) { + visit_data(self, i); + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_enum(&mut self, i: &'ast DataEnum) { + visit_data_enum(self, i); + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_struct(&mut self, i: &'ast DataStruct) { + visit_data_struct(self, i); + } + #[cfg(feature = "derive")] + 
#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_union(&mut self, i: &'ast DataUnion) { + visit_data_union(self, i); + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_derive_input(&mut self, i: &'ast DeriveInput) { + visit_derive_input(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr(&mut self, i: &'ast Expr) { + visit_expr(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_array(&mut self, i: &'ast ExprArray) { + visit_expr_array(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_assign(&mut self, i: &'ast ExprAssign) { + visit_expr_assign(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_async(&mut self, i: &'ast ExprAsync) { + visit_expr_async(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_await(&mut self, i: &'ast ExprAwait) { + visit_expr_await(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_binary(&mut self, i: &'ast ExprBinary) { + visit_expr_binary(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_block(&mut self, i: &'ast ExprBlock) { + visit_expr_block(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_break(&mut self, i: &'ast ExprBreak) { + visit_expr_break(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_call(&mut self, i: &'ast ExprCall) { + visit_expr_call(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_cast(&mut self, i: &'ast ExprCast) { + visit_expr_cast(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_closure(&mut self, i: &'ast ExprClosure) { + visit_expr_closure(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_const(&mut self, i: &'ast ExprConst) { + visit_expr_const(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_continue(&mut self, i: &'ast ExprContinue) { + visit_expr_continue(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_field(&mut self, i: &'ast ExprField) { + visit_expr_field(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_for_loop(&mut self, i: &'ast ExprForLoop) { + visit_expr_for_loop(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_group(&mut self, i: &'ast ExprGroup) { + visit_expr_group(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_if(&mut self, i: &'ast ExprIf) { + visit_expr_if(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = 
"full"))))] + fn visit_expr_index(&mut self, i: &'ast ExprIndex) { + visit_expr_index(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_infer(&mut self, i: &'ast ExprInfer) { + visit_expr_infer(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_let(&mut self, i: &'ast ExprLet) { + visit_expr_let(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_lit(&mut self, i: &'ast ExprLit) { + visit_expr_lit(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_loop(&mut self, i: &'ast ExprLoop) { + visit_expr_loop(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_macro(&mut self, i: &'ast ExprMacro) { + visit_expr_macro(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_match(&mut self, i: &'ast ExprMatch) { + visit_expr_match(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_method_call(&mut self, i: &'ast ExprMethodCall) { + visit_expr_method_call(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_paren(&mut self, i: &'ast ExprParen) { + visit_expr_paren(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_path(&mut self, i: &'ast ExprPath) { + visit_expr_path(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_range(&mut self, i: &'ast ExprRange) { + visit_expr_range(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_reference(&mut self, i: &'ast ExprReference) { + visit_expr_reference(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_repeat(&mut self, i: &'ast ExprRepeat) { + visit_expr_repeat(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_return(&mut self, i: &'ast ExprReturn) { + visit_expr_return(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_struct(&mut self, i: &'ast ExprStruct) { + visit_expr_struct(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_try(&mut self, i: &'ast ExprTry) { + visit_expr_try(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_try_block(&mut self, i: &'ast ExprTryBlock) { + visit_expr_try_block(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_tuple(&mut self, i: &'ast ExprTuple) { + visit_expr_tuple(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_unary(&mut self, i: &'ast ExprUnary) { + visit_expr_unary(self, i); + } + #[cfg(feature = "full")] + 
#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_unsafe(&mut self, i: &'ast ExprUnsafe) { + visit_expr_unsafe(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_while(&mut self, i: &'ast ExprWhile) { + visit_expr_while(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_yield(&mut self, i: &'ast ExprYield) { + visit_expr_yield(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field(&mut self, i: &'ast Field) { + visit_field(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field_mutability(&mut self, i: &'ast FieldMutability) { + visit_field_mutability(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_field_pat(&mut self, i: &'ast FieldPat) { + visit_field_pat(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field_value(&mut self, i: &'ast FieldValue) { + visit_field_value(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields(&mut self, i: &'ast Fields) { + visit_fields(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields_named(&mut self, i: &'ast FieldsNamed) { + visit_fields_named(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields_unnamed(&mut self, i: &'ast FieldsUnnamed) { + visit_fields_unnamed(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_file(&mut self, i: &'ast File) { + visit_file(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_fn_arg(&mut self, i: &'ast FnArg) { + visit_fn_arg(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item(&mut self, i: &'ast ForeignItem) { + visit_foreign_item(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_fn(&mut self, i: &'ast ForeignItemFn) { + visit_foreign_item_fn(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_macro(&mut self, i: &'ast ForeignItemMacro) { + visit_foreign_item_macro(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_static(&mut self, i: &'ast ForeignItemStatic) { + visit_foreign_item_static(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_type(&mut self, i: &'ast ForeignItemType) { + visit_foreign_item_type(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_generic_argument(&mut self, i: &'ast GenericArgument) { + visit_generic_argument(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn 
visit_generic_param(&mut self, i: &'ast GenericParam) { + visit_generic_param(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_generics(&mut self, i: &'ast Generics) { + visit_generics(self, i); + } + fn visit_ident(&mut self, i: &'ast Ident) { + visit_ident(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item(&mut self, i: &'ast ImplItem) { + visit_impl_item(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_const(&mut self, i: &'ast ImplItemConst) { + visit_impl_item_const(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_fn(&mut self, i: &'ast ImplItemFn) { + visit_impl_item_fn(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_macro(&mut self, i: &'ast ImplItemMacro) { + visit_impl_item_macro(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_type(&mut self, i: &'ast ImplItemType) { + visit_impl_item_type(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_restriction(&mut self, i: &'ast ImplRestriction) { + visit_impl_restriction(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_index(&mut self, i: &'ast Index) { + visit_index(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item(&mut self, i: &'ast Item) { + visit_item(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_const(&mut self, i: &'ast ItemConst) { + visit_item_const(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_enum(&mut self, i: &'ast ItemEnum) { + visit_item_enum(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_extern_crate(&mut self, i: &'ast ItemExternCrate) { + visit_item_extern_crate(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_fn(&mut self, i: &'ast ItemFn) { + visit_item_fn(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_foreign_mod(&mut self, i: &'ast ItemForeignMod) { + visit_item_foreign_mod(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_impl(&mut self, i: &'ast ItemImpl) { + visit_item_impl(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_macro(&mut self, i: &'ast ItemMacro) { + visit_item_macro(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_mod(&mut self, i: &'ast ItemMod) { + visit_item_mod(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_static(&mut self, i: &'ast ItemStatic) { + visit_item_static(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_struct(&mut self, i: &'ast ItemStruct) { + visit_item_struct(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn 
visit_item_trait(&mut self, i: &'ast ItemTrait) { + visit_item_trait(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_trait_alias(&mut self, i: &'ast ItemTraitAlias) { + visit_item_trait_alias(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_type(&mut self, i: &'ast ItemType) { + visit_item_type(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_union(&mut self, i: &'ast ItemUnion) { + visit_item_union(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_use(&mut self, i: &'ast ItemUse) { + visit_item_use(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_label(&mut self, i: &'ast Label) { + visit_label(self, i); + } + fn visit_lifetime(&mut self, i: &'ast Lifetime) { + visit_lifetime(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_lifetime_param(&mut self, i: &'ast LifetimeParam) { + visit_lifetime_param(self, i); + } + fn visit_lit(&mut self, i: &'ast Lit) { + visit_lit(self, i); + } + fn visit_lit_bool(&mut self, i: &'ast LitBool) { + visit_lit_bool(self, i); + } + fn visit_lit_byte(&mut self, i: &'ast LitByte) { + visit_lit_byte(self, i); + } + fn visit_lit_byte_str(&mut self, i: &'ast LitByteStr) { + visit_lit_byte_str(self, i); + } + fn visit_lit_char(&mut self, i: &'ast LitChar) { + visit_lit_char(self, i); + } + fn visit_lit_float(&mut self, i: &'ast LitFloat) { + visit_lit_float(self, i); + } + fn visit_lit_int(&mut self, i: &'ast LitInt) { + visit_lit_int(self, i); + } + fn visit_lit_str(&mut self, i: &'ast LitStr) { + visit_lit_str(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_local(&mut self, i: &'ast Local) { + visit_local(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_local_init(&mut self, i: &'ast LocalInit) { + visit_local_init(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_macro(&mut self, i: &'ast Macro) { + visit_macro(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_macro_delimiter(&mut self, i: &'ast MacroDelimiter) { + visit_macro_delimiter(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_member(&mut self, i: &'ast Member) { + visit_member(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta(&mut self, i: &'ast Meta) { + visit_meta(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta_list(&mut self, i: &'ast MetaList) { + visit_meta_list(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta_name_value(&mut self, i: &'ast MetaNameValue) { + visit_meta_name_value(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, 
doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_parenthesized_generic_arguments( + &mut self, + i: &'ast ParenthesizedGenericArguments, + ) { + visit_parenthesized_generic_arguments(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat(&mut self, i: &'ast Pat) { + visit_pat(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_ident(&mut self, i: &'ast PatIdent) { + visit_pat_ident(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_or(&mut self, i: &'ast PatOr) { + visit_pat_or(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_paren(&mut self, i: &'ast PatParen) { + visit_pat_paren(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_reference(&mut self, i: &'ast PatReference) { + visit_pat_reference(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_rest(&mut self, i: &'ast PatRest) { + visit_pat_rest(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_slice(&mut self, i: &'ast PatSlice) { + visit_pat_slice(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_struct(&mut self, i: &'ast PatStruct) { + visit_pat_struct(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_tuple(&mut self, i: &'ast PatTuple) { + visit_pat_tuple(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_tuple_struct(&mut self, i: &'ast PatTupleStruct) { + visit_pat_tuple_struct(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_type(&mut self, i: &'ast PatType) { + visit_pat_type(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_wild(&mut self, i: &'ast PatWild) { + visit_pat_wild(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_path(&mut self, i: &'ast Path) { + visit_path(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_path_arguments(&mut self, i: &'ast PathArguments) { + visit_path_arguments(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_path_segment(&mut self, i: &'ast PathSegment) { + visit_path_segment(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_predicate_lifetime(&mut self, i: &'ast PredicateLifetime) { + visit_predicate_lifetime(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_predicate_type(&mut self, i: &'ast PredicateType) { + visit_predicate_type(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_qself(&mut self, i: &'ast QSelf) { + visit_qself(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = 
"full")))] + fn visit_range_limits(&mut self, i: &'ast RangeLimits) { + visit_range_limits(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_receiver(&mut self, i: &'ast Receiver) { + visit_receiver(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_return_type(&mut self, i: &'ast ReturnType) { + visit_return_type(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_signature(&mut self, i: &'ast Signature) { + visit_signature(self, i); + } + fn visit_span(&mut self, i: &Span) { + visit_span(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_static_mutability(&mut self, i: &'ast StaticMutability) { + visit_static_mutability(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_stmt(&mut self, i: &'ast Stmt) { + visit_stmt(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_stmt_macro(&mut self, i: &'ast StmtMacro) { + visit_stmt_macro(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_trait_bound(&mut self, i: &'ast TraitBound) { + visit_trait_bound(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_trait_bound_modifier(&mut self, i: &'ast TraitBoundModifier) { + visit_trait_bound_modifier(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item(&mut self, i: &'ast TraitItem) { + visit_trait_item(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_const(&mut self, i: &'ast TraitItemConst) { + visit_trait_item_const(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_fn(&mut self, i: &'ast TraitItemFn) { + visit_trait_item_fn(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_macro(&mut self, i: &'ast TraitItemMacro) { + visit_trait_item_macro(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_type(&mut self, i: &'ast TraitItemType) { + visit_trait_item_type(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type(&mut self, i: &'ast Type) { + visit_type(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_array(&mut self, i: &'ast TypeArray) { + visit_type_array(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_bare_fn(&mut self, i: &'ast TypeBareFn) { + visit_type_bare_fn(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_group(&mut self, i: &'ast TypeGroup) { + visit_type_group(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn 
visit_type_impl_trait(&mut self, i: &'ast TypeImplTrait) { + visit_type_impl_trait(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_infer(&mut self, i: &'ast TypeInfer) { + visit_type_infer(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_macro(&mut self, i: &'ast TypeMacro) { + visit_type_macro(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_never(&mut self, i: &'ast TypeNever) { + visit_type_never(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_param(&mut self, i: &'ast TypeParam) { + visit_type_param(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_param_bound(&mut self, i: &'ast TypeParamBound) { + visit_type_param_bound(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_paren(&mut self, i: &'ast TypeParen) { + visit_type_paren(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_path(&mut self, i: &'ast TypePath) { + visit_type_path(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_ptr(&mut self, i: &'ast TypePtr) { + visit_type_ptr(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_reference(&mut self, i: &'ast TypeReference) { + visit_type_reference(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_slice(&mut self, i: &'ast TypeSlice) { + visit_type_slice(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_trait_object(&mut self, i: &'ast TypeTraitObject) { + visit_type_trait_object(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_tuple(&mut self, i: &'ast TypeTuple) { + visit_type_tuple(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_un_op(&mut self, i: &'ast UnOp) { + visit_un_op(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_glob(&mut self, i: &'ast UseGlob) { + visit_use_glob(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_group(&mut self, i: &'ast UseGroup) { + visit_use_group(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_name(&mut self, i: &'ast UseName) { + visit_use_name(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_path(&mut self, i: &'ast UsePath) { + 
visit_use_path(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_rename(&mut self, i: &'ast UseRename) { + visit_use_rename(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_tree(&mut self, i: &'ast UseTree) { + visit_use_tree(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_variadic(&mut self, i: &'ast Variadic) { + visit_variadic(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_variant(&mut self, i: &'ast Variant) { + visit_variant(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_vis_restricted(&mut self, i: &'ast VisRestricted) { + visit_vis_restricted(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_visibility(&mut self, i: &'ast Visibility) { + visit_visibility(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_where_clause(&mut self, i: &'ast WhereClause) { + visit_where_clause(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_where_predicate(&mut self, i: &'ast WherePredicate) { + visit_where_predicate(self, i); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_abi<'ast, V>(v: &mut V, node: &'ast Abi) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.extern_token); + if let Some(it) = &node.name { + v.visit_lit_str(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_angle_bracketed_generic_arguments<'ast, V>( + v: &mut V, + node: &'ast AngleBracketedGenericArguments, +) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.colon2_token); + skip!(node.lt_token); + for el in Punctuated::pairs(&node.args) { + let it = el.value(); + v.visit_generic_argument(it); + } + skip!(node.gt_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_arm<'ast, V>(v: &mut V, node: &'ast Arm) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_pat(&node.pat); + if let Some(it) = &node.guard { + skip!((it).0); + v.visit_expr(&*(it).1); + } + skip!(node.fat_arrow_token); + v.visit_expr(&*node.body); + skip!(node.comma); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_assoc_const<'ast, V>(v: &mut V, node: &'ast AssocConst) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_ident(&node.ident); + if let Some(it) = &node.generics { + v.visit_angle_bracketed_generic_arguments(it); + } + skip!(node.eq_token); + v.visit_expr(&node.value); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_assoc_type<'ast, V>(v: &mut V, node: &'ast AssocType) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_ident(&node.ident); + if let Some(it) = &node.generics { + 
v.visit_angle_bracketed_generic_arguments(it); + } + skip!(node.eq_token); + v.visit_type(&node.ty); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_attr_style<'ast, V>(v: &mut V, node: &'ast AttrStyle) +where + V: Visit<'ast> + ?Sized, +{ + match node { + AttrStyle::Outer => {} + AttrStyle::Inner(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_attribute<'ast, V>(v: &mut V, node: &'ast Attribute) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.pound_token); + v.visit_attr_style(&node.style); + skip!(node.bracket_token); + v.visit_meta(&node.meta); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bare_fn_arg<'ast, V>(v: &mut V, node: &'ast BareFnArg) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.name { + v.visit_ident(&(it).0); + skip!((it).1); + } + v.visit_type(&node.ty); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bare_variadic<'ast, V>(v: &mut V, node: &'ast BareVariadic) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.name { + v.visit_ident(&(it).0); + skip!((it).1); + } + skip!(node.dots); + skip!(node.comma); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bin_op<'ast, V>(v: &mut V, node: &'ast BinOp) +where + V: Visit<'ast> + ?Sized, +{ + match node { + BinOp::Add(_binding_0) => { + skip!(_binding_0); + } + BinOp::Sub(_binding_0) => { + skip!(_binding_0); + } + BinOp::Mul(_binding_0) => { + skip!(_binding_0); + } + BinOp::Div(_binding_0) => { + skip!(_binding_0); + } + BinOp::Rem(_binding_0) => { + skip!(_binding_0); + } + BinOp::And(_binding_0) => { + skip!(_binding_0); + } + BinOp::Or(_binding_0) => { + skip!(_binding_0); + } + BinOp::BitXor(_binding_0) => { + skip!(_binding_0); + } + BinOp::BitAnd(_binding_0) => { + skip!(_binding_0); + } + BinOp::BitOr(_binding_0) => { + skip!(_binding_0); + } + BinOp::Shl(_binding_0) => { + skip!(_binding_0); + } + BinOp::Shr(_binding_0) => { + skip!(_binding_0); + } + BinOp::Eq(_binding_0) => { + skip!(_binding_0); + } + BinOp::Lt(_binding_0) => { + skip!(_binding_0); + } + BinOp::Le(_binding_0) => { + skip!(_binding_0); + } + BinOp::Ne(_binding_0) => { + skip!(_binding_0); + } + BinOp::Ge(_binding_0) => { + skip!(_binding_0); + } + BinOp::Gt(_binding_0) => { + skip!(_binding_0); + } + BinOp::AddAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::SubAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::MulAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::DivAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::RemAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::BitXorAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::BitAndAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::BitOrAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::ShlAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::ShrAssign(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn 
visit_block<'ast, V>(v: &mut V, node: &'ast Block) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.brace_token); + for it in &node.stmts { + v.visit_stmt(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bound_lifetimes<'ast, V>(v: &mut V, node: &'ast BoundLifetimes) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.for_token); + skip!(node.lt_token); + for el in Punctuated::pairs(&node.lifetimes) { + let it = el.value(); + v.visit_generic_param(it); + } + skip!(node.gt_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_const_param<'ast, V>(v: &mut V, node: &'ast ConstParam) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.const_token); + v.visit_ident(&node.ident); + skip!(node.colon_token); + v.visit_type(&node.ty); + skip!(node.eq_token); + if let Some(it) = &node.default { + v.visit_expr(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_constraint<'ast, V>(v: &mut V, node: &'ast Constraint) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_ident(&node.ident); + if let Some(it) = &node.generics { + v.visit_angle_bracketed_generic_arguments(it); + } + skip!(node.colon_token); + for el in Punctuated::pairs(&node.bounds) { + let it = el.value(); + v.visit_type_param_bound(it); + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data<'ast, V>(v: &mut V, node: &'ast Data) +where + V: Visit<'ast> + ?Sized, +{ + match node { + Data::Struct(_binding_0) => { + v.visit_data_struct(_binding_0); + } + Data::Enum(_binding_0) => { + v.visit_data_enum(_binding_0); + } + Data::Union(_binding_0) => { + v.visit_data_union(_binding_0); + } + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_enum<'ast, V>(v: &mut V, node: &'ast DataEnum) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.enum_token); + skip!(node.brace_token); + for el in Punctuated::pairs(&node.variants) { + let it = el.value(); + v.visit_variant(it); + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_struct<'ast, V>(v: &mut V, node: &'ast DataStruct) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.struct_token); + v.visit_fields(&node.fields); + skip!(node.semi_token); +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_union<'ast, V>(v: &mut V, node: &'ast DataUnion) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.union_token); + v.visit_fields_named(&node.fields); +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_derive_input<'ast, V>(v: &mut V, node: &'ast DeriveInput) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + v.visit_data(&node.data); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr<'ast, V>(v: &mut V, node: &'ast Expr) +where + V: Visit<'ast> + ?Sized, +{ + match node { + Expr::Array(_binding_0) => { + full!(v.visit_expr_array(_binding_0)); + } + Expr::Assign(_binding_0) => { + 
full!(v.visit_expr_assign(_binding_0)); + } + Expr::Async(_binding_0) => { + full!(v.visit_expr_async(_binding_0)); + } + Expr::Await(_binding_0) => { + full!(v.visit_expr_await(_binding_0)); + } + Expr::Binary(_binding_0) => { + v.visit_expr_binary(_binding_0); + } + Expr::Block(_binding_0) => { + full!(v.visit_expr_block(_binding_0)); + } + Expr::Break(_binding_0) => { + full!(v.visit_expr_break(_binding_0)); + } + Expr::Call(_binding_0) => { + v.visit_expr_call(_binding_0); + } + Expr::Cast(_binding_0) => { + v.visit_expr_cast(_binding_0); + } + Expr::Closure(_binding_0) => { + full!(v.visit_expr_closure(_binding_0)); + } + Expr::Const(_binding_0) => { + full!(v.visit_expr_const(_binding_0)); + } + Expr::Continue(_binding_0) => { + full!(v.visit_expr_continue(_binding_0)); + } + Expr::Field(_binding_0) => { + v.visit_expr_field(_binding_0); + } + Expr::ForLoop(_binding_0) => { + full!(v.visit_expr_for_loop(_binding_0)); + } + Expr::Group(_binding_0) => { + v.visit_expr_group(_binding_0); + } + Expr::If(_binding_0) => { + full!(v.visit_expr_if(_binding_0)); + } + Expr::Index(_binding_0) => { + v.visit_expr_index(_binding_0); + } + Expr::Infer(_binding_0) => { + full!(v.visit_expr_infer(_binding_0)); + } + Expr::Let(_binding_0) => { + full!(v.visit_expr_let(_binding_0)); + } + Expr::Lit(_binding_0) => { + v.visit_expr_lit(_binding_0); + } + Expr::Loop(_binding_0) => { + full!(v.visit_expr_loop(_binding_0)); + } + Expr::Macro(_binding_0) => { + v.visit_expr_macro(_binding_0); + } + Expr::Match(_binding_0) => { + full!(v.visit_expr_match(_binding_0)); + } + Expr::MethodCall(_binding_0) => { + v.visit_expr_method_call(_binding_0); + } + Expr::Paren(_binding_0) => { + v.visit_expr_paren(_binding_0); + } + Expr::Path(_binding_0) => { + v.visit_expr_path(_binding_0); + } + Expr::Range(_binding_0) => { + full!(v.visit_expr_range(_binding_0)); + } + Expr::Reference(_binding_0) => { + v.visit_expr_reference(_binding_0); + } + Expr::Repeat(_binding_0) => { + full!(v.visit_expr_repeat(_binding_0)); + } + Expr::Return(_binding_0) => { + full!(v.visit_expr_return(_binding_0)); + } + Expr::Struct(_binding_0) => { + v.visit_expr_struct(_binding_0); + } + Expr::Try(_binding_0) => { + full!(v.visit_expr_try(_binding_0)); + } + Expr::TryBlock(_binding_0) => { + full!(v.visit_expr_try_block(_binding_0)); + } + Expr::Tuple(_binding_0) => { + full!(v.visit_expr_tuple(_binding_0)); + } + Expr::Unary(_binding_0) => { + v.visit_expr_unary(_binding_0); + } + Expr::Unsafe(_binding_0) => { + full!(v.visit_expr_unsafe(_binding_0)); + } + Expr::Verbatim(_binding_0) => { + skip!(_binding_0); + } + Expr::While(_binding_0) => { + full!(v.visit_expr_while(_binding_0)); + } + Expr::Yield(_binding_0) => { + full!(v.visit_expr_yield(_binding_0)); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_array<'ast, V>(v: &mut V, node: &'ast ExprArray) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.bracket_token); + for el in Punctuated::pairs(&node.elems) { + let it = el.value(); + v.visit_expr(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_assign<'ast, V>(v: &mut V, node: &'ast ExprAssign) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_expr(&*node.left); + skip!(node.eq_token); + v.visit_expr(&*node.right); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn 
visit_expr_async<'ast, V>(v: &mut V, node: &'ast ExprAsync) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.async_token); + skip!(node.capture); + v.visit_block(&node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_await<'ast, V>(v: &mut V, node: &'ast ExprAwait) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_expr(&*node.base); + skip!(node.dot_token); + skip!(node.await_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_binary<'ast, V>(v: &mut V, node: &'ast ExprBinary) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_expr(&*node.left); + v.visit_bin_op(&node.op); + v.visit_expr(&*node.right); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_block<'ast, V>(v: &mut V, node: &'ast ExprBlock) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.label { + v.visit_label(it); + } + v.visit_block(&node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_break<'ast, V>(v: &mut V, node: &'ast ExprBreak) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.break_token); + if let Some(it) = &node.label { + v.visit_lifetime(it); + } + if let Some(it) = &node.expr { + v.visit_expr(&**it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_call<'ast, V>(v: &mut V, node: &'ast ExprCall) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_expr(&*node.func); + skip!(node.paren_token); + for el in Punctuated::pairs(&node.args) { + let it = el.value(); + v.visit_expr(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_cast<'ast, V>(v: &mut V, node: &'ast ExprCast) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_expr(&*node.expr); + skip!(node.as_token); + v.visit_type(&*node.ty); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_closure<'ast, V>(v: &mut V, node: &'ast ExprClosure) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.lifetimes { + v.visit_bound_lifetimes(it); + } + skip!(node.constness); + skip!(node.movability); + skip!(node.asyncness); + skip!(node.capture); + skip!(node.or1_token); + for el in Punctuated::pairs(&node.inputs) { + let it = el.value(); + v.visit_pat(it); + } + skip!(node.or2_token); + v.visit_return_type(&node.output); + v.visit_expr(&*node.body); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_const<'ast, V>(v: &mut V, node: &'ast ExprConst) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.const_token); + v.visit_block(&node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_continue<'ast, V>(v: &mut V, node: &'ast ExprContinue) +where + V: Visit<'ast> + ?Sized, 
+{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.continue_token); + if let Some(it) = &node.label { + v.visit_lifetime(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_field<'ast, V>(v: &mut V, node: &'ast ExprField) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_expr(&*node.base); + skip!(node.dot_token); + v.visit_member(&node.member); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_for_loop<'ast, V>(v: &mut V, node: &'ast ExprForLoop) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.label { + v.visit_label(it); + } + skip!(node.for_token); + v.visit_pat(&*node.pat); + skip!(node.in_token); + v.visit_expr(&*node.expr); + v.visit_block(&node.body); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_group<'ast, V>(v: &mut V, node: &'ast ExprGroup) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.group_token); + v.visit_expr(&*node.expr); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_if<'ast, V>(v: &mut V, node: &'ast ExprIf) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.if_token); + v.visit_expr(&*node.cond); + v.visit_block(&node.then_branch); + if let Some(it) = &node.else_branch { + skip!((it).0); + v.visit_expr(&*(it).1); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_index<'ast, V>(v: &mut V, node: &'ast ExprIndex) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_expr(&*node.expr); + skip!(node.bracket_token); + v.visit_expr(&*node.index); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_infer<'ast, V>(v: &mut V, node: &'ast ExprInfer) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.underscore_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_let<'ast, V>(v: &mut V, node: &'ast ExprLet) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.let_token); + v.visit_pat(&*node.pat); + skip!(node.eq_token); + v.visit_expr(&*node.expr); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_lit<'ast, V>(v: &mut V, node: &'ast ExprLit) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_lit(&node.lit); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_loop<'ast, V>(v: &mut V, node: &'ast ExprLoop) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.label { + v.visit_label(it); + } + skip!(node.loop_token); + v.visit_block(&node.body); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_macro<'ast, V>(v: &mut V, node: &'ast 
ExprMacro) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_macro(&node.mac); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_match<'ast, V>(v: &mut V, node: &'ast ExprMatch) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.match_token); + v.visit_expr(&*node.expr); + skip!(node.brace_token); + for it in &node.arms { + v.visit_arm(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_method_call<'ast, V>(v: &mut V, node: &'ast ExprMethodCall) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_expr(&*node.receiver); + skip!(node.dot_token); + v.visit_ident(&node.method); + if let Some(it) = &node.turbofish { + v.visit_angle_bracketed_generic_arguments(it); + } + skip!(node.paren_token); + for el in Punctuated::pairs(&node.args) { + let it = el.value(); + v.visit_expr(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_paren<'ast, V>(v: &mut V, node: &'ast ExprParen) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.paren_token); + v.visit_expr(&*node.expr); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_path<'ast, V>(v: &mut V, node: &'ast ExprPath) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.qself { + v.visit_qself(it); + } + v.visit_path(&node.path); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_range<'ast, V>(v: &mut V, node: &'ast ExprRange) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.start { + v.visit_expr(&**it); + } + v.visit_range_limits(&node.limits); + if let Some(it) = &node.end { + v.visit_expr(&**it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_reference<'ast, V>(v: &mut V, node: &'ast ExprReference) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.and_token); + skip!(node.mutability); + v.visit_expr(&*node.expr); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_repeat<'ast, V>(v: &mut V, node: &'ast ExprRepeat) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.bracket_token); + v.visit_expr(&*node.expr); + skip!(node.semi_token); + v.visit_expr(&*node.len); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_return<'ast, V>(v: &mut V, node: &'ast ExprReturn) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.return_token); + if let Some(it) = &node.expr { + v.visit_expr(&**it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_struct<'ast, V>(v: &mut V, node: &'ast ExprStruct) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + 
v.visit_attribute(it); + } + if let Some(it) = &node.qself { + v.visit_qself(it); + } + v.visit_path(&node.path); + skip!(node.brace_token); + for el in Punctuated::pairs(&node.fields) { + let it = el.value(); + v.visit_field_value(it); + } + skip!(node.dot2_token); + if let Some(it) = &node.rest { + v.visit_expr(&**it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_try<'ast, V>(v: &mut V, node: &'ast ExprTry) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_expr(&*node.expr); + skip!(node.question_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_try_block<'ast, V>(v: &mut V, node: &'ast ExprTryBlock) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.try_token); + v.visit_block(&node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_tuple<'ast, V>(v: &mut V, node: &'ast ExprTuple) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.paren_token); + for el in Punctuated::pairs(&node.elems) { + let it = el.value(); + v.visit_expr(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_unary<'ast, V>(v: &mut V, node: &'ast ExprUnary) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_un_op(&node.op); + v.visit_expr(&*node.expr); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_unsafe<'ast, V>(v: &mut V, node: &'ast ExprUnsafe) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.unsafe_token); + v.visit_block(&node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_while<'ast, V>(v: &mut V, node: &'ast ExprWhile) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.label { + v.visit_label(it); + } + skip!(node.while_token); + v.visit_expr(&*node.cond); + v.visit_block(&node.body); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_yield<'ast, V>(v: &mut V, node: &'ast ExprYield) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.yield_token); + if let Some(it) = &node.expr { + v.visit_expr(&**it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field<'ast, V>(v: &mut V, node: &'ast Field) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + v.visit_field_mutability(&node.mutability); + if let Some(it) = &node.ident { + v.visit_ident(it); + } + skip!(node.colon_token); + v.visit_type(&node.ty); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field_mutability<'ast, V>(v: &mut V, node: &'ast FieldMutability) +where + V: Visit<'ast> + ?Sized, +{ + match node { + FieldMutability::None => {} + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_field_pat<'ast, V>(v: &mut V, node: &'ast FieldPat) +where + 
V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_member(&node.member); + skip!(node.colon_token); + v.visit_pat(&*node.pat); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field_value<'ast, V>(v: &mut V, node: &'ast FieldValue) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_member(&node.member); + skip!(node.colon_token); + v.visit_expr(&node.expr); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields<'ast, V>(v: &mut V, node: &'ast Fields) +where + V: Visit<'ast> + ?Sized, +{ + match node { + Fields::Named(_binding_0) => { + v.visit_fields_named(_binding_0); + } + Fields::Unnamed(_binding_0) => { + v.visit_fields_unnamed(_binding_0); + } + Fields::Unit => {} + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields_named<'ast, V>(v: &mut V, node: &'ast FieldsNamed) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.brace_token); + for el in Punctuated::pairs(&node.named) { + let it = el.value(); + v.visit_field(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields_unnamed<'ast, V>(v: &mut V, node: &'ast FieldsUnnamed) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.paren_token); + for el in Punctuated::pairs(&node.unnamed) { + let it = el.value(); + v.visit_field(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_file<'ast, V>(v: &mut V, node: &'ast File) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.shebang); + for it in &node.attrs { + v.visit_attribute(it); + } + for it in &node.items { + v.visit_item(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_fn_arg<'ast, V>(v: &mut V, node: &'ast FnArg) +where + V: Visit<'ast> + ?Sized, +{ + match node { + FnArg::Receiver(_binding_0) => { + v.visit_receiver(_binding_0); + } + FnArg::Typed(_binding_0) => { + v.visit_pat_type(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item<'ast, V>(v: &mut V, node: &'ast ForeignItem) +where + V: Visit<'ast> + ?Sized, +{ + match node { + ForeignItem::Fn(_binding_0) => { + v.visit_foreign_item_fn(_binding_0); + } + ForeignItem::Static(_binding_0) => { + v.visit_foreign_item_static(_binding_0); + } + ForeignItem::Type(_binding_0) => { + v.visit_foreign_item_type(_binding_0); + } + ForeignItem::Macro(_binding_0) => { + v.visit_foreign_item_macro(_binding_0); + } + ForeignItem::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_fn<'ast, V>(v: &mut V, node: &'ast ForeignItemFn) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + v.visit_signature(&node.sig); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_macro<'ast, V>(v: &mut V, node: &'ast ForeignItemMacro) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_macro(&node.mac); + 
skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_static<'ast, V>(v: &mut V, node: &'ast ForeignItemStatic) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.static_token); + v.visit_static_mutability(&node.mutability); + v.visit_ident(&node.ident); + skip!(node.colon_token); + v.visit_type(&*node.ty); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_type<'ast, V>(v: &mut V, node: &'ast ForeignItemType) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.type_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + skip!(node.semi_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_generic_argument<'ast, V>(v: &mut V, node: &'ast GenericArgument) +where + V: Visit<'ast> + ?Sized, +{ + match node { + GenericArgument::Lifetime(_binding_0) => { + v.visit_lifetime(_binding_0); + } + GenericArgument::Type(_binding_0) => { + v.visit_type(_binding_0); + } + GenericArgument::Const(_binding_0) => { + v.visit_expr(_binding_0); + } + GenericArgument::AssocType(_binding_0) => { + v.visit_assoc_type(_binding_0); + } + GenericArgument::AssocConst(_binding_0) => { + v.visit_assoc_const(_binding_0); + } + GenericArgument::Constraint(_binding_0) => { + v.visit_constraint(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_generic_param<'ast, V>(v: &mut V, node: &'ast GenericParam) +where + V: Visit<'ast> + ?Sized, +{ + match node { + GenericParam::Lifetime(_binding_0) => { + v.visit_lifetime_param(_binding_0); + } + GenericParam::Type(_binding_0) => { + v.visit_type_param(_binding_0); + } + GenericParam::Const(_binding_0) => { + v.visit_const_param(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_generics<'ast, V>(v: &mut V, node: &'ast Generics) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.lt_token); + for el in Punctuated::pairs(&node.params) { + let it = el.value(); + v.visit_generic_param(it); + } + skip!(node.gt_token); + if let Some(it) = &node.where_clause { + v.visit_where_clause(it); + } +} +pub fn visit_ident<'ast, V>(v: &mut V, node: &'ast Ident) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_span(&node.span()); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item<'ast, V>(v: &mut V, node: &'ast ImplItem) +where + V: Visit<'ast> + ?Sized, +{ + match node { + ImplItem::Const(_binding_0) => { + v.visit_impl_item_const(_binding_0); + } + ImplItem::Fn(_binding_0) => { + v.visit_impl_item_fn(_binding_0); + } + ImplItem::Type(_binding_0) => { + v.visit_impl_item_type(_binding_0); + } + ImplItem::Macro(_binding_0) => { + v.visit_impl_item_macro(_binding_0); + } + ImplItem::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_const<'ast, V>(v: &mut V, node: &'ast ImplItemConst) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + 
v.visit_visibility(&node.vis); + skip!(node.defaultness); + skip!(node.const_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + skip!(node.colon_token); + v.visit_type(&node.ty); + skip!(node.eq_token); + v.visit_expr(&node.expr); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_fn<'ast, V>(v: &mut V, node: &'ast ImplItemFn) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.defaultness); + v.visit_signature(&node.sig); + v.visit_block(&node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_macro<'ast, V>(v: &mut V, node: &'ast ImplItemMacro) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_macro(&node.mac); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_type<'ast, V>(v: &mut V, node: &'ast ImplItemType) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.defaultness); + skip!(node.type_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + skip!(node.eq_token); + v.visit_type(&node.ty); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_restriction<'ast, V>(v: &mut V, node: &'ast ImplRestriction) +where + V: Visit<'ast> + ?Sized, +{ + match *node {} +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_index<'ast, V>(v: &mut V, node: &'ast Index) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.index); + v.visit_span(&node.span); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item<'ast, V>(v: &mut V, node: &'ast Item) +where + V: Visit<'ast> + ?Sized, +{ + match node { + Item::Const(_binding_0) => { + v.visit_item_const(_binding_0); + } + Item::Enum(_binding_0) => { + v.visit_item_enum(_binding_0); + } + Item::ExternCrate(_binding_0) => { + v.visit_item_extern_crate(_binding_0); + } + Item::Fn(_binding_0) => { + v.visit_item_fn(_binding_0); + } + Item::ForeignMod(_binding_0) => { + v.visit_item_foreign_mod(_binding_0); + } + Item::Impl(_binding_0) => { + v.visit_item_impl(_binding_0); + } + Item::Macro(_binding_0) => { + v.visit_item_macro(_binding_0); + } + Item::Mod(_binding_0) => { + v.visit_item_mod(_binding_0); + } + Item::Static(_binding_0) => { + v.visit_item_static(_binding_0); + } + Item::Struct(_binding_0) => { + v.visit_item_struct(_binding_0); + } + Item::Trait(_binding_0) => { + v.visit_item_trait(_binding_0); + } + Item::TraitAlias(_binding_0) => { + v.visit_item_trait_alias(_binding_0); + } + Item::Type(_binding_0) => { + v.visit_item_type(_binding_0); + } + Item::Union(_binding_0) => { + v.visit_item_union(_binding_0); + } + Item::Use(_binding_0) => { + v.visit_item_use(_binding_0); + } + Item::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_const<'ast, V>(v: &mut V, node: &'ast ItemConst) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.const_token); + v.visit_ident(&node.ident); + 
v.visit_generics(&node.generics); + skip!(node.colon_token); + v.visit_type(&*node.ty); + skip!(node.eq_token); + v.visit_expr(&*node.expr); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_enum<'ast, V>(v: &mut V, node: &'ast ItemEnum) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.enum_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + skip!(node.brace_token); + for el in Punctuated::pairs(&node.variants) { + let it = el.value(); + v.visit_variant(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_extern_crate<'ast, V>(v: &mut V, node: &'ast ItemExternCrate) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.extern_token); + skip!(node.crate_token); + v.visit_ident(&node.ident); + if let Some(it) = &node.rename { + skip!((it).0); + v.visit_ident(&(it).1); + } + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_fn<'ast, V>(v: &mut V, node: &'ast ItemFn) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + v.visit_signature(&node.sig); + v.visit_block(&*node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_foreign_mod<'ast, V>(v: &mut V, node: &'ast ItemForeignMod) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.unsafety); + v.visit_abi(&node.abi); + skip!(node.brace_token); + for it in &node.items { + v.visit_foreign_item(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_impl<'ast, V>(v: &mut V, node: &'ast ItemImpl) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.defaultness); + skip!(node.unsafety); + skip!(node.impl_token); + v.visit_generics(&node.generics); + if let Some(it) = &node.trait_ { + skip!((it).0); + v.visit_path(&(it).1); + skip!((it).2); + } + v.visit_type(&*node.self_ty); + skip!(node.brace_token); + for it in &node.items { + v.visit_impl_item(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_macro<'ast, V>(v: &mut V, node: &'ast ItemMacro) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.ident { + v.visit_ident(it); + } + v.visit_macro(&node.mac); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_mod<'ast, V>(v: &mut V, node: &'ast ItemMod) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.unsafety); + skip!(node.mod_token); + v.visit_ident(&node.ident); + if let Some(it) = &node.content { + skip!((it).0); + for it in &(it).1 { + v.visit_item(it); + } + } + skip!(node.semi); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_static<'ast, V>(v: &mut V, node: &'ast ItemStatic) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.static_token); + 
v.visit_static_mutability(&node.mutability); + v.visit_ident(&node.ident); + skip!(node.colon_token); + v.visit_type(&*node.ty); + skip!(node.eq_token); + v.visit_expr(&*node.expr); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_struct<'ast, V>(v: &mut V, node: &'ast ItemStruct) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.struct_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + v.visit_fields(&node.fields); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_trait<'ast, V>(v: &mut V, node: &'ast ItemTrait) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.unsafety); + skip!(node.auto_token); + if let Some(it) = &node.restriction { + v.visit_impl_restriction(it); + } + skip!(node.trait_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + skip!(node.colon_token); + for el in Punctuated::pairs(&node.supertraits) { + let it = el.value(); + v.visit_type_param_bound(it); + } + skip!(node.brace_token); + for it in &node.items { + v.visit_trait_item(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_trait_alias<'ast, V>(v: &mut V, node: &'ast ItemTraitAlias) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.trait_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + skip!(node.eq_token); + for el in Punctuated::pairs(&node.bounds) { + let it = el.value(); + v.visit_type_param_bound(it); + } + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_type<'ast, V>(v: &mut V, node: &'ast ItemType) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.type_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + skip!(node.eq_token); + v.visit_type(&*node.ty); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_union<'ast, V>(v: &mut V, node: &'ast ItemUnion) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.union_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + v.visit_fields_named(&node.fields); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_use<'ast, V>(v: &mut V, node: &'ast ItemUse) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_visibility(&node.vis); + skip!(node.use_token); + skip!(node.leading_colon); + v.visit_use_tree(&node.tree); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_label<'ast, V>(v: &mut V, node: &'ast Label) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_lifetime(&node.name); + skip!(node.colon_token); +} +pub fn visit_lifetime<'ast, V>(v: &mut V, node: &'ast Lifetime) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_span(&node.apostrophe); + v.visit_ident(&node.ident); +} +#[cfg(any(feature = "derive", feature = "full"))] 
+#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_lifetime_param<'ast, V>(v: &mut V, node: &'ast LifetimeParam) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_lifetime(&node.lifetime); + skip!(node.colon_token); + for el in Punctuated::pairs(&node.bounds) { + let it = el.value(); + v.visit_lifetime(it); + } +} +pub fn visit_lit<'ast, V>(v: &mut V, node: &'ast Lit) +where + V: Visit<'ast> + ?Sized, +{ + match node { + Lit::Str(_binding_0) => { + v.visit_lit_str(_binding_0); + } + Lit::ByteStr(_binding_0) => { + v.visit_lit_byte_str(_binding_0); + } + Lit::Byte(_binding_0) => { + v.visit_lit_byte(_binding_0); + } + Lit::Char(_binding_0) => { + v.visit_lit_char(_binding_0); + } + Lit::Int(_binding_0) => { + v.visit_lit_int(_binding_0); + } + Lit::Float(_binding_0) => { + v.visit_lit_float(_binding_0); + } + Lit::Bool(_binding_0) => { + v.visit_lit_bool(_binding_0); + } + Lit::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +pub fn visit_lit_bool<'ast, V>(v: &mut V, node: &'ast LitBool) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.value); + v.visit_span(&node.span); +} +pub fn visit_lit_byte<'ast, V>(v: &mut V, node: &'ast LitByte) +where + V: Visit<'ast> + ?Sized, +{} +pub fn visit_lit_byte_str<'ast, V>(v: &mut V, node: &'ast LitByteStr) +where + V: Visit<'ast> + ?Sized, +{} +pub fn visit_lit_char<'ast, V>(v: &mut V, node: &'ast LitChar) +where + V: Visit<'ast> + ?Sized, +{} +pub fn visit_lit_float<'ast, V>(v: &mut V, node: &'ast LitFloat) +where + V: Visit<'ast> + ?Sized, +{} +pub fn visit_lit_int<'ast, V>(v: &mut V, node: &'ast LitInt) +where + V: Visit<'ast> + ?Sized, +{} +pub fn visit_lit_str<'ast, V>(v: &mut V, node: &'ast LitStr) +where + V: Visit<'ast> + ?Sized, +{} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_local<'ast, V>(v: &mut V, node: &'ast Local) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.let_token); + v.visit_pat(&node.pat); + if let Some(it) = &node.init { + v.visit_local_init(it); + } + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_local_init<'ast, V>(v: &mut V, node: &'ast LocalInit) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.eq_token); + v.visit_expr(&*node.expr); + if let Some(it) = &node.diverge { + skip!((it).0); + v.visit_expr(&*(it).1); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_macro<'ast, V>(v: &mut V, node: &'ast Macro) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_path(&node.path); + skip!(node.bang_token); + v.visit_macro_delimiter(&node.delimiter); + skip!(node.tokens); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_macro_delimiter<'ast, V>(v: &mut V, node: &'ast MacroDelimiter) +where + V: Visit<'ast> + ?Sized, +{ + match node { + MacroDelimiter::Paren(_binding_0) => { + skip!(_binding_0); + } + MacroDelimiter::Brace(_binding_0) => { + skip!(_binding_0); + } + MacroDelimiter::Bracket(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_member<'ast, V>(v: &mut V, node: &'ast Member) +where + V: Visit<'ast> + ?Sized, +{ + match node { + 
Member::Named(_binding_0) => { + v.visit_ident(_binding_0); + } + Member::Unnamed(_binding_0) => { + v.visit_index(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta<'ast, V>(v: &mut V, node: &'ast Meta) +where + V: Visit<'ast> + ?Sized, +{ + match node { + Meta::Path(_binding_0) => { + v.visit_path(_binding_0); + } + Meta::List(_binding_0) => { + v.visit_meta_list(_binding_0); + } + Meta::NameValue(_binding_0) => { + v.visit_meta_name_value(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta_list<'ast, V>(v: &mut V, node: &'ast MetaList) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_path(&node.path); + v.visit_macro_delimiter(&node.delimiter); + skip!(node.tokens); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta_name_value<'ast, V>(v: &mut V, node: &'ast MetaNameValue) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_path(&node.path); + skip!(node.eq_token); + v.visit_expr(&node.value); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_parenthesized_generic_arguments<'ast, V>( + v: &mut V, + node: &'ast ParenthesizedGenericArguments, +) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.paren_token); + for el in Punctuated::pairs(&node.inputs) { + let it = el.value(); + v.visit_type(it); + } + v.visit_return_type(&node.output); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat<'ast, V>(v: &mut V, node: &'ast Pat) +where + V: Visit<'ast> + ?Sized, +{ + match node { + Pat::Const(_binding_0) => { + v.visit_expr_const(_binding_0); + } + Pat::Ident(_binding_0) => { + v.visit_pat_ident(_binding_0); + } + Pat::Lit(_binding_0) => { + v.visit_expr_lit(_binding_0); + } + Pat::Macro(_binding_0) => { + v.visit_expr_macro(_binding_0); + } + Pat::Or(_binding_0) => { + v.visit_pat_or(_binding_0); + } + Pat::Paren(_binding_0) => { + v.visit_pat_paren(_binding_0); + } + Pat::Path(_binding_0) => { + v.visit_expr_path(_binding_0); + } + Pat::Range(_binding_0) => { + v.visit_expr_range(_binding_0); + } + Pat::Reference(_binding_0) => { + v.visit_pat_reference(_binding_0); + } + Pat::Rest(_binding_0) => { + v.visit_pat_rest(_binding_0); + } + Pat::Slice(_binding_0) => { + v.visit_pat_slice(_binding_0); + } + Pat::Struct(_binding_0) => { + v.visit_pat_struct(_binding_0); + } + Pat::Tuple(_binding_0) => { + v.visit_pat_tuple(_binding_0); + } + Pat::TupleStruct(_binding_0) => { + v.visit_pat_tuple_struct(_binding_0); + } + Pat::Type(_binding_0) => { + v.visit_pat_type(_binding_0); + } + Pat::Verbatim(_binding_0) => { + skip!(_binding_0); + } + Pat::Wild(_binding_0) => { + v.visit_pat_wild(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_ident<'ast, V>(v: &mut V, node: &'ast PatIdent) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.by_ref); + skip!(node.mutability); + v.visit_ident(&node.ident); + if let Some(it) = &node.subpat { + skip!((it).0); + v.visit_pat(&*(it).1); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_or<'ast, V>(v: &mut V, node: &'ast PatOr) +where + V: 
Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.leading_vert); + for el in Punctuated::pairs(&node.cases) { + let it = el.value(); + v.visit_pat(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_paren<'ast, V>(v: &mut V, node: &'ast PatParen) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.paren_token); + v.visit_pat(&*node.pat); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_reference<'ast, V>(v: &mut V, node: &'ast PatReference) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.and_token); + skip!(node.mutability); + v.visit_pat(&*node.pat); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_rest<'ast, V>(v: &mut V, node: &'ast PatRest) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.dot2_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_slice<'ast, V>(v: &mut V, node: &'ast PatSlice) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.bracket_token); + for el in Punctuated::pairs(&node.elems) { + let it = el.value(); + v.visit_pat(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_struct<'ast, V>(v: &mut V, node: &'ast PatStruct) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.qself { + v.visit_qself(it); + } + v.visit_path(&node.path); + skip!(node.brace_token); + for el in Punctuated::pairs(&node.fields) { + let it = el.value(); + v.visit_field_pat(it); + } + if let Some(it) = &node.rest { + v.visit_pat_rest(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_tuple<'ast, V>(v: &mut V, node: &'ast PatTuple) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.paren_token); + for el in Punctuated::pairs(&node.elems) { + let it = el.value(); + v.visit_pat(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_tuple_struct<'ast, V>(v: &mut V, node: &'ast PatTupleStruct) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.qself { + v.visit_qself(it); + } + v.visit_path(&node.path); + skip!(node.paren_token); + for el in Punctuated::pairs(&node.elems) { + let it = el.value(); + v.visit_pat(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_type<'ast, V>(v: &mut V, node: &'ast PatType) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_pat(&*node.pat); + skip!(node.colon_token); + v.visit_type(&*node.ty); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_wild<'ast, V>(v: &mut V, node: &'ast PatWild) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.underscore_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path<'ast, V>(v: &mut V, node: &'ast Path) +where + V: Visit<'ast> + 
?Sized, +{ + skip!(node.leading_colon); + for el in Punctuated::pairs(&node.segments) { + let it = el.value(); + v.visit_path_segment(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path_arguments<'ast, V>(v: &mut V, node: &'ast PathArguments) +where + V: Visit<'ast> + ?Sized, +{ + match node { + PathArguments::None => {} + PathArguments::AngleBracketed(_binding_0) => { + v.visit_angle_bracketed_generic_arguments(_binding_0); + } + PathArguments::Parenthesized(_binding_0) => { + v.visit_parenthesized_generic_arguments(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path_segment<'ast, V>(v: &mut V, node: &'ast PathSegment) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_ident(&node.ident); + v.visit_path_arguments(&node.arguments); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_predicate_lifetime<'ast, V>(v: &mut V, node: &'ast PredicateLifetime) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_lifetime(&node.lifetime); + skip!(node.colon_token); + for el in Punctuated::pairs(&node.bounds) { + let it = el.value(); + v.visit_lifetime(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_predicate_type<'ast, V>(v: &mut V, node: &'ast PredicateType) +where + V: Visit<'ast> + ?Sized, +{ + if let Some(it) = &node.lifetimes { + v.visit_bound_lifetimes(it); + } + v.visit_type(&node.bounded_ty); + skip!(node.colon_token); + for el in Punctuated::pairs(&node.bounds) { + let it = el.value(); + v.visit_type_param_bound(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_qself<'ast, V>(v: &mut V, node: &'ast QSelf) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.lt_token); + v.visit_type(&*node.ty); + skip!(node.position); + skip!(node.as_token); + skip!(node.gt_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_range_limits<'ast, V>(v: &mut V, node: &'ast RangeLimits) +where + V: Visit<'ast> + ?Sized, +{ + match node { + RangeLimits::HalfOpen(_binding_0) => { + skip!(_binding_0); + } + RangeLimits::Closed(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_receiver<'ast, V>(v: &mut V, node: &'ast Receiver) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.reference { + skip!((it).0); + if let Some(it) = &(it).1 { + v.visit_lifetime(it); + } + } + skip!(node.mutability); + skip!(node.self_token); + skip!(node.colon_token); + v.visit_type(&*node.ty); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_return_type<'ast, V>(v: &mut V, node: &'ast ReturnType) +where + V: Visit<'ast> + ?Sized, +{ + match node { + ReturnType::Default => {} + ReturnType::Type(_binding_0, _binding_1) => { + skip!(_binding_0); + v.visit_type(&**_binding_1); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_signature<'ast, V>(v: &mut V, node: &'ast Signature) +where + V: Visit<'ast> + 
?Sized, +{ + skip!(node.constness); + skip!(node.asyncness); + skip!(node.unsafety); + if let Some(it) = &node.abi { + v.visit_abi(it); + } + skip!(node.fn_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + skip!(node.paren_token); + for el in Punctuated::pairs(&node.inputs) { + let it = el.value(); + v.visit_fn_arg(it); + } + if let Some(it) = &node.variadic { + v.visit_variadic(it); + } + v.visit_return_type(&node.output); +} +pub fn visit_span<'ast, V>(v: &mut V, node: &Span) +where + V: Visit<'ast> + ?Sized, +{} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_static_mutability<'ast, V>(v: &mut V, node: &'ast StaticMutability) +where + V: Visit<'ast> + ?Sized, +{ + match node { + StaticMutability::Mut(_binding_0) => { + skip!(_binding_0); + } + StaticMutability::None => {} + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_stmt<'ast, V>(v: &mut V, node: &'ast Stmt) +where + V: Visit<'ast> + ?Sized, +{ + match node { + Stmt::Local(_binding_0) => { + v.visit_local(_binding_0); + } + Stmt::Item(_binding_0) => { + v.visit_item(_binding_0); + } + Stmt::Expr(_binding_0, _binding_1) => { + v.visit_expr(_binding_0); + skip!(_binding_1); + } + Stmt::Macro(_binding_0) => { + v.visit_stmt_macro(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_stmt_macro<'ast, V>(v: &mut V, node: &'ast StmtMacro) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_macro(&node.mac); + skip!(node.semi_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_trait_bound<'ast, V>(v: &mut V, node: &'ast TraitBound) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.paren_token); + v.visit_trait_bound_modifier(&node.modifier); + if let Some(it) = &node.lifetimes { + v.visit_bound_lifetimes(it); + } + v.visit_path(&node.path); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_trait_bound_modifier<'ast, V>(v: &mut V, node: &'ast TraitBoundModifier) +where + V: Visit<'ast> + ?Sized, +{ + match node { + TraitBoundModifier::None => {} + TraitBoundModifier::Maybe(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item<'ast, V>(v: &mut V, node: &'ast TraitItem) +where + V: Visit<'ast> + ?Sized, +{ + match node { + TraitItem::Const(_binding_0) => { + v.visit_trait_item_const(_binding_0); + } + TraitItem::Fn(_binding_0) => { + v.visit_trait_item_fn(_binding_0); + } + TraitItem::Type(_binding_0) => { + v.visit_trait_item_type(_binding_0); + } + TraitItem::Macro(_binding_0) => { + v.visit_trait_item_macro(_binding_0); + } + TraitItem::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_const<'ast, V>(v: &mut V, node: &'ast TraitItemConst) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.const_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + skip!(node.colon_token); + v.visit_type(&node.ty); + if let Some(it) = &node.default { + skip!((it).0); + v.visit_expr(&(it).1); + } + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, 
doc(cfg(feature = "full")))] +pub fn visit_trait_item_fn<'ast, V>(v: &mut V, node: &'ast TraitItemFn) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_signature(&node.sig); + if let Some(it) = &node.default { + v.visit_block(it); + } + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_macro<'ast, V>(v: &mut V, node: &'ast TraitItemMacro) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_macro(&node.mac); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_type<'ast, V>(v: &mut V, node: &'ast TraitItemType) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + skip!(node.type_token); + v.visit_ident(&node.ident); + v.visit_generics(&node.generics); + skip!(node.colon_token); + for el in Punctuated::pairs(&node.bounds) { + let it = el.value(); + v.visit_type_param_bound(it); + } + if let Some(it) = &node.default { + skip!((it).0); + v.visit_type(&(it).1); + } + skip!(node.semi_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type<'ast, V>(v: &mut V, node: &'ast Type) +where + V: Visit<'ast> + ?Sized, +{ + match node { + Type::Array(_binding_0) => { + v.visit_type_array(_binding_0); + } + Type::BareFn(_binding_0) => { + v.visit_type_bare_fn(_binding_0); + } + Type::Group(_binding_0) => { + v.visit_type_group(_binding_0); + } + Type::ImplTrait(_binding_0) => { + v.visit_type_impl_trait(_binding_0); + } + Type::Infer(_binding_0) => { + v.visit_type_infer(_binding_0); + } + Type::Macro(_binding_0) => { + v.visit_type_macro(_binding_0); + } + Type::Never(_binding_0) => { + v.visit_type_never(_binding_0); + } + Type::Paren(_binding_0) => { + v.visit_type_paren(_binding_0); + } + Type::Path(_binding_0) => { + v.visit_type_path(_binding_0); + } + Type::Ptr(_binding_0) => { + v.visit_type_ptr(_binding_0); + } + Type::Reference(_binding_0) => { + v.visit_type_reference(_binding_0); + } + Type::Slice(_binding_0) => { + v.visit_type_slice(_binding_0); + } + Type::TraitObject(_binding_0) => { + v.visit_type_trait_object(_binding_0); + } + Type::Tuple(_binding_0) => { + v.visit_type_tuple(_binding_0); + } + Type::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_array<'ast, V>(v: &mut V, node: &'ast TypeArray) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.bracket_token); + v.visit_type(&*node.elem); + skip!(node.semi_token); + v.visit_expr(&node.len); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_bare_fn<'ast, V>(v: &mut V, node: &'ast TypeBareFn) +where + V: Visit<'ast> + ?Sized, +{ + if let Some(it) = &node.lifetimes { + v.visit_bound_lifetimes(it); + } + skip!(node.unsafety); + if let Some(it) = &node.abi { + v.visit_abi(it); + } + skip!(node.fn_token); + skip!(node.paren_token); + for el in Punctuated::pairs(&node.inputs) { + let it = el.value(); + v.visit_bare_fn_arg(it); + } + if let Some(it) = &node.variadic { + v.visit_bare_variadic(it); + } + v.visit_return_type(&node.output); +} +#[cfg(any(feature = "derive", feature = "full"))] 
+#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_group<'ast, V>(v: &mut V, node: &'ast TypeGroup) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.group_token); + v.visit_type(&*node.elem); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_impl_trait<'ast, V>(v: &mut V, node: &'ast TypeImplTrait) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.impl_token); + for el in Punctuated::pairs(&node.bounds) { + let it = el.value(); + v.visit_type_param_bound(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_infer<'ast, V>(v: &mut V, node: &'ast TypeInfer) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.underscore_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_macro<'ast, V>(v: &mut V, node: &'ast TypeMacro) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_macro(&node.mac); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_never<'ast, V>(v: &mut V, node: &'ast TypeNever) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.bang_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_param<'ast, V>(v: &mut V, node: &'ast TypeParam) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_ident(&node.ident); + skip!(node.colon_token); + for el in Punctuated::pairs(&node.bounds) { + let it = el.value(); + v.visit_type_param_bound(it); + } + skip!(node.eq_token); + if let Some(it) = &node.default { + v.visit_type(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_param_bound<'ast, V>(v: &mut V, node: &'ast TypeParamBound) +where + V: Visit<'ast> + ?Sized, +{ + match node { + TypeParamBound::Trait(_binding_0) => { + v.visit_trait_bound(_binding_0); + } + TypeParamBound::Lifetime(_binding_0) => { + v.visit_lifetime(_binding_0); + } + TypeParamBound::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_paren<'ast, V>(v: &mut V, node: &'ast TypeParen) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.paren_token); + v.visit_type(&*node.elem); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_path<'ast, V>(v: &mut V, node: &'ast TypePath) +where + V: Visit<'ast> + ?Sized, +{ + if let Some(it) = &node.qself { + v.visit_qself(it); + } + v.visit_path(&node.path); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_ptr<'ast, V>(v: &mut V, node: &'ast TypePtr) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.star_token); + skip!(node.const_token); + skip!(node.mutability); + v.visit_type(&*node.elem); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_reference<'ast, V>(v: &mut V, 
node: &'ast TypeReference) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.and_token); + if let Some(it) = &node.lifetime { + v.visit_lifetime(it); + } + skip!(node.mutability); + v.visit_type(&*node.elem); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_slice<'ast, V>(v: &mut V, node: &'ast TypeSlice) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.bracket_token); + v.visit_type(&*node.elem); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_trait_object<'ast, V>(v: &mut V, node: &'ast TypeTraitObject) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.dyn_token); + for el in Punctuated::pairs(&node.bounds) { + let it = el.value(); + v.visit_type_param_bound(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_tuple<'ast, V>(v: &mut V, node: &'ast TypeTuple) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.paren_token); + for el in Punctuated::pairs(&node.elems) { + let it = el.value(); + v.visit_type(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_un_op<'ast, V>(v: &mut V, node: &'ast UnOp) +where + V: Visit<'ast> + ?Sized, +{ + match node { + UnOp::Deref(_binding_0) => { + skip!(_binding_0); + } + UnOp::Not(_binding_0) => { + skip!(_binding_0); + } + UnOp::Neg(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_glob<'ast, V>(v: &mut V, node: &'ast UseGlob) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.star_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_group<'ast, V>(v: &mut V, node: &'ast UseGroup) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.brace_token); + for el in Punctuated::pairs(&node.items) { + let it = el.value(); + v.visit_use_tree(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_name<'ast, V>(v: &mut V, node: &'ast UseName) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_ident(&node.ident); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_path<'ast, V>(v: &mut V, node: &'ast UsePath) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_ident(&node.ident); + skip!(node.colon2_token); + v.visit_use_tree(&*node.tree); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_rename<'ast, V>(v: &mut V, node: &'ast UseRename) +where + V: Visit<'ast> + ?Sized, +{ + v.visit_ident(&node.ident); + skip!(node.as_token); + v.visit_ident(&node.rename); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_tree<'ast, V>(v: &mut V, node: &'ast UseTree) +where + V: Visit<'ast> + ?Sized, +{ + match node { + UseTree::Path(_binding_0) => { + v.visit_use_path(_binding_0); + } + UseTree::Name(_binding_0) => { + v.visit_use_name(_binding_0); + } + UseTree::Rename(_binding_0) => { + v.visit_use_rename(_binding_0); + } + UseTree::Glob(_binding_0) => { + v.visit_use_glob(_binding_0); + } + UseTree::Group(_binding_0) => { + v.visit_use_group(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn 
visit_variadic<'ast, V>(v: &mut V, node: &'ast Variadic) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + if let Some(it) = &node.pat { + v.visit_pat(&*(it).0); + skip!((it).1); + } + skip!(node.dots); + skip!(node.comma); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_variant<'ast, V>(v: &mut V, node: &'ast Variant) +where + V: Visit<'ast> + ?Sized, +{ + for it in &node.attrs { + v.visit_attribute(it); + } + v.visit_ident(&node.ident); + v.visit_fields(&node.fields); + if let Some(it) = &node.discriminant { + skip!((it).0); + v.visit_expr(&(it).1); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_vis_restricted<'ast, V>(v: &mut V, node: &'ast VisRestricted) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.pub_token); + skip!(node.paren_token); + skip!(node.in_token); + v.visit_path(&*node.path); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_visibility<'ast, V>(v: &mut V, node: &'ast Visibility) +where + V: Visit<'ast> + ?Sized, +{ + match node { + Visibility::Public(_binding_0) => { + skip!(_binding_0); + } + Visibility::Restricted(_binding_0) => { + v.visit_vis_restricted(_binding_0); + } + Visibility::Inherited => {} + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_where_clause<'ast, V>(v: &mut V, node: &'ast WhereClause) +where + V: Visit<'ast> + ?Sized, +{ + skip!(node.where_token); + for el in Punctuated::pairs(&node.predicates) { + let it = el.value(); + v.visit_where_predicate(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_where_predicate<'ast, V>(v: &mut V, node: &'ast WherePredicate) +where + V: Visit<'ast> + ?Sized, +{ + match node { + WherePredicate::Lifetime(_binding_0) => { + v.visit_predicate_lifetime(_binding_0); + } + WherePredicate::Type(_binding_0) => { + v.visit_predicate_type(_binding_0); + } + } +} diff --git a/vendor/syn/src/gen/visit_mut.rs b/vendor/syn/src/gen/visit_mut.rs new file mode 100644 index 0000000..3e81aae --- /dev/null +++ b/vendor/syn/src/gen/visit_mut.rs @@ -0,0 +1,3847 @@ +// This file is @generated by syn-internal-codegen. +// It is not intended for manual editing. + +#![allow(unused_variables)] +#![allow(clippy::needless_pass_by_ref_mut)] +#[cfg(any(feature = "full", feature = "derive"))] +use crate::punctuated::Punctuated; +use crate::*; +use proc_macro2::Span; +#[cfg(feature = "full")] +macro_rules! full { + ($e:expr) => { + $e + }; +} +#[cfg(all(feature = "derive", not(feature = "full")))] +macro_rules! full { + ($e:expr) => { + unreachable!() + }; +} +macro_rules! skip { + ($($tt:tt)*) => {}; +} +/// Syntax tree traversal to mutate an exclusive borrow of a syntax tree in +/// place. +/// +/// See the [module documentation] for details. 
+/// +/// [module documentation]: self +pub trait VisitMut { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_abi_mut(&mut self, i: &mut Abi) { + visit_abi_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_angle_bracketed_generic_arguments_mut( + &mut self, + i: &mut AngleBracketedGenericArguments, + ) { + visit_angle_bracketed_generic_arguments_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_arm_mut(&mut self, i: &mut Arm) { + visit_arm_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_assoc_const_mut(&mut self, i: &mut AssocConst) { + visit_assoc_const_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_assoc_type_mut(&mut self, i: &mut AssocType) { + visit_assoc_type_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_attr_style_mut(&mut self, i: &mut AttrStyle) { + visit_attr_style_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_attribute_mut(&mut self, i: &mut Attribute) { + visit_attribute_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bare_fn_arg_mut(&mut self, i: &mut BareFnArg) { + visit_bare_fn_arg_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bare_variadic_mut(&mut self, i: &mut BareVariadic) { + visit_bare_variadic_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bin_op_mut(&mut self, i: &mut BinOp) { + visit_bin_op_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_block_mut(&mut self, i: &mut Block) { + visit_block_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bound_lifetimes_mut(&mut self, i: &mut BoundLifetimes) { + visit_bound_lifetimes_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_const_param_mut(&mut self, i: &mut ConstParam) { + visit_const_param_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_constraint_mut(&mut self, i: &mut Constraint) { + visit_constraint_mut(self, i); + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_mut(&mut self, i: &mut Data) { + visit_data_mut(self, i); + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_enum_mut(&mut self, i: &mut DataEnum) { + visit_data_enum_mut(self, i); + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn 
visit_data_struct_mut(&mut self, i: &mut DataStruct) { + visit_data_struct_mut(self, i); + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_union_mut(&mut self, i: &mut DataUnion) { + visit_data_union_mut(self, i); + } + #[cfg(feature = "derive")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_derive_input_mut(&mut self, i: &mut DeriveInput) { + visit_derive_input_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_mut(&mut self, i: &mut Expr) { + visit_expr_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_array_mut(&mut self, i: &mut ExprArray) { + visit_expr_array_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_assign_mut(&mut self, i: &mut ExprAssign) { + visit_expr_assign_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_async_mut(&mut self, i: &mut ExprAsync) { + visit_expr_async_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_await_mut(&mut self, i: &mut ExprAwait) { + visit_expr_await_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_binary_mut(&mut self, i: &mut ExprBinary) { + visit_expr_binary_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_block_mut(&mut self, i: &mut ExprBlock) { + visit_expr_block_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_break_mut(&mut self, i: &mut ExprBreak) { + visit_expr_break_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_call_mut(&mut self, i: &mut ExprCall) { + visit_expr_call_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_cast_mut(&mut self, i: &mut ExprCast) { + visit_expr_cast_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_closure_mut(&mut self, i: &mut ExprClosure) { + visit_expr_closure_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_const_mut(&mut self, i: &mut ExprConst) { + visit_expr_const_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_continue_mut(&mut self, i: &mut ExprContinue) { + visit_expr_continue_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_field_mut(&mut self, i: &mut ExprField) { + visit_expr_field_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_for_loop_mut(&mut self, i: &mut ExprForLoop) { + visit_expr_for_loop_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_group_mut(&mut self, i: &mut ExprGroup) { + visit_expr_group_mut(self, i); + } + #[cfg(feature = "full")] 
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_if_mut(&mut self, i: &mut ExprIf) { + visit_expr_if_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_index_mut(&mut self, i: &mut ExprIndex) { + visit_expr_index_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_infer_mut(&mut self, i: &mut ExprInfer) { + visit_expr_infer_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_let_mut(&mut self, i: &mut ExprLet) { + visit_expr_let_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_lit_mut(&mut self, i: &mut ExprLit) { + visit_expr_lit_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_loop_mut(&mut self, i: &mut ExprLoop) { + visit_expr_loop_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_macro_mut(&mut self, i: &mut ExprMacro) { + visit_expr_macro_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_match_mut(&mut self, i: &mut ExprMatch) { + visit_expr_match_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_method_call_mut(&mut self, i: &mut ExprMethodCall) { + visit_expr_method_call_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_paren_mut(&mut self, i: &mut ExprParen) { + visit_expr_paren_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_path_mut(&mut self, i: &mut ExprPath) { + visit_expr_path_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_range_mut(&mut self, i: &mut ExprRange) { + visit_expr_range_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_reference_mut(&mut self, i: &mut ExprReference) { + visit_expr_reference_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_repeat_mut(&mut self, i: &mut ExprRepeat) { + visit_expr_repeat_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_return_mut(&mut self, i: &mut ExprReturn) { + visit_expr_return_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_struct_mut(&mut self, i: &mut ExprStruct) { + visit_expr_struct_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_try_mut(&mut self, i: &mut ExprTry) { + visit_expr_try_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_try_block_mut(&mut self, i: &mut ExprTryBlock) { + visit_expr_try_block_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, 
doc(cfg(feature = "full")))] + fn visit_expr_tuple_mut(&mut self, i: &mut ExprTuple) { + visit_expr_tuple_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_unary_mut(&mut self, i: &mut ExprUnary) { + visit_expr_unary_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_unsafe_mut(&mut self, i: &mut ExprUnsafe) { + visit_expr_unsafe_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_while_mut(&mut self, i: &mut ExprWhile) { + visit_expr_while_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_yield_mut(&mut self, i: &mut ExprYield) { + visit_expr_yield_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field_mut(&mut self, i: &mut Field) { + visit_field_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field_mutability_mut(&mut self, i: &mut FieldMutability) { + visit_field_mutability_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_field_pat_mut(&mut self, i: &mut FieldPat) { + visit_field_pat_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field_value_mut(&mut self, i: &mut FieldValue) { + visit_field_value_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields_mut(&mut self, i: &mut Fields) { + visit_fields_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields_named_mut(&mut self, i: &mut FieldsNamed) { + visit_fields_named_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields_unnamed_mut(&mut self, i: &mut FieldsUnnamed) { + visit_fields_unnamed_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_file_mut(&mut self, i: &mut File) { + visit_file_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_fn_arg_mut(&mut self, i: &mut FnArg) { + visit_fn_arg_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_mut(&mut self, i: &mut ForeignItem) { + visit_foreign_item_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_fn_mut(&mut self, i: &mut ForeignItemFn) { + visit_foreign_item_fn_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_macro_mut(&mut self, i: &mut ForeignItemMacro) { + visit_foreign_item_macro_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_static_mut(&mut self, i: &mut ForeignItemStatic) { + visit_foreign_item_static_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn 
visit_foreign_item_type_mut(&mut self, i: &mut ForeignItemType) { + visit_foreign_item_type_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_generic_argument_mut(&mut self, i: &mut GenericArgument) { + visit_generic_argument_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_generic_param_mut(&mut self, i: &mut GenericParam) { + visit_generic_param_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_generics_mut(&mut self, i: &mut Generics) { + visit_generics_mut(self, i); + } + fn visit_ident_mut(&mut self, i: &mut Ident) { + visit_ident_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_mut(&mut self, i: &mut ImplItem) { + visit_impl_item_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_const_mut(&mut self, i: &mut ImplItemConst) { + visit_impl_item_const_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_fn_mut(&mut self, i: &mut ImplItemFn) { + visit_impl_item_fn_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_macro_mut(&mut self, i: &mut ImplItemMacro) { + visit_impl_item_macro_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_type_mut(&mut self, i: &mut ImplItemType) { + visit_impl_item_type_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_restriction_mut(&mut self, i: &mut ImplRestriction) { + visit_impl_restriction_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_index_mut(&mut self, i: &mut Index) { + visit_index_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_mut(&mut self, i: &mut Item) { + visit_item_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_const_mut(&mut self, i: &mut ItemConst) { + visit_item_const_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_enum_mut(&mut self, i: &mut ItemEnum) { + visit_item_enum_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_extern_crate_mut(&mut self, i: &mut ItemExternCrate) { + visit_item_extern_crate_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_fn_mut(&mut self, i: &mut ItemFn) { + visit_item_fn_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_foreign_mod_mut(&mut self, i: &mut ItemForeignMod) { + visit_item_foreign_mod_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_impl_mut(&mut self, i: &mut ItemImpl) { + visit_item_impl_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_macro_mut(&mut self, i: &mut ItemMacro) { + 
visit_item_macro_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_mod_mut(&mut self, i: &mut ItemMod) { + visit_item_mod_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_static_mut(&mut self, i: &mut ItemStatic) { + visit_item_static_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_struct_mut(&mut self, i: &mut ItemStruct) { + visit_item_struct_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_trait_mut(&mut self, i: &mut ItemTrait) { + visit_item_trait_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_trait_alias_mut(&mut self, i: &mut ItemTraitAlias) { + visit_item_trait_alias_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_type_mut(&mut self, i: &mut ItemType) { + visit_item_type_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_union_mut(&mut self, i: &mut ItemUnion) { + visit_item_union_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_use_mut(&mut self, i: &mut ItemUse) { + visit_item_use_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_label_mut(&mut self, i: &mut Label) { + visit_label_mut(self, i); + } + fn visit_lifetime_mut(&mut self, i: &mut Lifetime) { + visit_lifetime_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_lifetime_param_mut(&mut self, i: &mut LifetimeParam) { + visit_lifetime_param_mut(self, i); + } + fn visit_lit_mut(&mut self, i: &mut Lit) { + visit_lit_mut(self, i); + } + fn visit_lit_bool_mut(&mut self, i: &mut LitBool) { + visit_lit_bool_mut(self, i); + } + fn visit_lit_byte_mut(&mut self, i: &mut LitByte) { + visit_lit_byte_mut(self, i); + } + fn visit_lit_byte_str_mut(&mut self, i: &mut LitByteStr) { + visit_lit_byte_str_mut(self, i); + } + fn visit_lit_char_mut(&mut self, i: &mut LitChar) { + visit_lit_char_mut(self, i); + } + fn visit_lit_float_mut(&mut self, i: &mut LitFloat) { + visit_lit_float_mut(self, i); + } + fn visit_lit_int_mut(&mut self, i: &mut LitInt) { + visit_lit_int_mut(self, i); + } + fn visit_lit_str_mut(&mut self, i: &mut LitStr) { + visit_lit_str_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_local_mut(&mut self, i: &mut Local) { + visit_local_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_local_init_mut(&mut self, i: &mut LocalInit) { + visit_local_init_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_macro_mut(&mut self, i: &mut Macro) { + visit_macro_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_macro_delimiter_mut(&mut self, i: &mut MacroDelimiter) { + visit_macro_delimiter_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_member_mut(&mut 
self, i: &mut Member) { + visit_member_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta_mut(&mut self, i: &mut Meta) { + visit_meta_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta_list_mut(&mut self, i: &mut MetaList) { + visit_meta_list_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta_name_value_mut(&mut self, i: &mut MetaNameValue) { + visit_meta_name_value_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_parenthesized_generic_arguments_mut( + &mut self, + i: &mut ParenthesizedGenericArguments, + ) { + visit_parenthesized_generic_arguments_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_mut(&mut self, i: &mut Pat) { + visit_pat_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_ident_mut(&mut self, i: &mut PatIdent) { + visit_pat_ident_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_or_mut(&mut self, i: &mut PatOr) { + visit_pat_or_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_paren_mut(&mut self, i: &mut PatParen) { + visit_pat_paren_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_reference_mut(&mut self, i: &mut PatReference) { + visit_pat_reference_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_rest_mut(&mut self, i: &mut PatRest) { + visit_pat_rest_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_slice_mut(&mut self, i: &mut PatSlice) { + visit_pat_slice_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_struct_mut(&mut self, i: &mut PatStruct) { + visit_pat_struct_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_tuple_mut(&mut self, i: &mut PatTuple) { + visit_pat_tuple_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_tuple_struct_mut(&mut self, i: &mut PatTupleStruct) { + visit_pat_tuple_struct_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_type_mut(&mut self, i: &mut PatType) { + visit_pat_type_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_wild_mut(&mut self, i: &mut PatWild) { + visit_pat_wild_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_path_mut(&mut self, i: &mut Path) { + visit_path_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_path_arguments_mut(&mut self, i: &mut PathArguments) { + visit_path_arguments_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + 
#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_path_segment_mut(&mut self, i: &mut PathSegment) { + visit_path_segment_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_predicate_lifetime_mut(&mut self, i: &mut PredicateLifetime) { + visit_predicate_lifetime_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_predicate_type_mut(&mut self, i: &mut PredicateType) { + visit_predicate_type_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_qself_mut(&mut self, i: &mut QSelf) { + visit_qself_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_range_limits_mut(&mut self, i: &mut RangeLimits) { + visit_range_limits_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_receiver_mut(&mut self, i: &mut Receiver) { + visit_receiver_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_return_type_mut(&mut self, i: &mut ReturnType) { + visit_return_type_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_signature_mut(&mut self, i: &mut Signature) { + visit_signature_mut(self, i); + } + fn visit_span_mut(&mut self, i: &mut Span) { + visit_span_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_static_mutability_mut(&mut self, i: &mut StaticMutability) { + visit_static_mutability_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_stmt_mut(&mut self, i: &mut Stmt) { + visit_stmt_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_stmt_macro_mut(&mut self, i: &mut StmtMacro) { + visit_stmt_macro_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_trait_bound_mut(&mut self, i: &mut TraitBound) { + visit_trait_bound_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_trait_bound_modifier_mut(&mut self, i: &mut TraitBoundModifier) { + visit_trait_bound_modifier_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_mut(&mut self, i: &mut TraitItem) { + visit_trait_item_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_const_mut(&mut self, i: &mut TraitItemConst) { + visit_trait_item_const_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_fn_mut(&mut self, i: &mut TraitItemFn) { + visit_trait_item_fn_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_macro_mut(&mut self, i: &mut TraitItemMacro) { + visit_trait_item_macro_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_type_mut(&mut self, i: &mut 
TraitItemType) { + visit_trait_item_type_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_mut(&mut self, i: &mut Type) { + visit_type_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_array_mut(&mut self, i: &mut TypeArray) { + visit_type_array_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_bare_fn_mut(&mut self, i: &mut TypeBareFn) { + visit_type_bare_fn_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_group_mut(&mut self, i: &mut TypeGroup) { + visit_type_group_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_impl_trait_mut(&mut self, i: &mut TypeImplTrait) { + visit_type_impl_trait_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_infer_mut(&mut self, i: &mut TypeInfer) { + visit_type_infer_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_macro_mut(&mut self, i: &mut TypeMacro) { + visit_type_macro_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_never_mut(&mut self, i: &mut TypeNever) { + visit_type_never_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_param_mut(&mut self, i: &mut TypeParam) { + visit_type_param_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_param_bound_mut(&mut self, i: &mut TypeParamBound) { + visit_type_param_bound_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_paren_mut(&mut self, i: &mut TypeParen) { + visit_type_paren_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_path_mut(&mut self, i: &mut TypePath) { + visit_type_path_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_ptr_mut(&mut self, i: &mut TypePtr) { + visit_type_ptr_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_reference_mut(&mut self, i: &mut TypeReference) { + visit_type_reference_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_slice_mut(&mut self, i: &mut TypeSlice) { + visit_type_slice_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] 
+ fn visit_type_trait_object_mut(&mut self, i: &mut TypeTraitObject) { + visit_type_trait_object_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_tuple_mut(&mut self, i: &mut TypeTuple) { + visit_type_tuple_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_un_op_mut(&mut self, i: &mut UnOp) { + visit_un_op_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_glob_mut(&mut self, i: &mut UseGlob) { + visit_use_glob_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_group_mut(&mut self, i: &mut UseGroup) { + visit_use_group_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_name_mut(&mut self, i: &mut UseName) { + visit_use_name_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_path_mut(&mut self, i: &mut UsePath) { + visit_use_path_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_rename_mut(&mut self, i: &mut UseRename) { + visit_use_rename_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_tree_mut(&mut self, i: &mut UseTree) { + visit_use_tree_mut(self, i); + } + #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_variadic_mut(&mut self, i: &mut Variadic) { + visit_variadic_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_variant_mut(&mut self, i: &mut Variant) { + visit_variant_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_vis_restricted_mut(&mut self, i: &mut VisRestricted) { + visit_vis_restricted_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_visibility_mut(&mut self, i: &mut Visibility) { + visit_visibility_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_where_clause_mut(&mut self, i: &mut WhereClause) { + visit_where_clause_mut(self, i); + } + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_where_predicate_mut(&mut self, i: &mut WherePredicate) { + visit_where_predicate_mut(self, i); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_abi_mut<V>(v: &mut V, node: &mut Abi) +where + V: VisitMut + ?Sized, +{ + skip!(node.extern_token); + if let Some(it) = &mut node.name { + v.visit_lit_str_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_angle_bracketed_generic_arguments_mut<V>( + v: &mut V, + node: &mut AngleBracketedGenericArguments, +) +where + V: VisitMut + ?Sized, +{ + skip!(node.colon2_token); + skip!(node.lt_token); + for mut el in Punctuated::pairs_mut(&mut 
node.args) { + let it = el.value_mut(); + v.visit_generic_argument_mut(it); + } + skip!(node.gt_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_arm_mut<V>(v: &mut V, node: &mut Arm) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_pat_mut(&mut node.pat); + if let Some(it) = &mut node.guard { + skip!((it).0); + v.visit_expr_mut(&mut *(it).1); + } + skip!(node.fat_arrow_token); + v.visit_expr_mut(&mut *node.body); + skip!(node.comma); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_assoc_const_mut<V>(v: &mut V, node: &mut AssocConst) +where + V: VisitMut + ?Sized, +{ + v.visit_ident_mut(&mut node.ident); + if let Some(it) = &mut node.generics { + v.visit_angle_bracketed_generic_arguments_mut(it); + } + skip!(node.eq_token); + v.visit_expr_mut(&mut node.value); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_assoc_type_mut<V>(v: &mut V, node: &mut AssocType) +where + V: VisitMut + ?Sized, +{ + v.visit_ident_mut(&mut node.ident); + if let Some(it) = &mut node.generics { + v.visit_angle_bracketed_generic_arguments_mut(it); + } + skip!(node.eq_token); + v.visit_type_mut(&mut node.ty); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_attr_style_mut<V>(v: &mut V, node: &mut AttrStyle) +where + V: VisitMut + ?Sized, +{ + match node { + AttrStyle::Outer => {} + AttrStyle::Inner(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_attribute_mut<V>(v: &mut V, node: &mut Attribute) +where + V: VisitMut + ?Sized, +{ + skip!(node.pound_token); + v.visit_attr_style_mut(&mut node.style); + skip!(node.bracket_token); + v.visit_meta_mut(&mut node.meta); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bare_fn_arg_mut<V>(v: &mut V, node: &mut BareFnArg) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.name { + v.visit_ident_mut(&mut (it).0); + skip!((it).1); + } + v.visit_type_mut(&mut node.ty); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bare_variadic_mut<V>(v: &mut V, node: &mut BareVariadic) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.name { + v.visit_ident_mut(&mut (it).0); + skip!((it).1); + } + skip!(node.dots); + skip!(node.comma); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bin_op_mut<V>(v: &mut V, node: &mut BinOp) +where + V: VisitMut + ?Sized, +{ + match node { + BinOp::Add(_binding_0) => { + skip!(_binding_0); + } + BinOp::Sub(_binding_0) => { + skip!(_binding_0); + } + BinOp::Mul(_binding_0) => { + skip!(_binding_0); + } + BinOp::Div(_binding_0) => { + skip!(_binding_0); + } + BinOp::Rem(_binding_0) => { + skip!(_binding_0); + } + BinOp::And(_binding_0) => { + skip!(_binding_0); + } + BinOp::Or(_binding_0) => { + skip!(_binding_0); 
+ } + BinOp::BitXor(_binding_0) => { + skip!(_binding_0); + } + BinOp::BitAnd(_binding_0) => { + skip!(_binding_0); + } + BinOp::BitOr(_binding_0) => { + skip!(_binding_0); + } + BinOp::Shl(_binding_0) => { + skip!(_binding_0); + } + BinOp::Shr(_binding_0) => { + skip!(_binding_0); + } + BinOp::Eq(_binding_0) => { + skip!(_binding_0); + } + BinOp::Lt(_binding_0) => { + skip!(_binding_0); + } + BinOp::Le(_binding_0) => { + skip!(_binding_0); + } + BinOp::Ne(_binding_0) => { + skip!(_binding_0); + } + BinOp::Ge(_binding_0) => { + skip!(_binding_0); + } + BinOp::Gt(_binding_0) => { + skip!(_binding_0); + } + BinOp::AddAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::SubAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::MulAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::DivAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::RemAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::BitXorAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::BitAndAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::BitOrAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::ShlAssign(_binding_0) => { + skip!(_binding_0); + } + BinOp::ShrAssign(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_block_mut<V>(v: &mut V, node: &mut Block) +where + V: VisitMut + ?Sized, +{ + skip!(node.brace_token); + for it in &mut node.stmts { + v.visit_stmt_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bound_lifetimes_mut<V>(v: &mut V, node: &mut BoundLifetimes) +where + V: VisitMut + ?Sized, +{ + skip!(node.for_token); + skip!(node.lt_token); + for mut el in Punctuated::pairs_mut(&mut node.lifetimes) { + let it = el.value_mut(); + v.visit_generic_param_mut(it); + } + skip!(node.gt_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_const_param_mut<V>(v: &mut V, node: &mut ConstParam) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.const_token); + v.visit_ident_mut(&mut node.ident); + skip!(node.colon_token); + v.visit_type_mut(&mut node.ty); + skip!(node.eq_token); + if let Some(it) = &mut node.default { + v.visit_expr_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_constraint_mut<V>(v: &mut V, node: &mut Constraint) +where + V: VisitMut + ?Sized, +{ + v.visit_ident_mut(&mut node.ident); + if let Some(it) = &mut node.generics { + v.visit_angle_bracketed_generic_arguments_mut(it); + } + skip!(node.colon_token); + for mut el in Punctuated::pairs_mut(&mut node.bounds) { + let it = el.value_mut(); + v.visit_type_param_bound_mut(it); + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_mut<V>(v: &mut V, node: &mut Data) +where + V: VisitMut + ?Sized, +{ + match node { + Data::Struct(_binding_0) => { + v.visit_data_struct_mut(_binding_0); + } + Data::Enum(_binding_0) => { + v.visit_data_enum_mut(_binding_0); + } + Data::Union(_binding_0) => { + v.visit_data_union_mut(_binding_0); + } + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_enum_mut<V>(v: &mut V, node: &mut DataEnum) +where + V: VisitMut + ?Sized, +{ + 
skip!(node.enum_token); + skip!(node.brace_token); + for mut el in Punctuated::pairs_mut(&mut node.variants) { + let it = el.value_mut(); + v.visit_variant_mut(it); + } +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_struct_mut<V>(v: &mut V, node: &mut DataStruct) +where + V: VisitMut + ?Sized, +{ + skip!(node.struct_token); + v.visit_fields_mut(&mut node.fields); + skip!(node.semi_token); +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_union_mut<V>(v: &mut V, node: &mut DataUnion) +where + V: VisitMut + ?Sized, +{ + skip!(node.union_token); + v.visit_fields_named_mut(&mut node.fields); +} +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_derive_input_mut<V>(v: &mut V, node: &mut DeriveInput) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + v.visit_data_mut(&mut node.data); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_mut<V>(v: &mut V, node: &mut Expr) +where + V: VisitMut + ?Sized, +{ + match node { + Expr::Array(_binding_0) => { + full!(v.visit_expr_array_mut(_binding_0)); + } + Expr::Assign(_binding_0) => { + full!(v.visit_expr_assign_mut(_binding_0)); + } + Expr::Async(_binding_0) => { + full!(v.visit_expr_async_mut(_binding_0)); + } + Expr::Await(_binding_0) => { + full!(v.visit_expr_await_mut(_binding_0)); + } + Expr::Binary(_binding_0) => { + v.visit_expr_binary_mut(_binding_0); + } + Expr::Block(_binding_0) => { + full!(v.visit_expr_block_mut(_binding_0)); + } + Expr::Break(_binding_0) => { + full!(v.visit_expr_break_mut(_binding_0)); + } + Expr::Call(_binding_0) => { + v.visit_expr_call_mut(_binding_0); + } + Expr::Cast(_binding_0) => { + v.visit_expr_cast_mut(_binding_0); + } + Expr::Closure(_binding_0) => { + full!(v.visit_expr_closure_mut(_binding_0)); + } + Expr::Const(_binding_0) => { + full!(v.visit_expr_const_mut(_binding_0)); + } + Expr::Continue(_binding_0) => { + full!(v.visit_expr_continue_mut(_binding_0)); + } + Expr::Field(_binding_0) => { + v.visit_expr_field_mut(_binding_0); + } + Expr::ForLoop(_binding_0) => { + full!(v.visit_expr_for_loop_mut(_binding_0)); + } + Expr::Group(_binding_0) => { + v.visit_expr_group_mut(_binding_0); + } + Expr::If(_binding_0) => { + full!(v.visit_expr_if_mut(_binding_0)); + } + Expr::Index(_binding_0) => { + v.visit_expr_index_mut(_binding_0); + } + Expr::Infer(_binding_0) => { + full!(v.visit_expr_infer_mut(_binding_0)); + } + Expr::Let(_binding_0) => { + full!(v.visit_expr_let_mut(_binding_0)); + } + Expr::Lit(_binding_0) => { + v.visit_expr_lit_mut(_binding_0); + } + Expr::Loop(_binding_0) => { + full!(v.visit_expr_loop_mut(_binding_0)); + } + Expr::Macro(_binding_0) => { + v.visit_expr_macro_mut(_binding_0); + } + Expr::Match(_binding_0) => { + full!(v.visit_expr_match_mut(_binding_0)); + } + Expr::MethodCall(_binding_0) => { + v.visit_expr_method_call_mut(_binding_0); + } + Expr::Paren(_binding_0) => { + v.visit_expr_paren_mut(_binding_0); + } + Expr::Path(_binding_0) => { + v.visit_expr_path_mut(_binding_0); + } + Expr::Range(_binding_0) => { + full!(v.visit_expr_range_mut(_binding_0)); + } + Expr::Reference(_binding_0) => { + v.visit_expr_reference_mut(_binding_0); + } + Expr::Repeat(_binding_0) => { + 
full!(v.visit_expr_repeat_mut(_binding_0)); + } + Expr::Return(_binding_0) => { + full!(v.visit_expr_return_mut(_binding_0)); + } + Expr::Struct(_binding_0) => { + v.visit_expr_struct_mut(_binding_0); + } + Expr::Try(_binding_0) => { + full!(v.visit_expr_try_mut(_binding_0)); + } + Expr::TryBlock(_binding_0) => { + full!(v.visit_expr_try_block_mut(_binding_0)); + } + Expr::Tuple(_binding_0) => { + full!(v.visit_expr_tuple_mut(_binding_0)); + } + Expr::Unary(_binding_0) => { + v.visit_expr_unary_mut(_binding_0); + } + Expr::Unsafe(_binding_0) => { + full!(v.visit_expr_unsafe_mut(_binding_0)); + } + Expr::Verbatim(_binding_0) => { + skip!(_binding_0); + } + Expr::While(_binding_0) => { + full!(v.visit_expr_while_mut(_binding_0)); + } + Expr::Yield(_binding_0) => { + full!(v.visit_expr_yield_mut(_binding_0)); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_array_mut<V>(v: &mut V, node: &mut ExprArray) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.bracket_token); + for mut el in Punctuated::pairs_mut(&mut node.elems) { + let it = el.value_mut(); + v.visit_expr_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_assign_mut<V>(v: &mut V, node: &mut ExprAssign) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_expr_mut(&mut *node.left); + skip!(node.eq_token); + v.visit_expr_mut(&mut *node.right); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_async_mut<V>(v: &mut V, node: &mut ExprAsync) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.async_token); + skip!(node.capture); + v.visit_block_mut(&mut node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_await_mut<V>(v: &mut V, node: &mut ExprAwait) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_expr_mut(&mut *node.base); + skip!(node.dot_token); + skip!(node.await_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_binary_mut<V>(v: &mut V, node: &mut ExprBinary) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_expr_mut(&mut *node.left); + v.visit_bin_op_mut(&mut node.op); + v.visit_expr_mut(&mut *node.right); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_block_mut<V>(v: &mut V, node: &mut ExprBlock) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.label { + v.visit_label_mut(it); + } + v.visit_block_mut(&mut node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_break_mut<V>(v: &mut V, node: &mut ExprBreak) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.break_token); + if let Some(it) = &mut node.label { + v.visit_lifetime_mut(it); + } + if let Some(it) = &mut node.expr { + v.visit_expr_mut(&mut **it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_call_mut<V>(v: &mut V, node: &mut 
ExprCall) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_expr_mut(&mut *node.func); + skip!(node.paren_token); + for mut el in Punctuated::pairs_mut(&mut node.args) { + let it = el.value_mut(); + v.visit_expr_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_cast_mut<V>(v: &mut V, node: &mut ExprCast) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_expr_mut(&mut *node.expr); + skip!(node.as_token); + v.visit_type_mut(&mut *node.ty); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_closure_mut<V>(v: &mut V, node: &mut ExprClosure) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.lifetimes { + v.visit_bound_lifetimes_mut(it); + } + skip!(node.constness); + skip!(node.movability); + skip!(node.asyncness); + skip!(node.capture); + skip!(node.or1_token); + for mut el in Punctuated::pairs_mut(&mut node.inputs) { + let it = el.value_mut(); + v.visit_pat_mut(it); + } + skip!(node.or2_token); + v.visit_return_type_mut(&mut node.output); + v.visit_expr_mut(&mut *node.body); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_const_mut<V>(v: &mut V, node: &mut ExprConst) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.const_token); + v.visit_block_mut(&mut node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_continue_mut<V>(v: &mut V, node: &mut ExprContinue) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.continue_token); + if let Some(it) = &mut node.label { + v.visit_lifetime_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_field_mut<V>(v: &mut V, node: &mut ExprField) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_expr_mut(&mut *node.base); + skip!(node.dot_token); + v.visit_member_mut(&mut node.member); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_for_loop_mut<V>(v: &mut V, node: &mut ExprForLoop) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.label { + v.visit_label_mut(it); + } + skip!(node.for_token); + v.visit_pat_mut(&mut *node.pat); + skip!(node.in_token); + v.visit_expr_mut(&mut *node.expr); + v.visit_block_mut(&mut node.body); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_group_mut<V>(v: &mut V, node: &mut ExprGroup) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.group_token); + v.visit_expr_mut(&mut *node.expr); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_if_mut<V>(v: &mut V, node: &mut ExprIf) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.if_token); + v.visit_expr_mut(&mut *node.cond); + v.visit_block_mut(&mut node.then_branch); + if 
let Some(it) = &mut node.else_branch { + skip!((it).0); + v.visit_expr_mut(&mut *(it).1); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_index_mut<V>(v: &mut V, node: &mut ExprIndex) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_expr_mut(&mut *node.expr); + skip!(node.bracket_token); + v.visit_expr_mut(&mut *node.index); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_infer_mut<V>(v: &mut V, node: &mut ExprInfer) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.underscore_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_let_mut<V>(v: &mut V, node: &mut ExprLet) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.let_token); + v.visit_pat_mut(&mut *node.pat); + skip!(node.eq_token); + v.visit_expr_mut(&mut *node.expr); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_lit_mut<V>(v: &mut V, node: &mut ExprLit) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_lit_mut(&mut node.lit); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_loop_mut<V>(v: &mut V, node: &mut ExprLoop) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.label { + v.visit_label_mut(it); + } + skip!(node.loop_token); + v.visit_block_mut(&mut node.body); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_macro_mut<V>(v: &mut V, node: &mut ExprMacro) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_macro_mut(&mut node.mac); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_match_mut<V>(v: &mut V, node: &mut ExprMatch) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.match_token); + v.visit_expr_mut(&mut *node.expr); + skip!(node.brace_token); + for it in &mut node.arms { + v.visit_arm_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_method_call_mut<V>(v: &mut V, node: &mut ExprMethodCall) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_expr_mut(&mut *node.receiver); + skip!(node.dot_token); + v.visit_ident_mut(&mut node.method); + if let Some(it) = &mut node.turbofish { + v.visit_angle_bracketed_generic_arguments_mut(it); + } + skip!(node.paren_token); + for mut el in Punctuated::pairs_mut(&mut node.args) { + let it = el.value_mut(); + v.visit_expr_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_paren_mut<V>(v: &mut V, node: &mut ExprParen) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.paren_token); + v.visit_expr_mut(&mut *node.expr); +} +#[cfg(any(feature = 
"derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_path_mut<V>(v: &mut V, node: &mut ExprPath) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.qself { + v.visit_qself_mut(it); + } + v.visit_path_mut(&mut node.path); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_range_mut<V>(v: &mut V, node: &mut ExprRange) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.start { + v.visit_expr_mut(&mut **it); + } + v.visit_range_limits_mut(&mut node.limits); + if let Some(it) = &mut node.end { + v.visit_expr_mut(&mut **it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_reference_mut<V>(v: &mut V, node: &mut ExprReference) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.and_token); + skip!(node.mutability); + v.visit_expr_mut(&mut *node.expr); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_repeat_mut<V>(v: &mut V, node: &mut ExprRepeat) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.bracket_token); + v.visit_expr_mut(&mut *node.expr); + skip!(node.semi_token); + v.visit_expr_mut(&mut *node.len); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_return_mut<V>(v: &mut V, node: &mut ExprReturn) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.return_token); + if let Some(it) = &mut node.expr { + v.visit_expr_mut(&mut **it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_struct_mut<V>(v: &mut V, node: &mut ExprStruct) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.qself { + v.visit_qself_mut(it); + } + v.visit_path_mut(&mut node.path); + skip!(node.brace_token); + for mut el in Punctuated::pairs_mut(&mut node.fields) { + let it = el.value_mut(); + v.visit_field_value_mut(it); + } + skip!(node.dot2_token); + if let Some(it) = &mut node.rest { + v.visit_expr_mut(&mut **it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_try_mut<V>(v: &mut V, node: &mut ExprTry) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_expr_mut(&mut *node.expr); + skip!(node.question_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_try_block_mut<V>(v: &mut V, node: &mut ExprTryBlock) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.try_token); + v.visit_block_mut(&mut node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_tuple_mut<V>(v: &mut V, node: &mut ExprTuple) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.paren_token); + for mut el in Punctuated::pairs_mut(&mut node.elems) { + let it = el.value_mut(); + v.visit_expr_mut(it); + } +} 
+#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_unary_mut<V>(v: &mut V, node: &mut ExprUnary) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_un_op_mut(&mut node.op); + v.visit_expr_mut(&mut *node.expr); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_unsafe_mut<V>(v: &mut V, node: &mut ExprUnsafe) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.unsafe_token); + v.visit_block_mut(&mut node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_while_mut<V>(v: &mut V, node: &mut ExprWhile) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.label { + v.visit_label_mut(it); + } + skip!(node.while_token); + v.visit_expr_mut(&mut *node.cond); + v.visit_block_mut(&mut node.body); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_yield_mut<V>(v: &mut V, node: &mut ExprYield) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.yield_token); + if let Some(it) = &mut node.expr { + v.visit_expr_mut(&mut **it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field_mut<V>(v: &mut V, node: &mut Field) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + v.visit_field_mutability_mut(&mut node.mutability); + if let Some(it) = &mut node.ident { + v.visit_ident_mut(it); + } + skip!(node.colon_token); + v.visit_type_mut(&mut node.ty); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field_mutability_mut<V>(v: &mut V, node: &mut FieldMutability) +where + V: VisitMut + ?Sized, +{ + match node { + FieldMutability::None => {} + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_field_pat_mut<V>(v: &mut V, node: &mut FieldPat) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_member_mut(&mut node.member); + skip!(node.colon_token); + v.visit_pat_mut(&mut *node.pat); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field_value_mut<V>(v: &mut V, node: &mut FieldValue) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_member_mut(&mut node.member); + skip!(node.colon_token); + v.visit_expr_mut(&mut node.expr); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields_mut<V>(v: &mut V, node: &mut Fields) +where + V: VisitMut + ?Sized, +{ + match node { + Fields::Named(_binding_0) => { + v.visit_fields_named_mut(_binding_0); + } + Fields::Unnamed(_binding_0) => { + v.visit_fields_unnamed_mut(_binding_0); + } + Fields::Unit => {} + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields_named_mut<V>(v: &mut V, node: &mut 
FieldsNamed) +where + V: VisitMut + ?Sized, +{ + skip!(node.brace_token); + for mut el in Punctuated::pairs_mut(&mut node.named) { + let it = el.value_mut(); + v.visit_field_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields_unnamed_mut<V>(v: &mut V, node: &mut FieldsUnnamed) +where + V: VisitMut + ?Sized, +{ + skip!(node.paren_token); + for mut el in Punctuated::pairs_mut(&mut node.unnamed) { + let it = el.value_mut(); + v.visit_field_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_file_mut<V>(v: &mut V, node: &mut File) +where + V: VisitMut + ?Sized, +{ + skip!(node.shebang); + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + for it in &mut node.items { + v.visit_item_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_fn_arg_mut<V>(v: &mut V, node: &mut FnArg) +where + V: VisitMut + ?Sized, +{ + match node { + FnArg::Receiver(_binding_0) => { + v.visit_receiver_mut(_binding_0); + } + FnArg::Typed(_binding_0) => { + v.visit_pat_type_mut(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_mut<V>(v: &mut V, node: &mut ForeignItem) +where + V: VisitMut + ?Sized, +{ + match node { + ForeignItem::Fn(_binding_0) => { + v.visit_foreign_item_fn_mut(_binding_0); + } + ForeignItem::Static(_binding_0) => { + v.visit_foreign_item_static_mut(_binding_0); + } + ForeignItem::Type(_binding_0) => { + v.visit_foreign_item_type_mut(_binding_0); + } + ForeignItem::Macro(_binding_0) => { + v.visit_foreign_item_macro_mut(_binding_0); + } + ForeignItem::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_fn_mut<V>(v: &mut V, node: &mut ForeignItemFn) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + v.visit_signature_mut(&mut node.sig); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_macro_mut<V>(v: &mut V, node: &mut ForeignItemMacro) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_macro_mut(&mut node.mac); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_static_mut<V>(v: &mut V, node: &mut ForeignItemStatic) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.static_token); + v.visit_static_mutability_mut(&mut node.mutability); + v.visit_ident_mut(&mut node.ident); + skip!(node.colon_token); + v.visit_type_mut(&mut *node.ty); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_type_mut<V>(v: &mut V, node: &mut ForeignItemType) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.type_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + skip!(node.semi_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = 
"full"))))] +pub fn visit_generic_argument_mut<V>(v: &mut V, node: &mut GenericArgument) +where + V: VisitMut + ?Sized, +{ + match node { + GenericArgument::Lifetime(_binding_0) => { + v.visit_lifetime_mut(_binding_0); + } + GenericArgument::Type(_binding_0) => { + v.visit_type_mut(_binding_0); + } + GenericArgument::Const(_binding_0) => { + v.visit_expr_mut(_binding_0); + } + GenericArgument::AssocType(_binding_0) => { + v.visit_assoc_type_mut(_binding_0); + } + GenericArgument::AssocConst(_binding_0) => { + v.visit_assoc_const_mut(_binding_0); + } + GenericArgument::Constraint(_binding_0) => { + v.visit_constraint_mut(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_generic_param_mut<V>(v: &mut V, node: &mut GenericParam) +where + V: VisitMut + ?Sized, +{ + match node { + GenericParam::Lifetime(_binding_0) => { + v.visit_lifetime_param_mut(_binding_0); + } + GenericParam::Type(_binding_0) => { + v.visit_type_param_mut(_binding_0); + } + GenericParam::Const(_binding_0) => { + v.visit_const_param_mut(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_generics_mut<V>(v: &mut V, node: &mut Generics) +where + V: VisitMut + ?Sized, +{ + skip!(node.lt_token); + for mut el in Punctuated::pairs_mut(&mut node.params) { + let it = el.value_mut(); + v.visit_generic_param_mut(it); + } + skip!(node.gt_token); + if let Some(it) = &mut node.where_clause { + v.visit_where_clause_mut(it); + } +} +pub fn visit_ident_mut<V>(v: &mut V, node: &mut Ident) +where + V: VisitMut + ?Sized, +{ + let mut span = node.span(); + v.visit_span_mut(&mut span); + node.set_span(span); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_mut<V>(v: &mut V, node: &mut ImplItem) +where + V: VisitMut + ?Sized, +{ + match node { + ImplItem::Const(_binding_0) => { + v.visit_impl_item_const_mut(_binding_0); + } + ImplItem::Fn(_binding_0) => { + v.visit_impl_item_fn_mut(_binding_0); + } + ImplItem::Type(_binding_0) => { + v.visit_impl_item_type_mut(_binding_0); + } + ImplItem::Macro(_binding_0) => { + v.visit_impl_item_macro_mut(_binding_0); + } + ImplItem::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_const_mut<V>(v: &mut V, node: &mut ImplItemConst) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.defaultness); + skip!(node.const_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + skip!(node.colon_token); + v.visit_type_mut(&mut node.ty); + skip!(node.eq_token); + v.visit_expr_mut(&mut node.expr); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_fn_mut<V>(v: &mut V, node: &mut ImplItemFn) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.defaultness); + v.visit_signature_mut(&mut node.sig); + v.visit_block_mut(&mut node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_macro_mut<V>(v: &mut V, node: &mut ImplItemMacro) +where + V: VisitMut + ?Sized, +{ + for it in &mut 
node.attrs { + v.visit_attribute_mut(it); + } + v.visit_macro_mut(&mut node.mac); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_type_mut<V>(v: &mut V, node: &mut ImplItemType) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.defaultness); + skip!(node.type_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + skip!(node.eq_token); + v.visit_type_mut(&mut node.ty); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_restriction_mut<V>(v: &mut V, node: &mut ImplRestriction) +where + V: VisitMut + ?Sized, +{ + match *node {} +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_index_mut<V>(v: &mut V, node: &mut Index) +where + V: VisitMut + ?Sized, +{ + skip!(node.index); + v.visit_span_mut(&mut node.span); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_mut<V>(v: &mut V, node: &mut Item) +where + V: VisitMut + ?Sized, +{ + match node { + Item::Const(_binding_0) => { + v.visit_item_const_mut(_binding_0); + } + Item::Enum(_binding_0) => { + v.visit_item_enum_mut(_binding_0); + } + Item::ExternCrate(_binding_0) => { + v.visit_item_extern_crate_mut(_binding_0); + } + Item::Fn(_binding_0) => { + v.visit_item_fn_mut(_binding_0); + } + Item::ForeignMod(_binding_0) => { + v.visit_item_foreign_mod_mut(_binding_0); + } + Item::Impl(_binding_0) => { + v.visit_item_impl_mut(_binding_0); + } + Item::Macro(_binding_0) => { + v.visit_item_macro_mut(_binding_0); + } + Item::Mod(_binding_0) => { + v.visit_item_mod_mut(_binding_0); + } + Item::Static(_binding_0) => { + v.visit_item_static_mut(_binding_0); + } + Item::Struct(_binding_0) => { + v.visit_item_struct_mut(_binding_0); + } + Item::Trait(_binding_0) => { + v.visit_item_trait_mut(_binding_0); + } + Item::TraitAlias(_binding_0) => { + v.visit_item_trait_alias_mut(_binding_0); + } + Item::Type(_binding_0) => { + v.visit_item_type_mut(_binding_0); + } + Item::Union(_binding_0) => { + v.visit_item_union_mut(_binding_0); + } + Item::Use(_binding_0) => { + v.visit_item_use_mut(_binding_0); + } + Item::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_const_mut<V>(v: &mut V, node: &mut ItemConst) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.const_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + skip!(node.colon_token); + v.visit_type_mut(&mut *node.ty); + skip!(node.eq_token); + v.visit_expr_mut(&mut *node.expr); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_enum_mut<V>(v: &mut V, node: &mut ItemEnum) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.enum_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + skip!(node.brace_token); + for mut el in Punctuated::pairs_mut(&mut node.variants) { + let it = el.value_mut(); + v.visit_variant_mut(it); + } +} +#[cfg(feature = 
"full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_extern_crate_mut<V>(v: &mut V, node: &mut ItemExternCrate) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.extern_token); + skip!(node.crate_token); + v.visit_ident_mut(&mut node.ident); + if let Some(it) = &mut node.rename { + skip!((it).0); + v.visit_ident_mut(&mut (it).1); + } + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_fn_mut<V>(v: &mut V, node: &mut ItemFn) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + v.visit_signature_mut(&mut node.sig); + v.visit_block_mut(&mut *node.block); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_foreign_mod_mut<V>(v: &mut V, node: &mut ItemForeignMod) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.unsafety); + v.visit_abi_mut(&mut node.abi); + skip!(node.brace_token); + for it in &mut node.items { + v.visit_foreign_item_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_impl_mut<V>(v: &mut V, node: &mut ItemImpl) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.defaultness); + skip!(node.unsafety); + skip!(node.impl_token); + v.visit_generics_mut(&mut node.generics); + if let Some(it) = &mut node.trait_ { + skip!((it).0); + v.visit_path_mut(&mut (it).1); + skip!((it).2); + } + v.visit_type_mut(&mut *node.self_ty); + skip!(node.brace_token); + for it in &mut node.items { + v.visit_impl_item_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_macro_mut<V>(v: &mut V, node: &mut ItemMacro) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.ident { + v.visit_ident_mut(it); + } + v.visit_macro_mut(&mut node.mac); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_mod_mut<V>(v: &mut V, node: &mut ItemMod) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.unsafety); + skip!(node.mod_token); + v.visit_ident_mut(&mut node.ident); + if let Some(it) = &mut node.content { + skip!((it).0); + for it in &mut (it).1 { + v.visit_item_mut(it); + } + } + skip!(node.semi); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_static_mut<V>(v: &mut V, node: &mut ItemStatic) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.static_token); + v.visit_static_mutability_mut(&mut node.mutability); + v.visit_ident_mut(&mut node.ident); + skip!(node.colon_token); + v.visit_type_mut(&mut *node.ty); + skip!(node.eq_token); + v.visit_expr_mut(&mut *node.expr); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_struct_mut<V>(v: &mut V, node: &mut ItemStruct) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); 
+ skip!(node.struct_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + v.visit_fields_mut(&mut node.fields); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_trait_mut<V>(v: &mut V, node: &mut ItemTrait) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.unsafety); + skip!(node.auto_token); + if let Some(it) = &mut node.restriction { + v.visit_impl_restriction_mut(it); + } + skip!(node.trait_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + skip!(node.colon_token); + for mut el in Punctuated::pairs_mut(&mut node.supertraits) { + let it = el.value_mut(); + v.visit_type_param_bound_mut(it); + } + skip!(node.brace_token); + for it in &mut node.items { + v.visit_trait_item_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_trait_alias_mut<V>(v: &mut V, node: &mut ItemTraitAlias) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.trait_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + skip!(node.eq_token); + for mut el in Punctuated::pairs_mut(&mut node.bounds) { + let it = el.value_mut(); + v.visit_type_param_bound_mut(it); + } + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_type_mut<V>(v: &mut V, node: &mut ItemType) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.type_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + skip!(node.eq_token); + v.visit_type_mut(&mut *node.ty); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_union_mut<V>(v: &mut V, node: &mut ItemUnion) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.union_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + v.visit_fields_named_mut(&mut node.fields); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_use_mut<V>(v: &mut V, node: &mut ItemUse) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_visibility_mut(&mut node.vis); + skip!(node.use_token); + skip!(node.leading_colon); + v.visit_use_tree_mut(&mut node.tree); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_label_mut<V>(v: &mut V, node: &mut Label) +where + V: VisitMut + ?Sized, +{ + v.visit_lifetime_mut(&mut node.name); + skip!(node.colon_token); +} +pub fn visit_lifetime_mut<V>(v: &mut V, node: &mut Lifetime) +where + V: VisitMut + ?Sized, +{ + v.visit_span_mut(&mut node.apostrophe); + v.visit_ident_mut(&mut node.ident); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_lifetime_param_mut<V>(v: &mut V, node: &mut LifetimeParam) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + 
v.visit_lifetime_mut(&mut node.lifetime); + skip!(node.colon_token); + for mut el in Punctuated::pairs_mut(&mut node.bounds) { + let it = el.value_mut(); + v.visit_lifetime_mut(it); + } +} +pub fn visit_lit_mut<V>(v: &mut V, node: &mut Lit) +where + V: VisitMut + ?Sized, +{ + match node { + Lit::Str(_binding_0) => { + v.visit_lit_str_mut(_binding_0); + } + Lit::ByteStr(_binding_0) => { + v.visit_lit_byte_str_mut(_binding_0); + } + Lit::Byte(_binding_0) => { + v.visit_lit_byte_mut(_binding_0); + } + Lit::Char(_binding_0) => { + v.visit_lit_char_mut(_binding_0); + } + Lit::Int(_binding_0) => { + v.visit_lit_int_mut(_binding_0); + } + Lit::Float(_binding_0) => { + v.visit_lit_float_mut(_binding_0); + } + Lit::Bool(_binding_0) => { + v.visit_lit_bool_mut(_binding_0); + } + Lit::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +pub fn visit_lit_bool_mut<V>(v: &mut V, node: &mut LitBool) +where + V: VisitMut + ?Sized, +{ + skip!(node.value); + v.visit_span_mut(&mut node.span); +} +pub fn visit_lit_byte_mut<V>(v: &mut V, node: &mut LitByte) +where + V: VisitMut + ?Sized, +{} +pub fn visit_lit_byte_str_mut<V>(v: &mut V, node: &mut LitByteStr) +where + V: VisitMut + ?Sized, +{} +pub fn visit_lit_char_mut<V>(v: &mut V, node: &mut LitChar) +where + V: VisitMut + ?Sized, +{} +pub fn visit_lit_float_mut<V>(v: &mut V, node: &mut LitFloat) +where + V: VisitMut + ?Sized, +{} +pub fn visit_lit_int_mut<V>(v: &mut V, node: &mut LitInt) +where + V: VisitMut + ?Sized, +{} +pub fn visit_lit_str_mut<V>(v: &mut V, node: &mut LitStr) +where + V: VisitMut + ?Sized, +{} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_local_mut<V>(v: &mut V, node: &mut Local) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.let_token); + v.visit_pat_mut(&mut node.pat); + if let Some(it) = &mut node.init { + v.visit_local_init_mut(it); + } + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_local_init_mut<V>(v: &mut V, node: &mut LocalInit) +where + V: VisitMut + ?Sized, +{ + skip!(node.eq_token); + v.visit_expr_mut(&mut *node.expr); + if let Some(it) = &mut node.diverge { + skip!((it).0); + v.visit_expr_mut(&mut *(it).1); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_macro_mut<V>(v: &mut V, node: &mut Macro) +where + V: VisitMut + ?Sized, +{ + v.visit_path_mut(&mut node.path); + skip!(node.bang_token); + v.visit_macro_delimiter_mut(&mut node.delimiter); + skip!(node.tokens); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_macro_delimiter_mut<V>(v: &mut V, node: &mut MacroDelimiter) +where + V: VisitMut + ?Sized, +{ + match node { + MacroDelimiter::Paren(_binding_0) => { + skip!(_binding_0); + } + MacroDelimiter::Brace(_binding_0) => { + skip!(_binding_0); + } + MacroDelimiter::Bracket(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_member_mut<V>(v: &mut V, node: &mut Member) +where + V: VisitMut + ?Sized, +{ + match node { + Member::Named(_binding_0) => { + v.visit_ident_mut(_binding_0); + } + Member::Unnamed(_binding_0) => { + v.visit_index_mut(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] 
+#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta_mut<V>(v: &mut V, node: &mut Meta) +where + V: VisitMut + ?Sized, +{ + match node { + Meta::Path(_binding_0) => { + v.visit_path_mut(_binding_0); + } + Meta::List(_binding_0) => { + v.visit_meta_list_mut(_binding_0); + } + Meta::NameValue(_binding_0) => { + v.visit_meta_name_value_mut(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta_list_mut<V>(v: &mut V, node: &mut MetaList) +where + V: VisitMut + ?Sized, +{ + v.visit_path_mut(&mut node.path); + v.visit_macro_delimiter_mut(&mut node.delimiter); + skip!(node.tokens); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta_name_value_mut<V>(v: &mut V, node: &mut MetaNameValue) +where + V: VisitMut + ?Sized, +{ + v.visit_path_mut(&mut node.path); + skip!(node.eq_token); + v.visit_expr_mut(&mut node.value); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_parenthesized_generic_arguments_mut<V>( + v: &mut V, + node: &mut ParenthesizedGenericArguments, +) +where + V: VisitMut + ?Sized, +{ + skip!(node.paren_token); + for mut el in Punctuated::pairs_mut(&mut node.inputs) { + let it = el.value_mut(); + v.visit_type_mut(it); + } + v.visit_return_type_mut(&mut node.output); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_mut<V>(v: &mut V, node: &mut Pat) +where + V: VisitMut + ?Sized, +{ + match node { + Pat::Const(_binding_0) => { + v.visit_expr_const_mut(_binding_0); + } + Pat::Ident(_binding_0) => { + v.visit_pat_ident_mut(_binding_0); + } + Pat::Lit(_binding_0) => { + v.visit_expr_lit_mut(_binding_0); + } + Pat::Macro(_binding_0) => { + v.visit_expr_macro_mut(_binding_0); + } + Pat::Or(_binding_0) => { + v.visit_pat_or_mut(_binding_0); + } + Pat::Paren(_binding_0) => { + v.visit_pat_paren_mut(_binding_0); + } + Pat::Path(_binding_0) => { + v.visit_expr_path_mut(_binding_0); + } + Pat::Range(_binding_0) => { + v.visit_expr_range_mut(_binding_0); + } + Pat::Reference(_binding_0) => { + v.visit_pat_reference_mut(_binding_0); + } + Pat::Rest(_binding_0) => { + v.visit_pat_rest_mut(_binding_0); + } + Pat::Slice(_binding_0) => { + v.visit_pat_slice_mut(_binding_0); + } + Pat::Struct(_binding_0) => { + v.visit_pat_struct_mut(_binding_0); + } + Pat::Tuple(_binding_0) => { + v.visit_pat_tuple_mut(_binding_0); + } + Pat::TupleStruct(_binding_0) => { + v.visit_pat_tuple_struct_mut(_binding_0); + } + Pat::Type(_binding_0) => { + v.visit_pat_type_mut(_binding_0); + } + Pat::Verbatim(_binding_0) => { + skip!(_binding_0); + } + Pat::Wild(_binding_0) => { + v.visit_pat_wild_mut(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_ident_mut<V>(v: &mut V, node: &mut PatIdent) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.by_ref); + skip!(node.mutability); + v.visit_ident_mut(&mut node.ident); + if let Some(it) = &mut node.subpat { + skip!((it).0); + v.visit_pat_mut(&mut *(it).1); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_or_mut<V>(v: &mut V, node: &mut PatOr) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + 
v.visit_attribute_mut(it); + } + skip!(node.leading_vert); + for mut el in Punctuated::pairs_mut(&mut node.cases) { + let it = el.value_mut(); + v.visit_pat_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_paren_mut<V>(v: &mut V, node: &mut PatParen) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.paren_token); + v.visit_pat_mut(&mut *node.pat); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_reference_mut<V>(v: &mut V, node: &mut PatReference) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.and_token); + skip!(node.mutability); + v.visit_pat_mut(&mut *node.pat); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_rest_mut<V>(v: &mut V, node: &mut PatRest) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.dot2_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_slice_mut<V>(v: &mut V, node: &mut PatSlice) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.bracket_token); + for mut el in Punctuated::pairs_mut(&mut node.elems) { + let it = el.value_mut(); + v.visit_pat_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_struct_mut<V>(v: &mut V, node: &mut PatStruct) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.qself { + v.visit_qself_mut(it); + } + v.visit_path_mut(&mut node.path); + skip!(node.brace_token); + for mut el in Punctuated::pairs_mut(&mut node.fields) { + let it = el.value_mut(); + v.visit_field_pat_mut(it); + } + if let Some(it) = &mut node.rest { + v.visit_pat_rest_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_tuple_mut<V>(v: &mut V, node: &mut PatTuple) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.paren_token); + for mut el in Punctuated::pairs_mut(&mut node.elems) { + let it = el.value_mut(); + v.visit_pat_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_tuple_struct_mut<V>(v: &mut V, node: &mut PatTupleStruct) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.qself { + v.visit_qself_mut(it); + } + v.visit_path_mut(&mut node.path); + skip!(node.paren_token); + for mut el in Punctuated::pairs_mut(&mut node.elems) { + let it = el.value_mut(); + v.visit_pat_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_type_mut<V>(v: &mut V, node: &mut PatType) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_pat_mut(&mut *node.pat); + skip!(node.colon_token); + v.visit_type_mut(&mut *node.ty); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_wild_mut<V>(v: &mut V, node: &mut PatWild) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.underscore_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, 
doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path_mut<V>(v: &mut V, node: &mut Path) +where + V: VisitMut + ?Sized, +{ + skip!(node.leading_colon); + for mut el in Punctuated::pairs_mut(&mut node.segments) { + let it = el.value_mut(); + v.visit_path_segment_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path_arguments_mut<V>(v: &mut V, node: &mut PathArguments) +where + V: VisitMut + ?Sized, +{ + match node { + PathArguments::None => {} + PathArguments::AngleBracketed(_binding_0) => { + v.visit_angle_bracketed_generic_arguments_mut(_binding_0); + } + PathArguments::Parenthesized(_binding_0) => { + v.visit_parenthesized_generic_arguments_mut(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path_segment_mut<V>(v: &mut V, node: &mut PathSegment) +where + V: VisitMut + ?Sized, +{ + v.visit_ident_mut(&mut node.ident); + v.visit_path_arguments_mut(&mut node.arguments); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_predicate_lifetime_mut<V>(v: &mut V, node: &mut PredicateLifetime) +where + V: VisitMut + ?Sized, +{ + v.visit_lifetime_mut(&mut node.lifetime); + skip!(node.colon_token); + for mut el in Punctuated::pairs_mut(&mut node.bounds) { + let it = el.value_mut(); + v.visit_lifetime_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_predicate_type_mut<V>(v: &mut V, node: &mut PredicateType) +where + V: VisitMut + ?Sized, +{ + if let Some(it) = &mut node.lifetimes { + v.visit_bound_lifetimes_mut(it); + } + v.visit_type_mut(&mut node.bounded_ty); + skip!(node.colon_token); + for mut el in Punctuated::pairs_mut(&mut node.bounds) { + let it = el.value_mut(); + v.visit_type_param_bound_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_qself_mut<V>(v: &mut V, node: &mut QSelf) +where + V: VisitMut + ?Sized, +{ + skip!(node.lt_token); + v.visit_type_mut(&mut *node.ty); + skip!(node.position); + skip!(node.as_token); + skip!(node.gt_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_range_limits_mut<V>(v: &mut V, node: &mut RangeLimits) +where + V: VisitMut + ?Sized, +{ + match node { + RangeLimits::HalfOpen(_binding_0) => { + skip!(_binding_0); + } + RangeLimits::Closed(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_receiver_mut<V>(v: &mut V, node: &mut Receiver) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.reference { + skip!((it).0); + if let Some(it) = &mut (it).1 { + v.visit_lifetime_mut(it); + } + } + skip!(node.mutability); + skip!(node.self_token); + skip!(node.colon_token); + v.visit_type_mut(&mut *node.ty); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_return_type_mut<V>(v: &mut V, node: &mut ReturnType) +where + V: VisitMut + ?Sized, +{ + match node { + ReturnType::Default => {} + ReturnType::Type(_binding_0, _binding_1) => { + 
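+            // `_binding_0` is the `->` token; only the boxed return
+            // type itself is visited.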
skip!(_binding_0); + v.visit_type_mut(&mut **_binding_1); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_signature_mut<V>(v: &mut V, node: &mut Signature) +where + V: VisitMut + ?Sized, +{ + skip!(node.constness); + skip!(node.asyncness); + skip!(node.unsafety); + if let Some(it) = &mut node.abi { + v.visit_abi_mut(it); + } + skip!(node.fn_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + skip!(node.paren_token); + for mut el in Punctuated::pairs_mut(&mut node.inputs) { + let it = el.value_mut(); + v.visit_fn_arg_mut(it); + } + if let Some(it) = &mut node.variadic { + v.visit_variadic_mut(it); + } + v.visit_return_type_mut(&mut node.output); +} +pub fn visit_span_mut<V>(v: &mut V, node: &mut Span) +where + V: VisitMut + ?Sized, +{} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_static_mutability_mut<V>(v: &mut V, node: &mut StaticMutability) +where + V: VisitMut + ?Sized, +{ + match node { + StaticMutability::Mut(_binding_0) => { + skip!(_binding_0); + } + StaticMutability::None => {} + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_stmt_mut<V>(v: &mut V, node: &mut Stmt) +where + V: VisitMut + ?Sized, +{ + match node { + Stmt::Local(_binding_0) => { + v.visit_local_mut(_binding_0); + } + Stmt::Item(_binding_0) => { + v.visit_item_mut(_binding_0); + } + Stmt::Expr(_binding_0, _binding_1) => { + v.visit_expr_mut(_binding_0); + skip!(_binding_1); + } + Stmt::Macro(_binding_0) => { + v.visit_stmt_macro_mut(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_stmt_macro_mut<V>(v: &mut V, node: &mut StmtMacro) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_macro_mut(&mut node.mac); + skip!(node.semi_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_trait_bound_mut<V>(v: &mut V, node: &mut TraitBound) +where + V: VisitMut + ?Sized, +{ + skip!(node.paren_token); + v.visit_trait_bound_modifier_mut(&mut node.modifier); + if let Some(it) = &mut node.lifetimes { + v.visit_bound_lifetimes_mut(it); + } + v.visit_path_mut(&mut node.path); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_trait_bound_modifier_mut<V>(v: &mut V, node: &mut TraitBoundModifier) +where + V: VisitMut + ?Sized, +{ + match node { + TraitBoundModifier::None => {} + TraitBoundModifier::Maybe(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_mut<V>(v: &mut V, node: &mut TraitItem) +where + V: VisitMut + ?Sized, +{ + match node { + TraitItem::Const(_binding_0) => { + v.visit_trait_item_const_mut(_binding_0); + } + TraitItem::Fn(_binding_0) => { + v.visit_trait_item_fn_mut(_binding_0); + } + TraitItem::Type(_binding_0) => { + v.visit_trait_item_type_mut(_binding_0); + } + TraitItem::Macro(_binding_0) => { + v.visit_trait_item_macro_mut(_binding_0); + } + TraitItem::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_const_mut<V>(v: &mut V, node: &mut TraitItemConst) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + 
v.visit_attribute_mut(it); + } + skip!(node.const_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + skip!(node.colon_token); + v.visit_type_mut(&mut node.ty); + if let Some(it) = &mut node.default { + skip!((it).0); + v.visit_expr_mut(&mut (it).1); + } + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_fn_mut<V>(v: &mut V, node: &mut TraitItemFn) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_signature_mut(&mut node.sig); + if let Some(it) = &mut node.default { + v.visit_block_mut(it); + } + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_macro_mut<V>(v: &mut V, node: &mut TraitItemMacro) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_macro_mut(&mut node.mac); + skip!(node.semi_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_type_mut<V>(v: &mut V, node: &mut TraitItemType) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + skip!(node.type_token); + v.visit_ident_mut(&mut node.ident); + v.visit_generics_mut(&mut node.generics); + skip!(node.colon_token); + for mut el in Punctuated::pairs_mut(&mut node.bounds) { + let it = el.value_mut(); + v.visit_type_param_bound_mut(it); + } + if let Some(it) = &mut node.default { + skip!((it).0); + v.visit_type_mut(&mut (it).1); + } + skip!(node.semi_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_mut<V>(v: &mut V, node: &mut Type) +where + V: VisitMut + ?Sized, +{ + match node { + Type::Array(_binding_0) => { + v.visit_type_array_mut(_binding_0); + } + Type::BareFn(_binding_0) => { + v.visit_type_bare_fn_mut(_binding_0); + } + Type::Group(_binding_0) => { + v.visit_type_group_mut(_binding_0); + } + Type::ImplTrait(_binding_0) => { + v.visit_type_impl_trait_mut(_binding_0); + } + Type::Infer(_binding_0) => { + v.visit_type_infer_mut(_binding_0); + } + Type::Macro(_binding_0) => { + v.visit_type_macro_mut(_binding_0); + } + Type::Never(_binding_0) => { + v.visit_type_never_mut(_binding_0); + } + Type::Paren(_binding_0) => { + v.visit_type_paren_mut(_binding_0); + } + Type::Path(_binding_0) => { + v.visit_type_path_mut(_binding_0); + } + Type::Ptr(_binding_0) => { + v.visit_type_ptr_mut(_binding_0); + } + Type::Reference(_binding_0) => { + v.visit_type_reference_mut(_binding_0); + } + Type::Slice(_binding_0) => { + v.visit_type_slice_mut(_binding_0); + } + Type::TraitObject(_binding_0) => { + v.visit_type_trait_object_mut(_binding_0); + } + Type::Tuple(_binding_0) => { + v.visit_type_tuple_mut(_binding_0); + } + Type::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_array_mut<V>(v: &mut V, node: &mut TypeArray) +where + V: VisitMut + ?Sized, +{ + skip!(node.bracket_token); + v.visit_type_mut(&mut *node.elem); + skip!(node.semi_token); + v.visit_expr_mut(&mut node.len); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_bare_fn_mut<V>(v: &mut V, node: &mut TypeBareFn) +where + V: 
VisitMut + ?Sized, +{ + if let Some(it) = &mut node.lifetimes { + v.visit_bound_lifetimes_mut(it); + } + skip!(node.unsafety); + if let Some(it) = &mut node.abi { + v.visit_abi_mut(it); + } + skip!(node.fn_token); + skip!(node.paren_token); + for mut el in Punctuated::pairs_mut(&mut node.inputs) { + let it = el.value_mut(); + v.visit_bare_fn_arg_mut(it); + } + if let Some(it) = &mut node.variadic { + v.visit_bare_variadic_mut(it); + } + v.visit_return_type_mut(&mut node.output); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_group_mut<V>(v: &mut V, node: &mut TypeGroup) +where + V: VisitMut + ?Sized, +{ + skip!(node.group_token); + v.visit_type_mut(&mut *node.elem); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_impl_trait_mut<V>(v: &mut V, node: &mut TypeImplTrait) +where + V: VisitMut + ?Sized, +{ + skip!(node.impl_token); + for mut el in Punctuated::pairs_mut(&mut node.bounds) { + let it = el.value_mut(); + v.visit_type_param_bound_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_infer_mut<V>(v: &mut V, node: &mut TypeInfer) +where + V: VisitMut + ?Sized, +{ + skip!(node.underscore_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_macro_mut<V>(v: &mut V, node: &mut TypeMacro) +where + V: VisitMut + ?Sized, +{ + v.visit_macro_mut(&mut node.mac); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_never_mut<V>(v: &mut V, node: &mut TypeNever) +where + V: VisitMut + ?Sized, +{ + skip!(node.bang_token); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_param_mut<V>(v: &mut V, node: &mut TypeParam) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_ident_mut(&mut node.ident); + skip!(node.colon_token); + for mut el in Punctuated::pairs_mut(&mut node.bounds) { + let it = el.value_mut(); + v.visit_type_param_bound_mut(it); + } + skip!(node.eq_token); + if let Some(it) = &mut node.default { + v.visit_type_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_param_bound_mut<V>(v: &mut V, node: &mut TypeParamBound) +where + V: VisitMut + ?Sized, +{ + match node { + TypeParamBound::Trait(_binding_0) => { + v.visit_trait_bound_mut(_binding_0); + } + TypeParamBound::Lifetime(_binding_0) => { + v.visit_lifetime_mut(_binding_0); + } + TypeParamBound::Verbatim(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_paren_mut<V>(v: &mut V, node: &mut TypeParen) +where + V: VisitMut + ?Sized, +{ + skip!(node.paren_token); + v.visit_type_mut(&mut *node.elem); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_path_mut<V>(v: &mut V, node: &mut TypePath) +where + V: VisitMut + ?Sized, +{ + if let Some(it) = &mut 
node.qself { + v.visit_qself_mut(it); + } + v.visit_path_mut(&mut node.path); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_ptr_mut<V>(v: &mut V, node: &mut TypePtr) +where + V: VisitMut + ?Sized, +{ + skip!(node.star_token); + skip!(node.const_token); + skip!(node.mutability); + v.visit_type_mut(&mut *node.elem); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_reference_mut<V>(v: &mut V, node: &mut TypeReference) +where + V: VisitMut + ?Sized, +{ + skip!(node.and_token); + if let Some(it) = &mut node.lifetime { + v.visit_lifetime_mut(it); + } + skip!(node.mutability); + v.visit_type_mut(&mut *node.elem); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_slice_mut<V>(v: &mut V, node: &mut TypeSlice) +where + V: VisitMut + ?Sized, +{ + skip!(node.bracket_token); + v.visit_type_mut(&mut *node.elem); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_trait_object_mut<V>(v: &mut V, node: &mut TypeTraitObject) +where + V: VisitMut + ?Sized, +{ + skip!(node.dyn_token); + for mut el in Punctuated::pairs_mut(&mut node.bounds) { + let it = el.value_mut(); + v.visit_type_param_bound_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_tuple_mut<V>(v: &mut V, node: &mut TypeTuple) +where + V: VisitMut + ?Sized, +{ + skip!(node.paren_token); + for mut el in Punctuated::pairs_mut(&mut node.elems) { + let it = el.value_mut(); + v.visit_type_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_un_op_mut<V>(v: &mut V, node: &mut UnOp) +where + V: VisitMut + ?Sized, +{ + match node { + UnOp::Deref(_binding_0) => { + skip!(_binding_0); + } + UnOp::Not(_binding_0) => { + skip!(_binding_0); + } + UnOp::Neg(_binding_0) => { + skip!(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_glob_mut<V>(v: &mut V, node: &mut UseGlob) +where + V: VisitMut + ?Sized, +{ + skip!(node.star_token); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_group_mut<V>(v: &mut V, node: &mut UseGroup) +where + V: VisitMut + ?Sized, +{ + skip!(node.brace_token); + for mut el in Punctuated::pairs_mut(&mut node.items) { + let it = el.value_mut(); + v.visit_use_tree_mut(it); + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_name_mut<V>(v: &mut V, node: &mut UseName) +where + V: VisitMut + ?Sized, +{ + v.visit_ident_mut(&mut node.ident); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_path_mut<V>(v: &mut V, node: &mut UsePath) +where + V: VisitMut + ?Sized, +{ + v.visit_ident_mut(&mut node.ident); + skip!(node.colon2_token); + v.visit_use_tree_mut(&mut *node.tree); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_rename_mut<V>(v: &mut V, node: &mut UseRename) +where + V: VisitMut + ?Sized, +{ + v.visit_ident_mut(&mut node.ident); + skip!(node.as_token); + v.visit_ident_mut(&mut 
node.rename); +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_tree_mut<V>(v: &mut V, node: &mut UseTree) +where + V: VisitMut + ?Sized, +{ + match node { + UseTree::Path(_binding_0) => { + v.visit_use_path_mut(_binding_0); + } + UseTree::Name(_binding_0) => { + v.visit_use_name_mut(_binding_0); + } + UseTree::Rename(_binding_0) => { + v.visit_use_rename_mut(_binding_0); + } + UseTree::Glob(_binding_0) => { + v.visit_use_glob_mut(_binding_0); + } + UseTree::Group(_binding_0) => { + v.visit_use_group_mut(_binding_0); + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_variadic_mut<V>(v: &mut V, node: &mut Variadic) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + if let Some(it) = &mut node.pat { + v.visit_pat_mut(&mut *(it).0); + skip!((it).1); + } + skip!(node.dots); + skip!(node.comma); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_variant_mut<V>(v: &mut V, node: &mut Variant) +where + V: VisitMut + ?Sized, +{ + for it in &mut node.attrs { + v.visit_attribute_mut(it); + } + v.visit_ident_mut(&mut node.ident); + v.visit_fields_mut(&mut node.fields); + if let Some(it) = &mut node.discriminant { + skip!((it).0); + v.visit_expr_mut(&mut (it).1); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_vis_restricted_mut<V>(v: &mut V, node: &mut VisRestricted) +where + V: VisitMut + ?Sized, +{ + skip!(node.pub_token); + skip!(node.paren_token); + skip!(node.in_token); + v.visit_path_mut(&mut *node.path); +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_visibility_mut<V>(v: &mut V, node: &mut Visibility) +where + V: VisitMut + ?Sized, +{ + match node { + Visibility::Public(_binding_0) => { + skip!(_binding_0); + } + Visibility::Restricted(_binding_0) => { + v.visit_vis_restricted_mut(_binding_0); + } + Visibility::Inherited => {} + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_where_clause_mut<V>(v: &mut V, node: &mut WhereClause) +where + V: VisitMut + ?Sized, +{ + skip!(node.where_token); + for mut el in Punctuated::pairs_mut(&mut node.predicates) { + let it = el.value_mut(); + v.visit_where_predicate_mut(it); + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_where_predicate_mut<V>(v: &mut V, node: &mut WherePredicate) +where + V: VisitMut + ?Sized, +{ + match node { + WherePredicate::Lifetime(_binding_0) => { + v.visit_predicate_lifetime_mut(_binding_0); + } + WherePredicate::Type(_binding_0) => { + v.visit_predicate_type_mut(_binding_0); + } + } +} diff --git a/vendor/syn/src/gen_helper.rs b/vendor/syn/src/gen_helper.rs new file mode 100644 index 0000000..e433bac --- /dev/null +++ b/vendor/syn/src/gen_helper.rs @@ -0,0 +1,34 @@ +#[cfg(feature = "fold")] +pub(crate) mod fold { + use crate::punctuated::{Pair, Punctuated}; + + pub(crate) trait FoldHelper { + type Item; + fn lift<F>(self, f: F) -> Self + where + F: FnMut(Self::Item) -> Self::Item; + } + + impl<T> FoldHelper for Vec<T> { + type Item = T; + fn lift<F>(self, f: F) -> Self + where + F: FnMut(Self::Item) -> Self::Item, 
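+        // For `Vec`, lifting `f` is a plain owned map; the `Punctuated`
+        // impl below additionally re-attaches each separator token after
+        // mapping the value it follows.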
+ { + self.into_iter().map(f).collect() + } + } + + impl<T, U> FoldHelper for Punctuated<T, U> { + type Item = T; + fn lift<F>(self, mut f: F) -> Self + where + F: FnMut(Self::Item) -> Self::Item, + { + self.into_pairs() + .map(Pair::into_tuple) + .map(|(t, u)| Pair::new(f(t), u)) + .collect() + } + } +} diff --git a/vendor/syn/src/generics.rs b/vendor/syn/src/generics.rs new file mode 100644 index 0000000..aed50e3 --- /dev/null +++ b/vendor/syn/src/generics.rs @@ -0,0 +1,1227 @@ +use super::*; +use crate::punctuated::{Iter, IterMut, Punctuated}; +use proc_macro2::TokenStream; +#[cfg(all(feature = "printing", feature = "extra-traits"))] +use std::fmt::{self, Debug}; +#[cfg(all(feature = "printing", feature = "extra-traits"))] +use std::hash::{Hash, Hasher}; + +ast_struct! { + /// Lifetimes and type parameters attached to a declaration of a function, + /// enum, trait, etc. + /// + /// This struct represents two distinct optional syntactic elements, + /// [generic parameters] and [where clause]. In some locations of the + /// grammar, there may be other tokens in between these two things. + /// + /// [generic parameters]: https://doc.rust-lang.org/stable/reference/items/generics.html#generic-parameters + /// [where clause]: https://doc.rust-lang.org/stable/reference/items/generics.html#where-clauses + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct Generics { + pub lt_token: Option<Token![<]>, + pub params: Punctuated<GenericParam, Token![,]>, + pub gt_token: Option<Token![>]>, + pub where_clause: Option<WhereClause>, + } +} + +ast_enum_of_structs! { + /// A generic type parameter, lifetime, or const generic: `T: Into<String>`, + /// `'a: 'b`, `const LEN: usize`. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub enum GenericParam { + /// A lifetime parameter: `'a: 'b + 'c + 'd`. + Lifetime(LifetimeParam), + + /// A generic type parameter: `T: Into<String>`. + Type(TypeParam), + + /// A const generic parameter: `const LENGTH: usize`. + Const(ConstParam), + } +} + +ast_struct! { + /// A lifetime definition: `'a: 'b + 'c + 'd`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct LifetimeParam { + pub attrs: Vec<Attribute>, + pub lifetime: Lifetime, + pub colon_token: Option<Token![:]>, + pub bounds: Punctuated<Lifetime, Token![+]>, + } +} + +ast_struct! { + /// A generic type parameter: `T: Into<String>`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeParam { + pub attrs: Vec<Attribute>, + pub ident: Ident, + pub colon_token: Option<Token![:]>, + pub bounds: Punctuated<TypeParamBound, Token![+]>, + pub eq_token: Option<Token![=]>, + pub default: Option<Type>, + } +} + +ast_struct! { + /// A const generic parameter: `const LENGTH: usize`. 
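+    ///
+    /// A minimal parsing sketch (illustrative only; assumes syn's default
+    /// `parsing` and `printing` features, which `parse_quote!` requires):
+    ///
+    /// ```
+    /// // `N` is an arbitrary illustrative parameter name.
+    /// let param: syn::ConstParam = syn::parse_quote!(const N: usize);
+    /// assert_eq!(param.ident, "N");
+    /// ```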
+ #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ConstParam { + pub attrs: Vec<Attribute>, + pub const_token: Token![const], + pub ident: Ident, + pub colon_token: Token![:], + pub ty: Type, + pub eq_token: Option<Token![=]>, + pub default: Option<Expr>, + } +} + +impl Default for Generics { + fn default() -> Self { + Generics { + lt_token: None, + params: Punctuated::new(), + gt_token: None, + where_clause: None, + } + } +} + +impl Generics { + /// Returns an + /// <code + /// style="padding-right:0;">Iterator<Item = &</code><a + /// href="struct.LifetimeParam.html"><code + /// style="padding-left:0;padding-right:0;">LifetimeParam</code></a><code + /// style="padding-left:0;">></code> + /// over the lifetime parameters in `self.params`. + pub fn lifetimes(&self) -> Lifetimes { + Lifetimes(self.params.iter()) + } + + /// Returns an + /// <code + /// style="padding-right:0;">Iterator<Item = &mut </code><a + /// href="struct.LifetimeParam.html"><code + /// style="padding-left:0;padding-right:0;">LifetimeParam</code></a><code + /// style="padding-left:0;">></code> + /// over the lifetime parameters in `self.params`. + pub fn lifetimes_mut(&mut self) -> LifetimesMut { + LifetimesMut(self.params.iter_mut()) + } + + /// Returns an + /// <code + /// style="padding-right:0;">Iterator<Item = &</code><a + /// href="struct.TypeParam.html"><code + /// style="padding-left:0;padding-right:0;">TypeParam</code></a><code + /// style="padding-left:0;">></code> + /// over the type parameters in `self.params`. + pub fn type_params(&self) -> TypeParams { + TypeParams(self.params.iter()) + } + + /// Returns an + /// <code + /// style="padding-right:0;">Iterator<Item = &mut </code><a + /// href="struct.TypeParam.html"><code + /// style="padding-left:0;padding-right:0;">TypeParam</code></a><code + /// style="padding-left:0;">></code> + /// over the type parameters in `self.params`. + pub fn type_params_mut(&mut self) -> TypeParamsMut { + TypeParamsMut(self.params.iter_mut()) + } + + /// Returns an + /// <code + /// style="padding-right:0;">Iterator<Item = &</code><a + /// href="struct.ConstParam.html"><code + /// style="padding-left:0;padding-right:0;">ConstParam</code></a><code + /// style="padding-left:0;">></code> + /// over the constant parameters in `self.params`. + pub fn const_params(&self) -> ConstParams { + ConstParams(self.params.iter()) + } + + /// Returns an + /// <code + /// style="padding-right:0;">Iterator<Item = &mut </code><a + /// href="struct.ConstParam.html"><code + /// style="padding-left:0;padding-right:0;">ConstParam</code></a><code + /// style="padding-left:0;">></code> + /// over the constant parameters in `self.params`. + pub fn const_params_mut(&mut self) -> ConstParamsMut { + ConstParamsMut(self.params.iter_mut()) + } + + /// Initializes an empty `where`-clause if there is not one present already. 
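+    ///
+    /// A minimal usage sketch (the predicate parsed below is an arbitrary
+    /// illustrative example, not something this method produces itself):
+    ///
+    /// ```
+    /// let mut generics = syn::Generics::default();
+    /// let predicate: syn::WherePredicate = syn::parse_quote!(T: Clone);
+    /// // Creates the `where` clause on first use, then appends to it.
+    /// generics.make_where_clause().predicates.push(predicate);
+    /// ```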
+ pub fn make_where_clause(&mut self) -> &mut WhereClause { + self.where_clause.get_or_insert_with(|| WhereClause { + where_token: <Token![where]>::default(), + predicates: Punctuated::new(), + }) + } +} + +pub struct Lifetimes<'a>(Iter<'a, GenericParam>); + +impl<'a> Iterator for Lifetimes<'a> { + type Item = &'a LifetimeParam; + + fn next(&mut self) -> Option<Self::Item> { + let next = match self.0.next() { + Some(item) => item, + None => return None, + }; + if let GenericParam::Lifetime(lifetime) = next { + Some(lifetime) + } else { + self.next() + } + } +} + +pub struct LifetimesMut<'a>(IterMut<'a, GenericParam>); + +impl<'a> Iterator for LifetimesMut<'a> { + type Item = &'a mut LifetimeParam; + + fn next(&mut self) -> Option<Self::Item> { + let next = match self.0.next() { + Some(item) => item, + None => return None, + }; + if let GenericParam::Lifetime(lifetime) = next { + Some(lifetime) + } else { + self.next() + } + } +} + +pub struct TypeParams<'a>(Iter<'a, GenericParam>); + +impl<'a> Iterator for TypeParams<'a> { + type Item = &'a TypeParam; + + fn next(&mut self) -> Option<Self::Item> { + let next = match self.0.next() { + Some(item) => item, + None => return None, + }; + if let GenericParam::Type(type_param) = next { + Some(type_param) + } else { + self.next() + } + } +} + +pub struct TypeParamsMut<'a>(IterMut<'a, GenericParam>); + +impl<'a> Iterator for TypeParamsMut<'a> { + type Item = &'a mut TypeParam; + + fn next(&mut self) -> Option<Self::Item> { + let next = match self.0.next() { + Some(item) => item, + None => return None, + }; + if let GenericParam::Type(type_param) = next { + Some(type_param) + } else { + self.next() + } + } +} + +pub struct ConstParams<'a>(Iter<'a, GenericParam>); + +impl<'a> Iterator for ConstParams<'a> { + type Item = &'a ConstParam; + + fn next(&mut self) -> Option<Self::Item> { + let next = match self.0.next() { + Some(item) => item, + None => return None, + }; + if let GenericParam::Const(const_param) = next { + Some(const_param) + } else { + self.next() + } + } +} + +pub struct ConstParamsMut<'a>(IterMut<'a, GenericParam>); + +impl<'a> Iterator for ConstParamsMut<'a> { + type Item = &'a mut ConstParam; + + fn next(&mut self) -> Option<Self::Item> { + let next = match self.0.next() { + Some(item) => item, + None => return None, + }; + if let GenericParam::Const(const_param) = next { + Some(const_param) + } else { + self.next() + } + } +} + +/// Returned by `Generics::split_for_impl`. +#[cfg(feature = "printing")] +#[cfg_attr( + doc_cfg, + doc(cfg(all(any(feature = "full", feature = "derive"), feature = "printing"))) +)] +pub struct ImplGenerics<'a>(&'a Generics); + +/// Returned by `Generics::split_for_impl`. +#[cfg(feature = "printing")] +#[cfg_attr( + doc_cfg, + doc(cfg(all(any(feature = "full", feature = "derive"), feature = "printing"))) +)] +pub struct TypeGenerics<'a>(&'a Generics); + +/// Returned by `TypeGenerics::as_turbofish`. +#[cfg(feature = "printing")] +#[cfg_attr( + doc_cfg, + doc(cfg(all(any(feature = "full", feature = "derive"), feature = "printing"))) +)] +pub struct Turbofish<'a>(&'a Generics); + +#[cfg(feature = "printing")] +impl Generics { + /// Split a type's generics into the pieces required for impl'ing a trait + /// for that type. 
+ /// + /// ``` + /// # use proc_macro2::{Span, Ident}; + /// # use quote::quote; + /// # + /// # let generics: syn::Generics = Default::default(); + /// # let name = Ident::new("MyType", Span::call_site()); + /// # + /// let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + /// quote! { + /// impl #impl_generics MyTrait for #name #ty_generics #where_clause { + /// // ... + /// } + /// } + /// # ; + /// ``` + #[cfg_attr( + doc_cfg, + doc(cfg(all(any(feature = "full", feature = "derive"), feature = "printing"))) + )] + pub fn split_for_impl(&self) -> (ImplGenerics, TypeGenerics, Option<&WhereClause>) { + ( + ImplGenerics(self), + TypeGenerics(self), + self.where_clause.as_ref(), + ) + } +} + +#[cfg(feature = "printing")] +macro_rules! generics_wrapper_impls { + ($ty:ident) => { + #[cfg(feature = "clone-impls")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] + impl<'a> Clone for $ty<'a> { + fn clone(&self) -> Self { + $ty(self.0) + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl<'a> Debug for $ty<'a> { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter + .debug_tuple(stringify!($ty)) + .field(self.0) + .finish() + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl<'a> Eq for $ty<'a> {} + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl<'a> PartialEq for $ty<'a> { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl<'a> Hash for $ty<'a> { + fn hash<H: Hasher>(&self, state: &mut H) { + self.0.hash(state); + } + } + }; +} + +#[cfg(feature = "printing")] +generics_wrapper_impls!(ImplGenerics); +#[cfg(feature = "printing")] +generics_wrapper_impls!(TypeGenerics); +#[cfg(feature = "printing")] +generics_wrapper_impls!(Turbofish); + +#[cfg(feature = "printing")] +impl<'a> TypeGenerics<'a> { + /// Turn a type's generics like `<X, Y>` into a turbofish like `::<X, Y>`. + pub fn as_turbofish(&self) -> Turbofish { + Turbofish(self.0) + } +} + +ast_struct! { + /// A set of bound lifetimes: `for<'a, 'b, 'c>`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct BoundLifetimes { + pub for_token: Token![for], + pub lt_token: Token![<], + pub lifetimes: Punctuated<GenericParam, Token![,]>, + pub gt_token: Token![>], + } +} + +impl Default for BoundLifetimes { + fn default() -> Self { + BoundLifetimes { + for_token: Default::default(), + lt_token: Default::default(), + lifetimes: Punctuated::new(), + gt_token: Default::default(), + } + } +} + +impl LifetimeParam { + pub fn new(lifetime: Lifetime) -> Self { + LifetimeParam { + attrs: Vec::new(), + lifetime, + colon_token: None, + bounds: Punctuated::new(), + } + } +} + +impl From<Ident> for TypeParam { + fn from(ident: Ident) -> Self { + TypeParam { + attrs: vec![], + ident, + colon_token: None, + bounds: Punctuated::new(), + eq_token: None, + default: None, + } + } +} + +ast_enum_of_structs! { + /// A trait or lifetime used as a bound on a type parameter. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + #[non_exhaustive] + pub enum TypeParamBound { + Trait(TraitBound), + Lifetime(Lifetime), + Verbatim(TokenStream), + } +} + +ast_struct! { + /// A trait used as a bound on a type parameter. 
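+    ///
+    /// A minimal parsing sketch (assumes syn's default features): the `?` of
+    /// `?Sized` ends up in `modifier`, and the trait path in `path`.
+    ///
+    /// ```
+    /// let bound: syn::TraitBound = syn::parse_quote!(?Sized);
+    /// assert!(matches!(bound.modifier, syn::TraitBoundModifier::Maybe(_)));
+    /// ```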
+ #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TraitBound { + pub paren_token: Option<token::Paren>, + pub modifier: TraitBoundModifier, + /// The `for<'a>` in `for<'a> Foo<&'a T>` + pub lifetimes: Option<BoundLifetimes>, + /// The `Foo<&'a T>` in `for<'a> Foo<&'a T>` + pub path: Path, + } +} + +ast_enum! { + /// A modifier on a trait bound, currently only used for the `?` in + /// `?Sized`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub enum TraitBoundModifier { + None, + Maybe(Token![?]), + } +} + +ast_struct! { + /// A `where` clause in a definition: `where T: Deserialize<'de>, D: + /// 'static`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct WhereClause { + pub where_token: Token![where], + pub predicates: Punctuated<WherePredicate, Token![,]>, + } +} + +ast_enum_of_structs! { + /// A single predicate in a `where` clause: `T: Deserialize<'de>`. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + #[non_exhaustive] + pub enum WherePredicate { + /// A lifetime predicate in a `where` clause: `'a: 'b + 'c`. + Lifetime(PredicateLifetime), + + /// A type predicate in a `where` clause: `for<'c> Foo<'c>: Trait<'c>`. + Type(PredicateType), + } +} + +ast_struct! { + /// A lifetime predicate in a `where` clause: `'a: 'b + 'c`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct PredicateLifetime { + pub lifetime: Lifetime, + pub colon_token: Token![:], + pub bounds: Punctuated<Lifetime, Token![+]>, + } +} + +ast_struct! { + /// A type predicate in a `where` clause: `for<'c> Foo<'c>: Trait<'c>`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct PredicateType { + /// Any lifetimes from a `for` binding + pub lifetimes: Option<BoundLifetimes>, + /// The type being bounded + pub bounded_ty: Type, + pub colon_token: Token![:], + /// Trait and lifetime bounds (`Clone+Send+'static`) + pub bounds: Punctuated<TypeParamBound, Token![+]>, + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::ext::IdentExt as _; + use crate::parse::{Parse, ParseStream, Result}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Generics { + fn parse(input: ParseStream) -> Result<Self> { + if !input.peek(Token![<]) { + return Ok(Generics::default()); + } + + let lt_token: Token![<] = input.parse()?; + + let mut params = Punctuated::new(); + loop { + if input.peek(Token![>]) { + break; + } + + let attrs = input.call(Attribute::parse_outer)?; + let lookahead = input.lookahead1(); + if lookahead.peek(Lifetime) { + params.push_value(GenericParam::Lifetime(LifetimeParam { + attrs, + ..input.parse()? + })); + } else if lookahead.peek(Ident) { + params.push_value(GenericParam::Type(TypeParam { + attrs, + ..input.parse()? + })); + } else if lookahead.peek(Token![const]) { + params.push_value(GenericParam::Const(ConstParam { + attrs, + ..input.parse()? 
+ })); + } else if input.peek(Token![_]) { + params.push_value(GenericParam::Type(TypeParam { + attrs, + ident: input.call(Ident::parse_any)?, + colon_token: None, + bounds: Punctuated::new(), + eq_token: None, + default: None, + })); + } else { + return Err(lookahead.error()); + } + + if input.peek(Token![>]) { + break; + } + let punct = input.parse()?; + params.push_punct(punct); + } + + let gt_token: Token![>] = input.parse()?; + + Ok(Generics { + lt_token: Some(lt_token), + params, + gt_token: Some(gt_token), + where_clause: None, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for GenericParam { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + + let lookahead = input.lookahead1(); + if lookahead.peek(Ident) { + Ok(GenericParam::Type(TypeParam { + attrs, + ..input.parse()? + })) + } else if lookahead.peek(Lifetime) { + Ok(GenericParam::Lifetime(LifetimeParam { + attrs, + ..input.parse()? + })) + } else if lookahead.peek(Token![const]) { + Ok(GenericParam::Const(ConstParam { + attrs, + ..input.parse()? + })) + } else { + Err(lookahead.error()) + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for LifetimeParam { + fn parse(input: ParseStream) -> Result<Self> { + let has_colon; + Ok(LifetimeParam { + attrs: input.call(Attribute::parse_outer)?, + lifetime: input.parse()?, + colon_token: { + if input.peek(Token![:]) { + has_colon = true; + Some(input.parse()?) + } else { + has_colon = false; + None + } + }, + bounds: { + let mut bounds = Punctuated::new(); + if has_colon { + loop { + if input.peek(Token![,]) || input.peek(Token![>]) { + break; + } + let value = input.parse()?; + bounds.push_value(value); + if !input.peek(Token![+]) { + break; + } + let punct = input.parse()?; + bounds.push_punct(punct); + } + } + bounds + }, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for BoundLifetimes { + fn parse(input: ParseStream) -> Result<Self> { + Ok(BoundLifetimes { + for_token: input.parse()?, + lt_token: input.parse()?, + lifetimes: { + let mut lifetimes = Punctuated::new(); + while !input.peek(Token![>]) { + let attrs = input.call(Attribute::parse_outer)?; + let lifetime: Lifetime = input.parse()?; + lifetimes.push_value(GenericParam::Lifetime(LifetimeParam { + attrs, + lifetime, + colon_token: None, + bounds: Punctuated::new(), + })); + if input.peek(Token![>]) { + break; + } + lifetimes.push_punct(input.parse()?); + } + lifetimes + }, + gt_token: input.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Option<BoundLifetimes> { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Token![for]) { + input.parse().map(Some) + } else { + Ok(None) + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeParam { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let ident: Ident = input.parse()?; + let colon_token: Option<Token![:]> = input.parse()?; + + let mut bounds = Punctuated::new(); + if colon_token.is_some() { + loop { + if input.peek(Token![,]) || input.peek(Token![>]) || input.peek(Token![=]) { + break; + } + let value: TypeParamBound = input.parse()?; + bounds.push_value(value); + if !input.peek(Token![+]) { + break; + } + let punct: Token![+] = input.parse()?; + bounds.push_punct(punct); + } + } + + let eq_token: Option<Token![=]> = input.parse()?; + let default = if eq_token.is_some() { + 
Some(input.parse::<Type>()?) + } else { + None + }; + + Ok(TypeParam { + attrs, + ident, + colon_token, + bounds, + eq_token, + default, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeParamBound { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Lifetime) { + return input.parse().map(TypeParamBound::Lifetime); + } + + let begin = input.fork(); + + let content; + let (paren_token, content) = if input.peek(token::Paren) { + (Some(parenthesized!(content in input)), &content) + } else { + (None, input) + }; + + let is_tilde_const = + cfg!(feature = "full") && content.peek(Token![~]) && content.peek2(Token![const]); + if is_tilde_const { + content.parse::<Token![~]>()?; + content.parse::<Token![const]>()?; + } + + let mut bound: TraitBound = content.parse()?; + bound.paren_token = paren_token; + + if is_tilde_const { + Ok(TypeParamBound::Verbatim(verbatim::between(&begin, input))) + } else { + Ok(TypeParamBound::Trait(bound)) + } + } + } + + impl TypeParamBound { + pub(crate) fn parse_multiple( + input: ParseStream, + allow_plus: bool, + ) -> Result<Punctuated<Self, Token![+]>> { + let mut bounds = Punctuated::new(); + loop { + bounds.push_value(input.parse()?); + if !(allow_plus && input.peek(Token![+])) { + break; + } + bounds.push_punct(input.parse()?); + if !(input.peek(Ident::peek_any) + || input.peek(Token![::]) + || input.peek(Token![?]) + || input.peek(Lifetime) + || input.peek(token::Paren) + || input.peek(Token![~])) + { + break; + } + } + Ok(bounds) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TraitBound { + fn parse(input: ParseStream) -> Result<Self> { + let modifier: TraitBoundModifier = input.parse()?; + let lifetimes: Option<BoundLifetimes> = input.parse()?; + + let mut path: Path = input.parse()?; + if path.segments.last().unwrap().arguments.is_empty() + && (input.peek(token::Paren) || input.peek(Token![::]) && input.peek3(token::Paren)) + { + input.parse::<Option<Token![::]>>()?; + let args: ParenthesizedGenericArguments = input.parse()?; + let parenthesized = PathArguments::Parenthesized(args); + path.segments.last_mut().unwrap().arguments = parenthesized; + } + + Ok(TraitBound { + paren_token: None, + modifier, + lifetimes, + path, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TraitBoundModifier { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Token![?]) { + input.parse().map(TraitBoundModifier::Maybe) + } else { + Ok(TraitBoundModifier::None) + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ConstParam { + fn parse(input: ParseStream) -> Result<Self> { + let mut default = None; + Ok(ConstParam { + attrs: input.call(Attribute::parse_outer)?, + const_token: input.parse()?, + ident: input.parse()?, + colon_token: input.parse()?, + ty: input.parse()?, + eq_token: { + if input.peek(Token![=]) { + let eq_token = input.parse()?; + default = Some(path::parsing::const_argument(input)?); + Some(eq_token) + } else { + None + } + }, + default, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for WhereClause { + fn parse(input: ParseStream) -> Result<Self> { + Ok(WhereClause { + where_token: input.parse()?, + predicates: { + let mut predicates = Punctuated::new(); + loop { + if input.is_empty() + || input.peek(token::Brace) + || input.peek(Token![,]) + || input.peek(Token![;]) + || input.peek(Token![:]) && !input.peek(Token![::]) + || input.peek(Token![=]) + { + break; 
+ } + let value = input.parse()?; + predicates.push_value(value); + if !input.peek(Token![,]) { + break; + } + let punct = input.parse()?; + predicates.push_punct(punct); + } + predicates + }, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Option<WhereClause> { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Token![where]) { + input.parse().map(Some) + } else { + Ok(None) + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for WherePredicate { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Lifetime) && input.peek2(Token![:]) { + Ok(WherePredicate::Lifetime(PredicateLifetime { + lifetime: input.parse()?, + colon_token: input.parse()?, + bounds: { + let mut bounds = Punctuated::new(); + loop { + if input.is_empty() + || input.peek(token::Brace) + || input.peek(Token![,]) + || input.peek(Token![;]) + || input.peek(Token![:]) + || input.peek(Token![=]) + { + break; + } + let value = input.parse()?; + bounds.push_value(value); + if !input.peek(Token![+]) { + break; + } + let punct = input.parse()?; + bounds.push_punct(punct); + } + bounds + }, + })) + } else { + Ok(WherePredicate::Type(PredicateType { + lifetimes: input.parse()?, + bounded_ty: input.parse()?, + colon_token: input.parse()?, + bounds: { + let mut bounds = Punctuated::new(); + loop { + if input.is_empty() + || input.peek(token::Brace) + || input.peek(Token![,]) + || input.peek(Token![;]) + || input.peek(Token![:]) && !input.peek(Token![::]) + || input.peek(Token![=]) + { + break; + } + let value = input.parse()?; + bounds.push_value(value); + if !input.peek(Token![+]) { + break; + } + let punct = input.parse()?; + bounds.push_punct(punct); + } + bounds + }, + })) + } + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use crate::attr::FilterAttrs; + use crate::print::TokensOrDefault; + use proc_macro2::TokenStream; + use quote::{ToTokens, TokenStreamExt}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Generics { + fn to_tokens(&self, tokens: &mut TokenStream) { + if self.params.is_empty() { + return; + } + + TokensOrDefault(&self.lt_token).to_tokens(tokens); + + // Print lifetimes before types and consts, regardless of their + // order in self.params. + let mut trailing_or_empty = true; + for param in self.params.pairs() { + if let GenericParam::Lifetime(_) = **param.value() { + param.to_tokens(tokens); + trailing_or_empty = param.punct().is_some(); + } + } + for param in self.params.pairs() { + match param.value() { + GenericParam::Type(_) | GenericParam::Const(_) => { + if !trailing_or_empty { + <Token![,]>::default().to_tokens(tokens); + trailing_or_empty = true; + } + param.to_tokens(tokens); + } + GenericParam::Lifetime(_) => {} + } + } + + TokensOrDefault(&self.gt_token).to_tokens(tokens); + } + } + + impl<'a> ToTokens for ImplGenerics<'a> { + fn to_tokens(&self, tokens: &mut TokenStream) { + if self.0.params.is_empty() { + return; + } + + TokensOrDefault(&self.0.lt_token).to_tokens(tokens); + + // Print lifetimes before types and consts, regardless of their + // order in self.params. 
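+            // `trailing_or_empty` is true while nothing has been printed yet,
+            // or while the last token printed was a trailing comma, i.e. no
+            // extra separator is needed before the next parameter.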
+ let mut trailing_or_empty = true; + for param in self.0.params.pairs() { + if let GenericParam::Lifetime(_) = **param.value() { + param.to_tokens(tokens); + trailing_or_empty = param.punct().is_some(); + } + } + for param in self.0.params.pairs() { + if let GenericParam::Lifetime(_) = **param.value() { + continue; + } + if !trailing_or_empty { + <Token![,]>::default().to_tokens(tokens); + trailing_or_empty = true; + } + match param.value() { + GenericParam::Lifetime(_) => unreachable!(), + GenericParam::Type(param) => { + // Leave off the type parameter defaults + tokens.append_all(param.attrs.outer()); + param.ident.to_tokens(tokens); + if !param.bounds.is_empty() { + TokensOrDefault(¶m.colon_token).to_tokens(tokens); + param.bounds.to_tokens(tokens); + } + } + GenericParam::Const(param) => { + // Leave off the const parameter defaults + tokens.append_all(param.attrs.outer()); + param.const_token.to_tokens(tokens); + param.ident.to_tokens(tokens); + param.colon_token.to_tokens(tokens); + param.ty.to_tokens(tokens); + } + } + param.punct().to_tokens(tokens); + } + + TokensOrDefault(&self.0.gt_token).to_tokens(tokens); + } + } + + impl<'a> ToTokens for TypeGenerics<'a> { + fn to_tokens(&self, tokens: &mut TokenStream) { + if self.0.params.is_empty() { + return; + } + + TokensOrDefault(&self.0.lt_token).to_tokens(tokens); + + // Print lifetimes before types and consts, regardless of their + // order in self.params. + let mut trailing_or_empty = true; + for param in self.0.params.pairs() { + if let GenericParam::Lifetime(def) = *param.value() { + // Leave off the lifetime bounds and attributes + def.lifetime.to_tokens(tokens); + param.punct().to_tokens(tokens); + trailing_or_empty = param.punct().is_some(); + } + } + for param in self.0.params.pairs() { + if let GenericParam::Lifetime(_) = **param.value() { + continue; + } + if !trailing_or_empty { + <Token![,]>::default().to_tokens(tokens); + trailing_or_empty = true; + } + match param.value() { + GenericParam::Lifetime(_) => unreachable!(), + GenericParam::Type(param) => { + // Leave off the type parameter defaults + param.ident.to_tokens(tokens); + } + GenericParam::Const(param) => { + // Leave off the const parameter defaults + param.ident.to_tokens(tokens); + } + } + param.punct().to_tokens(tokens); + } + + TokensOrDefault(&self.0.gt_token).to_tokens(tokens); + } + } + + impl<'a> ToTokens for Turbofish<'a> { + fn to_tokens(&self, tokens: &mut TokenStream) { + if !self.0.params.is_empty() { + <Token![::]>::default().to_tokens(tokens); + TypeGenerics(self.0).to_tokens(tokens); + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for BoundLifetimes { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.for_token.to_tokens(tokens); + self.lt_token.to_tokens(tokens); + self.lifetimes.to_tokens(tokens); + self.gt_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for LifetimeParam { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.lifetime.to_tokens(tokens); + if !self.bounds.is_empty() { + TokensOrDefault(&self.colon_token).to_tokens(tokens); + self.bounds.to_tokens(tokens); + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeParam { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.ident.to_tokens(tokens); + if !self.bounds.is_empty() { + TokensOrDefault(&self.colon_token).to_tokens(tokens); + 
self.bounds.to_tokens(tokens); + } + if let Some(default) = &self.default { + TokensOrDefault(&self.eq_token).to_tokens(tokens); + default.to_tokens(tokens); + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TraitBound { + fn to_tokens(&self, tokens: &mut TokenStream) { + let to_tokens = |tokens: &mut TokenStream| { + self.modifier.to_tokens(tokens); + self.lifetimes.to_tokens(tokens); + self.path.to_tokens(tokens); + }; + match &self.paren_token { + Some(paren) => paren.surround(tokens, to_tokens), + None => to_tokens(tokens), + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TraitBoundModifier { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + TraitBoundModifier::None => {} + TraitBoundModifier::Maybe(t) => t.to_tokens(tokens), + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ConstParam { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.const_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.colon_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + if let Some(default) = &self.default { + TokensOrDefault(&self.eq_token).to_tokens(tokens); + default.to_tokens(tokens); + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for WhereClause { + fn to_tokens(&self, tokens: &mut TokenStream) { + if !self.predicates.is_empty() { + self.where_token.to_tokens(tokens); + self.predicates.to_tokens(tokens); + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PredicateLifetime { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.lifetime.to_tokens(tokens); + self.colon_token.to_tokens(tokens); + self.bounds.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PredicateType { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.lifetimes.to_tokens(tokens); + self.bounded_ty.to_tokens(tokens); + self.colon_token.to_tokens(tokens); + self.bounds.to_tokens(tokens); + } + } +} diff --git a/vendor/syn/src/group.rs b/vendor/syn/src/group.rs new file mode 100644 index 0000000..2730233 --- /dev/null +++ b/vendor/syn/src/group.rs @@ -0,0 +1,291 @@ +use crate::error::Result; +use crate::parse::ParseBuffer; +use crate::token; +use proc_macro2::extra::DelimSpan; +use proc_macro2::Delimiter; + +// Not public API. +#[doc(hidden)] +pub struct Parens<'a> { + #[doc(hidden)] + pub token: token::Paren, + #[doc(hidden)] + pub content: ParseBuffer<'a>, +} + +// Not public API. +#[doc(hidden)] +pub struct Braces<'a> { + #[doc(hidden)] + pub token: token::Brace, + #[doc(hidden)] + pub content: ParseBuffer<'a>, +} + +// Not public API. +#[doc(hidden)] +pub struct Brackets<'a> { + #[doc(hidden)] + pub token: token::Bracket, + #[doc(hidden)] + pub content: ParseBuffer<'a>, +} + +// Not public API. +#[cfg(any(feature = "full", feature = "derive"))] +#[doc(hidden)] +pub struct Group<'a> { + #[doc(hidden)] + pub token: token::Group, + #[doc(hidden)] + pub content: ParseBuffer<'a>, +} + +// Not public API. +#[doc(hidden)] +pub fn parse_parens<'a>(input: &ParseBuffer<'a>) -> Result<Parens<'a>> { + parse_delimited(input, Delimiter::Parenthesis).map(|(span, content)| Parens { + token: token::Paren(span), + content, + }) +} + +// Not public API. 
+#[doc(hidden)] +pub fn parse_braces<'a>(input: &ParseBuffer<'a>) -> Result<Braces<'a>> { + parse_delimited(input, Delimiter::Brace).map(|(span, content)| Braces { + token: token::Brace(span), + content, + }) +} + +// Not public API. +#[doc(hidden)] +pub fn parse_brackets<'a>(input: &ParseBuffer<'a>) -> Result<Brackets<'a>> { + parse_delimited(input, Delimiter::Bracket).map(|(span, content)| Brackets { + token: token::Bracket(span), + content, + }) +} + +#[cfg(any(feature = "full", feature = "derive"))] +pub(crate) fn parse_group<'a>(input: &ParseBuffer<'a>) -> Result<Group<'a>> { + parse_delimited(input, Delimiter::None).map(|(span, content)| Group { + token: token::Group(span.join()), + content, + }) +} + +fn parse_delimited<'a>( + input: &ParseBuffer<'a>, + delimiter: Delimiter, +) -> Result<(DelimSpan, ParseBuffer<'a>)> { + input.step(|cursor| { + if let Some((content, span, rest)) = cursor.group(delimiter) { + let scope = crate::buffer::close_span_of_group(*cursor); + let nested = crate::parse::advance_step_cursor(cursor, content); + let unexpected = crate::parse::get_unexpected(input); + let content = crate::parse::new_parse_buffer(scope, nested, unexpected); + Ok(((span, content), rest)) + } else { + let message = match delimiter { + Delimiter::Parenthesis => "expected parentheses", + Delimiter::Brace => "expected curly braces", + Delimiter::Bracket => "expected square brackets", + Delimiter::None => "expected invisible group", + }; + Err(cursor.error(message)) + } + }) +} + +/// Parse a set of parentheses and expose their content to subsequent parsers. +/// +/// # Example +/// +/// ``` +/// # use quote::quote; +/// # +/// use syn::{parenthesized, token, Ident, Result, Token, Type}; +/// use syn::parse::{Parse, ParseStream}; +/// use syn::punctuated::Punctuated; +/// +/// // Parse a simplified tuple struct syntax like: +/// // +/// // struct S(A, B); +/// struct TupleStruct { +/// struct_token: Token![struct], +/// ident: Ident, +/// paren_token: token::Paren, +/// fields: Punctuated<Type, Token![,]>, +/// semi_token: Token![;], +/// } +/// +/// impl Parse for TupleStruct { +/// fn parse(input: ParseStream) -> Result<Self> { +/// let content; +/// Ok(TupleStruct { +/// struct_token: input.parse()?, +/// ident: input.parse()?, +/// paren_token: parenthesized!(content in input), +/// fields: content.parse_terminated(Type::parse, Token![,])?, +/// semi_token: input.parse()?, +/// }) +/// } +/// } +/// # +/// # fn main() { +/// # let input = quote! { +/// # struct S(A, B); +/// # }; +/// # syn::parse2::<TupleStruct>(input).unwrap(); +/// # } +/// ``` +#[macro_export] +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +macro_rules! parenthesized { + ($content:ident in $cursor:expr) => { + match $crate::__private::parse_parens(&$cursor) { + $crate::__private::Ok(parens) => { + $content = parens.content; + parens.token + } + $crate::__private::Err(error) => { + return $crate::__private::Err(error); + } + } + }; +} + +/// Parse a set of curly braces and expose their content to subsequent parsers. 
+/// +/// # Example +/// +/// ``` +/// # use quote::quote; +/// # +/// use syn::{braced, token, Ident, Result, Token, Type}; +/// use syn::parse::{Parse, ParseStream}; +/// use syn::punctuated::Punctuated; +/// +/// // Parse a simplified struct syntax like: +/// // +/// // struct S { +/// // a: A, +/// // b: B, +/// // } +/// struct Struct { +/// struct_token: Token![struct], +/// ident: Ident, +/// brace_token: token::Brace, +/// fields: Punctuated<Field, Token![,]>, +/// } +/// +/// struct Field { +/// name: Ident, +/// colon_token: Token![:], +/// ty: Type, +/// } +/// +/// impl Parse for Struct { +/// fn parse(input: ParseStream) -> Result<Self> { +/// let content; +/// Ok(Struct { +/// struct_token: input.parse()?, +/// ident: input.parse()?, +/// brace_token: braced!(content in input), +/// fields: content.parse_terminated(Field::parse, Token![,])?, +/// }) +/// } +/// } +/// +/// impl Parse for Field { +/// fn parse(input: ParseStream) -> Result<Self> { +/// Ok(Field { +/// name: input.parse()?, +/// colon_token: input.parse()?, +/// ty: input.parse()?, +/// }) +/// } +/// } +/// # +/// # fn main() { +/// # let input = quote! { +/// # struct S { +/// # a: A, +/// # b: B, +/// # } +/// # }; +/// # syn::parse2::<Struct>(input).unwrap(); +/// # } +/// ``` +#[macro_export] +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +macro_rules! braced { + ($content:ident in $cursor:expr) => { + match $crate::__private::parse_braces(&$cursor) { + $crate::__private::Ok(braces) => { + $content = braces.content; + braces.token + } + $crate::__private::Err(error) => { + return $crate::__private::Err(error); + } + } + }; +} + +/// Parse a set of square brackets and expose their content to subsequent +/// parsers. +/// +/// # Example +/// +/// ``` +/// # use quote::quote; +/// # +/// use proc_macro2::TokenStream; +/// use syn::{bracketed, token, Result, Token}; +/// use syn::parse::{Parse, ParseStream}; +/// +/// // Parse an outer attribute like: +/// // +/// // #[repr(C, packed)] +/// struct OuterAttribute { +/// pound_token: Token![#], +/// bracket_token: token::Bracket, +/// content: TokenStream, +/// } +/// +/// impl Parse for OuterAttribute { +/// fn parse(input: ParseStream) -> Result<Self> { +/// let content; +/// Ok(OuterAttribute { +/// pound_token: input.parse()?, +/// bracket_token: bracketed!(content in input), +/// content: content.parse()?, +/// }) +/// } +/// } +/// # +/// # fn main() { +/// # let input = quote! { +/// # #[repr(C, packed)] +/// # }; +/// # syn::parse2::<OuterAttribute>(input).unwrap(); +/// # } +/// ``` +#[macro_export] +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +macro_rules! bracketed { + ($content:ident in $cursor:expr) => { + match $crate::__private::parse_brackets(&$cursor) { + $crate::__private::Ok(brackets) => { + $content = brackets.content; + brackets.token + } + $crate::__private::Err(error) => { + return $crate::__private::Err(error); + } + } + }; +} diff --git a/vendor/syn/src/ident.rs b/vendor/syn/src/ident.rs new file mode 100644 index 0000000..d0f4ba0 --- /dev/null +++ b/vendor/syn/src/ident.rs @@ -0,0 +1,107 @@ +#[cfg(feature = "parsing")] +use crate::lookahead; + +pub use proc_macro2::Ident; + +#[cfg(feature = "parsing")] +pub_if_not_doc! { + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn Ident(marker: lookahead::TokenMarker) -> Ident { + match marker {} + } +} + +macro_rules! 
ident_from_token { + ($token:ident) => { + impl From<Token![$token]> for Ident { + fn from(token: Token![$token]) -> Ident { + Ident::new(stringify!($token), token.span) + } + } + }; +} + +ident_from_token!(self); +ident_from_token!(Self); +ident_from_token!(super); +ident_from_token!(crate); +ident_from_token!(extern); + +impl From<Token![_]> for Ident { + fn from(token: Token![_]) -> Ident { + Ident::new("_", token.span) + } +} + +pub(crate) fn xid_ok(symbol: &str) -> bool { + let mut chars = symbol.chars(); + let first = chars.next().unwrap(); + if !(first == '_' || unicode_ident::is_xid_start(first)) { + return false; + } + for ch in chars { + if !unicode_ident::is_xid_continue(ch) { + return false; + } + } + true +} + +#[cfg(feature = "parsing")] +mod parsing { + use crate::buffer::Cursor; + use crate::parse::{Parse, ParseStream, Result}; + use crate::token::Token; + use proc_macro2::Ident; + + fn accept_as_ident(ident: &Ident) -> bool { + match ident.to_string().as_str() { + "_" | + // Based on https://doc.rust-lang.org/1.65.0/reference/keywords.html + "abstract" | "as" | "async" | "await" | "become" | "box" | "break" | + "const" | "continue" | "crate" | "do" | "dyn" | "else" | "enum" | + "extern" | "false" | "final" | "fn" | "for" | "if" | "impl" | "in" | + "let" | "loop" | "macro" | "match" | "mod" | "move" | "mut" | + "override" | "priv" | "pub" | "ref" | "return" | "Self" | "self" | + "static" | "struct" | "super" | "trait" | "true" | "try" | "type" | + "typeof" | "unsafe" | "unsized" | "use" | "virtual" | "where" | + "while" | "yield" => false, + _ => true, + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Ident { + fn parse(input: ParseStream) -> Result<Self> { + input.step(|cursor| { + if let Some((ident, rest)) = cursor.ident() { + if accept_as_ident(&ident) { + Ok((ident, rest)) + } else { + Err(cursor.error(format_args!( + "expected identifier, found keyword `{}`", + ident, + ))) + } + } else { + Err(cursor.error("expected identifier")) + } + }) + } + } + + impl Token for Ident { + fn peek(cursor: Cursor) -> bool { + if let Some((ident, _rest)) = cursor.ident() { + accept_as_ident(&ident) + } else { + false + } + } + + fn display() -> &'static str { + "identifier" + } + } +} diff --git a/vendor/syn/src/item.rs b/vendor/syn/src/item.rs new file mode 100644 index 0000000..4e404e2 --- /dev/null +++ b/vendor/syn/src/item.rs @@ -0,0 +1,3404 @@ +use super::*; +use crate::derive::{Data, DataEnum, DataStruct, DataUnion, DeriveInput}; +use crate::punctuated::Punctuated; +use proc_macro2::TokenStream; + +#[cfg(feature = "parsing")] +use std::mem; + +ast_enum_of_structs! { + /// Things that can appear directly inside of a module or scope. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + #[non_exhaustive] + pub enum Item { + /// A constant item: `const MAX: u16 = 65535`. + Const(ItemConst), + + /// An enum definition: `enum Foo<A, B> { A(A), B(B) }`. + Enum(ItemEnum), + + /// An `extern crate` item: `extern crate serde`. + ExternCrate(ItemExternCrate), + + /// A free-standing function: `fn process(n: usize) -> Result<()> { ... + /// }`. + Fn(ItemFn), + + /// A block of foreign items: `extern "C" { ... }`. + ForeignMod(ItemForeignMod), + + /// An impl block providing trait or associated items: `impl<A> Trait + /// for Data<A> { ... }`. 
+ Impl(ItemImpl), + + /// A macro invocation, which includes `macro_rules!` definitions. + Macro(ItemMacro), + + /// A module or module declaration: `mod m` or `mod m { ... }`. + Mod(ItemMod), + + /// A static item: `static BIKE: Shed = Shed(42)`. + Static(ItemStatic), + + /// A struct definition: `struct Foo<A> { x: A }`. + Struct(ItemStruct), + + /// A trait definition: `pub trait Iterator { ... }`. + Trait(ItemTrait), + + /// A trait alias: `pub trait SharableIterator = Iterator + Sync`. + TraitAlias(ItemTraitAlias), + + /// A type alias: `type Result<T> = std::result::Result<T, MyError>`. + Type(ItemType), + + /// A union definition: `union Foo<A, B> { x: A, y: B }`. + Union(ItemUnion), + + /// A use declaration: `use std::collections::HashMap`. + Use(ItemUse), + + /// Tokens forming an item not interpreted by Syn. + Verbatim(TokenStream), + + // For testing exhaustiveness in downstream code, use the following idiom: + // + // match item { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // + // Item::Const(item) => {...} + // Item::Enum(item) => {...} + // ... + // Item::Verbatim(item) => {...} + // + // _ => { /* some sane fallback */ } + // } + // + // This way we fail your tests but don't break your library when adding + // a variant. You will be notified by a test failure when a variant is + // added, so that you can add code to handle it, but your library will + // continue to compile and work for downstream users in the interim. + } +} + +ast_struct! { + /// A constant item: `const MAX: u16 = 65535`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemConst { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub const_token: Token![const], + pub ident: Ident, + pub generics: Generics, + pub colon_token: Token![:], + pub ty: Box<Type>, + pub eq_token: Token![=], + pub expr: Box<Expr>, + pub semi_token: Token![;], + } +} + +ast_struct! { + /// An enum definition: `enum Foo<A, B> { A(A), B(B) }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemEnum { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub enum_token: Token![enum], + pub ident: Ident, + pub generics: Generics, + pub brace_token: token::Brace, + pub variants: Punctuated<Variant, Token![,]>, + } +} + +ast_struct! { + /// An `extern crate` item: `extern crate serde`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemExternCrate { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub extern_token: Token![extern], + pub crate_token: Token![crate], + pub ident: Ident, + pub rename: Option<(Token![as], Ident)>, + pub semi_token: Token![;], + } +} + +ast_struct! { + /// A free-standing function: `fn process(n: usize) -> Result<()> { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemFn { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub sig: Signature, + pub block: Box<Block>, + } +} + +ast_struct! { + /// A block of foreign items: `extern "C" { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemForeignMod { + pub attrs: Vec<Attribute>, + pub unsafety: Option<Token![unsafe]>, + pub abi: Abi, + pub brace_token: token::Brace, + pub items: Vec<ForeignItem>, + } +} + +ast_struct! { + /// An impl block providing trait or associated items: `impl<A> Trait + /// for Data<A> { ... }`. 
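+    ///
+    /// A minimal parsing sketch (assumes the `full` feature; `MyTrait` and
+    /// `Wrapper` are arbitrary illustrative names, not defined here):
+    ///
+    /// ```
+    /// let item: syn::ItemImpl = syn::parse_quote! {
+    ///     impl<T> MyTrait for Wrapper<T> {}
+    /// };
+    /// // For a trait impl, `trait_` holds the optional `!`, the trait path,
+    /// // and the `for` token.
+    /// assert!(item.trait_.is_some());
+    /// ```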
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemImpl { + pub attrs: Vec<Attribute>, + pub defaultness: Option<Token![default]>, + pub unsafety: Option<Token![unsafe]>, + pub impl_token: Token![impl], + pub generics: Generics, + /// Trait this impl implements. + pub trait_: Option<(Option<Token![!]>, Path, Token![for])>, + /// The Self type of the impl. + pub self_ty: Box<Type>, + pub brace_token: token::Brace, + pub items: Vec<ImplItem>, + } +} + +ast_struct! { + /// A macro invocation, which includes `macro_rules!` definitions. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemMacro { + pub attrs: Vec<Attribute>, + /// The `example` in `macro_rules! example { ... }`. + pub ident: Option<Ident>, + pub mac: Macro, + pub semi_token: Option<Token![;]>, + } +} + +ast_struct! { + /// A module or module declaration: `mod m` or `mod m { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemMod { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub unsafety: Option<Token![unsafe]>, + pub mod_token: Token![mod], + pub ident: Ident, + pub content: Option<(token::Brace, Vec<Item>)>, + pub semi: Option<Token![;]>, + } +} + +ast_struct! { + /// A static item: `static BIKE: Shed = Shed(42)`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemStatic { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub static_token: Token![static], + pub mutability: StaticMutability, + pub ident: Ident, + pub colon_token: Token![:], + pub ty: Box<Type>, + pub eq_token: Token![=], + pub expr: Box<Expr>, + pub semi_token: Token![;], + } +} + +ast_struct! { + /// A struct definition: `struct Foo<A> { x: A }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemStruct { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub struct_token: Token![struct], + pub ident: Ident, + pub generics: Generics, + pub fields: Fields, + pub semi_token: Option<Token![;]>, + } +} + +ast_struct! { + /// A trait definition: `pub trait Iterator { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemTrait { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub unsafety: Option<Token![unsafe]>, + pub auto_token: Option<Token![auto]>, + pub restriction: Option<ImplRestriction>, + pub trait_token: Token![trait], + pub ident: Ident, + pub generics: Generics, + pub colon_token: Option<Token![:]>, + pub supertraits: Punctuated<TypeParamBound, Token![+]>, + pub brace_token: token::Brace, + pub items: Vec<TraitItem>, + } +} + +ast_struct! { + /// A trait alias: `pub trait SharableIterator = Iterator + Sync`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemTraitAlias { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub trait_token: Token![trait], + pub ident: Ident, + pub generics: Generics, + pub eq_token: Token![=], + pub bounds: Punctuated<TypeParamBound, Token![+]>, + pub semi_token: Token![;], + } +} + +ast_struct! { + /// A type alias: `type Result<T> = std::result::Result<T, MyError>`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemType { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub type_token: Token![type], + pub ident: Ident, + pub generics: Generics, + pub eq_token: Token![=], + pub ty: Box<Type>, + pub semi_token: Token![;], + } +} + +ast_struct! { + /// A union definition: `union Foo<A, B> { x: A, y: B }`. 
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemUnion { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub union_token: Token![union], + pub ident: Ident, + pub generics: Generics, + pub fields: FieldsNamed, + } +} + +ast_struct! { + /// A use declaration: `use std::collections::HashMap`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ItemUse { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub use_token: Token![use], + pub leading_colon: Option<Token![::]>, + pub tree: UseTree, + pub semi_token: Token![;], + } +} + +impl Item { + #[cfg(feature = "parsing")] + pub(crate) fn replace_attrs(&mut self, new: Vec<Attribute>) -> Vec<Attribute> { + match self { + Item::Const(ItemConst { attrs, .. }) + | Item::Enum(ItemEnum { attrs, .. }) + | Item::ExternCrate(ItemExternCrate { attrs, .. }) + | Item::Fn(ItemFn { attrs, .. }) + | Item::ForeignMod(ItemForeignMod { attrs, .. }) + | Item::Impl(ItemImpl { attrs, .. }) + | Item::Macro(ItemMacro { attrs, .. }) + | Item::Mod(ItemMod { attrs, .. }) + | Item::Static(ItemStatic { attrs, .. }) + | Item::Struct(ItemStruct { attrs, .. }) + | Item::Trait(ItemTrait { attrs, .. }) + | Item::TraitAlias(ItemTraitAlias { attrs, .. }) + | Item::Type(ItemType { attrs, .. }) + | Item::Union(ItemUnion { attrs, .. }) + | Item::Use(ItemUse { attrs, .. }) => mem::replace(attrs, new), + Item::Verbatim(_) => Vec::new(), + } + } +} + +impl From<DeriveInput> for Item { + fn from(input: DeriveInput) -> Item { + match input.data { + Data::Struct(data) => Item::Struct(ItemStruct { + attrs: input.attrs, + vis: input.vis, + struct_token: data.struct_token, + ident: input.ident, + generics: input.generics, + fields: data.fields, + semi_token: data.semi_token, + }), + Data::Enum(data) => Item::Enum(ItemEnum { + attrs: input.attrs, + vis: input.vis, + enum_token: data.enum_token, + ident: input.ident, + generics: input.generics, + brace_token: data.brace_token, + variants: data.variants, + }), + Data::Union(data) => Item::Union(ItemUnion { + attrs: input.attrs, + vis: input.vis, + union_token: data.union_token, + ident: input.ident, + generics: input.generics, + fields: data.fields, + }), + } + } +} + +impl From<ItemStruct> for DeriveInput { + fn from(input: ItemStruct) -> DeriveInput { + DeriveInput { + attrs: input.attrs, + vis: input.vis, + ident: input.ident, + generics: input.generics, + data: Data::Struct(DataStruct { + struct_token: input.struct_token, + fields: input.fields, + semi_token: input.semi_token, + }), + } + } +} + +impl From<ItemEnum> for DeriveInput { + fn from(input: ItemEnum) -> DeriveInput { + DeriveInput { + attrs: input.attrs, + vis: input.vis, + ident: input.ident, + generics: input.generics, + data: Data::Enum(DataEnum { + enum_token: input.enum_token, + brace_token: input.brace_token, + variants: input.variants, + }), + } + } +} + +impl From<ItemUnion> for DeriveInput { + fn from(input: ItemUnion) -> DeriveInput { + DeriveInput { + attrs: input.attrs, + vis: input.vis, + ident: input.ident, + generics: input.generics, + data: Data::Union(DataUnion { + union_token: input.union_token, + fields: input.fields, + }), + } + } +} + +ast_enum_of_structs! { + /// A suffix of an import tree in a `use` item: `Type as Renamed` or `*`. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub enum UseTree { + /// A path prefix of imports in a `use` item: `std::...`. 
+        Path(UsePath),
+
+        /// An identifier imported by a `use` item: `HashMap`.
+        Name(UseName),
+
+        /// A renamed identifier imported by a `use` item: `HashMap as Map`.
+        Rename(UseRename),
+
+        /// A glob import in a `use` item: `*`.
+        Glob(UseGlob),
+
+        /// A braced group of imports in a `use` item: `{A, B, C}`.
+        Group(UseGroup),
+    }
+}
+
+ast_struct! {
+    /// A path prefix of imports in a `use` item: `std::...`.
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))]
+    pub struct UsePath {
+        pub ident: Ident,
+        pub colon2_token: Token![::],
+        pub tree: Box<UseTree>,
+    }
+}
+
+ast_struct! {
+    /// An identifier imported by a `use` item: `HashMap`.
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))]
+    pub struct UseName {
+        pub ident: Ident,
+    }
+}
+
+ast_struct! {
+    /// A renamed identifier imported by a `use` item: `HashMap as Map`.
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))]
+    pub struct UseRename {
+        pub ident: Ident,
+        pub as_token: Token![as],
+        pub rename: Ident,
+    }
+}
+
+ast_struct! {
+    /// A glob import in a `use` item: `*`.
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))]
+    pub struct UseGlob {
+        pub star_token: Token![*],
+    }
+}
+
+ast_struct! {
+    /// A braced group of imports in a `use` item: `{A, B, C}`.
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))]
+    pub struct UseGroup {
+        pub brace_token: token::Brace,
+        pub items: Punctuated<UseTree, Token![,]>,
+    }
+}
+
+ast_enum_of_structs! {
+    /// An item within an `extern` block.
+    ///
+    /// # Syntax tree enum
+    ///
+    /// This type is a [syntax tree enum].
+    ///
+    /// [syntax tree enum]: Expr#syntax-tree-enums
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))]
+    #[non_exhaustive]
+    pub enum ForeignItem {
+        /// A foreign function in an `extern` block.
+        Fn(ForeignItemFn),
+
+        /// A foreign static item in an `extern` block: `static ext: u8`.
+        Static(ForeignItemStatic),
+
+        /// A foreign type in an `extern` block: `type void`.
+        Type(ForeignItemType),
+
+        /// A macro invocation within an extern block.
+        Macro(ForeignItemMacro),
+
+        /// Tokens in an `extern` block not interpreted by Syn.
+        Verbatim(TokenStream),
+
+        // For testing exhaustiveness in downstream code, use the following idiom:
+        //
+        //     match item {
+        //         #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))]
+        //
+        //         ForeignItem::Fn(item) => {...}
+        //         ForeignItem::Static(item) => {...}
+        //         ...
+        //         ForeignItem::Verbatim(item) => {...}
+        //
+        //         _ => { /* some sane fallback */ }
+        //     }
+        //
+        // This way we fail your tests but don't break your library when adding
+        // a variant. You will be notified by a test failure when a variant is
+        // added, so that you can add code to handle it, but your library will
+        // continue to compile and work for downstream users in the interim.
+    }
+}
+
+ast_struct! {
+    /// A foreign function in an `extern` block.
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))]
+    pub struct ForeignItemFn {
+        pub attrs: Vec<Attribute>,
+        pub vis: Visibility,
+        pub sig: Signature,
+        pub semi_token: Token![;],
+    }
+}
+
+ast_struct! {
+    /// A foreign static item in an `extern` block: `static ext: u8`.
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))]
+    pub struct ForeignItemStatic {
+        pub attrs: Vec<Attribute>,
+        pub vis: Visibility,
+        pub static_token: Token![static],
+        pub mutability: StaticMutability,
+        pub ident: Ident,
+        pub colon_token: Token![:],
+        pub ty: Box<Type>,
+        pub semi_token: Token![;],
+    }
+}
+
+ast_struct! {
+    /// A foreign type in an `extern` block: `type void`.
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ForeignItemType { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub type_token: Token![type], + pub ident: Ident, + pub generics: Generics, + pub semi_token: Token![;], + } +} + +ast_struct! { + /// A macro invocation within an extern block. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ForeignItemMacro { + pub attrs: Vec<Attribute>, + pub mac: Macro, + pub semi_token: Option<Token![;]>, + } +} + +ast_enum_of_structs! { + /// An item declaration within the definition of a trait. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + #[non_exhaustive] + pub enum TraitItem { + /// An associated constant within the definition of a trait. + Const(TraitItemConst), + + /// An associated function within the definition of a trait. + Fn(TraitItemFn), + + /// An associated type within the definition of a trait. + Type(TraitItemType), + + /// A macro invocation within the definition of a trait. + Macro(TraitItemMacro), + + /// Tokens within the definition of a trait not interpreted by Syn. + Verbatim(TokenStream), + + // For testing exhaustiveness in downstream code, use the following idiom: + // + // match item { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // + // TraitItem::Const(item) => {...} + // TraitItem::Fn(item) => {...} + // ... + // TraitItem::Verbatim(item) => {...} + // + // _ => { /* some sane fallback */ } + // } + // + // This way we fail your tests but don't break your library when adding + // a variant. You will be notified by a test failure when a variant is + // added, so that you can add code to handle it, but your library will + // continue to compile and work for downstream users in the interim. + } +} + +ast_struct! { + /// An associated constant within the definition of a trait. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct TraitItemConst { + pub attrs: Vec<Attribute>, + pub const_token: Token![const], + pub ident: Ident, + pub generics: Generics, + pub colon_token: Token![:], + pub ty: Type, + pub default: Option<(Token![=], Expr)>, + pub semi_token: Token![;], + } +} + +ast_struct! { + /// An associated function within the definition of a trait. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct TraitItemFn { + pub attrs: Vec<Attribute>, + pub sig: Signature, + pub default: Option<Block>, + pub semi_token: Option<Token![;]>, + } +} + +ast_struct! { + /// An associated type within the definition of a trait. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct TraitItemType { + pub attrs: Vec<Attribute>, + pub type_token: Token![type], + pub ident: Ident, + pub generics: Generics, + pub colon_token: Option<Token![:]>, + pub bounds: Punctuated<TypeParamBound, Token![+]>, + pub default: Option<(Token![=], Type)>, + pub semi_token: Token![;], + } +} + +ast_struct! { + /// A macro invocation within the definition of a trait. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct TraitItemMacro { + pub attrs: Vec<Attribute>, + pub mac: Macro, + pub semi_token: Option<Token![;]>, + } +} + +ast_enum_of_structs! { + /// An item within an impl block. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. 
+ /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + #[non_exhaustive] + pub enum ImplItem { + /// An associated constant within an impl block. + Const(ImplItemConst), + + /// An associated function within an impl block. + Fn(ImplItemFn), + + /// An associated type within an impl block. + Type(ImplItemType), + + /// A macro invocation within an impl block. + Macro(ImplItemMacro), + + /// Tokens within an impl block not interpreted by Syn. + Verbatim(TokenStream), + + // For testing exhaustiveness in downstream code, use the following idiom: + // + // match item { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // + // ImplItem::Const(item) => {...} + // ImplItem::Fn(item) => {...} + // ... + // ImplItem::Verbatim(item) => {...} + // + // _ => { /* some sane fallback */ } + // } + // + // This way we fail your tests but don't break your library when adding + // a variant. You will be notified by a test failure when a variant is + // added, so that you can add code to handle it, but your library will + // continue to compile and work for downstream users in the interim. + } +} + +ast_struct! { + /// An associated constant within an impl block. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ImplItemConst { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub defaultness: Option<Token![default]>, + pub const_token: Token![const], + pub ident: Ident, + pub generics: Generics, + pub colon_token: Token![:], + pub ty: Type, + pub eq_token: Token![=], + pub expr: Expr, + pub semi_token: Token![;], + } +} + +ast_struct! { + /// An associated function within an impl block. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ImplItemFn { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub defaultness: Option<Token![default]>, + pub sig: Signature, + pub block: Block, + } +} + +ast_struct! { + /// An associated type within an impl block. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ImplItemType { + pub attrs: Vec<Attribute>, + pub vis: Visibility, + pub defaultness: Option<Token![default]>, + pub type_token: Token![type], + pub ident: Ident, + pub generics: Generics, + pub eq_token: Token![=], + pub ty: Type, + pub semi_token: Token![;], + } +} + +ast_struct! { + /// A macro invocation within an impl block. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct ImplItemMacro { + pub attrs: Vec<Attribute>, + pub mac: Macro, + pub semi_token: Option<Token![;]>, + } +} + +ast_struct! { + /// A function signature in a trait or implementation: `unsafe fn + /// initialize(&self)`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct Signature { + pub constness: Option<Token![const]>, + pub asyncness: Option<Token![async]>, + pub unsafety: Option<Token![unsafe]>, + pub abi: Option<Abi>, + pub fn_token: Token![fn], + pub ident: Ident, + pub generics: Generics, + pub paren_token: token::Paren, + pub inputs: Punctuated<FnArg, Token![,]>, + pub variadic: Option<Variadic>, + pub output: ReturnType, + } +} + +impl Signature { + /// A method's `self` receiver, such as `&self` or `self: Box<Self>`. + pub fn receiver(&self) -> Option<&Receiver> { + let arg = self.inputs.first()?; + match arg { + FnArg::Receiver(receiver) => Some(receiver), + FnArg::Typed(_) => None, + } + } +} + +ast_enum_of_structs! { + /// An argument in a function signature: the `n: usize` in `fn f(n: usize)`. 
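+
+    // A minimal sketch of using `Signature::receiver` above, assuming the
+    // crate's public parsing API (`syn::parse_quote!`); illustrative, not
+    // upstream code:
+    //
+    //     let method: TraitItemFn = syn::parse_quote! {
+    //         fn dump(&self, depth: usize);
+    //     };
+    //     let sig = &method.sig;
+    //     assert!(sig.receiver().is_some()); // `&self` is the first input
+    //     assert_eq!(sig.inputs.len(), 2);   // receiver + `depth: usize`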
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub enum FnArg { + /// The `self` argument of an associated method. + Receiver(Receiver), + + /// A function argument accepted by pattern and type. + Typed(PatType), + } +} + +ast_struct! { + /// The `self` argument of an associated method. + /// + /// If `colon_token` is present, the receiver is written with an explicit + /// type such as `self: Box<Self>`. If `colon_token` is absent, the receiver + /// is written in shorthand such as `self` or `&self` or `&mut self`. In the + /// shorthand case, the type in `ty` is reconstructed as one of `Self`, + /// `&Self`, or `&mut Self`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct Receiver { + pub attrs: Vec<Attribute>, + pub reference: Option<(Token![&], Option<Lifetime>)>, + pub mutability: Option<Token![mut]>, + pub self_token: Token![self], + pub colon_token: Option<Token![:]>, + pub ty: Box<Type>, + } +} + +impl Receiver { + pub fn lifetime(&self) -> Option<&Lifetime> { + self.reference.as_ref()?.1.as_ref() + } +} + +ast_struct! { + /// The variadic argument of a foreign function. + /// + /// ```rust + /// # struct c_char; + /// # struct c_int; + /// # + /// extern "C" { + /// fn printf(format: *const c_char, ...) -> c_int; + /// // ^^^ + /// } + /// ``` + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct Variadic { + pub attrs: Vec<Attribute>, + pub pat: Option<(Box<Pat>, Token![:])>, + pub dots: Token![...], + pub comma: Option<Token![,]>, + } +} + +ast_enum! { + /// The mutability of an `Item::Static` or `ForeignItem::Static`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + #[non_exhaustive] + pub enum StaticMutability { + Mut(Token![mut]), + None, + } +} + +ast_enum! { + /// Unused, but reserved for RFC 3323 restrictions. 
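+
+    // Illustrative summary (not upstream code) of how `Receiver::ty` is
+    // populated, per the `Receiver` docs above and its `Parse` impl below:
+    //
+    //     self              ty = Self            colon_token = None
+    //     &self             ty = &Self           colon_token = None
+    //     &'a mut self      ty = &'a mut Self    colon_token = None
+    //     self: Box<Self>   ty = Box<Self>       colon_token = Some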
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + #[non_exhaustive] + pub enum ImplRestriction {} + + + // TODO: https://rust-lang.github.io/rfcs/3323-restrictions.html + // + // pub struct ImplRestriction { + // pub impl_token: Token![impl], + // pub paren_token: token::Paren, + // pub in_token: Option<Token![in]>, + // pub path: Box<Path>, + // } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::ext::IdentExt as _; + use crate::parse::discouraged::Speculative as _; + use crate::parse::{Parse, ParseBuffer, ParseStream, Result}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Item { + fn parse(input: ParseStream) -> Result<Self> { + let begin = input.fork(); + let attrs = input.call(Attribute::parse_outer)?; + parse_rest_of_item(begin, attrs, input) + } + } + + pub(crate) fn parse_rest_of_item( + begin: ParseBuffer, + mut attrs: Vec<Attribute>, + input: ParseStream, + ) -> Result<Item> { + let ahead = input.fork(); + let vis: Visibility = ahead.parse()?; + + let lookahead = ahead.lookahead1(); + let mut item = if lookahead.peek(Token![fn]) || peek_signature(&ahead) { + let vis: Visibility = input.parse()?; + let sig: Signature = input.parse()?; + if input.peek(Token![;]) { + input.parse::<Token![;]>()?; + Ok(Item::Verbatim(verbatim::between(&begin, input))) + } else { + parse_rest_of_fn(input, Vec::new(), vis, sig).map(Item::Fn) + } + } else if lookahead.peek(Token![extern]) { + ahead.parse::<Token![extern]>()?; + let lookahead = ahead.lookahead1(); + if lookahead.peek(Token![crate]) { + input.parse().map(Item::ExternCrate) + } else if lookahead.peek(token::Brace) { + input.parse().map(Item::ForeignMod) + } else if lookahead.peek(LitStr) { + ahead.parse::<LitStr>()?; + let lookahead = ahead.lookahead1(); + if lookahead.peek(token::Brace) { + input.parse().map(Item::ForeignMod) + } else { + Err(lookahead.error()) + } + } else { + Err(lookahead.error()) + } + } else if lookahead.peek(Token![use]) { + let allow_crate_root_in_path = true; + match parse_item_use(input, allow_crate_root_in_path)? { + Some(item_use) => Ok(Item::Use(item_use)), + None => Ok(Item::Verbatim(verbatim::between(&begin, input))), + } + } else if lookahead.peek(Token![static]) { + let vis = input.parse()?; + let static_token = input.parse()?; + let mutability = input.parse()?; + let ident = input.parse()?; + if input.peek(Token![=]) { + input.parse::<Token![=]>()?; + input.parse::<Expr>()?; + input.parse::<Token![;]>()?; + Ok(Item::Verbatim(verbatim::between(&begin, input))) + } else { + let colon_token = input.parse()?; + let ty = input.parse()?; + if input.peek(Token![;]) { + input.parse::<Token![;]>()?; + Ok(Item::Verbatim(verbatim::between(&begin, input))) + } else { + Ok(Item::Static(ItemStatic { + attrs: Vec::new(), + vis, + static_token, + mutability, + ident, + colon_token, + ty, + eq_token: input.parse()?, + expr: input.parse()?, + semi_token: input.parse()?, + })) + } + } + } else if lookahead.peek(Token![const]) { + let vis = input.parse()?; + let const_token: Token![const] = input.parse()?; + let lookahead = input.lookahead1(); + let ident = if lookahead.peek(Ident) || lookahead.peek(Token![_]) { + input.call(Ident::parse_any)? + } else { + return Err(lookahead.error()); + }; + let mut generics: Generics = input.parse()?; + let colon_token = input.parse()?; + let ty = input.parse()?; + let value = if let Some(eq_token) = input.parse::<Option<Token![=]>>()? 
{ + let expr: Expr = input.parse()?; + Some((eq_token, expr)) + } else { + None + }; + generics.where_clause = input.parse()?; + let semi_token: Token![;] = input.parse()?; + match value { + Some((eq_token, expr)) + if generics.lt_token.is_none() && generics.where_clause.is_none() => + { + Ok(Item::Const(ItemConst { + attrs: Vec::new(), + vis, + const_token, + ident, + generics, + colon_token, + ty, + eq_token, + expr: Box::new(expr), + semi_token, + })) + } + _ => Ok(Item::Verbatim(verbatim::between(&begin, input))), + } + } else if lookahead.peek(Token![unsafe]) { + ahead.parse::<Token![unsafe]>()?; + let lookahead = ahead.lookahead1(); + if lookahead.peek(Token![trait]) + || lookahead.peek(Token![auto]) && ahead.peek2(Token![trait]) + { + input.parse().map(Item::Trait) + } else if lookahead.peek(Token![impl]) { + let allow_verbatim_impl = true; + if let Some(item) = parse_impl(input, allow_verbatim_impl)? { + Ok(Item::Impl(item)) + } else { + Ok(Item::Verbatim(verbatim::between(&begin, input))) + } + } else if lookahead.peek(Token![extern]) { + input.parse().map(Item::ForeignMod) + } else if lookahead.peek(Token![mod]) { + input.parse().map(Item::Mod) + } else { + Err(lookahead.error()) + } + } else if lookahead.peek(Token![mod]) { + input.parse().map(Item::Mod) + } else if lookahead.peek(Token![type]) { + parse_item_type(begin, input) + } else if lookahead.peek(Token![struct]) { + input.parse().map(Item::Struct) + } else if lookahead.peek(Token![enum]) { + input.parse().map(Item::Enum) + } else if lookahead.peek(Token![union]) && ahead.peek2(Ident) { + input.parse().map(Item::Union) + } else if lookahead.peek(Token![trait]) { + input.call(parse_trait_or_trait_alias) + } else if lookahead.peek(Token![auto]) && ahead.peek2(Token![trait]) { + input.parse().map(Item::Trait) + } else if lookahead.peek(Token![impl]) + || lookahead.peek(Token![default]) && !ahead.peek2(Token![!]) + { + let allow_verbatim_impl = true; + if let Some(item) = parse_impl(input, allow_verbatim_impl)? 
{ + Ok(Item::Impl(item)) + } else { + Ok(Item::Verbatim(verbatim::between(&begin, input))) + } + } else if lookahead.peek(Token![macro]) { + input.advance_to(&ahead); + parse_macro2(begin, vis, input) + } else if vis.is_inherited() + && (lookahead.peek(Ident) + || lookahead.peek(Token![self]) + || lookahead.peek(Token![super]) + || lookahead.peek(Token![crate]) + || lookahead.peek(Token![::])) + { + input.parse().map(Item::Macro) + } else { + Err(lookahead.error()) + }?; + + attrs.extend(item.replace_attrs(Vec::new())); + item.replace_attrs(attrs); + Ok(item) + } + + struct FlexibleItemType { + vis: Visibility, + defaultness: Option<Token![default]>, + type_token: Token![type], + ident: Ident, + generics: Generics, + colon_token: Option<Token![:]>, + bounds: Punctuated<TypeParamBound, Token![+]>, + ty: Option<(Token![=], Type)>, + semi_token: Token![;], + } + + enum TypeDefaultness { + Optional, + Disallowed, + } + + enum WhereClauseLocation { + // type Ty<T> where T: 'static = T; + BeforeEq, + // type Ty<T> = T where T: 'static; + AfterEq, + // TODO: goes away once the migration period on rust-lang/rust#89122 is over + Both, + } + + impl FlexibleItemType { + fn parse( + input: ParseStream, + allow_defaultness: TypeDefaultness, + where_clause_location: WhereClauseLocation, + ) -> Result<Self> { + let vis: Visibility = input.parse()?; + let defaultness: Option<Token![default]> = match allow_defaultness { + TypeDefaultness::Optional => input.parse()?, + TypeDefaultness::Disallowed => None, + }; + let type_token: Token![type] = input.parse()?; + let ident: Ident = input.parse()?; + let mut generics: Generics = input.parse()?; + let (colon_token, bounds) = Self::parse_optional_bounds(input)?; + + match where_clause_location { + WhereClauseLocation::BeforeEq | WhereClauseLocation::Both => { + generics.where_clause = input.parse()?; + } + WhereClauseLocation::AfterEq => {} + } + + let ty = Self::parse_optional_definition(input)?; + + match where_clause_location { + WhereClauseLocation::AfterEq | WhereClauseLocation::Both + if generics.where_clause.is_none() => + { + generics.where_clause = input.parse()?; + } + _ => {} + } + + let semi_token: Token![;] = input.parse()?; + + Ok(FlexibleItemType { + vis, + defaultness, + type_token, + ident, + generics, + colon_token, + bounds, + ty, + semi_token, + }) + } + + fn parse_optional_bounds( + input: ParseStream, + ) -> Result<(Option<Token![:]>, Punctuated<TypeParamBound, Token![+]>)> { + let colon_token: Option<Token![:]> = input.parse()?; + + let mut bounds = Punctuated::new(); + if colon_token.is_some() { + loop { + if input.peek(Token![where]) || input.peek(Token![=]) || input.peek(Token![;]) { + break; + } + bounds.push_value(input.parse::<TypeParamBound>()?); + if input.peek(Token![where]) || input.peek(Token![=]) || input.peek(Token![;]) { + break; + } + bounds.push_punct(input.parse::<Token![+]>()?); + } + } + + Ok((colon_token, bounds)) + } + + fn parse_optional_definition(input: ParseStream) -> Result<Option<(Token![=], Type)>> { + let eq_token: Option<Token![=]> = input.parse()?; + if let Some(eq_token) = eq_token { + let definition: Type = input.parse()?; + Ok(Some((eq_token, definition))) + } else { + Ok(None) + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemMacro { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let path = input.call(Path::parse_mod_style)?; + let bang_token: Token![!] 
= input.parse()?; + let ident: Option<Ident> = if input.peek(Token![try]) { + input.call(Ident::parse_any).map(Some) + } else { + input.parse() + }?; + let (delimiter, tokens) = input.call(mac::parse_delimiter)?; + let semi_token: Option<Token![;]> = if !delimiter.is_brace() { + Some(input.parse()?) + } else { + None + }; + Ok(ItemMacro { + attrs, + ident, + mac: Macro { + path, + bang_token, + delimiter, + tokens, + }, + semi_token, + }) + } + } + + fn parse_macro2(begin: ParseBuffer, _vis: Visibility, input: ParseStream) -> Result<Item> { + input.parse::<Token![macro]>()?; + input.parse::<Ident>()?; + + let mut lookahead = input.lookahead1(); + if lookahead.peek(token::Paren) { + let paren_content; + parenthesized!(paren_content in input); + paren_content.parse::<TokenStream>()?; + lookahead = input.lookahead1(); + } + + if lookahead.peek(token::Brace) { + let brace_content; + braced!(brace_content in input); + brace_content.parse::<TokenStream>()?; + } else { + return Err(lookahead.error()); + } + + Ok(Item::Verbatim(verbatim::between(&begin, input))) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemExternCrate { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ItemExternCrate { + attrs: input.call(Attribute::parse_outer)?, + vis: input.parse()?, + extern_token: input.parse()?, + crate_token: input.parse()?, + ident: { + if input.peek(Token![self]) { + input.call(Ident::parse_any)? + } else { + input.parse()? + } + }, + rename: { + if input.peek(Token![as]) { + let as_token: Token![as] = input.parse()?; + let rename: Ident = if input.peek(Token![_]) { + Ident::from(input.parse::<Token![_]>()?) + } else { + input.parse()? + }; + Some((as_token, rename)) + } else { + None + } + }, + semi_token: input.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemUse { + fn parse(input: ParseStream) -> Result<Self> { + let allow_crate_root_in_path = false; + parse_item_use(input, allow_crate_root_in_path).map(Option::unwrap) + } + } + + fn parse_item_use( + input: ParseStream, + allow_crate_root_in_path: bool, + ) -> Result<Option<ItemUse>> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let use_token: Token![use] = input.parse()?; + let leading_colon: Option<Token![::]> = input.parse()?; + let tree = parse_use_tree(input, allow_crate_root_in_path && leading_colon.is_none())?; + let semi_token: Token![;] = input.parse()?; + + let tree = match tree { + Some(tree) => tree, + None => return Ok(None), + }; + + Ok(Some(ItemUse { + attrs, + vis, + use_token, + leading_colon, + tree, + semi_token, + })) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for UseTree { + fn parse(input: ParseStream) -> Result<UseTree> { + let allow_crate_root_in_path = false; + parse_use_tree(input, allow_crate_root_in_path).map(Option::unwrap) + } + } + + fn parse_use_tree( + input: ParseStream, + allow_crate_root_in_path: bool, + ) -> Result<Option<UseTree>> { + let lookahead = input.lookahead1(); + if lookahead.peek(Ident) + || lookahead.peek(Token![self]) + || lookahead.peek(Token![super]) + || lookahead.peek(Token![crate]) + || lookahead.peek(Token![try]) + { + let ident = input.call(Ident::parse_any)?; + if input.peek(Token![::]) { + Ok(Some(UseTree::Path(UsePath { + ident, + colon2_token: input.parse()?, + tree: Box::new(input.parse()?), + }))) + } else if input.peek(Token![as]) { + Ok(Some(UseTree::Rename(UseRename { + ident, + as_token: input.parse()?, + rename: { + 
if input.peek(Ident) { + input.parse()? + } else if input.peek(Token![_]) { + Ident::from(input.parse::<Token![_]>()?) + } else { + return Err(input.error("expected identifier or underscore")); + } + }, + }))) + } else { + Ok(Some(UseTree::Name(UseName { ident }))) + } + } else if lookahead.peek(Token![*]) { + Ok(Some(UseTree::Glob(UseGlob { + star_token: input.parse()?, + }))) + } else if lookahead.peek(token::Brace) { + let content; + let brace_token = braced!(content in input); + let mut items = Punctuated::new(); + let mut has_any_crate_root_in_path = false; + loop { + if content.is_empty() { + break; + } + let this_tree_starts_with_crate_root = + allow_crate_root_in_path && content.parse::<Option<Token![::]>>()?.is_some(); + has_any_crate_root_in_path |= this_tree_starts_with_crate_root; + match parse_use_tree( + &content, + allow_crate_root_in_path && !this_tree_starts_with_crate_root, + )? { + Some(tree) => items.push_value(tree), + None => has_any_crate_root_in_path = true, + } + if content.is_empty() { + break; + } + let comma: Token![,] = content.parse()?; + items.push_punct(comma); + } + if has_any_crate_root_in_path { + Ok(None) + } else { + Ok(Some(UseTree::Group(UseGroup { brace_token, items }))) + } + } else { + Err(lookahead.error()) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemStatic { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ItemStatic { + attrs: input.call(Attribute::parse_outer)?, + vis: input.parse()?, + static_token: input.parse()?, + mutability: input.parse()?, + ident: input.parse()?, + colon_token: input.parse()?, + ty: input.parse()?, + eq_token: input.parse()?, + expr: input.parse()?, + semi_token: input.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemConst { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let const_token: Token![const] = input.parse()?; + + let lookahead = input.lookahead1(); + let ident = if lookahead.peek(Ident) || lookahead.peek(Token![_]) { + input.call(Ident::parse_any)? 
+ } else { + return Err(lookahead.error()); + }; + + let colon_token: Token![:] = input.parse()?; + let ty: Type = input.parse()?; + let eq_token: Token![=] = input.parse()?; + let expr: Expr = input.parse()?; + let semi_token: Token![;] = input.parse()?; + + Ok(ItemConst { + attrs, + vis, + const_token, + ident, + generics: Generics::default(), + colon_token, + ty: Box::new(ty), + eq_token, + expr: Box::new(expr), + semi_token, + }) + } + } + + fn peek_signature(input: ParseStream) -> bool { + let fork = input.fork(); + fork.parse::<Option<Token![const]>>().is_ok() + && fork.parse::<Option<Token![async]>>().is_ok() + && fork.parse::<Option<Token![unsafe]>>().is_ok() + && fork.parse::<Option<Abi>>().is_ok() + && fork.peek(Token![fn]) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Signature { + fn parse(input: ParseStream) -> Result<Self> { + let constness: Option<Token![const]> = input.parse()?; + let asyncness: Option<Token![async]> = input.parse()?; + let unsafety: Option<Token![unsafe]> = input.parse()?; + let abi: Option<Abi> = input.parse()?; + let fn_token: Token![fn] = input.parse()?; + let ident: Ident = input.parse()?; + let mut generics: Generics = input.parse()?; + + let content; + let paren_token = parenthesized!(content in input); + let (inputs, variadic) = parse_fn_args(&content)?; + + let output: ReturnType = input.parse()?; + generics.where_clause = input.parse()?; + + Ok(Signature { + constness, + asyncness, + unsafety, + abi, + fn_token, + ident, + generics, + paren_token, + inputs, + variadic, + output, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemFn { + fn parse(input: ParseStream) -> Result<Self> { + let outer_attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let sig: Signature = input.parse()?; + parse_rest_of_fn(input, outer_attrs, vis, sig) + } + } + + fn parse_rest_of_fn( + input: ParseStream, + mut attrs: Vec<Attribute>, + vis: Visibility, + sig: Signature, + ) -> Result<ItemFn> { + let content; + let brace_token = braced!(content in input); + attr::parsing::parse_inner(&content, &mut attrs)?; + let stmts = content.call(Block::parse_within)?; + + Ok(ItemFn { + attrs, + vis, + sig, + block: Box::new(Block { brace_token, stmts }), + }) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for FnArg { + fn parse(input: ParseStream) -> Result<Self> { + let allow_variadic = false; + let attrs = input.call(Attribute::parse_outer)?; + match parse_fn_arg_or_variadic(input, attrs, allow_variadic)? { + FnArgOrVariadic::FnArg(arg) => Ok(arg), + FnArgOrVariadic::Variadic(_) => unreachable!(), + } + } + } + + enum FnArgOrVariadic { + FnArg(FnArg), + Variadic(Variadic), + } + + fn parse_fn_arg_or_variadic( + input: ParseStream, + attrs: Vec<Attribute>, + allow_variadic: bool, + ) -> Result<FnArgOrVariadic> { + let ahead = input.fork(); + if let Ok(mut receiver) = ahead.parse::<Receiver>() { + input.advance_to(&ahead); + receiver.attrs = attrs; + return Ok(FnArgOrVariadic::FnArg(FnArg::Receiver(receiver))); + } + + // Hack to parse pre-2018 syntax in + // test/ui/rfc-2565-param-attrs/param-attrs-pretty.rs + // because the rest of the test case is valuable. 
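+        //
+        // Concretely (illustrative): a pre-2018 anonymous parameter such as
+        // `fn f(Vec<u8>);` carries a bare type and no pattern, so the peek
+        // for `Ident` followed by `<` below accepts it by synthesizing a
+        // wildcard pattern, roughly as if `_: Vec<u8>` had been written.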
+ if input.peek(Ident) && input.peek2(Token![<]) { + let span = input.fork().parse::<Ident>()?.span(); + return Ok(FnArgOrVariadic::FnArg(FnArg::Typed(PatType { + attrs, + pat: Box::new(Pat::Wild(PatWild { + attrs: Vec::new(), + underscore_token: Token![_](span), + })), + colon_token: Token![:](span), + ty: input.parse()?, + }))); + } + + let pat = Box::new(Pat::parse_single(input)?); + let colon_token: Token![:] = input.parse()?; + + if allow_variadic { + if let Some(dots) = input.parse::<Option<Token![...]>>()? { + return Ok(FnArgOrVariadic::Variadic(Variadic { + attrs, + pat: Some((pat, colon_token)), + dots, + comma: None, + })); + } + } + + Ok(FnArgOrVariadic::FnArg(FnArg::Typed(PatType { + attrs, + pat, + colon_token, + ty: input.parse()?, + }))) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Receiver { + fn parse(input: ParseStream) -> Result<Self> { + let reference = if input.peek(Token![&]) { + let ampersand: Token![&] = input.parse()?; + let lifetime: Option<Lifetime> = input.parse()?; + Some((ampersand, lifetime)) + } else { + None + }; + let mutability: Option<Token![mut]> = input.parse()?; + let self_token: Token![self] = input.parse()?; + let colon_token: Option<Token![:]> = if reference.is_some() { + None + } else { + input.parse()? + }; + let ty: Type = if colon_token.is_some() { + input.parse()? + } else { + let mut ty = Type::Path(TypePath { + qself: None, + path: Path::from(Ident::new("Self", self_token.span)), + }); + if let Some((ampersand, lifetime)) = reference.as_ref() { + ty = Type::Reference(TypeReference { + and_token: Token![&](ampersand.span), + lifetime: lifetime.clone(), + mutability: mutability.as_ref().map(|m| Token![mut](m.span)), + elem: Box::new(ty), + }); + } + ty + }; + Ok(Receiver { + attrs: Vec::new(), + reference, + mutability, + self_token, + colon_token, + ty: Box::new(ty), + }) + } + } + + fn parse_fn_args( + input: ParseStream, + ) -> Result<(Punctuated<FnArg, Token![,]>, Option<Variadic>)> { + let mut args = Punctuated::new(); + let mut variadic = None; + let mut has_receiver = false; + + while !input.is_empty() { + let attrs = input.call(Attribute::parse_outer)?; + + if let Some(dots) = input.parse::<Option<Token![...]>>()? { + variadic = Some(Variadic { + attrs, + pat: None, + dots, + comma: if input.is_empty() { + None + } else { + Some(input.parse()?) + }, + }); + break; + } + + let allow_variadic = true; + let arg = match parse_fn_arg_or_variadic(input, attrs, allow_variadic)? { + FnArgOrVariadic::FnArg(arg) => arg, + FnArgOrVariadic::Variadic(arg) => { + variadic = Some(Variadic { + comma: if input.is_empty() { + None + } else { + Some(input.parse()?) 
+ }, + ..arg + }); + break; + } + }; + + match &arg { + FnArg::Receiver(receiver) if has_receiver => { + return Err(Error::new( + receiver.self_token.span, + "unexpected second method receiver", + )); + } + FnArg::Receiver(receiver) if !args.is_empty() => { + return Err(Error::new( + receiver.self_token.span, + "unexpected method receiver", + )); + } + FnArg::Receiver(_) => has_receiver = true, + FnArg::Typed(_) => {} + } + args.push_value(arg); + + if input.is_empty() { + break; + } + + let comma: Token![,] = input.parse()?; + args.push_punct(comma); + } + + Ok((args, variadic)) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemMod { + fn parse(input: ParseStream) -> Result<Self> { + let mut attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let unsafety: Option<Token![unsafe]> = input.parse()?; + let mod_token: Token![mod] = input.parse()?; + let ident: Ident = if input.peek(Token![try]) { + input.call(Ident::parse_any) + } else { + input.parse() + }?; + + let lookahead = input.lookahead1(); + if lookahead.peek(Token![;]) { + Ok(ItemMod { + attrs, + vis, + unsafety, + mod_token, + ident, + content: None, + semi: Some(input.parse()?), + }) + } else if lookahead.peek(token::Brace) { + let content; + let brace_token = braced!(content in input); + attr::parsing::parse_inner(&content, &mut attrs)?; + + let mut items = Vec::new(); + while !content.is_empty() { + items.push(content.parse()?); + } + + Ok(ItemMod { + attrs, + vis, + unsafety, + mod_token, + ident, + content: Some((brace_token, items)), + semi: None, + }) + } else { + Err(lookahead.error()) + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemForeignMod { + fn parse(input: ParseStream) -> Result<Self> { + let mut attrs = input.call(Attribute::parse_outer)?; + let unsafety: Option<Token![unsafe]> = input.parse()?; + let abi: Abi = input.parse()?; + + let content; + let brace_token = braced!(content in input); + attr::parsing::parse_inner(&content, &mut attrs)?; + let mut items = Vec::new(); + while !content.is_empty() { + items.push(content.parse()?); + } + + Ok(ItemForeignMod { + attrs, + unsafety, + abi, + brace_token, + items, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ForeignItem { + fn parse(input: ParseStream) -> Result<Self> { + let begin = input.fork(); + let mut attrs = input.call(Attribute::parse_outer)?; + let ahead = input.fork(); + let vis: Visibility = ahead.parse()?; + + let lookahead = ahead.lookahead1(); + let mut item = if lookahead.peek(Token![fn]) || peek_signature(&ahead) { + let vis: Visibility = input.parse()?; + let sig: Signature = input.parse()?; + if input.peek(token::Brace) { + let content; + braced!(content in input); + content.call(Attribute::parse_inner)?; + content.call(Block::parse_within)?; + + Ok(ForeignItem::Verbatim(verbatim::between(&begin, input))) + } else { + Ok(ForeignItem::Fn(ForeignItemFn { + attrs: Vec::new(), + vis, + sig, + semi_token: input.parse()?, + })) + } + } else if lookahead.peek(Token![static]) { + let vis = input.parse()?; + let static_token = input.parse()?; + let mutability = input.parse()?; + let ident = input.parse()?; + let colon_token = input.parse()?; + let ty = input.parse()?; + if input.peek(Token![=]) { + input.parse::<Token![=]>()?; + input.parse::<Expr>()?; + input.parse::<Token![;]>()?; + Ok(ForeignItem::Verbatim(verbatim::between(&begin, input))) + } else { + Ok(ForeignItem::Static(ForeignItemStatic { + attrs: 
Vec::new(), + vis, + static_token, + mutability, + ident, + colon_token, + ty, + semi_token: input.parse()?, + })) + } + } else if lookahead.peek(Token![type]) { + parse_foreign_item_type(begin, input) + } else if vis.is_inherited() + && (lookahead.peek(Ident) + || lookahead.peek(Token![self]) + || lookahead.peek(Token![super]) + || lookahead.peek(Token![crate]) + || lookahead.peek(Token![::])) + { + input.parse().map(ForeignItem::Macro) + } else { + Err(lookahead.error()) + }?; + + let item_attrs = match &mut item { + ForeignItem::Fn(item) => &mut item.attrs, + ForeignItem::Static(item) => &mut item.attrs, + ForeignItem::Type(item) => &mut item.attrs, + ForeignItem::Macro(item) => &mut item.attrs, + ForeignItem::Verbatim(_) => return Ok(item), + }; + attrs.append(item_attrs); + *item_attrs = attrs; + + Ok(item) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ForeignItemFn { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let sig: Signature = input.parse()?; + let semi_token: Token![;] = input.parse()?; + Ok(ForeignItemFn { + attrs, + vis, + sig, + semi_token, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ForeignItemStatic { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ForeignItemStatic { + attrs: input.call(Attribute::parse_outer)?, + vis: input.parse()?, + static_token: input.parse()?, + mutability: input.parse()?, + ident: input.parse()?, + colon_token: input.parse()?, + ty: input.parse()?, + semi_token: input.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ForeignItemType { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ForeignItemType { + attrs: input.call(Attribute::parse_outer)?, + vis: input.parse()?, + type_token: input.parse()?, + ident: input.parse()?, + generics: { + let mut generics: Generics = input.parse()?; + generics.where_clause = input.parse()?; + generics + }, + semi_token: input.parse()?, + }) + } + } + + fn parse_foreign_item_type(begin: ParseBuffer, input: ParseStream) -> Result<ForeignItem> { + let FlexibleItemType { + vis, + defaultness: _, + type_token, + ident, + generics, + colon_token, + bounds: _, + ty, + semi_token, + } = FlexibleItemType::parse( + input, + TypeDefaultness::Disallowed, + WhereClauseLocation::Both, + )?; + + if colon_token.is_some() || ty.is_some() { + Ok(ForeignItem::Verbatim(verbatim::between(&begin, input))) + } else { + Ok(ForeignItem::Type(ForeignItemType { + attrs: Vec::new(), + vis, + type_token, + ident, + generics, + semi_token, + })) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ForeignItemMacro { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let mac: Macro = input.parse()?; + let semi_token: Option<Token![;]> = if mac.delimiter.is_brace() { + None + } else { + Some(input.parse()?) 
+ }; + Ok(ForeignItemMacro { + attrs, + mac, + semi_token, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemType { + fn parse(input: ParseStream) -> Result<Self> { + Ok(ItemType { + attrs: input.call(Attribute::parse_outer)?, + vis: input.parse()?, + type_token: input.parse()?, + ident: input.parse()?, + generics: { + let mut generics: Generics = input.parse()?; + generics.where_clause = input.parse()?; + generics + }, + eq_token: input.parse()?, + ty: input.parse()?, + semi_token: input.parse()?, + }) + } + } + + fn parse_item_type(begin: ParseBuffer, input: ParseStream) -> Result<Item> { + let FlexibleItemType { + vis, + defaultness: _, + type_token, + ident, + generics, + colon_token, + bounds: _, + ty, + semi_token, + } = FlexibleItemType::parse( + input, + TypeDefaultness::Disallowed, + WhereClauseLocation::BeforeEq, + )?; + + let (eq_token, ty) = match ty { + Some(ty) if colon_token.is_none() => ty, + _ => return Ok(Item::Verbatim(verbatim::between(&begin, input))), + }; + + Ok(Item::Type(ItemType { + attrs: Vec::new(), + vis, + type_token, + ident, + generics, + eq_token, + ty: Box::new(ty), + semi_token, + })) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemStruct { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis = input.parse::<Visibility>()?; + let struct_token = input.parse::<Token![struct]>()?; + let ident = input.parse::<Ident>()?; + let generics = input.parse::<Generics>()?; + let (where_clause, fields, semi_token) = derive::parsing::data_struct(input)?; + Ok(ItemStruct { + attrs, + vis, + struct_token, + ident, + generics: Generics { + where_clause, + ..generics + }, + fields, + semi_token, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemEnum { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis = input.parse::<Visibility>()?; + let enum_token = input.parse::<Token![enum]>()?; + let ident = input.parse::<Ident>()?; + let generics = input.parse::<Generics>()?; + let (where_clause, brace_token, variants) = derive::parsing::data_enum(input)?; + Ok(ItemEnum { + attrs, + vis, + enum_token, + ident, + generics: Generics { + where_clause, + ..generics + }, + brace_token, + variants, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemUnion { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis = input.parse::<Visibility>()?; + let union_token = input.parse::<Token![union]>()?; + let ident = input.parse::<Ident>()?; + let generics = input.parse::<Generics>()?; + let (where_clause, fields) = derive::parsing::data_union(input)?; + Ok(ItemUnion { + attrs, + vis, + union_token, + ident, + generics: Generics { + where_clause, + ..generics + }, + fields, + }) + } + } + + fn parse_trait_or_trait_alias(input: ParseStream) -> Result<Item> { + let (attrs, vis, trait_token, ident, generics) = parse_start_of_trait_alias(input)?; + let lookahead = input.lookahead1(); + if lookahead.peek(token::Brace) + || lookahead.peek(Token![:]) + || lookahead.peek(Token![where]) + { + let unsafety = None; + let auto_token = None; + parse_rest_of_trait( + input, + attrs, + vis, + unsafety, + auto_token, + trait_token, + ident, + generics, + ) + .map(Item::Trait) + } else if lookahead.peek(Token![=]) { + parse_rest_of_trait_alias(input, attrs, vis, trait_token, ident, generics) + 
.map(Item::TraitAlias) + } else { + Err(lookahead.error()) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemTrait { + fn parse(input: ParseStream) -> Result<Self> { + let outer_attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let unsafety: Option<Token![unsafe]> = input.parse()?; + let auto_token: Option<Token![auto]> = input.parse()?; + let trait_token: Token![trait] = input.parse()?; + let ident: Ident = input.parse()?; + let generics: Generics = input.parse()?; + parse_rest_of_trait( + input, + outer_attrs, + vis, + unsafety, + auto_token, + trait_token, + ident, + generics, + ) + } + } + + fn parse_rest_of_trait( + input: ParseStream, + mut attrs: Vec<Attribute>, + vis: Visibility, + unsafety: Option<Token![unsafe]>, + auto_token: Option<Token![auto]>, + trait_token: Token![trait], + ident: Ident, + mut generics: Generics, + ) -> Result<ItemTrait> { + let colon_token: Option<Token![:]> = input.parse()?; + + let mut supertraits = Punctuated::new(); + if colon_token.is_some() { + loop { + if input.peek(Token![where]) || input.peek(token::Brace) { + break; + } + supertraits.push_value(input.parse()?); + if input.peek(Token![where]) || input.peek(token::Brace) { + break; + } + supertraits.push_punct(input.parse()?); + } + } + + generics.where_clause = input.parse()?; + + let content; + let brace_token = braced!(content in input); + attr::parsing::parse_inner(&content, &mut attrs)?; + let mut items = Vec::new(); + while !content.is_empty() { + items.push(content.parse()?); + } + + Ok(ItemTrait { + attrs, + vis, + unsafety, + auto_token, + restriction: None, + trait_token, + ident, + generics, + colon_token, + supertraits, + brace_token, + items, + }) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemTraitAlias { + fn parse(input: ParseStream) -> Result<Self> { + let (attrs, vis, trait_token, ident, generics) = parse_start_of_trait_alias(input)?; + parse_rest_of_trait_alias(input, attrs, vis, trait_token, ident, generics) + } + } + + fn parse_start_of_trait_alias( + input: ParseStream, + ) -> Result<(Vec<Attribute>, Visibility, Token![trait], Ident, Generics)> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let trait_token: Token![trait] = input.parse()?; + let ident: Ident = input.parse()?; + let generics: Generics = input.parse()?; + Ok((attrs, vis, trait_token, ident, generics)) + } + + fn parse_rest_of_trait_alias( + input: ParseStream, + attrs: Vec<Attribute>, + vis: Visibility, + trait_token: Token![trait], + ident: Ident, + mut generics: Generics, + ) -> Result<ItemTraitAlias> { + let eq_token: Token![=] = input.parse()?; + + let mut bounds = Punctuated::new(); + loop { + if input.peek(Token![where]) || input.peek(Token![;]) { + break; + } + bounds.push_value(input.parse()?); + if input.peek(Token![where]) || input.peek(Token![;]) { + break; + } + bounds.push_punct(input.parse()?); + } + + generics.where_clause = input.parse()?; + let semi_token: Token![;] = input.parse()?; + + Ok(ItemTraitAlias { + attrs, + vis, + trait_token, + ident, + generics, + eq_token, + bounds, + semi_token, + }) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TraitItem { + fn parse(input: ParseStream) -> Result<Self> { + let begin = input.fork(); + let mut attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let defaultness: Option<Token![default]> = input.parse()?; + let ahead = input.fork(); + + 
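+            // Speculative parsing: the item is classified by peeking at the
+            // `ahead` fork, and `input` is only advanced (`advance_to`) once
+            // the shape is known. One consequence, sketched for illustration:
+            // `const FOO<T>: usize = 0;` in a trait carries generics that
+            // stable Rust does not accept, so the const branch below yields
+            // `TraitItem::Verbatim` for it instead of a parse error.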
let lookahead = ahead.lookahead1(); + let mut item = if lookahead.peek(Token![fn]) || peek_signature(&ahead) { + input.parse().map(TraitItem::Fn) + } else if lookahead.peek(Token![const]) { + let const_token: Token![const] = ahead.parse()?; + let lookahead = ahead.lookahead1(); + if lookahead.peek(Ident) || lookahead.peek(Token![_]) { + input.advance_to(&ahead); + let ident = input.call(Ident::parse_any)?; + let mut generics: Generics = input.parse()?; + let colon_token: Token![:] = input.parse()?; + let ty: Type = input.parse()?; + let default = if let Some(eq_token) = input.parse::<Option<Token![=]>>()? { + let expr: Expr = input.parse()?; + Some((eq_token, expr)) + } else { + None + }; + generics.where_clause = input.parse()?; + let semi_token: Token![;] = input.parse()?; + if generics.lt_token.is_none() && generics.where_clause.is_none() { + Ok(TraitItem::Const(TraitItemConst { + attrs: Vec::new(), + const_token, + ident, + generics, + colon_token, + ty, + default, + semi_token, + })) + } else { + return Ok(TraitItem::Verbatim(verbatim::between(&begin, input))); + } + } else if lookahead.peek(Token![async]) + || lookahead.peek(Token![unsafe]) + || lookahead.peek(Token![extern]) + || lookahead.peek(Token![fn]) + { + input.parse().map(TraitItem::Fn) + } else { + Err(lookahead.error()) + } + } else if lookahead.peek(Token![type]) { + parse_trait_item_type(begin.fork(), input) + } else if vis.is_inherited() + && defaultness.is_none() + && (lookahead.peek(Ident) + || lookahead.peek(Token![self]) + || lookahead.peek(Token![super]) + || lookahead.peek(Token![crate]) + || lookahead.peek(Token![::])) + { + input.parse().map(TraitItem::Macro) + } else { + Err(lookahead.error()) + }?; + + match (vis, defaultness) { + (Visibility::Inherited, None) => {} + _ => return Ok(TraitItem::Verbatim(verbatim::between(&begin, input))), + } + + let item_attrs = match &mut item { + TraitItem::Const(item) => &mut item.attrs, + TraitItem::Fn(item) => &mut item.attrs, + TraitItem::Type(item) => &mut item.attrs, + TraitItem::Macro(item) => &mut item.attrs, + TraitItem::Verbatim(_) => unreachable!(), + }; + attrs.append(item_attrs); + *item_attrs = attrs; + Ok(item) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TraitItemConst { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let const_token: Token![const] = input.parse()?; + + let lookahead = input.lookahead1(); + let ident = if lookahead.peek(Ident) || lookahead.peek(Token![_]) { + input.call(Ident::parse_any)? 
+ } else { + return Err(lookahead.error()); + }; + + let colon_token: Token![:] = input.parse()?; + let ty: Type = input.parse()?; + let default = if input.peek(Token![=]) { + let eq_token: Token![=] = input.parse()?; + let default: Expr = input.parse()?; + Some((eq_token, default)) + } else { + None + }; + let semi_token: Token![;] = input.parse()?; + + Ok(TraitItemConst { + attrs, + const_token, + ident, + generics: Generics::default(), + colon_token, + ty, + default, + semi_token, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TraitItemFn { + fn parse(input: ParseStream) -> Result<Self> { + let mut attrs = input.call(Attribute::parse_outer)?; + let sig: Signature = input.parse()?; + + let lookahead = input.lookahead1(); + let (brace_token, stmts, semi_token) = if lookahead.peek(token::Brace) { + let content; + let brace_token = braced!(content in input); + attr::parsing::parse_inner(&content, &mut attrs)?; + let stmts = content.call(Block::parse_within)?; + (Some(brace_token), stmts, None) + } else if lookahead.peek(Token![;]) { + let semi_token: Token![;] = input.parse()?; + (None, Vec::new(), Some(semi_token)) + } else { + return Err(lookahead.error()); + }; + + Ok(TraitItemFn { + attrs, + sig, + default: brace_token.map(|brace_token| Block { brace_token, stmts }), + semi_token, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TraitItemType { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let type_token: Token![type] = input.parse()?; + let ident: Ident = input.parse()?; + let mut generics: Generics = input.parse()?; + let (colon_token, bounds) = FlexibleItemType::parse_optional_bounds(input)?; + let default = FlexibleItemType::parse_optional_definition(input)?; + generics.where_clause = input.parse()?; + let semi_token: Token![;] = input.parse()?; + Ok(TraitItemType { + attrs, + type_token, + ident, + generics, + colon_token, + bounds, + default, + semi_token, + }) + } + } + + fn parse_trait_item_type(begin: ParseBuffer, input: ParseStream) -> Result<TraitItem> { + let FlexibleItemType { + vis, + defaultness: _, + type_token, + ident, + generics, + colon_token, + bounds, + ty, + semi_token, + } = FlexibleItemType::parse( + input, + TypeDefaultness::Disallowed, + WhereClauseLocation::AfterEq, + )?; + + if vis.is_some() { + Ok(TraitItem::Verbatim(verbatim::between(&begin, input))) + } else { + Ok(TraitItem::Type(TraitItemType { + attrs: Vec::new(), + type_token, + ident, + generics, + colon_token, + bounds, + default: ty, + semi_token, + })) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TraitItemMacro { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let mac: Macro = input.parse()?; + let semi_token: Option<Token![;]> = if mac.delimiter.is_brace() { + None + } else { + Some(input.parse()?) 
+ }; + Ok(TraitItemMacro { + attrs, + mac, + semi_token, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ItemImpl { + fn parse(input: ParseStream) -> Result<Self> { + let allow_verbatim_impl = false; + parse_impl(input, allow_verbatim_impl).map(Option::unwrap) + } + } + + fn parse_impl(input: ParseStream, allow_verbatim_impl: bool) -> Result<Option<ItemImpl>> { + let mut attrs = input.call(Attribute::parse_outer)?; + let has_visibility = allow_verbatim_impl && input.parse::<Visibility>()?.is_some(); + let defaultness: Option<Token![default]> = input.parse()?; + let unsafety: Option<Token![unsafe]> = input.parse()?; + let impl_token: Token![impl] = input.parse()?; + + let has_generics = input.peek(Token![<]) + && (input.peek2(Token![>]) + || input.peek2(Token![#]) + || (input.peek2(Ident) || input.peek2(Lifetime)) + && (input.peek3(Token![:]) + || input.peek3(Token![,]) + || input.peek3(Token![>]) + || input.peek3(Token![=])) + || input.peek2(Token![const])); + let mut generics: Generics = if has_generics { + input.parse()? + } else { + Generics::default() + }; + + let is_const_impl = allow_verbatim_impl + && (input.peek(Token![const]) || input.peek(Token![?]) && input.peek2(Token![const])); + if is_const_impl { + input.parse::<Option<Token![?]>>()?; + input.parse::<Token![const]>()?; + } + + let begin = input.fork(); + let polarity = if input.peek(Token![!]) && !input.peek2(token::Brace) { + Some(input.parse::<Token![!]>()?) + } else { + None + }; + + #[cfg(not(feature = "printing"))] + let first_ty_span = input.span(); + let mut first_ty: Type = input.parse()?; + let self_ty: Type; + let trait_; + + let is_impl_for = input.peek(Token![for]); + if is_impl_for { + let for_token: Token![for] = input.parse()?; + let mut first_ty_ref = &first_ty; + while let Type::Group(ty) = first_ty_ref { + first_ty_ref = &ty.elem; + } + if let Type::Path(TypePath { qself: None, .. 
}) = first_ty_ref { + while let Type::Group(ty) = first_ty { + first_ty = *ty.elem; + } + if let Type::Path(TypePath { qself: None, path }) = first_ty { + trait_ = Some((polarity, path, for_token)); + } else { + unreachable!(); + } + } else if !allow_verbatim_impl { + #[cfg(feature = "printing")] + return Err(Error::new_spanned(first_ty_ref, "expected trait path")); + #[cfg(not(feature = "printing"))] + return Err(Error::new(first_ty_span, "expected trait path")); + } else { + trait_ = None; + } + self_ty = input.parse()?; + } else { + trait_ = None; + self_ty = if polarity.is_none() { + first_ty + } else { + Type::Verbatim(verbatim::between(&begin, input)) + }; + } + + generics.where_clause = input.parse()?; + + let content; + let brace_token = braced!(content in input); + attr::parsing::parse_inner(&content, &mut attrs)?; + + let mut items = Vec::new(); + while !content.is_empty() { + items.push(content.parse()?); + } + + if has_visibility || is_const_impl || is_impl_for && trait_.is_none() { + Ok(None) + } else { + Ok(Some(ItemImpl { + attrs, + defaultness, + unsafety, + impl_token, + generics, + trait_, + self_ty: Box::new(self_ty), + brace_token, + items, + })) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ImplItem { + fn parse(input: ParseStream) -> Result<Self> { + let begin = input.fork(); + let mut attrs = input.call(Attribute::parse_outer)?; + let ahead = input.fork(); + let vis: Visibility = ahead.parse()?; + + let mut lookahead = ahead.lookahead1(); + let defaultness = if lookahead.peek(Token![default]) && !ahead.peek2(Token![!]) { + let defaultness: Token![default] = ahead.parse()?; + lookahead = ahead.lookahead1(); + Some(defaultness) + } else { + None + }; + + let mut item = if lookahead.peek(Token![fn]) || peek_signature(&ahead) { + let allow_omitted_body = true; + if let Some(item) = parse_impl_item_fn(input, allow_omitted_body)? { + Ok(ImplItem::Fn(item)) + } else { + Ok(ImplItem::Verbatim(verbatim::between(&begin, input))) + } + } else if lookahead.peek(Token![const]) { + input.advance_to(&ahead); + let const_token: Token![const] = input.parse()?; + let lookahead = input.lookahead1(); + let ident = if lookahead.peek(Ident) || lookahead.peek(Token![_]) { + input.call(Ident::parse_any)? + } else { + return Err(lookahead.error()); + }; + let mut generics: Generics = input.parse()?; + let colon_token: Token![:] = input.parse()?; + let ty: Type = input.parse()?; + let value = if let Some(eq_token) = input.parse::<Option<Token![=]>>()? 
{ + let expr: Expr = input.parse()?; + Some((eq_token, expr)) + } else { + None + }; + generics.where_clause = input.parse()?; + let semi_token: Token![;] = input.parse()?; + return match value { + Some((eq_token, expr)) + if generics.lt_token.is_none() && generics.where_clause.is_none() => + { + Ok(ImplItem::Const(ImplItemConst { + attrs, + vis, + defaultness, + const_token, + ident, + generics, + colon_token, + ty, + eq_token, + expr, + semi_token, + })) + } + _ => Ok(ImplItem::Verbatim(verbatim::between(&begin, input))), + }; + } else if lookahead.peek(Token![type]) { + parse_impl_item_type(begin, input) + } else if vis.is_inherited() + && defaultness.is_none() + && (lookahead.peek(Ident) + || lookahead.peek(Token![self]) + || lookahead.peek(Token![super]) + || lookahead.peek(Token![crate]) + || lookahead.peek(Token![::])) + { + input.parse().map(ImplItem::Macro) + } else { + Err(lookahead.error()) + }?; + + { + let item_attrs = match &mut item { + ImplItem::Const(item) => &mut item.attrs, + ImplItem::Fn(item) => &mut item.attrs, + ImplItem::Type(item) => &mut item.attrs, + ImplItem::Macro(item) => &mut item.attrs, + ImplItem::Verbatim(_) => return Ok(item), + }; + attrs.append(item_attrs); + *item_attrs = attrs; + } + + Ok(item) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ImplItemConst { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let defaultness: Option<Token![default]> = input.parse()?; + let const_token: Token![const] = input.parse()?; + + let lookahead = input.lookahead1(); + let ident = if lookahead.peek(Ident) || lookahead.peek(Token![_]) { + input.call(Ident::parse_any)? + } else { + return Err(lookahead.error()); + }; + + let colon_token: Token![:] = input.parse()?; + let ty: Type = input.parse()?; + let eq_token: Token![=] = input.parse()?; + let expr: Expr = input.parse()?; + let semi_token: Token![;] = input.parse()?; + + Ok(ImplItemConst { + attrs, + vis, + defaultness, + const_token, + ident, + generics: Generics::default(), + colon_token, + ty, + eq_token, + expr, + semi_token, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ImplItemFn { + fn parse(input: ParseStream) -> Result<Self> { + let allow_omitted_body = false; + parse_impl_item_fn(input, allow_omitted_body).map(Option::unwrap) + } + } + + fn parse_impl_item_fn( + input: ParseStream, + allow_omitted_body: bool, + ) -> Result<Option<ImplItemFn>> { + let mut attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let defaultness: Option<Token![default]> = input.parse()?; + let sig: Signature = input.parse()?; + + // Accept functions without a body in an impl block because rustc's + // *parser* does not reject them (the compilation error is emitted later + // than parsing) and it can be useful for macro DSLs. 
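+        //
+        // For example (illustrative): a macro-generated
+        // `impl Thing { fn f(&self); }` parses successfully, with the
+        // bodyless `f` surfacing as `ImplItem::Verbatim`, because this
+        // function returns `None` for it and the caller falls back to the
+        // verbatim representation.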
+ if allow_omitted_body && input.parse::<Option<Token![;]>>()?.is_some() { + return Ok(None); + } + + let content; + let brace_token = braced!(content in input); + attrs.extend(content.call(Attribute::parse_inner)?); + let block = Block { + brace_token, + stmts: content.call(Block::parse_within)?, + }; + + Ok(Some(ImplItemFn { + attrs, + vis, + defaultness, + sig, + block, + })) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ImplItemType { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let defaultness: Option<Token![default]> = input.parse()?; + let type_token: Token![type] = input.parse()?; + let ident: Ident = input.parse()?; + let mut generics: Generics = input.parse()?; + let eq_token: Token![=] = input.parse()?; + let ty: Type = input.parse()?; + generics.where_clause = input.parse()?; + let semi_token: Token![;] = input.parse()?; + Ok(ImplItemType { + attrs, + vis, + defaultness, + type_token, + ident, + generics, + eq_token, + ty, + semi_token, + }) + } + } + + fn parse_impl_item_type(begin: ParseBuffer, input: ParseStream) -> Result<ImplItem> { + let FlexibleItemType { + vis, + defaultness, + type_token, + ident, + generics, + colon_token, + bounds: _, + ty, + semi_token, + } = FlexibleItemType::parse( + input, + TypeDefaultness::Optional, + WhereClauseLocation::AfterEq, + )?; + + let (eq_token, ty) = match ty { + Some(ty) if colon_token.is_none() => ty, + _ => return Ok(ImplItem::Verbatim(verbatim::between(&begin, input))), + }; + + Ok(ImplItem::Type(ImplItemType { + attrs: Vec::new(), + vis, + defaultness, + type_token, + ident, + generics, + eq_token, + ty, + semi_token, + })) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ImplItemMacro { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let mac: Macro = input.parse()?; + let semi_token: Option<Token![;]> = if mac.delimiter.is_brace() { + None + } else { + Some(input.parse()?) 
+ }; + Ok(ImplItemMacro { + attrs, + mac, + semi_token, + }) + } + } + + impl Visibility { + fn is_inherited(&self) -> bool { + match self { + Visibility::Inherited => true, + _ => false, + } + } + } + + impl MacroDelimiter { + pub(crate) fn is_brace(&self) -> bool { + match self { + MacroDelimiter::Brace(_) => true, + MacroDelimiter::Paren(_) | MacroDelimiter::Bracket(_) => false, + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for StaticMutability { + fn parse(input: ParseStream) -> Result<Self> { + let mut_token: Option<Token![mut]> = input.parse()?; + Ok(mut_token.map_or(StaticMutability::None, StaticMutability::Mut)) + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use crate::attr::FilterAttrs; + use crate::print::TokensOrDefault; + use proc_macro2::TokenStream; + use quote::{ToTokens, TokenStreamExt}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemExternCrate { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.extern_token.to_tokens(tokens); + self.crate_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + if let Some((as_token, rename)) = &self.rename { + as_token.to_tokens(tokens); + rename.to_tokens(tokens); + } + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemUse { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.use_token.to_tokens(tokens); + self.leading_colon.to_tokens(tokens); + self.tree.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemStatic { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.static_token.to_tokens(tokens); + self.mutability.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.colon_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + self.eq_token.to_tokens(tokens); + self.expr.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemConst { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.const_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.colon_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + self.eq_token.to_tokens(tokens); + self.expr.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemFn { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.sig.to_tokens(tokens); + self.block.brace_token.surround(tokens, |tokens| { + tokens.append_all(self.attrs.inner()); + tokens.append_all(&self.block.stmts); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemMod { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.unsafety.to_tokens(tokens); + self.mod_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + if let Some((brace, items)) = &self.content { + brace.surround(tokens, |tokens| { + tokens.append_all(self.attrs.inner()); + tokens.append_all(items); + }); + } else { + 
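+                // `TokensOrDefault` prints the parsed semicolon if one is
+                // present and otherwise a default-spanned `;`, so a module
+                // without a body still round-trips as `mod m;`.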
TokensOrDefault(&self.semi).to_tokens(tokens); + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemForeignMod { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.unsafety.to_tokens(tokens); + self.abi.to_tokens(tokens); + self.brace_token.surround(tokens, |tokens| { + tokens.append_all(self.attrs.inner()); + tokens.append_all(&self.items); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemType { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.type_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + self.generics.where_clause.to_tokens(tokens); + self.eq_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemEnum { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.enum_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + self.generics.where_clause.to_tokens(tokens); + self.brace_token.surround(tokens, |tokens| { + self.variants.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemStruct { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.struct_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + match &self.fields { + Fields::Named(fields) => { + self.generics.where_clause.to_tokens(tokens); + fields.to_tokens(tokens); + } + Fields::Unnamed(fields) => { + fields.to_tokens(tokens); + self.generics.where_clause.to_tokens(tokens); + TokensOrDefault(&self.semi_token).to_tokens(tokens); + } + Fields::Unit => { + self.generics.where_clause.to_tokens(tokens); + TokensOrDefault(&self.semi_token).to_tokens(tokens); + } + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemUnion { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.union_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + self.generics.where_clause.to_tokens(tokens); + self.fields.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemTrait { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.unsafety.to_tokens(tokens); + self.auto_token.to_tokens(tokens); + self.trait_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + if !self.supertraits.is_empty() { + TokensOrDefault(&self.colon_token).to_tokens(tokens); + self.supertraits.to_tokens(tokens); + } + self.generics.where_clause.to_tokens(tokens); + self.brace_token.surround(tokens, |tokens| { + tokens.append_all(self.attrs.inner()); + tokens.append_all(&self.items); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemTraitAlias { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.trait_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); 
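+            // Trait alias syntax places the where-clause after the `=` bounds
+            // (`trait A = B where ...;`), so it prints between the bounds and
+            // the trailing semicolon below.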
+ self.eq_token.to_tokens(tokens); + self.bounds.to_tokens(tokens); + self.generics.where_clause.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemImpl { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.defaultness.to_tokens(tokens); + self.unsafety.to_tokens(tokens); + self.impl_token.to_tokens(tokens); + self.generics.to_tokens(tokens); + if let Some((polarity, path, for_token)) = &self.trait_ { + polarity.to_tokens(tokens); + path.to_tokens(tokens); + for_token.to_tokens(tokens); + } + self.self_ty.to_tokens(tokens); + self.generics.where_clause.to_tokens(tokens); + self.brace_token.surround(tokens, |tokens| { + tokens.append_all(self.attrs.inner()); + tokens.append_all(&self.items); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ItemMacro { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.mac.path.to_tokens(tokens); + self.mac.bang_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + match &self.mac.delimiter { + MacroDelimiter::Paren(paren) => { + paren.surround(tokens, |tokens| self.mac.tokens.to_tokens(tokens)); + } + MacroDelimiter::Brace(brace) => { + brace.surround(tokens, |tokens| self.mac.tokens.to_tokens(tokens)); + } + MacroDelimiter::Bracket(bracket) => { + bracket.surround(tokens, |tokens| self.mac.tokens.to_tokens(tokens)); + } + } + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for UsePath { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.ident.to_tokens(tokens); + self.colon2_token.to_tokens(tokens); + self.tree.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for UseName { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.ident.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for UseRename { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.ident.to_tokens(tokens); + self.as_token.to_tokens(tokens); + self.rename.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for UseGlob { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.star_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for UseGroup { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.brace_token.surround(tokens, |tokens| { + self.items.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TraitItemConst { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.const_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.colon_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + if let Some((eq_token, default)) = &self.default { + eq_token.to_tokens(tokens); + default.to_tokens(tokens); + } + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TraitItemFn { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.sig.to_tokens(tokens); + match &self.default { + Some(block) => { + block.brace_token.surround(tokens, |tokens| { + tokens.append_all(self.attrs.inner()); + tokens.append_all(&block.stmts); + }); + } + None => { + 
TokensOrDefault(&self.semi_token).to_tokens(tokens); + } + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TraitItemType { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.type_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + if !self.bounds.is_empty() { + TokensOrDefault(&self.colon_token).to_tokens(tokens); + self.bounds.to_tokens(tokens); + } + if let Some((eq_token, default)) = &self.default { + eq_token.to_tokens(tokens); + default.to_tokens(tokens); + } + self.generics.where_clause.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TraitItemMacro { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.mac.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ImplItemConst { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.defaultness.to_tokens(tokens); + self.const_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.colon_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + self.eq_token.to_tokens(tokens); + self.expr.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ImplItemFn { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.defaultness.to_tokens(tokens); + self.sig.to_tokens(tokens); + self.block.brace_token.surround(tokens, |tokens| { + tokens.append_all(self.attrs.inner()); + tokens.append_all(&self.block.stmts); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ImplItemType { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.defaultness.to_tokens(tokens); + self.type_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + self.eq_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + self.generics.where_clause.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ImplItemMacro { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.mac.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ForeignItemFn { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.sig.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ForeignItemStatic { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.static_token.to_tokens(tokens); + self.mutability.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.colon_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ForeignItemType { + fn to_tokens(&self, tokens: &mut TokenStream) { + 
tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.type_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + self.generics.where_clause.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ForeignItemMacro { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.mac.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Signature { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.constness.to_tokens(tokens); + self.asyncness.to_tokens(tokens); + self.unsafety.to_tokens(tokens); + self.abi.to_tokens(tokens); + self.fn_token.to_tokens(tokens); + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + self.paren_token.surround(tokens, |tokens| { + self.inputs.to_tokens(tokens); + if let Some(variadic) = &self.variadic { + if !self.inputs.empty_or_trailing() { + <Token![,]>::default().to_tokens(tokens); + } + variadic.to_tokens(tokens); + } + }); + self.output.to_tokens(tokens); + self.generics.where_clause.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Receiver { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + if let Some((ampersand, lifetime)) = &self.reference { + ampersand.to_tokens(tokens); + lifetime.to_tokens(tokens); + } + self.mutability.to_tokens(tokens); + self.self_token.to_tokens(tokens); + if let Some(colon_token) = &self.colon_token { + colon_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + } else { + let consistent = match (&self.reference, &self.mutability, &*self.ty) { + (Some(_), mutability, Type::Reference(ty)) => { + mutability.is_some() == ty.mutability.is_some() + && match &*ty.elem { + Type::Path(ty) => ty.qself.is_none() && ty.path.is_ident("Self"), + _ => false, + } + } + (None, _, Type::Path(ty)) => ty.qself.is_none() && ty.path.is_ident("Self"), + _ => false, + }; + if !consistent { + <Token![:]>::default().to_tokens(tokens); + self.ty.to_tokens(tokens); + } + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Variadic { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + if let Some((pat, colon)) = &self.pat { + pat.to_tokens(tokens); + colon.to_tokens(tokens); + } + self.dots.to_tokens(tokens); + self.comma.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for StaticMutability { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + StaticMutability::None => {} + StaticMutability::Mut(mut_token) => mut_token.to_tokens(tokens), + } + } + } +} diff --git a/vendor/syn/src/lib.rs b/vendor/syn/src/lib.rs new file mode 100644 index 0000000..cb8daf6 --- /dev/null +++ b/vendor/syn/src/lib.rs @@ -0,0 +1,1010 @@ +//! [![github]](https://github.com/dtolnay/syn) [![crates-io]](https://crates.io/crates/syn) [![docs-rs]](crate) +//! +//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github +//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust +//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs +//! +//! <br> +//! +//! 
Syn is a parsing library for parsing a stream of Rust tokens into a syntax +//! tree of Rust source code. +//! +//! Currently this library is geared toward use in Rust procedural macros, but +//! contains some APIs that may be useful more generally. +//! +//! - **Data structures** — Syn provides a complete syntax tree that can +//! represent any valid Rust source code. The syntax tree is rooted at +//! [`syn::File`] which represents a full source file, but there are other +//! entry points that may be useful to procedural macros including +//! [`syn::Item`], [`syn::Expr`] and [`syn::Type`]. +//! +//! - **Derives** — Of particular interest to derive macros is +//! [`syn::DeriveInput`] which is any of the three legal input items to a +//! derive macro. An example below shows using this type in a library that can +//! derive implementations of a user-defined trait. +//! +//! - **Parsing** — Parsing in Syn is built around [parser functions] with the +//! signature `fn(ParseStream) -> Result<T>`. Every syntax tree node defined +//! by Syn is individually parsable and may be used as a building block for +//! custom syntaxes, or you may dream up your own brand new syntax without +//! involving any of our syntax tree types. +//! +//! - **Location information** — Every token parsed by Syn is associated with a +//! `Span` that tracks line and column information back to the source of that +//! token. These spans allow a procedural macro to display detailed error +//! messages pointing to all the right places in the user's code. There is an +//! example of this below. +//! +//! - **Feature flags** — Functionality is aggressively feature gated so your +//! procedural macros enable only what they need, and do not pay in compile +//! time for all the rest. +//! +//! [`syn::File`]: File +//! [`syn::Item`]: Item +//! [`syn::Expr`]: Expr +//! [`syn::Type`]: Type +//! [`syn::DeriveInput`]: DeriveInput +//! [parser functions]: mod@parse +//! +//! <br> +//! +//! # Example of a derive macro +//! +//! The canonical derive macro using Syn looks like this. We write an ordinary +//! Rust function tagged with a `proc_macro_derive` attribute and the name of +//! the trait we are deriving. Any time that derive appears in the user's code, +//! the Rust compiler passes their data structure as tokens into our macro. We +//! get to execute arbitrary Rust code to figure out what to do with those +//! tokens, then hand some tokens back to the compiler to compile into the +//! user's crate. +//! +//! [`TokenStream`]: proc_macro::TokenStream +//! +//! ```toml +//! [dependencies] +//! syn = "2.0" +//! quote = "1.0" +//! +//! [lib] +//! proc-macro = true +//! ``` +//! +//! ``` +//! # extern crate proc_macro; +//! # +//! use proc_macro::TokenStream; +//! use quote::quote; +//! use syn::{parse_macro_input, DeriveInput}; +//! +//! # const IGNORE_TOKENS: &str = stringify! { +//! #[proc_macro_derive(MyMacro)] +//! # }; +//! pub fn my_macro(input: TokenStream) -> TokenStream { +//! // Parse the input tokens into a syntax tree +//! let input = parse_macro_input!(input as DeriveInput); +//! +//! // Build the output, possibly using quasi-quotation +//! let expanded = quote! { +//! // ... +//! }; +//! +//! // Hand the output tokens back to the compiler +//! TokenStream::from(expanded) +//! } +//! ``` +//! +//! The [`heapsize`] example directory shows a complete working implementation +//! of a derive macro. The example derives a `HeapSize` trait which computes an +//! estimate of the amount of heap memory owned by a value. +//! +//! 
[`heapsize`]: https://github.com/dtolnay/syn/tree/master/examples/heapsize +//! +//! ``` +//! pub trait HeapSize { +//! /// Total number of bytes of heap memory owned by `self`. +//! fn heap_size_of_children(&self) -> usize; +//! } +//! ``` +//! +//! The derive macro allows users to write `#[derive(HeapSize)]` on data +//! structures in their program. +//! +//! ``` +//! # const IGNORE_TOKENS: &str = stringify! { +//! #[derive(HeapSize)] +//! # }; +//! struct Demo<'a, T: ?Sized> { +//! a: Box<T>, +//! b: u8, +//! c: &'a str, +//! d: String, +//! } +//! ``` +//! +//! <p><br></p> +//! +//! # Spans and error reporting +//! +//! The token-based procedural macro API provides great control over where the +//! compiler's error messages are displayed in user code. Consider the error the +//! user sees if one of their field types does not implement `HeapSize`. +//! +//! ``` +//! # const IGNORE_TOKENS: &str = stringify! { +//! #[derive(HeapSize)] +//! # }; +//! struct Broken { +//! ok: String, +//! bad: std::thread::Thread, +//! } +//! ``` +//! +//! By tracking span information all the way through the expansion of a +//! procedural macro as shown in the `heapsize` example, token-based macros in +//! Syn are able to trigger errors that directly pinpoint the source of the +//! problem. +//! +//! ```text +//! error[E0277]: the trait bound `std::thread::Thread: HeapSize` is not satisfied +//! --> src/main.rs:7:5 +//! | +//! 7 | bad: std::thread::Thread, +//! | ^^^^^^^^^^^^^^^^^^^^^^^^ the trait `HeapSize` is not implemented for `Thread` +//! ``` +//! +//! <br> +//! +//! # Parsing a custom syntax +//! +//! The [`lazy-static`] example directory shows the implementation of a +//! `functionlike!(...)` procedural macro in which the input tokens are parsed +//! using Syn's parsing API. +//! +//! [`lazy-static`]: https://github.com/dtolnay/syn/tree/master/examples/lazy-static +//! +//! The example reimplements the popular `lazy_static` crate from crates.io as a +//! procedural macro. +//! +//! ``` +//! # macro_rules! lazy_static { +//! # ($($tt:tt)*) => {} +//! # } +//! # +//! lazy_static! { +//! static ref USERNAME: Regex = Regex::new("^[a-z0-9_-]{3,16}$").unwrap(); +//! } +//! ``` +//! +//! The implementation shows how to trigger custom warnings and error messages +//! on the macro input. +//! +//! ```text +//! warning: come on, pick a more creative name +//! --> src/main.rs:10:16 +//! | +//! 10 | static ref FOO: String = "lazy_static".to_owned(); +//! | ^^^ +//! ``` +//! +//! <br> +//! +//! # Testing +//! +//! When testing macros, we often care not just that the macro can be used +//! successfully but also that when the macro is provided with invalid input it +//! produces maximally helpful error messages. Consider using the [`trybuild`] +//! crate to write tests for errors that are emitted by your macro or errors +//! detected by the Rust compiler in the expanded code following misuse of the +//! macro. Such tests help avoid regressions from later refactors that +//! mistakenly make an error no longer trigger or be less helpful than it used +//! to be. +//! +//! [`trybuild`]: https://github.com/dtolnay/trybuild +//! +//! <br> +//! +//! # Debugging +//! +//! When developing a procedural macro it can be helpful to look at what the +//! generated code looks like. Use `cargo rustc -- -Zunstable-options +//! --pretty=expanded` or the [`cargo expand`] subcommand. +//! +//! [`cargo expand`]: https://github.com/dtolnay/cargo-expand +//! +//! 
To show the expanded code for some crate that uses your procedural macro,
+//! run `cargo expand` from that crate. To show the expanded code for one of
+//! your own test cases, run `cargo expand --test the_test_case` where the last
+//! argument is the name of the test file without the `.rs` extension.
+//!
+//! This write-up by Brandon W Maister discusses debugging in more detail:
+//! [Debugging Rust's new Custom Derive system][debugging].
+//!
+//! [debugging]: https://quodlibetor.github.io/posts/debugging-rusts-new-custom-derive-system/
+//!
+//! <br>
+//!
+//! # Optional features
+//!
+//! Syn puts a lot of functionality behind optional features in order to
+//! optimize compile time for the most common use cases. The following features
+//! are available.
+//!
+//! - **`derive`** *(enabled by default)* — Data structures for representing the
+//!   possible input to a derive macro, including structs and enums and unions.
+//! - **`full`** — Data structures for representing the syntax tree of all valid
+//!   Rust source code, including items and expressions.
+//! - **`parsing`** *(enabled by default)* — Ability to parse input tokens into
+//!   a syntax tree node of a chosen type.
+//! - **`printing`** *(enabled by default)* — Ability to print a syntax tree
+//!   node as tokens of Rust source code.
+//! - **`visit`** — Trait for traversing a syntax tree.
+//! - **`visit-mut`** — Trait for traversing and mutating in place a syntax
+//!   tree.
+//! - **`fold`** — Trait for transforming an owned syntax tree.
+//! - **`clone-impls`** *(enabled by default)* — Clone impls for all syntax tree
+//!   types.
+//! - **`extra-traits`** — Debug, Eq, PartialEq, Hash impls for all syntax tree
+//!   types.
+//! - **`proc-macro`** *(enabled by default)* — Runtime dependency on the
+//!   dynamic library libproc_macro from rustc toolchain.
+
+// Syn types in rustdoc of other crates get linked to here.
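+//!
+//! For example, a procedural macro that parses whole items and rewrites them
+//! in place would enable two non-default features on top of the defaults (an
+//! illustrative sketch; enable only the features your macro actually uses):
+//!
+//! ```toml
+//! [dependencies]
+//! syn = { version = "2.0", features = ["full", "visit-mut"] }
+//! ```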
+#![doc(html_root_url = "https://docs.rs/syn/2.0.48")] +#![cfg_attr(doc_cfg, feature(doc_cfg))] +#![deny(unsafe_op_in_unsafe_fn)] +#![allow(non_camel_case_types)] +#![allow( + clippy::bool_to_int_with_if, + clippy::cast_lossless, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_ptr_alignment, + clippy::default_trait_access, + clippy::derivable_impls, + clippy::diverging_sub_expression, + clippy::doc_markdown, + clippy::expl_impl_clone_on_copy, + clippy::explicit_auto_deref, + clippy::if_not_else, + clippy::inherent_to_string, + clippy::into_iter_without_iter, + clippy::items_after_statements, + clippy::large_enum_variant, + clippy::let_underscore_untyped, // https://github.com/rust-lang/rust-clippy/issues/10410 + clippy::manual_assert, + clippy::manual_let_else, + clippy::match_like_matches_macro, + clippy::match_on_vec_items, + clippy::match_same_arms, + clippy::match_wildcard_for_single_variants, // clippy bug: https://github.com/rust-lang/rust-clippy/issues/6984 + clippy::missing_errors_doc, + clippy::missing_panics_doc, + clippy::module_name_repetitions, + clippy::must_use_candidate, + clippy::needless_doctest_main, + clippy::needless_pass_by_value, + clippy::never_loop, + clippy::range_plus_one, + clippy::redundant_else, + clippy::return_self_not_must_use, + clippy::similar_names, + clippy::single_match_else, + clippy::too_many_arguments, + clippy::too_many_lines, + clippy::trivially_copy_pass_by_ref, + clippy::uninhabited_references, + clippy::uninlined_format_args, + clippy::unnecessary_box_returns, + clippy::unnecessary_unwrap, + clippy::used_underscore_binding, + clippy::wildcard_imports, +)] + +#[cfg(feature = "proc-macro")] +extern crate proc_macro; + +#[macro_use] +mod macros; + +#[cfg(feature = "parsing")] +#[macro_use] +mod group; + +#[macro_use] +pub mod token; + +#[cfg(any(feature = "full", feature = "derive"))] +mod attr; +#[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] +pub use crate::attr::{AttrStyle, Attribute, Meta, MetaList, MetaNameValue}; + +mod bigint; + +#[cfg(feature = "parsing")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +pub mod buffer; + +mod custom_keyword; + +mod custom_punctuation; + +#[cfg(any(feature = "full", feature = "derive"))] +mod data; +#[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] +pub use crate::data::{Field, Fields, FieldsNamed, FieldsUnnamed, Variant}; + +#[cfg(any(feature = "full", feature = "derive"))] +mod derive; +#[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub use crate::derive::{Data, DataEnum, DataStruct, DataUnion, DeriveInput}; + +mod drops; + +mod error; +pub use crate::error::{Error, Result}; + +#[cfg(any(feature = "full", feature = "derive"))] +mod expr; +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub use crate::expr::{Arm, Label, RangeLimits}; +#[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] +pub use crate::expr::{ + Expr, ExprBinary, ExprCall, ExprCast, ExprField, ExprIndex, ExprLit, ExprMacro, ExprMethodCall, + ExprParen, ExprPath, ExprReference, ExprStruct, ExprUnary, FieldValue, Index, Member, +}; +#[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub use crate::expr::{ + ExprArray, ExprAssign, ExprAsync, ExprAwait, ExprBlock, ExprBreak, 
ExprClosure, ExprConst, + ExprContinue, ExprForLoop, ExprGroup, ExprIf, ExprInfer, ExprLet, ExprLoop, ExprMatch, + ExprRange, ExprRepeat, ExprReturn, ExprTry, ExprTryBlock, ExprTuple, ExprUnsafe, ExprWhile, + ExprYield, +}; + +#[cfg(feature = "parsing")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +pub mod ext; + +#[cfg(feature = "full")] +mod file; +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub use crate::file::File; + +#[cfg(any(feature = "full", feature = "derive"))] +mod generics; +#[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] +pub use crate::generics::{ + BoundLifetimes, ConstParam, GenericParam, Generics, LifetimeParam, PredicateLifetime, + PredicateType, TraitBound, TraitBoundModifier, TypeParam, TypeParamBound, WhereClause, + WherePredicate, +}; +#[cfg(all(any(feature = "full", feature = "derive"), feature = "printing"))] +#[cfg_attr( + doc_cfg, + doc(cfg(all(any(feature = "full", feature = "derive"), feature = "printing"))) +)] +pub use crate::generics::{ImplGenerics, Turbofish, TypeGenerics}; + +mod ident; +#[doc(inline)] +pub use crate::ident::Ident; + +#[cfg(feature = "full")] +mod item; +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub use crate::item::{ + FnArg, ForeignItem, ForeignItemFn, ForeignItemMacro, ForeignItemStatic, ForeignItemType, + ImplItem, ImplItemConst, ImplItemFn, ImplItemMacro, ImplItemType, ImplRestriction, Item, + ItemConst, ItemEnum, ItemExternCrate, ItemFn, ItemForeignMod, ItemImpl, ItemMacro, ItemMod, + ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, ItemUse, Receiver, + Signature, StaticMutability, TraitItem, TraitItemConst, TraitItemFn, TraitItemMacro, + TraitItemType, UseGlob, UseGroup, UseName, UsePath, UseRename, UseTree, Variadic, +}; + +mod lifetime; +#[doc(inline)] +pub use crate::lifetime::Lifetime; + +mod lit; +#[doc(hidden)] // https://github.com/dtolnay/syn/issues/1566 +pub use crate::lit::StrStyle; +#[doc(inline)] +pub use crate::lit::{Lit, LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitInt, LitStr}; + +#[cfg(feature = "parsing")] +mod lookahead; + +#[cfg(any(feature = "full", feature = "derive"))] +mod mac; +#[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] +pub use crate::mac::{Macro, MacroDelimiter}; + +#[cfg(all(feature = "parsing", any(feature = "full", feature = "derive")))] +#[cfg_attr( + doc_cfg, + doc(cfg(all(feature = "parsing", any(feature = "full", feature = "derive")))) +)] +pub mod meta; + +#[cfg(any(feature = "full", feature = "derive"))] +mod op; +#[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] +pub use crate::op::{BinOp, UnOp}; + +#[cfg(feature = "parsing")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +pub mod parse; + +#[cfg(all(feature = "parsing", feature = "proc-macro"))] +mod parse_macro_input; + +#[cfg(all(feature = "parsing", feature = "printing"))] +mod parse_quote; + +#[cfg(feature = "full")] +mod pat; +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub use crate::expr::{ + ExprConst as PatConst, ExprLit as PatLit, ExprMacro as PatMacro, ExprPath as PatPath, + ExprRange as PatRange, +}; +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub use crate::pat::{ + FieldPat, Pat, PatIdent, PatOr, PatParen, PatReference, PatRest, 
PatSlice, PatStruct, PatTuple,
+    PatTupleStruct, PatType, PatWild,
+};
+
+#[cfg(any(feature = "full", feature = "derive"))]
+mod path;
+#[cfg(any(feature = "full", feature = "derive"))]
+#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))]
+pub use crate::path::{
+    AngleBracketedGenericArguments, AssocConst, AssocType, Constraint, GenericArgument,
+    ParenthesizedGenericArguments, Path, PathArguments, PathSegment, QSelf,
+};
+
+#[cfg(all(any(feature = "full", feature = "derive"), feature = "printing"))]
+mod print;
+
+pub mod punctuated;
+
+#[cfg(any(feature = "full", feature = "derive"))]
+mod restriction;
+#[cfg(any(feature = "full", feature = "derive"))]
+#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))]
+pub use crate::restriction::{FieldMutability, VisRestricted, Visibility};
+
+mod sealed;
+
+mod span;
+
+#[cfg(all(feature = "parsing", feature = "printing"))]
+#[cfg_attr(doc_cfg, doc(cfg(all(feature = "parsing", feature = "printing"))))]
+pub mod spanned;
+
+#[cfg(feature = "full")]
+mod stmt;
+#[cfg(feature = "full")]
+#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))]
+pub use crate::stmt::{Block, Local, LocalInit, Stmt, StmtMacro};
+
+mod thread;
+
+#[cfg(all(any(feature = "full", feature = "derive"), feature = "extra-traits"))]
+mod tt;
+
+#[cfg(any(feature = "full", feature = "derive"))]
+mod ty;
+#[cfg(any(feature = "full", feature = "derive"))]
+#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))]
+pub use crate::ty::{
+    Abi, BareFnArg, BareVariadic, ReturnType, Type, TypeArray, TypeBareFn, TypeGroup,
+    TypeImplTrait, TypeInfer, TypeMacro, TypeNever, TypeParen, TypePath, TypePtr, TypeReference,
+    TypeSlice, TypeTraitObject, TypeTuple,
+};
+
+#[cfg(all(any(feature = "full", feature = "derive"), feature = "parsing"))]
+mod verbatim;
+
+#[cfg(all(feature = "parsing", feature = "full"))]
+mod whitespace;
+
+mod gen {
+    /// Syntax tree traversal to transform the nodes of an owned syntax tree.
+    ///
+    /// Each method of the [`Fold`] trait is a hook that can be overridden to
+    /// customize the behavior when transforming the corresponding type of node.
+    /// By default, every method recursively visits the substructure of the
+    /// input by invoking the right visitor method of each of its fields.
+    ///
+    /// [`Fold`]: fold::Fold
+    ///
+    /// ```
+    /// # use syn::{Attribute, BinOp, Expr, ExprBinary};
+    /// #
+    /// pub trait Fold {
+    ///     /* ... */
+    ///
+    ///     fn fold_expr_binary(&mut self, node: ExprBinary) -> ExprBinary {
+    ///         fold_expr_binary(self, node)
+    ///     }
+    ///
+    ///     /* ... */
+    ///     # fn fold_attribute(&mut self, node: Attribute) -> Attribute;
+    ///     # fn fold_expr(&mut self, node: Expr) -> Expr;
+    ///     # fn fold_bin_op(&mut self, node: BinOp) -> BinOp;
+    /// }
+    ///
+    /// pub fn fold_expr_binary<V>(v: &mut V, node: ExprBinary) -> ExprBinary
+    /// where
+    ///     V: Fold + ?Sized,
+    /// {
+    ///     ExprBinary {
+    ///         attrs: node
+    ///             .attrs
+    ///             .into_iter()
+    ///             .map(|attr| v.fold_attribute(attr))
+    ///             .collect(),
+    ///         left: Box::new(v.fold_expr(*node.left)),
+    ///         op: v.fold_bin_op(node.op),
+    ///         right: Box::new(v.fold_expr(*node.right)),
+    ///     }
+    /// }
+    ///
+    /// /* ... */
+    /// ```
+    ///
+    /// <br>
+    ///
+    /// # Example
+    ///
+    /// This fold inserts parentheses to fully parenthesize any expression.
+ /// + /// ``` + /// // [dependencies] + /// // quote = "1.0" + /// // syn = { version = "2.0", features = ["fold", "full"] } + /// + /// use quote::quote; + /// use syn::fold::{fold_expr, Fold}; + /// use syn::{token, Expr, ExprParen}; + /// + /// struct ParenthesizeEveryExpr; + /// + /// impl Fold for ParenthesizeEveryExpr { + /// fn fold_expr(&mut self, expr: Expr) -> Expr { + /// Expr::Paren(ExprParen { + /// attrs: Vec::new(), + /// expr: Box::new(fold_expr(self, expr)), + /// paren_token: token::Paren::default(), + /// }) + /// } + /// } + /// + /// fn main() { + /// let code = quote! { a() + b(1) * c.d }; + /// let expr: Expr = syn::parse2(code).unwrap(); + /// let parenthesized = ParenthesizeEveryExpr.fold_expr(expr); + /// println!("{}", quote!(#parenthesized)); + /// + /// // Output: (((a)()) + (((b)((1))) * ((c).d))) + /// } + /// ``` + #[cfg(feature = "fold")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "fold")))] + #[rustfmt::skip] + pub mod fold; + + /// Syntax tree traversal to walk a shared borrow of a syntax tree. + /// + /// Each method of the [`Visit`] trait is a hook that can be overridden to + /// customize the behavior when visiting the corresponding type of node. By + /// default, every method recursively visits the substructure of the input + /// by invoking the right visitor method of each of its fields. + /// + /// [`Visit`]: visit::Visit + /// + /// ``` + /// # use syn::{Attribute, BinOp, Expr, ExprBinary}; + /// # + /// pub trait Visit<'ast> { + /// /* ... */ + /// + /// fn visit_expr_binary(&mut self, node: &'ast ExprBinary) { + /// visit_expr_binary(self, node); + /// } + /// + /// /* ... */ + /// # fn visit_attribute(&mut self, node: &'ast Attribute); + /// # fn visit_expr(&mut self, node: &'ast Expr); + /// # fn visit_bin_op(&mut self, node: &'ast BinOp); + /// } + /// + /// pub fn visit_expr_binary<'ast, V>(v: &mut V, node: &'ast ExprBinary) + /// where + /// V: Visit<'ast> + ?Sized, + /// { + /// for attr in &node.attrs { + /// v.visit_attribute(attr); + /// } + /// v.visit_expr(&*node.left); + /// v.visit_bin_op(&node.op); + /// v.visit_expr(&*node.right); + /// } + /// + /// /* ... */ + /// ``` + /// + /// <br> + /// + /// # Example + /// + /// This visitor will print the name of every freestanding function in the + /// syntax tree, including nested functions. + /// + /// ``` + /// // [dependencies] + /// // quote = "1.0" + /// // syn = { version = "2.0", features = ["full", "visit"] } + /// + /// use quote::quote; + /// use syn::visit::{self, Visit}; + /// use syn::{File, ItemFn}; + /// + /// struct FnVisitor; + /// + /// impl<'ast> Visit<'ast> for FnVisitor { + /// fn visit_item_fn(&mut self, node: &'ast ItemFn) { + /// println!("Function with name={}", node.sig.ident); + /// + /// // Delegate to the default impl to visit any nested functions. + /// visit::visit_item_fn(self, node); + /// } + /// } + /// + /// fn main() { + /// let code = quote! { + /// pub fn f() { + /// fn g() {} + /// } + /// }; + /// + /// let syntax_tree: File = syn::parse2(code).unwrap(); + /// FnVisitor.visit_file(&syntax_tree); + /// } + /// ``` + /// + /// The `'ast` lifetime on the input references means that the syntax tree + /// outlives the complete recursive visit call, so the visitor is allowed to + /// hold on to references into the syntax tree. 
+    ///
+    /// ```
+    /// use quote::quote;
+    /// use syn::visit::{self, Visit};
+    /// use syn::{File, ItemFn};
+    ///
+    /// struct FnVisitor<'ast> {
+    ///     functions: Vec<&'ast ItemFn>,
+    /// }
+    ///
+    /// impl<'ast> Visit<'ast> for FnVisitor<'ast> {
+    ///     fn visit_item_fn(&mut self, node: &'ast ItemFn) {
+    ///         self.functions.push(node);
+    ///         visit::visit_item_fn(self, node);
+    ///     }
+    /// }
+    ///
+    /// fn main() {
+    ///     let code = quote! {
+    ///         pub fn f() {
+    ///             fn g() {}
+    ///         }
+    ///     };
+    ///
+    ///     let syntax_tree: File = syn::parse2(code).unwrap();
+    ///     let mut visitor = FnVisitor { functions: Vec::new() };
+    ///     visitor.visit_file(&syntax_tree);
+    ///     for f in visitor.functions {
+    ///         println!("Function with name={}", f.sig.ident);
+    ///     }
+    /// }
+    /// ```
+    #[cfg(feature = "visit")]
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "visit")))]
+    #[rustfmt::skip]
+    pub mod visit;
+
+    /// Syntax tree traversal to mutate an exclusive borrow of a syntax tree in
+    /// place.
+    ///
+    /// Each method of the [`VisitMut`] trait is a hook that can be overridden
+    /// to customize the behavior when mutating the corresponding type of node.
+    /// By default, every method recursively visits the substructure of the
+    /// input by invoking the right visitor method of each of its fields.
+    ///
+    /// [`VisitMut`]: visit_mut::VisitMut
+    ///
+    /// ```
+    /// # use syn::{Attribute, BinOp, Expr, ExprBinary};
+    /// #
+    /// pub trait VisitMut {
+    ///     /* ... */
+    ///
+    ///     fn visit_expr_binary_mut(&mut self, node: &mut ExprBinary) {
+    ///         visit_expr_binary_mut(self, node);
+    ///     }
+    ///
+    ///     /* ... */
+    ///     # fn visit_attribute_mut(&mut self, node: &mut Attribute);
+    ///     # fn visit_expr_mut(&mut self, node: &mut Expr);
+    ///     # fn visit_bin_op_mut(&mut self, node: &mut BinOp);
+    /// }
+    ///
+    /// pub fn visit_expr_binary_mut<V>(v: &mut V, node: &mut ExprBinary)
+    /// where
+    ///     V: VisitMut + ?Sized,
+    /// {
+    ///     for attr in &mut node.attrs {
+    ///         v.visit_attribute_mut(attr);
+    ///     }
+    ///     v.visit_expr_mut(&mut *node.left);
+    ///     v.visit_bin_op_mut(&mut node.op);
+    ///     v.visit_expr_mut(&mut *node.right);
+    /// }
+    ///
+    /// /* ... */
+    /// ```
+    ///
+    /// <br>
+    ///
+    /// # Example
+    ///
+    /// This mut visitor replaces occurrences of u256 suffixed integer literals
+    /// like `999u256` with a macro invocation `bigint::u256!(999)`.
+    ///
+    /// ```
+    /// // [dependencies]
+    /// // quote = "1.0"
+    /// // syn = { version = "2.0", features = ["full", "visit-mut"] }
+    ///
+    /// use quote::quote;
+    /// use syn::visit_mut::{self, VisitMut};
+    /// use syn::{parse_quote, Expr, File, Lit, LitInt};
+    ///
+    /// struct BigintReplace;
+    ///
+    /// impl VisitMut for BigintReplace {
+    ///     fn visit_expr_mut(&mut self, node: &mut Expr) {
+    ///         if let Expr::Lit(expr) = &node {
+    ///             if let Lit::Int(int) = &expr.lit {
+    ///                 if int.suffix() == "u256" {
+    ///                     let digits = int.base10_digits();
+    ///                     let unsuffixed: LitInt = syn::parse_str(digits).unwrap();
+    ///                     *node = parse_quote!(bigint::u256!(#unsuffixed));
+    ///                     return;
+    ///                 }
+    ///             }
+    ///         }
+    ///
+    ///         // Delegate to the default impl to visit nested expressions.
+    ///         visit_mut::visit_expr_mut(self, node);
+    ///     }
+    /// }
+    ///
+    /// fn main() {
+    ///     let code = quote! 
{ + /// fn main() { + /// let _ = 999u256; + /// } + /// }; + /// + /// let mut syntax_tree: File = syn::parse2(code).unwrap(); + /// BigintReplace.visit_file_mut(&mut syntax_tree); + /// println!("{}", quote!(#syntax_tree)); + /// } + /// ``` + #[cfg(feature = "visit-mut")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "visit-mut")))] + #[rustfmt::skip] + pub mod visit_mut; + + #[cfg(feature = "clone-impls")] + #[rustfmt::skip] + mod clone; + + #[cfg(feature = "extra-traits")] + #[rustfmt::skip] + mod debug; + + #[cfg(feature = "extra-traits")] + #[rustfmt::skip] + mod eq; + + #[cfg(feature = "extra-traits")] + #[rustfmt::skip] + mod hash; + + #[cfg(any(feature = "full", feature = "derive"))] + #[path = "../gen_helper.rs"] + mod helper; +} + +#[cfg(feature = "fold")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "fold")))] +pub use crate::gen::fold; + +#[cfg(feature = "visit")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "visit")))] +pub use crate::gen::visit; + +#[cfg(feature = "visit-mut")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "visit-mut")))] +pub use crate::gen::visit_mut; + +// Not public API. +#[doc(hidden)] +#[path = "export.rs"] +pub mod __private; + +/// Parse tokens of source code into the chosen syntax tree node. +/// +/// This is preferred over parsing a string because tokens are able to preserve +/// information about where in the user's code they were originally written (the +/// "span" of the token), possibly allowing the compiler to produce better error +/// messages. +/// +/// This function parses a `proc_macro::TokenStream` which is the type used for +/// interop with the compiler in a procedural macro. To parse a +/// `proc_macro2::TokenStream`, use [`syn::parse2`] instead. +/// +/// [`syn::parse2`]: parse2 +/// +/// # Examples +/// +/// ``` +/// # extern crate proc_macro; +/// # +/// use proc_macro::TokenStream; +/// use quote::quote; +/// use syn::DeriveInput; +/// +/// # const IGNORE_TOKENS: &str = stringify! { +/// #[proc_macro_derive(MyMacro)] +/// # }; +/// pub fn my_macro(input: TokenStream) -> TokenStream { +/// // Parse the tokens into a syntax tree +/// let ast: DeriveInput = syn::parse(input).unwrap(); +/// +/// // Build the output, possibly using quasi-quotation +/// let expanded = quote! { +/// /* ... */ +/// }; +/// +/// // Convert into a token stream and return it +/// expanded.into() +/// } +/// ``` +#[cfg(all(feature = "parsing", feature = "proc-macro"))] +#[cfg_attr(doc_cfg, doc(cfg(all(feature = "parsing", feature = "proc-macro"))))] +pub fn parse<T: parse::Parse>(tokens: proc_macro::TokenStream) -> Result<T> { + parse::Parser::parse(T::parse, tokens) +} + +/// Parse a proc-macro2 token stream into the chosen syntax tree node. +/// +/// This function will check that the input is fully parsed. If there are +/// any unparsed tokens at the end of the stream, an error is returned. +/// +/// This function parses a `proc_macro2::TokenStream` which is commonly useful +/// when the input comes from a node of the Syn syntax tree, for example the +/// body tokens of a [`Macro`] node. When in a procedural macro parsing the +/// `proc_macro::TokenStream` provided by the compiler, use [`syn::parse`] +/// instead. +/// +/// [`syn::parse`]: parse() +#[cfg(feature = "parsing")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +pub fn parse2<T: parse::Parse>(tokens: proc_macro2::TokenStream) -> Result<T> { + parse::Parser::parse2(T::parse, tokens) +} + +/// Parse a string of Rust code into the chosen syntax tree node. 
+/// +/// # Hygiene +/// +/// Every span in the resulting syntax tree will be set to resolve at the macro +/// call site. +/// +/// # Examples +/// +/// ``` +/// use syn::{Expr, Result}; +/// +/// fn run() -> Result<()> { +/// let code = "assert_eq!(u8::max_value(), 255)"; +/// let expr = syn::parse_str::<Expr>(code)?; +/// println!("{:#?}", expr); +/// Ok(()) +/// } +/// # +/// # run().unwrap(); +/// ``` +#[cfg(feature = "parsing")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +pub fn parse_str<T: parse::Parse>(s: &str) -> Result<T> { + parse::Parser::parse_str(T::parse, s) +} + +// FIXME the name parse_file makes it sound like you might pass in a path to a +// file, rather than the content. +/// Parse the content of a file of Rust code. +/// +/// This is different from `syn::parse_str::<File>(content)` in two ways: +/// +/// - It discards a leading byte order mark `\u{FEFF}` if the file has one. +/// - It preserves the shebang line of the file, such as `#!/usr/bin/env rustx`. +/// +/// If present, either of these would be an error using `from_str`. +/// +/// # Examples +/// +/// ```no_run +/// use std::error::Error; +/// use std::fs::File; +/// use std::io::Read; +/// +/// fn run() -> Result<(), Box<dyn Error>> { +/// let mut file = File::open("path/to/code.rs")?; +/// let mut content = String::new(); +/// file.read_to_string(&mut content)?; +/// +/// let ast = syn::parse_file(&content)?; +/// if let Some(shebang) = ast.shebang { +/// println!("{}", shebang); +/// } +/// println!("{} items", ast.items.len()); +/// +/// Ok(()) +/// } +/// # +/// # run().unwrap(); +/// ``` +#[cfg(all(feature = "parsing", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(all(feature = "parsing", feature = "full"))))] +pub fn parse_file(mut content: &str) -> Result<File> { + // Strip the BOM if it is present + const BOM: &str = "\u{feff}"; + if content.starts_with(BOM) { + content = &content[BOM.len()..]; + } + + let mut shebang = None; + if content.starts_with("#!") { + let rest = whitespace::skip(&content[2..]); + if !rest.starts_with('[') { + if let Some(idx) = content.find('\n') { + shebang = Some(content[..idx].to_string()); + content = &content[idx..]; + } else { + shebang = Some(content.to_string()); + content = ""; + } + } + } + + let mut file: File = parse_str(content)?; + file.shebang = shebang; + Ok(file) +} diff --git a/vendor/syn/src/lifetime.rs b/vendor/syn/src/lifetime.rs new file mode 100644 index 0000000..29f4cfd --- /dev/null +++ b/vendor/syn/src/lifetime.rs @@ -0,0 +1,156 @@ +use proc_macro2::{Ident, Span}; +use std::cmp::Ordering; +use std::fmt::{self, Display}; +use std::hash::{Hash, Hasher}; + +#[cfg(feature = "parsing")] +use crate::lookahead; + +/// A Rust lifetime: `'a`. +/// +/// Lifetime names must conform to the following rules: +/// +/// - Must start with an apostrophe. +/// - Must not consist of just an apostrophe: `'`. +/// - Character after the apostrophe must be `_` or a Unicode code point with +/// the XID_Start property. +/// - All following characters must be Unicode code points with the XID_Continue +/// property. +pub struct Lifetime { + pub apostrophe: Span, + pub ident: Ident, +} + +impl Lifetime { + /// # Panics + /// + /// Panics if the lifetime does not conform to the bulleted rules above. 
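+    /// For example, `Lifetime::new("a", span)` would panic because the name
+    /// is missing its leading apostrophe.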
+ /// + /// # Invocation + /// + /// ``` + /// # use proc_macro2::Span; + /// # use syn::Lifetime; + /// # + /// # fn f() -> Lifetime { + /// Lifetime::new("'a", Span::call_site()) + /// # } + /// ``` + pub fn new(symbol: &str, span: Span) -> Self { + if !symbol.starts_with('\'') { + panic!( + "lifetime name must start with apostrophe as in \"'a\", got {:?}", + symbol + ); + } + + if symbol == "'" { + panic!("lifetime name must not be empty"); + } + + if !crate::ident::xid_ok(&symbol[1..]) { + panic!("{:?} is not a valid lifetime name", symbol); + } + + Lifetime { + apostrophe: span, + ident: Ident::new(&symbol[1..], span), + } + } + + pub fn span(&self) -> Span { + self.apostrophe + .join(self.ident.span()) + .unwrap_or(self.apostrophe) + } + + pub fn set_span(&mut self, span: Span) { + self.apostrophe = span; + self.ident.set_span(span); + } +} + +impl Display for Lifetime { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + "'".fmt(formatter)?; + self.ident.fmt(formatter) + } +} + +impl Clone for Lifetime { + fn clone(&self) -> Self { + Lifetime { + apostrophe: self.apostrophe, + ident: self.ident.clone(), + } + } +} + +impl PartialEq for Lifetime { + fn eq(&self, other: &Lifetime) -> bool { + self.ident.eq(&other.ident) + } +} + +impl Eq for Lifetime {} + +impl PartialOrd for Lifetime { + fn partial_cmp(&self, other: &Lifetime) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for Lifetime { + fn cmp(&self, other: &Lifetime) -> Ordering { + self.ident.cmp(&other.ident) + } +} + +impl Hash for Lifetime { + fn hash<H: Hasher>(&self, h: &mut H) { + self.ident.hash(h); + } +} + +#[cfg(feature = "parsing")] +pub_if_not_doc! { + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn Lifetime(marker: lookahead::TokenMarker) -> Lifetime { + match marker {} + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::parse::{Parse, ParseStream, Result}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Lifetime { + fn parse(input: ParseStream) -> Result<Self> { + input.step(|cursor| { + cursor + .lifetime() + .ok_or_else(|| cursor.error("expected lifetime")) + }) + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use proc_macro2::{Punct, Spacing, TokenStream}; + use quote::{ToTokens, TokenStreamExt}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Lifetime { + fn to_tokens(&self, tokens: &mut TokenStream) { + let mut apostrophe = Punct::new('\'', Spacing::Joint); + apostrophe.set_span(self.apostrophe); + tokens.append(apostrophe); + self.ident.to_tokens(tokens); + } + } +} diff --git a/vendor/syn/src/lit.rs b/vendor/syn/src/lit.rs new file mode 100644 index 0000000..24524bb --- /dev/null +++ b/vendor/syn/src/lit.rs @@ -0,0 +1,1651 @@ +#[cfg(feature = "parsing")] +use crate::lookahead; +#[cfg(feature = "parsing")] +use crate::parse::{Parse, Parser}; +use crate::{Error, Result}; +use proc_macro2::{Ident, Literal, Span}; +#[cfg(feature = "parsing")] +use proc_macro2::{TokenStream, TokenTree}; +use std::fmt::{self, Display}; +#[cfg(feature = "extra-traits")] +use std::hash::{Hash, Hasher}; +use std::str::{self, FromStr}; + +ast_enum_of_structs! { + /// A Rust literal such as a string or integer or boolean. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: crate::Expr#syntax-tree-enums + #[non_exhaustive] + pub enum Lit { + /// A UTF-8 string literal: `"foo"`. 
+ Str(LitStr), + + /// A byte string literal: `b"foo"`. + ByteStr(LitByteStr), + + /// A byte literal: `b'f'`. + Byte(LitByte), + + /// A character literal: `'a'`. + Char(LitChar), + + /// An integer literal: `1` or `1u16`. + Int(LitInt), + + /// A floating point literal: `1f64` or `1.0e10f64`. + /// + /// Must be finite. May not be infinite or NaN. + Float(LitFloat), + + /// A boolean literal: `true` or `false`. + Bool(LitBool), + + /// A raw token literal not interpreted by Syn. + Verbatim(Literal), + } +} + +ast_struct! { + /// A UTF-8 string literal: `"foo"`. + pub struct LitStr { + repr: Box<LitRepr>, + } +} + +ast_struct! { + /// A byte string literal: `b"foo"`. + pub struct LitByteStr { + repr: Box<LitRepr>, + } +} + +ast_struct! { + /// A byte literal: `b'f'`. + pub struct LitByte { + repr: Box<LitRepr>, + } +} + +ast_struct! { + /// A character literal: `'a'`. + pub struct LitChar { + repr: Box<LitRepr>, + } +} + +struct LitRepr { + token: Literal, + suffix: Box<str>, +} + +ast_struct! { + /// An integer literal: `1` or `1u16`. + pub struct LitInt { + repr: Box<LitIntRepr>, + } +} + +struct LitIntRepr { + token: Literal, + digits: Box<str>, + suffix: Box<str>, +} + +ast_struct! { + /// A floating point literal: `1f64` or `1.0e10f64`. + /// + /// Must be finite. May not be infinite or NaN. + pub struct LitFloat { + repr: Box<LitFloatRepr>, + } +} + +struct LitFloatRepr { + token: Literal, + digits: Box<str>, + suffix: Box<str>, +} + +ast_struct! { + /// A boolean literal: `true` or `false`. + pub struct LitBool { + pub value: bool, + pub span: Span, + } +} + +impl LitStr { + pub fn new(value: &str, span: Span) -> Self { + let mut token = Literal::string(value); + token.set_span(span); + LitStr { + repr: Box::new(LitRepr { + token, + suffix: Box::<str>::default(), + }), + } + } + + pub fn value(&self) -> String { + let repr = self.repr.token.to_string(); + let (value, _suffix) = value::parse_lit_str(&repr); + String::from(value) + } + + /// Parse a syntax tree node from the content of this string literal. + /// + /// All spans in the syntax tree will point to the span of this `LitStr`. + /// + /// # Example + /// + /// ``` + /// use syn::{Attribute, Error, Expr, Lit, Meta, Path, Result}; + /// + /// // Parses the path from an attribute that looks like: + /// // + /// // #[path = "a::b::c"] + /// // + /// // or returns `None` if the input is some other attribute. + /// fn get_path(attr: &Attribute) -> Result<Option<Path>> { + /// if !attr.path().is_ident("path") { + /// return Ok(None); + /// } + /// + /// if let Meta::NameValue(meta) = &attr.meta { + /// if let Expr::Lit(expr) = &meta.value { + /// if let Lit::Str(lit_str) = &expr.lit { + /// return lit_str.parse().map(Some); + /// } + /// } + /// } + /// + /// let message = "expected #[path = \"...\"]"; + /// Err(Error::new_spanned(attr, message)) + /// } + /// ``` + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse<T: Parse>(&self) -> Result<T> { + self.parse_with(T::parse) + } + + /// Invoke parser on the content of this string literal. + /// + /// All spans in the syntax tree will point to the span of this `LitStr`. + /// + /// # Example + /// + /// ``` + /// # use proc_macro2::Span; + /// # use syn::{LitStr, Result}; + /// # + /// # fn main() -> Result<()> { + /// # let lit_str = LitStr::new("a::b::c", Span::call_site()); + /// # + /// # const IGNORE: &str = stringify! { + /// let lit_str: LitStr = /* ... 
*/; + /// # }; + /// + /// // Parse a string literal like "a::b::c" into a Path, not allowing + /// // generic arguments on any of the path segments. + /// let basic_path = lit_str.parse_with(syn::Path::parse_mod_style)?; + /// # + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_with<F: Parser>(&self, parser: F) -> Result<F::Output> { + use proc_macro2::Group; + + // Token stream with every span replaced by the given one. + fn respan_token_stream(stream: TokenStream, span: Span) -> TokenStream { + stream + .into_iter() + .map(|token| respan_token_tree(token, span)) + .collect() + } + + // Token tree with every span replaced by the given one. + fn respan_token_tree(mut token: TokenTree, span: Span) -> TokenTree { + match &mut token { + TokenTree::Group(g) => { + let stream = respan_token_stream(g.stream(), span); + *g = Group::new(g.delimiter(), stream); + g.set_span(span); + } + other => other.set_span(span), + } + token + } + + // Parse string literal into a token stream with every span equal to the + // original literal's span. + let mut tokens = TokenStream::from_str(&self.value())?; + tokens = respan_token_stream(tokens, self.span()); + + let result = parser.parse2(tokens)?; + + let suffix = self.suffix(); + if !suffix.is_empty() { + return Err(Error::new( + self.span(), + format!("unexpected suffix `{}` on string literal", suffix), + )); + } + + Ok(result) + } + + pub fn span(&self) -> Span { + self.repr.token.span() + } + + pub fn set_span(&mut self, span: Span) { + self.repr.token.set_span(span); + } + + pub fn suffix(&self) -> &str { + &self.repr.suffix + } + + pub fn token(&self) -> Literal { + self.repr.token.clone() + } +} + +impl LitByteStr { + pub fn new(value: &[u8], span: Span) -> Self { + let mut token = Literal::byte_string(value); + token.set_span(span); + LitByteStr { + repr: Box::new(LitRepr { + token, + suffix: Box::<str>::default(), + }), + } + } + + pub fn value(&self) -> Vec<u8> { + let repr = self.repr.token.to_string(); + let (value, _suffix) = value::parse_lit_byte_str(&repr); + value + } + + pub fn span(&self) -> Span { + self.repr.token.span() + } + + pub fn set_span(&mut self, span: Span) { + self.repr.token.set_span(span); + } + + pub fn suffix(&self) -> &str { + &self.repr.suffix + } + + pub fn token(&self) -> Literal { + self.repr.token.clone() + } +} + +impl LitByte { + pub fn new(value: u8, span: Span) -> Self { + let mut token = Literal::u8_suffixed(value); + token.set_span(span); + LitByte { + repr: Box::new(LitRepr { + token, + suffix: Box::<str>::default(), + }), + } + } + + pub fn value(&self) -> u8 { + let repr = self.repr.token.to_string(); + let (value, _suffix) = value::parse_lit_byte(&repr); + value + } + + pub fn span(&self) -> Span { + self.repr.token.span() + } + + pub fn set_span(&mut self, span: Span) { + self.repr.token.set_span(span); + } + + pub fn suffix(&self) -> &str { + &self.repr.suffix + } + + pub fn token(&self) -> Literal { + self.repr.token.clone() + } +} + +impl LitChar { + pub fn new(value: char, span: Span) -> Self { + let mut token = Literal::character(value); + token.set_span(span); + LitChar { + repr: Box::new(LitRepr { + token, + suffix: Box::<str>::default(), + }), + } + } + + pub fn value(&self) -> char { + let repr = self.repr.token.to_string(); + let (value, _suffix) = value::parse_lit_char(&repr); + value + } + + pub fn span(&self) -> Span { + self.repr.token.span() + } + + pub fn set_span(&mut self, span: Span) { + 
self.repr.token.set_span(span); + } + + pub fn suffix(&self) -> &str { + &self.repr.suffix + } + + pub fn token(&self) -> Literal { + self.repr.token.clone() + } +} + +impl LitInt { + pub fn new(repr: &str, span: Span) -> Self { + let (digits, suffix) = match value::parse_lit_int(repr) { + Some(parse) => parse, + None => panic!("Not an integer literal: `{}`", repr), + }; + + let mut token: Literal = repr.parse().unwrap(); + token.set_span(span); + LitInt { + repr: Box::new(LitIntRepr { + token, + digits, + suffix, + }), + } + } + + pub fn base10_digits(&self) -> &str { + &self.repr.digits + } + + /// Parses the literal into a selected number type. + /// + /// This is equivalent to `lit.base10_digits().parse()` except that the + /// resulting errors will be correctly spanned to point to the literal token + /// in the macro input. + /// + /// ``` + /// use syn::LitInt; + /// use syn::parse::{Parse, ParseStream, Result}; + /// + /// struct Port { + /// value: u16, + /// } + /// + /// impl Parse for Port { + /// fn parse(input: ParseStream) -> Result<Self> { + /// let lit: LitInt = input.parse()?; + /// let value = lit.base10_parse::<u16>()?; + /// Ok(Port { value }) + /// } + /// } + /// ``` + pub fn base10_parse<N>(&self) -> Result<N> + where + N: FromStr, + N::Err: Display, + { + self.base10_digits() + .parse() + .map_err(|err| Error::new(self.span(), err)) + } + + pub fn suffix(&self) -> &str { + &self.repr.suffix + } + + pub fn span(&self) -> Span { + self.repr.token.span() + } + + pub fn set_span(&mut self, span: Span) { + self.repr.token.set_span(span); + } + + pub fn token(&self) -> Literal { + self.repr.token.clone() + } +} + +impl From<Literal> for LitInt { + fn from(token: Literal) -> Self { + let repr = token.to_string(); + if let Some((digits, suffix)) = value::parse_lit_int(&repr) { + LitInt { + repr: Box::new(LitIntRepr { + token, + digits, + suffix, + }), + } + } else { + panic!("Not an integer literal: `{}`", repr); + } + } +} + +impl Display for LitInt { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + self.repr.token.fmt(formatter) + } +} + +impl LitFloat { + pub fn new(repr: &str, span: Span) -> Self { + let (digits, suffix) = match value::parse_lit_float(repr) { + Some(parse) => parse, + None => panic!("Not a float literal: `{}`", repr), + }; + + let mut token: Literal = repr.parse().unwrap(); + token.set_span(span); + LitFloat { + repr: Box::new(LitFloatRepr { + token, + digits, + suffix, + }), + } + } + + pub fn base10_digits(&self) -> &str { + &self.repr.digits + } + + pub fn base10_parse<N>(&self) -> Result<N> + where + N: FromStr, + N::Err: Display, + { + self.base10_digits() + .parse() + .map_err(|err| Error::new(self.span(), err)) + } + + pub fn suffix(&self) -> &str { + &self.repr.suffix + } + + pub fn span(&self) -> Span { + self.repr.token.span() + } + + pub fn set_span(&mut self, span: Span) { + self.repr.token.set_span(span); + } + + pub fn token(&self) -> Literal { + self.repr.token.clone() + } +} + +impl From<Literal> for LitFloat { + fn from(token: Literal) -> Self { + let repr = token.to_string(); + if let Some((digits, suffix)) = value::parse_lit_float(&repr) { + LitFloat { + repr: Box::new(LitFloatRepr { + token, + digits, + suffix, + }), + } + } else { + panic!("Not a float literal: `{}`", repr); + } + } +} + +impl Display for LitFloat { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + self.repr.token.fmt(formatter) + } +} + +impl LitBool { + pub fn new(value: bool, span: Span) -> Self { + LitBool { value, span } + } + 
+ pub fn value(&self) -> bool { + self.value + } + + pub fn span(&self) -> Span { + self.span + } + + pub fn set_span(&mut self, span: Span) { + self.span = span; + } + + pub fn token(&self) -> Ident { + let s = if self.value { "true" } else { "false" }; + Ident::new(s, self.span) + } +} + +#[cfg(feature = "extra-traits")] +mod debug_impls { + use super::*; + use std::fmt::{self, Debug}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Debug for LitStr { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl LitStr { + pub(crate) fn debug( + &self, + formatter: &mut fmt::Formatter, + name: &str, + ) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + self.debug(formatter, "LitStr") + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Debug for LitByteStr { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl LitByteStr { + pub(crate) fn debug( + &self, + formatter: &mut fmt::Formatter, + name: &str, + ) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + self.debug(formatter, "LitByteStr") + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Debug for LitByte { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl LitByte { + pub(crate) fn debug( + &self, + formatter: &mut fmt::Formatter, + name: &str, + ) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + self.debug(formatter, "LitByte") + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Debug for LitChar { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl LitChar { + pub(crate) fn debug( + &self, + formatter: &mut fmt::Formatter, + name: &str, + ) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + self.debug(formatter, "LitChar") + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Debug for LitInt { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl LitInt { + pub(crate) fn debug( + &self, + formatter: &mut fmt::Formatter, + name: &str, + ) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + self.debug(formatter, "LitInt") + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Debug for LitFloat { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl LitFloat { + pub(crate) fn debug( + &self, + formatter: &mut fmt::Formatter, + name: &str, + ) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + self.debug(formatter, "LitFloat") + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Debug for LitBool { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + impl LitBool { + pub(crate) fn debug( + &self, + formatter: &mut fmt::Formatter, + name: &str, + ) -> fmt::Result { + formatter + .debug_struct(name) + .field("value", &self.value) + .finish() + } + } + self.debug(formatter, "LitBool") + } + } +} + +#[cfg(feature = "clone-impls")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for LitRepr { + fn clone(&self) -> Self { + LitRepr { + token: self.token.clone(), + suffix: 
self.suffix.clone(), + } + } +} + +#[cfg(feature = "clone-impls")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for LitIntRepr { + fn clone(&self) -> Self { + LitIntRepr { + token: self.token.clone(), + digits: self.digits.clone(), + suffix: self.suffix.clone(), + } + } +} + +#[cfg(feature = "clone-impls")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Clone for LitFloatRepr { + fn clone(&self) -> Self { + LitFloatRepr { + token: self.token.clone(), + digits: self.digits.clone(), + suffix: self.suffix.clone(), + } + } +} + +macro_rules! lit_extra_traits { + ($ty:ident) => { + #[cfg(feature = "clone-impls")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] + impl Clone for $ty { + fn clone(&self) -> Self { + $ty { + repr: self.repr.clone(), + } + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl PartialEq for $ty { + fn eq(&self, other: &Self) -> bool { + self.repr.token.to_string() == other.repr.token.to_string() + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Hash for $ty { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + self.repr.token.to_string().hash(state); + } + } + + #[cfg(feature = "parsing")] + pub_if_not_doc! { + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn $ty(marker: lookahead::TokenMarker) -> $ty { + match marker {} + } + } + }; +} + +lit_extra_traits!(LitStr); +lit_extra_traits!(LitByteStr); +lit_extra_traits!(LitByte); +lit_extra_traits!(LitChar); +lit_extra_traits!(LitInt); +lit_extra_traits!(LitFloat); + +#[cfg(feature = "parsing")] +pub_if_not_doc! { + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn LitBool(marker: lookahead::TokenMarker) -> LitBool { + match marker {} + } +} + +/// The style of a string literal, either plain quoted or a raw string like +/// `r##"data"##`. +#[doc(hidden)] // https://github.com/dtolnay/syn/issues/1566 +pub enum StrStyle { + /// An ordinary string like `"data"`. + Cooked, + /// A raw string like `r##"data"##`. + /// + /// The unsigned integer is the number of `#` symbols used. + Raw(usize), +} + +#[cfg(feature = "parsing")] +pub_if_not_doc! 
{ + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn Lit(marker: lookahead::TokenMarker) -> Lit { + match marker {} + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::buffer::Cursor; + use crate::parse::{Parse, ParseStream, Result}; + use proc_macro2::Punct; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Lit { + fn parse(input: ParseStream) -> Result<Self> { + input.step(|cursor| { + if let Some((lit, rest)) = cursor.literal() { + return Ok((Lit::new(lit), rest)); + } + + if let Some((ident, rest)) = cursor.ident() { + let value = ident == "true"; + if value || ident == "false" { + let lit_bool = LitBool { + value, + span: ident.span(), + }; + return Ok((Lit::Bool(lit_bool), rest)); + } + } + + if let Some((punct, rest)) = cursor.punct() { + if punct.as_char() == '-' { + if let Some((lit, rest)) = parse_negative_lit(punct, rest) { + return Ok((lit, rest)); + } + } + } + + Err(cursor.error("expected literal")) + }) + } + } + + fn parse_negative_lit(neg: Punct, cursor: Cursor) -> Option<(Lit, Cursor)> { + let (lit, rest) = cursor.literal()?; + + let mut span = neg.span(); + span = span.join(lit.span()).unwrap_or(span); + + let mut repr = lit.to_string(); + repr.insert(0, '-'); + + if let Some((digits, suffix)) = value::parse_lit_int(&repr) { + let mut token: Literal = repr.parse().unwrap(); + token.set_span(span); + return Some(( + Lit::Int(LitInt { + repr: Box::new(LitIntRepr { + token, + digits, + suffix, + }), + }), + rest, + )); + } + + let (digits, suffix) = value::parse_lit_float(&repr)?; + let mut token: Literal = repr.parse().unwrap(); + token.set_span(span); + Some(( + Lit::Float(LitFloat { + repr: Box::new(LitFloatRepr { + token, + digits, + suffix, + }), + }), + rest, + )) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for LitStr { + fn parse(input: ParseStream) -> Result<Self> { + let head = input.fork(); + match input.parse() { + Ok(Lit::Str(lit)) => Ok(lit), + _ => Err(head.error("expected string literal")), + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for LitByteStr { + fn parse(input: ParseStream) -> Result<Self> { + let head = input.fork(); + match input.parse() { + Ok(Lit::ByteStr(lit)) => Ok(lit), + _ => Err(head.error("expected byte string literal")), + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for LitByte { + fn parse(input: ParseStream) -> Result<Self> { + let head = input.fork(); + match input.parse() { + Ok(Lit::Byte(lit)) => Ok(lit), + _ => Err(head.error("expected byte literal")), + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for LitChar { + fn parse(input: ParseStream) -> Result<Self> { + let head = input.fork(); + match input.parse() { + Ok(Lit::Char(lit)) => Ok(lit), + _ => Err(head.error("expected character literal")), + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for LitInt { + fn parse(input: ParseStream) -> Result<Self> { + let head = input.fork(); + match input.parse() { + Ok(Lit::Int(lit)) => Ok(lit), + _ => Err(head.error("expected integer literal")), + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for LitFloat { + fn parse(input: ParseStream) -> Result<Self> { + let head = input.fork(); + match input.parse() { + Ok(Lit::Float(lit)) => Ok(lit), + _ => Err(head.error("expected floating point literal")), + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for 
LitBool {
+        fn parse(input: ParseStream) -> Result<Self> {
+            let head = input.fork();
+            match input.parse() {
+                Ok(Lit::Bool(lit)) => Ok(lit),
+                _ => Err(head.error("expected boolean literal")),
+            }
+        }
+    }
+}
+
+#[cfg(feature = "printing")]
+mod printing {
+    use super::*;
+    use proc_macro2::TokenStream;
+    use quote::{ToTokens, TokenStreamExt};
+
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))]
+    impl ToTokens for LitStr {
+        fn to_tokens(&self, tokens: &mut TokenStream) {
+            self.repr.token.to_tokens(tokens);
+        }
+    }
+
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))]
+    impl ToTokens for LitByteStr {
+        fn to_tokens(&self, tokens: &mut TokenStream) {
+            self.repr.token.to_tokens(tokens);
+        }
+    }
+
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))]
+    impl ToTokens for LitByte {
+        fn to_tokens(&self, tokens: &mut TokenStream) {
+            self.repr.token.to_tokens(tokens);
+        }
+    }
+
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))]
+    impl ToTokens for LitChar {
+        fn to_tokens(&self, tokens: &mut TokenStream) {
+            self.repr.token.to_tokens(tokens);
+        }
+    }
+
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))]
+    impl ToTokens for LitInt {
+        fn to_tokens(&self, tokens: &mut TokenStream) {
+            self.repr.token.to_tokens(tokens);
+        }
+    }
+
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))]
+    impl ToTokens for LitFloat {
+        fn to_tokens(&self, tokens: &mut TokenStream) {
+            self.repr.token.to_tokens(tokens);
+        }
+    }
+
+    #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))]
+    impl ToTokens for LitBool {
+        fn to_tokens(&self, tokens: &mut TokenStream) {
+            tokens.append(self.token());
+        }
+    }
+}
+
+mod value {
+    use super::*;
+    use crate::bigint::BigInt;
+    use std::char;
+    use std::ops::{Index, RangeFrom};
+
+    impl Lit {
+        /// Interpret a Syn literal from a proc-macro2 literal.
+        pub fn new(token: Literal) -> Self {
+            let repr = token.to_string();
+
+            match byte(&repr, 0) {
+                // "...", r"...", r#"..."#
+                b'"' | b'r' => {
+                    let (_, suffix) = parse_lit_str(&repr);
+                    return Lit::Str(LitStr {
+                        repr: Box::new(LitRepr { token, suffix }),
+                    });
+                }
+                b'b' => match byte(&repr, 1) {
+                    // b"...", br"...", br#"..."#
+                    b'"' | b'r' => {
+                        let (_, suffix) = parse_lit_byte_str(&repr);
+                        return Lit::ByteStr(LitByteStr {
+                            repr: Box::new(LitRepr { token, suffix }),
+                        });
+                    }
+                    // b'...'
+                    b'\'' => {
+                        let (_, suffix) = parse_lit_byte(&repr);
+                        return Lit::Byte(LitByte {
+                            repr: Box::new(LitRepr { token, suffix }),
+                        });
+                    }
+                    _ => {}
+                },
+                // '...'
+                b'\'' => {
+                    let (_, suffix) = parse_lit_char(&repr);
+                    return Lit::Char(LitChar {
+                        repr: Box::new(LitRepr { token, suffix }),
+                    });
+                }
+                b'0'..=b'9' | b'-' => {
+                    // 0, 123, 0xFF, 0o77, 0b11
+                    if let Some((digits, suffix)) = parse_lit_int(&repr) {
+                        return Lit::Int(LitInt {
+                            repr: Box::new(LitIntRepr {
+                                token,
+                                digits,
+                                suffix,
+                            }),
+                        });
+                    }
+                    // 1.0, 1e-1, 1e+1
+                    if let Some((digits, suffix)) = parse_lit_float(&repr) {
+                        return Lit::Float(LitFloat {
+                            repr: Box::new(LitFloatRepr {
+                                token,
+                                digits,
+                                suffix,
+                            }),
+                        });
+                    }
+                }
+                // true, false
+                b't' | b'f' => {
+                    if repr == "true" || repr == "false" {
+                        return Lit::Bool(LitBool {
+                            value: repr == "true",
+                            span: token.span(),
+                        });
+                    }
+                }
+                // c"...", cr"...", cr#"..."#
+                // TODO: add a Lit::CStr variant?
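+                // Until such a variant exists, C string literals are passed
+                // through uninterpreted as `Lit::Verbatim` (next arm below).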
+                b'c' => return Lit::Verbatim(token),
+                b'(' if repr == "(/*ERROR*/)" => return Lit::Verbatim(token),
+                _ => {}
+            }
+
+            panic!("Unrecognized literal: `{}`", repr);
+        }
+
+        pub fn suffix(&self) -> &str {
+            match self {
+                Lit::Str(lit) => lit.suffix(),
+                Lit::ByteStr(lit) => lit.suffix(),
+                Lit::Byte(lit) => lit.suffix(),
+                Lit::Char(lit) => lit.suffix(),
+                Lit::Int(lit) => lit.suffix(),
+                Lit::Float(lit) => lit.suffix(),
+                Lit::Bool(_) | Lit::Verbatim(_) => "",
+            }
+        }
+
+        pub fn span(&self) -> Span {
+            match self {
+                Lit::Str(lit) => lit.span(),
+                Lit::ByteStr(lit) => lit.span(),
+                Lit::Byte(lit) => lit.span(),
+                Lit::Char(lit) => lit.span(),
+                Lit::Int(lit) => lit.span(),
+                Lit::Float(lit) => lit.span(),
+                Lit::Bool(lit) => lit.span,
+                Lit::Verbatim(lit) => lit.span(),
+            }
+        }
+
+        pub fn set_span(&mut self, span: Span) {
+            match self {
+                Lit::Str(lit) => lit.set_span(span),
+                Lit::ByteStr(lit) => lit.set_span(span),
+                Lit::Byte(lit) => lit.set_span(span),
+                Lit::Char(lit) => lit.set_span(span),
+                Lit::Int(lit) => lit.set_span(span),
+                Lit::Float(lit) => lit.set_span(span),
+                Lit::Bool(lit) => lit.span = span,
+                Lit::Verbatim(lit) => lit.set_span(span),
+            }
+        }
+    }
+
+    /// Get the byte at offset idx, or a default of `b'\0'` if we're looking
+    /// past the end of the input buffer.
+    pub(crate) fn byte<S: AsRef<[u8]> + ?Sized>(s: &S, idx: usize) -> u8 {
+        let s = s.as_ref();
+        if idx < s.len() {
+            s[idx]
+        } else {
+            0
+        }
+    }
+
+    fn next_chr(s: &str) -> char {
+        s.chars().next().unwrap_or('\0')
+    }
+
+    // Returns (content, suffix).
+    pub(crate) fn parse_lit_str(s: &str) -> (Box<str>, Box<str>) {
+        match byte(s, 0) {
+            b'"' => parse_lit_str_cooked(s),
+            b'r' => parse_lit_str_raw(s),
+            _ => unreachable!(),
+        }
+    }
+
+    // Clippy false positive
+    // https://github.com/rust-lang-nursery/rust-clippy/issues/2329
+    #[allow(clippy::needless_continue)]
+    fn parse_lit_str_cooked(mut s: &str) -> (Box<str>, Box<str>) {
+        assert_eq!(byte(s, 0), b'"');
+        s = &s[1..];
+
+        let mut content = String::new();
+        'outer: loop {
+            let ch = match byte(s, 0) {
+                b'"' => break,
+                b'\\' => {
+                    let b = byte(s, 1);
+                    s = &s[2..];
+                    match b {
+                        b'x' => {
+                            let (byte, rest) = backslash_x(s);
+                            s = rest;
+                            assert!(byte <= 0x7F, "Invalid \\x byte in string literal");
+                            char::from_u32(u32::from(byte)).unwrap()
+                        }
+                        b'u' => {
+                            let (chr, rest) = backslash_u(s);
+                            s = rest;
+                            chr
+                        }
+                        b'n' => '\n',
+                        b'r' => '\r',
+                        b't' => '\t',
+                        b'\\' => '\\',
+                        b'0' => '\0',
+                        b'\'' => '\'',
+                        b'"' => '"',
+                        b'\r' | b'\n' => loop {
+                            let b = byte(s, 0);
+                            match b {
+                                b' ' | b'\t' | b'\n' | b'\r' => s = &s[1..],
+                                _ => continue 'outer,
+                            }
+                        },
+                        b => panic!("unexpected byte {:?} after \\ character in string literal", b),
+                    }
+                }
+                b'\r' => {
+                    assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
+                    s = &s[2..];
+                    '\n'
+                }
+                _ => {
+                    let ch = next_chr(s);
+                    s = &s[ch.len_utf8()..];
+                    ch
+                }
+            };
+            content.push(ch);
+        }
+
+        assert!(s.starts_with('"'));
+        let content = content.into_boxed_str();
+        let suffix = s[1..].to_owned().into_boxed_str();
+        (content, suffix)
+    }
+
+    fn parse_lit_str_raw(mut s: &str) -> (Box<str>, Box<str>) {
+        assert_eq!(byte(s, 0), b'r');
+        s = &s[1..];
+
+        let mut pounds = 0;
+        while byte(s, pounds) == b'#' {
+            pounds += 1;
+        }
+        assert_eq!(byte(s, pounds), b'"');
+        let close = s.rfind('"').unwrap();
+        for end in s[close + 1..close + 1 + pounds].bytes() {
+            assert_eq!(end, b'#');
+        }
+
+        let content = s[pounds + 1..close].to_owned().into_boxed_str();
+        let suffix = s[close + 1 +
pounds..].to_owned().into_boxed_str(); + (content, suffix) + } + + // Returns (content, suffix). + pub(crate) fn parse_lit_byte_str(s: &str) -> (Vec<u8>, Box<str>) { + assert_eq!(byte(s, 0), b'b'); + match byte(s, 1) { + b'"' => parse_lit_byte_str_cooked(s), + b'r' => parse_lit_byte_str_raw(s), + _ => unreachable!(), + } + } + + // Clippy false positive + // https://github.com/rust-lang-nursery/rust-clippy/issues/2329 + #[allow(clippy::needless_continue)] + fn parse_lit_byte_str_cooked(mut s: &str) -> (Vec<u8>, Box<str>) { + assert_eq!(byte(s, 0), b'b'); + assert_eq!(byte(s, 1), b'"'); + s = &s[2..]; + + // We're going to want to have slices which don't respect codepoint boundaries. + let mut v = s.as_bytes(); + + let mut out = Vec::new(); + 'outer: loop { + let byte = match byte(v, 0) { + b'"' => break, + b'\\' => { + let b = byte(v, 1); + v = &v[2..]; + match b { + b'x' => { + let (b, rest) = backslash_x(v); + v = rest; + b + } + b'n' => b'\n', + b'r' => b'\r', + b't' => b'\t', + b'\\' => b'\\', + b'0' => b'\0', + b'\'' => b'\'', + b'"' => b'"', + b'\r' | b'\n' => loop { + let byte = byte(v, 0); + if matches!(byte, b' ' | b'\t' | b'\n' | b'\r') { + v = &v[1..]; + } else { + continue 'outer; + } + }, + b => panic!("unexpected byte {:?} after \\ character in byte literal", b), + } + } + b'\r' => { + assert_eq!(byte(v, 1), b'\n', "Bare CR not allowed in string"); + v = &v[2..]; + b'\n' + } + b => { + v = &v[1..]; + b + } + }; + out.push(byte); + } + + assert_eq!(byte(v, 0), b'"'); + let suffix = s[s.len() - v.len() + 1..].to_owned().into_boxed_str(); + (out, suffix) + } + + fn parse_lit_byte_str_raw(s: &str) -> (Vec<u8>, Box<str>) { + assert_eq!(byte(s, 0), b'b'); + let (value, suffix) = parse_lit_str_raw(&s[1..]); + (String::from(value).into_bytes(), suffix) + } + + // Returns (value, suffix). + pub(crate) fn parse_lit_byte(s: &str) -> (u8, Box<str>) { + assert_eq!(byte(s, 0), b'b'); + assert_eq!(byte(s, 1), b'\''); + + // We're going to want to have slices which don't respect codepoint boundaries. + let mut v = s[2..].as_bytes(); + + let b = match byte(v, 0) { + b'\\' => { + let b = byte(v, 1); + v = &v[2..]; + match b { + b'x' => { + let (b, rest) = backslash_x(v); + v = rest; + b + } + b'n' => b'\n', + b'r' => b'\r', + b't' => b'\t', + b'\\' => b'\\', + b'0' => b'\0', + b'\'' => b'\'', + b'"' => b'"', + b => panic!("unexpected byte {:?} after \\ character in byte literal", b), + } + } + b => { + v = &v[1..]; + b + } + }; + + assert_eq!(byte(v, 0), b'\''); + let suffix = s[s.len() - v.len() + 1..].to_owned().into_boxed_str(); + (b, suffix) + } + + // Returns (value, suffix). 
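+    // For example, the token text `'a'` yields ('a', "") and `'\x41'`
+    // yields ('A', ""); any characters after the closing quote are
+    // returned verbatim as the suffix.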
+    pub(crate) fn parse_lit_char(mut s: &str) -> (char, Box<str>) {
+        assert_eq!(byte(s, 0), b'\'');
+        s = &s[1..];
+
+        let ch = match byte(s, 0) {
+            b'\\' => {
+                let b = byte(s, 1);
+                s = &s[2..];
+                match b {
+                    b'x' => {
+                        let (byte, rest) = backslash_x(s);
+                        s = rest;
+                        assert!(byte <= 0x7F, "Invalid \\x byte in character literal");
+                        char::from_u32(u32::from(byte)).unwrap()
+                    }
+                    b'u' => {
+                        let (chr, rest) = backslash_u(s);
+                        s = rest;
+                        chr
+                    }
+                    b'n' => '\n',
+                    b'r' => '\r',
+                    b't' => '\t',
+                    b'\\' => '\\',
+                    b'0' => '\0',
+                    b'\'' => '\'',
+                    b'"' => '"',
+                    b => panic!("unexpected byte {:?} after \\ character in character literal", b),
+                }
+            }
+            _ => {
+                let ch = next_chr(s);
+                s = &s[ch.len_utf8()..];
+                ch
+            }
+        };
+        assert_eq!(byte(s, 0), b'\'');
+        let suffix = s[1..].to_owned().into_boxed_str();
+        (ch, suffix)
+    }
+
+    fn backslash_x<S>(s: &S) -> (u8, &S)
+    where
+        S: Index<RangeFrom<usize>, Output = S> + AsRef<[u8]> + ?Sized,
+    {
+        let mut ch = 0;
+        let b0 = byte(s, 0);
+        let b1 = byte(s, 1);
+        ch += 0x10
+            * match b0 {
+                b'0'..=b'9' => b0 - b'0',
+                b'a'..=b'f' => 10 + (b0 - b'a'),
+                b'A'..=b'F' => 10 + (b0 - b'A'),
+                _ => panic!("unexpected non-hex character after \\x"),
+            };
+        ch += match b1 {
+            b'0'..=b'9' => b1 - b'0',
+            b'a'..=b'f' => 10 + (b1 - b'a'),
+            b'A'..=b'F' => 10 + (b1 - b'A'),
+            _ => panic!("unexpected non-hex character after \\x"),
+        };
+        (ch, &s[2..])
+    }
+
+    fn backslash_u(mut s: &str) -> (char, &str) {
+        if byte(s, 0) != b'{' {
+            panic!("{}", "expected { after \\u");
+        }
+        s = &s[1..];
+
+        let mut ch = 0;
+        let mut digits = 0;
+        loop {
+            let b = byte(s, 0);
+            let digit = match b {
+                b'0'..=b'9' => b - b'0',
+                b'a'..=b'f' => 10 + b - b'a',
+                b'A'..=b'F' => 10 + b - b'A',
+                b'_' if digits > 0 => {
+                    s = &s[1..];
+                    continue;
+                }
+                b'}' if digits == 0 => panic!("invalid empty unicode escape"),
+                b'}' => break,
+                _ => panic!("unexpected non-hex character after \\u"),
+            };
+            if digits == 6 {
+                panic!("overlong unicode escape (must have at most 6 hex digits)");
+            }
+            ch *= 0x10;
+            ch += u32::from(digit);
+            digits += 1;
+            s = &s[1..];
+        }
+        assert!(byte(s, 0) == b'}');
+        s = &s[1..];
+
+        if let Some(ch) = char::from_u32(ch) {
+            (ch, s)
+        } else {
+            panic!("character code {:x} is not a valid unicode character", ch);
+        }
+    }
+
+    // Returns base 10 digits and suffix.
+    pub(crate) fn parse_lit_int(mut s: &str) -> Option<(Box<str>, Box<str>)> {
+        let negative = byte(s, 0) == b'-';
+        if negative {
+            s = &s[1..];
+        }
+
+        let base = match (byte(s, 0), byte(s, 1)) {
+            (b'0', b'x') => {
+                s = &s[2..];
+                16
+            }
+            (b'0', b'o') => {
+                s = &s[2..];
+                8
+            }
+            (b'0', b'b') => {
+                s = &s[2..];
+                2
+            }
+            (b'0'..=b'9', _) => 10,
+            _ => return None,
+        };
+
+        let mut value = BigInt::new();
+        let mut has_digit = false;
+        'outer: loop {
+            let b = byte(s, 0);
+            let digit = match b {
+                b'0'..=b'9' => b - b'0',
+                b'a'..=b'f' if base > 10 => b - b'a' + 10,
+                b'A'..=b'F' if base > 10 => b - b'A' + 10,
+                b'_' => {
+                    s = &s[1..];
+                    continue;
+                }
+                // If looking at a floating point literal, we don't want to
+                // consider it an integer.
+                b'.'
if base == 10 => return None, + b'e' | b'E' if base == 10 => { + let mut has_exp = false; + for (i, b) in s[1..].bytes().enumerate() { + match b { + b'_' => {} + b'-' | b'+' => return None, + b'0'..=b'9' => has_exp = true, + _ => { + let suffix = &s[1 + i..]; + if has_exp && crate::ident::xid_ok(suffix) { + return None; + } else { + break 'outer; + } + } + } + } + if has_exp { + return None; + } else { + break; + } + } + _ => break, + }; + + if digit >= base { + return None; + } + + has_digit = true; + value *= base; + value += digit; + s = &s[1..]; + } + + if !has_digit { + return None; + } + + let suffix = s; + if suffix.is_empty() || crate::ident::xid_ok(suffix) { + let mut repr = value.to_string(); + if negative { + repr.insert(0, '-'); + } + Some((repr.into_boxed_str(), suffix.to_owned().into_boxed_str())) + } else { + None + } + } + + // Returns base 10 digits and suffix. + pub(crate) fn parse_lit_float(input: &str) -> Option<(Box<str>, Box<str>)> { + // Rust's floating point literals are very similar to the ones parsed by + // the standard library, except that rust's literals can contain + // ignorable underscores. Let's remove those underscores. + + let mut bytes = input.to_owned().into_bytes(); + + let start = (*bytes.first()? == b'-') as usize; + match bytes.get(start)? { + b'0'..=b'9' => {} + _ => return None, + } + + let mut read = start; + let mut write = start; + let mut has_dot = false; + let mut has_e = false; + let mut has_sign = false; + let mut has_exponent = false; + while read < bytes.len() { + match bytes[read] { + b'_' => { + // Don't increase write + read += 1; + continue; + } + b'0'..=b'9' => { + if has_e { + has_exponent = true; + } + bytes[write] = bytes[read]; + } + b'.' => { + if has_e || has_dot { + return None; + } + has_dot = true; + bytes[write] = b'.'; + } + b'e' | b'E' => { + match bytes[read + 1..] + .iter() + .find(|b| **b != b'_') + .unwrap_or(&b'\0') + { + b'-' | b'+' | b'0'..=b'9' => {} + _ => break, + } + if has_e { + if has_exponent { + break; + } else { + return None; + } + } + has_e = true; + bytes[write] = b'e'; + } + b'-' | b'+' => { + if has_sign || has_exponent || !has_e { + return None; + } + has_sign = true; + if bytes[read] == b'-' { + bytes[write] = bytes[read]; + } else { + // Omit '+' + read += 1; + continue; + } + } + _ => break, + } + read += 1; + write += 1; + } + + if has_e && !has_exponent { + return None; + } + + let mut digits = String::from_utf8(bytes).unwrap(); + let suffix = digits.split_off(read); + digits.truncate(write); + if suffix.is_empty() || crate::ident::xid_ok(&suffix) { + Some((digits.into_boxed_str(), suffix.into_boxed_str())) + } else { + None + } + } +} diff --git a/vendor/syn/src/lookahead.rs b/vendor/syn/src/lookahead.rs new file mode 100644 index 0000000..e89a4c7 --- /dev/null +++ b/vendor/syn/src/lookahead.rs @@ -0,0 +1,169 @@ +use crate::buffer::Cursor; +use crate::error::{self, Error}; +use crate::sealed::lookahead::Sealed; +use crate::span::IntoSpans; +use crate::token::Token; +use proc_macro2::{Delimiter, Span}; +use std::cell::RefCell; + +/// Support for checking the next token in a stream to decide how to parse. +/// +/// An important advantage over [`ParseStream::peek`] is that here we +/// automatically construct an appropriate error message based on the token +/// alternatives that get peeked. If you are producing your own error message, +/// go ahead and use `ParseStream::peek` instead. +/// +/// Use [`ParseStream::lookahead1`] to construct this object. 
+/// +/// [`ParseStream::peek`]: crate::parse::ParseBuffer::peek +/// [`ParseStream::lookahead1`]: crate::parse::ParseBuffer::lookahead1 +/// +/// Consuming tokens from the source stream after constructing a lookahead +/// object does not also advance the lookahead object. +/// +/// # Example +/// +/// ``` +/// use syn::{ConstParam, Ident, Lifetime, LifetimeParam, Result, Token, TypeParam}; +/// use syn::parse::{Parse, ParseStream}; +/// +/// // A generic parameter, a single one of the comma-separated elements inside +/// // angle brackets in: +/// // +/// // fn f<T: Clone, 'a, 'b: 'a, const N: usize>() { ... } +/// // +/// // On invalid input, lookahead gives us a reasonable error message. +/// // +/// // error: expected one of: identifier, lifetime, `const` +/// // | +/// // 5 | fn f<!Sized>() {} +/// // | ^ +/// enum GenericParam { +/// Type(TypeParam), +/// Lifetime(LifetimeParam), +/// Const(ConstParam), +/// } +/// +/// impl Parse for GenericParam { +/// fn parse(input: ParseStream) -> Result<Self> { +/// let lookahead = input.lookahead1(); +/// if lookahead.peek(Ident) { +/// input.parse().map(GenericParam::Type) +/// } else if lookahead.peek(Lifetime) { +/// input.parse().map(GenericParam::Lifetime) +/// } else if lookahead.peek(Token![const]) { +/// input.parse().map(GenericParam::Const) +/// } else { +/// Err(lookahead.error()) +/// } +/// } +/// } +/// ``` +pub struct Lookahead1<'a> { + scope: Span, + cursor: Cursor<'a>, + comparisons: RefCell<Vec<&'static str>>, +} + +pub(crate) fn new(scope: Span, cursor: Cursor) -> Lookahead1 { + Lookahead1 { + scope, + cursor, + comparisons: RefCell::new(Vec::new()), + } +} + +fn peek_impl( + lookahead: &Lookahead1, + peek: fn(Cursor) -> bool, + display: fn() -> &'static str, +) -> bool { + if peek(lookahead.cursor) { + return true; + } + lookahead.comparisons.borrow_mut().push(display()); + false +} + +impl<'a> Lookahead1<'a> { + /// Looks at the next token in the parse stream to determine whether it + /// matches the requested type of token. + /// + /// # Syntax + /// + /// Note that this method does not use turbofish syntax. Pass the peek type + /// inside of parentheses. + /// + /// - `input.peek(Token![struct])` + /// - `input.peek(Token![==])` + /// - `input.peek(Ident)` *(does not accept keywords)* + /// - `input.peek(Ident::peek_any)` + /// - `input.peek(Lifetime)` + /// - `input.peek(token::Brace)` + pub fn peek<T: Peek>(&self, token: T) -> bool { + let _ = token; + peek_impl(self, T::Token::peek, T::Token::display) + } + + /// Triggers an error at the current position of the parse stream. + /// + /// The error message will identify all of the expected token types that + /// have been peeked against this lookahead instance. + pub fn error(self) -> Error { + let comparisons = self.comparisons.borrow(); + match comparisons.len() { + 0 => { + if self.cursor.eof() { + Error::new(self.scope, "unexpected end of input") + } else { + Error::new(self.cursor.span(), "unexpected token") + } + } + 1 => { + let message = format!("expected {}", comparisons[0]); + error::new_at(self.scope, self.cursor, message) + } + 2 => { + let message = format!("expected {} or {}", comparisons[0], comparisons[1]); + error::new_at(self.scope, self.cursor, message) + } + _ => { + let join = comparisons.join(", "); + let message = format!("expected one of: {}", join); + error::new_at(self.scope, self.cursor, message) + } + } + } +} + +/// Types that can be parsed by looking at just one token. 
+/// +/// Use [`ParseStream::peek`] to peek one of these types in a parse stream +/// without consuming it from the stream. +/// +/// This trait is sealed and cannot be implemented for types outside of Syn. +/// +/// [`ParseStream::peek`]: crate::parse::ParseBuffer::peek +pub trait Peek: Sealed { + // Not public API. + #[doc(hidden)] + type Token: Token; +} + +impl<F: Copy + FnOnce(TokenMarker) -> T, T: Token> Peek for F { + type Token = T; +} + +pub enum TokenMarker {} + +impl<S> IntoSpans<S> for TokenMarker { + fn into_spans(self) -> S { + match self {} + } +} + +pub(crate) fn is_delimiter(cursor: Cursor, delimiter: Delimiter) -> bool { + cursor.group(delimiter).is_some() +} + +impl<F: Copy + FnOnce(TokenMarker) -> T, T: Token> Sealed for F {} diff --git a/vendor/syn/src/mac.rs b/vendor/syn/src/mac.rs new file mode 100644 index 0000000..8f687cc --- /dev/null +++ b/vendor/syn/src/mac.rs @@ -0,0 +1,211 @@ +use super::*; +use crate::token::{Brace, Bracket, Paren}; +use proc_macro2::extra::DelimSpan; +#[cfg(any(feature = "parsing", feature = "printing"))] +use proc_macro2::Delimiter; +use proc_macro2::TokenStream; +#[cfg(feature = "parsing")] +use proc_macro2::TokenTree; + +#[cfg(feature = "parsing")] +use crate::parse::{Parse, ParseStream, Parser, Result}; + +ast_struct! { + /// A macro invocation: `println!("{}", mac)`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct Macro { + pub path: Path, + pub bang_token: Token![!], + pub delimiter: MacroDelimiter, + pub tokens: TokenStream, + } +} + +ast_enum! { + /// A grouping token that surrounds a macro body: `m!(...)` or `m!{...}` or `m![...]`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub enum MacroDelimiter { + Paren(Paren), + Brace(Brace), + Bracket(Bracket), + } +} + +impl MacroDelimiter { + pub fn span(&self) -> &DelimSpan { + match self { + MacroDelimiter::Paren(token) => &token.span, + MacroDelimiter::Brace(token) => &token.span, + MacroDelimiter::Bracket(token) => &token.span, + } + } +} + +impl Macro { + /// Parse the tokens within the macro invocation's delimiters into a syntax + /// tree. + /// + /// This is equivalent to `syn::parse2::<T>(mac.tokens)` except that it + /// produces a more useful span when `tokens` is empty. + /// + /// # Example + /// + /// ``` + /// use syn::{parse_quote, Expr, ExprLit, Ident, Lit, LitStr, Macro, Token}; + /// use syn::ext::IdentExt; + /// use syn::parse::{Error, Parse, ParseStream, Result}; + /// use syn::punctuated::Punctuated; + /// + /// // The arguments expected by libcore's format_args macro, and as a + /// // result most other formatting and printing macros like println. 
+ /// // + /// // println!("{} is {number:.prec$}", "x", prec=5, number=0.01) + /// struct FormatArgs { + /// format_string: Expr, + /// positional_args: Vec<Expr>, + /// named_args: Vec<(Ident, Expr)>, + /// } + /// + /// impl Parse for FormatArgs { + /// fn parse(input: ParseStream) -> Result<Self> { + /// let format_string: Expr; + /// let mut positional_args = Vec::new(); + /// let mut named_args = Vec::new(); + /// + /// format_string = input.parse()?; + /// while !input.is_empty() { + /// input.parse::<Token![,]>()?; + /// if input.is_empty() { + /// break; + /// } + /// if input.peek(Ident::peek_any) && input.peek2(Token![=]) { + /// while !input.is_empty() { + /// let name: Ident = input.call(Ident::parse_any)?; + /// input.parse::<Token![=]>()?; + /// let value: Expr = input.parse()?; + /// named_args.push((name, value)); + /// if input.is_empty() { + /// break; + /// } + /// input.parse::<Token![,]>()?; + /// } + /// break; + /// } + /// positional_args.push(input.parse()?); + /// } + /// + /// Ok(FormatArgs { + /// format_string, + /// positional_args, + /// named_args, + /// }) + /// } + /// } + /// + /// // Extract the first argument, the format string literal, from an + /// // invocation of a formatting or printing macro. + /// fn get_format_string(m: &Macro) -> Result<LitStr> { + /// let args: FormatArgs = m.parse_body()?; + /// match args.format_string { + /// Expr::Lit(ExprLit { lit: Lit::Str(lit), .. }) => Ok(lit), + /// other => { + /// // First argument was not a string literal expression. + /// // Maybe something like: println!(concat!(...), ...) + /// Err(Error::new_spanned(other, "format string must be a string literal")) + /// } + /// } + /// } + /// + /// fn main() { + /// let invocation = parse_quote! { + /// println!("{:?}", Instant::now()) + /// }; + /// let lit = get_format_string(&invocation).unwrap(); + /// assert_eq!(lit.value(), "{:?}"); + /// } + /// ``` + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_body<T: Parse>(&self) -> Result<T> { + self.parse_body_with(T::parse) + } + + /// Parse the tokens within the macro invocation's delimiters using the + /// given parser. 
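+    ///
+    /// For example, a comma-separated argument list can be parsed with
+    /// `Punctuated::parse_terminated`:
+    ///
+    /// ```
+    /// use syn::punctuated::Punctuated;
+    /// use syn::{parse_quote, Expr, Macro, Token};
+    ///
+    /// let mac: Macro = parse_quote!(m!(1, 2, 3));
+    /// let args = mac
+    ///     .parse_body_with(Punctuated::<Expr, Token![,]>::parse_terminated)
+    ///     .unwrap();
+    /// assert_eq!(args.len(), 3);
+    /// ```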
+ #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_body_with<F: Parser>(&self, parser: F) -> Result<F::Output> { + let scope = self.delimiter.span().close(); + crate::parse::parse_scoped(parser, scope, self.tokens.clone()) + } +} + +#[cfg(feature = "parsing")] +pub(crate) fn parse_delimiter(input: ParseStream) -> Result<(MacroDelimiter, TokenStream)> { + input.step(|cursor| { + if let Some((TokenTree::Group(g), rest)) = cursor.token_tree() { + let span = g.delim_span(); + let delimiter = match g.delimiter() { + Delimiter::Parenthesis => MacroDelimiter::Paren(Paren(span)), + Delimiter::Brace => MacroDelimiter::Brace(Brace(span)), + Delimiter::Bracket => MacroDelimiter::Bracket(Bracket(span)), + Delimiter::None => { + return Err(cursor.error("expected delimiter")); + } + }; + Ok(((delimiter, g.stream()), rest)) + } else { + Err(cursor.error("expected delimiter")) + } + }) +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::parse::{Parse, ParseStream, Result}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Macro { + fn parse(input: ParseStream) -> Result<Self> { + let tokens; + Ok(Macro { + path: input.call(Path::parse_mod_style)?, + bang_token: input.parse()?, + delimiter: { + let (delimiter, content) = parse_delimiter(input)?; + tokens = content; + delimiter + }, + tokens, + }) + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use proc_macro2::TokenStream; + use quote::ToTokens; + + impl MacroDelimiter { + pub(crate) fn surround(&self, tokens: &mut TokenStream, inner: TokenStream) { + let (delim, span) = match self { + MacroDelimiter::Paren(paren) => (Delimiter::Parenthesis, paren.span), + MacroDelimiter::Brace(brace) => (Delimiter::Brace, brace.span), + MacroDelimiter::Bracket(bracket) => (Delimiter::Bracket, bracket.span), + }; + token::printing::delim(delim, span.join(), tokens, inner); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Macro { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.path.to_tokens(tokens); + self.bang_token.to_tokens(tokens); + self.delimiter.surround(tokens, self.tokens.clone()); + } + } +} diff --git a/vendor/syn/src/macros.rs b/vendor/syn/src/macros.rs new file mode 100644 index 0000000..46d2832 --- /dev/null +++ b/vendor/syn/src/macros.rs @@ -0,0 +1,176 @@ +#[cfg_attr( + not(any(feature = "full", feature = "derive")), + allow(unknown_lints, unused_macro_rules) +)] +macro_rules! ast_struct { + ( + $(#[$attr:meta])* + $pub:ident $struct:ident $name:ident #full $body:tt + ) => { + check_keyword_matches!(pub $pub); + check_keyword_matches!(struct $struct); + + #[cfg(feature = "full")] + $(#[$attr])* $pub $struct $name $body + + #[cfg(not(feature = "full"))] + $(#[$attr])* $pub $struct $name { + _noconstruct: ::std::marker::PhantomData<::proc_macro2::Span>, + } + + #[cfg(all(not(feature = "full"), feature = "printing"))] + impl ::quote::ToTokens for $name { + fn to_tokens(&self, _: &mut ::proc_macro2::TokenStream) { + unreachable!() + } + } + }; + + ( + $(#[$attr:meta])* + $pub:ident $struct:ident $name:ident $body:tt + ) => { + check_keyword_matches!(pub $pub); + check_keyword_matches!(struct $struct); + + $(#[$attr])* $pub $struct $name $body + }; +} + +#[cfg(any(feature = "full", feature = "derive"))] +macro_rules! 
ast_enum { + ( + $(#[$enum_attr:meta])* + $pub:ident $enum:ident $name:ident $body:tt + ) => { + check_keyword_matches!(pub $pub); + check_keyword_matches!(enum $enum); + + $(#[$enum_attr])* $pub $enum $name $body + }; +} + +macro_rules! ast_enum_of_structs { + ( + $(#[$enum_attr:meta])* + $pub:ident $enum:ident $name:ident $body:tt + ) => { + check_keyword_matches!(pub $pub); + check_keyword_matches!(enum $enum); + + $(#[$enum_attr])* $pub $enum $name $body + + ast_enum_of_structs_impl!($name $body); + }; +} + +macro_rules! ast_enum_of_structs_impl { + ( + $name:ident { + $( + $(#[cfg $cfg_attr:tt])* + $(#[doc $($doc_attr:tt)*])* + $variant:ident $( ($($member:ident)::+) )*, + )* + } + ) => { + $($( + ast_enum_from_struct!($name::$variant, $($member)::+); + )*)* + + #[cfg(feature = "printing")] + generate_to_tokens! { + () + tokens + $name { + $( + $(#[cfg $cfg_attr])* + $(#[doc $($doc_attr)*])* + $variant $($($member)::+)*, + )* + } + } + }; +} + +macro_rules! ast_enum_from_struct { + // No From<TokenStream> for verbatim variants. + ($name:ident::Verbatim, $member:ident) => {}; + + ($name:ident::$variant:ident, $member:ident) => { + impl From<$member> for $name { + fn from(e: $member) -> $name { + $name::$variant(e) + } + } + }; +} + +#[cfg(feature = "printing")] +macro_rules! generate_to_tokens { + ( + ($($arms:tt)*) $tokens:ident $name:ident { + $(#[cfg $cfg_attr:tt])* + $(#[doc $($doc_attr:tt)*])* + $variant:ident, + $($next:tt)* + } + ) => { + generate_to_tokens!( + ($($arms)* $(#[cfg $cfg_attr])* $name::$variant => {}) + $tokens $name { $($next)* } + ); + }; + + ( + ($($arms:tt)*) $tokens:ident $name:ident { + $(#[cfg $cfg_attr:tt])* + $(#[doc $($doc_attr:tt)*])* + $variant:ident $member:ident, + $($next:tt)* + } + ) => { + generate_to_tokens!( + ($($arms)* $(#[cfg $cfg_attr])* $name::$variant(_e) => _e.to_tokens($tokens),) + $tokens $name { $($next)* } + ); + }; + + (($($arms:tt)*) $tokens:ident $name:ident {}) => { + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ::quote::ToTokens for $name { + fn to_tokens(&self, $tokens: &mut ::proc_macro2::TokenStream) { + match self { + $($arms)* + } + } + } + }; +} + +// Rustdoc bug: does not respect the doc(hidden) on some items. +#[cfg(all(doc, feature = "parsing"))] +macro_rules! pub_if_not_doc { + ($(#[$m:meta])* $pub:ident $($item:tt)*) => { + check_keyword_matches!(pub $pub); + + $(#[$m])* + $pub(crate) $($item)* + }; +} + +#[cfg(all(not(doc), feature = "parsing"))] +macro_rules! pub_if_not_doc { + ($(#[$m:meta])* $pub:ident $($item:tt)*) => { + check_keyword_matches!(pub $pub); + + $(#[$m])* + $pub $($item)* + }; +} + +macro_rules! check_keyword_matches { + (enum enum) => {}; + (pub pub) => {}; + (struct struct) => {}; +} diff --git a/vendor/syn/src/meta.rs b/vendor/syn/src/meta.rs new file mode 100644 index 0000000..66a0a46 --- /dev/null +++ b/vendor/syn/src/meta.rs @@ -0,0 +1,426 @@ +//! Facility for interpreting structured content inside of an `Attribute`. + +use crate::ext::IdentExt as _; +use crate::lit::Lit; +use crate::parse::{Error, ParseStream, Parser, Result}; +use crate::path::{Path, PathSegment}; +use crate::punctuated::Punctuated; +use proc_macro2::Ident; +use std::fmt::Display; + +/// Make a parser that is usable with `parse_macro_input!` in a +/// `#[proc_macro_attribute]` macro. +/// +/// *Warning:* When parsing attribute args **other than** the +/// `proc_macro::TokenStream` input of a `proc_macro_attribute`, you do **not** +/// need this function. 
In several cases your callers will get worse error +/// messages if you use this function, because the surrounding delimiter's span +/// is concealed from attribute macros by rustc. Use +/// [`Attribute::parse_nested_meta`] instead. +/// +/// [`Attribute::parse_nested_meta`]: crate::Attribute::parse_nested_meta +/// +/// # Example +/// +/// This example implements an attribute macro whose invocations look like this: +/// +/// ``` +/// # const IGNORE: &str = stringify! { +/// #[tea(kind = "EarlGrey", hot)] +/// struct Picard {...} +/// # }; +/// ``` +/// +/// The "parameters" supported by the attribute are: +/// +/// - `kind = "..."` +/// - `hot` +/// - `with(sugar, milk, ...)`, a comma-separated list of ingredients +/// +/// ``` +/// # extern crate proc_macro; +/// # +/// use proc_macro::TokenStream; +/// use syn::{parse_macro_input, LitStr, Path}; +/// +/// # const IGNORE: &str = stringify! { +/// #[proc_macro_attribute] +/// # }; +/// pub fn tea(args: TokenStream, input: TokenStream) -> TokenStream { +/// let mut kind: Option<LitStr> = None; +/// let mut hot: bool = false; +/// let mut with: Vec<Path> = Vec::new(); +/// let tea_parser = syn::meta::parser(|meta| { +/// if meta.path.is_ident("kind") { +/// kind = Some(meta.value()?.parse()?); +/// Ok(()) +/// } else if meta.path.is_ident("hot") { +/// hot = true; +/// Ok(()) +/// } else if meta.path.is_ident("with") { +/// meta.parse_nested_meta(|meta| { +/// with.push(meta.path); +/// Ok(()) +/// }) +/// } else { +/// Err(meta.error("unsupported tea property")) +/// } +/// }); +/// +/// parse_macro_input!(args with tea_parser); +/// eprintln!("kind={kind:?} hot={hot} with={with:?}"); +/// +/// /* ... */ +/// # TokenStream::new() +/// } +/// ``` +/// +/// The `syn::meta` library will take care of dealing with the commas including +/// trailing commas, and producing sensible error messages on unexpected input. +/// +/// ```console +/// error: expected `,` +/// --> src/main.rs:3:37 +/// | +/// 3 | #[tea(kind = "EarlGrey", with(sugar = "lol", milk))] +/// | ^ +/// ``` +/// +/// # Example +/// +/// Same as above but we factor out most of the logic into a separate function. +/// +/// ``` +/// # extern crate proc_macro; +/// # +/// use proc_macro::TokenStream; +/// use syn::meta::ParseNestedMeta; +/// use syn::parse::{Parser, Result}; +/// use syn::{parse_macro_input, LitStr, Path}; +/// +/// # const IGNORE: &str = stringify! { +/// #[proc_macro_attribute] +/// # }; +/// pub fn tea(args: TokenStream, input: TokenStream) -> TokenStream { +/// let mut attrs = TeaAttributes::default(); +/// let tea_parser = syn::meta::parser(|meta| attrs.parse(meta)); +/// parse_macro_input!(args with tea_parser); +/// +/// /* ... */ +/// # TokenStream::new() +/// } +/// +/// #[derive(Default)] +/// struct TeaAttributes { +/// kind: Option<LitStr>, +/// hot: bool, +/// with: Vec<Path>, +/// } +/// +/// impl TeaAttributes { +/// fn parse(&mut self, meta: ParseNestedMeta) -> Result<()> { +/// if meta.path.is_ident("kind") { +/// self.kind = Some(meta.value()?.parse()?); +/// Ok(()) +/// } else /* just like in last example */ +/// # { unimplemented!() } +/// +/// } +/// } +/// ``` +pub fn parser(logic: impl FnMut(ParseNestedMeta) -> Result<()>) -> impl Parser<Output = ()> { + |input: ParseStream| { + if input.is_empty() { + Ok(()) + } else { + parse_nested_meta(input, logic) + } + } +} + +/// Context for parsing a single property in the conventional syntax for +/// structured attributes. 
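+///
+/// The callback given to [`Attribute::parse_nested_meta`] or
+/// [`syn::meta::parser`] receives one `ParseNestedMeta` per comma-separated
+/// property in the attribute.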
+/// +/// # Examples +/// +/// Refer to usage examples on the following two entry-points: +/// +/// - [`Attribute::parse_nested_meta`] if you have an entire `Attribute` to +/// parse. Always use this if possible. Generally this is able to produce +/// better error messages because `Attribute` holds span information for all +/// of the delimiters therein. +/// +/// - [`syn::meta::parser`] if you are implementing a `proc_macro_attribute` +/// macro and parsing the arguments to the attribute macro, i.e. the ones +/// written in the same attribute that dispatched the macro invocation. Rustc +/// does not pass span information for the surrounding delimiters into the +/// attribute macro invocation in this situation, so error messages might be +/// less precise. +/// +/// [`Attribute::parse_nested_meta`]: crate::Attribute::parse_nested_meta +/// [`syn::meta::parser`]: crate::meta::parser +#[non_exhaustive] +pub struct ParseNestedMeta<'a> { + pub path: Path, + pub input: ParseStream<'a>, +} + +impl<'a> ParseNestedMeta<'a> { + /// Used when parsing `key = "value"` syntax. + /// + /// All it does is advance `meta.input` past the `=` sign in the input. You + /// could accomplish the same effect by writing + /// `meta.parse::<Token![=]>()?`, so at most it is a minor convenience to + /// use `meta.value()?`. + /// + /// # Example + /// + /// ``` + /// use syn::{parse_quote, Attribute, LitStr}; + /// + /// let attr: Attribute = parse_quote! { + /// #[tea(kind = "EarlGrey")] + /// }; + /// // conceptually: + /// if attr.path().is_ident("tea") { // this parses the `tea` + /// attr.parse_nested_meta(|meta| { // this parses the `(` + /// if meta.path.is_ident("kind") { // this parses the `kind` + /// let value = meta.value()?; // this parses the `=` + /// let s: LitStr = value.parse()?; // this parses `"EarlGrey"` + /// if s.value() == "EarlGrey" { + /// // ... + /// } + /// Ok(()) + /// } else { + /// Err(meta.error("unsupported attribute")) + /// } + /// })?; + /// } + /// # anyhow::Ok(()) + /// ``` + pub fn value(&self) -> Result<ParseStream<'a>> { + self.input.parse::<Token![=]>()?; + Ok(self.input) + } + + /// Used when parsing `list(...)` syntax **if** the content inside the + /// nested parentheses is also expected to conform to Rust's structured + /// attribute convention. + /// + /// # Example + /// + /// ``` + /// use syn::{parse_quote, Attribute}; + /// + /// let attr: Attribute = parse_quote! { + /// #[tea(with(sugar, milk))] + /// }; + /// + /// if attr.path().is_ident("tea") { + /// attr.parse_nested_meta(|meta| { + /// if meta.path.is_ident("with") { + /// meta.parse_nested_meta(|meta| { // <--- + /// if meta.path.is_ident("sugar") { + /// // Here we can go even deeper if needed. + /// Ok(()) + /// } else if meta.path.is_ident("milk") { + /// Ok(()) + /// } else { + /// Err(meta.error("unsupported ingredient")) + /// } + /// }) + /// } else { + /// Err(meta.error("unsupported tea property")) + /// } + /// })?; + /// } + /// # anyhow::Ok(()) + /// ``` + /// + /// # Counterexample + /// + /// If you don't need `parse_nested_meta`'s help in parsing the content + /// written within the nested parentheses, keep in mind that you can always + /// just parse it yourself from the exposed ParseStream. Rust syntax permits + /// arbitrary tokens within those parentheses so for the crazier stuff, + /// `parse_nested_meta` is not what you want. + /// + /// ``` + /// use syn::{parenthesized, parse_quote, Attribute, LitInt}; + /// + /// let attr: Attribute = parse_quote! 
{ + /// #[repr(align(32))] + /// }; + /// + /// let mut align: Option<LitInt> = None; + /// if attr.path().is_ident("repr") { + /// attr.parse_nested_meta(|meta| { + /// if meta.path.is_ident("align") { + /// let content; + /// parenthesized!(content in meta.input); + /// align = Some(content.parse()?); + /// Ok(()) + /// } else { + /// Err(meta.error("unsupported repr")) + /// } + /// })?; + /// } + /// # anyhow::Ok(()) + /// ``` + pub fn parse_nested_meta( + &self, + logic: impl FnMut(ParseNestedMeta) -> Result<()>, + ) -> Result<()> { + let content; + parenthesized!(content in self.input); + parse_nested_meta(&content, logic) + } + + /// Report that the attribute's content did not conform to expectations. + /// + /// The span of the resulting error will cover `meta.path` *and* everything + /// that has been parsed so far since it. + /// + /// There are 2 ways you might call this. First, if `meta.path` is not + /// something you recognize: + /// + /// ``` + /// # use syn::Attribute; + /// # + /// # fn example(attr: &Attribute) -> syn::Result<()> { + /// attr.parse_nested_meta(|meta| { + /// if meta.path.is_ident("kind") { + /// // ... + /// Ok(()) + /// } else { + /// Err(meta.error("unsupported tea property")) + /// } + /// })?; + /// # Ok(()) + /// # } + /// ``` + /// + /// In this case, it behaves exactly like + /// `syn::Error::new_spanned(&meta.path, "message...")`. + /// + /// ```console + /// error: unsupported tea property + /// --> src/main.rs:3:26 + /// | + /// 3 | #[tea(kind = "EarlGrey", wat = "foo")] + /// | ^^^ + /// ``` + /// + /// More usefully, the second place is if you've already parsed a value but + /// have decided not to accept the value: + /// + /// ``` + /// # use syn::Attribute; + /// # + /// # fn example(attr: &Attribute) -> syn::Result<()> { + /// use syn::Expr; + /// + /// attr.parse_nested_meta(|meta| { + /// if meta.path.is_ident("kind") { + /// let expr: Expr = meta.value()?.parse()?; + /// match expr { + /// Expr::Lit(expr) => /* ... */ + /// # unimplemented!(), + /// Expr::Path(expr) => /* ... */ + /// # unimplemented!(), + /// Expr::Macro(expr) => /* ... */ + /// # unimplemented!(), + /// _ => Err(meta.error("tea kind must be a string literal, path, or macro")), + /// } + /// } else /* as above */ + /// # { unimplemented!() } + /// + /// })?; + /// # Ok(()) + /// # } + /// ``` + /// + /// ```console + /// error: tea kind must be a string literal, path, or macro + /// --> src/main.rs:3:7 + /// | + /// 3 | #[tea(kind = async { replicator.await })] + /// | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /// ``` + /// + /// Often you may want to use `syn::Error::new_spanned` even in this + /// situation. In the above code, that would be: + /// + /// ``` + /// # use syn::{Error, Expr}; + /// # + /// # fn example(expr: Expr) -> syn::Result<()> { + /// match expr { + /// Expr::Lit(expr) => /* ... */ + /// # unimplemented!(), + /// Expr::Path(expr) => /* ... */ + /// # unimplemented!(), + /// Expr::Macro(expr) => /* ... 
*/ + /// # unimplemented!(), + /// _ => Err(Error::new_spanned(expr, "unsupported expression type for `kind`")), + /// } + /// # } + /// ``` + /// + /// ```console + /// error: unsupported expression type for `kind` + /// --> src/main.rs:3:14 + /// | + /// 3 | #[tea(kind = async { replicator.await })] + /// | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + /// ``` + pub fn error(&self, msg: impl Display) -> Error { + let start_span = self.path.segments[0].ident.span(); + let end_span = self.input.cursor().prev_span(); + crate::error::new2(start_span, end_span, msg) + } +} + +pub(crate) fn parse_nested_meta( + input: ParseStream, + mut logic: impl FnMut(ParseNestedMeta) -> Result<()>, +) -> Result<()> { + loop { + let path = input.call(parse_meta_path)?; + logic(ParseNestedMeta { path, input })?; + if input.is_empty() { + return Ok(()); + } + input.parse::<Token![,]>()?; + if input.is_empty() { + return Ok(()); + } + } +} + +// Like Path::parse_mod_style, but accepts keywords in the path. +fn parse_meta_path(input: ParseStream) -> Result<Path> { + Ok(Path { + leading_colon: input.parse()?, + segments: { + let mut segments = Punctuated::new(); + if input.peek(Ident::peek_any) { + let ident = Ident::parse_any(input)?; + segments.push_value(PathSegment::from(ident)); + } else if input.is_empty() { + return Err(input.error("expected nested attribute")); + } else if input.peek(Lit) { + return Err(input.error("unexpected literal in nested attribute, expected ident")); + } else { + return Err(input.error("unexpected token in nested attribute, expected ident")); + } + while input.peek(Token![::]) { + let punct = input.parse()?; + segments.push_punct(punct); + let ident = Ident::parse_any(input)?; + segments.push_value(PathSegment::from(ident)); + } + segments + }, + }) +} diff --git a/vendor/syn/src/op.rs b/vendor/syn/src/op.rs new file mode 100644 index 0000000..c231285 --- /dev/null +++ b/vendor/syn/src/op.rs @@ -0,0 +1,218 @@ +ast_enum! { + /// A binary operator: `+`, `+=`, `&`. 
+ #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + #[non_exhaustive] + pub enum BinOp { + /// The `+` operator (addition) + Add(Token![+]), + /// The `-` operator (subtraction) + Sub(Token![-]), + /// The `*` operator (multiplication) + Mul(Token![*]), + /// The `/` operator (division) + Div(Token![/]), + /// The `%` operator (modulus) + Rem(Token![%]), + /// The `&&` operator (logical and) + And(Token![&&]), + /// The `||` operator (logical or) + Or(Token![||]), + /// The `^` operator (bitwise xor) + BitXor(Token![^]), + /// The `&` operator (bitwise and) + BitAnd(Token![&]), + /// The `|` operator (bitwise or) + BitOr(Token![|]), + /// The `<<` operator (shift left) + Shl(Token![<<]), + /// The `>>` operator (shift right) + Shr(Token![>>]), + /// The `==` operator (equality) + Eq(Token![==]), + /// The `<` operator (less than) + Lt(Token![<]), + /// The `<=` operator (less than or equal to) + Le(Token![<=]), + /// The `!=` operator (not equal to) + Ne(Token![!=]), + /// The `>=` operator (greater than or equal to) + Ge(Token![>=]), + /// The `>` operator (greater than) + Gt(Token![>]), + /// The `+=` operator + AddAssign(Token![+=]), + /// The `-=` operator + SubAssign(Token![-=]), + /// The `*=` operator + MulAssign(Token![*=]), + /// The `/=` operator + DivAssign(Token![/=]), + /// The `%=` operator + RemAssign(Token![%=]), + /// The `^=` operator + BitXorAssign(Token![^=]), + /// The `&=` operator + BitAndAssign(Token![&=]), + /// The `|=` operator + BitOrAssign(Token![|=]), + /// The `<<=` operator + ShlAssign(Token![<<=]), + /// The `>>=` operator + ShrAssign(Token![>>=]), + } +} + +ast_enum! { + /// A unary operator: `*`, `!`, `-`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + #[non_exhaustive] + pub enum UnOp { + /// The `*` operator for dereferencing + Deref(Token![*]), + /// The `!` operator for logical inversion + Not(Token![!]), + /// The `-` operator for negation + Neg(Token![-]), + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::parse::{Parse, ParseStream, Result}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for BinOp { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Token![+=]) { + input.parse().map(BinOp::AddAssign) + } else if input.peek(Token![-=]) { + input.parse().map(BinOp::SubAssign) + } else if input.peek(Token![*=]) { + input.parse().map(BinOp::MulAssign) + } else if input.peek(Token![/=]) { + input.parse().map(BinOp::DivAssign) + } else if input.peek(Token![%=]) { + input.parse().map(BinOp::RemAssign) + } else if input.peek(Token![^=]) { + input.parse().map(BinOp::BitXorAssign) + } else if input.peek(Token![&=]) { + input.parse().map(BinOp::BitAndAssign) + } else if input.peek(Token![|=]) { + input.parse().map(BinOp::BitOrAssign) + } else if input.peek(Token![<<=]) { + input.parse().map(BinOp::ShlAssign) + } else if input.peek(Token![>>=]) { + input.parse().map(BinOp::ShrAssign) + } else if input.peek(Token![&&]) { + input.parse().map(BinOp::And) + } else if input.peek(Token![||]) { + input.parse().map(BinOp::Or) + } else if input.peek(Token![<<]) { + input.parse().map(BinOp::Shl) + } else if input.peek(Token![>>]) { + input.parse().map(BinOp::Shr) + } else if input.peek(Token![==]) { + input.parse().map(BinOp::Eq) + } else if input.peek(Token![<=]) { + input.parse().map(BinOp::Le) + } else if input.peek(Token![!=]) { + input.parse().map(BinOp::Ne) + } else if input.peek(Token![>=]) { + input.parse().map(BinOp::Ge) + } 
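+            // Note: the multi-character operators above must be peeked before
+            // the single-character operators below, so that e.g. `+=` is not
+            // mis-parsed as `+` followed by `=`.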
else if input.peek(Token![+]) { + input.parse().map(BinOp::Add) + } else if input.peek(Token![-]) { + input.parse().map(BinOp::Sub) + } else if input.peek(Token![*]) { + input.parse().map(BinOp::Mul) + } else if input.peek(Token![/]) { + input.parse().map(BinOp::Div) + } else if input.peek(Token![%]) { + input.parse().map(BinOp::Rem) + } else if input.peek(Token![^]) { + input.parse().map(BinOp::BitXor) + } else if input.peek(Token![&]) { + input.parse().map(BinOp::BitAnd) + } else if input.peek(Token![|]) { + input.parse().map(BinOp::BitOr) + } else if input.peek(Token![<]) { + input.parse().map(BinOp::Lt) + } else if input.peek(Token![>]) { + input.parse().map(BinOp::Gt) + } else { + Err(input.error("expected binary operator")) + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for UnOp { + fn parse(input: ParseStream) -> Result<Self> { + let lookahead = input.lookahead1(); + if lookahead.peek(Token![*]) { + input.parse().map(UnOp::Deref) + } else if lookahead.peek(Token![!]) { + input.parse().map(UnOp::Not) + } else if lookahead.peek(Token![-]) { + input.parse().map(UnOp::Neg) + } else { + Err(lookahead.error()) + } + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use proc_macro2::TokenStream; + use quote::ToTokens; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for BinOp { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + BinOp::Add(t) => t.to_tokens(tokens), + BinOp::Sub(t) => t.to_tokens(tokens), + BinOp::Mul(t) => t.to_tokens(tokens), + BinOp::Div(t) => t.to_tokens(tokens), + BinOp::Rem(t) => t.to_tokens(tokens), + BinOp::And(t) => t.to_tokens(tokens), + BinOp::Or(t) => t.to_tokens(tokens), + BinOp::BitXor(t) => t.to_tokens(tokens), + BinOp::BitAnd(t) => t.to_tokens(tokens), + BinOp::BitOr(t) => t.to_tokens(tokens), + BinOp::Shl(t) => t.to_tokens(tokens), + BinOp::Shr(t) => t.to_tokens(tokens), + BinOp::Eq(t) => t.to_tokens(tokens), + BinOp::Lt(t) => t.to_tokens(tokens), + BinOp::Le(t) => t.to_tokens(tokens), + BinOp::Ne(t) => t.to_tokens(tokens), + BinOp::Ge(t) => t.to_tokens(tokens), + BinOp::Gt(t) => t.to_tokens(tokens), + BinOp::AddAssign(t) => t.to_tokens(tokens), + BinOp::SubAssign(t) => t.to_tokens(tokens), + BinOp::MulAssign(t) => t.to_tokens(tokens), + BinOp::DivAssign(t) => t.to_tokens(tokens), + BinOp::RemAssign(t) => t.to_tokens(tokens), + BinOp::BitXorAssign(t) => t.to_tokens(tokens), + BinOp::BitAndAssign(t) => t.to_tokens(tokens), + BinOp::BitOrAssign(t) => t.to_tokens(tokens), + BinOp::ShlAssign(t) => t.to_tokens(tokens), + BinOp::ShrAssign(t) => t.to_tokens(tokens), + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for UnOp { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + UnOp::Deref(t) => t.to_tokens(tokens), + UnOp::Not(t) => t.to_tokens(tokens), + UnOp::Neg(t) => t.to_tokens(tokens), + } + } + } +} diff --git a/vendor/syn/src/parse.rs b/vendor/syn/src/parse.rs new file mode 100644 index 0000000..13f488d --- /dev/null +++ b/vendor/syn/src/parse.rs @@ -0,0 +1,1386 @@ +//! Parsing interface for parsing a token stream into a syntax tree node. +//! +//! Parsing in Syn is built on parser functions that take in a [`ParseStream`] +//! and produce a [`Result<T>`] where `T` is some syntax tree node. Underlying +//! these parser functions is a lower level mechanism built around the +//! [`Cursor`] type. `Cursor` is a cheaply copyable cursor over a range of +//! tokens in a token stream. +//! +//! 
[`Result<T>`]: Result +//! [`Cursor`]: crate::buffer::Cursor +//! +//! # Example +//! +//! Here is a snippet of parsing code to get a feel for the style of the +//! library. We define data structures for a subset of Rust syntax including +//! enums (not shown) and structs, then provide implementations of the [`Parse`] +//! trait to parse these syntax tree data structures from a token stream. +//! +//! Once `Parse` impls have been defined, they can be called conveniently from a +//! procedural macro through [`parse_macro_input!`] as shown at the bottom of +//! the snippet. If the caller provides syntactically invalid input to the +//! procedural macro, they will receive a helpful compiler error message +//! pointing out the exact token that triggered the failure to parse. +//! +//! [`parse_macro_input!`]: crate::parse_macro_input! +//! +//! ``` +//! # extern crate proc_macro; +//! # +//! use proc_macro::TokenStream; +//! use syn::{braced, parse_macro_input, token, Field, Ident, Result, Token}; +//! use syn::parse::{Parse, ParseStream}; +//! use syn::punctuated::Punctuated; +//! +//! enum Item { +//! Struct(ItemStruct), +//! Enum(ItemEnum), +//! } +//! +//! struct ItemStruct { +//! struct_token: Token![struct], +//! ident: Ident, +//! brace_token: token::Brace, +//! fields: Punctuated<Field, Token![,]>, +//! } +//! # +//! # enum ItemEnum {} +//! +//! impl Parse for Item { +//! fn parse(input: ParseStream) -> Result<Self> { +//! let lookahead = input.lookahead1(); +//! if lookahead.peek(Token![struct]) { +//! input.parse().map(Item::Struct) +//! } else if lookahead.peek(Token![enum]) { +//! input.parse().map(Item::Enum) +//! } else { +//! Err(lookahead.error()) +//! } +//! } +//! } +//! +//! impl Parse for ItemStruct { +//! fn parse(input: ParseStream) -> Result<Self> { +//! let content; +//! Ok(ItemStruct { +//! struct_token: input.parse()?, +//! ident: input.parse()?, +//! brace_token: braced!(content in input), +//! fields: content.parse_terminated(Field::parse_named, Token![,])?, +//! }) +//! } +//! } +//! # +//! # impl Parse for ItemEnum { +//! # fn parse(input: ParseStream) -> Result<Self> { +//! # unimplemented!() +//! # } +//! # } +//! +//! # const IGNORE: &str = stringify! { +//! #[proc_macro] +//! # }; +//! pub fn my_macro(tokens: TokenStream) -> TokenStream { +//! let input = parse_macro_input!(tokens as Item); +//! +//! /* ... */ +//! # TokenStream::new() +//! } +//! ``` +//! +//! # The `syn::parse*` functions +//! +//! The [`syn::parse`], [`syn::parse2`], and [`syn::parse_str`] functions serve +//! as an entry point for parsing syntax tree nodes that can be parsed in an +//! obvious default way. These functions can return any syntax tree node that +//! implements the [`Parse`] trait, which includes most types in Syn. +//! +//! [`syn::parse`]: crate::parse() +//! [`syn::parse2`]: crate::parse2() +//! [`syn::parse_str`]: crate::parse_str() +//! +//! ``` +//! use syn::Type; +//! +//! # fn run_parser() -> syn::Result<()> { +//! let t: Type = syn::parse_str("std::collections::HashMap<String, Value>")?; +//! # Ok(()) +//! # } +//! # +//! # run_parser().unwrap(); +//! ``` +//! +//! The [`parse_quote!`] macro also uses this approach. +//! +//! [`parse_quote!`]: crate::parse_quote! +//! +//! # The `Parser` trait +//! +//! Some types can be parsed in several ways depending on context. For example +//! an [`Attribute`] can be either "outer" like `#[...]` or "inner" like +//! `#![...]` and parsing the wrong one would be a bug. Similarly [`Punctuated`] +//! 
may or may not allow trailing punctuation, and parsing it the wrong way +//! would either reject valid input or accept invalid input. +//! +//! [`Attribute`]: crate::Attribute +//! [`Punctuated`]: crate::punctuated +//! +//! The `Parse` trait is not implemented in these cases because there is no good +//! behavior to consider the default. +//! +//! ```compile_fail +//! # extern crate proc_macro; +//! # +//! # use syn::punctuated::Punctuated; +//! # use syn::{PathSegment, Result, Token}; +//! # +//! # fn f(tokens: proc_macro::TokenStream) -> Result<()> { +//! # +//! // Can't parse `Punctuated` without knowing whether trailing punctuation +//! // should be allowed in this context. +//! let path: Punctuated<PathSegment, Token![::]> = syn::parse(tokens)?; +//! # +//! # Ok(()) +//! # } +//! ``` +//! +//! In these cases the types provide a choice of parser functions rather than a +//! single `Parse` implementation, and those parser functions can be invoked +//! through the [`Parser`] trait. +//! +//! +//! ``` +//! # extern crate proc_macro; +//! # +//! use proc_macro::TokenStream; +//! use syn::parse::Parser; +//! use syn::punctuated::Punctuated; +//! use syn::{Attribute, Expr, PathSegment, Result, Token}; +//! +//! fn call_some_parser_methods(input: TokenStream) -> Result<()> { +//! // Parse a nonempty sequence of path segments separated by `::` punctuation +//! // with no trailing punctuation. +//! let tokens = input.clone(); +//! let parser = Punctuated::<PathSegment, Token![::]>::parse_separated_nonempty; +//! let _path = parser.parse(tokens)?; +//! +//! // Parse a possibly empty sequence of expressions terminated by commas with +//! // an optional trailing punctuation. +//! let tokens = input.clone(); +//! let parser = Punctuated::<Expr, Token![,]>::parse_terminated; +//! let _args = parser.parse(tokens)?; +//! +//! // Parse zero or more outer attributes but not inner attributes. +//! let tokens = input.clone(); +//! let parser = Attribute::parse_outer; +//! let _attrs = parser.parse(tokens)?; +//! +//! Ok(()) +//! } +//! ``` + +#[path = "discouraged.rs"] +pub mod discouraged; + +use crate::buffer::{Cursor, TokenBuffer}; +use crate::error; +use crate::lookahead; +#[cfg(feature = "proc-macro")] +use crate::proc_macro; +use crate::punctuated::Punctuated; +use crate::token::Token; +use proc_macro2::{self, Delimiter, Group, Literal, Punct, Span, TokenStream, TokenTree}; +use std::cell::Cell; +use std::fmt::{self, Debug, Display}; +#[cfg(feature = "extra-traits")] +use std::hash::{Hash, Hasher}; +use std::marker::PhantomData; +use std::mem; +use std::ops::Deref; +use std::rc::Rc; +use std::str::FromStr; + +pub use crate::error::{Error, Result}; +pub use crate::lookahead::{Lookahead1, Peek}; + +/// Parsing interface implemented by all types that can be parsed in a default +/// way from a token stream. +/// +/// Refer to the [module documentation] for details about implementing and using +/// the `Parse` trait. +/// +/// [module documentation]: self +pub trait Parse: Sized { + fn parse(input: ParseStream) -> Result<Self>; +} + +/// Input to a Syn parser function. +/// +/// See the methods of this type under the documentation of [`ParseBuffer`]. For +/// an overview of parsing in Syn, refer to the [module documentation]. +/// +/// [module documentation]: self +pub type ParseStream<'a> = &'a ParseBuffer<'a>; + +/// Cursor position within a buffered token stream. +/// +/// This type is more commonly used through the type alias [`ParseStream`] which +/// is an alias for `&ParseBuffer`. 
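+///
+/// For a feel of the shape this gives parser code, here is a minimal sketch
+/// (the function name is illustrative):
+///
+/// ```
+/// use syn::parse::{ParseStream, Result};
+/// use syn::Ident;
+///
+/// fn two_idents(input: ParseStream) -> Result<(Ident, Ident)> {
+///     let first: Ident = input.parse()?;
+///     let second: Ident = input.parse()?;
+///     Ok((first, second))
+/// }
+/// ```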
+/// +/// `ParseStream` is the input type for all parser functions in Syn. They have +/// the signature `fn(ParseStream) -> Result<T>`. +/// +/// ## Calling a parser function +/// +/// There is no public way to construct a `ParseBuffer`. Instead, if you are +/// looking to invoke a parser function that requires `ParseStream` as input, +/// you will need to go through one of the public parsing entry points. +/// +/// - The [`parse_macro_input!`] macro if parsing input of a procedural macro; +/// - One of [the `syn::parse*` functions][syn-parse]; or +/// - A method of the [`Parser`] trait. +/// +/// [`parse_macro_input!`]: crate::parse_macro_input! +/// [syn-parse]: self#the-synparse-functions +pub struct ParseBuffer<'a> { + scope: Span, + // Instead of Cell<Cursor<'a>> so that ParseBuffer<'a> is covariant in 'a. + // The rest of the code in this module needs to be careful that only a + // cursor derived from this `cell` is ever assigned to this `cell`. + // + // Cell<Cursor<'a>> cannot be covariant in 'a because then we could take a + // ParseBuffer<'a>, upcast to ParseBuffer<'short> for some lifetime shorter + // than 'a, and then assign a Cursor<'short> into the Cell. + // + // By extension, it would not be safe to expose an API that accepts a + // Cursor<'a> and trusts that it lives as long as the cursor currently in + // the cell. + cell: Cell<Cursor<'static>>, + marker: PhantomData<Cursor<'a>>, + unexpected: Cell<Option<Rc<Cell<Unexpected>>>>, +} + +impl<'a> Drop for ParseBuffer<'a> { + fn drop(&mut self) { + if let Some(unexpected_span) = span_of_unexpected_ignoring_nones(self.cursor()) { + let (inner, old_span) = inner_unexpected(self); + if old_span.is_none() { + inner.set(Unexpected::Some(unexpected_span)); + } + } + } +} + +impl<'a> Display for ParseBuffer<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(&self.cursor().token_stream(), f) + } +} + +impl<'a> Debug for ParseBuffer<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&self.cursor().token_stream(), f) + } +} + +/// Cursor state associated with speculative parsing. +/// +/// This type is the input of the closure provided to [`ParseStream::step`]. +/// +/// [`ParseStream::step`]: ParseBuffer::step +/// +/// # Example +/// +/// ``` +/// use proc_macro2::TokenTree; +/// use syn::Result; +/// use syn::parse::ParseStream; +/// +/// // This function advances the stream past the next occurrence of `@`. If +/// // no `@` is present in the stream, the stream position is unchanged and +/// // an error is returned. +/// fn skip_past_next_at(input: ParseStream) -> Result<()> { +/// input.step(|cursor| { +/// let mut rest = *cursor; +/// while let Some((tt, next)) = rest.token_tree() { +/// match &tt { +/// TokenTree::Punct(punct) if punct.as_char() == '@' => { +/// return Ok(((), next)); +/// } +/// _ => rest = next, +/// } +/// } +/// Err(cursor.error("no `@` was found after this point")) +/// }) +/// } +/// # +/// # fn remainder_after_skipping_past_next_at( +/// # input: ParseStream, +/// # ) -> Result<proc_macro2::TokenStream> { +/// # skip_past_next_at(input)?; +/// # input.parse() +/// # } +/// # +/// # use syn::parse::Parser; +/// # let remainder = remainder_after_skipping_past_next_at +/// # .parse_str("a @ b c") +/// # .unwrap(); +/// # assert_eq!(remainder.to_string(), "b c"); +/// ``` +pub struct StepCursor<'c, 'a> { + scope: Span, + // This field is covariant in 'c. + cursor: Cursor<'c>, + // This field is contravariant in 'c. 
Together these make StepCursor + // invariant in 'c. Also covariant in 'a. The user cannot cast 'c to a + // different lifetime but can upcast into a StepCursor with a shorter + // lifetime 'a. + // + // As long as we only ever construct a StepCursor for which 'c outlives 'a, + // this means if ever a StepCursor<'c, 'a> exists we are guaranteed that 'c + // outlives 'a. + marker: PhantomData<fn(Cursor<'c>) -> Cursor<'a>>, +} + +impl<'c, 'a> Deref for StepCursor<'c, 'a> { + type Target = Cursor<'c>; + + fn deref(&self) -> &Self::Target { + &self.cursor + } +} + +impl<'c, 'a> Copy for StepCursor<'c, 'a> {} + +impl<'c, 'a> Clone for StepCursor<'c, 'a> { + fn clone(&self) -> Self { + *self + } +} + +impl<'c, 'a> StepCursor<'c, 'a> { + /// Triggers an error at the current position of the parse stream. + /// + /// The `ParseStream::step` invocation will return this same error without + /// advancing the stream state. + pub fn error<T: Display>(self, message: T) -> Error { + error::new_at(self.scope, self.cursor, message) + } +} + +pub(crate) fn advance_step_cursor<'c, 'a>(proof: StepCursor<'c, 'a>, to: Cursor<'c>) -> Cursor<'a> { + // Refer to the comments within the StepCursor definition. We use the + // fact that a StepCursor<'c, 'a> exists as proof that 'c outlives 'a. + // Cursor is covariant in its lifetime parameter so we can cast a + // Cursor<'c> to one with the shorter lifetime Cursor<'a>. + let _ = proof; + unsafe { mem::transmute::<Cursor<'c>, Cursor<'a>>(to) } +} + +pub(crate) fn new_parse_buffer( + scope: Span, + cursor: Cursor, + unexpected: Rc<Cell<Unexpected>>, +) -> ParseBuffer { + ParseBuffer { + scope, + // See comment on `cell` in the struct definition. + cell: Cell::new(unsafe { mem::transmute::<Cursor, Cursor<'static>>(cursor) }), + marker: PhantomData, + unexpected: Cell::new(Some(unexpected)), + } +} + +pub(crate) enum Unexpected { + None, + Some(Span), + Chain(Rc<Cell<Unexpected>>), +} + +impl Default for Unexpected { + fn default() -> Self { + Unexpected::None + } +} + +impl Clone for Unexpected { + fn clone(&self) -> Self { + match self { + Unexpected::None => Unexpected::None, + Unexpected::Some(span) => Unexpected::Some(*span), + Unexpected::Chain(next) => Unexpected::Chain(next.clone()), + } + } +} + +// We call this on Cell<Unexpected> and Cell<Option<T>> where temporarily +// swapping in a None is cheap. +fn cell_clone<T: Default + Clone>(cell: &Cell<T>) -> T { + let prev = cell.take(); + let ret = prev.clone(); + cell.set(prev); + ret +} + +fn inner_unexpected(buffer: &ParseBuffer) -> (Rc<Cell<Unexpected>>, Option<Span>) { + let mut unexpected = get_unexpected(buffer); + loop { + match cell_clone(&unexpected) { + Unexpected::None => return (unexpected, None), + Unexpected::Some(span) => return (unexpected, Some(span)), + Unexpected::Chain(next) => unexpected = next, + } + } +} + +pub(crate) fn get_unexpected(buffer: &ParseBuffer) -> Rc<Cell<Unexpected>> { + cell_clone(&buffer.unexpected).unwrap() +} + +fn span_of_unexpected_ignoring_nones(mut cursor: Cursor) -> Option<Span> { + if cursor.eof() { + return None; + } + while let Some((inner, _span, rest)) = cursor.group(Delimiter::None) { + if let Some(unexpected) = span_of_unexpected_ignoring_nones(inner) { + return Some(unexpected); + } + cursor = rest; + } + if cursor.eof() { + None + } else { + Some(cursor.span()) + } +} + +impl<'a> ParseBuffer<'a> { + /// Parses a syntax tree node of type `T`, advancing the position of our + /// parse stream past it. 
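+    ///
+    /// # Example
+    ///
+    /// A minimal sketch (the `KeyValue` type is illustrative): each call to
+    /// `parse` consumes one syntax tree node from the front of the stream.
+    ///
+    /// ```
+    /// use syn::{Ident, Result, Token};
+    /// use syn::parse::{Parse, ParseStream};
+    ///
+    /// // Parses `key = value` where both sides are identifiers.
+    /// struct KeyValue {
+    ///     key: Ident,
+    ///     eq_token: Token![=],
+    ///     value: Ident,
+    /// }
+    ///
+    /// impl Parse for KeyValue {
+    ///     fn parse(input: ParseStream) -> Result<Self> {
+    ///         Ok(KeyValue {
+    ///             key: input.parse()?,
+    ///             eq_token: input.parse()?,
+    ///             value: input.parse()?,
+    ///         })
+    ///     }
+    /// }
+    /// #
+    /// # syn::parse_str::<KeyValue>("a = b").unwrap();
+    /// ```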
+ pub fn parse<T: Parse>(&self) -> Result<T> { + T::parse(self) + } + + /// Calls the given parser function to parse a syntax tree node of type `T` + /// from this stream. + /// + /// # Example + /// + /// The parser below invokes [`Attribute::parse_outer`] to parse a vector of + /// zero or more outer attributes. + /// + /// [`Attribute::parse_outer`]: crate::Attribute::parse_outer + /// + /// ``` + /// use syn::{Attribute, Ident, Result, Token}; + /// use syn::parse::{Parse, ParseStream}; + /// + /// // Parses a unit struct with attributes. + /// // + /// // #[path = "s.tmpl"] + /// // struct S; + /// struct UnitStruct { + /// attrs: Vec<Attribute>, + /// struct_token: Token![struct], + /// name: Ident, + /// semi_token: Token![;], + /// } + /// + /// impl Parse for UnitStruct { + /// fn parse(input: ParseStream) -> Result<Self> { + /// Ok(UnitStruct { + /// attrs: input.call(Attribute::parse_outer)?, + /// struct_token: input.parse()?, + /// name: input.parse()?, + /// semi_token: input.parse()?, + /// }) + /// } + /// } + /// ``` + pub fn call<T>(&self, function: fn(ParseStream) -> Result<T>) -> Result<T> { + function(self) + } + + /// Looks at the next token in the parse stream to determine whether it + /// matches the requested type of token. + /// + /// Does not advance the position of the parse stream. + /// + /// # Syntax + /// + /// Note that this method does not use turbofish syntax. Pass the peek type + /// inside of parentheses. + /// + /// - `input.peek(Token![struct])` + /// - `input.peek(Token![==])` + /// - `input.peek(syn::Ident)` *(does not accept keywords)* + /// - `input.peek(syn::Ident::peek_any)` + /// - `input.peek(Lifetime)` + /// - `input.peek(token::Brace)` + /// + /// # Example + /// + /// In this example we finish parsing the list of supertraits when the next + /// token in the input is either `where` or an opening curly brace. + /// + /// ``` + /// use syn::{braced, token, Generics, Ident, Result, Token, TypeParamBound}; + /// use syn::parse::{Parse, ParseStream}; + /// use syn::punctuated::Punctuated; + /// + /// // Parses a trait definition containing no associated items. + /// // + /// // trait Marker<'de, T>: A + B<'de> where Box<T>: Clone {} + /// struct MarkerTrait { + /// trait_token: Token![trait], + /// ident: Ident, + /// generics: Generics, + /// colon_token: Option<Token![:]>, + /// supertraits: Punctuated<TypeParamBound, Token![+]>, + /// brace_token: token::Brace, + /// } + /// + /// impl Parse for MarkerTrait { + /// fn parse(input: ParseStream) -> Result<Self> { + /// let trait_token: Token![trait] = input.parse()?; + /// let ident: Ident = input.parse()?; + /// let mut generics: Generics = input.parse()?; + /// let colon_token: Option<Token![:]> = input.parse()?; + /// + /// let mut supertraits = Punctuated::new(); + /// if colon_token.is_some() { + /// loop { + /// supertraits.push_value(input.parse()?); + /// if input.peek(Token![where]) || input.peek(token::Brace) { + /// break; + /// } + /// supertraits.push_punct(input.parse()?); + /// } + /// } + /// + /// generics.where_clause = input.parse()?; + /// let content; + /// let empty_brace_token = braced!(content in input); + /// + /// Ok(MarkerTrait { + /// trait_token, + /// ident, + /// generics, + /// colon_token, + /// supertraits, + /// brace_token: empty_brace_token, + /// }) + /// } + /// } + /// ``` + pub fn peek<T: Peek>(&self, token: T) -> bool { + let _ = token; + T::Token::peek(self.cursor()) + } + + /// Looks at the second-next token in the parse stream. 
+ /// + /// This is commonly useful as a way to implement contextual keywords. + /// + /// # Example + /// + /// This example needs to use `peek2` because the symbol `union` is not a + /// keyword in Rust. We can't use just `peek` and decide to parse a union if + /// the very next token is `union`, because someone is free to write a `mod + /// union` and a macro invocation that looks like `union::some_macro! { ... + /// }`. In other words `union` is a contextual keyword. + /// + /// ``` + /// use syn::{Ident, ItemUnion, Macro, Result, Token}; + /// use syn::parse::{Parse, ParseStream}; + /// + /// // Parses either a union or a macro invocation. + /// enum UnionOrMacro { + /// // union MaybeUninit<T> { uninit: (), value: T } + /// Union(ItemUnion), + /// // lazy_static! { ... } + /// Macro(Macro), + /// } + /// + /// impl Parse for UnionOrMacro { + /// fn parse(input: ParseStream) -> Result<Self> { + /// if input.peek(Token![union]) && input.peek2(Ident) { + /// input.parse().map(UnionOrMacro::Union) + /// } else { + /// input.parse().map(UnionOrMacro::Macro) + /// } + /// } + /// } + /// ``` + pub fn peek2<T: Peek>(&self, token: T) -> bool { + fn peek2(buffer: &ParseBuffer, peek: fn(Cursor) -> bool) -> bool { + if let Some(group) = buffer.cursor().group(Delimiter::None) { + if group.0.skip().map_or(false, peek) { + return true; + } + } + buffer.cursor().skip().map_or(false, peek) + } + + let _ = token; + peek2(self, T::Token::peek) + } + + /// Looks at the third-next token in the parse stream. + pub fn peek3<T: Peek>(&self, token: T) -> bool { + fn peek3(buffer: &ParseBuffer, peek: fn(Cursor) -> bool) -> bool { + if let Some(group) = buffer.cursor().group(Delimiter::None) { + if group.0.skip().and_then(Cursor::skip).map_or(false, peek) { + return true; + } + } + buffer + .cursor() + .skip() + .and_then(Cursor::skip) + .map_or(false, peek) + } + + let _ = token; + peek3(self, T::Token::peek) + } + + /// Parses zero or more occurrences of `T` separated by punctuation of type + /// `P`, with optional trailing punctuation. + /// + /// Parsing continues until the end of this parse stream. The entire content + /// of this parse stream must consist of `T` and `P`. + /// + /// # Example + /// + /// ``` + /// # use quote::quote; + /// # + /// use syn::{parenthesized, token, Ident, Result, Token, Type}; + /// use syn::parse::{Parse, ParseStream}; + /// use syn::punctuated::Punctuated; + /// + /// // Parse a simplified tuple struct syntax like: + /// // + /// // struct S(A, B); + /// struct TupleStruct { + /// struct_token: Token![struct], + /// ident: Ident, + /// paren_token: token::Paren, + /// fields: Punctuated<Type, Token![,]>, + /// semi_token: Token![;], + /// } + /// + /// impl Parse for TupleStruct { + /// fn parse(input: ParseStream) -> Result<Self> { + /// let content; + /// Ok(TupleStruct { + /// struct_token: input.parse()?, + /// ident: input.parse()?, + /// paren_token: parenthesized!(content in input), + /// fields: content.parse_terminated(Type::parse, Token![,])?, + /// semi_token: input.parse()?, + /// }) + /// } + /// } + /// # + /// # let input = quote! { + /// # struct S(A, B); + /// # }; + /// # syn::parse2::<TupleStruct>(input).unwrap(); + /// ``` + /// + /// # See also + /// + /// If your separator is anything more complicated than an invocation of the + /// `Token!` macro, this method won't be applicable and you can instead + /// directly use `Punctuated`'s parser functions: [`parse_terminated`], + /// [`parse_separated_nonempty`] etc. 
+ /// + /// [`parse_terminated`]: Punctuated::parse_terminated + /// [`parse_separated_nonempty`]: Punctuated::parse_separated_nonempty + /// + /// ``` + /// use syn::{custom_keyword, Expr, Result, Token}; + /// use syn::parse::{Parse, ParseStream}; + /// use syn::punctuated::Punctuated; + /// + /// mod kw { + /// syn::custom_keyword!(fin); + /// } + /// + /// struct Fin(kw::fin, Token![;]); + /// + /// impl Parse for Fin { + /// fn parse(input: ParseStream) -> Result<Self> { + /// Ok(Self(input.parse()?, input.parse()?)) + /// } + /// } + /// + /// struct Thing { + /// steps: Punctuated<Expr, Fin>, + /// } + /// + /// impl Parse for Thing { + /// fn parse(input: ParseStream) -> Result<Self> { + /// # if true { + /// Ok(Thing { + /// steps: Punctuated::parse_terminated(input)?, + /// }) + /// # } else { + /// // or equivalently, this means the same thing: + /// # Ok(Thing { + /// steps: input.call(Punctuated::parse_terminated)?, + /// # }) + /// # } + /// } + /// } + /// ``` + pub fn parse_terminated<T, P>( + &self, + parser: fn(ParseStream) -> Result<T>, + separator: P, + ) -> Result<Punctuated<T, P::Token>> + where + P: Peek, + P::Token: Parse, + { + let _ = separator; + Punctuated::parse_terminated_with(self, parser) + } + + /// Returns whether there are tokens remaining in this stream. + /// + /// This method returns true at the end of the content of a set of + /// delimiters, as well as at the very end of the complete macro input. + /// + /// # Example + /// + /// ``` + /// use syn::{braced, token, Ident, Item, Result, Token}; + /// use syn::parse::{Parse, ParseStream}; + /// + /// // Parses a Rust `mod m { ... }` containing zero or more items. + /// struct Mod { + /// mod_token: Token![mod], + /// name: Ident, + /// brace_token: token::Brace, + /// items: Vec<Item>, + /// } + /// + /// impl Parse for Mod { + /// fn parse(input: ParseStream) -> Result<Self> { + /// let content; + /// Ok(Mod { + /// mod_token: input.parse()?, + /// name: input.parse()?, + /// brace_token: braced!(content in input), + /// items: { + /// let mut items = Vec::new(); + /// while !content.is_empty() { + /// items.push(content.parse()?); + /// } + /// items + /// }, + /// }) + /// } + /// } + /// ``` + pub fn is_empty(&self) -> bool { + self.cursor().eof() + } + + /// Constructs a helper for peeking at the next token in this stream and + /// building an error message if it is not one of a set of expected tokens. + /// + /// # Example + /// + /// ``` + /// use syn::{ConstParam, Ident, Lifetime, LifetimeParam, Result, Token, TypeParam}; + /// use syn::parse::{Parse, ParseStream}; + /// + /// // A generic parameter, a single one of the comma-separated elements inside + /// // angle brackets in: + /// // + /// // fn f<T: Clone, 'a, 'b: 'a, const N: usize>() { ... } + /// // + /// // On invalid input, lookahead gives us a reasonable error message. 
+ /// // + /// // error: expected one of: identifier, lifetime, `const` + /// // | + /// // 5 | fn f<!Sized>() {} + /// // | ^ + /// enum GenericParam { + /// Type(TypeParam), + /// Lifetime(LifetimeParam), + /// Const(ConstParam), + /// } + /// + /// impl Parse for GenericParam { + /// fn parse(input: ParseStream) -> Result<Self> { + /// let lookahead = input.lookahead1(); + /// if lookahead.peek(Ident) { + /// input.parse().map(GenericParam::Type) + /// } else if lookahead.peek(Lifetime) { + /// input.parse().map(GenericParam::Lifetime) + /// } else if lookahead.peek(Token![const]) { + /// input.parse().map(GenericParam::Const) + /// } else { + /// Err(lookahead.error()) + /// } + /// } + /// } + /// ``` + pub fn lookahead1(&self) -> Lookahead1<'a> { + lookahead::new(self.scope, self.cursor()) + } + + /// Forks a parse stream so that parsing tokens out of either the original + /// or the fork does not advance the position of the other. + /// + /// # Performance + /// + /// Forking a parse stream is a cheap fixed amount of work and does not + /// involve copying token buffers. Where you might hit performance problems + /// is if your macro ends up parsing a large amount of content more than + /// once. + /// + /// ``` + /// # use syn::{Expr, Result}; + /// # use syn::parse::ParseStream; + /// # + /// # fn bad(input: ParseStream) -> Result<Expr> { + /// // Do not do this. + /// if input.fork().parse::<Expr>().is_ok() { + /// return input.parse::<Expr>(); + /// } + /// # unimplemented!() + /// # } + /// ``` + /// + /// As a rule, avoid parsing an unbounded amount of tokens out of a forked + /// parse stream. Only use a fork when the amount of work performed against + /// the fork is small and bounded. + /// + /// When complex speculative parsing against the forked stream is + /// unavoidable, use [`parse::discouraged::Speculative`] to advance the + /// original stream once the fork's parse is determined to have been + /// successful. + /// + /// For a lower level way to perform speculative parsing at the token level, + /// consider using [`ParseStream::step`] instead. + /// + /// [`parse::discouraged::Speculative`]: discouraged::Speculative + /// [`ParseStream::step`]: ParseBuffer::step + /// + /// # Example + /// + /// The parse implementation shown here parses possibly restricted `pub` + /// visibilities. + /// + /// - `pub` + /// - `pub(crate)` + /// - `pub(self)` + /// - `pub(super)` + /// - `pub(in some::path)` + /// + /// To handle the case of visibilities inside of tuple structs, the parser + /// needs to distinguish parentheses that specify visibility restrictions + /// from parentheses that form part of a tuple type. + /// + /// ``` + /// # struct A; + /// # struct B; + /// # struct C; + /// # + /// struct S(pub(crate) A, pub (B, C)); + /// ``` + /// + /// In this example input the first tuple struct element of `S` has + /// `pub(crate)` visibility while the second tuple struct element has `pub` + /// visibility; the parentheses around `(B, C)` are part of the type rather + /// than part of a visibility restriction. + /// + /// The parser uses a forked parse stream to check the first token inside of + /// parentheses after the `pub` keyword. This is a small bounded amount of + /// work performed against the forked parse stream. 
+ /// + /// ``` + /// use syn::{parenthesized, token, Ident, Path, Result, Token}; + /// use syn::ext::IdentExt; + /// use syn::parse::{Parse, ParseStream}; + /// + /// struct PubVisibility { + /// pub_token: Token![pub], + /// restricted: Option<Restricted>, + /// } + /// + /// struct Restricted { + /// paren_token: token::Paren, + /// in_token: Option<Token![in]>, + /// path: Path, + /// } + /// + /// impl Parse for PubVisibility { + /// fn parse(input: ParseStream) -> Result<Self> { + /// let pub_token: Token![pub] = input.parse()?; + /// + /// if input.peek(token::Paren) { + /// let ahead = input.fork(); + /// let mut content; + /// parenthesized!(content in ahead); + /// + /// if content.peek(Token![crate]) + /// || content.peek(Token![self]) + /// || content.peek(Token![super]) + /// { + /// return Ok(PubVisibility { + /// pub_token, + /// restricted: Some(Restricted { + /// paren_token: parenthesized!(content in input), + /// in_token: None, + /// path: Path::from(content.call(Ident::parse_any)?), + /// }), + /// }); + /// } else if content.peek(Token![in]) { + /// return Ok(PubVisibility { + /// pub_token, + /// restricted: Some(Restricted { + /// paren_token: parenthesized!(content in input), + /// in_token: Some(content.parse()?), + /// path: content.call(Path::parse_mod_style)?, + /// }), + /// }); + /// } + /// } + /// + /// Ok(PubVisibility { + /// pub_token, + /// restricted: None, + /// }) + /// } + /// } + /// ``` + pub fn fork(&self) -> Self { + ParseBuffer { + scope: self.scope, + cell: self.cell.clone(), + marker: PhantomData, + // Not the parent's unexpected. Nothing cares whether the clone + // parses all the way unless we `advance_to`. + unexpected: Cell::new(Some(Rc::new(Cell::new(Unexpected::None)))), + } + } + + /// Triggers an error at the current position of the parse stream. + /// + /// # Example + /// + /// ``` + /// use syn::{Expr, Result, Token}; + /// use syn::parse::{Parse, ParseStream}; + /// + /// // Some kind of loop: `while` or `for` or `loop`. + /// struct Loop { + /// expr: Expr, + /// } + /// + /// impl Parse for Loop { + /// fn parse(input: ParseStream) -> Result<Self> { + /// if input.peek(Token![while]) + /// || input.peek(Token![for]) + /// || input.peek(Token![loop]) + /// { + /// Ok(Loop { + /// expr: input.parse()?, + /// }) + /// } else { + /// Err(input.error("expected some kind of loop")) + /// } + /// } + /// } + /// ``` + pub fn error<T: Display>(&self, message: T) -> Error { + error::new_at(self.scope, self.cursor(), message) + } + + /// Speculatively parses tokens from this parse stream, advancing the + /// position of this stream only if parsing succeeds. + /// + /// This is a powerful low-level API used for defining the `Parse` impls of + /// the basic built-in token types. It is not something that will be used + /// widely outside of the Syn codebase. + /// + /// # Example + /// + /// ``` + /// use proc_macro2::TokenTree; + /// use syn::Result; + /// use syn::parse::ParseStream; + /// + /// // This function advances the stream past the next occurrence of `@`. If + /// // no `@` is present in the stream, the stream position is unchanged and + /// // an error is returned. 
+ /// fn skip_past_next_at(input: ParseStream) -> Result<()> { + /// input.step(|cursor| { + /// let mut rest = *cursor; + /// while let Some((tt, next)) = rest.token_tree() { + /// match &tt { + /// TokenTree::Punct(punct) if punct.as_char() == '@' => { + /// return Ok(((), next)); + /// } + /// _ => rest = next, + /// } + /// } + /// Err(cursor.error("no `@` was found after this point")) + /// }) + /// } + /// # + /// # fn remainder_after_skipping_past_next_at( + /// # input: ParseStream, + /// # ) -> Result<proc_macro2::TokenStream> { + /// # skip_past_next_at(input)?; + /// # input.parse() + /// # } + /// # + /// # use syn::parse::Parser; + /// # let remainder = remainder_after_skipping_past_next_at + /// # .parse_str("a @ b c") + /// # .unwrap(); + /// # assert_eq!(remainder.to_string(), "b c"); + /// ``` + pub fn step<F, R>(&self, function: F) -> Result<R> + where + F: for<'c> FnOnce(StepCursor<'c, 'a>) -> Result<(R, Cursor<'c>)>, + { + // Since the user's function is required to work for any 'c, we know + // that the Cursor<'c> they return is either derived from the input + // StepCursor<'c, 'a> or from a Cursor<'static>. + // + // It would not be legal to write this function without the invariant + // lifetime 'c in StepCursor<'c, 'a>. If this function were written only + // in terms of 'a, the user could take our ParseBuffer<'a>, upcast it to + // a ParseBuffer<'short> which some shorter lifetime than 'a, invoke + // `step` on their ParseBuffer<'short> with a closure that returns + // Cursor<'short>, and we would wrongly write that Cursor<'short> into + // the Cell intended to hold Cursor<'a>. + // + // In some cases it may be necessary for R to contain a Cursor<'a>. + // Within Syn we solve this using `advance_step_cursor` which uses the + // existence of a StepCursor<'c, 'a> as proof that it is safe to cast + // from Cursor<'c> to Cursor<'a>. If needed outside of Syn, it would be + // safe to expose that API as a method on StepCursor. + let (node, rest) = function(StepCursor { + scope: self.scope, + cursor: self.cell.get(), + marker: PhantomData, + })?; + self.cell.set(rest); + Ok(node) + } + + /// Returns the `Span` of the next token in the parse stream, or + /// `Span::call_site()` if this parse stream has completely exhausted its + /// input `TokenStream`. + pub fn span(&self) -> Span { + let cursor = self.cursor(); + if cursor.eof() { + self.scope + } else { + crate::buffer::open_span_of_group(cursor) + } + } + + /// Provides low-level access to the token representation underlying this + /// parse stream. + /// + /// Cursors are immutable so no operations you perform against the cursor + /// will affect the state of this parse stream. + /// + /// # Example + /// + /// ``` + /// use proc_macro2::TokenStream; + /// use syn::buffer::Cursor; + /// use syn::parse::{ParseStream, Result}; + /// + /// // Run a parser that returns T, but get its output as TokenStream instead of T. + /// // This works without T needing to implement ToTokens. + /// fn recognize_token_stream<T>( + /// recognizer: fn(ParseStream) -> Result<T>, + /// ) -> impl Fn(ParseStream) -> Result<TokenStream> { + /// move |input| { + /// let begin = input.cursor(); + /// recognizer(input)?; + /// let end = input.cursor(); + /// Ok(tokens_between(begin, end)) + /// } + /// } + /// + /// // Collect tokens between two cursors as a TokenStream. 
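+    /// // (Added note) Cursors into the same token buffer are ordered, which
+    /// // is what makes the `begin <= end` assertion and the `cursor < end`
+    /// // loop condition below well-defined.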
+ /// fn tokens_between(begin: Cursor, end: Cursor) -> TokenStream { + /// assert!(begin <= end); + /// + /// let mut cursor = begin; + /// let mut tokens = TokenStream::new(); + /// while cursor < end { + /// let (token, next) = cursor.token_tree().unwrap(); + /// tokens.extend(std::iter::once(token)); + /// cursor = next; + /// } + /// tokens + /// } + /// + /// fn main() { + /// use quote::quote; + /// use syn::parse::{Parse, Parser}; + /// use syn::Token; + /// + /// // Parse syn::Type as a TokenStream, surrounded by angle brackets. + /// fn example(input: ParseStream) -> Result<TokenStream> { + /// let _langle: Token![<] = input.parse()?; + /// let ty = recognize_token_stream(syn::Type::parse)(input)?; + /// let _rangle: Token![>] = input.parse()?; + /// Ok(ty) + /// } + /// + /// let tokens = quote! { <fn() -> u8> }; + /// println!("{}", example.parse2(tokens).unwrap()); + /// } + /// ``` + pub fn cursor(&self) -> Cursor<'a> { + self.cell.get() + } + + fn check_unexpected(&self) -> Result<()> { + match inner_unexpected(self).1 { + Some(span) => Err(Error::new(span, "unexpected token")), + None => Ok(()), + } + } +} + +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +impl<T: Parse> Parse for Box<T> { + fn parse(input: ParseStream) -> Result<Self> { + input.parse().map(Box::new) + } +} + +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +impl<T: Parse + Token> Parse for Option<T> { + fn parse(input: ParseStream) -> Result<Self> { + if T::peek(input.cursor()) { + Ok(Some(input.parse()?)) + } else { + Ok(None) + } + } +} + +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +impl Parse for TokenStream { + fn parse(input: ParseStream) -> Result<Self> { + input.step(|cursor| Ok((cursor.token_stream(), Cursor::empty()))) + } +} + +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +impl Parse for TokenTree { + fn parse(input: ParseStream) -> Result<Self> { + input.step(|cursor| match cursor.token_tree() { + Some((tt, rest)) => Ok((tt, rest)), + None => Err(cursor.error("expected token tree")), + }) + } +} + +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +impl Parse for Group { + fn parse(input: ParseStream) -> Result<Self> { + input.step(|cursor| { + if let Some((group, rest)) = cursor.any_group_token() { + if group.delimiter() != Delimiter::None { + return Ok((group, rest)); + } + } + Err(cursor.error("expected group token")) + }) + } +} + +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +impl Parse for Punct { + fn parse(input: ParseStream) -> Result<Self> { + input.step(|cursor| match cursor.punct() { + Some((punct, rest)) => Ok((punct, rest)), + None => Err(cursor.error("expected punctuation token")), + }) + } +} + +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +impl Parse for Literal { + fn parse(input: ParseStream) -> Result<Self> { + input.step(|cursor| match cursor.literal() { + Some((literal, rest)) => Ok((literal, rest)), + None => Err(cursor.error("expected literal token")), + }) + } +} + +/// Parser that can parse Rust tokens into a particular syntax tree node. +/// +/// Refer to the [module documentation] for details about parsing in Syn. +/// +/// [module documentation]: self +pub trait Parser: Sized { + type Output; + + /// Parse a proc-macro2 token stream into the chosen syntax tree node. + /// + /// This function will check that the input is fully parsed. If there are + /// any unparsed tokens at the end of the stream, an error is returned. 
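+    ///
+    /// A short sketch of the full-parse behavior (illustrative):
+    ///
+    /// ```
+    /// use quote::quote;
+    /// use syn::parse::Parser;
+    /// use syn::punctuated::Punctuated;
+    /// use syn::{Expr, Token};
+    ///
+    /// let parser = Punctuated::<Expr, Token![,]>::parse_separated_nonempty;
+    ///
+    /// // The parser consumes the entire stream: Ok.
+    /// assert!(parser.parse2(quote!(a, b, c)).is_ok());
+    ///
+    /// // A trailing `;` is left unconsumed, so the result is an error.
+    /// assert!(parser.parse2(quote!(a, b, c;)).is_err());
+    /// ```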
+ fn parse2(self, tokens: TokenStream) -> Result<Self::Output>; + + /// Parse tokens of source code into the chosen syntax tree node. + /// + /// This function will check that the input is fully parsed. If there are + /// any unparsed tokens at the end of the stream, an error is returned. + #[cfg(feature = "proc-macro")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "proc-macro")))] + fn parse(self, tokens: proc_macro::TokenStream) -> Result<Self::Output> { + self.parse2(proc_macro2::TokenStream::from(tokens)) + } + + /// Parse a string of Rust code into the chosen syntax tree node. + /// + /// This function will check that the input is fully parsed. If there are + /// any unparsed tokens at the end of the string, an error is returned. + /// + /// # Hygiene + /// + /// Every span in the resulting syntax tree will be set to resolve at the + /// macro call site. + fn parse_str(self, s: &str) -> Result<Self::Output> { + self.parse2(proc_macro2::TokenStream::from_str(s)?) + } + + // Not public API. + #[doc(hidden)] + #[cfg(any(feature = "full", feature = "derive"))] + fn __parse_scoped(self, scope: Span, tokens: TokenStream) -> Result<Self::Output> { + let _ = scope; + self.parse2(tokens) + } +} + +fn tokens_to_parse_buffer(tokens: &TokenBuffer) -> ParseBuffer { + let scope = Span::call_site(); + let cursor = tokens.begin(); + let unexpected = Rc::new(Cell::new(Unexpected::None)); + new_parse_buffer(scope, cursor, unexpected) +} + +impl<F, T> Parser for F +where + F: FnOnce(ParseStream) -> Result<T>, +{ + type Output = T; + + fn parse2(self, tokens: TokenStream) -> Result<T> { + let buf = TokenBuffer::new2(tokens); + let state = tokens_to_parse_buffer(&buf); + let node = self(&state)?; + state.check_unexpected()?; + if let Some(unexpected_span) = span_of_unexpected_ignoring_nones(state.cursor()) { + Err(Error::new(unexpected_span, "unexpected token")) + } else { + Ok(node) + } + } + + #[cfg(any(feature = "full", feature = "derive"))] + fn __parse_scoped(self, scope: Span, tokens: TokenStream) -> Result<Self::Output> { + let buf = TokenBuffer::new2(tokens); + let cursor = buf.begin(); + let unexpected = Rc::new(Cell::new(Unexpected::None)); + let state = new_parse_buffer(scope, cursor, unexpected); + let node = self(&state)?; + state.check_unexpected()?; + if let Some(unexpected_span) = span_of_unexpected_ignoring_nones(state.cursor()) { + Err(Error::new(unexpected_span, "unexpected token")) + } else { + Ok(node) + } + } +} + +#[cfg(any(feature = "full", feature = "derive"))] +pub(crate) fn parse_scoped<F: Parser>(f: F, scope: Span, tokens: TokenStream) -> Result<F::Output> { + f.__parse_scoped(scope, tokens) +} + +/// An empty syntax tree node that consumes no tokens when parsed. +/// +/// This is useful for attribute macros that want to ensure they are not +/// provided any attribute args. +/// +/// ``` +/// # extern crate proc_macro; +/// # +/// use proc_macro::TokenStream; +/// use syn::parse_macro_input; +/// use syn::parse::Nothing; +/// +/// # const IGNORE: &str = stringify! { +/// #[proc_macro_attribute] +/// # }; +/// pub fn my_attr(args: TokenStream, input: TokenStream) -> TokenStream { +/// parse_macro_input!(args as Nothing); +/// +/// /* ... 
*/ +/// # TokenStream::new() +/// } +/// ``` +/// +/// ```text +/// error: unexpected token +/// --> src/main.rs:3:19 +/// | +/// 3 | #[my_attr(asdf)] +/// | ^^^^ +/// ``` +pub struct Nothing; + +impl Parse for Nothing { + fn parse(_input: ParseStream) -> Result<Self> { + Ok(Nothing) + } +} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Nothing { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("Nothing") + } +} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for Nothing {} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Nothing { + fn eq(&self, _other: &Self) -> bool { + true + } +} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Nothing { + fn hash<H: Hasher>(&self, _state: &mut H) {} +} diff --git a/vendor/syn/src/parse_macro_input.rs b/vendor/syn/src/parse_macro_input.rs new file mode 100644 index 0000000..6f1562f --- /dev/null +++ b/vendor/syn/src/parse_macro_input.rs @@ -0,0 +1,128 @@ +/// Parse the input TokenStream of a macro, triggering a compile error if the +/// tokens fail to parse. +/// +/// Refer to the [`parse` module] documentation for more details about parsing +/// in Syn. +/// +/// [`parse` module]: mod@crate::parse +/// +/// <br> +/// +/// # Intended usage +/// +/// This macro must be called from a function that returns +/// `proc_macro::TokenStream`. Usually this will be your proc macro entry point, +/// the function that has the #\[proc_macro\] / #\[proc_macro_derive\] / +/// #\[proc_macro_attribute\] attribute. +/// +/// ``` +/// # extern crate proc_macro; +/// # +/// use proc_macro::TokenStream; +/// use syn::{parse_macro_input, Result}; +/// use syn::parse::{Parse, ParseStream}; +/// +/// struct MyMacroInput { +/// /* ... */ +/// } +/// +/// impl Parse for MyMacroInput { +/// fn parse(input: ParseStream) -> Result<Self> { +/// /* ... */ +/// # Ok(MyMacroInput {}) +/// } +/// } +/// +/// # const IGNORE: &str = stringify! { +/// #[proc_macro] +/// # }; +/// pub fn my_macro(tokens: TokenStream) -> TokenStream { +/// let input = parse_macro_input!(tokens as MyMacroInput); +/// +/// /* ... */ +/// # TokenStream::new() +/// } +/// ``` +/// +/// <br> +/// +/// # Usage with Parser +/// +/// This macro can also be used with the [`Parser` trait] for types that have +/// multiple ways that they can be parsed. +/// +/// [`Parser` trait]: crate::parse::Parser +/// +/// ``` +/// # extern crate proc_macro; +/// # +/// # use proc_macro::TokenStream; +/// # use syn::{parse_macro_input, Result}; +/// # use syn::parse::ParseStream; +/// # +/// # struct MyMacroInput {} +/// # +/// impl MyMacroInput { +/// fn parse_alternate(input: ParseStream) -> Result<Self> { +/// /* ... */ +/// # Ok(MyMacroInput {}) +/// } +/// } +/// +/// # const IGNORE: &str = stringify! { +/// #[proc_macro] +/// # }; +/// pub fn my_macro(tokens: TokenStream) -> TokenStream { +/// let input = parse_macro_input!(tokens with MyMacroInput::parse_alternate); +/// +/// /* ... */ +/// # TokenStream::new() +/// } +/// ``` +/// +/// <br> +/// +/// # Expansion +/// +/// `parse_macro_input!($variable as $Type)` expands to something like: +/// +/// ```no_run +/// # extern crate proc_macro; +/// # +/// # macro_rules! 
doc_test { +/// # ($variable:ident as $Type:ty) => { +/// match syn::parse::<$Type>($variable) { +/// Ok(syntax_tree) => syntax_tree, +/// Err(err) => return proc_macro::TokenStream::from(err.to_compile_error()), +/// } +/// # }; +/// # } +/// # +/// # fn test(input: proc_macro::TokenStream) -> proc_macro::TokenStream { +/// # let _ = doc_test!(input as syn::Ident); +/// # proc_macro::TokenStream::new() +/// # } +/// ``` +#[macro_export] +#[cfg_attr(doc_cfg, doc(cfg(all(feature = "parsing", feature = "proc-macro"))))] +macro_rules! parse_macro_input { + ($tokenstream:ident as $ty:ty) => { + match $crate::parse::<$ty>($tokenstream) { + $crate::__private::Ok(data) => data, + $crate::__private::Err(err) => { + return $crate::__private::TokenStream::from(err.to_compile_error()); + } + } + }; + ($tokenstream:ident with $parser:path) => { + match $crate::parse::Parser::parse($parser, $tokenstream) { + $crate::__private::Ok(data) => data, + $crate::__private::Err(err) => { + return $crate::__private::TokenStream::from(err.to_compile_error()); + } + } + }; + ($tokenstream:ident) => { + $crate::parse_macro_input!($tokenstream as _) + }; +} diff --git a/vendor/syn/src/parse_quote.rs b/vendor/syn/src/parse_quote.rs new file mode 100644 index 0000000..18de474 --- /dev/null +++ b/vendor/syn/src/parse_quote.rs @@ -0,0 +1,209 @@ +/// Quasi-quotation macro that accepts input like the [`quote!`] macro but uses +/// type inference to figure out a return type for those tokens. +/// +/// [`quote!`]: https://docs.rs/quote/1.0/quote/index.html +/// +/// The return type can be any syntax tree node that implements the [`Parse`] +/// trait. +/// +/// [`Parse`]: crate::parse::Parse +/// +/// ``` +/// use quote::quote; +/// use syn::{parse_quote, Stmt}; +/// +/// fn main() { +/// let name = quote!(v); +/// let ty = quote!(u8); +/// +/// let stmt: Stmt = parse_quote! { +/// let #name: #ty = Default::default(); +/// }; +/// +/// println!("{:#?}", stmt); +/// } +/// ``` +/// +/// *This macro is available only if Syn is built with both the `"parsing"` and +/// `"printing"` features.* +/// +/// # Example +/// +/// The following helper function adds a bound `T: HeapSize` to every type +/// parameter `T` in the input generics. +/// +/// ``` +/// use syn::{parse_quote, Generics, GenericParam}; +/// +/// // Add a bound `T: HeapSize` to every type parameter T. +/// fn add_trait_bounds(mut generics: Generics) -> Generics { +/// for param in &mut generics.params { +/// if let GenericParam::Type(type_param) = param { +/// type_param.bounds.push(parse_quote!(HeapSize)); +/// } +/// } +/// generics +/// } +/// ``` +/// +/// # Special cases +/// +/// This macro can parse the following additional types as a special case even +/// though they do not implement the `Parse` trait. +/// +/// - [`Attribute`] — parses one attribute, allowing either outer like `#[...]` +/// or inner like `#![...]` +/// - [`Punctuated<T, P>`] — parses zero or more `T` separated by punctuation +/// `P` with optional trailing punctuation +/// - [`Vec<Stmt>`] — parses the same as `Block::parse_within` +/// +/// [`Vec<Stmt>`]: Block::parse_within +/// +/// # Panics +/// +/// Panics if the tokens fail to parse as the expected syntax tree type. The +/// caller is responsible for ensuring that the input tokens are syntactically +/// valid. +#[cfg_attr(doc_cfg, doc(cfg(all(feature = "parsing", feature = "printing"))))] +#[macro_export] +macro_rules! 
parse_quote { + ($($tt:tt)*) => { + $crate::__private::parse_quote($crate::__private::quote::quote!($($tt)*)) + }; +} + +/// This macro is [`parse_quote!`] + [`quote_spanned!`][quote::quote_spanned]. +/// +/// Please refer to each of their documentation. +/// +/// # Example +/// +/// ``` +/// use quote::{quote, quote_spanned}; +/// use syn::spanned::Spanned; +/// use syn::{parse_quote_spanned, ReturnType, Signature}; +/// +/// // Changes `fn()` to `fn() -> Pin<Box<dyn Future<Output = ()>>>`, +/// // and `fn() -> T` to `fn() -> Pin<Box<dyn Future<Output = T>>>`, +/// // without introducing any call_site() spans. +/// fn make_ret_pinned_future(sig: &mut Signature) { +/// let ret = match &sig.output { +/// ReturnType::Default => quote_spanned!(sig.paren_token.span=> ()), +/// ReturnType::Type(_, ret) => quote!(#ret), +/// }; +/// sig.output = parse_quote_spanned! {ret.span()=> +/// -> ::std::pin::Pin<::std::boxed::Box<dyn ::std::future::Future<Output = #ret>>> +/// }; +/// } +/// ``` +#[cfg_attr(doc_cfg, doc(cfg(all(feature = "parsing", feature = "printing"))))] +#[macro_export] +macro_rules! parse_quote_spanned { + ($span:expr=> $($tt:tt)*) => { + $crate::__private::parse_quote($crate::__private::quote::quote_spanned!($span=> $($tt)*)) + }; +} + +//////////////////////////////////////////////////////////////////////////////// +// Can parse any type that implements Parse. + +use crate::parse::{Parse, ParseStream, Parser, Result}; +use proc_macro2::TokenStream; + +// Not public API. +#[doc(hidden)] +pub fn parse<T: ParseQuote>(token_stream: TokenStream) -> T { + let parser = T::parse; + match parser.parse2(token_stream) { + Ok(t) => t, + Err(err) => panic!("{}", err), + } +} + +#[doc(hidden)] +pub trait ParseQuote: Sized { + fn parse(input: ParseStream) -> Result<Self>; +} + +impl<T: Parse> ParseQuote for T { + fn parse(input: ParseStream) -> Result<Self> { + <T as Parse>::parse(input) + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Any other types that we want `parse_quote!` to be able to parse. 
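+//
+// A few illustrative uses that go through the impls below (each impl is
+// feature-gated as marked):
+//
+//     let attr: Attribute = parse_quote!(#[derive(Debug)]);
+//     let inner: Attribute = parse_quote!(#![no_std]);
+//     let stmts: Vec<Stmt> = parse_quote! { let x = 1; x };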
+ +use crate::punctuated::Punctuated; +#[cfg(any(feature = "full", feature = "derive"))] +use crate::{attr, Attribute, Field, FieldMutability, Ident, Type, Visibility}; +#[cfg(feature = "full")] +use crate::{Block, Pat, Stmt}; + +#[cfg(any(feature = "full", feature = "derive"))] +impl ParseQuote for Attribute { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Token![#]) && input.peek2(Token![!]) { + attr::parsing::single_parse_inner(input) + } else { + attr::parsing::single_parse_outer(input) + } + } +} + +#[cfg(any(feature = "full", feature = "derive"))] +impl ParseQuote for Field { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + + let ident: Option<Ident>; + let colon_token: Option<Token![:]>; + let is_named = input.peek(Ident) && input.peek2(Token![:]) && !input.peek2(Token![::]); + if is_named { + ident = Some(input.parse()?); + colon_token = Some(input.parse()?); + } else { + ident = None; + colon_token = None; + } + + let ty: Type = input.parse()?; + + Ok(Field { + attrs, + vis, + mutability: FieldMutability::None, + ident, + colon_token, + ty, + }) + } +} + +#[cfg(feature = "full")] +impl ParseQuote for Pat { + fn parse(input: ParseStream) -> Result<Self> { + Pat::parse_multi_with_leading_vert(input) + } +} + +#[cfg(feature = "full")] +impl ParseQuote for Box<Pat> { + fn parse(input: ParseStream) -> Result<Self> { + <Pat as ParseQuote>::parse(input).map(Box::new) + } +} + +impl<T: Parse, P: Parse> ParseQuote for Punctuated<T, P> { + fn parse(input: ParseStream) -> Result<Self> { + Self::parse_terminated(input) + } +} + +#[cfg(feature = "full")] +impl ParseQuote for Vec<Stmt> { + fn parse(input: ParseStream) -> Result<Self> { + Block::parse_within(input) + } +} diff --git a/vendor/syn/src/pat.rs b/vendor/syn/src/pat.rs new file mode 100644 index 0000000..ce6399d --- /dev/null +++ b/vendor/syn/src/pat.rs @@ -0,0 +1,917 @@ +use super::*; +use crate::punctuated::Punctuated; +use proc_macro2::TokenStream; + +ast_enum_of_structs! { + /// A pattern in a local binding, function signature, match expression, or + /// various other places. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + #[non_exhaustive] + pub enum Pat { + /// A const block: `const { ... }`. + Const(PatConst), + + /// A pattern that binds a new variable: `ref mut binding @ SUBPATTERN`. + Ident(PatIdent), + + /// A literal pattern: `0`. + Lit(PatLit), + + /// A macro in pattern position. + Macro(PatMacro), + + /// A pattern that matches any one of a set of cases. + Or(PatOr), + + /// A parenthesized pattern: `(A | B)`. + Paren(PatParen), + + /// A path pattern like `Color::Red`, optionally qualified with a + /// self-type. + /// + /// Unqualified path patterns can legally refer to variants, structs, + /// constants or associated constants. Qualified path patterns like + /// `<A>::B::C` and `<A as Trait>::B::C` can only legally refer to + /// associated constants. + Path(PatPath), + + /// A range pattern: `1..=2`. + Range(PatRange), + + /// A reference pattern: `&mut var`. + Reference(PatReference), + + /// The dots in a tuple or slice pattern: `[0, 1, ..]`. + Rest(PatRest), + + /// A dynamically sized slice pattern: `[a, b, ref i @ .., y, z]`. + Slice(PatSlice), + + /// A struct or struct variant pattern: `Variant { x, y, .. }`. + Struct(PatStruct), + + /// A tuple pattern: `(a, b)`. 
+ Tuple(PatTuple), + + /// A tuple struct or tuple variant pattern: `Variant(x, y, .., z)`. + TupleStruct(PatTupleStruct), + + /// A type ascription pattern: `foo: f64`. + Type(PatType), + + /// Tokens in pattern position not interpreted by Syn. + Verbatim(TokenStream), + + /// A pattern that matches any value: `_`. + Wild(PatWild), + + // For testing exhaustiveness in downstream code, use the following idiom: + // + // match pat { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // + // Pat::Box(pat) => {...} + // Pat::Ident(pat) => {...} + // ... + // Pat::Wild(pat) => {...} + // + // _ => { /* some sane fallback */ } + // } + // + // This way we fail your tests but don't break your library when adding + // a variant. You will be notified by a test failure when a variant is + // added, so that you can add code to handle it, but your library will + // continue to compile and work for downstream users in the interim. + } +} + +ast_struct! { + /// A pattern that binds a new variable: `ref mut binding @ SUBPATTERN`. + /// + /// It may also be a unit struct or struct variant (e.g. `None`), or a + /// constant; these cannot be distinguished syntactically. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct PatIdent { + pub attrs: Vec<Attribute>, + pub by_ref: Option<Token![ref]>, + pub mutability: Option<Token![mut]>, + pub ident: Ident, + pub subpat: Option<(Token![@], Box<Pat>)>, + } +} + +ast_struct! { + /// A pattern that matches any one of a set of cases. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct PatOr { + pub attrs: Vec<Attribute>, + pub leading_vert: Option<Token![|]>, + pub cases: Punctuated<Pat, Token![|]>, + } +} + +ast_struct! { + /// A parenthesized pattern: `(A | B)`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct PatParen { + pub attrs: Vec<Attribute>, + pub paren_token: token::Paren, + pub pat: Box<Pat>, + } +} + +ast_struct! { + /// A reference pattern: `&mut var`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct PatReference { + pub attrs: Vec<Attribute>, + pub and_token: Token![&], + pub mutability: Option<Token![mut]>, + pub pat: Box<Pat>, + } +} + +ast_struct! { + /// The dots in a tuple or slice pattern: `[0, 1, ..]`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct PatRest { + pub attrs: Vec<Attribute>, + pub dot2_token: Token![..], + } +} + +ast_struct! { + /// A dynamically sized slice pattern: `[a, b, ref i @ .., y, z]`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct PatSlice { + pub attrs: Vec<Attribute>, + pub bracket_token: token::Bracket, + pub elems: Punctuated<Pat, Token![,]>, + } +} + +ast_struct! { + /// A struct or struct variant pattern: `Variant { x, y, .. }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct PatStruct { + pub attrs: Vec<Attribute>, + pub qself: Option<QSelf>, + pub path: Path, + pub brace_token: token::Brace, + pub fields: Punctuated<FieldPat, Token![,]>, + pub rest: Option<PatRest>, + } +} + +ast_struct! { + /// A tuple pattern: `(a, b)`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct PatTuple { + pub attrs: Vec<Attribute>, + pub paren_token: token::Paren, + pub elems: Punctuated<Pat, Token![,]>, + } +} + +ast_struct! { + /// A tuple struct or tuple variant pattern: `Variant(x, y, .., z)`. 
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct PatTupleStruct { + pub attrs: Vec<Attribute>, + pub qself: Option<QSelf>, + pub path: Path, + pub paren_token: token::Paren, + pub elems: Punctuated<Pat, Token![,]>, + } +} + +ast_struct! { + /// A type ascription pattern: `foo: f64`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct PatType { + pub attrs: Vec<Attribute>, + pub pat: Box<Pat>, + pub colon_token: Token![:], + pub ty: Box<Type>, + } +} + +ast_struct! { + /// A pattern that matches any value: `_`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct PatWild { + pub attrs: Vec<Attribute>, + pub underscore_token: Token![_], + } +} + +ast_struct! { + /// A single field in a struct pattern. + /// + /// Patterns like the fields of Foo `{ x, ref y, ref mut z }` are treated + /// the same as `x: x, y: ref y, z: ref mut z` but there is no colon token. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct FieldPat { + pub attrs: Vec<Attribute>, + pub member: Member, + pub colon_token: Option<Token![:]>, + pub pat: Box<Pat>, + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::ext::IdentExt as _; + use crate::parse::{Parse, ParseBuffer, ParseStream, Result}; + use crate::path; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Pat { + /// Parse a pattern that does _not_ involve `|` at the top level. + /// + /// This parser matches the behavior of the `$:pat_param` macro_rules + /// matcher, and on editions prior to Rust 2021, the behavior of + /// `$:pat`. + /// + /// In Rust syntax, some examples of where this syntax would occur are + /// in the argument pattern of functions and closures. Patterns using + /// `|` are not allowed to occur in these positions. + /// + /// ```compile_fail + /// fn f(Some(_) | None: Option<T>) { + /// let _ = |Some(_) | None: Option<T>| {}; + /// // ^^^^^^^^^^^^^^^^^^^^^^^^^??? 
:( + /// } + /// ``` + /// + /// ```console + /// error: top-level or-patterns are not allowed in function parameters + /// --> src/main.rs:1:6 + /// | + /// 1 | fn f(Some(_) | None: Option<T>) { + /// | ^^^^^^^^^^^^^^ help: wrap the pattern in parentheses: `(Some(_) | None)` + /// ``` + pub fn parse_single(input: ParseStream) -> Result<Self> { + let begin = input.fork(); + let lookahead = input.lookahead1(); + if lookahead.peek(Ident) + && (input.peek2(Token![::]) + || input.peek2(Token![!]) + || input.peek2(token::Brace) + || input.peek2(token::Paren) + || input.peek2(Token![..])) + || input.peek(Token![self]) && input.peek2(Token![::]) + || lookahead.peek(Token![::]) + || lookahead.peek(Token![<]) + || input.peek(Token![Self]) + || input.peek(Token![super]) + || input.peek(Token![crate]) + { + pat_path_or_macro_or_struct_or_range(input) + } else if lookahead.peek(Token![_]) { + input.call(pat_wild).map(Pat::Wild) + } else if input.peek(Token![box]) { + pat_box(begin, input) + } else if input.peek(Token![-]) || lookahead.peek(Lit) || lookahead.peek(Token![const]) + { + pat_lit_or_range(input) + } else if lookahead.peek(Token![ref]) + || lookahead.peek(Token![mut]) + || input.peek(Token![self]) + || input.peek(Ident) + { + input.call(pat_ident).map(Pat::Ident) + } else if lookahead.peek(Token![&]) { + input.call(pat_reference).map(Pat::Reference) + } else if lookahead.peek(token::Paren) { + input.call(pat_paren_or_tuple) + } else if lookahead.peek(token::Bracket) { + input.call(pat_slice).map(Pat::Slice) + } else if lookahead.peek(Token![..]) && !input.peek(Token![...]) { + pat_range_half_open(input) + } else if lookahead.peek(Token![const]) { + input.call(pat_const).map(Pat::Verbatim) + } else { + Err(lookahead.error()) + } + } + + /// Parse a pattern, possibly involving `|`, but not a leading `|`. + pub fn parse_multi(input: ParseStream) -> Result<Self> { + multi_pat_impl(input, None) + } + + /// Parse a pattern, possibly involving `|`, possibly including a + /// leading `|`. + /// + /// This parser matches the behavior of the Rust 2021 edition's `$:pat` + /// macro_rules matcher. + /// + /// In Rust syntax, an example of where this syntax would occur is in + /// the pattern of a `match` arm, where the language permits an optional + /// leading `|`, although it is not idiomatic to write one there in + /// handwritten code. + /// + /// ``` + /// # let wat = None; + /// match wat { + /// | None | Some(false) => {} + /// | Some(true) => {} + /// } + /// ``` + /// + /// The compiler accepts it only to facilitate some situations in + /// macro-generated code where a macro author might need to write: + /// + /// ``` + /// # macro_rules! doc { + /// # ($value:expr, ($($conditions1:pat),*), ($($conditions2:pat),*), $then:expr) => { + /// match $value { + /// $(| $conditions1)* $(| $conditions2)* => $then + /// } + /// # }; + /// # } + /// # + /// # doc!(true, (true), (false), {}); + /// # doc!(true, (), (true, false), {}); + /// # doc!(true, (true, false), (), {}); + /// ``` + /// + /// Expressing the same thing correctly in the case that either one (but + /// not both) of `$conditions1` and `$conditions2` might be empty, + /// without leading `|`, is complex. + /// + /// Use [`Pat::parse_multi`] instead if you are not intending to support + /// macro-generated macro input. 
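+        ///
+        /// A minimal sketch of driving this parser directly through the
+        /// [`Parser`](crate::parse::Parser) trait; the `quote::quote!` call
+        /// is only a convenient way to build the input `TokenStream`:
+        ///
+        /// ```
+        /// use syn::parse::Parser;
+        /// use syn::Pat;
+        ///
+        /// let tokens = quote::quote!(| None | Some(_));
+        /// let pat = Pat::parse_multi_with_leading_vert.parse2(tokens)?;
+        /// assert!(matches!(pat, Pat::Or(_)));
+        /// # Ok::<(), syn::Error>(())
+        /// ```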
+ pub fn parse_multi_with_leading_vert(input: ParseStream) -> Result<Self> { + let leading_vert: Option<Token![|]> = input.parse()?; + multi_pat_impl(input, leading_vert) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for PatType { + fn parse(input: ParseStream) -> Result<Self> { + Ok(PatType { + attrs: Vec::new(), + pat: Box::new(Pat::parse_single(input)?), + colon_token: input.parse()?, + ty: input.parse()?, + }) + } + } + + fn multi_pat_impl(input: ParseStream, leading_vert: Option<Token![|]>) -> Result<Pat> { + let mut pat = Pat::parse_single(input)?; + if leading_vert.is_some() + || input.peek(Token![|]) && !input.peek(Token![||]) && !input.peek(Token![|=]) + { + let mut cases = Punctuated::new(); + cases.push_value(pat); + while input.peek(Token![|]) && !input.peek(Token![||]) && !input.peek(Token![|=]) { + let punct = input.parse()?; + cases.push_punct(punct); + let pat = Pat::parse_single(input)?; + cases.push_value(pat); + } + pat = Pat::Or(PatOr { + attrs: Vec::new(), + leading_vert, + cases, + }); + } + Ok(pat) + } + + fn pat_path_or_macro_or_struct_or_range(input: ParseStream) -> Result<Pat> { + let (qself, path) = path::parsing::qpath(input, true)?; + + if qself.is_none() + && input.peek(Token![!]) + && !input.peek(Token![!=]) + && path.is_mod_style() + { + let bang_token: Token![!] = input.parse()?; + let (delimiter, tokens) = mac::parse_delimiter(input)?; + return Ok(Pat::Macro(ExprMacro { + attrs: Vec::new(), + mac: Macro { + path, + bang_token, + delimiter, + tokens, + }, + })); + } + + if input.peek(token::Brace) { + pat_struct(input, qself, path).map(Pat::Struct) + } else if input.peek(token::Paren) { + pat_tuple_struct(input, qself, path).map(Pat::TupleStruct) + } else if input.peek(Token![..]) { + pat_range(input, qself, path) + } else { + Ok(Pat::Path(ExprPath { + attrs: Vec::new(), + qself, + path, + })) + } + } + + fn pat_wild(input: ParseStream) -> Result<PatWild> { + Ok(PatWild { + attrs: Vec::new(), + underscore_token: input.parse()?, + }) + } + + fn pat_box(begin: ParseBuffer, input: ParseStream) -> Result<Pat> { + input.parse::<Token![box]>()?; + Pat::parse_single(input)?; + Ok(Pat::Verbatim(verbatim::between(&begin, input))) + } + + fn pat_ident(input: ParseStream) -> Result<PatIdent> { + Ok(PatIdent { + attrs: Vec::new(), + by_ref: input.parse()?, + mutability: input.parse()?, + ident: input.call(Ident::parse_any)?, + subpat: { + if input.peek(Token![@]) { + let at_token: Token![@] = input.parse()?; + let subpat = Pat::parse_single(input)?; + Some((at_token, Box::new(subpat))) + } else { + None + } + }, + }) + } + + fn pat_tuple_struct( + input: ParseStream, + qself: Option<QSelf>, + path: Path, + ) -> Result<PatTupleStruct> { + let content; + let paren_token = parenthesized!(content in input); + + let mut elems = Punctuated::new(); + while !content.is_empty() { + let value = Pat::parse_multi_with_leading_vert(&content)?; + elems.push_value(value); + if content.is_empty() { + break; + } + let punct = content.parse()?; + elems.push_punct(punct); + } + + Ok(PatTupleStruct { + attrs: Vec::new(), + qself, + path, + paren_token, + elems, + }) + } + + fn pat_struct(input: ParseStream, qself: Option<QSelf>, path: Path) -> Result<PatStruct> { + let content; + let brace_token = braced!(content in input); + + let mut fields = Punctuated::new(); + let mut rest = None; + while !content.is_empty() { + let attrs = content.call(Attribute::parse_outer)?; + if content.peek(Token![..]) { + rest = Some(PatRest { + attrs, + dot2_token: 
content.parse()?, + }); + break; + } + let mut value = content.call(field_pat)?; + value.attrs = attrs; + fields.push_value(value); + if content.is_empty() { + break; + } + let punct: Token![,] = content.parse()?; + fields.push_punct(punct); + } + + Ok(PatStruct { + attrs: Vec::new(), + qself, + path, + brace_token, + fields, + rest, + }) + } + + fn field_pat(input: ParseStream) -> Result<FieldPat> { + let begin = input.fork(); + let boxed: Option<Token![box]> = input.parse()?; + let by_ref: Option<Token![ref]> = input.parse()?; + let mutability: Option<Token![mut]> = input.parse()?; + + let member = if boxed.is_some() || by_ref.is_some() || mutability.is_some() { + input.parse().map(Member::Named) + } else { + input.parse() + }?; + + if boxed.is_none() && by_ref.is_none() && mutability.is_none() && input.peek(Token![:]) + || !member.is_named() + { + return Ok(FieldPat { + attrs: Vec::new(), + member, + colon_token: Some(input.parse()?), + pat: Box::new(Pat::parse_multi_with_leading_vert(input)?), + }); + } + + let ident = match member { + Member::Named(ident) => ident, + Member::Unnamed(_) => unreachable!(), + }; + + let pat = if boxed.is_some() { + Pat::Verbatim(verbatim::between(&begin, input)) + } else { + Pat::Ident(PatIdent { + attrs: Vec::new(), + by_ref, + mutability, + ident: ident.clone(), + subpat: None, + }) + }; + + Ok(FieldPat { + attrs: Vec::new(), + member: Member::Named(ident), + colon_token: None, + pat: Box::new(pat), + }) + } + + fn pat_range(input: ParseStream, qself: Option<QSelf>, path: Path) -> Result<Pat> { + let limits = RangeLimits::parse_obsolete(input)?; + let end = input.call(pat_range_bound)?; + if let (RangeLimits::Closed(_), None) = (&limits, &end) { + return Err(input.error("expected range upper bound")); + } + Ok(Pat::Range(ExprRange { + attrs: Vec::new(), + start: Some(Box::new(Expr::Path(ExprPath { + attrs: Vec::new(), + qself, + path, + }))), + limits, + end: end.map(PatRangeBound::into_expr), + })) + } + + fn pat_range_half_open(input: ParseStream) -> Result<Pat> { + let limits: RangeLimits = input.parse()?; + let end = input.call(pat_range_bound)?; + if end.is_some() { + Ok(Pat::Range(ExprRange { + attrs: Vec::new(), + start: None, + limits, + end: end.map(PatRangeBound::into_expr), + })) + } else { + match limits { + RangeLimits::HalfOpen(dot2_token) => Ok(Pat::Rest(PatRest { + attrs: Vec::new(), + dot2_token, + })), + RangeLimits::Closed(_) => Err(input.error("expected range upper bound")), + } + } + } + + fn pat_paren_or_tuple(input: ParseStream) -> Result<Pat> { + let content; + let paren_token = parenthesized!(content in input); + + let mut elems = Punctuated::new(); + while !content.is_empty() { + let value = Pat::parse_multi_with_leading_vert(&content)?; + if content.is_empty() { + if elems.is_empty() && !matches!(value, Pat::Rest(_)) { + return Ok(Pat::Paren(PatParen { + attrs: Vec::new(), + paren_token, + pat: Box::new(value), + })); + } + elems.push_value(value); + break; + } + elems.push_value(value); + let punct = content.parse()?; + elems.push_punct(punct); + } + + Ok(Pat::Tuple(PatTuple { + attrs: Vec::new(), + paren_token, + elems, + })) + } + + fn pat_reference(input: ParseStream) -> Result<PatReference> { + Ok(PatReference { + attrs: Vec::new(), + and_token: input.parse()?, + mutability: input.parse()?, + pat: Box::new(Pat::parse_single(input)?), + }) + } + + fn pat_lit_or_range(input: ParseStream) -> Result<Pat> { + let start = input.call(pat_range_bound)?.unwrap(); + if input.peek(Token![..]) { + let limits = 
RangeLimits::parse_obsolete(input)?; + let end = input.call(pat_range_bound)?; + if let (RangeLimits::Closed(_), None) = (&limits, &end) { + return Err(input.error("expected range upper bound")); + } + Ok(Pat::Range(ExprRange { + attrs: Vec::new(), + start: Some(start.into_expr()), + limits, + end: end.map(PatRangeBound::into_expr), + })) + } else { + Ok(start.into_pat()) + } + } + + // Patterns that can appear on either side of a range pattern. + enum PatRangeBound { + Const(ExprConst), + Lit(ExprLit), + Path(ExprPath), + } + + impl PatRangeBound { + fn into_expr(self) -> Box<Expr> { + Box::new(match self { + PatRangeBound::Const(pat) => Expr::Const(pat), + PatRangeBound::Lit(pat) => Expr::Lit(pat), + PatRangeBound::Path(pat) => Expr::Path(pat), + }) + } + + fn into_pat(self) -> Pat { + match self { + PatRangeBound::Const(pat) => Pat::Const(pat), + PatRangeBound::Lit(pat) => Pat::Lit(pat), + PatRangeBound::Path(pat) => Pat::Path(pat), + } + } + } + + fn pat_range_bound(input: ParseStream) -> Result<Option<PatRangeBound>> { + if input.is_empty() + || input.peek(Token![|]) + || input.peek(Token![=]) + || input.peek(Token![:]) && !input.peek(Token![::]) + || input.peek(Token![,]) + || input.peek(Token![;]) + || input.peek(Token![if]) + { + return Ok(None); + } + + let lookahead = input.lookahead1(); + let expr = if lookahead.peek(Lit) { + PatRangeBound::Lit(input.parse()?) + } else if lookahead.peek(Ident) + || lookahead.peek(Token![::]) + || lookahead.peek(Token![<]) + || lookahead.peek(Token![self]) + || lookahead.peek(Token![Self]) + || lookahead.peek(Token![super]) + || lookahead.peek(Token![crate]) + { + PatRangeBound::Path(input.parse()?) + } else if lookahead.peek(Token![const]) { + PatRangeBound::Const(input.parse()?) + } else { + return Err(lookahead.error()); + }; + + Ok(Some(expr)) + } + + fn pat_slice(input: ParseStream) -> Result<PatSlice> { + let content; + let bracket_token = bracketed!(content in input); + + let mut elems = Punctuated::new(); + while !content.is_empty() { + let value = Pat::parse_multi_with_leading_vert(&content)?; + match value { + Pat::Range(pat) if pat.start.is_none() || pat.end.is_none() => { + let (start, end) = match pat.limits { + RangeLimits::HalfOpen(dot_dot) => (dot_dot.spans[0], dot_dot.spans[1]), + RangeLimits::Closed(dot_dot_eq) => { + (dot_dot_eq.spans[0], dot_dot_eq.spans[2]) + } + }; + let msg = "range pattern is not allowed unparenthesized inside slice pattern"; + return Err(error::new2(start, end, msg)); + } + _ => {} + } + elems.push_value(value); + if content.is_empty() { + break; + } + let punct = content.parse()?; + elems.push_punct(punct); + } + + Ok(PatSlice { + attrs: Vec::new(), + bracket_token, + elems, + }) + } + + fn pat_const(input: ParseStream) -> Result<TokenStream> { + let begin = input.fork(); + input.parse::<Token![const]>()?; + + let content; + braced!(content in input); + content.call(Attribute::parse_inner)?; + content.call(Block::parse_within)?; + + Ok(verbatim::between(&begin, input)) + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use crate::attr::FilterAttrs; + use proc_macro2::TokenStream; + use quote::{ToTokens, TokenStreamExt}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PatIdent { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.by_ref.to_tokens(tokens); + self.mutability.to_tokens(tokens); + self.ident.to_tokens(tokens); + if let Some((at_token, subpat)) = &self.subpat { + at_token.to_tokens(tokens); + 
subpat.to_tokens(tokens); + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PatOr { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.leading_vert.to_tokens(tokens); + self.cases.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PatParen { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.paren_token.surround(tokens, |tokens| { + self.pat.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PatReference { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.and_token.to_tokens(tokens); + self.mutability.to_tokens(tokens); + self.pat.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PatRest { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.dot2_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PatSlice { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.bracket_token.surround(tokens, |tokens| { + self.elems.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PatStruct { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + path::printing::print_path(tokens, &self.qself, &self.path); + self.brace_token.surround(tokens, |tokens| { + self.fields.to_tokens(tokens); + // NOTE: We need a comma before the dot2 token if it is present. + if !self.fields.empty_or_trailing() && self.rest.is_some() { + <Token![,]>::default().to_tokens(tokens); + } + self.rest.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PatTuple { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.paren_token.surround(tokens, |tokens| { + self.elems.to_tokens(tokens); + // If there is only one element, a trailing comma is needed to + // distinguish PatTuple from PatParen, unless this is `(..)` + // which is a tuple pattern even without comma. + if self.elems.len() == 1 + && !self.elems.trailing_punct() + && !matches!(self.elems[0], Pat::Rest { .. 
}) + { + <Token![,]>::default().to_tokens(tokens); + } + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PatTupleStruct { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + path::printing::print_path(tokens, &self.qself, &self.path); + self.paren_token.surround(tokens, |tokens| { + self.elems.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PatType { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.pat.to_tokens(tokens); + self.colon_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PatWild { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.underscore_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for FieldPat { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + if let Some(colon_token) = &self.colon_token { + self.member.to_tokens(tokens); + colon_token.to_tokens(tokens); + } + self.pat.to_tokens(tokens); + } + } +} diff --git a/vendor/syn/src/path.rs b/vendor/syn/src/path.rs new file mode 100644 index 0000000..975154a --- /dev/null +++ b/vendor/syn/src/path.rs @@ -0,0 +1,877 @@ +use super::*; +use crate::punctuated::Punctuated; + +ast_struct! { + /// A path at which a named item is exported (e.g. `std::collections::HashMap`). + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct Path { + pub leading_colon: Option<Token![::]>, + pub segments: Punctuated<PathSegment, Token![::]>, + } +} + +impl<T> From<T> for Path +where + T: Into<PathSegment>, +{ + fn from(segment: T) -> Self { + let mut path = Path { + leading_colon: None, + segments: Punctuated::new(), + }; + path.segments.push_value(segment.into()); + path + } +} + +impl Path { + /// Determines whether this is a path of length 1 equal to the given + /// ident. + /// + /// For them to compare equal, it must be the case that: + /// + /// - the path has no leading colon, + /// - the number of path segments is 1, + /// - the first path segment has no angle bracketed or parenthesized + /// path arguments, and + /// - the ident of the first path segment is equal to the given one. + /// + /// # Example + /// + /// ``` + /// use proc_macro2::TokenStream; + /// use syn::{Attribute, Error, Meta, Result}; + /// + /// fn get_serde_meta_item(attr: &Attribute) -> Result<Option<&TokenStream>> { + /// if attr.path().is_ident("serde") { + /// match &attr.meta { + /// Meta::List(meta) => Ok(Some(&meta.tokens)), + /// bad => Err(Error::new_spanned(bad, "unrecognized attribute")), + /// } + /// } else { + /// Ok(None) + /// } + /// } + /// ``` + pub fn is_ident<I>(&self, ident: &I) -> bool + where + I: ?Sized, + Ident: PartialEq<I>, + { + match self.get_ident() { + Some(id) => id == ident, + None => false, + } + } + + /// If this path consists of a single ident, returns the ident. + /// + /// A path is considered an ident if: + /// + /// - the path has no leading colon, + /// - the number of path segments is 1, and + /// - the first path segment has no angle bracketed or parenthesized + /// path arguments. 
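+    ///
+    /// A minimal sketch of the distinction (assuming the `parsing` feature,
+    /// which provides `parse_quote!`):
+    ///
+    /// ```
+    /// use syn::{parse_quote, Path};
+    ///
+    /// let short: Path = parse_quote!(serde);
+    /// assert!(short.get_ident().is_some());
+    ///
+    /// let long: Path = parse_quote!(std::collections::HashMap);
+    /// assert!(long.get_ident().is_none());
+    /// ```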
+ pub fn get_ident(&self) -> Option<&Ident> { + if self.leading_colon.is_none() + && self.segments.len() == 1 + && self.segments[0].arguments.is_none() + { + Some(&self.segments[0].ident) + } else { + None + } + } + + /// An error if this path is not a single ident, as defined in `get_ident`. + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn require_ident(&self) -> Result<&Ident> { + self.get_ident().ok_or_else(|| { + crate::error::new2( + self.segments.first().unwrap().ident.span(), + self.segments.last().unwrap().ident.span(), + "expected this path to be an identifier", + ) + }) + } +} + +ast_struct! { + /// A segment of a path together with any path arguments on that segment. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct PathSegment { + pub ident: Ident, + pub arguments: PathArguments, + } +} + +impl<T> From<T> for PathSegment +where + T: Into<Ident>, +{ + fn from(ident: T) -> Self { + PathSegment { + ident: ident.into(), + arguments: PathArguments::None, + } + } +} + +ast_enum! { + /// Angle bracketed or parenthesized arguments of a path segment. + /// + /// ## Angle bracketed + /// + /// The `<'a, T>` in `std::slice::iter<'a, T>`. + /// + /// ## Parenthesized + /// + /// The `(A, B) -> C` in `Fn(A, B) -> C`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub enum PathArguments { + None, + /// The `<'a, T>` in `std::slice::iter<'a, T>`. + AngleBracketed(AngleBracketedGenericArguments), + /// The `(A, B) -> C` in `Fn(A, B) -> C`. + Parenthesized(ParenthesizedGenericArguments), + } +} + +impl Default for PathArguments { + fn default() -> Self { + PathArguments::None + } +} + +impl PathArguments { + pub fn is_empty(&self) -> bool { + match self { + PathArguments::None => true, + PathArguments::AngleBracketed(bracketed) => bracketed.args.is_empty(), + PathArguments::Parenthesized(_) => false, + } + } + + pub fn is_none(&self) -> bool { + match self { + PathArguments::None => true, + PathArguments::AngleBracketed(_) | PathArguments::Parenthesized(_) => false, + } + } +} + +ast_enum! { + /// An individual generic argument, like `'a`, `T`, or `Item = T`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + #[non_exhaustive] + pub enum GenericArgument { + /// A lifetime argument. + Lifetime(Lifetime), + /// A type argument. + Type(Type), + /// A const expression. Must be inside of a block. + /// + /// NOTE: Identity expressions are represented as Type arguments, as + /// they are indistinguishable syntactically. + Const(Expr), + /// A binding (equality constraint) on an associated type: the `Item = + /// u8` in `Iterator<Item = u8>`. + AssocType(AssocType), + /// An equality constraint on an associated constant: the `PANIC = + /// false` in `Trait<PANIC = false>`. + AssocConst(AssocConst), + /// An associated type bound: `Iterator<Item: Display>`. + Constraint(Constraint), + } +} + +ast_struct! { + /// Angle bracketed arguments of a path segment: the `<K, V>` in `HashMap<K, + /// V>`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct AngleBracketedGenericArguments { + pub colon2_token: Option<Token![::]>, + pub lt_token: Token![<], + pub args: Punctuated<GenericArgument, Token![,]>, + pub gt_token: Token![>], + } +} + +ast_struct! { + /// A binding (equality constraint) on an associated type: the `Item = u8` + /// in `Iterator<Item = u8>`. 
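+    ///
+    /// As a rough illustration, parsing the tokens `Item = u8` as a
+    /// [`GenericArgument`] produces this variant (a sketch assuming the
+    /// `parsing` feature):
+    ///
+    /// ```
+    /// use syn::{parse_quote, GenericArgument};
+    ///
+    /// let arg: GenericArgument = parse_quote!(Item = u8);
+    /// assert!(matches!(arg, GenericArgument::AssocType(_)));
+    /// ```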
+ #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct AssocType { + pub ident: Ident, + pub generics: Option<AngleBracketedGenericArguments>, + pub eq_token: Token![=], + pub ty: Type, + } +} + +ast_struct! { + /// An equality constraint on an associated constant: the `PANIC = false` in + /// `Trait<PANIC = false>`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct AssocConst { + pub ident: Ident, + pub generics: Option<AngleBracketedGenericArguments>, + pub eq_token: Token![=], + pub value: Expr, + } +} + +ast_struct! { + /// An associated type bound: `Iterator<Item: Display>`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct Constraint { + pub ident: Ident, + pub generics: Option<AngleBracketedGenericArguments>, + pub colon_token: Token![:], + pub bounds: Punctuated<TypeParamBound, Token![+]>, + } +} + +ast_struct! { + /// Arguments of a function path segment: the `(A, B) -> C` in `Fn(A,B) -> + /// C`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ParenthesizedGenericArguments { + pub paren_token: token::Paren, + /// `(A, B)` + pub inputs: Punctuated<Type, Token![,]>, + /// `C` + pub output: ReturnType, + } +} + +ast_struct! { + /// The explicit Self type in a qualified path: the `T` in `<T as + /// Display>::fmt`. + /// + /// The actual path, including the trait and the associated item, is stored + /// separately. The `position` field represents the index of the associated + /// item qualified with this Self type. + /// + /// ```text + /// <Vec<T> as a::b::Trait>::AssociatedItem + /// ^~~~~~ ~~~~~~~~~~~~~~^ + /// ty position = 3 + /// + /// <Vec<T>>::AssociatedItem + /// ^~~~~~ ^ + /// ty position = 0 + /// ``` + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct QSelf { + pub lt_token: Token![<], + pub ty: Box<Type>, + pub position: usize, + pub as_token: Option<Token![as]>, + pub gt_token: Token![>], + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + + use crate::ext::IdentExt as _; + use crate::parse::{Parse, ParseStream, Result}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Path { + fn parse(input: ParseStream) -> Result<Self> { + Self::parse_helper(input, false) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for GenericArgument { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Lifetime) && !input.peek2(Token![+]) { + return Ok(GenericArgument::Lifetime(input.parse()?)); + } + + if input.peek(Lit) || input.peek(token::Brace) { + return const_argument(input).map(GenericArgument::Const); + } + + let mut argument: Type = input.parse()?; + + match argument { + Type::Path(mut ty) + if ty.qself.is_none() + && ty.path.leading_colon.is_none() + && ty.path.segments.len() == 1 + && match &ty.path.segments[0].arguments { + PathArguments::None | PathArguments::AngleBracketed(_) => true, + PathArguments::Parenthesized(_) => false, + } => + { + if let Some(eq_token) = input.parse::<Option<Token![=]>>()? 
{ + let segment = ty.path.segments.pop().unwrap().into_value(); + let ident = segment.ident; + let generics = match segment.arguments { + PathArguments::None => None, + PathArguments::AngleBracketed(arguments) => Some(arguments), + PathArguments::Parenthesized(_) => unreachable!(), + }; + return if input.peek(Lit) || input.peek(token::Brace) { + Ok(GenericArgument::AssocConst(AssocConst { + ident, + generics, + eq_token, + value: const_argument(input)?, + })) + } else { + Ok(GenericArgument::AssocType(AssocType { + ident, + generics, + eq_token, + ty: input.parse()?, + })) + }; + } + + #[cfg(feature = "full")] + if let Some(colon_token) = input.parse::<Option<Token![:]>>()? { + let segment = ty.path.segments.pop().unwrap().into_value(); + return Ok(GenericArgument::Constraint(Constraint { + ident: segment.ident, + generics: match segment.arguments { + PathArguments::None => None, + PathArguments::AngleBracketed(arguments) => Some(arguments), + PathArguments::Parenthesized(_) => unreachable!(), + }, + colon_token, + bounds: { + let mut bounds = Punctuated::new(); + loop { + if input.peek(Token![,]) || input.peek(Token![>]) { + break; + } + let value: TypeParamBound = input.parse()?; + bounds.push_value(value); + if !input.peek(Token![+]) { + break; + } + let punct: Token![+] = input.parse()?; + bounds.push_punct(punct); + } + bounds + }, + })); + } + + argument = Type::Path(ty); + } + _ => {} + } + + Ok(GenericArgument::Type(argument)) + } + } + + pub(crate) fn const_argument(input: ParseStream) -> Result<Expr> { + let lookahead = input.lookahead1(); + + if input.peek(Lit) { + let lit = input.parse()?; + return Ok(Expr::Lit(lit)); + } + + if input.peek(Ident) { + let ident: Ident = input.parse()?; + return Ok(Expr::Path(ExprPath { + attrs: Vec::new(), + qself: None, + path: Path::from(ident), + })); + } + + if input.peek(token::Brace) { + #[cfg(feature = "full")] + { + let block: ExprBlock = input.parse()?; + return Ok(Expr::Block(block)); + } + + #[cfg(not(feature = "full"))] + { + let begin = input.fork(); + let content; + braced!(content in input); + content.parse::<Expr>()?; + let verbatim = verbatim::between(&begin, input); + return Ok(Expr::Verbatim(verbatim)); + } + } + + Err(lookahead.error()) + } + + impl AngleBracketedGenericArguments { + /// Parse `::<…>` with mandatory leading `::`. + /// + /// The ordinary [`Parse`] impl for `AngleBracketedGenericArguments` + /// parses optional leading `::`. 
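+        ///
+        /// A minimal sketch of the difference, driving both entry points
+        /// through the [`Parser`](crate::parse::Parser) trait (the `full`
+        /// feature is required for `parse_turbofish`):
+        ///
+        /// ```
+        /// use syn::parse::Parser;
+        /// use syn::AngleBracketedGenericArguments;
+        ///
+        /// // Leading `::` is mandatory here...
+        /// assert!(AngleBracketedGenericArguments::parse_turbofish
+        ///     .parse_str("::<u8>")
+        ///     .is_ok());
+        /// // ...so bare `<u8>` is rejected.
+        /// assert!(AngleBracketedGenericArguments::parse_turbofish
+        ///     .parse_str("<u8>")
+        ///     .is_err());
+        /// ```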
+ #[cfg(feature = "full")] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "parsing", feature = "full"))))] + pub fn parse_turbofish(input: ParseStream) -> Result<Self> { + let colon2_token: Token![::] = input.parse()?; + Self::do_parse(Some(colon2_token), input) + } + + pub(crate) fn do_parse( + colon2_token: Option<Token![::]>, + input: ParseStream, + ) -> Result<Self> { + Ok(AngleBracketedGenericArguments { + colon2_token, + lt_token: input.parse()?, + args: { + let mut args = Punctuated::new(); + loop { + if input.peek(Token![>]) { + break; + } + let value: GenericArgument = input.parse()?; + args.push_value(value); + if input.peek(Token![>]) { + break; + } + let punct: Token![,] = input.parse()?; + args.push_punct(punct); + } + args + }, + gt_token: input.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for AngleBracketedGenericArguments { + fn parse(input: ParseStream) -> Result<Self> { + let colon2_token: Option<Token![::]> = input.parse()?; + Self::do_parse(colon2_token, input) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ParenthesizedGenericArguments { + fn parse(input: ParseStream) -> Result<Self> { + let content; + Ok(ParenthesizedGenericArguments { + paren_token: parenthesized!(content in input), + inputs: content.parse_terminated(Type::parse, Token![,])?, + output: input.call(ReturnType::without_plus)?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for PathSegment { + fn parse(input: ParseStream) -> Result<Self> { + Self::parse_helper(input, false) + } + } + + impl PathSegment { + fn parse_helper(input: ParseStream, expr_style: bool) -> Result<Self> { + if input.peek(Token![super]) + || input.peek(Token![self]) + || input.peek(Token![crate]) + || cfg!(feature = "full") && input.peek(Token![try]) + { + let ident = input.call(Ident::parse_any)?; + return Ok(PathSegment::from(ident)); + } + + let ident = if input.peek(Token![Self]) { + input.call(Ident::parse_any)? + } else { + input.parse()? + }; + + if !expr_style && input.peek(Token![<]) && !input.peek(Token![<=]) + || input.peek(Token![::]) && input.peek3(Token![<]) + { + Ok(PathSegment { + ident, + arguments: PathArguments::AngleBracketed(input.parse()?), + }) + } else { + Ok(PathSegment::from(ident)) + } + } + } + + impl Path { + /// Parse a `Path` containing no path arguments on any of its segments. + /// + /// # Example + /// + /// ``` + /// use syn::{Path, Result, Token}; + /// use syn::parse::{Parse, ParseStream}; + /// + /// // A simplified single `use` statement like: + /// // + /// // use std::collections::HashMap; + /// // + /// // Note that generic parameters are not allowed in a `use` statement + /// // so the following must not be accepted. 
+ /// // + /// // use a::<b>::c; + /// struct SingleUse { + /// use_token: Token![use], + /// path: Path, + /// } + /// + /// impl Parse for SingleUse { + /// fn parse(input: ParseStream) -> Result<Self> { + /// Ok(SingleUse { + /// use_token: input.parse()?, + /// path: input.call(Path::parse_mod_style)?, + /// }) + /// } + /// } + /// ``` + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_mod_style(input: ParseStream) -> Result<Self> { + Ok(Path { + leading_colon: input.parse()?, + segments: { + let mut segments = Punctuated::new(); + loop { + if !input.peek(Ident) + && !input.peek(Token![super]) + && !input.peek(Token![self]) + && !input.peek(Token![Self]) + && !input.peek(Token![crate]) + { + break; + } + let ident = Ident::parse_any(input)?; + segments.push_value(PathSegment::from(ident)); + if !input.peek(Token![::]) { + break; + } + let punct = input.parse()?; + segments.push_punct(punct); + } + if segments.is_empty() { + return Err(input.parse::<Ident>().unwrap_err()); + } else if segments.trailing_punct() { + return Err(input.error("expected path segment after `::`")); + } + segments + }, + }) + } + + pub(crate) fn parse_helper(input: ParseStream, expr_style: bool) -> Result<Self> { + let mut path = Path { + leading_colon: input.parse()?, + segments: { + let mut segments = Punctuated::new(); + let value = PathSegment::parse_helper(input, expr_style)?; + segments.push_value(value); + segments + }, + }; + Path::parse_rest(input, &mut path, expr_style)?; + Ok(path) + } + + pub(crate) fn parse_rest( + input: ParseStream, + path: &mut Self, + expr_style: bool, + ) -> Result<()> { + while input.peek(Token![::]) && !input.peek3(token::Paren) { + let punct: Token![::] = input.parse()?; + path.segments.push_punct(punct); + let value = PathSegment::parse_helper(input, expr_style)?; + path.segments.push_value(value); + } + Ok(()) + } + + pub(crate) fn is_mod_style(&self) -> bool { + self.segments + .iter() + .all(|segment| segment.arguments.is_none()) + } + } + + pub(crate) fn qpath(input: ParseStream, expr_style: bool) -> Result<(Option<QSelf>, Path)> { + if input.peek(Token![<]) { + let lt_token: Token![<] = input.parse()?; + let this: Type = input.parse()?; + let path = if input.peek(Token![as]) { + let as_token: Token![as] = input.parse()?; + let path: Path = input.parse()?; + Some((as_token, path)) + } else { + None + }; + let gt_token: Token![>] = input.parse()?; + let colon2_token: Token![::] = input.parse()?; + let mut rest = Punctuated::new(); + loop { + let path = PathSegment::parse_helper(input, expr_style)?; + rest.push_value(path); + if !input.peek(Token![::]) { + break; + } + let punct: Token![::] = input.parse()?; + rest.push_punct(punct); + } + let (position, as_token, path) = match path { + Some((as_token, mut path)) => { + let pos = path.segments.len(); + path.segments.push_punct(colon2_token); + path.segments.extend(rest.into_pairs()); + (pos, Some(as_token), path) + } + None => { + let path = Path { + leading_colon: Some(colon2_token), + segments: rest, + }; + (0, None, path) + } + }; + let qself = QSelf { + lt_token, + ty: Box::new(this), + position, + as_token, + gt_token, + }; + Ok((Some(qself), path)) + } else { + let path = Path::parse_helper(input, expr_style)?; + Ok((None, path)) + } + } +} + +#[cfg(feature = "printing")] +pub(crate) mod printing { + use super::*; + use crate::print::TokensOrDefault; + #[cfg(feature = "parsing")] + use crate::spanned::Spanned; + #[cfg(feature = "parsing")] + use proc_macro2::Span; + use proc_macro2::TokenStream; + 
use quote::ToTokens; + use std::cmp; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Path { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.leading_colon.to_tokens(tokens); + self.segments.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PathSegment { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.ident.to_tokens(tokens); + self.arguments.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for PathArguments { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + PathArguments::None => {} + PathArguments::AngleBracketed(arguments) => { + arguments.to_tokens(tokens); + } + PathArguments::Parenthesized(arguments) => { + arguments.to_tokens(tokens); + } + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for GenericArgument { + #[allow(clippy::match_same_arms)] + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + GenericArgument::Lifetime(lt) => lt.to_tokens(tokens), + GenericArgument::Type(ty) => ty.to_tokens(tokens), + GenericArgument::Const(expr) => match expr { + Expr::Lit(expr) => expr.to_tokens(tokens), + + Expr::Path(expr) + if expr.attrs.is_empty() + && expr.qself.is_none() + && expr.path.get_ident().is_some() => + { + expr.to_tokens(tokens); + } + + #[cfg(feature = "full")] + Expr::Block(expr) => expr.to_tokens(tokens), + + #[cfg(not(feature = "full"))] + Expr::Verbatim(expr) => expr.to_tokens(tokens), + + // ERROR CORRECTION: Add braces to make sure that the + // generated code is valid. + _ => token::Brace::default().surround(tokens, |tokens| { + expr.to_tokens(tokens); + }), + }, + GenericArgument::AssocType(assoc) => assoc.to_tokens(tokens), + GenericArgument::AssocConst(assoc) => assoc.to_tokens(tokens), + GenericArgument::Constraint(constraint) => constraint.to_tokens(tokens), + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for AngleBracketedGenericArguments { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.colon2_token.to_tokens(tokens); + self.lt_token.to_tokens(tokens); + + // Print lifetimes before types/consts/bindings, regardless of their + // order in self.args. 
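+            //
+            // The two loops below implement that ordering: the first pass
+            // prints only the lifetime arguments, and the second pass prints
+            // everything else. `trailing_or_empty` records whether the most
+            // recently printed argument was followed by a comma, so the
+            // second pass knows when it must insert a separating comma.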
+ let mut trailing_or_empty = true; + for param in self.args.pairs() { + match param.value() { + GenericArgument::Lifetime(_) => { + param.to_tokens(tokens); + trailing_or_empty = param.punct().is_some(); + } + GenericArgument::Type(_) + | GenericArgument::Const(_) + | GenericArgument::AssocType(_) + | GenericArgument::AssocConst(_) + | GenericArgument::Constraint(_) => {} + } + } + for param in self.args.pairs() { + match param.value() { + GenericArgument::Type(_) + | GenericArgument::Const(_) + | GenericArgument::AssocType(_) + | GenericArgument::AssocConst(_) + | GenericArgument::Constraint(_) => { + if !trailing_or_empty { + <Token![,]>::default().to_tokens(tokens); + } + param.to_tokens(tokens); + trailing_or_empty = param.punct().is_some(); + } + GenericArgument::Lifetime(_) => {} + } + } + + self.gt_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for AssocType { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + self.eq_token.to_tokens(tokens); + self.ty.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for AssocConst { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + self.eq_token.to_tokens(tokens); + self.value.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Constraint { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.ident.to_tokens(tokens); + self.generics.to_tokens(tokens); + self.colon_token.to_tokens(tokens); + self.bounds.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ParenthesizedGenericArguments { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.paren_token.surround(tokens, |tokens| { + self.inputs.to_tokens(tokens); + }); + self.output.to_tokens(tokens); + } + } + + pub(crate) fn print_path(tokens: &mut TokenStream, qself: &Option<QSelf>, path: &Path) { + let qself = match qself { + Some(qself) => qself, + None => { + path.to_tokens(tokens); + return; + } + }; + qself.lt_token.to_tokens(tokens); + qself.ty.to_tokens(tokens); + + let pos = cmp::min(qself.position, path.segments.len()); + let mut segments = path.segments.pairs(); + if pos > 0 { + TokensOrDefault(&qself.as_token).to_tokens(tokens); + path.leading_colon.to_tokens(tokens); + for (i, segment) in segments.by_ref().take(pos).enumerate() { + if i + 1 == pos { + segment.value().to_tokens(tokens); + qself.gt_token.to_tokens(tokens); + segment.punct().to_tokens(tokens); + } else { + segment.to_tokens(tokens); + } + } + } else { + qself.gt_token.to_tokens(tokens); + path.leading_colon.to_tokens(tokens); + } + for segment in segments { + segment.to_tokens(tokens); + } + } + + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "parsing", feature = "printing"))))] + impl Spanned for QSelf { + fn span(&self) -> Span { + struct QSelfDelimiters<'a>(&'a QSelf); + + impl<'a> ToTokens for QSelfDelimiters<'a> { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.0.lt_token.to_tokens(tokens); + self.0.gt_token.to_tokens(tokens); + } + } + + QSelfDelimiters(self).span() + } + } +} diff --git a/vendor/syn/src/print.rs b/vendor/syn/src/print.rs new file mode 100644 index 0000000..0740993 --- /dev/null +++ b/vendor/syn/src/print.rs @@ -0,0 +1,16 @@ +use proc_macro2::TokenStream; +use quote::ToTokens; + +pub(crate) struct 
TokensOrDefault<'a, T: 'a>(pub &'a Option<T>); + +impl<'a, T> ToTokens for TokensOrDefault<'a, T> +where + T: ToTokens + Default, +{ + fn to_tokens(&self, tokens: &mut TokenStream) { + match self.0 { + Some(t) => t.to_tokens(tokens), + None => T::default().to_tokens(tokens), + } + } +} diff --git a/vendor/syn/src/punctuated.rs b/vendor/syn/src/punctuated.rs new file mode 100644 index 0000000..3ea8a1d --- /dev/null +++ b/vendor/syn/src/punctuated.rs @@ -0,0 +1,1108 @@ +//! A punctuated sequence of syntax tree nodes separated by punctuation. +//! +//! Lots of things in Rust are punctuated sequences. +//! +//! - The fields of a struct are `Punctuated<Field, Token![,]>`. +//! - The segments of a path are `Punctuated<PathSegment, Token![::]>`. +//! - The bounds on a generic parameter are `Punctuated<TypeParamBound, +//! Token![+]>`. +//! - The arguments to a function call are `Punctuated<Expr, Token![,]>`. +//! +//! This module provides a common representation for these punctuated sequences +//! in the form of the [`Punctuated<T, P>`] type. We store a vector of pairs of +//! syntax tree node + punctuation, where every node in the sequence is followed +//! by punctuation except for possibly the final one. +//! +//! [`Punctuated<T, P>`]: Punctuated +//! +//! ```text +//! a_function_call(arg1, arg2, arg3); +//! ~~~~^ ~~~~^ ~~~~ +//! ``` + +#[cfg(feature = "extra-traits")] +use std::fmt::{self, Debug}; +#[cfg(feature = "extra-traits")] +use std::hash::{Hash, Hasher}; +#[cfg(any(feature = "full", feature = "derive"))] +use std::iter; +use std::ops::{Index, IndexMut}; +use std::option; +use std::slice; +use std::vec; + +use crate::drops::{NoDrop, TrivialDrop}; +#[cfg(feature = "parsing")] +use crate::parse::{Parse, ParseStream, Result}; +#[cfg(feature = "parsing")] +use crate::token::Token; + +/// **A punctuated sequence of syntax tree nodes of type `T` separated by +/// punctuation of type `P`.** +/// +/// Refer to the [module documentation] for details about punctuated sequences. +/// +/// [module documentation]: self +pub struct Punctuated<T, P> { + inner: Vec<(T, P)>, + last: Option<Box<T>>, +} + +impl<T, P> Punctuated<T, P> { + /// Creates an empty punctuated sequence. + pub const fn new() -> Self { + Punctuated { + inner: Vec::new(), + last: None, + } + } + + /// Determines whether this punctuated sequence is empty, meaning it + /// contains no syntax tree nodes or punctuation. + pub fn is_empty(&self) -> bool { + self.inner.len() == 0 && self.last.is_none() + } + + /// Returns the number of syntax tree nodes in this punctuated sequence. + /// + /// This is the number of nodes of type `T`, not counting the punctuation of + /// type `P`. + pub fn len(&self) -> usize { + self.inner.len() + if self.last.is_some() { 1 } else { 0 } + } + + /// Borrows the first element in this sequence. + pub fn first(&self) -> Option<&T> { + self.iter().next() + } + + /// Mutably borrows the first element in this sequence. + pub fn first_mut(&mut self) -> Option<&mut T> { + self.iter_mut().next() + } + + /// Borrows the last element in this sequence. + pub fn last(&self) -> Option<&T> { + self.iter().next_back() + } + + /// Mutably borrows the last element in this sequence. + pub fn last_mut(&mut self) -> Option<&mut T> { + self.iter_mut().next_back() + } + + /// Returns an iterator over borrowed syntax tree nodes of type `&T`. 
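+    ///
+    /// A short sketch of iterating over the nodes while skipping the
+    /// punctuation, using `u32` and `Token![,]` as stand-in types:
+    ///
+    /// ```
+    /// use syn::punctuated::Punctuated;
+    /// use syn::Token;
+    ///
+    /// let mut seq: Punctuated<u32, Token![,]> = Punctuated::new();
+    /// seq.push(1);
+    /// seq.push(2);
+    /// seq.push(3);
+    ///
+    /// let values: Vec<u32> = seq.iter().copied().collect();
+    /// assert_eq!(values, [1, 2, 3]);
+    /// ```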
+ pub fn iter(&self) -> Iter<T> { + Iter { + inner: Box::new(NoDrop::new(PrivateIter { + inner: self.inner.iter(), + last: self.last.as_ref().map(Box::as_ref).into_iter(), + })), + } + } + + /// Returns an iterator over mutably borrowed syntax tree nodes of type + /// `&mut T`. + pub fn iter_mut(&mut self) -> IterMut<T> { + IterMut { + inner: Box::new(NoDrop::new(PrivateIterMut { + inner: self.inner.iter_mut(), + last: self.last.as_mut().map(Box::as_mut).into_iter(), + })), + } + } + + /// Returns an iterator over the contents of this sequence as borrowed + /// punctuated pairs. + pub fn pairs(&self) -> Pairs<T, P> { + Pairs { + inner: self.inner.iter(), + last: self.last.as_ref().map(Box::as_ref).into_iter(), + } + } + + /// Returns an iterator over the contents of this sequence as mutably + /// borrowed punctuated pairs. + pub fn pairs_mut(&mut self) -> PairsMut<T, P> { + PairsMut { + inner: self.inner.iter_mut(), + last: self.last.as_mut().map(Box::as_mut).into_iter(), + } + } + + /// Returns an iterator over the contents of this sequence as owned + /// punctuated pairs. + pub fn into_pairs(self) -> IntoPairs<T, P> { + IntoPairs { + inner: self.inner.into_iter(), + last: self.last.map(|t| *t).into_iter(), + } + } + + /// Appends a syntax tree node onto the end of this punctuated sequence. The + /// sequence must already have a trailing punctuation, or be empty. + /// + /// Use [`push`] instead if the punctuated sequence may or may not already + /// have trailing punctuation. + /// + /// [`push`]: Punctuated::push + /// + /// # Panics + /// + /// Panics if the sequence is nonempty and does not already have a trailing + /// punctuation. + pub fn push_value(&mut self, value: T) { + assert!( + self.empty_or_trailing(), + "Punctuated::push_value: cannot push value if Punctuated is missing trailing punctuation", + ); + + self.last = Some(Box::new(value)); + } + + /// Appends a trailing punctuation onto the end of this punctuated sequence. + /// The sequence must be non-empty and must not already have trailing + /// punctuation. + /// + /// # Panics + /// + /// Panics if the sequence is empty or already has a trailing punctuation. + pub fn push_punct(&mut self, punctuation: P) { + assert!( + self.last.is_some(), + "Punctuated::push_punct: cannot push punctuation if Punctuated is empty or already has trailing punctuation", + ); + + let last = self.last.take().unwrap(); + self.inner.push((*last, punctuation)); + } + + /// Removes the last punctuated pair from this sequence, or `None` if the + /// sequence is empty. + pub fn pop(&mut self) -> Option<Pair<T, P>> { + if self.last.is_some() { + self.last.take().map(|t| Pair::End(*t)) + } else { + self.inner.pop().map(|(t, p)| Pair::Punctuated(t, p)) + } + } + + /// Removes the trailing punctuation from this punctuated sequence, or + /// `None` if there isn't any. + pub fn pop_punct(&mut self) -> Option<P> { + if self.last.is_some() { + None + } else { + let (t, p) = self.inner.pop()?; + self.last = Some(Box::new(t)); + Some(p) + } + } + + /// Determines whether this punctuated sequence ends with a trailing + /// punctuation. + pub fn trailing_punct(&self) -> bool { + self.last.is_none() && !self.is_empty() + } + + /// Returns true if either this `Punctuated` is empty, or it has a trailing + /// punctuation. + /// + /// Equivalent to `punctuated.is_empty() || punctuated.trailing_punct()`. + pub fn empty_or_trailing(&self) -> bool { + self.last.is_none() + } + + /// Appends a syntax tree node onto the end of this punctuated sequence. 
+ /// + /// If there is not a trailing punctuation in this sequence when this method + /// is called, the default value of punctuation type `P` is inserted before + /// the given value of type `T`. + pub fn push(&mut self, value: T) + where + P: Default, + { + if !self.empty_or_trailing() { + self.push_punct(Default::default()); + } + self.push_value(value); + } + + /// Inserts an element at position `index`. + /// + /// # Panics + /// + /// Panics if `index` is greater than the number of elements previously in + /// this punctuated sequence. + pub fn insert(&mut self, index: usize, value: T) + where + P: Default, + { + assert!( + index <= self.len(), + "Punctuated::insert: index out of range", + ); + + if index == self.len() { + self.push(value); + } else { + self.inner.insert(index, (value, Default::default())); + } + } + + /// Clears the sequence of all values and punctuation, making it empty. + pub fn clear(&mut self) { + self.inner.clear(); + self.last = None; + } + + /// Parses zero or more occurrences of `T` separated by punctuation of type + /// `P`, with optional trailing punctuation. + /// + /// Parsing continues until the end of this parse stream. The entire content + /// of this parse stream must consist of `T` and `P`. + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_terminated(input: ParseStream) -> Result<Self> + where + T: Parse, + P: Parse, + { + Self::parse_terminated_with(input, T::parse) + } + + /// Parses zero or more occurrences of `T` using the given parse function, + /// separated by punctuation of type `P`, with optional trailing + /// punctuation. + /// + /// Like [`parse_terminated`], the entire content of this stream is expected + /// to be parsed. + /// + /// [`parse_terminated`]: Punctuated::parse_terminated + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_terminated_with( + input: ParseStream, + parser: fn(ParseStream) -> Result<T>, + ) -> Result<Self> + where + P: Parse, + { + let mut punctuated = Punctuated::new(); + + loop { + if input.is_empty() { + break; + } + let value = parser(input)?; + punctuated.push_value(value); + if input.is_empty() { + break; + } + let punct = input.parse()?; + punctuated.push_punct(punct); + } + + Ok(punctuated) + } + + /// Parses one or more occurrences of `T` separated by punctuation of type + /// `P`, not accepting trailing punctuation. + /// + /// Parsing continues as long as punctuation `P` is present at the head of + /// the stream. This method returns upon parsing a `T` and observing that it + /// is not followed by a `P`, even if there are remaining tokens in the + /// stream. + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_separated_nonempty(input: ParseStream) -> Result<Self> + where + T: Parse, + P: Token + Parse, + { + Self::parse_separated_nonempty_with(input, T::parse) + } + + /// Parses one or more occurrences of `T` using the given parse function, + /// separated by punctuation of type `P`, not accepting trailing + /// punctuation. + /// + /// Like [`parse_separated_nonempty`], may complete early without parsing + /// the entire content of this stream. 
+ /// + /// [`parse_separated_nonempty`]: Punctuated::parse_separated_nonempty + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_separated_nonempty_with( + input: ParseStream, + parser: fn(ParseStream) -> Result<T>, + ) -> Result<Self> + where + P: Token + Parse, + { + let mut punctuated = Punctuated::new(); + + loop { + let value = parser(input)?; + punctuated.push_value(value); + if !P::peek(input.cursor()) { + break; + } + let punct = input.parse()?; + punctuated.push_punct(punct); + } + + Ok(punctuated) + } +} + +#[cfg(feature = "clone-impls")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl<T, P> Clone for Punctuated<T, P> +where + T: Clone, + P: Clone, +{ + fn clone(&self) -> Self { + Punctuated { + inner: self.inner.clone(), + last: self.last.clone(), + } + } + + fn clone_from(&mut self, other: &Self) { + self.inner.clone_from(&other.inner); + self.last.clone_from(&other.last); + } +} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl<T, P> Eq for Punctuated<T, P> +where + T: Eq, + P: Eq, +{ +} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl<T, P> PartialEq for Punctuated<T, P> +where + T: PartialEq, + P: PartialEq, +{ + fn eq(&self, other: &Self) -> bool { + let Punctuated { inner, last } = self; + *inner == other.inner && *last == other.last + } +} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl<T, P> Hash for Punctuated<T, P> +where + T: Hash, + P: Hash, +{ + fn hash<H: Hasher>(&self, state: &mut H) { + let Punctuated { inner, last } = self; + inner.hash(state); + last.hash(state); + } +} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl<T: Debug, P: Debug> Debug for Punctuated<T, P> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut list = f.debug_list(); + for (t, p) in &self.inner { + list.entry(t); + list.entry(p); + } + if let Some(last) = &self.last { + list.entry(last); + } + list.finish() + } +} + +impl<T, P> FromIterator<T> for Punctuated<T, P> +where + P: Default, +{ + fn from_iter<I: IntoIterator<Item = T>>(i: I) -> Self { + let mut ret = Punctuated::new(); + ret.extend(i); + ret + } +} + +impl<T, P> Extend<T> for Punctuated<T, P> +where + P: Default, +{ + fn extend<I: IntoIterator<Item = T>>(&mut self, i: I) { + for value in i { + self.push(value); + } + } +} + +impl<T, P> FromIterator<Pair<T, P>> for Punctuated<T, P> { + fn from_iter<I: IntoIterator<Item = Pair<T, P>>>(i: I) -> Self { + let mut ret = Punctuated::new(); + do_extend(&mut ret, i.into_iter()); + ret + } +} + +impl<T, P> Extend<Pair<T, P>> for Punctuated<T, P> +where + P: Default, +{ + fn extend<I: IntoIterator<Item = Pair<T, P>>>(&mut self, i: I) { + if !self.empty_or_trailing() { + self.push_punct(P::default()); + } + do_extend(self, i.into_iter()); + } +} + +fn do_extend<T, P, I>(punctuated: &mut Punctuated<T, P>, i: I) +where + I: Iterator<Item = Pair<T, P>>, +{ + let mut nomore = false; + for pair in i { + if nomore { + panic!("Punctuated extended with items after a Pair::End"); + } + match pair { + Pair::Punctuated(a, b) => punctuated.inner.push((a, b)), + Pair::End(a) => { + punctuated.last = Some(Box::new(a)); + nomore = true; + } + } + } +} + +impl<T, P> IntoIterator for Punctuated<T, P> { + type Item = T; + type IntoIter = IntoIter<T>; + + fn into_iter(self) -> Self::IntoIter { + let mut elements = 
Vec::with_capacity(self.len()); + elements.extend(self.inner.into_iter().map(|pair| pair.0)); + elements.extend(self.last.map(|t| *t)); + + IntoIter { + inner: elements.into_iter(), + } + } +} + +impl<'a, T, P> IntoIterator for &'a Punctuated<T, P> { + type Item = &'a T; + type IntoIter = Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + Punctuated::iter(self) + } +} + +impl<'a, T, P> IntoIterator for &'a mut Punctuated<T, P> { + type Item = &'a mut T; + type IntoIter = IterMut<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + Punctuated::iter_mut(self) + } +} + +impl<T, P> Default for Punctuated<T, P> { + fn default() -> Self { + Punctuated::new() + } +} + +/// An iterator over borrowed pairs of type `Pair<&T, &P>`. +/// +/// Refer to the [module documentation] for details about punctuated sequences. +/// +/// [module documentation]: self +pub struct Pairs<'a, T: 'a, P: 'a> { + inner: slice::Iter<'a, (T, P)>, + last: option::IntoIter<&'a T>, +} + +impl<'a, T, P> Iterator for Pairs<'a, T, P> { + type Item = Pair<&'a T, &'a P>; + + fn next(&mut self) -> Option<Self::Item> { + self.inner + .next() + .map(|(t, p)| Pair::Punctuated(t, p)) + .or_else(|| self.last.next().map(Pair::End)) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.len(), Some(self.len())) + } +} + +impl<'a, T, P> DoubleEndedIterator for Pairs<'a, T, P> { + fn next_back(&mut self) -> Option<Self::Item> { + self.last + .next() + .map(Pair::End) + .or_else(|| self.inner.next_back().map(|(t, p)| Pair::Punctuated(t, p))) + } +} + +impl<'a, T, P> ExactSizeIterator for Pairs<'a, T, P> { + fn len(&self) -> usize { + self.inner.len() + self.last.len() + } +} + +// No Clone bound on T or P. +impl<'a, T, P> Clone for Pairs<'a, T, P> { + fn clone(&self) -> Self { + Pairs { + inner: self.inner.clone(), + last: self.last.clone(), + } + } +} + +/// An iterator over mutably borrowed pairs of type `Pair<&mut T, &mut P>`. +/// +/// Refer to the [module documentation] for details about punctuated sequences. +/// +/// [module documentation]: self +pub struct PairsMut<'a, T: 'a, P: 'a> { + inner: slice::IterMut<'a, (T, P)>, + last: option::IntoIter<&'a mut T>, +} + +impl<'a, T, P> Iterator for PairsMut<'a, T, P> { + type Item = Pair<&'a mut T, &'a mut P>; + + fn next(&mut self) -> Option<Self::Item> { + self.inner + .next() + .map(|(t, p)| Pair::Punctuated(t, p)) + .or_else(|| self.last.next().map(Pair::End)) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.len(), Some(self.len())) + } +} + +impl<'a, T, P> DoubleEndedIterator for PairsMut<'a, T, P> { + fn next_back(&mut self) -> Option<Self::Item> { + self.last + .next() + .map(Pair::End) + .or_else(|| self.inner.next_back().map(|(t, p)| Pair::Punctuated(t, p))) + } +} + +impl<'a, T, P> ExactSizeIterator for PairsMut<'a, T, P> { + fn len(&self) -> usize { + self.inner.len() + self.last.len() + } +} + +/// An iterator over owned pairs of type `Pair<T, P>`. +/// +/// Refer to the [module documentation] for details about punctuated sequences. 
+/// +/// [module documentation]: self +pub struct IntoPairs<T, P> { + inner: vec::IntoIter<(T, P)>, + last: option::IntoIter<T>, +} + +impl<T, P> Iterator for IntoPairs<T, P> { + type Item = Pair<T, P>; + + fn next(&mut self) -> Option<Self::Item> { + self.inner + .next() + .map(|(t, p)| Pair::Punctuated(t, p)) + .or_else(|| self.last.next().map(Pair::End)) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.len(), Some(self.len())) + } +} + +impl<T, P> DoubleEndedIterator for IntoPairs<T, P> { + fn next_back(&mut self) -> Option<Self::Item> { + self.last + .next() + .map(Pair::End) + .or_else(|| self.inner.next_back().map(|(t, p)| Pair::Punctuated(t, p))) + } +} + +impl<T, P> ExactSizeIterator for IntoPairs<T, P> { + fn len(&self) -> usize { + self.inner.len() + self.last.len() + } +} + +impl<T, P> Clone for IntoPairs<T, P> +where + T: Clone, + P: Clone, +{ + fn clone(&self) -> Self { + IntoPairs { + inner: self.inner.clone(), + last: self.last.clone(), + } + } +} + +/// An iterator over owned values of type `T`. +/// +/// Refer to the [module documentation] for details about punctuated sequences. +/// +/// [module documentation]: self +pub struct IntoIter<T> { + inner: vec::IntoIter<T>, +} + +impl<T> Iterator for IntoIter<T> { + type Item = T; + + fn next(&mut self) -> Option<Self::Item> { + self.inner.next() + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.len(), Some(self.len())) + } +} + +impl<T> DoubleEndedIterator for IntoIter<T> { + fn next_back(&mut self) -> Option<Self::Item> { + self.inner.next_back() + } +} + +impl<T> ExactSizeIterator for IntoIter<T> { + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<T> Clone for IntoIter<T> +where + T: Clone, +{ + fn clone(&self) -> Self { + IntoIter { + inner: self.inner.clone(), + } + } +} + +/// An iterator over borrowed values of type `&T`. +/// +/// Refer to the [module documentation] for details about punctuated sequences. +/// +/// [module documentation]: self +pub struct Iter<'a, T: 'a> { + inner: Box<NoDrop<dyn IterTrait<'a, T> + 'a>>, +} + +trait IterTrait<'a, T: 'a>: Iterator<Item = &'a T> + DoubleEndedIterator + ExactSizeIterator { + fn clone_box(&self) -> Box<NoDrop<dyn IterTrait<'a, T> + 'a>>; +} + +struct PrivateIter<'a, T: 'a, P: 'a> { + inner: slice::Iter<'a, (T, P)>, + last: option::IntoIter<&'a T>, +} + +impl<'a, T, P> TrivialDrop for PrivateIter<'a, T, P> +where + slice::Iter<'a, (T, P)>: TrivialDrop, + option::IntoIter<&'a T>: TrivialDrop, +{ +} + +#[cfg(any(feature = "full", feature = "derive"))] +pub(crate) fn empty_punctuated_iter<'a, T>() -> Iter<'a, T> { + Iter { + inner: Box::new(NoDrop::new(iter::empty())), + } +} + +// No Clone bound on T. 
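+// Editor's note: `Iter` type-erases the punctuation parameter `P` behind
+// `dyn IterTrait`, so `Clone` is routed through `clone_box`, which
+// duplicates only the iterator state (slice cursors), never the `T` values
+// themselves.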
+impl<'a, T> Clone for Iter<'a, T> { + fn clone(&self) -> Self { + Iter { + inner: self.inner.clone_box(), + } + } +} + +impl<'a, T> Iterator for Iter<'a, T> { + type Item = &'a T; + + fn next(&mut self) -> Option<Self::Item> { + self.inner.next() + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.len(), Some(self.len())) + } +} + +impl<'a, T> DoubleEndedIterator for Iter<'a, T> { + fn next_back(&mut self) -> Option<Self::Item> { + self.inner.next_back() + } +} + +impl<'a, T> ExactSizeIterator for Iter<'a, T> { + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a, T, P> Iterator for PrivateIter<'a, T, P> { + type Item = &'a T; + + fn next(&mut self) -> Option<Self::Item> { + self.inner + .next() + .map(|pair| &pair.0) + .or_else(|| self.last.next()) + } +} + +impl<'a, T, P> DoubleEndedIterator for PrivateIter<'a, T, P> { + fn next_back(&mut self) -> Option<Self::Item> { + self.last + .next() + .or_else(|| self.inner.next_back().map(|pair| &pair.0)) + } +} + +impl<'a, T, P> ExactSizeIterator for PrivateIter<'a, T, P> { + fn len(&self) -> usize { + self.inner.len() + self.last.len() + } +} + +// No Clone bound on T or P. +impl<'a, T, P> Clone for PrivateIter<'a, T, P> { + fn clone(&self) -> Self { + PrivateIter { + inner: self.inner.clone(), + last: self.last.clone(), + } + } +} + +impl<'a, T, I> IterTrait<'a, T> for I +where + T: 'a, + I: DoubleEndedIterator<Item = &'a T> + + ExactSizeIterator<Item = &'a T> + + Clone + + TrivialDrop + + 'a, +{ + fn clone_box(&self) -> Box<NoDrop<dyn IterTrait<'a, T> + 'a>> { + Box::new(NoDrop::new(self.clone())) + } +} + +/// An iterator over mutably borrowed values of type `&mut T`. +/// +/// Refer to the [module documentation] for details about punctuated sequences. +/// +/// [module documentation]: self +pub struct IterMut<'a, T: 'a> { + inner: Box<NoDrop<dyn IterMutTrait<'a, T, Item = &'a mut T> + 'a>>, +} + +trait IterMutTrait<'a, T: 'a>: + DoubleEndedIterator<Item = &'a mut T> + ExactSizeIterator<Item = &'a mut T> +{ +} + +struct PrivateIterMut<'a, T: 'a, P: 'a> { + inner: slice::IterMut<'a, (T, P)>, + last: option::IntoIter<&'a mut T>, +} + +impl<'a, T, P> TrivialDrop for PrivateIterMut<'a, T, P> +where + slice::IterMut<'a, (T, P)>: TrivialDrop, + option::IntoIter<&'a mut T>: TrivialDrop, +{ +} + +#[cfg(any(feature = "full", feature = "derive"))] +pub(crate) fn empty_punctuated_iter_mut<'a, T>() -> IterMut<'a, T> { + IterMut { + inner: Box::new(NoDrop::new(iter::empty())), + } +} + +impl<'a, T> Iterator for IterMut<'a, T> { + type Item = &'a mut T; + + fn next(&mut self) -> Option<Self::Item> { + self.inner.next() + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.len(), Some(self.len())) + } +} + +impl<'a, T> DoubleEndedIterator for IterMut<'a, T> { + fn next_back(&mut self) -> Option<Self::Item> { + self.inner.next_back() + } +} + +impl<'a, T> ExactSizeIterator for IterMut<'a, T> { + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a, T, P> Iterator for PrivateIterMut<'a, T, P> { + type Item = &'a mut T; + + fn next(&mut self) -> Option<Self::Item> { + self.inner + .next() + .map(|pair| &mut pair.0) + .or_else(|| self.last.next()) + } +} + +impl<'a, T, P> DoubleEndedIterator for PrivateIterMut<'a, T, P> { + fn next_back(&mut self) -> Option<Self::Item> { + self.last + .next() + .or_else(|| self.inner.next_back().map(|pair| &mut pair.0)) + } +} + +impl<'a, T, P> ExactSizeIterator for PrivateIterMut<'a, T, P> { + fn len(&self) -> usize { + self.inner.len() + self.last.len() + } +} + +impl<'a, T, I> 
IterMutTrait<'a, T> for I +where + T: 'a, + I: DoubleEndedIterator<Item = &'a mut T> + ExactSizeIterator<Item = &'a mut T> + 'a, +{ +} + +/// A single syntax tree node of type `T` followed by its trailing punctuation +/// of type `P` if any. +/// +/// Refer to the [module documentation] for details about punctuated sequences. +/// +/// [module documentation]: self +pub enum Pair<T, P> { + Punctuated(T, P), + End(T), +} + +impl<T, P> Pair<T, P> { + /// Extracts the syntax tree node from this punctuated pair, discarding the + /// following punctuation. + pub fn into_value(self) -> T { + match self { + Pair::Punctuated(t, _) | Pair::End(t) => t, + } + } + + /// Borrows the syntax tree node from this punctuated pair. + pub fn value(&self) -> &T { + match self { + Pair::Punctuated(t, _) | Pair::End(t) => t, + } + } + + /// Mutably borrows the syntax tree node from this punctuated pair. + pub fn value_mut(&mut self) -> &mut T { + match self { + Pair::Punctuated(t, _) | Pair::End(t) => t, + } + } + + /// Borrows the punctuation from this punctuated pair, unless this pair is + /// the final one and there is no trailing punctuation. + pub fn punct(&self) -> Option<&P> { + match self { + Pair::Punctuated(_, p) => Some(p), + Pair::End(_) => None, + } + } + + /// Mutably borrows the punctuation from this punctuated pair, unless the + /// pair is the final one and there is no trailing punctuation. + /// + /// # Example + /// + /// ``` + /// # use proc_macro2::Span; + /// # use syn::punctuated::Punctuated; + /// # use syn::{parse_quote, Token, TypeParamBound}; + /// # + /// # let mut punctuated = Punctuated::<TypeParamBound, Token![+]>::new(); + /// # let span = Span::call_site(); + /// # + /// punctuated.insert(0, parse_quote!('lifetime)); + /// if let Some(punct) = punctuated.pairs_mut().next().unwrap().punct_mut() { + /// punct.span = span; + /// } + /// ``` + pub fn punct_mut(&mut self) -> Option<&mut P> { + match self { + Pair::Punctuated(_, p) => Some(p), + Pair::End(_) => None, + } + } + + /// Creates a punctuated pair out of a syntax tree node and an optional + /// following punctuation. + pub fn new(t: T, p: Option<P>) -> Self { + match p { + Some(p) => Pair::Punctuated(t, p), + None => Pair::End(t), + } + } + + /// Produces this punctuated pair as a tuple of syntax tree node and + /// optional following punctuation. 
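+    ///
+    /// (Editor addition: a minimal sketch of round-tripping through
+    /// [`Pair::new`].)
+    ///
+    /// ```
+    /// use syn::punctuated::Pair;
+    /// use syn::Token;
+    ///
+    /// let comma = <Token![,]>::default();
+    /// let pair = Pair::new("value", Some(comma));
+    /// let (value, punct) = pair.into_tuple();
+    /// assert_eq!(value, "value");
+    /// assert!(punct.is_some());
+    /// ```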
+ pub fn into_tuple(self) -> (T, Option<P>) { + match self { + Pair::Punctuated(t, p) => (t, Some(p)), + Pair::End(t) => (t, None), + } + } +} + +#[cfg(feature = "clone-impls")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl<T, P> Pair<&T, &P> { + pub fn cloned(self) -> Pair<T, P> + where + T: Clone, + P: Clone, + { + match self { + Pair::Punctuated(t, p) => Pair::Punctuated(t.clone(), p.clone()), + Pair::End(t) => Pair::End(t.clone()), + } + } +} + +#[cfg(feature = "clone-impls")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl<T, P> Clone for Pair<T, P> +where + T: Clone, + P: Clone, +{ + fn clone(&self) -> Self { + match self { + Pair::Punctuated(t, p) => Pair::Punctuated(t.clone(), p.clone()), + Pair::End(t) => Pair::End(t.clone()), + } + } +} + +#[cfg(feature = "clone-impls")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl<T, P> Copy for Pair<T, P> +where + T: Copy, + P: Copy, +{ +} + +impl<T, P> Index<usize> for Punctuated<T, P> { + type Output = T; + + fn index(&self, index: usize) -> &Self::Output { + if index == self.len() - 1 { + match &self.last { + Some(t) => t, + None => &self.inner[index].0, + } + } else { + &self.inner[index].0 + } + } +} + +impl<T, P> IndexMut<usize> for Punctuated<T, P> { + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + if index == self.len() - 1 { + match &mut self.last { + Some(t) => t, + None => &mut self.inner[index].0, + } + } else { + &mut self.inner[index].0 + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use proc_macro2::TokenStream; + use quote::{ToTokens, TokenStreamExt}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl<T, P> ToTokens for Punctuated<T, P> + where + T: ToTokens, + P: ToTokens, + { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.pairs()); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl<T, P> ToTokens for Pair<T, P> + where + T: ToTokens, + P: ToTokens, + { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + Pair::Punctuated(a, b) => { + a.to_tokens(tokens); + b.to_tokens(tokens); + } + Pair::End(a) => a.to_tokens(tokens), + } + } + } +} diff --git a/vendor/syn/src/restriction.rs b/vendor/syn/src/restriction.rs new file mode 100644 index 0000000..e66b17f --- /dev/null +++ b/vendor/syn/src/restriction.rs @@ -0,0 +1,171 @@ +use super::*; + +ast_enum! { + /// The visibility level of an item: inherited or `pub` or + /// `pub(restricted)`. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub enum Visibility { + /// A public visibility level: `pub`. + Public(Token![pub]), + + /// A visibility level restricted to some path: `pub(self)` or + /// `pub(super)` or `pub(crate)` or `pub(in some::module)`. + Restricted(VisRestricted), + + /// An inherited visibility, which usually means private. + Inherited, + } +} + +ast_struct! { + /// A visibility level restricted to some path: `pub(self)` or + /// `pub(super)` or `pub(crate)` or `pub(in some::module)`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct VisRestricted { + pub pub_token: Token![pub], + pub paren_token: token::Paren, + pub in_token: Option<Token![in]>, + pub path: Box<Path>, + } +} + +ast_enum! { + /// Unused, but reserved for RFC 3323 restrictions. 
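+    ///
+    /// (Editor's note: the enum is `#[non_exhaustive]`, so downstream
+    /// matches need a wildcard arm, e.g.
+    /// `match m { FieldMutability::None => {} _ => {} }`.)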
+ #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + #[non_exhaustive] + pub enum FieldMutability { + None, + + // TODO: https://rust-lang.github.io/rfcs/3323-restrictions.html + // + // FieldMutability::Restricted(MutRestricted) + // + // pub struct MutRestricted { + // pub mut_token: Token![mut], + // pub paren_token: token::Paren, + // pub in_token: Option<Token![in]>, + // pub path: Box<Path>, + // } + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::ext::IdentExt as _; + use crate::parse::discouraged::Speculative as _; + use crate::parse::{Parse, ParseStream, Result}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Visibility { + fn parse(input: ParseStream) -> Result<Self> { + // Recognize an empty None-delimited group, as produced by a $:vis + // matcher that matched no tokens. + if input.peek(token::Group) { + let ahead = input.fork(); + let group = crate::group::parse_group(&ahead)?; + if group.content.is_empty() { + input.advance_to(&ahead); + return Ok(Visibility::Inherited); + } + } + + if input.peek(Token![pub]) { + Self::parse_pub(input) + } else { + Ok(Visibility::Inherited) + } + } + } + + impl Visibility { + fn parse_pub(input: ParseStream) -> Result<Self> { + let pub_token = input.parse::<Token![pub]>()?; + + if input.peek(token::Paren) { + let ahead = input.fork(); + + let content; + let paren_token = parenthesized!(content in ahead); + if content.peek(Token![crate]) + || content.peek(Token![self]) + || content.peek(Token![super]) + { + let path = content.call(Ident::parse_any)?; + + // Ensure there are no additional tokens within `content`. + // Without explicitly checking, we may misinterpret a tuple + // field as a restricted visibility, causing a parse error. + // e.g. `pub (crate::A, crate::B)` (Issue #720). + if content.is_empty() { + input.advance_to(&ahead); + return Ok(Visibility::Restricted(VisRestricted { + pub_token, + paren_token, + in_token: None, + path: Box::new(Path::from(path)), + })); + } + } else if content.peek(Token![in]) { + let in_token: Token![in] = content.parse()?; + let path = content.call(Path::parse_mod_style)?; + + input.advance_to(&ahead); + return Ok(Visibility::Restricted(VisRestricted { + pub_token, + paren_token, + in_token: Some(in_token), + path: Box::new(path), + })); + } + } + + Ok(Visibility::Public(pub_token)) + } + + #[cfg(feature = "full")] + pub(crate) fn is_some(&self) -> bool { + match self { + Visibility::Inherited => false, + _ => true, + } + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use proc_macro2::TokenStream; + use quote::ToTokens; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Visibility { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + Visibility::Public(pub_token) => pub_token.to_tokens(tokens), + Visibility::Restricted(vis_restricted) => vis_restricted.to_tokens(tokens), + Visibility::Inherited => {} + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for VisRestricted { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.pub_token.to_tokens(tokens); + self.paren_token.surround(tokens, |tokens| { + // TODO: If we have a path which is not "self" or "super" or + // "crate", automatically add the "in" token. 
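+                // Editor's note: with `in_token` absent this prints
+                // `pub(crate)`, `pub(self)`, or `pub(super)`; with
+                // `in_token` present it prints `pub(in some::module)`.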
+ self.in_token.to_tokens(tokens); + self.path.to_tokens(tokens); + }); + } + } +} diff --git a/vendor/syn/src/sealed.rs b/vendor/syn/src/sealed.rs new file mode 100644 index 0000000..dc80474 --- /dev/null +++ b/vendor/syn/src/sealed.rs @@ -0,0 +1,4 @@ +#[cfg(feature = "parsing")] +pub(crate) mod lookahead { + pub trait Sealed: Copy {} +} diff --git a/vendor/syn/src/span.rs b/vendor/syn/src/span.rs new file mode 100644 index 0000000..eb27794 --- /dev/null +++ b/vendor/syn/src/span.rs @@ -0,0 +1,63 @@ +use proc_macro2::extra::DelimSpan; +use proc_macro2::{Delimiter, Group, Span, TokenStream}; + +#[doc(hidden)] +pub trait IntoSpans<S> { + fn into_spans(self) -> S; +} + +impl IntoSpans<Span> for Span { + fn into_spans(self) -> Span { + self + } +} + +impl IntoSpans<[Span; 1]> for Span { + fn into_spans(self) -> [Span; 1] { + [self] + } +} + +impl IntoSpans<[Span; 2]> for Span { + fn into_spans(self) -> [Span; 2] { + [self, self] + } +} + +impl IntoSpans<[Span; 3]> for Span { + fn into_spans(self) -> [Span; 3] { + [self, self, self] + } +} + +impl IntoSpans<[Span; 1]> for [Span; 1] { + fn into_spans(self) -> [Span; 1] { + self + } +} + +impl IntoSpans<[Span; 2]> for [Span; 2] { + fn into_spans(self) -> [Span; 2] { + self + } +} + +impl IntoSpans<[Span; 3]> for [Span; 3] { + fn into_spans(self) -> [Span; 3] { + self + } +} + +impl IntoSpans<DelimSpan> for Span { + fn into_spans(self) -> DelimSpan { + let mut group = Group::new(Delimiter::None, TokenStream::new()); + group.set_span(self); + group.delim_span() + } +} + +impl IntoSpans<DelimSpan> for DelimSpan { + fn into_spans(self) -> DelimSpan { + self + } +} diff --git a/vendor/syn/src/spanned.rs b/vendor/syn/src/spanned.rs new file mode 100644 index 0000000..98aa0aa --- /dev/null +++ b/vendor/syn/src/spanned.rs @@ -0,0 +1,118 @@ +//! A trait that can provide the `Span` of the complete contents of a syntax +//! tree node. +//! +//! <br> +//! +//! # Example +//! +//! Suppose in a procedural macro we have a [`Type`] that we want to assert +//! implements the [`Sync`] trait. Maybe this is the type of one of the fields +//! of a struct for which we are deriving a trait implementation, and we need to +//! be able to pass a reference to one of those fields across threads. +//! +//! [`Type`]: crate::Type +//! [`Sync`]: std::marker::Sync +//! +//! If the field type does *not* implement `Sync` as required, we want the +//! compiler to report an error pointing out exactly which type it was. +//! +//! The following macro code takes a variable `ty` of type `Type` and produces a +//! static assertion that `Sync` is implemented for that type. +//! +//! ``` +//! # extern crate proc_macro; +//! # +//! use proc_macro::TokenStream; +//! use proc_macro2::Span; +//! use quote::quote_spanned; +//! use syn::Type; +//! use syn::spanned::Spanned; +//! +//! # const IGNORE_TOKENS: &str = stringify! { +//! #[proc_macro_derive(MyMacro)] +//! # }; +//! pub fn my_macro(input: TokenStream) -> TokenStream { +//! # let ty = get_a_type(); +//! /* ... */ +//! +//! let assert_sync = quote_spanned! {ty.span()=> +//! struct _AssertSync where #ty: Sync; +//! }; +//! +//! /* ... */ +//! # input +//! } +//! # +//! # fn get_a_type() -> Type { +//! # unimplemented!() +//! # } +//! ``` +//! +//! By inserting this `assert_sync` fragment into the output code generated by +//! our macro, the user's code will fail to compile if `ty` does not implement +//! `Sync`. The errors they would see look like the following. +//! +//! ```text +//! 
error[E0277]: the trait bound `*const i32: std::marker::Sync` is not satisfied +//! --> src/main.rs:10:21 +//! | +//! 10 | bad_field: *const i32, +//! | ^^^^^^^^^^ `*const i32` cannot be shared between threads safely +//! ``` +//! +//! In this technique, using the `Type`'s span for the error message makes the +//! error appear in the correct place underlining the right type. +//! +//! <br> +//! +//! # Limitations +//! +//! The underlying [`proc_macro::Span::join`] method is nightly-only. When +//! called from within a procedural macro in a nightly compiler, `Spanned` will +//! use `join` to produce the intended span. When not using a nightly compiler, +//! only the span of the *first token* of the syntax tree node is returned. +//! +//! In the common case of wanting to use the joined span as the span of a +//! `syn::Error`, consider instead using [`syn::Error::new_spanned`] which is +//! able to span the error correctly under the complete syntax tree node without +//! needing the unstable `join`. +//! +//! [`syn::Error::new_spanned`]: crate::Error::new_spanned + +use proc_macro2::Span; +use quote::spanned::Spanned as ToTokens; + +/// A trait that can provide the `Span` of the complete contents of a syntax +/// tree node. +/// +/// This trait is automatically implemented for all types that implement +/// [`ToTokens`] from the `quote` crate, as well as for `Span` itself. +/// +/// [`ToTokens`]: quote::ToTokens +/// +/// See the [module documentation] for an example. +/// +/// [module documentation]: self +pub trait Spanned: private::Sealed { + /// Returns a `Span` covering the complete contents of this syntax tree + /// node, or [`Span::call_site()`] if this node is empty. + /// + /// [`Span::call_site()`]: proc_macro2::Span::call_site + fn span(&self) -> Span; +} + +impl<T: ?Sized + ToTokens> Spanned for T { + fn span(&self) -> Span { + self.__span() + } +} + +mod private { + use super::*; + + pub trait Sealed {} + impl<T: ?Sized + ToTokens> Sealed for T {} + + #[cfg(any(feature = "full", feature = "derive"))] + impl Sealed for crate::QSelf {} +} diff --git a/vendor/syn/src/stmt.rs b/vendor/syn/src/stmt.rs new file mode 100644 index 0000000..b6d0664 --- /dev/null +++ b/vendor/syn/src/stmt.rs @@ -0,0 +1,452 @@ +use super::*; + +ast_struct! { + /// A braced block containing Rust statements. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct Block { + pub brace_token: token::Brace, + /// Statements in a block + pub stmts: Vec<Stmt>, + } +} + +ast_enum! { + /// A statement, usually ending in a semicolon. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub enum Stmt { + /// A local (let) binding. + Local(Local), + + /// An item definition. + Item(Item), + + /// Expression, with or without trailing semicolon. + Expr(Expr, Option<Token![;]>), + + /// A macro invocation in statement position. + /// + /// Syntactically it's ambiguous which other kind of statement this + /// macro would expand to. It can be any of local variable (`let`), + /// item, or expression. + Macro(StmtMacro), + } +} + +ast_struct! { + /// A local `let` binding: `let x: u64 = s.parse()?`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct Local { + pub attrs: Vec<Attribute>, + pub let_token: Token![let], + pub pat: Pat, + pub init: Option<LocalInit>, + pub semi_token: Token![;], + } +} + +ast_struct! { + /// The expression assigned in a local `let` binding, including optional + /// diverging `else` block. 
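+    ///
+    /// (Editor addition: a minimal sketch of inspecting the diverging
+    /// branch; requires syn's `"full"` feature in addition to the default
+    /// `"parsing"` and `"printing"` features.)
+    ///
+    /// ```
+    /// use syn::{Local, Stmt};
+    ///
+    /// let stmt: Stmt = syn::parse_quote! {
+    ///     let Ok(x) = r else { return };
+    /// };
+    /// let Stmt::Local(Local { init: Some(init), .. }) = stmt else {
+    ///     panic!("expected a let statement");
+    /// };
+    /// assert!(init.diverge.is_some());
+    /// ```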
+ /// + /// `LocalInit` represents `= s.parse()?` in `let x: u64 = s.parse()?` and + /// `= r else { return }` in `let Ok(x) = r else { return }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct LocalInit { + pub eq_token: Token![=], + pub expr: Box<Expr>, + pub diverge: Option<(Token![else], Box<Expr>)>, + } +} + +ast_struct! { + /// A macro invocation in statement position. + /// + /// Syntactically it's ambiguous which other kind of statement this macro + /// would expand to. It can be any of local variable (`let`), item, or + /// expression. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + pub struct StmtMacro { + pub attrs: Vec<Attribute>, + pub mac: Macro, + pub semi_token: Option<Token![;]>, + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::parse::discouraged::Speculative as _; + use crate::parse::{Parse, ParseStream, Result}; + use proc_macro2::TokenStream; + + struct AllowNoSemi(bool); + + impl Block { + /// Parse the body of a block as zero or more statements, possibly + /// including one trailing expression. + /// + /// # Example + /// + /// ``` + /// use syn::{braced, token, Attribute, Block, Ident, Result, Stmt, Token}; + /// use syn::parse::{Parse, ParseStream}; + /// + /// // Parse a function with no generics or parameter list. + /// // + /// // fn playground { + /// // let mut x = 1; + /// // x += 1; + /// // println!("{}", x); + /// // } + /// struct MiniFunction { + /// attrs: Vec<Attribute>, + /// fn_token: Token![fn], + /// name: Ident, + /// brace_token: token::Brace, + /// stmts: Vec<Stmt>, + /// } + /// + /// impl Parse for MiniFunction { + /// fn parse(input: ParseStream) -> Result<Self> { + /// let outer_attrs = input.call(Attribute::parse_outer)?; + /// let fn_token: Token![fn] = input.parse()?; + /// let name: Ident = input.parse()?; + /// + /// let content; + /// let brace_token = braced!(content in input); + /// let inner_attrs = content.call(Attribute::parse_inner)?; + /// let stmts = content.call(Block::parse_within)?; + /// + /// Ok(MiniFunction { + /// attrs: { + /// let mut attrs = outer_attrs; + /// attrs.extend(inner_attrs); + /// attrs + /// }, + /// fn_token, + /// name, + /// brace_token, + /// stmts, + /// }) + /// } + /// } + /// ``` + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn parse_within(input: ParseStream) -> Result<Vec<Stmt>> { + let mut stmts = Vec::new(); + loop { + while let semi @ Some(_) = input.parse()? 
{ + stmts.push(Stmt::Expr(Expr::Verbatim(TokenStream::new()), semi)); + } + if input.is_empty() { + break; + } + let stmt = parse_stmt(input, AllowNoSemi(true))?; + let requires_semicolon = match &stmt { + Stmt::Expr(stmt, None) => expr::requires_terminator(stmt), + Stmt::Macro(stmt) => { + stmt.semi_token.is_none() && !stmt.mac.delimiter.is_brace() + } + Stmt::Local(_) | Stmt::Item(_) | Stmt::Expr(_, Some(_)) => false, + }; + stmts.push(stmt); + if input.is_empty() { + break; + } else if requires_semicolon { + return Err(input.error("unexpected token, expected `;`")); + } + } + Ok(stmts) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Block { + fn parse(input: ParseStream) -> Result<Self> { + let content; + Ok(Block { + brace_token: braced!(content in input), + stmts: content.call(Block::parse_within)?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Stmt { + fn parse(input: ParseStream) -> Result<Self> { + let allow_nosemi = AllowNoSemi(false); + parse_stmt(input, allow_nosemi) + } + } + + fn parse_stmt(input: ParseStream, allow_nosemi: AllowNoSemi) -> Result<Stmt> { + let begin = input.fork(); + let attrs = input.call(Attribute::parse_outer)?; + + // brace-style macros; paren and bracket macros get parsed as + // expression statements. + let ahead = input.fork(); + let mut is_item_macro = false; + if let Ok(path) = ahead.call(Path::parse_mod_style) { + if ahead.peek(Token![!]) { + if ahead.peek2(Ident) || ahead.peek2(Token![try]) { + is_item_macro = true; + } else if ahead.peek2(token::Brace) + && !(ahead.peek3(Token![.]) || ahead.peek3(Token![?])) + { + input.advance_to(&ahead); + return stmt_mac(input, attrs, path).map(Stmt::Macro); + } + } + } + + if input.peek(Token![let]) && !input.peek(token::Group) { + stmt_local(input, attrs).map(Stmt::Local) + } else if input.peek(Token![pub]) + || input.peek(Token![crate]) && !input.peek2(Token![::]) + || input.peek(Token![extern]) + || input.peek(Token![use]) + || input.peek(Token![static]) + && (input.peek2(Token![mut]) + || input.peek2(Ident) + && !(input.peek2(Token![async]) + && (input.peek3(Token![move]) || input.peek3(Token![|])))) + || input.peek(Token![const]) + && !(input.peek2(token::Brace) + || input.peek2(Token![static]) + || input.peek2(Token![async]) + && !(input.peek3(Token![unsafe]) + || input.peek3(Token![extern]) + || input.peek3(Token![fn])) + || input.peek2(Token![move]) + || input.peek2(Token![|])) + || input.peek(Token![unsafe]) && !input.peek2(token::Brace) + || input.peek(Token![async]) + && (input.peek2(Token![unsafe]) + || input.peek2(Token![extern]) + || input.peek2(Token![fn])) + || input.peek(Token![fn]) + || input.peek(Token![mod]) + || input.peek(Token![type]) + || input.peek(Token![struct]) + || input.peek(Token![enum]) + || input.peek(Token![union]) && input.peek2(Ident) + || input.peek(Token![auto]) && input.peek2(Token![trait]) + || input.peek(Token![trait]) + || input.peek(Token![default]) + && (input.peek2(Token![unsafe]) || input.peek2(Token![impl])) + || input.peek(Token![impl]) + || input.peek(Token![macro]) + || is_item_macro + { + let item = item::parsing::parse_rest_of_item(begin, attrs, input)?; + Ok(Stmt::Item(item)) + } else { + stmt_expr(input, allow_nosemi, attrs) + } + } + + fn stmt_mac(input: ParseStream, attrs: Vec<Attribute>, path: Path) -> Result<StmtMacro> { + let bang_token: Token![!] 
= input.parse()?; + let (delimiter, tokens) = mac::parse_delimiter(input)?; + let semi_token: Option<Token![;]> = input.parse()?; + + Ok(StmtMacro { + attrs, + mac: Macro { + path, + bang_token, + delimiter, + tokens, + }, + semi_token, + }) + } + + fn stmt_local(input: ParseStream, attrs: Vec<Attribute>) -> Result<Local> { + let let_token: Token![let] = input.parse()?; + + let mut pat = Pat::parse_single(input)?; + if input.peek(Token![:]) { + let colon_token: Token![:] = input.parse()?; + let ty: Type = input.parse()?; + pat = Pat::Type(PatType { + attrs: Vec::new(), + pat: Box::new(pat), + colon_token, + ty: Box::new(ty), + }); + } + + let init = if let Some(eq_token) = input.parse()? { + let eq_token: Token![=] = eq_token; + let expr: Expr = input.parse()?; + + let diverge = if let Some(else_token) = input.parse()? { + let else_token: Token![else] = else_token; + let diverge = ExprBlock { + attrs: Vec::new(), + label: None, + block: input.parse()?, + }; + Some((else_token, Box::new(Expr::Block(diverge)))) + } else { + None + }; + + Some(LocalInit { + eq_token, + expr: Box::new(expr), + diverge, + }) + } else { + None + }; + + let semi_token: Token![;] = input.parse()?; + + Ok(Local { + attrs, + let_token, + pat, + init, + semi_token, + }) + } + + fn stmt_expr( + input: ParseStream, + allow_nosemi: AllowNoSemi, + mut attrs: Vec<Attribute>, + ) -> Result<Stmt> { + let mut e = expr::parsing::expr_early(input)?; + + let mut attr_target = &mut e; + loop { + attr_target = match attr_target { + Expr::Assign(e) => &mut e.left, + Expr::Binary(e) => &mut e.left, + Expr::Cast(e) => &mut e.expr, + Expr::Array(_) + | Expr::Async(_) + | Expr::Await(_) + | Expr::Block(_) + | Expr::Break(_) + | Expr::Call(_) + | Expr::Closure(_) + | Expr::Const(_) + | Expr::Continue(_) + | Expr::Field(_) + | Expr::ForLoop(_) + | Expr::Group(_) + | Expr::If(_) + | Expr::Index(_) + | Expr::Infer(_) + | Expr::Let(_) + | Expr::Lit(_) + | Expr::Loop(_) + | Expr::Macro(_) + | Expr::Match(_) + | Expr::MethodCall(_) + | Expr::Paren(_) + | Expr::Path(_) + | Expr::Range(_) + | Expr::Reference(_) + | Expr::Repeat(_) + | Expr::Return(_) + | Expr::Struct(_) + | Expr::Try(_) + | Expr::TryBlock(_) + | Expr::Tuple(_) + | Expr::Unary(_) + | Expr::Unsafe(_) + | Expr::While(_) + | Expr::Yield(_) + | Expr::Verbatim(_) => break, + }; + } + attrs.extend(attr_target.replace_attrs(Vec::new())); + attr_target.replace_attrs(attrs); + + let semi_token: Option<Token![;]> = input.parse()?; + + match e { + Expr::Macro(ExprMacro { attrs, mac }) + if semi_token.is_some() || mac.delimiter.is_brace() => + { + return Ok(Stmt::Macro(StmtMacro { + attrs, + mac, + semi_token, + })); + } + _ => {} + } + + if semi_token.is_some() { + Ok(Stmt::Expr(e, semi_token)) + } else if allow_nosemi.0 || !expr::requires_terminator(&e) { + Ok(Stmt::Expr(e, None)) + } else { + Err(input.error("expected semicolon")) + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use proc_macro2::TokenStream; + use quote::{ToTokens, TokenStreamExt}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Block { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.brace_token.surround(tokens, |tokens| { + tokens.append_all(&self.stmts); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Stmt { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + Stmt::Local(local) => local.to_tokens(tokens), + Stmt::Item(item) => item.to_tokens(tokens), + Stmt::Expr(expr, semi) => { + 
expr.to_tokens(tokens); + semi.to_tokens(tokens); + } + Stmt::Macro(mac) => mac.to_tokens(tokens), + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Local { + fn to_tokens(&self, tokens: &mut TokenStream) { + expr::printing::outer_attrs_to_tokens(&self.attrs, tokens); + self.let_token.to_tokens(tokens); + self.pat.to_tokens(tokens); + if let Some(init) = &self.init { + init.eq_token.to_tokens(tokens); + init.expr.to_tokens(tokens); + if let Some((else_token, diverge)) = &init.diverge { + else_token.to_tokens(tokens); + diverge.to_tokens(tokens); + } + } + self.semi_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for StmtMacro { + fn to_tokens(&self, tokens: &mut TokenStream) { + expr::printing::outer_attrs_to_tokens(&self.attrs, tokens); + self.mac.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + } + } +} diff --git a/vendor/syn/src/thread.rs b/vendor/syn/src/thread.rs new file mode 100644 index 0000000..b33d248 --- /dev/null +++ b/vendor/syn/src/thread.rs @@ -0,0 +1,60 @@ +use std::fmt::{self, Debug}; +use std::thread::{self, ThreadId}; + +/// ThreadBound is a Sync-maker and Send-maker that allows accessing a value +/// of type T only from the original thread on which the ThreadBound was +/// constructed. +pub(crate) struct ThreadBound<T> { + value: T, + thread_id: ThreadId, +} + +unsafe impl<T> Sync for ThreadBound<T> {} + +// Send bound requires Copy, as otherwise Drop could run in the wrong place. +// +// Today Copy and Drop are mutually exclusive so `T: Copy` implies `T: !Drop`. +// This impl needs to be revisited if that restriction is relaxed in the future. +unsafe impl<T: Copy> Send for ThreadBound<T> {} + +impl<T> ThreadBound<T> { + pub(crate) fn new(value: T) -> Self { + ThreadBound { + value, + thread_id: thread::current().id(), + } + } + + pub(crate) fn get(&self) -> Option<&T> { + if thread::current().id() == self.thread_id { + Some(&self.value) + } else { + None + } + } +} + +impl<T: Debug> Debug for ThreadBound<T> { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + match self.get() { + Some(value) => Debug::fmt(value, formatter), + None => formatter.write_str("unknown"), + } + } +} + +// Copy the bytes of T, even if the currently running thread is the "wrong" +// thread. This is fine as long as the original thread is not simultaneously +// mutating this value via interior mutability, which would be a data race. +// +// Currently `T: Copy` is sufficient to guarantee that T contains no interior +// mutability, because _all_ interior mutability in Rust is built on +// std::cell::UnsafeCell, which has no Copy impl. This impl needs to be +// revisited if that restriction is relaxed in the future. +impl<T: Copy> Copy for ThreadBound<T> {} + +impl<T: Copy> Clone for ThreadBound<T> { + fn clone(&self) -> Self { + *self + } +} diff --git a/vendor/syn/src/token.rs b/vendor/syn/src/token.rs new file mode 100644 index 0000000..05d8f56 --- /dev/null +++ b/vendor/syn/src/token.rs @@ -0,0 +1,1138 @@ +//! Tokens representing Rust punctuation, keywords, and delimiters. +//! +//! The type names in this module can be difficult to keep straight, so we +//! prefer to use the [`Token!`] macro instead. This is a type-macro that +//! expands to the token type of the given token. +//! +//! [`Token!`]: crate::Token +//! +//! # Example +//! +//! The [`ItemStatic`] syntax tree node is defined like this. +//! +//! [`ItemStatic`]: crate::ItemStatic +//! +//! ``` +//! 
# use syn::{Attribute, Expr, Ident, Token, Type, Visibility}; +//! # +//! pub struct ItemStatic { +//! pub attrs: Vec<Attribute>, +//! pub vis: Visibility, +//! pub static_token: Token![static], +//! pub mutability: Option<Token![mut]>, +//! pub ident: Ident, +//! pub colon_token: Token![:], +//! pub ty: Box<Type>, +//! pub eq_token: Token![=], +//! pub expr: Box<Expr>, +//! pub semi_token: Token![;], +//! } +//! ``` +//! +//! # Parsing +//! +//! Keywords and punctuation can be parsed through the [`ParseStream::parse`] +//! method. Delimiter tokens are parsed using the [`parenthesized!`], +//! [`bracketed!`] and [`braced!`] macros. +//! +//! [`ParseStream::parse`]: crate::parse::ParseBuffer::parse() +//! [`parenthesized!`]: crate::parenthesized! +//! [`bracketed!`]: crate::bracketed! +//! [`braced!`]: crate::braced! +//! +//! ``` +//! use syn::{Attribute, Result}; +//! use syn::parse::{Parse, ParseStream}; +//! # +//! # enum ItemStatic {} +//! +//! // Parse the ItemStatic struct shown above. +//! impl Parse for ItemStatic { +//! fn parse(input: ParseStream) -> Result<Self> { +//! # use syn::ItemStatic; +//! # fn parse(input: ParseStream) -> Result<ItemStatic> { +//! Ok(ItemStatic { +//! attrs: input.call(Attribute::parse_outer)?, +//! vis: input.parse()?, +//! static_token: input.parse()?, +//! mutability: input.parse()?, +//! ident: input.parse()?, +//! colon_token: input.parse()?, +//! ty: input.parse()?, +//! eq_token: input.parse()?, +//! expr: input.parse()?, +//! semi_token: input.parse()?, +//! }) +//! # } +//! # unimplemented!() +//! } +//! } +//! ``` +//! +//! # Other operations +//! +//! Every keyword and punctuation token supports the following operations. +//! +//! - [Peeking] — `input.peek(Token![...])` +//! +//! - [Parsing] — `input.parse::<Token![...]>()?` +//! +//! - [Printing] — `quote!( ... #the_token ... )` +//! +//! - Construction from a [`Span`] — `let the_token = Token![...](sp)` +//! +//! - Field access to its span — `let sp = the_token.span` +//! +//! [Peeking]: crate::parse::ParseBuffer::peek() +//! [Parsing]: crate::parse::ParseBuffer::parse() +//! [Printing]: https://docs.rs/quote/1.0/quote/trait.ToTokens.html +//! [`Span`]: https://docs.rs/proc-macro2/1.0/proc_macro2/struct.Span.html + +#[cfg(feature = "parsing")] +pub(crate) use self::private::CustomToken; +use self::private::WithSpan; +#[cfg(feature = "parsing")] +use crate::buffer::Cursor; +#[cfg(feature = "parsing")] +use crate::error::Result; +#[cfg(feature = "parsing")] +use crate::lifetime::Lifetime; +#[cfg(feature = "parsing")] +use crate::lit::{Lit, LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitInt, LitStr}; +#[cfg(feature = "parsing")] +use crate::lookahead; +#[cfg(feature = "parsing")] +use crate::parse::{Parse, ParseStream}; +use crate::span::IntoSpans; +use proc_macro2::extra::DelimSpan; +use proc_macro2::Span; +#[cfg(feature = "printing")] +use proc_macro2::TokenStream; +#[cfg(any(feature = "parsing", feature = "printing"))] +use proc_macro2::{Delimiter, Ident}; +#[cfg(feature = "parsing")] +use proc_macro2::{Literal, Punct, TokenTree}; +#[cfg(feature = "printing")] +use quote::{ToTokens, TokenStreamExt}; +#[cfg(feature = "extra-traits")] +use std::cmp; +#[cfg(feature = "extra-traits")] +use std::fmt::{self, Debug}; +#[cfg(feature = "extra-traits")] +use std::hash::{Hash, Hasher}; +use std::ops::{Deref, DerefMut}; + +/// Marker trait for types that represent single tokens. +/// +/// This trait is sealed and cannot be implemented for types outside of Syn. 
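+///
+/// (Editor addition: a minimal peeking sketch; `skip_optional_comma` is a
+/// hypothetical helper, not part of syn.)
+///
+/// ```
+/// use syn::parse::ParseStream;
+/// use syn::{Result, Token};
+///
+/// fn skip_optional_comma(input: ParseStream) -> Result<()> {
+///     // `Token![,]` implements this marker trait, which is what makes it
+///     // usable with `peek` and `parse`.
+///     if input.peek(Token![,]) {
+///         let _: Token![,] = input.parse()?;
+///     }
+///     Ok(())
+/// }
+/// ```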
+#[cfg(feature = "parsing")] +pub trait Token: private::Sealed { + // Not public API. + #[doc(hidden)] + fn peek(cursor: Cursor) -> bool; + + // Not public API. + #[doc(hidden)] + fn display() -> &'static str; +} + +pub(crate) mod private { + #[cfg(feature = "parsing")] + use crate::buffer::Cursor; + use proc_macro2::Span; + + #[cfg(feature = "parsing")] + pub trait Sealed {} + + /// Support writing `token.span` rather than `token.spans[0]` on tokens that + /// hold a single span. + #[repr(transparent)] + #[allow(unknown_lints, repr_transparent_external_private_fields)] // False positive: https://github.com/rust-lang/rust/issues/78586#issuecomment-1722680482 + pub struct WithSpan { + pub span: Span, + } + + // Not public API. + #[doc(hidden)] + #[cfg(feature = "parsing")] + pub trait CustomToken { + fn peek(cursor: Cursor) -> bool; + fn display() -> &'static str; + } +} + +#[cfg(feature = "parsing")] +impl private::Sealed for Ident {} + +#[cfg(feature = "parsing")] +fn peek_impl(cursor: Cursor, peek: fn(ParseStream) -> bool) -> bool { + use crate::parse::Unexpected; + use std::cell::Cell; + use std::rc::Rc; + + let scope = Span::call_site(); + let unexpected = Rc::new(Cell::new(Unexpected::None)); + let buffer = crate::parse::new_parse_buffer(scope, cursor, unexpected); + peek(&buffer) +} + +macro_rules! impl_token { + ($display:literal $name:ty) => { + #[cfg(feature = "parsing")] + impl Token for $name { + fn peek(cursor: Cursor) -> bool { + fn peek(input: ParseStream) -> bool { + <$name as Parse>::parse(input).is_ok() + } + peek_impl(cursor, peek) + } + + fn display() -> &'static str { + $display + } + } + + #[cfg(feature = "parsing")] + impl private::Sealed for $name {} + }; +} + +impl_token!("lifetime" Lifetime); +impl_token!("literal" Lit); +impl_token!("string literal" LitStr); +impl_token!("byte string literal" LitByteStr); +impl_token!("byte literal" LitByte); +impl_token!("character literal" LitChar); +impl_token!("integer literal" LitInt); +impl_token!("floating point literal" LitFloat); +impl_token!("boolean literal" LitBool); +impl_token!("group token" proc_macro2::Group); + +macro_rules! impl_low_level_token { + ($display:literal $ty:ident $get:ident) => { + #[cfg(feature = "parsing")] + impl Token for $ty { + fn peek(cursor: Cursor) -> bool { + cursor.$get().is_some() + } + + fn display() -> &'static str { + $display + } + } + + #[cfg(feature = "parsing")] + impl private::Sealed for $ty {} + }; +} + +impl_low_level_token!("punctuation token" Punct punct); +impl_low_level_token!("literal" Literal literal); +impl_low_level_token!("token" TokenTree token_tree); + +#[cfg(feature = "parsing")] +impl<T: CustomToken> private::Sealed for T {} + +#[cfg(feature = "parsing")] +impl<T: CustomToken> Token for T { + fn peek(cursor: Cursor) -> bool { + <Self as CustomToken>::peek(cursor) + } + + fn display() -> &'static str { + <Self as CustomToken>::display() + } +} + +macro_rules! define_keywords { + ($($token:literal pub struct $name:ident)*) => { + $( + #[doc = concat!('`', $token, '`')] + /// + /// Don't try to remember the name of this type — use the + /// [`Token!`] macro instead. 
+ /// + /// [`Token!`]: crate::token + pub struct $name { + pub span: Span, + } + + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn $name<S: IntoSpans<Span>>(span: S) -> $name { + $name { + span: span.into_spans(), + } + } + + impl std::default::Default for $name { + fn default() -> Self { + $name { + span: Span::call_site(), + } + } + } + + #[cfg(feature = "clone-impls")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] + impl Copy for $name {} + + #[cfg(feature = "clone-impls")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] + impl Clone for $name { + fn clone(&self) -> Self { + *self + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Debug for $name { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(stringify!($name)) + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl cmp::Eq for $name {} + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl PartialEq for $name { + fn eq(&self, _other: &$name) -> bool { + true + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Hash for $name { + fn hash<H: Hasher>(&self, _state: &mut H) {} + } + + #[cfg(feature = "printing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for $name { + fn to_tokens(&self, tokens: &mut TokenStream) { + printing::keyword($token, self.span, tokens); + } + } + + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for $name { + fn parse(input: ParseStream) -> Result<Self> { + Ok($name { + span: parsing::keyword(input, $token)?, + }) + } + } + + #[cfg(feature = "parsing")] + impl Token for $name { + fn peek(cursor: Cursor) -> bool { + parsing::peek_keyword(cursor, $token) + } + + fn display() -> &'static str { + concat!("`", $token, "`") + } + } + + #[cfg(feature = "parsing")] + impl private::Sealed for $name {} + )* + }; +} + +macro_rules! impl_deref_if_len_is_1 { + ($name:ident/1) => { + impl Deref for $name { + type Target = WithSpan; + + fn deref(&self) -> &Self::Target { + unsafe { &*(self as *const Self).cast::<WithSpan>() } + } + } + + impl DerefMut for $name { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *(self as *mut Self).cast::<WithSpan>() } + } + } + }; + + ($name:ident/$len:literal) => {}; +} + +macro_rules! define_punctuation_structs { + ($($token:literal pub struct $name:ident/$len:tt #[doc = $usage:literal])*) => { + $( + #[cfg_attr(not(doc), repr(transparent))] + #[allow(unknown_lints, repr_transparent_external_private_fields)] // False positive: https://github.com/rust-lang/rust/issues/78586#issuecomment-1722680482 + #[doc = concat!('`', $token, '`')] + /// + /// Usage: + #[doc = concat!($usage, '.')] + /// + /// Don't try to remember the name of this type — use the + /// [`Token!`] macro instead. 
+ /// + /// [`Token!`]: crate::token + pub struct $name { + pub spans: [Span; $len], + } + + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn $name<S: IntoSpans<[Span; $len]>>(spans: S) -> $name { + $name { + spans: spans.into_spans(), + } + } + + impl std::default::Default for $name { + fn default() -> Self { + $name { + spans: [Span::call_site(); $len], + } + } + } + + #[cfg(feature = "clone-impls")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] + impl Copy for $name {} + + #[cfg(feature = "clone-impls")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] + impl Clone for $name { + fn clone(&self) -> Self { + *self + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Debug for $name { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(stringify!($name)) + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl cmp::Eq for $name {} + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl PartialEq for $name { + fn eq(&self, _other: &$name) -> bool { + true + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Hash for $name { + fn hash<H: Hasher>(&self, _state: &mut H) {} + } + + impl_deref_if_len_is_1!($name/$len); + )* + }; +} + +macro_rules! define_punctuation { + ($($token:literal pub struct $name:ident/$len:tt #[doc = $usage:literal])*) => { + $( + define_punctuation_structs! { + $token pub struct $name/$len #[doc = $usage] + } + + #[cfg(feature = "printing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for $name { + fn to_tokens(&self, tokens: &mut TokenStream) { + printing::punct($token, &self.spans, tokens); + } + } + + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for $name { + fn parse(input: ParseStream) -> Result<Self> { + Ok($name { + spans: parsing::punct(input, $token)?, + }) + } + } + + #[cfg(feature = "parsing")] + impl Token for $name { + fn peek(cursor: Cursor) -> bool { + parsing::peek_punct(cursor, $token) + } + + fn display() -> &'static str { + concat!("`", $token, "`") + } + } + + #[cfg(feature = "parsing")] + impl private::Sealed for $name {} + )* + }; +} + +macro_rules! 
define_delimiters { + ($($delim:ident pub struct $name:ident #[$doc:meta])*) => { + $( + #[$doc] + pub struct $name { + pub span: DelimSpan, + } + + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn $name<S: IntoSpans<DelimSpan>>(span: S) -> $name { + $name { + span: span.into_spans(), + } + } + + impl std::default::Default for $name { + fn default() -> Self { + $name(Span::call_site()) + } + } + + #[cfg(feature = "clone-impls")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] + impl Copy for $name {} + + #[cfg(feature = "clone-impls")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] + impl Clone for $name { + fn clone(&self) -> Self { + *self + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Debug for $name { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(stringify!($name)) + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl cmp::Eq for $name {} + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl PartialEq for $name { + fn eq(&self, _other: &$name) -> bool { + true + } + } + + #[cfg(feature = "extra-traits")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Hash for $name { + fn hash<H: Hasher>(&self, _state: &mut H) {} + } + + impl $name { + #[cfg(feature = "printing")] + pub fn surround<F>(&self, tokens: &mut TokenStream, f: F) + where + F: FnOnce(&mut TokenStream), + { + let mut inner = TokenStream::new(); + f(&mut inner); + printing::delim(Delimiter::$delim, self.span.join(), tokens, inner); + } + } + + #[cfg(feature = "parsing")] + impl private::Sealed for $name {} + )* + }; +} + +define_punctuation_structs! { + "_" pub struct Underscore/1 /// wildcard patterns, inferred types, unnamed items in constants, extern crates, use declarations, and destructuring assignment +} + +#[cfg(feature = "printing")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] +impl ToTokens for Underscore { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append(Ident::new("_", self.span)); + } +} + +#[cfg(feature = "parsing")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] +impl Parse for Underscore { + fn parse(input: ParseStream) -> Result<Self> { + input.step(|cursor| { + if let Some((ident, rest)) = cursor.ident() { + if ident == "_" { + return Ok((Underscore(ident.span()), rest)); + } + } + if let Some((punct, rest)) = cursor.punct() { + if punct.as_char() == '_' { + return Ok((Underscore(punct.span()), rest)); + } + } + Err(cursor.error("expected `_`")) + }) + } +} + +#[cfg(feature = "parsing")] +impl Token for Underscore { + fn peek(cursor: Cursor) -> bool { + if let Some((ident, _rest)) = cursor.ident() { + return ident == "_"; + } + if let Some((punct, _rest)) = cursor.punct() { + return punct.as_char() == '_'; + } + false + } + + fn display() -> &'static str { + "`_`" + } +} + +#[cfg(feature = "parsing")] +impl private::Sealed for Underscore {} + +/// None-delimited group +pub struct Group { + pub span: Span, +} + +#[doc(hidden)] +#[allow(non_snake_case)] +pub fn Group<S: IntoSpans<Span>>(span: S) -> Group { + Group { + span: span.into_spans(), + } +} + +impl std::default::Default for Group { + fn default() -> Self { + Group { + span: Span::call_site(), + } + } +} + +#[cfg(feature = "clone-impls")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] +impl Copy for Group {} + +#[cfg(feature = "clone-impls")] +#[cfg_attr(doc_cfg, doc(cfg(feature = 
"clone-impls")))] +impl Clone for Group { + fn clone(&self) -> Self { + *self + } +} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for Group { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("Group") + } +} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl cmp::Eq for Group {} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl PartialEq for Group { + fn eq(&self, _other: &Group) -> bool { + true + } +} + +#[cfg(feature = "extra-traits")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Hash for Group { + fn hash<H: Hasher>(&self, _state: &mut H) {} +} + +impl Group { + #[cfg(feature = "printing")] + pub fn surround<F>(&self, tokens: &mut TokenStream, f: F) + where + F: FnOnce(&mut TokenStream), + { + let mut inner = TokenStream::new(); + f(&mut inner); + printing::delim(Delimiter::None, self.span, tokens, inner); + } +} + +#[cfg(feature = "parsing")] +impl private::Sealed for Group {} + +#[cfg(feature = "parsing")] +impl Token for Paren { + fn peek(cursor: Cursor) -> bool { + lookahead::is_delimiter(cursor, Delimiter::Parenthesis) + } + + fn display() -> &'static str { + "parentheses" + } +} + +#[cfg(feature = "parsing")] +impl Token for Brace { + fn peek(cursor: Cursor) -> bool { + lookahead::is_delimiter(cursor, Delimiter::Brace) + } + + fn display() -> &'static str { + "curly braces" + } +} + +#[cfg(feature = "parsing")] +impl Token for Bracket { + fn peek(cursor: Cursor) -> bool { + lookahead::is_delimiter(cursor, Delimiter::Bracket) + } + + fn display() -> &'static str { + "square brackets" + } +} + +#[cfg(feature = "parsing")] +impl Token for Group { + fn peek(cursor: Cursor) -> bool { + lookahead::is_delimiter(cursor, Delimiter::None) + } + + fn display() -> &'static str { + "invisible group" + } +} + +define_keywords! { + "abstract" pub struct Abstract + "as" pub struct As + "async" pub struct Async + "auto" pub struct Auto + "await" pub struct Await + "become" pub struct Become + "box" pub struct Box + "break" pub struct Break + "const" pub struct Const + "continue" pub struct Continue + "crate" pub struct Crate + "default" pub struct Default + "do" pub struct Do + "dyn" pub struct Dyn + "else" pub struct Else + "enum" pub struct Enum + "extern" pub struct Extern + "final" pub struct Final + "fn" pub struct Fn + "for" pub struct For + "if" pub struct If + "impl" pub struct Impl + "in" pub struct In + "let" pub struct Let + "loop" pub struct Loop + "macro" pub struct Macro + "match" pub struct Match + "mod" pub struct Mod + "move" pub struct Move + "mut" pub struct Mut + "override" pub struct Override + "priv" pub struct Priv + "pub" pub struct Pub + "ref" pub struct Ref + "return" pub struct Return + "Self" pub struct SelfType + "self" pub struct SelfValue + "static" pub struct Static + "struct" pub struct Struct + "super" pub struct Super + "trait" pub struct Trait + "try" pub struct Try + "type" pub struct Type + "typeof" pub struct Typeof + "union" pub struct Union + "unsafe" pub struct Unsafe + "unsized" pub struct Unsized + "use" pub struct Use + "virtual" pub struct Virtual + "where" pub struct Where + "while" pub struct While + "yield" pub struct Yield +} + +define_punctuation! 
{ + "&" pub struct And/1 /// bitwise and logical AND, borrow, references, reference patterns + "&&" pub struct AndAnd/2 /// lazy AND, borrow, references, reference patterns + "&=" pub struct AndEq/2 /// bitwise AND assignment + "@" pub struct At/1 /// subpattern binding + "^" pub struct Caret/1 /// bitwise and logical XOR + "^=" pub struct CaretEq/2 /// bitwise XOR assignment + ":" pub struct Colon/1 /// various separators + "," pub struct Comma/1 /// various separators + "$" pub struct Dollar/1 /// macros + "." pub struct Dot/1 /// field access, tuple index + ".." pub struct DotDot/2 /// range, struct expressions, patterns, range patterns + "..." pub struct DotDotDot/3 /// variadic functions, range patterns + "..=" pub struct DotDotEq/3 /// inclusive range, range patterns + "=" pub struct Eq/1 /// assignment, attributes, various type definitions + "==" pub struct EqEq/2 /// equal + "=>" pub struct FatArrow/2 /// match arms, macros + ">=" pub struct Ge/2 /// greater than or equal to, generics + ">" pub struct Gt/1 /// greater than, generics, paths + "<-" pub struct LArrow/2 /// unused + "<=" pub struct Le/2 /// less than or equal to + "<" pub struct Lt/1 /// less than, generics, paths + "-" pub struct Minus/1 /// subtraction, negation + "-=" pub struct MinusEq/2 /// subtraction assignment + "!=" pub struct Ne/2 /// not equal + "!" pub struct Not/1 /// bitwise and logical NOT, macro calls, inner attributes, never type, negative impls + "|" pub struct Or/1 /// bitwise and logical OR, closures, patterns in match, if let, and while let + "|=" pub struct OrEq/2 /// bitwise OR assignment + "||" pub struct OrOr/2 /// lazy OR, closures + "::" pub struct PathSep/2 /// path separator + "%" pub struct Percent/1 /// remainder + "%=" pub struct PercentEq/2 /// remainder assignment + "+" pub struct Plus/1 /// addition, trait bounds, macro Kleene matcher + "+=" pub struct PlusEq/2 /// addition assignment + "#" pub struct Pound/1 /// attributes + "?" pub struct Question/1 /// question mark operator, questionably sized, macro Kleene matcher + "->" pub struct RArrow/2 /// function return type, closure return type, function pointer type + ";" pub struct Semi/1 /// terminator for various items and statements, array types + "<<" pub struct Shl/2 /// shift left, nested generics + "<<=" pub struct ShlEq/3 /// shift left assignment + ">>" pub struct Shr/2 /// shift right, nested generics + ">>=" pub struct ShrEq/3 /// shift right assignment, nested generics + "/" pub struct Slash/1 /// division + "/=" pub struct SlashEq/2 /// division assignment + "*" pub struct Star/1 /// multiplication, dereference, raw pointers, macro Kleene matcher, use wildcards + "*=" pub struct StarEq/2 /// multiplication assignment + "~" pub struct Tilde/1 /// unused since before Rust 1.0 +} + +define_delimiters! { + Brace pub struct Brace /// `{`…`}` + Bracket pub struct Bracket /// `[`…`]` + Parenthesis pub struct Paren /// `(`…`)` +} + +/// A type-macro that expands to the name of the Rust type representation of a +/// given token. +/// +/// As a type, `Token!` is commonly used in the type of struct fields, the type +/// of a `let` statement, or in turbofish for a `parse` function. 
+/// +/// ``` +/// use syn::{Ident, Token}; +/// use syn::parse::{Parse, ParseStream, Result}; +/// +/// // `struct Foo;` +/// pub struct UnitStruct { +/// struct_token: Token![struct], +/// ident: Ident, +/// semi_token: Token![;], +/// } +/// +/// impl Parse for UnitStruct { +/// fn parse(input: ParseStream) -> Result<Self> { +/// let struct_token: Token![struct] = input.parse()?; +/// let ident: Ident = input.parse()?; +/// let semi_token = input.parse::<Token![;]>()?; +/// Ok(UnitStruct { struct_token, ident, semi_token }) +/// } +/// } +/// ``` +/// +/// As an expression, `Token!` is used for peeking tokens or instantiating +/// tokens from a span. +/// +/// ``` +/// # use syn::{Ident, Token}; +/// # use syn::parse::{Parse, ParseStream, Result}; +/// # +/// # struct UnitStruct { +/// # struct_token: Token![struct], +/// # ident: Ident, +/// # semi_token: Token![;], +/// # } +/// # +/// # impl Parse for UnitStruct { +/// # fn parse(input: ParseStream) -> Result<Self> { +/// # unimplemented!() +/// # } +/// # } +/// # +/// fn make_unit_struct(name: Ident) -> UnitStruct { +/// let span = name.span(); +/// UnitStruct { +/// struct_token: Token![struct](span), +/// ident: name, +/// semi_token: Token![;](span), +/// } +/// } +/// +/// # fn parse(input: ParseStream) -> Result<()> { +/// if input.peek(Token![struct]) { +/// let unit_struct: UnitStruct = input.parse()?; +/// /* ... */ +/// } +/// # Ok(()) +/// # } +/// ``` +/// +/// See the [token module] documentation for details and examples. +/// +/// [token module]: crate::token +#[macro_export] +macro_rules! Token { + [abstract] => { $crate::token::Abstract }; + [as] => { $crate::token::As }; + [async] => { $crate::token::Async }; + [auto] => { $crate::token::Auto }; + [await] => { $crate::token::Await }; + [become] => { $crate::token::Become }; + [box] => { $crate::token::Box }; + [break] => { $crate::token::Break }; + [const] => { $crate::token::Const }; + [continue] => { $crate::token::Continue }; + [crate] => { $crate::token::Crate }; + [default] => { $crate::token::Default }; + [do] => { $crate::token::Do }; + [dyn] => { $crate::token::Dyn }; + [else] => { $crate::token::Else }; + [enum] => { $crate::token::Enum }; + [extern] => { $crate::token::Extern }; + [final] => { $crate::token::Final }; + [fn] => { $crate::token::Fn }; + [for] => { $crate::token::For }; + [if] => { $crate::token::If }; + [impl] => { $crate::token::Impl }; + [in] => { $crate::token::In }; + [let] => { $crate::token::Let }; + [loop] => { $crate::token::Loop }; + [macro] => { $crate::token::Macro }; + [match] => { $crate::token::Match }; + [mod] => { $crate::token::Mod }; + [move] => { $crate::token::Move }; + [mut] => { $crate::token::Mut }; + [override] => { $crate::token::Override }; + [priv] => { $crate::token::Priv }; + [pub] => { $crate::token::Pub }; + [ref] => { $crate::token::Ref }; + [return] => { $crate::token::Return }; + [Self] => { $crate::token::SelfType }; + [self] => { $crate::token::SelfValue }; + [static] => { $crate::token::Static }; + [struct] => { $crate::token::Struct }; + [super] => { $crate::token::Super }; + [trait] => { $crate::token::Trait }; + [try] => { $crate::token::Try }; + [type] => { $crate::token::Type }; + [typeof] => { $crate::token::Typeof }; + [union] => { $crate::token::Union }; + [unsafe] => { $crate::token::Unsafe }; + [unsized] => { $crate::token::Unsized }; + [use] => { $crate::token::Use }; + [virtual] => { $crate::token::Virtual }; + [where] => { $crate::token::Where }; + [while] => { $crate::token::While }; + 
[yield] => { $crate::token::Yield }; + [&] => { $crate::token::And }; + [&&] => { $crate::token::AndAnd }; + [&=] => { $crate::token::AndEq }; + [@] => { $crate::token::At }; + [^] => { $crate::token::Caret }; + [^=] => { $crate::token::CaretEq }; + [:] => { $crate::token::Colon }; + [,] => { $crate::token::Comma }; + [$] => { $crate::token::Dollar }; + [.] => { $crate::token::Dot }; + [..] => { $crate::token::DotDot }; + [...] => { $crate::token::DotDotDot }; + [..=] => { $crate::token::DotDotEq }; + [=] => { $crate::token::Eq }; + [==] => { $crate::token::EqEq }; + [=>] => { $crate::token::FatArrow }; + [>=] => { $crate::token::Ge }; + [>] => { $crate::token::Gt }; + [<-] => { $crate::token::LArrow }; + [<=] => { $crate::token::Le }; + [<] => { $crate::token::Lt }; + [-] => { $crate::token::Minus }; + [-=] => { $crate::token::MinusEq }; + [!=] => { $crate::token::Ne }; + [!] => { $crate::token::Not }; + [|] => { $crate::token::Or }; + [|=] => { $crate::token::OrEq }; + [||] => { $crate::token::OrOr }; + [::] => { $crate::token::PathSep }; + [%] => { $crate::token::Percent }; + [%=] => { $crate::token::PercentEq }; + [+] => { $crate::token::Plus }; + [+=] => { $crate::token::PlusEq }; + [#] => { $crate::token::Pound }; + [?] => { $crate::token::Question }; + [->] => { $crate::token::RArrow }; + [;] => { $crate::token::Semi }; + [<<] => { $crate::token::Shl }; + [<<=] => { $crate::token::ShlEq }; + [>>] => { $crate::token::Shr }; + [>>=] => { $crate::token::ShrEq }; + [/] => { $crate::token::Slash }; + [/=] => { $crate::token::SlashEq }; + [*] => { $crate::token::Star }; + [*=] => { $crate::token::StarEq }; + [~] => { $crate::token::Tilde }; + [_] => { $crate::token::Underscore }; +} + +// Not public API. +#[doc(hidden)] +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use crate::buffer::Cursor; + use crate::error::{Error, Result}; + use crate::parse::ParseStream; + use proc_macro2::{Spacing, Span}; + + pub(crate) fn keyword(input: ParseStream, token: &str) -> Result<Span> { + input.step(|cursor| { + if let Some((ident, rest)) = cursor.ident() { + if ident == token { + return Ok((ident.span(), rest)); + } + } + Err(cursor.error(format!("expected `{}`", token))) + }) + } + + pub(crate) fn peek_keyword(cursor: Cursor, token: &str) -> bool { + if let Some((ident, _rest)) = cursor.ident() { + ident == token + } else { + false + } + } + + #[doc(hidden)] + pub fn punct<const N: usize>(input: ParseStream, token: &str) -> Result<[Span; N]> { + let mut spans = [input.span(); N]; + punct_helper(input, token, &mut spans)?; + Ok(spans) + } + + fn punct_helper(input: ParseStream, token: &str, spans: &mut [Span]) -> Result<()> { + input.step(|cursor| { + let mut cursor = *cursor; + assert_eq!(token.len(), spans.len()); + + for (i, ch) in token.chars().enumerate() { + match cursor.punct() { + Some((punct, rest)) => { + spans[i] = punct.span(); + if punct.as_char() != ch { + break; + } else if i == token.len() - 1 { + return Ok(((), rest)); + } else if punct.spacing() != Spacing::Joint { + break; + } + cursor = rest; + } + None => break, + } + } + + Err(Error::new(spans[0], format!("expected `{}`", token))) + }) + } + + #[doc(hidden)] + pub fn peek_punct(mut cursor: Cursor, token: &str) -> bool { + for (i, ch) in token.chars().enumerate() { + match cursor.punct() { + Some((punct, rest)) => { + if punct.as_char() != ch { + break; + } else if i == token.len() - 1 { + return true; + } else if punct.spacing() != Spacing::Joint { + break; + } + cursor = rest; + } + None => break, + } + } + false + } +} 
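[Editor's note — not part of the vendored patch.] The `punct_helper`/`peek_punct` logic above hinges on one rule: a multi-character operator only exists if proc-macro2 reports `Spacing::Joint` on every `Punct` except the last, which is exactly what the loop checks before advancing the cursor. A minimal standalone sketch of the same rule, using only the public proc_macro2 API (the function name `starts_with_operator` is ours, not syn's):

```rust
use proc_macro2::{Spacing, TokenStream, TokenTree};

/// Returns true if `stream` begins with the multi-character operator `op`,
/// applying the same joint-spacing rule as `peek_punct` above.
fn starts_with_operator(stream: TokenStream, op: &str) -> bool {
    let mut tokens = stream.into_iter();
    let chars: Vec<char> = op.chars().collect();
    for (i, expected) in chars.iter().enumerate() {
        match tokens.next() {
            Some(TokenTree::Punct(punct)) if punct.as_char() == *expected => {
                // Every char but the last must be glued to its successor;
                // otherwise `>` `=` is two separate tokens, not `>=`.
                if i + 1 < chars.len() && punct.spacing() != Spacing::Joint {
                    return false;
                }
            }
            _ => return false,
        }
    }
    true
}

fn main() {
    assert!(starts_with_operator(">=".parse().unwrap(), ">="));
    // A space breaks the joint spacing, so this is `>` then `=`, not `>=`.
    assert!(!starts_with_operator("> =".parse().unwrap(), ">="));
}
```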
+ +// Not public API. +#[doc(hidden)] +#[cfg(feature = "printing")] +pub(crate) mod printing { + use proc_macro2::{Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream}; + use quote::TokenStreamExt; + + #[doc(hidden)] + pub fn punct(s: &str, spans: &[Span], tokens: &mut TokenStream) { + assert_eq!(s.len(), spans.len()); + + let mut chars = s.chars(); + let mut spans = spans.iter(); + let ch = chars.next_back().unwrap(); + let span = spans.next_back().unwrap(); + for (ch, span) in chars.zip(spans) { + let mut op = Punct::new(ch, Spacing::Joint); + op.set_span(*span); + tokens.append(op); + } + + let mut op = Punct::new(ch, Spacing::Alone); + op.set_span(*span); + tokens.append(op); + } + + pub(crate) fn keyword(s: &str, span: Span, tokens: &mut TokenStream) { + tokens.append(Ident::new(s, span)); + } + + pub(crate) fn delim( + delim: Delimiter, + span: Span, + tokens: &mut TokenStream, + inner: TokenStream, + ) { + let mut g = Group::new(delim, inner); + g.set_span(span); + tokens.append(g); + } +} diff --git a/vendor/syn/src/tt.rs b/vendor/syn/src/tt.rs new file mode 100644 index 0000000..d8dc9aa --- /dev/null +++ b/vendor/syn/src/tt.rs @@ -0,0 +1,107 @@ +use proc_macro2::{Delimiter, TokenStream, TokenTree}; +use std::hash::{Hash, Hasher}; + +pub(crate) struct TokenTreeHelper<'a>(pub &'a TokenTree); + +impl<'a> PartialEq for TokenTreeHelper<'a> { + fn eq(&self, other: &Self) -> bool { + use proc_macro2::Spacing; + + match (self.0, other.0) { + (TokenTree::Group(g1), TokenTree::Group(g2)) => { + match (g1.delimiter(), g2.delimiter()) { + (Delimiter::Parenthesis, Delimiter::Parenthesis) + | (Delimiter::Brace, Delimiter::Brace) + | (Delimiter::Bracket, Delimiter::Bracket) + | (Delimiter::None, Delimiter::None) => {} + _ => return false, + } + + let s1 = g1.stream().into_iter(); + let mut s2 = g2.stream().into_iter(); + + for item1 in s1 { + let item2 = match s2.next() { + Some(item) => item, + None => return false, + }; + if TokenTreeHelper(&item1) != TokenTreeHelper(&item2) { + return false; + } + } + s2.next().is_none() + } + (TokenTree::Punct(o1), TokenTree::Punct(o2)) => { + o1.as_char() == o2.as_char() + && match (o1.spacing(), o2.spacing()) { + (Spacing::Alone, Spacing::Alone) | (Spacing::Joint, Spacing::Joint) => true, + _ => false, + } + } + (TokenTree::Literal(l1), TokenTree::Literal(l2)) => l1.to_string() == l2.to_string(), + (TokenTree::Ident(s1), TokenTree::Ident(s2)) => s1 == s2, + _ => false, + } + } +} + +impl<'a> Hash for TokenTreeHelper<'a> { + fn hash<H: Hasher>(&self, h: &mut H) { + use proc_macro2::Spacing; + + match self.0 { + TokenTree::Group(g) => { + 0u8.hash(h); + match g.delimiter() { + Delimiter::Parenthesis => 0u8.hash(h), + Delimiter::Brace => 1u8.hash(h), + Delimiter::Bracket => 2u8.hash(h), + Delimiter::None => 3u8.hash(h), + } + + for item in g.stream() { + TokenTreeHelper(&item).hash(h); + } + 0xffu8.hash(h); // terminator w/ a variant we don't normally hash + } + TokenTree::Punct(op) => { + 1u8.hash(h); + op.as_char().hash(h); + match op.spacing() { + Spacing::Alone => 0u8.hash(h), + Spacing::Joint => 1u8.hash(h), + } + } + TokenTree::Literal(lit) => (2u8, lit.to_string()).hash(h), + TokenTree::Ident(word) => (3u8, word).hash(h), + } + } +} + +pub(crate) struct TokenStreamHelper<'a>(pub &'a TokenStream); + +impl<'a> PartialEq for TokenStreamHelper<'a> { + fn eq(&self, other: &Self) -> bool { + let left = self.0.clone().into_iter().collect::<Vec<_>>(); + let right = other.0.clone().into_iter().collect::<Vec<_>>(); + if left.len() != right.len() { + 
return false; + } + for (a, b) in left.into_iter().zip(right) { + if TokenTreeHelper(&a) != TokenTreeHelper(&b) { + return false; + } + } + true + } +} + +impl<'a> Hash for TokenStreamHelper<'a> { + fn hash<H: Hasher>(&self, state: &mut H) { + let tts = self.0.clone().into_iter().collect::<Vec<_>>(); + tts.len().hash(state); + for tt in tts { + TokenTreeHelper(&tt).hash(state); + } + } +} diff --git a/vendor/syn/src/ty.rs b/vendor/syn/src/ty.rs new file mode 100644 index 0000000..3213e7f --- /dev/null +++ b/vendor/syn/src/ty.rs @@ -0,0 +1,1189 @@ +use super::*; +use crate::punctuated::Punctuated; +use proc_macro2::TokenStream; + +ast_enum_of_structs! { + /// The possible types that a Rust value could have. + /// + /// # Syntax tree enum + /// + /// This type is a [syntax tree enum]. + /// + /// [syntax tree enum]: Expr#syntax-tree-enums + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + #[non_exhaustive] + pub enum Type { + /// A fixed size array type: `[T; n]`. + Array(TypeArray), + + /// A bare function type: `fn(usize) -> bool`. + BareFn(TypeBareFn), + + /// A type contained within invisible delimiters. + Group(TypeGroup), + + /// An `impl Bound1 + Bound2 + Bound3` type where `Bound` is a trait or + /// a lifetime. + ImplTrait(TypeImplTrait), + + /// Indication that a type should be inferred by the compiler: `_`. + Infer(TypeInfer), + + /// A macro in the type position. + Macro(TypeMacro), + + /// The never type: `!`. + Never(TypeNever), + + /// A parenthesized type equivalent to the inner type. + Paren(TypeParen), + + /// A path like `std::slice::Iter`, optionally qualified with a + /// self-type as in `<Vec<T> as SomeTrait>::Associated`. + Path(TypePath), + + /// A raw pointer type: `*const T` or `*mut T`. + Ptr(TypePtr), + + /// A reference type: `&'a T` or `&'a mut T`. + Reference(TypeReference), + + /// A dynamically sized slice type: `[T]`. + Slice(TypeSlice), + + /// A trait object type `dyn Bound1 + Bound2 + Bound3` where `Bound` is a + /// trait or a lifetime. + TraitObject(TypeTraitObject), + + /// A tuple type: `(A, B, C, String)`. + Tuple(TypeTuple), + + /// Tokens in type position not interpreted by Syn. + Verbatim(TokenStream), + + // For testing exhaustiveness in downstream code, use the following idiom: + // + // match ty { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // + // Type::Array(ty) => {...} + // Type::BareFn(ty) => {...} + // ... + // Type::Verbatim(ty) => {...} + // + // _ => { /* some sane fallback */ } + // } + // + // This way we fail your tests but don't break your library when adding + // a variant. You will be notified by a test failure when a variant is + // added, so that you can add code to handle it, but your library will + // continue to compile and work for downstream users in the interim. + } +} + +ast_struct! { + /// A fixed size array type: `[T; n]`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeArray { + pub bracket_token: token::Bracket, + pub elem: Box<Type>, + pub semi_token: Token![;], + pub len: Expr, + } +} + +ast_struct! { + /// A bare function type: `fn(usize) -> bool`. 
+ #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeBareFn { + pub lifetimes: Option<BoundLifetimes>, + pub unsafety: Option<Token![unsafe]>, + pub abi: Option<Abi>, + pub fn_token: Token![fn], + pub paren_token: token::Paren, + pub inputs: Punctuated<BareFnArg, Token![,]>, + pub variadic: Option<BareVariadic>, + pub output: ReturnType, + } +} + +ast_struct! { + /// A type contained within invisible delimiters. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeGroup { + pub group_token: token::Group, + pub elem: Box<Type>, + } +} + +ast_struct! { + /// An `impl Bound1 + Bound2 + Bound3` type where `Bound` is a trait or + /// a lifetime. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeImplTrait { + pub impl_token: Token![impl], + pub bounds: Punctuated<TypeParamBound, Token![+]>, + } +} + +ast_struct! { + /// Indication that a type should be inferred by the compiler: `_`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeInfer { + pub underscore_token: Token![_], + } +} + +ast_struct! { + /// A macro in the type position. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeMacro { + pub mac: Macro, + } +} + +ast_struct! { + /// The never type: `!`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeNever { + pub bang_token: Token![!], + } +} + +ast_struct! { + /// A parenthesized type equivalent to the inner type. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeParen { + pub paren_token: token::Paren, + pub elem: Box<Type>, + } +} + +ast_struct! { + /// A path like `std::slice::Iter`, optionally qualified with a + /// self-type as in `<Vec<T> as SomeTrait>::Associated`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypePath { + pub qself: Option<QSelf>, + pub path: Path, + } +} + +ast_struct! { + /// A raw pointer type: `*const T` or `*mut T`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypePtr { + pub star_token: Token![*], + pub const_token: Option<Token![const]>, + pub mutability: Option<Token![mut]>, + pub elem: Box<Type>, + } +} + +ast_struct! { + /// A reference type: `&'a T` or `&'a mut T`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeReference { + pub and_token: Token![&], + pub lifetime: Option<Lifetime>, + pub mutability: Option<Token![mut]>, + pub elem: Box<Type>, + } +} + +ast_struct! { + /// A dynamically sized slice type: `[T]`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeSlice { + pub bracket_token: token::Bracket, + pub elem: Box<Type>, + } +} + +ast_struct! { + /// A trait object type `dyn Bound1 + Bound2 + Bound3` where `Bound` is a + /// trait or a lifetime. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeTraitObject { + pub dyn_token: Option<Token![dyn]>, + pub bounds: Punctuated<TypeParamBound, Token![+]>, + } +} + +ast_struct! { + /// A tuple type: `(A, B, C, String)`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct TypeTuple { + pub paren_token: token::Paren, + pub elems: Punctuated<Type, Token![,]>, + } +} + +ast_struct! { + /// The binary interface of a function: `extern "C"`. 
+ #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct Abi { + pub extern_token: Token![extern], + pub name: Option<LitStr>, + } +} + +ast_struct! { + /// An argument in a function type: the `usize` in `fn(usize) -> bool`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct BareFnArg { + pub attrs: Vec<Attribute>, + pub name: Option<(Ident, Token![:])>, + pub ty: Type, + } +} + +ast_struct! { + /// The variadic argument of a function pointer like `fn(usize, ...)`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct BareVariadic { + pub attrs: Vec<Attribute>, + pub name: Option<(Ident, Token![:])>, + pub dots: Token![...], + pub comma: Option<Token![,]>, + } +} + +ast_enum! { + /// Return type of a function signature. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub enum ReturnType { + /// Return type is not specified. + /// + /// Functions default to `()` and closures default to type inference. + Default, + /// A particular type is returned. + Type(Token![->], Box<Type>), + } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + use super::*; + use crate::ext::IdentExt as _; + use crate::parse::{Parse, ParseStream, Result}; + use crate::path; + use proc_macro2::Span; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Type { + fn parse(input: ParseStream) -> Result<Self> { + let allow_plus = true; + let allow_group_generic = true; + ambig_ty(input, allow_plus, allow_group_generic) + } + } + + impl Type { + /// In some positions, types may not contain the `+` character, to + /// disambiguate them. For example in the expression `1 as T`, T may not + /// contain a `+` character. + /// + /// This parser does not allow a `+`, while the default parser does. 
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn without_plus(input: ParseStream) -> Result<Self> { + let allow_plus = false; + let allow_group_generic = true; + ambig_ty(input, allow_plus, allow_group_generic) + } + } + + pub(crate) fn ambig_ty( + input: ParseStream, + allow_plus: bool, + allow_group_generic: bool, + ) -> Result<Type> { + let begin = input.fork(); + + if input.peek(token::Group) { + let mut group: TypeGroup = input.parse()?; + if input.peek(Token![::]) && input.peek3(Ident::peek_any) { + if let Type::Path(mut ty) = *group.elem { + Path::parse_rest(input, &mut ty.path, false)?; + return Ok(Type::Path(ty)); + } else { + return Ok(Type::Path(TypePath { + qself: Some(QSelf { + lt_token: Token![<](group.group_token.span), + position: 0, + as_token: None, + gt_token: Token![>](group.group_token.span), + ty: group.elem, + }), + path: Path::parse_helper(input, false)?, + })); + } + } else if input.peek(Token![<]) && allow_group_generic + || input.peek(Token![::]) && input.peek3(Token![<]) + { + if let Type::Path(mut ty) = *group.elem { + let arguments = &mut ty.path.segments.last_mut().unwrap().arguments; + if arguments.is_none() { + *arguments = PathArguments::AngleBracketed(input.parse()?); + Path::parse_rest(input, &mut ty.path, false)?; + return Ok(Type::Path(ty)); + } else { + group.elem = Box::new(Type::Path(ty)); + } + } + } + return Ok(Type::Group(group)); + } + + let mut lifetimes = None::<BoundLifetimes>; + let mut lookahead = input.lookahead1(); + if lookahead.peek(Token![for]) { + lifetimes = input.parse()?; + lookahead = input.lookahead1(); + if !lookahead.peek(Ident) + && !lookahead.peek(Token![fn]) + && !lookahead.peek(Token![unsafe]) + && !lookahead.peek(Token![extern]) + && !lookahead.peek(Token![super]) + && !lookahead.peek(Token![self]) + && !lookahead.peek(Token![Self]) + && !lookahead.peek(Token![crate]) + || input.peek(Token![dyn]) + { + return Err(lookahead.error()); + } + } + + if lookahead.peek(token::Paren) { + let content; + let paren_token = parenthesized!(content in input); + if content.is_empty() { + return Ok(Type::Tuple(TypeTuple { + paren_token, + elems: Punctuated::new(), + })); + } + if content.peek(Lifetime) { + return Ok(Type::Paren(TypeParen { + paren_token, + elem: Box::new(Type::TraitObject(content.parse()?)), + })); + } + if content.peek(Token![?]) { + return Ok(Type::TraitObject(TypeTraitObject { + dyn_token: None, + bounds: { + let mut bounds = Punctuated::new(); + bounds.push_value(TypeParamBound::Trait(TraitBound { + paren_token: Some(paren_token), + ..content.parse()? + })); + while let Some(plus) = input.parse()? 
{ + bounds.push_punct(plus); + bounds.push_value(input.parse()?); + } + bounds + }, + })); + } + let mut first: Type = content.parse()?; + if content.peek(Token![,]) { + return Ok(Type::Tuple(TypeTuple { + paren_token, + elems: { + let mut elems = Punctuated::new(); + elems.push_value(first); + elems.push_punct(content.parse()?); + while !content.is_empty() { + elems.push_value(content.parse()?); + if content.is_empty() { + break; + } + elems.push_punct(content.parse()?); + } + elems + }, + })); + } + if allow_plus && input.peek(Token![+]) { + loop { + let first = match first { + Type::Path(TypePath { qself: None, path }) => { + TypeParamBound::Trait(TraitBound { + paren_token: Some(paren_token), + modifier: TraitBoundModifier::None, + lifetimes: None, + path, + }) + } + Type::TraitObject(TypeTraitObject { + dyn_token: None, + bounds, + }) => { + if bounds.len() > 1 || bounds.trailing_punct() { + first = Type::TraitObject(TypeTraitObject { + dyn_token: None, + bounds, + }); + break; + } + match bounds.into_iter().next().unwrap() { + TypeParamBound::Trait(trait_bound) => { + TypeParamBound::Trait(TraitBound { + paren_token: Some(paren_token), + ..trait_bound + }) + } + other @ (TypeParamBound::Lifetime(_) + | TypeParamBound::Verbatim(_)) => other, + } + } + _ => break, + }; + return Ok(Type::TraitObject(TypeTraitObject { + dyn_token: None, + bounds: { + let mut bounds = Punctuated::new(); + bounds.push_value(first); + while let Some(plus) = input.parse()? { + bounds.push_punct(plus); + bounds.push_value(input.parse()?); + } + bounds + }, + })); + } + } + Ok(Type::Paren(TypeParen { + paren_token, + elem: Box::new(first), + })) + } else if lookahead.peek(Token![fn]) + || lookahead.peek(Token![unsafe]) + || lookahead.peek(Token![extern]) + { + let mut bare_fn: TypeBareFn = input.parse()?; + bare_fn.lifetimes = lifetimes; + Ok(Type::BareFn(bare_fn)) + } else if lookahead.peek(Ident) + || input.peek(Token![super]) + || input.peek(Token![self]) + || input.peek(Token![Self]) + || input.peek(Token![crate]) + || lookahead.peek(Token![::]) + || lookahead.peek(Token![<]) + { + let ty: TypePath = input.parse()?; + if ty.qself.is_some() { + return Ok(Type::Path(ty)); + } + + if input.peek(Token![!]) && !input.peek(Token![!=]) && ty.path.is_mod_style() { + let bang_token: Token![!] 
= input.parse()?; + let (delimiter, tokens) = mac::parse_delimiter(input)?; + return Ok(Type::Macro(TypeMacro { + mac: Macro { + path: ty.path, + bang_token, + delimiter, + tokens, + }, + })); + } + + if lifetimes.is_some() || allow_plus && input.peek(Token![+]) { + let mut bounds = Punctuated::new(); + bounds.push_value(TypeParamBound::Trait(TraitBound { + paren_token: None, + modifier: TraitBoundModifier::None, + lifetimes, + path: ty.path, + })); + if allow_plus { + while input.peek(Token![+]) { + bounds.push_punct(input.parse()?); + if !(input.peek(Ident::peek_any) + || input.peek(Token![::]) + || input.peek(Token![?]) + || input.peek(Lifetime) + || input.peek(token::Paren)) + { + break; + } + bounds.push_value(input.parse()?); + } + } + return Ok(Type::TraitObject(TypeTraitObject { + dyn_token: None, + bounds, + })); + } + + Ok(Type::Path(ty)) + } else if lookahead.peek(Token![dyn]) { + let dyn_token: Token![dyn] = input.parse()?; + let dyn_span = dyn_token.span; + let star_token: Option<Token![*]> = input.parse()?; + let bounds = TypeTraitObject::parse_bounds(dyn_span, input, allow_plus)?; + return Ok(if star_token.is_some() { + Type::Verbatim(verbatim::between(&begin, input)) + } else { + Type::TraitObject(TypeTraitObject { + dyn_token: Some(dyn_token), + bounds, + }) + }); + } else if lookahead.peek(token::Bracket) { + let content; + let bracket_token = bracketed!(content in input); + let elem: Type = content.parse()?; + if content.peek(Token![;]) { + Ok(Type::Array(TypeArray { + bracket_token, + elem: Box::new(elem), + semi_token: content.parse()?, + len: content.parse()?, + })) + } else { + Ok(Type::Slice(TypeSlice { + bracket_token, + elem: Box::new(elem), + })) + } + } else if lookahead.peek(Token![*]) { + input.parse().map(Type::Ptr) + } else if lookahead.peek(Token![&]) { + input.parse().map(Type::Reference) + } else if lookahead.peek(Token![!]) && !input.peek(Token![=]) { + input.parse().map(Type::Never) + } else if lookahead.peek(Token![impl]) { + TypeImplTrait::parse(input, allow_plus).map(Type::ImplTrait) + } else if lookahead.peek(Token![_]) { + input.parse().map(Type::Infer) + } else if lookahead.peek(Lifetime) { + input.parse().map(Type::TraitObject) + } else { + Err(lookahead.error()) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeSlice { + fn parse(input: ParseStream) -> Result<Self> { + let content; + Ok(TypeSlice { + bracket_token: bracketed!(content in input), + elem: content.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeArray { + fn parse(input: ParseStream) -> Result<Self> { + let content; + Ok(TypeArray { + bracket_token: bracketed!(content in input), + elem: content.parse()?, + semi_token: content.parse()?, + len: content.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypePtr { + fn parse(input: ParseStream) -> Result<Self> { + let star_token: Token![*] = input.parse()?; + + let lookahead = input.lookahead1(); + let (const_token, mutability) = if lookahead.peek(Token![const]) { + (Some(input.parse()?), None) + } else if lookahead.peek(Token![mut]) { + (None, Some(input.parse()?)) + } else { + return Err(lookahead.error()); + }; + + Ok(TypePtr { + star_token, + const_token, + mutability, + elem: Box::new(input.call(Type::without_plus)?), + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeReference { + fn parse(input: ParseStream) -> Result<Self> { + Ok(TypeReference { + and_token: 
input.parse()?, + lifetime: input.parse()?, + mutability: input.parse()?, + // & binds tighter than +, so we don't allow + here. + elem: Box::new(input.call(Type::without_plus)?), + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeBareFn { + fn parse(input: ParseStream) -> Result<Self> { + let args; + let mut variadic = None; + + Ok(TypeBareFn { + lifetimes: input.parse()?, + unsafety: input.parse()?, + abi: input.parse()?, + fn_token: input.parse()?, + paren_token: parenthesized!(args in input), + inputs: { + let mut inputs = Punctuated::new(); + + while !args.is_empty() { + let attrs = args.call(Attribute::parse_outer)?; + + if inputs.empty_or_trailing() + && (args.peek(Token![...]) + || args.peek(Ident) + && args.peek2(Token![:]) + && args.peek3(Token![...])) + { + variadic = Some(parse_bare_variadic(&args, attrs)?); + break; + } + + let allow_self = inputs.is_empty(); + let arg = parse_bare_fn_arg(&args, allow_self)?; + inputs.push_value(BareFnArg { attrs, ..arg }); + if args.is_empty() { + break; + } + + let comma = args.parse()?; + inputs.push_punct(comma); + } + + inputs + }, + variadic, + output: input.call(ReturnType::without_plus)?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeNever { + fn parse(input: ParseStream) -> Result<Self> { + Ok(TypeNever { + bang_token: input.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeInfer { + fn parse(input: ParseStream) -> Result<Self> { + Ok(TypeInfer { + underscore_token: input.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeTuple { + fn parse(input: ParseStream) -> Result<Self> { + let content; + let paren_token = parenthesized!(content in input); + + if content.is_empty() { + return Ok(TypeTuple { + paren_token, + elems: Punctuated::new(), + }); + } + + let first: Type = content.parse()?; + Ok(TypeTuple { + paren_token, + elems: { + let mut elems = Punctuated::new(); + elems.push_value(first); + elems.push_punct(content.parse()?); + while !content.is_empty() { + elems.push_value(content.parse()?); + if content.is_empty() { + break; + } + elems.push_punct(content.parse()?); + } + elems + }, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeMacro { + fn parse(input: ParseStream) -> Result<Self> { + Ok(TypeMacro { + mac: input.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypePath { + fn parse(input: ParseStream) -> Result<Self> { + let expr_style = false; + let (qself, path) = path::parsing::qpath(input, expr_style)?; + Ok(TypePath { qself, path }) + } + } + + impl ReturnType { + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn without_plus(input: ParseStream) -> Result<Self> { + let allow_plus = false; + Self::parse(input, allow_plus) + } + + pub(crate) fn parse(input: ParseStream, allow_plus: bool) -> Result<Self> { + if input.peek(Token![->]) { + let arrow = input.parse()?; + let allow_group_generic = true; + let ty = ambig_ty(input, allow_plus, allow_group_generic)?; + Ok(ReturnType::Type(arrow, Box::new(ty))) + } else { + Ok(ReturnType::Default) + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for ReturnType { + fn parse(input: ParseStream) -> Result<Self> { + let allow_plus = true; + Self::parse(input, allow_plus) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeTraitObject { + fn 
parse(input: ParseStream) -> Result<Self> { + let allow_plus = true; + Self::parse(input, allow_plus) + } + } + + impl TypeTraitObject { + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn without_plus(input: ParseStream) -> Result<Self> { + let allow_plus = false; + Self::parse(input, allow_plus) + } + + // Only allow multiple trait references if allow_plus is true. + pub(crate) fn parse(input: ParseStream, allow_plus: bool) -> Result<Self> { + let dyn_token: Option<Token![dyn]> = input.parse()?; + let dyn_span = match &dyn_token { + Some(token) => token.span, + None => input.span(), + }; + let bounds = Self::parse_bounds(dyn_span, input, allow_plus)?; + Ok(TypeTraitObject { dyn_token, bounds }) + } + + fn parse_bounds( + dyn_span: Span, + input: ParseStream, + allow_plus: bool, + ) -> Result<Punctuated<TypeParamBound, Token![+]>> { + let bounds = TypeParamBound::parse_multiple(input, allow_plus)?; + let mut last_lifetime_span = None; + let mut at_least_one_trait = false; + for bound in &bounds { + match bound { + TypeParamBound::Trait(_) | TypeParamBound::Verbatim(_) => { + at_least_one_trait = true; + break; + } + TypeParamBound::Lifetime(lifetime) => { + last_lifetime_span = Some(lifetime.ident.span()); + } + } + } + // Just lifetimes like `'a + 'b` is not a TraitObject. + if !at_least_one_trait { + let msg = "at least one trait is required for an object type"; + return Err(error::new2(dyn_span, last_lifetime_span.unwrap(), msg)); + } + Ok(bounds) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeImplTrait { + fn parse(input: ParseStream) -> Result<Self> { + let allow_plus = true; + Self::parse(input, allow_plus) + } + } + + impl TypeImplTrait { + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn without_plus(input: ParseStream) -> Result<Self> { + let allow_plus = false; + Self::parse(input, allow_plus) + } + + pub(crate) fn parse(input: ParseStream, allow_plus: bool) -> Result<Self> { + let impl_token: Token![impl] = input.parse()?; + let bounds = TypeParamBound::parse_multiple(input, allow_plus)?; + let mut last_lifetime_span = None; + let mut at_least_one_trait = false; + for bound in &bounds { + match bound { + TypeParamBound::Trait(_) | TypeParamBound::Verbatim(_) => { + at_least_one_trait = true; + break; + } + TypeParamBound::Lifetime(lifetime) => { + last_lifetime_span = Some(lifetime.ident.span()); + } + } + } + if !at_least_one_trait { + let msg = "at least one trait must be specified"; + return Err(error::new2( + impl_token.span, + last_lifetime_span.unwrap(), + msg, + )); + } + Ok(TypeImplTrait { impl_token, bounds }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeGroup { + fn parse(input: ParseStream) -> Result<Self> { + let group = crate::group::parse_group(input)?; + Ok(TypeGroup { + group_token: group.token, + elem: group.content.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for TypeParen { + fn parse(input: ParseStream) -> Result<Self> { + let allow_plus = false; + Self::parse(input, allow_plus) + } + } + + impl TypeParen { + fn parse(input: ParseStream, allow_plus: bool) -> Result<Self> { + let content; + Ok(TypeParen { + paren_token: parenthesized!(content in input), + elem: Box::new({ + let allow_group_generic = true; + ambig_ty(&content, allow_plus, allow_group_generic)? 
+ }), + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for BareFnArg { + fn parse(input: ParseStream) -> Result<Self> { + let allow_self = false; + parse_bare_fn_arg(input, allow_self) + } + } + + fn parse_bare_fn_arg(input: ParseStream, allow_self: bool) -> Result<BareFnArg> { + let attrs = input.call(Attribute::parse_outer)?; + + let begin = input.fork(); + + let has_mut_self = allow_self && input.peek(Token![mut]) && input.peek2(Token![self]); + if has_mut_self { + input.parse::<Token![mut]>()?; + } + + let mut has_self = false; + let mut name = if (input.peek(Ident) || input.peek(Token![_]) || { + has_self = allow_self && input.peek(Token![self]); + has_self + }) && input.peek2(Token![:]) + && !input.peek2(Token![::]) + { + let name = input.call(Ident::parse_any)?; + let colon: Token![:] = input.parse()?; + Some((name, colon)) + } else { + has_self = false; + None + }; + + let ty = if allow_self && !has_self && input.peek(Token![mut]) && input.peek2(Token![self]) + { + input.parse::<Token![mut]>()?; + input.parse::<Token![self]>()?; + None + } else if has_mut_self && name.is_none() { + input.parse::<Token![self]>()?; + None + } else { + Some(input.parse()?) + }; + + let ty = match ty { + Some(ty) if !has_mut_self => ty, + _ => { + name = None; + Type::Verbatim(verbatim::between(&begin, input)) + } + }; + + Ok(BareFnArg { attrs, name, ty }) + } + + fn parse_bare_variadic(input: ParseStream, attrs: Vec<Attribute>) -> Result<BareVariadic> { + Ok(BareVariadic { + attrs, + name: if input.peek(Ident) || input.peek(Token![_]) { + let name = input.call(Ident::parse_any)?; + let colon: Token![:] = input.parse()?; + Some((name, colon)) + } else { + None + }, + dots: input.parse()?, + comma: input.parse()?, + }) + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Abi { + fn parse(input: ParseStream) -> Result<Self> { + Ok(Abi { + extern_token: input.parse()?, + name: input.parse()?, + }) + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for Option<Abi> { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Token![extern]) { + input.parse().map(Some) + } else { + Ok(None) + } + } + } +} + +#[cfg(feature = "printing")] +mod printing { + use super::*; + use crate::attr::FilterAttrs; + use crate::print::TokensOrDefault; + use proc_macro2::TokenStream; + use quote::{ToTokens, TokenStreamExt}; + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeSlice { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.bracket_token.surround(tokens, |tokens| { + self.elem.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeArray { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.bracket_token.surround(tokens, |tokens| { + self.elem.to_tokens(tokens); + self.semi_token.to_tokens(tokens); + self.len.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypePtr { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.star_token.to_tokens(tokens); + match &self.mutability { + Some(tok) => tok.to_tokens(tokens), + None => { + TokensOrDefault(&self.const_token).to_tokens(tokens); + } + } + self.elem.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeReference { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.and_token.to_tokens(tokens); + self.lifetime.to_tokens(tokens); + 
self.mutability.to_tokens(tokens); + self.elem.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeBareFn { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.lifetimes.to_tokens(tokens); + self.unsafety.to_tokens(tokens); + self.abi.to_tokens(tokens); + self.fn_token.to_tokens(tokens); + self.paren_token.surround(tokens, |tokens| { + self.inputs.to_tokens(tokens); + if let Some(variadic) = &self.variadic { + if !self.inputs.empty_or_trailing() { + let span = variadic.dots.spans[0]; + Token![,](span).to_tokens(tokens); + } + variadic.to_tokens(tokens); + } + }); + self.output.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeNever { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.bang_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeTuple { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.paren_token.surround(tokens, |tokens| { + self.elems.to_tokens(tokens); + // If we only have one argument, we need a trailing comma to + // distinguish TypeTuple from TypeParen. + if self.elems.len() == 1 && !self.elems.trailing_punct() { + <Token![,]>::default().to_tokens(tokens); + } + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypePath { + fn to_tokens(&self, tokens: &mut TokenStream) { + path::printing::print_path(tokens, &self.qself, &self.path); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeTraitObject { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.dyn_token.to_tokens(tokens); + self.bounds.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeImplTrait { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.impl_token.to_tokens(tokens); + self.bounds.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeGroup { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.group_token.surround(tokens, |tokens| { + self.elem.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeParen { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.paren_token.surround(tokens, |tokens| { + self.elem.to_tokens(tokens); + }); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeInfer { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.underscore_token.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for TypeMacro { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.mac.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for ReturnType { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + ReturnType::Default => {} + ReturnType::Type(arrow, ty) => { + arrow.to_tokens(tokens); + ty.to_tokens(tokens); + } + } + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for BareFnArg { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + if let Some((name, colon)) = &self.name { + name.to_tokens(tokens); + colon.to_tokens(tokens); + } + self.ty.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for BareVariadic { + fn to_tokens(&self, tokens: &mut TokenStream) { + 
tokens.append_all(self.attrs.outer()); + if let Some((name, colon)) = &self.name { + name.to_tokens(tokens); + colon.to_tokens(tokens); + } + self.dots.to_tokens(tokens); + self.comma.to_tokens(tokens); + } + } + + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for Abi { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.extern_token.to_tokens(tokens); + self.name.to_tokens(tokens); + } + } +} diff --git a/vendor/syn/src/verbatim.rs b/vendor/syn/src/verbatim.rs new file mode 100644 index 0000000..54dc1cf --- /dev/null +++ b/vendor/syn/src/verbatim.rs @@ -0,0 +1,33 @@ +use crate::parse::ParseStream; +use proc_macro2::{Delimiter, TokenStream}; +use std::cmp::Ordering; +use std::iter; + +pub(crate) fn between<'a>(begin: ParseStream<'a>, end: ParseStream<'a>) -> TokenStream { + let end = end.cursor(); + let mut cursor = begin.cursor(); + assert!(crate::buffer::same_buffer(end, cursor)); + + let mut tokens = TokenStream::new(); + while cursor != end { + let (tt, next) = cursor.token_tree().unwrap(); + + if crate::buffer::cmp_assuming_same_buffer(end, next) == Ordering::Less { + // A syntax node can cross the boundary of a None-delimited group + // due to such groups being transparent to the parser in most cases. + // Any time this occurs the group is known to be semantically + // irrelevant. https://github.com/dtolnay/syn/issues/1235 + if let Some((inside, _span, after)) = cursor.group(Delimiter::None) { + assert!(next == after); + cursor = inside; + continue; + } else { + panic!("verbatim end must not be inside a delimited group"); + } + } + + tokens.extend(iter::once(tt)); + cursor = next; + } + tokens +} diff --git a/vendor/syn/src/whitespace.rs b/vendor/syn/src/whitespace.rs new file mode 100644 index 0000000..f8de1b9 --- /dev/null +++ b/vendor/syn/src/whitespace.rs @@ -0,0 +1,65 @@ +pub(crate) fn skip(mut s: &str) -> &str { + 'skip: while !s.is_empty() { + let byte = s.as_bytes()[0]; + if byte == b'/' { + if s.starts_with("//") + && (!s.starts_with("///") || s.starts_with("////")) + && !s.starts_with("//!") + { + if let Some(i) = s.find('\n') { + s = &s[i + 1..]; + continue; + } else { + return ""; + } + } else if s.starts_with("/**/") { + s = &s[4..]; + continue; + } else if s.starts_with("/*") + && (!s.starts_with("/**") || s.starts_with("/***")) + && !s.starts_with("/*!") + { + let mut depth = 0; + let bytes = s.as_bytes(); + let mut i = 0; + let upper = bytes.len() - 1; + while i < upper { + if bytes[i] == b'/' && bytes[i + 1] == b'*' { + depth += 1; + i += 1; // eat '*' + } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { + depth -= 1; + if depth == 0 { + s = &s[i + 2..]; + continue 'skip; + } + i += 1; // eat '/' + } + i += 1; + } + return s; + } + } + match byte { + b' ' | 0x09..=0x0d => { + s = &s[1..]; + continue; + } + b if b <= 0x7f => {} + _ => { + let ch = s.chars().next().unwrap(); + if is_whitespace(ch) { + s = &s[ch.len_utf8()..]; + continue; + } + } + } + return s; + } + s +} + +fn is_whitespace(ch: char) -> bool { + // Rust treats left-to-right mark and right-to-left mark as whitespace + ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}' +} diff --git a/vendor/syn/tests/common/eq.rs b/vendor/syn/tests/common/eq.rs new file mode 100644 index 0000000..8ecbe46 --- /dev/null +++ b/vendor/syn/tests/common/eq.rs @@ -0,0 +1,879 @@ +#![allow(unused_macro_rules)] + +extern crate rustc_ast; +extern crate rustc_data_structures; +extern crate rustc_driver; +extern crate rustc_span; +extern crate thin_vec; + +use rustc_ast::ast::AngleBracketedArg; 
+use rustc_ast::ast::AngleBracketedArgs; +use rustc_ast::ast::AnonConst; +use rustc_ast::ast::Arm; +use rustc_ast::ast::AssocConstraint; +use rustc_ast::ast::AssocConstraintKind; +use rustc_ast::ast::AssocItemKind; +use rustc_ast::ast::AttrArgs; +use rustc_ast::ast::AttrArgsEq; +use rustc_ast::ast::AttrId; +use rustc_ast::ast::AttrItem; +use rustc_ast::ast::AttrKind; +use rustc_ast::ast::AttrStyle; +use rustc_ast::ast::Attribute; +use rustc_ast::ast::BareFnTy; +use rustc_ast::ast::BinOpKind; +use rustc_ast::ast::BindingAnnotation; +use rustc_ast::ast::Block; +use rustc_ast::ast::BlockCheckMode; +use rustc_ast::ast::BorrowKind; +use rustc_ast::ast::BoundConstness; +use rustc_ast::ast::BoundPolarity; +use rustc_ast::ast::ByRef; +use rustc_ast::ast::CaptureBy; +use rustc_ast::ast::Closure; +use rustc_ast::ast::ClosureBinder; +use rustc_ast::ast::Const; +use rustc_ast::ast::ConstItem; +use rustc_ast::ast::CoroutineKind; +use rustc_ast::ast::Crate; +use rustc_ast::ast::Defaultness; +use rustc_ast::ast::DelimArgs; +use rustc_ast::ast::EnumDef; +use rustc_ast::ast::Expr; +use rustc_ast::ast::ExprField; +use rustc_ast::ast::ExprKind; +use rustc_ast::ast::Extern; +use rustc_ast::ast::FieldDef; +use rustc_ast::ast::FloatTy; +use rustc_ast::ast::Fn; +use rustc_ast::ast::FnDecl; +use rustc_ast::ast::FnHeader; +use rustc_ast::ast::FnRetTy; +use rustc_ast::ast::FnSig; +use rustc_ast::ast::ForLoopKind; +use rustc_ast::ast::ForeignItemKind; +use rustc_ast::ast::ForeignMod; +use rustc_ast::ast::FormatAlignment; +use rustc_ast::ast::FormatArgPosition; +use rustc_ast::ast::FormatArgPositionKind; +use rustc_ast::ast::FormatArgs; +use rustc_ast::ast::FormatArgsPiece; +use rustc_ast::ast::FormatArgument; +use rustc_ast::ast::FormatArgumentKind; +use rustc_ast::ast::FormatArguments; +use rustc_ast::ast::FormatCount; +use rustc_ast::ast::FormatDebugHex; +use rustc_ast::ast::FormatOptions; +use rustc_ast::ast::FormatPlaceholder; +use rustc_ast::ast::FormatSign; +use rustc_ast::ast::FormatTrait; +use rustc_ast::ast::GenBlockKind; +use rustc_ast::ast::GenericArg; +use rustc_ast::ast::GenericArgs; +use rustc_ast::ast::GenericBound; +use rustc_ast::ast::GenericParam; +use rustc_ast::ast::GenericParamKind; +use rustc_ast::ast::Generics; +use rustc_ast::ast::Impl; +use rustc_ast::ast::ImplPolarity; +use rustc_ast::ast::Inline; +use rustc_ast::ast::InlineAsm; +use rustc_ast::ast::InlineAsmOperand; +use rustc_ast::ast::InlineAsmOptions; +use rustc_ast::ast::InlineAsmRegOrRegClass; +use rustc_ast::ast::InlineAsmSym; +use rustc_ast::ast::InlineAsmTemplatePiece; +use rustc_ast::ast::IntTy; +use rustc_ast::ast::IsAuto; +use rustc_ast::ast::Item; +use rustc_ast::ast::ItemKind; +use rustc_ast::ast::Label; +use rustc_ast::ast::Lifetime; +use rustc_ast::ast::LitFloatType; +use rustc_ast::ast::LitIntType; +use rustc_ast::ast::LitKind; +use rustc_ast::ast::Local; +use rustc_ast::ast::LocalKind; +use rustc_ast::ast::MacCall; +use rustc_ast::ast::MacCallStmt; +use rustc_ast::ast::MacStmtStyle; +use rustc_ast::ast::MacroDef; +use rustc_ast::ast::MetaItemLit; +use rustc_ast::ast::MethodCall; +use rustc_ast::ast::ModKind; +use rustc_ast::ast::ModSpans; +use rustc_ast::ast::Movability; +use rustc_ast::ast::MutTy; +use rustc_ast::ast::Mutability; +use rustc_ast::ast::NodeId; +use rustc_ast::ast::NormalAttr; +use rustc_ast::ast::Param; +use rustc_ast::ast::ParenthesizedArgs; +use rustc_ast::ast::Pat; +use rustc_ast::ast::PatField; +use rustc_ast::ast::PatFieldsRest; +use rustc_ast::ast::PatKind; +use rustc_ast::ast::Path; +use 
rustc_ast::ast::PathSegment; +use rustc_ast::ast::PolyTraitRef; +use rustc_ast::ast::QSelf; +use rustc_ast::ast::RangeEnd; +use rustc_ast::ast::RangeLimits; +use rustc_ast::ast::RangeSyntax; +use rustc_ast::ast::StaticItem; +use rustc_ast::ast::Stmt; +use rustc_ast::ast::StmtKind; +use rustc_ast::ast::StrLit; +use rustc_ast::ast::StrStyle; +use rustc_ast::ast::StructExpr; +use rustc_ast::ast::StructRest; +use rustc_ast::ast::Term; +use rustc_ast::ast::Trait; +use rustc_ast::ast::TraitBoundModifiers; +use rustc_ast::ast::TraitObjectSyntax; +use rustc_ast::ast::TraitRef; +use rustc_ast::ast::Ty; +use rustc_ast::ast::TyAlias; +use rustc_ast::ast::TyAliasWhereClause; +use rustc_ast::ast::TyKind; +use rustc_ast::ast::UintTy; +use rustc_ast::ast::UnOp; +use rustc_ast::ast::Unsafe; +use rustc_ast::ast::UnsafeSource; +use rustc_ast::ast::UseTree; +use rustc_ast::ast::UseTreeKind; +use rustc_ast::ast::Variant; +use rustc_ast::ast::VariantData; +use rustc_ast::ast::Visibility; +use rustc_ast::ast::VisibilityKind; +use rustc_ast::ast::WhereBoundPredicate; +use rustc_ast::ast::WhereClause; +use rustc_ast::ast::WhereEqPredicate; +use rustc_ast::ast::WherePredicate; +use rustc_ast::ast::WhereRegionPredicate; +use rustc_ast::ptr::P; +use rustc_ast::token::{self, CommentKind, Delimiter, Lit, Nonterminal, Token, TokenKind}; +use rustc_ast::tokenstream::{ + AttrTokenStream, AttrTokenTree, AttributesData, DelimSpacing, DelimSpan, LazyAttrTokenStream, + Spacing, TokenStream, TokenTree, +}; +use rustc_data_structures::sync::Lrc; +use rustc_span::source_map::Spanned; +use rustc_span::symbol::{sym, Ident}; +use rustc_span::{ErrorGuaranteed, Span, Symbol, SyntaxContext, DUMMY_SP}; +use std::collections::HashMap; +use std::hash::{BuildHasher, Hash}; +use thin_vec::ThinVec; + +pub trait SpanlessEq { + fn eq(&self, other: &Self) -> bool; +} + +impl<T: ?Sized + SpanlessEq> SpanlessEq for Box<T> { + fn eq(&self, other: &Self) -> bool { + SpanlessEq::eq(&**self, &**other) + } +} + +impl<T: ?Sized + SpanlessEq> SpanlessEq for P<T> { + fn eq(&self, other: &Self) -> bool { + SpanlessEq::eq(&**self, &**other) + } +} + +impl<T: ?Sized + SpanlessEq> SpanlessEq for Lrc<T> { + fn eq(&self, other: &Self) -> bool { + SpanlessEq::eq(&**self, &**other) + } +} + +impl<T: SpanlessEq> SpanlessEq for Option<T> { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (None, None) => true, + (Some(this), Some(other)) => SpanlessEq::eq(this, other), + _ => false, + } + } +} + +impl<T: SpanlessEq, E: SpanlessEq> SpanlessEq for Result<T, E> { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Ok(this), Ok(other)) => SpanlessEq::eq(this, other), + (Err(this), Err(other)) => SpanlessEq::eq(this, other), + _ => false, + } + } +} + +impl<T: SpanlessEq> SpanlessEq for [T] { + fn eq(&self, other: &Self) -> bool { + self.len() == other.len() && self.iter().zip(other).all(|(a, b)| SpanlessEq::eq(a, b)) + } +} + +impl<T: SpanlessEq> SpanlessEq for Vec<T> { + fn eq(&self, other: &Self) -> bool { + <[T] as SpanlessEq>::eq(self, other) + } +} + +impl<T: SpanlessEq> SpanlessEq for ThinVec<T> { + fn eq(&self, other: &Self) -> bool { + self.len() == other.len() + && self + .iter() + .zip(other.iter()) + .all(|(a, b)| SpanlessEq::eq(a, b)) + } +} + +impl<K: Eq + Hash, V: SpanlessEq, S: BuildHasher> SpanlessEq for HashMap<K, V, S> { + fn eq(&self, other: &Self) -> bool { + self.len() == other.len() + && self.iter().all(|(key, this_v)| { + other + .get(key) + .map_or(false, |other_v| SpanlessEq::eq(this_v, other_v)) + }) + } +} + 
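[Editor's note — not part of the vendored patch.] The container impls above, and the `spanless_eq_struct!`/`spanless_eq_enum!` macros defined just below, all reduce to one pattern: exhaustively destructure both values, compare the listed fields recursively, and let ignored (`!`) fields and `Span`s compare equal unconditionally. A hand-written toy in the same shape as the generated impls; the `Label` and `Span` types here are simplified stand-ins, not the rustc_ast/rustc_span ones:

```rust
struct Span; // stand-in for rustc_span::Span

struct Label {
    ident: String,
    span: Span, // hypothetical extra field, to show the "always equal" case
}

trait SpanlessEq {
    fn eq(&self, other: &Self) -> bool;
}

// Leaf types defer to PartialEq (cf. spanless_eq_partial_eq! below).
impl SpanlessEq for String {
    fn eq(&self, other: &Self) -> bool {
        PartialEq::eq(self, other)
    }
}

// Spans never participate in equality (cf. spanless_eq_true! below).
impl SpanlessEq for Span {
    fn eq(&self, _other: &Self) -> bool {
        true
    }
}

// The shape spanless_eq_struct! generates: exhaustive destructuring means
// adding a field to the type is a compile error until the invocation is
// updated to either compare or explicitly ignore it.
impl SpanlessEq for Label {
    fn eq(&self, other: &Self) -> bool {
        let Label { ident: this_ident, span: this_span } = self;
        let Label { ident: other_ident, span: other_span } = other;
        SpanlessEq::eq(this_ident, other_ident) && SpanlessEq::eq(this_span, other_span)
    }
}

fn main() {
    let a = Label { ident: "x".to_string(), span: Span };
    let b = Label { ident: "x".to_string(), span: Span };
    assert!(SpanlessEq::eq(&a, &b)); // idents equal, spans ignored
}
```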
+impl<T: SpanlessEq> SpanlessEq for Spanned<T> { + fn eq(&self, other: &Self) -> bool { + SpanlessEq::eq(&self.node, &other.node) + } +} + +impl<A: SpanlessEq, B: SpanlessEq> SpanlessEq for (A, B) { + fn eq(&self, other: &Self) -> bool { + SpanlessEq::eq(&self.0, &other.0) && SpanlessEq::eq(&self.1, &other.1) + } +} + +impl<A: SpanlessEq, B: SpanlessEq, C: SpanlessEq> SpanlessEq for (A, B, C) { + fn eq(&self, other: &Self) -> bool { + SpanlessEq::eq(&self.0, &other.0) + && SpanlessEq::eq(&self.1, &other.1) + && SpanlessEq::eq(&self.2, &other.2) + } +} + +macro_rules! spanless_eq_true { + ($name:ty) => { + impl SpanlessEq for $name { + fn eq(&self, _other: &Self) -> bool { + true + } + } + }; +} + +spanless_eq_true!(Span); +spanless_eq_true!(DelimSpan); +spanless_eq_true!(AttrId); +spanless_eq_true!(NodeId); +spanless_eq_true!(SyntaxContext); +spanless_eq_true!(Spacing); + +macro_rules! spanless_eq_partial_eq { + ($name:ty) => { + impl SpanlessEq for $name { + fn eq(&self, other: &Self) -> bool { + PartialEq::eq(self, other) + } + } + }; +} + +spanless_eq_partial_eq!(bool); +spanless_eq_partial_eq!(u8); +spanless_eq_partial_eq!(u16); +spanless_eq_partial_eq!(u32); +spanless_eq_partial_eq!(u128); +spanless_eq_partial_eq!(usize); +spanless_eq_partial_eq!(char); +spanless_eq_partial_eq!(String); +spanless_eq_partial_eq!(Symbol); +spanless_eq_partial_eq!(CommentKind); +spanless_eq_partial_eq!(Delimiter); +spanless_eq_partial_eq!(InlineAsmOptions); +spanless_eq_partial_eq!(token::LitKind); +spanless_eq_partial_eq!(ErrorGuaranteed); + +macro_rules! spanless_eq_struct { + { + $($name:ident)::+ $(<$param:ident>)? + $([$field:tt $this:ident $other:ident])* + $(![$ignore:tt])*; + } => { + impl $(<$param: SpanlessEq>)* SpanlessEq for $($name)::+ $(<$param>)* { + fn eq(&self, other: &Self) -> bool { + let $($name)::+ { $($field: $this,)* $($ignore: _,)* } = self; + let $($name)::+ { $($field: $other,)* $($ignore: _,)* } = other; + true $(&& SpanlessEq::eq($this, $other))* + } + } + }; + + { + $($name:ident)::+ $(<$param:ident>)? + $([$field:tt $this:ident $other:ident])* + $(![$ignore:tt])*; + !$next:tt + $($rest:tt)* + } => { + spanless_eq_struct! { + $($name)::+ $(<$param>)* + $([$field $this $other])* + $(![$ignore])* + ![$next]; + $($rest)* + } + }; + + { + $($name:ident)::+ $(<$param:ident>)? + $([$field:tt $this:ident $other:ident])* + $(![$ignore:tt])*; + $next:tt + $($rest:tt)* + } => { + spanless_eq_struct! { + $($name)::+ $(<$param>)* + $([$field $this $other])* + [$next this other] + $(![$ignore])*; + $($rest)* + } + }; +} + +macro_rules! spanless_eq_enum { + { + $($name:ident)::+; + $([$($variant:ident)::+; $([$field:tt $this:ident $other:ident])* $(![$ignore:tt])*])* + } => { + impl SpanlessEq for $($name)::+ { + fn eq(&self, other: &Self) -> bool { + match self { + $( + $($variant)::+ { .. } => {} + )* + } + #[allow(unreachable_patterns)] + match (self, other) { + $( + ( + $($variant)::+ { $($field: $this,)* $($ignore: _,)* }, + $($variant)::+ { $($field: $other,)* $($ignore: _,)* }, + ) => { + true $(&& SpanlessEq::eq($this, $other))* + } + )* + _ => false, + } + } + } + }; + + { + $($name:ident)::+; + $([$($variant:ident)::+; $($fields:tt)*])* + $next:ident [$([$($named:tt)*])* $(![$ignore:tt])*] (!$i:tt $($field:tt)*) + $($rest:tt)* + } => { + spanless_eq_enum! 
{ + $($name)::+; + $([$($variant)::+; $($fields)*])* + $next [$([$($named)*])* $(![$ignore])* ![$i]] ($($field)*) + $($rest)* + } + }; + + { + $($name:ident)::+; + $([$($variant:ident)::+; $($fields:tt)*])* + $next:ident [$([$($named:tt)*])* $(![$ignore:tt])*] ($i:tt $($field:tt)*) + $($rest:tt)* + } => { + spanless_eq_enum! { + $($name)::+; + $([$($variant)::+; $($fields)*])* + $next [$([$($named)*])* [$i this other] $(![$ignore])*] ($($field)*) + $($rest)* + } + }; + + { + $($name:ident)::+; + $([$($variant:ident)::+; $($fields:tt)*])* + $next:ident [$($named:tt)*] () + $($rest:tt)* + } => { + spanless_eq_enum! { + $($name)::+; + $([$($variant)::+; $($fields)*])* + [$($name)::+::$next; $($named)*] + $($rest)* + } + }; + + { + $($name:ident)::+; + $([$($variant:ident)::+; $($fields:tt)*])* + $next:ident ($($field:tt)*) + $($rest:tt)* + } => { + spanless_eq_enum! { + $($name)::+; + $([$($variant)::+; $($fields)*])* + $next [] ($($field)*) + $($rest)* + } + }; + + { + $($name:ident)::+; + $([$($variant:ident)::+; $($fields:tt)*])* + $next:ident + $($rest:tt)* + } => { + spanless_eq_enum! { + $($name)::+; + $([$($variant)::+; $($fields)*])* + [$($name)::+::$next;] + $($rest)* + } + }; +} + +spanless_eq_struct!(AngleBracketedArgs; span args); +spanless_eq_struct!(AnonConst; id value); +spanless_eq_struct!(Arm; attrs pat guard body span id is_placeholder); +spanless_eq_struct!(AssocConstraint; id ident gen_args kind span); +spanless_eq_struct!(AttrItem; path args tokens); +spanless_eq_struct!(AttrTokenStream; 0); +spanless_eq_struct!(Attribute; kind id style span); +spanless_eq_struct!(AttributesData; attrs tokens); +spanless_eq_struct!(BareFnTy; unsafety ext generic_params decl decl_span); +spanless_eq_struct!(BindingAnnotation; 0 1); +spanless_eq_struct!(Block; stmts id rules span tokens could_be_bare_literal); +spanless_eq_struct!(Closure; binder capture_clause constness coroutine_kind movability fn_decl body !fn_decl_span !fn_arg_span); +spanless_eq_struct!(ConstItem; defaultness generics ty expr); +spanless_eq_struct!(Crate; attrs items spans id is_placeholder); +spanless_eq_struct!(DelimArgs; dspan delim tokens); +spanless_eq_struct!(DelimSpacing; open close); +spanless_eq_struct!(EnumDef; variants); +spanless_eq_struct!(Expr; id kind span attrs !tokens); +spanless_eq_struct!(ExprField; attrs id span ident expr is_shorthand is_placeholder); +spanless_eq_struct!(FieldDef; attrs id span vis ident ty is_placeholder); +spanless_eq_struct!(Fn; defaultness generics sig body); +spanless_eq_struct!(FnDecl; inputs output); +spanless_eq_struct!(FnHeader; constness coroutine_kind unsafety ext); +spanless_eq_struct!(FnSig; header decl span); +spanless_eq_struct!(ForeignMod; unsafety abi items); +spanless_eq_struct!(FormatArgPosition; index kind span); +spanless_eq_struct!(FormatArgs; span template arguments); +spanless_eq_struct!(FormatArgument; kind expr); +spanless_eq_struct!(FormatOptions; width precision alignment fill sign alternate zero_pad debug_hex); +spanless_eq_struct!(FormatPlaceholder; argument span format_trait format_options); +spanless_eq_struct!(GenericParam; id ident attrs bounds is_placeholder kind !colon_span); +spanless_eq_struct!(Generics; params where_clause span); +spanless_eq_struct!(Impl; defaultness unsafety generics constness polarity of_trait self_ty items); +spanless_eq_struct!(InlineAsm; template template_strs operands clobber_abis options line_spans); +spanless_eq_struct!(InlineAsmSym; id qself path); +spanless_eq_struct!(Item<K>; attrs id span vis ident kind !tokens); 
+spanless_eq_struct!(Label; ident); +spanless_eq_struct!(Lifetime; id ident); +spanless_eq_struct!(Lit; kind symbol suffix); +spanless_eq_struct!(Local; pat ty kind id span attrs !tokens); +spanless_eq_struct!(MacCall; path args); +spanless_eq_struct!(MacCallStmt; mac style attrs tokens); +spanless_eq_struct!(MacroDef; body macro_rules); +spanless_eq_struct!(MetaItemLit; symbol suffix kind span); +spanless_eq_struct!(MethodCall; seg receiver args !span); +spanless_eq_struct!(ModSpans; !inner_span !inject_use_span); +spanless_eq_struct!(MutTy; ty mutbl); +spanless_eq_struct!(NormalAttr; item tokens); +spanless_eq_struct!(ParenthesizedArgs; span inputs inputs_span output); +spanless_eq_struct!(Pat; id kind span tokens); +spanless_eq_struct!(PatField; ident pat is_shorthand attrs id span is_placeholder); +spanless_eq_struct!(Path; span segments tokens); +spanless_eq_struct!(PathSegment; ident id args); +spanless_eq_struct!(PolyTraitRef; bound_generic_params trait_ref span); +spanless_eq_struct!(QSelf; ty path_span position); +spanless_eq_struct!(StaticItem; ty mutability expr); +spanless_eq_struct!(Stmt; id kind span); +spanless_eq_struct!(StrLit; symbol suffix symbol_unescaped style span); +spanless_eq_struct!(StructExpr; qself path fields rest); +spanless_eq_struct!(Token; kind span); +spanless_eq_struct!(Trait; unsafety is_auto generics bounds items); +spanless_eq_struct!(TraitBoundModifiers; constness polarity); +spanless_eq_struct!(TraitRef; path ref_id); +spanless_eq_struct!(Ty; id kind span tokens); +spanless_eq_struct!(TyAlias; defaultness generics where_clauses !where_predicates_split bounds ty); +spanless_eq_struct!(TyAliasWhereClause; !0 1); +spanless_eq_struct!(UseTree; prefix kind span); +spanless_eq_struct!(Variant; attrs id span !vis ident data disr_expr is_placeholder); +spanless_eq_struct!(Visibility; kind span tokens); +spanless_eq_struct!(WhereBoundPredicate; span bound_generic_params bounded_ty bounds); +spanless_eq_struct!(WhereClause; has_where_token predicates span); +spanless_eq_struct!(WhereEqPredicate; span lhs_ty rhs_ty); +spanless_eq_struct!(WhereRegionPredicate; span lifetime bounds); +spanless_eq_enum!(AngleBracketedArg; Arg(0) Constraint(0)); +spanless_eq_enum!(AssocConstraintKind; Equality(term) Bound(bounds)); +spanless_eq_enum!(AssocItemKind; Const(0) Fn(0) Type(0) MacCall(0)); +spanless_eq_enum!(AttrArgs; Empty Delimited(0) Eq(0 1)); +spanless_eq_enum!(AttrArgsEq; Ast(0) Hir(0)); +spanless_eq_enum!(AttrStyle; Outer Inner); +spanless_eq_enum!(AttrTokenTree; Token(0 1) Delimited(0 1 2 3) Attributes(0)); +spanless_eq_enum!(BinOpKind; Add Sub Mul Div Rem And Or BitXor BitAnd BitOr Shl Shr Eq Lt Le Ne Ge Gt); +spanless_eq_enum!(BlockCheckMode; Default Unsafe(0)); +spanless_eq_enum!(BorrowKind; Ref Raw); +spanless_eq_enum!(BoundConstness; Never Always(0) Maybe(0)); +spanless_eq_enum!(BoundPolarity; Positive Negative(0) Maybe(0)); +spanless_eq_enum!(ByRef; Yes No); +spanless_eq_enum!(CaptureBy; Value(move_kw) Ref); +spanless_eq_enum!(ClosureBinder; NotPresent For(span generic_params)); +spanless_eq_enum!(Const; Yes(0) No); +spanless_eq_enum!(Defaultness; Default(0) Final); +spanless_eq_enum!(Extern; None Implicit(0) Explicit(0 1)); +spanless_eq_enum!(FloatTy; F32 F64); +spanless_eq_enum!(FnRetTy; Default(0) Ty(0)); +spanless_eq_enum!(ForLoopKind; For ForAwait); +spanless_eq_enum!(ForeignItemKind; Static(0 1 2) Fn(0) TyAlias(0) MacCall(0)); +spanless_eq_enum!(FormatAlignment; Left Right Center); +spanless_eq_enum!(FormatArgPositionKind; Implicit Number Named); 
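+
+// Likewise for the enum invocations: both values must be the same variant,
+// and each pair of corresponding fields is compared with SpanlessEq. A
+// hand-expanded sketch for one invocation (illustrative; the literal
+// expansion is produced by the spanless_eq_enum! rules above):
+//
+//     impl SpanlessEq for Extern {
+//         fn eq(&self, other: &Self) -> bool {
+//             match (self, other) {
+//                 (Extern::None, Extern::None) => true,
+//                 (Extern::Implicit(a0), Extern::Implicit(b0)) => SpanlessEq::eq(a0, b0),
+//                 (Extern::Explicit(a0, a1), Extern::Explicit(b0, b1)) => {
+//                     SpanlessEq::eq(a0, b0) && SpanlessEq::eq(a1, b1)
+//                 }
+//                 _ => false,
+//             }
+//         }
+//     }
+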
+spanless_eq_enum!(FormatArgsPiece; Literal(0) Placeholder(0)); +spanless_eq_enum!(FormatArgumentKind; Normal Named(0) Captured(0)); +spanless_eq_enum!(FormatCount; Literal(0) Argument(0)); +spanless_eq_enum!(FormatDebugHex; Lower Upper); +spanless_eq_enum!(FormatSign; Plus Minus); +spanless_eq_enum!(FormatTrait; Display Debug LowerExp UpperExp Octal Pointer Binary LowerHex UpperHex); +spanless_eq_enum!(GenBlockKind; Async Gen AsyncGen); +spanless_eq_enum!(GenericArg; Lifetime(0) Type(0) Const(0)); +spanless_eq_enum!(GenericArgs; AngleBracketed(0) Parenthesized(0)); +spanless_eq_enum!(GenericBound; Trait(0 1) Outlives(0)); +spanless_eq_enum!(GenericParamKind; Lifetime Type(default) Const(ty kw_span default)); +spanless_eq_enum!(ImplPolarity; Positive Negative(0)); +spanless_eq_enum!(Inline; Yes No); +spanless_eq_enum!(InlineAsmRegOrRegClass; Reg(0) RegClass(0)); +spanless_eq_enum!(InlineAsmTemplatePiece; String(0) Placeholder(operand_idx modifier span)); +spanless_eq_enum!(IntTy; Isize I8 I16 I32 I64 I128); +spanless_eq_enum!(IsAuto; Yes No); +spanless_eq_enum!(LitFloatType; Suffixed(0) Unsuffixed); +spanless_eq_enum!(LitIntType; Signed(0) Unsigned(0) Unsuffixed); +spanless_eq_enum!(LocalKind; Decl Init(0) InitElse(0 1)); +spanless_eq_enum!(MacStmtStyle; Semicolon Braces NoBraces); +spanless_eq_enum!(ModKind; Loaded(0 1 2) Unloaded); +spanless_eq_enum!(Movability; Static Movable); +spanless_eq_enum!(Mutability; Mut Not); +spanless_eq_enum!(PatFieldsRest; Rest None); +spanless_eq_enum!(RangeEnd; Included(0) Excluded); +spanless_eq_enum!(RangeLimits; HalfOpen Closed); +spanless_eq_enum!(StmtKind; Local(0) Item(0) Expr(0) Semi(0) Empty MacCall(0)); +spanless_eq_enum!(StrStyle; Cooked Raw(0)); +spanless_eq_enum!(StructRest; Base(0) Rest(0) None); +spanless_eq_enum!(Term; Ty(0) Const(0)); +spanless_eq_enum!(TokenTree; Token(0 1) Delimited(0 1 2 3)); +spanless_eq_enum!(TraitObjectSyntax; Dyn DynStar None); +spanless_eq_enum!(UintTy; Usize U8 U16 U32 U64 U128); +spanless_eq_enum!(UnOp; Deref Not Neg); +spanless_eq_enum!(Unsafe; Yes(0) No); +spanless_eq_enum!(UnsafeSource; CompilerGenerated UserProvided); +spanless_eq_enum!(UseTreeKind; Simple(0) Nested(0) Glob); +spanless_eq_enum!(VariantData; Struct(fields recovered) Tuple(0 1) Unit(0)); +spanless_eq_enum!(VisibilityKind; Public Restricted(path id shorthand) Inherited); +spanless_eq_enum!(WherePredicate; BoundPredicate(0) RegionPredicate(0) EqPredicate(0)); +spanless_eq_enum!(CoroutineKind; Async(span closure_id return_impl_trait_id) + Gen(span closure_id return_impl_trait_id) + AsyncGen(span closure_id return_impl_trait_id)); +spanless_eq_enum!(ExprKind; Array(0) ConstBlock(0) Call(0 1) MethodCall(0) + Tup(0) Binary(0 1 2) Unary(0 1) Lit(0) Cast(0 1) Type(0 1) Let(0 1 2 3) + If(0 1 2) While(0 1 2) ForLoop(pat iter body label kind) Loop(0 1 2) + Match(0 1) Closure(0) Block(0 1) Gen(0 1 2) Await(0 1) TryBlock(0) + Assign(0 1 2) AssignOp(0 1 2) Field(0 1) Index(0 1 2) Underscore + Range(0 1 2) Path(0 1) AddrOf(0 1 2) Break(0 1) Continue(0) Ret(0) + InlineAsm(0) OffsetOf(0 1) MacCall(0) Struct(0) Repeat(0 1) Paren(0) Try(0) + Yield(0) Yeet(0) Become(0) IncludedBytes(0) FormatArgs(0) Err); +spanless_eq_enum!(InlineAsmOperand; In(reg expr) Out(reg late expr) + InOut(reg late expr) SplitInOut(reg late in_expr out_expr) Const(anon_const) + Sym(sym)); +spanless_eq_enum!(ItemKind; ExternCrate(0) Use(0) Static(0) Const(0) Fn(0) + Mod(0 1) ForeignMod(0) GlobalAsm(0) TyAlias(0) Enum(0 1) Struct(0 1) + Union(0 1) Trait(0) TraitAlias(0 1) Impl(0) MacCall(0) 
MacroDef(0)); +spanless_eq_enum!(LitKind; Str(0 1) ByteStr(0 1) CStr(0 1) Byte(0) Char(0) + Int(0 1) Float(0 1) Bool(0) Err); +spanless_eq_enum!(PatKind; Wild Ident(0 1 2) Struct(0 1 2 3) TupleStruct(0 1 2) + Or(0) Path(0 1) Tuple(0) Box(0) Ref(0 1) Lit(0) Range(0 1 2) Slice(0) Rest + Never Paren(0) MacCall(0)); +spanless_eq_enum!(TyKind; Slice(0) Array(0 1) Ptr(0) Ref(0 1) BareFn(0) Never + Tup(0) AnonStruct(0) AnonUnion(0) Path(0 1) TraitObject(0 1) ImplTrait(0 1) + Paren(0) Typeof(0) Infer ImplicitSelf MacCall(0) Err CVarArgs); + +impl SpanlessEq for Ident { + fn eq(&self, other: &Self) -> bool { + self.as_str() == other.as_str() + } +} + +impl SpanlessEq for RangeSyntax { + fn eq(&self, _other: &Self) -> bool { + match self { + RangeSyntax::DotDotDot | RangeSyntax::DotDotEq => true, + } + } +} + +impl SpanlessEq for Param { + fn eq(&self, other: &Self) -> bool { + let Param { + attrs, + ty, + pat, + id, + span: _, + is_placeholder, + } = self; + let Param { + attrs: attrs2, + ty: ty2, + pat: pat2, + id: id2, + span: _, + is_placeholder: is_placeholder2, + } = other; + SpanlessEq::eq(id, id2) + && SpanlessEq::eq(is_placeholder, is_placeholder2) + && (matches!(ty.kind, TyKind::Err) + || matches!(ty2.kind, TyKind::Err) + || SpanlessEq::eq(attrs, attrs2) + && SpanlessEq::eq(ty, ty2) + && SpanlessEq::eq(pat, pat2)) + } +} + +impl SpanlessEq for TokenKind { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (TokenKind::Literal(this), TokenKind::Literal(other)) => SpanlessEq::eq(this, other), + (TokenKind::DotDotEq | TokenKind::DotDotDot, _) => match other { + TokenKind::DotDotEq | TokenKind::DotDotDot => true, + _ => false, + }, + (TokenKind::Interpolated(this), TokenKind::Interpolated(other)) => { + let (this, this_span) = this.as_ref(); + let (other, other_span) = other.as_ref(); + SpanlessEq::eq(this_span, other_span) + && match (this, other) { + (Nonterminal::NtExpr(this), Nonterminal::NtExpr(other)) => { + SpanlessEq::eq(this, other) + } + _ => this == other, + } + } + _ => self == other, + } + } +} + +impl SpanlessEq for TokenStream { + fn eq(&self, other: &Self) -> bool { + let mut this_trees = self.trees(); + let mut other_trees = other.trees(); + loop { + let this = match this_trees.next() { + None => return other_trees.next().is_none(), + Some(tree) => tree, + }; + let other = match other_trees.next() { + None => return false, + Some(tree) => tree, + }; + if SpanlessEq::eq(this, other) { + continue; + } + if let (TokenTree::Token(this, _), TokenTree::Token(other, _)) = (this, other) { + if match (&this.kind, &other.kind) { + (TokenKind::Literal(this), TokenKind::Literal(other)) => { + SpanlessEq::eq(this, other) + } + (TokenKind::DocComment(_kind, style, symbol), TokenKind::Pound) => { + doc_comment(*style, *symbol, &mut other_trees) + } + (TokenKind::Pound, TokenKind::DocComment(_kind, style, symbol)) => { + doc_comment(*style, *symbol, &mut this_trees) + } + _ => false, + } { + continue; + } + } + return false; + } + } +} + +fn doc_comment<'a>( + style: AttrStyle, + unescaped: Symbol, + trees: &mut impl Iterator<Item = &'a TokenTree>, +) -> bool { + if match style { + AttrStyle::Outer => false, + AttrStyle::Inner => true, + } { + match trees.next() { + Some(TokenTree::Token( + Token { + kind: TokenKind::Not, + span: _, + }, + _spacing, + )) => {} + _ => return false, + } + } + let stream = match trees.next() { + Some(TokenTree::Delimited(_span, _spacing, Delimiter::Bracket, stream)) => stream, + _ => return false, + }; + let mut trees = stream.trees(); + match 
trees.next() { + Some(TokenTree::Token( + Token { + kind: TokenKind::Ident(symbol, false), + span: _, + }, + _spacing, + )) if *symbol == sym::doc => {} + _ => return false, + } + match trees.next() { + Some(TokenTree::Token( + Token { + kind: TokenKind::Eq, + span: _, + }, + _spacing, + )) => {} + _ => return false, + } + match trees.next() { + Some(TokenTree::Token(token, _spacing)) => { + is_escaped_literal_token(token, unescaped) && trees.next().is_none() + } + _ => false, + } +} + +fn is_escaped_literal_token(token: &Token, unescaped: Symbol) -> bool { + match token { + Token { + kind: TokenKind::Literal(lit), + span: _, + } => match MetaItemLit::from_token_lit(*lit, DUMMY_SP) { + Ok(lit) => is_escaped_literal_meta_item_lit(&lit, unescaped), + Err(_) => false, + }, + Token { + kind: TokenKind::Interpolated(nonterminal), + span: _, + } => match &nonterminal.0 { + Nonterminal::NtExpr(expr) => match &expr.kind { + ExprKind::Lit(lit) => is_escaped_lit(lit, unescaped), + _ => false, + }, + _ => false, + }, + _ => false, + } +} + +fn is_escaped_literal_attr_args(value: &AttrArgsEq, unescaped: Symbol) -> bool { + match value { + AttrArgsEq::Ast(expr) => match &expr.kind { + ExprKind::Lit(lit) => is_escaped_lit(lit, unescaped), + _ => false, + }, + AttrArgsEq::Hir(lit) => is_escaped_literal_meta_item_lit(lit, unescaped), + } +} + +fn is_escaped_literal_meta_item_lit(lit: &MetaItemLit, unescaped: Symbol) -> bool { + match lit { + MetaItemLit { + symbol: _, + suffix: None, + kind, + span: _, + } => is_escaped_lit_kind(kind, unescaped), + _ => false, + } +} + +fn is_escaped_lit(lit: &Lit, unescaped: Symbol) -> bool { + match lit { + Lit { + kind: token::LitKind::Str, + symbol: _, + suffix: None, + } => match LitKind::from_token_lit(*lit) { + Ok(lit_kind) => is_escaped_lit_kind(&lit_kind, unescaped), + _ => false, + }, + _ => false, + } +} + +fn is_escaped_lit_kind(kind: &LitKind, unescaped: Symbol) -> bool { + match kind { + LitKind::Str(symbol, StrStyle::Cooked) => { + symbol.as_str().replace('\r', "") == unescaped.as_str().replace('\r', "") + } + _ => false, + } +} + +impl SpanlessEq for LazyAttrTokenStream { + fn eq(&self, other: &Self) -> bool { + let this = self.to_attr_token_stream(); + let other = other.to_attr_token_stream(); + SpanlessEq::eq(&this, &other) + } +} + +impl SpanlessEq for AttrKind { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (AttrKind::Normal(normal), AttrKind::Normal(normal2)) => { + SpanlessEq::eq(normal, normal2) + } + (AttrKind::DocComment(kind, symbol), AttrKind::DocComment(kind2, symbol2)) => { + SpanlessEq::eq(kind, kind2) && SpanlessEq::eq(symbol, symbol2) + } + (AttrKind::DocComment(kind, unescaped), AttrKind::Normal(normal2)) => { + match kind { + CommentKind::Line | CommentKind::Block => {} + } + let path = Path::from_ident(Ident::with_dummy_span(sym::doc)); + SpanlessEq::eq(&path, &normal2.item.path) + && match &normal2.item.args { + AttrArgs::Empty | AttrArgs::Delimited(_) => false, + AttrArgs::Eq(_span, value) => { + is_escaped_literal_attr_args(value, *unescaped) + } + } + } + (AttrKind::Normal(_), AttrKind::DocComment(..)) => SpanlessEq::eq(other, self), + } + } +} + +impl SpanlessEq for FormatArguments { + fn eq(&self, other: &Self) -> bool { + SpanlessEq::eq(self.all_args(), other.all_args()) + } +} diff --git a/vendor/syn/tests/common/mod.rs b/vendor/syn/tests/common/mod.rs new file mode 100644 index 0000000..2156530 --- /dev/null +++ b/vendor/syn/tests/common/mod.rs @@ -0,0 +1,28 @@ +#![allow(dead_code)] 
+#![allow(clippy::module_name_repetitions, clippy::shadow_unrelated)] + +use rayon::ThreadPoolBuilder; +use std::env; + +pub mod eq; +pub mod parse; + +/// Read the `ABORT_AFTER_FAILURE` environment variable, and parse it. +pub fn abort_after() -> usize { + match env::var("ABORT_AFTER_FAILURE") { + Ok(s) => s.parse().expect("failed to parse ABORT_AFTER_FAILURE"), + Err(_) => usize::max_value(), + } +} + +/// Configure Rayon threadpool. +pub fn rayon_init() { + let stack_size = match env::var("RUST_MIN_STACK") { + Ok(s) => s.parse().expect("failed to parse RUST_MIN_STACK"), + Err(_) => 20 * 1024 * 1024, + }; + ThreadPoolBuilder::new() + .stack_size(stack_size) + .build_global() + .unwrap(); +} diff --git a/vendor/syn/tests/common/parse.rs b/vendor/syn/tests/common/parse.rs new file mode 100644 index 0000000..73be101 --- /dev/null +++ b/vendor/syn/tests/common/parse.rs @@ -0,0 +1,51 @@ +extern crate rustc_ast; +extern crate rustc_driver; +extern crate rustc_expand; +extern crate rustc_parse as parse; +extern crate rustc_session; +extern crate rustc_span; + +use rustc_ast::ast; +use rustc_ast::ptr::P; +use rustc_session::parse::ParseSess; +use rustc_span::source_map::FilePathMapping; +use rustc_span::FileName; +use std::panic; + +pub fn librustc_expr(input: &str) -> Option<P<ast::Expr>> { + match panic::catch_unwind(|| { + let locale_resources = rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(); + let file_path_mapping = FilePathMapping::empty(); + let sess = ParseSess::new(locale_resources, file_path_mapping); + let e = parse::new_parser_from_source_str( + &sess, + FileName::Custom("test_precedence".to_string()), + input.to_string(), + ) + .parse_expr(); + match e { + Ok(expr) => Some(expr), + Err(mut diagnostic) => { + diagnostic.emit(); + None + } + } + }) { + Ok(Some(e)) => Some(e), + Ok(None) => None, + Err(_) => { + errorf!("librustc panicked\n"); + None + } + } +} + +pub fn syn_expr(input: &str) -> Option<syn::Expr> { + match syn::parse_str(input) { + Ok(e) => Some(e), + Err(msg) => { + errorf!("syn failed to parse\n{:?}\n", msg); + None + } + } +} diff --git a/vendor/syn/tests/macros/mod.rs b/vendor/syn/tests/macros/mod.rs new file mode 100644 index 0000000..d27c391 --- /dev/null +++ b/vendor/syn/tests/macros/mod.rs @@ -0,0 +1,92 @@ +#![allow(unused_macros, unused_macro_rules)] + +#[path = "../debug/mod.rs"] +pub mod debug; + +use std::str::FromStr; +use syn::parse::Result; + +macro_rules! errorf { + ($($tt:tt)*) => {{ + use ::std::io::Write; + let stderr = ::std::io::stderr(); + write!(stderr.lock(), $($tt)*).unwrap(); + }}; +} + +macro_rules! punctuated { + ($($e:expr,)+) => {{ + let mut seq = ::syn::punctuated::Punctuated::new(); + $( + seq.push($e); + )+ + seq + }}; + + ($($e:expr),+) => { + punctuated!($($e,)+) + }; +} + +macro_rules! snapshot { + ($($args:tt)*) => { + snapshot_impl!(() $($args)*) + }; +} + +macro_rules! 
snapshot_impl { + (($expr:ident) as $t:ty, @$snapshot:literal) => { + let tokens = crate::macros::TryIntoTokens::try_into_tokens($expr).unwrap(); + let $expr: $t = syn::parse_quote!(#tokens); + let debug = crate::macros::debug::Lite(&$expr); + if !cfg!(miri) { + #[allow(clippy::needless_raw_string_hashes)] // https://github.com/mitsuhiko/insta/issues/389 + { + insta::assert_debug_snapshot!(debug, @$snapshot); + } + } + }; + (($($expr:tt)*) as $t:ty, @$snapshot:literal) => {{ + let tokens = crate::macros::TryIntoTokens::try_into_tokens($($expr)*).unwrap(); + let syntax_tree: $t = syn::parse_quote!(#tokens); + let debug = crate::macros::debug::Lite(&syntax_tree); + if !cfg!(miri) { + #[allow(clippy::needless_raw_string_hashes)] + { + insta::assert_debug_snapshot!(debug, @$snapshot); + } + } + syntax_tree + }}; + (($($expr:tt)*) , @$snapshot:literal) => {{ + let syntax_tree = $($expr)*; + let debug = crate::macros::debug::Lite(&syntax_tree); + if !cfg!(miri) { + #[allow(clippy::needless_raw_string_hashes)] + { + insta::assert_debug_snapshot!(debug, @$snapshot); + } + } + syntax_tree + }}; + (($($expr:tt)*) $next:tt $($rest:tt)*) => { + snapshot_impl!(($($expr)* $next) $($rest)*) + }; +} + +pub trait TryIntoTokens { + fn try_into_tokens(self) -> Result<proc_macro2::TokenStream>; +} + +impl<'a> TryIntoTokens for &'a str { + fn try_into_tokens(self) -> Result<proc_macro2::TokenStream> { + let tokens = proc_macro2::TokenStream::from_str(self)?; + Ok(tokens) + } +} + +impl TryIntoTokens for proc_macro2::TokenStream { + fn try_into_tokens(self) -> Result<proc_macro2::TokenStream> { + Ok(self) + } +} diff --git a/vendor/syn/tests/regression.rs b/vendor/syn/tests/regression.rs new file mode 100644 index 0000000..5c7fcdd --- /dev/null +++ b/vendor/syn/tests/regression.rs @@ -0,0 +1,5 @@ +#![allow(clippy::let_underscore_untyped, clippy::uninlined_format_args)] + +mod regression { + automod::dir!("tests/regression"); +} diff --git a/vendor/syn/tests/regression/issue1108.rs b/vendor/syn/tests/regression/issue1108.rs new file mode 100644 index 0000000..11a82ad --- /dev/null +++ b/vendor/syn/tests/regression/issue1108.rs @@ -0,0 +1,5 @@ +#[test] +fn issue1108() { + let data = "impl<x<>>::x for"; + let _ = syn::parse_file(data); +} diff --git a/vendor/syn/tests/regression/issue1235.rs b/vendor/syn/tests/regression/issue1235.rs new file mode 100644 index 0000000..8836030 --- /dev/null +++ b/vendor/syn/tests/regression/issue1235.rs @@ -0,0 +1,32 @@ +use proc_macro2::{Delimiter, Group}; +use quote::quote; + +#[test] +fn main() { + // Okay. Rustc allows top-level `static` with no value syntactically, but + // not semantically. Syn parses as Item::Verbatim. + let tokens = quote! { + pub static FOO: usize; + pub static BAR: usize; + }; + let file = syn::parse2::<syn::File>(tokens).unwrap(); + println!("{:#?}", file); + + // Okay. + let inner = Group::new( + Delimiter::None, + quote!(static FOO: usize = 0; pub static BAR: usize = 0), + ); + let tokens = quote!(pub #inner;); + let file = syn::parse2::<syn::File>(tokens).unwrap(); + println!("{:#?}", file); + + // Formerly parser crash. 
+ let inner = Group::new( + Delimiter::None, + quote!(static FOO: usize; pub static BAR: usize), + ); + let tokens = quote!(pub #inner;); + let file = syn::parse2::<syn::File>(tokens).unwrap(); + println!("{:#?}", file); +} diff --git a/vendor/syn/tests/repo/mod.rs b/vendor/syn/tests/repo/mod.rs new file mode 100644 index 0000000..6aedd2c --- /dev/null +++ b/vendor/syn/tests/repo/mod.rs @@ -0,0 +1,375 @@ +#![allow(clippy::manual_assert)] + +mod progress; + +use self::progress::Progress; +use anyhow::Result; +use flate2::read::GzDecoder; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use std::collections::BTreeSet; +use std::ffi::OsStr; +use std::fs; +use std::path::{Path, PathBuf}; +use tar::Archive; +use walkdir::{DirEntry, WalkDir}; + +const REVISION: &str = "b10cfcd65fd7f7b1ab9beb34798b2108de003452"; + +#[rustfmt::skip] +static EXCLUDE_FILES: &[&str] = &[ + // TODO: CStr literals: c"…", cr"…" + // https://github.com/dtolnay/syn/issues/1502 + "src/tools/clippy/tests/ui/needless_raw_string.rs", + "src/tools/clippy/tests/ui/needless_raw_string_hashes.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0085_expr_literals.rs", + + // TODO: explicit tail calls: `become _g()` + // https://github.com/dtolnay/syn/issues/1501 + "tests/ui/explicit-tail-calls/return-lifetime-sub.rs", + + // TODO: non-lifetime binders: `where for<'a, T> &'a Struct<T>: Trait` + // https://github.com/dtolnay/syn/issues/1435 + "src/tools/rustfmt/tests/source/issue_5721.rs", + "src/tools/rustfmt/tests/source/non-lifetime-binders.rs", + "src/tools/rustfmt/tests/target/issue_5721.rs", + "src/tools/rustfmt/tests/target/non-lifetime-binders.rs", + "tests/rustdoc-json/non_lifetime_binders.rs", + "tests/rustdoc/inline_cross/auxiliary/non_lifetime_binders.rs", + "tests/rustdoc/non_lifetime_binders.rs", + + // TODO: return type notation: `where T: Trait<method(): Send>` + // https://github.com/dtolnay/syn/issues/1434 + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0208_associated_return_type_bounds.rs", + "tests/ui/associated-type-bounds/return-type-notation/basic.rs", + "tests/ui/feature-gates/feature-gate-return_type_notation.rs", + + // TODO: lazy type alias syntax with where-clause in trailing position + // https://github.com/dtolnay/syn/issues/1525 + "tests/rustdoc/typedef-inner-variants-lazy_type_alias.rs", + + // TODO: gen blocks and functions + // https://github.com/dtolnay/syn/issues/1526 + "compiler/rustc_codegen_cranelift/example/gen_block_iterate.rs", + "tests/ui/coroutine/gen_block_is_iter.rs", + "tests/ui/coroutine/gen_block_iterate.rs", + + // TODO: struct literal in match guard + // https://github.com/dtolnay/syn/issues/1527 + "tests/ui/parser/struct-literal-in-match-guard.rs", + + // Compile-fail expr parameter in const generic position: f::<1 + 2>() + "tests/ui/const-generics/early/closing-args-token.rs", + "tests/ui/const-generics/early/const-expression-parameter.rs", + + // Compile-fail variadics in not the last position of a function parameter list + "tests/ui/parser/variadic-ffi-syntactic-pass.rs", + + // Need at least one trait in impl Trait, no such type as impl 'static + "tests/ui/type-alias-impl-trait/generic_type_does_not_live_long_enough.rs", + + // Negative polarity trait bound: `where T: !Copy` + "src/tools/rustfmt/tests/target/negative-bounds.rs", + + // Lifetime bound inside for<>: `T: ~const ?for<'a: 'b> Trait<'a>` + "tests/ui/rfcs/rfc-2632-const-trait-impl/tilde-const-syntax.rs", + + // Const impl that is not a trait impl: `impl ~const T 
{}` + "tests/ui/rfcs/rfc-2632-const-trait-impl/syntax.rs", + + // Deprecated anonymous parameter syntax in traits + "src/tools/rustfmt/tests/source/trait.rs", + "src/tools/rustfmt/tests/target/trait.rs", + "tests/ui/issues/issue-13105.rs", + "tests/ui/issues/issue-13775.rs", + "tests/ui/issues/issue-34074.rs", + "tests/ui/proc-macro/trait-fn-args-2015.rs", + + // Deprecated where-clause location + "src/tools/rustfmt/tests/source/issue_4257.rs", + "src/tools/rustfmt/tests/source/issue_4911.rs", + "src/tools/rustfmt/tests/target/issue_4257.rs", + "src/tools/rustfmt/tests/target/issue_4911.rs", + "tests/pretty/gat-bounds.rs", + "tests/rustdoc/generic-associated-types/gats.rs", + + // Deprecated trait object syntax with parenthesized generic arguments and no dyn keyword + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0004_value_parameters_no_patterns.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0104_path_fn_trait_args.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0202_typepathfn_with_coloncolon.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0209_bare_dyn_types_with_paren_as_generic_args.rs", + "src/tools/rustfmt/tests/source/attrib.rs", + "src/tools/rustfmt/tests/source/closure.rs", + "src/tools/rustfmt/tests/source/existential_type.rs", + "src/tools/rustfmt/tests/source/fn-simple.rs", + "src/tools/rustfmt/tests/source/fn_args_layout-vertical.rs", + "src/tools/rustfmt/tests/source/issue-4689/one.rs", + "src/tools/rustfmt/tests/source/issue-4689/two.rs", + "src/tools/rustfmt/tests/source/paths.rs", + "src/tools/rustfmt/tests/source/structs.rs", + "src/tools/rustfmt/tests/target/attrib.rs", + "src/tools/rustfmt/tests/target/closure.rs", + "src/tools/rustfmt/tests/target/existential_type.rs", + "src/tools/rustfmt/tests/target/fn-simple.rs", + "src/tools/rustfmt/tests/target/fn.rs", + "src/tools/rustfmt/tests/target/fn_args_layout-vertical.rs", + "src/tools/rustfmt/tests/target/issue-4689/one.rs", + "src/tools/rustfmt/tests/target/issue-4689/two.rs", + "src/tools/rustfmt/tests/target/paths.rs", + "src/tools/rustfmt/tests/target/structs.rs", + "tests/codegen-units/item-collection/non-generic-closures.rs", + "tests/debuginfo/recursive-enum.rs", + "tests/pretty/closure-reform-pretty.rs", + "tests/run-make/reproducible-build-2/reproducible-build.rs", + "tests/run-make/reproducible-build/reproducible-build.rs", + "tests/ui/auxiliary/typeid-intrinsic-aux1.rs", + "tests/ui/auxiliary/typeid-intrinsic-aux2.rs", + "tests/ui/impl-trait/generic-with-implicit-hrtb-without-dyn.rs", + "tests/ui/lifetimes/auxiliary/lifetime_bound_will_change_warning_lib.rs", + "tests/ui/lifetimes/bare-trait-object-borrowck.rs", + "tests/ui/lifetimes/bare-trait-object.rs", + "tests/ui/parser/bounds-obj-parens.rs", + + // Invalid unparenthesized range pattern inside slice pattern: `[1..]` + "tests/ui/consts/miri_unleashed/const_refers_to_static_cross_crate.rs", + + // Various extensions to Rust syntax made up by rust-analyzer + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0012_type_item_where_clause.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0058_range_pat.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0123_param_list_vararg.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0131_existential_type.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0156_fn_def_param.rs", + 
"src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0179_use_tree_abs_star.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0188_const_param_default_path.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/ok/0015_use_tree.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/ok/0029_range_forms.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/ok/0051_parameter_attrs.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/ok/0055_dot_dot_dot.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/ok/0068_item_modifiers.rs", + "src/tools/rust-analyzer/crates/syntax/test_data/parser/validation/0031_block_inner_attrs.rs", + "src/tools/rust-analyzer/crates/syntax/test_data/parser/validation/0038_endless_inclusive_range.rs", + "src/tools/rust-analyzer/crates/syntax/test_data/parser/validation/0045_ambiguous_trait_object.rs", + "src/tools/rust-analyzer/crates/syntax/test_data/parser/validation/0046_mutable_const_item.rs", + + // Placeholder syntax for "throw expressions" + "compiler/rustc_errors/src/translation.rs", + "src/tools/clippy/tests/ui/needless_return.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0204_yeet_expr.rs", + "tests/pretty/yeet-expr.rs", + "tests/ui/try-trait/yeet-for-option.rs", + "tests/ui/try-trait/yeet-for-result.rs", + + // Edition 2015 code using identifiers that are now keywords + // TODO: some of these we should probably parse + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0159_try_macro_fallback.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0160_try_macro_rules.rs", + "src/tools/rustfmt/tests/source/configs/indent_style/block_call.rs", + "src/tools/rustfmt/tests/source/configs/use_try_shorthand/false.rs", + "src/tools/rustfmt/tests/source/configs/use_try_shorthand/true.rs", + "src/tools/rustfmt/tests/source/issue_1306.rs", + "src/tools/rustfmt/tests/source/try-conversion.rs", + "src/tools/rustfmt/tests/target/configs/indent_style/block_call.rs", + "src/tools/rustfmt/tests/target/configs/use_try_shorthand/false.rs", + "src/tools/rustfmt/tests/target/issue-1681.rs", + "src/tools/rustfmt/tests/target/issue_1306.rs", + "tests/ui/dyn-keyword/dyn-2015-no-warnings-without-lints.rs", + "tests/ui/editions/edition-keywords-2015-2015.rs", + "tests/ui/editions/edition-keywords-2015-2018.rs", + "tests/ui/lint/lint_pre_expansion_extern_module_aux.rs", + "tests/ui/macros/macro-comma-support-rpass.rs", + "tests/ui/macros/try-macro.rs", + "tests/ui/parser/extern-crate-async.rs", + "tests/ui/try-block/try-is-identifier-edition2015.rs", + + // Excessive nesting + "tests/ui/issues/issue-74564-if-expr-stack-overflow.rs", + + // Testing tools on invalid syntax + "src/tools/rustfmt/tests/coverage/target/comments.rs", + "src/tools/rustfmt/tests/parser/issue-4126/invalid.rs", + "src/tools/rustfmt/tests/parser/issue_4418.rs", + "src/tools/rustfmt/tests/parser/unclosed-delims/issue_4466.rs", + "src/tools/rustfmt/tests/source/configs/disable_all_formatting/true.rs", + "src/tools/rustfmt/tests/source/configs/spaces_around_ranges/false.rs", + "src/tools/rustfmt/tests/source/configs/spaces_around_ranges/true.rs", + "src/tools/rustfmt/tests/source/type.rs", + "src/tools/rustfmt/tests/target/configs/spaces_around_ranges/false.rs", + "src/tools/rustfmt/tests/target/configs/spaces_around_ranges/true.rs", + "src/tools/rustfmt/tests/target/type.rs", + "tests/run-make/translation/test.rs", + "tests/ui/generics/issue-94432-garbage-ice.rs", + + // 
Generated file containing a top-level expression, used with `include!` + "compiler/rustc_codegen_gcc/src/intrinsic/archs.rs", + + // Clippy lint lists represented as expressions + "src/tools/clippy/clippy_lints/src/lib.deprecated.rs", + + // Not actually test cases + "tests/ui/lint/expansion-time-include.rs", + "tests/ui/macros/auxiliary/macro-comma-support.rs", + "tests/ui/macros/auxiliary/macro-include-items-expr.rs", + "tests/ui/macros/include-single-expr-helper.rs", + "tests/ui/macros/include-single-expr-helper-1.rs", + "tests/ui/parser/issues/auxiliary/issue-21146-inc.rs", +]; + +#[rustfmt::skip] +static EXCLUDE_DIRS: &[&str] = &[ + // Inputs that intentionally do not parse + "src/tools/rust-analyzer/crates/parser/test_data/parser/err", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/err", + + // Inputs that lex but do not necessarily parse + "src/tools/rust-analyzer/crates/parser/test_data/lexer", + + // Inputs that used to crash rust-analyzer, but aren't necessarily supposed to parse + "src/tools/rust-analyzer/crates/syntax/test_data/parser/fuzz-failures", + "src/tools/rust-analyzer/crates/syntax/test_data/reparse/fuzz-failures", +]; + +// Directories in which a .stderr implies the corresponding .rs is not expected +// to work. +static UI_TEST_DIRS: &[&str] = &["tests/ui", "tests/rustdoc-ui"]; + +pub fn for_each_rust_file(for_each: impl Fn(&Path) + Sync + Send) { + let mut rs_files = BTreeSet::new(); + + let repo_dir = Path::new("tests/rust"); + for entry in WalkDir::new(repo_dir) + .into_iter() + .filter_entry(base_dir_filter) + { + let entry = entry.unwrap(); + if !entry.file_type().is_dir() { + rs_files.insert(entry.into_path()); + } + } + + for ui_test_dir in UI_TEST_DIRS { + for entry in WalkDir::new(repo_dir.join(ui_test_dir)) { + let mut path = entry.unwrap().into_path(); + if path.extension() == Some(OsStr::new("stderr")) { + loop { + rs_files.remove(&path.with_extension("rs")); + path = path.with_extension(""); + if path.extension().is_none() { + break; + } + } + } + } + } + + rs_files.par_iter().map(PathBuf::as_path).for_each(for_each); +} + +pub fn base_dir_filter(entry: &DirEntry) -> bool { + let path = entry.path(); + + let mut path_string = path.to_string_lossy(); + if cfg!(windows) { + path_string = path_string.replace('\\', "/").into(); + } + let path_string = if path_string == "tests/rust" { + return true; + } else if let Some(path) = path_string.strip_prefix("tests/rust/") { + path + } else { + panic!("unexpected path in Rust dist: {}", path_string); + }; + + if path.is_dir() { + return !EXCLUDE_DIRS.contains(&path_string); + } + + if path.extension() != Some(OsStr::new("rs")) { + return false; + } + + !EXCLUDE_FILES.contains(&path_string) +} + +#[allow(dead_code)] +pub fn edition(path: &Path) -> &'static str { + if path.ends_with("dyn-2015-no-warnings-without-lints.rs") { + "2015" + } else { + "2018" + } +} + +pub fn clone_rust() { + let needs_clone = match fs::read_to_string("tests/rust/COMMIT") { + Err(_) => true, + Ok(contents) => contents.trim() != REVISION, + }; + if needs_clone { + download_and_unpack().unwrap(); + } + + let mut missing = String::new(); + let test_src = Path::new("tests/rust"); + + let mut exclude_files_set = BTreeSet::new(); + for exclude in EXCLUDE_FILES { + if !exclude_files_set.insert(exclude) { + panic!("duplicate path in EXCLUDE_FILES: {}", exclude); + } + for dir in EXCLUDE_DIRS { + if Path::new(exclude).starts_with(dir) { + panic!("excluded file {} is inside an excluded dir", exclude); + } + } + if 
!test_src.join(exclude).is_file() { + missing += "\ntests/rust/"; + missing += exclude; + } + } + + let mut exclude_dirs_set = BTreeSet::new(); + for exclude in EXCLUDE_DIRS { + if !exclude_dirs_set.insert(exclude) { + panic!("duplicate path in EXCLUDE_DIRS: {}", exclude); + } + if !test_src.join(exclude).is_dir() { + missing += "\ntests/rust/"; + missing += exclude; + missing += "/"; + } + } + + if !missing.is_empty() { + panic!("excluded test file does not exist:{}\n", missing); + } +} + +fn download_and_unpack() -> Result<()> { + let url = format!( + "https://github.com/rust-lang/rust/archive/{}.tar.gz", + REVISION + ); + let response = reqwest::blocking::get(url)?.error_for_status()?; + let progress = Progress::new(response); + let decoder = GzDecoder::new(progress); + let mut archive = Archive::new(decoder); + let prefix = format!("rust-{}", REVISION); + + let tests_rust = Path::new("tests/rust"); + if tests_rust.exists() { + fs::remove_dir_all(tests_rust)?; + } + + for entry in archive.entries()? { + let mut entry = entry?; + let path = entry.path()?; + if path == Path::new("pax_global_header") { + continue; + } + let relative = path.strip_prefix(&prefix)?; + let out = tests_rust.join(relative); + entry.unpack(&out)?; + } + + fs::write("tests/rust/COMMIT", REVISION)?; + Ok(()) +} diff --git a/vendor/syn/tests/repo/progress.rs b/vendor/syn/tests/repo/progress.rs new file mode 100644 index 0000000..28c8a44 --- /dev/null +++ b/vendor/syn/tests/repo/progress.rs @@ -0,0 +1,37 @@ +use std::io::{Read, Result}; +use std::time::{Duration, Instant}; + +pub struct Progress<R> { + bytes: usize, + tick: Instant, + stream: R, +} + +impl<R> Progress<R> { + pub fn new(stream: R) -> Self { + Progress { + bytes: 0, + tick: Instant::now() + Duration::from_millis(2000), + stream, + } + } +} + +impl<R: Read> Read for Progress<R> { + fn read(&mut self, buf: &mut [u8]) -> Result<usize> { + let num = self.stream.read(buf)?; + self.bytes += num; + let now = Instant::now(); + if now > self.tick { + self.tick = now + Duration::from_millis(500); + errorf!("downloading... 
{} bytes\n", self.bytes); + } + Ok(num) + } +} + +impl<R> Drop for Progress<R> { + fn drop(&mut self) { + errorf!("done ({} bytes)\n", self.bytes); + } +} diff --git a/vendor/syn/tests/test_asyncness.rs b/vendor/syn/tests/test_asyncness.rs new file mode 100644 index 0000000..9968934 --- /dev/null +++ b/vendor/syn/tests/test_asyncness.rs @@ -0,0 +1,43 @@ +#![allow(clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +use syn::{Expr, Item}; + +#[test] +fn test_async_fn() { + let input = "async fn process() {}"; + + snapshot!(input as Item, @r###" + Item::Fn { + vis: Visibility::Inherited, + sig: Signature { + asyncness: Some, + ident: "process", + generics: Generics, + output: ReturnType::Default, + }, + block: Block { + stmts: [], + }, + } + "###); +} + +#[test] +fn test_async_closure() { + let input = "async || {}"; + + snapshot!(input as Expr, @r###" + Expr::Closure { + asyncness: Some, + output: ReturnType::Default, + body: Expr::Block { + block: Block { + stmts: [], + }, + }, + } + "###); +} diff --git a/vendor/syn/tests/test_attribute.rs b/vendor/syn/tests/test_attribute.rs new file mode 100644 index 0000000..597ae3a --- /dev/null +++ b/vendor/syn/tests/test_attribute.rs @@ -0,0 +1,225 @@ +#![allow(clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +use syn::parse::Parser; +use syn::{Attribute, Meta}; + +#[test] +fn test_meta_item_word() { + let meta = test("#[foo]"); + + snapshot!(meta, @r###" + Meta::Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + } + "###); +} + +#[test] +fn test_meta_item_name_value() { + let meta = test("#[foo = 5]"); + + snapshot!(meta, @r###" + Meta::NameValue { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + value: Expr::Lit { + lit: 5, + }, + } + "###); +} + +#[test] +fn test_meta_item_bool_value() { + let meta = test("#[foo = true]"); + + snapshot!(meta, @r###" + Meta::NameValue { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + value: Expr::Lit { + lit: Lit::Bool { + value: true, + }, + }, + } + "###); + + let meta = test("#[foo = false]"); + + snapshot!(meta, @r###" + Meta::NameValue { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + value: Expr::Lit { + lit: Lit::Bool { + value: false, + }, + }, + } + "###); +} + +#[test] +fn test_meta_item_list_lit() { + let meta = test("#[foo(5)]"); + + snapshot!(meta, @r###" + Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`5`), + } + "###); +} + +#[test] +fn test_meta_item_list_word() { + let meta = test("#[foo(bar)]"); + + snapshot!(meta, @r###" + Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`bar`), + } + "###); +} + +#[test] +fn test_meta_item_list_name_value() { + let meta = test("#[foo(bar = 5)]"); + + snapshot!(meta, @r###" + Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`bar = 5`), + } + "###); +} + +#[test] +fn test_meta_item_list_bool_value() { + let meta = test("#[foo(bar = true)]"); + + snapshot!(meta, @r###" + Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`bar = true`), + } + "###); +} + +#[test] +fn test_meta_item_multiple() { + let meta = test("#[foo(word, name = 5, list(name2 = 6), 
word2)]"); + + snapshot!(meta, @r###" + Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`word , name = 5 , list (name2 = 6) , word2`), + } + "###); +} + +#[test] +fn test_bool_lit() { + let meta = test("#[foo(true)]"); + + snapshot!(meta, @r###" + Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`true`), + } + "###); +} + +#[test] +fn test_negative_lit() { + let meta = test("#[form(min = -1, max = 200)]"); + + snapshot!(meta, @r###" + Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "form", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`min = - 1 , max = 200`), + } + "###); +} + +fn test(input: &str) -> Meta { + let attrs = Attribute::parse_outer.parse_str(input).unwrap(); + + assert_eq!(attrs.len(), 1); + let attr = attrs.into_iter().next().unwrap(); + + attr.meta +} diff --git a/vendor/syn/tests/test_derive_input.rs b/vendor/syn/tests/test_derive_input.rs new file mode 100644 index 0000000..c3d31eb --- /dev/null +++ b/vendor/syn/tests/test_derive_input.rs @@ -0,0 +1,781 @@ +#![allow( + clippy::assertions_on_result_states, + clippy::manual_let_else, + clippy::too_many_lines, + clippy::uninlined_format_args +)] + +#[macro_use] +mod macros; + +use quote::quote; +use syn::{Data, DeriveInput}; + +#[test] +fn test_unit() { + let input = quote! { + struct Unit; + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Inherited, + ident: "Unit", + generics: Generics, + data: Data::Struct { + fields: Fields::Unit, + semi_token: Some, + }, + } + "###); +} + +#[test] +fn test_struct() { + let input = quote! { + #[derive(Debug, Clone)] + pub struct Item { + pub ident: Ident, + pub attrs: Vec<Attribute> + } + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + attrs: [ + Attribute { + style: AttrStyle::Outer, + meta: Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "derive", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`Debug , Clone`), + }, + }, + ], + vis: Visibility::Public, + ident: "Item", + generics: Generics, + data: Data::Struct { + fields: Fields::Named { + named: [ + Field { + vis: Visibility::Public, + ident: Some("ident"), + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Ident", + }, + ], + }, + }, + }, + Token![,], + Field { + vis: Visibility::Public, + ident: Some("attrs"), + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Vec", + arguments: PathArguments::AngleBracketed { + args: [ + GenericArgument::Type(Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Attribute", + }, + ], + }, + }), + ], + }, + }, + ], + }, + }, + }, + ], + }, + }, + } + "###); + + snapshot!(&input.attrs[0].meta, @r###" + Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "derive", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`Debug , Clone`), + } + "###); +} + +#[test] +fn test_union() { + let input = quote! 
{ + union MaybeUninit<T> { + uninit: (), + value: T + } + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Inherited, + ident: "MaybeUninit", + generics: Generics { + lt_token: Some, + params: [ + GenericParam::Type(TypeParam { + ident: "T", + }), + ], + gt_token: Some, + }, + data: Data::Union { + fields: FieldsNamed { + named: [ + Field { + vis: Visibility::Inherited, + ident: Some("uninit"), + colon_token: Some, + ty: Type::Tuple, + }, + Token![,], + Field { + vis: Visibility::Inherited, + ident: Some("value"), + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "T", + }, + ], + }, + }, + }, + ], + }, + }, + } + "###); +} + +#[test] +#[cfg(feature = "full")] +fn test_enum() { + let input = quote! { + /// See the std::result module documentation for details. + #[must_use] + pub enum Result<T, E> { + Ok(T), + Err(E), + Surprise = 0isize, + + // Smuggling data into a proc_macro_derive, + // in the style of https://github.com/dtolnay/proc-macro-hack + ProcMacroHack = (0, "data").0 + } + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + attrs: [ + Attribute { + style: AttrStyle::Outer, + meta: Meta::NameValue { + path: Path { + segments: [ + PathSegment { + ident: "doc", + }, + ], + }, + value: Expr::Lit { + lit: " See the std::result module documentation for details.", + }, + }, + }, + Attribute { + style: AttrStyle::Outer, + meta: Meta::Path { + segments: [ + PathSegment { + ident: "must_use", + }, + ], + }, + }, + ], + vis: Visibility::Public, + ident: "Result", + generics: Generics { + lt_token: Some, + params: [ + GenericParam::Type(TypeParam { + ident: "T", + }), + Token![,], + GenericParam::Type(TypeParam { + ident: "E", + }), + ], + gt_token: Some, + }, + data: Data::Enum { + variants: [ + Variant { + ident: "Ok", + fields: Fields::Unnamed { + unnamed: [ + Field { + vis: Visibility::Inherited, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "T", + }, + ], + }, + }, + }, + ], + }, + }, + Token![,], + Variant { + ident: "Err", + fields: Fields::Unnamed { + unnamed: [ + Field { + vis: Visibility::Inherited, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "E", + }, + ], + }, + }, + }, + ], + }, + }, + Token![,], + Variant { + ident: "Surprise", + fields: Fields::Unit, + discriminant: Some(Expr::Lit { + lit: 0isize, + }), + }, + Token![,], + Variant { + ident: "ProcMacroHack", + fields: Fields::Unit, + discriminant: Some(Expr::Field { + base: Expr::Tuple { + elems: [ + Expr::Lit { + lit: 0, + }, + Token![,], + Expr::Lit { + lit: "data", + }, + ], + }, + member: Member::Unnamed(Index { + index: 0, + }), + }), + }, + ], + }, + } + "###); + + let meta_items: Vec<_> = input.attrs.into_iter().map(|attr| attr.meta).collect(); + + snapshot!(meta_items, @r###" + [ + Meta::NameValue { + path: Path { + segments: [ + PathSegment { + ident: "doc", + }, + ], + }, + value: Expr::Lit { + lit: " See the std::result module documentation for details.", + }, + }, + Meta::Path { + segments: [ + PathSegment { + ident: "must_use", + }, + ], + }, + ] + "###); +} + +#[test] +fn test_attr_with_non_mod_style_path() { + let input = quote! { + #[inert <T>] + struct S; + }; + + syn::parse2::<DeriveInput>(input).unwrap_err(); +} + +#[test] +fn test_attr_with_mod_style_path_with_self() { + let input = quote! 
{ + #[foo::self] + struct S; + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + attrs: [ + Attribute { + style: AttrStyle::Outer, + meta: Meta::Path { + segments: [ + PathSegment { + ident: "foo", + }, + Token![::], + PathSegment { + ident: "self", + }, + ], + }, + }, + ], + vis: Visibility::Inherited, + ident: "S", + generics: Generics, + data: Data::Struct { + fields: Fields::Unit, + semi_token: Some, + }, + } + "###); + + snapshot!(&input.attrs[0].meta, @r###" + Meta::Path { + segments: [ + PathSegment { + ident: "foo", + }, + Token![::], + PathSegment { + ident: "self", + }, + ], + } + "###); +} + +#[test] +fn test_pub_restricted() { + // Taken from tests/rust/src/test/ui/resolve/auxiliary/privacy-struct-ctor.rs + let input = quote! { + pub(in m) struct Z(pub(in m::n) u8); + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Restricted { + in_token: Some, + path: Path { + segments: [ + PathSegment { + ident: "m", + }, + ], + }, + }, + ident: "Z", + generics: Generics, + data: Data::Struct { + fields: Fields::Unnamed { + unnamed: [ + Field { + vis: Visibility::Restricted { + in_token: Some, + path: Path { + segments: [ + PathSegment { + ident: "m", + }, + Token![::], + PathSegment { + ident: "n", + }, + ], + }, + }, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "u8", + }, + ], + }, + }, + }, + ], + }, + semi_token: Some, + }, + } + "###); +} + +#[test] +fn test_pub_restricted_crate() { + let input = quote! { + pub(crate) struct S; + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Restricted { + path: Path { + segments: [ + PathSegment { + ident: "crate", + }, + ], + }, + }, + ident: "S", + generics: Generics, + data: Data::Struct { + fields: Fields::Unit, + semi_token: Some, + }, + } + "###); +} + +#[test] +fn test_pub_restricted_super() { + let input = quote! { + pub(super) struct S; + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Restricted { + path: Path { + segments: [ + PathSegment { + ident: "super", + }, + ], + }, + }, + ident: "S", + generics: Generics, + data: Data::Struct { + fields: Fields::Unit, + semi_token: Some, + }, + } + "###); +} + +#[test] +fn test_pub_restricted_in_super() { + let input = quote! { + pub(in super) struct S; + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Restricted { + in_token: Some, + path: Path { + segments: [ + PathSegment { + ident: "super", + }, + ], + }, + }, + ident: "S", + generics: Generics, + data: Data::Struct { + fields: Fields::Unit, + semi_token: Some, + }, + } + "###); +} + +#[test] +fn test_fields_on_unit_struct() { + let input = quote! { + struct S; + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Inherited, + ident: "S", + generics: Generics, + data: Data::Struct { + fields: Fields::Unit, + semi_token: Some, + }, + } + "###); + + let data = match input.data { + Data::Struct(data) => data, + _ => panic!("expected a struct"), + }; + + assert_eq!(0, data.fields.iter().count()); +} + +#[test] +fn test_fields_on_named_struct() { + let input = quote! 
{ + struct S { + foo: i32, + pub bar: String, + } + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Inherited, + ident: "S", + generics: Generics, + data: Data::Struct { + fields: Fields::Named { + named: [ + Field { + vis: Visibility::Inherited, + ident: Some("foo"), + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "i32", + }, + ], + }, + }, + }, + Token![,], + Field { + vis: Visibility::Public, + ident: Some("bar"), + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "String", + }, + ], + }, + }, + }, + Token![,], + ], + }, + }, + } + "###); + + let data = match input.data { + Data::Struct(data) => data, + _ => panic!("expected a struct"), + }; + + snapshot!(data.fields.into_iter().collect::<Vec<_>>(), @r###" + [ + Field { + vis: Visibility::Inherited, + ident: Some("foo"), + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "i32", + }, + ], + }, + }, + }, + Field { + vis: Visibility::Public, + ident: Some("bar"), + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "String", + }, + ], + }, + }, + }, + ] + "###); +} + +#[test] +fn test_fields_on_tuple_struct() { + let input = quote! { + struct S(i32, pub String); + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Inherited, + ident: "S", + generics: Generics, + data: Data::Struct { + fields: Fields::Unnamed { + unnamed: [ + Field { + vis: Visibility::Inherited, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "i32", + }, + ], + }, + }, + }, + Token![,], + Field { + vis: Visibility::Public, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "String", + }, + ], + }, + }, + }, + ], + }, + semi_token: Some, + }, + } + "###); + + let data = match input.data { + Data::Struct(data) => data, + _ => panic!("expected a struct"), + }; + + snapshot!(data.fields.iter().collect::<Vec<_>>(), @r###" + [ + Field { + vis: Visibility::Inherited, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "i32", + }, + ], + }, + }, + }, + Field { + vis: Visibility::Public, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "String", + }, + ], + }, + }, + }, + ] + "###); +} + +#[test] +fn test_ambiguous_crate() { + let input = quote! { + // The field type is `(crate::X)` not `crate (::X)`. 
+ struct S(crate::X); + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Inherited, + ident: "S", + generics: Generics, + data: Data::Struct { + fields: Fields::Unnamed { + unnamed: [ + Field { + vis: Visibility::Inherited, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "crate", + }, + Token![::], + PathSegment { + ident: "X", + }, + ], + }, + }, + }, + ], + }, + semi_token: Some, + }, + } + "###); +} diff --git a/vendor/syn/tests/test_expr.rs b/vendor/syn/tests/test_expr.rs new file mode 100644 index 0000000..daf0d63 --- /dev/null +++ b/vendor/syn/tests/test_expr.rs @@ -0,0 +1,540 @@ +#![allow(clippy::single_element_loop, clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +use proc_macro2::{Delimiter, Group}; +use quote::{quote, ToTokens as _}; +use syn::punctuated::Punctuated; +use syn::{parse_quote, token, Expr, ExprRange, ExprTuple, Stmt, Token}; + +#[test] +fn test_expr_parse() { + let tokens = quote!(..100u32); + snapshot!(tokens as Expr, @r###" + Expr::Range { + limits: RangeLimits::HalfOpen, + end: Some(Expr::Lit { + lit: 100u32, + }), + } + "###); + + let tokens = quote!(..100u32); + snapshot!(tokens as ExprRange, @r###" + ExprRange { + limits: RangeLimits::HalfOpen, + end: Some(Expr::Lit { + lit: 100u32, + }), + } + "###); +} + +#[test] +fn test_await() { + // Must not parse as Expr::Field. + let tokens = quote!(fut.await); + + snapshot!(tokens as Expr, @r###" + Expr::Await { + base: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "fut", + }, + ], + }, + }, + } + "###); +} + +#[rustfmt::skip] +#[test] +fn test_tuple_multi_index() { + let expected = snapshot!("tuple.0.0" as Expr, @r###" + Expr::Field { + base: Expr::Field { + base: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "tuple", + }, + ], + }, + }, + member: Member::Unnamed(Index { + index: 0, + }), + }, + member: Member::Unnamed(Index { + index: 0, + }), + } + "###); + + for &input in &[ + "tuple .0.0", + "tuple. 0.0", + "tuple.0 .0", + "tuple.0. 0", + "tuple . 0 . 0", + ] { + assert_eq!(expected, syn::parse_str(input).unwrap()); + } + + for tokens in [ + quote!(tuple.0.0), + quote!(tuple .0.0), + quote!(tuple. 0.0), + quote!(tuple.0 .0), + quote!(tuple.0. 0), + quote!(tuple . 0 . 0), + ] { + assert_eq!(expected, syn::parse2(tokens).unwrap()); + } +} + +#[test] +fn test_macro_variable_func() { + // mimics the token stream corresponding to `$fn()` + let path = Group::new(Delimiter::None, quote!(f)); + let tokens = quote!(#path()); + + snapshot!(tokens as Expr, @r###" + Expr::Call { + func: Expr::Group { + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "f", + }, + ], + }, + }, + }, + } + "###); + + let path = Group::new(Delimiter::None, quote! 
{ #[inside] f }); + let tokens = quote!(#[outside] #path()); + + snapshot!(tokens as Expr, @r###" + Expr::Call { + attrs: [ + Attribute { + style: AttrStyle::Outer, + meta: Meta::Path { + segments: [ + PathSegment { + ident: "outside", + }, + ], + }, + }, + ], + func: Expr::Group { + expr: Expr::Path { + attrs: [ + Attribute { + style: AttrStyle::Outer, + meta: Meta::Path { + segments: [ + PathSegment { + ident: "inside", + }, + ], + }, + }, + ], + path: Path { + segments: [ + PathSegment { + ident: "f", + }, + ], + }, + }, + }, + } + "###); +} + +#[test] +fn test_macro_variable_macro() { + // mimics the token stream corresponding to `$macro!()` + let mac = Group::new(Delimiter::None, quote!(m)); + let tokens = quote!(#mac!()); + + snapshot!(tokens as Expr, @r###" + Expr::Macro { + mac: Macro { + path: Path { + segments: [ + PathSegment { + ident: "m", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(``), + }, + } + "###); +} + +#[test] +fn test_macro_variable_struct() { + // mimics the token stream corresponding to `$struct {}` + let s = Group::new(Delimiter::None, quote! { S }); + let tokens = quote!(#s {}); + + snapshot!(tokens as Expr, @r###" + Expr::Struct { + path: Path { + segments: [ + PathSegment { + ident: "S", + }, + ], + }, + } + "###); +} + +#[test] +fn test_macro_variable_unary() { + // mimics the token stream corresponding to `$expr.method()` where expr is `&self` + let inner = Group::new(Delimiter::None, quote!(&self)); + let tokens = quote!(#inner.method()); + snapshot!(tokens as Expr, @r###" + Expr::MethodCall { + receiver: Expr::Group { + expr: Expr::Reference { + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "self", + }, + ], + }, + }, + }, + }, + method: "method", + } + "###); +} + +#[test] +fn test_macro_variable_match_arm() { + // mimics the token stream corresponding to `match v { _ => $expr }` + let expr = Group::new(Delimiter::None, quote! { #[a] () }); + let tokens = quote!(match v { _ => #expr }); + snapshot!(tokens as Expr, @r###" + Expr::Match { + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "v", + }, + ], + }, + }, + arms: [ + Arm { + pat: Pat::Wild, + body: Expr::Group { + expr: Expr::Tuple { + attrs: [ + Attribute { + style: AttrStyle::Outer, + meta: Meta::Path { + segments: [ + PathSegment { + ident: "a", + }, + ], + }, + }, + ], + }, + }, + }, + ], + } + "###); + + let expr = Group::new(Delimiter::None, quote!(loop {} + 1)); + let tokens = quote!(match v { _ => #expr }); + snapshot!(tokens as Expr, @r###" + Expr::Match { + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "v", + }, + ], + }, + }, + arms: [ + Arm { + pat: Pat::Wild, + body: Expr::Group { + expr: Expr::Binary { + left: Expr::Loop { + body: Block { + stmts: [], + }, + }, + op: BinOp::Add, + right: Expr::Lit { + lit: 1, + }, + }, + }, + }, + ], + } + "###); +} + +// https://github.com/dtolnay/syn/issues/1019 +#[test] +fn test_closure_vs_rangefull() { + #[rustfmt::skip] // rustfmt bug: https://github.com/rust-lang/rustfmt/issues/4808 + let tokens = quote!(|| .. 
.method()); + snapshot!(tokens as Expr, @r###" + Expr::MethodCall { + receiver: Expr::Closure { + output: ReturnType::Default, + body: Expr::Range { + limits: RangeLimits::HalfOpen, + }, + }, + method: "method", + } + "###); +} + +#[test] +fn test_postfix_operator_after_cast() { + syn::parse_str::<Expr>("|| &x as T[0]").unwrap_err(); + syn::parse_str::<Expr>("|| () as ()()").unwrap_err(); +} + +#[test] +fn test_ranges() { + syn::parse_str::<Expr>("..").unwrap(); + syn::parse_str::<Expr>("..hi").unwrap(); + syn::parse_str::<Expr>("lo..").unwrap(); + syn::parse_str::<Expr>("lo..hi").unwrap(); + + syn::parse_str::<Expr>("..=").unwrap_err(); + syn::parse_str::<Expr>("..=hi").unwrap(); + syn::parse_str::<Expr>("lo..=").unwrap_err(); + syn::parse_str::<Expr>("lo..=hi").unwrap(); + + syn::parse_str::<Expr>("...").unwrap_err(); + syn::parse_str::<Expr>("...hi").unwrap_err(); + syn::parse_str::<Expr>("lo...").unwrap_err(); + syn::parse_str::<Expr>("lo...hi").unwrap_err(); +} + +#[test] +fn test_ambiguous_label() { + for stmt in [ + quote! { + return 'label: loop { break 'label 42; }; + }, + quote! { + break ('label: loop { break 'label 42; }); + }, + quote! { + break 1 + 'label: loop { break 'label 42; }; + }, + quote! { + break 'outer 'inner: loop { break 'inner 42; }; + }, + ] { + syn::parse2::<Stmt>(stmt).unwrap(); + } + + for stmt in [ + // Parentheses required. See https://github.com/rust-lang/rust/pull/87026. + quote! { + break 'label: loop { break 'label 42; }; + }, + ] { + syn::parse2::<Stmt>(stmt).unwrap_err(); + } +} + +#[test] +fn test_extended_interpolated_path() { + let path = Group::new(Delimiter::None, quote!(a::b)); + + let tokens = quote!(if #path {}); + snapshot!(tokens as Expr, @r###" + Expr::If { + cond: Expr::Group { + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "a", + }, + Token![::], + PathSegment { + ident: "b", + }, + ], + }, + }, + }, + then_branch: Block { + stmts: [], + }, + } + "###); + + let tokens = quote!(#path {}); + snapshot!(tokens as Expr, @r###" + Expr::Struct { + path: Path { + segments: [ + PathSegment { + ident: "a", + }, + Token![::], + PathSegment { + ident: "b", + }, + ], + }, + } + "###); + + let tokens = quote!(#path :: c); + snapshot!(tokens as Expr, @r###" + Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "a", + }, + Token![::], + PathSegment { + ident: "b", + }, + Token![::], + PathSegment { + ident: "c", + }, + ], + }, + } + "###); + + let nested = Group::new(Delimiter::None, quote!(a::b || true)); + let tokens = quote!(if #nested && false {}); + snapshot!(tokens as Expr, @r###" + Expr::If { + cond: Expr::Binary { + left: Expr::Group { + expr: Expr::Binary { + left: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "a", + }, + Token![::], + PathSegment { + ident: "b", + }, + ], + }, + }, + op: BinOp::Or, + right: Expr::Lit { + lit: Lit::Bool { + value: true, + }, + }, + }, + }, + op: BinOp::And, + right: Expr::Lit { + lit: Lit::Bool { + value: false, + }, + }, + }, + then_branch: Block { + stmts: [], + }, + } + "###); +} + +#[test] +fn test_tuple_comma() { + let mut expr = ExprTuple { + attrs: Vec::new(), + paren_token: token::Paren::default(), + elems: Punctuated::new(), + }; + snapshot!(expr.to_token_stream() as Expr, @"Expr::Tuple"); + + expr.elems.push_value(parse_quote!(continue)); + // Must not parse to Expr::Paren + snapshot!(expr.to_token_stream() as Expr, @r###" + Expr::Tuple { + elems: [ + Expr::Continue, + Token![,], + ], + } + "###); + + 
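+    // Making the trailing comma explicit should leave the printed form
+    // unchanged; the next snapshot is identical to the one above.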
expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Expr, @r###" + Expr::Tuple { + elems: [ + Expr::Continue, + Token![,], + ], + } + "###); + + expr.elems.push_value(parse_quote!(continue)); + snapshot!(expr.to_token_stream() as Expr, @r###" + Expr::Tuple { + elems: [ + Expr::Continue, + Token![,], + Expr::Continue, + ], + } + "###); + + expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Expr, @r###" + Expr::Tuple { + elems: [ + Expr::Continue, + Token![,], + Expr::Continue, + Token![,], + ], + } + "###); +} diff --git a/vendor/syn/tests/test_generics.rs b/vendor/syn/tests/test_generics.rs new file mode 100644 index 0000000..3faf0db --- /dev/null +++ b/vendor/syn/tests/test_generics.rs @@ -0,0 +1,282 @@ +#![allow( + clippy::manual_let_else, + clippy::too_many_lines, + clippy::uninlined_format_args +)] + +#[macro_use] +mod macros; + +use quote::quote; +use syn::{DeriveInput, ItemFn, TypeParamBound, WhereClause, WherePredicate}; + +#[test] +fn test_split_for_impl() { + let input = quote! { + struct S<'a, 'b: 'a, #[may_dangle] T: 'a = ()> where T: Debug; + }; + + snapshot!(input as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Inherited, + ident: "S", + generics: Generics { + lt_token: Some, + params: [ + GenericParam::Lifetime(LifetimeParam { + lifetime: Lifetime { + ident: "a", + }, + }), + Token![,], + GenericParam::Lifetime(LifetimeParam { + lifetime: Lifetime { + ident: "b", + }, + colon_token: Some, + bounds: [ + Lifetime { + ident: "a", + }, + ], + }), + Token![,], + GenericParam::Type(TypeParam { + attrs: [ + Attribute { + style: AttrStyle::Outer, + meta: Meta::Path { + segments: [ + PathSegment { + ident: "may_dangle", + }, + ], + }, + }, + ], + ident: "T", + colon_token: Some, + bounds: [ + TypeParamBound::Lifetime { + ident: "a", + }, + ], + eq_token: Some, + default: Some(Type::Tuple), + }), + ], + gt_token: Some, + where_clause: Some(WhereClause { + predicates: [ + WherePredicate::Type(PredicateType { + bounded_ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "T", + }, + ], + }, + }, + bounds: [ + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "Debug", + }, + ], + }, + }), + ], + }), + ], + }), + }, + data: Data::Struct { + fields: Fields::Unit, + semi_token: Some, + }, + } + "###); + + let generics = input.generics; + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + let generated = quote! { + impl #impl_generics MyTrait for Test #ty_generics #where_clause {} + }; + let expected = quote! { + impl<'a, 'b: 'a, #[may_dangle] T: 'a> MyTrait + for Test<'a, 'b, T> + where + T: Debug + {} + }; + assert_eq!(generated.to_string(), expected.to_string()); + + let turbofish = ty_generics.as_turbofish(); + let generated = quote! { + Test #turbofish + }; + let expected = quote! 
{ + Test::<'a, 'b, T> + }; + assert_eq!(generated.to_string(), expected.to_string()); +} + +#[test] +fn test_ty_param_bound() { + let tokens = quote!('a); + snapshot!(tokens as TypeParamBound, @r###" + TypeParamBound::Lifetime { + ident: "a", + } + "###); + + let tokens = quote!('_); + snapshot!(tokens as TypeParamBound, @r###" + TypeParamBound::Lifetime { + ident: "_", + } + "###); + + let tokens = quote!(Debug); + snapshot!(tokens as TypeParamBound, @r###" + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "Debug", + }, + ], + }, + }) + "###); + + let tokens = quote!(?Sized); + snapshot!(tokens as TypeParamBound, @r###" + TypeParamBound::Trait(TraitBound { + modifier: TraitBoundModifier::Maybe, + path: Path { + segments: [ + PathSegment { + ident: "Sized", + }, + ], + }, + }) + "###); +} + +#[test] +fn test_fn_precedence_in_where_clause() { + // This should parse as two separate bounds, `FnOnce() -> i32` and `Send` - not + // `FnOnce() -> (i32 + Send)`. + let input = quote! { + fn f<G>() + where + G: FnOnce() -> i32 + Send, + { + } + }; + + snapshot!(input as ItemFn, @r###" + ItemFn { + vis: Visibility::Inherited, + sig: Signature { + ident: "f", + generics: Generics { + lt_token: Some, + params: [ + GenericParam::Type(TypeParam { + ident: "G", + }), + ], + gt_token: Some, + where_clause: Some(WhereClause { + predicates: [ + WherePredicate::Type(PredicateType { + bounded_ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "G", + }, + ], + }, + }, + bounds: [ + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "FnOnce", + arguments: PathArguments::Parenthesized { + output: ReturnType::Type( + Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "i32", + }, + ], + }, + }, + ), + }, + }, + ], + }, + }), + Token![+], + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "Send", + }, + ], + }, + }), + ], + }), + Token![,], + ], + }), + }, + output: ReturnType::Default, + }, + block: Block { + stmts: [], + }, + } + "###); + + let where_clause = input.sig.generics.where_clause.as_ref().unwrap(); + assert_eq!(where_clause.predicates.len(), 1); + + let predicate = match &where_clause.predicates[0] { + WherePredicate::Type(pred) => pred, + _ => panic!("wrong predicate kind"), + }; + + assert_eq!(predicate.bounds.len(), 2, "{:#?}", predicate.bounds); + + let first_bound = &predicate.bounds[0]; + assert_eq!(quote!(#first_bound).to_string(), "FnOnce () -> i32"); + + let second_bound = &predicate.bounds[1]; + assert_eq!(quote!(#second_bound).to_string(), "Send"); +} + +#[test] +fn test_where_clause_at_end_of_input() { + let input = quote! 
{ + where + }; + + snapshot!(input as WhereClause, @"WhereClause"); + + assert_eq!(input.predicates.len(), 0); +} diff --git a/vendor/syn/tests/test_grouping.rs b/vendor/syn/tests/test_grouping.rs new file mode 100644 index 0000000..6a73a92 --- /dev/null +++ b/vendor/syn/tests/test_grouping.rs @@ -0,0 +1,53 @@ +#![allow(clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +use proc_macro2::{Delimiter, Group, Literal, Punct, Spacing, TokenStream, TokenTree}; +use syn::Expr; + +#[test] +fn test_grouping() { + let tokens: TokenStream = TokenStream::from_iter(vec![ + TokenTree::Literal(Literal::i32_suffixed(1)), + TokenTree::Punct(Punct::new('+', Spacing::Alone)), + TokenTree::Group(Group::new( + Delimiter::None, + TokenStream::from_iter(vec![ + TokenTree::Literal(Literal::i32_suffixed(2)), + TokenTree::Punct(Punct::new('+', Spacing::Alone)), + TokenTree::Literal(Literal::i32_suffixed(3)), + ]), + )), + TokenTree::Punct(Punct::new('*', Spacing::Alone)), + TokenTree::Literal(Literal::i32_suffixed(4)), + ]); + + assert_eq!(tokens.to_string(), "1i32 + 2i32 + 3i32 * 4i32"); + + snapshot!(tokens as Expr, @r###" + Expr::Binary { + left: Expr::Lit { + lit: 1i32, + }, + op: BinOp::Add, + right: Expr::Binary { + left: Expr::Group { + expr: Expr::Binary { + left: Expr::Lit { + lit: 2i32, + }, + op: BinOp::Add, + right: Expr::Lit { + lit: 3i32, + }, + }, + }, + op: BinOp::Mul, + right: Expr::Lit { + lit: 4i32, + }, + }, + } + "###); +} diff --git a/vendor/syn/tests/test_ident.rs b/vendor/syn/tests/test_ident.rs new file mode 100644 index 0000000..ee01bfc --- /dev/null +++ b/vendor/syn/tests/test_ident.rs @@ -0,0 +1,85 @@ +use proc_macro2::{Ident, Span, TokenStream}; +use std::str::FromStr; +use syn::Result; + +fn parse(s: &str) -> Result<Ident> { + syn::parse2(TokenStream::from_str(s).unwrap()) +} + +fn new(s: &str) -> Ident { + Ident::new(s, Span::call_site()) +} + +#[test] +fn ident_parse() { + parse("String").unwrap(); +} + +#[test] +fn ident_parse_keyword() { + parse("abstract").unwrap_err(); +} + +#[test] +fn ident_parse_empty() { + parse("").unwrap_err(); +} + +#[test] +fn ident_parse_lifetime() { + parse("'static").unwrap_err(); +} + +#[test] +fn ident_parse_underscore() { + parse("_").unwrap_err(); +} + +#[test] +fn ident_parse_number() { + parse("255").unwrap_err(); +} + +#[test] +fn ident_parse_invalid() { + parse("a#").unwrap_err(); +} + +#[test] +fn ident_new() { + new("String"); +} + +#[test] +fn ident_new_keyword() { + new("abstract"); +} + +#[test] +#[should_panic(expected = "use Option<Ident>")] +fn ident_new_empty() { + new(""); +} + +#[test] +#[should_panic(expected = "not a valid Ident")] +fn ident_new_lifetime() { + new("'static"); +} + +#[test] +fn ident_new_underscore() { + new("_"); +} + +#[test] +#[should_panic(expected = "use Literal instead")] +fn ident_new_number() { + new("255"); +} + +#[test] +#[should_panic(expected = "\"a#\" is not a valid Ident")] +fn ident_new_invalid() { + new("a#"); +} diff --git a/vendor/syn/tests/test_item.rs b/vendor/syn/tests/test_item.rs new file mode 100644 index 0000000..db9e3ab --- /dev/null +++ b/vendor/syn/tests/test_item.rs @@ -0,0 +1,332 @@ +#![allow(clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +use proc_macro2::{Delimiter, Group, Ident, Span, TokenStream, TokenTree}; +use quote::quote; +use syn::{Item, ItemTrait}; + +#[test] +fn test_macro_variable_attr() { + // mimics the token stream corresponding to `$attr fn f() {}` + let tokens = TokenStream::from_iter(vec![ + TokenTree::Group(Group::new(Delimiter::None, 
quote! { #[test] })), + TokenTree::Ident(Ident::new("fn", Span::call_site())), + TokenTree::Ident(Ident::new("f", Span::call_site())), + TokenTree::Group(Group::new(Delimiter::Parenthesis, TokenStream::new())), + TokenTree::Group(Group::new(Delimiter::Brace, TokenStream::new())), + ]); + + snapshot!(tokens as Item, @r###" + Item::Fn { + attrs: [ + Attribute { + style: AttrStyle::Outer, + meta: Meta::Path { + segments: [ + PathSegment { + ident: "test", + }, + ], + }, + }, + ], + vis: Visibility::Inherited, + sig: Signature { + ident: "f", + generics: Generics, + output: ReturnType::Default, + }, + block: Block { + stmts: [], + }, + } + "###); +} + +#[test] +fn test_negative_impl() { + // Rustc parses all of the following. + + #[cfg(any())] + impl ! {} + let tokens = quote! { + impl ! {} + }; + snapshot!(tokens as Item, @r###" + Item::Impl { + generics: Generics, + self_ty: Type::Never, + } + "###); + + #[cfg(any())] + #[rustfmt::skip] + impl !Trait {} + let tokens = quote! { + impl !Trait {} + }; + snapshot!(tokens as Item, @r###" + Item::Impl { + generics: Generics, + self_ty: Type::Verbatim(`! Trait`), + } + "###); + + #[cfg(any())] + impl !Trait for T {} + let tokens = quote! { + impl !Trait for T {} + }; + snapshot!(tokens as Item, @r###" + Item::Impl { + generics: Generics, + trait_: Some(( + Some, + Path { + segments: [ + PathSegment { + ident: "Trait", + }, + ], + }, + )), + self_ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "T", + }, + ], + }, + }, + } + "###); + + #[cfg(any())] + #[rustfmt::skip] + impl !! {} + let tokens = quote! { + impl !! {} + }; + snapshot!(tokens as Item, @r###" + Item::Impl { + generics: Generics, + self_ty: Type::Verbatim(`! !`), + } + "###); +} + +#[test] +fn test_macro_variable_impl() { + // mimics the token stream corresponding to `impl $trait for $ty {}` + let tokens = TokenStream::from_iter(vec![ + TokenTree::Ident(Ident::new("impl", Span::call_site())), + TokenTree::Group(Group::new(Delimiter::None, quote!(Trait))), + TokenTree::Ident(Ident::new("for", Span::call_site())), + TokenTree::Group(Group::new(Delimiter::None, quote!(Type))), + TokenTree::Group(Group::new(Delimiter::Brace, TokenStream::new())), + ]); + + snapshot!(tokens as Item, @r###" + Item::Impl { + generics: Generics, + trait_: Some(( + None, + Path { + segments: [ + PathSegment { + ident: "Trait", + }, + ], + }, + )), + self_ty: Type::Group { + elem: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Type", + }, + ], + }, + }, + }, + } + "###); +} + +#[test] +fn test_supertraits() { + // Rustc parses all of the following. 
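+    // Each case degenerates the supertrait list differently: an empty where
+    // clause, a bare colon with no bounds, a single bound, and a trailing `+`
+    // after the last bound.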
+ + #[rustfmt::skip] + let tokens = quote!(trait Trait where {}); + snapshot!(tokens as ItemTrait, @r###" + ItemTrait { + vis: Visibility::Inherited, + ident: "Trait", + generics: Generics { + where_clause: Some(WhereClause), + }, + } + "###); + + #[rustfmt::skip] + let tokens = quote!(trait Trait: where {}); + snapshot!(tokens as ItemTrait, @r###" + ItemTrait { + vis: Visibility::Inherited, + ident: "Trait", + generics: Generics { + where_clause: Some(WhereClause), + }, + colon_token: Some, + } + "###); + + #[rustfmt::skip] + let tokens = quote!(trait Trait: Sized where {}); + snapshot!(tokens as ItemTrait, @r###" + ItemTrait { + vis: Visibility::Inherited, + ident: "Trait", + generics: Generics { + where_clause: Some(WhereClause), + }, + colon_token: Some, + supertraits: [ + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "Sized", + }, + ], + }, + }), + ], + } + "###); + + #[rustfmt::skip] + let tokens = quote!(trait Trait: Sized + where {}); + snapshot!(tokens as ItemTrait, @r###" + ItemTrait { + vis: Visibility::Inherited, + ident: "Trait", + generics: Generics { + where_clause: Some(WhereClause), + }, + colon_token: Some, + supertraits: [ + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "Sized", + }, + ], + }, + }), + Token![+], + ], + } + "###); +} + +#[test] +fn test_type_empty_bounds() { + #[rustfmt::skip] + let tokens = quote! { + trait Foo { + type Bar: ; + } + }; + + snapshot!(tokens as ItemTrait, @r###" + ItemTrait { + vis: Visibility::Inherited, + ident: "Foo", + generics: Generics, + items: [ + TraitItem::Type { + ident: "Bar", + generics: Generics, + colon_token: Some, + }, + ], + } + "###); +} + +#[test] +fn test_impl_visibility() { + let tokens = quote! { + pub default unsafe impl union {} + }; + + snapshot!(tokens as Item, @"Item::Verbatim(`pub default unsafe impl union { }`)"); +} + +#[test] +fn test_impl_type_parameter_defaults() { + #[cfg(any())] + impl<T = ()> () {} + let tokens = quote! { + impl<T = ()> () {} + }; + snapshot!(tokens as Item, @r###" + Item::Impl { + generics: Generics { + lt_token: Some, + params: [ + GenericParam::Type(TypeParam { + ident: "T", + eq_token: Some, + default: Some(Type::Tuple), + }), + ], + gt_token: Some, + }, + self_ty: Type::Tuple, + } + "###); +} + +#[test] +fn test_impl_trait_trailing_plus() { + let tokens = quote! { + fn f() -> impl Sized + {} + }; + + snapshot!(tokens as Item, @r###" + Item::Fn { + vis: Visibility::Inherited, + sig: Signature { + ident: "f", + generics: Generics, + output: ReturnType::Type( + Type::ImplTrait { + bounds: [ + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "Sized", + }, + ], + }, + }), + Token![+], + ], + }, + ), + }, + block: Block { + stmts: [], + }, + } + "###); +} diff --git a/vendor/syn/tests/test_iterators.rs b/vendor/syn/tests/test_iterators.rs new file mode 100644 index 0000000..5f0eff5 --- /dev/null +++ b/vendor/syn/tests/test_iterators.rs @@ -0,0 +1,70 @@ +#![allow(clippy::uninlined_format_args)] + +use syn::punctuated::{Pair, Punctuated}; +use syn::Token; + +#[macro_use] +mod macros; + +macro_rules! 
check_exact_size_iterator { + ($iter:expr) => {{ + let iter = $iter; + let size_hint = iter.size_hint(); + let len = iter.len(); + let count = iter.count(); + assert_eq!(len, count); + assert_eq!(size_hint, (count, Some(count))); + }}; +} + +#[test] +fn pairs() { + let mut p: Punctuated<_, Token![,]> = punctuated!(2, 3, 4); + + check_exact_size_iterator!(p.pairs()); + check_exact_size_iterator!(p.pairs_mut()); + check_exact_size_iterator!(p.into_pairs()); + + let mut p: Punctuated<_, Token![,]> = punctuated!(2, 3, 4); + + assert_eq!(p.pairs().next_back().map(Pair::into_value), Some(&4)); + assert_eq!( + p.pairs_mut().next_back().map(Pair::into_value), + Some(&mut 4) + ); + assert_eq!(p.into_pairs().next_back().map(Pair::into_value), Some(4)); +} + +#[test] +fn iter() { + let mut p: Punctuated<_, Token![,]> = punctuated!(2, 3, 4); + + check_exact_size_iterator!(p.iter()); + check_exact_size_iterator!(p.iter_mut()); + check_exact_size_iterator!(p.into_iter()); + + let mut p: Punctuated<_, Token![,]> = punctuated!(2, 3, 4); + + assert_eq!(p.iter().next_back(), Some(&4)); + assert_eq!(p.iter_mut().next_back(), Some(&mut 4)); + assert_eq!(p.into_iter().next_back(), Some(4)); +} + +#[test] +fn may_dangle() { + let p: Punctuated<_, Token![,]> = punctuated!(2, 3, 4); + for element in &p { + if *element == 2 { + drop(p); + break; + } + } + + let mut p: Punctuated<_, Token![,]> = punctuated!(2, 3, 4); + for element in &mut p { + if *element == 2 { + drop(p); + break; + } + } +} diff --git a/vendor/syn/tests/test_lit.rs b/vendor/syn/tests/test_lit.rs new file mode 100644 index 0000000..bc50136 --- /dev/null +++ b/vendor/syn/tests/test_lit.rs @@ -0,0 +1,273 @@ +#![allow( + clippy::float_cmp, + clippy::non_ascii_literal, + clippy::single_match_else, + clippy::uninlined_format_args +)] + +#[macro_use] +mod macros; + +use proc_macro2::{Delimiter, Group, Literal, Span, TokenStream, TokenTree}; +use quote::ToTokens; +use std::str::FromStr; +use syn::{Lit, LitFloat, LitInt, LitStr}; + +fn lit(s: &str) -> Lit { + let mut tokens = TokenStream::from_str(s).unwrap().into_iter(); + match tokens.next().unwrap() { + TokenTree::Literal(lit) => { + assert!(tokens.next().is_none()); + Lit::new(lit) + } + wrong => panic!("{:?}", wrong), + } +} + +#[test] +fn strings() { + fn test_string(s: &str, value: &str) { + match lit(s) { + Lit::Str(lit) => { + assert_eq!(lit.value(), value); + let again = lit.into_token_stream().to_string(); + if again != s { + test_string(&again, value); + } + } + wrong => panic!("{:?}", wrong), + } + } + + test_string("\"a\"", "a"); + test_string("\"\\n\"", "\n"); + test_string("\"\\r\"", "\r"); + test_string("\"\\t\"", "\t"); + test_string("\"🐕\"", "🐕"); // NOTE: This is an emoji + test_string("\"\\\"\"", "\""); + test_string("\"'\"", "'"); + test_string("\"\"", ""); + test_string("\"\\u{1F415}\"", "\u{1F415}"); + test_string("\"\\u{1_2__3_}\"", "\u{123}"); + test_string( + "\"contains\nnewlines\\\nescaped newlines\"", + "contains\nnewlinesescaped newlines", + ); + test_string( + "\"escaped newline\\\n \x0C unsupported whitespace\"", + "escaped newline\x0C unsupported whitespace", + ); + test_string("r\"raw\nstring\\\nhere\"", "raw\nstring\\\nhere"); + test_string("\"...\"q", "..."); + test_string("r\"...\"q", "..."); + test_string("r##\"...\"##q", "..."); +} + +#[test] +fn byte_strings() { + fn test_byte_string(s: &str, value: &[u8]) { + match lit(s) { + Lit::ByteStr(lit) => { + assert_eq!(lit.value(), value); + let again = lit.into_token_stream().to_string(); + if again != s { + 
test_byte_string(&again, value); + } + } + wrong => panic!("{:?}", wrong), + } + } + + test_byte_string("b\"a\"", b"a"); + test_byte_string("b\"\\n\"", b"\n"); + test_byte_string("b\"\\r\"", b"\r"); + test_byte_string("b\"\\t\"", b"\t"); + test_byte_string("b\"\\\"\"", b"\""); + test_byte_string("b\"'\"", b"'"); + test_byte_string("b\"\"", b""); + test_byte_string( + "b\"contains\nnewlines\\\nescaped newlines\"", + b"contains\nnewlinesescaped newlines", + ); + test_byte_string("br\"raw\nstring\\\nhere\"", b"raw\nstring\\\nhere"); + test_byte_string("b\"...\"q", b"..."); + test_byte_string("br\"...\"q", b"..."); + test_byte_string("br##\"...\"##q", b"..."); +} + +#[test] +fn bytes() { + fn test_byte(s: &str, value: u8) { + match lit(s) { + Lit::Byte(lit) => { + assert_eq!(lit.value(), value); + let again = lit.into_token_stream().to_string(); + assert_eq!(again, s); + } + wrong => panic!("{:?}", wrong), + } + } + + test_byte("b'a'", b'a'); + test_byte("b'\\n'", b'\n'); + test_byte("b'\\r'", b'\r'); + test_byte("b'\\t'", b'\t'); + test_byte("b'\\''", b'\''); + test_byte("b'\"'", b'"'); + test_byte("b'a'q", b'a'); +} + +#[test] +fn chars() { + fn test_char(s: &str, value: char) { + match lit(s) { + Lit::Char(lit) => { + assert_eq!(lit.value(), value); + let again = lit.into_token_stream().to_string(); + if again != s { + test_char(&again, value); + } + } + wrong => panic!("{:?}", wrong), + } + } + + test_char("'a'", 'a'); + test_char("'\\n'", '\n'); + test_char("'\\r'", '\r'); + test_char("'\\t'", '\t'); + test_char("'🐕'", '🐕'); // NOTE: This is an emoji + test_char("'\\''", '\''); + test_char("'\"'", '"'); + test_char("'\\u{1F415}'", '\u{1F415}'); + test_char("'a'q", 'a'); +} + +#[test] +fn ints() { + fn test_int(s: &str, value: u64, suffix: &str) { + match lit(s) { + Lit::Int(lit) => { + assert_eq!(lit.base10_digits().parse::<u64>().unwrap(), value); + assert_eq!(lit.suffix(), suffix); + let again = lit.into_token_stream().to_string(); + if again != s { + test_int(&again, value, suffix); + } + } + wrong => panic!("{:?}", wrong), + } + } + + test_int("5", 5, ""); + test_int("5u32", 5, "u32"); + test_int("0E", 0, "E"); + test_int("0ECMA", 0, "ECMA"); + test_int("0o0A", 0, "A"); + test_int("5_0", 50, ""); + test_int("5_____0_____", 50, ""); + test_int("0x7f", 127, ""); + test_int("0x7F", 127, ""); + test_int("0b1001", 9, ""); + test_int("0o73", 59, ""); + test_int("0x7Fu8", 127, "u8"); + test_int("0b1001i8", 9, "i8"); + test_int("0o73u32", 59, "u32"); + test_int("0x__7___f_", 127, ""); + test_int("0x__7___F_", 127, ""); + test_int("0b_1_0__01", 9, ""); + test_int("0o_7__3", 59, ""); + test_int("0x_7F__u8", 127, "u8"); + test_int("0b__10__0_1i8", 9, "i8"); + test_int("0o__7__________________3u32", 59, "u32"); + test_int("0e1\u{5c5}", 0, "e1\u{5c5}"); +} + +#[test] +fn floats() { + fn test_float(s: &str, value: f64, suffix: &str) { + match lit(s) { + Lit::Float(lit) => { + assert_eq!(lit.base10_digits().parse::<f64>().unwrap(), value); + assert_eq!(lit.suffix(), suffix); + let again = lit.into_token_stream().to_string(); + if again != s { + test_float(&again, value, suffix); + } + } + wrong => panic!("{:?}", wrong), + } + } + + test_float("5.5", 5.5, ""); + test_float("5.5E12", 5.5e12, ""); + test_float("5.5e12", 5.5e12, ""); + test_float("1.0__3e-12", 1.03e-12, ""); + test_float("1.03e+12", 1.03e12, ""); + test_float("9e99e99", 9e99, "e99"); + test_float("1e_0", 1.0, ""); + test_float("0.0ECMA", 0.0, "ECMA"); +} + +#[test] +fn negative() { + let span = Span::call_site(); + assert_eq!("-1", 
LitInt::new("-1", span).to_string()); + assert_eq!("-1i8", LitInt::new("-1i8", span).to_string()); + assert_eq!("-1i16", LitInt::new("-1i16", span).to_string()); + assert_eq!("-1i32", LitInt::new("-1i32", span).to_string()); + assert_eq!("-1i64", LitInt::new("-1i64", span).to_string()); + assert_eq!("-1.5", LitFloat::new("-1.5", span).to_string()); + assert_eq!("-1.5f32", LitFloat::new("-1.5f32", span).to_string()); + assert_eq!("-1.5f64", LitFloat::new("-1.5f64", span).to_string()); +} + +#[test] +fn suffix() { + fn get_suffix(token: &str) -> String { + let lit = syn::parse_str::<Lit>(token).unwrap(); + match lit { + Lit::Str(lit) => lit.suffix().to_owned(), + Lit::ByteStr(lit) => lit.suffix().to_owned(), + Lit::Byte(lit) => lit.suffix().to_owned(), + Lit::Char(lit) => lit.suffix().to_owned(), + Lit::Int(lit) => lit.suffix().to_owned(), + Lit::Float(lit) => lit.suffix().to_owned(), + _ => unimplemented!(), + } + } + + assert_eq!(get_suffix("\"\"s"), "s"); + assert_eq!(get_suffix("r\"\"r"), "r"); + assert_eq!(get_suffix("b\"\"b"), "b"); + assert_eq!(get_suffix("br\"\"br"), "br"); + assert_eq!(get_suffix("r#\"\"#r"), "r"); + assert_eq!(get_suffix("'c'c"), "c"); + assert_eq!(get_suffix("b'b'b"), "b"); + assert_eq!(get_suffix("1i32"), "i32"); + assert_eq!(get_suffix("1_i32"), "i32"); + assert_eq!(get_suffix("1.0f32"), "f32"); + assert_eq!(get_suffix("1.0_f32"), "f32"); +} + +#[test] +fn test_deep_group_empty() { + let tokens = TokenStream::from_iter(vec![TokenTree::Group(Group::new( + Delimiter::None, + TokenStream::from_iter(vec![TokenTree::Group(Group::new( + Delimiter::None, + TokenStream::from_iter(vec![TokenTree::Literal(Literal::string("hi"))]), + ))]), + ))]); + + snapshot!(tokens as Lit, @r#""hi""# ); +} + +#[test] +fn test_error() { + let err = syn::parse_str::<LitStr>("...").unwrap_err(); + assert_eq!("expected string literal", err.to_string()); + + let err = syn::parse_str::<LitStr>("5").unwrap_err(); + assert_eq!("expected string literal", err.to_string()); +} diff --git a/vendor/syn/tests/test_meta.rs b/vendor/syn/tests/test_meta.rs new file mode 100644 index 0000000..d991c38 --- /dev/null +++ b/vendor/syn/tests/test_meta.rs @@ -0,0 +1,154 @@ +#![allow( + clippy::shadow_unrelated, + clippy::too_many_lines, + clippy::uninlined_format_args +)] + +#[macro_use] +mod macros; + +use syn::{Meta, MetaList, MetaNameValue}; + +#[test] +fn test_parse_meta_item_word() { + let input = "hello"; + + snapshot!(input as Meta, @r###" + Meta::Path { + segments: [ + PathSegment { + ident: "hello", + }, + ], + } + "###); +} + +#[test] +fn test_parse_meta_name_value() { + let input = "foo = 5"; + let (inner, meta) = (input, input); + + snapshot!(inner as MetaNameValue, @r###" + MetaNameValue { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + value: Expr::Lit { + lit: 5, + }, + } + "###); + + snapshot!(meta as Meta, @r###" + Meta::NameValue { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + value: Expr::Lit { + lit: 5, + }, + } + "###); + + assert_eq!(meta, inner.into()); +} + +#[test] +fn test_parse_meta_item_list_lit() { + let input = "foo(5)"; + let (inner, meta) = (input, input); + + snapshot!(inner as MetaList, @r###" + MetaList { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`5`), + } + "###); + + snapshot!(meta as Meta, @r###" + Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + delimiter: MacroDelimiter::Paren, 
+ tokens: TokenStream(`5`), + } + "###); + + assert_eq!(meta, inner.into()); +} + +#[test] +fn test_parse_meta_item_multiple() { + let input = "foo(word, name = 5, list(name2 = 6), word2)"; + let (inner, meta) = (input, input); + + snapshot!(inner as MetaList, @r###" + MetaList { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`word , name = 5 , list (name2 = 6) , word2`), + } + "###); + + snapshot!(meta as Meta, @r###" + Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "foo", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`word , name = 5 , list (name2 = 6) , word2`), + } + "###); + + assert_eq!(meta, inner.into()); +} + +#[test] +fn test_parse_path() { + let input = "::serde::Serialize"; + snapshot!(input as Meta, @r###" + Meta::Path { + leading_colon: Some, + segments: [ + PathSegment { + ident: "serde", + }, + Token![::], + PathSegment { + ident: "Serialize", + }, + ], + } + "###); +} diff --git a/vendor/syn/tests/test_parse_buffer.rs b/vendor/syn/tests/test_parse_buffer.rs new file mode 100644 index 0000000..2205b50 --- /dev/null +++ b/vendor/syn/tests/test_parse_buffer.rs @@ -0,0 +1,92 @@ +#![allow(clippy::non_ascii_literal)] + +use proc_macro2::{Delimiter, Group, Punct, Spacing, TokenStream, TokenTree}; +use syn::parse::discouraged::Speculative as _; +use syn::parse::{Parse, ParseStream, Parser, Result}; +use syn::{parenthesized, Token}; + +#[test] +#[should_panic(expected = "Fork was not derived from the advancing parse stream")] +fn smuggled_speculative_cursor_between_sources() { + struct BreakRules; + impl Parse for BreakRules { + fn parse(input1: ParseStream) -> Result<Self> { + let nested = |input2: ParseStream| { + input1.advance_to(input2); + Ok(Self) + }; + nested.parse_str("") + } + } + + syn::parse_str::<BreakRules>("").unwrap(); +} + +#[test] +#[should_panic(expected = "Fork was not derived from the advancing parse stream")] +fn smuggled_speculative_cursor_between_brackets() { + struct BreakRules; + impl Parse for BreakRules { + fn parse(input: ParseStream) -> Result<Self> { + let a; + let b; + parenthesized!(a in input); + parenthesized!(b in input); + a.advance_to(&b); + Ok(Self) + } + } + + syn::parse_str::<BreakRules>("()()").unwrap(); +} + +#[test] +#[should_panic(expected = "Fork was not derived from the advancing parse stream")] +fn smuggled_speculative_cursor_into_brackets() { + struct BreakRules; + impl Parse for BreakRules { + fn parse(input: ParseStream) -> Result<Self> { + let a; + parenthesized!(a in input); + input.advance_to(&a); + Ok(Self) + } + } + + syn::parse_str::<BreakRules>("()").unwrap(); +} + +#[test] +fn trailing_empty_none_group() { + fn parse(input: ParseStream) -> Result<()> { + input.parse::<Token![+]>()?; + + let content; + parenthesized!(content in input); + content.parse::<Token![+]>()?; + + Ok(()) + } + + // `+ ( + <Ø Ø> ) <Ø <Ø Ø> Ø>` + let tokens = TokenStream::from_iter(vec![ + TokenTree::Punct(Punct::new('+', Spacing::Alone)), + TokenTree::Group(Group::new( + Delimiter::Parenthesis, + TokenStream::from_iter(vec![ + TokenTree::Punct(Punct::new('+', Spacing::Alone)), + TokenTree::Group(Group::new(Delimiter::None, TokenStream::new())), + ]), + )), + TokenTree::Group(Group::new(Delimiter::None, TokenStream::new())), + TokenTree::Group(Group::new( + Delimiter::None, + TokenStream::from_iter(vec![TokenTree::Group(Group::new( + Delimiter::None, + TokenStream::new(), + ))]), + )), + ]); + + 
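+    // For this parse to succeed, the trailing empty None-delimited groups
+    // must count as end of input, both inside the parentheses and at the
+    // top level.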
parse.parse2(tokens).unwrap(); +} diff --git a/vendor/syn/tests/test_parse_quote.rs b/vendor/syn/tests/test_parse_quote.rs new file mode 100644 index 0000000..73aae70 --- /dev/null +++ b/vendor/syn/tests/test_parse_quote.rs @@ -0,0 +1,164 @@ +#[macro_use] +mod macros; + +use syn::punctuated::Punctuated; +use syn::{parse_quote, Attribute, Field, Lit, Pat, Stmt, Token}; + +#[test] +fn test_attribute() { + let attr: Attribute = parse_quote!(#[test]); + snapshot!(attr, @r###" + Attribute { + style: AttrStyle::Outer, + meta: Meta::Path { + segments: [ + PathSegment { + ident: "test", + }, + ], + }, + } + "###); + + let attr: Attribute = parse_quote!(#![no_std]); + snapshot!(attr, @r###" + Attribute { + style: AttrStyle::Inner, + meta: Meta::Path { + segments: [ + PathSegment { + ident: "no_std", + }, + ], + }, + } + "###); +} + +#[test] +fn test_field() { + let field: Field = parse_quote!(pub enabled: bool); + snapshot!(field, @r###" + Field { + vis: Visibility::Public, + ident: Some("enabled"), + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "bool", + }, + ], + }, + }, + } + "###); + + let field: Field = parse_quote!(primitive::bool); + snapshot!(field, @r###" + Field { + vis: Visibility::Inherited, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "primitive", + }, + Token![::], + PathSegment { + ident: "bool", + }, + ], + }, + }, + } + "###); +} + +#[test] +fn test_pat() { + let pat: Pat = parse_quote!(Some(false) | None); + snapshot!(&pat, @r###" + Pat::Or { + cases: [ + Pat::TupleStruct { + path: Path { + segments: [ + PathSegment { + ident: "Some", + }, + ], + }, + elems: [ + Pat::Lit(ExprLit { + lit: Lit::Bool { + value: false, + }, + }), + ], + }, + Token![|], + Pat::Ident { + ident: "None", + }, + ], + } + "###); + + let boxed_pat: Box<Pat> = parse_quote!(Some(false) | None); + assert_eq!(*boxed_pat, pat); +} + +#[test] +fn test_punctuated() { + let punctuated: Punctuated<Lit, Token![|]> = parse_quote!(true | true); + snapshot!(punctuated, @r###" + [ + Lit::Bool { + value: true, + }, + Token![|], + Lit::Bool { + value: true, + }, + ] + "###); + + let punctuated: Punctuated<Lit, Token![|]> = parse_quote!(true | true |); + snapshot!(punctuated, @r###" + [ + Lit::Bool { + value: true, + }, + Token![|], + Lit::Bool { + value: true, + }, + Token![|], + ] + "###); +} + +#[test] +fn test_vec_stmt() { + let stmts: Vec<Stmt> = parse_quote! 
{ + let _; + true + }; + snapshot!(stmts, @r###" + [ + Stmt::Local { + pat: Pat::Wild, + }, + Stmt::Expr( + Expr::Lit { + lit: Lit::Bool { + value: true, + }, + }, + None, + ), + ] + "###); +} diff --git a/vendor/syn/tests/test_parse_stream.rs b/vendor/syn/tests/test_parse_stream.rs new file mode 100644 index 0000000..6e4a5a5 --- /dev/null +++ b/vendor/syn/tests/test_parse_stream.rs @@ -0,0 +1,14 @@ +#![allow(clippy::let_underscore_untyped)] + +use syn::ext::IdentExt as _; +use syn::parse::ParseStream; +use syn::{Ident, Token}; + +#[test] +fn test_peek() { + let _ = |input: ParseStream| { + let _ = input.peek(Ident); + let _ = input.peek(Ident::peek_any); + let _ = input.peek(Token![::]); + }; +} diff --git a/vendor/syn/tests/test_pat.rs b/vendor/syn/tests/test_pat.rs new file mode 100644 index 0000000..7b5f8b0 --- /dev/null +++ b/vendor/syn/tests/test_pat.rs @@ -0,0 +1,152 @@ +#![allow(clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +use proc_macro2::{Delimiter, Group, TokenStream, TokenTree}; +use quote::{quote, ToTokens as _}; +use syn::parse::Parser; +use syn::punctuated::Punctuated; +use syn::{parse_quote, token, Item, Pat, PatTuple, Stmt, Token}; + +#[test] +fn test_pat_ident() { + match Pat::parse_single.parse2(quote!(self)).unwrap() { + Pat::Ident(_) => (), + value => panic!("expected PatIdent, got {:?}", value), + } +} + +#[test] +fn test_pat_path() { + match Pat::parse_single.parse2(quote!(self::CONST)).unwrap() { + Pat::Path(_) => (), + value => panic!("expected PatPath, got {:?}", value), + } +} + +#[test] +fn test_leading_vert() { + // https://github.com/rust-lang/rust/blob/1.43.0/src/test/ui/or-patterns/remove-leading-vert.rs + + syn::parse_str::<Item>("fn f() {}").unwrap(); + syn::parse_str::<Item>("fn fun1(| A: E) {}").unwrap_err(); + syn::parse_str::<Item>("fn fun2(|| A: E) {}").unwrap_err(); + + syn::parse_str::<Stmt>("let | () = ();").unwrap_err(); + syn::parse_str::<Stmt>("let (| A): E;").unwrap(); + syn::parse_str::<Stmt>("let (|| A): (E);").unwrap_err(); + syn::parse_str::<Stmt>("let (| A,): (E,);").unwrap(); + syn::parse_str::<Stmt>("let [| A]: [E; 1];").unwrap(); + syn::parse_str::<Stmt>("let [|| A]: [E; 1];").unwrap_err(); + syn::parse_str::<Stmt>("let TS(| A): TS;").unwrap(); + syn::parse_str::<Stmt>("let TS(|| A): TS;").unwrap_err(); + syn::parse_str::<Stmt>("let NS { f: | A }: NS;").unwrap(); + syn::parse_str::<Stmt>("let NS { f: || A }: NS;").unwrap_err(); +} + +#[test] +fn test_group() { + let group = Group::new(Delimiter::None, quote!(Some(_))); + let tokens = TokenStream::from_iter(vec![TokenTree::Group(group)]); + let pat = Pat::parse_single.parse2(tokens).unwrap(); + + snapshot!(pat, @r###" + Pat::TupleStruct { + path: Path { + segments: [ + PathSegment { + ident: "Some", + }, + ], + }, + elems: [ + Pat::Wild, + ], + } + "###); +} + +#[test] +fn test_ranges() { + Pat::parse_single.parse_str("..").unwrap(); + Pat::parse_single.parse_str("..hi").unwrap(); + Pat::parse_single.parse_str("lo..").unwrap(); + Pat::parse_single.parse_str("lo..hi").unwrap(); + + Pat::parse_single.parse_str("..=").unwrap_err(); + Pat::parse_single.parse_str("..=hi").unwrap(); + Pat::parse_single.parse_str("lo..=").unwrap_err(); + Pat::parse_single.parse_str("lo..=hi").unwrap(); + + Pat::parse_single.parse_str("...").unwrap_err(); + Pat::parse_single.parse_str("...hi").unwrap_err(); + Pat::parse_single.parse_str("lo...").unwrap_err(); + Pat::parse_single.parse_str("lo...hi").unwrap(); + + Pat::parse_single.parse_str("[lo..]").unwrap_err(); + 
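+    // As in rustc, a half-open range directly inside a slice pattern is
+    // rejected; parenthesizing it makes it parse, as checked below.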
Pat::parse_single.parse_str("[..=hi]").unwrap_err(); + Pat::parse_single.parse_str("[(lo..)]").unwrap(); + Pat::parse_single.parse_str("[(..=hi)]").unwrap(); + Pat::parse_single.parse_str("[lo..=hi]").unwrap(); + + Pat::parse_single.parse_str("[_, lo.., _]").unwrap_err(); + Pat::parse_single.parse_str("[_, ..=hi, _]").unwrap_err(); + Pat::parse_single.parse_str("[_, (lo..), _]").unwrap(); + Pat::parse_single.parse_str("[_, (..=hi), _]").unwrap(); + Pat::parse_single.parse_str("[_, lo..=hi, _]").unwrap(); +} + +#[test] +fn test_tuple_comma() { + let mut expr = PatTuple { + attrs: Vec::new(), + paren_token: token::Paren::default(), + elems: Punctuated::new(), + }; + snapshot!(expr.to_token_stream() as Pat, @"Pat::Tuple"); + + expr.elems.push_value(parse_quote!(_)); + // Must not parse to Pat::Paren + snapshot!(expr.to_token_stream() as Pat, @r###" + Pat::Tuple { + elems: [ + Pat::Wild, + Token![,], + ], + } + "###); + + expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Pat, @r###" + Pat::Tuple { + elems: [ + Pat::Wild, + Token![,], + ], + } + "###); + + expr.elems.push_value(parse_quote!(_)); + snapshot!(expr.to_token_stream() as Pat, @r###" + Pat::Tuple { + elems: [ + Pat::Wild, + Token![,], + Pat::Wild, + ], + } + "###); + + expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Pat, @r###" + Pat::Tuple { + elems: [ + Pat::Wild, + Token![,], + Pat::Wild, + Token![,], + ], + } + "###); +} diff --git a/vendor/syn/tests/test_path.rs b/vendor/syn/tests/test_path.rs new file mode 100644 index 0000000..2873441 --- /dev/null +++ b/vendor/syn/tests/test_path.rs @@ -0,0 +1,130 @@ +#![allow(clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +use proc_macro2::{Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree}; +use quote::{quote, ToTokens}; +use syn::{parse_quote, Expr, Type, TypePath}; + +#[test] +fn parse_interpolated_leading_component() { + // mimics the token stream corresponding to `$mod::rest` + let tokens = TokenStream::from_iter(vec![ + TokenTree::Group(Group::new(Delimiter::None, quote! 
{ first })), + TokenTree::Punct(Punct::new(':', Spacing::Joint)), + TokenTree::Punct(Punct::new(':', Spacing::Alone)), + TokenTree::Ident(Ident::new("rest", Span::call_site())), + ]); + + snapshot!(tokens.clone() as Expr, @r###" + Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "first", + }, + Token![::], + PathSegment { + ident: "rest", + }, + ], + }, + } + "###); + + snapshot!(tokens as Type, @r###" + Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "first", + }, + Token![::], + PathSegment { + ident: "rest", + }, + ], + }, + } + "###); +} + +#[test] +fn print_incomplete_qpath() { + // qpath with `as` token + let mut ty: TypePath = parse_quote!(<Self as A>::Q); + snapshot!(ty.to_token_stream(), @r###" + TokenStream(`< Self as A > :: Q`) + "###); + assert!(ty.path.segments.pop().is_some()); + snapshot!(ty.to_token_stream(), @r###" + TokenStream(`< Self as A > ::`) + "###); + assert!(ty.path.segments.pop().is_some()); + snapshot!(ty.to_token_stream(), @r###" + TokenStream(`< Self >`) + "###); + assert!(ty.path.segments.pop().is_none()); + + // qpath without `as` token + let mut ty: TypePath = parse_quote!(<Self>::A::B); + snapshot!(ty.to_token_stream(), @r###" + TokenStream(`< Self > :: A :: B`) + "###); + assert!(ty.path.segments.pop().is_some()); + snapshot!(ty.to_token_stream(), @r###" + TokenStream(`< Self > :: A ::`) + "###); + assert!(ty.path.segments.pop().is_some()); + snapshot!(ty.to_token_stream(), @r###" + TokenStream(`< Self > ::`) + "###); + assert!(ty.path.segments.pop().is_none()); + + // normal path + let mut ty: TypePath = parse_quote!(Self::A::B); + snapshot!(ty.to_token_stream(), @r###" + TokenStream(`Self :: A :: B`) + "###); + assert!(ty.path.segments.pop().is_some()); + snapshot!(ty.to_token_stream(), @r###" + TokenStream(`Self :: A ::`) + "###); + assert!(ty.path.segments.pop().is_some()); + snapshot!(ty.to_token_stream(), @r###" + TokenStream(`Self ::`) + "###); + assert!(ty.path.segments.pop().is_some()); + snapshot!(ty.to_token_stream(), @r###" + TokenStream(``) + "###); + assert!(ty.path.segments.pop().is_none()); +} + +#[test] +fn parse_parenthesized_path_arguments_with_disambiguator() { + #[rustfmt::skip] + let tokens = quote!(dyn FnOnce::() -> !); + snapshot!(tokens as Type, @r###" + Type::TraitObject { + dyn_token: Some, + bounds: [ + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "FnOnce", + arguments: PathArguments::Parenthesized { + output: ReturnType::Type( + Type::Never, + ), + }, + }, + ], + }, + }), + ], + } + "###); +} diff --git a/vendor/syn/tests/test_precedence.rs b/vendor/syn/tests/test_precedence.rs new file mode 100644 index 0000000..026bece --- /dev/null +++ b/vendor/syn/tests/test_precedence.rs @@ -0,0 +1,548 @@ +//! This test does the following for every file in the rust-lang/rust repo: +//! +//! 1. Parse the file using syn into a syn::File. +//! 2. Extract every syn::Expr from the file. +//! 3. Print each expr to a string of source code. +//! 4. Parse the source code using librustc_parse into a rustc_ast::Expr. +//! 5. For both the syn::Expr and rustc_ast::Expr, crawl the syntax tree to +//! insert parentheses surrounding every subexpression. +//! 6. Serialize the fully parenthesized syn::Expr to a string of source code. +//! 7. Parse the fully parenthesized source code using librustc_parse. +//! 8. Compare the rustc_ast::Expr resulting from parenthesizing using rustc +//! data structures vs syn data structures, ignoring spans. If they agree, +//! 
rustc's parser and syn's parser have identical handling of expression +//! precedence. + +#![cfg(not(syn_disable_nightly_tests))] +#![cfg(not(miri))] +#![recursion_limit = "1024"] +#![feature(rustc_private)] +#![allow( + clippy::blocks_in_conditions, + clippy::doc_markdown, + clippy::explicit_deref_methods, + clippy::let_underscore_untyped, + clippy::manual_assert, + clippy::manual_let_else, + clippy::match_like_matches_macro, + clippy::match_wildcard_for_single_variants, + clippy::too_many_lines, + clippy::uninlined_format_args +)] + +extern crate rustc_ast; +extern crate rustc_ast_pretty; +extern crate rustc_data_structures; +extern crate rustc_driver; +extern crate rustc_span; +extern crate smallvec; +extern crate thin_vec; + +use crate::common::eq::SpanlessEq; +use crate::common::parse; +use quote::ToTokens; +use rustc_ast::ast; +use rustc_ast::ptr::P; +use rustc_ast_pretty::pprust; +use rustc_span::edition::Edition; +use std::fs; +use std::path::Path; +use std::process; +use std::sync::atomic::{AtomicUsize, Ordering}; + +#[macro_use] +mod macros; + +#[allow(dead_code)] +mod common; + +mod repo; + +#[test] +fn test_rustc_precedence() { + common::rayon_init(); + repo::clone_rust(); + let abort_after = common::abort_after(); + if abort_after == 0 { + panic!("Skipping all precedence tests"); + } + + let passed = AtomicUsize::new(0); + let failed = AtomicUsize::new(0); + + repo::for_each_rust_file(|path| { + let content = fs::read_to_string(path).unwrap(); + + let (l_passed, l_failed) = match syn::parse_file(&content) { + Ok(file) => { + let edition = repo::edition(path).parse().unwrap(); + let exprs = collect_exprs(file); + let (l_passed, l_failed) = test_expressions(path, edition, exprs); + errorf!( + "=== {}: {} passed | {} failed\n", + path.display(), + l_passed, + l_failed, + ); + (l_passed, l_failed) + } + Err(msg) => { + errorf!("\nFAIL {} - syn failed to parse: {}\n", path.display(), msg); + (0, 1) + } + }; + + passed.fetch_add(l_passed, Ordering::Relaxed); + let prev_failed = failed.fetch_add(l_failed, Ordering::Relaxed); + + if prev_failed + l_failed >= abort_after { + process::exit(1); + } + }); + + let passed = passed.load(Ordering::Relaxed); + let failed = failed.load(Ordering::Relaxed); + + errorf!("\n===== Precedence Test Results =====\n"); + errorf!("{} passed | {} failed\n", passed, failed); + + if failed > 0 { + panic!("{} failures", failed); + } +} + +fn test_expressions(path: &Path, edition: Edition, exprs: Vec<syn::Expr>) -> (usize, usize) { + let mut passed = 0; + let mut failed = 0; + + rustc_span::create_session_if_not_set_then(edition, |_| { + for expr in exprs { + let source_code = expr.to_token_stream().to_string(); + let librustc_ast = if let Some(e) = librustc_parse_and_rewrite(&source_code) { + e + } else { + failed += 1; + errorf!( + "\nFAIL {} - librustc failed to parse original\n", + path.display(), + ); + continue; + }; + + let syn_parenthesized_code = + syn_parenthesize(expr.clone()).to_token_stream().to_string(); + let syn_ast = if let Some(e) = parse::librustc_expr(&syn_parenthesized_code) { + e + } else { + failed += 1; + errorf!( + "\nFAIL {} - librustc failed to parse parenthesized\n", + path.display(), + ); + continue; + }; + + if !SpanlessEq::eq(&syn_ast, &librustc_ast) { + failed += 1; + let syn_pretty = pprust::expr_to_string(&syn_ast); + let librustc_pretty = pprust::expr_to_string(&librustc_ast); + errorf!( + "\nFAIL {}\n{}\nsyn != rustc\n{}\n", + path.display(), + syn_pretty, + librustc_pretty, + ); + continue; + } + + let expr_invisible = 
make_parens_invisible(expr); + let Ok(reparsed_expr_invisible) = syn::parse2(expr_invisible.to_token_stream()) else { + failed += 1; + errorf!( + "\nFAIL {} - syn failed to parse invisible delimiters\n{}\n", + path.display(), + source_code, + ); + continue; + }; + if expr_invisible != reparsed_expr_invisible { + failed += 1; + errorf!( + "\nFAIL {} - mismatch after parsing invisible delimiters\n{}\n", + path.display(), + source_code, + ); + continue; + } + + passed += 1; + } + }); + + (passed, failed) +} + +fn librustc_parse_and_rewrite(input: &str) -> Option<P<ast::Expr>> { + parse::librustc_expr(input).map(librustc_parenthesize) +} + +fn librustc_parenthesize(mut librustc_expr: P<ast::Expr>) -> P<ast::Expr> { + use rustc_ast::ast::{ + AssocItem, AssocItemKind, Attribute, BinOpKind, Block, BorrowKind, BoundConstness, Expr, + ExprField, ExprKind, GenericArg, GenericBound, ItemKind, Local, LocalKind, Pat, Stmt, + StmtKind, StructExpr, StructRest, TraitBoundModifiers, Ty, + }; + use rustc_ast::mut_visit::{ + noop_flat_map_assoc_item, noop_visit_generic_arg, noop_visit_item_kind, noop_visit_local, + noop_visit_param_bound, MutVisitor, + }; + use rustc_data_structures::flat_map_in_place::FlatMapInPlace; + use rustc_span::DUMMY_SP; + use smallvec::SmallVec; + use std::mem; + use std::ops::DerefMut; + use thin_vec::ThinVec; + + struct FullyParenthesize; + + fn contains_let_chain(expr: &Expr) -> bool { + match &expr.kind { + ExprKind::Let(..) => true, + ExprKind::Binary(binop, left, right) => { + binop.node == BinOpKind::And + && (contains_let_chain(left) || contains_let_chain(right)) + } + _ => false, + } + } + + fn flat_map_field<T: MutVisitor>(mut f: ExprField, vis: &mut T) -> Vec<ExprField> { + if f.is_shorthand { + noop_visit_expr(&mut f.expr, vis); + } else { + vis.visit_expr(&mut f.expr); + } + vec![f] + } + + fn flat_map_stmt<T: MutVisitor>(stmt: Stmt, vis: &mut T) -> Vec<Stmt> { + let kind = match stmt.kind { + // Don't wrap toplevel expressions in statements. + StmtKind::Expr(mut e) => { + noop_visit_expr(&mut e, vis); + StmtKind::Expr(e) + } + StmtKind::Semi(mut e) => { + noop_visit_expr(&mut e, vis); + StmtKind::Semi(e) + } + s => s, + }; + + vec![Stmt { kind, ..stmt }] + } + + fn noop_visit_expr<T: MutVisitor>(e: &mut Expr, vis: &mut T) { + use rustc_ast::mut_visit::{noop_visit_expr, visit_attrs}; + match &mut e.kind { + ExprKind::AddrOf(BorrowKind::Raw, ..) => {} + ExprKind::Struct(expr) => { + let StructExpr { + qself, + path, + fields, + rest, + } = expr.deref_mut(); + vis.visit_qself(qself); + vis.visit_path(path); + fields.flat_map_in_place(|field| flat_map_field(field, vis)); + if let StructRest::Base(rest) = rest { + vis.visit_expr(rest); + } + vis.visit_id(&mut e.id); + vis.visit_span(&mut e.span); + visit_attrs(&mut e.attrs, vis); + } + _ => noop_visit_expr(e, vis), + } + } + + impl MutVisitor for FullyParenthesize { + fn visit_expr(&mut self, e: &mut P<Expr>) { + noop_visit_expr(e, self); + match e.kind { + ExprKind::Block(..) | ExprKind::If(..) | ExprKind::Let(..) => {} + ExprKind::Binary(..) if contains_let_chain(e) => {} + _ => { + let inner = mem::replace( + e, + P(Expr { + id: ast::DUMMY_NODE_ID, + kind: ExprKind::Err, + span: DUMMY_SP, + attrs: ThinVec::new(), + tokens: None, + }), + ); + e.kind = ExprKind::Paren(inner); + } + } + } + + fn visit_generic_arg(&mut self, arg: &mut GenericArg) { + match arg { + // Don't wrap unbraced const generic arg as that's invalid syntax. + GenericArg::Const(anon_const) => { + if let ExprKind::Block(..) 
= &mut anon_const.value.kind { + noop_visit_expr(&mut anon_const.value, self); + } + } + _ => noop_visit_generic_arg(arg, self), + } + } + + fn visit_param_bound(&mut self, bound: &mut GenericBound) { + match bound { + GenericBound::Trait( + _, + TraitBoundModifiers { + constness: BoundConstness::Maybe(_), + .. + }, + ) => {} + _ => noop_visit_param_bound(bound, self), + } + } + + fn visit_block(&mut self, block: &mut P<Block>) { + self.visit_id(&mut block.id); + block + .stmts + .flat_map_in_place(|stmt| flat_map_stmt(stmt, self)); + self.visit_span(&mut block.span); + } + + fn visit_local(&mut self, local: &mut P<Local>) { + match local.kind { + LocalKind::InitElse(..) => {} + _ => noop_visit_local(local, self), + } + } + + fn visit_item_kind(&mut self, item: &mut ItemKind) { + match item { + ItemKind::Const(const_item) + if !const_item.generics.params.is_empty() + || !const_item.generics.where_clause.predicates.is_empty() => {} + _ => noop_visit_item_kind(item, self), + } + } + + fn flat_map_trait_item(&mut self, item: P<AssocItem>) -> SmallVec<[P<AssocItem>; 1]> { + match &item.kind { + AssocItemKind::Const(const_item) + if !const_item.generics.params.is_empty() + || !const_item.generics.where_clause.predicates.is_empty() => + { + SmallVec::from([item]) + } + _ => noop_flat_map_assoc_item(item, self), + } + } + + fn flat_map_impl_item(&mut self, item: P<AssocItem>) -> SmallVec<[P<AssocItem>; 1]> { + match &item.kind { + AssocItemKind::Const(const_item) + if !const_item.generics.params.is_empty() + || !const_item.generics.where_clause.predicates.is_empty() => + { + SmallVec::from([item]) + } + _ => noop_flat_map_assoc_item(item, self), + } + } + + // We don't want to look at expressions that might appear in patterns or + // types yet. We'll look into comparing those in the future. For now + // focus on expressions appearing in other places. 
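+        // (Accordingly, the pattern, type, and attribute visitors below are
+        // deliberate no-ops.)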
+ fn visit_pat(&mut self, pat: &mut P<Pat>) { + let _ = pat; + } + + fn visit_ty(&mut self, ty: &mut P<Ty>) { + let _ = ty; + } + + fn visit_attribute(&mut self, attr: &mut Attribute) { + let _ = attr; + } + } + + let mut folder = FullyParenthesize; + folder.visit_expr(&mut librustc_expr); + librustc_expr +} + +fn syn_parenthesize(syn_expr: syn::Expr) -> syn::Expr { + use syn::fold::{fold_expr, fold_generic_argument, Fold}; + use syn::{token, BinOp, Expr, ExprParen, GenericArgument, MetaNameValue, Pat, Stmt, Type}; + + struct FullyParenthesize; + + fn parenthesize(expr: Expr) -> Expr { + Expr::Paren(ExprParen { + attrs: Vec::new(), + expr: Box::new(expr), + paren_token: token::Paren::default(), + }) + } + + fn needs_paren(expr: &Expr) -> bool { + match expr { + Expr::Group(_) => unreachable!(), + Expr::If(_) | Expr::Unsafe(_) | Expr::Block(_) | Expr::Let(_) => false, + Expr::Binary(_) => !contains_let_chain(expr), + _ => true, + } + } + + fn contains_let_chain(expr: &Expr) -> bool { + match expr { + Expr::Let(_) => true, + Expr::Binary(expr) => { + matches!(expr.op, BinOp::And(_)) + && (contains_let_chain(&expr.left) || contains_let_chain(&expr.right)) + } + _ => false, + } + } + + impl Fold for FullyParenthesize { + fn fold_expr(&mut self, expr: Expr) -> Expr { + let needs_paren = needs_paren(&expr); + let folded = fold_expr(self, expr); + if needs_paren { + parenthesize(folded) + } else { + folded + } + } + + fn fold_generic_argument(&mut self, arg: GenericArgument) -> GenericArgument { + match arg { + GenericArgument::Const(arg) => GenericArgument::Const(match arg { + Expr::Block(_) => fold_expr(self, arg), + // Don't wrap unbraced const generic arg as that's invalid syntax. + _ => arg, + }), + _ => fold_generic_argument(self, arg), + } + } + + fn fold_stmt(&mut self, stmt: Stmt) -> Stmt { + match stmt { + // Don't wrap toplevel expressions in statements. + Stmt::Expr(Expr::Verbatim(_), Some(_)) => stmt, + Stmt::Expr(e, semi) => Stmt::Expr(fold_expr(self, e), semi), + s => s, + } + } + + fn fold_meta_name_value(&mut self, meta: MetaNameValue) -> MetaNameValue { + // Don't turn #[p = "..."] into #[p = ("...")]. + meta + } + + // We don't want to look at expressions that might appear in patterns or + // types yet. We'll look into comparing those in the future. For now + // focus on expressions appearing in other places. + fn fold_pat(&mut self, pat: Pat) -> Pat { + pat + } + + fn fold_type(&mut self, ty: Type) -> Type { + ty + } + } + + let mut folder = FullyParenthesize; + folder.fold_expr(syn_expr) +} + +fn make_parens_invisible(expr: syn::Expr) -> syn::Expr { + use syn::fold::{fold_expr, fold_stmt, Fold}; + use syn::{token, Expr, ExprGroup, ExprParen, Stmt}; + + struct MakeParensInvisible; + + impl Fold for MakeParensInvisible { + fn fold_expr(&mut self, mut expr: Expr) -> Expr { + if let Expr::Paren(paren) = expr { + expr = Expr::Group(ExprGroup { + attrs: paren.attrs, + group_token: token::Group(paren.paren_token.span.join()), + expr: paren.expr, + }); + } + fold_expr(self, expr) + } + + fn fold_stmt(&mut self, stmt: Stmt) -> Stmt { + if let Stmt::Expr(expr @ (Expr::Binary(_) | Expr::Cast(_)), None) = stmt { + Stmt::Expr( + Expr::Paren(ExprParen { + attrs: Vec::new(), + paren_token: token::Paren::default(), + expr: Box::new(fold_expr(self, expr)), + }), + None, + ) + } else { + fold_stmt(self, stmt) + } + } + } + + let mut folder = MakeParensInvisible; + folder.fold_expr(expr) +} + +/// Walk through a crate collecting all expressions we can find in it. 
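+///
+/// Each collected expression is replaced in the tree by an empty tuple, so the
+/// fold does not separately descend into the subexpressions of an expression
+/// it has already captured whole.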
+fn collect_exprs(file: syn::File) -> Vec<syn::Expr> { + use syn::fold::Fold; + use syn::punctuated::Punctuated; + use syn::{token, ConstParam, Expr, ExprTuple, Pat, Path}; + + struct CollectExprs(Vec<Expr>); + impl Fold for CollectExprs { + fn fold_expr(&mut self, expr: Expr) -> Expr { + match expr { + Expr::Verbatim(_) => {} + _ => self.0.push(expr), + } + + Expr::Tuple(ExprTuple { + attrs: vec![], + elems: Punctuated::new(), + paren_token: token::Paren::default(), + }) + } + + fn fold_pat(&mut self, pat: Pat) -> Pat { + pat + } + + fn fold_path(&mut self, path: Path) -> Path { + // Skip traversing into const generic path arguments + path + } + + fn fold_const_param(&mut self, const_param: ConstParam) -> ConstParam { + const_param + } + } + + let mut folder = CollectExprs(vec![]); + folder.fold_file(file); + folder.0 +} diff --git a/vendor/syn/tests/test_receiver.rs b/vendor/syn/tests/test_receiver.rs new file mode 100644 index 0000000..8decb55 --- /dev/null +++ b/vendor/syn/tests/test_receiver.rs @@ -0,0 +1,321 @@ +#![allow(clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +use syn::{parse_quote, TraitItemFn}; + +#[test] +fn test_by_value() { + let TraitItemFn { sig, .. } = parse_quote! { + fn by_value(self: Self); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Self", + }, + ], + }, + }, + }) + "###); +} + +#[test] +fn test_by_mut_value() { + let TraitItemFn { sig, .. } = parse_quote! { + fn by_mut(mut self: Self); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + mutability: Some, + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Self", + }, + ], + }, + }, + }) + "###); +} + +#[test] +fn test_by_ref() { + let TraitItemFn { sig, .. } = parse_quote! { + fn by_ref(self: &Self); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + colon_token: Some, + ty: Type::Reference { + elem: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Self", + }, + ], + }, + }, + }, + }) + "###); +} + +#[test] +fn test_by_box() { + let TraitItemFn { sig, .. } = parse_quote! { + fn by_box(self: Box<Self>); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Box", + arguments: PathArguments::AngleBracketed { + args: [ + GenericArgument::Type(Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Self", + }, + ], + }, + }), + ], + }, + }, + ], + }, + }, + }) + "###); +} + +#[test] +fn test_by_pin() { + let TraitItemFn { sig, .. } = parse_quote! { + fn by_pin(self: Pin<Self>); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Pin", + arguments: PathArguments::AngleBracketed { + args: [ + GenericArgument::Type(Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Self", + }, + ], + }, + }), + ], + }, + }, + ], + }, + }, + }) + "###); +} + +#[test] +fn test_explicit_type() { + let TraitItemFn { sig, .. } = parse_quote! 
{ + fn explicit_type(self: Pin<MyType>); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Pin", + arguments: PathArguments::AngleBracketed { + args: [ + GenericArgument::Type(Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "MyType", + }, + ], + }, + }), + ], + }, + }, + ], + }, + }, + }) + "###); +} + +#[test] +fn test_value_shorthand() { + let TraitItemFn { sig, .. } = parse_quote! { + fn value_shorthand(self); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Self", + }, + ], + }, + }, + }) + "###); +} + +#[test] +fn test_mut_value_shorthand() { + let TraitItemFn { sig, .. } = parse_quote! { + fn mut_value_shorthand(mut self); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + mutability: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Self", + }, + ], + }, + }, + }) + "###); +} + +#[test] +fn test_ref_shorthand() { + let TraitItemFn { sig, .. } = parse_quote! { + fn ref_shorthand(&self); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + reference: Some(None), + ty: Type::Reference { + elem: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Self", + }, + ], + }, + }, + }, + }) + "###); +} + +#[test] +fn test_ref_shorthand_with_lifetime() { + let TraitItemFn { sig, .. } = parse_quote! { + fn ref_shorthand(&'a self); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + reference: Some(Some(Lifetime { + ident: "a", + })), + ty: Type::Reference { + lifetime: Some(Lifetime { + ident: "a", + }), + elem: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Self", + }, + ], + }, + }, + }, + }) + "###); +} + +#[test] +fn test_ref_mut_shorthand() { + let TraitItemFn { sig, .. } = parse_quote! { + fn ref_mut_shorthand(&mut self); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + reference: Some(None), + mutability: Some, + ty: Type::Reference { + mutability: Some, + elem: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Self", + }, + ], + }, + }, + }, + }) + "###); +} + +#[test] +fn test_ref_mut_shorthand_with_lifetime() { + let TraitItemFn { sig, .. } = parse_quote! 
{ + fn ref_mut_shorthand(&'a mut self); + }; + snapshot!(&sig.inputs[0], @r###" + FnArg::Receiver(Receiver { + reference: Some(Some(Lifetime { + ident: "a", + })), + mutability: Some, + ty: Type::Reference { + lifetime: Some(Lifetime { + ident: "a", + }), + mutability: Some, + elem: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Self", + }, + ], + }, + }, + }, + }) + "###); +} diff --git a/vendor/syn/tests/test_round_trip.rs b/vendor/syn/tests/test_round_trip.rs new file mode 100644 index 0000000..a673fff --- /dev/null +++ b/vendor/syn/tests/test_round_trip.rs @@ -0,0 +1,239 @@ +#![cfg(not(syn_disable_nightly_tests))] +#![cfg(not(miri))] +#![recursion_limit = "1024"] +#![feature(rustc_private)] +#![allow( + clippy::blocks_in_conditions, + clippy::manual_assert, + clippy::manual_let_else, + clippy::match_like_matches_macro, + clippy::uninlined_format_args +)] + +extern crate rustc_ast; +extern crate rustc_ast_pretty; +extern crate rustc_data_structures; +extern crate rustc_driver; +extern crate rustc_error_messages; +extern crate rustc_errors; +extern crate rustc_expand; +extern crate rustc_parse as parse; +extern crate rustc_session; +extern crate rustc_span; + +use crate::common::eq::SpanlessEq; +use quote::quote; +use rustc_ast::ast::{ + AngleBracketedArg, AngleBracketedArgs, Crate, GenericArg, GenericParamKind, Generics, + WhereClause, +}; +use rustc_ast::mut_visit::{self, MutVisitor}; +use rustc_ast_pretty::pprust; +use rustc_error_messages::{DiagnosticMessage, LazyFallbackBundle}; +use rustc_errors::{translation, Diagnostic, PResult}; +use rustc_session::parse::ParseSess; +use rustc_span::source_map::FilePathMapping; +use rustc_span::FileName; +use std::borrow::Cow; +use std::fs; +use std::panic; +use std::path::Path; +use std::process; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::time::Instant; + +#[macro_use] +mod macros; + +#[allow(dead_code)] +mod common; + +mod repo; + +#[test] +fn test_round_trip() { + common::rayon_init(); + repo::clone_rust(); + let abort_after = common::abort_after(); + if abort_after == 0 { + panic!("Skipping all round_trip tests"); + } + + let failed = AtomicUsize::new(0); + + repo::for_each_rust_file(|path| test(path, &failed, abort_after)); + + let failed = failed.load(Ordering::Relaxed); + if failed > 0 { + panic!("{} failures", failed); + } +} + +fn test(path: &Path, failed: &AtomicUsize, abort_after: usize) { + let content = fs::read_to_string(path).unwrap(); + + let start = Instant::now(); + let (krate, elapsed) = match syn::parse_file(&content) { + Ok(krate) => (krate, start.elapsed()), + Err(msg) => { + errorf!("=== {}: syn failed to parse\n{:?}\n", path.display(), msg); + let prev_failed = failed.fetch_add(1, Ordering::Relaxed); + if prev_failed + 1 >= abort_after { + process::exit(1); + } + return; + } + }; + let back = quote!(#krate).to_string(); + let edition = repo::edition(path).parse().unwrap(); + + rustc_span::create_session_if_not_set_then(edition, |_| { + let equal = match panic::catch_unwind(|| { + let locale_resources = rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(); + let file_path_mapping = FilePathMapping::empty(); + let sess = ParseSess::new(locale_resources, file_path_mapping); + let before = match librustc_parse(content, &sess) { + Ok(before) => before, + Err(diagnostic) => { + errorf!( + "=== {}: ignore - librustc failed to parse original content: {}\n", + path.display(), + translate_message(&diagnostic), + ); + diagnostic.cancel(); + return Err(true); + } + }; + let after = match 
librustc_parse(back, &sess) { + Ok(after) => after, + Err(mut diagnostic) => { + errorf!("=== {}: librustc failed to parse", path.display()); + diagnostic.emit(); + return Err(false); + } + }; + Ok((before, after)) + }) { + Err(_) => { + errorf!("=== {}: ignoring librustc panic\n", path.display()); + true + } + Ok(Err(equal)) => equal, + Ok(Ok((mut before, mut after))) => { + normalize(&mut before); + normalize(&mut after); + if SpanlessEq::eq(&before, &after) { + errorf!( + "=== {}: pass in {}ms\n", + path.display(), + elapsed.as_secs() * 1000 + u64::from(elapsed.subsec_nanos()) / 1_000_000 + ); + true + } else { + errorf!( + "=== {}: FAIL\n{}\n!=\n{}\n", + path.display(), + pprust::crate_to_string_for_macros(&before), + pprust::crate_to_string_for_macros(&after), + ); + false + } + } + }; + if !equal { + let prev_failed = failed.fetch_add(1, Ordering::Relaxed); + if prev_failed + 1 >= abort_after { + process::exit(1); + } + } + }); +} + +fn librustc_parse(content: String, sess: &ParseSess) -> PResult<Crate> { + static COUNTER: AtomicUsize = AtomicUsize::new(0); + let counter = COUNTER.fetch_add(1, Ordering::Relaxed); + let name = FileName::Custom(format!("test_round_trip{}", counter)); + parse::parse_crate_from_source_str(name, content, sess) +} + +fn translate_message(diagnostic: &Diagnostic) -> Cow<'static, str> { + thread_local! { + static FLUENT_BUNDLE: LazyFallbackBundle = { + let locale_resources = rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(); + let with_directionality_markers = false; + rustc_error_messages::fallback_fluent_bundle(locale_resources, with_directionality_markers) + }; + } + + let message = &diagnostic.messages[0].0; + let args = translation::to_fluent_args(diagnostic.args()); + + let (identifier, attr) = match message { + DiagnosticMessage::Str(msg) | DiagnosticMessage::Eager(msg) => return msg.clone(), + DiagnosticMessage::FluentIdentifier(identifier, attr) => (identifier, attr), + }; + + FLUENT_BUNDLE.with(|fluent_bundle| { + let message = fluent_bundle + .get_message(identifier) + .expect("missing diagnostic in fluent bundle"); + let value = match attr { + Some(attr) => message + .get_attribute(attr) + .expect("missing attribute in fluent message") + .value(), + None => message.value().expect("missing value in fluent message"), + }; + + let mut err = Vec::new(); + let translated = fluent_bundle.format_pattern(value, Some(&args), &mut err); + assert!(err.is_empty()); + Cow::Owned(translated.into_owned()) + }) +} + +fn normalize(krate: &mut Crate) { + struct NormalizeVisitor; + + impl MutVisitor for NormalizeVisitor { + fn visit_angle_bracketed_parameter_data(&mut self, e: &mut AngleBracketedArgs) { + #[derive(Ord, PartialOrd, Eq, PartialEq)] + enum Group { + Lifetimes, + TypesAndConsts, + Constraints, + } + e.args.sort_by_key(|arg| match arg { + AngleBracketedArg::Arg(arg) => match arg { + GenericArg::Lifetime(_) => Group::Lifetimes, + GenericArg::Type(_) | GenericArg::Const(_) => Group::TypesAndConsts, + }, + AngleBracketedArg::Constraint(_) => Group::Constraints, + }); + mut_visit::noop_visit_angle_bracketed_parameter_data(e, self); + } + + fn visit_generics(&mut self, e: &mut Generics) { + #[derive(Ord, PartialOrd, Eq, PartialEq)] + enum Group { + Lifetimes, + TypesAndConsts, + } + e.params.sort_by_key(|param| match param.kind { + GenericParamKind::Lifetime => Group::Lifetimes, + GenericParamKind::Type { .. } | GenericParamKind::Const { .. 
} => { + Group::TypesAndConsts + } + }); + mut_visit::noop_visit_generics(e, self); + } + + fn visit_where_clause(&mut self, e: &mut WhereClause) { + if e.predicates.is_empty() { + e.has_where_token = false; + } + } + } + + NormalizeVisitor.visit_crate(krate); +} diff --git a/vendor/syn/tests/test_shebang.rs b/vendor/syn/tests/test_shebang.rs new file mode 100644 index 0000000..8439161 --- /dev/null +++ b/vendor/syn/tests/test_shebang.rs @@ -0,0 +1,67 @@ +#![allow(clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +#[test] +fn test_basic() { + let content = "#!/usr/bin/env rustx\nfn main() {}"; + let file = syn::parse_file(content).unwrap(); + snapshot!(file, @r###" + File { + shebang: Some("#!/usr/bin/env rustx"), + items: [ + Item::Fn { + vis: Visibility::Inherited, + sig: Signature { + ident: "main", + generics: Generics, + output: ReturnType::Default, + }, + block: Block { + stmts: [], + }, + }, + ], + } + "###); +} + +#[test] +fn test_comment() { + let content = "#!//am/i/a/comment\n[allow(dead_code)] fn main() {}"; + let file = syn::parse_file(content).unwrap(); + snapshot!(file, @r###" + File { + attrs: [ + Attribute { + style: AttrStyle::Inner, + meta: Meta::List { + path: Path { + segments: [ + PathSegment { + ident: "allow", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`dead_code`), + }, + }, + ], + items: [ + Item::Fn { + vis: Visibility::Inherited, + sig: Signature { + ident: "main", + generics: Generics, + output: ReturnType::Default, + }, + block: Block { + stmts: [], + }, + }, + ], + } + "###); +} diff --git a/vendor/syn/tests/test_should_parse.rs b/vendor/syn/tests/test_should_parse.rs new file mode 100644 index 0000000..180d859 --- /dev/null +++ b/vendor/syn/tests/test_should_parse.rs @@ -0,0 +1,45 @@ +macro_rules! should_parse { + ($name:ident, { $($in:tt)* }) => { + #[test] + fn $name() { + // Make sure we can parse the file! + syn::parse_file(stringify!($($in)*)).unwrap(); + } + } +} + +should_parse!(generic_associated_type, { + impl Foo { + type Item = &'a i32; + fn foo<'a>(&'a self) -> Self::Item<'a> {} + } +}); + +#[rustfmt::skip] +should_parse!(const_generics_use, { + type X = Foo<5>; + type Y = Foo<"foo">; + type Z = Foo<X>; + type W = Foo<{ X + 10 }>; +}); + +should_parse!(trailing_plus_type, { + type A = Box<Foo>; + type A = Box<Foo + 'a>; + type A = Box<'a + Foo>; +}); + +should_parse!(generic_associated_type_where, { + trait Foo { + type Item; + fn foo<T>(&self, t: T) -> Self::Item<T>; + } +}); + +should_parse!(match_with_block_expr, { + fn main() { + match false { + _ => {}.a(), + } + } +}); diff --git a/vendor/syn/tests/test_size.rs b/vendor/syn/tests/test_size.rs new file mode 100644 index 0000000..d64a3ab --- /dev/null +++ b/vendor/syn/tests/test_size.rs @@ -0,0 +1,36 @@ +// Assumes proc-macro2's "span-locations" feature is off. 
+ +#![cfg(target_pointer_width = "64")] + +use std::mem; +use syn::{Expr, Item, Lit, Pat, Type}; + +#[rustversion::attr(before(2022-11-24), ignore)] +#[test] +fn test_expr_size() { + assert_eq!(mem::size_of::<Expr>(), 176); +} + +#[rustversion::attr(before(2022-09-09), ignore)] +#[test] +fn test_item_size() { + assert_eq!(mem::size_of::<Item>(), 360); +} + +#[rustversion::attr(before(2023-04-29), ignore)] +#[test] +fn test_type_size() { + assert_eq!(mem::size_of::<Type>(), 232); +} + +#[rustversion::attr(before(2023-04-29), ignore)] +#[test] +fn test_pat_size() { + assert_eq!(mem::size_of::<Pat>(), 184); +} + +#[rustversion::attr(before(2023-12-20), ignore)] +#[test] +fn test_lit_size() { + assert_eq!(mem::size_of::<Lit>(), 24); +} diff --git a/vendor/syn/tests/test_stmt.rs b/vendor/syn/tests/test_stmt.rs new file mode 100644 index 0000000..61890a4 --- /dev/null +++ b/vendor/syn/tests/test_stmt.rs @@ -0,0 +1,322 @@ +#![allow( + clippy::assertions_on_result_states, + clippy::non_ascii_literal, + clippy::uninlined_format_args +)] + +#[macro_use] +mod macros; + +use proc_macro2::{Delimiter, Group, Ident, Span, TokenStream, TokenTree}; +use quote::{quote, ToTokens as _}; +use syn::parse::Parser as _; +use syn::{Block, Stmt}; + +#[test] +fn test_raw_operator() { + let stmt = syn::parse_str::<Stmt>("let _ = &raw const x;").unwrap(); + + snapshot!(stmt, @r###" + Stmt::Local { + pat: Pat::Wild, + init: Some(LocalInit { + expr: Expr::Verbatim(`& raw const x`), + }), + } + "###); +} + +#[test] +fn test_raw_variable() { + let stmt = syn::parse_str::<Stmt>("let _ = &raw;").unwrap(); + + snapshot!(stmt, @r###" + Stmt::Local { + pat: Pat::Wild, + init: Some(LocalInit { + expr: Expr::Reference { + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "raw", + }, + ], + }, + }, + }, + }), + } + "###); +} + +#[test] +fn test_raw_invalid() { + assert!(syn::parse_str::<Stmt>("let _ = &raw x;").is_err()); +} + +#[test] +fn test_none_group() { + // <Ø async fn f() {} Ø> + let tokens = TokenStream::from_iter(vec![TokenTree::Group(Group::new( + Delimiter::None, + TokenStream::from_iter(vec![ + TokenTree::Ident(Ident::new("async", Span::call_site())), + TokenTree::Ident(Ident::new("fn", Span::call_site())), + TokenTree::Ident(Ident::new("f", Span::call_site())), + TokenTree::Group(Group::new(Delimiter::Parenthesis, TokenStream::new())), + TokenTree::Group(Group::new(Delimiter::Brace, TokenStream::new())), + ]), + ))]); + snapshot!(tokens as Stmt, @r###" + Stmt::Item(Item::Fn { + vis: Visibility::Inherited, + sig: Signature { + asyncness: Some, + ident: "f", + generics: Generics, + output: ReturnType::Default, + }, + block: Block { + stmts: [], + }, + }) + "###); + + let tokens = Group::new(Delimiter::None, quote!(let None = None)).to_token_stream(); + let stmts = Block::parse_within.parse2(tokens).unwrap(); + snapshot!(stmts, @r###" + [ + Stmt::Expr( + Expr::Group { + expr: Expr::Let { + pat: Pat::Ident { + ident: "None", + }, + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "None", + }, + ], + }, + }, + }, + }, + None, + ), + ] + "###); +} + +#[test] +fn test_let_dot_dot() { + let tokens = quote! { + let .. = 10; + }; + + snapshot!(tokens as Stmt, @r###" + Stmt::Local { + pat: Pat::Rest, + init: Some(LocalInit { + expr: Expr::Lit { + lit: 10, + }, + }), + } + "###); +} + +#[test] +fn test_let_else() { + let tokens = quote! 
{ + let Some(x) = None else { return 0; }; + }; + + snapshot!(tokens as Stmt, @r###" + Stmt::Local { + pat: Pat::TupleStruct { + path: Path { + segments: [ + PathSegment { + ident: "Some", + }, + ], + }, + elems: [ + Pat::Ident { + ident: "x", + }, + ], + }, + init: Some(LocalInit { + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "None", + }, + ], + }, + }, + diverge: Some(Expr::Block { + block: Block { + stmts: [ + Stmt::Expr( + Expr::Return { + expr: Some(Expr::Lit { + lit: 0, + }), + }, + Some, + ), + ], + }, + }), + }), + } + "###); +} + +#[test] +fn test_macros() { + let tokens = quote! { + fn main() { + macro_rules! mac {} + thread_local! { static FOO } + println!(""); + vec![] + } + }; + + snapshot!(tokens as Stmt, @r###" + Stmt::Item(Item::Fn { + vis: Visibility::Inherited, + sig: Signature { + ident: "main", + generics: Generics, + output: ReturnType::Default, + }, + block: Block { + stmts: [ + Stmt::Item(Item::Macro { + ident: Some("mac"), + mac: Macro { + path: Path { + segments: [ + PathSegment { + ident: "macro_rules", + }, + ], + }, + delimiter: MacroDelimiter::Brace, + tokens: TokenStream(``), + }, + }), + Stmt::Macro { + mac: Macro { + path: Path { + segments: [ + PathSegment { + ident: "thread_local", + }, + ], + }, + delimiter: MacroDelimiter::Brace, + tokens: TokenStream(`static FOO`), + }, + }, + Stmt::Macro { + mac: Macro { + path: Path { + segments: [ + PathSegment { + ident: "println", + }, + ], + }, + delimiter: MacroDelimiter::Paren, + tokens: TokenStream(`""`), + }, + semi_token: Some, + }, + Stmt::Expr( + Expr::Macro { + mac: Macro { + path: Path { + segments: [ + PathSegment { + ident: "vec", + }, + ], + }, + delimiter: MacroDelimiter::Bracket, + tokens: TokenStream(``), + }, + }, + None, + ), + ], + }, + }) + "###); +} + +#[test] +fn test_early_parse_loop() { + // The following is an Expr::Loop followed by Expr::Tuple. It is not an + // Expr::Call. + let tokens = quote! { + loop {} + () + }; + + let stmts = Block::parse_within.parse2(tokens).unwrap(); + + snapshot!(stmts, @r###" + [ + Stmt::Expr( + Expr::Loop { + body: Block { + stmts: [], + }, + }, + None, + ), + Stmt::Expr( + Expr::Tuple, + None, + ), + ] + "###); + + let tokens = quote! 
{ + 'a: loop {} + () + }; + + let stmts = Block::parse_within.parse2(tokens).unwrap(); + + snapshot!(stmts, @r###" + [ + Stmt::Expr( + Expr::Loop { + label: Some(Label { + name: Lifetime { + ident: "a", + }, + }), + body: Block { + stmts: [], + }, + }, + None, + ), + Stmt::Expr( + Expr::Tuple, + None, + ), + ] + "###); +} diff --git a/vendor/syn/tests/test_token_trees.rs b/vendor/syn/tests/test_token_trees.rs new file mode 100644 index 0000000..f5a067f --- /dev/null +++ b/vendor/syn/tests/test_token_trees.rs @@ -0,0 +1,32 @@ +#![allow(clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +use proc_macro2::TokenStream; +use quote::quote; +use syn::Lit; + +#[test] +fn test_struct() { + let input = " + #[derive(Debug, Clone)] + pub struct Item { + pub ident: Ident, + pub attrs: Vec<Attribute>, + } + "; + + snapshot!(input as TokenStream, @r###" + TokenStream( + `# [derive (Debug , Clone)] pub struct Item { pub ident : Ident , pub attrs : Vec < Attribute >, }`, + ) + "###); +} + +#[test] +fn test_literal_mangling() { + let code = "0_4"; + let parsed: Lit = syn::parse_str(code).unwrap(); + assert_eq!(code, quote!(#parsed).to_string()); +} diff --git a/vendor/syn/tests/test_ty.rs b/vendor/syn/tests/test_ty.rs new file mode 100644 index 0000000..0645393 --- /dev/null +++ b/vendor/syn/tests/test_ty.rs @@ -0,0 +1,397 @@ +#![allow(clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +use proc_macro2::{Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree}; +use quote::{quote, ToTokens as _}; +use syn::punctuated::Punctuated; +use syn::{parse_quote, token, Token, Type, TypeTuple}; + +#[test] +fn test_mut_self() { + syn::parse_str::<Type>("fn(mut self)").unwrap(); + syn::parse_str::<Type>("fn(mut self,)").unwrap(); + syn::parse_str::<Type>("fn(mut self: ())").unwrap(); + syn::parse_str::<Type>("fn(mut self: ...)").unwrap_err(); + syn::parse_str::<Type>("fn(mut self: mut self)").unwrap_err(); + syn::parse_str::<Type>("fn(mut self::T)").unwrap_err(); +} + +#[test] +fn test_macro_variable_type() { + // mimics the token stream corresponding to `$ty<T>` + let tokens = TokenStream::from_iter(vec![ + TokenTree::Group(Group::new(Delimiter::None, quote! { ty })), + TokenTree::Punct(Punct::new('<', Spacing::Alone)), + TokenTree::Ident(Ident::new("T", Span::call_site())), + TokenTree::Punct(Punct::new('>', Spacing::Alone)), + ]); + + snapshot!(tokens as Type, @r###" + Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "ty", + arguments: PathArguments::AngleBracketed { + args: [ + GenericArgument::Type(Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "T", + }, + ], + }, + }), + ], + }, + }, + ], + }, + } + "###); + + // mimics the token stream corresponding to `$ty::<T>` + let tokens = TokenStream::from_iter(vec![ + TokenTree::Group(Group::new(Delimiter::None, quote! 
{ ty })), + TokenTree::Punct(Punct::new(':', Spacing::Joint)), + TokenTree::Punct(Punct::new(':', Spacing::Alone)), + TokenTree::Punct(Punct::new('<', Spacing::Alone)), + TokenTree::Ident(Ident::new("T", Span::call_site())), + TokenTree::Punct(Punct::new('>', Spacing::Alone)), + ]); + + snapshot!(tokens as Type, @r###" + Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "ty", + arguments: PathArguments::AngleBracketed { + colon2_token: Some, + args: [ + GenericArgument::Type(Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "T", + }, + ], + }, + }), + ], + }, + }, + ], + }, + } + "###); +} + +#[test] +fn test_group_angle_brackets() { + // mimics the token stream corresponding to `Option<$ty>` + let tokens = TokenStream::from_iter(vec![ + TokenTree::Ident(Ident::new("Option", Span::call_site())), + TokenTree::Punct(Punct::new('<', Spacing::Alone)), + TokenTree::Group(Group::new(Delimiter::None, quote! { Vec<u8> })), + TokenTree::Punct(Punct::new('>', Spacing::Alone)), + ]); + + snapshot!(tokens as Type, @r###" + Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Option", + arguments: PathArguments::AngleBracketed { + args: [ + GenericArgument::Type(Type::Group { + elem: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Vec", + arguments: PathArguments::AngleBracketed { + args: [ + GenericArgument::Type(Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "u8", + }, + ], + }, + }), + ], + }, + }, + ], + }, + }, + }), + ], + }, + }, + ], + }, + } + "###); +} + +#[test] +fn test_group_colons() { + // mimics the token stream corresponding to `$ty::Item` + let tokens = TokenStream::from_iter(vec![ + TokenTree::Group(Group::new(Delimiter::None, quote! { Vec<u8> })), + TokenTree::Punct(Punct::new(':', Spacing::Joint)), + TokenTree::Punct(Punct::new(':', Spacing::Alone)), + TokenTree::Ident(Ident::new("Item", Span::call_site())), + ]); + + snapshot!(tokens as Type, @r###" + Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "Vec", + arguments: PathArguments::AngleBracketed { + args: [ + GenericArgument::Type(Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "u8", + }, + ], + }, + }), + ], + }, + }, + Token![::], + PathSegment { + ident: "Item", + }, + ], + }, + } + "###); + + let tokens = TokenStream::from_iter(vec![ + TokenTree::Group(Group::new(Delimiter::None, quote! 
{ [T] })), + TokenTree::Punct(Punct::new(':', Spacing::Joint)), + TokenTree::Punct(Punct::new(':', Spacing::Alone)), + TokenTree::Ident(Ident::new("Element", Span::call_site())), + ]); + + snapshot!(tokens as Type, @r###" + Type::Path { + qself: Some(QSelf { + ty: Type::Slice { + elem: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "T", + }, + ], + }, + }, + }, + position: 0, + }), + path: Path { + leading_colon: Some, + segments: [ + PathSegment { + ident: "Element", + }, + ], + }, + } + "###); +} + +#[test] +fn test_trait_object() { + let tokens = quote!(dyn for<'a> Trait<'a> + 'static); + snapshot!(tokens as Type, @r###" + Type::TraitObject { + dyn_token: Some, + bounds: [ + TypeParamBound::Trait(TraitBound { + lifetimes: Some(BoundLifetimes { + lifetimes: [ + GenericParam::Lifetime(LifetimeParam { + lifetime: Lifetime { + ident: "a", + }, + }), + ], + }), + path: Path { + segments: [ + PathSegment { + ident: "Trait", + arguments: PathArguments::AngleBracketed { + args: [ + GenericArgument::Lifetime(Lifetime { + ident: "a", + }), + ], + }, + }, + ], + }, + }), + Token![+], + TypeParamBound::Lifetime { + ident: "static", + }, + ], + } + "###); + + let tokens = quote!(dyn 'a + Trait); + snapshot!(tokens as Type, @r###" + Type::TraitObject { + dyn_token: Some, + bounds: [ + TypeParamBound::Lifetime { + ident: "a", + }, + Token![+], + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "Trait", + }, + ], + }, + }), + ], + } + "###); + + // None of the following are valid Rust types. + syn::parse_str::<Type>("for<'a> dyn Trait<'a>").unwrap_err(); + syn::parse_str::<Type>("dyn for<'a> 'a + Trait").unwrap_err(); +} + +#[test] +fn test_trailing_plus() { + #[rustfmt::skip] + let tokens = quote!(impl Trait +); + snapshot!(tokens as Type, @r###" + Type::ImplTrait { + bounds: [ + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "Trait", + }, + ], + }, + }), + Token![+], + ], + } + "###); + + #[rustfmt::skip] + let tokens = quote!(dyn Trait +); + snapshot!(tokens as Type, @r###" + Type::TraitObject { + dyn_token: Some, + bounds: [ + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "Trait", + }, + ], + }, + }), + Token![+], + ], + } + "###); + + #[rustfmt::skip] + let tokens = quote!(Trait +); + snapshot!(tokens as Type, @r###" + Type::TraitObject { + bounds: [ + TypeParamBound::Trait(TraitBound { + path: Path { + segments: [ + PathSegment { + ident: "Trait", + }, + ], + }, + }), + Token![+], + ], + } + "###); +} + +#[test] +fn test_tuple_comma() { + let mut expr = TypeTuple { + paren_token: token::Paren::default(), + elems: Punctuated::new(), + }; + snapshot!(expr.to_token_stream() as Type, @"Type::Tuple"); + + expr.elems.push_value(parse_quote!(_)); + // Must not parse to Type::Paren + snapshot!(expr.to_token_stream() as Type, @r###" + Type::Tuple { + elems: [ + Type::Infer, + Token![,], + ], + } + "###); + + expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Type, @r###" + Type::Tuple { + elems: [ + Type::Infer, + Token![,], + ], + } + "###); + + expr.elems.push_value(parse_quote!(_)); + snapshot!(expr.to_token_stream() as Type, @r###" + Type::Tuple { + elems: [ + Type::Infer, + Token![,], + Type::Infer, + ], + } + "###); + + expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Type, @r###" + Type::Tuple { + elems: [ + Type::Infer, + Token![,], + Type::Infer, + Token![,], + ], + } + "###); +} diff 
--git a/vendor/syn/tests/test_visibility.rs b/vendor/syn/tests/test_visibility.rs new file mode 100644 index 0000000..496e007 --- /dev/null +++ b/vendor/syn/tests/test_visibility.rs @@ -0,0 +1,144 @@ +#![allow(clippy::uninlined_format_args)] + +#[macro_use] +mod macros; + +use proc_macro2::{Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree}; +use syn::parse::{Parse, ParseStream}; +use syn::{DeriveInput, Result, Visibility}; + +#[derive(Debug)] +struct VisRest { + vis: Visibility, + rest: TokenStream, +} + +impl Parse for VisRest { + fn parse(input: ParseStream) -> Result<Self> { + Ok(VisRest { + vis: input.parse()?, + rest: input.parse()?, + }) + } +} + +macro_rules! assert_vis_parse { + ($input:expr, Ok($p:pat)) => { + assert_vis_parse!($input, Ok($p) + ""); + }; + + ($input:expr, Ok($p:pat) + $rest:expr) => { + let expected = $rest.parse::<TokenStream>().unwrap(); + let parse: VisRest = syn::parse_str($input).unwrap(); + + match parse.vis { + $p => {} + _ => panic!("Expected {}, got {:?}", stringify!($p), parse.vis), + } + + // NOTE: Round-trips through `to_string` to avoid potential whitespace + // diffs. + assert_eq!(parse.rest.to_string(), expected.to_string()); + }; + + ($input:expr, Err) => { + syn::parse2::<VisRest>($input.parse().unwrap()).unwrap_err(); + }; +} + +#[test] +fn test_pub() { + assert_vis_parse!("pub", Ok(Visibility::Public(_))); +} + +#[test] +fn test_inherited() { + assert_vis_parse!("", Ok(Visibility::Inherited)); +} + +#[test] +fn test_in() { + assert_vis_parse!("pub(in foo::bar)", Ok(Visibility::Restricted(_))); +} + +#[test] +fn test_pub_crate() { + assert_vis_parse!("pub(crate)", Ok(Visibility::Restricted(_))); +} + +#[test] +fn test_pub_self() { + assert_vis_parse!("pub(self)", Ok(Visibility::Restricted(_))); +} + +#[test] +fn test_pub_super() { + assert_vis_parse!("pub(super)", Ok(Visibility::Restricted(_))); +} + +#[test] +fn test_missing_in() { + assert_vis_parse!("pub(foo::bar)", Ok(Visibility::Public(_)) + "(foo::bar)"); +} + +#[test] +fn test_missing_in_path() { + assert_vis_parse!("pub(in)", Err); +} + +#[test] +fn test_crate_path() { + assert_vis_parse!( + "pub(crate::A, crate::B)", + Ok(Visibility::Public(_)) + "(crate::A, crate::B)" + ); +} + +#[test] +fn test_junk_after_in() { + assert_vis_parse!("pub(in some::path @@garbage)", Err); +} + +#[test] +fn test_empty_group_vis() { + // mimics `struct S { $vis $field: () }` where $vis is empty + let tokens = TokenStream::from_iter(vec![ + TokenTree::Ident(Ident::new("struct", Span::call_site())), + TokenTree::Ident(Ident::new("S", Span::call_site())), + TokenTree::Group(Group::new( + Delimiter::Brace, + TokenStream::from_iter(vec![ + TokenTree::Group(Group::new(Delimiter::None, TokenStream::new())), + TokenTree::Group(Group::new( + Delimiter::None, + TokenStream::from_iter(vec![TokenTree::Ident(Ident::new( + "f", + Span::call_site(), + ))]), + )), + TokenTree::Punct(Punct::new(':', Spacing::Alone)), + TokenTree::Group(Group::new(Delimiter::Parenthesis, TokenStream::new())), + ]), + )), + ]); + + snapshot!(tokens as DeriveInput, @r###" + DeriveInput { + vis: Visibility::Inherited, + ident: "S", + generics: Generics, + data: Data::Struct { + fields: Fields::Named { + named: [ + Field { + vis: Visibility::Inherited, + ident: Some("f"), + colon_token: Some, + ty: Type::Tuple, + }, + ], + }, + }, + } + "###); +} diff --git a/vendor/syn/tests/zzz_stable.rs b/vendor/syn/tests/zzz_stable.rs new file mode 100644 index 0000000..a1a670d --- /dev/null +++ b/vendor/syn/tests/zzz_stable.rs @@ -0,0 
+1,33 @@ +#![cfg(syn_disable_nightly_tests)] + +use std::io::{self, Write}; +use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; + +const MSG: &str = "\ +‖ +‖ WARNING: +‖ This is not a nightly compiler so not all tests were able to +‖ run. Syn includes tests that compare Syn's parser against the +‖ compiler's parser, which requires access to unstable librustc +‖ data structures and a nightly compiler. +‖ +"; + +#[test] +fn notice() -> io::Result<()> { + let header = "WARNING"; + let index_of_header = MSG.find(header).unwrap(); + let before = &MSG[..index_of_header]; + let after = &MSG[index_of_header + header.len()..]; + + let mut stderr = StandardStream::stderr(ColorChoice::Auto); + stderr.set_color(ColorSpec::new().set_fg(Some(Color::Yellow)))?; + write!(&mut stderr, "{}", before)?; + stderr.set_color(ColorSpec::new().set_bold(true).set_fg(Some(Color::Yellow)))?; + write!(&mut stderr, "{}", header)?; + stderr.set_color(ColorSpec::new().set_fg(Some(Color::Yellow)))?; + write!(&mut stderr, "{}", after)?; + stderr.reset()?; + + Ok(()) +} diff --git a/vendor/tinyrick-0.0.13/.cargo-checksum.json b/vendor/tinyrick-0.0.13/.cargo-checksum.json new file mode 100644 index 0000000..d35b88e --- /dev/null +++ b/vendor/tinyrick-0.0.13/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.lock":"99670f6141c3cf3e56dee55651543584c7a317acf0e22d8d68e0bb5267b63265","Cargo.toml":"097c224cc65407cbcc2662dd64c0aa63e70bbe47be0dbe6bafea633c233293e3","Cross.toml":"5ea46353870ddf25126cd53fd2d6ec8c0fc991ba06e17a93130d354ba444bdf4","DEVELOPMENT.md":"d9e2c7c92b48d893a1313da417b62cd8c8b7a58cf62747065fdd9a54c721255e","LICENSE.md":"692fd932aac63bc63ba3b4908850cdcc1a38cee1527a07440355811028656c38","README.md":"3f71dfa6c728f861faa80bf074d4ba1925a6ddce19107b301603471de3b8eca9","install.mk":"7430f7a209f12b2b45e3d3becc8152b825528c7909b6c75917eafc57427eb7e4","makefile":"69fc8044b284f138491c474595a98191b46756691c5ac3cda4f1695e96179041","sample.envrc":"cedd4bfb23ba835278b0e4df8c77ac1d42bd11a688d736621346bd35195a37ff","src/lib.rs":"4ed464aa7f4ff462cb315f1508240827e6b47aea00ffbefcfcf9f50d62c83d9c","src/tinyrick.rs":"8cdc996761ae236c717b03741a786bd5376a74c76c8bd2e8d248dc18ec98192e"},"package":"8219cde494438520d0714a476f486533e5befb02c6438c9ca741aaa2c43002ff"} \ No newline at end of file diff --git a/vendor/tinyrick-0.0.13/Cargo.toml b/vendor/tinyrick-0.0.13/Cargo.toml new file mode 100644 index 0000000..cc7b38a --- /dev/null +++ b/vendor/tinyrick-0.0.13/Cargo.toml @@ -0,0 +1,36 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +name = "tinyrick" +version = "0.0.13" +authors = ["Andrew Pennebaker <andrew.pennebaker@gmail.com>"] +description = "a freeform Rust build system" +homepage = "https://github.com/mcandre/tinyrick" +documentation = "https://docs.rs/tinyrick/" +readme = "README.md" +license = "BSD-2-Clause" + +[lib] +name = "tinyrick" + +[[bin]] +name = "tinyrick" +path = "src/tinyrick.rs" + +[dependencies.die] +version = "0.2.0" + +[dependencies.getopts] +version = "0.2.21" + +[dependencies.lazy_static] +version = "1.4.0" diff --git a/vendor/tinyrick-0.0.13/Cross.toml b/vendor/tinyrick-0.0.13/Cross.toml new file mode 100644 index 0000000..adf04ae --- /dev/null +++ b/vendor/tinyrick-0.0.13/Cross.toml @@ -0,0 +1,15 @@ +[target.x86_64-unknown-netbsd] +pre-build = [ + "mkdir -p /tmp/netbsd", + "curl https://cdn.netbsd.org/pub/NetBSD/NetBSD-9.2/amd64/binary/sets/base.tar.xz -O", + "tar -C /tmp/netbsd -xJf base.tar.xz", + "cp /tmp/netbsd/usr/lib/libexecinfo.so /usr/local/x86_64-unknown-netbsd/lib", + "rm base.tar.xz", + "rm -rf /tmp/netbsd", +] + +[target.aarch64-apple-darwin] +image = "freeznet/aarch64-apple-darwin-cross:11.3.1" + +[target.x86_64-apple-darwin] +image = "freeznet/x86_64-apple-darwin-cross:11.3.1" diff --git a/vendor/tinyrick-0.0.13/DEVELOPMENT.md b/vendor/tinyrick-0.0.13/DEVELOPMENT.md new file mode 100644 index 0000000..dd9a0fd --- /dev/null +++ b/vendor/tinyrick-0.0.13/DEVELOPMENT.md @@ -0,0 +1,61 @@ +# OVERVIEW + +tinyrick's own compilation process is compatible with standard `cargo`. We wrap some common workflows with `build` tasks for convenience. + +# BUILDTIME REQUIREMENTS + +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ +* a UNIX environment with [coreutils](https://www.gnu.org/software/coreutils/) / [base](http://ftp.freebsd.org/pub/FreeBSD/releases/) / [macOS](https://www.apple.com/macos) / [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) / etc. +* [GNU findutils](https://www.gnu.org/software/findutils/) +* [Docker](https://www.docker.com/) 20.10.21+ +* [zip](https://linux.die.net/man/1/zip) +* a POSIX compliant [make](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/make.html) implementation (e.g. GNU make, BSD make, etc.) +* Provision additional dev tools with `make -f install.mk` + +## Recommended + +* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after provisioning) +* [direnv](https://direnv.net/) 2 +* [cargo-cache](https://crates.io/crates/cargo-cache) + +# INSTALL BINARY ARTIFACTS FROM LOCAL SOURCE + +```console +$ make install +``` + +# UNINSTALL BINARY ARTIFACTS + +```console +$ make uninstall +``` + +# AUDIT + +```console +$ make audit +``` + +# BUILD: LINT, DOC, COMPILE, and TEST + +```console +$ make build +``` + +# PUBLISH + +```console +$ make publish +``` + +# PORT + +```console +$ make port +``` + +# CLEAN + +```console +$ make clean +``` diff --git a/vendor/tinyrick-0.0.13/LICENSE.md b/vendor/tinyrick-0.0.13/LICENSE.md new file mode 100644 index 0000000..bff0a40 --- /dev/null +++ b/vendor/tinyrick-0.0.13/LICENSE.md @@ -0,0 +1,26 @@ +Copyright (c) 2018, Andrew Pennebaker +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright, this + list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OR MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of the FreeBSD project. diff --git a/vendor/tinyrick-0.0.13/README.md b/vendor/tinyrick-0.0.13/README.md new file mode 100644 index 0000000..2d8e021 --- /dev/null +++ b/vendor/tinyrick-0.0.13/README.md @@ -0,0 +1,179 @@ +# tinyrick: a freeform Rust build system + +``` + .---. ^ + o{__ω__ o{ ^0^ -Let me out! +~~ ( // *|* \xx\) xx`|' + = = xxxx&x ' ` +``` + +# EXAMPLE + +```console +$ cd example + +$ tinyrick +running 1 test +test smoketest ... ok + +$ tinyrick -h +Usage: tinyrick [options] + +Options: + -l, --list list available tasks + -h, --help print usage info + -v, --version print version info +``` + +# ABOUT + +I'm tinyrick (TINYRICK!) and I build Rust projects. With tinyrick, you configure your build in the same normal Rust code as the rest of your project. Or keep picking your nose with make, it's up to you. + +Look at my pants! tinyrick! You think my pants are one size fits all? No, of course not! So get the pants that fit you. Get a `tinyrick.rs` that fits your workflow. Task dependency trees, get em while they're hot! Segfaults, get em while they're not. Smarter, butter, faster, stranger. + +Don't shell out, lib out. Your build is more portable that way. tinyricktinyricktinyrick. If you look closely, that last period is actually a *micro* rick rendered in ASCII; even tinier tinyrick! 
+ +# CRATE + +https://crates.io/crates/tinyrick + +# API DOCUMENTATION + +https://docs.rs/tinyrick/latest/tinyrick/ + +# LICENSE + +BSD-2-Clause + +# RUNTIME REQUIREMENTS + +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ + +## Recommended + +* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after each Rust application binary installation) +* [direnv](https://direnv.net/) 2 +* [cargo-cache](https://crates.io/crates/cargo-cache) + +# SETUP + +## tinyrick.rs + +Write some tasks in a `tinyrick.rs` build configuration script at the top-level directory of your Rust project: + +```rust +fn banner() { + println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); +} + +fn test() { + tinyrick::exec!("cargo", &["test"]); +} + +fn build() { + tinyrick::deps(test); + tinyrick::exec!("cargo", &["build", "--release"]); +} + +fn publish() { + tinyrick::exec!("cargo", &["publish"]); +} + +fn clean() { + tinyrick::exec!("cargo", &["clean"]); +} + +fn main() { + tinyrick::phony!(clean); + tinyrick::wubba_lubba_dub_dub!(build; banner, test, publish, clean); +} +``` + +## Cargo.toml + +Now, wire up the tinyrick command line interface by configuring your top-level `Cargo.toml`: + +```toml +[package] +name = "derpmobile" +description = "hyperadvanced derpmobiles" +version = "3.1.4" + +[dependencies] +tinyrick = { version = "0.0.13", optional = true } + +[features] +letmeout = ["tinyrick"] + +[[bin]] +name = "tinyrick" +path = "tinyrick.rs" +required-features = ["letmeout"] +``` + +Launch a terminal session in your project directory. Install and run the tinyrick tool: + +```console +$ cargo install tinyrick +$ tinyrick +``` + +Watch how he behaves... I hope tinyrick is practicing good manners :P + +What happens when you run: + +* `tinyrick banner`? +* `tinyrick test`? +* `tinyrick clean`? +* `tinyrick build`? +* `tinyrick -h`? +* `tinyrick --list`? +* `VERBOSE=1 tinyrick build`? + +I bet the freakin' moon explodes if you run `VERBOSE=1 tinyrick build build build`! (Hold onto your pants~) + +# DEBRIEFING + +Where are my pants? Let's break down the code so far: + +* `fn name() { ... }` declares a task named `name`. +* `deps(requirement)` caches a dependency on task `requirement`. +* `exec!(...)` spawns raw shell command processes. +* `VERBOSE=1` enables command string emission during processing. +* `phony!(...)` disables dependency caching for some tasks. +* `wubba_lubba_dub_dub!(default; ...)` exposes a `default` task and some other tasks to the tinyrick command line. +* `letmeout` is a feature gate, so that neither the tinyrick package, nor your tinyrick binary escape with your Rust package when you `tinyrick publish`. + +# DoN't UsE sHelL cOmMaNdS!1 + +Just because the tinyrick library offers several *supremely convenient* macros for executing shell commands doesn't mean that you should always shell out. No way, man! + +Whenever possible, use regular Rust code, as in the `banner()` example. There's like a ba-jillion [crates](https://crates.io) of prewritten Rust code, so you might as well use it! + +# CONTRIBUTING + +For more details on developing tinyrick itself, see [DEVELOPMENT.md](DEVELOPMENT.md). 
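+
+# APPENDIX: CACHING SKETCH
+
+The DEBRIEFING list above is abstract, so here is a minimal `tinyrick.rs` sketch of how task caching plays out, assuming only the `deps`, `phony!`, and `wubba_lubba_dub_dub!` semantics described there: non-phony tasks run at most once per invocation, while phony tasks re-run every time they are depended on.
+
+```rust
+fn test() {
+    // Plain Rust beats shelling out, remember?
+    println!("running tests");
+}
+
+fn build() {
+    // The first deps() call runs test() and caches the fact that it ran.
+    tinyrick::deps(test);
+    // The second deps() call is a no-op: test is not phony, so the cache wins.
+    tinyrick::deps(test);
+}
+
+fn clean() {
+    tinyrick::exec!("cargo", &["clean"]);
+}
+
+fn main() {
+    // phony! exempts clean from caching; deps(clean) would re-run it every time.
+    tinyrick::phony!(clean);
+    // build is the default task when the CLI names no tasks.
+    tinyrick::wubba_lubba_dub_dub!(build; test, clean);
+}
+```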
+ +# SEE ALSO + +* Inspired by the excellent [mage](https://magefile.org/) build system for Go projects +* [bb](https://github.com/mcandre/bb), a build system for (g)awk projects +* [cargo](https://doc.rust-lang.org/cargo/reference/build-scripts.html) custom build scripts, primarily for generating Rust source files from other languages +* [cmake](https://cmake.org/) for C/C++ projects +* [dale](https://github.com/mcandre/dale) builds D projects +* [GNU autotools](https://www.gnu.org/software/automake/manual/html_node/Autotools-Introduction.html), a build system for Linux C/C++ projects +* [Gradle](https://gradle.org/), a build system for JVM projects +* [invoke](https://pypi.org/project/invoke/), a Python task runner +* [jelly](https://github.com/mcandre/jelly), a JSON task runner +* [lake](https://luarocks.org/modules/steved/lake), a Lua task runner +* [lichen](https://github.com/mcandre/lichen), a sed task runner +* [POSIX make](https://pubs.opengroup.org/onlinepubs/009695299/utilities/make.html), a task runner standard for C/C++ and various other software projects +* [Rake](https://ruby.github.io/rake/), a task runner for Ruby projects +* [rez](https://github.com/mcandre/rez) builds C/C++ projects +* [Shake](https://shakebuild.com/), a task runner for Haskell projects + +# EVEN MORE EXAMPLES + +* The included [example](example) project provides a fully qualified demonstration of how to build projects with tinyrick. +* For a more practical example, see [ios7crypt-rs](https://github.com/mcandre/ios7crypt-rs), a little *modulino* library + command line tool for *deliciously dumb* password encryption. +* [tinyrick_extras](https://github.com/mcandre/tinyrick_extras) defines some common workflow tasks as plain old Rust functions, that you can sprinkle onto your tinyrick just like any other Rust crate. diff --git a/vendor/tinyrick-0.0.13/install.mk b/vendor/tinyrick-0.0.13/install.mk new file mode 100644 index 0000000..210297f --- /dev/null +++ b/vendor/tinyrick-0.0.13/install.mk @@ -0,0 +1,13 @@ +.POSIX: +.SILENT: +.PHONY: all + +all: + rustup component add \ + clippy \ + rustfmt + cargo install --force \ + cargo-audit \ + crit@0.0.7 \ + cross@0.2.5 \ + unmake@0.0.12 diff --git a/vendor/tinyrick-0.0.13/makefile b/vendor/tinyrick-0.0.13/makefile new file mode 100755 index 0000000..3c297df --- /dev/null +++ b/vendor/tinyrick-0.0.13/makefile @@ -0,0 +1,70 @@ +.POSIX: +.SILENT: +.IGNORE: uninstall clean +.PHONY: all \ + audit \ + doc \ + lint \ + clippy \ + rustfmt \ + unmake \ + build \ + port \ + crit \ + test \ + install \ + uninstall \ + publish \ + clean + +BANNER=tinyrick-0.0.13 + +all: build + +test: install + sh -c "cd example && tinyrick -l" + sh -c "cd example && tinyrick -v" + sh -c "cd example && tinyrick -h" + sh -c "cd example && tinyrick" + sh -c "cd example && VERBOSE=1 tinyrick test clippy lint build_debug build_release build doc install unit_test integration_test test banner uninstall clean_cargo clean" + +install: + cargo install --force --path . + +uninstall: + cargo uninstall tinyrick + +audit: + cargo audit + +doc: + cargo doc + +clippy: + cargo clippy + +rustfmt: + cargo fmt + +unmake: + unmake . 
+ +lint: doc clippy rustfmt unmake + +build: lint test + cargo build --release + +publish: + cargo publish + +crit: + crit -b $(BANNER) + +port: crit + sh -c "cd .crit/bin && zip -r $(BANNER).zip $(BANNER)" + +clean: + crit -c + cargo clean + rm -rf example/target + rm -rf example/Cargo.lock diff --git a/vendor/tinyrick-0.0.13/sample.envrc b/vendor/tinyrick-0.0.13/sample.envrc new file mode 100644 index 0000000..e7fe0f8 --- /dev/null +++ b/vendor/tinyrick-0.0.13/sample.envrc @@ -0,0 +1,10 @@ +#!/bin/sh +# +# Install: +# cp sample.envrc .envrc +# +# Uninstall: +# rm .envrc + +source_up +export DOCKER_DEFAULT_PLATFORM='linux/amd64' diff --git a/vendor/tinyrick-0.0.13/src/lib.rs b/vendor/tinyrick-0.0.13/src/lib.rs new file mode 100644 index 0000000..1338b7e --- /dev/null +++ b/vendor/tinyrick-0.0.13/src/lib.rs @@ -0,0 +1,256 @@ +//! Common build patterns + +extern crate lazy_static; + +use std::collections::HashMap; +use std::sync::Mutex; + +/// Cargo toggle +pub static FEATURE: &str = "letmeout"; + +/// Environment name controlling verbosity +pub static VERBOSE_ENVIRONMENT_NAME: &str = "VERBOSE"; + +lazy_static::lazy_static! { + static ref DEPENDENCY_CACHE_MUTEX: Mutex<HashMap<fn(), bool>> = Mutex::new(HashMap::new()); + + pub static ref PHONY_TASK_MUTEX: Mutex<Vec<fn()>> = Mutex::new(Vec::new()); +} + +/// Query common host binary suffix +pub fn binary_suffix() -> String { + if cfg!(windows) { + return ".exe".to_string(); + } + + String::new() +} + +/// Declare a dependency on a task that may panic +pub fn deps(task: fn()) { + let phony: bool = PHONY_TASK_MUTEX.lock().unwrap().contains(&task); + let has_run: bool = DEPENDENCY_CACHE_MUTEX.lock().unwrap().contains_key(&task); + + if phony || !has_run { + task(); + DEPENDENCY_CACHE_MUTEX.lock().unwrap().insert(task, true); + } +} + +/// Declare tasks with no obviously cacheable artifacts. +#[macro_export] +macro_rules! phony { + ($t : expr) => { + { + tinyrick::PHONY_TASK_MUTEX + .lock() + .unwrap() + .push($t); + } + }; + ($t : expr, $($u : expr),*) => { + { + let ref mut phony_tasks = tinyrick::PHONY_TASK_MUTEX + .lock() + .unwrap(); + + phony_tasks.push($t); + $( phony_tasks.push($u); )* + } + }; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the command object. +#[macro_export] +macro_rules! exec_mut_with_arguments { + ($p : expr, $a : expr) => {{ + use std::env::var; + use std::process::Command; + + if var(tinyrick::VERBOSE_ENVIRONMENT_NAME).is_ok() { + println!("{} {}", $p, $a.join(" ")); + } + + Command::new($p).args($a) + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program. Can also accept CLI arguments collection. +/// Returns the command object. +#[macro_export] +macro_rules! exec_mut { + ($p : expr) => {{ + let args: &[&str] = &[]; + tinyrick::exec_mut_with_arguments!($p, args) + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut_with_arguments!($p, $a) + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the output object. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! 
exec_output { + ($p : expr) => {{ + tinyrick::exec_mut!($p).output().unwrap() + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut!($p, $a).output().unwrap() + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the stdout stream. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! exec_stdout { + ($p : expr) => {{ + tinyrick::exec_output!($p).stdout + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_output!($p, $a).stdout + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the stderr stream. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! exec_stderr { + ($p : expr) => {{ + tinyrick::exec_output!($p).stderr + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_output!($p, $a).stderr + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the complete stdout string. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! exec_stdout_utf8 { + ($p : expr) => {{ + String::from_utf8(tinyrick::exec_stdout!($p)).unwrap() + }}; + ($p : expr, $a : expr) => {{ + String::from_utf8(tinyrick::exec_stdout!($p, $a)).unwrap() + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the complete stderr string. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! exec_stderr_utf8 { + ($p : expr) => {{ + String::from_utf8(tinyrick::exec_stderr!($p)).unwrap() + }}; + ($p : expr, $a : expr) => {{ + String::from_utf8(tinyrick::exec_stderr!($p, $a)).unwrap() + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the status. +/// Panics if the command could not run to completion. +#[macro_export] +macro_rules! exec_status { + ($p : expr) => {{ + tinyrick::exec_mut!($p).status().unwrap() + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut!($p, $a).status().unwrap() + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! exec { + ($p : expr) => {{ + assert!(tinyrick::exec_status!($p).success()); + }}; + ($p : expr, $a : expr) => {{ + assert!(tinyrick::exec_status!($p, $a).success()) + }}; +} + +/// Show registered tasks +#[macro_export] +macro_rules! list_tasks { + ($t : expr) => { + { + use std::process; + + println!("Registered tasks:\n"); + println!("* {}", stringify!($t)); + process::exit(0); + } + }; + ($t : expr, $($u : expr),*) => { + { + use std::process; + + println!("Registered tasks:\n"); + println!("* {}", stringify!($t)); + $(println!("* {}", stringify!($u));)* + process::exit(0); + } + }; +} + +/// Register tasks with the CLI entrypoint. +/// The first entry is the default task, +/// run when no tasks are named in CLI arguments. +#[macro_export] +macro_rules!
wubba_lubba_dub_dub { + ($d : expr ; $($t : expr),*) => { + use std::env; + use std::process; + + let arguments: Vec<String> = env::args().collect(); + + let task_names: Vec<&str> = arguments + .iter() + .skip(1) + .map(String::as_str) + .collect(); + + if task_names.is_empty() { + $d(); + process::exit(0); + } + + for task_name in task_names { + match task_name { + "-l" => tinyrick::list_tasks!($d $(, $t)*), + "--list" => tinyrick::list_tasks!($d $(, $t)*), + stringify!($d) => $d(), + $(stringify!($t) => $t(),)* + _ => panic!("Unknown task {}", task_name) + } + } + }; +} diff --git a/vendor/tinyrick-0.0.13/src/tinyrick.rs b/vendor/tinyrick-0.0.13/src/tinyrick.rs new file mode 100644 index 0000000..a61124f --- /dev/null +++ b/vendor/tinyrick-0.0.13/src/tinyrick.rs @@ -0,0 +1,67 @@ +//! CLI tinyrick tool + +extern crate die; +extern crate getopts; +extern crate tinyrick; + +use die::{die, Die}; +use std::env; +use std::path; + +/// Show version information +pub fn banner() { + println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); +} + +/// CLI entrypoint +fn main() { + let brief: String = format!("Usage: {} [options]", env!("CARGO_PKG_NAME")); + + let mut opts: getopts::Options = getopts::Options::new(); + opts.optflag("l", "list", "list available tasks"); + opts.optflag("h", "help", "print usage info"); + opts.optflag("v", "version", "print version info"); + + let usage: String = opts.usage(&brief); + let arguments: Vec<String> = env::args().collect(); + let optmatches: getopts::Matches = opts.parse(&arguments[1..]).die(&usage); + + if optmatches.opt_present("h") { + die!(0; usage); + } + + if optmatches.opt_present("v") { + die!(0; format!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))); + } + + let list_tasks: bool = optmatches.opt_present("l"); + let tasks: Vec<String> = optmatches.free; + + tinyrick::exec!( + "cargo", + &[ + "build", + "--bin", + env!("CARGO_PKG_NAME"), + "--features", + tinyrick::FEATURE + ] + ); + + let target_path: &path::Path = path::Path::new("target"); + + let rick_pathbuf: path::PathBuf = target_path.join("debug").join(format!( + "{}{}", + env!("CARGO_PKG_NAME"), + tinyrick::binary_suffix() + )); + + let rick_path: &str = rick_pathbuf.to_str().unwrap(); + + if list_tasks { + tinyrick::exec!(rick_path, &["-l"]); + die!(0); + } + + tinyrick::exec!(rick_path, tasks); +} diff --git a/vendor/tinyrick/.cargo-checksum.json b/vendor/tinyrick/.cargo-checksum.json index dac4740..88196dd 100644 --- a/vendor/tinyrick/.cargo-checksum.json +++ b/vendor/tinyrick/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"0a17a4f6b595c7e109dd4e1f671dd718fb774127e73f921259048cd2af84aaf8","DEVELOPMENT.md":"ab6cbb35cb42ca9a3091f2b6102a53a0aac7c38e65ea5e51649fbb7ba2b86161","README.md":"a49d82476c132075b496256f56b7f89458aa23f72b6fd7fa8de0458961f011d6","build.sh":"4bbee692a242410f9745ef86195f66877799d6983ea77f31589e4dbbb8adf483","crosscompile-linux.sh":"230130b57ea14dd8391dd7ae353b98495b8e6dff92c0f919aa7e573caff7f22e","src/lib.rs":"c6304e6f5d2beb5c8a56ed7a4884b9f1751c55ec88ead37863b195338dcf1ce0","src/tinyrick.rs":"db9727aef623f01615ec965bbb09f4baeb26d6488fa9a1399648e570008b8e9a"},"package":"504eac4d92d289782273d1cc4da582078f9b35e85b64c6dddfbf7ed773d7e294"} \ No newline at end of file 
+{"files":{"Cargo.lock":"1e8b91c307ee1da1e4e884d4db5761c8cb9dab85fa83555f86cfcebe9be6bd03","Cargo.toml":"a7ffc956a85913544bfaf94a1c1d30a5b25e419e74bed7a9579e70998a3534b6","Cross.toml":"5ea46353870ddf25126cd53fd2d6ec8c0fc991ba06e17a93130d354ba444bdf4","DEVELOPMENT.md":"b82234a910fe9195138019ceb158028bd46afa2949ab2310e0a3db6a8822bfff","LICENSE.md":"692fd932aac63bc63ba3b4908850cdcc1a38cee1527a07440355811028656c38","README.md":"4285f0a220d7a0e4319d8c45dfeea9c4220fb431880f2813d2506156f8590ab4","install.mk":"8afc130458ba6d1fba0f06fa13466324a9efcb59b845c26e63afc41231d0274d","makefile":"78476d08314d2b73862cdee087d51e1d0aebb3b3d46bda24e2176a26b00efcbf","sample.envrc":"cedd4bfb23ba835278b0e4df8c77ac1d42bd11a688d736621346bd35195a37ff","src/lib.rs":"4ed464aa7f4ff462cb315f1508240827e6b47aea00ffbefcfcf9f50d62c83d9c","src/tinyrick.rs":"8cdc996761ae236c717b03741a786bd5376a74c76c8bd2e8d248dc18ec98192e"},"package":"0b3b4fc359c03f76007b160be12ba2626986379f43dce9a016a4e473f3708b1f"} \ No newline at end of file diff --git a/vendor/tinyrick/Cargo.toml b/vendor/tinyrick/Cargo.toml index fcbe184..7843a3e 100644 --- a/vendor/tinyrick/Cargo.toml +++ b/vendor/tinyrick/Cargo.toml @@ -3,20 +3,20 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g. crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. [package] name = "tinyrick" -version = "0.0.9" +version = "0.0.14" authors = ["Andrew Pennebaker <andrew.pennebaker@gmail.com>"] description = "a freeform Rust build system" homepage = "https://github.com/mcandre/tinyrick" documentation = "https://docs.rs/tinyrick/" +readme = "README.md" license = "BSD-2-Clause" [lib] @@ -25,8 +25,12 @@ name = "tinyrick" [[bin]] name = "tinyrick" path = "src/tinyrick.rs" + +[dependencies.die] +version = "0.2.0" + [dependencies.getopts] -version = "0.2.18" +version = "0.2.21" [dependencies.lazy_static] -version = "1.1.0" +version = "1.4.0" diff --git a/vendor/tinyrick/Cross.toml b/vendor/tinyrick/Cross.toml new file mode 100644 index 0000000..adf04ae --- /dev/null +++ b/vendor/tinyrick/Cross.toml @@ -0,0 +1,15 @@ +[target.x86_64-unknown-netbsd] +pre-build = [ + "mkdir -p /tmp/netbsd", + "curl https://cdn.netbsd.org/pub/NetBSD/NetBSD-9.2/amd64/binary/sets/base.tar.xz -O", + "tar -C /tmp/netbsd -xJf base.tar.xz", + "cp /tmp/netbsd/usr/lib/libexecinfo.so /usr/local/x86_64-unknown-netbsd/lib", + "rm base.tar.xz", + "rm -rf /tmp/netbsd", +] + +[target.aarch64-apple-darwin] +image = "freeznet/aarch64-apple-darwin-cross:11.3.1" + +[target.x86_64-apple-darwin] +image = "freeznet/x86_64-apple-darwin-cross:11.3.1" diff --git a/vendor/tinyrick/DEVELOPMENT.md b/vendor/tinyrick/DEVELOPMENT.md index ab5a1ff..100fc27 100644 --- a/vendor/tinyrick/DEVELOPMENT.md +++ b/vendor/tinyrick/DEVELOPMENT.md @@ -1,47 +1,66 @@ # OVERVIEW -tinyrick's own compilation process is compatible with standard `cargo`. We wrap some common workflows with `build.sh` tasks for convenience. 
+tinyrick's own compilation process is compatible with standard `cargo`. We wrap some common workflows with `build` tasks for convenience. # BUILDTIME REQUIREMENTS -* [Rust](https://www.rust-lang.org/en-US/) 1.30+ -* [clippy](https://github.com/rust-lang-nursery/rust-clippy) -* [coreutils](https://www.gnu.org/software/coreutils/coreutils.html) -* [zip](https://linux.die.net/man/1/zip) -* [Docker](https://www.docker.com/) +* [Docker](https://www.docker.com/) 20.10.21+ +* POSIX compatible [make](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/make.html) +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ +* POSIX compatible [sh](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sh.html) +* POSIX compatible [tar](https://pubs.opengroup.org/onlinepubs/7908799/xcu/tar.html) +* Provision additional dev tools with `make -f install.mk` + +## Recommended + +* a host capable of running musl/Linux containers (e.g. a GNU/Linux, musl/Linux, macOS, or Windows host) +* [Docker First Aid Kit](https://github.com/mcandre/docker-first-aid-kit) +* Apply `DOCKER_DEFAULT_PLATFORM` = `linux/amd64` environment variable +* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after provisioning) +* [cargo-cache](https://crates.io/crates/cargo-cache) +* [direnv](https://direnv.net/) 2 +* a UNIX environment, such as macOS, Linux, BSD, [WSL](https://learn.microsoft.com/en-us/windows/wsl/), etc. + +Non-UNIX environments may produce subtle adverse effects when linting or generating application ports. # INSTALL BINARY ARTIFACTS FROM LOCAL SOURCE ```console -$ sh build.sh install +$ make install ``` # UNINSTALL BINARY ARTIFACTS ```console -$ sh build.sh uninstall +$ make uninstall +``` + +# AUDIT + +```console +$ make audit ``` # BUILD: LINT, DOC, COMPILE, and TEST ```console -$ sh build.sh [build] +$ make build ``` # PUBLISH ```console -$ sh build.sh publish +$ make publish ``` # PORT ```console -$ sh build.sh port +$ make port ``` # CLEAN ```console -$ sh build.sh clean +$ make clean ``` diff --git a/vendor/tinyrick/LICENSE.md b/vendor/tinyrick/LICENSE.md new file mode 100644 index 0000000..bff0a40 --- /dev/null +++ b/vendor/tinyrick/LICENSE.md @@ -0,0 +1,26 @@ +Copyright (c) 2018, Andrew Pennebaker +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OR MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of the FreeBSD project. diff --git a/vendor/tinyrick/README.md b/vendor/tinyrick/README.md index fee621d..c131009 100644 --- a/vendor/tinyrick/README.md +++ b/vendor/tinyrick/README.md @@ -39,11 +39,21 @@ https://crates.io/crates/tinyrick # API DOCUMENTATION -https://docs.rs/tinyrick/ +https://docs.rs/tinyrick/latest/tinyrick/ + +# LICENSE + +BSD-2-Clause # RUNTIME REQUIREMENTS -* [Rust](https://www.rust-lang.org/en-US/) 1.30+ +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ + +## Recommended + +* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after each Rust application binary installation) +* [direnv](https://direnv.net/) 2 +* [cargo-cache](https://crates.io/crates/cargo-cache) # SETUP @@ -53,29 +63,29 @@ Write some tasks in a `tinyrick.rs` build configuration script at the top-level ```rust fn banner() { - println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); + println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); } fn test() { - tinyrick::exec!("cargo", &["test"]); + tinyrick::exec!("cargo", &["test"]); } fn build() { - tinyrick::deps(test); - tinyrick::exec!("cargo", &["build", "--release"]); + tinyrick::deps(test); + tinyrick::exec!("cargo", &["build", "--release"]); } fn publish() { - tinyrick::exec!("cargo", &["publish"]); + tinyrick::exec!("cargo", &["publish"]); } fn clean() { - tinyrick::exec!("cargo", &["clean"]); + tinyrick::exec!("cargo", &["clean"]); } fn main() { - tinyrick::phony!(clean); - tinyrick::wubba_lubba_dub_dub!(build; banner, test, publish, clean); + tinyrick::phony!(clean); + tinyrick::wubba_lubba_dub_dub!(build; banner, test, publish, clean); } ``` @@ -90,7 +100,7 @@ description = "hyperadvanced derpmobiles" version = "3.1.4" [dependencies] -tinyrick = { version = "0.0.9", optional = true } +tinyrick = { version = "0.0.14", optional = true } [features] letmeout = ["tinyrick"] @@ -144,9 +154,28 @@ Whenever possible, use regular Rust code, as in the `banner()` example. There's For more details on developing tinyrick itself, see [DEVELOPMENT.md](DEVELOPMENT.md). 
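The README example above registers `build` as the default task and `banner`, `test`, `publish`, and `clean` as named tasks. For readers skimming the macro definitions elsewhere in this patch, the following is a rough hand-expansion of what `tinyrick::wubba_lubba_dub_dub!(build; test, clean)` generates: a sketch only, with placeholder task bodies; the real macro derives the match arms with `stringify!` and delegates `-l`/`--list` to `list_tasks!`.

```rust
use std::env;
use std::process;

// Placeholder tasks for illustration.
fn test() {
    println!("testing");
}

fn clean() {
    println!("cleaning");
}

fn build() {
    // A real task script would declare this as tinyrick::deps(test),
    // which runs test at most once unless test is registered as phony.
    test();
    println!("building");
}

// Approximate expansion of wubba_lubba_dub_dub!(build; test, clean).
fn main() {
    let arguments: Vec<String> = env::args().collect();

    let task_names: Vec<&str> = arguments
        .iter()
        .skip(1)
        .map(String::as_str)
        .collect();

    // No arguments: run the default task, then exit 0.
    if task_names.is_empty() {
        build();
        process::exit(0);
    }

    // Run each named task in order; unknown names panic.
    for task_name in task_names {
        match task_name {
            "-l" | "--list" => {
                println!("Registered tasks:\n");
                println!("* build");
                println!("* test");
                println!("* clean");
                process::exit(0);
            }
            "build" => build(),
            "test" => test(),
            "clean" => clean(),
            _ => panic!("Unknown task {}", task_name),
        }
    }
}
```

Running the resulting binary with no arguments executes `build` (which pulls in `test`); running it as `tinyrick test clean` executes exactly those two tasks, in order.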
-# CREDITS +# SEE ALSO * Inspired by the excellent [mage](https://magefile.org/) build system for Go projects +* [bb](https://github.com/mcandre/bb), a build system for (g)awk projects +* [cargo](https://doc.rust-lang.org/cargo/reference/build-scripts.html) custom build scripts, primarily for generating Rust source files from other languages +* [cmake](https://cmake.org/) for C/C++ projects +* [dale](https://github.com/mcandre/dale) builds D projects +* [GNU autotools](https://www.gnu.org/software/automake/manual/html_node/Autotools-Introduction.html), a build system for Linux C/C++ projects +* [Gradle](https://gradle.org/), a build system for JVM projects +* [invoke](https://pypi.org/project/invoke/), a Python task runner +* [jelly](https://github.com/mcandre/jelly), a JSON task runner +* [lake](https://luarocks.org/modules/steved/lake), a Lua task runner +* [Leiningen](https://leiningen.org/) + [lein-exec](https://github.com/kumarshantanu/lein-exec), a Clojure task runner +* [lichen](https://github.com/mcandre/lichen), a sed task runner +* [POSIX make](https://pubs.opengroup.org/onlinepubs/009695299/utilities/make.html), a task runner standard for C/C++ and various other software projects +* [mian](https://github.com/mcandre/mian), a task runner for (Chicken) Scheme Lisp +* [Rake](https://ruby.github.io/rake/), a task runner for Ruby projects +* [Rebar3](https://www.rebar3.org/), a build system for Erlang projects +* [rez](https://github.com/mcandre/rez) builds C/C++ projects +* [sbt](https://www.scala-sbt.org/index.html), a build system for Scala projects +* [Shake](https://shakebuild.com/), a task runner for Haskell projects +* [yao](https://github.com/mcandre/yao), a task runner for Common LISP projects # EVEN MORE EXAMPLES diff --git a/vendor/tinyrick/build.sh b/vendor/tinyrick/build.sh deleted file mode 100755 index f7febdb..0000000 --- a/vendor/tinyrick/build.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/sh -# -# Avoid make in order to work around https://github.com/rust-lang/rust/issues/52801 - -PACKAGE='tinyrick' -VERSION='0.0.9' -ARCHIVE="${PACKAGE}-${VERSION}.zip" -BIN="target/debug/$PACKAGE" - -test() { - install && - sh -c "cd example && tinyrick -l" && - sh -c "cd example && tinyrick -v" && - sh -c "cd example && tinyrick -h" && - sh -c "cd example && tinyrick" && - sh -c "cd example && VERBOSE=1 tinyrick test clippy lint build_debug build_release build doc install unit_test integration_test test banner uninstall clean_cargo clean" -} - -install_binaries() { - cargo install --force --path . 
-} - -install() { - install_binaries -} - -uninstall() { - cargo uninstall tinyrick -} - -clippy() { - cargo clippy -} - -doc() { - cargo doc -} - -lint() { - doc && - clippy -} - -build() { - lint && - test && - cargo build --release -} - -publish() { - cargo publish -} - -crosscompile() { - sh crosscompile-linux.sh x86_64 gnu && - sh crosscompile-linux.sh x86_64 musl -} - -port() { - crosscompile && - zip "$ARCHIVE" \ - target/x86_64-unknown-linux-gnu/release/tinyrick target/x86_64-unknown-linux-musl/release/tinyrick -} - -clean_example() { - rm -rf example/target; - rm -rf example/Cargo.lock -} - -clean_cargo() { - cargo clean -} - -clean_ports() { - rm *.zip -} - -clean() { - clean_example; - clean_cargo; - clean_ports -} - -if [ -z "$1" ]; then - build -fi - -for task in "$@"; do - "$task" -done diff --git a/vendor/tinyrick/crosscompile-linux.sh b/vendor/tinyrick/crosscompile-linux.sh deleted file mode 100755 index 539d895..0000000 --- a/vendor/tinyrick/crosscompile-linux.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -set -euo pipefail - -usage() { - echo -e "Usage: $0 <arch> <libc>\n" - - echo -e "Architectures:\n" - echo -e "x86_64\n" - - echo -e "libc's:\n" - echo "gnu" - echo "musl" - - exit 1 -} - -if [ "$#" -ne 2 ]; then - usage -fi - -arch="$1" -libc="$2" - -host_root_src_dir="$(cd "$(dirname "$0")" && pwd)" - -docker run -v "${host_root_src_dir}:/src" \ - "mcandre/docker-rustup:${arch}-${libc}" \ - sh -c "cd /src && cargo build --release --target ${arch}-unknown-linux-${libc}" diff --git a/vendor/tinyrick/install.mk b/vendor/tinyrick/install.mk new file mode 100644 index 0000000..2531551 --- /dev/null +++ b/vendor/tinyrick/install.mk @@ -0,0 +1,13 @@ +.POSIX: +.SILENT: +.PHONY: all + +all: + rustup component add \ + clippy \ + rustfmt + cargo install --force \ + cargo-audit \ + crit@0.0.7 \ + cross@0.2.5 \ + unmake@0.0.16 diff --git a/vendor/tinyrick/makefile b/vendor/tinyrick/makefile new file mode 100755 index 0000000..7c2c820 --- /dev/null +++ b/vendor/tinyrick/makefile @@ -0,0 +1,71 @@ +.POSIX: +.SILENT: +.IGNORE: uninstall clean +.PHONY: all \ + audit \ + doc \ + lint \ + clippy \ + rustfmt \ + unmake \ + build \ + port \ + crit \ + test \ + install \ + uninstall \ + publish \ + clean + +BANNER=tinyrick-0.0.14 + +all: build + +test: install + sh -c "cd example && tinyrick -l" + sh -c "cd example && tinyrick -v" + sh -c "cd example && tinyrick -h" + sh -c "cd example && tinyrick" + sh -c "cd example && VERBOSE=1 tinyrick test clippy lint build_debug build_release build doc install unit_test integration_test test banner uninstall clean_cargo clean" + +install: + cargo install --force --path . + +uninstall: + cargo uninstall tinyrick + +audit: + cargo audit + +doc: + cargo doc + +clippy: + cargo clippy + +rustfmt: + cargo fmt + +unmake: + unmake . + unmake -n . 
+ +lint: doc clippy rustfmt unmake + +build: lint test + cargo build --release + +publish: + cargo publish + +crit: + crit -b $(BANNER) + +port: crit + sh -c "cd .crit/bin && tar czf $(BANNER).tgz $(BANNER)" + +clean: + crit -c + cargo clean + rm -rf example/target + rm -rf example/Cargo.lock diff --git a/vendor/tinyrick/sample.envrc b/vendor/tinyrick/sample.envrc new file mode 100644 index 0000000..e7fe0f8 --- /dev/null +++ b/vendor/tinyrick/sample.envrc @@ -0,0 +1,10 @@ +#!/bin/sh +# +# Install: +# cp sample.envrc .envrc +# +# Uninstall: +# rm .envrc + +source_up +export DOCKER_DEFAULT_PLATFORM='linux/amd64' diff --git a/vendor/tinyrick/src/lib.rs b/vendor/tinyrick/src/lib.rs index 2f2cffa..1338b7e 100644 --- a/vendor/tinyrick/src/lib.rs +++ b/vendor/tinyrick/src/lib.rs @@ -6,71 +6,58 @@ use std::collections::HashMap; use std::sync::Mutex; /// Cargo toggle -pub static FEATURE : &str = "letmeout"; +pub static FEATURE: &str = "letmeout"; /// Environment name controlling verbosity -pub static VERBOSE_ENVIRONMENT_NAME : &str = "VERBOSE"; +pub static VERBOSE_ENVIRONMENT_NAME: &str = "VERBOSE"; lazy_static::lazy_static! { - static ref DEPENDENCY_CACHE_MUTEX : Mutex<HashMap<fn(), bool>> = Mutex::new(HashMap::new()); -} + static ref DEPENDENCY_CACHE_MUTEX: Mutex<HashMap<fn(), bool>> = Mutex::new(HashMap::new()); -lazy_static::lazy_static! { - pub static ref PHONY_TASK_MUTEX : Mutex<Vec<fn()>> = Mutex::new(Vec::new()); + pub static ref PHONY_TASK_MUTEX: Mutex<Vec<fn()>> = Mutex::new(Vec::new()); } /// Query common host binary suffix pub fn binary_suffix() -> String { - if cfg!(windows) { - ".exe".to_string() - } else { - "".to_string() - } + if cfg!(windows) { + return ".exe".to_string(); + } + + String::new() } /// Declare a dependency on a task that may panic pub fn deps(task: fn()) { - let phony : bool = PHONY_TASK_MUTEX - .lock() - .unwrap() - .contains(&task); - - let has_run = DEPENDENCY_CACHE_MUTEX - .lock() - .unwrap() - .contains_key(&task); - - if phony || !has_run { - task(); + let phony: bool = PHONY_TASK_MUTEX.lock().unwrap().contains(&task); + let has_run: bool = DEPENDENCY_CACHE_MUTEX.lock().unwrap().contains_key(&task); - DEPENDENCY_CACHE_MUTEX - .lock() - .unwrap() - .insert(task, true); - } + if phony || !has_run { + task(); + DEPENDENCY_CACHE_MUTEX.lock().unwrap().insert(task, true); + } } /// Declare tasks with no obviously cacheable artifacts. #[macro_export] macro_rules! phony { - ($t : expr) => { - { - tinyrick::PHONY_TASK_MUTEX - .lock() - .unwrap() - .push($t); - } - }; - ($t : expr, $($u : expr),*) => { - { - let ref mut phony_tasks = tinyrick::PHONY_TASK_MUTEX - .lock() - .unwrap(); - - phony_tasks.push($t); - $( phony_tasks.push($u); )* - } - }; + ($t : expr) => { + { + tinyrick::PHONY_TASK_MUTEX + .lock() + .unwrap() + .push($t); + } + }; + ($t : expr, $($u : expr),*) => { + { + let ref mut phony_tasks = tinyrick::PHONY_TASK_MUTEX + .lock() + .unwrap(); + + phony_tasks.push($t); + $( phony_tasks.push($u); )* + } + }; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -79,19 +66,16 @@ macro_rules! phony { /// Returns the command object. #[macro_export] macro_rules! 
exec_mut_with_arguments { - ($p : expr, $a : expr) => { - { - use std::env::var; - use std::process::Command; + ($p : expr, $a : expr) => {{ + use std::env::var; + use std::process::Command; - if var(tinyrick::VERBOSE_ENVIRONMENT_NAME).is_ok() { - println!("{} {}", $p, $a.join(" ")); - } + if var(tinyrick::VERBOSE_ENVIRONMENT_NAME).is_ok() { + println!("{} {}", $p, $a.join(" ")); + } - Command::new($p) - .args($a) - } - }; + Command::new($p).args($a) + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -100,17 +84,13 @@ macro_rules! exec_mut_with_arguments { /// Returns the command object. #[macro_export] macro_rules! exec_mut { - ($p : expr) => { - { - let args : &[&str] = &[]; - tinyrick::exec_mut_with_arguments!($p, args) - } - }; - ($p : expr, $a : expr) => { - { - tinyrick::exec_mut_with_arguments!($p, $a) - } - }; + ($p : expr) => {{ + let args: &[&str] = &[]; + tinyrick::exec_mut_with_arguments!($p, args) + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut_with_arguments!($p, $a) + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -120,20 +100,12 @@ macro_rules! exec_mut { /// Panics if the command exits with a failure status. #[macro_export] macro_rules! exec_output { - ($p : expr) => { - { - tinyrick::exec_mut!($p) - .output() - .unwrap() - } - }; - ($p : expr, $a : expr) => { - { - tinyrick::exec_mut!($p, $a) - .output() - .unwrap() - } - }; + ($p : expr) => {{ + tinyrick::exec_mut!($p).output().unwrap() + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut!($p, $a).output().unwrap() + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -143,18 +115,12 @@ macro_rules! exec_output { /// Panics if the command exits with a failure status. #[macro_export] macro_rules! exec_stdout { - ($p : expr) => { - { - tinyrick::exec_output!($p) - .stdout - } - }; - ($p : expr, $a : expr) => { - { - tinyrick::exec_output!($p, $a) - .stdout - } - }; + ($p : expr) => {{ + tinyrick::exec_output!($p).stdout + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_output!($p, $a).stdout + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -164,18 +130,12 @@ macro_rules! exec_stdout { /// Panics if the command exits with a failure status. #[macro_export] macro_rules! exec_stderr { - ($p : expr) => { - { - tinyrick::exec_output!($p) - .stderr - } - }; - ($p : expr, $a : expr) => { - { - tinyrick::exec_output!($p, $a) - .stderr - } - }; + ($p : expr) => {{ + tinyrick::exec_output!($p).stderr + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_output!($p, $a).stderr + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -185,18 +145,12 @@ macro_rules! exec_stderr { /// Panics if the command exits with a failure status. #[macro_export] macro_rules! exec_stdout_utf8 { - ($p : expr) => { - { - String::from_utf8(tinyrick::exec_stdout!($p)) - .unwrap() - } - }; - ($p : expr, $a : expr) => { - { - String::from_utf8(tinyrick::exec_stdout!($p, $a)) - .unwrap() - } - }; + ($p : expr) => {{ + String::from_utf8(tinyrick::exec_stdout!($p)).unwrap() + }}; + ($p : expr, $a : expr) => {{ + String::from_utf8(tinyrick::exec_stdout!($p, $a)).unwrap() + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -206,18 +160,12 @@ macro_rules! exec_stdout_utf8 { /// Panics if the command exits with a failure status. 
#[macro_export] macro_rules! exec_stderr_utf8 { - ($p : expr) => { - { - String::from_utf8(tinyrick::exec_stderr!($p)) - .unwrap() - } - }; - ($p : expr, $a : expr) => { - { - String::from_utf8(tinyrick::exec_stderr!($p, $a)) - .unwrap() - } - }; + ($p : expr) => {{ + String::from_utf8(tinyrick::exec_stderr!($p)).unwrap() + }}; + ($p : expr, $a : expr) => {{ + String::from_utf8(tinyrick::exec_stderr!($p, $a)).unwrap() + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -227,20 +175,12 @@ macro_rules! exec_stderr_utf8 { /// Panics if the command could not run to completion. #[macro_export] macro_rules! exec_status { - ($p : expr) => { - { - tinyrick::exec_mut!($p) - .status() - .unwrap() - } - }; - ($p : expr, $a : expr) => { - { - tinyrick::exec_mut!($p, $a) - .status() - .unwrap() - } - }; + ($p : expr) => {{ + tinyrick::exec_mut!($p).status().unwrap() + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut!($p, $a).status().unwrap() + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -249,51 +189,36 @@ macro_rules! exec_status { /// Panics if the command exits with a failure status. #[macro_export] macro_rules! exec { - ($p : expr) => { - { - assert!( - tinyrick::exec_status!($p) - .success() - ); - } - }; - ($p : expr, $a : expr) => { - { - assert!( - tinyrick::exec_status!($p, $a) - .success() - ) - } - }; + ($p : expr) => {{ + assert!(tinyrick::exec_status!($p).success()); + }}; + ($p : expr, $a : expr) => {{ + assert!(tinyrick::exec_status!($p, $a).success()) + }}; } - /// Show registered tasks #[macro_export] macro_rules! list_tasks { - ($t : expr) => { - { - use std::process::exit; - - println!("Registered tasks:\n"); - - println!("* {}", stringify!($t)); + ($t : expr) => { + { + use std::process; - exit(0); - } - }; - ($t : expr, $($u : expr),*) => { - { - use std::process::exit; - - println!("Registered tasks:\n"); - - println!("* {}", stringify!($t)); - $(println!("* {}", stringify!($u));)* - - exit(0); - } - }; + println!("Registered tasks:\n"); + println!("* {}", stringify!($t)); + process::exit(0); + } + }; + ($t : expr, $($u : expr),*) => { + { + use std::process; + + println!("Registered tasks:\n"); + println!("* {}", stringify!($t)); + $(println!("* {}", stringify!($u));)* + process::exit(0); + } + }; } /// Register tasks with CLI entrypoint. @@ -301,31 +226,31 @@ macro_rules! list_tasks { /// When no tasks are named in CLI arguments. #[macro_export] macro_rules! 
wubba_lubba_dub_dub { - ($d : expr ; $($t : expr),*) => { - use std::env::args; - use std::process::exit; + ($d : expr ; $($t : expr),*) => { + use std::env; + use std::process; - let arguments : Vec<String> = args() - .collect(); + let arguments: Vec<String> = env::args().collect(); - let task_names : Vec<&str> = arguments - .iter() - .skip(1) - .map(String::as_str) - .collect(); + let task_names: Vec<&str> = arguments + .iter() + .skip(1) + .map(String::as_str) + .collect(); - if task_names.len() == 0 { - $d(); - } else { - for task_name in task_names { - match task_name { - "-l" => tinyrick::list_tasks!($d $(, $t)*), - "--list" => tinyrick::list_tasks!($d $(, $t)*), - stringify!($d) => $d(), - $(stringify!($t) => $t(),)* - _ => panic!("Unknown task {}", task_name) + if task_names.is_empty() { + $d(); + process::exit(0); } - } - } - }; + + for task_name in task_names { + match task_name { + "-l" => tinyrick::list_tasks!($d $(, $t)*), + "--list" => tinyrick::list_tasks!($d $(, $t)*), + stringify!($d) => $d(), + $(stringify!($t) => $t(),)* + _ => panic!("Unknown task {}", task_name) + } + } + }; } diff --git a/vendor/tinyrick/src/tinyrick.rs b/vendor/tinyrick/src/tinyrick.rs index e273497..a61124f 100644 --- a/vendor/tinyrick/src/tinyrick.rs +++ b/vendor/tinyrick/src/tinyrick.rs @@ -1,76 +1,67 @@ //! CLI tinyrick tool +extern crate die; extern crate getopts; extern crate tinyrick; +use die::{die, Die}; use std::env; use std::path; -use std::process; - -// Show short CLI spec -fn usage(brief : &str, opts : &getopts::Options) { - println!("{}", (*opts).usage(brief)); -} /// Show version information pub fn banner() { - println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); + println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); } /// CLI entrypoint fn main() { - let arguments : Vec<String> = env::args() - .collect(); + let brief: String = format!("Usage: {} [options]", env!("CARGO_PKG_NAME")); + + let mut opts: getopts::Options = getopts::Options::new(); + opts.optflag("l", "list", "list available tasks"); + opts.optflag("h", "help", "print usage info"); + opts.optflag("v", "version", "print version info"); - let brief = format!("Usage: {} [options]", env!("CARGO_PKG_NAME")); + let usage: String = opts.usage(&brief); + let arguments: Vec<String> = env::args().collect(); + let optmatches: getopts::Matches = opts.parse(&arguments[1..]).die(&usage); - let mut opts : getopts::Options = getopts::Options::new(); - opts.optflag("l", "list", "list available tasks"); - opts.optflag("h", "help", "print usage info"); - opts.optflag("v", "version", "print version info"); + if optmatches.opt_present("h") { + die!(0; usage); + } - match opts.parse(&arguments[1..]) { - Err(_) => { - usage(&brief, &opts); - process::abort(); - }, - Ok(optmatches) => { - let list_tasks = optmatches.opt_present("l"); + if optmatches.opt_present("v") { + die!(0; format!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))); + } - if optmatches.opt_present("h") { - usage(&brief, &opts); - process::exit(0); - } else if optmatches.opt_present("v") { - banner(); - process::exit(0); - } else { - let tasks = optmatches.free; + let list_tasks: bool = optmatches.opt_present("l"); + let tasks: Vec<String> = optmatches.free; - tinyrick::exec!( - "cargo", - &[ - "build", - "--bin", env!("CARGO_PKG_NAME"), - "--features", tinyrick::FEATURE - ] - ); + tinyrick::exec!( + "cargo", + &[ + "build", + "--bin", + env!("CARGO_PKG_NAME"), + "--features", + tinyrick::FEATURE + ] + ); - let target_path : 
&path::Path = path::Path::new("target"); + let target_path: &path::Path = path::Path::new("target"); - let rick_pathbuf : path::PathBuf = target_path - .join("debug") - .join(&format!("{}{}", env!("CARGO_PKG_NAME"), tinyrick::binary_suffix())); + let rick_pathbuf: path::PathBuf = target_path.join("debug").join(format!( + "{}{}", + env!("CARGO_PKG_NAME"), + tinyrick::binary_suffix() + )); - let rick_path : &str = rick_pathbuf - .to_str() - .unwrap(); + let rick_path: &str = rick_pathbuf.to_str().unwrap(); - if list_tasks { - tinyrick::exec!(rick_path, &["-l"]); - } else { - tinyrick::exec!(rick_path, tasks); - } - } + if list_tasks { + tinyrick::exec!(rick_path, &["-l"]); + die!(0); } - } + + tinyrick::exec!(rick_path, tasks); } diff --git a/vendor/tinyrick_extras/.cargo-checksum.json b/vendor/tinyrick_extras/.cargo-checksum.json index ba226ab..28b26d0 100644 --- a/vendor/tinyrick_extras/.cargo-checksum.json +++ b/vendor/tinyrick_extras/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.lock":"da198fcdf14b6a63bb62e67b6bd33e4740452bc304d12f71f988a1b970d6ce54","Cargo.toml":"81f4de08e15070dd6fe9a2b92d4b3c5ac5b9543213fb768865ef30993e81aa82","DEVELOPMENT.md":"327f69f87c56ffb70f81978f1b9704d051dc6690e1e3f3a31c7f8a8060caae20","README.md":"00dee732b98c8596daea3e513f088bf56fb0a4fa0b6d85e87989162b1cc9a8b3","src/lib.rs":"d09a356cfed5ba83e70d7d80983f249455467821413a9844aaa558f3623f845f","tinyrick.rs":"d94d99260868b7156b074d4a5c1d89d1f06826fdd2ae5d6a1e029159f8413933"},"package":"a59a496a47794576c8fb1d8fd9727f6338d98a95acf9337400f91f8e891c041b"} \ No newline at end of file +{"files":{"Cargo.lock":"6361f9ccec7eb7a205735e93f430cf2cff7a3b5dbcf564eaa6ec67114c370e1d","Cargo.toml":"30f5a2dfba3062ea9c959c2713a0fe0913f61ecb72beb78cba41429ee02e0068","DEVELOPMENT.md":"525f535c9556135d478bc07628c04d45b141f9e8ef3d8b57cc95ab485e7a7d56","LICENSE.md":"692fd932aac63bc63ba3b4908850cdcc1a38cee1527a07440355811028656c38","README.md":"f7fcddd061f63fb2ce0dede8fbb8d0b126066262cd1f9eefc856a27790d5807c","makefile":"4e6d5ef5ae5f4aca6cbb4ff33b884e24bc7ad8e72dbf45f9a71720ebfcde4033","src/lib.rs":"8a6b3a2b5348de9b3cd0de6c3a363de1c3c65e5a61199c4a4c32600ecd3dd84f","tinyrick.rs":"c49eaf976d62332e5f5d6841d5d82600b15a6ee68b277b411cc159c6a3449816"},"package":"4aae31f75079b9c67acd8a1b381f447727050a5596010dfc259a6179c0a8f8a8"} \ No newline at end of file diff --git a/vendor/tinyrick_extras/Cargo.toml b/vendor/tinyrick_extras/Cargo.toml index c8af778..0d01651 100644 --- a/vendor/tinyrick_extras/Cargo.toml +++ b/vendor/tinyrick_extras/Cargo.toml @@ -11,7 +11,7 @@ [package] name = "tinyrick_extras" -version = "0.0.6" +version = "0.0.8" authors = ["Andrew Pennebaker <andrew.pennebaker@gmail.com>"] description = "common tasks for tinyrick projects" homepage = "https://github.com/mcandre/tinyrick_extras" @@ -28,7 +28,7 @@ path = "tinyrick.rs" required-features = ["letmeout"] [dependencies.tinyrick] -version = "0.0.9" +version = "0.0.13" [features] letmeout = [] diff --git a/vendor/tinyrick_extras/DEVELOPMENT.md b/vendor/tinyrick_extras/DEVELOPMENT.md index b5178b5..3ed79c9 100644 --- a/vendor/tinyrick_extras/DEVELOPMENT.md +++ b/vendor/tinyrick_extras/DEVELOPMENT.md @@ -4,13 +4,16 @@ tinyrick_extras' own compilation process is compatible with standard cargo. 
We w # BUILDTIME REQUIREMENTS -* [Rust](https://www.rust-lang.org/en-US/) 1.68.2+ with `rustup component add clippy rustfmt` and `cargo install cargo-audit@0.17.5 tinyrick@0.0.9` +* POSIX compatible [make](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/make.html) +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ +* Provision additional dev tools with `make` ## Recommended -* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after each Rust application binary installation) -* [direnv](https://direnv.net/) 2 +* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after provisioning) * [cargo-cache](https://crates.io/crates/cargo-cache) +* [direnv](https://direnv.net/) 2 +* POSIX compatible [tar](https://pubs.opengroup.org/onlinepubs/7908799/xcu/tar.html) # SECURITY AUDIT diff --git a/vendor/tinyrick_extras/LICENSE.md b/vendor/tinyrick_extras/LICENSE.md new file mode 100644 index 0000000..bff0a40 --- /dev/null +++ b/vendor/tinyrick_extras/LICENSE.md @@ -0,0 +1,26 @@ +Copyright (c) 2018, Andrew Pennebaker +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OR MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of the FreeBSD project. 
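One behavioral note on the rewritten `tinyrick` CLI entrypoint earlier in this patch: the manual `usage()` helper and the `process::abort()` on parse errors were replaced with the `die` crate, which folds "print a message, then exit with a status" into a single expression. Below is a minimal sketch of that pattern, assuming `getopts` and `die` are declared as dependencies; only calls that appear in the diff are used.

```rust
extern crate die;
extern crate getopts;

use die::{die, Die};
use std::env;

fn main() {
    let brief: String = format!("Usage: {} [options]", env!("CARGO_PKG_NAME"));

    let mut opts: getopts::Options = getopts::Options::new();
    opts.optflag("h", "help", "print usage info");

    let usage: String = opts.usage(&brief);
    let arguments: Vec<String> = env::args().collect();

    // On a parse error, print the usage text and exit with a failure
    // status; this replaces the old usage() + process::abort() flow.
    let optmatches: getopts::Matches = opts.parse(&arguments[1..]).die(&usage);

    if optmatches.opt_present("h") {
        // Print the usage text and exit 0.
        die!(0; usage);
    }
}
```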
diff --git a/vendor/tinyrick_extras/README.md b/vendor/tinyrick_extras/README.md index 933ca75..598e70e 100644 --- a/vendor/tinyrick_extras/README.md +++ b/vendor/tinyrick_extras/README.md @@ -32,16 +32,21 @@ https://crates.io/crates/tinyrick_extras https://docs.rs/tinyrick_extras/latest/tinyrick_extras/ +# LICENSE + +BSD-2-Clause + # RUNTIME REQUIREMENTS -* [Rust](https://www.rust-lang.org/en-US/) 1.68.2+ +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ ## Recommended * [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after each Rust application binary installation) -* [direnv](https://direnv.net/) 2 * [cargo-cache](https://crates.io/crates/cargo-cache) * [crit](https://github.com/mcandre/crit) ports Rust applications +* [direnv](https://direnv.net/) 2 +* POSIX compatible [tar](https://pubs.opengroup.org/onlinepubs/7908799/xcu/tar.html) # CONTRIBUTING diff --git a/vendor/tinyrick_extras/makefile b/vendor/tinyrick_extras/makefile new file mode 100644 index 0000000..d08891b --- /dev/null +++ b/vendor/tinyrick_extras/makefile @@ -0,0 +1,12 @@ +.POSIX: +.SILENT: +.PHONY: all + +all: + rustup component add \ + clippy \ + rustfmt + cargo install --force \ + cargo-audit \ + tinyrick@0.0.13 \ + unmake@0.0.16 diff --git a/vendor/tinyrick_extras/src/lib.rs b/vendor/tinyrick_extras/src/lib.rs index 1267de7..89933b6 100644 --- a/vendor/tinyrick_extras/src/lib.rs +++ b/vendor/tinyrick_extras/src/lib.rs @@ -46,6 +46,26 @@ pub fn crit(args: Vec<String>) { .success()); } +/// Compress binaries. +/// +/// artifacts_path denotes a build directory root, +/// where a software project houses porting artifacts. +/// +/// port_basename denotes an archive directory root within the artifacts_path, +/// generally of the form "<app-name>-<version>". +pub fn archive(artifacts_path: String, port_basename: String) { + let artifacts_path_str: &str = &artifacts_path; + let port_basename_str: &str = &port_basename; + let archive_basename: &str = &format!("{}.tgz", port_basename_str); + assert!( + tinyrick::exec_mut!("tar", &["czf", archive_basename, port_basename_str]) + .current_dir(artifacts_path_str) + .status() + .unwrap() + .success() + ); +} + /// Uninstall artifacts pub fn uninstall_binaries() { tinyrick::exec!("cargo", &["uninstall"]); diff --git a/vendor/tinyrick_extras/tinyrick.rs b/vendor/tinyrick_extras/tinyrick.rs index 8cbafc8..b30f87c 100644 --- a/vendor/tinyrick_extras/tinyrick.rs +++ b/vendor/tinyrick_extras/tinyrick.rs @@ -23,11 +23,18 @@ fn rustfmt() { tinyrick_extras::rustfmt(); } +/// Run unmake +fn unmake() { + tinyrick::exec!("unmake", &["."]); + tinyrick::exec!("unmake", &["-n", "."]); +} + /// Validate documentation and run linters fn lint() { tinyrick::deps(doc); tinyrick::deps(clippy); tinyrick::deps(rustfmt); + tinyrick::deps(unmake); } /// Doc, lint, and run tests @@ -69,6 +76,7 @@ fn main() { audit, clippy, rustfmt, + unmake, lint, test, publish, diff --git a/vendor/toml_datetime/.cargo-checksum.json b/vendor/toml_datetime/.cargo-checksum.json index 1dcac37..e801b0b 100644 --- a/vendor/toml_datetime/.cargo-checksum.json +++ b/vendor/toml_datetime/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"Cargo.toml":"d0b973ff31ce9c14256cba2a48b94313537e9c146414934916bdaaddd58ec83c","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"a9540c57fbfade8959d34804427d9f37ccae9e24a9fc469bcf413c2089601f0d","src/datetime.rs":"9f85adaa87398164dbaa72c7ef4c7328cda7d9fa1478142b88fbe4de9551b53e","src/lib.rs":"91f3fe49647af4987a49b5aaa8c3175f02c05fa9ac45e7c49dbee61566230476"},"package":"3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622"} \ No newline at end of file +{"files":{"Cargo.toml":"5171fec882937c1a00a81fb38e55b38547da711ad1c88725f8501d5050ff2789","LICENSE-APACHE":"c6596eb7be8581c18be736c846fb9173b69eccf6ef94c5135893ec56bd92ba08","LICENSE-MIT":"6efb0476a1cc085077ed49357026d8c173bf33017278ef440f222fb9cbcb66e6","README.md":"a9540c57fbfade8959d34804427d9f37ccae9e24a9fc469bcf413c2089601f0d","src/datetime.rs":"40723c9d195527d4dc9a70450a0822e39f7de204debf502d0c0d977833dc3cf9","src/lib.rs":"91f3fe49647af4987a49b5aaa8c3175f02c05fa9ac45e7c49dbee61566230476"},"package":"3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1"} \ No newline at end of file diff --git a/vendor/toml_datetime/Cargo.toml b/vendor/toml_datetime/Cargo.toml index 690ab5b..46989f2 100644 --- a/vendor/toml_datetime/Cargo.toml +++ b/vendor/toml_datetime/Cargo.toml @@ -11,22 +11,23 @@ [package] edition = "2021" -rust-version = "1.60.0" +rust-version = "1.67" name = "toml_datetime" -version = "0.6.1" +version = "0.6.5" authors = ["Alex Crichton <alex@alexcrichton.com>"] include = [ + "build.rs", "src/**/*", "Cargo.toml", + "Cargo.lock", "LICENSE*", "README.md", - "examples/**/*", "benches/**/*", + "examples/**/*", "tests/**/*", ] description = "A TOML-compatible datetime type" homepage = "https://github.com/toml-rs/toml" -documentation = "https://docs.rs/toml_datetime" readme = "README.md" keywords = [ "encoding", @@ -41,47 +42,47 @@ categories = [ license = "MIT OR Apache-2.0" repository = "https://github.com/toml-rs/toml" +[package.metadata.docs.rs] +all-features = true +rustdoc-args = [ + "--cfg", + "docsrs", +] + [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" -search = "Unreleased" -replace = "{{version}}" min = 1 +replace = "{{version}}" +search = "Unreleased" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = '\.\.\.HEAD' replace = "...{{tag_name}}" -exactly = 1 +search = '\.\.\.HEAD' [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" -search = "ReleaseDate" -replace = "{{date}}" min = 1 +replace = "{{date}}" +search = "ReleaseDate" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = "<!-- next-header -->" replace = """ <!-- next-header --> ## [Unreleased] - ReleaseDate """ -exactly = 1 +search = "<!-- next-header -->" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = "<!-- next-url -->" replace = """ <!-- next-url --> [Unreleased]: https://github.com/toml-rs/toml/compare/{{tag_name}}...HEAD""" -exactly = 1 - -[package.metadata.docs.rs] -rustdoc-args = [ - "--cfg", - "docsrs", -] -all-features = true +search = "<!-- next-url -->" [dependencies.serde] version = "1.0.145" diff --git a/vendor/toml_datetime/LICENSE-APACHE b/vendor/toml_datetime/LICENSE-APACHE index 16fe87b..8f71f43 100644 --- a/vendor/toml_datetime/LICENSE-APACHE +++ b/vendor/toml_datetime/LICENSE-APACHE @@ -1,201 
+1,202 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/vendor/toml_datetime/LICENSE-MIT b/vendor/toml_datetime/LICENSE-MIT index 39e0ed6..a2d0108 100644 --- a/vendor/toml_datetime/LICENSE-MIT +++ b/vendor/toml_datetime/LICENSE-MIT @@ -1,25 +1,19 @@ -Copyright (c) 2014 Alex Crichton +Copyright (c) Individual contributors -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
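
The vendor/toml_datetime hunks just below tighten date/time validation: the flat `day <= 31` bound becomes a per-month limit with Gregorian leap-year handling, the seconds bound moves from 59 to 60 to admit leap seconds, and the hand-rolled `('0'..='9')` range check becomes `char::is_ascii_digit`. As a minimal standalone sketch of the new day-of-month logic (the helper names here are illustrative only, not part of the patch, which inlines the same expressions):

// Illustrative sketch only; `is_leap_year` and `max_days_in_month` are
// hypothetical helpers mirroring the inline logic the hunk below adds.
fn is_leap_year(year: u16) -> bool {
    (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0))
}

fn max_days_in_month(year: u16, month: u8) -> u8 {
    match month {
        2 if is_leap_year(year) => 29, // leap-year February
        2 => 28,
        4 | 6 | 9 | 11 => 30,          // Apr, Jun, Sep, Nov
        _ => 31,                       // all remaining months
    }
}

fn main() {
    assert!(is_leap_year(2000));  // divisible by 400
    assert!(!is_leap_year(1900)); // divisible by 100 but not by 400
    assert_eq!(max_days_in_month(2024, 2), 29);
    assert_eq!(max_days_in_month(2023, 2), 28);
    // Under the patched check, "2023-02-30" fails with DatetimeParseError,
    // while "2024-02-29" and the leap-second time "23:59:60" now parse.
}
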
diff --git a/vendor/toml_datetime/src/datetime.rs b/vendor/toml_datetime/src/datetime.rs index 15781b9..1b37c70 100644 --- a/vendor/toml_datetime/src/datetime.rs +++ b/vendor/toml_datetime/src/datetime.rs @@ -308,7 +308,15 @@ impl FromStr for Datetime { if date.month < 1 || date.month > 12 { return Err(DatetimeParseError {}); } - if date.day < 1 || date.day > 31 { + let is_leap_year = + (date.year % 4 == 0) && ((date.year % 100 != 0) || (date.year % 400 == 0)); + let max_days_in_month = match date.month { + 2 if is_leap_year => 29, + 2 => 28, + 4 | 6 | 9 | 11 => 30, + _ => 31, + }; + if date.day < 1 || date.day > max_days_in_month { return Err(DatetimeParseError {}); } @@ -381,7 +389,8 @@ impl FromStr for Datetime { if time.minute > 59 { return Err(DatetimeParseError {}); } - if time.second > 59 { + // 00-58, 00-59, 00-60 based on leap second rules + if time.second > 60 { return Err(DatetimeParseError {}); } if time.nanosecond > 999_999_999 { @@ -451,7 +460,7 @@ impl FromStr for Datetime { fn digit(chars: &mut str::Chars<'_>) -> Result<u8, DatetimeParseError> { match chars.next() { - Some(c) if ('0'..='9').contains(&c) => Ok(c as u8 - b'0'), + Some(c) if c.is_ascii_digit() => Ok(c as u8 - b'0'), _ => Err(DatetimeParseError {}), } } diff --git a/vendor/toml_edit/.cargo-checksum.json b/vendor/toml_edit/.cargo-checksum.json index 7d4b128..a7a471a 100644 --- a/vendor/toml_edit/.cargo-checksum.json +++ b/vendor/toml_edit/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.lock":"679c3c26dd16d7e514f18f4ce2ccd22556d2802c29457aabc87919c64b6dedd5","Cargo.toml":"3852620c9f9ddaad965faa1dfbbc70b9acee9cea592716b156b6b6b98b26746a","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"4ba025b28a1ea4c063512d92a206a0510972cd98daec7c6a64ab846ce8e3702d","README.md":"61071236d49e09b94cda5955f5d61a6196b192b11a990883d458dc6e3d72da6f","examples/visit.rs":"657756caba28aa87a8b10104aba704fd2365579c57a293bc572a8d72173ba174","src/array.rs":"3cd87f817f47b44688157f7ce52b303987c4091a21d3075c1022bc6d63ae7096","src/array_of_tables.rs":"2523e482cb10dd84c135141324fa6ecb6f5bccc6c08c6d96ff65abfb7cfbf8d9","src/de/array.rs":"7969d28e98e366cd16c9ed74128404a147a2b787c54d0450594811a92d17b310","src/de/datetime.rs":"a29e76372fc382686c31ee93fd54c749ea3d775c8d4fcc2b2643f9464ea23730","src/de/key.rs":"f8a977fe08e1f929e599f2216734db678ca6ec9fb7f6860b75f1cdfc628b52c6","src/de/mod.rs":"b7344ac410b21b2c82eb88782f48979d6f850a778a19bb2eea7228e834374d2f","src/de/spanned.rs":"5c959639bf4b3d19c6b18e625a57b28b311164bf7b191bbc4d30aa97b7239679","src/de/table.rs":"b1f0f95ea502126b8707ba945596f3c47d83d46680622899334567df1b58df1d","src/de/table_enum.rs":"879a78836ac3b00ab35b9cd72a08fb8486a6c4ae05c779481ae9fcb9546b8390","src/de/value.rs":"536d18bba54f335f4fe0f387888c1406e60cf7ed4a0afbf2b00b97d33378c91d","src/document.rs":"7882374aa62978cac3752f346253399f281121a544927feed2882ed9a008361d","src/encode.rs":"4fd14a1d23bc42cbc925d7b240c7f17641ccff0df31d8ed1aeb3cf4c9c598c36","src/index.rs":"34ed0a4cc0cd55ce29885ed8123cc7c09c8ba3b9aa8d3b0b47554c1a41d31577","src/inline_table.rs":"871cfc66cf6c87bfaece0d4725e4a6abe640f5911c3b9c97e617e01293dabd1f","src/internal_string.rs":"8c84f94363ed012eb7114c6b3f6b371155d0ce740ce44ec9d315aedf048c1d11","src/item.rs":"78c5d9e6c714483816f090d419db641977c9dfaf7dd3e9613f6eee10633173f2","src/key.rs":"c2baccdb585879b2ae0fdb98084b8745f23c0f9e33a3220413b2b8a717e7ce4d","src/lib.rs":"3ff2ffd3366e495278fd7b3118ea54b7d09534380396a75b46c409e3c563c67c","src/parser/array.rs":"0f7a828cb048d2593158c1403
94f8d34ffa5c7fff348488cbe5baf1bf99bfaf9","src/parser/datetime.rs":"ba31d72f1e4f8f64fadea66c6a00ad3d1c4740d6e1a8dfe3b45d07c23a736883","src/parser/document.rs":"b44f1a381ba7c5172c7e5bbcc5cb66d5ac85e5349d74b16b36c51c0a5761dd89","src/parser/errors.rs":"36c9b9d50ec26ebebc5c879e46db0cad5ba8b8f18c8ee48262e7248f4d5839ca","src/parser/inline_table.rs":"88a20841c01fdc5276724ee1df60bb190f0f02d98351c4745245b30f2027f914","src/parser/key.rs":"493ad3d85fca3e13c93c362c5edef7d31c6c14e12e20287d451113f524d2b1e1","src/parser/macros.rs":"9366d332efc75f09a80871a8d0c4c4daa865e3a2fc78b91e6e4657df4260b0f9","src/parser/mod.rs":"1f3a866936f7f7fef3df4d2e4a0716ac67210d20b801550593d674002bec3f49","src/parser/numbers.rs":"d50028c8898dd58c1c23f09ced676c5fd980575b3696646a0c1388f6b8351ab5","src/parser/state.rs":"8b7ba46e124007c3dfdf894d881768f2dd0895749e217e2b1fedb86d9281b8b2","src/parser/strings.rs":"fda76fd1f52126e8ff3d1cdb48ed745253febda9104c83a509047a0537d85723","src/parser/table.rs":"c4cf15a49aeb6638ebfb29ecf19a49d876c228d1f50ab337828fcf5a1c65fd11","src/parser/trivia.rs":"2da07c1a37e4ae703f8abe125c01d49eec356555a9202b72597ea1b7582ab1fa","src/parser/value.rs":"642053233803135671582fd57cddbf1a63d1c57d2130e2a1862e281281f1a320","src/raw_string.rs":"dcec7da0aab75a5ff5891a0b4a22265ed47c17f9d802ec135e40f2bb1b0a7c60","src/repr.rs":"68356f3b89cd76b75bd9f78b6fbd31fbcc96d8195a7118d286dca92950adaee4","src/ser/array.rs":"5fb55303365bf6991f7af523931d1a046e88d3125c099317e12a7d4f592321a8","src/ser/key.rs":"9bad3bee90876c5c54aaa5caee92bfe30d79a100615902118b35b3ceb40b524e","src/ser/map.rs":"700ddcac68f1cb92969bb8e3168d9ad5d1e1b545d68cc7711f0eb58cf41d9f8c","src/ser/mod.rs":"9b37c484e6ac281a40260206e8cb5bec9e2dab09126c9a2010a52786fbf7df78","src/ser/pretty.rs":"9a9f6d43b2eab487de8af216b9950c93318c9ee6a91f4707ffa41e2ee492c902","src/ser/value.rs":"f4d0bedf847cec29096f075995f31b6976539611f4da722e2a263f602bbc7ed0","src/table.rs":"91ea4d3111eb58aaad478d152773fc9ef8cd77a375c83d700eb83a48bf2fbc6e","src/value.rs":"881f7aee14ef968f5f843ccbbaa648117ceac2148df6e09d171f41c174d35dd9","src/visit.rs":"34b49c3091a6ef7c1513429f8290c26989da25cec496d3993abf33fa65ebfc58","src/visit_mut.rs":"b184d7bb94038fdc2e0fee8689112f01377d9f8c05ffafb8a1f5f2263e80ee27","tests/decoder.rs":"8093389d542f50974d6a606321b0413941709ac2cbdef48934b60088bb21e3a3","tests/decoder_compliance.rs":"a06a10a2b89758ef08dda24c4475d13638b7c31e8872ed6edc6919d5b9910bd4","tests/encoder.rs":"c84dd8e14ade77a4d55f932418567196e188fe65d3957408b9ce07c2c27c4da0","tests/encoder_compliance.rs":"643b529e2f4762b2ce98e041dffa5d4b87fee5942996e093afa5b8494e02a3c0","tests/fixtures/invalid/array/double-comma-1.stderr":"abfcc7bc82100306faf21d3636f0ae684d0fbc5e9ea89032e6cd39ecfcb92c84","tests/fixtures/invalid/array/double-comma-2.stderr":"56bd3cecd5b5ec4204d206f07751c34e1e65bee7aa108ca74ef7a8cb1aaaea43","tests/fixtures/invalid/array/extending-table.stderr":"a6c077cb49c41a9d10aca0bac6c571e570f0a772c6311d6a835f9cc46f4ab7cd","tests/fixtures/invalid/array/missing-separator.stderr":"eb38b2683f119d8b9b8c6b8ffd46da784112565ef1033febbe59469e14baea76","tests/fixtures/invalid/array/no-close-2.stderr":"214a9c5953175709a068433f0a594e0cf2be88d633905dc6e9047894b5249e4e","tests/fixtures/invalid/array/no-close-table-2.stderr":"5a2b4bf9026aa20c749551cd77458c5ffba008b5b235fa05fb7c756917018eb8","tests/fixtures/invalid/array/no-close-table.stderr":"545ad4381d5a007a6cd940a4523ae2f629617d298611c0898283a1c4783604cb","tests/fixtures/invalid/array/no-close.stderr":"85935faa2e3a57c4f0a7d519629630a96826ce4f217650972cd070fb7dca23a2","tests/f
ixtures/invalid/array/tables-1.stderr":"f105a34c2d87b61160881eeb09b7f54d244ba2a222d32fbfc755091939942247","tests/fixtures/invalid/array/tables-2.stderr":"77010599d1d61a34119a99acea7d84162d217df93bca01aed3ae73f1eb62dafe","tests/fixtures/invalid/array/text-after-array-entries.stderr":"391ee42f4fa3a7ec51ba1b90e69f1d9278c105426fe66ae1f80e65d7fb6ed379","tests/fixtures/invalid/array/text-before-array-separator.stderr":"7292ebcb8c9c8aaa4041279af5414de3e710977cac948988cdc8b0947223b62b","tests/fixtures/invalid/array/text-in-array.stderr":"0486e3ec5d299e39c61380a2ed8826d886edb730f6d9555a765e4314da7f5b68","tests/fixtures/invalid/bool/almost-false-with-extra.stderr":"0a14178172a5e9ed65a7d551870b4df768e7cfb39f7b66a2ac0643e2aa8374af","tests/fixtures/invalid/bool/almost-false.stderr":"ed5307c42046d6adf0bf4c3d1aa1c75faa23a46029d5e229f0c2ab4db068df1c","tests/fixtures/invalid/bool/almost-true-with-extra.stderr":"6e4dc09948a38c284f5bc52e011322a2c0182ee2f35c772d460fa15a76461d2d","tests/fixtures/invalid/bool/almost-true.stderr":"86b33938625e15e30d4166b4e81f89c669e84c5b96c2cf708d84dbf628536e07","tests/fixtures/invalid/bool/just-f.stderr":"657f90406495c748243b526e1e01737c418453a1a5ad12ff0400631e9fdaf859","tests/fixtures/invalid/bool/just-t.stderr":"8a8f5aa45b0d0acd13f121caafa85a52a8d17f1bd81542d21d3a5ff07f460b60","tests/fixtures/invalid/bool/mixed-case.stderr":"2e9a16757a17c7e604e081584e6f015ecffb0fd7b6ed969a1fe2096c09e7155c","tests/fixtures/invalid/bool/starting-same-false.stderr":"87069893e084d03c3ab540e0607498d371563c0341c6f043aff756c8f1b32c64","tests/fixtures/invalid/bool/starting-same-true.stderr":"2a1817a1e3a6558860eec1d802e1be8f2f2b54da9b2d9eaf1cddecd35d71a8d6","tests/fixtures/invalid/bool/wrong-case-false.stderr":"5eb58e82290b767febd475712f7de9ee712f094a30dcb627a757b305ab64d096","tests/fixtures/invalid/bool/wrong-case-true.stderr":"dc8e696e9137f737a232009e7e9f801aa9ebe4320a445259af74d83a6235c7b7","tests/fixtures/invalid/control/bare-cr.stderr":"568eff61d56bb362866973d9a7a488831b8b08d02f4808538a04b22ebe10bf09","tests/fixtures/invalid/control/bare-formfeed.stderr":"4532c2fdc7c4bab8fa6683cc4c46975c1dddf2f531459793dedfaab73e4ae347","tests/fixtures/invalid/control/bare-null.stderr":"fe799f446b55ba89609a52842b4eb63bfa725035c0006107c85c6a8f6e8db688","tests/fixtures/invalid/control/bare-vertical-tab.stderr":"b85c55e64c58b85343482dadcd5d833fa771a79e1d0f13a24f4185a4d0d826d5","tests/fixtures/invalid/control/comment-cr.stderr":"656e1b16fbc152545a9726a290a8ce84463a73d780ad13a32b65291f56cb7dc6","tests/fixtures/invalid/control/comment-del.stderr":"08f586fc1f6ea69e3718ab43fded3da139d5ae296cfe70bc27a7ffb4b2f106d5","tests/fixtures/invalid/control/comment-lf.stderr":"71725b218014e5ae260d5075f6bb90477e97c946e27a40c797710a190d586a58","tests/fixtures/invalid/control/comment-null.stderr":"39ca1697c9e13969314368c85401dbb141ee80f514ab4a359ea8e74880e33d28","tests/fixtures/invalid/control/comment-us.stderr":"1d8c3a908fb274503d357bdc428b82c97fd9f9b37681be09129a46454f31e0cd","tests/fixtures/invalid/control/control.stderr":"aa1ae71dbb29473161d8f82a163e6862f78c7c55d4831574e6994651fdccdb19","tests/fixtures/invalid/control/multi-del.stderr":"812c0742dab1b933689c925dd1cca16ed185d091acc51026e8fc6585c0401c00","tests/fixtures/invalid/control/multi-lf.stderr":"3d2ac666d19fc3d0fe676b4897557fe3735c43da64b4a71577c04445a1341a06","tests/fixtures/invalid/control/multi-null.stderr":"9fec8ad3ba45ddb96ad3b6a118b4fa648056e26a9128528b2c0a8fa3b01e741c","tests/fixtures/invalid/control/multi-us.stderr":"a4769a44b32457fbc791b15b442293f37c936d2394ca9aa87ad5c
964dc7cbb35","tests/fixtures/invalid/control/rawmulti-del.stderr":"b2dd93070d5bb09e19ea6ca015cfa1ef279ac2cf5e7fb242d8b74318f5d8418c","tests/fixtures/invalid/control/rawmulti-lf.stderr":"7e1d64429ed08f831783bace226535852bcebb48aae05541590fb242491cd7e9","tests/fixtures/invalid/control/rawmulti-null.stderr":"84c04cc89a6bc716b6f7811142899014abdb0b49c4ea56bc163c19220b14c323","tests/fixtures/invalid/control/rawmulti-us.stderr":"4e4566a74bde0a055d2e5a0dde72d41208c2ed188175b11c9e46167dff231d3c","tests/fixtures/invalid/control/rawstring-del.stderr":"c49c702bda4ed350dec4fc43edecb16202f8e5f1f4b02b06b42b2109d775a9b5","tests/fixtures/invalid/control/rawstring-lf.stderr":"5b11decc012a95bde096088a285eaaad4b8984f6d683bd633640141b19135530","tests/fixtures/invalid/control/rawstring-null.stderr":"eafa2a63e9d12293b290405049457860a8fef70de56c4ba2f203e5f2c79a8634","tests/fixtures/invalid/control/rawstring-us.stderr":"b3d8e5dcb1c66b93f2543ea325a50decb62178a5f0bea59fe64b04d89472b25e","tests/fixtures/invalid/control/string-bs.stderr":"1b35fdd656fb29e3011e5f4302fd37d0354922a21e740eb947dac766cebf3200","tests/fixtures/invalid/control/string-del.stderr":"ec80f70e9aea38e11191e50e9dfd80415de5b0406608c935aae8b8dbed9a8a7f","tests/fixtures/invalid/control/string-lf.stderr":"f91e1e0035af8ff259ff98c79097f845d8be8886629a93da962daba1079cb382","tests/fixtures/invalid/control/string-null.stderr":"fe34d284c478853bad94235aac1f37ac3c591f97e12990847b5da0b6c99bd47d","tests/fixtures/invalid/control/string-us.stderr":"dce016319c9fa0981f03cfd8b2b1d52d2d847697ea88e037f5a0e28bb53d8830","tests/fixtures/invalid/datetime/hour-over.stderr":"bd2220bdbaa96caf3d2aaff640620e4856bffb722a0e5be61dcb5283ffd08056","tests/fixtures/invalid/datetime/mday-over.stderr":"de9d27d65c68dd09da10c229167ce881dfe0ebda457badfe24b7598ae80c47a6","tests/fixtures/invalid/datetime/mday-under.stderr":"18daf3ce2a6a972476ccabcf92690a488e4f3be804dab8458da2aebad22a2c8c","tests/fixtures/invalid/datetime/minute-over.stderr":"a29009d3f7a6b1d9afad2420f223d6a6e02df8149577547837f5eeec4075bb9a","tests/fixtures/invalid/datetime/month-over.stderr":"37a203b22c3b2510541e413ff347447f9f3319a896ee005b96f65bc0d68150f4","tests/fixtures/invalid/datetime/month-under.stderr":"24c554595ca9a999a1d8e1ef7dc28b443f2f0ad6e17337ee157fb18bdcf678c1","tests/fixtures/invalid/datetime/no-leads-with-milli.stderr":"a35c496884e921aa086c1404bc812ff74f2bfd347a3ecd96640942be5555afbb","tests/fixtures/invalid/datetime/no-leads.stderr":"2e20cb60a1ecee85b172d1402e4d8c425362e4db607706bd39494385dc6dc98e","tests/fixtures/invalid/datetime/no-secs.stderr":"65871ee020e645e737c363b22cf43c160b295871cd4ac97a37d3ea46f60e3250","tests/fixtures/invalid/datetime/no-t.stderr":"ff50b85f6bc0d49000ec6f1303fda9b44bf934c2ede61743363411bbf6ebecbb","tests/fixtures/invalid/datetime/second-over.stderr":"0ed555a874efa08b711b5227501208758d87a01ad8360cf76c3dc8761807fac4","tests/fixtures/invalid/datetime/time-no-leads-2.stderr":"f725d49ddb5af69b7285f071d68e3d8441d5e331adcfd8c025c1f3cbb68028a5","tests/fixtures/invalid/datetime/time-no-leads.stderr":"cf06f3847f3d14655a94d8cfd5a6984dc74115b1d3cdbee0662ef215738bbf65","tests/fixtures/invalid/datetime/trailing-t.stderr":"87a15cd62bbe7cba2b942fe424c045ce30a12fe439a1b49587a5cc037ffa6b09","tests/fixtures/invalid/encoding/bad-utf8-at-end.stderr":"518dc443f0404d486b40bbbd152870276016795b05f3cc8a1de64a0e08fcdda2","tests/fixtures/invalid/encoding/bad-utf8-in-comment.stderr":"e0f252d14c18ea072c098834997db8e5f68b807bb0fa6d3d34e4042a5ea6fbb7","tests/fixtures/invalid/encoding/bad-utf8-in-multiline-literal.stde
rr":"2328a89cd9043de10ee656f4ea0dd5e6491fd8c0484ac36099c23161dd7a2625","tests/fixtures/invalid/encoding/bad-utf8-in-multiline.stderr":"2328a89cd9043de10ee656f4ea0dd5e6491fd8c0484ac36099c23161dd7a2625","tests/fixtures/invalid/encoding/bad-utf8-in-string-literal.stderr":"eefb00fee073933fbdb95d24a9e7050c281d4719d0cb970c2c06a71a86f108b3","tests/fixtures/invalid/encoding/bad-utf8-in-string.stderr":"eefb00fee073933fbdb95d24a9e7050c281d4719d0cb970c2c06a71a86f108b3","tests/fixtures/invalid/encoding/bom-not-at-start-1.stderr":"bd4e557b8b4586cdb39a8fde46f0bb214954f9f8ef37be46e2cc19823f6d6919","tests/fixtures/invalid/encoding/bom-not-at-start-2.stderr":"27003a498cb355011782dc21f01e15457490b78c472bb9ddb54147413c8f597e","tests/fixtures/invalid/encoding/utf16-bom.stderr":"a8800edcb8f6184b712da53e74bb787c39eb891073575acbae1ad575f15043cc","tests/fixtures/invalid/encoding/utf16.stderr":"edb66c01034865f484ccf7921bfcec1efaa8599762cb9cd30c9c8103275bc4e6","tests/fixtures/invalid/float/double-point-1.stderr":"2917901dd186adc39cb5965faf388fa2babe577ef3bfcadd4919232868a727cf","tests/fixtures/invalid/float/double-point-2.stderr":"7eda489da0436d6f0f2268aa4005b422d215b4785af0c1696c8731908a563f17","tests/fixtures/invalid/float/exp-double-e-1.stderr":"e64082e328fcfbeff57e6801448c769b12bc8e879b77421b688b2e147e386713","tests/fixtures/invalid/float/exp-double-e-2.stderr":"5c45326ef7287ea16a9e08275222e281b5d61c9322f8764f6533707f9772e255","tests/fixtures/invalid/float/exp-double-us.stderr":"ebd30aa3f7cd3a0a5e79bbbde1beff209d24f4ab58eb5552c1baf0eb2194e97b","tests/fixtures/invalid/float/exp-leading-us.stderr":"19c8f676dd45a5db09bd5baba5c3e7b661e83099a340331ee6bb10defe679569","tests/fixtures/invalid/float/exp-point-1.stderr":"23e73e4e63db888546866967a1c0319a1db269f23ee9c277b298e9f2db88800e","tests/fixtures/invalid/float/exp-point-2.stderr":"633328e085fb04d6a79cdfb696f45a1836c3a8b6afafc4cd5e16d48465aa4613","tests/fixtures/invalid/float/exp-trailing-us.stderr":"d633c5a44a55dc2b6cac18f739cecb33526f31d85f72cada0ebf70c4cca56dcd","tests/fixtures/invalid/float/float.stderr":"cc664d16849deec2ae7ebee6a3f46923bd5959075e282315c4f60461cdb13a0f","tests/fixtures/invalid/float/inf-incomplete-1.stderr":"38cd906dfee7f13b8cbdb27f3406ab0499fae3ae16f3c77bc7fc48d009595d93","tests/fixtures/invalid/float/inf-incomplete-2.stderr":"97a9ae1ff194a95b5be2abaf2cd8179ada832cdd9fad349efa9951e7ab92e435","tests/fixtures/invalid/float/inf-incomplete-3.stderr":"034bc609343ecf1e659d6250f719e5f93512e8140228e44e57b538765e58a1f7","tests/fixtures/invalid/float/inf_underscore.stderr":"621326dde26e5364c7af1b562fb651f4184d9b5fc9bc45edc12f52b588d506bc","tests/fixtures/invalid/float/leading-point-neg.stderr":"d19e28ba2f11069800df4dd1951025aa7f75425f7258e8caf4bbf6abe0e84bc9","tests/fixtures/invalid/float/leading-point-plus.stderr":"10750e9acccb17f0682db30fb175d083d06c822a4863d3d6b8ddb6c75b7b22ec","tests/fixtures/invalid/float/leading-point.stderr":"2545b7a615528595f5d53a7338403c83a8587e70600b1501225446e5f456c805","tests/fixtures/invalid/float/leading-us.stderr":"dc958138922097b2e1e3865c7818604b2249268af4acbe5cafe0ce8c68a90a86","tests/fixtures/invalid/float/leading-zero-neg.stderr":"d1fad35fa8d18f93ebfdf681d3476f02600e5c39cc942ca9bc36181476cbbe53","tests/fixtures/invalid/float/leading-zero-plus.stderr":"ad8ba7a7c12cb4b296cc0d43915106732e6a6a713aea67034587d1fc0c8093df","tests/fixtures/invalid/float/leading-zero.stderr":"cc664d16849deec2ae7ebee6a3f46923bd5959075e282315c4f60461cdb13a0f","tests/fixtures/invalid/float/nan-incomplete-1.stderr":"f4bee0b1c639bf800fc4dda38276142e71
5cd85ab6cc5e93ae2112ea63d7de89","tests/fixtures/invalid/float/nan-incomplete-2.stderr":"dc908ec577d29083bfd709fc4bdc2fa641d7fb2ba77a5d7441215680a8839d69","tests/fixtures/invalid/float/nan-incomplete-3.stderr":"abab5a41e0f2f1bad2d2050d0c913dfd8c15e50530d53ef8de327f106f564e02","tests/fixtures/invalid/float/nan_underscore.stderr":"25b67a7d6c743f673be7b409c9990de5de8b52a1d97c32e6f4e62f33147f1872","tests/fixtures/invalid/float/trailing-point-min.stderr":"69ad03ae81990d580a6d63bdd5ab594de00c0a16694c8671704c6243b4578b38","tests/fixtures/invalid/float/trailing-point-plus.stderr":"fba0bbad890020fe943e9f23644e81bf0bb7d114230fe16182e866fddcfc108b","tests/fixtures/invalid/float/trailing-point.stderr":"2f12b368fd94304ab0126ebb5888c519475f9ca28e9ca702c477cf0085ba9216","tests/fixtures/invalid/float/trailing-us-exp.stderr":"2ad53ae4736ce5134921acf0c16bc5031627c0da3119edcdc4bd7eb300d40337","tests/fixtures/invalid/float/trailing-us.stderr":"506cb8051f1045ea1dc7f11865d58cbca0216502d273e1c10366c8be7cc9ab43","tests/fixtures/invalid/float/us-after-point.stderr":"fa9fb59f703b6770be3dc094c04eb2c4add8a7a7ab79d9fe508cfeee785404f1","tests/fixtures/invalid/float/us-before-point.stderr":"14e09a7a382e249e5143d1c81d6e4623408eb2d505e1e3f86c370a3a3bf6cd9e","tests/fixtures/invalid/inline-table/add.stderr":"bf95d34749254300f4179ed1314cc9cabd7c7b63fc2453fc7adbc7869b63be4a","tests/fixtures/invalid/inline-table/double-comma.stderr":"2132a1c4d97fab140089818f990284333e22ef91d20a9f65e11d4dd15b1a701a","tests/fixtures/invalid/inline-table/duplicate-key.stderr":"72bea73b20005f15ced977aae70a1b0f3bbe3e35598231aca9a2303d770efdc3","tests/fixtures/invalid/inline-table/empty.stderr":"604fef40337f04e5f37a52239d6509850aba95677a7a94ca8476a6c21b648a43","tests/fixtures/invalid/inline-table/linebreak-1.stderr":"45b0611d37c1ece88bf6c88b3528adc3d73e0cd3e3b24dcf07ab151023a6f488","tests/fixtures/invalid/inline-table/linebreak-2.stderr":"f7672965326b44adaf0cb4796a087fbe779a8b17fbb458090a33375d0c54e5b4","tests/fixtures/invalid/inline-table/linebreak-3.stderr":"e8c70f0c91b15e701567e93d8df1cd3bec593696af05ec1d95e8f9e00ab20fa6","tests/fixtures/invalid/inline-table/linebreak-4.stderr":"3d31147f9e1ff5f94384e4df1675dfff2da6f076cb0a729771615f05b990be91","tests/fixtures/invalid/inline-table/no-comma.stderr":"9f1c85e0df72c7e7e011c26a0d5dd9dea8b7a5e18c3ba9a53ff4a20a9429dce9","tests/fixtures/invalid/inline-table/overwrite.stderr":"812d1bc74d07750048a521e513a565676e606d4fa1a32d2ebda7af8fa064d3ab","tests/fixtures/invalid/inline-table/trailing-comma.stderr":"4791911dafd6602e2891d6ffc4d32ef8e9d0c1f8f6d37e84d440feb896d9cb88","tests/fixtures/invalid/integer/capital-bin.stderr":"fcfc8b0bddd36a641d3f5cc2ceee88554619fabf6874e11cdfdd147be8781881","tests/fixtures/invalid/integer/capital-hex.stderr":"c8e2d64f9659435a0387bb7e6447896eda253fef77e0214a4073fcffbac693a7","tests/fixtures/invalid/integer/capital-oct.stderr":"ec465fa25da212b0c9b6265ac8e9cd05c1fa07d614dafb3bc9b2ca74d6c2a7a7","tests/fixtures/invalid/integer/double-sign-nex.stderr":"8d57da526240c1cf73423b688442922ae291ff26e3c09f9c3b5b150e62e5cbaa","tests/fixtures/invalid/integer/double-sign-plus.stderr":"55896d9bd19637e124482966a12109a1a8351620ddc6f8d28553d70359f523f1","tests/fixtures/invalid/integer/double-us.stderr":"f14ed7bd3ad26b2203763fa953dd6e99212e50fb8e43a4eaeb115c1a7df4fc25","tests/fixtures/invalid/integer/incomplete-bin.stderr":"64168fc7ede87a10c12f82325fce644a7d9b9c3af55a313184175df7926845e3","tests/fixtures/invalid/integer/incomplete-hex.stderr":"ed2423540e288f4673bc68822a799bea04f571db5de56154e10360b03ab79
553","tests/fixtures/invalid/integer/incomplete-oct.stderr":"9ed35e3078703a38996f20dc3e86477149564c8abd237c644bdf3a5ef26e3417","tests/fixtures/invalid/integer/integer.stderr":"ed5ef991b733b3d51700364da18bf58f1b7eb68053467afcbff22775b3b82788","tests/fixtures/invalid/integer/invalid-bin.stderr":"7248d47f2c7db309254a3a41af28bc1a6e96bfa95e0c8c94d607f65a1a30cee6","tests/fixtures/invalid/integer/invalid-hex.stderr":"3976255c6fe35a1e29f0fed7324eee8420ababd0f6f1f7702908c3df47c88846","tests/fixtures/invalid/integer/invalid-oct.stderr":"9f6776e33887cb446a5590d8fe4e51c36747c634cd5e4efaa84f807d3ce244e0","tests/fixtures/invalid/integer/leading-us-bin.stderr":"28edc918ac016cc9cb7b0b20fe2a5e1dc0175de0aa8105d7a6ca263815c6e4e7","tests/fixtures/invalid/integer/leading-us-hex.stderr":"2a1a5943399c8ba5849ba569411bfefc8323659c852bd714ca917231f6212ef0","tests/fixtures/invalid/integer/leading-us-oct.stderr":"aad69bdd80f94e907bda03558a1302e54d58d8911fe2b564e93cb0ec48403b09","tests/fixtures/invalid/integer/leading-us.stderr":"3a265cc11f1b0d43d4b532a47776486ec7c7ea7afe70813ab00c5a37cf87a9df","tests/fixtures/invalid/integer/leading-zero-1.stderr":"ed5ef991b733b3d51700364da18bf58f1b7eb68053467afcbff22775b3b82788","tests/fixtures/invalid/integer/leading-zero-2.stderr":"5c70e7874256512c0ef6bb364497d4e10154e994056f2feb7c5c729016522091","tests/fixtures/invalid/integer/leading-zero-3.stderr":"fb2730feda6f669a3b8c4332f01369e52ce1b942807f1bf3d9762b1fea04aeac","tests/fixtures/invalid/integer/leading-zero-sign-1.stderr":"c9d2d992eea36c4fe228eb74741bd8d0ede1e354cad132b79462e7b502b37f95","tests/fixtures/invalid/integer/leading-zero-sign-2.stderr":"4248329b339020cc2ea586f2775a0b4f4cbe2ae3f0f75b935263363b8be5eaf5","tests/fixtures/invalid/integer/leading-zero-sign-3.stderr":"3b414808727d3a446efdfca0033525e17536f9b54104d8a9cb9278b054d213df","tests/fixtures/invalid/integer/negative-bin.stderr":"74aae673b861bd46544e4835fe7075e20158dd69e27f75c790d48a6006476c73","tests/fixtures/invalid/integer/negative-hex.stderr":"799bd8120f4cf2c36e7f65a5f9aa43a3ec87dd95dd3bf68501059da9f21f8c9e","tests/fixtures/invalid/integer/negative-oct.stderr":"fb082674d0f5a8a231c91c2d34cf47fdeece4c8c5c8c7b23fb09fce95ed59a9f","tests/fixtures/invalid/integer/positive-bin.stderr":"54d8a33743737f374480cd1235bf3f7e0847d252ef7e2bb1d447529cbc0f6692","tests/fixtures/invalid/integer/positive-hex.stderr":"3b21b23cc3dd6b213a19256f4ffb4bb36172de2f739f90bbea78636f7a50524b","tests/fixtures/invalid/integer/positive-oct.stderr":"cc493d14bcb092f49b517fa5143e29c45020d491d94ac662f85b2af34805cf11","tests/fixtures/invalid/integer/text-after-integer.stderr":"07a13ad4841a452eff00947234a4ebac4d209ea0294162888db35668648bb55d","tests/fixtures/invalid/integer/trailing-us-bin.stderr":"62da06cf06527b9e9cbeba6c5299ce6001d40592e9d007c8350090977f4d1b58","tests/fixtures/invalid/integer/trailing-us-hex.stderr":"1b290eada58a7202b1a9251afd8e0e72a4caa8ad5c85036d1050e7de8141e94d","tests/fixtures/invalid/integer/trailing-us-oct.stderr":"34e6f86ffb0099e6e1ba67deb51e36af62dfce4e7299b94503a219339bf16447","tests/fixtures/invalid/integer/trailing-us.stderr":"3ab49ee921eb772f5aa4eaf0fb3619b1dcd9a9db3f4ebbd9bc505581a985e753","tests/fixtures/invalid/integer/us-after-bin.stderr":"a94a87ebab3536899ce7c0c785f020b3a236c60d24c0bd7494628ca310c40768","tests/fixtures/invalid/integer/us-after-hex.stderr":"9009b187f615f06e3392eabd8ffa58311ed1c2b1cd76f8c5bd99671242f2e026","tests/fixtures/invalid/integer/us-after-oct.stderr":"05af70a21980416fbd602337f9af22a1c600a294635d10ef1ca1b2138338e712","tests/fixtures/invalid/key/aft
er-array.stderr":"487d957b20226ac36e27d6efb1e3d24147284c9a5e10a0188427a1c940d31ef0","tests/fixtures/invalid/key/after-table.stderr":"f70e84770817f096fcc1b6195c6b0a79d25210c6930ce412a89646040ee3d713","tests/fixtures/invalid/key/after-value.stderr":"00d4d2d3ccd61f64a92df0ca575aeafcd96e91d053d835ca855973339ba458cf","tests/fixtures/invalid/key/bare-invalid-character.stderr":"b1f64d54a43017e6cc09755fa7ba477901721d23f9271ec658fc9362f46631b3","tests/fixtures/invalid/key/dotted-redefine-table.stderr":"564febb355d1556df42f428a046ac6fdc5dad49b2b736be5824b0c13fcd1fae9","tests/fixtures/invalid/key/duplicate-keys.stderr":"7c9dfef2ef19b1487b7592a267ab5ba21c8b833dfa9ec1c3151e369c2fdba26e","tests/fixtures/invalid/key/duplicate.stderr":"764ef48bb06c9096c853b1156c8d29ba0065616939a5335146f5af88a424cea3","tests/fixtures/invalid/key/empty.stderr":"af6d3636ca73e5496c40d9c918c59b61fd86812db262649e5268094193873130","tests/fixtures/invalid/key/escape.stderr":"155aa9389f0eb28cac3b42974af7ea9e2eef8d96f084f08f9f75e960fc8ce8c7","tests/fixtures/invalid/key/hash.stderr":"85dd91b96aa4f81cc7922b02b411f25d9053bddd1e5b893c2a2ee9d0115a7cac","tests/fixtures/invalid/key/multiline.stderr":"d625f2caaf01d53d72d6f1c3df0952fe3ca8c5f3b081503cb02b9994c088b733","tests/fixtures/invalid/key/newline.stderr":"714aed0a140062f977ec85b9afa50f68448c67e806168e60b4f4554ab270b2b9","tests/fixtures/invalid/key/no-eol.stderr":"440ec927e94f0e520a0f256c865041f0478e1c82f3bb79323b7ddc36fc942edf","tests/fixtures/invalid/key/open-bracket.stderr":"3b36814373f51a8ea00a448d65bc514e8d99f5163b7dd8101df62bcd0a06e801","tests/fixtures/invalid/key/partial-quoted.stderr":"dc9059a014ed53071ed170b1e280923556dc09e0be2ae96cc8474e9da59fa378","tests/fixtures/invalid/key/quoted-unclosed-1.stderr":"6cdec8a7c5352a2f246273afaa923dfa81d4d2e68cca5b4f9a19193559b164c2","tests/fixtures/invalid/key/quoted-unclosed-2.stderr":"b4817e6f85a90fbb6adf049ba57c268f9888f1b42b3d62200c359606176170b1","tests/fixtures/invalid/key/single-open-bracket.stderr":"917c0203d1e45309fcff82ce33fdd2d989f630fb99290a40cb9e08a6f7ca0ef8","tests/fixtures/invalid/key/space.stderr":"3a5fa712d667890678873e3d4e4cabb084c67091c5ec6155355d5bd4229585dc","tests/fixtures/invalid/key/special-character.stderr":"a84c2f293c1e421a1c87050cb0211de80dbfe7a79939db0338fa35bf0c181ef2","tests/fixtures/invalid/key/start-bracket.stderr":"223d8a22bf34459cd9bcb993ae2a51ab3cc436674e3367e92f7d74e9f8710a45","tests/fixtures/invalid/key/start-dot.stderr":"f9366a1492ae24fd0721724b4039d2675e91219de564aff2826adefd83fac571","tests/fixtures/invalid/key/two-equals.stderr":"a0aae899cfa75df41104a4d3090a309fc7ebcd95bb5a944cf742f3d3fc9d4782","tests/fixtures/invalid/key/two-equals2.stderr":"861826b9456ab3a74f63f5c555e13d959a3991dfa6ce126ae5ed14d43f7dcee1","tests/fixtures/invalid/key/two-equals3.stderr":"71614864344e321ac5de238b7ef9d097c6d7f3ac3eee4118d96827b4b8bd6658","tests/fixtures/invalid/key/without-value-1.stderr":"16c2823a39a82c3c27e0959a691b7a95e3392d62195884697893d373b967b9c0","tests/fixtures/invalid/key/without-value-2.stderr":"d340f94f5d96f5730ab269db7ef27aca171d64e35af1181c474d75a7d11d6590","tests/fixtures/invalid/key/without-value-3.stderr":"3cf3072fe9206bfe6c682103d0414627a5a63db4c4a319cf37efeb5fe6b92007","tests/fixtures/invalid/key/without-value-4.stderr":"07132bec96e9a9a672bafdc3c448b7c596257245f8c3e2cae04641f9798644ec","tests/fixtures/invalid/spec/inline-table-2-0.stderr":"5ad1a938b1d1f0f3fdbd1871efdebfd30e136407ecdd9e2eff22150d00624b3f","tests/fixtures/invalid/spec/inline-table-3-0.stderr":"fcbc05e911b7db81bd918768fe98a51a7026fd476d6
16718cc417d2f08bcc1a1","tests/fixtures/invalid/spec/key-value-pair-1.stderr":"d5391142dfd56040840cf91b1e28e3c048229e3d9998534d41001cd6657f9bd6","tests/fixtures/invalid/spec/keys-2.stderr":"3c4ee6066fc75d2c1f1b325f618a01113694c318e330ff4f237e89127f332c87","tests/fixtures/invalid/spec/string-4-0.stderr":"910ee4b240159b828a7509c8dfb46507071a8d8636f3935a3914d6d91f315295","tests/fixtures/invalid/spec/string-7-0.stderr":"5128f0a930b3034e494a6bee4f384a587e9fd858b25f8cc529a488c94ee9670d","tests/fixtures/invalid/spec/table-9-0.stderr":"49dac70d337266f5c6b333fee468f279fed1bff62bfb4ec7436c8b6683ce0dd2","tests/fixtures/invalid/spec/table-9-1.stderr":"d9e071c70356c01b6537f876989ad2067e7773dd5eb24a298439d192dbad12d0","tests/fixtures/invalid/string/bad-byte-escape.stderr":"14f6ae446b3b8cb434267eba11c6ec5a1badef4f867169b173698cf9f1a29d95","tests/fixtures/invalid/string/bad-codepoint.stderr":"e5a9704b87fe85e447e817197ad4e59e27b33bd93e7c5ad3987561d119ce6ee4","tests/fixtures/invalid/string/bad-concat.stderr":"499219633467b9174471db40543ca188e2b906c470e511d2f701f5f5475d96be","tests/fixtures/invalid/string/bad-escape-1.stderr":"34a15ce7012217c62d31d5392038517c216f0cbfd5d75fb5f3c2bb07afd3f25c","tests/fixtures/invalid/string/bad-escape-2.stderr":"955aab40b16043c847d85d04e6adcd093c930dd8416d29c2ab5953c077eac6f4","tests/fixtures/invalid/string/bad-hex-esc-1.stderr":"aea935cf1e17743356e6fb1059afed2d0ee5262906594782e5537a025398038e","tests/fixtures/invalid/string/bad-hex-esc-2.stderr":"deac5217cf80acc759e1b40c43f5f56431b276dc2c896aae5490d57583105e06","tests/fixtures/invalid/string/bad-hex-esc-3.stderr":"94ecf886427e8fe5daf1d8f932bf1887f2533b10bc1f57cb6de03ea28fef466f","tests/fixtures/invalid/string/bad-hex-esc-4.stderr":"382b011dd4070554ee875fde06703d8332ef6ad36f3619f3536b0a4997ee2745","tests/fixtures/invalid/string/bad-hex-esc-5.stderr":"a8a039fae822eda68591da28ff2a117b5d85e99d066e9126ebbb6426a1cad52d","tests/fixtures/invalid/string/bad-hex-esc.stderr":"aea935cf1e17743356e6fb1059afed2d0ee5262906594782e5537a025398038e","tests/fixtures/invalid/string/bad-multiline.stderr":"141e5770190dd184bb1f64f6bb14fc017210bbd918ab5c8b7a3d80b86b21772b","tests/fixtures/invalid/string/bad-slash-escape.stderr":"d62f894ee166bddf84432507fb4ba56473c0a230fd88a3ccc2b199a72a34e613","tests/fixtures/invalid/string/bad-uni-esc-1.stderr":"d1d6e0c78b64f776a428aa7cb332b0ccd659d24950dd3e8f1d2a1450f61b37e5","tests/fixtures/invalid/string/bad-uni-esc-2.stderr":"d8ed938bafdeda05c083d79ed73f5043dabe0f5f1515c6857ae7a61db9026ebc","tests/fixtures/invalid/string/bad-uni-esc-3.stderr":"de1c29897549ae37988a38e1c370f9974fcbf7d4c4b3fd457d731999cfc05929","tests/fixtures/invalid/string/bad-uni-esc-4.stderr":"4e5a715e8dc212d073d399bb0ae9b49413396744282a195c34cb03e6f4fbd9eb","tests/fixtures/invalid/string/bad-uni-esc-5.stderr":"2419cd927d8e31ebf5025558e70bb1295d98bdb36c17f0e00620b9c4a7aadbf6","tests/fixtures/invalid/string/basic-byte-escapes.stderr":"b42fd0273c7438bf13ddea9552204bb9209cdcc8e4151311d2446185d2cd546a","tests/fixtures/invalid/string/basic-multiline-out-of-range-unicode-escape-1.stderr":"725cd4955987c3d6e736832281316d6c1a2446303e9a1dc78900cef4bb84ee64","tests/fixtures/invalid/string/basic-multiline-out-of-range-unicode-escape-2.stderr":"c6698fbdb95188d53bfdaa4a4f590d86a73aafcc321a5d9511ab43ce51be1c78","tests/fixtures/invalid/string/basic-multiline-quotes.stderr":"28177a49532f22aaffc9dc204592a2c5eca2fc20f8e208b7c7f589201e8b7de5","tests/fixtures/invalid/string/basic-multiline-unknown-escape.stderr":"a83406b30eb3ab2cebb0230d8d65d0b7583885138f2c070976ae61d
e2c8b17f3","tests/fixtures/invalid/string/basic-out-of-range-unicode-escape-1.stderr":"19af67599c6c2eef340c9fdb0ab2cc788928def50280af939247a1274447781f","tests/fixtures/invalid/string/basic-out-of-range-unicode-escape-2.stderr":"0e2e1a69358502ec17a07e4fc151b70e8a3b5123798cb38f98fe2d146515a84e","tests/fixtures/invalid/string/basic-unknown-escape.stderr":"1de467948fb18f61336350063701d9c5a6615054fe740a9be650f71f5ca4236b","tests/fixtures/invalid/string/literal-multiline-quotes-1.stderr":"249123229606aa8eedff1b5bdead5022daf470e47dbca639e32019d1d61dbcf9","tests/fixtures/invalid/string/literal-multiline-quotes-2.stderr":"d9784af1ff056a90bf531307749d53a5d24ffffbc0f4aada7fcee417a50d1615","tests/fixtures/invalid/string/missing-quotes.stderr":"462f24701d2c51d36b18d06b69be2f6eb36449b5f3ffbaa737fcbd2b2151ae4a","tests/fixtures/invalid/string/multiline-bad-escape-1.stderr":"18469c4d37d011b3f30ae17e3111b5e8a9526d593475e5d8d7a9b19461a40e8d","tests/fixtures/invalid/string/multiline-bad-escape-2.stderr":"d43896d3005c8470dc8149e2b74eb8825c6d9fedfe9f48125ad88a95c1dc3035","tests/fixtures/invalid/string/multiline-bad-escape-3.stderr":"92f732c6bcb922e25d2a001a389f93b596dd0e91109cbdcb651efa146309dc2a","tests/fixtures/invalid/string/multiline-escape-space.stderr":"94b451b6c03055186777a248cb216f95a1b2e29df25549f345d96bd0a4e63f1e","tests/fixtures/invalid/string/multiline-no-close-2.stderr":"e500e99a44305b1e148b211e963478cf1554f8c9536d3108390cf41d5b2ce069","tests/fixtures/invalid/string/multiline-no-close.stderr":"d5b9602d23b0cb023fbe3ae80d862fd60332475ba8863a1e977f17cb326a4548","tests/fixtures/invalid/string/multiline-quotes-1.stderr":"046956658c0a73e665e7a6a2044ff83c8efb8cdd8c2ab153c163eb1e61068c56","tests/fixtures/invalid/string/no-close.stderr":"3ad8aff0932d98592b808fc6f44fa68a854097f8025e92c11af1acb6de3d3cc7","tests/fixtures/invalid/string/text-after-string.stderr":"1c1e4677be8d3dba0e7933b3ed1cbb6e0bcf6f600cf9a989a7b09c9424a4d0a7","tests/fixtures/invalid/string/wrong-close.stderr":"441f4f1b73c11c8dbf2f73cf9a7766f17a9517b3b9142e86736ed43eaec07f18","tests/fixtures/invalid/table/append-with-dotted-keys-1.stderr":"a67f1f152005295e0a1bb3dcaaa755edd05f19ac5316b8ad2eb4d45797e0f770","tests/fixtures/invalid/table/append-with-dotted-keys-2.stderr":"72d9ea8a90b4d9e5319c2bf951bdde6a87a205612e82ed5a09cea2b706bfde7f","tests/fixtures/invalid/table/array-empty.stderr":"e8a41c60adf7756361920816b6c4f44125a813c869b71fae2c98473e4da1b231","tests/fixtures/invalid/table/array-implicit.stderr":"7797ce41aab0567fc9d40e277cc32c12e1f16ffc0e73857fdb3bbf754246305f","tests/fixtures/invalid/table/array-missing-bracket.stderr":"5f1e8703d59398f6595d21ed0abcc7dc3ce77943ad0f71eede9ad63ea2bcc7c1","tests/fixtures/invalid/table/duplicate-key-dotted-table.stderr":"ca58908463cbe2ec6b3de314237c178fee64245cc738c72a7b9e08bb3d02b2b0","tests/fixtures/invalid/table/duplicate-key-dotted-table2.stderr":"cb59f2ed324642de947f3cd9373ca111ec35104a5f33578f64c48084ce1a84f5","tests/fixtures/invalid/table/duplicate-key-table.stderr":"f4816522738b3e2ace87d1100a3d73e6a122d8dc67d05e0b35a1438e16a8952c","tests/fixtures/invalid/table/duplicate-table-array.stderr":"11d293e4b4f205fc98cd892f25a25f533cb922c963ecf095a932d2e9d550be4f","tests/fixtures/invalid/table/duplicate-table-array2.stderr":"fa9cd3b1212eed14ec56b66a16471ac2f7c0398d743982abb7c5cb4b5c7a5fe4","tests/fixtures/invalid/table/duplicate.stderr":"3e6d1b1a2f44d449e8cb0098e7c40ad1e755363b446f3821c399abfb26eb9939","tests/fixtures/invalid/table/empty-implicit-table.stderr":"cd3606ce97c5537d18146cd978403636a65fa703c83616da75
b8cafa86e8fa24","tests/fixtures/invalid/table/empty.stderr":"4399e419abbcfbec93f5915e7fbdd11b6e462a4c066a29eacda159abfc588734","tests/fixtures/invalid/table/equals-sign.stderr":"472de6b908a03c99637b635a3a898ed956684ae422e1b4b135ec94986ea45f2d","tests/fixtures/invalid/table/llbrace.stderr":"db6bbee7ed15994398901c46ed4b40904897e71f5d972deb7904ccac49cd834e","tests/fixtures/invalid/table/nested-brackets-close.stderr":"e1dff60ea8f77dd1b8fae7d1d63c788c838c80560172d92377cc168f5cb5923a","tests/fixtures/invalid/table/nested-brackets-open.stderr":"bd58eb0630dc0c51ebc288258d360d707c8f43a5877ddc21e9420f8eb76a2f4c","tests/fixtures/invalid/table/quoted-no-close.stderr":"6bf7e2d30c735a55f595140af7c7f6be89b6faf868f4473ea39570fdb87d5823","tests/fixtures/invalid/table/redefine.stderr":"3e794bce5bb6ae9f603f50e3dc62d136701ec478078e8a8e99c94229778e24ca","tests/fixtures/invalid/table/rrbrace.stderr":"342a5ff362c8b4c1e85a6442029291bd33165a3b36552794fcd5269249bf36a1","tests/fixtures/invalid/table/text-after-table.stderr":"6dfaf1fc3199f0602fea52f7b1c65869eb2f8643b9e90dc1e718a183fb972485","tests/fixtures/invalid/table/whitespace.stderr":"fa48d4dc83f92e729dc25c6fc6a0c336014391b4bdb3392998f18141d2deb350","tests/fixtures/invalid/table/with-pound.stderr":"97dbd1ceb7f357bd98cc1caa9a602c638aaa5831237b7d63b18153acc64d3af4","tests/invalid.rs":"daa9034453fb7f10718020e36a07a19664eb852071995f17480c595cb44e2cdf","tests/testsuite/convert.rs":"9140d681dbb9370b975d5bc1cd4e9e640ac4023c6789edcae544e66657ad5fe9","tests/testsuite/datetime.rs":"105d95570d05e9ecbc30bfe7d081f9d63e2f36634e9124da012f467c6134549e","tests/testsuite/edit.rs":"7ff72670fe971ce9cd3e94a9254d1f54ebc792d8881b688d397cd9a996f2d051","tests/testsuite/invalid.rs":"31789643e3419ab922f8258e5a0421e1648b64aa5b96d3e1fb79bae36abf286e","tests/testsuite/main.rs":"a363749a7074ee95e3b7c556c0b0f59f5818445ca4762ec53693f451359b268a","tests/testsuite/parse.rs":"8cbf29045c78730b5be3b67f01b38378cee40030833261037eaf6be5c1872dea","tests/testsuite/stackoverflow.rs":"426d4e621bbafe62f8aba2e8c62e715929185d5eca4c5083b6427b601abc667a"},"package":"239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13"} \ No newline at end of file 
+{"files":{"Cargo.lock":"1a8f2b2db37e00242f68b043ed495cb9d7dc52830a99ac7438c60652de79f07f","Cargo.toml":"ef86b549c9ede24848b38bf2f683ae1c90c43e04da97d29c78a7479c3b020d82","LICENSE-APACHE":"c6596eb7be8581c18be736c846fb9173b69eccf6ef94c5135893ec56bd92ba08","LICENSE-MIT":"6efb0476a1cc085077ed49357026d8c173bf33017278ef440f222fb9cbcb66e6","README.md":"61071236d49e09b94cda5955f5d61a6196b192b11a990883d458dc6e3d72da6f","examples/visit.rs":"657756caba28aa87a8b10104aba704fd2365579c57a293bc572a8d72173ba174","src/array.rs":"46d62e3898f39b5cc0c2a98f0f38986ad0cbac837366849a2c07b458524dae47","src/array_of_tables.rs":"a4c391b2dedd475546159fec824e2cca5f32e4e684fdf02792f645d2162881ed","src/de/array.rs":"7969d28e98e366cd16c9ed74128404a147a2b787c54d0450594811a92d17b310","src/de/datetime.rs":"a29e76372fc382686c31ee93fd54c749ea3d775c8d4fcc2b2643f9464ea23730","src/de/key.rs":"f8a977fe08e1f929e599f2216734db678ca6ec9fb7f6860b75f1cdfc628b52c6","src/de/mod.rs":"b7344ac410b21b2c82eb88782f48979d6f850a778a19bb2eea7228e834374d2f","src/de/spanned.rs":"5c959639bf4b3d19c6b18e625a57b28b311164bf7b191bbc4d30aa97b7239679","src/de/table.rs":"b1f0f95ea502126b8707ba945596f3c47d83d46680622899334567df1b58df1d","src/de/table_enum.rs":"879a78836ac3b00ab35b9cd72a08fb8486a6c4ae05c779481ae9fcb9546b8390","src/de/value.rs":"57361229d150b5885748d079d7295e48d751506b9662b2cd2c623da331c3bb45","src/document.rs":"7882374aa62978cac3752f346253399f281121a544927feed2882ed9a008361d","src/encode.rs":"fb3b267d836ebb7b9a1ea0f1202e89b8ce056f28c95560418b64d2cc3e87cb8f","src/index.rs":"34ed0a4cc0cd55ce29885ed8123cc7c09c8ba3b9aa8d3b0b47554c1a41d31577","src/inline_table.rs":"497c56915a15f0d4826f20726aee056c7a6ab57af561b0901260f0a102199a36","src/internal_string.rs":"8c84f94363ed012eb7114c6b3f6b371155d0ce740ce44ec9d315aedf048c1d11","src/item.rs":"d0908cd810e7c57f00c2b7e84b48f83b9157837cb07945ca4e6fa24da46181e6","src/key.rs":"b6577a803b818e2010134615bc8cf93ff3a03d7097f29cba7a6b98306a81fdce","src/lib.rs":"3ff2ffd3366e495278fd7b3118ea54b7d09534380396a75b46c409e3c563c67c","src/parser/array.rs":"c8c2e789ce082cb617c11e28f07cfeed3663966f608b9b3d76a57aa321b898d5","src/parser/datetime.rs":"b1717d8038d6c58b90065d7552b38d8fa614c1271a3eaf056076fd68f1bdc85f","src/parser/document.rs":"0b94e7e0bd53606949d66bb6854bf275bed399b869bbce18d6acbd65238de112","src/parser/errors.rs":"1433576273997fab06f54a8cd777bcaa05437436d1a7f6c003cc9af398e14e86","src/parser/inline_table.rs":"91bb5f1a54574e66727fbc03ec4f73d3c435fc54ee4a22159709ee2612bfc118","src/parser/key.rs":"0de3bb1d3ab843b4da6c8913f28ea90bb54f35d1cbbad96b0d6f92597109dd0b","src/parser/mod.rs":"08004fa6b4751a20d1b963b3c3c06bbfe05c13d342a1f035cc76de6b9bcd333e","src/parser/numbers.rs":"76a61210ce5b399f7367347c4dd1d71871d99e6dc06aeeb7b7e8b84962831e21","src/parser/state.rs":"de77489ec1645ae89b1193eda1f94e46a84f6ea945c92f1f9b07c08611a72d15","src/parser/strings.rs":"d4f1383d83699ab43f2ce04613fa656f9015d5d588910e71122ed88c3b09c25a","src/parser/table.rs":"c716d82caed27229afb22e757cb14b2828356fc69d33ab94d8f43043f992cd2b","src/parser/trivia.rs":"c2d9feaa958c339de82c6756a2e8ee749d0324db0c9890abc237fb213bf7b2e0","src/parser/value.rs":"ffc4b97539b121f195051857089071344ae3b68d563566425e19fa0365a6ae62","src/raw_string.rs":"dcec7da0aab75a5ff5891a0b4a22265ed47c17f9d802ec135e40f2bb1b0a7c60","src/repr.rs":"68356f3b89cd76b75bd9f78b6fbd31fbcc96d8195a7118d286dca92950adaee4","src/ser/array.rs":"5fb55303365bf6991f7af523931d1a046e88d3125c099317e12a7d4f592321a8","src/ser/key.rs":"9bad3bee90876c5c54aaa5caee92bfe30d79a100615902118b35b3ceb40b524e","src/ser/ma
p.rs":"07e189d93044746c83ffc85620facb5e26c148b1267589e373b9bc2903d449c0","src/ser/mod.rs":"5f5d0d7e7e2ec0c486e90ece7a1f5bbf166da640b1208365d89536d6616fb74b","src/ser/pretty.rs":"9a9f6d43b2eab487de8af216b9950c93318c9ee6a91f4707ffa41e2ee492c902","src/ser/value.rs":"4e53308b7b82e50382a6d48251c27cf817eabea063c99e6ab9abe30d781c3afc","src/table.rs":"267478d627cae7bf63d4e88c9b757b7fa55a1f58ae1914ae9f5ec29279f8421e","src/value.rs":"881f7aee14ef968f5f843ccbbaa648117ceac2148df6e09d171f41c174d35dd9","src/visit.rs":"34b49c3091a6ef7c1513429f8290c26989da25cec496d3993abf33fa65ebfc58","src/visit_mut.rs":"b184d7bb94038fdc2e0fee8689112f01377d9f8c05ffafb8a1f5f2263e80ee27","tests/decoder.rs":"8093389d542f50974d6a606321b0413941709ac2cbdef48934b60088bb21e3a3","tests/decoder_compliance.rs":"a06a10a2b89758ef08dda24c4475d13638b7c31e8872ed6edc6919d5b9910bd4","tests/encoder.rs":"c84dd8e14ade77a4d55f932418567196e188fe65d3957408b9ce07c2c27c4da0","tests/encoder_compliance.rs":"643b529e2f4762b2ce98e041dffa5d4b87fee5942996e093afa5b8494e02a3c0","tests/fixtures/invalid/array/double-comma-1.stderr":"abfcc7bc82100306faf21d3636f0ae684d0fbc5e9ea89032e6cd39ecfcb92c84","tests/fixtures/invalid/array/double-comma-2.stderr":"56bd3cecd5b5ec4204d206f07751c34e1e65bee7aa108ca74ef7a8cb1aaaea43","tests/fixtures/invalid/array/extending-table.stderr":"a6c077cb49c41a9d10aca0bac6c571e570f0a772c6311d6a835f9cc46f4ab7cd","tests/fixtures/invalid/array/missing-separator.stderr":"eb38b2683f119d8b9b8c6b8ffd46da784112565ef1033febbe59469e14baea76","tests/fixtures/invalid/array/no-close-2.stderr":"214a9c5953175709a068433f0a594e0cf2be88d633905dc6e9047894b5249e4e","tests/fixtures/invalid/array/no-close-table-2.stderr":"5a2b4bf9026aa20c749551cd77458c5ffba008b5b235fa05fb7c756917018eb8","tests/fixtures/invalid/array/no-close-table.stderr":"545ad4381d5a007a6cd940a4523ae2f629617d298611c0898283a1c4783604cb","tests/fixtures/invalid/array/no-close.stderr":"85935faa2e3a57c4f0a7d519629630a96826ce4f217650972cd070fb7dca23a2","tests/fixtures/invalid/array/tables-1.stderr":"f105a34c2d87b61160881eeb09b7f54d244ba2a222d32fbfc755091939942247","tests/fixtures/invalid/array/tables-2.stderr":"77010599d1d61a34119a99acea7d84162d217df93bca01aed3ae73f1eb62dafe","tests/fixtures/invalid/array/text-after-array-entries.stderr":"391ee42f4fa3a7ec51ba1b90e69f1d9278c105426fe66ae1f80e65d7fb6ed379","tests/fixtures/invalid/array/text-before-array-separator.stderr":"7292ebcb8c9c8aaa4041279af5414de3e710977cac948988cdc8b0947223b62b","tests/fixtures/invalid/array/text-in-array.stderr":"0486e3ec5d299e39c61380a2ed8826d886edb730f6d9555a765e4314da7f5b68","tests/fixtures/invalid/bool/almost-false-with-extra.stderr":"0a14178172a5e9ed65a7d551870b4df768e7cfb39f7b66a2ac0643e2aa8374af","tests/fixtures/invalid/bool/almost-false.stderr":"ed5307c42046d6adf0bf4c3d1aa1c75faa23a46029d5e229f0c2ab4db068df1c","tests/fixtures/invalid/bool/almost-true-with-extra.stderr":"6e4dc09948a38c284f5bc52e011322a2c0182ee2f35c772d460fa15a76461d2d","tests/fixtures/invalid/bool/almost-true.stderr":"86b33938625e15e30d4166b4e81f89c669e84c5b96c2cf708d84dbf628536e07","tests/fixtures/invalid/bool/just-f.stderr":"657f90406495c748243b526e1e01737c418453a1a5ad12ff0400631e9fdaf859","tests/fixtures/invalid/bool/just-t.stderr":"8a8f5aa45b0d0acd13f121caafa85a52a8d17f1bd81542d21d3a5ff07f460b60","tests/fixtures/invalid/bool/mixed-case.stderr":"2e9a16757a17c7e604e081584e6f015ecffb0fd7b6ed969a1fe2096c09e7155c","tests/fixtures/invalid/bool/starting-same-false.stderr":"87069893e084d03c3ab540e0607498d371563c0341c6f043aff756c8f1b32c64","tests/fixtu
res/invalid/bool/starting-same-true.stderr":"2a1817a1e3a6558860eec1d802e1be8f2f2b54da9b2d9eaf1cddecd35d71a8d6","tests/fixtures/invalid/bool/wrong-case-false.stderr":"5eb58e82290b767febd475712f7de9ee712f094a30dcb627a757b305ab64d096","tests/fixtures/invalid/bool/wrong-case-true.stderr":"dc8e696e9137f737a232009e7e9f801aa9ebe4320a445259af74d83a6235c7b7","tests/fixtures/invalid/control/bare-cr.stderr":"568eff61d56bb362866973d9a7a488831b8b08d02f4808538a04b22ebe10bf09","tests/fixtures/invalid/control/bare-formfeed.stderr":"4532c2fdc7c4bab8fa6683cc4c46975c1dddf2f531459793dedfaab73e4ae347","tests/fixtures/invalid/control/bare-null.stderr":"fe799f446b55ba89609a52842b4eb63bfa725035c0006107c85c6a8f6e8db688","tests/fixtures/invalid/control/bare-vertical-tab.stderr":"b85c55e64c58b85343482dadcd5d833fa771a79e1d0f13a24f4185a4d0d826d5","tests/fixtures/invalid/control/comment-cr.stderr":"656e1b16fbc152545a9726a290a8ce84463a73d780ad13a32b65291f56cb7dc6","tests/fixtures/invalid/control/comment-del.stderr":"08f586fc1f6ea69e3718ab43fded3da139d5ae296cfe70bc27a7ffb4b2f106d5","tests/fixtures/invalid/control/comment-lf.stderr":"71725b218014e5ae260d5075f6bb90477e97c946e27a40c797710a190d586a58","tests/fixtures/invalid/control/comment-null.stderr":"39ca1697c9e13969314368c85401dbb141ee80f514ab4a359ea8e74880e33d28","tests/fixtures/invalid/control/comment-us.stderr":"1d8c3a908fb274503d357bdc428b82c97fd9f9b37681be09129a46454f31e0cd","tests/fixtures/invalid/control/control.stderr":"aa1ae71dbb29473161d8f82a163e6862f78c7c55d4831574e6994651fdccdb19","tests/fixtures/invalid/control/multi-del.stderr":"812c0742dab1b933689c925dd1cca16ed185d091acc51026e8fc6585c0401c00","tests/fixtures/invalid/control/multi-lf.stderr":"3d2ac666d19fc3d0fe676b4897557fe3735c43da64b4a71577c04445a1341a06","tests/fixtures/invalid/control/multi-null.stderr":"9fec8ad3ba45ddb96ad3b6a118b4fa648056e26a9128528b2c0a8fa3b01e741c","tests/fixtures/invalid/control/multi-us.stderr":"a4769a44b32457fbc791b15b442293f37c936d2394ca9aa87ad5c964dc7cbb35","tests/fixtures/invalid/control/rawmulti-del.stderr":"b2dd93070d5bb09e19ea6ca015cfa1ef279ac2cf5e7fb242d8b74318f5d8418c","tests/fixtures/invalid/control/rawmulti-lf.stderr":"7e1d64429ed08f831783bace226535852bcebb48aae05541590fb242491cd7e9","tests/fixtures/invalid/control/rawmulti-null.stderr":"84c04cc89a6bc716b6f7811142899014abdb0b49c4ea56bc163c19220b14c323","tests/fixtures/invalid/control/rawmulti-us.stderr":"4e4566a74bde0a055d2e5a0dde72d41208c2ed188175b11c9e46167dff231d3c","tests/fixtures/invalid/control/rawstring-del.stderr":"c49c702bda4ed350dec4fc43edecb16202f8e5f1f4b02b06b42b2109d775a9b5","tests/fixtures/invalid/control/rawstring-lf.stderr":"5b11decc012a95bde096088a285eaaad4b8984f6d683bd633640141b19135530","tests/fixtures/invalid/control/rawstring-null.stderr":"eafa2a63e9d12293b290405049457860a8fef70de56c4ba2f203e5f2c79a8634","tests/fixtures/invalid/control/rawstring-us.stderr":"b3d8e5dcb1c66b93f2543ea325a50decb62178a5f0bea59fe64b04d89472b25e","tests/fixtures/invalid/control/string-bs.stderr":"1b35fdd656fb29e3011e5f4302fd37d0354922a21e740eb947dac766cebf3200","tests/fixtures/invalid/control/string-del.stderr":"ec80f70e9aea38e11191e50e9dfd80415de5b0406608c935aae8b8dbed9a8a7f","tests/fixtures/invalid/control/string-lf.stderr":"f91e1e0035af8ff259ff98c79097f845d8be8886629a93da962daba1079cb382","tests/fixtures/invalid/control/string-null.stderr":"fe34d284c478853bad94235aac1f37ac3c591f97e12990847b5da0b6c99bd47d","tests/fixtures/invalid/control/string-us.stderr":"dce016319c9fa0981f03cfd8b2b1d52d2d847697ea88e037f5a0e28bb53d8830",
"tests/fixtures/invalid/datetime/hour-over.stderr":"bd2220bdbaa96caf3d2aaff640620e4856bffb722a0e5be61dcb5283ffd08056","tests/fixtures/invalid/datetime/mday-over.stderr":"de9d27d65c68dd09da10c229167ce881dfe0ebda457badfe24b7598ae80c47a6","tests/fixtures/invalid/datetime/mday-under.stderr":"18daf3ce2a6a972476ccabcf92690a488e4f3be804dab8458da2aebad22a2c8c","tests/fixtures/invalid/datetime/minute-over.stderr":"a29009d3f7a6b1d9afad2420f223d6a6e02df8149577547837f5eeec4075bb9a","tests/fixtures/invalid/datetime/month-over.stderr":"37a203b22c3b2510541e413ff347447f9f3319a896ee005b96f65bc0d68150f4","tests/fixtures/invalid/datetime/month-under.stderr":"24c554595ca9a999a1d8e1ef7dc28b443f2f0ad6e17337ee157fb18bdcf678c1","tests/fixtures/invalid/datetime/no-leads-with-milli.stderr":"a35c496884e921aa086c1404bc812ff74f2bfd347a3ecd96640942be5555afbb","tests/fixtures/invalid/datetime/no-leads.stderr":"2e20cb60a1ecee85b172d1402e4d8c425362e4db607706bd39494385dc6dc98e","tests/fixtures/invalid/datetime/no-secs.stderr":"65871ee020e645e737c363b22cf43c160b295871cd4ac97a37d3ea46f60e3250","tests/fixtures/invalid/datetime/no-t.stderr":"ff50b85f6bc0d49000ec6f1303fda9b44bf934c2ede61743363411bbf6ebecbb","tests/fixtures/invalid/datetime/second-over.stderr":"0ed555a874efa08b711b5227501208758d87a01ad8360cf76c3dc8761807fac4","tests/fixtures/invalid/datetime/time-no-leads-2.stderr":"f725d49ddb5af69b7285f071d68e3d8441d5e331adcfd8c025c1f3cbb68028a5","tests/fixtures/invalid/datetime/time-no-leads.stderr":"cf06f3847f3d14655a94d8cfd5a6984dc74115b1d3cdbee0662ef215738bbf65","tests/fixtures/invalid/datetime/trailing-t.stderr":"87a15cd62bbe7cba2b942fe424c045ce30a12fe439a1b49587a5cc037ffa6b09","tests/fixtures/invalid/encoding/bad-utf8-at-end.stderr":"518dc443f0404d486b40bbbd152870276016795b05f3cc8a1de64a0e08fcdda2","tests/fixtures/invalid/encoding/bad-utf8-in-comment.stderr":"e0f252d14c18ea072c098834997db8e5f68b807bb0fa6d3d34e4042a5ea6fbb7","tests/fixtures/invalid/encoding/bad-utf8-in-multiline-literal.stderr":"2328a89cd9043de10ee656f4ea0dd5e6491fd8c0484ac36099c23161dd7a2625","tests/fixtures/invalid/encoding/bad-utf8-in-multiline.stderr":"2328a89cd9043de10ee656f4ea0dd5e6491fd8c0484ac36099c23161dd7a2625","tests/fixtures/invalid/encoding/bad-utf8-in-string-literal.stderr":"eefb00fee073933fbdb95d24a9e7050c281d4719d0cb970c2c06a71a86f108b3","tests/fixtures/invalid/encoding/bad-utf8-in-string.stderr":"eefb00fee073933fbdb95d24a9e7050c281d4719d0cb970c2c06a71a86f108b3","tests/fixtures/invalid/encoding/bom-not-at-start-1.stderr":"bd4e557b8b4586cdb39a8fde46f0bb214954f9f8ef37be46e2cc19823f6d6919","tests/fixtures/invalid/encoding/bom-not-at-start-2.stderr":"27003a498cb355011782dc21f01e15457490b78c472bb9ddb54147413c8f597e","tests/fixtures/invalid/encoding/utf16-bom.stderr":"a8800edcb8f6184b712da53e74bb787c39eb891073575acbae1ad575f15043cc","tests/fixtures/invalid/encoding/utf16.stderr":"edb66c01034865f484ccf7921bfcec1efaa8599762cb9cd30c9c8103275bc4e6","tests/fixtures/invalid/float/double-point-1.stderr":"2917901dd186adc39cb5965faf388fa2babe577ef3bfcadd4919232868a727cf","tests/fixtures/invalid/float/double-point-2.stderr":"7eda489da0436d6f0f2268aa4005b422d215b4785af0c1696c8731908a563f17","tests/fixtures/invalid/float/exp-double-e-1.stderr":"e64082e328fcfbeff57e6801448c769b12bc8e879b77421b688b2e147e386713","tests/fixtures/invalid/float/exp-double-e-2.stderr":"5c45326ef7287ea16a9e08275222e281b5d61c9322f8764f6533707f9772e255","tests/fixtures/invalid/float/exp-double-us.stderr":"ebd30aa3f7cd3a0a5e79bbbde1beff209d24f4ab58eb5552c1baf0eb2194e97b","tests/fixtures
/invalid/float/exp-leading-us.stderr":"19c8f676dd45a5db09bd5baba5c3e7b661e83099a340331ee6bb10defe679569","tests/fixtures/invalid/float/exp-point-1.stderr":"23e73e4e63db888546866967a1c0319a1db269f23ee9c277b298e9f2db88800e","tests/fixtures/invalid/float/exp-point-2.stderr":"633328e085fb04d6a79cdfb696f45a1836c3a8b6afafc4cd5e16d48465aa4613","tests/fixtures/invalid/float/exp-trailing-us.stderr":"d633c5a44a55dc2b6cac18f739cecb33526f31d85f72cada0ebf70c4cca56dcd","tests/fixtures/invalid/float/float.stderr":"cc664d16849deec2ae7ebee6a3f46923bd5959075e282315c4f60461cdb13a0f","tests/fixtures/invalid/float/inf-incomplete-1.stderr":"38cd906dfee7f13b8cbdb27f3406ab0499fae3ae16f3c77bc7fc48d009595d93","tests/fixtures/invalid/float/inf-incomplete-2.stderr":"97a9ae1ff194a95b5be2abaf2cd8179ada832cdd9fad349efa9951e7ab92e435","tests/fixtures/invalid/float/inf-incomplete-3.stderr":"034bc609343ecf1e659d6250f719e5f93512e8140228e44e57b538765e58a1f7","tests/fixtures/invalid/float/inf_underscore.stderr":"621326dde26e5364c7af1b562fb651f4184d9b5fc9bc45edc12f52b588d506bc","tests/fixtures/invalid/float/leading-point-neg.stderr":"d19e28ba2f11069800df4dd1951025aa7f75425f7258e8caf4bbf6abe0e84bc9","tests/fixtures/invalid/float/leading-point-plus.stderr":"10750e9acccb17f0682db30fb175d083d06c822a4863d3d6b8ddb6c75b7b22ec","tests/fixtures/invalid/float/leading-point.stderr":"2545b7a615528595f5d53a7338403c83a8587e70600b1501225446e5f456c805","tests/fixtures/invalid/float/leading-us.stderr":"dc958138922097b2e1e3865c7818604b2249268af4acbe5cafe0ce8c68a90a86","tests/fixtures/invalid/float/leading-zero-neg.stderr":"d1fad35fa8d18f93ebfdf681d3476f02600e5c39cc942ca9bc36181476cbbe53","tests/fixtures/invalid/float/leading-zero-plus.stderr":"ad8ba7a7c12cb4b296cc0d43915106732e6a6a713aea67034587d1fc0c8093df","tests/fixtures/invalid/float/leading-zero.stderr":"cc664d16849deec2ae7ebee6a3f46923bd5959075e282315c4f60461cdb13a0f","tests/fixtures/invalid/float/nan-incomplete-1.stderr":"f4bee0b1c639bf800fc4dda38276142e715cd85ab6cc5e93ae2112ea63d7de89","tests/fixtures/invalid/float/nan-incomplete-2.stderr":"dc908ec577d29083bfd709fc4bdc2fa641d7fb2ba77a5d7441215680a8839d69","tests/fixtures/invalid/float/nan-incomplete-3.stderr":"abab5a41e0f2f1bad2d2050d0c913dfd8c15e50530d53ef8de327f106f564e02","tests/fixtures/invalid/float/nan_underscore.stderr":"25b67a7d6c743f673be7b409c9990de5de8b52a1d97c32e6f4e62f33147f1872","tests/fixtures/invalid/float/trailing-point-min.stderr":"69ad03ae81990d580a6d63bdd5ab594de00c0a16694c8671704c6243b4578b38","tests/fixtures/invalid/float/trailing-point-plus.stderr":"fba0bbad890020fe943e9f23644e81bf0bb7d114230fe16182e866fddcfc108b","tests/fixtures/invalid/float/trailing-point.stderr":"2f12b368fd94304ab0126ebb5888c519475f9ca28e9ca702c477cf0085ba9216","tests/fixtures/invalid/float/trailing-us-exp.stderr":"2ad53ae4736ce5134921acf0c16bc5031627c0da3119edcdc4bd7eb300d40337","tests/fixtures/invalid/float/trailing-us.stderr":"506cb8051f1045ea1dc7f11865d58cbca0216502d273e1c10366c8be7cc9ab43","tests/fixtures/invalid/float/us-after-point.stderr":"fa9fb59f703b6770be3dc094c04eb2c4add8a7a7ab79d9fe508cfeee785404f1","tests/fixtures/invalid/float/us-before-point.stderr":"14e09a7a382e249e5143d1c81d6e4623408eb2d505e1e3f86c370a3a3bf6cd9e","tests/fixtures/invalid/inline-table/add.stderr":"bf95d34749254300f4179ed1314cc9cabd7c7b63fc2453fc7adbc7869b63be4a","tests/fixtures/invalid/inline-table/double-comma.stderr":"2132a1c4d97fab140089818f990284333e22ef91d20a9f65e11d4dd15b1a701a","tests/fixtures/invalid/inline-table/duplicate-key.stderr":"72bea73b20005f15ce
d977aae70a1b0f3bbe3e35598231aca9a2303d770efdc3","tests/fixtures/invalid/inline-table/empty.stderr":"604fef40337f04e5f37a52239d6509850aba95677a7a94ca8476a6c21b648a43","tests/fixtures/invalid/inline-table/linebreak-1.stderr":"45b0611d37c1ece88bf6c88b3528adc3d73e0cd3e3b24dcf07ab151023a6f488","tests/fixtures/invalid/inline-table/linebreak-2.stderr":"f7672965326b44adaf0cb4796a087fbe779a8b17fbb458090a33375d0c54e5b4","tests/fixtures/invalid/inline-table/linebreak-3.stderr":"e8c70f0c91b15e701567e93d8df1cd3bec593696af05ec1d95e8f9e00ab20fa6","tests/fixtures/invalid/inline-table/linebreak-4.stderr":"3d31147f9e1ff5f94384e4df1675dfff2da6f076cb0a729771615f05b990be91","tests/fixtures/invalid/inline-table/no-comma.stderr":"9f1c85e0df72c7e7e011c26a0d5dd9dea8b7a5e18c3ba9a53ff4a20a9429dce9","tests/fixtures/invalid/inline-table/overwrite.stderr":"812d1bc74d07750048a521e513a565676e606d4fa1a32d2ebda7af8fa064d3ab","tests/fixtures/invalid/inline-table/trailing-comma.stderr":"4791911dafd6602e2891d6ffc4d32ef8e9d0c1f8f6d37e84d440feb896d9cb88","tests/fixtures/invalid/integer/capital-bin.stderr":"fcfc8b0bddd36a641d3f5cc2ceee88554619fabf6874e11cdfdd147be8781881","tests/fixtures/invalid/integer/capital-hex.stderr":"c8e2d64f9659435a0387bb7e6447896eda253fef77e0214a4073fcffbac693a7","tests/fixtures/invalid/integer/capital-oct.stderr":"ec465fa25da212b0c9b6265ac8e9cd05c1fa07d614dafb3bc9b2ca74d6c2a7a7","tests/fixtures/invalid/integer/double-sign-nex.stderr":"8d57da526240c1cf73423b688442922ae291ff26e3c09f9c3b5b150e62e5cbaa","tests/fixtures/invalid/integer/double-sign-plus.stderr":"55896d9bd19637e124482966a12109a1a8351620ddc6f8d28553d70359f523f1","tests/fixtures/invalid/integer/double-us.stderr":"f14ed7bd3ad26b2203763fa953dd6e99212e50fb8e43a4eaeb115c1a7df4fc25","tests/fixtures/invalid/integer/incomplete-bin.stderr":"64168fc7ede87a10c12f82325fce644a7d9b9c3af55a313184175df7926845e3","tests/fixtures/invalid/integer/incomplete-hex.stderr":"ed2423540e288f4673bc68822a799bea04f571db5de56154e10360b03ab79553","tests/fixtures/invalid/integer/incomplete-oct.stderr":"9ed35e3078703a38996f20dc3e86477149564c8abd237c644bdf3a5ef26e3417","tests/fixtures/invalid/integer/integer.stderr":"ed5ef991b733b3d51700364da18bf58f1b7eb68053467afcbff22775b3b82788","tests/fixtures/invalid/integer/invalid-bin.stderr":"7248d47f2c7db309254a3a41af28bc1a6e96bfa95e0c8c94d607f65a1a30cee6","tests/fixtures/invalid/integer/invalid-hex.stderr":"3976255c6fe35a1e29f0fed7324eee8420ababd0f6f1f7702908c3df47c88846","tests/fixtures/invalid/integer/invalid-oct.stderr":"9f6776e33887cb446a5590d8fe4e51c36747c634cd5e4efaa84f807d3ce244e0","tests/fixtures/invalid/integer/leading-us-bin.stderr":"28edc918ac016cc9cb7b0b20fe2a5e1dc0175de0aa8105d7a6ca263815c6e4e7","tests/fixtures/invalid/integer/leading-us-hex.stderr":"2a1a5943399c8ba5849ba569411bfefc8323659c852bd714ca917231f6212ef0","tests/fixtures/invalid/integer/leading-us-oct.stderr":"aad69bdd80f94e907bda03558a1302e54d58d8911fe2b564e93cb0ec48403b09","tests/fixtures/invalid/integer/leading-us.stderr":"3a265cc11f1b0d43d4b532a47776486ec7c7ea7afe70813ab00c5a37cf87a9df","tests/fixtures/invalid/integer/leading-zero-1.stderr":"ed5ef991b733b3d51700364da18bf58f1b7eb68053467afcbff22775b3b82788","tests/fixtures/invalid/integer/leading-zero-2.stderr":"5c70e7874256512c0ef6bb364497d4e10154e994056f2feb7c5c729016522091","tests/fixtures/invalid/integer/leading-zero-3.stderr":"fb2730feda6f669a3b8c4332f01369e52ce1b942807f1bf3d9762b1fea04aeac","tests/fixtures/invalid/integer/leading-zero-sign-1.stderr":"c9d2d992eea36c4fe228eb74741bd8d0ede1e354cad132b79462e
7b502b37f95","tests/fixtures/invalid/integer/leading-zero-sign-2.stderr":"4248329b339020cc2ea586f2775a0b4f4cbe2ae3f0f75b935263363b8be5eaf5","tests/fixtures/invalid/integer/leading-zero-sign-3.stderr":"3b414808727d3a446efdfca0033525e17536f9b54104d8a9cb9278b054d213df","tests/fixtures/invalid/integer/negative-bin.stderr":"74aae673b861bd46544e4835fe7075e20158dd69e27f75c790d48a6006476c73","tests/fixtures/invalid/integer/negative-hex.stderr":"799bd8120f4cf2c36e7f65a5f9aa43a3ec87dd95dd3bf68501059da9f21f8c9e","tests/fixtures/invalid/integer/negative-oct.stderr":"fb082674d0f5a8a231c91c2d34cf47fdeece4c8c5c8c7b23fb09fce95ed59a9f","tests/fixtures/invalid/integer/positive-bin.stderr":"54d8a33743737f374480cd1235bf3f7e0847d252ef7e2bb1d447529cbc0f6692","tests/fixtures/invalid/integer/positive-hex.stderr":"3b21b23cc3dd6b213a19256f4ffb4bb36172de2f739f90bbea78636f7a50524b","tests/fixtures/invalid/integer/positive-oct.stderr":"cc493d14bcb092f49b517fa5143e29c45020d491d94ac662f85b2af34805cf11","tests/fixtures/invalid/integer/text-after-integer.stderr":"07a13ad4841a452eff00947234a4ebac4d209ea0294162888db35668648bb55d","tests/fixtures/invalid/integer/trailing-us-bin.stderr":"62da06cf06527b9e9cbeba6c5299ce6001d40592e9d007c8350090977f4d1b58","tests/fixtures/invalid/integer/trailing-us-hex.stderr":"1b290eada58a7202b1a9251afd8e0e72a4caa8ad5c85036d1050e7de8141e94d","tests/fixtures/invalid/integer/trailing-us-oct.stderr":"34e6f86ffb0099e6e1ba67deb51e36af62dfce4e7299b94503a219339bf16447","tests/fixtures/invalid/integer/trailing-us.stderr":"3ab49ee921eb772f5aa4eaf0fb3619b1dcd9a9db3f4ebbd9bc505581a985e753","tests/fixtures/invalid/integer/us-after-bin.stderr":"a94a87ebab3536899ce7c0c785f020b3a236c60d24c0bd7494628ca310c40768","tests/fixtures/invalid/integer/us-after-hex.stderr":"9009b187f615f06e3392eabd8ffa58311ed1c2b1cd76f8c5bd99671242f2e026","tests/fixtures/invalid/integer/us-after-oct.stderr":"05af70a21980416fbd602337f9af22a1c600a294635d10ef1ca1b2138338e712","tests/fixtures/invalid/key/after-array.stderr":"487d957b20226ac36e27d6efb1e3d24147284c9a5e10a0188427a1c940d31ef0","tests/fixtures/invalid/key/after-table.stderr":"f70e84770817f096fcc1b6195c6b0a79d25210c6930ce412a89646040ee3d713","tests/fixtures/invalid/key/after-value.stderr":"00d4d2d3ccd61f64a92df0ca575aeafcd96e91d053d835ca855973339ba458cf","tests/fixtures/invalid/key/bare-invalid-character.stderr":"b1f64d54a43017e6cc09755fa7ba477901721d23f9271ec658fc9362f46631b3","tests/fixtures/invalid/key/dotted-redefine-table.stderr":"564febb355d1556df42f428a046ac6fdc5dad49b2b736be5824b0c13fcd1fae9","tests/fixtures/invalid/key/duplicate-keys.stderr":"7c9dfef2ef19b1487b7592a267ab5ba21c8b833dfa9ec1c3151e369c2fdba26e","tests/fixtures/invalid/key/duplicate.stderr":"764ef48bb06c9096c853b1156c8d29ba0065616939a5335146f5af88a424cea3","tests/fixtures/invalid/key/empty.stderr":"af6d3636ca73e5496c40d9c918c59b61fd86812db262649e5268094193873130","tests/fixtures/invalid/key/escape.stderr":"155aa9389f0eb28cac3b42974af7ea9e2eef8d96f084f08f9f75e960fc8ce8c7","tests/fixtures/invalid/key/hash.stderr":"85dd91b96aa4f81cc7922b02b411f25d9053bddd1e5b893c2a2ee9d0115a7cac","tests/fixtures/invalid/key/multiline.stderr":"d625f2caaf01d53d72d6f1c3df0952fe3ca8c5f3b081503cb02b9994c088b733","tests/fixtures/invalid/key/newline.stderr":"714aed0a140062f977ec85b9afa50f68448c67e806168e60b4f4554ab270b2b9","tests/fixtures/invalid/key/no-eol.stderr":"440ec927e94f0e520a0f256c865041f0478e1c82f3bb79323b7ddc36fc942edf","tests/fixtures/invalid/key/open-bracket.stderr":"3b36814373f51a8ea00a448d65bc514e8d99f5163b7dd8101df62bcd
0a06e801","tests/fixtures/invalid/key/partial-quoted.stderr":"dc9059a014ed53071ed170b1e280923556dc09e0be2ae96cc8474e9da59fa378","tests/fixtures/invalid/key/quoted-unclosed-1.stderr":"6cdec8a7c5352a2f246273afaa923dfa81d4d2e68cca5b4f9a19193559b164c2","tests/fixtures/invalid/key/quoted-unclosed-2.stderr":"b4817e6f85a90fbb6adf049ba57c268f9888f1b42b3d62200c359606176170b1","tests/fixtures/invalid/key/single-open-bracket.stderr":"917c0203d1e45309fcff82ce33fdd2d989f630fb99290a40cb9e08a6f7ca0ef8","tests/fixtures/invalid/key/space.stderr":"3a5fa712d667890678873e3d4e4cabb084c67091c5ec6155355d5bd4229585dc","tests/fixtures/invalid/key/special-character.stderr":"a84c2f293c1e421a1c87050cb0211de80dbfe7a79939db0338fa35bf0c181ef2","tests/fixtures/invalid/key/start-bracket.stderr":"223d8a22bf34459cd9bcb993ae2a51ab3cc436674e3367e92f7d74e9f8710a45","tests/fixtures/invalid/key/start-dot.stderr":"f9366a1492ae24fd0721724b4039d2675e91219de564aff2826adefd83fac571","tests/fixtures/invalid/key/two-equals.stderr":"a0aae899cfa75df41104a4d3090a309fc7ebcd95bb5a944cf742f3d3fc9d4782","tests/fixtures/invalid/key/two-equals2.stderr":"861826b9456ab3a74f63f5c555e13d959a3991dfa6ce126ae5ed14d43f7dcee1","tests/fixtures/invalid/key/two-equals3.stderr":"71614864344e321ac5de238b7ef9d097c6d7f3ac3eee4118d96827b4b8bd6658","tests/fixtures/invalid/key/without-value-1.stderr":"16c2823a39a82c3c27e0959a691b7a95e3392d62195884697893d373b967b9c0","tests/fixtures/invalid/key/without-value-2.stderr":"d340f94f5d96f5730ab269db7ef27aca171d64e35af1181c474d75a7d11d6590","tests/fixtures/invalid/key/without-value-3.stderr":"3cf3072fe9206bfe6c682103d0414627a5a63db4c4a319cf37efeb5fe6b92007","tests/fixtures/invalid/key/without-value-4.stderr":"07132bec96e9a9a672bafdc3c448b7c596257245f8c3e2cae04641f9798644ec","tests/fixtures/invalid/spec/inline-table-2-0.stderr":"5ad1a938b1d1f0f3fdbd1871efdebfd30e136407ecdd9e2eff22150d00624b3f","tests/fixtures/invalid/spec/inline-table-3-0.stderr":"fcbc05e911b7db81bd918768fe98a51a7026fd476d616718cc417d2f08bcc1a1","tests/fixtures/invalid/spec/key-value-pair-1.stderr":"d5391142dfd56040840cf91b1e28e3c048229e3d9998534d41001cd6657f9bd6","tests/fixtures/invalid/spec/keys-2.stderr":"3c4ee6066fc75d2c1f1b325f618a01113694c318e330ff4f237e89127f332c87","tests/fixtures/invalid/spec/string-4-0.stderr":"910ee4b240159b828a7509c8dfb46507071a8d8636f3935a3914d6d91f315295","tests/fixtures/invalid/spec/string-7-0.stderr":"5128f0a930b3034e494a6bee4f384a587e9fd858b25f8cc529a488c94ee9670d","tests/fixtures/invalid/spec/table-9-0.stderr":"49dac70d337266f5c6b333fee468f279fed1bff62bfb4ec7436c8b6683ce0dd2","tests/fixtures/invalid/spec/table-9-1.stderr":"d9e071c70356c01b6537f876989ad2067e7773dd5eb24a298439d192dbad12d0","tests/fixtures/invalid/string/bad-byte-escape.stderr":"14f6ae446b3b8cb434267eba11c6ec5a1badef4f867169b173698cf9f1a29d95","tests/fixtures/invalid/string/bad-codepoint.stderr":"e5a9704b87fe85e447e817197ad4e59e27b33bd93e7c5ad3987561d119ce6ee4","tests/fixtures/invalid/string/bad-concat.stderr":"499219633467b9174471db40543ca188e2b906c470e511d2f701f5f5475d96be","tests/fixtures/invalid/string/bad-escape-1.stderr":"34a15ce7012217c62d31d5392038517c216f0cbfd5d75fb5f3c2bb07afd3f25c","tests/fixtures/invalid/string/bad-escape-2.stderr":"955aab40b16043c847d85d04e6adcd093c930dd8416d29c2ab5953c077eac6f4","tests/fixtures/invalid/string/bad-hex-esc-1.stderr":"aea935cf1e17743356e6fb1059afed2d0ee5262906594782e5537a025398038e","tests/fixtures/invalid/string/bad-hex-esc-2.stderr":"deac5217cf80acc759e1b40c43f5f56431b276dc2c896aae5490d57583105e06","tests/fixtur
es/invalid/string/bad-hex-esc-3.stderr":"94ecf886427e8fe5daf1d8f932bf1887f2533b10bc1f57cb6de03ea28fef466f","tests/fixtures/invalid/string/bad-hex-esc-4.stderr":"382b011dd4070554ee875fde06703d8332ef6ad36f3619f3536b0a4997ee2745","tests/fixtures/invalid/string/bad-hex-esc-5.stderr":"a8a039fae822eda68591da28ff2a117b5d85e99d066e9126ebbb6426a1cad52d","tests/fixtures/invalid/string/bad-hex-esc.stderr":"aea935cf1e17743356e6fb1059afed2d0ee5262906594782e5537a025398038e","tests/fixtures/invalid/string/bad-multiline.stderr":"141e5770190dd184bb1f64f6bb14fc017210bbd918ab5c8b7a3d80b86b21772b","tests/fixtures/invalid/string/bad-slash-escape.stderr":"d62f894ee166bddf84432507fb4ba56473c0a230fd88a3ccc2b199a72a34e613","tests/fixtures/invalid/string/bad-uni-esc-1.stderr":"d1d6e0c78b64f776a428aa7cb332b0ccd659d24950dd3e8f1d2a1450f61b37e5","tests/fixtures/invalid/string/bad-uni-esc-2.stderr":"d8ed938bafdeda05c083d79ed73f5043dabe0f5f1515c6857ae7a61db9026ebc","tests/fixtures/invalid/string/bad-uni-esc-3.stderr":"de1c29897549ae37988a38e1c370f9974fcbf7d4c4b3fd457d731999cfc05929","tests/fixtures/invalid/string/bad-uni-esc-4.stderr":"4e5a715e8dc212d073d399bb0ae9b49413396744282a195c34cb03e6f4fbd9eb","tests/fixtures/invalid/string/bad-uni-esc-5.stderr":"2419cd927d8e31ebf5025558e70bb1295d98bdb36c17f0e00620b9c4a7aadbf6","tests/fixtures/invalid/string/basic-byte-escapes.stderr":"b42fd0273c7438bf13ddea9552204bb9209cdcc8e4151311d2446185d2cd546a","tests/fixtures/invalid/string/basic-multiline-out-of-range-unicode-escape-1.stderr":"725cd4955987c3d6e736832281316d6c1a2446303e9a1dc78900cef4bb84ee64","tests/fixtures/invalid/string/basic-multiline-out-of-range-unicode-escape-2.stderr":"c6698fbdb95188d53bfdaa4a4f590d86a73aafcc321a5d9511ab43ce51be1c78","tests/fixtures/invalid/string/basic-multiline-quotes.stderr":"28177a49532f22aaffc9dc204592a2c5eca2fc20f8e208b7c7f589201e8b7de5","tests/fixtures/invalid/string/basic-multiline-unknown-escape.stderr":"a83406b30eb3ab2cebb0230d8d65d0b7583885138f2c070976ae61de2c8b17f3","tests/fixtures/invalid/string/basic-out-of-range-unicode-escape-1.stderr":"19af67599c6c2eef340c9fdb0ab2cc788928def50280af939247a1274447781f","tests/fixtures/invalid/string/basic-out-of-range-unicode-escape-2.stderr":"0e2e1a69358502ec17a07e4fc151b70e8a3b5123798cb38f98fe2d146515a84e","tests/fixtures/invalid/string/basic-unknown-escape.stderr":"1de467948fb18f61336350063701d9c5a6615054fe740a9be650f71f5ca4236b","tests/fixtures/invalid/string/literal-multiline-quotes-1.stderr":"249123229606aa8eedff1b5bdead5022daf470e47dbca639e32019d1d61dbcf9","tests/fixtures/invalid/string/literal-multiline-quotes-2.stderr":"d9784af1ff056a90bf531307749d53a5d24ffffbc0f4aada7fcee417a50d1615","tests/fixtures/invalid/string/missing-quotes.stderr":"462f24701d2c51d36b18d06b69be2f6eb36449b5f3ffbaa737fcbd2b2151ae4a","tests/fixtures/invalid/string/multiline-bad-escape-1.stderr":"18469c4d37d011b3f30ae17e3111b5e8a9526d593475e5d8d7a9b19461a40e8d","tests/fixtures/invalid/string/multiline-bad-escape-2.stderr":"d43896d3005c8470dc8149e2b74eb8825c6d9fedfe9f48125ad88a95c1dc3035","tests/fixtures/invalid/string/multiline-bad-escape-3.stderr":"92f732c6bcb922e25d2a001a389f93b596dd0e91109cbdcb651efa146309dc2a","tests/fixtures/invalid/string/multiline-escape-space.stderr":"94b451b6c03055186777a248cb216f95a1b2e29df25549f345d96bd0a4e63f1e","tests/fixtures/invalid/string/multiline-no-close-2.stderr":"e500e99a44305b1e148b211e963478cf1554f8c9536d3108390cf41d5b2ce069","tests/fixtures/invalid/string/multiline-no-close.stderr":"d5b9602d23b0cb023fbe3ae80d862fd60332475ba8863a1e977f
17cb326a4548","tests/fixtures/invalid/string/multiline-quotes-1.stderr":"046956658c0a73e665e7a6a2044ff83c8efb8cdd8c2ab153c163eb1e61068c56","tests/fixtures/invalid/string/no-close.stderr":"3ad8aff0932d98592b808fc6f44fa68a854097f8025e92c11af1acb6de3d3cc7","tests/fixtures/invalid/string/text-after-string.stderr":"1c1e4677be8d3dba0e7933b3ed1cbb6e0bcf6f600cf9a989a7b09c9424a4d0a7","tests/fixtures/invalid/string/wrong-close.stderr":"441f4f1b73c11c8dbf2f73cf9a7766f17a9517b3b9142e86736ed43eaec07f18","tests/fixtures/invalid/table/append-with-dotted-keys-1.stderr":"a67f1f152005295e0a1bb3dcaaa755edd05f19ac5316b8ad2eb4d45797e0f770","tests/fixtures/invalid/table/append-with-dotted-keys-2.stderr":"72d9ea8a90b4d9e5319c2bf951bdde6a87a205612e82ed5a09cea2b706bfde7f","tests/fixtures/invalid/table/array-empty.stderr":"e8a41c60adf7756361920816b6c4f44125a813c869b71fae2c98473e4da1b231","tests/fixtures/invalid/table/array-implicit.stderr":"7797ce41aab0567fc9d40e277cc32c12e1f16ffc0e73857fdb3bbf754246305f","tests/fixtures/invalid/table/array-missing-bracket.stderr":"5f1e8703d59398f6595d21ed0abcc7dc3ce77943ad0f71eede9ad63ea2bcc7c1","tests/fixtures/invalid/table/duplicate-key-dotted-table.stderr":"ca58908463cbe2ec6b3de314237c178fee64245cc738c72a7b9e08bb3d02b2b0","tests/fixtures/invalid/table/duplicate-key-dotted-table2.stderr":"cb59f2ed324642de947f3cd9373ca111ec35104a5f33578f64c48084ce1a84f5","tests/fixtures/invalid/table/duplicate-key-table.stderr":"f4816522738b3e2ace87d1100a3d73e6a122d8dc67d05e0b35a1438e16a8952c","tests/fixtures/invalid/table/duplicate-table-array.stderr":"11d293e4b4f205fc98cd892f25a25f533cb922c963ecf095a932d2e9d550be4f","tests/fixtures/invalid/table/duplicate-table-array2.stderr":"fa9cd3b1212eed14ec56b66a16471ac2f7c0398d743982abb7c5cb4b5c7a5fe4","tests/fixtures/invalid/table/duplicate.stderr":"3e6d1b1a2f44d449e8cb0098e7c40ad1e755363b446f3821c399abfb26eb9939","tests/fixtures/invalid/table/empty-implicit-table.stderr":"cd3606ce97c5537d18146cd978403636a65fa703c83616da75b8cafa86e8fa24","tests/fixtures/invalid/table/empty.stderr":"4399e419abbcfbec93f5915e7fbdd11b6e462a4c066a29eacda159abfc588734","tests/fixtures/invalid/table/equals-sign.stderr":"472de6b908a03c99637b635a3a898ed956684ae422e1b4b135ec94986ea45f2d","tests/fixtures/invalid/table/llbrace.stderr":"db6bbee7ed15994398901c46ed4b40904897e71f5d972deb7904ccac49cd834e","tests/fixtures/invalid/table/nested-brackets-close.stderr":"e1dff60ea8f77dd1b8fae7d1d63c788c838c80560172d92377cc168f5cb5923a","tests/fixtures/invalid/table/nested-brackets-open.stderr":"bd58eb0630dc0c51ebc288258d360d707c8f43a5877ddc21e9420f8eb76a2f4c","tests/fixtures/invalid/table/quoted-no-close.stderr":"6bf7e2d30c735a55f595140af7c7f6be89b6faf868f4473ea39570fdb87d5823","tests/fixtures/invalid/table/redefine.stderr":"3e794bce5bb6ae9f603f50e3dc62d136701ec478078e8a8e99c94229778e24ca","tests/fixtures/invalid/table/rrbrace.stderr":"342a5ff362c8b4c1e85a6442029291bd33165a3b36552794fcd5269249bf36a1","tests/fixtures/invalid/table/text-after-table.stderr":"6dfaf1fc3199f0602fea52f7b1c65869eb2f8643b9e90dc1e718a183fb972485","tests/fixtures/invalid/table/whitespace.stderr":"fa48d4dc83f92e729dc25c6fc6a0c336014391b4bdb3392998f18141d2deb350","tests/fixtures/invalid/table/with-pound.stderr":"97dbd1ceb7f357bd98cc1caa9a602c638aaa5831237b7d63b18153acc64d3af4","tests/invalid.rs":"daa9034453fb7f10718020e36a07a19664eb852071995f17480c595cb44e2cdf","tests/testsuite/convert.rs":"9140d681dbb9370b975d5bc1cd4e9e640ac4023c6789edcae544e66657ad5fe9","tests/testsuite/datetime.rs":"105d95570d05e9ecbc30bfe7d081f9d63e2f36
634e9124da012f467c6134549e","tests/testsuite/edit.rs":"7ff72670fe971ce9cd3e94a9254d1f54ebc792d8881b688d397cd9a996f2d051","tests/testsuite/invalid.rs":"31789643e3419ab922f8258e5a0421e1648b64aa5b96d3e1fb79bae36abf286e","tests/testsuite/main.rs":"a363749a7074ee95e3b7c556c0b0f59f5818445ca4762ec53693f451359b268a","tests/testsuite/parse.rs":"3bd2b50bd735c6ef5ed276ce6409f068c3aa93abe2480f285c18a9c5425fbd96","tests/testsuite/stackoverflow.rs":"426d4e621bbafe62f8aba2e8c62e715929185d5eca4c5083b6427b601abc667a"},"package":"1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"} \ No newline at end of file diff --git a/vendor/toml_edit/Cargo.toml b/vendor/toml_edit/Cargo.toml index 263d7f5..a198550 100644 --- a/vendor/toml_edit/Cargo.toml +++ b/vendor/toml_edit/Cargo.toml @@ -11,24 +11,25 @@ [package] edition = "2021" -rust-version = "1.60.0" +rust-version = "1.66.0" name = "toml_edit" -version = "0.19.8" +version = "0.19.15" authors = [ "Andronik Ordian <write@reusable.software>", "Ed Page <eopage@gmail.com>", ] include = [ + "build.rs", "src/**/*", "Cargo.toml", + "Cargo.lock", "LICENSE*", "README.md", - "examples/**/*", "benches/**/*", + "examples/**/*", "tests/**/*", ] description = "Yet another format-preserving TOML parser." -documentation = "https://docs.rs/toml_edit" readme = "README.md" keywords = [ "encoding", @@ -41,52 +42,52 @@ categories = [ "config", ] license = "MIT OR Apache-2.0" -repository = "https://github.com/ordian/toml_edit" +repository = "https://github.com/toml-rs/toml" [package.metadata.docs.rs] +features = ["serde"] rustdoc-args = [ "--cfg", "docsrs", ] -features = ["serde"] [package.metadata.release] tag-name = "v{{version}}" [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" -search = "Unreleased" -replace = "{{version}}" min = 1 +replace = "{{version}}" +search = "Unreleased" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = '\.\.\.HEAD' replace = "...{{tag_name}}" -exactly = 1 +search = '\.\.\.HEAD' [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" -search = "ReleaseDate" -replace = "{{date}}" min = 1 +replace = "{{date}}" +search = "ReleaseDate" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = "<!-- next-header -->" replace = """ <!-- next-header --> ## [Unreleased] - ReleaseDate """ -exactly = 1 +search = "<!-- next-header -->" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = "<!-- next-url -->" replace = """ <!-- next-url --> [Unreleased]: https://github.com/toml-rs/toml/compare/{{tag_name}}...HEAD""" -exactly = 1 +search = "<!-- next-url -->" [[example]] name = "visit" @@ -105,7 +106,7 @@ name = "invalid" harness = false [dependencies.indexmap] -version = "1.9.1" +version = "2.0.0" features = ["std"] [dependencies.kstring] @@ -118,24 +119,24 @@ version = "1.0.145" optional = true [dependencies.serde_spanned] -version = "0.6.1" +version = "0.6.3" features = ["serde"] optional = true [dependencies.toml_datetime] -version = "0.6.1" +version = "0.6.3" [dependencies.winnow] -version = "0.4.0" +version = "0.5.0" [dev-dependencies.libtest-mimic] version = "0.6.0" [dev-dependencies.serde_json] -version = "1.0.93" +version = "1.0.96" [dev-dependencies.snapbox] -version = "0.4.7" +version = "0.4.11" features = ["harness"] [dev-dependencies.toml-test-data] diff --git a/vendor/toml_edit/LICENSE-APACHE b/vendor/toml_edit/LICENSE-APACHE index 16fe87b..8f71f43 100644 --- 
a/vendor/toml_edit/LICENSE-APACHE +++ b/vendor/toml_edit/LICENSE-APACHE @@ -1,201 +1,202 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/vendor/toml_edit/LICENSE-MIT b/vendor/toml_edit/LICENSE-MIT index b9e61a2..a2d0108 100644 --- a/vendor/toml_edit/LICENSE-MIT +++ b/vendor/toml_edit/LICENSE-MIT @@ -1,6 +1,4 @@ -MIT License - -Copyright (c) 2017 Andronik Ordian +Copyright (c) Individual contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/vendor/toml_edit/src/array.rs b/vendor/toml_edit/src/array.rs index 68cb446..045b451 100644 --- a/vendor/toml_edit/src/array.rs +++ b/vendor/toml_edit/src/array.rs @@ -303,6 +303,20 @@ impl Array { } } + /// Retains only the values specified by the `keep` predicate. + /// + /// In other words, remove all values for which `keep(&value)` returns `false`. + /// + /// This method operates in place, visiting each element exactly once in the + /// original order, and preserves the order of the retained elements. + pub fn retain<F>(&mut self, mut keep: F) + where + F: FnMut(&Value) -> bool, + { + self.values + .retain(|item| item.as_value().map(&mut keep).unwrap_or(false)); + } + fn value_op<T>( &mut self, v: Value, diff --git a/vendor/toml_edit/src/array_of_tables.rs b/vendor/toml_edit/src/array_of_tables.rs index 461a716..c4d7194 100644 --- a/vendor/toml_edit/src/array_of_tables.rs +++ b/vendor/toml_edit/src/array_of_tables.rs @@ -91,6 +91,20 @@ impl ArrayOfTables { pub fn remove(&mut self, index: usize) { self.values.remove(index); } + + /// Retains only the elements specified by the `keep` predicate. + /// + /// In other words, remove all tables for which `keep(&table)` returns `false`. + /// + /// This method operates in place, visiting each element exactly once in the + /// original order, and preserves the order of the retained elements. + pub fn retain<F>(&mut self, mut keep: F) + where + F: FnMut(&Table) -> bool, + { + self.values + .retain(|item| item.as_table().map(&mut keep).unwrap_or(false)); + } } /// An iterator type over `ArrayOfTables`'s values. diff --git a/vendor/toml_edit/src/de/value.rs b/vendor/toml_edit/src/de/value.rs index 5726feb..d3cf87f 100644 --- a/vendor/toml_edit/src/de/value.rs +++ b/vendor/toml_edit/src/de/value.rs @@ -5,6 +5,9 @@ use crate::de::Error; /// Deserialization implementation for TOML [values][crate::Value]. /// +/// Can be created either directly from TOML strings, using [`std::str::FromStr`], +/// or from parsed [values][crate::Value] using [`serde::de::IntoDeserializer::into_deserializer`]. 
+/// /// # Example /// /// ``` @@ -24,7 +27,6 @@ use crate::de::Error; /// let value = r#"{ title = 'TOML Example', owner = { name = 'Lisa' } }"#; /// let deserializer = value.parse::<toml_edit::de::ValueDeserializer>().unwrap(); /// let config = Config::deserialize(deserializer).unwrap(); -/// /// assert_eq!(config.title, "TOML Example"); /// assert_eq!(config.owner.name, "Lisa"); /// ``` diff --git a/vendor/toml_edit/src/encode.rs b/vendor/toml_edit/src/encode.rs index 9940f28..3e98448 100644 --- a/vendor/toml_edit/src/encode.rs +++ b/vendor/toml_edit/src/encode.rs @@ -390,7 +390,7 @@ pub(crate) fn to_string_repr( '\u{8}' => output.push_str("\\b"), '\u{9}' => output.push_str("\\t"), '\u{a}' => match style { - StringStyle::NewlineTripple => output.push('\n'), + StringStyle::NewlineTriple => output.push('\n'), StringStyle::OnelineSingle => output.push_str("\\n"), _ => unreachable!(), }, @@ -412,44 +412,44 @@ pub(crate) fn to_string_repr( #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub(crate) enum StringStyle { - NewlineTripple, - OnelineTripple, + NewlineTriple, + OnelineTriple, OnelineSingle, } impl StringStyle { fn literal_start(self) -> &'static str { match self { - Self::NewlineTripple => "'''\n", - Self::OnelineTripple => "'''", + Self::NewlineTriple => "'''\n", + Self::OnelineTriple => "'''", Self::OnelineSingle => "'", } } fn literal_end(self) -> &'static str { match self { - Self::NewlineTripple => "'''", - Self::OnelineTripple => "'''", + Self::NewlineTriple => "'''", + Self::OnelineTriple => "'''", Self::OnelineSingle => "'", } } fn standard_start(self) -> &'static str { match self { - Self::NewlineTripple => "\"\"\"\n", - // note: OnelineTripple can happen if do_pretty wants to do + Self::NewlineTriple => "\"\"\"\n", + // note: OnelineTriple can happen if do_pretty wants to do // '''it's one line''' // but literal == false - Self::OnelineTripple | Self::OnelineSingle => "\"", + Self::OnelineTriple | Self::OnelineSingle => "\"", } } fn standard_end(self) -> &'static str { match self { - Self::NewlineTripple => "\"\"\"", - // note: OnelineTripple can happen if do_pretty wants to do + Self::NewlineTriple => "\"\"\"", + // note: OnelineTriple can happen if do_pretty wants to do // '''it's one line''' // but literal == false - Self::OnelineTripple | Self::OnelineSingle => "\"", + Self::OnelineTriple | Self::OnelineSingle => "\"", } } } @@ -490,7 +490,7 @@ fn infer_style(value: &str) -> (StringStyle, bool) { '\\' => { prefer_literal = true; } - '\n' => ty = StringStyle::NewlineTripple, + '\n' => ty = StringStyle::NewlineTriple, // Escape codes are needed if any ascii control // characters are present, including \b \f \r. 
c if c <= '\u{1f}' || c == '\u{7f}' => can_be_pretty = false, @@ -501,7 +501,7 @@ fn infer_style(value: &str) -> (StringStyle, bool) { // the string cannot be represented as pretty, // still check if it should be multiline if ch == '\n' { - ty = StringStyle::NewlineTripple; + ty = StringStyle::NewlineTriple; } } } @@ -513,7 +513,7 @@ fn infer_style(value: &str) -> (StringStyle, bool) { can_be_pretty = false; } if !can_be_pretty { - debug_assert!(ty != StringStyle::OnelineTripple); + debug_assert!(ty != StringStyle::OnelineTriple); return (ty, false); } if found_singles > max_found_singles { @@ -522,7 +522,7 @@ fn infer_style(value: &str) -> (StringStyle, bool) { debug_assert!(max_found_singles < 3); if ty == StringStyle::OnelineSingle && max_found_singles >= 1 { // no newlines, but must use ''' because it has ' in it - ty = StringStyle::OnelineTripple; + ty = StringStyle::OnelineTriple; } (ty, true) } diff --git a/vendor/toml_edit/src/inline_table.rs b/vendor/toml_edit/src/inline_table.rs index 9327818..3dc6c0c 100644 --- a/vendor/toml_edit/src/inline_table.rs +++ b/vendor/toml_edit/src/inline_table.rs @@ -363,6 +363,24 @@ impl InlineTable { kv.value.into_value().ok().map(|value| (key, value)) }) } + + /// Retains only the elements specified by the `keep` predicate. + /// + /// In other words, remove all pairs `(key, value)` for which + /// `keep(&key, &mut value)` returns `false`. + /// + /// The elements are visited in iteration order. + pub fn retain<F>(&mut self, mut keep: F) + where + F: FnMut(&str, &mut Value) -> bool, + { + self.items.retain(|key, item| { + item.value + .as_value_mut() + .map(|value| keep(key, value)) + .unwrap_or(false) + }); + } } impl std::fmt::Display for InlineTable { diff --git a/vendor/toml_edit/src/item.rs b/vendor/toml_edit/src/item.rs index 48a1a35..2025fd9 100644 --- a/vendor/toml_edit/src/item.rs +++ b/vendor/toml_edit/src/item.rs @@ -7,7 +7,7 @@ use crate::table::TableLike; use crate::{Array, InlineTable, Table, Value}; /// Type representing either a value, a table, an array of tables, or none. -#[derive(Debug, Clone)] +#[derive(Debug)] pub enum Item { /// Type representing none. None, @@ -316,6 +316,18 @@ impl Item { } } +impl Clone for Item { + #[inline(never)] + fn clone(&self) -> Self { + match self { + Item::None => Item::None, + Item::Value(v) => Item::Value(v.clone()), + Item::Table(v) => Item::Table(v.clone()), + Item::ArrayOfTables(v) => Item::ArrayOfTables(v.clone()), + } + } +} + impl Default for Item { fn default() -> Self { Item::None diff --git a/vendor/toml_edit/src/key.rs b/vendor/toml_edit/src/key.rs index 203445a..c1ee165 100644 --- a/vendor/toml_edit/src/key.rs +++ b/vendor/toml_edit/src/key.rs @@ -29,7 +29,7 @@ use crate::InternalString; /// For details see [toml spec](https://github.com/toml-lang/toml/#keyvalue-pair). /// /// To parse a key use `FromStr` trait implementation: `"string".parse::<Key>()`. 
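As a usage sketch of the `retain` methods introduced above on `Array`, `ArrayOfTables`, and `InlineTable` (assuming the vendored toml_edit 0.19.15 API; the document text and the `nums` key are invented for illustration):

```rust
use toml_edit::Document;

fn main() {
    // Hypothetical document; `nums` is an invented key for illustration.
    let mut doc: Document = "nums = [1, 2, 3, 4]".parse().unwrap();

    let nums = doc["nums"].as_array_mut().expect("`nums` is an array");
    // Keep only even integers; the closure returns `false` for anything else,
    // so non-matching values are removed in place and order is preserved.
    nums.retain(|v| v.as_integer().map(|i| i % 2 == 0).unwrap_or(false));
    assert_eq!(nums.len(), 2);

    // `ArrayOfTables::retain` and `InlineTable::retain` follow the same shape,
    // taking `&Table` and `(&str, &mut Value)` arguments respectively.
    println!("{doc}");
}
```

Note that `InlineTable::retain` hands the closure a mutable value, so entries can be edited while deciding whether to keep them.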
-#[derive(Debug, Clone)] +#[derive(Debug)] pub struct Key { key: InternalString, pub(crate) repr: Option<Repr>, @@ -128,11 +128,28 @@ impl Key { } fn try_parse_simple(s: &str) -> Result<Key, crate::TomlError> { - parser::parse_key(s) + let mut key = parser::parse_key(s)?; + key.despan(s); + Ok(key) } fn try_parse_path(s: &str) -> Result<Vec<Key>, crate::TomlError> { - parser::parse_key_path(s) + let mut keys = parser::parse_key_path(s)?; + for key in &mut keys { + key.despan(s); + } + Ok(keys) + } +} + +impl Clone for Key { + #[inline(never)] + fn clone(&self) -> Self { + Self { + key: self.key.clone(), + repr: self.repr.clone(), + decor: self.decor.clone(), + } } } diff --git a/vendor/toml_edit/src/parser/array.rs b/vendor/toml_edit/src/parser/array.rs index ad66872..e3b1f3f 100644 --- a/vendor/toml_edit/src/parser/array.rs +++ b/vendor/toml_edit/src/parser/array.rs @@ -1,7 +1,8 @@ use winnow::combinator::cut_err; +use winnow::combinator::delimited; use winnow::combinator::opt; -use winnow::multi::separated1; -use winnow::sequence::delimited; +use winnow::combinator::separated1; +use winnow::trace::trace; use crate::parser::trivia::ws_comment_newline; use crate::parser::value::value; @@ -12,19 +13,17 @@ use crate::parser::prelude::*; // ;; Array // array = array-open array-values array-close -pub(crate) fn array( - check: RecursionCheck, -) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> { - move |input| { +pub(crate) fn array<'i>(check: RecursionCheck) -> impl Parser<Input<'i>, Array, ContextError> { + trace("array", move |input: &mut Input<'i>| { delimited( ARRAY_OPEN, cut_err(array_values(check)), cut_err(ARRAY_CLOSE) - .context(Context::Expression("array")) - .context(Context::Expected(ParserValue::CharLiteral(']'))), + .context(StrContext::Label("array")) + .context(StrContext::Expected(StrContextValue::CharLiteral(']'))), ) .parse_next(input) - } + }) } // note: we're omitting ws and newlines here, because @@ -39,10 +38,10 @@ const ARRAY_SEP: u8 = b','; // note: this rule is modified // array-values = [ ( array-value array-sep array-values ) / // array-value / ws-comment-newline ] -pub(crate) fn array_values( +pub(crate) fn array_values<'i>( check: RecursionCheck, -) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> { - move |input| { +) -> impl Parser<Input<'i>, Array, ContextError> { + move |input: &mut Input<'i>| { let check = check.recursing(input)?; ( opt( @@ -57,7 +56,7 @@ pub(crate) fn array_values( ), ws_comment_newline.span(), ) - .map_res::<_, _, std::str::Utf8Error>(|(array, trailing)| { + .try_map::<_, _, std::str::Utf8Error>(|(array, trailing)| { let (mut array, comma) = array.unwrap_or_default(); array.set_trailing_comma(comma); array.set_trailing(RawString::with_span(trailing)); @@ -67,10 +66,10 @@ pub(crate) fn array_values( } } -pub(crate) fn array_value( +pub(crate) fn array_value<'i>( check: RecursionCheck, -) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Value, ParserError<'_>> { - move |input| { +) -> impl Parser<Input<'i>, Value, ContextError> { + move |input: &mut Input<'i>| { ( ws_comment_newline.span(), value(check), diff --git a/vendor/toml_edit/src/parser/datetime.rs b/vendor/toml_edit/src/parser/datetime.rs index 89fd483..6e89b97 100644 --- a/vendor/toml_edit/src/parser/datetime.rs +++ b/vendor/toml_edit/src/parser/datetime.rs @@ -5,12 +5,13 @@ use crate::parser::prelude::*; use crate::parser::trivia::from_utf8_unchecked; use toml_datetime::*; -use winnow::branch::alt; -use winnow::bytes::one_of; -use 
winnow::bytes::take_while_m_n; +use winnow::combinator::alt; use winnow::combinator::cut_err; use winnow::combinator::opt; -use winnow::sequence::preceded; +use winnow::combinator::preceded; +use winnow::token::one_of; +use winnow::token::take_while; +use winnow::trace::trace; // ;; Date and Time (as defined in RFC 3339) @@ -20,91 +21,103 @@ use winnow::sequence::preceded; // local-date = full-date // local-time = partial-time // full-time = partial-time time-offset -pub(crate) fn date_time(input: Input<'_>) -> IResult<Input<'_>, Datetime, ParserError<'_>> { - alt(( - (full_date, opt((time_delim, partial_time, opt(time_offset)))) - .map(|(date, opt)| { - match opt { - // Offset Date-Time - Some((_, time, offset)) => Datetime { - date: Some(date), - time: Some(time), - offset, - }, - // Local Date - None => Datetime { - date: Some(date), - time: None, - offset: None, - }, - } - }) - .context(Context::Expression("date-time")), - partial_time - .map(|t| t.into()) - .context(Context::Expression("time")), - )) +pub(crate) fn date_time(input: &mut Input<'_>) -> PResult<Datetime> { + trace( + "date-time", + alt(( + (full_date, opt((time_delim, partial_time, opt(time_offset)))) + .map(|(date, opt)| { + match opt { + // Offset Date-Time + Some((_, time, offset)) => Datetime { + date: Some(date), + time: Some(time), + offset, + }, + // Local Date + None => Datetime { + date: Some(date), + time: None, + offset: None, + }, + } + }) + .context(StrContext::Label("date-time")), + partial_time + .map(|t| t.into()) + .context(StrContext::Label("time")), + )), + ) .parse_next(input) } // full-date = date-fullyear "-" date-month "-" date-mday -pub(crate) fn full_date(input: Input<'_>) -> IResult<Input<'_>, Date, ParserError<'_>> { - (date_fullyear, b'-', cut_err((date_month, b'-', date_mday))) - .map(|(year, _, (month, _, day))| Date { year, month, day }) - .parse_next(input) +pub(crate) fn full_date(input: &mut Input<'_>) -> PResult<Date> { + trace( + "full-date", + (date_fullyear, b'-', cut_err((date_month, b'-', date_mday))) + .map(|(year, _, (month, _, day))| Date { year, month, day }), + ) + .parse_next(input) } // partial-time = time-hour ":" time-minute ":" time-second [time-secfrac] -pub(crate) fn partial_time(input: Input<'_>) -> IResult<Input<'_>, Time, ParserError<'_>> { - ( - time_hour, - b':', - cut_err((time_minute, b':', time_second, opt(time_secfrac))), +pub(crate) fn partial_time(input: &mut Input<'_>) -> PResult<Time> { + trace( + "partial-time", + ( + time_hour, + b':', + cut_err((time_minute, b':', time_second, opt(time_secfrac))), + ) + .map(|(hour, _, (minute, _, second, nanosecond))| Time { + hour, + minute, + second, + nanosecond: nanosecond.unwrap_or_default(), + }), ) - .map(|(hour, _, (minute, _, second, nanosecond))| Time { - hour, - minute, - second, - nanosecond: nanosecond.unwrap_or_default(), - }) - .parse_next(input) + .parse_next(input) } // time-offset = "Z" / time-numoffset // time-numoffset = ( "+" / "-" ) time-hour ":" time-minute -pub(crate) fn time_offset(input: Input<'_>) -> IResult<Input<'_>, Offset, ParserError<'_>> { - alt(( - one_of((b'Z', b'z')).value(Offset::Z), - ( - one_of((b'+', b'-')), - cut_err((time_hour, b':', time_minute)), - ) - .map(|(sign, (hours, _, minutes))| { - let sign = match sign { - b'+' => 1, - b'-' => -1, - _ => unreachable!("Parser prevents this"), - }; - sign * (hours as i16 * 60 + minutes as i16) - }) - .verify(|minutes| ((-24 * 60)..=(24 * 60)).contains(minutes)) - .map(|minutes| Offset::Custom { minutes }), - )) - 
.context(Context::Expression("time offset")) +pub(crate) fn time_offset(input: &mut Input<'_>) -> PResult<Offset> { + trace( + "time-offset", + alt(( + one_of((b'Z', b'z')).value(Offset::Z), + ( + one_of((b'+', b'-')), + cut_err((time_hour, b':', time_minute)), + ) + .map(|(sign, (hours, _, minutes))| { + let sign = match sign { + b'+' => 1, + b'-' => -1, + _ => unreachable!("Parser prevents this"), + }; + sign * (hours as i16 * 60 + minutes as i16) + }) + .verify(|minutes| ((-24 * 60)..=(24 * 60)).contains(minutes)) + .map(|minutes| Offset::Custom { minutes }), + )) + .context(StrContext::Label("time offset")), + ) .parse_next(input) } // date-fullyear = 4DIGIT -pub(crate) fn date_fullyear(input: Input<'_>) -> IResult<Input<'_>, u16, ParserError<'_>> { +pub(crate) fn date_fullyear(input: &mut Input<'_>) -> PResult<u16> { unsigned_digits::<4, 4> .map(|s: &str| s.parse::<u16>().expect("4DIGIT should match u8")) .parse_next(input) } // date-month = 2DIGIT ; 01-12 -pub(crate) fn date_month(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { +pub(crate) fn date_month(input: &mut Input<'_>) -> PResult<u8> { unsigned_digits::<2, 2> - .map_res(|s: &str| { + .try_map(|s: &str| { let d = s.parse::<u8>().expect("2DIGIT should match u8"); if (1..=12).contains(&d) { Ok(d) @@ -116,9 +129,9 @@ pub(crate) fn date_month(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError } // date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year -pub(crate) fn date_mday(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { +pub(crate) fn date_mday(input: &mut Input<'_>) -> PResult<u8> { unsigned_digits::<2, 2> - .map_res(|s: &str| { + .try_map(|s: &str| { let d = s.parse::<u8>().expect("2DIGIT should match u8"); if (1..=31).contains(&d) { Ok(d) @@ -130,16 +143,16 @@ pub(crate) fn date_mday(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError< } // time-delim = "T" / %x20 ; T, t, or space -pub(crate) fn time_delim(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { +pub(crate) fn time_delim(input: &mut Input<'_>) -> PResult<u8> { one_of(TIME_DELIM).parse_next(input) } const TIME_DELIM: (u8, u8, u8) = (b'T', b't', b' '); // time-hour = 2DIGIT ; 00-23 -pub(crate) fn time_hour(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { +pub(crate) fn time_hour(input: &mut Input<'_>) -> PResult<u8> { unsigned_digits::<2, 2> - .map_res(|s: &str| { + .try_map(|s: &str| { let d = s.parse::<u8>().expect("2DIGIT should match u8"); if (0..=23).contains(&d) { Ok(d) @@ -151,9 +164,9 @@ pub(crate) fn time_hour(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError< } // time-minute = 2DIGIT ; 00-59 -pub(crate) fn time_minute(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { +pub(crate) fn time_minute(input: &mut Input<'_>) -> PResult<u8> { unsigned_digits::<2, 2> - .map_res(|s: &str| { + .try_map(|s: &str| { let d = s.parse::<u8>().expect("2DIGIT should match u8"); if (0..=59).contains(&d) { Ok(d) @@ -165,9 +178,9 @@ pub(crate) fn time_minute(input: Input<'_>) -> IResult<Input<'_>, u8, ParserErro } // time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second rules -pub(crate) fn time_second(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { +pub(crate) fn time_second(input: &mut Input<'_>) -> PResult<u8> { unsigned_digits::<2, 2> - .map_res(|s: &str| { + .try_map(|s: &str| { let d = s.parse::<u8>().expect("2DIGIT should match u8"); if (0..=60).contains(&d) { Ok(d) @@ -179,7 +192,7 @@ pub(crate) fn time_second(input: Input<'_>) -> IResult<Input<'_>, 
u8, ParserErro } // time-secfrac = "." 1*DIGIT -pub(crate) fn time_secfrac(input: Input<'_>) -> IResult<Input<'_>, u32, ParserError<'_>> { +pub(crate) fn time_secfrac(input: &mut Input<'_>) -> PResult<u32> { static SCALE: [u32; 10] = [ 0, 100_000_000, @@ -194,7 +207,7 @@ pub(crate) fn time_secfrac(input: Input<'_>) -> IResult<Input<'_>, u32, ParserEr ]; const INF: usize = usize::MAX; preceded(b'.', unsigned_digits::<1, INF>) - .map_res(|mut repr: &str| -> Result<u32, CustomError> { + .try_map(|mut repr: &str| -> Result<u32, CustomError> { let max_digits = SCALE.len() - 1; if max_digits < repr.len() { // Millisecond precision is required. Further precision of fractional seconds is @@ -214,10 +227,10 @@ pub(crate) fn time_secfrac(input: Input<'_>) -> IResult<Input<'_>, u32, ParserEr .parse_next(input) } -pub(crate) fn unsigned_digits<const MIN: usize, const MAX: usize>( - input: Input<'_>, -) -> IResult<Input<'_>, &str, ParserError<'_>> { - take_while_m_n(MIN, MAX, DIGIT) +pub(crate) fn unsigned_digits<'i, const MIN: usize, const MAX: usize>( + input: &mut Input<'i>, +) -> PResult<&'i str> { + take_while(MIN..=MAX, DIGIT) .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") }) .parse_next(input) } diff --git a/vendor/toml_edit/src/parser/document.rs b/vendor/toml_edit/src/parser/document.rs index 3ff7536..aa8fb11 100644 --- a/vendor/toml_edit/src/parser/document.rs +++ b/vendor/toml_edit/src/parser/document.rs @@ -1,13 +1,13 @@ use std::cell::RefCell; -use winnow::bytes::any; -use winnow::bytes::one_of; use winnow::combinator::cut_err; use winnow::combinator::eof; use winnow::combinator::opt; use winnow::combinator::peek; -use winnow::error::FromExternalError; -use winnow::multi::many0; +use winnow::combinator::repeat; +use winnow::token::any; +use winnow::token::one_of; +use winnow::trace::trace; use crate::document::Document; use crate::key::Key; @@ -30,15 +30,15 @@ use crate::RawString; // ( ws keyval ws [ comment ] ) / // ( ws table ws [ comment ] ) / // ws ) -pub(crate) fn document(input: Input<'_>) -> IResult<Input<'_>, Document, ParserError<'_>> { +pub(crate) fn document(input: &mut Input<'_>) -> PResult<Document> { let state = RefCell::new(ParseState::default()); let state_ref = &state; - let (i, _o) = ( + let _o = ( // Remove BOM if present opt(b"\xEF\xBB\xBF"), parse_ws(state_ref), - many0(( + repeat(0.., ( dispatch! 
{peek(any); crate::parser::trivia::COMMENT_START_SYMBOL => cut_err(parse_comment(state_ref)), crate::parser::table::STD_TABLE_OPEN => cut_err(table(state_ref)), @@ -52,23 +52,15 @@ pub(crate) fn document(input: Input<'_>) -> IResult<Input<'_>, Document, ParserE eof, ) .parse_next(input)?; - state - .into_inner() - .into_document() - .map(|document| (i, document)) - .map_err(|err| { - winnow::error::ErrMode::Backtrack(ParserError::from_external_error( - i, - winnow::error::ErrorKind::Verify, - err, - )) - }) + state.into_inner().into_document().map_err(|err| { + winnow::error::ErrMode::from_external_error(input, winnow::error::ErrorKind::Verify, err) + }) } pub(crate) fn parse_comment<'s, 'i>( state: &'s RefCell<ParseState>, -) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'_>> + 's { - move |i| { +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { (comment, line_ending) .span() .map(|span| { @@ -80,8 +72,8 @@ pub(crate) fn parse_comment<'s, 'i>( pub(crate) fn parse_ws<'s, 'i>( state: &'s RefCell<ParseState>, -) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { - move |i| { +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { ws.span() .map(|span| state.borrow_mut().on_ws(span)) .parse_next(i) @@ -90,8 +82,8 @@ pub(crate) fn parse_ws<'s, 'i>( pub(crate) fn parse_newline<'s, 'i>( state: &'s RefCell<ParseState>, -) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { - move |i| { +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { newline .span() .map(|span| state.borrow_mut().on_ws(span)) @@ -101,48 +93,49 @@ pub(crate) fn parse_newline<'s, 'i>( pub(crate) fn keyval<'s, 'i>( state: &'s RefCell<ParseState>, -) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { - move |i| { +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { parse_keyval - .map_res(|(p, kv)| state.borrow_mut().on_keyval(p, kv)) + .try_map(|(p, kv)| state.borrow_mut().on_keyval(p, kv)) .parse_next(i) } } // keyval = key keyval-sep val -pub(crate) fn parse_keyval( - input: Input<'_>, -) -> IResult<Input<'_>, (Vec<Key>, TableKeyValue), ParserError<'_>> { - ( - key, - cut_err(( - one_of(KEYVAL_SEP) - .context(Context::Expected(ParserValue::CharLiteral('.'))) - .context(Context::Expected(ParserValue::CharLiteral('='))), - ( - ws.span(), - value(RecursionCheck::default()), - line_trailing - .context(Context::Expected(ParserValue::CharLiteral('\n'))) - .context(Context::Expected(ParserValue::CharLiteral('#'))), - ), - )), - ) - .map_res::<_, _, std::str::Utf8Error>(|(key, (_, v))| { - let mut path = key; - let key = path.pop().expect("grammar ensures at least 1"); +pub(crate) fn parse_keyval(input: &mut Input<'_>) -> PResult<(Vec<Key>, TableKeyValue)> { + trace( + "keyval", + ( + key, + cut_err(( + one_of(KEYVAL_SEP) + .context(StrContext::Expected(StrContextValue::CharLiteral('.'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('='))), + ( + ws.span(), + value(RecursionCheck::default()), + line_trailing + .context(StrContext::Expected(StrContextValue::CharLiteral('\n'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('#'))), + ), + )), + ) + .try_map::<_, _, std::str::Utf8Error>(|(key, (_, v))| { + let mut path = key; + let key = path.pop().expect("grammar ensures at least 1"); - let (pre, v, suf) = v; - let pre = RawString::with_span(pre); - let suf = RawString::with_span(suf); - let v = 
v.decorated(pre, suf); - Ok(( - path, - TableKeyValue { - key, - value: Item::Value(v), - }, - )) - }) - .parse_next(input) + let (pre, v, suf) = v; + let pre = RawString::with_span(pre); + let suf = RawString::with_span(suf); + let v = v.decorated(pre, suf); + Ok(( + path, + TableKeyValue { + key, + value: Item::Value(v), + }, + )) + }), + ) + .parse_next(input) } diff --git a/vendor/toml_edit/src/parser/errors.rs b/vendor/toml_edit/src/parser/errors.rs index 4e3825f..96ad886 100644 --- a/vendor/toml_edit/src/parser/errors.rs +++ b/vendor/toml_edit/src/parser/errors.rs @@ -4,7 +4,8 @@ use std::fmt::{Display, Formatter, Result}; use crate::parser::prelude::*; use crate::Key; -use winnow::BStr; +use winnow::error::ContextError; +use winnow::error::ParseError; /// Type representing a TOML parse error #[derive(Debug, Clone, Eq, PartialEq, Hash)] @@ -16,19 +17,18 @@ pub struct TomlError { } impl TomlError { - pub(crate) fn new(error: ParserError<'_>, original: Input<'_>) -> Self { - use winnow::stream::Offset; + pub(crate) fn new(error: ParseError<Input<'_>, ContextError>, mut original: Input<'_>) -> Self { use winnow::stream::Stream; - let offset = original.offset_to(&error.input); + let offset = error.offset(); let span = if offset == original.len() { offset..offset } else { offset..(offset + 1) }; - let message = error.to_string(); - let original = original.next_slice(original.eof_offset()).1; + let message = error.inner().to_string(); + let original = original.finish(); Self { message, @@ -146,166 +146,6 @@ impl StdError for TomlError { } } -#[derive(Debug)] -pub(crate) struct ParserError<'b> { - input: Input<'b>, - context: Vec<Context>, - cause: Option<Box<dyn std::error::Error + Send + Sync + 'static>>, -} - -impl<'b> winnow::error::ParseError<Input<'b>> for ParserError<'b> { - fn from_error_kind(input: Input<'b>, _kind: winnow::error::ErrorKind) -> Self { - Self { - input, - context: Default::default(), - cause: Default::default(), - } - } - - fn append(self, _input: Input<'b>, _kind: winnow::error::ErrorKind) -> Self { - self - } - - fn or(self, other: Self) -> Self { - other - } -} - -impl<'b> winnow::error::ParseError<&'b str> for ParserError<'b> { - fn from_error_kind(input: &'b str, _kind: winnow::error::ErrorKind) -> Self { - Self { - input: Input::new(BStr::new(input)), - context: Default::default(), - cause: Default::default(), - } - } - - fn append(self, _input: &'b str, _kind: winnow::error::ErrorKind) -> Self { - self - } - - fn or(self, other: Self) -> Self { - other - } -} - -impl<'b> winnow::error::ContextError<Input<'b>, Context> for ParserError<'b> { - fn add_context(mut self, _input: Input<'b>, ctx: Context) -> Self { - self.context.push(ctx); - self - } -} - -impl<'b, E: std::error::Error + Send + Sync + 'static> - winnow::error::FromExternalError<Input<'b>, E> for ParserError<'b> -{ - fn from_external_error(input: Input<'b>, _kind: winnow::error::ErrorKind, e: E) -> Self { - Self { - input, - context: Default::default(), - cause: Some(Box::new(e)), - } - } -} - -impl<'b, E: std::error::Error + Send + Sync + 'static> winnow::error::FromExternalError<&'b str, E> - for ParserError<'b> -{ - fn from_external_error(input: &'b str, _kind: winnow::error::ErrorKind, e: E) -> Self { - Self { - input: Input::new(BStr::new(input)), - context: Default::default(), - cause: Some(Box::new(e)), - } - } -} - -// For tests -impl<'b> std::cmp::PartialEq for ParserError<'b> { - fn eq(&self, other: &Self) -> bool { - self.input == other.input - && self.context == other.context - && 
self.cause.as_ref().map(ToString::to_string) - == other.cause.as_ref().map(ToString::to_string) - } -} - -impl<'a> std::fmt::Display for ParserError<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let expression = self.context.iter().find_map(|c| match c { - Context::Expression(c) => Some(c), - _ => None, - }); - let expected = self - .context - .iter() - .filter_map(|c| match c { - Context::Expected(c) => Some(c), - _ => None, - }) - .collect::<Vec<_>>(); - - let mut newline = false; - - if let Some(expression) = expression { - newline = true; - - write!(f, "invalid {}", expression)?; - } - - if !expected.is_empty() { - if newline { - writeln!(f)?; - } - newline = true; - - write!(f, "expected ")?; - for (i, expected) in expected.iter().enumerate() { - if i != 0 { - write!(f, ", ")?; - } - write!(f, "{}", expected)?; - } - } - if let Some(cause) = &self.cause { - if newline { - writeln!(f)?; - } - write!(f, "{}", cause)?; - } - - Ok(()) - } -} - -#[derive(Copy, Clone, Debug, PartialEq)] -pub(crate) enum Context { - Expression(&'static str), - Expected(ParserValue), -} - -#[derive(Copy, Clone, Debug, PartialEq)] -pub(crate) enum ParserValue { - CharLiteral(char), - StringLiteral(&'static str), - Description(&'static str), -} - -impl std::fmt::Display for ParserValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ParserValue::CharLiteral('\n') => "newline".fmt(f), - ParserValue::CharLiteral('`') => "'`'".fmt(f), - ParserValue::CharLiteral(c) if c.is_ascii_control() => { - write!(f, "`{}`", c.escape_debug()) - } - ParserValue::CharLiteral(c) => write!(f, "`{}`", c), - ParserValue::StringLiteral(c) => write!(f, "`{}`", c), - ParserValue::Description(c) => write!(f, "{}", c), - } - } -} - fn translate_position(input: &[u8], index: usize) -> (usize, usize) { if input.is_empty() { return (0, index); @@ -470,7 +310,7 @@ impl Display for CustomError { ) } CustomError::OutOfRange => write!(f, "value is out of range"), - CustomError::RecursionLimitExceeded => write!(f, "recursion limit exceded"), + CustomError::RecursionLimitExceeded => write!(f, "recursion limit exceeded"), } } } diff --git a/vendor/toml_edit/src/parser/inline_table.rs b/vendor/toml_edit/src/parser/inline_table.rs index 7bec287..994e003 100644 --- a/vendor/toml_edit/src/parser/inline_table.rs +++ b/vendor/toml_edit/src/parser/inline_table.rs @@ -1,7 +1,8 @@ -use winnow::bytes::one_of; use winnow::combinator::cut_err; -use winnow::multi::separated0; -use winnow::sequence::delimited; +use winnow::combinator::delimited; +use winnow::combinator::separated0; +use winnow::token::one_of; +use winnow::trace::trace; use crate::key::Key; use crate::parser::errors::CustomError; @@ -17,19 +18,19 @@ use indexmap::map::Entry; // ;; Inline Table // inline-table = inline-table-open inline-table-keyvals inline-table-close -pub(crate) fn inline_table( +pub(crate) fn inline_table<'i>( check: RecursionCheck, -) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, InlineTable, ParserError<'_>> { - move |input| { +) -> impl Parser<Input<'i>, InlineTable, ContextError> { + trace("inline-table", move |input: &mut Input<'i>| { delimited( INLINE_TABLE_OPEN, - cut_err(inline_table_keyvals(check).map_res(|(kv, p)| table_from_pairs(kv, p))), + cut_err(inline_table_keyvals(check).try_map(|(kv, p)| table_from_pairs(kv, p))), cut_err(INLINE_TABLE_CLOSE) - .context(Context::Expression("inline table")) - .context(Context::Expected(ParserValue::CharLiteral('}'))), + 
.context(StrContext::Label("inline table")) + .context(StrContext::Expected(StrContextValue::CharLiteral('}'))), ) .parse_next(input) - } + }) } fn table_from_pairs( @@ -96,12 +97,10 @@ pub(crate) const KEYVAL_SEP: u8 = b'='; // ( key keyval-sep val inline-table-sep inline-table-keyvals-non-empty ) / // ( key keyval-sep val ) -fn inline_table_keyvals( +fn inline_table_keyvals<'i>( check: RecursionCheck, -) -> impl FnMut( - Input<'_>, -) -> IResult<Input<'_>, (Vec<(Vec<Key>, TableKeyValue)>, RawString), ParserError<'_>> { - move |input| { +) -> impl Parser<Input<'i>, (Vec<(Vec<Key>, TableKeyValue)>, RawString), ContextError> { + move |input: &mut Input<'i>| { let check = check.recursing(input)?; ( separated0(keyval(check), INLINE_TABLE_SEP), @@ -111,16 +110,16 @@ fn inline_table_keyvals( } } -fn keyval( +fn keyval<'i>( check: RecursionCheck, -) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, (Vec<Key>, TableKeyValue), ParserError<'_>> { - move |input| { +) -> impl Parser<Input<'i>, (Vec<Key>, TableKeyValue), ContextError> { + move |input: &mut Input<'i>| { ( key, cut_err(( one_of(KEYVAL_SEP) - .context(Context::Expected(ParserValue::CharLiteral('.'))) - .context(Context::Expected(ParserValue::CharLiteral('='))), + .context(StrContext::Expected(StrContextValue::CharLiteral('.'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('='))), (ws.span(), value(check), ws.span()), )), ) diff --git a/vendor/toml_edit/src/parser/key.rs b/vendor/toml_edit/src/parser/key.rs index c3d4e79..12715da 100644 --- a/vendor/toml_edit/src/parser/key.rs +++ b/vendor/toml_edit/src/parser/key.rs @@ -1,9 +1,10 @@ use std::ops::RangeInclusive; -use winnow::bytes::any; -use winnow::bytes::take_while1; use winnow::combinator::peek; -use winnow::multi::separated1; +use winnow::combinator::separated1; +use winnow::token::any; +use winnow::token::take_while; +use winnow::trace::trace; use crate::key::Key; use crate::parser::errors::CustomError; @@ -16,51 +17,58 @@ use crate::RawString; // key = simple-key / dotted-key // dotted-key = simple-key 1*( dot-sep simple-key ) -pub(crate) fn key(input: Input<'_>) -> IResult<Input<'_>, Vec<Key>, ParserError<'_>> { - separated1( - (ws.span(), simple_key, ws.span()).map(|(pre, (raw, key), suffix)| { - Key::new(key) - .with_repr_unchecked(Repr::new_unchecked(raw)) - .with_decor(Decor::new( - RawString::with_span(pre), - RawString::with_span(suffix), - )) +pub(crate) fn key(input: &mut Input<'_>) -> PResult<Vec<Key>> { + trace( + "dotted-key", + separated1( + (ws.span(), simple_key, ws.span()).map(|(pre, (raw, key), suffix)| { + Key::new(key) + .with_repr_unchecked(Repr::new_unchecked(raw)) + .with_decor(Decor::new( + RawString::with_span(pre), + RawString::with_span(suffix), + )) + }), + DOT_SEP, + ) + .context(StrContext::Label("key")) + .try_map(|k: Vec<_>| { + // Inserting the key will require recursion down the line + RecursionCheck::check_depth(k.len())?; + Ok::<_, CustomError>(k) }), - DOT_SEP, ) - .context(Context::Expression("key")) - .map_res(|k: Vec<_>| { - // Inserting the key will require recursion down the line - RecursionCheck::check_depth(k.len())?; - Ok::<_, CustomError>(k) - }) .parse_next(input) } // simple-key = quoted-key / unquoted-key // quoted-key = basic-string / literal-string -pub(crate) fn simple_key( - input: Input<'_>, -) -> IResult<Input<'_>, (RawString, InternalString), ParserError<'_>> { - dispatch! 
{peek(any); - crate::parser::strings::QUOTATION_MARK => basic_string - .map(|s: std::borrow::Cow<'_, str>| s.as_ref().into()), - crate::parser::strings::APOSTROPHE => literal_string.map(|s: &str| s.into()), - _ => unquoted_key.map(|s: &str| s.into()), - } - .with_span() - .map(|(k, span)| { - let raw = RawString::with_span(span); - (raw, k) - }) +pub(crate) fn simple_key(input: &mut Input<'_>) -> PResult<(RawString, InternalString)> { + trace( + "simple-key", + dispatch! {peek(any); + crate::parser::strings::QUOTATION_MARK => basic_string + .map(|s: std::borrow::Cow<'_, str>| s.as_ref().into()), + crate::parser::strings::APOSTROPHE => literal_string.map(|s: &str| s.into()), + _ => unquoted_key.map(|s: &str| s.into()), + } + .with_span() + .map(|(k, span)| { + let raw = RawString::with_span(span); + (raw, k) + }), + ) .parse_next(input) } // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ -fn unquoted_key(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { - take_while1(UNQUOTED_CHAR) - .map(|b| unsafe { from_utf8_unchecked(b, "`is_unquoted_char` filters out on-ASCII") }) - .parse_next(input) +fn unquoted_key<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "unquoted-key", + take_while(1.., UNQUOTED_CHAR) + .map(|b| unsafe { from_utf8_unchecked(b, "`is_unquoted_char` filters out on-ASCII") }), + ) + .parse_next(input) } pub(crate) fn is_unquoted_char(c: u8) -> bool { diff --git a/vendor/toml_edit/src/parser/macros.rs b/vendor/toml_edit/src/parser/macros.rs deleted file mode 100644 index 93050f5..0000000 --- a/vendor/toml_edit/src/parser/macros.rs +++ /dev/null @@ -1,13 +0,0 @@ -macro_rules! dispatch { - ($match_parser: expr; $( $pat:pat $(if $pred:expr)? => $expr: expr ),+ $(,)? ) => { - move |i| - { - let (i, initial) = $match_parser.parse_next(i)?; - match initial { - $( - $pat $(if $pred)? 
=> $expr.parse_next(i), - )* - } - } - } -} diff --git a/vendor/toml_edit/src/parser/mod.rs b/vendor/toml_edit/src/parser/mod.rs index b2ce4bc..eb47550 100644 --- a/vendor/toml_edit/src/parser/mod.rs +++ b/vendor/toml_edit/src/parser/mod.rs @@ -1,8 +1,5 @@ #![allow(clippy::type_complexity)] -#[macro_use] -pub(crate) mod macros; - pub(crate) mod array; pub(crate) mod datetime; pub(crate) mod document; @@ -76,11 +73,13 @@ pub(crate) fn parse_value(raw: &str) -> Result<crate::Value, TomlError> { } pub(crate) mod prelude { - pub(crate) use super::errors::Context; - pub(crate) use super::errors::ParserError; - pub(crate) use super::errors::ParserValue; - pub(crate) use winnow::IResult; - pub(crate) use winnow::Parser as _; + pub(crate) use winnow::combinator::dispatch; + pub(crate) use winnow::error::ContextError; + pub(crate) use winnow::error::FromExternalError; + pub(crate) use winnow::error::StrContext; + pub(crate) use winnow::error::StrContextValue; + pub(crate) use winnow::PResult; + pub(crate) use winnow::Parser; pub(crate) type Input<'b> = winnow::Located<&'b winnow::BStr>; @@ -88,40 +87,6 @@ pub(crate) mod prelude { winnow::Located::new(winnow::BStr::new(s)) } - pub(crate) fn ok_error<I, O, E>( - res: IResult<I, O, E>, - ) -> Result<Option<(I, O)>, winnow::error::ErrMode<E>> { - match res { - Ok(ok) => Ok(Some(ok)), - Err(winnow::error::ErrMode::Backtrack(_)) => Ok(None), - Err(err) => Err(err), - } - } - - #[allow(dead_code)] - pub(crate) fn trace<I: std::fmt::Debug, O: std::fmt::Debug, E: std::fmt::Debug>( - context: impl std::fmt::Display, - mut parser: impl winnow::Parser<I, O, E>, - ) -> impl FnMut(I) -> IResult<I, O, E> { - static DEPTH: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); - move |input: I| { - let depth = DEPTH.fetch_add(1, std::sync::atomic::Ordering::SeqCst) * 2; - eprintln!("{:depth$}--> {} {:?}", "", context, input); - match parser.parse_next(input) { - Ok((i, o)) => { - DEPTH.fetch_sub(1, std::sync::atomic::Ordering::SeqCst); - eprintln!("{:depth$}<-- {} {:?}", "", context, i); - Ok((i, o)) - } - Err(err) => { - DEPTH.fetch_sub(1, std::sync::atomic::Ordering::SeqCst); - eprintln!("{:depth$}<-- {} {:?}", "", context, err); - Err(err) - } - } - } - } - #[cfg(not(feature = "unbounded"))] #[derive(Copy, Clone, Debug, Default)] pub(crate) struct RecursionCheck { @@ -140,18 +105,16 @@ pub(crate) mod prelude { pub(crate) fn recursing( mut self, - input: Input<'_>, - ) -> Result<Self, winnow::error::ErrMode<ParserError<'_>>> { + input: &mut Input<'_>, + ) -> Result<Self, winnow::error::ErrMode<ContextError>> { self.current += 1; if self.current < 128 { Ok(self) } else { - Err(winnow::error::ErrMode::Backtrack( - winnow::error::FromExternalError::from_external_error( - input, - winnow::error::ErrorKind::Eof, - super::errors::CustomError::RecursionLimitExceeded, - ), + Err(winnow::error::ErrMode::from_external_error( + input, + winnow::error::ErrorKind::Eof, + super::errors::CustomError::RecursionLimitExceeded, )) } } @@ -169,8 +132,8 @@ pub(crate) mod prelude { pub(crate) fn recursing( self, - _input: Input<'_>, - ) -> Result<Self, winnow::error::ErrMode<ParserError<'_>>> { + _input: &mut Input<'_>, + ) -> Result<Self, winnow::error::ErrMode<ContextError>> { Ok(self) } } @@ -219,10 +182,10 @@ hosts = [ "omega" ] - 'some.wierd .stuff' = """ + 'some.weird .stuff' = """ like that - # """ # this broke my sintax highlighting + # """ # this broke my syntax highlighting " also. 
like " = ''' that ''' diff --git a/vendor/toml_edit/src/parser/numbers.rs b/vendor/toml_edit/src/parser/numbers.rs index 903b5f5..6e4757f 100644 --- a/vendor/toml_edit/src/parser/numbers.rs +++ b/vendor/toml_edit/src/parser/numbers.rs @@ -1,15 +1,16 @@ use std::ops::RangeInclusive; -use winnow::branch::alt; -use winnow::bytes::one_of; -use winnow::bytes::tag; -use winnow::bytes::take; +use winnow::combinator::alt; use winnow::combinator::cut_err; use winnow::combinator::opt; use winnow::combinator::peek; +use winnow::combinator::preceded; +use winnow::combinator::repeat; use winnow::combinator::rest; -use winnow::multi::many0; -use winnow::sequence::preceded; +use winnow::token::one_of; +use winnow::token::tag; +use winnow::token::take; +use winnow::trace::trace; use crate::parser::prelude::*; use crate::parser::trivia::from_utf8_unchecked; @@ -18,16 +19,16 @@ use crate::parser::trivia::from_utf8_unchecked; // boolean = true / false #[allow(dead_code)] // directly define in `fn value` -pub(crate) fn boolean(input: Input<'_>) -> IResult<Input<'_>, bool, ParserError<'_>> { - alt((true_, false_)).parse_next(input) +pub(crate) fn boolean(input: &mut Input<'_>) -> PResult<bool> { + trace("boolean", alt((true_, false_))).parse_next(input) } -pub(crate) fn true_(input: Input<'_>) -> IResult<Input<'_>, bool, ParserError<'_>> { +pub(crate) fn true_(input: &mut Input<'_>) -> PResult<bool> { (peek(TRUE[0]), cut_err(TRUE)).value(true).parse_next(input) } const TRUE: &[u8] = b"true"; -pub(crate) fn false_(input: Input<'_>) -> IResult<Input<'_>, bool, ParserError<'_>> { +pub(crate) fn false_(input: &mut Input<'_>) -> PResult<bool> { (peek(FALSE[0]), cut_err(FALSE)) .value(false) .parse_next(input) @@ -37,94 +38,119 @@ const FALSE: &[u8] = b"false"; // ;; Integer // integer = dec-int / hex-int / oct-int / bin-int -pub(crate) fn integer(input: Input<'_>) -> IResult<Input<'_>, i64, ParserError<'_>> { +pub(crate) fn integer(input: &mut Input<'_>) -> PResult<i64> { + trace("integer", dispatch! 
{peek(opt::<_, &[u8], _, _>(take(2usize))); - Some(b"0x") => cut_err(hex_int.map_res(|s| i64::from_str_radix(&s.replace('_', ""), 16))), - Some(b"0o") => cut_err(oct_int.map_res(|s| i64::from_str_radix(&s.replace('_', ""), 8))), - Some(b"0b") => cut_err(bin_int.map_res(|s| i64::from_str_radix(&s.replace('_', ""), 2))), + Some(b"0x") => cut_err(hex_int.try_map(|s| i64::from_str_radix(&s.replace('_', ""), 16))), + Some(b"0o") => cut_err(oct_int.try_map(|s| i64::from_str_radix(&s.replace('_', ""), 8))), + Some(b"0b") => cut_err(bin_int.try_map(|s| i64::from_str_radix(&s.replace('_', ""), 2))), _ => dec_int.and_then(cut_err(rest - .map_res(|s: &str| s.replace('_', "").parse()))) - } + .try_map(|s: &str| s.replace('_', "").parse()))) + }) .parse_next(input) } // dec-int = [ minus / plus ] unsigned-dec-int // unsigned-dec-int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) -pub(crate) fn dec_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { - ( - opt(one_of((b'+', b'-'))), - alt(( - ( - one_of(DIGIT1_9), - many0(alt(( - digit.value(()), - ( - one_of(b'_'), - cut_err(digit) - .context(Context::Expected(ParserValue::Description("digit"))), +pub(crate) fn dec_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "dec-int", + ( + opt(one_of((b'+', b'-'))), + alt(( + ( + one_of(DIGIT1_9), + repeat( + 0.., + alt(( + digit.value(()), + ( + one_of(b'_'), + cut_err(digit).context(StrContext::Expected( + StrContextValue::Description("digit"), + )), + ) + .value(()), + )), ) - .value(()), - ))) - .map(|()| ()), - ) - .value(()), - digit.value(()), - )), + .map(|()| ()), + ) + .value(()), + digit.value(()), + )), + ) + .recognize() + .map(|b: &[u8]| unsafe { + from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII") + }) + .context(StrContext::Label("integer")), ) - .recognize() - .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII") }) - .context(Context::Expression("integer")) - .parse_next(input) + .parse_next(input) } const DIGIT1_9: RangeInclusive<u8> = b'1'..=b'9'; // hex-prefix = %x30.78 ; 0x // hex-int = hex-prefix HEXDIG *( HEXDIG / underscore HEXDIG ) -pub(crate) fn hex_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { - preceded( - HEX_PREFIX, - cut_err(( - hexdig, - many0(alt(( - hexdig.value(()), - ( - one_of(b'_'), - cut_err(hexdig).context(Context::Expected(ParserValue::Description("digit"))), +pub(crate) fn hex_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "hex-int", + preceded( + HEX_PREFIX, + cut_err(( + hexdig, + repeat( + 0.., + alt(( + hexdig.value(()), + ( + one_of(b'_'), + cut_err(hexdig).context(StrContext::Expected( + StrContextValue::Description("digit"), + )), + ) + .value(()), + )), ) - .value(()), - ))) - .map(|()| ()), - )) - .recognize(), + .map(|()| ()), + )) + .recognize(), + ) + .map(|b| unsafe { from_utf8_unchecked(b, "`hexdig` and `_` filter out non-ASCII") }) + .context(StrContext::Label("hexadecimal integer")), ) - .map(|b| unsafe { from_utf8_unchecked(b, "`hexdig` and `_` filter out non-ASCII") }) - .context(Context::Expression("hexadecimal integer")) .parse_next(input) } const HEX_PREFIX: &[u8] = b"0x"; // oct-prefix = %x30.6F ; 0o // oct-int = oct-prefix digit0-7 *( digit0-7 / underscore digit0-7 ) -pub(crate) fn oct_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { - preceded( - OCT_PREFIX, - cut_err(( - one_of(DIGIT0_7), - many0(alt(( - one_of(DIGIT0_7).value(()), - ( - one_of(b'_'), - cut_err(one_of(DIGIT0_7)) - 
.context(Context::Expected(ParserValue::Description("digit"))), +pub(crate) fn oct_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "oct-int", + preceded( + OCT_PREFIX, + cut_err(( + one_of(DIGIT0_7), + repeat( + 0.., + alt(( + one_of(DIGIT0_7).value(()), + ( + one_of(b'_'), + cut_err(one_of(DIGIT0_7)).context(StrContext::Expected( + StrContextValue::Description("digit"), + )), + ) + .value(()), + )), ) - .value(()), - ))) - .map(|()| ()), - )) - .recognize(), + .map(|()| ()), + )) + .recognize(), + ) + .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_7` and `_` filter out non-ASCII") }) + .context(StrContext::Label("octal integer")), ) - .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_7` and `_` filter out non-ASCII") }) - .context(Context::Expression("octal integer")) .parse_next(input) } const OCT_PREFIX: &[u8] = b"0o"; @@ -132,26 +158,33 @@ const DIGIT0_7: RangeInclusive<u8> = b'0'..=b'7'; // bin-prefix = %x30.62 ; 0b // bin-int = bin-prefix digit0-1 *( digit0-1 / underscore digit0-1 ) -pub(crate) fn bin_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { - preceded( - BIN_PREFIX, - cut_err(( - one_of(DIGIT0_1), - many0(alt(( - one_of(DIGIT0_1).value(()), - ( - one_of(b'_'), - cut_err(one_of(DIGIT0_1)) - .context(Context::Expected(ParserValue::Description("digit"))), +pub(crate) fn bin_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "bin-int", + preceded( + BIN_PREFIX, + cut_err(( + one_of(DIGIT0_1), + repeat( + 0.., + alt(( + one_of(DIGIT0_1).value(()), + ( + one_of(b'_'), + cut_err(one_of(DIGIT0_1)).context(StrContext::Expected( + StrContextValue::Description("digit"), + )), + ) + .value(()), + )), ) - .value(()), - ))) - .map(|()| ()), - )) - .recognize(), + .map(|()| ()), + )) + .recognize(), + ) + .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_1` and `_` filter out non-ASCII") }) + .context(StrContext::Label("binary integer")), ) - .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_1` and `_` filter out non-ASCII") }) - .context(Context::Expression("binary integer")) .parse_next(input) } const BIN_PREFIX: &[u8] = b"0b"; @@ -162,20 +195,26 @@ const DIGIT0_1: RangeInclusive<u8> = b'0'..=b'1'; // float = float-int-part ( exp / frac [ exp ] ) // float =/ special-float // float-int-part = dec-int -pub(crate) fn float(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { - alt(( - float_.and_then(cut_err( - rest.map_res(|s: &str| s.replace('_', "").parse()) - .verify(|f: &f64| *f != f64::INFINITY), - )), - special_float, - )) - .context(Context::Expression("floating-point number")) +pub(crate) fn float(input: &mut Input<'_>) -> PResult<f64> { + trace( + "float", + alt(( + float_.and_then(cut_err( + rest.try_map(|s: &str| s.replace('_', "").parse()) + .verify(|f: &f64| *f != f64::INFINITY), + )), + special_float, + )) + .context(StrContext::Label("floating-point number")), + ) .parse_next(input) } -pub(crate) fn float_(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { - (dec_int, alt((exp, (frac, opt(exp)).map(|_| "")))) +pub(crate) fn float_<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + ( + dec_int, + alt((exp.void(), (frac.void(), opt(exp.void())).void())), + ) .recognize() .map(|b: &[u8]| unsafe { from_utf8_unchecked( @@ -188,10 +227,11 @@ pub(crate) fn float_(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<' // frac = decimal-point zero-prefixable-int // decimal-point = %x2E ; . 
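The hunks above and below all apply the same winnow upgrade mechanically: parsers now borrow the stream as `&mut Input<'_>` and return `PResult<O>` instead of consuming `Input<'_>` and returning `IResult<Input<'_>, O, ParserError<'_>>`; `map_res` is renamed `try_map`; `many0`/`many1` become `repeat(0.., ...)`/`repeat(1.., ...)`; `take_while0`/`take_while1`/`take_while_m_n` collapse into range-based `take_while`; and the crate-local `Context`/`ParserValue` error types give way to winnow's `StrContext`/`StrContextValue`. A minimal standalone sketch of the new style, assuming winnow 0.5 (the version this migration appears to target); it is illustrative only and not part of the vendored sources:

    // Sketch only, not part of the patch. Assumes winnow 0.5.
    use winnow::combinator::repeat;
    use winnow::token::one_of;
    use winnow::{PResult, Parser};

    // Old shape (winnow 0.3), roughly:
    //   fn digits(input: Input<'_>) -> IResult<Input<'_>, Vec<char>, ParserError<'_>>
    // New shape: the stream is threaded through by `&mut` and only the output is returned.
    fn digits(input: &mut &str) -> PResult<Vec<char>> {
        // `repeat(1.., ...)` subsumes the old `many1`; `repeat(0.., ...)` subsumes `many0`.
        repeat(1.., one_of('0'..='9')).parse_next(input)
    }

    fn main() {
        let mut input = "123abc";
        let parsed = digits(&mut input).expect("input starts with digits");
        assert_eq!(parsed, vec!['1', '2', '3']);
        assert_eq!(input, "abc"); // the matched prefix has been consumed from the stream
    }

The same shift explains the new `trace(...)` wrappers: once the closures take `&mut Input<'_>`, winnow's built-in `winnow::trace::trace` can replace the hand-rolled tracing helper this patch deletes from `parser/mod.rs`.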
-pub(crate) fn frac(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { +pub(crate) fn frac<'i>(input: &mut Input<'i>) -> PResult<&'i str> { ( b'.', - cut_err(zero_prefixable_int).context(Context::Expected(ParserValue::Description("digit"))), + cut_err(zero_prefixable_int) + .context(StrContext::Expected(StrContextValue::Description("digit"))), ) .recognize() .map(|b: &[u8]| unsafe { @@ -204,17 +244,21 @@ pub(crate) fn frac(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_> } // zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) -pub(crate) fn zero_prefixable_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { +pub(crate) fn zero_prefixable_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { ( digit, - many0(alt(( - digit.value(()), - ( - one_of(b'_'), - cut_err(digit).context(Context::Expected(ParserValue::Description("digit"))), - ) - .value(()), - ))) + repeat( + 0.., + alt(( + digit.value(()), + ( + one_of(b'_'), + cut_err(digit) + .context(StrContext::Expected(StrContextValue::Description("digit"))), + ) + .value(()), + )), + ) .map(|()| ()), ) .recognize() @@ -224,7 +268,7 @@ pub(crate) fn zero_prefixable_int(input: Input<'_>) -> IResult<Input<'_>, &str, // exp = "e" float-exp-part // float-exp-part = [ minus / plus ] zero-prefixable-int -pub(crate) fn exp(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { +pub(crate) fn exp<'i>(input: &mut Input<'i>) -> PResult<&'i str> { ( one_of((b'e', b'E')), opt(one_of([b'+', b'-'])), @@ -241,7 +285,7 @@ pub(crate) fn exp(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> } // special-float = [ minus / plus ] ( inf / nan ) -pub(crate) fn special_float(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { +pub(crate) fn special_float(input: &mut Input<'_>) -> PResult<f64> { (opt(one_of((b'+', b'-'))), alt((inf, nan))) .map(|(s, f)| match s { Some(b'+') | None => f, @@ -251,24 +295,24 @@ pub(crate) fn special_float(input: Input<'_>) -> IResult<Input<'_>, f64, ParserE .parse_next(input) } // inf = %x69.6e.66 ; inf -pub(crate) fn inf(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { +pub(crate) fn inf(input: &mut Input<'_>) -> PResult<f64> { tag(INF).value(f64::INFINITY).parse_next(input) } const INF: &[u8] = b"inf"; // nan = %x6e.61.6e ; nan -pub(crate) fn nan(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { +pub(crate) fn nan(input: &mut Input<'_>) -> PResult<f64> { tag(NAN).value(f64::NAN).parse_next(input) } const NAN: &[u8] = b"nan"; // DIGIT = %x30-39 ; 0-9 -pub(crate) fn digit(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { +pub(crate) fn digit(input: &mut Input<'_>) -> PResult<u8> { one_of(DIGIT).parse_next(input) } const DIGIT: RangeInclusive<u8> = b'0'..=b'9'; // HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" -pub(crate) fn hexdig(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { +pub(crate) fn hexdig(input: &mut Input<'_>) -> PResult<u8> { one_of(HEXDIG).parse_next(input) } pub(crate) const HEXDIG: (RangeInclusive<u8>, RangeInclusive<u8>, RangeInclusive<u8>) = diff --git a/vendor/toml_edit/src/parser/state.rs b/vendor/toml_edit/src/parser/state.rs index efa884d..b30ee3f 100644 --- a/vendor/toml_edit/src/parser/state.rs +++ b/vendor/toml_edit/src/parser/state.rs @@ -94,7 +94,7 @@ impl ParseState { Ok(()) } - pub(crate) fn start_aray_table( + pub(crate) fn start_array_table( &mut self, path: Vec<Key>, decor: Decor, @@ -297,7 +297,7 @@ impl ParseState { .take() .map(RawString::with_span) 
.unwrap_or_default(); - self.start_aray_table( + self.start_array_table( path, Decor::new(leading, RawString::with_span(trailing)), span, diff --git a/vendor/toml_edit/src/parser/strings.rs b/vendor/toml_edit/src/parser/strings.rs index 93d034c..26f9cc2 100644 --- a/vendor/toml_edit/src/parser/strings.rs +++ b/vendor/toml_edit/src/parser/strings.rs @@ -2,25 +2,24 @@ use std::borrow::Cow; use std::char; use std::ops::RangeInclusive; -use winnow::branch::alt; -use winnow::bytes::any; -use winnow::bytes::none_of; -use winnow::bytes::one_of; -use winnow::bytes::tag; -use winnow::bytes::take_while0; -use winnow::bytes::take_while1; -use winnow::bytes::take_while_m_n; +use winnow::combinator::alt; use winnow::combinator::cut_err; +use winnow::combinator::delimited; use winnow::combinator::fail; use winnow::combinator::opt; use winnow::combinator::peek; +use winnow::combinator::preceded; +use winnow::combinator::repeat; use winnow::combinator::success; -use winnow::multi::many0; -use winnow::multi::many1; +use winnow::combinator::terminated; use winnow::prelude::*; -use winnow::sequence::delimited; -use winnow::sequence::preceded; -use winnow::sequence::terminated; +use winnow::stream::Stream; +use winnow::token::any; +use winnow::token::none_of; +use winnow::token::one_of; +use winnow::token::tag; +use winnow::token::take_while; +use winnow::trace::trace; use crate::parser::errors::CustomError; use crate::parser::numbers::HEXDIG; @@ -30,49 +29,53 @@ use crate::parser::trivia::{from_utf8_unchecked, newline, ws, ws_newlines, NON_A // ;; String // string = ml-basic-string / basic-string / ml-literal-string / literal-string -pub(crate) fn string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { - alt(( - ml_basic_string, - basic_string, - ml_literal_string, - literal_string.map(Cow::Borrowed), - )) +pub(crate) fn string<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { + trace( + "string", + alt(( + ml_basic_string, + basic_string, + ml_literal_string, + literal_string.map(Cow::Borrowed), + )), + ) .parse_next(input) } // ;; Basic String // basic-string = quotation-mark *basic-char quotation-mark -pub(crate) fn basic_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { - let (mut input, _) = one_of(QUOTATION_MARK).parse_next(input)?; +pub(crate) fn basic_string<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { + trace("basic-string", |input: &mut Input<'i>| { + let _ = one_of(QUOTATION_MARK).parse_next(input)?; - let mut c = Cow::Borrowed(""); - if let Some((i, ci)) = ok_error(basic_chars.parse_next(input))? { - input = i; - c = ci; - } - while let Some((i, ci)) = ok_error(basic_chars.parse_next(input))? { - input = i; - c.to_mut().push_str(&ci); - } + let mut c = Cow::Borrowed(""); + if let Some(ci) = opt(basic_chars).parse_next(input)? { + c = ci; + } + while let Some(ci) = opt(basic_chars).parse_next(input)? 
{ + c.to_mut().push_str(&ci); + } - let (input, _) = cut_err(one_of(QUOTATION_MARK)) - .context(Context::Expression("basic string")) - .parse_next(input)?; + let _ = cut_err(one_of(QUOTATION_MARK)) + .context(StrContext::Label("basic string")) + .parse_next(input)?; - Ok((input, c)) + Ok(c) + }) + .parse_next(input) } // quotation-mark = %x22 ; " pub(crate) const QUOTATION_MARK: u8 = b'"'; // basic-char = basic-unescaped / escaped -fn basic_chars(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { +fn basic_chars<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { alt(( // Deviate from the official grammar by batching the unescaped chars so we build a string a // chunk at a time, rather than a `char` at a time. - take_while1(BASIC_UNESCAPED) - .map_res(std::str::from_utf8) + take_while(1.., BASIC_UNESCAPED) + .try_map(std::str::from_utf8) .map(Cow::Borrowed), escaped.map(|c| Cow::Owned(String::from(c))), )) @@ -89,7 +92,7 @@ pub(crate) const BASIC_UNESCAPED: ( ) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII); // escaped = escape escape-seq-char -fn escaped(input: Input<'_>) -> IResult<Input<'_>, char, ParserError<'_>> { +fn escaped(input: &mut Input<'_>) -> PResult<char> { preceded(ESCAPE, escape_seq_char).parse_next(input) } @@ -105,42 +108,40 @@ pub(crate) const ESCAPE: u8 = b'\\'; // escape-seq-char =/ %x74 ; t tab U+0009 // escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX // escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX -fn escape_seq_char(input: Input<'_>) -> IResult<Input<'_>, char, ParserError<'_>> { +fn escape_seq_char(input: &mut Input<'_>) -> PResult<char> { dispatch! {any; b'b' => success('\u{8}'), b'f' => success('\u{c}'), b'n' => success('\n'), b'r' => success('\r'), b't' => success('\t'), - b'u' => cut_err(hexescape::<4>).context(Context::Expression("unicode 4-digit hex code")), - b'U' => cut_err(hexescape::<8>).context(Context::Expression("unicode 8-digit hex code")), + b'u' => cut_err(hexescape::<4>).context(StrContext::Label("unicode 4-digit hex code")), + b'U' => cut_err(hexescape::<8>).context(StrContext::Label("unicode 8-digit hex code")), b'\\' => success('\\'), b'"' => success('"'), _ => { cut_err(fail::<_, char, _>) - .context(Context::Expression("escape sequence")) - .context(Context::Expected(ParserValue::CharLiteral('b'))) - .context(Context::Expected(ParserValue::CharLiteral('f'))) - .context(Context::Expected(ParserValue::CharLiteral('n'))) - .context(Context::Expected(ParserValue::CharLiteral('r'))) - .context(Context::Expected(ParserValue::CharLiteral('t'))) - .context(Context::Expected(ParserValue::CharLiteral('u'))) - .context(Context::Expected(ParserValue::CharLiteral('U'))) - .context(Context::Expected(ParserValue::CharLiteral('\\'))) - .context(Context::Expected(ParserValue::CharLiteral('"'))) + .context(StrContext::Label("escape sequence")) + .context(StrContext::Expected(StrContextValue::CharLiteral('b'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('f'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('n'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('r'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('t'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('u'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('U'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\\'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) } } .parse_next(input) } -pub(crate) fn hexescape<const N: usize>( - input: 
Input<'_>, -) -> IResult<Input<'_>, char, ParserError<'_>> { - take_while_m_n(0, N, HEXDIG) +pub(crate) fn hexescape<const N: usize>(input: &mut Input<'_>) -> PResult<char> { + take_while(0..=N, HEXDIG) .verify(|b: &[u8]| b.len() == N) .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") }) .verify_map(|s| u32::from_str_radix(s, 16).ok()) - .map_res(|h| char::from_u32(h).ok_or(CustomError::OutOfRange)) + .try_map(|h| char::from_u32(h).ok_or(CustomError::OutOfRange)) .parse_next(input) } @@ -148,13 +149,16 @@ pub(crate) fn hexescape<const N: usize>( // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body // ml-basic-string-delim -fn ml_basic_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { - delimited( - ML_BASIC_STRING_DELIM, - preceded(opt(newline), cut_err(ml_basic_body)), - cut_err(ML_BASIC_STRING_DELIM), +fn ml_basic_string<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { + trace( + "ml-basic-string", + delimited( + ML_BASIC_STRING_DELIM, + preceded(opt(newline), cut_err(ml_basic_body)), + cut_err(ML_BASIC_STRING_DELIM), + ) + .context(StrContext::Label("multiline basic string")), ) - .context(Context::Expression("multiline basic string")) .parse_next(input) } @@ -162,24 +166,20 @@ fn ml_basic_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserE pub(crate) const ML_BASIC_STRING_DELIM: &[u8] = b"\"\"\""; // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] -fn ml_basic_body(mut input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { +fn ml_basic_body<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { let mut c = Cow::Borrowed(""); - if let Some((i, ci)) = ok_error(mlb_content.parse_next(input))? { - input = i; + if let Some(ci) = opt(mlb_content).parse_next(input)? { c = ci; } - while let Some((i, ci)) = ok_error(mlb_content.parse_next(input))? { - input = i; + while let Some(ci) = opt(mlb_content).parse_next(input)? { c.to_mut().push_str(&ci); } - while let Some((i, qi)) = ok_error(mlb_quotes(none_of(b'\"').value(())).parse_next(input))? { - if let Some((i, ci)) = ok_error(mlb_content.parse_next(i))? { - input = i; + while let Some(qi) = opt(mlb_quotes(none_of(b'\"').value(()))).parse_next(input)? { + if let Some(ci) = opt(mlb_content).parse_next(input)? { c.to_mut().push_str(qi); c.to_mut().push_str(&ci); - while let Some((i, ci)) = ok_error(mlb_content.parse_next(input))? { - input = i; + while let Some(ci) = opt(mlb_content).parse_next(input)? { c.to_mut().push_str(&ci); } } else { @@ -187,24 +187,21 @@ fn ml_basic_body(mut input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, Parse } } - if let Some((i, qi)) = - ok_error(mlb_quotes(tag(ML_BASIC_STRING_DELIM).value(())).parse_next(input))? - { - input = i; + if let Some(qi) = opt(mlb_quotes(tag(ML_BASIC_STRING_DELIM).value(()))).parse_next(input)? { c.to_mut().push_str(qi); } - Ok((input, c)) + Ok(c) } // mlb-content = mlb-char / newline / mlb-escaped-nl // mlb-char = mlb-unescaped / escaped -fn mlb_content(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { +fn mlb_content<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { alt(( // Deviate from the official grammar by batching the unescaped chars so we build a string a // chunk at a time, rather than a `char` at a time. 
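One behavioral subtlety of the migration shows up in the `mlb_quotes` hunk just below (and the matching `mll_quotes` hunk further down): with a `&mut` stream, a failed branch may already have consumed input, so the rewritten code rewinds explicitly via `Stream::checkpoint`/`reset` before attempting the shorter quote match. A standalone sketch of that pattern, assuming winnow 0.5; illustrative only, not part of the vendored sources:

    // Sketch only, not part of the patch. Assumes winnow 0.5.
    use winnow::error::ErrMode;
    use winnow::stream::Stream;
    use winnow::token::tag;
    use winnow::{PResult, Parser};

    // Prefer a two-quote match; on backtrack, rewind and retry with one quote,
    // mirroring the shape of the rewritten `mlb_quotes`/`mll_quotes`.
    fn one_or_two_quotes<'i>(input: &mut &'i str) -> PResult<&'i str> {
        let start = input.checkpoint();
        match tag("''").parse_next(input) {
            Ok(o) => Ok(o),
            Err(ErrMode::Backtrack(_)) => {
                input.reset(start); // undo any partial consumption before retrying
                tag("'").parse_next(input)
            }
            Err(e) => Err(e),
        }
    }

    fn main() {
        let mut input = "'x";
        assert_eq!(one_or_two_quotes(&mut input).unwrap(), "'");
        assert_eq!(input, "x");
    }

In the old `FnMut(Input) -> IResult` style this rewind was implicit, since a failed branch simply never returned its advanced copy of the input.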
- take_while1(MLB_UNESCAPED) - .map_res(std::str::from_utf8) + take_while(1.., MLB_UNESCAPED) + .try_map(std::str::from_utf8) .map(Cow::Borrowed), // Order changed fromg grammar so `escaped` can more easily `cut_err` on bad escape sequences mlb_escaped_nl.map(|_| Cow::Borrowed("")), @@ -216,17 +213,21 @@ fn mlb_content(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError // mlb-quotes = 1*2quotation-mark fn mlb_quotes<'i>( - mut term: impl winnow::Parser<Input<'i>, (), ParserError<'i>>, -) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, &str, ParserError<'i>> { - move |input| { + mut term: impl winnow::Parser<Input<'i>, (), ContextError>, +) -> impl Parser<Input<'i>, &'i str, ContextError> { + move |input: &mut Input<'i>| { + let start = input.checkpoint(); let res = terminated(b"\"\"", peek(term.by_ref())) .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) .parse_next(input); match res { - Err(winnow::error::ErrMode::Backtrack(_)) => terminated(b"\"", peek(term.by_ref())) - .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) - .parse_next(input), + Err(winnow::error::ErrMode::Backtrack(_)) => { + input.reset(start); + terminated(b"\"", peek(term.by_ref())) + .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) + .parse_next(input) + } res => res, } } @@ -246,8 +247,8 @@ pub(crate) const MLB_UNESCAPED: ( // it will be trimmed along with all whitespace // (including newlines) up to the next non-whitespace // character or closing delimiter. -fn mlb_escaped_nl(input: Input<'_>) -> IResult<Input<'_>, (), ParserError<'_>> { - many1((ESCAPE, ws, ws_newlines)) +fn mlb_escaped_nl(input: &mut Input<'_>) -> PResult<()> { + repeat(1.., (ESCAPE, ws, ws_newlines)) .map(|()| ()) .value(()) .parse_next(input) @@ -256,14 +257,17 @@ fn mlb_escaped_nl(input: Input<'_>) -> IResult<Input<'_>, (), ParserError<'_>> { // ;; Literal String // literal-string = apostrophe *literal-char apostrophe -pub(crate) fn literal_string(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { - delimited( - APOSTROPHE, - cut_err(take_while0(LITERAL_CHAR)), - cut_err(APOSTROPHE), +pub(crate) fn literal_string<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + trace( + "literal-string", + delimited( + APOSTROPHE, + cut_err(take_while(0.., LITERAL_CHAR)), + cut_err(APOSTROPHE), + ) + .try_map(std::str::from_utf8) + .context(StrContext::Label("literal string")), ) - .map_res(std::str::from_utf8) - .context(Context::Expression("literal string")) .parse_next(input) } @@ -282,19 +286,22 @@ pub(crate) const LITERAL_CHAR: ( // ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body // ml-literal-string-delim -fn ml_literal_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, ParserError<'_>> { - delimited( - (ML_LITERAL_STRING_DELIM, opt(newline)), - cut_err(ml_literal_body.map(|t| { - if t.contains("\r\n") { - Cow::Owned(t.replace("\r\n", "\n")) - } else { - Cow::Borrowed(t) - } - })), - cut_err(ML_LITERAL_STRING_DELIM), +fn ml_literal_string<'i>(input: &mut Input<'i>) -> PResult<Cow<'i, str>> { + trace( + "ml-literal-string", + delimited( + (ML_LITERAL_STRING_DELIM, opt(newline)), + cut_err(ml_literal_body.map(|t| { + if t.contains("\r\n") { + Cow::Owned(t.replace("\r\n", "\n")) + } else { + Cow::Borrowed(t) + } + })), + cut_err(ML_LITERAL_STRING_DELIM), + ) + .context(StrContext::Label("multiline literal string")), ) - .context(Context::Expression("multiline literal string")) .parse_next(input) } @@ -302,23 +309,26 @@ fn 
ml_literal_string(input: Input<'_>) -> IResult<Input<'_>, Cow<'_, str>, Parse pub(crate) const ML_LITERAL_STRING_DELIM: &[u8] = b"'''"; // ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] -fn ml_literal_body(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { +fn ml_literal_body<'i>(input: &mut Input<'i>) -> PResult<&'i str> { ( - many0(mll_content).map(|()| ()), - many0(( - mll_quotes(none_of(APOSTROPHE).value(())), - many1(mll_content).map(|()| ()), - )) + repeat(0.., mll_content).map(|()| ()), + repeat( + 0.., + ( + mll_quotes(none_of(APOSTROPHE).value(())), + repeat(1.., mll_content).map(|()| ()), + ), + ) .map(|()| ()), opt(mll_quotes(tag(ML_LITERAL_STRING_DELIM).value(()))), ) .recognize() - .map_res(std::str::from_utf8) + .try_map(std::str::from_utf8) .parse_next(input) } // mll-content = mll-char / newline -fn mll_content(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { +fn mll_content(input: &mut Input<'_>) -> PResult<u8> { alt((one_of(MLL_CHAR), newline)).parse_next(input) } @@ -332,17 +342,21 @@ const MLL_CHAR: ( // mll-quotes = 1*2apostrophe fn mll_quotes<'i>( - mut term: impl winnow::Parser<Input<'i>, (), ParserError<'i>>, -) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, &str, ParserError<'i>> { - move |input| { + mut term: impl winnow::Parser<Input<'i>, (), ContextError>, +) -> impl Parser<Input<'i>, &'i str, ContextError> { + move |input: &mut Input<'i>| { + let start = input.checkpoint(); let res = terminated(b"''", peek(term.by_ref())) .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) .parse_next(input); match res { - Err(winnow::error::ErrMode::Backtrack(_)) => terminated(b"'", peek(term.by_ref())) - .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) - .parse_next(input), + Err(winnow::error::ErrMode::Backtrack(_)) => { + input.reset(start); + terminated(b"'", peek(term.by_ref())) + .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) + .parse_next(input) + } res => res, } } diff --git a/vendor/toml_edit/src/parser/table.rs b/vendor/toml_edit/src/parser/table.rs index a6085e4..0ace0c7 100644 --- a/vendor/toml_edit/src/parser/table.rs +++ b/vendor/toml_edit/src/parser/table.rs @@ -2,10 +2,10 @@ use std::cell::RefCell; #[allow(unused_imports)] use std::ops::DerefMut; -use winnow::bytes::take; use winnow::combinator::cut_err; +use winnow::combinator::delimited; use winnow::combinator::peek; -use winnow::sequence::delimited; +use winnow::token::take; // https://github.com/rust-lang/rust/issues/41358 use crate::parser::key::key; @@ -27,22 +27,22 @@ const ARRAY_TABLE_CLOSE: &[u8] = b"]]"; // std-table = std-table-open key *( table-key-sep key) std-table-close pub(crate) fn std_table<'s, 'i>( state: &'s RefCell<ParseState>, -) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { - move |i| { +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { ( delimited( STD_TABLE_OPEN, cut_err(key), cut_err(STD_TABLE_CLOSE) - .context(Context::Expected(ParserValue::CharLiteral('.'))) - .context(Context::Expected(ParserValue::StringLiteral("]"))), + .context(StrContext::Expected(StrContextValue::CharLiteral('.'))) + .context(StrContext::Expected(StrContextValue::StringLiteral("]"))), ) .with_span(), cut_err(line_trailing) - .context(Context::Expected(ParserValue::CharLiteral('\n'))) - .context(Context::Expected(ParserValue::CharLiteral('#'))), + .context(StrContext::Expected(StrContextValue::CharLiteral('\n'))) + 
.context(StrContext::Expected(StrContextValue::CharLiteral('#'))), ) - .map_res(|((h, span), t)| state.borrow_mut().deref_mut().on_std_header(h, t, span)) + .try_map(|((h, span), t)| state.borrow_mut().deref_mut().on_std_header(h, t, span)) .parse_next(i) } } @@ -52,22 +52,22 @@ pub(crate) fn std_table<'s, 'i>( // array-table = array-table-open key *( table-key-sep key) array-table-close pub(crate) fn array_table<'s, 'i>( state: &'s RefCell<ParseState>, -) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { - move |i| { +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { ( delimited( ARRAY_TABLE_OPEN, cut_err(key), cut_err(ARRAY_TABLE_CLOSE) - .context(Context::Expected(ParserValue::CharLiteral('.'))) - .context(Context::Expected(ParserValue::StringLiteral("]]"))), + .context(StrContext::Expected(StrContextValue::CharLiteral('.'))) + .context(StrContext::Expected(StrContextValue::StringLiteral("]]"))), ) .with_span(), cut_err(line_trailing) - .context(Context::Expected(ParserValue::CharLiteral('\n'))) - .context(Context::Expected(ParserValue::CharLiteral('#'))), + .context(StrContext::Expected(StrContextValue::CharLiteral('\n'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('#'))), ) - .map_res(|((h, span), t)| state.borrow_mut().deref_mut().on_array_header(h, t, span)) + .try_map(|((h, span), t)| state.borrow_mut().deref_mut().on_array_header(h, t, span)) .parse_next(i) } } @@ -77,13 +77,13 @@ pub(crate) fn array_table<'s, 'i>( // table = std-table / array-table pub(crate) fn table<'s, 'i>( state: &'s RefCell<ParseState>, -) -> impl FnMut(Input<'i>) -> IResult<Input<'i>, (), ParserError<'i>> + 's { - move |i| { +) -> impl Parser<Input<'i>, (), ContextError> + 's { + move |i: &mut Input<'i>| { dispatch!(peek::<_, &[u8],_,_>(take(2usize)); b"[[" => array_table(state), _ => std_table(state), ) - .context(Context::Expression("table header")) + .context(StrContext::Label("table header")) .parse_next(i) } } diff --git a/vendor/toml_edit/src/parser/trivia.rs b/vendor/toml_edit/src/parser/trivia.rs index 97ff49c..a359805 100644 --- a/vendor/toml_edit/src/parser/trivia.rs +++ b/vendor/toml_edit/src/parser/trivia.rs @@ -1,15 +1,13 @@ use std::ops::RangeInclusive; -use winnow::branch::alt; -use winnow::bytes::one_of; -use winnow::bytes::take_while0; -use winnow::bytes::take_while1; +use winnow::combinator::alt; use winnow::combinator::eof; use winnow::combinator::opt; -use winnow::multi::many0; -use winnow::multi::many1; +use winnow::combinator::repeat; +use winnow::combinator::terminated; use winnow::prelude::*; -use winnow::sequence::terminated; +use winnow::token::one_of; +use winnow::token::take_while; use crate::parser::prelude::*; @@ -30,8 +28,8 @@ pub(crate) unsafe fn from_utf8_unchecked<'b>( pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t'); // ws = *wschar -pub(crate) fn ws(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { - take_while0(WSCHAR) +pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + take_while(0.., WSCHAR) .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") }) .parse_next(input) } @@ -50,15 +48,15 @@ pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) = pub(crate) const COMMENT_START_SYMBOL: u8 = b'#'; // comment = comment-start-symbol *non-eol -pub(crate) fn comment(input: Input<'_>) -> IResult<Input<'_>, &[u8], ParserError<'_>> { - (COMMENT_START_SYMBOL, take_while0(NON_EOL)) +pub(crate) fn comment<'i>(input: &mut Input<'i>) -> 
PResult<&'i [u8]> { + (COMMENT_START_SYMBOL, take_while(0.., NON_EOL)) .recognize() .parse_next(input) } // newline = ( %x0A / ; LF // %x0D.0A ) ; CRLF -pub(crate) fn newline(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { +pub(crate) fn newline(input: &mut Input<'_>) -> PResult<u8> { alt(( one_of(LF).value(b'\n'), (one_of(CR), one_of(LF)).value(b'\n'), @@ -69,18 +67,19 @@ pub(crate) const LF: u8 = b'\n'; pub(crate) const CR: u8 = b'\r'; // ws-newline = *( wschar / newline ) -pub(crate) fn ws_newline(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { - many0(alt((newline.value(&b"\n"[..]), take_while1(WSCHAR)))) - .map(|()| ()) - .recognize() - .map(|b| unsafe { - from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") - }) - .parse_next(input) +pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + repeat( + 0.., + alt((newline.value(&b"\n"[..]), take_while(1.., WSCHAR))), + ) + .map(|()| ()) + .recognize() + .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") }) + .parse_next(input) } // ws-newlines = newline *( wschar / newline ) -pub(crate) fn ws_newlines(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { +pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> { (newline, ws_newline) .recognize() .map(|b| unsafe { @@ -91,11 +90,18 @@ pub(crate) fn ws_newlines(input: Input<'_>) -> IResult<Input<'_>, &str, ParserEr // note: this rule is not present in the original grammar // ws-comment-newline = *( ws-newline-nonempty / comment ) -pub(crate) fn ws_comment_newline(input: Input<'_>) -> IResult<Input<'_>, &[u8], ParserError<'_>> { - many0(alt(( - many1(alt((take_while1(WSCHAR), newline.value(&b"\n"[..])))).map(|()| ()), - comment.value(()), - ))) +pub(crate) fn ws_comment_newline<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> { + repeat( + 0.., + alt(( + repeat( + 1.., + alt((take_while(1.., WSCHAR), newline.value(&b"\n"[..]))), + ) + .map(|()| ()), + comment.value(()), + )), + ) .map(|()| ()) .recognize() .parse_next(input) @@ -103,15 +109,13 @@ pub(crate) fn ws_comment_newline(input: Input<'_>) -> IResult<Input<'_>, &[u8], // note: this rule is not present in the original grammar // line-ending = newline / eof -pub(crate) fn line_ending(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { +pub(crate) fn line_ending<'i>(input: &mut Input<'i>) -> PResult<&'i str> { alt((newline.value("\n"), eof.value(""))).parse_next(input) } // note: this rule is not present in the original grammar // line-trailing = ws [comment] skip-line-ending -pub(crate) fn line_trailing( - input: Input<'_>, -) -> IResult<Input<'_>, std::ops::Range<usize>, ParserError<'_>> { +pub(crate) fn line_trailing(input: &mut Input<'_>) -> PResult<std::ops::Range<usize>> { terminated((ws, opt(comment)).span(), line_ending).parse_next(input) } diff --git a/vendor/toml_edit/src/parser/value.rs b/vendor/toml_edit/src/parser/value.rs index 63a8211..14cd951 100644 --- a/vendor/toml_edit/src/parser/value.rs +++ b/vendor/toml_edit/src/parser/value.rs @@ -1,7 +1,7 @@ -use winnow::branch::alt; -use winnow::bytes::any; +use winnow::combinator::alt; use winnow::combinator::fail; use winnow::combinator::peek; +use winnow::token::any; use crate::parser::array::array; use crate::parser::datetime::date_time; @@ -15,10 +15,8 @@ use crate::RawString; use crate::Value; // val = string / boolean / array / inline-table / date-time / float / integer -pub(crate) fn value( - check: RecursionCheck, -) -> 
impl FnMut(Input<'_>) -> IResult<Input<'_>, v::Value, ParserError<'_>> { - move |input| { +pub(crate) fn value<'i>(check: RecursionCheck) -> impl Parser<Input<'i>, v::Value, ContextError> { + move |input: &mut Input<'i>| { dispatch!{peek(any); crate::parser::strings::QUOTATION_MARK | crate::parser::strings::APOSTROPHE => string.map(|s| { @@ -44,47 +42,47 @@ pub(crate) fn value( b'_' => { integer .map(v::Value::from) - .context(Context::Expected(ParserValue::Description("leading digit"))) + .context(StrContext::Expected(StrContextValue::Description("leading digit"))) }, // Report as if they were numbers because its most likely a typo b'.' => { float .map(v::Value::from) - .context(Context::Expected(ParserValue::Description("leading digit"))) + .context(StrContext::Expected(StrContextValue::Description("leading digit"))) }, b't' => { crate::parser::numbers::true_.map(v::Value::from) - .context(Context::Expression("string")) - .context(Context::Expected(ParserValue::CharLiteral('"'))) - .context(Context::Expected(ParserValue::CharLiteral('\''))) + .context(StrContext::Label("string")) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\''))) }, b'f' => { crate::parser::numbers::false_.map(v::Value::from) - .context(Context::Expression("string")) - .context(Context::Expected(ParserValue::CharLiteral('"'))) - .context(Context::Expected(ParserValue::CharLiteral('\''))) + .context(StrContext::Label("string")) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\''))) }, b'i' => { crate::parser::numbers::inf.map(v::Value::from) - .context(Context::Expression("string")) - .context(Context::Expected(ParserValue::CharLiteral('"'))) - .context(Context::Expected(ParserValue::CharLiteral('\''))) + .context(StrContext::Label("string")) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\''))) }, b'n' => { crate::parser::numbers::nan.map(v::Value::from) - .context(Context::Expression("string")) - .context(Context::Expected(ParserValue::CharLiteral('"'))) - .context(Context::Expected(ParserValue::CharLiteral('\''))) + .context(StrContext::Label("string")) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\''))) }, _ => { fail - .context(Context::Expression("string")) - .context(Context::Expected(ParserValue::CharLiteral('"'))) - .context(Context::Expected(ParserValue::CharLiteral('\''))) + .context(StrContext::Label("string")) + .context(StrContext::Expected(StrContextValue::CharLiteral('"'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('\''))) }, } .with_span() - .map_res(|(value, span)| apply_raw(value, span)) + .try_map(|(value, span)| apply_raw(value, span)) .parse_next(input) } } diff --git a/vendor/toml_edit/src/ser/map.rs b/vendor/toml_edit/src/ser/map.rs index d743e3d..951a5cb 100644 --- a/vendor/toml_edit/src/ser/map.rs +++ b/vendor/toml_edit/src/ser/map.rs @@ -1,4 +1,4 @@ -use super::{Error, KeySerializer}; +use super::{Error, KeySerializer, ValueSerializer}; #[doc(hidden)] pub enum SerializeMap { @@ -174,7 +174,8 @@ impl serde::ser::SerializeMap for SerializeInlineTable { where T: serde::ser::Serialize, { - let res = value.serialize(super::ValueSerializer {}); + let mut value_serializer = MapValueSerializer::new(); + let res = value.serialize(&mut value_serializer); 
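+        // `MapValueSerializer` tracks whether the serialized value was a bare
+        // `None`, so the match below can skip such a field without swallowing
+        // `UnsupportedNone` errors raised by nested values.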
match res { Ok(item) => { let key = self.key.take().unwrap(); @@ -185,7 +186,7 @@ impl serde::ser::SerializeMap for SerializeInlineTable { self.items.insert(key, kv); } Err(e) => { - if e != Error::UnsupportedNone { + if !(e == Error::UnsupportedNone && value_serializer.is_none) { return Err(e); } } @@ -210,7 +211,8 @@ impl serde::ser::SerializeStruct for SerializeInlineTable { where T: serde::ser::Serialize, { - let res = value.serialize(super::ValueSerializer {}); + let mut value_serializer = MapValueSerializer::new(); + let res = value.serialize(&mut value_serializer); match res { Ok(item) => { let kv = crate::table::TableKeyValue::new( @@ -220,7 +222,7 @@ impl serde::ser::SerializeStruct for SerializeInlineTable { self.items.insert(crate::InternalString::from(key), kv); } Err(e) => { - if e != Error::UnsupportedNone { + if !(e == Error::UnsupportedNone && value_serializer.is_none) { return Err(e); } } @@ -403,3 +405,183 @@ impl serde::ser::Serializer for DatetimeFieldSerializer { Err(Error::DateInvalid) } } + +#[derive(Default)] +struct MapValueSerializer { + is_none: bool, +} + +impl MapValueSerializer { + fn new() -> Self { + Self { is_none: false } + } +} + +impl serde::ser::Serializer for &mut MapValueSerializer { + type Ok = crate::Value; + type Error = Error; + type SerializeSeq = super::SerializeValueArray; + type SerializeTuple = super::SerializeValueArray; + type SerializeTupleStruct = super::SerializeValueArray; + type SerializeTupleVariant = super::SerializeValueArray; + type SerializeMap = super::SerializeMap; + type SerializeStruct = super::SerializeMap; + type SerializeStructVariant = serde::ser::Impossible<Self::Ok, Self::Error>; + + fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_bool(v) + } + + fn serialize_i8(self, v: i8) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_i8(v) + } + + fn serialize_i16(self, v: i16) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_i16(v) + } + + fn serialize_i32(self, v: i32) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_i32(v) + } + + fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_i64(v) + } + + fn serialize_u8(self, v: u8) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_u8(v) + } + + fn serialize_u16(self, v: u16) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_u16(v) + } + + fn serialize_u32(self, v: u32) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_u32(v) + } + + fn serialize_u64(self, v: u64) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_u64(v) + } + + fn serialize_f32(self, v: f32) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_f32(v) + } + + fn serialize_f64(self, v: f64) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_f64(v) + } + + fn serialize_char(self, v: char) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_char(v) + } + + fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_str(v) + } + + fn serialize_bytes(self, value: &[u8]) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_bytes(value) + } + + fn serialize_none(self) -> Result<Self::Ok, Self::Error> { + self.is_none = true; + Err(Error::UnsupportedNone) + } + + fn serialize_some<T: ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error> + where + T: serde::ser::Serialize, 
+ { + ValueSerializer::new().serialize_some(value) + } + + fn serialize_unit(self) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_unit() + } + + fn serialize_unit_struct(self, name: &'static str) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_unit_struct(name) + } + + fn serialize_unit_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + ) -> Result<Self::Ok, Self::Error> { + ValueSerializer::new().serialize_unit_variant(name, variant_index, variant) + } + + fn serialize_newtype_struct<T: ?Sized>( + self, + name: &'static str, + value: &T, + ) -> Result<Self::Ok, Self::Error> + where + T: serde::ser::Serialize, + { + ValueSerializer::new().serialize_newtype_struct(name, value) + } + + fn serialize_newtype_variant<T: ?Sized>( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result<Self::Ok, Self::Error> + where + T: serde::ser::Serialize, + { + ValueSerializer::new().serialize_newtype_variant(name, variant_index, variant, value) + } + + fn serialize_seq(self, len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> { + ValueSerializer::new().serialize_seq(len) + } + + fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple, Self::Error> { + ValueSerializer::new().serialize_tuple(len) + } + + fn serialize_tuple_struct( + self, + name: &'static str, + len: usize, + ) -> Result<Self::SerializeTupleStruct, Self::Error> { + ValueSerializer::new().serialize_tuple_struct(name, len) + } + + fn serialize_tuple_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result<Self::SerializeTupleVariant, Self::Error> { + ValueSerializer::new().serialize_tuple_variant(name, variant_index, variant, len) + } + + fn serialize_map(self, len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { + ValueSerializer::new().serialize_map(len) + } + + fn serialize_struct( + self, + name: &'static str, + len: usize, + ) -> Result<Self::SerializeStruct, Self::Error> { + ValueSerializer::new().serialize_struct(name, len) + } + + fn serialize_struct_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result<Self::SerializeStructVariant, Self::Error> { + ValueSerializer::new().serialize_struct_variant(name, variant_index, variant, len) + } +} diff --git a/vendor/toml_edit/src/ser/mod.rs b/vendor/toml_edit/src/ser/mod.rs index 2c31020..7f99930 100644 --- a/vendor/toml_edit/src/ser/mod.rs +++ b/vendor/toml_edit/src/ser/mod.rs @@ -24,7 +24,7 @@ pub enum Error { OutOfRange(Option<&'static str>), /// `None` could not be serialized to TOML UnsupportedNone, - /// Key was not convertable to `String` for serializing to TOML + /// Key was not convertible to `String` for serializing to TOML KeyNotString, /// A serialized date was invalid DateInvalid, diff --git a/vendor/toml_edit/src/ser/value.rs b/vendor/toml_edit/src/ser/value.rs index cc7dfb7..d29390a 100644 --- a/vendor/toml_edit/src/ser/value.rs +++ b/vendor/toml_edit/src/ser/value.rs @@ -167,15 +167,18 @@ impl serde::ser::Serializer for ValueSerializer { fn serialize_newtype_variant<T: ?Sized>( self, - name: &'static str, + _name: &'static str, _variant_index: u32, - _variant: &'static str, - _value: &T, + variant: &'static str, + value: &T, ) -> Result<Self::Ok, Self::Error> where T: serde::ser::Serialize, { - Err(Error::UnsupportedType(Some(name))) + let value = value.serialize(self)?; + let mut table = 
crate::InlineTable::new(); + table.insert(variant, value); + Ok(table.into()) } fn serialize_seq(self, len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> { diff --git a/vendor/toml_edit/src/table.rs b/vendor/toml_edit/src/table.rs index f177028..45d6d61 100644 --- a/vendor/toml_edit/src/table.rs +++ b/vendor/toml_edit/src/table.rs @@ -265,7 +265,7 @@ impl Table { self.items.iter().filter(|i| !(i.1).value.is_none()).count() } - /// Returns true iff the table is empty. + /// Returns true if the table is empty. pub fn is_empty(&self) -> bool { self.len() == 0 } @@ -340,7 +340,7 @@ impl Table { }) } - /// Returns true iff the table contains an item with the given key. + /// Returns true if the table contains an item with the given key. pub fn contains_key(&self, key: &str) -> bool { if let Some(kv) = self.items.get(key) { !kv.value.is_none() @@ -349,7 +349,7 @@ impl Table { } } - /// Returns true iff the table contains a table with the given key. + /// Returns true if the table contains a table with the given key. pub fn contains_table(&self, key: &str) -> bool { if let Some(kv) = self.items.get(key) { kv.value.is_table() @@ -358,7 +358,7 @@ impl Table { } } - /// Returns true iff the table contains a value with the given key. + /// Returns true if the table contains a value with the given key. pub fn contains_value(&self, key: &str) -> bool { if let Some(kv) = self.items.get(key) { kv.value.is_value() @@ -367,7 +367,7 @@ impl Table { } } - /// Returns true iff the table contains an array of tables with the given key. + /// Returns true if the table contains an array of tables with the given key. pub fn contains_array_of_tables(&self, key: &str) -> bool { if let Some(kv) = self.items.get(key) { kv.value.is_array_of_tables() @@ -397,6 +397,20 @@ impl Table { pub fn remove_entry(&mut self, key: &str) -> Option<(Key, Item)> { self.items.shift_remove(key).map(|kv| (kv.key, kv.value)) } + + /// Retains only the elements specified by the `keep` predicate. + /// + /// In other words, remove all pairs `(key, item)` for which + /// `keep(&key, &mut item)` returns `false`. + /// + /// The elements are visited in iteration order. + pub fn retain<F>(&mut self, mut keep: F) + where + F: FnMut(&str, &mut Item) -> bool, + { + self.items + .retain(|key, key_value| keep(key, &mut key_value.value)); + } } impl std::fmt::Display for Table { @@ -502,7 +516,7 @@ pub trait TableLike: crate::private::Sealed { fn len(&self) -> usize { self.iter().filter(|&(_, v)| !v.is_none()).count() } - /// Returns true iff the table is empty. + /// Returns true if the table is empty. fn is_empty(&self) -> bool { self.len() == 0 } @@ -520,7 +534,7 @@ pub trait TableLike: crate::private::Sealed { fn get_key_value<'a>(&'a self, key: &str) -> Option<(&'a Key, &'a Item)>; /// Return mutable references to the key-value pair stored for key, if it is present, else None. fn get_key_value_mut<'a>(&'a mut self, key: &str) -> Option<(KeyMut<'a>, &'a mut Item)>; - /// Returns true iff the table contains an item with the given key. + /// Returns true if the table contains an item with the given key. fn contains_key(&self, key: &str) -> bool; /// Inserts a key-value pair into the map. 
fn insert(&mut self, key: &str, value: Item) -> Option<Item>; diff --git a/vendor/toml_edit/tests/testsuite/parse.rs b/vendor/toml_edit/tests/testsuite/parse.rs index 9f59f69..f1c3c27 100644 --- a/vendor/toml_edit/tests/testsuite/parse.rs +++ b/vendor/toml_edit/tests/testsuite/parse.rs @@ -1475,3 +1475,16 @@ p.a=4 let actual = document.to_string(); assert_eq(input, actual); } + +#[test] +fn despan_keys() { + let mut doc = r#"aaaaaa = 1"#.parse::<Document>().unwrap(); + let key = "bbb".parse::<Key>().unwrap(); + let table = doc.as_table_mut(); + table.insert_formatted( + &key, + toml_edit::Item::Value(Value::Integer(toml_edit::Formatted::new(2))), + ); + + assert_eq!(doc.to_string(), "aaaaaa = 1\nbbb = 2\n"); +} diff --git a/vendor/unicode-ident/.cargo-checksum.json b/vendor/unicode-ident/.cargo-checksum.json new file mode 100644 index 0000000..9a708d8 --- /dev/null +++ b/vendor/unicode-ident/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"7b10355305359d5feefb120329396a8823ce903cd66612d7d27612d51e6ceced","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","LICENSE-UNICODE":"68f5b9f5ea36881a0942ba02f558e9e1faf76cc09cb165ad801744c61b738844","README.md":"eff1f30712e93cc160101c25bf31738448c284b90636deb3e3a651cb9ad20dd1","benches/xid.rs":"a61f61ecc7d5124c759cdeb55ab74470ab69f2f3ca37613da65f16e0e5e33487","src/lib.rs":"2673969775cff349816e3fb30f62476a802523fe4940482288b75bd747cbe748","src/tables.rs":"ffe8e252eabccf261385865cb781b3d76c9f32f6f9503d00196a30fb92d80b29","tests/compare.rs":"62471ffb157744cac6faae1adafdbdf785349d7eb6dc2ff4b4941c9d618397f9","tests/fst/mod.rs":"69a3aaf59acd8bca962ecc6234be56be8c0934ab79b253162f10eb881523901f","tests/fst/xid_continue.fst":"41fc751514b8bde658544d5fe7e100115d299d41897af855934b9f4ebda9d3a2","tests/fst/xid_start.fst":"ffa5e2bfe7dd5f6738fbe4b7a3e6e2083c9777191c54f8291a80d558ec4e2dd2","tests/roaring/mod.rs":"784f65a48477fab7549620c7843c7ad6da533f69a18abca1172f6acb95045e53","tests/static_size.rs":"4524332c1e424cb987d7cee1f47a98aea9ed7b256303a3828eda5aa1d06da240","tests/tables/mod.rs":"e6949172d10fc4b2431ce7546269bfd4f9146454c8c3e31faf5e5d80c16a8ab6","tests/tables/tables.rs":"011404dab8a3958da6e18a1fe9406c191675e6f49bf30ce813e3d05f582e750b","tests/trie/mod.rs":"d4acbb716bcbaf80660039797f45e138ed8bbd66749fa3b19b1a971574679cc9","tests/trie/trie.rs":"3c1ca56062f1b3ffdf2ae2063d3fee8d362b90082778056181b5c95e2e242ad8"},"package":"3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"} \ No newline at end of file diff --git a/vendor/unicode-ident/Cargo.toml b/vendor/unicode-ident/Cargo.toml new file mode 100644 index 0000000..1c40dde --- /dev/null +++ b/vendor/unicode-ident/Cargo.toml @@ -0,0 +1,63 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2018" +rust-version = "1.31" +name = "unicode-ident" +version = "1.0.12" +authors = ["David Tolnay <dtolnay@gmail.com>"] +description = "Determine whether characters have the XID_Start or XID_Continue properties according to Unicode Standard Annex #31" +documentation = "https://docs.rs/unicode-ident" +readme = "README.md" +keywords = [ + "unicode", + "xid", +] +categories = [ + "development-tools::procedural-macro-helpers", + "no-std", + "no-std::no-alloc", +] +license = "(MIT OR Apache-2.0) AND Unicode-DFS-2016" +repository = "https://github.com/dtolnay/unicode-ident" + +[package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] +targets = ["x86_64-unknown-linux-gnu"] + +[lib] +doc-scrape-examples = false + +[[bench]] +name = "xid" +harness = false + +[dev-dependencies.criterion] +version = "0.5" +default-features = false + +[dev-dependencies.fst] +version = "0.4" + +[dev-dependencies.rand] +version = "0.8" +features = ["small_rng"] + +[dev-dependencies.roaring] +version = "0.10" + +[dev-dependencies.ucd-trie] +version = "0.1" +default-features = false + +[dev-dependencies.unicode-xid] +version = "0.2.4" diff --git a/vendor/unicode-ident/LICENSE-APACHE b/vendor/unicode-ident/LICENSE-APACHE new file mode 100644 index 0000000..1b5ec8b --- /dev/null +++ b/vendor/unicode-ident/LICENSE-APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS diff --git a/vendor/unicode-ident/LICENSE-MIT b/vendor/unicode-ident/LICENSE-MIT new file mode 100644 index 0000000..31aa793 --- /dev/null +++ b/vendor/unicode-ident/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/vendor/unicode-ident/LICENSE-UNICODE b/vendor/unicode-ident/LICENSE-UNICODE new file mode 100644 index 0000000..85d0d58 --- /dev/null +++ b/vendor/unicode-ident/LICENSE-UNICODE @@ -0,0 +1,46 @@ +UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE + +See Terms of Use <https://www.unicode.org/copyright.html> +for definitions of Unicode Inc.’s Data Files and Software. + +NOTICE TO USER: Carefully read the following legal agreement. +BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. +IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +THE DATA FILES OR SOFTWARE. + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 1991-2022 Unicode, Inc. All rights reserved. 
+Distributed under the Terms of Use in https://www.unicode.org/copyright.html. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. diff --git a/vendor/unicode-ident/README.md b/vendor/unicode-ident/README.md new file mode 100644 index 0000000..0e9af82 --- /dev/null +++ b/vendor/unicode-ident/README.md @@ -0,0 +1,283 @@ +Unicode ident +============= + +[<img alt="github" src="https://img.shields.io/badge/github-dtolnay/unicode--ident-8da0cb?style=for-the-badge&labelColor=555555&logo=github" height="20">](https://github.com/dtolnay/unicode-ident) +[<img alt="crates.io" src="https://img.shields.io/crates/v/unicode-ident.svg?style=for-the-badge&color=fc8d62&logo=rust" height="20">](https://crates.io/crates/unicode-ident) +[<img alt="docs.rs" src="https://img.shields.io/badge/docs.rs-unicode--ident-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs" height="20">](https://docs.rs/unicode-ident) +[<img alt="build status" src="https://img.shields.io/github/actions/workflow/status/dtolnay/unicode-ident/ci.yml?branch=master&style=for-the-badge" height="20">](https://github.com/dtolnay/unicode-ident/actions?query=branch%3Amaster) + +Implementation of [Unicode Standard Annex #31][tr31] for determining which +`char` values are valid in programming language identifiers. + +[tr31]: https://www.unicode.org/reports/tr31/ + +This crate is a better optimized implementation of the older `unicode-xid` +crate. This crate uses less static storage, and is able to classify both ASCII +and non-ASCII codepoints with better performance, 2–10× faster than +`unicode-xid`. + +<br> + +## Comparison of performance + +The following table shows a comparison between five Unicode identifier +implementations. + +- `unicode-ident` is this crate; +- [`unicode-xid`] is a widely used crate run by the "unicode-rs" org; +- `ucd-trie` and `fst` are two data structures supported by the [`ucd-generate`] tool; +- [`roaring`] is a Rust implementation of Roaring bitmap. 
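+
+All five crates answer the same two questions about a `char`. For orientation,
+here is a minimal use of this crate's public API (`is_xid_start` and
+`is_xid_continue`, the same calls the benchmark below exercises); the
+`is_ident` helper itself is an illustrative sketch, not part of any crate:
+
+```rust
+use unicode_ident::{is_xid_continue, is_xid_start};
+
+// A string is an identifier in the UAX #31 sense if its first char has
+// XID_Start and every later char has XID_Continue.
+fn is_ident(s: &str) -> bool {
+    let mut chars = s.chars();
+    match chars.next() {
+        Some(first) => is_xid_start(first) && chars.all(is_xid_continue),
+        None => false,
+    }
+}
+```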
+ +The *static storage* column shows the total size of `static` tables that the +crate bakes into your binary, measured in 1000s of bytes. + +The remaining columns show the **cost per call** to evaluate whether a single +`char` has the XID\_Start or XID\_Continue Unicode property, comparing across +different ratios of ASCII to non-ASCII codepoints in the input data. + +[`unicode-xid`]: https://github.com/unicode-rs/unicode-xid +[`ucd-generate`]: https://github.com/BurntSushi/ucd-generate +[`roaring`]: https://github.com/RoaringBitmap/roaring-rs + +| | static storage | 0% nonascii | 1% | 10% | 100% nonascii | +|---|---|---|---|---|---| +| **`unicode-ident`** | 10.1 K | 0.96 ns | 0.95 ns | 1.09 ns | 1.55 ns | +| **`unicode-xid`** | 11.5 K | 1.88 ns | 2.14 ns | 3.48 ns | 15.63 ns | +| **`ucd-trie`** | 10.2 K | 1.29 ns | 1.28 ns | 1.36 ns | 2.15 ns | +| **`fst`** | 139 K | 55.1 ns | 54.9 ns | 53.2 ns | 28.5 ns | +| **`roaring`** | 66.1 K | 2.78 ns | 3.09 ns | 3.37 ns | 4.70 ns | + +Source code for the benchmark is provided in the *bench* directory of this repo +and may be repeated by running `cargo criterion`. + +<br> + +## Comparison of data structures + +#### unicode-xid + +They use a sorted array of character ranges, and do a binary search to look up +whether a given character lands inside one of those ranges. + +```rust +static XID_Continue_table: [(char, char); 763] = [ + ('\u{30}', '\u{39}'), // 0-9 + ('\u{41}', '\u{5a}'), // A-Z + … + ('\u{e0100}', '\u{e01ef}'), +]; +``` + +The static storage used by this data structure scales with the number of +contiguous ranges of identifier codepoints in Unicode. Every table entry +consumes 8 bytes, because it consists of a pair of 32-bit `char` values. + +In some ranges of the Unicode codepoint space, this is quite a sparse +representation – there are some ranges where tens of thousands of adjacent +codepoints are all valid identifier characters. In other places, the +representation is quite inefficient. A character like `µ` (U+00B5) which is +surrounded by non-identifier codepoints consumes 64 bits in the table, while it +would be just 1 bit in a dense bitmap. + +On a system with 64-byte cache lines, binary searching the table touches 7 cache +lines on average. Each cache line fits only 8 table entries. Additionally, the +branching performed during the binary search is probably mostly unpredictable to +the branch predictor. + +Overall, the crate ends up being about 10× slower on non-ASCII input +compared to the fastest crate. + +A potential improvement would be to pack the table entries more compactly. +Rust's `char` type is a 21-bit integer padded to 32 bits, which means every +table entry is holding 22 bits of wasted space, adding up to 3.9 K. They could +instead fit every table entry into 6 bytes, leaving out some of the padding, for +a 25% improvement in space used. With some cleverness it may be possible to fit +in 5 bytes or even 4 bytes by storing a low char and an extent, instead of low +char and high char. I don't expect that performance would improve much but this +could be the most efficient for space across all the libraries, needing only +about 7 K to store. + +#### ucd-trie + +Their data structure is a compressed trie set specifically tailored for Unicode +codepoints. The design is credited to Raph Levien in [rust-lang/rust#33098].
+ +[rust-lang/rust#33098]: https://github.com/rust-lang/rust/pull/33098 + +```rust +pub struct TrieSet { + tree1_level1: &'static [u64; 32], + tree2_level1: &'static [u8; 992], + tree2_level2: &'static [u64], + tree3_level1: &'static [u8; 256], + tree3_level2: &'static [u8], + tree3_level3: &'static [u64], +} +``` + +It represents codepoint sets using a trie to achieve prefix compression. The +final states of the trie are embedded in leaves or "chunks", where each chunk is +a 64-bit integer. Each bit position of the integer corresponds to whether a +particular codepoint is in the set or not. These chunks are not just a compact +representation of the final states of the trie, but are also a form of suffix +compression. In particular, if multiple ranges of 64 contiguous codepoints have +the same Unicode properties, then they all map to the same chunk in the final +level of the trie. + +Being tailored for Unicode codepoints, this trie is partitioned into three +disjoint sets: tree1, tree2, tree3. The first set corresponds to codepoints \[0, +0x800), the second \[0x800, 0x10000) and the third \[0x10000, 0x110000). These +partitions conveniently correspond to the space of 1 or 2 byte UTF-8 encoded +codepoints, 3 byte UTF-8 encoded codepoints and 4 byte UTF-8 encoded codepoints, +respectively. + +Lookups in this data structure are significantly more efficient than binary +search. A lookup touches either 1, 2, or 3 cache lines based on which of the +trie partitions is being accessed. + +One possible performance improvement would be for this crate to expose a way to +query based on a UTF-8 encoded string, returning the Unicode property +corresponding to the first character in the string. Without such an API, the +caller is required to tokenize their UTF-8 encoded input data into `char`, hand +the `char` into `ucd-trie`, only for `ucd-trie` to undo that work by converting +back into the variable-length representation for trie traversal. + +#### fst + +Uses a [finite state transducer][fst]. This representation is built into +[ucd-generate] but I am not aware of any advantage over the `ucd-trie` +representation. In particular `ucd-trie` is optimized for storing Unicode +properties while `fst` is not. + +[fst]: https://github.com/BurntSushi/fst +[ucd-generate]: https://github.com/BurntSushi/ucd-generate + +As far as I can tell, the main thing that causes `fst` to have large size and +slow lookups for this use case relative to `ucd-trie` is that it does not +specialize for the fact that only 21 of the 32 bits in a `char` are meaningful. +There are some dense arrays in the structure with large ranges that could never +possibly be used. + +#### roaring + +This crate is a pure-Rust implementation of [Roaring Bitmap], a data structure +designed for storing sets of 32-bit unsigned integers. + +[Roaring Bitmap]: https://roaringbitmap.org/about/ + +Roaring bitmaps are compressed bitmaps which tend to outperform conventional +compressed bitmaps such as WAH, EWAH or Concise. In some instances, they can be +hundreds of times faster and they often offer significantly better compression. + +In this use case the performance was reasonably competitive but still +substantially slower than the Unicode-optimized crates. Meanwhile the +compression was significantly worse, requiring 6× as much storage for the +data structure. + +I also benchmarked the [`croaring`] crate which is an FFI wrapper around the C +reference implementation of Roaring Bitmap. 
This crate was consistently about +15% slower than pure-Rust `roaring`, which could just be FFI overhead. I did not +investigate further. + +[`croaring`]: https://crates.io/crates/croaring + +#### unicode-ident + +This crate is most similar to the `ucd-trie` library, in that it's based on +bitmaps stored in the leafs of a trie representation, achieving both prefix +compression and suffix compression. + +The key differences are: + +- Uses a single 2-level trie, rather than 3 disjoint partitions of different + depth each. +- Uses significantly larger chunks: 512 bits rather than 64 bits. +- Compresses the XID\_Start and XID\_Continue properties together + simultaneously, rather than duplicating identical trie leaf chunks across the + two. + +The following diagrams show the XID\_Start and XID\_Continue Unicode boolean +properties in uncompressed form, in row-major order: + +<table> +<tr><th>XID_Start</th><th>XID_Continue</th></tr> +<tr> +<td><img alt="XID_Start bitmap" width="256" src="https://user-images.githubusercontent.com/1940490/168647353-c6eeb922-afec-49b2-9ef5-c03e9d1e0760.png"></td> +<td><img alt="XID_Continue bitmap" width="256" src="https://user-images.githubusercontent.com/1940490/168647367-f447cca7-2362-4d7d-8cd7-d21c011d329b.png"></td> +</tr> +</table> + +Uncompressed, these would take 140 K to store, which is beyond what would be +reasonable. However, as you can see there is a large degree of similarity +between the two bitmaps and across the rows, which lends well to compression. + +This crate stores one 512-bit "row" of the above bitmaps in the leaf level of a +trie, and a single additional level to index into the leafs. It turns out there +are 124 unique 512-bit chunks across the two bitmaps so 7 bits are sufficient to +index them. + +The chunk size of 512 bits is selected as the size that minimizes the total size +of the data structure. A smaller chunk, like 256 or 128 bits, would achieve +better deduplication but require a larger index. A larger chunk would increase +redundancy in the leaf bitmaps. 512 bit chunks are the optimum for total size of +the index plus leaf bitmaps. + +In fact since there are only 124 unique chunks, we can use an 8-bit index with a +spare bit to index at the half-chunk level. This achieves an additional 8.5% +compression by eliminating redundancies between the second half of any chunk and +the first half of any other chunk. Note that this is not the same as using +chunks which are half the size, because it does not necessitate raising the size +of the trie's first level. + +In contrast to binary search or the `ucd-trie` crate, performing lookups in this +data structure is straight-line code with no need for branching. + +```asm +is_xid_start: + mov eax, edi + shr eax, 9 + lea rcx, [rip + unicode_ident::tables::TRIE_START] + add rcx, rax + xor eax, eax + cmp edi, 201728 + cmovb rax, rcx + test rax, rax + lea rcx, [rip + .L__unnamed_1] + cmovne rcx, rax + movzx eax, byte ptr [rcx] + shl rax, 5 + mov ecx, edi + shr ecx, 3 + and ecx, 63 + add rcx, rax + lea rax, [rip + unicode_ident::tables::LEAF] + mov al, byte ptr [rax + rcx] + and dil, 7 + mov ecx, edi + shr al, cl + and al, 1 + ret +``` + +<br> + +## License + +Use of the Unicode Character Database, as this crate does, is governed by the <a +href="LICENSE-UNICODE">Unicode License Agreement – Data Files and Software +(2016)</a>.
+ +All intellectual property within this crate that is **not generated** using the +Unicode Character Database as input is licensed under either of <a +href="LICENSE-APACHE">Apache License, Version 2.0</a> or <a +href="LICENSE-MIT">MIT license</a> at your option. + +The **generated** files incorporate tabular data derived from the Unicode +Character Database, together with intellectual property from the original source +code content of the crate. One must comply with the terms of both the Unicode +License Agreement and either of the Apache license or MIT license when those +generated files are involved. + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in this crate by you, as defined in the Apache-2.0 license, shall +be licensed as just described, without any additional terms or conditions. diff --git a/vendor/unicode-ident/benches/xid.rs b/vendor/unicode-ident/benches/xid.rs new file mode 100644 index 0000000..040c4e1 --- /dev/null +++ b/vendor/unicode-ident/benches/xid.rs @@ -0,0 +1,124 @@ +// To run: +// +// cargo criterion --features criterion/real_blackbox +// +// This benchmarks each of the different libraries at several ratios of ASCII to +// non-ASCII content. There is one additional benchmark labeled "baseline" which +// just iterates over characters in a string, converting UTF-8 to 32-bit chars. +// +// Criterion will show a time in milliseconds. The non-baseline bench functions +// each make one million function calls (2 calls per character, 500K characters +// in the strings created by gen_string). The "time per call" listed in our +// readme is computed by subtracting this baseline from the other bench +// functions' time, then dividing by one million (ms -> ns). + +#![allow(clippy::needless_pass_by_value)] + +#[path = "../tests/fst/mod.rs"] +mod fst; +#[path = "../tests/roaring/mod.rs"] +mod roaring; +#[path = "../tests/trie/mod.rs"] +mod trie; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use rand::distributions::{Bernoulli, Distribution, Uniform}; +use rand::rngs::SmallRng; +use rand::SeedableRng; +use std::time::Duration; + +fn gen_string(p_nonascii: u32) -> String { + let mut rng = SmallRng::from_seed([b'!'; 32]); + let pick_nonascii = Bernoulli::from_ratio(p_nonascii, 100).unwrap(); + let ascii = Uniform::new_inclusive('\0', '\x7f'); + let nonascii = Uniform::new_inclusive(0x80 as char, char::MAX); + + let mut string = String::new(); + for _ in 0..500_000 { + let distribution = if pick_nonascii.sample(&mut rng) { + nonascii + } else { + ascii + }; + string.push(distribution.sample(&mut rng)); + } + + string +} + +fn bench(c: &mut Criterion, group_name: &str, string: String) { + let mut group = c.benchmark_group(group_name); + group.measurement_time(Duration::from_secs(10)); + group.bench_function("baseline", |b| { + b.iter(|| { + for ch in string.chars() { + black_box(ch); + } + }); + }); + group.bench_function("unicode-ident", |b| { + b.iter(|| { + for ch in string.chars() { + black_box(unicode_ident::is_xid_start(ch)); + black_box(unicode_ident::is_xid_continue(ch)); + } + }); + }); + group.bench_function("unicode-xid", |b| { + b.iter(|| { + for ch in string.chars() { + black_box(unicode_xid::UnicodeXID::is_xid_start(ch)); + black_box(unicode_xid::UnicodeXID::is_xid_continue(ch)); + } + }); + }); + group.bench_function("ucd-trie", |b| { + b.iter(|| { + for ch in string.chars() { + black_box(trie::XID_START.contains_char(ch)); + black_box(trie::XID_CONTINUE.contains_char(ch)); + } + }); + }); 
+ group.bench_function("fst", |b| { + let xid_start_fst = fst::xid_start_fst(); + let xid_continue_fst = fst::xid_continue_fst(); + b.iter(|| { + for ch in string.chars() { + let ch_bytes = (ch as u32).to_be_bytes(); + black_box(xid_start_fst.contains(ch_bytes)); + black_box(xid_continue_fst.contains(ch_bytes)); + } + }); + }); + group.bench_function("roaring", |b| { + let xid_start_bitmap = roaring::xid_start_bitmap(); + let xid_continue_bitmap = roaring::xid_continue_bitmap(); + b.iter(|| { + for ch in string.chars() { + black_box(xid_start_bitmap.contains(ch as u32)); + black_box(xid_continue_bitmap.contains(ch as u32)); + } + }); + }); + group.finish(); +} + +fn bench0(c: &mut Criterion) { + bench(c, "0%-nonascii", gen_string(0)); +} + +fn bench1(c: &mut Criterion) { + bench(c, "1%-nonascii", gen_string(1)); +} + +fn bench10(c: &mut Criterion) { + bench(c, "10%-nonascii", gen_string(10)); +} + +fn bench100(c: &mut Criterion) { + bench(c, "100%-nonascii", gen_string(100)); +} + +criterion_group!(benches, bench0, bench1, bench10, bench100); +criterion_main!(benches); diff --git a/vendor/unicode-ident/src/lib.rs b/vendor/unicode-ident/src/lib.rs new file mode 100644 index 0000000..f890886 --- /dev/null +++ b/vendor/unicode-ident/src/lib.rs @@ -0,0 +1,269 @@ +//! [![github]](https://github.com/dtolnay/unicode-ident) [![crates-io]](https://crates.io/crates/unicode-ident) [![docs-rs]](https://docs.rs/unicode-ident) +//! +//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github +//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust +//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs +//! +//! <br> +//! +//! Implementation of [Unicode Standard Annex #31][tr31] for determining which +//! `char` values are valid in programming language identifiers. +//! +//! [tr31]: https://www.unicode.org/reports/tr31/ +//! +//! This crate is a better optimized implementation of the older `unicode-xid` +//! crate. This crate uses less static storage, and is able to classify both +//! ASCII and non-ASCII codepoints with better performance, 2–10× +//! faster than `unicode-xid`. +//! +//! <br> +//! +//! ## Comparison of performance +//! +//! The following table shows a comparison between five Unicode identifier +//! implementations. +//! +//! - `unicode-ident` is this crate; +//! - [`unicode-xid`] is a widely used crate run by the "unicode-rs" org; +//! - `ucd-trie` and `fst` are two data structures supported by the +//! [`ucd-generate`] tool; +//! - [`roaring`] is a Rust implementation of Roaring bitmap. +//! +//! The *static storage* column shows the total size of `static` tables that the +//! crate bakes into your binary, measured in 1000s of bytes. +//! +//! The remaining columns show the **cost per call** to evaluate whether a +//! single `char` has the XID\_Start or XID\_Continue Unicode property, +//! comparing across different ratios of ASCII to non-ASCII codepoints in the +//! input data. +//! +//! [`unicode-xid`]: https://github.com/unicode-rs/unicode-xid +//! [`ucd-generate`]: https://github.com/BurntSushi/ucd-generate +//! [`roaring`]: https://github.com/RoaringBitmap/roaring-rs +//! +//! | | static storage | 0% nonascii | 1% | 10% | 100% nonascii | +//! |---|---|---|---|---|---| +//! | **`unicode-ident`** | 10.1 K | 0.96 ns | 0.95 ns | 1.09 ns | 1.55 ns | +//! | **`unicode-xid`** | 11.5 K | 1.88 ns | 2.14 ns | 3.48 ns | 15.63 ns | +//! 
| **`ucd-trie`** | 10.2 K | 1.29 ns | 1.28 ns | 1.36 ns | 2.15 ns | +//! | **`fst`** | 139 K | 55.1 ns | 54.9 ns | 53.2 ns | 28.5 ns | +//! | **`roaring`** | 66.1 K | 2.78 ns | 3.09 ns | 3.37 ns | 4.70 ns | +//! +//! Source code for the benchmark is provided in the *bench* directory of this +//! repo and may be repeated by running `cargo criterion`. +//! +//! <br> +//! +//! ## Comparison of data structures +//! +//! #### unicode-xid +//! +//! They use a sorted array of character ranges, and do a binary search to look +//! up whether a given character lands inside one of those ranges. +//! +//! ```rust +//! # const _: &str = stringify! { +//! static XID_Continue_table: [(char, char); 763] = [ +//! ('\u{30}', '\u{39}'), // 0-9 +//! ('\u{41}', '\u{5a}'), // A-Z +//! # " +//! … +//! # " +//! ('\u{e0100}', '\u{e01ef}'), +//! ]; +//! # }; +//! ``` +//! +//! The static storage used by this data structure scales with the number of +//! contiguous ranges of identifier codepoints in Unicode. Every table entry +//! consumes 8 bytes, because it consists of a pair of 32-bit `char` values. +//! +//! In some ranges of the Unicode codepoint space, this is quite a sparse +//! representation – there are some ranges where tens of thousands of +//! adjacent codepoints are all valid identifier characters. In other places, +//! the representation is quite inefficient. A character like `µ` (U+00B5) +//! which is surrounded by non-identifier codepoints consumes 64 bits in the +//! table, while it would be just 1 bit in a dense bitmap. +//! +//! On a system with 64-byte cache lines, binary searching the table touches 7 +//! cache lines on average. Each cache line fits only 8 table entries. +//! Additionally, the branching performed during the binary search is probably +//! mostly unpredictable to the branch predictor. +//! +//! Overall, the crate ends up being about 10× slower on non-ASCII input +//! compared to the fastest crate. +//! +//! A potential improvement would be to pack the table entries more compactly. +//! Rust's `char` type is a 21-bit integer padded to 32 bits, which means every +//! table entry is holding 22 bits of wasted space, adding up to 3.9 K. They +//! could instead fit every table entry into 6 bytes, leaving out some of the +//! padding, for a 25% improvement in space used. With some cleverness it may be +//! possible to fit in 5 bytes or even 4 bytes by storing a low char and an +//! extent, instead of low char and high char. I don't expect that performance +//! would improve much but this could be the most efficient for space across all +//! the libraries, needing only about 7 K to store. +//! +//! #### ucd-trie +//! +//! Their data structure is a compressed trie set specifically tailored for +//! Unicode codepoints. The design is credited to Raph Levien in +//! [rust-lang/rust#33098]. +//! +//! [rust-lang/rust#33098]: https://github.com/rust-lang/rust/pull/33098 +//! +//! ```rust +//! pub struct TrieSet { +//! tree1_level1: &'static [u64; 32], +//! tree2_level1: &'static [u8; 992], +//! tree2_level2: &'static [u64], +//! tree3_level1: &'static [u8; 256], +//! tree3_level2: &'static [u8], +//! tree3_level3: &'static [u64], +//! } +//! ``` +//! +//! It represents codepoint sets using a trie to achieve prefix compression. The +//! final states of the trie are embedded in leaves or "chunks", where each +//! chunk is a 64-bit integer. Each bit position of the integer corresponds to +//! whether a particular codepoint is in the set or not. These chunks are not +//!
+//! just a compact representation of the final states of the trie, but are also
+//! a form of suffix compression. In particular, if multiple ranges of 64
+//! contiguous codepoints have the same Unicode properties, then they all map to
+//! the same chunk in the final level of the trie.
+//!
+//! Being tailored for Unicode codepoints, this trie is partitioned into three
+//! disjoint sets: tree1, tree2, tree3. The first set corresponds to codepoints
+//! \[0, 0x800), the second \[0x800, 0x10000), and the third \[0x10000,
+//! 0x110000). These partitions conveniently correspond to the space of 1- or
+//! 2-byte UTF-8 encoded codepoints, 3-byte UTF-8 encoded codepoints, and
+//! 4-byte UTF-8 encoded codepoints, respectively.
+//!
+//! Lookups in this data structure are significantly more efficient than binary
+//! search. A lookup touches either 1, 2, or 3 cache lines based on which of the
+//! trie partitions is being accessed.
+//!
+//! One possible performance improvement would be for this crate to expose a way
+//! to query based on a UTF-8 encoded string, returning the Unicode property
+//! corresponding to the first character in the string. Without such an API, the
+//! caller is required to tokenize their UTF-8 encoded input data into `char`,
+//! hand the `char` into `ucd-trie`, only for `ucd-trie` to undo that work by
+//! converting back into the variable-length representation for trie traversal.
+//!
+//! #### fst
+//!
+//! The `fst` crate uses a [finite state transducer][fst]. This representation
+//! is built into [ucd-generate], but I am not aware of any advantage it has
+//! over the `ucd-trie` representation. In particular, `ucd-trie` is optimized
+//! for storing Unicode properties while `fst` is not.
+//!
+//! [fst]: https://github.com/BurntSushi/fst
+//! [ucd-generate]: https://github.com/BurntSushi/ucd-generate
+//!
+//! As far as I can tell, the main thing that causes `fst` to have large size
+//! and slow lookups for this use case relative to `ucd-trie` is that it does
+//! not specialize for the fact that only 21 of the 32 bits in a `char` are
+//! meaningful. There are some dense arrays in the structure with large ranges
+//! that could never possibly be used.
+//!
+//! #### roaring
+//!
+//! This crate is a pure-Rust implementation of [Roaring Bitmap], a data
+//! structure designed for storing sets of 32-bit unsigned integers.
+//!
+//! [Roaring Bitmap]: https://roaringbitmap.org/about/
+//!
+//! Roaring bitmaps are compressed bitmaps which tend to outperform conventional
+//! compressed bitmaps such as WAH, EWAH or Concise. In some instances, they can
+//! be hundreds of times faster and they often offer significantly better
+//! compression.
+//!
+//! In this use case the performance was reasonably competitive but still
+//! substantially slower than the Unicode-optimized crates. Meanwhile the
+//! compression was significantly worse, requiring 6× as much storage for
+//! the data structure.
+//!
+//! I also benchmarked the [`croaring`] crate, which is an FFI wrapper around
+//! the C reference implementation of Roaring Bitmap. It was consistently about
+//! 15% slower than the pure-Rust `roaring`, which could just be FFI overhead.
+//! I did not investigate further.
+//!
+//! [`croaring`]: https://crates.io/crates/croaring
+//!
+//! #### unicode-ident
+//!
+//! This crate is most similar to the `ucd-trie` library, in that it's based on
+//! bitmaps stored in the leaves of a trie representation, achieving both prefix
+//! compression and suffix compression.
+//!
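+//! Concretely, once the right leaf has been located, testing a codepoint
+//! against one 512-bit chunk is just a byte index plus a bit shift. A minimal
+//! sketch of that membership test, using a hypothetical `in_chunk` helper over
+//! a single 64-byte chunk (the arithmetic mirrors `is_xid_start` below):
+//!
+//! ```rust
+//! // Hypothetical helper, not part of this crate's API.
+//! fn in_chunk(chunk: &[u8; 64], cp: u32) -> bool {
+//!     // Position of this codepoint within its block of 512.
+//!     let bit = cp % 512;
+//!     chunk[(bit / 8) as usize] >> (bit % 8) & 1 != 0
+//! }
+//!
+//! // An all-zero chunk contains no codepoints.
+//! assert!(!in_chunk(&[0u8; 64], 0x3BC));
+//! ```
+//!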
+//! The key differences are:
+//!
+//! - Uses a single 2-level trie, rather than 3 disjoint partitions of different
+//!   depth each.
+//! - Uses significantly larger chunks: 512 bits rather than 64 bits.
+//! - Compresses the XID\_Start and XID\_Continue properties together, rather
+//!   than duplicating identical trie leaf chunks across the two.
+//!
+//! The following diagrams show the XID\_Start and XID\_Continue Unicode boolean
+//! properties in uncompressed form, in row-major order:
+//!
+//! <table>
+//! <tr><th>XID_Start</th><th>XID_Continue</th></tr>
+//! <tr>
+//! <td><img alt="XID_Start bitmap" width="256" src="https://user-images.githubusercontent.com/1940490/168647353-c6eeb922-afec-49b2-9ef5-c03e9d1e0760.png"></td>
+//! <td><img alt="XID_Continue bitmap" width="256" src="https://user-images.githubusercontent.com/1940490/168647367-f447cca7-2362-4d7d-8cd7-d21c011d329b.png"></td>
+//! </tr>
+//! </table>
+//!
+//! Uncompressed, these would take 140 K to store, which is beyond what would be
+//! reasonable. However, as you can see, there is a large degree of similarity
+//! between the two bitmaps and across the rows, which lends itself well to
+//! compression.
+//!
+//! This crate stores one 512-bit "row" of the above bitmaps in the leaf level
+//! of a trie, and a single additional level to index into the leaves. It turns
+//! out there are 124 unique 512-bit chunks across the two bitmaps, so 7 bits
+//! are sufficient to index them.
+//!
+//! The chunk size of 512 bits is selected as the size that minimizes the total
+//! size of the data structure. A smaller chunk, like 256 or 128 bits, would
+//! achieve better deduplication but require a larger index. A larger chunk
+//! would increase redundancy in the leaf bitmaps. 512-bit chunks are the
+//! optimum for total size of the index plus leaf bitmaps.
+//!
+//! In fact, since there are only 124 unique chunks, we can use an 8-bit index
+//! with a spare bit to index at the half-chunk level. This achieves an
+//! additional 8.5% compression by eliminating redundancies between the second
+//! half of any chunk and the first half of any other chunk. Note that this is
+//! not the same as using chunks which are half the size, because it does not
+//! necessitate raising the size of the trie's first level.
+//!
+//! In contrast to binary search or the `ucd-trie` crate, performing lookups in
+//! this data structure is straight-line code with no need for branching.
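+//!
+//! <br>
+//!
+//! ## Example
+//!
+//! A minimal sketch of chaining the two exported predicates to test whether a
+//! whole string is a valid identifier (the `is_ident` helper is illustrative,
+//! not part of this crate's API; a real lexer would also special-case `_`,
+//! which is XID\_Continue but not XID\_Start):
+//!
+//! ```rust
+//! fn is_ident(s: &str) -> bool {
+//!     let mut chars = s.chars();
+//!     match chars.next() {
+//!         // The first character must be XID_Start, the rest XID_Continue.
+//!         Some(first) => {
+//!             unicode_ident::is_xid_start(first)
+//!                 && chars.all(unicode_ident::is_xid_continue)
+//!         }
+//!         None => false,
+//!     }
+//! }
+//!
+//! assert!(is_ident("unicode_ident"));
+//! assert!(is_ident("µ"));
+//! assert!(!is_ident("0x2a"));
+//! assert!(!is_ident(""));
+//! ```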
+
+#![no_std]
+#![doc(html_root_url = "https://docs.rs/unicode-ident/1.0.12")]
+#![allow(clippy::doc_markdown, clippy::must_use_candidate)]
+
+#[rustfmt::skip]
+mod tables;
+
+use crate::tables::{ASCII_CONTINUE, ASCII_START, CHUNK, LEAF, TRIE_CONTINUE, TRIE_START};
+
+pub fn is_xid_start(ch: char) -> bool {
+    // ASCII fast path: a dense table of the 128 ASCII codepoints.
+    if ch.is_ascii() {
+        return ASCII_START.0[ch as usize];
+    }
+    // First trie level: one index byte per 512-codepoint block (CHUNK = 64
+    // bytes = 512 bits). Blocks past the end of the table map to leaf 0,
+    // which is all zeros.
+    let chunk = *TRIE_START.0.get(ch as usize / 8 / CHUNK).unwrap_or(&0);
+    // Leaves are indexed at half-chunk (32-byte) granularity; add the byte
+    // offset of this codepoint within its chunk.
+    let offset = chunk as usize * CHUNK / 2 + ch as usize / 8 % CHUNK;
+    // Test the codepoint's bit within its leaf byte.
+    unsafe { LEAF.0.get_unchecked(offset) }.wrapping_shr(ch as u32 % 8) & 1 != 0
+}
+
+pub fn is_xid_continue(ch: char) -> bool {
+    if ch.is_ascii() {
+        return ASCII_CONTINUE.0[ch as usize];
+    }
+    // Same two-level lookup as is_xid_start, against the XID_Continue trie.
+    let chunk = *TRIE_CONTINUE.0.get(ch as usize / 8 / CHUNK).unwrap_or(&0);
+    let offset = chunk as usize * CHUNK / 2 + ch as usize / 8 % CHUNK;
+    unsafe { LEAF.0.get_unchecked(offset) }.wrapping_shr(ch as u32 % 8) & 1 != 0
+}
diff --git a/vendor/unicode-ident/src/tables.rs b/vendor/unicode-ident/src/tables.rs
new file mode 100644
index 0000000..bb691b5
--- /dev/null
+++ b/vendor/unicode-ident/src/tables.rs
@@ -0,0 +1,651 @@
+// @generated by ../generate. To regenerate, run the following in the repo root:
+//
+// $ curl -LO https://www.unicode.org/Public/zipped/15.0.0/UCD.zip
+// $ unzip UCD.zip -d UCD
+// $ cargo run --manifest-path generate/Cargo.toml
+
+const T: bool = true;
+const F: bool = false;
+
+#[repr(C, align(8))]
+pub(crate) struct Align8<T>(pub(crate) T);
+#[repr(C, align(64))]
+pub(crate) struct Align64<T>(pub(crate) T);
+
+pub(crate) static ASCII_START: Align64<[bool; 128]> = Align64([
+    F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F,
+    F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F,
+]);
+
+pub(crate) static ASCII_CONTINUE: Align64<[bool; 128]> = Align64([
+    F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F, F,
+    F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, T,
+    F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F,
+]);
+
+pub(crate) const CHUNK: usize = 64;
+
+pub(crate) static TRIE_START: Align8<[u8; 402]> = Align8([
+    0x04, 0x0B, 0x0F, 0x13, 0x17, 0x1B, 0x1F, 0x23, 0x27, 0x2D, 0x31, 0x34, 0x38, 0x3C, 0x40, 0x02,
+    0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x00, 0x4D, 0x00, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x06, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+    0x05, 0x05, 0x51, 0x54, 0x58, 0x5C, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x09, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x60, 0x64, 0x66,
+    0x6A, 0x6E, 0x72, 0x28, 0x76, 0x78, 0x7C, 0x80, 0x84, 0x88, 0x8C, 0x90, 0x94, 0x98, 0x9E, 0xA2,
+    0x05, 0x2B, 0xA6, 0x00, 0x00, 0x00, 0x00, 0x99, 0x05, 0x05, 0xA8, 0x00, 0x00, 0x00,
0x00, 0x00, + 0x00, 0x00, 0x05, 0xAE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0xB1, 0x00, 0xB5, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x32, 0x05, 0x05, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9C, 0x43, 0xBB, 0x00, 0x00, 0x00, 0x00, 0xBE, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC6, 0xC8, 0x00, 0x00, 0x00, 0xAF, + 0xCE, 0xD2, 0xD6, 0xBC, 0xDA, 0x00, 0x00, 0xDE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0xE0, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x52, 0xE3, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE6, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0xE1, 0x05, 0xE9, 0x00, 0x00, 0x00, 0x00, 0x05, 0xEB, 0x00, 0x00, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE4, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0xE7, +]); + +pub(crate) static TRIE_CONTINUE: Align8<[u8; 1793]> = Align8([ + 0x08, 0x0D, 0x11, 0x15, 0x19, 0x1D, 0x21, 0x25, 0x2A, 0x2F, 0x31, 0x36, 0x3A, 0x3E, 0x42, 0x02, + 0x47, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x4F, 0x00, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x06, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x51, 0x56, 0x5A, 0x5E, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x09, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x62, 0x64, 0x68, + 0x6C, 0x70, 0x74, 0x28, 0x76, 0x7A, 0x7E, 0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9B, 0xA0, 0xA4, + 0x05, 0x2B, 0xA6, 0x00, 0x00, 0x00, 0x00, 0x99, 0x05, 0x05, 0xAB, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x05, 0xAE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0xB3, 0x00, 0xB7, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x32, 0x05, 0x05, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9C, 0x43, 0xBB, 0x00, 0x00, 0x00, 0x00, 0xC1, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA9, 0xAC, 0xC4, 0xC6, 0xCA, 0x00, 0xCC, 0x00, 0xAF, + 0xD0, 0xD4, 0xD8, 0xBC, 0xDC, 0x00, 0x00, 0xDE, 0x00, 0x00, 0x00, 0x00, 0x00, 0xBF, 0x00, 0x00, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0xE0, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x52, 0xE3, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE6, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0xE1, 0x05, 0xE9, 0x00, 0x00, 0x00, 0x00, 0x05, 0xEB, 0x00, 0x00, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE4, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0xE7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xC2, +]); + +pub(crate) static LEAF: Align64<[u8; 7584]> = Align64([ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x3F, 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x3F, 0xFF, 0xAA, 0xFF, 0xFF, 0xFF, 0x3F, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xDF, 0x5F, 0xDC, 0x1F, 0xCF, 0x0F, 0xFF, 0x1F, 0xDC, 0x1F, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x20, 0x04, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xA0, 0x04, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x7F, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC3, 0xFF, 0x03, 0x00, 0x1F, 0x50, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xDF, 0xB8, + 0x40, 0xD7, 0xFF, 0xFF, 0xFB, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC3, 0xFF, 0x03, 0x00, 0x1F, 0x50, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xDF, 0xB8, + 0xC0, 0xD7, 0xFF, 0xFF, 0xFB, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x03, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0x7F, 0x02, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x87, 0x07, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFB, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0x7F, 0x02, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x01, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xBF, 0xB6, 0x00, 0xFF, 0xFF, 0xFF, 0x87, 0x07, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0x00, 0x00, 0xC0, 0xFE, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2F, 0x00, 0x60, 0xC0, 0x00, 0x9C, + 0x00, 0x00, 0xFD, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x02, 0x00, 0x00, 0xFC, 0xFF, 0xFF, 0xFF, 0x07, 0x30, 0x04, + 0x00, 0x00, 0xFF, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC3, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xEF, 0x9F, 0xFF, 0xFD, 0xFF, 0x9F, + 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x24, + 0xFF, 0xFF, 0x3F, 0x04, 0x10, 0x01, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x01, 0xFF, 0x07, 0xFF, 0xFF, + 0xFF, 0x7E, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x23, 0x00, 0x00, 0x01, 0xFF, 0x03, 0x00, 0xFE, 0xFF, + 0xE1, 0x9F, 0xF9, 0xFF, 0xFF, 0xFD, 0xC5, 0x23, 0x00, 0x40, 0x00, 0xB0, 0x03, 0x00, 0x03, 0x10, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x0F, 0xFF, 0x07, 0xFF, 0xFF, + 0xFF, 0x7E, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFB, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xCF, 0xFF, 0xFE, 0xFF, + 0xEF, 0x9F, 0xF9, 0xFF, 0xFF, 0xFD, 0xC5, 0xF3, 0x9F, 0x79, 0x80, 0xB0, 0xCF, 0xFF, 0x03, 0x50, + 0xE0, 0x87, 0xF9, 0xFF, 0xFF, 0xFD, 0x6D, 0x03, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x1C, 0x00, + 0xE0, 0xBF, 0xFB, 0xFF, 0xFF, 0xFD, 0xED, 0x23, 0x00, 0x00, 0x01, 0x00, 0x03, 0x00, 0x00, 0x02, + 0xE0, 0x9F, 0xF9, 0xFF, 0xFF, 0xFD, 0xED, 0x23, 0x00, 0x00, 0x00, 0xB0, 0x03, 0x00, 0x02, 0x00, + 0xE8, 0xC7, 0x3D, 0xD6, 0x18, 0xC7, 0xFF, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xEE, 0x87, 0xF9, 0xFF, 0xFF, 0xFD, 0x6D, 0xD3, 0x87, 0x39, 0x02, 0x5E, 0xC0, 0xFF, 0x3F, 0x00, + 0xEE, 0xBF, 0xFB, 0xFF, 0xFF, 0xFD, 0xED, 0xF3, 0xBF, 0x3B, 0x01, 0x00, 0xCF, 0xFF, 0x00, 0xFE, + 0xEE, 0x9F, 0xF9, 0xFF, 0xFF, 0xFD, 0xED, 0xF3, 0x9F, 0x39, 0xE0, 0xB0, 0xCF, 0xFF, 0x02, 0x00, + 0xEC, 0xC7, 0x3D, 0xD6, 0x18, 0xC7, 0xFF, 0xC3, 0xC7, 0x3D, 0x81, 0x00, 0xC0, 0xFF, 0x00, 0x00, + 0xE0, 0xDF, 0xFD, 0xFF, 0xFF, 0xFD, 0xFF, 0x23, 0x00, 0x00, 0x00, 0x27, 0x03, 0x00, 0x00, 0x00, + 0xE1, 0xDF, 0xFD, 0xFF, 0xFF, 0xFD, 0xEF, 0x23, 0x00, 0x00, 0x00, 0x60, 0x03, 0x00, 0x06, 0x00, + 0xF0, 0xDF, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0x27, 0x00, 0x40, 0x70, 0x80, 0x03, 0x00, 0x00, 0xFC, + 0xE0, 0xFF, 0x7F, 0xFC, 0xFF, 0xFF, 0xFB, 0x2F, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xDF, 0xFD, 0xFF, 0xFF, 0xFD, 0xFF, 0xF3, 0xDF, 0x3D, 0x60, 0x27, 0xCF, 0xFF, 0x00, 0x00, + 0xEF, 0xDF, 0xFD, 0xFF, 0xFF, 0xFD, 0xEF, 0xF3, 0xDF, 0x3D, 0x60, 0x60, 0xCF, 0xFF, 0x0E, 0x00, + 0xFF, 0xDF, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xDF, 0x7D, 0xF0, 0x80, 0xCF, 0xFF, 0x00, 0xFC, + 0xEE, 0xFF, 0x7F, 0xFC, 0xFF, 0xFF, 0xFB, 0x2F, 0x7F, 0x84, 0x5F, 0xFF, 0xC0, 0xFF, 0x0C, 0x00, + 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x05, 0x00, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xD6, 0xF7, 0xFF, 0xFF, 0xAF, 0xFF, 0x05, 0x20, 0x5F, 0x00, 0x00, 0xF0, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0x1F, 0x00, 0x00, + 0x00, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0xFF, 0x7F, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, + 0xD6, 0xF7, 0xFF, 0xFF, 0xAF, 0xFF, 0xFF, 0x3F, 0x5F, 0x7F, 0xFF, 0xF3, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x03, 0xFF, 0x03, 0xA0, 0xC2, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0x1F, 0xFE, 0xFF, + 0xDF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0x1F, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0x80, 0x00, 0x00, 0x3F, 0x3C, 0x62, 0xC0, 0xE1, 0xFF, + 0x03, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xBF, 0x20, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF7, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0xFF, 0xFF, 0x3F, 0x00, 0xFF, 0x00, 0x00, 0x00, + 0xBF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xBF, 0x20, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF7, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3D, 0x7F, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, 0x3D, 0x7F, 0x3D, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x3F, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3D, 0x7F, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, 0x3D, 0x7F, 0x3D, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE7, 0x00, 0xFE, 0x03, 0x00, + 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x3F, + 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x9F, 0xFF, 0xFF, + 0xFE, 0xFF, 0xFF, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC7, 0xFF, 0x01, + 0xFF, 0xFF, 0x03, 0x80, 0xFF, 0xFF, 0x03, 0x00, 0xFF, 0xFF, 0x03, 0x00, 0xFF, 0xDF, 0x01, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0x00, 0x00, 0x80, 0x10, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x9F, 0xFF, 0xFF, + 0xFE, 0xFF, 0xFF, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC7, 0xFF, 0x01, + 0xFF, 0xFF, 0x3F, 0x80, 0xFF, 0xFF, 0x1F, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xDF, 0x0D, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8F, 0x30, 0xFF, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x05, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, + 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x3F, 0x1F, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0xFF, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xB8, 0xFF, 0x03, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, + 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0x0F, 0xFF, 0x0F, 0xC0, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x1F, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0xFF, 0xFF, 0xFF, 0x03, 0xFF, 0x07, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0x7F, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0xE0, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF8, 0xFF, 0xFF, 0xFF, 0x01, 0xC0, 0x00, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x9F, + 
0xFF, 0x03, 0xFF, 0x03, 0x80, 0x00, 0xFF, 0xBF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0xFF, 0x03, 0x00, 0xF8, 0x0F, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0x00, 0x00, 0x00, 0xE0, 0x00, 0xFC, 0xFF, 0xFF, 0xFF, 0x3F, + 0xFF, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE7, 0x00, 0x00, 0x00, 0x00, 0x00, 0xDE, 0x6F, 0x04, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xE3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, + 0xFF, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE7, 0x00, 0x00, 0xF7, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x27, 0x00, 0xF0, 0x00, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80, + 0x00, 0x00, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x84, 0xFC, 0x2F, 0x3F, 0x50, 0xFD, 0xFF, 0xF3, 0xE0, 0x43, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x80, + 0x00, 0x00, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x1F, 0xE2, 0xFF, 0x01, 0x00, + 0x84, 0xFC, 0x2F, 0x3F, 0x50, 0xFD, 0xFF, 0xF3, 0xE0, 0x43, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0x78, 0x0C, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xBF, 0x20, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x00, 0x00, + 0xFF, 0xFF, 0x7F, 0x00, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0xF8, 0x0F, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xBF, 0x20, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x00, 0x80, + 0xFF, 0xFF, 0x7F, 0x00, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, + 0xE0, 0x00, 0x00, 0x00, 0xFE, 0x03, 0x3E, 0x1F, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x7F, 0xE0, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF7, + 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x7F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, + 0xE0, 0x00, 0x00, 0x00, 0xFE, 0xFF, 0x3E, 0x1F, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x7F, 0xE6, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x7F, 0x00, 0x00, 
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x1F, 0xFF, 0xFF, 0x00, 0x0C, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x80, + 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, + 0x00, 0x00, 0x80, 0xFF, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xF9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0xEB, 0x03, 0x00, 0x00, 0xFC, 0xFF, + 0xFF, 0x1F, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF0, 0xBF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0x00, + 0x00, 0x00, 0x80, 0xFF, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xF9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0xEB, 0x03, 0x00, 0x00, 0xFC, 0xFF, + 0xBB, 0xF7, 0xFF, 0xFF, 0x07, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, + 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFC, 0x68, + 0x00, 0xFC, 0xFF, 0xFF, 0x3F, 0x00, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x1F, + 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0x00, 0x80, 0x00, 0x00, 0xDF, 0xFF, 0x00, 0x7C, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x10, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0xFF, 0x03, 0xFF, 0xFF, 0xFF, 0xE8, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0xFF, 0x1F, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x80, 0xFF, 0x03, 0xFF, 0xFF, 0xFF, 0x7F, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0xF7, 0x0F, 0x00, 0x00, 0xFF, 0xFF, 0x7F, 0xC4, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x62, 0x3E, 0x05, 0x00, 0x00, 0x38, 0xFF, 0x07, 0x1C, 0x00, + 0x7E, 0x7E, 0x7E, 0x00, 0x7F, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF7, 0xFF, 0x03, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0xFF, 0x3F, 0xFF, 0x03, 0xFF, 0xFF, 0x7F, 0xFC, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0x00, 0x38, 0xFF, 0xFF, 0x7C, 0x00, + 0x7E, 0x7E, 0x7E, 0x00, 0x7F, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF7, 0xFF, 0x03, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x37, 0xFF, 0x03, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x7F, 0x00, 0xF8, 0xA0, 0xFF, 0xFD, 0x7F, 0x5F, 0xDB, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x7F, 0x00, 0xF8, 0xE0, 0xFF, 0xFD, 0x7F, 0x5F, 0xDB, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0xF0, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8A, 0xAA, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, + 0x00, 0x00, 0x00, 0x00, 0xFE, 0xFF, 0xFF, 0x07, 0xFE, 0xFF, 0xFF, 0x07, 0xC0, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFC, 0xFC, 0xFC, 0x1C, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0x18, 0x00, 0x00, 0xE0, 0x00, 0x00, 0x00, 0x00, 0x8A, 0xAA, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, + 0x00, 0x00, 0xFF, 0x03, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07, 0xE0, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFC, 0xFC, 0xFC, 0x1C, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xEF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xB7, 0xFF, 0x3F, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xEF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xB7, 0xFF, 0x3F, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, + 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0xFF, 0x3E, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, + 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0xFF, 0x3E, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xF7, + 0xFF, 0xF7, 0xB7, 0xFF, 0xFB, 0xFF, 0xFB, 0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0x03, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0xFF, 
0xFF, 0xFF, 0xFF, 0x0F, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xF7, + 0xFF, 0xF7, 0xB7, 0xFF, 0xFB, 0xFF, 0xFB, 0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3F, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xBF, 0x91, 0xFF, 0xFF, 0x3F, 0x00, 0xFF, 0xFF, 0x7F, 0x00, + 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x37, 0x00, + 0xFF, 0xFF, 0x3F, 0x00, 0xFF, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0xEF, 0xFE, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x1F, + 0xFF, 0xFF, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFE, 0xFF, 0xFF, 0x1F, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0xFF, 0xFF, 0x3F, 0x00, 0xFF, 0xFF, 0x07, 0x00, + 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x6F, 0xF0, 0xEF, 0xFE, 0xFF, 0xFF, 0x3F, 0x87, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x1F, + 0xFF, 0xFF, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFE, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0xFF, 0xFF, 0x3F, 0x00, 0xFF, 0xFF, 0x07, 0x00, + 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x1F, 0x80, 0x00, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x7F, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1B, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE0, + 0xFF, 0xFF, 0xFF, 0x1F, 0x80, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00, 0xFF, 0xFF, + 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x7F, 0x00, + 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0x00, + 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x00, + 0xF8, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0x90, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x47, 0x00, + 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0x1E, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0xC0, 0xFF, 0x3F, 0x80, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x04, 0x00, 0xFF, 0xFF, 0xFF, 0x01, 0xFF, 0x03, + 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xDF, 0xFF, 0xF0, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x4F, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0xDE, 0xFF, 0x17, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFB, 0xFF, 0xFF, 0x0F, 0x00, 0x80, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7F, 0xBD, 0xFF, 0xBF, 0xFF, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0x00, + 0xE0, 0x9F, 0xF9, 0xFF, 0xFF, 0xFD, 0xED, 0x23, 0x00, 0x00, 0x01, 0xE0, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFB, 0xFF, 0xFF, 0xFF, 0xFF, 0xC0, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7F, 0xBD, 0xFF, 0xBF, 0xFF, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0xFF, 0x03, + 0xEF, 0x9F, 0xF9, 0xFF, 0xFF, 0xFD, 0xED, 0xFB, 0x9F, 0x39, 0x81, 0xE0, 0xCF, 0x1F, 0x1F, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0x00, 0x80, 0x07, 0x00, 0x80, 0x03, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0xFF, 0xC3, 0x03, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBF, 0x00, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0x01, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x11, 0x00, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xE7, 0xFF, 0x0F, 0xFF, 0x03, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x80, + 0x7F, 0xF2, 0x6F, 0xFF, 0xFF, 0xFF, 0x00, 0x80, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0x00, 0x80, + 0x7F, 0xF2, 0x6F, 0xFF, 0xFF, 0xFF, 0xBF, 0xF9, 0x0F, 0x00, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC, 0x1B, 0x00, 0x00, 0x00, + 0x01, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x04, 0x00, 0x00, 0x01, 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x03, 0x00, 0x20, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x80, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x23, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xEF, 0x6F, + 0xFF, 0xFD, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFC, 0xFF, + 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7F, 0xFB, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x40, 0x00, 0x00, 0x00, 0xBF, 0xFD, 0xFF, 0xFF, + 0xFF, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0x01, 0x00, 0xFF, 0x03, 0x00, 0x00, 0xFC, 0xFF, + 0xFF, 0xFF, 0xFC, 0xFF, 0xFF, 0xFE, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7F, 0xFB, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xB4, 0xFF, 0x00, 0xFF, 0x03, 0xBF, 0xFD, 0xFF, 0xFF, + 0xFF, 0x7F, 0xFB, 0x01, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x07, 0x00, + 0xF4, 0xFF, 0xFD, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x7F, 0x00, + 0xFF, 0xFF, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xC7, 0x07, 0x00, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x7E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 
0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE0, 0xE3, 0x07, 0xF8, + 0xE7, 0x0F, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x7F, 0xE0, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0xE0, + 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0x03, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0x03, 0xFF, 0xFF, 0xFF, 0x3F, 0x1F, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x0F, 0x00, 0xFF, 0x03, 0xF8, 0xFF, 0xFF, 0xE0, + 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xF8, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x87, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x80, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x03, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7F, 0x6F, 0xFF, 0x7F, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0xFF, 0x1F, + 0xFF, 0x01, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0xFF, 0x1F, + 0xFF, 0x01, 0xFF, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xDF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xDF, 0x64, 0xDE, 0xFF, 0xEB, 0xEF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xBF, 0xE7, 0xDF, 0xDF, 0xFF, 0xFF, 0xFF, 0x7B, 0x5F, 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0xFF, 0xFD, 0xFF, 0xFF, 0xF7, 0xFF, 0xFF, 0xFF, 0xF7, + 0xFF, 0xFF, 0xDF, 0xFF, 0xFF, 0xFF, 0xDF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, + 0xFF, 0xFD, 0xFF, 0xFF, 0xFF, 0xFD, 0xFF, 0xFF, 0xF7, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0xFF, 0xFD, 0xFF, 0xFF, 0xF7, 0xFF, 0xFF, 0xFF, 0xF7, + 0xFF, 0xFF, 0xDF, 0xFF, 0xFF, 0xFF, 0xDF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, + 0xFF, 0xFD, 0xFF, 0xFF, 0xFF, 0xFD, 0xFF, 0xFF, 0xF7, 0xCF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0x20, 0x00, + 0x10, 0x00, 0x00, 0xF8, 0xFE, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0x80, 0x3F, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7F, 0xFF, 0xFF, 0xF9, 0xDB, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, + 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0xFF, 0x3F, 0xFF, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0x00, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xEF, 0xFF, 0xFF, 0xFF, 0x96, 0xFE, 0xF7, 0x0A, 0x84, 0xEA, 0x96, 0xAA, 0x96, 0xF7, 0xF7, 0x5E, + 0xFF, 0xFB, 0xFF, 0x0F, 0xEE, 0xFB, 0xFF, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +]); diff --git a/vendor/unicode-ident/tests/compare.rs b/vendor/unicode-ident/tests/compare.rs new file mode 100644 index 0000000..14ee22f --- /dev/null +++ b/vendor/unicode-ident/tests/compare.rs @@ -0,0 +1,67 @@ +mod fst; +mod roaring; +mod trie; + +#[test] +fn compare_all_implementations() { + let xid_start_fst = fst::xid_start_fst(); + let xid_continue_fst = fst::xid_continue_fst(); + let xid_start_roaring = roaring::xid_start_bitmap(); + let xid_continue_roaring = roaring::xid_continue_bitmap(); + + for ch in '\0'..=char::MAX { + let thought_to_be_start = unicode_ident::is_xid_start(ch); + let thought_to_be_continue = unicode_ident::is_xid_continue(ch); + + // unicode-xid + // FIXME: unicode-xid does not support Unicode 15.1.0 yet. 
+        /*
+        assert_eq!(
+            thought_to_be_start,
+            unicode_xid::UnicodeXID::is_xid_start(ch),
+            "{ch:?}",
+        );
+        assert_eq!(
+            thought_to_be_continue,
+            unicode_xid::UnicodeXID::is_xid_continue(ch),
+            "{ch:?}",
+        );
+        */
+
+        // ucd-trie
+        assert_eq!(
+            thought_to_be_start,
+            trie::XID_START.contains_char(ch),
+            "{ch:?}",
+        );
+        assert_eq!(
+            thought_to_be_continue,
+            trie::XID_CONTINUE.contains_char(ch),
+            "{ch:?}",
+        );
+
+        // fst
+        assert_eq!(
+            thought_to_be_start,
+            xid_start_fst.contains((ch as u32).to_be_bytes()),
+            "{ch:?}",
+        );
+        assert_eq!(
+            thought_to_be_continue,
+            xid_continue_fst.contains((ch as u32).to_be_bytes()),
+            "{ch:?}",
+        );
+
+        // roaring
+        assert_eq!(
+            thought_to_be_start,
+            xid_start_roaring.contains(ch as u32),
+            "{ch:?}",
+        );
+        assert_eq!(
+            thought_to_be_continue,
+            xid_continue_roaring.contains(ch as u32),
+            "{ch:?}",
+        );
+    }
+}
diff --git a/vendor/unicode-ident/tests/fst/mod.rs b/vendor/unicode-ident/tests/fst/mod.rs
new file mode 100644
index 0000000..5195efb
--- /dev/null
+++ b/vendor/unicode-ident/tests/fst/mod.rs
@@ -0,0 +1,11 @@
+#![allow(clippy::module_name_repetitions)]
+
+pub fn xid_start_fst() -> fst::Set<&'static [u8]> {
+    let data = include_bytes!("xid_start.fst");
+    fst::Set::from(fst::raw::Fst::new(data.as_slice()).unwrap())
+}
+
+pub fn xid_continue_fst() -> fst::Set<&'static [u8]> {
+    let data = include_bytes!("xid_continue.fst");
+    fst::Set::from(fst::raw::Fst::new(data.as_slice()).unwrap())
+}
diff --git a/vendor/unicode-ident/tests/fst/xid_continue.fst b/vendor/unicode-ident/tests/fst/xid_continue.fst
new file mode 100644
index 0000000000000000000000000000000000000000..3024c8189a9f5a1feeeca3cb0957fa21ac08d9f9
GIT binary patch
literal 73249
[73249 bytes of base85-encoded binary data, already truncated in this document, elided: prebuilt fst set of the XID_Continue code points]

literal 0
HcmV?d00001
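The .fst files above and below are prebuilt fst::Set data that fst/mod.rs loads with include_bytes!. compare.rs queries them with (ch as u32).to_be_bytes() because an fst set orders its keys as raw byte strings: a big-endian encoding of a u32 sorts byte-wise in the same order as the integers themselves, which is exactly the ascending key stream an fst builder requires. A minimal self-contained sketch of the same pattern, assuming the fst crate as a dependency (the key set here is illustrative, not part of the vendored tests):

    use std::collections::BTreeSet;

    fn fst_roundtrip() -> Result<(), fst::Error> {
        // Set::from_iter requires keys in ascending byte order; BTreeSet
        // iteration plus big-endian encoding guarantees that.
        let keys: BTreeSet<[u8; 4]> = ['A', 'Z', '_']
            .into_iter()
            .map(|ch| (ch as u32).to_be_bytes())
            .collect();
        let set = fst::Set::from_iter(keys)?;
        assert!(set.contains(('Z' as u32).to_be_bytes()));
        assert!(!set.contains(('!' as u32).to_be_bytes()));
        Ok(())
    }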
diff --git a/vendor/unicode-ident/tests/fst/xid_start.fst b/vendor/unicode-ident/tests/fst/xid_start.fst
new file mode 100644
index 0000000000000000000000000000000000000000..3f5a46b168cc233fe408f420dfdc921768e85ba2
GIT binary patch
literal 65487
[65487 bytes of base85-encoded binary data, already truncated in this document, elided: prebuilt fst set of the XID_Start code points]

literal 0
HcmV?d00001
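roaring/mod.rs below derives its bitmaps from unicode_ident itself rather than from checked-in data, inserting every qualifying scalar value into a RoaringBitmap. Note that the loop bound '\0'..=char::MAX, also used by compare.rs, already excludes surrogates (char cannot hold one), so it yields 0x110000 - 0x800 = 1,112,064 values. Roaring bitmaps split the u32 space into 65,536-value chunks and store each chunk in whichever container is smallest, which pays off on Unicode property data dominated by long runs of consecutive code points. A minimal sketch, assuming a roaring crate version that provides insert_range (the values are illustrative):

    use roaring::RoaringBitmap;

    fn roaring_demo() {
        let mut bitmap = RoaringBitmap::new();
        // A contiguous run goes in with one call instead of one per value.
        bitmap.insert_range(0x41..=0x5A); // 'A'..='Z'
        bitmap.insert(0x5F); // '_'
        assert!(bitmap.contains('Q' as u32));
        assert!(!bitmap.contains('!' as u32));
        assert_eq!(27, bitmap.len());
    }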
diff --git a/vendor/unicode-ident/tests/roaring/mod.rs b/vendor/unicode-ident/tests/roaring/mod.rs
new file mode 100644
index 0000000..3f558ff
--- /dev/null
+++ b/vendor/unicode-ident/tests/roaring/mod.rs
@@ -0,0 +1,21 @@
+use roaring::RoaringBitmap;
+
+pub fn xid_start_bitmap() -> RoaringBitmap {
+    let mut bitmap = RoaringBitmap::new();
+    for ch in '\0'..=char::MAX {
+        if unicode_ident::is_xid_start(ch) {
+            bitmap.insert(ch as u32);
+        }
+    }
+    bitmap
+}
+
+pub fn xid_continue_bitmap() -> RoaringBitmap {
+    let mut bitmap = RoaringBitmap::new();
+    for ch in '\0'..=char::MAX {
+        if unicode_ident::is_xid_continue(ch) {
+            bitmap.insert(ch as u32);
+        }
+    }
+    bitmap
+}
diff --git a/vendor/unicode-ident/tests/static_size.rs b/vendor/unicode-ident/tests/static_size.rs
new file mode 100644
index 0000000..2df3537
--- /dev/null
+++ b/vendor/unicode-ident/tests/static_size.rs
@@ -0,0 +1,95 @@
+#![allow(clippy::let_underscore_untyped, clippy::unreadable_literal)]
+
+use std::mem::size_of_val;
+
+#[test]
+fn test_size() {
+    #[allow(dead_code)]
+    #[path = "../src/tables.rs"]
+    mod tables;
+
+    let size = size_of_val(&tables::ASCII_START)
+        + size_of_val(&tables::ASCII_CONTINUE)
+        + size_of_val(&tables::TRIE_START)
+        + size_of_val(&tables::TRIE_CONTINUE)
+        + size_of_val(&tables::LEAF);
+    assert_eq!(10080, size);
+}
+
+#[test]
+fn test_xid_size() {
+    #[deny(dead_code)]
+    #[path = "tables/mod.rs"]
+    mod tables;
+
+    let size = size_of_val(tables::XID_START) + size_of_val(tables::XID_CONTINUE);
+    assert_eq!(11544, size);
+
+    let _ = tables::BY_NAME;
+}
+
+#[cfg(target_pointer_width = "64")]
+#[test]
+fn test_trieset_size() {
+    #[deny(dead_code)]
+    #[allow(clippy::redundant_static_lifetimes)]
+    #[path = "trie/trie.rs"]
+    mod trie;
+
+    let ucd_trie::TrieSet {
+        tree1_level1,
+        tree2_level1,
+        tree2_level2,
+        tree3_level1,
+        tree3_level2,
+        tree3_level3,
+    } = *trie::XID_START;
+
+    let start_size = size_of_val(trie::XID_START)
+        + size_of_val(tree1_level1)
+        + size_of_val(tree2_level1)
+        + size_of_val(tree2_level2)
+        + size_of_val(tree3_level1)
+        + size_of_val(tree3_level2)
+        + size_of_val(tree3_level3);
+
+    let ucd_trie::TrieSet {
+        tree1_level1,
+        tree2_level1,
+        tree2_level2,
+        tree3_level1,
+        tree3_level2,
+        tree3_level3,
+    } = *trie::XID_CONTINUE;
+
+    let continue_size = size_of_val(trie::XID_CONTINUE)
+        + size_of_val(tree1_level1)
+        + size_of_val(tree2_level1)
+        + size_of_val(tree2_level2)
+        + size_of_val(tree3_level1)
+        + size_of_val(tree3_level2)
+        + size_of_val(tree3_level3);
+
+    assert_eq!(10200, start_size + continue_size);
+
+    let _ = trie::BY_NAME;
+}
+
+#[test]
+fn test_fst_size() {
+    let xid_start_fst = include_bytes!("fst/xid_start.fst");
+    let xid_continue_fst = include_bytes!("fst/xid_continue.fst");
+    let size = xid_start_fst.len() + xid_continue_fst.len();
+    assert_eq!(138736, size);
+}
+
+#[test]
+fn test_roaring_size() {
+    #[path = "roaring/mod.rs"]
+    mod roaring;
+
+    let xid_start_bitmap = roaring::xid_start_bitmap();
+    let xid_continue_bitmap = roaring::xid_continue_bitmap();
+    let size = xid_start_bitmap.serialized_size() + xid_continue_bitmap.serialized_size();
+    assert_eq!(66104, size);
+}
diff --git a/vendor/unicode-ident/tests/tables/mod.rs b/vendor/unicode-ident/tests/tables/mod.rs
new file mode 100644
index 0000000..72bfd8b
--- /dev/null
+++ b/vendor/unicode-ident/tests/tables/mod.rs
@@ -0,0 +1,7 @@
+#![allow(clippy::module_inception)]
+
+#[allow(clippy::redundant_static_lifetimes)]
+#[rustfmt::skip]
+mod tables;
+
+pub(crate) use self::tables::*;
diff --git a/vendor/unicode-ident/tests/tables/tables.rs b/vendor/unicode-ident/tests/tables/tables.rs
new file mode 100644
index 0000000..ba7b061
--- /dev/null
+++ b/vendor/unicode-ident/tests/tables/tables.rs
@@ -0,0 +1,347 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//   ucd-generate property-bool UCD --include XID_Start,XID_Continue
+//
+// Unicode version: 15.1.0.
+//
+// ucd-generate 0.3.0 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(u32, u32)])] = &[
+    ("XID_Continue", XID_CONTINUE), ("XID_Start", XID_START),
+];
+
+pub const XID_CONTINUE: &'static [(u32, u32)] = &[
+    (48, 57), (65, 90), (95, 95), (97, 122), (170, 170), (181, 181), (183, 183),
+    (186, 186), (192, 214), (216, 246), (248, 705), (710, 721), (736, 740),
+    (748, 748), (750, 750), (768, 884), (886, 887), (891, 893), (895, 895),
+    (902, 906), (908, 908), (910, 929), (931, 1013), (1015, 1153), (1155, 1159),
+    (1162, 1327), (1329, 1366), (1369, 1369), (1376, 1416), (1425, 1469),
+    (1471, 1471), (1473, 1474), (1476, 1477), (1479, 1479), (1488, 1514),
+    (1519, 1522), (1552, 1562), (1568, 1641), (1646, 1747), (1749, 1756),
+    (1759, 1768), (1770, 1788), (1791, 1791), (1808, 1866), (1869, 1969),
+    (1984, 2037), (2042, 2042), (2045, 2045), (2048, 2093), (2112, 2139),
+    (2144, 2154), (2160, 2183), (2185, 2190), (2200, 2273), (2275, 2403),
+    (2406, 2415), (2417, 2435), (2437, 2444), (2447, 2448), (2451, 2472),
+    (2474, 2480), (2482, 2482), (2486, 2489), (2492, 2500), (2503, 2504),
+    (2507, 2510), (2519, 2519), (2524, 2525), (2527, 2531), (2534, 2545),
+    (2556, 2556), (2558, 2558), (2561, 2563), (2565, 2570), (2575, 2576),
+    (2579, 2600), (2602, 2608), (2610, 2611), (2613, 2614), (2616, 2617),
+    (2620, 2620), (2622, 2626), (2631, 2632), (2635, 2637), (2641, 2641),
+    (2649, 2652), (2654, 2654), (2662, 2677), (2689, 2691), (2693, 2701),
+    (2703, 2705), (2707, 2728), (2730, 2736), (2738, 2739), (2741, 2745),
+    (2748, 2757), (2759, 2761), (2763, 2765), (2768, 2768), (2784, 2787),
+    (2790, 2799), (2809, 2815), (2817, 2819), (2821, 2828), (2831, 2832),
+    (2835, 2856), (2858, 2864), (2866, 2867), (2869, 2873), (2876, 2884),
+    (2887, 2888), (2891, 2893), (2901, 2903), (2908, 2909), (2911, 2915),
+    (2918, 2927), (2929, 2929), (2946, 2947), (2949, 2954), (2958, 2960),
+    (2962, 2965), (2969, 2970), (2972, 2972), (2974, 2975), (2979, 2980),
+    (2984, 2986), (2990, 3001), (3006, 3010), (3014, 3016), (3018, 3021),
+    (3024, 3024), (3031, 3031), (3046, 3055), (3072, 3084), (3086, 3088),
+    (3090, 3112), (3114, 3129), (3132, 3140), (3142, 3144), (3146, 3149),
+    (3157, 3158), (3160, 3162), (3165, 3165), (3168, 3171), (3174, 3183),
+    (3200, 3203), (3205, 3212), (3214, 3216), (3218, 3240), (3242, 3251),
+    (3253, 3257), (3260, 3268), (3270, 3272), (3274, 3277), (3285,
3286), + (3293, 3294), (3296, 3299), (3302, 3311), (3313, 3315), (3328, 3340), + (3342, 3344), (3346, 3396), (3398, 3400), (3402, 3406), (3412, 3415), + (3423, 3427), (3430, 3439), (3450, 3455), (3457, 3459), (3461, 3478), + (3482, 3505), (3507, 3515), (3517, 3517), (3520, 3526), (3530, 3530), + (3535, 3540), (3542, 3542), (3544, 3551), (3558, 3567), (3570, 3571), + (3585, 3642), (3648, 3662), (3664, 3673), (3713, 3714), (3716, 3716), + (3718, 3722), (3724, 3747), (3749, 3749), (3751, 3773), (3776, 3780), + (3782, 3782), (3784, 3790), (3792, 3801), (3804, 3807), (3840, 3840), + (3864, 3865), (3872, 3881), (3893, 3893), (3895, 3895), (3897, 3897), + (3902, 3911), (3913, 3948), (3953, 3972), (3974, 3991), (3993, 4028), + (4038, 4038), (4096, 4169), (4176, 4253), (4256, 4293), (4295, 4295), + (4301, 4301), (4304, 4346), (4348, 4680), (4682, 4685), (4688, 4694), + (4696, 4696), (4698, 4701), (4704, 4744), (4746, 4749), (4752, 4784), + (4786, 4789), (4792, 4798), (4800, 4800), (4802, 4805), (4808, 4822), + (4824, 4880), (4882, 4885), (4888, 4954), (4957, 4959), (4969, 4977), + (4992, 5007), (5024, 5109), (5112, 5117), (5121, 5740), (5743, 5759), + (5761, 5786), (5792, 5866), (5870, 5880), (5888, 5909), (5919, 5940), + (5952, 5971), (5984, 5996), (5998, 6000), (6002, 6003), (6016, 6099), + (6103, 6103), (6108, 6109), (6112, 6121), (6155, 6157), (6159, 6169), + (6176, 6264), (6272, 6314), (6320, 6389), (6400, 6430), (6432, 6443), + (6448, 6459), (6470, 6509), (6512, 6516), (6528, 6571), (6576, 6601), + (6608, 6618), (6656, 6683), (6688, 6750), (6752, 6780), (6783, 6793), + (6800, 6809), (6823, 6823), (6832, 6845), (6847, 6862), (6912, 6988), + (6992, 7001), (7019, 7027), (7040, 7155), (7168, 7223), (7232, 7241), + (7245, 7293), (7296, 7304), (7312, 7354), (7357, 7359), (7376, 7378), + (7380, 7418), (7424, 7957), (7960, 7965), (7968, 8005), (8008, 8013), + (8016, 8023), (8025, 8025), (8027, 8027), (8029, 8029), (8031, 8061), + (8064, 8116), (8118, 8124), (8126, 8126), (8130, 8132), (8134, 8140), + (8144, 8147), (8150, 8155), (8160, 8172), (8178, 8180), (8182, 8188), + (8204, 8205), (8255, 8256), (8276, 8276), (8305, 8305), (8319, 8319), + (8336, 8348), (8400, 8412), (8417, 8417), (8421, 8432), (8450, 8450), + (8455, 8455), (8458, 8467), (8469, 8469), (8472, 8477), (8484, 8484), + (8486, 8486), (8488, 8488), (8490, 8505), (8508, 8511), (8517, 8521), + (8526, 8526), (8544, 8584), (11264, 11492), (11499, 11507), (11520, 11557), + (11559, 11559), (11565, 11565), (11568, 11623), (11631, 11631), + (11647, 11670), (11680, 11686), (11688, 11694), (11696, 11702), + (11704, 11710), (11712, 11718), (11720, 11726), (11728, 11734), + (11736, 11742), (11744, 11775), (12293, 12295), (12321, 12335), + (12337, 12341), (12344, 12348), (12353, 12438), (12441, 12442), + (12445, 12447), (12449, 12543), (12549, 12591), (12593, 12686), + (12704, 12735), (12784, 12799), (13312, 19903), (19968, 42124), + (42192, 42237), (42240, 42508), (42512, 42539), (42560, 42607), + (42612, 42621), (42623, 42737), (42775, 42783), (42786, 42888), + (42891, 42954), (42960, 42961), (42963, 42963), (42965, 42969), + (42994, 43047), (43052, 43052), (43072, 43123), (43136, 43205), + (43216, 43225), (43232, 43255), (43259, 43259), (43261, 43309), + (43312, 43347), (43360, 43388), (43392, 43456), (43471, 43481), + (43488, 43518), (43520, 43574), (43584, 43597), (43600, 43609), + (43616, 43638), (43642, 43714), (43739, 43741), (43744, 43759), + (43762, 43766), (43777, 43782), (43785, 43790), (43793, 43798), + (43808, 43814), (43816, 43822), 
(43824, 43866), (43868, 43881), + (43888, 44010), (44012, 44013), (44016, 44025), (44032, 55203), + (55216, 55238), (55243, 55291), (63744, 64109), (64112, 64217), + (64256, 64262), (64275, 64279), (64285, 64296), (64298, 64310), + (64312, 64316), (64318, 64318), (64320, 64321), (64323, 64324), + (64326, 64433), (64467, 64605), (64612, 64829), (64848, 64911), + (64914, 64967), (65008, 65017), (65024, 65039), (65056, 65071), + (65075, 65076), (65101, 65103), (65137, 65137), (65139, 65139), + (65143, 65143), (65145, 65145), (65147, 65147), (65149, 65149), + (65151, 65276), (65296, 65305), (65313, 65338), (65343, 65343), + (65345, 65370), (65381, 65470), (65474, 65479), (65482, 65487), + (65490, 65495), (65498, 65500), (65536, 65547), (65549, 65574), + (65576, 65594), (65596, 65597), (65599, 65613), (65616, 65629), + (65664, 65786), (65856, 65908), (66045, 66045), (66176, 66204), + (66208, 66256), (66272, 66272), (66304, 66335), (66349, 66378), + (66384, 66426), (66432, 66461), (66464, 66499), (66504, 66511), + (66513, 66517), (66560, 66717), (66720, 66729), (66736, 66771), + (66776, 66811), (66816, 66855), (66864, 66915), (66928, 66938), + (66940, 66954), (66956, 66962), (66964, 66965), (66967, 66977), + (66979, 66993), (66995, 67001), (67003, 67004), (67072, 67382), + (67392, 67413), (67424, 67431), (67456, 67461), (67463, 67504), + (67506, 67514), (67584, 67589), (67592, 67592), (67594, 67637), + (67639, 67640), (67644, 67644), (67647, 67669), (67680, 67702), + (67712, 67742), (67808, 67826), (67828, 67829), (67840, 67861), + (67872, 67897), (67968, 68023), (68030, 68031), (68096, 68099), + (68101, 68102), (68108, 68115), (68117, 68119), (68121, 68149), + (68152, 68154), (68159, 68159), (68192, 68220), (68224, 68252), + (68288, 68295), (68297, 68326), (68352, 68405), (68416, 68437), + (68448, 68466), (68480, 68497), (68608, 68680), (68736, 68786), + (68800, 68850), (68864, 68903), (68912, 68921), (69248, 69289), + (69291, 69292), (69296, 69297), (69373, 69404), (69415, 69415), + (69424, 69456), (69488, 69509), (69552, 69572), (69600, 69622), + (69632, 69702), (69734, 69749), (69759, 69818), (69826, 69826), + (69840, 69864), (69872, 69881), (69888, 69940), (69942, 69951), + (69956, 69959), (69968, 70003), (70006, 70006), (70016, 70084), + (70089, 70092), (70094, 70106), (70108, 70108), (70144, 70161), + (70163, 70199), (70206, 70209), (70272, 70278), (70280, 70280), + (70282, 70285), (70287, 70301), (70303, 70312), (70320, 70378), + (70384, 70393), (70400, 70403), (70405, 70412), (70415, 70416), + (70419, 70440), (70442, 70448), (70450, 70451), (70453, 70457), + (70459, 70468), (70471, 70472), (70475, 70477), (70480, 70480), + (70487, 70487), (70493, 70499), (70502, 70508), (70512, 70516), + (70656, 70730), (70736, 70745), (70750, 70753), (70784, 70853), + (70855, 70855), (70864, 70873), (71040, 71093), (71096, 71104), + (71128, 71133), (71168, 71232), (71236, 71236), (71248, 71257), + (71296, 71352), (71360, 71369), (71424, 71450), (71453, 71467), + (71472, 71481), (71488, 71494), (71680, 71738), (71840, 71913), + (71935, 71942), (71945, 71945), (71948, 71955), (71957, 71958), + (71960, 71989), (71991, 71992), (71995, 72003), (72016, 72025), + (72096, 72103), (72106, 72151), (72154, 72161), (72163, 72164), + (72192, 72254), (72263, 72263), (72272, 72345), (72349, 72349), + (72368, 72440), (72704, 72712), (72714, 72758), (72760, 72768), + (72784, 72793), (72818, 72847), (72850, 72871), (72873, 72886), + (72960, 72966), (72968, 72969), (72971, 73014), (73018, 73018), + (73020, 73021), 
(73023, 73031), (73040, 73049), (73056, 73061), + (73063, 73064), (73066, 73102), (73104, 73105), (73107, 73112), + (73120, 73129), (73440, 73462), (73472, 73488), (73490, 73530), + (73534, 73538), (73552, 73561), (73648, 73648), (73728, 74649), + (74752, 74862), (74880, 75075), (77712, 77808), (77824, 78895), + (78912, 78933), (82944, 83526), (92160, 92728), (92736, 92766), + (92768, 92777), (92784, 92862), (92864, 92873), (92880, 92909), + (92912, 92916), (92928, 92982), (92992, 92995), (93008, 93017), + (93027, 93047), (93053, 93071), (93760, 93823), (93952, 94026), + (94031, 94087), (94095, 94111), (94176, 94177), (94179, 94180), + (94192, 94193), (94208, 100343), (100352, 101589), (101632, 101640), + (110576, 110579), (110581, 110587), (110589, 110590), (110592, 110882), + (110898, 110898), (110928, 110930), (110933, 110933), (110948, 110951), + (110960, 111355), (113664, 113770), (113776, 113788), (113792, 113800), + (113808, 113817), (113821, 113822), (118528, 118573), (118576, 118598), + (119141, 119145), (119149, 119154), (119163, 119170), (119173, 119179), + (119210, 119213), (119362, 119364), (119808, 119892), (119894, 119964), + (119966, 119967), (119970, 119970), (119973, 119974), (119977, 119980), + (119982, 119993), (119995, 119995), (119997, 120003), (120005, 120069), + (120071, 120074), (120077, 120084), (120086, 120092), (120094, 120121), + (120123, 120126), (120128, 120132), (120134, 120134), (120138, 120144), + (120146, 120485), (120488, 120512), (120514, 120538), (120540, 120570), + (120572, 120596), (120598, 120628), (120630, 120654), (120656, 120686), + (120688, 120712), (120714, 120744), (120746, 120770), (120772, 120779), + (120782, 120831), (121344, 121398), (121403, 121452), (121461, 121461), + (121476, 121476), (121499, 121503), (121505, 121519), (122624, 122654), + (122661, 122666), (122880, 122886), (122888, 122904), (122907, 122913), + (122915, 122916), (122918, 122922), (122928, 122989), (123023, 123023), + (123136, 123180), (123184, 123197), (123200, 123209), (123214, 123214), + (123536, 123566), (123584, 123641), (124112, 124153), (124896, 124902), + (124904, 124907), (124909, 124910), (124912, 124926), (124928, 125124), + (125136, 125142), (125184, 125259), (125264, 125273), (126464, 126467), + (126469, 126495), (126497, 126498), (126500, 126500), (126503, 126503), + (126505, 126514), (126516, 126519), (126521, 126521), (126523, 126523), + (126530, 126530), (126535, 126535), (126537, 126537), (126539, 126539), + (126541, 126543), (126545, 126546), (126548, 126548), (126551, 126551), + (126553, 126553), (126555, 126555), (126557, 126557), (126559, 126559), + (126561, 126562), (126564, 126564), (126567, 126570), (126572, 126578), + (126580, 126583), (126585, 126588), (126590, 126590), (126592, 126601), + (126603, 126619), (126625, 126627), (126629, 126633), (126635, 126651), + (130032, 130041), (131072, 173791), (173824, 177977), (177984, 178205), + (178208, 183969), (183984, 191456), (191472, 192093), (194560, 195101), + (196608, 201546), (201552, 205743), (917760, 917999), +]; + +pub const XID_START: &'static [(u32, u32)] = &[ + (65, 90), (97, 122), (170, 170), (181, 181), (186, 186), (192, 214), + (216, 246), (248, 705), (710, 721), (736, 740), (748, 748), (750, 750), + (880, 884), (886, 887), (891, 893), (895, 895), (902, 902), (904, 906), + (908, 908), (910, 929), (931, 1013), (1015, 1153), (1162, 1327), + (1329, 1366), (1369, 1369), (1376, 1416), (1488, 1514), (1519, 1522), + (1568, 1610), (1646, 1647), (1649, 1747), (1749, 1749), (1765, 1766), + 
(1774, 1775), (1786, 1788), (1791, 1791), (1808, 1808), (1810, 1839), + (1869, 1957), (1969, 1969), (1994, 2026), (2036, 2037), (2042, 2042), + (2048, 2069), (2074, 2074), (2084, 2084), (2088, 2088), (2112, 2136), + (2144, 2154), (2160, 2183), (2185, 2190), (2208, 2249), (2308, 2361), + (2365, 2365), (2384, 2384), (2392, 2401), (2417, 2432), (2437, 2444), + (2447, 2448), (2451, 2472), (2474, 2480), (2482, 2482), (2486, 2489), + (2493, 2493), (2510, 2510), (2524, 2525), (2527, 2529), (2544, 2545), + (2556, 2556), (2565, 2570), (2575, 2576), (2579, 2600), (2602, 2608), + (2610, 2611), (2613, 2614), (2616, 2617), (2649, 2652), (2654, 2654), + (2674, 2676), (2693, 2701), (2703, 2705), (2707, 2728), (2730, 2736), + (2738, 2739), (2741, 2745), (2749, 2749), (2768, 2768), (2784, 2785), + (2809, 2809), (2821, 2828), (2831, 2832), (2835, 2856), (2858, 2864), + (2866, 2867), (2869, 2873), (2877, 2877), (2908, 2909), (2911, 2913), + (2929, 2929), (2947, 2947), (2949, 2954), (2958, 2960), (2962, 2965), + (2969, 2970), (2972, 2972), (2974, 2975), (2979, 2980), (2984, 2986), + (2990, 3001), (3024, 3024), (3077, 3084), (3086, 3088), (3090, 3112), + (3114, 3129), (3133, 3133), (3160, 3162), (3165, 3165), (3168, 3169), + (3200, 3200), (3205, 3212), (3214, 3216), (3218, 3240), (3242, 3251), + (3253, 3257), (3261, 3261), (3293, 3294), (3296, 3297), (3313, 3314), + (3332, 3340), (3342, 3344), (3346, 3386), (3389, 3389), (3406, 3406), + (3412, 3414), (3423, 3425), (3450, 3455), (3461, 3478), (3482, 3505), + (3507, 3515), (3517, 3517), (3520, 3526), (3585, 3632), (3634, 3634), + (3648, 3654), (3713, 3714), (3716, 3716), (3718, 3722), (3724, 3747), + (3749, 3749), (3751, 3760), (3762, 3762), (3773, 3773), (3776, 3780), + (3782, 3782), (3804, 3807), (3840, 3840), (3904, 3911), (3913, 3948), + (3976, 3980), (4096, 4138), (4159, 4159), (4176, 4181), (4186, 4189), + (4193, 4193), (4197, 4198), (4206, 4208), (4213, 4225), (4238, 4238), + (4256, 4293), (4295, 4295), (4301, 4301), (4304, 4346), (4348, 4680), + (4682, 4685), (4688, 4694), (4696, 4696), (4698, 4701), (4704, 4744), + (4746, 4749), (4752, 4784), (4786, 4789), (4792, 4798), (4800, 4800), + (4802, 4805), (4808, 4822), (4824, 4880), (4882, 4885), (4888, 4954), + (4992, 5007), (5024, 5109), (5112, 5117), (5121, 5740), (5743, 5759), + (5761, 5786), (5792, 5866), (5870, 5880), (5888, 5905), (5919, 5937), + (5952, 5969), (5984, 5996), (5998, 6000), (6016, 6067), (6103, 6103), + (6108, 6108), (6176, 6264), (6272, 6312), (6314, 6314), (6320, 6389), + (6400, 6430), (6480, 6509), (6512, 6516), (6528, 6571), (6576, 6601), + (6656, 6678), (6688, 6740), (6823, 6823), (6917, 6963), (6981, 6988), + (7043, 7072), (7086, 7087), (7098, 7141), (7168, 7203), (7245, 7247), + (7258, 7293), (7296, 7304), (7312, 7354), (7357, 7359), (7401, 7404), + (7406, 7411), (7413, 7414), (7418, 7418), (7424, 7615), (7680, 7957), + (7960, 7965), (7968, 8005), (8008, 8013), (8016, 8023), (8025, 8025), + (8027, 8027), (8029, 8029), (8031, 8061), (8064, 8116), (8118, 8124), + (8126, 8126), (8130, 8132), (8134, 8140), (8144, 8147), (8150, 8155), + (8160, 8172), (8178, 8180), (8182, 8188), (8305, 8305), (8319, 8319), + (8336, 8348), (8450, 8450), (8455, 8455), (8458, 8467), (8469, 8469), + (8472, 8477), (8484, 8484), (8486, 8486), (8488, 8488), (8490, 8505), + (8508, 8511), (8517, 8521), (8526, 8526), (8544, 8584), (11264, 11492), + (11499, 11502), (11506, 11507), (11520, 11557), (11559, 11559), + (11565, 11565), (11568, 11623), (11631, 11631), (11648, 11670), + (11680, 11686), (11688, 11694), 
(11696, 11702), (11704, 11710), + (11712, 11718), (11720, 11726), (11728, 11734), (11736, 11742), + (12293, 12295), (12321, 12329), (12337, 12341), (12344, 12348), + (12353, 12438), (12445, 12447), (12449, 12538), (12540, 12543), + (12549, 12591), (12593, 12686), (12704, 12735), (12784, 12799), + (13312, 19903), (19968, 42124), (42192, 42237), (42240, 42508), + (42512, 42527), (42538, 42539), (42560, 42606), (42623, 42653), + (42656, 42735), (42775, 42783), (42786, 42888), (42891, 42954), + (42960, 42961), (42963, 42963), (42965, 42969), (42994, 43009), + (43011, 43013), (43015, 43018), (43020, 43042), (43072, 43123), + (43138, 43187), (43250, 43255), (43259, 43259), (43261, 43262), + (43274, 43301), (43312, 43334), (43360, 43388), (43396, 43442), + (43471, 43471), (43488, 43492), (43494, 43503), (43514, 43518), + (43520, 43560), (43584, 43586), (43588, 43595), (43616, 43638), + (43642, 43642), (43646, 43695), (43697, 43697), (43701, 43702), + (43705, 43709), (43712, 43712), (43714, 43714), (43739, 43741), + (43744, 43754), (43762, 43764), (43777, 43782), (43785, 43790), + (43793, 43798), (43808, 43814), (43816, 43822), (43824, 43866), + (43868, 43881), (43888, 44002), (44032, 55203), (55216, 55238), + (55243, 55291), (63744, 64109), (64112, 64217), (64256, 64262), + (64275, 64279), (64285, 64285), (64287, 64296), (64298, 64310), + (64312, 64316), (64318, 64318), (64320, 64321), (64323, 64324), + (64326, 64433), (64467, 64605), (64612, 64829), (64848, 64911), + (64914, 64967), (65008, 65017), (65137, 65137), (65139, 65139), + (65143, 65143), (65145, 65145), (65147, 65147), (65149, 65149), + (65151, 65276), (65313, 65338), (65345, 65370), (65382, 65437), + (65440, 65470), (65474, 65479), (65482, 65487), (65490, 65495), + (65498, 65500), (65536, 65547), (65549, 65574), (65576, 65594), + (65596, 65597), (65599, 65613), (65616, 65629), (65664, 65786), + (65856, 65908), (66176, 66204), (66208, 66256), (66304, 66335), + (66349, 66378), (66384, 66421), (66432, 66461), (66464, 66499), + (66504, 66511), (66513, 66517), (66560, 66717), (66736, 66771), + (66776, 66811), (66816, 66855), (66864, 66915), (66928, 66938), + (66940, 66954), (66956, 66962), (66964, 66965), (66967, 66977), + (66979, 66993), (66995, 67001), (67003, 67004), (67072, 67382), + (67392, 67413), (67424, 67431), (67456, 67461), (67463, 67504), + (67506, 67514), (67584, 67589), (67592, 67592), (67594, 67637), + (67639, 67640), (67644, 67644), (67647, 67669), (67680, 67702), + (67712, 67742), (67808, 67826), (67828, 67829), (67840, 67861), + (67872, 67897), (67968, 68023), (68030, 68031), (68096, 68096), + (68112, 68115), (68117, 68119), (68121, 68149), (68192, 68220), + (68224, 68252), (68288, 68295), (68297, 68324), (68352, 68405), + (68416, 68437), (68448, 68466), (68480, 68497), (68608, 68680), + (68736, 68786), (68800, 68850), (68864, 68899), (69248, 69289), + (69296, 69297), (69376, 69404), (69415, 69415), (69424, 69445), + (69488, 69505), (69552, 69572), (69600, 69622), (69635, 69687), + (69745, 69746), (69749, 69749), (69763, 69807), (69840, 69864), + (69891, 69926), (69956, 69956), (69959, 69959), (69968, 70002), + (70006, 70006), (70019, 70066), (70081, 70084), (70106, 70106), + (70108, 70108), (70144, 70161), (70163, 70187), (70207, 70208), + (70272, 70278), (70280, 70280), (70282, 70285), (70287, 70301), + (70303, 70312), (70320, 70366), (70405, 70412), (70415, 70416), + (70419, 70440), (70442, 70448), (70450, 70451), (70453, 70457), + (70461, 70461), (70480, 70480), (70493, 70497), (70656, 70708), + (70727, 70730), 
(70751, 70753), (70784, 70831), (70852, 70853), + (70855, 70855), (71040, 71086), (71128, 71131), (71168, 71215), + (71236, 71236), (71296, 71338), (71352, 71352), (71424, 71450), + (71488, 71494), (71680, 71723), (71840, 71903), (71935, 71942), + (71945, 71945), (71948, 71955), (71957, 71958), (71960, 71983), + (71999, 71999), (72001, 72001), (72096, 72103), (72106, 72144), + (72161, 72161), (72163, 72163), (72192, 72192), (72203, 72242), + (72250, 72250), (72272, 72272), (72284, 72329), (72349, 72349), + (72368, 72440), (72704, 72712), (72714, 72750), (72768, 72768), + (72818, 72847), (72960, 72966), (72968, 72969), (72971, 73008), + (73030, 73030), (73056, 73061), (73063, 73064), (73066, 73097), + (73112, 73112), (73440, 73458), (73474, 73474), (73476, 73488), + (73490, 73523), (73648, 73648), (73728, 74649), (74752, 74862), + (74880, 75075), (77712, 77808), (77824, 78895), (78913, 78918), + (82944, 83526), (92160, 92728), (92736, 92766), (92784, 92862), + (92880, 92909), (92928, 92975), (92992, 92995), (93027, 93047), + (93053, 93071), (93760, 93823), (93952, 94026), (94032, 94032), + (94099, 94111), (94176, 94177), (94179, 94179), (94208, 100343), + (100352, 101589), (101632, 101640), (110576, 110579), (110581, 110587), + (110589, 110590), (110592, 110882), (110898, 110898), (110928, 110930), + (110933, 110933), (110948, 110951), (110960, 111355), (113664, 113770), + (113776, 113788), (113792, 113800), (113808, 113817), (119808, 119892), + (119894, 119964), (119966, 119967), (119970, 119970), (119973, 119974), + (119977, 119980), (119982, 119993), (119995, 119995), (119997, 120003), + (120005, 120069), (120071, 120074), (120077, 120084), (120086, 120092), + (120094, 120121), (120123, 120126), (120128, 120132), (120134, 120134), + (120138, 120144), (120146, 120485), (120488, 120512), (120514, 120538), + (120540, 120570), (120572, 120596), (120598, 120628), (120630, 120654), + (120656, 120686), (120688, 120712), (120714, 120744), (120746, 120770), + (120772, 120779), (122624, 122654), (122661, 122666), (122928, 122989), + (123136, 123180), (123191, 123197), (123214, 123214), (123536, 123565), + (123584, 123627), (124112, 124139), (124896, 124902), (124904, 124907), + (124909, 124910), (124912, 124926), (124928, 125124), (125184, 125251), + (125259, 125259), (126464, 126467), (126469, 126495), (126497, 126498), + (126500, 126500), (126503, 126503), (126505, 126514), (126516, 126519), + (126521, 126521), (126523, 126523), (126530, 126530), (126535, 126535), + (126537, 126537), (126539, 126539), (126541, 126543), (126545, 126546), + (126548, 126548), (126551, 126551), (126553, 126553), (126555, 126555), + (126557, 126557), (126559, 126559), (126561, 126562), (126564, 126564), + (126567, 126570), (126572, 126578), (126580, 126583), (126585, 126588), + (126590, 126590), (126592, 126601), (126603, 126619), (126625, 126627), + (126629, 126633), (126635, 126651), (131072, 173791), (173824, 177977), + (177984, 178205), (178208, 183969), (183984, 191456), (191472, 192093), + (194560, 195101), (196608, 201546), (201552, 205743), +]; diff --git a/vendor/unicode-ident/tests/trie/mod.rs b/vendor/unicode-ident/tests/trie/mod.rs new file mode 100644 index 0000000..3e31c5c --- /dev/null +++ b/vendor/unicode-ident/tests/trie/mod.rs @@ -0,0 +1,7 @@ +#![allow(clippy::module_inception)] + +#[allow(dead_code, clippy::redundant_static_lifetimes, clippy::unreadable_literal)] +#[rustfmt::skip] +mod trie; + +pub(crate) use self::trie::*; diff --git a/vendor/unicode-ident/tests/trie/trie.rs 
b/vendor/unicode-ident/tests/trie/trie.rs new file mode 100644 index 0000000..fc805f4 --- /dev/null +++ b/vendor/unicode-ident/tests/trie/trie.rs @@ -0,0 +1,445 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate property-bool UCD --include XID_Start,XID_Continue --trie-set +// +// Unicode version: 15.1.0. +// +// ucd-generate 0.3.0 is available on crates.io. + +pub const BY_NAME: &'static [(&'static str, &'static ::ucd_trie::TrieSet)] = &[ + ("XID_Continue", XID_CONTINUE), ("XID_Start", XID_START), +]; + +pub const XID_CONTINUE: &'static ::ucd_trie::TrieSet = &::ucd_trie::TrieSet { + tree1_level1: &[ + 0x3FF000000000000, 0x7FFFFFE87FFFFFE, 0x4A0040000000000, + 0xFF7FFFFFFF7FFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x501F0003FFC3, + 0xFFFFFFFFFFFFFFFF, 0xB8DFFFFFFFFFFFFF, 0xFFFFFFFBFFFFD7C0, + 0xFFBFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFCFB, 0xFFFFFFFFFFFFFFFF, 0xFFFEFFFFFFFFFFFF, + 0xFFFFFFFF027FFFFF, 0xBFFFFFFFFFFE01FF, 0x787FFFFFF00B6, + 0xFFFFFFFF07FF0000, 0xFFFFC3FFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0x9FFFFDFF9FEFFFFF, 0xFFFFFFFFFFFF0000, 0xFFFFFFFFFFFFE7FF, + 0x3FFFFFFFFFFFF, 0x243FFFFFFFFFFFFF, + ], + tree2_level1: &[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 4, 32, 33, 34, 4, 4, 4, 4, 4, + 35, 36, 37, 38, 39, 40, 41, 42, 4, 4, 4, 4, 4, 4, 4, 4, 43, 44, 45, 46, + 47, 4, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 4, 61, 4, 62, + 63, 64, 65, 66, 4, 4, 4, 4, 4, 4, 4, 4, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 4, 4, 4, 79, 80, 81, 82, 83, 78, 78, 78, + 78, 78, 78, 78, 78, 84, 42, 85, 4, 86, 4, 87, 88, 78, 78, 78, 78, 78, 78, + 78, 78, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 78, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 89, 90, 4, 4, 4, 4, 91, 92, 4, 93, 94, 4, 95, 96, 97, 62, 4, + 98, 99, 100, 4, 101, 102, 103, 4, 104, 105, 106, 4, 107, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 108, 109, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 4, 4, 4, 4, 4, 99, 4, 110, 111, 112, 93, 113, 4, 114, 4, + 4, 115, 116, 117, 118, 119, 120, 4, 121, 122, 123, 124, 125, + ], + tree2_level2: &[ + 0x3FFFFFFFFFFF, 0xFFFF07FF0FFFFFFF, 0xFFFFFFFFFF007EFF, + 0xFFFFFFFBFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFEFFCFFFFFFFFF, + 0xF3C5FDFFFFF99FEF, 0x5003FFCFB080799F, 0xD36DFDFFFFF987EE, + 0x3FFFC05E023987, 0xF3EDFDFFFFFBBFEE, 0xFE00FFCF00013BBF, + 0xF3EDFDFFFFF99FEE, 0x2FFCFB0E0399F, 0xC3FFC718D63DC7EC, 0xFFC000813DC7, + 0xF3FFFDFFFFFDDFFF, 0xFFCF27603DDF, 0xF3EFFDFFFFFDDFEF, 0xEFFCF60603DDF, + 0xFFFFFFFFFFFDDFFF, 0xFC00FFCF80F07DDF, 0x2FFBFFFFFC7FFFEE, + 0xCFFC0FF5F847F, 0x7FFFFFFFFFFFFFE, 0x3FF7FFF, 0x3FFFFFAFFFFFF7D6, + 0xF3FF7F5F, 0xC2A003FF03000001, 0xFFFE1FFFFFFFFEFF, 0x1FFFFFFFFEFFFFDF, + 0x40, 0xFFFFFFFFFFFF03FF, 0xFFFFFFFF3FFFFFFF, 0xF7FFFFFFFFFF20BF, + 0xFFFFFFFF3D7F3DFF, 0x7F3DFFFFFFFF3DFF, 0xFFFFFFFFFF7FFF3D, + 0xFFFFFFFFFF3DFFFF, 0x3FE00E7FFFFFF, 0xFFFFFFFF0000FFFF, + 0x3F3FFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE, 0xFFFF9FFFFFFFFFFF, + 0xFFFFFFFF07FFFFFE, 0x1FFC7FFFFFFFFFF, 0x1FFFFF803FFFFF, 0xDDFFF000FFFFF, + 0x3FF308FFFFF, 0xFFFFFFFF03FFB800, 0x1FFFFFFFFFFFFFF, 0xFFFF07FFFFFFFFFF, + 0x3FFFFFFFFFFFFF, 0xFFF0FFF7FFFFFFF, 0x1F3FFFFFFFFFC0, 0xFFFF0FFFFFFFFFFF, + 0x7FF03FF, 0xFFFFFFFF0FFFFFFF, 0x9FFFFFFF7FFFFFFF, 0xBFFF008003FF03FF, + 0x7FFF, 0xFF80003FF1FFF, 0xFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFF, + 0x3FFFFFFFFFFFE3FF, 0xE7FFFFFFFFFF01FF, 0x7FFFFFFFFF70000, + 0xFFFFFFFF3F3FFFFF, 0x3FFFFFFFAAFF3F3F, 0x5FDFFFFFFFFFFFFF, + 0x1FDC1FFF0FCF1FDC, 0x8000000000003000, 0x8002000000100001, 0x1FFF0000, + 0x1FFE21FFF0000, 0xF3FFFD503F2FFC84, 0xFFFFFFFF000043E0, 0x1FF, 0, + 0xFF81FFFFFFFFF, 0xFFFF20BFFFFFFFFF, 0x800080FFFFFFFFFF, + 0x7F7F7F7F007FFFFF, 0xFFFFFFFF7F7F7F7F, 0x1F3EFFFE000000E0, + 0xFFFFFFFEE67FFFFF, 0xFFFEFFFFFFFFFFE0, 0xFFFFFFFF00007FFF, + 0xFFFF000000000000, 0x1FFF, 0x3FFFFFFFFFFF0000, 0xFFFFFFF1FFF, + 0xBFF0FFFFFFFFFFFF, 0x3FFFFFFFFFFFF, 0xFFFFFFFCFF800000, + 0xFFFFFFFFFFFFF9FF, 0xFFFC000003EB07FF, 0x10FFFFFFFFFF, + 0xE8FFFFFF03FF003F, 0xFFFF3FFFFFFFFFFF, 0x1FFFFFFF000FFFFF, + 0x7FFFFFFF03FF8001, 0x7FFFFFFFFFFFFF, 0xFC7FFFFF03FF3FFF, + 0x7CFFFF38000007, 0xFFFF7F7F007E7E7E, 0xFFFF03FFF7FFFFFF, + 0x3FF37FFFFFFFFFF, 0xFFFF000FFFFFFFFF, 0xFFFFFFFFFFFF87F, 0x3FFFFFF, + 0x5F7FFDFFE0F8007F, 0xFFFFFFFFFFFFFFDB, 0xFFFFFFFFFFF80000, + 0xFFFFFFF03FFFFFFF, 0x3FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFF0000, + 0xFFFFFFFFFFFCFFFF, 0x3FF0000000000FF, 0x18FFFF0000FFFF, + 0xAA8A00000000E000, 0x1FFFFFFFFFFFFFFF, 0x87FFFFFE03FF0000, + 0xFFFFFFE007FFFFFE, 
0x7FFFFFFFFFFFFFFF, 0x1CFCFCFC, + ], + tree3_level1: &[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 5, 9, 10, 11, 12, 13, 14, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 15, 16, 17, 7, 18, 19, 7, 20, 21, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 22, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + ], + tree3_level2: &[ + 0, 1, 2, 3, 4, 5, 4, 6, 4, 4, 7, 8, 9, 10, 11, 12, 2, 2, 13, 14, 15, 16, + 17, 4, 2, 2, 2, 2, 18, 19, 20, 4, 21, 22, 23, 24, 25, 4, 26, 4, 27, 28, + 29, 30, 31, 32, 33, 4, 2, 34, 35, 35, 36, 4, 4, 4, 4, 4, 37, 38, 39, 40, + 41, 42, 2, 43, 3, 44, 45, 46, 2, 47, 48, 49, 50, 51, 52, 53, 4, 4, 2, 54, + 2, 55, 4, 4, 56, 57, 2, 58, 59, 60, 61, 62, 4, 4, 3, 4, 63, 64, 65, 66, + 67, 68, 69, 70, 71, 59, 4, 4, 4, 4, 72, 73, 74, 4, 75, 76, 77, 4, 4, 4, 4, + 78, 79, 80, 81, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 82, 4, 2, 83, + 2, 2, 2, 84, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 85, 86, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 87, 88, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 62, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, + 2, 2, 2, 2, 2, 2, 2, 59, 89, 69, 90, 18, 91, 92, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 2, 4, 4, 2, 93, 94, 95, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 96, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 88, 34, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 97, 2, 2, 2, 2, 98, 99, 2, 2, 2, 2, 2, 100, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 2, 101, 102, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 103, 62, 4, 4, 4, 4, 4, 4, 4, 104, 105, 4, 4, 106, 4, + 4, 4, 4, 4, 4, 2, 107, 108, 109, 110, 111, 2, 2, 2, 2, 112, 113, 114, 115, + 116, 117, 4, 4, 4, 4, 4, 4, 4, 4, 118, 119, 120, 4, 4, 4, 4, 4, 4, 4, 4, + 
4, 4, 4, 4, 4, 4, 4, 4, 4, 121, 4, 4, 4, 122, 123, 124, 4, 125, 126, 4, 4, + 4, 4, 127, 128, 4, 4, 4, 4, 4, 4, 4, 129, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 130, 2, 2, 2, 131, 2, 132, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 133, 134, 135, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 136, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 137, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 128, 2, 2, 2, + 11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 138, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 139, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 140, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, + 2, 2, 2, 2, 140, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 141, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 87, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 87, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + ], + tree3_level3: &[ + 0xB7FFFF7FFFFFEFFF, 0x3FFF3FFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFF, 0, + 0x1FFFFFFFFFFFFF, 0x2000000000000000, 0xFFFFFFFF1FFFFFFF, 0x10001FFFF, + 0xFFFFE000FFFFFFFF, 0x7FFFFFFFFFF07FF, 0xFFFFFFFF3FFFFFFF, 0x3EFF0F, + 0xFFFF03FF3FFFFFFF, 0xFFFFFFFFF0FFFFF, 0xFFFF00FFFFFFFFFF, + 0xF7FF000FFFFFFFFF, 0x1BFBFFFBFFB7F7FF, 0x7FFFFFFFFFFFFF, 0xFF003FFFFF, + 0x7FDFFFFFFFFFFBF, 0x91BFFFFFFFFFFD3F, 0x7FFFFF003FFFFF, 0x7FFFFFFF, + 0x37FFFF00000000, 0x3FFFFFF003FFFFF, 0xC0FFFFFFFFFFFFFF, + 0x873FFFFFFEEFF06F, 0x1FFFFFFF00000000, 0x1FFFFFFF, 0x7FFFFFFEFF, + 0x3FFFFFFFFFFFFF, 0x7FFFF003FFFFF, 0x3FFFF, 0x1FF, 0x7FFFFFFFFFFFF, + 0x3FF00FFFFFFFFFF, 0x31BFFFFFFFFFF, 0xE000000000000000, + 0xFFFF00801FFFFFFF, 0xFFFF00000001FFFF, 0xFFFF00000000003F, + 0x7FFFFF0000001F, 0x803FFFC00000007F, 0x3FF01FFFFFF0004, + 0xFFDFFFFFFFFFFFFF, 0x4FFFFFFFFF00F0, 0x17FFDE1F, 0xC0FFFFFFFFFBFFFF, 0x3, + 0xFFFF01FFBFFFBD7F, 0x3FF07FFFFFFFFFF, 0xFBEDFDFFFFF99FEF, + 0x1F1FCFE081399F, 0x3C3FF07FF, 0x3FF00BF, 0xFF3FFFFFFFFFFFFF, 0x3F000001, + 0x3FF0011, 0x1FFFFFFFFFFFFFF, 0x3FF, 0x3FF0FFFE7FFFFFF, 0x7F, + 0xFFFFFFFF00000000, 0x800003FFFFFFFFFF, 0xF9BFFFFFFF6FF27F, 0x3FF000F, + 0xFFFFFCFF00000000, 0x1BFCFFFFFF, 0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFF0080, + 0xFFFF000023FFFFFF, 0xFF7FFFFFFFFFFDFF, 0xFFFC000003FF0001, + 0x7FFEFFFFFCFFFF, 0xB47FFFFFFFFFFB7F, 0xFFFFFDBF03FF00FF, 0x3FF01FB7FFF, + 0x7FFFFF00000000, 0xC7FFFFFFFFFDFFFF, 0x3FF0007, 0x1000000000000, + 0x3FFFFFF, 0x7FFFFFFFFFFF, 0xF, 
0xFFFFFFFFFFFF0000, 0x1FFFFFFFFFFFF, + 0xFFFFFFFFFFFF, 0x3FFFFF, 0xFFFF03FF7FFFFFFF, 0x1F3FFFFFFF03FF, + 0xE0FFFFF803FF000F, 0xFFFF, 0xFFFFFFFFFFFF87FF, 0xFFFF80FF, + 0x3001B00000000, 0xFFFFFFFFFFFFFF, 0x6FEF000000000000, 0x40007FFFFFFFF, + 0xFFFF00F000270000, 0xFFFFFFFFFFFFFFF, 0x1FFF07FFFFFFFFFF, 0x63FF01FF, + 0xFFFF3FFFFFFFFFFF, 0xF807E3E000000000, 0x3C0000000FE7, 0x1C, + 0xFFFFFFFFFFDFFFFF, 0xEBFFDE64DFFFFFFF, 0xFFFFFFFFFFFFFFEF, + 0x7BFFFFFFDFDFE7BF, 0xFFFFFFFFFFFDFC5F, 0xFFFFFF3FFFFFFFFF, + 0xF7FFFFFFF7FFFFFD, 0xFFDFFFFFFFDFFFFF, 0xFFFF7FFFFFFF7FFF, + 0xFFFFFDFFFFFFFDFF, 0xFFFFFFFFFFFFCFF7, 0xF87FFFFFFFFFFFFF, + 0x201FFFFFFFFFFF, 0xFFFEF8000010, 0x7E07FFFFFFF, 0xFFFF07DBF9FFFF7F, + 0x3FFFFFFFFFFF, 0x8000, 0x3FFF1FFFFFFFFFFF, 0x43FF, 0x7FFFFFFF0000, + 0x3FFFFFFFFFFFFFF, 0x3FFFFFFFFFF0000, 0x7FFF6F7F00000000, 0x7F001F, + 0x3FF0FFF, 0xAF7FE96FFFFFFEF, 0x5EF7F796AA96EA84, 0xFFFFBEE0FFFFBFF, + 0x3FF000000000000, 0xFFFFFFFF, 0xFFFF0003FFFFFFFF, 0xFFFF0001FFFFFFFF, + 0x3FFFFFFF, 0xFFFFFFFFFFFF07FF, + ], +}; + +pub const XID_START: &'static ::ucd_trie::TrieSet = &::ucd_trie::TrieSet { + tree1_level1: &[ + 0, 0x7FFFFFE07FFFFFE, 0x420040000000000, 0xFF7FFFFFFF7FFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0x501F0003FFC3, 0, 0xB8DF000000000000, + 0xFFFFFFFBFFFFD740, 0xFFBFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFC03, 0xFFFFFFFFFFFFFFFF, + 0xFFFEFFFFFFFFFFFF, 0xFFFFFFFF027FFFFF, 0x1FF, 0x787FFFFFF0000, + 0xFFFFFFFF00000000, 0xFFFEC000000007FF, 0xFFFFFFFFFFFFFFFF, + 0x9C00C060002FFFFF, 0xFFFFFFFD0000, 0xFFFFFFFFFFFFE000, 0x2003FFFFFFFFF, + 0x43007FFFFFFFC00, + ], + tree2_level1: &[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 35, 35, + 35, 35, 36, 37, 38, 39, 40, 41, 42, 43, 35, 35, 35, 35, 35, 35, 35, 35, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 3, 58, 59, 60, 30, + 61, 62, 63, 64, 65, 66, 67, 68, 35, 35, 35, 30, 35, 35, 35, 35, 69, 70, + 71, 72, 30, 73, 74, 30, 75, 76, 77, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 35, 35, 35, 78, + 79, 80, 81, 82, 30, 30, 30, 30, 30, 30, 30, 30, 83, 43, 84, 85, 86, 35, + 87, 88, 30, 30, 30, 30, 30, 30, 30, 30, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 30, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 89, 90, 35, 35, 35, 35, 91, 92, + 93, 94, 95, 35, 96, 97, 98, 49, 99, 100, 101, 102, 103, 104, 105, 106, + 107, 108, 109, 110, 35, 111, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 112, 113, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 35, 35, 35, 35, 35, 114, 35, 115, 116, + 117, 118, 119, 35, 120, 35, 35, 121, 122, 123, 124, 30, 125, 35, 126, 127, + 128, 129, 130, + ], + tree2_level2: &[ + 0x110043FFFFF, 0xFFFF07FF01FFFFFF, 0xFFFFFFFF00007EFF, 0x3FF, + 0x23FFFFFFFFFFFFF0, 0xFFFE0003FF010000, 0x23C5FDFFFFF99FE1, + 0x10030003B0004000, 0x36DFDFFFFF987E0, 0x1C00005E000000, + 0x23EDFDFFFFFBBFE0, 0x200000300010000, 0x23EDFDFFFFF99FE0, + 0x20003B0000000, 0x3FFC718D63DC7E8, 0x10000, 0x23FFFDFFFFFDDFE0, + 0x327000000, 0x23EFFDFFFFFDDFE1, 0x6000360000000, 0x27FFFFFFFFFDDFF0, + 0xFC00000380704000, 0x2FFBFFFFFC7FFFE0, 0x7F, 0x5FFFFFFFFFFFE, + 0x2005FFAFFFFFF7D6, 0xF000005F, 0x1, 0x1FFFFFFFFEFF, 0x1F00, 0, + 0x800007FFFFFFFFFF, 0xFFE1C0623C3F0000, 0xFFFFFFFF00004003, + 0xF7FFFFFFFFFF20BF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF3D7F3DFF, + 0x7F3DFFFFFFFF3DFF, 0xFFFFFFFFFF7FFF3D, 0xFFFFFFFFFF3DFFFF, 0x7FFFFFF, + 0xFFFFFFFF0000FFFF, 0x3F3FFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE, + 0xFFFF9FFFFFFFFFFF, 0xFFFFFFFF07FFFFFE, 0x1FFC7FFFFFFFFFF, + 0x3FFFF8003FFFF, 0x1DFFF0003FFFF, 0xFFFFFFFFFFFFF, 0x10800000, + 0xFFFFFFFF00000000, 0x1FFFFFFFFFFFFFF, 0xFFFF05FFFFFFFFFF, + 0x3FFFFFFFFFFFFF, 0x7FFFFFFF, 0x1F3FFFFFFF0000, 0xFFFF0FFFFFFFFFFF, + 0xFFFFFFFF007FFFFF, 0x1FFFFF, 0x8000000000, 0xFFFFFFFFFFFE0, 0x1FE0, + 0xFC00C001FFFFFFF8, 0x3FFFFFFFFF, 0xFFFFFFFFF, 0x3FFFFFFFFC00E000, + 0xE7FFFFFFFFFF01FF, 0x46FDE0000000000, 0xFFFFFFFF3F3FFFFF, + 
0x3FFFFFFFAAFF3F3F, 0x5FDFFFFFFFFFFFFF, 0x1FDC1FFF0FCF1FDC, + 0x8002000000000000, 0x1FFF0000, 0xF3FFFD503F2FFC84, 0xFFFFFFFF000043E0, + 0x1FF, 0xC781FFFFFFFFF, 0xFFFF20BFFFFFFFFF, 0x80FFFFFFFFFF, + 0x7F7F7F7F007FFFFF, 0x7F7F7F7F, 0x1F3E03FE000000E0, 0xFFFFFFFEE07FFFFF, + 0xF7FFFFFFFFFFFFFF, 0xFFFEFFFFFFFFFFE0, 0xFFFFFFFF00007FFF, + 0xFFFF000000000000, 0x1FFF, 0x3FFFFFFFFFFF0000, 0xC00FFFF1FFF, + 0x80007FFFFFFFFFFF, 0xFFFFFFFF3FFFFFFF, 0xFFFFFFFFFFFF, + 0xFFFFFFFCFF800000, 0xFFFFFFFFFFFFF9FF, 0xFFFC000003EB07FF, 0x7FFFFF7BB, + 0xFFFFFFFFFFFFC, 0x68FC000000000000, 0xFFFF003FFFFFFC00, + 0x1FFFFFFF0000007F, 0x7FFFFFFFFFFF0, 0x7C00FFDF00008000, 0x1FFFFFFFFFF, + 0xC47FFFFF00000FF7, 0x3E62FFFFFFFFFFFF, 0x1C07FF38000005, + 0xFFFF7F7F007E7E7E, 0xFFFF03FFF7FFFFFF, 0x7FFFFFFFF, 0xFFFF000FFFFFFFFF, + 0xFFFFFFFFFFFF87F, 0xFFFF3FFFFFFFFFFF, 0x3FFFFFF, 0x5F7FFDFFA0F8007F, + 0xFFFFFFFFFFFFFFDB, 0x3FFFFFFFFFFFF, 0xFFFFFFFFFFF80000, + 0xFFFFFFF03FFFFFFF, 0x3FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFF0000, + 0xFFFFFFFFFFFCFFFF, 0x3FF0000000000FF, 0xAA8A000000000000, + 0x1FFFFFFFFFFFFFFF, 0x7FFFFFE00000000, 0xFFFFFFC007FFFFFE, + 0x7FFFFFFF3FFFFFFF, 0x1CFCFCFC, + ], + tree3_level1: &[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 5, 9, 10, 5, 11, 12, 5, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 13, 14, 15, 7, 16, 17, 7, 18, 19, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + ], + tree3_level2: &[ + 0, 1, 2, 3, 4, 5, 4, 4, 4, 4, 6, 7, 8, 9, 10, 11, 2, 2, 12, 13, 14, 15, + 16, 4, 2, 2, 2, 2, 17, 18, 19, 4, 20, 21, 22, 23, 24, 4, 25, 4, 26, 27, + 28, 29, 30, 31, 32, 4, 2, 33, 34, 34, 35, 4, 4, 4, 4, 4, 36, 4, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 22, 52, 53, 4, 4, 5, + 54, 55, 56, 4, 4, 57, 58, 55, 59, 60, 4, 61, 62, 4, 4, 63, 4, 64, 65, 66, + 67, 68, 69, 70, 71, 72, 73, 4, 4, 4, 4, 74, 75, 76, 4, 77, 78, 79, 4, 4, + 4, 4, 80, 81, 4, 82, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 83, 4, + 2, 57, 2, 2, 2, 84, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 85, 86, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 55, 87, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 62, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 2, 2, 2, 2, 2, 2, 2, 2, 73, 88, 89, 90, 55, 91, 76, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 2, 4, 4, 2, 92, 93, 94, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 95, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 96, 33, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 97, 2, 2, 2, 2, 98, 99, 2, 2, 2, 2, 2, 100, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 2, 101, 102, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 103, 104, 105, 106, 107, + 2, 2, 2, 2, 108, 109, 110, 111, 112, 113, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 114, 4, 4, 4, 115, 116, + 4, 4, 117, 118, 4, 4, 4, 4, 90, 63, 4, 4, 4, 4, 4, 4, 4, 119, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 120, 2, 2, 2, 121, 2, 122, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 123, 124, 125, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 126, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 127, 2, 2, 2, 10, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 128, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 129, 2, 2, 2, 2, 2, 2, 2, 2, 2, 130, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 130, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 131, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 55, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, + ], + tree3_level3: &[ + 0xB7FFFF7FFFFFEFFF, 0x3FFF3FFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFF, 0, + 0x1FFFFFFFFFFFFF, 0xFFFFFFFF1FFFFFFF, 0x1FFFF, 0xFFFFE000FFFFFFFF, + 0x3FFFFFFFFF07FF, 0xFFFFFFFF3FFFFFFF, 0x3EFF0F, 0xFFFF00003FFFFFFF, + 0xFFFFFFFFF0FFFFF, 0xFFFF00FFFFFFFFFF, 0xF7FF000FFFFFFFFF, + 0x1BFBFFFBFFB7F7FF, 0x7FFFFFFFFFFFFF, 0xFF003FFFFF, 0x7FDFFFFFFFFFFBF, + 0x91BFFFFFFFFFFD3F, 0x7FFFFF003FFFFF, 0x7FFFFFFF, 0x37FFFF00000000, + 0x3FFFFFF003FFFFF, 0xC0FFFFFFFFFFFFFF, 0x3FFFFFFEEF0001, + 0x1FFFFFFF00000000, 0x1FFFFFFF, 0x1FFFFFFEFF, 0x3FFFFFFFFFFFFF, + 0x7FFFF003FFFFF, 0x3FFFF, 0x1FF, 0x7FFFFFFFFFFFF, 0xFFFFFFFFF, + 0x303FFFFFFFFFF, 0xFFFF00801FFFFFFF, 0xFFFF00000000003F, + 0xFFFF000000000003, 0x7FFFFF0000001F, 0xFFFFFFFFFFFFF8, 0x26000000000000, + 0xFFFFFFFFFFF8, 0x1FFFFFF0000, 0x7FFFFFFFF8, 0x47FFFFFFFF0090, + 0x7FFFFFFFFFFF8, 0x1400001E, 0x80000FFFFFFBFFFF, 0x1, 0xFFFF01FFBFFFBD7F, + 0x23EDFDFFFFF99FE0, 0x3E0010000, 0x380000780, 0xFFFFFFFFFFFF, 0xB0, + 
0x7FFFFFFFFFFF, 0xF000000, 0x10, 0x10007FFFFFFFFFF, 0x7FFFFFF, 0x7F, + 0xFFFFFFFFFFF, 0xFFFFFFFF00000000, 0x80000000FFFFFFFF, 0x8000FFFFFF6FF27F, + 0x2, 0xFFFFFCFF00000000, 0xA0001FFFF, 0x407FFFFFFFFF801, + 0xFFFFFFFFF0010000, 0xFFFF0000200003FF, 0x1FFFFFFFFFFFFFF, 0x7FFFFFFFFDFF, + 0xFFFC000000000001, 0xFFFF, 0x1FFFFFFFFFB7F, 0xFFFFFDBF00000040, + 0x10003FF, 0x7FFFF00000000, 0xFFFFFFFFDFFF4, 0x1000000000000, 0x3FFFFFF, + 0xF, 0xFFFFFFFFFFFF0000, 0x1FFFFFFFFFFFF, 0x7E, 0xFFFF00007FFFFFFF, + 0x7FFFFFFFFFFFFFFF, 0x3FFFFFFF0000, 0xE0FFFFF80000000F, 0x107FF, + 0xFFF80000, 0xB00000000, 0xFFFFFFFFFFFFFF, 0x3FFFFF, 0x6FEF000000000000, + 0x40007FFFFFFFF, 0xFFFF00F000270000, 0xFFFFFFFFFFFFFFF, + 0x1FFF07FFFFFFFFFF, 0x3FF01FF, 0xFFFFFFFFFFDFFFFF, 0xEBFFDE64DFFFFFFF, + 0xFFFFFFFFFFFFFFEF, 0x7BFFFFFFDFDFE7BF, 0xFFFFFFFFFFFDFC5F, + 0xFFFFFF3FFFFFFFFF, 0xF7FFFFFFF7FFFFFD, 0xFFDFFFFFFFDFFFFF, + 0xFFFF7FFFFFFF7FFF, 0xFFFFFDFFFFFFFDFF, 0xFF7, 0x7E07FFFFFFF, + 0xFFFF000000000000, 0x3FFFFFFFFFFF, 0x3F801FFFFFFFFFFF, 0x4000, + 0xFFFFFFF0000, 0x7FFF6F7F00000000, 0x1F, 0x80F, 0xAF7FE96FFFFFFEF, + 0x5EF7F796AA96EA84, 0xFFFFBEE0FFFFBFF, 0xFFFFFFFF, 0x3FFFFFFFFFFFFFF, + 0xFFFF0003FFFFFFFF, 0xFFFF0001FFFFFFFF, 0x3FFFFFFF, 0xFFFFFFFFFFFF07FF, + ], +}; diff --git a/vendor/unicode-width/.cargo-checksum.json b/vendor/unicode-width/.cargo-checksum.json index 5c7b922..c1608bd 100644 --- a/vendor/unicode-width/.cargo-checksum.json +++ b/vendor/unicode-width/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"f22e31fb3559e916864820719a09ab3adbf80301440e1702acf827210bbf76df","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"8a041a4305fb318f5c2cb284046f8480796521d0e829023b0441b5e8469490eb","scripts/unicode.py":"0c53095ef99395338399f9ad218b4481cffcf63774fd61871ed32efb242419f8","src/lib.rs":"38c44436eac069bd8d11203f31ecfef8adfe92da1fce19ba00bdd25aa3fbbe20","src/tables.rs":"c6ddb420c289517bb92973199fd2987b9608f29fc10bb33b5290f39b301ce92f","src/tests.rs":"ff9f331210861ba78040f119a0f6ccfacf5b2ca1ebee430784de0858fad01860"},"package":"c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"} \ No newline at end of file +{"files":{"COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"09233bddd9dcdd71355ad523e7293bc8764bb96f891e8968bea9b8aaf86ed314","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"8a041a4305fb318f5c2cb284046f8480796521d0e829023b0441b5e8469490eb","scripts/unicode.py":"d7884ff41ca9c54cbe97a8f9c909b0a8cd74bc8e6190b0c89bf3001a38d9d763","src/lib.rs":"baa30ab3913bde7d8e766a2fbccfda96f06dd499bebda899249fc7495d5ad6f7","src/tables.rs":"7201dff937e3b0f61f4fcad9cfb230fb0c44bb9082301e566351b95d7525605e","src/tests.rs":"ff9f331210861ba78040f119a0f6ccfacf5b2ca1ebee430784de0858fad01860"},"package":"e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"} \ No newline at end of file diff --git a/vendor/unicode-width/Cargo.toml b/vendor/unicode-width/Cargo.toml index 77aded9..6dcfacd 100644 --- a/vendor/unicode-width/Cargo.toml +++ b/vendor/unicode-width/Cargo.toml @@ -11,7 +11,7 @@ [package] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" authors = [ "kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>", 
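[Editor's note: the sketch below is commentary on the vendored unicode-ident test tables above, not part of the patch itself. It shows how the generated `XID_START`/`XID_CONTINUE` sets are meant to be queried through the `ucd_trie` crate's `TrieSet::contains_char` API; the test name and the `BY_NAME` lookup pattern are illustrative assumptions, not code from this diff.]

    // Minimal sketch: membership queries against the generated ucd_trie tables.
    // Assumes the `trie` module vendored above is in scope.
    #[test]
    fn trie_set_membership_sketch() {
        // TrieSet::contains_char walks the three-level bitset trie
        // (tree1 for U+0000..U+07FF, tree2 up to U+FFFF, tree3 beyond).
        assert!(XID_START.contains_char('a'));
        assert!(XID_CONTINUE.contains_char('a'));

        // '_' (U+005F) may continue an identifier but not start one; this
        // matches bit 31 of word 1 in the tree1_level1 arrays above
        // (0x7FFFFFE87FFFFFE for XID_CONTINUE vs 0x7FFFFFE07FFFFFE for XID_START).
        assert!(XID_CONTINUE.contains_char('_'));
        assert!(!XID_START.contains_char('_'));

        // BY_NAME supports lookup by property name for data-driven tests.
        let (_, xid_start) = BY_NAME
            .iter()
            .find(|(name, _)| *name == "XID_Start")
            .expect("property present in BY_NAME");
        assert!(xid_start.contains_char('a'));
    }
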
diff --git a/vendor/unicode-width/scripts/unicode.py b/vendor/unicode-width/scripts/unicode.py index 2efb0b6..a6d58c5 100755 --- a/vendor/unicode-width/scripts/unicode.py +++ b/vendor/unicode-width/scripts/unicode.py @@ -106,9 +106,9 @@ def load_east_asian_widths() -> "list[EffectiveWidth]": `Ambiguous` characters are assigned `EffectiveWidth.AMBIGUOUS`.""" with fetch_open("EastAsianWidth.txt") as eaw: # matches a width assignment for a single codepoint, i.e. "1F336;N # ..." - single = re.compile(r"^([0-9A-F]+);(\w+) +# (\w+)") + single = re.compile(r"^([0-9A-F]+)\s+;\s+(\w+) +# (\w+)") # matches a width assignment for a range of codepoints, i.e. "3001..3003;W # ..." - multiple = re.compile(r"^([0-9A-F]+)\.\.([0-9A-F]+);(\w+) +# (\w+)") + multiple = re.compile(r"^([0-9A-F]+)\.\.([0-9A-F]+)\s+;\s+(\w+) +# (\w+)") # map between width category code and condensed width width_codes = { **{c: EffectiveWidth.NARROW for c in ["N", "Na", "H"]}, diff --git a/vendor/unicode-width/src/lib.rs index 1ee35c8..fac45fc 100644 --- a/vendor/unicode-width/src/lib.rs +++ b/vendor/unicode-width/src/lib.rs @@ -59,8 +59,6 @@ extern crate test; use tables::charwidth as cw; pub use tables::UNICODE_VERSION; -use core::ops::Add; - mod tables; #[cfg(test)] @@ -121,11 +119,11 @@ pub trait UnicodeWidthStr { impl UnicodeWidthStr for str { #[inline] fn width(&self) -> usize { - self.chars().map(|c| cw::width(c, false).unwrap_or(0)).fold(0, Add::add) + self.chars().map(|c| cw::width(c, false).unwrap_or(0)).sum() } #[inline] fn width_cjk(&self) -> usize { - self.chars().map(|c| cw::width(c, true).unwrap_or(0)).fold(0, Add::add) + self.chars().map(|c| cw::width(c, true).unwrap_or(0)).sum() } } diff --git a/vendor/unicode-width/src/tables.rs index 439c69c..791d7a8 100644 --- a/vendor/unicode-width/src/tables.rs +++ b/vendor/unicode-width/src/tables.rs @@ -12,7 +12,7 @@ /// The version of [Unicode](http://www.unicode.org/) /// that this version of unicode-width is based on.
-pub const UNICODE_VERSION: (u8, u8, u8) = (15, 0, 0); +pub const UNICODE_VERSION: (u8, u8, u8) = (15, 1, 0); pub mod charwidth { use core::option::Option::{self, None, Some}; @@ -395,13 +395,13 @@ pub mod charwidth { 0x00, 0x00, 0x00, 0x00, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x9A, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x5A, 0x55, 0x55, 0x55, - 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, + 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x0A, 0xA0, 0xAA, 0xAA, 0xAA, 0x6A, 0xA9, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x6A, 0x81, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0xA9, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xA9, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x6A, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, - 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, + 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x55, 0x95, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x6A, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xFF, 0xFF, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x56, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, diff --git a/vendor/winnow/.cargo-checksum.json b/vendor/winnow/.cargo-checksum.json index 2667621..0c4c278 100644 --- a/vendor/winnow/.cargo-checksum.json +++ b/vendor/winnow/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"Cargo.lock":"5fc55b66dc5106cdd4e33d98a67d8797fb41ba559825592a7a6fa2bc94cf2465","Cargo.toml":"e96225b256735315c523e88210b8b4b58c0d0af9e262f48a1711508fdbbc3c3e","LICENSE-MIT":"cb5aedb296c5246d1f22e9099f925a65146f9f0d6b4eebba97fd27a6cdbbab2d","README.md":"a3d07668263ad522f12b662b982e077e309fe071a447620066dec4380d25e879","benches/contains_token.rs":"3a86ca5ba35e645d73499ec745ca917d5b44e243d5bb5c8daf89676caa30b33f","benches/number.rs":"e7b1ec8c9c1955f797ff3c59e143336a4118acd45af500e8f4e02b20cb2f845b","examples/arithmetic/bench.rs":"bee163fdb9573f53d24c95dbc187770efdaf17bf39917b91204b8ea38edab888","examples/arithmetic/main.rs":"aab0793c96add893bbaacea331bc05de8f6b59e47c5bccca67bfd30e924cd790","examples/arithmetic/parser.rs":"185e2f27012f1f1ffdc11f2de85d4958ddc0ce1e7d8cb4009f90fbc31518cbe3","examples/arithmetic/parser_ast.rs":"542d371bd18413a82b2fa0c81d45acfb88204b8aa4438f3fe8ec9404cc65de25","examples/css/main.rs":"0d74980404a118cc54871f4ad32bd9acc3aa5c0d6502760155399f66a57d4a0e","examples/css/parser.rs":"89f133a847838f8a3e97e9cf28dd746dccf05087f9afac8efbfed292759bb914","examples/custom_error.rs":"b855f72e7ef33192f6be8ee761f59bc82249958f5f9279aa8ae6fafd0e97c711","examples/http/bench.rs":"b6d3e9e5833b53ac58f8336617cc42a2f3f4c5e5ce88feb23184fcdbb36843f6","examples/http/main.rs":"95d3c73953cd76ea462d886ade2ab34e987239bae3abb0349c434fb01b80467d","examples/http/parser.rs":"ce6a567de88920eed9cd2d6016985f55d76a617411ba6b7c58e80a34c45dc8a9","examples/http/parser_streaming.rs":"46ea2fc593079ed7d38472eb3e98f9216a448cad7900b6add51c571e917da56d","examples/ini/bench.rs":"a78706006a553dbc269ec6c215d85de16983d03baf619c18c7e8ba99e50687f6","examples/ini/main.rs":"d771aa8aae11e53f4f5453ac39e10ae3a1de210408dfd31ca7d1ce58d14c3fb9","examples/ini/parser.rs":"1a90a8e66a330f0b66a9c2233dd65c08aa117a8432d61afe8adf62baff92be1b","examples/ini/parser_str.rs":"89310dec5a85d487f21f1c11793a2d04d62d3461b3e692c3e7d3e470763e4388","examples/iterator.rs":"d1a41a242604fe72f46b6bf1cbf510f976784acf582eb3c95aa133a7241475e4","examples/json/bench.rs":"4fe77a79a9442c98eae9b0f808771763c71f61a191972955872dc2ebbfad8cfe","examples/json/json.rs":"48cf6c114098113c9767bebe430682f55f7c1a227e6486131ca58c00770711f2","examples/json/main.rs":"60c2be63c432f2c5da5c0c98ab7b2dfeae1851aa533dad67129dc5e0fa367130","examples/json/parser.rs":"1ec4d44c34d7b9da0ad6167bea1d56feeeaa08ff708f252e9719c93318f80f23","examples/json/parser_dispatch.rs":"048b83e6f5f270066c8a64318f034deb15a1d72b66537481dbd9d1a37fef397b","examples/json/parser_partial.rs":"dfd5ed74ba3bd041bc7c15236cb182a374523c592ca93fb451b55b76b0aca5c7","examples/json_iterator.rs":"95e0683830e55df3fc17f76e874420d04d75d87a09cd1655315e20cf83449321","examples/ndjson/example.ndjson":"c44c130731008bca76181348d40f46b183577949b5a4d229e0e1a56e1e405e5d","examples/ndjson/main.rs":"63ff8ad2311c1b10e436d9c4c5088493f6ed3359ba422028f388d1a5f9c94e66","examples/ndjson/parser.rs":"f69a71c9af8c582023a2b5aa8fb1a14a50b2f4a37419d18944af85d43489efa3","examples/s_expression/main.rs":"5a2de0b477807c7d6ab8bb72579f03692e8f5ea89d5b6f3a8342cfdbe4d84b96","examples/s_expression/parser.rs":"5c009e58cb3d408e2678c940a350d4195ebc5b8cddcc96d57e6de1e415c07986","examples/string/main.rs":"e6362f957d532d41bcd96476e55ac4c253eb04fa25ee88b48d872661a46a5bb5","examples/string/parser.rs":"8650724fd27a3fce25eb58c15af9dca9cff339c8bdb9b45dfbe9e6a961e60a93","src/_topic/arithmetic.rs":"94920a9572ed6deac58abb337a7be57b93cf37440f0cc3eaf514cb9ad9b9e077","src/_topic/error.rs":"e13463570e97b0ca2acea320fa721127b6e393328e798456dc2bbb6730b27a36","src/_t
opic/fromstr.rs":"01abdab296cd30067ae5f508b2b21ebe97c0571ace58e7817876eb2110e8d23a","src/_topic/http.rs":"19b9ec78a031fe5b3086fb34d04d6c13308c50b6e7bfe30229f5b682d3605ac8","src/_topic/ini.rs":"b2b04d48eac3158f4e26ee9dce748a699d02acaa0b12ae8d54421cad0fdc4ad7","src/_topic/json.rs":"bde39ee6b1c5fab2d2da480822b737674aed877f92c3f748d6932bec313b5661","src/_topic/language.rs":"8869f7b1cdee202c8b73a55c723977ec09ab08a10f89616d07f0520b18184578","src/_topic/mod.rs":"9d600daedb5a8a060049824a0931e548009551bcd7297f139981cfc871689d10","src/_topic/partial.rs":"270e385adb10dad68c989a9820fca9163c86b64ce4e3e0c2aaf73c5b40025d12","src/_topic/s_expression.rs":"6ca9a22a5c3344e120421f2ab353a1e822777aebfa1faa0acffc9632f7668cb2","src/_topic/stream.rs":"e1898f63954a3d3bedc965fb21253009f07ac97941335c747adaf010ded58153","src/_topic/why.rs":"5b425ff59f5bb0bbab87e63e4126161819a209fd9b316288ed3c58084012e5d7","src/_tutorial/chapter_0.rs":"2dd082fa014e8075e13524ce6a9ecf29adc380518764c487729d46e45463dc03","src/_tutorial/chapter_1.rs":"ed13f206d07fccec07ea6d2b14944f6620e7bbcc4b0fcec120f0015dc147a4fb","src/_tutorial/chapter_2.rs":"f6b3355575e84a37f4f09780128580e62cafeeaf5ba3ccb07fea653916134020","src/_tutorial/chapter_3.rs":"f53a4223e3ffef258ff66aac3c3fc1b8fdef2d19aa62a3c93743de5ed323c97c","src/_tutorial/chapter_4.rs":"8bc348452e38a908a1638b46a59fae7355fbd48899a4af898a32d699b65f5f2f","src/_tutorial/chapter_5.rs":"2433db664ec8088beeacb142d01f9fb7105bad77ff02d5833fb741e278ada44d","src/_tutorial/chapter_6.rs":"8728b354ae4e6691f3daab3de237e65590b2e881f00d0ae38228489310f62915","src/_tutorial/chapter_7.rs":"7fa0cb6baadedbd58e1d700c30062a68f4ee5918bbe257fae5a8b6cac1198325","src/_tutorial/mod.rs":"b69bb758ffcfbc1fa0c698ffc11d840ea6de3f8fa402ef8e170f3a0d6c3e190a","src/bits/mod.rs":"f7deabf46563db813e68169c16e03073d528ae8adc6d2ca273d7aca3ac243e28","src/bits/tests.rs":"e558e1004932eae8fefad5fbce57fe9f95d7b42301e2e9abf305d85ce9d97838","src/branch/mod.rs":"86c1f98fff4f981dea20c3712dfc859ce8132c64e1fe861ea3b62f9d5ca4b669","src/branch/tests.rs":"31a1da4589d377501dbad6c6b88bb02acf9f899815c4ac3467a74ed513cd0c0e","src/bytes/mod.rs":"4ee406fe415d4ff00192357551af5f15d07dc8d2b3986bda50888dd5e59e3fb0","src/bytes/tests.rs":"0f2fd29bac269c08c2a4f85bd2d2f8e4bd2f5ad907e0900d16adfd93e4401622","src/character/mod.rs":"8bbb35ca8597c700eff3eb1e73051d951c82b6ef8fd71f97c733b1ccaa69a9dd","src/character/tests.rs":"ef32c681a420322c17066b007cd93f6b88fdf6487d03372e9ae2e0a931e011fb","src/combinator/mod.rs":"a054a96d72999089c7201e51ef0308329b2bc1c94834283fc9d06812d9517aec","src/combinator/tests.rs":"1359e0b04885444d285f30f938bb3923be3f04216f086a1e93547fb333551817","src/error.rs":"6c1b5f4406573a970845460df2f363bcbdef6bc8b558c1503c3e18d238ab31d8","src/lib.rs":"3127df4585829b4fdd824d5d9ef9e3119e38ae401335f0b803b72c584f537922","src/macros.rs":"c73920e0956e72e42915648c569bc7b2804795d684db2dbf113f0cc8ed9beb99","src/multi/mod.rs":"9b9a90827bab53f4e2c154ef43d57cfbcad9fe7d3889820d12d89bdb73db30e8","src/multi/tests.rs":"7a4008391c3102805c77f0178e70c59e38581ab6fb76272067cf94d4f4150b00","src/number/mod.rs":"ca04449c7552d78d62e01a09a1ce6e8fbcf5218072d568839ff45d6ed2488a7e","src/number/tests.rs":"840237190a11efa7b5bd9ef08618963af1326e561c43ffbf98585c9f9430b9f6","src/parser.rs":"eaa21bb6e6ea48c2d0b8eb9ffe1b541f5f5e98c5df618880cc8b7d13f9fe8c89","src/sequence/mod.rs":"893c3d1355ccf4acee531c588d708069833cfe302721a67fce0a0368f42efca6","src/sequence/tests.rs":"e5df2c798f924d272fb8e5d6e00067cb7ee4722817071dff1c037157ae9efdc6","src/stream/impls.rs":"99f69fdb8c32ad1f759bcc414ebfd2428eac59
97887ce2b6946a2fcfcd174082","src/stream/mod.rs":"c74106454ecb960a2578bfaae393ee20bc345bb006cc6c41851bc4f773990637","src/stream/tests.rs":"24bbd9d6fbafc85e29effb7d5ec93afff3cebca7e0f4cf2a992f0245c3aafa40","src/trace/internals.rs":"6b0eb9269844e984496aab38431c29cc4c430d7990de26e9b01f27332d9bc924","src/trace/mod.rs":"1de7f64af627b41c7b6268433aee8136c6533fb6824a563006824dba889ecfba"},"package":"ae8970b36c66498d8ff1d66685dc86b91b29db0c7739899012f63a63814b4b28"} \ No newline at end of file +{"files":{"Cargo.lock":"eae42cda14a5f3bbad450ec86af9accbc08a9c5df8ca6e1e9b1a9a9ae0647481","Cargo.toml":"01e5d5020ac8600aa9e477aa15b42006ffac1e126fdc88b259436421cde5540c","LICENSE-MIT":"cb5aedb296c5246d1f22e9099f925a65146f9f0d6b4eebba97fd27a6cdbbab2d","README.md":"0b7e4cad5ef0cbb47dc1c1c34ff3b5d090aaba2cc3b3c5401539e5c4a22b7938","benches/contains_token.rs":"20e765de27f0ad9bf549e250bdb67d73714d23bc10bb67be240a258dcf8b6ed7","benches/find_slice.rs":"c140d235fb94997b365e5e66c6caf77ef8b782f80cfce580ef68aa8e300fdb70","benches/iter.rs":"bede3f156b87ffb8b901b092bcbb2717b7adf9a461a2fecdfad86025aad3e587","benches/next_slice.rs":"7cb1ea5e7122676532ecfbc53e0de78417bf96546c63a170370db8a99c90620b","benches/number.rs":"f4b54895ed523c462ce327a5205b0b99591eb7056fe9de9abe3f809eff287386","examples/arithmetic/bench.rs":"a162e4f91ce2ed19b5b53f79d77c92eb52278f0788da9af8fee3ee62cebfe0a9","examples/arithmetic/main.rs":"b7f2090beefa76ca324d23bc62e06ef97df8c7d02934998f4ddce1ac45e0b852","examples/arithmetic/parser.rs":"c219b2ff9a4f9bb4db97899203294765e72024fe4324b3d46d0b464258757da6","examples/arithmetic/parser_ast.rs":"5897edec836e91d0fcb6764894d0cb67898990a0c3cbde3ffc1fc43777127fa6","examples/arithmetic/parser_lexer.rs":"a0f79926cc06c9581ca73071355d6d44a070e40773a0c92271edcd5ed80bb93f","examples/css/main.rs":"3127f259af57aaaa8363bfa6e0a64c9719173cc1dcc5a293771a2c8a7f31982e","examples/css/parser.rs":"e81d89b876288637d8480fe981ddacdf2faa7f54da051801ee308db39834497d","examples/custom_error.rs":"5ebf88007ed2ce998fdb16cd4326b1573e83f0c420e2f6561c10b6dd8ebf69a2","examples/http/bench.rs":"b6d3e9e5833b53ac58f8336617cc42a2f3f4c5e5ce88feb23184fcdbb36843f6","examples/http/main.rs":"95d3c73953cd76ea462d886ade2ab34e987239bae3abb0349c434fb01b80467d","examples/http/parser.rs":"704fbcf9f1cf8e590763af5fcf4ce808ef9af8d485215e82dbc050b8b7d51444","examples/http/parser_streaming.rs":"5fd80da0a394afac6cb2755ff5d2b425dd5dc2c000bc6bf4b3832fc548afd006","examples/ini/bench.rs":"c8fd1aed25ce2878c0f440d765779d7877e83ca202cf071f8f4c57566596bf89","examples/ini/main.rs":"d771aa8aae11e53f4f5453ac39e10ae3a1de210408dfd31ca7d1ce58d14c3fb9","examples/ini/parser.rs":"c554286e8f2077f999d607902e7a63ce3945939118bccf830a68b22bfe24c775","examples/ini/parser_str.rs":"5eb333cd192aff09138fa1578628f76c19a9b48375c9f13860deb34cf1a6df37","examples/iterator.rs":"21fb0480749116407689162f0d3e9957e3e7b6863f9dda08ee01af675409b06e","examples/json/bench.rs":"9479248ccef46e0741e0eebf9fb3b307b343ba665925481adc4554d8e65f9d39","examples/json/json.rs":"48cf6c114098113c9767bebe430682f55f7c1a227e6486131ca58c00770711f2","examples/json/main.rs":"7ab1f6eefd4eb61153cf05991e593fd10827ac09f66af45d3019f94c39c5b84e","examples/json/parser.rs":"747e6d206cf72a471c3fef7a7a6df102c9b907e9b642057ba010b1a805eaa011","examples/json/parser_dispatch.rs":"e430b5fdfa8c9f4a2312657e8f4451101b5492019404bcbe0950c0c6007f959b","examples/json/parser_partial.rs":"a22a908248082f61b6d30c6a7a0d9d31147ce15cc850a9f117657b9e59f16260","examples/json_iterator.rs":"2a8a842b43fdea12a6b5c4927cede23cbd0eb96bb1cbf42a794103fc9be9ff3a","exam
ples/ndjson/example.ndjson":"c44c130731008bca76181348d40f46b183577949b5a4d229e0e1a56e1e405e5d","examples/ndjson/main.rs":"dc92dd5d76b5d43edf947b6455d552f0212e46d59c926d95f97f5d159b14e361","examples/ndjson/parser.rs":"7699f08c0dc5d8e3a4ef488d5f9eb8de996046669ec988bd9dc7aa322168e57d","examples/s_expression/main.rs":"5a2de0b477807c7d6ab8bb72579f03692e8f5ea89d5b6f3a8342cfdbe4d84b96","examples/s_expression/parser.rs":"2a468bf61e4aed9aa5c556fb19c479060d121f60671865324675ab9fd92f0ddd","examples/string/main.rs":"e6362f957d532d41bcd96476e55ac4c253eb04fa25ee88b48d872661a46a5bb5","examples/string/parser.rs":"5a08324b3054fb4cc3a29164e91bdf4811f9f85deb38418603c22f4931a540b5","src/_topic/arithmetic.rs":"28d61dd492b308209d534c54813ea48ac1862a39196527042180de7718f915d0","src/_topic/error.rs":"69c972c609a91e078ddb4e01da8f3d35f29d8dbcb077614e9b156c033c9f5dd7","src/_topic/fromstr.rs":"01abdab296cd30067ae5f508b2b21ebe97c0571ace58e7817876eb2110e8d23a","src/_topic/http.rs":"19b9ec78a031fe5b3086fb34d04d6c13308c50b6e7bfe30229f5b682d3605ac8","src/_topic/ini.rs":"b2b04d48eac3158f4e26ee9dce748a699d02acaa0b12ae8d54421cad0fdc4ad7","src/_topic/json.rs":"bde39ee6b1c5fab2d2da480822b737674aed877f92c3f748d6932bec313b5661","src/_topic/language.rs":"437814054d4a0a19ffccd74ff9189677e6a8e9df45dc395861fc75d5bd23719b","src/_topic/mod.rs":"0691994d052e597413d577a4efbea3bd62e2d720718b6f360a4fa5cd274f7ca3","src/_topic/nom.rs":"9e7c4a900a7ad1bd417d9a72f2a5122a75707c5d53a2d39845b350d5aab362ca","src/_topic/partial.rs":"d1433dd9e5130aaad6801b3e9a7f81dbb7e40c224f8ad0b328b16ad467e95cb5","src/_topic/performance.rs":"115b636769c307bd80ecc5b89ffad47877352bc93ef47bf664093d3d7a2562cc","src/_topic/s_expression.rs":"6ca9a22a5c3344e120421f2ab353a1e822777aebfa1faa0acffc9632f7668cb2","src/_topic/stream.rs":"acc1e8942e27fec3911c562934be6bf9b096d436593af50a4418d9e0fe607535","src/_topic/why.rs":"097dbb252ed0c67e0d50157e1826286997450a9c413cf40697c0918e0dfecba1","src/_tutorial/chapter_0.rs":"c0512cb9857edb9df0ac1d327fccb1e950019b9a417959c802714039038b8bce","src/_tutorial/chapter_1.rs":"743847b36faab89235f7e4d36076b5090d1dff7c0f5f38f9c3387aaff80bbec2","src/_tutorial/chapter_2.rs":"3671bfd14e7a9c6c630daac8bc2c8128938fd0f830a5524c33a800d06d4cd205","src/_tutorial/chapter_3.rs":"268d0fa4d799480653cc025b7c4af206345f9d45fe014776dbcfbb85bf5b893a","src/_tutorial/chapter_4.rs":"4390d7ec2a67a726bd8df7f62ebd3fc26df926e79c014b8a2e623ec3e4c1ca25","src/_tutorial/chapter_5.rs":"25656769ea285f9d6d3780294a24491b27918cecd754b430220b56ac2047a10c","src/_tutorial/chapter_6.rs":"51fae6d7479288f518d04d43b3e987aa0f26f83ff0620ca4bdcfe7ec92e2e0cb","src/_tutorial/chapter_7.rs":"0767623ca13b97ab32df54c3d0b1d58c55b97d7aad25259687bdc59bf62cfef4","src/_tutorial/mod.rs":"afbc50be8c274db7b3963b74865c527e85f8b90b83e18c32edc1baca952c0957","src/ascii/mod.rs":"d4abf3e02a4680b7570c1efc7fd745d03ea9ab49d3231351b88ad9ab2bde5a1c","src/ascii/tests.rs":"3bc7866a1513d390bd73cfb7d23cf088abf755e2942e2d554fe62e23a4c6d79d","src/binary/bits/mod.rs":"5f64c2badc4de4e336e6fcdf6bf0c051b1b1d1acce1ca8da8f841b3ca7681762","src/binary/bits/tests.rs":"1ce1707a8ab2300d601f67fb3cb9c4b8ba7528564477e65ff25d8542c3e0f6e7","src/binary/mod.rs":"297b26caada87492381f890f0ca463201dd52c0abbcce769dffc101802ddfc4e","src/binary/tests.rs":"107a6a36115f0a52b5adf9e5e3bd2d242fc43888e916d3acca772c49563c545b","src/combinator/branch.rs":"4e225450c58eb3049dd7bade002fcbf90a37d76f2c32bcfd6e3e0a17e7072698","src/combinator/core.rs":"7fe951f5b5084338ea4f8387b344c297b96a177249ceea7ed65d1ef17ba27bb5","src/combinator/mod.rs":"fd626313b5bc6bf66
2d219577016e33066e610dcd308b9c1932557e3ed69b424","src/combinator/multi.rs":"ae2f167ad0a639c4cac6b253f9cc91c9a79059a675c6cd0f92bb8379e34664d1","src/combinator/parser.rs":"7ba34d0c5df8d60b201673842ccc0672e3be2cb68b27d5f258dc4401d2066a4d","src/combinator/sequence.rs":"46f98de7818e2ae75db921a3015d6dd99d3be3f87123c42ea3f4064b081e2c3e","src/combinator/tests.rs":"b13fd6e0516d73d4ec2733813dea763d1599a19c5ce2f7309b7e5e3d4acabc0b","src/error.rs":"64e43d63db6c5a5c64270b61d94719199c9ea9d329a13042b01a1bf35b1fa2e8","src/lib.rs":"36f2e7b3a1fa0f276f52d26d3da550db997dc4b77ee3b64f1a8fa7bc37954cc1","src/macros/dispatch.rs":"696fa579b5e7f1f139a9ff0edcf57898c63ecaca0c6b36dd84368d5542452aaa","src/macros/mod.rs":"9faebae01ea216cda6160cc306ca4a5f531344aa65a628973c0825d0f53b3fb3","src/macros/seq.rs":"8ad43c7f93910291d568190d5198543aa6e962d99655f82e40086823eddd2952","src/macros/test.rs":"1e431fe5dffdbb7b841bd6744fc836c35c8585ade916ea3039c1337aa0485d0a","src/parser.rs":"39b32bf14d6d1b3589731192448f4e19f1769f2bc0b09feadddf55a2685f49a2","src/stream/impls.rs":"139025ed7c350c86e14b3a23019b1e5569cfee98c5f3e07d5a7dc69f411a8730","src/stream/mod.rs":"77d7fc0b8d0a6056ed42d79f2d116cb6d7a225a54787ce4c34c57cdc4cf784df","src/stream/tests.rs":"399d884c7b1baf256f667f8dd25218fa473bd30a3b293625f756a9accd5b1f2d","src/token/mod.rs":"0bc7ebaf55c466868ce77e26299366e7d0f3b0193745c59400ffcdb415196e3d","src/token/tests.rs":"6b4791a68b5675070619bce938d09891aa0541d64d40bfd3dba078ee05d3a5dc","src/trace/internals.rs":"3480e946ba22390ac84046487728fd89d78cb3bc5547305162ed925e2437802f","src/trace/mod.rs":"767e5ac4dd698f17a61494eea288cf216f9920919a9b8e9b2547b3fe6cc470db"},"package":"b7cf47b659b318dccbd69cc4797a39ae128f533dce7902a1096044d1967b9c16"} \ No newline at end of file diff --git a/vendor/winnow/Cargo.toml b/vendor/winnow/Cargo.toml index 33943ce..cc5558e 100644 --- a/vendor/winnow/Cargo.toml +++ b/vendor/winnow/Cargo.toml @@ -11,13 +11,14 @@ [package] edition = "2021" -rust-version = "1.60.0" +rust-version = "1.64.0" name = "winnow" -version = "0.4.1" +version = "0.5.34" include = [ "build.rs", "src/**/*", "Cargo.toml", + "Cargo.lock", "LICENSE*", "README.md", "benches/**/*", @@ -38,55 +39,61 @@ license = "MIT" repository = "https://github.com/winnow-rs/winnow" [package.metadata.docs.rs] +cargo-args = [ + "-Zunstable-options", + "-Zrustdoc-scrape-examples", +] features = ["unstable-doc"] rustdoc-args = [ "--cfg", "docsrs", ] -cargo-args = [ - "-Zunstable-options", - "-Zrustdoc-scrape-examples", -] [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" -search = "Unreleased" -replace = "{{version}}" min = 1 +replace = "{{version}}" +search = "Unreleased" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = '\.\.\.HEAD' replace = "...{{tag_name}}" -exactly = 1 +search = '\.\.\.HEAD' [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" -search = "ReleaseDate" -replace = "{{date}}" min = 1 +replace = "{{date}}" +search = "ReleaseDate" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = "<!-- next-header -->" replace = """ <!-- next-header --> ## [Unreleased] - ReleaseDate """ -exactly = 1 +search = "<!-- next-header -->" [[package.metadata.release.pre-release-replacements]] +exactly = 1 file = "CHANGELOG.md" -search = "<!-- next-url -->" replace = """ <!-- next-url --> [Unreleased]: https://github.com/winnow-rs/winnow/compare/{{tag_name}}...HEAD""" +search = "<!-- next-url -->" + 
+[[package.metadata.release.pre-release-replacements]] exactly = 1 +file = "src/lib.rs" +replace = "blob/v{{version}}/CHANGELOG.md" +search = 'blob/v.+\..+\..+/CHANGELOG.md' [profile.bench] lto = true codegen-units = 1 -debug = true +debug = 2 [[example]] name = "arithmetic" @@ -141,11 +148,24 @@ required-features = ["alloc"] name = "arithmetic" path = "examples/arithmetic/bench.rs" harness = false +required-features = ["alloc"] [[bench]] name = "contains_token" harness = false +[[bench]] +name = "find_slice" +harness = false + +[[bench]] +name = "iter" +harness = false + +[[bench]] +name = "next_slice" +harness = false + [[bench]] name = "number" harness = false @@ -168,33 +188,32 @@ path = "examples/json/bench.rs" harness = false required-features = ["std"] -[dependencies.anstyle] -version = "0.2.5" +[dependencies.anstream] +version = "0.3.2" optional = true -[dependencies.concolor] -version = "0.0.8" -features = ["auto"] +[dependencies.anstyle] +version = "1.0.1" optional = true [dependencies.is-terminal] -version = "0.4.3" +version = "0.4.9" optional = true [dependencies.memchr] -version = "2.3" +version = "2.5" optional = true default-features = false [dependencies.terminal_size] -version = "0.2.3" +version = "0.2.6" optional = true [dev-dependencies.circular] version = "0.3.0" [dev-dependencies.criterion] -version = "0.3.5" +version = "0.5.1" [dev-dependencies.doc-comment] version = "0.3" @@ -206,10 +225,13 @@ version = "0.5.7" version = "0.3.0" [dev-dependencies.proptest] -version = "1.0.0" +version = "1.2.0" + +[dev-dependencies.rustc-hash] +version = "1.1.0" [dev-dependencies.snapbox] -version = "0.4.6" +version = "0.4.11" features = ["examples"] [dev-dependencies.term-transcript] @@ -218,10 +240,10 @@ version = "0.2.0" [features] alloc = [] debug = [ + "dep:anstream", "dep:anstyle", "dep:is-terminal", "dep:terminal_size", - "dep:concolor", ] default = ["std"] simd = ["dep:memchr"] diff --git a/vendor/winnow/README.md b/vendor/winnow/README.md index 10f73b1..5530657 100644 --- a/vendor/winnow/README.md +++ b/vendor/winnow/README.md @@ -12,13 +12,14 @@ Build up a parser for your format of choice with the building blocks provided by For more details, see: - [Tutorial](https://docs.rs/winnow/latest/winnow/_tutorial/index.html) - [Special Topics](https://docs.rs/winnow/latest/winnow/_topic/index.html) + - [Why winnow? How does it compare to ...?](https://docs.rs/winnow/latest/winnow/_topic/why/index.html) - [API Reference](https://docs.rs/winnow) - [List of combinators](https://docs.rs/winnow/latest/winnow/combinator/index.html) # Contributors winnow is the fruit of the work of many contributors over the years, many -thanks for your help! In particular, thanks to [Geal](https://github.com/Geal) +thanks for your help! In particular, thanks to [Geal](https://github.com/Geal) for the original [`nom` crate](https://crates.io/crates/nom). 
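+A minimal sketch of those building blocks in action (assuming only the 0.5 API vendored by this patch; `integer` is an illustrative name, not an upstream export):
+
+```rust
+use std::str::FromStr;
+
+use winnow::ascii::digit1;
+use winnow::prelude::*;
+
+// Parse a run of ASCII digits and convert it to a number,
+// leaving the rest of the input untouched.
+fn integer(input: &mut &str) -> PResult<u64> {
+    digit1.try_map(FromStr::from_str).parse_next(input)
+}
+
+fn main() {
+    assert_eq!(integer.parse_peek("123 apples"), Ok((" apples", 123)));
+}
+```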
<a href="https://github.com/winnow-rs/winnow/graphs/contributors"> diff --git a/vendor/winnow/benches/contains_token.rs b/vendor/winnow/benches/contains_token.rs index 67397ba..675b08e 100644 --- a/vendor/winnow/benches/contains_token.rs +++ b/vendor/winnow/benches/contains_token.rs @@ -1,10 +1,10 @@ use criterion::black_box; -use winnow::branch::alt; -use winnow::bytes::take_till1; -use winnow::bytes::take_while1; -use winnow::multi::many0; +use winnow::combinator::alt; +use winnow::combinator::repeat; use winnow::prelude::*; +use winnow::token::take_till; +use winnow::token::take_while; fn contains_token(c: &mut criterion::Criterion) { let data = [ @@ -17,23 +17,20 @@ fn contains_token(c: &mut criterion::Criterion) { let len = sample.len(); group.throughput(criterion::Throughput::Bytes(len as u64)); - group.bench_with_input(criterion::BenchmarkId::new("str", name), &len, |b, _| { - b.iter(|| black_box(parser_str.parse_next(black_box(sample)).unwrap())); - }); group.bench_with_input(criterion::BenchmarkId::new("slice", name), &len, |b, _| { - b.iter(|| black_box(parser_slice.parse_next(black_box(sample)).unwrap())); + b.iter(|| black_box(parser_slice.parse_peek(black_box(sample)).unwrap())); }); group.bench_with_input(criterion::BenchmarkId::new("array", name), &len, |b, _| { - b.iter(|| black_box(parser_array.parse_next(black_box(sample)).unwrap())); + b.iter(|| black_box(parser_array.parse_peek(black_box(sample)).unwrap())); }); group.bench_with_input(criterion::BenchmarkId::new("tuple", name), &len, |b, _| { - b.iter(|| black_box(parser_tuple.parse_next(black_box(sample)).unwrap())); + b.iter(|| black_box(parser_tuple.parse_peek(black_box(sample)).unwrap())); }); group.bench_with_input( criterion::BenchmarkId::new("closure-or", name), &len, |b, _| { - b.iter(|| black_box(parser_closure_or.parse_next(black_box(sample)).unwrap())); + b.iter(|| black_box(parser_closure_or.parse_peek(black_box(sample)).unwrap())); }, ); group.bench_with_input( @@ -43,7 +40,7 @@ fn contains_token(c: &mut criterion::Criterion) { b.iter(|| { black_box( parser_closure_matches - .parse_next(black_box(sample)) + .parse_peek(black_box(sample)) .unwrap(), ) }); @@ -53,27 +50,34 @@ fn contains_token(c: &mut criterion::Criterion) { group.finish(); } -fn parser_str(input: &str) -> IResult<&str, usize> { - let contains = "0123456789"; - many0(alt((take_while1(contains), take_till1(contains)))).parse_next(input) -} - -fn parser_slice(input: &str) -> IResult<&str, usize> { +fn parser_slice(input: &mut &str) -> PResult<usize> { let contains = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'][..]; - many0(alt((take_while1(contains), take_till1(contains)))).parse_next(input) + repeat( + 0.., + alt((take_while(1.., contains), take_till(1.., contains))), + ) + .parse_next(input) } -fn parser_array(input: &str) -> IResult<&str, usize> { +fn parser_array(input: &mut &str) -> PResult<usize> { let contains = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; - many0(alt((take_while1(contains), take_till1(contains)))).parse_next(input) + repeat( + 0.., + alt((take_while(1.., contains), take_till(1.., contains))), + ) + .parse_next(input) } -fn parser_tuple(input: &str) -> IResult<&str, usize> { +fn parser_tuple(input: &mut &str) -> PResult<usize> { let contains = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9'); - many0(alt((take_while1(contains), take_till1(contains)))).parse_next(input) + repeat( + 0.., + alt((take_while(1.., contains), take_till(1.., contains))), + ) + .parse_next(input) } -fn parser_closure_or(input: 
&str) -> IResult<&str, usize> { +fn parser_closure_or(input: &mut &str) -> PResult<usize> { let contains = |c: char| { c == '0' || c == '1' @@ -86,12 +90,20 @@ fn parser_closure_or(input: &str) -> IResult<&str, usize> { || c == '8' || c == '9' }; - many0(alt((take_while1(contains), take_till1(contains)))).parse_next(input) + repeat( + 0.., + alt((take_while(1.., contains), take_till(1.., contains))), + ) + .parse_next(input) } -fn parser_closure_matches(input: &str) -> IResult<&str, usize> { +fn parser_closure_matches(input: &mut &str) -> PResult<usize> { let contains = |c: char| matches!(c, '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'); - many0(alt((take_while1(contains), take_till1(contains)))).parse_next(input) + repeat( + 0.., + alt((take_while(1.., contains), take_till(1.., contains))), + ) + .parse_next(input) } const CONTIGUOUS: &str = "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"; diff --git a/vendor/winnow/benches/find_slice.rs b/vendor/winnow/benches/find_slice.rs new file mode 100644 index 0000000..226fcad --- /dev/null +++ b/vendor/winnow/benches/find_slice.rs @@ -0,0 +1,50 @@ +use criterion::black_box; + +use winnow::combinator::repeat; +use winnow::prelude::*; +use winnow::token::take_until0; + +fn find_slice(c: &mut criterion::Criterion) { + let empty = ""; + let start_byte = "\r".repeat(100); + let start_slice = "\r\n".repeat(100); + let small = format!("{:>10}\r\n", "").repeat(100); + let large = format!("{:>10000}\r\n", "").repeat(100); + + let data = [ + ("empty", (empty, empty)), + ("start", (&start_byte, &start_slice)), + ("medium", (&small, &small)), + ("large", (&large, &large)), + ]; + let mut group = c.benchmark_group("find_slice"); + for (name, samples) in data { + group.bench_with_input( + criterion::BenchmarkId::new("byte", name), + samples.0, + |b, sample| { + b.iter(|| black_box(parser_byte.parse_peek(black_box(sample)).unwrap())); + }, + ); + + group.bench_with_input( + criterion::BenchmarkId::new("slice", name), + samples.1, + |b, sample| { + b.iter(|| black_box(parser_slice.parse_peek(black_box(sample)).unwrap())); + }, + ); + } + group.finish(); +} + +fn parser_byte(input: &mut &str) -> PResult<usize> { + repeat(0.., (take_until0("\r"), "\r")).parse_next(input) +} + +fn parser_slice(input: &mut &str) -> PResult<usize> { + repeat(0.., (take_until0("\r\n"), "\r\n")).parse_next(input) +} + +criterion::criterion_group!(benches, find_slice); +criterion::criterion_main!(benches); diff --git a/vendor/winnow/benches/iter.rs b/vendor/winnow/benches/iter.rs new file mode 100644 index 0000000..0a0d5ff --- /dev/null +++ b/vendor/winnow/benches/iter.rs @@ -0,0 +1,120 @@ +use criterion::black_box; + +use winnow::combinator::opt; +use winnow::prelude::*; +use winnow::stream::AsChar; +use winnow::stream::Stream as _; +use winnow::token::one_of; + +fn iter(c: &mut criterion::Criterion) { + let data = [ + ("contiguous", CONTIGUOUS.as_bytes()), + ("interleaved", INTERLEAVED.as_bytes()), + ("canada", CANADA.as_bytes()), + ]; + let mut group = c.benchmark_group("iter"); + for (name, sample) in data { + let len = sample.len(); + group.throughput(criterion::Throughput::Bytes(len as u64)); + + group.bench_with_input( + criterion::BenchmarkId::new("iterate", name), + &len, + |b, _| { + b.iter(|| 
black_box(iterate.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("next_token", name), + &len, + |b, _| { + b.iter(|| black_box(next_token.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("opt(one_of)", name), + &len, + |b, _| { + b.iter(|| black_box(opt_one_of.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("take_while", name), + &len, + |b, _| { + b.iter(|| black_box(take_while.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input(criterion::BenchmarkId::new("repeat", name), &len, |b, _| { + b.iter(|| black_box(repeat.parse_peek(black_box(sample)).unwrap())); + }); + } + group.finish(); +} + +fn iterate(input: &mut &[u8]) -> PResult<usize> { + let mut count = 0; + for byte in input.iter() { + if byte.is_dec_digit() { + count += 1; + } + } + input.finish(); + Ok(count) +} + +fn next_token(input: &mut &[u8]) -> PResult<usize> { + let mut count = 0; + while let Some(byte) = input.next_token() { + if byte.is_dec_digit() { + count += 1; + } + } + Ok(count) +} + +fn opt_one_of(input: &mut &[u8]) -> PResult<usize> { + let mut count = 0; + while !input.is_empty() { + while opt(one_of(AsChar::is_dec_digit)) + .parse_next(input)? + .is_some() + { + count += 1; + } + while opt(one_of(|b: u8| !b.is_dec_digit())) + .parse_next(input)? + .is_some() + {} + } + Ok(count) +} + +fn take_while(input: &mut &[u8]) -> PResult<usize> { + let mut count = 0; + while !input.is_empty() { + count += winnow::token::take_while(0.., AsChar::is_dec_digit) + .parse_next(input)? + .len(); + let _ = winnow::token::take_while(0.., |b: u8| !b.is_dec_digit()).parse_next(input)?; + } + Ok(count) +} + +fn repeat(input: &mut &[u8]) -> PResult<usize> { + let mut count = 0; + while !input.is_empty() { + count += winnow::combinator::repeat(0.., one_of(AsChar::is_dec_digit)) + .map(|count: usize| count) + .parse_next(input)?; + winnow::combinator::repeat(0.., one_of(|b: u8| !b.is_dec_digit())).parse_next(input)?; + } + Ok(count) +} + +const CONTIGUOUS: &str = "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"; +const INTERLEAVED: &str = "0123456789abc0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab"; +const CANADA: &str = include_str!("../third_party/nativejson-benchmark/data/canada.json"); + +criterion::criterion_group!(benches, iter); +criterion::criterion_main!(benches); diff --git a/vendor/winnow/benches/next_slice.rs b/vendor/winnow/benches/next_slice.rs new file mode 100644 index 0000000..6c25d23 --- /dev/null +++ b/vendor/winnow/benches/next_slice.rs @@ -0,0 +1,133 @@ +use criterion::black_box; + +use winnow::combinator::repeat; +use winnow::prelude::*; +use winnow::token::one_of; +use winnow::token::tag; + +fn next_slice(c: &mut criterion::Criterion) { + let mut group = c.benchmark_group("next_slice"); + + let name = "ascii"; + let sample = "h".repeat(100); + let sample = sample.as_str(); + group.bench_with_input( + criterion::BenchmarkId::new("char", name), + sample, + |b, sample| { + b.iter(|| 
black_box(parser_ascii_char.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("str", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_ascii_str.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("one_of", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_ascii_one_of.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("tag_char", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_ascii_tag_char.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("tag_str", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_ascii_tag_str.parse_peek(black_box(sample)).unwrap())); + }, + ); + + let name = "utf8"; + let sample = "🧑".repeat(100); + let sample = sample.as_str(); + group.bench_with_input( + criterion::BenchmarkId::new("char", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_utf8_char.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("str", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_utf8_str.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("one_of", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_utf8_one_of.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("tag_char", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_utf8_tag_char.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("tag_str", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_utf8_tag_str.parse_peek(black_box(sample)).unwrap())); + }, + ); + + group.finish(); +} + +fn parser_ascii_char(input: &mut &str) -> PResult<usize> { + repeat(0.., 'h').parse_next(input) +} + +fn parser_ascii_str(input: &mut &str) -> PResult<usize> { + repeat(0.., "h").parse_next(input) +} + +fn parser_ascii_one_of(input: &mut &str) -> PResult<usize> { + repeat(0.., one_of('h')).parse_next(input) +} + +fn parser_ascii_tag_char(input: &mut &str) -> PResult<usize> { + repeat(0.., tag('h')).parse_next(input) +} + +fn parser_ascii_tag_str(input: &mut &str) -> PResult<usize> { + repeat(0.., tag("h")).parse_next(input) +} + +fn parser_utf8_char(input: &mut &str) -> PResult<usize> { + repeat(0.., '🧑').parse_next(input) +} + +fn parser_utf8_str(input: &mut &str) -> PResult<usize> { + repeat(0.., "🧑").parse_next(input) +} + +fn parser_utf8_one_of(input: &mut &str) -> PResult<usize> { + repeat(0.., one_of('🧑')).parse_next(input) +} + +fn parser_utf8_tag_char(input: &mut &str) -> PResult<usize> { + repeat(0.., tag('🧑')).parse_next(input) +} + +fn parser_utf8_tag_str(input: &mut &str) -> PResult<usize> { + repeat(0.., tag("🧑")).parse_next(input) +} + +criterion::criterion_group!(benches, next_slice); +criterion::criterion_main!(benches); diff --git a/vendor/winnow/benches/number.rs b/vendor/winnow/benches/number.rs index e801c8c..d35d65c 100644 --- a/vendor/winnow/benches/number.rs +++ b/vendor/winnow/benches/number.rs @@ -3,66 +3,66 @@ extern crate criterion; use criterion::Criterion; -use winnow::character::float; +use winnow::ascii::float; +use winnow::binary::be_u64; use winnow::error::ErrMode; -use winnow::error::Error; use winnow::error::ErrorKind; -use winnow::number::be_u64; +use 
winnow::error::InputError; +use winnow::error::ParserError; use winnow::prelude::*; use winnow::stream::ParseSlice; type Stream<'i> = &'i [u8]; -fn parser(i: Stream<'_>) -> IResult<Stream<'_>, u64> { - be_u64(i) +fn parser(i: &mut Stream<'_>) -> PResult<u64> { + be_u64.parse_next(i) } fn number(c: &mut Criterion) { let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; - parser(&data[..]).expect("should parse correctly"); + parser + .parse_peek(&data[..]) + .expect("should parse correctly"); c.bench_function("number", move |b| { - b.iter(|| parser(&data[..]).unwrap()); + b.iter(|| parser.parse_peek(&data[..]).unwrap()); }); } fn float_bytes(c: &mut Criterion) { println!( "float_bytes result: {:?}", - float::<_, f64, Error<_>>(&b"-1.234E-12"[..]) + float::<_, f64, InputError<_>>.parse_peek(&b"-1.234E-12"[..]) ); c.bench_function("float bytes", |b| { - b.iter(|| float::<_, f64, Error<_>>(&b"-1.234E-12"[..])); + b.iter(|| float::<_, f64, InputError<_>>.parse_peek(&b"-1.234E-12"[..])); }); } fn float_str(c: &mut Criterion) { println!( "float_str result: {:?}", - float::<_, f64, Error<_>>("-1.234E-12") + float::<_, f64, InputError<_>>.parse_peek("-1.234E-12") ); c.bench_function("float str", |b| { - b.iter(|| float::<_, f64, Error<_>>("-1.234E-12")); + b.iter(|| float::<_, f64, InputError<_>>.parse_peek("-1.234E-12")); }); } -fn std_float(input: &[u8]) -> IResult<&[u8], f64, Error<&[u8]>> { +fn std_float(input: &mut &[u8]) -> PResult<f64> { match input.parse_slice() { - Some(n) => Ok((&[], n)), - None => Err(ErrMode::Backtrack(Error { - input, - kind: ErrorKind::Slice, - })), + Some(n) => Ok(n), + None => Err(ErrMode::from_error_kind(input, ErrorKind::Slice)), } } fn std_float_bytes(c: &mut Criterion) { println!( "std_float_bytes result: {:?}", - std_float(&b"-1.234E-12"[..]) + std_float.parse_peek(&b"-1.234E-12"[..]) ); c.bench_function("std_float bytes", |b| { - b.iter(|| std_float(&b"-1.234E-12"[..])); + b.iter(|| std_float.parse_peek(&b"-1.234E-12"[..])); }); } diff --git a/vendor/winnow/examples/arithmetic/bench.rs b/vendor/winnow/examples/arithmetic/bench.rs index 0f6ec8e..692ac22 100644 --- a/vendor/winnow/examples/arithmetic/bench.rs +++ b/vendor/winnow/examples/arithmetic/bench.rs @@ -1,17 +1,31 @@ mod parser; +mod parser_ast; +mod parser_lexer; -use parser::expr; +use winnow::prelude::*; #[allow(clippy::eq_op, clippy::erasing_op)] fn arithmetic(c: &mut criterion::Criterion) { - let data = " 2*2 / ( 5 - 1) + 3 / 4 * (2 - 7 + 567 *12 /2) + 3*(1+2*( 45 /2));"; + let data = " 2*2 / ( 5 - 1) + 3 / 4 * (2 - 7 + 567 *12 /2) + 3*(1+2*( 45 /2))"; + let expected = 2 * 2 / (5 - 1) + 3 * (1 + 2 * (45 / 2)); + assert_eq!(parser::expr.parse(data), Ok(expected)); assert_eq!( - expr(data), - Ok((";", 2 * 2 / (5 - 1) + 3 * (1 + 2 * (45 / 2)),)) + parser_ast::expr.parse(data).map(|ast| ast.eval()), + Ok(expected) ); - c.bench_function("arithmetic", |b| { - b.iter(|| expr(data).unwrap()); + assert_eq!( + parser_lexer::expr2.parse(data).map(|ast| ast.eval()), + Ok(expected) + ); + c.bench_function("direct", |b| { + b.iter(|| parser::expr.parse(data).unwrap()); + }); + c.bench_function("ast", |b| { + b.iter(|| parser_ast::expr.parse(data).unwrap().eval()); + }); + c.bench_function("lexer", |b| { + b.iter(|| parser_lexer::expr2.parse_peek(data).unwrap()); }); } diff --git a/vendor/winnow/examples/arithmetic/main.rs b/vendor/winnow/examples/arithmetic/main.rs index 94a17d8..e46cf2f 100644 --- a/vendor/winnow/examples/arithmetic/main.rs +++ b/vendor/winnow/examples/arithmetic/main.rs @@ -2,32 +2,41 @@ use 
winnow::prelude::*; mod parser; mod parser_ast; +mod parser_lexer; fn main() -> Result<(), lexopt::Error> { let args = Args::parse()?; let input = args.input.as_deref().unwrap_or("1 + 1"); + if let Err(err) = calc(input, args.implementation) { + println!("FAILED"); + println!("{}", err); + } + + Ok(()) +} +fn calc( + input: &str, + imp: Impl, +) -> Result<(), winnow::error::ParseError<&str, winnow::error::ContextError>> { println!("{} =", input); - match args.implementation { - Impl::Eval => match parser::expr.parse(input) { - Ok(result) => { - println!(" {}", result); - } - Err(err) => { - println!(" {}", err); - } - }, - Impl::Ast => match parser_ast::expr.parse(input) { - Ok(result) => { - println!(" {:#?}", result); - } - Err(err) => { - println!(" {}", err); - } - }, + match imp { + Impl::Eval => { + let result = parser::expr.parse(input)?; + println!(" {}", result); + } + Impl::Ast => { + let result = parser_ast::expr.parse(input)?; + println!(" {:#?}={}", result, result.eval()); + } + Impl::Lexer => { + let tokens = parser_lexer::lex.parse(input)?; + println!(" {:#?}", tokens); + let result = parser_lexer::expr.parse(tokens.as_slice()).unwrap(); + println!(" {:#?}={}", result, result.eval()); + } } - Ok(()) } @@ -40,6 +49,7 @@ struct Args { enum Impl { Eval, Ast, + Lexer, } impl Default for Impl { @@ -61,6 +71,7 @@ impl Args { res.implementation = args.value()?.parse_with(|s| match s { "eval" => Ok(Impl::Eval), "ast" => Ok(Impl::Ast), + "lexer" => Ok(Impl::Lexer), _ => Err("expected `eval`, `ast`"), })?; } diff --git a/vendor/winnow/examples/arithmetic/parser.rs b/vendor/winnow/examples/arithmetic/parser.rs index 346c501..1657275 100644 --- a/vendor/winnow/examples/arithmetic/parser.rs +++ b/vendor/winnow/examples/arithmetic/parser.rs @@ -2,21 +2,21 @@ use std::str::FromStr; use winnow::prelude::*; use winnow::{ - branch::alt, - bytes::one_of, - character::{digit1 as digits, space0 as spaces}, - multi::fold_many0, - sequence::delimited, - IResult, + ascii::{digit1 as digits, multispace0 as multispaces}, + combinator::alt, + combinator::delimited, + combinator::fold_repeat, + token::one_of, }; // Parser definition -pub fn expr(i: &str) -> IResult<&str, i64> { - let (i, init) = term(i)?; +pub fn expr(i: &mut &str) -> PResult<i64> { + let init = term.parse_next(i)?; - fold_many0( - (one_of("+-"), term), + fold_repeat( + 0.., + (one_of(['+', '-']), term), move || init, |acc, (op, val): (char, i64)| { if op == '+' { @@ -32,11 +32,12 @@ pub fn expr(i: &str) -> IResult<&str, i64> { // We read an initial factor and for each time we find // a * or / operator followed by another factor, we do // the math by folding everything -fn term(i: &str) -> IResult<&str, i64> { - let (i, init) = factor(i)?; +fn term(i: &mut &str) -> PResult<i64> { + let init = factor.parse_next(i)?; - fold_many0( - (one_of("*/"), factor), + fold_repeat( + 0.., + (one_of(['*', '/']), factor), move || init, |acc, (op, val): (char, i64)| { if op == '*' { @@ -49,53 +50,88 @@ fn term(i: &str) -> IResult<&str, i64> { .parse_next(i) } -// We transform an integer string into a i64, ignoring surrounding whitespaces +// We transform an integer string into a i64, ignoring surrounding whitespace // We look for a digit suite, and try to convert it. 
// If either str::from_utf8 or FromStr::from_str fail, // we fallback to the parens parser defined above -fn factor(i: &str) -> IResult<&str, i64> { +fn factor(i: &mut &str) -> PResult<i64> { delimited( - spaces, - alt(( - digits.map_res(FromStr::from_str), - delimited('(', expr, ')'), - parens, - )), - spaces, + multispaces, + alt((digits.try_map(FromStr::from_str), parens)), + multispaces, ) .parse_next(i) } -// We parse any expr surrounded by parens, ignoring all whitespaces around those -fn parens(i: &str) -> IResult<&str, i64> { +// We parse any expr surrounded by parens, ignoring all whitespace around those +fn parens(i: &mut &str) -> PResult<i64> { delimited('(', expr, ')').parse_next(i) } #[test] fn factor_test() { - assert_eq!(factor("3"), Ok(("", 3))); - assert_eq!(factor(" 12"), Ok(("", 12))); - assert_eq!(factor("537 "), Ok(("", 537))); - assert_eq!(factor(" 24 "), Ok(("", 24))); + let input = "3"; + let expected = Ok(("", 3)); + assert_eq!(factor.parse_peek(input), expected); + + let input = " 12"; + let expected = Ok(("", 12)); + assert_eq!(factor.parse_peek(input), expected); + + let input = "537 "; + let expected = Ok(("", 537)); + assert_eq!(factor.parse_peek(input), expected); + + let input = " 24 "; + let expected = Ok(("", 24)); + assert_eq!(factor.parse_peek(input), expected); } #[test] fn term_test() { - assert_eq!(term(" 12 *2 / 3"), Ok(("", 8))); - assert_eq!(term(" 2* 3 *2 *2 / 3"), Ok(("", 8))); - assert_eq!(term(" 48 / 3/2"), Ok(("", 8))); + let input = " 12 *2 / 3"; + let expected = Ok(("", 8)); + assert_eq!(term.parse_peek(input), expected); + + let input = " 12 *2 / 3"; + let expected = Ok(("", 8)); + assert_eq!(term.parse_peek(input), expected); + + let input = " 2* 3 *2 *2 / 3"; + let expected = Ok(("", 8)); + assert_eq!(term.parse_peek(input), expected); + + let input = " 48 / 3/2"; + let expected = Ok(("", 8)); + assert_eq!(term.parse_peek(input), expected); } #[test] fn expr_test() { - assert_eq!(expr(" 1 + 2 "), Ok(("", 3))); - assert_eq!(expr(" 12 + 6 - 4+ 3"), Ok(("", 17))); - assert_eq!(expr(" 1 + 2*3 + 4"), Ok(("", 11))); + let input = " 1 + 2 "; + let expected = Ok(("", 3)); + assert_eq!(expr.parse_peek(input), expected); + + let input = " 12 + 6 - 4+ 3"; + let expected = Ok(("", 17)); + assert_eq!(expr.parse_peek(input), expected); + + let input = " 1 + 2*3 + 4"; + let expected = Ok(("", 11)); + assert_eq!(expr.parse_peek(input), expected); } #[test] fn parens_test() { - assert_eq!(expr(" ( 2 )"), Ok(("", 2))); - assert_eq!(expr(" 2* ( 3 + 4 ) "), Ok(("", 14))); - assert_eq!(expr(" 2*2 / ( 5 - 1) + 3"), Ok(("", 4))); + let input = " ( 2 )"; + let expected = Ok(("", 2)); + assert_eq!(expr.parse_peek(input), expected); + + let input = " 2* ( 3 + 4 ) "; + let expected = Ok(("", 14)); + assert_eq!(expr.parse_peek(input), expected); + + let input = " 2*2 / ( 5 - 1) + 3"; + let expected = Ok(("", 4)); + assert_eq!(expr.parse_peek(input), expected); } diff --git a/vendor/winnow/examples/arithmetic/parser_ast.rs b/vendor/winnow/examples/arithmetic/parser_ast.rs index 6ae8c9b..0ca1534 100644 --- a/vendor/winnow/examples/arithmetic/parser_ast.rs +++ b/vendor/winnow/examples/arithmetic/parser_ast.rs @@ -5,14 +5,14 @@ use std::str::FromStr; use winnow::prelude::*; use winnow::{ - branch::alt, - character::{digit1 as digit, multispace0 as multispace}, - multi::many0, - sequence::{delimited, preceded}, - IResult, + ascii::{digit1 as digits, multispace0 as multispaces}, + combinator::alt, + combinator::delimited, + combinator::fold_repeat, + token::one_of, }; 
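// The fold scheme described in the comments above generalizes to any
// left-associative operator level. A minimal sketch, assuming only the
// `digits`, `fold_repeat`, and `one_of` imports above (`sums` is an
// illustrative name, not part of this example):
//
// fn sums(i: &mut &str) -> PResult<i64> {
//     // Read one leading number, then fold each `+ n` / `- n` pair
//     // into the running total, left to right.
//     let init = digits.try_map(i64::from_str).parse_next(i)?;
//     fold_repeat(
//         0..,
//         (one_of(['+', '-']), digits.try_map(i64::from_str)),
//         move || init,
//         |acc, (op, val): (char, i64)| {
//             if op == '+' { acc + val } else { acc - val }
//         },
//     )
//     .parse_next(i)
// }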
-#[derive(Debug)] +#[derive(Debug, Clone)] pub enum Expr { Value(i64), Add(Box<Expr>, Box<Expr>), @@ -22,12 +22,17 @@ pub enum Expr { Paren(Box<Expr>), } -#[derive(Debug)] -pub enum Oper { - Add, - Sub, - Mul, - Div, +impl Expr { + pub fn eval(&self) -> i64 { + match self { + Self::Value(v) => *v, + Self::Add(lhs, rhs) => lhs.eval() + rhs.eval(), + Self::Sub(lhs, rhs) => lhs.eval() - rhs.eval(), + Self::Mul(lhs, rhs) => lhs.eval() * rhs.eval(), + Self::Div(lhs, rhs) => lhs.eval() / rhs.eval(), + Self::Paren(expr) => expr.eval(), + } + } } impl Display for Expr { @@ -44,113 +49,136 @@ impl Display for Expr { } } -pub fn expr(i: &str) -> IResult<&str, Expr> { - let (i, initial) = term(i)?; - let (i, remainder) = many0(alt(( - |i| { - let (i, add) = preceded("+", term).parse_next(i)?; - Ok((i, (Oper::Add, add))) +pub fn expr(i: &mut &str) -> PResult<Expr> { + let init = term.parse_next(i)?; + + fold_repeat( + 0.., + (one_of(['+', '-']), term), + move || init.clone(), + |acc, (op, val): (char, Expr)| { + if op == '+' { + Expr::Add(Box::new(acc), Box::new(val)) + } else { + Expr::Sub(Box::new(acc), Box::new(val)) + } }, - |i| { - let (i, sub) = preceded("-", term).parse_next(i)?; - Ok((i, (Oper::Sub, sub))) - }, - ))) - .parse_next(i)?; - - Ok((i, fold_exprs(initial, remainder))) + ) + .parse_next(i) } -fn term(i: &str) -> IResult<&str, Expr> { - let (i, initial) = factor(i)?; - let (i, remainder) = many0(alt(( - |i| { - let (i, mul) = preceded("*", factor).parse_next(i)?; - Ok((i, (Oper::Mul, mul))) +fn term(i: &mut &str) -> PResult<Expr> { + let init = factor.parse_next(i)?; + + fold_repeat( + 0.., + (one_of(['*', '/']), factor), + move || init.clone(), + |acc, (op, val): (char, Expr)| { + if op == '*' { + Expr::Mul(Box::new(acc), Box::new(val)) + } else { + Expr::Div(Box::new(acc), Box::new(val)) + } }, - |i| { - let (i, div) = preceded("/", factor).parse_next(i)?; - Ok((i, (Oper::Div, div))) - }, - ))) - .parse_next(i)?; - - Ok((i, fold_exprs(initial, remainder))) -} - -fn factor(i: &str) -> IResult<&str, Expr> { - alt(( - delimited(multispace, digit, multispace) - .map_res(FromStr::from_str) - .map(Expr::Value), - parens, - )) + ) .parse_next(i) } -fn parens(i: &str) -> IResult<&str, Expr> { +fn factor(i: &mut &str) -> PResult<Expr> { delimited( - multispace, - delimited("(", expr.map(|e| Expr::Paren(Box::new(e))), ")"), - multispace, + multispaces, + alt((digits.try_map(FromStr::from_str).map(Expr::Value), parens)), + multispaces, ) .parse_next(i) } -fn fold_exprs(initial: Expr, remainder: Vec<(Oper, Expr)>) -> Expr { - remainder.into_iter().fold(initial, |acc, pair| { - let (oper, expr) = pair; - match oper { - Oper::Add => Expr::Add(Box::new(acc), Box::new(expr)), - Oper::Sub => Expr::Sub(Box::new(acc), Box::new(expr)), - Oper::Mul => Expr::Mul(Box::new(acc), Box::new(expr)), - Oper::Div => Expr::Div(Box::new(acc), Box::new(expr)), - } - }) +fn parens(i: &mut &str) -> PResult<Expr> { + delimited("(", expr, ")") + .map(|e| Expr::Paren(Box::new(e))) + .parse_next(i) } #[test] fn factor_test() { - assert_eq!( - factor(" 3 ").map(|(i, x)| (i, format!("{:?}", x))), - Ok(("", String::from("Value(3)"))) - ); + let input = "3"; + let expected = Ok(("", String::from("Value(3)"))); + assert_eq!(factor.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 12"; + let expected = Ok(("", String::from("Value(12)"))); + assert_eq!(factor.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = "537 "; + let expected = Ok(("", String::from("Value(537)"))); + 
assert_eq!(factor.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 24 "; + let expected = Ok(("", String::from("Value(24)"))); + assert_eq!(factor.map(|e| format!("{e:?}")).parse_peek(input), expected); } #[test] fn term_test() { - assert_eq!( - term(" 3 * 5 ").map(|(i, x)| (i, format!("{:?}", x))), - Ok(("", String::from("Mul(Value(3), Value(5))"))) - ); + let input = " 12 *2 / 3"; + let expected = Ok(("", String::from("Div(Mul(Value(12), Value(2)), Value(3))"))); + assert_eq!(term.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 12 *2 / 3"; + let expected = Ok(("", String::from("Div(Mul(Value(12), Value(2)), Value(3))"))); + assert_eq!(term.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 2* 3 *2 *2 / 3"; + let expected = Ok(( + "", + String::from("Div(Mul(Mul(Mul(Value(2), Value(3)), Value(2)), Value(2)), Value(3))"), + )); + assert_eq!(term.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 48 / 3/2"; + let expected = Ok(("", String::from("Div(Div(Value(48), Value(3)), Value(2))"))); + assert_eq!(term.map(|e| format!("{e:?}")).parse_peek(input), expected); } #[test] fn expr_test() { - assert_eq!( - expr(" 1 + 2 * 3 ").map(|(i, x)| (i, format!("{:?}", x))), - Ok(("", String::from("Add(Value(1), Mul(Value(2), Value(3)))"))) - ); - assert_eq!( - expr(" 1 + 2 * 3 / 4 - 5 ").map(|(i, x)| (i, format!("{:?}", x))), - Ok(( - "", - String::from("Sub(Add(Value(1), Div(Mul(Value(2), Value(3)), Value(4))), Value(5))") - )) - ); - assert_eq!( - expr(" 72 / 2 / 3 ").map(|(i, x)| (i, format!("{:?}", x))), - Ok(("", String::from("Div(Div(Value(72), Value(2)), Value(3))"))) - ); + let input = " 1 + 2 "; + let expected = Ok(("", String::from("Add(Value(1), Value(2))"))); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 12 + 6 - 4+ 3"; + let expected = Ok(( + "", + String::from("Add(Sub(Add(Value(12), Value(6)), Value(4)), Value(3))"), + )); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 1 + 2*3 + 4"; + let expected = Ok(( + "", + String::from("Add(Add(Value(1), Mul(Value(2), Value(3))), Value(4))"), + )); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); } #[test] fn parens_test() { - assert_eq!( - expr(" ( 1 + 2 ) * 3 ").map(|(i, x)| (i, format!("{:?}", x))), - Ok(( - "", - String::from("Mul(Paren(Add(Value(1), Value(2))), Value(3))") - )) - ); + let input = " ( 2 )"; + let expected = Ok(("", String::from("Paren(Value(2))"))); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 2* ( 3 + 4 ) "; + let expected = Ok(( + "", + String::from("Mul(Value(2), Paren(Add(Value(3), Value(4))))"), + )); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 2*2 / ( 5 - 1) + 3"; + let expected = Ok(( + "", + String::from("Add(Div(Mul(Value(2), Value(2)), Paren(Sub(Value(5), Value(1)))), Value(3))"), + )); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); } diff --git a/vendor/winnow/examples/arithmetic/parser_lexer.rs b/vendor/winnow/examples/arithmetic/parser_lexer.rs new file mode 100644 index 0000000..f49566d --- /dev/null +++ b/vendor/winnow/examples/arithmetic/parser_lexer.rs @@ -0,0 +1,297 @@ +use std::fmt; +use std::fmt::{Debug, Display, Formatter}; + +use std::str::FromStr; + +use winnow::prelude::*; +use winnow::{ + ascii::{digit1 as digits, multispace0 as multispaces}, + combinator::alt, + 
combinator::dispatch, + combinator::fail, + combinator::fold_repeat, + combinator::peek, + combinator::repeat, + combinator::{delimited, preceded, terminated}, + token::any, + token::one_of, +}; + +#[derive(Debug, Clone)] +pub enum Expr { + Value(i64), + Add(Box<Expr>, Box<Expr>), + Sub(Box<Expr>, Box<Expr>), + Mul(Box<Expr>, Box<Expr>), + Div(Box<Expr>, Box<Expr>), + Paren(Box<Expr>), +} + +impl Expr { + pub fn eval(&self) -> i64 { + match self { + Self::Value(v) => *v, + Self::Add(lhs, rhs) => lhs.eval() + rhs.eval(), + Self::Sub(lhs, rhs) => lhs.eval() - rhs.eval(), + Self::Mul(lhs, rhs) => lhs.eval() * rhs.eval(), + Self::Div(lhs, rhs) => lhs.eval() / rhs.eval(), + Self::Paren(expr) => expr.eval(), + } + } +} + +impl Display for Expr { + fn fmt(&self, format: &mut Formatter<'_>) -> fmt::Result { + use Expr::{Add, Div, Mul, Paren, Sub, Value}; + match *self { + Value(val) => write!(format, "{}", val), + Add(ref left, ref right) => write!(format, "{} + {}", left, right), + Sub(ref left, ref right) => write!(format, "{} - {}", left, right), + Mul(ref left, ref right) => write!(format, "{} * {}", left, right), + Div(ref left, ref right) => write!(format, "{} / {}", left, right), + Paren(ref expr) => write!(format, "({})", expr), + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Token { + Value(i64), + Oper(Oper), + OpenParen, + CloseParen, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Oper { + Add, + Sub, + Mul, + Div, +} + +impl winnow::stream::ContainsToken<Token> for Token { + #[inline(always)] + fn contains_token(&self, token: Token) -> bool { + *self == token + } +} + +impl winnow::stream::ContainsToken<Token> for &'_ [Token] { + #[inline] + fn contains_token(&self, token: Token) -> bool { + self.iter().any(|t| *t == token) + } +} + +impl<const LEN: usize> winnow::stream::ContainsToken<Token> for &'_ [Token; LEN] { + #[inline] + fn contains_token(&self, token: Token) -> bool { + self.iter().any(|t| *t == token) + } +} + +impl<const LEN: usize> winnow::stream::ContainsToken<Token> for [Token; LEN] { + #[inline] + fn contains_token(&self, token: Token) -> bool { + self.iter().any(|t| *t == token) + } +} + +#[allow(dead_code)] +pub fn expr2(i: &mut &str) -> PResult<Expr> { + let tokens = lex.parse_next(i)?; + expr.parse_next(&mut tokens.as_slice()) +} + +pub fn lex(i: &mut &str) -> PResult<Vec<Token>> { + preceded(multispaces, repeat(1.., terminated(token, multispaces))).parse_next(i) +} + +fn token(i: &mut &str) -> PResult<Token> { + dispatch! 
{peek(any); + '0'..='9' => digits.try_map(FromStr::from_str).map(Token::Value), + '(' => '('.value(Token::OpenParen), + ')' => ')'.value(Token::CloseParen), + '+' => '+'.value(Token::Oper(Oper::Add)), + '-' => '-'.value(Token::Oper(Oper::Sub)), + '*' => '*'.value(Token::Oper(Oper::Mul)), + '/' => '/'.value(Token::Oper(Oper::Div)), + _ => fail, + } + .parse_next(i) +} + +pub fn expr(i: &mut &[Token]) -> PResult<Expr> { + let init = term.parse_next(i)?; + + fold_repeat( + 0.., + ( + one_of([Token::Oper(Oper::Add), Token::Oper(Oper::Sub)]), + term, + ), + move || init.clone(), + |acc, (op, val): (Token, Expr)| { + if op == Token::Oper(Oper::Add) { + Expr::Add(Box::new(acc), Box::new(val)) + } else { + Expr::Sub(Box::new(acc), Box::new(val)) + } + }, + ) + .parse_next(i) +} + +fn term(i: &mut &[Token]) -> PResult<Expr> { + let init = factor.parse_next(i)?; + + fold_repeat( + 0.., + ( + one_of([Token::Oper(Oper::Mul), Token::Oper(Oper::Div)]), + factor, + ), + move || init.clone(), + |acc, (op, val): (Token, Expr)| { + if op == Token::Oper(Oper::Mul) { + Expr::Mul(Box::new(acc), Box::new(val)) + } else { + Expr::Div(Box::new(acc), Box::new(val)) + } + }, + ) + .parse_next(i) +} + +fn factor(i: &mut &[Token]) -> PResult<Expr> { + alt(( + one_of(|t| matches!(t, Token::Value(_))).map(|t| match t { + Token::Value(v) => Expr::Value(v), + _ => unreachable!(), + }), + parens, + )) + .parse_next(i) +} + +fn parens(i: &mut &[Token]) -> PResult<Expr> { + delimited(one_of(Token::OpenParen), expr, one_of(Token::CloseParen)) + .map(|e| Expr::Paren(Box::new(e))) + .parse_next(i) +} + +#[test] +fn lex_test() { + let input = "3"; + let expected = Ok(String::from(r#"("", [Value(3)])"#)); + assert_eq!(lex.parse_peek(input).map(|e| format!("{e:?}")), expected); + + let input = " 24 "; + let expected = Ok(String::from(r#"("", [Value(24)])"#)); + assert_eq!(lex.parse_peek(input).map(|e| format!("{e:?}")), expected); + + let input = " 12 *2 / 3"; + let expected = Ok(String::from( + r#"("", [Value(12), Oper(Mul), Value(2), Oper(Div), Value(3)])"#, + )); + assert_eq!(lex.parse_peek(input).map(|e| format!("{e:?}")), expected); + + let input = " 2*2 / ( 5 - 1) + 3"; + let expected = Ok(String::from( + r#"("", [Value(2), Oper(Mul), Value(2), Oper(Div), OpenParen, Value(5), Oper(Sub), Value(1), CloseParen, Oper(Add), Value(3)])"#, + )); + assert_eq!(lex.parse_peek(input).map(|e| format!("{e:?}")), expected); +} + +#[test] +fn factor_test() { + let input = "3"; + let expected = Ok(String::from("Value(3)")); + let input = lex.parse(input).unwrap(); + assert_eq!(factor.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 12"; + let expected = Ok(String::from("Value(12)")); + let input = lex.parse(input).unwrap(); + assert_eq!(factor.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = "537 "; + let expected = Ok(String::from("Value(537)")); + let input = lex.parse(input).unwrap(); + assert_eq!(factor.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 24 "; + let expected = Ok(String::from("Value(24)")); + let input = lex.parse(input).unwrap(); + assert_eq!(factor.map(|e| format!("{e:?}")).parse(&input), expected); +} + +#[test] +fn term_test() { + let input = " 12 *2 / 3"; + let expected = Ok(String::from("Div(Mul(Value(12), Value(2)), Value(3))")); + let input = lex.parse(input).unwrap(); + assert_eq!(term.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 12 *2 / 3"; + let expected = Ok(String::from("Div(Mul(Value(12), Value(2)), Value(3))")); + let 
input = lex.parse(input).unwrap(); + assert_eq!(term.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 2* 3 *2 *2 / 3"; + let expected = Ok(String::from( + "Div(Mul(Mul(Mul(Value(2), Value(3)), Value(2)), Value(2)), Value(3))", + )); + let input = lex.parse(input).unwrap(); + assert_eq!(term.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 48 / 3/2"; + let expected = Ok(String::from("Div(Div(Value(48), Value(3)), Value(2))")); + let input = lex.parse(input).unwrap(); + assert_eq!(term.map(|e| format!("{e:?}")).parse(&input), expected); +} + +#[test] +fn expr_test() { + let input = " 1 + 2 "; + let expected = Ok(String::from("Add(Value(1), Value(2))")); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 12 + 6 - 4+ 3"; + let expected = Ok(String::from( + "Add(Sub(Add(Value(12), Value(6)), Value(4)), Value(3))", + )); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 1 + 2*3 + 4"; + let expected = Ok(String::from( + "Add(Add(Value(1), Mul(Value(2), Value(3))), Value(4))", + )); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); +} + +#[test] +fn parens_test() { + let input = " ( 2 )"; + let expected = Ok(String::from("Paren(Value(2))")); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 2* ( 3 + 4 ) "; + let expected = Ok(String::from( + "Mul(Value(2), Paren(Add(Value(3), Value(4))))", + )); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 2*2 / ( 5 - 1) + 3"; + let expected = Ok(String::from( + "Add(Div(Mul(Value(2), Value(2)), Paren(Sub(Value(5), Value(1)))), Value(3))", + )); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); +} diff --git a/vendor/winnow/examples/css/main.rs b/vendor/winnow/examples/css/main.rs index 26c1b92..cf52ada 100644 --- a/vendor/winnow/examples/css/main.rs +++ b/vendor/winnow/examples/css/main.rs @@ -49,7 +49,7 @@ impl Args { #[test] fn parse_color() { assert_eq!( - hex_color("#2F14DF"), + hex_color.parse_peek("#2F14DF"), Ok(( "", parser::Color { diff --git a/vendor/winnow/examples/css/parser.rs b/vendor/winnow/examples/css/parser.rs index e39b0f5..fa13078 100644 --- a/vendor/winnow/examples/css/parser.rs +++ b/vendor/winnow/examples/css/parser.rs @@ -1,5 +1,6 @@ -use winnow::bytes::take_while_m_n; +use winnow::combinator::seq; use winnow::prelude::*; +use winnow::token::take_while; #[derive(Debug, Eq, PartialEq)] pub struct Color { @@ -10,22 +11,25 @@ pub struct Color { impl std::str::FromStr for Color { // The error must be owned - type Err = winnow::error::Error<String>; + type Err = String; fn from_str(s: &str) -> Result<Self, Self::Err> { - hex_color.parse(s).map_err(winnow::error::Error::into_owned) + hex_color.parse(s).map_err(|e| e.to_string()) } } -pub fn hex_color(input: &str) -> IResult<&str, Color> { - let (input, _) = "#".parse_next(input)?; - let (input, (red, green, blue)) = (hex_primary, hex_primary, hex_primary).parse_next(input)?; - - Ok((input, Color { red, green, blue })) +pub fn hex_color(input: &mut &str) -> PResult<Color> { + seq!(Color { + _: '#', + red: hex_primary, + green: hex_primary, + blue: hex_primary + }) + .parse_next(input) } -fn hex_primary(input: &str) -> 
IResult<&str, u8> { - take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit()) - .map_res(|input| u8::from_str_radix(input, 16)) +fn hex_primary(input: &mut &str) -> PResult<u8> { + take_while(2, |c: char| c.is_ascii_hexdigit()) + .try_map(|input| u8::from_str_radix(input, 16)) .parse_next(input) } diff --git a/vendor/winnow/examples/custom_error.rs b/vendor/winnow/examples/custom_error.rs index 954452a..998e5ad 100644 --- a/vendor/winnow/examples/custom_error.rs +++ b/vendor/winnow/examples/custom_error.rs @@ -1,7 +1,7 @@ use winnow::error::ErrMode; use winnow::error::ErrorKind; -use winnow::error::ParseError; -use winnow::IResult; +use winnow::error::ParserError; +use winnow::prelude::*; #[derive(Debug, PartialEq, Eq)] pub enum CustomError<I> { @@ -9,17 +9,17 @@ pub enum CustomError<I> { Nom(I, ErrorKind), } -impl<I> ParseError<I> for CustomError<I> { - fn from_error_kind(input: I, kind: ErrorKind) -> Self { - CustomError::Nom(input, kind) +impl<I: Clone> ParserError<I> for CustomError<I> { + fn from_error_kind(input: &I, kind: ErrorKind) -> Self { + CustomError::Nom(input.clone(), kind) } - fn append(self, _: I, _: ErrorKind) -> Self { + fn append(self, _: &I, _: ErrorKind) -> Self { self } } -pub fn parse(_input: &str) -> IResult<&str, &str, CustomError<&str>> { +pub fn parse<'s>(_input: &mut &'s str) -> PResult<&'s str, CustomError<&'s str>> { Err(ErrMode::Backtrack(CustomError::MyError)) } @@ -27,13 +27,11 @@ fn main() {} #[cfg(test)] mod tests { - use super::parse; - use super::CustomError; - use winnow::error::ErrMode; + use super::*; #[test] fn it_works() { - let err = parse("").unwrap_err(); + let err = parse.parse_next(&mut "").unwrap_err(); match err { ErrMode::Backtrack(e) => assert_eq!(e, CustomError::MyError), _ => panic!("Unexpected error: {:?}", err), diff --git a/vendor/winnow/examples/http/parser.rs b/vendor/winnow/examples/http/parser.rs index 83402fe..eae8ab6 100644 --- a/vendor/winnow/examples/http/parser.rs +++ b/vendor/winnow/examples/http/parser.rs @@ -1,4 +1,6 @@ -use winnow::{bytes::take_while1, character::line_ending, multi::many1, IResult, Parser}; +use winnow::combinator::seq; +use winnow::prelude::*; +use winnow::{ascii::line_ending, combinator::repeat, token::take_while}; pub type Stream<'i> = &'i [u8]; @@ -22,12 +24,11 @@ pub fn parse(data: &[u8]) -> Option<Vec<(Request<'_>, Vec<Header<'_>>)>> { let mut buf = data; let mut v = Vec::new(); loop { - match request(buf) { - Ok((b, r)) => { - buf = b; + match request(&mut buf) { + Ok(r) => { v.push(r); - if b.is_empty() { + if buf.is_empty() { //println!("{}", i); break; } @@ -42,53 +43,48 @@ pub fn parse(data: &[u8]) -> Option<Vec<(Request<'_>, Vec<Header<'_>>)>> { Some(v) } -fn request(input: Stream<'_>) -> IResult<Stream<'_>, (Request<'_>, Vec<Header<'_>>)> { - let (input, req) = request_line(input)?; - let (input, h) = many1(message_header).parse_next(input)?; - let (input, _) = line_ending(input)?; +fn request<'s>(input: &mut Stream<'s>) -> PResult<(Request<'s>, Vec<Header<'s>>)> { + let req = request_line(input)?; + let h = repeat(1.., message_header).parse_next(input)?; + let _ = line_ending.parse_next(input)?; - Ok((input, (req, h))) + Ok((req, h)) } -fn request_line(input: Stream<'_>) -> IResult<Stream<'_>, Request<'_>> { - let (input, method) = take_while1(is_token).parse_next(input)?; - let (input, _) = take_while1(is_space).parse_next(input)?; - let (input, uri) = take_while1(is_not_space).parse_next(input)?; - let (input, _) = take_while1(is_space).parse_next(input)?; - let (input, version) = 
http_version(input)?; - let (input, _) = line_ending(input)?; - - Ok(( - input, - Request { - method, - uri, - version, - }, - )) +fn request_line<'s>(input: &mut Stream<'s>) -> PResult<Request<'s>> { + seq!( Request { + method: take_while(1.., is_token), + _: take_while(1.., is_space), + uri: take_while(1.., is_not_space), + _: take_while(1.., is_space), + version: http_version, + _: line_ending, + }) + .parse_next(input) } -fn http_version(input: Stream<'_>) -> IResult<Stream<'_>, &[u8]> { - let (input, _) = "HTTP/".parse_next(input)?; - let (input, version) = take_while1(is_version).parse_next(input)?; +fn http_version<'s>(input: &mut Stream<'s>) -> PResult<&'s [u8]> { + let _ = "HTTP/".parse_next(input)?; + let version = take_while(1.., is_version).parse_next(input)?; - Ok((input, version)) + Ok(version) } -fn message_header_value(input: Stream<'_>) -> IResult<Stream<'_>, &[u8]> { - let (input, _) = take_while1(is_horizontal_space).parse_next(input)?; - let (input, data) = take_while1(not_line_ending).parse_next(input)?; - let (input, _) = line_ending(input)?; +fn message_header_value<'s>(input: &mut Stream<'s>) -> PResult<&'s [u8]> { + let _ = take_while(1.., is_horizontal_space).parse_next(input)?; + let data = take_while(1.., not_line_ending).parse_next(input)?; + let _ = line_ending.parse_next(input)?; - Ok((input, data)) + Ok(data) } -fn message_header(input: Stream<'_>) -> IResult<Stream<'_>, Header<'_>> { - let (input, name) = take_while1(is_token).parse_next(input)?; - let (input, _) = ':'.parse_next(input)?; - let (input, value) = many1(message_header_value).parse_next(input)?; - - Ok((input, Header { name, value })) +fn message_header<'s>(input: &mut Stream<'s>) -> PResult<Header<'s>> { + seq!(Header { + name: take_while(1.., is_token), + _: ':', + value: repeat(1.., message_header_value), + }) + .parse_next(input) } #[rustfmt::skip] @@ -121,7 +117,7 @@ fn is_token(c: u8) -> bool { } fn is_version(c: u8) -> bool { - (b'0'..=b'9').contains(&c) || c == b'.' + c.is_ascii_digit() || c == b'.' 
} fn not_line_ending(c: u8) -> bool { diff --git a/vendor/winnow/examples/http/parser_streaming.rs b/vendor/winnow/examples/http/parser_streaming.rs index f7774b7..74ba342 100644 --- a/vendor/winnow/examples/http/parser_streaming.rs +++ b/vendor/winnow/examples/http/parser_streaming.rs @@ -1,5 +1,6 @@ +use winnow::combinator::seq; use winnow::{ - bytes::take_while1, character::line_ending, multi::many1, stream::Partial, IResult, Parser, + ascii::line_ending, combinator::repeat, prelude::*, stream::Partial, token::take_while, }; pub type Stream<'i> = Partial<&'i [u8]>; @@ -24,12 +25,11 @@ pub fn parse(data: &[u8]) -> Option<Vec<(Request<'_>, Vec<Header<'_>>)>> { let mut buf = Partial::new(data); let mut v = Vec::new(); loop { - match request(buf) { - Ok((b, r)) => { - buf = b; + match request(&mut buf) { + Ok(r) => { v.push(r); - if b.is_empty() { + if buf.is_empty() { //println!("{}", i); break; } @@ -44,53 +44,48 @@ pub fn parse(data: &[u8]) -> Option<Vec<(Request<'_>, Vec<Header<'_>>)>> { Some(v) } -fn request(input: Stream<'_>) -> IResult<Stream<'_>, (Request<'_>, Vec<Header<'_>>)> { - let (input, req) = request_line(input)?; - let (input, h) = many1(message_header).parse_next(input)?; - let (input, _) = line_ending(input)?; +fn request<'s>(input: &mut Stream<'s>) -> PResult<(Request<'s>, Vec<Header<'s>>)> { + let req = request_line(input)?; + let h = repeat(1.., message_header).parse_next(input)?; + let _ = line_ending.parse_next(input)?; - Ok((input, (req, h))) + Ok((req, h)) } -fn request_line(input: Stream<'_>) -> IResult<Stream<'_>, Request<'_>> { - let (input, method) = take_while1(is_token).parse_next(input)?; - let (input, _) = take_while1(is_space).parse_next(input)?; - let (input, uri) = take_while1(is_not_space).parse_next(input)?; - let (input, _) = take_while1(is_space).parse_next(input)?; - let (input, version) = http_version(input)?; - let (input, _) = line_ending(input)?; - - Ok(( - input, - Request { - method, - uri, - version, - }, - )) +fn request_line<'s>(input: &mut Stream<'s>) -> PResult<Request<'s>> { + seq!( Request { + method: take_while(1.., is_token), + _: take_while(1.., is_space), + uri: take_while(1.., is_not_space), + _: take_while(1.., is_space), + version: http_version, + _: line_ending, + }) + .parse_next(input) } -fn http_version(input: Stream<'_>) -> IResult<Stream<'_>, &[u8]> { - let (input, _) = "HTTP/".parse_next(input)?; - let (input, version) = take_while1(is_version).parse_next(input)?; +fn http_version<'s>(input: &mut Stream<'s>) -> PResult<&'s [u8]> { + let _ = "HTTP/".parse_next(input)?; + let version = take_while(1.., is_version).parse_next(input)?; - Ok((input, version)) + Ok(version) } -fn message_header_value(input: Stream<'_>) -> IResult<Stream<'_>, &[u8]> { - let (input, _) = take_while1(is_horizontal_space).parse_next(input)?; - let (input, data) = take_while1(not_line_ending).parse_next(input)?; - let (input, _) = line_ending(input)?; +fn message_header_value<'s>(input: &mut Stream<'s>) -> PResult<&'s [u8]> { + let _ = take_while(1.., is_horizontal_space).parse_next(input)?; + let data = take_while(1.., not_line_ending).parse_next(input)?; + let _ = line_ending.parse_next(input)?; - Ok((input, data)) + Ok(data) } -fn message_header(input: Stream<'_>) -> IResult<Stream<'_>, Header<'_>> { - let (input, name) = take_while1(is_token).parse_next(input)?; - let (input, _) = ':'.parse_next(input)?; - let (input, value) = many1(message_header_value).parse_next(input)?; - - Ok((input, Header { name, value })) +fn message_header<'s>(input: &mut 
Stream<'s>) -> PResult<Header<'s>> { + seq!(Header { + name: take_while(1.., is_token), + _: ':', + value: repeat(1.., message_header_value), + }) + .parse_next(input) } #[rustfmt::skip] @@ -123,7 +118,7 @@ fn is_token(c: u8) -> bool { } fn is_version(c: u8) -> bool { - (b'0'..=b'9').contains(&c) || c == b'.' + c.is_ascii_digit() || c == b'.' } fn not_line_ending(c: u8) -> bool { diff --git a/vendor/winnow/examples/ini/bench.rs b/vendor/winnow/examples/ini/bench.rs index 2eba5e2..3c7eab8 100644 --- a/vendor/winnow/examples/ini/bench.rs +++ b/vendor/winnow/examples/ini/bench.rs @@ -1,4 +1,4 @@ -use winnow::multi::many0; +use winnow::combinator::repeat; use winnow::prelude::*; mod parser; @@ -18,10 +18,10 @@ file=payroll.dat let mut group = c.benchmark_group("ini"); group.throughput(criterion::Throughput::Bytes(str.len() as u64)); group.bench_function(criterion::BenchmarkId::new("bytes", str.len()), |b| { - b.iter(|| parser::categories(str.as_bytes()).unwrap()); + b.iter(|| parser::categories.parse_peek(str.as_bytes()).unwrap()); }); group.bench_function(criterion::BenchmarkId::new("str", str.len()), |b| { - b.iter(|| parser_str::categories(str).unwrap()) + b.iter(|| parser_str::categories.parse_peek(str).unwrap()) }); } @@ -31,14 +31,14 @@ port=143 file=payroll.dat \0"; - fn acc(i: parser::Stream<'_>) -> IResult<parser::Stream<'_>, Vec<(&str, &str)>> { - many0(parser::key_value).parse_next(i) + fn acc<'s>(i: &mut parser::Stream<'s>) -> PResult<Vec<(&'s str, &'s str)>> { + repeat(0.., parser::key_value).parse_next(i) } let mut group = c.benchmark_group("ini keys and values"); group.throughput(criterion::Throughput::Bytes(str.len() as u64)); group.bench_function(criterion::BenchmarkId::new("bytes", str.len()), |b| { - b.iter(|| acc(str.as_bytes()).unwrap()); + b.iter(|| acc.parse_peek(str.as_bytes()).unwrap()); }); } @@ -48,7 +48,7 @@ fn bench_ini_key_value(c: &mut criterion::Criterion) { let mut group = c.benchmark_group("ini key value"); group.throughput(criterion::Throughput::Bytes(str.len() as u64)); group.bench_function(criterion::BenchmarkId::new("bytes", str.len()), |b| { - b.iter(|| parser::key_value(str.as_bytes()).unwrap()); + b.iter(|| parser::key_value.parse_peek(str.as_bytes()).unwrap()); }); } diff --git a/vendor/winnow/examples/ini/parser.rs b/vendor/winnow/examples/ini/parser.rs index ae234ea..a782d6e 100644 --- a/vendor/winnow/examples/ini/parser.rs +++ b/vendor/winnow/examples/ini/parser.rs @@ -3,38 +3,41 @@ use std::str; use winnow::prelude::*; use winnow::{ - bytes::take_while0, - character::{alphanumeric1 as alphanumeric, multispace0 as multispace, space0 as space}, + ascii::{alphanumeric1 as alphanumeric, multispace0 as multispace, space0 as space}, combinator::opt, - multi::many0, - sequence::{delimited, separated_pair, terminated}, + combinator::repeat, + combinator::{delimited, separated_pair, terminated}, + token::take_while, }; pub type Stream<'i> = &'i [u8]; -pub fn categories(i: Stream<'_>) -> IResult<Stream<'_>, HashMap<&str, HashMap<&str, &str>>> { - many0(separated_pair( - category, - opt(multispace), - many0(terminated(key_value, opt(multispace))), - )) +pub fn categories<'s>(i: &mut Stream<'s>) -> PResult<HashMap<&'s str, HashMap<&'s str, &'s str>>> { + repeat( + 0.., + separated_pair( + category, + opt(multispace), + repeat(0.., terminated(key_value, opt(multispace))), + ), + ) .parse_next(i) } -fn category(i: Stream<'_>) -> IResult<Stream<'_>, &str> { - delimited('[', take_while0(|c| c != b']'), ']') - .map_res(str::from_utf8) +fn category<'s>(i: &mut 
Stream<'s>) -> PResult<&'s str> { + delimited('[', take_while(0.., |c| c != b']'), ']') + .try_map(str::from_utf8) .parse_next(i) } -pub fn key_value(i: Stream<'_>) -> IResult<Stream<'_>, (&str, &str)> { - let (i, key) = alphanumeric.map_res(str::from_utf8).parse_next(i)?; - let (i, _) = (opt(space), '=', opt(space)).parse_next(i)?; - let (i, val) = take_while0(|c| c != b'\n' && c != b';') - .map_res(str::from_utf8) +pub fn key_value<'s>(i: &mut Stream<'s>) -> PResult<(&'s str, &'s str)> { + let key = alphanumeric.try_map(str::from_utf8).parse_next(i)?; + let _ = (opt(space), '=', opt(space)).parse_next(i)?; + let val = take_while(0.., |c| c != b'\n' && c != b';') + .try_map(str::from_utf8) .parse_next(i)?; - let (i, _) = opt((';', take_while0(|c| c != b'\n'))).parse_next(i)?; - Ok((i, (key, val))) + let _ = opt((';', take_while(0.., |c| c != b'\n'))).parse_next(i)?; + Ok((key, val)) } #[test] @@ -47,7 +50,7 @@ key = value2"[..]; let ini_without_category = &b"\n\nparameter=value key = value2"[..]; - let res = category(ini_file); + let res = category.parse_peek(ini_file); println!("{:?}", res); match res { Ok((i, o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o), @@ -64,7 +67,7 @@ key = value2"[..]; let ini_without_key_value = &b"\nkey = value2"[..]; - let res = key_value(ini_file); + let res = key_value.parse_peek(ini_file); println!("{:?}", res); match res { Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2), @@ -81,7 +84,7 @@ key = value2"[..]; let ini_without_key_value = &b"\nkey = value2"[..]; - let res = key_value(ini_file); + let res = key_value.parse_peek(ini_file); println!("{:?}", res); match res { Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2), @@ -98,7 +101,7 @@ key = value2"[..]; let ini_without_key_value = &b"\nkey = value2"[..]; - let res = key_value(ini_file); + let res = key_value.parse_peek(ini_file); println!("{:?}", res); match res { Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2), @@ -123,7 +126,7 @@ key4 = value4 let ini_after_parser = &b""[..]; - let res = categories(ini_file); + let res = categories.parse_peek(ini_file); //println!("{:?}", res); match res { Ok((i, ref o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o), diff --git a/vendor/winnow/examples/ini/parser_str.rs b/vendor/winnow/examples/ini/parser_str.rs index 34455aa..c1858db 100644 --- a/vendor/winnow/examples/ini/parser_str.rs +++ b/vendor/winnow/examples/ini/parser_str.rs @@ -2,56 +2,58 @@ use std::collections::HashMap; use winnow::prelude::*; use winnow::{ - bytes::{take_till0, take_while0, take_while1}, - character::{alphanumeric1 as alphanumeric, space0 as space}, + ascii::{alphanumeric1 as alphanumeric, space0 as space}, combinator::opt, - multi::many0, - sequence::{delimited, terminated}, + combinator::repeat, + combinator::{delimited, terminated}, + token::{take_till, take_while}, }; pub type Stream<'i> = &'i str; -pub fn categories(input: Stream<'_>) -> IResult<Stream<'_>, HashMap<&str, HashMap<&str, &str>>> { - many0(category_and_keys).parse_next(input) +pub fn categories<'s>( + input: &mut Stream<'s>, +) -> PResult<HashMap<&'s str, HashMap<&'s str, &'s str>>> { + repeat(0.., category_and_keys).parse_next(input) } -fn category_and_keys(i: Stream<'_>) -> IResult<Stream<'_>, (&str, HashMap<&str, &str>)> { +fn category_and_keys<'s>(i: &mut Stream<'s>) -> PResult<(&'s str, HashMap<&'s str, &'s str>)> { (category, keys_and_values).parse_next(i) } -fn category(i: Stream<'_>) -> 
IResult<Stream<'_>, &str> { +fn category<'s>(i: &mut Stream<'s>) -> PResult<&'s str> { terminated( - delimited('[', take_while0(|c| c != ']'), ']'), - opt(take_while1(" \r\n")), + delimited('[', take_while(0.., |c| c != ']'), ']'), + opt(take_while(1.., [' ', '\r', '\n'])), ) .parse_next(i) } -fn keys_and_values(input: Stream<'_>) -> IResult<Stream<'_>, HashMap<&str, &str>> { - many0(key_value).parse_next(input) +fn keys_and_values<'s>(input: &mut Stream<'s>) -> PResult<HashMap<&'s str, &'s str>> { + repeat(0.., key_value).parse_next(input) } -fn key_value(i: Stream<'_>) -> IResult<Stream<'_>, (&str, &str)> { - let (i, key) = alphanumeric(i)?; - let (i, _) = (opt(space), "=", opt(space)).parse_next(i)?; - let (i, val) = take_till0(is_line_ending_or_comment).parse_next(i)?; - let (i, _) = opt(space).parse_next(i)?; - let (i, _) = opt((";", not_line_ending)).parse_next(i)?; - let (i, _) = opt(space_or_line_ending).parse_next(i)?; +fn key_value<'s>(i: &mut Stream<'s>) -> PResult<(&'s str, &'s str)> { + let key = alphanumeric.parse_next(i)?; + let _ = (opt(space), "=", opt(space)).parse_next(i)?; + let val = take_till(0.., is_line_ending_or_comment).parse_next(i)?; + let _ = opt(space).parse_next(i)?; + let _ = opt((";", not_line_ending)).parse_next(i)?; + let _ = opt(space_or_line_ending).parse_next(i)?; - Ok((i, (key, val))) + Ok((key, val)) } fn is_line_ending_or_comment(chr: char) -> bool { chr == ';' || chr == '\n' } -fn not_line_ending(i: Stream<'_>) -> IResult<Stream<'_>, &str> { - take_while0(|c| c != '\r' && c != '\n').parse_next(i) +fn not_line_ending<'s>(i: &mut Stream<'s>) -> PResult<&'s str> { + take_while(0.., |c| c != '\r' && c != '\n').parse_next(i) } -fn space_or_line_ending(i: Stream<'_>) -> IResult<Stream<'_>, &str> { - take_while1(" \r\n").parse_next(i) +fn space_or_line_ending<'s>(i: &mut Stream<'s>) -> PResult<&'s str> { + take_while(1.., [' ', '\r', '\n']).parse_next(i) } #[test] @@ -64,7 +66,7 @@ key = value2"; let ini_without_category = "parameter=value key = value2"; - let res = category(ini_file); + let res = category.parse_peek(ini_file); println!("{:?}", res); match res { Ok((i, o)) => println!("i: {} | o: {:?}", i, o), @@ -81,7 +83,7 @@ key = value2"; let ini_without_key_value = "key = value2"; - let res = key_value(ini_file); + let res = key_value.parse_peek(ini_file); println!("{:?}", res); match res { Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2), @@ -98,7 +100,7 @@ key = value2"; let ini_without_key_value = "key = value2"; - let res = key_value(ini_file); + let res = key_value.parse_peek(ini_file); println!("{:?}", res); match res { Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2), @@ -115,7 +117,7 @@ key = value2"; let ini_without_key_value = "key = value2"; - let res = key_value(ini_file); + let res = key_value.parse_peek(ini_file); println!("{:?}", res); match res { Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2), @@ -135,7 +137,7 @@ key = value2 let ini_without_key_value = "[category]"; - let res = keys_and_values(ini_file); + let res = keys_and_values.parse_peek(ini_file); println!("{:?}", res); match res { Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o), @@ -160,7 +162,7 @@ key = value2 let ini_after_parser = "[category]"; - let res = category_and_keys(ini_file); + let res = category_and_keys.parse_peek(ini_file); println!("{:?}", res); match res { Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o), @@ -186,7 +188,7 @@ parameter3=value3 key4 = value4 "; - let res = categories(ini_file); + let 
res = categories.parse_peek(ini_file); //println!("{:?}", res); match res { Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o), diff --git a/vendor/winnow/examples/iterator.rs b/vendor/winnow/examples/iterator.rs index 7e14b57..5c8c731 100644 --- a/vendor/winnow/examples/iterator.rs +++ b/vendor/winnow/examples/iterator.rs @@ -1,30 +1,27 @@ use std::collections::HashMap; use std::iter::Iterator; -use winnow::character::alphanumeric1; +use winnow::ascii::alphanumeric1; use winnow::combinator::iterator; +use winnow::combinator::{separated_pair, terminated}; use winnow::prelude::*; -use winnow::sequence::{separated_pair, terminated}; fn main() { let mut data = "abcabcabcabc"; - fn parser(i: &str) -> IResult<&str, &str> { + fn parser<'s>(i: &mut &'s str) -> PResult<&'s str> { "abc".parse_next(i) } // `from_fn` (available from Rust 1.34) can create an iterator // from a closure let it = std::iter::from_fn(move || { - match parser(data) { + match parser.parse_next(&mut data) { // when successful, a parser returns a tuple of // the remaining input and the output value. // So we replace the captured input data with the // remaining input, to be parsed on the next call - Ok((i, o)) => { - data = i; - Some(o) - } + Ok(o) => Some(o), _ => None, } }); @@ -35,19 +32,18 @@ fn main() { println!("\n********************\n"); - let data = "abcabcabcabc"; + let mut data = "abcabcabcabc"; // if `from_fn` is not available, it is possible to fold // over an iterator of functions - let res = std::iter::repeat(parser).take(3).try_fold( - (data, Vec::new()), - |(data, mut acc), parser| { - parser(data).map(|(i, o)| { + let res = std::iter::repeat(parser) + .take(3) + .try_fold(Vec::new(), |mut acc, mut parser| { + parser.parse_next(&mut data).map(|o| { acc.push(o); - (i, acc) + acc }) - }, - ); + }); // will print "parser iterator returned: Ok(("abc", ["abc", "abc", "abc"]))" println!("\nparser iterator returned: {:?}", res); @@ -70,7 +66,7 @@ fn main() { .map(|(k, v)| (k.to_uppercase(), v)) .collect::<HashMap<_, _>>(); - let parser_result: IResult<_, _> = winnow_it.finish(); + let parser_result: PResult<(_, _), ()> = winnow_it.finish(); let (remaining_input, ()) = parser_result.unwrap(); // will print "iterator returned {"key1": "value1", "key3": "value3", "key2": "value2"}, remaining input is ';'" diff --git a/vendor/winnow/examples/json/bench.rs b/vendor/winnow/examples/json/bench.rs index 0a2fd4f..d074ba5 100644 --- a/vendor/winnow/examples/json/bench.rs +++ b/vendor/winnow/examples/json/bench.rs @@ -1,3 +1,4 @@ +use winnow::prelude::*; use winnow::Partial; mod json; @@ -13,35 +14,44 @@ fn json_bench(c: &mut criterion::Criterion) { group.throughput(criterion::Throughput::Bytes(len as u64)); group.bench_with_input(criterion::BenchmarkId::new("basic", name), &len, |b, _| { - type Error<'i> = winnow::error::Error<parser::Stream<'i>>; + type Error<'i> = winnow::error::InputError<parser::Stream<'i>>; - b.iter(|| parser::json::<Error>(sample).unwrap()); + b.iter(|| parser::json::<Error>.parse_peek(sample).unwrap()); + }); + group.bench_with_input(criterion::BenchmarkId::new("unit", name), &len, |b, _| { + type Error<'i> = (); + + b.iter(|| parser::json::<Error>.parse_peek(sample).unwrap()); }); group.bench_with_input( - criterion::BenchmarkId::new("verbose", name), + criterion::BenchmarkId::new("context", name), &len, |b, _| { - type Error<'i> = winnow::error::VerboseError<parser::Stream<'i>>; + type Error<'i> = winnow::error::ContextError<parser::Stream<'i>>; - b.iter(|| parser::json::<Error>(sample).unwrap()); + 
b.iter(|| parser::json::<Error>.parse_peek(sample).unwrap()); }, ); group.bench_with_input( criterion::BenchmarkId::new("dispatch", name), &len, |b, _| { - type Error<'i> = winnow::error::Error<parser::Stream<'i>>; + type Error<'i> = winnow::error::InputError<parser_dispatch::Stream<'i>>; - b.iter(|| parser_dispatch::json::<Error>(sample).unwrap()); + b.iter(|| parser_dispatch::json::<Error>.parse_peek(sample).unwrap()); }, ); group.bench_with_input( criterion::BenchmarkId::new("streaming", name), &len, |b, _| { - type Error<'i> = winnow::error::Error<parser_partial::Stream<'i>>; + type Error<'i> = winnow::error::InputError<parser_partial::Stream<'i>>; - b.iter(|| parser_partial::json::<Error>(Partial::new(sample)).unwrap()); + b.iter(|| { + parser_partial::json::<Error> + .parse_peek(Partial::new(sample)) + .unwrap() + }); }, ); } diff --git a/vendor/winnow/examples/json/main.rs b/vendor/winnow/examples/json/main.rs index f738a1f..be431e8 100644 --- a/vendor/winnow/examples/json/main.rs +++ b/vendor/winnow/examples/json/main.rs @@ -4,9 +4,7 @@ mod parser_dispatch; #[allow(dead_code)] mod parser_partial; -use winnow::error::convert_error; -use winnow::error::Error; -use winnow::error::VerboseError; +use winnow::error::ErrorKind; use winnow::prelude::*; fn main() -> Result<(), lexopt::Error> { @@ -26,30 +24,19 @@ fn main() -> Result<(), lexopt::Error> { } " }); - if args.verbose { - match parser::json::<VerboseError<&str>>.parse(data) { - Ok(json) => { - println!("{:#?}", json); - } - Err(err) => { - if args.pretty { - println!("{}", convert_error(data, err)); - } else { - println!("{:#?}", err); - } - } + let result = match args.implementation { + Impl::Naive => parser::json::<ErrorKind>.parse(data), + Impl::Dispatch => parser_dispatch::json::<ErrorKind>.parse(data), + }; + match result { + Ok(json) => { + println!("{:#?}", json); } - } else { - let result = match args.implementation { - Impl::Naive => parser::json::<Error<&str>>.parse(data), - Impl::Dispatch => parser_dispatch::json::<Error<&str>>.parse(data), - }; - match result { - Ok(json) => { - println!("{:#?}", json); - } - Err(err) => { - println!("{:?}", err); + Err(err) => { + if args.pretty { + println!("{}", err); + } else { + println!("{:#?}", err); } } } @@ -61,7 +48,6 @@ fn main() -> Result<(), lexopt::Error> { struct Args { input: Option<String>, invalid: bool, - verbose: bool, pretty: bool, implementation: Impl, } @@ -89,13 +75,7 @@ impl Args { Long("invalid") => { res.invalid = true; } - Long("verbose") => { - res.verbose = true; - // Only case where verbose matters - res.invalid = true; - } Long("pretty") => { - res.verbose = true; // Only case where pretty matters res.pretty = true; res.invalid = true; diff --git a/vendor/winnow/examples/json/parser.rs b/vendor/winnow/examples/json/parser.rs index b8a2b85..250621e 100644 --- a/vendor/winnow/examples/json/parser.rs +++ b/vendor/winnow/examples/json/parser.rs @@ -3,13 +3,13 @@ use std::str; use winnow::prelude::*; use winnow::{ - branch::alt, - bytes::{any, none_of, take, take_while0}, - character::float, + ascii::float, + combinator::alt, combinator::cut_err, - error::{ContextError, ParseError}, - multi::{fold_many0, separated0}, - sequence::{delimited, preceded, separated_pair, terminated}, + combinator::{delimited, preceded, separated_pair, terminated}, + combinator::{fold_repeat, separated}, + error::{AddContext, ParserError}, + token::{any, none_of, take, take_while}, }; use crate::json::JsonValue; @@ -19,8 +19,8 @@ pub type Stream<'i> = &'i str; /// The root element 
of a JSON parser is any value /// /// A parser has the following signature: -/// `Stream -> IResult<Stream, Output, Error>`, with `IResult` defined as: -/// `type IResult<I, O, E = (I, ErrorKind)> = Result<(I, O), Err<E>>;` +/// `&mut Stream -> PResult<Output, InputError>`, with `PResult` defined as: +/// `type PResult<O, E = ErrorKind> = Result<O, ErrMode<E>>;` /// /// most of the times you can ignore the error type and use the default (but this /// examples shows custom error types later on!) @@ -28,17 +28,17 @@ pub type Stream<'i> = &'i str; /// Here we use `&str` as input type, but parsers can be generic over /// the input type, work directly with `&[u8]`, or any other type that /// implements the required traits. -pub fn json<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, JsonValue, E> { +pub fn json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<JsonValue, E> { delimited(ws, json_value, ws).parse_next(input) } /// `alt` is a combinator that tries multiple parsers one by one, until /// one of them succeeds -fn json_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, JsonValue, E> { +fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<JsonValue, E> { // `alt` combines the each value parser. It returns the result of the first // successful parser, or an error alt(( @@ -55,7 +55,7 @@ fn json_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static /// `tag(string)` generates a parser that recognizes the argument string. /// /// This also shows returning a sub-slice of the original input -fn null<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, &'i str, E> { +fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> { // This is a parser that returns `"null"` if it sees the string "null", and // an error otherwise "null".parse_next(input) @@ -63,7 +63,7 @@ fn null<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, /// We can combine `tag` with other functions, like `value` which returns a given constant value on /// success. -fn boolean<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, bool, E> { +fn boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> { // This is a parser that returns `true` if it sees the string "true", and // an error otherwise let parse_true = "true".value(true); @@ -77,9 +77,9 @@ fn boolean<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<' /// This parser gathers all `char`s up into a `String`with a parse to recognize the double quote /// character, before the string (using `preceded`) and after the string (using `terminated`).
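A minimal sketch of the calling convention this migration adopts, assuming winnow 0.5's public API (the parser `two_digits` and its inputs are illustrative, not part of the vendored examples): a parser is now a plain `fn(&mut I) -> PResult<O>` that advances its input in place, while `parse_peek` recovers the old `(rest, output)` tuple shape that the tests in this patch compare against.

use winnow::prelude::*;
use winnow::token::take_while;

// Illustrative parser: exactly two ASCII digits, converted to a number.
fn two_digits(input: &mut &str) -> PResult<u32> {
    take_while(2..=2, |c: char| c.is_ascii_digit())
        .try_map(str::parse::<u32>)
        .parse_next(input)
}

fn main() {
    // `parse_next` mutates the caller's input on success...
    let mut input = "42abc";
    assert_eq!(two_digits(&mut input).unwrap(), 42);
    assert_eq!(input, "abc");

    // ...while `parse_peek` leaves the caller's copy alone and returns the
    // remainder alongside the output, mirroring the old `IResult` tuple.
    let (rest, n) = two_digits.parse_peek("42abc").unwrap();
    assert_eq!((rest, n), ("abc", 42));
}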
-fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, String, E> { +fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<String, E> { preceded( '\"', // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to @@ -87,7 +87,7 @@ fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str> // right branch (since we found the `"` character) but encountered an error when // parsing the string cut_err(terminated( - fold_many0(character, String::new, |mut string, c| { + fold_repeat(0.., character, String::new, |mut string, c| { string.push(c); string }), @@ -102,8 +102,8 @@ fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str> /// You can mix the above declarative parsing with an imperative style to handle more unique cases, /// like escaping -fn character<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, char, E> { - let (input, c) = none_of("\"").parse_next(input)?; +fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> { + let c = none_of('\"').parse_next(input)?; if c == '\\' { alt(( any.verify_map(|c| { @@ -121,13 +121,11 @@ fn character<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream )) .parse_next(input) } else { - Ok((input, c)) + Ok(c) } } -fn unicode_escape<'i, E: ParseError<Stream<'i>>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, char, E> { +fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> { alt(( // Not a surrogate u16_hex @@ -149,54 +147,60 @@ fn unicode_escape<'i, E: ParseError<Stream<'i>>>( .parse_next(input) } -fn u16_hex<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, u16, E> { +fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> { take(4usize) .verify_map(|s| u16::from_str_radix(s, 16).ok()) .parse_next(input) } -/// Some combinators, like `separated0` or `many0`, will call a parser repeatedly, +/// Some combinators, like `separated` or `repeat`, will call a parser repeatedly, /// accumulating results in a `Vec`, until it encounters an error. 
/// If you want more control on the parser application, check out the `iterator` /// combinator (cf `examples/iterator.rs`) -fn array<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, Vec<JsonValue>, E> { +fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<Vec<JsonValue>, E> { preceded( ('[', ws), - cut_err(terminated(separated0(json_value, (ws, ',', ws)), (ws, ']'))), + cut_err(terminated( + separated(0.., json_value, (ws, ',', ws)), + (ws, ']'), + )), ) .context("array") .parse_next(input) } -fn object<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, HashMap<String, JsonValue>, E> { +fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<HashMap<String, JsonValue>, E> { preceded( ('{', ws), - cut_err(terminated(separated0(key_value, (ws, ',', ws)), (ws, '}'))), + cut_err(terminated( + separated(0.., key_value, (ws, ',', ws)), + (ws, '}'), + )), ) .context("object") .parse_next(input) } -fn key_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, (String, JsonValue), E> { +fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<(String, JsonValue), E> { separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input) } /// Parser combinators are constructed from the bottom up: /// first we write parsers for the smallest elements (here a space character), /// then we'll combine them in larger parsers -fn ws<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, &'i str, E> { - // Combinators like `take_while0` return a function. That function is the +fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> { + // Combinators like `take_while` return a function. 
That function is the // parser,to which we can pass the input - take_while0(WS).parse_next(input) + take_while(0.., WS).parse_next(input) } -const WS: &str = " \t\r\n"; +const WS: &[char] = &[' ', '\t', '\r', '\n']; #[cfg(test)] mod test { @@ -206,28 +210,37 @@ mod test { #[allow(clippy::useless_attribute)] #[allow(dead_code)] // its dead for benches - type Error<'i> = winnow::error::Error<&'i str>; + type Error<'i> = winnow::error::InputError<&'i str>; #[test] fn json_string() { - assert_eq!(string::<Error<'_>>("\"\""), Ok(("", "".to_string()))); - assert_eq!(string::<Error<'_>>("\"abc\""), Ok(("", "abc".to_string()))); assert_eq!( - string::<Error<'_>>("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""), + string::<Error<'_>>.parse_peek("\"\""), + Ok(("", "".to_string())) + ); + assert_eq!( + string::<Error<'_>>.parse_peek("\"abc\""), + Ok(("", "abc".to_string())) + ); + assert_eq!( + string::<Error<'_>> + .parse_peek("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""), Ok(("", "abc\"\\/\x08\x0C\n\r\t\x01——def".to_string())), ); assert_eq!( - string::<Error<'_>>("\"\\uD83D\\uDE10\""), + string::<Error<'_>>.parse_peek("\"\\uD83D\\uDE10\""), Ok(("", "😐".to_string())) ); - assert!(string::<Error<'_>>("\"").is_err()); - assert!(string::<Error<'_>>("\"abc").is_err()); - assert!(string::<Error<'_>>("\"\\\"").is_err()); - assert!(string::<Error<'_>>("\"\\u123\"").is_err()); - assert!(string::<Error<'_>>("\"\\uD800\"").is_err()); - assert!(string::<Error<'_>>("\"\\uD800\\uD800\"").is_err()); - assert!(string::<Error<'_>>("\"\\uDC00\"").is_err()); + assert!(string::<Error<'_>>.parse_peek("\"").is_err()); + assert!(string::<Error<'_>>.parse_peek("\"abc").is_err()); + assert!(string::<Error<'_>>.parse_peek("\"\\\"").is_err()); + assert!(string::<Error<'_>>.parse_peek("\"\\u123\"").is_err()); + assert!(string::<Error<'_>>.parse_peek("\"\\uD800\"").is_err()); + assert!(string::<Error<'_>> + .parse_peek("\"\\uD800\\uD800\"") + .is_err()); + assert!(string::<Error<'_>>.parse_peek("\"\\uDC00\"").is_err()); } #[test] @@ -245,7 +258,7 @@ mod test { .collect(), ); - assert_eq!(json::<Error<'_>>(input), Ok(("", expected))); + assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected))); } #[test] @@ -256,7 +269,7 @@ mod test { let expected = Array(vec![Num(42.0), Str("x".to_string())]); - assert_eq!(json::<Error<'_>>(input), Ok(("", expected))); + assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected))); } #[test] @@ -278,7 +291,7 @@ mod test { "#; assert_eq!( - json::<Error<'_>>(input), + json::<Error<'_>>.parse_peek(input), Ok(( "", Object( diff --git a/vendor/winnow/examples/json/parser_dispatch.rs b/vendor/winnow/examples/json/parser_dispatch.rs index 1fcd30f..8f7eaf9 100644 --- a/vendor/winnow/examples/json/parser_dispatch.rs +++ b/vendor/winnow/examples/json/parser_dispatch.rs @@ -3,16 +3,16 @@ use std::str; use winnow::prelude::*; use winnow::{ - branch::{alt, dispatch}, - bytes::{any, none_of, take, take_while0}, - character::float, + ascii::float, combinator::cut_err, combinator::fail, combinator::peek, combinator::success, - error::{ContextError, ParseError}, - multi::{fold_many0, separated0}, - sequence::{delimited, preceded, separated_pair, terminated}, + combinator::{alt, dispatch}, + combinator::{delimited, preceded, separated_pair, terminated}, + combinator::{fold_repeat, separated}, + error::{AddContext, ParserError}, + token::{any, none_of, take, take_while}, }; use crate::json::JsonValue; @@ -22,8 +22,8 @@ pub type Stream<'i> = &'i str; /// The root 
element of a JSON parser is any value /// /// A parser has the following signature: -/// `Stream -> IResult<Stream, Output, Error>`, with `IResult` defined as: -/// `type IResult<I, O, E = (I, ErrorKind)> = Result<(I, O), Err<E>>;` +/// `&mut Stream -> PResult<Output, InputError>`, with `PResult` defined as: +/// `type PResult<O, E = ErrorKind> = Result<O, ErrMode<E>>;` /// /// most of the times you can ignore the error type and use the default (but this /// examples shows custom error types later on!) @@ -31,17 +31,17 @@ pub type Stream<'i> = &'i str; /// Here we use `&str` as input type, but parsers can be generic over /// the input type, work directly with `&[u8]`, or any other type that /// implements the required traits. -pub fn json<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, JsonValue, E> { +pub fn json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<JsonValue, E> { delimited(ws, json_value, ws).parse_next(input) } /// `alt` is a combinator that tries multiple parsers one by one, until /// one of them succeeds -fn json_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, JsonValue, E> { +fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<JsonValue, E> { // `dispatch` gives you `match`-like behavior compared to `alt` successively trying different // implementations. dispatch!(peek(any); @@ -62,7 +62,7 @@ fn json_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static /// `tag(string)` generates a parser that recognizes the argument string. /// /// This also shows returning a sub-slice of the original input -fn null<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, &'i str, E> { +fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> { // This is a parser that returns `"null"` if it sees the string "null", and // an error otherwise "null".parse_next(input) @@ -70,7 +70,7 @@ fn null<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, /// We can combine `tag` with other functions, like `value` which returns a given constant value on /// success. -fn true_<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, bool, E> { +fn true_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> { // This is a parser that returns `true` if it sees the string "true", and // an error otherwise "true".value(true).parse_next(input) @@ -78,7 +78,7 @@ fn true_<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i> /// We can combine `tag` with other functions, like `value` which returns a given constant value on /// success. -fn false_<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, bool, E> { +fn false_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> { // This is a parser that returns `false` if it sees the string "false", and // an error otherwise "false".value(false).parse_next(input) @@ -86,9 +86,9 @@ fn false_<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i /// This parser gathers all `char`s up into a `String`with a parse to recognize the double quote /// character, before the string (using `preceded`) and after the string (using `terminated`). 
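A condensed sketch of the `dispatch!` idiom used in `json_value` above, under the same winnow 0.5 API (the `kind` classifier and its token classes are made up for illustration): the first character is peeked once and routes directly to one branch, instead of `alt` re-trying each branch in order.

use winnow::ascii::{alpha1, digit1};
use winnow::combinator::{dispatch, fail, peek};
use winnow::prelude::*;
use winnow::token::any;

// Illustrative classifier: route on the peeked first character.
fn kind(input: &mut &str) -> PResult<&'static str> {
    dispatch!(peek(any);
        '0'..='9' => digit1.value("number"),
        't' | 'f' => alpha1.value("boolean"),
        _ => fail
    )
    .parse_next(input)
}

fn main() {
    let (rest, k) = kind.parse_peek("123,").unwrap();
    assert_eq!((rest, k), (",", "number"));
    assert!(kind.parse_peek("?").is_err());
}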
-fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, String, E> { +fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<String, E> { preceded( '\"', // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to @@ -96,7 +96,7 @@ fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str> // right branch (since we found the `"` character) but encountered an error when // parsing the string cut_err(terminated( - fold_many0(character, String::new, |mut string, c| { + fold_repeat(0.., character, String::new, |mut string, c| { string.push(c); string }), @@ -111,8 +111,8 @@ fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str> /// You can mix the above declarative parsing with an imperative style to handle more unique cases, /// like escaping -fn character<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, char, E> { - let (input, c) = none_of("\"").parse_next(input)?; +fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> { + let c = none_of('\"').parse_next(input)?; if c == '\\' { dispatch!(any; '"' => success('"'), @@ -128,13 +128,11 @@ fn character<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream ) .parse_next(input) } else { - Ok((input, c)) + Ok(c) } } -fn unicode_escape<'i, E: ParseError<Stream<'i>>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, char, E> { +fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> { alt(( // Not a surrogate u16_hex @@ -156,54 +154,60 @@ fn unicode_escape<'i, E: ParseError<Stream<'i>>>( .parse_next(input) } -fn u16_hex<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, u16, E> { +fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> { take(4usize) .verify_map(|s| u16::from_str_radix(s, 16).ok()) .parse_next(input) } -/// Some combinators, like `separated0` or `many0`, will call a parser repeatedly, +/// Some combinators, like `separated` or `repeat`, will call a parser repeatedly, /// accumulating results in a `Vec`, until it encounters an error. 
/// If you want more control on the parser application, check out the `iterator` /// combinator (cf `examples/iterator.rs`) -fn array<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, Vec<JsonValue>, E> { +fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<Vec<JsonValue>, E> { preceded( ('[', ws), - cut_err(terminated(separated0(json_value, (ws, ',', ws)), (ws, ']'))), + cut_err(terminated( + separated(0.., json_value, (ws, ',', ws)), + (ws, ']'), + )), ) .context("array") .parse_next(input) } -fn object<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, HashMap<String, JsonValue>, E> { +fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<HashMap<String, JsonValue>, E> { preceded( ('{', ws), - cut_err(terminated(separated0(key_value, (ws, ',', ws)), (ws, '}'))), + cut_err(terminated( + separated(0.., key_value, (ws, ',', ws)), + (ws, '}'), + )), ) .context("object") .parse_next(input) } -fn key_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, (String, JsonValue), E> { +fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<(String, JsonValue), E> { separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input) } /// Parser combinators are constructed from the bottom up: /// first we write parsers for the smallest elements (here a space character), /// then we'll combine them in larger parsers -fn ws<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, &'i str, E> { - // Combinators like `take_while0` return a function. That function is the +fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> { + // Combinators like `take_while` return a function. 
That function is the // parser,to which we can pass the input - take_while0(WS).parse_next(input) + take_while(0.., WS).parse_next(input) } -const WS: &str = " \t\r\n"; +const WS: &[char] = &[' ', '\t', '\r', '\n']; #[cfg(test)] mod test { @@ -213,28 +217,37 @@ mod test { #[allow(clippy::useless_attribute)] #[allow(dead_code)] // its dead for benches - type Error<'i> = winnow::error::Error<&'i str>; + type Error<'i> = winnow::error::InputError<&'i str>; #[test] fn json_string() { - assert_eq!(string::<Error<'_>>("\"\""), Ok(("", "".to_string()))); - assert_eq!(string::<Error<'_>>("\"abc\""), Ok(("", "abc".to_string()))); assert_eq!( - string::<Error<'_>>("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""), + string::<Error<'_>>.parse_peek("\"\""), + Ok(("", "".to_string())) + ); + assert_eq!( + string::<Error<'_>>.parse_peek("\"abc\""), + Ok(("", "abc".to_string())) + ); + assert_eq!( + string::<Error<'_>> + .parse_peek("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""), Ok(("", "abc\"\\/\x08\x0C\n\r\t\x01——def".to_string())), ); assert_eq!( - string::<Error<'_>>("\"\\uD83D\\uDE10\""), + string::<Error<'_>>.parse_peek("\"\\uD83D\\uDE10\""), Ok(("", "😐".to_string())) ); - assert!(string::<Error<'_>>("\"").is_err()); - assert!(string::<Error<'_>>("\"abc").is_err()); - assert!(string::<Error<'_>>("\"\\\"").is_err()); - assert!(string::<Error<'_>>("\"\\u123\"").is_err()); - assert!(string::<Error<'_>>("\"\\uD800\"").is_err()); - assert!(string::<Error<'_>>("\"\\uD800\\uD800\"").is_err()); - assert!(string::<Error<'_>>("\"\\uDC00\"").is_err()); + assert!(string::<Error<'_>>.parse_peek("\"").is_err()); + assert!(string::<Error<'_>>.parse_peek("\"abc").is_err()); + assert!(string::<Error<'_>>.parse_peek("\"\\\"").is_err()); + assert!(string::<Error<'_>>.parse_peek("\"\\u123\"").is_err()); + assert!(string::<Error<'_>>.parse_peek("\"\\uD800\"").is_err()); + assert!(string::<Error<'_>> + .parse_peek("\"\\uD800\\uD800\"") + .is_err()); + assert!(string::<Error<'_>>.parse_peek("\"\\uDC00\"").is_err()); } #[test] @@ -252,7 +265,7 @@ mod test { .collect(), ); - assert_eq!(json::<Error<'_>>(input), Ok(("", expected))); + assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected))); } #[test] @@ -263,7 +276,7 @@ mod test { let expected = Array(vec![Num(42.0), Str("x".to_string())]); - assert_eq!(json::<Error<'_>>(input), Ok(("", expected))); + assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected))); } #[test] @@ -285,7 +298,7 @@ mod test { "#; assert_eq!( - json::<Error<'_>>(input), + json::<Error<'_>>.parse_peek(input), Ok(( "", Object( diff --git a/vendor/winnow/examples/json/parser_partial.rs b/vendor/winnow/examples/json/parser_partial.rs index 8050de4..779e05f 100644 --- a/vendor/winnow/examples/json/parser_partial.rs +++ b/vendor/winnow/examples/json/parser_partial.rs @@ -3,14 +3,14 @@ use std::str; use winnow::prelude::*; use winnow::{ - branch::alt, - bytes::{any, none_of, take, take_while0}, - character::float, + ascii::float, + combinator::alt, combinator::{cut_err, rest}, - error::{ContextError, ParseError}, - multi::{fold_many0, separated0}, - sequence::{delimited, preceded, separated_pair, terminated}, + combinator::{delimited, preceded, separated_pair, terminated}, + combinator::{fold_repeat, separated}, + error::{AddContext, ParserError}, stream::Partial, + token::{any, none_of, take, take_while}, }; use crate::json::JsonValue; @@ -20,8 +20,8 @@ pub type Stream<'i> = Partial<&'i str>; /// The root element of a JSON parser is any value /// /// A 
parser has the following signature: -/// `Stream -> IResult<Stream, Output, Error>`, with `IResult` defined as: -/// `type IResult<I, O, E = (I, ErrorKind)> = Result<(I, O), Err<E>>;` +/// `&mut Stream -> PResult<Output, InputError>`, with `PResult` defined as: +/// `type PResult<O, E = ErrorKind> = Result<O, ErrMode<E>>;` /// /// most of the times you can ignore the error type and use the default (but this /// examples shows custom error types later on!) @@ -29,17 +29,17 @@ pub type Stream<'i> = Partial<&'i str>; /// Here we use `&str` as input type, but parsers can be generic over /// the input type, work directly with `&[u8]`, or any other type that /// implements the required traits. -pub fn json<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, JsonValue, E> { +pub fn json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<JsonValue, E> { delimited(ws, json_value, ws_or_eof).parse_next(input) } /// `alt` is a combinator that tries multiple parsers one by one, until /// one of them succeeds -fn json_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, JsonValue, E> { +fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<JsonValue, E> { // `alt` combines the each value parser. It returns the result of the first // successful parser, or an error alt(( @@ -56,7 +56,7 @@ fn json_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static /// `tag(string)` generates a parser that recognizes the argument string. /// /// This also shows returning a sub-slice of the original input -fn null<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, &'i str, E> { +fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> { // This is a parser that returns `"null"` if it sees the string "null", and // an error otherwise "null".parse_next(input) @@ -64,7 +64,7 @@ fn null<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, /// We can combine `tag` with other functions, like `value` which returns a given constant value on /// success. -fn boolean<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, bool, E> { +fn boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> { // This is a parser that returns `true` if it sees the string "true", and // an error otherwise let parse_true = "true".value(true); @@ -78,9 +78,9 @@ fn boolean<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<' /// This parser gathers all `char`s up into a `String`with a parse to recognize the double quote /// character, before the string (using `preceded`) and after the string (using `terminated`). 
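The `Partial` wrapper used for this file's `Stream` is what turns a plain shortage of input into `ErrMode::Incomplete` for streaming callers; a small sketch under that assumption (the `four` helper is illustrative, not part of the vendored examples):

use winnow::error::{ErrMode, InputError};
use winnow::prelude::*;
use winnow::stream::Partial;
use winnow::token::take;

// Illustrative helper: take exactly four characters from a partial stream.
fn four<'i>(input: &mut Partial<&'i str>) -> PResult<&'i str, InputError<Partial<&'i str>>> {
    take(4usize).parse_next(input)
}

fn main() {
    // Too little input is reported as `Incomplete`, telling a streaming
    // caller to buffer more data and retry...
    assert!(matches!(
        four.parse_peek(Partial::new("ab")),
        Err(ErrMode::Incomplete(_))
    ));

    // ...whereas once enough data has arrived the parse succeeds normally.
    let (rest, taken) = four.parse_peek(Partial::new("abcd!")).unwrap();
    assert_eq!(taken, "abcd");
    assert_eq!(rest, Partial::new("!"));
}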
-fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, String, E> { +fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<String, E> { preceded( '\"', // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to @@ -88,7 +88,7 @@ fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str> // right branch (since we found the `"` character) but encountered an error when // parsing the string cut_err(terminated( - fold_many0(character, String::new, |mut string, c| { + fold_repeat(0.., character, String::new, |mut string, c| { string.push(c); string }), @@ -103,8 +103,8 @@ fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str> /// You can mix the above declarative parsing with an imperative style to handle more unique cases, /// like escaping -fn character<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, char, E> { - let (input, c) = none_of("\"").parse_next(input)?; +fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> { + let c = none_of('\"').parse_next(input)?; if c == '\\' { alt(( any.verify_map(|c| { @@ -122,13 +122,11 @@ fn character<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream )) .parse_next(input) } else { - Ok((input, c)) + Ok(c) } } -fn unicode_escape<'i, E: ParseError<Stream<'i>>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, char, E> { +fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> { alt(( // Not a surrogate u16_hex @@ -150,59 +148,65 @@ fn unicode_escape<'i, E: ParseError<Stream<'i>>>( .parse_next(input) } -fn u16_hex<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, u16, E> { +fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> { take(4usize) .verify_map(|s| u16::from_str_radix(s, 16).ok()) .parse_next(input) } -/// Some combinators, like `separated0` or `many0`, will call a parser repeatedly, +/// Some combinators, like `separated` or `repeat`, will call a parser repeatedly, /// accumulating results in a `Vec`, until it encounters an error. 
/// If you want more control on the parser application, check out the `iterator` /// combinator (cf `examples/iterator.rs`) -fn array<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, Vec<JsonValue>, E> { +fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<Vec<JsonValue>, E> { preceded( ('[', ws), - cut_err(terminated(separated0(json_value, (ws, ',', ws)), (ws, ']'))), + cut_err(terminated( + separated(0.., json_value, (ws, ',', ws)), + (ws, ']'), + )), ) .context("array") .parse_next(input) } -fn object<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, HashMap<String, JsonValue>, E> { +fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<HashMap<String, JsonValue>, E> { preceded( ('{', ws), - cut_err(terminated(separated0(key_value, (ws, ',', ws)), (ws, '}'))), + cut_err(terminated( + separated(0.., key_value, (ws, ',', ws)), + (ws, '}'), + )), ) .context("object") .parse_next(input) } -fn key_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, (String, JsonValue), E> { +fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<(String, JsonValue), E> { separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input) } /// Parser combinators are constructed from the bottom up: /// first we write parsers for the smallest elements (here a space character), /// then we'll combine them in larger parsers -fn ws<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, &'i str, E> { - // Combinators like `take_while0` return a function. That function is the +fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> { + // Combinators like `take_while` return a function. 
That function is the // parser,to which we can pass the input - take_while0(WS).parse_next(input) + take_while(0.., WS).parse_next(input) } -fn ws_or_eof<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, &'i str, E> { - rest.verify(|s: &str| s.chars().all(|c| WS.contains(c))) +fn ws_or_eof<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> { + rest.verify(|s: &str| s.chars().all(|c| WS.contains(&c))) .parse_next(input) } -const WS: &str = " \t\r\n"; +const WS: &[char] = &[' ', '\t', '\r', '\n']; #[cfg(test)] mod test { @@ -212,20 +216,20 @@ mod test { #[allow(clippy::useless_attribute)] #[allow(dead_code)] // its dead for benches - type Error<'i> = winnow::error::Error<Partial<&'i str>>; + type Error<'i> = winnow::error::InputError<Partial<&'i str>>; #[test] fn json_string() { assert_eq!( - string::<Error<'_>>(Partial::new("\"\"")), + string::<Error<'_>>.parse_peek(Partial::new("\"\"")), Ok((Partial::new(""), "".to_string())) ); assert_eq!( - string::<Error<'_>>(Partial::new("\"abc\"")), + string::<Error<'_>>.parse_peek(Partial::new("\"abc\"")), Ok((Partial::new(""), "abc".to_string())) ); assert_eq!( - string::<Error<'_>>(Partial::new( + string::<Error<'_>>.parse_peek(Partial::new( "\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\"" )), Ok(( @@ -234,17 +238,29 @@ mod test { )), ); assert_eq!( - string::<Error<'_>>(Partial::new("\"\\uD83D\\uDE10\"")), + string::<Error<'_>>.parse_peek(Partial::new("\"\\uD83D\\uDE10\"")), Ok((Partial::new(""), "😐".to_string())) ); - assert!(string::<Error<'_>>(Partial::new("\"")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"abc")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"\\\"")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"\\u123\"")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"\\uD800\"")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"\\uD800\\uD800\"")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"\\uDC00\"")).is_err()); + assert!(string::<Error<'_>>.parse_peek(Partial::new("\"")).is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"abc")) + .is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"\\\"")) + .is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"\\u123\"")) + .is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"\\uD800\"")) + .is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"\\uD800\\uD800\"")) + .is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"\\uDC00\"")) + .is_err()); } #[test] @@ -263,7 +279,7 @@ mod test { ); assert_eq!( - json::<Error<'_>>(Partial::new(input)), + json::<Error<'_>>.parse_peek(Partial::new(input)), Ok((Partial::new(""), expected)) ); } @@ -277,7 +293,7 @@ mod test { let expected = Array(vec![Num(42.0), Str("x".to_string())]); assert_eq!( - json::<Error<'_>>(Partial::new(input)), + json::<Error<'_>>.parse_peek(Partial::new(input)), Ok((Partial::new(""), expected)) ); } @@ -301,7 +317,7 @@ mod test { "#; assert_eq!( - json::<Error<'_>>(Partial::new(input)), + json::<Error<'_>>.parse_peek(Partial::new(input)), Ok(( Partial::new(""), Object( diff --git a/vendor/winnow/examples/json_iterator.rs b/vendor/winnow/examples/json_iterator.rs index 52f28a5..9c21ae3 100644 --- a/vendor/winnow/examples/json_iterator.rs +++ b/vendor/winnow/examples/json_iterator.rs @@ -2,15 +2,16 @@ use std::collections::HashMap; use winnow::prelude::*; use winnow::{ - branch::alt, - bytes::one_of, - 
bytes::{tag, take_while0}, - character::{alphanumeric1 as alphanumeric, escaped, float}, + ascii::{alphanumeric1 as alphanumeric, escaped, float}, + combinator::alt, combinator::cut_err, - error::ParseError, - multi::separated0, - sequence::{preceded, separated_pair, terminated}, - IResult, + combinator::separated, + combinator::{preceded, separated_pair, terminated}, + error::ParserError, + error::StrContext, + stream::Offset, + token::one_of, + token::{tag, take_while}, }; use std::cell::Cell; @@ -28,7 +29,7 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { } pub fn offset(&self, input: &'a str) { - let offset = input.as_ptr() as usize - self.input.as_ptr() as usize; + let offset = input.offset_from(&self.input); self.offset.set(offset); } @@ -38,9 +39,10 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { pub fn string(&self) -> Option<&'a str> { println!("string()"); - match string(self.data()) { - Ok((i, s)) => { - self.offset(i); + let mut data = self.data(); + match string(&mut data) { + Ok(s) => { + self.offset(data); println!("-> {}", s); Some(s) } @@ -50,9 +52,10 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { pub fn boolean(&self) -> Option<bool> { println!("boolean()"); - match boolean(self.data()) { - Ok((i, o)) => { - self.offset(i); + let mut data = self.data(); + match boolean(&mut data) { + Ok(o) => { + self.offset(data); println!("-> {}", o); Some(o) } @@ -62,9 +65,10 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { pub fn number(&self) -> Option<f64> { println!("number()"); - match float::<_, _, ()>(self.data()) { - Ok((i, o)) => { - self.offset(i); + let mut data = self.data(); + match float::<_, _, ()>.parse_next(&mut data) { + Ok(o) => { + self.offset(data); println!("-> {}", o); Some(o) } @@ -75,11 +79,12 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { pub fn array(&self) -> Option<impl Iterator<Item = JsonValue<'a, 'b>>> { println!("array()"); - match tag::<_, _, ()>("[").parse_next(self.data()) { + let mut data = self.data(); + match tag::<_, _, ()>("[").parse_next(&mut data) { Err(_) => None, - Ok((i, _)) => { + Ok(_) => { println!("["); - self.offset(i); + self.offset(data); let mut first = true; let mut done = false; let mut previous = std::usize::MAX; @@ -94,14 +99,14 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { // if we ignored one of the items, skip over the value if v.offset.get() == previous { println!("skipping value"); - if let Ok((i, _)) = value(v.data()) { - v.offset(i); + if value(&mut data).is_ok() { + v.offset(data); } } - if let Ok((i, _)) = tag::<_, _, ()>("]").parse_next(v.data()) { + if tag::<_, _, ()>("]").parse_next(&mut data).is_ok() { println!("]"); - v.offset(i); + v.offset(data); done = true; return None; } @@ -109,10 +114,10 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { if first { first = false; } else { - match tag::<_, _, ()>(",").parse_next(v.data()) { - Ok((i, _)) => { + match tag::<_, _, ()>(",").parse_next(&mut data) { + Ok(_) => { println!(","); - v.offset(i); + v.offset(data); } Err(_) => { done = true; @@ -131,10 +136,11 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { pub fn object(&self) -> Option<impl Iterator<Item = (&'a str, JsonValue<'a, 'b>)>> { println!("object()"); - match tag::<_, _, ()>("{").parse_next(self.data()) { + let mut data = self.data(); + match tag::<_, _, ()>("{").parse_next(&mut data) { Err(_) => None, - Ok((i, _)) => { - self.offset(i); + Ok(_) => { + self.offset(data); println!("{{"); @@ -152,14 +158,14 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { // if we ignored one of the items, skip over the value if v.offset.get() == previous { println!("skipping value"); - if let Ok((i, 
_)) = value(v.data()) { - v.offset(i); + if value(&mut data).is_ok() { + v.offset(data); } } - if let Ok((i, _)) = tag::<_, _, ()>("}").parse_next(v.data()) { + if tag::<_, _, ()>("}").parse_next(&mut data).is_ok() { println!("}}"); - v.offset(i); + v.offset(data); done = true; return None; } @@ -167,10 +173,10 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { if first { first = false; } else { - match tag::<_, _, ()>(",").parse_next(v.data()) { - Ok((i, _)) => { + match tag::<_, _, ()>(",").parse_next(&mut data) { + Ok(_) => { println!(","); - v.offset(i); + v.offset(data); } Err(_) => { done = true; @@ -179,14 +185,14 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { } } - match string(v.data()) { - Ok((i, key)) => { - v.offset(i); + match string(&mut data) { + Ok(key) => { + v.offset(data); - match tag::<_, _, ()>(":").parse_next(v.data()) { + match tag::<_, _, ()>(":").parse_next(&mut data) { Err(_) => None, - Ok((i, _)) => { - v.offset(i); + Ok(_) => { + v.offset(data); previous = v.offset.get(); @@ -203,55 +209,55 @@ impl<'a, 'b: 'a> JsonValue<'a, 'b> { } } -fn sp<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> { +fn sp<'a, E: ParserError<&'a str>>(i: &mut &'a str) -> PResult<&'a str, E> { let chars = " \t\r\n"; - take_while0(move |c| chars.contains(c)).parse_next(i) + take_while(0.., move |c| chars.contains(c)).parse_next(i) } -fn parse_str<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> { - escaped(alphanumeric, '\\', one_of("\"n\\")).parse_next(i) +fn parse_str<'a, E: ParserError<&'a str>>(i: &mut &'a str) -> PResult<&'a str, E> { + escaped(alphanumeric, '\\', one_of(['"', 'n', '\\'])).parse_next(i) } -fn string(i: &str) -> IResult<&str, &str> { +fn string<'s>(i: &mut &'s str) -> PResult<&'s str> { preceded('\"', cut_err(terminated(parse_str, '\"'))) - .context("string") + .context(StrContext::Label("string")) .parse_next(i) } -fn boolean(input: &str) -> IResult<&str, bool> { +fn boolean(input: &mut &str) -> PResult<bool> { alt(("false".map(|_| false), "true".map(|_| true))).parse_next(input) } -fn array(i: &str) -> IResult<&str, ()> { +fn array(i: &mut &str) -> PResult<()> { preceded( '[', cut_err(terminated( - separated0(value, preceded(sp, ',')), + separated(0.., value, preceded(sp, ',')), preceded(sp, ']'), )), ) - .context("array") + .context(StrContext::Label("array")) .parse_next(i) } -fn key_value(i: &str) -> IResult<&str, (&str, ())> { +fn key_value<'s>(i: &mut &'s str) -> PResult<(&'s str, ())> { separated_pair(preceded(sp, string), cut_err(preceded(sp, ':')), value).parse_next(i) } -fn hash(i: &str) -> IResult<&str, ()> { +fn hash(i: &mut &str) -> PResult<()> { preceded( '{', cut_err(terminated( - separated0(key_value, preceded(sp, ',')), + separated(0.., key_value, preceded(sp, ',')), preceded(sp, '}'), )), ) - .context("map") + .context(StrContext::Label("map")) .parse_next(i) } -fn value(i: &str) -> IResult<&str, ()> { +fn value(i: &mut &str) -> PResult<()> { preceded( sp, alt(( diff --git a/vendor/winnow/examples/ndjson/main.rs b/vendor/winnow/examples/ndjson/main.rs index d81b690..6b2a716 100644 --- a/vendor/winnow/examples/ndjson/main.rs +++ b/vendor/winnow/examples/ndjson/main.rs @@ -3,8 +3,9 @@ mod parser; use std::io::Read; use winnow::error::ErrMode; -use winnow::error::Error; +use winnow::error::InputError; use winnow::error::Needed; +use winnow::prelude::*; use winnow::stream::Offset; fn main() -> Result<(), lexopt::Error> { @@ -38,12 +39,12 @@ fn main() -> Result<(), lexopt::Error> { loop { let input = 
parser::Stream::new(std::str::from_utf8(buffer.data()).map_err(to_lexopt)?); - match parser::ndjson::<Error<parser::Stream>>(input) { + match parser::ndjson::<InputError<parser::Stream>>.parse_peek(input) { Ok((remainder, value)) => { println!("{:?}", value); println!(); // Tell the buffer how much we read - let consumed = input.offset_to(&remainder); + let consumed = remainder.offset_from(&input); buffer.consume(consumed); } Err(ErrMode::Backtrack(e)) | Err(ErrMode::Cut(e)) => { diff --git a/vendor/winnow/examples/ndjson/parser.rs b/vendor/winnow/examples/ndjson/parser.rs index 2c1cd29..81b4745 100644 --- a/vendor/winnow/examples/ndjson/parser.rs +++ b/vendor/winnow/examples/ndjson/parser.rs @@ -3,15 +3,15 @@ use std::str; use winnow::prelude::*; use winnow::{ - branch::alt, - bytes::{any, none_of, take, take_while0}, - character::float, - character::line_ending, + ascii::float, + ascii::line_ending, + combinator::alt, combinator::cut_err, - error::{ContextError, ParseError}, - multi::{fold_many0, separated0}, - sequence::{delimited, preceded, separated_pair, terminated}, + combinator::{delimited, preceded, separated_pair, terminated}, + combinator::{fold_repeat, separated}, + error::{AddContext, ParserError}, stream::Partial, + token::{any, none_of, take, take_while}, }; #[derive(Debug, PartialEq, Clone)] @@ -27,9 +27,9 @@ pub enum JsonValue { /// Use `Partial` to cause `ErrMode::Incomplete` while parsing pub type Stream<'i> = Partial<&'i str>; -pub fn ndjson<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, Option<JsonValue>, E> { +pub fn ndjson<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<Option<JsonValue>, E> { alt(( terminated(delimited(ws, json_value, ws), line_ending).map(Some), line_ending.value(None), @@ -41,9 +41,9 @@ pub fn ndjson<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static /// `alt` is a combinator that tries multiple parsers one by one, until /// one of them succeeds -fn json_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, JsonValue, E> { +fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<JsonValue, E> { // `alt` combines the each value parser. It returns the result of the first // successful parser, or an error alt(( @@ -60,7 +60,7 @@ fn json_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static /// `tag(string)` generates a parser that recognizes the argument string. /// /// This also shows returning a sub-slice of the original input -fn null<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, &'i str, E> { +fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> { // This is a parser that returns `"null"` if it sees the string "null", and // an error otherwise "null".parse_next(input) @@ -68,7 +68,7 @@ fn null<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, /// We can combine `tag` with other functions, like `value` which returns a given constant value on /// success. 
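The `value` combinator described above maps a recognized literal to a constant; a minimal free-standing sketch of that pattern, with made-up inputs:

use winnow::combinator::alt;
use winnow::prelude::*;

// Each literal parser is mapped to a constant with `value`, and `alt`
// returns whichever branch matches first.
fn bool_lit(input: &mut &str) -> PResult<bool> {
    alt(("true".value(true), "false".value(false))).parse_next(input)
}

fn main() {
    let (rest, b) = bool_lit.parse_peek("true}").unwrap();
    assert_eq!((rest, b), ("}", true));
    assert!(bool_lit.parse_peek("null").is_err());
}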
-fn boolean<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, bool, E> { +fn boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> { // This is a parser that returns `true` if it sees the string "true", and // an error otherwise let parse_true = "true".value(true); @@ -82,9 +82,9 @@ fn boolean<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<' /// This parser gathers all `char`s up into a `String`with a parse to recognize the double quote /// character, before the string (using `preceded`) and after the string (using `terminated`). -fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, String, E> { +fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<String, E> { preceded( '\"', // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to @@ -92,7 +92,7 @@ fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str> // right branch (since we found the `"` character) but encountered an error when // parsing the string cut_err(terminated( - fold_many0(character, String::new, |mut string, c| { + fold_repeat(0.., character, String::new, |mut string, c| { string.push(c); string }), @@ -107,8 +107,8 @@ fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str> /// You can mix the above declarative parsing with an imperative style to handle more unique cases, /// like escaping -fn character<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, char, E> { - let (input, c) = none_of("\"").parse_next(input)?; +fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> { + let c = none_of('"').parse_next(input)?; if c == '\\' { alt(( any.verify_map(|c| { @@ -126,13 +126,11 @@ fn character<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream )) .parse_next(input) } else { - Ok((input, c)) + Ok(c) } } -fn unicode_escape<'i, E: ParseError<Stream<'i>>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, char, E> { +fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> { alt(( // Not a surrogate u16_hex @@ -154,54 +152,60 @@ fn unicode_escape<'i, E: ParseError<Stream<'i>>>( .parse_next(input) } -fn u16_hex<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, u16, E> { +fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> { take(4usize) .verify_map(|s| u16::from_str_radix(s, 16).ok()) .parse_next(input) } -/// Some combinators, like `separated0` or `many0`, will call a parser repeatedly, +/// Some combinators, like `separated` or `repeat`, will call a parser repeatedly, /// accumulating results in a `Vec`, until it encounters an error. 
/// If you want more control on the parser application, check out the `iterator` /// combinator (cf `examples/iterator.rs`) -fn array<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, Vec<JsonValue>, E> { +fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<Vec<JsonValue>, E> { preceded( ('[', ws), - cut_err(terminated(separated0(json_value, (ws, ',', ws)), (ws, ']'))), + cut_err(terminated( + separated(0.., json_value, (ws, ',', ws)), + (ws, ']'), + )), ) .context("array") .parse_next(input) } -fn object<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, HashMap<String, JsonValue>, E> { +fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<HashMap<String, JsonValue>, E> { preceded( ('{', ws), - cut_err(terminated(separated0(key_value, (ws, ',', ws)), (ws, '}'))), + cut_err(terminated( + separated(0.., key_value, (ws, ',', ws)), + (ws, '}'), + )), ) .context("object") .parse_next(input) } -fn key_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>( - input: Stream<'i>, -) -> IResult<Stream<'i>, (String, JsonValue), E> { +fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( + input: &mut Stream<'i>, +) -> PResult<(String, JsonValue), E> { separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input) } /// Parser combinators are constructed from the bottom up: /// first we write parsers for the smallest elements (here a space character), /// then we'll combine them in larger parsers -fn ws<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, &'i str, E> { - // Combinators like `take_while0` return a function. That function is the +fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> { + // Combinators like `take_while` return a function. 
That function is the // parser,to which we can pass the input - take_while0(WS).parse_next(input) + take_while(0.., WS).parse_next(input) } -const WS: &str = " \t"; +const WS: &[char] = &[' ', '\t']; #[cfg(test)] mod test { @@ -211,20 +215,20 @@ mod test { #[allow(clippy::useless_attribute)] #[allow(dead_code)] // its dead for benches - type Error<'i> = winnow::error::Error<Partial<&'i str>>; + type Error<'i> = winnow::error::InputError<Partial<&'i str>>; #[test] fn json_string() { assert_eq!( - string::<Error<'_>>(Partial::new("\"\"")), + string::<Error<'_>>.parse_peek(Partial::new("\"\"")), Ok((Partial::new(""), "".to_string())) ); assert_eq!( - string::<Error<'_>>(Partial::new("\"abc\"")), + string::<Error<'_>>.parse_peek(Partial::new("\"abc\"")), Ok((Partial::new(""), "abc".to_string())) ); assert_eq!( - string::<Error<'_>>(Partial::new( + string::<Error<'_>>.parse_peek(Partial::new( "\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\"" )), Ok(( @@ -233,17 +237,29 @@ mod test { )), ); assert_eq!( - string::<Error<'_>>(Partial::new("\"\\uD83D\\uDE10\"")), + string::<Error<'_>>.parse_peek(Partial::new("\"\\uD83D\\uDE10\"")), Ok((Partial::new(""), "😐".to_string())) ); - assert!(string::<Error<'_>>(Partial::new("\"")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"abc")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"\\\"")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"\\u123\"")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"\\uD800\"")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"\\uD800\\uD800\"")).is_err()); - assert!(string::<Error<'_>>(Partial::new("\"\\uDC00\"")).is_err()); + assert!(string::<Error<'_>>.parse_peek(Partial::new("\"")).is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"abc")) + .is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"\\\"")) + .is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"\\u123\"")) + .is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"\\uD800\"")) + .is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"\\uD800\\uD800\"")) + .is_err()); + assert!(string::<Error<'_>> + .parse_peek(Partial::new("\"\\uDC00\"")) + .is_err()); } #[test] @@ -263,7 +279,7 @@ mod test { ); assert_eq!( - ndjson::<Error<'_>>(Partial::new(input)), + ndjson::<Error<'_>>.parse_peek(Partial::new(input)), Ok((Partial::new(""), Some(expected))) ); } @@ -278,7 +294,7 @@ mod test { let expected = Array(vec![Num(42.0), Str("x".to_string())]); assert_eq!( - ndjson::<Error<'_>>(Partial::new(input)), + ndjson::<Error<'_>>.parse_peek(Partial::new(input)), Ok((Partial::new(""), Some(expected))) ); } @@ -291,7 +307,7 @@ mod test { "#; assert_eq!( - ndjson::<Error<'_>>(Partial::new(input)), + ndjson::<Error<'_>>.parse_peek(Partial::new(input)), Ok(( Partial::new(""), Some(Object( diff --git a/vendor/winnow/examples/s_expression/parser.rs b/vendor/winnow/examples/s_expression/parser.rs index 9712c02..c445053 100644 --- a/vendor/winnow/examples/s_expression/parser.rs +++ b/vendor/winnow/examples/s_expression/parser.rs @@ -3,22 +3,24 @@ //! Lisp is a simple type of language made up of Atoms and Lists, forming easily parsable trees. 
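// A minimal sketch, assuming winnow 0.5's `parse_peek` and `stream::Offset` as used in the
// ndjson example above: parsers now borrow the stream as `&mut` and return only the output,
// while `parse_peek` keeps the old `(remainder, output)` shape and `offset_from` turns the
// remainder back into a consumed-byte count for the buffer. The `word` parser below is a
// hypothetical stand-in for `parser::ndjson`.
use winnow::error::InputError;
use winnow::prelude::*;
use winnow::stream::Offset;
use winnow::token::take_while;

// One whitespace-delimited word.
fn word<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> {
    take_while(1.., |c: char| !c.is_whitespace()).parse_next(input)
}

fn consumed(input: &str) -> usize {
    match word.parse_peek(input) {
        // `offset_from` measures how far `remainder` advanced past `input`.
        Ok((remainder, _output)) => remainder.offset_from(&input),
        Err(_) => 0,
    }
}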
use winnow::{ - branch::alt, - bytes::one_of, - character::{alpha1, digit1, multispace0, multispace1}, + ascii::{alpha1, digit1, multispace0, multispace1}, + combinator::alt, + combinator::repeat, combinator::{cut_err, opt}, - error::VerboseError, - multi::many0, - sequence::{delimited, preceded, terminated}, - IResult, Parser, + combinator::{delimited, preceded, terminated}, + error::ContextError, + error::StrContext, + prelude::*, + token::one_of, }; /// We start with a top-level function to tie everything together, letting /// us call eval on a string directly pub fn eval_from_str(src: &str) -> Result<Expr, String> { - parse_expr(src) - .map_err(|e: winnow::error::ErrMode<VerboseError<&str>>| format!("{:#?}", e)) - .and_then(|(_, exp)| eval_expression(exp).ok_or_else(|| "Eval failed".to_string())) + parse_expr + .parse(src) + .map_err(|e| e.to_string()) + .and_then(|exp| eval_expression(exp).ok_or_else(|| "Eval failed".to_string())) } /// For parsing, we start by defining the types that define the shape of data that we want. @@ -56,7 +58,7 @@ pub enum Atom { BuiltIn(BuiltIn), } -/// Now, the most basic type. We define some built-in functions that our lisp has +/// Now, the most basic type. We define some built-in functions that our lisp has #[derive(Debug, Eq, PartialEq, Clone, Copy)] pub enum BuiltIn { Plus, @@ -68,7 +70,7 @@ pub enum BuiltIn { } /// With types defined, we move onto the top-level expression parser! -fn parse_expr(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { +fn parse_expr(i: &mut &'_ str) -> PResult<Expr> { preceded( multispace0, alt((parse_constant, parse_application, parse_if, parse_quote)), @@ -77,13 +79,13 @@ fn parse_expr(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { } /// We then add the Expr layer on top -fn parse_constant(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { +fn parse_constant(i: &mut &'_ str) -> PResult<Expr> { parse_atom.map(Expr::Constant).parse_next(i) } /// Now we take all these simple parsers and connect them. /// We can now parse half of our language! -fn parse_atom(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { +fn parse_atom(i: &mut &'_ str) -> PResult<Atom> { alt(( parse_num, parse_bool, @@ -95,16 +97,16 @@ fn parse_atom(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { /// Next up is number parsing. We're keeping it simple here by accepting any number (> 1) /// of digits but ending the program if it doesn't fit into an i32. 
-fn parse_num(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { +fn parse_num(i: &mut &'_ str) -> PResult<Atom> { alt(( - digit1.map_res(|digit_str: &str| digit_str.parse::<i32>().map(Atom::Num)), + digit1.try_map(|digit_str: &str| digit_str.parse::<i32>().map(Atom::Num)), preceded("-", digit1).map(|digit_str: &str| Atom::Num(-digit_str.parse::<i32>().unwrap())), )) .parse_next(i) } /// Our boolean values are also constant, so we can do it the same way -fn parse_bool(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { +fn parse_bool(i: &mut &'_ str) -> PResult<Atom> { alt(( "#t".map(|_| Atom::Boolean(true)), "#f".map(|_| Atom::Boolean(false)), @@ -112,7 +114,7 @@ fn parse_bool(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { .parse_next(i) } -fn parse_builtin(i: &str) -> IResult<&str, BuiltIn, VerboseError<&str>> { +fn parse_builtin(i: &mut &'_ str) -> PResult<BuiltIn> { // alt gives us the result of first parser that succeeds, of the series of // parsers we give it alt(( @@ -126,23 +128,20 @@ fn parse_builtin(i: &str) -> IResult<&str, BuiltIn, VerboseError<&str>> { /// Continuing the trend of starting from the simplest piece and building up, /// we start by creating a parser for the built-in operator functions. -fn parse_builtin_op(i: &str) -> IResult<&str, BuiltIn, VerboseError<&str>> { +fn parse_builtin_op(i: &mut &'_ str) -> PResult<BuiltIn> { // one_of matches one of the characters we give it - let (i, t) = one_of("+-*/=").parse_next(i)?; + let t = one_of(['+', '-', '*', '/', '=']).parse_next(i)?; // because we are matching single character tokens, we can do the matching logic // on the returned value - Ok(( - i, - match t { - '+' => BuiltIn::Plus, - '-' => BuiltIn::Minus, - '*' => BuiltIn::Times, - '/' => BuiltIn::Divide, - '=' => BuiltIn::Equal, - _ => unreachable!(), - }, - )) + Ok(match t { + '+' => BuiltIn::Plus, + '-' => BuiltIn::Minus, + '*' => BuiltIn::Times, + '/' => BuiltIn::Divide, + '=' => BuiltIn::Equal, + _ => unreachable!(), + }) } /// The next easiest thing to parse are keywords. @@ -150,10 +149,10 @@ fn parse_builtin_op(i: &str) -> IResult<&str, BuiltIn, VerboseError<&str>> { /// and `cut_err` to prevent back-tracking. /// /// Put plainly: `preceded(":", cut_err(alpha1))` means that once we see the `:` -/// character, we have to see one or more alphabetic chararcters or the input is invalid. -fn parse_keyword(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { +/// character, we have to see one or more alphabetic characters or the input is invalid. 
+fn parse_keyword(i: &mut &'_ str) -> PResult<Atom> { preceded(":", cut_err(alpha1)) - .context("keyword") + .context(StrContext::Label("keyword")) .map(|sym_str: &str| Atom::Keyword(sym_str.to_string())) .parse_next(i) } @@ -167,9 +166,9 @@ fn parse_keyword(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { /// /// tuples are themselves a parser, used to sequence parsers together, so we can translate this /// directly and then map over it to transform the output into an `Expr::Application` -fn parse_application(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { - let application_inner = - (parse_expr, many0(parse_expr)).map(|(head, tail)| Expr::Application(Box::new(head), tail)); +fn parse_application(i: &mut &'_ str) -> PResult<Expr> { + let application_inner = (parse_expr, repeat(0.., parse_expr)) + .map(|(head, tail)| Expr::Application(Box::new(head), tail)); // finally, we wrap it in an s-expression s_exp(application_inner).parse_next(i) } @@ -180,7 +179,7 @@ fn parse_application(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { /// /// In fact, we define our parser as if `Expr::If` was defined with an Option in it, /// we have the `opt` combinator which fits very nicely here. -fn parse_if(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { +fn parse_if(i: &mut &'_ str) -> PResult<Expr> { let if_inner = preceded( // here to avoid ambiguity with other names starting with `if`, if we added // variables to our language, we say that if must be terminated by at least @@ -199,7 +198,7 @@ fn parse_if(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { Expr::If(Box::new(pred), Box::new(true_branch)) } }) - .context("if expression"); + .context(StrContext::Label("if expression")); s_exp(if_inner).parse_next(i) } @@ -208,12 +207,12 @@ fn parse_if(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { /// This example doesn't have the symbol atom, but by adding variables and changing /// the definition of quote to not always be around an S-expression, we'd get them /// naturally. -fn parse_quote(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { +fn parse_quote(i: &mut &'_ str) -> PResult<Expr> { // this should look very straight-forward after all we've done: // we find the `'` (quote) character, use cut_err to say that we're unambiguously // looking for an s-expression of 0 or more expressions, and then parse them - preceded("'", cut_err(s_exp(many0(parse_expr)))) - .context("quote") + preceded("'", cut_err(s_exp(repeat(0.., parse_expr)))) + .context(StrContext::Label("quote")) .map(Expr::Quote) .parse_next(i) } @@ -225,14 +224,14 @@ fn parse_quote(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { //.parse_next/ /// Unlike the previous functions, this function doesn't take or consume input, instead it /// takes a parsing function and returns a new parsing function. -fn s_exp<'a, O1, F>(inner: F) -> impl Parser<&'a str, O1, VerboseError<&'a str>> +fn s_exp<'a, O1, F>(inner: F) -> impl Parser<&'a str, O1, ContextError> where - F: Parser<&'a str, O1, VerboseError<&'a str>>, + F: Parser<&'a str, O1, ContextError>, { delimited( '(', preceded(multispace0, inner), - cut_err(preceded(multispace0, ')')).context("closing paren"), + cut_err(preceded(multispace0, ')')).context(StrContext::Label("closing paren")), ) } diff --git a/vendor/winnow/examples/string/parser.rs b/vendor/winnow/examples/string/parser.rs index a57dd04..01de737 100644 --- a/vendor/winnow/examples/string/parser.rs +++ b/vendor/winnow/examples/string/parser.rs @@ -9,23 +9,24 @@ //! 
- an escape followed by whitespace consumes all whitespace between the //! escape and the next non-whitespace character -use winnow::branch::alt; -use winnow::bytes::{take_till1, take_while_m_n}; -use winnow::character::multispace1; -use winnow::error::{FromExternalError, ParseError}; -use winnow::multi::fold_many0; +use winnow::ascii::multispace1; +use winnow::combinator::alt; +use winnow::combinator::fold_repeat; +use winnow::combinator::{delimited, preceded}; +use winnow::error::{FromExternalError, ParserError}; use winnow::prelude::*; -use winnow::sequence::{delimited, preceded}; +use winnow::token::{take_till, take_while}; /// Parse a string. Use a loop of `parse_fragment` and push all of the fragments /// into an output string. -pub fn parse_string<'a, E>(input: &'a str) -> IResult<&'a str, String, E> +pub fn parse_string<'a, E>(input: &mut &'a str) -> PResult<String, E> where - E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, + E: ParserError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, { - // fold_many0 is the equivalent of iterator::fold. It runs a parser in a loop, + // fold_repeat is the equivalent of iterator::fold. It runs a parser in a loop, // and for each output value, calls a folding function on each output value. - let build_string = fold_many0( + let build_string = fold_repeat( + 0.., // Our parser function – parses a single string fragment parse_fragment, // Our init value, an empty string @@ -44,7 +45,7 @@ where // Finally, parse the string. Note that, if `build_string` could accept a raw // " character, the closing delimiter " would never match. When using - // `delimited` with a looping parser (like fold_many0), be sure that the + // `delimited` with a looping parser (like fold_repeat), be sure that the // loop won't accidentally match your closing delimiter! delimited('"', build_string, '"').parse_next(input) } @@ -61,9 +62,9 @@ enum StringFragment<'a> { /// Combine `parse_literal`, `parse_escaped_whitespace`, and `parse_escaped_char` /// into a `StringFragment`. -fn parse_fragment<'a, E>(input: &'a str) -> IResult<&'a str, StringFragment<'a>, E> +fn parse_fragment<'a, E>(input: &mut &'a str) -> PResult<StringFragment<'a>, E> where - E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, + E: ParserError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, { alt(( // The `map` combinator runs a parser, then applies a function to the output @@ -76,14 +77,14 @@ where } /// Parse a non-empty block of text that doesn't include \ or " -fn parse_literal<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> { - // `take_till1` parses a string of 0 or more characters that aren't one of the +fn parse_literal<'a, E: ParserError<&'a str>>(input: &mut &'a str) -> PResult<&'a str, E> { + // `take_till` parses a string of 0 or more characters that aren't one of the // given characters. - let not_quote_slash = take_till1("\"\\"); + let not_quote_slash = take_till(1.., ['"', '\\']); // `verify` runs a parser, then runs a verification function on the output of // the parser. The verification function accepts the output only if it - // returns true. In this case, we want to ensure that the output of take_till1 + // returns true. In this case, we want to ensure that the output of take_till // is non-empty. 
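    // Note: `take_till(1.., ...)` already fails on an empty match, so the
    // `verify` below acts as an extra guard carried over from the original example.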
not_quote_slash .verify(|s: &str| !s.is_empty()) @@ -95,9 +96,9 @@ fn parse_literal<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, // then combine them into larger parsers. /// Parse an escaped character: \n, \t, \r, \u{00AC}, etc. -fn parse_escaped_char<'a, E>(input: &'a str) -> IResult<&'a str, char, E> +fn parse_escaped_char<'a, E>(input: &mut &'a str) -> PResult<char, E> where - E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, + E: ParserError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, { preceded( '\\', @@ -125,13 +126,13 @@ where /// Parse a unicode sequence, of the form u{XXXX}, where XXXX is 1 to 6 /// hexadecimal numerals. We will combine this later with `parse_escaped_char` /// to parse sequences like \u{00AC}. -fn parse_unicode<'a, E>(input: &'a str) -> IResult<&'a str, char, E> +fn parse_unicode<'a, E>(input: &mut &'a str) -> PResult<char, E> where - E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, + E: ParserError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, { - // `take_while_m_n` parses between `m` and `n` bytes (inclusive) that match + // `take_while` parses between `m` and `n` bytes (inclusive) that match // a predicate. `parse_hex` here parses between 1 and 6 hexadecimal numerals. - let parse_hex = take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()); + let parse_hex = take_while(1..=6, |c: char| c.is_ascii_hexdigit()); // `preceded` takes a prefix parser, and if it succeeds, returns the result // of the body parser. In this case, it parses u{XXXX}. @@ -143,12 +144,12 @@ where delimited('{', parse_hex, '}'), ); - // `map_res` takes the result of a parser and applies a function that returns + // `try_map` takes the result of a parser and applies a function that returns // a Result. In this case we take the hex bytes from parse_hex and attempt to // convert them to a u32. - let parse_u32 = parse_delimited_hex.map_res(move |hex| u32::from_str_radix(hex, 16)); + let parse_u32 = parse_delimited_hex.try_map(move |hex| u32::from_str_radix(hex, 16)); - // verify_map is like map_res, but it takes an Option instead of a Result. If + // verify_map is like try_map, but it takes an Option instead of a Result. If // the function returns None, verify_map returns an error. In this case, because // not all u32 values are valid unicode code points, we have to fallibly // convert to char with from_u32. @@ -157,8 +158,8 @@ where /// Parse a backslash, followed by any amount of whitespace. This is used later /// to discard any escaped whitespace. -fn parse_escaped_whitespace<'a, E: ParseError<&'a str>>( - input: &'a str, -) -> IResult<&'a str, &'a str, E> { +fn parse_escaped_whitespace<'a, E: ParserError<&'a str>>( + input: &mut &'a str, +) -> PResult<&'a str, E> { preceded('\\', multispace1).parse_next(input) } diff --git a/vendor/winnow/src/_topic/arithmetic.rs b/vendor/winnow/src/_topic/arithmetic.rs index 1a6eddc..d94b4fa 100644 --- a/vendor/winnow/src/_topic/arithmetic.rs +++ b/vendor/winnow/src/_topic/arithmetic.rs @@ -11,3 +11,9 @@ //! ```rust #![doc = include_str!("../../examples/arithmetic/parser_ast.rs")] //! ``` +//! +//! ## Parse to Tokens then AST +//! +//! ```rust +#![doc = include_str!("../../examples/arithmetic/parser_lexer.rs")] +//! ``` diff --git a/vendor/winnow/src/_topic/error.rs b/vendor/winnow/src/_topic/error.rs index abf2f8d..c5374b4 100644 --- a/vendor/winnow/src/_topic/error.rs +++ b/vendor/winnow/src/_topic/error.rs @@ -1,9 +1,9 @@ //! 
# Custom Errors //! -//! The most basic error type is [`ParseError`][crate::error::ParseError] +//! The most basic error type is [`ParserError`][crate::error::ParserError] //! //! Optional traits include: -//! - [`ContextError`][crate::error::ContextError] +//! - [`AddContext`][crate::error::AddContext] //! - [`FromExternalError`][crate::error::FromExternalError] //! //! # Example diff --git a/vendor/winnow/src/_topic/language.rs b/vendor/winnow/src/_topic/language.rs index d257c0b..0cebc99 100644 --- a/vendor/winnow/src/_topic/language.rs +++ b/vendor/winnow/src/_topic/language.rs @@ -27,14 +27,14 @@ //! ```rust //! use winnow::prelude::*; //! use winnow::{ -//! error::ParseError, -//! sequence::delimited, -//! character::multispace0, +//! error::ParserError, +//! combinator::delimited, +//! ascii::multispace0, //! }; //! //! /// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and //! /// trailing whitespace, returning the output of `inner`. -//! fn ws<'a, F, O, E: ParseError<&'a str>>(inner: F) -> impl Parser<&'a str, O, E> +//! fn ws<'a, F, O, E: ParserError<&'a str>>(inner: F) -> impl Parser<&'a str, O, E> //! where //! F: Parser<&'a str, O, E>, //! { @@ -61,13 +61,13 @@ //! ```rust //! use winnow::prelude::*; //! use winnow::{ -//! error::ParseError, -//! bytes::take_till1, +//! error::ParserError, +//! token::take_till1, //! }; //! -//! pub fn peol_comment<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, (), E> +//! pub fn peol_comment<'a, E: ParserError<&'a str>>(i: &mut &'a str) -> PResult<(), E> //! { -//! ('%', take_till1("\n\r")) +//! ('%', take_till1(['\n', '\r'])) //! .void() // Output is thrown away. //! .parse_next(i) //! } @@ -81,11 +81,11 @@ //! ```rust //! use winnow::prelude::*; //! use winnow::{ -//! error::ParseError, -//! bytes::{tag, take_until0}, +//! error::ParserError, +//! token::{tag, take_until0}, //! }; //! -//! pub fn pinline_comment<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, (), E> { +//! pub fn pinline_comment<'a, E: ParserError<&'a str>>(i: &mut &'a str) -> PResult<(), E> { //! ( //! "(*", //! take_until0("*)"), @@ -107,14 +107,14 @@ //! use winnow::prelude::*; //! use winnow::{ //! stream::AsChar, -//! bytes::take_while0, -//! bytes::one_of, +//! token::take_while, +//! token::one_of, //! }; //! -//! pub fn identifier(input: &str) -> IResult<&str, &str> { +//! pub fn identifier<'s>(input: &mut &'s str) -> PResult<&'s str> { //! ( //! one_of(|c: char| c.is_alpha() || c == '_'), -//! take_while0(|c: char| c.is_alphanum() || c == '_') +//! take_while(0.., |c: char| c.is_alphanum() || c == '_') //! ) //! .recognize() //! .parse_next(input) @@ -122,8 +122,8 @@ //! ``` //! //! Let's say we apply this to the identifier `hello_world123abc`. The first element of the tuple -//! would uses [`one_of`][crate::bytes::one_of] which would recognize `h`. The tuple ensures that -//! `ello_world123abc` will be piped to the next [`take_while0`][crate::bytes::take_while0] parser, +//! would use [`one_of`][crate::token::one_of] which would recognize `h`. The tuple ensures that +//! `ello_world123abc` will be piped to the next [`take_while`][crate::token::take_while] parser, //! which recognizes every remaining character. However, the tuple returns a tuple of the results //! of its sub-parsers. The [`recognize`][crate::Parser::recognize] parser produces a `&str` of the //! input text that was parsed, which in this case is the entire `&str` `hello_world123abc`.
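// A self-contained sketch of the `identifier` recipe above, assuming the winnow 0.5 paths
// shown in this patch (`token::one_of`, `token::take_while`, `Parser::recognize`):
// `recognize` discards the tuple's outputs and returns the slice of input they matched.
use winnow::prelude::*;
use winnow::stream::AsChar;
use winnow::token::{one_of, take_while};

pub fn identifier<'s>(input: &mut &'s str) -> PResult<&'s str> {
    (
        // First token: a letter or underscore...
        one_of(|c: char| c.is_alpha() || c == '_'),
        // ...then any run of letters, digits, or underscores.
        take_while(0.., |c: char| c.is_alphanum() || c == '_'),
    )
        .recognize()
        .parse_next(input)
}

fn main() {
    let mut input = "hello_world123abc = 1";
    assert_eq!(identifier(&mut input).unwrap(), "hello_world123abc");
    assert_eq!(input, " = 1");
}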
@@ -136,6 +136,8 @@ #![doc = include_str!("../../examples/string/parser.rs")] //! ``` //! +//! See also [`escaped`] and [`escaped_transform`]. +//! //! ### Integers //! //! The following recipes all return string slices rather than integer values. How to obtain an @@ -146,9 +148,6 @@ //! string slice to an integer value is slightly simpler. You can also strip the `_` from the string //! slice that is returned, which is demonstrated in the second hexadecimal number parser. //! -//! If you wish to limit the number of digits in a valid integer literal, replace `many1` with -//! `many_m_n` in the recipes. -//! //! #### Hexadecimal //! //! The parser outputs the string slice of the digits without the leading `0x`/`0X`. @@ -156,18 +155,18 @@ //! ```rust //! use winnow::prelude::*; //! use winnow::{ -//! branch::alt, -//! multi::{many0, many1}, -//! sequence::{preceded, terminated}, -//! bytes::one_of, -//! bytes::tag, +//! combinator::alt, +//! combinator::{repeat}, +//! combinator::{preceded, terminated}, +//! token::one_of, +//! token::tag, //! }; //! -//! fn hexadecimal(input: &str) -> IResult<&str, &str> { // <'a, E: ParseError<&'a str>> +//! fn hexadecimal<'s>(input: &mut &'s str) -> PResult<&'s str> { // <'a, E: ParserError<&'a str>> //! preceded( //! alt(("0x", "0X")), -//! many1( -//! terminated(one_of("0123456789abcdefABCDEF"), many0('_').map(|()| ())) +//! repeat(1.., +//! terminated(one_of(('0'..='9', 'a'..='f', 'A'..='F')), repeat(0.., '_').map(|()| ())) //! ).map(|()| ()).recognize() //! ).parse_next(input) //! } @@ -178,42 +177,44 @@ //! ```rust //! use winnow::prelude::*; //! use winnow::{ -//! branch::alt, -//! multi::{many0, many1}, -//! sequence::{preceded, terminated}, -//! bytes::one_of, -//! bytes::tag, +//! combinator::alt, +//! combinator::{repeat}, +//! combinator::{preceded, terminated}, +//! token::one_of, +//! token::tag, //! }; //! -//! fn hexadecimal_value(input: &str) -> IResult<&str, i64> { +//! fn hexadecimal_value(input: &mut &str) -> PResult<i64> { //! preceded( //! alt(("0x", "0X")), -//! many1( -//! terminated(one_of("0123456789abcdefABCDEF"), many0('_').map(|()| ())) +//! repeat(1.., +//! terminated(one_of(('0'..='9', 'a'..='f', 'A'..='F')), repeat(0.., '_').map(|()| ())) //! ).map(|()| ()).recognize() -//! ).map_res( +//! ).try_map( //! |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 16) //! ).parse_next(input) //! } //! ``` //! +//! See also [`hex_uint`] +//! //! #### Octal //! //! ```rust //! use winnow::prelude::*; //! use winnow::{ -//! branch::alt, -//! multi::{many0, many1}, -//! sequence::{preceded, terminated}, -//! bytes::one_of, -//! bytes::tag, +//! combinator::alt, +//! combinator::{repeat}, +//! combinator::{preceded, terminated}, +//! token::one_of, +//! token::tag, //! }; //! -//! fn octal(input: &str) -> IResult<&str, &str> { +//! fn octal<'s>(input: &mut &'s str) -> PResult<&'s str> { //! preceded( //! alt(("0o", "0O")), -//! many1( -//! terminated(one_of("01234567"), many0('_').map(|()| ())) +//! repeat(1.., +//! terminated(one_of('0'..='7'), repeat(0.., '_').map(|()| ())) //! ).map(|()| ()).recognize() //! ).parse_next(input) //! } @@ -224,18 +225,18 @@ //! ```rust //! use winnow::prelude::*; //! use winnow::{ -//! branch::alt, -//! multi::{many0, many1}, -//! sequence::{preceded, terminated}, -//! bytes::one_of, -//! bytes::tag, +//! combinator::alt, +//! combinator::{repeat}, +//! combinator::{preceded, terminated}, +//! token::one_of, +//! token::tag, //! }; //! -//! fn binary(input: &str) -> IResult<&str, &str> { +//! 
fn binary<'s>(input: &mut &'s str) -> PResult<&'s str> { //! preceded( //! alt(("0b", "0B")), -//! many1( -//! terminated(one_of("01"), many0('_').map(|()| ())) +//! repeat(1.., +//! terminated(one_of('0'..='1'), repeat(0.., '_').map(|()| ())) //! ).map(|()| ()).recognize() //! ).parse_next(input) //! } @@ -246,21 +247,22 @@ //! ```rust //! use winnow::prelude::*; //! use winnow::{ -//! IResult, -//! multi::{many0, many1}, -//! sequence::terminated, -//! bytes::one_of, +//! combinator::{repeat}, +//! combinator::terminated, +//! token::one_of, //! }; //! -//! fn decimal(input: &str) -> IResult<&str, &str> { -//! many1( -//! terminated(one_of("0123456789"), many0('_').map(|()| ())) +//! fn decimal<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! repeat(1.., +//! terminated(one_of('0'..='9'), repeat(0.., '_').map(|()| ())) //! ).map(|()| ()) //! .recognize() //! .parse_next(input) //! } //! ``` //! +//! See also [`dec_uint`] and [`dec_int`] +//! //! ### Floating Point Numbers //! //! The following is adapted from [the Python parser by Valentin Lorentz](https://github.com/ProgVal/rust-python-parser/blob/master/src/numbers.rs). @@ -268,22 +270,22 @@ //! ```rust //! use winnow::prelude::*; //! use winnow::{ -//! branch::alt, -//! multi::{many0, many1}, +//! combinator::alt, +//! combinator::{repeat}, //! combinator::opt, -//! sequence::{preceded, terminated}, -//! bytes::one_of, +//! combinator::{preceded, terminated}, +//! token::one_of, //! }; //! -//! fn float(input: &str) -> IResult<&str, &str> { +//! fn float<'s>(input: &mut &'s str) -> PResult<&'s str> { //! alt(( //! // Case one: .42 //! ( //! '.', //! decimal, //! opt(( -//! one_of("eE"), -//! opt(one_of("+-")), +//! one_of(['e', 'E']), +//! opt(one_of(['+', '-'])), //! decimal //! )) //! ).recognize() @@ -294,8 +296,8 @@ //! '.', //! decimal, //! )), -//! one_of("eE"), -//! opt(one_of("+-")), +//! one_of(['e', 'E']), +//! opt(one_of(['+', '-'])), //! decimal //! ).recognize() //! , // Case three: 42. and 42.42 @@ -307,12 +309,22 @@ //! )).parse_next(input) //! } //! -//! fn decimal(input: &str) -> IResult<&str, &str> { -//! many1( -//! terminated(one_of("0123456789"), many0('_').map(|()| ())) +//! fn decimal<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! repeat(1.., +//! terminated(one_of('0'..='9'), repeat(0.., '_').map(|()| ())) //! ). //! map(|()| ()) //! .recognize() //! .parse_next(input) //! } //! ``` +//! +//! See also [`float`] + +#![allow(unused_imports)] +use crate::ascii::dec_int; +use crate::ascii::dec_uint; +use crate::ascii::escaped; +use crate::ascii::escaped_transform; +use crate::ascii::float; +use crate::ascii::hex_uint; diff --git a/vendor/winnow/src/_topic/mod.rs b/vendor/winnow/src/_topic/mod.rs index 7668727..79895ce 100644 --- a/vendor/winnow/src/_topic/mod.rs +++ b/vendor/winnow/src/_topic/mod.rs @@ -3,6 +3,7 @@ //! These are short recipes for accomplishing common tasks. //! //! - [Why `winnow`?][why] +//! - [Migrating from `nom`][nom] //! - Formats: //! - [Elements of Programming Languages][language] //! - [Arithmetic][arithmetic] @@ -12,13 +13,16 @@ //! - [HTTP][http] //! - Special Topics: //! - [Implementing `FromStr`][fromstr] +//! - [Performance][performance] //! - [Parsing Partial Input][partial] -//! - [Custom stream][stream] +//! - [Custom stream or token][stream] //! - [Custom errors][error] //! //! See also parsers written with `winnow`: //! //! - [`toml_edit`](https://crates.io/crates/toml_edit) +//! 
- [`hcl-edit`](https://crates.io/crates/hcl-edit) +#![allow(clippy::std_instead_of_core)] pub mod arithmetic; pub mod error; @@ -27,7 +31,9 @@ pub mod http; pub mod ini; pub mod json; pub mod language; +pub mod nom; pub mod partial; +pub mod performance; pub mod s_expression; pub mod stream; pub mod why; diff --git a/vendor/winnow/src/_topic/nom.rs b/vendor/winnow/src/_topic/nom.rs new file mode 100644 index 0000000..f3cb74e --- /dev/null +++ b/vendor/winnow/src/_topic/nom.rs @@ -0,0 +1,49 @@ +//! # Migrating from `nom` +//! +//! For comparisons with `nom`, see +//! - [Why `winnow`][super::why] +//! - [parse-rosetta-rs](https://github.com/rosetta-rs/parse-rosetta-rs/) +//! +//! What approach you take depends on the size and complexity of your parser. +//! For small, simple parsers, it's likely easiest to directly port from `nom`. +//! When trying to look for the equivalent of a `nom` combinator, search in the docs for the name +//! of the `nom` combinator. It is expected that, where names diverge, a doc alias exists. +//! See also the [List of combinators][crate::combinator]. +//! +//! For larger parsers, it is likely best to take smaller steps +//! - Easier to debug when something goes wrong +//! - Deprecation messages will help you through the process +//! +//! The workflow goes something like: +//! 1. Run `cargo rm nom && cargo add winnow@0.3` +//! 1. Ensure everything compiles and tests pass, ignoring deprecation messages (see [migration +//! notes](https://github.com/winnow-rs/winnow/blob/v0.3-main/CHANGELOG.md#nom-migration-guide)) +//! 1. Commit +//! 1. Switch any `impl FnMut(I) -> IResult<I, O, E>` to `impl Parser<I, O, E>` +//! 1. Resolve deprecation messages +//! 1. Commit +//! 1. Run `cargo add winnow@0.4` +//! 1. Ensure everything compiles and tests pass, ignoring deprecation messages (see [changelog](https://github.com/winnow-rs/winnow/blob/v0.4-main/CHANGELOG.md#compatibility-2) for more details) +//! 1. Commit +//! 1. Resolve deprecation messages +//! 1. Commit +//! 1. Run `cargo add winnow@0.5` +//! 1. Ensure everything compiles and tests pass, ignoring deprecation messages (see [migration +//! notes](https://github.com/winnow-rs/winnow/blob/v0.5.0/CHANGELOG.md)) +//! 1. Commit +//! 1. Resolve deprecation messages +//! 1. Commit +//! +//! For example migrations, see +//! - [git-config-env](https://github.com/gitext-rs/git-config-env/pull/11) (nom to winnow 0.3) +//! - [git-conventional](https://github.com/crate-ci/git-conventional/pull/37) (nom to winnow 0.3, +//! adds explicit tracing for easier debugging) +//! - [typos](https://github.com/crate-ci/typos/pull/664) (nom to winnow 0.3) +//! - [cargo-smart-release](https://github.com/Byron/gitoxide/pull/948) (gradual migration from nom +//! to winnow 0.5) +//! - [gix-config](https://github.com/Byron/gitoxide/pull/951) (gradual migration from nom +//! to winnow 0.5) +//! - [gix-protocol](https://github.com/Byron/gitoxide/pull/1009) (gradual migration from nom +//! to winnow 0.5) +//! - [gitoxide](https://github.com/Byron/gitoxide/pull/956) (gradual migration from nom +//! to winnow 0.5) diff --git a/vendor/winnow/src/_topic/partial.rs b/vendor/winnow/src/_topic/partial.rs index 263d7f1..580a004 100644 --- a/vendor/winnow/src/_topic/partial.rs +++ b/vendor/winnow/src/_topic/partial.rs @@ -1,15 +1,15 @@ //! # Parsing Partial Input //! -//! Typically, the input being parsed is all in-memory, or is complete.
Some data sources are too //! large to fit into memory, only allowing parsing an incomplete or [`Partial`] subset of the //! data, requiring incrementally parsing. //! //! By wrapping a stream, like `&[u8]`, with [`Partial`], parsers will report when the data is //! [`Incomplete`] and more input is [`Needed`], allowing the caller to stream-in additional data -//! to be parsed. The data is then parsed a chunk at a time. +//! to be parsed. The data is then parsed a chunk at a time. //! //! Chunks are typically defined by either: -//! - A header reporting the number of bytes, like with [`length_value`] +//! - A header reporting the number of bytes, like with [`length_and_then`] //! - [`Partial`] can explicitly be changed to being complete once the specified bytes are //! acquired via [`StreamIsPartial::complete`]. //! - A delimiter, like with [ndjson](http://ndjson.org/) @@ -19,9 +19,9 @@ //! parser is for the next chunk. //! //! Caveats: -//! - `winnow` takes the approach of re-parsing from scratch. Chunks should be relatively small to +//! - `winnow` takes the approach of re-parsing from scratch. Chunks should be relatively small to //! prevent the re-parsing overhead from dominating. -//! - Parsers like [`many0`] do not know when an `eof` is from insufficient data or the end of the +//! - Parsers like [`repeat`] do not know when an `eof` is from insufficient data or the end of the //! stream, causing them to always report [`Incomplete`]. //! //! # Example @@ -38,9 +38,9 @@ #![allow(unused_imports)] // Used for intra-doc links +use crate::binary::length_and_then; +use crate::combinator::repeat; use crate::error::ErrMode::Incomplete; use crate::error::Needed; -use crate::multi::length_value; -use crate::multi::many0; use crate::stream::Partial; use crate::stream::StreamIsPartial; diff --git a/vendor/winnow/src/_topic/performance.rs b/vendor/winnow/src/_topic/performance.rs new file mode 100644 index 0000000..2b53038 --- /dev/null +++ b/vendor/winnow/src/_topic/performance.rs @@ -0,0 +1,55 @@ +//! # Performance +//! +//! ## Runtime Performance +//! +//! See also the general Rust [Performance Book](https://nnethercote.github.io/perf-book/) +//! +//! Tips +//! - Try `cargo add winnow -F simd`. For some it offers significant performance improvements +//! - When enough cases of an [`alt`] have unique prefixes, prefer [`dispatch`] +//! - When parsing text, try to parse as bytes (`u8`) rather than `char`s ([`BStr`] can make +//! debugging easier) +//! - Find simplified subsets of the grammar to parse, falling back to the full grammar when it +//! doesn't work. For example, when parsing json strings, parse them without support for escapes, +//! falling back to escape support if it fails. +//! - Watch for large return types. A surprising place these can show up is when chaining parsers +//! with a tuple. +//! +//! ## Build-time Performance +//! +//! Returning complex types as `impl Trait` can negatively impact build times. This can hit in +//! surprising cases like: +//! ```rust +//! # use winnow::prelude::*; +//! fn foo<I, O, E>() -> impl Parser<I, O, E> +//! # where +//! # I: winnow::stream::Stream<Token=O>, +//! # I: winnow::stream::StreamIsPartial, +//! # E: winnow::error::ParserError<I>, +//! { +//! // ...some chained combinators... +//! # winnow::token::any +//! } +//! ``` +//! +//! Instead, wrap the combinators in a closure to simplify the type: +//! ```rust +//! # use winnow::prelude::*; +//! fn foo<I, O, E>() -> impl Parser<I, O, E> +//! # where +//! 
# I: winnow::stream::Stream<Token=O>, +//! # I: winnow::stream::StreamIsPartial, +//! # E: winnow::error::ParserError<I>, +//! { +//! move |input: &mut I| { +//! // ...some chained combinators... +//! # winnow::token::any +//! .parse_next(input) +//! } +//! } +//! ``` + +#![allow(unused_imports)] +use crate::combinator::alt; +use crate::combinator::dispatch; +use crate::stream::BStr; diff --git a/vendor/winnow/src/_topic/stream.rs b/vendor/winnow/src/_topic/stream.rs index 92841ed..2254f87 100644 --- a/vendor/winnow/src/_topic/stream.rs +++ b/vendor/winnow/src/_topic/stream.rs @@ -1,31 +1,32 @@ -//! # Custom [`Stream`][crate::stream::Stream] +//! # Custom [`Stream`] //! //! `winnow` is batteries included with support for //! - Basic inputs like `&str`, newtypes with -//! - Improved debug output like [`Bytes`][crate::Bytes] -//! - [`Stateful`][crate::Stateful] for passing state through your parser, like tracking recursion +//! - Improved debug output like [`Bytes`] +//! - [`Stateful`] for passing state through your parser, like tracking recursion //! depth -//! - [`Located`][crate::Located] for looking up the absolute position of a token +//! - [`Located`] for looking up the absolute position of a token //! -//! But that won't always cut it for your parser. For example, you might lex `&str` into +//! But that won't always cut it for your parser. For example, you might lex `&str` into //! a series of tokens and then want to parse a `TokenStream`. //! //! ## Implementing a custom stream //! -//! Let's assume we have an input type we'll call `MyStream`. `MyStream` is a sequence of `MyItem` type. -//! The goal is to define parsers with this signature: `MyStream -> IResult<MyStream, Output>`. +//! Let's assume we have an input type we'll call `MyStream`. +//! `MyStream` is a sequence of `MyItem` type. //! +//! The goal is to define parsers with this signature: `&mut MyStream -> PResult<Output>`. //! ```rust //! # use winnow::prelude::*; -//! # use winnow::bytes::tag; +//! # use winnow::token::tag; //! # type MyStream<'i> = &'i str; //! # type Output<'i> = &'i str; -//! fn parser(i: MyStream<'_>) -> IResult<MyStream<'_>, Output<'_>> { +//! fn parser<'s>(i: &mut MyStream<'s>) -> PResult<Output<'s>> { //! "test".parse_next(i) //! } //! ``` //! -//! Here are the traits we have to implement for `MyStream`: +//! Here are the traits you may have to implement for `MyStream`: //! //! | trait | usage | //! |---|---| @@ -38,17 +39,28 @@ //! | [`Location`] |Calculate location within initial input| //! | [`Offset`] |Calculate the offset between slices| //! -//! Here are the traits we have to implement for `MyItem`: +//! And for `MyItem`: //! //! | trait | usage | //! |---|---| //! | [`AsChar`] |Transforms common types to a char for basic token parsing| //! | [`ContainsToken`] |Look for the token in the given set| //! -//! And traits for slices of `MyItem`: +//! And traits for `&[MyItem]`: //! +//! | trait | usage | //! |---|---| //! | [`SliceLen`] |Calculate the input length| //! | [`ParseSlice`] |Used to integrate `&str`'s `parse()` method| +//! +//! ## Implementing a custom token +//! +//! If you are parsing `&[MyItem]`, the `MyStream` traits are already implemented for slices, leaving just the `MyItem` traits. +//! +//! For example: +//! ```rust +#![doc = include_str!("../../examples/arithmetic/parser_lexer.rs")] +//!
``` #[allow(unused_imports)] // Here for intra-dock links use crate::stream::*; diff --git a/vendor/winnow/src/_topic/why.rs b/vendor/winnow/src/_topic/why.rs index e28181b..fc1716a 100644 --- a/vendor/winnow/src/_topic/why.rs +++ b/vendor/winnow/src/_topic/why.rs @@ -2,7 +2,7 @@ //! //! To answer this question, it will be useful to contrast this with other approaches to parsing. //! -//! **Note:** This will focus on principles and priorities. For a deeper and wider wider +//! **Note:** This will focus on principles and priorities. For a deeper and wider //! comparison with other Rust parser libraries, see //! [parse-rosetta-rs](https://github.com/rosetta-rs/parse-rosetta-rs). //! @@ -16,7 +16,7 @@ //! - Fewer dependencies to audit //! //! However, this comes at the cost of doing it all yourself, including -//! - Optimizing for each of the above characteristics you are about +//! - Optimizing for each of the above characteristics you care about //! - Ensuring the safety of any `unsafe` code (buffer overflows being a common bug with parsers) //! - Being aware of, familiar with, and correctly implement the relevant algorithms. //! matklad, who has written two rust compile frontends, commented @@ -42,7 +42,7 @@ //! //! For binary formats, `winnow` includes: //! - [A hexadecimal view][crate::Bytes] in [traces][crate::trace] -//! - [TLV](https://en.wikipedia.org/wiki/Type-length-value) +//! - [TLV](https://en.wikipedia.org/wiki/Type-length-value) (e.g. [`length_take`]) //! - Some common parsers to help get started, like numbers //! //! For text formats, `winnow` includes: //! @@ -52,13 +52,13 @@ //! //! This works well for: //! - Prototyping for what will be a hand-written parser -//! - When you to minimize the work to evolve your format +//! - When you want to minimize the work to evolve your format //! - When you don't have contributors focused solely on parsing and your grammar is large enough //! to be unwieldy to hand write. //! //! ## `nom` //! -//! `winnow` is a fork of the venerable [`nom`](https://crates.io/crates/nom). The difference +//! `winnow` is a fork of the venerable [`nom`](https://crates.io/crates/nom). The difference //! between them is largely in priorities. `nom` prioritizes: //! - Lower churn for existing users while `winnow` is trying to find ways to make things better //! for the parsers yet to be written. @@ -68,6 +68,8 @@ //! and to not block users on new features being merged while `winnow` aims to include all the //! fundamentals for parsing to ensure the experience is cohesive and high quality. //! +//! See also our [nom migration guide][super::nom] +//! //! ## `chumsky` //! //! [`chumsky`](https://crates.io/crates/chumsky) is an up and coming parser-combinator library //! @@ -77,11 +79,11 @@ //! //! > "If you need to implement either `Parser` or `Strategy` by hand, that's a problem that needs fixing". //! -//! This is under "batteries included" but it also ties into the feeling that `chumksy` acts more like -//! a framework. Instead of composing together helpers, you are expected to do everything through +//! This is under "batteries included" but it also ties into the feeling that `chumsky` acts more like +//! a framework. Instead of composing together helpers, you are expected to do everything through //! their system to the point that it is non-trivial to implement their `Parser` trait and are //! encouraged to use the
[`custom`](https://docs.rs/chumsky/0.9.0/chumsky/primitive/fn.custom.html) helper. This //! requires re-framing everything to fit within their model and makes the code harder to understand //! and debug as you are working with abstract operations that will eventually be applied //! rather than directly with the parsers. @@ -90,9 +92,10 @@ //! Probably the biggest thing that `winnow` loses out on is optimizations from ["parse modes" via //! GATs](https://github.com/zesterer/chumsky/pull/82) which allows downstream parsers to tell //! upstream parsers when information will be discarded, allowing bypassing expensive operations, -//! like allocations. This requires a lot more complex interaction with parsers that isn't as +//! like allocations. This requires a lot more complex interaction with parsers that isn't as //! trivial to do with bare functions which would lose out on any of that side-band information. //! Instead, we work around this with things like the [`Accumulate`] trait. #![allow(unused_imports)] +use crate::binary::length_take; use crate::stream::Accumulate; diff --git a/vendor/winnow/src/_tutorial/chapter_0.rs b/vendor/winnow/src/_tutorial/chapter_0.rs index 4c768d8..35a2d14 100644 --- a/vendor/winnow/src/_tutorial/chapter_0.rs +++ b/vendor/winnow/src/_tutorial/chapter_0.rs @@ -10,7 +10,7 @@ //! ## About //! //! `winnow` is a parser-combinator library. In other words, it gives you tools to define: -//! - "parsers", or functions that takes an input and gives back an output +//! - "parsers", or functions that take an input and give back an output //! - "combinators", or functions that take parsers and _combine_ them together! //! //! While "combinator" might be an unfamiliar word, you are likely using them in your rust code @@ -36,3 +36,4 @@ use crate::_topic; use std::iter::Iterator; pub use super::chapter_1 as next; +pub use crate::_tutorial as table_of_contents; diff --git a/vendor/winnow/src/_tutorial/chapter_1.rs b/vendor/winnow/src/_tutorial/chapter_1.rs index d6a45c8..2d94418 100644 --- a/vendor/winnow/src/_tutorial/chapter_1.rs +++ b/vendor/winnow/src/_tutorial/chapter_1.rs @@ -9,81 +9,78 @@ //! - `Ok` indicates the parser successfully found what it was looking for; or //! - `Err` indicates the parser could not find what it was looking for. //! -//! Parsers do more than just return a binary "success"/"failure" code. If -//! the parser was successful, then it will return a tuple where the first field -//! will contain everything the parser did not process. The second will contain -//! everything the parser processed. The idea is that a parser can happily parse the first -//! *part* of an input, without being able to parse the whole thing. +//! Parsers do more than just return a binary "success"/"failure" code. +//! On success, the parser will return the processed data. The input will be left pointing to +//! data that still needs processing //! //! If the parser failed, then there are multiple errors that could be returned. //! For simplicity, however, in the next chapters we will leave these unexplored. //! //! ```text -//! ┌─► Ok( -//! │ what the parser didn't touch, -//! │ what matched the parser -//! │ ) -//! ┌─────────┐ │ -//! my input───►│my parser├──►either──┤ -//! └─────────┘ └─► Err(...) +//! ┌─► Ok(what matched the parser) +//! ┌─────────┐ │ +//! my input───►│my parser├──►either──┤ +//! └─────────┘ └─► Err(...) //! ``` //! //! -//! To represent this model of the world, winnow uses the [`IResult<I, O>`] type. -//! 
The `Ok` variant has a tuple of `(remainder: I, output: O)`; +//! To represent this model of the world, winnow uses the [`PResult<O>`] type. +//! The `Ok` variant has `output: O`; //! whereas the `Err` variant stores an error. //! //! You can import that from: //! //! ```rust -//! use winnow::IResult; +//! use winnow::PResult; //! ``` //! +//! To combine parsers, we need a common way to refer to them which is where the [`Parser<I, O, E>`] +//! trait comes in with [`Parser::parse_next`] being the primary way to drive +//! parsing forward. +//! //! You'll note that `I` and `O` are parameterized -- while most of the examples in this book //! will be with `&str` (i.e. parsing a string); they do not have to be strings; nor do they //! have to be the same type (consider the simple example where `I = &str`, and `O = u64` -- this //! parses a string into an unsigned integer.) //! -//! To combine parsers, we need a common way to refer to them which is where the [`Parser`] -//! trait comes in with [`Parser::parse_next`] being the primary way to drive -//! parsing forward. //! //! # Let's write our first parser! //! //! The simplest parser we can write is one which successfully does nothing. //! //! To make it easier to implement a [`Parser`], the trait is implemented for -//! functions of the form `Fn(I) -> IResult<I, O>`. +//! functions of the form `Fn(&mut I) -> PResult<O>`. //! //! This parser function should take in a `&str`: //! -//! - Since it is supposed to succeed, we know it will return the Ok Variant. +//! - Since it is supposed to succeed, we know it will return the `Ok` variant. //! - Since it does nothing to our input, the remaining input is the same as the input. //! - Since it doesn't parse anything, it also should just return an empty string. //! //! ```rust -//! use winnow::IResult; +//! use winnow::PResult; //! use winnow::Parser; //! -//! pub fn do_nothing_parser(input: &str) -> IResult<&str, &str> { -//! Ok((input, "")) +//! pub fn do_nothing_parser<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! Ok("") //! } //! //! fn main() { -//! let input = "0x1a2b Hello"; +//! let mut input = "0x1a2b Hello"; //! -//! let (remainder, output) = do_nothing_parser.parse_next(input).unwrap(); +//! let output = do_nothing_parser.parse_next(&mut input).unwrap(); //! // Same as: -//! // let (remainder, output) = do_nothing_parser(input).unwrap(); +//! // let output = do_nothing_parser(&mut input).unwrap(); //! -//! assert_eq!(remainder, "0x1a2b Hello"); +//! assert_eq!(input, "0x1a2b Hello"); //! assert_eq!(output, ""); //! } //! ``` #![allow(unused_imports)] -use crate::IResult; +use crate::PResult; use crate::Parser; pub use super::chapter_0 as previous; pub use super::chapter_2 as next; +pub use crate::_tutorial as table_of_contents; diff --git a/vendor/winnow/src/_tutorial/chapter_2.rs b/vendor/winnow/src/_tutorial/chapter_2.rs index a7f9ea9..c27b719 100644 --- a/vendor/winnow/src/_tutorial/chapter_2.rs +++ b/vendor/winnow/src/_tutorial/chapter_2.rs @@ -4,77 +4,162 @@ //! //! ## Tokens //! -//! Matching a single token literal is common enough that `Parser` is implemented for -//! `char`. -//! +//! [`Stream`] provides some core operations to help with parsing. For example, to process a +//! single token, you can do: //! ```rust //! # use winnow::Parser; -//! # use winnow::IResult; -//! # -//! fn parse_prefix(input: &str) -> IResult<&str, char> { -//! '0'.parse_next(input) +//! # use winnow::PResult; +//! use winnow::stream::Stream; +//! use winnow::error::ParserError; +//! 
use winnow::error::ErrorKind; +//! use winnow::error::ErrMode; +//! +//! fn parse_prefix(input: &mut &str) -> PResult<char> { +//! let c = input.next_token().ok_or_else(|| { +//! ErrMode::from_error_kind(input, ErrorKind::Token) +//! })?; +//! if c != '0' { +//! return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)); +//! } +//! Ok(c) //! } //! //! fn main() { -//! let input = "0x1a2b Hello"; +//! let mut input = "0x1a2b Hello"; //! -//! let (remainder, output) = parse_prefix.parse_next(input).unwrap(); +//! let output = parse_prefix.parse_next(&mut input).unwrap(); //! -//! assert_eq!(remainder, "x1a2b Hello"); +//! assert_eq!(input, "x1a2b Hello"); //! assert_eq!(output, '0'); //! -//! assert!(parse_prefix("d").is_err()); +//! assert!(parse_prefix.parse_next(&mut "d").is_err()); //! } //! ``` //! -//! ## Tags +//! [`any`] and [`Parser::verify`] are [`Parser`] building blocks on top of [`Stream`]: +//! ```rust +//! # use winnow::PResult; +//! use winnow::Parser; +//! use winnow::token::any; //! -//! One of the most frequent way of matching a token is when they are combined into a string. -//! Again, this is common enough that `Parser` is implemented for `&str`: +//! fn parse_prefix(input: &mut &str) -> PResult<char> { +//! any.verify(|c| *c == '0').parse_next(input) +//! } +//! # +//! # fn main() { +//! # let mut input = "0x1a2b Hello"; +//! # +//! # let output = parse_prefix.parse_next(&mut input).unwrap(); +//! # +//! # assert_eq!(input, "x1a2b Hello"); +//! # assert_eq!(output, '0'); +//! # +//! # assert!(parse_prefix.parse_next(&mut "d").is_err()); +//! # } +//! ``` +//! +//! Matching a single token literal is common enough that [`Parser`] is implemented for +//! `char`. //! //! ```rust -//! # use winnow::Parser; -//! # use winnow::IResult; +//! # use winnow::PResult; +//! use winnow::Parser; +//! +//! fn parse_prefix(input: &mut &str) -> PResult<char> { +//! '0'.parse_next(input) +//! } //! # -//! fn parse_prefix(input: &str) -> IResult<&str, &str> { -//! "0x".parse_next(input) +//! # fn main() { +//! # let mut input = "0x1a2b Hello"; +//! # +//! # let output = parse_prefix.parse_next(&mut input).unwrap(); +//! # +//! # assert_eq!(input, "x1a2b Hello"); +//! # assert_eq!(output, '0'); +//! # +//! # assert!(parse_prefix.parse_next(&mut "d").is_err()); +//! # } +//! ``` +//! +//! ## Tags +//! +//! [`Stream`] also supports processing slices of tokens: +//! ```rust +//! # use winnow::Parser; +//! # use winnow::PResult; +//! use winnow::stream::Stream; +//! use winnow::error::ParserError; +//! use winnow::error::ErrorKind; +//! use winnow::error::ErrMode; +//! +//! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! let expected = "0x"; +//! if input.len() < expected.len() { +//! return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); +//! } +//! let actual = input.next_slice(expected.len()); +//! if actual != expected { +//! return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)); +//! } +//! Ok(actual) //! } //! //! fn main() { -//! let input = "0x1a2b Hello"; +//! let mut input = "0x1a2b Hello"; //! -//! let (remainder, output) = parse_prefix.parse_next(input).unwrap(); -//! assert_eq!(remainder, "1a2b Hello"); +//! let output = parse_prefix.parse_next(&mut input).unwrap(); +//! assert_eq!(input, "1a2b Hello"); //! assert_eq!(output, "0x"); //! -//! assert!(parse_prefix("0o123").is_err()); +//! assert!(parse_prefix.parse_next(&mut "0o123").is_err()); //! } //! ``` //! -//! In `winnow`, we call this type of parser a [`tag`]. +//! 
Again, matching a literal is common enough that [`Parser`] is implemented for `&str`: +//! ```rust +//! # use winnow::PResult; +//! use winnow::Parser; +//! +//! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! "0x".parse_next(input) +//! } +//! # +//! # fn main() { +//! # let mut input = "0x1a2b Hello"; +//! # +//! # let output = parse_prefix.parse_next(&mut input).unwrap(); +//! # assert_eq!(input, "1a2b Hello"); +//! # assert_eq!(output, "0x"); +//! # +//! # assert!(parse_prefix.parse_next(&mut "0o123").is_err()); +//! # } +//! ``` +//! +//! In `winnow`, we call this type of parser a [`tag`]. See [`token`] for additional individual +//! and token-slice parsers. //! //! ## Character Classes //! -//! Selecting a single `char` or a [`tag`] is fairly limited. Sometimes, you will want to select one of several -//! `chars` of a specific class, like digits. For this, we use the [`one_of`] parer: +//! Selecting a single `char` or a [`tag`] is fairly limited. Sometimes, you will want to select one of several +//! `chars` of a specific class, like digits. For this, we use the [`one_of`] parser: //! //! ```rust //! # use winnow::Parser; -//! # use winnow::IResult; -//! use winnow::bytes::one_of; +//! # use winnow::PResult; +//! use winnow::token::one_of; //! -//! fn parse_digits(input: &str) -> IResult<&str, char> { -//! one_of("0123456789abcdefgABCDEFG").parse_next(input) +//! fn parse_digits(input: &mut &str) -> PResult<char> { +//! one_of(('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input) //! } //! //! fn main() { -//! let input = "1a2b Hello"; +//! let mut input = "1a2b Hello"; //! -//! let (remainder, output) = parse_digits.parse_next(input).unwrap(); -//! assert_eq!(remainder, "a2b Hello"); +//! let output = parse_digits.parse_next(&mut input).unwrap(); +//! assert_eq!(input, "a2b Hello"); //! assert_eq!(output, '1'); //! -//! assert!(parse_digits("Z").is_err()); +//! assert!(parse_digits.parse_next(&mut "Z").is_err()); //! } //! ``` //! @@ -82,75 +167,82 @@ //! > Let's look at it more closely as its used above (resolving all generic parameters): //! > ```rust //! > # use winnow::prelude::*; -//! > # use winnow::error::Error; +//! > # use winnow::error::InputError; //! > pub fn one_of<'i>( -//! > list: &'static str -//! > ) -> impl Parser<&'i str, char, Error<&'i str>> { +//! > list: &'static [char] +//! > ) -> impl Parser<&'i str, char, InputError<&'i str>> { //! > // ... -//! > # winnow::bytes::one_of(list) +//! > # winnow::token::one_of(list) //! > } //! > ``` //! > If you have not programmed in a language where functions are values, the type signature of the //! > [`one_of`] function might be a surprise. //! > The function [`one_of`] *returns a function*. The function it returns is a -//! > `Parser`, taking a `&str` and returning an `IResult`. This is a common pattern in winnow for +//! > `Parser`, taking a `&str` and returning a `PResult`. This is a common pattern in winnow for //! > configurable or stateful parsers. //! //! Some of character classes are common enough that a named parser is provided, like with: -//! - [`line_ending`][crate::character::line_ending]: Recognizes an end of line (both `\n` and `\r\n`) -//! - [`newline`][crate::character::newline]: Matches a newline character `\n` -//! - [`tab`][crate::character::tab]: Matches a tab character `\t` +//! - [`line_ending`][crate::ascii::line_ending]: Recognizes an end of line (both `\n` and `\r\n`) +//! - [`newline`][crate::ascii::newline]: Matches a newline character `\n` +//!
- [`tab`][crate::ascii::tab]: Matches a tab character `\t` //! -//! You can then capture sequences of these characters with parsers like [`take_while1`]. +//! You can then capture sequences of these characters with parsers like [`take_while`]. //! ```rust //! # use winnow::Parser; -//! # use winnow::IResult; -//! use winnow::bytes::take_while1; +//! # use winnow::PResult; +//! use winnow::token::take_while; //! -//! fn parse_digits(input: &str) -> IResult<&str, &str> { -//! take_while1("0123456789abcdefgABCDEFG").parse_next(input) +//! fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! take_while(1.., ('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input) //! } //! //! fn main() { -//! let input = "1a2b Hello"; +//! let mut input = "1a2b Hello"; //! -//! let (remainder, output) = parse_digits.parse_next(input).unwrap(); -//! assert_eq!(remainder, " Hello"); +//! let output = parse_digits.parse_next(&mut input).unwrap(); +//! assert_eq!(input, " Hello"); //! assert_eq!(output, "1a2b"); //! -//! assert!(parse_digits("Z").is_err()); +//! assert!(parse_digits.parse_next(&mut "Z").is_err()); //! } //! ``` //! -//! We could simplify this further with by using one of the built-in character classes, [`hex_digit1`]: +//! We could simplify this further by using one of the built-in character classes, [`hex_digit1`]: //! ```rust //! # use winnow::Parser; -//! # use winnow::IResult; -//! use winnow::character::hex_digit1; +//! # use winnow::PResult; +//! use winnow::ascii::hex_digit1; //! -//! fn parse_digits(input: &str) -> IResult<&str, &str> { +//! fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { //! hex_digit1.parse_next(input) //! } //! //! fn main() { -//! let input = "1a2b Hello"; +//! let mut input = "1a2b Hello"; //! -//! let (remainder, output) = parse_digits.parse_next(input).unwrap(); -//! assert_eq!(remainder, " Hello"); +//! let output = parse_digits.parse_next(&mut input).unwrap(); +//! assert_eq!(input, " Hello"); //! assert_eq!(output, "1a2b"); //! -//! assert!(parse_digits("Z").is_err()); +//! assert!(parse_digits.parse_next(&mut "Z").is_err()); //! } //! ``` +//! +//! See [`ascii`] for more text-based parsers. #![allow(unused_imports)] -use crate::bytes::one_of; -use crate::bytes::tag; -use crate::bytes::take_while1; -use crate::character::hex_digit1; +use crate::ascii; +use crate::ascii::hex_digit1; use crate::stream::ContainsToken; +use crate::stream::Stream; +use crate::token; +use crate::token::any; +use crate::token::one_of; +use crate::token::tag; +use crate::token::take_while; use crate::Parser; use std::ops::RangeInclusive; pub use super::chapter_1 as previous; pub use super::chapter_3 as next; +pub use crate::_tutorial as table_of_contents; diff --git a/vendor/winnow/src/_tutorial/chapter_3.rs b/vendor/winnow/src/_tutorial/chapter_3.rs index 29c5db4..e3f86b5 100644 --- a/vendor/winnow/src/_tutorial/chapter_3.rs +++ b/vendor/winnow/src/_tutorial/chapter_3.rs @@ -10,16 +10,15 @@ //! Now that we can create more interesting parsers, we can sequence them together, like: //! //! ```rust -//! # use winnow::bytes::take_while1; -//! # use winnow::Parser; -//! # use winnow::IResult; +//! # use winnow::prelude::*; +//! # use winnow::token::take_while; //! # -//! fn parse_prefix(input: &str) -> IResult<&str, &str> { +//! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> { //! "0x".parse_next(input) //! } //! -//! fn parse_digits(input: &str) -> IResult<&str, &str> { -//! take_while1(( +//! 
fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! take_while(1.., ( //! ('0'..='9'), //! ('A'..='F'), //! ('a'..='f'), @@ -27,29 +26,28 @@ //! } //! //! fn main() { -//! let input = "0x1a2b Hello"; +//! let mut input = "0x1a2b Hello"; //! -//! let (remainder, prefix) = parse_prefix.parse_next(input).unwrap(); -//! let (remainder, digits) = parse_digits.parse_next(remainder).unwrap(); +//! let prefix = parse_prefix.parse_next(&mut input).unwrap(); +//! let digits = parse_digits.parse_next(&mut input).unwrap(); //! //! assert_eq!(prefix, "0x"); //! assert_eq!(digits, "1a2b"); -//! assert_eq!(remainder, " Hello"); +//! assert_eq!(input, " Hello"); //! } //! ``` //! //! To sequence these together, you can just put them in a tuple: //! ```rust -//! # use winnow::bytes::take_while1; -//! # use winnow::Parser; -//! # use winnow::IResult; +//! # use winnow::prelude::*; +//! # use winnow::token::take_while; //! # -//! # fn parse_prefix(input: &str) -> IResult<&str, &str> { +//! # fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> { //! # "0x".parse_next(input) //! # } //! # -//! # fn parse_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # ('A'..='F'), //! # ('a'..='f'), @@ -59,33 +57,32 @@ //! //... //! //! fn main() { -//! let input = "0x1a2b Hello"; +//! let mut input = "0x1a2b Hello"; //! -//! let (remainder, (prefix, digits)) = ( +//! let (prefix, digits) = ( //! parse_prefix, //! parse_digits -//! ).parse_next(input).unwrap(); +//! ).parse_next(&mut input).unwrap(); //! //! assert_eq!(prefix, "0x"); //! assert_eq!(digits, "1a2b"); -//! assert_eq!(remainder, " Hello"); +//! assert_eq!(input, " Hello"); //! } //! ``` //! //! Frequently, you won't care about the tag and you can instead use one of the provided combinators, //! like [`preceded`]: //! ```rust -//! # use winnow::bytes::take_while1; -//! # use winnow::Parser; -//! # use winnow::IResult; -//! use winnow::sequence::preceded; +//! # use winnow::prelude::*; +//! # use winnow::token::take_while; +//! use winnow::combinator::preceded; //! -//! # fn parse_prefix(input: &str) -> IResult<&str, &str> { +//! # fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> { //! # "0x".parse_next(input) //! # } //! # -//! # fn parse_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # ('A'..='F'), //! # ('a'..='f'), @@ -95,64 +92,73 @@ //! //... //! //! fn main() { -//! let input = "0x1a2b Hello"; +//! let mut input = "0x1a2b Hello"; //! -//! let (remainder, digits) = preceded( +//! let digits = preceded( //! parse_prefix, //! parse_digits -//! ).parse_next(input).unwrap(); +//! ).parse_next(&mut input).unwrap(); //! //! assert_eq!(digits, "1a2b"); -//! assert_eq!(remainder, " Hello"); +//! assert_eq!(input, " Hello"); //! } //! ``` //! +//! See [`combinator`] for more sequencing parsers. +//! //! ## Alternatives //! //! Sometimes, we might want to choose between two parsers; and we're happy with //! either being used. //! -//! The de facto way to do this in winnow is with the [`alt()`] combinator which will execute each -//! parser in a tuple until it finds one that does not error. If all error, then by default you are -//! given the error from the last parser. -//! -//! We can see a basic example of `alt()` below. +//! 
+//! Since a failed branch may leave `input` partially advanced, we first need a way to rewind
+//! before trying the next alternative. [`Stream::checkpoint`] helps us to retry parsing:
//! ```rust
-//! # use winnow::IResult;
-//! # use winnow::Parser;
-//! # use winnow::bytes::take_while1;
-//! use winnow::branch::alt;
+//! # use winnow::prelude::*;
+//! # use winnow::token::take_while;
+//! use winnow::stream::Stream;
//!
-//! fn parse_digits(input: &str) -> IResult<&str, (&str, &str)> {
-//!     alt((
-//!         ("0b", parse_bin_digits),
-//!         ("0o", parse_oct_digits),
-//!         ("0d", parse_dec_digits),
-//!         ("0x", parse_hex_digits),
-//!     )).parse_next(input)
+//! fn parse_digits<'s>(input: &mut &'s str) -> PResult<(&'s str, &'s str)> {
+//!     let start = input.checkpoint();
+//!
+//!     if let Ok(output) = ("0b", parse_bin_digits).parse_next(input) {
+//!         return Ok(output);
+//!     }
+//!
+//!     input.reset(start);
+//!     if let Ok(output) = ("0o", parse_oct_digits).parse_next(input) {
+//!         return Ok(output);
+//!     }
+//!
+//!     input.reset(start);
+//!     if let Ok(output) = ("0d", parse_dec_digits).parse_next(input) {
+//!         return Ok(output);
+//!     }
+//!
+//!     input.reset(start);
+//!     ("0x", parse_hex_digits).parse_next(input)
//! }
//!
//! // ...
-//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> {
-//! #     take_while1((
+//! # fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> {
+//! #     take_while(1.., (
//! #         ('0'..='7'),
//! #     )).parse_next(input)
//! # }
//! #
-//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> {
-//! #     take_while1((
+//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> {
+//! #     take_while(1.., (
//! #         ('0'..='7'),
//! #     )).parse_next(input)
//! # }
//! #
-//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> {
-//! #     take_while1((
+//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> {
+//! #     take_while(1.., (
//! #         ('0'..='9'),
//! #     )).parse_next(input)
//! # }
//! #
-//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> {
-//! #     take_while1((
+//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> {
+//! #     take_while(1.., (
//! #         ('0'..='9'),
//! #         ('A'..='F'),
//! #         ('a'..='f'),
@@ -160,31 +166,147 @@
//! # }
//!
//! fn main() {
-//!     let input = "0x1a2b Hello";
+//!     let mut input = "0x1a2b Hello";
//!
-//!     let (remainder, (prefix, digits)) = parse_digits.parse_next(input).unwrap();
+//!     let (prefix, digits) = parse_digits.parse_next(&mut input).unwrap();
//!
-//!     assert_eq!(remainder, " Hello");
+//!     assert_eq!(input, " Hello");
//!     assert_eq!(prefix, "0x");
//!     assert_eq!(digits, "1a2b");
//!
-//!     assert!(parse_digits("ghiWorld").is_err());
+//!     assert!(parse_digits(&mut "ghiWorld").is_err());
//! }
//! ```
//!
+//! > **Warning:** the above example is for illustrative purposes; relying on `Result::Ok` or
+//! > `Result::Err` can lead to incorrect behavior. This will be clarified later when covering
+//! > [error handling][`chapter_6`#errmode].
+//!
+//! [`opt`] is a basic building block for correctly retrying parsing:
+//! ```rust
+//! # use winnow::prelude::*;
+//! # use winnow::token::take_while;
+//! use winnow::combinator::opt;
+//!
+//! fn parse_digits<'s>(input: &mut &'s str) -> PResult<(&'s str, &'s str)> {
+//!     if let Some(output) = opt(("0b", parse_bin_digits)).parse_next(input)? {
+//!         Ok(output)
+//!     } else if let Some(output) = opt(("0o", parse_oct_digits)).parse_next(input)? {
+//!         Ok(output)
+//!     } else if let Some(output) = opt(("0d", parse_dec_digits)).parse_next(input)? {
+//!         Ok(output)
+//!     } else {
+//!         ("0x", parse_hex_digits).parse_next(input)
+//!     }
+//! }
+//! #
+//!
# fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn main() { +//! # let mut input = "0x1a2b Hello"; +//! # +//! # let (prefix, digits) = parse_digits.parse_next(&mut input).unwrap(); +//! # +//! # assert_eq!(input, " Hello"); +//! # assert_eq!(prefix, "0x"); +//! # assert_eq!(digits, "1a2b"); +//! # +//! # assert!(parse_digits(&mut "ghiWorld").is_err()); +//! # } +//! ``` +//! +//! [`alt`] encapsulates this if/else-if ladder pattern, with the last case being the `else`: +//! ```rust +//! # use winnow::prelude::*; +//! # use winnow::token::take_while; +//! use winnow::combinator::alt; +//! +//! fn parse_digits<'s>(input: &mut &'s str) -> PResult<(&'s str, &'s str)> { +//! alt(( +//! ("0b", parse_bin_digits), +//! ("0o", parse_oct_digits), +//! ("0d", parse_dec_digits), +//! ("0x", parse_hex_digits), +//! )).parse_next(input) +//! } +//! # +//! # fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn main() { +//! # let mut input = "0x1a2b Hello"; +//! # +//! # let (prefix, digits) = parse_digits.parse_next(&mut input).unwrap(); +//! # +//! # assert_eq!(input, " Hello"); +//! # assert_eq!(prefix, "0x"); +//! # assert_eq!(digits, "1a2b"); +//! # +//! # assert!(parse_digits(&mut "ghiWorld").is_err()); +//! # } +//! ``` +//! +//! > **Note:** [`success`] and [`fail`] are parsers that might be useful in the `else` case. +//! //! Sometimes a giant if/else-if ladder can be slow and you'd rather have a `match` statement for -//! branches of your parser that have unique prefixes. In this case, you can use the -//! [`dispatch`][crate::branch::dispatch] macro: +//! branches of your parser that have unique prefixes. In this case, you can use the +//! [`dispatch`] macro: //! //! ```rust -//! # use winnow::IResult; -//! # use winnow::Parser; -//! # use winnow::bytes::take_while1; -//! use winnow::branch::dispatch; -//! use winnow::bytes::take; +//! # use winnow::prelude::*; +//! # use winnow::token::take_while; +//! use winnow::combinator::dispatch; +//! use winnow::token::take; //! use winnow::combinator::fail; //! -//! fn parse_digits(input: &str) -> IResult<&str, &str> { +//! fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { //! dispatch!(take(2usize); //! "0b" => parse_bin_digits, //! 
"0o" => parse_oct_digits, @@ -195,26 +317,26 @@ //! } //! //! // ... -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # ('A'..='F'), //! # ('a'..='f'), @@ -222,21 +344,33 @@ //! # } //! //! fn main() { -//! let input = "0x1a2b Hello"; +//! let mut input = "0x1a2b Hello"; //! -//! let (remainder, digits) = parse_digits.parse_next(input).unwrap(); +//! let digits = parse_digits.parse_next(&mut input).unwrap(); //! -//! assert_eq!(remainder, " Hello"); +//! assert_eq!(input, " Hello"); //! assert_eq!(digits, "1a2b"); //! -//! assert!(parse_digits("ghiWorld").is_err()); +//! assert!(parse_digits(&mut "ghiWorld").is_err()); //! } //! ``` +//! +//! > **Note:** [`peek`] may be useful when [`dispatch`]ing from hints from each case's parser. +//! +//! See [`combinator`] for more alternative parsers. #![allow(unused_imports)] -use crate::branch::alt; -use crate::branch::dispatch; -use crate::sequence::preceded; +use super::chapter_6; +use crate::combinator; +use crate::combinator::alt; +use crate::combinator::dispatch; +use crate::combinator::fail; +use crate::combinator::opt; +use crate::combinator::peek; +use crate::combinator::preceded; +use crate::combinator::success; +use crate::stream::Stream; pub use super::chapter_2 as previous; pub use super::chapter_4 as next; +pub use crate::_tutorial as table_of_contents; diff --git a/vendor/winnow/src/_tutorial/chapter_4.rs b/vendor/winnow/src/_tutorial/chapter_4.rs index 5d19a05..328a648 100644 --- a/vendor/winnow/src/_tutorial/chapter_4.rs +++ b/vendor/winnow/src/_tutorial/chapter_4.rs @@ -1,18 +1,15 @@ //! # Chapter 4: Parsers With Custom Return Types //! //! So far, we have seen mostly functions that take an `&str`, and return a -//! `IResult<&str, &str>`. Splitting strings into smaller strings and characters is certainly +//! `PResult<&str>`. Splitting strings into smaller strings and characters is certainly //! useful, but it's not the only thing winnow is capable of! //! //! A useful operation when parsing is to convert between types; for example //! parsing from `&str` to another primitive, like [`usize`]. //! //! All we need to do for our parser to return a different type is to change -//! the second type parameter of [`IResult`] to the desired return type. -//! For example, to return a `usize`, return a `IResult<&str, usize>`. -//! Recall that the first type parameter of the `IResult` is the input -//! type, so even if you're returning something different, if your input -//! is a `&str`, the first type argument of `IResult` should be also. +//! the type parameter of [`PResult`] to the desired return type. +//! 
For example, to return a `usize`, return a `PResult<usize>`. //! //! One winnow-native way of doing a type conversion is to use the //! [`Parser::parse_to`] combinator @@ -20,68 +17,66 @@ //! //! The following code converts from a string containing a number to `usize`: //! ```rust -//! # use winnow::Parser; -//! # use winnow::IResult; -//! # use winnow::character::digit1; +//! # use winnow::prelude::*; +//! # use winnow::ascii::digit1; //! # -//! fn parse_digits(input: &str) -> IResult<&str, usize> { +//! fn parse_digits(input: &mut &str) -> PResult<usize> { //! digit1 //! .parse_to() //! .parse_next(input) //! } //! //! fn main() { -//! let input = "1024 Hello"; +//! let mut input = "1024 Hello"; //! -//! let (remainder, output) = parse_digits.parse_next(input).unwrap(); -//! assert_eq!(remainder, " Hello"); +//! let output = parse_digits.parse_next(&mut input).unwrap(); +//! assert_eq!(input, " Hello"); //! assert_eq!(output, 1024); //! -//! assert!(parse_digits("Z").is_err()); +//! assert!(parse_digits(&mut "Z").is_err()); //! } //! ``` //! -//! `Parser::parse_to` is just a convenient form of [`Parser::map_res`] which we can use to handle +//! `Parser::parse_to` is just a convenient form of [`Parser::try_map`] which we can use to handle //! all radices of numbers: //! ```rust -//! # use winnow::IResult; -//! # use winnow::Parser; -//! # use winnow::bytes::take_while1; -//! use winnow::branch::dispatch; -//! use winnow::bytes::take; +//! # use winnow::prelude::*; +//! # use winnow::token::take_while; +//! use winnow::combinator::dispatch; +//! use winnow::token::take; //! use winnow::combinator::fail; //! -//! fn parse_digits(input: &str) -> IResult<&str, usize> { +//! fn parse_digits(input: &mut &str) -> PResult<usize> { //! dispatch!(take(2usize); -//! "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), -//! "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), -//! "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), -//! "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), +//! "0b" => parse_bin_digits.try_map(|s| usize::from_str_radix(s, 2)), +//! "0o" => parse_oct_digits.try_map(|s| usize::from_str_radix(s, 8)), +//! "0d" => parse_dec_digits.try_map(|s| usize::from_str_radix(s, 10)), +//! "0x" => parse_hex_digits.try_map(|s| usize::from_str_radix(s, 16)), //! _ => fail, //! ).parse_next(input) //! } //! //! // ... -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # ('A'..='F'), //! # ('a'..='f'), @@ -89,21 +84,24 @@ //! # } //! //! fn main() { -//! let input = "0x1a2b Hello"; +//! let mut input = "0x1a2b Hello"; //! -//! 
let (remainder, digits) = parse_digits.parse_next(input).unwrap(); +//! let digits = parse_digits.parse_next(&mut input).unwrap(); //! -//! assert_eq!(remainder, " Hello"); +//! assert_eq!(input, " Hello"); //! assert_eq!(digits, 0x1a2b); //! -//! assert!(parse_digits("ghiWorld").is_err()); +//! assert!(parse_digits(&mut "ghiWorld").is_err()); //! } //! ``` +//! +//! See also [`Parser`] for more output-modifying parsers. #![allow(unused_imports)] -use crate::IResult; +use crate::PResult; use crate::Parser; use std::str::FromStr; pub use super::chapter_3 as previous; pub use super::chapter_5 as next; +pub use crate::_tutorial as table_of_contents; diff --git a/vendor/winnow/src/_tutorial/chapter_5.rs b/vendor/winnow/src/_tutorial/chapter_5.rs index b703bcf..8aa719b 100644 --- a/vendor/winnow/src/_tutorial/chapter_5.rs +++ b/vendor/winnow/src/_tutorial/chapter_5.rs @@ -1,55 +1,58 @@ //! # Chapter 5: Repetition //! //! In [`chapter_3`], we covered how to sequence different parsers into a tuple but sometimes you need to run a -//! single parser many times into a [`Vec`]. +//! single parser multiple times, collecting the result into a container, like [`Vec`]. //! -//! Let's take our `parse_digits` and collect a list of them with [`many0`]: +//! Let's collect the result of `parse_digits`: //! ```rust -//! # use winnow::IResult; -//! # use winnow::Parser; -//! # use winnow::bytes::take_while1; -//! # use winnow::branch::dispatch; -//! # use winnow::bytes::take; +//! # use winnow::prelude::*; +//! # use winnow::token::take_while; +//! # use winnow::combinator::dispatch; +//! # use winnow::token::take; //! # use winnow::combinator::fail; //! use winnow::combinator::opt; -//! use winnow::multi::many0; -//! use winnow::sequence::terminated; +//! use winnow::combinator::repeat; +//! use winnow::combinator::terminated; //! -//! fn parse_list(input: &str) -> IResult<&str, Vec<usize>> { -//! many0(terminated(parse_digits, opt(','))).parse_next(input) +//! fn parse_list(input: &mut &str) -> PResult<Vec<usize>> { +//! let mut list = Vec::new(); +//! while let Some(output) = opt(terminated(parse_digits, opt(','))).parse_next(input)? { +//! list.push(output); +//! } +//! Ok(list) //! } //! //! // ... -//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # fn parse_digits(input: &mut &str) -> PResult<usize> { //! # dispatch!(take(2usize); -//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), -//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), -//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), -//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), -//! # _ => fail, -//! # ).parse_next(input) +//! # "0b" => parse_bin_digits.try_map(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.try_map(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.try_map(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.try_map(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) //! # } //! # -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! 
# } //! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # ('A'..='F'), //! # ('a'..='f'), @@ -57,63 +60,127 @@ //! # } //! //! fn main() { -//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! let mut input = "0x1a2b,0x3c4d,0x5e6f Hello"; //! -//! let (remainder, digits) = parse_list.parse_next(input).unwrap(); +//! let digits = parse_list.parse_next(&mut input).unwrap(); //! -//! assert_eq!(remainder, " Hello"); +//! assert_eq!(input, " Hello"); //! assert_eq!(digits, vec![0x1a2b, 0x3c4d, 0x5e6f]); //! -//! assert!(parse_digits("ghiWorld").is_err()); +//! assert!(parse_digits(&mut "ghiWorld").is_err()); //! } //! ``` //! -//! You'll notice that the above allows trailing `,` when we intended to not support that. We can -//! easily fix this by using [`separated0`]: +//! We can implement this declaratively with [`repeat`]: //! ```rust -//! # use winnow::IResult; -//! # use winnow::Parser; -//! # use winnow::bytes::take_while1; -//! # use winnow::branch::dispatch; -//! # use winnow::bytes::take; +//! # use winnow::prelude::*; +//! # use winnow::token::take_while; +//! # use winnow::combinator::dispatch; +//! # use winnow::token::take; //! # use winnow::combinator::fail; -//! use winnow::multi::separated0; +//! use winnow::combinator::opt; +//! use winnow::combinator::repeat; +//! use winnow::combinator::terminated; +//! +//! fn parse_list(input: &mut &str) -> PResult<Vec<usize>> { +//! repeat(0.., +//! terminated(parse_digits, opt(',')) +//! ).parse_next(input) +//! } +//! # +//! # fn parse_digits(input: &mut &str) -> PResult<usize> { +//! # dispatch!(take(2usize); +//! # "0b" => parse_bin_digits.try_map(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.try_map(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.try_map(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.try_map(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) +//! # } +//! # +//! # fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn main() { +//! # let mut input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! # +//! # let digits = parse_list.parse_next(&mut input).unwrap(); +//! # +//! # assert_eq!(input, " Hello"); +//! # assert_eq!(digits, vec![0x1a2b, 0x3c4d, 0x5e6f]); +//! # +//! # assert!(parse_digits(&mut "ghiWorld").is_err()); +//! # } +//! ``` //! -//! fn parse_list(input: &str) -> IResult<&str, Vec<usize>> { -//! separated0(parse_digits, ",").parse_next(input) +//! 
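+//! > **Note:** the first argument to [`repeat`] is a range, so it can also demand an exact
+//! > count or other bounds. A small sketch requiring exactly three occurrences
+//! > (`parse_three` is an illustrative name, not a winnow API):
+//! > ```rust
+//! > # use winnow::prelude::*;
+//! > use winnow::ascii::hex_digit1;
+//! > use winnow::combinator::repeat;
+//! > use winnow::combinator::terminated;
+//! >
+//! > fn parse_three<'s>(input: &mut &'s str) -> PResult<Vec<&'s str>> {
+//! >     // Exactly three comma-terminated runs of hex digits.
+//! >     repeat(3, terminated(hex_digit1, ',')).parse_next(input)
+//! > }
+//! > #
+//! > # fn main() {
+//! > #     let mut input = "1a,2b,3c,rest";
+//! > #     let digits = parse_three.parse_next(&mut input).unwrap();
+//! > #     assert_eq!(digits, vec!["1a", "2b", "3c"]);
+//! > #     assert_eq!(input, "rest");
+//! > #     assert!(parse_three.parse_next(&mut "1a,2b,").is_err());
+//! > # }
+//! > ```
+//!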
You'll notice that the above allows trailing `,` when we intended to not support that. We can +//! easily fix this by using [`separated`]: +//! ```rust +//! # use winnow::prelude::*; +//! # use winnow::token::take_while; +//! # use winnow::combinator::dispatch; +//! # use winnow::token::take; +//! # use winnow::combinator::fail; +//! use winnow::combinator::separated; +//! +//! fn parse_list(input: &mut &str) -> PResult<Vec<usize>> { +//! separated(0.., parse_digits, ",").parse_next(input) //! } //! //! // ... -//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # fn parse_digits(input: &mut &str) -> PResult<usize> { //! # dispatch!(take(2usize); -//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), -//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), -//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), -//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), -//! # _ => fail, -//! # ).parse_next(input) +//! # "0b" => parse_bin_digits.try_map(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.try_map(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.try_map(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.try_map(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) //! # } //! # -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # ('A'..='F'), //! # ('a'..='f'), @@ -121,68 +188,66 @@ //! # } //! //! fn main() { -//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! let mut input = "0x1a2b,0x3c4d,0x5e6f Hello"; //! -//! let (remainder, digits) = parse_list.parse_next(input).unwrap(); +//! let digits = parse_list.parse_next(&mut input).unwrap(); //! -//! assert_eq!(remainder, " Hello"); +//! assert_eq!(input, " Hello"); //! assert_eq!(digits, vec![0x1a2b, 0x3c4d, 0x5e6f]); //! -//! assert!(parse_digits("ghiWorld").is_err()); +//! assert!(parse_digits(&mut "ghiWorld").is_err()); //! } //! ``` //! -//! If you look closely at [`many0`], it isn't collecting directly into a [`Vec`] but -//! [`Accumulate`] to gather the results. This let's us make more complex parsers than we did in +//! If you look closely at [`repeat`], it isn't collecting directly into a [`Vec`] but +//! [`Accumulate`] to gather the results. This lets us make more complex parsers than we did in //! [`chapter_2`] by accumulating the results into a `()` and [`recognize`][Parser::recognize]-ing the captured input: //! ```rust -//! # use winnow::IResult; -//! # use winnow::Parser; -//! # use winnow::bytes::take_while1; -//! # use winnow::branch::dispatch; -//! # use winnow::bytes::take; +//! 
# use winnow::prelude::*; +//! # use winnow::token::take_while; +//! # use winnow::combinator::dispatch; +//! # use winnow::token::take; //! # use winnow::combinator::fail; -//! # use winnow::multi::separated0; +//! # use winnow::combinator::separated; //! # -//! fn recognize_list(input: &str) -> IResult<&str, &str> { +//! fn recognize_list<'s>(input: &mut &'s str) -> PResult<&'s str> { //! parse_list.recognize().parse_next(input) //! } //! -//! fn parse_list(input: &str) -> IResult<&str, ()> { -//! separated0(parse_digits, ",").parse_next(input) +//! fn parse_list(input: &mut &str) -> PResult<()> { +//! separated(0.., parse_digits, ",").parse_next(input) //! } //! -//! // ... -//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # fn parse_digits(input: &mut &str) -> PResult<usize> { //! # dispatch!(take(2usize); -//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), -//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), -//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), -//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), -//! # _ => fail, -//! # ).parse_next(input) +//! # "0b" => parse_bin_digits.try_map(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.try_map(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.try_map(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.try_map(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) //! # } //! # -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # ('A'..='F'), //! # ('a'..='f'), @@ -190,25 +255,28 @@ //! # } //! //! fn main() { -//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! let mut input = "0x1a2b,0x3c4d,0x5e6f Hello"; //! -//! let (remainder, digits) = recognize_list.parse_next(input).unwrap(); +//! let digits = recognize_list.parse_next(&mut input).unwrap(); //! -//! assert_eq!(remainder, " Hello"); +//! assert_eq!(input, " Hello"); //! assert_eq!(digits, "0x1a2b,0x3c4d,0x5e6f"); //! -//! assert!(parse_digits("ghiWorld").is_err()); +//! assert!(parse_digits(&mut "ghiWorld").is_err()); //! } //! ``` +//! See [`combinator`] for more repetition parsers. 
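+//!
+//! > **Note:** since [`Accumulate`] is implemented for containers beyond [`Vec`], the same
+//! > combinators can build those directly. A small sketch collecting matched `char`s into a
+//! > `String` (`parse_word` is an illustrative name, not a winnow API):
+//! > ```rust
+//! > # use winnow::prelude::*;
+//! > use winnow::combinator::repeat;
+//! > use winnow::token::any;
+//! >
+//! > fn parse_word(input: &mut &str) -> PResult<String> {
+//! >     // Accumulate every token before the first ',' into a `String`.
+//! >     repeat(1.., any.verify(|c: &char| *c != ',')).parse_next(input)
+//! > }
+//! > #
+//! > # fn main() {
+//! > #     let mut input = "hello,world";
+//! > #     let word = parse_word.parse_next(&mut input).unwrap();
+//! > #     assert_eq!(word, "hello");
+//! > #     assert_eq!(input, ",world");
+//! > # }
+//! > ```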
#![allow(unused_imports)] use super::chapter_2; use super::chapter_3; -use crate::multi::many0; -use crate::multi::separated0; +use crate::combinator; +use crate::combinator::repeat; +use crate::combinator::separated; use crate::stream::Accumulate; use crate::Parser; use std::vec::Vec; pub use super::chapter_4 as previous; pub use super::chapter_6 as next; +pub use crate::_tutorial as table_of_contents; diff --git a/vendor/winnow/src/_tutorial/chapter_6.rs b/vendor/winnow/src/_tutorial/chapter_6.rs index 7416942..0d54e15 100644 --- a/vendor/winnow/src/_tutorial/chapter_6.rs +++ b/vendor/winnow/src/_tutorial/chapter_6.rs @@ -2,52 +2,49 @@ //! //! ## `Error` //! -//! Back in [`chapter_1`], we glossed over the `Err` side of [`IResult`]. `IResult<I, O>` is -//! actually short for `IResult<I, O, E=Error>` where [`Error`] is a cheap, universal error type -//! for getting started. When humans are producing the file, like with `toml`, you might want to -//! sacrifice some performance for providing more details on how to resolve the problem +//! Back in [`chapter_1`], we glossed over the `Err` side of [`PResult`]. `PResult<O>` is +//! actually short for `PResult<O, E=ContextError>` where [`ContextError`] is a relatively cheap +//! way of building up reasonable errors for humans. //! -//! winnow includes [`VerboseError`] for this but you can [customize the error as you -//! wish][_topic::error]. You can use [`Parser::context`] to annotate the error with custom types +//! You can use [`Parser::context`] to annotate the error with custom types //! while unwinding to further improve the error quality. //! //! ```rust -//! # use winnow::IResult; -//! # use winnow::Parser; -//! # use winnow::bytes::take_while1; -//! # use winnow::branch::alt; -//! use winnow::error::VerboseError; +//! # use winnow::prelude::*; +//! # use winnow::token::take_while; +//! # use winnow::combinator::alt; +//! use winnow::error::StrContext; //! -//! fn parse_digits(input: &str) -> IResult<&str, (&str, &str), VerboseError<&str>> { +//! fn parse_digits<'s>(input: &mut &'s str) -> PResult<(&'s str, &'s str)> { //! alt(( -//! ("0b", parse_bin_digits).context("binary"), -//! ("0o", parse_oct_digits).context("octal"), -//! ("0d", parse_dec_digits).context("decimal"), -//! ("0x", parse_hex_digits).context("hexadecimal"), +//! ("0b", parse_bin_digits).context(StrContext::Label("binary")), +//! ("0o", parse_oct_digits).context(StrContext::Label("octal")), +//! ("0d", parse_dec_digits).context(StrContext::Label("decimal")), +//! ("0x", parse_hex_digits).context(StrContext::Label("hexadecimal")), //! )).parse_next(input) //! } //! //! // ... -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { -//! # take_while1(( +//! # fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { -//! # take_while1(( +//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { -//! # take_while1(( +//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { -//! # take_while1(( +//! 
+//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> {
+//! #     take_while(1.., (
//! #         ('0'..='9'),
//! #         ('A'..='F'),
//! #         ('a'..='f'),
@@ -55,11 +52,11 @@
//! # }
//!
//! fn main() {
-//!     let input = "0x1a2b Hello";
+//!     let mut input = "0x1a2b Hello";
//!
-//!     let (remainder, (prefix, digits)) = parse_digits.parse_next(input).unwrap();
+//!     let (prefix, digits) = parse_digits.parse_next(&mut input).unwrap();
//!
-//!     assert_eq!(remainder, " Hello");
+//!     assert_eq!(input, " Hello");
//!     assert_eq!(prefix, "0x");
//!     assert_eq!(digits, "1a2b");
//! }
@@ -71,59 +68,58 @@
//!
//! ## `ErrMode`
//!
-//! Let's break down `IResult<I, O, E>` one step further:
+//! Let's break down `PResult<O, E>` one step further:
//! ```rust
-//! # use winnow::error::Error;
+//! # use winnow::error::ErrorKind;
//! # use winnow::error::ErrMode;
-//! pub type IResult<I, O, E = Error<I>> = Result<(I, O), ErrMode<E>>;
+//! pub type PResult<O, E = ErrorKind> = Result<O, ErrMode<E>>;
//! ```
-//! `IResult` is just a fancy wrapper around `Result` that wraps our error in an [`ErrMode`]
+//! [`PResult`] is just a fancy wrapper around `Result` that wraps our error in an [`ErrMode`]
//! type.
//!
-//! `ErrMode` is an enum with `Backtrack` and `Cut` variants (ignore `Incomplete` as its only
-//! relevant for [streaming][_topic::stream]). By default, errors are `Backtrack`, meaning that
-//! other parsing branches will be attempted on failure, like the next case of an `alt`. `Cut`
+//! [`ErrMode`] is an enum with [`Backtrack`] and [`Cut`] variants (ignore [`Incomplete`] as it's only
+//! relevant for [streaming][_topic::stream]). By default, errors are [`Backtrack`], meaning that
+//! other parsing branches will be attempted on failure, like the next case of an [`alt`]. [`Cut`]
//! short-circuits all other branches, immediately reporting the error.
//!
//! So we can get the correct `context` by modifying the above example with [`cut_err`]:
//! ```rust
-//! # use winnow::IResult;
-//! # use winnow::Parser;
-//! # use winnow::bytes::take_while1;
-//! # use winnow::branch::alt;
-//! # use winnow::error::VerboseError;
+//! # use winnow::prelude::*;
+//! # use winnow::token::take_while;
+//! # use winnow::combinator::alt;
+//! # use winnow::error::StrContext;
//! use winnow::combinator::cut_err;
//!
-//! fn parse_digits(input: &str) -> IResult<&str, (&str, &str), VerboseError<&str>> {
+//! fn parse_digits<'s>(input: &mut &'s str) -> PResult<(&'s str, &'s str)> {
//!     alt((
-//!         ("0b", cut_err(parse_bin_digits)).context("binary"),
-//!         ("0o", cut_err(parse_oct_digits)).context("octal"),
-//!         ("0d", cut_err(parse_dec_digits)).context("decimal"),
-//!         ("0x", cut_err(parse_hex_digits)).context("hexadecimal"),
+//!         ("0b", cut_err(parse_bin_digits)).context(StrContext::Label("binary")),
+//!         ("0o", cut_err(parse_oct_digits)).context(StrContext::Label("octal")),
+//!         ("0d", cut_err(parse_dec_digits)).context(StrContext::Label("decimal")),
+//!         ("0x", cut_err(parse_hex_digits)).context(StrContext::Label("hexadecimal")),
//!     )).parse_next(input)
//! }
//!
//! // ...
-//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> {
-//! #     take_while1((
+//! # fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> {
+//! #     take_while(1.., (
//! #         ('0'..='7'),
//! #     )).parse_next(input)
//! # }
//! #
-//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> {
-//! #     take_while1((
+//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> {
+//!
# take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { -//! # take_while1(( +//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { -//! # take_while1(( +//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # ('A'..='F'), //! # ('a'..='f'), @@ -131,11 +127,11 @@ //! # } //! //! fn main() { -//! let input = "0x1a2b Hello"; +//! let mut input = "0x1a2b Hello"; //! -//! let (remainder, (prefix, digits)) = parse_digits.parse_next(input).unwrap(); +//! let (prefix, digits) = parse_digits.parse_next(&mut input).unwrap(); //! -//! assert_eq!(remainder, " Hello"); +//! assert_eq!(input, " Hello"); //! assert_eq!(prefix, "0x"); //! assert_eq!(digits, "1a2b"); //! } @@ -145,14 +141,16 @@ #![allow(unused_imports)] use super::chapter_1; use super::chapter_3; -use crate::branch::alt; +use crate::combinator::alt; use crate::combinator::cut_err; +use crate::error::ContextError; use crate::error::ErrMode; -use crate::error::Error; -use crate::error::VerboseError; -use crate::IResult; +use crate::error::ErrMode::*; +use crate::error::ErrorKind; +use crate::PResult; use crate::Parser; use crate::_topic; pub use super::chapter_5 as previous; pub use super::chapter_7 as next; +pub use crate::_tutorial as table_of_contents; diff --git a/vendor/winnow/src/_tutorial/chapter_7.rs b/vendor/winnow/src/_tutorial/chapter_7.rs index 8c27cb5..7f13f98 100644 --- a/vendor/winnow/src/_tutorial/chapter_7.rs +++ b/vendor/winnow/src/_tutorial/chapter_7.rs @@ -3,82 +3,91 @@ //! So far, we've highlighted how to incrementally parse, but how do we bring this all together //! into our application? //! -//! The type we've been working with looks like: +//! Parsers we've been working with look like: //! ```rust -//! # use winnow::error::VerboseError; +//! # use winnow::error::ContextError; //! # use winnow::error::ErrMode; -//! type IResult<'i, O> = Result< -//! (&'i str, O), -//! ErrMode< -//! VerboseError<&'i str> -//! > +//! # use winnow::Parser; +//! # +//! pub fn parser<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! // ... +//! # Ok("") +//! } +//! +//! type PResult<O> = Result< +//! O, +//! ErrMode<ContextError> //! >; //! ``` -//! 1. We have to decide what to do about the `remainder` of the input. -//! 2. The error type is not compatible with the rest of the Rust ecosystem +//! 1. We have to decide what to do about the "remainder" of the `input`. +//! 2. The [`ErrMode<ContextError>`] is not compatible with the rest of the Rust ecosystem. +//! Normally, Rust applications want errors that are `std::error::Error + Send + Sync + 'static` +//! meaning: +//! - They implement the [`std::error::Error`] trait +//! - They can be sent across threads +//! - They are safe to be referenced across threads +//! - They do not borrow //! -//! Normally, Rust applications want errors that are `std::error::Error + Send + Sync + 'static` -//! meaning: -//! - They implement the [`std::error::Error`] trait -//! - They can be sent across threads -//! - They are safe to be referenced across threads -//! - They do not borrow -//! -//! winnow provides some helpers for this: +//! winnow provides [`Parser::parse`] to help with this: +//! - Ensures we hit [`eof`] +//! 
- Removes the [`ErrMode`] wrapper +//! - Wraps the error in [`ParseError`] +//! - Provides access to the original [`input`][ParseError::input] with the +//! [`offset`][ParseError::offset] of where it failed +//! - Provides a default renderer (via [`std::fmt::Display`]) //! ```rust -//! # use winnow::IResult; -//! # use winnow::bytes::take_while1; -//! # use winnow::branch::dispatch; -//! # use winnow::bytes::take; +//! # use winnow::prelude::*; +//! # use winnow::token::take_while; +//! # use winnow::combinator::dispatch; +//! # use winnow::token::take; //! # use winnow::combinator::fail; //! use winnow::Parser; -//! use winnow::error::Error; //! //! #[derive(Debug, PartialEq, Eq)] //! pub struct Hex(usize); //! //! impl std::str::FromStr for Hex { -//! type Err = Error<String>; +//! type Err = String; //! //! fn from_str(input: &str) -> Result<Self, Self::Err> { //! parse_digits //! .map(Hex) //! .parse(input) -//! .map_err(|e| e.into_owned()) +//! .map_err(|e| e.to_string()) //! } //! } //! //! // ... -//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # fn parse_digits<'s>(input: &mut &'s str) -> PResult<usize> { //! # dispatch!(take(2usize); -//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), -//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), -//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), -//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), +//! # "0b" => parse_bin_digits.try_map(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.try_map(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.try_map(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.try_map(|s| usize::from_str_radix(s, 16)), //! # _ => fail, //! # ).parse_next(input) //! # } //! # -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_bin_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_oct_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_dec_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( +//! # fn parse_hex_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! # take_while(1.., ( //! # ('0'..='9'), //! # ('A'..='F'), //! # ('a'..='f'), @@ -95,17 +104,15 @@ //! assert!(input.parse::<Hex>().is_err()); //! } //! ``` -//! - Ensures we hit [`eof`] -//! - Removes the [`ErrMode`] wrapper -//! -//! [`Error::into_owned`]: -//! 
- Converts the `&str` in `Error` to `String` which enables support for [`std::error::Error`] #![allow(unused_imports)] use super::chapter_1; use crate::combinator::eof; use crate::error::ErrMode; -use crate::error::Error; -use crate::IResult; +use crate::error::InputError; +use crate::error::ParseError; +use crate::PResult; +use crate::Parser; pub use super::chapter_6 as previous; +pub use crate::_tutorial as table_of_contents; diff --git a/vendor/winnow/src/_tutorial/mod.rs b/vendor/winnow/src/_tutorial/mod.rs index e4b8392..2a4bd61 100644 --- a/vendor/winnow/src/_tutorial/mod.rs +++ b/vendor/winnow/src/_tutorial/mod.rs @@ -1,6 +1,7 @@ //! # Tutorial //! -//! Table of Content +//! Table of Contents +#![allow(clippy::std_instead_of_core)] pub mod chapter_0; pub mod chapter_1; diff --git a/vendor/winnow/src/ascii/mod.rs b/vendor/winnow/src/ascii/mod.rs new file mode 100644 index 0000000..0aacf27 --- /dev/null +++ b/vendor/winnow/src/ascii/mod.rs @@ -0,0 +1,1714 @@ +//! Character specific parsers and combinators +//! +//! Functions recognizing specific characters + +#[cfg(test)] +mod tests; + +use crate::lib::std::ops::{Add, Shl}; + +use crate::combinator::alt; +use crate::combinator::cut_err; +use crate::combinator::opt; +use crate::error::ParserError; +use crate::error::{ErrMode, ErrorKind, Needed}; +use crate::stream::{AsBStr, AsChar, ParseSlice, Stream, StreamIsPartial}; +use crate::stream::{Compare, CompareResult}; +use crate::token::one_of; +use crate::token::take_till; +use crate::token::take_while; +use crate::trace::trace; +use crate::PResult; +use crate::Parser; + +/// Mark a value as case-insensitive for ASCII characters +/// +/// # Example +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}}; +/// # use winnow::ascii::Caseless; +/// +/// fn parser<'s>(s: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// Caseless("hello").parse_next(s) +/// } +/// +/// assert_eq!(parser.parse_peek("Hello, World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser.parse_peek("hello, World!"), Ok((", World!", "hello"))); +/// assert_eq!(parser.parse_peek("HeLlo, World!"), Ok((", World!", "HeLlo"))); +/// assert_eq!(parser.parse_peek("Some"), Err(ErrMode::Backtrack(InputError::new("Some", ErrorKind::Tag)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// ``` +#[derive(Copy, Clone, Debug)] +pub struct Caseless<T>(pub T); + +impl Caseless<&str> { + /// Get the byte-representation of this case-insensitive value + #[inline(always)] + pub fn as_bytes(&self) -> Caseless<&[u8]> { + Caseless(self.0.as_bytes()) + } +} + +/// Recognizes the string `"\r\n"`. +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. 
+/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}}; +/// # use winnow::ascii::crlf; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// crlf.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("\r\nc"), Ok(("c", "\r\n"))); +/// assert_eq!(parser.parse_peek("ab\r\nc"), Err(ErrMode::Backtrack(InputError::new("ab\r\nc", ErrorKind::Tag)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::crlf; +/// assert_eq!(crlf::<_, InputError<_>>.parse_peek(Partial::new("\r\nc")), Ok((Partial::new("c"), "\r\n"))); +/// assert_eq!(crlf::<_, InputError<_>>.parse_peek(Partial::new("ab\r\nc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("ab\r\nc"), ErrorKind::Tag)))); +/// assert_eq!(crlf::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(2)))); +/// ``` +#[inline(always)] +pub fn crlf<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + I: Compare<&'static str>, +{ + trace("crlf", "\r\n").parse_next(input) +} + +/// Recognizes a string of any char except `"\r\n"` or `"\n"`. +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. +/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::ascii::not_line_ending; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// not_line_ending.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("ab\r\nc"), Ok(("\r\nc", "ab"))); +/// assert_eq!(parser.parse_peek("ab\nc"), Ok(("\nc", "ab"))); +/// assert_eq!(parser.parse_peek("abc"), Ok(("", "abc"))); +/// assert_eq!(parser.parse_peek(""), Ok(("", ""))); +/// assert_eq!(parser.parse_peek("a\rb\nc"), Err(ErrMode::Backtrack(InputError::new("\rb\nc", ErrorKind::Tag )))); +/// assert_eq!(parser.parse_peek("a\rbc"), Err(ErrMode::Backtrack(InputError::new("\rbc", ErrorKind::Tag )))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::not_line_ending; +/// assert_eq!(not_line_ending::<_, InputError<_>>.parse_peek(Partial::new("ab\r\nc")), Ok((Partial::new("\r\nc"), "ab"))); +/// assert_eq!(not_line_ending::<_, InputError<_>>.parse_peek(Partial::new("abc")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(not_line_ending::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(not_line_ending::<_, InputError<_>>.parse_peek(Partial::new("a\rb\nc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("\rb\nc"), ErrorKind::Tag )))); +/// assert_eq!(not_line_ending::<_, InputError<_>>.parse_peek(Partial::new("a\rbc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("\rbc"), ErrorKind::Tag )))); +/// ``` +#[inline(always)] +pub fn not_line_ending<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: 
Stream, + I: Compare<&'static str>, + <I as Stream>::Token: AsChar + Clone, +{ + trace("not_line_ending", move |input: &mut I| { + if <I as StreamIsPartial>::is_partial_supported() { + not_line_ending_::<_, _, true>(input) + } else { + not_line_ending_::<_, _, false>(input) + } + }) + .parse_next(input) +} + +fn not_line_ending_<I, E: ParserError<I>, const PARTIAL: bool>( + input: &mut I, +) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + I: Compare<&'static str>, + <I as Stream>::Token: AsChar + Clone, +{ + let res = take_till(0.., ('\r', '\n')).parse_next(input)?; + if input.compare("\r") == CompareResult::Ok { + let comp = input.compare("\r\n"); + match comp { + //FIXME: calculate the right index + CompareResult::Ok => {} + CompareResult::Incomplete if PARTIAL && input.is_partial() => { + return Err(ErrMode::Incomplete(Needed::Unknown)); + } + CompareResult::Incomplete | CompareResult::Error => { + let e: ErrorKind = ErrorKind::Tag; + return Err(ErrMode::from_error_kind(input, e)); + } + } + } + Ok(res) +} + +/// Recognizes an end of line (both `"\n"` and `"\r\n"`). +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. +/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::ascii::line_ending; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// line_ending.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("\r\nc"), Ok(("c", "\r\n"))); +/// assert_eq!(parser.parse_peek("ab\r\nc"), Err(ErrMode::Backtrack(InputError::new("ab\r\nc", ErrorKind::Tag)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::line_ending; +/// assert_eq!(line_ending::<_, InputError<_>>.parse_peek(Partial::new("\r\nc")), Ok((Partial::new("c"), "\r\n"))); +/// assert_eq!(line_ending::<_, InputError<_>>.parse_peek(Partial::new("ab\r\nc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("ab\r\nc"), ErrorKind::Tag)))); +/// assert_eq!(line_ending::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn line_ending<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + I: Compare<&'static str>, +{ + trace("line_ending", alt(("\n", "\r\n"))).parse_next(input) +} + +/// Matches a newline character `'\n'`. +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. 
+/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::ascii::newline; +/// fn parser<'s>(input: &mut &'s str) -> PResult<char, InputError<&'s str>> { +/// newline.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("\nc"), Ok(("c", '\n'))); +/// assert_eq!(parser.parse_peek("\r\nc"), Err(ErrMode::Backtrack(InputError::new("\r\nc", ErrorKind::Verify)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Token)))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::newline; +/// assert_eq!(newline::<_, InputError<_>>.parse_peek(Partial::new("\nc")), Ok((Partial::new("c"), '\n'))); +/// assert_eq!(newline::<_, InputError<_>>.parse_peek(Partial::new("\r\nc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("\r\nc"), ErrorKind::Verify)))); +/// assert_eq!(newline::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn newline<I, Error: ParserError<I>>(input: &mut I) -> PResult<char, Error> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar + Clone, +{ + trace("newline", '\n'.map(AsChar::as_char)).parse_next(input) +} + +/// Matches a tab character `'\t'`. +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. +/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::ascii::tab; +/// fn parser<'s>(input: &mut &'s str) -> PResult<char, InputError<&'s str>> { +/// tab.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("\tc"), Ok(("c", '\t'))); +/// assert_eq!(parser.parse_peek("\r\nc"), Err(ErrMode::Backtrack(InputError::new("\r\nc", ErrorKind::Verify)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Token)))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::tab; +/// assert_eq!(tab::<_, InputError<_>>.parse_peek(Partial::new("\tc")), Ok((Partial::new("c"), '\t'))); +/// assert_eq!(tab::<_, InputError<_>>.parse_peek(Partial::new("\r\nc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("\r\nc"), ErrorKind::Verify)))); +/// assert_eq!(tab::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn tab<I, Error: ParserError<I>>(input: &mut I) -> PResult<char, Error> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar + Clone, +{ + trace("tab", '\t'.map(AsChar::as_char)).parse_next(input) +} + +/// Recognizes zero or more lowercase and uppercase ASCII alphabetic characters: `'a'..='z'`, `'A'..='Z'` +/// +/// *Complete version*: Will return the whole input if no terminating token is found (a non +/// alphabetic character). +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non alphabetic character). 
+///
+/// # Example
+///
+/// ```
+/// # use winnow::prelude::*;
+/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed};
+/// # use winnow::ascii::alpha0;
+/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> {
+///     alpha0.parse_next(input)
+/// }
+///
+/// assert_eq!(parser.parse_peek("ab1c"), Ok(("1c", "ab")));
+/// assert_eq!(parser.parse_peek("1c"), Ok(("1c", "")));
+/// assert_eq!(parser.parse_peek(""), Ok(("", "")));
+/// ```
+///
+/// ```
+/// # use winnow::prelude::*;
+/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed};
+/// # use winnow::Partial;
+/// # use winnow::ascii::alpha0;
+/// assert_eq!(alpha0::<_, InputError<_>>.parse_peek(Partial::new("ab1c")), Ok((Partial::new("1c"), "ab")));
+/// assert_eq!(alpha0::<_, InputError<_>>.parse_peek(Partial::new("1c")), Ok((Partial::new("1c"), "")));
+/// assert_eq!(alpha0::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
+/// ```
+#[inline(always)]
+pub fn alpha0<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E>
+where
+    I: StreamIsPartial,
+    I: Stream,
+    <I as Stream>::Token: AsChar,
+{
+    trace("alpha0", take_while(0.., AsChar::is_alpha)).parse_next(input)
+}
+
+/// Recognizes one or more lowercase and uppercase ASCII alphabetic characters: `'a'..='z'`, `'A'..='Z'`
+///
+/// *Complete version*: Will return an error if there's not enough input data,
+/// or the whole input if no terminating token is found (a non alphabetic character).
+///
+/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data,
+/// or if no terminating token is found (a non alphabetic character).
+///
+/// # Example
+///
+/// ```
+/// # use winnow::prelude::*;
+/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed};
+/// # use winnow::ascii::alpha1;
+/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> {
+///     alpha1.parse_next(input)
+/// }
+///
+/// assert_eq!(parser.parse_peek("aB1c"), Ok(("1c", "aB")));
+/// assert_eq!(parser.parse_peek("1c"), Err(ErrMode::Backtrack(InputError::new("1c", ErrorKind::Slice))));
+/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice))));
+/// ```
+///
+/// ```
+/// # use winnow::prelude::*;
+/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed};
+/// # use winnow::Partial;
+/// # use winnow::ascii::alpha1;
+/// assert_eq!(alpha1::<_, InputError<_>>.parse_peek(Partial::new("aB1c")), Ok((Partial::new("1c"), "aB")));
+/// assert_eq!(alpha1::<_, InputError<_>>.parse_peek(Partial::new("1c")), Err(ErrMode::Backtrack(InputError::new(Partial::new("1c"), ErrorKind::Slice))));
+/// assert_eq!(alpha1::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
+/// ```
+#[inline(always)]
+pub fn alpha1<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E>
+where
+    I: StreamIsPartial,
+    I: Stream,
+    <I as Stream>::Token: AsChar,
+{
+    trace("alpha1", take_while(1.., AsChar::is_alpha)).parse_next(input)
+}
+
+/// Recognizes zero or more ASCII numerical characters: `'0'..='9'`
+///
+/// *Complete version*: Will return the whole input if no terminating token is found (a non digit character).
+/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non digit character). +/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::ascii::digit0; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// digit0.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("21c"), Ok(("c", "21"))); +/// assert_eq!(parser.parse_peek("21"), Ok(("", "21"))); +/// assert_eq!(parser.parse_peek("a21c"), Ok(("a21c", ""))); +/// assert_eq!(parser.parse_peek(""), Ok(("", ""))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::digit0; +/// assert_eq!(digit0::<_, InputError<_>>.parse_peek(Partial::new("21c")), Ok((Partial::new("c"), "21"))); +/// assert_eq!(digit0::<_, InputError<_>>.parse_peek(Partial::new("a21c")), Ok((Partial::new("a21c"), ""))); +/// assert_eq!(digit0::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn digit0<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar, +{ + trace("digit0", take_while(0.., AsChar::is_dec_digit)).parse_next(input) +} + +/// Recognizes one or more ASCII numerical characters: `'0'..='9'` +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non digit character). +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non digit character). 
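Editor's note: because `digit0` (above) may match the empty string, it pairs well with a mandatory parser such as `alpha1` when recognizing compound tokens. A minimal sketch, assuming the vendored winnow 0.5 API (`name` is a hypothetical helper, not part of this patch):

```rust
use winnow::ascii::{alpha1, digit0};
use winnow::error::InputError;
use winnow::prelude::*;

// A token such as `ab12`: a mandatory letter run, then an optional digit run.
// `digit0` cannot fail here; it matches the empty string when no digit follows.
fn name<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> {
    (alpha1, digit0).recognize().parse_next(input)
}

fn main() {
    assert_eq!(name.parse_peek("ab12 = 3"), Ok((" = 3", "ab12")));
    assert_eq!(name.parse_peek("ab = 3"), Ok((" = 3", "ab")));
    assert!(name.parse_peek("12ab").is_err());
}
```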
+/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::ascii::digit1; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// digit1.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("21c"), Ok(("c", "21"))); +/// assert_eq!(parser.parse_peek("c1"), Err(ErrMode::Backtrack(InputError::new("c1", ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::digit1; +/// assert_eq!(digit1::<_, InputError<_>>.parse_peek(Partial::new("21c")), Ok((Partial::new("c"), "21"))); +/// assert_eq!(digit1::<_, InputError<_>>.parse_peek(Partial::new("c1")), Err(ErrMode::Backtrack(InputError::new(Partial::new("c1"), ErrorKind::Slice)))); +/// assert_eq!(digit1::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +/// +/// ## Parsing an integer +/// +/// You can use `digit1` in combination with [`Parser::try_map`] to parse an integer: +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed, Parser}; +/// # use winnow::ascii::digit1; +/// fn parser<'s>(input: &mut &'s str) -> PResult<u32, InputError<&'s str>> { +/// digit1.try_map(str::parse).parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("416"), Ok(("", 416))); +/// assert_eq!(parser.parse_peek("12b"), Ok(("b", 12))); +/// assert!(parser.parse_peek("b").is_err()); +/// ``` +#[inline(always)] +pub fn digit1<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar, +{ + trace("digit1", take_while(1.., AsChar::is_dec_digit)).parse_next(input) +} + +/// Recognizes zero or more ASCII hexadecimal numerical characters: `'0'..='9'`, `'A'..='F'`, +/// `'a'..='f'` +/// +/// *Complete version*: Will return the whole input if no terminating token is found (a non hexadecimal digit character). +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non hexadecimal digit character). 
+/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::ascii::hex_digit0; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// hex_digit0.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("21cZ"), Ok(("Z", "21c"))); +/// assert_eq!(parser.parse_peek("Z21c"), Ok(("Z21c", ""))); +/// assert_eq!(parser.parse_peek(""), Ok(("", ""))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::hex_digit0; +/// assert_eq!(hex_digit0::<_, InputError<_>>.parse_peek(Partial::new("21cZ")), Ok((Partial::new("Z"), "21c"))); +/// assert_eq!(hex_digit0::<_, InputError<_>>.parse_peek(Partial::new("Z21c")), Ok((Partial::new("Z21c"), ""))); +/// assert_eq!(hex_digit0::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn hex_digit0<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar, +{ + trace("hex_digit0", take_while(0.., AsChar::is_hex_digit)).parse_next(input) +} + +/// Recognizes one or more ASCII hexadecimal numerical characters: `'0'..='9'`, `'A'..='F'`, +/// `'a'..='f'` +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non hexadecimal digit character). +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non hexadecimal digit character). +/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::ascii::hex_digit1; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// hex_digit1.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("21cZ"), Ok(("Z", "21c"))); +/// assert_eq!(parser.parse_peek("H2"), Err(ErrMode::Backtrack(InputError::new("H2", ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::hex_digit1; +/// assert_eq!(hex_digit1::<_, InputError<_>>.parse_peek(Partial::new("21cZ")), Ok((Partial::new("Z"), "21c"))); +/// assert_eq!(hex_digit1::<_, InputError<_>>.parse_peek(Partial::new("H2")), Err(ErrMode::Backtrack(InputError::new(Partial::new("H2"), ErrorKind::Slice)))); +/// assert_eq!(hex_digit1::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn hex_digit1<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar, +{ + trace("hex_digit1", take_while(1.., AsChar::is_hex_digit)).parse_next(input) +} + +/// Recognizes zero or more octal characters: `'0'..='7'` +/// +/// *Complete version*: Will return the whole input if no terminating token is found (a non octal +/// digit character). 
+/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non octal digit character). +/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::ascii::oct_digit0; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// oct_digit0.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("21cZ"), Ok(("cZ", "21"))); +/// assert_eq!(parser.parse_peek("Z21c"), Ok(("Z21c", ""))); +/// assert_eq!(parser.parse_peek(""), Ok(("", ""))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::oct_digit0; +/// assert_eq!(oct_digit0::<_, InputError<_>>.parse_peek(Partial::new("21cZ")), Ok((Partial::new("cZ"), "21"))); +/// assert_eq!(oct_digit0::<_, InputError<_>>.parse_peek(Partial::new("Z21c")), Ok((Partial::new("Z21c"), ""))); +/// assert_eq!(oct_digit0::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn oct_digit0<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar, +{ + trace("oct_digit0", take_while(0.., AsChar::is_oct_digit)).parse_next(input) +} + +/// Recognizes one or more octal characters: `'0'..='7'` +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non octal digit character). +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non octal digit character). 
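Editor's note: the `*_digit1` parsers combine with `Parser::try_map` for radix conversion, mirroring the `digit1` integer example earlier in this file. A minimal sketch for hexadecimal using `hex_digit1` from above, assuming the vendored winnow 0.5 API (`hex_literal` is illustrative only):

```rust
use winnow::ascii::hex_digit1;
use winnow::combinator::preceded;
use winnow::error::InputError;
use winnow::prelude::*;

// A `0x`-prefixed hex literal; `u32::from_str_radix` reports overflow, which
// `try_map` turns into a backtracking parse error.
fn hex_literal<'s>(input: &mut &'s str) -> PResult<u32, InputError<&'s str>> {
    preceded("0x", hex_digit1.try_map(|s: &str| u32::from_str_radix(s, 16))).parse_next(input)
}

fn main() {
    assert_eq!(hex_literal.parse_peek("0x1A2b;"), Ok((";", 0x1A2B)));
    assert!(hex_literal.parse_peek("0xGG").is_err());
}
```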
+///
+/// # Example
+///
+/// ```
+/// # use winnow::prelude::*;
+/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed};
+/// # use winnow::ascii::oct_digit1;
+/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> {
+///     oct_digit1.parse_next(input)
+/// }
+///
+/// assert_eq!(parser.parse_peek("21cZ"), Ok(("cZ", "21")));
+/// assert_eq!(parser.parse_peek("H2"), Err(ErrMode::Backtrack(InputError::new("H2", ErrorKind::Slice))));
+/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice))));
+/// ```
+///
+/// ```
+/// # use winnow::prelude::*;
+/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed};
+/// # use winnow::Partial;
+/// # use winnow::ascii::oct_digit1;
+/// assert_eq!(oct_digit1::<_, InputError<_>>.parse_peek(Partial::new("21cZ")), Ok((Partial::new("cZ"), "21")));
+/// assert_eq!(oct_digit1::<_, InputError<_>>.parse_peek(Partial::new("H2")), Err(ErrMode::Backtrack(InputError::new(Partial::new("H2"), ErrorKind::Slice))));
+/// assert_eq!(oct_digit1::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
+/// ```
+#[inline(always)]
+pub fn oct_digit1<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E>
+where
+    I: StreamIsPartial,
+    I: Stream,
+    <I as Stream>::Token: AsChar,
+{
+    trace("oct_digit1", take_while(1.., AsChar::is_oct_digit)).parse_next(input)
+}
+
+/// Recognizes zero or more ASCII numerical and alphabetic characters: `'a'..='z'`, `'A'..='Z'`, `'0'..='9'`
+///
+/// *Complete version*: Will return the whole input if no terminating token is found (a non
+/// alphanumerical character).
+///
+/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data,
+/// or if no terminating token is found (a non alphanumerical character).
+/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::ascii::alphanumeric0; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// alphanumeric0.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("21cZ%1"), Ok(("%1", "21cZ"))); +/// assert_eq!(parser.parse_peek("&Z21c"), Ok(("&Z21c", ""))); +/// assert_eq!(parser.parse_peek(""), Ok(("", ""))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::alphanumeric0; +/// assert_eq!(alphanumeric0::<_, InputError<_>>.parse_peek(Partial::new("21cZ%1")), Ok((Partial::new("%1"), "21cZ"))); +/// assert_eq!(alphanumeric0::<_, InputError<_>>.parse_peek(Partial::new("&Z21c")), Ok((Partial::new("&Z21c"), ""))); +/// assert_eq!(alphanumeric0::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn alphanumeric0<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar, +{ + trace("alphanumeric0", take_while(0.., AsChar::is_alphanum)).parse_next(input) +} + +/// Recognizes one or more ASCII numerical and alphabetic characters: `'a'..='z'`, `'A'..='Z'`, `'0'..='9'` +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non alphanumerical character). +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non alphanumerical character). +/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::ascii::alphanumeric1; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// alphanumeric1.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("21cZ%1"), Ok(("%1", "21cZ"))); +/// assert_eq!(parser.parse_peek("&H2"), Err(ErrMode::Backtrack(InputError::new("&H2", ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::alphanumeric1; +/// assert_eq!(alphanumeric1::<_, InputError<_>>.parse_peek(Partial::new("21cZ%1")), Ok((Partial::new("%1"), "21cZ"))); +/// assert_eq!(alphanumeric1::<_, InputError<_>>.parse_peek(Partial::new("&H2")), Err(ErrMode::Backtrack(InputError::new(Partial::new("&H2"), ErrorKind::Slice)))); +/// assert_eq!(alphanumeric1::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn alphanumeric1<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar, +{ + trace("alphanumeric1", take_while(1.., AsChar::is_alphanum)).parse_next(input) +} + +/// Recognizes zero or more spaces and tabs. +/// +/// *Complete version*: Will return the whole input if no terminating token is found (a non space +/// character). 
+///
+/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data,
+/// or if no terminating token is found (a non space character).
+///
+/// # Example
+///
+/// ```
+/// # use winnow::prelude::*;
+/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed};
+/// # use winnow::Partial;
+/// # use winnow::ascii::space0;
+/// assert_eq!(space0::<_, InputError<_>>.parse_peek(Partial::new(" \t21c")), Ok((Partial::new("21c"), " \t")));
+/// assert_eq!(space0::<_, InputError<_>>.parse_peek(Partial::new("Z21c")), Ok((Partial::new("Z21c"), "")));
+/// assert_eq!(space0::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
+/// ```
+#[inline(always)]
+pub fn space0<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E>
+where
+    I: StreamIsPartial,
+    I: Stream,
+    <I as Stream>::Token: AsChar + Clone,
+{
+    trace("space0", take_while(0.., AsChar::is_space)).parse_next(input)
+}
+
+/// Recognizes one or more spaces and tabs.
+///
+/// *Complete version*: Will return an error if there's not enough input data,
+/// or the whole input if no terminating token is found (a non space character).
+///
+/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data,
+/// or if no terminating token is found (a non space character).
+///
+/// # Example
+///
+/// ```
+/// # use winnow::prelude::*;
+/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed};
+/// # use winnow::ascii::space1;
+/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> {
+///     space1.parse_next(input)
+/// }
+///
+/// assert_eq!(parser.parse_peek(" \t21c"), Ok(("21c", " \t")));
+/// assert_eq!(parser.parse_peek("H2"), Err(ErrMode::Backtrack(InputError::new("H2", ErrorKind::Slice))));
+/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice))));
+/// ```
+///
+/// ```
+/// # use winnow::prelude::*;
+/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed};
+/// # use winnow::Partial;
+/// # use winnow::ascii::space1;
+/// assert_eq!(space1::<_, InputError<_>>.parse_peek(Partial::new(" \t21c")), Ok((Partial::new("21c"), " \t")));
+/// assert_eq!(space1::<_, InputError<_>>.parse_peek(Partial::new("H2")), Err(ErrMode::Backtrack(InputError::new(Partial::new("H2"), ErrorKind::Slice))));
+/// assert_eq!(space1::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
+/// ```
+#[inline(always)]
+pub fn space1<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E>
+where
+    I: StreamIsPartial,
+    I: Stream,
+    <I as Stream>::Token: AsChar + Clone,
+{
+    trace("space1", take_while(1.., AsChar::is_space)).parse_next(input)
+}
+
+/// Recognizes zero or more spaces, tabs, carriage returns and line feeds.
+///
+/// *Complete version*: will return the whole input if no terminating token is found (a non space
+/// character).
+///
+/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data,
+/// or if no terminating token is found (a non space character).
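Editor's note: the character-class parsers above are typically glued together with combinators; `space0` trims optional layout while `alpha1`/`alphanumeric0` recognize the token itself. A minimal sketch, assuming the vendored winnow 0.5 API (`identifier` is a hypothetical helper, not part of this patch):

```rust
use winnow::ascii::{alpha1, alphanumeric0, space0};
use winnow::combinator::delimited;
use winnow::error::InputError;
use winnow::prelude::*;

// An identifier (a letter, then letters or digits) with surrounding spaces
// and tabs trimmed; note that `space0` deliberately leaves newlines alone.
fn identifier<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> {
    delimited(space0, (alpha1, alphanumeric0).recognize(), space0).parse_next(input)
}

fn main() {
    assert_eq!(identifier.parse_peek(" \tkey1= 5"), Ok(("= 5", "key1")));
    assert!(identifier.parse_peek("1key").is_err());
}
```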
+/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::ascii::multispace0; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// multispace0.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek(" \t\n\r21c"), Ok(("21c", " \t\n\r"))); +/// assert_eq!(parser.parse_peek("Z21c"), Ok(("Z21c", ""))); +/// assert_eq!(parser.parse_peek(""), Ok(("", ""))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::multispace0; +/// assert_eq!(multispace0::<_, InputError<_>>.parse_peek(Partial::new(" \t\n\r21c")), Ok((Partial::new("21c"), " \t\n\r"))); +/// assert_eq!(multispace0::<_, InputError<_>>.parse_peek(Partial::new("Z21c")), Ok((Partial::new("Z21c"), ""))); +/// assert_eq!(multispace0::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn multispace0<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar + Clone, +{ + trace("multispace0", take_while(0.., (' ', '\t', '\r', '\n'))).parse_next(input) +} + +/// Recognizes one or more spaces, tabs, carriage returns and line feeds. +/// +/// *Complete version*: will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non space character). +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non space character). +/// +/// # Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::ascii::multispace1; +/// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// multispace1.parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek(" \t\n\r21c"), Ok(("21c", " \t\n\r"))); +/// assert_eq!(parser.parse_peek("H2"), Err(ErrMode::Backtrack(InputError::new("H2", ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::ascii::multispace1; +/// assert_eq!(multispace1::<_, InputError<_>>.parse_peek(Partial::new(" \t\n\r21c")), Ok((Partial::new("21c"), " \t\n\r"))); +/// assert_eq!(multispace1::<_, InputError<_>>.parse_peek(Partial::new("H2")), Err(ErrMode::Backtrack(InputError::new(Partial::new("H2"), ErrorKind::Slice)))); +/// assert_eq!(multispace1::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn multispace1<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar + Clone, +{ + trace("multispace1", take_while(1.., (' ', '\t', '\r', '\n'))).parse_next(input) +} + +/// Decode a decimal unsigned integer (e.g. [`u32`]) +/// +/// *Complete version*: can parse until the end of input. 
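Editor's note: unlike the parsers above, `dec_uint` ships without a doc example. A minimal sketch of its complete-input behaviour, assuming the vendored winnow 0.5 API (the same pattern applies to `dec_int`, added further below):

```rust
use winnow::ascii::dec_uint;
use winnow::error::InputError;
use winnow::prelude::*;

fn parser<'s>(input: &mut &'s str) -> PResult<u32, InputError<&'s str>> {
    dec_uint.parse_next(input)
}

fn main() {
    // Stops at the first non-digit and returns the accumulated value.
    assert_eq!(parser.parse_peek("416d"), Ok(("d", 416)));
    // Overflow is a parse error (`ErrorKind::Verify`), not a silent wrap-around.
    assert!(parser.parse_peek("99999999999999999999").is_err());
    // At least one digit is required.
    assert!(parser.parse_peek("a").is_err());
}
```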
+/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. +#[doc(alias = "u8")] +#[doc(alias = "u16")] +#[doc(alias = "u32")] +#[doc(alias = "u64")] +#[doc(alias = "u128")] +pub fn dec_uint<I, O, E: ParserError<I>>(input: &mut I) -> PResult<O, E> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar + Clone, + O: Uint, +{ + trace("dec_uint", move |input: &mut I| { + if input.eof_offset() == 0 { + if <I as StreamIsPartial>::is_partial_supported() && input.is_partial() { + return Err(ErrMode::Incomplete(Needed::new(1))); + } else { + return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); + } + } + + let mut value = O::default(); + for (offset, c) in input.iter_offsets() { + match c.as_char().to_digit(10) { + Some(d) => match value.checked_mul(10, sealed::SealedMarker).and_then(|v| { + let d = d as u8; + v.checked_add(d, sealed::SealedMarker) + }) { + None => return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)), + Some(v) => value = v, + }, + None => { + if offset == 0 { + return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); + } else { + let _ = input.next_slice(offset); + return Ok(value); + } + } + } + } + + if <I as StreamIsPartial>::is_partial_supported() && input.is_partial() { + Err(ErrMode::Incomplete(Needed::new(1))) + } else { + let _ = input.finish(); + Ok(value) + } + }) + .parse_next(input) +} + +/// Metadata for parsing unsigned integers, see [`dec_uint`] +pub trait Uint: Default { + #[doc(hidden)] + fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self>; + #[doc(hidden)] + fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self>; +} + +impl Uint for u8 { + fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_mul(by as Self) + } + fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_add(by as Self) + } +} + +impl Uint for u16 { + fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_mul(by as Self) + } + fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_add(by as Self) + } +} + +impl Uint for u32 { + fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_mul(by as Self) + } + fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_add(by as Self) + } +} + +impl Uint for u64 { + fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_mul(by as Self) + } + fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_add(by as Self) + } +} + +impl Uint for u128 { + fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_mul(by as Self) + } + fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_add(by as Self) + } +} + +/// Deprecated since v0.5.17 +impl Uint for i8 { + fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_mul(by as Self) + } + fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_add(by as Self) + } +} + +/// Deprecated since v0.5.17 +impl Uint for i16 { + fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_mul(by as Self) + } + fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_add(by as Self) + } +} + +/// Deprecated since v0.5.17 +impl Uint for i32 { + fn checked_mul(self, by: 
u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_mul(by as Self) + } + fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_add(by as Self) + } +} + +/// Deprecated since v0.5.17 +impl Uint for i64 { + fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_mul(by as Self) + } + fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_add(by as Self) + } +} + +/// Deprecated since v0.5.17 +impl Uint for i128 { + fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_mul(by as Self) + } + fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_add(by as Self) + } +} + +/// Decode a decimal signed integer (e.g. [`i32`]) +/// +/// *Complete version*: can parse until the end of input. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. +#[doc(alias = "i8")] +#[doc(alias = "i16")] +#[doc(alias = "i32")] +#[doc(alias = "i64")] +#[doc(alias = "i128")] +pub fn dec_int<I, O, E: ParserError<I>>(input: &mut I) -> PResult<O, E> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar + Clone, + O: Int, +{ + trace("dec_int", move |input: &mut I| { + fn sign(token: impl AsChar) -> bool { + let token = token.as_char(); + token == '+' || token == '-' + } + let sign = opt(crate::token::one_of(sign).map(AsChar::as_char)) + .map(|c| c != Some('-')) + .parse_next(input)?; + + if input.eof_offset() == 0 { + if <I as StreamIsPartial>::is_partial_supported() && input.is_partial() { + return Err(ErrMode::Incomplete(Needed::new(1))); + } else { + return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); + } + } + + let mut value = O::default(); + for (offset, c) in input.iter_offsets() { + match c.as_char().to_digit(10) { + Some(d) => match value.checked_mul(10, sealed::SealedMarker).and_then(|v| { + let d = d as u8; + if sign { + v.checked_add(d, sealed::SealedMarker) + } else { + v.checked_sub(d, sealed::SealedMarker) + } + }) { + None => return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)), + Some(v) => value = v, + }, + None => { + if offset == 0 { + return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); + } else { + let _ = input.next_slice(offset); + return Ok(value); + } + } + } + } + + if <I as StreamIsPartial>::is_partial_supported() && input.is_partial() { + Err(ErrMode::Incomplete(Needed::new(1))) + } else { + let _ = input.finish(); + Ok(value) + } + }) + .parse_next(input) +} + +/// Metadata for parsing signed integers, see [`dec_int`] +pub trait Int: Uint { + #[doc(hidden)] + fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self>; +} + +impl Int for i8 { + fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_sub(by as Self) + } +} + +impl Int for i16 { + fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_sub(by as Self) + } +} + +impl Int for i32 { + fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_sub(by as Self) + } +} + +impl Int for i64 { + fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_sub(by as Self) + } +} + +impl Int for i128 { + fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { + self.checked_sub(by as Self) + } +} + +/// Decode a variable-width hexadecimal integer (e.g. 
[`u32`]) +/// +/// *Complete version*: Will parse until the end of input if it has fewer characters than the type +/// supports. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if end-of-input +/// is hit before a hard boundary (non-hex character, more characters than supported). +/// +/// # Example +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; +/// use winnow::ascii::hex_uint; +/// +/// fn parser<'s>(s: &mut &'s [u8]) -> PResult<u32, InputError<&'s [u8]>> { +/// hex_uint(s) +/// } +/// +/// assert_eq!(parser.parse_peek(&b"01AE"[..]), Ok((&b""[..], 0x01AE))); +/// assert_eq!(parser.parse_peek(&b"abc"[..]), Ok((&b""[..], 0x0ABC))); +/// assert_eq!(parser.parse_peek(&b"ggg"[..]), Err(ErrMode::Backtrack(InputError::new(&b"ggg"[..], ErrorKind::Slice)))); +/// ``` +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// use winnow::ascii::hex_uint; +/// +/// fn parser<'s>(s: &mut Partial<&'s [u8]>) -> PResult<u32, InputError<Partial<&'s [u8]>>> { +/// hex_uint(s) +/// } +/// +/// assert_eq!(parser.parse_peek(Partial::new(&b"01AE;"[..])), Ok((Partial::new(&b";"[..]), 0x01AE))); +/// assert_eq!(parser.parse_peek(Partial::new(&b"abc"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(parser.parse_peek(Partial::new(&b"ggg"[..])), Err(ErrMode::Backtrack(InputError::new(Partial::new(&b"ggg"[..]), ErrorKind::Slice)))); +/// ``` +#[inline] +pub fn hex_uint<I, O, E: ParserError<I>>(input: &mut I) -> PResult<O, E> +where + I: StreamIsPartial, + I: Stream, + O: HexUint, + <I as Stream>::Token: AsChar, + <I as Stream>::Slice: AsBStr, +{ + trace("hex_uint", move |input: &mut I| { + let invalid_offset = input + .offset_for(|c| { + let c = c.as_char(); + !"0123456789abcdefABCDEF".contains(c) + }) + .unwrap_or_else(|| input.eof_offset()); + let max_nibbles = O::max_nibbles(sealed::SealedMarker); + let max_offset = input.offset_at(max_nibbles); + let offset = match max_offset { + Ok(max_offset) => { + if max_offset < invalid_offset { + // Overflow + return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)); + } else { + invalid_offset + } + } + Err(_) => { + if <I as StreamIsPartial>::is_partial_supported() + && input.is_partial() + && invalid_offset == input.eof_offset() + { + // Only the next byte is guaranteed required + return Err(ErrMode::Incomplete(Needed::new(1))); + } else { + invalid_offset + } + } + }; + if offset == 0 { + // Must be at least one digit + return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); + } + let parsed = input.next_slice(offset); + + let mut res = O::default(); + for c in parsed.as_bstr() { + let nibble = *c as char; + let nibble = nibble.to_digit(16).unwrap_or(0) as u8; + let nibble = O::from(nibble); + res = (res << O::from(4)) + nibble; + } + + Ok(res) + }) + .parse_next(input) +} + +/// Metadata for parsing hex numbers, see [`hex_uint`] +pub trait HexUint: + Default + Shl<Self, Output = Self> + Add<Self, Output = Self> + From<u8> +{ + #[doc(hidden)] + fn max_nibbles(_: sealed::SealedMarker) -> usize; +} + +impl HexUint for u8 { + #[inline(always)] + fn max_nibbles(_: sealed::SealedMarker) -> usize { + 2 + } +} + +impl HexUint for u16 { + #[inline(always)] + fn max_nibbles(_: sealed::SealedMarker) -> usize { + 4 + } +} + +impl HexUint for u32 { + #[inline(always)] + fn max_nibbles(_: sealed::SealedMarker) -> usize { + 8 + } 
+} + +impl HexUint for u64 { + #[inline(always)] + fn max_nibbles(_: sealed::SealedMarker) -> usize { + 16 + } +} + +impl HexUint for u128 { + #[inline(always)] + fn max_nibbles(_: sealed::SealedMarker) -> usize { + 32 + } +} + +/// Recognizes floating point number in text format and returns a [`f32`] or [`f64`]. +/// +/// *Complete version*: Can parse until the end of input. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there is not enough data. +/// +/// # Example +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::error::Needed::Size; +/// use winnow::ascii::float; +/// +/// fn parser<'s>(s: &mut &'s str) -> PResult<f64, InputError<&'s str>> { +/// float(s) +/// } +/// +/// assert_eq!(parser.parse_peek("11e-1"), Ok(("", 1.1))); +/// assert_eq!(parser.parse_peek("123E-02"), Ok(("", 1.23))); +/// assert_eq!(parser.parse_peek("123K-01"), Ok(("K-01", 123.0))); +/// assert_eq!(parser.parse_peek("abc"), Err(ErrMode::Backtrack(InputError::new("abc", ErrorKind::Tag)))); +/// ``` +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::error::Needed::Size; +/// # use winnow::Partial; +/// use winnow::ascii::float; +/// +/// fn parser<'s>(s: &mut Partial<&'s str>) -> PResult<f64, InputError<Partial<&'s str>>> { +/// float(s) +/// } +/// +/// assert_eq!(parser.parse_peek(Partial::new("11e-1 ")), Ok((Partial::new(" "), 1.1))); +/// assert_eq!(parser.parse_peek(Partial::new("11e-1")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(parser.parse_peek(Partial::new("123E-02")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(parser.parse_peek(Partial::new("123K-01")), Ok((Partial::new("K-01"), 123.0))); +/// assert_eq!(parser.parse_peek(Partial::new("abc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("abc"), ErrorKind::Tag)))); +/// ``` +#[inline(always)] +#[doc(alias = "f32")] +#[doc(alias = "double")] +#[allow(clippy::trait_duplication_in_bounds)] // HACK: clippy 1.64.0 bug +pub fn float<I, O, E: ParserError<I>>(input: &mut I) -> PResult<O, E> +where + I: StreamIsPartial, + I: Stream, + I: Compare<&'static str>, + <I as Stream>::Slice: ParseSlice<O>, + <I as Stream>::Token: AsChar + Clone, + <I as Stream>::IterOffsets: Clone, + I: AsBStr, +{ + trace("float", move |input: &mut I| { + let s = recognize_float_or_exceptions(input)?; + s.parse_slice() + .ok_or_else(|| ErrMode::from_error_kind(input, ErrorKind::Verify)) + }) + .parse_next(input) +} + +#[allow(clippy::trait_duplication_in_bounds)] // HACK: clippy 1.64.0 bug +#[allow(deprecated)] +fn recognize_float_or_exceptions<I, E: ParserError<I>>( + input: &mut I, +) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + I: Compare<&'static str>, + <I as Stream>::Token: AsChar + Clone, + <I as Stream>::IterOffsets: Clone, + I: AsBStr, +{ + alt(( + recognize_float, + crate::token::tag_no_case("nan"), + ( + opt(one_of(['+', '-'])), + crate::token::tag_no_case("infinity"), + ) + .recognize(), + (opt(one_of(['+', '-'])), crate::token::tag_no_case("inf")).recognize(), + )) + .parse_next(input) +} + +#[allow(clippy::trait_duplication_in_bounds)] // HACK: clippy 1.64.0 bug +fn recognize_float<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + I: Compare<&'static str>, + <I as Stream>::Token: 
AsChar + Clone, + <I as Stream>::IterOffsets: Clone, + I: AsBStr, +{ + ( + opt(one_of(['+', '-'])), + alt(( + (digit1, opt(('.', opt(digit1)))).map(|_| ()), + ('.', digit1).map(|_| ()), + )), + opt((one_of(['e', 'E']), opt(one_of(['+', '-'])), cut_err(digit1))), + ) + .recognize() + .parse_next(input) +} + +/// Matches a byte string with escaped characters. +/// +/// * The first argument matches the normal characters (it must not accept the control character) +/// * The second argument is the control character (like `\` in most languages) +/// * The third argument matches the escaped characters +/// +/// # Example +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed, IResult}; +/// # use winnow::ascii::digit1; +/// # use winnow::prelude::*; +/// use winnow::ascii::escaped; +/// use winnow::token::one_of; +/// +/// fn esc(s: &str) -> IResult<&str, &str> { +/// escaped(digit1, '\\', one_of(['"', 'n', '\\'])).parse_peek(s) +/// } +/// +/// assert_eq!(esc("123;"), Ok((";", "123"))); +/// assert_eq!(esc(r#"12\"34;"#), Ok((";", r#"12\"34"#))); +/// ``` +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed, IResult}; +/// # use winnow::ascii::digit1; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// use winnow::ascii::escaped; +/// use winnow::token::one_of; +/// +/// fn esc(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// escaped(digit1, '\\', one_of(['"', 'n', '\\'])).parse_peek(s) +/// } +/// +/// assert_eq!(esc(Partial::new("123;")), Ok((Partial::new(";"), "123"))); +/// assert_eq!(esc(Partial::new("12\\\"34;")), Ok((Partial::new(";"), "12\\\"34"))); +/// ``` +#[inline(always)] +pub fn escaped<'a, I: 'a, Error, F, G, O1, O2>( + mut normal: F, + control_char: char, + mut escapable: G, +) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar + Clone, + F: Parser<I, O1, Error>, + G: Parser<I, O2, Error>, + Error: ParserError<I>, +{ + trace("escaped", move |input: &mut I| { + if <I as StreamIsPartial>::is_partial_supported() && input.is_partial() { + streaming_escaped_internal(input, &mut normal, control_char, &mut escapable) + } else { + complete_escaped_internal(input, &mut normal, control_char, &mut escapable) + } + }) +} + +fn streaming_escaped_internal<I, Error, F, G, O1, O2>( + input: &mut I, + normal: &mut F, + control_char: char, + escapable: &mut G, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: AsChar + Clone, + F: Parser<I, O1, Error>, + G: Parser<I, O2, Error>, + Error: ParserError<I>, +{ + let start = input.checkpoint(); + + while input.eof_offset() > 0 { + let current_len = input.eof_offset(); + + match opt(normal.by_ref()).parse_next(input)? 
{ + Some(_) => { + if input.eof_offset() == current_len { + let offset = input.offset_from(&start); + input.reset(start); + return Ok(input.next_slice(offset)); + } + } + None => { + if opt(control_char).parse_next(input)?.is_some() { + let _ = escapable.parse_next(input)?; + } else { + let offset = input.offset_from(&start); + input.reset(start); + return Ok(input.next_slice(offset)); + } + } + } + } + + Err(ErrMode::Incomplete(Needed::Unknown)) +} + +fn complete_escaped_internal<'a, I: 'a, Error, F, G, O1, O2>( + input: &mut I, + normal: &mut F, + control_char: char, + escapable: &mut G, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: crate::stream::AsChar + Clone, + F: Parser<I, O1, Error>, + G: Parser<I, O2, Error>, + Error: ParserError<I>, +{ + let start = input.checkpoint(); + + while input.eof_offset() > 0 { + let current_len = input.eof_offset(); + + match opt(normal.by_ref()).parse_next(input)? { + Some(_) => { + if input.eof_offset() == current_len { + let offset = input.offset_from(&start); + input.reset(start); + return Ok(input.next_slice(offset)); + } + } + None => { + if opt(control_char).parse_next(input)?.is_some() { + let _ = escapable.parse_next(input)?; + } else { + let offset = input.offset_from(&start); + input.reset(start); + return Ok(input.next_slice(offset)); + } + } + } + } + + input.reset(start); + Ok(input.finish()) +} + +/// Matches a byte string with escaped characters. +/// +/// * The first argument matches the normal characters (it must not match the control character) +/// * The second argument is the control character (like `\` in most languages) +/// * The third argument matches the escaped characters and transforms them +/// +/// As an example, the chain `abc\tdef` could be `abc def` (it also consumes the control character) +/// +/// # Example +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use std::str::from_utf8; +/// use winnow::token::tag; +/// use winnow::ascii::escaped_transform; +/// use winnow::ascii::alpha1; +/// use winnow::combinator::alt; +/// +/// fn parser<'s>(input: &mut &'s str) -> PResult<String, InputError<&'s str>> { +/// escaped_transform( +/// alpha1, +/// '\\', +/// alt(( +/// "\\".value("\\"), +/// "\"".value("\""), +/// "n".value("\n"), +/// )) +/// ).parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek("ab\\\"cd"), Ok(("", String::from("ab\"cd")))); +/// assert_eq!(parser.parse_peek("ab\\ncd"), Ok(("", String::from("ab\ncd")))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use std::str::from_utf8; +/// # use winnow::Partial; +/// use winnow::token::tag; +/// use winnow::ascii::escaped_transform; +/// use winnow::ascii::alpha1; +/// use winnow::combinator::alt; +/// +/// fn parser<'s>(input: &mut Partial<&'s str>) -> PResult<String, InputError<Partial<&'s str>>> { +/// escaped_transform( +/// alpha1, +/// '\\', +/// alt(( +/// "\\".value("\\"), +/// "\"".value("\""), +/// "n".value("\n"), +/// )) +/// ).parse_next(input) +/// } +/// +/// assert_eq!(parser.parse_peek(Partial::new("ab\\\"cd\"")), Ok((Partial::new("\""), String::from("ab\"cd")))); +/// ``` +#[inline(always)] +pub fn escaped_transform<I, Error, F, G, Output>( + mut normal: F, + control_char: char, + mut transform: G, +) -> impl Parser<I, Output, Error> +where + I: StreamIsPartial, + I: Stream, + <I 
as Stream>::Token: crate::stream::AsChar + Clone, + Output: crate::stream::Accumulate<<I as Stream>::Slice>, + F: Parser<I, <I as Stream>::Slice, Error>, + G: Parser<I, <I as Stream>::Slice, Error>, + Error: ParserError<I>, +{ + trace("escaped_transform", move |input: &mut I| { + if <I as StreamIsPartial>::is_partial_supported() && input.is_partial() { + streaming_escaped_transform_internal(input, &mut normal, control_char, &mut transform) + } else { + complete_escaped_transform_internal(input, &mut normal, control_char, &mut transform) + } + }) +} + +fn streaming_escaped_transform_internal<I, Error, F, G, Output>( + input: &mut I, + normal: &mut F, + control_char: char, + transform: &mut G, +) -> PResult<Output, Error> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: crate::stream::AsChar + Clone, + Output: crate::stream::Accumulate<<I as Stream>::Slice>, + F: Parser<I, <I as Stream>::Slice, Error>, + G: Parser<I, <I as Stream>::Slice, Error>, + Error: ParserError<I>, +{ + let mut res = Output::initial(Some(input.eof_offset())); + + while input.eof_offset() > 0 { + let current_len = input.eof_offset(); + match opt(normal.by_ref()).parse_next(input)? { + Some(o) => { + res.accumulate(o); + if input.eof_offset() == current_len { + return Ok(res); + } + } + None => { + if opt(control_char).parse_next(input)?.is_some() { + let o = transform.parse_next(input)?; + res.accumulate(o); + } else { + return Ok(res); + } + } + } + } + Err(ErrMode::Incomplete(Needed::Unknown)) +} + +fn complete_escaped_transform_internal<I, Error, F, G, Output>( + input: &mut I, + normal: &mut F, + control_char: char, + transform: &mut G, +) -> PResult<Output, Error> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: crate::stream::AsChar + Clone, + Output: crate::stream::Accumulate<<I as Stream>::Slice>, + F: Parser<I, <I as Stream>::Slice, Error>, + G: Parser<I, <I as Stream>::Slice, Error>, + Error: ParserError<I>, +{ + let mut res = Output::initial(Some(input.eof_offset())); + + while input.eof_offset() > 0 { + let current_len = input.eof_offset(); + + match opt(normal.by_ref()).parse_next(input)? { + Some(o) => { + res.accumulate(o); + if input.eof_offset() == current_len { + return Ok(res); + } + } + None => { + if opt(control_char).parse_next(input)?.is_some() { + let o = transform.parse_next(input)?; + res.accumulate(o); + } else { + return Ok(res); + } + } + } + } + Ok(res) +} + +mod sealed { + pub struct SealedMarker; +} diff --git a/vendor/winnow/src/character/tests.rs b/vendor/winnow/src/ascii/tests.rs similarity index 61% rename from vendor/winnow/src/character/tests.rs rename to vendor/winnow/src/ascii/tests.rs index 95a63c3..4f4c7f6 100644 --- a/vendor/winnow/src/character/tests.rs +++ b/vendor/winnow/src/ascii/tests.rs @@ -1,22 +1,23 @@ use super::*; +use crate::prelude::*; mod complete { use super::*; - use crate::branch::alt; - use crate::bytes::none_of; - use crate::bytes::one_of; + use crate::combinator::alt; use crate::combinator::opt; use crate::error::ErrMode; - use crate::error::Error; use crate::error::ErrorKind; + use crate::error::InputError; use crate::stream::ParseSlice; + use crate::token::none_of; + use crate::token::one_of; #[cfg(feature = "alloc")] use crate::{lib::std::string::String, lib::std::vec::Vec}; use proptest::prelude::*; macro_rules! 
assert_parse( ($left: expr, $right: expr) => { - let res: $crate::IResult<_, _, Error<_>> = $left; + let res: $crate::IResult<_, _, InputError<_>> = $left; assert_eq!(res, $right); }; ); @@ -30,60 +31,75 @@ mod complete { let d: &[u8] = "azé12".as_bytes(); let e: &[u8] = b" "; let f: &[u8] = b" ;"; - //assert_eq!(alpha1::<_, Error>(a), Err(ErrMode::Incomplete(Needed::Size(1)))); - assert_parse!(alpha1(a), Ok((empty, a))); + //assert_eq!(alpha1::<_, InputError>(a), Err(ErrMode::Incomplete(Needed::Size(1)))); + assert_parse!(alpha1.parse_peek(a), Ok((empty, a))); assert_eq!( - alpha1(b), - Err(ErrMode::Backtrack(Error::new(b, ErrorKind::Slice))) + alpha1.parse_peek(b), + Err(ErrMode::Backtrack(InputError::new(b, ErrorKind::Slice))) ); - assert_eq!(alpha1::<_, Error<_>>(c), Ok((&c[1..], &b"a"[..]))); - assert_eq!(alpha1::<_, Error<_>>(d), Ok(("é12".as_bytes(), &b"az"[..]))); assert_eq!( - digit1(a), - Err(ErrMode::Backtrack(Error::new(a, ErrorKind::Slice))) + alpha1::<_, InputError<_>>.parse_peek(c), + Ok((&c[1..], &b"a"[..])) ); - assert_eq!(digit1::<_, Error<_>>(b), Ok((empty, b))); assert_eq!( - digit1(c), - Err(ErrMode::Backtrack(Error::new(c, ErrorKind::Slice))) + alpha1::<_, InputError<_>>.parse_peek(d), + Ok(("é12".as_bytes(), &b"az"[..])) + ); + assert_eq!( + digit1.parse_peek(a), + Err(ErrMode::Backtrack(InputError::new(a, ErrorKind::Slice))) ); + assert_eq!(digit1::<_, InputError<_>>.parse_peek(b), Ok((empty, b))); assert_eq!( - digit1(d), - Err(ErrMode::Backtrack(Error::new(d, ErrorKind::Slice))) + digit1.parse_peek(c), + Err(ErrMode::Backtrack(InputError::new(c, ErrorKind::Slice))) ); - assert_eq!(hex_digit1::<_, Error<_>>(a), Ok((empty, a))); - assert_eq!(hex_digit1::<_, Error<_>>(b), Ok((empty, b))); - assert_eq!(hex_digit1::<_, Error<_>>(c), Ok((empty, c))); assert_eq!( - hex_digit1::<_, Error<_>>(d), + digit1.parse_peek(d), + Err(ErrMode::Backtrack(InputError::new(d, ErrorKind::Slice))) + ); + assert_eq!(hex_digit1::<_, InputError<_>>.parse_peek(a), Ok((empty, a))); + assert_eq!(hex_digit1::<_, InputError<_>>.parse_peek(b), Ok((empty, b))); + assert_eq!(hex_digit1::<_, InputError<_>>.parse_peek(c), Ok((empty, c))); + assert_eq!( + hex_digit1::<_, InputError<_>>.parse_peek(d), Ok(("zé12".as_bytes(), &b"a"[..])) ); assert_eq!( - hex_digit1(e), - Err(ErrMode::Backtrack(Error::new(e, ErrorKind::Slice))) + hex_digit1.parse_peek(e), + Err(ErrMode::Backtrack(InputError::new(e, ErrorKind::Slice))) ); assert_eq!( - oct_digit1(a), - Err(ErrMode::Backtrack(Error::new(a, ErrorKind::Slice))) + oct_digit1.parse_peek(a), + Err(ErrMode::Backtrack(InputError::new(a, ErrorKind::Slice))) ); - assert_eq!(oct_digit1::<_, Error<_>>(b), Ok((empty, b))); + assert_eq!(oct_digit1::<_, InputError<_>>.parse_peek(b), Ok((empty, b))); assert_eq!( - oct_digit1(c), - Err(ErrMode::Backtrack(Error::new(c, ErrorKind::Slice))) + oct_digit1.parse_peek(c), + Err(ErrMode::Backtrack(InputError::new(c, ErrorKind::Slice))) ); assert_eq!( - oct_digit1(d), - Err(ErrMode::Backtrack(Error::new(d, ErrorKind::Slice))) + oct_digit1.parse_peek(d), + Err(ErrMode::Backtrack(InputError::new(d, ErrorKind::Slice))) + ); + assert_eq!( + alphanumeric1::<_, InputError<_>>.parse_peek(a), + Ok((empty, a)) ); - assert_eq!(alphanumeric1::<_, Error<_>>(a), Ok((empty, a))); //assert_eq!(fix_error!(b,(), alphanumeric), Ok((empty, b))); - assert_eq!(alphanumeric1::<_, Error<_>>(c), Ok((empty, c))); assert_eq!( - alphanumeric1::<_, Error<_>>(d), + alphanumeric1::<_, InputError<_>>.parse_peek(c), + Ok((empty, c)) + ); + assert_eq!( + 
alphanumeric1::<_, InputError<_>>.parse_peek(d), Ok(("é12".as_bytes(), &b"az"[..])) ); - assert_eq!(space1::<_, Error<_>>(e), Ok((empty, e))); - assert_eq!(space1::<_, Error<_>>(f), Ok((&b";"[..], &b" "[..]))); + assert_eq!(space1::<_, InputError<_>>.parse_peek(e), Ok((empty, e))); + assert_eq!( + space1::<_, InputError<_>>.parse_peek(f), + Ok((&b";"[..], &b" "[..])) + ); } #[cfg(feature = "alloc")] @@ -95,52 +111,64 @@ mod complete { let c = "a123"; let d = "azé12"; let e = " "; - assert_eq!(alpha1::<_, Error<_>>(a), Ok((empty, a))); + assert_eq!(alpha1::<_, InputError<_>>.parse_peek(a), Ok((empty, a))); assert_eq!( - alpha1(b), - Err(ErrMode::Backtrack(Error::new(b, ErrorKind::Slice))) + alpha1.parse_peek(b), + Err(ErrMode::Backtrack(InputError::new(b, ErrorKind::Slice))) ); - assert_eq!(alpha1::<_, Error<_>>(c), Ok((&c[1..], "a"))); - assert_eq!(alpha1::<_, Error<_>>(d), Ok(("é12", "az"))); + assert_eq!(alpha1::<_, InputError<_>>.parse_peek(c), Ok((&c[1..], "a"))); + assert_eq!(alpha1::<_, InputError<_>>.parse_peek(d), Ok(("é12", "az"))); assert_eq!( - digit1(a), - Err(ErrMode::Backtrack(Error::new(a, ErrorKind::Slice))) + digit1.parse_peek(a), + Err(ErrMode::Backtrack(InputError::new(a, ErrorKind::Slice))) ); - assert_eq!(digit1::<_, Error<_>>(b), Ok((empty, b))); + assert_eq!(digit1::<_, InputError<_>>.parse_peek(b), Ok((empty, b))); assert_eq!( - digit1(c), - Err(ErrMode::Backtrack(Error::new(c, ErrorKind::Slice))) + digit1.parse_peek(c), + Err(ErrMode::Backtrack(InputError::new(c, ErrorKind::Slice))) ); assert_eq!( - digit1(d), - Err(ErrMode::Backtrack(Error::new(d, ErrorKind::Slice))) + digit1.parse_peek(d), + Err(ErrMode::Backtrack(InputError::new(d, ErrorKind::Slice))) ); - assert_eq!(hex_digit1::<_, Error<_>>(a), Ok((empty, a))); - assert_eq!(hex_digit1::<_, Error<_>>(b), Ok((empty, b))); - assert_eq!(hex_digit1::<_, Error<_>>(c), Ok((empty, c))); - assert_eq!(hex_digit1::<_, Error<_>>(d), Ok(("zé12", "a"))); + assert_eq!(hex_digit1::<_, InputError<_>>.parse_peek(a), Ok((empty, a))); + assert_eq!(hex_digit1::<_, InputError<_>>.parse_peek(b), Ok((empty, b))); + assert_eq!(hex_digit1::<_, InputError<_>>.parse_peek(c), Ok((empty, c))); assert_eq!( - hex_digit1(e), - Err(ErrMode::Backtrack(Error::new(e, ErrorKind::Slice))) + hex_digit1::<_, InputError<_>>.parse_peek(d), + Ok(("zé12", "a")) ); assert_eq!( - oct_digit1(a), - Err(ErrMode::Backtrack(Error::new(a, ErrorKind::Slice))) + hex_digit1.parse_peek(e), + Err(ErrMode::Backtrack(InputError::new(e, ErrorKind::Slice))) ); - assert_eq!(oct_digit1::<_, Error<_>>(b), Ok((empty, b))); assert_eq!( - oct_digit1(c), - Err(ErrMode::Backtrack(Error::new(c, ErrorKind::Slice))) + oct_digit1.parse_peek(a), + Err(ErrMode::Backtrack(InputError::new(a, ErrorKind::Slice))) ); + assert_eq!(oct_digit1::<_, InputError<_>>.parse_peek(b), Ok((empty, b))); assert_eq!( - oct_digit1(d), - Err(ErrMode::Backtrack(Error::new(d, ErrorKind::Slice))) + oct_digit1.parse_peek(c), + Err(ErrMode::Backtrack(InputError::new(c, ErrorKind::Slice))) + ); + assert_eq!( + oct_digit1.parse_peek(d), + Err(ErrMode::Backtrack(InputError::new(d, ErrorKind::Slice))) + ); + assert_eq!( + alphanumeric1::<_, InputError<_>>.parse_peek(a), + Ok((empty, a)) ); - assert_eq!(alphanumeric1::<_, Error<_>>(a), Ok((empty, a))); //assert_eq!(fix_error!(b,(), alphanumeric), Ok((empty, b))); - assert_eq!(alphanumeric1::<_, Error<_>>(c), Ok((empty, c))); - assert_eq!(alphanumeric1::<_, Error<_>>(d), Ok(("é12", "az"))); - assert_eq!(space1::<_, Error<_>>(e), Ok((empty, e))); + assert_eq!( + 
alphanumeric1::<_, InputError<_>>.parse_peek(c), + Ok((empty, c)) + ); + assert_eq!( + alphanumeric1::<_, InputError<_>>.parse_peek(d), + Ok(("é12", "az")) + ); + assert_eq!(space1::<_, InputError<_>>.parse_peek(e), Ok((empty, e))); } use crate::stream::Offset; @@ -153,45 +181,45 @@ mod complete { let e = &b" \t\r\n;"[..]; let f = &b"123abcDEF;"[..]; - match alpha1::<_, Error<_>>(a) { + match alpha1::<_, InputError<_>>.parse_peek(a) { Ok((i, _)) => { - assert_eq!(a.offset_to(i) + i.len(), a.len()); + assert_eq!(i.offset_from(&a) + i.len(), a.len()); } _ => panic!("wrong return type in offset test for alpha"), } - match digit1::<_, Error<_>>(b) { + match digit1::<_, InputError<_>>.parse_peek(b) { Ok((i, _)) => { - assert_eq!(b.offset_to(i) + i.len(), b.len()); + assert_eq!(i.offset_from(&b) + i.len(), b.len()); } _ => panic!("wrong return type in offset test for digit"), } - match alphanumeric1::<_, Error<_>>(c) { + match alphanumeric1::<_, InputError<_>>.parse_peek(c) { Ok((i, _)) => { - assert_eq!(c.offset_to(i) + i.len(), c.len()); + assert_eq!(i.offset_from(&c) + i.len(), c.len()); } _ => panic!("wrong return type in offset test for alphanumeric"), } - match space1::<_, Error<_>>(d) { + match space1::<_, InputError<_>>.parse_peek(d) { Ok((i, _)) => { - assert_eq!(d.offset_to(i) + i.len(), d.len()); + assert_eq!(i.offset_from(&d) + i.len(), d.len()); } _ => panic!("wrong return type in offset test for space"), } - match multispace1::<_, Error<_>>(e) { + match multispace1::<_, InputError<_>>.parse_peek(e) { Ok((i, _)) => { - assert_eq!(e.offset_to(i) + i.len(), e.len()); + assert_eq!(i.offset_from(&e) + i.len(), e.len()); } _ => panic!("wrong return type in offset test for multispace"), } - match hex_digit1::<_, Error<_>>(f) { + match hex_digit1::<_, InputError<_>>.parse_peek(f) { Ok((i, _)) => { - assert_eq!(f.offset_to(i) + i.len(), f.len()); + assert_eq!(i.offset_from(&f) + i.len(), f.len()); } _ => panic!("wrong return type in offset test for hex_digit"), } - match oct_digit1::<_, Error<_>>(f) { + match oct_digit1::<_, InputError<_>>.parse_peek(f) { Ok((i, _)) => { - assert_eq!(f.offset_to(i) + i.len(), f.len()); + assert_eq!(i.offset_from(&f) + i.len(), f.len()); } _ => panic!("wrong return type in offset test for oct_digit"), } @@ -201,53 +229,62 @@ mod complete { fn is_not_line_ending_bytes() { let a: &[u8] = b"ab12cd\nefgh"; assert_eq!( - not_line_ending::<_, Error<_>>(a), + not_line_ending::<_, InputError<_>>.parse_peek(a), Ok((&b"\nefgh"[..], &b"ab12cd"[..])) ); let b: &[u8] = b"ab12cd\nefgh\nijkl"; assert_eq!( - not_line_ending::<_, Error<_>>(b), + not_line_ending::<_, InputError<_>>.parse_peek(b), Ok((&b"\nefgh\nijkl"[..], &b"ab12cd"[..])) ); let c: &[u8] = b"ab12cd\r\nefgh\nijkl"; assert_eq!( - not_line_ending::<_, Error<_>>(c), + not_line_ending::<_, InputError<_>>.parse_peek(c), Ok((&b"\r\nefgh\nijkl"[..], &b"ab12cd"[..])) ); let d: &[u8] = b"ab12cd"; - assert_eq!(not_line_ending::<_, Error<_>>(d), Ok((&[][..], d))); + assert_eq!( + not_line_ending::<_, InputError<_>>.parse_peek(d), + Ok((&[][..], d)) + ); } #[test] fn is_not_line_ending_str() { let f = "βèƒôřè\rÂßÇáƒƭèř"; assert_eq!( - not_line_ending(f), - Err(ErrMode::Backtrack(Error::new(f, ErrorKind::Tag))) + not_line_ending.parse_peek(f), + Err(ErrMode::Backtrack(InputError::new( + &f[12..], + ErrorKind::Tag + ))) ); let g2: &str = "ab12cd"; - assert_eq!(not_line_ending::<_, Error<_>>(g2), Ok(("", g2))); + assert_eq!( + not_line_ending::<_, InputError<_>>.parse_peek(g2), + Ok(("", g2)) + ); } #[test] fn 
hex_digit_test() { let i = &b"0123456789abcdefABCDEF;"[..]; - assert_parse!(hex_digit1(i), Ok((&b";"[..], &i[..i.len() - 1]))); + assert_parse!(hex_digit1.parse_peek(i), Ok((&b";"[..], &i[..i.len() - 1]))); let i = &b"g"[..]; assert_parse!( - hex_digit1(i), - Err(ErrMode::Backtrack(error_position!(i, ErrorKind::Slice))) + hex_digit1.parse_peek(i), + Err(ErrMode::Backtrack(error_position!(&i, ErrorKind::Slice))) ); let i = &b"G"[..]; assert_parse!( - hex_digit1(i), - Err(ErrMode::Backtrack(error_position!(i, ErrorKind::Slice))) + hex_digit1.parse_peek(i), + Err(ErrMode::Backtrack(error_position!(&i, ErrorKind::Slice))) ); assert!(AsChar::is_hex_digit(b'0')); @@ -267,12 +304,12 @@ mod complete { #[test] fn oct_digit_test() { let i = &b"01234567;"[..]; - assert_parse!(oct_digit1(i), Ok((&b";"[..], &i[..i.len() - 1]))); + assert_parse!(oct_digit1.parse_peek(i), Ok((&b";"[..], &i[..i.len() - 1]))); let i = &b"8"[..]; assert_parse!( - oct_digit1(i), - Err(ErrMode::Backtrack(error_position!(i, ErrorKind::Slice))) + oct_digit1.parse_peek(i), + Err(ErrMode::Backtrack(error_position!(&i, ErrorKind::Slice))) ); assert!(AsChar::is_oct_digit(b'0')); @@ -290,7 +327,7 @@ mod complete { #[test] fn full_line_windows() { fn take_full_line(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { - (not_line_ending, line_ending).parse_next(i) + (not_line_ending, line_ending).parse_peek(i) } let input = b"abc\r\n"; let output = take_full_line(input); @@ -300,7 +337,7 @@ mod complete { #[test] fn full_line_unix() { fn take_full_line(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { - (not_line_ending, line_ending).parse_next(i) + (not_line_ending, line_ending).parse_peek(i) } let input = b"abc\n"; let output = take_full_line(input); @@ -310,87 +347,96 @@ mod complete { #[test] fn check_windows_lineending() { let input = b"\r\n"; - let output = line_ending(&input[..]); + let output = line_ending.parse_peek(&input[..]); assert_parse!(output, Ok((&b""[..], &b"\r\n"[..]))); } #[test] fn check_unix_lineending() { let input = b"\n"; - let output = line_ending(&input[..]); + let output = line_ending.parse_peek(&input[..]); assert_parse!(output, Ok((&b""[..], &b"\n"[..]))); } #[test] fn cr_lf() { - assert_parse!(crlf(&b"\r\na"[..]), Ok((&b"a"[..], &b"\r\n"[..]))); assert_parse!( - crlf(&b"\r"[..]), + crlf.parse_peek(&b"\r\na"[..]), + Ok((&b"a"[..], &b"\r\n"[..])) + ); + assert_parse!( + crlf.parse_peek(&b"\r"[..]), Err(ErrMode::Backtrack(error_position!( - &b"\r"[..], + &&b"\r"[..], ErrorKind::Tag ))) ); assert_parse!( - crlf(&b"\ra"[..]), + crlf.parse_peek(&b"\ra"[..]), Err(ErrMode::Backtrack(error_position!( - &b"\ra"[..], + &&b"\ra"[..], ErrorKind::Tag ))) ); - assert_parse!(crlf("\r\na"), Ok(("a", "\r\n"))); + assert_parse!(crlf.parse_peek("\r\na"), Ok(("a", "\r\n"))); assert_parse!( - crlf("\r"), - Err(ErrMode::Backtrack(error_position!("\r", ErrorKind::Tag))) + crlf.parse_peek("\r"), + Err(ErrMode::Backtrack(error_position!(&"\r", ErrorKind::Tag))) ); assert_parse!( - crlf("\ra"), - Err(ErrMode::Backtrack(error_position!("\ra", ErrorKind::Tag))) + crlf.parse_peek("\ra"), + Err(ErrMode::Backtrack(error_position!(&"\ra", ErrorKind::Tag))) ); } #[test] fn end_of_line() { - assert_parse!(line_ending(&b"\na"[..]), Ok((&b"a"[..], &b"\n"[..]))); - assert_parse!(line_ending(&b"\r\na"[..]), Ok((&b"a"[..], &b"\r\n"[..]))); assert_parse!( - line_ending(&b"\r"[..]), + line_ending.parse_peek(&b"\na"[..]), + Ok((&b"a"[..], &b"\n"[..])) + ); + assert_parse!( + line_ending.parse_peek(&b"\r\na"[..]), + Ok((&b"a"[..], &b"\r\n"[..])) + ); + 
assert_parse!( + line_ending.parse_peek(&b"\r"[..]), Err(ErrMode::Backtrack(error_position!( - &b"\r"[..], + &&b"\r"[..], ErrorKind::Tag ))) ); assert_parse!( - line_ending(&b"\ra"[..]), + line_ending.parse_peek(&b"\ra"[..]), Err(ErrMode::Backtrack(error_position!( - &b"\ra"[..], + &&b"\ra"[..], ErrorKind::Tag ))) ); - assert_parse!(line_ending("\na"), Ok(("a", "\n"))); - assert_parse!(line_ending("\r\na"), Ok(("a", "\r\n"))); + assert_parse!(line_ending.parse_peek("\na"), Ok(("a", "\n"))); + assert_parse!(line_ending.parse_peek("\r\na"), Ok(("a", "\r\n"))); assert_parse!( - line_ending("\r"), - Err(ErrMode::Backtrack(error_position!("\r", ErrorKind::Tag))) + line_ending.parse_peek("\r"), + Err(ErrMode::Backtrack(error_position!(&"\r", ErrorKind::Tag))) ); assert_parse!( - line_ending("\ra"), - Err(ErrMode::Backtrack(error_position!("\ra", ErrorKind::Tag))) + line_ending.parse_peek("\ra"), + Err(ErrMode::Backtrack(error_position!(&"\ra", ErrorKind::Tag))) ); } fn digit_to_i16(input: &str) -> IResult<&str, i16> { let i = input; - let (i, opt_sign) = opt(alt(('+', '-'))).parse_next(i)?; + let (i, opt_sign) = opt(alt(('+', '-'))).parse_peek(i)?; let sign = match opt_sign { Some('+') | None => true, Some('-') => false, _ => unreachable!(), }; - let (i, s) = digit1::<_, crate::error::Error<_>>(i)?; + let (i, s) = digit1::<_, InputError<_>>.parse_peek(i)?; match s.parse_slice() { Some(n) => { if sign { @@ -399,15 +445,15 @@ mod complete { Ok((i, -n)) } } - None => Err(ErrMode::from_error_kind(i, ErrorKind::Verify)), + None => Err(ErrMode::from_error_kind(&i, ErrorKind::Verify)), } } fn digit_to_u32(i: &str) -> IResult<&str, u32> { - let (i, s) = digit1(i)?; + let (i, s) = digit1.parse_peek(i)?; match s.parse_slice() { Some(n) => Ok((i, n)), - None => Err(ErrMode::from_error_kind(i, ErrorKind::Verify)), + None => Err(ErrMode::from_error_kind(&i, ErrorKind::Verify)), } } @@ -416,7 +462,7 @@ mod complete { #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253 fn ints(s in "\\PC*") { let res1 = digit_to_i16(&s); - let res2 = dec_int(s.as_str()); + let res2 = dec_int.parse_peek(s.as_str()); assert_eq!(res1, res2); } @@ -424,7 +470,7 @@ mod complete { #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253 fn uints(s in "\\PC*") { let res1 = digit_to_u32(&s); - let res2 = dec_uint(s.as_str()); + let res2 = dec_uint.parse_peek(s.as_str()); assert_eq!(res1, res2); } } @@ -432,13 +478,13 @@ mod complete { #[test] fn hex_uint_tests() { fn hex_u32(input: &[u8]) -> IResult<&[u8], u32> { - hex_uint(input) + hex_uint.parse_peek(input) } assert_parse!( hex_u32(&b";"[..]), Err(ErrMode::Backtrack(error_position!( - &b";"[..], + &&b";"[..], ErrorKind::Slice ))) ); @@ -449,14 +495,14 @@ mod complete { assert_parse!( hex_u32(&b"00c5a31be2;"[..]), // overflow Err(ErrMode::Backtrack(error_position!( - &b"00c5a31be2;"[..], + &&b"00c5a31be2;"[..], ErrorKind::Verify ))) ); assert_parse!( hex_u32(&b"c5a31be201;"[..]), // overflow Err(ErrMode::Backtrack(error_position!( - &b"c5a31be201;"[..], + &&b"c5a31be201;"[..], ErrorKind::Verify ))) ); @@ -464,14 +510,14 @@ mod complete { assert_parse!( hex_u32(&b"ffffffffffffffff;"[..]), // overflow Err(ErrMode::Backtrack(error_position!( - &b"ffffffffffffffff;"[..], + &&b"ffffffffffffffff;"[..], ErrorKind::Verify ))) ); assert_parse!( hex_u32(&b"ffffffffffffffff"[..]), // overflow Err(ErrMode::Backtrack(error_position!( - &b"ffffffffffffffff"[..], + &&b"ffffffffffffffff"[..], ErrorKind::Verify ))) ); @@ -482,7 +528,7 @@ mod 
complete { #[test] #[cfg(feature = "std")] fn float_test() { - let mut test_cases = vec![ + let test_cases = [ "+3.14", "3.14", "-3.14", @@ -500,45 +546,71 @@ mod complete { "-1.234E-12", "-1.234e-12", "0.00000000000000000087", + "inf", + "Inf", + "infinity", + "Infinity", + "-inf", + "-Inf", + "-infinity", + "-Infinity", + "+inf", + "+Inf", + "+infinity", + "+Infinity", ]; - for test in test_cases.drain(..) { + for test in test_cases { let expected32 = str::parse::<f32>(test).unwrap(); let expected64 = str::parse::<f64>(test).unwrap(); println!("now parsing: {} -> {}", test, expected32); - let larger = test.to_string(); - - assert_parse!(float(larger.as_bytes()), Ok((&b""[..], expected32))); - assert_parse!(float(&larger[..]), Ok(("", expected32))); - - assert_parse!(float(larger.as_bytes()), Ok((&b""[..], expected64))); - assert_parse!(float(&larger[..]), Ok(("", expected64))); + assert_parse!( + float.parse_peek(test.as_bytes()), + Ok((&b""[..], expected32)) + ); + assert_parse!(float.parse_peek(test), Ok(("", expected32))); + + assert_parse!( + float.parse_peek(test.as_bytes()), + Ok((&b""[..], expected64)) + ); + assert_parse!(float.parse_peek(test), Ok(("", expected64))); } let remaining_exponent = "-1.234E-"; assert_parse!( - float::<_, f64, _>(remaining_exponent), - Err(ErrMode::Cut(Error { - input: "", - kind: ErrorKind::Slice - })) + float::<_, f64, _>.parse_peek(remaining_exponent), + Err(ErrMode::Cut(InputError::new("", ErrorKind::Slice))) ); - let (_i, nan) = float::<_, f32, ()>("NaN").unwrap(); - assert!(nan.is_nan()); + let nan_test_cases = ["nan", "NaN", "NAN"]; - let (_i, inf) = float::<_, f32, ()>("inf").unwrap(); - assert!(inf.is_infinite()); - let (_i, inf) = float::<_, f32, ()>("infinite").unwrap(); - assert!(inf.is_infinite()); + for test in nan_test_cases { + println!("now parsing: {}", test); + + let (remaining, parsed) = float::<_, f32, ()>.parse_peek(test.as_bytes()).unwrap(); + assert!(parsed.is_nan()); + assert!(remaining.is_empty()); + + let (remaining, parsed) = float::<_, f32, ()>.parse_peek(test).unwrap(); + assert!(parsed.is_nan()); + assert!(remaining.is_empty()); + + let (remaining, parsed) = float::<_, f64, ()>.parse_peek(test.as_bytes()).unwrap(); + assert!(parsed.is_nan()); + assert!(remaining.is_empty()); + + let (remaining, parsed) = float::<_, f64, ()>.parse_peek(test).unwrap(); + assert!(parsed.is_nan()); + assert!(remaining.is_empty()); + } } #[cfg(feature = "std")] fn parse_f64(i: &str) -> IResult<&str, f64, ()> { - #[allow(deprecated)] // will just become `pub(crate)` later - match super::recognize_float_or_exceptions(i) { + match super::recognize_float_or_exceptions.parse_peek(i) { Err(e) => Err(e), Ok((i, s)) => { if s.is_empty() { @@ -559,7 +631,7 @@ mod complete { fn floats(s in "\\PC*") { println!("testing {}", s); let res1 = parse_f64(&s); - let res2 = float::<_, f64, ()>(s.as_str()); + let res2 = float::<_, f64, ()>.parse_peek(s.as_str()); assert_eq!(res1, res2); } } @@ -569,9 +641,9 @@ mod complete { fn complete_escaped_hang() { // issue #1336 "escaped hangs if normal parser accepts empty" fn escaped_string(input: &str) -> IResult<&str, &str> { - use crate::bytes::one_of; - use crate::character::alpha0; - escaped(alpha0, '\\', one_of("n")).parse_next(input) + use crate::ascii::alpha0; + use crate::token::one_of; + escaped(alpha0, '\\', one_of(['n'])).parse_peek(input) } escaped_string("7").unwrap(); @@ -582,16 +654,20 @@ mod complete { fn complete_escaped_hang_1118() { // issue ##1118 escaped does not work with empty string fn 
unquote(input: &str) -> IResult<&str, &str> { - use crate::bytes::one_of; + use crate::combinator::delimited; use crate::combinator::opt; - use crate::sequence::delimited; + use crate::token::one_of; delimited( '"', - escaped(opt(none_of(r#"\""#)), '\\', one_of(r#"\"rnt"#)), + escaped( + opt(none_of(['\\', '"'])), + '\\', + one_of(['\\', '"', 'r', 'n', 't']), + ), '"', ) - .parse_next(input) + .parse_peek(input) } assert_eq!(unquote(r#""""#), Ok(("", ""))); @@ -601,11 +677,11 @@ mod complete { #[allow(unused_variables)] #[test] fn complete_escaping() { - use crate::bytes::one_of; - use crate::character::{alpha1 as alpha, digit1 as digit}; + use crate::ascii::{alpha1 as alpha, digit1 as digit}; + use crate::token::one_of; fn esc(i: &[u8]) -> IResult<&[u8], &[u8]> { - escaped(alpha, '\\', one_of("\"n\\")).parse_next(i) + escaped(alpha, '\\', one_of(['\"', 'n', '\\'])).parse_peek(i) } assert_eq!(esc(&b"abcd;"[..]), Ok((&b";"[..], &b"abcd"[..]))); assert_eq!(esc(&b"ab\\\"cd;"[..]), Ok((&b";"[..], &b"ab\\\"cd"[..]))); @@ -615,21 +691,21 @@ mod complete { assert_eq!( esc(&b"AB\\"[..]), Err(ErrMode::Backtrack(error_position!( - &b""[..], + &&b""[..], ErrorKind::Token ))) ); assert_eq!( esc(&b"AB\\A"[..]), Err(ErrMode::Backtrack(error_node_position!( - &b"AB\\A"[..], + &&b"AB\\A"[..], ErrorKind::Token, - error_position!(&b"A"[..], ErrorKind::Verify) + error_position!(&&b"A"[..], ErrorKind::Verify) ))) ); fn esc2(i: &[u8]) -> IResult<&[u8], &[u8]> { - escaped(digit, '\\', one_of("\"n\\")).parse_next(i) + escaped(digit, '\\', one_of(['\"', 'n', '\\'])).parse_peek(i) } assert_eq!(esc2(&b"12\\nnn34"[..]), Ok((&b"nn34"[..], &b"12\\n"[..]))); } @@ -637,11 +713,11 @@ mod complete { #[cfg(feature = "alloc")] #[test] fn complete_escaping_str() { - use crate::bytes::one_of; - use crate::character::{alpha1 as alpha, digit1 as digit}; + use crate::ascii::{alpha1 as alpha, digit1 as digit}; + use crate::token::one_of; fn esc(i: &str) -> IResult<&str, &str> { - escaped(alpha, '\\', one_of("\"n\\")).parse_next(i) + escaped(alpha, '\\', one_of(['\"', 'n', '\\'])).parse_peek(i) } assert_eq!(esc("abcd;"), Ok((";", "abcd"))); assert_eq!(esc("ab\\\"cd;"), Ok((";", "ab\\\"cd"))); @@ -650,24 +726,24 @@ mod complete { assert_eq!(esc("ab\\\"12"), Ok(("12", "ab\\\""))); assert_eq!( esc("AB\\"), - Err(ErrMode::Backtrack(error_position!("", ErrorKind::Token))) + Err(ErrMode::Backtrack(error_position!(&"", ErrorKind::Token))) ); assert_eq!( esc("AB\\A"), Err(ErrMode::Backtrack(error_node_position!( - "AB\\A", + &"AB\\A", ErrorKind::Token, - error_position!("A", ErrorKind::Verify) + error_position!(&"A", ErrorKind::Verify) ))) ); fn esc2(i: &str) -> IResult<&str, &str> { - escaped(digit, '\\', one_of("\"n\\")).parse_next(i) + escaped(digit, '\\', one_of(['\"', 'n', '\\'])).parse_peek(i) } assert_eq!(esc2("12\\nnn34"), Ok(("nn34", "12\\n"))); fn esc3(i: &str) -> IResult<&str, &str> { - escaped(alpha, '\u{241b}', one_of("\"n")).parse_next(i) + escaped(alpha, '\u{241b}', one_of(['\"', 'n'])).parse_peek(i) } assert_eq!(esc3("ab␛ncd;"), Ok((";", "ab␛ncd"))); } @@ -675,8 +751,8 @@ mod complete { #[test] fn test_escaped_error() { fn esc(s: &str) -> IResult<&str, &str> { - use crate::character::digit1; - escaped(digit1, '\\', one_of("\"n\\")).parse_next(s) + use crate::ascii::digit1; + escaped(digit1, '\\', one_of(['\"', 'n', '\\'])).parse_peek(s) } assert_eq!(esc("abcd"), Ok(("abcd", ""))); @@ -685,7 +761,7 @@ mod complete { #[cfg(feature = "alloc")] #[test] fn complete_escape_transform() { - use crate::character::alpha1 as alpha; + 
use crate::ascii::alpha1 as alpha; #[cfg(feature = "alloc")] fn to_s(i: Vec<u8>) -> String { @@ -703,7 +779,7 @@ mod complete { )), ) .map(to_s) - .parse_next(i) + .parse_peek(i) } assert_eq!(esc(&b"abcd;"[..]), Ok((&b";"[..], String::from("abcd")))); @@ -723,16 +799,16 @@ mod complete { assert_eq!( esc(&b"AB\\"[..]), Err(ErrMode::Backtrack(error_position!( - &b""[..], + &&b""[..], ErrorKind::Tag ))) ); assert_eq!( esc(&b"AB\\A"[..]), Err(ErrMode::Backtrack(error_node_position!( - &b"AB\\A"[..], + &&b"AB\\A"[..], ErrorKind::Eof, - error_position!(&b"A"[..], ErrorKind::Tag) + error_position!(&&b"A"[..], ErrorKind::Tag) ))) ); @@ -746,7 +822,7 @@ mod complete { )), ) .map(to_s) - .parse_next(i) + .parse_peek(i) } assert_eq!( esc2(&b"abèDEF;"[..]), @@ -761,7 +837,7 @@ mod complete { #[cfg(feature = "std")] #[test] fn complete_escape_transform_str() { - use crate::character::alpha1 as alpha; + use crate::ascii::alpha1 as alpha; fn esc(i: &str) -> IResult<&str, String> { escaped_transform( @@ -769,7 +845,7 @@ mod complete { '\\', alt(("\\".value("\\"), "\"".value("\""), "n".value("\n"))), ) - .parse_next(i) + .parse_peek(i) } assert_eq!(esc("abcd;"), Ok((";", String::from("abcd")))); @@ -779,14 +855,14 @@ mod complete { assert_eq!(esc("ab\\\"12"), Ok(("12", String::from("ab\"")))); assert_eq!( esc("AB\\"), - Err(ErrMode::Backtrack(error_position!("", ErrorKind::Tag))) + Err(ErrMode::Backtrack(error_position!(&"", ErrorKind::Tag))) ); assert_eq!( esc("AB\\A"), Err(ErrMode::Backtrack(error_node_position!( - "AB\\A", + &"AB\\A", ErrorKind::Eof, - error_position!("A", ErrorKind::Tag) + error_position!(&"A", ErrorKind::Tag) ))) ); @@ -796,7 +872,7 @@ mod complete { '&', alt(("egrave;".value("è"), "agrave;".value("à"))), ) - .parse_next(i) + .parse_peek(i) } assert_eq!(esc2("abèDEF;"), Ok((";", String::from("abèDEF")))); assert_eq!( @@ -805,7 +881,7 @@ mod complete { ); fn esc3(i: &str) -> IResult<&str, String> { - escaped_transform(alpha, '␛', alt(("0".value("\0"), "n".value("\n")))).parse_next(i) + escaped_transform(alpha, '␛', alt(("0".value("\0"), "n".value("\n")))).parse_peek(i) } assert_eq!(esc3("a␛0bc␛n"), Ok(("", String::from("a\0bc\n")))); } @@ -814,8 +890,8 @@ mod complete { #[cfg(feature = "alloc")] fn test_escaped_transform_error() { fn esc_trans(s: &str) -> IResult<&str, String> { - use crate::character::digit1; - escaped_transform(digit1, '\\', "n").parse_next(s) + use crate::ascii::digit1; + escaped_transform(digit1, '\\', "n").parse_peek(s) } assert_eq!(esc_trans("abcd"), Ok(("abcd", String::new()))); @@ -825,8 +901,8 @@ mod complete { mod partial { use super::*; use crate::combinator::opt; - use crate::error::Error; use crate::error::ErrorKind; + use crate::error::InputError; use crate::error::{ErrMode, Needed}; use crate::stream::ParseSlice; use crate::IResult; @@ -835,7 +911,7 @@ mod partial { macro_rules! 
assert_parse( ($left: expr, $right: expr) => { - let res: $crate::IResult<_, _, Error<_>> = $left; + let res: $crate::IResult<_, _, InputError<_>> = $left; assert_eq!(res, $right); }; ); @@ -850,116 +926,116 @@ mod partial { let f: &[u8] = b" ;"; //assert_eq!(alpha1::<_, Error<_>>(a), Err(ErrMode::Incomplete(Needed::new(1)))); assert_parse!( - alpha1(Partial::new(a)), + alpha1.parse_peek(Partial::new(a)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - alpha1(Partial::new(b)), - Err(ErrMode::Backtrack(Error::new( + alpha1.parse_peek(Partial::new(b)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(b), ErrorKind::Slice ))) ); assert_eq!( - alpha1::<_, Error<_>>(Partial::new(c)), + alpha1::<_, InputError<_>>.parse_peek(Partial::new(c)), Ok((Partial::new(&c[1..]), &b"a"[..])) ); assert_eq!( - alpha1::<_, Error<_>>(Partial::new(d)), + alpha1::<_, InputError<_>>.parse_peek(Partial::new(d)), Ok((Partial::new("é12".as_bytes()), &b"az"[..])) ); assert_eq!( - digit1(Partial::new(a)), - Err(ErrMode::Backtrack(Error::new( + digit1.parse_peek(Partial::new(a)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(a), ErrorKind::Slice ))) ); assert_eq!( - digit1::<_, Error<_>>(Partial::new(b)), + digit1::<_, InputError<_>>.parse_peek(Partial::new(b)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - digit1(Partial::new(c)), - Err(ErrMode::Backtrack(Error::new( + digit1.parse_peek(Partial::new(c)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(c), ErrorKind::Slice ))) ); assert_eq!( - digit1(Partial::new(d)), - Err(ErrMode::Backtrack(Error::new( + digit1.parse_peek(Partial::new(d)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(d), ErrorKind::Slice ))) ); assert_eq!( - hex_digit1::<_, Error<_>>(Partial::new(a)), + hex_digit1::<_, InputError<_>>.parse_peek(Partial::new(a)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - hex_digit1::<_, Error<_>>(Partial::new(b)), + hex_digit1::<_, InputError<_>>.parse_peek(Partial::new(b)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - hex_digit1::<_, Error<_>>(Partial::new(c)), + hex_digit1::<_, InputError<_>>.parse_peek(Partial::new(c)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - hex_digit1::<_, Error<_>>(Partial::new(d)), + hex_digit1::<_, InputError<_>>.parse_peek(Partial::new(d)), Ok((Partial::new("zé12".as_bytes()), &b"a"[..])) ); assert_eq!( - hex_digit1(Partial::new(e)), - Err(ErrMode::Backtrack(Error::new( + hex_digit1.parse_peek(Partial::new(e)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(e), ErrorKind::Slice ))) ); assert_eq!( - oct_digit1(Partial::new(a)), - Err(ErrMode::Backtrack(Error::new( + oct_digit1.parse_peek(Partial::new(a)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(a), ErrorKind::Slice ))) ); assert_eq!( - oct_digit1::<_, Error<_>>(Partial::new(b)), + oct_digit1::<_, InputError<_>>.parse_peek(Partial::new(b)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - oct_digit1(Partial::new(c)), - Err(ErrMode::Backtrack(Error::new( + oct_digit1.parse_peek(Partial::new(c)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(c), ErrorKind::Slice ))) ); assert_eq!( - oct_digit1(Partial::new(d)), - Err(ErrMode::Backtrack(Error::new( + oct_digit1.parse_peek(Partial::new(d)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(d), ErrorKind::Slice ))) ); assert_eq!( - alphanumeric1::<_, Error<_>>(Partial::new(a)), + alphanumeric1::<_, InputError<_>>.parse_peek(Partial::new(a)), Err(ErrMode::Incomplete(Needed::new(1))) ); 
//assert_eq!(fix_error!(b,(), alphanumeric1), Ok((empty, b))); assert_eq!( - alphanumeric1::<_, Error<_>>(Partial::new(c)), + alphanumeric1::<_, InputError<_>>.parse_peek(Partial::new(c)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - alphanumeric1::<_, Error<_>>(Partial::new(d)), + alphanumeric1::<_, InputError<_>>.parse_peek(Partial::new(d)), Ok((Partial::new("é12".as_bytes()), &b"az"[..])) ); assert_eq!( - space1::<_, Error<_>>(Partial::new(e)), + space1::<_, InputError<_>>.parse_peek(Partial::new(e)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - space1::<_, Error<_>>(Partial::new(f)), + space1::<_, InputError<_>>.parse_peek(Partial::new(f)), Ok((Partial::new(&b";"[..]), &b" "[..])) ); } @@ -973,112 +1049,112 @@ mod partial { let d = "azé12"; let e = " "; assert_eq!( - alpha1::<_, Error<_>>(Partial::new(a)), + alpha1::<_, InputError<_>>.parse_peek(Partial::new(a)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - alpha1(Partial::new(b)), - Err(ErrMode::Backtrack(Error::new( + alpha1.parse_peek(Partial::new(b)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(b), ErrorKind::Slice ))) ); assert_eq!( - alpha1::<_, Error<_>>(Partial::new(c)), + alpha1::<_, InputError<_>>.parse_peek(Partial::new(c)), Ok((Partial::new(&c[1..]), "a")) ); assert_eq!( - alpha1::<_, Error<_>>(Partial::new(d)), + alpha1::<_, InputError<_>>.parse_peek(Partial::new(d)), Ok((Partial::new("é12"), "az")) ); assert_eq!( - digit1(Partial::new(a)), - Err(ErrMode::Backtrack(Error::new( + digit1.parse_peek(Partial::new(a)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(a), ErrorKind::Slice ))) ); assert_eq!( - digit1::<_, Error<_>>(Partial::new(b)), + digit1::<_, InputError<_>>.parse_peek(Partial::new(b)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - digit1(Partial::new(c)), - Err(ErrMode::Backtrack(Error::new( + digit1.parse_peek(Partial::new(c)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(c), ErrorKind::Slice ))) ); assert_eq!( - digit1(Partial::new(d)), - Err(ErrMode::Backtrack(Error::new( + digit1.parse_peek(Partial::new(d)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(d), ErrorKind::Slice ))) ); assert_eq!( - hex_digit1::<_, Error<_>>(Partial::new(a)), + hex_digit1::<_, InputError<_>>.parse_peek(Partial::new(a)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - hex_digit1::<_, Error<_>>(Partial::new(b)), + hex_digit1::<_, InputError<_>>.parse_peek(Partial::new(b)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - hex_digit1::<_, Error<_>>(Partial::new(c)), + hex_digit1::<_, InputError<_>>.parse_peek(Partial::new(c)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - hex_digit1::<_, Error<_>>(Partial::new(d)), + hex_digit1::<_, InputError<_>>.parse_peek(Partial::new(d)), Ok((Partial::new("zé12"), "a")) ); assert_eq!( - hex_digit1(Partial::new(e)), - Err(ErrMode::Backtrack(Error::new( + hex_digit1.parse_peek(Partial::new(e)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(e), ErrorKind::Slice ))) ); assert_eq!( - oct_digit1(Partial::new(a)), - Err(ErrMode::Backtrack(Error::new( + oct_digit1.parse_peek(Partial::new(a)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(a), ErrorKind::Slice ))) ); assert_eq!( - oct_digit1::<_, Error<_>>(Partial::new(b)), + oct_digit1::<_, InputError<_>>.parse_peek(Partial::new(b)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - oct_digit1(Partial::new(c)), - Err(ErrMode::Backtrack(Error::new( + oct_digit1.parse_peek(Partial::new(c)), + 
Err(ErrMode::Backtrack(InputError::new( Partial::new(c), ErrorKind::Slice ))) ); assert_eq!( - oct_digit1(Partial::new(d)), - Err(ErrMode::Backtrack(Error::new( + oct_digit1.parse_peek(Partial::new(d)), + Err(ErrMode::Backtrack(InputError::new( Partial::new(d), ErrorKind::Slice ))) ); assert_eq!( - alphanumeric1::<_, Error<_>>(Partial::new(a)), + alphanumeric1::<_, InputError<_>>.parse_peek(Partial::new(a)), Err(ErrMode::Incomplete(Needed::new(1))) ); //assert_eq!(fix_error!(b,(), alphanumeric1), Ok((empty, b))); assert_eq!( - alphanumeric1::<_, Error<_>>(Partial::new(c)), + alphanumeric1::<_, InputError<_>>.parse_peek(Partial::new(c)), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_eq!( - alphanumeric1::<_, Error<_>>(Partial::new(d)), + alphanumeric1::<_, InputError<_>>.parse_peek(Partial::new(d)), Ok((Partial::new("é12"), "az")) ); assert_eq!( - space1::<_, Error<_>>(Partial::new(e)), + space1::<_, InputError<_>>.parse_peek(Partial::new(e)), Err(ErrMode::Incomplete(Needed::new(1))) ); } @@ -1093,52 +1169,52 @@ mod partial { let e = &b" \t\r\n;"[..]; let f = &b"123abcDEF;"[..]; - match alpha1::<_, Error<_>>(Partial::new(a)) { + match alpha1::<_, InputError<_>>.parse_peek(Partial::new(a)) { Ok((i, _)) => { let i = i.into_inner(); - assert_eq!(a.offset_to(i) + i.len(), a.len()); + assert_eq!(i.offset_from(&a) + i.len(), a.len()); } _ => panic!("wrong return type in offset test for alpha"), } - match digit1::<_, Error<_>>(Partial::new(b)) { + match digit1::<_, InputError<_>>.parse_peek(Partial::new(b)) { Ok((i, _)) => { let i = i.into_inner(); - assert_eq!(b.offset_to(i) + i.len(), b.len()); + assert_eq!(i.offset_from(&b) + i.len(), b.len()); } _ => panic!("wrong return type in offset test for digit"), } - match alphanumeric1::<_, Error<_>>(Partial::new(c)) { + match alphanumeric1::<_, InputError<_>>.parse_peek(Partial::new(c)) { Ok((i, _)) => { let i = i.into_inner(); - assert_eq!(c.offset_to(i) + i.len(), c.len()); + assert_eq!(i.offset_from(&c) + i.len(), c.len()); } _ => panic!("wrong return type in offset test for alphanumeric"), } - match space1::<_, Error<_>>(Partial::new(d)) { + match space1::<_, InputError<_>>.parse_peek(Partial::new(d)) { Ok((i, _)) => { let i = i.into_inner(); - assert_eq!(d.offset_to(i) + i.len(), d.len()); + assert_eq!(i.offset_from(&d) + i.len(), d.len()); } _ => panic!("wrong return type in offset test for space"), } - match multispace1::<_, Error<_>>(Partial::new(e)) { + match multispace1::<_, InputError<_>>.parse_peek(Partial::new(e)) { Ok((i, _)) => { let i = i.into_inner(); - assert_eq!(e.offset_to(i) + i.len(), e.len()); + assert_eq!(i.offset_from(&e) + i.len(), e.len()); } _ => panic!("wrong return type in offset test for multispace"), } - match hex_digit1::<_, Error<_>>(Partial::new(f)) { + match hex_digit1::<_, InputError<_>>.parse_peek(Partial::new(f)) { Ok((i, _)) => { let i = i.into_inner(); - assert_eq!(f.offset_to(i) + i.len(), f.len()); + assert_eq!(i.offset_from(&f) + i.len(), f.len()); } _ => panic!("wrong return type in offset test for hex_digit"), } - match oct_digit1::<_, Error<_>>(Partial::new(f)) { + match oct_digit1::<_, InputError<_>>.parse_peek(Partial::new(f)) { Ok((i, _)) => { let i = i.into_inner(); - assert_eq!(f.offset_to(i) + i.len(), f.len()); + assert_eq!(i.offset_from(&f) + i.len(), f.len()); } _ => panic!("wrong return type in offset test for oct_digit"), } @@ -1148,26 +1224,26 @@ mod partial { fn is_not_line_ending_bytes() { let a: &[u8] = b"ab12cd\nefgh"; assert_eq!( - not_line_ending::<_, Error<_>>(Partial::new(a)), + 
not_line_ending::<_, InputError<_>>.parse_peek(Partial::new(a)), Ok((Partial::new(&b"\nefgh"[..]), &b"ab12cd"[..])) ); let b: &[u8] = b"ab12cd\nefgh\nijkl"; assert_eq!( - not_line_ending::<_, Error<_>>(Partial::new(b)), + not_line_ending::<_, InputError<_>>.parse_peek(Partial::new(b)), Ok((Partial::new(&b"\nefgh\nijkl"[..]), &b"ab12cd"[..])) ); let c: &[u8] = b"ab12cd\r\nefgh\nijkl"; assert_eq!( - not_line_ending::<_, Error<_>>(Partial::new(c)), + not_line_ending::<_, InputError<_>>.parse_peek(Partial::new(c)), Ok((Partial::new(&b"\r\nefgh\nijkl"[..]), &b"ab12cd"[..])) ); let d: &[u8] = b"ab12cd"; assert_eq!( - not_line_ending::<_, Error<_>>(Partial::new(d)), - Err(ErrMode::Incomplete(Needed::Unknown)) + not_line_ending::<_, InputError<_>>.parse_peek(Partial::new(d)), + Err(ErrMode::Incomplete(Needed::new(1))) ); } @@ -1175,17 +1251,17 @@ mod partial { fn is_not_line_ending_str() { let f = "βèƒôřè\rÂßÇáƒƭèř"; assert_eq!( - not_line_ending(Partial::new(f)), - Err(ErrMode::Backtrack(Error::new( - Partial::new(f), + not_line_ending.parse_peek(Partial::new(f)), + Err(ErrMode::Backtrack(InputError::new( + Partial::new(&f[12..]), ErrorKind::Tag ))) ); let g2: &str = "ab12cd"; assert_eq!( - not_line_ending::<_, Error<_>>(Partial::new(g2)), - Err(ErrMode::Incomplete(Needed::Unknown)) + not_line_ending::<_, InputError<_>>.parse_peek(Partial::new(g2)), + Err(ErrMode::Incomplete(Needed::new(1))) ); } @@ -1193,24 +1269,24 @@ mod partial { fn hex_digit_test() { let i = &b"0123456789abcdefABCDEF;"[..]; assert_parse!( - hex_digit1(Partial::new(i)), + hex_digit1.parse_peek(Partial::new(i)), Ok((Partial::new(&b";"[..]), &i[..i.len() - 1])) ); let i = &b"g"[..]; assert_parse!( - hex_digit1(Partial::new(i)), + hex_digit1.parse_peek(Partial::new(i)), Err(ErrMode::Backtrack(error_position!( - Partial::new(i), + &Partial::new(i), ErrorKind::Slice ))) ); let i = &b"G"[..]; assert_parse!( - hex_digit1(Partial::new(i)), + hex_digit1.parse_peek(Partial::new(i)), Err(ErrMode::Backtrack(error_position!( - Partial::new(i), + &Partial::new(i), ErrorKind::Slice ))) ); @@ -1233,15 +1309,15 @@ mod partial { fn oct_digit_test() { let i = &b"01234567;"[..]; assert_parse!( - oct_digit1(Partial::new(i)), + oct_digit1.parse_peek(Partial::new(i)), Ok((Partial::new(&b";"[..]), &i[..i.len() - 1])) ); let i = &b"8"[..]; assert_parse!( - oct_digit1(Partial::new(i)), + oct_digit1.parse_peek(Partial::new(i)), Err(ErrMode::Backtrack(error_position!( - Partial::new(i), + &Partial::new(i), ErrorKind::Slice ))) ); @@ -1262,7 +1338,7 @@ mod partial { fn full_line_windows() { #[allow(clippy::type_complexity)] fn take_full_line(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (&[u8], &[u8])> { - (not_line_ending, line_ending).parse_next(i) + (not_line_ending, line_ending).parse_peek(i) } let input = b"abc\r\n"; let output = take_full_line(Partial::new(input)); @@ -1276,7 +1352,7 @@ mod partial { fn full_line_unix() { #[allow(clippy::type_complexity)] fn take_full_line(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (&[u8], &[u8])> { - (not_line_ending, line_ending).parse_next(i) + (not_line_ending, line_ending).parse_peek(i) } let input = b"abc\n"; let output = take_full_line(Partial::new(input)); @@ -1289,44 +1365,47 @@ mod partial { #[test] fn check_windows_lineending() { let input = b"\r\n"; - let output = line_ending(Partial::new(&input[..])); + let output = line_ending.parse_peek(Partial::new(&input[..])); assert_parse!(output, Ok((Partial::new(&b""[..]), &b"\r\n"[..]))); } #[test] fn check_unix_lineending() { let input = b"\n"; - let 
output = line_ending(Partial::new(&input[..])); + let output = line_ending.parse_peek(Partial::new(&input[..])); assert_parse!(output, Ok((Partial::new(&b""[..]), &b"\n"[..]))); } #[test] fn cr_lf() { assert_parse!( - crlf(Partial::new(&b"\r\na"[..])), + crlf.parse_peek(Partial::new(&b"\r\na"[..])), Ok((Partial::new(&b"a"[..]), &b"\r\n"[..])) ); assert_parse!( - crlf(Partial::new(&b"\r"[..])), + crlf.parse_peek(Partial::new(&b"\r"[..])), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_parse!( - crlf(Partial::new(&b"\ra"[..])), + crlf.parse_peek(Partial::new(&b"\ra"[..])), Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"\ra"[..]), + &Partial::new(&b"\ra"[..]), ErrorKind::Tag ))) ); - assert_parse!(crlf(Partial::new("\r\na")), Ok((Partial::new("a"), "\r\n"))); assert_parse!( - crlf(Partial::new("\r")), + crlf.parse_peek(Partial::new("\r\na")), + Ok((Partial::new("a"), "\r\n")) + ); + assert_parse!( + crlf.parse_peek(Partial::new("\r")), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_parse!( - crlf(Partial::new("\ra")), + crlf.parse_peek(Partial::new("\ra")), Err(ErrMode::Backtrack(error_position!( - Partial::new("\ra"), + &Partial::new("\ra"), ErrorKind::Tag ))) ); @@ -1335,41 +1414,41 @@ mod partial { #[test] fn end_of_line() { assert_parse!( - line_ending(Partial::new(&b"\na"[..])), + line_ending.parse_peek(Partial::new(&b"\na"[..])), Ok((Partial::new(&b"a"[..]), &b"\n"[..])) ); assert_parse!( - line_ending(Partial::new(&b"\r\na"[..])), + line_ending.parse_peek(Partial::new(&b"\r\na"[..])), Ok((Partial::new(&b"a"[..]), &b"\r\n"[..])) ); assert_parse!( - line_ending(Partial::new(&b"\r"[..])), + line_ending.parse_peek(Partial::new(&b"\r"[..])), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_parse!( - line_ending(Partial::new(&b"\ra"[..])), + line_ending.parse_peek(Partial::new(&b"\ra"[..])), Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"\ra"[..]), + &Partial::new(&b"\ra"[..]), ErrorKind::Tag ))) ); assert_parse!( - line_ending(Partial::new("\na")), + line_ending.parse_peek(Partial::new("\na")), Ok((Partial::new("a"), "\n")) ); assert_parse!( - line_ending(Partial::new("\r\na")), + line_ending.parse_peek(Partial::new("\r\na")), Ok((Partial::new("a"), "\r\n")) ); assert_parse!( - line_ending(Partial::new("\r")), + line_ending.parse_peek(Partial::new("\r")), Err(ErrMode::Incomplete(Needed::new(1))) ); assert_parse!( - line_ending(Partial::new("\ra")), + line_ending.parse_peek(Partial::new("\ra")), Err(ErrMode::Backtrack(error_position!( - Partial::new("\ra"), + &Partial::new("\ra"), ErrorKind::Tag ))) ); @@ -1377,14 +1456,14 @@ mod partial { fn digit_to_i16(input: Partial<&str>) -> IResult<Partial<&str>, i16> { let i = input; - let (i, opt_sign) = opt(one_of("+-")).parse_next(i)?; + let (i, opt_sign) = opt(one_of(['+', '-'])).parse_peek(i)?; let sign = match opt_sign { Some('+') | None => true, Some('-') => false, _ => unreachable!(), }; - let (i, s) = digit1::<_, crate::error::Error<_>>(i)?; + let (i, s) = digit1::<_, InputError<_>>.parse_peek(i)?; match s.parse_slice() { Some(n) => { if sign { @@ -1393,15 +1472,15 @@ mod partial { Ok((i, -n)) } } - None => Err(ErrMode::from_error_kind(i, ErrorKind::Verify)), + None => Err(ErrMode::from_error_kind(&i, ErrorKind::Verify)), } } fn digit_to_u32(i: Partial<&str>) -> IResult<Partial<&str>, u32> { - let (i, s) = digit1(i)?; + let (i, s) = digit1.parse_peek(i)?; match s.parse_slice() { Some(n) => Ok((i, n)), - None => Err(ErrMode::from_error_kind(i, ErrorKind::Verify)), + None => Err(ErrMode::from_error_kind(&i, 
ErrorKind::Verify)), } } @@ -1410,7 +1489,7 @@ mod partial { #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253 fn ints(s in "\\PC*") { let res1 = digit_to_i16(Partial::new(&s)); - let res2 = dec_int(Partial::new(s.as_str())); + let res2 = dec_int.parse_peek(Partial::new(s.as_str())); assert_eq!(res1, res2); } @@ -1418,7 +1497,7 @@ mod partial { #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253 fn uints(s in "\\PC*") { let res1 = digit_to_u32(Partial::new(&s)); - let res2 = dec_uint(Partial::new(s.as_str())); + let res2 = dec_uint.parse_peek(Partial::new(s.as_str())); assert_eq!(res1, res2); } } @@ -1426,13 +1505,13 @@ mod partial { #[test] fn hex_uint_tests() { fn hex_u32(input: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { - hex_uint(input) + hex_uint.parse_peek(input) } assert_parse!( hex_u32(Partial::new(&b";"[..])), Err(ErrMode::Backtrack(error_position!( - Partial::new(&b";"[..]), + &Partial::new(&b";"[..]), ErrorKind::Slice ))) ); @@ -1455,14 +1534,14 @@ mod partial { assert_parse!( hex_u32(Partial::new(&b"00c5a31be2;"[..])), // overflow Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"00c5a31be2;"[..]), + &Partial::new(&b"00c5a31be2;"[..]), ErrorKind::Verify ))) ); assert_parse!( hex_u32(Partial::new(&b"c5a31be201;"[..])), // overflow Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"c5a31be201;"[..]), + &Partial::new(&b"c5a31be201;"[..]), ErrorKind::Verify ))) ); @@ -1473,14 +1552,14 @@ mod partial { assert_parse!( hex_u32(Partial::new(&b"ffffffffffffffff;"[..])), // overflow Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"ffffffffffffffff;"[..]), + &Partial::new(&b"ffffffffffffffff;"[..]), ErrorKind::Verify ))) ); assert_parse!( hex_u32(Partial::new(&b"ffffffffffffffff"[..])), // overflow Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"ffffffffffffffff"[..]), + &Partial::new(&b"ffffffffffffffff"[..]), ErrorKind::Verify ))) ); diff --git a/vendor/winnow/src/bits/mod.rs b/vendor/winnow/src/binary/bits/mod.rs similarity index 51% rename from vendor/winnow/src/bits/mod.rs rename to vendor/winnow/src/binary/bits/mod.rs index 18cce6f..16a0f64 100644 --- a/vendor/winnow/src/bits/mod.rs +++ b/vendor/winnow/src/binary/bits/mod.rs @@ -4,11 +4,14 @@ #[cfg(test)] mod tests; -use crate::error::{ErrMode, ErrorConvert, ErrorKind, Needed, ParseError}; +use crate::error::{ErrMode, ErrorConvert, ErrorKind, Needed, ParserError}; use crate::lib::std::ops::{AddAssign, Div, Shl, Shr}; use crate::stream::{AsBytes, Stream, StreamIsPartial, ToUsize}; use crate::trace::trace; -use crate::{IResult, Parser}; +use crate::{unpeek, IResult, PResult, Parser}; + +/// Number of bits in a byte +const BYTE: usize = u8::BITS as usize; /// Converts a byte-level input to a bit-level input /// @@ -18,8 +21,8 @@ use crate::{IResult, Parser}; /// ``` /// use winnow::prelude::*; /// use winnow::Bytes; -/// use winnow::bits::{bits, take}; -/// use winnow::error::Error; +/// use winnow::binary::bits::{bits, take}; +/// use winnow::error::InputError; /// /// type Stream<'i> = &'i Bytes; /// @@ -28,7 +31,7 @@ use crate::{IResult, Parser}; /// } /// /// fn parse(input: Stream<'_>) -> IResult<Stream<'_>, (u8, u8)> { -/// bits::<_, _, Error<(_, usize)>, _, _>((take(4usize), take(8usize))).parse_next(input) +/// bits::<_, _, InputError<(_, usize)>, _, _>((take(4usize), take(8usize))).parse_peek(input) /// } /// /// let input = stream(&[0x12, 0x34, 0xff, 0xff]); @@ -45,25 +48,31 @@ use crate::{IResult, Parser}; /// ``` pub fn 
bits<I, O, E1, E2, P>(mut parser: P) -> impl Parser<I, O, E2> where - E1: ParseError<(I, usize)> + ErrorConvert<E2>, - E2: ParseError<I>, - I: Stream, + E1: ParserError<(I, usize)> + ErrorConvert<E2>, + E2: ParserError<I>, + I: Stream + Clone, P: Parser<(I, usize), O, E1>, { - trace("bits", move |input: I| { - match parser.parse_next((input, 0)) { - Ok(((rest, offset), result)) => { - // If the next byte has been partially read, it will be sliced away as well. - // The parser functions might already slice away all fully read bytes. - // That's why `offset / 8` isn't necessarily needed at all times. - let remaining_bytes_index = offset / 8 + if offset % 8 == 0 { 0 } else { 1 }; - let (input, _) = rest.next_slice(remaining_bytes_index); - Ok((input, result)) + trace( + "bits", + unpeek(move |input: I| { + match parser.parse_peek((input, 0)) { + Ok(((rest, offset), result)) => { + // If the next byte has been partially read, it will be sliced away as well. + // The parser functions might already slice away all fully read bytes. + // That's why `offset / BYTE` isn't necessarily needed at all times. + let remaining_bytes_index = + offset / BYTE + if offset % BYTE == 0 { 0 } else { 1 }; + let (input, _) = rest.peek_slice(remaining_bytes_index); + Ok((input, result)) + } + Err(ErrMode::Incomplete(n)) => { + Err(ErrMode::Incomplete(n.map(|u| u.get() / BYTE + 1))) + } + Err(e) => Err(e.convert()), } - Err(ErrMode::Incomplete(n)) => Err(ErrMode::Incomplete(n.map(|u| u.get() / 8 + 1))), - Err(e) => Err(e.convert()), - } - }) + }), + ) } /// Convert a [`bits`] stream back into a byte stream @@ -74,9 +83,9 @@ where /// ``` /// use winnow::prelude::*; /// use winnow::Bytes; -/// use winnow::bits::{bits, bytes, take}; +/// use winnow::binary::bits::{bits, bytes, take}; /// use winnow::combinator::rest; -/// use winnow::error::Error; +/// use winnow::error::InputError; /// /// type Stream<'i> = &'i Bytes; /// @@ -85,11 +94,11 @@ where /// } /// /// fn parse(input: Stream<'_>) -> IResult<Stream<'_>, (u8, u8, &[u8])> { -/// bits::<_, _, Error<(_, usize)>, _, _>(( +/// bits::<_, _, InputError<(_, usize)>, _, _>(( /// take(4usize), /// take(8usize), -/// bytes::<_, _, Error<_>, _, _>(rest) -/// )).parse_next(input) +/// bytes::<_, _, InputError<_>, _, _>(rest) +/// )).parse_peek(input) /// } /// /// let input = stream(&[0x12, 0x34, 0xff, 0xff]); @@ -98,31 +107,38 @@ where /// ``` pub fn bytes<I, O, E1, E2, P>(mut parser: P) -> impl Parser<(I, usize), O, E2> where - E1: ParseError<I> + ErrorConvert<E2>, - E2: ParseError<(I, usize)>, - I: Stream<Token = u8>, + E1: ParserError<I> + ErrorConvert<E2>, + E2: ParserError<(I, usize)>, + I: Stream<Token = u8> + Clone, P: Parser<I, O, E1>, { - trace("bytes", move |(input, offset): (I, usize)| { - let (inner, _) = if offset % 8 != 0 { - input.next_slice(1 + offset / 8) - } else { - input.next_slice(offset / 8) - }; - let i = (input, offset); - match parser.parse_next(inner) { - Ok((rest, res)) => Ok(((rest, 0), res)), - Err(ErrMode::Incomplete(Needed::Unknown)) => Err(ErrMode::Incomplete(Needed::Unknown)), - Err(ErrMode::Incomplete(Needed::Size(sz))) => Err(match sz.get().checked_mul(8) { - Some(v) => ErrMode::Incomplete(Needed::new(v)), - None => ErrMode::Cut(E2::assert( - i, - "overflow in turning needed bytes into needed bits", - )), - }), - Err(e) => Err(e.convert()), - } - }) + trace( + "bytes", + unpeek(move |(input, offset): (I, usize)| { + let (inner, _) = if offset % BYTE != 0 { + input.peek_slice(1 + offset / BYTE) + } else { + input.peek_slice(offset / BYTE) 
+ }; + let i = (input, offset); + match parser.parse_peek(inner) { + Ok((rest, res)) => Ok(((rest, 0), res)), + Err(ErrMode::Incomplete(Needed::Unknown)) => { + Err(ErrMode::Incomplete(Needed::Unknown)) + } + Err(ErrMode::Incomplete(Needed::Size(sz))) => { + Err(match sz.get().checked_mul(BYTE) { + Some(v) => ErrMode::Incomplete(Needed::new(v)), + None => ErrMode::Cut(E2::assert( + &i, + "overflow in turning needed bytes into needed bits", + )), + }) + } + Err(e) => Err(e.convert()), + } + }), + ) } /// Parse taking `count` bits @@ -131,8 +147,8 @@ where /// ```rust /// # use winnow::prelude::*; /// # use winnow::Bytes; -/// # use winnow::error::{Error, ErrorKind}; -/// use winnow::bits::take; +/// # use winnow::error::{InputError, ErrorKind}; +/// use winnow::binary::bits::take; /// /// type Stream<'i> = &'i Bytes; /// @@ -141,7 +157,7 @@ where /// } /// /// fn parser(input: (Stream<'_>, usize), count: usize)-> IResult<(Stream<'_>, usize), u8> { -/// take(count).parse_next(input) +/// take(count).parse_peek(input) /// } /// /// // Consumes 0 bits, returns 0 @@ -154,89 +170,51 @@ where /// assert_eq!(parser((stream(&[0b00010010]), 4), 4), Ok(((stream(&[]), 0), 0b00000010))); /// /// // Tries to consume 12 bits but only 8 are available -/// assert_eq!(parser((stream(&[0b00010010]), 0), 12), Err(winnow::error::ErrMode::Backtrack(Error{input: (stream(&[0b00010010]), 0), kind: ErrorKind::Eof }))); +/// assert_eq!(parser((stream(&[0b00010010]), 0), 12), Err(winnow::error::ErrMode::Backtrack(InputError::new((stream(&[0b00010010]), 0), ErrorKind::Eof)))); /// ``` #[inline(always)] -pub fn take<I, O, C, E: ParseError<(I, usize)>>(count: C) -> impl Parser<(I, usize), O, E> +pub fn take<I, O, C, E: ParserError<(I, usize)>>(count: C) -> impl Parser<(I, usize), O, E> where - I: Stream<Token = u8> + AsBytes + StreamIsPartial, + I: Stream<Token = u8> + AsBytes + StreamIsPartial + Clone, C: ToUsize, O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O>, { let count = count.to_usize(); - trace("take", move |input: (I, usize)| { - if input.is_partial() { - streaming_take_internal(input, count) - } else { - complete_take_internal(input, count) - } - }) -} - -pub(crate) fn streaming_take_internal<I, O, E: ParseError<(I, usize)>>( - (input, bit_offset): (I, usize), - count: usize, -) -> IResult<(I, usize), O, E> -where - I: Stream<Token = u8> + AsBytes, - O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O>, -{ - if count == 0 { - Ok(((input, bit_offset), 0u8.into())) - } else { - let cnt = (count + bit_offset).div(8); - if input.eof_offset() * 8 < count + bit_offset { - Err(ErrMode::Incomplete(Needed::new(count))) - } else { - let mut acc: O = 0_u8.into(); - let mut offset: usize = bit_offset; - let mut remaining: usize = count; - let mut end_offset: usize = 0; - - for byte in input.as_bytes().iter().copied().take(cnt + 1) { - if remaining == 0 { - break; - } - let val: O = if offset == 0 { - byte.into() - } else { - (byte << offset >> offset).into() - }; - - if remaining < 8 - offset { - acc += val >> (8 - offset - remaining); - end_offset = remaining + offset; - break; - } else { - acc += val << (remaining - (8 - offset)); - remaining -= 8 - offset; - offset = 0; - } + trace( + "take", + unpeek(move |input: (I, usize)| { + if <I as StreamIsPartial>::is_partial_supported() { + take_::<_, _, _, true>(input, count) + } else { + take_::<_, _, _, false>(input, count) } - let (input, _) = input.next_slice(cnt); - Ok(((input, end_offset), acc)) - } - } + }), + ) } 
-pub(crate) fn complete_take_internal<I, O, E: ParseError<(I, usize)>>( +fn take_<I, O, E: ParserError<(I, usize)>, const PARTIAL: bool>( (input, bit_offset): (I, usize), count: usize, ) -> IResult<(I, usize), O, E> where - I: Stream<Token = u8> + AsBytes, + I: StreamIsPartial, + I: Stream<Token = u8> + AsBytes + Clone, O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O>, { if count == 0 { Ok(((input, bit_offset), 0u8.into())) } else { - let cnt = (count + bit_offset).div(8); - if input.eof_offset() * 8 < count + bit_offset { - Err(ErrMode::from_error_kind( - (input, bit_offset), - ErrorKind::Eof, - )) + if input.eof_offset() * BYTE < count + bit_offset { + if PARTIAL && input.is_partial() { + Err(ErrMode::Incomplete(Needed::new(count))) + } else { + Err(ErrMode::from_error_kind( + &(input, bit_offset), + ErrorKind::Eof, + )) + } } else { + let cnt = (count + bit_offset).div(BYTE); let mut acc: O = 0_u8.into(); let mut offset: usize = bit_offset; let mut remaining: usize = count; @@ -252,17 +230,17 @@ where (byte << offset >> offset).into() }; - if remaining < 8 - offset { - acc += val >> (8 - offset - remaining); + if remaining < BYTE - offset { + acc += val >> (BYTE - offset - remaining); end_offset = remaining + offset; break; } else { - acc += val << (remaining - (8 - offset)); - remaining -= 8 - offset; + acc += val << (remaining - (BYTE - offset)); + remaining -= BYTE - offset; offset = 0; } } - let (input, _) = input.next_slice(cnt); + let (input, _) = input.peek_slice(cnt); Ok(((input, end_offset), acc)) } } @@ -275,8 +253,8 @@ where /// ```rust /// # use winnow::prelude::*; /// # use winnow::Bytes; -/// # use winnow::error::{Error, ErrorKind}; -/// use winnow::bits::tag; +/// # use winnow::error::{InputError, ErrorKind}; +/// use winnow::binary::bits::tag; /// /// type Stream<'i> = &'i Bytes; /// @@ -288,7 +266,7 @@ where /// /// Return Ok and the matching section of `input` if there's a match. /// /// Return Err if there's no match. /// fn parser(pattern: u8, count: u8, input: (Stream<'_>, usize)) -> IResult<(Stream<'_>, usize), u8> { -/// tag(pattern, count).parse_next(input) +/// tag(pattern, count).parse_peek(input) /// } /// /// // The lowest 4 bits of 0b00001111 match the lowest 4 bits of 0b11111111. @@ -306,42 +284,46 @@ where /// // The lowest 2 bits of 0b11111111 and 0b00000001 are different. /// assert_eq!( /// parser(0b000000_01, 2, (stream(&[0b111111_11]), 0)), -/// Err(winnow::error::ErrMode::Backtrack(Error { -/// input: (stream(&[0b11111111]), 0), -/// kind: ErrorKind::Tag -/// })) +/// Err(winnow::error::ErrMode::Backtrack(InputError::new( +/// (stream(&[0b11111111]), 0), +/// ErrorKind::Tag +/// ))) /// ); /// /// // The lowest 8 bits of 0b11111111 and 0b11111110 are different. 
/// assert_eq!( /// parser(0b11111110, 8, (stream(&[0b11111111]), 0)), -/// Err(winnow::error::ErrMode::Backtrack(Error { -/// input: (stream(&[0b11111111]), 0), -/// kind: ErrorKind::Tag -/// })) +/// Err(winnow::error::ErrMode::Backtrack(InputError::new( +/// (stream(&[0b11111111]), 0), +/// ErrorKind::Tag +/// ))) /// ); /// ``` #[inline(always)] #[doc(alias = "literal")] #[doc(alias = "just")] -pub fn tag<I, O, C, E: ParseError<(I, usize)>>( +pub fn tag<I, O, C, E: ParserError<(I, usize)>>( pattern: O, count: C, ) -> impl Parser<(I, usize), O, E> where - I: Stream<Token = u8> + AsBytes + StreamIsPartial, + I: Stream<Token = u8> + AsBytes + StreamIsPartial + Clone, C: ToUsize, O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O> + PartialEq, { let count = count.to_usize(); - trace("tag", move |input: (I, usize)| { - let inp = input.clone(); + trace("tag", move |input: &mut (I, usize)| { + let start = input.checkpoint(); - take(count).parse_next(input).and_then(|(i, o)| { + take(count).parse_next(input).and_then(|o| { if pattern == o { - Ok((i, o)) + Ok(o) } else { - Err(ErrMode::Backtrack(E::from_error_kind(inp, ErrorKind::Tag))) + input.reset(start); + Err(ErrMode::Backtrack(E::from_error_kind( + input, + ErrorKind::Tag, + ))) } }) }) @@ -354,8 +336,8 @@ where /// ```rust /// # use winnow::prelude::*; /// # use winnow::Bytes; -/// # use winnow::error::{Error, ErrorKind}; -/// use winnow::bits::bool; +/// # use winnow::error::{InputError, ErrorKind}; +/// use winnow::binary::bits::bool; /// /// type Stream<'i> = &'i Bytes; /// @@ -364,20 +346,20 @@ where /// } /// /// fn parse(input: (Stream<'_>, usize)) -> IResult<(Stream<'_>, usize), bool> { -/// bool.parse_next(input) +/// bool.parse_peek(input) /// } /// /// assert_eq!(parse((stream(&[0b10000000]), 0)), Ok(((stream(&[0b10000000]), 1), true))); /// assert_eq!(parse((stream(&[0b10000000]), 1)), Ok(((stream(&[0b10000000]), 2), false))); /// ``` #[doc(alias = "any")] -pub fn bool<I, E: ParseError<(I, usize)>>(input: (I, usize)) -> IResult<(I, usize), bool, E> +pub fn bool<I, E: ParserError<(I, usize)>>(input: &mut (I, usize)) -> PResult<bool, E> where - I: Stream<Token = u8> + AsBytes + StreamIsPartial, + I: Stream<Token = u8> + AsBytes + StreamIsPartial + Clone, { - trace("bool", |input: (I, usize)| { - let (res, bit): (_, u32) = take(1usize).parse_next(input)?; - Ok((res, bit != 0)) + trace("bool", |input: &mut (I, usize)| { + let bit: u32 = take(1usize).parse_next(input)?; + Ok(bit != 0) }) .parse_next(input) } diff --git a/vendor/winnow/src/bits/tests.rs b/vendor/winnow/src/binary/bits/tests.rs similarity index 76% rename from vendor/winnow/src/bits/tests.rs rename to vendor/winnow/src/binary/bits/tests.rs index 61dba2c..41207c6 100644 --- a/vendor/winnow/src/bits/tests.rs +++ b/vendor/winnow/src/binary/bits/tests.rs @@ -1,5 +1,5 @@ use super::*; -use crate::error::Error; +use crate::error::InputError; use crate::Partial; #[test] @@ -10,8 +10,8 @@ fn test_complete_byte_consumption_bits() { // Take 3 bit slices with sizes [4, 8, 4]. let result: IResult<&[u8], (u8, u8, u8)> = - bits::<_, _, Error<(&[u8], usize)>, _, _>((take(4usize), take(8usize), take(4usize))) - .parse_next(input); + bits::<_, _, InputError<(&[u8], usize)>, _, _>((take(4usize), take(8usize), take(4usize))) + .parse_peek(input); let output = result.expect("We take 2 bytes and the input is longer than 2 bytes"); @@ -34,7 +34,8 @@ fn test_partial_byte_consumption_bits() { // Take bit slices with sizes [4, 8]. 
let result: IResult<&[u8], (u8, u8)> = - bits::<_, _, Error<(&[u8], usize)>, _, _>((take(4usize), take(8usize))).parse_next(input); + bits::<_, _, InputError<(&[u8], usize)>, _, _>((take(4usize), take(8usize))) + .parse_peek(input); let output = result.expect("We take 1.5 bytes and the input is longer than 2 bytes"); @@ -54,7 +55,7 @@ fn test_incomplete_bits() { // Take bit slices with sizes [4, 8]. let result: IResult<_, (u8, u8)> = - bits::<_, _, Error<(_, usize)>, _, _>((take(4usize), take(8usize))).parse_next(input); + bits::<_, _, InputError<(_, usize)>, _, _>((take(4usize), take(8usize))).parse_peek(input); assert!(result.is_err()); let error = result.err().unwrap(); @@ -68,7 +69,7 @@ fn test_take_complete_0() { assert_eq!(count, 0usize); let offset = 0usize; - let result: crate::IResult<(&[u8], usize), usize> = take(count).parse_next((input, offset)); + let result: crate::IResult<(&[u8], usize), usize> = take(count).parse_peek((input, offset)); assert_eq!(result, Ok(((input, offset), 0))); } @@ -77,14 +78,14 @@ fn test_take_complete_0() { fn test_take_complete_eof() { let input = &[0b00010010][..]; - let result: crate::IResult<(&[u8], usize), usize> = take(1usize).parse_next((input, 8)); + let result: crate::IResult<(&[u8], usize), usize> = take(1usize).parse_peek((input, 8)); assert_eq!( result, - Err(crate::error::ErrMode::Backtrack(crate::error::Error { - input: (input, 8), - kind: ErrorKind::Eof - })) + Err(crate::error::ErrMode::Backtrack(InputError::new( + (input, 8), + ErrorKind::Eof + ))) ); } @@ -92,7 +93,7 @@ fn test_take_complete_eof() { fn test_take_complete_span_over_multiple_bytes() { let input = &[0b00010010, 0b00110100, 0b11111111, 0b11111111][..]; - let result: crate::IResult<(&[u8], usize), usize> = take(24usize).parse_next((input, 4)); + let result: crate::IResult<(&[u8], usize), usize> = take(24usize).parse_peek((input, 4)); assert_eq!( result, @@ -107,7 +108,7 @@ fn test_take_partial_0() { assert_eq!(count, 0usize); let offset = 0usize; - let result: crate::IResult<(_, usize), usize> = take(count).parse_next((input, offset)); + let result: crate::IResult<(_, usize), usize> = take(count).parse_peek((input, offset)); assert_eq!(result, Ok(((input, offset), 0))); } @@ -120,7 +121,7 @@ fn test_tag_partial_ok() { let value_to_tag = 0b0001; let result: crate::IResult<(_, usize), usize> = - tag(value_to_tag, bits_to_take).parse_next((input, offset)); + tag(value_to_tag, bits_to_take).parse_peek((input, offset)); assert_eq!(result, Ok(((input, bits_to_take), value_to_tag))); } @@ -133,14 +134,14 @@ fn test_tag_partial_err() { let value_to_tag = 0b1111; let result: crate::IResult<(_, usize), usize> = - tag(value_to_tag, bits_to_take).parse_next((input, offset)); + tag(value_to_tag, bits_to_take).parse_peek((input, offset)); assert_eq!( result, - Err(crate::error::ErrMode::Backtrack(crate::error::Error { - input: (input, offset), - kind: ErrorKind::Tag - })) + Err(crate::error::ErrMode::Backtrack(InputError::new( + (input, offset), + ErrorKind::Tag + ))) ); } @@ -148,7 +149,7 @@ fn test_tag_partial_err() { fn test_bool_0_complete() { let input = [0b10000000].as_ref(); - let result: crate::IResult<(&[u8], usize), bool> = bool((input, 0)); + let result: crate::IResult<(&[u8], usize), bool> = bool.parse_peek((input, 0)); assert_eq!(result, Ok(((input, 1), true))); } @@ -157,14 +158,14 @@ fn test_bool_0_complete() { fn test_bool_eof_complete() { let input = [0b10000000].as_ref(); - let result: crate::IResult<(&[u8], usize), bool> = bool((input, 8)); + let result: 
crate::IResult<(&[u8], usize), bool> = bool.parse_peek((input, 8)); assert_eq!( result, - Err(crate::error::ErrMode::Backtrack(crate::error::Error { - input: (input, 8), - kind: ErrorKind::Eof - })) + Err(crate::error::ErrMode::Backtrack(InputError::new( + (input, 8), + ErrorKind::Eof + ))) ); } @@ -172,7 +173,7 @@ fn test_bool_eof_complete() { fn test_bool_0_partial() { let input = Partial::new([0b10000000].as_ref()); - let result: crate::IResult<(Partial<&[u8]>, usize), bool> = bool((input, 0)); + let result: crate::IResult<(Partial<&[u8]>, usize), bool> = bool.parse_peek((input, 0)); assert_eq!(result, Ok(((input, 1), true))); } @@ -181,7 +182,7 @@ fn test_bool_0_partial() { fn test_bool_eof_partial() { let input = Partial::new([0b10000000].as_ref()); - let result: crate::IResult<(Partial<&[u8]>, usize), bool> = bool.parse_next((input, 8)); + let result: crate::IResult<(Partial<&[u8]>, usize), bool> = bool.parse_peek((input, 8)); assert_eq!( result, diff --git a/vendor/winnow/src/number/mod.rs b/vendor/winnow/src/binary/mod.rs similarity index 58% rename from vendor/winnow/src/number/mod.rs rename to vendor/winnow/src/binary/mod.rs index 1554ac7..a80ae6c 100644 --- a/vendor/winnow/src/number/mod.rs +++ b/vendor/winnow/src/binary/mod.rs @@ -2,18 +2,23 @@ #![allow(clippy::match_same_arms)] +pub mod bits; + #[cfg(test)] mod tests; -use crate::bytes::take; +use crate::combinator::repeat; use crate::error::ErrMode; use crate::error::ErrorKind; use crate::error::Needed; -use crate::error::ParseError; +use crate::error::ParserError; use crate::lib::std::ops::{Add, Shl}; +use crate::stream::Accumulate; use crate::stream::{AsBytes, Stream, StreamIsPartial}; +use crate::stream::{ToUsize, UpdateSlice}; +use crate::token::take; use crate::trace::trace; -use crate::IResult; +use crate::PResult; use crate::Parser; /// Configurable endianness @@ -36,34 +41,34 @@ pub enum Endianness { /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_u8; +/// use winnow::binary::be_u8; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u8> { -/// be_u8.parse_next(s) +/// be_u8.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); -/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(Error::new(&[][..], ErrorKind::Token)))); +/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&[][..], ErrorKind::Token)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_u8; +/// use winnow::binary::be_u8; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u8> { -/// be_u8.parse_next(s) +/// be_u8.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01abcd"[..])), Ok((Partial::new(&b"\x01abcd"[..]), 0x00))); /// assert_eq!(parser(Partial::new(&b""[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn be_u8<I, E: ParseError<I>>(input: I) -> IResult<I, u8, E> +pub fn be_u8<I, E: ParserError<I>>(input: &mut I) -> PResult<u8, E> where I: StreamIsPartial, I: Stream<Token = u8>, @@ -80,40 +85,40 @@ where /// # Example /// /// ```rust -/// # use 
winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_u16; +/// use winnow::binary::be_u16; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u16> { -/// be_u16.parse_next(s) +/// be_u16.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0003))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_u16; +/// use winnow::binary::be_u16; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u16> { -/// be_u16.parse_next(s) +/// be_u16.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x0001))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn be_u16<I, E: ParseError<I>>(input: I) -> IResult<I, u16, E> +pub fn be_u16<I, E: ParserError<I>>(input: &mut I) -> PResult<u16, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_u16", move |input: I| be_uint(input, 2)).parse_next(input) + trace("be_u16", move |input: &mut I| be_uint(input, 2)).parse_next(input) } /// Recognizes a big endian unsigned 3 byte integer. 
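(The signature migration visible in `be_u8`/`be_u16` above is uniform across this module: parsers now take `&mut I` and return a bare `PResult<O, E>`, while `parse_peek` preserves the old owned-input `(remaining, output)` shape that the doc examples rely on. A sketch of the two calling styles, assuming winnow 0.5:)

```rust
use winnow::binary::be_u16;
use winnow::error::InputError;
use winnow::prelude::*;

fn main() {
    // New style: the parser advances `input` in place and returns the value.
    let mut input = &b"\x00\x03rest"[..];
    let n = be_u16::<_, InputError<_>>.parse_next(&mut input).unwrap();
    assert_eq!((n, input), (0x0003, &b"rest"[..]));

    // Bridge style: `parse_peek` takes the input by value and returns the
    // old `(remaining, output)` pair, which is what the doc tests above use.
    let (rest, n) = be_u16::<_, InputError<_>>
        .parse_peek(&b"\x00\x03rest"[..])
        .unwrap();
    assert_eq!((rest, n), (&b"rest"[..], 0x0003u16));
}
```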
@@ -125,40 +130,40 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_u24; +/// use winnow::binary::be_u24; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u32> { -/// be_u24.parse_next(s) +/// be_u24.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x000305))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_u24; +/// use winnow::binary::be_u24; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { -/// be_u24.parse_next(s) +/// be_u24.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x000102))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(2)))); /// ``` #[inline(always)] -pub fn be_u24<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +pub fn be_u24<I, E: ParserError<I>>(input: &mut I) -> PResult<u32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_u23", move |input: I| be_uint(input, 3)).parse_next(input) + trace("be_u24", move |input: &mut I| be_uint(input, 3)).parse_next(input) } /// Recognizes a big endian unsigned 4 bytes integer.
@@ -170,40 +175,40 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_u32; +/// use winnow::binary::be_u32; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u32> { -/// be_u32.parse_next(s) +/// be_u32.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00030507))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_u32; +/// use winnow::binary::be_u32; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { -/// be_u32.parse_next(s) +/// be_u32.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x00010203))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(3)))); /// ``` #[inline(always)] -pub fn be_u32<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +pub fn be_u32<I, E: ParserError<I>>(input: &mut I) -> PResult<u32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_u32", move |input: I| be_uint(input, 4)).parse_next(input) + trace("be_u32", move |input: &mut I| be_uint(input, 4)).parse_next(input) } /// Recognizes a big endian unsigned 8 bytes integer. 
@@ -215,40 +220,40 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_u64; +/// use winnow::binary::be_u64; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u64> { -/// be_u64.parse_next(s) +/// be_u64.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0001020304050607))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_u64; +/// use winnow::binary::be_u64; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u64> { -/// be_u64.parse_next(s) +/// be_u64.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x0001020304050607))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(7)))); /// ``` #[inline(always)] -pub fn be_u64<I, E: ParseError<I>>(input: I) -> IResult<I, u64, E> +pub fn be_u64<I, E: ParserError<I>>(input: &mut I) -> PResult<u64, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_u64", move |input: I| be_uint(input, 8)).parse_next(input) + trace("be_u64", move |input: &mut I| be_uint(input, 8)).parse_next(input) } /// Recognizes a big endian unsigned 16 bytes integer. 
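(Each `be_uN` above delegates to the private `be_uint` helper, whose updated signature appears a little further down; it folds the taken bytes most-significant first. A simplified model of that fold, ignoring the bounds and streaming checks the real helper performs:)

```rust
// Conceptual core of `be_uint`, simplified: a big-endian fold where each
// new byte shifts the accumulator up by 8 bits.
fn be_fold(bytes: &[u8]) -> u64 {
    bytes.iter().fold(0u64, |acc, &b| (acc << 8) + u64::from(b))
}

fn main() {
    assert_eq!(be_fold(&[0x00, 0x03]), 0x0003);
    assert_eq!(be_fold(&[0x00, 0x01, 0x02, 0x03]), 0x0001_0203);
}
```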
@@ -260,44 +265,44 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_u128; +/// use winnow::binary::be_u128; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u128> { -/// be_u128.parse_next(s) +/// be_u128.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00010203040506070001020304050607))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_u128; +/// use winnow::binary::be_u128; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u128> { -/// be_u128.parse_next(s) +/// be_u128.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x00010203040506070809101112131415))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(15)))); /// ``` #[inline(always)] -pub fn be_u128<I, E: ParseError<I>>(input: I) -> IResult<I, u128, E> +pub fn be_u128<I, E: ParserError<I>>(input: &mut I) -> PResult<u128, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_u128", move |input: I| be_uint(input, 16)).parse_next(input) + trace("be_u128", move |input: &mut I| be_uint(input, 16)).parse_next(input) } #[inline] -fn be_uint<I, Uint, E: ParseError<I>>(input: I, bound: usize) -> IResult<I, Uint, E> +fn be_uint<I, Uint, E: ParserError<I>>(input: &mut I, bound: usize) -> PResult<Uint, E> where I: StreamIsPartial, I: Stream<Token = u8>, @@ -332,34 +337,34 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_i8; +/// use winnow::binary::be_i8; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i8> { -/// be_i8.parse_next(s) +/// be_i8.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); -/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(Error::new(&[][..], ErrorKind::Token)))); +/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&[][..], ErrorKind::Token)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_i8; +/// use winnow::binary::be_i8; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i8> { -/// be_i8.parse_next(s) +/// be_i8.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01abcd"[..])), Ok((Partial::new(&b"\x01abcd"[..]), 0x00))); 
/// assert_eq!(parser(Partial::new(&b""[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn be_i8<I, E: ParseError<I>>(input: I) -> IResult<I, i8, E> +pub fn be_i8<I, E: ParserError<I>>(input: &mut I) -> PResult<i8, E> where I: StreamIsPartial, I: Stream<Token = u8>, @@ -376,41 +381,41 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_i16; +/// use winnow::binary::be_i16; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i16> { -/// be_i16.parse_next(s) +/// be_i16.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0003))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_i16; +/// use winnow::binary::be_i16; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i16> { -/// be_i16.parse_next(s) +/// be_i16.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x0001))); /// assert_eq!(parser(Partial::new(&b""[..])), Err(ErrMode::Incomplete(Needed::new(2)))); /// ``` #[inline(always)] -pub fn be_i16<I, E: ParseError<I>>(input: I) -> IResult<I, i16, E> +pub fn be_i16<I, E: ParserError<I>>(input: &mut I) -> PResult<i16, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_i16", move |input: I| { - be_uint::<_, u16, _>(input, 2).map(|(i, n)| (i, n as i16)) + trace("be_i16", move |input: &mut I| { + be_uint::<_, u16, _>(input, 2).map(|n| n as i16) }) .parse_next(input) } @@ -424,48 +429,48 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_i24; +/// use winnow::binary::be_i24; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i32> { -/// be_i24.parse_next(s) +/// be_i24.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x000305))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_i24; +/// use winnow::binary::be_i24; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i32> { -/// be_i24.parse_next(s) +/// be_i24.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x000102))); /// 
assert_eq!(parser(Partial::new(&b""[..])), Err(ErrMode::Incomplete(Needed::new(3)))); /// ``` #[inline(always)] -pub fn be_i24<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +pub fn be_i24<I, E: ParserError<I>>(input: &mut I) -> PResult<i32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_i24", move |input: I| { - be_uint::<_, u32, _>(input, 3).map(|(i, n)| { + trace("be_i24", move |input: &mut I| { + be_uint::<_, u32, _>(input, 3).map(|n| { // Same as the unsigned version but we need to sign-extend manually here let n = if n & 0x80_00_00 != 0 { (n | 0xff_00_00_00) as i32 } else { n as i32 }; - (i, n) + n }) }) .parse_next(input) @@ -480,41 +485,41 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_i32; +/// use winnow::binary::be_i32; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i32> { -/// be_i32.parse_next(s) +/// be_i32.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00030507))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_i32; +/// use winnow::binary::be_i32; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i32> { -/// be_i32.parse_next(s) +/// be_i32.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x00010203))); /// assert_eq!(parser(Partial::new(&b""[..])), Err(ErrMode::Incomplete(Needed::new(4)))); /// ``` #[inline(always)] -pub fn be_i32<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +pub fn be_i32<I, E: ParserError<I>>(input: &mut I) -> PResult<i32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_i32", move |input: I| { - be_uint::<_, u32, _>(input, 4).map(|(i, n)| (i, n as i32)) + trace("be_i32", move |input: &mut I| { + be_uint::<_, u32, _>(input, 4).map(|n| n as i32) }) .parse_next(input) } @@ -528,41 +533,41 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_i64; +/// use winnow::binary::be_i64; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i64> { -/// be_i64.parse_next(s) +/// be_i64.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0001020304050607))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use 
winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_i64; +/// use winnow::binary::be_i64; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i64> { -/// be_i64.parse_next(s) +/// be_i64.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x0001020304050607))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(7)))); /// ``` #[inline(always)] -pub fn be_i64<I, E: ParseError<I>>(input: I) -> IResult<I, i64, E> +pub fn be_i64<I, E: ParserError<I>>(input: &mut I) -> PResult<i64, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_i64", move |input: I| { - be_uint::<_, u64, _>(input, 8).map(|(i, n)| (i, n as i64)) + trace("be_i64", move |input: &mut I| { + be_uint::<_, u64, _>(input, 8).map(|n| n as i64) }) .parse_next(input) } @@ -576,41 +581,41 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_i128; +/// use winnow::binary::be_i128; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i128> { -/// be_i128.parse_next(s) +/// be_i128.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00010203040506070001020304050607))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_i128; +/// use winnow::binary::be_i128; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i128> { -/// be_i128.parse_next(s) +/// be_i128.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x00010203040506070809101112131415))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(15)))); /// ``` #[inline(always)] -pub fn be_i128<I, E: ParseError<I>>(input: I) -> IResult<I, i128, E> +pub fn be_i128<I, E: ParserError<I>>(input: &mut I) -> PResult<i128, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_i128", move |input: I| { - be_uint::<_, u128, _>(input, 16).map(|(i, n)| (i, n as i128)) + trace("be_i128", move |input: &mut I| { + be_uint::<_, u128, _>(input, 16).map(|n| n as i128) }) .parse_next(input) } @@ -624,34 +629,34 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_u8; +/// use winnow::binary::le_u8; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u8> { -/// le_u8.parse_next(s) +/// 
le_u8.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); -/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(Error::new(&[][..], ErrorKind::Token)))); +/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&[][..], ErrorKind::Token)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_u8; +/// use winnow::binary::le_u8; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u8> { -/// le_u8.parse_next(s) +/// le_u8.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01abcd"[..])), Ok((Partial::new(&b"\x01abcd"[..]), 0x00))); /// assert_eq!(parser(Partial::new(&b""[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn le_u8<I, E: ParseError<I>>(input: I) -> IResult<I, u8, E> +pub fn le_u8<I, E: ParserError<I>>(input: &mut I) -> PResult<u8, E> where I: StreamIsPartial, I: Stream<Token = u8>, @@ -668,40 +673,40 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_u16; +/// use winnow::binary::le_u16; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u16> { -/// le_u16.parse_next(s) +/// le_u16.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0300))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_u16; +/// use winnow::binary::le_u16; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u16> { -/// le_u16::<_, Error<_>>.parse_next(s) +/// le_u16::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x0100))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn le_u16<I, E: ParseError<I>>(input: I) -> IResult<I, u16, E> +pub fn le_u16<I, E: ParserError<I>>(input: &mut I) -> PResult<u16, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("le_u16", move |input: I| le_uint(input, 2)).parse_next(input) + trace("le_u16", move |input: &mut I| le_uint(input, 2)).parse_next(input) } /// Recognizes a little endian unsigned 3 byte integer. 
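(One detail worth calling out for the signed 24-bit parsers, `be_i24` above and `le_i24` further down: Rust has no `i24`, so the value is read into a `u32` and sign-extended by hand when bit 23 is set. The arithmetic in isolation:)

```rust
// Sign-extend a 24-bit value held in a u32, as be_i24/le_i24 do:
// if bit 23 is set, fill the top byte with ones before casting.
fn sign_extend_24(n: u32) -> i32 {
    if n & 0x80_00_00 != 0 {
        (n | 0xff_00_00_00) as i32
    } else {
        n as i32
    }
}

fn main() {
    assert_eq!(sign_extend_24(0x00_00_01), 1);
    assert_eq!(sign_extend_24(0xff_ff_ff), -1);
    assert_eq!(sign_extend_24(0x80_00_00), -8_388_608);
}
```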
@@ -713,40 +718,40 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_u24; +/// use winnow::binary::le_u24; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u32> { -/// le_u24.parse_next(s) +/// le_u24.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x050300))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_u24; +/// use winnow::binary::le_u24; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { -/// le_u24::<_, Error<_>>.parse_next(s) +/// le_u24::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x020100))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(2)))); /// ``` #[inline(always)] -pub fn le_u24<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +pub fn le_u24<I, E: ParserError<I>>(input: &mut I) -> PResult<u32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("le_u24", move |input: I| le_uint(input, 3)).parse_next(input) + trace("le_u24", move |input: &mut I| le_uint(input, 3)).parse_next(input) } /// Recognizes a little endian unsigned 4 bytes integer. 
@@ -758,40 +763,40 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_u32; +/// use winnow::binary::le_u32; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u32> { -/// le_u32.parse_next(s) +/// le_u32.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07050300))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_u32; +/// use winnow::binary::le_u32; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { -/// le_u32::<_, Error<_>>.parse_next(s) +/// le_u32::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x03020100))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(3)))); /// ``` #[inline(always)] -pub fn le_u32<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +pub fn le_u32<I, E: ParserError<I>>(input: &mut I) -> PResult<u32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("le_u32", move |input: I| le_uint(input, 4)).parse_next(input) + trace("le_u32", move |input: &mut I| le_uint(input, 4)).parse_next(input) } /// Recognizes a little endian unsigned 8 bytes integer. 
@@ -803,40 +808,40 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_u64; +/// use winnow::binary::le_u64; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u64> { -/// le_u64.parse_next(s) +/// le_u64.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0706050403020100))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_u64; +/// use winnow::binary::le_u64; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u64> { -/// le_u64::<_, Error<_>>.parse_next(s) +/// le_u64::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x0706050403020100))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(7)))); /// ``` #[inline(always)] -pub fn le_u64<I, E: ParseError<I>>(input: I) -> IResult<I, u64, E> +pub fn le_u64<I, E: ParserError<I>>(input: &mut I) -> PResult<u64, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("le_u64", move |input: I| le_uint(input, 8)).parse_next(input) + trace("le_u64", move |input: &mut I| le_uint(input, 8)).parse_next(input) } /// Recognizes a little endian unsigned 16 bytes integer. 
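(`le_uint`, whose new signature appears just below, mirrors `be_uint` with the fold direction reversed: byte `i` contributes at bit position `8 * i`. Simplified:)

```rust
// Conceptual core of `le_uint`, simplified: a little-endian fold where
// each successive byte lands 8 bits higher than the last.
fn le_fold(bytes: &[u8]) -> u64 {
    bytes
        .iter()
        .enumerate()
        .fold(0u64, |acc, (i, &b)| acc + (u64::from(b) << (8 * i)))
}

fn main() {
    assert_eq!(le_fold(&[0x00, 0x03]), 0x0300);
}
```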
@@ -848,44 +853,44 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_u128; +/// use winnow::binary::le_u128; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u128> { -/// le_u128.parse_next(s) +/// le_u128.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07060504030201000706050403020100))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_u128; +/// use winnow::binary::le_u128; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u128> { -/// le_u128::<_, Error<_>>.parse_next(s) +/// le_u128::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x15141312111009080706050403020100))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(15)))); /// ``` #[inline(always)] -pub fn le_u128<I, E: ParseError<I>>(input: I) -> IResult<I, u128, E> +pub fn le_u128<I, E: ParserError<I>>(input: &mut I) -> PResult<u128, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("le_u128", move |input: I| le_uint(input, 16)).parse_next(input) + trace("le_u128", move |input: &mut I| le_uint(input, 16)).parse_next(input) } #[inline] -fn le_uint<I, Uint, E: ParseError<I>>(input: I, bound: usize) -> IResult<I, Uint, E> +fn le_uint<I, Uint, E: ParserError<I>>(input: &mut I, bound: usize) -> PResult<Uint, E> where I: StreamIsPartial, I: Stream<Token = u8>, @@ -919,34 +924,34 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_i8; +/// use winnow::binary::le_i8; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i8> { -/// le_i8.parse_next(s) +/// le_i8.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); -/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(Error::new(&[][..], ErrorKind::Token)))); +/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&[][..], ErrorKind::Token)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_i8; +/// use winnow::binary::le_i8; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i8> { -/// le_i8.parse_next(s) +/// le_i8.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01abcd"[..])), 
Ok((Partial::new(&b"\x01abcd"[..]), 0x00))); /// assert_eq!(parser(Partial::new(&b""[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn le_i8<I, E: ParseError<I>>(input: I) -> IResult<I, i8, E> +pub fn le_i8<I, E: ParserError<I>>(input: &mut I) -> PResult<i8, E> where I: StreamIsPartial, I: Stream<Token = u8>, @@ -963,41 +968,41 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_i16; +/// use winnow::binary::le_i16; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i16> { -/// le_i16.parse_next(s) +/// le_i16.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0300))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_i16; +/// use winnow::binary::le_i16; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i16> { -/// le_i16::<_, Error<_>>.parse_next(s) +/// le_i16::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x0100))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn le_i16<I, E: ParseError<I>>(input: I) -> IResult<I, i16, E> +pub fn le_i16<I, E: ParserError<I>>(input: &mut I) -> PResult<i16, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("le_i16", move |input: I| { - le_uint::<_, u16, _>(input, 2).map(|(i, n)| (i, n as i16)) + trace("le_i16", move |input: &mut I| { + le_uint::<_, u16, _>(input, 2).map(|n| n as i16) }) .parse_next(input) } @@ -1011,48 +1016,48 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_i24; +/// use winnow::binary::le_i24; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i32> { -/// le_i24.parse_next(s) +/// le_i24.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x050300))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_i24; +/// use winnow::binary::le_i24; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i32> { -/// le_i24::<_, Error<_>>.parse_next(s) +/// le_i24::<_, InputError<_>>.parse_peek(s) /// } /// /// 
assert_eq!(parser(Partial::new(&b"\x00\x01\x02abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x020100))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(2)))); /// ``` #[inline(always)] -pub fn le_i24<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +pub fn le_i24<I, E: ParserError<I>>(input: &mut I) -> PResult<i32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("le_i24", move |input: I| { - le_uint::<_, u32, _>(input, 3).map(|(i, n)| { + trace("le_i24", move |input: &mut I| { + le_uint::<_, u32, _>(input, 3).map(|n| { // Same as the unsigned version but we need to sign-extend manually here let n = if n & 0x80_00_00 != 0 { (n | 0xff_00_00_00) as i32 } else { n as i32 }; - (i, n) + n }) }) .parse_next(input) @@ -1067,41 +1072,41 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_i32; +/// use winnow::binary::le_i32; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i32> { -/// le_i32.parse_next(s) +/// le_i32.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07050300))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_i32; +/// use winnow::binary::le_i32; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i32> { -/// le_i32::<_, Error<_>>.parse_next(s) +/// le_i32::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x03020100))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(3)))); /// ``` #[inline(always)] -pub fn le_i32<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +pub fn le_i32<I, E: ParserError<I>>(input: &mut I) -> PResult<i32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("le_i32", move |input: I| { - le_uint::<_, u32, _>(input, 4).map(|(i, n)| (i, n as i32)) + trace("le_i32", move |input: &mut I| { + le_uint::<_, u32, _>(input, 4).map(|n| n as i32) }) .parse_next(input) } @@ -1115,41 +1120,41 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_i64; +/// use winnow::binary::le_i64; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i64> { -/// le_i64.parse_next(s) +/// le_i64.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0706050403020100))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], 
ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_i64; +/// use winnow::binary::le_i64; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i64> { -/// le_i64::<_, Error<_>>.parse_next(s) +/// le_i64::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x0706050403020100))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(7)))); /// ``` #[inline(always)] -pub fn le_i64<I, E: ParseError<I>>(input: I) -> IResult<I, i64, E> +pub fn le_i64<I, E: ParserError<I>>(input: &mut I) -> PResult<i64, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("le_i64", move |input: I| { - le_uint::<_, u64, _>(input, 8).map(|(i, n)| (i, n as i64)) + trace("le_i64", move |input: &mut I| { + le_uint::<_, u64, _>(input, 8).map(|n| n as i64) }) .parse_next(input) } @@ -1163,41 +1168,41 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_i128; +/// use winnow::binary::le_i128; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i128> { -/// le_i128.parse_next(s) +/// le_i128.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07060504030201000706050403020100))); -/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_i128; +/// use winnow::binary::le_i128; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i128> { -/// le_i128::<_, Error<_>>.parse_next(s) +/// le_i128::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15abcd"[..])), Ok((Partial::new(&b"abcd"[..]), 0x15141312111009080706050403020100))); /// assert_eq!(parser(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(15)))); /// ``` #[inline(always)] -pub fn le_i128<I, E: ParseError<I>>(input: I) -> IResult<I, i128, E> +pub fn le_i128<I, E: ParserError<I>>(input: &mut I) -> PResult<i128, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("le_i128", move |input: I| { - le_uint::<_, u128, _>(input, 16).map(|(i, n)| (i, n as i128)) + trace("le_i128", move |input: &mut I| { + le_uint::<_, u128, _>(input, 16).map(|n| n as i128) }) .parse_next(input) } @@ -1213,72 +1218,67 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// 
# use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::u8; +/// use winnow::binary::u8; /// /// fn parser(s: &[u8]) -> IResult<&[u8], u8> { -/// u8.parse_next(s) +/// u8.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); -/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(Error::new(&[][..], ErrorKind::Token)))); +/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&[][..], ErrorKind::Token)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::u8; +/// use winnow::binary::u8; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u8> { -/// u8::<_, Error<_>>.parse_next(s) +/// u8::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x03abcefg"[..])), Ok((Partial::new(&b"\x03abcefg"[..]), 0x00))); /// assert_eq!(parser(Partial::new(&b""[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn u8<I, E: ParseError<I>>(input: I) -> IResult<I, u8, E> +pub fn u8<I, E: ParserError<I>>(input: &mut I) -> PResult<u8, E> where I: StreamIsPartial, I: Stream<Token = u8>, { - trace("u8", move |input: I| { - if input.is_partial() { - streaming_u8(input) + trace("u8", move |input: &mut I| { + if <I as StreamIsPartial>::is_partial_supported() { + u8_::<_, _, true>(input) } else { - complete_u8(input) + u8_::<_, _, false>(input) } }) .parse_next(input) } -#[inline] -pub(crate) fn streaming_u8<I, E: ParseError<I>>(input: I) -> IResult<I, u8, E> -where - I: Stream<Token = u8>, -{ - input - .next_token() - .ok_or_else(|| ErrMode::Incomplete(Needed::new(1))) -} - -pub(crate) fn complete_u8<I, E: ParseError<I>>(input: I) -> IResult<I, u8, E> +fn u8_<I, E: ParserError<I>, const PARTIAL: bool>(input: &mut I) -> PResult<u8, E> where + I: StreamIsPartial, I: Stream<Token = u8>, { - input - .next_token() - .ok_or_else(|| ErrMode::Backtrack(E::from_error_kind(input, ErrorKind::Token))) + input.next_token().ok_or_else(|| { + if PARTIAL && input.is_partial() { + ErrMode::Incomplete(Needed::new(1)) + } else { + ErrMode::Backtrack(E::from_error_kind(input, ErrorKind::Token)) + } + }) } /// Recognizes an unsigned 2 bytes integer /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian u16 integer, -/// otherwise if `winnow::number::Endianness::Little` parse a little endian u16 integer. +/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian u16 integer, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian u16 integer. 
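(The `u8_` helper above folds the old `streaming_u8`/`complete_u8` pair into one const-generic body: `PARTIAL` is fixed at compile time, so the complete-input instantiation carries no runtime branch, and `is_partial()` is only consulted when the stream type supports partial input. From the caller's side the behavior follows the input type alone; a usage sketch against winnow 0.5:)

```rust
use winnow::binary::u8;
use winnow::error::{ErrMode, InputError, Needed};
use winnow::prelude::*;
use winnow::Partial;

fn main() {
    // Complete input: running out of bytes is an ordinary backtrack error.
    let complete = u8::<_, InputError<_>>.parse_peek(&b""[..]);
    assert!(matches!(complete, Err(ErrMode::Backtrack(_))));

    // Partial input: the same parser reports how many more bytes it needs.
    let partial = u8::<_, InputError<_>>.parse_peek(Partial::new(&b""[..]));
    assert_eq!(partial, Err(ErrMode::Incomplete(Needed::new(1))));
}
```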
/// /// *Complete version*: returns an error if there is not enough input data /// @@ -1287,70 +1287,70 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::u16; +/// use winnow::binary::u16; /// /// let be_u16 = |s| { -/// u16(winnow::number::Endianness::Big).parse_next(s) +/// u16(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_u16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0003))); -/// assert_eq!(be_u16(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(be_u16(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// /// let le_u16 = |s| { -/// u16(winnow::number::Endianness::Little).parse_next(s) +/// u16(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_u16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0300))); -/// assert_eq!(le_u16(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(le_u16(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::u16; +/// use winnow::binary::u16; /// /// let be_u16 = |s| { -/// u16::<_, Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// u16::<_, InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_u16(Partial::new(&b"\x00\x03abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x0003))); /// assert_eq!(be_u16(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// /// let le_u16 = |s| { -/// u16::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// u16::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_u16(Partial::new(&b"\x00\x03abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x0300))); /// assert_eq!(le_u16(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn u16<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, u16, E> +pub fn u16<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, u16, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_u16, - crate::number::Endianness::Little => le_u16, + Endianness::Big => be_u16, + Endianness::Little => le_u16, #[cfg(target_endian = "big")] - crate::number::Endianness::Native => be_u16, + Endianness::Native => be_u16, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_u16, + Endianness::Native => le_u16, } }(input) } /// Recognizes an unsigned 3 byte integer /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian u24 integer, -/// otherwise if `winnow::number::Endianness::Little` parse a little endian u24 integer. 
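(The endianness-parameterized parsers, `u16` above and `u24`/`u32`/`u64` below, simply select the matching `be_*`/`le_*` function item inside the returned closure, which makes the byte order an ordinary runtime value. A hypothetical use, assuming winnow 0.5, where a header flag picks the byte order of later fields:)

```rust
use winnow::binary::{u16, Endianness};
use winnow::prelude::*;

// Hypothetical: a format whose header flag decides the byte order of
// every following u16 field.
fn field(input: &[u8], big_endian: bool) -> IResult<&[u8], u16> {
    let endian = if big_endian {
        Endianness::Big
    } else {
        Endianness::Little
    };
    u16(endian).parse_peek(input)
}

fn main() {
    assert_eq!(field(&b"\x00\x03"[..], true), Ok((&b""[..], 0x0003)));
    assert_eq!(field(&b"\x00\x03"[..], false), Ok((&b""[..], 0x0300)));
}
```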
+/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian u24 integer, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian u24 integer. /// /// *Complete version*: returns an error if there is not enough input data /// @@ -1359,70 +1359,70 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::u24; +/// use winnow::binary::u24; /// /// let be_u24 = |s| { -/// u24(winnow::number::Endianness::Big).parse_next(s) +/// u24(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_u24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x000305))); -/// assert_eq!(be_u24(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(be_u24(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// /// let le_u24 = |s| { -/// u24(winnow::number::Endianness::Little).parse_next(s) +/// u24(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_u24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x050300))); -/// assert_eq!(le_u24(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(le_u24(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::u24; +/// use winnow::binary::u24; /// /// let be_u24 = |s| { -/// u24::<_,Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// u24::<_,InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_u24(Partial::new(&b"\x00\x03\x05abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x000305))); /// assert_eq!(be_u24(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(2)))); /// /// let le_u24 = |s| { -/// u24::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// u24::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_u24(Partial::new(&b"\x00\x03\x05abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x050300))); /// assert_eq!(le_u24(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(2)))); /// ``` #[inline(always)] -pub fn u24<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, u32, E> +pub fn u24<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, u32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_u24, - crate::number::Endianness::Little => le_u24, + Endianness::Big => be_u24, + Endianness::Little => le_u24, #[cfg(target_endian = "big")] - crate::number::Endianness::Native => be_u24, + Endianness::Native => be_u24, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_u24, + Endianness::Native => le_u24, } }(input) } /// Recognizes an unsigned 4 byte integer /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian u32 integer, -/// 
otherwise if `winnow::number::Endianness::Little` parse a little endian u32 integer. +/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian u32 integer, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian u32 integer. /// /// *Complete version*: returns an error if there is not enough input data /// @@ -1431,70 +1431,70 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::u32; +/// use winnow::binary::u32; /// /// let be_u32 = |s| { -/// u32(winnow::number::Endianness::Big).parse_next(s) +/// u32(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_u32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00030507))); -/// assert_eq!(be_u32(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(be_u32(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// /// let le_u32 = |s| { -/// u32(winnow::number::Endianness::Little).parse_next(s) +/// u32(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_u32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07050300))); -/// assert_eq!(le_u32(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(le_u32(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::u32; +/// use winnow::binary::u32; /// /// let be_u32 = |s| { -/// u32::<_, Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// u32::<_, InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_u32(Partial::new(&b"\x00\x03\x05\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x00030507))); /// assert_eq!(be_u32(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(3)))); /// /// let le_u32 = |s| { -/// u32::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// u32::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_u32(Partial::new(&b"\x00\x03\x05\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x07050300))); /// assert_eq!(le_u32(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(3)))); /// ``` #[inline(always)] -pub fn u32<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, u32, E> +pub fn u32<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, u32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_u32, - crate::number::Endianness::Little => le_u32, + Endianness::Big => be_u32, + Endianness::Little => le_u32, #[cfg(target_endian = "big")] - crate::number::Endianness::Native => be_u32, + Endianness::Native => be_u32, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_u32, + Endianness::Native => le_u32, } }(input) } /// Recognizes an unsigned 8 byte 
integer /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian u64 integer, -/// otherwise if `winnow::number::Endianness::Little` parse a little endian u64 integer. +/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian u64 integer, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian u64 integer. /// /// *Complete version*: returns an error if there is not enough input data /// @@ -1503,70 +1503,70 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::u64; +/// use winnow::binary::u64; /// /// let be_u64 = |s| { -/// u64(winnow::number::Endianness::Big).parse_next(s) +/// u64(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_u64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0001020304050607))); -/// assert_eq!(be_u64(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(be_u64(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// /// let le_u64 = |s| { -/// u64(winnow::number::Endianness::Little).parse_next(s) +/// u64(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_u64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0706050403020100))); -/// assert_eq!(le_u64(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(le_u64(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::u64; +/// use winnow::binary::u64; /// /// let be_u64 = |s| { -/// u64::<_, Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// u64::<_, InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_u64(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x0001020304050607))); /// assert_eq!(be_u64(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(7)))); /// /// let le_u64 = |s| { -/// u64::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// u64::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_u64(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x0706050403020100))); /// assert_eq!(le_u64(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(7)))); /// ``` #[inline(always)] -pub fn u64<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, u64, E> +pub fn u64<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, u64, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_u64, - crate::number::Endianness::Little => le_u64, + Endianness::Big => be_u64, + Endianness::Little => le_u64, #[cfg(target_endian = "big")] - crate::number::Endianness::Native 
=> be_u64, + Endianness::Native => be_u64, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_u64, + Endianness::Native => le_u64, } }(input) } /// Recognizes an unsigned 16 byte integer /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian u128 integer, -/// otherwise if `winnow::number::Endianness::Little` parse a little endian u128 integer. +/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian u128 integer, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian u128 integer. /// /// *Complete version*: returns an error if there is not enough input data /// @@ -1575,62 +1575,62 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::u128; +/// use winnow::binary::u128; /// /// let be_u128 = |s| { -/// u128(winnow::number::Endianness::Big).parse_next(s) +/// u128(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_u128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00010203040506070001020304050607))); -/// assert_eq!(be_u128(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(be_u128(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// /// let le_u128 = |s| { -/// u128(winnow::number::Endianness::Little).parse_next(s) +/// u128(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_u128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07060504030201000706050403020100))); -/// assert_eq!(le_u128(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(le_u128(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::u128; +/// use winnow::binary::u128; /// /// let be_u128 = |s| { -/// u128::<_, Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// u128::<_, InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_u128(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x00010203040506070001020304050607))); /// assert_eq!(be_u128(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(15)))); /// /// let le_u128 = |s| { -/// u128::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// u128::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_u128(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x07060504030201000706050403020100))); /// assert_eq!(le_u128(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(15)))); /// ``` #[inline(always)] -pub fn u128<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, u128, E> +pub fn 
u128<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, u128, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_u128, - crate::number::Endianness::Little => le_u128, + Endianness::Big => be_u128, + Endianness::Little => le_u128, #[cfg(target_endian = "big")] - crate::number::Endianness::Native => be_u128, + Endianness::Native => be_u128, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_u128, + Endianness::Native => le_u128, } }(input) } @@ -1646,54 +1646,54 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::i8; +/// use winnow::binary::i8; /// /// fn parser(s: &[u8]) -> IResult<&[u8], i8> { -/// i8.parse_next(s) +/// i8.parse_peek(s) /// } /// /// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); -/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(Error::new(&[][..], ErrorKind::Token)))); +/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&[][..], ErrorKind::Token)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::i8; +/// use winnow::binary::i8; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i8> { -/// i8.parse_next(s) +/// i8.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&b"\x00\x03abcefg"[..])), Ok((Partial::new(&b"\x03abcefg"[..]), 0x00))); /// assert_eq!(parser(Partial::new(&b""[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn i8<I, E: ParseError<I>>(input: I) -> IResult<I, i8, E> +pub fn i8<I, E: ParserError<I>>(input: &mut I) -> PResult<i8, E> where I: StreamIsPartial, I: Stream<Token = u8>, { - trace("i8", move |input: I| { - if input.is_partial() { - streaming_u8(input) + trace("i8", move |input: &mut I| { + if <I as StreamIsPartial>::is_partial_supported() { + u8_::<_, _, true>(input) } else { - complete_u8(input) + u8_::<_, _, false>(input) } - .map(|(i, n)| (i, n as i8)) + .map(|n| n as i8) }) .parse_next(input) } /// Recognizes a signed 2 byte integer /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian i16 integer, -/// otherwise if `winnow::number::Endianness::Little` parse a little endian i16 integer. +/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian i16 integer, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian i16 integer. 
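The rewritten `i8` above no longer dispatches to separate `streaming_u8`/`complete_u8` functions; a single const-generic helper (`u8_::<_, _, PARTIAL>`) is selected once via `StreamIsPartial::is_partial_supported()`. The caller-visible split is unchanged, which a quick check against the public API confirms. A sketch, assuming winnow 0.5's `Partial` wrapper:

```rust
use winnow::binary::i8;
use winnow::error::{ErrMode, InputError, Needed};
use winnow::Partial;

fn main() {
    // Complete input: running out of bytes is an ordinary backtrack error.
    let mut complete = &b""[..];
    assert!(i8::<_, InputError<_>>(&mut complete).is_err());

    // Partial input: the same parser instead reports how many more
    // bytes it needs.
    let mut partial = Partial::new(&b""[..]);
    assert_eq!(
        i8::<_, InputError<_>>(&mut partial),
        Err(ErrMode::Incomplete(Needed::new(1)))
    );
}
```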
/// /// *Complete version*: returns an error if there is not enough input data /// @@ -1702,70 +1702,70 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::i16; +/// use winnow::binary::i16; /// /// let be_i16 = |s| { -/// i16(winnow::number::Endianness::Big).parse_next(s) +/// i16(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_i16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0003))); -/// assert_eq!(be_i16(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(be_i16(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// /// let le_i16 = |s| { -/// i16(winnow::number::Endianness::Little).parse_next(s) +/// i16(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_i16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0300))); -/// assert_eq!(le_i16(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(le_i16(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::i16; +/// use winnow::binary::i16; /// /// let be_i16 = |s| { -/// i16::<_, Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// i16::<_, InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_i16(Partial::new(&b"\x00\x03abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x0003))); /// assert_eq!(be_i16(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// /// let le_i16 = |s| { -/// i16::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// i16::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_i16(Partial::new(&b"\x00\x03abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x0300))); /// assert_eq!(le_i16(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn i16<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, i16, E> +pub fn i16<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, i16, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_i16, - crate::number::Endianness::Little => le_i16, + Endianness::Big => be_i16, + Endianness::Little => le_i16, #[cfg(target_endian = "big")] - crate::number::Endianness::Native => be_i16, + Endianness::Native => be_i16, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_i16, + Endianness::Native => le_i16, } }(input) } /// Recognizes a signed 3 byte integer /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian i24 integer, -/// otherwise if `winnow::number::Endianness::Little` parse a little endian i24 integer. 
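One detail worth noting in these endianness wrappers: `Endianness::Native` is not a runtime decision. The two `#[cfg(target_endian = ...)]` match arms mean the `Native` case compiles down to whichever of the `be_*`/`le_*` parsers matches the target, so it costs the same as naming that parser directly. A small usage sketch:

```rust
use winnow::binary::{i16, Endianness};
use winnow::error::InputError;
use winnow::prelude::*;

fn main() {
    // Round-trip a value through the platform's native byte order.
    let bytes = (-2i16).to_ne_bytes();
    let (rest, value) = i16::<_, InputError<_>>(Endianness::Native)
        .parse_peek(&bytes[..])
        .unwrap();
    assert_eq!((rest, value), (&b""[..], -2));
}
```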
+/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian i24 integer, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian i24 integer. /// /// *Complete version*: returns an error if there is not enough input data /// @@ -1774,70 +1774,70 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::i24; +/// use winnow::binary::i24; /// /// let be_i24 = |s| { -/// i24(winnow::number::Endianness::Big).parse_next(s) +/// i24(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_i24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x000305))); -/// assert_eq!(be_i24(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(be_i24(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// /// let le_i24 = |s| { -/// i24(winnow::number::Endianness::Little).parse_next(s) +/// i24(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_i24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x050300))); -/// assert_eq!(le_i24(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(le_i24(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::i24; +/// use winnow::binary::i24; /// /// let be_i24 = |s| { -/// i24::<_, Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// i24::<_, InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_i24(Partial::new(&b"\x00\x03\x05abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x000305))); /// assert_eq!(be_i24(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(2)))); /// /// let le_i24 = |s| { -/// i24::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// i24::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_i24(Partial::new(&b"\x00\x03\x05abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x050300))); /// assert_eq!(le_i24(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(2)))); /// ``` #[inline(always)] -pub fn i24<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, i32, E> +pub fn i24<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, i32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_i24, - crate::number::Endianness::Little => le_i24, + Endianness::Big => be_i24, + Endianness::Little => le_i24, #[cfg(target_endian = "big")] - crate::number::Endianness::Native => be_i24, + Endianness::Native => be_i24, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_i24, + Endianness::Native => le_i24, } }(input) } /// Recognizes a signed 4 byte integer /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian i32 integer, -/// 
otherwise if `winnow::number::Endianness::Little` parse a little endian i32 integer. +/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian i32 integer, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian i32 integer. /// /// *Complete version*: returns an error if there is not enough input data /// @@ -1846,70 +1846,70 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::i32; +/// use winnow::binary::i32; /// /// let be_i32 = |s| { -/// i32(winnow::number::Endianness::Big).parse_next(s) +/// i32(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_i32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00030507))); -/// assert_eq!(be_i32(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(be_i32(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// /// let le_i32 = |s| { -/// i32(winnow::number::Endianness::Little).parse_next(s) +/// i32(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_i32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07050300))); -/// assert_eq!(le_i32(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(le_i32(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::i32; +/// use winnow::binary::i32; /// /// let be_i32 = |s| { -/// i32::<_, Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// i32::<_, InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_i32(Partial::new(&b"\x00\x03\x05\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x00030507))); /// assert_eq!(be_i32(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(3)))); /// /// let le_i32 = |s| { -/// i32::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// i32::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_i32(Partial::new(&b"\x00\x03\x05\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x07050300))); /// assert_eq!(le_i32(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(3)))); /// ``` #[inline(always)] -pub fn i32<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, i32, E> +pub fn i32<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, i32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_i32, - crate::number::Endianness::Little => le_i32, + Endianness::Big => be_i32, + Endianness::Little => le_i32, #[cfg(target_endian = "big")] - crate::number::Endianness::Native => be_i32, + Endianness::Native => be_i32, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_i32, + Endianness::Native => le_i32, } }(input) } /// Recognizes a signed 8 byte 
integer /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian i64 integer, -/// otherwise if `winnow::number::Endianness::Little` parse a little endian i64 integer. +/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian i64 integer, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian i64 integer. /// /// *Complete version*: returns an error if there is not enough input data /// @@ -1918,70 +1918,70 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::i64; +/// use winnow::binary::i64; /// /// let be_i64 = |s| { -/// i64(winnow::number::Endianness::Big).parse_next(s) +/// i64(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_i64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0001020304050607))); -/// assert_eq!(be_i64(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(be_i64(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// /// let le_i64 = |s| { -/// i64(winnow::number::Endianness::Little).parse_next(s) +/// i64(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_i64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0706050403020100))); -/// assert_eq!(le_i64(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(le_i64(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::i64; +/// use winnow::binary::i64; /// /// let be_i64 = |s| { -/// i64::<_, Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// i64::<_, InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_i64(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x0001020304050607))); /// assert_eq!(be_i64(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(7)))); /// /// let le_i64 = |s| { -/// i64::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// i64::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_i64(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x0706050403020100))); /// assert_eq!(le_i64(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(7)))); /// ``` #[inline(always)] -pub fn i64<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, i64, E> +pub fn i64<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, i64, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_i64, - crate::number::Endianness::Little => le_i64, + Endianness::Big => be_i64, + Endianness::Little => le_i64, #[cfg(target_endian = "big")] - crate::number::Endianness::Native 
=> be_i64, + Endianness::Native => be_i64, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_i64, + Endianness::Native => le_i64, } }(input) } /// Recognizes a signed 16 byte integer /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian i128 integer, -/// otherwise if `winnow::number::Endianness::Little` parse a little endian i128 integer. +/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian i128 integer, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian i128 integer. /// /// *Complete version*: returns an error if there is not enough input data /// @@ -1990,62 +1990,62 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::i128; +/// use winnow::binary::i128; /// /// let be_i128 = |s| { -/// i128(winnow::number::Endianness::Big).parse_next(s) +/// i128(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_i128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00010203040506070001020304050607))); -/// assert_eq!(be_i128(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(be_i128(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// /// let le_i128 = |s| { -/// i128(winnow::number::Endianness::Little).parse_next(s) +/// i128(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_i128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07060504030201000706050403020100))); -/// assert_eq!(le_i128(&b"\x01"[..]), Err(ErrMode::Backtrack(Error::new(&[0x01][..], ErrorKind::Slice)))); +/// assert_eq!(le_i128(&b"\x01"[..]), Err(ErrMode::Backtrack(InputError::new(&[0x01][..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::i128; +/// use winnow::binary::i128; /// /// let be_i128 = |s| { -/// i128::<_, Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// i128::<_, InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_i128(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x00010203040506070001020304050607))); /// assert_eq!(be_i128(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(15)))); /// /// let le_i128 = |s| { -/// i128::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// i128::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_i128(Partial::new(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..])), Ok((Partial::new(&b"abcefg"[..]), 0x07060504030201000706050403020100))); /// assert_eq!(le_i128(Partial::new(&b"\x01"[..])), Err(ErrMode::Incomplete(Needed::new(15)))); /// ``` #[inline(always)] -pub fn i128<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, i128, E> +pub fn 
i128<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, i128, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_i128, - crate::number::Endianness::Little => le_i128, + Endianness::Big => be_i128, + Endianness::Little => le_i128, #[cfg(target_endian = "big")] - crate::number::Endianness::Native => be_i128, + Endianness::Native => be_i128, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_i128, + Endianness::Native => le_i128, } }(input) } @@ -2059,42 +2059,42 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_f32; +/// use winnow::binary::be_f32; /// /// fn parser(s: &[u8]) -> IResult<&[u8], f32> { -/// be_f32.parse_next(s) +/// be_f32.parse_peek(s) /// } /// /// assert_eq!(parser(&[0x41, 0x48, 0x00, 0x00][..]), Ok((&b""[..], 12.5))); -/// assert_eq!(parser(&b"abc"[..]), Err(ErrMode::Backtrack(Error::new(&b"abc"[..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"abc"[..]), Err(ErrMode::Backtrack(InputError::new(&b"abc"[..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_f32; +/// use winnow::binary::be_f32; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, f32> { -/// be_f32.parse_next(s) +/// be_f32.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&[0x40, 0x29, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 2.640625))); /// assert_eq!(parser(Partial::new(&[0x01][..])), Err(ErrMode::Incomplete(Needed::new(3)))); /// ``` #[inline(always)] -pub fn be_f32<I, E: ParseError<I>>(input: I) -> IResult<I, f32, E> +pub fn be_f32<I, E: ParserError<I>>(input: &mut I) -> PResult<f32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_f32", move |input: I| { - be_uint::<_, u32, _>(input, 4).map(|(i, n)| (i, f32::from_bits(n))) + trace("be_f32", move |input: &mut I| { + be_uint::<_, u32, _>(input, 4).map(f32::from_bits) }) .parse_next(input) } @@ -2108,41 +2108,41 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::be_f64; +/// use winnow::binary::be_f64; /// /// fn parser(s: &[u8]) -> IResult<&[u8], f64> { -/// be_f64.parse_next(s) +/// be_f64.parse_peek(s) /// } /// /// assert_eq!(parser(&[0x40, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 12.5))); -/// assert_eq!(parser(&b"abc"[..]), Err(ErrMode::Backtrack(Error::new(&b"abc"[..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"abc"[..]), Err(ErrMode::Backtrack(InputError::new(&b"abc"[..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use 
winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::be_f64; +/// use winnow::binary::be_f64; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, f64> { -/// be_f64::<_, Error<_>>.parse_next(s) +/// be_f64::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&[0x40, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 12.5))); /// assert_eq!(parser(Partial::new(&[0x01][..])), Err(ErrMode::Incomplete(Needed::new(7)))); /// ``` #[inline(always)] -pub fn be_f64<I, E: ParseError<I>>(input: I) -> IResult<I, f64, E> +pub fn be_f64<I, E: ParserError<I>>(input: &mut I) -> PResult<f64, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_f64", move |input: I| { - be_uint::<_, u64, _>(input, 8).map(|(i, n)| (i, f64::from_bits(n))) + trace("be_f64", move |input: &mut I| { + be_uint::<_, u64, _>(input, 8).map(f64::from_bits) }) .parse_next(input) } @@ -2156,41 +2156,41 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_f32; +/// use winnow::binary::le_f32; /// /// fn parser(s: &[u8]) -> IResult<&[u8], f32> { -/// le_f32.parse_next(s) +/// le_f32.parse_peek(s) /// } /// /// assert_eq!(parser(&[0x00, 0x00, 0x48, 0x41][..]), Ok((&b""[..], 12.5))); -/// assert_eq!(parser(&b"abc"[..]), Err(ErrMode::Backtrack(Error::new(&b"abc"[..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"abc"[..]), Err(ErrMode::Backtrack(InputError::new(&b"abc"[..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_f32; +/// use winnow::binary::le_f32; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, f32> { -/// le_f32::<_, Error<_>>.parse_next(s) +/// le_f32::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&[0x00, 0x00, 0x48, 0x41][..])), Ok((Partial::new(&b""[..]), 12.5))); /// assert_eq!(parser(Partial::new(&[0x01][..])), Err(ErrMode::Incomplete(Needed::new(3)))); /// ``` #[inline(always)] -pub fn le_f32<I, E: ParseError<I>>(input: I) -> IResult<I, f32, E> +pub fn le_f32<I, E: ParserError<I>>(input: &mut I) -> PResult<f32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("le_f32", move |input: I| { - le_uint::<_, u32, _>(input, 4).map(|(i, n)| (i, f32::from_bits(n))) + trace("le_f32", move |input: &mut I| { + le_uint::<_, u32, _>(input, 4).map(f32::from_bits) }) .parse_next(input) } @@ -2204,49 +2204,49 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::le_f64; +/// use winnow::binary::le_f64; /// /// fn parser(s: &[u8]) -> IResult<&[u8], f64> { -/// le_f64.parse_next(s) +/// le_f64.parse_peek(s) /// } /// /// assert_eq!(parser(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x40][..]), Ok((&b""[..], 12.5))); -/// assert_eq!(parser(&b"abc"[..]), 
Err(ErrMode::Backtrack(Error::new(&b"abc"[..], ErrorKind::Slice)))); +/// assert_eq!(parser(&b"abc"[..]), Err(ErrMode::Backtrack(InputError::new(&b"abc"[..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::number::le_f64; +/// use winnow::binary::le_f64; /// /// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, f64> { -/// le_f64::<_, Error<_>>.parse_next(s) +/// le_f64::<_, InputError<_>>.parse_peek(s) /// } /// /// assert_eq!(parser(Partial::new(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41][..])), Ok((Partial::new(&b""[..]), 3145728.0))); /// assert_eq!(parser(Partial::new(&[0x01][..])), Err(ErrMode::Incomplete(Needed::new(7)))); /// ``` #[inline(always)] -pub fn le_f64<I, E: ParseError<I>>(input: I) -> IResult<I, f64, E> +pub fn le_f64<I, E: ParserError<I>>(input: &mut I) -> PResult<f64, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - trace("be_f64", move |input: I| { - le_uint::<_, u64, _>(input, 8).map(|(i, n)| (i, f64::from_bits(n))) + trace("le_f64", move |input: &mut I| { + le_uint::<_, u64, _>(input, 8).map(f64::from_bits) }) .parse_next(input) } /// Recognizes a 4 byte floating point number /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian f32 float, -/// otherwise if `winnow::number::Endianness::Little` parse a little endian f32 float. +/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian f32 float, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian f32 float. /// /// *Complete version*: returns an error if there is not enough input data /// @@ -2255,70 +2255,70 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::f32; +/// use winnow::binary::f32; /// /// let be_f32 = |s| { -/// f32(winnow::number::Endianness::Big).parse_next(s) +/// f32(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_f32(&[0x41, 0x48, 0x00, 0x00][..]), Ok((&b""[..], 12.5))); -/// assert_eq!(be_f32(&b"abc"[..]), Err(ErrMode::Backtrack(Error::new(&b"abc"[..], ErrorKind::Slice)))); +/// assert_eq!(be_f32(&b"abc"[..]), Err(ErrMode::Backtrack(InputError::new(&b"abc"[..], ErrorKind::Slice)))); /// /// let le_f32 = |s| { -/// f32(winnow::number::Endianness::Little).parse_next(s) +/// f32(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_f32(&[0x00, 0x00, 0x48, 0x41][..]), Ok((&b""[..], 12.5))); -/// assert_eq!(le_f32(&b"abc"[..]), Err(ErrMode::Backtrack(Error::new(&b"abc"[..], ErrorKind::Slice)))); +/// assert_eq!(le_f32(&b"abc"[..]), Err(ErrMode::Backtrack(InputError::new(&b"abc"[..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::f32; +/// use winnow::binary::f32; /// /// let be_f32 = |s| { -/// f32::<_, 
Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// f32::<_, InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_f32(Partial::new(&[0x41, 0x48, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 12.5))); /// assert_eq!(be_f32(Partial::new(&b"abc"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// /// let le_f32 = |s| { -/// f32::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// f32::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_f32(Partial::new(&[0x00, 0x00, 0x48, 0x41][..])), Ok((Partial::new(&b""[..]), 12.5))); /// assert_eq!(le_f32(Partial::new(&b"abc"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -pub fn f32<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, f32, E> +pub fn f32<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, f32, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_f32, - crate::number::Endianness::Little => le_f32, + Endianness::Big => be_f32, + Endianness::Little => le_f32, #[cfg(target_endian = "big")] - crate::number::Endianness::Native => be_f32, + Endianness::Native => be_f32, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_f32, + Endianness::Native => le_f32, } }(input) } /// Recognizes an 8 byte floating point number /// -/// If the parameter is `winnow::number::Endianness::Big`, parse a big endian f64 float, -/// otherwise if `winnow::number::Endianness::Little` parse a little endian f64 float. +/// If the parameter is `winnow::binary::Endianness::Big`, parse a big endian f64 float, +/// otherwise if `winnow::binary::Endianness::Little` parse a little endian f64 float. 
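As the float hunks show, `be_f32`/`le_f32` and `be_f64`/`le_f64` are thin wrappers over the unsigned-integer parsers plus a bit reinterpretation; with input now threaded through `&mut I`, the closure maps only the value (`.map(f32::from_bits)`) instead of rebuilding a `(rest, value)` tuple. A sketch of that equivalence, assuming the public `be_f32` from the hunk above:

```rust
use winnow::binary::be_f32;
use winnow::error::InputError;
use winnow::prelude::*;

fn main() {
    // 12.5f32 has the big-endian byte pattern 41 48 00 00, so `be_f32`
    // behaves like a big-endian u32 read followed by `f32::from_bits`.
    assert_eq!(12.5f32.to_bits(), 0x4148_0000);

    let (rest, value) = be_f32::<_, InputError<_>>
        .parse_peek(&b"\x41\x48\x00\x00rest"[..])
        .unwrap();
    assert_eq!((rest, value), (&b"rest"[..], 12.5));
}
```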
/// /// *Complete version*: returns an error if there is not enough input data /// @@ -2327,62 +2327,265 @@ where /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; -/// use winnow::number::f64; +/// use winnow::binary::f64; /// /// let be_f64 = |s| { -/// f64(winnow::number::Endianness::Big).parse_next(s) +/// f64(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_f64(&[0x40, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 12.5))); -/// assert_eq!(be_f64(&b"abc"[..]), Err(ErrMode::Backtrack(Error::new(&b"abc"[..], ErrorKind::Slice)))); +/// assert_eq!(be_f64(&b"abc"[..]), Err(ErrMode::Backtrack(InputError::new(&b"abc"[..], ErrorKind::Slice)))); /// /// let le_f64 = |s| { -/// f64(winnow::number::Endianness::Little).parse_next(s) +/// f64(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_f64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x40][..]), Ok((&b""[..], 12.5))); -/// assert_eq!(le_f64(&b"abc"[..]), Err(ErrMode::Backtrack(Error::new(&b"abc"[..], ErrorKind::Slice)))); +/// assert_eq!(le_f64(&b"abc"[..]), Err(ErrMode::Backtrack(InputError::new(&b"abc"[..], ErrorKind::Slice)))); /// ``` /// /// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::error::Needed::Size; /// # use winnow::Partial; -/// use winnow::number::f64; +/// use winnow::binary::f64; /// /// let be_f64 = |s| { -/// f64::<_, Error<_>>(winnow::number::Endianness::Big).parse_next(s) +/// f64::<_, InputError<_>>(winnow::binary::Endianness::Big).parse_peek(s) /// }; /// /// assert_eq!(be_f64(Partial::new(&[0x40, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 12.5))); /// assert_eq!(be_f64(Partial::new(&b"abc"[..])), Err(ErrMode::Incomplete(Needed::new(5)))); /// /// let le_f64 = |s| { -/// f64::<_, Error<_>>(winnow::number::Endianness::Little).parse_next(s) +/// f64::<_, InputError<_>>(winnow::binary::Endianness::Little).parse_peek(s) /// }; /// /// assert_eq!(le_f64(Partial::new(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x40][..])), Ok((Partial::new(&b""[..]), 12.5))); /// assert_eq!(le_f64(Partial::new(&b"abc"[..])), Err(ErrMode::Incomplete(Needed::new(5)))); /// ``` #[inline(always)] -pub fn f64<I, E: ParseError<I>>(endian: crate::number::Endianness) -> impl Parser<I, f64, E> +pub fn f64<I, E: ParserError<I>>(endian: Endianness) -> impl Parser<I, f64, E> where I: StreamIsPartial, I: Stream<Token = u8>, <I as Stream>::Slice: AsBytes, { - move |input: I| { + move |input: &mut I| { match endian { - crate::number::Endianness::Big => be_f64, - crate::number::Endianness::Little => le_f64, + Endianness::Big => be_f64, + Endianness::Little => le_f64, #[cfg(target_endian = "big")] - crate::number::Endianness::Native => be_f64, + Endianness::Native => be_f64, #[cfg(target_endian = "little")] - crate::number::Endianness::Native => le_f64, + Endianness::Native => le_f64, } }(input) } + +/// Get a length-prefixed slice ([TLV](https://en.wikipedia.org/wiki/Type-length-value)) +/// +/// To apply a parser to the returned slice, see [`length_and_then`]. +/// +/// If the count is for something besides tokens, see [`length_repeat`]. 
+/// +/// *Complete version*: Returns an error if there is not enough input data. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there is not enough data. +/// +/// # Arguments +/// * `f` The parser to apply. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed, stream::Partial}; +/// # use winnow::prelude::*; +/// use winnow::Bytes; +/// use winnow::binary::be_u16; +/// use winnow::binary::length_take; +/// use winnow::token::tag; +/// +/// type Stream<'i> = Partial<&'i Bytes>; +/// +/// fn stream(b: &[u8]) -> Stream<'_> { +/// Partial::new(Bytes::new(b)) +/// } +/// +/// fn parser(s: Stream<'_>) -> IResult<Stream<'_>, &[u8]> { +/// length_take(be_u16).parse_peek(s) +/// } +/// +/// assert_eq!(parser(stream(b"\x00\x03abcefg")), Ok((stream(&b"efg"[..]), &b"abc"[..]))); +/// assert_eq!(parser(stream(b"\x00\x03a")), Err(ErrMode::Incomplete(Needed::new(2)))); +/// ``` +pub fn length_take<I, N, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + N: ToUsize, + F: Parser<I, N, E>, + E: ParserError<I>, +{ + trace("length_take", move |i: &mut I| { + let length = f.parse_next(i)?; + + crate::token::take(length).parse_next(i) + }) +} + +/// Deprecated since 0.5.27, replaced with [`length_take`] +#[deprecated(since = "0.5.27", note = "Replaced with `length_take`")] +pub fn length_data<I, N, E, F>(f: F) -> impl Parser<I, <I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + N: ToUsize, + F: Parser<I, N, E>, + E: ParserError<I>, +{ + length_take(f) +} + +/// Parse a length-prefixed slice ([TLV](https://en.wikipedia.org/wiki/Type-length-value)) +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there is not enough data. +/// +/// # Arguments +/// * `f` The parser to apply. +/// * `g` The parser to apply on the subslice. 
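`length_take` (formerly `length_data`, per the deprecation shim above) runs the count parser first and then takes exactly that many tokens, which is the usual TLV framing step. A complete-input sketch, assuming the winnow 0.5 API introduced in this hunk:

```rust
use winnow::binary::{be_u16, length_take};
use winnow::error::InputError;
use winnow::prelude::*;

fn main() {
    // \x00\x03 says "three bytes of payload follow".
    let mut input = &b"\x00\x03abcefg"[..];
    let payload = length_take(be_u16::<_, InputError<_>>)
        .parse_next(&mut input)
        .unwrap();
    assert_eq!((payload, input), (&b"abc"[..], &b"efg"[..]));
}
```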
+/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed, stream::{Partial, StreamIsPartial}}; +/// # use winnow::prelude::*; +/// use winnow::Bytes; +/// use winnow::binary::be_u16; +/// use winnow::binary::length_and_then; +/// use winnow::token::tag; +/// +/// type Stream<'i> = Partial<&'i Bytes>; +/// +/// fn stream(b: &[u8]) -> Stream<'_> { +/// Partial::new(Bytes::new(b)) +/// } +/// +/// fn complete_stream(b: &[u8]) -> Stream<'_> { +/// let mut p = Partial::new(Bytes::new(b)); +/// let _ = p.complete(); +/// p +/// } +/// +/// fn parser(s: Stream<'_>) -> IResult<Stream<'_>, &[u8]> { +/// length_and_then(be_u16, "abc").parse_peek(s) +/// } +/// +/// assert_eq!(parser(stream(b"\x00\x03abcefg")), Ok((stream(&b"efg"[..]), &b"abc"[..]))); +/// assert_eq!(parser(stream(b"\x00\x03123123")), Err(ErrMode::Backtrack(InputError::new(complete_stream(&b"123"[..]), ErrorKind::Tag)))); +/// assert_eq!(parser(stream(b"\x00\x03a")), Err(ErrMode::Incomplete(Needed::new(2)))); +/// ``` +pub fn length_and_then<I, O, N, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, O, E> +where + I: StreamIsPartial, + I: Stream + UpdateSlice + Clone, + N: ToUsize, + F: Parser<I, N, E>, + G: Parser<I, O, E>, + E: ParserError<I>, +{ + trace("length_and_then", move |i: &mut I| { + let data = length_take(f.by_ref()).parse_next(i)?; + let mut data = I::update_slice(i.clone(), data); + let _ = data.complete(); + let o = g.by_ref().complete_err().parse_next(&mut data)?; + Ok(o) + }) +} + +/// Deprecated since 0.5.27, replaced with [`length_and_then`] +#[deprecated(since = "0.5.27", note = "Replaced with `length_and_then`")] +pub fn length_value<I, O, N, E, F, G>(f: F, g: G) -> impl Parser<I, O, E> +where + I: StreamIsPartial, + I: Stream + UpdateSlice + Clone, + N: ToUsize, + F: Parser<I, N, E>, + G: Parser<I, O, E>, + E: ParserError<I>, +{ + length_and_then(f, g) +} + +/// [`Accumulate`] a length-prefixed sequence of values ([TLV](https://en.wikipedia.org/wiki/Type-length-value)) +/// +/// If the length represents token counts, see instead [`length_take`] +/// +/// # Arguments +/// * `f` The parser to apply to obtain the count. +/// * `g` The parser to apply repeatedly. 
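`length_and_then` (formerly `length_value`) composes `length_take` with an inner parser: it clones the stream, narrows it to the prefixed slice via `UpdateSlice`, marks it complete, and runs `g` under `complete_err`, so running off the end of the payload backtracks instead of asking for more input. A sketch, assuming the `UpdateSlice` impl for plain byte slices:

```rust
use winnow::binary::{be_u16, le_u16, length_and_then};
use winnow::error::InputError;
use winnow::prelude::*;

fn main() {
    // Outer: big-endian length prefix. Inner: little-endian u16 payload.
    let mut input = &b"\x00\x02\x34\x12rest"[..];
    let value = length_and_then(be_u16::<_, InputError<_>>, le_u16)
        .parse_next(&mut input)
        .unwrap();
    assert_eq!((value, input), (0x1234, &b"rest"[..]));
}
```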
+/// +/// # Example +/// +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::Bytes; +/// use winnow::binary::u8; +/// use winnow::binary::length_count; +/// use winnow::token::tag; +/// +/// type Stream<'i> = &'i Bytes; +/// +/// fn stream(b: &[u8]) -> Stream<'_> { +/// Bytes::new(b) +/// } +/// +/// fn parser(s: Stream<'_>) -> IResult<Stream<'_>, Vec<&[u8]>> { +/// length_count(u8.map(|i| { +/// println!("got number: {}", i); +/// i +/// }), "abc").parse_peek(s) +/// } +/// +/// assert_eq!(parser(stream(b"\x02abcabcabc")), Ok((stream(b"abc"), vec![&b"abc"[..], &b"abc"[..]]))); +/// assert_eq!(parser(stream(b"\x03123123123")), Err(ErrMode::Backtrack(InputError::new(stream(b"123123123"), ErrorKind::Tag)))); +/// # } +/// ``` +pub fn length_repeat<I, O, C, N, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, C, E> +where + I: Stream, + N: ToUsize, + C: Accumulate<O>, + F: Parser<I, N, E>, + G: Parser<I, O, E>, + E: ParserError<I>, +{ + trace("length_repeat", move |i: &mut I| { + let n = f.parse_next(i)?; + let n = n.to_usize(); + repeat(n, g.by_ref()).parse_next(i) + }) +} + +/// Deprecated since 0.5.27, replaced with [`length_repeat`] +#[deprecated(since = "0.5.27", note = "Replaced with `length_repeat`")] +pub fn length_count<I, O, C, N, E, F, G>(f: F, g: G) -> impl Parser<I, C, E> +where + I: Stream, + N: ToUsize, + C: Accumulate<O>, + F: Parser<I, N, E>, + G: Parser<I, O, E>, + E: ParserError<I>, +{ + length_repeat(f, g) +} diff --git a/vendor/winnow/src/number/tests.rs b/vendor/winnow/src/binary/tests.rs similarity index 56% rename from vendor/winnow/src/number/tests.rs rename to vendor/winnow/src/binary/tests.rs index bbfeb23..bc2a005 100644 --- a/vendor/winnow/src/number/tests.rs +++ b/vendor/winnow/src/binary/tests.rs @@ -1,70 +1,96 @@ use super::*; +use crate::unpeek; +use crate::IResult; mod complete { use super::*; - use crate::error::Error; + use crate::error::InputError; macro_rules! 
assert_parse( ($left: expr, $right: expr) => { - let res: $crate::IResult<_, _, Error<_>> = $left; + let res: $crate::IResult<_, _, InputError<_>> = $left; assert_eq!(res, $right); }; ); #[test] fn i8_tests() { - assert_parse!(i8(&[0x00][..]), Ok((&b""[..], 0))); - assert_parse!(i8(&[0x7f][..]), Ok((&b""[..], 127))); - assert_parse!(i8(&[0xff][..]), Ok((&b""[..], -1))); - assert_parse!(i8(&[0x80][..]), Ok((&b""[..], -128))); + assert_parse!(i8.parse_peek(&[0x00][..]), Ok((&b""[..], 0))); + assert_parse!(i8.parse_peek(&[0x7f][..]), Ok((&b""[..], 127))); + assert_parse!(i8.parse_peek(&[0xff][..]), Ok((&b""[..], -1))); + assert_parse!(i8.parse_peek(&[0x80][..]), Ok((&b""[..], -128))); } #[test] fn be_i8_tests() { - assert_parse!(be_i8(&[0x00][..]), Ok((&b""[..], 0))); - assert_parse!(be_i8(&[0x7f][..]), Ok((&b""[..], 127))); - assert_parse!(be_i8(&[0xff][..]), Ok((&b""[..], -1))); - assert_parse!(be_i8(&[0x80][..]), Ok((&b""[..], -128))); + assert_parse!(be_i8.parse_peek(&[0x00][..]), Ok((&b""[..], 0))); + assert_parse!(be_i8.parse_peek(&[0x7f][..]), Ok((&b""[..], 127))); + assert_parse!(be_i8.parse_peek(&[0xff][..]), Ok((&b""[..], -1))); + assert_parse!(be_i8.parse_peek(&[0x80][..]), Ok((&b""[..], -128))); } #[test] fn be_i16_tests() { - assert_parse!(be_i16(&[0x00, 0x00][..]), Ok((&b""[..], 0))); - assert_parse!(be_i16(&[0x7f, 0xff][..]), Ok((&b""[..], 32_767_i16))); - assert_parse!(be_i16(&[0xff, 0xff][..]), Ok((&b""[..], -1))); - assert_parse!(be_i16(&[0x80, 0x00][..]), Ok((&b""[..], -32_768_i16))); + assert_parse!(be_i16.parse_peek(&[0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!( + be_i16.parse_peek(&[0x7f, 0xff][..]), + Ok((&b""[..], 32_767_i16)) + ); + assert_parse!(be_i16.parse_peek(&[0xff, 0xff][..]), Ok((&b""[..], -1))); + assert_parse!( + be_i16.parse_peek(&[0x80, 0x00][..]), + Ok((&b""[..], -32_768_i16)) + ); } #[test] fn be_u24_tests() { - assert_parse!(be_u24(&[0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); - assert_parse!(be_u24(&[0x00, 0xFF, 0xFF][..]), Ok((&b""[..], 65_535_u32))); assert_parse!( - be_u24(&[0x12, 0x34, 0x56][..]), + be_u24.parse_peek(&[0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0)) + ); + assert_parse!( + be_u24.parse_peek(&[0x00, 0xFF, 0xFF][..]), + Ok((&b""[..], 65_535_u32)) + ); + assert_parse!( + be_u24.parse_peek(&[0x12, 0x34, 0x56][..]), Ok((&b""[..], 1_193_046_u32)) ); } #[test] fn be_i24_tests() { - assert_parse!(be_i24(&[0xFF, 0xFF, 0xFF][..]), Ok((&b""[..], -1_i32))); - assert_parse!(be_i24(&[0xFF, 0x00, 0x00][..]), Ok((&b""[..], -65_536_i32))); assert_parse!( - be_i24(&[0xED, 0xCB, 0xAA][..]), + be_i24.parse_peek(&[0xFF, 0xFF, 0xFF][..]), + Ok((&b""[..], -1_i32)) + ); + assert_parse!( + be_i24.parse_peek(&[0xFF, 0x00, 0x00][..]), + Ok((&b""[..], -65_536_i32)) + ); + assert_parse!( + be_i24.parse_peek(&[0xED, 0xCB, 0xAA][..]), Ok((&b""[..], -1_193_046_i32)) ); } #[test] fn be_i32_tests() { - assert_parse!(be_i32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); assert_parse!( - be_i32(&[0x7f, 0xff, 0xff, 0xff][..]), + be_i32.parse_peek(&[0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0)) + ); + assert_parse!( + be_i32.parse_peek(&[0x7f, 0xff, 0xff, 0xff][..]), Ok((&b""[..], 2_147_483_647_i32)) ); - assert_parse!(be_i32(&[0xff, 0xff, 0xff, 0xff][..]), Ok((&b""[..], -1))); assert_parse!( - be_i32(&[0x80, 0x00, 0x00, 0x00][..]), + be_i32.parse_peek(&[0xff, 0xff, 0xff, 0xff][..]), + Ok((&b""[..], -1)) + ); + assert_parse!( + be_i32.parse_peek(&[0x80, 0x00, 0x00, 0x00][..]), Ok((&b""[..], -2_147_483_648_i32)) ); } @@ -72,19 +98,19 @@ mod complete { 
#[test] fn be_i64_tests() { assert_parse!( - be_i64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + be_i64.parse_peek(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0)) ); assert_parse!( - be_i64(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), + be_i64.parse_peek(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), Ok((&b""[..], 9_223_372_036_854_775_807_i64)) ); assert_parse!( - be_i64(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), + be_i64.parse_peek(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), Ok((&b""[..], -1)) ); assert_parse!( - be_i64(&[0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + be_i64.parse_peek(&[0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], -9_223_372_036_854_775_808_i64)) ); } @@ -92,7 +118,7 @@ mod complete { #[test] fn be_i128_tests() { assert_parse!( - be_i128( + be_i128.parse_peek( &[ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 @@ -101,7 +127,7 @@ mod complete { Ok((&b""[..], 0)) ); assert_parse!( - be_i128( + be_i128.parse_peek( &[ 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff @@ -113,7 +139,7 @@ mod complete { )) ); assert_parse!( - be_i128( + be_i128.parse_peek( &[ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff @@ -122,7 +148,7 @@ mod complete { Ok((&b""[..], -1)) ); assert_parse!( - be_i128( + be_i128.parse_peek( &[ 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 @@ -137,50 +163,74 @@ mod complete { #[test] fn le_i8_tests() { - assert_parse!(le_i8(&[0x00][..]), Ok((&b""[..], 0))); - assert_parse!(le_i8(&[0x7f][..]), Ok((&b""[..], 127))); - assert_parse!(le_i8(&[0xff][..]), Ok((&b""[..], -1))); - assert_parse!(le_i8(&[0x80][..]), Ok((&b""[..], -128))); + assert_parse!(le_i8.parse_peek(&[0x00][..]), Ok((&b""[..], 0))); + assert_parse!(le_i8.parse_peek(&[0x7f][..]), Ok((&b""[..], 127))); + assert_parse!(le_i8.parse_peek(&[0xff][..]), Ok((&b""[..], -1))); + assert_parse!(le_i8.parse_peek(&[0x80][..]), Ok((&b""[..], -128))); } #[test] fn le_i16_tests() { - assert_parse!(le_i16(&[0x00, 0x00][..]), Ok((&b""[..], 0))); - assert_parse!(le_i16(&[0xff, 0x7f][..]), Ok((&b""[..], 32_767_i16))); - assert_parse!(le_i16(&[0xff, 0xff][..]), Ok((&b""[..], -1))); - assert_parse!(le_i16(&[0x00, 0x80][..]), Ok((&b""[..], -32_768_i16))); + assert_parse!(le_i16.parse_peek(&[0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!( + le_i16.parse_peek(&[0xff, 0x7f][..]), + Ok((&b""[..], 32_767_i16)) + ); + assert_parse!(le_i16.parse_peek(&[0xff, 0xff][..]), Ok((&b""[..], -1))); + assert_parse!( + le_i16.parse_peek(&[0x00, 0x80][..]), + Ok((&b""[..], -32_768_i16)) + ); } #[test] fn le_u24_tests() { - assert_parse!(le_u24(&[0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); - assert_parse!(le_u24(&[0xFF, 0xFF, 0x00][..]), Ok((&b""[..], 65_535_u32))); assert_parse!( - le_u24(&[0x56, 0x34, 0x12][..]), + le_u24.parse_peek(&[0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0)) + ); + assert_parse!( + le_u24.parse_peek(&[0xFF, 0xFF, 0x00][..]), + Ok((&b""[..], 65_535_u32)) + ); + assert_parse!( + le_u24.parse_peek(&[0x56, 0x34, 0x12][..]), Ok((&b""[..], 1_193_046_u32)) ); } #[test] fn le_i24_tests() { - assert_parse!(le_i24(&[0xFF, 0xFF, 0xFF][..]), Ok((&b""[..], -1_i32))); - assert_parse!(le_i24(&[0x00, 0x00, 0xFF][..]), Ok((&b""[..], -65_536_i32))); assert_parse!( - le_i24(&[0xAA, 0xCB, 0xED][..]), + le_i24.parse_peek(&[0xFF, 
0xFF, 0xFF][..]), + Ok((&b""[..], -1_i32)) + ); + assert_parse!( + le_i24.parse_peek(&[0x00, 0x00, 0xFF][..]), + Ok((&b""[..], -65_536_i32)) + ); + assert_parse!( + le_i24.parse_peek(&[0xAA, 0xCB, 0xED][..]), Ok((&b""[..], -1_193_046_i32)) ); } #[test] fn le_i32_tests() { - assert_parse!(le_i32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); assert_parse!( - le_i32(&[0xff, 0xff, 0xff, 0x7f][..]), + le_i32.parse_peek(&[0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0)) + ); + assert_parse!( + le_i32.parse_peek(&[0xff, 0xff, 0xff, 0x7f][..]), Ok((&b""[..], 2_147_483_647_i32)) ); - assert_parse!(le_i32(&[0xff, 0xff, 0xff, 0xff][..]), Ok((&b""[..], -1))); assert_parse!( - le_i32(&[0x00, 0x00, 0x00, 0x80][..]), + le_i32.parse_peek(&[0xff, 0xff, 0xff, 0xff][..]), + Ok((&b""[..], -1)) + ); + assert_parse!( + le_i32.parse_peek(&[0x00, 0x00, 0x00, 0x80][..]), Ok((&b""[..], -2_147_483_648_i32)) ); } @@ -188,19 +238,19 @@ mod complete { #[test] fn le_i64_tests() { assert_parse!( - le_i64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + le_i64.parse_peek(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0)) ); assert_parse!( - le_i64(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f][..]), + le_i64.parse_peek(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f][..]), Ok((&b""[..], 9_223_372_036_854_775_807_i64)) ); assert_parse!( - le_i64(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), + le_i64.parse_peek(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), Ok((&b""[..], -1)) ); assert_parse!( - le_i64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80][..]), + le_i64.parse_peek(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80][..]), Ok((&b""[..], -9_223_372_036_854_775_808_i64)) ); } @@ -208,7 +258,7 @@ mod complete { #[test] fn le_i128_tests() { assert_parse!( - le_i128( + le_i128.parse_peek( &[ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 @@ -217,7 +267,7 @@ mod complete { Ok((&b""[..], 0)) ); assert_parse!( - le_i128( + le_i128.parse_peek( &[ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f @@ -229,7 +279,7 @@ mod complete { )) ); assert_parse!( - le_i128( + le_i128.parse_peek( &[ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff @@ -238,7 +288,7 @@ mod complete { Ok((&b""[..], -1)) ); assert_parse!( - le_i128( + le_i128.parse_peek( &[ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 @@ -253,9 +303,12 @@ mod complete { #[test] fn be_f32_tests() { - assert_parse!(be_f32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0_f32))); assert_parse!( - be_f32(&[0x4d, 0x31, 0x1f, 0xd8][..]), + be_f32.parse_peek(&[0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0_f32)) + ); + assert_parse!( + be_f32.parse_peek(&[0x4d, 0x31, 0x1f, 0xd8][..]), Ok((&b""[..], 185_728_380_f32)) ); } @@ -263,20 +316,23 @@ mod complete { #[test] fn be_f64_tests() { assert_parse!( - be_f64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + be_f64.parse_peek(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0_f64)) ); assert_parse!( - be_f64(&[0x41, 0xa6, 0x23, 0xfb, 0x10, 0x00, 0x00, 0x00][..]), + be_f64.parse_peek(&[0x41, 0xa6, 0x23, 0xfb, 0x10, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 185_728_392_f64)) ); } #[test] fn le_f32_tests() { - assert_parse!(le_f32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0_f32))); assert_parse!( - le_f32(&[0xd8, 0x1f, 0x31, 0x4d][..]), + 
le_f32.parse_peek(&[0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0_f32)) + ); + assert_parse!( + le_f32.parse_peek(&[0xd8, 0x1f, 0x31, 0x4d][..]), Ok((&b""[..], 185_728_380_f32)) ); } @@ -284,33 +340,33 @@ mod complete { #[test] fn le_f64_tests() { assert_parse!( - le_f64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + le_f64.parse_peek(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0_f64)) ); assert_parse!( - le_f64(&[0x00, 0x00, 0x00, 0x10, 0xfb, 0x23, 0xa6, 0x41][..]), + le_f64.parse_peek(&[0x00, 0x00, 0x00, 0x10, 0xfb, 0x23, 0xa6, 0x41][..]), Ok((&b""[..], 185_728_392_f64)) ); } #[test] fn configurable_endianness() { - use crate::number::Endianness; + use crate::binary::Endianness; fn be_tst16(i: &[u8]) -> IResult<&[u8], u16> { - u16(Endianness::Big).parse_next(i) + u16(Endianness::Big).parse_peek(i) } fn le_tst16(i: &[u8]) -> IResult<&[u8], u16> { - u16(Endianness::Little).parse_next(i) + u16(Endianness::Little).parse_peek(i) } assert_eq!(be_tst16(&[0x80, 0x00]), Ok((&b""[..], 32_768_u16))); assert_eq!(le_tst16(&[0x80, 0x00]), Ok((&b""[..], 128_u16))); fn be_tst32(i: &[u8]) -> IResult<&[u8], u32> { - u32(Endianness::Big).parse_next(i) + u32(Endianness::Big).parse_peek(i) } fn le_tst32(i: &[u8]) -> IResult<&[u8], u32> { - u32(Endianness::Little).parse_next(i) + u32(Endianness::Little).parse_peek(i) } assert_eq!( be_tst32(&[0x12, 0x00, 0x60, 0x00]), @@ -322,10 +378,10 @@ mod complete { ); fn be_tst64(i: &[u8]) -> IResult<&[u8], u64> { - u64(Endianness::Big).parse_next(i) + u64(Endianness::Big).parse_peek(i) } fn le_tst64(i: &[u8]) -> IResult<&[u8], u64> { - u64(Endianness::Little).parse_next(i) + u64(Endianness::Little).parse_peek(i) } assert_eq!( be_tst64(&[0x12, 0x00, 0x60, 0x00, 0x12, 0x00, 0x80, 0x00]), @@ -337,19 +393,19 @@ mod complete { ); fn be_tsti16(i: &[u8]) -> IResult<&[u8], i16> { - i16(Endianness::Big).parse_next(i) + i16(Endianness::Big).parse_peek(i) } fn le_tsti16(i: &[u8]) -> IResult<&[u8], i16> { - i16(Endianness::Little).parse_next(i) + i16(Endianness::Little).parse_peek(i) } assert_eq!(be_tsti16(&[0x00, 0x80]), Ok((&b""[..], 128_i16))); assert_eq!(le_tsti16(&[0x00, 0x80]), Ok((&b""[..], -32_768_i16))); fn be_tsti32(i: &[u8]) -> IResult<&[u8], i32> { - i32(Endianness::Big).parse_next(i) + i32(Endianness::Big).parse_peek(i) } fn le_tsti32(i: &[u8]) -> IResult<&[u8], i32> { - i32(Endianness::Little).parse_next(i) + i32(Endianness::Little).parse_peek(i) } assert_eq!( be_tsti32(&[0x00, 0x12, 0x60, 0x00]), @@ -361,10 +417,10 @@ mod complete { ); fn be_tsti64(i: &[u8]) -> IResult<&[u8], i64> { - i64(Endianness::Big).parse_next(i) + i64(Endianness::Big).parse_peek(i) } fn le_tsti64(i: &[u8]) -> IResult<&[u8], i64> { - i64(Endianness::Little).parse_next(i) + i64(Endianness::Little).parse_peek(i) } assert_eq!( be_tsti64(&[0x00, 0xFF, 0x60, 0x00, 0x12, 0x00, 0x80, 0x00]), @@ -380,13 +436,22 @@ mod complete { mod partial { use super::*; use crate::error::ErrMode; - use crate::error::Error; + use crate::error::InputError; use crate::error::Needed; + #[cfg(feature = "alloc")] + use crate::lib::std::vec::Vec; use crate::Partial; + use crate::{ + ascii::digit1 as digit, + binary::{be_u16, be_u8}, + error::ErrorKind, + lib::std::str::{self, FromStr}, + IResult, + }; macro_rules! 
assert_parse( ($left: expr, $right: expr) => { - let res: $crate::IResult<_, _, Error<_>> = $left; + let res: $crate::IResult<_, _, InputError<_>> = $left; assert_eq!(res, $right); }; ); @@ -394,23 +459,23 @@ mod partial { #[test] fn i8_tests() { assert_parse!( - be_i8(Partial::new(&[0x00][..])), + be_i8.parse_peek(Partial::new(&[0x00][..])), Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - be_i8(Partial::new(&[0x7f][..])), + be_i8.parse_peek(Partial::new(&[0x7f][..])), Ok((Partial::new(&b""[..]), 127)) ); assert_parse!( - be_i8(Partial::new(&[0xff][..])), + be_i8.parse_peek(Partial::new(&[0xff][..])), Ok((Partial::new(&b""[..]), -1)) ); assert_parse!( - be_i8(Partial::new(&[0x80][..])), + be_i8.parse_peek(Partial::new(&[0x80][..])), Ok((Partial::new(&b""[..]), -128)) ); assert_parse!( - be_i8(Partial::new(&[][..])), + be_i8.parse_peek(Partial::new(&[][..])), Err(ErrMode::Incomplete(Needed::new(1))) ); } @@ -418,27 +483,27 @@ mod partial { #[test] fn i16_tests() { assert_parse!( - be_i16(Partial::new(&[0x00, 0x00][..])), + be_i16.parse_peek(Partial::new(&[0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - be_i16(Partial::new(&[0x7f, 0xff][..])), + be_i16.parse_peek(Partial::new(&[0x7f, 0xff][..])), Ok((Partial::new(&b""[..]), 32_767_i16)) ); assert_parse!( - be_i16(Partial::new(&[0xff, 0xff][..])), + be_i16.parse_peek(Partial::new(&[0xff, 0xff][..])), Ok((Partial::new(&b""[..]), -1)) ); assert_parse!( - be_i16(Partial::new(&[0x80, 0x00][..])), + be_i16.parse_peek(Partial::new(&[0x80, 0x00][..])), Ok((Partial::new(&b""[..]), -32_768_i16)) ); assert_parse!( - be_i16(Partial::new(&[][..])), + be_i16.parse_peek(Partial::new(&[][..])), Err(ErrMode::Incomplete(Needed::new(2))) ); assert_parse!( - be_i16(Partial::new(&[0x00][..])), + be_i16.parse_peek(Partial::new(&[0x00][..])), Err(ErrMode::Incomplete(Needed::new(1))) ); } @@ -446,27 +511,27 @@ mod partial { #[test] fn u24_tests() { assert_parse!( - be_u24(Partial::new(&[0x00, 0x00, 0x00][..])), + be_u24.parse_peek(Partial::new(&[0x00, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - be_u24(Partial::new(&[0x00, 0xFF, 0xFF][..])), + be_u24.parse_peek(Partial::new(&[0x00, 0xFF, 0xFF][..])), Ok((Partial::new(&b""[..]), 65_535_u32)) ); assert_parse!( - be_u24(Partial::new(&[0x12, 0x34, 0x56][..])), + be_u24.parse_peek(Partial::new(&[0x12, 0x34, 0x56][..])), Ok((Partial::new(&b""[..]), 1_193_046_u32)) ); assert_parse!( - be_u24(Partial::new(&[][..])), + be_u24.parse_peek(Partial::new(&[][..])), Err(ErrMode::Incomplete(Needed::new(3))) ); assert_parse!( - be_u24(Partial::new(&[0x00][..])), + be_u24.parse_peek(Partial::new(&[0x00][..])), Err(ErrMode::Incomplete(Needed::new(2))) ); assert_parse!( - be_u24(Partial::new(&[0x00, 0x00][..])), + be_u24.parse_peek(Partial::new(&[0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(1))) ); } @@ -474,27 +539,27 @@ mod partial { #[test] fn i24_tests() { assert_parse!( - be_i24(Partial::new(&[0xFF, 0xFF, 0xFF][..])), + be_i24.parse_peek(Partial::new(&[0xFF, 0xFF, 0xFF][..])), Ok((Partial::new(&b""[..]), -1_i32)) ); assert_parse!( - be_i24(Partial::new(&[0xFF, 0x00, 0x00][..])), + be_i24.parse_peek(Partial::new(&[0xFF, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), -65_536_i32)) ); assert_parse!( - be_i24(Partial::new(&[0xED, 0xCB, 0xAA][..])), + be_i24.parse_peek(Partial::new(&[0xED, 0xCB, 0xAA][..])), Ok((Partial::new(&b""[..]), -1_193_046_i32)) ); assert_parse!( - be_i24(Partial::new(&[][..])), + be_i24.parse_peek(Partial::new(&[][..])), 
Err(ErrMode::Incomplete(Needed::new(3))) ); assert_parse!( - be_i24(Partial::new(&[0x00][..])), + be_i24.parse_peek(Partial::new(&[0x00][..])), Err(ErrMode::Incomplete(Needed::new(2))) ); assert_parse!( - be_i24(Partial::new(&[0x00, 0x00][..])), + be_i24.parse_peek(Partial::new(&[0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(1))) ); } @@ -502,35 +567,35 @@ mod partial { #[test] fn i32_tests() { assert_parse!( - be_i32(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), + be_i32.parse_peek(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - be_i32(Partial::new(&[0x7f, 0xff, 0xff, 0xff][..])), + be_i32.parse_peek(Partial::new(&[0x7f, 0xff, 0xff, 0xff][..])), Ok((Partial::new(&b""[..]), 2_147_483_647_i32)) ); assert_parse!( - be_i32(Partial::new(&[0xff, 0xff, 0xff, 0xff][..])), + be_i32.parse_peek(Partial::new(&[0xff, 0xff, 0xff, 0xff][..])), Ok((Partial::new(&b""[..]), -1)) ); assert_parse!( - be_i32(Partial::new(&[0x80, 0x00, 0x00, 0x00][..])), + be_i32.parse_peek(Partial::new(&[0x80, 0x00, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), -2_147_483_648_i32)) ); assert_parse!( - be_i32(Partial::new(&[][..])), + be_i32.parse_peek(Partial::new(&[][..])), Err(ErrMode::Incomplete(Needed::new(4))) ); assert_parse!( - be_i32(Partial::new(&[0x00][..])), + be_i32.parse_peek(Partial::new(&[0x00][..])), Err(ErrMode::Incomplete(Needed::new(3))) ); assert_parse!( - be_i32(Partial::new(&[0x00, 0x00][..])), + be_i32.parse_peek(Partial::new(&[0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(2))) ); assert_parse!( - be_i32(Partial::new(&[0x00, 0x00, 0x00][..])), + be_i32.parse_peek(Partial::new(&[0x00, 0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(1))) ); } @@ -538,59 +603,59 @@ mod partial { #[test] fn i64_tests() { assert_parse!( - be_i64(Partial::new( + be_i64.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - be_i64(Partial::new( + be_i64.parse_peek(Partial::new( &[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..] )), Ok((Partial::new(&b""[..]), 9_223_372_036_854_775_807_i64)) ); assert_parse!( - be_i64(Partial::new( + be_i64.parse_peek(Partial::new( &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..] )), Ok((Partial::new(&b""[..]), -1)) ); assert_parse!( - be_i64(Partial::new( + be_i64.parse_peek(Partial::new( &[0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] 
)), Ok((Partial::new(&b""[..]), -9_223_372_036_854_775_808_i64)) ); assert_parse!( - be_i64(Partial::new(&[][..])), + be_i64.parse_peek(Partial::new(&[][..])), Err(ErrMode::Incomplete(Needed::new(8))) ); assert_parse!( - be_i64(Partial::new(&[0x00][..])), + be_i64.parse_peek(Partial::new(&[0x00][..])), Err(ErrMode::Incomplete(Needed::new(7))) ); assert_parse!( - be_i64(Partial::new(&[0x00, 0x00][..])), + be_i64.parse_peek(Partial::new(&[0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(6))) ); assert_parse!( - be_i64(Partial::new(&[0x00, 0x00, 0x00][..])), + be_i64.parse_peek(Partial::new(&[0x00, 0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(5))) ); assert_parse!( - be_i64(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), + be_i64.parse_peek(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(4))) ); assert_parse!( - be_i64(Partial::new(&[0x00, 0x00, 0x00, 0x00, 0x00][..])), + be_i64.parse_peek(Partial::new(&[0x00, 0x00, 0x00, 0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(3))) ); assert_parse!( - be_i64(Partial::new(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..])), + be_i64.parse_peek(Partial::new(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(2))) ); assert_parse!( - be_i64(Partial::new( + be_i64.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Err(ErrMode::Incomplete(Needed::new(1))) @@ -600,7 +665,7 @@ mod partial { #[test] fn i128_tests() { assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 @@ -609,7 +674,7 @@ mod partial { Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[ 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff @@ -621,7 +686,7 @@ mod partial { )) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff @@ -630,7 +695,7 @@ mod partial { Ok((Partial::new(&b""[..]), -1)) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[ 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 @@ -642,77 +707,77 @@ mod partial { )) ); assert_parse!( - be_i128(Partial::new(&[][..])), + be_i128.parse_peek(Partial::new(&[][..])), Err(ErrMode::Incomplete(Needed::new(16))) ); assert_parse!( - be_i128(Partial::new(&[0x00][..])), + be_i128.parse_peek(Partial::new(&[0x00][..])), Err(ErrMode::Incomplete(Needed::new(15))) ); assert_parse!( - be_i128(Partial::new(&[0x00, 0x00][..])), + be_i128.parse_peek(Partial::new(&[0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(14))) ); assert_parse!( - be_i128(Partial::new(&[0x00, 0x00, 0x00][..])), + be_i128.parse_peek(Partial::new(&[0x00, 0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(13))) ); assert_parse!( - be_i128(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), + be_i128.parse_peek(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(12))) ); assert_parse!( - be_i128(Partial::new(&[0x00, 0x00, 0x00, 0x00, 0x00][..])), + be_i128.parse_peek(Partial::new(&[0x00, 0x00, 0x00, 0x00, 0x00][..])), Err(ErrMode::Incomplete(Needed::new(11))) ); assert_parse!( - be_i128(Partial::new(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..])), + be_i128.parse_peek(Partial::new(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..])), 
Err(ErrMode::Incomplete(Needed::new(10))) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Err(ErrMode::Incomplete(Needed::new(9))) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Err(ErrMode::Incomplete(Needed::new(8))) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Err(ErrMode::Incomplete(Needed::new(7))) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Err(ErrMode::Incomplete(Needed::new(6))) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Err(ErrMode::Incomplete(Needed::new(5))) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Err(ErrMode::Incomplete(Needed::new(4))) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Err(ErrMode::Incomplete(Needed::new(3))) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 @@ -721,7 +786,7 @@ mod partial { Err(ErrMode::Incomplete(Needed::new(2))) ); assert_parse!( - be_i128(Partial::new( + be_i128.parse_peek(Partial::new( &[ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 @@ -734,19 +799,19 @@ mod partial { #[test] fn le_i8_tests() { assert_parse!( - le_i8(Partial::new(&[0x00][..])), + le_i8.parse_peek(Partial::new(&[0x00][..])), Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - le_i8(Partial::new(&[0x7f][..])), + le_i8.parse_peek(Partial::new(&[0x7f][..])), Ok((Partial::new(&b""[..]), 127)) ); assert_parse!( - le_i8(Partial::new(&[0xff][..])), + le_i8.parse_peek(Partial::new(&[0xff][..])), Ok((Partial::new(&b""[..]), -1)) ); assert_parse!( - le_i8(Partial::new(&[0x80][..])), + le_i8.parse_peek(Partial::new(&[0x80][..])), Ok((Partial::new(&b""[..]), -128)) ); } @@ -754,19 +819,19 @@ mod partial { #[test] fn le_i16_tests() { assert_parse!( - le_i16(Partial::new(&[0x00, 0x00][..])), + le_i16.parse_peek(Partial::new(&[0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - le_i16(Partial::new(&[0xff, 0x7f][..])), + le_i16.parse_peek(Partial::new(&[0xff, 0x7f][..])), Ok((Partial::new(&b""[..]), 32_767_i16)) ); assert_parse!( - le_i16(Partial::new(&[0xff, 0xff][..])), + le_i16.parse_peek(Partial::new(&[0xff, 0xff][..])), Ok((Partial::new(&b""[..]), -1)) ); assert_parse!( - le_i16(Partial::new(&[0x00, 0x80][..])), + le_i16.parse_peek(Partial::new(&[0x00, 0x80][..])), Ok((Partial::new(&b""[..]), -32_768_i16)) ); } @@ -774,15 +839,15 @@ mod partial { #[test] fn le_u24_tests() { assert_parse!( - le_u24(Partial::new(&[0x00, 0x00, 0x00][..])), + le_u24.parse_peek(Partial::new(&[0x00, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - le_u24(Partial::new(&[0xFF, 0xFF, 0x00][..])), + le_u24.parse_peek(Partial::new(&[0xFF, 0xFF, 0x00][..])), Ok((Partial::new(&b""[..]), 65_535_u32)) ); assert_parse!( - le_u24(Partial::new(&[0x56, 0x34, 0x12][..])), + le_u24.parse_peek(Partial::new(&[0x56, 0x34, 
0x12][..])), Ok((Partial::new(&b""[..]), 1_193_046_u32)) ); } @@ -790,15 +855,15 @@ mod partial { #[test] fn le_i24_tests() { assert_parse!( - le_i24(Partial::new(&[0xFF, 0xFF, 0xFF][..])), + le_i24.parse_peek(Partial::new(&[0xFF, 0xFF, 0xFF][..])), Ok((Partial::new(&b""[..]), -1_i32)) ); assert_parse!( - le_i24(Partial::new(&[0x00, 0x00, 0xFF][..])), + le_i24.parse_peek(Partial::new(&[0x00, 0x00, 0xFF][..])), Ok((Partial::new(&b""[..]), -65_536_i32)) ); assert_parse!( - le_i24(Partial::new(&[0xAA, 0xCB, 0xED][..])), + le_i24.parse_peek(Partial::new(&[0xAA, 0xCB, 0xED][..])), Ok((Partial::new(&b""[..]), -1_193_046_i32)) ); } @@ -806,19 +871,19 @@ mod partial { #[test] fn le_i32_tests() { assert_parse!( - le_i32(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), + le_i32.parse_peek(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - le_i32(Partial::new(&[0xff, 0xff, 0xff, 0x7f][..])), + le_i32.parse_peek(Partial::new(&[0xff, 0xff, 0xff, 0x7f][..])), Ok((Partial::new(&b""[..]), 2_147_483_647_i32)) ); assert_parse!( - le_i32(Partial::new(&[0xff, 0xff, 0xff, 0xff][..])), + le_i32.parse_peek(Partial::new(&[0xff, 0xff, 0xff, 0xff][..])), Ok((Partial::new(&b""[..]), -1)) ); assert_parse!( - le_i32(Partial::new(&[0x00, 0x00, 0x00, 0x80][..])), + le_i32.parse_peek(Partial::new(&[0x00, 0x00, 0x00, 0x80][..])), Ok((Partial::new(&b""[..]), -2_147_483_648_i32)) ); } @@ -826,25 +891,25 @@ mod partial { #[test] fn le_i64_tests() { assert_parse!( - le_i64(Partial::new( + le_i64.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - le_i64(Partial::new( + le_i64.parse_peek(Partial::new( &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f][..] )), Ok((Partial::new(&b""[..]), 9_223_372_036_854_775_807_i64)) ); assert_parse!( - le_i64(Partial::new( + le_i64.parse_peek(Partial::new( &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..] )), Ok((Partial::new(&b""[..]), -1)) ); assert_parse!( - le_i64(Partial::new( + le_i64.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80][..] 
)), Ok((Partial::new(&b""[..]), -9_223_372_036_854_775_808_i64)) @@ -854,7 +919,7 @@ mod partial { #[test] fn le_i128_tests() { assert_parse!( - le_i128(Partial::new( + le_i128.parse_peek(Partial::new( &[ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 @@ -863,7 +928,7 @@ mod partial { Ok((Partial::new(&b""[..]), 0)) ); assert_parse!( - le_i128(Partial::new( + le_i128.parse_peek(Partial::new( &[ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f @@ -875,7 +940,7 @@ mod partial { )) ); assert_parse!( - le_i128(Partial::new( + le_i128.parse_peek(Partial::new( &[ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff @@ -884,7 +949,7 @@ mod partial { Ok((Partial::new(&b""[..]), -1)) ); assert_parse!( - le_i128(Partial::new( + le_i128.parse_peek(Partial::new( &[ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 @@ -900,11 +965,11 @@ mod partial { #[test] fn be_f32_tests() { assert_parse!( - be_f32(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), + be_f32.parse_peek(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 0_f32)) ); assert_parse!( - be_f32(Partial::new(&[0x4d, 0x31, 0x1f, 0xd8][..])), + be_f32.parse_peek(Partial::new(&[0x4d, 0x31, 0x1f, 0xd8][..])), Ok((Partial::new(&b""[..]), 185_728_380_f32)) ); } @@ -912,13 +977,13 @@ mod partial { #[test] fn be_f64_tests() { assert_parse!( - be_f64(Partial::new( + be_f64.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Ok((Partial::new(&b""[..]), 0_f64)) ); assert_parse!( - be_f64(Partial::new( + be_f64.parse_peek(Partial::new( &[0x41, 0xa6, 0x23, 0xfb, 0x10, 0x00, 0x00, 0x00][..] )), Ok((Partial::new(&b""[..]), 185_728_392_f64)) @@ -928,11 +993,11 @@ mod partial { #[test] fn le_f32_tests() { assert_parse!( - le_f32(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), + le_f32.parse_peek(Partial::new(&[0x00, 0x00, 0x00, 0x00][..])), Ok((Partial::new(&b""[..]), 0_f32)) ); assert_parse!( - le_f32(Partial::new(&[0xd8, 0x1f, 0x31, 0x4d][..])), + le_f32.parse_peek(Partial::new(&[0xd8, 0x1f, 0x31, 0x4d][..])), Ok((Partial::new(&b""[..]), 185_728_380_f32)) ); } @@ -940,13 +1005,13 @@ mod partial { #[test] fn le_f64_tests() { assert_parse!( - le_f64(Partial::new( + le_f64.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] )), Ok((Partial::new(&b""[..]), 0_f64)) ); assert_parse!( - le_f64(Partial::new( + le_f64.parse_peek(Partial::new( &[0x00, 0x00, 0x00, 0x10, 0xfb, 0x23, 0xa6, 0x41][..] 
)), Ok((Partial::new(&b""[..]), 185_728_392_f64)) @@ -955,13 +1020,13 @@ mod partial { #[test] fn configurable_endianness() { - use crate::number::Endianness; + use crate::binary::Endianness; fn be_tst16(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u16> { - u16(Endianness::Big).parse_next(i) + u16(Endianness::Big).parse_peek(i) } fn le_tst16(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u16> { - u16(Endianness::Little).parse_next(i) + u16(Endianness::Little).parse_peek(i) } assert_eq!( be_tst16(Partial::new(&[0x80, 0x00])), @@ -973,10 +1038,10 @@ mod partial { ); fn be_tst32(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { - u32(Endianness::Big).parse_next(i) + u32(Endianness::Big).parse_peek(i) } fn le_tst32(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { - u32(Endianness::Little).parse_next(i) + u32(Endianness::Little).parse_peek(i) } assert_eq!( be_tst32(Partial::new(&[0x12, 0x00, 0x60, 0x00])), @@ -988,10 +1053,10 @@ mod partial { ); fn be_tst64(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u64> { - u64(Endianness::Big).parse_next(i) + u64(Endianness::Big).parse_peek(i) } fn le_tst64(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u64> { - u64(Endianness::Little).parse_next(i) + u64(Endianness::Little).parse_peek(i) } assert_eq!( be_tst64(Partial::new(&[ @@ -1007,10 +1072,10 @@ mod partial { ); fn be_tsti16(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i16> { - i16(Endianness::Big).parse_next(i) + i16(Endianness::Big).parse_peek(i) } fn le_tsti16(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i16> { - i16(Endianness::Little).parse_next(i) + i16(Endianness::Little).parse_peek(i) } assert_eq!( be_tsti16(Partial::new(&[0x00, 0x80])), @@ -1022,10 +1087,10 @@ mod partial { ); fn be_tsti32(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i32> { - i32(Endianness::Big).parse_next(i) + i32(Endianness::Big).parse_peek(i) } fn le_tsti32(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i32> { - i32(Endianness::Little).parse_next(i) + i32(Endianness::Little).parse_peek(i) } assert_eq!( be_tsti32(Partial::new(&[0x00, 0x12, 0x60, 0x00])), @@ -1037,10 +1102,10 @@ mod partial { ); fn be_tsti64(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i64> { - i64(Endianness::Big).parse_next(i) + i64(Endianness::Big).parse_peek(i) } fn le_tsti64(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, i64> { - i64(Endianness::Little).parse_next(i) + i64(Endianness::Little).parse_peek(i) } assert_eq!( be_tsti64(Partial::new(&[ @@ -1055,4 +1120,197 @@ mod partial { Ok((Partial::new(&b""[..]), 36_028_874_334_732_032_i64)) ); } + + #[test] + #[cfg(feature = "alloc")] + fn length_repeat_test() { + fn number(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { + digit + .try_map(str::from_utf8) + .try_map(FromStr::from_str) + .parse_peek(i) + } + + fn cnt(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + length_repeat(unpeek(number), "abc").parse_peek(i) + } + + assert_eq!( + cnt(Partial::new(&b"2abcabcabcdef"[..])), + Ok((Partial::new(&b"abcdef"[..]), vec![&b"abc"[..], &b"abc"[..]])) + ); + assert_eq!( + cnt(Partial::new(&b"2ab"[..])), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + cnt(Partial::new(&b"3abcab"[..])), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + cnt(Partial::new(&b"xxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx"[..]), + ErrorKind::Slice + ))) + ); + assert_eq!( + cnt(Partial::new(&b"2abcxxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx"[..]), + ErrorKind::Tag + ))) + ); + } + + #[test] + fn 
partial_length_bytes() { + use crate::binary::le_u8; + + fn x(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { + length_take(le_u8).parse_peek(i) + } + assert_eq!( + x(Partial::new(b"\x02..>>")), + Ok((Partial::new(&b">>"[..]), &b".."[..])) + ); + assert_eq!( + x(Partial::new(b"\x02..")), + Ok((Partial::new(&[][..]), &b".."[..])) + ); + assert_eq!( + x(Partial::new(b"\x02.")), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + x(Partial::new(b"\x02")), + Err(ErrMode::Incomplete(Needed::new(2))) + ); + + fn y(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { + let (i, _) = "magic".parse_peek(i)?; + length_take(le_u8).parse_peek(i) + } + assert_eq!( + y(Partial::new(b"magic\x02..>>")), + Ok((Partial::new(&b">>"[..]), &b".."[..])) + ); + assert_eq!( + y(Partial::new(b"magic\x02..")), + Ok((Partial::new(&[][..]), &b".."[..])) + ); + assert_eq!( + y(Partial::new(b"magic\x02.")), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + y(Partial::new(b"magic\x02")), + Err(ErrMode::Incomplete(Needed::new(2))) + ); + } + + #[test] + fn length_take_test() { + fn number(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { + digit + .try_map(str::from_utf8) + .try_map(FromStr::from_str) + .parse_peek(i) + } + + fn take(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { + length_take(unpeek(number)).parse_peek(i) + } + + assert_eq!( + take(Partial::new(&b"6abcabcabcdef"[..])), + Ok((Partial::new(&b"abcdef"[..]), &b"abcabc"[..])) + ); + assert_eq!( + take(Partial::new(&b"3ab"[..])), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + take(Partial::new(&b"xxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx"[..]), + ErrorKind::Slice + ))) + ); + assert_eq!( + take(Partial::new(&b"2abcxxx"[..])), + Ok((Partial::new(&b"cxxx"[..]), &b"ab"[..])) + ); + } + + #[test] + fn length_and_then_test() { + use crate::stream::StreamIsPartial; + + fn length_and_then_1(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u16> { + length_and_then(be_u8, be_u16).parse_peek(i) + } + fn length_and_then_2(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (u8, u8)> { + length_and_then(be_u8, (be_u8, be_u8)).parse_peek(i) + } + + let mut empty_complete = Partial::new(&b""[..]); + let _ = empty_complete.complete(); + + let i1 = [0, 5, 6]; + assert_eq!( + length_and_then_1(Partial::new(&i1)), + Err(ErrMode::Backtrack(error_position!( + &empty_complete, + ErrorKind::Slice + ))) + ); + assert_eq!( + length_and_then_2(Partial::new(&i1)), + Err(ErrMode::Backtrack(error_position!( + &empty_complete, + ErrorKind::Token + ))) + ); + + let i2 = [1, 5, 6, 3]; + { + let mut middle_complete = Partial::new(&i2[1..2]); + let _ = middle_complete.complete(); + assert_eq!( + length_and_then_1(Partial::new(&i2)), + Err(ErrMode::Backtrack(error_position!( + &middle_complete, + ErrorKind::Slice + ))) + ); + assert_eq!( + length_and_then_2(Partial::new(&i2)), + Err(ErrMode::Backtrack(error_position!( + &empty_complete, + ErrorKind::Token + ))) + ); + } + + let i3 = [2, 5, 6, 3, 4, 5, 7]; + assert_eq!( + length_and_then_1(Partial::new(&i3)), + Ok((Partial::new(&i3[3..]), 1286)) + ); + assert_eq!( + length_and_then_2(Partial::new(&i3)), + Ok((Partial::new(&i3[3..]), (5, 6))) + ); + + let i4 = [3, 5, 6, 3, 4, 5]; + assert_eq!( + length_and_then_1(Partial::new(&i4)), + Ok((Partial::new(&i4[4..]), 1286)) + ); + assert_eq!( + length_and_then_2(Partial::new(&i4)), + Ok((Partial::new(&i4[4..]), (5, 6))) + ); + } } diff --git a/vendor/winnow/src/branch/tests.rs b/vendor/winnow/src/branch/tests.rs 
deleted file mode 100644 index 80bc163..0000000 --- a/vendor/winnow/src/branch/tests.rs +++ /dev/null @@ -1,180 +0,0 @@ -use crate::branch::{alt, permutation}; - -use crate::error::ErrMode; -use crate::error::ErrorKind; -use crate::error::Needed; -use crate::IResult; -use crate::Parser; -use crate::Partial; -#[cfg(feature = "alloc")] -use crate::{ - error::ParseError, - lib::std::{ - fmt::Debug, - string::{String, ToString}, - }, -}; - -#[cfg(feature = "alloc")] -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct ErrorStr(String); - -#[cfg(feature = "alloc")] -impl From<u32> for ErrorStr { - fn from(i: u32) -> Self { - ErrorStr(format!("custom error code: {}", i)) - } -} - -#[cfg(feature = "alloc")] -impl<'a> From<&'a str> for ErrorStr { - fn from(i: &'a str) -> Self { - ErrorStr(format!("custom error message: {}", i)) - } -} - -#[cfg(feature = "alloc")] -impl<I: Debug> ParseError<I> for ErrorStr { - fn from_error_kind(input: I, kind: ErrorKind) -> Self { - ErrorStr(format!("custom error message: ({:?}, {:?})", input, kind)) - } - - fn append(self, input: I, kind: ErrorKind) -> Self { - ErrorStr(format!( - "custom error message: ({:?}, {:?}) - {:?}", - input, kind, self - )) - } -} - -#[cfg(feature = "alloc")] -#[test] -fn alt_test() { - fn work(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { - Ok((&b""[..], input)) - } - - #[allow(unused_variables)] - fn dont_work(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { - Err(ErrMode::Backtrack(ErrorStr("abcd".to_string()))) - } - - fn work2(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { - Ok((input, &b""[..])) - } - - fn alt1(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { - alt((dont_work, dont_work)).parse_next(i) - } - fn alt2(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { - alt((dont_work, work)).parse_next(i) - } - fn alt3(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { - alt((dont_work, dont_work, work2, dont_work)).parse_next(i) - } - //named!(alt1, alt!(dont_work | dont_work)); - //named!(alt2, alt!(dont_work | work)); - //named!(alt3, alt!(dont_work | dont_work | work2 | dont_work)); - - let a = &b"abcd"[..]; - assert_eq!( - alt1(a), - Err(ErrMode::Backtrack(error_node_position!( - a, - ErrorKind::Alt, - ErrorStr("abcd".to_string()) - ))) - ); - assert_eq!(alt2(a), Ok((&b""[..], a))); - assert_eq!(alt3(a), Ok((a, &b""[..]))); - - fn alt4(i: &[u8]) -> IResult<&[u8], &[u8]> { - alt(("abcd", "efgh")).parse_next(i) - } - let b = &b"efgh"[..]; - assert_eq!(alt4(a), Ok((&b""[..], a))); - assert_eq!(alt4(b), Ok((&b""[..], b))); -} - -#[test] -fn alt_incomplete() { - fn alt1(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - alt(("a", "bc", "def")).parse_next(i) - } - - let a = &b""[..]; - assert_eq!( - alt1(Partial::new(a)), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - let a = &b"b"[..]; - assert_eq!( - alt1(Partial::new(a)), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - let a = &b"bcd"[..]; - assert_eq!( - alt1(Partial::new(a)), - Ok((Partial::new(&b"d"[..]), &b"bc"[..])) - ); - let a = &b"cde"[..]; - assert_eq!( - alt1(Partial::new(a)), - Err(ErrMode::Backtrack(error_position!( - Partial::new(a), - ErrorKind::Tag - ))) - ); - let a = &b"de"[..]; - assert_eq!( - alt1(Partial::new(a)), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - let a = &b"defg"[..]; - assert_eq!( - alt1(Partial::new(a)), - Ok((Partial::new(&b"g"[..]), &b"def"[..])) - ); -} - -#[test] -fn permutation_test() { - #[allow(clippy::type_complexity)] - fn perm(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (&[u8], &[u8], &[u8])> { - permutation(("abcd", 
"efg", "hi")).parse_next(i) - } - - let expected = (&b"abcd"[..], &b"efg"[..], &b"hi"[..]); - - let a = &b"abcdefghijk"[..]; - assert_eq!( - perm(Partial::new(a)), - Ok((Partial::new(&b"jk"[..]), expected)) - ); - let b = &b"efgabcdhijk"[..]; - assert_eq!( - perm(Partial::new(b)), - Ok((Partial::new(&b"jk"[..]), expected)) - ); - let c = &b"hiefgabcdjk"[..]; - assert_eq!( - perm(Partial::new(c)), - Ok((Partial::new(&b"jk"[..]), expected)) - ); - - let d = &b"efgxyzabcdefghi"[..]; - assert_eq!( - perm(Partial::new(d)), - Err(ErrMode::Backtrack(error_node_position!( - Partial::new(&b"efgxyzabcdefghi"[..]), - ErrorKind::Alt, - error_position!(Partial::new(&b"xyzabcdefghi"[..]), ErrorKind::Tag) - ))) - ); - - let e = &b"efgabc"[..]; - assert_eq!( - perm(Partial::new(e)), - Err(ErrMode::Incomplete(Needed::new(1))) - ); -} diff --git a/vendor/winnow/src/bytes/mod.rs b/vendor/winnow/src/bytes/mod.rs deleted file mode 100644 index 0bf8681..0000000 --- a/vendor/winnow/src/bytes/mod.rs +++ /dev/null @@ -1,1167 +0,0 @@ -//! Parsers recognizing bytes streams - -#[cfg(test)] -mod tests; - -use crate::error::ErrMode; -use crate::error::ErrorKind; -use crate::error::Needed; -use crate::error::ParseError; -use crate::lib::std::result::Result::Ok; -use crate::stream::{ - split_at_offset1_complete, split_at_offset1_partial, split_at_offset_complete, - split_at_offset_partial, Compare, CompareResult, ContainsToken, FindSlice, SliceLen, Stream, -}; -use crate::stream::{StreamIsPartial, ToUsize}; -use crate::trace::trace; -use crate::IResult; -use crate::Parser; - -/// Matches one token -/// -/// *Complete version*: Will return an error if there's not enough input data. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. 
-/// -/// # Example -/// -/// ```rust -/// # use winnow::{bytes::any, error::ErrMode, error::{Error, ErrorKind}}; -/// # use winnow::prelude::*; -/// fn parser(input: &str) -> IResult<&str, char> { -/// any.parse_next(input) -/// } -/// -/// assert_eq!(parser("abc"), Ok(("bc",'a'))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Token)))); -/// ``` -/// -/// ```rust -/// # use winnow::{bytes::any, error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::prelude::*; -/// # use winnow::Partial; -/// assert_eq!(any::<_, Error<_>>.parse_next(Partial::new("abc")), Ok((Partial::new("bc"),'a'))); -/// assert_eq!(any::<_, Error<_>>.parse_next(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -#[doc(alias = "token")] -pub fn any<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Token, E> -where - I: StreamIsPartial, - I: Stream, -{ - trace("any", move |input: I| { - if input.is_partial() { - streaming_any(input) - } else { - complete_any(input) - } - }) - .parse_next(input) -} - -pub(crate) fn streaming_any<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Token, E> -where - I: Stream, -{ - input - .next_token() - .ok_or_else(|| ErrMode::Incomplete(Needed::new(1))) -} - -pub(crate) fn complete_any<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Token, E> -where - I: Stream, -{ - input - .next_token() - .ok_or_else(|| ErrMode::from_error_kind(input, ErrorKind::Token)) -} - -/// Recognizes a literal -/// -/// The input data will be compared to the tag combinator's argument and will return the part of -/// the input that matches the argument -/// -/// It will return `Err(ErrMode::Backtrack(Error::new(_, ErrorKind::Tag)))` if the input doesn't match the pattern -/// -/// **Note:** [`Parser`][crate::Parser] is implemented for strings and byte strings as a convenience (complete -/// only) -/// -/// # Example -/// ```rust -/// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, &str> { -/// "Hello".parse_next(s) -/// } -/// -/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello"))); -/// assert_eq!(parser("Something"), Err(ErrMode::Backtrack(Error::new("Something", ErrorKind::Tag)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// ``` -/// -/// ```rust -/// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::Partial; -/// use winnow::bytes::tag; -/// -/// fn parser(s: Partial<&str>) -> IResult<Partial<&str>, &str> { -/// "Hello".parse_next(s) -/// } -/// -/// assert_eq!(parser(Partial::new("Hello, World!")), Ok((Partial::new(", World!"), "Hello"))); -/// assert_eq!(parser(Partial::new("Something")), Err(ErrMode::Backtrack(Error::new(Partial::new("Something"), ErrorKind::Tag)))); -/// assert_eq!(parser(Partial::new("S")), Err(ErrMode::Backtrack(Error::new(Partial::new("S"), ErrorKind::Tag)))); -/// assert_eq!(parser(Partial::new("H")), Err(ErrMode::Incomplete(Needed::new(4)))); -/// ``` -#[inline(always)] -#[doc(alias = "literal")] -#[doc(alias = "bytes")] -#[doc(alias = "just")] -pub fn tag<T, I, Error: ParseError<I>>(tag: T) -> impl Parser<I, <I as Stream>::Slice, Error> -where - I: StreamIsPartial, - I: Stream + Compare<T>, - T: SliceLen + Clone, -{ - trace("tag", move |i: I| { - let t = tag.clone(); - if i.is_partial() { - 
streaming_tag_internal(i, t) - } else { - complete_tag_internal(i, t) - } - }) -} - -pub(crate) fn streaming_tag_internal<T, I, Error: ParseError<I>>( - i: I, - t: T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream + Compare<T>, - T: SliceLen, -{ - let tag_len = t.slice_len(); - match i.compare(t) { - CompareResult::Ok => Ok(i.next_slice(tag_len)), - CompareResult::Incomplete => { - Err(ErrMode::Incomplete(Needed::new(tag_len - i.eof_offset()))) - } - CompareResult::Error => { - let e: ErrorKind = ErrorKind::Tag; - Err(ErrMode::from_error_kind(i, e)) - } - } -} - -pub(crate) fn complete_tag_internal<T, I, Error: ParseError<I>>( - i: I, - t: T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream + Compare<T>, - T: SliceLen, -{ - let tag_len = t.slice_len(); - match i.compare(t) { - CompareResult::Ok => Ok(i.next_slice(tag_len)), - CompareResult::Incomplete | CompareResult::Error => { - let e: ErrorKind = ErrorKind::Tag; - Err(ErrMode::from_error_kind(i, e)) - } - } -} - -/// Recognizes a case insensitive literal. -/// -/// The input data will be compared to the tag combinator's argument and will return the part of -/// the input that matches the argument with no regard to case. -/// -/// It will return `Err(ErrMode::Backtrack(Error::new(_, ErrorKind::Tag)))` if the input doesn't match the pattern. -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::bytes::tag_no_case; -/// -/// fn parser(s: &str) -> IResult<&str, &str> { -/// tag_no_case("hello").parse_next(s) -/// } -/// -/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello"))); -/// assert_eq!(parser("hello, World!"), Ok((", World!", "hello"))); -/// assert_eq!(parser("HeLlO, World!"), Ok((", World!", "HeLlO"))); -/// assert_eq!(parser("Something"), Err(ErrMode::Backtrack(Error::new("Something", ErrorKind::Tag)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// ``` -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// # use winnow::Partial; -/// use winnow::bytes::tag_no_case; -/// -/// fn parser(s: Partial<&str>) -> IResult<Partial<&str>, &str> { -/// tag_no_case("hello").parse_next(s) -/// } -/// -/// assert_eq!(parser(Partial::new("Hello, World!")), Ok((Partial::new(", World!"), "Hello"))); -/// assert_eq!(parser(Partial::new("hello, World!")), Ok((Partial::new(", World!"), "hello"))); -/// assert_eq!(parser(Partial::new("HeLlO, World!")), Ok((Partial::new(", World!"), "HeLlO"))); -/// assert_eq!(parser(Partial::new("Something")), Err(ErrMode::Backtrack(Error::new(Partial::new("Something"), ErrorKind::Tag)))); -/// assert_eq!(parser(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(5)))); -/// ``` -#[inline(always)] -#[doc(alias = "literal")] -#[doc(alias = "bytes")] -#[doc(alias = "just")] -pub fn tag_no_case<T, I, Error: ParseError<I>>( - tag: T, -) -> impl Parser<I, <I as Stream>::Slice, Error> -where - I: StreamIsPartial, - I: Stream + Compare<T>, - T: SliceLen + Clone, -{ - trace("tag_no_case", move |i: I| { - let t = tag.clone(); - if i.is_partial() { - streaming_tag_no_case_internal(i, t) - } else { - complete_tag_no_case_internal(i, t) - } - }) -} - -pub(crate) fn streaming_tag_no_case_internal<T, I, Error: ParseError<I>>( - i: I, - t: T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream + Compare<T>, - T: SliceLen, -{ - let tag_len = 
t.slice_len(); - - match (i).compare_no_case(t) { - CompareResult::Ok => Ok(i.next_slice(tag_len)), - CompareResult::Incomplete => { - Err(ErrMode::Incomplete(Needed::new(tag_len - i.eof_offset()))) - } - CompareResult::Error => { - let e: ErrorKind = ErrorKind::Tag; - Err(ErrMode::from_error_kind(i, e)) - } - } -} - -pub(crate) fn complete_tag_no_case_internal<T, I, Error: ParseError<I>>( - i: I, - t: T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream + Compare<T>, - T: SliceLen, -{ - let tag_len = t.slice_len(); - - match (i).compare_no_case(t) { - CompareResult::Ok => Ok(i.next_slice(tag_len)), - CompareResult::Incomplete | CompareResult::Error => { - let e: ErrorKind = ErrorKind::Tag; - Err(ErrMode::from_error_kind(i, e)) - } - } -} - -/// Recognize a token that matches the [pattern][ContainsToken] -/// -/// **Note:** [`Parser`][crate::Parser] is implemented as a convenience (complete -/// only) for -/// - `u8` -/// - `char` -/// -/// *Complete version*: Will return an error if there's not enough input data. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. -/// -/// # Example -/// -/// ```rust -/// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error}; -/// # use winnow::bytes::one_of; -/// assert_eq!(one_of::<_, _, Error<_>>("abc").parse_next("b"), Ok(("", 'b'))); -/// assert_eq!(one_of::<_, _, Error<_>>("a").parse_next("bc"), Err(ErrMode::Backtrack(Error::new("bc", ErrorKind::Verify)))); -/// assert_eq!(one_of::<_, _, Error<_>>("a").parse_next(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Token)))); -/// -/// fn parser_fn(i: &str) -> IResult<&str, char> { -/// one_of(|c| c == 'a' || c == 'b').parse_next(i) -/// } -/// assert_eq!(parser_fn("abc"), Ok(("bc", 'a'))); -/// assert_eq!(parser_fn("cd"), Err(ErrMode::Backtrack(Error::new("cd", ErrorKind::Verify)))); -/// assert_eq!(parser_fn(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Token)))); -/// ``` -/// -/// ``` -/// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::bytes::one_of; -/// assert_eq!(one_of::<_, _, Error<_>>("abc").parse_next(Partial::new("b")), Ok((Partial::new(""), 'b'))); -/// assert_eq!(one_of::<_, _, Error<_>>("a").parse_next(Partial::new("bc")), Err(ErrMode::Backtrack(Error::new(Partial::new("bc"), ErrorKind::Verify)))); -/// assert_eq!(one_of::<_, _, Error<_>>("a").parse_next(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// -/// fn parser_fn(i: Partial<&str>) -> IResult<Partial<&str>, char> { -/// one_of(|c| c == 'a' || c == 'b').parse_next(i) -/// } -/// assert_eq!(parser_fn(Partial::new("abc")), Ok((Partial::new("bc"), 'a'))); -/// assert_eq!(parser_fn(Partial::new("cd")), Err(ErrMode::Backtrack(Error::new(Partial::new("cd"), ErrorKind::Verify)))); -/// assert_eq!(parser_fn(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -#[doc(alias = "char")] -#[doc(alias = "token")] -#[doc(alias = "satisfy")] -pub fn one_of<I, T, Error: ParseError<I>>(list: T) -> impl Parser<I, <I as Stream>::Token, Error> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: Copy, - T: ContainsToken<<I as Stream>::Token>, -{ - trace( - "one_of", - any.verify(move |t: &<I as Stream>::Token| list.contains_token(*t)), - ) -} - -/// Recognize a token that does not match the [pattern][ContainsToken] -/// -/// *Complete version*: 
Will return an error if there's not enough input data. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error}; -/// # use winnow::prelude::*; -/// # use winnow::bytes::none_of; -/// assert_eq!(none_of::<_, _, Error<_>>("abc").parse_next("z"), Ok(("", 'z'))); -/// assert_eq!(none_of::<_, _, Error<_>>("ab").parse_next("a"), Err(ErrMode::Backtrack(Error::new("a", ErrorKind::Verify)))); -/// assert_eq!(none_of::<_, _, Error<_>>("a").parse_next(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Token)))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::prelude::*; -/// # use winnow::Partial; -/// # use winnow::bytes::none_of; -/// assert_eq!(none_of::<_, _, Error<_>>("abc").parse_next(Partial::new("z")), Ok((Partial::new(""), 'z'))); -/// assert_eq!(none_of::<_, _, Error<_>>("ab").parse_next(Partial::new("a")), Err(ErrMode::Backtrack(Error::new(Partial::new("a"), ErrorKind::Verify)))); -/// assert_eq!(none_of::<_, _, Error<_>>("a").parse_next(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn none_of<I, T, Error: ParseError<I>>(list: T) -> impl Parser<I, <I as Stream>::Token, Error> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: Copy, - T: ContainsToken<<I as Stream>::Token>, -{ - trace( - "none_of", - any.verify(move |t: &<I as Stream>::Token| !list.contains_token(*t)), - ) -} - -/// Recognize the longest input slice (if any) that matches the [pattern][ContainsToken] -/// -/// *Partial version*: will return a `ErrMode::Incomplete(Needed::new(1))` if the pattern reaches the end of the input. -/// -/// To recognize a series of tokens, use [`many0`][crate::multi::many0] to [`Accumulate`][crate::stream::Accumulate] into a `()` and then [`Parser::recognize`][crate::Parser::recognize]. 
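The `many0`-plus-`recognize` pattern recommended in the doc comment above is hinted at but never spelled out in this file; the following is a minimal editorial sketch of it, not part of the vendored patch, and it assumes the winnow 0.4-era paths used in this (deleted) module, such as `winnow::bytes::one_of` and `winnow::multi::many0`:

```rust
use winnow::bytes::one_of;
use winnow::multi::many0;
use winnow::prelude::*;
use winnow::IResult;

// Accumulate the matched tokens into `()` (discarding them), then let
// `recognize` return the slice of input that was consumed.
fn digits0(s: &str) -> IResult<&str, &str> {
    many0::<_, _, (), _, _>(one_of(|c: char| c.is_ascii_digit()))
        .recognize()
        .parse_next(s)
}

fn main() {
    assert_eq!(digits0("123abc"), Ok(("abc", "123")));
    // Like `take_while0`, zero matches still succeed.
    assert_eq!(digits0("abc"), Ok(("abc", "")));
}
```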
-/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::bytes::take_while0; -/// use winnow::stream::AsChar; -/// -/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { -/// take_while0(AsChar::is_alpha).parse_next(s) -/// } -/// -/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); -/// assert_eq!(alpha(b"12345"), Ok((&b"12345"[..], &b""[..]))); -/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..]))); -/// assert_eq!(alpha(b""), Ok((&b""[..], &b""[..]))); -/// ``` -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::prelude::*; -/// # use winnow::Partial; -/// use winnow::bytes::take_while0; -/// use winnow::stream::AsChar; -/// -/// fn alpha(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { -/// take_while0(AsChar::is_alpha).parse_next(s) -/// } -/// -/// assert_eq!(alpha(Partial::new(b"latin123")), Ok((Partial::new(&b"123"[..]), &b"latin"[..]))); -/// assert_eq!(alpha(Partial::new(b"12345")), Ok((Partial::new(&b"12345"[..]), &b""[..]))); -/// assert_eq!(alpha(Partial::new(b"latin")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// assert_eq!(alpha(Partial::new(b"")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn take_while0<T, I, Error: ParseError<I>>( - list: T, -) -> impl Parser<I, <I as Stream>::Slice, Error> -where - I: StreamIsPartial, - I: Stream, - T: ContainsToken<<I as Stream>::Token>, -{ - trace("take_while0", move |i: I| { - if i.is_partial() { - streaming_take_while_internal(i, &list) - } else { - complete_take_while_internal(i, &list) - } - }) -} - -pub(crate) fn streaming_take_while_internal<T, I, Error: ParseError<I>>( - i: I, - list: &T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream, - T: ContainsToken<<I as Stream>::Token>, -{ - split_at_offset_partial(&i, |c| !list.contains_token(c)) -} - -pub(crate) fn complete_take_while_internal<T, I, Error: ParseError<I>>( - i: I, - list: &T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream, - T: ContainsToken<<I as Stream>::Token>, -{ - split_at_offset_complete(&i, |c| !list.contains_token(c)) -} - -/// Recognize the longest (at least 1) input slice that matches the [pattern][ContainsToken] -/// -/// It will return an `Err(ErrMode::Backtrack(Error::new(_, ErrorKind::Slice)))` if the pattern wasn't met. -/// -/// *Partial version* will return a `ErrMode::Incomplete(Needed::new(1))` or if the pattern reaches the end of the input. -/// -/// To recognize a series of tokens, use [`many1`][crate::multi::many1] to [`Accumulate`][crate::stream::Accumulate] into a `()` and then [`Parser::recognize`][crate::Parser::recognize]. 
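For the corresponding `many1` hint above, the at-least-one variant is a one-line change to the previous editorial sketch (same assumptions about the winnow 0.4-era API):

```rust
use winnow::bytes::one_of;
use winnow::multi::many1;
use winnow::prelude::*;
use winnow::IResult;

// Same shape as the `many0` sketch, but `many1` fails unless the inner
// parser matches at least once, mirroring `take_while1`.
fn digits1(s: &str) -> IResult<&str, &str> {
    many1::<_, _, (), _, _>(one_of(|c: char| c.is_ascii_digit()))
        .recognize()
        .parse_next(s)
}

fn main() {
    assert_eq!(digits1("123abc"), Ok(("abc", "123")));
    assert!(digits1("abc").is_err()); // no leading digit, so the parse fails
}
```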
-/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::bytes::take_while1; -/// use winnow::stream::AsChar; -/// -/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { -/// take_while1(AsChar::is_alpha).parse_next(s) -/// } -/// -/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); -/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..]))); -/// assert_eq!(alpha(b"12345"), Err(ErrMode::Backtrack(Error::new(&b"12345"[..], ErrorKind::Slice)))); -/// -/// fn hex(s: &str) -> IResult<&str, &str> { -/// take_while1("1234567890ABCDEF").parse_next(s) -/// } -/// -/// assert_eq!(hex("123 and voila"), Ok((" and voila", "123"))); -/// assert_eq!(hex("DEADBEEF and others"), Ok((" and others", "DEADBEEF"))); -/// assert_eq!(hex("BADBABEsomething"), Ok(("something", "BADBABE"))); -/// assert_eq!(hex("D15EA5E"), Ok(("", "D15EA5E"))); -/// assert_eq!(hex(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); -/// ``` -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// # use winnow::Partial; -/// use winnow::bytes::take_while1; -/// use winnow::stream::AsChar; -/// -/// fn alpha(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { -/// take_while1(AsChar::is_alpha).parse_next(s) -/// } -/// -/// assert_eq!(alpha(Partial::new(b"latin123")), Ok((Partial::new(&b"123"[..]), &b"latin"[..]))); -/// assert_eq!(alpha(Partial::new(b"latin")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// assert_eq!(alpha(Partial::new(b"12345")), Err(ErrMode::Backtrack(Error::new(Partial::new(&b"12345"[..]), ErrorKind::Slice)))); -/// -/// fn hex(s: Partial<&str>) -> IResult<Partial<&str>, &str> { -/// take_while1("1234567890ABCDEF").parse_next(s) -/// } -/// -/// assert_eq!(hex(Partial::new("123 and voila")), Ok((Partial::new(" and voila"), "123"))); -/// assert_eq!(hex(Partial::new("DEADBEEF and others")), Ok((Partial::new(" and others"), "DEADBEEF"))); -/// assert_eq!(hex(Partial::new("BADBABEsomething")), Ok((Partial::new("something"), "BADBABE"))); -/// assert_eq!(hex(Partial::new("D15EA5E")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// assert_eq!(hex(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -#[doc(alias = "is_a")] -pub fn take_while1<T, I, Error: ParseError<I>>( - list: T, -) -> impl Parser<I, <I as Stream>::Slice, Error> -where - I: StreamIsPartial, - I: Stream, - T: ContainsToken<<I as Stream>::Token>, -{ - trace("take_while1", move |i: I| { - if i.is_partial() { - streaming_take_while1_internal(i, &list) - } else { - complete_take_while1_internal(i, &list) - } - }) -} - -pub(crate) fn streaming_take_while1_internal<T, I, Error: ParseError<I>>( - i: I, - list: &T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream, - T: ContainsToken<<I as Stream>::Token>, -{ - let e: ErrorKind = ErrorKind::Slice; - split_at_offset1_partial(&i, |c| !list.contains_token(c), e) -} - -pub(crate) fn complete_take_while1_internal<T, I, Error: ParseError<I>>( - i: I, - list: &T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream, - T: ContainsToken<<I as Stream>::Token>, -{ - let e: ErrorKind = ErrorKind::Slice; - split_at_offset1_complete(&i, |c| !list.contains_token(c), e) -} - -/// Recognize the longest (m <= len <= n) input slice that matches the [pattern][ContainsToken] -/// -/// It will return an `ErrMode::Backtrack(Error::new(_, 
ErrorKind::Slice))` if the pattern wasn't met or is out -/// of range (m <= len <= n). -/// -/// *Partial version* will return a `ErrMode::Incomplete(Needed::new(1))` if the pattern reaches the end of the input or is too short. -/// -/// To recognize a series of tokens, use [`many_m_n`][crate::multi::many_m_n] to [`Accumulate`][crate::stream::Accumulate] into a `()` and then [`Parser::recognize`][crate::Parser::recognize]. -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::bytes::take_while_m_n; -/// use winnow::stream::AsChar; -/// -/// fn short_alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { -/// take_while_m_n(3, 6, AsChar::is_alpha).parse_next(s) -/// } -/// -/// assert_eq!(short_alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); -/// assert_eq!(short_alpha(b"lengthy"), Ok((&b"y"[..], &b"length"[..]))); -/// assert_eq!(short_alpha(b"latin"), Ok((&b""[..], &b"latin"[..]))); -/// assert_eq!(short_alpha(b"ed"), Err(ErrMode::Backtrack(Error::new(&b"ed"[..], ErrorKind::Slice)))); -/// assert_eq!(short_alpha(b"12345"), Err(ErrMode::Backtrack(Error::new(&b"12345"[..], ErrorKind::Slice)))); -/// ``` -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// # use winnow::Partial; -/// use winnow::bytes::take_while_m_n; -/// use winnow::stream::AsChar; -/// -/// fn short_alpha(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { -/// take_while_m_n(3, 6, AsChar::is_alpha).parse_next(s) -/// } -/// -/// assert_eq!(short_alpha(Partial::new(b"latin123")), Ok((Partial::new(&b"123"[..]), &b"latin"[..]))); -/// assert_eq!(short_alpha(Partial::new(b"lengthy")), Ok((Partial::new(&b"y"[..]), &b"length"[..]))); -/// assert_eq!(short_alpha(Partial::new(b"latin")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// assert_eq!(short_alpha(Partial::new(b"ed")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// assert_eq!(short_alpha(Partial::new(b"12345")), Err(ErrMode::Backtrack(Error::new(Partial::new(&b"12345"[..]), ErrorKind::Slice)))); -/// ``` -#[inline(always)] -pub fn take_while_m_n<T, I, Error: ParseError<I>>( - m: usize, - n: usize, - list: T, -) -> impl Parser<I, <I as Stream>::Slice, Error> -where - I: StreamIsPartial, - I: Stream, - T: ContainsToken<<I as Stream>::Token>, -{ - trace("take_while_m_n", move |i: I| { - if i.is_partial() { - streaming_take_while_m_n_internal(i, m, n, &list) - } else { - complete_take_while_m_n_internal(i, m, n, &list) - } - }) -} - -pub(crate) fn streaming_take_while_m_n_internal<T, I, Error: ParseError<I>>( - input: I, - m: usize, - n: usize, - list: &T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream, - T: ContainsToken<<I as Stream>::Token>, -{ - if n < m { - return Err(ErrMode::assert(input, "`m` should be <= `n`")); - } - - let mut final_count = 0; - for (processed, (offset, token)) in input.iter_offsets().enumerate() { - if !list.contains_token(token) { - if processed < m { - return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); - } else { - return Ok(input.next_slice(offset)); - } - } else { - if processed == n { - return Ok(input.next_slice(offset)); - } - final_count = processed + 1; - } - } - - if final_count == n { - Ok(input.next_slice(input.eof_offset())) - } else { - let needed = if m > input.eof_offset() { - m - input.eof_offset() - } else { - 1 - }; - Err(ErrMode::Incomplete(Needed::new(needed))) - } -} - -pub(crate) fn 
complete_take_while_m_n_internal<T, I, Error: ParseError<I>>(
- input: I,
- m: usize,
- n: usize,
- list: &T,
-) -> IResult<I, <I as Stream>::Slice, Error>
-where
- I: Stream,
- T: ContainsToken<<I as Stream>::Token>,
-{
- if n < m {
- return Err(ErrMode::assert(input, "`m` should be <= `n`"));
- }
-
- let mut final_count = 0;
- for (processed, (offset, token)) in input.iter_offsets().enumerate() {
- if !list.contains_token(token) {
- if processed < m {
- return Err(ErrMode::from_error_kind(input, ErrorKind::Slice));
- } else {
- return Ok(input.next_slice(offset));
- }
- } else {
- if processed == n {
- return Ok(input.next_slice(offset));
- }
- final_count = processed + 1;
- }
- }
-
- if m <= final_count {
- Ok(input.next_slice(input.eof_offset()))
- } else {
- Err(ErrMode::from_error_kind(input, ErrorKind::Slice))
- }
-}
-
-/// Recognize the longest input slice (if any) till a [pattern][ContainsToken] is met.
-///
-/// *Partial version* will return a `ErrMode::Incomplete(Needed::new(1))` if the match reaches the
-/// end of input or if there was no match.
-///
-/// # Example
-///
-/// ```rust
-/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed};
-/// # use winnow::prelude::*;
-/// use winnow::bytes::take_till0;
-///
-/// fn till_colon(s: &str) -> IResult<&str, &str> {
-/// take_till0(|c| c == ':').parse_next(s)
-/// }
-///
-/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin")));
-/// assert_eq!(till_colon(":empty matched"), Ok((":empty matched", ""))); //allowed
-/// assert_eq!(till_colon("12345"), Ok(("", "12345")));
-/// assert_eq!(till_colon(""), Ok(("", "")));
-/// ```
-///
-/// ```rust
-/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed};
-/// # use winnow::prelude::*;
-/// # use winnow::Partial;
-/// use winnow::bytes::take_till0;
-///
-/// fn till_colon(s: Partial<&str>) -> IResult<Partial<&str>, &str> {
-/// take_till0(|c| c == ':').parse_next(s)
-/// }
-///
-/// assert_eq!(till_colon(Partial::new("latin:123")), Ok((Partial::new(":123"), "latin")));
-/// assert_eq!(till_colon(Partial::new(":empty matched")), Ok((Partial::new(":empty matched"), ""))); //allowed
-/// assert_eq!(till_colon(Partial::new("12345")), Err(ErrMode::Incomplete(Needed::new(1))));
-/// assert_eq!(till_colon(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
-/// ```
-#[inline(always)]
-pub fn take_till0<T, I, Error: ParseError<I>>(
- list: T,
-) -> impl Parser<I, <I as Stream>::Slice, Error>
-where
- I: StreamIsPartial,
- I: Stream,
- T: ContainsToken<<I as Stream>::Token>,
-{
- trace("take_till0", move |i: I| {
- if i.is_partial() {
- streaming_take_till_internal(i, &list)
- } else {
- complete_take_till_internal(i, &list)
- }
- })
-}
-
-pub(crate) fn streaming_take_till_internal<T, I, Error: ParseError<I>>(
- i: I,
- list: &T,
-) -> IResult<I, <I as Stream>::Slice, Error>
-where
- I: Stream,
- T: ContainsToken<<I as Stream>::Token>,
-{
- split_at_offset_partial(&i, |c| list.contains_token(c))
-}
-
-pub(crate) fn complete_take_till_internal<T, I, Error: ParseError<I>>(
- i: I,
- list: &T,
-) -> IResult<I, <I as Stream>::Slice, Error>
-where
- I: Stream,
- T: ContainsToken<<I as Stream>::Token>,
-{
- split_at_offset_complete(&i, |c| list.contains_token(c))
-}
-
-/// Recognize the longest (at least 1) input slice till a [pattern][ContainsToken] is met.
-///
-/// It will return `Err(ErrMode::Backtrack(Error::new(_, ErrorKind::Slice)))` if the input is empty or the
-/// predicate matches the first input.
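As the `split_at_offset_*` calls above make visible, `take_till0` is `take_while0` with the predicate left un-negated: one takes while the predicate holds, the other takes until it does. A minimal sketch of that duality, using only the API shown in this file:

```rust
use winnow::bytes::{take_till0, take_while0};
use winnow::prelude::*;

// `take_till0` stops at the first token matching the predicate, so it
// behaves exactly like `take_while0` with the predicate negated.
fn till_digit(s: &str) -> IResult<&str, &str> {
    take_till0(|c: char| c.is_ascii_digit()).parse_next(s)
}

fn while_not_digit(s: &str) -> IResult<&str, &str> {
    take_while0(|c: char| !c.is_ascii_digit()).parse_next(s)
}

fn main() {
    assert_eq!(till_digit("abc123"), Ok(("123", "abc")));
    assert_eq!(till_digit("abc123"), while_not_digit("abc123"));
}
```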
-///
-/// *Partial version* will return a `ErrMode::Incomplete(Needed::new(1))` if the match reaches the
-/// end of input or if there was no match.
-///
-/// # Example
-///
-/// ```rust
-/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed};
-/// # use winnow::prelude::*;
-/// use winnow::bytes::take_till1;
-///
-/// fn till_colon(s: &str) -> IResult<&str, &str> {
-/// take_till1(|c| c == ':').parse_next(s)
-/// }
-///
-/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin")));
-/// assert_eq!(till_colon(":empty matched"), Err(ErrMode::Backtrack(Error::new(":empty matched", ErrorKind::Slice))));
-/// assert_eq!(till_colon("12345"), Ok(("", "12345")));
-/// assert_eq!(till_colon(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice))));
-///
-/// fn not_space(s: &str) -> IResult<&str, &str> {
-/// take_till1(" \t\r\n").parse_next(s)
-/// }
-///
-/// assert_eq!(not_space("Hello, World!"), Ok((" World!", "Hello,")));
-/// assert_eq!(not_space("Sometimes\t"), Ok(("\t", "Sometimes")));
-/// assert_eq!(not_space("Nospace"), Ok(("", "Nospace")));
-/// assert_eq!(not_space(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice))));
-/// ```
-///
-/// ```rust
-/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed};
-/// # use winnow::prelude::*;
-/// # use winnow::Partial;
-/// use winnow::bytes::take_till1;
-///
-/// fn till_colon(s: Partial<&str>) -> IResult<Partial<&str>, &str> {
-/// take_till1(|c| c == ':').parse_next(s)
-/// }
-///
-/// assert_eq!(till_colon(Partial::new("latin:123")), Ok((Partial::new(":123"), "latin")));
-/// assert_eq!(till_colon(Partial::new(":empty matched")), Err(ErrMode::Backtrack(Error::new(Partial::new(":empty matched"), ErrorKind::Slice))));
-/// assert_eq!(till_colon(Partial::new("12345")), Err(ErrMode::Incomplete(Needed::new(1))));
-/// assert_eq!(till_colon(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
-///
-/// fn not_space(s: Partial<&str>) -> IResult<Partial<&str>, &str> {
-/// take_till1(" \t\r\n").parse_next(s)
-/// }
-///
-/// assert_eq!(not_space(Partial::new("Hello, World!")), Ok((Partial::new(" World!"), "Hello,")));
-/// assert_eq!(not_space(Partial::new("Sometimes\t")), Ok((Partial::new("\t"), "Sometimes")));
-/// assert_eq!(not_space(Partial::new("Nospace")), Err(ErrMode::Incomplete(Needed::new(1))));
-/// assert_eq!(not_space(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
-/// ```
-#[inline(always)]
-#[doc(alias = "is_not")]
-pub fn take_till1<T, I, Error: ParseError<I>>(
- list: T,
-) -> impl Parser<I, <I as Stream>::Slice, Error>
-where
- I: StreamIsPartial,
- I: Stream,
- T: ContainsToken<<I as Stream>::Token>,
-{
- trace("take_till1", move |i: I| {
- if i.is_partial() {
- streaming_take_till1_internal(i, &list)
- } else {
- complete_take_till1_internal(i, &list)
- }
- })
-}
-
-pub(crate) fn streaming_take_till1_internal<T, I, Error: ParseError<I>>(
- i: I,
- list: &T,
-) -> IResult<I, <I as Stream>::Slice, Error>
-where
- I: Stream,
- T: ContainsToken<<I as Stream>::Token>,
-{
- let e: ErrorKind = ErrorKind::Slice;
- split_at_offset1_partial(&i, |c| list.contains_token(c), e)
-}
-
-pub(crate) fn complete_take_till1_internal<T, I, Error: ParseError<I>>(
- i: I,
- list: &T,
-) -> IResult<I, <I as Stream>::Slice, Error>
-where
- I: Stream,
- T: ContainsToken<<I as Stream>::Token>,
-{
- let e: ErrorKind = ErrorKind::Slice;
- split_at_offset1_complete(&i, |c| list.contains_token(c), e)
-}
-
-/// Recognize an input slice containing the first N input elements
(I[..N]).
-///
-/// *Complete version*: It will return `Err(ErrMode::Backtrack(Error::new(_, ErrorKind::Slice)))` if the input is shorter than the argument.
-///
-/// *Partial version*: if the input has less than N elements, `take` will
-/// return a `ErrMode::Incomplete(Needed::new(M))` where M is the number of
-/// additional bytes the parser would need to succeed.
-/// It is well defined for `&[u8]` as the number of elements is the byte size,
-/// but for types like `&str`, we cannot know how many bytes correspond to
-/// the next few chars, so the result will be `ErrMode::Incomplete(Needed::Unknown)`
-///
-/// # Example
-///
-/// ```rust
-/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed};
-/// # use winnow::prelude::*;
-/// use winnow::bytes::take;
-///
-/// fn take6(s: &str) -> IResult<&str, &str> {
-/// take(6usize).parse_next(s)
-/// }
-///
-/// assert_eq!(take6("1234567"), Ok(("7", "123456")));
-/// assert_eq!(take6("things"), Ok(("", "things")));
-/// assert_eq!(take6("short"), Err(ErrMode::Backtrack(Error::new("short", ErrorKind::Slice))));
-/// assert_eq!(take6(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice))));
-/// ```
-///
-/// The units that are taken will depend on the input type. For example, for a
-/// `&str` it will take a number of `char`'s, whereas for a `&[u8]` it will
-/// take that many `u8`'s:
-///
-/// ```rust
-/// # use winnow::prelude::*;
-/// use winnow::error::Error;
-/// use winnow::bytes::take;
-///
-/// assert_eq!(take::<_, _, Error<_>>(1usize).parse_next("💙"), Ok(("", "💙")));
-/// assert_eq!(take::<_, _, Error<_>>(1usize).parse_next("💙".as_bytes()), Ok((b"\x9F\x92\x99".as_ref(), b"\xF0".as_ref())));
-/// ```
-///
-/// ```rust
-/// # use winnow::prelude::*;
-/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed};
-/// # use winnow::Partial;
-/// use winnow::bytes::take;
-///
-/// fn take6(s: Partial<&str>) -> IResult<Partial<&str>, &str> {
-/// take(6usize).parse_next(s)
-/// }
-///
-/// assert_eq!(take6(Partial::new("1234567")), Ok((Partial::new("7"), "123456")));
-/// assert_eq!(take6(Partial::new("things")), Ok((Partial::new(""), "things")));
-/// // `Unknown` as we don't know the number of bytes that `count` corresponds to
-/// assert_eq!(take6(Partial::new("short")), Err(ErrMode::Incomplete(Needed::Unknown)));
-/// ```
-#[inline(always)]
-pub fn take<C, I, Error: ParseError<I>>(count: C) -> impl Parser<I, <I as Stream>::Slice, Error>
-where
- I: StreamIsPartial,
- I: Stream,
- C: ToUsize,
-{
- let c = count.to_usize();
- trace("take", move |i: I| {
- if i.is_partial() {
- streaming_take_internal(i, c)
- } else {
- complete_take_internal(i, c)
- }
- })
-}
-
-pub(crate) fn streaming_take_internal<I, Error: ParseError<I>>(
- i: I,
- c: usize,
-) -> IResult<I, <I as Stream>::Slice, Error>
-where
- I: Stream,
-{
- match i.offset_at(c) {
- Ok(offset) => Ok(i.next_slice(offset)),
- Err(i) => Err(ErrMode::Incomplete(i)),
- }
-}
-
-pub(crate) fn complete_take_internal<I, Error: ParseError<I>>(
- i: I,
- c: usize,
-) -> IResult<I, <I as Stream>::Slice, Error>
-where
- I: Stream,
-{
- match i.offset_at(c) {
- Ok(offset) => Ok(i.next_slice(offset)),
- Err(_needed) => Err(ErrMode::from_error_kind(i, ErrorKind::Slice)),
- }
-}
-
-/// Recognize the input slice up to the first occurrence of the literal.
-///
-/// It doesn't consume the pattern.
-///
-/// *Complete version*: It will return `Err(ErrMode::Backtrack(Error::new(_, ErrorKind::Slice)))`
-/// if the pattern wasn't met.
-/// -/// *Partial version*: will return a `ErrMode::Incomplete(Needed::new(N))` if the input doesn't -/// contain the pattern or if the input is smaller than the pattern. -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::bytes::take_until0; -/// -/// fn until_eof(s: &str) -> IResult<&str, &str> { -/// take_until0("eof").parse_next(s) -/// } -/// -/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world"))); -/// assert_eq!(until_eof("hello, world"), Err(ErrMode::Backtrack(Error::new("hello, world", ErrorKind::Slice)))); -/// assert_eq!(until_eof(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); -/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1"))); -/// ``` -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::prelude::*; -/// # use winnow::Partial; -/// use winnow::bytes::take_until0; -/// -/// fn until_eof(s: Partial<&str>) -> IResult<Partial<&str>, &str> { -/// take_until0("eof").parse_next(s) -/// } -/// -/// assert_eq!(until_eof(Partial::new("hello, worldeof")), Ok((Partial::new("eof"), "hello, world"))); -/// assert_eq!(until_eof(Partial::new("hello, world")), Err(ErrMode::Incomplete(Needed::Unknown))); -/// assert_eq!(until_eof(Partial::new("hello, worldeo")), Err(ErrMode::Incomplete(Needed::Unknown))); -/// assert_eq!(until_eof(Partial::new("1eof2eof")), Ok((Partial::new("eof2eof"), "1"))); -/// ``` -#[inline(always)] -pub fn take_until0<T, I, Error: ParseError<I>>( - tag: T, -) -> impl Parser<I, <I as Stream>::Slice, Error> -where - I: StreamIsPartial, - I: Stream + FindSlice<T>, - T: SliceLen + Clone, -{ - trace("take_until0", move |i: I| { - if i.is_partial() { - streaming_take_until_internal(i, tag.clone()) - } else { - complete_take_until_internal(i, tag.clone()) - } - }) -} - -pub(crate) fn streaming_take_until_internal<T, I, Error: ParseError<I>>( - i: I, - t: T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream + FindSlice<T>, - T: SliceLen, -{ - match i.find_slice(t) { - Some(offset) => Ok(i.next_slice(offset)), - None => Err(ErrMode::Incomplete(Needed::Unknown)), - } -} - -pub(crate) fn complete_take_until_internal<T, I, Error: ParseError<I>>( - i: I, - t: T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream + FindSlice<T>, - T: SliceLen, -{ - match i.find_slice(t) { - Some(offset) => Ok(i.next_slice(offset)), - None => Err(ErrMode::from_error_kind(i, ErrorKind::Slice)), - } -} - -/// Recognize the non empty input slice up to the first occurrence of the literal. -/// -/// It doesn't consume the pattern. -/// -/// *Complete version*: It will return `Err(ErrMode::Backtrack(Error::new(_, ErrorKind::Slice)))` -/// if the pattern wasn't met. -/// -/// *Partial version*: will return a `ErrMode::Incomplete(Needed::new(N))` if the input doesn't -/// contain the pattern or if the input is smaller than the pattern. 
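Both `take_until0` and `take_until1` leave the matched literal itself in the remaining input, as the docs for each note; to consume it as well, follow them with the literal as a tag parser. A minimal sketch, assuming `terminated` sits in this winnow version's `sequence` module:

```rust
use winnow::bytes::take_until0;
use winnow::prelude::*;
use winnow::sequence::terminated;

// `take_until0` yields everything before "eof" but leaves "eof" in the
// input; `terminated` then runs the string-literal tag parser to eat it.
fn body(s: &str) -> IResult<&str, &str> {
    terminated(take_until0("eof"), "eof").parse_next(s)
}

fn main() {
    assert_eq!(body("hello, worldeof"), Ok(("", "hello, world")));
    assert!(body("no terminator").is_err());
}
```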
-/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::bytes::take_until1; -/// -/// fn until_eof(s: &str) -> IResult<&str, &str> { -/// take_until1("eof").parse_next(s) -/// } -/// -/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world"))); -/// assert_eq!(until_eof("hello, world"), Err(ErrMode::Backtrack(Error::new("hello, world", ErrorKind::Slice)))); -/// assert_eq!(until_eof(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); -/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1"))); -/// assert_eq!(until_eof("eof"), Err(ErrMode::Backtrack(Error::new("eof", ErrorKind::Slice)))); -/// ``` -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// # use winnow::Partial; -/// use winnow::bytes::take_until1; -/// -/// fn until_eof(s: Partial<&str>) -> IResult<Partial<&str>, &str> { -/// take_until1("eof").parse_next(s) -/// } -/// -/// assert_eq!(until_eof(Partial::new("hello, worldeof")), Ok((Partial::new("eof"), "hello, world"))); -/// assert_eq!(until_eof(Partial::new("hello, world")), Err(ErrMode::Incomplete(Needed::Unknown))); -/// assert_eq!(until_eof(Partial::new("hello, worldeo")), Err(ErrMode::Incomplete(Needed::Unknown))); -/// assert_eq!(until_eof(Partial::new("1eof2eof")), Ok((Partial::new("eof2eof"), "1"))); -/// assert_eq!(until_eof(Partial::new("eof")), Err(ErrMode::Backtrack(Error::new(Partial::new("eof"), ErrorKind::Slice)))); -/// ``` -#[inline(always)] -pub fn take_until1<T, I, Error: ParseError<I>>( - tag: T, -) -> impl Parser<I, <I as Stream>::Slice, Error> -where - I: StreamIsPartial, - I: Stream + FindSlice<T>, - T: SliceLen + Clone, -{ - trace("take_until1", move |i: I| { - if i.is_partial() { - streaming_take_until1_internal(i, tag.clone()) - } else { - complete_take_until1_internal(i, tag.clone()) - } - }) -} - -pub(crate) fn streaming_take_until1_internal<T, I, Error: ParseError<I>>( - i: I, - t: T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream + FindSlice<T>, - T: SliceLen, -{ - match i.find_slice(t) { - None => Err(ErrMode::Incomplete(Needed::Unknown)), - Some(0) => Err(ErrMode::from_error_kind(i, ErrorKind::Slice)), - Some(offset) => Ok(i.next_slice(offset)), - } -} - -pub(crate) fn complete_take_until1_internal<T, I, Error: ParseError<I>>( - i: I, - t: T, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream + FindSlice<T>, - T: SliceLen, -{ - match i.find_slice(t) { - None | Some(0) => Err(ErrMode::from_error_kind(i, ErrorKind::Slice)), - Some(offset) => Ok(i.next_slice(offset)), - } -} diff --git a/vendor/winnow/src/character/mod.rs b/vendor/winnow/src/character/mod.rs deleted file mode 100644 index 89cc9ec..0000000 --- a/vendor/winnow/src/character/mod.rs +++ /dev/null @@ -1,1808 +0,0 @@ -//! Character specific parsers and combinators -//! -//! 
Functions recognizing specific characters - -#[cfg(test)] -mod tests; - -use crate::lib::std::ops::{Add, Shl}; - -use crate::branch::alt; -use crate::bytes::one_of; - -use crate::bytes::take_while0; -use crate::bytes::take_while1; -use crate::combinator::cut_err; -use crate::combinator::opt; -use crate::error::ParseError; -use crate::error::{ErrMode, ErrorKind, Needed}; -use crate::stream::ContainsToken; -use crate::stream::{AsBStr, AsChar, Offset, ParseSlice, Stream, StreamIsPartial}; -use crate::stream::{Compare, CompareResult}; -use crate::trace::trace; -use crate::IResult; -use crate::Parser; - -/// Recognizes the string "\r\n". -/// -/// *Complete version*: Will return an error if there's not enough input data. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. -/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult}; -/// # use winnow::character::crlf; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// crlf(input) -/// } -/// -/// assert_eq!(parser("\r\nc"), Ok(("c", "\r\n"))); -/// assert_eq!(parser("ab\r\nc"), Err(ErrMode::Backtrack(Error::new("ab\r\nc", ErrorKind::Tag)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::crlf; -/// assert_eq!(crlf::<_, Error<_>>(Partial::new("\r\nc")), Ok((Partial::new("c"), "\r\n"))); -/// assert_eq!(crlf::<_, Error<_>>(Partial::new("ab\r\nc")), Err(ErrMode::Backtrack(Error::new(Partial::new("ab\r\nc"), ErrorKind::Tag)))); -/// assert_eq!(crlf::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(2)))); -/// ``` -#[inline(always)] -pub fn crlf<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - I: Compare<&'static str>, -{ - trace("crlf", move |input: I| "\r\n".parse_next(input)).parse_next(input) -} - -/// Recognizes a string of any char except '\r\n' or '\n'. -/// -/// *Complete version*: Will return an error if there's not enough input data. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. 
-/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed}; -/// # use winnow::character::not_line_ending; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// not_line_ending(input) -/// } -/// -/// assert_eq!(parser("ab\r\nc"), Ok(("\r\nc", "ab"))); -/// assert_eq!(parser("ab\nc"), Ok(("\nc", "ab"))); -/// assert_eq!(parser("abc"), Ok(("", "abc"))); -/// assert_eq!(parser(""), Ok(("", ""))); -/// assert_eq!(parser("a\rb\nc"), Err(ErrMode::Backtrack(Error { input: "a\rb\nc", kind: ErrorKind::Tag }))); -/// assert_eq!(parser("a\rbc"), Err(ErrMode::Backtrack(Error { input: "a\rbc", kind: ErrorKind::Tag }))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::not_line_ending; -/// assert_eq!(not_line_ending::<_, Error<_>>(Partial::new("ab\r\nc")), Ok((Partial::new("\r\nc"), "ab"))); -/// assert_eq!(not_line_ending::<_, Error<_>>(Partial::new("abc")), Err(ErrMode::Incomplete(Needed::Unknown))); -/// assert_eq!(not_line_ending::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::Unknown))); -/// assert_eq!(not_line_ending::<_, Error<_>>(Partial::new("a\rb\nc")), Err(ErrMode::Backtrack(Error::new(Partial::new("a\rb\nc"), ErrorKind::Tag )))); -/// assert_eq!(not_line_ending::<_, Error<_>>(Partial::new("a\rbc")), Err(ErrMode::Backtrack(Error::new(Partial::new("a\rbc"), ErrorKind::Tag )))); -/// ``` -#[inline(always)] -pub fn not_line_ending<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream + AsBStr, - I: Compare<&'static str>, - <I as Stream>::Token: AsChar, -{ - trace("not_line_ending", move |input: I| { - if input.is_partial() { - streaming_not_line_ending(input) - } else { - complete_not_line_ending(input) - } - }) - .parse_next(input) -} - -pub(crate) fn streaming_not_line_ending<T, E: ParseError<T>>( - input: T, -) -> IResult<T, <T as Stream>::Slice, E> -where - T: Stream + AsBStr, - T: Compare<&'static str>, - <T as Stream>::Token: AsChar, -{ - match input.offset_for(|item| { - let c = item.as_char(); - c == '\r' || c == '\n' - }) { - None => Err(ErrMode::Incomplete(Needed::Unknown)), - Some(offset) => { - let (new_input, res) = input.next_slice(offset); - let bytes = new_input.as_bstr(); - let nth = bytes[0]; - if nth == b'\r' { - let comp = new_input.compare("\r\n"); - match comp { - //FIXME: calculate the right index - CompareResult::Ok => {} - CompareResult::Incomplete => { - return Err(ErrMode::Incomplete(Needed::Unknown)); - } - CompareResult::Error => { - let e: ErrorKind = ErrorKind::Tag; - return Err(ErrMode::from_error_kind(input, e)); - } - } - } - Ok((new_input, res)) - } - } -} - -pub(crate) fn complete_not_line_ending<T, E: ParseError<T>>( - input: T, -) -> IResult<T, <T as Stream>::Slice, E> -where - T: Stream + AsBStr, - T: Compare<&'static str>, - <T as Stream>::Token: AsChar, -{ - match input.offset_for(|item| { - let c = item.as_char(); - c == '\r' || c == '\n' - }) { - None => Ok(input.next_slice(input.eof_offset())), - Some(offset) => { - let (new_input, res) = input.next_slice(offset); - let bytes = new_input.as_bstr(); - let nth = bytes[0]; - if nth == b'\r' { - let comp = new_input.compare("\r\n"); - match comp { - //FIXME: calculate the right index - CompareResult::Ok => {} - CompareResult::Incomplete | CompareResult::Error => { - let e: ErrorKind = ErrorKind::Tag; - return Err(ErrMode::from_error_kind(input, 
e)); - } - } - } - Ok((new_input, res)) - } - } -} - -/// Recognizes an end of line (both '\n' and '\r\n'). -/// -/// *Complete version*: Will return an error if there's not enough input data. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. -/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed}; -/// # use winnow::character::line_ending; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// line_ending(input) -/// } -/// -/// assert_eq!(parser("\r\nc"), Ok(("c", "\r\n"))); -/// assert_eq!(parser("ab\r\nc"), Err(ErrMode::Backtrack(Error::new("ab\r\nc", ErrorKind::Tag)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::line_ending; -/// assert_eq!(line_ending::<_, Error<_>>(Partial::new("\r\nc")), Ok((Partial::new("c"), "\r\n"))); -/// assert_eq!(line_ending::<_, Error<_>>(Partial::new("ab\r\nc")), Err(ErrMode::Backtrack(Error::new(Partial::new("ab\r\nc"), ErrorKind::Tag)))); -/// assert_eq!(line_ending::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn line_ending<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - I: Compare<&'static str>, -{ - trace("line_ending", move |input: I| { - alt(("\n", "\r\n")).parse_next(input) - }) - .parse_next(input) -} - -/// Matches a newline character '\n'. -/// -/// *Complete version*: Will return an error if there's not enough input data. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. -/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed}; -/// # use winnow::character::newline; -/// fn parser(input: &str) -> IResult<&str, char> { -/// newline(input) -/// } -/// -/// assert_eq!(parser("\nc"), Ok(("c", '\n'))); -/// assert_eq!(parser("\r\nc"), Err(ErrMode::Backtrack(Error::new("\r\nc", ErrorKind::Verify)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Token)))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::newline; -/// assert_eq!(newline::<_, Error<_>>(Partial::new("\nc")), Ok((Partial::new("c"), '\n'))); -/// assert_eq!(newline::<_, Error<_>>(Partial::new("\r\nc")), Err(ErrMode::Backtrack(Error::new(Partial::new("\r\nc"), ErrorKind::Verify)))); -/// assert_eq!(newline::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn newline<I, Error: ParseError<I>>(input: I) -> IResult<I, char, Error> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar + Copy, -{ - trace("newline", move |input: I| { - '\n'.map(|c: <I as Stream>::Token| c.as_char()) - .parse_next(input) - }) - .parse_next(input) -} - -/// Matches a tab character '\t'. -/// -/// *Complete version*: Will return an error if there's not enough input data. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. 
-/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed}; -/// # use winnow::character::tab; -/// fn parser(input: &str) -> IResult<&str, char> { -/// tab(input) -/// } -/// -/// assert_eq!(parser("\tc"), Ok(("c", '\t'))); -/// assert_eq!(parser("\r\nc"), Err(ErrMode::Backtrack(Error::new("\r\nc", ErrorKind::Verify)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Token)))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::tab; -/// assert_eq!(tab::<_, Error<_>>(Partial::new("\tc")), Ok((Partial::new("c"), '\t'))); -/// assert_eq!(tab::<_, Error<_>>(Partial::new("\r\nc")), Err(ErrMode::Backtrack(Error::new(Partial::new("\r\nc"), ErrorKind::Verify)))); -/// assert_eq!(tab::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn tab<I, Error: ParseError<I>>(input: I) -> IResult<I, char, Error> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar + Copy, -{ - trace("tab", move |input: I| { - '\t'.map(|c: <I as Stream>::Token| c.as_char()) - .parse_next(input) - }) - .parse_next(input) -} - -/// Recognizes zero or more lowercase and uppercase ASCII alphabetic characters: a-z, A-Z -/// -/// *Complete version*: Will return the whole input if no terminating token is found (a non -/// alphabetic character). -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, -/// or if no terminating token is found (a non alphabetic character). -/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::character::alpha0; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// alpha0(input) -/// } -/// -/// assert_eq!(parser("ab1c"), Ok(("1c", "ab"))); -/// assert_eq!(parser("1c"), Ok(("1c", ""))); -/// assert_eq!(parser(""), Ok(("", ""))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::alpha0; -/// assert_eq!(alpha0::<_, Error<_>>(Partial::new("ab1c")), Ok((Partial::new("1c"), "ab"))); -/// assert_eq!(alpha0::<_, Error<_>>(Partial::new("1c")), Ok((Partial::new("1c"), ""))); -/// assert_eq!(alpha0::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn alpha0<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar, -{ - trace("alpha0", move |input: I| { - take_while0(|c: <I as Stream>::Token| c.is_alpha()).parse_next(input) - }) - .parse_next(input) -} - -/// Recognizes one or more lowercase and uppercase ASCII alphabetic characters: a-z, A-Z -/// -/// *Complete version*: Will return an error if there's not enough input data, -/// or the whole input if no terminating token is found (a non alphabetic character). -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, -/// or if no terminating token is found (a non alphabetic character). 
-///
-/// # Example
-///
-/// ```
-/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed};
-/// # use winnow::character::alpha1;
-/// fn parser(input: &str) -> IResult<&str, &str> {
-/// alpha1(input)
-/// }
-///
-/// assert_eq!(parser("aB1c"), Ok(("1c", "aB")));
-/// assert_eq!(parser("1c"), Err(ErrMode::Backtrack(Error::new("1c", ErrorKind::Slice))));
-/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice))));
-/// ```
-///
-/// ```
-/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed};
-/// # use winnow::Partial;
-/// # use winnow::character::alpha1;
-/// assert_eq!(alpha1::<_, Error<_>>(Partial::new("aB1c")), Ok((Partial::new("1c"), "aB")));
-/// assert_eq!(alpha1::<_, Error<_>>(Partial::new("1c")), Err(ErrMode::Backtrack(Error::new(Partial::new("1c"), ErrorKind::Slice))));
-/// assert_eq!(alpha1::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
-/// ```
-#[inline(always)]
-pub fn alpha1<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E>
-where
- I: StreamIsPartial,
- I: Stream,
- <I as Stream>::Token: AsChar,
-{
- trace("alpha1", move |input: I| {
- take_while1(|c: <I as Stream>::Token| c.is_alpha()).parse_next(input)
- })
- .parse_next(input)
-}
-
-/// Recognizes zero or more ASCII numerical characters: 0-9
-///
-/// *Complete version*: Will return the whole input if no terminating token is found (a non digit character).
-///
-/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data,
-/// or if no terminating token is found (a non digit character).
-///
-/// # Example
-///
-/// ```
-/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed};
-/// # use winnow::character::digit0;
-/// fn parser(input: &str) -> IResult<&str, &str> {
-/// digit0(input)
-/// }
-///
-/// assert_eq!(parser("21c"), Ok(("c", "21")));
-/// assert_eq!(parser("21"), Ok(("", "21")));
-/// assert_eq!(parser("a21c"), Ok(("a21c", "")));
-/// assert_eq!(parser(""), Ok(("", "")));
-/// ```
-///
-/// ```
-/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed};
-/// # use winnow::Partial;
-/// # use winnow::character::digit0;
-/// assert_eq!(digit0::<_, Error<_>>(Partial::new("21c")), Ok((Partial::new("c"), "21")));
-/// assert_eq!(digit0::<_, Error<_>>(Partial::new("a21c")), Ok((Partial::new("a21c"), "")));
-/// assert_eq!(digit0::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
-/// ```
-#[inline(always)]
-pub fn digit0<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E>
-where
- I: StreamIsPartial,
- I: Stream,
- <I as Stream>::Token: AsChar,
-{
- trace("digit0", move |input: I| {
- take_while0(|c: <I as Stream>::Token| c.is_dec_digit()).parse_next(input)
- })
- .parse_next(input)
-}
-
-/// Recognizes one or more ASCII numerical characters: 0-9
-///
-/// *Complete version*: Will return an error if there's not enough input data,
-/// or the whole input if no terminating token is found (a non digit character).
-///
-/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data,
-/// or if no terminating token is found (a non digit character).
-/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed}; -/// # use winnow::character::digit1; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// digit1(input) -/// } -/// -/// assert_eq!(parser("21c"), Ok(("c", "21"))); -/// assert_eq!(parser("c1"), Err(ErrMode::Backtrack(Error::new("c1", ErrorKind::Slice)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::digit1; -/// assert_eq!(digit1::<_, Error<_>>(Partial::new("21c")), Ok((Partial::new("c"), "21"))); -/// assert_eq!(digit1::<_, Error<_>>(Partial::new("c1")), Err(ErrMode::Backtrack(Error::new(Partial::new("c1"), ErrorKind::Slice)))); -/// assert_eq!(digit1::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -/// -/// ## Parsing an integer -/// -/// You can use `digit1` in combination with [`Parser::map_res`][crate::Parser::map_res] to parse an integer: -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed, Parser}; -/// # use winnow::character::digit1; -/// fn parser(input: &str) -> IResult<&str, u32> { -/// digit1.map_res(str::parse).parse_next(input) -/// } -/// -/// assert_eq!(parser("416"), Ok(("", 416))); -/// assert_eq!(parser("12b"), Ok(("b", 12))); -/// assert!(parser("b").is_err()); -/// ``` -#[inline(always)] -pub fn digit1<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar, -{ - trace("digit1", move |input: I| { - take_while1(|c: <I as Stream>::Token| c.is_dec_digit()).parse_next(input) - }) - .parse_next(input) -} - -/// Recognizes zero or more ASCII hexadecimal numerical characters: 0-9, A-F, a-f -/// -/// *Complete version*: Will return the whole input if no terminating token is found (a non hexadecimal digit character). -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, -/// or if no terminating token is found (a non hexadecimal digit character). 
-/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::character::hex_digit0; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// hex_digit0(input) -/// } -/// -/// assert_eq!(parser("21cZ"), Ok(("Z", "21c"))); -/// assert_eq!(parser("Z21c"), Ok(("Z21c", ""))); -/// assert_eq!(parser(""), Ok(("", ""))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::hex_digit0; -/// assert_eq!(hex_digit0::<_, Error<_>>(Partial::new("21cZ")), Ok((Partial::new("Z"), "21c"))); -/// assert_eq!(hex_digit0::<_, Error<_>>(Partial::new("Z21c")), Ok((Partial::new("Z21c"), ""))); -/// assert_eq!(hex_digit0::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn hex_digit0<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar, -{ - trace("hex_digit0", move |input: I| { - take_while0(|c: <I as Stream>::Token| c.is_hex_digit()).parse_next(input) - }) - .parse_next(input) -} - -/// Recognizes one or more ASCII hexadecimal numerical characters: 0-9, A-F, a-f -/// -/// *Complete version*: Will return an error if there's not enough input data, -/// or the whole input if no terminating token is found (a non hexadecimal digit character). -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, -/// or if no terminating token is found (a non hexadecimal digit character). -/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed}; -/// # use winnow::character::hex_digit1; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// hex_digit1(input) -/// } -/// -/// assert_eq!(parser("21cZ"), Ok(("Z", "21c"))); -/// assert_eq!(parser("H2"), Err(ErrMode::Backtrack(Error::new("H2", ErrorKind::Slice)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::hex_digit1; -/// assert_eq!(hex_digit1::<_, Error<_>>(Partial::new("21cZ")), Ok((Partial::new("Z"), "21c"))); -/// assert_eq!(hex_digit1::<_, Error<_>>(Partial::new("H2")), Err(ErrMode::Backtrack(Error::new(Partial::new("H2"), ErrorKind::Slice)))); -/// assert_eq!(hex_digit1::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn hex_digit1<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar, -{ - trace("hex_digit1", move |input: I| { - take_while1(|c: <I as Stream>::Token| c.is_hex_digit()).parse_next(input) - }) - .parse_next(input) -} - -/// Recognizes zero or more octal characters: 0-7 -/// -/// *Complete version*: Will return the whole input if no terminating token is found (a non octal -/// digit character). -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, -/// or if no terminating token is found (a non octal digit character). 
-///
-/// # Example
-///
-/// ```
-/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed};
-/// # use winnow::character::oct_digit0;
-/// fn parser(input: &str) -> IResult<&str, &str> {
-/// oct_digit0(input)
-/// }
-///
-/// assert_eq!(parser("21cZ"), Ok(("cZ", "21")));
-/// assert_eq!(parser("Z21c"), Ok(("Z21c", "")));
-/// assert_eq!(parser(""), Ok(("", "")));
-/// ```
-///
-/// ```
-/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed};
-/// # use winnow::Partial;
-/// # use winnow::character::oct_digit0;
-/// assert_eq!(oct_digit0::<_, Error<_>>(Partial::new("21cZ")), Ok((Partial::new("cZ"), "21")));
-/// assert_eq!(oct_digit0::<_, Error<_>>(Partial::new("Z21c")), Ok((Partial::new("Z21c"), "")));
-/// assert_eq!(oct_digit0::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
-/// ```
-#[inline(always)]
-pub fn oct_digit0<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E>
-where
- I: StreamIsPartial,
- I: Stream,
- <I as Stream>::Token: AsChar,
-{
- trace("oct_digit0", move |input: I| {
- take_while0(|c: <I as Stream>::Token| c.is_oct_digit()).parse_next(input)
- })
- .parse_next(input)
-}
-
-/// Recognizes one or more octal characters: 0-7
-///
-/// *Complete version*: Will return an error if there's not enough input data,
-/// or the whole input if no terminating token is found (a non octal digit character).
-///
-/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data,
-/// or if no terminating token is found (a non octal digit character).
-///
-/// # Example
-///
-/// ```
-/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed};
-/// # use winnow::character::oct_digit1;
-/// fn parser(input: &str) -> IResult<&str, &str> {
-/// oct_digit1(input)
-/// }
-///
-/// assert_eq!(parser("21cZ"), Ok(("cZ", "21")));
-/// assert_eq!(parser("H2"), Err(ErrMode::Backtrack(Error::new("H2", ErrorKind::Slice))));
-/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice))));
-/// ```
-///
-/// ```
-/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed};
-/// # use winnow::Partial;
-/// # use winnow::character::oct_digit1;
-/// assert_eq!(oct_digit1::<_, Error<_>>(Partial::new("21cZ")), Ok((Partial::new("cZ"), "21")));
-/// assert_eq!(oct_digit1::<_, Error<_>>(Partial::new("H2")), Err(ErrMode::Backtrack(Error::new(Partial::new("H2"), ErrorKind::Slice))));
-/// assert_eq!(oct_digit1::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1))));
-/// ```
-#[inline(always)]
-pub fn oct_digit1<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E>
-where
- I: StreamIsPartial,
- I: Stream,
- <I as Stream>::Token: AsChar,
-{
- trace("oct_digit1", move |input: I| {
- take_while1(|c: <I as Stream>::Token| c.is_oct_digit()).parse_next(input)
- })
- .parse_next(input)
-}
-
-/// Recognizes zero or more ASCII numerical and alphabetic characters: 0-9, a-z, A-Z
-///
-/// *Complete version*: Will return the whole input if no terminating token is found (a non
-/// alphanumerical character).
-///
-/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data,
-/// or if no terminating token is found (a non alphanumerical character).
-/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::character::alphanumeric0; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// alphanumeric0(input) -/// } -/// -/// assert_eq!(parser("21cZ%1"), Ok(("%1", "21cZ"))); -/// assert_eq!(parser("&Z21c"), Ok(("&Z21c", ""))); -/// assert_eq!(parser(""), Ok(("", ""))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::alphanumeric0; -/// assert_eq!(alphanumeric0::<_, Error<_>>(Partial::new("21cZ%1")), Ok((Partial::new("%1"), "21cZ"))); -/// assert_eq!(alphanumeric0::<_, Error<_>>(Partial::new("&Z21c")), Ok((Partial::new("&Z21c"), ""))); -/// assert_eq!(alphanumeric0::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn alphanumeric0<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar, -{ - trace("alphanumeric0", move |input: I| { - take_while0(|c: <I as Stream>::Token| c.is_alphanum()).parse_next(input) - }) - .parse_next(input) -} - -/// Recognizes one or more ASCII numerical and alphabetic characters: 0-9, a-z, A-Z -/// -/// *Complete version*: Will return an error if there's not enough input data, -/// or the whole input if no terminating token is found (a non alphanumerical character). -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, -/// or if no terminating token is found (a non alphanumerical character). -/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed}; -/// # use winnow::character::alphanumeric1; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// alphanumeric1(input) -/// } -/// -/// assert_eq!(parser("21cZ%1"), Ok(("%1", "21cZ"))); -/// assert_eq!(parser("&H2"), Err(ErrMode::Backtrack(Error::new("&H2", ErrorKind::Slice)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::alphanumeric1; -/// assert_eq!(alphanumeric1::<_, Error<_>>(Partial::new("21cZ%1")), Ok((Partial::new("%1"), "21cZ"))); -/// assert_eq!(alphanumeric1::<_, Error<_>>(Partial::new("&H2")), Err(ErrMode::Backtrack(Error::new(Partial::new("&H2"), ErrorKind::Slice)))); -/// assert_eq!(alphanumeric1::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn alphanumeric1<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar, -{ - trace("alphanumeric1", move |input: I| { - take_while1(|c: <I as Stream>::Token| c.is_alphanum()).parse_next(input) - }) - .parse_next(input) -} - -/// Recognizes zero or more spaces and tabs. -/// -/// *Complete version*: Will return the whole input if no terminating token is found (a non space -/// character). -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, -/// or if no terminating token is found (a non space character). 
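Unlike its siblings, `space0`'s example below only exercises `Partial` input; for symmetry with the other `*0` parsers, a complete-input sketch under the same API:

```rust
use winnow::character::space0;
use winnow::prelude::*;

fn parser(input: &str) -> IResult<&str, &str> {
    space0(input)
}

fn main() {
    assert_eq!(parser(" \t21c"), Ok(("21c", " \t")));
    assert_eq!(parser("Z21c"), Ok(("Z21c", "")));
    // Zero-or-more: empty complete input succeeds with an empty match.
    assert_eq!(parser(""), Ok(("", "")));
}
```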
-/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::space0; -/// assert_eq!(space0::<_, Error<_>>(Partial::new(" \t21c")), Ok((Partial::new("21c"), " \t"))); -/// assert_eq!(space0::<_, Error<_>>(Partial::new("Z21c")), Ok((Partial::new("Z21c"), ""))); -/// assert_eq!(space0::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn space0<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar + Copy, -{ - trace("space0", move |input: I| { - take_while0(|c: <I as Stream>::Token| { - let ch = c.as_char(); - matches!(ch, ' ' | '\t') - }) - .parse_next(input) - }) - .parse_next(input) -} - -/// Recognizes one or more spaces and tabs. -/// -/// *Complete version*: Will return an error if there's not enough input data, -/// or the whole input if no terminating token is found (a non space character). -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, -/// or if no terminating token is found (a non space character). -/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed}; -/// # use winnow::character::space1; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// space1(input) -/// } -/// -/// assert_eq!(parser(" \t21c"), Ok(("21c", " \t"))); -/// assert_eq!(parser("H2"), Err(ErrMode::Backtrack(Error::new("H2", ErrorKind::Slice)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::space1; -/// assert_eq!(space1::<_, Error<_>>(Partial::new(" \t21c")), Ok((Partial::new("21c"), " \t"))); -/// assert_eq!(space1::<_, Error<_>>(Partial::new("H2")), Err(ErrMode::Backtrack(Error::new(Partial::new("H2"), ErrorKind::Slice)))); -/// assert_eq!(space1::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn space1<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar + Copy, -{ - trace("space1", move |input: I| { - take_while1(|c: <I as Stream>::Token| { - let ch = c.as_char(); - matches!(ch, ' ' | '\t') - }) - .parse_next(input) - }) - .parse_next(input) -} - -/// Recognizes zero or more spaces, tabs, carriage returns and line feeds. -/// -/// *Complete version*: will return the whole input if no terminating token is found (a non space -/// character). -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, -/// or if no terminating token is found (a non space character). 
-/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::character::multispace0; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// multispace0(input) -/// } -/// -/// assert_eq!(parser(" \t\n\r21c"), Ok(("21c", " \t\n\r"))); -/// assert_eq!(parser("Z21c"), Ok(("Z21c", ""))); -/// assert_eq!(parser(""), Ok(("", ""))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::multispace0; -/// assert_eq!(multispace0::<_, Error<_>>(Partial::new(" \t\n\r21c")), Ok((Partial::new("21c"), " \t\n\r"))); -/// assert_eq!(multispace0::<_, Error<_>>(Partial::new("Z21c")), Ok((Partial::new("Z21c"), ""))); -/// assert_eq!(multispace0::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn multispace0<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar + Copy, -{ - trace("multispace0", move |input: I| { - take_while0(|c: <I as Stream>::Token| { - let ch = c.as_char(); - matches!(ch, ' ' | '\t' | '\r' | '\n') - }) - .parse_next(input) - }) - .parse_next(input) -} - -/// Recognizes one or more spaces, tabs, carriage returns and line feeds. -/// -/// *Complete version*: will return an error if there's not enough input data, -/// or the whole input if no terminating token is found (a non space character). -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data, -/// or if no terminating token is found (a non space character). -/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult, error::Needed}; -/// # use winnow::character::multispace1; -/// fn parser(input: &str) -> IResult<&str, &str> { -/// multispace1(input) -/// } -/// -/// assert_eq!(parser(" \t\n\r21c"), Ok(("21c", " \t\n\r"))); -/// assert_eq!(parser("H2"), Err(ErrMode::Backtrack(Error::new("H2", ErrorKind::Slice)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); -/// ``` -/// -/// ``` -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, error::Needed}; -/// # use winnow::Partial; -/// # use winnow::character::multispace1; -/// assert_eq!(multispace1::<_, Error<_>>(Partial::new(" \t\n\r21c")), Ok((Partial::new("21c"), " \t\n\r"))); -/// assert_eq!(multispace1::<_, Error<_>>(Partial::new("H2")), Err(ErrMode::Backtrack(Error::new(Partial::new("H2"), ErrorKind::Slice)))); -/// assert_eq!(multispace1::<_, Error<_>>(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// ``` -#[inline(always)] -pub fn multispace1<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar + Copy, -{ - trace("multispace1", move |input: I| { - take_while1(|c: <I as Stream>::Token| { - let ch = c.as_char(); - matches!(ch, ' ' | '\t' | '\r' | '\n') - }) - .parse_next(input) - }) - .parse_next(input) -} - -/// Decode a decimal unsigned integer -/// -/// *Complete version*: can parse until the end of input. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. 
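`dec_uint` (and `dec_int` below) ship without an `# Example` block here; a minimal usage sketch, assuming the checked-arithmetic and sign behavior visible in the implementations that follow:

```rust
use winnow::character::{dec_int, dec_uint};
use winnow::prelude::*;

fn uint(s: &str) -> IResult<&str, u32> {
    dec_uint(s)
}

fn int(s: &str) -> IResult<&str, i32> {
    dec_int(s)
}

fn main() {
    assert_eq!(uint("212abc"), Ok(("abc", 212)));
    // Accumulation uses checked_mul/checked_add, so overflow is an
    // ErrorKind::Verify backtrack rather than a wrapped value.
    assert!(uint("99999999999").is_err());
    // dec_int additionally accepts one optional leading sign.
    assert_eq!(int("-17rest"), Ok(("rest", -17)));
}
```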
-#[doc(alias = "u8")] -#[doc(alias = "u16")] -#[doc(alias = "u32")] -#[doc(alias = "u64")] -#[doc(alias = "u128")] -pub fn dec_uint<I, O, E: ParseError<I>>(input: I) -> IResult<I, O, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar + Copy, - O: Uint, -{ - trace("dec_uint", move |input: I| { - if input.eof_offset() == 0 { - if input.is_partial() { - return Err(ErrMode::Incomplete(Needed::new(1))); - } else { - return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); - } - } - - let mut value = O::default(); - for (offset, c) in input.iter_offsets() { - match c.as_char().to_digit(10) { - Some(d) => match value.checked_mul(10, sealed::SealedMarker).and_then(|v| { - let d = d as u8; - v.checked_add(d, sealed::SealedMarker) - }) { - None => return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)), - Some(v) => value = v, - }, - None => { - if offset == 0 { - return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); - } else { - return Ok((input.next_slice(offset).0, value)); - } - } - } - } - - if input.is_partial() { - Err(ErrMode::Incomplete(Needed::new(1))) - } else { - Ok((input.next_slice(input.eof_offset()).0, value)) - } - }) - .parse_next(input) -} - -/// Metadata for parsing unsigned integers, see [`dec_uint`] -pub trait Uint: Default { - #[doc(hidden)] - fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self>; - #[doc(hidden)] - fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self>; -} - -impl Uint for u8 { - fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_mul(by as Self) - } - fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_add(by as Self) - } -} - -impl Uint for u16 { - fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_mul(by as Self) - } - fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_add(by as Self) - } -} - -impl Uint for u32 { - fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_mul(by as Self) - } - fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_add(by as Self) - } -} - -impl Uint for u64 { - fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_mul(by as Self) - } - fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_add(by as Self) - } -} - -impl Uint for u128 { - fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_mul(by as Self) - } - fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_add(by as Self) - } -} - -impl Uint for i8 { - fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_mul(by as Self) - } - fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_add(by as Self) - } -} - -impl Uint for i16 { - fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_mul(by as Self) - } - fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_add(by as Self) - } -} - -impl Uint for i32 { - fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_mul(by as Self) - } - fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_add(by as Self) - } -} - -impl Uint for i64 { - fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - 
self.checked_mul(by as Self) - } - fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_add(by as Self) - } -} - -impl Uint for i128 { - fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_mul(by as Self) - } - fn checked_add(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_add(by as Self) - } -} - -/// Decode a decimal signed integer -/// -/// *Complete version*: can parse until the end of input. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. -#[doc(alias = "i8")] -#[doc(alias = "i16")] -#[doc(alias = "i32")] -#[doc(alias = "i64")] -#[doc(alias = "i128")] -pub fn dec_int<I, O, E: ParseError<I>>(input: I) -> IResult<I, O, E> -where - I: StreamIsPartial, - I: Stream, - <I as Stream>::Token: AsChar + Copy, - O: Int, -{ - trace("dec_int", move |input: I| { - fn sign(token: impl AsChar) -> bool { - let token = token.as_char(); - token == '+' || token == '-' - } - let (input, sign) = opt(crate::bytes::one_of(sign).map(AsChar::as_char)) - .map(|c| c != Some('-')) - .parse_next(input)?; - - if input.eof_offset() == 0 { - if input.is_partial() { - return Err(ErrMode::Incomplete(Needed::new(1))); - } else { - return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); - } - } - - let mut value = O::default(); - for (offset, c) in input.iter_offsets() { - match c.as_char().to_digit(10) { - Some(d) => match value.checked_mul(10, sealed::SealedMarker).and_then(|v| { - let d = d as u8; - if sign { - v.checked_add(d, sealed::SealedMarker) - } else { - v.checked_sub(d, sealed::SealedMarker) - } - }) { - None => return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)), - Some(v) => value = v, - }, - None => { - if offset == 0 { - return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); - } else { - return Ok((input.next_slice(offset).0, value)); - } - } - } - } - - if input.is_partial() { - Err(ErrMode::Incomplete(Needed::new(1))) - } else { - Ok((input.next_slice(input.eof_offset()).0, value)) - } - }) - .parse_next(input) -} - -/// Metadata for parsing signed integers, see [`dec_int`] -pub trait Int: Uint { - #[doc(hidden)] - fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self>; -} - -impl Int for i8 { - fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_sub(by as Self) - } -} - -impl Int for i16 { - fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_sub(by as Self) - } -} - -impl Int for i32 { - fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_sub(by as Self) - } -} - -impl Int for i64 { - fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_sub(by as Self) - } -} - -impl Int for i128 { - fn checked_sub(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { - self.checked_sub(by as Self) - } -} - -/// Decode a variable-width hexadecimal integer. -/// -/// *Complete version*: Will parse until the end of input if it has fewer characters than the type -/// supports. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if end-of-input -/// is hit before a hard boundary (non-hex character, more characters than supported). 
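Before `hex_uint`'s examples, one observation on `dec_int` above: it folds each digit with `checked_mul` plus `checked_add`/`checked_sub`, subtracting per digit when the sign is negative. Accumulating in the signed domain is what makes `i8::MIN` (`-128`) reachable even though `+128` would overflow. A standalone sketch of that strategy (plain Rust, not the winnow API):

```rust
// Hedged sketch of dec_int's per-digit signed accumulation.
fn parse_i8(s: &str) -> Option<i8> {
    let (neg, digits) = match s.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, s.strip_prefix('+').unwrap_or(s)),
    };
    if digits.is_empty() {
        return None; // at least one digit is required
    }
    let mut value: i8 = 0;
    for c in digits.chars() {
        let d = c.to_digit(10)? as i8;
        // Negative inputs subtract each digit: -12 * 10 - 8 = -128 stays in
        // range, while building +128 first would already have overflowed.
        value = value.checked_mul(10)?;
        value = if neg {
            value.checked_sub(d)?
        } else {
            value.checked_add(d)?
        };
    }
    Some(value)
}

fn main() {
    assert_eq!(parse_i8("-128"), Some(-128));
    assert_eq!(parse_i8("128"), None); // overflows i8
}
```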
-/// -/// # Example -/// -/// ```rust -/// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error}; -/// use winnow::character::hex_uint; -/// -/// fn parser(s: &[u8]) -> IResult<&[u8], u32> { -/// hex_uint(s) -/// } -/// -/// assert_eq!(parser(&b"01AE"[..]), Ok((&b""[..], 0x01AE))); -/// assert_eq!(parser(&b"abc"[..]), Ok((&b""[..], 0x0ABC))); -/// assert_eq!(parser(&b"ggg"[..]), Err(ErrMode::Backtrack(Error::new(&b"ggg"[..], ErrorKind::Slice)))); -/// ``` -/// -/// ```rust -/// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::Partial; -/// use winnow::character::hex_uint; -/// -/// fn parser(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { -/// hex_uint(s) -/// } -/// -/// assert_eq!(parser(Partial::new(&b"01AE;"[..])), Ok((Partial::new(&b";"[..]), 0x01AE))); -/// assert_eq!(parser(Partial::new(&b"abc"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); -/// assert_eq!(parser(Partial::new(&b"ggg"[..])), Err(ErrMode::Backtrack(Error::new(Partial::new(&b"ggg"[..]), ErrorKind::Slice)))); -/// ``` -#[inline] -pub fn hex_uint<I, O, E: ParseError<I>>(input: I) -> IResult<I, O, E> -where - I: StreamIsPartial, - I: Stream, - O: HexUint, - <I as Stream>::Token: AsChar, - <I as Stream>::Slice: AsBStr, -{ - trace("hex_uint", move |input: I| { - let invalid_offset = input - .offset_for(|c| { - let c = c.as_char(); - !"0123456789abcdefABCDEF".contains(c) - }) - .unwrap_or_else(|| input.eof_offset()); - let max_nibbles = O::max_nibbles(sealed::SealedMarker); - let max_offset = input.offset_at(max_nibbles); - let offset = match max_offset { - Ok(max_offset) => { - if max_offset < invalid_offset { - // Overflow - return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)); - } else { - invalid_offset - } - } - Err(_) => { - if input.is_partial() && invalid_offset == input.eof_offset() { - // Only the next byte is guaranteed required - return Err(ErrMode::Incomplete(Needed::new(1))); - } else { - invalid_offset - } - } - }; - if offset == 0 { - // Must be at least one digit - return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); - } - let (remaining, parsed) = input.next_slice(offset); - - let mut res = O::default(); - for c in parsed.as_bstr() { - let nibble = *c as char; - let nibble = nibble.to_digit(16).unwrap_or(0) as u8; - let nibble = O::from(nibble); - res = (res << O::from(4)) + nibble; - } - - Ok((remaining, res)) - }) - .parse_next(input) -} - -/// Metadata for parsing hex numbers, see [`hex_uint`] -pub trait HexUint: - Default + Shl<Self, Output = Self> + Add<Self, Output = Self> + From<u8> -{ - #[doc(hidden)] - fn max_nibbles(_: sealed::SealedMarker) -> usize; -} - -impl HexUint for u8 { - #[inline(always)] - fn max_nibbles(_: sealed::SealedMarker) -> usize { - 2 - } -} - -impl HexUint for u16 { - #[inline(always)] - fn max_nibbles(_: sealed::SealedMarker) -> usize { - 4 - } -} - -impl HexUint for u32 { - #[inline(always)] - fn max_nibbles(_: sealed::SealedMarker) -> usize { - 8 - } -} - -impl HexUint for u64 { - #[inline(always)] - fn max_nibbles(_: sealed::SealedMarker) -> usize { - 16 - } -} - -impl HexUint for u128 { - #[inline(always)] - fn max_nibbles(_: sealed::SealedMarker) -> usize { - 32 - } -} - -/// Recognizes floating point number in text format and returns a f32 or f64. -/// -/// *Complete version*: Can parse until the end of input. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there is not enough data. 
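One more note before the `float` examples, on `hex_uint` above: `max_nibbles` is what turns "too many hex digits" into a hard overflow error rather than silent truncation, and the accumulation is a plain shift-and-add (`res = (res << 4) + nibble`). The same bound, sketched standalone for `u32` (8 nibbles):

```rust
// Standalone sketch of hex_uint's width check for u32 (max_nibbles == 8).
fn parse_hex_u32(digits: &str) -> Option<u32> {
    if digits.is_empty() || digits.len() > 8 {
        return None; // no digits, or more nibbles than the type can hold
    }
    let mut res: u32 = 0;
    for c in digits.chars() {
        res = (res << 4) + c.to_digit(16)?;
    }
    Some(res)
}

fn main() {
    assert_eq!(parse_hex_u32("01AE"), Some(0x01AE));
    assert_eq!(parse_hex_u32("100000000"), None); // 9 nibbles: overflow
}
```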
-/// -/// # Example -/// -/// ```rust -/// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::error::Needed::Size; -/// use winnow::character::float; -/// -/// fn parser(s: &str) -> IResult<&str, f64> { -/// float(s) -/// } -/// -/// assert_eq!(parser("11e-1"), Ok(("", 1.1))); -/// assert_eq!(parser("123E-02"), Ok(("", 1.23))); -/// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); -/// assert_eq!(parser("abc"), Err(ErrMode::Backtrack(Error::new("abc", ErrorKind::Tag)))); -/// ``` -/// -/// ```rust -/// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::error::Needed::Size; -/// # use winnow::Partial; -/// use winnow::character::float; -/// -/// fn parser(s: Partial<&str>) -> IResult<Partial<&str>, f64> { -/// float(s) -/// } -/// -/// assert_eq!(parser(Partial::new("11e-1 ")), Ok((Partial::new(" "), 1.1))); -/// assert_eq!(parser(Partial::new("11e-1")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// assert_eq!(parser(Partial::new("123E-02")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// assert_eq!(parser(Partial::new("123K-01")), Ok((Partial::new("K-01"), 123.0))); -/// assert_eq!(parser(Partial::new("abc")), Err(ErrMode::Backtrack(Error::new(Partial::new("abc"), ErrorKind::Tag)))); -/// ``` -#[inline(always)] -#[doc(alias = "f32")] -#[doc(alias = "double")] -pub fn float<I, O, E: ParseError<I>>(input: I) -> IResult<I, O, E> -where - I: StreamIsPartial, - I: Stream, - I: Offset + Compare<&'static str>, - <I as Stream>::Slice: ParseSlice<O>, - <I as Stream>::Token: AsChar + Copy, - <I as Stream>::IterOffsets: Clone, - I: AsBStr, - &'static str: ContainsToken<<I as Stream>::Token>, -{ - trace("float", move |input: I| { - let (i, s) = recognize_float_or_exceptions(input)?; - match s.parse_slice() { - Some(f) => Ok((i, f)), - None => Err(ErrMode::from_error_kind(i, ErrorKind::Verify)), - } - }) - .parse_next(input) -} - -fn recognize_float_or_exceptions<I, E: ParseError<I>>( - input: I, -) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - I: Offset + Compare<&'static str>, - <I as Stream>::Token: AsChar + Copy, - <I as Stream>::IterOffsets: Clone, - I: AsBStr, - &'static str: ContainsToken<<I as Stream>::Token>, -{ - alt(( - recognize_float, - crate::bytes::tag_no_case("nan"), - crate::bytes::tag_no_case("inf"), - crate::bytes::tag_no_case("infinity"), - )) - .parse_next(input) -} - -fn recognize_float<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - I: Offset + Compare<&'static str>, - <I as Stream>::Token: AsChar + Copy, - <I as Stream>::IterOffsets: Clone, - I: AsBStr, - &'static str: ContainsToken<<I as Stream>::Token>, -{ - ( - opt(one_of("+-")), - alt(( - (digit1, opt(('.', opt(digit1)))).map(|_| ()), - ('.', digit1).map(|_| ()), - )), - opt((one_of("eE"), opt(one_of("+-")), cut_err(digit1))), - ) - .recognize() - .parse_next(input) -} - -/// Matches a byte string with escaped characters. 
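(Backing up to `recognize_float` above: flattened out, its grammar is an optional sign, then either `digits[.digits?]` or `.digits`, then an optional exponent whose digits sit behind `cut_err`, so `"1e"` fails hard instead of backtracking. For reference only, the accepted token shape as a regex; this constant is illustrative, not part of the crate.)

```rust
// Shape accepted by recognize_float; "nan"/"inf"/"infinity" are handled
// separately and case-insensitively by recognize_float_or_exceptions.
const FLOAT_SHAPE: &str = r"[+-]?([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?";
```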
-/// -/// * The first argument matches the normal characters (it must not accept the control character) -/// * The second argument is the control character (like `\` in most languages) -/// * The third argument matches the escaped characters -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed, IResult}; -/// # use winnow::character::digit1; -/// # use winnow::prelude::*; -/// use winnow::character::escaped; -/// use winnow::bytes::one_of; -/// -/// fn esc(s: &str) -> IResult<&str, &str> { -/// escaped(digit1, '\\', one_of(r#""n\"#)).parse_next(s) -/// } -/// -/// assert_eq!(esc("123;"), Ok((";", "123"))); -/// assert_eq!(esc(r#"12\"34;"#), Ok((";", r#"12\"34"#))); -/// ``` -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed, IResult}; -/// # use winnow::character::digit1; -/// # use winnow::prelude::*; -/// # use winnow::Partial; -/// use winnow::character::escaped; -/// use winnow::bytes::one_of; -/// -/// fn esc(s: Partial<&str>) -> IResult<Partial<&str>, &str> { -/// escaped(digit1, '\\', one_of("\"n\\")).parse_next(s) -/// } -/// -/// assert_eq!(esc(Partial::new("123;")), Ok((Partial::new(";"), "123"))); -/// assert_eq!(esc(Partial::new("12\\\"34;")), Ok((Partial::new(";"), "12\\\"34"))); -/// ``` -#[inline(always)] -pub fn escaped<'a, I: 'a, Error, F, G, O1, O2>( - mut normal: F, - control_char: char, - mut escapable: G, -) -> impl Parser<I, <I as Stream>::Slice, Error> -where - I: StreamIsPartial, - I: Stream + Offset, - <I as Stream>::Token: crate::stream::AsChar, - F: Parser<I, O1, Error>, - G: Parser<I, O2, Error>, - Error: ParseError<I>, -{ - trace("escaped", move |input: I| { - if input.is_partial() { - streaming_escaped_internal(input, &mut normal, control_char, &mut escapable) - } else { - complete_escaped_internal(input, &mut normal, control_char, &mut escapable) - } - }) -} - -pub(crate) fn streaming_escaped_internal<I, Error, F, G, O1, O2>( - input: I, - normal: &mut F, - control_char: char, - escapable: &mut G, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream + Offset, - <I as Stream>::Token: crate::stream::AsChar, - F: Parser<I, O1, Error>, - G: Parser<I, O2, Error>, - Error: ParseError<I>, -{ - let mut i = input.clone(); - - while i.eof_offset() > 0 { - let current_len = i.eof_offset(); - - match normal.parse_next(i.clone()) { - Ok((i2, _)) => { - if i2.eof_offset() == 0 { - return Err(ErrMode::Incomplete(Needed::Unknown)); - } else if i2.eof_offset() == current_len { - let offset = input.offset_to(&i2); - return Ok(input.next_slice(offset)); - } else { - i = i2; - } - } - Err(ErrMode::Backtrack(_)) => { - if i.next_token().expect("eof_offset > 0").1.as_char() == control_char { - let next = control_char.len_utf8(); - match escapable.parse_next(i.next_slice(next).0) { - Ok((i2, _)) => { - if i2.eof_offset() == 0 { - return Err(ErrMode::Incomplete(Needed::Unknown)); - } else { - i = i2; - } - } - Err(e) => return Err(e), - } - } else { - let offset = input.offset_to(&i); - return Ok(input.next_slice(offset)); - } - } - Err(e) => { - return Err(e); - } - } - } - - Err(ErrMode::Incomplete(Needed::Unknown)) -} - -pub(crate) fn complete_escaped_internal<'a, I: 'a, Error, F, G, O1, O2>( - input: I, - normal: &mut F, - control_char: char, - escapable: &mut G, -) -> IResult<I, <I as Stream>::Slice, Error> -where - I: Stream + Offset, - <I as Stream>::Token: crate::stream::AsChar, - F: Parser<I, O1, Error>, - G: Parser<I, O2, Error>, - Error: 
ParseError<I>, -{ - let mut i = input.clone(); - - while i.eof_offset() > 0 { - let current_len = i.eof_offset(); - - match normal.parse_next(i.clone()) { - Ok((i2, _)) => { - // return if we consumed everything or if the normal parser - // does not consume anything - if i2.eof_offset() == 0 { - return Ok(input.next_slice(input.eof_offset())); - } else if i2.eof_offset() == current_len { - let offset = input.offset_to(&i2); - return Ok(input.next_slice(offset)); - } else { - i = i2; - } - } - Err(ErrMode::Backtrack(_)) => { - if i.next_token().expect("eof_offset > 0").1.as_char() == control_char { - let next = control_char.len_utf8(); - match escapable.parse_next(i.next_slice(next).0) { - Ok((i2, _)) => { - if i2.eof_offset() == 0 { - return Ok(input.next_slice(input.eof_offset())); - } else { - i = i2; - } - } - Err(e) => return Err(e), - } - } else { - let offset = input.offset_to(&i); - return Ok(input.next_slice(offset)); - } - } - Err(e) => { - return Err(e); - } - } - } - - Ok(input.next_slice(input.eof_offset())) -} - -/// Matches a byte string with escaped characters. -/// -/// * The first argument matches the normal characters (it must not match the control character) -/// * The second argument is the control character (like `\` in most languages) -/// * The third argument matches the escaped characters and transforms them -/// -/// As an example, the chain `abc\tdef` could be `abc def` (it also consumes the control character) -/// -/// # Example -/// -/// ```rust -/// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use std::str::from_utf8; -/// use winnow::bytes::tag; -/// use winnow::character::escaped_transform; -/// use winnow::character::alpha1; -/// use winnow::branch::alt; -/// -/// fn parser(input: &str) -> IResult<&str, String> { -/// escaped_transform( -/// alpha1, -/// '\\', -/// alt(( -/// "\\".value("\\"), -/// "\"".value("\""), -/// "n".value("\n"), -/// )) -/// ).parse_next(input) -/// } -/// -/// assert_eq!(parser("ab\\\"cd"), Ok(("", String::from("ab\"cd")))); -/// assert_eq!(parser("ab\\ncd"), Ok(("", String::from("ab\ncd")))); -/// ``` -/// -/// ``` -/// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use std::str::from_utf8; -/// # use winnow::Partial; -/// use winnow::bytes::tag; -/// use winnow::character::escaped_transform; -/// use winnow::character::alpha1; -/// use winnow::branch::alt; -/// -/// fn parser(input: Partial<&str>) -> IResult<Partial<&str>, String> { -/// escaped_transform( -/// alpha1, -/// '\\', -/// alt(( -/// "\\".value("\\"), -/// "\"".value("\""), -/// "n".value("\n"), -/// )) -/// ).parse_next(input) -/// } -/// -/// assert_eq!(parser(Partial::new("ab\\\"cd\"")), Ok((Partial::new("\""), String::from("ab\"cd")))); -/// ``` -#[cfg(feature = "alloc")] -#[inline(always)] -pub fn escaped_transform<I, Error, F, G, Output>( - mut normal: F, - control_char: char, - mut transform: G, -) -> impl Parser<I, Output, Error> -where - I: StreamIsPartial, - I: Stream + Offset, - <I as Stream>::Token: crate::stream::AsChar, - Output: crate::stream::Accumulate<<I as Stream>::Slice>, - F: Parser<I, <I as Stream>::Slice, Error>, - G: Parser<I, <I as Stream>::Slice, Error>, - Error: ParseError<I>, -{ - trace("escaped_transform", move |input: I| { - if input.is_partial() { - streaming_escaped_transform_internal(input, &mut normal, control_char, &mut transform) - } else { - complete_escaped_transform_internal(input, &mut 
normal, control_char, &mut transform) - } - }) -} - -#[cfg(feature = "alloc")] -pub(crate) fn streaming_escaped_transform_internal<I, Error, F, G, Output>( - input: I, - normal: &mut F, - control_char: char, - transform: &mut G, -) -> IResult<I, Output, Error> -where - I: Stream + Offset, - <I as Stream>::Token: crate::stream::AsChar, - Output: crate::stream::Accumulate<<I as Stream>::Slice>, - F: Parser<I, <I as Stream>::Slice, Error>, - G: Parser<I, <I as Stream>::Slice, Error>, - Error: ParseError<I>, -{ - let mut offset = 0; - let mut res = Output::initial(Some(input.eof_offset())); - - let i = input.clone(); - - while offset < i.eof_offset() { - let current_len = i.eof_offset(); - let remainder = i.next_slice(offset).0; - match normal.parse_next(remainder.clone()) { - Ok((i2, o)) => { - res.accumulate(o); - if i2.eof_offset() == 0 { - return Err(ErrMode::Incomplete(Needed::Unknown)); - } else if i2.eof_offset() == current_len { - return Ok((remainder, res)); - } else { - offset = input.offset_to(&i2); - } - } - Err(ErrMode::Backtrack(_)) => { - if remainder.next_token().expect("eof_offset > 0").1.as_char() == control_char { - let next = offset + control_char.len_utf8(); - match transform.parse_next(i.next_slice(next).0) { - Ok((i2, o)) => { - res.accumulate(o); - if i2.eof_offset() == 0 { - return Err(ErrMode::Incomplete(Needed::Unknown)); - } else { - offset = input.offset_to(&i2); - } - } - Err(e) => return Err(e), - } - } else { - return Ok((remainder, res)); - } - } - Err(e) => return Err(e), - } - } - Err(ErrMode::Incomplete(Needed::Unknown)) -} - -#[cfg(feature = "alloc")] -pub(crate) fn complete_escaped_transform_internal<I, Error, F, G, Output>( - input: I, - normal: &mut F, - control_char: char, - transform: &mut G, -) -> IResult<I, Output, Error> -where - I: Stream + Offset, - <I as Stream>::Token: crate::stream::AsChar, - Output: crate::stream::Accumulate<<I as Stream>::Slice>, - F: Parser<I, <I as Stream>::Slice, Error>, - G: Parser<I, <I as Stream>::Slice, Error>, - Error: ParseError<I>, -{ - let mut offset = 0; - let mut res = Output::initial(Some(input.eof_offset())); - - let i = input.clone(); - - while offset < i.eof_offset() { - let current_len = i.eof_offset(); - let (remainder, _) = i.next_slice(offset); - match normal.parse_next(remainder.clone()) { - Ok((i2, o)) => { - res.accumulate(o); - if i2.eof_offset() == 0 { - return Ok((i.next_slice(i.eof_offset()).0, res)); - } else if i2.eof_offset() == current_len { - return Ok((remainder, res)); - } else { - offset = input.offset_to(&i2); - } - } - Err(ErrMode::Backtrack(_)) => { - if remainder.next_token().expect("eof_offset > 0").1.as_char() == control_char { - let next = offset + control_char.len_utf8(); - match transform.parse_next(i.next_slice(next).0) { - Ok((i2, o)) => { - res.accumulate(o); - if i2.eof_offset() == 0 { - return Ok((i.next_slice(i.eof_offset()).0, res)); - } else { - offset = input.offset_to(&i2); - } - } - Err(e) => return Err(e), - } - } else { - return Ok((remainder, res)); - } - } - Err(e) => return Err(e), - } - } - Ok((input.next_slice(offset).0, res)) -} - -mod sealed { - pub struct SealedMarker; -} diff --git a/vendor/winnow/src/branch/mod.rs b/vendor/winnow/src/combinator/branch.rs similarity index 54% rename from vendor/winnow/src/branch/mod.rs rename to vendor/winnow/src/combinator/branch.rs index 6a2c9bc..c5eac99 100644 --- a/vendor/winnow/src/branch/mod.rs +++ b/vendor/winnow/src/combinator/branch.rs @@ -1,14 +1,7 @@ -//! 
Choice combinators - -#[cfg(test)] -mod tests; - -use crate::error::ErrMode; -use crate::error::ErrorKind; -use crate::error::ParseError; +use crate::error::{ErrMode, ErrorKind, ParserError}; use crate::stream::Stream; use crate::trace::trace; -use crate::{IResult, Parser}; +use crate::*; #[doc(inline)] pub use crate::dispatch; @@ -18,7 +11,7 @@ pub use crate::dispatch; /// This trait is implemented for tuples of up to 21 elements pub trait Alt<I, O, E> { /// Tests each parser in the tuple and returns the result of the first one that succeeds - fn choice(&mut self, input: I) -> IResult<I, O, E>; + fn choice(&mut self, input: &mut I) -> PResult<O, E>; } /// Pick the first successful parser @@ -32,13 +25,13 @@ pub trait Alt<I, O, E> { /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::Error,error::ErrorKind, error::Needed}; +/// # use winnow::{error::ErrMode, error::InputError,error::ErrorKind, error::Needed}; /// # use winnow::prelude::*; -/// use winnow::character::{alpha1, digit1}; -/// use winnow::branch::alt; +/// use winnow::ascii::{alpha1, digit1}; +/// use winnow::combinator::alt; /// # fn main() { /// fn parser(input: &str) -> IResult<&str, &str> { -/// alt((alpha1, digit1)).parse_next(input) +/// alt((alpha1, digit1)).parse_peek(input) /// }; /// /// // the first parser, alpha1, recognizes the input @@ -48,14 +41,14 @@ pub trait Alt<I, O, E> { /// assert_eq!(parser("123456"), Ok(("", "123456"))); /// /// // both parsers failed, and with the default error type, alt will return the last error -/// assert_eq!(parser(" "), Err(ErrMode::Backtrack(Error::new(" ", ErrorKind::Slice)))); +/// assert_eq!(parser(" "), Err(ErrMode::Backtrack(InputError::new(" ", ErrorKind::Slice)))); /// # } /// ``` #[doc(alias = "choice")] -pub fn alt<I: Stream, O, E: ParseError<I>, List: Alt<I, O, E>>( +pub fn alt<I: Stream, O, E: ParserError<I>, List: Alt<I, O, E>>( mut l: List, ) -> impl Parser<I, O, E> { - trace("alt", move |i: I| l.choice(i)) + trace("alt", move |i: &mut I| l.choice(i)) } /// Helper trait for the [permutation()] combinator. @@ -63,7 +56,7 @@ pub fn alt<I: Stream, O, E: ParseError<I>, List: Alt<I, O, E>>( /// This trait is implemented for tuples of up to 21 elements pub trait Permutation<I, O, E> { /// Tries to apply all parsers in the tuple in various orders until all of them succeed - fn permutation(&mut self, input: I) -> IResult<I, O, E>; + fn permutation(&mut self, input: &mut I) -> PResult<O, E>; } /// Applies a list of parsers in any order. @@ -73,13 +66,13 @@ pub trait Permutation<I, O, E> { /// tuple of the parser results. 
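The rewritten `Alt` implementations in this hunk replace per-branch input cloning with a snapshot: take a `checkpoint`, `reset` the `&mut` stream before each retry, and merge errors along the way. A minimal sketch of that backtracking pattern over plain `&str` (a hypothetical `alt2` helper, not winnow's trait machinery):

```rust
// Hedged sketch: checkpoint/reset backtracking with a &mut stream.
fn alt2<'i, O, E>(
    input: &mut &'i str,
    mut a: impl FnMut(&mut &'i str) -> Result<O, E>,
    mut b: impl FnMut(&mut &'i str) -> Result<O, E>,
) -> Result<O, E> {
    let start = *input; // checkpoint: remember where this attempt began
    match a(input) {
        Ok(o) => Ok(o),
        Err(_) => {
            *input = start; // reset: undo whatever `a` consumed
            b(input)
        }
    }
}

fn main() {
    // A tiny single-character parser factory for the demo.
    fn lit(c: char) -> impl FnMut(&mut &'static str) -> Result<char, ()> {
        move |i| match i.strip_prefix(c) {
            Some(rest) => {
                *i = rest;
                Ok(c)
            }
            None => Err(()),
        }
    }
    let mut input = "1bc";
    // 'a' fails, the stream resets, then '1' succeeds from the start.
    assert_eq!(alt2(&mut input, lit('a'), lit('1')), Ok('1'));
    assert_eq!(input, "bc");
}
```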
/// /// ```rust -/// # use winnow::{error::ErrMode,error::{Error, ErrorKind}, error::Needed}; +/// # use winnow::{error::ErrMode,error::{InputError, ErrorKind}, error::Needed}; /// # use winnow::prelude::*; -/// use winnow::character::{alpha1, digit1}; -/// use winnow::branch::permutation; +/// use winnow::ascii::{alpha1, digit1}; +/// use winnow::combinator::permutation; /// # fn main() { /// fn parser(input: &str) -> IResult<&str, (&str, &str)> { -/// permutation((alpha1, digit1)).parse_next(input) +/// permutation((alpha1, digit1)).parse_peek(input) /// } /// /// // permutation recognizes alphabetic characters then digit @@ -89,20 +82,20 @@ pub trait Permutation<I, O, E> { /// assert_eq!(parser("123abc"), Ok(("", ("abc", "123")))); /// /// // it will fail if one of the parsers failed -/// assert_eq!(parser("abc;"), Err(ErrMode::Backtrack(Error::new(";", ErrorKind::Slice)))); +/// assert_eq!(parser("abc;"), Err(ErrMode::Backtrack(InputError::new(";", ErrorKind::Slice)))); /// # } /// ``` /// /// The parsers are applied greedily: if there are multiple unapplied parsers /// that could parse the next slice of input, the first one is used. /// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}}; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}}; /// # use winnow::prelude::*; -/// use winnow::branch::permutation; -/// use winnow::bytes::any; +/// use winnow::combinator::permutation; +/// use winnow::token::any; /// /// fn parser(input: &str) -> IResult<&str, (char, char)> { -/// permutation((any, 'a')).parse_next(input) +/// permutation((any, 'a')).parse_peek(input) /// } /// /// // any parses 'b', then char('a') parses 'a' @@ -110,13 +103,38 @@ pub trait Permutation<I, O, E> { /// /// // any parses 'a', then char('a') fails on 'b', /// // even though char('a') followed by any would succeed -/// assert_eq!(parser("ab"), Err(ErrMode::Backtrack(Error::new("b", ErrorKind::Verify)))); +/// assert_eq!(parser("ab"), Err(ErrMode::Backtrack(InputError::new("b", ErrorKind::Verify)))); /// ``` /// -pub fn permutation<I: Stream, O, E: ParseError<I>, List: Permutation<I, O, E>>( +pub fn permutation<I: Stream, O, E: ParserError<I>, List: Permutation<I, O, E>>( mut l: List, ) -> impl Parser<I, O, E> { - trace("permutation", move |i: I| l.permutation(i)) + trace("permutation", move |i: &mut I| l.permutation(i)) +} + +impl<const N: usize, I: Stream, O, E: ParserError<I>, P: Parser<I, O, E>> Alt<I, O, E> for [P; N] { + fn choice(&mut self, input: &mut I) -> PResult<O, E> { + let mut error: Option<E> = None; + + let start = input.checkpoint(); + for branch in self { + input.reset(start.clone()); + match branch.parse_next(input) { + Err(ErrMode::Backtrack(e)) => { + error = match error { + Some(error) => Some(error.or(e)), + None => Some(e), + }; + } + res => return res, + } + } + + match error { + Some(e) => Err(ErrMode::Backtrack(e.append(input, ErrorKind::Alt))), + None => Err(ErrMode::assert(input, "`alt` needs at least one parser")), + } + } } macro_rules! alt_trait( @@ -137,13 +155,14 @@ macro_rules! alt_trait( macro_rules! 
alt_trait_impl( ($($id:ident)+) => ( impl< - I: Clone, Output, Error: ParseError<I>, + I: Stream, Output, Error: ParserError<I>, $($id: Parser<I, Output, Error>),+ > Alt<I, Output, Error> for ( $($id),+ ) { - fn choice(&mut self, input: I) -> IResult<I, Output, Error> { - match self.0.parse_next(input.clone()) { - Err(ErrMode::Backtrack(e)) => alt_trait_inner!(1, self, input, e, $($id)+), + fn choice(&mut self, input: &mut I) -> PResult<Output, Error> { + let start = input.checkpoint(); + match self.0.parse_next(input) { + Err(ErrMode::Backtrack(e)) => alt_trait_inner!(1, self, input, start, e, $($id)+), res => res, } } @@ -151,26 +170,51 @@ macro_rules! alt_trait_impl( ); ); +macro_rules! succ ( + (0, $submac:ident ! ($($rest:tt)*)) => ($submac!(1, $($rest)*)); + (1, $submac:ident ! ($($rest:tt)*)) => ($submac!(2, $($rest)*)); + (2, $submac:ident ! ($($rest:tt)*)) => ($submac!(3, $($rest)*)); + (3, $submac:ident ! ($($rest:tt)*)) => ($submac!(4, $($rest)*)); + (4, $submac:ident ! ($($rest:tt)*)) => ($submac!(5, $($rest)*)); + (5, $submac:ident ! ($($rest:tt)*)) => ($submac!(6, $($rest)*)); + (6, $submac:ident ! ($($rest:tt)*)) => ($submac!(7, $($rest)*)); + (7, $submac:ident ! ($($rest:tt)*)) => ($submac!(8, $($rest)*)); + (8, $submac:ident ! ($($rest:tt)*)) => ($submac!(9, $($rest)*)); + (9, $submac:ident ! ($($rest:tt)*)) => ($submac!(10, $($rest)*)); + (10, $submac:ident ! ($($rest:tt)*)) => ($submac!(11, $($rest)*)); + (11, $submac:ident ! ($($rest:tt)*)) => ($submac!(12, $($rest)*)); + (12, $submac:ident ! ($($rest:tt)*)) => ($submac!(13, $($rest)*)); + (13, $submac:ident ! ($($rest:tt)*)) => ($submac!(14, $($rest)*)); + (14, $submac:ident ! ($($rest:tt)*)) => ($submac!(15, $($rest)*)); + (15, $submac:ident ! ($($rest:tt)*)) => ($submac!(16, $($rest)*)); + (16, $submac:ident ! ($($rest:tt)*)) => ($submac!(17, $($rest)*)); + (17, $submac:ident ! ($($rest:tt)*)) => ($submac!(18, $($rest)*)); + (18, $submac:ident ! ($($rest:tt)*)) => ($submac!(19, $($rest)*)); + (19, $submac:ident ! ($($rest:tt)*)) => ($submac!(20, $($rest)*)); + (20, $submac:ident ! ($($rest:tt)*)) => ($submac!(21, $($rest)*)); +); + macro_rules! alt_trait_inner( - ($it:tt, $self:expr, $input:expr, $err:expr, $head:ident $($id:ident)+) => ( - match $self.$it.parse_next($input.clone()) { + ($it:tt, $self:expr, $input:expr, $start:ident, $err:expr, $head:ident $($id:ident)+) => ({ + $input.reset($start.clone()); + match $self.$it.parse_next($input) { Err(ErrMode::Backtrack(e)) => { let err = $err.or(e); - succ!($it, alt_trait_inner!($self, $input, err, $($id)+)) + succ!($it, alt_trait_inner!($self, $input, $start, err, $($id)+)) } res => res, } - ); - ($it:tt, $self:expr, $input:expr, $err:expr, $head:ident) => ( + }); + ($it:tt, $self:expr, $input:expr, $start:ident, $err:expr, $head:ident) => ({ Err(ErrMode::Backtrack($err.append($input, ErrorKind::Alt))) - ); + }); ); alt_trait!(Alt2 Alt3 Alt4 Alt5 Alt6 Alt7 Alt8 Alt9 Alt10 Alt11 Alt12 Alt13 Alt14 Alt15 Alt16 Alt17 Alt18 Alt19 Alt20 Alt21 Alt22); // Manually implement Alt for (A,), the 1-tuple type -impl<I, O, E: ParseError<I>, A: Parser<I, O, E>> Alt<I, O, E> for (A,) { - fn choice(&mut self, input: I) -> IResult<I, O, E> { +impl<I, O, E: ParserError<I>, A: Parser<I, O, E>> Alt<I, O, E> for (A,) { + fn choice(&mut self, input: &mut I) -> PResult<O, E> { self.0.parse_next(input) } } @@ -198,27 +242,29 @@ macro_rules! permutation_trait( macro_rules! 
permutation_trait_impl( ($($name:ident $ty:ident $item:ident),+) => ( impl< - I: Clone, $($ty),+ , Error: ParseError<I>, + I: Stream, $($ty),+ , Error: ParserError<I>, $($name: Parser<I, $ty, Error>),+ > Permutation<I, ( $($ty),+ ), Error> for ( $($name),+ ) { - fn permutation(&mut self, mut input: I) -> IResult<I, ( $($ty),+ ), Error> { + fn permutation(&mut self, input: &mut I) -> PResult<( $($ty),+ ), Error> { let mut res = ($(Option::<$ty>::None),+); loop { let mut err: Option<Error> = None; - permutation_trait_inner!(0, self, input, res, err, $($name)+); + let start = input.checkpoint(); + permutation_trait_inner!(0, self, input, start, res, err, $($name)+); // If we reach here, every iterator has either been applied before, // or errored on the remaining input if let Some(err) = err { // There are remaining parsers, and all errored on the remaining input + input.reset(start.clone()); return Err(ErrMode::Backtrack(err.append(input, ErrorKind::Alt))); } // All parsers were applied match res { - ($(Some($item)),+) => return Ok((input, ($($item),+))), + ($(Some($item)),+) => return Ok(($($item),+)), _ => unreachable!(), } } @@ -228,11 +274,11 @@ macro_rules! permutation_trait_impl( ); macro_rules! permutation_trait_inner( - ($it:tt, $self:expr, $input:ident, $res:expr, $err:expr, $head:ident $($id:ident)*) => ( + ($it:tt, $self:expr, $input:ident, $start:ident, $res:expr, $err:expr, $head:ident $($id:ident)*) => ( if $res.$it.is_none() { - match $self.$it.parse_next($input.clone()) { - Ok((i, o)) => { - $input = i; + $input.reset($start.clone()); + match $self.$it.parse_next($input) { + Ok(o) => { $res.$it = Some(o); continue; } @@ -245,9 +291,9 @@ macro_rules! permutation_trait_inner( Err(e) => return Err(e), }; } - succ!($it, permutation_trait_inner!($self, $input, $res, $err, $($id)*)); + succ!($it, permutation_trait_inner!($self, $input, $start, $res, $err, $($id)*)); ); - ($it:tt, $self:expr, $input:ident, $res:expr, $err:expr,) => (); + ($it:tt, $self:expr, $input:ident, $start:ident, $res:expr, $err:expr,) => (); ); permutation_trait!( diff --git a/vendor/winnow/src/combinator/core.rs b/vendor/winnow/src/combinator/core.rs new file mode 100644 index 0000000..2a92d64 --- /dev/null +++ b/vendor/winnow/src/combinator/core.rs @@ -0,0 +1,491 @@ +use crate::error::{ErrMode, ErrorKind, Needed, ParserError}; +use crate::stream::Stream; +use crate::trace::trace; +use crate::*; + +/// Return the remaining input. +/// +/// # Example +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::error::ErrorKind; +/// # use winnow::error::InputError; +/// use winnow::combinator::rest; +/// assert_eq!(rest::<_,InputError<_>>.parse_peek("abc"), Ok(("", "abc"))); +/// assert_eq!(rest::<_,InputError<_>>.parse_peek(""), Ok(("", ""))); +/// ``` +#[inline] +pub fn rest<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: Stream, +{ + trace("rest", move |input: &mut I| Ok(input.finish())).parse_next(input) +} + +/// Return the length of the remaining input. 
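A quick aside on the calling convention used throughout this new `combinator/core.rs`: every entry point takes `&mut I` and returns `PResult<O, E>`, advancing the stream in place, while `parse_peek` (used by the doc tests in this hunk) adapts any parser back to the older `(remaining, output)` style. A usage sketch contrasting the two, assuming only the `Parser::parse_next`/`parse_peek` methods these doc tests already exercise:

```rust
use winnow::ascii::digit1;
use winnow::error::InputError;
use winnow::prelude::*;

fn main() {
    // New style: the stream is a &mut reference, advanced in place.
    let mut input = "123abc";
    let out = digit1::<_, InputError<_>>.parse_next(&mut input);
    assert_eq!(out, Ok("123"));
    assert_eq!(input, "abc"); // the binding itself moved forward

    // Compatibility style: parse_peek takes the stream by value and
    // hands back the classic (remaining, output) pair.
    assert_eq!(
        digit1::<_, InputError<_>>.parse_peek("123abc"),
        Ok(("abc", "123"))
    );
}
```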
+/// +/// Note: this does not advance the [`Stream`] +/// +/// # Example +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::error::ErrorKind; +/// # use winnow::error::InputError; +/// use winnow::combinator::rest_len; +/// assert_eq!(rest_len::<_,InputError<_>>.parse_peek("abc"), Ok(("abc", 3))); +/// assert_eq!(rest_len::<_,InputError<_>>.parse_peek(""), Ok(("", 0))); +/// ``` +#[inline] +pub fn rest_len<I, E: ParserError<I>>(input: &mut I) -> PResult<usize, E> +where + I: Stream, +{ + trace("rest_len", move |input: &mut I| { + let len = input.eof_offset(); + Ok(len) + }) + .parse_next(input) +} + +/// Apply a [`Parser`], producing `None` on [`ErrMode::Backtrack`]. +/// +/// To chain an error up, see [`cut_err`]. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; +/// # use winnow::prelude::*; +/// use winnow::combinator::opt; +/// use winnow::ascii::alpha1; +/// # fn main() { +/// +/// fn parser(i: &str) -> IResult<&str, Option<&str>> { +/// opt(alpha1).parse_peek(i) +/// } +/// +/// assert_eq!(parser("abcd;"), Ok((";", Some("abcd")))); +/// assert_eq!(parser("123;"), Ok(("123;", None))); +/// # } +/// ``` +pub fn opt<I: Stream, O, E: ParserError<I>, F>(mut f: F) -> impl Parser<I, Option<O>, E> +where + F: Parser<I, O, E>, +{ + trace("opt", move |input: &mut I| { + let start = input.checkpoint(); + match f.parse_next(input) { + Ok(o) => Ok(Some(o)), + Err(ErrMode::Backtrack(_)) => { + input.reset(start); + Ok(None) + } + Err(e) => Err(e), + } + }) +} + +/// Calls the parser if the condition is met. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, IResult}; +/// # use winnow::prelude::*; +/// use winnow::combinator::cond; +/// use winnow::ascii::alpha1; +/// # fn main() { +/// +/// fn parser(b: bool, i: &str) -> IResult<&str, Option<&str>> { +/// cond(b, alpha1).parse_peek(i) +/// } +/// +/// assert_eq!(parser(true, "abcd;"), Ok((";", Some("abcd")))); +/// assert_eq!(parser(false, "abcd;"), Ok(("abcd;", None))); +/// assert_eq!(parser(true, "123;"), Err(ErrMode::Backtrack(InputError::new("123;", ErrorKind::Slice)))); +/// assert_eq!(parser(false, "123;"), Ok(("123;", None))); +/// # } +/// ``` +pub fn cond<I, O, E: ParserError<I>, F>(b: bool, mut f: F) -> impl Parser<I, Option<O>, E> +where + I: Stream, + F: Parser<I, O, E>, +{ + trace("cond", move |input: &mut I| { + if b { + f.parse_next(input).map(Some) + } else { + Ok(None) + } + }) +} + +/// Tries to apply its parser without consuming the input. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, IResult}; +/// # use winnow::prelude::*; +/// use winnow::combinator::peek; +/// use winnow::ascii::alpha1; +/// # fn main() { +/// +/// let mut parser = peek(alpha1); +/// +/// assert_eq!(parser.parse_peek("abcd;"), Ok(("abcd;", "abcd"))); +/// assert_eq!(parser.parse_peek("123;"), Err(ErrMode::Backtrack(InputError::new("123;", ErrorKind::Slice)))); +/// # } +/// ``` +#[doc(alias = "look_ahead")] +#[doc(alias = "rewind")] +pub fn peek<I: Stream, O, E: ParserError<I>, F>(mut f: F) -> impl Parser<I, O, E> +where + F: Parser<I, O, E>, +{ + trace("peek", move |input: &mut I| { + let start = input.checkpoint(); + let res = f.parse_next(input); + input.reset(start); + res + }) +} + +/// Match the end of the [`Stream`] +/// +/// Otherwise, it will error. 
+/// +/// # Example +/// +/// ```rust +/// # use std::str; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; +/// # use winnow::combinator::eof; +/// # use winnow::prelude::*; +/// +/// let mut parser = eof; +/// assert_eq!(parser.parse_peek("abc"), Err(ErrMode::Backtrack(InputError::new("abc", ErrorKind::Eof)))); +/// assert_eq!(parser.parse_peek(""), Ok(("", ""))); +/// ``` +#[doc(alias = "end")] +#[doc(alias = "eoi")] +pub fn eof<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: Stream, +{ + trace("eof", move |input: &mut I| { + if input.eof_offset() == 0 { + Ok(input.next_slice(0)) + } else { + Err(ErrMode::from_error_kind(input, ErrorKind::Eof)) + } + }) + .parse_next(input) +} + +/// Succeeds if the child parser returns an error. +/// +/// **Note:** This does not advance the [`Stream`] +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, IResult}; +/// # use winnow::prelude::*; +/// use winnow::combinator::not; +/// use winnow::ascii::alpha1; +/// # fn main() { +/// +/// let mut parser = not(alpha1); +/// +/// assert_eq!(parser.parse_peek("123"), Ok(("123", ()))); +/// assert_eq!(parser.parse_peek("abcd"), Err(ErrMode::Backtrack(InputError::new("abcd", ErrorKind::Not)))); +/// # } +/// ``` +pub fn not<I: Stream, O, E: ParserError<I>, F>(mut parser: F) -> impl Parser<I, (), E> +where + F: Parser<I, O, E>, +{ + trace("not", move |input: &mut I| { + let start = input.checkpoint(); + let res = parser.parse_next(input); + input.reset(start); + match res { + Ok(_) => Err(ErrMode::from_error_kind(input, ErrorKind::Not)), + Err(ErrMode::Backtrack(_)) => Ok(()), + Err(e) => Err(e), + } + }) +} + +/// Transforms an [`ErrMode::Backtrack`] (recoverable) to [`ErrMode::Cut`] (unrecoverable) +/// +/// This commits the parse result, preventing alternative branch paths like with +/// [`winnow::combinator::alt`][crate::combinator::alt]. 
+/// +/// # Example +/// +/// Without `cut_err`: +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; +/// # use winnow::token::one_of; +/// # use winnow::ascii::digit1; +/// # use winnow::combinator::rest; +/// # use winnow::combinator::alt; +/// # use winnow::combinator::preceded; +/// # use winnow::prelude::*; +/// # fn main() { +/// +/// fn parser(input: &str) -> IResult<&str, &str> { +/// alt(( +/// preceded(one_of(['+', '-']), digit1), +/// rest +/// )).parse_peek(input) +/// } +/// +/// assert_eq!(parser("+10 ab"), Ok((" ab", "10"))); +/// assert_eq!(parser("ab"), Ok(("", "ab"))); +/// assert_eq!(parser("+"), Ok(("", "+"))); +/// # } +/// ``` +/// +/// With `cut_err`: +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; +/// # use winnow::prelude::*; +/// # use winnow::token::one_of; +/// # use winnow::ascii::digit1; +/// # use winnow::combinator::rest; +/// # use winnow::combinator::alt; +/// # use winnow::combinator::preceded; +/// use winnow::combinator::cut_err; +/// # fn main() { +/// +/// fn parser(input: &str) -> IResult<&str, &str> { +/// alt(( +/// preceded(one_of(['+', '-']), cut_err(digit1)), +/// rest +/// )).parse_peek(input) +/// } +/// +/// assert_eq!(parser("+10 ab"), Ok((" ab", "10"))); +/// assert_eq!(parser("ab"), Ok(("", "ab"))); +/// assert_eq!(parser("+"), Err(ErrMode::Cut(InputError::new("", ErrorKind::Slice )))); +/// # } +/// ``` +pub fn cut_err<I, O, E: ParserError<I>, F>(mut parser: F) -> impl Parser<I, O, E> +where + I: Stream, + F: Parser<I, O, E>, +{ + trace("cut_err", move |input: &mut I| { + parser.parse_next(input).map_err(|e| e.cut()) + }) +} + +/// Transforms an [`ErrMode::Cut`] (unrecoverable) to [`ErrMode::Backtrack`] (recoverable) +/// +/// This attempts the parse, allowing other parsers to be tried on failure, like with +/// [`winnow::combinator::alt`][crate::combinator::alt]. +pub fn backtrack_err<I, O, E: ParserError<I>, F>(mut parser: F) -> impl Parser<I, O, E> +where + I: Stream, + F: Parser<I, O, E>, +{ + trace("backtrack_err", move |input: &mut I| { + parser.parse_next(input).map_err(|e| e.backtrack()) + }) +} + +/// A placeholder for a not-yet-implemented [`Parser`] +/// +/// This is analogous to the [`todo!`] macro and helps with prototyping. +/// +/// # Panic +/// +/// This will panic when parsing +/// +/// # Example +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::combinator::todo; +/// +/// fn parser(input: &mut &str) -> PResult<u64> { +/// todo(input) +/// } +/// ``` +#[track_caller] +pub fn todo<I, O, E>(input: &mut I) -> PResult<O, E> +where + I: Stream, +{ + #![allow(clippy::todo)] + trace("todo", move |_input: &mut I| todo!("unimplemented parse")).parse_next(input) +} + +/// Repeats the embedded parser, lazily returning the results +/// +/// Call the iterator's [`ParserIterator::finish`] method to get the remaining input if successful, +/// or the error value if we encountered an error. +/// +/// On [`ErrMode::Backtrack`], iteration will stop. To instead chain an error up, see [`cut_err`]. 
+/// +/// # Example +/// +/// ```rust +/// use winnow::{combinator::iterator, IResult, token::tag, ascii::alpha1, combinator::terminated}; +/// use std::collections::HashMap; +/// +/// let data = "abc|defg|hijkl|mnopqr|123"; +/// let mut it = iterator(data, terminated(alpha1, "|")); +/// +/// let parsed = it.map(|v| (v, v.len())).collect::<HashMap<_,_>>(); +/// let res: IResult<_,_> = it.finish(); +/// +/// assert_eq!(parsed, [("abc", 3usize), ("defg", 4), ("hijkl", 5), ("mnopqr", 6)].iter().cloned().collect()); +/// assert_eq!(res, Ok(("123", ()))); +/// ``` +pub fn iterator<I, O, E, F>(input: I, parser: F) -> ParserIterator<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream, + E: ParserError<I>, +{ + ParserIterator { + parser, + input, + state: Some(State::Running), + o: Default::default(), + } +} + +/// Main structure associated to [`iterator`]. +pub struct ParserIterator<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream, +{ + parser: F, + input: I, + state: Option<State<E>>, + o: core::marker::PhantomData<O>, +} + +impl<F, I, O, E> ParserIterator<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream, +{ + /// Returns the remaining input if parsing was successful, or the error if we encountered an error. + pub fn finish(mut self) -> PResult<(I, ()), E> { + match self.state.take().unwrap() { + State::Running | State::Done => Ok((self.input, ())), + State::Failure(e) => Err(ErrMode::Cut(e)), + State::Incomplete(i) => Err(ErrMode::Incomplete(i)), + } + } +} + +impl<'a, F, I, O, E> core::iter::Iterator for &'a mut ParserIterator<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream, +{ + type Item = O; + + fn next(&mut self) -> Option<Self::Item> { + if let State::Running = self.state.take().unwrap() { + let start = self.input.checkpoint(); + + match self.parser.parse_next(&mut self.input) { + Ok(o) => { + self.state = Some(State::Running); + Some(o) + } + Err(ErrMode::Backtrack(_)) => { + self.input.reset(start); + self.state = Some(State::Done); + None + } + Err(ErrMode::Cut(e)) => { + self.state = Some(State::Failure(e)); + None + } + Err(ErrMode::Incomplete(i)) => { + self.state = Some(State::Incomplete(i)); + None + } + } + } else { + None + } + } +} + +enum State<E> { + Running, + Done, + Failure(E), + Incomplete(Needed), +} + +/// Always succeeds with given value without consuming any input. +/// +/// For example, it can be used as the last alternative in `alt` to +/// specify the default case. +/// +/// **Note:** This never advances the [`Stream`] +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; +/// # use winnow::prelude::*; +/// use winnow::combinator::alt; +/// use winnow::combinator::success; +/// +/// let mut parser = success::<_,_,InputError<_>>(10); +/// assert_eq!(parser.parse_peek("xyz"), Ok(("xyz", 10))); +/// +/// fn sign(input: &str) -> IResult<&str, isize> { +/// alt(( +/// '-'.value(-1), +/// '+'.value(1), +/// success::<_,_,InputError<_>>(1) +/// )).parse_peek(input) +/// } +/// assert_eq!(sign("+10"), Ok(("10", 1))); +/// assert_eq!(sign("-10"), Ok(("10", -1))); +/// assert_eq!(sign("10"), Ok(("10", 1))); +/// ``` +#[doc(alias = "value")] +#[doc(alias = "empty")] +pub fn success<I: Stream, O: Clone, E: ParserError<I>>(val: O) -> impl Parser<I, O, E> { + trace("success", move |_input: &mut I| Ok(val.clone())) +} + +/// A parser which always fails. +/// +/// For example, it can be used as the last alternative in `alt` to +/// control the error message given. 
+/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, IResult}; +/// # use winnow::prelude::*; +/// use winnow::combinator::fail; +/// +/// let s = "string"; +/// assert_eq!(fail::<_, &str, _>.parse_peek(s), Err(ErrMode::Backtrack(InputError::new(s, ErrorKind::Fail)))); +/// ``` +#[doc(alias = "unexpected")] +pub fn fail<I: Stream, O, E: ParserError<I>>(i: &mut I) -> PResult<O, E> { + trace("fail", |i: &mut I| { + Err(ErrMode::from_error_kind(i, ErrorKind::Fail)) + }) + .parse_next(i) +} diff --git a/vendor/winnow/src/combinator/mod.rs b/vendor/winnow/src/combinator/mod.rs index 4276b06..a0acf87 100644 --- a/vendor/winnow/src/combinator/mod.rs +++ b/vendor/winnow/src/combinator/mod.rs @@ -6,76 +6,74 @@ //! //! Those are used to recognize the lowest level elements of your grammar, like, "here is a dot", or "here is an big endian integer". //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [`one_of`][crate::bytes::one_of] | `one_of("abc")` | `"abc"` | `Ok(("bc", 'a'))` |Matches one of the provided characters (works with non ASCII characters too)| -//! | [`none_of`][crate::bytes::none_of] | `none_of("abc")` | `"xyab"` | `Ok(("yab", 'x'))` |Matches anything but the provided characters| -//! | [`tag`][crate::bytes::tag] | `"hello"` | `"hello world"` | `Ok((" world", "hello"))` |Recognizes a specific suite of characters or bytes| -//! | [`tag_no_case`][crate::bytes::tag_no_case] | `tag_no_case("hello")` | `"HeLLo World"` | `Ok((" World", "HeLLo"))` |Case insensitive comparison. Note that case insensitive comparison is not well defined for unicode, and that you might have bad surprises| -//! | [`take`][crate::bytes::take] | `take(4)` | `"hello"` | `Ok(("o", "hell"))` |Takes a specific number of bytes or characters| -//! | [`take_while0`][crate::bytes::take_while0] | `take_while0(is_alphabetic)` | `"abc123"` | `Ok(("123", "abc"))` |Returns the longest list of bytes for which the provided pattern matches. `take_while1` does the same, but must return at least one character| -//! | [`take_till0`][crate::bytes::take_till0] | `take_till0(is_alphabetic)` | `"123abc"` | `Ok(("abc", "123"))` |Returns the longest list of bytes or characters until the provided pattern matches. `take_till1` does the same, but must return at least one character. This is the reverse behaviour from `take_while0`: `take_till(f)` is equivalent to `take_while0(\|c\| !f(c))`| -//! | [`take_until0`][crate::bytes::take_until0] | `take_until0("world")` | `"Hello world"` | `Ok(("world", "Hello "))` |Returns the longest list of bytes or characters until the provided tag is found. `take_until1` does the same, but must return at least one character| +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`one_of`][crate::token::one_of] | `one_of(['a', 'b', 'c'])` | `"abc"` | `"bc"` | `Ok('a')` |Matches one of the provided characters (works with non ASCII characters too)| +//! | [`none_of`][crate::token::none_of] | `none_of(['a', 'b', 'c'])` | `"xyab"` | `"yab"` | `Ok('x')` |Matches anything but the provided characters| +//! | [`tag`][crate::token::tag] | `"hello"` | `"hello world"` | `" world"` | `Ok("hello")` |Recognizes a specific suite of characters or bytes (see also [`Caseless`][crate::ascii::Caseless])| +//! | [`take`][crate::token::take] | `take(4)` | `"hello"` | `"o"` | `Ok("hell")` |Takes a specific number of bytes or characters| +//! 
| [`take_while`][crate::token::take_while] | `take_while(0.., is_alphabetic)` | `"abc123"` | `"123"` | `Ok("abc")` |Returns the longest list of bytes for which the provided pattern matches.| +//! | [`take_till0`][crate::token::take_till0] | `take_till0(is_alphabetic)` | `"123abc"` | `"abc"` | `Ok("123")` |Returns the longest list of bytes or characters until the provided pattern matches. `take_till1` does the same, but must return at least one character. This is the reverse behaviour from `take_while`: `take_till(f)` is equivalent to `take_while(0.., \|c\| !f(c))`| +//! | [`take_until0`][crate::token::take_until0] | `take_until0("world")` | `"Hello world"` | `"world"` | `Ok("Hello ")` |Returns the longest list of bytes or characters until the provided tag is found. `take_until1` does the same, but must return at least one character| //! //! ## Choice combinators //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [`alt`][crate::branch::alt] | `alt(("ab", "cd"))` | `"cdef"` | `Ok(("ef", "cd"))` |Try a list of parsers and return the result of the first successful one| -//! | [`dispatch`][crate::branch::dispatch] | \- | \- | \- | `match` for parsers | -//! | [`permutation`][crate::branch::permutation] | `permutation(("ab", "cd", "12"))` | `"cd12abc"` | `Ok(("c", ("ab", "cd", "12"))` |Succeeds when all its child parser have succeeded, whatever the order| +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`alt`] | `alt(("ab", "cd"))` | `"cdef"` | `"ef"` | `Ok("cd")` |Try a list of parsers and return the result of the first successful one| +//! | [`dispatch`] | \- | \- | \- | \- | `match` for parsers | +//! | [`permutation`] | `permutation(("ab", "cd", "12"))` | `"cd12abc"` | `"c"` | `Ok(("ab", "cd", "12"))` |Succeeds when all its child parser have succeeded, whatever the order| //! //! ## Sequence combinators //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [`(...)` (tuples)][crate::Parser] | `("ab", "XY", take(1))` | `"abXYZ!"` | `Ok(("!", ("ab", "XY", "Z")))` |Chains parsers and assemble the sub results in a tuple. You can use as many child parsers as you can put elements in a tuple| -//! | [`delimited`][crate::sequence::delimited] | `delimited(char('('), take(2), char(')'))` | `"(ab)cd"` | `Ok(("cd", "ab"))` || -//! | [`preceded`][crate::sequence::preceded] | `preceded("ab", "XY")` | `"abXYZ"` | `Ok(("Z", "XY"))` || -//! | [`terminated`][crate::sequence::terminated] | `terminated("ab", "XY")` | `"abXYZ"` | `Ok(("Z", "ab"))` || -//! | [`separated_pair`][crate::sequence::separated_pair] | `separated_pair("hello", char(','), "world")` | `"hello,world!"` | `Ok(("!", ("hello", "world")))` || +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`(...)` (tuples)][crate::Parser] | `("ab", "XY", take(1))` | `"abXYZ!"` | `"!"` | `Ok(("ab", "XY", "Z"))` |Chains parsers and assemble the sub results in a tuple. You can use as many child parsers as you can put elements in a tuple| +//! | [`seq!`] | `seq!(_: char('('), take(2), _: char(')'))` | `"(ab)cd"` | `"cd"` | `Ok("ab")` || +//! | [`delimited`] | `delimited(char('('), take(2), char(')'))` | `"(ab)cd"` | `"cd"` | `Ok("ab")` || +//! | [`preceded`] | `preceded("ab", "XY")` | `"abXYZ"` | `"Z"` | `Ok("XY")` || +//! | [`terminated`] | `terminated("ab", "XY")` | `"abXYZ"` | `"Z"` | `Ok("ab")` || +//! 
| [`separated_pair`] | `separated_pair("hello", char(','), "world")` | `"hello,world!"` | `"!"` | `Ok(("hello", "world"))` || //! //! ## Applying a parser multiple times //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [`count`][crate::multi::count] | `count(take(2), 3)` | `"abcdefgh"` | `Ok(("gh", vec!["ab", "cd", "ef"]))` |Applies the child parser a specified number of times| -//! | [`many0`][crate::multi::many0] | `many0("ab")` | `"abababc"` | `Ok(("c", vec!["ab", "ab", "ab"]))` |Applies the parser 0 or more times and returns the list of results in a Vec. `many1` does the same operation but must return at least one element| -//! | [`many_m_n`][crate::multi::many_m_n] | `many_m_n(1, 3, "ab")` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| -//! | [`many_till0`][crate::multi::many_till0] | `many_till0(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| -//! | [`separated0`][crate::multi::separated0] | `separated0("ab", ",")` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated1` works like `separated0` but must returns at least one element| -//! | [`fold_many0`][crate::multi::fold_many0] | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time| -//! | [`fold_many_m_n`][crate::multi::fold_many_m_n] | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value| -//! | [`length_count`][crate::multi::length_count] | `length_count(number, "ab")` | `"2ababab"` | `Ok(("ab", vec!["ab", "ab"]))` |Gets a number from the first parser, then applies the second parser that many times| +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`repeat`] | `repeat(1..=3, "ab")` | `"ababc"` | `"c"` | `Ok(vec!["ab", "ab"])` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| +//! | [`repeat_till0`] | `repeat_till0(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `"g"` | `Ok((vec!["ab", "ab"], "ef"))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| +//! | [`separated`] | `separated(1..=3, "ab", ",")` | `"ab,ab,ab."` | `"."` | `Ok(vec!["ab", "ab", "ab"])` |Applies the parser and separator between m and n times (n included) and returns the list of results in a Vec| +//! | [`fold_repeat`] | `fold_repeat(1..=2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `[3]` | `Ok(3)` |Applies the parser between m and n times (n included) and folds the list of return value| //! //! ## Partial related //! -//! - [`eof`][eof]: Returns its input if it is at the end of input data +//! - [`eof`]: Returns its input if it is at the end of input data //! - [`Parser::complete_err`]: Replaces an `Incomplete` returned by the child parser with an `Backtrack` //! //! ## Modifiers //! -//! - [`cond`][cond]: Conditional combinator. Wraps another parser and calls it if the condition is met -//! 
- [`Parser::flat_map`][crate::Parser::flat_map]: method to map a new parser from the output of the first parser, then apply that parser over the rest of the input -//! - [`Parser::value`][crate::Parser::value]: method to replace the result of a parser -//! - [`Parser::map`][crate::Parser::map]: method to map a function on the result of a parser -//! - [`Parser::and_then`][crate::Parser::and_then]: Applies a second parser over the output of the first one -//! - [`Parser::verify_map`][Parser::verify_map]: Maps a function returning an `Option` on the output of a parser -//! - [`Parser::map_res`][Parser::map_res]: Maps a function returning a `Result` on the output of a parser -//! - [`Parser::parse_to`][crate::Parser::parse_to]: Apply [`std::str::FromStr`] to the output of the parser -//! - [`not`][not]: Returns a result only if the embedded parser returns `Backtrack` or `Incomplete`. Does not consume the input -//! - [`opt`][opt]: Make the underlying parser optional -//! - [`peek`][peek]: Returns a result without consuming the input -//! - [`Parser::recognize`][Parser::recognize]: If the child parser was successful, return the consumed input as the produced value -//! - [`Parser::with_recognized`][Parser::with_recognized]: If the child parser was successful, return a tuple of the consumed input and the produced output. -//! - [`Parser::span`][Parser::span]: If the child parser was successful, return the location of the consumed input as the produced value -//! - [`Parser::with_span`][Parser::with_span]: If the child parser was successful, return a tuple of the location of the consumed input and the produced output. +//! - [`cond`]: Conditional combinator. Wraps another parser and calls it if the condition is met +//! - [`Parser::flat_map`]: method to map a new parser from the output of the first parser, then apply that parser over the rest of the input +//! - [`Parser::value`]: method to replace the result of a parser +//! - [`Parser::default_value`]: method to replace the result of a parser +//! - [`Parser::void`]: method to discard the result of a parser +//! - [`Parser::map`]: method to map a function on the result of a parser +//! - [`Parser::and_then`]: Applies a second parser over the output of the first one +//! - [`Parser::verify_map`]: Maps a function returning an `Option` on the output of a parser +//! - [`Parser::try_map`]: Maps a function returning a `Result` on the output of a parser +//! - [`Parser::parse_to`]: Apply [`std::str::FromStr`] to the output of the parser +//! - [`not`]: Returns a result only if the embedded parser returns `Backtrack` or `Incomplete`. Does not consume the input +//! - [`opt`]: Make the underlying parser optional +//! - [`peek`]: Returns a result without consuming the input +//! - [`Parser::recognize`]: If the child parser was successful, return the consumed input as the produced value +//! - [`Parser::with_recognized`]: If the child parser was successful, return a tuple of the consumed input and the produced output. +//! - [`Parser::span`]: If the child parser was successful, return the location of the consumed input as the produced value +//! - [`Parser::with_span`]: If the child parser was successful, return a tuple of the location of the consumed input and the produced output. //! - [`Parser::verify`]: Returns the result of the child parser if it satisfies a verification function //! //! ## Error management and debugging //! //! - [`cut_err`]: Commit the parse result, disallowing alternative parsers from being attempted -//! 
- [`backtrack_err`]: Attemmpts a parse, allowing alternative parsers to be attempted despite +//! - [`backtrack_err`]: Attempts a parse, allowing alternative parsers to be attempted despite //! use of `cut_err` //! - [`Parser::context`]: Add context to the error if the parser fails //! - [`trace`][crate::trace::trace]: Print the parse state with the `debug` feature flag @@ -83,39 +81,39 @@ //! //! ## Remaining combinators //! -//! - [`success`][success]: Returns a value without consuming any input, always succeeds -//! - [`fail`][fail]: Inversion of `success`. Always fails. +//! - [`success`]: Returns a value without consuming any input, always succeeds +//! - [`fail`]: Inversion of `success`. Always fails. //! - [`Parser::by_ref`]: Allow moving `&mut impl Parser` into other parsers //! //! ## Text parsing //! -//! - [`any`][crate::bytes::any]: Matches one token -//! - [`tab`][crate::character::tab]: Matches a tab character `\t` -//! - [`crlf`][crate::character::crlf]: Recognizes the string `\r\n` -//! - [`line_ending`][crate::character::line_ending]: Recognizes an end of line (both `\n` and `\r\n`) -//! - [`newline`][crate::character::newline]: Matches a newline character `\n` -//! - [`not_line_ending`][crate::character::not_line_ending]: Recognizes a string of any char except `\r` or `\n` -//! - [`rest`][rest]: Return the remaining input -//! -//! - [`alpha0`][crate::character::alpha0]: Recognizes zero or more lowercase and uppercase alphabetic characters: `[a-zA-Z]`. [`alpha1`][crate::character::alpha1] does the same but returns at least one character -//! - [`alphanumeric0`][crate::character::alphanumeric0]: Recognizes zero or more numerical and alphabetic characters: `[0-9a-zA-Z]`. [`alphanumeric1`][crate::character::alphanumeric1] does the same but returns at least one character -//! - [`space0`][crate::character::space0]: Recognizes zero or more spaces and tabs. [`space1`][crate::character::space1] does the same but returns at least one character -//! - [`multispace0`][crate::character::multispace0]: Recognizes zero or more spaces, tabs, carriage returns and line feeds. [`multispace1`][crate::character::multispace1] does the same but returns at least one character -//! - [`digit0`][crate::character::digit0]: Recognizes zero or more numerical characters: `[0-9]`. [`digit1`][crate::character::digit1] does the same but returns at least one character -//! - [`hex_digit0`][crate::character::hex_digit0]: Recognizes zero or more hexadecimal numerical characters: `[0-9A-Fa-f]`. [`hex_digit1`][crate::character::hex_digit1] does the same but returns at least one character -//! - [`oct_digit0`][crate::character::oct_digit0]: Recognizes zero or more octal characters: `[0-7]`. [`oct_digit1`][crate::character::oct_digit1] does the same but returns at least one character -//! -//! - [`float`][crate::character::float]: Parse a floating point number in a byte string -//! - [`dec_int`][crate::character::dec_uint]: Decode a variable-width, decimal signed integer -//! - [`dec_uint`][crate::character::dec_uint]: Decode a variable-width, decimal unsigned integer -//! - [`hex_uint`][crate::character::hex_uint]: Decode a variable-width, hexadecimal integer -//! -//! - [`escaped`][crate::character::escaped]: Matches a byte string with escaped characters -//! - [`escaped_transform`][crate::character::escaped_transform]: Matches a byte string with escaped characters, and returns a new string with the escaped characters replaced +//! - [`any`][crate::token::any]: Matches one token +//! 
- [`tab`][crate::ascii::tab]: Matches a tab character `\t`
+//! - [`crlf`][crate::ascii::crlf]: Recognizes the string `\r\n`
+//! - [`line_ending`][crate::ascii::line_ending]: Recognizes an end of line (both `\n` and `\r\n`)
+//! - [`newline`][crate::ascii::newline]: Matches a newline character `\n`
+//! - [`not_line_ending`][crate::ascii::not_line_ending]: Recognizes a string of any char except `\r` or `\n`
+//! - [`rest`]: Return the remaining input
+//!
+//! - [`alpha0`][crate::ascii::alpha0]: Recognizes zero or more lowercase and uppercase alphabetic characters: `[a-zA-Z]`. [`alpha1`][crate::ascii::alpha1] does the same but returns at least one character
+//! - [`alphanumeric0`][crate::ascii::alphanumeric0]: Recognizes zero or more numerical and alphabetic characters: `[0-9a-zA-Z]`. [`alphanumeric1`][crate::ascii::alphanumeric1] does the same but returns at least one character
+//! - [`space0`][crate::ascii::space0]: Recognizes zero or more spaces and tabs. [`space1`][crate::ascii::space1] does the same but returns at least one character
+//! - [`multispace0`][crate::ascii::multispace0]: Recognizes zero or more spaces, tabs, carriage returns and line feeds. [`multispace1`][crate::ascii::multispace1] does the same but returns at least one character
+//! - [`digit0`][crate::ascii::digit0]: Recognizes zero or more numerical characters: `[0-9]`. [`digit1`][crate::ascii::digit1] does the same but returns at least one character
+//! - [`hex_digit0`][crate::ascii::hex_digit0]: Recognizes zero or more hexadecimal numerical characters: `[0-9A-Fa-f]`. [`hex_digit1`][crate::ascii::hex_digit1] does the same but returns at least one character
+//! - [`oct_digit0`][crate::ascii::oct_digit0]: Recognizes zero or more octal characters: `[0-7]`. [`oct_digit1`][crate::ascii::oct_digit1] does the same but returns at least one character
+//!
+//! - [`float`][crate::ascii::float]: Parse a floating point number in a byte string
+//! - [`dec_int`][crate::ascii::dec_int]: Decode a variable-width, decimal signed integer
+//! - [`dec_uint`][crate::ascii::dec_uint]: Decode a variable-width, decimal unsigned integer
+//! - [`hex_uint`][crate::ascii::hex_uint]: Decode a variable-width, hexadecimal integer
+//!
+//! - [`escaped`][crate::ascii::escaped]: Matches a byte string with escaped characters
+//! - [`escaped_transform`][crate::ascii::escaped_transform]: Matches a byte string with escaped characters, and returns a new string with the escaped characters replaced
 //!
 //! ### Character test functions
 //!
-//! Use these functions with a combinator like `take_while0`:
+//! Use these functions with a combinator like `take_while`:
 //!
 //! - [`AsChar::is_alpha`][crate::stream::AsChar::is_alpha]: Tests if byte is ASCII alphabetic: `[A-Za-z]`
 //! - [`AsChar::is_alphanum`][crate::stream::AsChar::is_alphanum]: Tests if byte is ASCII alphanumeric: `[A-Za-z0-9]`
@@ -127,1345 +125,50 @@
 //!
 //! ## Binary format parsing
 //!
-//! - [`length_data`][crate::multi::length_data]: Gets a number from the first parser, then takes a subslice of the input of that size, and returns that subslice
-//! - [`length_value`][crate::multi::length_value]: Gets a number from the first parser, takes a subslice of the input of that size, then applies the second parser on that subslice. If the second parser returns `Incomplete`, `length_value` will return an error
+//! - [`length_count`][crate::binary::length_count]: Gets a number from the first parser, then applies the second parser that many times
+//! 
- [`length_data`][crate::binary::length_data]: Gets a number from the first parser, then takes a subslice of the input of that size, and returns that subslice +//! - [`length_value`][crate::binary::length_value]: Gets a number from the first parser, takes a subslice of the input of that size, then applies the second parser on that subslice. If the second parser returns `Incomplete`, `length_value` will return an error //! //! ### Integers //! //! Parsing integers from binary formats can be done in two ways: With parser functions, or combinators with configurable endianness. //! -//! - **configurable endianness:** [`i16`][crate::number::i16], [`i32`][crate::number::i32], -//! [`i64`][crate::number::i64], [`u16`][crate::number::u16], [`u32`][crate::number::u32], -//! [`u64`][crate::number::u64] are combinators that take as argument a -//! [`winnow::number::Endianness`][crate::number::Endianness], like this: `i16(endianness)`. If the -//! parameter is `winnow::number::Endianness::Big`, parse a big endian `i16` integer, otherwise a +//! - **configurable endianness:** [`i16`][crate::binary::i16], [`i32`][crate::binary::i32], +//! [`i64`][crate::binary::i64], [`u16`][crate::binary::u16], [`u32`][crate::binary::u32], +//! [`u64`][crate::binary::u64] are combinators that take as argument a +//! [`winnow::binary::Endianness`][crate::binary::Endianness], like this: `i16(endianness)`. If the +//! parameter is `winnow::binary::Endianness::Big`, parse a big endian `i16` integer, otherwise a //! little endian `i16` integer. //! - **fixed endianness**: The functions are prefixed by `be_` for big endian numbers, and by `le_` for little endian numbers, and the suffix is the type they parse to. As an example, `be_u32` parses a big endian unsigned integer stored in 32 bits. -//! - [`be_f32`][crate::number::be_f32], [`be_f64`][crate::number::be_f64]: Big endian floating point numbers -//! - [`le_f32`][crate::number::le_f32], [`le_f64`][crate::number::le_f64]: Little endian floating point numbers -//! - [`be_i8`][crate::number::be_i8], [`be_i16`][crate::number::be_i16], [`be_i24`][crate::number::be_i24], [`be_i32`][crate::number::be_i32], [`be_i64`][crate::number::be_i64], [`be_i128`][crate::number::be_i128]: Big endian signed integers -//! - [`be_u8`][crate::number::be_u8], [`be_u16`][crate::number::be_u16], [`be_u24`][crate::number::be_u24], [`be_u32`][crate::number::be_u32], [`be_u64`][crate::number::be_u64], [`be_u128`][crate::number::be_u128]: Big endian unsigned integers -//! - [`le_i8`][crate::number::le_i8], [`le_i16`][crate::number::le_i16], [`le_i24`][crate::number::le_i24], [`le_i32`][crate::number::le_i32], [`le_i64`][crate::number::le_i64], [`le_i128`][crate::number::le_i128]: Little endian signed integers -//! - [`le_u8`][crate::number::le_u8], [`le_u16`][crate::number::le_u16], [`le_u24`][crate::number::le_u24], [`le_u32`][crate::number::le_u32], [`le_u64`][crate::number::le_u64], [`le_u128`][crate::number::le_u128]: Little endian unsigned integers +//! - [`be_f32`][crate::binary::be_f32], [`be_f64`][crate::binary::be_f64]: Big endian floating point numbers +//! - [`le_f32`][crate::binary::le_f32], [`le_f64`][crate::binary::le_f64]: Little endian floating point numbers +//! - [`be_i8`][crate::binary::be_i8], [`be_i16`][crate::binary::be_i16], [`be_i24`][crate::binary::be_i24], [`be_i32`][crate::binary::be_i32], [`be_i64`][crate::binary::be_i64], [`be_i128`][crate::binary::be_i128]: Big endian signed integers +//! 
- [`be_u8`][crate::binary::be_u8], [`be_u16`][crate::binary::be_u16], [`be_u24`][crate::binary::be_u24], [`be_u32`][crate::binary::be_u32], [`be_u64`][crate::binary::be_u64], [`be_u128`][crate::binary::be_u128]: Big endian unsigned integers +//! - [`le_i8`][crate::binary::le_i8], [`le_i16`][crate::binary::le_i16], [`le_i24`][crate::binary::le_i24], [`le_i32`][crate::binary::le_i32], [`le_i64`][crate::binary::le_i64], [`le_i128`][crate::binary::le_i128]: Little endian signed integers +//! - [`le_u8`][crate::binary::le_u8], [`le_u16`][crate::binary::le_u16], [`le_u24`][crate::binary::le_u24], [`le_u32`][crate::binary::le_u32], [`le_u64`][crate::binary::le_u64], [`le_u128`][crate::binary::le_u128]: Little endian unsigned integers //! //! ### Bit stream parsing //! -//! - [`bits`][crate::bits::bits]: Transforms the current input type (byte slice `&[u8]`) to a bit stream on which bit specific parsers and more general combinators can be applied -//! - [`bytes`][crate::bits::bytes]: Transforms its bits stream input back into a byte slice for the underlying parser -//! - [`take`][crate::bits::take]: Take a set number of its -//! - [`tag`][crate::bits::tag]: Check if a set number of bis matches a pattern -//! - [`bool`][crate::bits::bool]: Match any one bit +//! - [`bits`][crate::binary::bits::bits]: Transforms the current input type (byte slice `&[u8]`) to a bit stream on which bit specific parsers and more general combinators can be applied +//! - [`bytes`][crate::binary::bits::bytes]: Transforms its bits stream input back into a byte slice for the underlying parser +//! - [`take`][crate::binary::bits::take]: Take a set number of bits +//! - [`tag`][crate::binary::bits::tag]: Check if a set number of bits matches a pattern +//! - [`bool`][crate::binary::bits::bool]: Match any one bit -use crate::error::{ContextError, ErrMode, ErrorKind, FromExternalError, Needed, ParseError}; -use crate::lib::std::borrow::Borrow; -use crate::lib::std::ops::Range; -use crate::stream::{Location, Stream}; -use crate::stream::{Offset, StreamIsPartial}; -use crate::trace::trace; -use crate::trace::trace_result; -use crate::*; +mod branch; +mod core; +mod multi; +mod parser; +mod sequence; #[cfg(test)] mod tests; -/// Return the remaining input. -/// -/// # Example -/// -/// ```rust -/// # use winnow::error::ErrorKind; -/// # use winnow::error::Error; -/// use winnow::combinator::rest; -/// assert_eq!(rest::<_,Error<_>>("abc"), Ok(("", "abc"))); -/// assert_eq!(rest::<_,Error<_>>(""), Ok(("", ""))); -/// ``` -#[inline] -pub fn rest<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: Stream, -{ - trace("rest", move |input: I| { - Ok(input.next_slice(input.eof_offset())) - }) - .parse_next(input) -} - -/// Return the length of the remaining input. 
-/// -/// Note: this does not advance the [`Stream`] -/// -/// # Example -/// -/// ```rust -/// # use winnow::error::ErrorKind; -/// # use winnow::error::Error; -/// use winnow::combinator::rest_len; -/// assert_eq!(rest_len::<_,Error<_>>("abc"), Ok(("abc", 3))); -/// assert_eq!(rest_len::<_,Error<_>>(""), Ok(("", 0))); -/// ``` -#[inline] -pub fn rest_len<I, E: ParseError<I>>(input: I) -> IResult<I, usize, E> -where - I: Stream, -{ - trace("rest_len", move |input: I| { - let len = input.eof_offset(); - Ok((input, len)) - }) - .parse_next(input) -} - -/// Implementation of [`Parser::by_ref`][Parser::by_ref] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct ByRef<'p, P> { - p: &'p mut P, -} - -impl<'p, P> ByRef<'p, P> { - pub(crate) fn new(p: &'p mut P) -> Self { - Self { p } - } -} - -impl<'p, I, O, E, P: Parser<I, O, E>> Parser<I, O, E> for ByRef<'p, P> { - fn parse_next(&mut self, i: I) -> IResult<I, O, E> { - self.p.parse_next(i) - } -} - -/// Implementation of [`Parser::map`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct Map<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: Fn(O) -> O2, -{ - parser: F, - map: G, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - o2: core::marker::PhantomData<O2>, - e: core::marker::PhantomData<E>, -} - -impl<F, G, I, O, O2, E> Map<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: Fn(O) -> O2, -{ - pub(crate) fn new(parser: F, map: G) -> Self { - Self { - parser, - map, - i: Default::default(), - o: Default::default(), - o2: Default::default(), - e: Default::default(), - } - } -} - -impl<F, G, I, O, O2, E> Parser<I, O2, E> for Map<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: Fn(O) -> O2, -{ - fn parse_next(&mut self, i: I) -> IResult<I, O2, E> { - match self.parser.parse_next(i) { - Err(e) => Err(e), - Ok((i, o)) => Ok((i, (self.map)(o))), - } - } -} - -/// Implementation of [`Parser::map_res`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct MapRes<F, G, I, O, O2, E, E2> -where - F: Parser<I, O, E>, - G: FnMut(O) -> Result<O2, E2>, - I: Clone, - E: FromExternalError<I, E2>, -{ - parser: F, - map: G, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - o2: core::marker::PhantomData<O2>, - e: core::marker::PhantomData<E>, - e2: core::marker::PhantomData<E2>, -} - -impl<F, G, I, O, O2, E, E2> MapRes<F, G, I, O, O2, E, E2> -where - F: Parser<I, O, E>, - G: FnMut(O) -> Result<O2, E2>, - I: Clone, - E: FromExternalError<I, E2>, -{ - pub(crate) fn new(parser: F, map: G) -> Self { - Self { - parser, - map, - i: Default::default(), - o: Default::default(), - o2: Default::default(), - e: Default::default(), - e2: Default::default(), - } - } -} - -impl<F, G, I, O, O2, E, E2> Parser<I, O2, E> for MapRes<F, G, I, O, O2, E, E2> -where - F: Parser<I, O, E>, - G: FnMut(O) -> Result<O2, E2>, - I: Clone, - E: FromExternalError<I, E2>, -{ - fn parse_next(&mut self, input: I) -> IResult<I, O2, E> { - let i = input.clone(); - let (input, o) = self.parser.parse_next(input)?; - let res = match (self.map)(o) { - Ok(o2) => Ok((input, o2)), - Err(e) => Err(ErrMode::from_external_error(i, ErrorKind::Verify, e)), - }; - trace_result("verify", &res); - res - } -} - -/// Implementation of [`Parser::verify_map`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct VerifyMap<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: FnMut(O) -> Option<O2>, - I: Clone, - E: ParseError<I>, -{ - parser: F, - map: G, - i: 
core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - o2: core::marker::PhantomData<O2>, - e: core::marker::PhantomData<E>, -} - -impl<F, G, I, O, O2, E> VerifyMap<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: FnMut(O) -> Option<O2>, - I: Clone, - E: ParseError<I>, -{ - pub(crate) fn new(parser: F, map: G) -> Self { - Self { - parser, - map, - i: Default::default(), - o: Default::default(), - o2: Default::default(), - e: Default::default(), - } - } -} - -impl<F, G, I, O, O2, E> Parser<I, O2, E> for VerifyMap<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: FnMut(O) -> Option<O2>, - I: Clone, - E: ParseError<I>, -{ - fn parse_next(&mut self, input: I) -> IResult<I, O2, E> { - let i = input.clone(); - let (input, o) = self.parser.parse_next(input)?; - let res = match (self.map)(o) { - Some(o2) => Ok((input, o2)), - None => Err(ErrMode::from_error_kind(i, ErrorKind::Verify)), - }; - trace_result("verify", &res); - res - } -} - -/// Implementation of [`Parser::and_then`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct AndThen<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: Parser<O, O2, E>, - O: StreamIsPartial, -{ - outer: F, - inner: G, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - o2: core::marker::PhantomData<O2>, - e: core::marker::PhantomData<E>, -} - -impl<F, G, I, O, O2, E> AndThen<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: Parser<O, O2, E>, - O: StreamIsPartial, -{ - pub(crate) fn new(outer: F, inner: G) -> Self { - Self { - outer, - inner, - i: Default::default(), - o: Default::default(), - o2: Default::default(), - e: Default::default(), - } - } -} - -impl<F, G, I, O, O2, E> Parser<I, O2, E> for AndThen<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: Parser<O, O2, E>, - O: StreamIsPartial, -{ - fn parse_next(&mut self, i: I) -> IResult<I, O2, E> { - let (i, mut o) = self.outer.parse_next(i)?; - let _ = o.complete(); - let (_, o2) = self.inner.parse_next(o)?; - Ok((i, o2)) - } -} - -/// Implementation of [`Parser::parse_to`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct ParseTo<P, I, O, O2, E> -where - P: Parser<I, O, E>, - I: Stream, - O: crate::stream::ParseSlice<O2>, - E: ParseError<I>, -{ - p: P, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - o2: core::marker::PhantomData<O2>, - e: core::marker::PhantomData<E>, -} - -impl<P, I, O, O2, E> ParseTo<P, I, O, O2, E> -where - P: Parser<I, O, E>, - I: Stream, - O: crate::stream::ParseSlice<O2>, - E: ParseError<I>, -{ - pub(crate) fn new(p: P) -> Self { - Self { - p, - i: Default::default(), - o: Default::default(), - o2: Default::default(), - e: Default::default(), - } - } -} - -impl<P, I, O, O2, E> Parser<I, O2, E> for ParseTo<P, I, O, O2, E> -where - P: Parser<I, O, E>, - I: Stream, - O: crate::stream::ParseSlice<O2>, - E: ParseError<I>, -{ - fn parse_next(&mut self, i: I) -> IResult<I, O2, E> { - let input = i.clone(); - let (i, o) = self.p.parse_next(i)?; - - let res = o - .parse_slice() - .ok_or_else(|| ErrMode::from_error_kind(input, ErrorKind::Verify)); - trace_result("verify", &res); - Ok((i, res?)) - } -} - -/// Implementation of [`Parser::flat_map`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct FlatMap<F, G, H, I, O, O2, E> -where - F: Parser<I, O, E>, - G: FnMut(O) -> H, - H: Parser<I, O2, E>, -{ - f: F, - g: G, - h: core::marker::PhantomData<H>, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - o2: 
core::marker::PhantomData<O2>, - e: core::marker::PhantomData<E>, -} - -impl<F, G, H, I, O, O2, E> FlatMap<F, G, H, I, O, O2, E> -where - F: Parser<I, O, E>, - G: FnMut(O) -> H, - H: Parser<I, O2, E>, -{ - pub(crate) fn new(f: F, g: G) -> Self { - Self { - f, - g, - h: Default::default(), - i: Default::default(), - o: Default::default(), - o2: Default::default(), - e: Default::default(), - } - } -} - -impl<F, G, H, I, O, O2, E> Parser<I, O2, E> for FlatMap<F, G, H, I, O, O2, E> -where - F: Parser<I, O, E>, - G: FnMut(O) -> H, - H: Parser<I, O2, E>, -{ - fn parse_next(&mut self, i: I) -> IResult<I, O2, E> { - let (i, o) = self.f.parse_next(i)?; - (self.g)(o).parse_next(i) - } -} - -/// Apply a [`Parser`], producing `None` on [`ErrMode::Backtrack`]. -/// -/// To chain an error up, see [`cut_err`]. -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error}; -/// # use winnow::prelude::*; -/// use winnow::combinator::opt; -/// use winnow::character::alpha1; -/// # fn main() { -/// -/// fn parser(i: &str) -> IResult<&str, Option<&str>> { -/// opt(alpha1).parse_next(i) -/// } -/// -/// assert_eq!(parser("abcd;"), Ok((";", Some("abcd")))); -/// assert_eq!(parser("123;"), Ok(("123;", None))); -/// # } -/// ``` -pub fn opt<I: Stream, O, E: ParseError<I>, F>(mut f: F) -> impl Parser<I, Option<O>, E> -where - F: Parser<I, O, E>, -{ - trace("opt", move |input: I| { - let i = input.clone(); - match f.parse_next(input) { - Ok((i, o)) => Ok((i, Some(o))), - Err(ErrMode::Backtrack(_)) => Ok((i, None)), - Err(e) => Err(e), - } - }) -} - -/// Calls the parser if the condition is met. -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, IResult}; -/// # use winnow::prelude::*; -/// use winnow::combinator::cond; -/// use winnow::character::alpha1; -/// # fn main() { -/// -/// fn parser(b: bool, i: &str) -> IResult<&str, Option<&str>> { -/// cond(b, alpha1).parse_next(i) -/// } -/// -/// assert_eq!(parser(true, "abcd;"), Ok((";", Some("abcd")))); -/// assert_eq!(parser(false, "abcd;"), Ok(("abcd;", None))); -/// assert_eq!(parser(true, "123;"), Err(ErrMode::Backtrack(Error::new("123;", ErrorKind::Slice)))); -/// assert_eq!(parser(false, "123;"), Ok(("123;", None))); -/// # } -/// ``` -pub fn cond<I, O, E: ParseError<I>, F>(b: bool, mut f: F) -> impl Parser<I, Option<O>, E> -where - I: Stream, - F: Parser<I, O, E>, -{ - trace("cond", move |input: I| { - if b { - match f.parse_next(input) { - Ok((i, o)) => Ok((i, Some(o))), - Err(e) => Err(e), - } - } else { - Ok((input, None)) - } - }) -} - -/// Tries to apply its parser without consuming the input. -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult}; -/// # use winnow::prelude::*; -/// use winnow::combinator::peek; -/// use winnow::character::alpha1; -/// # fn main() { -/// -/// let mut parser = peek(alpha1); -/// -/// assert_eq!(parser.parse_next("abcd;"), Ok(("abcd;", "abcd"))); -/// assert_eq!(parser.parse_next("123;"), Err(ErrMode::Backtrack(Error::new("123;", ErrorKind::Slice)))); -/// # } -/// ``` -#[doc(alias = "look_ahead")] -#[doc(alias = "rewind")] -pub fn peek<I: Stream, O, E: ParseError<I>, F>(mut f: F) -> impl Parser<I, O, E> -where - F: Parser<I, O, E>, -{ - trace("peek", move |input: I| { - let i = input.clone(); - match f.parse_next(input) { - Ok((_, o)) => Ok((i, o)), - Err(e) => Err(e), - } - }) -} - -/// Match the end of the [`Stream`] -/// -/// Otherwise, it will error. 
-/// -/// # Example -/// -/// ```rust -/// # use std::str; -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error}; -/// # use winnow::combinator::eof; -/// # use winnow::prelude::*; -/// -/// let mut parser = eof; -/// assert_eq!(parser.parse_next("abc"), Err(ErrMode::Backtrack(Error::new("abc", ErrorKind::Eof)))); -/// assert_eq!(parser.parse_next(""), Ok(("", ""))); -/// ``` -#[doc(alias = "end")] -#[doc(alias = "eoi")] -pub fn eof<I, E: ParseError<I>>(input: I) -> IResult<I, <I as Stream>::Slice, E> -where - I: Stream, -{ - trace("eof", move |input: I| { - if input.eof_offset() == 0 { - Ok(input.next_slice(0)) - } else { - Err(ErrMode::from_error_kind(input, ErrorKind::Eof)) - } - }) - .parse_next(input) -} - -/// Implementation of [`Parser::complete_err`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct CompleteErr<F> { - f: F, -} - -impl<F> CompleteErr<F> { - pub(crate) fn new(f: F) -> Self { - Self { f } - } -} - -impl<F, I, O, E> Parser<I, O, E> for CompleteErr<F> -where - I: Stream, - F: Parser<I, O, E>, - E: ParseError<I>, -{ - fn parse_next(&mut self, input: I) -> IResult<I, O, E> { - trace("complete_err", |input: I| { - let i = input.clone(); - match (self.f).parse_next(input) { - Err(ErrMode::Incomplete(_)) => { - Err(ErrMode::from_error_kind(i, ErrorKind::Complete)) - } - rest => rest, - } - }) - .parse_next(input) - } -} - -/// Implementation of [`Parser::verify`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct Verify<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: Fn(&O2) -> bool, - I: Clone, - O: Borrow<O2>, - O2: ?Sized, - E: ParseError<I>, -{ - parser: F, - filter: G, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - o2: core::marker::PhantomData<O2>, - e: core::marker::PhantomData<E>, -} - -impl<F, G, I, O, O2, E> Verify<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: Fn(&O2) -> bool, - I: Clone, - O: Borrow<O2>, - O2: ?Sized, - E: ParseError<I>, -{ - pub(crate) fn new(parser: F, filter: G) -> Self { - Self { - parser, - filter, - i: Default::default(), - o: Default::default(), - o2: Default::default(), - e: Default::default(), - } - } -} - -impl<F, G, I, O, O2, E> Parser<I, O, E> for Verify<F, G, I, O, O2, E> -where - F: Parser<I, O, E>, - G: Fn(&O2) -> bool, - I: Clone, - O: Borrow<O2>, - O2: ?Sized, - E: ParseError<I>, -{ - fn parse_next(&mut self, input: I) -> IResult<I, O, E> { - let i = input.clone(); - let (input, o) = self.parser.parse_next(input)?; - - let res = if (self.filter)(o.borrow()) { - Ok((input, o)) - } else { - Err(ErrMode::from_error_kind(i, ErrorKind::Verify)) - }; - trace_result("verify", &res); - res - } -} - -/// Implementation of [`Parser::value`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct Value<F, I, O, O2, E> -where - F: Parser<I, O, E>, - O2: Clone, -{ - parser: F, - val: O2, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - e: core::marker::PhantomData<E>, -} - -impl<F, I, O, O2, E> Value<F, I, O, O2, E> -where - F: Parser<I, O, E>, - O2: Clone, -{ - pub(crate) fn new(parser: F, val: O2) -> Self { - Self { - parser, - val, - i: Default::default(), - o: Default::default(), - e: Default::default(), - } - } -} - -impl<F, I, O, O2, E> Parser<I, O2, E> for Value<F, I, O, O2, E> -where - F: Parser<I, O, E>, - O2: Clone, -{ - fn parse_next(&mut self, input: I) -> IResult<I, O2, E> { - (self.parser) - .parse_next(input) - .map(|(i, _)| (i, self.val.clone())) - } -} - -/// Implementation of 
[`Parser::void`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct Void<F, I, O, E> -where - F: Parser<I, O, E>, -{ - parser: F, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - e: core::marker::PhantomData<E>, -} - -impl<F, I, O, E> Void<F, I, O, E> -where - F: Parser<I, O, E>, -{ - pub(crate) fn new(parser: F) -> Self { - Self { - parser, - i: Default::default(), - o: Default::default(), - e: Default::default(), - } - } -} - -impl<F, I, O, E> Parser<I, (), E> for Void<F, I, O, E> -where - F: Parser<I, O, E>, -{ - fn parse_next(&mut self, input: I) -> IResult<I, (), E> { - (self.parser).parse_next(input).map(|(i, _)| (i, ())) - } -} - -/// Succeeds if the child parser returns an error. -/// -/// **Note:** This does not advance the [`Stream`] -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult}; -/// # use winnow::prelude::*; -/// use winnow::combinator::not; -/// use winnow::character::alpha1; -/// # fn main() { -/// -/// let mut parser = not(alpha1); -/// -/// assert_eq!(parser.parse_next("123"), Ok(("123", ()))); -/// assert_eq!(parser.parse_next("abcd"), Err(ErrMode::Backtrack(Error::new("abcd", ErrorKind::Not)))); -/// # } -/// ``` -pub fn not<I: Stream, O, E: ParseError<I>, F>(mut parser: F) -> impl Parser<I, (), E> -where - F: Parser<I, O, E>, -{ - trace("not", move |input: I| { - let i = input.clone(); - match parser.parse_next(input) { - Ok(_) => Err(ErrMode::from_error_kind(i, ErrorKind::Not)), - Err(ErrMode::Backtrack(_)) => Ok((i, ())), - Err(e) => Err(e), - } - }) -} - -/// Implementation of [`Parser::recognize`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct Recognize<F, I, O, E> -where - F: Parser<I, O, E>, - I: Stream + Offset, -{ - parser: F, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - e: core::marker::PhantomData<E>, -} - -impl<F, I, O, E> Recognize<F, I, O, E> -where - F: Parser<I, O, E>, - I: Stream + Offset, -{ - pub(crate) fn new(parser: F) -> Self { - Self { - parser, - i: Default::default(), - o: Default::default(), - e: Default::default(), - } - } -} - -impl<I, O, E, F> Parser<I, <I as Stream>::Slice, E> for Recognize<F, I, O, E> -where - F: Parser<I, O, E>, - I: Stream + Offset, -{ - fn parse_next(&mut self, input: I) -> IResult<I, <I as Stream>::Slice, E> { - let i = input.clone(); - match (self.parser).parse_next(i) { - Ok((i, _)) => { - let offset = input.offset_to(&i); - Ok(input.next_slice(offset)) - } - Err(e) => Err(e), - } - } -} - -/// Implementation of [`Parser::with_recognized`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct WithRecognized<F, I, O, E> -where - F: Parser<I, O, E>, - I: Stream + Offset, -{ - parser: F, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - e: core::marker::PhantomData<E>, -} - -impl<F, I, O, E> WithRecognized<F, I, O, E> -where - F: Parser<I, O, E>, - I: Stream + Offset, -{ - pub(crate) fn new(parser: F) -> Self { - Self { - parser, - i: Default::default(), - o: Default::default(), - e: Default::default(), - } - } -} - -impl<F, I, O, E> Parser<I, (O, <I as Stream>::Slice), E> for WithRecognized<F, I, O, E> -where - F: Parser<I, O, E>, - I: Stream + Offset, -{ - fn parse_next(&mut self, input: I) -> IResult<I, (O, <I as Stream>::Slice), E> { - let i = input.clone(); - match (self.parser).parse_next(i) { - Ok((remaining, result)) => { - let offset = input.offset_to(&remaining); - let (remaining, recognized) = 
input.next_slice(offset); - Ok((remaining, (result, recognized))) - } - Err(e) => Err(e), - } - } -} - -/// Implementation of [`Parser::span`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct Span<F, I, O, E> -where - F: Parser<I, O, E>, - I: Clone + Location, -{ - parser: F, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - e: core::marker::PhantomData<E>, -} - -impl<F, I, O, E> Span<F, I, O, E> -where - F: Parser<I, O, E>, - I: Clone + Location, -{ - pub(crate) fn new(parser: F) -> Self { - Self { - parser, - i: Default::default(), - o: Default::default(), - e: Default::default(), - } - } -} - -impl<I, O, E, F> Parser<I, Range<usize>, E> for Span<F, I, O, E> -where - F: Parser<I, O, E>, - I: Clone + Location, -{ - fn parse_next(&mut self, input: I) -> IResult<I, Range<usize>, E> { - let start = input.location(); - self.parser.parse_next(input).map(move |(remaining, _)| { - let end = remaining.location(); - (remaining, (start..end)) - }) - } -} - -/// Implementation of [`Parser::with_span`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct WithSpan<F, I, O, E> -where - F: Parser<I, O, E>, - I: Clone + Location, -{ - parser: F, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - e: core::marker::PhantomData<E>, -} - -impl<F, I, O, E> WithSpan<F, I, O, E> -where - F: Parser<I, O, E>, - I: Clone + Location, -{ - pub(crate) fn new(parser: F) -> Self { - Self { - parser, - i: Default::default(), - o: Default::default(), - e: Default::default(), - } - } -} - -impl<F, I, O, E> Parser<I, (O, Range<usize>), E> for WithSpan<F, I, O, E> -where - F: Parser<I, O, E>, - I: Clone + Location, -{ - fn parse_next(&mut self, input: I) -> IResult<I, (O, Range<usize>), E> { - let start = input.location(); - self.parser - .parse_next(input) - .map(move |(remaining, output)| { - let end = remaining.location(); - (remaining, (output, (start..end))) - }) - } -} - -/// Transforms an [`ErrMode::Backtrack`] (recoverable) to [`ErrMode::Cut`] (unrecoverable) -/// -/// This commits the parse result, preventing alternative branch paths like with -/// [`winnow::branch::alt`][crate::branch::alt]. 
-/// -/// # Example -/// -/// Without `cut_err`: -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error}; -/// # use winnow::bytes::one_of; -/// # use winnow::character::digit1; -/// # use winnow::combinator::rest; -/// # use winnow::branch::alt; -/// # use winnow::sequence::preceded; -/// # use winnow::prelude::*; -/// # fn main() { -/// -/// fn parser(input: &str) -> IResult<&str, &str> { -/// alt(( -/// preceded(one_of("+-"), digit1), -/// rest -/// )).parse_next(input) -/// } -/// -/// assert_eq!(parser("+10 ab"), Ok((" ab", "10"))); -/// assert_eq!(parser("ab"), Ok(("", "ab"))); -/// assert_eq!(parser("+"), Ok(("", "+"))); -/// # } -/// ``` -/// -/// With `cut_err`: -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error}; -/// # use winnow::prelude::*; -/// # use winnow::bytes::one_of; -/// # use winnow::character::digit1; -/// # use winnow::combinator::rest; -/// # use winnow::branch::alt; -/// # use winnow::sequence::preceded; -/// use winnow::combinator::cut_err; -/// # fn main() { -/// -/// fn parser(input: &str) -> IResult<&str, &str> { -/// alt(( -/// preceded(one_of("+-"), cut_err(digit1)), -/// rest -/// )).parse_next(input) -/// } -/// -/// assert_eq!(parser("+10 ab"), Ok((" ab", "10"))); -/// assert_eq!(parser("ab"), Ok(("", "ab"))); -/// assert_eq!(parser("+"), Err(ErrMode::Cut(Error { input: "", kind: ErrorKind::Slice }))); -/// # } -/// ``` -pub fn cut_err<I, O, E: ParseError<I>, F>(mut parser: F) -> impl Parser<I, O, E> -where - I: Stream, - F: Parser<I, O, E>, -{ - trace("cut_err", move |input: I| { - parser.parse_next(input).map_err(|e| e.cut()) - }) -} - -/// Transforms an [`ErrMode::Cut`] (unrecoverable) to [`ErrMode::Backtrack`] (recoverable) -/// -/// This attempts the parse, allowing other parsers to be tried on failure, like with -/// [`winnow::branch::alt`][crate::branch::alt]. -pub fn backtrack_err<I, O, E: ParseError<I>, F>(mut parser: F) -> impl Parser<I, O, E> -where - I: Stream, - F: Parser<I, O, E>, -{ - trace("backtrack_err", move |input: I| { - parser.parse_next(input).map_err(|e| e.backtrack()) - }) -} - -/// A placeholder for a not-yet-implemented [`Parser`] -/// -/// This is analogous to the [`todo!`] macro and helps with prototyping. 
-/// -/// # Panic -/// -/// This will panic when parsing -/// -/// # Example -/// -/// ```rust -/// # use winnow::prelude::*; -/// # use winnow::combinator::todo; -/// -/// fn parser(input: &str) -> IResult<&str, u64> { -/// todo(input) -/// } -/// ``` -#[track_caller] -pub fn todo<I, O, E>(input: I) -> IResult<I, O, E> -where - I: Stream, -{ - #![allow(clippy::todo)] - trace("todo", move |_input: I| todo!("unimplemented parse")).parse_next(input) -} - -/// Implementation of [`Parser::output_into`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct OutputInto<F, I, O, O2, E> -where - F: Parser<I, O, E>, - O: Into<O2>, -{ - parser: F, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - o2: core::marker::PhantomData<O2>, - e: core::marker::PhantomData<E>, -} - -impl<F, I, O, O2, E> OutputInto<F, I, O, O2, E> -where - F: Parser<I, O, E>, - O: Into<O2>, -{ - pub(crate) fn new(parser: F) -> Self { - Self { - parser, - i: Default::default(), - o: Default::default(), - o2: Default::default(), - e: Default::default(), - } - } -} - -impl<F, I, O, O2, E> Parser<I, O2, E> for OutputInto<F, I, O, O2, E> -where - F: Parser<I, O, E>, - O: Into<O2>, -{ - fn parse_next(&mut self, i: I) -> IResult<I, O2, E> { - match self.parser.parse_next(i) { - Ok((i, o)) => Ok((i, o.into())), - Err(err) => Err(err), - } - } -} - -/// Implementation of [`Parser::err_into`] -#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] -pub struct ErrInto<F, I, O, E, E2> -where - F: Parser<I, O, E>, - E: Into<E2>, -{ - parser: F, - i: core::marker::PhantomData<I>, - o: core::marker::PhantomData<O>, - e: core::marker::PhantomData<E>, - e2: core::marker::PhantomData<E2>, -} - -impl<F, I, O, E, E2> ErrInto<F, I, O, E, E2> -where - F: Parser<I, O, E>, - E: Into<E2>, -{ - pub(crate) fn new(parser: F) -> Self { - Self { - parser, - i: Default::default(), - o: Default::default(), - e: Default::default(), - e2: Default::default(), - } - } -} - -impl<F, I, O, E, E2> Parser<I, O, E2> for ErrInto<F, I, O, E, E2> -where - F: Parser<I, O, E>, - E: Into<E2>, -{ - fn parse_next(&mut self, i: I) -> IResult<I, O, E2> { - match self.parser.parse_next(i) { - Ok(ok) => Ok(ok), - Err(ErrMode::Backtrack(e)) => Err(ErrMode::Backtrack(e.into())), - Err(ErrMode::Cut(e)) => Err(ErrMode::Cut(e.into())), - Err(ErrMode::Incomplete(e)) => Err(ErrMode::Incomplete(e)), - } - } -} - -/// Creates an iterator from input data and a parser. -/// -/// Call the iterator's [`ParserIterator::finish`] method to get the remaining input if successful, -/// or the error value if we encountered an error. -/// -/// On [`ErrMode::Backtrack`], iteration will stop. To instead chain an error up, see [`cut_err`]. 
-/// -/// # Example -/// -/// ```rust -/// use winnow::{combinator::iterator, IResult, bytes::tag, character::alpha1, sequence::terminated}; -/// use std::collections::HashMap; -/// -/// let data = "abc|defg|hijkl|mnopqr|123"; -/// let mut it = iterator(data, terminated(alpha1, "|")); -/// -/// let parsed = it.map(|v| (v, v.len())).collect::<HashMap<_,_>>(); -/// let res: IResult<_,_> = it.finish(); -/// -/// assert_eq!(parsed, [("abc", 3usize), ("defg", 4), ("hijkl", 5), ("mnopqr", 6)].iter().cloned().collect()); -/// assert_eq!(res, Ok(("123", ()))); -/// ``` -pub fn iterator<I, O, E, F>(input: I, parser: F) -> ParserIterator<F, I, O, E> -where - F: Parser<I, O, E>, - I: Stream, - E: ParseError<I>, -{ - ParserIterator { - parser, - input, - state: Some(State::Running), - o: Default::default(), - } -} - -/// Main structure associated to [`iterator`]. -pub struct ParserIterator<F, I, O, E> -where - F: Parser<I, O, E>, - I: Stream, -{ - parser: F, - input: I, - state: Option<State<E>>, - o: core::marker::PhantomData<O>, -} - -impl<F, I, O, E> ParserIterator<F, I, O, E> -where - F: Parser<I, O, E>, - I: Stream, -{ - /// Returns the remaining input if parsing was successful, or the error if we encountered an error. - pub fn finish(mut self) -> IResult<I, (), E> { - match self.state.take().unwrap() { - State::Running | State::Done => Ok((self.input, ())), - State::Failure(e) => Err(ErrMode::Cut(e)), - State::Incomplete(i) => Err(ErrMode::Incomplete(i)), - } - } -} - -impl<'a, F, I, O, E> core::iter::Iterator for &'a mut ParserIterator<F, I, O, E> -where - F: Parser<I, O, E>, - I: Stream, -{ - type Item = O; - - fn next(&mut self) -> Option<Self::Item> { - if let State::Running = self.state.take().unwrap() { - let input = self.input.clone(); - - match self.parser.parse_next(input) { - Ok((i, o)) => { - self.input = i; - self.state = Some(State::Running); - Some(o) - } - Err(ErrMode::Backtrack(_)) => { - self.state = Some(State::Done); - None - } - Err(ErrMode::Cut(e)) => { - self.state = Some(State::Failure(e)); - None - } - Err(ErrMode::Incomplete(i)) => { - self.state = Some(State::Incomplete(i)); - None - } - } - } else { - None - } - } -} - -enum State<E> { - Running, - Done, - Failure(E), - Incomplete(Needed), -} - -/// Always succeeds with given value without consuming any input. -/// -/// For example, it can be used as the last alternative in `alt` to -/// specify the default case. -/// -/// **Note:** This never advances the [`Stream`] -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error}; -/// # use winnow::prelude::*; -/// use winnow::branch::alt; -/// use winnow::combinator::success; -/// -/// let mut parser = success::<_,_,Error<_>>(10); -/// assert_eq!(parser.parse_next("xyz"), Ok(("xyz", 10))); -/// -/// fn sign(input: &str) -> IResult<&str, isize> { -/// alt(( -/// '-'.value(-1), -/// '+'.value(1), -/// success::<_,_,Error<_>>(1) -/// )).parse_next(input) -/// } -/// assert_eq!(sign("+10"), Ok(("10", 1))); -/// assert_eq!(sign("-10"), Ok(("10", -1))); -/// assert_eq!(sign("10"), Ok(("10", 1))); -/// ``` -#[doc(alias = "value")] -#[doc(alias = "empty")] -pub fn success<I: Stream, O: Clone, E: ParseError<I>>(val: O) -> impl Parser<I, O, E> { - trace("success", move |input: I| Ok((input, val.clone()))) -} - -/// A parser which always fails. -/// -/// For example, it can be used as the last alternative in `alt` to -/// control the error message given. 
-///
-/// # Example
-///
-/// ```rust
-/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult};
-/// use winnow::combinator::fail;
-///
-/// let s = "string";
-/// assert_eq!(fail::<_, &str, _>(s), Err(ErrMode::Backtrack(Error::new(s, ErrorKind::Fail))));
-/// ```
-#[doc(alias = "unexpected")]
-pub fn fail<I: Stream, O, E: ParseError<I>>(i: I) -> IResult<I, O, E> {
-    trace("fail", |i| {
-        Err(ErrMode::from_error_kind(i, ErrorKind::Fail))
-    })
-    .parse_next(i)
-}
-
-/// Implementation of [`Parser::context`]
-#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
-pub struct Context<F, I, O, E, C>
-where
-    F: Parser<I, O, E>,
-    I: Stream,
-    E: ContextError<I, C>,
-    C: Clone + crate::lib::std::fmt::Debug,
-{
-    parser: F,
-    context: C,
-    i: core::marker::PhantomData<I>,
-    o: core::marker::PhantomData<O>,
-    e: core::marker::PhantomData<E>,
-}
-
-impl<F, I, O, E, C> Context<F, I, O, E, C>
-where
-    F: Parser<I, O, E>,
-    I: Stream,
-    E: ContextError<I, C>,
-    C: Clone + crate::lib::std::fmt::Debug,
-{
-    pub(crate) fn new(parser: F, context: C) -> Self {
-        Self {
-            parser,
-            context,
-            i: Default::default(),
-            o: Default::default(),
-            e: Default::default(),
-        }
-    }
-}
+pub use self::branch::*;
+pub use self::core::*;
+pub use self::multi::*;
+pub use self::parser::*;
+pub use self::sequence::*;
 
-impl<F, I, O, E, C> Parser<I, O, E> for Context<F, I, O, E, C>
-where
-    F: Parser<I, O, E>,
-    I: Stream,
-    E: ContextError<I, C>,
-    C: Clone + crate::lib::std::fmt::Debug,
-{
-    fn parse_next(&mut self, i: I) -> IResult<I, O, E> {
-        #[cfg(feature = "debug")]
-        let name = format!("context={:?}", self.context);
-        #[cfg(not(feature = "debug"))]
-        let name = "context";
-        trace(name, move |i: I| {
-            (self.parser)
-                .parse_next(i.clone())
-                .map_err(|err| err.map(|err| err.add_context(i, self.context.clone())))
-        })
-        .parse_next(i)
-    }
-}
+#[allow(unused_imports)]
+use crate::Parser;
diff --git a/vendor/winnow/src/combinator/multi.rs b/vendor/winnow/src/combinator/multi.rs
new file mode 100644
index 0000000..0c68694
--- /dev/null
+++ b/vendor/winnow/src/combinator/multi.rs
@@ -0,0 +1,1259 @@
+//! Combinators applying their child parser multiple times
+
+use crate::error::ErrMode;
+use crate::error::ErrorKind;
+use crate::error::ParserError;
+use crate::stream::Accumulate;
+use crate::stream::Range;
+use crate::stream::Stream;
+use crate::trace::trace;
+use crate::PResult;
+use crate::Parser;
+
+/// [`Accumulate`] the output of a parser into a container, like `Vec`
+///
+/// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see
+/// [`cut_err`][crate::combinator::cut_err].
+///
+/// # Arguments
+/// * `range` The minimum and maximum number of iterations.
+/// * `f` The parser to apply.
+///
+/// To recognize a series of tokens, [`Accumulate`] into a `()` and then [`Parser::recognize`].
+///
+/// **Warning:** If the parser passed to `repeat` accepts empty inputs
+/// (like `alpha0` or `digit0`), `repeat` will return an error,
+/// to prevent going into an infinite loop.
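+///
+/// As a rough sketch (not part of the upstream docs), the `()` accumulator
+/// mentioned above avoids building a `Vec` when only the matched slice
+/// matters; the `"ab"` pattern and input here are illustrative:
+///
+/// ```rust
+/// # use winnow::prelude::*;
+/// use winnow::combinator::repeat;
+///
+/// fn run(s: &str) -> IResult<&str, &str> {
+///   // Accumulate into `()` (allocation-free), then return the matched
+///   // slice with `recognize`.
+///   repeat::<_, _, (), _, _>(1.., "ab").recognize().parse_peek(s)
+/// }
+///
+/// assert_eq!(run("ababc"), Ok(("c", "abab")));
+/// ```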
+/// +/// # Example +/// +/// Zero or more repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::repeat; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// repeat(0.., "abc").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// # } +/// ``` +/// +/// One or more repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::repeat; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// repeat(1.., "abc").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(InputError::new("123123", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// # } +/// ``` +/// +/// Fixed number of repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::repeat; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// repeat(2, "abc").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Err(ErrMode::Backtrack(InputError::new("123", ErrorKind::Tag)))); +/// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(InputError::new("123123", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); +/// # } +/// ``` +/// +/// Arbitrary repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::repeat; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// repeat(0..=2, "abc").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); +/// # } +/// ``` +#[doc(alias = "many0")] +#[doc(alias = "count")] +#[doc(alias = "many0_count")] +#[doc(alias = "many1")] +#[doc(alias = "many1_count")] +#[doc(alias = "many_m_n")] +#[doc(alias = "repeated")] +#[doc(alias = "skip_many")] +#[doc(alias = "skip_many1")] +#[inline(always)] +pub fn repeat<I, O, C, E, F>(range: impl Into<Range>, mut f: F) -> impl Parser<I, C, E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + E: ParserError<I>, +{ + let Range { + start_inclusive, + end_inclusive, + } = range.into(); + trace("repeat", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => repeat0_(&mut f, i), + (1, None) => repeat1_(&mut f, i), + (start, end) if Some(start) == end => 
repeat_n_(start, &mut f, i), + (start, end) => repeat_m_n_(start, end.unwrap_or(usize::MAX), &mut f, i), + } + }) +} + +fn repeat0_<I, O, C, E, F>(f: &mut F, i: &mut I) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + E: ParserError<I>, +{ + let mut acc = C::initial(None); + loop { + let start = i.checkpoint(); + let len = i.eof_offset(); + match f.parse_next(i) { + Err(ErrMode::Backtrack(_)) => { + i.reset(start); + return Ok(acc); + } + Err(e) => return Err(e), + Ok(o) => { + // infinite loop check: the parser must always consume + if i.eof_offset() == len { + return Err(ErrMode::assert(i, "`repeat` parsers must always consume")); + } + + acc.accumulate(o); + } + } + } +} + +fn repeat1_<I, O, C, E, F>(f: &mut F, i: &mut I) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + E: ParserError<I>, +{ + match f.parse_next(i) { + Err(e) => Err(e.append(i, ErrorKind::Many)), + Ok(o) => { + let mut acc = C::initial(None); + acc.accumulate(o); + + loop { + let start = i.checkpoint(); + let len = i.eof_offset(); + match f.parse_next(i) { + Err(ErrMode::Backtrack(_)) => { + i.reset(start); + return Ok(acc); + } + Err(e) => return Err(e), + Ok(o) => { + // infinite loop check: the parser must always consume + if i.eof_offset() == len { + return Err(ErrMode::assert(i, "`repeat` parsers must always consume")); + } + + acc.accumulate(o); + } + } + } + } + } +} + +fn repeat_n_<I, O, C, E, F>(count: usize, f: &mut F, i: &mut I) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + E: ParserError<I>, +{ + let mut res = C::initial(Some(count)); + + for _ in 0..count { + match f.parse_next(i) { + Ok(o) => { + res.accumulate(o); + } + Err(e) => { + return Err(e.append(i, ErrorKind::Many)); + } + } + } + + Ok(res) +} + +fn repeat_m_n_<I, O, C, E, F>(min: usize, max: usize, parse: &mut F, input: &mut I) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + E: ParserError<I>, +{ + if min > max { + return Err(ErrMode::Cut(E::from_error_kind(input, ErrorKind::Many))); + } + + let mut res = C::initial(Some(min)); + for count in 0..max { + let start = input.checkpoint(); + let len = input.eof_offset(); + match parse.parse_next(input) { + Ok(value) => { + // infinite loop check: the parser must always consume + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`repeat` parsers must always consume", + )); + } + + res.accumulate(value); + } + Err(ErrMode::Backtrack(e)) => { + if count < min { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } else { + input.reset(start); + return Ok(res); + } + } + Err(e) => { + return Err(e); + } + } + } + + Ok(res) +} + +/// [`Accumulate`] the output of parser `f` into a container, like `Vec`, until the parser `g` +/// produces a result. +/// +/// Returns a tuple of the results of `f` in a `Vec` and the result of `g`. +/// +/// `f` keeps going so long as `g` produces [`ErrMode::Backtrack`]. To instead chain an error up, see [`cut_err`][crate::combinator::cut_err]. +/// +/// To recognize a series of tokens, [`Accumulate`] into a `()` and then [`Parser::recognize`]. 
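+///
+/// As an illustrative sketch (the grammar here is made up, not from the
+/// upstream docs), `repeat_till0` can collect comma-terminated words until a
+/// closing `;`:
+///
+/// ```rust
+/// # use winnow::prelude::*;
+/// use winnow::ascii::alpha1;
+/// use winnow::combinator::repeat_till0;
+///
+/// fn items(s: &str) -> IResult<&str, (Vec<&str>, char)> {
+///   // `';'` is tried first on each iteration; while it backtracks, a word
+///   // plus its trailing comma is accumulated as another element.
+///   repeat_till0((alpha1, ',').map(|(word, _)| word), ';').parse_peek(s)
+/// }
+///
+/// assert_eq!(items("ab,cd,;rest"), Ok(("rest", (vec!["ab", "cd"], ';'))));
+/// ```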
+///
+/// # Example
+///
+/// ```rust
+/// # #[cfg(feature = "std")] {
+/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed};
+/// # use winnow::prelude::*;
+/// use winnow::combinator::repeat_till0;
+/// use winnow::token::tag;
+///
+/// fn parser(s: &str) -> IResult<&str, (Vec<&str>, &str)> {
+///   repeat_till0("abc", "end").parse_peek(s)
+/// }
+///
+/// assert_eq!(parser("abcabcend"), Ok(("", (vec!["abc", "abc"], "end"))));
+/// assert_eq!(parser("abc123end"), Err(ErrMode::Backtrack(InputError::new("123end", ErrorKind::Tag))));
+/// assert_eq!(parser("123123end"), Err(ErrMode::Backtrack(InputError::new("123123end", ErrorKind::Tag))));
+/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag))));
+/// assert_eq!(parser("abcendefg"), Ok(("efg", (vec!["abc"], "end"))));
+/// # }
+/// ```
+#[doc(alias = "many_till0")]
+pub fn repeat_till0<I, O, C, P, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, (C, P), E>
+where
+    I: Stream,
+    C: Accumulate<O>,
+    F: Parser<I, O, E>,
+    G: Parser<I, P, E>,
+    E: ParserError<I>,
+{
+    trace("repeat_till0", move |i: &mut I| {
+        let mut res = C::initial(None);
+        loop {
+            let start = i.checkpoint();
+            let len = i.eof_offset();
+            match g.parse_next(i) {
+                Ok(o) => return Ok((res, o)),
+                Err(ErrMode::Backtrack(_)) => {
+                    i.reset(start);
+                    match f.parse_next(i) {
+                        Err(e) => return Err(e.append(i, ErrorKind::Many)),
+                        Ok(o) => {
+                            // infinite loop check: the parser must always consume
+                            if i.eof_offset() == len {
+                                return Err(ErrMode::assert(
+                                    i,
+                                    "`repeat` parsers must always consume",
+                                ));
+                            }
+
+                            res.accumulate(o);
+                        }
+                    }
+                }
+                Err(e) => return Err(e),
+            }
+        }
+    })
+}
+
+/// [`Accumulate`] the output of a parser, interleaved with `sep`
+///
+/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see
+/// [`cut_err`][crate::combinator::cut_err].
+///
+/// # Arguments
+/// * `range` The minimum and maximum number of iterations.
+/// * `parser` The parser that parses the elements of the list.
+/// * `sep` The parser that parses the separator between list elements.
+///
+/// **Warning:** If the separator parser accepts empty inputs
+/// (like `alpha0` or `digit0`), `separated` will return an error,
+/// to prevent going into an infinite loop.
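+///
+/// As a rough sketch (not from the upstream docs; the input format is
+/// illustrative), `separated` combined with [`Parser::parse_to`] reads a
+/// comma-separated list of integers:
+///
+/// ```rust
+/// # use winnow::prelude::*;
+/// use winnow::ascii::digit1;
+/// use winnow::combinator::separated;
+///
+/// fn csv(s: &str) -> IResult<&str, Vec<u32>> {
+///   // Each element is a digit run converted via `FromStr`; `,` separates.
+///   separated(1.., digit1.parse_to::<u32>(), ',').parse_peek(s)
+/// }
+///
+/// assert_eq!(csv("1,22,333"), Ok(("", vec![1, 22, 333])));
+/// ```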
+/// +/// # Example +/// +/// Zero or more repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(0.., "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// assert_eq!(parser("def|abc"), Ok(("def|abc", vec![]))); +/// # } +/// ``` +/// +/// One or more repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(1.., "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(InputError::new("def|abc", ErrorKind::Tag)))); +/// # } +/// ``` +/// +/// Fixed number of repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(2, "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("|abc", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Err(ErrMode::Backtrack(InputError::new("123abc", ErrorKind::Tag)))); +/// assert_eq!(parser("abc|def"), Err(ErrMode::Backtrack(InputError::new("def", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(InputError::new("def|abc", ErrorKind::Tag)))); +/// # } +/// ``` +/// +/// Arbitrary repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(0..=2, "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("|abc", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// assert_eq!(parser("def|abc"), Ok(("def|abc", vec![]))); +/// # } +/// ``` +#[doc(alias = "sep_by")] +#[doc(alias = "sep_by1")] +#[doc(alias = "separated_list0")] +#[doc(alias = "separated_list1")] +#[doc(alias = "separated_m_n")] +#[inline(always)] +pub fn separated<I, O, C, O2, E, P, S>( + range: impl Into<Range>, + mut parser: P, + mut separator: S, +) -> impl Parser<I, C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + let Range { + 
start_inclusive,
+        end_inclusive,
+    } = range.into();
+    trace("separated", move |input: &mut I| {
+        match (start_inclusive, end_inclusive) {
+            (0, None) => separated0_(&mut parser, &mut separator, input),
+            (1, None) => separated1_(&mut parser, &mut separator, input),
+            (start, end) if Some(start) == end => {
+                separated_n_(start, &mut parser, &mut separator, input)
+            }
+            (start, end) => separated_m_n_(
+                start,
+                end.unwrap_or(usize::MAX),
+                &mut parser,
+                &mut separator,
+                input,
+            ),
+        }
+    })
+}
+
+/// [`Accumulate`] the output of a parser, interleaved with `sep`
+///
+/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see
+/// [`cut_err`][crate::combinator::cut_err].
+///
+/// # Arguments
+/// * `parser` Parses the elements of the list.
+/// * `sep` Parses the separator between list elements.
+///
+/// # Example
+///
+/// ```rust
+/// # #[cfg(feature = "std")] {
+/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed};
+/// # use winnow::prelude::*;
+/// use winnow::combinator::separated0;
+/// use winnow::token::tag;
+///
+/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
+///   separated0("abc", "|").parse_peek(s)
+/// }
+///
+/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"])));
+/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"])));
+/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"])));
+/// assert_eq!(parser(""), Ok(("", vec![])));
+/// assert_eq!(parser("def|abc"), Ok(("def|abc", vec![])));
+/// # }
+/// ```
+#[doc(alias = "sep_by")]
+#[doc(alias = "separated_list0")]
+#[deprecated(since = "0.5.19", note = "Replaced with `combinator::separated`")]
+pub fn separated0<I, O, C, O2, E, P, S>(mut parser: P, mut sep: S) -> impl Parser<I, C, E>
+where
+    I: Stream,
+    C: Accumulate<O>,
+    P: Parser<I, O, E>,
+    S: Parser<I, O2, E>,
+    E: ParserError<I>,
+{
+    trace("separated0", move |i: &mut I| {
+        separated0_(&mut parser, &mut sep, i)
+    })
+}
+
+fn separated0_<I, O, C, O2, E, P, S>(
+    parser: &mut P,
+    separator: &mut S,
+    input: &mut I,
+) -> PResult<C, E>
+where
+    I: Stream,
+    C: Accumulate<O>,
+    P: Parser<I, O, E>,
+    S: Parser<I, O2, E>,
+    E: ParserError<I>,
+{
+    let mut acc = C::initial(None);
+
+    let start = input.checkpoint();
+    match parser.parse_next(input) {
+        Err(ErrMode::Backtrack(_)) => {
+            input.reset(start);
+            return Ok(acc);
+        }
+        Err(e) => return Err(e),
+        Ok(o) => {
+            acc.accumulate(o);
+        }
+    }
+
+    loop {
+        let start = input.checkpoint();
+        let len = input.eof_offset();
+        match separator.parse_next(input) {
+            Err(ErrMode::Backtrack(_)) => {
+                input.reset(start);
+                return Ok(acc);
+            }
+            Err(e) => return Err(e),
+            Ok(_) => {
+                // infinite loop check
+                if input.eof_offset() == len {
+                    return Err(ErrMode::assert(
+                        input,
+                        "`separated` separator parser must always consume",
+                    ));
+                }
+
+                match parser.parse_next(input) {
+                    Err(ErrMode::Backtrack(_)) => {
+                        input.reset(start);
+                        return Ok(acc);
+                    }
+                    Err(e) => return Err(e),
+                    Ok(o) => {
+                        acc.accumulate(o);
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// [`Accumulate`] the output of a parser, interleaved with `sep`
+///
+/// Fails if the element parser does not produce at least one element.
+///
+/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see
+/// [`cut_err`][crate::combinator::cut_err].
+///
+/// # Arguments
+/// * `parser` Parses the elements of the list.
+/// * `sep` Parses the separator between list elements.
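+///
+/// This combinator is deprecated in favor of [`separated`]; a minimal migration
+/// sketch (illustrative, not part of the upstream docs) passes the `1..` range:
+///
+/// ```rust
+/// # #[cfg(feature = "std")] {
+/// # use winnow::prelude::*;
+/// use winnow::combinator::separated;
+///
+/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
+///   // `1..` reproduces the "at least one element" behavior of `separated1`
+///   separated(1.., "abc", "|").parse_peek(s)
+/// }
+///
+/// assert_eq!(parser("abc|abc"), Ok(("", vec!["abc", "abc"])));
+/// # }
+/// ```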
+/// +/// # Example +/// +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated1; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated1("abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(InputError::new("def|abc", ErrorKind::Tag)))); +/// # } +/// ``` +#[doc(alias = "sep_by1")] +#[doc(alias = "separated_list1")] +#[deprecated(since = "0.5.19", note = "Replaced with `combinator::separated`")] +pub fn separated1<I, O, C, O2, E, P, S>(mut parser: P, mut sep: S) -> impl Parser<I, C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + trace("separated1", move |i: &mut I| { + separated1_(&mut parser, &mut sep, i) + }) +} + +fn separated1_<I, O, C, O2, E, P, S>( + parser: &mut P, + separator: &mut S, + input: &mut I, +) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + let mut acc = C::initial(None); + + // Parse the first element + match parser.parse_next(input) { + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); + } + } + + loop { + let start = input.checkpoint(); + let len = input.eof_offset(); + match separator.parse_next(input) { + Err(ErrMode::Backtrack(_)) => { + input.reset(start); + return Ok(acc); + } + Err(e) => return Err(e), + Ok(_) => { + // infinite loop check + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`separated` separator parser must always consume", + )); + } + + match parser.parse_next(input) { + Err(ErrMode::Backtrack(_)) => { + input.reset(start); + return Ok(acc); + } + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); + } + } + } + } + } +} + +fn separated_n_<I, O, C, O2, E, P, S>( + count: usize, + parser: &mut P, + separator: &mut S, + input: &mut I, +) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + let mut acc = C::initial(Some(count)); + + if count == 0 { + return Ok(acc); + } + + match parser.parse_next(input) { + Err(e) => { + return Err(e.append(input, ErrorKind::Many)); + } + Ok(o) => { + acc.accumulate(o); + } + } + + for _ in 1..count { + let len = input.eof_offset(); + match separator.parse_next(input) { + Err(e) => { + return Err(e.append(input, ErrorKind::Many)); + } + Ok(_) => { + // infinite loop check + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`separated` separator parser must always consume", + )); + } + + match parser.parse_next(input) { + Err(e) => { + return Err(e.append(input, ErrorKind::Many)); + } + Ok(o) => { + acc.accumulate(o); + } + } + } + } + } + + Ok(acc) +} + +fn separated_m_n_<I, O, C, O2, E, P, S>( + min: usize, + max: usize, + parser: &mut P, + separator: &mut S, + input: &mut I, +) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + if min > max { + return Err(ErrMode::Cut(E::from_error_kind(input, ErrorKind::Many))); + } + + let mut acc 
= C::initial(Some(min)); + + let start = input.checkpoint(); + match parser.parse_next(input) { + Err(ErrMode::Backtrack(e)) => { + if min == 0 { + input.reset(start); + return Ok(acc); + } else { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } + } + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); + } + } + + for index in 1..max { + let start = input.checkpoint(); + let len = input.eof_offset(); + match separator.parse_next(input) { + Err(ErrMode::Backtrack(e)) => { + if index < min { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } else { + input.reset(start); + return Ok(acc); + } + } + Err(e) => { + return Err(e); + } + Ok(_) => { + // infinite loop check + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`separated` separator parser must always consume", + )); + } + + match parser.parse_next(input) { + Err(ErrMode::Backtrack(e)) => { + if index < min { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } else { + input.reset(start); + return Ok(acc); + } + } + Err(e) => { + return Err(e); + } + Ok(o) => { + acc.accumulate(o); + } + } + } + } + } + + Ok(acc) +} + +/// Alternates between two parsers, merging the results (left associative) +/// +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// [`cut_err`][crate::combinator::cut_err]. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated_foldl1; +/// use winnow::ascii::dec_int; +/// +/// fn parser(s: &str) -> IResult<&str, i32> { +/// separated_foldl1(dec_int, "-", |l, _, r| l - r).parse_peek(s) +/// } +/// +/// assert_eq!(parser("9-3-5"), Ok(("", 1))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(InputError::new("def|abc", ErrorKind::Slice)))); +/// ``` +pub fn separated_foldl1<I, O, O2, E, P, S, Op>( + mut parser: P, + mut sep: S, + mut op: Op, +) -> impl Parser<I, O, E> +where + I: Stream, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, + Op: FnMut(O, O2, O) -> O, +{ + trace("separated_foldl1", move |i: &mut I| { + let mut ol = parser.parse_next(i)?; + + loop { + let start = i.checkpoint(); + let len = i.eof_offset(); + match sep.parse_next(i) { + Err(ErrMode::Backtrack(_)) => { + i.reset(start); + return Ok(ol); + } + Err(e) => return Err(e), + Ok(s) => { + // infinite loop check: the parser must always consume + if i.eof_offset() == len { + return Err(ErrMode::assert(i, "`repeat` parsers must always consume")); + } + + match parser.parse_next(i) { + Err(ErrMode::Backtrack(_)) => { + i.reset(start); + return Ok(ol); + } + Err(e) => return Err(e), + Ok(or) => { + ol = op(ol, s, or); + } + } + } + } + } + }) +} + +/// Alternates between two parsers, merging the results (right associative) +/// +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// [`cut_err`][crate::combinator::cut_err]. 
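+///
+/// Right associativity means the merge folds from the rightmost pair outwards: with
+/// `^` as the separator, `2^3^2` groups as `2^(3^2) = 2^9 = 512`, whereas the
+/// left-associative [`separated_foldl1`] would compute `(2^3)^2 = 64`.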
+/// +/// # Example +/// +/// ``` +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated_foldr1; +/// use winnow::ascii::dec_uint; +/// +/// fn parser(s: &str) -> IResult<&str, u32> { +/// separated_foldr1(dec_uint, "^", |l: u32, _, r: u32| l.pow(r)).parse_peek(s) +/// } +/// +/// assert_eq!(parser("2^3^2"), Ok(("", 512))); +/// assert_eq!(parser("2"), Ok(("", 2))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(InputError::new("def|abc", ErrorKind::Slice)))); +/// ``` +#[cfg(feature = "alloc")] +pub fn separated_foldr1<I, O, O2, E, P, S, Op>( + mut parser: P, + mut sep: S, + mut op: Op, +) -> impl Parser<I, O, E> +where + I: Stream, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, + Op: FnMut(O, O2, O) -> O, +{ + trace("separated_foldr1", move |i: &mut I| { + let ol = parser.parse_next(i)?; + let all: crate::lib::std::vec::Vec<(O2, O)> = + repeat(0.., (sep.by_ref(), parser.by_ref())).parse_next(i)?; + if let Some((s, or)) = all + .into_iter() + .rev() + .reduce(|(sr, or), (sl, ol)| (sl, op(ol, sr, or))) + { + let merged = op(ol, s, or); + Ok(merged) + } else { + Ok(ol) + } + }) +} + +/// Repeats the embedded parser, filling the given slice with results. +/// +/// This parser fails if the input runs out before the given slice is full. +/// +/// # Arguments +/// * `f` The parser to apply. +/// * `buf` The slice to fill +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::fill; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, [&str; 2]> { +/// let mut buf = ["", ""]; +/// let (rest, ()) = fill("abc", &mut buf).parse_peek(s)?; +/// Ok((rest, buf)) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", ["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Err(ErrMode::Backtrack(InputError::new("123", ErrorKind::Tag)))); +/// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(InputError::new("123123", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("abcabcabc"), Ok(("abc", ["abc", "abc"]))); +/// ``` +pub fn fill<'a, I, O, E, F>(mut f: F, buf: &'a mut [O]) -> impl Parser<I, (), E> + 'a +where + I: Stream + 'a, + F: Parser<I, O, E> + 'a, + E: ParserError<I> + 'a, +{ + trace("fill", move |i: &mut I| { + for elem in buf.iter_mut() { + match f.parse_next(i) { + Ok(o) => { + *elem = o; + } + Err(e) => { + return Err(e.append(i, ErrorKind::Many)); + } + } + } + + Ok(()) + }) +} + +/// Repeats the embedded parser `m..=n` times, calling `g` to gather the results +/// +/// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// [`cut_err`][crate::combinator::cut_err]. +/// +/// # Arguments +/// * `m` The minimum number of iterations. +/// * `n` The maximum number of iterations. +/// * `f` The parser to apply. +/// * `init` A function returning the initial value. +/// * `g` The function that combines a result of `f` with +/// the current accumulator. +/// +/// **Warning:** If the parser passed to `fold_repeat` accepts empty inputs +/// (like `alpha0` or `digit0`), `fold_repeat` will return an error, +/// to prevent going into an infinite loop. 
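+///
+/// Because the caller supplies both `init` and the folding function, the accumulator
+/// can be as small as a counter; a minimal sketch (illustrative, not part of the
+/// upstream docs):
+///
+/// ```rust
+/// # use winnow::prelude::*;
+/// use winnow::combinator::fold_repeat;
+///
+/// fn count_abc(s: &str) -> IResult<&str, usize> {
+///   // count the matches instead of collecting them, so nothing is allocated
+///   fold_repeat(0.., "abc", || 0, |n, _| n + 1).parse_peek(s)
+/// }
+///
+/// assert_eq!(count_abc("abcabc123"), Ok(("123", 2)));
+/// ```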
+/// +/// # Example +/// +/// Zero or more repetitions: +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::fold_repeat; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// fold_repeat( +/// 0.., +/// "abc", +/// Vec::new, +/// |mut acc: Vec<_>, item| { +/// acc.push(item); +/// acc +/// } +/// ).parse_peek(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// ``` +/// +/// One or more repetitions: +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::fold_repeat; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// fold_repeat( +/// 1.., +/// "abc", +/// Vec::new, +/// |mut acc: Vec<_>, item| { +/// acc.push(item); +/// acc +/// } +/// ).parse_peek(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(InputError::new("123123", ErrorKind::Many)))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Many)))); +/// ``` +/// +/// Arbitrary number of repetitions: +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::fold_repeat; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// fold_repeat( +/// 0..=2, +/// "abc", +/// Vec::new, +/// |mut acc: Vec<_>, item| { +/// acc.push(item); +/// acc +/// } +/// ).parse_peek(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); +/// ``` +#[doc(alias = "fold_many0")] +#[doc(alias = "fold_many1")] +#[doc(alias = "fold_many_m_n")] +#[inline(always)] +pub fn fold_repeat<I, O, E, F, G, H, R>( + range: impl Into<Range>, + mut f: F, + mut init: H, + mut g: G, +) -> impl Parser<I, R, E> +where + I: Stream, + F: Parser<I, O, E>, + G: FnMut(R, O) -> R, + H: FnMut() -> R, + E: ParserError<I>, +{ + let Range { + start_inclusive, + end_inclusive, + } = range.into(); + trace("fold_repeat", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => fold_repeat0_(&mut f, &mut init, &mut g, i), + (1, None) => fold_repeat1_(&mut f, &mut init, &mut g, i), + (start, end) => fold_repeat_m_n_( + start, + end.unwrap_or(usize::MAX), + &mut f, + &mut init, + &mut g, + i, + ), + } + }) +} + +fn fold_repeat0_<I, O, E, F, G, H, R>( + f: &mut F, + init: &mut H, + g: &mut G, + input: &mut I, +) -> PResult<R, E> +where + I: Stream, + F: Parser<I, O, E>, + G: FnMut(R, O) -> R, + H: FnMut() -> R, + E: ParserError<I>, +{ + let mut res = init(); + + loop { + let start = input.checkpoint(); + let len = input.eof_offset(); + match f.parse_next(input) { + Ok(o) => { + // infinite loop check: the parser must always consume + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`repeat` parsers must always consume", + )); + } + + res 
= g(res, o);
+            }
+            Err(ErrMode::Backtrack(_)) => {
+                input.reset(start);
+                return Ok(res);
+            }
+            Err(e) => {
+                return Err(e);
+            }
+        }
+    }
+}
+
+fn fold_repeat1_<I, O, E, F, G, H, R>(
+    f: &mut F,
+    init: &mut H,
+    g: &mut G,
+    input: &mut I,
+) -> PResult<R, E>
+where
+    I: Stream,
+    F: Parser<I, O, E>,
+    G: FnMut(R, O) -> R,
+    H: FnMut() -> R,
+    E: ParserError<I>,
+{
+    let init = init();
+    match f.parse_next(input) {
+        Err(ErrMode::Backtrack(_)) => Err(ErrMode::from_error_kind(input, ErrorKind::Many)),
+        Err(e) => Err(e),
+        Ok(o1) => {
+            let mut acc = g(init, o1);
+
+            loop {
+                let start = input.checkpoint();
+                let len = input.eof_offset();
+                match f.parse_next(input) {
+                    Err(ErrMode::Backtrack(_)) => {
+                        input.reset(start);
+                        break;
+                    }
+                    Err(e) => return Err(e),
+                    Ok(o) => {
+                        // infinite loop check: the parser must always consume
+                        if input.eof_offset() == len {
+                            return Err(ErrMode::assert(
+                                input,
+                                "`repeat` parsers must always consume",
+                            ));
+                        }
+
+                        acc = g(acc, o);
+                    }
+                }
+            }
+
+            Ok(acc)
+        }
+    }
+}
+
+fn fold_repeat_m_n_<I, O, E, F, G, H, R>(
+    min: usize,
+    max: usize,
+    parse: &mut F,
+    init: &mut H,
+    fold: &mut G,
+    input: &mut I,
+) -> PResult<R, E>
+where
+    I: Stream,
+    F: Parser<I, O, E>,
+    G: FnMut(R, O) -> R,
+    H: FnMut() -> R,
+    E: ParserError<I>,
+{
+    if min > max {
+        return Err(ErrMode::Cut(E::from_error_kind(input, ErrorKind::Many)));
+    }
+
+    let mut acc = init();
+    for count in 0..max {
+        let start = input.checkpoint();
+        let len = input.eof_offset();
+        match parse.parse_next(input) {
+            Ok(value) => {
+                // infinite loop check: the parser must always consume
+                if input.eof_offset() == len {
+                    return Err(ErrMode::assert(
+                        input,
+                        "`repeat` parsers must always consume",
+                    ));
+                }
+
+                acc = fold(acc, value);
+            }
+            //FIXME: handle failure properly
+            Err(ErrMode::Backtrack(err)) => {
+                if count < min {
+                    return Err(ErrMode::Backtrack(err.append(input, ErrorKind::Many)));
+                } else {
+                    input.reset(start);
+                    break;
+                }
+            }
+            Err(e) => return Err(e),
+        }
+    }
+
+    Ok(acc)
+}
diff --git a/vendor/winnow/src/combinator/parser.rs b/vendor/winnow/src/combinator/parser.rs
new file mode 100644
index 0000000..80ed11b
--- /dev/null
+++ b/vendor/winnow/src/combinator/parser.rs
@@ -0,0 +1,905 @@
+use crate::error::{AddContext, ErrMode, ErrorKind, FromExternalError, ParserError};
+use crate::lib::std::borrow::Borrow;
+use crate::lib::std::ops::Range;
+use crate::stream::StreamIsPartial;
+use crate::stream::{Location, Stream};
+use crate::trace::trace;
+use crate::trace::trace_result;
+use crate::*;
+
+/// Implementation of [`Parser::by_ref`]
+#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
+pub struct ByRef<'p, P> {
+    p: &'p mut P,
+}
+
+impl<'p, P> ByRef<'p, P> {
+    #[inline(always)]
+    pub(crate) fn new(p: &'p mut P) -> Self {
+        Self { p }
+    }
+}
+
+impl<'p, I, O, E, P> Parser<I, O, E> for ByRef<'p, P>
+where
+    P: Parser<I, O, E>,
+{
+    #[inline(always)]
+    fn parse_next(&mut self, i: &mut I) -> PResult<O, E> {
+        self.p.parse_next(i)
+    }
+}
+
+/// Implementation of [`Parser::map`]
+#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
+pub struct Map<F, G, I, O, O2, E>
+where
+    F: Parser<I, O, E>,
+    G: FnMut(O) -> O2,
+{
+    parser: F,
+    map: G,
+    i: core::marker::PhantomData<I>,
+    o: core::marker::PhantomData<O>,
+    o2: core::marker::PhantomData<O2>,
+    e: core::marker::PhantomData<E>,
+}
+
+impl<F, G, I, O, O2, E> Map<F, G, I, O, O2, E>
+where
+    F: Parser<I, O, E>,
+    G: FnMut(O) -> O2,
+{
+    #[inline(always)]
+    pub(crate) fn new(parser: F, map: G) -> Self {
+
Self { + parser, + map, + i: Default::default(), + o: Default::default(), + o2: Default::default(), + e: Default::default(), + } + } +} + +impl<F, G, I, O, O2, E> Parser<I, O2, E> for Map<F, G, I, O, O2, E> +where + F: Parser<I, O, E>, + G: FnMut(O) -> O2, +{ + #[inline] + fn parse_next(&mut self, i: &mut I) -> PResult<O2, E> { + match self.parser.parse_next(i) { + Err(e) => Err(e), + Ok(o) => Ok((self.map)(o)), + } + } +} + +/// Implementation of [`Parser::try_map`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct TryMap<F, G, I, O, O2, E, E2> +where + F: Parser<I, O, E>, + G: FnMut(O) -> Result<O2, E2>, + I: Stream, + E: FromExternalError<I, E2>, +{ + parser: F, + map: G, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + o2: core::marker::PhantomData<O2>, + e: core::marker::PhantomData<E>, + e2: core::marker::PhantomData<E2>, +} + +impl<F, G, I, O, O2, E, E2> TryMap<F, G, I, O, O2, E, E2> +where + F: Parser<I, O, E>, + G: FnMut(O) -> Result<O2, E2>, + I: Stream, + E: FromExternalError<I, E2>, +{ + #[inline(always)] + pub(crate) fn new(parser: F, map: G) -> Self { + Self { + parser, + map, + i: Default::default(), + o: Default::default(), + o2: Default::default(), + e: Default::default(), + e2: Default::default(), + } + } +} + +impl<F, G, I, O, O2, E, E2> Parser<I, O2, E> for TryMap<F, G, I, O, O2, E, E2> +where + F: Parser<I, O, E>, + G: FnMut(O) -> Result<O2, E2>, + I: Stream, + E: FromExternalError<I, E2>, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<O2, E> { + let start = input.checkpoint(); + let o = self.parser.parse_next(input)?; + let res = (self.map)(o).map_err(|err| { + input.reset(start); + ErrMode::from_external_error(input, ErrorKind::Verify, err) + }); + trace_result("verify", &res); + res + } +} + +/// Implementation of [`Parser::verify_map`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct VerifyMap<F, G, I, O, O2, E> +where + F: Parser<I, O, E>, + G: FnMut(O) -> Option<O2>, + I: Stream, + E: ParserError<I>, +{ + parser: F, + map: G, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + o2: core::marker::PhantomData<O2>, + e: core::marker::PhantomData<E>, +} + +impl<F, G, I, O, O2, E> VerifyMap<F, G, I, O, O2, E> +where + F: Parser<I, O, E>, + G: FnMut(O) -> Option<O2>, + I: Stream, + E: ParserError<I>, +{ + #[inline(always)] + pub(crate) fn new(parser: F, map: G) -> Self { + Self { + parser, + map, + i: Default::default(), + o: Default::default(), + o2: Default::default(), + e: Default::default(), + } + } +} + +impl<F, G, I, O, O2, E> Parser<I, O2, E> for VerifyMap<F, G, I, O, O2, E> +where + F: Parser<I, O, E>, + G: FnMut(O) -> Option<O2>, + I: Stream, + E: ParserError<I>, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<O2, E> { + let start = input.checkpoint(); + let o = self.parser.parse_next(input)?; + let res = (self.map)(o).ok_or_else(|| { + input.reset(start); + ErrMode::from_error_kind(input, ErrorKind::Verify) + }); + trace_result("verify", &res); + res + } +} + +/// Implementation of [`Parser::and_then`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct AndThen<F, G, I, O, O2, E> +where + F: Parser<I, O, E>, + G: Parser<O, O2, E>, + O: StreamIsPartial, + I: Stream, +{ + outer: F, + inner: G, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + o2: core::marker::PhantomData<O2>, + e: core::marker::PhantomData<E>, +} + +impl<F, G, I, O, O2, E> AndThen<F, G, I, O, O2, E> +where + F: 
Parser<I, O, E>, + G: Parser<O, O2, E>, + O: StreamIsPartial, + I: Stream, +{ + #[inline(always)] + pub(crate) fn new(outer: F, inner: G) -> Self { + Self { + outer, + inner, + i: Default::default(), + o: Default::default(), + o2: Default::default(), + e: Default::default(), + } + } +} + +impl<F, G, I, O, O2, E> Parser<I, O2, E> for AndThen<F, G, I, O, O2, E> +where + F: Parser<I, O, E>, + G: Parser<O, O2, E>, + O: StreamIsPartial, + I: Stream, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<O2, E> { + let start = i.checkpoint(); + let mut o = self.outer.parse_next(i)?; + let _ = o.complete(); + let o2 = self.inner.parse_next(&mut o).map_err(|err| { + i.reset(start); + err + })?; + Ok(o2) + } +} + +/// Implementation of [`Parser::parse_to`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct ParseTo<P, I, O, O2, E> +where + P: Parser<I, O, E>, + I: Stream, + O: crate::stream::ParseSlice<O2>, + E: ParserError<I>, +{ + p: P, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + o2: core::marker::PhantomData<O2>, + e: core::marker::PhantomData<E>, +} + +impl<P, I, O, O2, E> ParseTo<P, I, O, O2, E> +where + P: Parser<I, O, E>, + I: Stream, + O: crate::stream::ParseSlice<O2>, + E: ParserError<I>, +{ + #[inline(always)] + pub(crate) fn new(p: P) -> Self { + Self { + p, + i: Default::default(), + o: Default::default(), + o2: Default::default(), + e: Default::default(), + } + } +} + +impl<P, I, O, O2, E> Parser<I, O2, E> for ParseTo<P, I, O, O2, E> +where + P: Parser<I, O, E>, + I: Stream, + O: crate::stream::ParseSlice<O2>, + E: ParserError<I>, +{ + #[inline] + fn parse_next(&mut self, i: &mut I) -> PResult<O2, E> { + let start = i.checkpoint(); + let o = self.p.parse_next(i)?; + let res = o.parse_slice().ok_or_else(|| { + i.reset(start); + ErrMode::from_error_kind(i, ErrorKind::Verify) + }); + trace_result("verify", &res); + res + } +} + +/// Implementation of [`Parser::flat_map`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct FlatMap<F, G, H, I, O, O2, E> +where + F: Parser<I, O, E>, + G: FnMut(O) -> H, + H: Parser<I, O2, E>, +{ + f: F, + g: G, + h: core::marker::PhantomData<H>, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + o2: core::marker::PhantomData<O2>, + e: core::marker::PhantomData<E>, +} + +impl<F, G, H, I, O, O2, E> FlatMap<F, G, H, I, O, O2, E> +where + F: Parser<I, O, E>, + G: FnMut(O) -> H, + H: Parser<I, O2, E>, +{ + #[inline(always)] + pub(crate) fn new(f: F, g: G) -> Self { + Self { + f, + g, + h: Default::default(), + i: Default::default(), + o: Default::default(), + o2: Default::default(), + e: Default::default(), + } + } +} + +impl<F, G, H, I, O, O2, E> Parser<I, O2, E> for FlatMap<F, G, H, I, O, O2, E> +where + F: Parser<I, O, E>, + G: FnMut(O) -> H, + H: Parser<I, O2, E>, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<O2, E> { + let o = self.f.parse_next(i)?; + (self.g)(o).parse_next(i) + } +} + +/// Implementation of [`Parser::complete_err`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct CompleteErr<F> { + f: F, +} + +impl<F> CompleteErr<F> { + #[inline(always)] + pub(crate) fn new(f: F) -> Self { + Self { f } + } +} + +impl<F, I, O, E> Parser<I, O, E> for CompleteErr<F> +where + I: Stream, + F: Parser<I, O, E>, + E: ParserError<I>, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<O, E> { + trace("complete_err", |input: &mut I| { + match (self.f).parse_next(input) { + 
Err(ErrMode::Incomplete(_)) => { + Err(ErrMode::from_error_kind(input, ErrorKind::Complete)) + } + rest => rest, + } + }) + .parse_next(input) + } +} + +/// Implementation of [`Parser::verify`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct Verify<F, G, I, O, O2, E> +where + F: Parser<I, O, E>, + G: FnMut(&O2) -> bool, + I: Stream, + O: Borrow<O2>, + O2: ?Sized, + E: ParserError<I>, +{ + parser: F, + filter: G, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + o2: core::marker::PhantomData<O2>, + e: core::marker::PhantomData<E>, +} + +impl<F, G, I, O, O2, E> Verify<F, G, I, O, O2, E> +where + F: Parser<I, O, E>, + G: FnMut(&O2) -> bool, + I: Stream, + O: Borrow<O2>, + O2: ?Sized, + E: ParserError<I>, +{ + #[inline(always)] + pub(crate) fn new(parser: F, filter: G) -> Self { + Self { + parser, + filter, + i: Default::default(), + o: Default::default(), + o2: Default::default(), + e: Default::default(), + } + } +} + +impl<F, G, I, O, O2, E> Parser<I, O, E> for Verify<F, G, I, O, O2, E> +where + F: Parser<I, O, E>, + G: FnMut(&O2) -> bool, + I: Stream, + O: Borrow<O2>, + O2: ?Sized, + E: ParserError<I>, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<O, E> { + let start = input.checkpoint(); + let o = self.parser.parse_next(input)?; + let res = (self.filter)(o.borrow()).then_some(o).ok_or_else(|| { + input.reset(start); + ErrMode::from_error_kind(input, ErrorKind::Verify) + }); + trace_result("verify", &res); + res + } +} + +/// Implementation of [`Parser::value`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct Value<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: Clone, +{ + parser: F, + val: O2, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<F, I, O, O2, E> Value<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: Clone, +{ + #[inline(always)] + pub(crate) fn new(parser: F, val: O2) -> Self { + Self { + parser, + val, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<F, I, O, O2, E> Parser<I, O2, E> for Value<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: Clone, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<O2, E> { + (self.parser).parse_next(input).map(|_| self.val.clone()) + } +} + +/// Implementation of [`Parser::default_value`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct DefaultValue<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: core::default::Default, +{ + parser: F, + o2: core::marker::PhantomData<O2>, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<F, I, O, O2, E> DefaultValue<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: core::default::Default, +{ + #[inline(always)] + pub(crate) fn new(parser: F) -> Self { + Self { + parser, + o2: Default::default(), + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<F, I, O, O2, E> Parser<I, O2, E> for DefaultValue<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: core::default::Default, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<O2, E> { + (self.parser).parse_next(input).map(|_| O2::default()) + } +} + +/// Implementation of [`Parser::void`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct Void<F, I, O, E> +where + F: Parser<I, O, E>, +{ + parser: F, + i: core::marker::PhantomData<I>, + o: 
core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<F, I, O, E> Void<F, I, O, E> +where + F: Parser<I, O, E>, +{ + #[inline(always)] + pub(crate) fn new(parser: F) -> Self { + Self { + parser, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<F, I, O, E> Parser<I, (), E> for Void<F, I, O, E> +where + F: Parser<I, O, E>, +{ + #[inline(always)] + fn parse_next(&mut self, input: &mut I) -> PResult<(), E> { + (self.parser).parse_next(input).map(|_| ()) + } +} + +/// Implementation of [`Parser::recognize`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct Recognize<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream, +{ + parser: F, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<F, I, O, E> Recognize<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream, +{ + #[inline(always)] + pub(crate) fn new(parser: F) -> Self { + Self { + parser, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<I, O, E, F> Parser<I, <I as Stream>::Slice, E> for Recognize<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<<I as Stream>::Slice, E> { + let checkpoint = input.checkpoint(); + match (self.parser).parse_next(input) { + Ok(_) => { + let offset = input.offset_from(&checkpoint); + input.reset(checkpoint); + let recognized = input.next_slice(offset); + Ok(recognized) + } + Err(e) => Err(e), + } + } +} + +/// Implementation of [`Parser::with_recognized`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct WithRecognized<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream, +{ + parser: F, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<F, I, O, E> WithRecognized<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream, +{ + #[inline(always)] + pub(crate) fn new(parser: F) -> Self { + Self { + parser, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<F, I, O, E> Parser<I, (O, <I as Stream>::Slice), E> for WithRecognized<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<(O, <I as Stream>::Slice), E> { + let checkpoint = input.checkpoint(); + match (self.parser).parse_next(input) { + Ok(result) => { + let offset = input.offset_from(&checkpoint); + input.reset(checkpoint); + let recognized = input.next_slice(offset); + Ok((result, recognized)) + } + Err(e) => Err(e), + } + } +} + +/// Implementation of [`Parser::span`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct Span<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream + Location, +{ + parser: F, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<F, I, O, E> Span<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream + Location, +{ + #[inline(always)] + pub(crate) fn new(parser: F) -> Self { + Self { + parser, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<I, O, E, F> Parser<I, Range<usize>, E> for Span<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream + Location, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<Range<usize>, E> { + let start = input.location(); + self.parser.parse_next(input).map(move |_| { + let end = 
input.location(); + start..end + }) + } +} + +/// Implementation of [`Parser::with_span`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct WithSpan<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream + Location, +{ + parser: F, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<F, I, O, E> WithSpan<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream + Location, +{ + #[inline(always)] + pub(crate) fn new(parser: F) -> Self { + Self { + parser, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<F, I, O, E> Parser<I, (O, Range<usize>), E> for WithSpan<F, I, O, E> +where + F: Parser<I, O, E>, + I: Stream + Location, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<(O, Range<usize>), E> { + let start = input.location(); + self.parser.parse_next(input).map(move |output| { + let end = input.location(); + (output, (start..end)) + }) + } +} + +/// Implementation of [`Parser::output_into`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct OutputInto<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O: Into<O2>, +{ + parser: F, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + o2: core::marker::PhantomData<O2>, + e: core::marker::PhantomData<E>, +} + +impl<F, I, O, O2, E> OutputInto<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O: Into<O2>, +{ + #[inline(always)] + pub(crate) fn new(parser: F) -> Self { + Self { + parser, + i: Default::default(), + o: Default::default(), + o2: Default::default(), + e: Default::default(), + } + } +} + +impl<F, I, O, O2, E> Parser<I, O2, E> for OutputInto<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O: Into<O2>, +{ + #[inline] + fn parse_next(&mut self, i: &mut I) -> PResult<O2, E> { + self.parser.parse_next(i).map(|o| o.into()) + } +} + +/// Implementation of [`Parser::err_into`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct ErrInto<F, I, O, E, E2> +where + F: Parser<I, O, E>, + E: Into<E2>, +{ + parser: F, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, + e2: core::marker::PhantomData<E2>, +} + +impl<F, I, O, E, E2> ErrInto<F, I, O, E, E2> +where + F: Parser<I, O, E>, + E: Into<E2>, +{ + #[inline(always)] + pub(crate) fn new(parser: F) -> Self { + Self { + parser, + i: Default::default(), + o: Default::default(), + e: Default::default(), + e2: Default::default(), + } + } +} + +impl<F, I, O, E, E2> Parser<I, O, E2> for ErrInto<F, I, O, E, E2> +where + F: Parser<I, O, E>, + E: Into<E2>, +{ + #[inline] + fn parse_next(&mut self, i: &mut I) -> PResult<O, E2> { + match self.parser.parse_next(i) { + Ok(ok) => Ok(ok), + Err(ErrMode::Backtrack(e)) => Err(ErrMode::Backtrack(e.into())), + Err(ErrMode::Cut(e)) => Err(ErrMode::Cut(e.into())), + Err(ErrMode::Incomplete(e)) => Err(ErrMode::Incomplete(e)), + } + } +} + +/// Implementation of [`Parser::context`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct Context<F, I, O, E, C> +where + F: Parser<I, O, E>, + I: Stream, + E: AddContext<I, C>, + C: Clone + crate::lib::std::fmt::Debug, +{ + parser: F, + context: C, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<F, I, O, E, C> Context<F, I, O, E, C> +where + F: Parser<I, O, E>, + I: Stream, + E: AddContext<I, C>, + C: Clone + crate::lib::std::fmt::Debug, +{ + #[inline(always)] + pub(crate) fn 
new(parser: F, context: C) -> Self { + Self { + parser, + context, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<F, I, O, E, C> Parser<I, O, E> for Context<F, I, O, E, C> +where + F: Parser<I, O, E>, + I: Stream, + E: AddContext<I, C>, + C: Clone + crate::lib::std::fmt::Debug, +{ + #[inline] + fn parse_next(&mut self, i: &mut I) -> PResult<O, E> { + #[cfg(feature = "debug")] + let name = format!("context={:?}", self.context); + #[cfg(not(feature = "debug"))] + let name = "context"; + trace(name, move |i: &mut I| { + (self.parser) + .parse_next(i) + .map_err(|err| err.add_context(i, self.context.clone())) + }) + .parse_next(i) + } +} diff --git a/vendor/winnow/src/combinator/sequence.rs b/vendor/winnow/src/combinator/sequence.rs new file mode 100644 index 0000000..4eaa6d1 --- /dev/null +++ b/vendor/winnow/src/combinator/sequence.rs @@ -0,0 +1,175 @@ +use crate::error::ParserError; +use crate::stream::Stream; +use crate::trace::trace; +use crate::*; + +#[doc(inline)] +pub use crate::seq; + +/// Sequence two parsers, only returning the output from the second. +/// +/// # Arguments +/// * `first` The opening parser. +/// * `second` The second parser to get object. +/// +/// See also [`seq`] to generalize this across any number of fields. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::error::Needed::Size; +/// use winnow::combinator::preceded; +/// use winnow::token::tag; +/// +/// let mut parser = preceded("abc", "efg"); +/// +/// assert_eq!(parser.parse_peek("abcefg"), Ok(("", "efg"))); +/// assert_eq!(parser.parse_peek("abcefghij"), Ok(("hij", "efg"))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser.parse_peek("123"), Err(ErrMode::Backtrack(InputError::new("123", ErrorKind::Tag)))); +/// ``` +#[doc(alias = "ignore_then")] +pub fn preceded<I, O1, O2, E: ParserError<I>, F, G>( + mut first: F, + mut second: G, +) -> impl Parser<I, O2, E> +where + I: Stream, + F: Parser<I, O1, E>, + G: Parser<I, O2, E>, +{ + trace("preceded", move |input: &mut I| { + let _ = first.parse_next(input)?; + second.parse_next(input) + }) +} + +/// Sequence two parsers, only returning the output of the first. +/// +/// # Arguments +/// * `first` The first parser to apply. +/// * `second` The second parser to match an object. +/// +/// See also [`seq`] to generalize this across any number of fields. 
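+///
+/// For instance, requiring but discarding a trailing terminator (an illustrative
+/// sketch, not part of the upstream docs):
+///
+/// ```rust
+/// # use winnow::prelude::*;
+/// use winnow::combinator::terminated;
+///
+/// fn line(s: &str) -> IResult<&str, &str> {
+///   // "abc" is kept as the output; the "\n" must match but is dropped
+///   terminated("abc", "\n").parse_peek(s)
+/// }
+///
+/// assert_eq!(line("abc\nrest"), Ok(("rest", "abc")));
+/// ```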
+/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::error::Needed::Size; +/// use winnow::combinator::terminated; +/// use winnow::token::tag; +/// +/// let mut parser = terminated("abc", "efg"); +/// +/// assert_eq!(parser.parse_peek("abcefg"), Ok(("", "abc"))); +/// assert_eq!(parser.parse_peek("abcefghij"), Ok(("hij", "abc"))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser.parse_peek("123"), Err(ErrMode::Backtrack(InputError::new("123", ErrorKind::Tag)))); +/// ``` +#[doc(alias = "then_ignore")] +pub fn terminated<I, O1, O2, E: ParserError<I>, F, G>( + mut first: F, + mut second: G, +) -> impl Parser<I, O1, E> +where + I: Stream, + F: Parser<I, O1, E>, + G: Parser<I, O2, E>, +{ + trace("terminated", move |input: &mut I| { + let o1 = first.parse_next(input)?; + second.parse_next(input).map(|_| o1) + }) +} + +/// Sequence three parsers, only returning the values of the first and third. +/// +/// # Arguments +/// * `first` The first parser to apply. +/// * `sep` The separator parser to apply. +/// * `second` The second parser to apply. +/// +/// See also [`seq`] to generalize this across any number of fields. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::error::Needed::Size; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated_pair; +/// use winnow::token::tag; +/// +/// let mut parser = separated_pair("abc", "|", "efg"); +/// +/// assert_eq!(parser.parse_peek("abc|efg"), Ok(("", ("abc", "efg")))); +/// assert_eq!(parser.parse_peek("abc|efghij"), Ok(("hij", ("abc", "efg")))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser.parse_peek("123"), Err(ErrMode::Backtrack(InputError::new("123", ErrorKind::Tag)))); +/// ``` +pub fn separated_pair<I, O1, O2, O3, E: ParserError<I>, F, G, H>( + mut first: F, + mut sep: G, + mut second: H, +) -> impl Parser<I, (O1, O3), E> +where + I: Stream, + F: Parser<I, O1, E>, + G: Parser<I, O2, E>, + H: Parser<I, O3, E>, +{ + trace("separated_pair", move |input: &mut I| { + let o1 = first.parse_next(input)?; + let _ = sep.parse_next(input)?; + second.parse_next(input).map(|o2| (o1, o2)) + }) +} + +/// Sequence three parsers, only returning the output of the second. +/// +/// # Arguments +/// * `first` The first parser to apply and discard. +/// * `second` The second parser to apply. +/// * `third` The third parser to apply and discard. +/// +/// See also [`seq`] to generalize this across any number of fields. 
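+///
+/// For instance, extracting the digits between brackets with `winnow::ascii::digit1`
+/// (an illustrative sketch, not part of the upstream docs):
+///
+/// ```rust
+/// # use winnow::prelude::*;
+/// use winnow::ascii::digit1;
+/// use winnow::combinator::delimited;
+///
+/// fn bracketed(s: &str) -> IResult<&str, &str> {
+///   // "[" and "]" are matched and discarded; only the digits are returned
+///   delimited("[", digit1, "]").parse_peek(s)
+/// }
+///
+/// assert_eq!(bracketed("[123]rest"), Ok(("rest", "123")));
+/// ```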
+/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::error::Needed::Size; +/// # use winnow::prelude::*; +/// use winnow::combinator::delimited; +/// use winnow::token::tag; +/// +/// let mut parser = delimited("(", "abc", ")"); +/// +/// assert_eq!(parser.parse_peek("(abc)"), Ok(("", "abc"))); +/// assert_eq!(parser.parse_peek("(abc)def"), Ok(("def", "abc"))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser.parse_peek("123"), Err(ErrMode::Backtrack(InputError::new("123", ErrorKind::Tag)))); +/// ``` +#[doc(alias = "between")] +#[doc(alias = "padded")] +pub fn delimited<I, O1, O2, O3, E: ParserError<I>, F, G, H>( + mut first: F, + mut second: G, + mut third: H, +) -> impl Parser<I, O2, E> +where + I: Stream, + F: Parser<I, O1, E>, + G: Parser<I, O2, E>, + H: Parser<I, O3, E>, +{ + trace("delimited", move |input: &mut I| { + let _ = first.parse_next(input)?; + let o2 = second.parse_next(input)?; + third.parse_next(input).map(|_| o2) + }) +} diff --git a/vendor/winnow/src/combinator/tests.rs b/vendor/winnow/src/combinator/tests.rs index 4286286..27fa534 100644 --- a/vendor/winnow/src/combinator/tests.rs +++ b/vendor/winnow/src/combinator/tests.rs @@ -1,22 +1,28 @@ use super::*; -use crate::bytes::take; +use crate::ascii::digit1 as digit; +use crate::binary::u16; +use crate::binary::u8; +use crate::binary::Endianness; use crate::error::ErrMode; -use crate::error::Error; use crate::error::ErrorKind; +use crate::error::InputError; use crate::error::Needed; -use crate::error::ParseError; -use crate::multi::count; -use crate::number::u16; -use crate::number::u8; -use crate::number::Endianness; +use crate::error::ParserError; +use crate::stream::Stream; +use crate::token::take; +use crate::unpeek; use crate::IResult; +use crate::PResult; use crate::Parser; use crate::Partial; +#[cfg(feature = "alloc")] +use crate::lib::std::vec::Vec; + macro_rules! 
assert_parse( ($left: expr, $right: expr) => { - let res: $crate::IResult<_, _, Error<_>> = $left; + let res: $crate::IResult<_, _, InputError<_>> = $left; assert_eq!(res, $right); }; ); @@ -26,16 +32,16 @@ fn eof_on_slices() { let not_over: &[u8] = &b"Hello, world!"[..]; let is_over: &[u8] = &b""[..]; - let res_not_over = eof(not_over); + let res_not_over = eof.parse_peek(not_over); assert_parse!( res_not_over, Err(ErrMode::Backtrack(error_position!( - not_over, + ¬_over, ErrorKind::Eof ))) ); - let res_over = eof(is_over); + let res_over = eof.parse_peek(is_over); assert_parse!(res_over, Ok((is_over, is_over))); } @@ -44,16 +50,16 @@ fn eof_on_strs() { let not_over: &str = "Hello, world!"; let is_over: &str = ""; - let res_not_over = eof(not_over); + let res_not_over = eof.parse_peek(not_over); assert_parse!( res_not_over, Err(ErrMode::Backtrack(error_position!( - not_over, + ¬_over, ErrorKind::Eof ))) ); - let res_over = eof(is_over); + let res_over = eof.parse_peek(is_over); assert_parse!(res_over, Ok((is_over, is_over))); } @@ -61,20 +67,20 @@ fn eof_on_strs() { fn rest_on_slices() { let input: &[u8] = &b"Hello, world!"[..]; let empty: &[u8] = &b""[..]; - assert_parse!(rest(input), Ok((empty, input))); + assert_parse!(rest.parse_peek(input), Ok((empty, input))); } #[test] fn rest_on_strs() { let input: &str = "Hello, world!"; let empty: &str = ""; - assert_parse!(rest(input), Ok((empty, input))); + assert_parse!(rest.parse_peek(input), Ok((empty, input))); } #[test] fn rest_len_on_slices() { let input: &[u8] = &b"Hello, world!"[..]; - assert_parse!(rest_len(input), Ok((input, input.len()))); + assert_parse!(rest_len.parse_peek(input), Ok((input, input.len()))); } use crate::lib::std::convert::From; @@ -84,12 +90,12 @@ impl From<u32> for CustomError { } } -impl<I> ParseError<I> for CustomError { - fn from_error_kind(_: I, _: ErrorKind) -> Self { +impl<I> ParserError<I> for CustomError { + fn from_error_kind(_: &I, _: ErrorKind) -> Self { CustomError } - fn append(self, _: I, _: ErrorKind) -> Self { + fn append(self, _: &I, _: ErrorKind) -> Self { CustomError } } @@ -98,22 +104,22 @@ struct CustomError; #[allow(dead_code)] fn custom_error(input: &[u8]) -> IResult<&[u8], &[u8], CustomError> { //fix_error!(input, CustomError<_>, alphanumeric) - crate::character::alphanumeric1(input) + crate::ascii::alphanumeric1.parse_peek(input) } #[test] fn test_parser_flat_map() { let input: &[u8] = &[3, 100, 101, 102, 103, 104][..]; assert_parse!( - u8.flat_map(take).parse_next(input), + u8.flat_map(take).parse_peek(input), Ok((&[103, 104][..], &[100, 101, 102][..])) ); } #[allow(dead_code)] fn test_closure_compiles_195(input: &[u8]) -> IResult<&[u8], ()> { - u8.flat_map(|num| count(u16(Endianness::Big), num as usize)) - .parse_next(input) + u8.flat_map(|num| repeat(num as usize, u16(Endianness::Big))) + .parse_peek(input) } #[test] @@ -121,15 +127,15 @@ fn test_parser_verify_map() { let input: &[u8] = &[50][..]; assert_parse!( u8.verify_map(|u| if u < 20 { Some(u) } else { None }) - .parse_next(input), - Err(ErrMode::Backtrack(Error { - input: &[50][..], - kind: ErrorKind::Verify - })) + .parse_peek(input), + Err(ErrMode::Backtrack(InputError::new( + &[50][..], + ErrorKind::Verify + ))) ); assert_parse!( u8.verify_map(|u| if u > 20 { Some(u) } else { None }) - .parse_next(input), + .parse_peek(input), Ok((&[][..], 50)) ); } @@ -138,7 +144,7 @@ fn test_parser_verify_map() { fn test_parser_map_parser() { let input: &[u8] = &[100, 101, 102, 103, 104][..]; assert_parse!( - 
take(4usize).and_then(take(2usize)).parse_next(input), + take(4usize).and_then(take(2usize)).parse_peek(input), Ok((&[104][..], &[100, 101][..])) ); } @@ -146,11 +152,11 @@ fn test_parser_map_parser() { #[test] #[cfg(feature = "std")] fn test_parser_into() { - use crate::bytes::take; - use crate::error::Error; + use crate::error::InputError; + use crate::token::take; - let mut parser = take::<_, _, Error<_>>(3u8).output_into(); - let result: IResult<&[u8], Vec<u8>> = parser.parse_next(&b"abcdefg"[..]); + let mut parser = take::<_, _, InputError<_>>(3u8).output_into(); + let result: IResult<&[u8], Vec<u8>> = parser.parse_peek(&b"abcdefg"[..]); assert_eq!(result, Ok((&b"defg"[..], vec![97, 98, 99]))); } @@ -158,7 +164,7 @@ fn test_parser_into() { #[test] fn opt_test() { fn opt_abcd(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Option<&[u8]>> { - opt("abcd").parse_next(i) + opt("abcd").parse_peek(i) } let a = &b"abcdef"[..]; @@ -181,7 +187,7 @@ fn opt_test() { #[test] fn peek_test() { fn peek_tag(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - peek("abcd").parse_next(i) + peek("abcd").parse_peek(i) } assert_eq!( @@ -195,7 +201,7 @@ fn peek_test() { assert_eq!( peek_tag(Partial::new(&b"xxx"[..])), Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), + &Partial::new(&b"xxx"[..]), ErrorKind::Tag ))) ); @@ -204,13 +210,13 @@ fn peek_test() { #[test] fn not_test() { fn not_aaa(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, ()> { - not("aaa").parse_next(i) + not("aaa").parse_peek(i) } assert_eq!( not_aaa(Partial::new(&b"aaa"[..])), Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"aaa"[..]), + &Partial::new(&b"aaa"[..]), ErrorKind::Not ))) ); @@ -226,12 +232,12 @@ fn not_test() { #[test] fn test_parser_verify() { - use crate::bytes::take; + use crate::token::take; fn test(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { take(5u8) .verify(|slice: &[u8]| slice[0] == b'a') - .parse_next(i) + .parse_peek(i) } assert_eq!( test(Partial::new(&b"bcd"[..])), @@ -240,7 +246,7 @@ fn test_parser_verify() { assert_eq!( test(Partial::new(&b"bcdefg"[..])), Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"bcdefg"[..]), + &Partial::new(&b"bcdefg"[..]), ErrorKind::Verify ))) ); @@ -253,47 +259,47 @@ fn test_parser_verify() { #[test] #[allow(unused)] fn test_parser_verify_ref() { - use crate::bytes::take; + use crate::token::take; let mut parser1 = take(3u8).verify(|s: &[u8]| s == &b"abc"[..]); assert_eq!( - parser1.parse_next(&b"abcd"[..]), + parser1.parse_peek(&b"abcd"[..]), Ok((&b"d"[..], &b"abc"[..])) ); assert_eq!( - parser1.parse_next(&b"defg"[..]), - Err(ErrMode::Backtrack(Error { - input: &b"defg"[..], - kind: ErrorKind::Verify - })) + parser1.parse_peek(&b"defg"[..]), + Err(ErrMode::Backtrack(InputError::new( + &b"defg"[..], + ErrorKind::Verify + ))) ); fn parser2(i: &[u8]) -> IResult<&[u8], u32> { - crate::number::be_u32 + crate::binary::be_u32 .verify(|val: &u32| *val < 3) - .parse_next(i) + .parse_peek(i) } } #[test] #[cfg(feature = "alloc")] fn test_parser_verify_alloc() { - use crate::bytes::take; + use crate::token::take; let mut parser1 = take(3u8) .map(|s: &[u8]| s.to_vec()) .verify(|s: &[u8]| s == &b"abc"[..]); assert_eq!( - parser1.parse_next(&b"abcd"[..]), + parser1.parse_peek(&b"abcd"[..]), Ok((&b"d"[..], b"abc".to_vec())) ); assert_eq!( - parser1.parse_next(&b"defg"[..]), - Err(ErrMode::Backtrack(Error { - input: &b"defg"[..], - kind: ErrorKind::Verify - })) + parser1.parse_peek(&b"defg"[..]), + Err(ErrMode::Backtrack(InputError::new( + 
&b"defg"[..], + ErrorKind::Verify + ))) ); } @@ -303,17 +309,1025 @@ fn fail_test() { let b = "another string"; assert_eq!( - fail::<_, &str, _>(a), - Err(ErrMode::Backtrack(Error { - input: a, - kind: ErrorKind::Fail - })) + fail::<_, &str, _>.parse_peek(a), + Err(ErrMode::Backtrack(InputError::new(a, ErrorKind::Fail))) + ); + assert_eq!( + fail::<_, &str, _>.parse_peek(b), + Err(ErrMode::Backtrack(InputError::new(b, ErrorKind::Fail))) + ); +} + +#[test] +fn complete() { + fn err_test(i: &[u8]) -> IResult<&[u8], &[u8]> { + let (i, _) = "ijkl".parse_peek(i)?; + "mnop".parse_peek(i) + } + let a = &b"ijklmn"[..]; + + let res_a = err_test(a); + assert_eq!( + res_a, + Err(ErrMode::Backtrack(error_position!( + &&b"mn"[..], + ErrorKind::Tag + ))) + ); +} + +#[test] +fn separated_pair_test() { + #[allow(clippy::type_complexity)] + fn sep_pair_abc_def(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (&[u8], &[u8])> { + separated_pair("abc", ",", "def").parse_peek(i) + } + + assert_eq!( + sep_pair_abc_def(Partial::new(&b"abc,defghijkl"[..])), + Ok((Partial::new(&b"ghijkl"[..]), (&b"abc"[..], &b"def"[..]))) + ); + assert_eq!( + sep_pair_abc_def(Partial::new(&b"ab"[..])), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + sep_pair_abc_def(Partial::new(&b"abc,d"[..])), + Err(ErrMode::Incomplete(Needed::new(2))) + ); + assert_eq!( + sep_pair_abc_def(Partial::new(&b"xxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx"[..]), + ErrorKind::Tag + ))) + ); + assert_eq!( + sep_pair_abc_def(Partial::new(&b"xxx,def"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx,def"[..]), + ErrorKind::Tag + ))) + ); + assert_eq!( + sep_pair_abc_def(Partial::new(&b"abc,xxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx"[..]), + ErrorKind::Tag + ))) + ); +} + +#[test] +fn preceded_test() { + fn preceded_abcd_efgh(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { + preceded("abcd", "efgh").parse_peek(i) + } + + assert_eq!( + preceded_abcd_efgh(Partial::new(&b"abcdefghijkl"[..])), + Ok((Partial::new(&b"ijkl"[..]), &b"efgh"[..])) + ); + assert_eq!( + preceded_abcd_efgh(Partial::new(&b"ab"[..])), + Err(ErrMode::Incomplete(Needed::new(2))) + ); + assert_eq!( + preceded_abcd_efgh(Partial::new(&b"abcde"[..])), + Err(ErrMode::Incomplete(Needed::new(3))) + ); + assert_eq!( + preceded_abcd_efgh(Partial::new(&b"xxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx"[..]), + ErrorKind::Tag + ))) + ); + assert_eq!( + preceded_abcd_efgh(Partial::new(&b"xxxxdef"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxxxdef"[..]), + ErrorKind::Tag + ))) + ); + assert_eq!( + preceded_abcd_efgh(Partial::new(&b"abcdxxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx"[..]), + ErrorKind::Tag + ))) + ); +} + +#[test] +fn terminated_test() { + fn terminated_abcd_efgh(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { + terminated("abcd", "efgh").parse_peek(i) + } + + assert_eq!( + terminated_abcd_efgh(Partial::new(&b"abcdefghijkl"[..])), + Ok((Partial::new(&b"ijkl"[..]), &b"abcd"[..])) + ); + assert_eq!( + terminated_abcd_efgh(Partial::new(&b"ab"[..])), + Err(ErrMode::Incomplete(Needed::new(2))) + ); + assert_eq!( + terminated_abcd_efgh(Partial::new(&b"abcde"[..])), + Err(ErrMode::Incomplete(Needed::new(3))) + ); + assert_eq!( + terminated_abcd_efgh(Partial::new(&b"xxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx"[..]), + ErrorKind::Tag + ))) + ); + assert_eq!( + 
terminated_abcd_efgh(Partial::new(&b"xxxxdef"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxxxdef"[..]), + ErrorKind::Tag + ))) + ); + assert_eq!( + terminated_abcd_efgh(Partial::new(&b"abcdxxxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxxx"[..]), + ErrorKind::Tag + ))) + ); +} + +#[test] +fn delimited_test() { + fn delimited_abc_def_ghi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { + delimited("abc", "def", "ghi").parse_peek(i) + } + + assert_eq!( + delimited_abc_def_ghi(Partial::new(&b"abcdefghijkl"[..])), + Ok((Partial::new(&b"jkl"[..]), &b"def"[..])) + ); + assert_eq!( + delimited_abc_def_ghi(Partial::new(&b"ab"[..])), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + delimited_abc_def_ghi(Partial::new(&b"abcde"[..])), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + delimited_abc_def_ghi(Partial::new(&b"abcdefgh"[..])), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + delimited_abc_def_ghi(Partial::new(&b"xxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx"[..]), + ErrorKind::Tag + ))) + ); + assert_eq!( + delimited_abc_def_ghi(Partial::new(&b"xxxdefghi"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxxdefghi"[..]), + ErrorKind::Tag + ),)) + ); + assert_eq!( + delimited_abc_def_ghi(Partial::new(&b"abcxxxghi"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxxghi"[..]), + ErrorKind::Tag + ))) + ); + assert_eq!( + delimited_abc_def_ghi(Partial::new(&b"abcdefxxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx"[..]), + ErrorKind::Tag + ))) + ); +} + +#[cfg(feature = "alloc")] +#[test] +fn alt_test() { + #[cfg(feature = "alloc")] + use crate::{ + error::ParserError, + lib::std::{ + fmt::Debug, + string::{String, ToString}, + }, + }; + + #[cfg(feature = "alloc")] + #[derive(Debug, Clone, Eq, PartialEq)] + pub struct ErrorStr(String); + + #[cfg(feature = "alloc")] + impl From<u32> for ErrorStr { + fn from(i: u32) -> Self { + ErrorStr(format!("custom error code: {}", i)) + } + } + + #[cfg(feature = "alloc")] + impl<'a> From<&'a str> for ErrorStr { + fn from(i: &'a str) -> Self { + ErrorStr(format!("custom error message: {}", i)) + } + } + + #[cfg(feature = "alloc")] + impl<I: Debug> ParserError<I> for ErrorStr { + fn from_error_kind(input: &I, kind: ErrorKind) -> Self { + ErrorStr(format!("custom error message: ({:?}, {:?})", input, kind)) + } + + fn append(self, input: &I, kind: ErrorKind) -> Self { + ErrorStr(format!( + "custom error message: ({:?}, {:?}) - {:?}", + input, kind, self + )) + } + } + + fn work(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + Ok(input.peek_finish()) + } + + #[allow(unused_variables)] + fn dont_work(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + Err(ErrMode::Backtrack(ErrorStr("abcd".to_string()))) + } + + fn work2(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + Ok((input, &b""[..])) + } + + fn alt1(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + alt((unpeek(dont_work), unpeek(dont_work))).parse_peek(i) + } + fn alt2(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + alt((unpeek(dont_work), unpeek(work))).parse_peek(i) + } + fn alt3(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + alt(( + unpeek(dont_work), + unpeek(dont_work), + unpeek(work2), + unpeek(dont_work), + )) + .parse_peek(i) + } + //named!(alt1, alt!(dont_work | dont_work)); + //named!(alt2, alt!(dont_work | work)); + //named!(alt3, alt!(dont_work | dont_work | work2 | dont_work)); + + let a 
= &b"abcd"[..]; + assert_eq!( + alt1(a), + Err(ErrMode::Backtrack(error_node_position!( + &a, + ErrorKind::Alt, + ErrorStr("abcd".to_string()) + ))) + ); + assert_eq!(alt2(a), Ok((&b""[..], a))); + assert_eq!(alt3(a), Ok((a, &b""[..]))); + + fn alt4(i: &[u8]) -> IResult<&[u8], &[u8]> { + alt(("abcd", "efgh")).parse_peek(i) + } + let b = &b"efgh"[..]; + assert_eq!(alt4(a), Ok((&b""[..], a))); + assert_eq!(alt4(b), Ok((&b""[..], b))); +} + +#[test] +fn alt_incomplete() { + fn alt1(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { + alt(("a", "bc", "def")).parse_peek(i) + } + + let a = &b""[..]; + assert_eq!( + alt1(Partial::new(a)), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + let a = &b"b"[..]; + assert_eq!( + alt1(Partial::new(a)), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + let a = &b"bcd"[..]; + assert_eq!( + alt1(Partial::new(a)), + Ok((Partial::new(&b"d"[..]), &b"bc"[..])) + ); + let a = &b"cde"[..]; + assert_eq!( + alt1(Partial::new(a)), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(a), + ErrorKind::Tag + ))) + ); + let a = &b"de"[..]; + assert_eq!( + alt1(Partial::new(a)), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + let a = &b"defg"[..]; + assert_eq!( + alt1(Partial::new(a)), + Ok((Partial::new(&b"g"[..]), &b"def"[..])) + ); +} + +#[test] +fn alt_array() { + fn alt1<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8]> { + alt(["a", "bc", "def"]).parse_next(i) + } + + let i = &b"a"[..]; + assert_eq!(alt1.parse_peek(i), Ok((&b""[..], (&b"a"[..])))); + + let i = &b"bc"[..]; + assert_eq!(alt1.parse_peek(i), Ok((&b""[..], (&b"bc"[..])))); + + let i = &b"defg"[..]; + assert_eq!(alt1.parse_peek(i), Ok((&b"g"[..], (&b"def"[..])))); + + let i = &b"z"[..]; + assert_eq!( + alt1.parse_peek(i), + Err(ErrMode::Backtrack(error_position!(&i, ErrorKind::Tag))) + ); +} + +#[test] +fn permutation_test() { + #[allow(clippy::type_complexity)] + fn perm(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (&[u8], &[u8], &[u8])> { + permutation(("abcd", "efg", "hi")).parse_peek(i) + } + + let expected = (&b"abcd"[..], &b"efg"[..], &b"hi"[..]); + + let a = &b"abcdefghijk"[..]; + assert_eq!( + perm(Partial::new(a)), + Ok((Partial::new(&b"jk"[..]), expected)) + ); + let b = &b"efgabcdhijk"[..]; + assert_eq!( + perm(Partial::new(b)), + Ok((Partial::new(&b"jk"[..]), expected)) + ); + let c = &b"hiefgabcdjk"[..]; + assert_eq!( + perm(Partial::new(c)), + Ok((Partial::new(&b"jk"[..]), expected)) + ); + + let d = &b"efgxyzabcdefghi"[..]; + assert_eq!( + perm(Partial::new(d)), + Err(ErrMode::Backtrack(error_node_position!( + &Partial::new(&b"efgxyzabcdefghi"[..]), + ErrorKind::Alt, + error_position!(&Partial::new(&b"xyzabcdefghi"[..]), ErrorKind::Tag) + ))) + ); + + let e = &b"efgabc"[..]; + assert_eq!( + perm(Partial::new(e)), + Err(ErrMode::Incomplete(Needed::new(1))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn separated0_test() { + fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + separated(0.., "abcd", ",").parse_peek(i) + } + fn multi_empty(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + separated(0.., "", ",").parse_peek(i) + } + fn multi_longsep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + separated(0.., "abcd", "..").parse_peek(i) + } + + let a = &b"abcdef"[..]; + let b = &b"abcd,abcdef"[..]; + let c = &b"azerty"[..]; + let d = &b",,abc"[..]; + let e = &b"abcd,abcd,ef"[..]; + let f = &b"abc"[..]; + let g = &b"abcd."[..]; + let h = &b"abcd,abc"[..]; + + let res1 = vec![&b"abcd"[..]]; + assert_eq!(multi(Partial::new(a)), 
Ok((Partial::new(&b"ef"[..]), res1))); + let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!(multi(Partial::new(b)), Ok((Partial::new(&b"ef"[..]), res2))); + assert_eq!( + multi(Partial::new(c)), + Ok((Partial::new(&b"azerty"[..]), Vec::new())) + ); + let res3 = vec![&b""[..], &b""[..], &b""[..]]; + assert_eq!( + multi_empty(Partial::new(d)), + Ok((Partial::new(&b"abc"[..]), res3)) + ); + let res4 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(e)), + Ok((Partial::new(&b",ef"[..]), res4)) + ); + + assert_eq!( + multi(Partial::new(f)), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + multi_longsep(Partial::new(g)), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + multi(Partial::new(h)), + Err(ErrMode::Incomplete(Needed::new(1))) ); +} + +#[test] +#[cfg(feature = "alloc")] +#[cfg_attr(debug_assertions, should_panic)] +fn separated0_empty_sep_test() { + fn empty_sep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + separated(0.., "abc", "").parse_peek(i) + } + + let i = &b"abcabc"[..]; + + let i_err_pos = &i[3..]; assert_eq!( - fail::<_, &str, _>(b), - Err(ErrMode::Backtrack(Error { - input: b, - kind: ErrorKind::Fail - })) + empty_sep(Partial::new(i)), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(i_err_pos), + ErrorKind::Assert + ))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn separated1_test() { + fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + separated(1.., "abcd", ",").parse_peek(i) + } + fn multi_longsep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + separated(1.., "abcd", "..").parse_peek(i) + } + + let a = &b"abcdef"[..]; + let b = &b"abcd,abcdef"[..]; + let c = &b"azerty"[..]; + let d = &b"abcd,abcd,ef"[..]; + + let f = &b"abc"[..]; + let g = &b"abcd."[..]; + let h = &b"abcd,abc"[..]; + + let res1 = vec![&b"abcd"[..]]; + assert_eq!(multi(Partial::new(a)), Ok((Partial::new(&b"ef"[..]), res1))); + let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!(multi(Partial::new(b)), Ok((Partial::new(&b"ef"[..]), res2))); + assert_eq!( + multi(Partial::new(c)), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(c), + ErrorKind::Tag + ))) + ); + let res3 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(d)), + Ok((Partial::new(&b",ef"[..]), res3)) + ); + + assert_eq!( + multi(Partial::new(f)), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + multi_longsep(Partial::new(g)), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + multi(Partial::new(h)), + Err(ErrMode::Incomplete(Needed::new(1))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn separated_test() { + fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + separated(2..=4, "abcd", ",").parse_peek(i) + } + + let a = &b"abcd,ef"[..]; + let b = &b"abcd,abcd,efgh"[..]; + let c = &b"abcd,abcd,abcd,abcd,efgh"[..]; + let d = &b"abcd,abcd,abcd,abcd,abcd,efgh"[..]; + let e = &b"abcd,ab"[..]; + + assert_eq!( + multi(Partial::new(a)), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"ef"[..]), + ErrorKind::Tag + ))) + ); + let res1 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(b)), + Ok((Partial::new(&b",efgh"[..]), res1)) + ); + let res2 = vec![&b"abcd"[..], &b"abcd"[..], &b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(c)), + Ok((Partial::new(&b",efgh"[..]), res2)) + ); + let res3 = vec![&b"abcd"[..], &b"abcd"[..], &b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(d)), + 
Ok((Partial::new(&b",abcd,efgh"[..]), res3)) + ); + assert_eq!( + multi(Partial::new(e)), + Err(ErrMode::Incomplete(Needed::new(2))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn repeat0_test() { + fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + repeat(0.., "abcd").parse_peek(i) + } + + assert_eq!( + multi(Partial::new(&b"abcdef"[..])), + Ok((Partial::new(&b"ef"[..]), vec![&b"abcd"[..]])) + ); + assert_eq!( + multi(Partial::new(&b"abcdabcdefgh"[..])), + Ok((Partial::new(&b"efgh"[..]), vec![&b"abcd"[..], &b"abcd"[..]])) + ); + assert_eq!( + multi(Partial::new(&b"azerty"[..])), + Ok((Partial::new(&b"azerty"[..]), Vec::new())) + ); + assert_eq!( + multi(Partial::new(&b"abcdab"[..])), + Err(ErrMode::Incomplete(Needed::new(2))) + ); + assert_eq!( + multi(Partial::new(&b"abcd"[..])), + Err(ErrMode::Incomplete(Needed::new(4))) + ); + assert_eq!( + multi(Partial::new(&b""[..])), + Err(ErrMode::Incomplete(Needed::new(4))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +#[cfg_attr(debug_assertions, should_panic)] +fn repeat0_empty_test() { + fn multi_empty(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + repeat(0.., "").parse_peek(i) + } + + assert_eq!( + multi_empty(Partial::new(&b"abcdef"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"abcdef"[..]), + ErrorKind::Assert + ))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn repeat1_test() { + fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + repeat(1.., "abcd").parse_peek(i) + } + + let a = &b"abcdef"[..]; + let b = &b"abcdabcdefgh"[..]; + let c = &b"azerty"[..]; + let d = &b"abcdab"[..]; + + let res1 = vec![&b"abcd"[..]]; + assert_eq!(multi(Partial::new(a)), Ok((Partial::new(&b"ef"[..]), res1))); + let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(b)), + Ok((Partial::new(&b"efgh"[..]), res2)) + ); + assert_eq!( + multi(Partial::new(c)), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(c), + ErrorKind::Tag + ))) + ); + assert_eq!( + multi(Partial::new(d)), + Err(ErrMode::Incomplete(Needed::new(2))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn repeat_till_test() { + #[allow(clippy::type_complexity)] + fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> { + repeat_till0("abcd", "efgh").parse_peek(i) + } + + let a = b"abcdabcdefghabcd"; + let b = b"efghabcd"; + let c = b"azerty"; + + let res_a = (vec![&b"abcd"[..], &b"abcd"[..]], &b"efgh"[..]); + let res_b: (Vec<&[u8]>, &[u8]) = (Vec::new(), &b"efgh"[..]); + assert_eq!(multi(&a[..]), Ok((&b"abcd"[..], res_a))); + assert_eq!(multi(&b[..]), Ok((&b"abcd"[..], res_b))); + assert_eq!( + multi(&c[..]), + Err(ErrMode::Backtrack(error_node_position!( + &&c[..], + ErrorKind::Many, + error_position!(&&c[..], ErrorKind::Tag) + ))) + ); +} + +#[test] +#[cfg(feature = "std")] +fn infinite_many() { + fn tst(input: &[u8]) -> IResult<&[u8], &[u8]> { + println!("input: {:?}", input); + Err(ErrMode::Backtrack(error_position!(&input, ErrorKind::Tag))) + } + + // should not go into an infinite loop + fn multi0(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + repeat(0.., unpeek(tst)).parse_peek(i) + } + let a = &b"abcdef"[..]; + assert_eq!(multi0(a), Ok((a, Vec::new()))); + + fn multi1(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + repeat(1.., unpeek(tst)).parse_peek(i) + } + let a = &b"abcdef"[..]; + assert_eq!( + multi1(a), + Err(ErrMode::Backtrack(error_position!(&a, ErrorKind::Tag))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn repeat_test() { + fn multi(i: Partial<&[u8]>) -> 
IResult<Partial<&[u8]>, Vec<&[u8]>> { + repeat(2..=4, "Abcd").parse_peek(i) + } + + let a = &b"Abcdef"[..]; + let b = &b"AbcdAbcdefgh"[..]; + let c = &b"AbcdAbcdAbcdAbcdefgh"[..]; + let d = &b"AbcdAbcdAbcdAbcdAbcdefgh"[..]; + let e = &b"AbcdAb"[..]; + + assert_eq!( + multi(Partial::new(a)), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"ef"[..]), + ErrorKind::Tag + ))) + ); + let res1 = vec![&b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!( + multi(Partial::new(b)), + Ok((Partial::new(&b"efgh"[..]), res1)) + ); + let res2 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!( + multi(Partial::new(c)), + Ok((Partial::new(&b"efgh"[..]), res2)) + ); + let res3 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!( + multi(Partial::new(d)), + Ok((Partial::new(&b"Abcdefgh"[..]), res3)) + ); + assert_eq!( + multi(Partial::new(e)), + Err(ErrMode::Incomplete(Needed::new(2))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn count_test() { + const TIMES: usize = 2; + fn cnt_2(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + repeat(TIMES, "abc").parse_peek(i) + } + + assert_eq!( + cnt_2(Partial::new(&b"abcabcabcdef"[..])), + Ok((Partial::new(&b"abcdef"[..]), vec![&b"abc"[..], &b"abc"[..]])) + ); + assert_eq!( + cnt_2(Partial::new(&b"ab"[..])), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + cnt_2(Partial::new(&b"abcab"[..])), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + cnt_2(Partial::new(&b"xxx"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxx"[..]), + ErrorKind::Tag + ))) + ); + assert_eq!( + cnt_2(Partial::new(&b"xxxabcabcdef"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxxabcabcdef"[..]), + ErrorKind::Tag + ))) + ); + assert_eq!( + cnt_2(Partial::new(&b"abcxxxabcdef"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"xxxabcdef"[..]), + ErrorKind::Tag + ))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn count_zero() { + const TIMES: usize = 0; + fn counter_2(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + repeat(TIMES, "abc").parse_peek(i) + } + + let done = &b"abcabcabcdef"[..]; + let parsed_done = Vec::new(); + let rest = done; + let incomplete_1 = &b"ab"[..]; + let parsed_incompl_1 = Vec::new(); + let incomplete_2 = &b"abcab"[..]; + let parsed_incompl_2 = Vec::new(); + let error = &b"xxx"[..]; + let error_remain = &b"xxx"[..]; + let parsed_err = Vec::new(); + let error_1 = &b"xxxabcabcdef"[..]; + let parsed_err_1 = Vec::new(); + let error_1_remain = &b"xxxabcabcdef"[..]; + let error_2 = &b"abcxxxabcdef"[..]; + let parsed_err_2 = Vec::new(); + let error_2_remain = &b"abcxxxabcdef"[..]; + + assert_eq!(counter_2(done), Ok((rest, parsed_done))); + assert_eq!( + counter_2(incomplete_1), + Ok((incomplete_1, parsed_incompl_1)) + ); + assert_eq!( + counter_2(incomplete_2), + Ok((incomplete_2, parsed_incompl_2)) + ); + assert_eq!(counter_2(error), Ok((error_remain, parsed_err))); + assert_eq!(counter_2(error_1), Ok((error_1_remain, parsed_err_1))); + assert_eq!(counter_2(error_2), Ok((error_2_remain, parsed_err_2))); +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct NilError; + +impl<I> From<(I, ErrorKind)> for NilError { + fn from(_: (I, ErrorKind)) -> Self { + NilError + } +} + +impl<I> ParserError<I> for NilError { + fn from_error_kind(_: &I, _: ErrorKind) -> NilError { + NilError + } + fn append(self, _: &I, _: ErrorKind) -> NilError { + NilError + } +} + +#[test] +#[cfg(feature = "alloc")] +fn fold_repeat0_test() { + fn 
fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> { + acc.push(item); + acc + } + fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + fold_repeat(0.., "abcd", Vec::new, fold_into_vec).parse_peek(i) + } + + assert_eq!( + multi(Partial::new(&b"abcdef"[..])), + Ok((Partial::new(&b"ef"[..]), vec![&b"abcd"[..]])) + ); + assert_eq!( + multi(Partial::new(&b"abcdabcdefgh"[..])), + Ok((Partial::new(&b"efgh"[..]), vec![&b"abcd"[..], &b"abcd"[..]])) + ); + assert_eq!( + multi(Partial::new(&b"azerty"[..])), + Ok((Partial::new(&b"azerty"[..]), Vec::new())) + ); + assert_eq!( + multi(Partial::new(&b"abcdab"[..])), + Err(ErrMode::Incomplete(Needed::new(2))) + ); + assert_eq!( + multi(Partial::new(&b"abcd"[..])), + Err(ErrMode::Incomplete(Needed::new(4))) + ); + assert_eq!( + multi(Partial::new(&b""[..])), + Err(ErrMode::Incomplete(Needed::new(4))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +#[cfg_attr(debug_assertions, should_panic)] +fn fold_repeat0_empty_test() { + fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> { + acc.push(item); + acc + } + fn multi_empty(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + fold_repeat(0.., "", Vec::new, fold_into_vec).parse_peek(i) + } + + assert_eq!( + multi_empty(Partial::new(&b"abcdef"[..])), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"abcdef"[..]), + ErrorKind::Assert + ))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn fold_repeat1_test() { + fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> { + acc.push(item); + acc + } + fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + fold_repeat(1.., "abcd", Vec::new, fold_into_vec).parse_peek(i) + } + + let a = &b"abcdef"[..]; + let b = &b"abcdabcdefgh"[..]; + let c = &b"azerty"[..]; + let d = &b"abcdab"[..]; + + let res1 = vec![&b"abcd"[..]]; + assert_eq!(multi(Partial::new(a)), Ok((Partial::new(&b"ef"[..]), res1))); + let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(b)), + Ok((Partial::new(&b"efgh"[..]), res2)) + ); + assert_eq!( + multi(Partial::new(c)), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(c), + ErrorKind::Many + ))) + ); + assert_eq!( + multi(Partial::new(d)), + Err(ErrMode::Incomplete(Needed::new(2))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn fold_repeat_test() { + fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> { + acc.push(item); + acc + } + fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + fold_repeat(2..=4, "Abcd", Vec::new, fold_into_vec).parse_peek(i) + } + + let a = &b"Abcdef"[..]; + let b = &b"AbcdAbcdefgh"[..]; + let c = &b"AbcdAbcdAbcdAbcdefgh"[..]; + let d = &b"AbcdAbcdAbcdAbcdAbcdefgh"[..]; + let e = &b"AbcdAb"[..]; + + assert_eq!( + multi(Partial::new(a)), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"ef"[..]), + ErrorKind::Tag + ))) + ); + let res1 = vec![&b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!( + multi(Partial::new(b)), + Ok((Partial::new(&b"efgh"[..]), res1)) + ); + let res2 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!( + multi(Partial::new(c)), + Ok((Partial::new(&b"efgh"[..]), res2)) + ); + let res3 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!( + multi(Partial::new(d)), + Ok((Partial::new(&b"Abcdefgh"[..]), res3)) + ); + assert_eq!( + multi(Partial::new(e)), + Err(ErrMode::Incomplete(Needed::new(2))) + ); +} + +#[test] +fn repeat0_count_test() { + fn count0_nums(i: &[u8]) -> IResult<&[u8], usize> { + repeat(0.., (digit, ",")).parse_peek(i) + } 
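// Editor's note (not from the vendored patch): `count0_nums` above returns a
// `usize` even though `repeat` usually builds a collection. That works because
// `repeat` folds into any `winnow::stream::Accumulate` implementation, and the
// `usize` impl discards the matched values and only counts them. A minimal
// sketch of the same inference, reusing the `digit` alias imported by this
// test module (the input variable is hypothetical):
//
//     let mut input = &b"1,2,junk"[..];
//     let n: usize = repeat(0.., (digit, ",")).parse_next(&mut input).unwrap();
//     assert_eq!(n, 2);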
+ + assert_eq!(count0_nums(&b"123,junk"[..]), Ok((&b"junk"[..], 1))); + + assert_eq!(count0_nums(&b"123,45,junk"[..]), Ok((&b"junk"[..], 2))); + + assert_eq!( + count0_nums(&b"1,2,3,4,5,6,7,8,9,0,junk"[..]), + Ok((&b"junk"[..], 10)) + ); + + assert_eq!(count0_nums(&b"hello"[..]), Ok((&b"hello"[..], 0))); +} + +#[test] +fn repeat1_count_test() { + fn count1_nums(i: &[u8]) -> IResult<&[u8], usize> { + repeat(1.., (digit, ",")).parse_peek(i) + } + + assert_eq!(count1_nums(&b"123,45,junk"[..]), Ok((&b"junk"[..], 2))); + + assert_eq!( + count1_nums(&b"1,2,3,4,5,6,7,8,9,0,junk"[..]), + Ok((&b"junk"[..], 10)) + ); + + assert_eq!( + count1_nums(&b"hello"[..]), + Err(ErrMode::Backtrack(error_position!( + &&b"hello"[..], + ErrorKind::Slice + ))) ); } diff --git a/vendor/winnow/src/error.rs b/vendor/winnow/src/error.rs index b49b056..fd7d946 100644 --- a/vendor/winnow/src/error.rs +++ b/vendor/winnow/src/error.rs @@ -4,18 +4,20 @@ //! - Accumulate more [context][Parser::context] as the error goes up the parser chain //! - Distinguish between [recoverable errors, //! unrecoverable errors, and more data is needed][ErrMode] -//! - Have a very low overhead, as errors are often discarded by the calling parser (examples: `many0`, `alt`) +//! - Have a very low overhead, as errors are often discarded by the calling parser (examples: `repeat`, `alt`) //! - Can be modified according to the user's needs, because some languages need a lot more information //! - Help thread-through the [stream][crate::stream] //! -//! To abstract these needs away from the user, generally `winnow` parsers use the [`IResult`] -//! alias, rather than [`Result`][std::result::Result]. [`finish`][FinishIResult::finish] is -//! provided for top-level parsers to integrate with your application's error reporting. +//! To abstract these needs away from the user, generally `winnow` parsers use the [`PResult`] +//! alias, rather than [`Result`]. [`Parser::parse`] is a top-level operation +//! that can help convert to a `Result` for integrating with your application's error reporting. //! //! Error types include: //! - `()` -//! - [`Error`] -//! - [`VerboseError`] +//! - [`ErrorKind`] +//! - [`InputError`] (mostly for testing) +//! - [`ContextError`] +//! - [`TreeError`] (mostly for testing) //! - [Custom errors][crate::_topic::error] #[cfg(feature = "alloc")] @@ -23,109 +25,39 @@ use crate::lib::std::borrow::ToOwned; use crate::lib::std::fmt; use core::num::NonZeroUsize; +use crate::stream::AsBStr; use crate::stream::Stream; -use crate::stream::StreamIsPartial; #[allow(unused_imports)] // Here for intra-doc links use crate::Parser; -/// Holds the result of [`Parser`] +/// For use with [`Parser::parse_peek`] which allows the input stream to be threaded through a +/// parser. /// /// - `Ok((I, O))` is the remaining [input][crate::stream] and the parsed value /// - [`Err(ErrMode<E>)`][ErrMode] is the error along with how to respond to it /// -/// By default, the error type (`E`) is [`Error`] +/// By default, the error type (`E`) is [`InputError`] /// -/// At the top-level of your parser, you can use the [`FinishIResult::finish`] method to convert -/// it to a more common result type -pub type IResult<I, O, E = Error<I>> = Result<(I, O), ErrMode<E>>; - -/// Extension trait to convert a parser's [`IResult`] to a more manageable type -#[deprecated(since = "0.4.0", note = "Replaced with `Parser::parse`")] -pub trait FinishIResult<I, O, E> { - /// Converts the parser's [`IResult`] to a type that is more consumable by callers. 
- /// - /// Errors if the parser is not at the [end of input][crate::combinator::eof]. See - /// [`FinishIResult::finish_err`] if the remaining input is needed. - /// - /// # Panic - /// - /// If the result is `Err(ErrMode::Incomplete(_))`, this method will panic. - /// - **Complete parsers:** It will not be an issue, `Incomplete` is never used - /// - **Partial parsers:** `Incomplete` will be returned if there's not enough data - /// for the parser to decide, and you should gather more data before parsing again. - /// Once the parser returns either `Ok(_)`, `Err(ErrMode::Backtrack(_))` or `Err(ErrMode::Cut(_))`, - /// you can get out of the parsing loop and call `finish_err()` on the parser's result - /// - /// # Example - /// - #[cfg_attr(not(feature = "std"), doc = "```ignore")] - #[cfg_attr(feature = "std", doc = "```")] - /// use winnow::prelude::*; - /// use winnow::character::hex_uint; - /// use winnow::error::Error; - /// - /// struct Hex(u64); - /// - /// fn parse(value: &str) -> Result<Hex, Error<String>> { - /// hex_uint.map(Hex).parse_next(value).finish().map_err(Error::into_owned) - /// } - /// ``` - #[deprecated(since = "0.4.0", note = "Replaced with `Parser::parse`")] - fn finish(self) -> Result<O, E>; - - /// Converts the parser's [`IResult`] to a type that is more consumable by errors. - /// - /// It keeps the same `Ok` branch, and merges `ErrMode::Backtrack` and `ErrMode::Cut` into the `Err` - /// side. - /// - /// # Panic - /// - /// If the result is `Err(ErrMode::Incomplete(_))`, this method will panic as [`ErrMode::Incomplete`] - /// should only be set when the input is [`StreamIsPartial<false>`] which this isn't implemented - /// for. - #[deprecated(since = "0.4.0", note = "Replaced with `Parser::parse`")] - fn finish_err(self) -> Result<(I, O), E>; -} - -#[allow(deprecated)] -impl<I, O, E> FinishIResult<I, O, E> for IResult<I, O, E> -where - I: Stream, - // Force users to deal with `Incomplete` when `StreamIsPartial<true>` - I: StreamIsPartial, - I: Clone, - E: ParseError<I>, -{ - fn finish(self) -> Result<O, E> { - debug_assert!( - !I::is_partial_supported(), - "partial streams need to handle `ErrMode::Incomplete`" - ); - - let (i, o) = self.finish_err()?; - crate::combinator::eof(i).finish_err()?; - Ok(o) - } +/// When integrating into the result of the application, see +/// - [`Parser::parse`] +/// - [`ErrMode::into_inner`] +pub type IResult<I, O, E = InputError<I>> = PResult<(I, O), E>; - fn finish_err(self) -> Result<(I, O), E> { - debug_assert!( - !I::is_partial_supported(), - "partial streams need to handle `ErrMode::Incomplete`" - ); - - match self { - Ok(res) => Ok(res), - Err(ErrMode::Backtrack(e)) | Err(ErrMode::Cut(e)) => Err(e), - Err(ErrMode::Incomplete(_)) => { - panic!("complete parsers should not report `Err(ErrMode::Incomplete(_))`") - } - } - } -} +/// For use with [`Parser::parse_next`] +/// +/// - `Ok(O)` is the parsed value +/// - [`Err(ErrMode<E>)`][ErrMode] is the error along with how to respond to it +/// +/// By default, the error type (`E`) is [`ContextError`]. 
+/// +/// When integrating into the result of the application, see +/// - [`Parser::parse`] +/// - [`ErrMode::into_inner`] +pub type PResult<O, E = ContextError> = Result<O, ErrMode<E>>; /// Contains information on needed data if a parser returned `Incomplete` /// -/// **Note:** This is only possible for `Stream` that are [partial][`StreamIsPartial`], +/// **Note:** This is only possible for `Stream` that are [partial][`crate::stream::StreamIsPartial`], /// like [`Partial`][crate::Partial]. #[derive(Debug, PartialEq, Eq, Clone, Copy)] #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] @@ -160,7 +92,7 @@ impl Needed { } } -/// The `Err` enum indicates the parser was not successful +/// Add parse error state to [`ParserError`]s #[derive(Debug, Clone, PartialEq)] #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub enum ErrMode<E> { @@ -168,7 +100,7 @@ pub enum ErrMode<E> { /// /// More data needs to be buffered before retrying the parse. /// - /// This must only be set when the [`Stream`] is [partial][`StreamIsPartial`], like with + /// This must only be set when the [`Stream`] is [partial][`crate::stream::StreamIsPartial`], like with /// [`Partial`][crate::Partial] /// /// Convert this into an `Backtrack` with [`Parser::complete_err`] @@ -176,8 +108,8 @@ pub enum ErrMode<E> { /// The parser failed with a recoverable error (the default). /// /// For example, a parser for json values might include a - /// [`dec_uint`][crate::character::dec_uint] as one case in an [`alt`][crate::branch::alt] - /// combiantor. If it fails, the next case should be tried. + /// [`dec_uint`][crate::ascii::dec_uint] as one case in an [`alt`][crate::combinator::alt] + /// combinator. If it fails, the next case should be tried. Backtrack(E), /// The parser had an unrecoverable error. /// @@ -185,13 +117,14 @@ pub enum ErrMode<E> { /// other branches. You can use [`cut_err()`][crate::combinator::cut_err] combinator to switch /// from `ErrMode::Backtrack` to `ErrMode::Cut`. /// - /// For example, one case in an [`alt`][crate::branch::alt] combinator found a unique prefix + /// For example, one case in an [`alt`][crate::combinator::alt] combinator found a unique prefix /// and you want any further errors parsing the case to be reported to the user. 
Cut(E), } impl<E> ErrMode<E> { /// Tests if the result is Incomplete + #[inline] pub fn is_incomplete(&self) -> bool { matches!(self, ErrMode::Incomplete(_)) } @@ -231,22 +164,37 @@ impl<E> ErrMode<E> { { self.map(ErrorConvert::convert) } + + /// Unwrap the mode, returning the underlying error + /// + /// Returns `None` for [`ErrMode::Incomplete`] + #[cfg_attr(debug_assertions, track_caller)] + #[inline(always)] + pub fn into_inner(self) -> Option<E> { + match self { + ErrMode::Backtrack(e) | ErrMode::Cut(e) => Some(e), + ErrMode::Incomplete(_) => None, + } + } } -impl<I, E: ParseError<I>> ParseError<I> for ErrMode<E> { - fn from_error_kind(input: I, kind: ErrorKind) -> Self { +impl<I, E: ParserError<I>> ParserError<I> for ErrMode<E> { + #[inline(always)] + fn from_error_kind(input: &I, kind: ErrorKind) -> Self { ErrMode::Backtrack(E::from_error_kind(input, kind)) } #[cfg_attr(debug_assertions, track_caller)] - fn assert(input: I, message: &'static str) -> Self + #[inline(always)] + fn assert(input: &I, message: &'static str) -> Self where I: crate::lib::std::fmt::Debug, { ErrMode::Backtrack(E::assert(input, message)) } - fn append(self, input: I, kind: ErrorKind) -> Self { + #[inline] + fn append(self, input: &I, kind: ErrorKind) -> Self { match self { ErrMode::Backtrack(e) => ErrMode::Backtrack(e.append(input, kind)), e => e, @@ -266,24 +214,32 @@ impl<I, EXT, E> FromExternalError<I, EXT> for ErrMode<E> where E: FromExternalError<I, EXT>, { - fn from_external_error(input: I, kind: ErrorKind, e: EXT) -> Self { + #[inline(always)] + fn from_external_error(input: &I, kind: ErrorKind, e: EXT) -> Self { ErrMode::Backtrack(E::from_external_error(input, kind, e)) } } -impl<T> ErrMode<Error<T>> { - /// Maps `ErrMode<Error<T>>` to `ErrMode<Error<U>>` with the given `F: T -> U` - pub fn map_input<U, F>(self, f: F) -> ErrMode<Error<U>> +impl<I, C, E: AddContext<I, C>> AddContext<I, C> for ErrMode<E> { + #[inline(always)] + fn add_context(self, input: &I, ctx: C) -> Self { + self.map(|err| err.add_context(input, ctx)) + } +} + +impl<T: Clone> ErrMode<InputError<T>> { + /// Maps `ErrMode<InputError<T>>` to `ErrMode<InputError<U>>` with the given `F: T -> U` + pub fn map_input<U: Clone, F>(self, f: F) -> ErrMode<InputError<U>> where F: FnOnce(T) -> U, { match self { ErrMode::Incomplete(n) => ErrMode::Incomplete(n), - ErrMode::Cut(Error { input, kind }) => ErrMode::Cut(Error { + ErrMode::Cut(InputError { input, kind }) => ErrMode::Cut(InputError { input: f(input), kind, }), - ErrMode::Backtrack(Error { input, kind }) => ErrMode::Backtrack(Error { + ErrMode::Backtrack(InputError { input, kind }) => ErrMode::Backtrack(InputError { input: f(input), kind, }), @@ -311,13 +267,13 @@ where /// /// It provides methods to create an error from some combinators, /// and combine existing errors in combinators like `alt`. -pub trait ParseError<I>: Sized { +pub trait ParserError<I>: Sized { /// Creates an error from the input position and an [`ErrorKind`] - fn from_error_kind(input: I, kind: ErrorKind) -> Self; + fn from_error_kind(input: &I, kind: ErrorKind) -> Self; /// Process a parser assertion #[cfg_attr(debug_assertions, track_caller)] - fn assert(input: I, _message: &'static str) -> Self + fn assert(input: &I, _message: &'static str) -> Self where I: crate::lib::std::fmt::Debug, { @@ -327,16 +283,17 @@ pub trait ParseError<I>: Sized { Self::from_error_kind(input, ErrorKind::Assert) } - /// Like [`ParseError::from_error_kind`] but merges it with the existing error. 
+ /// Like [`ParserError::from_error_kind`] but merges it with the existing error. /// /// This is useful when backtracking through a parse tree, accumulating error context on the /// way. - fn append(self, input: I, kind: ErrorKind) -> Self; + fn append(self, input: &I, kind: ErrorKind) -> Self; /// Combines errors from two different parse branches. /// - /// For example, this would be used by [`alt`][crate::branch::alt] to report the error from + /// For example, this would be used by [`alt`][crate::combinator::alt] to report the error from /// each case. + #[inline] fn or(self, other: Self) -> Self { other } @@ -345,22 +302,23 @@ pub trait ParseError<I>: Sized { /// Used by [`Parser::context`] to add custom data to error while backtracking /// /// May be implemented multiple times for different kinds of context. -pub trait ContextError<I, C = &'static str>: Sized { +pub trait AddContext<I, C = &'static str>: Sized { /// Append to an existing error custom data /// /// This is used mainly by [`Parser::context`], to add user friendly information /// to errors when backtracking through a parse tree - fn add_context(self, _input: I, _ctx: C) -> Self { + #[inline] + fn add_context(self, _input: &I, _ctx: C) -> Self { self } } /// Create a new error with an external error, from [`std::str::FromStr`] /// -/// This trait is required by the [`Parser::map_res`] combinator. +/// This trait is required by the [`Parser::try_map`] combinator. pub trait FromExternalError<I, E> { - /// Like [`ParseError::from_error_kind`] but also include an external error. - fn from_external_error(input: I, kind: ErrorKind, e: E) -> Self; + /// Like [`ParserError::from_error_kind`] but also include an external error. + fn from_external_error(input: &I, kind: ErrorKind, e: E) -> Self; } /// Equivalent of `From` implementation to avoid orphan rules in bits parsers @@ -369,70 +327,91 @@ pub trait ErrorConvert<E> { fn convert(self) -> E; } -/// Default error type, only contains the error' location and kind +/// Capture input on error +/// +/// This is useful for testing of generic parsers to ensure the error happens at the right +/// location. /// -/// This is a low-overhead error that only provides basic information. For less overhead, see -/// `()`. Fore more information, see [`VerboseError`]. -///:w -/// **Note:** [context][Parser::context] and inner errors (like from [`Parser::map_res`]) will be +/// **Note:** [context][Parser::context] and inner errors (like from [`Parser::try_map`]) will be /// dropped. 
#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub struct Error<I> { +pub struct InputError<I: Clone> { /// The input stream, pointing to the location where the error occurred pub input: I, /// A rudimentary error kind pub kind: ErrorKind, } -impl<I> Error<I> { +impl<I: Clone> InputError<I> { /// Creates a new basic error - pub fn new(input: I, kind: ErrorKind) -> Error<I> { - Error { input, kind } + #[inline] + pub fn new(input: I, kind: ErrorKind) -> Self { + Self { input, kind } + } + + /// Translate the input type + #[inline] + pub fn map_input<I2: Clone, O: Fn(I) -> I2>(self, op: O) -> InputError<I2> { + InputError { + input: op(self.input), + kind: self.kind, + } } } #[cfg(feature = "alloc")] -impl<'i, I: ToOwned + ?Sized> Error<&'i I> { +impl<'i, I: ToOwned> InputError<&'i I> +where + <I as ToOwned>::Owned: Clone, +{ /// Obtaining ownership - pub fn into_owned(self) -> Error<<I as ToOwned>::Owned> { - Error { - input: self.input.to_owned(), - kind: self.kind, - } + pub fn into_owned(self) -> InputError<<I as ToOwned>::Owned> { + self.map_input(ToOwned::to_owned) } } -impl<I> ParseError<I> for Error<I> { - fn from_error_kind(input: I, kind: ErrorKind) -> Self { - Error { input, kind } +impl<I: Clone> ParserError<I> for InputError<I> { + #[inline] + fn from_error_kind(input: &I, kind: ErrorKind) -> Self { + Self { + input: input.clone(), + kind, + } } - fn append(self, _: I, _: ErrorKind) -> Self { + #[inline] + fn append(self, _: &I, _: ErrorKind) -> Self { self } } -impl<I, C> ContextError<I, C> for Error<I> {} +impl<I: Clone, C> AddContext<I, C> for InputError<I> {} -impl<I, E> FromExternalError<I, E> for Error<I> { +impl<I: Clone, E> FromExternalError<I, E> for InputError<I> { /// Create a new error from an input position and an external error - fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self { - Error { input, kind } + #[inline] + fn from_external_error(input: &I, kind: ErrorKind, _e: E) -> Self { + Self { + input: input.clone(), + kind, + } } } -impl<I> ErrorConvert<Error<(I, usize)>> for Error<I> { - fn convert(self) -> Error<(I, usize)> { - Error { +impl<I: Clone> ErrorConvert<InputError<(I, usize)>> for InputError<I> { + #[inline] + fn convert(self) -> InputError<(I, usize)> { + InputError { input: (self.input, 0), kind: self.kind, } } } -impl<I> ErrorConvert<Error<I>> for Error<(I, usize)> { - fn convert(self) -> Error<I> { - Error { +impl<I: Clone> ErrorConvert<InputError<I>> for InputError<(I, usize)> { + #[inline] + fn convert(self) -> InputError<I> { + InputError { input: self.input.0, kind: self.kind, } @@ -440,127 +419,215 @@ impl<I> ErrorConvert<Error<I>> for Error<(I, usize)> { } /// The Display implementation allows the `std::error::Error` implementation -impl<I: fmt::Display> fmt::Display for Error<I> { +impl<I: Clone + fmt::Display> fmt::Display for InputError<I> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "error {:?} at: {}", self.kind, self.input) + write!( + f, + "{} error starting at: {}", + self.kind.description(), + self.input + ) } } #[cfg(feature = "std")] -impl<I: fmt::Debug + fmt::Display + Sync + Send + 'static> std::error::Error for Error<I> {} +impl<I: Clone + fmt::Debug + fmt::Display + Sync + Send + 'static> std::error::Error + for InputError<I> +{ +} -impl<I> ParseError<I> for () { - fn from_error_kind(_: I, _: ErrorKind) -> Self {} +impl<I> ParserError<I> for () { + #[inline] + fn from_error_kind(_: &I, _: ErrorKind) -> Self {} - fn append(self, _: I, _: ErrorKind) -> Self {} + #[inline] + fn 
append(self, _: &I, _: ErrorKind) -> Self {} } -impl<I, C> ContextError<I, C> for () {} +impl<I, C> AddContext<I, C> for () {} impl<I, E> FromExternalError<I, E> for () { - fn from_external_error(_input: I, _kind: ErrorKind, _e: E) -> Self {} + #[inline] + fn from_external_error(_input: &I, _kind: ErrorKind, _e: E) -> Self {} } impl ErrorConvert<()> for () { + #[inline] fn convert(self) {} } -/// Accumulates error information while backtracking -/// -/// For less overhead (and information), see [`Error`]. -/// -/// [`convert_error`] provides an example of how to render this for end-users. -/// -/// **Note:** This will only capture the last failed branch for combinators like -/// [`alt`][crate::branch::alt]. -#[cfg(feature = "alloc")] -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct VerboseError<I> { - /// Accumulated error information - pub errors: crate::lib::std::vec::Vec<(I, VerboseErrorKind)>, +/// Accumulate context while backtracking errors +#[derive(Debug)] +pub struct ContextError<C = StrContext> { + #[cfg(feature = "alloc")] + context: crate::lib::std::vec::Vec<C>, + #[cfg(not(feature = "alloc"))] + context: core::marker::PhantomData<C>, + #[cfg(feature = "std")] + cause: Option<Box<dyn std::error::Error + Send + Sync + 'static>>, } -#[cfg(feature = "alloc")] -impl<'i, I: ToOwned + ?Sized> VerboseError<&'i I> { - /// Obtaining ownership - pub fn into_owned(self) -> VerboseError<<I as ToOwned>::Owned> { - #[allow(clippy::redundant_clone)] // false positive - VerboseError { - errors: self - .errors - .into_iter() - .map(|(i, k)| (i.to_owned(), k)) - .collect(), +impl<C> ContextError<C> { + /// Create an empty error + #[inline] + pub fn new() -> Self { + Self { + context: Default::default(), + #[cfg(feature = "std")] + cause: None, } } -} -#[cfg(feature = "alloc")] -#[derive(Clone, Debug, Eq, PartialEq)] -/// Error context for `VerboseError` -pub enum VerboseErrorKind { - /// Static string added by the `context` function - Context(&'static str), - /// Error kind given by various parsers - Winnow(ErrorKind), + /// Access context from [`Parser::context`] + #[inline] + #[cfg(feature = "alloc")] + pub fn context(&self) -> impl Iterator<Item = &C> { + self.context.iter() + } + + /// Originating [`std::error::Error`] + #[inline] + #[cfg(feature = "std")] + pub fn cause(&self) -> Option<&(dyn std::error::Error + Send + Sync + 'static)> { + self.cause.as_deref() + } } -#[cfg(feature = "alloc")] -impl<I> ParseError<I> for VerboseError<I> { - fn from_error_kind(input: I, kind: ErrorKind) -> Self { - VerboseError { - errors: vec![(input, VerboseErrorKind::Winnow(kind))], +impl<C: Clone> Clone for ContextError<C> { + fn clone(&self) -> Self { + Self { + context: self.context.clone(), + #[cfg(feature = "std")] + cause: self.cause.as_ref().map(|e| e.to_string().into()), } } +} - fn append(mut self, input: I, kind: ErrorKind) -> Self { - self.errors.push((input, VerboseErrorKind::Winnow(kind))); - self +impl<C> Default for ContextError<C> { + #[inline] + fn default() -> Self { + Self::new() } } -#[cfg(feature = "alloc")] -impl<I> ContextError<I, &'static str> for VerboseError<I> { - fn add_context(mut self, input: I, ctx: &'static str) -> Self { - self.errors.push((input, VerboseErrorKind::Context(ctx))); +impl<I, C> ParserError<I> for ContextError<C> { + #[inline] + fn from_error_kind(_input: &I, _kind: ErrorKind) -> Self { + Self::new() + } + + #[inline] + fn append(self, _input: &I, _kind: ErrorKind) -> Self { self } + + #[inline] + fn or(self, other: Self) -> Self { + other + } } 
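Editor's note, not part of the vendored patch: a minimal sketch of how the `ContextError` machinery above is typically driven. The parser, label text, and input below are hypothetical; only items defined in this file or in winnow's public API (`StrContext`, `StrContextValue`, `Parser::context`, `Parser::parse`, `winnow::token::take_while`) are assumed.

    use winnow::error::{StrContext, StrContextValue};
    use winnow::prelude::*;
    use winnow::token::take_while;

    // Each `context()` layer pushes one frame onto the `ContextError`
    // while the failure backtracks out of the parser.
    fn hex_digits<'i>(i: &mut &'i str) -> PResult<&'i str> {
        take_while(1.., |c: char| c.is_ascii_hexdigit())
            .context(StrContext::Label("color"))
            .context(StrContext::Expected(StrContextValue::Description(
                "hexadecimal digit",
            )))
            .parse_next(i)
    }

    fn main() {
        // `Parser::parse` wraps the `ContextError` in a `ParseError`
        // (defined later in this file), which records the failure offset.
        let err = hex_digits.parse("zzz").unwrap_err();
        println!("{err}");
    }

The rendered message comes from the `Display for ContextError<StrContext>` impl above: the first `Label` frame becomes an "invalid ..." line, and the collected `Expected` frames become a comma-separated "expected ..." line.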
-#[cfg(feature = "alloc")] -impl<I, E> FromExternalError<I, E> for VerboseError<I> { - /// Create a new error from an input position and an external error - fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self { - Self::from_error_kind(input, kind) +impl<C, I> AddContext<I, C> for ContextError<C> { + #[inline] + fn add_context(mut self, _input: &I, ctx: C) -> Self { + #[cfg(feature = "alloc")] + self.context.push(ctx); + self } } -#[cfg(feature = "alloc")] -impl<I> ErrorConvert<VerboseError<I>> for VerboseError<(I, usize)> { - fn convert(self) -> VerboseError<I> { - VerboseError { - errors: self.errors.into_iter().map(|(i, e)| (i.0, e)).collect(), +#[cfg(feature = "std")] +impl<C, I, E: std::error::Error + Send + Sync + 'static> FromExternalError<I, E> + for ContextError<C> +{ + #[inline] + fn from_external_error(_input: &I, _kind: ErrorKind, e: E) -> Self { + let mut err = Self::new(); + { + err.cause = Some(Box::new(e)); } + err } } -#[cfg(feature = "alloc")] -impl<I> ErrorConvert<VerboseError<(I, usize)>> for VerboseError<I> { - fn convert(self) -> VerboseError<(I, usize)> { - VerboseError { - errors: self.errors.into_iter().map(|(i, e)| ((i, 0), e)).collect(), +// HACK: This is more general than `std`, making the features non-additive +#[cfg(not(feature = "std"))] +impl<C, I, E: Send + Sync + 'static> FromExternalError<I, E> for ContextError<C> { + #[inline] + fn from_external_error(_input: &I, _kind: ErrorKind, _e: E) -> Self { + let err = Self::new(); + err + } +} + +// For tests +impl<C: core::cmp::PartialEq> core::cmp::PartialEq for ContextError<C> { + fn eq(&self, other: &Self) -> bool { + #[cfg(feature = "alloc")] + { + if self.context != other.context { + return false; + } } + #[cfg(feature = "std")] + { + if self.cause.as_ref().map(ToString::to_string) + != other.cause.as_ref().map(ToString::to_string) + { + return false; + } + } + + true } } -#[cfg(feature = "alloc")] -impl<I: fmt::Display> fmt::Display for VerboseError<I> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "Parse error:")?; - for (input, error) in &self.errors { - match error { - VerboseErrorKind::Winnow(e) => writeln!(f, "{:?} at: {}", e, input)?, - VerboseErrorKind::Context(s) => writeln!(f, "in section '{}', at: {}", s, input)?, +impl crate::lib::std::fmt::Display for ContextError<StrContext> { + fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { + #[cfg(feature = "alloc")] + { + let expression = self.context().find_map(|c| match c { + StrContext::Label(c) => Some(c), + _ => None, + }); + let expected = self + .context() + .filter_map(|c| match c { + StrContext::Expected(c) => Some(c), + _ => None, + }) + .collect::<crate::lib::std::vec::Vec<_>>(); + + let mut newline = false; + + if let Some(expression) = expression { + newline = true; + + write!(f, "invalid {}", expression)?; + } + + if !expected.is_empty() { + if newline { + writeln!(f)?; + } + newline = true; + + write!(f, "expected ")?; + for (i, expected) in expected.iter().enumerate() { + if i != 0 { + write!(f, ", ")?; + } + write!(f, "{}", expected)?; + } + } + #[cfg(feature = "std")] + { + if let Some(cause) = self.cause() { + if newline { + writeln!(f)?; + } + write!(f, "{}", cause)?; + } } } @@ -568,89 +635,453 @@ impl<I: fmt::Display> fmt::Display for VerboseError<I> { } } +/// Additional parse context for [`ContextError`] added via [`Parser::context`] +#[derive(Clone, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum StrContext { + /// Description of what is 
currently being parsed + Label(&'static str), + /// Grammar item that was expected + Expected(StrContextValue), +} + +impl crate::lib::std::fmt::Display for StrContext { + fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { + match self { + Self::Label(name) => write!(f, "invalid {name}"), + Self::Expected(value) => write!(f, "expected {value}"), + } + } +} + +/// See [`StrContext`] +#[derive(Clone, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum StrContextValue { + /// A [`char`] token + CharLiteral(char), + /// A [`&str`] token + StringLiteral(&'static str), + /// A description of what was being parsed + Description(&'static str), +} + +impl From<char> for StrContextValue { + #[inline] + fn from(inner: char) -> Self { + Self::CharLiteral(inner) + } +} + +impl From<&'static str> for StrContextValue { + #[inline] + fn from(inner: &'static str) -> Self { + Self::StringLiteral(inner) + } +} + +impl crate::lib::std::fmt::Display for StrContextValue { + fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { + match self { + Self::CharLiteral('\n') => "newline".fmt(f), + Self::CharLiteral('`') => "'`'".fmt(f), + Self::CharLiteral(c) if c.is_ascii_control() => { + write!(f, "`{}`", c.escape_debug()) + } + Self::CharLiteral(c) => write!(f, "`{}`", c), + Self::StringLiteral(c) => write!(f, "`{}`", c), + Self::Description(c) => write!(f, "{}", c), + } + } +} + +/// Trace all error paths, particularly for tests +#[derive(Debug)] #[cfg(feature = "std")] -impl<I: fmt::Debug + fmt::Display + Sync + Send + 'static> std::error::Error for VerboseError<I> {} +pub enum TreeError<I, C = StrContext> { + /// Initial error that kicked things off + Base(TreeErrorBase<I>), + /// Traces added to the error while walking back up the stack + Stack { + /// Initial error that kicked things off + base: Box<Self>, + /// Traces added to the error while walking back up the stack + stack: Vec<TreeErrorFrame<I, C>>, + }, + /// All failed branches of an `alt` + Alt(Vec<Self>), +} -/// Transforms a `VerboseError` into a trace with input position information -#[cfg(feature = "alloc")] -pub fn convert_error<I: core::ops::Deref<Target = str>>( - input: I, - e: VerboseError<I>, -) -> crate::lib::std::string::String { - use crate::lib::std::fmt::Write; - use crate::stream::Offset; +/// See [`TreeError::Stack`] +#[derive(Debug)] +#[cfg(feature = "std")] +pub enum TreeErrorFrame<I, C = StrContext> { + /// See [`ParserError::append`] + Kind(TreeErrorBase<I>), + /// See [`AddContext::add_context`] + Context(TreeErrorContext<I, C>), +} + +/// See [`TreeErrorFrame::Kind`], [`ParserError::append`] +#[derive(Debug)] +#[cfg(feature = "std")] +pub struct TreeErrorBase<I> { + /// Parsed input, at the location where the error occurred + pub input: I, + /// Debug context + pub kind: ErrorKind, + /// See [`FromExternalError::from_external_error`] + pub cause: Option<Box<dyn std::error::Error + Send + Sync + 'static>>, +} - let mut result = crate::lib::std::string::String::new(); +/// See [`TreeErrorFrame::Context`], [`AddContext::add_context`] +#[derive(Debug)] +#[cfg(feature = "std")] +pub struct TreeErrorContext<I, C = StrContext> { + /// Parsed input, at the location where the error occurred + pub input: I, + /// See [`AddContext::add_context`] + pub context: C, +} + +#[cfg(feature = "std")] +impl<'i, I: ToOwned, C> TreeError<&'i I, C> +where + <I as ToOwned>::Owned: Clone, +{ + /// Obtaining ownership + pub fn into_owned(self) -> TreeError<<I as 
ToOwned>::Owned, C> { + self.map_input(ToOwned::to_owned) + } +} + +#[cfg(feature = "std")] +impl<I, C> TreeError<I, C> +where + I: Clone, +{ + /// Translate the input type + pub fn map_input<I2: Clone, O: Clone + Fn(I) -> I2>(self, op: O) -> TreeError<I2, C> { + match self { + TreeError::Base(base) => TreeError::Base(TreeErrorBase { + input: op(base.input), + kind: base.kind, + cause: base.cause, + }), + TreeError::Stack { base, stack } => { + let base = Box::new(base.map_input(op.clone())); + let stack = stack + .into_iter() + .map(|frame| match frame { + TreeErrorFrame::Kind(kind) => TreeErrorFrame::Kind(TreeErrorBase { + input: op(kind.input), + kind: kind.kind, + cause: kind.cause, + }), + TreeErrorFrame::Context(context) => { + TreeErrorFrame::Context(TreeErrorContext { + input: op(context.input), + context: context.context, + }) + } + }) + .collect(); + TreeError::Stack { base, stack } + } + TreeError::Alt(alt) => { + TreeError::Alt(alt.into_iter().map(|e| e.map_input(op.clone())).collect()) + } + } + } - for (i, (substring, kind)) in e.errors.iter().enumerate() { - let offset = input.offset_to(substring); + fn append_frame(self, frame: TreeErrorFrame<I, C>) -> Self { + match self { + TreeError::Stack { base, mut stack } => { + stack.push(frame); + TreeError::Stack { base, stack } + } + base => TreeError::Stack { + base: Box::new(base), + stack: vec![frame], + }, + } + } +} + +#[cfg(feature = "std")] +impl<I, C> ParserError<I> for TreeError<I, C> +where + I: Clone, +{ + fn from_error_kind(input: &I, kind: ErrorKind) -> Self { + TreeError::Base(TreeErrorBase { + input: input.clone(), + kind, + cause: None, + }) + } + + fn append(self, input: &I, kind: ErrorKind) -> Self { + let frame = TreeErrorFrame::Kind(TreeErrorBase { + input: input.clone(), + kind, + cause: None, + }); + self.append_frame(frame) + } + + fn or(self, other: Self) -> Self { + match (self, other) { + (TreeError::Alt(mut first), TreeError::Alt(second)) => { + // Just in case an implementation does a divide-and-conquer algorithm + // + // To prevent mixing `alt`s at different levels, parsers should + // `alt_err.append(input, ErrorKind::Alt)`. 
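// Editor's note (not from the vendored patch): concretely, `alt` folds the
// errors of its branches with `or`, so when both sides are already
// `TreeError::Alt` lists (for example, from nested `alt`s), the `extend`
// below concatenates them into one flat list instead of nesting them.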
+ first.extend(second); + TreeError::Alt(first) + } + (TreeError::Alt(mut alt), new) | (new, TreeError::Alt(mut alt)) => { + alt.push(new); + TreeError::Alt(alt) + } + (first, second) => TreeError::Alt(vec![first, second]), + } + } +} + +#[cfg(feature = "std")] +impl<I, C> AddContext<I, C> for TreeError<I, C> +where + I: Clone, +{ + fn add_context(self, input: &I, context: C) -> Self { + let frame = TreeErrorFrame::Context(TreeErrorContext { + input: input.clone(), + context, + }); + self.append_frame(frame) + } +} + +#[cfg(feature = "std")] +impl<I, C, E: std::error::Error + Send + Sync + 'static> FromExternalError<I, E> for TreeError<I, C> +where + I: Clone, +{ + fn from_external_error(input: &I, kind: ErrorKind, e: E) -> Self { + TreeError::Base(TreeErrorBase { + input: input.clone(), + kind, + cause: Some(Box::new(e)), + }) + } +} - if input.is_empty() { - match kind { - VerboseErrorKind::Context(s) => { - write!(&mut result, "{}: in {}, got empty input\n\n", i, s) +#[cfg(feature = "std")] +impl<I, C> TreeError<I, C> +where + I: Clone + crate::lib::std::fmt::Display, + C: fmt::Display, +{ + fn write(&self, f: &mut fmt::Formatter<'_>, indent: usize) -> fmt::Result { + let child_indent = indent + 2; + match self { + TreeError::Base(base) => { + writeln!(f, "{:indent$}{base}", "")?; + } + TreeError::Stack { base, stack } => { + base.write(f, indent)?; + for (level, frame) in stack.iter().enumerate() { + match frame { + TreeErrorFrame::Kind(frame) => { + writeln!(f, "{:child_indent$}{level}: {frame}", "")?; + } + TreeErrorFrame::Context(frame) => { + writeln!(f, "{:child_indent$}{level}: {frame}", "")?; + } + } } - VerboseErrorKind::Winnow(e) => { - write!(&mut result, "{}: in {:?}, got empty input\n\n", i, e) + } + TreeError::Alt(alt) => { + writeln!(f, "{:indent$}during one of:", "")?; + for child in alt { + child.write(f, child_indent)?; } } + } + + Ok(()) + } +} + +#[cfg(feature = "std")] +impl<I: Clone + fmt::Display> fmt::Display for TreeErrorBase<I> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(cause) = self.cause.as_ref() { + write!(f, "caused by {cause}")?; } else { - let prefix = &input.as_bytes()[..offset]; - - // Count the number of newlines in the first `offset` bytes of input - let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1; - - // Find the line that includes the subslice: - // Find the *last* newline before the substring starts - let line_begin = prefix - .iter() - .rev() - .position(|&b| b == b'\n') - .map(|pos| offset - pos) - .unwrap_or(0); - - // Find the full line after that newline - let line = input[line_begin..] 
-            .lines()
-            .next()
-            .unwrap_or(&input[line_begin..])
-            .trim_end();
-
-        // The (1-indexed) column number is the offset of our substring into that line
-        let column_number = line.offset_to(substring) + 1;
-
-        match kind {
-            VerboseErrorKind::Context(s) => write!(
-                &mut result,
-                "{i}: at line {line_number}, in {context}:\n\
-                {line}\n\
-                {caret:>column$}\n\n",
-                i = i,
-                line_number = line_number,
-                context = s,
-                line = line,
-                caret = '^',
-                column = column_number,
-            ),
-            VerboseErrorKind::Winnow(e) => write!(
-                &mut result,
-                "{i}: at line {line_number}, in {kind:?}:\n\
-                {line}\n\
-                {caret:>column$}\n\n",
-                i = i,
-                line_number = line_number,
-                kind = e,
-                line = line,
-                caret = '^',
-                column = column_number,
-            ),
+            let kind = self.kind.description();
+            write!(f, "in {kind}")?;
+        }
+        let input = abbreviate(self.input.to_string());
+        write!(f, " at '{input}'")?;
+        Ok(())
+    }
+}
+
+#[cfg(feature = "std")]
+impl<I: Clone + fmt::Display, C: fmt::Display> fmt::Display for TreeErrorContext<I, C> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let context = &self.context;
+        let input = abbreviate(self.input.to_string());
+        write!(f, "{context} at '{input}'")?;
+        Ok(())
+    }
+}
+
+#[cfg(feature = "std")]
+impl<
+        I: Clone + fmt::Debug + fmt::Display + Sync + Send + 'static,
+        C: fmt::Display + fmt::Debug,
+    > std::error::Error for TreeError<I, C>
+{
+}
+
+#[cfg(feature = "std")]
+fn abbreviate(input: String) -> String {
+    let mut abbrev = None;
+
+    if let Some((line, _)) = input.split_once('\n') {
+        abbrev = Some(line);
+    }
+
+    let max_len = 20;
+    let current = abbrev.unwrap_or(&input);
+    if max_len < current.len() {
+        if let Some((index, _)) = current.char_indices().nth(max_len) {
+            abbrev = Some(&current[..index]);
+        }
+    }
+
+    if let Some(abbrev) = abbrev {
+        format!("{abbrev}...")
+    } else {
+        input
+    }
+}
+
+#[cfg(feature = "std")]
+impl<I: Clone + fmt::Display, C: fmt::Display> fmt::Display for TreeError<I, C> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.write(f, 0)
+    }
+}
+
+/// Deprecated, replaced with [`ContextError`]
+#[cfg(feature = "std")]
+#[allow(deprecated)]
+#[deprecated(since = "0.5.8", note = "Replaced with `ContextError`")]
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct VerboseError<I: Clone, C = &'static str> {
+    /// Accumulated error information
+    pub errors: crate::lib::std::vec::Vec<(I, VerboseErrorKind<C>)>,
+}
+
+#[cfg(feature = "std")]
+#[allow(deprecated)]
+impl<'i, I: ToOwned, C> VerboseError<&'i I, C>
+where
+    <I as ToOwned>::Owned: Clone,
+{
+    /// Obtaining ownership
+    pub fn into_owned(self) -> VerboseError<<I as ToOwned>::Owned, C> {
+        self.map_input(ToOwned::to_owned)
+    }
+}
+
+#[cfg(feature = "std")]
+#[allow(deprecated)]
+impl<I: Clone, C> VerboseError<I, C> {
+    /// Translate the input type
+    pub fn map_input<I2: Clone, O>(self, op: O) -> VerboseError<I2, C>
+    where
+        O: Fn(I) -> I2,
+    {
+        VerboseError {
+            errors: self.errors.into_iter().map(|(i, k)| (op(i), k)).collect(),
+        }
+    }
+}
+
+/// Deprecated, replaced with [`ContextError`]
+#[cfg(feature = "std")]
+#[deprecated(since = "0.5.8", note = "Replaced with `ContextError`")]
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum VerboseErrorKind<C = &'static str> {
+    /// Static string added by the `context` function
+    Context(C),
+    /// Error kind given by various parsers
+    Winnow(ErrorKind),
+}
+
+#[cfg(feature = "std")]
+#[allow(deprecated)]
+impl<I: Clone, C> ParserError<I> for VerboseError<I, C> {
+    fn from_error_kind(input: &I, kind: ErrorKind) -> Self {
VerboseError { + errors: vec![(input.clone(), VerboseErrorKind::Winnow(kind))], + } + } + + fn append(mut self, input: &I, kind: ErrorKind) -> Self { + self.errors + .push((input.clone(), VerboseErrorKind::Winnow(kind))); + self + } +} + +#[cfg(feature = "std")] +#[allow(deprecated)] +impl<I: Clone, C> AddContext<I, C> for VerboseError<I, C> { + fn add_context(mut self, input: &I, ctx: C) -> Self { + self.errors + .push((input.clone(), VerboseErrorKind::Context(ctx))); + self + } +} + +#[cfg(feature = "std")] +#[allow(deprecated)] +impl<I: Clone, C, E> FromExternalError<I, E> for VerboseError<I, C> { + /// Create a new error from an input position and an external error + fn from_external_error(input: &I, kind: ErrorKind, _e: E) -> Self { + Self::from_error_kind(input, kind) + } +} + +#[cfg(feature = "std")] +#[allow(deprecated)] +impl<I: Clone + fmt::Display, C: fmt::Display> fmt::Display for VerboseError<I, C> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "Parse error:")?; + for (input, error) in &self.errors { + match error { + VerboseErrorKind::Winnow(e) => writeln!(f, "{} at: {}", e.description(), input)?, + VerboseErrorKind::Context(s) => writeln!(f, "in section '{}', at: {}", s, input)?, } } - // Because `write!` to a `String` is infallible, this `unwrap` is fine. - .unwrap(); + + Ok(()) } +} - result +#[cfg(feature = "std")] +#[allow(deprecated)] +impl< + I: Clone + fmt::Debug + fmt::Display + Sync + Send + 'static, + C: fmt::Display + fmt::Debug, + > std::error::Error for VerboseError<I, C> +{ } /// Provide some minor debug context for errors @@ -691,31 +1122,287 @@ impl ErrorKind { } } +impl<I> ParserError<I> for ErrorKind { + #[inline] + fn from_error_kind(_input: &I, kind: ErrorKind) -> Self { + kind + } + + #[inline] + fn append(self, _: &I, _: ErrorKind) -> Self { + self + } +} + +impl<I, C> AddContext<I, C> for ErrorKind {} + +impl<I, E> FromExternalError<I, E> for ErrorKind { + /// Create a new error from an input position and an external error + #[inline] + fn from_external_error(_input: &I, kind: ErrorKind, _e: E) -> Self { + kind + } +} + +/// The Display implementation allows the `std::error::Error` implementation +impl fmt::Display for ErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "error {:?}", self) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for ErrorKind {} + +/// See [`Parser::parse`] +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ParseError<I, E> { + input: I, + offset: usize, + inner: E, +} + +impl<I: Stream, E: ParserError<I>> ParseError<I, E> { + pub(crate) fn new(mut input: I, start: I::Checkpoint, inner: E) -> Self { + let offset = input.offset_from(&start); + input.reset(start); + Self { + input, + offset, + inner, + } + } +} + +impl<I, E> ParseError<I, E> { + /// The [`Stream`] at the initial location when parsing started + #[inline] + pub fn input(&self) -> &I { + &self.input + } + + /// The location in [`ParseError::input`] where parsing failed + /// + /// **Note:** This is an offset, not an index, and may point to the end of input + /// (`input.len()`) on eof errors. 
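// Editor's note (not from the vendored patch): a caller can map this offset
// back onto the original input, e.g. with a hypothetical parser over
// byte-oriented input (for `&str` the offset is in bytes, so slicing is only
// safe on a UTF-8 boundary):
//
//     let err = my_parser.parse(input).unwrap_err();
//     let (consumed, failing) = input.split_at(err.offset());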
+ #[inline] + pub fn offset(&self) -> usize { + self.offset + } + + /// The original [`ParserError`] + #[inline] + pub fn inner(&self) -> &E { + &self.inner + } + + /// The original [`ParserError`] + #[inline] + pub fn into_inner(self) -> E { + self.inner + } +} + +impl<I, E> core::fmt::Display for ParseError<I, E> +where + I: AsBStr, + E: core::fmt::Display, +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let input = self.input.as_bstr(); + let span_start = self.offset; + let span_end = span_start; + #[cfg(feature = "std")] + if input.contains(&b'\n') { + let (line_idx, col_idx) = translate_position(input, span_start); + let line_num = line_idx + 1; + let col_num = col_idx + 1; + let gutter = line_num.to_string().len(); + let content = input + .split(|c| *c == b'\n') + .nth(line_idx) + .expect("valid line number"); + + writeln!(f, "parse error at line {}, column {}", line_num, col_num)?; + // | + for _ in 0..gutter { + write!(f, " ")?; + } + writeln!(f, " |")?; + + // 1 | 00:32:00.a999999 + write!(f, "{} | ", line_num)?; + writeln!(f, "{}", String::from_utf8_lossy(content))?; + + // | ^ + for _ in 0..gutter { + write!(f, " ")?; + } + write!(f, " | ")?; + for _ in 0..col_idx { + write!(f, " ")?; + } + // The span will be empty at eof, so we need to make sure we always print at least + // one `^` + write!(f, "^")?; + for _ in (span_start + 1)..(span_end.min(span_start + content.len())) { + write!(f, "^")?; + } + writeln!(f)?; + } else { + let content = input; + writeln!(f, "{}", String::from_utf8_lossy(content))?; + for _ in 0..span_start { + write!(f, " ")?; + } + // The span will be empty at eof, so we need to make sure we always print at least + // one `^` + write!(f, "^")?; + for _ in (span_start + 1)..(span_end.min(span_start + content.len())) { + write!(f, "^")?; + } + writeln!(f)?; + } + write!(f, "{}", self.inner)?; + + Ok(()) + } +} + +#[cfg(feature = "std")] +fn translate_position(input: &[u8], index: usize) -> (usize, usize) { + if input.is_empty() { + return (0, index); + } + + let safe_index = index.min(input.len() - 1); + let column_offset = index - safe_index; + let index = safe_index; + + let nl = input[0..index] + .iter() + .rev() + .enumerate() + .find(|(_, b)| **b == b'\n') + .map(|(nl, _)| index - nl - 1); + let line_start = match nl { + Some(nl) => nl + 1, + None => 0, + }; + let line = input[0..line_start].iter().filter(|b| **b == b'\n').count(); + + // HACK: This treats byte offset and column offsets the same + let column = crate::lib::std::str::from_utf8(&input[line_start..=index]) + .map(|s| s.chars().count() - 1) + .unwrap_or_else(|_| index - line_start); + let column = column + column_offset; + + (line, column) +} + +#[cfg(test)] +#[cfg(feature = "std")] +mod test_parse_error { + use super::*; + + #[test] + fn single_line() { + let mut input = "0xZ123"; + let start = input.checkpoint(); + let _ = input.next_token().unwrap(); + let _ = input.next_token().unwrap(); + let inner = InputError::new(input, ErrorKind::Slice); + let error = ParseError::new(input, start, inner); + let expected = "\ +0xZ123 + ^ +slice error starting at: Z123"; + assert_eq!(error.to_string(), expected); + } +} + +#[cfg(test)] +#[cfg(feature = "std")] +mod test_translate_position { + use super::*; + + #[test] + fn empty() { + let input = b""; + let index = 0; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, 0)); + } + + #[test] + fn start() { + let input = b"Hello"; + let index = 0; + let position = translate_position(&input[..], 
index); + assert_eq!(position, (0, 0)); + } + + #[test] + fn end() { + let input = b"Hello"; + let index = input.len() - 1; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, input.len() - 1)); + } + + #[test] + fn after() { + let input = b"Hello"; + let index = input.len(); + let position = translate_position(&input[..], index); + assert_eq!(position, (0, input.len())); + } + + #[test] + fn first_line() { + let input = b"Hello\nWorld\n"; + let index = 2; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, 2)); + } + + #[test] + fn end_of_line() { + let input = b"Hello\nWorld\n"; + let index = 5; + let position = translate_position(&input[..], index); + assert_eq!(position, (0, 5)); + } + + #[test] + fn start_of_second_line() { + let input = b"Hello\nWorld\n"; + let index = 6; + let position = translate_position(&input[..], index); + assert_eq!(position, (1, 0)); + } + + #[test] + fn second_line() { + let input = b"Hello\nWorld\n"; + let index = 8; + let position = translate_position(&input[..], index); + assert_eq!(position, (1, 2)); + } +} + /// Creates a parse error from a [`ErrorKind`] /// and the position in the input #[cfg(test)] macro_rules! error_position( ($input:expr, $code:expr) => ({ - $crate::error::ParseError::from_error_kind($input, $code) + $crate::error::ParserError::from_error_kind($input, $code) }); ); #[cfg(test)] macro_rules! error_node_position( ($input:expr, $code:expr, $next:expr) => ({ - $crate::error::ParseError::append($next, $input, $code) + $crate::error::ParserError::append($next, $input, $code) }); ); - -#[cfg(test)] -#[cfg(feature = "alloc")] -mod tests { - use super::*; - - #[test] - fn convert_error_panic() { - let input = ""; - - let _result: IResult<_, _, VerboseError<&str>> = 'x'.parse_next(input); - } -} diff --git a/vendor/winnow/src/lib.rs b/vendor/winnow/src/lib.rs index ddbc8f8..0c4d1e9 100644 --- a/vendor/winnow/src/lib.rs +++ b/vendor/winnow/src/lib.rs @@ -7,6 +7,8 @@ //! - [Tutorial][_tutorial::chapter_0] //! - [Special Topics][_topic] //! - [Discussions](https://github.com/winnow-rs/winnow/discussions) +//! - [CHANGELOG](https://github.com/winnow-rs/winnow/blob/v0.5.34/CHANGELOG.md) (includes major version migration +//! guides) //! //! ## Aspirations //! @@ -24,7 +26,7 @@ //! - Resilient maintainership, including //! - Willing to break compatibility rather than batching up breaking changes in large releases //! - Leverage feature flags to keep one active branch -//! - We will support the last 6 months of rust releases (MSRV, currently 1.60) +//! - We will support the last 6 months of rust releases (MSRV, currently 1.64.0) //! //! See also [Special Topic: Why winnow?][crate::_topic::why] //! 
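The `ParseError` type added in the `error.rs` hunk above is what the top-level `Parser::parse` entry point returns on failure; its `Display` impl produces the caret-annotated line rendering driven by `translate_position`, and `offset()` reports where parsing stopped. A minimal sketch of what a caller sees, assuming this vendored 0.5 API (`dec_uint` and the default `ContextError`, both of which appear elsewhere in this patch, are used purely for illustration):

```rust
use winnow::ascii::dec_uint;
use winnow::error::ContextError;
use winnow::prelude::*;

fn main() {
    // `Parser::parse` requires the whole input to be consumed, so the
    // trailing "a3" fails the parse and comes back wrapped in `ParseError`.
    let err = dec_uint::<_, u32, ContextError>.parse("12a3").unwrap_err();

    // Byte offset where parsing stopped; per the `offset()` docs in the
    // hunk above, it may equal `input.len()` on eof errors.
    let _offset: usize = err.offset();

    // The `Display` impl prints the offending line with a `^` caret
    // underneath the failure offset.
    println!("{err}");
}
```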
@@ -48,7 +50,8 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![cfg_attr(docsrs, feature(extended_key_value_attributes))] #![cfg_attr(not(feature = "std"), no_std)] -#![deny(missing_docs)] +#![warn(missing_docs)] +#![warn(clippy::std_instead_of_core)] // BEGIN - Embark standard lints v6 for Rust 1.55+ // do not change or add/remove here, but one can add exceptions after this section // for more info see: <https://github.com/EmbarkStudios/rust-ecosystem/issues/59> @@ -137,7 +140,7 @@ #![allow(clippy::unnested_or_patterns)] #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] #[cfg(feature = "alloc")] -#[macro_use] +#[cfg_attr(test, macro_use)] extern crate alloc; #[cfg(doctest)] extern crate doc_comment; @@ -175,6 +178,7 @@ pub(crate) mod lib { #[cfg(feature = "std")] /// internal std exports for `no_std` compatibility pub mod std { + #![allow(clippy::std_instead_of_core)] #[doc(hidden)] pub use std::{ alloc, borrow, boxed, cmp, collections, convert, fmt, hash, iter, mem, ops, option, @@ -199,14 +203,10 @@ mod parser; pub mod stream; -pub mod bits; -pub mod branch; -pub mod bytes; -pub mod character; +pub mod ascii; +pub mod binary; pub mod combinator; -pub mod multi; -pub mod number; -pub mod sequence; +pub mod token; pub mod trace; #[cfg(feature = "unstable-doc")] @@ -217,7 +217,7 @@ pub mod _tutorial; /// Core concepts available for glob import /// /// Including -/// - [`FinishIResult`] +/// - [`StreamIsPartial`][crate::stream::StreamIsPartial] /// - [`Parser`] /// /// ## Example @@ -225,9 +225,9 @@ pub mod _tutorial; /// ```rust /// use winnow::prelude::*; /// -/// fn parse_data(input: &str) -> IResult<&str, u64> { +/// fn parse_data(input: &mut &str) -> PResult<u64> { /// // ... -/// # winnow::character::dec_uint(input) +/// # winnow::ascii::dec_uint(input) /// } /// /// fn main() { @@ -237,15 +237,13 @@ pub mod _tutorial; /// ``` pub mod prelude { pub use crate::stream::StreamIsPartial as _; - #[allow(deprecated)] - pub use crate::FinishIResult as _; pub use crate::IResult; + pub use crate::PResult; pub use crate::Parser; } -#[allow(deprecated)] -pub use error::FinishIResult; pub use error::IResult; +pub use error::PResult; pub use parser::*; pub use stream::BStr; pub use stream::Bytes; diff --git a/vendor/winnow/src/macros.rs b/vendor/winnow/src/macros.rs deleted file mode 100644 index bec67c1..0000000 --- a/vendor/winnow/src/macros.rs +++ /dev/null @@ -1,78 +0,0 @@ -/// `match` for parsers -/// -/// When parsers have unique prefixes to test for, this offers better performance over -/// [`alt`][crate::branch::alt] though it might be at the cost of duplicating parts of your grammar -/// if you needed to [`peek`][crate::combinator::peek]. -/// -/// For tight control over the error in a catch-all case, use [`fail`][crate::combinator::fail]. -/// -/// # Example -/// -/// ```rust -/// use winnow::prelude::*; -/// use winnow::branch::dispatch; -/// # use winnow::bytes::any; -/// # use winnow::combinator::peek; -/// # use winnow::sequence::preceded; -/// # use winnow::combinator::success; -/// # use winnow::combinator::fail; -/// -/// fn escaped(input: &str) -> IResult<&str, char> { -/// preceded('\\', escape_seq_char).parse_next(input) -/// } -/// -/// fn escape_seq_char(input: &str) -> IResult<&str, char> { -/// dispatch! 
{any; -/// 'b' => success('\u{8}'), -/// 'f' => success('\u{c}'), -/// 'n' => success('\n'), -/// 'r' => success('\r'), -/// 't' => success('\t'), -/// '\\' => success('\\'), -/// '"' => success('"'), -/// _ => fail::<_, char, _>, -/// } -/// .parse_next(input) -/// } -/// -/// assert_eq!(escaped.parse_next("\\nHello"), Ok(("Hello", '\n'))); -/// ``` -#[macro_export] -macro_rules! dispatch { - ($match_parser: expr; $( $pat:pat $(if $pred:expr)? => $expr: expr ),+ $(,)? ) => { - $crate::trace::trace("dispatch", move |i| - { - use $crate::Parser; - let (i, initial) = $match_parser.parse_next(i)?; - match initial { - $( - $pat $(if $pred)? => $expr.parse_next(i), - )* - } - }) - } -} - -macro_rules! succ ( - (0, $submac:ident ! ($($rest:tt)*)) => ($submac!(1, $($rest)*)); - (1, $submac:ident ! ($($rest:tt)*)) => ($submac!(2, $($rest)*)); - (2, $submac:ident ! ($($rest:tt)*)) => ($submac!(3, $($rest)*)); - (3, $submac:ident ! ($($rest:tt)*)) => ($submac!(4, $($rest)*)); - (4, $submac:ident ! ($($rest:tt)*)) => ($submac!(5, $($rest)*)); - (5, $submac:ident ! ($($rest:tt)*)) => ($submac!(6, $($rest)*)); - (6, $submac:ident ! ($($rest:tt)*)) => ($submac!(7, $($rest)*)); - (7, $submac:ident ! ($($rest:tt)*)) => ($submac!(8, $($rest)*)); - (8, $submac:ident ! ($($rest:tt)*)) => ($submac!(9, $($rest)*)); - (9, $submac:ident ! ($($rest:tt)*)) => ($submac!(10, $($rest)*)); - (10, $submac:ident ! ($($rest:tt)*)) => ($submac!(11, $($rest)*)); - (11, $submac:ident ! ($($rest:tt)*)) => ($submac!(12, $($rest)*)); - (12, $submac:ident ! ($($rest:tt)*)) => ($submac!(13, $($rest)*)); - (13, $submac:ident ! ($($rest:tt)*)) => ($submac!(14, $($rest)*)); - (14, $submac:ident ! ($($rest:tt)*)) => ($submac!(15, $($rest)*)); - (15, $submac:ident ! ($($rest:tt)*)) => ($submac!(16, $($rest)*)); - (16, $submac:ident ! ($($rest:tt)*)) => ($submac!(17, $($rest)*)); - (17, $submac:ident ! ($($rest:tt)*)) => ($submac!(18, $($rest)*)); - (18, $submac:ident ! ($($rest:tt)*)) => ($submac!(19, $($rest)*)); - (19, $submac:ident ! ($($rest:tt)*)) => ($submac!(20, $($rest)*)); - (20, $submac:ident ! ($($rest:tt)*)) => ($submac!(21, $($rest)*)); -); diff --git a/vendor/winnow/src/macros/dispatch.rs b/vendor/winnow/src/macros/dispatch.rs new file mode 100644 index 0000000..f16cbc3 --- /dev/null +++ b/vendor/winnow/src/macros/dispatch.rs @@ -0,0 +1,55 @@ +/// `match` for parsers +/// +/// When parsers have unique prefixes to test for, this offers better performance over +/// [`alt`][crate::combinator::alt] though it might be at the cost of duplicating parts of your grammar +/// if you needed to [`peek`][crate::combinator::peek]. +/// +/// For tight control over the error in a catch-all case, use [`fail`][crate::combinator::fail]. +/// +/// # Example +/// +/// ```rust +/// use winnow::prelude::*; +/// use winnow::combinator::dispatch; +/// # use winnow::token::any; +/// # use winnow::combinator::peek; +/// # use winnow::combinator::preceded; +/// # use winnow::combinator::success; +/// # use winnow::combinator::fail; +/// +/// fn escaped(input: &mut &str) -> PResult<char> { +/// preceded('\\', escape_seq_char).parse_next(input) +/// } +/// +/// fn escape_seq_char(input: &mut &str) -> PResult<char> { +/// dispatch! 
{any; +/// 'b' => success('\u{8}'), +/// 'f' => success('\u{c}'), +/// 'n' => success('\n'), +/// 'r' => success('\r'), +/// 't' => success('\t'), +/// '\\' => success('\\'), +/// '"' => success('"'), +/// _ => fail::<_, char, _>, +/// } +/// .parse_next(input) +/// } +/// +/// assert_eq!(escaped.parse_peek("\\nHello"), Ok(("Hello", '\n'))); +/// ``` +#[macro_export] +#[doc(hidden)] // forced to be visible in intended location +macro_rules! dispatch { + ($match_parser: expr; $( $pat:pat $(if $pred:expr)? => $expr: expr ),+ $(,)? ) => { + $crate::trace::trace("dispatch", move |i: &mut _| + { + use $crate::Parser; + let initial = $match_parser.parse_next(i)?; + match initial { + $( + $pat $(if $pred)? => $expr.parse_next(i), + )* + } + }) + } +} diff --git a/vendor/winnow/src/macros/mod.rs b/vendor/winnow/src/macros/mod.rs new file mode 100644 index 0000000..09dc68b --- /dev/null +++ b/vendor/winnow/src/macros/mod.rs @@ -0,0 +1,5 @@ +mod dispatch; +mod seq; + +#[cfg(test)] +mod test; diff --git a/vendor/winnow/src/macros/seq.rs b/vendor/winnow/src/macros/seq.rs new file mode 100644 index 0000000..756f300 --- /dev/null +++ b/vendor/winnow/src/macros/seq.rs @@ -0,0 +1,268 @@ +/// Initialize a struct or tuple out of a sequences of parsers +/// +///# Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::ascii::{alphanumeric1, dec_uint, space0}; +/// # use winnow::combinator::delimited; +/// # use winnow::combinator::success; +/// # use winnow::error::ContextError; +/// use winnow::combinator::seq; +/// +/// #[derive(Default, Debug, PartialEq)] +/// struct Field { +/// namespace: u32, +/// name: Vec<u8>, +/// value: Vec<u8>, +/// point: (u32, u32), +/// metadata: Vec<u8>, +/// } +/// +/// // Parse into structs / tuple-structs +/// fn field(input: &mut &[u8]) -> PResult<Field> { +/// seq!{Field { +/// namespace: success(5), +/// name: alphanumeric1.map(|s: &[u8]| s.to_owned()), +/// // `_` fields are ignored when building the struct +/// _: (space0, b':', space0), +/// value: alphanumeric1.map(|s: &[u8]| s.to_owned()), +/// _: (space0, b':', space0), +/// point: point, +/// // default initialization also works +/// ..Default::default() +/// }}.parse_next(input) +/// } +/// +/// // Or parse into tuples +/// fn point(input: &mut &[u8]) -> PResult<(u32, u32)> { +/// let num = dec_uint::<_, u32, ContextError>; +/// seq!(num, _: (space0, b',', space0), num).parse_next(input) +/// } +/// +/// assert_eq!( +/// field.parse_peek(&b"test: data: 123 , 4"[..]), +/// Ok(( +/// &b""[..], +/// Field { +/// namespace: 5, +/// name: b"test"[..].to_owned(), +/// value: b"data"[..].to_owned(), +/// point: (123, 4), +/// metadata: Default::default(), +/// }, +/// )), +/// ); +/// ``` +#[macro_export] +#[doc(alias = "tuple")] +#[doc(alias = "preceded")] +#[doc(alias = "terminated")] +#[doc(alias = "delimited")] +#[doc(alias = "pair")] +#[doc(alias = "separated_pair")] +#[doc(alias = "struct_parser")] +macro_rules! 
seq { + ($name: ident { $($fields: tt)* }) => { + $crate::trace::trace(stringify!($name), move |input: &mut _| { + use $crate::Parser; + $crate::seq_parse_struct_fields!(input; $($fields)*); + #[allow(clippy::redundant_field_names)] + Ok($crate::seq_init_struct_fields!( ($($fields)*); $name;)) + }) + }; + ($name: ident ( $($elements: tt)* )) => { + $crate::trace::trace(stringify!($name), move |input: &mut _| { + use $crate::Parser; + $crate::seq_parse_tuple_fields!( ($($elements)*) ; ).map(|t| { + $crate::seq_init_tuple_fields!( + ($($elements)*); + (t.0, t.1, t.2, t.3, t.4, t.5, t.6, t.7, t.8, t.9, t.10, t.11, t.12, t.13, t.14, t.15, t.16, t.17, t.18, t.19, t.20); + $name; + ) + }).parse_next(input) + }) + }; + (( $($elements: tt)* )) => { + $crate::trace::trace("tuple", move |input: &mut _| { + use $crate::Parser; + $crate::seq_parse_tuple_fields!( ($($elements)*) ; ).map(|t| { + $crate::seq_init_tuple_fields!( + ($($elements)*); + (t.0, t.1, t.2, t.3, t.4, t.5, t.6, t.7, t.8, t.9, t.10, t.11, t.12, t.13, t.14, t.15, t.16, t.17, t.18, t.19, t.20); + ; + ) + }).parse_next(input) + }) + }; + ($($elements: tt)*) => { + $crate::seq!(($($elements)*)) + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! seq_parse_struct_fields { + ( + $input: ident; + _ : $head_parser: expr, $($fields: tt)* + ) => { + let _ = $head_parser.parse_next($input)?; + $crate::seq_parse_struct_fields!($input; $($fields)*) + }; + ( + $input: ident; + _ : $head_parser: expr + ) => { + let _ = $head_parser.parse_next($input)?; + }; + ( + $input: ident; + $head_field: ident : $head_parser: expr, $($fields: tt)* + ) => { + let $head_field = $head_parser.parse_next($input)?; + $crate::seq_parse_struct_fields!($input; $($fields)*) + }; + ( + $input: ident; + $head_field: ident : $head_parser: expr + ) => { + let $head_field = $head_parser.parse_next($input)?; + }; + ( + $input: expr; + .. $update: expr + ) => {}; + ( + $input: expr; + $(,)? + ) => {}; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! seq_parse_tuple_fields { + ( + (_ : $head_parser: expr, $($fields: tt)* ); + $($sequenced: tt)* + ) => { + $crate::seq_parse_tuple_fields!( ( $($fields)* ) ; $($sequenced)* $head_parser.void(), ) + }; + ( + (_ : $head_parser: expr); + $($sequenced: tt)* + ) => { + $crate::seq_parse_tuple_fields!((); $($sequenced)* $head_parser.void(), ) + }; + ( + ($head_parser: expr, $($fields: tt)*); + $($sequenced: tt)* + ) => { + $crate::seq_parse_tuple_fields!( ( $($fields)* ) ; $($sequenced)* $head_parser, ) + }; + ( + ($head_parser: expr); + $($sequenced: tt)* + )=> { + $crate::seq_parse_tuple_fields!((); $($sequenced)* $head_parser, ) + }; + ( + (); + $($sequenced: tt)* + ) => { + ($($sequenced)*) + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! seq_init_struct_fields { + ( + (_ : $head_parser: expr, $($fields: tt)*); + $name: ident; + $($inits: tt)* + ) => { + $crate::seq_init_struct_fields!( ( $($fields)* ) ; $name ; $($inits)* ) + }; + ( + (_ : $head_parser: expr); + $name: ident; + $($inits: tt)* + ) => { + $crate::seq_init_struct_fields!( (); $name ; $($inits)* ) + }; + ( + ($head_field: ident : $head_parser: expr, $($fields: tt)*); + $name: ident; + $($inits: tt)* + ) => + { + $crate::seq_init_struct_fields!( ( $($fields)* ) ; $name ; $($inits)* $head_field: $head_field, ) + }; + ( + ($head_field: ident : $head_parser: expr); + $name: ident; + $($inits: tt)* + ) => { + $crate::seq_init_struct_fields!( (); $name ; $($inits)* $head_field: $head_field,) + }; + ( + (.. 
$update: expr); + $name: ident; + $($inits: tt)* + ) => { + $name { $($inits)* ..$update } + }; + ( + ($(,)?); + $name: ident; + $($inits: tt)* + ) => { + $name { $($inits)* } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! seq_init_tuple_fields { + ( + (_ : $head_parser: expr, $($fields: tt)*); + ($head_arg: expr, $($args: expr),*); + $($name: ident)?; + $($inits: tt)* + ) => { + $crate::seq_init_tuple_fields!( ( $($fields)* ); ( $($args),* ) ; $($name)? ; $($inits)* ) + }; + ( + (_ : $head_parser: expr); + ($head_arg: expr, $($args: expr),*); + $($name: ident)?; + $($inits: tt)* + ) => { + $crate::seq_init_tuple_fields!((); ( $($args),* ); $($name)? ; $($inits)*) + }; + ( + ($head_parser: expr, $($fields: tt)*); + ($head_arg: expr, $($args: expr),*); + $($name: ident)?; + $($inits: tt)* + ) => { + $crate::seq_init_tuple_fields!( ( $($fields)* ) ; ( $($args),* ) ; $($name)? ; $($inits)* $head_arg, ) + }; + ( + ($head_parser: expr); + ($head_arg: expr, $($args: expr),*); + $($name: ident)?; + $($inits: tt)* + ) => { + $crate::seq_init_tuple_fields!((); ( $($args),* ); $($name)? ; $($inits)* $head_arg) + }; + ( + (); + ($($args: expr),*); + $($name: ident)?; + $($inits: expr),* $(,)? + ) => { + $($name)?( $($inits,)* ) + }; +} diff --git a/vendor/winnow/src/macros/test.rs b/vendor/winnow/src/macros/test.rs new file mode 100644 index 0000000..faf917f --- /dev/null +++ b/vendor/winnow/src/macros/test.rs @@ -0,0 +1,378 @@ +use crate::ascii::dec_uint; +use crate::combinator::dispatch; +use crate::combinator::fail; +use crate::combinator::seq; +use crate::combinator::success; +use crate::error::ErrMode; +use crate::error::ErrorKind; +use crate::error::ParserError; +use crate::prelude::*; +use crate::token::any; + +#[test] +fn dispatch_basics() { + fn escape_seq_char(input: &mut &str) -> PResult<char> { + dispatch! {any; + 'b' => success('\u{8}'), + 'f' => success('\u{c}'), + 'n' => success('\n'), + 'r' => success('\r'), + 't' => success('\t'), + '\\' => success('\\'), + '"' => success('"'), + _ => fail::<_, char, _>, + } + .parse_next(input) + } + assert_eq!(escape_seq_char.parse_peek("b123"), Ok(("123", '\u{8}'))); + assert_eq!( + escape_seq_char.parse_peek("error"), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"rror", + ErrorKind::Fail + ))) + ); + assert_eq!( + escape_seq_char.parse_peek(""), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"", + ErrorKind::Fail + ))) + ); +} + +#[test] +fn seq_struct_basics() { + #[derive(Debug, PartialEq)] + struct Point { + x: u32, + y: u32, + } + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point { + x: dec_uint, + _: ',', + y: dec_uint, + } + } + .parse_next(input) + } + assert_eq!( + parser.parse_peek("123,4 remaining"), + Ok((" remaining", Point { x: 123, y: 4 },)), + ); + assert_eq!( + parser.parse_peek("123, remaining"), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &" remaining", + ErrorKind::Fail + ))) + ); + assert_eq!( + parser.parse_peek(""), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"", + ErrorKind::Fail + ))) + ); +} + +#[test] +fn seq_struct_default_init() { + #[derive(Debug, PartialEq, Default)] + struct Point { + x: u32, + y: u32, + z: u32, + } + + fn parser(input: &mut &str) -> PResult<Point> { + seq! 
{ + Point { + x: dec_uint, + _: ',', + y: dec_uint, + ..Default::default() + } + } + .parse_next(input) + } + assert_eq!( + parser.parse_peek("123,4 remaining"), + Ok((" remaining", Point { x: 123, y: 4, z: 0 },)), + ); + assert_eq!( + parser.parse_peek("123, remaining"), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &" remaining", + ErrorKind::Fail + ))) + ); + assert_eq!( + parser.parse_peek(""), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"", + ErrorKind::Fail + ))) + ); +} + +#[test] +fn seq_struct_trailing_comma_elided() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point { + x: u32, + y: u32, + } + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point { + x: dec_uint, + _: ',', + y: dec_uint, + _: success(()), + } + } + .parse_next(input) + } +} + +#[test] +fn seq_struct_no_trailing_comma() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point { + x: u32, + y: u32, + } + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point { + x: dec_uint, + _: ',', + y: dec_uint + } + } + .parse_next(input) + } +} + +#[test] +fn seq_struct_no_trailing_comma_elided() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point { + x: u32, + y: u32, + } + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point { + x: dec_uint, + _: ',', + y: dec_uint, + _: success(()) + } + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_struct_basics() { + #[derive(Debug, PartialEq)] + struct Point(u32, u32); + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point( + dec_uint, + _: ',', + dec_uint, + ) + } + .parse_next(input) + } + assert_eq!( + parser.parse_peek("123,4 remaining"), + Ok((" remaining", Point(123, 4),)), + ); + assert_eq!( + parser.parse_peek("123, remaining"), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &" remaining", + ErrorKind::Fail + ))) + ); + assert_eq!( + parser.parse_peek(""), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"", + ErrorKind::Fail + ))) + ); +} + +#[test] +fn seq_tuple_struct_trailing_comma_elided() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point(u32, u32); + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point( + dec_uint, + _: ',', + dec_uint, + _: success(()), + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_struct_no_trailing_comma() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point(u32, u32); + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point( + dec_uint, + _: ',', + dec_uint + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_struct_no_trailing_comma_elided() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point(u32, u32); + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point( + dec_uint, + _: ',', + dec_uint, + _: success(()) + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_basics() { + fn parser(input: &mut &str) -> PResult<(u32, u32)> { + seq! 
{ + ( + dec_uint, + _: ',', + dec_uint, + ) + } + .parse_next(input) + } + assert_eq!( + parser.parse_peek("123,4 remaining"), + Ok((" remaining", (123, 4),)), + ); + assert_eq!( + parser.parse_peek("123, remaining"), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &" remaining", + ErrorKind::Fail + ))) + ); + assert_eq!( + parser.parse_peek(""), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"", + ErrorKind::Fail + ))) + ); +} + +#[test] +fn seq_tuple_trailing_comma_elided() { + #![allow(dead_code)] + + fn parser(input: &mut &str) -> PResult<(u32, u32)> { + seq! { + ( + dec_uint, + _: ',', + dec_uint, + _: success(()), + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_no_trailing_comma() { + #![allow(dead_code)] + + fn parser(input: &mut &str) -> PResult<(u32, u32)> { + seq! { + ( + dec_uint, + _: ',', + dec_uint + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_no_trailing_comma_elided() { + #![allow(dead_code)] + + fn parser(input: &mut &str) -> PResult<(u32, u32)> { + seq! { + ( + dec_uint, + _: ',', + dec_uint, + _: success(()) + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_no_parens() { + #![allow(dead_code)] + + fn parser(input: &mut &str) -> PResult<(u32, u32)> { + seq! ( + dec_uint, + _: ',', + dec_uint, + ) + .parse_next(input) + } +} diff --git a/vendor/winnow/src/multi/mod.rs b/vendor/winnow/src/multi/mod.rs deleted file mode 100644 index 1c3aed7..0000000 --- a/vendor/winnow/src/multi/mod.rs +++ /dev/null @@ -1,1065 +0,0 @@ -//! Combinators applying their child parser multiple times - -#[cfg(test)] -mod tests; - -use crate::error::ErrMode; -use crate::error::ErrorKind; -use crate::error::ParseError; -use crate::stream::Accumulate; -use crate::stream::{Stream, StreamIsPartial, ToUsize, UpdateSlice}; -use crate::trace::trace; -use crate::Parser; - -/// [`Accumulate`] the output of a parser into a container, like `Vec` -/// -/// This stops on [`ErrMode::Backtrack`]. To instead chain an error up, see -/// [`cut_err`][crate::combinator::cut_err]. -/// -/// To recognize a series of tokens, [`Accumulate`] into a `()` and then [`Parser::recognize`]. 
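The `()`-accumulation idiom mentioned just above is worth seeing concretely: accumulating into `()` skips building a `Vec`, and `recognize()` then returns the slice of input covered by all the repetitions. A minimal sketch against the pre-0.5 API that this hunk deletes:

```rust
use winnow::multi::many0;
use winnow::prelude::*;

// `C = ()` discards the per-item output; `recognize()` turns the whole
// repetition back into the matched input slice.
fn abcs(input: &str) -> IResult<&str, &str> {
    many0::<_, _, (), _, _>("abc").recognize().parse_next(input)
}

fn main() {
    assert_eq!(abcs("abcabc123"), Ok(("123", "abcabc")));
    assert_eq!(abcs("123"), Ok(("123", "")));
}
```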
-/// -/// **Warning:** if the parser passed in accepts empty inputs (like `alpha0` or `digit0`), `many0` will -/// return an error, to prevent going into an infinite loop -/// -/// # Example -/// -#[cfg_attr(not(feature = "std"), doc = "```ignore")] -#[cfg_attr(feature = "std", doc = "```")] -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::many0; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// many0("abc").parse_next(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); -/// assert_eq!(parser(""), Ok(("", vec![]))); -/// ``` -#[doc(alias = "skip_many")] -#[doc(alias = "repeated")] -#[doc(alias = "many0_count")] -pub fn many0<I, O, C, E, F>(mut f: F) -> impl Parser<I, C, E> -where - I: Stream, - C: Accumulate<O>, - F: Parser<I, O, E>, - E: ParseError<I>, -{ - trace("many0", move |mut i: I| { - let mut acc = C::initial(None); - loop { - let len = i.eof_offset(); - match f.parse_next(i.clone()) { - Err(ErrMode::Backtrack(_)) => return Ok((i, acc)), - Err(e) => return Err(e), - Ok((i1, o)) => { - // infinite loop check: the parser must always consume - if i1.eof_offset() == len { - return Err(ErrMode::assert(i, "many parsers must always consume")); - } - - i = i1; - acc.accumulate(o); - } - } - } - }) -} - -/// [`Accumulate`] the output of a parser into a container, like `Vec` -/// -/// -/// This stops on [`ErrMode::Backtrack`] if there is at least one result. To instead chain an error up, -/// see [`cut_err`][crate::combinator::cut_err]. -/// -/// # Arguments -/// * `f` The parser to apply. -/// -/// To recognize a series of tokens, [`Accumulate`] into a `()` and then [`Parser::recognize`]. -/// -/// **Warning:** If the parser passed to `many1` accepts empty inputs -/// (like `alpha0` or `digit0`), `many1` will return an error, -/// to prevent going into an infinite loop. 
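The warning above is backed by a runtime guard rather than a type-level check: each iteration compares `eof_offset` before and after the child parser and bails if nothing was consumed. A sketch of the difference under the same pre-0.5 API (per the deleted tests near the end of this patch, tripping the guard panics under `debug_assertions` and otherwise surfaces as an `ErrorKind::Assert` backtrack error):

```rust
use winnow::character::{alpha0, alpha1};
use winnow::multi::many0;
use winnow::prelude::*;

// Fine: `alpha1` must consume at least one letter per iteration, so the
// loop always makes progress and terminates on its own.
fn words(input: &str) -> IResult<&str, Vec<&str>> {
    many0(alpha1).parse_next(input)
}

// Trips the guard: `alpha0` happily matches an empty slice, so the first
// zero-length success would otherwise repeat forever.
fn broken(input: &str) -> IResult<&str, Vec<&str>> {
    many0(alpha0).parse_next(input)
}

fn main() {
    assert_eq!(words("abc"), Ok(("", vec!["abc"])));
    // Not invoked: `broken("123")` panics in debug builds and returns an
    // `ErrorKind::Assert` backtrack error in release builds.
    let _ = broken;
}
```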
-/// -/// # Example -/// -#[cfg_attr(not(feature = "std"), doc = "```ignore")] -#[cfg_attr(feature = "std", doc = "```")] -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::many1; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// many1("abc").parse_next(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(Error::new("123123", ErrorKind::Tag)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// ``` -#[doc(alias = "skip_many1")] -#[doc(alias = "repeated")] -#[doc(alias = "many1_count")] -pub fn many1<I, O, C, E, F>(mut f: F) -> impl Parser<I, C, E> -where - I: Stream, - C: Accumulate<O>, - F: Parser<I, O, E>, - E: ParseError<I>, -{ - trace("many1", move |mut i: I| match f.parse_next(i.clone()) { - Err(e) => Err(e.append(i, ErrorKind::Many)), - Ok((i1, o)) => { - let mut acc = C::initial(None); - acc.accumulate(o); - i = i1; - - loop { - let len = i.eof_offset(); - match f.parse_next(i.clone()) { - Err(ErrMode::Backtrack(_)) => return Ok((i, acc)), - Err(e) => return Err(e), - Ok((i1, o)) => { - // infinite loop check: the parser must always consume - if i1.eof_offset() == len { - return Err(ErrMode::assert(i, "many parsers must always consume")); - } - - i = i1; - acc.accumulate(o); - } - } - } - } - }) -} - -/// Applies the parser `f` until the parser `g` produces a result. -/// -/// Returns a tuple of the results of `f` in a `Vec` and the result of `g`. -/// -/// `f` keeps going so long as `g` produces [`ErrMode::Backtrack`]. To instead chain an error up, see [`cut_err`][crate::combinator::cut_err]. -/// -/// To recognize a series of tokens, [`Accumulate`] into a `()` and then [`Parser::recognize`]. 
-/// -/// # Example -/// -#[cfg_attr(not(feature = "std"), doc = "```ignore")] -#[cfg_attr(feature = "std", doc = "```")] -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::many_till0; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, (Vec<&str>, &str)> { -/// many_till0("abc", "end").parse_next(s) -/// }; -/// -/// assert_eq!(parser("abcabcend"), Ok(("", (vec!["abc", "abc"], "end")))); -/// assert_eq!(parser("abc123end"), Err(ErrMode::Backtrack(Error::new("123end", ErrorKind::Tag)))); -/// assert_eq!(parser("123123end"), Err(ErrMode::Backtrack(Error::new("123123end", ErrorKind::Tag)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// assert_eq!(parser("abcendefg"), Ok(("efg", (vec!["abc"], "end")))); -/// ``` -pub fn many_till0<I, O, C, P, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, (C, P), E> -where - I: Stream, - C: Accumulate<O>, - F: Parser<I, O, E>, - G: Parser<I, P, E>, - E: ParseError<I>, -{ - trace("many_till0", move |mut i: I| { - let mut res = C::initial(None); - loop { - let len = i.eof_offset(); - match g.parse_next(i.clone()) { - Ok((i1, o)) => return Ok((i1, (res, o))), - Err(ErrMode::Backtrack(_)) => { - match f.parse_next(i.clone()) { - Err(e) => return Err(e.append(i, ErrorKind::Many)), - Ok((i1, o)) => { - // infinite loop check: the parser must always consume - if i1.eof_offset() == len { - return Err(ErrMode::assert(i, "many parsers must always consume")); - } - - res.accumulate(o); - i = i1; - } - } - } - Err(e) => return Err(e), - } - } - }) -} - -/// Alternates between two parsers to produce a list of elements. -/// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see -/// [`cut_err`][crate::combinator::cut_err]. -/// -/// # Arguments -/// * `parser` Parses the elements of the list. -/// * `sep` Parses the separator between list elements. 
-/// -/// # Example -/// -#[cfg_attr(not(feature = "std"), doc = "```ignore")] -#[cfg_attr(feature = "std", doc = "```")] -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::separated0; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// separated0("abc", "|").parse_next(s) -/// } -/// -/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"]))); -/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); -/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); -/// assert_eq!(parser(""), Ok(("", vec![]))); -/// assert_eq!(parser("def|abc"), Ok(("def|abc", vec![]))); -/// ``` -#[doc(alias = "sep_by")] -#[doc(alias = "separated_list0")] -pub fn separated0<I, O, C, O2, E, P, S>(mut parser: P, mut sep: S) -> impl Parser<I, C, E> -where - I: Stream, - C: Accumulate<O>, - P: Parser<I, O, E>, - S: Parser<I, O2, E>, - E: ParseError<I>, -{ - trace("separated0", move |mut i: I| { - let mut res = C::initial(None); - - match parser.parse_next(i.clone()) { - Err(ErrMode::Backtrack(_)) => return Ok((i, res)), - Err(e) => return Err(e), - Ok((i1, o)) => { - res.accumulate(o); - i = i1; - } - } - - loop { - let len = i.eof_offset(); - match sep.parse_next(i.clone()) { - Err(ErrMode::Backtrack(_)) => return Ok((i, res)), - Err(e) => return Err(e), - Ok((i1, _)) => { - // infinite loop check: the parser must always consume - if i1.eof_offset() == len { - return Err(ErrMode::assert(i, "sep parsers must always consume")); - } - - match parser.parse_next(i1.clone()) { - Err(ErrMode::Backtrack(_)) => return Ok((i, res)), - Err(e) => return Err(e), - Ok((i2, o)) => { - res.accumulate(o); - i = i2; - } - } - } - } - } - }) -} - -/// Alternates between two parsers to produce a list of elements until [`ErrMode::Backtrack`]. -/// -/// Fails if the element parser does not produce at least one element.$ -/// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see -/// [`cut_err`][crate::combinator::cut_err]. -/// -/// # Arguments -/// * `sep` Parses the separator between list elements. -/// * `f` Parses the elements of the list. 
-/// -/// # Example -/// -#[cfg_attr(not(feature = "std"), doc = "```ignore")] -#[cfg_attr(feature = "std", doc = "```")] -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::separated1; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// separated1("abc", "|").parse_next(s) -/// } -/// -/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"]))); -/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); -/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(Error::new("def|abc", ErrorKind::Tag)))); -/// ``` -#[doc(alias = "sep_by1")] -#[doc(alias = "separated_list1")] -pub fn separated1<I, O, C, O2, E, P, S>(mut parser: P, mut sep: S) -> impl Parser<I, C, E> -where - I: Stream, - C: Accumulate<O>, - P: Parser<I, O, E>, - S: Parser<I, O2, E>, - E: ParseError<I>, -{ - trace("separated1", move |mut i: I| { - let mut res = C::initial(None); - - // Parse the first element - match parser.parse_next(i.clone()) { - Err(e) => return Err(e), - Ok((i1, o)) => { - res.accumulate(o); - i = i1; - } - } - - loop { - let len = i.eof_offset(); - match sep.parse_next(i.clone()) { - Err(ErrMode::Backtrack(_)) => return Ok((i, res)), - Err(e) => return Err(e), - Ok((i1, _)) => { - // infinite loop check: the parser must always consume - if i1.eof_offset() == len { - return Err(ErrMode::assert(i, "sep parsers must always consume")); - } - - match parser.parse_next(i1.clone()) { - Err(ErrMode::Backtrack(_)) => return Ok((i, res)), - Err(e) => return Err(e), - Ok((i2, o)) => { - res.accumulate(o); - i = i2; - } - } - } - } - } - }) -} - -/// Alternates between two parsers, merging the results (left associative) -/// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see -/// [`cut_err`][crate::combinator::cut_err]. 
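As a quick sanity check on the associativity the doc examples just below rely on: left-associative folding of `9 - 3 - 5` computes `(9 - 3) - 5 = 1` rather than `9 - (3 - 5) = 11`, while the right-associative `separated_foldr1` evaluates `2 ^ 3 ^ 2` as `2 ^ (3 ^ 2) = 2 ^ 9 = 512` rather than `(2 ^ 3) ^ 2 = 64`.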
-/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::separated_foldl1; -/// use winnow::character::dec_int; -/// -/// fn parser(s: &str) -> IResult<&str, i32> { -/// separated_foldl1(dec_int, "-", |l, _, r| l - r).parse_next(s) -/// } -/// -/// assert_eq!(parser("9-3-5"), Ok(("", 1))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); -/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(Error::new("def|abc", ErrorKind::Slice)))); -/// ``` -pub fn separated_foldl1<I, O, O2, E, P, S, Op>( - mut parser: P, - mut sep: S, - op: Op, -) -> impl Parser<I, O, E> -where - I: Stream, - P: Parser<I, O, E>, - S: Parser<I, O2, E>, - E: ParseError<I>, - Op: Fn(O, O2, O) -> O, -{ - trace("separated_foldl1", move |i: I| { - let (mut i, mut ol) = parser.parse_next(i)?; - - loop { - let len = i.eof_offset(); - match sep.parse_next(i.clone()) { - Err(ErrMode::Backtrack(_)) => return Ok((i, ol)), - Err(e) => return Err(e), - Ok((i1, s)) => { - // infinite loop check: the parser must always consume - if i1.eof_offset() == len { - return Err(ErrMode::assert(i, "many parsers must always consume")); - } - - match parser.parse_next(i1.clone()) { - Err(ErrMode::Backtrack(_)) => return Ok((i, ol)), - Err(e) => return Err(e), - Ok((i2, or)) => { - ol = op(ol, s, or); - i = i2; - } - } - } - } - } - }) -} - -/// Alternates between two parsers, merging the results (right associative) -/// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see -/// [`cut_err`][crate::combinator::cut_err]. -/// -/// # Example -/// -/// ``` -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::separated_foldr1; -/// use winnow::character::dec_uint; -/// -/// fn parser(s: &str) -> IResult<&str, u32> { -/// separated_foldr1(dec_uint, "^", |l: u32, _, r: u32| l.pow(r)).parse_next(s) -/// } -/// -/// assert_eq!(parser("2^3^2"), Ok(("", 512))); -/// assert_eq!(parser("2"), Ok(("", 2))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); -/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(Error::new("def|abc", ErrorKind::Slice)))); -/// ``` -#[cfg(feature = "alloc")] -pub fn separated_foldr1<I, O, O2, E, P, S, Op>( - mut parser: P, - mut sep: S, - op: Op, -) -> impl Parser<I, O, E> -where - I: Stream, - P: Parser<I, O, E>, - S: Parser<I, O2, E>, - E: ParseError<I>, - Op: Fn(O, O2, O) -> O, -{ - trace("separated_foldr1", move |i: I| { - let (i, ol) = parser.parse_next(i)?; - let (i, all): (_, crate::lib::std::vec::Vec<(O2, O)>) = - many0((sep.by_ref(), parser.by_ref())).parse_next(i)?; - if let Some((s, or)) = all - .into_iter() - .rev() - .reduce(|(sr, or), (sl, ol)| (sl, op(ol, sr, or))) - { - let merged = op(ol, s, or); - Ok((i, merged)) - } else { - Ok((i, ol)) - } - }) -} - -/// Repeats the embedded parser `m..=n` times -/// -/// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see -/// [`cut_err`][crate::combinator::cut_err]. -/// -/// # Arguments -/// * `m` The minimum number of iterations. -/// * `n` The maximum number of iterations. -/// * `f` The parser to apply. -/// -/// To recognize a series of tokens, [`Accumulate`] into a `()` and then [`Parser::recognize`]. 
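One subtlety the doc example below does not cover: `many_m_n` rejects inverted bounds up front with a non-recoverable `Cut` error, per the `min > max` guard at the top of its body in this hunk. A minimal sketch, again using the pre-0.5 API being removed here:

```rust
use winnow::error::{ErrMode, Error, ErrorKind};
use winnow::multi::many_m_n;
use winnow::prelude::*;

// Swapped bounds can never be satisfied, so they are reported eagerly as a
// `Cut` error carrying `ErrorKind::Many` at the original input position.
fn parser(input: &str) -> IResult<&str, Vec<&str>> {
    many_m_n(3, 2, "abc").parse_next(input)
}

fn main() {
    assert_eq!(
        parser("abcabc"),
        Err(ErrMode::Cut(Error::new("abcabc", ErrorKind::Many)))
    );
}
```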
-/// -/// **Warning:** If the parser passed to `many1` accepts empty inputs -/// (like `alpha0` or `digit0`), `many1` will return an error, -/// to prevent going into an infinite loop. -/// -/// # Example -/// -#[cfg_attr(not(feature = "std"), doc = "```ignore")] -#[cfg_attr(feature = "std", doc = "```")] -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::many_m_n; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// many_m_n(0, 2, "abc").parse_next(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); -/// assert_eq!(parser(""), Ok(("", vec![]))); -/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); -/// ``` -#[doc(alias = "repeated")] -pub fn many_m_n<I, O, C, E, F>(min: usize, max: usize, mut parse: F) -> impl Parser<I, C, E> -where - I: Stream, - C: Accumulate<O>, - F: Parser<I, O, E>, - E: ParseError<I>, -{ - trace("many_m_n", move |mut input: I| { - if min > max { - return Err(ErrMode::Cut(E::from_error_kind(input, ErrorKind::Many))); - } - - let mut res = C::initial(Some(min)); - for count in 0..max { - let len = input.eof_offset(); - match parse.parse_next(input.clone()) { - Ok((tail, value)) => { - // infinite loop check: the parser must always consume - if tail.eof_offset() == len { - return Err(ErrMode::assert(input, "many parsers must always consume")); - } - - res.accumulate(value); - input = tail; - } - Err(ErrMode::Backtrack(e)) => { - if count < min { - return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); - } else { - return Ok((input, res)); - } - } - Err(e) => { - return Err(e); - } - } - } - - Ok((input, res)) - }) -} - -/// [`Accumulate`] the output of a parser into a container, like `Vec` -/// -/// # Arguments -/// * `f` The parser to apply. -/// * `count` How often to apply the parser. -/// -/// To recognize a series of tokens, [`Accumulate`] into a `()` and then [`Parser::recognize`]. 
-/// -/// # Example -/// -#[cfg_attr(not(feature = "std"), doc = "```ignore")] -#[cfg_attr(feature = "std", doc = "```")] -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::count; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// count("abc", 2).parse_next(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Err(ErrMode::Backtrack(Error::new("123", ErrorKind::Tag)))); -/// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(Error::new("123123", ErrorKind::Tag)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); -/// ``` -#[doc(alias = "skip_counskip_count")] -pub fn count<I, O, C, E, F>(mut f: F, count: usize) -> impl Parser<I, C, E> -where - I: Stream, - C: Accumulate<O>, - F: Parser<I, O, E>, - E: ParseError<I>, -{ - trace("count", move |i: I| { - let mut input = i.clone(); - let mut res = C::initial(Some(count)); - - for _ in 0..count { - let input_ = input.clone(); - match f.parse_next(input_) { - Ok((i, o)) => { - res.accumulate(o); - input = i; - } - Err(e) => { - return Err(e.append(i, ErrorKind::Many)); - } - } - } - - Ok((input, res)) - }) -} - -/// Runs the embedded parser repeatedly, filling the given slice with results. -/// -/// This parser fails if the input runs out before the given slice is full. -/// -/// # Arguments -/// * `f` The parser to apply. -/// * `buf` The slice to fill -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::fill; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, [&str; 2]> { -/// let mut buf = ["", ""]; -/// let (rest, ()) = fill("abc", &mut buf).parse_next(s)?; -/// Ok((rest, buf)) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", ["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Err(ErrMode::Backtrack(Error::new("123", ErrorKind::Tag)))); -/// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(Error::new("123123", ErrorKind::Tag)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// assert_eq!(parser("abcabcabc"), Ok(("abc", ["abc", "abc"]))); -/// ``` -pub fn fill<'a, I, O, E, F>(mut f: F, buf: &'a mut [O]) -> impl Parser<I, (), E> + 'a -where - I: Stream + 'a, - F: Parser<I, O, E> + 'a, - E: ParseError<I> + 'a, -{ - trace("fill", move |i: I| { - let mut input = i.clone(); - - for elem in buf.iter_mut() { - let input_ = input.clone(); - match f.parse_next(input_) { - Ok((i, o)) => { - *elem = o; - input = i; - } - Err(e) => { - return Err(e.append(i, ErrorKind::Many)); - } - } - } - - Ok((input, ())) - }) -} - -/// Repeats the embedded parser, calling `g` to gather the results. -/// -/// This stops on [`ErrMode::Backtrack`]. To instead chain an error up, see -/// [`cut_err`][crate::combinator::cut_err]. -/// -/// # Arguments -/// * `f` The parser to apply. -/// * `init` A function returning the initial value. -/// * `g` The function that combines a result of `f` with -/// the current accumulator. 
-/// -/// **Warning:** if the parser passed in accepts empty inputs (like `alpha0` or `digit0`), `many0` will -/// return an error, to prevent going into an infinite loop -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::fold_many0; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// fold_many0( -/// "abc", -/// Vec::new, -/// |mut acc: Vec<_>, item| { -/// acc.push(item); -/// acc -/// } -/// ).parse_next(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); -/// assert_eq!(parser(""), Ok(("", vec![]))); -/// ``` -pub fn fold_many0<I, O, E, F, G, H, R>(mut f: F, mut init: H, mut g: G) -> impl Parser<I, R, E> -where - I: Stream, - F: Parser<I, O, E>, - G: FnMut(R, O) -> R, - H: FnMut() -> R, - E: ParseError<I>, -{ - trace("fold_many0", move |i: I| { - let mut res = init(); - let mut input = i; - - loop { - let i_ = input.clone(); - let len = input.eof_offset(); - match f.parse_next(i_) { - Ok((i, o)) => { - // infinite loop check: the parser must always consume - if i.eof_offset() == len { - return Err(ErrMode::assert(i, "many parsers must always consume")); - } - - res = g(res, o); - input = i; - } - Err(ErrMode::Backtrack(_)) => { - return Ok((input, res)); - } - Err(e) => { - return Err(e); - } - } - } - }) -} - -/// Repeats the embedded parser, calling `g` to gather the results. -/// -/// This stops on [`ErrMode::Backtrack`] if there is at least one result. To instead chain an error up, -/// see [`cut_err`][crate::combinator::cut_err]. -/// -/// # Arguments -/// * `f` The parser to apply. -/// * `init` A function returning the initial value. -/// * `g` The function that combines a result of `f` with -/// the current accumulator. -/// -/// **Warning:** If the parser passed to `many1` accepts empty inputs -/// (like `alpha0` or `digit0`), `many1` will return an error, -/// to prevent going into an infinite loop. 
-/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::fold_many1; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// fold_many1( -/// "abc", -/// Vec::new, -/// |mut acc: Vec<_>, item| { -/// acc.push(item); -/// acc -/// } -/// ).parse_next(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(Error::new("123123", ErrorKind::Many)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Many)))); -/// ``` -pub fn fold_many1<I, O, E, F, G, H, R>(mut f: F, mut init: H, mut g: G) -> impl Parser<I, R, E> -where - I: Stream, - F: Parser<I, O, E>, - G: FnMut(R, O) -> R, - H: FnMut() -> R, - E: ParseError<I>, -{ - trace("fold_many1", move |i: I| { - let _i = i.clone(); - let init = init(); - match f.parse_next(_i) { - Err(ErrMode::Backtrack(_)) => Err(ErrMode::from_error_kind(i, ErrorKind::Many)), - Err(e) => Err(e), - Ok((i1, o1)) => { - let mut acc = g(init, o1); - let mut input = i1; - - loop { - let _input = input.clone(); - let len = input.eof_offset(); - match f.parse_next(_input) { - Err(ErrMode::Backtrack(_)) => { - break; - } - Err(e) => return Err(e), - Ok((i, o)) => { - // infinite loop check: the parser must always consume - if i.eof_offset() == len { - return Err(ErrMode::assert(i, "many parsers must always consume")); - } - - acc = g(acc, o); - input = i; - } - } - } - - Ok((input, acc)) - } - } - }) -} - -/// Repeats the embedded parser `m..=n` times, calling `g` to gather the results -/// -/// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see -/// [`cut_err`][crate::combinator::cut_err]. -/// -/// # Arguments -/// * `m` The minimum number of iterations. -/// * `n` The maximum number of iterations. -/// * `f` The parser to apply. -/// * `init` A function returning the initial value. -/// * `g` The function that combines a result of `f` with -/// the current accumulator. -/// -/// **Warning:** If the parser passed to `many1` accepts empty inputs -/// (like `alpha0` or `digit0`), `many1` will return an error, -/// to prevent going into an infinite loop. 
-/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::multi::fold_many_m_n; -/// use winnow::bytes::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// fold_many_m_n( -/// 0, -/// 2, -/// "abc", -/// Vec::new, -/// |mut acc: Vec<_>, item| { -/// acc.push(item); -/// acc -/// } -/// ).parse_next(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); -/// assert_eq!(parser(""), Ok(("", vec![]))); -/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); -/// ``` -pub fn fold_many_m_n<I, O, E, F, G, H, R>( - min: usize, - max: usize, - mut parse: F, - mut init: H, - mut fold: G, -) -> impl Parser<I, R, E> -where - I: Stream, - F: Parser<I, O, E>, - G: FnMut(R, O) -> R, - H: FnMut() -> R, - E: ParseError<I>, -{ - trace("fold_many_m_n", move |mut input: I| { - if min > max { - return Err(ErrMode::Cut(E::from_error_kind(input, ErrorKind::Many))); - } - - let mut acc = init(); - for count in 0..max { - let len = input.eof_offset(); - match parse.parse_next(input.clone()) { - Ok((tail, value)) => { - // infinite loop check: the parser must always consume - if tail.eof_offset() == len { - return Err(ErrMode::assert(input, "many parsers must always consume")); - } - - acc = fold(acc, value); - input = tail; - } - //FInputXMError: handle failure properly - Err(ErrMode::Backtrack(err)) => { - if count < min { - return Err(ErrMode::Backtrack(err.append(input, ErrorKind::Many))); - } else { - break; - } - } - Err(e) => return Err(e), - } - } - - Ok((input, acc)) - }) -} - -/// Gets a number from the parser and returns a -/// subslice of the input of that size. -/// -/// *Complete version*: Returns an error if there is not enough input data. -/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there is not enough data. -/// -/// # Arguments -/// * `f` The parser to apply. -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed, stream::Partial}; -/// # use winnow::prelude::*; -/// use winnow::Bytes; -/// use winnow::number::be_u16; -/// use winnow::multi::length_data; -/// use winnow::bytes::tag; -/// -/// type Stream<'i> = Partial<&'i Bytes>; -/// -/// fn stream(b: &[u8]) -> Stream<'_> { -/// Partial::new(Bytes::new(b)) -/// } -/// -/// fn parser(s: Stream<'_>) -> IResult<Stream<'_>, &[u8]> { -/// length_data(be_u16).parse_next(s) -/// } -/// -/// assert_eq!(parser(stream(b"\x00\x03abcefg")), Ok((stream(&b"efg"[..]), &b"abc"[..]))); -/// assert_eq!(parser(stream(b"\x00\x03a")), Err(ErrMode::Incomplete(Needed::new(2)))); -/// ``` -pub fn length_data<I, N, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E> -where - I: StreamIsPartial, - I: Stream, - N: ToUsize, - F: Parser<I, N, E>, - E: ParseError<I>, -{ - trace("length_data", move |i: I| { - let (i, length) = f.parse_next(i)?; - - crate::bytes::take(length).parse_next(i) - }) -} - -/// Gets a number from the first parser, -/// takes a subslice of the input of that size, -/// then applies the second parser on that subslice. -/// If the second parser returns `Incomplete`, -/// `length_value` will return an error. -/// -/// *Complete version*: Returns an error if there is not enough input data. 
-/// -/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there is not enough data. -/// -/// # Arguments -/// * `f` The parser to apply. -/// * `g` The parser to apply on the subslice. -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed, stream::{Partial, StreamIsPartial}}; -/// # use winnow::prelude::*; -/// use winnow::Bytes; -/// use winnow::number::be_u16; -/// use winnow::multi::length_value; -/// use winnow::bytes::tag; -/// -/// type Stream<'i> = Partial<&'i Bytes>; -/// -/// fn stream(b: &[u8]) -> Stream<'_> { -/// Partial::new(Bytes::new(b)) -/// } -/// -/// fn complete_stream(b: &[u8]) -> Stream<'_> { -/// let mut p = Partial::new(Bytes::new(b)); -/// let _ = p.complete(); -/// p -/// } -/// -/// fn parser(s: Stream<'_>) -> IResult<Stream<'_>, &[u8]> { -/// length_value(be_u16, "abc").parse_next(s) -/// } -/// -/// assert_eq!(parser(stream(b"\x00\x03abcefg")), Ok((stream(&b"efg"[..]), &b"abc"[..]))); -/// assert_eq!(parser(stream(b"\x00\x03123123")), Err(ErrMode::Backtrack(Error::new(complete_stream(&b"123"[..]), ErrorKind::Tag)))); -/// assert_eq!(parser(stream(b"\x00\x03a")), Err(ErrMode::Incomplete(Needed::new(2)))); -/// ``` -pub fn length_value<I, O, N, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, O, E> -where - I: StreamIsPartial, - I: Stream + UpdateSlice, - N: ToUsize, - F: Parser<I, N, E>, - G: Parser<I, O, E>, - E: ParseError<I>, -{ - trace("length_value", move |i: I| { - let (i, data) = length_data(f.by_ref()).parse_next(i)?; - let mut data = I::update_slice(i.clone(), data); - let _ = data.complete(); - let (_, o) = g.by_ref().complete_err().parse_next(data)?; - Ok((i, o)) - }) -} - -/// Gets a number from the first parser, -/// then applies the second parser that many times. -/// -/// # Arguments -/// * `f` The parser to apply to obtain the count. -/// * `g` The parser to apply repeatedly. 
-/// -/// # Example -/// -#[cfg_attr(not(feature = "std"), doc = "```ignore")] -#[cfg_attr(feature = "std", doc = "```")] -/// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::Bytes; -/// use winnow::number::u8; -/// use winnow::multi::length_count; -/// use winnow::bytes::tag; -/// -/// type Stream<'i> = &'i Bytes; -/// -/// fn stream(b: &[u8]) -> Stream<'_> { -/// Bytes::new(b) -/// } -/// -/// fn parser(s: Stream<'_>) -> IResult<Stream<'_>, Vec<&[u8]>> { -/// length_count(u8.map(|i| { -/// println!("got number: {}", i); -/// i -/// }), "abc").parse_next(s) -/// } -/// -/// assert_eq!(parser(stream(b"\x02abcabcabc")), Ok((stream(b"abc"), vec![&b"abc"[..], &b"abc"[..]]))); -/// assert_eq!(parser(stream(b"\x03123123123")), Err(ErrMode::Backtrack(Error::new(stream(b"123123123"), ErrorKind::Tag)))); -/// ``` -pub fn length_count<I, O, C, N, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, C, E> -where - I: Stream, - N: ToUsize, - C: Accumulate<O>, - F: Parser<I, N, E>, - G: Parser<I, O, E>, - E: ParseError<I>, -{ - trace("length_count", move |i: I| { - let (i, n) = f.parse_next(i)?; - let n = n.to_usize(); - count(g.by_ref(), n).parse_next(i) - }) -} diff --git a/vendor/winnow/src/multi/tests.rs b/vendor/winnow/src/multi/tests.rs deleted file mode 100644 index 1977ff9..0000000 --- a/vendor/winnow/src/multi/tests.rs +++ /dev/null @@ -1,743 +0,0 @@ -use super::{length_data, length_value, many0, many1}; -use crate::Parser; -use crate::Partial; -use crate::{ - character::digit1 as digit, - error::{ErrMode, ErrorKind, Needed, ParseError}, - lib::std::str::{self, FromStr}, - number::{be_u16, be_u8}, - IResult, -}; -#[cfg(feature = "alloc")] -use crate::{ - lib::std::vec::Vec, - multi::{ - count, fold_many0, fold_many1, fold_many_m_n, length_count, many_m_n, many_till0, - separated0, separated1, - }, -}; - -#[test] -#[cfg(feature = "alloc")] -fn separated0_test() { - fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("abcd", ",").parse_next(i) - } - fn multi_empty(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("", ",").parse_next(i) - } - fn multi_longsep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("abcd", "..").parse_next(i) - } - - let a = &b"abcdef"[..]; - let b = &b"abcd,abcdef"[..]; - let c = &b"azerty"[..]; - let d = &b",,abc"[..]; - let e = &b"abcd,abcd,ef"[..]; - let f = &b"abc"[..]; - let g = &b"abcd."[..]; - let h = &b"abcd,abc"[..]; - - let res1 = vec![&b"abcd"[..]]; - assert_eq!(multi(Partial::new(a)), Ok((Partial::new(&b"ef"[..]), res1))); - let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; - assert_eq!(multi(Partial::new(b)), Ok((Partial::new(&b"ef"[..]), res2))); - assert_eq!( - multi(Partial::new(c)), - Ok((Partial::new(&b"azerty"[..]), Vec::new())) - ); - let res3 = vec![&b""[..], &b""[..], &b""[..]]; - assert_eq!( - multi_empty(Partial::new(d)), - Ok((Partial::new(&b"abc"[..]), res3)) - ); - let res4 = vec![&b"abcd"[..], &b"abcd"[..]]; - assert_eq!( - multi(Partial::new(e)), - Ok((Partial::new(&b",ef"[..]), res4)) - ); - - assert_eq!( - multi(Partial::new(f)), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - multi_longsep(Partial::new(g)), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - multi(Partial::new(h)), - Err(ErrMode::Incomplete(Needed::new(1))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -#[cfg_attr(debug_assertions, should_panic)] -fn 
separated0_empty_sep_test() { - fn empty_sep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("abc", "").parse_next(i) - } - - let i = &b"abcabc"[..]; - - let i_err_pos = &i[3..]; - assert_eq!( - empty_sep(Partial::new(i)), - Err(ErrMode::Backtrack(error_position!( - Partial::new(i_err_pos), - ErrorKind::Assert - ))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -fn separated1_test() { - fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated1("abcd", ",").parse_next(i) - } - fn multi_longsep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated1("abcd", "..").parse_next(i) - } - - let a = &b"abcdef"[..]; - let b = &b"abcd,abcdef"[..]; - let c = &b"azerty"[..]; - let d = &b"abcd,abcd,ef"[..]; - - let f = &b"abc"[..]; - let g = &b"abcd."[..]; - let h = &b"abcd,abc"[..]; - - let res1 = vec![&b"abcd"[..]]; - assert_eq!(multi(Partial::new(a)), Ok((Partial::new(&b"ef"[..]), res1))); - let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; - assert_eq!(multi(Partial::new(b)), Ok((Partial::new(&b"ef"[..]), res2))); - assert_eq!( - multi(Partial::new(c)), - Err(ErrMode::Backtrack(error_position!( - Partial::new(c), - ErrorKind::Tag - ))) - ); - let res3 = vec![&b"abcd"[..], &b"abcd"[..]]; - assert_eq!( - multi(Partial::new(d)), - Ok((Partial::new(&b",ef"[..]), res3)) - ); - - assert_eq!( - multi(Partial::new(f)), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - multi_longsep(Partial::new(g)), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - multi(Partial::new(h)), - Err(ErrMode::Incomplete(Needed::new(1))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -fn many0_test() { - fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - many0("abcd").parse_next(i) - } - - assert_eq!( - multi(Partial::new(&b"abcdef"[..])), - Ok((Partial::new(&b"ef"[..]), vec![&b"abcd"[..]])) - ); - assert_eq!( - multi(Partial::new(&b"abcdabcdefgh"[..])), - Ok((Partial::new(&b"efgh"[..]), vec![&b"abcd"[..], &b"abcd"[..]])) - ); - assert_eq!( - multi(Partial::new(&b"azerty"[..])), - Ok((Partial::new(&b"azerty"[..]), Vec::new())) - ); - assert_eq!( - multi(Partial::new(&b"abcdab"[..])), - Err(ErrMode::Incomplete(Needed::new(2))) - ); - assert_eq!( - multi(Partial::new(&b"abcd"[..])), - Err(ErrMode::Incomplete(Needed::new(4))) - ); - assert_eq!( - multi(Partial::new(&b""[..])), - Err(ErrMode::Incomplete(Needed::new(4))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -#[cfg_attr(debug_assertions, should_panic)] -fn many0_empty_test() { - fn multi_empty(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - many0("").parse_next(i) - } - - assert_eq!( - multi_empty(Partial::new(&b"abcdef"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"abcdef"[..]), - ErrorKind::Assert - ))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -fn many1_test() { - fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - many1("abcd").parse_next(i) - } - - let a = &b"abcdef"[..]; - let b = &b"abcdabcdefgh"[..]; - let c = &b"azerty"[..]; - let d = &b"abcdab"[..]; - - let res1 = vec![&b"abcd"[..]]; - assert_eq!(multi(Partial::new(a)), Ok((Partial::new(&b"ef"[..]), res1))); - let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; - assert_eq!( - multi(Partial::new(b)), - Ok((Partial::new(&b"efgh"[..]), res2)) - ); - assert_eq!( - multi(Partial::new(c)), - Err(ErrMode::Backtrack(error_position!( - Partial::new(c), - ErrorKind::Tag - ))) - ); - assert_eq!( - multi(Partial::new(d)), - Err(ErrMode::Incomplete(Needed::new(2))) - ); -} 
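These deleted tests pin down `Partial`-stream semantics: at end of input, a repetition parser cannot rule out that another match would arrive with more data, so it reports `ErrMode::Incomplete` instead of succeeding. A condensed sketch of that behavior, mirroring the assertions above (pre-patch winnow 0.4 API):

```rust
use winnow::error::{ErrMode, Needed};
use winnow::multi::many0;
use winnow::prelude::*;
use winnow::Partial;

fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> {
    many0("abcd").parse_next(i)
}

fn main() {
    // 'X' cannot start another "abcd", so the repetition ends successfully.
    assert_eq!(
        multi(Partial::new(&b"abcdX"[..])),
        Ok((Partial::new(&b"X"[..]), vec![&b"abcd"[..]]))
    );
    // At bare end of input, more data might still arrive, so the parser
    // asks the caller for (at least) the 4 bytes another "abcd" would need.
    assert_eq!(
        multi(Partial::new(&b"abcd"[..])),
        Err(ErrMode::Incomplete(Needed::new(4)))
    );
}
```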
- -#[test] -#[cfg(feature = "alloc")] -fn many_till_test() { - #[allow(clippy::type_complexity)] - fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> { - many_till0("abcd", "efgh").parse_next(i) - } - - let a = b"abcdabcdefghabcd"; - let b = b"efghabcd"; - let c = b"azerty"; - - let res_a = (vec![&b"abcd"[..], &b"abcd"[..]], &b"efgh"[..]); - let res_b: (Vec<&[u8]>, &[u8]) = (Vec::new(), &b"efgh"[..]); - assert_eq!(multi(&a[..]), Ok((&b"abcd"[..], res_a))); - assert_eq!(multi(&b[..]), Ok((&b"abcd"[..], res_b))); - assert_eq!( - multi(&c[..]), - Err(ErrMode::Backtrack(error_node_position!( - &c[..], - ErrorKind::Many, - error_position!(&c[..], ErrorKind::Tag) - ))) - ); -} - -#[test] -#[cfg(feature = "std")] -fn infinite_many() { - fn tst(input: &[u8]) -> IResult<&[u8], &[u8]> { - println!("input: {:?}", input); - Err(ErrMode::Backtrack(error_position!(input, ErrorKind::Tag))) - } - - // should not go into an infinite loop - fn multi0(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { - many0(tst).parse_next(i) - } - let a = &b"abcdef"[..]; - assert_eq!(multi0(a), Ok((a, Vec::new()))); - - fn multi1(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { - many1(tst).parse_next(i) - } - let a = &b"abcdef"[..]; - assert_eq!( - multi1(a), - Err(ErrMode::Backtrack(error_position!(a, ErrorKind::Tag))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -fn many_m_n_test() { - fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - many_m_n(2, 4, "Abcd").parse_next(i) - } - - let a = &b"Abcdef"[..]; - let b = &b"AbcdAbcdefgh"[..]; - let c = &b"AbcdAbcdAbcdAbcdefgh"[..]; - let d = &b"AbcdAbcdAbcdAbcdAbcdefgh"[..]; - let e = &b"AbcdAb"[..]; - - assert_eq!( - multi(Partial::new(a)), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"ef"[..]), - ErrorKind::Tag - ))) - ); - let res1 = vec![&b"Abcd"[..], &b"Abcd"[..]]; - assert_eq!( - multi(Partial::new(b)), - Ok((Partial::new(&b"efgh"[..]), res1)) - ); - let res2 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; - assert_eq!( - multi(Partial::new(c)), - Ok((Partial::new(&b"efgh"[..]), res2)) - ); - let res3 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; - assert_eq!( - multi(Partial::new(d)), - Ok((Partial::new(&b"Abcdefgh"[..]), res3)) - ); - assert_eq!( - multi(Partial::new(e)), - Err(ErrMode::Incomplete(Needed::new(2))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -fn count_test() { - const TIMES: usize = 2; - fn cnt_2(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - count("abc", TIMES).parse_next(i) - } - - assert_eq!( - cnt_2(Partial::new(&b"abcabcabcdef"[..])), - Ok((Partial::new(&b"abcdef"[..]), vec![&b"abc"[..], &b"abc"[..]])) - ); - assert_eq!( - cnt_2(Partial::new(&b"ab"[..])), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - cnt_2(Partial::new(&b"abcab"[..])), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - cnt_2(Partial::new(&b"xxx"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), - ErrorKind::Tag - ))) - ); - assert_eq!( - cnt_2(Partial::new(&b"xxxabcabcdef"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxxabcabcdef"[..]), - ErrorKind::Tag - ))) - ); - assert_eq!( - cnt_2(Partial::new(&b"abcxxxabcdef"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxxabcdef"[..]), - ErrorKind::Tag - ))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -fn count_zero() { - const TIMES: usize = 0; - fn counter_2(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { - count("abc", TIMES).parse_next(i) - } - - let done = 
&b"abcabcabcdef"[..]; - let parsed_done = Vec::new(); - let rest = done; - let incomplete_1 = &b"ab"[..]; - let parsed_incompl_1 = Vec::new(); - let incomplete_2 = &b"abcab"[..]; - let parsed_incompl_2 = Vec::new(); - let error = &b"xxx"[..]; - let error_remain = &b"xxx"[..]; - let parsed_err = Vec::new(); - let error_1 = &b"xxxabcabcdef"[..]; - let parsed_err_1 = Vec::new(); - let error_1_remain = &b"xxxabcabcdef"[..]; - let error_2 = &b"abcxxxabcdef"[..]; - let parsed_err_2 = Vec::new(); - let error_2_remain = &b"abcxxxabcdef"[..]; - - assert_eq!(counter_2(done), Ok((rest, parsed_done))); - assert_eq!( - counter_2(incomplete_1), - Ok((incomplete_1, parsed_incompl_1)) - ); - assert_eq!( - counter_2(incomplete_2), - Ok((incomplete_2, parsed_incompl_2)) - ); - assert_eq!(counter_2(error), Ok((error_remain, parsed_err))); - assert_eq!(counter_2(error_1), Ok((error_1_remain, parsed_err_1))); - assert_eq!(counter_2(error_2), Ok((error_2_remain, parsed_err_2))); -} - -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct NilError; - -impl<I> From<(I, ErrorKind)> for NilError { - fn from(_: (I, ErrorKind)) -> Self { - NilError - } -} - -impl<I> ParseError<I> for NilError { - fn from_error_kind(_: I, _: ErrorKind) -> NilError { - NilError - } - fn append(self, _: I, _: ErrorKind) -> NilError { - NilError - } -} - -#[test] -#[cfg(feature = "alloc")] -fn length_count_test() { - fn number(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { - digit - .map_res(str::from_utf8) - .map_res(FromStr::from_str) - .parse_next(i) - } - - fn cnt(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - length_count(number, "abc").parse_next(i) - } - - assert_eq!( - cnt(Partial::new(&b"2abcabcabcdef"[..])), - Ok((Partial::new(&b"abcdef"[..]), vec![&b"abc"[..], &b"abc"[..]])) - ); - assert_eq!( - cnt(Partial::new(&b"2ab"[..])), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - cnt(Partial::new(&b"3abcab"[..])), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - cnt(Partial::new(&b"xxx"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), - ErrorKind::Slice - ))) - ); - assert_eq!( - cnt(Partial::new(&b"2abcxxx"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), - ErrorKind::Tag - ))) - ); -} - -#[test] -fn length_data_test() { - fn number(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { - digit - .map_res(str::from_utf8) - .map_res(FromStr::from_str) - .parse_next(i) - } - - fn take(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - length_data(number).parse_next(i) - } - - assert_eq!( - take(Partial::new(&b"6abcabcabcdef"[..])), - Ok((Partial::new(&b"abcdef"[..]), &b"abcabc"[..])) - ); - assert_eq!( - take(Partial::new(&b"3ab"[..])), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - take(Partial::new(&b"xxx"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), - ErrorKind::Slice - ))) - ); - assert_eq!( - take(Partial::new(&b"2abcxxx"[..])), - Ok((Partial::new(&b"cxxx"[..]), &b"ab"[..])) - ); -} - -#[test] -fn length_value_test() { - use crate::stream::StreamIsPartial; - - fn length_value_1(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u16> { - length_value(be_u8, be_u16).parse_next(i) - } - fn length_value_2(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (u8, u8)> { - length_value(be_u8, (be_u8, be_u8)).parse_next(i) - } - - let mut empty_complete = Partial::new(&b""[..]); - let _ = empty_complete.complete(); - - let i1 = [0, 5, 6]; - assert_eq!( - length_value_1(Partial::new(&i1)), 
- Err(ErrMode::Backtrack(error_position!( - empty_complete, - ErrorKind::Slice - ))) - ); - assert_eq!( - length_value_2(Partial::new(&i1)), - Err(ErrMode::Backtrack(error_position!( - empty_complete, - ErrorKind::Token - ))) - ); - - let i2 = [1, 5, 6, 3]; - { - let mut middle_complete = Partial::new(&i2[1..2]); - let _ = middle_complete.complete(); - assert_eq!( - length_value_1(Partial::new(&i2)), - Err(ErrMode::Backtrack(error_position!( - middle_complete, - ErrorKind::Slice - ))) - ); - assert_eq!( - length_value_2(Partial::new(&i2)), - Err(ErrMode::Backtrack(error_position!( - empty_complete, - ErrorKind::Token - ))) - ); - } - - let i3 = [2, 5, 6, 3, 4, 5, 7]; - assert_eq!( - length_value_1(Partial::new(&i3)), - Ok((Partial::new(&i3[3..]), 1286)) - ); - assert_eq!( - length_value_2(Partial::new(&i3)), - Ok((Partial::new(&i3[3..]), (5, 6))) - ); - - let i4 = [3, 5, 6, 3, 4, 5]; - assert_eq!( - length_value_1(Partial::new(&i4)), - Ok((Partial::new(&i4[4..]), 1286)) - ); - assert_eq!( - length_value_2(Partial::new(&i4)), - Ok((Partial::new(&i4[4..]), (5, 6))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -fn fold_many0_test() { - fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> { - acc.push(item); - acc - } - fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_many0("abcd", Vec::new, fold_into_vec).parse_next(i) - } - - assert_eq!( - multi(Partial::new(&b"abcdef"[..])), - Ok((Partial::new(&b"ef"[..]), vec![&b"abcd"[..]])) - ); - assert_eq!( - multi(Partial::new(&b"abcdabcdefgh"[..])), - Ok((Partial::new(&b"efgh"[..]), vec![&b"abcd"[..], &b"abcd"[..]])) - ); - assert_eq!( - multi(Partial::new(&b"azerty"[..])), - Ok((Partial::new(&b"azerty"[..]), Vec::new())) - ); - assert_eq!( - multi(Partial::new(&b"abcdab"[..])), - Err(ErrMode::Incomplete(Needed::new(2))) - ); - assert_eq!( - multi(Partial::new(&b"abcd"[..])), - Err(ErrMode::Incomplete(Needed::new(4))) - ); - assert_eq!( - multi(Partial::new(&b""[..])), - Err(ErrMode::Incomplete(Needed::new(4))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -#[cfg_attr(debug_assertions, should_panic)] -fn fold_many0_empty_test() { - fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> { - acc.push(item); - acc - } - fn multi_empty(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_many0("", Vec::new, fold_into_vec).parse_next(i) - } - - assert_eq!( - multi_empty(Partial::new(&b"abcdef"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"abcdef"[..]), - ErrorKind::Assert - ))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -fn fold_many1_test() { - fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> { - acc.push(item); - acc - } - fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_many1("abcd", Vec::new, fold_into_vec).parse_next(i) - } - - let a = &b"abcdef"[..]; - let b = &b"abcdabcdefgh"[..]; - let c = &b"azerty"[..]; - let d = &b"abcdab"[..]; - - let res1 = vec![&b"abcd"[..]]; - assert_eq!(multi(Partial::new(a)), Ok((Partial::new(&b"ef"[..]), res1))); - let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; - assert_eq!( - multi(Partial::new(b)), - Ok((Partial::new(&b"efgh"[..]), res2)) - ); - assert_eq!( - multi(Partial::new(c)), - Err(ErrMode::Backtrack(error_position!( - Partial::new(c), - ErrorKind::Many - ))) - ); - assert_eq!( - multi(Partial::new(d)), - Err(ErrMode::Incomplete(Needed::new(2))) - ); -} - -#[test] -#[cfg(feature = "alloc")] -fn fold_many_m_n_test() { - fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> { - acc.push(item); - acc - } - fn 
multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_many_m_n(2, 4, "Abcd", Vec::new, fold_into_vec).parse_next(i) - } - - let a = &b"Abcdef"[..]; - let b = &b"AbcdAbcdefgh"[..]; - let c = &b"AbcdAbcdAbcdAbcdefgh"[..]; - let d = &b"AbcdAbcdAbcdAbcdAbcdefgh"[..]; - let e = &b"AbcdAb"[..]; - - assert_eq!( - multi(Partial::new(a)), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"ef"[..]), - ErrorKind::Tag - ))) - ); - let res1 = vec![&b"Abcd"[..], &b"Abcd"[..]]; - assert_eq!( - multi(Partial::new(b)), - Ok((Partial::new(&b"efgh"[..]), res1)) - ); - let res2 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; - assert_eq!( - multi(Partial::new(c)), - Ok((Partial::new(&b"efgh"[..]), res2)) - ); - let res3 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; - assert_eq!( - multi(Partial::new(d)), - Ok((Partial::new(&b"Abcdefgh"[..]), res3)) - ); - assert_eq!( - multi(Partial::new(e)), - Err(ErrMode::Incomplete(Needed::new(2))) - ); -} - -#[test] -fn many0_count_test() { - fn count0_nums(i: &[u8]) -> IResult<&[u8], usize> { - many0((digit, ",")).parse_next(i) - } - - assert_eq!(count0_nums(&b"123,junk"[..]), Ok((&b"junk"[..], 1))); - - assert_eq!(count0_nums(&b"123,45,junk"[..]), Ok((&b"junk"[..], 2))); - - assert_eq!( - count0_nums(&b"1,2,3,4,5,6,7,8,9,0,junk"[..]), - Ok((&b"junk"[..], 10)) - ); - - assert_eq!(count0_nums(&b"hello"[..]), Ok((&b"hello"[..], 0))); -} - -#[test] -fn many1_count_test() { - fn count1_nums(i: &[u8]) -> IResult<&[u8], usize> { - many1((digit, ",")).parse_next(i) - } - - assert_eq!(count1_nums(&b"123,45,junk"[..]), Ok((&b"junk"[..], 2))); - - assert_eq!( - count1_nums(&b"1,2,3,4,5,6,7,8,9,0,junk"[..]), - Ok((&b"junk"[..], 10)) - ); - - assert_eq!( - count1_nums(&b"hello"[..]), - Err(ErrMode::Backtrack(error_position!( - &b"hello"[..], - ErrorKind::Slice - ))) - ); -} diff --git a/vendor/winnow/src/parser.rs b/vendor/winnow/src/parser.rs index 2641590..796ce02 100644 --- a/vendor/winnow/src/parser.rs +++ b/vendor/winnow/src/parser.rs @@ -1,8 +1,9 @@ //! 
Basic types to build the parsers +use crate::ascii::Caseless as AsciiCaseless; use crate::combinator::*; -use crate::error::{ContextError, FromExternalError, IResult, ParseError}; -use crate::stream::{AsChar, Compare, Location, Offset, ParseSlice, Stream, StreamIsPartial}; +use crate::error::{AddContext, FromExternalError, IResult, PResult, ParseError, ParserError}; +use crate::stream::{AsChar, Compare, Location, ParseSlice, Stream, StreamIsPartial}; /// Core trait for parsing /// @@ -10,12 +11,12 @@ use crate::stream::{AsChar, Compare, Location, Offset, ParseSlice, Stream, Strea /// ```rust /// use winnow::prelude::*; /// -/// fn success(input: &str) -> IResult<&str, ()> { +/// fn success(input: &mut &str) -> PResult<()> { /// let output = (); -/// Ok((input, output)) +/// Ok(output) /// } /// -/// let (input, output) = success.parse_next("Hello").unwrap(); +/// let (input, output) = success.parse_peek("Hello").unwrap(); /// assert_eq!(input, "Hello"); // We didn't consume any input /// ``` /// @@ -23,40 +24,67 @@ use crate::stream::{AsChar, Compare, Location, Offset, ParseSlice, Stream, Strea /// ```rust /// use winnow::prelude::*; /// -/// fn success<O: Clone>(output: O) -> impl FnMut(&str) -> IResult<&str, O> { -/// move |input: &str| { +/// fn success<O: Clone>(output: O) -> impl FnMut(&mut &str) -> PResult<O> { +/// move |input: &mut &str| { /// let output = output.clone(); -/// Ok((input, output)) +/// Ok(output) /// } /// } /// -/// let (input, output) = success("World").parse_next("Hello").unwrap(); +/// let (input, output) = success("World").parse_peek("Hello").unwrap(); /// assert_eq!(input, "Hello"); // We didn't consume any input /// assert_eq!(output, "World"); /// ``` /// /// Additionally, some basic types implement `Parser` as well, including -/// - `u8` and `char`, see [`winnow::bytes::one_of`][crate::bytes::one_of] -/// - `&[u8]` and `&str`, see [`winnow::bytes::tag`][crate::bytes::tag] +/// - `u8` and `char`, see [`winnow::token::one_of`][crate::token::one_of] +/// - `&[u8]` and `&str`, see [`winnow::token::tag`][crate::token::tag] pub trait Parser<I, O, E> { /// Parse all of `input`, generating `O` from it - fn parse(&mut self, input: I) -> Result<O, E> + #[inline] + fn parse(&mut self, mut input: I) -> Result<O, ParseError<I, E>> where + Self: core::marker::Sized, I: Stream, // Force users to deal with `Incomplete` when `StreamIsPartial<true>` I: StreamIsPartial, I: Clone, - E: ParseError<I>, + E: ParserError<I>, { - #![allow(deprecated)] - use crate::error::FinishIResult; - self.parse_next(input).finish() + debug_assert!( + !I::is_partial_supported(), + "partial streams need to handle `ErrMode::Incomplete`" + ); + + let start = input.checkpoint(); + let (o, _) = (self.by_ref(), crate::combinator::eof) + .parse_next(&mut input) + .map_err(|e| { + let e = e + .into_inner() + .expect("complete parsers should not report `ErrMode::Incomplete(_)`"); + ParseError::new(input, start, e) + })?; + Ok(o) } /// Take tokens from the [`Stream`], turning it into the output /// /// This includes advancing the [`Stream`] to the next location. - fn parse_next(&mut self, input: I) -> IResult<I, O, E>; + /// + /// On error, `input` will be left pointing at the error location. + fn parse_next(&mut self, input: &mut I) -> PResult<O, E>; + + /// Take tokens from the [`Stream`], turning it into the output + /// + /// This includes advancing the [`Stream`] to the next location. 
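This hunk is the core of the migration: `parse_next` now advances a `&mut I` in place and returns only the output, `parse_peek` recovers the old `(remaining, output)` shape, and `parse` runs a parser against the entire input (anchored with `eof`); the `unpeek` adapter added further down in this file bridges old-style value-passing functions. A small sketch of the three entry points under the post-patch winnow 0.5 API, using `InputError` as in the doc examples; the `number` name is illustrative:

```rust
use winnow::ascii::digit1;
use winnow::error::InputError;
use winnow::prelude::*;

fn number<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> {
    digit1.parse_next(input)
}

fn main() {
    // `parse_next` advances the stream in place and returns only the output.
    let mut input = "123abc";
    assert_eq!(number(&mut input), Ok("123"));
    assert_eq!(input, "abc");

    // `parse_peek` keeps the classic `(remaining, output)` calling convention.
    assert_eq!(number.parse_peek("123abc"), Ok(("abc", "123")));

    // `parse` requires the whole input to be consumed (it appends `eof`).
    assert!(number.parse("123abc").is_err());
    assert_eq!(number.parse("123").unwrap(), "123");
}
```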
+ #[inline(always)] + fn parse_peek(&mut self, mut input: I) -> IResult<I, O, E> { + match self.parse_next(&mut input) { + Ok(o) => Ok((input, o)), + Err(err) => Err(err), + } + } /// Treat `&mut Self` as a parser /// @@ -64,23 +92,22 @@ pub trait Parser<I, O, E> { /// /// # Example /// - /// Because parsers are `FnMut`, they can be called multiple times. This prevents moving `f` - /// into [`length_data`][crate::multi::length_data] and `g` into + /// Because parsers are `FnMut`, they can be called multiple times. This prevents moving `f` + /// into [`length_data`][crate::binary::length_data] and `g` into /// [`Parser::complete_err`]: /// ```rust,compile_fail /// # use winnow::prelude::*; - /// # use winnow::IResult; /// # use winnow::Parser; - /// # use winnow::error::ParseError; - /// # use winnow::multi::length_data; - /// pub fn length_value<'i, O, E: ParseError<&'i [u8]>>( + /// # use winnow::error::ParserError; + /// # use winnow::binary::length_data; + /// pub fn length_value<'i, O, E: ParserError<&'i [u8]>>( /// mut f: impl Parser<&'i [u8], usize, E>, /// mut g: impl Parser<&'i [u8], O, E> - /// ) -> impl FnMut(&'i [u8]) -> IResult<&'i [u8], O, E> { - /// move |i: &'i [u8]| { - /// let (i, data) = length_data(f).parse_next(i)?; - /// let (_, o) = g.complete().parse_next(data)?; - /// Ok((i, o)) + /// ) -> impl Parser<&'i [u8], O, E> { + /// move |i: &mut &'i [u8]| { + /// let mut data = length_data(f).parse_next(i)?; + /// let o = g.complete_err().parse_next(&mut data)?; + /// Ok(o) /// } /// } /// ``` @@ -88,21 +115,21 @@ pub trait Parser<I, O, E> { /// By adding `by_ref`, we can make this work: /// ```rust /// # use winnow::prelude::*; - /// # use winnow::IResult; /// # use winnow::Parser; - /// # use winnow::error::ParseError; - /// # use winnow::multi::length_data; - /// pub fn length_value<'i, O, E: ParseError<&'i [u8]>>( + /// # use winnow::error::ParserError; + /// # use winnow::binary::length_data; + /// pub fn length_value<'i, O, E: ParserError<&'i [u8]>>( /// mut f: impl Parser<&'i [u8], usize, E>, /// mut g: impl Parser<&'i [u8], O, E> - /// ) -> impl FnMut(&'i [u8]) -> IResult<&'i [u8], O, E> { - /// move |i: &'i [u8]| { - /// let (i, data) = length_data(f.by_ref()).parse_next(i)?; - /// let (_, o) = g.by_ref().complete_err().parse_next(data)?; - /// Ok((i, o)) + /// ) -> impl Parser<&'i [u8], O, E> { + /// move |i: &mut &'i [u8]| { + /// let mut data = length_data(f.by_ref()).parse_next(i)?; + /// let o = g.by_ref().complete_err().parse_next(&mut data)?; + /// Ok(o) /// } /// } /// ``` + #[inline(always)] fn by_ref(&mut self) -> ByRef<'_, Self> where Self: core::marker::Sized, @@ -115,17 +142,18 @@ pub trait Parser<I, O, E> { /// # Example /// /// ```rust - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult, Parser}; - /// use winnow::character::alpha1; + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, Parser}; + /// use winnow::ascii::alpha1; /// # fn main() { /// /// let mut parser = alpha1.value(1234); /// - /// assert_eq!(parser.parse_next("abcd"), Ok(("", 1234))); - /// assert_eq!(parser.parse_next("123abcd;"), Err(ErrMode::Backtrack(Error::new("123abcd;", ErrorKind::Slice)))); + /// assert_eq!(parser.parse_peek("abcd"), Ok(("", 1234))); + /// assert_eq!(parser.parse_peek("123abcd;"), Err(ErrMode::Backtrack(InputError::new("123abcd;", ErrorKind::Slice)))); /// # } /// ``` #[doc(alias = "to")] + #[inline(always)] fn value<O2>(self, val: O2) -> Value<Self, I, O, O2, E> where Self: core::marker::Sized, @@ -134,21 +162,46 
@@ pub trait Parser<I, O, E> { Value::new(self, val) } + /// Produce a type's default value + /// + /// # Example + /// + /// ```rust + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, Parser}; + /// use winnow::ascii::alpha1; + /// # fn main() { + /// + /// let mut parser = alpha1.default_value::<u32>(); + /// + /// assert_eq!(parser.parse_peek("abcd"), Ok(("", 0))); + /// assert_eq!(parser.parse_peek("123abcd;"), Err(ErrMode::Backtrack(InputError::new("123abcd;", ErrorKind::Slice)))); + /// # } + /// ``` + #[inline(always)] + fn default_value<O2>(self) -> DefaultValue<Self, I, O, O2, E> + where + Self: core::marker::Sized, + O2: core::default::Default, + { + DefaultValue::new(self) + } + /// Discards the output of the `Parser` /// /// # Example /// /// ```rust - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult, Parser}; - /// use winnow::character::alpha1; + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, Parser}; + /// use winnow::ascii::alpha1; /// # fn main() { /// /// let mut parser = alpha1.void(); /// - /// assert_eq!(parser.parse_next("abcd"), Ok(("", ()))); - /// assert_eq!(parser.parse_next("123abcd;"), Err(ErrMode::Backtrack(Error::new("123abcd;", ErrorKind::Slice)))); + /// assert_eq!(parser.parse_peek("abcd"), Ok(("", ()))); + /// assert_eq!(parser.parse_peek("123abcd;"), Err(ErrMode::Backtrack(InputError::new("123abcd;", ErrorKind::Slice)))); /// # } /// ``` + #[inline(always)] fn void(self) -> Void<Self, I, O, E> where Self: core::marker::Sized, @@ -161,22 +214,23 @@ pub trait Parser<I, O, E> { /// # Example /// /// ```rust - /// # use winnow::IResult; - /// # use winnow::Parser; - /// use winnow::character::alpha1; + /// # use winnow::prelude::*; + /// # use winnow::error::InputError; + /// use winnow::ascii::alpha1; /// # fn main() { /// - /// fn parser1(i: &str) -> IResult<&str, &str> { - /// alpha1(i) - /// } + /// fn parser1<'s>(i: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { + /// alpha1(i) + /// } /// - /// let mut parser2 = parser1.output_into(); + /// let mut parser2 = parser1.output_into(); /// /// // the parser converts the &str output of the child parser into a Vec<u8> - /// let bytes: IResult<&str, Vec<u8>> = parser2.parse_next("abcd"); + /// let bytes: IResult<&str, Vec<u8>> = parser2.parse_peek("abcd"); /// assert_eq!(bytes, Ok(("", vec![97, 98, 99, 100]))); /// # } /// ``` + #[inline(always)] fn output_into<O2>(self) -> OutputInto<Self, I, O, O2, E> where Self: core::marker::Sized, @@ -190,22 +244,23 @@ pub trait Parser<I, O, E> { /// # Example /// /// ```rust - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult, Parser}; - /// use winnow::character::{alpha1}; - /// use winnow::sequence::separated_pair; + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, Parser}; + /// use winnow::ascii::{alpha1}; + /// use winnow::combinator::separated_pair; /// # fn main() { /// /// let mut parser = separated_pair(alpha1, ',', alpha1).recognize(); /// - /// assert_eq!(parser.parse_next("abcd,efgh"), Ok(("", "abcd,efgh"))); - /// assert_eq!(parser.parse_next("abcd;"),Err(ErrMode::Backtrack(Error::new(";", ErrorKind::Verify)))); + /// assert_eq!(parser.parse_peek("abcd,efgh"), Ok(("", "abcd,efgh"))); + /// assert_eq!(parser.parse_peek("abcd;"),Err(ErrMode::Backtrack(InputError::new(";", ErrorKind::Verify)))); /// # } /// ``` #[doc(alias = "concat")] + #[inline(always)] fn recognize(self) -> Recognize<Self, I, O, E> where Self: core::marker::Sized, - 
I: Stream + Offset, + I: Stream, { Recognize::new(self) } @@ -224,36 +279,34 @@ pub trait Parser<I, O, E> { /// /// ```rust /// # use winnow::prelude::*; - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult}; - /// use winnow::character::{alpha1}; - /// use winnow::bytes::tag; - /// use winnow::sequence::separated_pair; + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError}; + /// use winnow::ascii::{alpha1}; + /// use winnow::token::tag; + /// use winnow::combinator::separated_pair; /// - /// fn inner_parser(input: &str) -> IResult<&str, bool> { + /// fn inner_parser<'s>(input: &mut &'s str) -> PResult<bool, InputError<&'s str>> { /// "1234".value(true).parse_next(input) /// } /// - /// # fn main() { - /// /// let mut consumed_parser = separated_pair(alpha1, ',', alpha1).value(true).with_recognized(); /// - /// assert_eq!(consumed_parser.parse_next("abcd,efgh1"), Ok(("1", (true, "abcd,efgh")))); - /// assert_eq!(consumed_parser.parse_next("abcd;"),Err(ErrMode::Backtrack(Error::new(";", ErrorKind::Verify)))); + /// assert_eq!(consumed_parser.parse_peek("abcd,efgh1"), Ok(("1", (true, "abcd,efgh")))); + /// assert_eq!(consumed_parser.parse_peek("abcd;"),Err(ErrMode::Backtrack(InputError::new(";", ErrorKind::Verify)))); /// /// // the second output (representing the consumed input) /// // should be the same as that of the `recognize` parser. /// let mut recognize_parser = inner_parser.recognize(); /// let mut consumed_parser = inner_parser.with_recognized().map(|(output, consumed)| consumed); /// - /// assert_eq!(recognize_parser.parse_next("1234"), consumed_parser.parse_next("1234")); - /// assert_eq!(recognize_parser.parse_next("abcd"), consumed_parser.parse_next("abcd")); - /// # } + /// assert_eq!(recognize_parser.parse_peek("1234"), consumed_parser.parse_peek("1234")); + /// assert_eq!(recognize_parser.parse_peek("abcd"), consumed_parser.parse_peek("abcd")); /// ``` #[doc(alias = "consumed")] + #[inline(always)] fn with_recognized(self) -> WithRecognized<Self, I, O, E> where Self: core::marker::Sized, - I: Stream + Offset, + I: Stream, { WithRecognized::new(self) } @@ -264,16 +317,17 @@ pub trait Parser<I, O, E> { /// /// ```rust /// # use winnow::prelude::*; - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, stream::Stream}; + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, stream::Stream}; /// use winnow::stream::Located; - /// use winnow::character::alpha1; - /// use winnow::sequence::separated_pair; + /// use winnow::ascii::alpha1; + /// use winnow::combinator::separated_pair; /// /// let mut parser = separated_pair(alpha1.span(), ',', alpha1.span()); /// /// assert_eq!(parser.parse(Located::new("abcd,efgh")), Ok((0..4, 5..9))); - /// assert_eq!(parser.parse_next(Located::new("abcd;")),Err(ErrMode::Backtrack(Error::new(Located::new("abcd;").next_slice(4).0, ErrorKind::Verify)))); + /// assert_eq!(parser.parse_peek(Located::new("abcd;")),Err(ErrMode::Backtrack(InputError::new(Located::new("abcd;").peek_slice(4).0, ErrorKind::Verify)))); /// ``` + #[inline(always)] fn span(self) -> Span<Self, I, O, E> where Self: core::marker::Sized, @@ -296,13 +350,13 @@ pub trait Parser<I, O, E> { /// /// ```rust /// # use winnow::prelude::*; - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult, stream::Stream}; + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, stream::Stream}; /// use winnow::stream::Located; - /// use winnow::character::alpha1; - /// use winnow::bytes::tag; 
- /// use winnow::sequence::separated_pair; + /// use winnow::ascii::alpha1; + /// use winnow::token::tag; + /// use winnow::combinator::separated_pair; /// - /// fn inner_parser(input: Located<&str>) -> IResult<Located<&str>, bool> { + /// fn inner_parser<'s>(input: &mut Located<&'s str>) -> PResult<bool, InputError<Located<&'s str>>> { /// "1234".value(true).parse_next(input) /// } /// @@ -311,17 +365,18 @@ pub trait Parser<I, O, E> { /// let mut consumed_parser = separated_pair(alpha1.value(1).with_span(), ',', alpha1.value(2).with_span()); /// /// assert_eq!(consumed_parser.parse(Located::new("abcd,efgh")), Ok(((1, 0..4), (2, 5..9)))); - /// assert_eq!(consumed_parser.parse_next(Located::new("abcd;")),Err(ErrMode::Backtrack(Error::new(Located::new("abcd;").next_slice(4).0, ErrorKind::Verify)))); + /// assert_eq!(consumed_parser.parse_peek(Located::new("abcd;")),Err(ErrMode::Backtrack(InputError::new(Located::new("abcd;").peek_slice(4).0, ErrorKind::Verify)))); /// /// // the second output (representing the consumed input) /// // should be the same as that of the `span` parser. /// let mut recognize_parser = inner_parser.span(); /// let mut consumed_parser = inner_parser.with_span().map(|(output, consumed)| consumed); /// - /// assert_eq!(recognize_parser.parse_next(Located::new("1234")), consumed_parser.parse_next(Located::new("1234"))); - /// assert_eq!(recognize_parser.parse_next(Located::new("abcd")), consumed_parser.parse_next(Located::new("abcd"))); + /// assert_eq!(recognize_parser.parse_peek(Located::new("1234")), consumed_parser.parse_peek(Located::new("1234"))); + /// assert_eq!(recognize_parser.parse_peek(Located::new("abcd")), consumed_parser.parse_peek(Located::new("abcd"))); /// # } /// ``` + #[inline(always)] fn with_span(self) -> WithSpan<Self, I, O, E> where Self: core::marker::Sized, @@ -335,22 +390,23 @@ pub trait Parser<I, O, E> { /// # Example /// /// ```rust - /// use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult,Parser}; - /// use winnow::character::digit1; + /// use winnow::{error::ErrMode,error::ErrorKind, error::InputError, Parser}; + /// use winnow::ascii::digit1; /// # fn main() { /// /// let mut parser = digit1.map(|s: &str| s.len()); /// /// // the parser will count how many characters were returned by digit1 - /// assert_eq!(parser.parse_next("123456"), Ok(("", 6))); + /// assert_eq!(parser.parse_peek("123456"), Ok(("", 6))); /// /// // this will fail if digit1 fails - /// assert_eq!(parser.parse_next("abc"), Err(ErrMode::Backtrack(Error::new("abc", ErrorKind::Slice)))); + /// assert_eq!(parser.parse_peek("abc"), Err(ErrMode::Backtrack(InputError::new("abc", ErrorKind::Slice)))); /// # } /// ``` + #[inline(always)] fn map<G, O2>(self, map: G) -> Map<Self, G, I, O, O2, E> where - G: Fn(O) -> O2, + G: FnMut(O) -> O2, Self: core::marker::Sized, { Map::new(self, map) @@ -361,30 +417,31 @@ pub trait Parser<I, O, E> { /// # Example /// /// ```rust - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult, Parser}; - /// use winnow::character::digit1; + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, Parser}; + /// use winnow::ascii::digit1; /// # fn main() { /// - /// let mut parse = digit1.map_res(|s: &str| s.parse::<u8>()); + /// let mut parse = digit1.try_map(|s: &str| s.parse::<u8>()); /// /// // the parser will convert the result of digit1 to a number - /// assert_eq!(parse.parse_next("123"), Ok(("", 123))); + /// assert_eq!(parse.parse_peek("123"), Ok(("", 123))); /// /// // this will fail if 
digit1 fails - /// assert_eq!(parse.parse_next("abc"), Err(ErrMode::Backtrack(Error::new("abc", ErrorKind::Slice)))); + /// assert_eq!(parse.parse_peek("abc"), Err(ErrMode::Backtrack(InputError::new("abc", ErrorKind::Slice)))); /// /// // this will fail if the mapped function fails (a `u8` is too small to hold `123456`) - /// assert_eq!(parse.parse_next("123456"), Err(ErrMode::Backtrack(Error::new("123456", ErrorKind::Verify)))); + /// assert_eq!(parse.parse_peek("123456"), Err(ErrMode::Backtrack(InputError::new("123456", ErrorKind::Verify)))); /// # } /// ``` - fn map_res<G, O2, E2>(self, map: G) -> MapRes<Self, G, I, O, O2, E, E2> + #[inline(always)] + fn try_map<G, O2, E2>(self, map: G) -> TryMap<Self, G, I, O, O2, E, E2> where Self: core::marker::Sized, G: FnMut(O) -> Result<O2, E2>, - I: Clone, + I: Stream, E: FromExternalError<I, E2>, { - MapRes::new(self, map) + TryMap::new(self, map) } /// Apply both [`Parser::verify`] and [`Parser::map`]. @@ -392,31 +449,32 @@ pub trait Parser<I, O, E> { /// # Example /// /// ```rust - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult, Parser}; - /// use winnow::character::digit1; + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, Parser}; + /// use winnow::ascii::digit1; /// # fn main() { /// /// let mut parse = digit1.verify_map(|s: &str| s.parse::<u8>().ok()); /// /// // the parser will convert the result of digit1 to a number - /// assert_eq!(parse.parse_next("123"), Ok(("", 123))); + /// assert_eq!(parse.parse_peek("123"), Ok(("", 123))); /// /// // this will fail if digit1 fails - /// assert_eq!(parse.parse_next("abc"), Err(ErrMode::Backtrack(Error::new("abc", ErrorKind::Slice)))); + /// assert_eq!(parse.parse_peek("abc"), Err(ErrMode::Backtrack(InputError::new("abc", ErrorKind::Slice)))); /// /// // this will fail if the mapped function fails (a `u8` is too small to hold `123456`) - /// assert_eq!(parse.parse_next("123456"), Err(ErrMode::Backtrack(Error::new("123456", ErrorKind::Verify)))); + /// assert_eq!(parse.parse_peek("123456"), Err(ErrMode::Backtrack(InputError::new("123456", ErrorKind::Verify)))); /// # } /// ``` #[doc(alias = "satisfy_map")] #[doc(alias = "filter_map")] #[doc(alias = "map_opt")] + #[inline(always)] fn verify_map<G, O2>(self, map: G) -> VerifyMap<Self, G, I, O, O2, E> where Self: core::marker::Sized, G: FnMut(O) -> Option<O2>, - I: Clone, - E: ParseError<I>, + I: Stream, + E: ParserError<I>, { VerifyMap::new(self, map) } @@ -426,33 +484,34 @@ pub trait Parser<I, O, E> { /// # Example /// /// ```rust - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult, Parser}; - /// use winnow::bytes::take; - /// use winnow::number::u8; + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, PResult, Parser}; + /// use winnow::token::take; + /// use winnow::binary::u8; /// - /// fn length_data(input: &[u8]) -> IResult<&[u8], &[u8]> { + /// fn length_data<'s>(input: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { /// u8.flat_map(take).parse_next(input) /// } /// - /// assert_eq!(length_data.parse_next(&[2, 0, 1, 2][..]), Ok((&[2][..], &[0, 1][..]))); - /// assert_eq!(length_data.parse_next(&[4, 0, 1, 2][..]), Err(ErrMode::Backtrack(Error::new(&[0, 1, 2][..], ErrorKind::Slice)))); + /// assert_eq!(length_data.parse_peek(&[2, 0, 1, 2][..]), Ok((&[2][..], &[0, 1][..]))); + /// assert_eq!(length_data.parse_peek(&[4, 0, 1, 2][..]), Err(ErrMode::Backtrack(InputError::new(&[0, 1, 2][..], ErrorKind::Slice)))); /// ``` /// /// which is the same 
as /// ```rust - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult, Parser}; - /// use winnow::bytes::take; - /// use winnow::number::u8; - /// - /// fn length_data(input: &[u8]) -> IResult<&[u8], &[u8]> { - /// let (input, length) = u8.parse_next(input)?; - /// let (input, data) = take(length).parse_next(input)?; - /// Ok((input, data)) + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, PResult, Parser}; + /// use winnow::token::take; + /// use winnow::binary::u8; + /// + /// fn length_data<'s>(input: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { + /// let length = u8.parse_next(input)?; + /// let data = take(length).parse_next(input)?; + /// Ok(data) /// } /// - /// assert_eq!(length_data.parse_next(&[2, 0, 1, 2][..]), Ok((&[2][..], &[0, 1][..]))); - /// assert_eq!(length_data.parse_next(&[4, 0, 1, 2][..]), Err(ErrMode::Backtrack(Error::new(&[0, 1, 2][..], ErrorKind::Slice)))); + /// assert_eq!(length_data.parse_peek(&[2, 0, 1, 2][..]), Ok((&[2][..], &[0, 1][..]))); + /// assert_eq!(length_data.parse_peek(&[4, 0, 1, 2][..]), Err(ErrMode::Backtrack(InputError::new(&[0, 1, 2][..], ErrorKind::Slice)))); /// ``` + #[inline(always)] fn flat_map<G, H, O2>(self, map: G) -> FlatMap<Self, G, H, I, O, O2, E> where Self: core::marker::Sized, @@ -467,23 +526,25 @@ pub trait Parser<I, O, E> { /// # Example /// /// ```rust - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult, Parser}; - /// use winnow::character::digit1; - /// use winnow::bytes::take; + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, Parser}; + /// use winnow::ascii::digit1; + /// use winnow::token::take; /// # fn main() { /// /// let mut digits = take(5u8).and_then(digit1); /// - /// assert_eq!(digits.parse_next("12345"), Ok(("", "12345"))); - /// assert_eq!(digits.parse_next("123ab"), Ok(("", "123"))); - /// assert_eq!(digits.parse_next("123"), Err(ErrMode::Backtrack(Error::new("123", ErrorKind::Slice)))); + /// assert_eq!(digits.parse_peek("12345"), Ok(("", "12345"))); + /// assert_eq!(digits.parse_peek("123ab"), Ok(("", "123"))); + /// assert_eq!(digits.parse_peek("123"), Err(ErrMode::Backtrack(InputError::new("123", ErrorKind::Slice)))); /// # } /// ``` + #[inline(always)] fn and_then<G, O2>(self, inner: G) -> AndThen<Self, G, I, O, O2, E> where Self: core::marker::Sized, G: Parser<O, O2, E>, O: StreamIsPartial, + I: Stream, { AndThen::new(self, inner) } @@ -494,26 +555,27 @@ pub trait Parser<I, O, E> { /// # Example /// /// ```rust /// # use winnow::prelude::*; - /// use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult,Parser}; - /// use winnow::character::digit1; + /// use winnow::{error::ErrMode,error::ErrorKind, error::InputError, Parser}; + /// use winnow::ascii::digit1; /// - /// fn parser(input: &str) -> IResult<&str, u64> { + /// fn parser<'s>(input: &mut &'s str) -> PResult<u64, InputError<&'s str>> { /// digit1.parse_to().parse_next(input) /// } /// /// // the parser will parse the digits into a u64 - /// assert_eq!(parser.parse_next("123456"), Ok(("", 123456))); + /// assert_eq!(parser.parse_peek("123456"), Ok(("", 123456))); /// /// // this will fail if digit1 fails - /// assert_eq!(parser.parse_next("abc"), Err(ErrMode::Backtrack(Error::new("abc", ErrorKind::Slice)))); + /// assert_eq!(parser.parse_peek("abc"), Err(ErrMode::Backtrack(InputError::new("abc", ErrorKind::Slice)))); /// ``` #[doc(alias = "from_str")] + #[inline(always)] fn parse_to<O2>(self) -> ParseTo<Self, I, O, O2, E> where
Self: core::marker::Sized, I: Stream, O: ParseSlice<O2>, - E: ParseError<I>, + E: ParserError<I>, { ParseTo::new(self) } @@ -526,27 +588,28 @@ pub trait Parser<I, O, E> { /// # Example /// /// ```rust - /// # use winnow::{error::ErrMode,error::ErrorKind, error::Error, IResult, Parser}; - /// # use winnow::character::alpha1; + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, Parser}; + /// # use winnow::ascii::alpha1; /// # fn main() { /// /// let mut parser = alpha1.verify(|s: &str| s.len() == 4); /// - /// assert_eq!(parser.parse_next("abcd"), Ok(("", "abcd"))); - /// assert_eq!(parser.parse_next("abcde"), Err(ErrMode::Backtrack(Error::new("abcde", ErrorKind::Verify)))); - /// assert_eq!(parser.parse_next("123abcd;"),Err(ErrMode::Backtrack(Error::new("123abcd;", ErrorKind::Slice)))); + /// assert_eq!(parser.parse_peek("abcd"), Ok(("", "abcd"))); + /// assert_eq!(parser.parse_peek("abcde"), Err(ErrMode::Backtrack(InputError::new("abcde", ErrorKind::Verify)))); + /// assert_eq!(parser.parse_peek("123abcd;"),Err(ErrMode::Backtrack(InputError::new("123abcd;", ErrorKind::Slice)))); /// # } /// ``` #[doc(alias = "satisfy")] #[doc(alias = "filter")] + #[inline(always)] fn verify<G, O2>(self, filter: G) -> Verify<Self, G, I, O, O2, E> where Self: core::marker::Sized, - G: Fn(&O2) -> bool, - I: Clone, + G: FnMut(&O2) -> bool, + I: Stream, O: crate::lib::std::borrow::Borrow<O2>, O2: ?Sized, - E: ParseError<I>, + E: ParserError<I>, { Verify::new(self, filter) } @@ -556,11 +619,12 @@ pub trait Parser<I, O, E> { /// This is used mainly to add user friendly information /// to errors when backtracking through a parse tree. #[doc(alias = "labelled")] + #[inline(always)] fn context<C>(self, context: C) -> Context<Self, I, O, E, C> where Self: core::marker::Sized, I: Stream, - E: ContextError<I, C>, + E: AddContext<I, C>, C: Clone + crate::lib::std::fmt::Debug, { Context::new(self, context) @@ -571,16 +635,17 @@ pub trait Parser<I, O, E> { /// # Example /// /// ```rust - /// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, IResult, stream::Partial, Parser}; - /// # use winnow::bytes::take; + /// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, stream::Partial, Parser}; + /// # use winnow::token::take; /// # fn main() { /// /// let mut parser = take(5u8).complete_err(); /// - /// assert_eq!(parser.parse_next(Partial::new("abcdefg")), Ok((Partial::new("fg"), "abcde"))); - /// assert_eq!(parser.parse_next(Partial::new("abcd")), Err(ErrMode::Backtrack(Error::new(Partial::new("abcd"), ErrorKind::Complete)))); + /// assert_eq!(parser.parse_peek(Partial::new("abcdefg")), Ok((Partial::new("fg"), "abcde"))); + /// assert_eq!(parser.parse_peek(Partial::new("abcd")), Err(ErrMode::Backtrack(InputError::new(Partial::new("abcd"), ErrorKind::Complete)))); /// # } /// ``` + #[inline(always)] fn complete_err(self) -> CompleteErr<Self> where Self: core::marker::Sized, @@ -589,6 +654,7 @@ pub trait Parser<I, O, E> { } /// Convert the parser's error to another type using [`std::convert::From`] + #[inline(always)] fn err_into<E2>(self) -> ErrInto<Self, I, O, E, E2> where Self: core::marker::Sized, @@ -600,169 +666,275 @@ pub trait Parser<I, O, E> { impl<'a, I, O, E, F> Parser<I, O, E> for F where - F: FnMut(I) -> IResult<I, O, E> + 'a, + F: FnMut(&mut I) -> PResult<O, E> + 'a, + I: Stream, { - fn parse_next(&mut self, i: I) -> IResult<I, O, E> { + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<O, E> { self(i) } } -/// This is a shortcut for 
[`one_of`][crate::bytes::one_of]. +/// This is a shortcut for [`one_of`][crate::token::one_of]. /// /// # Example /// /// ``` /// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::{ErrorKind, Error}}; -/// fn parser(i: &[u8]) -> IResult<&[u8], u8> { +/// # use winnow::{error::ErrMode, error::{ErrorKind, InputError}}; +/// fn parser<'s>(i: &mut &'s [u8]) -> PResult<u8, InputError<&'s [u8]>> { /// b'a'.parse_next(i) /// } -/// assert_eq!(parser(&b"abc"[..]), Ok((&b"bc"[..], b'a'))); -/// assert_eq!(parser(&b" abc"[..]), Err(ErrMode::Backtrack(Error::new(&b" abc"[..], ErrorKind::Verify)))); -/// assert_eq!(parser(&b"bc"[..]), Err(ErrMode::Backtrack(Error::new(&b"bc"[..], ErrorKind::Verify)))); -/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(Error::new(&b""[..], ErrorKind::Token)))); +/// assert_eq!(parser.parse_peek(&b"abc"[..]), Ok((&b"bc"[..], b'a'))); +/// assert_eq!(parser.parse_peek(&b" abc"[..]), Err(ErrMode::Backtrack(InputError::new(&b" abc"[..], ErrorKind::Verify)))); +/// assert_eq!(parser.parse_peek(&b"bc"[..]), Err(ErrMode::Backtrack(InputError::new(&b"bc"[..], ErrorKind::Verify)))); +/// assert_eq!(parser.parse_peek(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&b""[..], ErrorKind::Token)))); /// ``` impl<I, E> Parser<I, u8, E> for u8 where I: StreamIsPartial, I: Stream<Token = u8>, - E: ParseError<I>, + E: ParserError<I>, { - fn parse_next(&mut self, i: I) -> IResult<I, u8, E> { - crate::bytes::one_of(*self).parse_next(i) + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<u8, E> { + crate::token::one_of(*self).parse_next(i) } } -/// This is a shortcut for [`one_of`][crate::bytes::one_of]. +/// This is a shortcut for [`one_of`][crate::token::one_of]. /// /// # Example /// /// ``` /// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::{ErrorKind, Error}}; -/// fn parser(i: &str) -> IResult<&str, char> { +/// # use winnow::{error::ErrMode, error::{ErrorKind, InputError}}; +/// fn parser<'s>(i: &mut &'s str) -> PResult<char, InputError<&'s str>> { /// 'a'.parse_next(i) /// } -/// assert_eq!(parser("abc"), Ok(("bc", 'a'))); -/// assert_eq!(parser(" abc"), Err(ErrMode::Backtrack(Error::new(" abc", ErrorKind::Verify)))); -/// assert_eq!(parser("bc"), Err(ErrMode::Backtrack(Error::new("bc", ErrorKind::Verify)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Token)))); +/// assert_eq!(parser.parse_peek("abc"), Ok(("bc", 'a'))); +/// assert_eq!(parser.parse_peek(" abc"), Err(ErrMode::Backtrack(InputError::new(" abc", ErrorKind::Verify)))); +/// assert_eq!(parser.parse_peek("bc"), Err(ErrMode::Backtrack(InputError::new("bc", ErrorKind::Verify)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Token)))); /// ``` impl<I, E> Parser<I, <I as Stream>::Token, E> for char where I: StreamIsPartial, I: Stream, - <I as Stream>::Token: AsChar + Copy, - E: ParseError<I>, + <I as Stream>::Token: AsChar + Clone, + E: ParserError<I>, { - fn parse_next(&mut self, i: I) -> IResult<I, <I as Stream>::Token, E> { - crate::bytes::one_of(*self).parse_next(i) + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<<I as Stream>::Token, E> { + crate::token::one_of(*self).parse_next(i) } } -/// This is a shortcut for [`tag`][crate::bytes::tag]. +/// This is a shortcut for [`tag`][crate::token::tag]. 
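Together with the tuple impls further down in this file, the literal impls above (`u8`, `char`) and the slice/`&str` impls below let small grammar fragments be written with no explicit combinator calls at all. A sketch under the post-patch winnow 0.5 API; `key_value` and the input shape are illustrative:

```rust
use winnow::error::InputError;
use winnow::prelude::*;

// `&str`, `char`, and tuples of parsers all implement `Parser` directly.
fn key_value<'s>(
    input: &mut &'s str,
) -> PResult<(&'s str, char, &'s str), InputError<&'s str>> {
    ("key", '=', "value").parse_next(input)
}

fn main() {
    let mut input = "key=value;rest";
    assert_eq!(key_value(&mut input), Ok(("key", '=', "value")));
    assert_eq!(input, ";rest"); // the stream advanced past the match
}
```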
/// /// # Example /// ```rust /// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::branch::alt; -/// # use winnow::bytes::take; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::combinator::alt; +/// # use winnow::token::take; /// -/// fn parser(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// fn parser<'s>(s: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { /// alt((&"Hello"[..], take(5usize))).parse_next(s) /// } /// -/// assert_eq!(parser(&b"Hello, World!"[..]), Ok((&b", World!"[..], &b"Hello"[..]))); -/// assert_eq!(parser(&b"Something"[..]), Ok((&b"hing"[..], &b"Somet"[..]))); -/// assert_eq!(parser(&b"Some"[..]), Err(ErrMode::Backtrack(Error::new(&b"Some"[..], ErrorKind::Slice)))); -/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(Error::new(&b""[..], ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(&b"Hello, World!"[..]), Ok((&b", World!"[..], &b"Hello"[..]))); +/// assert_eq!(parser.parse_peek(&b"Something"[..]), Ok((&b"hing"[..], &b"Somet"[..]))); +/// assert_eq!(parser.parse_peek(&b"Some"[..]), Err(ErrMode::Backtrack(InputError::new(&b"Some"[..], ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&b""[..], ErrorKind::Slice)))); /// ``` -impl<'s, I, E: ParseError<I>> Parser<I, <I as Stream>::Slice, E> for &'s [u8] +impl<'s, I, E: ParserError<I>> Parser<I, <I as Stream>::Slice, E> for &'s [u8] where I: Compare<&'s [u8]> + StreamIsPartial, I: Stream, { - fn parse_next(&mut self, i: I) -> IResult<I, <I as Stream>::Slice, E> { - crate::bytes::tag(*self).parse_next(i) + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<<I as Stream>::Slice, E> { + crate::token::tag(*self).parse_next(i) } } -/// This is a shortcut for [`tag`][crate::bytes::tag]. +/// This is a shortcut for [`tag`][crate::token::tag]. 
/// /// # Example /// ```rust /// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed}; -/// # use winnow::branch::alt; -/// # use winnow::bytes::take; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::combinator::alt; +/// # use winnow::token::take; +/// use winnow::ascii::Caseless; /// -/// fn parser(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// fn parser<'s>(s: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { +/// alt((Caseless(&"hello"[..]), take(5usize))).parse_next(s) +/// } +/// +/// assert_eq!(parser.parse_peek(&b"Hello, World!"[..]), Ok((&b", World!"[..], &b"Hello"[..]))); +/// assert_eq!(parser.parse_peek(&b"hello, World!"[..]), Ok((&b", World!"[..], &b"hello"[..]))); +/// assert_eq!(parser.parse_peek(&b"HeLlo, World!"[..]), Ok((&b", World!"[..], &b"HeLlo"[..]))); +/// assert_eq!(parser.parse_peek(&b"Something"[..]), Ok((&b"hing"[..], &b"Somet"[..]))); +/// assert_eq!(parser.parse_peek(&b"Some"[..]), Err(ErrMode::Backtrack(InputError::new(&b"Some"[..], ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&b""[..], ErrorKind::Slice)))); +/// ``` +impl<'s, I, E: ParserError<I>> Parser<I, <I as Stream>::Slice, E> for AsciiCaseless<&'s [u8]> +where + I: Compare<AsciiCaseless<&'s [u8]>> + StreamIsPartial, + I: Stream, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<<I as Stream>::Slice, E> { + crate::token::tag(*self).parse_next(i) + } +} + +/// This is a shortcut for [`tag`][crate::token::tag]. +/// +/// # Example +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::combinator::alt; +/// # use winnow::token::take; +/// +/// fn parser<'s>(s: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { /// alt((b"Hello", take(5usize))).parse_next(s) /// } /// -/// assert_eq!(parser(&b"Hello, World!"[..]), Ok((&b", World!"[..], &b"Hello"[..]))); -/// assert_eq!(parser(&b"Something"[..]), Ok((&b"hing"[..], &b"Somet"[..]))); -/// assert_eq!(parser(&b"Some"[..]), Err(ErrMode::Backtrack(Error::new(&b"Some"[..], ErrorKind::Slice)))); -/// assert_eq!(parser(&b""[..]), Err(ErrMode::Backtrack(Error::new(&b""[..], ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(&b"Hello, World!"[..]), Ok((&b", World!"[..], &b"Hello"[..]))); +/// assert_eq!(parser.parse_peek(&b"Something"[..]), Ok((&b"hing"[..], &b"Somet"[..]))); +/// assert_eq!(parser.parse_peek(&b"Some"[..]), Err(ErrMode::Backtrack(InputError::new(&b"Some"[..], ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&b""[..], ErrorKind::Slice)))); /// ``` -impl<'s, I, E: ParseError<I>, const N: usize> Parser<I, <I as Stream>::Slice, E> for &'s [u8; N] +impl<'s, I, E: ParserError<I>, const N: usize> Parser<I, <I as Stream>::Slice, E> for &'s [u8; N] where I: Compare<&'s [u8; N]> + StreamIsPartial, I: Stream, { - fn parse_next(&mut self, i: I) -> IResult<I, <I as Stream>::Slice, E> { - crate::bytes::tag(*self).parse_next(i) + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<<I as Stream>::Slice, E> { + crate::token::tag(*self).parse_next(i) + } +} + +/// This is a shortcut for [`tag`][crate::token::tag]. 
+/// +/// # Example +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::combinator::alt; +/// # use winnow::token::take; +/// use winnow::ascii::Caseless; +/// +/// fn parser<'s>(s: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { +/// alt((Caseless(b"hello"), take(5usize))).parse_next(s) +/// } +/// +/// assert_eq!(parser.parse_peek(&b"Hello, World!"[..]), Ok((&b", World!"[..], &b"Hello"[..]))); +/// assert_eq!(parser.parse_peek(&b"hello, World!"[..]), Ok((&b", World!"[..], &b"hello"[..]))); +/// assert_eq!(parser.parse_peek(&b"HeLlo, World!"[..]), Ok((&b", World!"[..], &b"HeLlo"[..]))); +/// assert_eq!(parser.parse_peek(&b"Something"[..]), Ok((&b"hing"[..], &b"Somet"[..]))); +/// assert_eq!(parser.parse_peek(&b"Some"[..]), Err(ErrMode::Backtrack(InputError::new(&b"Some"[..], ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&b""[..], ErrorKind::Slice)))); +/// ``` +impl<'s, I, E: ParserError<I>, const N: usize> Parser<I, <I as Stream>::Slice, E> + for AsciiCaseless<&'s [u8; N]> +where + I: Compare<AsciiCaseless<&'s [u8; N]>> + StreamIsPartial, + I: Stream, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<<I as Stream>::Slice, E> { + crate::token::tag(*self).parse_next(i) } } -/// This is a shortcut for [`tag`][crate::bytes::tag]. +/// This is a shortcut for [`tag`][crate::token::tag]. /// /// # Example /// ```rust /// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}}; -/// # use winnow::branch::alt; -/// # use winnow::bytes::take; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}}; +/// # use winnow::combinator::alt; +/// # use winnow::token::take; /// -/// fn parser(s: &str) -> IResult<&str, &str> { +/// fn parser<'s>(s: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { /// alt(("Hello", take(5usize))).parse_next(s) /// } /// -/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello"))); -/// assert_eq!(parser("Something"), Ok(("hing", "Somet"))); -/// assert_eq!(parser("Some"), Err(ErrMode::Backtrack(Error::new("Some", ErrorKind::Slice)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek("Hello, World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser.parse_peek("Something"), Ok(("hing", "Somet"))); +/// assert_eq!(parser.parse_peek("Some"), Err(ErrMode::Backtrack(InputError::new("Some", ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); /// ``` -impl<'s, I, E: ParseError<I>> Parser<I, <I as Stream>::Slice, E> for &'s str +impl<'s, I, E: ParserError<I>> Parser<I, <I as Stream>::Slice, E> for &'s str where I: Compare<&'s str> + StreamIsPartial, I: Stream, { - fn parse_next(&mut self, i: I) -> IResult<I, <I as Stream>::Slice, E> { - crate::bytes::tag(*self).parse_next(i) + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<<I as Stream>::Slice, E> { + crate::token::tag(*self).parse_next(i) + } +} + +/// This is a shortcut for [`tag`][crate::token::tag]. 
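The `Caseless` impls added in this hunk make a tag literal match ASCII case-insensitively while still returning the slice in the input's own spelling (note `"HeLlo"` coming back unchanged in the examples above), which is the usual requirement for wire protocols. A sketch under the post-patch winnow 0.5 API; `http_version` is an illustrative name:

```rust
use winnow::ascii::Caseless;
use winnow::error::InputError;
use winnow::prelude::*;

fn http_version<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> {
    Caseless("http/").parse_next(input)
}

fn main() {
    // Matches regardless of case, but returns the input's own spelling.
    assert_eq!(http_version.parse_peek("HTTP/1.1"), Ok(("1.1", "HTTP/")));
    assert_eq!(http_version.parse_peek("http/2"), Ok(("2", "http/")));
}
```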
+/// +/// # Example +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}}; +/// # use winnow::combinator::alt; +/// # use winnow::token::take; +/// # use winnow::ascii::Caseless; +/// +/// fn parser<'s>(s: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// alt((Caseless("hello"), take(5usize))).parse_next(s) +/// } +/// +/// assert_eq!(parser.parse_peek("Hello, World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser.parse_peek("hello, World!"), Ok((", World!", "hello"))); +/// assert_eq!(parser.parse_peek("HeLlo, World!"), Ok((", World!", "HeLlo"))); +/// assert_eq!(parser.parse_peek("Something"), Ok(("hing", "Somet"))); +/// assert_eq!(parser.parse_peek("Some"), Err(ErrMode::Backtrack(InputError::new("Some", ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// ``` +impl<'s, I, E: ParserError<I>> Parser<I, <I as Stream>::Slice, E> for AsciiCaseless<&'s str> +where + I: Compare<AsciiCaseless<&'s str>> + StreamIsPartial, + I: Stream, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<<I as Stream>::Slice, E> { + crate::token::tag(*self).parse_next(i) } } -impl<I, E: ParseError<I>> Parser<I, (), E> for () { - fn parse_next(&mut self, i: I) -> IResult<I, (), E> { - Ok((i, ())) +impl<I, E: ParserError<I>> Parser<I, (), E> for () { + #[inline(always)] + fn parse_next(&mut self, _i: &mut I) -> PResult<(), E> { + Ok(()) } } macro_rules! impl_parser_for_tuple { ($($parser:ident $output:ident),+) => ( #[allow(non_snake_case)] - impl<I, $($output),+, E: ParseError<I>, $($parser),+> Parser<I, ($($output),+,), E> for ($($parser),+,) + impl<I, $($output),+, E: ParserError<I>, $($parser),+> Parser<I, ($($output),+,), E> for ($($parser),+,) where $($parser: Parser<I, $output, E>),+ { - fn parse_next(&mut self, i: I) -> IResult<I, ($($output),+,), E> { + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<($($output),+,), E> { let ($(ref mut $parser),+,) = *self; - $(let(i, $output) = $parser.parse_next(i)?;)+ + $(let $output = $parser.parse_next(i)?;)+ - Ok((i, ($($output),+,))) + Ok(($($output),+,)) } } ) @@ -810,20 +982,38 @@ use alloc::boxed::Box; #[cfg(feature = "alloc")] impl<'a, I, O, E> Parser<I, O, E> for Box<dyn Parser<I, O, E> + 'a> { - fn parse_next(&mut self, input: I) -> IResult<I, O, E> { - (**self).parse_next(input) + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<O, E> { + (**self).parse_next(i) + } +} + +/// Convert a [`Parser::parse_peek`] style parse function to be a [`Parser`] +#[inline(always)] +pub fn unpeek<'a, I, O, E>( + mut peek: impl FnMut(I) -> IResult<I, O, E> + 'a, +) -> impl FnMut(&mut I) -> PResult<O, E> +where + I: Clone, +{ + move |input| match peek((*input).clone()) { + Ok((i, o)) => { + *input = i; + Ok(o) + } + Err(err) => Err(err), } } #[cfg(test)] mod tests { use super::*; - use crate::bytes::take; + use crate::binary::be_u16; use crate::error::ErrMode; - use crate::error::Error; use crate::error::ErrorKind; + use crate::error::InputError; use crate::error::Needed; - use crate::number::be_u16; + use crate::token::take; use crate::Partial; #[doc(hidden)] @@ -852,17 +1042,17 @@ mod tests { #[test] fn single_element_tuples() { - use crate::character::alpha1; + use crate::ascii::alpha1; use crate::error::ErrorKind; let mut parser = (alpha1,); - assert_eq!(parser.parse_next("abc123def"), Ok(("123def", ("abc",)))); + assert_eq!(parser.parse_peek("abc123def"), 
Ok(("123def", ("abc",)))); assert_eq!( - parser.parse_next("123def"), - Err(ErrMode::Backtrack(Error { - input: "123def", - kind: ErrorKind::Slice - })) + parser.parse_peek("123def"), + Err(ErrMode::Backtrack(InputError::new( + "123def", + ErrorKind::Slice + ))) ); } @@ -870,7 +1060,7 @@ mod tests { fn tuple_test() { #[allow(clippy::type_complexity)] fn tuple_3(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (u16, &[u8], &[u8])> { - (be_u16, take(3u8), "fg").parse_next(i) + (be_u16, take(3u8), "fg").parse_peek(i) } assert_eq!( @@ -891,7 +1081,7 @@ mod tests { assert_eq!( tuple_3(Partial::new(&b"abcdejk"[..])), Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"jk"[..]), + &Partial::new(&b"jk"[..]), ErrorKind::Tag ))) ); @@ -899,11 +1089,11 @@ mod tests { #[test] fn unit_type() { - fn parser(i: &str) -> IResult<&str, ()> { + fn parser(i: &mut &str) -> PResult<()> { ().parse_next(i) } - assert_eq!(parser.parse_next("abxsbsh"), Ok(("abxsbsh", ()))); - assert_eq!(parser.parse_next("sdfjakdsas"), Ok(("sdfjakdsas", ()))); - assert_eq!(parser.parse_next(""), Ok(("", ()))); + assert_eq!(parser.parse_peek("abxsbsh"), Ok(("abxsbsh", ()))); + assert_eq!(parser.parse_peek("sdfjakdsas"), Ok(("sdfjakdsas", ()))); + assert_eq!(parser.parse_peek(""), Ok(("", ()))); } } diff --git a/vendor/winnow/src/sequence/mod.rs b/vendor/winnow/src/sequence/mod.rs deleted file mode 100644 index 68f7723..0000000 --- a/vendor/winnow/src/sequence/mod.rs +++ /dev/null @@ -1,169 +0,0 @@ -//! Combinators applying parsers in sequence - -#[cfg(test)] -mod tests; - -use crate::error::ParseError; -use crate::stream::Stream; -use crate::trace::trace; -use crate::Parser; - -/// Apply two parsers, only returning the output from the second. -/// -/// # Arguments -/// * `first` The opening parser. -/// * `second` The second parser to get object. -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::prelude::*; -/// # use winnow::error::Needed::Size; -/// use winnow::sequence::preceded; -/// use winnow::bytes::tag; -/// -/// let mut parser = preceded("abc", "efg"); -/// -/// assert_eq!(parser.parse_next("abcefg"), Ok(("", "efg"))); -/// assert_eq!(parser.parse_next("abcefghij"), Ok(("hij", "efg"))); -/// assert_eq!(parser.parse_next(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// assert_eq!(parser.parse_next("123"), Err(ErrMode::Backtrack(Error::new("123", ErrorKind::Tag)))); -/// ``` -#[doc(alias = "ignore_then")] -pub fn preceded<I, O1, O2, E: ParseError<I>, F, G>( - mut first: F, - mut second: G, -) -> impl Parser<I, O2, E> -where - I: Stream, - F: Parser<I, O1, E>, - G: Parser<I, O2, E>, -{ - trace("preceded", move |input: I| { - let (input, _) = first.parse_next(input)?; - second.parse_next(input) - }) -} - -/// Apply two parsers, only returning the output of the first. -/// -/// # Arguments -/// * `first` The first parser to apply. -/// * `second` The second parser to match an object. 
-/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::prelude::*; -/// # use winnow::error::Needed::Size; -/// use winnow::sequence::terminated; -/// use winnow::bytes::tag; -/// -/// let mut parser = terminated("abc", "efg"); -/// -/// assert_eq!(parser.parse_next("abcefg"), Ok(("", "abc"))); -/// assert_eq!(parser.parse_next("abcefghij"), Ok(("hij", "abc"))); -/// assert_eq!(parser.parse_next(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// assert_eq!(parser.parse_next("123"), Err(ErrMode::Backtrack(Error::new("123", ErrorKind::Tag)))); -/// ``` -#[doc(alias = "then_ignore")] -pub fn terminated<I, O1, O2, E: ParseError<I>, F, G>( - mut first: F, - mut second: G, -) -> impl Parser<I, O1, E> -where - I: Stream, - F: Parser<I, O1, E>, - G: Parser<I, O2, E>, -{ - trace("terminated", move |input: I| { - let (input, o1) = first.parse_next(input)?; - second.parse_next(input).map(|(i, _)| (i, o1)) - }) -} - -/// Apply three parsers, only returning the values of the first and third. -/// -/// # Arguments -/// * `first` The first parser to apply. -/// * `sep` The separator parser to apply. -/// * `second` The second parser to apply. -/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::error::Needed::Size; -/// # use winnow::prelude::*; -/// use winnow::sequence::separated_pair; -/// use winnow::bytes::tag; -/// -/// let mut parser = separated_pair("abc", "|", "efg"); -/// -/// assert_eq!(parser.parse_next("abc|efg"), Ok(("", ("abc", "efg")))); -/// assert_eq!(parser.parse_next("abc|efghij"), Ok(("hij", ("abc", "efg")))); -/// assert_eq!(parser.parse_next(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// assert_eq!(parser.parse_next("123"), Err(ErrMode::Backtrack(Error::new("123", ErrorKind::Tag)))); -/// ``` -pub fn separated_pair<I, O1, O2, O3, E: ParseError<I>, F, G, H>( - mut first: F, - mut sep: G, - mut second: H, -) -> impl Parser<I, (O1, O3), E> -where - I: Stream, - F: Parser<I, O1, E>, - G: Parser<I, O2, E>, - H: Parser<I, O3, E>, -{ - trace("separated_pair", move |input: I| { - let (input, o1) = first.parse_next(input)?; - let (input, _) = sep.parse_next(input)?; - second.parse_next(input).map(|(i, o2)| (i, (o1, o2))) - }) -} - -/// Apply three parsers, only returning the output of the second. -/// -/// # Arguments -/// * `first` The first parser to apply and discard. -/// * `second` The second parser to apply. -/// * `third` The third parser to apply and discard. 
-/// -/// # Example -/// -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error, error::Needed}; -/// # use winnow::error::Needed::Size; -/// # use winnow::prelude::*; -/// use winnow::sequence::delimited; -/// use winnow::bytes::tag; -/// -/// let mut parser = delimited("(", "abc", ")"); -/// -/// assert_eq!(parser.parse_next("(abc)"), Ok(("", "abc"))); -/// assert_eq!(parser.parse_next("(abc)def"), Ok(("def", "abc"))); -/// assert_eq!(parser.parse_next(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Tag)))); -/// assert_eq!(parser.parse_next("123"), Err(ErrMode::Backtrack(Error::new("123", ErrorKind::Tag)))); -/// ``` -#[doc(alias = "between")] -#[doc(alias = "padded")] -pub fn delimited<I, O1, O2, O3, E: ParseError<I>, F, G, H>( - mut first: F, - mut second: G, - mut third: H, -) -> impl Parser<I, O2, E> -where - I: Stream, - F: Parser<I, O1, E>, - G: Parser<I, O2, E>, - H: Parser<I, O3, E>, -{ - trace("delimited", move |input: I| { - let (input, _) = first.parse_next(input)?; - let (input, o2) = second.parse_next(input)?; - third.parse_next(input).map(|(i, _)| (i, o2)) - }) -} diff --git a/vendor/winnow/src/sequence/tests.rs b/vendor/winnow/src/sequence/tests.rs deleted file mode 100644 index 77fa75d..0000000 --- a/vendor/winnow/src/sequence/tests.rs +++ /dev/null @@ -1,211 +0,0 @@ -use super::*; - -use crate::error::{ErrMode, ErrorKind, Needed}; -use crate::IResult; -use crate::Partial; - -#[derive(PartialEq, Eq, Debug)] -struct B { - a: u8, - b: u8, -} - -#[derive(PartialEq, Eq, Debug)] -struct C { - a: u8, - b: Option<u8>, -} - -#[test] -fn complete() { - fn err_test(i: &[u8]) -> IResult<&[u8], &[u8]> { - let (i, _) = "ijkl".parse_next(i)?; - "mnop".parse_next(i) - } - let a = &b"ijklmn"[..]; - - let res_a = err_test(a); - assert_eq!( - res_a, - Err(ErrMode::Backtrack(error_position!( - &b"mn"[..], - ErrorKind::Tag - ))) - ); -} - -#[test] -fn separated_pair_test() { - #[allow(clippy::type_complexity)] - fn sep_pair_abc_def(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (&[u8], &[u8])> { - separated_pair("abc", ",", "def").parse_next(i) - } - - assert_eq!( - sep_pair_abc_def(Partial::new(&b"abc,defghijkl"[..])), - Ok((Partial::new(&b"ghijkl"[..]), (&b"abc"[..], &b"def"[..]))) - ); - assert_eq!( - sep_pair_abc_def(Partial::new(&b"ab"[..])), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - sep_pair_abc_def(Partial::new(&b"abc,d"[..])), - Err(ErrMode::Incomplete(Needed::new(2))) - ); - assert_eq!( - sep_pair_abc_def(Partial::new(&b"xxx"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), - ErrorKind::Tag - ))) - ); - assert_eq!( - sep_pair_abc_def(Partial::new(&b"xxx,def"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx,def"[..]), - ErrorKind::Tag - ))) - ); - assert_eq!( - sep_pair_abc_def(Partial::new(&b"abc,xxx"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), - ErrorKind::Tag - ))) - ); -} - -#[test] -fn preceded_test() { - fn preceded_abcd_efgh(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - preceded("abcd", "efgh").parse_next(i) - } - - assert_eq!( - preceded_abcd_efgh(Partial::new(&b"abcdefghijkl"[..])), - Ok((Partial::new(&b"ijkl"[..]), &b"efgh"[..])) - ); - assert_eq!( - preceded_abcd_efgh(Partial::new(&b"ab"[..])), - Err(ErrMode::Incomplete(Needed::new(2))) - ); - assert_eq!( - preceded_abcd_efgh(Partial::new(&b"abcde"[..])), - Err(ErrMode::Incomplete(Needed::new(3))) - ); - assert_eq!( - preceded_abcd_efgh(Partial::new(&b"xxx"[..])), - 
Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), - ErrorKind::Tag - ))) - ); - assert_eq!( - preceded_abcd_efgh(Partial::new(&b"xxxxdef"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxxxdef"[..]), - ErrorKind::Tag - ))) - ); - assert_eq!( - preceded_abcd_efgh(Partial::new(&b"abcdxxx"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), - ErrorKind::Tag - ))) - ); -} - -#[test] -fn terminated_test() { - fn terminated_abcd_efgh(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - terminated("abcd", "efgh").parse_next(i) - } - - assert_eq!( - terminated_abcd_efgh(Partial::new(&b"abcdefghijkl"[..])), - Ok((Partial::new(&b"ijkl"[..]), &b"abcd"[..])) - ); - assert_eq!( - terminated_abcd_efgh(Partial::new(&b"ab"[..])), - Err(ErrMode::Incomplete(Needed::new(2))) - ); - assert_eq!( - terminated_abcd_efgh(Partial::new(&b"abcde"[..])), - Err(ErrMode::Incomplete(Needed::new(3))) - ); - assert_eq!( - terminated_abcd_efgh(Partial::new(&b"xxx"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), - ErrorKind::Tag - ))) - ); - assert_eq!( - terminated_abcd_efgh(Partial::new(&b"xxxxdef"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxxxdef"[..]), - ErrorKind::Tag - ))) - ); - assert_eq!( - terminated_abcd_efgh(Partial::new(&b"abcdxxxx"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxxx"[..]), - ErrorKind::Tag - ))) - ); -} - -#[test] -fn delimited_test() { - fn delimited_abc_def_ghi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - delimited("abc", "def", "ghi").parse_next(i) - } - - assert_eq!( - delimited_abc_def_ghi(Partial::new(&b"abcdefghijkl"[..])), - Ok((Partial::new(&b"jkl"[..]), &b"def"[..])) - ); - assert_eq!( - delimited_abc_def_ghi(Partial::new(&b"ab"[..])), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - delimited_abc_def_ghi(Partial::new(&b"abcde"[..])), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - delimited_abc_def_ghi(Partial::new(&b"abcdefgh"[..])), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - delimited_abc_def_ghi(Partial::new(&b"xxx"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), - ErrorKind::Tag - ))) - ); - assert_eq!( - delimited_abc_def_ghi(Partial::new(&b"xxxdefghi"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxxdefghi"[..]), - ErrorKind::Tag - ),)) - ); - assert_eq!( - delimited_abc_def_ghi(Partial::new(&b"abcxxxghi"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxxghi"[..]), - ErrorKind::Tag - ))) - ); - assert_eq!( - delimited_abc_def_ghi(Partial::new(&b"abcdefxxx"[..])), - Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"xxx"[..]), - ErrorKind::Tag - ))) - ); -} diff --git a/vendor/winnow/src/stream/impls.rs b/vendor/winnow/src/stream/impls.rs index b277dd9..d76e1bf 100644 --- a/vendor/winnow/src/stream/impls.rs +++ b/vendor/winnow/src/stream/impls.rs @@ -238,14 +238,14 @@ mod bytes { impl PartialOrd for Bytes { #[inline] fn partial_cmp(&self, other: &Bytes) -> Option<Ordering> { - PartialOrd::partial_cmp(self.as_bytes(), other.as_bytes()) + Some(self.cmp(other)) } } impl Ord for Bytes { #[inline] fn cmp(&self, other: &Bytes) -> Ordering { - self.partial_cmp(other).unwrap() + Ord::cmp(self.as_bytes(), other.as_bytes()) } } @@ -485,14 +485,14 @@ mod bstr { impl PartialOrd for BStr { #[inline] fn partial_cmp(&self, other: &BStr) -> Option<Ordering> { - PartialOrd::partial_cmp(self.as_bytes(), 
other.as_bytes()) + Some(self.cmp(other)) } } impl Ord for BStr { #[inline] fn cmp(&self, other: &BStr) -> Ordering { - self.partial_cmp(other).unwrap() + Ord::cmp(self.as_bytes(), other.as_bytes()) } } diff --git a/vendor/winnow/src/stream/mod.rs b/vendor/winnow/src/stream/mod.rs index 019975a..ea910e7 100644 --- a/vendor/winnow/src/stream/mod.rs +++ b/vendor/winnow/src/stream/mod.rs @@ -9,23 +9,29 @@ //! - [`Partial`] can mark an input as partial buffer that is being streamed into //! - [Custom stream types][crate::_topic::stream] +use core::hash::BuildHasher; use core::num::NonZeroUsize; -use crate::error::{ErrMode, ErrorKind, Needed, ParseError}; +use crate::ascii::Caseless as AsciiCaseless; +use crate::error::Needed; use crate::lib::std::iter::{Cloned, Enumerate}; -use crate::lib::std::ops::{ - Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive, -}; use crate::lib::std::slice::Iter; use crate::lib::std::str::from_utf8; use crate::lib::std::str::CharIndices; use crate::lib::std::str::FromStr; -use crate::IResult; + +#[allow(unused_imports)] +#[cfg(feature = "unstable-doc")] +use crate::error::ErrMode; #[cfg(feature = "alloc")] use crate::lib::std::collections::BTreeMap; +#[cfg(feature = "alloc")] +use crate::lib::std::collections::BTreeSet; #[cfg(feature = "std")] use crate::lib::std::collections::HashMap; +#[cfg(feature = "std")] +use crate::lib::std::collections::HashSet; #[cfg(feature = "alloc")] use crate::lib::std::string::String; #[cfg(feature = "alloc")] @@ -88,8 +94,18 @@ impl BStr { /// Allow collecting the span of a parsed token /// +/// Spans are tracked as a [`Range<usize>`] of byte offsets. +/// +/// Converting byte offsets to line or column numbers is left up to the user, as computing column +/// numbers requires domain knowledge (are columns byte-based, codepoint-based, or grapheme-based?) +/// and O(n) iteration over the input to determine codepoint and line boundaries. +/// +/// [The `line-span` crate](https://docs.rs/line-span/latest/line_span/) can help with converting +/// byte offsets to line numbers. 
+/// /// See [`Parser::span`][crate::Parser::span] and [`Parser::with_span`][crate::Parser::with_span] for more details #[derive(Copy, Clone, Default, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[doc(alias = "LocatedSpan")] pub struct Located<I> { initial: I, input: I, @@ -106,7 +122,7 @@ where } fn location(&self) -> usize { - self.initial.offset_to(&self.input) + self.input.offset_from(&self.initial) } } @@ -145,7 +161,7 @@ impl<I: crate::lib::std::fmt::Display> crate::lib::std::fmt::Display for Located /// # use std::cell::Cell; /// # use winnow::prelude::*; /// # use winnow::stream::Stateful; -/// # use winnow::character::alpha1; +/// # use winnow::ascii::alpha1; /// # type Error = (); /// /// #[derive(Clone, Debug)] @@ -159,9 +175,9 @@ impl<I: crate::lib::std::fmt::Display> crate::lib::std::fmt::Display for Located /// /// type Stream<'is> = Stateful<&'is str, State<'is>>; /// -/// fn word(i: Stream<'_>) -> IResult<Stream<'_>, &str> { +/// fn word<'s>(i: &mut Stream<'s>) -> PResult<&'s str> { /// i.state.count(); -/// alpha1(i) +/// alpha1.parse_next(i) /// } /// /// let data = "Hello"; @@ -171,6 +187,7 @@ impl<I: crate::lib::std::fmt::Display> crate::lib::std::fmt::Display for Located /// assert_eq!(state.get(), 1); /// ``` #[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[doc(alias = "LocatedSpan")] pub struct Stateful<I, S> { /// Inner input being wrapped in state pub input: I, @@ -202,7 +219,7 @@ impl<I: crate::lib::std::fmt::Display, S> crate::lib::std::fmt::Display for Stat /// Mark the input as a partial buffer for streaming input. /// -/// Complete input means that we already have all of the data. This will be the common case with +/// Complete input means that we already have all of the data. This will be the common case with /// small files that can be read entirely to memory. /// /// In contrast, streaming input assumes that we might not have all of the data. 
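The hunk below migrates `Partial`'s worked example to the new `&mut` input and `parse_peek` API. As a minimal, self-contained sketch of the buffer-and-retry loop that the streaming docs above describe (assuming winnow 0.5's `Partial`, `token::take`, and `Parser::parse_peek` exactly as shown in this patch; `four_bytes` and the driver in `main` are illustrative, not part of the vendored crate):

```rust
use winnow::error::{ErrMode, InputError, Needed};
use winnow::prelude::*;
use winnow::stream::Partial;
use winnow::token::take;

// Hypothetical parser: succeeds only once four bytes are available.
fn four_bytes<'s>(
    i: &mut Partial<&'s [u8]>,
) -> PResult<&'s [u8], InputError<Partial<&'s [u8]>>> {
    take(4u8).parse_next(i)
}

fn main() {
    // Only two of the four required bytes have arrived, so the partial
    // parser reports how many more it needs instead of failing outright.
    let mut buffer = b"ab".to_vec();
    assert_eq!(
        four_bytes.parse_peek(Partial::new(&buffer[..])),
        Err(ErrMode::Incomplete(Needed::new(2)))
    );

    // More data arrives; the caller retries over the grown buffer.
    buffer.extend_from_slice(b"cd!");
    assert_eq!(
        four_bytes.parse_peek(Partial::new(&buffer[..])),
        Ok((Partial::new(&b"!"[..]), &b"abcd"[..]))
    );
}
```

On `Incomplete`, `parse_peek` leaves the caller's buffer untouched, so the retry simply re-runs the parser from the start of the accumulated input.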
@@ -221,48 +238,48 @@ impl<I: crate::lib::std::fmt::Display, S> crate::lib::std::fmt::Display for Stat /// Here is how it works in practice: /// /// ```rust -/// # use winnow::{IResult, error::ErrMode, error::Needed, error::{Error, ErrorKind}, bytes, character, stream::Partial}; +/// # use winnow::{PResult, error::ErrMode, error::Needed, error::{InputError, ErrorKind}, token, ascii, stream::Partial}; /// # use winnow::prelude::*; /// -/// fn take_partial(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { -/// bytes::take(4u8).parse_next(i) +/// fn take_partial<'s>(i: &mut Partial<&'s [u8]>) -> PResult<&'s [u8], InputError<Partial<&'s [u8]>>> { +/// token::take(4u8).parse_next(i) /// } /// -/// fn take_complete(i: &[u8]) -> IResult<&[u8], &[u8]> { -/// bytes::take(4u8).parse_next(i) +/// fn take_complete<'s>(i: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { +/// token::take(4u8).parse_next(i) /// } /// /// // both parsers will take 4 bytes as expected -/// assert_eq!(take_partial(Partial::new(&b"abcde"[..])), Ok((Partial::new(&b"e"[..]), &b"abcd"[..]))); -/// assert_eq!(take_complete(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..]))); +/// assert_eq!(take_partial.parse_peek(Partial::new(&b"abcde"[..])), Ok((Partial::new(&b"e"[..]), &b"abcd"[..]))); +/// assert_eq!(take_complete.parse_peek(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..]))); /// /// // if the input is smaller than 4 bytes, the partial parser /// // will return `Incomplete` to indicate that we need more data -/// assert_eq!(take_partial(Partial::new(&b"abc"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(take_partial.parse_peek(Partial::new(&b"abc"[..])), Err(ErrMode::Incomplete(Needed::new(1)))); /// /// // but the complete parser will return an error -/// assert_eq!(take_complete(&b"abc"[..]), Err(ErrMode::Backtrack(Error::new(&b"abc"[..], ErrorKind::Slice)))); +/// assert_eq!(take_complete.parse_peek(&b"abc"[..]), Err(ErrMode::Backtrack(InputError::new(&b"abc"[..], ErrorKind::Slice)))); /// /// // the alpha0 function recognizes 0 or more alphabetic characters -/// fn alpha0_partial(i: Partial<&str>) -> IResult<Partial<&str>, &str> { -/// character::alpha0(i) +/// fn alpha0_partial<'s>(i: &mut Partial<&'s str>) -> PResult<&'s str, InputError<Partial<&'s str>>> { +/// ascii::alpha0.parse_next(i) /// } /// -/// fn alpha0_complete(i: &str) -> IResult<&str, &str> { -/// character::alpha0(i) +/// fn alpha0_complete<'s>(i: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// ascii::alpha0.parse_next(i) /// } /// /// // if there's a clear limit to the recognized characters, both parsers work the same way -/// assert_eq!(alpha0_partial(Partial::new("abcd;")), Ok((Partial::new(";"), "abcd"))); -/// assert_eq!(alpha0_complete("abcd;"), Ok((";", "abcd"))); +/// assert_eq!(alpha0_partial.parse_peek(Partial::new("abcd;")), Ok((Partial::new(";"), "abcd"))); +/// assert_eq!(alpha0_complete.parse_peek("abcd;"), Ok((";", "abcd"))); /// /// // but when there's no limit, the partial version returns `Incomplete`, because it cannot /// // know if more input data should be recognized. 
The whole input could be "abcd;", or /// // "abcde;" -/// assert_eq!(alpha0_partial(Partial::new("abcd")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(alpha0_partial.parse_peek(Partial::new("abcd")), Err(ErrMode::Incomplete(Needed::new(1)))); /// /// // while the complete version knows that all of the data is there -/// assert_eq!(alpha0_complete("abcd"), Ok(("", "abcd"))); +/// assert_eq!(alpha0_complete.parse_peek("abcd"), Ok(("", "abcd"))); /// ``` #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Partial<I> { @@ -322,6 +339,13 @@ pub trait SliceLen { fn slice_len(&self) -> usize; } +impl<S: SliceLen> SliceLen for AsciiCaseless<S> { + #[inline(always)] + fn slice_len(&self) -> usize { + self.0.slice_len() + } +} + impl<'a, T> SliceLen for &'a [T] { #[inline] fn slice_len(&self) -> usize { @@ -350,6 +374,13 @@ impl<'a> SliceLen for &'a str { } } +impl SliceLen for char { + #[inline] + fn slice_len(&self) -> usize { + self.len_utf8() + } +} + impl<'a> SliceLen for &'a Bytes { #[inline] fn slice_len(&self) -> usize { @@ -405,7 +436,7 @@ where } /// Core definition for parser input state -pub trait Stream: Offset + Clone + crate::lib::std::fmt::Debug { +pub trait Stream: Offset<<Self as Stream>::Checkpoint> + crate::lib::std::fmt::Debug { /// The smallest unit being parsed /// /// Example: `u8` for `&[u8]` or `char` for `&str` @@ -418,13 +449,27 @@ pub trait Stream: Offset + Clone + crate::lib::std::fmt::Debug { /// Iterate with the offset from the current location type IterOffsets: Iterator<Item = (usize, Self::Token)>; + /// A parse location within the stream + type Checkpoint: Offset + Clone + crate::lib::std::fmt::Debug; + /// Iterate with the offset from the current location fn iter_offsets(&self) -> Self::IterOffsets; - /// Returns the offaet to the end of the input + + /// Returns the offset to the end of the input fn eof_offset(&self) -> usize; /// Split off the next token from the input - fn next_token(&self) -> Option<(Self, Self::Token)>; + fn next_token(&mut self) -> Option<Self::Token>; + /// Split off the next token from the input + #[inline(always)] + fn peek_token(&self) -> Option<(Self, Self::Token)> + where + Self: Clone, + { + let mut peek = self.clone(); + let token = peek.next_token()?; + Some((peek, token)) + } /// Finds the offset of the next matching token fn offset_for<P>(&self, predicate: P) -> Option<usize> @@ -437,7 +482,7 @@ pub trait Stream: Offset + Clone + crate::lib::std::fmt::Debug { /// Split off a slice of tokens from the input /// /// **NOTE:** For inputs with variable width tokens, like `&str`'s `char`, `offset` might not correspond - /// with the number of tokens. To get a valid offset, use: + /// with the number of tokens. To get a valid offset, use: /// - [`Stream::eof_offset`] /// - [`Stream::iter_offsets`] /// - [`Stream::offset_for`] @@ -451,7 +496,45 @@ pub trait Stream: Offset + Clone + crate::lib::std::fmt::Debug { /// * Indexes must uphold invariants of the stream, like for `str` they must lie on UTF-8 /// sequence boundaries. 
/// - fn next_slice(&self, offset: usize) -> (Self, Self::Slice); + fn next_slice(&mut self, offset: usize) -> Self::Slice; + /// Split off a slice of tokens from the input + #[inline(always)] + fn peek_slice(&self, offset: usize) -> (Self, Self::Slice) + where + Self: Clone, + { + let mut peek = self.clone(); + let slice = peek.next_slice(offset); + (peek, slice) + } + + /// Advance to the end of the stream + #[inline(always)] + fn finish(&mut self) -> Self::Slice { + self.next_slice(self.eof_offset()) + } + /// Advance to the end of the stream + #[inline(always)] + fn peek_finish(&self) -> (Self, Self::Slice) + where + Self: Clone, + { + let mut peek = self.clone(); + let slice = peek.finish(); + (peek, slice) + } + + /// Save the current parse location within the stream + fn checkpoint(&self) -> Self::Checkpoint; + /// Revert the stream to a prior [`Self::Checkpoint`] + /// + /// # Panic + /// + /// May panic if an invalid [`Self::Checkpoint`] is provided + fn reset(&mut self, checkpoint: Self::Checkpoint); + + /// Return the inner-most stream + fn raw(&self) -> &dyn crate::lib::std::fmt::Debug; } impl<'i, T> Stream for &'i [T] @@ -463,6 +546,8 @@ where type IterOffsets = Enumerate<Cloned<Iter<'i, T>>>; + type Checkpoint = Checkpoint<Self>; + #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.iter().cloned().enumerate() @@ -473,9 +558,10 @@ where } #[inline(always)] - fn next_token(&self) -> Option<(Self, Self::Token)> { - self.split_first() - .map(|(token, next)| (next, token.clone())) + fn next_token(&mut self) -> Option<Self::Token> { + let (token, next) = self.split_first()?; + *self = next; + Some(token.clone()) } #[inline(always)] @@ -494,9 +580,24 @@ where } } #[inline(always)] - fn next_slice(&self, offset: usize) -> (Self, Self::Slice) { + fn next_slice(&mut self, offset: usize) -> Self::Slice { let (slice, next) = self.split_at(offset); - (next, slice) + *self = next; + slice + } + + #[inline(always)] + fn checkpoint(&self) -> Self::Checkpoint { + Checkpoint(*self) + } + #[inline(always)] + fn reset(&mut self, checkpoint: Self::Checkpoint) { + *self = checkpoint.0; + } + + #[inline(always)] + fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { + self } } @@ -506,6 +607,8 @@ impl<'i> Stream for &'i str { type IterOffsets = CharIndices<'i>; + type Checkpoint = Checkpoint<Self>; + #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.char_indices() @@ -516,10 +619,11 @@ impl<'i> Stream for &'i str { } #[inline(always)] - fn next_token(&self) -> Option<(Self, Self::Token)> { + fn next_token(&mut self) -> Option<Self::Token> { let c = self.chars().next()?; let offset = c.len(); - Some((&self[offset..], c)) + *self = &self[offset..]; + Some(c) } #[inline(always)] @@ -551,9 +655,24 @@ impl<'i> Stream for &'i str { } } #[inline(always)] - fn next_slice(&self, offset: usize) -> (Self, Self::Slice) { + fn next_slice(&mut self, offset: usize) -> Self::Slice { let (slice, next) = self.split_at(offset); - (next, slice) + *self = next; + slice + } + + #[inline(always)] + fn checkpoint(&self) -> Self::Checkpoint { + Checkpoint(*self) + } + #[inline(always)] + fn reset(&mut self, checkpoint: Self::Checkpoint) { + *self = checkpoint.0; + } + + #[inline(always)] + fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { + self } } @@ -563,6 +682,8 @@ impl<'i> Stream for &'i Bytes { type IterOffsets = Enumerate<Cloned<Iter<'i, u8>>>; + type Checkpoint = Checkpoint<Self>; + #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.iter().cloned().enumerate() 
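The impls above replace the old copy-on-advance model (`next_token` returning `(Self, Token)`) with in-place advancement plus `checkpoint`/`reset` for rewinding. A minimal sketch of the backtracking discipline this enables (assuming the winnow 0.5 `Stream` and `Parser` APIs introduced in this patch; `attempt` is a hypothetical helper, roughly the shape of combinators such as `opt`):

```rust
use winnow::ascii::digit1;
use winnow::error::InputError;
use winnow::prelude::*;
use winnow::stream::Stream;

// Run a parser; on failure, rewind the stream to where it started.
fn attempt<'s, O>(
    parser: &mut impl Parser<&'s str, O, InputError<&'s str>>,
    input: &mut &'s str,
) -> Option<O> {
    // Save the current position; no clone of the whole stream is needed.
    let start = input.checkpoint();
    match parser.parse_next(input) {
        Ok(o) => Some(o),
        Err(_) => {
            // Rewind to the saved position so the caller can try something else.
            input.reset(start);
            None
        }
    }
}

fn main() {
    let mut input = "123abc";
    assert_eq!(attempt(&mut digit1, &mut input), Some("123"));
    // The failed attempt backtracks, leaving `input` where it was.
    assert_eq!(attempt(&mut digit1, &mut input), None);
    assert_eq!(input, "abc");
}
```

This is the pattern the slice impls above are built to support: a `Checkpoint` for `&[T]` or `&str` is just a copy of the slice itself, so saving and restoring is O(1).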
@@ -573,11 +694,13 @@ impl<'i> Stream for &'i Bytes { } #[inline(always)] - fn next_token(&self) -> Option<(Self, Self::Token)> { + fn next_token(&mut self) -> Option<Self::Token> { if self.is_empty() { None } else { - Some((Bytes::from_bytes(&self[1..]), self[0])) + let token = self[0]; + *self = &self[1..]; + Some(token) } } @@ -597,9 +720,24 @@ impl<'i> Stream for &'i Bytes { } } #[inline(always)] - fn next_slice(&self, offset: usize) -> (Self, Self::Slice) { - let (next, slice) = (&self.0).next_slice(offset); - (Bytes::from_bytes(next), slice) + fn next_slice(&mut self, offset: usize) -> Self::Slice { + let (slice, next) = self.0.split_at(offset); + *self = Bytes::from_bytes(next); + slice + } + + #[inline(always)] + fn checkpoint(&self) -> Self::Checkpoint { + Checkpoint(*self) + } + #[inline(always)] + fn reset(&mut self, checkpoint: Self::Checkpoint) { + *self = checkpoint.0; + } + + #[inline(always)] + fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { + self } } @@ -609,6 +747,8 @@ impl<'i> Stream for &'i BStr { type IterOffsets = Enumerate<Cloned<Iter<'i, u8>>>; + type Checkpoint = Checkpoint<Self>; + #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.iter().cloned().enumerate() @@ -619,11 +759,13 @@ impl<'i> Stream for &'i BStr { } #[inline(always)] - fn next_token(&self) -> Option<(Self, Self::Token)> { + fn next_token(&mut self) -> Option<Self::Token> { if self.is_empty() { None } else { - Some((BStr::from_bytes(&self[1..]), self[0])) + let token = self[0]; + *self = &self[1..]; + Some(token) } } @@ -643,21 +785,38 @@ impl<'i> Stream for &'i BStr { } } #[inline(always)] - fn next_slice(&self, offset: usize) -> (Self, Self::Slice) { - let (next, slice) = (&self.0).next_slice(offset); - (BStr::from_bytes(next), slice) + fn next_slice(&mut self, offset: usize) -> Self::Slice { + let (slice, next) = self.0.split_at(offset); + *self = BStr::from_bytes(next); + slice + } + + #[inline(always)] + fn checkpoint(&self) -> Self::Checkpoint { + Checkpoint(*self) + } + #[inline(always)] + fn reset(&mut self, checkpoint: Self::Checkpoint) { + *self = checkpoint.0; + } + + #[inline(always)] + fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { + self } } impl<I> Stream for (I, usize) where - I: Stream<Token = u8>, + I: Stream<Token = u8> + Clone, { type Token = bool; type Slice = (I::Slice, usize, usize); type IterOffsets = BitOffsets<I>; + type Checkpoint = Checkpoint<(I::Checkpoint, usize)>; + #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { BitOffsets { @@ -676,7 +835,7 @@ where } #[inline(always)] - fn next_token(&self) -> Option<(Self, Self::Token)> { + fn next_token(&mut self) -> Option<Self::Token> { next_bit(self) } @@ -686,7 +845,7 @@ where P: Fn(Self::Token) -> bool, { self.iter_offsets() - .find_map(|(o, b)| predicate(b).then(|| o)) + .find_map(|(o, b)| predicate(b).then_some(o)) } #[inline(always)] fn offset_at(&self, tokens: usize) -> Result<usize, Needed> { @@ -700,15 +859,32 @@ where } } #[inline(always)] - fn next_slice(&self, offset: usize) -> (Self, Self::Slice) { + fn next_slice(&mut self, offset: usize) -> Self::Slice { let byte_offset = (offset + self.1) / 8; let end_offset = (offset + self.1) % 8; - let (i, s) = self.0.next_slice(byte_offset); - ((i, end_offset), (s, self.1, end_offset)) + let s = self.0.next_slice(byte_offset); + let start_offset = self.1; + self.1 = end_offset; + (s, start_offset, end_offset) + } + + #[inline(always)] + fn checkpoint(&self) -> Self::Checkpoint { + Checkpoint((self.0.checkpoint(), self.1)) + } + 
#[inline(always)] + fn reset(&mut self, checkpoint: Self::Checkpoint) { + self.0.reset(checkpoint.0 .0); + self.1 = checkpoint.0 .1; + } + + #[inline(always)] + fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { + &self.0 } } -/// Iterator for [bit][crate::bits] stream (`(I, usize)`) +/// Iterator for [bit][crate::binary::bits] stream (`(I, usize)`) pub struct BitOffsets<I> { i: (I, usize), o: usize, @@ -716,37 +892,40 @@ pub struct BitOffsets<I> { impl<I> Iterator for BitOffsets<I> where - I: Stream<Token = u8>, + I: Stream<Token = u8> + Clone, { type Item = (usize, bool); fn next(&mut self) -> Option<Self::Item> { - let (next, b) = next_bit(&self.i)?; + let b = next_bit(&mut self.i)?; let o = self.o; - self.i = next; self.o += 1; Some((o, b)) } } -fn next_bit<I>(i: &(I, usize)) -> Option<((I, usize), bool)> +fn next_bit<I>(i: &mut (I, usize)) -> Option<bool> where - I: Stream<Token = u8>, + I: Stream<Token = u8> + Clone, { if i.eof_offset() == 0 { return None; } + let offset = i.1; - let i = i.clone(); - let (next_i, byte) = i.0.next_token()?; - let bit = (byte >> i.1) & 0x1 == 0x1; + let mut next_i = i.0.clone(); + let byte = next_i.next_token()?; + let bit = (byte >> offset) & 0x1 == 0x1; - let next_offset = i.1 + 1; + let next_offset = offset + 1; if next_offset == 8 { - Some(((next_i, 0), bit)) + i.0 = next_i; + i.1 = 0; + Some(bit) } else { - Some(((i.0, next_offset), bit)) + i.1 = next_offset; + Some(bit) } } @@ -756,6 +935,8 @@ impl<I: Stream> Stream for Located<I> { type IterOffsets = <I as Stream>::IterOffsets; + type Checkpoint = Checkpoint<I::Checkpoint>; + #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.input.iter_offsets() @@ -766,15 +947,8 @@ impl<I: Stream> Stream for Located<I> { } #[inline(always)] - fn next_token(&self) -> Option<(Self, Self::Token)> { - let (next, token) = self.input.next_token()?; - Some(( - Self { - initial: self.initial.clone(), - input: next, - }, - token, - )) + fn next_token(&mut self) -> Option<Self::Token> { + self.input.next_token() } #[inline(always)] @@ -789,24 +963,33 @@ impl<I: Stream> Stream for Located<I> { self.input.offset_at(tokens) } #[inline(always)] - fn next_slice(&self, offset: usize) -> (Self, Self::Slice) { - let (next, slice) = self.input.next_slice(offset); - ( - Self { - initial: self.initial.clone(), - input: next, - }, - slice, - ) + fn next_slice(&mut self, offset: usize) -> Self::Slice { + self.input.next_slice(offset) + } + + #[inline(always)] + fn checkpoint(&self) -> Self::Checkpoint { + Checkpoint(self.input.checkpoint()) + } + #[inline(always)] + fn reset(&mut self, checkpoint: Self::Checkpoint) { + self.input.reset(checkpoint.0); + } + + #[inline(always)] + fn raw(&self) -> &dyn crate::lib::std::fmt::Debug { + &self.input } } -impl<I: Stream, S: Clone + crate::lib::std::fmt::Debug> Stream for Stateful<I, S> { +impl<I: Stream, S: crate::lib::std::fmt::Debug> Stream for Stateful<I, S> { type Token = <I as Stream>::Token; type Slice = <I as Stream>::Slice; type IterOffsets = <I as Stream>::IterOffsets; + type Checkpoint = Checkpoint<I::Checkpoint>; + #[inline(always)] fn iter_offsets(&self) -> Self::IterOffsets { self.input.iter_offsets() @@ -817,15 +1000,8 @@ impl<I: Stream, S: Clone + crate::lib::std::fmt::Debug> Stream for Stateful<I, S } #[inline(always)] - fn next_token(&self) -> Option<(Self, Self::Token)> { - let (next, token) = self.input.next_token()?; - Some(( - Self { - input: next, - state: self.state.clone(), - }, - token, - )) + fn next_token(&mut self) -> Option<Self::Token> { + 
self.input.next_token()
     }
 
     #[inline(always)]
@@ -840,15 +1016,22 @@ impl<I: Stream, S: Clone + crate::lib::std::fmt::Debug> Stream for Stateful<I, S
         self.input.offset_at(tokens)
     }
     #[inline(always)]
-    fn next_slice(&self, offset: usize) -> (Self, Self::Slice) {
-        let (next, slice) = self.input.next_slice(offset);
-        (
-            Self {
-                input: next,
-                state: self.state.clone(),
-            },
-            slice,
-        )
+    fn next_slice(&mut self, offset: usize) -> Self::Slice {
+        self.input.next_slice(offset)
+    }
+
+    #[inline(always)]
+    fn checkpoint(&self) -> Self::Checkpoint {
+        Checkpoint(self.input.checkpoint())
+    }
+    #[inline(always)]
+    fn reset(&mut self, checkpoint: Self::Checkpoint) {
+        self.input.reset(checkpoint.0);
+    }
+
+    #[inline(always)]
+    fn raw(&self) -> &dyn crate::lib::std::fmt::Debug {
+        &self.input
     }
 }
 
@@ -858,6 +1041,8 @@ impl<I: Stream> Stream for Partial<I> {
 
     type IterOffsets = <I as Stream>::IterOffsets;
 
+    type Checkpoint = Checkpoint<I::Checkpoint>;
+
     #[inline(always)]
     fn iter_offsets(&self) -> Self::IterOffsets {
         self.input.iter_offsets()
@@ -868,15 +1053,8 @@ impl<I: Stream> Stream for Partial<I> {
     }
 
     #[inline(always)]
-    fn next_token(&self) -> Option<(Self, Self::Token)> {
-        let (next, token) = self.input.next_token()?;
-        Some((
-            Partial {
-                input: next,
-                partial: self.partial,
-            },
-            token,
-        ))
+    fn next_token(&mut self) -> Option<Self::Token> {
+        self.input.next_token()
    }
 
     #[inline(always)]
@@ -891,15 +1069,22 @@ impl<I: Stream> Stream for Partial<I> {
         self.input.offset_at(tokens)
     }
     #[inline(always)]
-    fn next_slice(&self, offset: usize) -> (Self, Self::Slice) {
-        let (next, slice) = self.input.next_slice(offset);
-        (
-            Partial {
-                input: next,
-                partial: self.partial,
-            },
-            slice,
-        )
+    fn next_slice(&mut self, offset: usize) -> Self::Slice {
+        self.input.next_slice(offset)
+    }
+
+    #[inline(always)]
+    fn checkpoint(&self) -> Self::Checkpoint {
+        Checkpoint(self.input.checkpoint())
+    }
+    #[inline(always)]
+    fn reset(&mut self, checkpoint: Self::Checkpoint) {
+        self.input.reset(checkpoint.0);
+    }
+
+    #[inline(always)]
+    fn raw(&self) -> &dyn crate::lib::std::fmt::Debug {
+        &self.input
     }
 }
 
@@ -1122,73 +1307,77 @@ where
 }
 
 /// Useful functions to calculate the offset between slices and show a hexdump of a slice
-pub trait Offset {
-    /// Offset between the first byte of self and the first byte of the argument
-    fn offset_to(&self, second: &Self) -> usize;
+pub trait Offset<Start = Self> {
+    /// Offset between the first byte of `start` and the first byte of `self`
+    ///
+    /// **Note:** This is an offset, not an index, and may point to the end of input
+    /// (`start.len()`) when `self` is exhausted.
+ fn offset_from(&self, start: &Start) -> usize; } impl<'a, T> Offset for &'a [T] { - #[inline(always)] - fn offset_to(&self, second: &Self) -> usize { - (*self).offset_to(*second) - } -} - -/// Convenience implementation to accept `&[T]` instead of `&&[T]` as above -impl<T> Offset for [T] { #[inline] - fn offset_to(&self, second: &Self) -> usize { - let fst = self.as_ptr(); - let snd = second.as_ptr(); + fn offset_from(&self, start: &Self) -> usize { + let fst = (*start).as_ptr(); + let snd = (*self).as_ptr(); debug_assert!( fst <= snd, - "`Offset::offset_to` only accepts slices of `self`" + "`Offset::offset_from({snd:?}, {fst:?})` only accepts slices of `self`" ); - snd as usize - fst as usize + (snd as usize - fst as usize) / crate::lib::std::mem::size_of::<T>() } } -impl<'a> Offset for &'a str { +impl<'a, T> Offset<<&'a [T] as Stream>::Checkpoint> for &'a [T] +where + T: Clone + crate::lib::std::fmt::Debug, +{ #[inline(always)] - fn offset_to(&self, second: &Self) -> usize { - self.as_bytes().offset_to(second.as_bytes()) + fn offset_from(&self, other: &<&'a [T] as Stream>::Checkpoint) -> usize { + self.checkpoint().offset_from(other) } } -/// Convenience implementation to accept `&str` instead of `&&str` as above -impl Offset for str { +impl<'a> Offset for &'a str { #[inline(always)] - fn offset_to(&self, second: &Self) -> usize { - self.as_bytes().offset_to(second.as_bytes()) + fn offset_from(&self, start: &Self) -> usize { + self.as_bytes().offset_from(&start.as_bytes()) } } -impl Offset for Bytes { +impl<'a> Offset<<&'a str as Stream>::Checkpoint> for &'a str { #[inline(always)] - fn offset_to(&self, second: &Self) -> usize { - self.as_bytes().offset_to(second.as_bytes()) + fn offset_from(&self, other: &<&'a str as Stream>::Checkpoint) -> usize { + self.checkpoint().offset_from(other) } } impl<'a> Offset for &'a Bytes { #[inline(always)] - fn offset_to(&self, second: &Self) -> usize { - self.as_bytes().offset_to(second.as_bytes()) + fn offset_from(&self, start: &Self) -> usize { + self.as_bytes().offset_from(&start.as_bytes()) } } -impl Offset for BStr { +impl<'a> Offset<<&'a Bytes as Stream>::Checkpoint> for &'a Bytes { #[inline(always)] - fn offset_to(&self, second: &Self) -> usize { - self.as_bytes().offset_to(second.as_bytes()) + fn offset_from(&self, other: &<&'a Bytes as Stream>::Checkpoint) -> usize { + self.checkpoint().offset_from(other) } } impl<'a> Offset for &'a BStr { #[inline(always)] - fn offset_to(&self, second: &Self) -> usize { - self.as_bytes().offset_to(second.as_bytes()) + fn offset_from(&self, start: &Self) -> usize { + self.as_bytes().offset_from(&start.as_bytes()) + } +} + +impl<'a> Offset<<&'a BStr as Stream>::Checkpoint> for &'a BStr { + #[inline(always)] + fn offset_from(&self, other: &<&'a BStr as Stream>::Checkpoint) -> usize { + self.checkpoint().offset_from(other) } } @@ -1197,38 +1386,90 @@ where I: Offset, { #[inline(always)] - fn offset_to(&self, other: &Self) -> usize { - self.0.offset_to(&other.0) * 8 + other.1 - self.1 + fn offset_from(&self, start: &Self) -> usize { + self.0.offset_from(&start.0) * 8 + self.1 - start.1 + } +} + +impl<I> Offset<<(I, usize) as Stream>::Checkpoint> for (I, usize) +where + I: Stream<Token = u8> + Clone, +{ + #[inline(always)] + fn offset_from(&self, other: &<(I, usize) as Stream>::Checkpoint) -> usize { + self.checkpoint().offset_from(other) } } impl<I> Offset for Located<I> where - I: Offset, + I: Stream, +{ + #[inline(always)] + fn offset_from(&self, other: &Self) -> usize { + self.offset_from(&other.checkpoint()) 
+ } +} + +impl<I> Offset<<Located<I> as Stream>::Checkpoint> for Located<I> +where + I: Stream, { #[inline(always)] - fn offset_to(&self, other: &Self) -> usize { - self.input.offset_to(&other.input) + fn offset_from(&self, other: &<Located<I> as Stream>::Checkpoint) -> usize { + self.checkpoint().offset_from(other) } } impl<I, S> Offset for Stateful<I, S> where - I: Offset, + I: Stream, + S: Clone + crate::lib::std::fmt::Debug, +{ + #[inline(always)] + fn offset_from(&self, start: &Self) -> usize { + self.offset_from(&start.checkpoint()) + } +} + +impl<I, S> Offset<<Stateful<I, S> as Stream>::Checkpoint> for Stateful<I, S> +where + I: Stream, + S: crate::lib::std::fmt::Debug, { #[inline(always)] - fn offset_to(&self, other: &Self) -> usize { - self.input.offset_to(&other.input) + fn offset_from(&self, other: &<Stateful<I, S> as Stream>::Checkpoint) -> usize { + self.checkpoint().offset_from(other) } } impl<I> Offset for Partial<I> +where + I: Stream, +{ + #[inline(always)] + fn offset_from(&self, start: &Self) -> usize { + self.offset_from(&start.checkpoint()) + } +} + +impl<I> Offset<<Partial<I> as Stream>::Checkpoint> for Partial<I> +where + I: Stream, +{ + #[inline(always)] + fn offset_from(&self, other: &<Partial<I> as Stream>::Checkpoint) -> usize { + self.checkpoint().offset_from(other) + } +} + +impl<I> Offset for Checkpoint<I> where I: Offset, { #[inline(always)] - fn offset_to(&self, second: &Self) -> usize { - self.input.offset_to(&second.input) + fn offset_from(&self, start: &Self) -> usize { + self.0.offset_from(&start.0) } } @@ -1361,47 +1602,50 @@ pub trait Compare<T> { /// by lowercasing both strings and comparing /// the result. This is a temporary solution until /// a better one appears + #[deprecated(since = "0.5.20", note = "Replaced with `compare(ascii::Caseless(_))`")] fn compare_no_case(&self, t: T) -> CompareResult; } -fn lowercase_byte(c: u8) -> u8 { - match c { - b'A'..=b'Z' => c - b'A' + b'a', - _ => c, - } -} - impl<'a, 'b> Compare<&'b [u8]> for &'a [u8] { #[inline] fn compare(&self, t: &'b [u8]) -> CompareResult { - let pos = self.iter().zip(t.iter()).position(|(a, b)| a != b); - - match pos { - Some(_) => CompareResult::Error, - None => { - if self.len() >= t.len() { - CompareResult::Ok - } else { - CompareResult::Incomplete - } - } + if t.iter().zip(*self).any(|(a, b)| a != b) { + CompareResult::Error + } else if self.len() < t.slice_len() { + CompareResult::Incomplete + } else { + CompareResult::Ok } } - #[inline] + #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: &'b [u8]) -> CompareResult { - if self + self.compare(AsciiCaseless(t)) + } +} + +impl<'a, 'b> Compare<AsciiCaseless<&'b [u8]>> for &'a [u8] { + #[inline] + fn compare(&self, t: AsciiCaseless<&'b [u8]>) -> CompareResult { + if t.0 .iter() - .zip(t) - .any(|(a, b)| lowercase_byte(*a) != lowercase_byte(*b)) + .zip(*self) + .any(|(a, b)| !a.eq_ignore_ascii_case(b)) { CompareResult::Error - } else if self.len() < t.len() { + } else if self.len() < t.slice_len() { CompareResult::Incomplete } else { CompareResult::Ok } } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<&'b [u8]>) -> CompareResult { + self.compare(t) + } } impl<'a, const LEN: usize> Compare<[u8; LEN]> for &'a [u8] { @@ -1411,11 +1655,25 @@ impl<'a, const LEN: usize> Compare<[u8; LEN]> for &'a [u8] { } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: [u8; LEN]) -> CompareResult { self.compare_no_case(&t[..]) } } +impl<'a, const LEN: usize> 
Compare<AsciiCaseless<[u8; LEN]>> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: AsciiCaseless<[u8; LEN]>) -> CompareResult { + self.compare(AsciiCaseless(&t.0[..])) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<[u8; LEN]>) -> CompareResult { + self.compare_no_case(AsciiCaseless(&t.0[..])) + } +} + impl<'a, 'b, const LEN: usize> Compare<&'b [u8; LEN]> for &'a [u8] { #[inline(always)] fn compare(&self, t: &'b [u8; LEN]) -> CompareResult { @@ -1423,46 +1681,124 @@ impl<'a, 'b, const LEN: usize> Compare<&'b [u8; LEN]> for &'a [u8] { } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: &'b [u8; LEN]) -> CompareResult { self.compare_no_case(&t[..]) } } +impl<'a, 'b, const LEN: usize> Compare<AsciiCaseless<&'b [u8; LEN]>> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: AsciiCaseless<&'b [u8; LEN]>) -> CompareResult { + self.compare(AsciiCaseless(&t.0[..])) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<&'b [u8; LEN]>) -> CompareResult { + self.compare_no_case(AsciiCaseless(&t.0[..])) + } +} + impl<'a, 'b> Compare<&'b str> for &'a [u8] { #[inline(always)] fn compare(&self, t: &'b str) -> CompareResult { self.compare(t.as_bytes()) } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: &'b str) -> CompareResult { self.compare_no_case(t.as_bytes()) } } +impl<'a, 'b> Compare<AsciiCaseless<&'b str>> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: AsciiCaseless<&'b str>) -> CompareResult { + self.compare(AsciiCaseless(t.0.as_bytes())) + } + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<&'b str>) -> CompareResult { + self.compare_no_case(AsciiCaseless(t.0.as_bytes())) + } +} + +impl<'a> Compare<char> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: char) -> CompareResult { + self.compare(t.encode_utf8(&mut [0; 4]).as_bytes()) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: char) -> CompareResult { + self.compare_no_case(t.encode_utf8(&mut [0; 4]).as_bytes()) + } +} + +impl<'a> Compare<AsciiCaseless<char>> for &'a [u8] { + #[inline] + fn compare(&self, t: AsciiCaseless<char>) -> CompareResult { + self.compare(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes())) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<char>) -> CompareResult { + self.compare_no_case(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes())) + } +} + impl<'a, 'b> Compare<&'b str> for &'a str { #[inline(always)] fn compare(&self, t: &'b str) -> CompareResult { self.as_bytes().compare(t.as_bytes()) } - //FIXME: this version is too simple and does not use the current locale #[inline] + #[allow(deprecated)] fn compare_no_case(&self, t: &'b str) -> CompareResult { - let pos = self - .chars() - .zip(t.chars()) - .position(|(a, b)| a.to_lowercase().ne(b.to_lowercase())); + self.compare(AsciiCaseless(t)) + } +} - match pos { - Some(_) => CompareResult::Error, - None => { - if self.len() >= t.len() { - CompareResult::Ok - } else { - CompareResult::Incomplete - } - } - } +impl<'a, 'b> Compare<AsciiCaseless<&'b str>> for &'a str { + #[inline(always)] + fn compare(&self, t: AsciiCaseless<&'b str>) -> CompareResult { + self.as_bytes().compare(t.as_bytes()) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<&'b str>) -> CompareResult { + self.compare(t) + } +} + +impl<'a> Compare<char> for &'a str { + #[inline(always)] 
+ fn compare(&self, t: char) -> CompareResult { + self.compare(t.encode_utf8(&mut [0; 4]).as_bytes()) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: char) -> CompareResult { + self.compare_no_case(t.encode_utf8(&mut [0; 4]).as_bytes()) + } +} + +impl<'a> Compare<AsciiCaseless<char>> for &'a str { + #[inline] + fn compare(&self, t: AsciiCaseless<char>) -> CompareResult { + self.compare(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes())) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<char>) -> CompareResult { + self.compare_no_case(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes())) } } @@ -1472,11 +1808,24 @@ impl<'a, 'b> Compare<&'b [u8]> for &'a str { AsBStr::as_bstr(self).compare(t) } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: &'b [u8]) -> CompareResult { AsBStr::as_bstr(self).compare_no_case(t) } } +impl<'a, 'b> Compare<AsciiCaseless<&'b [u8]>> for &'a str { + #[inline(always)] + fn compare(&self, t: AsciiCaseless<&'b [u8]>) -> CompareResult { + AsBStr::as_bstr(self).compare(t) + } + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<&'b [u8]>) -> CompareResult { + AsBStr::as_bstr(self).compare_no_case(t) + } +} + impl<'a, T> Compare<T> for &'a Bytes where &'a [u8]: Compare<T>, @@ -1488,6 +1837,7 @@ where } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: T) -> CompareResult { let bytes = (*self).as_bytes(); bytes.compare_no_case(t) @@ -1505,6 +1855,7 @@ where } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: T) -> CompareResult { let bytes = (*self).as_bytes(); bytes.compare_no_case(t) @@ -1521,6 +1872,7 @@ where } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, other: U) -> CompareResult { self.input.compare_no_case(other) } @@ -1536,6 +1888,7 @@ where } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, other: U) -> CompareResult { self.input.compare_no_case(other) } @@ -1551,6 +1904,7 @@ where } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: T) -> CompareResult { self.input.compare_no_case(t) } @@ -1569,6 +1923,27 @@ impl<'i, 's> FindSlice<&'s [u8]> for &'i [u8] { } } +impl<'i, 's> FindSlice<(&'s [u8],)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (&'s [u8],)) -> Option<usize> { + memmem(self, substr.0) + } +} + +impl<'i, 's> FindSlice<(&'s [u8], &'s [u8])> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (&'s [u8], &'s [u8])) -> Option<usize> { + memmem2(self, substr) + } +} + +impl<'i, 's> FindSlice<(&'s [u8], &'s [u8], &'s [u8])> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (&'s [u8], &'s [u8], &'s [u8])) -> Option<usize> { + memmem3(self, substr) + } +} + impl<'i> FindSlice<u8> for &'i [u8] { #[inline(always)] fn find_slice(&self, substr: u8) -> Option<usize> { @@ -1576,6 +1951,27 @@ impl<'i> FindSlice<u8> for &'i [u8] { } } +impl<'i> FindSlice<(u8,)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (u8,)) -> Option<usize> { + memchr(substr.0, self) + } +} + +impl<'i> FindSlice<(u8, u8)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (u8, u8)) -> Option<usize> { + memchr2(substr, self) + } +} + +impl<'i> FindSlice<(u8, u8, u8)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (u8, u8, u8)) -> Option<usize> { + memchr3(substr, self) + } +} + impl<'i, 's> FindSlice<&'s str> for &'i [u8] { #[inline(always)] fn 
find_slice(&self, substr: &'s str) -> Option<usize> { @@ -1583,17 +1979,129 @@ impl<'i, 's> FindSlice<&'s str> for &'i [u8] { } } +impl<'i, 's> FindSlice<(&'s str,)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (&'s str,)) -> Option<usize> { + memmem(self, substr.0.as_bytes()) + } +} + +impl<'i, 's> FindSlice<(&'s str, &'s str)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (&'s str, &'s str)) -> Option<usize> { + memmem2(self, (substr.0.as_bytes(), substr.1.as_bytes())) + } +} + +impl<'i, 's> FindSlice<(&'s str, &'s str, &'s str)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (&'s str, &'s str, &'s str)) -> Option<usize> { + memmem3( + self, + ( + substr.0.as_bytes(), + substr.1.as_bytes(), + substr.2.as_bytes(), + ), + ) + } +} + impl<'i, 's> FindSlice<&'s str> for &'i str { #[inline(always)] fn find_slice(&self, substr: &'s str) -> Option<usize> { - self.find(substr) + self.as_bytes().find_slice(substr.as_bytes()) + } +} + +impl<'i, 's> FindSlice<(&'s str,)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (&'s str,)) -> Option<usize> { + self.as_bytes().find_slice(substr) + } +} + +impl<'i, 's> FindSlice<(&'s str, &'s str)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (&'s str, &'s str)) -> Option<usize> { + self.as_bytes().find_slice(substr) + } +} + +impl<'i, 's> FindSlice<(&'s str, &'s str, &'s str)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (&'s str, &'s str, &'s str)) -> Option<usize> { + self.as_bytes().find_slice(substr) } } impl<'i> FindSlice<char> for &'i str { #[inline(always)] fn find_slice(&self, substr: char) -> Option<usize> { - self.find(substr) + let mut b = [0; 4]; + let substr = substr.encode_utf8(&mut b); + self.find_slice(&*substr) + } +} + +impl<'i> FindSlice<(char,)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (char,)) -> Option<usize> { + let mut b = [0; 4]; + let substr0 = substr.0.encode_utf8(&mut b); + self.find_slice((&*substr0,)) + } +} + +impl<'i> FindSlice<(char, char)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (char, char)) -> Option<usize> { + let mut b = [0; 4]; + let substr0 = substr.0.encode_utf8(&mut b); + let mut b = [0; 4]; + let substr1 = substr.1.encode_utf8(&mut b); + self.find_slice((&*substr0, &*substr1)) + } +} + +impl<'i> FindSlice<(char, char, char)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (char, char, char)) -> Option<usize> { + let mut b = [0; 4]; + let substr0 = substr.0.encode_utf8(&mut b); + let mut b = [0; 4]; + let substr1 = substr.1.encode_utf8(&mut b); + let mut b = [0; 4]; + let substr2 = substr.2.encode_utf8(&mut b); + self.find_slice((&*substr0, &*substr1, &*substr2)) + } +} + +impl<'i> FindSlice<u8> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: u8) -> Option<usize> { + self.find_slice(substr.as_char()) + } +} + +impl<'i> FindSlice<(u8,)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (u8,)) -> Option<usize> { + self.find_slice((substr.0.as_char(),)) + } +} + +impl<'i> FindSlice<(u8, u8)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (u8, u8)) -> Option<usize> { + self.find_slice((substr.0.as_char(), substr.1.as_char())) + } +} + +impl<'i> FindSlice<(u8, u8, u8)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (u8, u8, u8)) -> Option<usize> { + self.find_slice((substr.0.as_char(), substr.1.as_char(), substr.2.as_char())) } } @@ -1747,6 +2255,127 @@ where } } +/// Ensure 
checkpoint details are kept private +#[derive(Copy, Clone, Debug)] +pub struct Checkpoint<T>(T); + +/// A range bounded inclusively for counting parses performed +#[derive(PartialEq, Eq)] +pub struct Range { + pub(crate) start_inclusive: usize, + pub(crate) end_inclusive: Option<usize>, +} + +impl Range { + #[inline(always)] + fn raw(start_inclusive: usize, end_inclusive: Option<usize>) -> Self { + Self { + start_inclusive, + end_inclusive, + } + } +} + +impl crate::lib::std::ops::RangeBounds<usize> for Range { + #[inline(always)] + fn start_bound(&self) -> crate::lib::std::ops::Bound<&usize> { + crate::lib::std::ops::Bound::Included(&self.start_inclusive) + } + + #[inline(always)] + fn end_bound(&self) -> crate::lib::std::ops::Bound<&usize> { + if let Some(end_inclusive) = &self.end_inclusive { + crate::lib::std::ops::Bound::Included(end_inclusive) + } else { + crate::lib::std::ops::Bound::Unbounded + } + } +} + +impl From<usize> for Range { + #[inline(always)] + fn from(fixed: usize) -> Self { + (fixed..=fixed).into() + } +} + +impl From<crate::lib::std::ops::Range<usize>> for Range { + #[inline(always)] + fn from(range: crate::lib::std::ops::Range<usize>) -> Self { + let start_inclusive = range.start; + let end_inclusive = Some(range.end.saturating_sub(1)); + Self::raw(start_inclusive, end_inclusive) + } +} + +impl From<crate::lib::std::ops::RangeFull> for Range { + #[inline(always)] + fn from(_: crate::lib::std::ops::RangeFull) -> Self { + let start_inclusive = 0; + let end_inclusive = None; + Self::raw(start_inclusive, end_inclusive) + } +} + +impl From<crate::lib::std::ops::RangeFrom<usize>> for Range { + #[inline(always)] + fn from(range: crate::lib::std::ops::RangeFrom<usize>) -> Self { + let start_inclusive = range.start; + let end_inclusive = None; + Self::raw(start_inclusive, end_inclusive) + } +} + +impl From<crate::lib::std::ops::RangeTo<usize>> for Range { + #[inline(always)] + fn from(range: crate::lib::std::ops::RangeTo<usize>) -> Self { + let start_inclusive = 0; + let end_inclusive = Some(range.end.saturating_sub(1)); + Self::raw(start_inclusive, end_inclusive) + } +} + +impl From<crate::lib::std::ops::RangeInclusive<usize>> for Range { + #[inline(always)] + fn from(range: crate::lib::std::ops::RangeInclusive<usize>) -> Self { + let start_inclusive = *range.start(); + let end_inclusive = Some(*range.end()); + Self::raw(start_inclusive, end_inclusive) + } +} + +impl From<crate::lib::std::ops::RangeToInclusive<usize>> for Range { + #[inline(always)] + fn from(range: crate::lib::std::ops::RangeToInclusive<usize>) -> Self { + let start_inclusive = 0; + let end_inclusive = Some(range.end); + Self::raw(start_inclusive, end_inclusive) + } +} + +impl crate::lib::std::fmt::Display for Range { + fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { + self.start_inclusive.fmt(f)?; + match self.end_inclusive { + Some(e) if e == self.start_inclusive => {} + Some(e) => { + "..=".fmt(f)?; + e.fmt(f)?; + } + None => { + "..".fmt(f)?; + } + } + Ok(()) + } +} + +impl crate::lib::std::fmt::Debug for Range { + fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { + write!(f, "{self}") + } +} + /// Abstracts something which can extend an `Extend`. 
/// Used to build modified input slices in `escaped_transform` pub trait Accumulate<T>: Sized { @@ -1850,15 +2479,19 @@ where } #[cfg(feature = "std")] -impl<K, V> Accumulate<(K, V)> for HashMap<K, V> +impl<K, V, S> Accumulate<(K, V)> for HashMap<K, V, S> where K: crate::lib::std::cmp::Eq + crate::lib::std::hash::Hash, + S: BuildHasher + Default, { #[inline(always)] fn initial(capacity: Option<usize>) -> Self { + let h = S::default(); match capacity { - Some(capacity) => HashMap::with_capacity(clamp_capacity::<(K, V)>(capacity)), - None => HashMap::new(), + Some(capacity) => { + HashMap::with_capacity_and_hasher(clamp_capacity::<(K, V)>(capacity), h) + } + None => HashMap::with_hasher(h), } } #[inline(always)] @@ -1867,6 +2500,41 @@ where } } +#[cfg(feature = "alloc")] +impl<K> Accumulate<K> for BTreeSet<K> +where + K: crate::lib::std::cmp::Ord, +{ + #[inline(always)] + fn initial(_capacity: Option<usize>) -> Self { + BTreeSet::new() + } + #[inline(always)] + fn accumulate(&mut self, key: K) { + self.insert(key); + } +} + +#[cfg(feature = "std")] +impl<K, S> Accumulate<K> for HashSet<K, S> +where + K: crate::lib::std::cmp::Eq + crate::lib::std::hash::Hash, + S: BuildHasher + Default, +{ + #[inline(always)] + fn initial(capacity: Option<usize>) -> Self { + let h = S::default(); + match capacity { + Some(capacity) => HashSet::with_capacity_and_hasher(clamp_capacity::<K>(capacity), h), + None => HashSet::with_hasher(h), + } + } + #[inline(always)] + fn accumulate(&mut self, key: K) { + self.insert(key); + } +} + #[cfg(feature = "alloc")] #[inline] pub(crate) fn clamp_capacity<T>(capacity: usize) -> usize { @@ -2006,10 +2674,12 @@ impl AsChar for u8 { fn is_space(self) -> bool { self == b' ' || self == b'\t' } + #[inline] fn is_newline(self) -> bool { self == b'\n' } } + impl<'a> AsChar for &'a u8 { #[inline(always)] fn as_char(self) -> char { @@ -2043,6 +2713,7 @@ impl<'a> AsChar for &'a u8 { fn is_space(self) -> bool { *self == b' ' || *self == b'\t' } + #[inline] fn is_newline(self) -> bool { *self == b'\n' } @@ -2081,6 +2752,7 @@ impl AsChar for char { fn is_space(self) -> bool { self == ' ' || self == '\t' } + #[inline] fn is_newline(self) -> bool { self == '\n' } @@ -2119,6 +2791,7 @@ impl<'a> AsChar for &'a char { fn is_space(self) -> bool { *self == ' ' || *self == '\t' } + #[inline] fn is_newline(self) -> bool { *self == '\n' } @@ -2139,15 +2812,15 @@ impl<'a> AsChar for &'a char { /// For example, you could implement `hex_digit0` as: /// ``` /// # use winnow::prelude::*; -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Error}; -/// # use winnow::bytes::take_while1; -/// fn hex_digit1(input: &str) -> IResult<&str, &str> { -/// take_while1(('a'..='f', 'A'..='F', '0'..='9')).parse_next(input) +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; +/// # use winnow::token::take_while; +/// fn hex_digit1<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// take_while(1.., ('a'..='f', 'A'..='F', '0'..='9')).parse_next(input) /// } /// -/// assert_eq!(hex_digit1("21cZ"), Ok(("Z", "21c"))); -/// assert_eq!(hex_digit1("H2"), Err(ErrMode::Backtrack(Error::new("H2", ErrorKind::Slice)))); -/// assert_eq!(hex_digit1(""), Err(ErrMode::Backtrack(Error::new("", ErrorKind::Slice)))); +/// assert_eq!(hex_digit1.parse_peek("21cZ"), Ok(("Z", "21c"))); +/// assert_eq!(hex_digit1.parse_peek("H2"), Err(ErrMode::Backtrack(InputError::new("H2", ErrorKind::Slice)))); +/// assert_eq!(hex_digit1.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", 
ErrorKind::Slice)))); /// ``` pub trait ContainsToken<T> { /// Returns true if self contains the token @@ -2189,14 +2862,14 @@ impl<C: AsChar> ContainsToken<C> for char { } } -impl<C: AsChar, F: Fn(C) -> bool> ContainsToken<C> for F { +impl<C, F: Fn(C) -> bool> ContainsToken<C> for F { #[inline(always)] fn contains_token(&self, token: C) -> bool { self(token) } } -impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for Range<C2> { +impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for crate::lib::std::ops::Range<C2> { #[inline(always)] fn contains_token(&self, token: C1) -> bool { let start = self.start.clone().as_char(); @@ -2205,7 +2878,9 @@ impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for Range<C2> { } } -impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for RangeInclusive<C2> { +impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> + for crate::lib::std::ops::RangeInclusive<C2> +{ #[inline(always)] fn contains_token(&self, token: C1) -> bool { let start = self.start().clone().as_char(); @@ -2214,7 +2889,7 @@ impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for RangeInclusive<C2> { } } -impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for RangeFrom<C2> { +impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for crate::lib::std::ops::RangeFrom<C2> { #[inline(always)] fn contains_token(&self, token: C1) -> bool { let start = self.start.clone().as_char(); @@ -2222,7 +2897,7 @@ impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for RangeFrom<C2> { } } -impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for RangeTo<C2> { +impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for crate::lib::std::ops::RangeTo<C2> { #[inline(always)] fn contains_token(&self, token: C1) -> bool { let end = self.end.clone().as_char(); @@ -2230,7 +2905,9 @@ impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for RangeTo<C2> { } } -impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for RangeToInclusive<C2> { +impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> + for crate::lib::std::ops::RangeToInclusive<C2> +{ #[inline(always)] fn contains_token(&self, token: C1) -> bool { let end = self.end.clone().as_char(); @@ -2238,7 +2915,7 @@ impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for RangeToInclusive<C2> } } -impl<C1: AsChar> ContainsToken<C1> for RangeFull { +impl<C1: AsChar> ContainsToken<C1> for crate::lib::std::ops::RangeFull { #[inline(always)] fn contains_token(&self, _token: C1) -> bool { true @@ -2293,14 +2970,6 @@ impl<const LEN: usize, C: AsChar> ContainsToken<C> for [char; LEN] { } } -impl<C: AsChar> ContainsToken<C> for &'_ str { - #[inline(always)] - fn contains_token(&self, token: C) -> bool { - let token = token.as_char(); - self.chars().any(|i| i == token) - } -} - impl<T> ContainsToken<T> for () { #[inline(always)] fn contains_token(&self, _token: T) -> bool { @@ -2342,108 +3011,130 @@ impl_contains_token_for_tuples!( F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21 ); -/// Looks for the first element of the input type for which the condition returns true, -/// and returns the input up to this position. 
-/// -/// *Partial version*: If no element is found matching the condition, this will return `Incomplete` -pub(crate) fn split_at_offset_partial<P, I: Stream, E: ParseError<I>>( - input: &I, - predicate: P, -) -> IResult<I, <I as Stream>::Slice, E> -where - P: Fn(I::Token) -> bool, -{ - let offset = input - .offset_for(predicate) - .ok_or_else(|| ErrMode::Incomplete(Needed::new(1)))?; - Ok(input.next_slice(offset)) +#[cfg(feature = "simd")] +#[inline(always)] +fn memchr(token: u8, slice: &[u8]) -> Option<usize> { + memchr::memchr(token, slice) } -/// Looks for the first element of the input type for which the condition returns true -/// and returns the input up to this position. -/// -/// Fails if the produced slice is empty. -/// -/// *Partial version*: If no element is found matching the condition, this will return `Incomplete` -pub(crate) fn split_at_offset1_partial<P, I: Stream, E: ParseError<I>>( - input: &I, - predicate: P, - e: ErrorKind, -) -> IResult<I, <I as Stream>::Slice, E> -where - P: Fn(I::Token) -> bool, -{ - let offset = input - .offset_for(predicate) - .ok_or_else(|| ErrMode::Incomplete(Needed::new(1)))?; - if offset == 0 { - Err(ErrMode::from_error_kind(input.clone(), e)) +#[cfg(feature = "simd")] +#[inline(always)] +fn memchr2(token: (u8, u8), slice: &[u8]) -> Option<usize> { + memchr::memchr2(token.0, token.1, slice) +} + +#[cfg(feature = "simd")] +#[inline(always)] +fn memchr3(token: (u8, u8, u8), slice: &[u8]) -> Option<usize> { + memchr::memchr3(token.0, token.1, token.2, slice) +} + +#[cfg(not(feature = "simd"))] +#[inline(always)] +fn memchr(token: u8, slice: &[u8]) -> Option<usize> { + slice.iter().position(|t| *t == token) +} + +#[cfg(not(feature = "simd"))] +#[inline(always)] +fn memchr2(token: (u8, u8), slice: &[u8]) -> Option<usize> { + slice.iter().position(|t| *t == token.0 || *t == token.1) +} + +#[cfg(not(feature = "simd"))] +#[inline(always)] +fn memchr3(token: (u8, u8, u8), slice: &[u8]) -> Option<usize> { + slice + .iter() + .position(|t| *t == token.0 || *t == token.1 || *t == token.2) +} + +#[inline(always)] +fn memmem(slice: &[u8], tag: &[u8]) -> Option<usize> { + if tag.len() == 1 { + memchr(tag[0], slice) } else { - Ok(input.next_slice(offset)) + memmem_(slice, tag) } } -/// Looks for the first element of the input type for which the condition returns true, -/// and returns the input up to this position. -/// -/// *Complete version*: If no element is found matching the condition, this will return the whole input -pub(crate) fn split_at_offset_complete<P, I: Stream, E: ParseError<I>>( - input: &I, - predicate: P, -) -> IResult<I, <I as Stream>::Slice, E> -where - P: Fn(I::Token) -> bool, -{ - let offset = input - .offset_for(predicate) - .unwrap_or_else(|| input.eof_offset()); - Ok(input.next_slice(offset)) +#[inline(always)] +fn memmem2(slice: &[u8], tag: (&[u8], &[u8])) -> Option<usize> { + if tag.0.len() == 1 && tag.1.len() == 1 { + memchr2((tag.0[0], tag.1[0]), slice) + } else { + memmem2_(slice, tag) + } } -/// Looks for the first element of the input type for which the condition returns true -/// and returns the input up to this position. -/// -/// Fails if the produced slice is empty. 
-/// -/// *Complete version*: If no element is found matching the condition, this will return the whole input -pub(crate) fn split_at_offset1_complete<P, I: Stream, E: ParseError<I>>( - input: &I, - predicate: P, - e: ErrorKind, -) -> IResult<I, <I as Stream>::Slice, E> -where - P: Fn(I::Token) -> bool, -{ - let offset = input - .offset_for(predicate) - .unwrap_or_else(|| input.eof_offset()); - if offset == 0 { - Err(ErrMode::from_error_kind(input.clone(), e)) +#[inline(always)] +fn memmem3(slice: &[u8], tag: (&[u8], &[u8], &[u8])) -> Option<usize> { + if tag.0.len() == 1 && tag.1.len() == 1 && tag.2.len() == 1 { + memchr3((tag.0[0], tag.1[0], tag.2[0]), slice) } else { - Ok(input.next_slice(offset)) + memmem3_(slice, tag) } } #[cfg(feature = "simd")] #[inline(always)] -fn memchr(token: u8, slice: &[u8]) -> Option<usize> { - memchr::memchr(token, slice) +fn memmem_(slice: &[u8], tag: &[u8]) -> Option<usize> { + let &prefix = match tag.first() { + Some(x) => x, + None => return Some(0), + }; + #[allow(clippy::manual_find)] // faster this way + for i in memchr::memchr_iter(prefix, slice) { + if slice[i..].starts_with(tag) { + return Some(i); + } + } + None } -#[cfg(not(feature = "simd"))] -#[inline(always)] -fn memchr(token: u8, slice: &[u8]) -> Option<usize> { - slice.iter().position(|t| *t == token) +#[cfg(feature = "simd")] +fn memmem2_(slice: &[u8], tag: (&[u8], &[u8])) -> Option<usize> { + let prefix = match (tag.0.first(), tag.1.first()) { + (Some(&a), Some(&b)) => (a, b), + _ => return Some(0), + }; + #[allow(clippy::manual_find)] // faster this way + for i in memchr::memchr2_iter(prefix.0, prefix.1, slice) { + let subslice = &slice[i..]; + if subslice.starts_with(tag.0) { + return Some(i); + } + if subslice.starts_with(tag.1) { + return Some(i); + } + } + None } #[cfg(feature = "simd")] -#[inline(always)] -fn memmem(slice: &[u8], tag: &[u8]) -> Option<usize> { - memchr::memmem::find(slice, tag) +fn memmem3_(slice: &[u8], tag: (&[u8], &[u8], &[u8])) -> Option<usize> { + let prefix = match (tag.0.first(), tag.1.first(), tag.2.first()) { + (Some(&a), Some(&b), Some(&c)) => (a, b, c), + _ => return Some(0), + }; + #[allow(clippy::manual_find)] // faster this way + for i in memchr::memchr3_iter(prefix.0, prefix.1, prefix.2, slice) { + let subslice = &slice[i..]; + if subslice.starts_with(tag.0) { + return Some(i); + } + if subslice.starts_with(tag.1) { + return Some(i); + } + if subslice.starts_with(tag.2) { + return Some(i); + } + } + None } #[cfg(not(feature = "simd"))] -fn memmem(slice: &[u8], tag: &[u8]) -> Option<usize> { +fn memmem_(slice: &[u8], tag: &[u8]) -> Option<usize> { for i in 0..slice.len() { let subslice = &slice[i..]; if subslice.starts_with(tag) { @@ -2452,3 +3143,34 @@ fn memmem(slice: &[u8], tag: &[u8]) -> Option<usize> { } None } + +#[cfg(not(feature = "simd"))] +fn memmem2_(slice: &[u8], tag: (&[u8], &[u8])) -> Option<usize> { + for i in 0..slice.len() { + let subslice = &slice[i..]; + if subslice.starts_with(tag.0) { + return Some(i); + } + if subslice.starts_with(tag.1) { + return Some(i); + } + } + None +} + +#[cfg(not(feature = "simd"))] +fn memmem3_(slice: &[u8], tag: (&[u8], &[u8], &[u8])) -> Option<usize> { + for i in 0..slice.len() { + let subslice = &slice[i..]; + if subslice.starts_with(tag.0) { + return Some(i); + } + if subslice.starts_with(tag.1) { + return Some(i); + } + if subslice.starts_with(tag.2) { + return Some(i); + } + } + None +} diff --git a/vendor/winnow/src/stream/tests.rs b/vendor/winnow/src/stream/tests.rs index de61df0..06047df 100644 
--- a/vendor/winnow/src/stream/tests.rs +++ b/vendor/winnow/src/stream/tests.rs @@ -1,8 +1,28 @@ #[cfg(feature = "std")] use proptest::prelude::*; +use crate::error::ErrMode::Backtrack; +use crate::error::{ErrorKind, InputError}; +use crate::token::tag; +use crate::{ + combinator::{separated, separated_pair}, + PResult, Parser, +}; + use super::*; +#[cfg(feature = "std")] +#[test] +fn test_fxhashmap_compiles() { + let input = "a=b"; + fn pair(i: &mut &str) -> PResult<(char, char)> { + let out = separated_pair('a', '=', 'b').parse_next(i)?; + Ok(out) + } + + let _: rustc_hash::FxHashMap<char, char> = separated(0.., pair, ',').parse(input).unwrap(); +} + #[test] fn test_offset_u8() { let s = b"abcd123"; @@ -10,9 +30,9 @@ fn test_offset_u8() { let b = &a[2..]; let c = &a[..4]; let d = &a[3..5]; - assert_eq!(a.offset_to(b), 2); - assert_eq!(a.offset_to(c), 0); - assert_eq!(a.offset_to(d), 3); + assert_eq!(b.offset_from(&a), 2); + assert_eq!(c.offset_from(&a), 0); + assert_eq!(d.offset_from(&a), 3); } #[test] @@ -21,9 +41,9 @@ fn test_offset_str() { let b = &a[7..]; let c = &a[..5]; let d = &a[5..9]; - assert_eq!(a.offset_to(b), 7); - assert_eq!(a.offset_to(c), 0); - assert_eq!(a.offset_to(d), 5); + assert_eq!(b.offset_from(&a), 7); + assert_eq!(c.offset_from(&a), 0); + assert_eq!(d.offset_from(&a), 5); } #[test] @@ -37,7 +57,7 @@ fn test_bit_stream_empty() { let actual = i.eof_offset(); assert_eq!(actual, 0); - let actual = i.next_token(); + let actual = i.peek_token(); assert_eq!(actual, None); let actual = i.offset_for(|b| b); @@ -46,7 +66,7 @@ fn test_bit_stream_empty() { let actual = i.offset_at(1); assert_eq!(actual, Err(Needed::new(1))); - let (actual_input, actual_slice) = i.next_slice(0); + let (actual_input, actual_slice) = i.peek_slice(0); assert_eq!(actual_input, (&b""[..], 0)); assert_eq!(actual_slice, (&b""[..], 0, 0)); } @@ -56,7 +76,7 @@ fn test_bit_stream_empty() { fn test_bit_offset_empty() { let i = (&b""[..], 0); - let actual = i.offset_to(&i); + let actual = i.offset_from(&i); assert_eq!(actual, 0); } @@ -80,18 +100,18 @@ fn bit_stream_inner(byte_len: usize, start: usize) { let mut curr_i = i; let mut curr_offset = 0; - while let Some((next_i, _token)) = curr_i.next_token() { - let to_offset = i.offset_to(&curr_i); + while let Some((next_i, _token)) = curr_i.peek_token() { + let to_offset = curr_i.offset_from(&i); assert_eq!(curr_offset, to_offset); - let (slice_i, _) = i.next_slice(curr_offset); + let (slice_i, _) = i.peek_slice(curr_offset); assert_eq!(curr_i, slice_i); let at_offset = i.offset_at(curr_offset).unwrap(); assert_eq!(curr_offset, at_offset); let eof_offset = curr_i.eof_offset(); - let (next_eof_i, eof_slice) = curr_i.next_slice(eof_offset); + let (next_eof_i, eof_slice) = curr_i.peek_slice(eof_offset); assert_eq!(next_eof_i, (&b""[..], 0)); let eof_slice_i = (eof_slice.0, eof_slice.1); assert_eq!(eof_slice_i, curr_i); @@ -114,3 +134,95 @@ fn test_partial_complete() { i.restore_partial(incomplete_state); assert!(i.is_partial(), "incomplete stream state should be restored"); } + +#[test] +fn test_custom_slice() { + type Token = usize; + type TokenSlice<'i> = &'i [Token]; + + let mut tokens: TokenSlice<'_> = &[1, 2, 3, 4]; + + let input = &mut tokens; + let start = input.checkpoint(); + let _ = input.next_token(); + let _ = input.next_token(); + let offset = input.offset_from(&start); + assert_eq!(offset, 2); +} + +#[test] +fn test_tag_support_char() { + assert_eq!( + tag::<_, _, InputError<_>>('π').parse_peek("π"), + Ok(("", "π")) + ); + assert_eq!( + tag::<_, 
_, InputError<_>>('π').parse_peek("π3.14"), + Ok(("3.14", "π")) + ); + + assert_eq!( + tag::<_, _, InputError<_>>("π").parse_peek("π3.14"), + Ok(("3.14", "π")) + ); + + assert_eq!( + tag::<_, _, InputError<_>>('-').parse_peek("π"), + Err(Backtrack(InputError::new("π", ErrorKind::Tag))) + ); + + assert_eq!( + tag::<_, Partial<&[u8]>, InputError<_>>('π').parse_peek(Partial::new(b"\xCF\x80")), + Ok((Partial::new(Default::default()), "π".as_bytes())) + ); + assert_eq!( + tag::<_, &[u8], InputError<_>>('π').parse_peek(b"\xCF\x80"), + Ok((Default::default(), "π".as_bytes())) + ); + + assert_eq!( + tag::<_, Partial<&[u8]>, InputError<_>>('π').parse_peek(Partial::new(b"\xCF\x803.14")), + Ok((Partial::new(&b"3.14"[..]), "π".as_bytes())) + ); + assert_eq!( + tag::<_, &[u8], InputError<_>>('π').parse_peek(b"\xCF\x80"), + Ok((Default::default(), "π".as_bytes())) + ); + + assert_eq!( + tag::<_, &[u8], InputError<_>>('π').parse_peek(b"\xCF\x803.14"), + Ok((&b"3.14"[..], "π".as_bytes())) + ); + + assert_eq!( + tag::<_, &[u8], InputError<_>>(AsciiCaseless('a')).parse_peek(b"ABCxyz"), + Ok((&b"BCxyz"[..], &b"A"[..])) + ); + + assert_eq!( + tag::<_, &[u8], InputError<_>>('a').parse_peek(b"ABCxyz"), + Err(Backtrack(InputError::new(&b"ABCxyz"[..], ErrorKind::Tag))) + ); + + assert_eq!( + tag::<_, &[u8], InputError<_>>(AsciiCaseless('π')).parse_peek(b"\xCF\x803.14"), + Ok((&b"3.14"[..], "π".as_bytes())) + ); + + assert_eq!( + tag::<_, _, InputError<_>>(AsciiCaseless('🧑')).parse_peek("🧑你好"), + Ok(("你好", "🧑")) + ); + + let mut buffer = [0; 4]; + let input = '\u{241b}'.encode_utf8(&mut buffer); + assert_eq!( + tag::<_, &[u8], InputError<_>>(AsciiCaseless('␛')).parse_peek(input.as_bytes()), + Ok((&b""[..], [226, 144, 155].as_slice())) + ); + + assert_eq!( + tag::<_, &[u8], InputError<_>>('-').parse_peek(b"\xCF\x80"), + Err(Backtrack(InputError::new(&b"\xCF\x80"[..], ErrorKind::Tag))) + ); +} diff --git a/vendor/winnow/src/token/mod.rs b/vendor/winnow/src/token/mod.rs new file mode 100644 index 0000000..5855bf7 --- /dev/null +++ b/vendor/winnow/src/token/mod.rs @@ -0,0 +1,1174 @@ +//! Parsers extracting tokens from the stream + +#[cfg(test)] +mod tests; + +use crate::error::ErrMode; +use crate::error::ErrorKind; +use crate::error::Needed; +use crate::error::ParserError; +use crate::lib::std::result::Result::Ok; +use crate::stream::Range; +use crate::stream::{Compare, CompareResult, ContainsToken, FindSlice, SliceLen, Stream}; +use crate::stream::{StreamIsPartial, ToUsize}; +use crate::trace::trace; +use crate::PResult; +use crate::Parser; + +/// Matches one token +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. 
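+/// +/// `any` is also a building block for other token parsers; a sketch of matching one ASCII digit by pairing it with [`Parser::verify`] (the same construction `one_of` uses below): +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::token::any; +/// fn digit(input: &mut &str) -> PResult<char> { +/// any.verify(|c: &char| c.is_ascii_digit()).parse_next(input) +/// } +/// assert_eq!(digit.parse_peek("1a"), Ok(("a", '1'))); +/// ```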
+/// +/// # Example +/// +/// ```rust +/// # use winnow::{token::any, error::ErrMode, error::{InputError, ErrorKind}}; +/// # use winnow::prelude::*; +/// fn parser(input: &str) -> IResult<&str, char> { +/// any.parse_peek(input) +/// } +/// +/// assert_eq!(parser("abc"), Ok(("bc",'a'))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Token)))); +/// ``` +/// +/// ```rust +/// # use winnow::{token::any, error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// assert_eq!(any::<_, InputError<_>>.parse_peek(Partial::new("abc")), Ok((Partial::new("bc"),'a'))); +/// assert_eq!(any::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +#[doc(alias = "token")] +pub fn any<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Token, E> +where + I: StreamIsPartial, + I: Stream, +{ + trace("any", move |input: &mut I| { + if <I as StreamIsPartial>::is_partial_supported() { + any_::<_, _, true>(input) + } else { + any_::<_, _, false>(input) + } + }) + .parse_next(input) +} + +fn any_<I, E: ParserError<I>, const PARTIAL: bool>( + input: &mut I, +) -> PResult<<I as Stream>::Token, E> +where + I: StreamIsPartial, + I: Stream, +{ + input.next_token().ok_or_else(|| { + if PARTIAL && input.is_partial() { + ErrMode::Incomplete(Needed::new(1)) + } else { + ErrMode::from_error_kind(input, ErrorKind::Token) + } + }) +} + +/// Recognizes a literal +/// +/// The input data will be compared to the tag combinator's argument and will return the part of +/// the input that matches the argument +/// +/// It will return `Err(ErrMode::Backtrack(InputError::new(_, ErrorKind::Tag)))` if the input doesn't match the pattern +/// +/// **Note:** [`Parser`] is implemented for strings and byte strings as a convenience (complete +/// only) +/// +/// # Example +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, &str> { +/// "Hello".parse_peek(s) +/// } +/// +/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser("Something"), Err(ErrMode::Backtrack(InputError::new("Something", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// ``` +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::Partial; +/// use winnow::token::tag; +/// +/// fn parser(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// "Hello".parse_peek(s) +/// } +/// +/// assert_eq!(parser(Partial::new("Hello, World!")), Ok((Partial::new(", World!"), "Hello"))); +/// assert_eq!(parser(Partial::new("Something")), Err(ErrMode::Backtrack(InputError::new(Partial::new("Something"), ErrorKind::Tag)))); +/// assert_eq!(parser(Partial::new("S")), Err(ErrMode::Backtrack(InputError::new(Partial::new("S"), ErrorKind::Tag)))); +/// assert_eq!(parser(Partial::new("H")), Err(ErrMode::Incomplete(Needed::new(4)))); +/// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::tag; +/// use winnow::ascii::Caseless; +/// +/// fn parser(s: &str) -> IResult<&str, &str> { +/// tag(Caseless("hello")).parse_peek(s) +/// } +/// +/// assert_eq!(parser("Hello, 
World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser("hello, World!"), Ok((", World!", "hello"))); +/// assert_eq!(parser("HeLlO, World!"), Ok((", World!", "HeLlO"))); +/// assert_eq!(parser("Something"), Err(ErrMode::Backtrack(InputError::new("Something", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// ``` +#[inline(always)] +#[doc(alias = "literal")] +#[doc(alias = "bytes")] +#[doc(alias = "just")] +pub fn tag<T, I, Error: ParserError<I>>(tag: T) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + Compare<T>, + T: SliceLen + Clone, +{ + trace("tag", move |i: &mut I| { + let t = tag.clone(); + if <I as StreamIsPartial>::is_partial_supported() { + tag_::<_, _, _, true>(i, t) + } else { + tag_::<_, _, _, false>(i, t) + } + }) +} + +fn tag_<T, I, Error: ParserError<I>, const PARTIAL: bool>( + i: &mut I, + t: T, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + Compare<T>, + T: SliceLen, +{ + let tag_len = t.slice_len(); + match i.compare(t) { + CompareResult::Ok => Ok(i.next_slice(tag_len)), + CompareResult::Incomplete if PARTIAL && i.is_partial() => { + Err(ErrMode::Incomplete(Needed::new(tag_len - i.eof_offset()))) + } + CompareResult::Incomplete | CompareResult::Error => { + let e: ErrorKind = ErrorKind::Tag; + Err(ErrMode::from_error_kind(i, e)) + } + } +} + +/// Recognizes a case insensitive literal. +/// +/// The input data will be compared to the tag combinator's argument and will return the part of +/// the input that matches the argument with no regard to case. +/// +/// It will return `Err(ErrMode::Backtrack(InputError::new(_, ErrorKind::Tag)))` if the input doesn't match the pattern. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::tag_no_case; +/// +/// fn parser(s: &str) -> IResult<&str, &str> { +/// tag_no_case("hello").parse_peek(s) +/// } +/// +/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser("hello, World!"), Ok((", World!", "hello"))); +/// assert_eq!(parser("HeLlO, World!"), Ok((", World!", "HeLlO"))); +/// assert_eq!(parser("Something"), Err(ErrMode::Backtrack(InputError::new("Something", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// use winnow::token::tag_no_case; +/// +/// fn parser(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// tag_no_case("hello").parse_peek(s) +/// } +/// +/// assert_eq!(parser(Partial::new("Hello, World!")), Ok((Partial::new(", World!"), "Hello"))); +/// assert_eq!(parser(Partial::new("hello, World!")), Ok((Partial::new(", World!"), "hello"))); +/// assert_eq!(parser(Partial::new("HeLlO, World!")), Ok((Partial::new(", World!"), "HeLlO"))); +/// assert_eq!(parser(Partial::new("Something")), Err(ErrMode::Backtrack(InputError::new(Partial::new("Something"), ErrorKind::Tag)))); +/// assert_eq!(parser(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(5)))); +/// ``` +#[inline(always)] +#[doc(alias = "literal")] +#[doc(alias = "bytes")] +#[doc(alias = "just")] +#[deprecated(since = "0.5.20", note = "Replaced with `tag(ascii::Caseless(_))`")] +pub fn tag_no_case<T, I, Error: ParserError<I>>( 
+ tag: T, +) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + Compare<T>, + T: SliceLen + Clone, +{ + trace("tag_no_case", move |i: &mut I| { + let t = tag.clone(); + if <I as StreamIsPartial>::is_partial_supported() { + tag_no_case_::<_, _, _, true>(i, t) + } else { + tag_no_case_::<_, _, _, false>(i, t) + } + }) +} + +#[allow(deprecated)] +fn tag_no_case_<T, I, Error: ParserError<I>, const PARTIAL: bool>( + i: &mut I, + t: T, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + Compare<T>, + T: SliceLen, +{ + let tag_len = t.slice_len(); + + match i.compare_no_case(t) { + CompareResult::Ok => Ok(i.next_slice(tag_len)), + CompareResult::Incomplete if PARTIAL && i.is_partial() => { + Err(ErrMode::Incomplete(Needed::new(tag_len - i.eof_offset()))) + } + CompareResult::Incomplete | CompareResult::Error => { + let e: ErrorKind = ErrorKind::Tag; + Err(ErrMode::from_error_kind(i, e)) + } + } +} + +/// Recognize a token that matches the [pattern][ContainsToken] +/// +/// **Note:** [`Parser`] is implemented as a convenience (complete +/// only) for +/// - `u8` +/// - `char` +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. +/// +/// # Example +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; +/// # use winnow::token::one_of; +/// assert_eq!(one_of::<_, _, InputError<_>>(['a', 'b', 'c']).parse_peek("b"), Ok(("", 'b'))); +/// assert_eq!(one_of::<_, _, InputError<_>>('a').parse_peek("bc"), Err(ErrMode::Backtrack(InputError::new("bc", ErrorKind::Verify)))); +/// assert_eq!(one_of::<_, _, InputError<_>>('a').parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Token)))); +/// +/// fn parser_fn(i: &str) -> IResult<&str, char> { +/// one_of(|c| c == 'a' || c == 'b').parse_peek(i) +/// } +/// assert_eq!(parser_fn("abc"), Ok(("bc", 'a'))); +/// assert_eq!(parser_fn("cd"), Err(ErrMode::Backtrack(InputError::new("cd", ErrorKind::Verify)))); +/// assert_eq!(parser_fn(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Token)))); +/// ``` +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// # use winnow::token::one_of; +/// assert_eq!(one_of::<_, _, InputError<_>>(['a', 'b', 'c']).parse_peek(Partial::new("b")), Ok((Partial::new(""), 'b'))); +/// assert_eq!(one_of::<_, _, InputError<_>>('a').parse_peek(Partial::new("bc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("bc"), ErrorKind::Verify)))); +/// assert_eq!(one_of::<_, _, InputError<_>>('a').parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// +/// fn parser_fn(i: Partial<&str>) -> IResult<Partial<&str>, char> { +/// one_of(|c| c == 'a' || c == 'b').parse_peek(i) +/// } +/// assert_eq!(parser_fn(Partial::new("abc")), Ok((Partial::new("bc"), 'a'))); +/// assert_eq!(parser_fn(Partial::new("cd")), Err(ErrMode::Backtrack(InputError::new(Partial::new("cd"), ErrorKind::Verify)))); +/// assert_eq!(parser_fn(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +#[doc(alias = "char")] +#[doc(alias = "token")] +#[doc(alias = "satisfy")] +pub fn one_of<I, T, Error: ParserError<I>>(list: T) -> impl Parser<I, <I as Stream>::Token, Error> +where + I: StreamIsPartial, + 
I: Stream, + <I as Stream>::Token: Clone, + T: ContainsToken<<I as Stream>::Token>, +{ + trace( + "one_of", + any.verify(move |t: &<I as Stream>::Token| list.contains_token(t.clone())), + ) +} + +/// Recognize a token that does not match the [pattern][ContainsToken] +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// +/// *Partial version*: Will return `Err(winnow::error::ErrMode::Incomplete(_))` if there's not enough input data. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; +/// # use winnow::prelude::*; +/// # use winnow::token::none_of; +/// assert_eq!(none_of::<_, _, InputError<_>>(['a', 'b', 'c']).parse_peek("z"), Ok(("", 'z'))); +/// assert_eq!(none_of::<_, _, InputError<_>>(['a', 'b']).parse_peek("a"), Err(ErrMode::Backtrack(InputError::new("a", ErrorKind::Verify)))); +/// assert_eq!(none_of::<_, _, InputError<_>>('a').parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Token)))); +/// ``` +/// +/// ``` +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// # use winnow::token::none_of; +/// assert_eq!(none_of::<_, _, InputError<_>>(['a', 'b', 'c']).parse_peek(Partial::new("z")), Ok((Partial::new(""), 'z'))); +/// assert_eq!(none_of::<_, _, InputError<_>>(['a', 'b']).parse_peek(Partial::new("a")), Err(ErrMode::Backtrack(InputError::new(Partial::new("a"), ErrorKind::Verify)))); +/// assert_eq!(none_of::<_, _, InputError<_>>('a').parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +pub fn none_of<I, T, Error: ParserError<I>>(list: T) -> impl Parser<I, <I as Stream>::Token, Error> +where + I: StreamIsPartial, + I: Stream, + <I as Stream>::Token: Clone, + T: ContainsToken<<I as Stream>::Token>, +{ + trace( + "none_of", + any.verify(move |t: &<I as Stream>::Token| !list.contains_token(t.clone())), + ) +} + +/// Recognize the longest (m <= len <= n) input slice that matches the [pattern][ContainsToken] +/// +/// It will return an `ErrMode::Backtrack(InputError::new(_, ErrorKind::Slice))` if the pattern wasn't met or is out +/// of range (m <= len <= n). +/// +/// *Partial version* will return a `ErrMode::Incomplete(Needed::new(1))` if the pattern reaches the end of the input or is too short. +/// +/// To recognize a series of tokens, use [`repeat`][crate::combinator::repeat] to [`Accumulate`][crate::stream::Accumulate] into a `()` and then [`Parser::recognize`]. 
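+/// +/// A sketch of that pattern, assuming `repeat` from [`combinator`][crate::combinator]: +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::combinator::repeat; +/// # use winnow::token::one_of; +/// fn ident<'s>(input: &mut &'s str) -> PResult<&'s str> { +/// repeat::<_, _, (), _, _>(1.., one_of(('a'..='z', '_'))).recognize().parse_next(input) +/// } +/// assert_eq!(ident.parse_peek("ab_c1"), Ok(("1", "ab_c"))); +/// ```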
+/// +/// # Example +/// +/// Zero or more tokens: +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::take_while; +/// use winnow::stream::AsChar; +/// +/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// take_while(0.., AsChar::is_alpha).parse_peek(s) +/// } +/// +/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); +/// assert_eq!(alpha(b"12345"), Ok((&b"12345"[..], &b""[..]))); +/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..]))); +/// assert_eq!(alpha(b""), Ok((&b""[..], &b""[..]))); +/// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// use winnow::token::take_while; +/// use winnow::stream::AsChar; +/// +/// fn alpha(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { +/// take_while(0.., AsChar::is_alpha).parse_peek(s) +/// } +/// +/// assert_eq!(alpha(Partial::new(b"latin123")), Ok((Partial::new(&b"123"[..]), &b"latin"[..]))); +/// assert_eq!(alpha(Partial::new(b"12345")), Ok((Partial::new(&b"12345"[..]), &b""[..]))); +/// assert_eq!(alpha(Partial::new(b"latin")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(alpha(Partial::new(b"")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +/// +/// One or more tokens: +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::take_while; +/// use winnow::stream::AsChar; +/// +/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// take_while(1.., AsChar::is_alpha).parse_peek(s) +/// } +/// +/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); +/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..]))); +/// assert_eq!(alpha(b"12345"), Err(ErrMode::Backtrack(InputError::new(&b"12345"[..], ErrorKind::Slice)))); +/// +/// fn hex(s: &str) -> IResult<&str, &str> { +/// take_while(1.., ('0'..='9', 'A'..='F')).parse_peek(s) +/// } +/// +/// assert_eq!(hex("123 and voila"), Ok((" and voila", "123"))); +/// assert_eq!(hex("DEADBEEF and others"), Ok((" and others", "DEADBEEF"))); +/// assert_eq!(hex("BADBABEsomething"), Ok(("something", "BADBABE"))); +/// assert_eq!(hex("D15EA5E"), Ok(("", "D15EA5E"))); +/// assert_eq!(hex(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// use winnow::token::take_while; +/// use winnow::stream::AsChar; +/// +/// fn alpha(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { +/// take_while(1.., AsChar::is_alpha).parse_peek(s) +/// } +/// +/// assert_eq!(alpha(Partial::new(b"latin123")), Ok((Partial::new(&b"123"[..]), &b"latin"[..]))); +/// assert_eq!(alpha(Partial::new(b"latin")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(alpha(Partial::new(b"12345")), Err(ErrMode::Backtrack(InputError::new(Partial::new(&b"12345"[..]), ErrorKind::Slice)))); +/// +/// fn hex(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// take_while(1.., ('0'..='9', 'A'..='F')).parse_peek(s) +/// } +/// +/// assert_eq!(hex(Partial::new("123 and voila")), Ok((Partial::new(" and voila"), "123"))); +/// assert_eq!(hex(Partial::new("DEADBEEF and others")), Ok((Partial::new(" and others"), "DEADBEEF"))); +/// 
assert_eq!(hex(Partial::new("BADBABEsomething")), Ok((Partial::new("something"), "BADBABE"))); +/// assert_eq!(hex(Partial::new("D15EA5E")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(hex(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +/// +/// Arbitrary amount of tokens: +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::take_while; +/// use winnow::stream::AsChar; +/// +/// fn short_alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// take_while(3..=6, AsChar::is_alpha).parse_peek(s) +/// } +/// +/// assert_eq!(short_alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); +/// assert_eq!(short_alpha(b"lengthy"), Ok((&b"y"[..], &b"length"[..]))); +/// assert_eq!(short_alpha(b"latin"), Ok((&b""[..], &b"latin"[..]))); +/// assert_eq!(short_alpha(b"ed"), Err(ErrMode::Backtrack(InputError::new(&b"ed"[..], ErrorKind::Slice)))); +/// assert_eq!(short_alpha(b"12345"), Err(ErrMode::Backtrack(InputError::new(&b"12345"[..], ErrorKind::Slice)))); +/// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// use winnow::token::take_while; +/// use winnow::stream::AsChar; +/// +/// fn short_alpha(s: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { +/// take_while(3..=6, AsChar::is_alpha).parse_peek(s) +/// } +/// +/// assert_eq!(short_alpha(Partial::new(b"latin123")), Ok((Partial::new(&b"123"[..]), &b"latin"[..]))); +/// assert_eq!(short_alpha(Partial::new(b"lengthy")), Ok((Partial::new(&b"y"[..]), &b"length"[..]))); +/// assert_eq!(short_alpha(Partial::new(b"latin")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(short_alpha(Partial::new(b"ed")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(short_alpha(Partial::new(b"12345")), Err(ErrMode::Backtrack(InputError::new(Partial::new(&b"12345"[..]), ErrorKind::Slice)))); +/// ``` +#[inline(always)] +#[doc(alias = "is_a")] +#[doc(alias = "take_while0")] +#[doc(alias = "take_while1")] +pub fn take_while<T, I, Error: ParserError<I>>( + range: impl Into<Range>, + list: T, +) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + T: ContainsToken<<I as Stream>::Token>, +{ + let Range { + start_inclusive, + end_inclusive, + } = range.into(); + trace("take_while", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => { + if <I as StreamIsPartial>::is_partial_supported() { + take_while0_::<_, _, _, true>(i, &list) + } else { + take_while0_::<_, _, _, false>(i, &list) + } + } + (1, None) => { + if <I as StreamIsPartial>::is_partial_supported() { + take_while1_::<_, _, _, true>(i, &list) + } else { + take_while1_::<_, _, _, false>(i, &list) + } + } + (start, end) => { + let end = end.unwrap_or(usize::MAX); + if <I as StreamIsPartial>::is_partial_supported() { + take_while_m_n_::<_, _, _, true>(i, start, end, &list) + } else { + take_while_m_n_::<_, _, _, false>(i, start, end, &list) + } + } + } + }) +} + +fn take_while0_<T, I, Error: ParserError<I>, const PARTIAL: bool>( + input: &mut I, + list: &T, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + T: ContainsToken<<I as Stream>::Token>, +{ + if PARTIAL && input.is_partial() { + take_till0_partial(input, |c| !list.contains_token(c)) + } else { + take_till0_complete(input, |c| !list.contains_token(c)) + } +} + +fn take_while1_<T, I, Error: 
ParserError<I>, const PARTIAL: bool>( + input: &mut I, + list: &T, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + T: ContainsToken<<I as Stream>::Token>, +{ + if PARTIAL && input.is_partial() { + take_till1_partial(input, |c| !list.contains_token(c)) + } else { + take_till1_complete(input, |c| !list.contains_token(c)) + } +} + +fn take_while_m_n_<T, I, Error: ParserError<I>, const PARTIAL: bool>( + input: &mut I, + m: usize, + n: usize, + list: &T, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + T: ContainsToken<<I as Stream>::Token>, +{ + take_till_m_n::<_, _, _, PARTIAL>(input, m, n, |c| !list.contains_token(c)) +} + +/// Looks for the first element of the input type for which the condition returns true, +/// and returns the input up to this position. +/// +/// *Partial version*: If no element is found matching the condition, this will return `Incomplete` +fn take_till0_partial<P, I: Stream, E: ParserError<I>>( + input: &mut I, + predicate: P, +) -> PResult<<I as Stream>::Slice, E> +where + P: Fn(I::Token) -> bool, +{ + let offset = input + .offset_for(predicate) + .ok_or_else(|| ErrMode::Incomplete(Needed::new(1)))?; + Ok(input.next_slice(offset)) +} + +/// Looks for the first element of the input type for which the condition returns true +/// and returns the input up to this position. +/// +/// Fails if the produced slice is empty. +/// +/// *Partial version*: If no element is found matching the condition, this will return `Incomplete` +fn take_till1_partial<P, I: Stream, E: ParserError<I>>( + input: &mut I, + predicate: P, +) -> PResult<<I as Stream>::Slice, E> +where + P: Fn(I::Token) -> bool, +{ + let e: ErrorKind = ErrorKind::Slice; + let offset = input + .offset_for(predicate) + .ok_or_else(|| ErrMode::Incomplete(Needed::new(1)))?; + if offset == 0 { + Err(ErrMode::from_error_kind(input, e)) + } else { + Ok(input.next_slice(offset)) + } +} + +/// Looks for the first element of the input type for which the condition returns true, +/// and returns the input up to this position. +/// +/// *Complete version*: If no element is found matching the condition, this will return the whole input +fn take_till0_complete<P, I: Stream, E: ParserError<I>>( + input: &mut I, + predicate: P, +) -> PResult<<I as Stream>::Slice, E> +where + P: Fn(I::Token) -> bool, +{ + let offset = input + .offset_for(predicate) + .unwrap_or_else(|| input.eof_offset()); + Ok(input.next_slice(offset)) +} + +/// Looks for the first element of the input type for which the condition returns true +/// and returns the input up to this position. +/// +/// Fails if the produced slice is empty. 
+/// +/// *Complete version*: If no element is found matching the condition, this will return the whole input +fn take_till1_complete<P, I: Stream, E: ParserError<I>>( + input: &mut I, + predicate: P, +) -> PResult<<I as Stream>::Slice, E> +where + P: Fn(I::Token) -> bool, +{ + let e: ErrorKind = ErrorKind::Slice; + let offset = input + .offset_for(predicate) + .unwrap_or_else(|| input.eof_offset()); + if offset == 0 { + Err(ErrMode::from_error_kind(input, e)) + } else { + Ok(input.next_slice(offset)) + } +} + +fn take_till_m_n<P, I, Error: ParserError<I>, const PARTIAL: bool>( + input: &mut I, + m: usize, + n: usize, + predicate: P, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + P: Fn(I::Token) -> bool, +{ + if n < m { + return Err(ErrMode::assert(input, "`m` should be <= `n`")); + } + + let mut final_count = 0; + for (processed, (offset, token)) in input.iter_offsets().enumerate() { + if predicate(token) { + if processed < m { + return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); + } else { + return Ok(input.next_slice(offset)); + } + } else { + if processed == n { + return Ok(input.next_slice(offset)); + } + final_count = processed + 1; + } + } + if PARTIAL && input.is_partial() { + if final_count == n { + Ok(input.finish()) + } else { + let needed = if m > input.eof_offset() { + m - input.eof_offset() + } else { + 1 + }; + Err(ErrMode::Incomplete(Needed::new(needed))) + } + } else { + if m <= final_count { + Ok(input.finish()) + } else { + Err(ErrMode::from_error_kind(input, ErrorKind::Slice)) + } + } +} + +/// Recognize the longest input slice (if any) till a [pattern][ContainsToken] is met. +/// +/// *Partial version* will return a `ErrMode::Incomplete(Needed::new(1))` if the match reaches the +/// end of input or if there was no match.
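+/// +/// A sketch with a non-zero minimum, mirroring `take_while(1.., ...)` with the predicate inverted: +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; +/// # use winnow::token::take_till; +/// fn till_colon<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// take_till(1.., ':').parse_next(input) +/// } +/// assert_eq!(till_colon.parse_peek("latin:123"), Ok((":123", "latin"))); +/// assert_eq!(till_colon.parse_peek(":empty"), Err(ErrMode::Backtrack(InputError::new(":empty", ErrorKind::Slice)))); +/// ```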
+/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::take_till; +/// +/// fn till_colon(s: &str) -> IResult<&str, &str> { +/// take_till(0.., |c| c == ':').parse_peek(s) +/// } +/// +/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin"))); +/// assert_eq!(till_colon(":empty matched"), Ok((":empty matched", ""))); //allowed +/// assert_eq!(till_colon("12345"), Ok(("", "12345"))); +/// assert_eq!(till_colon(""), Ok(("", ""))); +/// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// use winnow::token::take_till; +/// +/// fn till_colon(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// take_till(0.., |c| c == ':').parse_peek(s) +/// } +/// +/// assert_eq!(till_colon(Partial::new("latin:123")), Ok((Partial::new(":123"), "latin"))); +/// assert_eq!(till_colon(Partial::new(":empty matched")), Ok((Partial::new(":empty matched"), ""))); //allowed +/// assert_eq!(till_colon(Partial::new("12345")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(till_colon(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +#[doc(alias = "is_not")] +pub fn take_till<T, I, Error: ParserError<I>>( + range: impl Into<Range>, + list: T, +) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + T: ContainsToken<<I as Stream>::Token>, +{ + let Range { + start_inclusive, + end_inclusive, + } = range.into(); + trace("take_till", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => { + if <I as StreamIsPartial>::is_partial_supported() { + take_till0_partial(i, |c| list.contains_token(c)) + } else { + take_till0_complete(i, |c| list.contains_token(c)) + } + } + (1, None) => { + if <I as StreamIsPartial>::is_partial_supported() { + take_till1_partial(i, |c| list.contains_token(c)) + } else { + take_till1_complete(i, |c| list.contains_token(c)) + } + } + (start, end) => { + let end = end.unwrap_or(usize::MAX); + if <I as StreamIsPartial>::is_partial_supported() { + take_till_m_n::<_, _, _, true>(i, start, end, |c| list.contains_token(c)) + } else { + take_till_m_n::<_, _, _, false>(i, start, end, |c| list.contains_token(c)) + } + } + } + }) +} + +/// Recognize the longest input slice (if any) till a [pattern][ContainsToken] is met. +/// +/// *Partial version* will return a `ErrMode::Incomplete(Needed::new(1))` if the match reaches the +/// end of input or if there was no match.
+/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::take_till0; +/// +/// fn till_colon(s: &str) -> IResult<&str, &str> { +/// take_till0(|c| c == ':').parse_peek(s) +/// } +/// +/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin"))); +/// assert_eq!(till_colon(":empty matched"), Ok((":empty matched", ""))); //allowed +/// assert_eq!(till_colon("12345"), Ok(("", "12345"))); +/// assert_eq!(till_colon(""), Ok(("", ""))); +/// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// use winnow::token::take_till0; +/// +/// fn till_colon(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// take_till0(|c| c == ':').parse_peek(s) +/// } +/// +/// assert_eq!(till_colon(Partial::new("latin:123")), Ok((Partial::new(":123"), "latin"))); +/// assert_eq!(till_colon(Partial::new(":empty matched")), Ok((Partial::new(":empty matched"), ""))); //allowed +/// assert_eq!(till_colon(Partial::new("12345")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(till_colon(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[deprecated(since = "0.5.21", note = "Replaced with `take_till(0.., ...)`")] +#[inline(always)] +pub fn take_till0<T, I, Error: ParserError<I>>( + list: T, +) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + T: ContainsToken<<I as Stream>::Token>, +{ + trace("take_till0", move |i: &mut I| { + if <I as StreamIsPartial>::is_partial_supported() && i.is_partial() { + take_till0_partial(i, |c| list.contains_token(c)) + } else { + take_till0_complete(i, |c| list.contains_token(c)) + } + }) +} + +/// Recognize the longest (at least 1) input slice till a [pattern][ContainsToken] is met. +/// +/// It will return `Err(ErrMode::Backtrack(InputError::new(_, ErrorKind::Slice)))` if the input is empty or the +/// predicate matches the first input. +/// +/// *Partial version* will return a `ErrMode::Incomplete(Needed::new(1))` if the match reaches the +/// end of input or if there was no match.
+/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::take_till1; +/// +/// fn till_colon(s: &str) -> IResult<&str, &str> { +/// take_till1(|c| c == ':').parse_peek(s) +/// } +/// +/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin"))); +/// assert_eq!(till_colon(":empty matched"), Err(ErrMode::Backtrack(InputError::new(":empty matched", ErrorKind::Slice)))); +/// assert_eq!(till_colon("12345"), Ok(("", "12345"))); +/// assert_eq!(till_colon(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// +/// fn not_space(s: &str) -> IResult<&str, &str> { +/// take_till1([' ', '\t', '\r', '\n']).parse_peek(s) +/// } +/// +/// assert_eq!(not_space("Hello, World!"), Ok((" World!", "Hello,"))); +/// assert_eq!(not_space("Sometimes\t"), Ok(("\t", "Sometimes"))); +/// assert_eq!(not_space("Nospace"), Ok(("", "Nospace"))); +/// assert_eq!(not_space(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// use winnow::token::take_till1; +/// +/// fn till_colon(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// take_till1(|c| c == ':').parse_peek(s) +/// } +/// +/// assert_eq!(till_colon(Partial::new("latin:123")), Ok((Partial::new(":123"), "latin"))); +/// assert_eq!(till_colon(Partial::new(":empty matched")), Err(ErrMode::Backtrack(InputError::new(Partial::new(":empty matched"), ErrorKind::Slice)))); +/// assert_eq!(till_colon(Partial::new("12345")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(till_colon(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// +/// fn not_space(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// take_till1([' ', '\t', '\r', '\n']).parse_peek(s) +/// } +/// +/// assert_eq!(not_space(Partial::new("Hello, World!")), Ok((Partial::new(" World!"), "Hello,"))); +/// assert_eq!(not_space(Partial::new("Sometimes\t")), Ok((Partial::new("\t"), "Sometimes"))); +/// assert_eq!(not_space(Partial::new("Nospace")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(not_space(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +#[deprecated(since = "0.5.21", note = "Replaced with `take_till(1.., ...)`")] +pub fn take_till1<T, I, Error: ParserError<I>>( + list: T, +) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + T: ContainsToken<<I as Stream>::Token>, +{ + trace("take_till1", move |i: &mut I| { + if <I as StreamIsPartial>::is_partial_supported() && i.is_partial() { + take_till1_partial(i, |c| list.contains_token(c)) + } else { + take_till1_complete(i, |c| list.contains_token(c)) + } + }) +} + +/// Recognize an input slice containing the first N input elements (I[..N]). +/// +/// *Complete version*: It will return `Err(ErrMode::Backtrack(InputError::new(_, ErrorKind::Slice)))` if the input is shorter than the argument. +/// +/// *Partial version*: if the input has less than N elements, `take` will +/// return a `ErrMode::Incomplete(Needed::new(M))` where M is the number of +/// additional bytes the parser would need to succeed. 
+/// It is well defined for `&[u8]` as the number of elements is the byte size, +/// but for types like `&str`, we cannot know how many bytes correspond to +/// the next few chars, so the result will be `ErrMode::Incomplete(Needed::Unknown)`. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::take; +/// +/// fn take6(s: &str) -> IResult<&str, &str> { +/// take(6usize).parse_peek(s) +/// } +/// +/// assert_eq!(take6("1234567"), Ok(("7", "123456"))); +/// assert_eq!(take6("things"), Ok(("", "things"))); +/// assert_eq!(take6("short"), Err(ErrMode::Backtrack(InputError::new("short", ErrorKind::Slice)))); +/// assert_eq!(take6(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// ``` +/// +/// The units that are taken will depend on the input type. For example, for a +/// `&str` it will take a number of `char`'s, whereas for a `&[u8]` it will +/// take that many `u8`'s: +/// +/// ```rust +/// # use winnow::prelude::*; +/// use winnow::error::InputError; +/// use winnow::token::take; +/// +/// assert_eq!(take::<_, _, InputError<_>>(1usize).parse_peek("💙"), Ok(("", "💙"))); +/// assert_eq!(take::<_, _, InputError<_>>(1usize).parse_peek("💙".as_bytes()), Ok((b"\x9F\x92\x99".as_ref(), b"\xF0".as_ref()))); +/// ``` +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::Partial; +/// use winnow::token::take; +/// +/// fn take6(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// take(6usize).parse_peek(s) +/// } +/// +/// assert_eq!(take6(Partial::new("1234567")), Ok((Partial::new("7"), "123456"))); +/// assert_eq!(take6(Partial::new("things")), Ok((Partial::new(""), "things"))); +/// // `Unknown` as we don't know the number of bytes that `count` corresponds to +/// assert_eq!(take6(Partial::new("short")), Err(ErrMode::Incomplete(Needed::Unknown))); +/// ``` +#[inline(always)] +pub fn take<C, I, Error: ParserError<I>>(count: C) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + C: ToUsize, +{ + let c = count.to_usize(); + trace("take", move |i: &mut I| { + if <I as StreamIsPartial>::is_partial_supported() { + take_::<_, _, true>(i, c) + } else { + take_::<_, _, false>(i, c) + } + }) +} + +fn take_<I, Error: ParserError<I>, const PARTIAL: bool>( + i: &mut I, + c: usize, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, +{ + match i.offset_at(c) { + Ok(offset) => Ok(i.next_slice(offset)), + Err(e) if PARTIAL && i.is_partial() => Err(ErrMode::Incomplete(e)), + Err(_needed) => Err(ErrMode::from_error_kind(i, ErrorKind::Slice)), + } +} + +/// Recognize the input slice up to the first occurrence of the literal. +/// +/// It doesn't consume the pattern. +/// +/// *Complete version*: It will return `Err(ErrMode::Backtrack(InputError::new(_, ErrorKind::Slice)))` +/// if the pattern wasn't met. +/// +/// *Partial version*: will return a `ErrMode::Incomplete(Needed::new(N))` if the input doesn't +/// contain the pattern or if the input is smaller than the pattern.
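+/// +/// Since the literal is left in the input, a sketch of a common follow-up is to consume it explicitly afterwards (here via the convenience `Parser` impl on string literals): +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::token::take_until0; +/// fn field<'s>(input: &mut &'s str) -> PResult<&'s str> { +/// let value = take_until0(";").parse_next(input)?; +/// let _ = ";".parse_next(input)?; +/// Ok(value) +/// } +/// assert_eq!(field.parse_peek("abc;rest"), Ok(("rest", "abc"))); +/// ```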
+/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::take_until0; +/// +/// fn until_eof(s: &str) -> IResult<&str, &str> { +/// take_until0("eof").parse_peek(s) +/// } +/// +/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world"))); +/// assert_eq!(until_eof("hello, world"), Err(ErrMode::Backtrack(InputError::new("hello, world", ErrorKind::Slice)))); +/// assert_eq!(until_eof(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1"))); +/// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// use winnow::token::take_until0; +/// +/// fn until_eof(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// take_until0("eof").parse_peek(s) +/// } +/// +/// assert_eq!(until_eof(Partial::new("hello, worldeof")), Ok((Partial::new("eof"), "hello, world"))); +/// assert_eq!(until_eof(Partial::new("hello, world")), Err(ErrMode::Incomplete(Needed::Unknown))); +/// assert_eq!(until_eof(Partial::new("hello, worldeo")), Err(ErrMode::Incomplete(Needed::Unknown))); +/// assert_eq!(until_eof(Partial::new("1eof2eof")), Ok((Partial::new("eof2eof"), "1"))); +/// ``` +#[inline(always)] +pub fn take_until0<T, I, Error: ParserError<I>>( + tag: T, +) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + FindSlice<T>, + T: SliceLen + Clone, +{ + trace("take_until0", move |i: &mut I| { + if <I as StreamIsPartial>::is_partial_supported() { + take_until0_::<_, _, _, true>(i, tag.clone()) + } else { + take_until0_::<_, _, _, false>(i, tag.clone()) + } + }) +} + +fn take_until0_<T, I, Error: ParserError<I>, const PARTIAL: bool>( + i: &mut I, + t: T, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + FindSlice<T>, + T: SliceLen, +{ + match i.find_slice(t) { + Some(offset) => Ok(i.next_slice(offset)), + None if PARTIAL && i.is_partial() => Err(ErrMode::Incomplete(Needed::Unknown)), + None => Err(ErrMode::from_error_kind(i, ErrorKind::Slice)), + } +} + +/// Recognize the non-empty input slice up to the first occurrence of the literal. +/// +/// It doesn't consume the pattern. +/// +/// *Complete version*: It will return `Err(ErrMode::Backtrack(InputError::new(_, ErrorKind::Slice)))` +/// if the pattern wasn't met. +/// +/// *Partial version*: will return a `ErrMode::Incomplete(Needed::new(N))` if the input doesn't +/// contain the pattern or if the input is smaller than the pattern.
+/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::take_until1; +/// +/// fn until_eof(s: &str) -> IResult<&str, &str> { +/// take_until1("eof").parse_peek(s) +/// } +/// +/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world"))); +/// assert_eq!(until_eof("hello, world"), Err(ErrMode::Backtrack(InputError::new("hello, world", ErrorKind::Slice)))); +/// assert_eq!(until_eof(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1"))); +/// assert_eq!(until_eof("eof"), Err(ErrMode::Backtrack(InputError::new("eof", ErrorKind::Slice)))); +/// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// use winnow::token::take_until1; +/// +/// fn until_eof(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// take_until1("eof").parse_peek(s) +/// } +/// +/// assert_eq!(until_eof(Partial::new("hello, worldeof")), Ok((Partial::new("eof"), "hello, world"))); +/// assert_eq!(until_eof(Partial::new("hello, world")), Err(ErrMode::Incomplete(Needed::Unknown))); +/// assert_eq!(until_eof(Partial::new("hello, worldeo")), Err(ErrMode::Incomplete(Needed::Unknown))); +/// assert_eq!(until_eof(Partial::new("1eof2eof")), Ok((Partial::new("eof2eof"), "1"))); +/// assert_eq!(until_eof(Partial::new("eof")), Err(ErrMode::Backtrack(InputError::new(Partial::new("eof"), ErrorKind::Slice)))); +/// ``` +#[inline(always)] +pub fn take_until1<T, I, Error: ParserError<I>>( + tag: T, +) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + FindSlice<T>, + T: SliceLen + Clone, +{ + trace("take_until1", move |i: &mut I| { + if <I as StreamIsPartial>::is_partial_supported() { + take_until1_::<_, _, _, true>(i, tag.clone()) + } else { + take_until1_::<_, _, _, false>(i, tag.clone()) + } + }) +} + +fn take_until1_<T, I, Error: ParserError<I>, const PARTIAL: bool>( + i: &mut I, + t: T, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + FindSlice<T>, + T: SliceLen, +{ + match i.find_slice(t) { + None if PARTIAL && i.is_partial() => Err(ErrMode::Incomplete(Needed::Unknown)), + None | Some(0) => Err(ErrMode::from_error_kind(i, ErrorKind::Slice)), + Some(offset) => Ok(i.next_slice(offset)), + } +} diff --git a/vendor/winnow/src/bytes/tests.rs b/vendor/winnow/src/token/tests.rs similarity index 80% rename from vendor/winnow/src/bytes/tests.rs rename to vendor/winnow/src/token/tests.rs index c2a1cee..9c39378 100644 --- a/vendor/winnow/src/bytes/tests.rs +++ b/vendor/winnow/src/token/tests.rs @@ -3,14 +3,15 @@ use super::*; #[cfg(feature = "std")] use proptest::prelude::*; -use crate::bytes::tag; +use crate::ascii::Caseless; +use crate::combinator::delimited; use crate::error::ErrMode; -use crate::error::Error; use crate::error::ErrorKind; +use crate::error::InputError; use crate::error::Needed; -use crate::multi::length_data; -use crate::sequence::delimited; use crate::stream::AsChar; +use crate::token::tag; +use crate::unpeek; use crate::IResult; use crate::Parser; use crate::Partial; @@ -18,14 +19,13 @@ use crate::Partial; #[test] fn complete_take_while_m_n_utf8_all_matching() { let result: IResult<&str, &str> = - take_while_m_n(1, 4, |c: char| c.is_alphabetic()).parse_next("øn"); + take_while(1..=4, |c: char| 
c.is_alphabetic()).parse_peek("øn"); assert_eq!(result, Ok(("", "øn"))); } #[test] fn complete_take_while_m_n_utf8_all_matching_substring() { - let result: IResult<&str, &str> = - take_while_m_n(1, 1, |c: char| c.is_alphabetic()).parse_next("øn"); + let result: IResult<&str, &str> = take_while(1, |c: char| c.is_alphabetic()).parse_peek("øn"); assert_eq!(result, Ok(("n", "ø"))); } @@ -38,7 +38,7 @@ fn model_complete_take_while_m_n( ) -> IResult<&str, &str> { if n < m { Err(crate::error::ErrMode::from_error_kind( - input, + &input, crate::error::ErrorKind::Slice, )) } else if m <= valid { @@ -46,7 +46,7 @@ fn model_complete_take_while_m_n( Ok((&input[offset..], &input[0..offset])) } else { Err(crate::error::ErrMode::from_error_kind( - input, + &input, crate::error::ErrorKind::Slice, )) } @@ -60,7 +60,7 @@ proptest! { let input = format!("{:a<valid$}{:b<invalid$}", "", "", valid=valid, invalid=invalid); let expected = model_complete_take_while_m_n(m, n, valid, &input); if m <= n { - let actual = take_while_m_n(m, n, |c: char| c == 'a').parse_next(input.as_str()); + let actual = take_while(m..=n, |c: char| c == 'a').parse_peek(input.as_str()); assert_eq!(expected, actual); } } @@ -70,7 +70,7 @@ proptest! { fn partial_any_str() { use super::any; assert_eq!( - any::<_, Error<Partial<&str>>>(Partial::new("Ә")), + any::<_, InputError<Partial<&str>>>.parse_peek(Partial::new("Ә")), Ok((Partial::new(""), 'Ә')) ); } @@ -78,7 +78,7 @@ fn partial_any_str() { #[test] fn partial_one_of_test() { fn f(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u8> { - one_of("ab").parse_next(i) + one_of(['a', 'b']).parse_peek(i) } let a = &b"abcd"[..]; @@ -88,13 +88,13 @@ fn partial_one_of_test() { assert_eq!( f(Partial::new(b)), Err(ErrMode::Backtrack(error_position!( - Partial::new(b), + &Partial::new(b), ErrorKind::Verify ))) ); fn utf8(i: Partial<&str>) -> IResult<Partial<&str>, char> { - one_of("+\u{FF0B}").parse_next(i) + one_of(['+', '\u{FF0B}']).parse_peek(i) } assert!(utf8(Partial::new("+")).is_ok()); @@ -104,14 +104,14 @@ fn partial_one_of_test() { #[test] fn char_byteslice() { fn f(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u8> { - 'c'.parse_next(i) + 'c'.parse_peek(i) } let a = &b"abcd"[..]; assert_eq!( f(Partial::new(a)), Err(ErrMode::Backtrack(error_position!( - Partial::new(a), + &Partial::new(a), ErrorKind::Verify ))) ); @@ -123,14 +123,14 @@ fn char_byteslice() { #[test] fn char_str() { fn f(i: Partial<&str>) -> IResult<Partial<&str>, char> { - 'c'.parse_next(i) + 'c'.parse_peek(i) } let a = "abcd"; assert_eq!( f(Partial::new(a)), Err(ErrMode::Backtrack(error_position!( - Partial::new(a), + &Partial::new(a), ErrorKind::Verify ))) ); @@ -142,14 +142,14 @@ fn char_str() { #[test] fn partial_none_of_test() { fn f(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u8> { - none_of("ab").parse_next(i) + none_of(['a', 'b']).parse_peek(i) } let a = &b"abcd"[..]; assert_eq!( f(Partial::new(a)), Err(ErrMode::Backtrack(error_position!( - Partial::new(a), + &Partial::new(a), ErrorKind::Verify ))) ); @@ -161,7 +161,7 @@ fn partial_none_of_test() { #[test] fn partial_is_a() { fn a_or_b(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_while1("ab").parse_next(i) + take_while(1.., ['a', 'b']).parse_peek(i) } let a = Partial::new(&b"abcd"[..]); @@ -173,7 +173,7 @@ fn partial_is_a() { let c = Partial::new(&b"cdef"[..]); assert_eq!( a_or_b(c), - Err(ErrMode::Backtrack(error_position!(c, ErrorKind::Slice))) + Err(ErrMode::Backtrack(error_position!(&c, ErrorKind::Slice))) ); let d = Partial::new(&b"bacdef"[..]); @@ 
-183,7 +183,7 @@ fn partial_is_a() { #[test] fn partial_is_not() { fn a_or_b(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_till1("ab").parse_next(i) + take_till(1.., ['a', 'b']).parse_peek(i) } let a = Partial::new(&b"cdab"[..]); @@ -195,7 +195,7 @@ fn partial_is_not() { let c = Partial::new(&b"abab"[..]); assert_eq!( a_or_b(c), - Err(ErrMode::Backtrack(error_position!(c, ErrorKind::Slice))) + Err(ErrMode::Backtrack(error_position!(&c, ErrorKind::Slice))) ); let d = Partial::new(&b"cdefba"[..]); @@ -208,7 +208,7 @@ fn partial_is_not() { #[test] fn partial_take_until_incomplete() { fn y(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_until0("end").parse_next(i) + take_until0("end").parse_peek(i) } assert_eq!( y(Partial::new(&b"nd"[..])), @@ -227,7 +227,7 @@ fn partial_take_until_incomplete() { #[test] fn partial_take_until_incomplete_s() { fn ys(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_until0("end").parse_next(i) + take_until0("end").parse_peek(i) } assert_eq!( ys(Partial::new("123en")), @@ -237,7 +237,7 @@ fn partial_take_until_incomplete_s() { #[test] fn partial_recognize() { - use crate::character::{ + use crate::ascii::{ alpha1 as alpha, alphanumeric1 as alphanumeric, digit1 as digit, hex_digit1 as hex_digit, multispace1 as multispace, oct_digit1 as oct_digit, space1 as space, }; @@ -245,7 +245,7 @@ fn partial_recognize() { fn x(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { delimited("<!--", take(5_usize), "-->") .recognize() - .parse_next(i) + .parse_peek(i) } let r = x(Partial::new(&b"<!-- abc --> aaa"[..])); assert_eq!(r, Ok((Partial::new(&b" aaa"[..]), &b"<!-- abc -->"[..]))); @@ -253,43 +253,43 @@ fn partial_recognize() { let semicolon = &b";"[..]; fn ya(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - alpha.recognize().parse_next(i) + alpha.recognize().parse_peek(i) } let ra = ya(Partial::new(&b"abc;"[..])); assert_eq!(ra, Ok((Partial::new(semicolon), &b"abc"[..]))); fn yd(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - digit.recognize().parse_next(i) + digit.recognize().parse_peek(i) } let rd = yd(Partial::new(&b"123;"[..])); assert_eq!(rd, Ok((Partial::new(semicolon), &b"123"[..]))); fn yhd(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - hex_digit.recognize().parse_next(i) + hex_digit.recognize().parse_peek(i) } let rhd = yhd(Partial::new(&b"123abcDEF;"[..])); assert_eq!(rhd, Ok((Partial::new(semicolon), &b"123abcDEF"[..]))); fn yod(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - oct_digit.recognize().parse_next(i) + oct_digit.recognize().parse_peek(i) } let rod = yod(Partial::new(&b"1234567;"[..])); assert_eq!(rod, Ok((Partial::new(semicolon), &b"1234567"[..]))); fn yan(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - alphanumeric.recognize().parse_next(i) + alphanumeric.recognize().parse_peek(i) } let ran = yan(Partial::new(&b"123abc;"[..])); assert_eq!(ran, Ok((Partial::new(semicolon), &b"123abc"[..]))); fn ys(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - space.recognize().parse_next(i) + space.recognize().parse_peek(i) } let rs = ys(Partial::new(&b" \t;"[..])); assert_eq!(rs, Ok((Partial::new(semicolon), &b" \t"[..]))); fn yms(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - multispace.recognize().parse_next(i) + multispace.recognize().parse_peek(i) } let rms = yms(Partial::new(&b" \t\r\n;"[..])); assert_eq!(rms, Ok((Partial::new(semicolon), &b" \t\r\n"[..]))); @@ -298,7 +298,7 @@ fn partial_recognize() { #[test] fn partial_take_while0() { fn f(i: 
Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_while0(AsChar::is_alpha).parse_next(i) + take_while(0.., AsChar::is_alpha).parse_peek(i) } let a = &b""[..]; let b = &b"abcd"[..]; @@ -314,7 +314,7 @@ fn partial_take_while0() { #[test] fn partial_take_while1() { fn f(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_while1(AsChar::is_alpha).parse_next(i) + take_while(1.., AsChar::is_alpha).parse_peek(i) } let a = &b""[..]; let b = &b"abcd"[..]; @@ -327,7 +327,7 @@ fn partial_take_while1() { assert_eq!( f(Partial::new(d)), Err(ErrMode::Backtrack(error_position!( - Partial::new(d), + &Partial::new(d), ErrorKind::Slice ))) ); @@ -336,7 +336,7 @@ fn partial_take_while1() { #[test] fn partial_take_while_m_n() { fn x(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_while_m_n(2, 4, AsChar::is_alpha).parse_next(i) + take_while(2..=4, AsChar::is_alpha).parse_peek(i) } let a = &b""[..]; let b = &b"a"[..]; @@ -356,7 +356,7 @@ fn partial_take_while_m_n() { assert_eq!( x(Partial::new(f)), Err(ErrMode::Backtrack(error_position!( - Partial::new(f), + &Partial::new(f), ErrorKind::Slice ))) ); @@ -365,7 +365,7 @@ fn partial_take_while_m_n() { #[test] fn partial_take_till0() { fn f(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_till0(AsChar::is_alpha).parse_next(i) + take_till(0.., AsChar::is_alpha).parse_peek(i) } let a = &b""[..]; let b = &b"abcd"[..]; @@ -387,7 +387,7 @@ fn partial_take_till0() { #[test] fn partial_take_till1() { fn f(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_till1(AsChar::is_alpha).parse_next(i) + take_till(1.., AsChar::is_alpha).parse_peek(i) } let a = &b""[..]; let b = &b"abcd"[..]; @@ -398,7 +398,7 @@ fn partial_take_till1() { assert_eq!( f(Partial::new(b)), Err(ErrMode::Backtrack(error_position!( - Partial::new(b), + &Partial::new(b), ErrorKind::Slice ))) ); @@ -412,7 +412,7 @@ fn partial_take_till1() { #[test] fn partial_take_while_utf8() { fn f(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_while0(|c| c != '點').parse_next(i) + take_while(0.., |c| c != '點').parse_peek(i) } assert_eq!( @@ -430,7 +430,7 @@ fn partial_take_while_utf8() { ); fn g(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_while0(|c| c == '點').parse_next(i) + take_while(0.., |c| c == '點').parse_peek(i) } assert_eq!( @@ -447,7 +447,7 @@ fn partial_take_while_utf8() { #[test] fn partial_take_till0_utf8() { fn f(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_till0(|c| c == '點').parse_next(i) + take_till(0.., |c| c == '點').parse_peek(i) } assert_eq!( @@ -465,7 +465,7 @@ fn partial_take_till0_utf8() { ); fn g(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_till0(|c| c != '點').parse_next(i) + take_till(0.., |c| c != '點').parse_peek(i) } assert_eq!( @@ -482,7 +482,7 @@ fn partial_take_till0_utf8() { #[test] fn partial_take_utf8() { fn f(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take(3_usize).parse_next(i) + take(3_usize).parse_peek(i) } assert_eq!( @@ -502,7 +502,7 @@ fn partial_take_utf8() { assert_eq!(f(Partial::new("a點b")), Ok((Partial::new(""), "a點b"))); fn g(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_while0(|c| c == '點').parse_next(i) + take_while(0.., |c| c == '點').parse_peek(i) } assert_eq!( @@ -519,7 +519,7 @@ fn partial_take_utf8() { #[test] fn partial_take_while_m_n_utf8_fixed() { fn parser(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_while_m_n(1, 1, |c| c == 'A' || c == '😃').parse_next(i) + take_while(1, |c| c == 'A' || c == '😃').parse_peek(i) } 
assert_eq!(parser(Partial::new("A!")), Ok((Partial::new("!"), "A"))); assert_eq!(parser(Partial::new("😃!")), Ok((Partial::new("!"), "😃"))); @@ -528,7 +528,7 @@ fn partial_take_while_m_n_utf8_fixed() { #[test] fn partial_take_while_m_n_utf8_range() { fn parser(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_while_m_n(1, 2, |c| c == 'A' || c == '😃').parse_next(i) + take_while(1..=2, |c| c == 'A' || c == '😃').parse_peek(i) } assert_eq!(parser(Partial::new("A!")), Ok((Partial::new("!"), "A"))); assert_eq!(parser(Partial::new("😃!")), Ok((Partial::new("!"), "😃"))); @@ -537,7 +537,7 @@ fn partial_take_while_m_n_utf8_range() { #[test] fn partial_take_while_m_n_utf8_full_match_fixed() { fn parser(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_while_m_n(1, 1, |c: char| c.is_alphabetic()).parse_next(i) + take_while(1, |c: char| c.is_alphabetic()).parse_peek(i) } assert_eq!(parser(Partial::new("øn")), Ok((Partial::new("n"), "ø"))); } @@ -545,7 +545,7 @@ fn partial_take_while_m_n_utf8_full_match_fixed() { #[test] fn partial_take_while_m_n_utf8_full_match_range() { fn parser(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_while_m_n(1, 2, |c: char| c.is_alphabetic()).parse_next(i) + take_while(1..=2, |c: char| c.is_alphabetic()).parse_peek(i) } assert_eq!(parser(Partial::new("øn")), Ok((Partial::new(""), "øn"))); } @@ -554,10 +554,10 @@ fn partial_take_while_m_n_utf8_full_match_range() { #[cfg(feature = "std")] fn partial_recognize_take_while0() { fn x(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_while0(AsChar::is_alphanum).parse_next(i) + take_while(0.., AsChar::is_alphanum).parse_peek(i) } fn y(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - x.recognize().parse_next(i) + unpeek(x).recognize().parse_peek(i) } assert_eq!( x(Partial::new(&b"ab."[..])), @@ -569,57 +569,11 @@ fn partial_recognize_take_while0() { ); } -#[test] -fn partial_length_bytes() { - use crate::number::le_u8; - - fn x(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - length_data(le_u8).parse_next(i) - } - assert_eq!( - x(Partial::new(b"\x02..>>")), - Ok((Partial::new(&b">>"[..]), &b".."[..])) - ); - assert_eq!( - x(Partial::new(b"\x02..")), - Ok((Partial::new(&[][..]), &b".."[..])) - ); - assert_eq!( - x(Partial::new(b"\x02.")), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - x(Partial::new(b"\x02")), - Err(ErrMode::Incomplete(Needed::new(2))) - ); - - fn y(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - let (i, _) = "magic".parse_next(i)?; - length_data(le_u8).parse_next(i) - } - assert_eq!( - y(Partial::new(b"magic\x02..>>")), - Ok((Partial::new(&b">>"[..]), &b".."[..])) - ); - assert_eq!( - y(Partial::new(b"magic\x02..")), - Ok((Partial::new(&[][..]), &b".."[..])) - ); - assert_eq!( - y(Partial::new(b"magic\x02.")), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - y(Partial::new(b"magic\x02")), - Err(ErrMode::Incomplete(Needed::new(2))) - ); -} - #[cfg(feature = "alloc")] #[test] fn partial_case_insensitive() { fn test(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - tag_no_case("ABcd").parse_next(i) + tag(Caseless("ABcd")).parse_peek(i) } assert_eq!( test(Partial::new(&b"aBCdefgh"[..])), @@ -640,20 +594,20 @@ fn partial_case_insensitive() { assert_eq!( test(Partial::new(&b"Hello"[..])), Err(ErrMode::Backtrack(error_position!( - Partial::new(&b"Hello"[..]), + &Partial::new(&b"Hello"[..]), ErrorKind::Tag ))) ); assert_eq!( test(Partial::new(&b"Hel"[..])), Err(ErrMode::Backtrack(error_position!( - 
Partial::new(&b"Hel"[..]), + &Partial::new(&b"Hel"[..]), ErrorKind::Tag ))) ); fn test2(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - tag_no_case("ABcd").parse_next(i) + tag(Caseless("ABcd")).parse_peek(i) } assert_eq!( test2(Partial::new("aBCdefgh")), @@ -674,14 +628,14 @@ fn partial_case_insensitive() { assert_eq!( test2(Partial::new("Hello")), Err(ErrMode::Backtrack(error_position!( - Partial::new("Hello"), + &Partial::new("Hello"), ErrorKind::Tag ))) ); assert_eq!( test2(Partial::new("Hel")), Err(ErrMode::Backtrack(error_position!( - Partial::new("Hel"), + &Partial::new("Hel"), ErrorKind::Tag ))) ); @@ -690,10 +644,10 @@ fn partial_case_insensitive() { #[test] fn partial_tag_fixed_size_array() { fn test(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - tag([0x42]).parse_next(i) + tag([0x42]).parse_peek(i) } fn test2(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - tag(&[0x42]).parse_next(i) + tag(&[0x42]).parse_peek(i) } let input = Partial::new(&[0x42, 0x00][..]); assert_eq!(test(input), Ok((Partial::new(&b"\x00"[..]), &b"\x42"[..]))); diff --git a/vendor/winnow/src/trace/internals.rs b/vendor/winnow/src/trace/internals.rs index 2e91f69..c38b11e 100644 --- a/vendor/winnow/src/trace/internals.rs +++ b/vendor/winnow/src/trace/internals.rs @@ -4,6 +4,63 @@ use std::io::Write; use crate::error::ErrMode; use crate::stream::Stream; +use crate::*; + +pub struct Trace<P, D, I, O, E> +where + P: Parser<I, O, E>, + I: Stream, + D: std::fmt::Display, +{ + parser: P, + name: D, + call_count: usize, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<P, D, I, O, E> Trace<P, D, I, O, E> +where + P: Parser<I, O, E>, + I: Stream, + D: std::fmt::Display, +{ + #[inline(always)] + pub fn new(parser: P, name: D) -> Self { + Self { + parser, + name, + call_count: 0, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<P, D, I, O, E> Parser<I, O, E> for Trace<P, D, I, O, E> +where + P: Parser<I, O, E>, + I: Stream, + D: std::fmt::Display, +{ + #[inline] + fn parse_next(&mut self, i: &mut I) -> PResult<O, E> { + let depth = Depth::new(); + let original = i.checkpoint(); + start(*depth, &self.name, self.call_count, i); + + let res = self.parser.parse_next(i); + + let consumed = i.offset_from(&original); + let severity = Severity::with_result(&res); + end(*depth, &self.name, self.call_count, consumed, severity); + self.call_count += 1; + + res + } +} pub struct Depth { depth: usize, @@ -74,30 +131,9 @@ pub fn start<I: Stream>( count: usize, input: &I, ) { - let ansi_color = ansi_color(); - let reset = if ansi_color { - anstyle::Reset.render().to_string() - } else { - "".to_owned() - }; - let gutter_style = if ansi_color { - anstyle::Style::new().bold() - } else { - anstyle::Style::new() - } - .render(); - let input_style = if ansi_color { - anstyle::Style::new().underline() - } else { - anstyle::Style::new() - } - .render(); - let eof_style = if ansi_color { - anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Cyan.into())) - } else { - anstyle::Style::new() - } - .render(); + let gutter_style = anstyle::Style::new().bold(); + let input_style = anstyle::Style::new().underline(); + let eof_style = anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Cyan.into())); let (call_width, input_width) = column_widths(); @@ -108,17 +144,13 @@ pub fn start<I: Stream>( }; let call_column = format!("{:depth$}> {name}{count}", ""); - let eof_offset = input.eof_offset(); - let offset = 
input.offset_at(input_width).unwrap_or(eof_offset); - let (_, slice) = input.next_slice(offset); - // The debug version of `slice` might be wider, either due to rendering one byte as two nibbles or // escaping in strings. - let mut debug_slice = format!("{:#?}", slice); + let mut debug_slice = format!("{:#?}", input.raw()); let (debug_slice, eof) = if let Some(debug_offset) = debug_slice .char_indices() .enumerate() - .find_map(|(pos, (offset, _))| (input_width <= pos).then(|| offset)) + .find_map(|(pos, (offset, _))| (input_width <= pos).then_some(offset)) { debug_slice.truncate(debug_offset); let eof = ""; @@ -132,30 +164,28 @@ pub fn start<I: Stream>( (debug_slice, eof) }; - let writer = std::io::stderr(); + let writer = anstream::stderr(); let mut writer = writer.lock(); - let _ = writeln!(writer, "{call_column:call_width$} {gutter_style}|{reset} {input_style}{debug_slice}{eof_style}{eof}{reset}"); + let _ = writeln!( + writer, + "{call_column:call_width$} {gutter_style}|{gutter_reset} {input_style}{debug_slice}{input_reset}{eof_style}{eof}{eof_reset}", + gutter_style=gutter_style.render(), + gutter_reset=gutter_style.render_reset(), + input_style=input_style.render(), + input_reset=input_style.render_reset(), + eof_style=eof_style.render(), + eof_reset=eof_style.render_reset(), + ); } pub fn end( depth: usize, name: &dyn crate::lib::std::fmt::Display, count: usize, - consumed: Option<usize>, + consumed: usize, severity: Severity, ) { - let ansi_color = ansi_color(); - let reset = if ansi_color { - anstyle::Reset.render().to_string() - } else { - "".to_owned() - }; - let gutter_style = if ansi_color { - anstyle::Style::new().bold() - } else { - anstyle::Style::new() - } - .render(); + let gutter_style = anstyle::Style::new().bold(); let (call_width, _) = column_widths(); @@ -166,105 +196,76 @@ pub fn end( }; let call_column = format!("{:depth$}< {name}{count}", ""); - let (mut status_style, status) = match severity { + let (status_style, status) = match severity { Severity::Success => { - let style = anstyle::Style::new() - .fg_color(Some(anstyle::AnsiColor::Green.into())) - .render(); - let status = format!("+{}", consumed.unwrap_or_default()); + let style = anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Green.into())); + let status = format!("+{}", consumed); (style, status) } Severity::Backtrack => ( - anstyle::Style::new() - .fg_color(Some(anstyle::AnsiColor::Yellow.into())) - .render(), + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Yellow.into())), "backtrack".to_owned(), ), Severity::Cut => ( - anstyle::Style::new() - .fg_color(Some(anstyle::AnsiColor::Red.into())) - .render(), + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Red.into())), "cut".to_owned(), ), Severity::Incomplete => ( - anstyle::Style::new() - .fg_color(Some(anstyle::AnsiColor::Red.into())) - .render(), + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Red.into())), "incomplete".to_owned(), ), }; - if !ansi_color { - status_style = anstyle::Style::new().render(); - } - let writer = std::io::stderr(); + let writer = anstream::stderr(); let mut writer = writer.lock(); let _ = writeln!( writer, - "{status_style}{call_column:call_width$}{reset} {gutter_style}|{reset} {status_style}{status}{reset}" + "{status_style}{call_column:call_width$}{status_reset} {gutter_style}|{gutter_reset} {status_style}{status}{status_reset}", + gutter_style=gutter_style.render(), + gutter_reset=gutter_style.render_reset(), + status_style=status_style.render(), + 
status_reset=status_style.render_reset(), ); } pub fn result(depth: usize, name: &dyn crate::lib::std::fmt::Display, severity: Severity) { - let ansi_color = ansi_color(); - let reset = if ansi_color { - anstyle::Reset.render().to_string() - } else { - "".to_owned() - }; - let gutter_style = if ansi_color { - anstyle::Style::new().bold() - } else { - anstyle::Style::new() - } - .render(); + let gutter_style = anstyle::Style::new().bold(); let (call_width, _) = column_widths(); let call_column = format!("{:depth$}| {name}", ""); - let (mut status_style, status) = match severity { + let (status_style, status) = match severity { Severity::Success => ( - anstyle::Style::new() - .fg_color(Some(anstyle::AnsiColor::Green.into())) - .render(), + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Green.into())), "", ), Severity::Backtrack => ( - anstyle::Style::new() - .fg_color(Some(anstyle::AnsiColor::Yellow.into())) - .render(), + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Yellow.into())), "backtrack", ), Severity::Cut => ( - anstyle::Style::new() - .fg_color(Some(anstyle::AnsiColor::Red.into())) - .render(), + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Red.into())), "cut", ), Severity::Incomplete => ( - anstyle::Style::new() - .fg_color(Some(anstyle::AnsiColor::Red.into())) - .render(), + anstyle::Style::new().fg_color(Some(anstyle::AnsiColor::Red.into())), "incomplete", ), }; - if !ansi_color { - status_style = anstyle::Style::new().render(); - } - let writer = std::io::stderr(); + let writer = anstream::stderr(); let mut writer = writer.lock(); let _ = writeln!( writer, - "{status_style}{call_column:call_width$}{reset} {gutter_style}|{reset} {status_style}{status}{reset}" + "{status_style}{call_column:call_width$}{status_reset} {gutter_style}|{gutter_reset} {status_style}{status}{status_reset}", + gutter_style=gutter_style.render(), + gutter_reset=gutter_style.render_reset(), + status_style=status_style.render(), + status_reset=status_style.render_reset(), ); } -fn ansi_color() -> bool { - concolor::get(concolor::Stream::Stderr).ansi_color() -} - fn column_widths() -> (usize, usize) { let term_width = term_width(); diff --git a/vendor/winnow/src/trace/mod.rs b/vendor/winnow/src/trace/mod.rs index 59dafba..6fe89ac 100644 --- a/vendor/winnow/src/trace/mod.rs +++ b/vendor/winnow/src/trace/mod.rs @@ -1,11 +1,12 @@ //! Parser execution tracing //! -//! By default, nothing happens and tracing gets compiled away as a no-op. To enable tracing, use +//! By default, nothing happens and tracing gets compiled away as a no-op. To enable tracing, use //! `--features debug`. //! //! # Example //! //!![Trace output from string example](https://raw.githubusercontent.com/winnow-rs/winnow/main/assets/trace.svg "Example output") +#![cfg_attr(feature = "debug", allow(clippy::std_instead_of_core))] #[cfg(feature = "debug")] mod internals; @@ -19,61 +20,41 @@ compile_error!("`debug` requires `std`"); /// Trace the execution of the parser /// -/// Note that [`Parser::context` also provides high level trace information. +/// Note that [`Parser::context`] also provides high level trace information. /// /// See [`trace` module][self] for more details. 
/// /// # Example /// /// ```rust -/// # use winnow::{error::ErrMode, error::{Error, ErrorKind}, error::Needed, IResult}; -/// # use winnow::bytes::take_while_m_n; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::token::take_while; /// # use winnow::stream::AsChar; /// # use winnow::prelude::*; /// use winnow::trace::trace; /// -/// fn short_alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// fn short_alpha<'s>(s: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { /// trace("short_alpha", -/// take_while_m_n(3, 6, AsChar::is_alpha) +/// take_while(3..=6, AsChar::is_alpha) /// ).parse_next(s) /// } /// -/// assert_eq!(short_alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); -/// assert_eq!(short_alpha(b"lengthy"), Ok((&b"y"[..], &b"length"[..]))); -/// assert_eq!(short_alpha(b"latin"), Ok((&b""[..], &b"latin"[..]))); -/// assert_eq!(short_alpha(b"ed"), Err(ErrMode::Backtrack(Error::new(&b"ed"[..], ErrorKind::Slice)))); -/// assert_eq!(short_alpha(b"12345"), Err(ErrMode::Backtrack(Error::new(&b"12345"[..], ErrorKind::Slice)))); +/// assert_eq!(short_alpha.parse_peek(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); +/// assert_eq!(short_alpha.parse_peek(b"lengthy"), Ok((&b"y"[..], &b"length"[..]))); +/// assert_eq!(short_alpha.parse_peek(b"latin"), Ok((&b""[..], &b"latin"[..]))); +/// assert_eq!(short_alpha.parse_peek(b"ed"), Err(ErrMode::Backtrack(InputError::new(&b"ed"[..], ErrorKind::Slice)))); +/// assert_eq!(short_alpha.parse_peek(b"12345"), Err(ErrMode::Backtrack(InputError::new(&b"12345"[..], ErrorKind::Slice)))); /// ``` #[cfg_attr(not(feature = "debug"), allow(unused_variables))] #[cfg_attr(not(feature = "debug"), allow(unused_mut))] +#[cfg_attr(not(feature = "debug"), inline(always))] pub fn trace<I: Stream, O, E>( name: impl crate::lib::std::fmt::Display, - mut parser: impl Parser<I, O, E>, + parser: impl Parser<I, O, E>, ) -> impl Parser<I, O, E> { #[cfg(feature = "debug")] { - let mut call_count = 0; - move |i: I| { - let depth = internals::Depth::new(); - let original = i.clone(); - internals::start(*depth, &name, call_count, &original); - - let res = parser.parse_next(i); - - let consumed = res.as_ref().ok().map(|(i, _)| { - if i.eof_offset() == 0 { - // Sometimes, an unrelated empty string is returned which can break `offset_to` - original.eof_offset() - } else { - original.offset_to(i) - } - }); - let severity = internals::Severity::with_result(&res); - internals::end(*depth, &name, call_count, consumed, severity); - call_count += 1; - - res - } + internals::Trace::new(parser, name) } #[cfg(not(feature = "debug"))] {

{
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        (&**self).find(haystack, span)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        (&**self).prefix(haystack, span)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn memory_usage(&self) -> usize {
+        (&**self).memory_usage()
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_fast(&self) -> bool {
+        (&**self).is_fast()
+    }
+}
+
+/// A type that encapsulates the selection of a prefilter algorithm from a
+/// sequence of needles.
+///
+/// The existence of this type is a little tricky, because we don't (currently)
+/// use it for performing a search. Instead, we really only consume it by
+/// converting the underlying prefilter into a trait object, whether that be
+/// `dyn PrefilterI` or `dyn Strategy` (for the meta regex engine). In order
+/// to avoid re-copying the prefilter selection logic, we isolate it here, and
+/// then force anything downstream that wants to convert it to a trait object
+/// to do trivial case analysis on it.
+///
+/// One wonders whether we *should* use an enum instead of a trait object.
+/// At time of writing, I chose trait objects based on instinct because 1) I
+/// knew I wasn't going to inline anything and 2) there would potentially be
+/// many different choices. However, as of time of writing, I haven't actually
+/// compared the trait object approach to the enum approach. That probably
+/// should be litigated, but I ran out of steam.
+///
+/// Note that if the `alloc` feature is disabled, then values of this type
+/// are (and should) never be constructed. Also, in practice, for any of the
+/// prefilters to be selected, you'll need at least one of the `perf-literal-*`
+/// features enabled.
+#[derive(Clone, Debug)]
+pub(crate) enum Choice {
+    Memchr(Memchr),
+    Memchr2(Memchr2),
+    Memchr3(Memchr3),
+    Memmem(Memmem),
+    Teddy(Teddy),
+    ByteSet(ByteSet),
+    AhoCorasick(AhoCorasick),
+}
+
+impl Choice {
+    /// Select what is believed to be the best prefilter algorithm for the
+    /// match semantics and sequence of needles given.
+    ///
+    /// This selection algorithm uses the needles as given without any
+    /// modification. For example, if `[bar]` is given, then this doesn't
+    /// try to select `memchr` for `b`. Instead, it would select `memmem`
+    /// for `bar`. If callers would want `memchr` selected for `[bar]`, then
+    /// callers should massage the literals themselves. That is, callers are
+    /// responsible for heuristics surrounding which sequence of literals is
+    /// best.
+    ///
+    /// What this selection algorithm does is attempt to use the fastest
+    /// prefilter that works for the literals given. So if `[a, b]` is given,
+    /// then `memchr2` is selected.
+    ///
+    /// Of course, which prefilter is selected is also subject to what
+    /// is available. For example, if `alloc` isn't enabled, then
+    /// that limits which prefilters can be selected. Similarly, if
+    /// `perf-literal-substring` isn't enabled, then nothing from the `memchr`
+    /// crate can be returned.
+    pub(crate) fn new<B: AsRef<[u8]>>(
+        kind: MatchKind,
+        needles: &[B],
+    ) -> Option<Choice> {
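+        // (Illustration of the cascade below: single-byte needles like
+        // `["a", "b"]` land on `memchr2`, while several multi-byte needles
+        // such as `["foo", "bar", "quux"]` fall through to Teddy. These
+        // example needle sets are illustrative only.)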
+        // An empty set means the regex matches nothing, so no sense in
+        // building a prefilter.
+        if needles.len() == 0 {
+            debug!("prefilter building failed: found empty set of literals");
+            return None;
+        }
+        // If the regex can match the empty string, then the prefilter
+        // will by definition match at every position. This is obviously
+        // completely ineffective.
+        if needles.iter().any(|n| n.as_ref().is_empty()) {
+            debug!("prefilter building failed: literals match empty string");
+            return None;
+        }
+        // BREADCRUMBS: Perhaps the literal optimizer should special case
+        // sequences of length two or three if the leading bytes of each are
+        // "rare"? Or perhaps, if there are two or three total possible leading
+        // bytes, regardless of the number of literals, and all are rare...
+        // Then well, perhaps we should use memchr2 or memchr3 in those cases?
+        if let Some(pre) = Memchr::new(kind, needles) {
+            debug!("prefilter built: memchr");
+            return Some(Choice::Memchr(pre));
+        }
+        if let Some(pre) = Memchr2::new(kind, needles) {
+            debug!("prefilter built: memchr2");
+            return Some(Choice::Memchr2(pre));
+        }
+        if let Some(pre) = Memchr3::new(kind, needles) {
+            debug!("prefilter built: memchr3");
+            return Some(Choice::Memchr3(pre));
+        }
+        if let Some(pre) = Memmem::new(kind, needles) {
+            debug!("prefilter built: memmem");
+            return Some(Choice::Memmem(pre));
+        }
+        if let Some(pre) = Teddy::new(kind, needles) {
+            debug!("prefilter built: teddy");
+            return Some(Choice::Teddy(pre));
+        }
+        if let Some(pre) = ByteSet::new(kind, needles) {
+            debug!("prefilter built: byteset");
+            return Some(Choice::ByteSet(pre));
+        }
+        if let Some(pre) = AhoCorasick::new(kind, needles) {
+            debug!("prefilter built: aho-corasick");
+            return Some(Choice::AhoCorasick(pre));
+        }
+        debug!("prefilter building failed: no strategy could be found");
+        None
+    }
+}
+
+/// Extracts all of the prefix literals from the given HIR expressions into a
+/// single `Seq`. The literals in the sequence are ordered with respect to the
+/// order of the given HIR expressions and consistent with the match semantics
+/// given.
+///
+/// The sequence returned is "optimized." That is, the literals may be shrunk
+/// or even truncated according to heuristics with the intent of making them
+/// more useful as a prefilter. (Which translates to both using faster
+/// algorithms and minimizing the false positive rate.)
+///
+/// Note that this erases any connection between the literals and which pattern
+/// (or patterns) they came from.
+///
+/// The match kind given must correspond to the match semantics of the regex
+/// that is represented by the HIRs given. The match semantics may change the
+/// literal sequence returned.
+#[cfg(feature = "syntax")]
+pub(crate) fn prefixes<H>(kind: MatchKind, hirs: &[H]) -> literal::Seq
+where
+    H: core::borrow::Borrow<Hir>,
+{
+    let mut extractor = literal::Extractor::new();
+    extractor.kind(literal::ExtractKind::Prefix);
+
+    let mut prefixes = literal::Seq::empty();
+    for hir in hirs {
+        prefixes.union(&mut extractor.extract(hir.borrow()));
+    }
+    debug!(
+        "prefixes (len={:?}, exact={:?}) extracted before optimization: {:?}",
+        prefixes.len(),
+        prefixes.is_exact(),
+        prefixes
+    );
+    match kind {
+        MatchKind::All => {
+            prefixes.sort();
+            prefixes.dedup();
+        }
+        MatchKind::LeftmostFirst => {
+            prefixes.optimize_for_prefix_by_preference();
+        }
+    }
+    debug!(
+        "prefixes (len={:?}, exact={:?}) extracted after optimization: {:?}",
+        prefixes.len(),
+        prefixes.is_exact(),
+        prefixes
+    );
+    prefixes
+}
+
+/// Like `prefixes`, but for all suffixes of all matches for the given HIRs.
+#[cfg(feature = "syntax")] +pub(crate) fn suffixes(kind: MatchKind, hirs: &[H]) -> literal::Seq +where + H: core::borrow::Borrow, +{ + let mut extractor = literal::Extractor::new(); + extractor.kind(literal::ExtractKind::Suffix); + + let mut suffixes = literal::Seq::empty(); + for hir in hirs { + suffixes.union(&mut extractor.extract(hir.borrow())); + } + debug!( + "suffixes (len={:?}, exact={:?}) extracted before optimization: {:?}", + suffixes.len(), + suffixes.is_exact(), + suffixes + ); + match kind { + MatchKind::All => { + suffixes.sort(); + suffixes.dedup(); + } + MatchKind::LeftmostFirst => { + suffixes.optimize_for_suffix_by_preference(); + } + } + debug!( + "suffixes (len={:?}, exact={:?}) extracted after optimization: {:?}", + suffixes.len(), + suffixes.is_exact(), + suffixes + ); + suffixes +} diff --git a/vendor/regex-automata/src/util/prefilter/teddy.rs b/vendor/regex-automata/src/util/prefilter/teddy.rs new file mode 100644 index 0000000..fc79f2b --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/teddy.rs @@ -0,0 +1,160 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct Teddy { + #[cfg(not(feature = "perf-literal-multisubstring"))] + _unused: (), + /// The actual Teddy searcher. + /// + /// Technically, it's possible that Teddy doesn't actually get used, since + /// Teddy does require its haystack to at least be of a certain size + /// (usually around the size of whatever vector is being used, so ~16 + /// or ~32 bytes). For haystacks shorter than that, the implementation + /// currently uses Rabin-Karp. + #[cfg(feature = "perf-literal-multisubstring")] + searcher: aho_corasick::packed::Searcher, + /// When running an anchored search, the packed searcher can't handle it so + /// we defer to Aho-Corasick itself. Kind of sad, but changing the packed + /// searchers to support anchored search would be difficult at worst and + /// annoying at best. Since packed searchers only apply to small numbers of + /// literals, we content ourselves that this is not much of an added cost. + /// (That packed searchers only work with a small number of literals is + /// also why we use a DFA here. Otherwise, the memory usage of a DFA would + /// likely be unacceptable.) + #[cfg(feature = "perf-literal-multisubstring")] + anchored_ac: aho_corasick::dfa::DFA, + /// The length of the smallest literal we look for. + /// + /// We use this as a heuristic to figure out whether this will be "fast" or + /// not. Generally, the longer the better, because longer needles are more + /// discriminating and thus reduce false positive rate. + #[cfg(feature = "perf-literal-multisubstring")] + minimum_len: usize, +} + +impl Teddy { + pub(crate) fn new>( + kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + None + } + #[cfg(feature = "perf-literal-multisubstring")] + { + // We only really support leftmost-first semantics. In + // theory we could at least support leftmost-longest, as the + // aho-corasick crate does, but regex-automata doesn't know about + // leftmost-longest currently. + // + // And like the aho-corasick prefilter, if we're using `All` + // semantics, then we can still use leftmost semantics for a + // prefilter. (This might be a suspicious choice for the literal + // engine, which uses a prefilter as a regex engine directly, but + // that only happens when using leftmost-first semantics.) 
+            let (packed_match_kind, ac_match_kind) = match kind {
+                MatchKind::LeftmostFirst | MatchKind::All => (
+                    aho_corasick::packed::MatchKind::LeftmostFirst,
+                    aho_corasick::MatchKind::LeftmostFirst,
+                ),
+            };
+            let minimum_len =
+                needles.iter().map(|n| n.as_ref().len()).min().unwrap_or(0);
+            let packed = aho_corasick::packed::Config::new()
+                .match_kind(packed_match_kind)
+                .builder()
+                .extend(needles)
+                .build()?;
+            let anchored_ac = aho_corasick::dfa::DFA::builder()
+                .match_kind(ac_match_kind)
+                .start_kind(aho_corasick::StartKind::Anchored)
+                .prefilter(false)
+                .build(needles)
+                .ok()?;
+            Some(Teddy { searcher: packed, anchored_ac, minimum_len })
+        }
+    }
+}
+
+impl PrefilterI for Teddy {
+    fn find(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        #[cfg(not(feature = "perf-literal-multisubstring"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "perf-literal-multisubstring")]
+        {
+            let ac_span =
+                aho_corasick::Span { start: span.start, end: span.end };
+            self.searcher
+                .find_in(haystack, ac_span)
+                .map(|m| Span { start: m.start(), end: m.end() })
+        }
+    }
+
+    fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        #[cfg(not(feature = "perf-literal-multisubstring"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "perf-literal-multisubstring")]
+        {
+            use aho_corasick::automaton::Automaton;
+            let input = aho_corasick::Input::new(haystack)
+                .anchored(aho_corasick::Anchored::Yes)
+                .span(span.start..span.end);
+            self.anchored_ac
+                .try_find(&input)
+                // OK because we build the DFA with anchored support.
+                .expect("aho-corasick DFA should never fail")
+                .map(|m| Span { start: m.start(), end: m.end() })
+        }
+    }
+
+    fn memory_usage(&self) -> usize {
+        #[cfg(not(feature = "perf-literal-multisubstring"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "perf-literal-multisubstring")]
+        {
+            use aho_corasick::automaton::Automaton;
+            self.searcher.memory_usage() + self.anchored_ac.memory_usage()
+        }
+    }
+
+    fn is_fast(&self) -> bool {
+        #[cfg(not(feature = "perf-literal-multisubstring"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "perf-literal-multisubstring")]
+        {
+            // Teddy is usually quite fast, but I have seen some cases where
+            // a large number of literals can overwhelm it and make it not so
+            // fast. We make an educated but conservative guess at a limit, at
+            // which point, we're not so comfortable thinking Teddy is "fast."
+            //
+            // Well... this used to incorporate a "limit" on the *number*
+            // of literals, but I have since changed it to a minimum on the
+            // *smallest* literal. Namely, when there is a very small literal
+            // (1 or 2 bytes), it is far more likely that it leads to a higher
+            // false positive rate. (Although, of course, not always. For
+            // example, 'zq' is likely to have a very low false positive rate.)
+            // But when we have 3 bytes, we have a really good chance of being
+            // quite discriminatory and thus fast.
+            //
+            // We may still want to add some kind of limit on the number of
+            // literals here, but keep in mind that Teddy already has its own
+            // somewhat small limit (64 at time of writing). The main issue
+            // here is that if 'is_fast' is false, it opens the door for the
+            // reverse inner optimization to kick in. We really only want to
+            // resort to the reverse inner optimization if we absolutely must.
+            self.minimum_len >= 3
+        }
+    }
+}
diff --git a/vendor/regex-automata/src/util/primitives.rs b/vendor/regex-automata/src/util/primitives.rs
new file mode 100644
index 0000000..5c5d187
--- /dev/null
+++ b/vendor/regex-automata/src/util/primitives.rs
@@ -0,0 +1,776 @@
+/*!
+Lower level primitive types that are useful in a variety of circumstances.
+
+# Overview
+
+This list represents the principal types in this module and briefly describes
+when you might want to use them.
+
+* [`PatternID`] - A type that represents the identifier of a regex pattern.
+This is probably the most widely used type in this module (which is why it's
+also re-exported in the crate root).
+* [`StateID`] - A type that represents the identifier of a finite automaton
+state. This is used for both NFAs and DFAs, with the notable exception of
+the hybrid NFA/DFA. (The hybrid NFA/DFA uses a special purpose "lazy" state
+identifier.)
+* [`SmallIndex`] - The internal representation of both a `PatternID` and a
+`StateID`. Its purpose is to serve as a type that can index memory without
+being as big as a `usize` on 64-bit targets. The main idea behind this type
+is that there are many things in regex engines that will, in practice, never
+overflow a 32-bit integer. (For example, like the number of patterns in a regex
+or the number of states in an NFA.) Thus, a `SmallIndex` can be used to index
+memory without peppering `as` casts everywhere. Moreover, it forces callers
+to handle errors in the case where, somehow, the value would otherwise overflow
+either a 32-bit integer or a `usize` (e.g., on 16-bit targets).
+* [`NonMaxUsize`] - Represents a `usize` that cannot be `usize::MAX`. As a
+result, `Option<NonMaxUsize>` has the same size in memory as a `usize`. This
+is useful, for example, when representing the offsets of submatches since it
+reduces memory usage by a factor of 2. It is a legal optimization since Rust
+guarantees that slices never have a length that exceeds `isize::MAX`.
+*/
+
+use core::num::NonZeroUsize;
+
+#[cfg(feature = "alloc")]
+use alloc::vec::Vec;
+
+use crate::util::int::{Usize, U16, U32, U64};
+
+/// A `usize` that can never be `usize::MAX`.
+///
+/// This is similar to `core::num::NonZeroUsize`, but instead of not permitting
+/// a zero value, this does not permit a max value.
+///
+/// This is useful in certain contexts where one wants to optimize the memory
+/// usage of things that contain match offsets. Namely, since Rust slices
+/// are guaranteed to never have a length exceeding `isize::MAX`, we can use
+/// `usize::MAX` as a sentinel to indicate that no match was found. Indeed,
+/// types like `Option<NonMaxUsize>` have exactly the same size in memory as a
+/// `usize`.
+///
+/// This type is defined to be `repr(transparent)` for
+/// `core::num::NonZeroUsize`, which is in turn defined to be
+/// `repr(transparent)` for `usize`.
+#[derive(Clone, Copy, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct NonMaxUsize(NonZeroUsize);
+
+impl NonMaxUsize {
+    /// Create a new `NonMaxUsize` from the given value.
+    ///
+    /// This returns `None` only when the given value is equal to `usize::MAX`.
+    #[inline]
+    pub fn new(value: usize) -> Option<NonMaxUsize> {
+        NonZeroUsize::new(value.wrapping_add(1)).map(NonMaxUsize)
+    }
+
+    /// Return the underlying `usize` value. The returned value is guaranteed
+    /// to not equal `usize::MAX`.
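+    ///
+    /// A small sketch of the round trip (the value `5` is illustrative):
+    ///
+    /// ```
+    /// use regex_automata::util::primitives::NonMaxUsize;
+    ///
+    /// // `new` offsets the value internally; `get` undoes it.
+    /// let n = NonMaxUsize::new(5).unwrap();
+    /// assert_eq!(5, n.get());
+    /// // The one forbidden value:
+    /// assert!(NonMaxUsize::new(usize::MAX).is_none());
+    /// ```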
+    #[inline]
+    pub fn get(self) -> usize {
+        self.0.get().wrapping_sub(1)
+    }
+}
+
+// We provide our own Debug impl because seeing the internal repr can be quite
+// surprising if you aren't expecting it. e.g., 'NonMaxUsize(5)' vs just '5'.
+impl core::fmt::Debug for NonMaxUsize {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "{:?}", self.get())
+    }
+}
+
+/// A type that represents a "small" index.
+///
+/// The main idea of this type is to provide something that can index memory,
+/// but uses less memory than `usize` on 64-bit systems. Specifically, its
+/// representation is always a `u32` and has `repr(transparent)` enabled. (So
+/// it is safe to transmute between a `u32` and a `SmallIndex`.)
+///
+/// A small index is typically useful in cases where there is no practical way
+/// that the index will overflow a 32-bit integer. A good example of this is
+/// an NFA state. If you could somehow build an NFA with `2^30` states, its
+/// memory usage would be exorbitant and its runtime execution would be so
+/// slow as to be completely worthless. Therefore, this crate generally deems
+/// it acceptable to return an error if it would otherwise build an NFA that
+/// requires a slice longer than what a 32-bit integer can index. In exchange,
+/// we can use 32-bit indices instead of 64-bit indices in various places.
+///
+/// This type enforces this by providing a constructor that will return an
+/// error if its argument cannot fit into the type. This makes it much easier
+/// to handle these sorts of boundary cases that are otherwise extremely
+/// subtle.
+///
+/// On all targets, this type guarantees that its value will fit in a `u32`,
+/// `i32`, `usize` and an `isize`. This means that on 16-bit targets, for
+/// example, this type's maximum value will never overflow an `isize`,
+/// which means it will never overflow an `i16` even though its internal
+/// representation is still a `u32`.
+///
+/// The purpose for making the type fit into even signed integer types like
+/// `isize` is to guarantee that the difference between any two small indices
+/// is itself also a small index. This is useful in certain contexts, e.g.,
+/// for delta encoding.
+///
+/// # Other types
+///
+/// The following types wrap `SmallIndex` to provide a more focused use case:
+///
+/// * [`PatternID`] is for representing the identifiers of patterns.
+/// * [`StateID`] is for representing the identifiers of states in finite
+/// automata. It is used for both NFAs and DFAs.
+///
+/// # Representation
+///
+/// This type is always represented internally by a `u32` and is marked as
+/// `repr(transparent)`. Thus, this type always has the same representation as
+/// a `u32`. It is thus safe to transmute between a `u32` and a `SmallIndex`.
+///
+/// # Indexing
+///
+/// For convenience, callers may use a `SmallIndex` to index slices.
+///
+/// # Safety
+///
+/// While a `SmallIndex` is meant to guarantee that its value fits into `usize`
+/// without using as much space as a `usize` on all targets, callers must
+/// not rely on this property for safety. Callers may choose to rely on this
+/// property for correctness however. For example, creating a `SmallIndex` with
+/// an invalid value can be done in entirely safe code. This may in turn result
+/// in panics or silent logical errors.
+#[derive(
+    Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
+)]
+#[repr(transparent)]
+pub struct SmallIndex(u32);
+
+impl SmallIndex {
+    /// The maximum index value.
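+    ///
+    /// On 32-bit and 64-bit targets this is `i32::MAX - 1`; a quick sketch of
+    /// its relationship to [`SmallIndex::LIMIT`]:
+    ///
+    /// ```
+    /// use regex_automata::util::primitives::SmallIndex;
+    ///
+    /// assert_eq!(SmallIndex::MAX.as_usize() + 1, SmallIndex::LIMIT);
+    /// ```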
+    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
+    pub const MAX: SmallIndex =
+        // FIXME: Use as_usize() once const functions in traits are stable.
+        SmallIndex::new_unchecked(core::i32::MAX as usize - 1);
+
+    /// The maximum index value.
+    #[cfg(target_pointer_width = "16")]
+    pub const MAX: SmallIndex =
+        SmallIndex::new_unchecked(core::isize::MAX as usize - 1);
+
+    /// The total number of values that can be represented as a small index.
+    pub const LIMIT: usize = SmallIndex::MAX.as_usize() + 1;
+
+    /// The zero index value.
+    pub const ZERO: SmallIndex = SmallIndex::new_unchecked(0);
+
+    /// The number of bytes that a single small index uses in memory.
+    pub const SIZE: usize = core::mem::size_of::<SmallIndex>();
+
+    /// Create a new small index.
+    ///
+    /// If the given index exceeds [`SmallIndex::MAX`], then this returns
+    /// an error.
+    #[inline]
+    pub fn new(index: usize) -> Result<SmallIndex, SmallIndexError> {
+        SmallIndex::try_from(index)
+    }
+
+    /// Create a new small index without checking whether the given value
+    /// exceeds [`SmallIndex::MAX`].
+    ///
+    /// Using this routine with an invalid index value will result in
+    /// unspecified behavior, but *not* undefined behavior. In particular, an
+    /// invalid index value is likely to cause panics or possibly even silent
+    /// logical errors.
+    ///
+    /// Callers must never rely on a `SmallIndex` to be within a certain range
+    /// for memory safety.
+    #[inline]
+    pub const fn new_unchecked(index: usize) -> SmallIndex {
+        // FIXME: Use as_u32() once const functions in traits are stable.
+        SmallIndex(index as u32)
+    }
+
+    /// Like [`SmallIndex::new`], but panics if the given index is not valid.
+    #[inline]
+    pub fn must(index: usize) -> SmallIndex {
+        SmallIndex::new(index).expect("invalid small index")
+    }
+
+    /// Return this small index as a `usize`. This is guaranteed to never
+    /// overflow `usize`.
+    #[inline]
+    pub const fn as_usize(&self) -> usize {
+        // FIXME: Use as_usize() once const functions in traits are stable.
+        self.0 as usize
+    }
+
+    /// Return this small index as a `u64`. This is guaranteed to never
+    /// overflow.
+    #[inline]
+    pub const fn as_u64(&self) -> u64 {
+        // FIXME: Use u64::from() once const functions in traits are stable.
+        self.0 as u64
+    }
+
+    /// Return the internal `u32` of this small index. This is guaranteed to
+    /// never overflow `u32`.
+    #[inline]
+    pub const fn as_u32(&self) -> u32 {
+        self.0
+    }
+
+    /// Return the internal `u32` of this small index represented as an `i32`.
+    /// This is guaranteed to never overflow an `i32`.
+    #[inline]
+    pub const fn as_i32(&self) -> i32 {
+        // This is OK because we guarantee that our max value is <= i32::MAX.
+        self.0 as i32
+    }
+
+    /// Returns one more than this small index as a usize.
+    ///
+    /// Since a small index has constraints on its maximum value, adding `1`
+    /// to it will always fit in a `usize`, a `u32` and an `i32`.
+    #[inline]
+    pub fn one_more(&self) -> usize {
+        self.as_usize() + 1
+    }
+
+    /// Decode this small index from the bytes given using the native endian
+    /// byte order for the current target.
+    ///
+    /// If the decoded integer is not representable as a small index for the
+    /// current target, then this returns an error.
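+    ///
+    /// A sketch of the round trip through native-endian bytes (the value
+    /// `1234` is illustrative):
+    ///
+    /// ```
+    /// use regex_automata::util::primitives::SmallIndex;
+    ///
+    /// let index = SmallIndex::must(1234);
+    /// // `to_ne_bytes` (defined below) is the inverse operation.
+    /// assert_eq!(Ok(index), SmallIndex::from_ne_bytes(index.to_ne_bytes()));
+    /// ```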
+    #[inline]
+    pub fn from_ne_bytes(
+        bytes: [u8; 4],
+    ) -> Result<SmallIndex, SmallIndexError> {
+        let id = u32::from_ne_bytes(bytes);
+        if id > SmallIndex::MAX.as_u32() {
+            return Err(SmallIndexError { attempted: u64::from(id) });
+        }
+        Ok(SmallIndex::new_unchecked(id.as_usize()))
+    }
+
+    /// Decode this small index from the bytes given using the native endian
+    /// byte order for the current target.
+    ///
+    /// This is analogous to [`SmallIndex::new_unchecked`] in that it does not
+    /// check whether the decoded integer is representable as a small index.
+    #[inline]
+    pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> SmallIndex {
+        SmallIndex::new_unchecked(u32::from_ne_bytes(bytes).as_usize())
+    }
+
+    /// Return the underlying small index integer as raw bytes in native endian
+    /// format.
+    #[inline]
+    pub fn to_ne_bytes(&self) -> [u8; 4] {
+        self.0.to_ne_bytes()
+    }
+}
+
+impl<T> core::ops::Index<SmallIndex> for [T] {
+    type Output = T;
+
+    #[inline]
+    fn index(&self, index: SmallIndex) -> &T {
+        &self[index.as_usize()]
+    }
+}
+
+impl<T> core::ops::IndexMut<SmallIndex> for [T] {
+    #[inline]
+    fn index_mut(&mut self, index: SmallIndex) -> &mut T {
+        &mut self[index.as_usize()]
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<T> core::ops::Index<SmallIndex> for Vec<T> {
+    type Output = T;
+
+    #[inline]
+    fn index(&self, index: SmallIndex) -> &T {
+        &self[index.as_usize()]
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<T> core::ops::IndexMut<SmallIndex> for Vec<T> {
+    #[inline]
+    fn index_mut(&mut self, index: SmallIndex) -> &mut T {
+        &mut self[index.as_usize()]
+    }
+}
+
+impl From<u8> for SmallIndex {
+    fn from(index: u8) -> SmallIndex {
+        SmallIndex::new_unchecked(usize::from(index))
+    }
+}
+
+impl TryFrom<u16> for SmallIndex {
+    type Error = SmallIndexError;
+
+    fn try_from(index: u16) -> Result<SmallIndex, SmallIndexError> {
+        if u32::from(index) > SmallIndex::MAX.as_u32() {
+            return Err(SmallIndexError { attempted: u64::from(index) });
+        }
+        Ok(SmallIndex::new_unchecked(index.as_usize()))
+    }
+}
+
+impl TryFrom<u32> for SmallIndex {
+    type Error = SmallIndexError;
+
+    fn try_from(index: u32) -> Result<SmallIndex, SmallIndexError> {
+        if index > SmallIndex::MAX.as_u32() {
+            return Err(SmallIndexError { attempted: u64::from(index) });
+        }
+        Ok(SmallIndex::new_unchecked(index.as_usize()))
+    }
+}
+
+impl TryFrom<u64> for SmallIndex {
+    type Error = SmallIndexError;
+
+    fn try_from(index: u64) -> Result<SmallIndex, SmallIndexError> {
+        if index > SmallIndex::MAX.as_u64() {
+            return Err(SmallIndexError { attempted: index });
+        }
+        Ok(SmallIndex::new_unchecked(index.as_usize()))
+    }
+}
+
+impl TryFrom<usize> for SmallIndex {
+    type Error = SmallIndexError;
+
+    fn try_from(index: usize) -> Result<SmallIndex, SmallIndexError> {
+        if index > SmallIndex::MAX.as_usize() {
+            return Err(SmallIndexError { attempted: index.as_u64() });
+        }
+        Ok(SmallIndex::new_unchecked(index))
+    }
+}
+
+#[cfg(test)]
+impl quickcheck::Arbitrary for SmallIndex {
+    fn arbitrary(gen: &mut quickcheck::Gen) -> SmallIndex {
+        use core::cmp::max;
+
+        let id = max(i32::MIN + 1, i32::arbitrary(gen)).abs();
+        if id > SmallIndex::MAX.as_i32() {
+            SmallIndex::MAX
+        } else {
+            SmallIndex::new(usize::try_from(id).unwrap()).unwrap()
+        }
+    }
+}
+
+/// This error occurs when a small index could not be constructed.
+///
+/// This occurs when given an integer exceeding the maximum small index value.
+///
+/// When the `std` feature is enabled, this implements the `Error` trait.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct SmallIndexError {
+    attempted: u64,
+}
+
+impl SmallIndexError {
+    /// Returns the value that could not be converted to a small index.
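+    ///
+    /// A brief sketch (the oversized value is illustrative):
+    ///
+    /// ```
+    /// use regex_automata::util::primitives::SmallIndex;
+    ///
+    /// let err = SmallIndex::try_from(u64::MAX).unwrap_err();
+    /// assert_eq!(u64::MAX, err.attempted());
+    /// ```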
+
+impl SmallIndexError {
+    /// Returns the value that could not be converted to a small index.
+    pub fn attempted(&self) -> u64 {
+        self.attempted
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for SmallIndexError {}
+
+impl core::fmt::Display for SmallIndexError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(
+            f,
+            "failed to create small index from {:?}, which exceeds {:?}",
+            self.attempted(),
+            SmallIndex::MAX,
+        )
+    }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct SmallIndexIter {
+    rng: core::ops::Range<usize>,
+}
+
+impl Iterator for SmallIndexIter {
+    type Item = SmallIndex;
+
+    fn next(&mut self) -> Option<SmallIndex> {
+        if self.rng.start >= self.rng.end {
+            return None;
+        }
+        let next_id = self.rng.start + 1;
+        let id = core::mem::replace(&mut self.rng.start, next_id);
+        // new_unchecked is OK since we asserted that the number of
+        // elements in this iterator will fit in an ID at construction.
+        Some(SmallIndex::new_unchecked(id))
+    }
+}
+
+macro_rules! index_type_impls {
+    ($name:ident, $err:ident, $iter:ident, $withiter:ident) => {
+        impl $name {
+            /// The maximum value.
+            pub const MAX: $name = $name(SmallIndex::MAX);
+
+            /// The total number of values that can be represented.
+            pub const LIMIT: usize = SmallIndex::LIMIT;
+
+            /// The zero value.
+            pub const ZERO: $name = $name(SmallIndex::ZERO);
+
+            /// The number of bytes that a single value uses in memory.
+            pub const SIZE: usize = SmallIndex::SIZE;
+
+            /// Create a new value that is represented by a "small index."
+            ///
+            /// If the given index exceeds the maximum allowed value, then this
+            /// returns an error.
+            #[inline]
+            pub fn new(value: usize) -> Result<$name, $err> {
+                SmallIndex::new(value).map($name).map_err($err)
+            }
+
+            /// Create a new value without checking whether the given argument
+            /// exceeds the maximum.
+            ///
+            /// Using this routine with an invalid value will result in
+            /// unspecified behavior, but *not* undefined behavior. In
+            /// particular, an invalid ID value is likely to cause panics or
+            /// possibly even silent logical errors.
+            ///
+            /// Callers must never rely on this type to be within a certain
+            /// range for memory safety.
+            #[inline]
+            pub const fn new_unchecked(value: usize) -> $name {
+                $name(SmallIndex::new_unchecked(value))
+            }
+
+            /// Like `new`, but panics if the given value is not valid.
+            #[inline]
+            pub fn must(value: usize) -> $name {
+                $name::new(value).expect(concat!(
+                    "invalid ",
+                    stringify!($name),
+                    " value"
+                ))
+            }
+
+            /// Return the internal value as a `usize`. This is guaranteed to
+            /// never overflow `usize`.
+            #[inline]
+            pub const fn as_usize(&self) -> usize {
+                self.0.as_usize()
+            }
+
+            /// Return the internal value as a `u64`. This is guaranteed to
+            /// never overflow.
+            #[inline]
+            pub const fn as_u64(&self) -> u64 {
+                self.0.as_u64()
+            }
+
+            /// Return the internal value as a `u32`. This is guaranteed to
+            /// never overflow `u32`.
+            #[inline]
+            pub const fn as_u32(&self) -> u32 {
+                self.0.as_u32()
+            }
+
+            /// Return the internal value as an `i32`. This is guaranteed to
+            /// never overflow an `i32`.
+            #[inline]
+            pub const fn as_i32(&self) -> i32 {
+                self.0.as_i32()
+            }
+
+            /// Returns one more than this value as a usize.
+            ///
+            /// Since values represented by a "small index" have constraints
+            /// on their maximum value, adding `1` to it will always fit in a
+            /// `usize`, a `u32` and an `i32`.
+            #[inline]
+            pub fn one_more(&self) -> usize {
+                self.0.one_more()
+            }
+
+            /// Decode this value from the bytes given using the native endian
+            /// byte order for the current target.
+            ///
+            /// If the decoded integer is not representable as a small index
+            /// for the current target, then this returns an error.
+            #[inline]
+            pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<$name, $err> {
+                SmallIndex::from_ne_bytes(bytes).map($name).map_err($err)
+            }
+
+            /// Decode this value from the bytes given using the native endian
+            /// byte order for the current target.
+            ///
+            /// This is analogous to `new_unchecked` in that it does not check
+            /// whether the decoded integer is representable as a small index.
+            #[inline]
+            pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> $name {
+                $name(SmallIndex::from_ne_bytes_unchecked(bytes))
+            }
+
+            /// Return the underlying integer as raw bytes in native endian
+            /// format.
+            #[inline]
+            pub fn to_ne_bytes(&self) -> [u8; 4] {
+                self.0.to_ne_bytes()
+            }
+
+            /// Returns an iterator over all values from 0 up to and not
+            /// including the given length.
+            ///
+            /// If the given length exceeds this type's limit, then this
+            /// panics.
+            pub(crate) fn iter(len: usize) -> $iter {
+                $iter::new(len)
+            }
+        }
+
+        // We write our own Debug impl so that we get things like PatternID(5)
+        // instead of PatternID(SmallIndex(5)).
+        impl core::fmt::Debug for $name {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                f.debug_tuple(stringify!($name)).field(&self.as_u32()).finish()
+            }
+        }
+
+        impl<T> core::ops::Index<$name> for [T] {
+            type Output = T;
+
+            #[inline]
+            fn index(&self, index: $name) -> &T {
+                &self[index.as_usize()]
+            }
+        }
+
+        impl<T> core::ops::IndexMut<$name> for [T] {
+            #[inline]
+            fn index_mut(&mut self, index: $name) -> &mut T {
+                &mut self[index.as_usize()]
+            }
+        }
+
+        #[cfg(feature = "alloc")]
+        impl<T> core::ops::Index<$name> for Vec<T> {
+            type Output = T;
+
+            #[inline]
+            fn index(&self, index: $name) -> &T {
+                &self[index.as_usize()]
+            }
+        }
+
+        #[cfg(feature = "alloc")]
+        impl<T> core::ops::IndexMut<$name> for Vec<T> {
+            #[inline]
+            fn index_mut(&mut self, index: $name) -> &mut T {
+                &mut self[index.as_usize()]
+            }
+        }
+
+        impl From<u8> for $name {
+            fn from(value: u8) -> $name {
+                $name(SmallIndex::from(value))
+            }
+        }
+
+        impl TryFrom<u16> for $name {
+            type Error = $err;
+
+            fn try_from(value: u16) -> Result<$name, $err> {
+                SmallIndex::try_from(value).map($name).map_err($err)
+            }
+        }
+
+        impl TryFrom<u32> for $name {
+            type Error = $err;
+
+            fn try_from(value: u32) -> Result<$name, $err> {
+                SmallIndex::try_from(value).map($name).map_err($err)
+            }
+        }
+
+        impl TryFrom<u64> for $name {
+            type Error = $err;
+
+            fn try_from(value: u64) -> Result<$name, $err> {
+                SmallIndex::try_from(value).map($name).map_err($err)
+            }
+        }
+
+        impl TryFrom<usize> for $name {
+            type Error = $err;
+
+            fn try_from(value: usize) -> Result<$name, $err> {
+                SmallIndex::try_from(value).map($name).map_err($err)
+            }
+        }
+
+        #[cfg(test)]
+        impl quickcheck::Arbitrary for $name {
+            fn arbitrary(gen: &mut quickcheck::Gen) -> $name {
+                $name(SmallIndex::arbitrary(gen))
+            }
+        }
+
+        /// This error occurs when a value could not be constructed.
+        ///
+        /// This occurs when given an integer exceeding the maximum allowed
+        /// value.
+        ///
+        /// When the `std` feature is enabled, this implements the `Error`
+        /// trait.
+        #[derive(Clone, Debug, Eq, PartialEq)]
+        pub struct $err(SmallIndexError);
+
+        impl $err {
+            /// Returns the value that could not be converted to an ID.
+            pub fn attempted(&self) -> u64 {
+                self.0.attempted()
+            }
+        }
+
+        #[cfg(feature = "std")]
+        impl std::error::Error for $err {}
+
+        impl core::fmt::Display for $err {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                write!(
+                    f,
+                    "failed to create {} from {:?}, which exceeds {:?}",
+                    stringify!($name),
+                    self.attempted(),
+                    $name::MAX,
+                )
+            }
+        }
+
+        #[derive(Clone, Debug)]
+        pub(crate) struct $iter(SmallIndexIter);
+
+        impl $iter {
+            fn new(len: usize) -> $iter {
+                assert!(
+                    len <= $name::LIMIT,
+                    "cannot create iterator for {} when number of \
+                     elements exceeds {:?}",
+                    stringify!($name),
+                    $name::LIMIT,
+                );
+                $iter(SmallIndexIter { rng: 0..len })
+            }
+        }
+
+        impl Iterator for $iter {
+            type Item = $name;
+
+            fn next(&mut self) -> Option<$name> {
+                self.0.next().map($name)
+            }
+        }
+
+        /// An iterator adapter that is like std::iter::Enumerate, but attaches
+        /// small index values instead. It requires `ExactSizeIterator`. At
+        /// construction, it ensures that the index of each element in the
+        /// iterator is representable in the corresponding small index type.
+        #[derive(Clone, Debug)]
+        pub(crate) struct $withiter<I> {
+            it: I,
+            ids: $iter,
+        }
+
+        impl<I: Iterator + ExactSizeIterator> $withiter<I> {
+            fn new(it: I) -> $withiter<I> {
+                let ids = $name::iter(it.len());
+                $withiter { it, ids }
+            }
+        }
+
+        impl<I: Iterator + ExactSizeIterator> Iterator for $withiter<I> {
+            type Item = ($name, I::Item);
+
+            fn next(&mut self) -> Option<($name, I::Item)> {
+                let item = self.it.next()?;
+                // Number of elements in this iterator must match, according
+                // to contract of ExactSizeIterator.
+                let id = self.ids.next().unwrap();
+                Some((id, item))
+            }
+        }
+    };
+}
+
+/// The identifier of a regex pattern, represented by a [`SmallIndex`].
+///
+/// The identifier for a pattern corresponds to its relative position among
+/// other patterns in a single finite state machine. Namely, when building
+/// a multi-pattern regex engine, one must supply a sequence of patterns to
+/// match. The position (starting at 0) of each pattern in that sequence
+/// represents its identifier. This identifier is in turn used to identify and
+/// report matches of that pattern in various APIs.
+///
+/// See the [`SmallIndex`] type for more information about what it means for
+/// a pattern ID to be a "small index."
+///
+/// Note that this type is defined in the
+/// [`util::primitives`](crate::util::primitives) module, but it is also
+/// re-exported at the crate root due to how common it is.
+#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct PatternID(SmallIndex);
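Because `PatternID` (and `StateID` below) are thin wrappers expanded from `index_type_impls!`, they inherit the `SmallIndex` API nearly verbatim. A short sketch of what that buys, relying only on items generated by the macro above plus the crate-root re-export mentioned in the doc comment:

```rust
use regex_automata::PatternID;

fn main() {
    // 'must' panics on out-of-range values; 'new' returns an error instead.
    let pid = PatternID::must(5);
    assert_eq!(5, pid.as_usize());
    assert!(PatternID::new(usize::MAX).is_err());
    // The hand-written Debug impl prints PatternID(5) rather than
    // PatternID(SmallIndex(5)).
    assert_eq!("PatternID(5)", format!("{:?}", pid));
    // The generated Index impls let a PatternID index slices directly.
    let names: &[&str] = &["zero", "one", "two", "three", "four", "five"];
    assert_eq!("five", names[pid]);
}
```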
+
+/// The identifier of a finite automaton state, represented by a
+/// [`SmallIndex`].
+///
+/// Most regex engines in this crate are built on top of finite automata. Each
+/// state in a finite automaton defines transitions from its state to another.
+/// Those transitions point to other states via their identifiers, i.e., a
+/// `StateID`. Since finite automata tend to contain many transitions, it is
+/// much more memory efficient to define state IDs as small indices.
+///
+/// See the [`SmallIndex`] type for more information about what it means for
+/// a state ID to be a "small index."
+#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct StateID(SmallIndex);
+
+index_type_impls!(PatternID, PatternIDError, PatternIDIter, WithPatternIDIter);
+index_type_impls!(StateID, StateIDError, StateIDIter, WithStateIDIter);
+
+/// A utility trait that defines a couple of adapters for making it convenient
+/// to access indices as "small index" types. We require ExactSizeIterator so
+/// that iterator construction can do a single check to make sure the index of
+/// each element is representable by its small index type.
+pub(crate) trait IteratorIndexExt: Iterator {
+    fn with_pattern_ids(self) -> WithPatternIDIter<Self>
+    where
+        Self: Sized + ExactSizeIterator,
+    {
+        WithPatternIDIter::new(self)
+    }
+
+    fn with_state_ids(self) -> WithStateIDIter<Self>
+    where
+        Self: Sized + ExactSizeIterator,
+    {
+        WithStateIDIter::new(self)
+    }
+}
+
+impl<I: Iterator> IteratorIndexExt for I {}
diff --git a/vendor/regex-automata/src/util/search.rs b/vendor/regex-automata/src/util/search.rs
new file mode 100644
index 0000000..39aec52
--- /dev/null
+++ b/vendor/regex-automata/src/util/search.rs
@@ -0,0 +1,1969 @@
+/*!
+Types and routines that support the search APIs of most regex engines.
+
+This sub-module isn't exposed directly, but rather, its contents are exported
+at the crate root due to the universality of most of the types and routines in
+this module.
+*/
+
+use core::ops::{Range, RangeBounds};
+
+use crate::util::{escape::DebugByte, primitives::PatternID, utf8};
+
+/// The parameters for a regex search including the haystack to search.
+///
+/// It turns out that regex searches have a few parameters, and in most cases,
+/// those parameters have defaults that work in the vast majority of cases.
+/// This `Input` type exists to make that common case seamless while also
+/// providing an avenue for changing the parameters of a search. In particular,
+/// this type enables doing so without a combinatorial explosion of different
+/// methods and/or superfluous parameters in the common cases.
+///
+/// An `Input` permits configuring the following things:
+///
+/// * Search only a substring of a haystack, while taking the broader context
+/// into account for resolving look-around assertions.
+/// * Indicating whether to search for all patterns in a regex, or to
+/// only search for one pattern in particular.
+/// * Whether to perform an anchored or unanchored search.
+/// * Whether to report a match as early as possible.
+///
+/// All of these parameters, except for the haystack, have sensible default
+/// values. This means that the minimal search configuration is simply a call
+/// to [`Input::new`] with your haystack. Setting any other parameter is
+/// optional.
+///
+/// Moreover, for any `H` that implements `AsRef<[u8]>`, there exists a
+/// `From<&H> for Input` implementation. This is useful because many of the
+/// search APIs in this crate accept an `Into<Input>`. This means you can
+/// provide string or byte strings to these routines directly, and they'll
+/// automatically get converted into an `Input` for you.
+///
+/// The lifetime parameter `'h` refers to the lifetime of the haystack.
+///
+/// # Organization
+///
+/// The API of `Input` is split into a few different parts:
+///
+/// * A builder-like API that transforms an `Input` by value. Examples:
+/// [`Input::span`] and [`Input::anchored`].
+/// * A setter API that permits mutating parameters in place. Examples:
+/// [`Input::set_span`] and [`Input::set_anchored`].
+/// * A getter API that permits retrieving any of the search parameters.
+/// Examples: [`Input::get_span`] and [`Input::get_anchored`].
+/// * A few convenience getter routines that don't conform to the above naming
+/// pattern due to how common they are. Examples: [`Input::haystack`],
+/// [`Input::start`] and [`Input::end`].
+/// * Miscellaneous predicates and other helper routines that are useful
+/// in some contexts. Examples: [`Input::is_char_boundary`].
+///
+/// An `Input` exposes so much because it is meant to be used by both callers
+/// of regex engines _and_ implementors of regex engines. A constraining
+/// factor is that regex engines should accept a `&Input` as their lowest
+/// level API, which means that implementors should only use the "getter"
+/// APIs of an `Input`.
+///
+/// # Valid bounds and search termination
+///
+/// An `Input` permits setting the bounds of a search via either
+/// [`Input::span`] or [`Input::range`]. The bounds set must be valid, or
+/// else a panic will occur. Bounds are valid if and only if:
+///
+/// * The bounds represent a valid range into the input's haystack.
+/// * **or** the end bound is a valid ending bound for the haystack *and*
+/// the start bound is exactly one greater than the end bound.
+///
+/// In the latter case, [`Input::is_done`] will return true and indicates any
+/// search receiving such an input should immediately return with no match.
+///
+/// Note that while `Input` is used for reverse searches in this crate, the
+/// `Input::is_done` predicate assumes a forward search. Because unsigned
+/// offsets are used internally, there is no way to tell from only the offsets
+/// whether a reverse search is done or not.
+///
+/// # Regex engine support
+///
+/// Any regex engine accepting an `Input` must support at least the following
+/// things:
+///
+/// * Searching a `&[u8]` for matches.
+/// * Searching a substring of `&[u8]` for a match, such that any match
+/// reported must appear entirely within that substring.
+/// * For a forwards search, a match should never be reported when
+/// [`Input::is_done`] returns true. (For reverse searches, termination should
+/// be handled outside of `Input`.)
+///
+/// Supporting other aspects of an `Input` is optional, but regex engines
+/// should handle aspects they don't support gracefully. How this is done is
+/// generally up to the regex engine. This crate generally treats unsupported
+/// anchored modes as an error to report for example, but for simplicity, in
+/// the meta regex engine, trying to search with an invalid pattern ID just
+/// results in no match being reported.
+#[derive(Clone)]
+pub struct Input<'h> {
+    haystack: &'h [u8],
+    span: Span,
+    anchored: Anchored,
+    earliest: bool,
+}
+
+impl<'h> Input<'h> {
+    /// Create a new search configuration for the given haystack.
+    #[inline]
+    pub fn new<H: ?Sized + AsRef<[u8]>>(haystack: &'h H) -> Input<'h> {
+        Input {
+            haystack: haystack.as_ref(),
+            span: Span { start: 0, end: haystack.as_ref().len() },
+            anchored: Anchored::No,
+            earliest: false,
+        }
+    }
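To make the defaults concrete, this sketch checks what `Input::new` yields before any builder methods are applied; each assertion mirrors a getter documented further below:

```rust
use regex_automata::{Anchored, Input};

fn main() {
    let input = Input::new("foobar");
    // The span defaults to the whole haystack...
    assert_eq!(0..6, input.get_range());
    // ...the search is unanchored...
    assert_eq!(Anchored::No, input.get_anchored());
    // ...and "earliest" mode is off.
    assert!(!input.get_earliest());
}
```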
+
+    /// Set the span for this search.
+    ///
+    /// This routine is generic over how a span is provided. While
+    /// a [`Span`] may be given directly, one may also provide a
+    /// `std::ops::Range<usize>`. To provide anything supported by range
+    /// syntax, use the [`Input::range`] method.
+    ///
+    /// The default span is the entire haystack.
+    ///
+    /// Note that [`Input::range`] overrides this method and vice versa.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given span does not correspond to valid bounds in
+    /// the haystack or the termination of a search.
+    ///
+    /// # Example
+    ///
+    /// This example shows how the span of the search can impact whether a
+    /// match is reported or not. This is particularly relevant for look-around
+    /// operators, which might take things outside of the span into account
+    /// when determining whether they match.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     Match, Input,
+    /// };
+    ///
+    /// // Look for 'at', but as a distinct word.
+    /// let re = PikeVM::new(r"\bat\b")?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = re.create_captures();
+    ///
+    /// // Our haystack contains 'at', but not as a distinct word.
+    /// let haystack = "batter";
+    ///
+    /// // A standard search finds nothing, as expected.
+    /// let input = Input::new(haystack);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(None, caps.get_match());
+    ///
+    /// // But if we wanted to search starting at position '1', we might
+    /// // slice the haystack. If we do this, it's impossible for the \b
+    /// // anchors to take the surrounding context into account! And thus,
+    /// // a match is produced.
+    /// let input = Input::new(&haystack[1..3]);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 0..2)), caps.get_match());
+    ///
+    /// // But if we specify the span of the search instead of slicing the
+    /// // haystack, then the regex engine can "see" outside of the span
+    /// // and resolve the anchors correctly.
+    /// let input = Input::new(haystack).span(1..3);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(None, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// This may seem a little ham-fisted, but this scenario tends to come up
+    /// if some other regex engine found the match span and now you need to
+    /// re-process that span to look for capturing groups. (e.g., Run a faster
+    /// DFA first, find a match, then run the PikeVM on just the match span to
+    /// resolve capturing groups.) In order to implement that sort of logic
+    /// correctly, you need to set the span on the search instead of slicing
+    /// the haystack directly.
+    ///
+    /// The other advantage of using this routine to specify the bounds of the
+    /// search is that the match offsets are still reported in terms of the
+    /// original haystack. For example, the second search in the example above
+    /// reported a match at position `0`, even though `at` starts at offset
+    /// `1`, because we sliced the haystack.
+    #[inline]
+    pub fn span<S: Into<Span>>(mut self, span: S) -> Input<'h> {
+        self.set_span(span);
+        self
+    }
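One more convenience worth spelling out: because of the `From<&H> for Input` impl described at the top of this type's documentation, APIs that accept an `Into<Input>` take a haystack directly. A sketch of the equivalence, reusing the `PikeVM` and its `find` method from the examples in this file:

```rust
use regex_automata::{nfa::thompson::pikevm::PikeVM, Input, Match};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let re = PikeVM::new(r"ob")?;
    let mut cache = re.create_cache();

    // A plain &str converts into an Input automatically...
    assert_eq!(Some(Match::must(0, 2..4)), re.find(&mut cache, "foobar"));
    // ...and is equivalent to passing a default Input explicitly.
    let input = Input::new("foobar");
    assert_eq!(Some(Match::must(0, 2..4)), re.find(&mut cache, input));
    Ok(())
}
```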
+
+    /// Like `Input::span`, but accepts any range instead.
+    ///
+    /// The default range is the entire haystack.
+    ///
+    /// Note that [`Input::span`] overrides this method and vice versa.
+    ///
+    /// # Panics
+    ///
+    /// This routine will panic if the given range could not be converted
+    /// to a valid [`Range`]. For example, this would panic when given
+    /// `0..=usize::MAX` since it cannot be represented using a half-open
+    /// interval in terms of `usize`.
+    ///
+    /// This also panics if the given range does not correspond to valid bounds
+    /// in the haystack or the termination of a search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    ///
+    /// let input = Input::new("foobar").range(2..=4);
+    /// assert_eq!(2..5, input.get_range());
+    /// ```
+    #[inline]
+    pub fn range<R: RangeBounds<usize>>(mut self, range: R) -> Input<'h> {
+        self.set_range(range);
+        self
+    }
+
+    /// Sets the anchor mode of a search.
+    ///
+    /// When a search is anchored (so that's [`Anchored::Yes`] or
+    /// [`Anchored::Pattern`]), a match must begin at the start of a search.
+    /// When a search is not anchored (that's [`Anchored::No`]), regex engines
+    /// will behave as if the pattern started with a `(?s-u:.)*?`. This prefix
+    /// permits a match to appear anywhere.
+    ///
+    /// By default, the anchored mode is [`Anchored::No`].
+    ///
+    /// **WARNING:** this is subtly different than using a `^` at the start of
+    /// your regex. A `^` forces a regex to match exclusively at the start of
+    /// a haystack, regardless of where you begin your search. In contrast,
+    /// anchoring a search will allow your regex to match anywhere in your
+    /// haystack, but the match must start at the beginning of a search.
+    ///
+    /// For example, consider the haystack `aba` and the following searches:
+    ///
+    /// 1. The regex `^a` is compiled with `Anchored::No` and searches `aba`
+    ///    starting at position `2`. Since `^` requires the match to start at
+    ///    the beginning of the haystack and `2 > 0`, no match is found.
+    /// 2. The regex `a` is compiled with `Anchored::Yes` and searches `aba`
+    ///    starting at position `2`. This reports a match at `[2, 3]` since
+    ///    the match starts where the search started. Since there is no `^`,
+    ///    there is no requirement for the match to start at the beginning of
+    ///    the haystack.
+    /// 3. The regex `a` is compiled with `Anchored::Yes` and searches `aba`
+    ///    starting at position `1`. Since `b` corresponds to position `1` and
+    ///    since the search is anchored, it finds no match. While the regex
+    ///    matches at other positions, configuring the search to be anchored
+    ///    requires that it only report a match that begins at the same offset
+    ///    as the beginning of the search.
+    /// 4. The regex `a` is compiled with `Anchored::No` and searches `aba`
+    ///    starting at position `1`. Since the search is not anchored and
+    ///    the regex does not start with `^`, the search executes as if there
+    ///    is a `(?s:.)*?` prefix that permits it to match anywhere. Thus, it
+    ///    reports a match at `[2, 3]`.
+    ///
+    /// Note that the [`Anchored::Pattern`] mode is like `Anchored::Yes`,
+    /// except it only reports matches for a particular pattern.
+    ///
+    /// # Example
+    ///
+    /// This demonstrates the differences between an anchored search and
+    /// a pattern that begins with `^` (as described in the above warning
+    /// message).
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     Anchored, Match, Input,
+    /// };
+    ///
+    /// let haystack = "aba";
+    ///
+    /// let re = PikeVM::new(r"^a")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let input = Input::new(haystack).span(2..3).anchored(Anchored::No);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// // No match is found because 2 is not the beginning of the haystack,
+    /// // which is what ^ requires.
+    /// assert_eq!(None, caps.get_match());
+    ///
+    /// let re = PikeVM::new(r"a")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let input = Input::new(haystack).span(2..3).anchored(Anchored::Yes);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// // An anchored search can still match anywhere in the haystack, it just
+    /// // must begin at the start of the search which is '2' in this case.
+    /// assert_eq!(Some(Match::must(0, 2..3)), caps.get_match());
+    ///
+    /// let re = PikeVM::new(r"a")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let input = Input::new(haystack).span(1..3).anchored(Anchored::Yes);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// // No match is found since we start searching at offset 1 which
+    /// // corresponds to 'b'. Since there is no '(?s:.)*?' prefix, no match
+    /// // is found.
+    /// assert_eq!(None, caps.get_match());
+    ///
+    /// let re = PikeVM::new(r"a")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let input = Input::new(haystack).span(1..3).anchored(Anchored::No);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// // Since anchored=no, an implicit '(?s:.)*?' prefix was added to the
+    /// // pattern. Even though the search starts at 'b', the 'match anything'
+    /// // prefix allows the search to match 'a'.
+    /// let expected = Some(Match::must(0, 2..3));
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn anchored(mut self, mode: Anchored) -> Input<'h> {
+        self.set_anchored(mode);
+        self
+    }
+
+    /// Whether to execute an "earliest" search or not.
+    ///
+    /// When running a non-overlapping search, an "earliest" search will return
+    /// the match location as early as possible. For example, given a pattern
+    /// of `foo[0-9]+` and a haystack of `foo12345`, a normal leftmost search
+    /// will return `foo12345` as a match. But an "earliest" search for regex
+    /// engines that support "earliest" semantics will return `foo1` as a
+    /// match, since as soon as the first digit following `foo` is seen, it is
+    /// known to have found a match.
+    ///
+    /// Note that "earliest" semantics generally depend on the regex engine.
+    /// Different regex engines may determine there is a match at different
+    /// points. So there is no guarantee that "earliest" matches will always
+    /// return the same offsets for all regex engines. The "earliest" notion
+    /// is really about when the particular regex engine determines there is
+    /// a match rather than a consistent semantic unto itself. This is often
+    /// useful for implementing "did a match occur or not" predicates, but
+    /// sometimes the offset is useful as well.
+    ///
+    /// This is disabled by default.
+    ///
+    /// # Example
+    ///
+    /// This example shows the difference between "earliest" searching and
+    /// normal searching.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match, Input};
+    ///
+    /// let re = PikeVM::new(r"foo[0-9]+")?;
+    /// let mut cache = re.create_cache();
+    /// let mut caps = re.create_captures();
+    ///
+    /// // A normal search implements greediness like you expect.
+    /// let input = Input::new("foo12345");
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 0..8)), caps.get_match());
+    ///
+    /// // When 'earliest' is enabled and the regex engine supports
+    /// // it, the search will bail once it knows a match has been
+    /// // found.
+    /// let input = Input::new("foo12345").earliest(true);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(Some(Match::must(0, 0..4)), caps.get_match());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn earliest(mut self, yes: bool) -> Input<'h> {
+        self.set_earliest(yes);
+        self
+    }
+
+    /// Set the span for this search configuration.
+    ///
+    /// This is like the [`Input::span`] method, except this mutates the
+    /// span in place.
+    ///
+    /// This routine is generic over how a span is provided. While
+    /// a [`Span`] may be given directly, one may also provide a
+    /// `std::ops::Range<usize>`.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given span does not correspond to valid bounds in
+    /// the haystack or the termination of a search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// input.set_span(2..4);
+    /// assert_eq!(2..4, input.get_range());
+    /// ```
+    #[inline]
+    pub fn set_span<S: Into<Span>>(&mut self, span: S) {
+        let span = span.into();
+        assert!(
+            span.end <= self.haystack.len()
+                && span.start <= span.end.wrapping_add(1),
+            "invalid span {:?} for haystack of length {}",
+            span,
+            self.haystack.len(),
+        );
+        self.span = span;
+    }
+
+    /// Set the span for this search configuration given any range.
+    ///
+    /// This is like the [`Input::range`] method, except this mutates the
+    /// span in place.
+    ///
+    /// # Panics
+    ///
+    /// This routine will panic if the given range could not be converted
+    /// to a valid [`Range`]. For example, this would panic when given
+    /// `0..=usize::MAX` since it cannot be represented using a half-open
+    /// interval in terms of `usize`.
+    ///
+    /// This also panics if the given span does not correspond to valid bounds
+    /// in the haystack or the termination of a search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// input.set_range(2..=4);
+    /// assert_eq!(2..5, input.get_range());
+    /// ```
+    #[inline]
+    pub fn set_range<R: RangeBounds<usize>>(&mut self, range: R) {
+        use core::ops::Bound;
+
+        // It's a little weird to convert ranges into spans, and then spans
+        // back into ranges when we actually slice the haystack. Because
+        // of that process, we always represent everything as a half-open
+        // interval. Therefore, handling things like m..=n is a little
+        // awkward.
+        let start = match range.start_bound() {
+            Bound::Included(&i) => i,
+            // Can this case ever happen? Range syntax doesn't support it...
+            Bound::Excluded(&i) => i.checked_add(1).unwrap(),
+            Bound::Unbounded => 0,
+        };
+        let end = match range.end_bound() {
+            Bound::Included(&i) => i.checked_add(1).unwrap(),
+            Bound::Excluded(&i) => i,
+            Bound::Unbounded => self.haystack().len(),
+        };
+        self.set_span(Span { start, end });
+    }
+
+    /// Set the starting offset for the span for this search configuration.
+    ///
+    /// This is a convenience routine for only mutating the start of a span
+    /// without having to set the entire span.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the span resulting from the new start position does not
+    /// correspond to valid bounds in the haystack or the termination of a
+    /// search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// input.set_start(5);
+    /// assert_eq!(5..6, input.get_range());
+    /// ```
+    #[inline]
+    pub fn set_start(&mut self, start: usize) {
+        self.set_span(Span { start, ..self.get_span() });
+    }
+
+    /// Set the ending offset for the span for this search configuration.
+    ///
+    /// This is a convenience routine for only mutating the end of a span
+    /// without having to set the entire span.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the span resulting from the new end position does not
+    /// correspond to valid bounds in the haystack or the termination of a
+    /// search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// input.set_end(5);
+    /// assert_eq!(0..5, input.get_range());
+    /// ```
+    #[inline]
+    pub fn set_end(&mut self, end: usize) {
+        self.set_span(Span { end, ..self.get_span() });
+    }
+
+    /// Set the anchor mode of a search.
+    ///
+    /// This is like [`Input::anchored`], except it mutates the search
+    /// configuration in place.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{Anchored, Input, PatternID};
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(Anchored::No, input.get_anchored());
+    ///
+    /// let pid = PatternID::must(5);
+    /// input.set_anchored(Anchored::Pattern(pid));
+    /// assert_eq!(Anchored::Pattern(pid), input.get_anchored());
+    /// ```
+    #[inline]
+    pub fn set_anchored(&mut self, mode: Anchored) {
+        self.anchored = mode;
+    }
+
+    /// Set whether the search should execute in "earliest" mode or not.
+    ///
+    /// This is like [`Input::earliest`], except it mutates the search
+    /// configuration in place.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert!(!input.get_earliest());
+    /// input.set_earliest(true);
+    /// assert!(input.get_earliest());
+    /// ```
+    #[inline]
+    pub fn set_earliest(&mut self, yes: bool) {
+        self.earliest = yes;
+    }
+
+    /// Return a borrow of the underlying haystack as a slice of bytes.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(b"foobar", input.haystack());
+    /// ```
+    #[inline]
+    pub fn haystack(&self) -> &[u8] {
+        self.haystack
+    }
+
+    /// Return the start position of this search.
+    ///
+    /// This is a convenience routine for `search.get_span().start()`.
+    ///
+    /// When [`Input::is_done`] is `false`, this is guaranteed to return
+    /// an offset that is less than or equal to [`Input::end`]. Otherwise,
+    /// the offset is one greater than [`Input::end`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(0, input.start());
+    ///
+    /// let input = Input::new("foobar").span(2..4);
+    /// assert_eq!(2, input.start());
+    /// ```
+    #[inline]
+    pub fn start(&self) -> usize {
+        self.get_span().start
+    }
+
+    /// Return the end position of this search.
+    ///
+    /// This is a convenience routine for `search.get_span().end()`.
+    ///
+    /// This is guaranteed to return an offset that is a valid exclusive end
+    /// bound for this input's haystack.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(6, input.end());
+    ///
+    /// let input = Input::new("foobar").span(2..4);
+    /// assert_eq!(4, input.end());
+    /// ```
+    #[inline]
+    pub fn end(&self) -> usize {
+        self.get_span().end
+    }
+
+    /// Return the span for this search configuration.
+    ///
+    /// If one was not explicitly set, then the span corresponds to the entire
+    /// range of the haystack.
+    ///
+    /// When [`Input::is_done`] is `false`, the span returned is guaranteed
+    /// to correspond to valid bounds for this input's haystack.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{Input, Span};
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(Span { start: 0, end: 6 }, input.get_span());
+    /// ```
+    #[inline]
+    pub fn get_span(&self) -> Span {
+        self.span
+    }
+
+    /// Return the span as a range for this search configuration.
+    ///
+    /// If one was not explicitly set, then the span corresponds to the entire
+    /// range of the haystack.
+    ///
+    /// When [`Input::is_done`] is `false`, the range returned is guaranteed
+    /// to correspond to valid bounds for this input's haystack.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// ```
+    #[inline]
+    pub fn get_range(&self) -> Range<usize> {
+        self.get_span().range()
+    }
+
+    /// Return the anchored mode for this search configuration.
+    ///
+    /// If no anchored mode was set, then it defaults to [`Anchored::No`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{Anchored, Input, PatternID};
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(Anchored::No, input.get_anchored());
+    ///
+    /// let pid = PatternID::must(5);
+    /// input.set_anchored(Anchored::Pattern(pid));
+    /// assert_eq!(Anchored::Pattern(pid), input.get_anchored());
+    /// ```
+    #[inline]
+    pub fn get_anchored(&self) -> Anchored {
+        self.anchored
+    }
+
+    /// Return whether this search should execute in "earliest" mode.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let input = Input::new("foobar");
+    /// assert!(!input.get_earliest());
+    /// ```
+    #[inline]
+    pub fn get_earliest(&self) -> bool {
+        self.earliest
+    }
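These getters, combined with the setters above, are enough to drive a search loop by hand: run a search, record the match, then move the span's start past it and search again. A rough sketch, assuming patterns that cannot match the empty string (the crate's iterator APIs handle the empty-match edge cases this loop ignores):

```rust
use regex_automata::{nfa::thompson::pikevm::PikeVM, Input, Match};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let re = PikeVM::new(r"[0-9]+")?;
    let (mut cache, mut caps) = (re.create_cache(), re.create_captures());

    let mut input = Input::new("a1b22c333");
    let mut found = vec![];
    loop {
        re.search(&mut cache, &input, &mut caps);
        match caps.get_match() {
            None => break,
            Some(m) => {
                found.push(m);
                // Advance the search past the previous match.
                input.set_start(m.end());
            }
        }
    }
    assert_eq!(
        vec![
            Match::must(0, 1..2),
            Match::must(0, 3..5),
            Match::must(0, 6..9),
        ],
        found,
    );
    Ok(())
}
```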
+
+    /// Return true if and only if this search can never return any other
+    /// matches.
+    ///
+    /// This occurs when the start position of this search is greater than the
+    /// end position of the search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert!(!input.is_done());
+    /// input.set_start(6);
+    /// assert!(!input.is_done());
+    /// input.set_start(7);
+    /// assert!(input.is_done());
+    /// ```
+    #[inline]
+    pub fn is_done(&self) -> bool {
+        self.get_span().start > self.get_span().end
+    }
+
+    /// Returns true if and only if the given offset in this search's haystack
+    /// falls on a valid UTF-8 encoded codepoint boundary.
+    ///
+    /// If the haystack is not valid UTF-8, then the behavior of this routine
+    /// is unspecified.
+    ///
+    /// # Example
+    ///
+    /// This shows where codepoint boundaries do and don't exist in valid
+    /// UTF-8.
+    ///
+    /// ```
+    /// use regex_automata::Input;
+    ///
+    /// let input = Input::new("☃");
+    /// assert!(input.is_char_boundary(0));
+    /// assert!(!input.is_char_boundary(1));
+    /// assert!(!input.is_char_boundary(2));
+    /// assert!(input.is_char_boundary(3));
+    /// assert!(!input.is_char_boundary(4));
+    /// ```
+    #[inline]
+    pub fn is_char_boundary(&self, offset: usize) -> bool {
+        utf8::is_boundary(self.haystack(), offset)
+    }
+}
+
+impl<'h> core::fmt::Debug for Input<'h> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        use crate::util::escape::DebugHaystack;
+
+        f.debug_struct("Input")
+            .field("haystack", &DebugHaystack(self.haystack()))
+            .field("span", &self.span)
+            .field("anchored", &self.anchored)
+            .field("earliest", &self.earliest)
+            .finish()
+    }
+}
+
+impl<'h, H: ?Sized + AsRef<[u8]>> From<&'h H> for Input<'h> {
+    fn from(haystack: &'h H) -> Input<'h> {
+        Input::new(haystack)
+    }
+}
+
+/// A representation of a span reported by a regex engine.
+///
+/// A span corresponds to the starting and ending _byte offsets_ of a
+/// contiguous region of bytes. The starting offset is inclusive while the
+/// ending offset is exclusive. That is, a span is a half-open interval.
+///
+/// A span is used to report the offsets of a match, but it is also used to
+/// convey which region of a haystack should be searched via routines like
+/// [`Input::span`].
+///
+/// This is basically equivalent to a `std::ops::Range<usize>`, except this
+/// type implements `Copy` which makes it more ergonomic to use in the context
+/// of this crate. Like a range, this implements `Index` for `[u8]` and `str`,
+/// and `IndexMut` for `[u8]`. For convenience, this also impls `From<Range>`,
+/// which means things like `Span::from(5..10)` work.
+#[derive(Clone, Copy, Eq, Hash, PartialEq)]
+pub struct Span {
+    /// The start offset of the span, inclusive.
+    pub start: usize,
+    /// The end offset of the span, exclusive.
+    pub end: usize,
+}
+
+impl Span {
+    /// Returns this span as a range.
+    #[inline]
+    pub fn range(&self) -> Range<usize> {
+        Range::from(*self)
+    }
+
+    /// Returns true when this span is empty. That is, when `start >= end`.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.start >= self.end
+    }
+
+    /// Returns the length of this span.
+    ///
+    /// This returns `0` in precisely the cases that `is_empty` returns `true`.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.end.saturating_sub(self.start)
+    }
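Concretely, a `Span` converts to and from `Range<usize>`, compares against ranges, and (via the `Index` impls just below) slices `str` and `[u8]` directly; a brief sketch:

```rust
use regex_automata::Span;

fn main() {
    let span = Span::from(2..4);
    assert_eq!(2, span.len());
    // Spans compare directly against ranges...
    assert_eq!(span, 2..4);
    // ...and the Index impls below let them slice haystacks directly.
    assert_eq!("ob", &"foobar"[span]);
}
```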
+
+    /// Returns true when the given offset is contained within this span.
+    ///
+    /// Note that an empty span contains no offsets and will always return
+    /// false.
+    #[inline]
+    pub fn contains(&self, offset: usize) -> bool {
+        !self.is_empty() && self.start <= offset && offset <= self.end
+    }
+
+    /// Returns a new span with `offset` added to this span's `start` and `end`
+    /// values.
+    #[inline]
+    pub fn offset(&self, offset: usize) -> Span {
+        Span { start: self.start + offset, end: self.end + offset }
+    }
+}
+
+impl core::fmt::Debug for Span {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "{}..{}", self.start, self.end)
+    }
+}
+
+impl core::ops::Index<Span> for [u8] {
+    type Output = [u8];
+
+    #[inline]
+    fn index(&self, index: Span) -> &[u8] {
+        &self[index.range()]
+    }
+}
+
+impl core::ops::IndexMut<Span> for [u8] {
+    #[inline]
+    fn index_mut(&mut self, index: Span) -> &mut [u8] {
+        &mut self[index.range()]
+    }
+}
+
+impl core::ops::Index<Span> for str {
+    type Output = str;
+
+    #[inline]
+    fn index(&self, index: Span) -> &str {
+        &self[index.range()]
+    }
+}
+
+impl From<Range<usize>> for Span {
+    #[inline]
+    fn from(range: Range<usize>) -> Span {
+        Span { start: range.start, end: range.end }
+    }
+}
+
+impl From<Span> for Range<usize> {
+    #[inline]
+    fn from(span: Span) -> Range<usize> {
+        Range { start: span.start, end: span.end }
+    }
+}
+
+impl PartialEq<Range<usize>> for Span {
+    #[inline]
+    fn eq(&self, range: &Range<usize>) -> bool {
+        self.start == range.start && self.end == range.end
+    }
+}
+
+impl PartialEq<Span> for Range<usize> {
+    #[inline]
+    fn eq(&self, span: &Span) -> bool {
+        self.start == span.start && self.end == span.end
+    }
+}
+
+/// A representation of "half" of a match reported by a DFA.
+///
+/// This is called a "half" match because it only includes the end location (or
+/// start location for a reverse search) of a match. This corresponds to the
+/// information that a single DFA scan can report. Getting the other half of
+/// the match requires a second scan with a reversed DFA.
+///
+/// A half match also includes the pattern that matched. The pattern is
+/// identified by an ID, which corresponds to its position (starting from `0`)
+/// relative to other patterns used to construct the corresponding DFA. If only
+/// a single pattern is provided to the DFA, then all matches are guaranteed to
+/// have a pattern ID of `0`.
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct HalfMatch {
+    /// The pattern ID.
+    pattern: PatternID,
+    /// The offset of the match.
+    ///
+    /// For forward searches, the offset is exclusive. For reverse searches,
+    /// the offset is inclusive.
+    offset: usize,
+}
+
+impl HalfMatch {
+    /// Create a new half match from a pattern ID and a byte offset.
+    #[inline]
+    pub fn new(pattern: PatternID, offset: usize) -> HalfMatch {
+        HalfMatch { pattern, offset }
+    }
+
+    /// Create a new half match from a pattern ID and a byte offset.
+    ///
+    /// This is like [`HalfMatch::new`], but accepts a `usize` instead of a
+    /// [`PatternID`]. This panics if the given `usize` is not representable
+    /// as a `PatternID`.
+    #[inline]
+    pub fn must(pattern: usize, offset: usize) -> HalfMatch {
+        HalfMatch::new(PatternID::new(pattern).unwrap(), offset)
+    }
+
+    /// Returns the ID of the pattern that matched.
+    ///
+    /// The ID of a pattern is derived from the position in which it was
+    /// originally inserted into the corresponding DFA. The first pattern has
+    /// identifier `0`, and each subsequent pattern is `1`, `2` and so on.
+    #[inline]
+    pub fn pattern(&self) -> PatternID {
+        self.pattern
+    }
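A small sketch of constructing and reading a `HalfMatch`; the interpretation of the offset (exclusive for a forward scan, inclusive for a reverse scan) is spelled out by the `offset` method just below:

```rust
use regex_automata::{HalfMatch, PatternID};

fn main() {
    // For example, the end position reported by a forward DFA scan.
    let hm = HalfMatch::must(0, 8);
    assert_eq!(PatternID::ZERO, hm.pattern());
    assert_eq!(8, hm.offset());
}
```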
+
+    /// The position of the match.
+    ///
+    /// If this match was produced by a forward search, then the offset is
+    /// exclusive. If this match was produced by a reverse search, then the
+    /// offset is inclusive.
+    #[inline]
+    pub fn offset(&self) -> usize {
+        self.offset
+    }
+}
+
+/// A representation of a match reported by a regex engine.
+///
+/// A match has two essential pieces of information: the [`PatternID`] that
+/// matches, and the [`Span`] of the match in a haystack.
+///
+/// The pattern is identified by an ID, which corresponds to its position
+/// (starting from `0`) relative to other patterns used to construct the
+/// corresponding regex engine. If only a single pattern is provided, then all
+/// matches are guaranteed to have a pattern ID of `0`.
+///
+/// Every match reported by a regex engine guarantees that its span has its
+/// start offset as less than or equal to its end offset.
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct Match {
+    /// The pattern ID.
+    pattern: PatternID,
+    /// The underlying match span.
+    span: Span,
+}
+
+impl Match {
+    /// Create a new match from a pattern ID and a span.
+    ///
+    /// This constructor is generic over how a span is provided. While
+    /// a [`Span`] may be given directly, one may also provide a
+    /// `std::ops::Range<usize>`.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `end < start`.
+    ///
+    /// # Example
+    ///
+    /// This shows how to create a match for the first pattern in a regex
+    /// object using convenient range syntax.
+    ///
+    /// ```
+    /// use regex_automata::{Match, PatternID};
+    ///
+    /// let m = Match::new(PatternID::ZERO, 5..10);
+    /// assert_eq!(0, m.pattern().as_usize());
+    /// assert_eq!(5, m.start());
+    /// assert_eq!(10, m.end());
+    /// ```
+    #[inline]
+    pub fn new<S: Into<Span>>(pattern: PatternID, span: S) -> Match {
+        let span: Span = span.into();
+        assert!(span.start <= span.end, "invalid match span");
+        Match { pattern, span }
+    }
+
+    /// Create a new match from a pattern ID and a byte offset span.
+    ///
+    /// This constructor is generic over how a span is provided. While
+    /// a [`Span`] may be given directly, one may also provide a
+    /// `std::ops::Range<usize>`.
+    ///
+    /// This is like [`Match::new`], but accepts a `usize` instead of a
+    /// [`PatternID`]. This panics if the given `usize` is not representable
+    /// as a `PatternID`.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `end < start` or if `pattern > PatternID::MAX`.
+    ///
+    /// # Example
+    ///
+    /// This shows how to create a match for the third pattern in a regex
+    /// object using convenient range syntax.
+    ///
+    /// ```
+    /// use regex_automata::Match;
+    ///
+    /// let m = Match::must(3, 5..10);
+    /// assert_eq!(3, m.pattern().as_usize());
+    /// assert_eq!(5, m.start());
+    /// assert_eq!(10, m.end());
+    /// ```
+    #[inline]
+    pub fn must<S: Into<Span>>(pattern: usize, span: S) -> Match {
+        Match::new(PatternID::must(pattern), span)
+    }
+
+    /// Returns the ID of the pattern that matched.
+    ///
+    /// The ID of a pattern is derived from the position in which it was
+    /// originally inserted into the corresponding regex engine. The first
+    /// pattern has identifier `0`, and each subsequent pattern is `1`, `2` and
+    /// so on.
+    #[inline]
+    pub fn pattern(&self) -> PatternID {
+        self.pattern
+    }
+
+    /// The starting position of the match.
+    ///
+    /// This is a convenience routine for `Match::span().start`.
+    #[inline]
+    pub fn start(&self) -> usize {
+        self.span().start
+    }
+
+    /// The ending position of the match.
+    ///
+    /// This is a convenience routine for `Match::span().end`.
+    #[inline]
+    pub fn end(&self) -> usize {
+        self.span().end
+    }
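Pulling the accessors together, including `span`, `range`, `len` and `is_empty` defined immediately below, a minimal sketch:

```rust
use regex_automata::Match;

fn main() {
    let m = Match::must(0, 2..5);
    assert_eq!(0, m.pattern().as_usize());
    assert_eq!(2, m.start());
    assert_eq!(5, m.end());
    assert_eq!(2..5, m.range());
    assert_eq!(3, m.len());
    assert!(!m.is_empty());
}
```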
+
+    /// Returns the match span as a range.
+    ///
+    /// This is a convenience routine for `Match::span().range()`.
+    #[inline]
+    pub fn range(&self) -> core::ops::Range<usize> {
+        self.span().range()
+    }
+
+    /// Returns the span for this match.
+    #[inline]
+    pub fn span(&self) -> Span {
+        self.span
+    }
+
+    /// Returns true when the span in this match is empty.
+    ///
+    /// An empty match can only be returned when the regex itself can match
+    /// the empty string.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.span().is_empty()
+    }
+
+    /// Returns the length of this match.
+    ///
+    /// This returns `0` in precisely the cases that `is_empty` returns `true`.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.span().len()
+    }
+}
+
+/// A set of `PatternID`s.
+///
+/// A set of pattern identifiers is useful for recording which patterns have
+/// matched a particular haystack. A pattern set _only_ includes pattern
+/// identifiers. It does not include offset information.
+///
+/// # Example
+///
+/// This shows basic usage of a set.
+///
+/// ```
+/// use regex_automata::{PatternID, PatternSet};
+///
+/// let pid1 = PatternID::must(5);
+/// let pid2 = PatternID::must(8);
+/// // Create a new empty set.
+/// let mut set = PatternSet::new(10);
+/// // Insert pattern IDs.
+/// set.insert(pid1);
+/// set.insert(pid2);
+/// // Test membership.
+/// assert!(set.contains(pid1));
+/// assert!(set.contains(pid2));
+/// // Get all members.
+/// assert_eq!(
+///     vec![5, 8],
+///     set.iter().map(|p| p.as_usize()).collect::<Vec<usize>>(),
+/// );
+/// // Clear the set.
+/// set.clear();
+/// // Test that it is indeed empty.
+/// assert!(set.is_empty());
+/// ```
+#[cfg(feature = "alloc")]
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct PatternSet {
+    /// The number of patterns set to 'true' in this set.
+    len: usize,
+    /// A map from PatternID to boolean of whether a pattern matches or not.
+    ///
+    /// This should probably be a bitset, but it's probably unlikely to matter
+    /// much in practice.
+    ///
+    /// The main downside of this representation (and similarly for a bitset)
+    /// is that iteration scales with the capacity of the set instead of
+    /// the length of the set. This doesn't seem likely to be a problem in
+    /// practice.
+    ///
+    /// Another alternative is to just use a 'SparseSet' for this. It does use
+    /// more memory (quite a bit more), but that seems fine I think compared
+    /// to the memory being used by the regex engine. The real hiccup with
+    /// it is that it yields pattern IDs in the order they were inserted.
+    /// Which is actually kind of nice, but at the time of writing, pattern
+    /// IDs are yielded in ascending order in the regex crate RegexSet API.
+    /// If we did change to 'SparseSet', we could provide an additional
+    /// 'iter_match_order' iterator, but keep the ascending order one for
+    /// compatibility.
+    which: alloc::boxed::Box<[bool]>,
+}
+
+#[cfg(feature = "alloc")]
+impl PatternSet {
+    /// Create a new set of pattern identifiers with the given capacity.
+    ///
+    /// The given capacity typically corresponds to (at least) the number of
+    /// patterns in a compiled regex object.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given capacity exceeds [`PatternID::LIMIT`]. This is
+    /// impossible if you use the `pattern_len()` method as defined on any of
+    /// the regex engines in this crate. Namely, a regex will fail to build by
+    /// returning an error if the number of patterns given to it exceeds the
+    /// limit. Therefore, the number of patterns in a valid regex is always
+    /// a correct capacity to provide here.
+    pub fn new(capacity: usize) -> PatternSet {
+        assert!(
+            capacity <= PatternID::LIMIT,
+            "pattern set capacity exceeds limit of {}",
+            PatternID::LIMIT,
+        );
+        PatternSet {
+            len: 0,
+            which: alloc::vec![false; capacity].into_boxed_slice(),
+        }
+    }
+
+    /// Clear this set such that it contains no pattern IDs.
+    pub fn clear(&mut self) {
+        self.len = 0;
+        for matched in self.which.iter_mut() {
+            *matched = false;
+        }
+    }
+
+    /// Return true if and only if the given pattern identifier is in this set.
+    pub fn contains(&self, pid: PatternID) -> bool {
+        pid.as_usize() < self.capacity() && self.which[pid]
+    }
+
+    /// Insert the given pattern identifier into this set and return `true` if
+    /// the given pattern ID was not previously in this set.
+    ///
+    /// If the pattern identifier is already in this set, then this is a no-op.
+    ///
+    /// Use [`PatternSet::try_insert`] for a fallible version of this routine.
+    ///
+    /// # Panics
+    ///
+    /// This panics if this pattern set has insufficient capacity to
+    /// store the given pattern ID.
+    pub fn insert(&mut self, pid: PatternID) -> bool {
+        self.try_insert(pid)
+            .expect("PatternSet should have sufficient capacity")
+    }
+
+    /// Insert the given pattern identifier into this set and return `true` if
+    /// the given pattern ID was not previously in this set.
+    ///
+    /// If the pattern identifier is already in this set, then this is a no-op.
+    ///
+    /// # Errors
+    ///
+    /// This returns an error if this pattern set has insufficient capacity to
+    /// store the given pattern ID.
+    pub fn try_insert(
+        &mut self,
+        pid: PatternID,
+    ) -> Result<bool, PatternSetInsertError> {
+        if pid.as_usize() >= self.capacity() {
+            return Err(PatternSetInsertError {
+                attempted: pid,
+                capacity: self.capacity(),
+            });
+        }
+        if self.which[pid] {
+            return Ok(false);
+        }
+        self.len += 1;
+        self.which[pid] = true;
+        Ok(true)
+    }
+
+    /*
+    // This is currently commented out because it is unused and it is unclear
+    // whether it's useful or not. What's the harm in having it? Well, if we
+    // ever wanted to change our representation to a 'SparseSet', then
+    // supporting this method would be a bit tricky. So in order to keep some
+    // API evolution flexibility, we leave it out for now.
+
+    /// Remove the given pattern identifier from this set.
+    ///
+    /// If the pattern identifier was not previously in this set, then this
+    /// does not change the set and returns `false`.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `pid` exceeds the capacity of this set.
+    pub fn remove(&mut self, pid: PatternID) -> bool {
+        if !self.which[pid] {
+            return false;
+        }
+        self.len -= 1;
+        self.which[pid] = false;
+        true
+    }
+    */
+
+    /// Return true if and only if this set has no pattern identifiers in it.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Return true if and only if this set has the maximum number of pattern
+    /// identifiers in the set. This occurs precisely when `PatternSet::len()
+    /// == PatternSet::capacity()`.
+    ///
+    /// This particular property is useful to test because it may allow one to
+    /// stop a search earlier than you might otherwise. Namely, if a search is
+    /// only reporting which patterns match a haystack and if you know all of
+    /// the patterns match at a given point, then there's no new information
+    /// that can be learned by continuing the search. (Because a pattern set
+    /// does not keep track of offset information.)
+    pub fn is_full(&self) -> bool {
+        self.len() == self.capacity()
+    }
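The panicking and fallible insertion paths differ only in how a capacity violation is reported; this sketch exercises both outcomes of `try_insert`:

```rust
use regex_automata::{PatternID, PatternSet};

fn main() {
    let mut set = PatternSet::new(4);
    // A first insert reports true; a duplicate insert reports false.
    assert!(set.try_insert(PatternID::must(3)).unwrap());
    assert!(!set.try_insert(PatternID::must(3)).unwrap());
    // An ID at or beyond the capacity is a recoverable error here,
    // whereas the plain 'insert' routine would panic instead.
    assert!(set.try_insert(PatternID::must(4)).is_err());
    assert_eq!(1, set.len());
}
```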
+
+    /// Returns the total number of pattern identifiers in this set.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns the total number of pattern identifiers that may be stored
+    /// in this set.
+    ///
+    /// This is guaranteed to be less than or equal to [`PatternID::LIMIT`].
+    ///
+    /// Typically, the capacity of a pattern set matches the number of patterns
+    /// in a regex object with which you are searching.
+    pub fn capacity(&self) -> usize {
+        self.which.len()
+    }
+
+    /// Returns an iterator over all pattern identifiers in this set.
+    ///
+    /// The iterator yields pattern identifiers in ascending order, starting
+    /// at zero.
+    pub fn iter(&self) -> PatternSetIter<'_> {
+        PatternSetIter { it: self.which.iter().enumerate() }
+    }
+}
+
+/// An error that occurs when a `PatternID` failed to insert into a
+/// `PatternSet`.
+///
+/// An insert fails when the given `PatternID` exceeds the configured capacity
+/// of the `PatternSet`.
+///
+/// This error is created by the [`PatternSet::try_insert`] routine.
+#[cfg(feature = "alloc")]
+#[derive(Clone, Debug)]
+pub struct PatternSetInsertError {
+    attempted: PatternID,
+    capacity: usize,
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for PatternSetInsertError {}
+
+#[cfg(feature = "alloc")]
+impl core::fmt::Display for PatternSetInsertError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(
+            f,
+            "failed to insert pattern ID {} into pattern set \
+             with insufficient capacity of {}",
+            self.attempted.as_usize(),
+            self.capacity,
+        )
+    }
+}
+
+/// An iterator over all pattern identifiers in a [`PatternSet`].
+///
+/// The lifetime parameter `'a` refers to the lifetime of the pattern set being
+/// iterated over.
+///
+/// This iterator is created by the [`PatternSet::iter`] method.
+#[cfg(feature = "alloc")]
+#[derive(Clone, Debug)]
+pub struct PatternSetIter<'a> {
+    it: core::iter::Enumerate<core::slice::Iter<'a, bool>>,
+}
+
+#[cfg(feature = "alloc")]
+impl<'a> Iterator for PatternSetIter<'a> {
+    type Item = PatternID;
+
+    fn next(&mut self) -> Option<PatternID> {
+        while let Some((index, &yes)) = self.it.next() {
+            if yes {
+                // Only valid 'PatternID' values can be inserted into the set
+                // and construction of the set panics if the capacity would
+                // permit storing invalid pattern IDs. Thus, 'yes' is only true
+                // precisely when 'index' corresponds to a valid 'PatternID'.
+                return Some(PatternID::new_unchecked(index));
+            }
+        }
+        None
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<'a> DoubleEndedIterator for PatternSetIter<'a> {
+    fn next_back(&mut self) -> Option<PatternID> {
+        while let Some((index, &yes)) = self.it.next_back() {
+            if yes {
+                // Only valid 'PatternID' values can be inserted into the set
+                // and construction of the set panics if the capacity would
+                // permit storing invalid pattern IDs. Thus, 'yes' is only true
+                // precisely when 'index' corresponds to a valid 'PatternID'.
+                return Some(PatternID::new_unchecked(index));
+            }
+        }
+        None
+    }
+}
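Since the iterator scans the underlying boolean map by position, pattern IDs come out in ascending order no matter the insertion order, and the `DoubleEndedIterator` impl above permits reverse traversal; a sketch:

```rust
use regex_automata::{PatternID, PatternSet};

fn main() {
    let mut set = PatternSet::new(10);
    set.insert(PatternID::must(8));
    set.insert(PatternID::must(5));
    // Ascending order, independent of insertion order...
    let ids: Vec<usize> = set.iter().map(|p| p.as_usize()).collect();
    assert_eq!(vec![5, 8], ids);
    // ...and reversible.
    let rev: Vec<usize> = set.iter().rev().map(|p| p.as_usize()).collect();
    assert_eq!(vec![8, 5], rev);
}
```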
+///
+/// # Errors
+///
+/// If a regex engine does not support the anchored mode selected, then the
+/// regex engine will return an error. While any non-trivial regex engine
+/// should support at least one of the available anchored modes, there is no
+/// singular mode that is guaranteed to be universally supported. Some regex
+/// engines might only support unanchored searches (DFAs compiled without
+/// anchored starting states) and some regex engines might only support
+/// anchored searches (like the one-pass DFA).
+///
+/// The specific error returned is a [`MatchError`] with a
+/// [`MatchErrorKind::UnsupportedAnchored`] kind. The kind includes the
+/// `Anchored` value given that is unsupported.
+///
+/// Note that regex engines should report "no match" if, for example, an
+/// `Anchored::Pattern` is provided with an invalid pattern ID _but_ where
+/// anchored searches for a specific pattern are supported. This smooths out
+/// behavior such that it's possible to guarantee that an error never occurs
+/// based on how the regex engine is configured. All regex engines in this
+/// crate report "no match" when searching for an invalid pattern ID, but
+/// otherwise support searching for any valid pattern ID.
+///
+/// # Example
+///
+/// This example shows how to use the various `Anchored` modes to run a
+/// search. We use the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM)
+/// because it supports all modes unconditionally. Some regex engines, like
+/// the [`onepass::DFA`](crate::dfa::onepass::DFA), cannot support unanchored
+/// searches.
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::{
+///     nfa::thompson::pikevm::PikeVM,
+///     Anchored, Input, Match, PatternID,
+/// };
+///
+/// let re = PikeVM::new_many(&[
+///     r"Mrs. \w+",
+///     r"Miss \w+",
+///     r"Mr. \w+",
+///     r"Ms. \w+",
+/// ])?;
+/// let mut cache = re.create_cache();
+/// let hay = "Hello Mr. Springsteen!";
+///
+/// // The default is to do an unanchored search.
+/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, hay));
+/// // Explicitly ask for an unanchored search. Same as above.
+/// let input = Input::new(hay).anchored(Anchored::No);
+/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, input));
+///
+/// // Now try an anchored search. Since the match doesn't start at the
+/// // beginning of the haystack, no match is found!
+/// let input = Input::new(hay).anchored(Anchored::Yes);
+/// assert_eq!(None, re.find(&mut cache, input));
+///
+/// // We can try an anchored search again, but move the location of where
+/// // we start the search. Note that the offsets reported are still in
+/// // terms of the overall haystack and not relative to where we started
+/// // the search.
+/// let input = Input::new(hay).anchored(Anchored::Yes).range(6..);
+/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, input));
+///
+/// // Now try an anchored search for a specific pattern. We specifically
+/// // choose a pattern that we know doesn't match to prove that the search
+/// // only looks for the pattern we provide.
+/// let input = Input::new(hay)
+///     .anchored(Anchored::Pattern(PatternID::must(1)))
+///     .range(6..);
+/// assert_eq!(None, re.find(&mut cache, input));
+///
+/// // But if we switch it to the pattern that we know matches, then we find
+/// // the match.
+/// let input = Input::new(hay)
+///     .anchored(Anchored::Pattern(PatternID::must(2)))
+///     .range(6..);
+/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, input));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Anchored {
+    /// Run an unanchored search. This means a match may occur anywhere at or
+    /// after the start position of the search.
+    ///
+    /// This search can return a match for any pattern in the regex.
+    No,
+    /// Run an anchored search. This means that a match must begin at the
+    /// start position of the search.
+    ///
+    /// This search can return a match for any pattern in the regex.
+    Yes,
+    /// Run an anchored search for a specific pattern. This means that a match
+    /// must be for the given pattern and must begin at the start position of
+    /// the search.
+    Pattern(PatternID),
+}
+
+impl Anchored {
+    /// Returns true if and only if this anchor mode corresponds to any kind of
+    /// anchored search.
+    ///
+    /// # Example
+    ///
+    /// This example shows that both `Anchored::Yes` and `Anchored::Pattern`
+    /// are considered anchored searches.
+    ///
+    /// ```
+    /// use regex_automata::{Anchored, PatternID};
+    ///
+    /// assert!(!Anchored::No.is_anchored());
+    /// assert!(Anchored::Yes.is_anchored());
+    /// assert!(Anchored::Pattern(PatternID::ZERO).is_anchored());
+    /// ```
+    #[inline]
+    pub fn is_anchored(&self) -> bool {
+        matches!(*self, Anchored::Yes | Anchored::Pattern(_))
+    }
+
+    /// Returns the pattern ID associated with this configuration if it is an
+    /// anchored search for a specific pattern. Otherwise `None` is returned.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{Anchored, PatternID};
+    ///
+    /// assert_eq!(None, Anchored::No.pattern());
+    /// assert_eq!(None, Anchored::Yes.pattern());
+    ///
+    /// let pid = PatternID::must(5);
+    /// assert_eq!(Some(pid), Anchored::Pattern(pid).pattern());
+    /// ```
+    #[inline]
+    pub fn pattern(&self) -> Option<PatternID> {
+        match *self {
+            Anchored::Pattern(pid) => Some(pid),
+            _ => None,
+        }
+    }
+}
+
+/// The kind of match semantics to use for a regex pattern.
+///
+/// The default match kind is `LeftmostFirst`, and this corresponds to the
+/// match semantics used by most backtracking engines, such as Perl.
+///
+/// # Leftmost first or "preference order" match semantics
+///
+/// Leftmost-first semantics determine which match to report when there are
+/// multiple paths through a regex that match at the same position. The tie is
+/// essentially broken by how a backtracker would behave. For example, consider
+/// running the regex `foofoofoo|foofoo|foo` on the haystack `foofoo`. In this
+/// case, both the `foofoo` and `foo` branches match at position `0`. So should
+/// the end of the match be `3` or `6`?
+///
+/// A backtracker will conceptually work by trying `foofoofoo` and failing.
+/// Then it will try `foofoo`, find the match and stop there. Thus, the
+/// leftmost-first match position is `6`. This is called "leftmost-first" or
+/// "preference order" because the order of the branches as written in the
+/// regex pattern is what determines how to break the tie.
+///
+/// (Note that leftmost-longest match semantics, which break ties by always
+/// taking the longest matching string, are not currently supported by this
+/// crate. These match semantics tend to be found in POSIX regex engines.)
+///
+/// This example shows how leftmost-first semantics work, and how it even
+/// applies to multi-pattern regexes:
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::pikevm::PikeVM,
+///     Match,
+/// };
+///
+/// let re = PikeVM::new_many(&[
+///     r"foofoofoo",
+///     r"foofoo",
+///     r"foo",
+/// ])?;
+/// let mut cache = re.create_cache();
+/// let got: Vec<Match> = re.find_iter(&mut cache, "foofoo").collect();
+/// let expected = vec![Match::must(1, 0..6)];
+/// assert_eq!(expected, got);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// # All matches
+///
+/// The `All` match semantics report any and all matches, and generally will
+/// attempt to match as much as possible. It doesn't respect any sort of match
+/// priority at all, so things like non-greedy matching don't work in this
+/// mode.
+///
+/// The fact that non-greedy matching doesn't work generally makes most forms
+/// of unanchored non-overlapping searches have unintuitive behavior. Namely,
+/// unanchored searches behave as if there is a `(?s-u:.)*?` prefix at the
+/// beginning of the pattern, which is specifically non-greedy. Since it will
+/// be treated as greedy in `All` match semantics, this generally means that
+/// it will first attempt to consume all of the haystack and is likely to wind
+/// up skipping matches.
+///
+/// Generally speaking, `All` should only be used in two circumstances:
+///
+/// * When running an anchored search and there is a desire to match as much as
+/// possible. For example, when building a reverse regex matcher to find the
+/// start of a match after finding the end. In this case, the reverse search
+/// is anchored to the end of the match found by the forward search.
+/// * When running overlapping searches. Since `All` encodes all possible
+/// matches, this is generally what you want for an overlapping search. If you
+/// try to use leftmost-first in an overlapping search, it is likely to produce
+/// counter-intuitive results since leftmost-first specifically excludes some
+/// matches from its underlying finite state machine.
+///
+/// This example demonstrates the counter-intuitive behavior of `All` semantics
+/// when using a standard leftmost unanchored search:
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::pikevm::PikeVM,
+///     Match, MatchKind,
+/// };
+///
+/// let re = PikeVM::builder()
+///     .configure(PikeVM::config().match_kind(MatchKind::All))
+///     .build("foo")?;
+/// let hay = "first foo second foo wat";
+/// let mut cache = re.create_cache();
+/// let got: Vec<Match> = re.find_iter(&mut cache, hay).collect();
+/// // Notice that it completely skips the first 'foo'!
+/// let expected = vec![Match::must(0, 17..20)];
+/// assert_eq!(expected, got);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// This second example shows how `All` semantics are useful for an overlapping
+/// search. Note that we use lower level lazy DFA APIs here since the NFA
+/// engines only currently support a very limited form of overlapping search.
+///
+/// ```
+/// use regex_automata::{
+///     hybrid::dfa::{DFA, OverlappingState},
+///     HalfMatch, Input, MatchKind,
+/// };
+///
+/// let re = DFA::builder()
+///     // If we didn't set 'All' semantics here, then the regex would only
+///     // match 'foo' at offset 3 and nothing else. Why? Because the state
+///     // machine implements preference order and knows that the 'foofoo' and
+///     // 'foofoofoo' branches can never match since 'foo' will always match
+///     // when they match and take priority.
+///     .configure(DFA::config().match_kind(MatchKind::All))
+///     .build(r"foo|foofoo|foofoofoo")?;
+/// let mut cache = re.create_cache();
+/// let mut state = OverlappingState::start();
+/// let input = Input::new("foofoofoo");
+/// let mut got = vec![];
+/// loop {
+///     re.try_search_overlapping_fwd(&mut cache, &input, &mut state)?;
+///     let m = match state.get_match() {
+///         None => break,
+///         Some(m) => m,
+///     };
+///     got.push(m);
+/// }
+/// let expected = vec![
+///     HalfMatch::must(0, 3),
+///     HalfMatch::must(0, 6),
+///     HalfMatch::must(0, 9),
+/// ];
+/// assert_eq!(expected, got);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[non_exhaustive]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum MatchKind {
+    /// Report all possible matches.
+    All,
+    /// Report only the leftmost matches. When multiple leftmost matches exist,
+    /// report the match corresponding to the part of the regex that appears
+    /// first in the syntax.
+    LeftmostFirst,
+    // There is prior art in RE2 that shows that we should be able to add
+    // LeftmostLongest too. The tricky part of it is supporting ungreedy
+    // repetitions. Instead of treating all NFA states as having equivalent
+    // priority (as in 'All') or treating all NFA states as having distinct
+    // priority based on order (as in 'LeftmostFirst'), we instead group NFA
+    // states into sets, and treat members of each set as having equivalent
+    // priority, but having greater priority than all following members
+    // of different sets.
+    //
+    // However, it's not clear whether it's really worth adding this. After
+    // all, leftmost-longest can be emulated when using literals by using
+    // leftmost-first and sorting the literals by length in descending order.
+    // However, this won't work for arbitrary regexes. e.g., `\w|\w\w` will
+    // always match `a` in `ab` when using leftmost-first, but leftmost-longest
+    // would match `ab`.
+}
+
+impl MatchKind {
+    #[cfg(feature = "alloc")]
+    pub(crate) fn continue_past_first_match(&self) -> bool {
+        *self == MatchKind::All
+    }
+}
+
+impl Default for MatchKind {
+    fn default() -> MatchKind {
+        MatchKind::LeftmostFirst
+    }
+}
+
+/// An error indicating that a search stopped before reporting whether a
+/// match exists or not.
+///
+/// To be very clear, this error type implies that one cannot assume that no
+/// matches occur, since the search stopped before completing. That is, if
+/// you're looking for information about where a search determined that no
+/// match can occur, then this error type does *not* give you that. (Indeed, at
+/// the time of writing, if you need such a thing, you have to write your own
+/// search routine.)
+///
+/// Normally, when one searches for something, the response is either an
+/// affirmative "it was found at this location" or a negative "not found at
+/// all." However, in some cases, a regex engine can be configured to stop its
+/// search before concluding whether a match exists or not. When this happens,
+/// it may be important for the caller to know why the regex engine gave up and
+/// where in the input it gave up. This error type exposes the 'why' and the
+/// 'where.'
+///
+/// For example, the DFAs provided by this library generally cannot correctly
+/// implement Unicode word boundaries. Instead, they provide an option to
+/// eagerly support them on ASCII text (since Unicode word boundaries are
+/// equivalent to ASCII word boundaries when searching ASCII text), but will
+/// "give up" if a non-ASCII byte is seen.
+/// In such cases, one is usually required to either report the failure to the
+/// caller (unergonomic) or otherwise fall back to some other regex engine
+/// (ergonomic, but potentially costly).
+///
+/// More generally, some regex engines offer the ability for callers to specify
+/// certain bytes that will trigger the regex engine to automatically quit if
+/// they are seen.
+///
+/// Still yet, there may be other reasons for a failed match. For example,
+/// the hybrid DFA provided by this crate can be configured to give up if it
+/// believes that it is not efficient. This in turn permits callers to choose a
+/// different regex engine.
+///
+/// (Note that DFAs are configured by default to never quit or give up in this
+/// fashion. For example, by default, a DFA will fail to build if the regex
+/// pattern contains a Unicode word boundary. One needs to opt into the "quit"
+/// behavior via options, like
+/// [`hybrid::dfa::Config::unicode_word_boundary`](crate::hybrid::dfa::Config::unicode_word_boundary).)
+///
+/// There are a couple other ways a search
+/// can fail. For example, when using the
+/// [`BoundedBacktracker`](crate::nfa::thompson::backtrack::BoundedBacktracker)
+/// with a haystack that is too long, or trying to run an unanchored search
+/// with a [one-pass DFA](crate::dfa::onepass).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct MatchError(
+    #[cfg(feature = "alloc")] alloc::boxed::Box<MatchErrorKind>,
+    #[cfg(not(feature = "alloc"))] MatchErrorKind,
+);
+
+impl MatchError {
+    /// Create a new error value with the given kind.
+    ///
+    /// This is a more verbose version of the kind-specific constructors,
+    /// e.g., `MatchError::quit`.
+    pub fn new(kind: MatchErrorKind) -> MatchError {
+        #[cfg(feature = "alloc")]
+        {
+            MatchError(alloc::boxed::Box::new(kind))
+        }
+        #[cfg(not(feature = "alloc"))]
+        {
+            MatchError(kind)
+        }
+    }
+
+    /// Returns a reference to the underlying error kind.
+    pub fn kind(&self) -> &MatchErrorKind {
+        &self.0
+    }
+
+    /// Create a new "quit" error. The given `byte` corresponds to the value
+    /// that tripped a search's quit condition, and `offset` corresponds to the
+    /// location in the haystack at which the search quit.
+    ///
+    /// This is the same as calling `MatchError::new` with a
+    /// [`MatchErrorKind::Quit`] kind.
+    pub fn quit(byte: u8, offset: usize) -> MatchError {
+        MatchError::new(MatchErrorKind::Quit { byte, offset })
+    }
+
+    /// Create a new "gave up" error. The given `offset` corresponds to the
+    /// location in the haystack at which the search gave up.
+    ///
+    /// This is the same as calling `MatchError::new` with a
+    /// [`MatchErrorKind::GaveUp`] kind.
+    pub fn gave_up(offset: usize) -> MatchError {
+        MatchError::new(MatchErrorKind::GaveUp { offset })
+    }
+
+    /// Create a new "haystack too long" error. The given `len` corresponds to
+    /// the length of the haystack that was problematic.
+    ///
+    /// This is the same as calling `MatchError::new` with a
+    /// [`MatchErrorKind::HaystackTooLong`] kind.
+    pub fn haystack_too_long(len: usize) -> MatchError {
+        MatchError::new(MatchErrorKind::HaystackTooLong { len })
+    }
+
+    /// Create a new "unsupported anchored" error. This occurs when the caller
+    /// requests a search with an anchor mode that is not supported by the
+    /// regex engine.
+    ///
+    /// This is the same as calling `MatchError::new` with a
+    /// [`MatchErrorKind::UnsupportedAnchored`] kind.
+    pub fn unsupported_anchored(mode: Anchored) -> MatchError {
+        MatchError::new(MatchErrorKind::UnsupportedAnchored { mode })
+    }
+}
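+// A minimal sketch of constructing and inspecting a `MatchError`, using only
+// the constructors and the `kind()` accessor defined above:
+//
+//     let err = MatchError::quit(0xFF, 5);
+//     match *err.kind() {
+//         MatchErrorKind::Quit { byte, offset } => {
+//             assert_eq!((0xFF, 5), (byte, offset));
+//         }
+//         _ => unreachable!(),
+//     }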
+/// The underlying kind of a [`MatchError`].
+///
+/// This is a **non-exhaustive** enum. That means new variants may be added in
+/// a semver-compatible release.
+#[non_exhaustive]
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum MatchErrorKind {
+    /// The search saw a "quit" byte at which it was instructed to stop
+    /// searching.
+    Quit {
+        /// The "quit" byte that was observed that caused the search to stop.
+        byte: u8,
+        /// The offset at which the quit byte was observed.
+        offset: usize,
+    },
+    /// The search, based on heuristics, determined that it would be better
+    /// to stop, typically to provide the caller an opportunity to use an
+    /// alternative regex engine.
+    ///
+    /// Currently, the only way for this to occur is via the lazy DFA and
+    /// only when it is configured to do so (it will not return this error by
+    /// default).
+    GaveUp {
+        /// The offset at which the search stopped. This corresponds to the
+        /// position immediately following the last byte scanned.
+        offset: usize,
+    },
+    /// This error occurs if the haystack given to the regex engine was too
+    /// long to be searched. This occurs, for example, with regex engines
+    /// like the bounded backtracker that have a configurable fixed amount of
+    /// capacity that is tied to the length of the haystack. Anything beyond
+    /// that configured limit will result in an error at search time.
+    HaystackTooLong {
+        /// The length of the haystack that exceeded the limit.
+        len: usize,
+    },
+    /// An error indicating that a particular type of anchored search was
+    /// requested, but that the regex engine does not support it.
+    ///
+    /// Note that this error should not be returned by a regex engine simply
+    /// because the pattern ID is invalid (i.e., equal to or exceeds the number
+    /// of patterns in the regex). In that case, the regex engine should report
+    /// a non-match.
+    UnsupportedAnchored {
+        /// The anchored mode given that is unsupported.
+        mode: Anchored,
+    },
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for MatchError {}
+
+impl core::fmt::Display for MatchError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        match *self.kind() {
+            MatchErrorKind::Quit { byte, offset } => write!(
+                f,
+                "quit search after observing byte {:?} at offset {}",
+                DebugByte(byte),
+                offset,
+            ),
+            MatchErrorKind::GaveUp { offset } => {
+                write!(f, "gave up searching at offset {}", offset)
+            }
+            MatchErrorKind::HaystackTooLong { len } => {
+                write!(f, "haystack of length {} is too long", len)
+            }
+            MatchErrorKind::UnsupportedAnchored { mode: Anchored::Yes } => {
+                write!(f, "anchored searches are not supported or enabled")
+            }
+            MatchErrorKind::UnsupportedAnchored { mode: Anchored::No } => {
+                write!(f, "unanchored searches are not supported or enabled")
+            }
+            MatchErrorKind::UnsupportedAnchored {
+                mode: Anchored::Pattern(pid),
+            } => {
+                write!(
+                    f,
+                    "anchored searches for a specific pattern ({}) are \
+                     not supported or enabled",
+                    pid.as_usize(),
+                )
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // We test that our 'MatchError' type is the size we expect. This isn't an
+    // API guarantee, but if the size increases, we really want to make sure we
+    // decide to do that intentionally. So this should be a speed bump. And in
+    // general, we should not increase the size without a very good reason.
+    //
+    // Why?
+    // Because low level search APIs return Result<.., MatchError>. When
+    // MatchError gets bigger, so too does the Result type.
+    //
+    // Now, when 'alloc' is enabled, we do box the error, which de-emphasizes
+    // the importance of keeping a small error type. But without 'alloc', we
+    // still want things to be small.
+    #[test]
+    fn match_error_size() {
+        let expected_size = if cfg!(feature = "alloc") {
+            core::mem::size_of::<usize>()
+        } else {
+            2 * core::mem::size_of::<usize>()
+        };
+        assert_eq!(expected_size, core::mem::size_of::<MatchError>());
+    }
+
+    // Same as above, but for the underlying match error kind.
+    #[cfg(target_pointer_width = "64")]
+    #[test]
+    fn match_error_kind_size() {
+        let expected_size = 2 * core::mem::size_of::<usize>();
+        assert_eq!(expected_size, core::mem::size_of::<MatchErrorKind>());
+    }
+
+    #[cfg(target_pointer_width = "32")]
+    #[test]
+    fn match_error_kind_size() {
+        let expected_size = 3 * core::mem::size_of::<usize>();
+        assert_eq!(expected_size, core::mem::size_of::<MatchErrorKind>());
+    }
+}
diff --git a/vendor/regex-automata/src/util/sparse_set.rs b/vendor/regex-automata/src/util/sparse_set.rs
new file mode 100644
index 0000000..cbaa0b6
--- /dev/null
+++ b/vendor/regex-automata/src/util/sparse_set.rs
@@ -0,0 +1,239 @@
+/*!
+This module defines a sparse set data structure. Its most interesting
+properties are:
+
+* They preserve insertion order.
+* Set membership testing is done in constant time.
+* Set insertion is done in constant time.
+* Clearing the set is done in constant time.
+
+The cost for doing this is that the capacity of the set needs to be known up
+front, and the elements in the set are limited to state identifiers.
+
+These sets are principally used when traversing an NFA state graph. This
+happens at search time, for example, in the PikeVM. It also happens during DFA
+determinization.
+*/
+
+use alloc::{vec, vec::Vec};
+
+use crate::util::primitives::StateID;
+
+/// A pair of sparse sets.
+///
+/// This is useful when one needs to compute NFA epsilon closures from a
+/// previous set of states derived from an epsilon closure. One set can be the
+/// starting states, whereas the other set can be the destination states after
+/// following the transitions for a particular byte of input.
+///
+/// There is no significance to 'set1' or 'set2'. They are both sparse sets of
+/// the same size.
+///
+/// The members of this struct are exposed so that callers may borrow 'set1'
+/// and 'set2' individually without being forced to borrow both at the same
+/// time.
+#[derive(Clone, Debug)]
+pub(crate) struct SparseSets {
+    pub(crate) set1: SparseSet,
+    pub(crate) set2: SparseSet,
+}
+
+impl SparseSets {
+    /// Create a new pair of sparse sets where each set has the given capacity.
+    ///
+    /// This panics if the capacity given is bigger than `StateID::LIMIT`.
+    pub(crate) fn new(capacity: usize) -> SparseSets {
+        SparseSets {
+            set1: SparseSet::new(capacity),
+            set2: SparseSet::new(capacity),
+        }
+    }
+
+    /// Resizes these sparse sets to have the new capacity given.
+    ///
+    /// The sets are automatically cleared.
+    ///
+    /// This panics if the capacity given is bigger than `StateID::LIMIT`.
+    #[inline]
+    pub(crate) fn resize(&mut self, new_capacity: usize) {
+        self.set1.resize(new_capacity);
+        self.set2.resize(new_capacity);
+    }
+
+    /// Clear both sparse sets.
+    pub(crate) fn clear(&mut self) {
+        self.set1.clear();
+        self.set2.clear();
+    }
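+    // A sketch of the intended usage pattern (illustrative; the real search
+    // loops live in the PikeVM and in DFA determinization): 'set1' holds the
+    // current NFA states, the transitions for the current haystack byte are
+    // inserted into 'set2', and then the two are exchanged before processing
+    // the next byte:
+    //
+    //     sets.swap();
+    //     sets.set2.clear();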
+    /// Swap set1 with set2.
+    pub(crate) fn swap(&mut self) {
+        core::mem::swap(&mut self.set1, &mut self.set2);
+    }
+
+    /// Returns the memory usage, in bytes, used by this pair of sparse sets.
+    pub(crate) fn memory_usage(&self) -> usize {
+        self.set1.memory_usage() + self.set2.memory_usage()
+    }
+}
+
+/// A sparse set used for representing ordered NFA states.
+///
+/// This supports constant time addition and membership testing. Clearing an
+/// entire set can also be done in constant time. Iteration yields elements
+/// in the order in which they were inserted.
+///
+/// The data structure is based on: https://research.swtch.com/sparse
+/// Note though that we don't actually use uninitialized memory. We generally
+/// reuse sparse sets, so the initial allocation cost is bearable. However, its
+/// other properties listed above are extremely useful.
+#[derive(Clone)]
+pub(crate) struct SparseSet {
+    /// The number of elements currently in this set.
+    len: usize,
+    /// Dense contains the ids in the order in which they were inserted.
+    dense: Vec<StateID>,
+    /// Sparse maps ids to their location in dense.
+    ///
+    /// A state ID is in the set if and only if
+    /// sparse[id] < len && id == dense[sparse[id]].
+    ///
+    /// Note that these are indices into 'dense'. It's a little weird to use
+    /// StateID here, but we know our length can never exceed the bounds of
+    /// StateID (enforced by 'resize') and StateID will be at most 4 bytes,
+    /// whereas a usize is likely double that in most cases.
+    sparse: Vec<StateID>,
+}
+
+impl SparseSet {
+    /// Create a new sparse set with the given capacity.
+    ///
+    /// Sparse sets have a fixed size and they cannot grow. Attempting to
+    /// insert more distinct elements than the total capacity of the set will
+    /// result in a panic.
+    ///
+    /// This panics if the capacity given is bigger than `StateID::LIMIT`.
+    #[inline]
+    pub(crate) fn new(capacity: usize) -> SparseSet {
+        let mut set = SparseSet { len: 0, dense: vec![], sparse: vec![] };
+        set.resize(capacity);
+        set
+    }
+
+    /// Resizes this sparse set to have the new capacity given.
+    ///
+    /// This set is automatically cleared.
+    ///
+    /// This panics if the capacity given is bigger than `StateID::LIMIT`.
+    #[inline]
+    pub(crate) fn resize(&mut self, new_capacity: usize) {
+        assert!(
+            new_capacity <= StateID::LIMIT,
+            "sparse set capacity cannot exceed {:?}",
+            StateID::LIMIT
+        );
+        self.clear();
+        self.dense.resize(new_capacity, StateID::ZERO);
+        self.sparse.resize(new_capacity, StateID::ZERO);
+    }
+
+    /// Returns the capacity of this set.
+    ///
+    /// The capacity represents a fixed limit on the number of distinct
+    /// elements that are allowed in this set. The capacity cannot be changed.
+    #[inline]
+    pub(crate) fn capacity(&self) -> usize {
+        self.dense.len()
+    }
+
+    /// Returns the number of elements in this set.
+    #[inline]
+    pub(crate) fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns true if and only if this set is empty.
+    #[inline]
+    pub(crate) fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
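+    // How the dense/sparse invariant behaves in practice (an illustrative
+    // sketch using only the methods defined on this type):
+    //
+    //     let mut set = SparseSet::new(10);
+    //     set.insert(StateID::must(7));
+    //     set.insert(StateID::must(2));
+    //     // Now: dense = [7, 2, ..], sparse[7] = 0, sparse[2] = 1.
+    //     assert!(set.contains(StateID::must(7)));
+    //     assert_eq!(2, set.len());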
+    /// Insert the state ID value into this set and return true if the given
+    /// state ID was not previously in this set.
+    ///
+    /// This operation is idempotent. If the given value is already in this
+    /// set, then this is a no-op.
+    ///
+    /// If more than `capacity` ids are inserted, then this panics.
+    ///
+    /// This is marked as inline(always) since the compiler won't inline it
+    /// otherwise, and it's a fairly hot piece of code in DFA determinization.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn insert(&mut self, id: StateID) -> bool {
+        if self.contains(id) {
+            return false;
+        }
+
+        let i = self.len();
+        assert!(
+            i < self.capacity(),
+            "{:?} exceeds capacity of {:?} when inserting {:?}",
+            i,
+            self.capacity(),
+            id,
+        );
+        // OK since i < self.capacity() and self.capacity() is guaranteed to
+        // be <= StateID::LIMIT.
+        let index = StateID::new_unchecked(i);
+        self.dense[index] = id;
+        self.sparse[id] = index;
+        self.len += 1;
+        true
+    }
+
+    /// Returns true if and only if this set contains the given value.
+    #[inline]
+    pub(crate) fn contains(&self, id: StateID) -> bool {
+        let index = self.sparse[id];
+        index.as_usize() < self.len() && self.dense[index] == id
+    }
+
+    /// Clear this set such that it has no members.
+    #[inline]
+    pub(crate) fn clear(&mut self) {
+        self.len = 0;
+    }
+
+    /// Returns an iterator over all elements in this set, in the order in
+    /// which they were inserted.
+    #[inline]
+    pub(crate) fn iter(&self) -> SparseSetIter<'_> {
+        SparseSetIter(self.dense[..self.len()].iter())
+    }
+
+    /// Returns the heap memory usage, in bytes, used by this sparse set.
+    #[inline]
+    pub(crate) fn memory_usage(&self) -> usize {
+        self.dense.len() * StateID::SIZE + self.sparse.len() * StateID::SIZE
+    }
+}
+
+impl core::fmt::Debug for SparseSet {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let elements: Vec<StateID> = self.iter().collect();
+        f.debug_tuple("SparseSet").field(&elements).finish()
+    }
+}
+
+/// An iterator over all elements in a sparse set.
+///
+/// The lifetime `'a` refers to the lifetime of the set being iterated over.
+#[derive(Debug)]
+pub(crate) struct SparseSetIter<'a>(core::slice::Iter<'a, StateID>);
+
+impl<'a> Iterator for SparseSetIter<'a> {
+    type Item = StateID;
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn next(&mut self) -> Option<StateID> {
+        self.0.next().map(|&id| id)
+    }
+}
diff --git a/vendor/regex-automata/src/util/start.rs b/vendor/regex-automata/src/util/start.rs
new file mode 100644
index 0000000..2715378
--- /dev/null
+++ b/vendor/regex-automata/src/util/start.rs
@@ -0,0 +1,479 @@
+/*!
+Provides helpers for dealing with start state configurations in DFAs.
+*/
+
+use crate::util::{
+    look::LookMatcher,
+    search::{Anchored, Input},
+    wire::{self, DeserializeError, SerializeError},
+};
+
+/// The configuration used to determine a DFA's start state for a search.
+///
+/// A DFA has a single starting state in the typical textbook description. That
+/// is, it corresponds to the set of all starting states for the NFA that built
+/// it, along with their epsilon closures. In this crate, however, DFAs have
+/// many possible start states due to a few factors:
+///
+/// * DFAs support the ability to run either anchored or unanchored searches.
+/// Each type of search needs its own start state. For example, an unanchored
+/// search requires starting at a state corresponding to a regex with a
+/// `(?s-u:.)*?` prefix, which will match through anything.
+/// * DFAs also optionally support starting an anchored search for any one
+/// specific pattern. Each such pattern requires its own start state.
+/// * If a look-behind assertion like `^` or `\b` is used in the regex, then
+/// the DFA will need to inspect a single byte immediately before the start of
+/// the search to choose the correct start state.
+///
+/// Indeed, this configuration precisely encapsulates all of the above factors.
+/// The [`Config::anchored`] method sets which kind of anchored search to
+/// perform while the [`Config::look_behind`] method provides a way to set
+/// the byte that occurs immediately before the start of the search.
+///
+/// Generally speaking, this type is only useful when you want to run searches
+/// without using an [`Input`]. In particular, an `Input` wants a haystack
+/// slice, but callers may not have a contiguous sequence of bytes as a
+/// haystack in all cases. This type provides a lower level of control such
+/// that callers can provide their own anchored configuration and look-behind
+/// byte explicitly.
+///
+/// # Example
+///
+/// This shows basic usage that permits running a search with a DFA without
+/// using the `Input` abstraction.
+///
+/// ```
+/// use regex_automata::{
+///     dfa::{Automaton, dense},
+///     util::start,
+///     Anchored,
+/// };
+///
+/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?;
+/// let haystack = "quartz";
+///
+/// let config = start::Config::new().anchored(Anchored::Yes);
+/// let mut state = dfa.start_state(&config)?;
+/// for &b in haystack.as_bytes().iter() {
+///     state = dfa.next_state(state, b);
+/// }
+/// state = dfa.next_eoi_state(state);
+/// assert!(dfa.is_match_state(state));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// This example shows how to correctly run a search that doesn't begin at
+/// the start of a haystack. Notice how we set the look-behind byte, and as
+/// a result, the `\b` assertion does not match.
+///
+/// ```
+/// use regex_automata::{
+///     dfa::{Automaton, dense},
+///     util::start,
+///     Anchored,
+/// };
+///
+/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?;
+/// let haystack = "quartz";
+///
+/// let config = start::Config::new()
+///     .anchored(Anchored::Yes)
+///     .look_behind(Some(b'q'));
+/// let mut state = dfa.start_state(&config)?;
+/// for &b in haystack.as_bytes().iter().skip(1) {
+///     state = dfa.next_state(state, b);
+/// }
+/// state = dfa.next_eoi_state(state);
+/// // No match!
+/// assert!(!dfa.is_match_state(state));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// If we had instead not set a look-behind byte, then the DFA would assume
+/// that it was starting at the beginning of the haystack, and thus `\b` should
+/// match. This in turn would result in erroneously reporting a match:
+///
+/// ```
+/// use regex_automata::{
+///     dfa::{Automaton, dense},
+///     util::start,
+///     Anchored,
+/// };
+///
+/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?;
+/// let haystack = "quartz";
+///
+/// // Whoops, forgot the look-behind byte...
+/// let config = start::Config::new().anchored(Anchored::Yes);
+/// let mut state = dfa.start_state(&config)?;
+/// for &b in haystack.as_bytes().iter().skip(1) {
+///     state = dfa.next_state(state, b);
+/// }
+/// state = dfa.next_eoi_state(state);
+/// // And now we get a match unexpectedly.
+/// assert!(dfa.is_match_state(state));
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Config {
+    look_behind: Option<u8>,
+    anchored: Anchored,
+}
+
+impl Config {
+    /// Create a new default start configuration.
+    ///
+    /// The default is an unanchored search that starts at the beginning of the
+    /// haystack.
+    pub fn new() -> Config {
+        Config { anchored: Anchored::No, look_behind: None }
+    }
+
+    /// A convenience routine for building a start configuration from an
+    /// [`Input`] for a forward search.
+    ///
+    /// This automatically sets the look-behind byte to the byte immediately
+    /// preceding the start of the search.
+    /// If the start of the search is at offset `0`, then no look-behind byte
+    /// is set.
+    pub fn from_input_forward(input: &Input<'_>) -> Config {
+        let look_behind = input
+            .start()
+            .checked_sub(1)
+            .and_then(|i| input.haystack().get(i).copied());
+        Config { look_behind, anchored: input.get_anchored() }
+    }
+
+    /// A convenience routine for building a start configuration from an
+    /// [`Input`] for a reverse search.
+    ///
+    /// This automatically sets the look-behind byte to the byte immediately
+    /// following the end of the search. If the end of the search is at
+    /// offset `haystack.len()`, then no look-behind byte is set.
+    pub fn from_input_reverse(input: &Input<'_>) -> Config {
+        let look_behind = input.haystack().get(input.end()).copied();
+        Config { look_behind, anchored: input.get_anchored() }
+    }
+
+    /// Set the look-behind byte at the start of a search.
+    ///
+    /// Unless the search is intended to logically start at the beginning of a
+    /// haystack, this should _always_ be set to the byte immediately preceding
+    /// the start of the search. If no look-behind byte is set, then the start
+    /// configuration will assume it is at the beginning of the haystack. For
+    /// example, the anchor `^` will match.
+    ///
+    /// The default is that no look-behind byte is set.
+    pub fn look_behind(mut self, byte: Option<u8>) -> Config {
+        self.look_behind = byte;
+        self
+    }
+
+    /// Set the anchored mode of a search.
+    ///
+    /// The default is an unanchored search.
+    pub fn anchored(mut self, mode: Anchored) -> Config {
+        self.anchored = mode;
+        self
+    }
+
+    /// Return the look-behind byte in this configuration, if one exists.
+    pub fn get_look_behind(&self) -> Option<u8> {
+        self.look_behind
+    }
+
+    /// Return the anchored mode in this configuration.
+    pub fn get_anchored(&self) -> Anchored {
+        self.anchored
+    }
+}
+
+/// A map from every possible byte value to its corresponding starting
+/// configuration.
+///
+/// This map is used to look up the start configuration for a particular
+/// position in a haystack. This start configuration is then used in
+/// combination with things like the anchored mode and pattern ID to fully
+/// determine the start state.
+///
+/// Generally speaking, this map is only used for fully compiled DFAs and lazy
+/// DFAs. For NFAs (including the one-pass DFA), the start state is generally
+/// selected by virtue of traversing the NFA state graph. DFAs do the same
+/// thing, but at build time and not search time. (Well, technically the lazy
+/// DFA does it at search time, but it does enough work to cache the full
+/// result of the epsilon closure that the NFA engines tend to need to do.)
+#[derive(Clone)]
+pub(crate) struct StartByteMap {
+    map: [Start; 256],
+}
+
+impl StartByteMap {
+    /// Create a new map from byte values to their corresponding starting
+    /// configurations. The map is determined, in part, by how look-around
+    /// assertions are matched via the matcher given.
+    pub(crate) fn new(lookm: &LookMatcher) -> StartByteMap {
+        let mut map = [Start::NonWordByte; 256];
+        map[usize::from(b'\n')] = Start::LineLF;
+        map[usize::from(b'\r')] = Start::LineCR;
+        map[usize::from(b'_')] = Start::WordByte;
+
+        let mut byte = b'0';
+        while byte <= b'9' {
+            map[usize::from(byte)] = Start::WordByte;
+            byte += 1;
+        }
+        byte = b'A';
+        while byte <= b'Z' {
+            map[usize::from(byte)] = Start::WordByte;
+            byte += 1;
+        }
+        byte = b'a';
+        while byte <= b'z' {
+            map[usize::from(byte)] = Start::WordByte;
+            byte += 1;
+        }
+
+        let lineterm = lookm.get_line_terminator();
+        // If our line terminator is normal, then it is already handled by
+        // the LineLF and LineCR configurations. But if it's weird, then we
+        // overwrite whatever was there before for that terminator with a
+        // special configuration. The trick here is that if the terminator
+        // is, say, a word byte like `a`, then callers seeing this start
+        // configuration need to account for that and build their DFA state as
+        // if it *also* came from a word byte.
+        if lineterm != b'\r' && lineterm != b'\n' {
+            map[usize::from(lineterm)] = Start::CustomLineTerminator;
+        }
+        StartByteMap { map }
+    }
+
+    /// Return the starting configuration for the given look-behind byte.
+    ///
+    /// If no look-behind exists, callers should use `Start::Text`.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn get(&self, byte: u8) -> Start {
+        self.map[usize::from(byte)]
+    }
+
+    /// Deserializes a start byte map from the given slice. If the slice is of
+    /// insufficient length or otherwise contains an impossible mapping, then
+    /// an error is returned. Upon success, the number of bytes read along with
+    /// the map are returned. The number of bytes read is always a multiple of
+    /// 8.
+    pub(crate) fn from_bytes(
+        slice: &[u8],
+    ) -> Result<(StartByteMap, usize), DeserializeError> {
+        wire::check_slice_len(slice, 256, "start byte map")?;
+        let mut map = [Start::NonWordByte; 256];
+        for (i, &repr) in slice[..256].iter().enumerate() {
+            map[i] = match Start::from_usize(usize::from(repr)) {
+                Some(start) => start,
+                None => {
+                    return Err(DeserializeError::generic(
+                        "found invalid starting configuration",
+                    ))
+                }
+            };
+        }
+        Ok((StartByteMap { map }, 256))
+    }
+
+    /// Writes this map to the given byte buffer. If the given buffer is too
+    /// small, then an error is returned. Upon success, the total number of
+    /// bytes written is returned. The number of bytes written is guaranteed to
+    /// be a multiple of 8.
+    pub(crate) fn write_to(
+        &self,
+        dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        let nwrite = self.write_to_len();
+        if dst.len() < nwrite {
+            return Err(SerializeError::buffer_too_small("start byte map"));
+        }
+        for (i, &start) in self.map.iter().enumerate() {
+            dst[i] = start.as_u8();
+        }
+        Ok(nwrite)
+    }
+
+    /// Returns the total number of bytes written by `write_to`.
+    pub(crate) fn write_to_len(&self) -> usize {
+        256
+    }
+}
+
+impl core::fmt::Debug for StartByteMap {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        use crate::util::escape::DebugByte;
+
+        write!(f, "StartByteMap{{")?;
+        for byte in 0..=255 {
+            if byte > 0 {
+                write!(f, ", ")?;
+            }
+            let start = self.map[usize::from(byte)];
+            write!(f, "{:?} => {:?}", DebugByte(byte), start)?;
+        }
+        write!(f, "}}")?;
+        Ok(())
+    }
+}
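+// A round-trip sketch of the serialization format (illustrative; the
+// serialized form is one byte per possible haystack byte, 256 in total):
+//
+//     let map = StartByteMap::new(&LookMatcher::default());
+//     let mut buf = [0u8; 256];
+//     let nwritten = map.write_to(&mut buf).unwrap();
+//     let (map2, nread) = StartByteMap::from_bytes(&buf).unwrap();
+//     assert_eq!((256, 256), (nwritten, nread));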
+/// Represents the six possible starting configurations of a DFA search.
+///
+/// The starting configuration is determined by inspecting the beginning of
+/// the haystack (up to 1 byte). Ultimately, this along with a pattern ID
+/// (if specified) and the type of search (anchored or not) is what selects the
+/// start state to use in a DFA.
+///
+/// As one example, if a DFA only supports unanchored searches and does not
+/// support anchored searches for each pattern, then it will have at most 6
+/// distinct start states. (Some start states may be reused if determinization
+/// can determine that they will be equivalent.) If the DFA supports both
+/// anchored and unanchored searches, then it will have a maximum of 12
+/// distinct start states. Finally, if the DFA also supports anchored searches
+/// for each pattern, then it can have up to `12 + (N * 6)` start states, where
+/// `N` is the number of patterns.
+///
+/// Handling each of these starting configurations in the context of DFA
+/// determinization can be *quite* tricky and subtle. But the code is small
+/// and can be found at `crate::util::determinize::set_lookbehind_from_start`.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum Start {
+    /// This occurs when the starting position is not any of the ones below.
+    NonWordByte = 0,
+    /// This occurs when the byte immediately preceding the start of the search
+    /// is an ASCII word byte.
+    WordByte = 1,
+    /// This occurs when the starting position of the search corresponds to the
+    /// beginning of the haystack.
+    Text = 2,
+    /// This occurs when the byte immediately preceding the start of the search
+    /// is a line terminator. Specifically, `\n`.
+    LineLF = 3,
+    /// This occurs when the byte immediately preceding the start of the search
+    /// is a line terminator. Specifically, `\r`.
+    LineCR = 4,
+    /// This occurs when a custom line terminator has been set via a
+    /// `LookMatcher`, and when that line terminator is neither a `\r` nor a
+    /// `\n`.
+    ///
+    /// If the custom line terminator is a word byte, then this start
+    /// configuration is still selected. DFAs that implement word boundary
+    /// assertions will likely need to check whether the custom line terminator
+    /// is a word byte, in which case, it should behave as if the byte
+    /// satisfies `\b` in addition to multi-line anchors.
+    CustomLineTerminator = 5,
+}
+
+impl Start {
+    /// Return the starting state corresponding to the given integer. If no
+    /// starting state exists for the given integer, then None is returned.
+    pub(crate) fn from_usize(n: usize) -> Option<Start> {
+        match n {
+            0 => Some(Start::NonWordByte),
+            1 => Some(Start::WordByte),
+            2 => Some(Start::Text),
+            3 => Some(Start::LineLF),
+            4 => Some(Start::LineCR),
+            5 => Some(Start::CustomLineTerminator),
+            _ => None,
+        }
+    }
+
+    /// Returns the total number of starting state configurations.
+    pub(crate) fn len() -> usize {
+        6
+    }
+
+    /// Return this starting configuration as a `u8` integer. It is guaranteed
+    /// to be less than `Start::len()`.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub(crate) fn as_u8(&self) -> u8 {
+        // AFAIK, 'as' is the only way to zero-cost convert an int enum to an
+        // actual int.
+        *self as u8
+    }
+
+    /// Return this starting configuration as a `usize` integer. It is
+    /// guaranteed to be less than `Start::len()`.
+ #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn as_usize(&self) -> usize { + usize::from(self.as_u8()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn start_fwd_done_range() { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new("").range(1..0); + let config = Config::from_input_forward(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + assert_eq!(Start::Text, start); + } + + #[test] + fn start_rev_done_range() { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new("").range(1..0); + let config = Config::from_input_reverse(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + assert_eq!(Start::Text, start); + } + + #[test] + fn start_fwd() { + let f = |haystack, start, end| { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new(haystack).range(start..end); + let config = Config::from_input_forward(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + start + }; + + assert_eq!(Start::Text, f("", 0, 0)); + assert_eq!(Start::Text, f("abc", 0, 3)); + assert_eq!(Start::Text, f("\nabc", 0, 3)); + + assert_eq!(Start::LineLF, f("\nabc", 1, 3)); + + assert_eq!(Start::LineCR, f("\rabc", 1, 3)); + + assert_eq!(Start::WordByte, f("abc", 1, 3)); + + assert_eq!(Start::NonWordByte, f(" abc", 1, 3)); + } + + #[test] + fn start_rev() { + let f = |haystack, start, end| { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new(haystack).range(start..end); + let config = Config::from_input_reverse(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + start + }; + + assert_eq!(Start::Text, f("", 0, 0)); + assert_eq!(Start::Text, f("abc", 0, 3)); + assert_eq!(Start::Text, f("abc\n", 0, 4)); + + assert_eq!(Start::LineLF, f("abc\nz", 0, 3)); + + assert_eq!(Start::LineCR, f("abc\rz", 0, 3)); + + assert_eq!(Start::WordByte, f("abc", 0, 2)); + + assert_eq!(Start::NonWordByte, f("abc ", 0, 3)); + } +} diff --git a/vendor/regex-automata/src/util/syntax.rs b/vendor/regex-automata/src/util/syntax.rs new file mode 100644 index 0000000..78e3cf9 --- /dev/null +++ b/vendor/regex-automata/src/util/syntax.rs @@ -0,0 +1,482 @@ +/*! +Utilities for dealing with the syntax of a regular expression. + +This module currently only exposes a [`Config`] type that +itself represents a wrapper around the configuration for a +[`regex-syntax::ParserBuilder`](regex_syntax::ParserBuilder). The purpose of +this wrapper is to make configuring syntax options very similar to how other +configuration is done throughout this crate. Namely, instead of duplicating +syntax options across every builder (of which there are many), we instead +create small config objects like this one that can be passed around and +composed. +*/ + +use alloc::{vec, vec::Vec}; + +use regex_syntax::{ + ast, + hir::{self, Hir}, + Error, ParserBuilder, +}; + +/// A convenience routine for parsing a pattern into an HIR value with the +/// default configuration. 
+///
+/// # Example
+///
+/// This shows how to parse a pattern into an HIR value:
+///
+/// ```
+/// use regex_automata::util::syntax;
+///
+/// let hir = syntax::parse(r"([a-z]+)|([0-9]+)")?;
+/// assert_eq!(Some(1), hir.properties().static_explicit_captures_len());
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+pub fn parse(pattern: &str) -> Result<Hir, Error> {
+    parse_with(pattern, &Config::default())
+}
+
+/// A convenience routine for parsing many patterns into HIR values with the
+/// default configuration.
+///
+/// # Example
+///
+/// This shows how to parse many patterns into corresponding HIR values:
+///
+/// ```
+/// use {
+///     regex_automata::util::syntax,
+///     regex_syntax::hir::Properties,
+/// };
+///
+/// let hirs = syntax::parse_many(&[
+///     r"([a-z]+)|([0-9]+)",
+///     r"foo([A-Z]+)bar",
+/// ])?;
+/// let props = Properties::union(hirs.iter().map(|h| h.properties()));
+/// assert_eq!(Some(1), props.static_explicit_captures_len());
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
pub fn parse_many<P: AsRef<str>>(patterns: &[P]) -> Result<Vec<Hir>, Error> {
    parse_many_with(patterns, &Config::default())
}

/// A convenience routine for parsing a pattern into an HIR value using a
/// `Config`.
///
/// # Example
///
/// This shows how to parse a pattern into an HIR value with a non-default
/// configuration:
///
/// ```
/// use regex_automata::util::syntax;
///
/// let hir = syntax::parse_with(
///     r"^[a-z]+$",
///     &syntax::Config::new().multi_line(true).crlf(true),
/// )?;
/// assert!(hir.properties().look_set().contains_anchor_crlf());
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
pub fn parse_with(pattern: &str, config: &Config) -> Result<Hir, Error> {
    let mut builder = ParserBuilder::new();
    config.apply(&mut builder);
    builder.build().parse(pattern)
}

/// A convenience routine for parsing many patterns into HIR values using a
/// `Config`.
///
/// # Example
///
/// This shows how to parse many patterns into corresponding HIR values
/// with a non-default configuration:
///
/// ```
/// use {
///     regex_automata::util::syntax,
///     regex_syntax::hir::Properties,
/// };
///
/// let patterns = &[
///     r"([a-z]+)|([0-9]+)",
///     r"\W",
///     r"foo([A-Z]+)bar",
/// ];
/// let config = syntax::Config::new().unicode(false).utf8(false);
/// let hirs = syntax::parse_many_with(patterns, &config)?;
/// let props = Properties::union(hirs.iter().map(|h| h.properties()));
/// assert!(!props.is_utf8());
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
pub fn parse_many_with<P: AsRef<str>>(
    patterns: &[P],
    config: &Config,
) -> Result<Vec<Hir>, Error> {
    let mut builder = ParserBuilder::new();
    config.apply(&mut builder);
    let mut hirs = vec![];
    for p in patterns.iter() {
        hirs.push(builder.build().parse(p.as_ref())?);
    }
    Ok(hirs)
}

/// A common set of configuration options that apply to the syntax of a regex.
///
/// This represents a group of configuration options that specifically apply
/// to how the concrete syntax of a regular expression is interpreted. In
/// particular, they are generally forwarded to the
/// [`ParserBuilder`](https://docs.rs/regex-syntax/*/regex_syntax/struct.ParserBuilder.html)
/// in the
/// [`regex-syntax`](https://docs.rs/regex-syntax)
/// crate when building a regex from its concrete syntax directly.
///
/// These options are defined as a group since they apply to every regex engine
/// in this crate. Instead of re-defining them on every engine's builder, they
/// are instead provided here as one cohesive unit.
+#[derive(Clone, Copy, Debug)] +pub struct Config { + case_insensitive: bool, + multi_line: bool, + dot_matches_new_line: bool, + crlf: bool, + line_terminator: u8, + swap_greed: bool, + ignore_whitespace: bool, + unicode: bool, + utf8: bool, + nest_limit: u32, + octal: bool, +} + +impl Config { + /// Return a new default syntax configuration. + pub fn new() -> Config { + // These defaults match the ones used in regex-syntax. + Config { + case_insensitive: false, + multi_line: false, + dot_matches_new_line: false, + crlf: false, + line_terminator: b'\n', + swap_greed: false, + ignore_whitespace: false, + unicode: true, + utf8: true, + nest_limit: 250, + octal: false, + } + } + + /// Enable or disable the case insensitive flag by default. + /// + /// When Unicode mode is enabled, case insensitivity is Unicode-aware. + /// Specifically, it will apply the "simple" case folding rules as + /// specified by Unicode. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `i` flag. + pub fn case_insensitive(mut self, yes: bool) -> Config { + self.case_insensitive = yes; + self + } + + /// Enable or disable the multi-line matching flag by default. + /// + /// When this is enabled, the `^` and `$` look-around assertions will + /// match immediately after and immediately before a new line character, + /// respectively. Note that the `\A` and `\z` look-around assertions are + /// unaffected by this setting and always correspond to matching at the + /// beginning and end of the input. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `m` flag. + pub fn multi_line(mut self, yes: bool) -> Config { + self.multi_line = yes; + self + } + + /// Enable or disable the "dot matches any character" flag by default. + /// + /// When this is enabled, `.` will match any character. When it's disabled, + /// then `.` will match any character except for a new line character. + /// + /// Note that `.` is impacted by whether the "unicode" setting is enabled + /// or not. When Unicode is enabled (the default), `.` will match any UTF-8 + /// encoding of any Unicode scalar value (sans a new line, depending on + /// whether this "dot matches new line" option is enabled). When Unicode + /// mode is disabled, `.` will match any byte instead. Because of this, + /// when Unicode mode is disabled, `.` can only be used when the "allow + /// invalid UTF-8" option is enabled, since `.` could otherwise match + /// invalid UTF-8. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `s` flag. + pub fn dot_matches_new_line(mut self, yes: bool) -> Config { + self.dot_matches_new_line = yes; + self + } + + /// Enable or disable the "CRLF mode" flag by default. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `R` flag. + /// + /// When CRLF mode is enabled, the following happens: + /// + /// * Unless `dot_matches_new_line` is enabled, `.` will match any character + /// except for `\r` and `\n`. + /// * When `multi_line` mode is enabled, `^` and `$` will treat `\r\n`, + /// `\r` and `\n` as line terminators. And in particular, neither will + /// match between a `\r` and a `\n`. + pub fn crlf(mut self, yes: bool) -> Config { + self.crlf = yes; + self + } + + /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`. 
+    ///
+    /// Namely, instead of `.` (by default) matching everything except for
+    /// `\n`, this will cause `.` to match everything except for the byte
+    /// given.
+    ///
+    /// If `.` is used in a context where Unicode mode is enabled and this byte
+    /// isn't ASCII, then an error will be returned. When Unicode mode is
+    /// disabled, then any byte is permitted, but will return an error if UTF-8
+    /// mode is enabled and it is a non-ASCII byte.
+    ///
+    /// In short, any ASCII value for a line terminator is always okay. But a
+    /// non-ASCII byte might result in an error depending on whether Unicode
+    /// mode or UTF-8 mode are enabled.
+    ///
+    /// Note that if `R` mode is enabled then it always takes precedence and
+    /// the line terminator will be treated as `\r` and `\n` simultaneously.
+    ///
+    /// Note also that this *doesn't* impact the look-around assertions
+    /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
+    /// configuration in the regex engine itself.
+    pub fn line_terminator(mut self, byte: u8) -> Config {
+        self.line_terminator = byte;
+        self
+    }
+
+    /// Enable or disable the "swap greed" flag by default.
+    ///
+    /// When this is enabled, `.*` (for example) will become ungreedy and `.*?`
+    /// will become greedy.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `U` flag.
+    pub fn swap_greed(mut self, yes: bool) -> Config {
+        self.swap_greed = yes;
+        self
+    }
+
+    /// Enable verbose mode in the regular expression.
+    ///
+    /// When enabled, verbose mode permits insignificant whitespace in many
+    /// places in the regular expression, as well as comments. Comments are
+    /// started using `#` and continue until the end of the line.
+    ///
+    /// By default, this is disabled. It may be selectively enabled in the
+    /// regular expression by using the `x` flag regardless of this setting.
+    pub fn ignore_whitespace(mut self, yes: bool) -> Config {
+        self.ignore_whitespace = yes;
+        self
+    }
+
+    /// Enable or disable the Unicode flag (`u`) by default.
+    ///
+    /// By default this is **enabled**. It may alternatively be selectively
+    /// disabled in the regular expression itself via the `u` flag.
+    ///
+    /// Note that unless "allow invalid UTF-8" is enabled (it's disabled by
+    /// default), a regular expression will fail to parse if Unicode mode is
+    /// disabled and a sub-expression could possibly match invalid UTF-8.
+    ///
+    /// **WARNING**: Unicode mode can greatly increase the size of the compiled
+    /// DFA, which can noticeably impact both memory usage and compilation
+    /// time. This is especially noticeable if your regex contains character
+    /// classes like `\w` that are impacted by whether Unicode is enabled or
+    /// not. If Unicode is not necessary, you are encouraged to disable it.
+    pub fn unicode(mut self, yes: bool) -> Config {
+        self.unicode = yes;
+        self
+    }
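+    // A sketch of composing several of these options (illustrative; it uses
+    // only methods defined on this Config, and the values chosen are
+    // arbitrary):
+    //
+    //     let config = Config::new()
+    //         .unicode(false)
+    //         .utf8(false)
+    //         .nest_limit(100);
+    //     assert!(!config.get_unicode());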
+ /// + /// When enabled (the default), the builder is guaranteed to produce a + /// regex that will only ever match valid UTF-8 (otherwise, the builder + /// will return an error). + pub fn utf8(mut self, yes: bool) -> Config { + self.utf8 = yes; + self + } + + /// Set the nesting limit used for the regular expression parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is allowed + /// to be. If the AST exceeds the given limit (e.g., with too many nested + /// groups), then an error is returned by the parser. + /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow when building a finite automaton from a regular expression's + /// abstract syntax tree. In particular, construction currently uses + /// recursion. In the future, the implementation may stop using recursion + /// and this option will no longer be necessary. + /// + /// This limit is not checked until the entire AST is parsed. Therefore, + /// if callers want to put a limit on the amount of heap space used, then + /// they should impose a limit on the length, in bytes, of the concrete + /// pattern string. In particular, this is viable since the parser will + /// limit itself to heap space proportional to the length of the pattern + /// string. + /// + /// Note that a nest limit of `0` will return a nest limit error for most + /// patterns but not all. For example, a nest limit of `0` permits `a` but + /// not `ab`, since `ab` requires a concatenation AST item, which results + /// in a nest depth of `1`. In general, a nest limit is not something that + /// manifests in an obvious way in the concrete syntax, therefore, it + /// should not be used in a granular way. + pub fn nest_limit(mut self, limit: u32) -> Config { + self.nest_limit = limit; + self + } + + /// Whether to support octal syntax or not. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints in + /// a regular expression. For example, `a`, `\x61`, `\u0061` and + /// `\141` are all equivalent regular expressions, where the last example + /// shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, it does + /// make good error messages harder. That is, in PCRE based regex engines, + /// syntax like `\1` invokes a backreference, which is explicitly + /// unsupported in Rust's regex engine. However, many users expect it to + /// be supported. Therefore, when octal support is disabled, the error + /// message will explicitly mention that backreferences aren't supported. + /// + /// Octal syntax is disabled by default. + pub fn octal(mut self, yes: bool) -> Config { + self.octal = yes; + self + } + + /// Returns whether "unicode" mode is enabled. + pub fn get_unicode(&self) -> bool { + self.unicode + } + + /// Returns whether "case insensitive" mode is enabled. + pub fn get_case_insensitive(&self) -> bool { + self.case_insensitive + } + + /// Returns whether "multi line" mode is enabled. + pub fn get_multi_line(&self) -> bool { + self.multi_line + } + + /// Returns whether "dot matches new line" mode is enabled. + pub fn get_dot_matches_new_line(&self) -> bool { + self.dot_matches_new_line + } + + /// Returns whether "CRLF" mode is enabled. + pub fn get_crlf(&self) -> bool { + self.crlf + } + + /// Returns the line terminator in this syntax configuration. + pub fn get_line_terminator(&self) -> u8 { + self.line_terminator + } + + /// Returns whether "swap greed" mode is enabled. 
+ pub fn get_swap_greed(&self) -> bool { + self.swap_greed + } + + /// Returns whether "ignore whitespace" mode is enabled. + pub fn get_ignore_whitespace(&self) -> bool { + self.ignore_whitespace + } + + /// Returns whether UTF-8 mode is enabled. + pub fn get_utf8(&self) -> bool { + self.utf8 + } + + /// Returns the "nest limit" setting. + pub fn get_nest_limit(&self) -> u32 { + self.nest_limit + } + + /// Returns whether "octal" mode is enabled. + pub fn get_octal(&self) -> bool { + self.octal + } + + /// Applies this configuration to the given parser. + pub(crate) fn apply(&self, builder: &mut ParserBuilder) { + builder + .unicode(self.unicode) + .case_insensitive(self.case_insensitive) + .multi_line(self.multi_line) + .dot_matches_new_line(self.dot_matches_new_line) + .crlf(self.crlf) + .line_terminator(self.line_terminator) + .swap_greed(self.swap_greed) + .ignore_whitespace(self.ignore_whitespace) + .utf8(self.utf8) + .nest_limit(self.nest_limit) + .octal(self.octal); + } + + /// Applies this configuration to the given AST parser. + pub(crate) fn apply_ast(&self, builder: &mut ast::parse::ParserBuilder) { + builder + .ignore_whitespace(self.ignore_whitespace) + .nest_limit(self.nest_limit) + .octal(self.octal); + } + + /// Applies this configuration to the given AST-to-HIR translator. + pub(crate) fn apply_hir( + &self, + builder: &mut hir::translate::TranslatorBuilder, + ) { + builder + .unicode(self.unicode) + .case_insensitive(self.case_insensitive) + .multi_line(self.multi_line) + .crlf(self.crlf) + .dot_matches_new_line(self.dot_matches_new_line) + .line_terminator(self.line_terminator) + .swap_greed(self.swap_greed) + .utf8(self.utf8); + } +} + +impl Default for Config { + fn default() -> Config { + Config::new() + } +} diff --git a/vendor/regex-automata/src/util/unicode_data/mod.rs b/vendor/regex-automata/src/util/unicode_data/mod.rs new file mode 100644 index 0000000..fc7b1c7 --- /dev/null +++ b/vendor/regex-automata/src/util/unicode_data/mod.rs @@ -0,0 +1,17 @@ +// This cfg should match the one in src/util/look.rs that uses perl_word. +#[cfg(all( + // We have to explicitly want to support Unicode word boundaries. + feature = "unicode-word-boundary", + not(all( + // If we don't have regex-syntax at all, then we definitely need to + // bring our own \w data table. + feature = "syntax", + // If unicode-perl is enabled, then regex-syntax/unicode-perl is + // also enabled, which in turn means we can use regex-syntax's + // is_word_character routine (and thus use its data tables). But if + // unicode-perl is not enabled, even if syntax is, then we need to + // bring our own. + feature = "unicode-perl", + )), +))] +pub(crate) mod perl_word; diff --git a/vendor/regex-automata/src/util/unicode_data/perl_word.rs b/vendor/regex-automata/src/util/unicode_data/perl_word.rs new file mode 100644 index 0000000..74d6265 --- /dev/null +++ b/vendor/regex-automata/src/util/unicode_data/perl_word.rs @@ -0,0 +1,781 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate perl-word tmp/ucd-15.0.0/ --chars +// +// Unicode version: 15.0.0. +// +// ucd-generate 0.2.15 is available on crates.io. 
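+//
+// Editorial note (not part of the generated output): the table below is a
+// sorted list of non-overlapping, inclusive `(start, end)` scalar value
+// ranges, so membership can be tested with a binary search. A minimal sketch
+// of such a lookup (the `contains` helper is hypothetical, not an API of this
+// crate):
+//
+//     fn contains(table: &[(char, char)], c: char) -> bool {
+//         table
+//             .binary_search_by(|&(lo, hi)| {
+//                 if c < lo {
+//                     core::cmp::Ordering::Greater
+//                 } else if c > hi {
+//                     core::cmp::Ordering::Less
+//                 } else {
+//                     core::cmp::Ordering::Equal
+//                 }
+//             })
+//             .is_ok()
+//     }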
+ +pub const PERL_WORD: &'static [(char, char)] = &[ + ('0', '9'), + ('A', 'Z'), + ('_', '_'), + ('a', 'z'), + ('ª', 'ª'), + ('µ', 'µ'), + ('º', 'º'), + ('À', 'Ö'), + ('Ø', 'ö'), + ('ø', 'ˁ'), + ('ˆ', 'ˑ'), + ('ˠ', 'ˤ'), + ('ˬ', 'ˬ'), + ('ˮ', 'ˮ'), + ('\u{300}', 'ʹ'), + ('Ͷ', 'ͷ'), + ('ͺ', 'ͽ'), + ('Ϳ', 'Ϳ'), + ('Ά', 'Ά'), + ('Έ', 'Ί'), + ('Ό', 'Ό'), + ('Ύ', 'Ρ'), + ('Σ', 'ϵ'), + ('Ϸ', 'ҁ'), + ('\u{483}', 'ԯ'), + ('Ա', 'Ֆ'), + ('ՙ', 'ՙ'), + ('ՠ', 'ֈ'), + ('\u{591}', '\u{5bd}'), + ('\u{5bf}', '\u{5bf}'), + ('\u{5c1}', '\u{5c2}'), + ('\u{5c4}', '\u{5c5}'), + ('\u{5c7}', '\u{5c7}'), + ('א', 'ת'), + ('ׯ', 'ײ'), + ('\u{610}', '\u{61a}'), + ('ؠ', '٩'), + ('ٮ', 'ۓ'), + ('ە', '\u{6dc}'), + ('\u{6df}', '\u{6e8}'), + ('\u{6ea}', 'ۼ'), + ('ۿ', 'ۿ'), + ('ܐ', '\u{74a}'), + ('ݍ', 'ޱ'), + ('߀', 'ߵ'), + ('ߺ', 'ߺ'), + ('\u{7fd}', '\u{7fd}'), + ('ࠀ', '\u{82d}'), + ('ࡀ', '\u{85b}'), + ('ࡠ', 'ࡪ'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('\u{898}', '\u{8e1}'), + ('\u{8e3}', '\u{963}'), + ('०', '९'), + ('ॱ', 'ঃ'), + ('অ', 'ঌ'), + ('এ', 'ঐ'), + ('ও', 'ন'), + ('প', 'র'), + ('ল', 'ল'), + ('শ', 'হ'), + ('\u{9bc}', '\u{9c4}'), + ('ে', 'ৈ'), + ('ো', 'ৎ'), + ('\u{9d7}', '\u{9d7}'), + ('ড়', 'ঢ়'), + ('য়', '\u{9e3}'), + ('০', 'ৱ'), + ('ৼ', 'ৼ'), + ('\u{9fe}', '\u{9fe}'), + ('\u{a01}', 'ਃ'), + ('ਅ', 'ਊ'), + ('ਏ', 'ਐ'), + ('ਓ', 'ਨ'), + ('ਪ', 'ਰ'), + ('ਲ', 'ਲ਼'), + ('ਵ', 'ਸ਼'), + ('ਸ', 'ਹ'), + ('\u{a3c}', '\u{a3c}'), + ('ਾ', '\u{a42}'), + ('\u{a47}', '\u{a48}'), + ('\u{a4b}', '\u{a4d}'), + ('\u{a51}', '\u{a51}'), + ('ਖ਼', 'ੜ'), + ('ਫ਼', 'ਫ਼'), + ('੦', '\u{a75}'), + ('\u{a81}', 'ઃ'), + ('અ', 'ઍ'), + ('એ', 'ઑ'), + ('ઓ', 'ન'), + ('પ', 'ર'), + ('લ', 'ળ'), + ('વ', 'હ'), + ('\u{abc}', '\u{ac5}'), + ('\u{ac7}', 'ૉ'), + ('ો', '\u{acd}'), + ('ૐ', 'ૐ'), + ('ૠ', '\u{ae3}'), + ('૦', '૯'), + ('ૹ', '\u{aff}'), + ('\u{b01}', 'ଃ'), + ('ଅ', 'ଌ'), + ('ଏ', 'ଐ'), + ('ଓ', 'ନ'), + ('ପ', 'ର'), + ('ଲ', 'ଳ'), + ('ଵ', 'ହ'), + ('\u{b3c}', '\u{b44}'), + ('େ', 'ୈ'), + ('ୋ', '\u{b4d}'), + ('\u{b55}', '\u{b57}'), + ('ଡ଼', 'ଢ଼'), + ('ୟ', '\u{b63}'), + ('୦', '୯'), + ('ୱ', 'ୱ'), + ('\u{b82}', 'ஃ'), + ('அ', 'ஊ'), + ('எ', 'ஐ'), + ('ஒ', 'க'), + ('ங', 'ச'), + ('ஜ', 'ஜ'), + ('ஞ', 'ட'), + ('ண', 'த'), + ('ந', 'ப'), + ('ம', 'ஹ'), + ('\u{bbe}', 'ூ'), + ('ெ', 'ை'), + ('ொ', '\u{bcd}'), + ('ௐ', 'ௐ'), + ('\u{bd7}', '\u{bd7}'), + ('௦', '௯'), + ('\u{c00}', 'ఌ'), + ('ఎ', 'ఐ'), + ('ఒ', 'న'), + ('ప', 'హ'), + ('\u{c3c}', 'ౄ'), + ('\u{c46}', '\u{c48}'), + ('\u{c4a}', '\u{c4d}'), + ('\u{c55}', '\u{c56}'), + ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), + ('ౠ', '\u{c63}'), + ('౦', '౯'), + ('ಀ', 'ಃ'), + ('ಅ', 'ಌ'), + ('ಎ', 'ಐ'), + ('ಒ', 'ನ'), + ('ಪ', 'ಳ'), + ('ವ', 'ಹ'), + ('\u{cbc}', 'ೄ'), + ('\u{cc6}', 'ೈ'), + ('ೊ', '\u{ccd}'), + ('\u{cd5}', '\u{cd6}'), + ('ೝ', 'ೞ'), + ('ೠ', '\u{ce3}'), + ('೦', '೯'), + ('ೱ', 'ೳ'), + ('\u{d00}', 'ഌ'), + ('എ', 'ഐ'), + ('ഒ', '\u{d44}'), + ('െ', 'ൈ'), + ('ൊ', 'ൎ'), + ('ൔ', '\u{d57}'), + ('ൟ', '\u{d63}'), + ('൦', '൯'), + ('ൺ', 'ൿ'), + ('\u{d81}', 'ඃ'), + ('අ', 'ඖ'), + ('ක', 'න'), + ('ඳ', 'ර'), + ('ල', 'ල'), + ('ව', 'ෆ'), + ('\u{dca}', '\u{dca}'), + ('\u{dcf}', '\u{dd4}'), + ('\u{dd6}', '\u{dd6}'), + ('ෘ', '\u{ddf}'), + ('෦', '෯'), + ('ෲ', 'ෳ'), + ('ก', '\u{e3a}'), + ('เ', '\u{e4e}'), + ('๐', '๙'), + ('ກ', 'ຂ'), + ('ຄ', 'ຄ'), + ('ຆ', 'ຊ'), + ('ຌ', 'ຣ'), + ('ລ', 'ລ'), + ('ວ', 'ຽ'), + ('ເ', 'ໄ'), + ('ໆ', 'ໆ'), + ('\u{ec8}', '\u{ece}'), + ('໐', '໙'), + ('ໜ', 'ໟ'), + ('ༀ', 'ༀ'), + ('\u{f18}', '\u{f19}'), + ('༠', '༩'), + ('\u{f35}', '\u{f35}'), + ('\u{f37}', '\u{f37}'), + ('\u{f39}', '\u{f39}'), + ('༾', 'ཇ'), + ('ཉ', 'ཬ'), + ('\u{f71}', '\u{f84}'), + ('\u{f86}', '\u{f97}'), + 
('\u{f99}', '\u{fbc}'), + ('\u{fc6}', '\u{fc6}'), + ('က', '၉'), + ('ၐ', '\u{109d}'), + ('Ⴀ', 'Ⴥ'), + ('Ⴧ', 'Ⴧ'), + ('Ⴭ', 'Ⴭ'), + ('ა', 'ჺ'), + ('ჼ', 'ቈ'), + ('ቊ', 'ቍ'), + ('ቐ', 'ቖ'), + ('ቘ', 'ቘ'), + ('ቚ', 'ቝ'), + ('በ', 'ኈ'), + ('ኊ', 'ኍ'), + ('ነ', 'ኰ'), + ('ኲ', 'ኵ'), + ('ኸ', 'ኾ'), + ('ዀ', 'ዀ'), + ('ዂ', 'ዅ'), + ('ወ', 'ዖ'), + ('ዘ', 'ጐ'), + ('ጒ', 'ጕ'), + ('ጘ', 'ፚ'), + ('\u{135d}', '\u{135f}'), + ('ᎀ', 'ᎏ'), + ('Ꭰ', 'Ᏽ'), + ('ᏸ', 'ᏽ'), + ('ᐁ', 'ᙬ'), + ('ᙯ', 'ᙿ'), + ('ᚁ', 'ᚚ'), + ('ᚠ', 'ᛪ'), + ('ᛮ', 'ᛸ'), + ('ᜀ', '᜕'), + ('ᜟ', '᜴'), + ('ᝀ', '\u{1753}'), + ('ᝠ', 'ᝬ'), + ('ᝮ', 'ᝰ'), + ('\u{1772}', '\u{1773}'), + ('ក', '\u{17d3}'), + ('ៗ', 'ៗ'), + ('ៜ', '\u{17dd}'), + ('០', '៩'), + ('\u{180b}', '\u{180d}'), + ('\u{180f}', '᠙'), + ('ᠠ', 'ᡸ'), + ('ᢀ', 'ᢪ'), + ('ᢰ', 'ᣵ'), + ('ᤀ', 'ᤞ'), + ('\u{1920}', 'ᤫ'), + ('ᤰ', '\u{193b}'), + ('᥆', 'ᥭ'), + ('ᥰ', 'ᥴ'), + ('ᦀ', 'ᦫ'), + ('ᦰ', 'ᧉ'), + ('᧐', '᧙'), + ('ᨀ', '\u{1a1b}'), + ('ᨠ', '\u{1a5e}'), + ('\u{1a60}', '\u{1a7c}'), + ('\u{1a7f}', '᪉'), + ('᪐', '᪙'), + ('ᪧ', 'ᪧ'), + ('\u{1ab0}', '\u{1ace}'), + ('\u{1b00}', 'ᭌ'), + ('᭐', '᭙'), + ('\u{1b6b}', '\u{1b73}'), + ('\u{1b80}', '᯳'), + ('ᰀ', '\u{1c37}'), + ('᱀', '᱉'), + ('ᱍ', 'ᱽ'), + ('ᲀ', 'ᲈ'), + ('Ა', 'Ჺ'), + ('Ჽ', 'Ჿ'), + ('\u{1cd0}', '\u{1cd2}'), + ('\u{1cd4}', 'ᳺ'), + ('ᴀ', 'ἕ'), + ('Ἐ', 'Ἕ'), + ('ἠ', 'ὅ'), + ('Ὀ', 'Ὅ'), + ('ὐ', 'ὗ'), + ('Ὑ', 'Ὑ'), + ('Ὓ', 'Ὓ'), + ('Ὕ', 'Ὕ'), + ('Ὗ', 'ώ'), + ('ᾀ', 'ᾴ'), + ('ᾶ', 'ᾼ'), + ('ι', 'ι'), + ('ῂ', 'ῄ'), + ('ῆ', 'ῌ'), + ('ῐ', 'ΐ'), + ('ῖ', 'Ί'), + ('ῠ', 'Ῥ'), + ('ῲ', 'ῴ'), + ('ῶ', 'ῼ'), + ('\u{200c}', '\u{200d}'), + ('‿', '⁀'), + ('⁔', '⁔'), + ('ⁱ', 'ⁱ'), + ('ⁿ', 'ⁿ'), + ('ₐ', 'ₜ'), + ('\u{20d0}', '\u{20f0}'), + ('ℂ', 'ℂ'), + ('ℇ', 'ℇ'), + ('ℊ', 'ℓ'), + ('ℕ', 'ℕ'), + ('ℙ', 'ℝ'), + ('ℤ', 'ℤ'), + ('Ω', 'Ω'), + ('ℨ', 'ℨ'), + ('K', 'ℭ'), + ('ℯ', 'ℹ'), + ('ℼ', 'ℿ'), + ('ⅅ', 'ⅉ'), + ('ⅎ', 'ⅎ'), + ('Ⅰ', 'ↈ'), + ('Ⓐ', 'ⓩ'), + ('Ⰰ', 'ⳤ'), + ('Ⳬ', 'ⳳ'), + ('ⴀ', 'ⴥ'), + ('ⴧ', 'ⴧ'), + ('ⴭ', 'ⴭ'), + ('ⴰ', 'ⵧ'), + ('ⵯ', 'ⵯ'), + ('\u{2d7f}', 'ⶖ'), + ('ⶠ', 'ⶦ'), + ('ⶨ', 'ⶮ'), + ('ⶰ', 'ⶶ'), + ('ⶸ', 'ⶾ'), + ('ⷀ', 'ⷆ'), + ('ⷈ', 'ⷎ'), + ('ⷐ', 'ⷖ'), + ('ⷘ', 'ⷞ'), + ('\u{2de0}', '\u{2dff}'), + ('ⸯ', 'ⸯ'), + ('々', '〇'), + ('〡', '\u{302f}'), + ('〱', '〵'), + ('〸', '〼'), + ('ぁ', 'ゖ'), + ('\u{3099}', '\u{309a}'), + ('ゝ', 'ゟ'), + ('ァ', 'ヺ'), + ('ー', 'ヿ'), + ('ㄅ', 'ㄯ'), + ('ㄱ', 'ㆎ'), + ('ㆠ', 'ㆿ'), + ('ㇰ', 'ㇿ'), + ('㐀', '䶿'), + ('一', 'ꒌ'), + ('ꓐ', 'ꓽ'), + ('ꔀ', 'ꘌ'), + ('ꘐ', 'ꘫ'), + ('Ꙁ', '\u{a672}'), + ('\u{a674}', '\u{a67d}'), + ('ꙿ', '\u{a6f1}'), + ('ꜗ', 'ꜟ'), + ('Ꜣ', 'ꞈ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠧ'), + ('\u{a82c}', '\u{a82c}'), + ('ꡀ', 'ꡳ'), + ('ꢀ', '\u{a8c5}'), + ('꣐', '꣙'), + ('\u{a8e0}', 'ꣷ'), + ('ꣻ', 'ꣻ'), + ('ꣽ', '\u{a92d}'), + ('ꤰ', '꥓'), + ('ꥠ', 'ꥼ'), + ('\u{a980}', '꧀'), + ('ꧏ', '꧙'), + ('ꧠ', 'ꧾ'), + ('ꨀ', '\u{aa36}'), + ('ꩀ', 'ꩍ'), + ('꩐', '꩙'), + ('ꩠ', 'ꩶ'), + ('ꩺ', 'ꫂ'), + ('ꫛ', 'ꫝ'), + ('ꫠ', 'ꫯ'), + ('ꫲ', '\u{aaf6}'), + ('ꬁ', 'ꬆ'), + ('ꬉ', 'ꬎ'), + ('ꬑ', 'ꬖ'), + ('ꬠ', 'ꬦ'), + ('ꬨ', 'ꬮ'), + ('ꬰ', 'ꭚ'), + ('ꭜ', 'ꭩ'), + ('ꭰ', 'ꯪ'), + ('꯬', '\u{abed}'), + ('꯰', '꯹'), + ('가', '힣'), + ('ힰ', 'ퟆ'), + ('ퟋ', 'ퟻ'), + ('豈', '舘'), + ('並', '龎'), + ('ff', 'st'), + ('ﬓ', 'ﬗ'), + ('יִ', 'ﬨ'), + ('שׁ', 'זּ'), + ('טּ', 'לּ'), + ('מּ', 'מּ'), + ('נּ', 'סּ'), + ('ףּ', 'פּ'), + ('צּ', 'ﮱ'), + ('ﯓ', 'ﴽ'), + ('ﵐ', 'ﶏ'), + ('ﶒ', 'ﷇ'), + ('ﷰ', 'ﷻ'), + ('\u{fe00}', '\u{fe0f}'), + ('\u{fe20}', '\u{fe2f}'), + ('︳', '︴'), + ('﹍', '﹏'), + ('ﹰ', 'ﹴ'), + ('ﹶ', 'ﻼ'), + ('0', '9'), + ('A', 'Z'), + ('_', '_'), + ('a', 'z'), + ('ヲ', 'ᄒ'), + ('ᅡ', 'ᅦ'), + ('ᅧ', 'ᅬ'), + ('ᅭ', 
'ᅲ'), + ('ᅳ', 'ᅵ'), + ('𐀀', '𐀋'), + ('𐀍', '𐀦'), + ('𐀨', '𐀺'), + ('𐀼', '𐀽'), + ('𐀿', '𐁍'), + ('𐁐', '𐁝'), + ('𐂀', '𐃺'), + ('𐅀', '𐅴'), + ('\u{101fd}', '\u{101fd}'), + ('𐊀', '𐊜'), + ('𐊠', '𐋐'), + ('\u{102e0}', '\u{102e0}'), + ('𐌀', '𐌟'), + ('𐌭', '𐍊'), + ('𐍐', '\u{1037a}'), + ('𐎀', '𐎝'), + ('𐎠', '𐏃'), + ('𐏈', '𐏏'), + ('𐏑', '𐏕'), + ('𐐀', '𐒝'), + ('𐒠', '𐒩'), + ('𐒰', '𐓓'), + ('𐓘', '𐓻'), + ('𐔀', '𐔧'), + ('𐔰', '𐕣'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), + ('𐘀', '𐜶'), + ('𐝀', '𐝕'), + ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), + ('𐠀', '𐠅'), + ('𐠈', '𐠈'), + ('𐠊', '𐠵'), + ('𐠷', '𐠸'), + ('𐠼', '𐠼'), + ('𐠿', '𐡕'), + ('𐡠', '𐡶'), + ('𐢀', '𐢞'), + ('𐣠', '𐣲'), + ('𐣴', '𐣵'), + ('𐤀', '𐤕'), + ('𐤠', '𐤹'), + ('𐦀', '𐦷'), + ('𐦾', '𐦿'), + ('𐨀', '\u{10a03}'), + ('\u{10a05}', '\u{10a06}'), + ('\u{10a0c}', '𐨓'), + ('𐨕', '𐨗'), + ('𐨙', '𐨵'), + ('\u{10a38}', '\u{10a3a}'), + ('\u{10a3f}', '\u{10a3f}'), + ('𐩠', '𐩼'), + ('𐪀', '𐪜'), + ('𐫀', '𐫇'), + ('𐫉', '\u{10ae6}'), + ('𐬀', '𐬵'), + ('𐭀', '𐭕'), + ('𐭠', '𐭲'), + ('𐮀', '𐮑'), + ('𐰀', '𐱈'), + ('𐲀', '𐲲'), + ('𐳀', '𐳲'), + ('𐴀', '\u{10d27}'), + ('𐴰', '𐴹'), + ('𐺀', '𐺩'), + ('\u{10eab}', '\u{10eac}'), + ('𐺰', '𐺱'), + ('\u{10efd}', '𐼜'), + ('𐼧', '𐼧'), + ('𐼰', '\u{10f50}'), + ('𐽰', '\u{10f85}'), + ('𐾰', '𐿄'), + ('𐿠', '𐿶'), + ('𑀀', '\u{11046}'), + ('𑁦', '𑁵'), + ('\u{1107f}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), + ('𑃐', '𑃨'), + ('𑃰', '𑃹'), + ('\u{11100}', '\u{11134}'), + ('𑄶', '𑄿'), + ('𑅄', '𑅇'), + ('𑅐', '\u{11173}'), + ('𑅶', '𑅶'), + ('\u{11180}', '𑇄'), + ('\u{111c9}', '\u{111cc}'), + ('𑇎', '𑇚'), + ('𑇜', '𑇜'), + ('𑈀', '𑈑'), + ('𑈓', '\u{11237}'), + ('\u{1123e}', '\u{11241}'), + ('𑊀', '𑊆'), + ('𑊈', '𑊈'), + ('𑊊', '𑊍'), + ('𑊏', '𑊝'), + ('𑊟', '𑊨'), + ('𑊰', '\u{112ea}'), + ('𑋰', '𑋹'), + ('\u{11300}', '𑌃'), + ('𑌅', '𑌌'), + ('𑌏', '𑌐'), + ('𑌓', '𑌨'), + ('𑌪', '𑌰'), + ('𑌲', '𑌳'), + ('𑌵', '𑌹'), + ('\u{1133b}', '𑍄'), + ('𑍇', '𑍈'), + ('𑍋', '𑍍'), + ('𑍐', '𑍐'), + ('\u{11357}', '\u{11357}'), + ('𑍝', '𑍣'), + ('\u{11366}', '\u{1136c}'), + ('\u{11370}', '\u{11374}'), + ('𑐀', '𑑊'), + ('𑑐', '𑑙'), + ('\u{1145e}', '𑑡'), + ('𑒀', '𑓅'), + ('𑓇', '𑓇'), + ('𑓐', '𑓙'), + ('𑖀', '\u{115b5}'), + ('𑖸', '\u{115c0}'), + ('𑗘', '\u{115dd}'), + ('𑘀', '\u{11640}'), + ('𑙄', '𑙄'), + ('𑙐', '𑙙'), + ('𑚀', '𑚸'), + ('𑛀', '𑛉'), + ('𑜀', '𑜚'), + ('\u{1171d}', '\u{1172b}'), + ('𑜰', '𑜹'), + ('𑝀', '𑝆'), + ('𑠀', '\u{1183a}'), + ('𑢠', '𑣩'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤵'), + ('𑤷', '𑤸'), + ('\u{1193b}', '\u{11943}'), + ('𑥐', '𑥙'), + ('𑦠', '𑦧'), + ('𑦪', '\u{119d7}'), + ('\u{119da}', '𑧡'), + ('𑧣', '𑧤'), + ('𑨀', '\u{11a3e}'), + ('\u{11a47}', '\u{11a47}'), + ('𑩐', '\u{11a99}'), + ('𑪝', '𑪝'), + ('𑪰', '𑫸'), + ('𑰀', '𑰈'), + ('𑰊', '\u{11c36}'), + ('\u{11c38}', '𑱀'), + ('𑱐', '𑱙'), + ('𑱲', '𑲏'), + ('\u{11c92}', '\u{11ca7}'), + ('𑲩', '\u{11cb6}'), + ('𑴀', '𑴆'), + ('𑴈', '𑴉'), + ('𑴋', '\u{11d36}'), + ('\u{11d3a}', '\u{11d3a}'), + ('\u{11d3c}', '\u{11d3d}'), + ('\u{11d3f}', '\u{11d47}'), + ('𑵐', '𑵙'), + ('𑵠', '𑵥'), + ('𑵧', '𑵨'), + ('𑵪', '𑶎'), + ('\u{11d90}', '\u{11d91}'), + ('𑶓', '𑶘'), + ('𑶠', '𑶩'), + ('𑻠', '𑻶'), + ('\u{11f00}', '𑼐'), + ('𑼒', '\u{11f3a}'), + ('𑼾', '\u{11f42}'), + ('𑽐', '𑽙'), + ('𑾰', '𑾰'), + ('𒀀', '𒎙'), + ('𒐀', '𒑮'), + ('𒒀', '𒕃'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('\u{13440}', '\u{13455}'), + ('𔐀', '𔙆'), + ('𖠀', '𖨸'), + ('𖩀', '𖩞'), + ('𖩠', '𖩩'), + ('𖩰', '𖪾'), + ('𖫀', '𖫉'), + ('𖫐', '𖫭'), + ('\u{16af0}', '\u{16af4}'), + ('𖬀', '\u{16b36}'), + ('𖭀', '𖭃'), + ('𖭐', '𖭙'), + ('𖭣', '𖭷'), + ('𖭽', '𖮏'), + ('𖹀', '𖹿'), + ('𖼀', '𖽊'), + 
('\u{16f4f}', '𖾇'), + ('\u{16f8f}', '𖾟'), + ('𖿠', '𖿡'), + ('𖿣', '\u{16fe4}'), + ('𖿰', '𖿱'), + ('𗀀', '𘟷'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), + ('𛅐', '𛅒'), + ('𛅕', '𛅕'), + ('𛅤', '𛅧'), + ('𛅰', '𛋻'), + ('𛰀', '𛱪'), + ('𛱰', '𛱼'), + ('𛲀', '𛲈'), + ('𛲐', '𛲙'), + ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), + ('\u{1d165}', '\u{1d169}'), + ('𝅭', '\u{1d172}'), + ('\u{1d17b}', '\u{1d182}'), + ('\u{1d185}', '\u{1d18b}'), + ('\u{1d1aa}', '\u{1d1ad}'), + ('\u{1d242}', '\u{1d244}'), + ('𝐀', '𝑔'), + ('𝑖', '𝒜'), + ('𝒞', '𝒟'), + ('𝒢', '𝒢'), + ('𝒥', '𝒦'), + ('𝒩', '𝒬'), + ('𝒮', '𝒹'), + ('𝒻', '𝒻'), + ('𝒽', '𝓃'), + ('𝓅', '𝔅'), + ('𝔇', '𝔊'), + ('𝔍', '𝔔'), + ('𝔖', '𝔜'), + ('𝔞', '𝔹'), + ('𝔻', '𝔾'), + ('𝕀', '𝕄'), + ('𝕆', '𝕆'), + ('𝕊', '𝕐'), + ('𝕒', '𝚥'), + ('𝚨', '𝛀'), + ('𝛂', '𝛚'), + ('𝛜', '𝛺'), + ('𝛼', '𝜔'), + ('𝜖', '𝜴'), + ('𝜶', '𝝎'), + ('𝝐', '𝝮'), + ('𝝰', '𝞈'), + ('𝞊', '𝞨'), + ('𝞪', '𝟂'), + ('𝟄', '𝟋'), + ('𝟎', '𝟿'), + ('\u{1da00}', '\u{1da36}'), + ('\u{1da3b}', '\u{1da6c}'), + ('\u{1da75}', '\u{1da75}'), + ('\u{1da84}', '\u{1da84}'), + ('\u{1da9b}', '\u{1da9f}'), + ('\u{1daa1}', '\u{1daaf}'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), + ('\u{1e000}', '\u{1e006}'), + ('\u{1e008}', '\u{1e018}'), + ('\u{1e01b}', '\u{1e021}'), + ('\u{1e023}', '\u{1e024}'), + ('\u{1e026}', '\u{1e02a}'), + ('𞀰', '𞁭'), + ('\u{1e08f}', '\u{1e08f}'), + ('𞄀', '𞄬'), + ('\u{1e130}', '𞄽'), + ('𞅀', '𞅉'), + ('𞅎', '𞅎'), + ('𞊐', '\u{1e2ae}'), + ('𞋀', '𞋹'), + ('𞓐', '𞓹'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), + ('𞠀', '𞣄'), + ('\u{1e8d0}', '\u{1e8d6}'), + ('𞤀', '𞥋'), + ('𞥐', '𞥙'), + ('𞸀', '𞸃'), + ('𞸅', '𞸟'), + ('𞸡', '𞸢'), + ('𞸤', '𞸤'), + ('𞸧', '𞸧'), + ('𞸩', '𞸲'), + ('𞸴', '𞸷'), + ('𞸹', '𞸹'), + ('𞸻', '𞸻'), + ('𞹂', '𞹂'), + ('𞹇', '𞹇'), + ('𞹉', '𞹉'), + ('𞹋', '𞹋'), + ('𞹍', '𞹏'), + ('𞹑', '𞹒'), + ('𞹔', '𞹔'), + ('𞹗', '𞹗'), + ('𞹙', '𞹙'), + ('𞹛', '𞹛'), + ('𞹝', '𞹝'), + ('𞹟', '𞹟'), + ('𞹡', '𞹢'), + ('𞹤', '𞹤'), + ('𞹧', '𞹪'), + ('𞹬', '𞹲'), + ('𞹴', '𞹷'), + ('𞹹', '𞹼'), + ('𞹾', '𞹾'), + ('𞺀', '𞺉'), + ('𞺋', '𞺛'), + ('𞺡', '𞺣'), + ('𞺥', '𞺩'), + ('𞺫', '𞺻'), + ('🄰', '🅉'), + ('🅐', '🅩'), + ('🅰', '🆉'), + ('🯰', '🯹'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), + ('𫝀', '𫠝'), + ('𫠠', '𬺡'), + ('𬺰', '𮯠'), + ('丽', '𪘀'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), + ('\u{e0100}', '\u{e01ef}'), +]; diff --git a/vendor/regex-automata/src/util/utf8.rs b/vendor/regex-automata/src/util/utf8.rs new file mode 100644 index 0000000..91b27ef --- /dev/null +++ b/vendor/regex-automata/src/util/utf8.rs @@ -0,0 +1,196 @@ +/*! +Utilities for dealing with UTF-8. + +This module provides some UTF-8 related helper routines, including an +incremental decoder. +*/ + +/// Returns true if and only if the given byte is considered a word character. +/// This only applies to ASCII. +/// +/// This was copied from regex-syntax so that we can use it to determine the +/// starting DFA state while searching without depending on regex-syntax. The +/// definition is never going to change, so there's no maintenance/bit-rot +/// hazard here. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn is_word_byte(b: u8) -> bool { + const fn mkwordset() -> [bool; 256] { + // FIXME: Use as_usize() once const functions in traits are stable. 
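+        //
+        // Building the table in a `const fn` means the full 256-entry lookup
+        // is computed at compile time; the loops below just mark `_`, `0-9`,
+        // `A-Z` and `a-z` as word bytes.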
+        let mut set = [false; 256];
+        set[b'_' as usize] = true;
+
+        let mut byte = b'0';
+        while byte <= b'9' {
+            set[byte as usize] = true;
+            byte += 1;
+        }
+        byte = b'A';
+        while byte <= b'Z' {
+            set[byte as usize] = true;
+            byte += 1;
+        }
+        byte = b'a';
+        while byte <= b'z' {
+            set[byte as usize] = true;
+            byte += 1;
+        }
+        set
+    }
+    const WORD: [bool; 256] = mkwordset();
+    WORD[b as usize]
+}
+
+/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
+///
+/// If no valid encoding of a codepoint exists at the beginning of the given
+/// byte slice, then the first byte is returned instead.
+///
+/// This returns `None` if and only if `bytes` is empty.
+///
+/// This never panics.
+///
+/// *WARNING*: This is not designed for performance. If you're looking for a
+/// fast UTF-8 decoder, this is not it. If you feel like you need one in this
+/// crate, then please file an issue and discuss your use case.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+pub(crate) fn decode(bytes: &[u8]) -> Option<Result<char, u8>> {
+    if bytes.is_empty() {
+        return None;
+    }
+    let len = match len(bytes[0]) {
+        None => return Some(Err(bytes[0])),
+        Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
+        Some(1) => return Some(Ok(char::from(bytes[0]))),
+        Some(len) => len,
+    };
+    match core::str::from_utf8(&bytes[..len]) {
+        Ok(s) => Some(Ok(s.chars().next().unwrap())),
+        Err(_) => Some(Err(bytes[0])),
+    }
+}
+
+/// Decodes the last UTF-8 encoded codepoint from the given byte slice.
+///
+/// If no valid encoding of a codepoint exists at the end of the given byte
+/// slice, then the last byte is returned instead.
+///
+/// This returns `None` if and only if `bytes` is empty.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+pub(crate) fn decode_last(bytes: &[u8]) -> Option<Result<char, u8>> {
+    if bytes.is_empty() {
+        return None;
+    }
+    let mut start = bytes.len() - 1;
+    let limit = bytes.len().saturating_sub(4);
+    while start > limit && !is_leading_or_invalid_byte(bytes[start]) {
+        start -= 1;
+    }
+    match decode(&bytes[start..]) {
+        None => None,
+        Some(Ok(ch)) => Some(Ok(ch)),
+        Some(Err(_)) => Some(Err(bytes[bytes.len() - 1])),
+    }
+}
+
+/// Given a UTF-8 leading byte, this returns the total number of code units
+/// in the following encoded codepoint.
+///
+/// If the given byte is not a valid UTF-8 leading byte, then this returns
+/// `None`.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn len(byte: u8) -> Option<usize> {
+    if byte <= 0x7F {
+        return Some(1);
+    } else if byte & 0b1100_0000 == 0b1000_0000 {
+        return None;
+    } else if byte <= 0b1101_1111 {
+        Some(2)
+    } else if byte <= 0b1110_1111 {
+        Some(3)
+    } else if byte <= 0b1111_0111 {
+        Some(4)
+    } else {
+        None
+    }
+}
+
+/// Returns true if and only if the given offset in the given bytes falls on a
+/// valid UTF-8 encoded codepoint boundary.
+///
+/// If `bytes` is not valid UTF-8, then the behavior of this routine is
+/// unspecified.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+pub(crate) fn is_boundary(bytes: &[u8], i: usize) -> bool {
+    match bytes.get(i) {
+        // The position at the end of the bytes always represents an empty
+        // string, which is a valid boundary. But anything after that doesn't
+        // make much sense to call a valid boundary.
+        None => i == bytes.len(),
+        // Other than ASCII (where the most significant bit is never set),
+        // valid starting bytes always have their most significant two bits
+        // set, whereas continuation bytes never have their second most
+        // significant bit set.
Therefore, this only returns true when bytes[i] + // corresponds to a byte that begins a valid UTF-8 encoding of a + // Unicode scalar value. + Some(&b) => b <= 0b0111_1111 || b >= 0b1100_0000, + } +} + +/// Returns true if and only if the given byte is either a valid leading UTF-8 +/// byte, or is otherwise an invalid byte that can never appear anywhere in a +/// valid UTF-8 sequence. +#[cfg_attr(feature = "perf-inline", inline(always))] +fn is_leading_or_invalid_byte(b: u8) -> bool { + // In the ASCII case, the most significant bit is never set. The leading + // byte of a 2/3/4-byte sequence always has the top two most significant + // bits set. For bytes that can never appear anywhere in valid UTF-8, this + // also returns true, since every such byte has its two most significant + // bits set: + // + // \xC0 :: 11000000 + // \xC1 :: 11000001 + // \xF5 :: 11110101 + // \xF6 :: 11110110 + // \xF7 :: 11110111 + // \xF8 :: 11111000 + // \xF9 :: 11111001 + // \xFA :: 11111010 + // \xFB :: 11111011 + // \xFC :: 11111100 + // \xFD :: 11111101 + // \xFE :: 11111110 + // \xFF :: 11111111 + (b & 0b1100_0000) != 0b1000_0000 +} + +/* +/// Returns the smallest possible index of the next valid UTF-8 sequence +/// starting after `i`. +/// +/// For all inputs, including invalid UTF-8 and any value of `i`, the return +/// value is guaranteed to be greater than `i`. (If there is no value greater +/// than `i` that fits in `usize`, then this panics.) +/// +/// Generally speaking, this should only be called on `text` when it is +/// permitted to assume that it is valid UTF-8 and where either `i >= +/// text.len()` or where `text[i]` is a leading byte of a UTF-8 sequence. +/// +/// NOTE: This method was used in a previous conception of iterators where we +/// specifically tried to skip over empty matches that split a codepoint by +/// simply requiring that our next search begin at the beginning of codepoint. +/// But we ended up changing that technique to always advance by 1 byte and +/// then filter out matches that split a codepoint after-the-fact. Thus, we no +/// longer use this method. But I've kept it around in case we want to switch +/// back to this approach. Its guarantees are a little subtle, so I'd prefer +/// not to rebuild it from whole cloth. +pub(crate) fn next(text: &[u8], i: usize) -> usize { + let b = match text.get(i) { + None => return i.checked_add(1).unwrap(), + Some(&b) => b, + }; + // For cases where we see an invalid UTF-8 byte, there isn't much we can do + // other than just start at the next byte. + let inc = len(b).unwrap_or(1); + i.checked_add(inc).unwrap() +} +*/ diff --git a/vendor/regex-automata/src/util/wire.rs b/vendor/regex-automata/src/util/wire.rs new file mode 100644 index 0000000..ecf4fd8 --- /dev/null +++ b/vendor/regex-automata/src/util/wire.rs @@ -0,0 +1,975 @@ +/*! +Types and routines that support the wire format of finite automata. + +Currently, this module just exports a few error types and some small helpers +for deserializing [dense DFAs](crate::dfa::dense::DFA) using correct alignment. +*/ + +/* +A collection of helper functions, types and traits for serializing automata. + +This crate defines its own bespoke serialization mechanism for some structures +provided in the public API, namely, DFAs. A bespoke mechanism was developed +primarily because structures like automata demand a specific binary format. +Attempting to encode their rich structure in an existing serialization +format is just not feasible. 
Moreover, the format for each structure is
+generally designed such that deserialization is cheap. More specifically, that
+deserialization can be done in constant time. (The idea being that you can
+embed it into your binary or mmap it, and then use it immediately.)
+
+In order to achieve this, the dense and sparse DFAs in this crate use an
+in-memory representation that very closely corresponds to its binary serialized
+form. This pervades and complicates everything, and in some cases, requires
+dealing with alignment and reasoning about safety.
+
+This technique does have major advantages. In particular, it permits doing
+the potentially costly work of compiling a finite state machine in an offline
+manner, and then loading it at runtime not only without having to re-compile
+the regex, but even without the code required to do the compilation. This, for
+example, permits one to use a pre-compiled DFA not only in environments without
+Rust's standard library, but also in environments without a heap.
+
+In the code below, whenever we insert some kind of padding, it's to enforce a
+4-byte alignment, unless otherwise noted. Namely, u32 is the only state ID type
+supported. (In a previous version of this library, DFAs were generic over the
+state ID representation.)
+
+Also, serialization generally requires the caller to specify endianness,
+whereas deserialization always assumes native endianness (otherwise cheap
+deserialization would be impossible). This implies that serializing a structure
+generally requires serializing both its big-endian and little-endian variants,
+and then loading the correct one based on the target's endianness.
+*/
+
+use core::{
+    cmp,
+    convert::{TryFrom, TryInto},
+    mem::size_of,
+};
+
+#[cfg(feature = "alloc")]
+use alloc::{vec, vec::Vec};
+
+use crate::util::{
+    int::Pointer,
+    primitives::{PatternID, PatternIDError, StateID, StateIDError},
+};
+
+/// A hack to align a smaller type `B` with a bigger type `T`.
+///
+/// The usual use of this is with `B = [u8]` and `T = u32`. That is,
+/// it permits aligning a sequence of bytes on a 4-byte boundary. This
+/// is useful in contexts where one wants to embed a serialized [dense
+/// DFA](crate::dfa::dense::DFA) into a Rust program while guaranteeing the
+/// alignment required for the DFA.
+///
+/// See [`dense::DFA::from_bytes`](crate::dfa::dense::DFA::from_bytes) for an
+/// example of how to use this type.
+#[repr(C)]
+#[derive(Debug)]
+pub struct AlignAs<B: ?Sized, T> {
+    /// A zero-sized field indicating the alignment we want.
+    pub _align: [T; 0],
+    /// A possibly non-sized field containing a sequence of bytes.
+    pub bytes: B,
+}
+
+/// An error that occurs when serializing an object from this crate.
+///
+/// Serialization, as used in this crate, universally refers to the process
+/// of transforming a structure (like a DFA) into a custom binary format
+/// represented by `&[u8]`. To this end, serialization is generally infallible.
+/// However, it can fail when caller provided buffer sizes are too small. When
+/// that occurs, a serialization error is reported.
+///
+/// A `SerializeError` provides no introspection capabilities. Its only
+/// supported operation is conversion to a human readable error message.
+///
+/// This error type implements the `std::error::Error` trait only when the
+/// `std` feature is enabled. Otherwise, this type is defined in all
+/// configurations.
+#[derive(Debug)]
+pub struct SerializeError {
+    /// The name of the thing that a buffer is too small for.
+    ///
+    /// Currently, the only kind of serialization error is one that is
+    /// committed by a caller: providing a destination buffer that is too
+    /// small to fit the serialized object. This makes sense conceptually,
+    /// since every valid inhabitant of a type should be serializable.
+    ///
+    /// This is somewhat exposed in the public API of this crate. For example,
+    /// the `to_bytes_{big,little}_endian` APIs return a `Vec<u8>` and are
+    /// guaranteed to never panic or error. This is only possible because the
+    /// implementation guarantees that it will allocate a `Vec<u8>` that is
+    /// big enough.
+    ///
+    /// In summary, if a new serialization error kind needs to be added, then
+    /// it will need careful consideration.
+    what: &'static str,
+}
+
+impl SerializeError {
+    pub(crate) fn buffer_too_small(what: &'static str) -> SerializeError {
+        SerializeError { what }
+    }
+}
+
+impl core::fmt::Display for SerializeError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "destination buffer is too small to write {}", self.what)
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for SerializeError {}
+
+/// An error that occurs when deserializing an object defined in this crate.
+///
+/// Serialization, as used in this crate, universally refers to the process
+/// of transforming a structure (like a DFA) into a custom binary format
+/// represented by `&[u8]`. Deserialization, then, refers to the process of
+/// cheaply converting this binary format back to the object's in-memory
+/// representation as defined in this crate. To the extent possible,
+/// deserialization will report this error whenever this process fails.
+///
+/// A `DeserializeError` provides no introspection capabilities. Its only
+/// supported operation is conversion to a human readable error message.
+///
+/// This error type implements the `std::error::Error` trait only when the
+/// `std` feature is enabled. Otherwise, this type is defined in all
+/// configurations.
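+///
+/// For example (a sketch, not a doctest from this crate), a deserialization
+/// API like [`dense::DFA::from_bytes`](crate::dfa::dense::DFA::from_bytes)
+/// reports this error, and the only useful thing to do with it is display it:
+///
+/// ```ignore
+/// use regex_automata::dfa::dense::DFA;
+///
+/// // Eight zero bytes cannot possibly hold a serialized DFA.
+/// let bytes = [0u8; 8];
+/// if let Err(err) = DFA::from_bytes(&bytes) {
+///     eprintln!("deserialization failed: {}", err);
+/// }
+/// ```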
+#[derive(Debug)] +pub struct DeserializeError(DeserializeErrorKind); + +#[derive(Debug)] +enum DeserializeErrorKind { + Generic { msg: &'static str }, + BufferTooSmall { what: &'static str }, + InvalidUsize { what: &'static str }, + VersionMismatch { expected: u32, found: u32 }, + EndianMismatch { expected: u32, found: u32 }, + AlignmentMismatch { alignment: usize, address: usize }, + LabelMismatch { expected: &'static str }, + ArithmeticOverflow { what: &'static str }, + PatternID { err: PatternIDError, what: &'static str }, + StateID { err: StateIDError, what: &'static str }, +} + +impl DeserializeError { + pub(crate) fn generic(msg: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::Generic { msg }) + } + + pub(crate) fn buffer_too_small(what: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::BufferTooSmall { what }) + } + + fn invalid_usize(what: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::InvalidUsize { what }) + } + + fn version_mismatch(expected: u32, found: u32) -> DeserializeError { + DeserializeError(DeserializeErrorKind::VersionMismatch { + expected, + found, + }) + } + + fn endian_mismatch(expected: u32, found: u32) -> DeserializeError { + DeserializeError(DeserializeErrorKind::EndianMismatch { + expected, + found, + }) + } + + fn alignment_mismatch( + alignment: usize, + address: usize, + ) -> DeserializeError { + DeserializeError(DeserializeErrorKind::AlignmentMismatch { + alignment, + address, + }) + } + + fn label_mismatch(expected: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::LabelMismatch { expected }) + } + + fn arithmetic_overflow(what: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::ArithmeticOverflow { what }) + } + + fn pattern_id_error( + err: PatternIDError, + what: &'static str, + ) -> DeserializeError { + DeserializeError(DeserializeErrorKind::PatternID { err, what }) + } + + pub(crate) fn state_id_error( + err: StateIDError, + what: &'static str, + ) -> DeserializeError { + DeserializeError(DeserializeErrorKind::StateID { err, what }) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for DeserializeError {} + +impl core::fmt::Display for DeserializeError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use self::DeserializeErrorKind::*; + + match self.0 { + Generic { msg } => write!(f, "{}", msg), + BufferTooSmall { what } => { + write!(f, "buffer is too small to read {}", what) + } + InvalidUsize { what } => { + write!(f, "{} is too big to fit in a usize", what) + } + VersionMismatch { expected, found } => write!( + f, + "unsupported version: \ + expected version {} but found version {}", + expected, found, + ), + EndianMismatch { expected, found } => write!( + f, + "endianness mismatch: expected 0x{:X} but got 0x{:X}. 
\
+                (Are you trying to load an object serialized with a \
+                different endianness?)",
+                expected, found,
+            ),
+            AlignmentMismatch { alignment, address } => write!(
+                f,
+                "alignment mismatch: slice starts at address \
+                0x{:X}, which is not aligned to a {} byte boundary",
+                address, alignment,
+            ),
+            LabelMismatch { expected } => write!(
+                f,
+                "label mismatch: start of serialized object should \
+                contain a NUL terminated {:?} label, but a different \
+                label was found",
+                expected,
+            ),
+            ArithmeticOverflow { what } => {
+                write!(f, "arithmetic overflow for {}", what)
+            }
+            PatternID { ref err, what } => {
+                write!(f, "failed to read pattern ID for {}: {}", what, err)
+            }
+            StateID { ref err, what } => {
+                write!(f, "failed to read state ID for {}: {}", what, err)
+            }
+        }
+    }
+}
+
+/// Safely converts a `&[u32]` to `&[StateID]` with zero cost.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+pub(crate) fn u32s_to_state_ids(slice: &[u32]) -> &[StateID] {
+    // SAFETY: This is safe because StateID is defined to have the same memory
+    // representation as a u32 (it is repr(transparent)). While not every u32
+    // is a "valid" StateID, callers are not permitted to rely on the validity
+    // of StateIDs for memory safety. It can only lead to logical errors. (This
+    // is why StateID::new_unchecked is safe.)
+    unsafe {
+        core::slice::from_raw_parts(
+            slice.as_ptr().cast::<StateID>(),
+            slice.len(),
+        )
+    }
+}
+
+/// Safely converts a `&mut [u32]` to `&mut [StateID]` with zero cost.
+pub(crate) fn u32s_to_state_ids_mut(slice: &mut [u32]) -> &mut [StateID] {
+    // SAFETY: This is safe because StateID is defined to have the same memory
+    // representation as a u32 (it is repr(transparent)). While not every u32
+    // is a "valid" StateID, callers are not permitted to rely on the validity
+    // of StateIDs for memory safety. It can only lead to logical errors. (This
+    // is why StateID::new_unchecked is safe.)
+    unsafe {
+        core::slice::from_raw_parts_mut(
+            slice.as_mut_ptr().cast::<StateID>(),
+            slice.len(),
+        )
+    }
+}
+
+/// Safely converts a `&[u32]` to `&[PatternID]` with zero cost.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+pub(crate) fn u32s_to_pattern_ids(slice: &[u32]) -> &[PatternID] {
+    // SAFETY: This is safe because PatternID is defined to have the same
+    // memory representation as a u32 (it is repr(transparent)). While not
+    // every u32 is a "valid" PatternID, callers are not permitted to rely
+    // on the validity of PatternIDs for memory safety. It can only lead to
+    // logical errors. (This is why PatternID::new_unchecked is safe.)
+    unsafe {
+        core::slice::from_raw_parts(
+            slice.as_ptr().cast::<PatternID>(),
+            slice.len(),
+        )
+    }
+}
+
+/// Checks that the given slice has an alignment that matches `T`.
+///
+/// This is useful for checking that a slice has an appropriate alignment
+/// before casting it to a &[T]. Note though that alignment is not itself
+/// sufficient to perform the cast for any `T`.
+pub(crate) fn check_alignment<T>(
+    slice: &[u8],
+) -> Result<(), DeserializeError> {
+    let alignment = core::mem::align_of::<T>();
+    let address = slice.as_ptr().as_usize();
+    if address % alignment == 0 {
+        return Ok(());
+    }
+    Err(DeserializeError::alignment_mismatch(alignment, address))
+}
+
+/// Reads a possibly empty amount of padding, up to 7 bytes, from the beginning
+/// of the given slice. All padding bytes must be NUL bytes.
+///
+/// This is useful because it can be theoretically necessary to pad the
+/// beginning of a serialized object with NUL bytes to ensure that it starts
+/// at a correctly aligned address. These padding bytes should come immediately
+/// before the label.
+///
+/// This returns the number of bytes read from the given slice.
+pub(crate) fn skip_initial_padding(slice: &[u8]) -> usize {
+    let mut nread = 0;
+    while nread < 7 && nread < slice.len() && slice[nread] == 0 {
+        nread += 1;
+    }
+    nread
+}
+
+/// Allocate a byte buffer of the given size, along with some initial padding
+/// such that `buf[padding..]` has the same alignment as `T`, where the
+/// alignment of `T` must be at most `8`. In particular, callers should treat
+/// the first N bytes (second return value) as padding bytes that must not be
+/// overwritten. In all cases, the following identity holds:
+///
+/// ```ignore
+/// let (buf, padding) = alloc_aligned_buffer::<T>(SIZE);
+/// assert_eq!(SIZE, buf[padding..].len());
+/// ```
+///
+/// In practice, padding is often zero.
+///
+/// The requirement for `8` as a maximum here is somewhat arbitrary. In
+/// practice, we never need anything bigger in this crate, and so this function
+/// does some sanity asserts under the assumption of a max alignment of `8`.
+#[cfg(feature = "alloc")]
+pub(crate) fn alloc_aligned_buffer<T>(size: usize) -> (Vec<u8>, usize) {
+    // NOTE: This is a kludge because there's no easy way to allocate a Vec
+    // with an alignment guaranteed to be greater than 1. We could create a
+    // Vec<T>, but this cannot be safely transmuted to a Vec<u8> without
+    // concern, since reallocing or dropping the Vec<u8> is UB (different
+    // alignment than the initial allocation). We could define a wrapper type
+    // to manage this for us, but it seems like more machinery than it's worth.
+    let buf = vec![0; size];
+    let align = core::mem::align_of::<T>();
+    let address = buf.as_ptr().as_usize();
+    if address % align == 0 {
+        return (buf, 0);
+    }
+    // Let's try this again. We have to create a totally new alloc with
+    // the maximum amount of bytes we might need. We can't just extend our
+    // pre-existing 'buf' because that might create a new alloc with a
+    // different alignment.
+    let extra = align - 1;
+    let mut buf = vec![0; size + extra];
+    let address = buf.as_ptr().as_usize();
+    // The code below handles the case where 'address' is aligned to T, so if
+    // we got lucky and 'address' is now aligned to T (when it previously
+    // wasn't), then we're done.
+    if address % align == 0 {
+        buf.truncate(size);
+        return (buf, 0);
+    }
+    let padding = ((address & !(align - 1)).checked_add(align).unwrap())
+        .checked_sub(address)
+        .unwrap();
+    assert!(padding <= 7, "padding of {} is bigger than 7", padding);
+    assert!(
+        padding <= extra,
+        "padding of {} is bigger than extra {} bytes",
+        padding,
+        extra
+    );
+    buf.truncate(size + padding);
+    assert_eq!(size + padding, buf.len());
+    assert_eq!(
+        0,
+        buf[padding..].as_ptr().as_usize() % align,
+        "expected end of initial padding to be aligned to {}",
+        align,
+    );
+    (buf, padding)
+}
+
+/// Reads a NUL terminated label starting at the beginning of the given slice.
+///
+/// If a NUL terminated label could not be found, then an error is returned.
+/// Similarly, if a label is found but doesn't match the expected label, then
+/// an error is returned.
+///
+/// Upon success, the total number of bytes read (including padding bytes) is
+/// returned.
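+///
+/// For example (mirroring the `labels` unit test at the bottom of this
+/// module): the label `"fooba"` serializes as `fooba\x00\x00\x00`, i.e. five
+/// label bytes, a NUL terminator and two more NUL bytes of padding to reach a
+/// multiple of 4, so reading it back yields `8`.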
+pub(crate) fn read_label(
+    slice: &[u8],
+    expected_label: &'static str,
+) -> Result<usize, DeserializeError> {
+    // Set an upper bound on how many bytes we scan for a NUL. Since no label
+    // in this crate is longer than 256 bytes, if we can't find one within that
+    // range, then we have corrupted data.
+    let first_nul =
+        slice[..cmp::min(slice.len(), 256)].iter().position(|&b| b == 0);
+    let first_nul = match first_nul {
+        Some(first_nul) => first_nul,
+        None => {
+            return Err(DeserializeError::generic(
+                "could not find NUL terminated label \
+                 at start of serialized object",
+            ));
+        }
+    };
+    let len = first_nul + padding_len(first_nul);
+    if slice.len() < len {
+        return Err(DeserializeError::generic(
+            "could not find properly sized label at start of serialized object"
+        ));
+    }
+    if expected_label.as_bytes() != &slice[..first_nul] {
+        return Err(DeserializeError::label_mismatch(expected_label));
+    }
+    Ok(len)
+}
+
+/// Writes the given label to the buffer as a NUL terminated string. The label
+/// given must not contain NUL, otherwise this will panic. Similarly, the label
+/// must not be longer than 255 bytes, otherwise this will panic.
+///
+/// Additional NUL bytes are written as necessary to ensure that the number of
+/// bytes written is always a multiple of 4.
+///
+/// Upon success, the total number of bytes written (including padding) is
+/// returned.
+pub(crate) fn write_label(
+    label: &str,
+    dst: &mut [u8],
+) -> Result<usize, SerializeError> {
+    let nwrite = write_label_len(label);
+    if dst.len() < nwrite {
+        return Err(SerializeError::buffer_too_small("label"));
+    }
+    dst[..label.len()].copy_from_slice(label.as_bytes());
+    for i in 0..(nwrite - label.len()) {
+        dst[label.len() + i] = 0;
+    }
+    assert_eq!(nwrite % 4, 0);
+    Ok(nwrite)
+}
+
+/// Returns the total number of bytes (including padding) that would be written
+/// for the given label. This panics if the given label contains a NUL byte or
+/// is longer than 255 bytes. (The size restriction exists so that searching
+/// for a label during deserialization can be done in small bounded space.)
+pub(crate) fn write_label_len(label: &str) -> usize {
+    if label.len() > 255 {
+        panic!("label must not be longer than 255 bytes");
+    }
+    if label.as_bytes().iter().position(|&b| b == 0).is_some() {
+        panic!("label must not contain NUL bytes");
+    }
+    let label_len = label.len() + 1; // +1 for the NUL terminator
+    label_len + padding_len(label_len)
+}
+
+/// Reads the endianness check from the beginning of the given slice and
+/// confirms that the endianness of the serialized object matches the expected
+/// endianness. If the slice is too small or if the endianness check fails,
+/// this returns an error.
+///
+/// Upon success, the total number of bytes read is returned.
+pub(crate) fn read_endianness_check(
+    slice: &[u8],
+) -> Result<usize, DeserializeError> {
+    let (n, nr) = try_read_u32(slice, "endianness check")?;
+    assert_eq!(nr, write_endianness_check_len());
+    if n != 0xFEFF {
+        return Err(DeserializeError::endian_mismatch(0xFEFF, n));
+    }
+    Ok(nr)
+}
+
+/// Writes 0xFEFF as an integer using the given endianness.
+///
+/// This is useful for writing into the header of a serialized object. It can
+/// be read during deserialization as a sanity check to ensure the proper
+/// endianness is used.
+///
+/// Upon success, the total number of bytes written is returned.
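+///
+/// For example (a property of the format itself, not extra API surface):
+/// writing the u32 `0xFEFF` in little endian produces the bytes `FF FE 00 00`,
+/// while big endian produces `00 00 FE FF`. A reader decoding the value in
+/// native endianness then sees something other than `0xFEFF` whenever the
+/// endianness of the serialized object doesn't match the target's.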
+pub(crate) fn write_endianness_check<E: Endian>(
+    dst: &mut [u8],
+) -> Result<usize, SerializeError> {
+    let nwrite = write_endianness_check_len();
+    if dst.len() < nwrite {
+        return Err(SerializeError::buffer_too_small("endianness check"));
+    }
+    E::write_u32(0xFEFF, dst);
+    Ok(nwrite)
+}
+
+/// Returns the number of bytes written by the endianness check.
+pub(crate) fn write_endianness_check_len() -> usize {
+    size_of::<u32>()
+}
+
+/// Reads a version number from the beginning of the given slice and confirms
+/// that it matches the expected version number given. If the slice is too
+/// small or if the version numbers aren't equivalent, this returns an error.
+///
+/// Upon success, the total number of bytes read is returned.
+///
+/// N.B. Currently, we require that the version number is exactly equivalent.
+/// In the future, if we bump the version number without a semver bump, then
+/// we'll need to relax this a bit and support older versions.
+pub(crate) fn read_version(
+    slice: &[u8],
+    expected_version: u32,
+) -> Result<usize, DeserializeError> {
+    let (n, nr) = try_read_u32(slice, "version")?;
+    assert_eq!(nr, write_version_len());
+    if n != expected_version {
+        return Err(DeserializeError::version_mismatch(expected_version, n));
+    }
+    Ok(nr)
+}
+
+/// Writes the given version number to the beginning of the given slice.
+///
+/// This is useful for writing into the header of a serialized object. It can
+/// be read during deserialization as a sanity check to ensure that the library
+/// code supports the format of the serialized object.
+///
+/// Upon success, the total number of bytes written is returned.
+pub(crate) fn write_version<E: Endian>(
+    version: u32,
+    dst: &mut [u8],
+) -> Result<usize, SerializeError> {
+    let nwrite = write_version_len();
+    if dst.len() < nwrite {
+        return Err(SerializeError::buffer_too_small("version number"));
+    }
+    E::write_u32(version, dst);
+    Ok(nwrite)
+}
+
+/// Returns the number of bytes written by writing the version number.
+pub(crate) fn write_version_len() -> usize {
+    size_of::<u32>()
+}
+
+/// Reads a pattern ID from the given slice. If the slice has insufficient
+/// length, then this panics. If the deserialized integer exceeds the pattern
+/// ID limit for the current target, then this returns an error.
+///
+/// Upon success, this also returns the number of bytes read.
+pub(crate) fn read_pattern_id(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(PatternID, usize), DeserializeError> {
+    let bytes: [u8; PatternID::SIZE] =
+        slice[..PatternID::SIZE].try_into().unwrap();
+    let pid = PatternID::from_ne_bytes(bytes)
+        .map_err(|err| DeserializeError::pattern_id_error(err, what))?;
+    Ok((pid, PatternID::SIZE))
+}
+
+/// Reads a pattern ID from the given slice. If the slice has insufficient
+/// length, then this panics. Otherwise, the deserialized integer is assumed
+/// to be a valid pattern ID.
+///
+/// This also returns the number of bytes read.
+pub(crate) fn read_pattern_id_unchecked(slice: &[u8]) -> (PatternID, usize) {
+    let pid = PatternID::from_ne_bytes_unchecked(
+        slice[..PatternID::SIZE].try_into().unwrap(),
+    );
+    (pid, PatternID::SIZE)
+}
+
+/// Write the given pattern ID to the beginning of the given slice of bytes
+/// using the specified endianness. The given slice must have length at least
+/// `PatternID::SIZE`, or else this panics. Upon success, the total number of
+/// bytes written is returned.
+pub(crate) fn write_pattern_id<E: Endian>(
+    pid: PatternID,
+    dst: &mut [u8],
+) -> usize {
+    E::write_u32(pid.as_u32(), dst);
+    PatternID::SIZE
+}
+
+/// Attempts to read a state ID from the given slice. If the slice has an
+/// insufficient number of bytes or if the state ID exceeds the limit for
+/// the current target, then this returns an error.
+///
+/// Upon success, this also returns the number of bytes read.
+pub(crate) fn try_read_state_id(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(StateID, usize), DeserializeError> {
+    if slice.len() < StateID::SIZE {
+        return Err(DeserializeError::buffer_too_small(what));
+    }
+    read_state_id(slice, what)
+}
+
+/// Reads a state ID from the given slice. If the slice has insufficient
+/// length, then this panics. If the deserialized integer exceeds the state ID
+/// limit for the current target, then this returns an error.
+///
+/// Upon success, this also returns the number of bytes read.
+pub(crate) fn read_state_id(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(StateID, usize), DeserializeError> {
+    let bytes: [u8; StateID::SIZE] =
+        slice[..StateID::SIZE].try_into().unwrap();
+    let sid = StateID::from_ne_bytes(bytes)
+        .map_err(|err| DeserializeError::state_id_error(err, what))?;
+    Ok((sid, StateID::SIZE))
+}
+
+/// Reads a state ID from the given slice. If the slice has insufficient
+/// length, then this panics. Otherwise, the deserialized integer is assumed
+/// to be a valid state ID.
+///
+/// This also returns the number of bytes read.
+pub(crate) fn read_state_id_unchecked(slice: &[u8]) -> (StateID, usize) {
+    let sid = StateID::from_ne_bytes_unchecked(
+        slice[..StateID::SIZE].try_into().unwrap(),
+    );
+    (sid, StateID::SIZE)
+}
+
+/// Write the given state ID to the beginning of the given slice of bytes
+/// using the specified endianness. The given slice must have length at least
+/// `StateID::SIZE`, or else this panics. Upon success, the total number of
+/// bytes written is returned.
+pub(crate) fn write_state_id<E: Endian>(
+    sid: StateID,
+    dst: &mut [u8],
+) -> usize {
+    E::write_u32(sid.as_u32(), dst);
+    StateID::SIZE
+}
+
+/// Try to read a u16 as a usize from the beginning of the given slice in
+/// native endian format. If the slice has fewer than 2 bytes or if the
+/// deserialized number cannot be represented by usize, then this returns an
+/// error. The error message will include the `what` description of what is
+/// being deserialized, for better error messages. `what` should be a noun in
+/// singular form.
+///
+/// Upon success, this also returns the number of bytes read.
+pub(crate) fn try_read_u16_as_usize(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(usize, usize), DeserializeError> {
+    try_read_u16(slice, what).and_then(|(n, nr)| {
+        usize::try_from(n)
+            .map(|n| (n, nr))
+            .map_err(|_| DeserializeError::invalid_usize(what))
+    })
+}
+
+/// Try to read a u32 as a usize from the beginning of the given slice in
+/// native endian format. If the slice has fewer than 4 bytes or if the
+/// deserialized number cannot be represented by usize, then this returns an
+/// error. The error message will include the `what` description of what is
+/// being deserialized, for better error messages. `what` should be a noun in
+/// singular form.
+///
+/// Upon success, this also returns the number of bytes read.
+pub(crate) fn try_read_u32_as_usize(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(usize, usize), DeserializeError> {
+    try_read_u32(slice, what).and_then(|(n, nr)| {
+        usize::try_from(n)
+            .map(|n| (n, nr))
+            .map_err(|_| DeserializeError::invalid_usize(what))
+    })
+}
+
+/// Try to read a u16 from the beginning of the given slice in native endian
+/// format. If the slice has fewer than 2 bytes, then this returns an error.
+/// The error message will include the `what` description of what is being
+/// deserialized, for better error messages. `what` should be a noun in
+/// singular form.
+///
+/// Upon success, this also returns the number of bytes read.
+pub(crate) fn try_read_u16(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(u16, usize), DeserializeError> {
+    check_slice_len(slice, size_of::<u16>(), what)?;
+    Ok((read_u16(slice), size_of::<u16>()))
+}
+
+/// Try to read a u32 from the beginning of the given slice in native endian
+/// format. If the slice has fewer than 4 bytes, then this returns an error.
+/// The error message will include the `what` description of what is being
+/// deserialized, for better error messages. `what` should be a noun in
+/// singular form.
+///
+/// Upon success, this also returns the number of bytes read.
+pub(crate) fn try_read_u32(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(u32, usize), DeserializeError> {
+    check_slice_len(slice, size_of::<u32>(), what)?;
+    Ok((read_u32(slice), size_of::<u32>()))
+}
+
+/// Try to read a u128 from the beginning of the given slice in native endian
+/// format. If the slice has fewer than 16 bytes, then this returns an error.
+/// The error message will include the `what` description of what is being
+/// deserialized, for better error messages. `what` should be a noun in
+/// singular form.
+///
+/// Upon success, this also returns the number of bytes read.
+pub(crate) fn try_read_u128(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(u128, usize), DeserializeError> {
+    check_slice_len(slice, size_of::<u128>(), what)?;
+    Ok((read_u128(slice), size_of::<u128>()))
+}
+
+/// Read a u16 from the beginning of the given slice in native endian format.
+/// If the slice has fewer than 2 bytes, then this panics.
+///
+/// Marked as inline to speed up sparse searching which decodes integers from
+/// its automaton at search time.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+pub(crate) fn read_u16(slice: &[u8]) -> u16 {
+    let bytes: [u8; 2] = slice[..size_of::<u16>()].try_into().unwrap();
+    u16::from_ne_bytes(bytes)
+}
+
+/// Read a u32 from the beginning of the given slice in native endian format.
+/// If the slice has fewer than 4 bytes, then this panics.
+///
+/// Marked as inline to speed up sparse searching which decodes integers from
+/// its automaton at search time.
+#[cfg_attr(feature = "perf-inline", inline(always))]
+pub(crate) fn read_u32(slice: &[u8]) -> u32 {
+    let bytes: [u8; 4] = slice[..size_of::<u32>()].try_into().unwrap();
+    u32::from_ne_bytes(bytes)
+}
+
+/// Read a u128 from the beginning of the given slice in native endian format.
+/// If the slice has fewer than 16 bytes, then this panics.
+pub(crate) fn read_u128(slice: &[u8]) -> u128 {
+    let bytes: [u8; 16] = slice[..size_of::<u128>()].try_into().unwrap();
+    u128::from_ne_bytes(bytes)
+}
+
+/// Checks that the given slice has some minimal length. If it's smaller than
+/// the bound given, then a "buffer too small" error is returned with `what`
+/// describing what the buffer represents.
+pub(crate) fn check_slice_len<T>(
+    slice: &[T],
+    at_least_len: usize,
+    what: &'static str,
+) -> Result<(), DeserializeError> {
+    if slice.len() < at_least_len {
+        return Err(DeserializeError::buffer_too_small(what));
+    }
+    Ok(())
+}
+
+/// Multiply the given numbers, and on overflow, return an error that includes
+/// 'what' in the error message.
+///
+/// This is useful when doing arithmetic with untrusted data.
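+///
+/// For example, computing the in-memory size of a transition table from
+/// untrusted header values might look like this (a sketch; the variable names
+/// and `what` strings here are illustrative, not lifted from the
+/// deserializers):
+///
+/// ```ignore
+/// let entries = mul(state_len, stride, "transition table length")?;
+/// let bytes = mul(entries, size_of::<u32>(), "transition table bytes")?;
+/// ```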
+pub(crate) fn mul(
+    a: usize,
+    b: usize,
+    what: &'static str,
+) -> Result<usize, DeserializeError> {
+    match a.checked_mul(b) {
+        Some(c) => Ok(c),
+        None => Err(DeserializeError::arithmetic_overflow(what)),
+    }
+}
+
+/// Add the given numbers, and on overflow, return an error that includes
+/// 'what' in the error message.
+///
+/// This is useful when doing arithmetic with untrusted data.
+pub(crate) fn add(
+    a: usize,
+    b: usize,
+    what: &'static str,
+) -> Result<usize, DeserializeError> {
+    match a.checked_add(b) {
+        Some(c) => Ok(c),
+        None => Err(DeserializeError::arithmetic_overflow(what)),
+    }
+}
+
+/// Shift `a` left by `b`, and on overflow, return an error that includes
+/// 'what' in the error message.
+///
+/// This is useful when doing arithmetic with untrusted data.
+pub(crate) fn shl(
+    a: usize,
+    b: usize,
+    what: &'static str,
+) -> Result<usize, DeserializeError> {
+    let amount = u32::try_from(b)
+        .map_err(|_| DeserializeError::arithmetic_overflow(what))?;
+    match a.checked_shl(amount) {
+        Some(c) => Ok(c),
+        None => Err(DeserializeError::arithmetic_overflow(what)),
+    }
+}
+
+/// Returns the number of additional bytes required to add to the given length
+/// in order to make the total length a multiple of 4. The return value is
+/// always less than 4.
+pub(crate) fn padding_len(non_padding_len: usize) -> usize {
+    (4 - (non_padding_len & 0b11)) & 0b11
+}
+
+/// A simple trait for writing code generic over endianness.
+///
+/// This is similar to what byteorder provides, but we only need a very small
+/// subset.
+pub(crate) trait Endian {
+    /// Writes a u16 to the given destination buffer in a particular
+    /// endianness. If the destination buffer has a length smaller than 2, then
+    /// this panics.
+    fn write_u16(n: u16, dst: &mut [u8]);
+
+    /// Writes a u32 to the given destination buffer in a particular
+    /// endianness. If the destination buffer has a length smaller than 4, then
+    /// this panics.
+    fn write_u32(n: u32, dst: &mut [u8]);
+
+    /// Writes a u64 to the given destination buffer in a particular
+    /// endianness. If the destination buffer has a length smaller than 8, then
+    /// this panics.
+    fn write_u64(n: u64, dst: &mut [u8]);
+
+    /// Writes a u128 to the given destination buffer in a particular
+    /// endianness. If the destination buffer has a length smaller than 16,
+    /// then this panics.
+    fn write_u128(n: u128, dst: &mut [u8]);
+}
+
+/// Little endian writing.
+pub(crate) enum LE {}
+/// Big endian writing.
+pub(crate) enum BE {} + +#[cfg(target_endian = "little")] +pub(crate) type NE = LE; +#[cfg(target_endian = "big")] +pub(crate) type NE = BE; + +impl Endian for LE { + fn write_u16(n: u16, dst: &mut [u8]) { + dst[..2].copy_from_slice(&n.to_le_bytes()); + } + + fn write_u32(n: u32, dst: &mut [u8]) { + dst[..4].copy_from_slice(&n.to_le_bytes()); + } + + fn write_u64(n: u64, dst: &mut [u8]) { + dst[..8].copy_from_slice(&n.to_le_bytes()); + } + + fn write_u128(n: u128, dst: &mut [u8]) { + dst[..16].copy_from_slice(&n.to_le_bytes()); + } +} + +impl Endian for BE { + fn write_u16(n: u16, dst: &mut [u8]) { + dst[..2].copy_from_slice(&n.to_be_bytes()); + } + + fn write_u32(n: u32, dst: &mut [u8]) { + dst[..4].copy_from_slice(&n.to_be_bytes()); + } + + fn write_u64(n: u64, dst: &mut [u8]) { + dst[..8].copy_from_slice(&n.to_be_bytes()); + } + + fn write_u128(n: u128, dst: &mut [u8]) { + dst[..16].copy_from_slice(&n.to_be_bytes()); + } +} + +#[cfg(all(test, feature = "alloc"))] +mod tests { + use super::*; + + #[test] + fn labels() { + let mut buf = [0; 1024]; + + let nwrite = write_label("fooba", &mut buf).unwrap(); + assert_eq!(nwrite, 8); + assert_eq!(&buf[..nwrite], b"fooba\x00\x00\x00"); + + let nread = read_label(&buf, "fooba").unwrap(); + assert_eq!(nread, 8); + } + + #[test] + #[should_panic] + fn bad_label_interior_nul() { + // interior NULs are not allowed + write_label("foo\x00bar", &mut [0; 1024]).unwrap(); + } + + #[test] + fn bad_label_almost_too_long() { + // ok + write_label(&"z".repeat(255), &mut [0; 1024]).unwrap(); + } + + #[test] + #[should_panic] + fn bad_label_too_long() { + // labels longer than 255 bytes are banned + write_label(&"z".repeat(256), &mut [0; 1024]).unwrap(); + } + + #[test] + fn padding() { + assert_eq!(0, padding_len(8)); + assert_eq!(3, padding_len(9)); + assert_eq!(2, padding_len(10)); + assert_eq!(1, padding_len(11)); + assert_eq!(0, padding_len(12)); + assert_eq!(3, padding_len(13)); + assert_eq!(2, padding_len(14)); + assert_eq!(1, padding_len(15)); + assert_eq!(0, padding_len(16)); + } +} diff --git a/vendor/regex-automata/test b/vendor/regex-automata/test new file mode 100755 index 0000000..df3e5ae --- /dev/null +++ b/vendor/regex-automata/test @@ -0,0 +1,95 @@ +#!/bin/bash + +# This is a script that attempts to *approximately* exhaustively run the test +# suite for regex-automata. The main reason for why 'cargo test' isn't enough +# is because of crate features. regex-automata has a ton of them. This script +# tests many of those feature combinations (although not all) to try to get +# decent coverage in a finite amount of time. + +set -e + +# cd to the directory containing this crate's Cargo.toml so that we don't need +# to pass --manifest-path to every `cargo` command. +cd "$(dirname "$0")" + +echo "===== ALL FEATURES TEST ===" +cargo test --all-features + +# Man I don't *want* to have this many crate features, but... I really want +# folks to be able to slim the crate down to just the things they want. But +# the main downside is that I just can't feasibly test every combination of +# features because there are too many of them. Sad, but I'm not sure if there +# is a better alternative. 
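+# Each entry below is a comma-separated feature list handed to `--features`
+# along with `--no-default-features`; the empty string exercises the crate
+# with no optional features enabled at all.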
+features=( + "" + "unicode-word-boundary" + "unicode-word-boundary,syntax,unicode-perl" + "unicode-word-boundary,syntax,dfa-build" + "nfa" + "dfa" + "hybrid" + "nfa,dfa" + "nfa,hybrid" + "dfa,hybrid" + "dfa-onepass" + "nfa-pikevm" + "nfa-backtrack" + "std" + "alloc" + "syntax" + "syntax,nfa-pikevm" + "syntax,hybrid" + "perf-literal-substring" + "perf-literal-multisubstring" + "meta" + "meta,nfa-backtrack" + "meta,hybrid" + "meta,dfa-build" + "meta,dfa-onepass" + "meta,nfa,dfa,hybrid,nfa-backtrack" + "meta,nfa,dfa,hybrid,nfa-backtrack,perf-literal-substring" + "meta,nfa,dfa,hybrid,nfa-backtrack,perf-literal-multisubstring" +) +for f in "${features[@]}"; do + echo "===== LIB FEATURES: $f ===" + # It's actually important to do a standard 'cargo build' in addition to a + # 'cargo test'. In particular, in the latter case, the dev-dependencies may + # wind up enabling features in dependencies (like memchr) that make it look + # like everything is well, but actually isn't. For example, the 'regex-test' + # dev-dependency uses 'bstr' and enables its 'std' feature, which in turn + # unconditionally enables 'memchr's 'std' feature. Since we're specifically + # looking to test that certain feature combinations work as expected, this + # can lead to things testing okay, but would actually fail to build. Yikes. + cargo build --no-default-features --lib --features "$f" + cargo test --no-default-features --lib --features "$f" +done + +# We can also run the integration test suite on stripped down features too. +# But the test suite doesn't do well with things like 'std' and 'unicode' +# disabled, so we always enable them. +features=( + "std,unicode,syntax,nfa-pikevm" + "std,unicode,syntax,nfa-backtrack" + "std,unicode,syntax,hybrid" + "std,unicode,syntax,dfa-onepass" + "std,unicode,syntax,dfa-search" + "std,unicode,syntax,dfa-build" + "std,unicode,meta" + # This one is a little tricky because it causes the backtracker to get used + # in more instances and results in failing tests for the 'earliest' tests. + # The actual results are semantically consistent with the API guarantee + # (the backtracker tends to report greater offsets because it isn't an FSM), + # but our tests are less flexible than the API guarantee and demand offsets + # reported by FSM regex engines. (Which is... all of them except for the + # backtracker.) + # "std,unicode,meta,nfa-backtrack" + "std,unicode,meta,hybrid" + "std,unicode,meta,dfa-onepass" + "std,unicode,meta,dfa-build" + "std,unicode,meta,nfa,dfa-onepass,hybrid" +) +for f in "${features[@]}"; do + echo "===== INTEGRATION FEATURES: $f ===" + cargo build --no-default-features --lib --features "$f" + cargo test --no-default-features --test integration --features "$f" +done diff --git a/vendor/regex-automata/tests/dfa/api.rs b/vendor/regex-automata/tests/dfa/api.rs new file mode 100644 index 0000000..96e73af --- /dev/null +++ b/vendor/regex-automata/tests/dfa/api.rs @@ -0,0 +1,69 @@ +use std::error::Error; + +use regex_automata::{ + dfa::{dense, Automaton, OverlappingState}, + nfa::thompson, + HalfMatch, Input, MatchError, +}; + +// Tests that quit bytes in the forward direction work correctly. 
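+//
+// (A quit byte makes the search stop with `MatchError::quit`, which carries
+// the offending byte and the offset at which it was seen; the assertions
+// below expect `b'x'` at offset 3 of "abcxyz".)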
+#[test]
+fn quit_fwd() -> Result<(), Box<dyn Error>> {
+    let dfa = dense::Builder::new()
+        .configure(dense::Config::new().quit(b'x', true))
+        .build("[[:word:]]+$")?;
+
+    assert_eq!(
+        Err(MatchError::quit(b'x', 3)),
+        dfa.try_search_fwd(&Input::new(b"abcxyz"))
+    );
+    assert_eq!(
+        dfa.try_search_overlapping_fwd(
+            &Input::new(b"abcxyz"),
+            &mut OverlappingState::start()
+        ),
+        Err(MatchError::quit(b'x', 3)),
+    );
+
+    Ok(())
+}
+
+// Tests that quit bytes in the reverse direction work correctly.
+#[test]
+fn quit_rev() -> Result<(), Box<dyn Error>> {
+    let dfa = dense::Builder::new()
+        .configure(dense::Config::new().quit(b'x', true))
+        .thompson(thompson::Config::new().reverse(true))
+        .build("^[[:word:]]+")?;
+
+    assert_eq!(
+        Err(MatchError::quit(b'x', 3)),
+        dfa.try_search_rev(&Input::new(b"abcxyz"))
+    );
+
+    Ok(())
+}
+
+// Tests that if we heuristically enable Unicode word boundaries but then
+// instruct that a non-ASCII byte should NOT be a quit byte, then the builder
+// will panic.
+#[test]
+#[should_panic]
+fn quit_panics() {
+    dense::Config::new().unicode_word_boundary(true).quit(b'\xFF', false);
+}
+
+// This tests an interesting case where even if the Unicode word boundary
+// option is disabled, setting all non-ASCII bytes to be quit bytes will cause
+// Unicode word boundaries to be enabled.
+#[test]
+fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
+    let mut config = dense::Config::new();
+    for b in 0x80..=0xFF {
+        config = config.quit(b, true);
+    }
+    let dfa = dense::Builder::new().configure(config).build(r"\b")?;
+    let expected = HalfMatch::must(0, 1);
+    assert_eq!(Ok(Some(expected)), dfa.try_search_fwd(&Input::new(b" a")));
+    Ok(())
+}
diff --git a/vendor/regex-automata/tests/dfa/mod.rs b/vendor/regex-automata/tests/dfa/mod.rs
new file mode 100644
index 0000000..0d8f539
--- /dev/null
+++ b/vendor/regex-automata/tests/dfa/mod.rs
@@ -0,0 +1,8 @@
+#[cfg(all(feature = "dfa-build", feature = "dfa-search"))]
+mod api;
+#[cfg(feature = "dfa-onepass")]
+mod onepass;
+#[cfg(all(feature = "dfa-build", feature = "dfa-search"))]
+mod regression;
+#[cfg(all(not(miri), feature = "dfa-build", feature = "dfa-search"))]
+mod suite;
diff --git a/vendor/regex-automata/tests/dfa/onepass/mod.rs b/vendor/regex-automata/tests/dfa/onepass/mod.rs
new file mode 100644
index 0000000..9d6ab47
--- /dev/null
+++ b/vendor/regex-automata/tests/dfa/onepass/mod.rs
@@ -0,0 +1,2 @@
+#[cfg(not(miri))]
+mod suite;
diff --git a/vendor/regex-automata/tests/dfa/onepass/suite.rs b/vendor/regex-automata/tests/dfa/onepass/suite.rs
new file mode 100644
index 0000000..20bd696
--- /dev/null
+++ b/vendor/regex-automata/tests/dfa/onepass/suite.rs
@@ -0,0 +1,197 @@
+use {
+    anyhow::Result,
+    regex_automata::{
+        dfa::onepass::{self, DFA},
+        nfa::thompson,
+        util::{iter, syntax},
+    },
+    regex_test::{
+        CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult,
+        TestRunner,
+    },
+};
+
+use crate::{create_input, suite, testify_captures, untestify_kind};
+
+const EXPANSIONS: &[&str] = &["is_match", "find", "captures"];
+
+/// Tests the default configuration of the one-pass DFA.
+#[test]
+fn default() -> Result<()> {
+    let builder = DFA::builder();
+    TestRunner::new()?
+        .expand(EXPANSIONS, |t| t.compiles())
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+/// Tests the one-pass DFA when 'starts_for_each_pattern' is enabled for all
+/// tests.
+#[test]
+fn starts_for_each_pattern() -> Result<()> {
+    let mut builder = DFA::builder();
+    builder.configure(DFA::config().starts_for_each_pattern(true));
+    TestRunner::new()?
+        .expand(EXPANSIONS, |t| t.compiles())
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+/// Tests the one-pass DFA when byte classes are disabled.
+///
+/// N.B. Disabling byte classes doesn't avoid any indirection at search time.
+/// All it does is cause every byte value to be its own distinct equivalence
+/// class.
+#[test]
+fn no_byte_classes() -> Result<()> {
+    let mut builder = DFA::builder();
+    builder.configure(DFA::config().byte_classes(false));
+    TestRunner::new()?
+        .expand(EXPANSIONS, |t| t.compiles())
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+fn compiler(
+    mut builder: onepass::Builder,
+) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> {
+    move |test, regexes| {
+        // Check if our regex contains things that aren't supported by DFAs.
+        // That is, Unicode word boundaries when searching non-ASCII text.
+        if !configure_onepass_builder(test, &mut builder) {
+            return Ok(CompiledRegex::skip());
+        }
+        let re = match builder.build_many(&regexes) {
+            Ok(re) => re,
+            Err(err) => {
+                let msg = err.to_string();
+                // This is pretty gross, but when a regex fails to compile as
+                // a one-pass regex, then we want to be OK with that and just
+                // skip the test. But we have to be careful to only skip it
+                // when the expected result is that the regex compiles. If
+                // the test is specifically checking that the regex does not
+                // compile, then we should bubble up that error and allow the
+                // test to pass.
+                //
+                // Since our error types are all generally opaque, we just
+                // look for an error string. Not great, but not the end of the
+                // world.
+                if test.compiles() && msg.contains("not one-pass") {
+                    return Ok(CompiledRegex::skip());
+                }
+                return Err(err.into());
+            }
+        };
+        let mut cache = re.create_cache();
+        Ok(CompiledRegex::compiled(move |test| -> TestResult {
+            run_test(&re, &mut cache, test)
+        }))
+    }
+}
+
+fn run_test(
+    re: &DFA,
+    cache: &mut onepass::Cache,
+    test: &RegexTest,
+) -> TestResult {
+    let input = create_input(test);
+    match test.additional_name() {
+        "is_match" => {
+            TestResult::matched(re.is_match(cache, input.earliest(true)))
+        }
+        "find" => match test.search_kind() {
+            SearchKind::Earliest | SearchKind::Leftmost => {
+                let input =
+                    input.earliest(test.search_kind() == SearchKind::Earliest);
+                let mut caps = re.create_captures();
+                let it = iter::Searcher::new(input)
+                    .into_matches_iter(|input| {
+                        re.try_search(cache, input, &mut caps)?;
+                        Ok(caps.get_match())
+                    })
+                    .infallible()
+                    .take(test.match_limit().unwrap_or(std::usize::MAX))
+                    .map(|m| Match {
+                        id: m.pattern().as_usize(),
+                        span: Span { start: m.start(), end: m.end() },
+                    });
+                TestResult::matches(it)
+            }
+            SearchKind::Overlapping => {
+                // The one-pass DFA does not support any kind of overlapping
+                // search. This is not just a matter of not having the API.
+                // It's fundamentally incompatible with the one-pass concept.
+                // If overlapping matches were possible, then the one-pass DFA
+                // would fail to build.
+ TestResult::skip() + } + }, + "captures" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + let input = + input.earliest(test.search_kind() == SearchKind::Earliest); + let it = iter::Searcher::new(input) + .into_captures_iter(re.create_captures(), |input, caps| { + re.try_search(cache, input, caps) + }) + .infallible() + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Overlapping => { + // The one-pass DFA does not support any kind of overlapping + // search. This is not just a matter of not having the API. + // It's fundamentally incompatible with the one-pass concept. + // If overlapping matches were possible, then the one-pass DFA + // would fail to build. + TestResult::skip() + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_onepass_builder( + test: &RegexTest, + builder: &mut onepass::Builder, +) -> bool { + if !test.anchored() { + return false; + } + let match_kind = match untestify_kind(test.match_kind()) { + None => return false, + Some(k) => k, + }; + + let config = DFA::config().match_kind(match_kind); + builder + .configure(config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} diff --git a/vendor/regex-automata/tests/dfa/regression.rs b/vendor/regex-automata/tests/dfa/regression.rs new file mode 100644 index 0000000..09caffa --- /dev/null +++ b/vendor/regex-automata/tests/dfa/regression.rs @@ -0,0 +1,48 @@ +// A regression test for checking that minimization correctly translates +// whether a state is a match state or not. Previously, it was possible for +// minimization to mark a non-matching state as matching. +#[test] +#[cfg(not(miri))] +fn minimize_sets_correct_match_states() { + use regex_automata::{ + dfa::{dense::DFA, Automaton, StartKind}, + Anchored, Input, + }; + + let pattern = + // This is a subset of the grapheme matching regex. I couldn't seem + // to get a repro any smaller than this unfortunately. 
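+        // (The subset below covers Hangul syllable clusters, emoji ZWJ
+        // sequences, and a catch-all for non-control code points, each
+        // optionally preceded by prepend characters and followed by
+        // grapheme extenders.)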
+ r"(?x) + (?: + \p{gcb=Prepend}* + (?: + (?: + (?: + \p{gcb=L}* + (?:\p{gcb=V}+|\p{gcb=LV}\p{gcb=V}*|\p{gcb=LVT}) + \p{gcb=T}* + ) + | + \p{gcb=L}+ + | + \p{gcb=T}+ + ) + | + \p{Extended_Pictographic} + (?:\p{gcb=Extend}*\p{gcb=ZWJ}\p{Extended_Pictographic})* + | + [^\p{gcb=Control}\p{gcb=CR}\p{gcb=LF}] + ) + [\p{gcb=Extend}\p{gcb=ZWJ}\p{gcb=SpacingMark}]* + ) + "; + + let dfa = DFA::builder() + .configure( + DFA::config().start_kind(StartKind::Anchored).minimize(true), + ) + .build(pattern) + .unwrap(); + let input = Input::new(b"\xE2").anchored(Anchored::Yes); + assert_eq!(Ok(None), dfa.try_search_fwd(&input)); +} diff --git a/vendor/regex-automata/tests/dfa/suite.rs b/vendor/regex-automata/tests/dfa/suite.rs new file mode 100644 index 0000000..8ed6dd0 --- /dev/null +++ b/vendor/regex-automata/tests/dfa/suite.rs @@ -0,0 +1,443 @@ +use { + anyhow::Result, + regex_automata::{ + dfa::{ + self, dense, regex::Regex, sparse, Automaton, OverlappingState, + StartKind, + }, + nfa::thompson, + util::{prefilter::Prefilter, syntax}, + Anchored, Input, PatternSet, + }, + regex_test::{ + CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, + TestRunner, + }, +}; + +use crate::{create_input, suite, untestify_kind}; + +const EXPANSIONS: &[&str] = &["is_match", "find", "which"]; + +/// Runs the test suite with the default configuration. +#[test] +fn unminimized_default() -> Result<()> { + let builder = Regex::builder(); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite with the default configuration and a prefilter enabled, +/// if one can be built. +#[test] +fn unminimized_prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = Regex::builder(); + builder.dense(dense::DFA::config().prefilter(pre)); + compiler(builder, |_, _, re| { + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, test) + })) + })(test, regexes) + }; + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), my_compiler) + .assert(); + Ok(()) +} + +/// Runs the test suite with start states specialized. +#[test] +fn unminimized_specialized_start_states() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().specialize_start_states(true)); + + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite with byte classes disabled. +#[test] +fn unminimized_no_byte_class() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().byte_classes(false)); + + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite with NFA shrinking enabled. 
+#[test] +fn unminimized_nfa_shrink() -> Result<()> { + let mut builder = Regex::builder(); + builder.thompson(thompson::Config::new().shrink(true)); + + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite on a minimized DFA with an otherwise default +/// configuration. +#[test] +fn minimized_default() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().minimize(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite on a minimized DFA with byte classes disabled. +#[test] +fn minimized_no_byte_class() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().minimize(true).byte_classes(false)); + + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite on a sparse unminimized DFA. +#[test] +fn sparse_unminimized_default() -> Result<()> { + let builder = Regex::builder(); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), sparse_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite on a sparse unminimized DFA with prefilters enabled. +#[test] +fn sparse_unminimized_prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = Regex::builder(); + builder.dense(dense::DFA::config().prefilter(pre)); + compiler(builder, |builder, _, re| { + let fwd = re.forward().to_sparse()?; + let rev = re.reverse().to_sparse()?; + let re = builder.build_from_dfas(fwd, rev); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, test) + })) + })(test, regexes) + }; + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), my_compiler) + .assert(); + Ok(()) +} + +/// Another basic sanity test that checks we can serialize and then deserialize +/// a regex, and that the resulting regex can be used for searching correctly. +#[test] +fn serialization_unminimized_default() -> Result<()> { + let builder = Regex::builder(); + let my_compiler = |builder| { + compiler(builder, |builder, _, re| { + let builder = builder.clone(); + let (fwd_bytes, _) = re.forward().to_bytes_native_endian(); + let (rev_bytes, _) = re.reverse().to_bytes_native_endian(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + let fwd: dense::DFA<&[u32]> = + dense::DFA::from_bytes(&fwd_bytes).unwrap().0; + let rev: dense::DFA<&[u32]> = + dense::DFA::from_bytes(&rev_bytes).unwrap().0; + let re = builder.build_from_dfas(fwd, rev); + + run_test(&re, test) + })) + }) + }; + TestRunner::new()? 
+        .expand(EXPANSIONS, |t| t.compiles())
+        .blacklist("expensive")
+        .test_iter(suite()?.iter(), my_compiler(builder))
+        .assert();
+    Ok(())
+}
+
+/// A basic sanity test that checks we can serialize and then deserialize a
+/// regex using sparse DFAs, and that the resulting regex can be used for
+/// searching correctly.
+#[test]
+fn sparse_serialization_unminimized_default() -> Result<()> {
+    let builder = Regex::builder();
+    let my_compiler = |builder| {
+        compiler(builder, |builder, _, re| {
+            let builder = builder.clone();
+            let fwd_bytes = re.forward().to_sparse()?.to_bytes_native_endian();
+            let rev_bytes = re.reverse().to_sparse()?.to_bytes_native_endian();
+            Ok(CompiledRegex::compiled(move |test| -> TestResult {
+                let fwd: sparse::DFA<&[u8]> =
+                    sparse::DFA::from_bytes(&fwd_bytes).unwrap().0;
+                let rev: sparse::DFA<&[u8]> =
+                    sparse::DFA::from_bytes(&rev_bytes).unwrap().0;
+                let re = builder.build_from_dfas(fwd, rev);
+                run_test(&re, test)
+            }))
+        })
+    };
+    TestRunner::new()?
+        .expand(EXPANSIONS, |t| t.compiles())
+        .blacklist("expensive")
+        .test_iter(suite()?.iter(), my_compiler(builder))
+        .assert();
+    Ok(())
}
+
+fn dense_compiler(
+    builder: dfa::regex::Builder,
+) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> {
+    compiler(builder, |_, _, re| {
+        Ok(CompiledRegex::compiled(move |test| -> TestResult {
+            run_test(&re, test)
+        }))
+    })
+}
+
+fn sparse_compiler(
+    builder: dfa::regex::Builder,
+) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> {
+    compiler(builder, |builder, _, re| {
+        let fwd = re.forward().to_sparse()?;
+        let rev = re.reverse().to_sparse()?;
+        let re = builder.build_from_dfas(fwd, rev);
+        Ok(CompiledRegex::compiled(move |test| -> TestResult {
+            run_test(&re, test)
+        }))
+    })
+}
+
+fn compiler(
+    mut builder: dfa::regex::Builder,
+    mut create_matcher: impl FnMut(
+        &dfa::regex::Builder,
+        Option<Prefilter>,
+        Regex,
+    ) -> Result<CompiledRegex>,
+) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> {
+    move |test, regexes| {
+        // Parse regexes as HIRs for some analysis below.
+        let mut hirs = vec![];
+        for pattern in regexes.iter() {
+            hirs.push(syntax::parse_with(pattern, &config_syntax(test))?);
+        }
+
+        // Get a prefilter in case the test wants it.
+        let kind = match untestify_kind(test.match_kind()) {
+            None => return Ok(CompiledRegex::skip()),
+            Some(kind) => kind,
+        };
+        let pre = Prefilter::from_hirs_prefix(kind, &hirs);
+
+        // Check if our regex contains things that aren't supported by DFAs.
+        // That is, Unicode word boundaries when searching non-ASCII text.
+        if !test.haystack().is_ascii() {
+            for hir in hirs.iter() {
+                if hir.properties().look_set().contains_word_unicode() {
+                    return Ok(CompiledRegex::skip());
+                }
+            }
+        }
+        if !configure_regex_builder(test, &mut builder) {
+            return Ok(CompiledRegex::skip());
+        }
+        create_matcher(&builder, pre, builder.build_many(&regexes)?)
+    }
+}
+
+fn run_test(re: &Regex, test: &RegexTest) -> TestResult {
+    let input = create_input(test);
+    match test.additional_name() {
+        "is_match" => TestResult::matched(re.is_match(input.earliest(true))),
+        "find" => match test.search_kind() {
+            SearchKind::Earliest | SearchKind::Leftmost => {
+                let input =
+                    input.earliest(test.search_kind() == SearchKind::Earliest);
+                TestResult::matches(
+                    re.find_iter(input)
+                        .take(test.match_limit().unwrap_or(std::usize::MAX))
+                        .map(|m| Match {
+                            id: m.pattern().as_usize(),
+                            span: Span { start: m.start(), end: m.end() },
+                        }),
+                )
+            }
+            SearchKind::Overlapping => {
+                try_search_overlapping(re, &input).unwrap()
+            }
+        },
+        "which" => match test.search_kind() {
+            SearchKind::Earliest | SearchKind::Leftmost => {
+                // There are no "which" APIs for standard searches.
+                TestResult::skip()
+            }
+            SearchKind::Overlapping => {
+                let dfa = re.forward();
+                let mut patset = PatternSet::new(dfa.pattern_len());
+                dfa.try_which_overlapping_matches(&input, &mut patset)
+                    .unwrap();
+                TestResult::which(patset.iter().map(|p| p.as_usize()))
+            }
+        },
+        name => TestResult::fail(&format!("unrecognized test name: {}", name)),
+    }
+}
+
+/// Configures the given regex builder with all relevant settings on the given
+/// regex test.
+///
+/// If the regex test has a setting that is unsupported, then this returns
+/// false (implying the test should be skipped).
+fn configure_regex_builder(
+    test: &RegexTest,
+    builder: &mut dfa::regex::Builder,
+) -> bool {
+    let match_kind = match untestify_kind(test.match_kind()) {
+        None => return false,
+        Some(k) => k,
+    };
+
+    let starts = if test.anchored() {
+        StartKind::Anchored
+    } else {
+        StartKind::Unanchored
+    };
+    let mut dense_config = dense::Config::new()
+        .start_kind(starts)
+        .match_kind(match_kind)
+        .unicode_word_boundary(true);
+    // When doing an overlapping search, we might try to find the start of each
+    // match with a custom search routine. In that case, we need to tell the
+    // reverse search (for the start offset) which pattern to look for. The
+    // only way that API works is when anchored starting states are compiled
+    // for each pattern. This does technically also enable it for the forward
+    // DFA, but we're okay with that.
+    if test.search_kind() == SearchKind::Overlapping {
+        dense_config = dense_config.starts_for_each_pattern(true);
+    }
+
+    builder
+        .syntax(config_syntax(test))
+        .thompson(config_thompson(test))
+        .dense(dense_config);
+    true
+}
+
+/// Configuration of a Thompson NFA compiler from a regex test.
+fn config_thompson(test: &RegexTest) -> thompson::Config {
+    let mut lookm = regex_automata::util::look::LookMatcher::new();
+    lookm.set_line_terminator(test.line_terminator());
+    thompson::Config::new().utf8(test.utf8()).look_matcher(lookm)
+}
+
+/// Configuration of the regex syntax from a regex test.
+fn config_syntax(test: &RegexTest) -> syntax::Config {
+    syntax::Config::new()
+        .case_insensitive(test.case_insensitive())
+        .unicode(test.unicode())
+        .utf8(test.utf8())
+        .line_terminator(test.line_terminator())
+}
+
+/// Execute an overlapping search, and for each match found, also find its
+/// overlapping starting positions.
+///
+/// N.B. This routine used to be part of the crate API, but 1) it wasn't clear
+/// to me how useful it was and 2) it wasn't clear to me what its semantics
+/// should be. In particular, a potentially surprising footgun of this routine
+/// is that it is worst case *quadratic* in the size of the haystack.
Namely, it's
+/// possible to report a match at every position, and for every such position,
+/// scan all the way to the beginning of the haystack to find the starting
+/// position. Typical leftmost non-overlapping searches don't suffer from this
+/// because, well, matches can't overlap. So subsequent searches after a match
+/// is found don't revisit previously scanned parts of the haystack.
+///
+/// Its semantics can be strange for other reasons too. For example, given
+/// the regex '.*' and the haystack 'zz', the full set of overlapping matches
+/// is: [0, 0], [1, 1], [0, 1], [2, 2], [1, 2], [0, 2]. The ordering of
+/// those matches is quite strange, but makes sense when you think about the
+/// implementation: an end offset is found left-to-right, and then one or more
+/// starting offsets are found right-to-left.
+///
+/// Nevertheless, we provide this routine in our test suite because it's
+/// useful to test the low level DFA overlapping search and our test suite
+/// is written in a way that requires starting offsets.
+fn try_search_overlapping(
+    re: &Regex,
+    input: &Input<'_>,
+) -> Result<TestResult> {
+    let mut matches = vec![];
+    let mut fwd_state = OverlappingState::start();
+    let (fwd_dfa, rev_dfa) = (re.forward(), re.reverse());
+    while let Some(end) = {
+        fwd_dfa.try_search_overlapping_fwd(input, &mut fwd_state)?;
+        fwd_state.get_match()
+    } {
+        let revsearch = input
+            .clone()
+            .range(input.start()..end.offset())
+            .anchored(Anchored::Pattern(end.pattern()))
+            .earliest(false);
+        let mut rev_state = OverlappingState::start();
+        while let Some(start) = {
+            rev_dfa.try_search_overlapping_rev(&revsearch, &mut rev_state)?;
+            rev_state.get_match()
+        } {
+            let span = Span { start: start.offset(), end: end.offset() };
+            let mat = Match { id: end.pattern().as_usize(), span };
+            matches.push(mat);
+        }
+    }
+    Ok(TestResult::matches(matches))
+}
diff --git a/vendor/regex-automata/tests/fuzz/dense.rs b/vendor/regex-automata/tests/fuzz/dense.rs
new file mode 100644
index 0000000..213891b
--- /dev/null
+++ b/vendor/regex-automata/tests/fuzz/dense.rs
@@ -0,0 +1,52 @@
+// This test was found by a fuzzer input that crafted a way to provide
+// an invalid serialization of ByteClasses that passed our verification.
+// Specifically, the verification step in the deserialization of ByteClasses
+// used an iterator that depends on part of the serialized bytes being correct.
+// (Specifically, the encoding of the number of classes.)
+#[test]
+fn invalid_byte_classes() {
+    let data = include_bytes!(
+        "testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9",
+    );
+    let _ = fuzz_run(data);
+}
+
+#[test]
+fn invalid_byte_classes_min() {
+    let data = include_bytes!(
+        "testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9",
+    );
+    let _ = fuzz_run(data);
+}
+
+// This is the code from the fuzz target. Kind of sucks to duplicate it here,
+// but this is fundamentally how we interpret the data.
+fn fuzz_run(given_data: &[u8]) -> Option<()> {
+    use regex_automata::dfa::Automaton;
+
+    if given_data.len() < 2 {
+        return None;
+    }
+    let haystack_len = usize::from(given_data[0]);
+    let haystack = given_data.get(1..1 + haystack_len)?;
+    let given_dfa_bytes = given_data.get(1 + haystack_len..)?;
+
+    // We help the fuzzer along by adding a preamble to the bytes that should
+    // at least make these first parts valid. The preamble expects a very
+    // specific sequence of bytes, so it makes sense to just force this.
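+    // (Preamble layout: a NUL-padded label whose length is a multiple of
+    // four, then a native-endian u32 endianness check word (0xFEFF), then a
+    // native-endian u32 format version. Everything after that comes straight
+    // from the fuzz input.)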
+    let label = "rust-regex-automata-dfa-dense\x00\x00\x00";
+    assert_eq!(0, label.len() % 4);
+    let endianness_check = 0xFEFFu32.to_ne_bytes().to_vec();
+    let version_check = 2u32.to_ne_bytes().to_vec();
+    let mut dfa_bytes: Vec<u8> = vec![];
+    dfa_bytes.extend(label.as_bytes());
+    dfa_bytes.extend(&endianness_check);
+    dfa_bytes.extend(&version_check);
+    dfa_bytes.extend(given_dfa_bytes);
+    // This is the real test: checking that any input we give to
+    // DFA::from_bytes will never result in a panic.
+    let (dfa, _) =
+        regex_automata::dfa::dense::DFA::from_bytes(&dfa_bytes).ok()?;
+    let _ = dfa.try_search_fwd(&regex_automata::Input::new(haystack));
+    Some(())
+}
diff --git a/vendor/regex-automata/tests/fuzz/mod.rs b/vendor/regex-automata/tests/fuzz/mod.rs
new file mode 100644
index 0000000..960cb42
--- /dev/null
+++ b/vendor/regex-automata/tests/fuzz/mod.rs
@@ -0,0 +1,2 @@
+mod dense;
+mod sparse;
diff --git a/vendor/regex-automata/tests/fuzz/sparse.rs b/vendor/regex-automata/tests/fuzz/sparse.rs
new file mode 100644
index 0000000..837ad10
--- /dev/null
+++ b/vendor/regex-automata/tests/fuzz/sparse.rs
@@ -0,0 +1,132 @@
+// This is a regression test for a bug in how special states are handled. The
+// fuzzer found a case where a state returned true for 'is_special_state' but
+// *didn't* return true for 'is_dead_state', 'is_quit_state', 'is_match_state',
+// 'is_start_state' or 'is_accel_state'. This in turn tripped a debug assertion
+// in the core matching loop that requires 'is_special_state' being true to
+// imply that one of the other routines returns true.
+//
+// We fixed this by adding some validation to both dense and sparse DFAs that
+// checks that this property is true for every state ID in the DFA.
+#[test]
+fn invalid_special_state() {
+    let data = include_bytes!(
+        "testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838",
+    );
+    let _ = fuzz_run(data);
+}
+
+// This is an interesting case where a fuzzer generated a DFA with
+// a transition to a state ID that decoded as a valid state, but
+// where the ID itself did not point to one of the two existing
+// states for this particular DFA. This combined with marking this
+// transition's state ID as special but without actually making one of the
+// 'is_{dead,quit,match,start,accel}_state' predicates return true ended up
+// tripping the 'debug_assert(dfa.is_quit_state(sid))' code in the search
+// routine.
+//
+// We fixed this in alloc mode by checking that every transition points to a
+// valid state ID. Technically this bug still exists in core-only mode, but
+// it's not clear how to fix it. And it's worth pointing out that the search
+// routine won't panic in production. It will just provide invalid results. And
+// that's acceptable within the contract of DFA::from_bytes.
+#[test]
+fn transition_to_invalid_but_valid_state() {
+    let data = include_bytes!(
+        "testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9",
+    );
+    let _ = fuzz_run(data);
+}
+
+// Another one caught by the fuzzer where it generated a DFA that reported a
+// start state as a match state. Since matches are always delayed by one byte,
+// start states specifically cannot be match states. And indeed, the search
+// code relies on this.
+#[test]
+fn start_state_is_not_match_state() {
+    let data = include_bytes!(
+        "testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000",
+    );
+    let _ = fuzz_run(data);
+}
+
+// This is a variation on 'transition_to_invalid_but_valid_state', but happens
+// to a start state. Namely, the fuzz data here builds a DFA with a start
+// state ID that is incorrect but points to a sequence of bytes that satisfies
+// state decoding validation. This errant state in turn has a non-zero number
+// of transitions, and it's those transitions that point to a state that does
+// *not* satisfy state decoding validation. But we never checked those. So the
+// fix here was to add validation of the transitions off of the start state.
+#[test]
+fn start_state_has_valid_transitions() {
+    let data = include_bytes!(
+        "testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98",
+    );
+    let _ = fuzz_run(data);
+}
+
+// This fuzz input generated a DFA with a state whose ID was in the match state
+// ID range, but where the state itself was encoded with zero pattern IDs. We
+// added validation code to check this case.
+#[test]
+fn match_state_inconsistency() {
+    let data = include_bytes!(
+        "testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570",
+    );
+    let _ = fuzz_run(data);
+}
+
+// This fuzz input generated a DFA with a state whose ID was in the accelerator
+// range, but which didn't have any accelerators. This violated an invariant
+// that assumes that if 'dfa.is_accel_state(sid)' returns true, then the state
+// must have some accelerators.
+#[test]
+fn invalid_accelerators() {
+    let data = include_bytes!(
+        "testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b",
+    );
+    let _ = fuzz_run(data);
+}
+
+// This fuzz input generated a DFA with a state whose EOI transition led to
+// a quit state, which is generally considered illegal. Why? Because the EOI
+// transition is defined over a special sentinel alphabet element and one
+// cannot configure a DFA to "quit" on that sentinel.
+#[test]
+fn eoi_transition_to_quit_state() {
+    let data = include_bytes!(
+        "testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9",
+    );
+    let _ = fuzz_run(data);
+}
+
+// This is the code from the fuzz target. Kind of sucks to duplicate it here,
+// but this is fundamentally how we interpret the data.
+fn fuzz_run(given_data: &[u8]) -> Option<()> {
+    use regex_automata::dfa::Automaton;
+
+    if given_data.len() < 2 {
+        return None;
+    }
+    let haystack_len = usize::from(given_data[0]);
+    let haystack = given_data.get(1..1 + haystack_len)?;
+    let given_dfa_bytes = given_data.get(1 + haystack_len..)?;
+
+    // We help the fuzzer along by adding a preamble to the bytes that should
+    // at least make these first parts valid. The preamble expects a very
+    // specific sequence of bytes, so it makes sense to just force this.
+    let label = "rust-regex-automata-dfa-sparse\x00\x00";
+    assert_eq!(0, label.len() % 4);
+    let endianness_check = 0xFEFFu32.to_ne_bytes().to_vec();
+    let version_check = 2u32.to_ne_bytes().to_vec();
+    let mut dfa_bytes: Vec<u8> = vec![];
+    dfa_bytes.extend(label.as_bytes());
+    dfa_bytes.extend(&endianness_check);
+    dfa_bytes.extend(&version_check);
+    dfa_bytes.extend(given_dfa_bytes);
+    // This is the real test: checking that any input we give to
+    // DFA::from_bytes will never result in a panic.
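+    // (A `None` return here just means the input was rejected, either for
+    // being too short or for failing deserialization; only a panic counts
+    // as a failure.)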
+ let (dfa, _) = + regex_automata::dfa::sparse::DFA::from_bytes(&dfa_bytes).ok()?; + let _ = dfa.try_search_fwd(®ex_automata::Input::new(haystack)); + Some(()) +} diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9 new file mode 100644 index 0000000000000000000000000000000000000000..972bfb2cd405c68babf906df77a2ab452c01d10a GIT binary patch literal 1894 zcmd^AOHKnZ41F#IbqO~ZiPh}Nif!6ku;3W%D#2~I23M%WhQmN=JL5VtnTSlq4omsH z_<8Ku$q?`u4}IT*0BpEDbBo;e+*AZq>3}y}so)cM8erW2#Q189@%WvVQ3Uk<;cVF; z(v51j6F-tSXD1X1xY04|W^EA*=;(VKxKGl>OpT%n{KWpm0VPFt{`)FS*u{WJy~0kWN^slEmL_7mdmek zEHOE9VH{E@?O1(Tap^dB|JTBux&L~^Glt7g)e*U_mx(9%GJcIUI>ee%fj8A;!TN`Q Q7L&y7j}re3CkH_|0iGi2$p8QV literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9 new file mode 100644 index 0000000000000000000000000000000000000000..72dbdad825d233306b86959937492c336281016e GIT binary patch literal 1882 zcmd^AOHKnZ41F#IbqO~ZiPh}Nif!6ku;3W%D#2~I23M%WhQmN=JL5VtnTSlq4omsH z_<8Ku$q?`u4}IT*0BpEDbBo;e+*AZq>3}y}so)cM8erW2#Q189@%WvVQ3Uk<;cVF; z(v51j6F-tSXD1X1xY04|W^EA*=;(VKxKGl>OpT%n{KWpm0VPFt{`)FS*u{WJy~0kWN^slEmL_7mdmek zEHOE9VH{E@?O1(Tap^dB|JTBux&L~^Glt7g)e*U_mx(9%GJcIUI>ee%fj8A;!TKkF P7Ldg14-x+WC&3d=%n|Cz literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000 new file mode 100644 index 0000000000000000000000000000000000000000..5ce508803ef45322bdbd969c864237e04105a6bc GIT binary patch literal 941 zcmWe-WdH+S1||s2QqNQY5dxAda25l@GBEqke<1h?7Y6D!G%z%%g9~6~ur^~AW`Np> zLy}@)Mu-Xj5eBV+F&#i&(}nR+X+}s$FaSOJ4I~5dF_2Z{}%MVF5EZXrn9#@Q_ggzeqKcI4&K>9Wwbx2a#3=HpqTv5^g|KBqJ E0N%YJX#fBK literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 new file mode 100644 index 0000000000000000000000000000000000000000..4fa13fbed47013e22bd6e9c9209ca0e8a61937fb GIT binary patch literal 924 zcmZSHK6fTF1H=FS{|_(#X$D3h1_B^IBR&HN3{ilfp`n2)k_boz7-Yj389<$IF!4Xw zr2ou7^C~MUA)pyZ0mVUp$-1)A7Ag#(fP!p5@nEPCXF)WIg31;sSAPPOQCC^nURha# zPzBZd2TU`9yaECsh5>~6|NpW741Ymvu)uGyTR_DB|At@{K&AkQ4+;!c5QD)1Otv#* zHB1Mya1d>1;>l=y5Em}S(g5T+Fq{CAPymyHMgju^cf;pzFiCQ09+EUNp{PdXfzrod w08n=@!h?x6jshk`oB)z4a7u$EegH|3Is+iS4Hm&e025X{RGOs$$Z!B+0PByNd;kCd literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 new file mode 100644 index 0000000000000000000000000000000000000000..0f809f33f44d17fb4e2d04c100226208e407d109 GIT binary patch literal 933 zcmd;OVEFSN2tNI8V5> zA;$bin6?JSbO14QVLVit5$Y;nXnX_87=nTYNHaD-l*vOV<_rd?`5-=eKrsWw*dPj_ zB&G<8G?oD*FHbdj03yoGBN@)!T4Zv@p-1Nn_%68BTLxF@#eAJ4{9t1YjH-Gz(mO8k{xCfQJAZoJAhv2T+?1lGbD(`wJ2u U$^>R12#YBgn)-p11H*d;040QR*8l(j literal 0 HcmV?d00001 diff --git 
a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 new file mode 100644 index 0000000000000000000000000000000000000000..aa72eb1dd6123880d4b062ec55f9adc2c0d2539d GIT binary patch literal 728 zcmZQ-V5qG8_y2!=<$opy1_nz91_K}k1pogV{)d4K6fk2t1Y}ggKy$lFTU!l`2hop2 zsvxm(Fd=#;fGI`>1_21a31(y^NT$8*KWhS11}*_}BAhmWvyd3?;cQVLZ~ze?AR&tE zoLDHw2uQPl*bX291f(Gpgk)-9Fl1oRgbD#2%b{_y2zY8Pa!; literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/gen/README.md b/vendor/regex-automata/tests/gen/README.md new file mode 100644 index 0000000..59439a1 --- /dev/null +++ b/vendor/regex-automata/tests/gen/README.md @@ -0,0 +1,65 @@ +This directory contains tests for serialized objects from the regex-automata +crate. Currently, there are only two supported such objects: dense and sparse +DFAs. + +The idea behind these tests is to commit some serialized objects and run some +basic tests by deserializing them and running searches and ensuring they are +correct. We also make sure these are run under Miri, since deserialization is +one of the biggest places where undefined behavior might occur in this crate +(at the time of writing). + +The main thing we're testing is that the *current* code can still deserialize +*old* objects correctly. Generally speaking, compatibility extends to semver +compatible releases of this crate. Beyond that, no promises are made, although +in practice callers can at least depend on errors occurring. (The serialized +format always includes a version number, and incompatible changes increment +that version number such that an error will occur if an unsupported version is +detected.) + +To generate the dense DFAs, I used this command: + +``` +$ regex-cli generate serialize dense regex \ + MULTI_PATTERN_V2 \ + tests/gen/dense/ \ + --rustfmt \ + --safe \ + --starts-for-each-pattern \ + --specialize-start-states \ + --start-kind both \ + --unicode-word-boundary \ + --minimize \ + '\b[a-zA-Z]+\b' \ + '(?m)^\S+$' \ + '(?Rm)^\S+$' +``` + +And to generate the sparse DFAs, I used this command, which is the same as +above, but with `s/dense/sparse/g`. + +``` +$ regex-cli generate serialize sparse regex \ + MULTI_PATTERN_V2 \ + tests/gen/sparse/ \ + --rustfmt \ + --safe \ + --starts-for-each-pattern \ + --specialize-start-states \ + --start-kind both \ + --unicode-word-boundary \ + --minimize \ + '\b[a-zA-Z]+\b' \ + '(?m)^\S+$' \ + '(?Rm)^\S+$' +``` + +The idea is to try to enable as many of the DFA's options as possible in order +to test that serialization works for all of them. + +Arguably we should increase test coverage here, but this is a start. Note +that in particular, this does not need to test that serialization and +deserialization correctly roundtrips on its own. Indeed, the normal regex test +suite has a test that does a serialization round trip for every test supported +by DFAs. So that has very good coverage. What we're interested in testing here +is our compatibility promise: do DFAs generated with an older revision of the +code still deserialize correctly? 
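+
+As a minimal sketch of what consuming one of these artifacts looks like (this
+example is not one of the generated files, and the `bytes` argument and the
+`abcd` haystack are hypothetical), such a test boils down to deserializing
+and then searching:
+
+```
+use regex_automata::{
+    dfa::{dense, Automaton},
+    Input,
+};
+
+fn check(bytes: &[u8]) {
+    // `from_bytes` validates the artifact and returns an error (never
+    // undefined behavior) if it is corrupt or from an unsupported version.
+    // Dense DFA bytes must also be 4-byte aligned, which is why the real
+    // tests below wrap them in an `AlignAs`.
+    let (dfa, _read) = dense::DFA::from_bytes(bytes).unwrap();
+    assert!(dfa.try_search_fwd(&Input::new("abcd")).is_ok());
+}
+```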
diff --git a/vendor/regex-automata/tests/gen/dense/mod.rs b/vendor/regex-automata/tests/gen/dense/mod.rs
new file mode 100644
index 0000000..b4365d4
--- /dev/null
+++ b/vendor/regex-automata/tests/gen/dense/mod.rs
@@ -0,0 +1,22 @@
+use regex_automata::{Input, Match};
+
+mod multi_pattern_v2;
+
+#[test]
+fn multi_pattern_v2() {
+    use multi_pattern_v2::MULTI_PATTERN_V2 as RE;
+
+    assert_eq!(Some(Match::must(0, 0..4)), RE.find("abcd"));
+    assert_eq!(Some(Match::must(0, 2..6)), RE.find("@ abcd @"));
+    assert_eq!(Some(Match::must(1, 0..6)), RE.find("@abcd@"));
+    assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd\n"));
+    assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd wxyz\n"));
+    assert_eq!(Some(Match::must(1, 1..7)), RE.find("\n@abcd@\n"));
+    assert_eq!(Some(Match::must(2, 0..6)), RE.find("@abcd@\r\n"));
+    assert_eq!(Some(Match::must(1, 2..8)), RE.find("\r\n@abcd@"));
+    assert_eq!(Some(Match::must(2, 2..8)), RE.find("\r\n@abcd@\r\n"));
+
+    // Fails because we have heuristic support for Unicode word boundaries
+    // enabled.
+    assert!(RE.try_search(&Input::new(b"\xFF@abcd@\xFF")).is_err());
+}
diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2.rs b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2.rs
new file mode 100644
index 0000000..a95fd20
--- /dev/null
+++ b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2.rs
@@ -0,0 +1,43 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// regex-cli generate serialize dense regex MULTI_PATTERN_V2 tests/gen/dense/ --rustfmt --safe --starts-for-each-pattern --specialize-start-states --start-kind both --unicode-word-boundary --minimize \b[a-zA-Z]+\b (?m)^\S+$ (?Rm)^\S+$
+//
+// regex-cli 0.0.1 is available on crates.io.
+
+use regex_automata::{
+    dfa::{dense::DFA, regex::Regex},
+    util::{lazy::Lazy, wire::AlignAs},
+};
+
+pub static MULTI_PATTERN_V2: Lazy<Regex<DFA<&'static [u32]>>> =
+    Lazy::new(|| {
+        let dfafwd = {
+            static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+                _align: [],
+                #[cfg(target_endian = "big")]
+                bytes: *include_bytes!("multi_pattern_v2_fwd.bigendian.dfa"),
+                #[cfg(target_endian = "little")]
+                bytes: *include_bytes!(
+                    "multi_pattern_v2_fwd.littleendian.dfa"
+                ),
+            };
+            DFA::from_bytes(&ALIGNED.bytes)
+                .expect("serialized forward DFA should be valid")
+                .0
+        };
+        let dfarev = {
+            static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+                _align: [],
+                #[cfg(target_endian = "big")]
+                bytes: *include_bytes!("multi_pattern_v2_rev.bigendian.dfa"),
+                #[cfg(target_endian = "little")]
+                bytes: *include_bytes!(
+                    "multi_pattern_v2_rev.littleendian.dfa"
+                ),
+            };
+            DFA::from_bytes(&ALIGNED.bytes)
+                .expect("serialized reverse DFA should be valid")
+                .0
+        };
+        Regex::builder().build_from_dfas(dfafwd, dfarev)
+    });
diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa
new file mode 100644
index 0000000000000000000000000000000000000000..6d6e040c36f08157a233d1fa08614f9856fbfa87
GIT binary patch
literal 11100
zcmeHN+invv5cTG=n{X%Gp+LE}g#3elB@%%rKx$KoM}L~Xj55cbadw@{N~ytJwMUvB
zpYg_?V>{VR)RVK*P5w%O!N)^3+
e=uA_Sv4^V7<=5YuBlc_kpM%@gP0Xd4%`;m{7b!D_P!Jz0W6PTR*wbfYm03DrCu)kU(+y}
AZSe;IU=ejo6O-Ib0$JK|F!WX*Est+dM~ifviCH1D#&MDeLTrEKKpF+S;)HU{^ed*
_b<8ny>Dom_d?NE#eaP*>g}%km$P2qf5|n@zmCp7>#qCnTcr>K^5fWe4teGEn67dC
k-jSa2kblMuKOR!zUtNfOAb6H-;Sw&lkU3ziaf^WUvl6P`FBkIH|ehXFU`7t$$?8t
L0l=%u~wQoas*AA0tF4SD=-K8hs8rWBBSoiaQcvE(lG{ZH!UviGH3dyhUm
>$v@Aj&+W`$DFD5zs*0+fb(yeBi(iX@?B4nKL_K#=b_#B{;7BMzxC(7Qhfd+
zoy&i!^IvH13;Q3|f^+CAE#--E{x|7d{!^X*$|w)ZOe8sQTXfz(`NroSp8td^&Vj@C zAL`$vv-zibQb4@6`lrbn{#ke3e?dNW1@<5PBh4J?T>ep03KYis!v4E4!Q2Kr|6tC? zI+uUcn*#D%kKF$;uH+iO_i#S^9>hAEf2)!}ytewM*&6G?y6gUTz4y_F{dTOGBc02? zd#_S07e!(DeJ^ZCa~tgZgE=4TT>eu%|FrG@K=HoQ8sOSk=klLw|H)E7-`fj%bE630 z{-D5$kw!T-;eG-dys!QX=^HP9S}*!q7P?NP2r+6Cok%~TJenbDR&j-t;Y zZLn(_>f?ZRKe!ONgC++PgQ1Oyz;XbGwLI#V>!?}Bz>?AwhF)%o8Mxf_Ng}YUgYaL~ Ca~hcd literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa new file mode 100644 index 0000000000000000000000000000000000000000..a1f4b3da157c729f7146d7e479d5be9e99564c0d GIT binary patch literal 11100 zcmeHL*=`gu5OnT@n{Y#ba3_TQhyImFEIa{H4v9y9n!k)th$z8#(%4nBVYhu@EfA%wGEAuK})>K?dG!?3ixva-51 zf%T1XY))8;Q9E<~D-bNigRSb18>2|6 z2xvZP9|iL~n2@&xWNj2BYA~w#sJ-NP2S(&<0{sX`8Z1)n!+_dD!4MB7cuYRk$R>49%*sT7|Dc4_p2^x6ap`u#b9A+Une9f(K$5UjiZ%?Gvr_r&V|KV}PtcrYPv3!Jxp z+lN{Oi|}AV-X_rV&$B<3xbF0eD(f$8f$71#R^(n3N$XAwgm#2#{2rn|+4Zk%LAM9=eOw1o9!0pb%_Y&#Wl-(k cvGjAEjYD$`T(`gz!NiFMOkm$M*gly425W{InE(I) literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa new file mode 100644 index 0000000000000000000000000000000000000000..74f74ec2a95568ad24c31c70bafe2721122d6e14 GIT binary patch literal 7584 zcmeHL?QRn>5cFLxDJ_&j_$~#?Hznj9{3?+M`~svlmH6pLyCN0i zy{f;0!PX+~L~J+MX~kXO-tPTY>=6$h4u``>kBKM5(}-u!ffxHP8ypg`~7nkBQ=;de2G}jx^xPurDq@iRkAKu@ov*@FBi+ZPG^zY!u!Vq>+>3d~brq7sZ6YP`p9-Yf5~ z$yL0gC-$2I^ylH`Dm7{swfysUN)GUyUqr6K`+e8wN^Vh6AbZLIIqTx@DF?50Gjn?0 zwZ$6Vxn}IQX?5dkiMhC!r3=c~h8RlUC6wnsbvak_?@SgH@I?QG3(8n((5h~7o_DM- z=UV>RZx)pES$eQH&)T9QB0LfEm4FWvW~CepZA<*`IzR5-SwQ>Z*dNog%_ec9D#i{q z)6~5(7TQpEP?EVQ<2oU7vjY^+xiD+&nfX@8w?oc3`0Q?|Gu{g^pHGqI d$#>xAm35(&!ypSn%NiQMsVj%e3S=)u&jI9TR@eXl literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa new file mode 100644 index 0000000000000000000000000000000000000000..663bdb9ead53e60f1c085c2a66b7ba12bd773aed GIT binary patch literal 7584 zcmeHL+foxj5S=7k1Vu335f#N7#{5J7N~(-LsZ|iFe4J04FB8w%ogN0V$(G4TM~hQ+ zINd#$?w;8Vfs?b-$?#d#EX|#aCrFY z^*kKCdHe3Y;=}Rr$H8FmX?Ou`|3+_4V389LEVAGtwf>DY7QHj)sz6YP1xxiG*6N)? 
zwqc54jP^VV)>$wh?8v2 z9vu@bv|wKEGPYcKuDO@SsIBCD10rP+goge#MpZievqbG`Jo3?j74wjJJ3)L8v z&wAxYo$|3v3|Q5Ryvw3`m$8YD8&NezZ6)U$&?9FN=>5-9Cc&7;Sx_rvz93-1R7+LR z;y(iW0s^0Zz=J8qXTF7k`4-fAk$x!%`{sQ{bx@5_*?;BZ^S{X#EVrO%y#*kArrCc~ z+8Uz<{_bOp{Wti6ofv;{kCcW7}9x@M@L3AKDnMl<62_&5UnGe=GXjgL8Lc hoc3>> = + Lazy::new(|| { + let dfafwd = { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_fwd.littleendian.dfa"); + DFA::from_bytes(BYTES) + .expect("serialized forward DFA should be valid") + .0 + }; + let dfarev = { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_rev.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_rev.littleendian.dfa"); + DFA::from_bytes(BYTES) + .expect("serialized reverse DFA should be valid") + .0 + }; + Regex::builder().build_from_dfas(dfafwd, dfarev) + }); diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa new file mode 100644 index 0000000000000000000000000000000000000000..aa04f63162709f12d140017f42fdad986ecb4162 GIT binary patch literal 3476 zcmcguy>1gh5Z*iAkMEo~iTMwK1QH;@SmF_=LPtYEZ7G2Y5had9>53{9JOEEYfv6~W z2Rb@x9snhL-|WuWUf&%T5i!#G+nL?@*_oX^A08i#28a8v_TLVM$D=o|hoj+O@8xiC zbTB+T+BcxnGyH^{fr#4}Eiytu*lI=b93|;2wF}BD<(1R{Ei85`X^FJF((CmuT_#;2 zT`lO^b9koG5LuJiX^znI9wjc zG;;#(2u-OtC>I+uPM{OUak^xUCBI>0#~7l_X&k(BKTf$lLJd?QdWyxK9h?hKCCY5f zB}%7Qq*ZK+Fy}C{o=lYI-L4HP>S7CqYyo%&ha%$1^1OpWqFRy_HA{^9)`ggEMN38>mJx` zyd8}jxMM1cL`(8K0C~sDcnKBjwMOJ>(+E0DLdOjibi5jw54+|GsrN`lc;sUso;+SX zHoUG{JU(aIaw?lCD6VT6FYcgw!S3|l_^9q`Q$!`EhL=!M=p54ku3CP)tDKCjU>{75y-p|&@3&XN&THq+v}UduaPqJqu^yVIA(M|D?QGAglD ze5Qy#uO(|3vw3ID@QgGks4}3K2ex;yR>Et{4c+Pcu1!RDDw}!pzgv0n4RE};Yu-d^ z`mgTv)8e7JtE~uQWa={BZKB7Yw<2GxnWBwY8I=LWJP|3O+~*s`%uhFC<^Ket&uh$B zoe=(>M-#4U78K-m6#U$T8v^`SsG=$t|688GVe8D1eM;oN5C6De@XuH4Bli5laXJ{Q qZafE`Oypdj9#jEfQz6)uHRT00Ud2DC#r@p? 
literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa new file mode 100644 index 0000000000000000000000000000000000000000..c27d92abe1d7bccfc30bcfdd12208ec2a27532a3 GIT binary patch literal 3476 zcmchay>1gh5XaY!?ej-sC+0f@2qZv^vBV>|A{`9{wWS0qM3guZWh$yvrqAA3ZkO>h+%V`~9bbb2@H1SF(NZi}KO( zz_~`MM;-6oq&oJPRL73BBe|8U)u=x+NbA3-+stZ0O^#2p#Pq zz%Sw~7CPFNfnzyl(L1UVGWxwXr(A<z*9)u8_jAaU>I$0T{|Y<9y?4&7&4Oa;D2R z@FZxc$)v+KxiDv*4huaioy}2*K3!2$a2l#4h7b&8f}nRR;u1CMbujHbzmm;ROvRwd zAb~+PBTx3#>i0QbQgiO19d8;;%Rtj*kNET8$eT=6_*`;2yJ55v0(@-UGO@X0I z5cG4_{2?{xF^8#_=|IARY)PK%YbHQEA-u1q&WPRbJy}~vOqad$e<*IAOgen$d^!s~ zE1kVsLiFj1I)c+sCGnmDLzy7x=dAf_YTiqcjWr!ec#tj1lSzkf;oZoqdng;e6Tna= z2>O8Zm7z;@7L`1umR_D%U{KQBOru$;{>$$*W#PZcSxO8p8^(tV%*WR!@o(9*^eMcW n@=M{d*z{nQv4y{qf{^{) literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa new file mode 100644 index 0000000000000000000000000000000000000000..89867d30f605af4c8ac4294e76d6a9a8023cbfbf GIT binary patch literal 1920 zcmcgs$!^p@5bbVz7JIg2*h7G@1%|;tzyamLH*m!g11AKUnTSYSa^k>8pm63_IP)L; z0O7srF2^%oloL-XS9Q7Tb=P7qk5}t#xp==g$@1g%#}D~B&)&Yvv(=}3xmpQch z7C^lRiMSA07e#TWf^KgU{Q`+c+K2%#9E}?>A$E4B)9IC~#5Ll2fg3l0TYI-X_V@1) zckgZC;Nbp)HavXv`0$D1>1_6FKA%6&E?`W2Vr)@_V4S+0Ow1%WxWk!Rx2<;@bJ|6W z>vVb(Pv$;@WCkC!*d)g~9A6>VUg3ZqnPRj@xuOx`re0{vntb&6bQ<}PN0o1duvdW( zb_6lt6JGX zO$Qh~%$M|$=@fDn?4b1~^?{+bJi&cOU`e+ImhT=?c z3G*8ahoe%C<_EGrg8VLN^+8LKpqU2uZm;E5h^@{Ba&DTn#Ps}KHg0m{ZyfXP{-Msn77fX3YvPb~hBfz1*+EAzy>mHM9&YG#hSe)>3N5 z$$yv{z~>ZutLI6E^2>|EufV;8t$ymV ia4+Rs5Vh6+6Mhx=$R&;8wDDIMKu&q54$*pF)&BrJ9)}?S literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa new file mode 100644 index 0000000000000000000000000000000000000000..c0ca807f8947442d59554983659dd07ccee99898 GIT binary patch literal 1920 zcmchXOKuZE5QclkkMY}yd4~Yu6)j-s;j!{@9L_anV!t%@pSTjauz2i^N$~rc@n>Um&CJA$#gbx?)n%8y1M$1XWj5gvX!TEJA*cv7yeCO6PF!Ww}Odk3HAUFDl8|CJKS zXN|yzuY^w#9&CHgi%r0lZ&h8Z)$7rO7fM4Q_Xu6F(XJ+^UD6t5&GPZZ0K2)}-Uq-e zhGwhP*3;Ab(y4YO^mj>7=apEHL+NGUoQ;Xbe>pOsk`4tI@BQK^V9>V z>w|1Gn%uA5>2&iu{t($%^1GnTho*Ei zaRsO>CG0|wBa6)KwFLozZc>I_NBx(5J_6m`oo6UJ^o=I-S@J8Bnws}gVt#Z{h(*3; bVr%pNl-~p&TnvU^AP><1Cwr@~dQJZW?U;ul literal 0 HcmV?d00001 diff --git a/vendor/regex-automata/tests/hybrid/api.rs b/vendor/regex-automata/tests/hybrid/api.rs new file mode 100644 index 0000000..4b04c4f --- /dev/null +++ b/vendor/regex-automata/tests/hybrid/api.rs @@ -0,0 +1,171 @@ +use std::error::Error; + +use regex_automata::{ + hybrid::dfa::{OverlappingState, DFA}, + nfa::thompson, + HalfMatch, Input, MatchError, +}; + +// Tests that too many cache resets cause the lazy DFA to quit. +// +// We only test this on 64-bit because the test is gingerly crafted based on +// implementation details of cache sizes. It's not a great test because of +// that, but it does check some interesting properties around how positions are +// reported when a search "gives up." +// +// NOTE: If you change something in lazy DFA implementation that causes this +// test to fail by reporting different "gave up" positions, then it's generally +// okay to update the positions in the test below as long as you're sure your +// changes are correct. 
diff --git a/vendor/regex-automata/tests/hybrid/api.rs b/vendor/regex-automata/tests/hybrid/api.rs
new file mode 100644
index 0000000..4b04c4f
--- /dev/null
+++ b/vendor/regex-automata/tests/hybrid/api.rs
@@ -0,0 +1,171 @@
+use std::error::Error;
+
+use regex_automata::{
+    hybrid::dfa::{OverlappingState, DFA},
+    nfa::thompson,
+    HalfMatch, Input, MatchError,
+};
+
+// Tests that too many cache resets cause the lazy DFA to quit.
+//
+// We only test this on 64-bit because the test is gingerly crafted based on
+// implementation details of cache sizes. It's not a great test because of
+// that, but it does check some interesting properties around how positions are
+// reported when a search "gives up."
+//
+// NOTE: If you change something in the lazy DFA implementation that causes
+// this test to fail by reporting different "gave up" positions, then it's
+// generally okay to update the positions in the test below as long as you're
+// sure your changes are correct. Namely, it is expected that if there are
+// changes in the cache size (or changes in how big things are inside the
+// cache), then its utilization may change slightly and thus impact where a
+// search gives up. Precisely where a search gives up is not an API guarantee,
+// so changing the offsets here is OK.
+#[test]
+#[cfg(target_pointer_width = "64")]
+#[cfg(not(miri))]
+fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> {
+    // This is a carefully chosen regex. The idea is to pick one that requires
+    // some decent number of states (hence the bounded repetition). But we
+    // specifically choose to create a class with an ASCII letter and a
+    // non-ASCII letter so that we can check that no new states are created
+    // once the cache is full. Namely, if we fill up the cache on a haystack
+    // of 'a's, then in order to match one 'β', a new state will need to be
+    // created since a 'β' is encoded with multiple bytes.
+    //
+    // So we proceed by "filling" up the cache by searching a haystack of just
+    // 'a's. The cache won't have enough room to add enough states to find the
+    // match (because of the bounded repetition), which should result in it
+    // giving up before it finds a match.
+    //
+    // Since there's now no more room to create states, we search a haystack
+    // of 'β' and confirm that it gives up immediately.
+    let pattern = r"[aβ]{99}";
+    let dfa = DFA::builder()
+        .configure(
+            // Configure it so that we have the minimum cache capacity
+            // possible. And that if any resets occur, the search quits.
+            DFA::config()
+                .skip_cache_capacity_check(true)
+                .cache_capacity(0)
+                .minimum_cache_clear_count(Some(0)),
+        )
+        .thompson(thompson::NFA::config())
+        .build(pattern)?;
+    let mut cache = dfa.create_cache();
+
+    let haystack = "a".repeat(101).into_bytes();
+    let err = MatchError::gave_up(24);
+    // Notice that we make the same amount of progress in each search! That's
+    // because the cache is reused and already has states to handle the first
+    // N bytes.
+    assert_eq!(
+        Err(err.clone()),
+        dfa.try_search_fwd(&mut cache, &Input::new(&haystack))
+    );
+    assert_eq!(
+        Err(err.clone()),
+        dfa.try_search_overlapping_fwd(
+            &mut cache,
+            &Input::new(&haystack),
+            &mut OverlappingState::start()
+        ),
+    );
+
+    let haystack = "β".repeat(101).into_bytes();
+    let err = MatchError::gave_up(2);
+    assert_eq!(
+        Err(err),
+        dfa.try_search_fwd(&mut cache, &Input::new(&haystack))
+    );
+    // no need to test that other find routines quit, since we did that above
+
+    // OK, if we reset the cache, then we should be able to create more states
+    // and make more progress with searching for betas.
+    cache.reset(&dfa);
+    let err = MatchError::gave_up(26);
+    assert_eq!(
+        Err(err),
+        dfa.try_search_fwd(&mut cache, &Input::new(&haystack))
+    );
+
+    // ... switching back to ASCII still makes progress since it just needs to
+    // set transitions on existing states!
+    let haystack = "a".repeat(101).into_bytes();
+    let err = MatchError::gave_up(13);
+    assert_eq!(
+        Err(err),
+        dfa.try_search_fwd(&mut cache, &Input::new(&haystack))
+    );
+
+    Ok(())
+}
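The same starvation configuration, sketched from a caller's point of view: rather than asserting on exact "gave up" offsets, application code would typically treat the error as a signal to fall back. This sketch is not part of the patch; the configuration values mirror the test above:

```rust
use regex_automata::{hybrid::dfa::DFA, Input};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let dfa = DFA::builder()
        .configure(
            DFA::config()
                .skip_cache_capacity_check(true)
                .cache_capacity(0)
                .minimum_cache_clear_count(Some(0)),
        )
        .build(r"[aβ]{99}")?;
    let mut cache = dfa.create_cache();
    let haystack = "a".repeat(101);
    match dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) {
        Ok(m) => println!("match: {:?}", m),
        // The reported offset is where the search stopped; it is not an API
        // guarantee. A real caller would retry with a bigger cache or a
        // different engine.
        Err(err) => println!("lazy DFA gave up: {}", err),
    }
    Ok(())
}
```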
+// Tests that quit bytes in the forward direction work correctly.
+#[test]
+fn quit_fwd() -> Result<(), Box<dyn Error>> {
+    let dfa = DFA::builder()
+        .configure(DFA::config().quit(b'x', true))
+        .build("[[:word:]]+$")?;
+    let mut cache = dfa.create_cache();
+
+    assert_eq!(
+        dfa.try_search_fwd(&mut cache, &Input::new("abcxyz")),
+        Err(MatchError::quit(b'x', 3)),
+    );
+    assert_eq!(
+        dfa.try_search_overlapping_fwd(
+            &mut cache,
+            &Input::new(b"abcxyz"),
+            &mut OverlappingState::start()
+        ),
+        Err(MatchError::quit(b'x', 3)),
+    );
+
+    Ok(())
+}
+
+// Tests that quit bytes in the reverse direction work correctly.
+#[test]
+fn quit_rev() -> Result<(), Box<dyn Error>> {
+    let dfa = DFA::builder()
+        .configure(DFA::config().quit(b'x', true))
+        .thompson(thompson::Config::new().reverse(true))
+        .build("^[[:word:]]+")?;
+    let mut cache = dfa.create_cache();
+
+    assert_eq!(
+        dfa.try_search_rev(&mut cache, &Input::new("abcxyz")),
+        Err(MatchError::quit(b'x', 3)),
+    );
+
+    Ok(())
+}
+
+// Tests that if we heuristically enable Unicode word boundaries but then
+// instruct that a non-ASCII byte should NOT be a quit byte, then the builder
+// will panic.
+#[test]
+#[should_panic]
+fn quit_panics() {
+    DFA::config().unicode_word_boundary(true).quit(b'\xFF', false);
+}
+
+// This tests an interesting case where even if the Unicode word boundary
+// option is disabled, setting all non-ASCII bytes to be quit bytes will cause
+// Unicode word boundaries to be enabled.
+#[test]
+fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
+    let mut config = DFA::config();
+    for b in 0x80..=0xFF {
+        config = config.quit(b, true);
+    }
+    let dfa = DFA::builder().configure(config).build(r"\b")?;
+    let mut cache = dfa.create_cache();
+    let expected = HalfMatch::must(0, 1);
+    assert_eq!(
+        Ok(Some(expected)),
+        dfa.try_search_fwd(&mut cache, &Input::new(" a")),
+    );
+    Ok(())
+}
diff --git a/vendor/regex-automata/tests/hybrid/mod.rs b/vendor/regex-automata/tests/hybrid/mod.rs
new file mode 100644
index 0000000..36667d0
--- /dev/null
+++ b/vendor/regex-automata/tests/hybrid/mod.rs
@@ -0,0 +1,3 @@
+mod api;
+#[cfg(not(miri))]
+mod suite;
diff --git a/vendor/regex-automata/tests/hybrid/suite.rs b/vendor/regex-automata/tests/hybrid/suite.rs
new file mode 100644
index 0000000..4aaca66
--- /dev/null
+++ b/vendor/regex-automata/tests/hybrid/suite.rs
@@ -0,0 +1,347 @@
+use {
+    anyhow::Result,
+    regex_automata::{
+        hybrid::{
+            dfa::{OverlappingState, DFA},
+            regex::{self, Regex},
+        },
+        nfa::thompson,
+        util::{prefilter::Prefilter, syntax},
+        Anchored, Input, PatternSet,
+    },
+    regex_test::{
+        CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult,
+        TestRunner,
+    },
+};
+
+use crate::{create_input, suite, untestify_kind};
+
+const EXPANSIONS: &[&str] = &["is_match", "find", "which"];
+
+/// Tests the default configuration of the hybrid NFA/DFA.
+#[test]
+fn default() -> Result<()> {
+    let builder = Regex::builder();
+    TestRunner::new()?
+        .expand(EXPANSIONS, |t| t.compiles())
+        // Without NFA shrinking, this test blows the default cache capacity.
+        .blacklist("expensive/regression-many-repeat-no-stack-overflow")
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+/// Tests the hybrid NFA/DFA with prefilters enabled.
+#[test]
+fn prefilter() -> Result<()> {
+    let my_compiler = |test: &RegexTest, regexes: &[String]| {
+        // Parse regexes as HIRs so we can get literals to build a prefilter.
+ let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = Regex::builder(); + builder.dfa(DFA::config().prefilter(pre)); + compiler(builder)(test, regexes) + }; + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), my_compiler) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA with NFA shrinking enabled. +/// +/// This is *usually* not the configuration one wants for a lazy DFA. NFA +/// shrinking is mostly only advantageous when building a full DFA since it +/// can sharply decrease the amount of time determinization takes. But NFA +/// shrinking is itself otherwise fairly expensive currently. Since a lazy DFA +/// has no compilation time (other than for building the NFA of course) before +/// executing a search, it's usually worth it to forgo NFA shrinking. +/// +/// Nevertheless, we test to make sure everything is OK with NFA shrinking. As +/// a bonus, there are some tests we don't need to skip because they now fit in +/// the default cache capacity. +#[test] +fn nfa_shrink() -> Result<()> { + let mut builder = Regex::builder(); + builder.thompson(thompson::Config::new().shrink(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when 'starts_for_each_pattern' is enabled for all +/// tests. +#[test] +fn starts_for_each_pattern() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().starts_for_each_pattern(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when 'specialize_start_states' is enabled. +#[test] +fn specialize_start_states() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().specialize_start_states(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when byte classes are disabled. +/// +/// N.B. Disabling byte classes doesn't avoid any indirection at search time. +/// All it does is cause every byte value to be its own distinct equivalence +/// class. +#[test] +fn no_byte_classes() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().byte_classes(false)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests that hybrid NFA/DFA never clears its cache for any test with the +/// default capacity. +/// +/// N.B. 
If a regex suite test is added that causes the cache to be cleared,
+/// then this should just skip that test. (Which can be done by calling the
+/// 'blacklist' method on 'TestRunner'.)
+#[test]
+fn no_cache_clearing() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder.dfa(DFA::config().minimum_cache_clear_count(Some(0)));
+    TestRunner::new()?
+        .expand(EXPANSIONS, |t| t.compiles())
+        // Without NFA shrinking, this test blows the default cache capacity.
+        .blacklist("expensive/regression-many-repeat-no-stack-overflow")
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+/// Tests the hybrid NFA/DFA when the minimum cache capacity is set.
+#[test]
+fn min_cache_capacity() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder
+        .dfa(DFA::config().cache_capacity(0).skip_cache_capacity_check(true));
+    TestRunner::new()?
+        .expand(EXPANSIONS, |t| t.compiles())
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+fn compiler(
+    mut builder: regex::Builder,
+) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> {
+    move |test, regexes| {
+        // Parse regexes as HIRs for some analysis below.
+        let mut hirs = vec![];
+        for pattern in regexes.iter() {
+            hirs.push(syntax::parse_with(pattern, &config_syntax(test))?);
+        }
+
+        // Check if our regex contains things that aren't supported by DFAs.
+        // That is, Unicode word boundaries when searching non-ASCII text.
+        if !test.haystack().is_ascii() {
+            for hir in hirs.iter() {
+                if hir.properties().look_set().contains_word_unicode() {
+                    return Ok(CompiledRegex::skip());
+                }
+            }
+        }
+        if !configure_regex_builder(test, &mut builder) {
+            return Ok(CompiledRegex::skip());
+        }
+        let re = builder.build_many(&regexes)?;
+        let mut cache = re.create_cache();
+        Ok(CompiledRegex::compiled(move |test| -> TestResult {
+            run_test(&re, &mut cache, test)
+        }))
+    }
+}
+
+fn run_test(
+    re: &Regex,
+    cache: &mut regex::Cache,
+    test: &RegexTest,
+) -> TestResult {
+    let input = create_input(test);
+    match test.additional_name() {
+        "is_match" => {
+            TestResult::matched(re.is_match(cache, input.earliest(true)))
+        }
+        "find" => match test.search_kind() {
+            SearchKind::Earliest | SearchKind::Leftmost => {
+                let input =
+                    input.earliest(test.search_kind() == SearchKind::Earliest);
+                TestResult::matches(
+                    re.find_iter(cache, input)
+                        .take(test.match_limit().unwrap_or(std::usize::MAX))
+                        .map(|m| Match {
+                            id: m.pattern().as_usize(),
+                            span: Span { start: m.start(), end: m.end() },
+                        }),
+                )
+            }
+            SearchKind::Overlapping => {
+                try_search_overlapping(re, cache, &input).unwrap()
+            }
+        },
+        "which" => match test.search_kind() {
+            SearchKind::Earliest | SearchKind::Leftmost => {
+                // There are no "which" APIs for standard searches.
+                TestResult::skip()
+            }
+            SearchKind::Overlapping => {
+                let dfa = re.forward();
+                let cache = cache.as_parts_mut().0;
+                let mut patset = PatternSet::new(dfa.pattern_len());
+                dfa.try_which_overlapping_matches(cache, &input, &mut patset)
+                    .unwrap();
+                TestResult::which(patset.iter().map(|p| p.as_usize()))
+            }
+        },
+        name => TestResult::fail(&format!("unrecognized test name: {}", name)),
+    }
+}
+
+/// Configures the given regex builder with all relevant settings on the given
+/// regex test.
+///
+/// If the regex test has a setting that is unsupported, then this returns
+/// false (implying the test should be skipped).
+fn configure_regex_builder(
+    test: &RegexTest,
+    builder: &mut regex::Builder,
+) -> bool {
+    let match_kind = match untestify_kind(test.match_kind()) {
+        None => return false,
+        Some(k) => k,
+    };
+
+    let mut dfa_config =
+        DFA::config().match_kind(match_kind).unicode_word_boundary(true);
+    // When doing an overlapping search, we might try to find the start of each
+    // match with a custom search routine. In that case, we need to tell the
+    // reverse search (for the start offset) which pattern to look for. The
+    // only way that API works is when anchored starting states are compiled
+    // for each pattern. This does technically also enable it for the forward
+    // DFA, but we're okay with that.
+    if test.search_kind() == SearchKind::Overlapping {
+        dfa_config = dfa_config.starts_for_each_pattern(true);
+    }
+    builder
+        .syntax(config_syntax(test))
+        .thompson(config_thompson(test))
+        .dfa(dfa_config);
+    true
+}
+
+/// Configuration of a Thompson NFA compiler from a regex test.
+fn config_thompson(test: &RegexTest) -> thompson::Config {
+    let mut lookm = regex_automata::util::look::LookMatcher::new();
+    lookm.set_line_terminator(test.line_terminator());
+    thompson::Config::new().utf8(test.utf8()).look_matcher(lookm)
+}
+
+/// Configuration of the regex parser from a regex test.
+fn config_syntax(test: &RegexTest) -> syntax::Config {
+    syntax::Config::new()
+        .case_insensitive(test.case_insensitive())
+        .unicode(test.unicode())
+        .utf8(test.utf8())
+        .line_terminator(test.line_terminator())
+}
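The `starts_for_each_pattern` knob explained in the comment above can be exercised directly: with it enabled, a caller may anchor a search to one specific pattern. A sketch against the lazy DFA, not part of the patch; the patterns are illustrative:

```rust
use regex_automata::{
    hybrid::dfa::DFA,
    Anchored, HalfMatch, Input, PatternID,
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Anchored::Pattern searches need a start state per pattern, which is
    // exactly what starts_for_each_pattern(true) compiles.
    let dfa = DFA::builder()
        .configure(DFA::config().starts_for_each_pattern(true))
        .build_many(&[r"[a-z]+", r"[0-9]+"])?;
    let mut cache = dfa.create_cache();
    // Ask specifically for pattern 1 (the digits), anchored at position 0.
    let input = Input::new("123abc")
        .anchored(Anchored::Pattern(PatternID::must(1)));
    let got = dfa.try_search_fwd(&mut cache, &input)?;
    assert_eq!(Some(HalfMatch::must(1, 3)), got);
    Ok(())
}
```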
+/// Execute an overlapping search, and for each match found, also find its
+/// overlapping starting positions.
+///
+/// N.B. This routine used to be part of the crate API, but 1) it wasn't clear
+/// to me how useful it was and 2) it wasn't clear to me what its semantics
+/// should be. In particular, a potentially surprising footgun of this routine
+/// is that it is worst case *quadratic* in the size of the haystack. Namely,
+/// it's possible to report a match at every position, and for every such
+/// position, scan all the way to the beginning of the haystack to find the
+/// starting position. Typical leftmost non-overlapping searches don't suffer
+/// from this because, well, matches can't overlap. So subsequent searches
+/// after a match is found don't revisit previously scanned parts of the
+/// haystack.
+///
+/// Its semantics can be strange for other reasons too. For example, given
+/// the regex '.*' and the haystack 'zz', the full set of overlapping matches
+/// is: [0, 0], [1, 1], [0, 1], [2, 2], [1, 2], [0, 2]. The ordering of
+/// those matches is quite strange, but makes sense when you think about the
+/// implementation: an end offset is found left-to-right, and then one or more
+/// starting offsets are found right-to-left.
+///
+/// Nevertheless, we provide this routine in our test suite because it's
+/// useful to test the low level DFA overlapping search and our test suite
+/// is written in a way that requires starting offsets.
+fn try_search_overlapping(
+    re: &Regex,
+    cache: &mut regex::Cache,
+    input: &Input<'_>,
+) -> Result<TestResult> {
+    let mut matches = vec![];
+    let mut fwd_state = OverlappingState::start();
+    let (fwd_dfa, rev_dfa) = (re.forward(), re.reverse());
+    let (fwd_cache, rev_cache) = cache.as_parts_mut();
+    while let Some(end) = {
+        fwd_dfa.try_search_overlapping_fwd(
+            fwd_cache,
+            input,
+            &mut fwd_state,
+        )?;
+        fwd_state.get_match()
+    } {
+        let revsearch = input
+            .clone()
+            .range(input.start()..end.offset())
+            .anchored(Anchored::Pattern(end.pattern()))
+            .earliest(false);
+        let mut rev_state = OverlappingState::start();
+        while let Some(start) = {
+            rev_dfa.try_search_overlapping_rev(
+                rev_cache,
+                &revsearch,
+                &mut rev_state,
+            )?;
+            rev_state.get_match()
+        } {
+            let span = Span { start: start.offset(), end: end.offset() };
+            let mat = Match { id: end.pattern().as_usize(), span };
+            matches.push(mat);
+        }
+    }
+    Ok(TestResult::matches(matches))
+}
diff --git a/vendor/regex-automata/tests/lib.rs b/vendor/regex-automata/tests/lib.rs
new file mode 100644
index 0000000..67c979a
--- /dev/null
+++ b/vendor/regex-automata/tests/lib.rs
@@ -0,0 +1,115 @@
+// We have a similar config in the regex-automata crate root. Basically, it is
+// just too annoying to deal with dead code when a subset of features is
+// enabled.
+#![cfg_attr(
+    not(all(
+        feature = "std",
+        feature = "nfa",
+        feature = "dfa",
+        feature = "hybrid",
+        feature = "perf-literal-substring",
+        feature = "perf-literal-multisubstring",
+    )),
+    allow(dead_code, unused_imports, unused_variables)
+)]
+// Similar deal with Miri. Just let dead code warnings be.
+#![cfg_attr(miri, allow(dead_code, unused_imports, unused_variables))]
+
+#[cfg(any(feature = "dfa-search", feature = "dfa-onepass"))]
+mod dfa;
+#[cfg(feature = "dfa-search")]
+mod fuzz;
+#[cfg(feature = "dfa-search")]
+mod gen;
+#[cfg(feature = "hybrid")]
+mod hybrid;
+#[cfg(feature = "meta")]
+mod meta;
+#[cfg(any(feature = "nfa-backtrack", feature = "nfa-pikevm"))]
+mod nfa;
+
+fn suite() -> anyhow::Result<regex_test::RegexTests> {
+    let _ = env_logger::try_init();
+
+    let mut tests = regex_test::RegexTests::new();
+    macro_rules! load {
+        ($name:expr) => {{
+            const DATA: &[u8] =
+                include_bytes!(concat!("../../testdata/", $name, ".toml"));
+            tests.load_slice($name, DATA)?;
+        }};
+    }
+
+    load!("anchored");
+    load!("bytes");
+    load!("crazy");
+    load!("crlf");
+    load!("earliest");
+    load!("empty");
+    load!("expensive");
+    load!("flags");
+    load!("iter");
+    load!("leftmost-all");
+    load!("line-terminator");
+    load!("misc");
+    load!("multiline");
+    load!("no-unicode");
+    load!("overlapping");
+    load!("regression");
+    load!("set");
+    load!("substring");
+    load!("unicode");
+    load!("utf8");
+    load!("word-boundary");
+    load!("word-boundary-special");
+    load!("fowler/basic");
+    load!("fowler/nullsubexpr");
+    load!("fowler/repetition");
+
+    Ok(tests)
+}
+
+/// Configure a regex_automata::Input with the given test configuration.
+fn create_input<'h>(
+    test: &'h regex_test::RegexTest,
+) -> regex_automata::Input<'h> {
+    use regex_automata::Anchored;
+
+    let bounds = test.bounds();
+    let anchored = if test.anchored() { Anchored::Yes } else { Anchored::No };
+    regex_automata::Input::new(test.haystack())
+        .range(bounds.start..bounds.end)
+        .anchored(anchored)
+}
+
+/// Convert capture matches into the test suite's capture values.
+///
+/// The given captures must represent a valid match, where the first capturing
+/// group has a non-None span. Otherwise this panics.
+fn testify_captures(
+    caps: &regex_automata::util::captures::Captures,
+) -> regex_test::Captures {
+    assert!(caps.is_match(), "expected captures to represent a match");
+    let spans = caps.iter().map(|group| {
+        group.map(|m| regex_test::Span { start: m.start, end: m.end })
+    });
+    // These unwraps are OK because we assume our 'caps' represents a match,
+    // and a match always gives a non-zero number of groups with the first
+    // group being non-None.
+    regex_test::Captures::new(caps.pattern().unwrap().as_usize(), spans)
+        .unwrap()
+}
+
+/// Convert a test harness match kind to a regex-automata match kind. If
+/// regex-automata doesn't support the harness kind, then `None` is returned.
+fn untestify_kind(
+    kind: regex_test::MatchKind,
+) -> Option<regex_automata::MatchKind> {
+    match kind {
+        regex_test::MatchKind::All => Some(regex_automata::MatchKind::All),
+        regex_test::MatchKind::LeftmostFirst => {
+            Some(regex_automata::MatchKind::LeftmostFirst)
+        }
+        regex_test::MatchKind::LeftmostLongest => None,
+    }
+}
diff --git a/vendor/regex-automata/tests/meta/mod.rs b/vendor/regex-automata/tests/meta/mod.rs
new file mode 100644
index 0000000..9d6ab47
--- /dev/null
+++ b/vendor/regex-automata/tests/meta/mod.rs
@@ -0,0 +1,2 @@
+#[cfg(not(miri))]
+mod suite;
diff --git a/vendor/regex-automata/tests/meta/suite.rs b/vendor/regex-automata/tests/meta/suite.rs
new file mode 100644
index 0000000..20f97b4
--- /dev/null
+++ b/vendor/regex-automata/tests/meta/suite.rs
@@ -0,0 +1,200 @@
+use {
+    anyhow::Result,
+    regex_automata::{
+        meta::{self, Regex},
+        util::syntax,
+        MatchKind, PatternSet,
+    },
+    regex_test::{
+        CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult,
+        TestRunner,
+    },
+};
+
+use crate::{create_input, suite, testify_captures};
+
+const BLACKLIST: &[&str] = &[
+    // These 'earliest' tests are blacklisted because the meta searcher doesn't
+    // give the same offsets that the test expects. This is legal because the
+    // 'earliest' routines don't guarantee a particular match offset other
+    // than "the earliest the regex engine can report a match." Some regex
+    // engines will quit earlier than others. The backtracker, for example,
+    // can't really quit before finding the full leftmost-first match. Many of
+    // the literal searchers also don't have the ability to quit fully or it's
+    // otherwise not worth doing. (A literal searcher not quitting as early as
+    // possible usually means looking at a few more bytes. That's no biggie.)
+    "earliest/",
+];
+
+/// Tests the default configuration of the meta regex engine.
+#[test]
+fn default() -> Result<()> {
+    let builder = Regex::builder();
+    let mut runner = TestRunner::new()?;
+    runner
+        .expand(&["is_match", "find", "captures"], |test| test.compiles())
+        .blacklist_iter(BLACKLIST)
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+/// Tests the default configuration minus the full DFA.
+#[test]
+fn no_dfa() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder.configure(Regex::config().dfa(false));
+    let mut runner = TestRunner::new()?;
+    runner
+        .expand(&["is_match", "find", "captures"], |test| test.compiles())
+        .blacklist_iter(BLACKLIST)
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+/// Tests the default configuration minus the full DFA and lazy DFA.
+#[test]
+fn no_dfa_hybrid() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder.configure(Regex::config().dfa(false).hybrid(false));
+    let mut runner = TestRunner::new()?;
+    runner
+        .expand(&["is_match", "find", "captures"], |test| test.compiles())
+        .blacklist_iter(BLACKLIST)
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+/// Tests the default configuration minus the full DFA, lazy DFA and one-pass
+/// DFA.
+#[test]
+fn no_dfa_hybrid_onepass() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder.configure(Regex::config().dfa(false).hybrid(false).onepass(false));
+    let mut runner = TestRunner::new()?;
+    runner
+        .expand(&["is_match", "find", "captures"], |test| test.compiles())
+        .blacklist_iter(BLACKLIST)
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+/// Tests the default configuration minus the full DFA, lazy DFA, one-pass
+/// DFA and backtracker.
+#[test]
+fn no_dfa_hybrid_onepass_backtrack() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder.configure(
+        Regex::config()
+            .dfa(false)
+            .hybrid(false)
+            .onepass(false)
+            .backtrack(false),
+    );
+    let mut runner = TestRunner::new()?;
+    runner
+        .expand(&["is_match", "find", "captures"], |test| test.compiles())
+        .blacklist_iter(BLACKLIST)
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+fn compiler(
+    mut builder: meta::Builder,
+) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> {
+    move |test, regexes| {
+        if !configure_meta_builder(test, &mut builder) {
+            return Ok(CompiledRegex::skip());
+        }
+        let re = builder.build_many(&regexes)?;
+        Ok(CompiledRegex::compiled(move |test| -> TestResult {
+            run_test(&re, test)
+        }))
+    }
+}
+
+fn run_test(re: &Regex, test: &RegexTest) -> TestResult {
+    let input = create_input(test);
+    match test.additional_name() {
+        "is_match" => TestResult::matched(re.is_match(input)),
+        "find" => match test.search_kind() {
+            SearchKind::Earliest => TestResult::matches(
+                re.find_iter(input.earliest(true))
+                    .take(test.match_limit().unwrap_or(std::usize::MAX))
+                    .map(|m| Match {
+                        id: m.pattern().as_usize(),
+                        span: Span { start: m.start(), end: m.end() },
+                    }),
+            ),
+            SearchKind::Leftmost => TestResult::matches(
+                re.find_iter(input)
+                    .take(test.match_limit().unwrap_or(std::usize::MAX))
+                    .map(|m| Match {
+                        id: m.pattern().as_usize(),
+                        span: Span { start: m.start(), end: m.end() },
+                    }),
+            ),
+            SearchKind::Overlapping => {
+                let mut patset = PatternSet::new(re.pattern_len());
+                re.which_overlapping_matches(&input, &mut patset);
+                TestResult::which(patset.iter().map(|p| p.as_usize()))
+            }
+        },
+        "captures" => match test.search_kind() {
+            SearchKind::Earliest => {
+                let it = re
+                    .captures_iter(input.earliest(true))
+                    .take(test.match_limit().unwrap_or(std::usize::MAX))
+                    .map(|caps| testify_captures(&caps));
+                TestResult::captures(it)
+            }
+            SearchKind::Leftmost => {
+                let it = re
+                    .captures_iter(input)
+                    .take(test.match_limit().unwrap_or(std::usize::MAX))
+                    .map(|caps| testify_captures(&caps));
+                TestResult::captures(it)
+            }
+            SearchKind::Overlapping => {
+                // There is no overlapping regex API that supports captures.
+                TestResult::skip()
+            }
+        },
+        name => TestResult::fail(&format!("unrecognized test name: {}", name)),
+    }
+}
+
+/// Configures the given regex builder with all relevant settings on the given
+/// regex test.
+///
+/// If the regex test has a setting that is unsupported, then this returns
+/// false (implying the test should be skipped).
+fn configure_meta_builder( + test: &RegexTest, + builder: &mut meta::Builder, +) -> bool { + let match_kind = match test.match_kind() { + regex_test::MatchKind::All => MatchKind::All, + regex_test::MatchKind::LeftmostFirst => MatchKind::LeftmostFirst, + regex_test::MatchKind::LeftmostLongest => return false, + }; + let meta_config = Regex::config() + .match_kind(match_kind) + .utf8_empty(test.utf8()) + .line_terminator(test.line_terminator()); + builder.configure(meta_config).syntax(config_syntax(test)); + true +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} diff --git a/vendor/regex-automata/tests/nfa/mod.rs b/vendor/regex-automata/tests/nfa/mod.rs new file mode 100644 index 0000000..3268621 --- /dev/null +++ b/vendor/regex-automata/tests/nfa/mod.rs @@ -0,0 +1 @@ +mod thompson; diff --git a/vendor/regex-automata/tests/nfa/thompson/backtrack/mod.rs b/vendor/regex-automata/tests/nfa/thompson/backtrack/mod.rs new file mode 100644 index 0000000..9d6ab47 --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/backtrack/mod.rs @@ -0,0 +1,2 @@ +#[cfg(not(miri))] +mod suite; diff --git a/vendor/regex-automata/tests/nfa/thompson/backtrack/suite.rs b/vendor/regex-automata/tests/nfa/thompson/backtrack/suite.rs new file mode 100644 index 0000000..bce0eef --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/backtrack/suite.rs @@ -0,0 +1,213 @@ +use { + anyhow::Result, + regex_automata::{ + nfa::thompson::{ + self, + backtrack::{self, BoundedBacktracker}, + NFA, + }, + util::{prefilter::Prefilter, syntax}, + Input, + }, + regex_test::{ + CompiledRegex, Match, MatchKind, RegexTest, SearchKind, Span, + TestResult, TestRunner, + }, +}; + +use crate::{create_input, suite, testify_captures}; + +/// Tests the default configuration of the bounded backtracker. +#[test] +fn default() -> Result<()> { + let builder = BoundedBacktracker::builder(); + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + // At the time of writing, every regex search in the test suite fits + // into the backtracker's default visited capacity (except for the + // blacklisted tests below). If regexes are added that blow that capacity, + // then they should be blacklisted here. A tempting alternative is to + // automatically skip them by checking the haystack length against + // BoundedBacktracker::max_haystack_len, but that could wind up hiding + // interesting failure modes. e.g., If the visited capacity is somehow + // wrong or smaller than it should be. + runner.blacklist("expensive/backtrack-blow-visited-capacity"); + runner.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the backtracker with prefilters enabled. +#[test] +fn prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + // We can always select leftmost-first here because the backtracker + // only supports leftmost-first matching. 
+        let pre = Prefilter::from_hirs_prefix(
+            regex_automata::MatchKind::LeftmostFirst,
+            &hirs,
+        );
+        let mut builder = BoundedBacktracker::builder();
+        builder.configure(BoundedBacktracker::config().prefilter(pre));
+        compiler(builder)(test, regexes)
+    };
+    let mut runner = TestRunner::new()?;
+    runner.expand(&["is_match", "find", "captures"], |test| test.compiles());
+    runner.blacklist("expensive/backtrack-blow-visited-capacity");
+    runner.test_iter(suite()?.iter(), my_compiler).assert();
+    Ok(())
+}
+
+/// Tests the bounded backtracker when its visited capacity is set to its
+/// minimum amount.
+#[test]
+fn min_visited_capacity() -> Result<()> {
+    let mut runner = TestRunner::new()?;
+    runner.expand(&["is_match", "find", "captures"], |test| test.compiles());
+    runner
+        .test_iter(suite()?.iter(), move |test, regexes| {
+            let nfa = NFA::compiler()
+                .configure(config_thompson(test))
+                .syntax(config_syntax(test))
+                .build_many(&regexes)?;
+            let mut builder = BoundedBacktracker::builder();
+            if !configure_backtrack_builder(test, &mut builder) {
+                return Ok(CompiledRegex::skip());
+            }
+            // Setup the bounded backtracker so that its visited capacity is
+            // the absolute minimum required for the test's haystack.
+            builder.configure(BoundedBacktracker::config().visited_capacity(
+                backtrack::min_visited_capacity(
+                    &nfa,
+                    &Input::new(test.haystack()),
+                ),
+            ));
+
+            let re = builder.build_from_nfa(nfa)?;
+            let mut cache = re.create_cache();
+            Ok(CompiledRegex::compiled(move |test| -> TestResult {
+                run_test(&re, &mut cache, test)
+            }))
+        })
+        .assert();
+    Ok(())
+}
+
+fn compiler(
+    mut builder: backtrack::Builder,
+) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> {
+    move |test, regexes| {
+        if !configure_backtrack_builder(test, &mut builder) {
+            return Ok(CompiledRegex::skip());
+        }
+        let re = builder.build_many(&regexes)?;
+        let mut cache = re.create_cache();
+        Ok(CompiledRegex::compiled(move |test| -> TestResult {
+            run_test(&re, &mut cache, test)
+        }))
+    }
+}
+
+fn run_test(
+    re: &BoundedBacktracker,
+    cache: &mut backtrack::Cache,
+    test: &RegexTest,
+) -> TestResult {
+    let input = create_input(test);
+    match test.additional_name() {
+        "is_match" => match test.search_kind() {
+            SearchKind::Earliest | SearchKind::Overlapping => {
+                TestResult::skip()
+            }
+            SearchKind::Leftmost => {
+                let input = input.earliest(true);
+                TestResult::matched(re.try_is_match(cache, input).unwrap())
+            }
+        },
+        "find" => match test.search_kind() {
+            SearchKind::Earliest | SearchKind::Overlapping => {
+                TestResult::skip()
+            }
+            SearchKind::Leftmost => TestResult::matches(
+                re.try_find_iter(cache, input)
+                    .take(test.match_limit().unwrap_or(std::usize::MAX))
+                    .map(|result| result.unwrap())
+                    .map(|m| Match {
+                        id: m.pattern().as_usize(),
+                        span: Span { start: m.start(), end: m.end() },
+                    }),
+            ),
+        },
+        "captures" => match test.search_kind() {
+            SearchKind::Earliest | SearchKind::Overlapping => {
+                TestResult::skip()
+            }
+            SearchKind::Leftmost => TestResult::captures(
+                re.try_captures_iter(cache, input)
+                    .take(test.match_limit().unwrap_or(std::usize::MAX))
+                    .map(|result| result.unwrap())
+                    .map(|caps| testify_captures(&caps)),
+            ),
+        },
+        name => TestResult::fail(&format!("unrecognized test name: {}", name)),
+    }
+}
+
+/// Configures the given regex builder with all relevant settings on the given
+/// regex test.
+///
+/// If the regex test has a setting that is unsupported, then this returns
+/// false (implying the test should be skipped).
+fn configure_backtrack_builder(
+    test: &RegexTest,
+    builder: &mut backtrack::Builder,
+) -> bool {
+    match (test.search_kind(), test.match_kind()) {
+        // For testing the standard search APIs. This is the only supported
+        // configuration for the backtracker.
+        (SearchKind::Leftmost, MatchKind::LeftmostFirst) => {}
+        // Overlapping APIs not supported at all for backtracker.
+        (SearchKind::Overlapping, _) => return false,
+        // Backtracking doesn't really support the notion of 'earliest'.
+        // Namely, backtracking already works by returning as soon as it knows
+        // it has found a match. It just so happens that this corresponds to
+        // the standard 'leftmost' formulation.
+        //
+        // The 'earliest' definition in this crate does indeed permit this
+        // behavior, so this is "fine," but our test suite specifically looks
+        // for the earliest position at which a match is known, which our
+        // finite automata based regex engines have no problem providing. So
+        // for backtracking, we just skip these tests.
+        (SearchKind::Earliest, _) => return false,
+        // For backtracking, 'all' semantics don't really make sense.
+        (_, MatchKind::All) => return false,
+        // Not supported at all in regex-automata.
+        (_, MatchKind::LeftmostLongest) => return false,
+    };
+    let backtrack_config = BoundedBacktracker::config();
+    builder
+        .configure(backtrack_config)
+        .syntax(config_syntax(test))
+        .thompson(config_thompson(test));
+    true
+}
+
+/// Configuration of a Thompson NFA compiler from a regex test.
+fn config_thompson(test: &RegexTest) -> thompson::Config {
+    let mut lookm = regex_automata::util::look::LookMatcher::new();
+    lookm.set_line_terminator(test.line_terminator());
+    thompson::Config::new().utf8(test.utf8()).look_matcher(lookm)
+}
+
+/// Configuration of the regex parser from a regex test.
+fn config_syntax(test: &RegexTest) -> syntax::Config {
+    syntax::Config::new()
+        .case_insensitive(test.case_insensitive())
+        .unicode(test.unicode())
+        .utf8(test.utf8())
+        .line_terminator(test.line_terminator())
+}
diff --git a/vendor/regex-automata/tests/nfa/mod.rs b/vendor/regex-automata/tests/nfa/mod.rs
new file mode 100644
index 0000000..3268621
--- /dev/null
+++ b/vendor/regex-automata/tests/nfa/mod.rs
@@ -0,0 +1 @@
+mod thompson;
diff --git a/vendor/regex-automata/tests/nfa/thompson/mod.rs b/vendor/regex-automata/tests/nfa/thompson/mod.rs
new file mode 100644
index 0000000..b2558f7
--- /dev/null
+++ b/vendor/regex-automata/tests/nfa/thompson/mod.rs
@@ -0,0 +1,4 @@
+#[cfg(feature = "nfa-backtrack")]
+mod backtrack;
+#[cfg(feature = "nfa-pikevm")]
+mod pikevm;
diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs
new file mode 100644
index 0000000..9d6ab47
--- /dev/null
+++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs
@@ -0,0 +1,2 @@
+#[cfg(not(miri))]
+mod suite;
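Before the PikeVM suite that follows, a minimal usage sketch of the engine under test (not part of the patch; the pattern is illustrative). The PikeVM supports the full feature set, including Unicode word boundaries, which is why its compiler below needs none of the skips the DFA-based suites use:

```rust
use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The PikeVM trades speed for full feature support (captures, Unicode
    // word boundaries); every search needs a mutable cache.
    let re = PikeVM::new(r"\b\w+\b")?;
    let mut cache = re.create_cache();
    let got: Vec<Match> = re.find_iter(&mut cache, "ab cd").collect();
    assert_eq!(got, vec![Match::must(0, 0..2), Match::must(0, 3..5)]);
    Ok(())
}
```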
diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs
new file mode 100644
index 0000000..d32842a
--- /dev/null
+++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs
@@ -0,0 +1,162 @@
+use {
+    anyhow::Result,
+    regex_automata::{
+        nfa::thompson::{
+            self,
+            pikevm::{self, PikeVM},
+        },
+        util::{prefilter::Prefilter, syntax},
+        PatternSet,
+    },
+    regex_test::{
+        CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult,
+        TestRunner,
+    },
+};
+
+use crate::{create_input, suite, testify_captures, untestify_kind};
+
+/// Tests the default configuration of the PikeVM.
+#[test]
+fn default() -> Result<()> {
+    let builder = PikeVM::builder();
+    let mut runner = TestRunner::new()?;
+    runner.expand(&["is_match", "find", "captures"], |test| test.compiles());
+    runner.test_iter(suite()?.iter(), compiler(builder)).assert();
+    Ok(())
+}
+
+/// Tests the PikeVM with prefilters enabled.
+#[test]
+fn prefilter() -> Result<()> {
+    let my_compiler = |test: &RegexTest, regexes: &[String]| {
+        // Parse regexes as HIRs so we can get literals to build a prefilter.
+        let mut hirs = vec![];
+        for pattern in regexes.iter() {
+            hirs.push(syntax::parse_with(pattern, &config_syntax(test))?);
+        }
+        let kind = match untestify_kind(test.match_kind()) {
+            None => return Ok(CompiledRegex::skip()),
+            Some(kind) => kind,
+        };
+        let pre = Prefilter::from_hirs_prefix(kind, &hirs);
+        let mut builder = PikeVM::builder();
+        builder.configure(PikeVM::config().prefilter(pre));
+        compiler(builder)(test, regexes)
+    };
+    let mut runner = TestRunner::new()?;
+    runner.expand(&["is_match", "find", "captures"], |test| test.compiles());
+    runner.test_iter(suite()?.iter(), my_compiler).assert();
+    Ok(())
+}
+
+fn compiler(
+    mut builder: pikevm::Builder,
+) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> {
+    move |test, regexes| {
+        if !configure_pikevm_builder(test, &mut builder) {
+            return Ok(CompiledRegex::skip());
+        }
+        let re = builder.build_many(&regexes)?;
+        let mut cache = re.create_cache();
+        Ok(CompiledRegex::compiled(move |test| -> TestResult {
+            run_test(&re, &mut cache, test)
+        }))
+    }
+}
+
+fn run_test(
+    re: &PikeVM,
+    cache: &mut pikevm::Cache,
+    test: &RegexTest,
+) -> TestResult {
+    let input = create_input(test);
+    match test.additional_name() {
+        "is_match" => TestResult::matched(re.is_match(cache, input)),
+        "find" => match test.search_kind() {
+            SearchKind::Earliest => {
+                let it = re
+                    .find_iter(cache, input.earliest(true))
+                    .take(test.match_limit().unwrap_or(std::usize::MAX))
+                    .map(|m| Match {
+                        id: m.pattern().as_usize(),
+                        span: Span { start: m.start(), end: m.end() },
+                    });
+                TestResult::matches(it)
+            }
+            SearchKind::Leftmost => {
+                let it = re
+                    .find_iter(cache, input)
+                    .take(test.match_limit().unwrap_or(std::usize::MAX))
+                    .map(|m| Match {
+                        id: m.pattern().as_usize(),
+                        span: Span { start: m.start(), end: m.end() },
+                    });
+                TestResult::matches(it)
+            }
+            SearchKind::Overlapping => {
+                let mut patset = PatternSet::new(re.get_nfa().pattern_len());
+                re.which_overlapping_matches(cache, &input, &mut patset);
+                TestResult::which(patset.iter().map(|p| p.as_usize()))
+            }
+        },
+        "captures" => match test.search_kind() {
+            SearchKind::Earliest => {
+                let it = re
+                    .captures_iter(cache, input.earliest(true))
+                    .take(test.match_limit().unwrap_or(std::usize::MAX))
+                    .map(|caps| testify_captures(&caps));
+                TestResult::captures(it)
+            }
+            SearchKind::Leftmost => {
+                let it = re
+                    .captures_iter(cache, input)
+                    .take(test.match_limit().unwrap_or(std::usize::MAX))
+                    .map(|caps| testify_captures(&caps));
+                TestResult::captures(it)
+            }
+            SearchKind::Overlapping => {
+                // There is no overlapping PikeVM API that supports captures.
+                TestResult::skip()
+            }
+        },
+        name => TestResult::fail(&format!("unrecognized test name: {}", name)),
+    }
+}
+
+/// Configures the given regex builder with all relevant settings on the given
+/// regex test.
+///
+/// If the regex test has a setting that is unsupported, then this returns
+/// false (implying the test should be skipped).
+fn configure_pikevm_builder( + test: &RegexTest, + builder: &mut pikevm::Builder, +) -> bool { + let match_kind = match untestify_kind(test.match_kind()) { + None => return false, + Some(k) => k, + }; + let pikevm_config = PikeVM::config().match_kind(match_kind); + builder + .configure(pikevm_config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} diff --git a/vendor/regex-syntax/.cargo-checksum.json b/vendor/regex-syntax/.cargo-checksum.json index 09ad1af..8152441 100644 --- a/vendor/regex-syntax/.cargo-checksum.json +++ b/vendor/regex-syntax/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"64cd89fcae533a9869fd71526374a61adeb14a0e4fd29eb8b81d6744525a817c","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"67a3e673a9da6826fd4db5be6902841c821b52b98dc22c300f6e327872392b0a","benches/bench.rs":"d2b6ae5b939abd6093064f144b981b7739d7f474ec0698a1268052fc92406635","src/ast/mod.rs":"a306c08610968d850a4666a9775fb7e8cf6646c02bab766440e6a93ab2e38f58","src/ast/parse.rs":"150b42e944f766fdca70d654dbe32f8a17498432729c78b9eb50b73ae7f91f86","src/ast/print.rs":"d12f2cc75cd62f35623e1eb7a77ab8ac804b971752082700d2c4f550f834b249","src/ast/visitor.rs":"1a7b473147e4f6b89623ef1744a9e87f665bcf160fe08a33ce8e35011811ba71","src/either.rs":"1758e3edd056884eccadd995708d1e374ba9aa65846bd0e13b1aae852607c560","src/error.rs":"b3c5903a8937d2aff229a3ec65d4571d01ec4d9874c9a242ed6562c32702bcbd","src/hir/interval.rs":"e767fed363bebe4bbda0d78b8f07e73f321eaf4f837e2d7bd14a1617387e9a89","src/hir/literal/mod.rs":"ffe9a0aff7827f97bffd29eb2f4ba96627b16953161dce6c50a2f760e76bbd98","src/hir/mod.rs":"3a6d4e0b9ec221a98fead13ba885c8a1efbbd5601e6445a2e928aa37e7716549","src/hir/print.rs":"651b5d9776532a78612a5f9081372a57bad693890639ac19e3128b4defa96662","src/hir/translate.rs":"c7cd9693f73760263fd49a968714d27e7985ebe840211b2d83bca6686b0602a8","src/hir/visitor.rs":"e5bf7f8c09f6155e59c9d676fe25437f7e3700f9bf5d91101d7e246a64c11d5a","src/lib.rs":"a004f65196dd5745b3112e4acc8c467b18495cecac64a58d6608b35de67371cb","src/parser.rs":"0dfb553a152e008b2755f115663e553ed99c4b8e6a4dcbcad1662737534de49d","src/unicode.rs":"2ad48193433fefbede0837bd645f4288f6b39b1facb59dbb7d541bce7bf19109","src/unicode_tables/LICENSE-UNICODE":"74db5baf44a41b1000312c673544b3374e4198af5605c7f9080a402cec42cfa3","src/unicode_tables/age.rs":"2a2599a4e406fbbd0efd16aa6ce385c3f97b87c34820d6686a9f9113a5231c67","src/unicode_tables/case_folding_simple.rs":"9583803d4a10486da372b76979dbd26349b40766229467238eff972c1d78e47b","src/unicode_tables/general_category.rs":"36a93ba1cdeed96a00ff29a5ab5afd2c578a89541bf4dd8b18478146cebda0aa","src/unicode_tables/grapheme_cluster_break.rs":"39c388e9805a8391d3d3e69d74d831ce4fb99aa7e13e52c64dd2bd16d4765301","src/unicode_tables/mod.rs":"26c837099cd934c8062e24bc9a0aaecf15fe1de03f9c6da3f3e1e5ac3ca24bee","src/unicode_tabl
es/perl_decimal.rs":"a98ea4afe71c2947023ae12bd25c46bf4c7de48eeb40979eca5c96ba62cee02e","src/unicode_tables/perl_space.rs":"ea2b3b84b4a48334082dadc6c37d9fcc9c9ded84b40e8f5c9c9314898638967e","src/unicode_tables/perl_word.rs":"6f1156bd6af32151ecffea4abe07a38fa04b1fc1b227ec1a8dac5d5f08d9d74b","src/unicode_tables/property_bool.rs":"0bd64f6e3228eaecf47824e238bdf1f8a9eef113ace6e790a57f045a8106701c","src/unicode_tables/property_names.rs":"5ca25437927eb70c62adf7d038e99a601cfb8a718677fd6de832589664d3c481","src/unicode_tables/property_values.rs":"5b4cc02392d382cf7af60455fc87b9980e97409b62a4b8d6c5843190d2e2d21d","src/unicode_tables/script.rs":"ea1d771b6d0a4b12d143f9bad2ea9342a0887878cbbe3c11262b6eabedaf2dd4","src/unicode_tables/script_extension.rs":"beeb8349703d903ff861beb8401bfd2599e457dc25df872e69d6ad1615f8b5e9","src/unicode_tables/sentence_break.rs":"2befe2a27cc4e8aecb624e310ef9f371462470dd3b2f572cec1f5873a5e30aa9","src/unicode_tables/word_break.rs":"94679177731b515f0c360eff394286a1f99b59527bdbc826cbf51d32f9666187","src/utf8.rs":"de854b3bfb3f7dbefc422f6a25935aaeef55ead2c35386c712a1fe9bf81a7b6f","test":"8a9bd1bd9fb389e08288f951319a9bbb0d4c5284a2ba63cbdab7f6afa2c2f76e"},"package":"f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"} \ No newline at end of file +{"files":{"Cargo.toml":"33c96af38ed9f42d1ccbf85ecfeea1d46202943d01c595b8ee4dddef760e6bd5","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"b2484aa7e66fb92d1378e9a7ce7605af18f77cb12c179866eaf92ba28cfec1d9","benches/bench.rs":"d2b6ae5b939abd6093064f144b981b7739d7f474ec0698a1268052fc92406635","src/ast/mod.rs":"700c2f779fccb529db7b444819d53c38f916b065d3d05a74282f929af581e8b1","src/ast/parse.rs":"fcd45146eaf747d15a2a519d34754638d451ab83e88b5962841cf7a0dd32e988","src/ast/print.rs":"99cb69ece252ef31e0be177fb3364797eb30b785f936532b8dcd8106e7be0738","src/ast/visitor.rs":"f0fdf758801fe70e6b299b73ab63196e814af95ef6eccad7ef4f72075743fcf6","src/debug.rs":"7a16cca02be9715fdc8c26a32279465774623cd12fab1ec59ac25a6e3047817f","src/either.rs":"1758e3edd056884eccadd995708d1e374ba9aa65846bd0e13b1aae852607c560","src/error.rs":"01a67e3407b0d0d869119363e47a94d92158834bfe5936366c2e3f6f4ed13f36","src/hir/interval.rs":"2358e74b4d4aabfa62f79df855fd5d183779b86c4e14aae4ee42d8695bb3d010","src/hir/literal.rs":"6a8108b8919fbfd9ab93072846124c51d2998489810fcd6e7a89fdccc45833e0","src/hir/mod.rs":"eca183b8e173f486c1a11a5fa10895c96067162c8ec936871f937ca7fca5f710","src/hir/print.rs":"ad51c515c933bfd67d307ba3d7e6ac59c9c5903b4f393a9f9a4785c92b88348d","src/hir/translate.rs":"5fbff527c53f217ba2bac9b0948d7de74164625d08674b91a479ced271159ebd","src/hir/visitor.rs":"71ca9c93aa48a5ed445399659fa6455093a1bbd9ef44b66bc7095c1b08b2ec1f","src/lib.rs":"5ae457d402e49443bdb23b71353693dd3b0d263b57a6eeb9eb5b5dae5c901bdd","src/parser.rs":"6b2f4f27e3331a01a25b87c89368dd2e54396bd425dac57941f9c1ebfd238ac8","src/rank.rs":"ff3d58b0cc5ffa69e2e8c56fc7d9ef41dd399d59a639a253a51551b858cb5bbd","src/unicode.rs":"9829458ef321b3bc22c21eae4b22805b33f8b5e67022928ffd9a9e0287bc7c31","src/unicode_tables/LICENSE-UNICODE":"74db5baf44a41b1000312c673544b3374e4198af5605c7f9080a402cec42cfa3","src/unicode_tables/age.rs":"2a2599a4e406fbbd0efd16aa6ce385c3f97b87c34820d6686a9f9113a5231c67","src/unicode_tables/case_folding_simple.rs":"9583803d4a10486da372b76979dbd26349b40766229467238eff972c1d78e47b","src/unicode_tables/general_category.rs":"36a93ba1cdeed96a00ff29a5ab5afd2c578a89541bf4dd8b184781
46cebda0aa","src/unicode_tables/grapheme_cluster_break.rs":"39c388e9805a8391d3d3e69d74d831ce4fb99aa7e13e52c64dd2bd16d4765301","src/unicode_tables/mod.rs":"26c837099cd934c8062e24bc9a0aaecf15fe1de03f9c6da3f3e1e5ac3ca24bee","src/unicode_tables/perl_decimal.rs":"a98ea4afe71c2947023ae12bd25c46bf4c7de48eeb40979eca5c96ba62cee02e","src/unicode_tables/perl_space.rs":"ea2b3b84b4a48334082dadc6c37d9fcc9c9ded84b40e8f5c9c9314898638967e","src/unicode_tables/perl_word.rs":"6f1156bd6af32151ecffea4abe07a38fa04b1fc1b227ec1a8dac5d5f08d9d74b","src/unicode_tables/property_bool.rs":"0bd64f6e3228eaecf47824e238bdf1f8a9eef113ace6e790a57f045a8106701c","src/unicode_tables/property_names.rs":"5ca25437927eb70c62adf7d038e99a601cfb8a718677fd6de832589664d3c481","src/unicode_tables/property_values.rs":"5b4cc02392d382cf7af60455fc87b9980e97409b62a4b8d6c5843190d2e2d21d","src/unicode_tables/script.rs":"ea1d771b6d0a4b12d143f9bad2ea9342a0887878cbbe3c11262b6eabedaf2dd4","src/unicode_tables/script_extension.rs":"beeb8349703d903ff861beb8401bfd2599e457dc25df872e69d6ad1615f8b5e9","src/unicode_tables/sentence_break.rs":"2befe2a27cc4e8aecb624e310ef9f371462470dd3b2f572cec1f5873a5e30aa9","src/unicode_tables/word_break.rs":"94679177731b515f0c360eff394286a1f99b59527bdbc826cbf51d32f9666187","src/utf8.rs":"e9a13623a94295b81969c5483de17219ff74bb20768be13c527010351245acbd","test":"c7de5fbc0010d9b5b758cd49956375a64b88601c068167fd366808950257f108"},"package":"c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"} \ No newline at end of file diff --git a/vendor/regex-syntax/Cargo.toml b/vendor/regex-syntax/Cargo.toml index 72e9856..3602ab3 100644 --- a/vendor/regex-syntax/Cargo.toml +++ b/vendor/regex-syntax/Cargo.toml @@ -10,19 +10,39 @@ # See Cargo.toml.orig for the original contents. [package] -edition = "2018" +edition = "2021" +rust-version = "1.65" name = "regex-syntax" -version = "0.6.29" -authors = ["The Rust Project Developers"] +version = "0.8.2" +authors = [ + "The Rust Project Developers", + "Andrew Gallant ", +] description = "A regular expression parser." -homepage = "https://github.com/rust-lang/regex" documentation = "https://docs.rs/regex-syntax" readme = "README.md" license = "MIT OR Apache-2.0" -repository = "https://github.com/rust-lang/regex" +repository = "https://github.com/rust-lang/regex/tree/master/regex-syntax" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = [ + "--cfg", + "docsrs", +] + +[dependencies.arbitrary] +version = "1.3.0" +features = ["derive"] +optional = true [features] -default = ["unicode"] +arbitrary = ["dep:arbitrary"] +default = [ + "std", + "unicode", +] +std = [] unicode = [ "unicode-age", "unicode-bool", diff --git a/vendor/regex-syntax/README.md b/vendor/regex-syntax/README.md index 592f842..529513b 100644 --- a/vendor/regex-syntax/README.md +++ b/vendor/regex-syntax/README.md @@ -4,7 +4,6 @@ This crate provides a robust regular expression parser. [![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions) [![Crates.io](https://img.shields.io/crates/v/regex-syntax.svg)](https://crates.io/crates/regex-syntax) -[![Rust](https://img.shields.io/badge/rust-1.28.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex) ### Documentation @@ -30,13 +29,12 @@ concrete syntax that produced the `Hir`. 
This example shows how to parse a pattern string into its HIR: ```rust -use regex_syntax::Parser; -use regex_syntax::hir::{self, Hir}; +use regex_syntax::{hir::Hir, parse}; -let hir = Parser::new().parse("a|b").unwrap(); +let hir = parse("a|b").unwrap(); assert_eq!(hir, Hir::alternation(vec![ - Hir::literal(hir::Literal::Unicode('a')), - Hir::literal(hir::Literal::Unicode('b')), + Hir::literal("a".as_bytes()), + Hir::literal("b".as_bytes()), ])); ``` diff --git a/vendor/regex-syntax/src/ast/mod.rs b/vendor/regex-syntax/src/ast/mod.rs index 9db9afa..6a77ee1 100644 --- a/vendor/regex-syntax/src/ast/mod.rs +++ b/vendor/regex-syntax/src/ast/mod.rs @@ -2,9 +2,9 @@ Defines an abstract syntax for regular expressions. */ -use std::cmp::Ordering; -use std::error; -use std::fmt; +use core::cmp::Ordering; + +use alloc::{boxed::Box, string::String, vec, vec::Vec}; pub use crate::ast::visitor::{visit, Visitor}; @@ -20,6 +20,7 @@ mod visitor; /// valid Unicode property name. That particular error is reported when /// translating an AST to the high-level intermediate representation (`HIR`). #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Error { /// The kind of error. kind: ErrorKind, @@ -65,7 +66,12 @@ impl Error { } /// The type of an error that occurred while building an AST. +/// +/// This error type is marked as `non_exhaustive`. This means that adding a +/// new variant is not considered a breaking change. +#[non_exhaustive] #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ErrorKind { /// The capturing group limit was exceeded. /// @@ -156,6 +162,18 @@ pub enum ErrorKind { /// `(?i)*`. It is, however, possible to create a repetition operating on /// an empty sub-expression. For example, `()*` is still considered valid. RepetitionMissing, + /// The special word boundary syntax, `\b{something}`, was used, but + /// either EOF without `}` was seen, or an invalid character in the + /// braces was seen. + SpecialWordBoundaryUnclosed, + /// The special word boundary syntax, `\b{something}`, was used, but + /// `something` was not recognized as a valid word boundary kind. + SpecialWordBoundaryUnrecognized, + /// The syntax `\b{` was observed, but afterwards the end of the pattern + /// was observed without being able to tell whether it was meant to be a + /// bounded repetition on the `\b` or the beginning of a special word + /// boundary assertion. + SpecialWordOrRepetitionUnexpectedEof, /// The Unicode class is not valid. This typically occurs when a `\p` is /// followed by something other than a `{`. UnicodeClassInvalid, @@ -169,71 +187,26 @@ pub enum ErrorKind { /// `(? &str { - use self::ErrorKind::*; - match self.kind { - CaptureLimitExceeded => "capture group limit exceeded", - ClassEscapeInvalid => "invalid escape sequence in character class", - ClassRangeInvalid => "invalid character class range", - ClassRangeLiteral => "invalid range boundary, must be a literal", - ClassUnclosed => "unclosed character class", - DecimalEmpty => "empty decimal literal", - DecimalInvalid => "invalid decimal literal", - EscapeHexEmpty => "empty hexadecimal literal", - EscapeHexInvalid => "invalid hexadecimal literal", - EscapeHexInvalidDigit => "invalid hexadecimal digit", - EscapeUnexpectedEof => "unexpected eof (escape sequence)", - EscapeUnrecognized => "unrecognized escape sequence", - FlagDanglingNegation => "dangling flag negation operator", - FlagDuplicate { .. 
} => "duplicate flag", - FlagRepeatedNegation { .. } => "repeated negation", - FlagUnexpectedEof => "unexpected eof (flag)", - FlagUnrecognized => "unrecognized flag", - GroupNameDuplicate { .. } => "duplicate capture group name", - GroupNameEmpty => "empty capture group name", - GroupNameInvalid => "invalid capture group name", - GroupNameUnexpectedEof => "unclosed capture group name", - GroupUnclosed => "unclosed group", - GroupUnopened => "unopened group", - NestLimitExceeded(_) => "nest limit exceeded", - RepetitionCountInvalid => "invalid repetition count range", - RepetitionCountUnclosed => "unclosed counted repetition", - RepetitionMissing => "repetition operator missing expression", - UnicodeClassInvalid => "invalid Unicode character class", - UnsupportedBackreference => "backreferences are not supported", - UnsupportedLookAround => "look-around is not supported", - _ => unreachable!(), - } - } -} +#[cfg(feature = "std")] +impl std::error::Error for Error {} -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { crate::error::Formatter::from(self).fmt(f) } } -impl fmt::Display for ErrorKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use self::ErrorKind::*; match *self { CaptureLimitExceeded => write!( f, "exceeded the maximum number of \ capturing groups ({})", - ::std::u32::MAX + u32::MAX ), ClassEscapeInvalid => { write!(f, "invalid escape sequence found in character class") @@ -299,6 +272,29 @@ impl fmt::Display for ErrorKind { RepetitionMissing => { write!(f, "repetition operator missing expression") } + SpecialWordBoundaryUnclosed => { + write!( + f, + "special word boundary assertion is either \ + unclosed or contains an invalid character", + ) + } + SpecialWordBoundaryUnrecognized => { + write!( + f, + "unrecognized special word boundary assertion, \ + valid choices are: start, end, start-half \ + or end-half", + ) + } + SpecialWordOrRepetitionUnexpectedEof => { + write!( + f, + "found either the beginning of a special word \ + boundary or a bounded repetition on a \\b with \ + an opening brace, but no closing brace", + ) + } UnicodeClassInvalid => { write!(f, "invalid Unicode character class") } @@ -310,7 +306,6 @@ impl fmt::Display for ErrorKind { "look-around, including look-ahead and look-behind, \ is not supported" ), - _ => unreachable!(), } } } @@ -320,6 +315,7 @@ impl fmt::Display for ErrorKind { /// All span positions are absolute byte offsets that can be used on the /// original regular expression that was parsed. #[derive(Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Span { /// The start byte offset. pub start: Position, @@ -327,8 +323,8 @@ pub struct Span { pub end: Position, } -impl fmt::Debug for Span { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Span { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "Span({:?}, {:?})", self.start, self.end) } } @@ -350,6 +346,7 @@ impl PartialOrd for Span { /// A position encodes one half of a span, and include the byte offset, line /// number and column number. 
#[derive(Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Position { /// The absolute offset of this position, starting at `0` from the /// beginning of the regular expression pattern string. @@ -360,8 +357,8 @@ pub struct Position { pub column: usize, } -impl fmt::Debug for Position { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Position { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Position(o: {:?}, l: {:?}, c: {:?})", @@ -438,6 +435,7 @@ impl Position { /// comment contains a span of precisely where it occurred in the original /// regular expression. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct WithComments { /// The actual ast. pub ast: Ast, @@ -450,6 +448,7 @@ pub struct WithComments { /// A regular expression can only contain comments when the `x` flag is /// enabled. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Comment { /// The span of this comment, including the beginning `#` and ending `\n`. pub span: Span, @@ -466,31 +465,97 @@ pub struct Comment { /// This type defines its own destructor that uses constant stack space and /// heap space proportional to the size of the `Ast`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum Ast { /// An empty regex that matches everything. - Empty(Span), + Empty(Box), /// A set of flags, e.g., `(?is)`. - Flags(SetFlags), + Flags(Box), /// A single character literal, which includes escape sequences. - Literal(Literal), + Literal(Box), /// The "any character" class. - Dot(Span), + Dot(Box), /// A single zero-width assertion. - Assertion(Assertion), - /// A single character class. This includes all forms of character classes - /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`. - Class(Class), + Assertion(Box), + /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`. + ClassUnicode(Box), + /// A single perl character class, e.g., `\d` or `\W`. + ClassPerl(Box), + /// A single bracketed character class set, which may contain zero or more + /// character ranges and/or zero or more nested classes. e.g., + /// `[a-zA-Z\pL]`. + ClassBracketed(Box), /// A repetition operator applied to an arbitrary regular expression. - Repetition(Repetition), + Repetition(Box), /// A grouped regular expression. - Group(Group), + Group(Box), /// An alternation of regular expressions. - Alternation(Alternation), + Alternation(Box), /// A concatenation of regular expressions. - Concat(Concat), + Concat(Box), } impl Ast { + /// Create an "empty" AST item. + pub fn empty(span: Span) -> Ast { + Ast::Empty(Box::new(span)) + } + + /// Create a "flags" AST item. + pub fn flags(e: SetFlags) -> Ast { + Ast::Flags(Box::new(e)) + } + + /// Create a "literal" AST item. + pub fn literal(e: Literal) -> Ast { + Ast::Literal(Box::new(e)) + } + + /// Create a "dot" AST item. + pub fn dot(span: Span) -> Ast { + Ast::Dot(Box::new(span)) + } + + /// Create a "assertion" AST item. + pub fn assertion(e: Assertion) -> Ast { + Ast::Assertion(Box::new(e)) + } + + /// Create a "Unicode class" AST item. + pub fn class_unicode(e: ClassUnicode) -> Ast { + Ast::ClassUnicode(Box::new(e)) + } + + /// Create a "Perl class" AST item. 
+ pub fn class_perl(e: ClassPerl) -> Ast { + Ast::ClassPerl(Box::new(e)) + } + + /// Create a "bracketed class" AST item. + pub fn class_bracketed(e: ClassBracketed) -> Ast { + Ast::ClassBracketed(Box::new(e)) + } + + /// Create a "repetition" AST item. + pub fn repetition(e: Repetition) -> Ast { + Ast::Repetition(Box::new(e)) + } + + /// Create a "group" AST item. + pub fn group(e: Group) -> Ast { + Ast::Group(Box::new(e)) + } + + /// Create a "alternation" AST item. + pub fn alternation(e: Alternation) -> Ast { + Ast::Alternation(Box::new(e)) + } + + /// Create a "concat" AST item. + pub fn concat(e: Concat) -> Ast { + Ast::Concat(Box::new(e)) + } + /// Return the span of this abstract syntax tree. pub fn span(&self) -> &Span { match *self { @@ -499,7 +564,9 @@ impl Ast { Ast::Literal(ref x) => &x.span, Ast::Dot(ref span) => span, Ast::Assertion(ref x) => &x.span, - Ast::Class(ref x) => x.span(), + Ast::ClassUnicode(ref x) => &x.span, + Ast::ClassPerl(ref x) => &x.span, + Ast::ClassBracketed(ref x) => &x.span, Ast::Repetition(ref x) => &x.span, Ast::Group(ref x) => &x.span, Ast::Alternation(ref x) => &x.span, @@ -523,8 +590,10 @@ impl Ast { | Ast::Flags(_) | Ast::Literal(_) | Ast::Dot(_) - | Ast::Assertion(_) => false, - Ast::Class(_) + | Ast::Assertion(_) + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) => false, + Ast::ClassBracketed(_) | Ast::Repetition(_) | Ast::Group(_) | Ast::Alternation(_) @@ -541,8 +610,8 @@ impl Ast { /// /// This implementation uses constant stack space and heap space proportional /// to the size of the `Ast`. -impl fmt::Display for Ast { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Ast { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use crate::ast::print::Printer; Printer::new().print(self, f) } @@ -550,6 +619,7 @@ impl fmt::Display for Ast { /// An alternation of regular expressions. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Alternation { /// The span of this alternation. pub span: Span, @@ -560,20 +630,21 @@ pub struct Alternation { impl Alternation { /// Return this alternation as an AST. /// - /// If this alternation contains zero ASTs, then Ast::Empty is - /// returned. If this alternation contains exactly 1 AST, then the - /// corresponding AST is returned. Otherwise, Ast::Alternation is returned. + /// If this alternation contains zero ASTs, then `Ast::empty` is returned. + /// If this alternation contains exactly 1 AST, then the corresponding AST + /// is returned. Otherwise, `Ast::alternation` is returned. pub fn into_ast(mut self) -> Ast { match self.asts.len() { - 0 => Ast::Empty(self.span), + 0 => Ast::empty(self.span), 1 => self.asts.pop().unwrap(), - _ => Ast::Alternation(self), + _ => Ast::alternation(self), } } } /// A concatenation of regular expressions. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Concat { /// The span of this concatenation. pub span: Span, @@ -584,14 +655,14 @@ pub struct Concat { impl Concat { /// Return this concatenation as an AST. /// - /// If this concatenation contains zero ASTs, then Ast::Empty is - /// returned. If this concatenation contains exactly 1 AST, then the - /// corresponding AST is returned. Otherwise, Ast::Concat is returned. + /// If this alternation contains zero ASTs, then `Ast::empty` is returned. 
+ /// If this alternation contains exactly 1 AST, then the corresponding AST + /// is returned. Otherwise, `Ast::concat` is returned. pub fn into_ast(mut self) -> Ast { match self.asts.len() { - 0 => Ast::Empty(self.span), + 0 => Ast::empty(self.span), 1 => self.asts.pop().unwrap(), - _ => Ast::Concat(self), + _ => Ast::concat(self), } } } @@ -602,6 +673,7 @@ impl Concat { /// represented in their literal form, e.g., `a` or in their escaped form, /// e.g., `\x61`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Literal { /// The span of this literal. pub span: Span, @@ -615,23 +687,27 @@ impl Literal { /// If this literal was written as a `\x` hex escape, then this returns /// the corresponding byte value. Otherwise, this returns `None`. pub fn byte(&self) -> Option { - let short_hex = LiteralKind::HexFixed(HexLiteralKind::X); - if self.c as u32 <= 255 && self.kind == short_hex { - Some(self.c as u8) - } else { - None + match self.kind { + LiteralKind::HexFixed(HexLiteralKind::X) => { + u8::try_from(self.c).ok() + } + _ => None, } } } /// The kind of a single literal expression. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum LiteralKind { /// The literal is written verbatim, e.g., `a` or `☃`. Verbatim, - /// The literal is written as an escape because it is punctuation, e.g., - /// `\*` or `\[`. - Punctuation, + /// The literal is written as an escape because it is otherwise a special + /// regex meta character, e.g., `\*` or `\[`. + Meta, + /// The literal is written as an escape despite the fact that the escape is + /// unnecessary, e.g., `\%` or `\/`. + Superfluous, /// The literal is written as an octal escape, e.g., `\141`. Octal, /// The literal is written as a hex code with a fixed number of digits @@ -652,6 +728,7 @@ pub enum LiteralKind { /// A special literal is a special escape sequence recognized by the regex /// parser, e.g., `\f` or `\n`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum SpecialLiteralKind { /// Bell, spelled `\a` (`\x07`). Bell, @@ -676,6 +753,7 @@ pub enum SpecialLiteralKind { /// differ when used without brackets in the number of hex digits that must /// follow. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum HexLiteralKind { /// A `\x` prefix. When used without brackets, this form is limited to /// two digits. @@ -701,32 +779,9 @@ impl HexLiteralKind { } } -/// A single character class expression. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Class { - /// A Unicode character class, e.g., `\pL` or `\p{Greek}`. - Unicode(ClassUnicode), - /// A perl character class, e.g., `\d` or `\W`. - Perl(ClassPerl), - /// A bracketed character class set, which may contain zero or more - /// character ranges and/or zero or more nested classes. e.g., - /// `[a-zA-Z\pL]`. - Bracketed(ClassBracketed), -} - -impl Class { - /// Return the span of this character class. - pub fn span(&self) -> &Span { - match *self { - Class::Perl(ref x) => &x.span, - Class::Unicode(ref x) => &x.span, - Class::Bracketed(ref x) => &x.span, - } - } -} - /// A Perl character class. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassPerl { /// The span of this class. 
pub span: Span, @@ -739,6 +794,7 @@ pub struct ClassPerl { /// The available Perl character classes. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassPerlKind { /// Decimal numbers. Digit, @@ -750,6 +806,7 @@ pub enum ClassPerlKind { /// An ASCII character class. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassAscii { /// The span of this class. pub span: Span, @@ -762,6 +819,7 @@ pub struct ClassAscii { /// The available ASCII character classes. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassAsciiKind { /// `[0-9A-Za-z]` Alnum, @@ -825,6 +883,7 @@ impl ClassAsciiKind { /// A Unicode character class. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassUnicode { /// The span of this class. pub span: Span, @@ -877,8 +936,156 @@ pub enum ClassUnicodeKind { }, } +#[cfg(feature = "arbitrary")] +impl arbitrary::Arbitrary<'_> for ClassUnicodeKind { + fn arbitrary( + u: &mut arbitrary::Unstructured, + ) -> arbitrary::Result { + #[cfg(any( + feature = "unicode-age", + feature = "unicode-bool", + feature = "unicode-gencat", + feature = "unicode-perl", + feature = "unicode-script", + feature = "unicode-segment", + ))] + { + use alloc::string::ToString; + + use super::unicode_tables::{ + property_names::PROPERTY_NAMES, + property_values::PROPERTY_VALUES, + }; + + match u.choose_index(3)? { + 0 => { + let all = PROPERTY_VALUES + .iter() + .flat_map(|e| e.1.iter()) + .filter(|(name, _)| name.len() == 1) + .count(); + let idx = u.choose_index(all)?; + let value = PROPERTY_VALUES + .iter() + .flat_map(|e| e.1.iter()) + .take(idx + 1) + .last() + .unwrap() + .0 + .chars() + .next() + .unwrap(); + Ok(ClassUnicodeKind::OneLetter(value)) + } + 1 => { + let all = PROPERTY_VALUES + .iter() + .map(|e| e.1.len()) + .sum::() + + PROPERTY_NAMES.len(); + let idx = u.choose_index(all)?; + let name = PROPERTY_VALUES + .iter() + .flat_map(|e| e.1.iter()) + .chain(PROPERTY_NAMES) + .map(|(_, e)| e) + .take(idx + 1) + .last() + .unwrap(); + Ok(ClassUnicodeKind::Named(name.to_string())) + } + 2 => { + let all = PROPERTY_VALUES + .iter() + .map(|e| e.1.len()) + .sum::(); + let idx = u.choose_index(all)?; + let (prop, value) = PROPERTY_VALUES + .iter() + .flat_map(|e| { + e.1.iter().map(|(_, value)| (e.0, value)) + }) + .take(idx + 1) + .last() + .unwrap(); + Ok(ClassUnicodeKind::NamedValue { + op: u.arbitrary()?, + name: prop.to_string(), + value: value.to_string(), + }) + } + _ => unreachable!("index chosen is impossible"), + } + } + #[cfg(not(any( + feature = "unicode-age", + feature = "unicode-bool", + feature = "unicode-gencat", + feature = "unicode-perl", + feature = "unicode-script", + feature = "unicode-segment", + )))] + { + match u.choose_index(3)? 
{ + 0 => Ok(ClassUnicodeKind::OneLetter(u.arbitrary()?)), + 1 => Ok(ClassUnicodeKind::Named(u.arbitrary()?)), + 2 => Ok(ClassUnicodeKind::NamedValue { + op: u.arbitrary()?, + name: u.arbitrary()?, + value: u.arbitrary()?, + }), + _ => unreachable!("index chosen is impossible"), + } + } + } + + fn size_hint(depth: usize) -> (usize, Option) { + #[cfg(any( + feature = "unicode-age", + feature = "unicode-bool", + feature = "unicode-gencat", + feature = "unicode-perl", + feature = "unicode-script", + feature = "unicode-segment", + ))] + { + arbitrary::size_hint::and_all(&[ + usize::size_hint(depth), + usize::size_hint(depth), + arbitrary::size_hint::or( + (0, Some(0)), + ClassUnicodeOpKind::size_hint(depth), + ), + ]) + } + #[cfg(not(any( + feature = "unicode-age", + feature = "unicode-bool", + feature = "unicode-gencat", + feature = "unicode-perl", + feature = "unicode-script", + feature = "unicode-segment", + )))] + { + arbitrary::size_hint::and( + usize::size_hint(depth), + arbitrary::size_hint::or_all(&[ + char::size_hint(depth), + String::size_hint(depth), + arbitrary::size_hint::and_all(&[ + String::size_hint(depth), + String::size_hint(depth), + ClassUnicodeOpKind::size_hint(depth), + ]), + ]), + ) + } + } +} + /// The type of op used in a Unicode character class. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassUnicodeOpKind { /// A property set to a specific value, e.g., `\p{scx=Katakana}`. Equal, @@ -901,6 +1108,7 @@ impl ClassUnicodeOpKind { /// A bracketed character class, e.g., `[a-z0-9]`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassBracketed { /// The span of this class. pub span: Span, @@ -919,6 +1127,7 @@ pub struct ClassBracketed { /// items (literals, ranges, other bracketed classes) or a tree of binary set /// operations. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassSet { /// An item, which can be a single literal, range, nested character class /// or a union of items. @@ -952,6 +1161,7 @@ impl ClassSet { /// A single component of a character class set. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassSetItem { /// An empty item. /// @@ -995,6 +1205,7 @@ impl ClassSetItem { /// A single character class range in a set. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassSetRange { /// The span of this range. pub span: Span, @@ -1016,6 +1227,7 @@ impl ClassSetRange { /// A union of items inside a character class set. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassSetUnion { /// The span of the items in this operation. e.g., the `a-z0-9` in /// `[^a-z0-9]` @@ -1060,6 +1272,7 @@ impl ClassSetUnion { /// A Unicode character class set operation. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassSetBinaryOp { /// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`. pub span: Span, @@ -1077,6 +1290,7 @@ pub struct ClassSetBinaryOp { /// explicit union operator. Concatenation inside a character class corresponds /// to the union operation. 
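
With the old `ast::Class` wrapper gone, a bracketed class containing set operations now surfaces directly as `Ast::ClassBracketed`, with `&&`, `--` and `~~` becoming `ClassSetBinaryOp` nodes inside it. A small sketch of that shape, using the parser API shown elsewhere in this patch (illustrative, not from the diff):

```rust
use regex_syntax::ast::{parse::Parser, Ast};

// `&&` (intersection), `--` (difference) and `~~` (symmetric
// difference) inside brackets parse into ClassSetBinaryOp nodes
// under a single Ast::ClassBracketed variant.
let ast = Parser::new().parse(r"[\pN&&[a-z]]").unwrap();
assert!(matches!(ast, Ast::ClassBracketed(_)));
```
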
#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassSetBinaryOpKind { /// The intersection of two sets, e.g., `\pN&&[a-z]`. Intersection, @@ -1090,6 +1304,7 @@ pub enum ClassSetBinaryOpKind { /// A single zero-width assertion. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Assertion { /// The span of this assertion. pub span: Span, @@ -1099,6 +1314,7 @@ pub struct Assertion { /// An assertion kind. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum AssertionKind { /// `^` StartLine, @@ -1112,10 +1328,23 @@ pub enum AssertionKind { WordBoundary, /// `\B` NotWordBoundary, + /// `\b{start}` + WordBoundaryStart, + /// `\b{end}` + WordBoundaryEnd, + /// `\<` (alias for `\b{start}`) + WordBoundaryStartAngle, + /// `\>` (alias for `\b{end}`) + WordBoundaryEndAngle, + /// `\b{start-half}` + WordBoundaryStartHalf, + /// `\b{end-half}` + WordBoundaryEndHalf, } /// A repetition operation applied to a regular expression. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Repetition { /// The span of this operation. pub span: Span, @@ -1129,6 +1358,7 @@ pub struct Repetition { /// The repetition operator itself. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct RepetitionOp { /// The span of this operator. This includes things like `+`, `*?` and /// `{m,n}`. @@ -1139,6 +1369,7 @@ pub struct RepetitionOp { /// The kind of a repetition operator. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum RepetitionKind { /// `?` ZeroOrOne, @@ -1152,6 +1383,7 @@ pub enum RepetitionKind { /// A range repetition operator. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum RepetitionRange { /// `{m}` Exactly(u32), @@ -1181,6 +1413,7 @@ impl RepetitionRange { /// contains a sub-expression, e.g., `(a)`, `(?Pa)`, `(?:a)` and /// `(?is:a)`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Group { /// The span of this group. pub span: Span, @@ -1203,7 +1436,7 @@ impl Group { /// Returns true if and only if this group is capturing. pub fn is_capturing(&self) -> bool { match self.kind { - GroupKind::CaptureIndex(_) | GroupKind::CaptureName(_) => true, + GroupKind::CaptureIndex(_) | GroupKind::CaptureName { .. } => true, GroupKind::NonCapturing(_) => false, } } @@ -1214,7 +1447,7 @@ impl Group { pub fn capture_index(&self) -> Option { match self.kind { GroupKind::CaptureIndex(i) => Some(i), - GroupKind::CaptureName(ref x) => Some(x.index), + GroupKind::CaptureName { ref name, .. } => Some(name.index), GroupKind::NonCapturing(_) => None, } } @@ -1222,11 +1455,17 @@ impl Group { /// The kind of a group. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum GroupKind { /// `(a)` CaptureIndex(u32), - /// `(?Pa)` - CaptureName(CaptureName), + /// `(?a)` or `(?Pa)` + CaptureName { + /// True if the `?P<` syntax is used and false if the `?<` syntax is used. + starts_with_p: bool, + /// The capture name. 
+ name: CaptureName, + }, /// `(?:a)` and `(?i:a)` NonCapturing(Flags), } @@ -1245,8 +1484,38 @@ pub struct CaptureName { pub index: u32, } +#[cfg(feature = "arbitrary")] +impl arbitrary::Arbitrary<'_> for CaptureName { + fn arbitrary( + u: &mut arbitrary::Unstructured, + ) -> arbitrary::Result { + let len = u.arbitrary_len::()?; + if len == 0 { + return Err(arbitrary::Error::NotEnoughData); + } + let mut name: String = String::new(); + for _ in 0..len { + let ch: char = u.arbitrary()?; + let cp = u32::from(ch); + let ascii_letter_offset = u8::try_from(cp % 26).unwrap(); + let ascii_letter = b'a' + ascii_letter_offset; + name.push(char::from(ascii_letter)); + } + Ok(CaptureName { span: u.arbitrary()?, name, index: u.arbitrary()? }) + } + + fn size_hint(depth: usize) -> (usize, Option) { + arbitrary::size_hint::and_all(&[ + Span::size_hint(depth), + usize::size_hint(depth), + u32::size_hint(depth), + ]) + } +} + /// A group of flags that is not applied to a particular regular expression. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct SetFlags { /// The span of these flags, including the grouping parentheses. pub span: Span, @@ -1258,6 +1527,7 @@ pub struct SetFlags { /// /// This corresponds only to the sequence of flags themselves, e.g., `is-u`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Flags { /// The span of this group of flags. pub span: Span, @@ -1310,6 +1580,7 @@ impl Flags { /// A single item in a group of flags. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct FlagsItem { /// The span of this item. pub span: Span, @@ -1319,6 +1590,7 @@ pub struct FlagsItem { /// The kind of an item in a group of flags. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum FlagsItemKind { /// A negation operator applied to all subsequent flags in the enclosing /// group. @@ -1339,6 +1611,7 @@ impl FlagsItemKind { /// A single flag. #[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum Flag { /// `i` CaseInsensitive, @@ -1350,6 +1623,8 @@ pub enum Flag { SwapGreed, /// `u` Unicode, + /// `R` + CRLF, /// `x` IgnoreWhitespace, } @@ -1358,7 +1633,7 @@ pub enum Flag { /// space but heap space proportional to the depth of the `Ast`. impl Drop for Ast { fn drop(&mut self) { - use std::mem; + use core::mem; match *self { Ast::Empty(_) @@ -1366,8 +1641,10 @@ impl Drop for Ast { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - // Classes are recursive, so they get their own Drop impl. - | Ast::Class(_) => return, + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) + // Bracketed classes are recursive, they get their own Drop impl. 
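
The comment in the `Drop` impl below names the key trick: recursion is kept off the call stack by draining children into an explicit worklist before they drop. A toy sketch of the same pattern on a minimal recursive type (illustrative only, not from the patch):

```rust
// Move children out into an explicit stack so that dropping a very
// deep tree uses constant call-stack space.
enum Tree {
    Leaf,
    Node(Box<Tree>),
}

impl Drop for Tree {
    fn drop(&mut self) {
        let mut stack = Vec::new();
        if let Tree::Node(child) = self {
            stack.push(std::mem::replace(child.as_mut(), Tree::Leaf));
        }
        while let Some(mut tree) = stack.pop() {
            if let Tree::Node(child) = &mut tree {
                stack.push(std::mem::replace(child.as_mut(), Tree::Leaf));
            }
            // `tree` is dropped here; any child it had is already on
            // the stack, so no recursive drop chain builds up.
        }
    }
}
```
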
+ | Ast::ClassBracketed(_) => return, Ast::Repetition(ref x) if !x.ast.has_subexprs() => return, Ast::Group(ref x) if !x.ast.has_subexprs() => return, Ast::Alternation(ref x) if x.asts.is_empty() => return, @@ -1376,7 +1653,7 @@ impl Drop for Ast { } let empty_span = || Span::splat(Position::new(0, 0, 0)); - let empty_ast = || Ast::Empty(empty_span()); + let empty_ast = || Ast::empty(empty_span()); let mut stack = vec![mem::replace(self, empty_ast())]; while let Some(mut ast) = stack.pop() { match ast { @@ -1385,8 +1662,11 @@ impl Drop for Ast { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - // Classes are recursive, so they get their own Drop impl. - | Ast::Class(_) => {} + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) + // Bracketed classes are recursive, so they get their own Drop + // impl. + | Ast::ClassBracketed(_) => {} Ast::Repetition(ref mut x) => { stack.push(mem::replace(&mut x.ast, empty_ast())); } @@ -1408,7 +1688,7 @@ impl Drop for Ast { /// stack space but heap space proportional to the depth of the `ClassSet`. impl Drop for ClassSet { fn drop(&mut self) { - use std::mem; + use core::mem; match *self { ClassSet::Item(ref item) => match *item { @@ -1479,9 +1759,9 @@ mod tests { let run = || { let span = || Span::splat(Position::new(0, 0, 0)); - let mut ast = Ast::Empty(span()); + let mut ast = Ast::empty(span()); for i in 0..200 { - ast = Ast::Group(Group { + ast = Ast::group(Group { span: span(), kind: GroupKind::CaptureIndex(i), ast: Box::new(ast), @@ -1510,4 +1790,20 @@ mod tests { .join() .unwrap(); } + + // This tests that our `Ast` has a reasonable size. This isn't a hard rule + // and it can be increased if given a good enough reason. But this test + // exists because the size of `Ast` was at one point over 200 bytes on a + // 64-bit target. Wow. + #[test] + fn ast_size() { + let max = 2 * core::mem::size_of::(); + let size = core::mem::size_of::(); + assert!( + size <= max, + "Ast size of {} bytes is bigger than suggested max {}", + size, + max + ); + } } diff --git a/vendor/regex-syntax/src/ast/parse.rs b/vendor/regex-syntax/src/ast/parse.rs index 6e9c9ac..593b14f 100644 --- a/vendor/regex-syntax/src/ast/parse.rs +++ b/vendor/regex-syntax/src/ast/parse.rs @@ -2,17 +2,26 @@ This module provides a regular expression parser. */ -use std::borrow::Borrow; -use std::cell::{Cell, RefCell}; -use std::mem; -use std::result; - -use crate::ast::{self, Ast, Position, Span}; -use crate::either::Either; - -use crate::is_meta_character; - -type Result = result::Result; +use core::{ + borrow::Borrow, + cell::{Cell, RefCell}, + mem, +}; + +use alloc::{ + boxed::Box, + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::{ + ast::{self, Ast, Position, Span}, + either::Either, + is_escapeable_character, is_meta_character, +}; + +type Result = core::result::Result; /// A primitive is an expression with no sub-expressions. This includes /// literals, assertions and non-set character classes. This representation @@ -44,11 +53,11 @@ impl Primitive { /// Convert this primitive into a proper AST. 
fn into_ast(self) -> Ast { match self { - Primitive::Literal(lit) => Ast::Literal(lit), - Primitive::Assertion(assert) => Ast::Assertion(assert), - Primitive::Dot(span) => Ast::Dot(span), - Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)), - Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)), + Primitive::Literal(lit) => Ast::literal(lit), + Primitive::Assertion(assert) => Ast::assertion(assert), + Primitive::Dot(span) => Ast::dot(span), + Primitive::Perl(cls) => Ast::class_perl(cls), + Primitive::Unicode(cls) => Ast::class_unicode(cls), } } @@ -100,11 +109,11 @@ fn is_hex(c: char) -> bool { /// If `first` is true, then `c` is treated as the first character in the /// group name (which must be alphabetic or underscore). fn is_capture_char(c: char, first: bool) -> bool { - c == '_' - || (!first - && (('0' <= c && c <= '9') || c == '.' || c == '[' || c == ']')) - || ('A' <= c && c <= 'Z') - || ('a' <= c && c <= 'z') + if first { + c == '_' || c.is_alphabetic() + } else { + c == '_' || c == '.' || c == '[' || c == ']' || c.is_alphanumeric() + } } /// A builder for a regular expression parser. @@ -162,7 +171,7 @@ impl ParserBuilder { /// constant stack space and moving the call stack to the heap), other /// crates may. /// - /// This limit is not checked until the entire Ast is parsed. Therefore, + /// This limit is not checked until the entire AST is parsed. Therefore, /// if callers want to put a limit on the amount of heap space used, then /// they should impose a limit on the length, in bytes, of the concrete /// pattern string. In particular, this is viable since this parser @@ -220,8 +229,7 @@ impl ParserBuilder { /// abstract syntax tree. The size of the tree is proportional to the length /// of the regular expression pattern. /// -/// A `Parser` can be configured in more detail via a -/// [`ParserBuilder`](struct.ParserBuilder.html). +/// A `Parser` can be configured in more detail via a [`ParserBuilder`]. #[derive(Clone, Debug)] pub struct Parser { /// The current position of the parser. @@ -327,8 +335,7 @@ impl Parser { /// The parser can be run with either the `parse` or `parse_with_comments` /// methods. The parse methods return an abstract syntax tree. /// - /// To set configuration options on the parser, use - /// [`ParserBuilder`](struct.ParserBuilder.html). + /// To set configuration options on the parser, use [`ParserBuilder`]. pub fn new() -> Parser { ParserBuilder::new().build() } @@ -376,7 +383,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// Return a reference to the pattern being parsed. fn pattern(&self) -> &str { - self.pattern.borrow() + self.pattern } /// Create a new error with the given span and error type. @@ -684,7 +691,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { self.parser().ignore_whitespace.set(v); } - concat.asts.push(Ast::Flags(set)); + concat.asts.push(Ast::flags(set)); Ok(concat) } Either::Right(group) => { @@ -757,7 +764,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { group.ast = Box::new(group_concat.into_ast()); } } - prior_concat.asts.push(Ast::Group(group)); + prior_concat.asts.push(Ast::group(group)); Ok(prior_concat) } @@ -776,7 +783,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { Some(GroupState::Alternation(mut alt)) => { alt.span.end = self.pos(); alt.asts.push(concat.into_ast()); - Ok(Ast::Alternation(alt)) + Ok(Ast::alternation(alt)) } Some(GroupState::Group { group, .. 
}) => { return Err( @@ -843,7 +850,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { fn pop_class( &self, nested_union: ast::ClassSetUnion, - ) -> Result> { + ) -> Result> { assert_eq!(self.char(), ']'); let item = ast::ClassSet::Item(nested_union.into_item()); @@ -875,7 +882,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { set.span.end = self.pos(); set.kind = prevset; if stack.is_empty() { - Ok(Either::Right(ast::Class::Bracketed(set))) + Ok(Either::Right(set)) } else { union.push(ast::ClassSetItem::Bracketed(Box::new(set))); Ok(Either::Left(union)) @@ -969,7 +976,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { '|' => concat = self.push_alternate(concat)?, '[' => { let class = self.parse_set_class()?; - concat.asts.push(Ast::Class(class)); + concat.asts.push(Ast::class_bracketed(class)); } '?' => { concat = self.parse_uncounted_repetition( @@ -1050,7 +1057,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { greedy = false; self.bump(); } - concat.asts.push(Ast::Repetition(ast::Repetition { + concat.asts.push(Ast::repetition(ast::Repetition { span: ast.span().with_end(self.pos()), op: ast::RepetitionOp { span: Span::new(op_start, self.pos()), @@ -1152,7 +1159,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { self.error(op_span, ast::ErrorKind::RepetitionCountInvalid) ); } - concat.asts.push(Ast::Repetition(ast::Repetition { + concat.asts.push(Ast::repetition(ast::Repetition { span: ast.span().with_end(self.pos()), op: ast::RepetitionOp { span: op_span, @@ -1195,13 +1202,17 @@ impl<'s, P: Borrow> ParserI<'s, P> { )); } let inner_span = self.span(); - if self.bump_if("?P<") { + let mut starts_with_p = true; + if self.bump_if("?P<") || { + starts_with_p = false; + self.bump_if("?<") + } { let capture_index = self.next_capture_index(open_span)?; - let cap = self.parse_capture_name(capture_index)?; + let name = self.parse_capture_name(capture_index)?; Ok(Either::Right(ast::Group { span: open_span, - kind: ast::GroupKind::CaptureName(cap), - ast: Box::new(Ast::Empty(self.span())), + kind: ast::GroupKind::CaptureName { starts_with_p, name }, + ast: Box::new(Ast::empty(self.span())), })) } else if self.bump_if("?") { if self.is_eof() { @@ -1230,7 +1241,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { Ok(Either::Right(ast::Group { span: open_span, kind: ast::GroupKind::NonCapturing(flags), - ast: Box::new(Ast::Empty(self.span())), + ast: Box::new(Ast::empty(self.span())), })) } } else { @@ -1238,7 +1249,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { Ok(Either::Right(ast::Group { span: open_span, kind: ast::GroupKind::CaptureIndex(capture_index), - ast: Box::new(Ast::Empty(self.span())), + ast: Box::new(Ast::empty(self.span())), })) } } @@ -1370,6 +1381,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { 's' => Ok(ast::Flag::DotMatchesNewLine), 'U' => Ok(ast::Flag::SwapGreed), 'u' => Ok(ast::Flag::Unicode), + 'R' => Ok(ast::Flag::CRLF), 'x' => Ok(ast::Flag::IgnoreWhitespace), _ => { Err(self @@ -1483,7 +1495,14 @@ impl<'s, P: Borrow> ParserI<'s, P> { if is_meta_character(c) { return Ok(Primitive::Literal(ast::Literal { span, - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, + c, + })); + } + if is_escapeable_character(c) { + return Ok(Primitive::Literal(ast::Literal { + span, + kind: ast::LiteralKind::Superfluous, c, })); } @@ -1501,9 +1520,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { 'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'), 'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'), 'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'), - ' ' if self.ignore_whitespace() => { - 
special(ast::SpecialLiteralKind::Space, ' ') - } 'A' => Ok(Primitive::Assertion(ast::Assertion { span, kind: ast::AssertionKind::StartText, @@ -1512,18 +1528,115 @@ impl<'s, P: Borrow> ParserI<'s, P> { span, kind: ast::AssertionKind::EndText, })), - 'b' => Ok(Primitive::Assertion(ast::Assertion { - span, - kind: ast::AssertionKind::WordBoundary, - })), + 'b' => { + let mut wb = ast::Assertion { + span, + kind: ast::AssertionKind::WordBoundary, + }; + // After a \b, we "try" to parse things like \b{start} for + // special word boundary assertions. + if !self.is_eof() && self.char() == '{' { + if let Some(kind) = + self.maybe_parse_special_word_boundary(start)? + { + wb.kind = kind; + wb.span.end = self.pos(); + } + } + Ok(Primitive::Assertion(wb)) + } 'B' => Ok(Primitive::Assertion(ast::Assertion { span, kind: ast::AssertionKind::NotWordBoundary, })), + '<' => Ok(Primitive::Assertion(ast::Assertion { + span, + kind: ast::AssertionKind::WordBoundaryStartAngle, + })), + '>' => Ok(Primitive::Assertion(ast::Assertion { + span, + kind: ast::AssertionKind::WordBoundaryEndAngle, + })), _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)), } } + /// Attempt to parse a specialty word boundary. That is, `\b{start}`, + /// `\b{end}`, `\b{start-half}` or `\b{end-half}`. + /// + /// This is similar to `maybe_parse_ascii_class` in that, in most cases, + /// if it fails it will just return `None` with no error. This is done + /// because `\b{5}` is a valid expression and we want to let that be parsed + /// by the existing counted repetition parsing code. (I thought about just + /// invoking the counted repetition code from here, but it seemed a little + /// ham-fisted.) + /// + /// Unlike `maybe_parse_ascii_class` though, this can return an error. + /// Namely, if we definitely know it isn't a counted repetition, then we + /// return an error specific to the specialty word boundaries. + /// + /// This assumes the parser is positioned at a `{` immediately following + /// a `\b`. When `None` is returned, the parser is returned to the position + /// at which it started: pointing at a `{`. + /// + /// The position given should correspond to the start of the `\b`. + fn maybe_parse_special_word_boundary( + &self, + wb_start: Position, + ) -> Result> { + assert_eq!(self.char(), '{'); + + let is_valid_char = |c| match c { + 'A'..='Z' | 'a'..='z' | '-' => true, + _ => false, + }; + let start = self.pos(); + if !self.bump_and_bump_space() { + return Err(self.error( + Span::new(wb_start, self.pos()), + ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, + )); + } + let start_contents = self.pos(); + // This is one of the critical bits: if the first non-whitespace + // character isn't in [-A-Za-z] (i.e., this can't be a special word + // boundary), then we bail and let the counted repetition parser deal + // with this. + if !is_valid_char(self.char()) { + self.parser().pos.set(start); + return Ok(None); + } + + // Now collect up our chars until we see a '}'. 
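
Seen from the outside, this routine gives `\b{` three possible outcomes: a special assertion, a fallback to counted repetition, or an error. A sketch of the observable behavior (assuming the parser API used throughout this patch; the `\b{qux}` spelling is a made-up invalid name):

```rust
use regex_syntax::ast::parse::Parser;

// After `\b`, a `{` either opens a special word boundary assertion
// or a counted repetition; unrecognized names are rejected.
assert!(Parser::new().parse(r"\b{start}").is_ok()); // assertion
assert!(Parser::new().parse(r"\b{5,9}").is_ok());   // repetition on \b
assert!(Parser::new().parse(r"\b{qux}").is_err());  // unrecognized
```
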
+ let mut scratch = self.parser().scratch.borrow_mut(); + scratch.clear(); + while !self.is_eof() && is_valid_char(self.char()) { + scratch.push(self.char()); + self.bump_and_bump_space(); + } + if self.is_eof() || self.char() != '}' { + return Err(self.error( + Span::new(start, self.pos()), + ast::ErrorKind::SpecialWordBoundaryUnclosed, + )); + } + let end = self.pos(); + self.bump(); + let kind = match scratch.as_str() { + "start" => ast::AssertionKind::WordBoundaryStart, + "end" => ast::AssertionKind::WordBoundaryEnd, + "start-half" => ast::AssertionKind::WordBoundaryStartHalf, + "end-half" => ast::AssertionKind::WordBoundaryEndHalf, + _ => { + return Err(self.error( + Span::new(start_contents, end), + ast::ErrorKind::SpecialWordBoundaryUnrecognized, + )) + } + }; + Ok(Some(kind)) + } + /// Parse an octal representation of a Unicode codepoint up to 3 digits /// long. This expects the parser to be positioned at the first octal /// digit and advances the parser to the first character immediately @@ -1533,9 +1646,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// Assuming the preconditions are met, this routine can never fail. #[inline(never)] fn parse_octal(&self) -> ast::Literal { - use std::char; - use std::u32; - assert!(self.parser().octal); assert!('0' <= self.char() && self.char() <= '7'); let start = self.pos(); @@ -1600,9 +1710,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { &self, kind: ast::HexLiteralKind, ) -> Result { - use std::char; - use std::u32; - let mut scratch = self.parser().scratch.borrow_mut(); scratch.clear(); @@ -1646,9 +1753,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { &self, kind: ast::HexLiteralKind, ) -> Result { - use std::char; - use std::u32; - let mut scratch = self.parser().scratch.borrow_mut(); scratch.clear(); @@ -1736,7 +1840,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// is successful, then the parser is advanced to the position immediately /// following the closing `]`. #[inline(never)] - fn parse_set_class(&self) -> Result { + fn parse_set_class(&self) -> Result { assert_eq!(self.char(), '['); let mut union = @@ -1960,9 +2064,9 @@ impl<'s, P: Borrow> ParserI<'s, P> { // because parsing cannot fail with any interesting error. For example, // in order to use an ASCII character class, it must be enclosed in // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think - // of it as "ASCII character characters have the syntax `[:NAME:]` - // which can only appear within character brackets." This means that - // things like `[[:lower:]A]` are legal constructs. + // of it as "ASCII character classes have the syntax `[:NAME:]` which + // can only appear within character brackets." This means that things + // like `[[:lower:]A]` are legal constructs. // // However, if one types an incorrect ASCII character class, e.g., // `[[:loower:]]`, then we treat that as a normal nested character @@ -2146,7 +2250,7 @@ impl<'p, 's, P: Borrow> NestLimiter<'p, 's, P> { let new = self.depth.checked_add(1).ok_or_else(|| { self.p.error( span.clone(), - ast::ErrorKind::NestLimitExceeded(::std::u32::MAX), + ast::ErrorKind::NestLimitExceeded(u32::MAX), ) })?; let limit = self.p.parser().nest_limit; @@ -2182,12 +2286,12 @@ impl<'p, 's, P: Borrow> ast::Visitor for NestLimiter<'p, 's, P> { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - | Ast::Class(ast::Class::Unicode(_)) - | Ast::Class(ast::Class::Perl(_)) => { + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) => { // These are all base cases, so we don't increment depth. 
return Ok(()); } - Ast::Class(ast::Class::Bracketed(ref x)) => &x.span, + Ast::ClassBracketed(ref x) => &x.span, Ast::Repetition(ref x) => &x.span, Ast::Group(ref x) => &x.span, Ast::Alternation(ref x) => &x.span, @@ -2203,12 +2307,12 @@ impl<'p, 's, P: Borrow> ast::Visitor for NestLimiter<'p, 's, P> { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - | Ast::Class(ast::Class::Unicode(_)) - | Ast::Class(ast::Class::Perl(_)) => { + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) => { // These are all base cases, so we don't decrement depth. Ok(()) } - Ast::Class(ast::Class::Bracketed(_)) + Ast::ClassBracketed(_) | Ast::Repetition(_) | Ast::Group(_) | Ast::Alternation(_) @@ -2297,11 +2401,14 @@ fn specialize_err( #[cfg(test)] mod tests { - use std::ops::Range; + use core::ops::Range; + + use alloc::format; - use super::{Parser, ParserBuilder, ParserI, Primitive}; use crate::ast::{self, Ast, Position, Span}; + use super::*; + // Our own assert_eq, which has slightly better formatting (but honestly // still kind of crappy). macro_rules! assert_eq { @@ -2414,18 +2521,14 @@ mod tests { lit_with(c, span(start..start + c.len_utf8())) } - /// Create a punctuation literal starting at the given position. - fn punct_lit(c: char, span: Span) -> Ast { - Ast::Literal(ast::Literal { - span, - kind: ast::LiteralKind::Punctuation, - c, - }) + /// Create a meta literal starting at the given position. + fn meta_lit(c: char, span: Span) -> Ast { + Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c }) } /// Create a verbatim literal with the given span. fn lit_with(c: char, span: Span) -> Ast { - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Verbatim, c, @@ -2439,17 +2542,17 @@ mod tests { /// Create a concatenation with the given span. fn concat_with(span: Span, asts: Vec) -> Ast { - Ast::Concat(ast::Concat { span, asts }) + Ast::concat(ast::Concat { span, asts }) } /// Create an alternation with the given span. fn alt(range: Range, asts: Vec) -> Ast { - Ast::Alternation(ast::Alternation { span: span(range), asts }) + Ast::alternation(ast::Alternation { span: span(range), asts }) } /// Create a capturing group with the given span. fn group(range: Range, index: u32, ast: Ast) -> Ast { - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span(range), kind: ast::GroupKind::CaptureIndex(index), ast: Box::new(ast), @@ -2482,7 +2585,7 @@ mod tests { }, ); } - Ast::Flags(ast::SetFlags { + Ast::flags(ast::SetFlags { span: span_range(pat, range.clone()), flags: ast::Flags { span: span_range(pat, (range.start + 2)..(range.end - 1)), @@ -2496,7 +2599,7 @@ mod tests { // A nest limit of 0 still allows some types of regexes. 
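
As a concrete illustration of the nest-limit behavior the tests below exercise, the same checks expressed through the public builder (a sketch, not part of the patch):

```rust
use regex_syntax::ast::parse::ParserBuilder;

// A single literal has no nesting and parses even at limit 0, but a
// repetition adds one level of depth and is rejected.
assert!(ParserBuilder::new().nest_limit(0).build().parse("a").is_ok());
assert!(ParserBuilder::new().nest_limit(0).build().parse("a+").is_err());
```
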
assert_eq!( parser_nest_limit("", 0).parse(), - Ok(Ast::Empty(span(0..0))) + Ok(Ast::empty(span(0..0))) ); assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0))); @@ -2510,7 +2613,7 @@ mod tests { ); assert_eq!( parser_nest_limit("a+", 1).parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2536,14 +2639,14 @@ mod tests { ); assert_eq!( parser_nest_limit("a+*", 2).parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..3), op: ast::RepetitionOp { span: span(2..3), kind: ast::RepetitionKind::ZeroOrMore, }, greedy: true, - ast: Box::new(Ast::Repetition(ast::Repetition { + ast: Box::new(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2600,7 +2703,7 @@ mod tests { ); assert_eq!( parser_nest_limit("[a]", 1).parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: ast::ClassSet::Item(ast::ClassSetItem::Literal( @@ -2610,7 +2713,7 @@ mod tests { c: 'a', } )), - }))) + })) ); assert_eq!( parser_nest_limit("[ab]", 1).parse().unwrap_err(), @@ -2704,24 +2807,24 @@ bar Ok(concat( 0..36, vec![ - punct_lit('\\', span(0..2)), - punct_lit('.', span(2..4)), - punct_lit('+', span(4..6)), - punct_lit('*', span(6..8)), - punct_lit('?', span(8..10)), - punct_lit('(', span(10..12)), - punct_lit(')', span(12..14)), - punct_lit('|', span(14..16)), - punct_lit('[', span(16..18)), - punct_lit(']', span(18..20)), - punct_lit('{', span(20..22)), - punct_lit('}', span(22..24)), - punct_lit('^', span(24..26)), - punct_lit('$', span(26..28)), - punct_lit('#', span(28..30)), - punct_lit('&', span(30..32)), - punct_lit('-', span(32..34)), - punct_lit('~', span(34..36)), + meta_lit('\\', span(0..2)), + meta_lit('.', span(2..4)), + meta_lit('+', span(4..6)), + meta_lit('*', span(6..8)), + meta_lit('?', span(8..10)), + meta_lit('(', span(10..12)), + meta_lit(')', span(12..14)), + meta_lit('|', span(14..16)), + meta_lit('[', span(16..18)), + meta_lit(']', span(18..20)), + meta_lit('{', span(20..22)), + meta_lit('}', span(22..24)), + meta_lit('^', span(24..26)), + meta_lit('$', span(26..28)), + meta_lit('#', span(28..30)), + meta_lit('&', span(30..32)), + meta_lit('-', span(32..34)), + meta_lit('~', span(34..36)), ] )) ); @@ -2770,7 +2873,7 @@ bar vec![ lit_with('a', span_range(pat, 0..1)), lit_with(' ', span_range(pat, 1..2)), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 2..9), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span_range(pat, 4..5), @@ -2797,13 +2900,16 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 4..pat.len()), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span_range(pat, 9..12), - name: s("foo"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span_range(pat, 9..12), + name: s("foo"), + index: 1, + } + }, ast: Box::new(lit_with('a', span_range(pat, 14..15))), }), ] @@ -2816,7 +2922,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 4..pat.len()), kind: ast::GroupKind::CaptureIndex(1), ast: Box::new(lit_with('a', span_range(pat, 7..8))), @@ -2831,7 
+2937,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 4..pat.len()), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span_range(pat, 8..8), @@ -2849,7 +2955,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(4..13), kind: ast::LiteralKind::HexBrace( ast::HexLiteralKind::X @@ -2868,25 +2974,14 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span_range(pat, 4..6), - kind: ast::LiteralKind::Special( - ast::SpecialLiteralKind::Space - ), + kind: ast::LiteralKind::Superfluous, c: ' ', }), ] )) ); - // ... but only when `x` mode is enabled. - let pat = r"\ "; - assert_eq!( - parser(pat).parse().unwrap_err(), - TestError { - span: span_range(pat, 0..2), - kind: ast::ErrorKind::EscapeUnrecognized, - } - ); } #[test] @@ -2897,9 +2992,9 @@ bar Ok(concat_with( span_range(pat, 0..3), vec![ - Ast::Dot(span_range(pat, 0..1)), + Ast::dot(span_range(pat, 0..1)), lit_with('\n', span_range(pat, 1..2)), - Ast::Dot(span_range(pat, 2..3)), + Ast::dot(span_range(pat, 2..3)), ] )) ); @@ -2935,7 +3030,7 @@ bar fn parse_uncounted_repetition() { assert_eq!( parser(r"a*").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2947,7 +3042,7 @@ bar ); assert_eq!( parser(r"a+").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2960,7 +3055,7 @@ bar assert_eq!( parser(r"a?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2972,7 +3067,7 @@ bar ); assert_eq!( parser(r"a??").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..3), op: ast::RepetitionOp { span: span(1..3), @@ -2984,7 +3079,7 @@ bar ); assert_eq!( parser(r"a?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2999,7 +3094,7 @@ bar Ok(concat( 0..3, vec![ - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -3017,7 +3112,7 @@ bar Ok(concat( 0..4, vec![ - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(0..3), op: ast::RepetitionOp { span: span(1..3), @@ -3036,7 +3131,7 @@ bar 0..3, vec![ lit('a', 0), - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(1..3), op: ast::RepetitionOp { span: span(2..3), @@ -3050,7 +3145,7 @@ bar ); assert_eq!( parser(r"(ab)?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..5), op: ast::RepetitionOp { span: span(4..5), @@ -3069,8 +3164,8 @@ bar Ok(alt( 0..3, vec![ - Ast::Empty(span(0..0)), - Ast::Repetition(ast::Repetition { + Ast::empty(span(0..0)), + Ast::repetition(ast::Repetition { span: span(1..3), op: ast::RepetitionOp { span: span(2..3), @@ -3159,7 +3254,7 @@ bar fn parse_counted_repetition() { assert_eq!( parser(r"a{5}").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: 
span(0..4), op: ast::RepetitionOp { span: span(1..4), @@ -3173,7 +3268,7 @@ bar ); assert_eq!( parser(r"a{5,}").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..5), op: ast::RepetitionOp { span: span(1..5), @@ -3187,7 +3282,7 @@ bar ); assert_eq!( parser(r"a{5,9}").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..6), op: ast::RepetitionOp { span: span(1..6), @@ -3201,7 +3296,7 @@ bar ); assert_eq!( parser(r"a{5}?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..5), op: ast::RepetitionOp { span: span(1..5), @@ -3219,7 +3314,7 @@ bar 0..5, vec![ lit('a', 0), - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(1..5), op: ast::RepetitionOp { span: span(2..5), @@ -3239,7 +3334,7 @@ bar 0..6, vec![ lit('a', 0), - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(1..5), op: ast::RepetitionOp { span: span(2..5), @@ -3257,7 +3352,7 @@ bar assert_eq!( parser(r"a{ 5 }").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..6), op: ast::RepetitionOp { span: span(1..6), @@ -3271,7 +3366,7 @@ bar ); assert_eq!( parser(r"a{ 5 , 9 }").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..10), op: ast::RepetitionOp { span: span(1..10), @@ -3285,7 +3380,7 @@ bar ); assert_eq!( parser_ignore_whitespace(r"a{5,9} ?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..8), op: ast::RepetitionOp { span: span(1..8), @@ -3297,6 +3392,23 @@ bar ast: Box::new(lit('a', 0)), })) ); + assert_eq!( + parser(r"\b{5,9}").parse(), + Ok(Ast::repetition(ast::Repetition { + span: span(0..7), + op: ast::RepetitionOp { + span: span(2..7), + kind: ast::RepetitionKind::Range( + ast::RepetitionRange::Bounded(5, 9) + ), + }, + greedy: true, + ast: Box::new(Ast::assertion(ast::Assertion { + span: span(0..2), + kind: ast::AssertionKind::WordBoundary, + })), + })) + ); assert_eq!( parser(r"(?i){0}").parse().unwrap_err(), @@ -3416,7 +3528,7 @@ bar fn parse_alternate() { assert_eq!( parser(r"a|b").parse(), - Ok(Ast::Alternation(ast::Alternation { + Ok(Ast::alternation(ast::Alternation { span: span(0..3), asts: vec![lit('a', 0), lit('b', 2)], })) @@ -3426,7 +3538,7 @@ bar Ok(group( 0..5, 1, - Ast::Alternation(ast::Alternation { + Ast::alternation(ast::Alternation { span: span(1..4), asts: vec![lit('a', 1), lit('b', 3)], }) @@ -3435,14 +3547,14 @@ bar assert_eq!( parser(r"a|b|c").parse(), - Ok(Ast::Alternation(ast::Alternation { + Ok(Ast::alternation(ast::Alternation { span: span(0..5), asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)], })) ); assert_eq!( parser(r"ax|by|cz").parse(), - Ok(Ast::Alternation(ast::Alternation { + Ok(Ast::alternation(ast::Alternation { span: span(0..8), asts: vec![ concat(0..2, vec![lit('a', 0), lit('x', 1)]), @@ -3456,7 +3568,7 @@ bar Ok(group( 0..10, 1, - Ast::Alternation(ast::Alternation { + Ast::alternation(ast::Alternation { span: span(1..9), asts: vec![ concat(1..3, vec![lit('a', 1), lit('x', 2)]), @@ -3505,7 +3617,7 @@ bar parser(r"|").parse(), Ok(alt( 0..1, - vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),] + vec![Ast::empty(span(0..0)), Ast::empty(span(1..1)),] )) ); assert_eq!( @@ -3513,19 +3625,19 @@ bar Ok(alt( 0..2, vec![ - Ast::Empty(span(0..0)), - Ast::Empty(span(1..1)), - Ast::Empty(span(2..2)), + Ast::empty(span(0..0)), + 
Ast::empty(span(1..1)), + Ast::empty(span(2..2)), ] )) ); assert_eq!( parser(r"a|").parse(), - Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),])) + Ok(alt(0..2, vec![lit('a', 0), Ast::empty(span(2..2)),])) ); assert_eq!( parser(r"|a").parse(), - Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),])) + Ok(alt(0..2, vec![Ast::empty(span(0..0)), lit('a', 1),])) ); assert_eq!( @@ -3535,7 +3647,7 @@ bar 1, alt( 1..2, - vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),] + vec![Ast::empty(span(1..1)), Ast::empty(span(2..2)),] ) )) ); @@ -3544,7 +3656,7 @@ bar Ok(group( 0..4, 1, - alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),]) + alt(1..3, vec![lit('a', 1), Ast::empty(span(3..3)),]) )) ); assert_eq!( @@ -3552,7 +3664,7 @@ bar Ok(group( 0..4, 1, - alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),]) + alt(1..3, vec![Ast::empty(span(1..1)), lit('a', 2),]) )) ); @@ -3608,7 +3720,7 @@ bar fn parse_group() { assert_eq!( parser("(?i)").parse(), - Ok(Ast::Flags(ast::SetFlags { + Ok(Ast::flags(ast::SetFlags { span: span(0..4), flags: ast::Flags { span: span(2..3), @@ -3623,7 +3735,7 @@ bar ); assert_eq!( parser("(?iU)").parse(), - Ok(Ast::Flags(ast::SetFlags { + Ok(Ast::flags(ast::SetFlags { span: span(0..5), flags: ast::Flags { span: span(2..4), @@ -3646,7 +3758,7 @@ bar ); assert_eq!( parser("(?i-U)").parse(), - Ok(Ast::Flags(ast::SetFlags { + Ok(Ast::flags(ast::SetFlags { span: span(0..6), flags: ast::Flags { span: span(2..5), @@ -3674,15 +3786,15 @@ bar assert_eq!( parser("()").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..2), kind: ast::GroupKind::CaptureIndex(1), - ast: Box::new(Ast::Empty(span(1..1))), + ast: Box::new(Ast::empty(span(1..1))), })) ); assert_eq!( parser("(a)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..3), kind: ast::GroupKind::CaptureIndex(1), ast: Box::new(lit('a', 1)), @@ -3690,20 +3802,20 @@ bar ); assert_eq!( parser("(())").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..4), kind: ast::GroupKind::CaptureIndex(1), - ast: Box::new(Ast::Group(ast::Group { + ast: Box::new(Ast::group(ast::Group { span: span(1..3), kind: ast::GroupKind::CaptureIndex(2), - ast: Box::new(Ast::Empty(span(2..2))), + ast: Box::new(Ast::empty(span(2..2))), })), })) ); assert_eq!( parser("(?:a)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..5), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span(2..2), @@ -3715,7 +3827,7 @@ bar assert_eq!( parser("(?i:a)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..6), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span(2..3), @@ -3731,7 +3843,7 @@ bar ); assert_eq!( parser("(?i-U:a)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..8), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span(2..5), @@ -3818,70 +3930,149 @@ bar #[test] fn parse_capture_name() { + assert_eq!( + parser("(?z)").parse(), + Ok(Ast::group(ast::Group { + span: span(0..7), + kind: ast::GroupKind::CaptureName { + starts_with_p: false, + name: ast::CaptureName { + span: span(3..4), + name: s("a"), + index: 1, + } + }, + ast: Box::new(lit('z', 5)), + })) + ); assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..8), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..5), - name: s("a"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: 
ast::CaptureName { + span: span(4..5), + name: s("a"), + index: 1, + } + }, ast: Box::new(lit('z', 6)), })) ); assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..10), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..7), - name: s("abc"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..7), + name: s("abc"), + index: 1, + } + }, ast: Box::new(lit('z', 8)), })) ); assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..10), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..7), - name: s("a_1"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..7), + name: s("a_1"), + index: 1, + } + }, ast: Box::new(lit('z', 8)), })) ); assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..10), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..7), - name: s("a.1"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..7), + name: s("a.1"), + index: 1, + } + }, ast: Box::new(lit('z', 8)), })) ); assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..11), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..8), - name: s("a[1]"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..8), + name: s("a[1]"), + index: 1, + } + }, ast: Box::new(lit('z', 9)), })) ); + assert_eq!( + parser("(?P)").parse(), + Ok(Ast::group(ast::Group { + span: Span::new( + Position::new(0, 1, 1), + Position::new(9, 1, 9), + ), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: Span::new( + Position::new(4, 1, 5), + Position::new(7, 1, 7), + ), + name: s("a¾"), + index: 1, + } + }, + ast: Box::new(Ast::empty(Span::new( + Position::new(8, 1, 8), + Position::new(8, 1, 8), + ))), + })) + ); + assert_eq!( + parser("(?P<名字>)").parse(), + Ok(Ast::group(ast::Group { + span: Span::new( + Position::new(0, 1, 1), + Position::new(12, 1, 9), + ), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: Span::new( + Position::new(4, 1, 5), + Position::new(10, 1, 7), + ), + name: s("名字"), + index: 1, + } + }, + ast: Box::new(Ast::empty(Span::new( + Position::new(11, 1, 8), + Position::new(11, 1, 8), + ))), + })) + ); + assert_eq!( parser("(?P<").parse().unwrap_err(), TestError { @@ -3940,6 +4131,60 @@ bar }, } ); + assert_eq!( + parser("(?P<5>)").parse().unwrap_err(), + TestError { + span: span(4..5), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); + assert_eq!( + parser("(?P<5a>)").parse().unwrap_err(), + TestError { + span: span(4..5), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); + assert_eq!( + parser("(?P<¾>)").parse().unwrap_err(), + TestError { + span: Span::new( + Position::new(4, 1, 5), + Position::new(6, 1, 6), + ), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); + assert_eq!( + parser("(?P<¾a>)").parse().unwrap_err(), + TestError { + span: Span::new( + Position::new(4, 1, 5), + Position::new(6, 1, 6), + ), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); + assert_eq!( + parser("(?P<☃>)").parse().unwrap_err(), + TestError { + span: Span::new( + Position::new(4, 1, 5), + 
Position::new(7, 1, 6), + ), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); + assert_eq!( + parser("(?P<a☃>)").parse().unwrap_err(), + TestError { + span: Span::new( + Position::new(5, 1, 6), + Position::new(8, 1, 7), + ), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); } #[test] @@ -4046,6 +4291,34 @@ bar ], }) ); + assert_eq!( + parser("i-sR:").parse_flags(), + Ok(ast::Flags { + span: span(0..4), + items: vec![ + ast::FlagsItem { + span: span(0..1), + kind: ast::FlagsItemKind::Flag( + ast::Flag::CaseInsensitive + ), + }, + ast::FlagsItem { + span: span(1..2), + kind: ast::FlagsItemKind::Negation, + }, + ast::FlagsItem { + span: span(2..3), + kind: ast::FlagsItemKind::Flag( + ast::Flag::DotMatchesNewLine + ), + }, + ast::FlagsItem { + span: span(3..4), + kind: ast::FlagsItemKind::Flag(ast::Flag::CRLF), + }, + ], + }) + ); assert_eq!( parser("isU").parse_flags().unwrap_err(), @@ -4107,6 +4380,7 @@ bar assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine)); assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed)); assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode)); + assert_eq!(parser("R").parse_flag(), Ok(ast::Flag::CRLF)); assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace)); assert_eq!( @@ -4178,7 +4452,7 @@ bar parser(r"\|").parse_primitive(), Ok(Primitive::Literal(ast::Literal { span: span(0..2), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: '|', })) ); @@ -4221,6 +4495,48 @@ bar kind: ast::AssertionKind::WordBoundary, })) ); + assert_eq!( + parser(r"\b{start}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..9), + kind: ast::AssertionKind::WordBoundaryStart, + })) + ); + assert_eq!( + parser(r"\b{end}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..7), + kind: ast::AssertionKind::WordBoundaryEnd, + })) + ); + assert_eq!( + parser(r"\b{start-half}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..14), + kind: ast::AssertionKind::WordBoundaryStartHalf, + })) + ); + assert_eq!( + parser(r"\b{end-half}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..12), + kind: ast::AssertionKind::WordBoundaryEndHalf, + })) + ); + assert_eq!( + parser(r"\<").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..2), + kind: ast::AssertionKind::WordBoundaryStartAngle, + })) + ); + assert_eq!( + parser(r"\>").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..2), + kind: ast::AssertionKind::WordBoundaryEndAngle, + })) + ); assert_eq!( parser(r"\B").parse_primitive(), Ok(Primitive::Assertion(ast::Assertion { @@ -4229,11 +4545,26 @@ bar })) ); + // We also support superfluous escapes in most cases now too. + for c in ['!', '@', '%', '"', '\'', '/', ' '] { + let pat = format!(r"\{}", c); + assert_eq!( + parser(&pat).parse_primitive(), + Ok(Primitive::Literal(ast::Literal { + span: span(0..2), + kind: ast::LiteralKind::Superfluous, + c, + })) + ); + } + + // Some superfluous escapes, namely [0-9A-Za-z], are still banned. This + // gives flexibility for future evolution.
assert_eq!( - parser(r"\").parse_escape().unwrap_err(), + parser(r"\e").parse_escape().unwrap_err(), TestError { - span: span(0..1), - kind: ast::ErrorKind::EscapeUnexpectedEof, + span: span(0..2), + kind: ast::ErrorKind::EscapeUnrecognized, } ); assert_eq!( @@ -4243,6 +4574,71 @@ bar kind: ast::ErrorKind::EscapeUnrecognized, } ); + + // Starting a special word boundary without any non-whitespace chars + // after the brace makes it ambiguous whether the user meant to write + // a counted repetition (probably not?) or an actual special word + // boundary assertion. + assert_eq!( + parser(r"\b{").parse_escape().unwrap_err(), + TestError { + span: span(0..3), + kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, + } + ); + assert_eq!( + parser_ignore_whitespace(r"\b{ ").parse_escape().unwrap_err(), + TestError { + span: span(0..4), + kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, + } + ); + // When 'x' is not enabled, the space is seen as a non-[-A-Za-z] char, + // and thus causes the parser to treat it as a counted repetition. + assert_eq!( + parser(r"\b{ ").parse().unwrap_err(), + TestError { + span: span(4..4), + kind: ast::ErrorKind::RepetitionCountDecimalEmpty, + } + ); + // In this case, we got some valid chars that make it look like the + // user is writing one of the special word boundary assertions, but + // we forgot to close the brace. + assert_eq!( + parser(r"\b{foo").parse_escape().unwrap_err(), + TestError { + span: span(2..6), + kind: ast::ErrorKind::SpecialWordBoundaryUnclosed, + } + ); + // We get the same error as above, except it is provoked by seeing a + // char that we know is invalid before seeing a closing brace. + assert_eq!( + parser(r"\b{foo!}").parse_escape().unwrap_err(), + TestError { + span: span(2..6), + kind: ast::ErrorKind::SpecialWordBoundaryUnclosed, + } + ); + // And this one occurs when, syntactically, everything looks okay, but + // we don't use a valid spelling of a word boundary assertion. + assert_eq!( + parser(r"\b{foo}").parse_escape().unwrap_err(), + TestError { + span: span(3..6), + kind: ast::ErrorKind::SpecialWordBoundaryUnrecognized, + } + ); + + // An unfinished escape is illegal.
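+ // A bare trailing backslash has nothing to escape, so the error span + // below covers just the backslash itself.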
+ assert_eq!( + parser(r"\").parse_escape().unwrap_err(), + TestError { + span: span(0..1), + kind: ast::ErrorKind::EscapeUnexpectedEof, + } + ); } #[test] @@ -4272,7 +4668,7 @@ bar Ok(Primitive::Literal(ast::Literal { span: span(0..pat.len()), kind: ast::LiteralKind::Octal, - c: ::std::char::from_u32(i).unwrap(), + c: char::from_u32(i).unwrap(), })) ); } @@ -4294,15 +4690,15 @@ bar ); assert_eq!( parser_octal(r"\778").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..4), asts: vec![ - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(0..3), kind: ast::LiteralKind::Octal, c: '?', }), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(3..4), kind: ast::LiteralKind::Verbatim, c: '8', @@ -4312,15 +4708,15 @@ bar ); assert_eq!( parser_octal(r"\7777").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..5), asts: vec![ - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(0..4), kind: ast::LiteralKind::Octal, c: '\u{01FF}', }), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(4..5), kind: ast::LiteralKind::Verbatim, c: '7', @@ -4347,7 +4743,7 @@ bar Ok(Primitive::Literal(ast::Literal { span: span(0..pat.len()), kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X), - c: ::std::char::from_u32(i).unwrap(), + c: char::from_u32(i).unwrap(), })) ); } @@ -4378,7 +4774,7 @@ bar #[test] fn parse_hex_four() { for i in 0..65536 { - let c = match ::std::char::from_u32(i) { + let c = match char::from_u32(i) { None => continue, Some(c) => c, }; @@ -4442,7 +4838,7 @@ bar #[test] fn parse_hex_eight() { for i in 0..65536 { - let c = match ::std::char::from_u32(i) { + let c = match char::from_u32(i) { None => continue, Some(c) => c, }; @@ -4765,15 +5161,15 @@ bar assert_eq!( parser("[[:alnum:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..11), negated: false, kind: itemset(item_ascii(alnum(span(1..10), false))), - }))) + })) ); assert_eq!( parser("[[[:alnum:]]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..13), negated: false, kind: itemset(item_bracket(ast::ClassBracketed { @@ -4781,11 +5177,11 @@ bar negated: false, kind: itemset(item_ascii(alnum(span(2..11), false))), })), - }))) + })) ); assert_eq!( parser("[[:alnum:]&&[:lower:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: intersection( @@ -4793,11 +5189,11 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( parser("[[:alnum:]--[:lower:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: difference( @@ -4805,11 +5201,11 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( parser("[[:alnum:]~~[:lower:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: symdifference( @@ -4817,20 +5213,20 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( parser("[a]").parse(), - 
Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: itemset(lit(span(1..2), 'a')), - }))) + })) ); assert_eq!( parser(r"[a\]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: union( @@ -4839,16 +5235,16 @@ bar lit(span(1..2), 'a'), ast::ClassSetItem::Literal(ast::Literal { span: span(2..4), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: ']', }), ] ), - }))) + })) ); assert_eq!( parser(r"[a\-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: union( @@ -4857,50 +5253,50 @@ bar lit(span(1..2), 'a'), ast::ClassSetItem::Literal(ast::Literal { span: span(2..4), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: '-', }), lit(span(4..5), 'z'), ] ), - }))) + })) ); assert_eq!( parser("[ab]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),] ), - }))) + })) ); assert_eq!( parser("[a-]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),] ), - }))) + })) ); assert_eq!( parser("[-a]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),] ), - }))) + })) ); assert_eq!( parser(r"[\pL]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: itemset(item_unicode(ast::ClassUnicode { @@ -4908,11 +5304,11 @@ bar negated: false, kind: ast::ClassUnicodeKind::OneLetter('L'), })), - }))) + })) ); assert_eq!( parser(r"[\w]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: itemset(item_perl(ast::ClassPerl { @@ -4920,11 +5316,11 @@ bar kind: ast::ClassPerlKind::Word, negated: false, })), - }))) + })) ); assert_eq!( parser(r"[a\wz]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: union( @@ -4939,20 +5335,20 @@ bar lit(span(4..5), 'z'), ] ), - }))) + })) ); assert_eq!( parser("[a-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: itemset(range(span(1..4), 'a', 'z')), - }))) + })) ); assert_eq!( parser("[a-cx-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..8), negated: false, kind: union( @@ -4962,11 +5358,11 @@ bar range(span(4..7), 'x', 'z'), ] ), - }))) + })) ); assert_eq!( parser(r"[\w&&a-cx-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..12), negated: false, kind: intersection( @@ -4984,11 +5380,11 @@ bar ] ), ), - }))) + })) ); assert_eq!( 
parser(r"[a-cx-z&&\w]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..12), negated: false, kind: intersection( @@ -5006,11 +5402,11 @@ bar negated: false, })), ), - }))) + })) ); assert_eq!( parser(r"[a--b--c]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..9), negated: false, kind: difference( @@ -5022,11 +5418,11 @@ bar ), itemset(lit(span(7..8), 'c')), ), - }))) + })) ); assert_eq!( parser(r"[a~~b~~c]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..9), negated: false, kind: symdifference( @@ -5038,43 +5434,43 @@ bar ), itemset(lit(span(7..8), 'c')), ), - }))) + })) ); assert_eq!( parser(r"[\^&&^]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..7), negated: false, kind: intersection( span(1..6), itemset(ast::ClassSetItem::Literal(ast::Literal { span: span(1..3), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: '^', })), itemset(lit(span(5..6), '^')), ), - }))) + })) ); assert_eq!( parser(r"[\&&&&]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..7), negated: false, kind: intersection( span(1..6), itemset(ast::ClassSetItem::Literal(ast::Literal { span: span(1..3), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: '&', })), itemset(lit(span(5..6), '&')), ), - }))) + })) ); assert_eq!( parser(r"[&&&&]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: intersection( @@ -5086,13 +5482,13 @@ bar ), itemset(empty(span(5..5))), ), - }))) + })) ); let pat = "[☃-⛄]"; assert_eq!( parser(pat).parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span_range(pat, 0..9), negated: false, kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange { @@ -5108,20 +5504,20 @@ bar c: '⛄', }, })), - }))) + })) ); assert_eq!( parser(r"[]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: itemset(lit(span(1..2), ']')), - }))) + })) ); assert_eq!( parser(r"[]\[]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: union( @@ -5130,30 +5526,30 @@ bar lit(span(1..2), ']'), ast::ClassSetItem::Literal(ast::Literal { span: span(2..4), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: '[', }), ] ), - }))) + })) ); assert_eq!( parser(r"[\[]]").parse(), Ok(concat( 0..5, vec![ - Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: itemset(ast::ClassSetItem::Literal( ast::Literal { span: span(1..3), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: '[', } )), - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(4..5), kind: ast::LiteralKind::Verbatim, c: ']', @@ -5714,15 +6110,15 @@ bar assert_eq!( parser(r"\pNz").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..4), asts: vec![ 
- Ast::Class(ast::Class::Unicode(ast::ClassUnicode { + Ast::class_unicode(ast::ClassUnicode { span: span(0..3), negated: false, kind: ast::ClassUnicodeKind::OneLetter('N'), - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(3..4), kind: ast::LiteralKind::Verbatim, c: 'z', @@ -5732,15 +6128,15 @@ bar ); assert_eq!( parser(r"\p{Greek}z").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..10), asts: vec![ - Ast::Class(ast::Class::Unicode(ast::ClassUnicode { + Ast::class_unicode(ast::ClassUnicode { span: span(0..9), negated: false, kind: ast::ClassUnicodeKind::Named(s("Greek")), - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(9..10), kind: ast::LiteralKind::Verbatim, c: 'z', @@ -5817,23 +6213,23 @@ bar assert_eq!( parser(r"\d").parse(), - Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl { + Ok(Ast::class_perl(ast::ClassPerl { span: span(0..2), kind: ast::ClassPerlKind::Digit, negated: false, - }))) + })) ); assert_eq!( parser(r"\dz").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..3), asts: vec![ - Ast::Class(ast::Class::Perl(ast::ClassPerl { + Ast::class_perl(ast::ClassPerl { span: span(0..2), kind: ast::ClassPerlKind::Digit, negated: false, - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(2..3), kind: ast::LiteralKind::Verbatim, c: 'z', diff --git a/vendor/regex-syntax/src/ast/print.rs b/vendor/regex-syntax/src/ast/print.rs index 045de2e..1ceb3c7 100644 --- a/vendor/regex-syntax/src/ast/print.rs +++ b/vendor/regex-syntax/src/ast/print.rs @@ -2,10 +2,13 @@ This module provides a regular expression printer for `Ast`. */ -use std::fmt; +use core::fmt; -use crate::ast::visitor::{self, Visitor}; -use crate::ast::{self, Ast}; +use crate::ast::{ + self, + visitor::{self, Visitor}, + Ast, +}; /// A builder for constructing a printer. 
/// @@ -77,27 +80,21 @@ impl Visitor for Writer { fn visit_pre(&mut self, ast: &Ast) -> fmt::Result { match *ast { Ast::Group(ref x) => self.fmt_group_pre(x), - Ast::Class(ast::Class::Bracketed(ref x)) => { - self.fmt_class_bracketed_pre(x) - } + Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x), _ => Ok(()), } } fn visit_post(&mut self, ast: &Ast) -> fmt::Result { - use crate::ast::Class; - match *ast { Ast::Empty(_) => Ok(()), Ast::Flags(ref x) => self.fmt_set_flags(x), Ast::Literal(ref x) => self.fmt_literal(x), Ast::Dot(_) => self.wtr.write_str("."), Ast::Assertion(ref x) => self.fmt_assertion(x), - Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x), - Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x), - Ast::Class(Class::Bracketed(ref x)) => { - self.fmt_class_bracketed_post(x) - } + Ast::ClassPerl(ref x) => self.fmt_class_perl(x), + Ast::ClassUnicode(ref x) => self.fmt_class_unicode(x), + Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x), Ast::Repetition(ref x) => self.fmt_repetition(x), Ast::Group(ref x) => self.fmt_group_post(x), Ast::Alternation(_) => Ok(()), @@ -157,9 +154,10 @@ impl Writer { use crate::ast::GroupKind::*; match ast.kind { CaptureIndex(_) => self.wtr.write_str("("), - CaptureName(ref x) => { - self.wtr.write_str("(?P<")?; - self.wtr.write_str(&x.name)?; + CaptureName { ref name, starts_with_p } => { + let start = if starts_with_p { "(?P<" } else { "(?<" }; + self.wtr.write_str(start)?; + self.wtr.write_str(&name.name)?; self.wtr.write_str(">")?; Ok(()) } @@ -212,25 +210,25 @@ impl Writer { match ast.kind { Verbatim => self.wtr.write_char(ast.c), - Punctuation => write!(self.wtr, r"\{}", ast.c), - Octal => write!(self.wtr, r"\{:o}", ast.c as u32), + Meta | Superfluous => write!(self.wtr, r"\{}", ast.c), + Octal => write!(self.wtr, r"\{:o}", u32::from(ast.c)), HexFixed(ast::HexLiteralKind::X) => { - write!(self.wtr, r"\x{:02X}", ast.c as u32) + write!(self.wtr, r"\x{:02X}", u32::from(ast.c)) } HexFixed(ast::HexLiteralKind::UnicodeShort) => { - write!(self.wtr, r"\u{:04X}", ast.c as u32) + write!(self.wtr, r"\u{:04X}", u32::from(ast.c)) } HexFixed(ast::HexLiteralKind::UnicodeLong) => { - write!(self.wtr, r"\U{:08X}", ast.c as u32) + write!(self.wtr, r"\U{:08X}", u32::from(ast.c)) } HexBrace(ast::HexLiteralKind::X) => { - write!(self.wtr, r"\x{{{:X}}}", ast.c as u32) + write!(self.wtr, r"\x{{{:X}}}", u32::from(ast.c)) } HexBrace(ast::HexLiteralKind::UnicodeShort) => { - write!(self.wtr, r"\u{{{:X}}}", ast.c as u32) + write!(self.wtr, r"\u{{{:X}}}", u32::from(ast.c)) } HexBrace(ast::HexLiteralKind::UnicodeLong) => { - write!(self.wtr, r"\U{{{:X}}}", ast.c as u32) + write!(self.wtr, r"\U{{{:X}}}", u32::from(ast.c)) } Special(ast::SpecialLiteralKind::Bell) => { self.wtr.write_str(r"\a") @@ -263,6 +261,12 @@ impl Writer { EndText => self.wtr.write_str(r"\z"), WordBoundary => self.wtr.write_str(r"\b"), NotWordBoundary => self.wtr.write_str(r"\B"), + WordBoundaryStart => self.wtr.write_str(r"\b{start}"), + WordBoundaryEnd => self.wtr.write_str(r"\b{end}"), + WordBoundaryStartAngle => self.wtr.write_str(r"\<"), + WordBoundaryEndAngle => self.wtr.write_str(r"\>"), + WordBoundaryStartHalf => self.wtr.write_str(r"\b{start-half}"), + WordBoundaryEndHalf => self.wtr.write_str(r"\b{end-half}"), } } @@ -285,6 +289,7 @@ impl Writer { Flag::DotMatchesNewLine => self.wtr.write_str("s"), Flag::SwapGreed => self.wtr.write_str("U"), Flag::Unicode => self.wtr.write_str("u"), + Flag::CRLF => self.wtr.write_str("R"), Flag::IgnoreWhitespace => 
self.wtr.write_str("x"), }, }?; @@ -395,9 +400,12 @@ impl Writer { #[cfg(test)] mod tests { - use super::Printer; + use alloc::string::String; + use crate::ast::parse::ParserBuilder; + use super::*; + fn roundtrip(given: &str) { roundtrip_with(|b| b, given); } @@ -499,6 +507,7 @@ mod tests { fn print_group() { roundtrip("(?i:a)"); roundtrip("(?P<a>a)"); + roundtrip("(?<a>a)"); roundtrip("(a)"); } diff --git a/vendor/regex-syntax/src/ast/visitor.rs b/vendor/regex-syntax/src/ast/visitor.rs index 78ee487..c1bb24d 100644 --- a/vendor/regex-syntax/src/ast/visitor.rs +++ b/vendor/regex-syntax/src/ast/visitor.rs @@ -1,4 +1,4 @@ -use std::fmt; +use alloc::{vec, vec::Vec}; use crate::ast::{self, Ast}; @@ -11,15 +11,12 @@ use crate::ast::{self, Ast}; /// may be proportional to end user input. /// /// Typical usage of this trait involves providing an implementation and then -/// running it using the [`visit`](fn.visit.html) function. +/// running it using the [`visit`] function. /// /// Note that the abstract syntax tree for a regular expression is quite -/// complex. Unless you specifically need it, you might be able to use the -/// much simpler -/// [high-level intermediate representation](../hir/struct.Hir.html) -/// and its -/// [corresponding `Visitor` trait](../hir/trait.Visitor.html) -/// instead. +/// complex. Unless you specifically need it, you might be able to use the much +/// simpler [high-level intermediate representation](crate::hir::Hir) and its +/// [corresponding `Visitor` trait](crate::hir::Visitor) instead. pub trait Visitor { /// The result of visiting an AST. type Output; @@ -46,13 +43,17 @@ pub trait Visitor { } /// This method is called between child nodes of an - /// [`Alternation`](struct.Alternation.html). + /// [`Alternation`](ast::Alternation). fn visit_alternation_in(&mut self) -> Result<(), Self::Err> { Ok(()) } - /// This method is called on every - /// [`ClassSetItem`](enum.ClassSetItem.html) + /// This method is called between child nodes of a concatenation. + fn visit_concat_in(&mut self) -> Result<(), Self::Err> { + Ok(()) + } + + /// This method is called on every [`ClassSetItem`](ast::ClassSetItem) /// before descending into child nodes. fn visit_class_set_item_pre( &mut self, @@ -61,8 +62,7 @@ pub trait Visitor { Ok(()) } - /// This method is called on every - /// [`ClassSetItem`](enum.ClassSetItem.html) + /// This method is called on every [`ClassSetItem`](ast::ClassSetItem) /// after descending into child nodes. fn visit_class_set_item_post( &mut self, @@ -72,8 +72,8 @@ } /// This method is called on every - /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html) - /// before descending into child nodes. + /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) before descending into + /// child nodes. fn visit_class_set_binary_op_pre( &mut self, _ast: &ast::ClassSetBinaryOp, @@ -82,8 +82,8 @@ } /// This method is called on every - /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html) - /// after descending into child nodes. + /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) after descending into child + /// nodes. fn visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, @@ -92,7 +92,7 @@ } /// This method is called between the left hand and right hand child nodes - /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html). + /// of a [`ClassSetBinaryOp`](ast::ClassSetBinaryOp).
fn visit_class_set_binary_op_in( &mut self, _ast: &ast::ClassSetBinaryOp, @@ -104,8 +104,7 @@ /// Executes an implementation of `Visitor` in constant stack space. /// /// This function will visit every node in the given `Ast` while calling the -/// appropriate methods provided by the -/// [`Visitor`](trait.Visitor.html) trait. +/// appropriate methods provided by the [`Visitor`] trait. /// /// The primary use case for this method is when one wants to perform case /// analysis over an `Ast` without using a stack size proportional to the depth @@ -234,8 +233,14 @@ impl<'a> HeapVisitor<'a> { // If this is a concat/alternate, then we might have additional // inductive steps to process. if let Some(x) = self.pop(frame) { - if let Frame::Alternation { .. } = x { - visitor.visit_alternation_in()?; + match x { + Frame::Alternation { .. } => { + visitor.visit_alternation_in()?; + } + Frame::Concat { .. } => { + visitor.visit_concat_in()?; + } + _ => {} } ast = x.child(); self.stack.push((post_ast, x)); @@ -259,7 +264,7 @@ impl<'a> HeapVisitor<'a> { visitor: &mut V, ) -> Result<Option<Frame<'a>>, V::Err> { Ok(match *ast { - Ast::Class(ast::Class::Bracketed(ref x)) => { + Ast::ClassBracketed(ref x) => { self.visit_class(x, visitor)?; None } @@ -475,8 +480,8 @@ impl<'a> ClassInduct<'a> { } } -impl<'a> fmt::Debug for ClassFrame<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl<'a> core::fmt::Debug for ClassFrame<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let x = match *self { ClassFrame::Union { .. } => "Union", ClassFrame::Binary { .. } => "Binary", @@ -487,8 +492,8 @@ } } -impl<'a> fmt::Debug for ClassInduct<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl<'a> core::fmt::Debug for ClassInduct<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let x = match *self { ClassInduct::Item(it) => match *it { ast::ClassSetItem::Empty(_) => "Item(Empty)", diff --git a/vendor/regex-syntax/src/debug.rs b/vendor/regex-syntax/src/debug.rs new file mode 100644 index 0000000..a0b051b --- /dev/null +++ b/vendor/regex-syntax/src/debug.rs @@ -0,0 +1,107 @@ +/// A type that wraps a single byte with a convenient fmt::Debug impl that +/// escapes the byte. +pub(crate) struct Byte(pub(crate) u8); + +impl core::fmt::Debug for Byte { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + // Special case ASCII space. It's too hard to read otherwise, so + // put quotes around it. I sometimes wonder whether just '\x20' would + // be better... + if self.0 == b' ' { + return write!(f, "' '"); + } + // 10 bytes is enough to cover any output from ascii::escape_default. + let mut bytes = [0u8; 10]; + let mut len = 0; + for (i, mut b) in core::ascii::escape_default(self.0).enumerate() { + // capitalize \xab to \xAB + if i >= 2 && b'a' <= b && b <= b'f' { + b -= 32; + } + bytes[len] = b; + len += 1; + } + write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap()) + } +} + +/// A type that provides a human readable debug impl for arbitrary bytes. +/// +/// This generally works best when the bytes are presumed to be mostly UTF-8, +/// but will work for anything. +/// +/// N.B. This is copied nearly verbatim from regex-automata. Sigh.
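+///
+/// As a rough sketch of the expected output (not a runnable doctest, since
+/// this type is crate-internal): valid UTF-8 is printed via `escape_debug`,
+/// while invalid bytes fall back to hex escapes.
+///
+/// ```ignore
+/// assert_eq!(format!("{:?}", Bytes(b"abc\xFF")), r#""abc\xff""#);
+/// ```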
+pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]); + +impl<'a> core::fmt::Debug for Bytes<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "\"")?; + // This is a sad re-implementation of a similar impl found in bstr. + let mut bytes = self.0; + while let Some(result) = utf8_decode(bytes) { + let ch = match result { + Ok(ch) => ch, + Err(byte) => { + write!(f, r"\x{:02x}", byte)?; + bytes = &bytes[1..]; + continue; + } + }; + bytes = &bytes[ch.len_utf8()..]; + match ch { + '\0' => write!(f, "\\0")?, + // ASCII control characters except \0, \n, \r, \t + '\x01'..='\x08' + | '\x0b' + | '\x0c' + | '\x0e'..='\x19' + | '\x7f' => { + write!(f, "\\x{:02x}", u32::from(ch))?; + } + '\n' | '\r' | '\t' | _ => { + write!(f, "{}", ch.escape_debug())?; + } + } + } + write!(f, "\"")?; + Ok(()) + } +} + +/// Decodes the next UTF-8 encoded codepoint from the given byte slice. +/// +/// If no valid encoding of a codepoint exists at the beginning of the given +/// byte slice, then the first byte is returned instead. +/// +/// This returns `None` if and only if `bytes` is empty. +pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> { + fn len(byte: u8) -> Option<usize> { + if byte <= 0x7F { + return Some(1); + } else if byte & 0b1100_0000 == 0b1000_0000 { + return None; + } else if byte <= 0b1101_1111 { + Some(2) + } else if byte <= 0b1110_1111 { + Some(3) + } else if byte <= 0b1111_0111 { + Some(4) + } else { + None + } + } + + if bytes.is_empty() { + return None; + } + let len = match len(bytes[0]) { + None => return Some(Err(bytes[0])), + Some(len) if len > bytes.len() => return Some(Err(bytes[0])), + Some(1) => return Some(Ok(char::from(bytes[0]))), + Some(len) => len, + }; + match core::str::from_utf8(&bytes[..len]) { + Ok(s) => Some(Ok(s.chars().next().unwrap())), + Err(_) => Some(Err(bytes[0])), + } +} diff --git a/vendor/regex-syntax/src/error.rs b/vendor/regex-syntax/src/error.rs index 1230d2f..98869c4 100644 --- a/vendor/regex-syntax/src/error.rs +++ b/vendor/regex-syntax/src/error.rs @@ -1,15 +1,17 @@ -use std::cmp; -use std::error; -use std::fmt; -use std::result; +use alloc::{ + format, + string::{String, ToString}, + vec, + vec::Vec, +}; -use crate::ast; -use crate::hir; - -/// A type alias for dealing with errors returned by this crate. -pub type Result<T> = result::Result<T, Error>; +use crate::{ast, hir}; /// This error type encompasses any error that can be returned by this crate. +/// +/// This error type is marked as `non_exhaustive`. This means that adding a +/// new variant is not considered a breaking change. +#[non_exhaustive] #[derive(Clone, Debug, Eq, PartialEq)] pub enum Error { /// An error that occurred while translating concrete syntax into abstract @@ -18,13 +20,6 @@ /// An error that occurred while translating abstract syntax into a high /// level intermediate representation (HIR). Translate(hir::Error), - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, } impl From<ast::Error> for Error { @@ -39,24 +34,14 @@ } } -impl error::Error for Error { - // TODO: Remove this method entirely on the next breaking semver release.
- #[allow(deprecated)] - fn description(&self) -> &str { - match *self { - Error::Parse(ref x) => x.description(), - Error::Translate(ref x) => x.description(), - _ => unreachable!(), - } - } -} +#[cfg(feature = "std")] +impl std::error::Error for Error {} -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match *self { Error::Parse(ref x) => x.fmt(f), Error::Translate(ref x) => x.fmt(f), - _ => unreachable!(), } } } @@ -101,8 +86,8 @@ impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> { } } -impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let spans = Spans::from_formatter(self); if self.pattern.contains('\n') { let divider = repeat_char('~', 79); @@ -168,7 +153,7 @@ struct Spans<'p> { impl<'p> Spans<'p> { /// Build a sequence of spans from a formatter. - fn from_formatter<'e, E: fmt::Display>( + fn from_formatter<'e, E: core::fmt::Display>( fmter: &'p Formatter<'e, E>, ) -> Spans<'p> { let mut line_count = fmter.pattern.lines().count(); @@ -248,7 +233,7 @@ impl<'p> Spans<'p> { pos += 1; } let note_len = span.end.column.saturating_sub(span.start.column); - for _ in 0..cmp::max(1, note_len) { + for _ in 0..core::cmp::max(1, note_len) { notes.push('^'); pos += 1; } @@ -281,11 +266,13 @@ impl<'p> Spans<'p> { } fn repeat_char(c: char, count: usize) -> String { - ::std::iter::repeat(c).take(count).collect() + core::iter::repeat(c).take(count).collect() } #[cfg(test)] mod tests { + use alloc::string::ToString; + use crate::ast::parse::Parser; fn assert_panic_message(pattern: &str, expected_msg: &str) { diff --git a/vendor/regex-syntax/src/hir/interval.rs b/vendor/regex-syntax/src/hir/interval.rs index 56698c5..e063390 100644 --- a/vendor/regex-syntax/src/hir/interval.rs +++ b/vendor/regex-syntax/src/hir/interval.rs @@ -1,8 +1,6 @@ -use std::char; -use std::cmp; -use std::fmt::Debug; -use std::slice; -use std::u8; +use core::{char, cmp, fmt::Debug, slice}; + +use alloc::vec::Vec; use crate::unicode; @@ -32,9 +30,38 @@ use crate::unicode; // // Tests on this are relegated to the public API of HIR in src/hir.rs. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug)] pub struct IntervalSet<I> { + /// A sorted set of non-overlapping ranges. ranges: Vec<I>, + /// While not required at all for correctness, we keep track of whether an + /// interval set has been case folded or not. This helps us avoid doing + /// redundant work if, for example, a set has already been case folded. + /// And note that whether a set is folded or not is preserved through + /// all of the pairwise set operations. That is, if both interval sets + /// have been case folded, then any of difference, union, intersection or + /// symmetric difference all produce a case folded set. + /// + /// Note that when this is true, it *must* be the case that the set is case + /// folded. But when it's false, the set *may* be case folded. In other + /// words, we only set this to true when we know it to be the case, but we're + /// okay with it being false if it would otherwise be costly to determine + /// whether it should be true. This means code cannot assume that a false + /// value necessarily indicates that the set is not case folded.
+ /// + /// Bottom line: this is a performance optimization. + folded: bool, +} + +impl<I: Interval> Eq for IntervalSet<I> {} + +// We implement PartialEq manually so that we don't consider the set's internal +// 'folded' property to be part of its identity. The 'folded' property is +// strictly an optimization. +impl<I: Interval> PartialEq for IntervalSet<I> { + fn eq(&self, other: &IntervalSet<I>) -> bool { + self.ranges.eq(&other.ranges) + } } impl<I: Interval> IntervalSet<I> { @@ -44,7 +71,10 @@ impl<I: Interval> IntervalSet<I> { /// The given ranges do not need to be in any specific order, and ranges /// may overlap. pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { - let mut set = IntervalSet { ranges: intervals.into_iter().collect() }; + let ranges: Vec<I> = intervals.into_iter().collect(); + // An empty set is case folded. + let folded = ranges.is_empty(); + let mut set = IntervalSet { ranges, folded }; set.canonicalize(); set } @@ -55,6 +85,10 @@ // it preserves canonicalization. self.ranges.push(interval); self.canonicalize(); + // We don't know whether the new interval added here is considered + // case folded, so we conservatively assume that the entire set is + // no longer case folded if it was previously. + self.folded = false; } /// Return an iterator over all intervals in this set. @@ -79,6 +113,9 @@ /// This returns an error if the necessary case mapping data is not /// available. pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> { + if self.folded { + return Ok(()); + } let len = self.ranges.len(); for i in 0..len { let range = self.ranges[i]; @@ -88,14 +125,19 @@ } } self.canonicalize(); + self.folded = true; Ok(()) } /// Union this set with the given set, in place. pub fn union(&mut self, other: &IntervalSet<I>) { + if other.ranges.is_empty() || self.ranges == other.ranges { + return; + } // This could almost certainly be done more efficiently. self.ranges.extend(&other.ranges); self.canonicalize(); + self.folded = self.folded && other.folded; } /// Intersect this set with the given set, in place. @@ -105,6 +147,8 @@ } if other.ranges.is_empty() { self.ranges.clear(); + // An empty set is case folded. + self.folded = true; return; } @@ -134,6 +178,7 @@ } } self.ranges.drain(..drain_end); + self.folded = self.folded && other.folded; } /// Subtract the given set from this set, in place. @@ -226,6 +271,7 @@ a += 1; } self.ranges.drain(..drain_end); + self.folded = self.folded && other.folded; } /// Compute the symmetric difference of the two sets, in place. @@ -251,6 +297,8 @@ if self.ranges.is_empty() { let (min, max) = (I::Bound::min_value(), I::Bound::max_value()); self.ranges.push(I::create(min, max)); + // The set containing everything must be case folded. + self.folded = true; return; } @@ -276,6 +324,19 @@ self.ranges.push(I::create(lower, I::Bound::max_value())); } self.ranges.drain(..drain_end); + // We don't need to update whether this set is folded or not, because + // it is conservatively preserved through negation. Namely, if a set + // is not folded, then it is possible that its negation is folded, for + // example, [^☃]. But we're fine with assuming that the set is not + // folded in that case. (`folded` permits false negatives but not false + // positives.) + // + // But what about when a set is folded, is its negation also + // necessarily folded? Yes.
Because if a set is folded, then for every + // character in the set, it necessarily includes its equivalence class + // of case folded characters. Negating it in turn means that all + // equivalence classes in the set are negated, and any equivalence + // class that was previously not in the set is now entirely in the set. } /// Converts this set into a canonical ordering. @@ -481,7 +542,7 @@ impl Bound for u8 { u8::MAX } fn as_u32(self) -> u32 { - self as u32 + u32::from(self) } fn increment(self) -> Self { self.checked_add(1).unwrap() } @@ -499,20 +560,20 @@ impl Bound for char { '\u{10FFFF}' } fn as_u32(self) -> u32 { - self as u32 + u32::from(self) } fn increment(self) -> Self { match self { '\u{D7FF}' => '\u{E000}', - c => char::from_u32((c as u32).checked_add(1).unwrap()).unwrap(), + c => char::from_u32(u32::from(c).checked_add(1).unwrap()).unwrap(), } } fn decrement(self) -> Self { match self { '\u{E000}' => '\u{D7FF}', - c => char::from_u32((c as u32).checked_sub(1).unwrap()).unwrap(), + c => char::from_u32(u32::from(c).checked_sub(1).unwrap()).unwrap(), } } } diff --git a/vendor/regex-syntax/src/hir/literal.rs b/vendor/regex-syntax/src/hir/literal.rs new file mode 100644 index 0000000..a5a3737 --- /dev/null +++ b/vendor/regex-syntax/src/hir/literal.rs @@ -0,0 +1,3214 @@ +/*! +Provides literal extraction from `Hir` expressions. + +An [`Extractor`] pulls literals out of [`Hir`] expressions and returns a +[`Seq`] of [`Literal`]s. + +The purpose of literal extraction is generally to provide avenues for +optimizing regex searches. The main idea is that substring searches can be an +order of magnitude faster than a regex search. Therefore, if one can execute +a substring search to find candidate match locations and only run the regex +search at those locations, then it is possible for huge improvements in +performance to be realized. + +With that said, literal optimizations are generally a black art because even +though substring search is generally faster, if the number of candidates +produced is high, then it can create a lot of overhead by ping-ponging between +the substring search and the regex search. + +Here are some heuristics that might be used to help increase the chances of +effective literal optimizations: + +* Stick to small [`Seq`]s. If you search for too many literals, it's likely +to lead to substring search that is only a little faster than a regex search, +and thus the overhead of using literal optimizations in the first place might +make things slower overall. +* The literals in your [`Seq`] shouldn't be too short. In general, longer is +better. A sequence corresponding to single bytes that occur frequently in the +haystack, for example, is probably a bad literal optimization because it's +likely to produce many false positive candidates. Longer literals are less +likely to match, and thus probably produce fewer false positives. +* If it's possible to estimate the approximate frequency of each byte according +to some pre-computed background distribution, it is possible to compute a score +of how "good" a `Seq` is. If a `Seq` isn't good enough, you might consider +skipping the literal optimization and just use the regex engine. + +(It should be noted that there are always pathological cases that can make +any kind of literal optimization be a net slower result. This is why it +might be a good idea to be conservative, or to even provide a means for +literal optimizations to be dynamically disabled if they are determined to be +ineffective according to some measure.)
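+
+As a rough sketch of how these heuristics might be applied by a caller (the
+pattern and the size threshold below are invented purely for illustration),
+one can gate a prefilter on the extracted sequence being finite and small:
+
+```
+use regex_syntax::{hir::literal::Extractor, parse};
+
+let hir = parse(r"(foo|bar)baz")?;
+let seq = Extractor::new().extract(&hir);
+// Use a substring prefilter only when extraction produced a small, finite
+// set of candidate literals; otherwise fall back to the regex engine alone.
+let use_prefilter = seq.is_finite() && seq.len().map_or(false, |n| n <= 16);
+assert!(use_prefilter);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```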
+ +You're encouraged to explore the methods on [`Seq`], which permit shrinking +the size of sequences in a preference-order preserving fashion. + +Finally, note that it isn't strictly necessary to use an [`Extractor`]. Namely, +an `Extractor` only uses public APIs of the [`Seq`] and [`Literal`] types, +so it is possible to implement your own extractor. For example, for n-grams +or "inner" literals (i.e., not prefix or suffix literals). The `Extractor` +is mostly responsible for the case analysis over `Hir` expressions. Much of +the "trickier" parts are how to combine literal sequences, and that is all +implemented on [`Seq`]. +*/ + +use core::{cmp, mem, num::NonZeroUsize}; + +use alloc::{vec, vec::Vec}; + +use crate::hir::{self, Hir}; + +/// Extracts prefix or suffix literal sequences from [`Hir`] expressions. +/// +/// Literal extraction is based on the following observations: +/// +/// * Many regexes start with one or a small number of literals. +/// * Substring search for literals is often much faster (sometimes by an order +/// of magnitude) than a regex search. +/// +/// Thus, in many cases, one can search for literals to find candidate starting +/// locations of a match, and then only run the full regex engine at each such +/// location instead of over the full haystack. +/// +/// The main downside of literal extraction is that it can wind up causing a +/// search to be slower overall. For example, if there are many matches or if +/// there are many candidates that don't ultimately lead to a match, then a +/// lot of overhead will be spent in shuffling back-and-forth between substring +/// search and the regex engine. This is the fundamental reason why literal +/// optimizations for regex patterns are sometimes considered a "black art." +/// +/// # Look-around assertions +/// +/// Literal extraction treats all look-around assertions as-if they match every +/// empty string. So for example, the regex `\bquux\b` will yield a sequence +/// containing a single exact literal `quux`. However, not all occurrences +/// of `quux` correspond to a match of the regex. For example, `\bquux\b` +/// does not match `ZquuxZ` anywhere because `quux` does not fall on a word +/// boundary. +/// +/// In effect, if your regex contains look-around assertions, then a match of +/// an exact literal does not necessarily mean the regex overall matches. So +/// you may still need to run the regex engine in such cases to confirm the +/// match. +/// +/// The precise guarantee you get from a literal sequence is: if every literal +/// in the sequence is exact and the original regex contains zero look-around +/// assertions, then a preference-order multi-substring search of those +/// literals will precisely match a preference-order search of the original +/// regex. +/// +/// # Example +/// +/// This shows how to extract prefixes: +/// +/// ``` +/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; +/// +/// let hir = parse(r"(a|b|c)(x|y|z)[A-Z]+foo")?; +/// let got = Extractor::new().extract(&hir); +/// // All literals returned are "inexact" because none of them reach the +/// // match state.
+/// let expected = Seq::from_iter([ +/// Literal::inexact("ax"), +/// Literal::inexact("ay"), +/// Literal::inexact("az"), +/// Literal::inexact("bx"), +/// Literal::inexact("by"), +/// Literal::inexact("bz"), +/// Literal::inexact("cx"), +/// Literal::inexact("cy"), +/// Literal::inexact("cz"), +/// ]); +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// This shows how to extract suffixes: +/// +/// ``` +/// use regex_syntax::{ +/// hir::literal::{Extractor, ExtractKind, Literal, Seq}, +/// parse, +/// }; +/// +/// let hir = parse(r"foo|[A-Z]+bar")?; +/// let got = Extractor::new().kind(ExtractKind::Suffix).extract(&hir); +/// // Since 'foo' gets to a match state, it is considered exact. But 'bar' +/// // does not because of the '[A-Z]+', and thus is marked inexact. +/// let expected = Seq::from_iter([ +/// Literal::exact("foo"), +/// Literal::inexact("bar"), +/// ]); +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Extractor { + kind: ExtractKind, + limit_class: usize, + limit_repeat: usize, + limit_literal_len: usize, + limit_total: usize, +} + +impl Extractor { + /// Create a new extractor with a default configuration. + /// + /// The extractor can be optionally configured before calling + /// [`Extractor::extract`] to get a literal sequence. + pub fn new() -> Extractor { + Extractor { + kind: ExtractKind::Prefix, + limit_class: 10, + limit_repeat: 10, + limit_literal_len: 100, + limit_total: 250, + } + } + + /// Execute the extractor and return a sequence of literals. + pub fn extract(&self, hir: &Hir) -> Seq { + use crate::hir::HirKind::*; + + match *hir.kind() { + Empty | Look(_) => Seq::singleton(self::Literal::exact(vec![])), + Literal(hir::Literal(ref bytes)) => { + let mut seq = + Seq::singleton(self::Literal::exact(bytes.to_vec())); + self.enforce_literal_len(&mut seq); + seq + } + Class(hir::Class::Unicode(ref cls)) => { + self.extract_class_unicode(cls) + } + Class(hir::Class::Bytes(ref cls)) => self.extract_class_bytes(cls), + Repetition(ref rep) => self.extract_repetition(rep), + Capture(hir::Capture { ref sub, .. }) => self.extract(sub), + Concat(ref hirs) => match self.kind { + ExtractKind::Prefix => self.extract_concat(hirs.iter()), + ExtractKind::Suffix => self.extract_concat(hirs.iter().rev()), + }, + Alternation(ref hirs) => { + // Unlike concat, we always union starting from the beginning, + // since the beginning corresponds to the highest preference, + // which doesn't change based on forwards vs reverse. + self.extract_alternation(hirs.iter()) + } + } + } + + /// Set the kind of literal sequence to extract from an [`Hir`] expression. + /// + /// The default is to extract prefixes, but suffixes can be selected + /// instead. The contract for prefixes is that every match of the + /// corresponding `Hir` must start with one of the literals in the sequence + /// returned. Moreover, the _order_ of the sequence returned corresponds to + /// the preference order. + /// + /// Suffixes satisfy a similar contract in that every match of the + /// corresponding `Hir` must end with one of the literals in the sequence + /// returned. However, there is no guarantee that the literals are in + /// preference order. + /// + /// Remember that a sequence can be infinite. For example, unless the + /// limits are configured to be impractically large, attempting to extract + /// prefixes (or suffixes) for the pattern `[A-Z]` will return an infinite + /// sequence.
Generally speaking, if the sequence returned is infinite, + /// then it is presumed to be unwise to do prefix (or suffix) optimizations + /// for the pattern. + pub fn kind(&mut self, kind: ExtractKind) -> &mut Extractor { + self.kind = kind; + self + } + + /// Configure a limit on the length of the sequence that is permitted for + /// a character class. If a character class exceeds this limit, then the + /// sequence returned for it is infinite. + /// + /// This prevents classes like `[A-Z]` or `\pL` from getting turned into + /// huge and likely unproductive sequences of literals. + /// + /// # Example + /// + /// This example shows how this limit can be lowered to decrease the tolerance + /// for character classes being turned into literal sequences. + /// + /// ``` + /// use regex_syntax::{hir::literal::{Extractor, Seq}, parse}; + /// + /// let hir = parse(r"[0-9]")?; + /// + /// let got = Extractor::new().extract(&hir); + /// let expected = Seq::new([ + /// "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", + /// ]); + /// assert_eq!(expected, got); + /// + /// // Now let's shrink the limit and see how that changes things. + /// let got = Extractor::new().limit_class(4).extract(&hir); + /// let expected = Seq::infinite(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn limit_class(&mut self, limit: usize) -> &mut Extractor { + self.limit_class = limit; + self + } + + /// Configure a limit on the total number of repetitions that is permitted + /// before literal extraction is stopped. + /// + /// This is useful for limiting things like `(abcde){50}`, or more + /// insidiously, `(?:){1000000000}`. This limit prevents any one single + /// repetition from adding too much to a literal sequence. + /// + /// With this limit set, repetitions that exceed it will be stopped and any + /// literals extracted up to that point will be made inexact. + /// + /// # Example + /// + /// This shows how to decrease the limit and compares it with the default. + /// + /// ``` + /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; + /// + /// let hir = parse(r"(abc){8}")?; + /// + /// let got = Extractor::new().extract(&hir); + /// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]); + /// assert_eq!(expected, got); + /// + /// // Now let's shrink the limit and see how that changes things. + /// let got = Extractor::new().limit_repeat(4).extract(&hir); + /// let expected = Seq::from_iter([ + /// Literal::inexact("abcabcabcabc"), + /// ]); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn limit_repeat(&mut self, limit: usize) -> &mut Extractor { + self.limit_repeat = limit; + self + } + + /// Configure a limit on the maximum length of any literal in a sequence. + /// + /// This is useful for limiting things like `(abcde){5}{5}{5}{5}`. While + /// each repetition or literal in that regex is small, when all the + /// repetitions are applied, one ends up with `5^4 = 625` copies of + /// `abcde`, i.e., a literal 3125 bytes long. + /// + /// With this limit set, literals that exceed it will be made inexact and + /// thus prevented from growing. + /// + /// # Example + /// + /// This shows how to decrease the limit and compares it with the default.
+ /// + /// ``` + /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; + /// + /// let hir = parse(r"(abc){2}{2}{2}")?; + /// + /// let got = Extractor::new().extract(&hir); + /// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]); + /// assert_eq!(expected, got); + /// + /// // Now let's shrink the limit and see how that changes things. + /// let got = Extractor::new().limit_literal_len(14).extract(&hir); + /// let expected = Seq::from_iter([ + /// Literal::inexact("abcabcabcabcab"), + /// ]); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn limit_literal_len(&mut self, limit: usize) -> &mut Extractor { + self.limit_literal_len = limit; + self + } + + /// Configure a limit on the total number of literals that will be + /// returned. + /// + /// This is useful as a practical measure for avoiding the creation of + /// large sequences of literals. While the extractor will automatically + /// handle local creations of large sequences (for example, `[A-Z]` yields + /// an infinite sequence by default), large sequences can be created + /// through non-local means as well. + /// + /// For example, `[ab]{3}{3}` would yield a sequence of length `512 = 2^9` + /// despite each of the repetitions being small on their own. This limit + /// thus represents a "catch all" for preventing locally small sequences + /// from combining into large sequences. + /// + /// # Example + /// + /// This example shows how reducing the limit will change the literal + /// sequence returned. + /// + /// ``` + /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; + /// + /// let hir = parse(r"[ab]{2}{2}")?; + /// + /// let got = Extractor::new().extract(&hir); + /// let expected = Seq::new([ + /// "aaaa", "aaab", "aaba", "aabb", + /// "abaa", "abab", "abba", "abbb", + /// "baaa", "baab", "baba", "babb", + /// "bbaa", "bbab", "bbba", "bbbb", + /// ]); + /// assert_eq!(expected, got); + /// + /// // The default limit is not too big, but big enough to extract all + /// // literals from '[ab]{2}{2}'. If we shrink the limit to less than 16, + /// // then we'll get a truncated set. Notice that it returns a sequence of + /// // length 4 even though our limit was 10. This is because the sequence + /// // is difficult to increase without blowing the limit. Notice also + /// // that every literal in the sequence is now inexact because they were + /// // stripped of some suffix. + /// let got = Extractor::new().limit_total(10).extract(&hir); + /// let expected = Seq::from_iter([ + /// Literal::inexact("aa"), + /// Literal::inexact("ab"), + /// Literal::inexact("ba"), + /// Literal::inexact("bb"), + /// ]); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn limit_total(&mut self, limit: usize) -> &mut Extractor { + self.limit_total = limit; + self + } + + /// Extract a sequence from the given concatenation. Sequences from each of + /// the child HIR expressions are combined via cross product. + /// + /// This short circuits once the cross product turns into a sequence + /// containing only inexact literals. + fn extract_concat<'a, I: Iterator<Item = &'a Hir>>(&self, it: I) -> Seq { + let mut seq = Seq::singleton(self::Literal::exact(vec![])); + for hir in it { + // If every element in the sequence is inexact, then a cross + // product will always be a no-op. Thus, there is nothing else we + // can add to it and can quit early. Note that this also includes + // infinite sequences.
+ if seq.is_inexact() { + break; + } + // Note that 'cross' also dispatches based on whether we're + // extracting prefixes or suffixes. + seq = self.cross(seq, &mut self.extract(hir)); + } + seq + } + + /// Extract a sequence from the given alternation. + /// + /// This short circuits once the union turns into an infinite sequence. + fn extract_alternation<'a, I: Iterator<Item = &'a Hir>>( + &self, + it: I, + ) -> Seq { + let mut seq = Seq::empty(); + for hir in it { + // Once our 'seq' is infinite, every subsequent union + // operation on it will itself always result in an + // infinite sequence. Thus, it can never change and we can + // short-circuit. + if !seq.is_finite() { + break; + } + seq = self.union(seq, &mut self.extract(hir)); + } + seq + } + + /// Extract a sequence of literals from the given repetition. We do our + /// best. Some examples: + /// + /// 'a*' => [inexact(a), exact("")] + /// 'a*?' => [exact(""), inexact(a)] + /// 'a+' => [inexact(a)] + /// 'a{3}' => [exact(aaa)] + /// 'a{3,5}' => [inexact(aaa)] + /// + /// The key here really is making sure we get the 'inexact' vs 'exact' + /// attributes correct on each of the literals we add. For example, the + /// fact that 'a*' gives us an inexact 'a' and an exact empty string means + /// that a regex like 'ab*c' will result in [inexact(ab), exact(ac)] + /// literals being extracted, which might actually be a better prefilter + /// than just 'a'. + fn extract_repetition(&self, rep: &hir::Repetition) -> Seq { + let mut subseq = self.extract(&rep.sub); + match *rep { + hir::Repetition { min: 0, max, greedy, .. } => { + // When 'max=1', we can retain exactness, since 'a?' is + // equivalent to 'a|'. Similarly below, 'a??' is equivalent to + // '|a'. + if max != Some(1) { + subseq.make_inexact(); + } + let mut empty = Seq::singleton(Literal::exact(vec![])); + if !greedy { + mem::swap(&mut subseq, &mut empty); + } + self.union(subseq, &mut empty) + } + hir::Repetition { min, max: Some(max), .. } if min == max => { + assert!(min > 0); // handled above + let limit = + u32::try_from(self.limit_repeat).unwrap_or(u32::MAX); + let mut seq = Seq::singleton(Literal::exact(vec![])); + for _ in 0..cmp::min(min, limit) { + if seq.is_inexact() { + break; + } + seq = self.cross(seq, &mut subseq.clone()); + } + if usize::try_from(min).is_err() || min > limit { + seq.make_inexact(); + } + seq + } + hir::Repetition { min, .. } => { + assert!(min > 0); // handled above + let limit = + u32::try_from(self.limit_repeat).unwrap_or(u32::MAX); + let mut seq = Seq::singleton(Literal::exact(vec![])); + for _ in 0..cmp::min(min, limit) { + if seq.is_inexact() { + break; + } + seq = self.cross(seq, &mut subseq.clone()); + } + seq.make_inexact(); + seq + } + } + } + + /// Convert the given Unicode class into a sequence of literals if the + /// class is small enough. If the class is too big, return an infinite + /// sequence. + fn extract_class_unicode(&self, cls: &hir::ClassUnicode) -> Seq { + if self.class_over_limit_unicode(cls) { + return Seq::infinite(); + } + let mut seq = Seq::empty(); + for r in cls.iter() { + for ch in r.start()..=r.end() { + seq.push(Literal::from(ch)); + } + } + self.enforce_literal_len(&mut seq); + seq + } + + /// Convert the given byte class into a sequence of literals if the class + /// is small enough. If the class is too big, return an infinite sequence.
+    fn extract_class_unicode(&self, cls: &hir::ClassUnicode) -> Seq {
+        if self.class_over_limit_unicode(cls) {
+            return Seq::infinite();
+        }
+        let mut seq = Seq::empty();
+        for r in cls.iter() {
+            for ch in r.start()..=r.end() {
+                seq.push(Literal::from(ch));
+            }
+        }
+        self.enforce_literal_len(&mut seq);
+        seq
+    }
+
+    /// Convert the given byte class into a sequence of literals if the class
+    /// is small enough. If the class is too big, return an infinite sequence.
+    fn extract_class_bytes(&self, cls: &hir::ClassBytes) -> Seq {
+        if self.class_over_limit_bytes(cls) {
+            return Seq::infinite();
+        }
+        let mut seq = Seq::empty();
+        for r in cls.iter() {
+            for b in r.start()..=r.end() {
+                seq.push(Literal::from(b));
+            }
+        }
+        self.enforce_literal_len(&mut seq);
+        seq
+    }
+
+    /// Returns true if the given Unicode class exceeds the configured limits
+    /// on this extractor.
+    fn class_over_limit_unicode(&self, cls: &hir::ClassUnicode) -> bool {
+        let mut count = 0;
+        for r in cls.iter() {
+            if count > self.limit_class {
+                return true;
+            }
+            count += r.len();
+        }
+        count > self.limit_class
+    }
+
+    /// Returns true if the given byte class exceeds the configured limits on
+    /// this extractor.
+    fn class_over_limit_bytes(&self, cls: &hir::ClassBytes) -> bool {
+        let mut count = 0;
+        for r in cls.iter() {
+            if count > self.limit_class {
+                return true;
+            }
+            count += r.len();
+        }
+        count > self.limit_class
+    }
+
+    /// Compute the cross product of the two sequences if the result would be
+    /// within configured limits. Otherwise, make `seq2` infinite and cross
+    /// the infinite sequence with `seq1`.
+    fn cross(&self, mut seq1: Seq, seq2: &mut Seq) -> Seq {
+        if seq1
+            .max_cross_len(seq2)
+            .map_or(false, |len| len > self.limit_total)
+        {
+            seq2.make_infinite();
+        }
+        if let ExtractKind::Suffix = self.kind {
+            seq1.cross_reverse(seq2);
+        } else {
+            seq1.cross_forward(seq2);
+        }
+        assert!(seq1.len().map_or(true, |x| x <= self.limit_total));
+        self.enforce_literal_len(&mut seq1);
+        seq1
+    }
+
+    /// Union the two sequences if the result would be within configured
+    /// limits. Otherwise, make `seq2` infinite and union the infinite
+    /// sequence with `seq1`.
+    fn union(&self, mut seq1: Seq, seq2: &mut Seq) -> Seq {
+        if seq1
+            .max_union_len(seq2)
+            .map_or(false, |len| len > self.limit_total)
+        {
+            // We try to trim our literal sequences to see if we can make
+            // room for more literals. The idea is that we'd rather trim down
+            // literals already in our sequence if it means we can add a few
+            // more and retain a finite sequence. Otherwise, we'll union with
+            // an infinite sequence and that infects everything and
+            // effectively stops literal extraction in its tracks.
+            //
+            // Why do we keep 4 bytes here? Well, it's a bit of an
+            // abstraction leakage. Downstream, the literals may wind up
+            // getting fed to the Teddy algorithm, which supports searching
+            // literals up to length 4. So that's why we pick that number
+            // here. Arguably this should be a tuneable parameter, but it
+            // seems a little tricky to describe. And I'm still unsure if
+            // this is the right way to go about culling literal sequences.
+            match self.kind {
+                ExtractKind::Prefix => {
+                    seq1.keep_first_bytes(4);
+                    seq2.keep_first_bytes(4);
+                }
+                ExtractKind::Suffix => {
+                    seq1.keep_last_bytes(4);
+                    seq2.keep_last_bytes(4);
+                }
+            }
+            seq1.dedup();
+            seq2.dedup();
+            if seq1
+                .max_union_len(seq2)
+                .map_or(false, |len| len > self.limit_total)
+            {
+                seq2.make_infinite();
+            }
+        }
+        seq1.union(seq2);
+        assert!(seq1.len().map_or(true, |x| x <= self.limit_total));
+        seq1
+    }
+
+    /// Applies the literal length limit to the given sequence. If none of
+    /// the literals in the sequence exceed the limit, then this is a no-op.
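+    ///
+    /// For example, with a limit of `2` and prefix extraction, the sequence
+    /// `[E(a), E(foo)]` becomes `[E(a), I(fo)]`.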
+ fn enforce_literal_len(&self, seq: &mut Seq) { + let len = self.limit_literal_len; + match self.kind { + ExtractKind::Prefix => seq.keep_first_bytes(len), + ExtractKind::Suffix => seq.keep_last_bytes(len), + } + } +} + +impl Default for Extractor { + fn default() -> Extractor { + Extractor::new() + } +} + +/// The kind of literals to extract from an [`Hir`] expression. +/// +/// The default extraction kind is `Prefix`. +#[non_exhaustive] +#[derive(Clone, Debug)] +pub enum ExtractKind { + /// Extracts only prefix literals from a regex. + Prefix, + /// Extracts only suffix literals from a regex. + /// + /// Note that the sequence returned by suffix literals currently may + /// not correctly represent leftmost-first or "preference" order match + /// semantics. + Suffix, +} + +impl ExtractKind { + /// Returns true if this kind is the `Prefix` variant. + pub fn is_prefix(&self) -> bool { + matches!(*self, ExtractKind::Prefix) + } + + /// Returns true if this kind is the `Suffix` variant. + pub fn is_suffix(&self) -> bool { + matches!(*self, ExtractKind::Suffix) + } +} + +impl Default for ExtractKind { + fn default() -> ExtractKind { + ExtractKind::Prefix + } +} + +/// A sequence of literals. +/// +/// A `Seq` is very much like a set in that it represents a union of its +/// members. That is, it corresponds to a set of literals where at least one +/// must match in order for a particular [`Hir`] expression to match. (Whether +/// this corresponds to the entire `Hir` expression, a prefix of it or a suffix +/// of it depends on how the `Seq` was extracted from the `Hir`.) +/// +/// It is also unlike a set in that multiple identical literals may appear, +/// and that the order of the literals in the `Seq` matters. For example, if +/// the sequence is `[sam, samwise]` and leftmost-first matching is used, then +/// `samwise` can never match and the sequence is equivalent to `[sam]`. +/// +/// # States of a sequence +/// +/// A `Seq` has a few different logical states to consider: +/// +/// * The sequence can represent "any" literal. When this happens, the set does +/// not have a finite size. The purpose of this state is to inhibit callers +/// from making assumptions about what literals are required in order to match +/// a particular [`Hir`] expression. Generally speaking, when a set is in this +/// state, literal optimizations are inhibited. A good example of a regex that +/// will cause this sort of set to appear is `[A-Za-z]`. The character class +/// is just too big (and also too narrow) to be usefully expanded into 52 +/// different literals. (Note that the decision for when a seq should become +/// infinite is determined by the caller. A seq itself has no hard-coded +/// limits.) +/// * The sequence can be empty, in which case, it is an affirmative statement +/// that there are no literals that can match the corresponding `Hir`. +/// Consequently, the `Hir` never matches any input. For example, `[a&&b]`. +/// * The sequence can be non-empty, in which case, at least one of the +/// literals must match in order for the corresponding `Hir` to match. +/// +/// # Example +/// +/// This example shows how literal sequences can be simplified by stripping +/// suffixes and minimizing while maintaining preference order. 
+///
+/// ```
+/// use regex_syntax::hir::literal::{Literal, Seq};
+///
+/// let mut seq = Seq::new(&[
+///     "farm",
+///     "appliance",
+///     "faraway",
+///     "apple",
+///     "fare",
+///     "gap",
+///     "applicant",
+///     "applaud",
+/// ]);
+/// seq.keep_first_bytes(3);
+/// seq.minimize_by_preference();
+/// // Notice that 'far' comes before 'app', which matches the order in the
+/// // original sequence. This guarantees that leftmost-first semantics are
+/// // not altered by simplifying the set.
+/// let expected = Seq::from_iter([
+///     Literal::inexact("far"),
+///     Literal::inexact("app"),
+///     Literal::exact("gap"),
+/// ]);
+/// assert_eq!(expected, seq);
+/// ```
+#[derive(Clone, Eq, PartialEq)]
+pub struct Seq {
+    /// The members of this seq.
+    ///
+    /// When `None`, the seq represents all possible literals. That is, it
+    /// prevents one from making assumptions about specific literals in the
+    /// seq, and forces one to treat it as if any literal might be in the seq.
+    ///
+    /// Note that `Some(vec![])` is valid and corresponds to the empty seq of
+    /// literals, i.e., a regex that can never match. For example, `[a&&b]`.
+    /// It is distinct from `Some(vec![""])`, which corresponds to the seq
+    /// containing an empty string, which matches at every position.
+    literals: Option<Vec<Literal>>,
+}
+
+impl Seq {
+    /// Returns an empty sequence.
+    ///
+    /// An empty sequence matches zero literals, and thus corresponds to a
+    /// regex that itself can never match.
+    #[inline]
+    pub fn empty() -> Seq {
+        Seq { literals: Some(vec![]) }
+    }
+
+    /// Returns a sequence of literals without a finite size and may contain
+    /// any literal.
+    ///
+    /// A sequence without finite size does not reveal anything about the
+    /// characteristics of the literals in its set. There are no fixed
+    /// prefixes or suffixes, nor are lower or upper bounds on the length of
+    /// the literals in the set known.
+    ///
+    /// This is useful to represent constructs in a regex that are "too big"
+    /// to usefully represent as a sequence of literals. For example,
+    /// `[A-Za-z]`. When sequences get too big, they lose their
+    /// discriminating nature and are more likely to produce false positives,
+    /// which in turn makes them less likely to speed up searches.
+    ///
+    /// More pragmatically, for many regexes, enumerating all possible
+    /// literals is itself not possible or might otherwise use too many
+    /// resources. So constraining the size of sets during extraction is a
+    /// practical trade off to make.
+    #[inline]
+    pub fn infinite() -> Seq {
+        Seq { literals: None }
+    }
+
+    /// Returns a sequence containing a single literal.
+    #[inline]
+    pub fn singleton(lit: Literal) -> Seq {
+        Seq { literals: Some(vec![lit]) }
+    }
+
+    /// Returns a sequence of exact literals from the given byte strings.
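+    ///
+    /// # Example
+    ///
+    /// A small sketch of constructing sequences in their various states:
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// // A finite sequence of two exact literals.
+    /// let seq = Seq::new(["foo", "bar"]);
+    /// assert_eq!(Some(2), seq.len());
+    /// assert!(seq.is_exact());
+    ///
+    /// // Compare with the empty and infinite states.
+    /// assert_eq!(Some(0), Seq::empty().len());
+    /// assert_eq!(None, Seq::infinite().len());
+    /// ```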
+    #[inline]
+    pub fn new<I, B>(it: I) -> Seq
+    where
+        I: IntoIterator<Item = B>,
+        B: AsRef<[u8]>,
+    {
+        it.into_iter().map(|b| Literal::exact(b.as_ref())).collect()
+    }
+
+    /// If this is a finite sequence, return its members as a slice of
+    /// literals.
+    ///
+    /// The slice returned may be empty, in which case, there are no literals
+    /// that can match this sequence.
+    #[inline]
+    pub fn literals(&self) -> Option<&[Literal]> {
+        self.literals.as_deref()
+    }
+
+    /// Push a literal to the end of this sequence.
+    ///
+    /// If this sequence is not finite, then this is a no-op.
+    ///
+    /// Similarly, if the most recently added item of this sequence is
+    /// equivalent to the literal given, then it is not added. This reflects
+    /// a `Seq`'s "set like" behavior, and represents a practical trade off.
+    /// Namely, there is never any need to have two adjacent and equivalent
+    /// literals in the same sequence, _and_ it is easy to detect in some
+    /// cases.
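+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq = Seq::empty();
+    /// seq.push(Literal::exact("foo"));
+    /// // Pushing an equivalent literal right after is a no-op.
+    /// seq.push(Literal::exact("foo"));
+    /// assert_eq!(Some(1), seq.len());
+    /// ```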
+    #[inline]
+    pub fn push(&mut self, lit: Literal) {
+        let lits = match self.literals {
+            None => return,
+            Some(ref mut lits) => lits,
+        };
+        if lits.last().map_or(false, |m| m == &lit) {
+            return;
+        }
+        lits.push(lit);
+    }
+
+    /// Make all of the literals in this sequence inexact.
+    ///
+    /// This is a no-op if this sequence is not finite.
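+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let mut seq = Seq::new(["foo"]);
+    /// assert!(seq.is_exact());
+    /// seq.make_inexact();
+    /// assert!(seq.is_inexact());
+    /// ```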
+    #[inline]
+    pub fn make_inexact(&mut self) {
+        let lits = match self.literals {
+            None => return,
+            Some(ref mut lits) => lits,
+        };
+        for lit in lits.iter_mut() {
+            lit.make_inexact();
+        }
+    }
+
+    /// Converts this sequence to an infinite sequence.
+    ///
+    /// This is a no-op if the sequence is already infinite.
+    #[inline]
+    pub fn make_infinite(&mut self) {
+        self.literals = None;
+    }
+
+    /// Modify this sequence to contain the cross product between it and the
+    /// sequence given.
+    ///
+    /// The cross product only considers literals in this sequence that are
+    /// exact. That is, inexact literals are not extended.
+    ///
+    /// The literals are always drained from `other`, even if none are used.
+    /// This permits callers to reuse the sequence allocation elsewhere.
+    ///
+    /// If this sequence is infinite, then this is a no-op, regardless of
+    /// what `other` contains (and in this case, the literals are still
+    /// drained from `other`). If `other` is infinite and this sequence is
+    /// finite, then this is a no-op, unless this sequence contains a
+    /// zero-length literal. In which case, the infiniteness of `other`
+    /// infects this sequence, and this sequence is itself made infinite.
+    ///
+    /// Like [`Seq::union`], this may attempt to deduplicate literals. See
+    /// [`Seq::dedup`] for how deduplication deals with exact and inexact
+    /// literals.
+    ///
+    /// # Example
+    ///
+    /// This example shows basic usage and how exact and inexact literals
+    /// interact.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::from_iter([
+    ///     Literal::inexact("quux"),
+    ///     Literal::exact("baz"),
+    /// ]);
+    /// seq1.cross_forward(&mut seq2);
+    ///
+    /// // The literals are pulled out of seq2.
+    /// assert_eq!(Some(0), seq2.len());
+    ///
+    /// let expected = Seq::from_iter([
+    ///     Literal::inexact("fooquux"),
+    ///     Literal::exact("foobaz"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// assert_eq!(expected, seq1);
+    /// ```
+    ///
+    /// This example shows the behavior when `other` is an infinite
+    /// sequence.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::infinite();
+    /// seq1.cross_forward(&mut seq2);
+    ///
+    /// // When seq2 is infinite, cross product doesn't add anything, but
+    /// // ensures all members of seq1 are inexact.
+    /// let expected = Seq::from_iter([
+    ///     Literal::inexact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// assert_eq!(expected, seq1);
+    /// ```
+    ///
+    /// This example is like the one above, but shows what happens when this
+    /// sequence contains an empty string. In this case, an infinite `other`
+    /// sequence infects this sequence (because the empty string means that
+    /// there are no finite prefixes):
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::exact(""), // inexact provokes same behavior
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::infinite();
+    /// seq1.cross_forward(&mut seq2);
+    ///
+    /// // seq1 is now infinite!
+    /// assert!(!seq1.is_finite());
+    /// ```
+    ///
+    /// This example shows the behavior when this sequence is infinite.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::infinite();
+    /// let mut seq2 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// seq1.cross_forward(&mut seq2);
+    ///
+    /// // seq1 remains unchanged.
+    /// assert!(!seq1.is_finite());
+    /// // Even though the literals in seq2 weren't used, it was still
+    /// // drained.
+    /// assert_eq!(Some(0), seq2.len());
+    /// ```
+    #[inline]
+    pub fn cross_forward(&mut self, other: &mut Seq) {
+        let (lits1, lits2) = match self.cross_preamble(other) {
+            None => return,
+            Some((lits1, lits2)) => (lits1, lits2),
+        };
+        let newcap = lits1.len().saturating_mul(lits2.len());
+        for selflit in mem::replace(lits1, Vec::with_capacity(newcap)) {
+            if !selflit.is_exact() {
+                lits1.push(selflit);
+                continue;
+            }
+            for otherlit in lits2.iter() {
+                let mut newlit = Literal::exact(Vec::with_capacity(
+                    selflit.len() + otherlit.len(),
+                ));
+                newlit.extend(&selflit);
+                newlit.extend(&otherlit);
+                if !otherlit.is_exact() {
+                    newlit.make_inexact();
+                }
+                lits1.push(newlit);
+            }
+        }
+        lits2.drain(..);
+        self.dedup();
+    }
+
+    /// Modify this sequence to contain the cross product between it and
+    /// the sequence given, where the sequences are treated as suffixes
+    /// instead of prefixes. Namely, the sequence `other` is *prepended*
+    /// to `self` (as opposed to `other` being *appended* to `self` in
+    /// [`Seq::cross_forward`]).
+    ///
+    /// The cross product only considers literals in this sequence that are
+    /// exact. That is, inexact literals are not extended.
+    ///
+    /// The literals are always drained from `other`, even if none are used.
+    /// This permits callers to reuse the sequence allocation elsewhere.
+    ///
+    /// If this sequence is infinite, then this is a no-op, regardless of
+    /// what `other` contains (and in this case, the literals are still
+    /// drained from `other`). If `other` is infinite and this sequence is
+    /// finite, then this is a no-op, unless this sequence contains a
+    /// zero-length literal. In which case, the infiniteness of `other`
+    /// infects this sequence, and this sequence is itself made infinite.
+    ///
+    /// Like [`Seq::union`], this may attempt to deduplicate literals. See
+    /// [`Seq::dedup`] for how deduplication deals with exact and inexact
+    /// literals.
+    ///
+    /// # Example
+    ///
+    /// This example shows basic usage and how exact and inexact literals
+    /// interact.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::from_iter([
+    ///     Literal::inexact("quux"),
+    ///     Literal::exact("baz"),
+    /// ]);
+    /// seq1.cross_reverse(&mut seq2);
+    ///
+    /// // The literals are pulled out of seq2.
+    /// assert_eq!(Some(0), seq2.len());
+    ///
+    /// let expected = Seq::from_iter([
+    ///     Literal::inexact("quuxfoo"),
+    ///     Literal::inexact("bar"),
+    ///     Literal::exact("bazfoo"),
+    /// ]);
+    /// assert_eq!(expected, seq1);
+    /// ```
+    ///
+    /// This example shows the behavior when `other` is an infinite
+    /// sequence.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::infinite();
+    /// seq1.cross_reverse(&mut seq2);
+    ///
+    /// // When seq2 is infinite, cross product doesn't add anything, but
+    /// // ensures all members of seq1 are inexact.
+    /// let expected = Seq::from_iter([
+    ///     Literal::inexact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// assert_eq!(expected, seq1);
+    /// ```
+    ///
+    /// This example is like the one above, but shows what happens when this
+    /// sequence contains an empty string. In this case, an infinite `other`
+    /// sequence infects this sequence (because the empty string means that
+    /// there are no finite suffixes):
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::exact(""), // inexact provokes same behavior
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::infinite();
+    /// seq1.cross_reverse(&mut seq2);
+    ///
+    /// // seq1 is now infinite!
+    /// assert!(!seq1.is_finite());
+    /// ```
+    ///
+    /// This example shows the behavior when this sequence is infinite.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::infinite();
+    /// let mut seq2 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// seq1.cross_reverse(&mut seq2);
+    ///
+    /// // seq1 remains unchanged.
+    /// assert!(!seq1.is_finite());
+    /// // Even though the literals in seq2 weren't used, it was still
+    /// // drained.
+    /// assert_eq!(Some(0), seq2.len());
+    /// ```
+    #[inline]
+    pub fn cross_reverse(&mut self, other: &mut Seq) {
+        let (lits1, lits2) = match self.cross_preamble(other) {
+            None => return,
+            Some((lits1, lits2)) => (lits1, lits2),
+        };
+        // We basically proceed as we do in 'cross_forward' at this point,
+        // except that the outer loop is now 'other' and the inner loop is
+        // now 'self'. That's because 'self' corresponds to suffixes and
+        // 'other' corresponds to the sequence we want to *prepend* to the
+        // suffixes.
+        let newcap = lits1.len().saturating_mul(lits2.len());
+        let selflits = mem::replace(lits1, Vec::with_capacity(newcap));
+        for (i, otherlit) in lits2.drain(..).enumerate() {
+            for selflit in selflits.iter() {
+                if !selflit.is_exact() {
+                    // If the suffix isn't exact, then we can't prepend
+                    // anything to it. However, we still want to keep it. But
+                    // we only want to keep one of them, to avoid
+                    // duplication. (The duplication is okay from a
+                    // correctness perspective, but wasteful.)
+                    if i == 0 {
+                        lits1.push(selflit.clone());
+                    }
+                    continue;
+                }
+                let mut newlit = Literal::exact(Vec::with_capacity(
+                    otherlit.len() + selflit.len(),
+                ));
+                newlit.extend(&otherlit);
+                newlit.extend(&selflit);
+                if !otherlit.is_exact() {
+                    newlit.make_inexact();
+                }
+                lits1.push(newlit);
+            }
+        }
+        self.dedup();
+    }
+
+    /// A helper function that corresponds to the subtle preamble for both
+    /// `cross_forward` and `cross_reverse`. In effect, it handles the cases
+    /// of infinite sequences for both `self` and `other`, as well as
+    /// ensuring that literals from `other` are drained even if they aren't
+    /// used.
+    fn cross_preamble<'a>(
+        &'a mut self,
+        other: &'a mut Seq,
+    ) -> Option<(&'a mut Vec<Literal>, &'a mut Vec<Literal>)> {
+        let lits2 = match other.literals {
+            None => {
+                // If our current seq contains the empty string and the seq
+                // we're adding matches any literal, then it follows that the
+                // current seq must now also match any literal.
+                //
+                // Otherwise, we just have to make sure everything in this
+                // sequence is inexact.
+                if self.min_literal_len() == Some(0) {
+                    *self = Seq::infinite();
+                } else {
+                    self.make_inexact();
+                }
+                return None;
+            }
+            Some(ref mut lits) => lits,
+        };
+        let lits1 = match self.literals {
+            None => {
+                // If we aren't going to make it to the end of this routine
+                // where lits2 is drained, then we need to do it now.
+                lits2.drain(..);
+                return None;
+            }
+            Some(ref mut lits) => lits,
+        };
+        Some((lits1, lits2))
+    }
+
+    /// Unions the `other` sequence into this one.
+    ///
+    /// The literals are always drained out of the given `other` sequence,
+    /// even if they are being unioned into an infinite sequence. This
+    /// permits the caller to reuse the `other` sequence in another context.
+    ///
+    /// Some literal deduping may be performed. If any deduping happens,
+    /// any leftmost-first or "preference" order match semantics will be
+    /// preserved.
+    ///
+    /// # Example
+    ///
+    /// This example shows basic usage.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let mut seq1 = Seq::new(&["foo", "bar"]);
+    /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]);
+    /// seq1.union(&mut seq2);
+    ///
+    /// // The literals are pulled out of seq2.
+    /// assert_eq!(Some(0), seq2.len());
+    ///
+    /// // Adjacent literals are deduped, but non-adjacent literals may not
+    /// // be.
+    /// assert_eq!(Seq::new(&["foo", "bar", "quux", "foo"]), seq1);
+    /// ```
+    ///
+    /// This example shows that literals are drained from `other` even when
+    /// they aren't necessarily used.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let mut seq1 = Seq::infinite();
+    /// // Infinite sequences have no finite length.
+    /// assert_eq!(None, seq1.len());
+    ///
+    /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]);
+    /// seq1.union(&mut seq2);
+    ///
+    /// // seq1 is still infinite and seq2 has been drained.
+    /// assert_eq!(None, seq1.len());
+    /// assert_eq!(Some(0), seq2.len());
+    /// ```
+    #[inline]
+    pub fn union(&mut self, other: &mut Seq) {
+        let lits2 = match other.literals {
+            None => {
+                // Unioning with an infinite sequence always results in an
+                // infinite sequence.
+                self.make_infinite();
+                return;
+            }
+            Some(ref mut lits) => lits.drain(..),
+        };
+        let lits1 = match self.literals {
+            None => return,
+            Some(ref mut lits) => lits,
+        };
+        lits1.extend(lits2);
+        self.dedup();
+    }
+
+    /// Unions the `other` sequence into this one by splicing the `other`
+    /// sequence in at the position of the first zero-length literal.
+    ///
+    /// This is useful for preserving preference order semantics when
+    /// combining two literal sequences. For example, in the regex
+    /// `(a||f)+foo`, the correct preference order prefix sequence is
+    /// `[a, foo, f]`.
+    ///
+    /// The literals are always drained out of the given `other` sequence,
+    /// even if they are being unioned into an infinite sequence. This
+    /// permits the caller to reuse the `other` sequence in another context.
Note that + /// the literals are drained even if no union is performed as well, i.e., + /// when this sequence does not contain a zero-length literal. + /// + /// Some literal deduping may be performed. If any deduping happens, + /// any leftmost-first or "preference" order match semantics will be + /// preserved. + /// + /// # Example + /// + /// This example shows basic usage. + /// + /// ``` + /// use regex_syntax::hir::literal::Seq; + /// + /// let mut seq1 = Seq::new(&["a", "", "f", ""]); + /// let mut seq2 = Seq::new(&["foo"]); + /// seq1.union_into_empty(&mut seq2); + /// + /// // The literals are pulled out of seq2. + /// assert_eq!(Some(0), seq2.len()); + /// // 'foo' gets spliced into seq1 where the first empty string occurs. + /// assert_eq!(Seq::new(&["a", "foo", "f"]), seq1); + /// ``` + /// + /// This example shows that literals are drained from `other` even when + /// they aren't necessarily used. + /// + /// ``` + /// use regex_syntax::hir::literal::Seq; + /// + /// let mut seq1 = Seq::new(&["foo", "bar"]); + /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]); + /// seq1.union_into_empty(&mut seq2); + /// + /// // seq1 has no zero length literals, so no splicing happens. + /// assert_eq!(Seq::new(&["foo", "bar"]), seq1); + /// // Even though no splicing happens, seq2 is still drained. + /// assert_eq!(Some(0), seq2.len()); + /// ``` + #[inline] + pub fn union_into_empty(&mut self, other: &mut Seq) { + let lits2 = other.literals.as_mut().map(|lits| lits.drain(..)); + let lits1 = match self.literals { + None => return, + Some(ref mut lits) => lits, + }; + let first_empty = match lits1.iter().position(|m| m.is_empty()) { + None => return, + Some(i) => i, + }; + let lits2 = match lits2 { + None => { + // Note that we are only here if we've found an empty literal, + // which implies that an infinite sequence infects this seq and + // also turns it into an infinite sequence. + self.literals = None; + return; + } + Some(lits) => lits, + }; + // Clearing out the empties needs to come before the splice because + // the splice might add more empties that we don't want to get rid + // of. Since we're splicing into the position of the first empty, the + // 'first_empty' position computed above is still correct. + lits1.retain(|m| !m.is_empty()); + lits1.splice(first_empty..first_empty, lits2); + self.dedup(); + } + + /// Deduplicate adjacent equivalent literals in this sequence. + /// + /// If adjacent literals are equivalent strings but one is exact and the + /// other inexact, the inexact literal is kept and the exact one is + /// removed. + /// + /// Deduping an infinite sequence is a no-op. + /// + /// # Example + /// + /// This example shows how literals that are duplicate byte strings but + /// are not equivalent with respect to exactness are resolved. + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq = Seq::from_iter([ + /// Literal::exact("foo"), + /// Literal::inexact("foo"), + /// ]); + /// seq.dedup(); + /// + /// assert_eq!(Seq::from_iter([Literal::inexact("foo")]), seq); + /// ``` + #[inline] + pub fn dedup(&mut self) { + if let Some(ref mut lits) = self.literals { + lits.dedup_by(|lit1, lit2| { + if lit1.as_bytes() != lit2.as_bytes() { + return false; + } + if lit1.is_exact() != lit2.is_exact() { + lit1.make_inexact(); + lit2.make_inexact(); + } + true + }); + } + } + + /// Sorts this sequence of literals lexicographically. 
+    ///
+    /// Note that if, before sorting, a literal that is a prefix of another
+    /// literal appears after it, then after sorting, the sequence will not
+    /// represent the same preference order match semantics. For example,
+    /// sorting the sequence `[samwise, sam]` yields the sequence `[sam,
+    /// samwise]`. Under preference order semantics, the latter sequence
+    /// will never match `samwise` whereas the first sequence can.
+    ///
+    /// # Example
+    ///
+    /// This example shows basic usage.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let mut seq = Seq::new(&["foo", "quux", "bar"]);
+    /// seq.sort();
+    ///
+    /// assert_eq!(Seq::new(&["bar", "foo", "quux"]), seq);
+    /// ```
+    #[inline]
+    pub fn sort(&mut self) {
+        if let Some(ref mut lits) = self.literals {
+            lits.sort();
+        }
+    }
+
+    /// Reverses all of the literals in this sequence.
+    ///
+    /// The order of the sequence itself is preserved.
+    ///
+    /// # Example
+    ///
+    /// This example shows basic usage.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let mut seq = Seq::new(&["oof", "rab"]);
+    /// seq.reverse_literals();
+    /// assert_eq!(Seq::new(&["foo", "bar"]), seq);
+    /// ```
+    #[inline]
+    pub fn reverse_literals(&mut self) {
+        if let Some(ref mut lits) = self.literals {
+            for lit in lits.iter_mut() {
+                lit.reverse();
+            }
+        }
+    }
+
+    /// Shrinks this seq to its minimal size while respecting the preference
+    /// order of its literals.
+    ///
+    /// While this routine will remove duplicate literals from this seq, it
+    /// will also remove literals that can never match in a leftmost-first
+    /// or "preference order" search. Similar to [`Seq::dedup`], if a
+    /// literal is deduped, then the one that remains is made inexact.
+    ///
+    /// This is a no-op on seqs that are empty or not finite.
+    ///
+    /// # Example
+    ///
+    /// This example shows the difference between `{sam, samwise}` and
+    /// `{samwise, sam}`.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// // If 'sam' comes before 'samwise' and a preference order search is
+    /// // executed, then 'samwise' can never match.
+    /// let mut seq = Seq::new(&["sam", "samwise"]);
+    /// seq.minimize_by_preference();
+    /// assert_eq!(Seq::from_iter([Literal::inexact("sam")]), seq);
+    ///
+    /// // But if they are reversed, then it's possible for 'samwise' to
+    /// // match since it is given higher preference.
+    /// let mut seq = Seq::new(&["samwise", "sam"]);
+    /// seq.minimize_by_preference();
+    /// assert_eq!(Seq::new(&["samwise", "sam"]), seq);
+    /// ```
+    ///
+    /// This example shows that if an empty string is in this seq, then
+    /// anything that comes after it can never match.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// // An empty string is a prefix of all strings, so it automatically
+    /// // inhibits any subsequent strings from matching.
+    /// let mut seq = Seq::new(&["foo", "bar", "", "quux", "fox"]);
+    /// seq.minimize_by_preference();
+    /// let expected = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::exact("bar"),
+    ///     Literal::inexact(""),
+    /// ]);
+    /// assert_eq!(expected, seq);
+    ///
+    /// // And of course, if it's at the beginning, then it makes it
+    /// // impossible for anything else to match.
+    /// let mut seq = Seq::new(&["", "foo", "quux", "fox"]);
+    /// seq.minimize_by_preference();
+    /// assert_eq!(Seq::from_iter([Literal::inexact("")]), seq);
+    /// ```
+    #[inline]
+    pub fn minimize_by_preference(&mut self) {
+        if let Some(ref mut lits) = self.literals {
+            PreferenceTrie::minimize(lits, false);
+        }
+    }
+
+    /// Trims all literals in this seq such that only the first `len` bytes
+    /// remain. If a literal has less than or equal to `len` bytes, then it
+    /// remains unchanged. Otherwise, it is trimmed and made inexact.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq = Seq::new(&["a", "foo", "quux"]);
+    /// seq.keep_first_bytes(2);
+    ///
+    /// let expected = Seq::from_iter([
+    ///     Literal::exact("a"),
+    ///     Literal::inexact("fo"),
+    ///     Literal::inexact("qu"),
+    /// ]);
+    /// assert_eq!(expected, seq);
+    /// ```
+    #[inline]
+    pub fn keep_first_bytes(&mut self, len: usize) {
+        if let Some(ref mut lits) = self.literals {
+            for m in lits.iter_mut() {
+                m.keep_first_bytes(len);
+            }
+        }
+    }
+
+    /// Trims all literals in this seq such that only the last `len` bytes
+    /// remain. If a literal has less than or equal to `len` bytes, then it
+    /// remains unchanged. Otherwise, it is trimmed and made inexact.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq = Seq::new(&["a", "foo", "quux"]);
+    /// seq.keep_last_bytes(2);
+    ///
+    /// let expected = Seq::from_iter([
+    ///     Literal::exact("a"),
+    ///     Literal::inexact("oo"),
+    ///     Literal::inexact("ux"),
+    /// ]);
+    /// assert_eq!(expected, seq);
+    /// ```
+    #[inline]
+    pub fn keep_last_bytes(&mut self, len: usize) {
+        if let Some(ref mut lits) = self.literals {
+            for m in lits.iter_mut() {
+                m.keep_last_bytes(len);
+            }
+        }
+    }
+
+    /// Returns true if this sequence is finite.
+    ///
+    /// When false, this sequence is infinite and must be treated as if it
+    /// contains every possible literal.
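+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// assert!(Seq::empty().is_finite());
+    /// assert!(!Seq::infinite().is_finite());
+    /// ```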
+    #[inline]
+    pub fn is_finite(&self) -> bool {
+        self.literals.is_some()
+    }
+
+    /// Returns true if and only if this sequence is finite and empty.
+    ///
+    /// An empty sequence never matches anything. It can only be produced by
+    /// literal extraction when the corresponding regex itself cannot match.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.len() == Some(0)
+    }
+
+    /// Returns the number of literals in this sequence if the sequence is
+    /// finite. If the sequence is infinite, then `None` is returned.
+    #[inline]
+    pub fn len(&self) -> Option<usize> {
+        self.literals.as_ref().map(|lits| lits.len())
+    }
+
+    /// Returns true if and only if all literals in this sequence are exact.
+    ///
+    /// This returns false if the sequence is infinite.
+    #[inline]
+    pub fn is_exact(&self) -> bool {
+        self.literals()
+            .map_or(false, |lits| lits.iter().all(|x| x.is_exact()))
+    }
+
+    /// Returns true if and only if all literals in this sequence are
+    /// inexact.
+    ///
+    /// This returns true if the sequence is infinite.
+    #[inline]
+    pub fn is_inexact(&self) -> bool {
+        self.literals()
+            .map_or(true, |lits| lits.iter().all(|x| !x.is_exact()))
+    }
+
+    /// Return the maximum length of the sequence that would result from
+    /// unioning `self` with `other`. If either set is infinite, then this
+    /// returns `None`.
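+    ///
+    /// # Example
+    ///
+    /// A short sketch of the arithmetic involved:
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let seq1 = Seq::new(["a", "b"]);
+    /// let seq2 = Seq::new(["x", "y", "z"]);
+    /// assert_eq!(Some(5), seq1.max_union_len(&seq2));
+    /// assert_eq!(None, seq1.max_union_len(&Seq::infinite()));
+    /// ```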
+    #[inline]
+    pub fn max_union_len(&self, other: &Seq) -> Option<usize> {
+        let len1 = self.len()?;
+        let len2 = other.len()?;
+        Some(len1.saturating_add(len2))
+    }
+
+    /// Return the maximum length of the sequence that would result from the
+    /// cross product of `self` with `other`. If either set is infinite,
+    /// then this returns `None`.
+    #[inline]
+    pub fn max_cross_len(&self, other: &Seq) -> Option<usize> {
+        let len1 = self.len()?;
+        let len2 = other.len()?;
+        Some(len1.saturating_mul(len2))
+    }
+
+    /// Returns the length of the shortest literal in this sequence.
+    ///
+    /// If the sequence is infinite or empty, then this returns `None`.
+    #[inline]
+    pub fn min_literal_len(&self) -> Option<usize> {
+        self.literals.as_ref()?.iter().map(|x| x.len()).min()
+    }
+
+    /// Returns the length of the longest literal in this sequence.
+    ///
+    /// If the sequence is infinite or empty, then this returns `None`.
+    #[inline]
+    pub fn max_literal_len(&self) -> Option<usize> {
+        self.literals.as_ref()?.iter().map(|x| x.len()).max()
+    }
+
+    /// Returns the longest common prefix from this seq.
+    ///
+    /// If the seq matches any literal or otherwise contains no literals,
+    /// then there is no meaningful prefix and this returns `None`.
+    ///
+    /// # Example
+    ///
+    /// This shows some example seqs and their longest common prefix.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let seq = Seq::new(&["foo", "foobar", "fo"]);
+    /// assert_eq!(Some(&b"fo"[..]), seq.longest_common_prefix());
+    /// let seq = Seq::new(&["foo", "foo"]);
+    /// assert_eq!(Some(&b"foo"[..]), seq.longest_common_prefix());
+    /// let seq = Seq::new(&["foo", "bar"]);
+    /// assert_eq!(Some(&b""[..]), seq.longest_common_prefix());
+    /// let seq = Seq::new(&[""]);
+    /// assert_eq!(Some(&b""[..]), seq.longest_common_prefix());
+    ///
+    /// let seq = Seq::infinite();
+    /// assert_eq!(None, seq.longest_common_prefix());
+    /// let seq = Seq::empty();
+    /// assert_eq!(None, seq.longest_common_prefix());
+    /// ```
+    #[inline]
+    pub fn longest_common_prefix(&self) -> Option<&[u8]> {
+        // If we match everything or match nothing, then there's no
+        // meaningful longest common prefix.
+        let lits = match self.literals {
+            None => return None,
+            Some(ref lits) => lits,
+        };
+        if lits.len() == 0 {
+            return None;
+        }
+        let base = lits[0].as_bytes();
+        let mut len = base.len();
+        for m in lits.iter().skip(1) {
+            len = m
+                .as_bytes()
+                .iter()
+                .zip(base[..len].iter())
+                .take_while(|&(a, b)| a == b)
+                .count();
+            if len == 0 {
+                return Some(&[]);
+            }
+        }
+        Some(&base[..len])
+    }
+
+    /// Returns the longest common suffix from this seq.
+    ///
+    /// If the seq matches any literal or otherwise contains no literals,
+    /// then there is no meaningful suffix and this returns `None`.
+    ///
+    /// # Example
+    ///
+    /// This shows some example seqs and their longest common suffix.
+ /// + /// ``` + /// use regex_syntax::hir::literal::Seq; + /// + /// let seq = Seq::new(&["oof", "raboof", "of"]); + /// assert_eq!(Some(&b"of"[..]), seq.longest_common_suffix()); + /// let seq = Seq::new(&["foo", "foo"]); + /// assert_eq!(Some(&b"foo"[..]), seq.longest_common_suffix()); + /// let seq = Seq::new(&["foo", "bar"]); + /// assert_eq!(Some(&b""[..]), seq.longest_common_suffix()); + /// let seq = Seq::new(&[""]); + /// assert_eq!(Some(&b""[..]), seq.longest_common_suffix()); + /// + /// let seq = Seq::infinite(); + /// assert_eq!(None, seq.longest_common_suffix()); + /// let seq = Seq::empty(); + /// assert_eq!(None, seq.longest_common_suffix()); + /// ``` + #[inline] + pub fn longest_common_suffix(&self) -> Option<&[u8]> { + // If we match everything or match nothing, then there's no meaningful + // longest common suffix. + let lits = match self.literals { + None => return None, + Some(ref lits) => lits, + }; + if lits.len() == 0 { + return None; + } + let base = lits[0].as_bytes(); + let mut len = base.len(); + for m in lits.iter().skip(1) { + len = m + .as_bytes() + .iter() + .rev() + .zip(base[base.len() - len..].iter().rev()) + .take_while(|&(a, b)| a == b) + .count(); + if len == 0 { + return Some(&[]); + } + } + Some(&base[base.len() - len..]) + } + + /// Optimizes this seq while treating its literals as prefixes and + /// respecting the preference order of its literals. + /// + /// The specific way "optimization" works is meant to be an implementation + /// detail, as it essentially represents a set of heuristics. The goal + /// that optimization tries to accomplish is to make the literals in this + /// set reflect inputs that will result in a more effective prefilter. + /// Principally by reducing the false positive rate of candidates found by + /// the literals in this sequence. That is, when a match of a literal is + /// found, we would like it to be a strong predictor of the overall match + /// of the regex. If it isn't, then much time will be spent starting and + /// stopping the prefilter search and attempting to confirm the match only + /// to have it fail. + /// + /// Some of those heuristics might be: + /// + /// * Identifying a common prefix from a larger sequence of literals, and + /// shrinking the sequence down to that single common prefix. + /// * Rejecting the sequence entirely if it is believed to result in very + /// high false positive rate. When this happens, the sequence is made + /// infinite. + /// * Shrinking the sequence to a smaller number of literals representing + /// prefixes, but not shrinking it so much as to make literals too short. + /// (A sequence with very short literals, of 1 or 2 bytes, will typically + /// result in a higher false positive rate.) + /// + /// Optimization should only be run once extraction is complete. Namely, + /// optimization may make assumptions that do not compose with other + /// operations in the middle of extraction. For example, optimization will + /// reduce `[E(sam), E(samwise)]` to `[E(sam)]`, but such a transformation + /// is only valid if no other extraction will occur. If other extraction + /// may occur, then the correct transformation would be to `[I(sam)]`. + /// + /// The [`Seq::optimize_for_suffix_by_preference`] does the same thing, but + /// for suffixes. + /// + /// # Example + /// + /// This shows how optimization might transform a sequence. Note that + /// the specific behavior is not a documented guarantee. 
The heuristics + /// used are an implementation detail and may change over time in semver + /// compatible releases. + /// + /// ``` + /// use regex_syntax::hir::literal::{Seq, Literal}; + /// + /// let mut seq = Seq::new(&[ + /// "samantha", + /// "sam", + /// "samwise", + /// "frodo", + /// ]); + /// seq.optimize_for_prefix_by_preference(); + /// assert_eq!(Seq::from_iter([ + /// Literal::exact("samantha"), + /// // Kept exact even though 'samwise' got pruned + /// // because optimization assumes literal extraction + /// // has finished. + /// Literal::exact("sam"), + /// Literal::exact("frodo"), + /// ]), seq); + /// ``` + /// + /// # Example: optimization may make the sequence infinite + /// + /// If the heuristics deem that the sequence could cause a very high false + /// positive rate, then it may make the sequence infinite, effectively + /// disabling its use as a prefilter. + /// + /// ``` + /// use regex_syntax::hir::literal::{Seq, Literal}; + /// + /// let mut seq = Seq::new(&[ + /// "samantha", + /// // An empty string matches at every position, + /// // thus rendering the prefilter completely + /// // ineffective. + /// "", + /// "sam", + /// "samwise", + /// "frodo", + /// ]); + /// seq.optimize_for_prefix_by_preference(); + /// assert!(!seq.is_finite()); + /// ``` + /// + /// Do note that just because there is a `" "` in the sequence, that + /// doesn't mean the sequence will always be made infinite after it is + /// optimized. Namely, if the sequence is considered exact (any match + /// corresponds to an overall match of the original regex), then any match + /// is an overall match, and so the false positive rate is always `0`. + /// + /// To demonstrate this, we remove `samwise` from our sequence. This + /// results in no optimization happening and all literals remain exact. + /// Thus the entire sequence is exact, and it is kept as-is, even though + /// one is an ASCII space: + /// + /// ``` + /// use regex_syntax::hir::literal::{Seq, Literal}; + /// + /// let mut seq = Seq::new(&[ + /// "samantha", + /// " ", + /// "sam", + /// "frodo", + /// ]); + /// seq.optimize_for_prefix_by_preference(); + /// assert!(seq.is_finite()); + /// ``` + #[inline] + pub fn optimize_for_prefix_by_preference(&mut self) { + self.optimize_by_preference(true); + } + + /// Optimizes this seq while treating its literals as suffixes and + /// respecting the preference order of its literals. + /// + /// Optimization should only be run once extraction is complete. + /// + /// The [`Seq::optimize_for_prefix_by_preference`] does the same thing, but + /// for prefixes. See its documentation for more explanation. + #[inline] + pub fn optimize_for_suffix_by_preference(&mut self) { + self.optimize_by_preference(false); + } + + fn optimize_by_preference(&mut self, prefix: bool) { + let origlen = match self.len() { + None => return, + Some(len) => len, + }; + // Just give up now if our sequence contains an empty string. + if self.min_literal_len().map_or(false, |len| len == 0) { + // We squash the sequence so that nobody else gets any bright + // ideas to try and use it. An empty string implies a match at + // every position. A prefilter cannot help you here. + self.make_infinite(); + return; + } + // Make sure we start with the smallest sequence possible. We use a + // special version of preference minimization that retains exactness. + // This is legal because optimization is only expected to occur once + // extraction is complete. 
+        if prefix {
+            if let Some(ref mut lits) = self.literals {
+                PreferenceTrie::minimize(lits, true);
+            }
+        }
+
+        // Look for a common prefix (or suffix). If we found one of those
+        // and it's long enough, then it's a good bet that it will be our
+        // fastest possible prefilter since single-substring search is so
+        // fast.
+        let fix = if prefix {
+            self.longest_common_prefix()
+        } else {
+            self.longest_common_suffix()
+        };
+        if let Some(fix) = fix {
+            // As a special case, if we have a common prefix and the leading
+            // byte of that prefix is one that we think probably occurs
+            // rarely, then strip everything down to just that single byte.
+            // This should promote the use of memchr.
+            //
+            // ... we only do this though if our sequence has more than one
+            // literal. Otherwise, we'd rather just stick with a single
+            // literal scan. That is, using memchr is probably better than
+            // looking for 2 or more literals, but probably not as good as
+            // a straight memmem search.
+            //
+            // ... and also only do this when the prefix is short and
+            // probably not too discriminatory anyway. If it's longer, then
+            // it's probably quite discriminatory and thus is likely to have
+            // a low false positive rate.
+            if prefix
+                && origlen > 1
+                && fix.len() >= 1
+                && fix.len() <= 3
+                && rank(fix[0]) < 200
+            {
+                self.keep_first_bytes(1);
+                self.dedup();
+                return;
+            }
+            // We only strip down to the common prefix/suffix if we think
+            // the existing set of literals isn't great, or if the common
+            // prefix/suffix is expected to be particularly discriminatory.
+            let isfast =
+                self.is_exact() && self.len().map_or(false, |len| len <= 16);
+            let usefix = fix.len() > 4 || (fix.len() > 1 && !isfast);
+            if usefix {
+                // If we keep exactly the number of bytes equal to the
+                // length of the prefix (or suffix), then by the definition
+                // of a prefix, every literal in the sequence will be
+                // equivalent. Thus, 'dedup' will leave us with one literal.
+                //
+                // We do it this way to avoid an alloc, but also to make
+                // sure the exactness of literals is kept (or not).
+                if prefix {
+                    self.keep_first_bytes(fix.len());
+                } else {
+                    self.keep_last_bytes(fix.len());
+                }
+                self.dedup();
+                assert_eq!(Some(1), self.len());
+                // We still fall through here. In particular, we want our
+                // longest common prefix to be subject to the poison check.
+            }
+        }
+        // If we have an exact sequence, we *probably* just want to keep it
+        // as-is. But there are some cases where we don't. So we save a copy
+        // of the exact sequence now, and then try to do some more
+        // optimizations below. If those don't work out, we go back to this
+        // exact sequence.
+        //
+        // The specific motivation for this is that we sometimes wind up
+        // with an exact sequence with a hefty number of literals. Say, 100.
+        // If we stuck with that, it would be too big for Teddy and would
+        // result in using Aho-Corasick. Which is fine... but the lazy DFA
+        // is plenty suitable in such cases. The real issue is that we will
+        // wind up not using a fast prefilter at all. So in cases like this,
+        // even though we have an exact sequence, it would be better to try
+        // and shrink the sequence (which we do below) and use it as a
+        // prefilter that can produce false positive matches.
+        //
+        // But if the shrinking below results in a sequence that "sucks,"
+        // then we don't want to use that because we already have an exact
+        // sequence in hand.
+        let exact: Option<Seq> =
+            if self.is_exact() { Some(self.clone()) } else { None };
+        // Now we attempt to shorten the sequence.
+        // The idea here is that we don't want to look for too many
+        // literals, but we want to shorten our sequence enough to improve
+        // our odds of using better algorithms downstream (such as Teddy).
+        //
+        // The pair of numbers in this list corresponds to the maximal
+        // prefix (in bytes) to keep for all literals and the length of the
+        // sequence at which to do it.
+        //
+        // So for example, the pair (3, 500) would mean, "if we have more
+        // than 500 literals in our sequence, then truncate all of our
+        // literals such that they are at most 3 bytes in length and then
+        // minimize the sequence."
+        const ATTEMPTS: [(usize, usize); 5] =
+            [(5, 10), (4, 10), (3, 64), (2, 64), (1, 10)];
+        for (keep, limit) in ATTEMPTS {
+            let len = match self.len() {
+                None => break,
+                Some(len) => len,
+            };
+            if len <= limit {
+                break;
+            }
+            if prefix {
+                self.keep_first_bytes(keep);
+            } else {
+                self.keep_last_bytes(keep);
+            }
+            if prefix {
+                if let Some(ref mut lits) = self.literals {
+                    PreferenceTrie::minimize(lits, true);
+                }
+            }
+        }
+        // Check for a poison literal. A poison literal is one that is short
+        // and is believed to have a very high match count. These poisons
+        // generally lead to a prefilter with a very high false positive
+        // rate, and thus overall worse performance.
+        //
+        // We do this last because we could have gone from a non-poisonous
+        // sequence to a poisonous one. Perhaps we should add some code to
+        // prevent such transitions in the first place, but then again, we
+        // likely only made the transition in the first place if the
+        // sequence was itself huge. And huge sequences are themselves
+        // poisonous. So...
+        if let Some(lits) = self.literals() {
+            if lits.iter().any(|lit| lit.is_poisonous()) {
+                self.make_infinite();
+            }
+        }
+        // OK, if we had an exact sequence before attempting more
+        // optimizations above and our post-optimized sequence sucks for
+        // some reason or another, then we go back to the exact sequence.
+        if let Some(exact) = exact {
+            // If optimizing resulted in dropping our literals, then
+            // certainly backup and use the exact sequence that we had.
+            if !self.is_finite() {
+                *self = exact;
+                return;
+            }
+            // If our optimized sequence contains a short literal, then it's
+            // *probably* not so great. So throw it away and revert to the
+            // exact sequence.
+            if self.min_literal_len().map_or(true, |len| len <= 2) {
+                *self = exact;
+                return;
+            }
+            // Finally, if our optimized sequence is "big" (i.e., can't use
+            // Teddy), then also don't use it and rely on the exact
+            // sequence.
+            if self.len().map_or(true, |len| len > 64) {
+                *self = exact;
+                return;
+            }
+        }
+    }
+}
+
+impl core::fmt::Debug for Seq {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "Seq")?;
+        if let Some(lits) = self.literals() {
+            f.debug_list().entries(lits.iter()).finish()
+        } else {
+            write!(f, "[∞]")
+        }
+    }
+}
+
+impl FromIterator<Literal> for Seq {
+    fn from_iter<T: IntoIterator<Item = Literal>>(it: T) -> Seq {
+        let mut seq = Seq::empty();
+        for literal in it {
+            seq.push(literal);
+        }
+        seq
+    }
+}
+
+/// A single literal extracted from an [`Hir`] expression.
+///
+/// A literal is composed of two things:
+///
+/// * A sequence of bytes. No guarantees with respect to UTF-8 are provided.
+/// In particular, even if the regex a literal is extracted from is UTF-8,
+/// the literal extracted may not be valid UTF-8. (For example, if an
+/// [`Extractor`] limit resulted in trimming a literal in a way that splits
+/// a codepoint.)
+/// * Whether the literal is "exact" or not. An "exact" literal means that
+/// it has not been trimmed, and may continue to be extended. If a literal
+/// is "exact" after visiting the entire `Hir` expression, then this implies
+/// that the literal leads to a match state. (Although it doesn't
+/// necessarily imply all occurrences of the literal correspond to a match
+/// of the regex, since literal extraction ignores look-around assertions.)
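+///
+/// # Example
+///
+/// A small sketch of the exactness rules, as implemented by the methods
+/// below:
+///
+/// ```
+/// use regex_syntax::hir::literal::Literal;
+///
+/// let mut lit = Literal::exact("ab");
+/// lit.extend(&Literal::exact("c"));
+/// assert_eq!(b"abc", lit.as_bytes());
+///
+/// // Inexact literals are never extended.
+/// let mut lit = Literal::inexact("ab");
+/// lit.extend(&Literal::exact("c"));
+/// assert_eq!(b"ab", lit.as_bytes());
+/// ```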
+#[derive(Clone, Eq, PartialEq, PartialOrd, Ord)]
+pub struct Literal {
+    bytes: Vec<u8>,
+    exact: bool,
+}
+
+impl Literal {
+    /// Returns a new exact literal containing the bytes given.
+    #[inline]
+    pub fn exact<B: Into<Vec<u8>>>(bytes: B) -> Literal {
+        Literal { bytes: bytes.into(), exact: true }
+    }
+
+    /// Returns a new inexact literal containing the bytes given.
+    #[inline]
+    pub fn inexact<B: Into<Vec<u8>>>(bytes: B) -> Literal {
+        Literal { bytes: bytes.into(), exact: false }
+    }
+
+    /// Returns the bytes in this literal.
+    #[inline]
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.bytes
+    }
+
+    /// Yields ownership of the bytes inside this literal.
+    ///
+    /// Note that this throws away whether the literal is "exact" or not.
+    #[inline]
+    pub fn into_bytes(self) -> Vec<u8> {
+        self.bytes
+    }
+
+    /// Returns the length of this literal in bytes.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.as_bytes().len()
+    }
+
+    /// Returns true if and only if this literal has zero bytes.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns true if and only if this literal is exact.
+    #[inline]
+    pub fn is_exact(&self) -> bool {
+        self.exact
+    }
+
+    /// Marks this literal as inexact.
+    ///
+    /// Inexact literals can never be extended. For example,
+    /// [`Seq::cross_forward`] will not extend inexact literals.
+    #[inline]
+    pub fn make_inexact(&mut self) {
+        self.exact = false;
+    }
+
+    /// Reverse the bytes in this literal.
+    #[inline]
+    pub fn reverse(&mut self) {
+        self.bytes.reverse();
+    }
+
+    /// Extend this literal with the literal given.
+    ///
+    /// If this literal is inexact, then this is a no-op.
+    #[inline]
+    pub fn extend(&mut self, lit: &Literal) {
+        if !self.is_exact() {
+            return;
+        }
+        self.bytes.extend_from_slice(&lit.bytes);
+    }
+
+    /// Trims this literal such that only the first `len` bytes remain. If
+    /// this literal has `len` or fewer bytes, then it remains unchanged.
+    /// Otherwise, the literal is trimmed and marked as inexact.
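+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Literal;
+    ///
+    /// let mut lit = Literal::exact("foobar");
+    /// lit.keep_first_bytes(3);
+    /// assert_eq!(b"foo", lit.as_bytes());
+    /// assert!(!lit.is_exact());
+    /// ```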
+    #[inline]
+    pub fn keep_first_bytes(&mut self, len: usize) {
+        if len >= self.len() {
+            return;
+        }
+        self.make_inexact();
+        self.bytes.truncate(len);
+    }
+
+    /// Trims this literal such that only the last `len` bytes remain. If
+    /// this literal has `len` or fewer bytes, then it remains unchanged.
+    /// Otherwise, the literal is trimmed and marked as inexact.
+    #[inline]
+    pub fn keep_last_bytes(&mut self, len: usize) {
+        if len >= self.len() {
+            return;
+        }
+        self.make_inexact();
+        self.bytes.drain(..self.len() - len);
+    }
+
+    /// Returns true if it is believed that this literal is likely to match
+    /// very frequently, and is thus not a good candidate for a prefilter.
+    fn is_poisonous(&self) -> bool {
+        self.is_empty()
+            || (self.len() == 1 && rank(self.as_bytes()[0]) >= 250)
+    }
+}
+
+impl From<u8> for Literal {
+    fn from(byte: u8) -> Literal {
+        Literal::exact(vec![byte])
+    }
+}
+
+impl From<char> for Literal {
+    fn from(ch: char) -> Literal {
+        use alloc::string::ToString;
+        Literal::exact(ch.encode_utf8(&mut [0; 4]).to_string())
+    }
+}
+
+impl AsRef<[u8]> for Literal {
+    fn as_ref(&self) -> &[u8] {
+        self.as_bytes()
+    }
+}
+
+impl core::fmt::Debug for Literal {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let tag = if self.exact { "E" } else { "I" };
+        f.debug_tuple(tag)
+            .field(&crate::debug::Bytes(self.as_bytes()))
+            .finish()
+    }
+}
+
+/// A "preference" trie that rejects literals that will never match when
+/// executing a leftmost first or "preference" search.
+///
+/// For example, if 'sam' is inserted, then trying to insert 'samwise' will
+/// be rejected because 'samwise' can never match since 'sam' will always
+/// take priority. However, if 'samwise' is inserted first, then inserting
+/// 'sam' after it is accepted. In this case, either 'samwise' or 'sam' can
+/// match in a "preference" search.
+///
+/// Note that we only use this trie as a "set." That is, given a sequence of
+/// literals, we insert each one in order. An `insert` will reject a literal
+/// if a prefix of that literal already exists in the trie. Thus, to rebuild
+/// the "minimal" sequence, we simply only keep literals that were
+/// successfully inserted. (Since we don't need traversal, one wonders
+/// whether we can make some simplifications here, but I haven't given it a
+/// ton of thought and I've never seen this show up on a profile. Because of
+/// the heuristic limits imposed on literal extractions, the size of the
+/// inputs here is usually very small.)
+#[derive(Debug)]
+struct PreferenceTrie {
+    /// The states in this trie. The index of a state in this vector is its
+    /// ID.
+    states: Vec<State>,
+    /// This vec indicates which states are match states. It always has
+    /// the same length as `states` and is indexed by the same state ID.
+    /// A state with identifier `sid` is a match state if and only if
+    /// `matches[sid].is_some()`. The option contains the index of the
+    /// literal corresponding to the match. The index is offset by 1 so that
+    /// it fits in a NonZeroUsize.
+    matches: Vec<Option<NonZeroUsize>>,
+    /// The index to allocate to the next literal added to this trie. Starts
+    /// at 1 and increments by 1 for every literal successfully added to the
+    /// trie.
+    next_literal_index: usize,
+}
+
+/// A single state in a trie. Uses a sparse representation for its
+/// transitions.
+#[derive(Debug, Default)]
+struct State {
+    /// Sparse representation of the transitions out of this state.
+    /// Transitions are sorted by byte. There is at most one such transition
+    /// for any particular byte.
+    trans: Vec<(u8, usize)>,
+}
+
+impl PreferenceTrie {
+    /// Minimizes the given sequence of literals while preserving preference
+    /// order semantics.
+    ///
+    /// When `keep_exact` is true, the exactness of every literal retained
+    /// is kept. This is useful when dealing with a fully extracted `Seq`
+    /// that only contains exact literals. In that case, we can keep all
+    /// retained literals as exact because we know we'll never need to match
+    /// anything after them and because any removed literals are guaranteed
+    /// to never match.
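+    ///
+    /// For example, minimizing `[E(sam), E(samwise), E(frodo)]` with
+    /// `keep_exact` set to false yields `[I(sam), E(frodo)]`: `samwise` is
+    /// dropped, and `sam` is made inexact since it now also stands in for
+    /// matches of `samwise`.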
+    fn minimize(literals: &mut Vec<Literal>, keep_exact: bool) {
+        let mut trie = PreferenceTrie {
+            states: vec![],
+            matches: vec![],
+            next_literal_index: 1,
+        };
+        let mut make_inexact = vec![];
+        literals.retain_mut(|lit| match trie.insert(lit.as_bytes()) {
+            Ok(_) => true,
+            Err(i) => {
+                if !keep_exact {
+                    make_inexact.push(i.checked_sub(1).unwrap());
+                }
+                false
+            }
+        });
+        for i in make_inexact {
+            literals[i].make_inexact();
+        }
+    }
+
+    /// Returns `Ok` if the given byte string is accepted into this trie and
+    /// `Err` otherwise. The index for the success case corresponds to the
+    /// index of the literal added. The index for the error case corresponds to
+    /// the index of the literal already in the trie that prevented the given
+    /// byte string from being added. (Which implies it is a prefix of the one
+    /// given.)
+    ///
+    /// In short, the byte string given is accepted into the trie if and only
+    /// if it is possible for it to match when executing a preference order
+    /// search.
+    fn insert(&mut self, bytes: &[u8]) -> Result<usize, usize> {
+        let mut prev = self.root();
+        if let Some(idx) = self.matches[prev] {
+            return Err(idx.get());
+        }
+        for &b in bytes.iter() {
+            match self.states[prev].trans.binary_search_by_key(&b, |t| t.0) {
+                Ok(i) => {
+                    prev = self.states[prev].trans[i].1;
+                    if let Some(idx) = self.matches[prev] {
+                        return Err(idx.get());
+                    }
+                }
+                Err(i) => {
+                    let next = self.create_state();
+                    self.states[prev].trans.insert(i, (b, next));
+                    prev = next;
+                }
+            }
+        }
+        let idx = self.next_literal_index;
+        self.next_literal_index += 1;
+        self.matches[prev] = NonZeroUsize::new(idx);
+        Ok(idx)
+    }
+
+    /// Returns the root state ID, and if it doesn't exist, creates it.
+    fn root(&mut self) -> usize {
+        if !self.states.is_empty() {
+            0
+        } else {
+            self.create_state()
+        }
+    }
+
+    /// Creates a new empty state and returns its ID.
+    fn create_state(&mut self) -> usize {
+        let id = self.states.len();
+        self.states.push(State::default());
+        self.matches.push(None);
+        id
+    }
+}
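
The trie itself is private, but its effect is observable through `Seq::minimize_by_preference`. A short sketch of the 'sam'/'samwise' example from the comment above, assuming the public `Seq` API of the vendored regex-syntax 0.8:

```rust
use regex_syntax::hir::literal::Seq;

fn main() {
    // 'sam' comes first, so 'samwise' can never match in a
    // leftmost-first ("preference") search and is dropped.
    let mut seq = Seq::new(["sam", "samwise"]);
    seq.minimize_by_preference();
    assert_eq!(Some(1), seq.len());

    // With the order flipped, either literal can win, so both stay.
    let mut seq = Seq::new(["samwise", "sam"]);
    seq.minimize_by_preference();
    assert_eq!(Some(2), seq.len());
}
```
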
+
+/// Returns the "rank" of the given byte.
+///
+/// The minimum rank value is `0` and the maximum rank value is `255`.
+///
+/// The rank of a byte is derived from a heuristic background distribution of
+/// relative frequencies of bytes. The heuristic says that the lower the rank
+/// of a byte, the less likely that byte is to appear in any arbitrary
+/// haystack.
+pub fn rank(byte: u8) -> u8 {
+    crate::rank::BYTE_FREQUENCIES[usize::from(byte)]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn parse(pattern: &str) -> Hir {
+        crate::ParserBuilder::new().utf8(false).build().parse(pattern).unwrap()
+    }
+
+    fn prefixes(pattern: &str) -> Seq {
+        Extractor::new().kind(ExtractKind::Prefix).extract(&parse(pattern))
+    }
+
+    fn suffixes(pattern: &str) -> Seq {
+        Extractor::new().kind(ExtractKind::Suffix).extract(&parse(pattern))
+    }
+
+    fn e(pattern: &str) -> (Seq, Seq) {
+        (prefixes(pattern), suffixes(pattern))
+    }
+
+    #[allow(non_snake_case)]
+    fn E(x: &str) -> Literal {
+        Literal::exact(x.as_bytes())
+    }
+
+    #[allow(non_snake_case)]
+    fn I(x: &str) -> Literal {
+        Literal::inexact(x.as_bytes())
+    }
+
+    fn seq<I: IntoIterator<Item = Literal>>(it: I) -> Seq {
+        Seq::from_iter(it)
+    }
+
+    fn infinite() -> (Seq, Seq) {
+        (Seq::infinite(), Seq::infinite())
+    }
+
+    fn inexact<I1, I2>(it1: I1, it2: I2) -> (Seq, Seq)
+    where
+        I1: IntoIterator<Item = Literal>,
+        I2: IntoIterator<Item = Literal>,
+    {
+        (Seq::from_iter(it1), Seq::from_iter(it2))
+    }
+
+    fn exact<B: AsRef<[u8]>, I: IntoIterator<Item = B>>(it: I) -> (Seq, Seq) {
+        let s1 = Seq::new(it);
+        let s2 = s1.clone();
+        (s1, s2)
+    }
+
+    fn opt<B: AsRef<[u8]>, I: IntoIterator<Item = B>>(it: I) -> (Seq, Seq) {
+        let (mut p, mut s) = exact(it);
+        p.optimize_for_prefix_by_preference();
+        s.optimize_for_suffix_by_preference();
+        (p, s)
+    }
+
+    #[test]
+    fn literal() {
+        assert_eq!(exact(["a"]), e("a"));
+        assert_eq!(exact(["aaaaa"]), e("aaaaa"));
+        assert_eq!(exact(["A", "a"]), e("(?i-u)a"));
+        assert_eq!(exact(["AB", "Ab", "aB", "ab"]), e("(?i-u)ab"));
+        assert_eq!(exact(["abC", "abc"]), e("ab(?i-u)c"));
+
+        assert_eq!(exact([b"\xFF"]), e(r"(?-u:\xFF)"));
+
+        #[cfg(feature = "unicode-case")]
+        {
+            assert_eq!(exact(["☃"]), e("☃"));
+            assert_eq!(exact(["☃"]), e("(?i)☃"));
+            assert_eq!(exact(["☃☃☃☃☃"]), e("☃☃☃☃☃"));
+
+            assert_eq!(exact(["Δ"]), e("Δ"));
+            assert_eq!(exact(["δ"]), e("δ"));
+            assert_eq!(exact(["Δ", "δ"]), e("(?i)Δ"));
+            assert_eq!(exact(["Δ", "δ"]), e("(?i)δ"));
+
+            assert_eq!(exact(["S", "s", "ſ"]), e("(?i)S"));
+            assert_eq!(exact(["S", "s", "ſ"]), e("(?i)s"));
+            assert_eq!(exact(["S", "s", "ſ"]), e("(?i)ſ"));
+        }
+
+        let letters = "ͱͳͷΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋ";
+        assert_eq!(exact([letters]), e(letters));
+    }
+
+    #[test]
+    fn class() {
+        assert_eq!(exact(["a", "b", "c"]), e("[abc]"));
+        assert_eq!(exact(["a1b", "a2b", "a3b"]), e("a[123]b"));
+        assert_eq!(exact(["δ", "ε"]), e("[εδ]"));
+        #[cfg(feature = "unicode-case")]
+        {
+            assert_eq!(exact(["Δ", "Ε", "δ", "ε", "ϵ"]), e(r"(?i)[εδ]"));
+        }
+    }
+
+    #[test]
+    fn look() {
+        assert_eq!(exact(["ab"]), e(r"a\Ab"));
+        assert_eq!(exact(["ab"]), e(r"a\zb"));
+        assert_eq!(exact(["ab"]), e(r"a(?m:^)b"));
+        assert_eq!(exact(["ab"]), e(r"a(?m:$)b"));
+        assert_eq!(exact(["ab"]), e(r"a\bb"));
+        assert_eq!(exact(["ab"]), e(r"a\Bb"));
+        assert_eq!(exact(["ab"]), e(r"a(?-u:\b)b"));
+        assert_eq!(exact(["ab"]), e(r"a(?-u:\B)b"));
+
+        assert_eq!(exact(["ab"]), e(r"^ab"));
+        assert_eq!(exact(["ab"]), e(r"$ab"));
+        assert_eq!(exact(["ab"]), e(r"(?m:^)ab"));
+        assert_eq!(exact(["ab"]), e(r"(?m:$)ab"));
+        assert_eq!(exact(["ab"]), e(r"\bab"));
+        assert_eq!(exact(["ab"]), e(r"\Bab"));
+        assert_eq!(exact(["ab"]), e(r"(?-u:\b)ab"));
+        assert_eq!(exact(["ab"]), e(r"(?-u:\B)ab"));
+
+        assert_eq!(exact(["ab"]), e(r"ab^"));
+        assert_eq!(exact(["ab"]), e(r"ab$"));
+        assert_eq!(exact(["ab"]), e(r"ab(?m:^)"));
+        assert_eq!(exact(["ab"]), e(r"ab(?m:$)"));
+        assert_eq!(exact(["ab"]), e(r"ab\b"));
+        assert_eq!(exact(["ab"]), e(r"ab\B"));
+        assert_eq!(exact(["ab"]), e(r"ab(?-u:\b)"));
assert_eq!(exact(["ab"]), e(r"ab(?-u:\B)")); + + let expected = (seq([I("aZ"), E("ab")]), seq([I("Zb"), E("ab")])); + assert_eq!(expected, e(r"^aZ*b")); + } + + #[test] + fn repetition() { + assert_eq!(exact(["a", ""]), e(r"a?")); + assert_eq!(exact(["", "a"]), e(r"a??")); + assert_eq!(inexact([I("a"), E("")], [I("a"), E("")]), e(r"a*")); + assert_eq!(inexact([E(""), I("a")], [E(""), I("a")]), e(r"a*?")); + assert_eq!(inexact([I("a")], [I("a")]), e(r"a+")); + assert_eq!(inexact([I("a")], [I("a")]), e(r"(a+)+")); + + assert_eq!(exact(["ab"]), e(r"aZ{0}b")); + assert_eq!(exact(["aZb", "ab"]), e(r"aZ?b")); + assert_eq!(exact(["ab", "aZb"]), e(r"aZ??b")); + assert_eq!( + inexact([I("aZ"), E("ab")], [I("Zb"), E("ab")]), + e(r"aZ*b") + ); + assert_eq!( + inexact([E("ab"), I("aZ")], [E("ab"), I("Zb")]), + e(r"aZ*?b") + ); + assert_eq!(inexact([I("aZ")], [I("Zb")]), e(r"aZ+b")); + assert_eq!(inexact([I("aZ")], [I("Zb")]), e(r"aZ+?b")); + + assert_eq!(exact(["aZZb"]), e(r"aZ{2}b")); + assert_eq!(inexact([I("aZZ")], [I("ZZb")]), e(r"aZ{2,3}b")); + + assert_eq!(exact(["abc", ""]), e(r"(abc)?")); + assert_eq!(exact(["", "abc"]), e(r"(abc)??")); + + assert_eq!(inexact([I("a"), E("b")], [I("ab"), E("b")]), e(r"a*b")); + assert_eq!(inexact([E("b"), I("a")], [E("b"), I("ab")]), e(r"a*?b")); + assert_eq!(inexact([I("ab")], [I("b")]), e(r"ab+")); + assert_eq!(inexact([I("a"), I("b")], [I("b")]), e(r"a*b+")); + + // FIXME: The suffixes for this don't look quite right to me. I think + // the right suffixes would be: [I(ac), I(bc), E(c)]. The main issue I + // think is that suffixes are computed by iterating over concatenations + // in reverse, and then [bc, ac, c] ordering is indeed correct from + // that perspective. We also test a few more equivalent regexes, and + // we get the same result, so it is consistent at least I suppose. + // + // The reason why this isn't an issue is that it only messes up + // preference order, and currently, suffixes are never used in a + // context where preference order matters. For prefixes it matters + // because we sometimes want to use prefilters without confirmation + // when all of the literals are exact (and there's no look-around). But + // we never do that for suffixes. Any time we use suffixes, we always + // include a confirmation step. If that ever changes, then it's likely + // this bug will need to be fixed, but last time I looked, it appears + // hard to do so. + assert_eq!( + inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]), + e(r"a*b*c") + ); + assert_eq!( + inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]), + e(r"(a+)?(b+)?c") + ); + assert_eq!( + inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]), + e(r"(a+|)(b+|)c") + ); + // A few more similarish but not identical regexes. These may have a + // similar problem as above. 
+        assert_eq!(
+            inexact(
+                [I("a"), I("b"), I("c"), E("")],
+                [I("c"), I("b"), I("a"), E("")]
+            ),
+            e(r"a*b*c*")
+        );
+        assert_eq!(inexact([I("a"), I("b"), I("c")], [I("c")]), e(r"a*b*c+"));
+        assert_eq!(inexact([I("a"), I("b")], [I("bc")]), e(r"a*b+c"));
+        assert_eq!(inexact([I("a"), I("b")], [I("c"), I("b")]), e(r"a*b+c*"));
+        assert_eq!(inexact([I("ab"), E("a")], [I("b"), E("a")]), e(r"ab*"));
+        assert_eq!(
+            inexact([I("ab"), E("ac")], [I("bc"), E("ac")]),
+            e(r"ab*c")
+        );
+        assert_eq!(inexact([I("ab")], [I("b")]), e(r"ab+"));
+        assert_eq!(inexact([I("ab")], [I("bc")]), e(r"ab+c"));
+
+        assert_eq!(
+            inexact([I("z"), E("azb")], [I("zazb"), E("azb")]),
+            e(r"z*azb")
+        );
+
+        let expected =
+            exact(["aaa", "aab", "aba", "abb", "baa", "bab", "bba", "bbb"]);
+        assert_eq!(expected, e(r"[ab]{3}"));
+        let expected = inexact(
+            [
+                I("aaa"),
+                I("aab"),
+                I("aba"),
+                I("abb"),
+                I("baa"),
+                I("bab"),
+                I("bba"),
+                I("bbb"),
+            ],
+            [
+                I("aaa"),
+                I("aab"),
+                I("aba"),
+                I("abb"),
+                I("baa"),
+                I("bab"),
+                I("bba"),
+                I("bbb"),
+            ],
+        );
+        assert_eq!(expected, e(r"[ab]{3,4}"));
+    }
+
+    #[test]
+    fn concat() {
+        let empty: [&str; 0] = [];
+
+        assert_eq!(exact(["abcxyz"]), e(r"abc()xyz"));
+        assert_eq!(exact(["abcxyz"]), e(r"(abc)(xyz)"));
+        assert_eq!(exact(["abcmnoxyz"]), e(r"abc()mno()xyz"));
+        assert_eq!(exact(empty), e(r"abc[a&&b]xyz"));
+        assert_eq!(exact(["abcxyz"]), e(r"abc[a&&b]*xyz"));
+    }
+
+    #[test]
+    fn alternation() {
+        assert_eq!(exact(["abc", "mno", "xyz"]), e(r"abc|mno|xyz"));
+        assert_eq!(
+            inexact(
+                [E("abc"), I("mZ"), E("mo"), E("xyz")],
+                [E("abc"), I("Zo"), E("mo"), E("xyz")]
+            ),
+            e(r"abc|mZ*o|xyz")
+        );
+        assert_eq!(exact(["abc", "xyz"]), e(r"abc|M[a&&b]N|xyz"));
+        assert_eq!(exact(["abc", "MN", "xyz"]), e(r"abc|M[a&&b]*N|xyz"));
+
+        assert_eq!(exact(["aaa", "aaaaa"]), e(r"(?:|aa)aaa"));
+        assert_eq!(
+            inexact(
+                [I("aaa"), E(""), I("aaaaa"), E("aa")],
+                [I("aaa"), E(""), E("aa")]
+            ),
+            e(r"(?:|aa)(?:aaa)*")
+        );
+        assert_eq!(
+            inexact(
+                [E(""), I("aaa"), E("aa"), I("aaaaa")],
+                [E(""), I("aaa"), E("aa")]
+            ),
+            e(r"(?:|aa)(?:aaa)*?")
+        );
+
+        assert_eq!(
+            inexact([E("a"), I("b"), E("")], [E("a"), I("b"), E("")]),
+            e(r"a|b*")
+        );
+        assert_eq!(inexact([E("a"), I("b")], [E("a"), I("b")]), e(r"a|b+"));
+
+        assert_eq!(
+            inexact([I("a"), E("b"), E("c")], [I("ab"), E("b"), E("c")]),
+            e(r"a*b|c")
+        );
+
+        assert_eq!(
+            inexact(
+                [E("a"), E("b"), I("c"), E("")],
+                [E("a"), E("b"), I("c"), E("")]
+            ),
+            e(r"a|(?:b|c*)")
+        );
+
+        assert_eq!(
+            inexact(
+                [I("a"), I("b"), E("c"), I("a"), I("ab"), E("c")],
+                [I("ac"), I("bc"), E("c"), I("ac"), I("abc"), E("c")],
+            ),
+            e(r"(a|b)*c|(a|ab)*c")
+        );
+
+        assert_eq!(
+            exact(["abef", "abgh", "cdef", "cdgh"]),
+            e(r"(ab|cd)(ef|gh)")
+        );
+        assert_eq!(
+            exact([
+                "abefij", "abefkl", "abghij", "abghkl", "cdefij", "cdefkl",
+                "cdghij", "cdghkl",
+            ]),
+            e(r"(ab|cd)(ef|gh)(ij|kl)")
+        );
+
+        assert_eq!(inexact([E("abab")], [E("abab")]), e(r"(ab){2}"));
+
+        assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,3}"));
+
+        assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,}"));
+    }
+
+    #[test]
+    fn impossible() {
+        let empty: [&str; 0] = [];
+
+        assert_eq!(exact(empty), e(r"[a&&b]"));
+        assert_eq!(exact(empty), e(r"a[a&&b]"));
+        assert_eq!(exact(empty), e(r"[a&&b]b"));
+        assert_eq!(exact(empty), e(r"a[a&&b]b"));
+        assert_eq!(exact(["a", "b"]), e(r"a|[a&&b]|b"));
+        assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]|b"));
+        assert_eq!(exact(["a", "b"]), e(r"a|[a&&b]d|b"));
+        assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]d|b"));
assert_eq!(exact([""]), e(r"[a&&b]*")); + assert_eq!(exact(["MN"]), e(r"M[a&&b]*N")); + } + + // This tests patterns that contain something that defeats literal + // detection, usually because it would blow some limit on the total number + // of literals that can be returned. + // + // The main idea is that when literal extraction sees something that + // it knows will blow a limit, it replaces it with a marker that says + // "any literal will match here." While not necessarily true, the + // over-estimation is just fine for the purposes of literal extraction, + // because the imprecision doesn't matter: too big is too big. + // + // This is one of the trickier parts of literal extraction, since we need + // to make sure all of our literal extraction operations correctly compose + // with the markers. + #[test] + fn anything() { + assert_eq!(infinite(), e(r".")); + assert_eq!(infinite(), e(r"(?s).")); + assert_eq!(infinite(), e(r"[A-Za-z]")); + assert_eq!(infinite(), e(r"[A-Z]")); + assert_eq!(exact([""]), e(r"[A-Z]{0}")); + assert_eq!(infinite(), e(r"[A-Z]?")); + assert_eq!(infinite(), e(r"[A-Z]*")); + assert_eq!(infinite(), e(r"[A-Z]+")); + assert_eq!((seq([I("1")]), Seq::infinite()), e(r"1[A-Z]")); + assert_eq!((seq([I("1")]), seq([I("2")])), e(r"1[A-Z]2")); + assert_eq!((Seq::infinite(), seq([I("123")])), e(r"[A-Z]+123")); + assert_eq!(infinite(), e(r"[A-Z]+123[A-Z]+")); + assert_eq!(infinite(), e(r"1|[A-Z]|3")); + assert_eq!( + (seq([E("1"), I("2"), E("3")]), Seq::infinite()), + e(r"1|2[A-Z]|3"), + ); + assert_eq!( + (Seq::infinite(), seq([E("1"), I("2"), E("3")])), + e(r"1|[A-Z]2|3"), + ); + assert_eq!( + (seq([E("1"), I("2"), E("4")]), seq([E("1"), I("3"), E("4")])), + e(r"1|2[A-Z]3|4"), + ); + assert_eq!((Seq::infinite(), seq([I("2")])), e(r"(?:|1)[A-Z]2")); + assert_eq!(inexact([I("a")], [I("z")]), e(r"a.z")); + } + + // Like the 'anything' test, but it uses smaller limits in order to test + // the logic for effectively aborting literal extraction when the seqs get + // too big. 
+
+    // Like the 'anything' test, but it uses smaller limits in order to test
+    // the logic for effectively aborting literal extraction when the seqs get
+    // too big.
+    #[test]
+    fn anything_small_limits() {
+        fn prefixes(pattern: &str) -> Seq {
+            Extractor::new()
+                .kind(ExtractKind::Prefix)
+                .limit_total(10)
+                .extract(&parse(pattern))
+        }
+
+        fn suffixes(pattern: &str) -> Seq {
+            Extractor::new()
+                .kind(ExtractKind::Suffix)
+                .limit_total(10)
+                .extract(&parse(pattern))
+        }
+
+        fn e(pattern: &str) -> (Seq, Seq) {
+            (prefixes(pattern), suffixes(pattern))
+        }
+
+        assert_eq!(
+            (
+                seq([
+                    I("aaa"),
+                    I("aab"),
+                    I("aba"),
+                    I("abb"),
+                    I("baa"),
+                    I("bab"),
+                    I("bba"),
+                    I("bbb")
+                ]),
+                seq([
+                    I("aaa"),
+                    I("aab"),
+                    I("aba"),
+                    I("abb"),
+                    I("baa"),
+                    I("bab"),
+                    I("bba"),
+                    I("bbb")
+                ])
+            ),
+            e(r"[ab]{3}{3}")
+        );
+
+        assert_eq!(infinite(), e(r"ab|cd|ef|gh|ij|kl|mn|op|qr|st|uv|wx|yz"));
+    }
+
+    #[test]
+    fn empty() {
+        assert_eq!(exact([""]), e(r""));
+        assert_eq!(exact([""]), e(r"^"));
+        assert_eq!(exact([""]), e(r"$"));
+        assert_eq!(exact([""]), e(r"(?m:^)"));
+        assert_eq!(exact([""]), e(r"(?m:$)"));
+        assert_eq!(exact([""]), e(r"\b"));
+        assert_eq!(exact([""]), e(r"\B"));
+        assert_eq!(exact([""]), e(r"(?-u:\b)"));
+        assert_eq!(exact([""]), e(r"(?-u:\B)"));
+    }
+
+    #[test]
+    fn odds_and_ends() {
+        assert_eq!((Seq::infinite(), seq([I("a")])), e(r".a"));
+        assert_eq!((seq([I("a")]), Seq::infinite()), e(r"a."));
+        assert_eq!(infinite(), e(r"a|."));
+        assert_eq!(infinite(), e(r".|a"));
+
+        let pat = r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]";
+        let expected = inexact(
+            ["Mo'am", "Moam", "Mu'am", "Muam"].map(I),
+            [
+                "ddafi", "ddafy", "dhafi", "dhafy", "dzafi", "dzafy", "dafi",
+                "dafy", "tdafi", "tdafy", "thafi", "thafy", "tzafi", "tzafy",
+                "tafi", "tafy", "zdafi", "zdafy", "zhafi", "zhafy", "zzafi",
+                "zzafy", "zafi", "zafy",
+            ]
+            .map(I),
+        );
+        assert_eq!(expected, e(pat));
+
+        assert_eq!(
+            (seq(["fn is_", "fn as_"].map(I)), Seq::infinite()),
+            e(r"fn is_([A-Z]+)|fn as_([A-Z]+)"),
+        );
+        assert_eq!(
+            inexact([I("foo")], [I("quux")]),
+            e(r"foo[A-Z]+bar[A-Z]+quux")
+        );
+        assert_eq!(infinite(), e(r"[A-Z]+bar[A-Z]+"));
+        assert_eq!(
+            exact(["Sherlock Holmes"]),
+            e(r"(?m)^Sherlock Holmes|Sherlock Holmes$")
+        );
+
+        assert_eq!(exact(["sa", "sb"]), e(r"\bs(?:[ab])"));
+    }
+
+    // This tests a specific regex along with some heuristic steps to reduce
+    // the sequences extracted. This is meant to roughly correspond to the
+    // types of heuristics used to shrink literal sets in practice. (Shrinking
+    // is done because you want to balance "spend too much work looking for
+    // too many literals" and "spend too much work processing false positive
+    // matches from short literals.")
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn holmes() {
+        let expected = inexact(
+            ["HOL", "HOl", "HoL", "Hol", "hOL", "hOl", "hoL", "hol"].map(I),
+            [
+                "MES", "MEs", "Eſ", "MeS", "Mes", "eſ", "mES", "mEs", "meS",
+                "mes",
+            ]
+            .map(I),
+        );
+        let (mut prefixes, mut suffixes) = e(r"(?i)Holmes");
+        prefixes.keep_first_bytes(3);
+        suffixes.keep_last_bytes(3);
+        prefixes.minimize_by_preference();
+        suffixes.minimize_by_preference();
+        assert_eq!(expected, (prefixes, suffixes));
+    }
+
+    // This tests that we get some kind of literals extracted for a beefier
+    // alternation with case insensitive mode enabled. At one point during
+    // development, this returned nothing, and motivated some special case
+    // code in Extractor::union to try and trim down the literal sequences
+    // if the union would blow the limits set.
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn holmes_alt() {
+        let mut pre =
+            prefixes(r"(?i)Sherlock|Holmes|Watson|Irene|Adler|John|Baker");
+        assert!(pre.len().unwrap() > 0);
+        pre.optimize_for_prefix_by_preference();
+        assert!(pre.len().unwrap() > 0);
+    }
+
+    // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
+    // See: CVE-2022-24713
+    //
+    // We test this here to ensure literal extraction completes in reasonable
+    // time and isn't materially impacted by these sorts of pathological
+    // repeats.
+    #[test]
+    fn crazy_repeats() {
+        assert_eq!(inexact([E("")], [E("")]), e(r"(?:){4294967295}"));
+        assert_eq!(
+            inexact([E("")], [E("")]),
+            e(r"(?:){64}{64}{64}{64}{64}{64}")
+        );
+        assert_eq!(inexact([E("")], [E("")]), e(r"x{0}{4294967295}"));
+        assert_eq!(inexact([E("")], [E("")]), e(r"(?:|){4294967295}"));
+
+        assert_eq!(
+            inexact([E("")], [E("")]),
+            e(r"(?:){8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}")
+        );
+        let repa = "a".repeat(100);
+        assert_eq!(
+            inexact([I(&repa)], [I(&repa)]),
+            e(r"a{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}")
+        );
+    }
+
+    #[test]
+    fn huge() {
+        let pat = r#"(?-u)
+        2(?:
+          [45]\d{3}|
+          7(?:
+            1[0-267]|
+            2[0-289]|
+            3[0-29]|
+            4[01]|
+            5[1-3]|
+            6[013]|
+            7[0178]|
+            91
+          )|
+          8(?:
+            0[125]|
+            [139][1-6]|
+            2[0157-9]|
+            41|
+            6[1-35]|
+            7[1-5]|
+            8[1-8]|
+            90
+          )|
+          9(?:
+            0[0-2]|
+            1[0-4]|
+            2[568]|
+            3[3-6]|
+            5[5-7]|
+            6[0167]|
+            7[15]|
+            8[0146-9]
+          )
+        )\d{4}|
+        3(?:
+          12?[5-7]\d{2}|
+          0(?:
+            2(?:
+              [025-79]\d|
+              [348]\d{1,2}
+            )|
+            3(?:
+              [2-4]\d|
+              [56]\d?
+            )
+          )|
+          2(?:
+            1\d{2}|
+            2(?:
+              [12]\d|
+              [35]\d{1,2}|
+              4\d?
+            )
+          )|
+          3(?:
+            1\d{2}|
+            2(?:
+              [2356]\d|
+              4\d{1,2}
+            )
+          )|
+          4(?:
+            1\d{2}|
+            2(?:
+              2\d{1,2}|
+              [47]|
+              5\d{2}
+            )
+          )|
+          5(?:
+            1\d{2}|
+            29
+          )|
+          [67]1\d{2}|
+          8(?:
+            1\d{2}|
+            2(?:
+              2\d{2}|
+              3|
+              4\d
+            )
+          )
+        )\d{3}|
+        4(?:
+          0(?:
+            2(?:
+              [09]\d|
+              7
+            )|
+            33\d{2}
+          )|
+          1\d{3}|
+          2(?:
+            1\d{2}|
+            2(?:
+              [25]\d?|
+              [348]\d|
+              [67]\d{1,2}
+            )
+          )|
+          3(?:
+            1\d{2}(?:
+              \d{2}
+            )?|
+            2(?:
+              [045]\d|
+              [236-9]\d{1,2}
+            )|
+            32\d{2}
+          )|
+          4(?:
+            [18]\d{2}|
+            2(?:
+              [2-46]\d{2}|
+              3
+            )|
+            5[25]\d{2}
+          )|
+          5(?:
+            1\d{2}|
+            2(?:
+              3\d|
+              5
+            )
+          )|
+          6(?:
+            [18]\d{2}|
+            2(?:
+              3(?:
+                \d{2}
+              )?|
+              [46]\d{1,2}|
+              5\d{2}|
+              7\d
+            )|
+            5(?:
+              3\d?|
+              4\d|
+              [57]\d{1,2}|
+              6\d{2}|
+              8
+            )
+          )|
+          71\d{2}|
+          8(?:
+            [18]\d{2}|
+            23\d{2}|
+            54\d{2}
+          )|
+          9(?:
+            [18]\d{2}|
+            2[2-5]\d{2}|
+            53\d{1,2}
+          )
+        )\d{3}|
+        5(?:
+          02[03489]\d{2}|
+          1\d{2}|
+          2(?:
+            1\d{2}|
+            2(?:
+              2(?:
+                \d{2}
+              )?|
+              [457]\d{2}
+            )
+          )|
+          3(?:
+            1\d{2}|
+            2(?:
+              [37](?:
+                \d{2}
+              )?|
+              [569]\d{2}
+            )
+          )|
+          4(?:
+            1\d{2}|
+            2[46]\d{2}
+          )|
+          5(?:
+            1\d{2}|
+            26\d{1,2}
+          )|
+          6(?:
+            [18]\d{2}|
+            2|
+            53\d{2}
+          )|
+          7(?:
+            1|
+            24
+          )\d{2}|
+          8(?:
+            1|
+            26
+          )\d{2}|
+          91\d{2}
+        )\d{3}|
+        6(?:
+          0(?:
+            1\d{2}|
+            2(?:
+              3\d{2}|
+              4\d{1,2}
+            )
+          )|
+          2(?:
+            2[2-5]\d{2}|
+            5(?:
+              [3-5]\d{2}|
+              7
+            )|
+            8\d{2}
+          )|
+          3(?:
+            1|
+            2[3478]
+          )\d{2}|
+          4(?:
+            1|
+            2[34]
+          )\d{2}|
+          5(?:
+            1|
+            2[47]
+          )\d{2}|
+          6(?:
+            [18]\d{2}|
+            6(?:
+              2(?:
+                2\d|
+                [34]\d{2}
+              )|
+              5(?:
+                [24]\d{2}|
+                3\d|
+                5\d{1,2}
+              )
+            )
+          )|
+          72[2-5]\d{2}|
+          8(?:
+            1\d{2}|
+            2[2-5]\d{2}
+          )|
+          9(?:
+            1\d{2}|
+            2[2-6]\d{2}
+          )
+        )\d{3}|
+        7(?:
+          (?:
+            02|
+            [3-589]1|
+            6[12]|
+            72[24]
+          )\d{2}|
+          21\d{3}|
+          32
+        )\d{3}|
+        8(?:
+          (?:
+            4[12]|
+            [5-7]2|
+            1\d?
+          )|
+          (?:
+            0|
+            3[12]|
+            [5-7]1|
+            217
+          )\d
+        )\d{4}|
+        9(?:
+          [35]1|
+          (?:
+            [024]2|
+            81
+          )\d|
+          (?:
+            1|
+            [24]1
+          )\d{2}
+        )\d{3}
+        "#;
+        // TODO: This is a good candidate for a seq of literals that could be
+        // shrunk quite a bit and still be very productive with respect to
+        // literal optimizations.
+        let (prefixes, suffixes) = e(pat);
+        assert!(!suffixes.is_finite());
+        assert_eq!(Some(243), prefixes.len());
+    }
+
+    #[test]
+    fn optimize() {
+        // This gets a common prefix that isn't too short.
+        let (p, s) =
+            opt(["foobarfoobar", "foobar", "foobarzfoobar", "foobarfoobar"]);
+        assert_eq!(seq([I("foobar")]), p);
+        assert_eq!(seq([I("foobar")]), s);
+
+        // This also finds a common prefix, but since it's only one byte, it
+        // prefers the multiple literals.
+        let (p, s) = opt(["abba", "akka", "abccba"]);
+        assert_eq!(exact(["abba", "akka", "abccba"]), (p, s));
+
+        let (p, s) = opt(["sam", "samwise"]);
+        assert_eq!((seq([E("sam")]), seq([E("sam"), E("samwise")])), (p, s));
+
+        // The empty string is poisonous, so our seq becomes infinite, even
+        // though all literals are exact.
+        let (p, s) = opt(["foobarfoo", "foo", "", "foozfoo", "foofoo"]);
+        assert!(!p.is_finite());
+        assert!(!s.is_finite());
+
+        // A space is also poisonous, so our seq becomes infinite. But this
+        // only gets triggered when we don't have a completely exact sequence.
+        // When the sequence is exact, spaces are okay, since we presume that
+        // any prefilter will match a space more quickly than the regex engine.
+        // (When the sequence is exact, there's a chance of the prefilter being
+        // used without needing the regex engine at all.)
+        let mut p = seq([E("foobarfoo"), I("foo"), E(" "), E("foofoo")]);
+        p.optimize_for_prefix_by_preference();
+        assert!(!p.is_finite());
+    }
+}
diff --git a/vendor/regex-syntax/src/hir/literal/mod.rs b/vendor/regex-syntax/src/hir/literal/mod.rs
deleted file mode 100644
index fbc5d3c..0000000
--- a/vendor/regex-syntax/src/hir/literal/mod.rs
+++ /dev/null
@@ -1,1686 +0,0 @@
-/*!
-Provides routines for extracting literal prefixes and suffixes from an `Hir`.
-*/
-
-use std::cmp;
-use std::fmt;
-use std::iter;
-use std::mem;
-use std::ops;
-
-use crate::hir::{self, Hir, HirKind};
-
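
The module deleted here implemented the pre-`Seq` design: instead of exact/inexact literals it tracked *complete* versus *cut* literals inside a `Literals` set with per-set size limits. For contrast, a hypothetical sketch against that 0.6-era public API (the `ab+c` behavior mirrors the `pfx_cat13` test further down):

```rust
// Assumes the old crate version: regex-syntax = "0.6".
use regex_syntax::hir::literal::Literals;
use regex_syntax::Parser;

fn main() {
    let hir = Parser::new().parse(r"ab+c").unwrap();
    let lits = Literals::prefixes(&hir);
    // 'ab' is extracted but "cut": it cannot be extended through
    // 'b+', so finding 'ab' does not by itself imply a regex match.
    assert_eq!(1, lits.literals().len());
    assert!(lits.literals()[0].is_cut());
    assert_eq!(b"ab".as_slice(), lits.literals()[0].as_ref());
    assert!(!lits.all_complete());
}
```
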
-/// A set of literal byte strings extracted from a regular expression.
-///
-/// Every member of the set is a `Literal`, which is represented by a
-/// `Vec<u8>`. (Notably, it may contain invalid UTF-8.) Every member is
-/// said to be either *complete* or *cut*. A complete literal means that
-/// it extends until the beginning (or end) of the regular expression. In
-/// some circumstances, this can be used to indicate a match in the regular
-/// expression.
-///
-/// A key aspect of literal extraction is knowing when to stop. It is not
-/// feasible to blindly extract all literals from a regular expression, even if
-/// there are finitely many. For example, the regular expression `[0-9]{10}`
-/// has `10^10` distinct literals. For this reason, literal extraction is
-/// bounded to some low number by default using heuristics, but the limits can
-/// be tweaked.
-///
-/// **WARNING**: Literal extraction uses stack space proportional to the size
-/// of the `Hir` expression. At some point, this drawback will be eliminated.
-/// To protect yourself, set a reasonable
-/// [`nest_limit` on your `Parser`](../../struct.ParserBuilder.html#method.nest_limit).
-/// This is done for you by default.
-#[derive(Clone, Eq, PartialEq)]
-pub struct Literals {
-    lits: Vec<Literal>,
-    limit_size: usize,
-    limit_class: usize,
-}
-
-/// A single member of a set of literals extracted from a regular expression.
-///
-/// This type has `Deref` and `DerefMut` impls to `Vec<u8>` so that all slice
-/// and `Vec<u8>` operations are available.
-#[derive(Clone, Eq, Ord)]
-pub struct Literal {
-    v: Vec<u8>,
-    cut: bool,
-}
-
-impl Literals {
-    /// Returns a new empty set of literals using default limits.
-    pub fn empty() -> Literals {
-        Literals { lits: vec![], limit_size: 250, limit_class: 10 }
-    }
-
-    /// Returns a set of literal prefixes extracted from the given `Hir`.
-    pub fn prefixes(expr: &Hir) -> Literals {
-        let mut lits = Literals::empty();
-        lits.union_prefixes(expr);
-        lits
-    }
-
-    /// Returns a set of literal suffixes extracted from the given `Hir`.
-    pub fn suffixes(expr: &Hir) -> Literals {
-        let mut lits = Literals::empty();
-        lits.union_suffixes(expr);
-        lits
-    }
-
-    /// Get the approximate size limit (in bytes) of this set.
-    pub fn limit_size(&self) -> usize {
-        self.limit_size
-    }
-
-    /// Set the approximate size limit (in bytes) of this set.
-    ///
-    /// If extracting a literal would put the set over this limit, then
-    /// extraction stops.
-    ///
-    /// The new limits will only apply to additions to this set. Existing
-    /// members remain unchanged, even if the set exceeds the new limit.
-    pub fn set_limit_size(&mut self, size: usize) -> &mut Literals {
-        self.limit_size = size;
-        self
-    }
-
-    /// Get the character class size limit for this set.
-    pub fn limit_class(&self) -> usize {
-        self.limit_class
-    }
-
-    /// Limits the size of character (or byte) classes considered.
-    ///
-    /// A value of `0` prevents all character classes from being considered.
-    ///
-    /// This limit also applies to case insensitive literals, since each
-    /// character in the case insensitive literal is converted to a class, and
-    /// then case folded.
-    ///
-    /// The new limits will only apply to additions to this set. Existing
-    /// members remain unchanged, even if the set exceeds the new limit.
-    pub fn set_limit_class(&mut self, size: usize) -> &mut Literals {
-        self.limit_class = size;
-        self
-    }
-
-    /// Returns the set of literals as a slice. Its order is unspecified.
-    pub fn literals(&self) -> &[Literal] {
-        &self.lits
-    }
-
-    /// Returns the length of the smallest literal.
-    ///
-    /// Returns `None` if there are no literals in the set.
-    pub fn min_len(&self) -> Option<usize> {
-        let mut min = None;
-        for lit in &self.lits {
-            match min {
-                None => min = Some(lit.len()),
-                Some(m) if lit.len() < m => min = Some(lit.len()),
-                _ => {}
-            }
-        }
-        min
-    }
-
-    /// Returns true if all members in this set are complete.
-    pub fn all_complete(&self) -> bool {
-        !self.lits.is_empty() && self.lits.iter().all(|l| !l.is_cut())
-    }
-
-    /// Returns true if any member in this set is complete.
-    pub fn any_complete(&self) -> bool {
-        self.lits.iter().any(|lit| !lit.is_cut())
-    }
-
-    /// Returns true if this set contains an empty literal.
-    pub fn contains_empty(&self) -> bool {
-        self.lits.iter().any(|lit| lit.is_empty())
-    }
-
-    /// Returns true if this set is empty or if all of its members are empty.
-    pub fn is_empty(&self) -> bool {
-        self.lits.is_empty() || self.lits.iter().all(|lit| lit.is_empty())
-    }
-
-    /// Returns a new empty set of literals using this set's limits.
-    pub fn to_empty(&self) -> Literals {
-        let mut lits = Literals::empty();
-        lits.set_limit_size(self.limit_size).set_limit_class(self.limit_class);
-        lits
-    }
-
-    /// Returns the longest common prefix of all members in this set.
-    pub fn longest_common_prefix(&self) -> &[u8] {
-        if self.is_empty() {
-            return &[];
-        }
-        let lit0 = &*self.lits[0];
-        let mut len = lit0.len();
-        for lit in &self.lits[1..] {
-            len = cmp::min(
-                len,
-                lit.iter().zip(lit0).take_while(|&(a, b)| a == b).count(),
-            );
-        }
-        &self.lits[0][..len]
-    }
-
-    /// Returns the longest common suffix of all members in this set.
-    pub fn longest_common_suffix(&self) -> &[u8] {
-        if self.is_empty() {
-            return &[];
-        }
-        let lit0 = &*self.lits[0];
-        let mut len = lit0.len();
-        for lit in &self.lits[1..] {
-            len = cmp::min(
-                len,
-                lit.iter()
-                    .rev()
-                    .zip(lit0.iter().rev())
-                    .take_while(|&(a, b)| a == b)
-                    .count(),
-            );
-        }
-        &self.lits[0][self.lits[0].len() - len..]
-    }
-
-    /// Returns a new set of literals with the given number of bytes trimmed
-    /// from the suffix of each literal.
-    ///
-    /// If any literal would be cut out completely by trimming, then None is
-    /// returned.
-    ///
-    /// Any duplicates that are created as a result of this transformation are
-    /// removed.
-    pub fn trim_suffix(&self, num_bytes: usize) -> Option<Literals> {
-        if self.min_len().map(|len| len <= num_bytes).unwrap_or(true) {
-            return None;
-        }
-        let mut new = self.to_empty();
-        for mut lit in self.lits.iter().cloned() {
-            let new_len = lit.len() - num_bytes;
-            lit.truncate(new_len);
-            lit.cut();
-            new.lits.push(lit);
-        }
-        new.lits.sort();
-        new.lits.dedup();
-        Some(new)
-    }
-
-    /// Returns a new set of prefixes of this set of literals that are
-    /// guaranteed to be unambiguous.
-    ///
-    /// Any substring match with a member of the returned set is guaranteed
-    /// to never overlap with a substring match of another member of the set
-    /// at the same starting position.
-    ///
-    /// Given any two members of the returned set, neither is a substring of
-    /// the other.
-    pub fn unambiguous_prefixes(&self) -> Literals {
-        if self.lits.is_empty() {
-            return self.to_empty();
-        }
-        let mut old = self.lits.to_vec();
-        let mut new = self.to_empty();
-        'OUTER: while let Some(mut candidate) = old.pop() {
-            if candidate.is_empty() {
-                continue;
-            }
-            if new.lits.is_empty() {
-                new.lits.push(candidate);
-                continue;
-            }
-            for lit2 in &mut new.lits {
-                if lit2.is_empty() {
-                    continue;
-                }
-                if &candidate == lit2 {
-                    // If the literal is already in the set, then we can
-                    // just drop it. But make sure that cut literals are
-                    // infectious!
-                    candidate.cut = candidate.cut || lit2.cut;
-                    lit2.cut = candidate.cut;
-                    continue 'OUTER;
-                }
-                if candidate.len() < lit2.len() {
-                    if let Some(i) = position(&candidate, &lit2) {
-                        candidate.cut();
-                        let mut lit3 = lit2.clone();
-                        lit3.truncate(i);
-                        lit3.cut();
-                        old.push(lit3);
-                        lit2.clear();
-                    }
-                } else if let Some(i) = position(&lit2, &candidate) {
-                    lit2.cut();
-                    let mut new_candidate = candidate.clone();
-                    new_candidate.truncate(i);
-                    new_candidate.cut();
-                    old.push(new_candidate);
-                    candidate.clear();
-                }
-                // Oops, the candidate is already represented in the set.
-                if candidate.is_empty() {
-                    continue 'OUTER;
-                }
-            }
-            new.lits.push(candidate);
-        }
-        new.lits.retain(|lit| !lit.is_empty());
-        new.lits.sort();
-        new.lits.dedup();
-        new
-    }
-
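
A hypothetical sketch of that guarantee, again assuming the removed 0.6-era public API: a literal and its own extension are ambiguous, so the pair collapses to the shorter literal, which is then cut:

```rust
// Assumes the old crate version: regex-syntax = "0.6".
use regex_syntax::hir::literal::{Literal, Literals};

fn main() {
    let mut lits = Literals::empty();
    lits.add(Literal::new(b"ab".to_vec()));
    lits.add(Literal::new(b"abc".to_vec()));
    // 'ab' is a prefix of 'abc', so a match of 'abc' always overlaps a
    // match of 'ab' at the same start; only the cut 'ab' remains.
    let unamb = lits.unambiguous_prefixes();
    assert_eq!(1, unamb.literals().len());
    assert!(unamb.literals()[0].is_cut());
    assert_eq!(b"ab".as_slice(), unamb.literals()[0].as_ref());
}
```
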
-    /// Returns a new set of suffixes of this set of literals that are
-    /// guaranteed to be unambiguous.
-    ///
-    /// Any substring match with a member of the returned set is guaranteed
-    /// to never overlap with a substring match of another member of the set
-    /// at the same ending position.
-    ///
-    /// Given any two members of the returned set, neither is a substring of
-    /// the other.
-    pub fn unambiguous_suffixes(&self) -> Literals {
-        // This is a touch wasteful...
-        let mut lits = self.clone();
-        lits.reverse();
-        let mut unamb = lits.unambiguous_prefixes();
-        unamb.reverse();
-        unamb
-    }
-
-    /// Unions the prefixes from the given expression to this set.
-    ///
-    /// If prefixes could not be added (for example, this set would exceed its
-    /// size limits or the set of prefixes from `expr` includes the empty
-    /// string), then false is returned.
-    ///
-    /// Note that prefix literals extracted from `expr` are said to be complete
-    /// if and only if the literal extends from the beginning of `expr` to the
-    /// end of `expr`.
-    pub fn union_prefixes(&mut self, expr: &Hir) -> bool {
-        let mut lits = self.to_empty();
-        prefixes(expr, &mut lits);
-        !lits.is_empty() && !lits.contains_empty() && self.union(lits)
-    }
-
-    /// Unions the suffixes from the given expression to this set.
-    ///
-    /// If suffixes could not be added (for example, this set would exceed its
-    /// size limits or the set of suffixes from `expr` includes the empty
-    /// string), then false is returned.
-    ///
-    /// Note that suffix literals extracted from `expr` are said to be complete
-    /// if and only if the literal extends from the end of `expr` to the
-    /// beginning of `expr`.
-    pub fn union_suffixes(&mut self, expr: &Hir) -> bool {
-        let mut lits = self.to_empty();
-        suffixes(expr, &mut lits);
-        lits.reverse();
-        !lits.is_empty() && !lits.contains_empty() && self.union(lits)
-    }
-
-    /// Unions this set with another set.
-    ///
-    /// If the union would cause the set to exceed its limits, then the union
-    /// is skipped and it returns false. Otherwise, if the union succeeds, it
-    /// returns true.
-    pub fn union(&mut self, lits: Literals) -> bool {
-        if self.num_bytes() + lits.num_bytes() > self.limit_size {
-            return false;
-        }
-        if lits.is_empty() {
-            self.lits.push(Literal::empty());
-        } else {
-            self.lits.extend(lits.lits);
-        }
-        true
-    }
-
-    /// Extends this set with another set.
-    ///
-    /// The set of literals is extended via a cross product.
-    ///
-    /// If a cross product would cause this set to exceed its limits, then the
-    /// cross product is skipped and it returns false. Otherwise, if the cross
-    /// product succeeds, it returns true.
-    pub fn cross_product(&mut self, lits: &Literals) -> bool {
-        if lits.is_empty() {
-            return true;
-        }
-        // Check to make sure we stay within our limits.
-        let mut size_after;
-        if self.is_empty() || !self.any_complete() {
-            size_after = self.num_bytes();
-            for lits_lit in lits.literals() {
-                size_after += lits_lit.len();
-            }
-        } else {
-            size_after = self.lits.iter().fold(0, |accum, lit| {
-                accum + if lit.is_cut() { lit.len() } else { 0 }
-            });
-            for lits_lit in lits.literals() {
-                for self_lit in self.literals() {
-                    if !self_lit.is_cut() {
-                        size_after += self_lit.len() + lits_lit.len();
-                    }
-                }
-            }
-        }
-        if size_after > self.limit_size {
-            return false;
-        }
-
-        let mut base = self.remove_complete();
-        if base.is_empty() {
-            base = vec![Literal::empty()];
-        }
-        for lits_lit in lits.literals() {
-            for mut self_lit in base.clone() {
-                self_lit.extend(&**lits_lit);
-                self_lit.cut = lits_lit.cut;
-                self.lits.push(self_lit);
-            }
-        }
-        true
-    }
-
-    /// Extends each literal in this set with the bytes given.
-    ///
-    /// If the set is empty, then the given literal is added to the set.
-    ///
-    /// If adding any number of bytes to all members of this set causes a limit
-    /// to be exceeded, then no bytes are added and false is returned. If a
-    /// prefix of `bytes` can be fit into this set, then it is used and all
-    /// resulting literals are cut.
-    pub fn cross_add(&mut self, bytes: &[u8]) -> bool {
-        // N.B. This could be implemented by simply calling cross_product with
-        // a literal set containing just `bytes`, but we can be smarter about
-        // taking shorter prefixes of `bytes` if they'll fit.
-        if bytes.is_empty() {
-            return true;
-        }
-        if self.lits.is_empty() {
-            let i = cmp::min(self.limit_size, bytes.len());
-            self.lits.push(Literal::new(bytes[..i].to_owned()));
-            self.lits[0].cut = i < bytes.len();
-            return !self.lits[0].is_cut();
-        }
-        let size = self.num_bytes();
-        if size + self.lits.len() >= self.limit_size {
-            return false;
-        }
-        let mut i = 1;
-        while size + (i * self.lits.len()) <= self.limit_size
-            && i < bytes.len()
-        {
-            i += 1;
-        }
-        for lit in &mut self.lits {
-            if !lit.is_cut() {
-                lit.extend(&bytes[..i]);
-                if i < bytes.len() {
-                    lit.cut();
-                }
-            }
-        }
-        true
-    }
-
-    /// Adds the given literal to this set.
-    ///
-    /// Returns false if adding this literal would cause the class to be too
-    /// big.
-    pub fn add(&mut self, lit: Literal) -> bool {
-        if self.num_bytes() + lit.len() > self.limit_size {
-            return false;
-        }
-        self.lits.push(lit);
-        true
-    }
-
-    /// Extends each literal in this set with the character class given.
-    ///
-    /// Returns false if the character class was too big to add.
-    pub fn add_char_class(&mut self, cls: &hir::ClassUnicode) -> bool {
-        self._add_char_class(cls, false)
-    }
-
-    /// Extends each literal in this set with the character class given,
-    /// writing the bytes of each character in reverse.
-    ///
-    /// Returns false if the character class was too big to add.
-    fn add_char_class_reverse(&mut self, cls: &hir::ClassUnicode) -> bool {
-        self._add_char_class(cls, true)
-    }
-
-    fn _add_char_class(
-        &mut self,
-        cls: &hir::ClassUnicode,
-        reverse: bool,
-    ) -> bool {
-        use std::char;
-
-        if self.class_exceeds_limits(cls_char_count(cls)) {
-            return false;
-        }
-        let mut base = self.remove_complete();
-        if base.is_empty() {
-            base = vec![Literal::empty()];
-        }
-        for r in cls.iter() {
-            let (s, e) = (r.start as u32, r.end as u32 + 1);
-            for c in (s..e).filter_map(char::from_u32) {
-                for mut lit in base.clone() {
-                    let mut bytes = c.to_string().into_bytes();
-                    if reverse {
-                        bytes.reverse();
-                    }
-                    lit.extend(&bytes);
-                    self.lits.push(lit);
-                }
-            }
-        }
-        true
-    }
-
-    /// Extends each literal in this set with the byte class given.
-    ///
-    /// Returns false if the byte class was too big to add.
-    pub fn add_byte_class(&mut self, cls: &hir::ClassBytes) -> bool {
-        if self.class_exceeds_limits(cls_byte_count(cls)) {
-            return false;
-        }
-        let mut base = self.remove_complete();
-        if base.is_empty() {
-            base = vec![Literal::empty()];
-        }
-        for r in cls.iter() {
-            let (s, e) = (r.start as u32, r.end as u32 + 1);
-            for b in (s..e).map(|b| b as u8) {
-                for mut lit in base.clone() {
-                    lit.push(b);
-                    self.lits.push(lit);
-                }
-            }
-        }
-        true
-    }
-
-    /// Cuts every member of this set. When a member is cut, it can never
-    /// be extended.
-    pub fn cut(&mut self) {
-        for lit in &mut self.lits {
-            lit.cut();
-        }
-    }
-
-    /// Reverses all members in place.
-    pub fn reverse(&mut self) {
-        for lit in &mut self.lits {
-            lit.reverse();
-        }
-    }
-
-    /// Clears this set of all members.
-    pub fn clear(&mut self) {
-        self.lits.clear();
-    }
-
-    /// Pops all complete literals out of this set.
-    fn remove_complete(&mut self) -> Vec<Literal> {
-        let mut base = vec![];
-        for lit in mem::replace(&mut self.lits, vec![]) {
-            if lit.is_cut() {
-                self.lits.push(lit);
-            } else {
-                base.push(lit);
-            }
-        }
-        base
-    }
-
-    /// Returns the total number of bytes in this set.
-    fn num_bytes(&self) -> usize {
-        self.lits.iter().fold(0, |accum, lit| accum + lit.len())
-    }
-
-    /// Returns true if a character class with the given size would cause this
-    /// set to exceed its limits.
-    ///
-    /// The size given should correspond to the number of items in the class.
-    fn class_exceeds_limits(&self, size: usize) -> bool {
-        if size > self.limit_class {
-            return true;
-        }
-        // This is an approximation since codepoints in a char class can encode
-        // to 1-4 bytes.
-        let new_byte_count = if self.lits.is_empty() {
-            size
-        } else {
-            self.lits.iter().fold(0, |accum, lit| {
-                accum
-                    + if lit.is_cut() {
-                        // If the literal is cut, then we'll never add
-                        // anything to it, so don't count it.
-                        0
-                    } else {
-                        (lit.len() + 1) * size
-                    }
-            })
-        };
-        new_byte_count > self.limit_size
-    }
-}
-
-fn prefixes(expr: &Hir, lits: &mut Literals) {
-    match *expr.kind() {
-        HirKind::Literal(hir::Literal::Unicode(c)) => {
-            let mut buf = [0; 4];
-            lits.cross_add(c.encode_utf8(&mut buf).as_bytes());
-        }
-        HirKind::Literal(hir::Literal::Byte(b)) => {
-            lits.cross_add(&[b]);
-        }
-        HirKind::Class(hir::Class::Unicode(ref cls)) => {
-            if !lits.add_char_class(cls) {
-                lits.cut();
-            }
-        }
-        HirKind::Class(hir::Class::Bytes(ref cls)) => {
-            if !lits.add_byte_class(cls) {
-                lits.cut();
-            }
-        }
-        HirKind::Group(hir::Group { ref hir, .. }) => {
-            prefixes(&**hir, lits);
-        }
-        HirKind::Repetition(ref x) => match x.kind {
-            hir::RepetitionKind::ZeroOrOne => {
-                repeat_zero_or_one_literals(&x.hir, lits, prefixes);
-            }
-            hir::RepetitionKind::ZeroOrMore => {
-                repeat_zero_or_more_literals(&x.hir, lits, prefixes);
-            }
-            hir::RepetitionKind::OneOrMore => {
-                repeat_one_or_more_literals(&x.hir, lits, prefixes);
-            }
-            hir::RepetitionKind::Range(ref rng) => {
-                let (min, max) = match *rng {
-                    hir::RepetitionRange::Exactly(m) => (m, Some(m)),
-                    hir::RepetitionRange::AtLeast(m) => (m, None),
-                    hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
-                };
-                repeat_range_literals(
-                    &x.hir, min, max, x.greedy, lits, prefixes,
-                )
-            }
-        },
-        HirKind::Concat(ref es) if es.is_empty() => {}
-        HirKind::Concat(ref es) if es.len() == 1 => prefixes(&es[0], lits),
-        HirKind::Concat(ref es) => {
-            for e in es {
-                if let HirKind::Anchor(hir::Anchor::StartText) = *e.kind() {
-                    if !lits.is_empty() {
-                        lits.cut();
-                        break;
-                    }
-                    lits.add(Literal::empty());
-                    continue;
-                }
-                let mut lits2 = lits.to_empty();
-                prefixes(e, &mut lits2);
-                if !lits.cross_product(&lits2) || !lits2.any_complete() {
-                    // If this expression couldn't yield any literal that
-                    // could be extended, then we need to quit. Since we're
-                    // short-circuiting, we also need to freeze every member.
-                    lits.cut();
-                    break;
-                }
-            }
-        }
-        HirKind::Alternation(ref es) => {
-            alternate_literals(es, lits, prefixes);
-        }
-        _ => lits.cut(),
-    }
-}
-
-fn suffixes(expr: &Hir, lits: &mut Literals) {
-    match *expr.kind() {
-        HirKind::Literal(hir::Literal::Unicode(c)) => {
-            let mut buf = [0u8; 4];
-            let i = c.encode_utf8(&mut buf).len();
-            let buf = &mut buf[..i];
-            buf.reverse();
-            lits.cross_add(buf);
-        }
-        HirKind::Literal(hir::Literal::Byte(b)) => {
-            lits.cross_add(&[b]);
-        }
-        HirKind::Class(hir::Class::Unicode(ref cls)) => {
-            if !lits.add_char_class_reverse(cls) {
-                lits.cut();
-            }
-        }
-        HirKind::Class(hir::Class::Bytes(ref cls)) => {
-            if !lits.add_byte_class(cls) {
-                lits.cut();
-            }
-        }
-        HirKind::Group(hir::Group { ref hir, .. }) => {
-            suffixes(&**hir, lits);
-        }
-        HirKind::Repetition(ref x) => match x.kind {
-            hir::RepetitionKind::ZeroOrOne => {
-                repeat_zero_or_one_literals(&x.hir, lits, suffixes);
-            }
-            hir::RepetitionKind::ZeroOrMore => {
-                repeat_zero_or_more_literals(&x.hir, lits, suffixes);
-            }
-            hir::RepetitionKind::OneOrMore => {
-                repeat_one_or_more_literals(&x.hir, lits, suffixes);
-            }
-            hir::RepetitionKind::Range(ref rng) => {
-                let (min, max) = match *rng {
-                    hir::RepetitionRange::Exactly(m) => (m, Some(m)),
-                    hir::RepetitionRange::AtLeast(m) => (m, None),
-                    hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
-                };
-                repeat_range_literals(
-                    &x.hir, min, max, x.greedy, lits, suffixes,
-                )
-            }
-        },
-        HirKind::Concat(ref es) if es.is_empty() => {}
-        HirKind::Concat(ref es) if es.len() == 1 => suffixes(&es[0], lits),
-        HirKind::Concat(ref es) => {
-            for e in es.iter().rev() {
-                if let HirKind::Anchor(hir::Anchor::EndText) = *e.kind() {
-                    if !lits.is_empty() {
-                        lits.cut();
-                        break;
-                    }
-                    lits.add(Literal::empty());
-                    continue;
-                }
-                let mut lits2 = lits.to_empty();
-                suffixes(e, &mut lits2);
-                if !lits.cross_product(&lits2) || !lits2.any_complete() {
-                    // If this expression couldn't yield any literal that
-                    // could be extended, then we need to quit. Since we're
-                    // short-circuiting, we also need to freeze every member.
-                    lits.cut();
-                    break;
-                }
-            }
-        }
-        HirKind::Alternation(ref es) => {
-            alternate_literals(es, lits, suffixes);
-        }
-        _ => lits.cut(),
-    }
-}
-
-fn repeat_zero_or_one_literals<F: FnMut(&Hir, &mut Literals)>(
-    e: &Hir,
-    lits: &mut Literals,
-    mut f: F,
-) {
-    f(
-        &Hir::repetition(hir::Repetition {
-            kind: hir::RepetitionKind::ZeroOrMore,
-            // FIXME: Our literal extraction doesn't care about greediness.
-            // Which is partially why we're treating 'e?' as 'e*'. Namely,
-            // 'ab??' yields [Complete(ab), Complete(a)], but it should yield
-            // [Complete(a), Complete(ab)] because of the non-greediness.
-            greedy: true,
-            hir: Box::new(e.clone()),
-        }),
-        lits,
-    );
-}
-
-fn repeat_zero_or_more_literals<F: FnMut(&Hir, &mut Literals)>(
-    e: &Hir,
-    lits: &mut Literals,
-    mut f: F,
-) {
-    let (mut lits2, mut lits3) = (lits.clone(), lits.to_empty());
-    lits3.set_limit_size(lits.limit_size() / 2);
-    f(e, &mut lits3);
-
-    if lits3.is_empty() || !lits2.cross_product(&lits3) {
-        lits.cut();
-        return;
-    }
-    lits2.cut();
-    lits2.add(Literal::empty());
-    if !lits.union(lits2) {
-        lits.cut();
-    }
-}
-
-fn repeat_one_or_more_literals<F: FnMut(&Hir, &mut Literals)>(
-    e: &Hir,
-    lits: &mut Literals,
-    mut f: F,
-) {
-    f(e, lits);
-    lits.cut();
-}
-
-fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
-    e: &Hir,
-    min: u32,
-    max: Option<u32>,
-    greedy: bool,
-    lits: &mut Literals,
-    mut f: F,
-) {
-    if min == 0 {
-        // This is a bit conservative. If `max` is set, then we could
-        // treat this as a finite set of alternations. For now, we
-        // just treat it as `e*`.
-        f(
-            &Hir::repetition(hir::Repetition {
-                kind: hir::RepetitionKind::ZeroOrMore,
-                greedy,
-                hir: Box::new(e.clone()),
-            }),
-            lits,
-        );
-    } else {
-        if min > 0 {
-            let n = cmp::min(lits.limit_size, min as usize);
-            let es = iter::repeat(e.clone()).take(n).collect();
-            f(&Hir::concat(es), lits);
-            if n < min as usize || lits.contains_empty() {
-                lits.cut();
-            }
-        }
-        if max.map_or(true, |max| min < max) {
-            lits.cut();
-        }
-    }
-}
-
-fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
-    es: &[Hir],
-    lits: &mut Literals,
-    mut f: F,
-) {
-    let mut lits2 = lits.to_empty();
-    for e in es {
-        let mut lits3 = lits.to_empty();
-        lits3.set_limit_size(lits.limit_size() / 5);
-        f(e, &mut lits3);
-        if lits3.is_empty() || !lits2.union(lits3) {
-            // If we couldn't find suffixes for *any* of the
-            // alternates, then the entire alternation has to be thrown
-            // away and any existing members must be frozen. Similarly,
-            // if the union couldn't complete, stop and freeze.
-            lits.cut();
-            return;
-        }
-    }
-    if !lits.cross_product(&lits2) {
-        lits.cut();
-    }
-}
-
-impl fmt::Debug for Literals {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("Literals")
-            .field("lits", &self.lits)
-            .field("limit_size", &self.limit_size)
-            .field("limit_class", &self.limit_class)
-            .finish()
-    }
-}
-
-impl Literal {
-    /// Returns a new complete literal with the bytes given.
-    pub fn new(bytes: Vec<u8>) -> Literal {
-        Literal { v: bytes, cut: false }
-    }
-
-    /// Returns a new complete empty literal.
-    pub fn empty() -> Literal {
-        Literal { v: vec![], cut: false }
-    }
-
-    /// Returns true if this literal was "cut."
-    pub fn is_cut(&self) -> bool {
-        self.cut
-    }
-
-    /// Cuts this literal.
-    pub fn cut(&mut self) {
-        self.cut = true;
-    }
-}
-
-impl PartialEq for Literal {
-    fn eq(&self, other: &Literal) -> bool {
-        self.v == other.v
-    }
-}
-
-impl PartialOrd for Literal {
-    fn partial_cmp(&self, other: &Literal) -> Option<cmp::Ordering> {
-        self.v.partial_cmp(&other.v)
-    }
-}
-
-impl fmt::Debug for Literal {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        if self.is_cut() {
-            write!(f, "Cut({})", escape_unicode(&self.v))
-        } else {
-            write!(f, "Complete({})", escape_unicode(&self.v))
-        }
-    }
-}
-
-impl AsRef<[u8]> for Literal {
-    fn as_ref(&self) -> &[u8] {
-        &self.v
-    }
-}
-
-impl ops::Deref for Literal {
-    type Target = Vec<u8>;
-    fn deref(&self) -> &Vec<u8> {
-        &self.v
-    }
-}
-
-impl ops::DerefMut for Literal {
-    fn deref_mut(&mut self) -> &mut Vec<u8> {
-        &mut self.v
-    }
-}
-
-fn position(needle: &[u8], mut haystack: &[u8]) -> Option<usize> {
-    let mut i = 0;
-    while haystack.len() >= needle.len() {
-        if needle == &haystack[..needle.len()] {
-            return Some(i);
-        }
-        i += 1;
-        haystack = &haystack[1..];
-    }
-    None
-}
-
-fn escape_unicode(bytes: &[u8]) -> String {
-    let show = match ::std::str::from_utf8(bytes) {
-        Ok(v) => v.to_string(),
-        Err(_) => escape_bytes(bytes),
-    };
-    let mut space_escaped = String::new();
-    for c in show.chars() {
-        if c.is_whitespace() {
-            let escaped = if c as u32 <= 0x7F {
-                escape_byte(c as u8)
-            } else if c as u32 <= 0xFFFF {
-                format!(r"\u{{{:04x}}}", c as u32)
-            } else {
-                format!(r"\U{{{:08x}}}", c as u32)
-            };
-            space_escaped.push_str(&escaped);
-        } else {
-            space_escaped.push(c);
-        }
-    }
-    space_escaped
-}
-
-fn escape_bytes(bytes: &[u8]) -> String {
-    let mut s = String::new();
-    for &b in bytes {
-        s.push_str(&escape_byte(b));
-    }
-    s
-}
-
-fn escape_byte(byte: u8) -> String {
-    use std::ascii::escape_default;
-
-    let escaped: Vec<u8> = escape_default(byte).collect();
-    String::from_utf8_lossy(&escaped).into_owned()
-}
-
-fn cls_char_count(cls: &hir::ClassUnicode) -> usize {
-    cls.iter().map(|&r| 1 + (r.end as u32) - (r.start as u32)).sum::<u32>()
-        as usize
-}
-
-fn cls_byte_count(cls: &hir::ClassBytes) -> usize {
-    cls.iter().map(|&r| 1 + (r.end as u32) - (r.start as u32)).sum::<u32>()
-        as usize
-}
-
-#[cfg(test)]
-mod tests {
-    use std::fmt;
-
-    use super::{escape_bytes, Literal, Literals};
-    use crate::hir::Hir;
-    use crate::ParserBuilder;
-
-    // To make test failures easier to read.
-    #[derive(Debug, Eq, PartialEq)]
-    struct Bytes(Vec<ULiteral>);
-    #[derive(Debug, Eq, PartialEq)]
-    struct Unicode(Vec<ULiteral>);
-
-    fn escape_lits(blits: &[Literal]) -> Vec<ULiteral> {
-        let mut ulits = vec![];
-        for blit in blits {
-            ulits
-                .push(ULiteral { v: escape_bytes(&blit), cut: blit.is_cut() });
-        }
-        ulits
-    }
-
-    fn create_lits<I: IntoIterator<Item = Literal>>(it: I) -> Literals {
-        Literals {
-            lits: it.into_iter().collect(),
-            limit_size: 0,
-            limit_class: 0,
-        }
-    }
-
-    // Needs to be pub for 1.3?
-    #[derive(Clone, Eq, PartialEq)]
-    pub struct ULiteral {
-        v: String,
-        cut: bool,
-    }
-
-    impl ULiteral {
-        fn is_cut(&self) -> bool {
-            self.cut
-        }
-    }
-
-    impl fmt::Debug for ULiteral {
-        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            if self.is_cut() {
-                write!(f, "Cut({})", self.v)
-            } else {
-                write!(f, "Complete({})", self.v)
-            }
-        }
-    }
-
-    impl PartialEq<Literal> for ULiteral {
-        fn eq(&self, other: &Literal) -> bool {
-            self.v.as_bytes() == &*other.v && self.is_cut() == other.is_cut()
-        }
-    }
-
-    impl PartialEq<ULiteral> for Literal {
-        fn eq(&self, other: &ULiteral) -> bool {
-            &*self.v == other.v.as_bytes() && self.is_cut() == other.is_cut()
-        }
-    }
-
-    #[allow(non_snake_case)]
-    fn C(s: &'static str) -> ULiteral {
-        ULiteral { v: s.to_owned(), cut: true }
-    }
-    #[allow(non_snake_case)]
-    fn M(s: &'static str) -> ULiteral {
-        ULiteral { v: s.to_owned(), cut: false }
-    }
-
-    fn prefixes(lits: &mut Literals, expr: &Hir) {
-        lits.union_prefixes(expr);
-    }
-
-    fn suffixes(lits: &mut Literals, expr: &Hir) {
-        lits.union_suffixes(expr);
-    }
-
-    macro_rules! assert_lit_eq {
-        ($which:ident, $got_lits:expr, $($expected_lit:expr),*) => {{
-            let expected: Vec<ULiteral> = vec![$($expected_lit),*];
-            let lits = $got_lits;
-            assert_eq!(
-                $which(expected.clone()),
-                $which(escape_lits(lits.literals())));
-            assert_eq!(
-                !expected.is_empty() && expected.iter().all(|l| !l.is_cut()),
-                lits.all_complete());
-            assert_eq!(
-                expected.iter().any(|l| !l.is_cut()),
-                lits.any_complete());
-        }};
-    }
-
-    macro_rules! test_lit {
-        ($name:ident, $which:ident, $re:expr) => {
-            test_lit!($name, $which, $re,);
-        };
-        ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => {
-            #[test]
-            fn $name() {
-                let expr = ParserBuilder::new()
-                    .build()
-                    .parse($re)
-                    .unwrap();
-                let lits = Literals::$which(&expr);
-                assert_lit_eq!(Unicode, lits, $($lit),*);
-
-                let expr = ParserBuilder::new()
-                    .allow_invalid_utf8(true)
-                    .unicode(false)
-                    .build()
-                    .parse($re)
-                    .unwrap();
-                let lits = Literals::$which(&expr);
-                assert_lit_eq!(Bytes, lits, $($lit),*);
-            }
-        };
-    }
-
-    // ************************************************************************
-    // Tests for prefix literal extraction.
-    // ************************************************************************
-
-    // Elementary tests.
-    test_lit!(pfx_one_lit1, prefixes, "a", M("a"));
-    test_lit!(pfx_one_lit2, prefixes, "abc", M("abc"));
-    test_lit!(pfx_one_lit3, prefixes, "(?u)☃", M("\\xe2\\x98\\x83"));
-    #[cfg(feature = "unicode-case")]
-    test_lit!(pfx_one_lit4, prefixes, "(?ui)☃", M("\\xe2\\x98\\x83"));
-    test_lit!(pfx_class1, prefixes, "[1-4]", M("1"), M("2"), M("3"), M("4"));
-    test_lit!(
-        pfx_class2,
-        prefixes,
-        "(?u)[☃Ⅰ]",
-        M("\\xe2\\x85\\xa0"),
-        M("\\xe2\\x98\\x83")
-    );
-    #[cfg(feature = "unicode-case")]
-    test_lit!(
-        pfx_class3,
-        prefixes,
-        "(?ui)[☃Ⅰ]",
-        M("\\xe2\\x85\\xa0"),
-        M("\\xe2\\x85\\xb0"),
-        M("\\xe2\\x98\\x83")
-    );
-    test_lit!(pfx_one_lit_casei1, prefixes, "(?i-u)a", M("A"), M("a"));
-    test_lit!(
-        pfx_one_lit_casei2,
-        prefixes,
-        "(?i-u)abc",
-        M("ABC"),
-        M("aBC"),
-        M("AbC"),
-        M("abC"),
-        M("ABc"),
-        M("aBc"),
-        M("Abc"),
-        M("abc")
-    );
-    test_lit!(pfx_group1, prefixes, "(a)", M("a"));
-    test_lit!(pfx_rep_zero_or_one1, prefixes, "a?");
-    test_lit!(pfx_rep_zero_or_one2, prefixes, "(?:abc)?");
-    test_lit!(pfx_rep_zero_or_one_cat1, prefixes, "ab?", C("ab"), M("a"));
-    // FIXME: This should return [M("a"), M("ab")] because of the non-greedy
-    // repetition. As a work-around, we rewrite ab?? as ab*?, and thus we get
-    // a cut literal.
-    test_lit!(pfx_rep_zero_or_one_cat2, prefixes, "ab??", C("ab"), M("a"));
-    test_lit!(pfx_rep_zero_or_more1, prefixes, "a*");
-    test_lit!(pfx_rep_zero_or_more2, prefixes, "(?:abc)*");
-    test_lit!(pfx_rep_one_or_more1, prefixes, "a+", C("a"));
-    test_lit!(pfx_rep_one_or_more2, prefixes, "(?:abc)+", C("abc"));
-    test_lit!(pfx_rep_nested_one_or_more, prefixes, "(?:a+)+", C("a"));
-    test_lit!(pfx_rep_range1, prefixes, "a{0}");
-    test_lit!(pfx_rep_range2, prefixes, "a{0,}");
-    test_lit!(pfx_rep_range3, prefixes, "a{0,1}");
-    test_lit!(pfx_rep_range4, prefixes, "a{1}", M("a"));
-    test_lit!(pfx_rep_range5, prefixes, "a{2}", M("aa"));
-    test_lit!(pfx_rep_range6, prefixes, "a{1,2}", C("a"));
-    test_lit!(pfx_rep_range7, prefixes, "a{2,3}", C("aa"));
-
-    // Test regexes with concatenations.
-    test_lit!(pfx_cat1, prefixes, "(?:a)(?:b)", M("ab"));
-    test_lit!(pfx_cat2, prefixes, "[ab]z", M("az"), M("bz"));
-    test_lit!(
-        pfx_cat3,
-        prefixes,
-        "(?i-u)[ab]z",
-        M("AZ"),
-        M("BZ"),
-        M("aZ"),
-        M("bZ"),
-        M("Az"),
-        M("Bz"),
-        M("az"),
-        M("bz")
-    );
-    test_lit!(
-        pfx_cat4,
-        prefixes,
-        "[ab][yz]",
-        M("ay"),
-        M("by"),
-        M("az"),
-        M("bz")
-    );
-    test_lit!(pfx_cat5, prefixes, "a*b", C("a"), M("b"));
-    test_lit!(pfx_cat6, prefixes, "a*b*c", C("a"), C("b"), M("c"));
-    test_lit!(pfx_cat7, prefixes, "a*b*c+", C("a"), C("b"), C("c"));
-    test_lit!(pfx_cat8, prefixes, "a*b+c", C("a"), C("b"));
-    test_lit!(pfx_cat9, prefixes, "a*b+c*", C("a"), C("b"));
-    test_lit!(pfx_cat10, prefixes, "ab*", C("ab"), M("a"));
-    test_lit!(pfx_cat11, prefixes, "ab*c", C("ab"), M("ac"));
-    test_lit!(pfx_cat12, prefixes, "ab+", C("ab"));
-    test_lit!(pfx_cat13, prefixes, "ab+c", C("ab"));
-    test_lit!(pfx_cat14, prefixes, "a^", C("a"));
-    test_lit!(pfx_cat15, prefixes, "$a");
-    test_lit!(pfx_cat16, prefixes, r"ab*c", C("ab"), M("ac"));
-    test_lit!(pfx_cat17, prefixes, r"ab+c", C("ab"));
-    test_lit!(pfx_cat18, prefixes, r"z*azb", C("z"), M("azb"));
-    test_lit!(pfx_cat19, prefixes, "a.z", C("a"));
-
-    // Test regexes with alternations.
-    test_lit!(pfx_alt1, prefixes, "a|b", M("a"), M("b"));
-    test_lit!(pfx_alt2, prefixes, "[1-3]|b", M("1"), M("2"), M("3"), M("b"));
-    test_lit!(pfx_alt3, prefixes, "y(?:a|b)z", M("yaz"), M("ybz"));
-    test_lit!(pfx_alt4, prefixes, "a|b*");
-    test_lit!(pfx_alt5, prefixes, "a|b+", M("a"), C("b"));
-    test_lit!(pfx_alt6, prefixes, "a|(?:b|c*)");
-    test_lit!(
-        pfx_alt7,
-        prefixes,
-        "(a|b)*c|(a|ab)*c",
-        C("a"),
-        C("b"),
-        M("c"),
-        C("a"),
-        C("ab"),
-        M("c")
-    );
-    test_lit!(pfx_alt8, prefixes, "a*b|c", C("a"), M("b"), M("c"));
-
-    // Test regexes with empty assertions.
-    test_lit!(pfx_empty1, prefixes, "^a", M("a"));
-    test_lit!(pfx_empty2, prefixes, "a${2}", C("a"));
-    test_lit!(pfx_empty3, prefixes, "^abc", M("abc"));
-    test_lit!(pfx_empty4, prefixes, "(?:^abc)|(?:^z)", M("abc"), M("z"));
-
-    // Make sure some curious regexes have no prefixes.
-    test_lit!(pfx_nothing1, prefixes, ".");
-    test_lit!(pfx_nothing2, prefixes, "(?s).");
-    test_lit!(pfx_nothing3, prefixes, "^");
-    test_lit!(pfx_nothing4, prefixes, "$");
-    test_lit!(pfx_nothing6, prefixes, "(?m)$");
-    test_lit!(pfx_nothing7, prefixes, r"\b");
-    test_lit!(pfx_nothing8, prefixes, r"\B");
-
-    // Test a few regexes that defeat any prefix literal detection.
-    test_lit!(pfx_defeated1, prefixes, ".a");
-    test_lit!(pfx_defeated2, prefixes, "(?s).a");
-    test_lit!(pfx_defeated3, prefixes, "a*b*c*");
-    test_lit!(pfx_defeated4, prefixes, "a|.");
-    test_lit!(pfx_defeated5, prefixes, ".|a");
-    test_lit!(pfx_defeated6, prefixes, "a|^");
-    test_lit!(pfx_defeated7, prefixes, ".(?:a(?:b)(?:c))");
-    test_lit!(pfx_defeated8, prefixes, "$a");
-    test_lit!(pfx_defeated9, prefixes, "(?m)$a");
-    test_lit!(pfx_defeated10, prefixes, r"\ba");
-    test_lit!(pfx_defeated11, prefixes, r"\Ba");
-    test_lit!(pfx_defeated12, prefixes, "^*a");
-    test_lit!(pfx_defeated13, prefixes, "^+a");
-
-    test_lit!(
-        pfx_crazy1,
-        prefixes,
-        r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
-        C("Mo\\'"),
-        C("Mu\\'"),
-        C("Moam"),
-        C("Muam")
-    );
-
-    // ************************************************************************
-    // Tests for quitting prefix literal search.
-    // ************************************************************************
-
-    macro_rules! test_exhausted {
-        ($name:ident, $which:ident, $re:expr) => {
-            test_exhausted!($name, $which, $re,);
-        };
-        ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => {
-            #[test]
-            fn $name() {
-                let expr = ParserBuilder::new()
-                    .build()
-                    .parse($re)
-                    .unwrap();
-                let mut lits = Literals::empty();
-                lits.set_limit_size(20).set_limit_class(10);
-                $which(&mut lits, &expr);
-                assert_lit_eq!(Unicode, lits, $($lit),*);
-
-                let expr = ParserBuilder::new()
-                    .allow_invalid_utf8(true)
-                    .unicode(false)
-                    .build()
-                    .parse($re)
-                    .unwrap();
-                let mut lits = Literals::empty();
-                lits.set_limit_size(20).set_limit_class(10);
-                $which(&mut lits, &expr);
-                assert_lit_eq!(Bytes, lits, $($lit),*);
-            }
-        };
-    }
-
-    // These tests use a much lower limit than the default so that we can
-    // write test cases of reasonable size.
-    test_exhausted!(pfx_exhausted1, prefixes, "[a-z]");
-    test_exhausted!(pfx_exhausted2, prefixes, "[a-z]*A");
-    test_exhausted!(pfx_exhausted3, prefixes, "A[a-z]Z", C("A"));
-    test_exhausted!(
-        pfx_exhausted4,
-        prefixes,
-        "(?i-u)foobar",
-        C("FO"),
-        C("fO"),
-        C("Fo"),
-        C("fo")
-    );
-    test_exhausted!(
-        pfx_exhausted5,
-        prefixes,
-        "(?:ab){100}",
-        C("abababababababababab")
-    );
-    test_exhausted!(
-        pfx_exhausted6,
-        prefixes,
-        "(?:(?:ab){100})*cd",
-        C("ababababab"),
-        M("cd")
-    );
-    test_exhausted!(
-        pfx_exhausted7,
-        prefixes,
-        "z(?:(?:ab){100})*cd",
-        C("zababababab"),
-        M("zcd")
-    );
-    test_exhausted!(
-        pfx_exhausted8,
-        prefixes,
-        "aaaaaaaaaaaaaaaaaaaaz",
-        C("aaaaaaaaaaaaaaaaaaaa")
-    );
-
-    // ************************************************************************
-    // Tests for suffix literal extraction.
-    // ************************************************************************
-
-    // Elementary tests.
- test_lit!(sfx_one_lit1, suffixes, "a", M("a")); - test_lit!(sfx_one_lit2, suffixes, "abc", M("abc")); - test_lit!(sfx_one_lit3, suffixes, "(?u)☃", M("\\xe2\\x98\\x83")); - #[cfg(feature = "unicode-case")] - test_lit!(sfx_one_lit4, suffixes, "(?ui)☃", M("\\xe2\\x98\\x83")); - test_lit!(sfx_class1, suffixes, "[1-4]", M("1"), M("2"), M("3"), M("4")); - test_lit!( - sfx_class2, - suffixes, - "(?u)[☃Ⅰ]", - M("\\xe2\\x85\\xa0"), - M("\\xe2\\x98\\x83") - ); - #[cfg(feature = "unicode-case")] - test_lit!( - sfx_class3, - suffixes, - "(?ui)[☃Ⅰ]", - M("\\xe2\\x85\\xa0"), - M("\\xe2\\x85\\xb0"), - M("\\xe2\\x98\\x83") - ); - test_lit!(sfx_one_lit_casei1, suffixes, "(?i-u)a", M("A"), M("a")); - test_lit!( - sfx_one_lit_casei2, - suffixes, - "(?i-u)abc", - M("ABC"), - M("ABc"), - M("AbC"), - M("Abc"), - M("aBC"), - M("aBc"), - M("abC"), - M("abc") - ); - test_lit!(sfx_group1, suffixes, "(a)", M("a")); - test_lit!(sfx_rep_zero_or_one1, suffixes, "a?"); - test_lit!(sfx_rep_zero_or_one2, suffixes, "(?:abc)?"); - test_lit!(sfx_rep_zero_or_more1, suffixes, "a*"); - test_lit!(sfx_rep_zero_or_more2, suffixes, "(?:abc)*"); - test_lit!(sfx_rep_one_or_more1, suffixes, "a+", C("a")); - test_lit!(sfx_rep_one_or_more2, suffixes, "(?:abc)+", C("abc")); - test_lit!(sfx_rep_nested_one_or_more, suffixes, "(?:a+)+", C("a")); - test_lit!(sfx_rep_range1, suffixes, "a{0}"); - test_lit!(sfx_rep_range2, suffixes, "a{0,}"); - test_lit!(sfx_rep_range3, suffixes, "a{0,1}"); - test_lit!(sfx_rep_range4, suffixes, "a{1}", M("a")); - test_lit!(sfx_rep_range5, suffixes, "a{2}", M("aa")); - test_lit!(sfx_rep_range6, suffixes, "a{1,2}", C("a")); - test_lit!(sfx_rep_range7, suffixes, "a{2,3}", C("aa")); - - // Test regexes with concatenations. - test_lit!(sfx_cat1, suffixes, "(?:a)(?:b)", M("ab")); - test_lit!(sfx_cat2, suffixes, "[ab]z", M("az"), M("bz")); - test_lit!( - sfx_cat3, - suffixes, - "(?i-u)[ab]z", - M("AZ"), - M("Az"), - M("BZ"), - M("Bz"), - M("aZ"), - M("az"), - M("bZ"), - M("bz") - ); - test_lit!( - sfx_cat4, - suffixes, - "[ab][yz]", - M("ay"), - M("az"), - M("by"), - M("bz") - ); - test_lit!(sfx_cat5, suffixes, "a*b", C("ab"), M("b")); - test_lit!(sfx_cat6, suffixes, "a*b*c", C("bc"), C("ac"), M("c")); - test_lit!(sfx_cat7, suffixes, "a*b*c+", C("c")); - test_lit!(sfx_cat8, suffixes, "a*b+c", C("bc")); - test_lit!(sfx_cat9, suffixes, "a*b+c*", C("c"), C("b")); - test_lit!(sfx_cat10, suffixes, "ab*", C("b"), M("a")); - test_lit!(sfx_cat11, suffixes, "ab*c", C("bc"), M("ac")); - test_lit!(sfx_cat12, suffixes, "ab+", C("b")); - test_lit!(sfx_cat13, suffixes, "ab+c", C("bc")); - test_lit!(sfx_cat14, suffixes, "a^"); - test_lit!(sfx_cat15, suffixes, "$a", C("a")); - test_lit!(sfx_cat16, suffixes, r"ab*c", C("bc"), M("ac")); - test_lit!(sfx_cat17, suffixes, r"ab+c", C("bc")); - test_lit!(sfx_cat18, suffixes, r"z*azb", C("zazb"), M("azb")); - test_lit!(sfx_cat19, suffixes, "a.z", C("z")); - - // Test regexes with alternations. - test_lit!(sfx_alt1, suffixes, "a|b", M("a"), M("b")); - test_lit!(sfx_alt2, suffixes, "[1-3]|b", M("1"), M("2"), M("3"), M("b")); - test_lit!(sfx_alt3, suffixes, "y(?:a|b)z", M("yaz"), M("ybz")); - test_lit!(sfx_alt4, suffixes, "a|b*"); - test_lit!(sfx_alt5, suffixes, "a|b+", M("a"), C("b")); - test_lit!(sfx_alt6, suffixes, "a|(?:b|c*)"); - test_lit!( - sfx_alt7, - suffixes, - "(a|b)*c|(a|ab)*c", - C("ac"), - C("bc"), - M("c"), - C("ac"), - C("abc"), - M("c") - ); - test_lit!(sfx_alt8, suffixes, "a*b|c", C("ab"), M("b"), M("c")); - - // Test regexes with empty assertions. 
- test_lit!(sfx_empty1, suffixes, "a$", M("a")); - test_lit!(sfx_empty2, suffixes, "${2}a", C("a")); - - // Make sure some curious regexes have no suffixes. - test_lit!(sfx_nothing1, suffixes, "."); - test_lit!(sfx_nothing2, suffixes, "(?s)."); - test_lit!(sfx_nothing3, suffixes, "^"); - test_lit!(sfx_nothing4, suffixes, "$"); - test_lit!(sfx_nothing6, suffixes, "(?m)$"); - test_lit!(sfx_nothing7, suffixes, r"\b"); - test_lit!(sfx_nothing8, suffixes, r"\B"); - - // Test a few regexes that defeat any suffix literal detection. - test_lit!(sfx_defeated1, suffixes, "a."); - test_lit!(sfx_defeated2, suffixes, "(?s)a."); - test_lit!(sfx_defeated3, suffixes, "a*b*c*"); - test_lit!(sfx_defeated4, suffixes, "a|."); - test_lit!(sfx_defeated5, suffixes, ".|a"); - test_lit!(sfx_defeated6, suffixes, "a|^"); - test_lit!(sfx_defeated7, suffixes, "(?:a(?:b)(?:c))."); - test_lit!(sfx_defeated8, suffixes, "a^"); - test_lit!(sfx_defeated9, suffixes, "(?m)a$"); - test_lit!(sfx_defeated10, suffixes, r"a\b"); - test_lit!(sfx_defeated11, suffixes, r"a\B"); - test_lit!(sfx_defeated12, suffixes, "a^*"); - test_lit!(sfx_defeated13, suffixes, "a^+"); - - // These test use a much lower limit than the default so that we can - // write test cases of reasonable size. - test_exhausted!(sfx_exhausted1, suffixes, "[a-z]"); - test_exhausted!(sfx_exhausted2, suffixes, "A[a-z]*"); - test_exhausted!(sfx_exhausted3, suffixes, "A[a-z]Z", C("Z")); - test_exhausted!( - sfx_exhausted4, - suffixes, - "(?i-u)foobar", - C("AR"), - C("Ar"), - C("aR"), - C("ar") - ); - test_exhausted!( - sfx_exhausted5, - suffixes, - "(?:ab){100}", - C("abababababababababab") - ); - test_exhausted!( - sfx_exhausted6, - suffixes, - "cd(?:(?:ab){100})*", - C("ababababab"), - M("cd") - ); - test_exhausted!( - sfx_exhausted7, - suffixes, - "cd(?:(?:ab){100})*z", - C("abababababz"), - M("cdz") - ); - test_exhausted!( - sfx_exhausted8, - suffixes, - "zaaaaaaaaaaaaaaaaaaaa", - C("aaaaaaaaaaaaaaaaaaaa") - ); - - // ************************************************************************ - // Tests for generating unambiguous literal sets. - // ************************************************************************ - - macro_rules! 
test_unamb { - ($name:ident, $given:expr, $expected:expr) => { - #[test] - fn $name() { - let given: Vec = $given - .into_iter() - .map(|ul| { - let cut = ul.is_cut(); - Literal { v: ul.v.into_bytes(), cut: cut } - }) - .collect(); - let lits = create_lits(given); - let got = lits.unambiguous_prefixes(); - assert_eq!($expected, escape_lits(got.literals())); - } - }; - } - - test_unamb!(unambiguous1, vec![M("z"), M("azb")], vec![C("a"), C("z")]); - test_unamb!( - unambiguous2, - vec![M("zaaaaaa"), M("aa")], - vec![C("aa"), C("z")] - ); - test_unamb!( - unambiguous3, - vec![M("Sherlock"), M("Watson")], - vec![M("Sherlock"), M("Watson")] - ); - test_unamb!(unambiguous4, vec![M("abc"), M("bc")], vec![C("a"), C("bc")]); - test_unamb!(unambiguous5, vec![M("bc"), M("abc")], vec![C("a"), C("bc")]); - test_unamb!(unambiguous6, vec![M("a"), M("aa")], vec![C("a")]); - test_unamb!(unambiguous7, vec![M("aa"), M("a")], vec![C("a")]); - test_unamb!(unambiguous8, vec![M("ab"), M("a")], vec![C("a")]); - test_unamb!( - unambiguous9, - vec![M("ac"), M("bc"), M("c"), M("ac"), M("abc"), M("c")], - vec![C("a"), C("b"), C("c")] - ); - test_unamb!( - unambiguous10, - vec![M("Mo'"), M("Mu'"), M("Mo"), M("Mu")], - vec![C("Mo"), C("Mu")] - ); - test_unamb!( - unambiguous11, - vec![M("zazb"), M("azb")], - vec![C("a"), C("z")] - ); - test_unamb!(unambiguous12, vec![M("foo"), C("foo")], vec![C("foo")]); - test_unamb!( - unambiguous13, - vec![M("ABCX"), M("CDAX"), M("BCX")], - vec![C("A"), C("BCX"), C("CD")] - ); - test_unamb!( - unambiguous14, - vec![M("IMGX"), M("MVIX"), M("MGX"), M("DSX")], - vec![M("DSX"), C("I"), C("MGX"), C("MV")] - ); - test_unamb!( - unambiguous15, - vec![M("IMG_"), M("MG_"), M("CIMG")], - vec![C("C"), C("I"), C("MG_")] - ); - - // ************************************************************************ - // Tests for suffix trimming. - // ************************************************************************ - macro_rules! test_trim { - ($name:ident, $trim:expr, $given:expr, $expected:expr) => { - #[test] - fn $name() { - let given: Vec = $given - .into_iter() - .map(|ul| { - let cut = ul.is_cut(); - Literal { v: ul.v.into_bytes(), cut: cut } - }) - .collect(); - let lits = create_lits(given); - let got = lits.trim_suffix($trim).unwrap(); - assert_eq!($expected, escape_lits(got.literals())); - } - }; - } - - test_trim!(trim1, 1, vec![M("ab"), M("yz")], vec![C("a"), C("y")]); - test_trim!(trim2, 1, vec![M("abc"), M("abd")], vec![C("ab")]); - test_trim!(trim3, 2, vec![M("abc"), M("abd")], vec![C("a")]); - test_trim!(trim4, 2, vec![M("abc"), M("ghij")], vec![C("a"), C("gh")]); - - // ************************************************************************ - // Tests for longest common prefix. - // ************************************************************************ - - macro_rules! 
test_lcp {
- ($name:ident, $given:expr, $expected:expr) => {
- #[test]
- fn $name() {
- let given: Vec<Literal> = $given
- .into_iter()
- .map(|s: &str| Literal {
- v: s.to_owned().into_bytes(),
- cut: false,
- })
- .collect();
- let lits = create_lits(given);
- let got = lits.longest_common_prefix();
- assert_eq!($expected, escape_bytes(got));
- }
- };
- }
-
- test_lcp!(lcp1, vec!["a"], "a");
- test_lcp!(lcp2, vec![], "");
- test_lcp!(lcp3, vec!["a", "b"], "");
- test_lcp!(lcp4, vec!["ab", "ab"], "ab");
- test_lcp!(lcp5, vec!["ab", "a"], "a");
- test_lcp!(lcp6, vec!["a", "ab"], "a");
- test_lcp!(lcp7, vec!["ab", "b"], "");
- test_lcp!(lcp8, vec!["b", "ab"], "");
- test_lcp!(lcp9, vec!["foobar", "foobaz"], "fooba");
- test_lcp!(lcp10, vec!["foobar", "foobaz", "a"], "");
- test_lcp!(lcp11, vec!["a", "foobar", "foobaz"], "");
- test_lcp!(lcp12, vec!["foo", "flub", "flab", "floo"], "f");
-
- // ************************************************************************
- // Tests for longest common suffix.
- // ************************************************************************
-
- macro_rules! test_lcs {
- ($name:ident, $given:expr, $expected:expr) => {
- #[test]
- fn $name() {
- let given: Vec<Literal> = $given
- .into_iter()
- .map(|s: &str| Literal {
- v: s.to_owned().into_bytes(),
- cut: false,
- })
- .collect();
- let lits = create_lits(given);
- let got = lits.longest_common_suffix();
- assert_eq!($expected, escape_bytes(got));
- }
- };
- }
-
- test_lcs!(lcs1, vec!["a"], "a");
- test_lcs!(lcs2, vec![], "");
- test_lcs!(lcs3, vec!["a", "b"], "");
- test_lcs!(lcs4, vec!["ab", "ab"], "ab");
- test_lcs!(lcs5, vec!["ab", "a"], "");
- test_lcs!(lcs6, vec!["a", "ab"], "");
- test_lcs!(lcs7, vec!["ab", "b"], "b");
- test_lcs!(lcs8, vec!["b", "ab"], "b");
- test_lcs!(lcs9, vec!["barfoo", "bazfoo"], "foo");
- test_lcs!(lcs10, vec!["barfoo", "bazfoo", "a"], "");
- test_lcs!(lcs11, vec!["a", "barfoo", "bazfoo"], "");
- test_lcs!(lcs12, vec!["flub", "bub", "boob", "dub"], "b");
-}
diff --git a/vendor/regex-syntax/src/hir/mod.rs b/vendor/regex-syntax/src/hir/mod.rs
index 156bcc2..ce38ead 100644
--- a/vendor/regex-syntax/src/hir/mod.rs
+++ b/vendor/regex-syntax/src/hir/mod.rs
@@ -1,19 +1,42 @@
/*!
-Defines a high-level intermediate representation for regular expressions.
+Defines a high-level intermediate (HIR) representation for regular expressions.
+
+The HIR is represented by the [`Hir`] type, and it is principally constructed via
+[translation](translate) from an [`Ast`](crate::ast::Ast). Alternatively, users
+may use the smart constructors defined on `Hir` to build their own by hand. The
+smart constructors simultaneously simplify and "optimize" the HIR, and are also
+the same routines used by translation.
+
+Most regex engines only have an HIR like this, and usually construct it
+directly from the concrete syntax. This crate however first parses the
+concrete syntax into an `Ast`, and only then creates the HIR from the `Ast`,
+as mentioned above. It's done this way to facilitate better error reporting,
+and to have a structured representation of a regex that faithfully represents
+its concrete syntax. Namely, while an `Hir` value can be converted back to an
+equivalent regex pattern string, it is unlikely to look like the original due
+to its simplified structure.
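The translation pipeline described above can be exercised end to end with the crate's top-level `parse` convenience routine (the same one used by the doc examples later in this patch). A minimal sketch:

```rust
// Sketch: round-tripping a pattern through the Ast -> Hir pipeline.
use regex_syntax::{hir::HirKind, parse};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // `(?i)k` is parsed into an Ast and then translated into an Hir. The
    // `i` flag is compiled away into a character class of case variants.
    let hir = parse(r"(?i)k")?;
    assert!(matches!(hir.kind(), HirKind::Class(_)));
    // Displaying the Hir prints a semantically equivalent pattern, but it
    // need not look like the original concrete syntax.
    println!("{}", hir);
    Ok(())
}
```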
*/ -use std::char; -use std::cmp; -use std::error; -use std::fmt; -use std::result; -use std::u8; -use crate::ast::Span; -use crate::hir::interval::{Interval, IntervalSet, IntervalSetIter}; -use crate::unicode; +use core::{char, cmp}; -pub use crate::hir::visitor::{visit, Visitor}; -pub use crate::unicode::CaseFoldError; +use alloc::{ + boxed::Box, + format, + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::{ + ast::Span, + hir::interval::{Interval, IntervalSet, IntervalSetIter}, + unicode, +}; + +pub use crate::{ + hir::visitor::{visit, Visitor}, + unicode::CaseFoldError, +}; mod interval; pub mod literal; @@ -53,14 +76,21 @@ impl Error { } /// The type of an error that occurred while building an `Hir`. +/// +/// This error type is marked as `non_exhaustive`. This means that adding a +/// new variant is not considered a breaking change. +#[non_exhaustive] #[derive(Clone, Debug, Eq, PartialEq)] pub enum ErrorKind { /// This error occurs when a Unicode feature is used when Unicode /// support is disabled. For example `(?-u:\pL)` would trigger this error. UnicodeNotAllowed, /// This error occurs when translating a pattern that could match a byte - /// sequence that isn't UTF-8 and `allow_invalid_utf8` was disabled. + /// sequence that isn't UTF-8 and `utf8` was enabled. InvalidUtf8, + /// This error occurs when one uses a non-ASCII byte for a line terminator, + /// but where Unicode mode is enabled and UTF-8 mode is disabled. + InvalidLineTerminator, /// This occurs when an unrecognized Unicode property name could not /// be found. UnicodePropertyNotFound, @@ -75,29 +105,25 @@ pub enum ErrorKind { /// available, and the regular expression required Unicode aware case /// insensitivity. UnicodeCaseUnavailable, - /// This occurs when the translator attempts to construct a character class - /// that is empty. - /// - /// Note that this restriction in the translator may be removed in the - /// future. - EmptyClassNotAllowed, - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, } -impl ErrorKind { - // TODO: Remove this method entirely on the next breaking semver release. - #[allow(deprecated)] - fn description(&self) -> &str { +#[cfg(feature = "std")] +impl std::error::Error for Error {} + +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + crate::error::Formatter::from(self).fmt(f) + } +} + +impl core::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use self::ErrorKind::*; - match *self { + + let msg = match *self { UnicodeNotAllowed => "Unicode not allowed here", InvalidUtf8 => "pattern can match invalid UTF-8", + InvalidLineTerminator => "invalid line terminator, must be ASCII", UnicodePropertyNotFound => "Unicode property not found", UnicodePropertyValueNotFound => "Unicode property value not found", UnicodePerlClassNotFound => { @@ -108,112 +134,82 @@ impl ErrorKind { "Unicode-aware case insensitivity matching is not available \ (make sure the unicode-case feature is enabled)" } - EmptyClassNotAllowed => "empty character classes are not allowed", - __Nonexhaustive => unreachable!(), - } - } -} - -impl error::Error for Error { - // TODO: Remove this method entirely on the next breaking semver release. 
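Since `ErrorKind` is now marked `#[non_exhaustive]`, downstream matches need a wildcard arm to stay source-compatible with future variants. A small sketch using only the variant names shown above:

```rust
use regex_syntax::hir::ErrorKind;

// Because `ErrorKind` is non-exhaustive, the `_` arm is required: new
// variants added upstream must not break this match.
fn describe(kind: &ErrorKind) -> &'static str {
    match *kind {
        ErrorKind::UnicodeNotAllowed => "Unicode not allowed here",
        ErrorKind::InvalidUtf8 => "pattern can match invalid UTF-8",
        _ => "some other translation error",
    }
}
```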
- #[allow(deprecated)]
- fn description(&self) -> &str {
- self.kind.description()
- }
-}
-
-impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- crate::error::Formatter::from(self).fmt(f)
- }
-}
-
-impl fmt::Display for ErrorKind {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- // TODO: Remove this on the next breaking semver release.
- #[allow(deprecated)]
- f.write_str(self.description())
+ };
+ f.write_str(msg)
}
}

/// A high-level intermediate representation (HIR) for a regular expression.
///
-/// The HIR of a regular expression represents an intermediate step between its
-/// abstract syntax (a structured description of the concrete syntax) and
-/// compiled byte codes. The purpose of HIR is to make regular expressions
+/// An HIR value is a combination of a [`HirKind`] and a set of [`Properties`].
+/// An `HirKind` indicates what kind of regular expression it is (a literal,
+/// a repetition, a look-around assertion, etc.), whereas a `Properties`
+/// describes various facts about the regular expression. For example, whether
+/// it matches UTF-8 or if it matches the empty string.
+///
+/// The HIR of a regular expression represents an intermediate step between
+/// its abstract syntax (a structured description of the concrete syntax) and
+/// an actual regex matcher. The purpose of HIR is to make regular expressions
/// easier to analyze. In particular, the AST is much more complex than the
/// HIR. For example, while an AST supports arbitrarily nested character
/// classes, the HIR will flatten all nested classes into a single set. The HIR
/// will also "compile away" every flag present in the concrete syntax. For
/// example, users of HIR expressions never need to worry about case folding;
-/// it is handled automatically by the translator (e.g., by translating `(?i)A`
-/// to `[aA]`).
-///
-/// If the HIR was produced by a translator that disallows invalid UTF-8, then
-/// the HIR is guaranteed to match UTF-8 exclusively.
-///
-/// This type defines its own destructor that uses constant stack space and
-/// heap space proportional to the size of the HIR.
+/// it is handled automatically by the translator (e.g., by translating
+/// `(?i:A)` to `[aA]`).
///
/// The specific type of an HIR expression can be accessed via its `kind`
/// or `into_kind` methods. This extra level of indirection exists for two
/// reasons:
///
-/// 1. Construction of an HIR expression *must* use the constructor methods
-/// on this `Hir` type instead of building the `HirKind` values directly.
-/// This permits construction to enforce invariants like "concatenations
-/// always consist of two or more sub-expressions."
+/// 1. Construction of an HIR expression *must* use the constructor methods on
+/// this `Hir` type instead of building the `HirKind` values directly. This
+/// permits construction to enforce invariants like "concatenations always
+/// consist of two or more sub-expressions."
/// 2. Every HIR expression contains attributes that are defined inductively,
-/// and can be computed cheaply during the construction process. For
-/// example, one such attribute is whether the expression must match at the
-/// beginning of the text.
+/// and can be computed cheaply during the construction process. For example,
+/// one such attribute is whether the expression must match at the beginning of
+/// the haystack.
+///
+/// In particular, if you have an `HirKind` value, then there is intentionally
+/// no way to build an `Hir` value from it.
You instead need to do case +/// analysis on the `HirKind` value and build the `Hir` value using its smart +/// constructors. +/// +/// # UTF-8 +/// +/// If the HIR was produced by a translator with +/// [`TranslatorBuilder::utf8`](translate::TranslatorBuilder::utf8) enabled, +/// then the HIR is guaranteed to match UTF-8 exclusively for all non-empty +/// matches. +/// +/// For empty matches, those can occur at any position. It is the +/// responsibility of the regex engine to determine whether empty matches are +/// permitted between the code units of a single codepoint. +/// +/// # Stack space +/// +/// This type defines its own destructor that uses constant stack space and +/// heap space proportional to the size of the HIR. /// /// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular /// expression pattern string, and uses constant stack space and heap space -/// proportional to the size of the `Hir`. -#[derive(Clone, Debug, Eq, PartialEq)] +/// proportional to the size of the `Hir`. The regex it prints is guaranteed to +/// be _semantically_ equivalent to the original concrete syntax, but it may +/// look very different. (And potentially not practically readable by a human.) +/// +/// An `Hir`'s `fmt::Debug` implementation currently does not use constant +/// stack space. The implementation will also suppress some details (such as +/// the `Properties` inlined into every `Hir` value to make it less noisy). +#[derive(Clone, Eq, PartialEq)] pub struct Hir { /// The underlying HIR kind. kind: HirKind, /// Analysis info about this HIR, computed during construction. - info: HirInfo, -} - -/// The kind of an arbitrary `Hir` expression. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum HirKind { - /// The empty regular expression, which matches everything, including the - /// empty string. - Empty, - /// A single literal character that matches exactly this character. - Literal(Literal), - /// A single character class that matches any of the characters in the - /// class. A class can either consist of Unicode scalar values as - /// characters, or it can use bytes. - Class(Class), - /// An anchor assertion. An anchor assertion match always has zero length. - Anchor(Anchor), - /// A word boundary assertion, which may or may not be Unicode aware. A - /// word boundary assertion match always has zero length. - WordBoundary(WordBoundary), - /// A repetition operation applied to a child expression. - Repetition(Repetition), - /// A possibly capturing group, which contains a child expression. - Group(Group), - /// A concatenation of expressions. A concatenation always has at least two - /// child expressions. - /// - /// A concatenation matches only if each of its child expression matches - /// one after the other. - Concat(Vec), - /// An alternation of expressions. An alternation always has at least two - /// child expressions. - /// - /// An alternation matches only if at least one of its child expression - /// matches. If multiple expressions match, then the leftmost is preferred. - Alternation(Vec), + props: Properties, } +/// Methods for accessing the underlying `HirKind` and `Properties`. impl Hir { /// Returns a reference to the underlying HIR kind. pub fn kind(&self) -> &HirKind { @@ -223,563 +219,602 @@ impl Hir { /// Consumes ownership of this HIR expression and returns its underlying /// `HirKind`. 
pub fn into_kind(mut self) -> HirKind {
- use std::mem;
- mem::replace(&mut self.kind, HirKind::Empty)
+ core::mem::replace(&mut self.kind, HirKind::Empty)
+ }
+
+ /// Returns the properties computed for this `Hir`.
+ pub fn properties(&self) -> &Properties {
+ &self.props
}

+ /// Splits this HIR into its constituent parts.
+ ///
+ /// This is useful because `let Hir { kind, props } = hir;` does not work
+ /// because of `Hir`'s custom `Drop` implementation.
+ fn into_parts(mut self) -> (HirKind, Properties) {
+ (
+ core::mem::replace(&mut self.kind, HirKind::Empty),
+ core::mem::replace(&mut self.props, Properties::empty()),
+ )
+ }
+}
+
+/// Smart constructors for HIR values.
+///
+/// These constructors are called "smart" because they do inductive work or
+/// simplifications. For example, calling `Hir::repetition` with a repetition
+/// like `a{0}` will actually return a `Hir` with a `HirKind::Empty` kind
+/// since it is equivalent to an empty regex. Another example is calling
+/// `Hir::concat(vec![expr])`. Instead of getting a `HirKind::Concat`, you'll
+/// just get back the original `expr` since it's precisely equivalent.
+///
+/// Smart constructors enable maintaining invariants about the HIR data type
+/// while also simultaneously keeping the representation as simple as possible.
+impl Hir {
/// Returns an empty HIR expression.
///
/// An empty HIR expression always matches, including the empty string.
+ #[inline]
pub fn empty() -> Hir {
- let mut info = HirInfo::new();
- info.set_always_utf8(true);
- info.set_all_assertions(true);
- info.set_anchored_start(false);
- info.set_anchored_end(false);
- info.set_line_anchored_start(false);
- info.set_line_anchored_end(false);
- info.set_any_anchored_start(false);
- info.set_any_anchored_end(false);
- info.set_match_empty(true);
- info.set_literal(false);
- info.set_alternation_literal(false);
- Hir { kind: HirKind::Empty, info }
+ let props = Properties::empty();
+ Hir { kind: HirKind::Empty, props }
+ }
+
+ /// Returns an HIR expression that can never match anything. That is,
+ /// the size of the set of strings in the language described by the HIR
+ /// returned is `0`.
+ ///
+ /// This is distinct from [`Hir::empty`] in that the empty string matches
+ /// the HIR returned by `Hir::empty`. That is, the set of strings in the
+ /// language described by `Hir::empty` is non-empty.
+ ///
+ /// Note that currently, the HIR returned uses an empty character class to
+ /// indicate that nothing can match. An equivalent expression that cannot
+ /// match is an empty alternation, but all such "fail" expressions are
+ /// normalized (via smart constructors) to empty character classes. This is
+ /// because empty character classes can be spelled in the concrete syntax
+ /// of a regex (e.g., `\P{any}` or `(?-u:[^\x00-\xFF])` or `[a&&b]`), but
+ /// empty alternations cannot.
+ #[inline]
+ pub fn fail() -> Hir {
+ let class = Class::Bytes(ClassBytes::empty());
+ let props = Properties::class(&class);
+ // We can't just call Hir::class here because it defers to Hir::fail
+ // in order to canonicalize the Hir value used to represent "cannot
+ // match."
+ Hir { kind: HirKind::Class(class), props }
}

/// Creates a literal HIR expression.
///
- /// If the given literal has a `Byte` variant with an ASCII byte, then this
- /// method panics. This enforces the invariant that `Byte` variants are
- /// only used to express matching of invalid UTF-8.
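The difference between `Hir::empty` and `Hir::fail` is observable through the computed properties. A short sketch:

```rust
use regex_syntax::hir::{Hir, HirKind};

fn main() {
    // `fail` is canonicalized to an empty character class, per the smart
    // constructor above.
    let fail = Hir::fail();
    assert!(matches!(fail.kind(), HirKind::Class(_)));
    // Its language is empty, so no minimum length is defined at all...
    assert_eq!(None, fail.properties().minimum_len());
    // ...while `empty` matches the empty string, giving a minimum of 0.
    assert_eq!(Some(0), Hir::empty().properties().minimum_len());
}
```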
- pub fn literal(lit: Literal) -> Hir {
- if let Literal::Byte(b) = lit {
- assert!(b > 0x7F);
+ /// This accepts anything that can be converted into a `Box<[u8]>`.
+ ///
+ /// Note that there is no mechanism for storing a `char` or a `Box<str>`
+ /// in an HIR. Everything is "just bytes." Whether a `Literal` (or
+ /// any HIR node) matches valid UTF-8 exclusively can be queried via
+ /// [`Properties::is_utf8`].
+ ///
+ /// # Example
+ ///
+ /// This example shows that concatenations of `Literal` HIR values will
+ /// automatically get flattened and combined together. So for example, even
+ /// if you concat multiple `Literal` values that are themselves not valid
+ /// UTF-8, they might add up to valid UTF-8. This also demonstrates just
+ /// how "smart" Hir's smart constructors are.
+ ///
+ /// ```
+ /// use regex_syntax::hir::{Hir, HirKind, Literal};
+ ///
+ /// let literals = vec![
+ /// Hir::literal([0xE2]),
+ /// Hir::literal([0x98]),
+ /// Hir::literal([0x83]),
+ /// ];
+ /// // Each literal, on its own, is invalid UTF-8.
+ /// assert!(literals.iter().all(|hir| !hir.properties().is_utf8()));
+ ///
+ /// let concat = Hir::concat(literals);
+ /// // But the concatenation is valid UTF-8!
+ /// assert!(concat.properties().is_utf8());
+ ///
+ /// // And also notice that the literals have been concatenated into a
+ /// // single `Literal`, to the point where there is no explicit `Concat`!
+ /// let expected = HirKind::Literal(Literal(Box::from("☃".as_bytes())));
+ /// assert_eq!(&expected, concat.kind());
+ /// ```
+ #[inline]
+ pub fn literal<B: Into<Box<[u8]>>>(lit: B) -> Hir {
+ let bytes = lit.into();
+ if bytes.is_empty() {
+ return Hir::empty();
}
- let mut info = HirInfo::new();
- info.set_always_utf8(lit.is_unicode());
- info.set_all_assertions(false);
- info.set_anchored_start(false);
- info.set_anchored_end(false);
- info.set_line_anchored_start(false);
- info.set_line_anchored_end(false);
- info.set_any_anchored_start(false);
- info.set_any_anchored_end(false);
- info.set_match_empty(false);
- info.set_literal(true);
- info.set_alternation_literal(true);
- Hir { kind: HirKind::Literal(lit), info }
- }
-
- /// Creates a class HIR expression.
+ let lit = Literal(bytes);
+ let props = Properties::literal(&lit);
+ Hir { kind: HirKind::Literal(lit), props }
+ }
+
+ /// Creates a class HIR expression. The class may either be defined over
+ /// ranges of Unicode codepoints or ranges of raw byte values.
+ ///
+ /// Note that an empty class is permitted. An empty class is equivalent to
+ /// `Hir::fail()`.
+ #[inline]
pub fn class(class: Class) -> Hir {
- let mut info = HirInfo::new();
- info.set_always_utf8(class.is_always_utf8());
- info.set_all_assertions(false);
- info.set_anchored_start(false);
- info.set_anchored_end(false);
- info.set_line_anchored_start(false);
- info.set_line_anchored_end(false);
- info.set_any_anchored_start(false);
- info.set_any_anchored_end(false);
- info.set_match_empty(false);
- info.set_literal(false);
- info.set_alternation_literal(false);
- Hir { kind: HirKind::Class(class), info }
- }
-
- /// Creates an anchor assertion HIR expression.
- pub fn anchor(anchor: Anchor) -> Hir { - let mut info = HirInfo::new(); - info.set_always_utf8(true); - info.set_all_assertions(true); - info.set_anchored_start(false); - info.set_anchored_end(false); - info.set_line_anchored_start(false); - info.set_line_anchored_end(false); - info.set_any_anchored_start(false); - info.set_any_anchored_end(false); - info.set_match_empty(true); - info.set_literal(false); - info.set_alternation_literal(false); - if let Anchor::StartText = anchor { - info.set_anchored_start(true); - info.set_line_anchored_start(true); - info.set_any_anchored_start(true); - } - if let Anchor::EndText = anchor { - info.set_anchored_end(true); - info.set_line_anchored_end(true); - info.set_any_anchored_end(true); + if class.is_empty() { + return Hir::fail(); + } else if let Some(bytes) = class.literal() { + return Hir::literal(bytes); } - if let Anchor::StartLine = anchor { - info.set_line_anchored_start(true); - } - if let Anchor::EndLine = anchor { - info.set_line_anchored_end(true); + let props = Properties::class(&class); + Hir { kind: HirKind::Class(class), props } + } + + /// Creates a look-around assertion HIR expression. + #[inline] + pub fn look(look: Look) -> Hir { + let props = Properties::look(look); + Hir { kind: HirKind::Look(look), props } + } + + /// Creates a repetition HIR expression. + #[inline] + pub fn repetition(mut rep: Repetition) -> Hir { + // If the sub-expression of a repetition can only match the empty + // string, then we force its maximum to be at most 1. + if rep.sub.properties().maximum_len() == Some(0) { + rep.min = cmp::min(rep.min, 1); + rep.max = rep.max.map(|n| cmp::min(n, 1)).or(Some(1)); } - Hir { kind: HirKind::Anchor(anchor), info } - } - - /// Creates a word boundary assertion HIR expression. - pub fn word_boundary(word_boundary: WordBoundary) -> Hir { - let mut info = HirInfo::new(); - info.set_always_utf8(true); - info.set_all_assertions(true); - info.set_anchored_start(false); - info.set_anchored_end(false); - info.set_line_anchored_start(false); - info.set_line_anchored_end(false); - info.set_any_anchored_start(false); - info.set_any_anchored_end(false); - info.set_literal(false); - info.set_alternation_literal(false); - // A negated word boundary matches '', so that's fine. But \b does not - // match \b, so why do we say it can match the empty string? Well, - // because, if you search for \b against 'a', it will report [0, 0) and - // [1, 1) as matches, and both of those matches correspond to the empty - // string. Thus, only *certain* empty strings match \b, which similarly - // applies to \B. - info.set_match_empty(true); - // Negated ASCII word boundaries can match invalid UTF-8. - if let WordBoundary::AsciiNegate = word_boundary { - info.set_always_utf8(false); + // The regex 'a{0}' is always equivalent to the empty regex. This is + // true even when 'a' is an expression that never matches anything + // (like '\P{any}'). + // + // Additionally, the regex 'a{1}' is always equivalent to 'a'. + if rep.min == 0 && rep.max == Some(0) { + return Hir::empty(); + } else if rep.min == 1 && rep.max == Some(1) { + return *rep.sub; } - Hir { kind: HirKind::WordBoundary(word_boundary), info } + let props = Properties::repetition(&rep); + Hir { kind: HirKind::Repetition(rep), props } } - /// Creates a repetition HIR expression. 
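The `a{0}` and `a{1}` simplifications performed by the repetition smart constructor above can be checked directly. A sketch, assuming the usual `Repetition { min, max, greedy, sub }` field layout (the `greedy` field is not shown in this hunk):

```rust
use regex_syntax::hir::{Hir, Repetition};

fn main() {
    let sub = Hir::literal("a".as_bytes());
    // `a{1}` is precisely equivalent to `a`, so the smart constructor
    // returns the sub-expression unchanged.
    let rep = Hir::repetition(Repetition {
        min: 1,
        max: Some(1),
        greedy: true, // assumed field; not shown in the hunk above
        sub: Box::new(sub.clone()),
    });
    assert_eq!(sub, rep);
    // `a{0}` matches only the empty string, so it becomes `Hir::empty()`.
    let rep = Hir::repetition(Repetition {
        min: 0,
        max: Some(0),
        greedy: true,
        sub: Box::new(sub),
    });
    assert_eq!(Hir::empty(), rep);
}
```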
- pub fn repetition(rep: Repetition) -> Hir { - let mut info = HirInfo::new(); - info.set_always_utf8(rep.hir.is_always_utf8()); - info.set_all_assertions(rep.hir.is_all_assertions()); - // If this operator can match the empty string, then it can never - // be anchored. - info.set_anchored_start( - !rep.is_match_empty() && rep.hir.is_anchored_start(), - ); - info.set_anchored_end( - !rep.is_match_empty() && rep.hir.is_anchored_end(), - ); - info.set_line_anchored_start( - !rep.is_match_empty() && rep.hir.is_anchored_start(), - ); - info.set_line_anchored_end( - !rep.is_match_empty() && rep.hir.is_anchored_end(), - ); - info.set_any_anchored_start(rep.hir.is_any_anchored_start()); - info.set_any_anchored_end(rep.hir.is_any_anchored_end()); - info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty()); - info.set_literal(false); - info.set_alternation_literal(false); - Hir { kind: HirKind::Repetition(rep), info } - } - - /// Creates a group HIR expression. - pub fn group(group: Group) -> Hir { - let mut info = HirInfo::new(); - info.set_always_utf8(group.hir.is_always_utf8()); - info.set_all_assertions(group.hir.is_all_assertions()); - info.set_anchored_start(group.hir.is_anchored_start()); - info.set_anchored_end(group.hir.is_anchored_end()); - info.set_line_anchored_start(group.hir.is_line_anchored_start()); - info.set_line_anchored_end(group.hir.is_line_anchored_end()); - info.set_any_anchored_start(group.hir.is_any_anchored_start()); - info.set_any_anchored_end(group.hir.is_any_anchored_end()); - info.set_match_empty(group.hir.is_match_empty()); - info.set_literal(false); - info.set_alternation_literal(false); - Hir { kind: HirKind::Group(group), info } + /// Creates a capture HIR expression. + /// + /// Note that there is no explicit HIR value for a non-capturing group. + /// Since a non-capturing group only exists to override precedence in the + /// concrete syntax and since an HIR already does its own grouping based on + /// what is parsed, there is no need to explicitly represent non-capturing + /// groups in the HIR. + #[inline] + pub fn capture(capture: Capture) -> Hir { + let props = Properties::capture(&capture); + Hir { kind: HirKind::Capture(capture), props } } /// Returns the concatenation of the given expressions. /// - /// This flattens the concatenation as appropriate. - pub fn concat(mut exprs: Vec) -> Hir { - match exprs.len() { - 0 => Hir::empty(), - 1 => exprs.pop().unwrap(), - _ => { - let mut info = HirInfo::new(); - info.set_always_utf8(true); - info.set_all_assertions(true); - info.set_any_anchored_start(false); - info.set_any_anchored_end(false); - info.set_match_empty(true); - info.set_literal(true); - info.set_alternation_literal(true); - - // Some attributes require analyzing all sub-expressions. - for e in &exprs { - let x = info.is_always_utf8() && e.is_always_utf8(); - info.set_always_utf8(x); - - let x = info.is_all_assertions() && e.is_all_assertions(); - info.set_all_assertions(x); - - let x = info.is_any_anchored_start() - || e.is_any_anchored_start(); - info.set_any_anchored_start(x); - - let x = - info.is_any_anchored_end() || e.is_any_anchored_end(); - info.set_any_anchored_end(x); - - let x = info.is_match_empty() && e.is_match_empty(); - info.set_match_empty(x); - - let x = info.is_literal() && e.is_literal(); - info.set_literal(x); - - let x = info.is_alternation_literal() - && e.is_alternation_literal(); - info.set_alternation_literal(x); + /// This attempts to flatten and simplify the concatenation as appropriate. 
+ ///
+ /// # Example
+ ///
+ /// This shows a simple example of basic flattening of both concatenations
+ /// and literals.
+ ///
+ /// ```
+ /// use regex_syntax::hir::Hir;
+ ///
+ /// let hir = Hir::concat(vec![
+ /// Hir::concat(vec![
+ /// Hir::literal([b'a']),
+ /// Hir::literal([b'b']),
+ /// Hir::literal([b'c']),
+ /// ]),
+ /// Hir::concat(vec![
+ /// Hir::literal([b'x']),
+ /// Hir::literal([b'y']),
+ /// Hir::literal([b'z']),
+ /// ]),
+ /// ]);
+ /// let expected = Hir::literal("abcxyz".as_bytes());
+ /// assert_eq!(expected, hir);
+ /// ```
+ pub fn concat(subs: Vec<Hir>) -> Hir {
+ // We rebuild the concatenation by simplifying it. Would be nice to do
+ // it in place, but that seems a little tricky?
+ let mut new = vec![];
+ // This gobbles up any adjacent literals in a concatenation and smushes
+ // them together. Basically, when we see a literal, we add its bytes
+ // to 'prior_lit', and whenever we see anything else, we first take
+ // any bytes in 'prior_lit' and add it to the 'new' concatenation.
+ let mut prior_lit: Option<Vec<u8>> = None;
+ for sub in subs {
+ let (kind, props) = sub.into_parts();
+ match kind {
+ HirKind::Literal(Literal(bytes)) => {
+ if let Some(ref mut prior_bytes) = prior_lit {
+ prior_bytes.extend_from_slice(&bytes);
+ } else {
+ prior_lit = Some(bytes.to_vec());
+ }
+ }
+ // We also flatten concats that are direct children of another
+ // concat. We only need to do this one level deep since
+ // Hir::concat is the only way to build concatenations, and so
+ // flattening happens inductively.
+ HirKind::Concat(subs2) => {
+ for sub2 in subs2 {
+ let (kind2, props2) = sub2.into_parts();
+ match kind2 {
+ HirKind::Literal(Literal(bytes)) => {
+ if let Some(ref mut prior_bytes) = prior_lit {
+ prior_bytes.extend_from_slice(&bytes);
+ } else {
+ prior_lit = Some(bytes.to_vec());
+ }
+ }
+ kind2 => {
+ if let Some(prior_bytes) = prior_lit.take() {
+ new.push(Hir::literal(prior_bytes));
+ }
+ new.push(Hir { kind: kind2, props: props2 });
+ }
+ }
+ }
+ }
+ // We can just skip empty HIRs.
+ HirKind::Empty => {}
+ kind => {
+ if let Some(prior_bytes) = prior_lit.take() {
+ new.push(Hir::literal(prior_bytes));
+ }
+ new.push(Hir { kind, props });
+ }
- // Anchored attributes require something slightly more
- // sophisticated. Normally, WLOG, to determine whether an
- // expression is anchored to the start, we'd only need to check
- // the first expression of a concatenation. However,
- // expressions like `$\b^` are still anchored to the start,
- // but the first expression in the concatenation *isn't*
- // anchored to the start. So the "first" expression to look at
- // is actually one that is either not an assertion or is
- // specifically the StartText assertion.
- info.set_anchored_start(
- exprs
- .iter()
- .take_while(|e| {
- e.is_anchored_start() || e.is_all_assertions()
- })
- .any(|e| e.is_anchored_start()),
- );
- // Similarly for the end anchor, but in reverse.
- info.set_anchored_end(
- exprs
- .iter()
- .rev()
- .take_while(|e| {
- e.is_anchored_end() || e.is_all_assertions()
- })
- .any(|e| e.is_anchored_end()),
- );
- // Repeat the process for line anchors.
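One consequence of the flattening loop above is that empty sub-expressions simply vanish, and a concatenation left with a single element collapses to that element. For instance:

```rust
use regex_syntax::hir::Hir;

fn main() {
    // Empty sub-expressions are dropped ("we can just skip empty HIRs"),
    // and the remaining one-element concatenation collapses to the literal.
    let hir = Hir::concat(vec![
        Hir::empty(),
        Hir::literal("a".as_bytes()),
        Hir::empty(),
    ]);
    assert_eq!(Hir::literal("a".as_bytes()), hir);
}
```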
- info.set_line_anchored_start( - exprs - .iter() - .take_while(|e| { - e.is_line_anchored_start() || e.is_all_assertions() - }) - .any(|e| e.is_line_anchored_start()), - ); - info.set_line_anchored_end( - exprs - .iter() - .rev() - .take_while(|e| { - e.is_line_anchored_end() || e.is_all_assertions() - }) - .any(|e| e.is_line_anchored_end()), - ); - Hir { kind: HirKind::Concat(exprs), info } } } + if let Some(prior_bytes) = prior_lit.take() { + new.push(Hir::literal(prior_bytes)); + } + if new.is_empty() { + return Hir::empty(); + } else if new.len() == 1 { + return new.pop().unwrap(); + } + let props = Properties::concat(&new); + Hir { kind: HirKind::Concat(new), props } } /// Returns the alternation of the given expressions. /// - /// This flattens the alternation as appropriate. - pub fn alternation(mut exprs: Vec) -> Hir { - match exprs.len() { - 0 => Hir::empty(), - 1 => exprs.pop().unwrap(), - _ => { - let mut info = HirInfo::new(); - info.set_always_utf8(true); - info.set_all_assertions(true); - info.set_anchored_start(true); - info.set_anchored_end(true); - info.set_line_anchored_start(true); - info.set_line_anchored_end(true); - info.set_any_anchored_start(false); - info.set_any_anchored_end(false); - info.set_match_empty(false); - info.set_literal(false); - info.set_alternation_literal(true); - - // Some attributes require analyzing all sub-expressions. - for e in &exprs { - let x = info.is_always_utf8() && e.is_always_utf8(); - info.set_always_utf8(x); - - let x = info.is_all_assertions() && e.is_all_assertions(); - info.set_all_assertions(x); - - let x = info.is_anchored_start() && e.is_anchored_start(); - info.set_anchored_start(x); - - let x = info.is_anchored_end() && e.is_anchored_end(); - info.set_anchored_end(x); - - let x = info.is_line_anchored_start() - && e.is_line_anchored_start(); - info.set_line_anchored_start(x); - - let x = info.is_line_anchored_end() - && e.is_line_anchored_end(); - info.set_line_anchored_end(x); - - let x = info.is_any_anchored_start() - || e.is_any_anchored_start(); - info.set_any_anchored_start(x); - - let x = - info.is_any_anchored_end() || e.is_any_anchored_end(); - info.set_any_anchored_end(x); - - let x = info.is_match_empty() || e.is_match_empty(); - info.set_match_empty(x); - - let x = info.is_alternation_literal() && e.is_literal(); - info.set_alternation_literal(x); + /// This flattens and simplifies the alternation as appropriate. This may + /// include factoring out common prefixes or even rewriting the alternation + /// as a character class. + /// + /// Note that an empty alternation is equivalent to `Hir::fail()`. (It + /// is not possible for one to write an empty alternation, or even an + /// alternation with a single sub-expression, in the concrete syntax of a + /// regex.) + /// + /// # Example + /// + /// This is a simple example showing how an alternation might get + /// simplified. + /// + /// ``` + /// use regex_syntax::hir::{Hir, Class, ClassUnicode, ClassUnicodeRange}; + /// + /// let hir = Hir::alternation(vec![ + /// Hir::literal([b'a']), + /// Hir::literal([b'b']), + /// Hir::literal([b'c']), + /// Hir::literal([b'd']), + /// Hir::literal([b'e']), + /// Hir::literal([b'f']), + /// ]); + /// let expected = Hir::class(Class::Unicode(ClassUnicode::new([ + /// ClassUnicodeRange::new('a', 'f'), + /// ]))); + /// assert_eq!(expected, hir); + /// ``` + /// + /// And another example showing how common prefixes might get factored + /// out. 
+ ///
+ /// ```
+ /// use regex_syntax::hir::{Hir, Class, ClassUnicode, ClassUnicodeRange};
+ ///
+ /// let hir = Hir::alternation(vec![
+ /// Hir::concat(vec![
+ /// Hir::literal("abc".as_bytes()),
+ /// Hir::class(Class::Unicode(ClassUnicode::new([
+ /// ClassUnicodeRange::new('A', 'Z'),
+ /// ]))),
+ /// ]),
+ /// Hir::concat(vec![
+ /// Hir::literal("abc".as_bytes()),
+ /// Hir::class(Class::Unicode(ClassUnicode::new([
+ /// ClassUnicodeRange::new('a', 'z'),
+ /// ]))),
+ /// ]),
+ /// ]);
+ /// let expected = Hir::concat(vec![
+ /// Hir::literal("abc".as_bytes()),
+ /// Hir::alternation(vec![
+ /// Hir::class(Class::Unicode(ClassUnicode::new([
+ /// ClassUnicodeRange::new('A', 'Z'),
+ /// ]))),
+ /// Hir::class(Class::Unicode(ClassUnicode::new([
+ /// ClassUnicodeRange::new('a', 'z'),
+ /// ]))),
+ /// ]),
+ /// ]);
+ /// assert_eq!(expected, hir);
+ /// ```
+ ///
+ /// Note that these sorts of simplifications are not guaranteed.
+ pub fn alternation(subs: Vec<Hir>) -> Hir {
+ // We rebuild the alternation by simplifying it. We proceed similarly
+ // as the concatenation case. But in this case, there's no literal
+ // simplification happening. We're just flattening alternations.
+ let mut new = Vec::with_capacity(subs.len());
+ for sub in subs {
+ let (kind, props) = sub.into_parts();
+ match kind {
+ HirKind::Alternation(subs2) => {
+ new.extend(subs2);
+ }
+ kind => {
+ new.push(Hir { kind, props });
+ }
+ }
+ }
+ if new.is_empty() {
+ return Hir::fail();
+ } else if new.len() == 1 {
+ return new.pop().unwrap();
+ }
+ // Now that it's completely flattened, look for the special case of
+ // 'char1|char2|...|charN' and collapse that into a class. Note that
+ // we look for 'char' first and then bytes. The issue here is that if
+ // we find both non-ASCII codepoints and non-ASCII singleton bytes,
+ // then it isn't actually possible to smush them into a single class.
+ // (Because classes are either "all codepoints" or "all bytes." You
+ // can have a class that both matches non-ASCII but valid UTF-8 and
+ // invalid UTF-8.) So we look for all chars and then all bytes, and
+ // don't handle anything else.
+ if let Some(singletons) = singleton_chars(&new) {
+ let it = singletons
+ .into_iter()
+ .map(|ch| ClassUnicodeRange { start: ch, end: ch });
+ return Hir::class(Class::Unicode(ClassUnicode::new(it)));
+ }
+ if let Some(singletons) = singleton_bytes(&new) {
+ let it = singletons
+ .into_iter()
+ .map(|b| ClassBytesRange { start: b, end: b });
+ return Hir::class(Class::Bytes(ClassBytes::new(it)));
+ }
+ // Similar to singleton chars, we can also look for alternations of
+ // classes. Those can be smushed into a single class.
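Flattening and singleton smushing compose, so a nested alternation of single characters reduces all the way to one character class. A sketch:

```rust
use regex_syntax::hir::{Class, ClassUnicode, ClassUnicodeRange, Hir};

fn main() {
    // The inner alternation is flattened into the outer one first, and the
    // resulting `a|b|c` is then collapsed into the class `[a-c]`.
    let hir = Hir::alternation(vec![
        Hir::literal([b'a']),
        Hir::alternation(vec![Hir::literal([b'b']), Hir::literal([b'c'])]),
    ]);
    let expected = Hir::class(Class::Unicode(ClassUnicode::new([
        ClassUnicodeRange::new('a', 'c'),
    ])));
    assert_eq!(expected, hir);
}
```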
+ if let Some(cls) = class_chars(&new) { + return Hir::class(cls); } + if let Some(cls) = class_bytes(&new) { + return Hir::class(cls); + } + // Factor out a common prefix if we can, which might potentially + // simplify the expression and unlock other optimizations downstream. + // It also might generally make NFA matching and DFA construction + // faster by reducing the scope of branching in the regex. + new = match lift_common_prefix(new) { + Ok(hir) => return hir, + Err(unchanged) => unchanged, + }; + let props = Properties::alternation(&new); + Hir { kind: HirKind::Alternation(new), props } } - /// Build an HIR expression for `(?s).`. + /// Returns an HIR expression for `.`. /// - /// A `(?s).` expression matches any character, including `\n`. To build an - /// expression that matches any character except for `\n`, then use the - /// `dot` method. + /// * [`Dot::AnyChar`] maps to `(?su-R:.)`. + /// * [`Dot::AnyByte`] maps to `(?s-Ru:.)`. + /// * [`Dot::AnyCharExceptLF`] maps to `(?u-Rs:.)`. + /// * [`Dot::AnyCharExceptCRLF`] maps to `(?Ru-s:.)`. + /// * [`Dot::AnyByteExceptLF`] maps to `(?-Rsu:.)`. + /// * [`Dot::AnyByteExceptCRLF`] maps to `(?R-su:.)`. /// - /// If `bytes` is `true`, then this assumes characters are limited to a - /// single byte. - pub fn any(bytes: bool) -> Hir { - if bytes { - let mut cls = ClassBytes::empty(); - cls.push(ClassBytesRange::new(b'\0', b'\xFF')); - Hir::class(Class::Bytes(cls)) - } else { - let mut cls = ClassUnicode::empty(); - cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}')); - Hir::class(Class::Unicode(cls)) - } - } - - /// Return true if and only if this HIR will always match valid UTF-8. + /// # Example /// - /// When this returns false, then it is possible for this HIR expression - /// to match invalid UTF-8. - pub fn is_always_utf8(&self) -> bool { - self.info.is_always_utf8() - } - - /// Returns true if and only if this entire HIR expression is made up of - /// zero-width assertions. + /// Note that this is a convenience routine for constructing the correct + /// character class based on the value of `Dot`. There is no explicit "dot" + /// HIR value. It is just an abbreviation for a common character class. /// - /// This includes expressions like `^$\b\A\z` and even `((\b)+())*^`, but - /// not `^a`. - pub fn is_all_assertions(&self) -> bool { - self.info.is_all_assertions() - } - - /// Return true if and only if this HIR is required to match from the - /// beginning of text. This includes expressions like `^foo`, `^(foo|bar)`, - /// `^foo|^bar` but not `^foo|bar`. - pub fn is_anchored_start(&self) -> bool { - self.info.is_anchored_start() - } - - /// Return true if and only if this HIR is required to match at the end - /// of text. This includes expressions like `foo$`, `(foo|bar)$`, - /// `foo$|bar$` but not `foo$|bar`. - pub fn is_anchored_end(&self) -> bool { - self.info.is_anchored_end() - } - - /// Return true if and only if this HIR is required to match from the - /// beginning of text or the beginning of a line. This includes expressions - /// like `^foo`, `(?m)^foo`, `^(foo|bar)`, `^(foo|bar)`, `(?m)^foo|^bar` - /// but not `^foo|bar` or `(?m)^foo|bar`. + /// ``` + /// use regex_syntax::hir::{Hir, Dot, Class, ClassBytes, ClassBytesRange}; /// - /// Note that if `is_anchored_start` is `true`, then - /// `is_line_anchored_start` will also be `true`. The reverse implication - /// is not true. For example, `(?m)^foo` is line anchored, but not - /// `is_anchored_start`. 
- pub fn is_line_anchored_start(&self) -> bool {
- self.info.is_line_anchored_start()
+ /// let hir = Hir::dot(Dot::AnyByte);
+ /// let expected = Hir::class(Class::Bytes(ClassBytes::new([
+ /// ClassBytesRange::new(0x00, 0xFF),
+ /// ])));
+ /// assert_eq!(expected, hir);
+ /// ```
+ #[inline]
+ pub fn dot(dot: Dot) -> Hir {
+ match dot {
+ Dot::AnyChar => {
+ let mut cls = ClassUnicode::empty();
+ cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}'));
+ Hir::class(Class::Unicode(cls))
+ }
+ Dot::AnyByte => {
+ let mut cls = ClassBytes::empty();
+ cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
+ Hir::class(Class::Bytes(cls))
+ }
+ Dot::AnyCharExcept(ch) => {
+ let mut cls =
+ ClassUnicode::new([ClassUnicodeRange::new(ch, ch)]);
+ cls.negate();
+ Hir::class(Class::Unicode(cls))
+ }
+ Dot::AnyCharExceptLF => {
+ let mut cls = ClassUnicode::empty();
+ cls.push(ClassUnicodeRange::new('\0', '\x09'));
+ cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}'));
+ Hir::class(Class::Unicode(cls))
+ }
+ Dot::AnyCharExceptCRLF => {
+ let mut cls = ClassUnicode::empty();
+ cls.push(ClassUnicodeRange::new('\0', '\x09'));
+ cls.push(ClassUnicodeRange::new('\x0B', '\x0C'));
+ cls.push(ClassUnicodeRange::new('\x0E', '\u{10FFFF}'));
+ Hir::class(Class::Unicode(cls))
+ }
+ Dot::AnyByteExcept(byte) => {
+ let mut cls =
+ ClassBytes::new([ClassBytesRange::new(byte, byte)]);
+ cls.negate();
+ Hir::class(Class::Bytes(cls))
+ }
+ Dot::AnyByteExceptLF => {
+ let mut cls = ClassBytes::empty();
+ cls.push(ClassBytesRange::new(b'\0', b'\x09'));
+ cls.push(ClassBytesRange::new(b'\x0B', b'\xFF'));
+ Hir::class(Class::Bytes(cls))
+ }
+ Dot::AnyByteExceptCRLF => {
+ let mut cls = ClassBytes::empty();
+ cls.push(ClassBytesRange::new(b'\0', b'\x09'));
+ cls.push(ClassBytesRange::new(b'\x0B', b'\x0C'));
+ cls.push(ClassBytesRange::new(b'\x0E', b'\xFF'));
+ Hir::class(Class::Bytes(cls))
+ }
+ }
+ }
+}

- /// Return true if and only if this HIR is required to match at the
- /// end of text or the end of a line. This includes expressions like
- /// `foo$`, `(?m)foo$`, `(foo|bar)$`, `(?m)(foo|bar)$`, `foo$|bar$`,
- /// `(?m)(foo|bar)$`, but not `foo$|bar` or `(?m)foo$|bar`.
+/// The underlying kind of an arbitrary [`Hir`] expression.
+///
+/// An `HirKind` is principally useful for doing case analysis on the type
+/// of a regular expression. If you're looking to build new `Hir` values,
+/// then you _must_ use the smart constructors defined on `Hir`, like
+/// [`Hir::repetition`], to build new `Hir` values. The API intentionally does
+/// not expose any way of building an `Hir` directly from an `HirKind`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum HirKind {
+ /// The empty regular expression, which matches everything, including the
+ /// empty string.
+ Empty,
+ /// A literal string that matches exactly these bytes.
+ Literal(Literal),
+ /// A single character class that matches any of the characters in the
+ /// class. A class can either consist of Unicode scalar values as
+ /// characters, or it can use bytes.
///
- /// Note that if `is_anchored_end` is `true`, then
- /// `is_line_anchored_end` will also be `true`. The reverse implication
- /// is not true. For example, `(?m)foo$` is line anchored, but not
- /// `is_anchored_end`.
- pub fn is_line_anchored_end(&self) -> bool {
- self.info.is_line_anchored_end()
- }
-
- /// Return true if and only if this HIR contains any sub-expression that
- /// is required to match at the beginning of text.
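To complement the `Dot::AnyByte` doc example above, here is the Unicode analogue, mirroring the `AnyCharExceptLF` arm of the constructor:

```rust
use regex_syntax::hir::{Class, ClassUnicode, ClassUnicodeRange, Dot, Hir};

fn main() {
    // `(?u-Rs:.)`: any Unicode scalar value except `\n`.
    let hir = Hir::dot(Dot::AnyCharExceptLF);
    let expected = Hir::class(Class::Unicode(ClassUnicode::new([
        ClassUnicodeRange::new('\0', '\x09'),
        ClassUnicodeRange::new('\x0B', '\u{10FFFF}'),
    ])));
    assert_eq!(expected, hir);
}
```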
Specifically, this
- /// returns true if the `^` symbol (when multiline mode is disabled) or the
- /// `\A` escape appear anywhere in the regex.
- pub fn is_any_anchored_start(&self) -> bool {
- self.info.is_any_anchored_start()
- }
-
- /// Return true if and only if this HIR contains any sub-expression that is
- /// required to match at the end of text. Specifically, this returns true
- /// if the `$` symbol (when multiline mode is disabled) or the `\z` escape
- /// appear anywhere in the regex.
- pub fn is_any_anchored_end(&self) -> bool {
- self.info.is_any_anchored_end()
- }
-
- /// Return true if and only if the empty string is part of the language
- /// matched by this regular expression.
+ /// A class may be empty. In which case, it matches nothing.
+ Class(Class),
+ /// A look-around assertion. A look-around match always has zero length.
+ Look(Look),
+ /// A repetition operation applied to a sub-expression.
+ Repetition(Repetition),
+ /// A capturing group, which contains a sub-expression.
+ Capture(Capture),
+ /// A concatenation of expressions.
///
- /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\b`
- /// and `\B`, but not `a` or `a+`.
- pub fn is_match_empty(&self) -> bool {
- self.info.is_match_empty()
- }
-
- /// Return true if and only if this HIR is a simple literal. This is only
- /// true when this HIR expression is either itself a `Literal` or a
- /// concatenation of only `Literal`s.
+ /// A concatenation matches only if each of its sub-expressions match one
+ /// after the other.
///
- /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()`,
- /// `` are not (even though that contain sub-expressions that are literals).
- pub fn is_literal(&self) -> bool {
- self.info.is_literal()
- }
-
- /// Return true if and only if this HIR is either a simple literal or an
- /// alternation of simple literals. This is only
- /// true when this HIR expression is either itself a `Literal` or a
- /// concatenation of only `Literal`s or an alternation of only `Literal`s.
+ /// Concatenations are guaranteed by `Hir`'s smart constructors to always
+ /// have at least two sub-expressions.
+ Concat(Vec<Hir>),
+ /// An alternation of expressions.
///
- /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation
- /// literals, but `f+`, `(foo)`, `foo()`, ``
- /// are not (even though that contain sub-expressions that are literals).
- pub fn is_alternation_literal(&self) -> bool {
- self.info.is_alternation_literal()
- }
+ /// An alternation matches only if at least one of its sub-expressions
+ /// match. If multiple sub-expressions match, then the leftmost is
+ /// preferred.
+ ///
+ /// Alternations are guaranteed by `Hir`'s smart constructors to always
+ /// have at least two sub-expressions.
+ Alternation(Vec<Hir>),
}

impl HirKind {
- /// Return true if and only if this HIR is the empty regular expression.
- ///
- /// Note that this is not defined inductively. That is, it only tests if
- /// this kind is the `Empty` variant. To get the inductive definition,
- /// use the `is_match_empty` method on [`Hir`](struct.Hir.html).
- pub fn is_empty(&self) -> bool {
- match *self {
- HirKind::Empty => true,
- _ => false,
- }
- }
+ /// Returns a slice of this kind's sub-expressions, if any.
+ pub fn subs(&self) -> &[Hir] {
+ use core::slice::from_ref;

- /// Returns true if and only if this kind has any (including possibly
- /// empty) subexpressions.
- pub fn has_subexprs(&self) -> bool { match *self { HirKind::Empty | HirKind::Literal(_) | HirKind::Class(_) - | HirKind::Anchor(_) - | HirKind::WordBoundary(_) => false, - HirKind::Group(_) - | HirKind::Repetition(_) - | HirKind::Concat(_) - | HirKind::Alternation(_) => true, + | HirKind::Look(_) => &[], + HirKind::Repetition(Repetition { ref sub, .. }) => from_ref(sub), + HirKind::Capture(Capture { ref sub, .. }) => from_ref(sub), + HirKind::Concat(ref subs) => subs, + HirKind::Alternation(ref subs) => subs, } } } +impl core::fmt::Debug for Hir { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.kind.fmt(f) + } +} + /// Print a display representation of this Hir. /// /// The result of this is a valid regular expression pattern string. /// /// This implementation uses constant stack space and heap space proportional /// to the size of the `Hir`. -impl fmt::Display for Hir { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use crate::hir::print::Printer; - Printer::new().print(self, f) +impl core::fmt::Display for Hir { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + crate::hir::print::Printer::new().print(self, f) } } /// The high-level intermediate representation of a literal. /// -/// A literal corresponds to a single character, where a character is either -/// defined by a Unicode scalar value or an arbitrary byte. Unicode characters -/// are preferred whenever possible. In particular, a `Byte` variant is only -/// ever produced when it could match invalid UTF-8. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Literal { - /// A single character represented by a Unicode scalar value. - Unicode(char), - /// A single character represented by an arbitrary byte. - Byte(u8), -} +/// A literal corresponds to `0` or more bytes that should be matched +/// literally. The smart constructors defined on `Hir` will automatically +/// concatenate adjacent literals into one literal, and will even automatically +/// replace empty literals with `Hir::empty()`. +/// +/// Note that despite a literal being represented by a sequence of bytes, its +/// `Debug` implementation will attempt to print it as a normal string. (That +/// is, not a sequence of decimal numbers.) +#[derive(Clone, Eq, PartialEq)] +pub struct Literal(pub Box<[u8]>); -impl Literal { - /// Returns true if and only if this literal corresponds to a Unicode - /// scalar value. - pub fn is_unicode(&self) -> bool { - match *self { - Literal::Unicode(_) => true, - Literal::Byte(b) if b <= 0x7F => true, - Literal::Byte(_) => false, - } +impl core::fmt::Debug for Literal { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + crate::debug::Bytes(&self.0).fmt(f) } } /// The high-level intermediate representation of a character class. /// /// A character class corresponds to a set of characters. A character is either -/// defined by a Unicode scalar value or a byte. Unicode characters are used -/// by default, while bytes are used when Unicode mode (via the `u` flag) is -/// disabled. +/// defined by a Unicode scalar value or a byte. /// /// A character class, regardless of its character type, is represented by a /// sequence of non-overlapping non-adjacent ranges of characters. /// -/// Note that unlike [`Literal`](enum.Literal.html), a `Bytes` variant may -/// be produced even when it exclusively matches valid UTF-8. 
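The `subs` accessor above makes uniform traversal of an HIR straightforward. A small sketch that counts nodes by recursion:

```rust
use regex_syntax::hir::Hir;

// Counts HIR nodes by recursing through `HirKind::subs`, which returns an
// empty slice for the leaf kinds (empty, literal, class, look).
fn size(hir: &Hir) -> usize {
    1 + hir.kind().subs().iter().map(size).sum::<usize>()
}

fn main() {
    let hir = Hir::concat(vec![
        Hir::literal("a".as_bytes()),
        Hir::literal("b".as_bytes()),
    ]);
    // The two literals were smushed into one, so this is a single node.
    assert_eq!(1, size(&hir));
}
```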
This is because
-/// a `Bytes` variant represents an intention by the author of the regular
-/// expression to disable Unicode mode, which in turn impacts the semantics of
-/// case insensitive matching. For example, `(?i)k` and `(?i-u)k` will not
-/// match the same set of strings.
-#[derive(Clone, Debug, Eq, PartialEq)]
+/// There are no guarantees about which class variant is used. Generally
+/// speaking, the Unicode variant is used whenever a class needs to contain
+/// non-ASCII Unicode scalar values. But the Unicode variant can be used even
+/// when Unicode mode is disabled. For example, at the time of writing, the
+/// regex `(?-u:a|\xc2\xa0)` will compile down to HIR for the Unicode class
+/// `[a\u00A0]` due to optimizations.
+///
+/// Note that a `Bytes` variant may be produced even when it exclusively
+/// matches valid UTF-8. This is because a `Bytes` variant represents an
+/// intention by the author of the regular expression to disable Unicode mode,
+/// which in turn impacts the semantics of case insensitive matching. For
+/// example, `(?i)k` and `(?i-u)k` will not match the same set of strings.
+#[derive(Clone, Eq, PartialEq)]
 pub enum Class {
     /// A set of characters represented by Unicode scalar values.
     Unicode(ClassUnicode),
@@ -795,6 +830,15 @@ impl Class {
     ///
     /// If this is a byte oriented character class, then this will be limited
     /// to the ASCII ranges `A-Z` and `a-z`.
+    ///
+    /// # Panics
+    ///
+    /// This routine panics when the case mapping data necessary for this
+    /// routine to complete is unavailable. This occurs when the `unicode-case`
+    /// feature is not enabled and the underlying class is Unicode oriented.
+    ///
+    /// Callers should prefer using `try_case_fold_simple` instead, which will
+    /// return an error instead of panicking.
     pub fn case_fold_simple(&mut self) {
         match *self {
             Class::Unicode(ref mut x) => x.case_fold_simple(),
@@ -802,6 +846,29 @@ impl Class {
         }
     }
 
+    /// Apply Unicode simple case folding to this character class, in place.
+    /// The character class will be expanded to include all simple case folded
+    /// character variants.
+    ///
+    /// If this is a byte oriented character class, then this will be limited
+    /// to the ASCII ranges `A-Z` and `a-z`.
+    ///
+    /// # Error
+    ///
+    /// This routine returns an error when the case mapping data necessary
+    /// for this routine to complete is unavailable. This occurs when the
+    /// `unicode-case` feature is not enabled and the underlying class is
+    /// Unicode oriented.
+    pub fn try_case_fold_simple(
+        &mut self,
+    ) -> core::result::Result<(), CaseFoldError> {
+        match *self {
+            Class::Unicode(ref mut x) => x.try_case_fold_simple()?,
+            Class::Bytes(ref mut x) => x.case_fold_simple(),
+        }
+        Ok(())
+    }
+
     /// Negate this character class in place.
     ///
     /// After completion, this character class will contain precisely the
@@ -824,38 +891,177 @@ impl Class {
     /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete
     ///    syntax or in the parser builder. By default, Unicode mode is
     ///    enabled.
-    pub fn is_always_utf8(&self) -> bool {
+    pub fn is_utf8(&self) -> bool {
         match *self {
             Class::Unicode(_) => true,
-            Class::Bytes(ref x) => x.is_all_ascii(),
+            Class::Bytes(ref x) => x.is_ascii(),
         }
     }
-}
-
-/// A set of characters represented by Unicode scalar values.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct ClassUnicode {
-    set: IntervalSet<ClassUnicodeRange>,
-}
-impl ClassUnicode {
-    /// Create a new class from a sequence of ranges.
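Because `case_fold_simple` panics when the `unicode-case` case mapping tables are compiled out, the fallible `try_case_fold_simple` added above is usually the better call site. A small sketch, assuming default crate features:

use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange};

fn main() {
    let mut cls = ClassUnicode::new(vec![ClassUnicodeRange::new('a', 'z')]);
    // With `unicode-case` enabled (the default), this cannot fail.
    cls.try_case_fold_simple().unwrap();
    // After folding, the class also covers the uppercase range.
    assert!(cls
        .ranges()
        .iter()
        .any(|r| r.start() <= 'A' && 'Z' <= r.end()));
}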
+ /// Returns the length, in bytes, of the smallest string matched by this + /// character class. /// - /// The given ranges do not need to be in any specific order, and ranges - /// may overlap. - pub fn new(ranges: I) -> ClassUnicode - where - I: IntoIterator, - { - ClassUnicode { set: IntervalSet::new(ranges) } + /// For non-empty byte oriented classes, this always returns `1`. For + /// non-empty Unicode oriented classes, this can return `1`, `2`, `3` or + /// `4`. For empty classes, `None` is returned. It is impossible for `0` to + /// be returned. + /// + /// # Example + /// + /// This example shows some examples of regexes and their corresponding + /// minimum length, if any. + /// + /// ``` + /// use regex_syntax::{hir::Properties, parse}; + /// + /// // The empty string has a min length of 0. + /// let hir = parse(r"")?; + /// assert_eq!(Some(0), hir.properties().minimum_len()); + /// // As do other types of regexes that only match the empty string. + /// let hir = parse(r"^$\b\B")?; + /// assert_eq!(Some(0), hir.properties().minimum_len()); + /// // A regex that can match the empty string but match more is still 0. + /// let hir = parse(r"a*")?; + /// assert_eq!(Some(0), hir.properties().minimum_len()); + /// // A regex that matches nothing has no minimum defined. + /// let hir = parse(r"[a&&b]")?; + /// assert_eq!(None, hir.properties().minimum_len()); + /// // Character classes usually have a minimum length of 1. + /// let hir = parse(r"\w")?; + /// assert_eq!(Some(1), hir.properties().minimum_len()); + /// // But sometimes Unicode classes might be bigger! + /// let hir = parse(r"\p{Cyrillic}")?; + /// assert_eq!(Some(2), hir.properties().minimum_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn minimum_len(&self) -> Option { + match *self { + Class::Unicode(ref x) => x.minimum_len(), + Class::Bytes(ref x) => x.minimum_len(), + } } - /// Create a new class with no ranges. - pub fn empty() -> ClassUnicode { - ClassUnicode::new(vec![]) + /// Returns the length, in bytes, of the longest string matched by this + /// character class. + /// + /// For non-empty byte oriented classes, this always returns `1`. For + /// non-empty Unicode oriented classes, this can return `1`, `2`, `3` or + /// `4`. For empty classes, `None` is returned. It is impossible for `0` to + /// be returned. + /// + /// # Example + /// + /// This example shows some examples of regexes and their corresponding + /// maximum length, if any. + /// + /// ``` + /// use regex_syntax::{hir::Properties, parse}; + /// + /// // The empty string has a max length of 0. + /// let hir = parse(r"")?; + /// assert_eq!(Some(0), hir.properties().maximum_len()); + /// // As do other types of regexes that only match the empty string. + /// let hir = parse(r"^$\b\B")?; + /// assert_eq!(Some(0), hir.properties().maximum_len()); + /// // A regex that matches nothing has no maximum defined. + /// let hir = parse(r"[a&&b]")?; + /// assert_eq!(None, hir.properties().maximum_len()); + /// // Bounded repeats work as you expect. + /// let hir = parse(r"x{2,10}")?; + /// assert_eq!(Some(10), hir.properties().maximum_len()); + /// // An unbounded repeat means there is no maximum. + /// let hir = parse(r"x{2,}")?; + /// assert_eq!(None, hir.properties().maximum_len()); + /// // With Unicode enabled, \w can match up to 4 bytes! + /// let hir = parse(r"\w")?; + /// assert_eq!(Some(4), hir.properties().maximum_len()); + /// // Without Unicode enabled, \w matches at most 1 byte. 
+ /// let hir = parse(r"(?-u)\w")?; + /// assert_eq!(Some(1), hir.properties().maximum_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn maximum_len(&self) -> Option { + match *self { + Class::Unicode(ref x) => x.maximum_len(), + Class::Bytes(ref x) => x.maximum_len(), + } } - /// Add a new range to this set. + /// Returns true if and only if this character class is empty. That is, + /// it has no elements. + /// + /// An empty character can never match anything, including an empty string. + pub fn is_empty(&self) -> bool { + match *self { + Class::Unicode(ref x) => x.ranges().is_empty(), + Class::Bytes(ref x) => x.ranges().is_empty(), + } + } + + /// If this class consists of exactly one element (whether a codepoint or a + /// byte), then return it as a literal byte string. + /// + /// If this class is empty or contains more than one element, then `None` + /// is returned. + pub fn literal(&self) -> Option> { + match *self { + Class::Unicode(ref x) => x.literal(), + Class::Bytes(ref x) => x.literal(), + } + } +} + +impl core::fmt::Debug for Class { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use crate::debug::Byte; + + let mut fmter = f.debug_set(); + match *self { + Class::Unicode(ref cls) => { + for r in cls.ranges().iter() { + fmter.entry(&(r.start..=r.end)); + } + } + Class::Bytes(ref cls) => { + for r in cls.ranges().iter() { + fmter.entry(&(Byte(r.start)..=Byte(r.end))); + } + } + } + fmter.finish() + } +} + +/// A set of characters represented by Unicode scalar values. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ClassUnicode { + set: IntervalSet, +} + +impl ClassUnicode { + /// Create a new class from a sequence of ranges. + /// + /// The given ranges do not need to be in any specific order, and ranges + /// may overlap. Ranges will automatically be sorted into a canonical + /// non-overlapping order. + pub fn new(ranges: I) -> ClassUnicode + where + I: IntoIterator, + { + ClassUnicode { set: IntervalSet::new(ranges) } + } + + /// Create a new class with no ranges. + /// + /// An empty class matches nothing. That is, it is equivalent to + /// [`Hir::fail`]. + pub fn empty() -> ClassUnicode { + ClassUnicode::new(vec![]) + } + + /// Add a new range to this set. pub fn push(&mut self, range: ClassUnicodeRange) { self.set.push(range); } @@ -903,7 +1109,7 @@ impl ClassUnicode { /// `unicode-case` feature is not enabled. pub fn try_case_fold_simple( &mut self, - ) -> result::Result<(), CaseFoldError> { + ) -> core::result::Result<(), CaseFoldError> { self.set.case_fold_simple() } @@ -946,9 +1152,59 @@ impl ClassUnicode { /// Returns true if and only if this character class will either match /// nothing or only ASCII bytes. Stated differently, this returns false /// if and only if this class contains a non-ASCII codepoint. - pub fn is_all_ascii(&self) -> bool { + pub fn is_ascii(&self) -> bool { self.set.intervals().last().map_or(true, |r| r.end <= '\x7F') } + + /// Returns the length, in bytes, of the smallest string matched by this + /// character class. + /// + /// Returns `None` when the class is empty. + pub fn minimum_len(&self) -> Option { + let first = self.ranges().get(0)?; + // Correct because c1 < c2 implies c1.len_utf8() < c2.len_utf8(). + Some(first.start.len_utf8()) + } + + /// Returns the length, in bytes, of the longest string matched by this + /// character class. + /// + /// Returns `None` when the class is empty. 
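The `is_empty` and `literal` accessors above make it cheap to detect degenerate classes. A short sketch using only items added in this diff; the snowman codepoint is an arbitrary example:

use regex_syntax::hir::{Class, ClassUnicode, ClassUnicodeRange};

fn main() {
    // A class with exactly one codepoint reports itself as a literal,
    // given as the UTF-8 encoding of that codepoint.
    let one = Class::Unicode(ClassUnicode::new(vec![
        ClassUnicodeRange::new('☃', '☃'),
    ]));
    assert_eq!(Some("☃".as_bytes().to_vec()), one.literal());

    // An empty class matches nothing: no literal, no minimum length.
    let none = Class::Unicode(ClassUnicode::empty());
    assert!(none.is_empty());
    assert_eq!(None, none.literal());
    assert_eq!(None, none.minimum_len());
}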
+ pub fn maximum_len(&self) -> Option { + let last = self.ranges().last()?; + // Correct because c1 < c2 implies c1.len_utf8() < c2.len_utf8(). + Some(last.end.len_utf8()) + } + + /// If this class consists of exactly one codepoint, then return it as + /// a literal byte string. + /// + /// If this class is empty or contains more than one codepoint, then `None` + /// is returned. + pub fn literal(&self) -> Option> { + let rs = self.ranges(); + if rs.len() == 1 && rs[0].start == rs[0].end { + Some(rs[0].start.encode_utf8(&mut [0; 4]).to_string().into_bytes()) + } else { + None + } + } + + /// If this class consists of only ASCII ranges, then return its + /// corresponding and equivalent byte class. + pub fn to_byte_class(&self) -> Option { + if !self.is_ascii() { + return None; + } + Some(ClassBytes::new(self.ranges().iter().map(|r| { + // Since we are guaranteed that our codepoint range is ASCII, the + // 'u8::try_from' calls below are guaranteed to be correct. + ClassBytesRange { + start: u8::try_from(r.start).unwrap(), + end: u8::try_from(r.end).unwrap(), + } + }))) + } } /// An iterator over all ranges in a Unicode character class. @@ -975,18 +1231,18 @@ pub struct ClassUnicodeRange { end: char, } -impl fmt::Debug for ClassUnicodeRange { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for ClassUnicodeRange { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let start = if !self.start.is_whitespace() && !self.start.is_control() { self.start.to_string() } else { - format!("0x{:X}", self.start as u32) + format!("0x{:X}", u32::from(self.start)) }; let end = if !self.end.is_whitespace() && !self.end.is_control() { self.end.to_string() } else { - format!("0x{:X}", self.end as u32) + format!("0x{:X}", u32::from(self.end)) }; f.debug_struct("ClassUnicodeRange") .field("start", &start) @@ -1023,24 +1279,13 @@ impl Interval for ClassUnicodeRange { &self, ranges: &mut Vec, ) -> Result<(), unicode::CaseFoldError> { - if !unicode::contains_simple_case_mapping(self.start, self.end)? { + let mut folder = unicode::SimpleCaseFolder::new()?; + if !folder.overlaps(self.start, self.end) { return Ok(()); } - let start = self.start as u32; - let end = (self.end as u32).saturating_add(1); - let mut next_simple_cp = None; - for cp in (start..end).filter_map(char::from_u32) { - if next_simple_cp.map_or(false, |next| cp < next) { - continue; - } - let it = match unicode::simple_fold(cp)? { - Ok(it) => it, - Err(next) => { - next_simple_cp = next; - continue; - } - }; - for cp_folded in it { + let (start, end) = (u32::from(self.start), u32::from(self.end)); + for cp in (start..=end).filter_map(char::from_u32) { + for &cp_folded in folder.mapping(cp) { ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded)); } } @@ -1072,10 +1317,23 @@ impl ClassUnicodeRange { pub fn end(&self) -> char { self.end } + + /// Returns the number of codepoints in this range. + pub fn len(&self) -> usize { + let diff = 1 + u32::from(self.end) - u32::from(self.start); + // This is likely to panic in 16-bit targets since a usize can only fit + // 2^16. It's not clear what to do here, other than to return an error + // when building a Unicode class that contains a range whose length + // overflows usize. (Which, to be honest, is probably quite common on + // 16-bit targets. For example, this would imply that '.' and '\p{any}' + // would be impossible to build.) 
+        usize::try_from(diff).expect("char class len fits in usize")
+    }
 }
 
-/// A set of characters represented by arbitrary bytes (where one byte
-/// corresponds to one character).
+/// A set of characters represented by arbitrary bytes.
+///
+/// Each byte corresponds to one character.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct ClassBytes {
     set: IntervalSet<ClassBytesRange>,
@@ -1085,7 +1343,8 @@ impl ClassBytes {
     /// Create a new class from a sequence of ranges.
     ///
     /// The given ranges do not need to be in any specific order, and ranges
-    /// may overlap.
+    /// may overlap. Ranges will automatically be sorted into a canonical
+    /// non-overlapping order.
     pub fn new<I>(ranges: I) -> ClassBytes
     where
         I: IntoIterator<Item = ClassBytesRange>,
@@ -1094,6 +1353,9 @@ impl ClassBytes {
     }
 
     /// Create a new class with no ranges.
+    ///
+    /// An empty class matches nothing. That is, it is equivalent to
+    /// [`Hir::fail`].
     pub fn empty() -> ClassBytes {
         ClassBytes::new(vec![])
     }
@@ -1115,410 +1377,1687 @@ impl ClassBytes {
         self.set.intervals()
     }
 
-    /// Expand this character class such that it contains all case folded
-    /// characters. For example, if this class consists of the range `a-z`,
-    /// then applying case folding will result in the class containing both the
-    /// ranges `a-z` and `A-Z`.
-    ///
-    /// Note that this only applies ASCII case folding, which is limited to the
-    /// characters `a-z` and `A-Z`.
-    pub fn case_fold_simple(&mut self) {
-        self.set.case_fold_simple().expect("ASCII case folding never fails");
+    /// Expand this character class such that it contains all case folded
+    /// characters. For example, if this class consists of the range `a-z`,
+    /// then applying case folding will result in the class containing both the
+    /// ranges `a-z` and `A-Z`.
+    ///
+    /// Note that this only applies ASCII case folding, which is limited to the
+    /// characters `a-z` and `A-Z`.
+    pub fn case_fold_simple(&mut self) {
+        self.set.case_fold_simple().expect("ASCII case folding never fails");
+    }
+
+    /// Negate this byte class.
+    ///
+    /// For all `b` where `b` is any byte, if `b` was in this set, then it
+    /// will not be in this set after negation.
+    pub fn negate(&mut self) {
+        self.set.negate();
+    }
+
+    /// Union this byte class with the given byte class, in place.
+    pub fn union(&mut self, other: &ClassBytes) {
+        self.set.union(&other.set);
+    }
+
+    /// Intersect this byte class with the given byte class, in place.
+    pub fn intersect(&mut self, other: &ClassBytes) {
+        self.set.intersect(&other.set);
+    }
+
+    /// Subtract the given byte class from this byte class, in place.
+    pub fn difference(&mut self, other: &ClassBytes) {
+        self.set.difference(&other.set);
+    }
+
+    /// Compute the symmetric difference of the given byte classes, in place.
+    ///
+    /// This computes the symmetric difference of two byte classes. This
+    /// removes all elements in this class that are also in the given class,
+    /// but also adds all elements from the given class that aren't in this
+    /// class. That is, the class will contain all elements in either class,
+    /// but will not contain any elements that are in both classes.
+    pub fn symmetric_difference(&mut self, other: &ClassBytes) {
+        self.set.symmetric_difference(&other.set);
+    }
+
+    /// Returns true if and only if this character class will either match
+    /// nothing or only ASCII bytes. Stated differently, this returns false
+    /// if and only if this class contains a non-ASCII byte.
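The set operations above all mutate in place and keep the range list canonical. A brief sketch of union and intersection on byte classes, using the vendored API:

use regex_syntax::hir::{ClassBytes, ClassBytesRange};

fn main() {
    let mut digits = ClassBytes::new(vec![ClassBytesRange::new(b'0', b'9')]);
    let lower = ClassBytes::new(vec![ClassBytesRange::new(b'a', b'z')]);

    // Union keeps ranges sorted, non-overlapping and non-adjacent.
    digits.union(&lower);
    assert_eq!(2, digits.ranges().len());

    // Intersecting with a disjoint class empties the set.
    let upper = ClassBytes::new(vec![ClassBytesRange::new(b'A', b'Z')]);
    digits.intersect(&upper);
    assert!(digits.ranges().is_empty());
}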
+ pub fn is_ascii(&self) -> bool { + self.set.intervals().last().map_or(true, |r| r.end <= 0x7F) + } + + /// Returns the length, in bytes, of the smallest string matched by this + /// character class. + /// + /// Returns `None` when the class is empty. + pub fn minimum_len(&self) -> Option { + if self.ranges().is_empty() { + None + } else { + Some(1) + } + } + + /// Returns the length, in bytes, of the longest string matched by this + /// character class. + /// + /// Returns `None` when the class is empty. + pub fn maximum_len(&self) -> Option { + if self.ranges().is_empty() { + None + } else { + Some(1) + } + } + + /// If this class consists of exactly one byte, then return it as + /// a literal byte string. + /// + /// If this class is empty or contains more than one byte, then `None` + /// is returned. + pub fn literal(&self) -> Option> { + let rs = self.ranges(); + if rs.len() == 1 && rs[0].start == rs[0].end { + Some(vec![rs[0].start]) + } else { + None + } + } + + /// If this class consists of only ASCII ranges, then return its + /// corresponding and equivalent Unicode class. + pub fn to_unicode_class(&self) -> Option { + if !self.is_ascii() { + return None; + } + Some(ClassUnicode::new(self.ranges().iter().map(|r| { + // Since we are guaranteed that our byte range is ASCII, the + // 'char::from' calls below are correct and will not erroneously + // convert a raw byte value into its corresponding codepoint. + ClassUnicodeRange { + start: char::from(r.start), + end: char::from(r.end), + } + }))) + } +} + +/// An iterator over all ranges in a byte character class. +/// +/// The lifetime `'a` refers to the lifetime of the underlying class. +#[derive(Debug)] +pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>); + +impl<'a> Iterator for ClassBytesIter<'a> { + type Item = &'a ClassBytesRange; + + fn next(&mut self) -> Option<&'a ClassBytesRange> { + self.0.next() + } +} + +/// A single range of characters represented by arbitrary bytes. +/// +/// The range is closed. That is, the start and end of the range are included +/// in the range. +#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)] +pub struct ClassBytesRange { + start: u8, + end: u8, +} + +impl Interval for ClassBytesRange { + type Bound = u8; + + #[inline] + fn lower(&self) -> u8 { + self.start + } + #[inline] + fn upper(&self) -> u8 { + self.end + } + #[inline] + fn set_lower(&mut self, bound: u8) { + self.start = bound; + } + #[inline] + fn set_upper(&mut self, bound: u8) { + self.end = bound; + } + + /// Apply simple case folding to this byte range. Only ASCII case mappings + /// (for a-z) are applied. + /// + /// Additional ranges are appended to the given vector. Canonical ordering + /// is *not* maintained in the given vector. + fn case_fold_simple( + &self, + ranges: &mut Vec, + ) -> Result<(), unicode::CaseFoldError> { + if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) { + let lower = cmp::max(self.start, b'a'); + let upper = cmp::min(self.end, b'z'); + ranges.push(ClassBytesRange::new(lower - 32, upper - 32)); + } + if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) { + let lower = cmp::max(self.start, b'A'); + let upper = cmp::min(self.end, b'Z'); + ranges.push(ClassBytesRange::new(lower + 32, upper + 32)); + } + Ok(()) + } +} + +impl ClassBytesRange { + /// Create a new byte range for a character class. + /// + /// The returned range is always in a canonical form. That is, the range + /// returned always satisfies the invariant that `start <= end`. 
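`to_unicode_class` mirrors `ClassUnicode::to_byte_class` from earlier in this file, and the two round-trip exactly when every range stays within ASCII. A sketch:

use regex_syntax::hir::{ClassBytes, ClassBytesRange};

fn main() {
    let ascii = ClassBytes::new(vec![ClassBytesRange::new(b'a', b'f')]);
    // ASCII-only, so a Unicode equivalent exists...
    let uni = ascii.to_unicode_class().unwrap();
    // ...and converting back yields the same byte ranges.
    let back = uni.to_byte_class().unwrap();
    assert_eq!(ascii.ranges(), back.ranges());

    // A non-ASCII byte class has no Unicode equivalent, since the raw
    // byte 0xFF is not the same thing as the codepoint U+00FF.
    let high = ClassBytes::new(vec![ClassBytesRange::new(0x80, 0xFF)]);
    assert!(high.to_unicode_class().is_none());
}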
+ pub fn new(start: u8, end: u8) -> ClassBytesRange { + ClassBytesRange::create(start, end) + } + + /// Return the start of this range. + /// + /// The start of a range is always less than or equal to the end of the + /// range. + pub fn start(&self) -> u8 { + self.start + } + + /// Return the end of this range. + /// + /// The end of a range is always greater than or equal to the start of the + /// range. + pub fn end(&self) -> u8 { + self.end + } + + /// Returns the number of bytes in this range. + pub fn len(&self) -> usize { + usize::from(self.end.checked_sub(self.start).unwrap()) + .checked_add(1) + .unwrap() + } +} + +impl core::fmt::Debug for ClassBytesRange { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("ClassBytesRange") + .field("start", &crate::debug::Byte(self.start)) + .field("end", &crate::debug::Byte(self.end)) + .finish() + } +} + +/// The high-level intermediate representation for a look-around assertion. +/// +/// An assertion match is always zero-length. Also called an "empty match." +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Look { + /// Match the beginning of text. Specifically, this matches at the starting + /// position of the input. + Start = 1 << 0, + /// Match the end of text. Specifically, this matches at the ending + /// position of the input. + End = 1 << 1, + /// Match the beginning of a line or the beginning of text. Specifically, + /// this matches at the starting position of the input, or at the position + /// immediately following a `\n` character. + StartLF = 1 << 2, + /// Match the end of a line or the end of text. Specifically, this matches + /// at the end position of the input, or at the position immediately + /// preceding a `\n` character. + EndLF = 1 << 3, + /// Match the beginning of a line or the beginning of text. Specifically, + /// this matches at the starting position of the input, or at the position + /// immediately following either a `\r` or `\n` character, but never after + /// a `\r` when a `\n` follows. + StartCRLF = 1 << 4, + /// Match the end of a line or the end of text. Specifically, this matches + /// at the end position of the input, or at the position immediately + /// preceding a `\r` or `\n` character, but never before a `\n` when a `\r` + /// precedes it. + EndCRLF = 1 << 5, + /// Match an ASCII-only word boundary. That is, this matches a position + /// where the left adjacent character and right adjacent character + /// correspond to a word and non-word or a non-word and word character. + WordAscii = 1 << 6, + /// Match an ASCII-only negation of a word boundary. + WordAsciiNegate = 1 << 7, + /// Match a Unicode-aware word boundary. That is, this matches a position + /// where the left adjacent character and right adjacent character + /// correspond to a word and non-word or a non-word and word character. + WordUnicode = 1 << 8, + /// Match a Unicode-aware negation of a word boundary. + WordUnicodeNegate = 1 << 9, + /// Match the start of an ASCII-only word boundary. That is, this matches a + /// position at either the beginning of the haystack or where the previous + /// character is not a word character and the following character is a word + /// character. + WordStartAscii = 1 << 10, + /// Match the end of an ASCII-only word boundary. That is, this matches + /// a position at either the end of the haystack or where the previous + /// character is a word character and the following character is not a word + /// character. 
+ WordEndAscii = 1 << 11, + /// Match the start of a Unicode word boundary. That is, this matches a + /// position at either the beginning of the haystack or where the previous + /// character is not a word character and the following character is a word + /// character. + WordStartUnicode = 1 << 12, + /// Match the end of a Unicode word boundary. That is, this matches a + /// position at either the end of the haystack or where the previous + /// character is a word character and the following character is not a word + /// character. + WordEndUnicode = 1 << 13, + /// Match the start half of an ASCII-only word boundary. That is, this + /// matches a position at either the beginning of the haystack or where the + /// previous character is not a word character. + WordStartHalfAscii = 1 << 14, + /// Match the end half of an ASCII-only word boundary. That is, this + /// matches a position at either the end of the haystack or where the + /// following character is not a word character. + WordEndHalfAscii = 1 << 15, + /// Match the start half of a Unicode word boundary. That is, this matches + /// a position at either the beginning of the haystack or where the + /// previous character is not a word character. + WordStartHalfUnicode = 1 << 16, + /// Match the end half of a Unicode word boundary. That is, this matches + /// a position at either the end of the haystack or where the following + /// character is not a word character. + WordEndHalfUnicode = 1 << 17, +} + +impl Look { + /// Flip the look-around assertion to its equivalent for reverse searches. + /// For example, `StartLF` gets translated to `EndLF`. + /// + /// Some assertions, such as `WordUnicode`, remain the same since they + /// match the same positions regardless of the direction of the search. + #[inline] + pub const fn reversed(self) -> Look { + match self { + Look::Start => Look::End, + Look::End => Look::Start, + Look::StartLF => Look::EndLF, + Look::EndLF => Look::StartLF, + Look::StartCRLF => Look::EndCRLF, + Look::EndCRLF => Look::StartCRLF, + Look::WordAscii => Look::WordAscii, + Look::WordAsciiNegate => Look::WordAsciiNegate, + Look::WordUnicode => Look::WordUnicode, + Look::WordUnicodeNegate => Look::WordUnicodeNegate, + Look::WordStartAscii => Look::WordEndAscii, + Look::WordEndAscii => Look::WordStartAscii, + Look::WordStartUnicode => Look::WordEndUnicode, + Look::WordEndUnicode => Look::WordStartUnicode, + Look::WordStartHalfAscii => Look::WordEndHalfAscii, + Look::WordEndHalfAscii => Look::WordStartHalfAscii, + Look::WordStartHalfUnicode => Look::WordEndHalfUnicode, + Look::WordEndHalfUnicode => Look::WordStartHalfUnicode, + } + } + + /// Return the underlying representation of this look-around enumeration + /// as an integer. Giving the return value to the [`Look::from_repr`] + /// constructor is guaranteed to return the same look-around variant that + /// one started with within a semver compatible release of this crate. + #[inline] + pub const fn as_repr(self) -> u32 { + // AFAIK, 'as' is the only way to zero-cost convert an int enum to an + // actual int. + self as u32 + } + + /// Given the underlying representation of a `Look` value, return the + /// corresponding `Look` value if the representation is valid. Otherwise + /// `None` is returned. 
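`as_repr` pairs with the `from_repr` constructor defined just below, and `reversed` is an involution on the start/end pairs. A quick round-trip sketch against the vendored API:

use regex_syntax::hir::Look;

fn main() {
    let look = Look::WordStartAscii;
    // The repr round-trips within a semver compatible release.
    assert_eq!(Some(look), Look::from_repr(look.as_repr()));
    // Reversing twice gets back the original assertion...
    assert_eq!(look, look.reversed().reversed());
    // ...and direction-less assertions are their own reversal.
    assert_eq!(Look::WordUnicode, Look::WordUnicode.reversed());
}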
+ #[inline] + pub const fn from_repr(repr: u32) -> Option { + match repr { + 0b00_0000_0000_0000_0001 => Some(Look::Start), + 0b00_0000_0000_0000_0010 => Some(Look::End), + 0b00_0000_0000_0000_0100 => Some(Look::StartLF), + 0b00_0000_0000_0000_1000 => Some(Look::EndLF), + 0b00_0000_0000_0001_0000 => Some(Look::StartCRLF), + 0b00_0000_0000_0010_0000 => Some(Look::EndCRLF), + 0b00_0000_0000_0100_0000 => Some(Look::WordAscii), + 0b00_0000_0000_1000_0000 => Some(Look::WordAsciiNegate), + 0b00_0000_0001_0000_0000 => Some(Look::WordUnicode), + 0b00_0000_0010_0000_0000 => Some(Look::WordUnicodeNegate), + 0b00_0000_0100_0000_0000 => Some(Look::WordStartAscii), + 0b00_0000_1000_0000_0000 => Some(Look::WordEndAscii), + 0b00_0001_0000_0000_0000 => Some(Look::WordStartUnicode), + 0b00_0010_0000_0000_0000 => Some(Look::WordEndUnicode), + 0b00_0100_0000_0000_0000 => Some(Look::WordStartHalfAscii), + 0b00_1000_0000_0000_0000 => Some(Look::WordEndHalfAscii), + 0b01_0000_0000_0000_0000 => Some(Look::WordStartHalfUnicode), + 0b10_0000_0000_0000_0000 => Some(Look::WordEndHalfUnicode), + _ => None, + } + } + + /// Returns a convenient single codepoint representation of this + /// look-around assertion. Each assertion is guaranteed to be represented + /// by a distinct character. + /// + /// This is useful for succinctly representing a look-around assertion in + /// human friendly but succinct output intended for a programmer working on + /// regex internals. + #[inline] + pub const fn as_char(self) -> char { + match self { + Look::Start => 'A', + Look::End => 'z', + Look::StartLF => '^', + Look::EndLF => '$', + Look::StartCRLF => 'r', + Look::EndCRLF => 'R', + Look::WordAscii => 'b', + Look::WordAsciiNegate => 'B', + Look::WordUnicode => '𝛃', + Look::WordUnicodeNegate => '𝚩', + Look::WordStartAscii => '<', + Look::WordEndAscii => '>', + Look::WordStartUnicode => '〈', + Look::WordEndUnicode => '〉', + Look::WordStartHalfAscii => '◁', + Look::WordEndHalfAscii => '▷', + Look::WordStartHalfUnicode => '◀', + Look::WordEndHalfUnicode => '▶', + } + } +} + +/// The high-level intermediate representation for a capturing group. +/// +/// A capturing group always has an index and a child expression. It may +/// also have a name associated with it (e.g., `(?P\w)`), but it's not +/// necessary. +/// +/// Note that there is no explicit representation of a non-capturing group +/// in a `Hir`. Instead, non-capturing grouping is handled automatically by +/// the recursive structure of the `Hir` itself. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Capture { + /// The capture index of the capture. + pub index: u32, + /// The name of the capture, if it exists. + pub name: Option>, + /// The expression inside the capturing group, which may be empty. + pub sub: Box, +} + +/// The high-level intermediate representation of a repetition operator. +/// +/// A repetition operator permits the repetition of an arbitrary +/// sub-expression. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Repetition { + /// The minimum range of the repetition. + /// + /// Note that special cases like `?`, `+` and `*` all get translated into + /// the ranges `{0,1}`, `{1,}` and `{0,}`, respectively. + /// + /// When `min` is zero, this expression can match the empty string + /// regardless of what its sub-expression is. + pub min: u32, + /// The maximum range of the repetition. + /// + /// Note that when `max` is `None`, `min` acts as a lower bound but where + /// there is no upper bound. 
For something like `x{5}` where the min and
+    /// max are equivalent, `min` will be set to `5` and `max` will be set to
+    /// `Some(5)`.
+    pub max: Option<u32>,
+    /// Whether this repetition operator is greedy or not. A greedy operator
+    /// will match as much as it can. A non-greedy operator will match as
+    /// little as it can.
+    ///
+    /// Typically, operators are greedy by default and are only non-greedy when
+    /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
+    /// not. However, this can be inverted via the `U` "ungreedy" flag.
+    pub greedy: bool,
+    /// The expression being repeated.
+    pub sub: Box<Hir>,
+}
+
+impl Repetition {
+    /// Returns a new repetition with the same `min`, `max` and `greedy`
+    /// values, but with its sub-expression replaced with the one given.
+    pub fn with(&self, sub: Hir) -> Repetition {
+        Repetition {
+            min: self.min,
+            max: self.max,
+            greedy: self.greedy,
+            sub: Box::new(sub),
+        }
+    }
+}
+
+/// A type describing the different flavors of `.`.
+///
+/// This type is meant to be used with [`Hir::dot`], which is a convenience
+/// routine for building HIR values derived from the `.` regex.
+#[non_exhaustive]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Dot {
+    /// Matches the UTF-8 encoding of any Unicode scalar value.
+    ///
+    /// This is equivalent to `(?su:.)` and also `\p{any}`.
+    AnyChar,
+    /// Matches any byte value.
+    ///
+    /// This is equivalent to `(?s-u:.)` and also `(?-u:[\x00-\xFF])`.
+    AnyByte,
+    /// Matches the UTF-8 encoding of any Unicode scalar value except for the
+    /// `char` given.
+    ///
+    /// This is equivalent to using `(?u-s:.)` with the line terminator set
+    /// to a particular ASCII byte. (Because of peculiarities in the regex
+    /// engines, a line terminator must be a single byte. It follows that when
+    /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar
+    /// value. That is, it must be ASCII.)
+    ///
+    /// (This and `AnyCharExceptLF` both exist because of legacy reasons.
+    /// `AnyCharExceptLF` will be dropped in the next breaking change release.)
+    AnyCharExcept(char),
+    /// Matches the UTF-8 encoding of any Unicode scalar value except for `\n`.
+    ///
+    /// This is equivalent to `(?u-s:.)` and also `[\p{any}--\n]`.
+    AnyCharExceptLF,
+    /// Matches the UTF-8 encoding of any Unicode scalar value except for `\r`
+    /// and `\n`.
+    ///
+    /// This is equivalent to `(?uR-s:.)` and also `[\p{any}--\r\n]`.
+    AnyCharExceptCRLF,
+    /// Matches any byte value except for the `u8` given.
+    ///
+    /// This is equivalent to using `(?-us:.)` with the line terminator set
+    /// to a particular ASCII byte. (Because of peculiarities in the regex
+    /// engines, a line terminator must be a single byte. It follows that when
+    /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar
+    /// value. That is, it must be ASCII.)
+    ///
+    /// (This and `AnyByteExceptLF` both exist because of legacy reasons.
+    /// `AnyByteExceptLF` will be dropped in the next breaking change release.)
+    AnyByteExcept(u8),
+    /// Matches any byte value except for `\n`.
+    ///
+    /// This is equivalent to `(?-su:.)` and also `(?-u:[[\x00-\xFF]--\n])`.
+    AnyByteExceptLF,
+    /// Matches any byte value except for `\r` and `\n`.
+    ///
+    /// This is equivalent to `(?R-su:.)` and also `(?-u:[[\x00-\xFF]--\r\n])`.
+    AnyByteExceptCRLF,
+}
+
+/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack
+/// space but heap space proportional to the depth of the total `Hir`.
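The `Drop` impl that follows is what makes deeply nested HIRs safe to discard. A sketch of the failure mode it prevents, assuming the `Hir::capture` smart constructor and public `Capture` fields as vendored here; the nesting depth is an arbitrary illustrative value:

use regex_syntax::hir::{Capture, Hir};

fn main() {
    // Build a pathologically deep HIR: 50,000 nested capture groups.
    let mut hir = Hir::literal("a".as_bytes());
    for i in 0..50_000u32 {
        hir = Hir::capture(Capture {
            index: i + 1,
            name: None,
            sub: Box::new(hir),
        });
    }
    // This drop walks the tree with an explicit stack rather than
    // recursion, so it completes without overflowing the call stack.
    drop(hir);
}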
+impl Drop for Hir {
+    fn drop(&mut self) {
+        use core::mem;
+
+        match *self.kind() {
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Look(_) => return,
+            HirKind::Capture(ref x) if x.sub.kind.subs().is_empty() => return,
+            HirKind::Repetition(ref x) if x.sub.kind.subs().is_empty() => {
+                return
+            }
+            HirKind::Concat(ref x) if x.is_empty() => return,
+            HirKind::Alternation(ref x) if x.is_empty() => return,
+            _ => {}
+        }
+
+        let mut stack = vec![mem::replace(self, Hir::empty())];
+        while let Some(mut expr) = stack.pop() {
+            match expr.kind {
+                HirKind::Empty
+                | HirKind::Literal(_)
+                | HirKind::Class(_)
+                | HirKind::Look(_) => {}
+                HirKind::Capture(ref mut x) => {
+                    stack.push(mem::replace(&mut x.sub, Hir::empty()));
+                }
+                HirKind::Repetition(ref mut x) => {
+                    stack.push(mem::replace(&mut x.sub, Hir::empty()));
+                }
+                HirKind::Concat(ref mut x) => {
+                    stack.extend(x.drain(..));
+                }
+                HirKind::Alternation(ref mut x) => {
+                    stack.extend(x.drain(..));
+                }
+            }
+        }
+    }
+}
+
+/// A type that collects various properties of an HIR value.
+///
+/// Properties are always scalar values and represent metadata that is
+/// computed inductively on an HIR value. Properties are defined for all
+/// HIR values.
+///
+/// All methods on a `Properties` value take constant time and are meant to
+/// be cheap to call.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Properties(Box<PropertiesI>);
+
+/// The property definition. It is split out so that we can box it, and
+/// thereby make `Properties` use less stack size. This is kind-of important
+/// because every HIR value has a `Properties` attached to it.
+///
+/// This does have the unfortunate consequence that creating any HIR value
+/// always leads to at least one alloc for properties, but this is generally
+/// true anyway (for pretty much all HirKinds except for look-arounds).
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct PropertiesI {
+    minimum_len: Option<usize>,
+    maximum_len: Option<usize>,
+    look_set: LookSet,
+    look_set_prefix: LookSet,
+    look_set_suffix: LookSet,
+    look_set_prefix_any: LookSet,
+    look_set_suffix_any: LookSet,
+    utf8: bool,
+    explicit_captures_len: usize,
+    static_explicit_captures_len: Option<usize>,
+    literal: bool,
+    alternation_literal: bool,
+}
+
+impl Properties {
+    /// Returns the length (in bytes) of the smallest string matched by this
+    /// HIR.
+    ///
+    /// A return value of `0` is possible and occurs when the HIR can match an
+    /// empty string.
+    ///
+    /// `None` is returned when there is no minimum length. This occurs in
+    /// precisely the cases where the HIR matches nothing, i.e., the language
+    /// the regex matches is empty. An example of such a regex is `\P{any}`.
+    #[inline]
+    pub fn minimum_len(&self) -> Option<usize> {
+        self.0.minimum_len
+    }
+
+    /// Returns the length (in bytes) of the longest string matched by this
+    /// HIR.
+    ///
+    /// A return value of `0` is possible and occurs when nothing longer than
+    /// the empty string is in the language described by this HIR.
+    ///
+    /// `None` is returned when there is no longest matching string. This
+    /// occurs when the HIR matches nothing or when there is no upper bound on
+    /// the length of matching strings. Examples of such regexes are `\P{any}`
+    /// (matches nothing) and `a+` (has no upper bound).
+    #[inline]
+    pub fn maximum_len(&self) -> Option<usize> {
+        self.0.maximum_len
+    }
+
+    /// Returns a set of all look-around assertions that appear at least once
+    /// in this HIR value.
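Since `PropertiesI` is filled in inductively as each node is constructed, the accessors below are constant-time field reads, never re-walks of the HIR. A sketch:

use regex_syntax::parse;

fn main() {
    let hir = parse(r"(?m)^(foo|ba+r)$").unwrap();
    let props = hir.properties();
    assert_eq!(Some(3), props.minimum_len());
    assert_eq!(None, props.maximum_len()); // 'a+' has no upper bound
    assert_eq!(1, props.explicit_captures_len());
    assert!(props.is_utf8());
}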
+    #[inline]
+    pub fn look_set(&self) -> LookSet {
+        self.0.look_set
+    }
+
+    /// Returns a set of all look-around assertions that appear as a prefix for
+    /// this HIR value. That is, the set returned corresponds to the set of
+    /// assertions that must be passed before matching any bytes in a haystack.
+    ///
+    /// For example, `hir.look_set_prefix().contains(Look::Start)` returns true
+    /// if and only if the HIR is fully anchored at the start.
+    #[inline]
+    pub fn look_set_prefix(&self) -> LookSet {
+        self.0.look_set_prefix
+    }
+
+    /// Returns a set of all look-around assertions that appear as a _possible_
+    /// prefix for this HIR value. That is, the set returned corresponds to the
+    /// set of assertions that _may_ be passed before matching any bytes in a
+    /// haystack.
+    ///
+    /// For example, `hir.look_set_prefix_any().contains(Look::Start)` returns
+    /// true if and only if it's possible for the regex to match through an
+    /// anchored assertion before consuming any input.
+    #[inline]
+    pub fn look_set_prefix_any(&self) -> LookSet {
+        self.0.look_set_prefix_any
+    }
+
+    /// Returns a set of all look-around assertions that appear as a suffix for
+    /// this HIR value. That is, the set returned corresponds to the set of
+    /// assertions that must be passed in order to be considered a match after
+    /// all other consuming HIR expressions.
+    ///
+    /// For example, `hir.look_set_suffix().contains(Look::End)` returns true
+    /// if and only if the HIR is fully anchored at the end.
+    #[inline]
+    pub fn look_set_suffix(&self) -> LookSet {
+        self.0.look_set_suffix
+    }
+
+    /// Returns a set of all look-around assertions that appear as a _possible_
+    /// suffix for this HIR value. That is, the set returned corresponds to the
+    /// set of assertions that _may_ be passed after all bytes in a haystack
+    /// have been matched.
+    ///
+    /// For example, `hir.look_set_suffix_any().contains(Look::End)` returns
+    /// true if and only if it's possible for the regex to match through an
+    /// anchored assertion at the end of a match without consuming any input.
+    #[inline]
+    pub fn look_set_suffix_any(&self) -> LookSet {
+        self.0.look_set_suffix_any
+    }
+
+    /// Return true if and only if the corresponding HIR will always match
+    /// valid UTF-8.
+    ///
+    /// When this returns false, then it is possible for this HIR expression to
+    /// match invalid UTF-8, including by matching between the code units of
+    /// a single UTF-8 encoded codepoint.
+    ///
+    /// Note that this returns true even when the corresponding HIR can match
+    /// the empty string. Since an empty string can technically appear between
+    /// UTF-8 code units, it is possible for a match to be reported that splits
+    /// a codepoint which could in turn be considered matching invalid UTF-8.
+    /// However, it is generally assumed that such empty matches are handled
+    /// specially by the search routine if it is absolutely required that
+    /// matches not split a codepoint.
+    ///
+    /// # Example
+    ///
+    /// This code example shows the UTF-8 property of a variety of patterns.
+    ///
+    /// ```
+    /// use regex_syntax::{ParserBuilder, parse};
+    ///
+    /// // Examples of 'is_utf8() == true'.
+ /// assert!(parse(r"a")?.properties().is_utf8()); + /// assert!(parse(r"[^a]")?.properties().is_utf8()); + /// assert!(parse(r".")?.properties().is_utf8()); + /// assert!(parse(r"\W")?.properties().is_utf8()); + /// assert!(parse(r"\b")?.properties().is_utf8()); + /// assert!(parse(r"\B")?.properties().is_utf8()); + /// assert!(parse(r"(?-u)\b")?.properties().is_utf8()); + /// assert!(parse(r"(?-u)\B")?.properties().is_utf8()); + /// // Unicode mode is enabled by default, and in + /// // that mode, all \x hex escapes are treated as + /// // codepoints. So this actually matches the UTF-8 + /// // encoding of U+00FF. + /// assert!(parse(r"\xFF")?.properties().is_utf8()); + /// + /// // Now we show examples of 'is_utf8() == false'. + /// // The only way to do this is to force the parser + /// // to permit invalid UTF-8, otherwise all of these + /// // would fail to parse! + /// let parse = |pattern| { + /// ParserBuilder::new().utf8(false).build().parse(pattern) + /// }; + /// assert!(!parse(r"(?-u)[^a]")?.properties().is_utf8()); + /// assert!(!parse(r"(?-u).")?.properties().is_utf8()); + /// assert!(!parse(r"(?-u)\W")?.properties().is_utf8()); + /// // Conversely to the equivalent example above, + /// // when Unicode mode is disabled, \x hex escapes + /// // are treated as their raw byte values. + /// assert!(!parse(r"(?-u)\xFF")?.properties().is_utf8()); + /// // Note that just because we disabled UTF-8 in the + /// // parser doesn't mean we still can't use Unicode. + /// // It is enabled by default, so \xFF is still + /// // equivalent to matching the UTF-8 encoding of + /// // U+00FF by default. + /// assert!(parse(r"\xFF")?.properties().is_utf8()); + /// // Even though we use raw bytes that individually + /// // are not valid UTF-8, when combined together, the + /// // overall expression *does* match valid UTF-8! + /// assert!(parse(r"(?-u)\xE2\x98\x83")?.properties().is_utf8()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_utf8(&self) -> bool { + self.0.utf8 + } + + /// Returns the total number of explicit capturing groups in the + /// corresponding HIR. + /// + /// Note that this does not include the implicit capturing group + /// corresponding to the entire match that is typically included by regex + /// engines. + /// + /// # Example + /// + /// This method will return `0` for `a` and `1` for `(a)`: + /// + /// ``` + /// use regex_syntax::parse; + /// + /// assert_eq!(0, parse("a")?.properties().explicit_captures_len()); + /// assert_eq!(1, parse("(a)")?.properties().explicit_captures_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn explicit_captures_len(&self) -> usize { + self.0.explicit_captures_len + } + + /// Returns the total number of explicit capturing groups that appear in + /// every possible match. + /// + /// If the number of capture groups can vary depending on the match, then + /// this returns `None`. That is, a value is only returned when the number + /// of matching groups is invariant or "static." + /// + /// Note that this does not include the implicit capturing group + /// corresponding to the entire match. + /// + /// # Example + /// + /// This shows a few cases where a static number of capture groups is + /// available and a few cases where it is not. 
+ /// + /// ``` + /// use regex_syntax::parse; + /// + /// let len = |pattern| { + /// parse(pattern).map(|h| { + /// h.properties().static_explicit_captures_len() + /// }) + /// }; + /// + /// assert_eq!(Some(0), len("a")?); + /// assert_eq!(Some(1), len("(a)")?); + /// assert_eq!(Some(1), len("(a)|(b)")?); + /// assert_eq!(Some(2), len("(a)(b)|(c)(d)")?); + /// assert_eq!(None, len("(a)|b")?); + /// assert_eq!(None, len("a|(b)")?); + /// assert_eq!(None, len("(b)*")?); + /// assert_eq!(Some(1), len("(b)+")?); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn static_explicit_captures_len(&self) -> Option { + self.0.static_explicit_captures_len + } + + /// Return true if and only if this HIR is a simple literal. This is + /// only true when this HIR expression is either itself a `Literal` or a + /// concatenation of only `Literal`s. + /// + /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()` and + /// the empty string are not (even though they contain sub-expressions that + /// are literals). + #[inline] + pub fn is_literal(&self) -> bool { + self.0.literal + } + + /// Return true if and only if this HIR is either a simple literal or an + /// alternation of simple literals. This is only + /// true when this HIR expression is either itself a `Literal` or a + /// concatenation of only `Literal`s or an alternation of only `Literal`s. + /// + /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation + /// literals, but `f+`, `(foo)`, `foo()`, and the empty pattern are not + /// (even though that contain sub-expressions that are literals). + #[inline] + pub fn is_alternation_literal(&self) -> bool { + self.0.alternation_literal + } + + /// Returns the total amount of heap memory usage, in bytes, used by this + /// `Properties` value. + #[inline] + pub fn memory_usage(&self) -> usize { + core::mem::size_of::() + } + + /// Returns a new set of properties that corresponds to the union of the + /// iterator of properties given. + /// + /// This is useful when one has multiple `Hir` expressions and wants + /// to combine them into a single alternation without constructing the + /// corresponding `Hir`. This routine provides a way of combining the + /// properties of each `Hir` expression into one set of properties + /// representing the union of those expressions. + /// + /// # Example: union with HIRs that never match + /// + /// This example shows that unioning properties together with one that + /// represents a regex that never matches will "poison" certain attributes, + /// like the minimum and maximum lengths. + /// + /// ``` + /// use regex_syntax::{hir::Properties, parse}; + /// + /// let hir1 = parse("ab?c?")?; + /// assert_eq!(Some(1), hir1.properties().minimum_len()); + /// assert_eq!(Some(3), hir1.properties().maximum_len()); + /// + /// let hir2 = parse(r"[a&&b]")?; + /// assert_eq!(None, hir2.properties().minimum_len()); + /// assert_eq!(None, hir2.properties().maximum_len()); + /// + /// let hir3 = parse(r"wxy?z?")?; + /// assert_eq!(Some(2), hir3.properties().minimum_len()); + /// assert_eq!(Some(4), hir3.properties().maximum_len()); + /// + /// let unioned = Properties::union([ + /// hir1.properties(), + /// hir2.properties(), + /// hir3.properties(), + /// ]); + /// assert_eq!(None, unioned.minimum_len()); + /// assert_eq!(None, unioned.maximum_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// The maximum length can also be "poisoned" by a pattern that has no + /// upper bound on the length of a match. 
The minimum length remains + /// unaffected: + /// + /// ``` + /// use regex_syntax::{hir::Properties, parse}; + /// + /// let hir1 = parse("ab?c?")?; + /// assert_eq!(Some(1), hir1.properties().minimum_len()); + /// assert_eq!(Some(3), hir1.properties().maximum_len()); + /// + /// let hir2 = parse(r"a+")?; + /// assert_eq!(Some(1), hir2.properties().minimum_len()); + /// assert_eq!(None, hir2.properties().maximum_len()); + /// + /// let hir3 = parse(r"wxy?z?")?; + /// assert_eq!(Some(2), hir3.properties().minimum_len()); + /// assert_eq!(Some(4), hir3.properties().maximum_len()); + /// + /// let unioned = Properties::union([ + /// hir1.properties(), + /// hir2.properties(), + /// hir3.properties(), + /// ]); + /// assert_eq!(Some(1), unioned.minimum_len()); + /// assert_eq!(None, unioned.maximum_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn union(props: I) -> Properties + where + I: IntoIterator, + P: core::borrow::Borrow, + { + let mut it = props.into_iter().peekable(); + // While empty alternations aren't possible, we still behave as if they + // are. When we have an empty alternate, then clearly the look-around + // prefix and suffix is empty. Otherwise, it is the intersection of all + // prefixes and suffixes (respectively) of the branches. + let fix = if it.peek().is_none() { + LookSet::empty() + } else { + LookSet::full() + }; + // And also, an empty alternate means we have 0 static capture groups, + // but we otherwise start with the number corresponding to the first + // alternate. If any subsequent alternate has a different number of + // static capture groups, then we overall have a variation and not a + // static number of groups. + let static_explicit_captures_len = + it.peek().and_then(|p| p.borrow().static_explicit_captures_len()); + // The base case is an empty alternation, which matches nothing. + // Note though that empty alternations aren't possible, because the + // Hir::alternation smart constructor rewrites those as empty character + // classes. + let mut props = PropertiesI { + minimum_len: None, + maximum_len: None, + look_set: LookSet::empty(), + look_set_prefix: fix, + look_set_suffix: fix, + look_set_prefix_any: LookSet::empty(), + look_set_suffix_any: LookSet::empty(), + utf8: true, + explicit_captures_len: 0, + static_explicit_captures_len, + literal: false, + alternation_literal: true, + }; + let (mut min_poisoned, mut max_poisoned) = (false, false); + // Handle properties that need to visit every child hir. 
+ for prop in it { + let p = prop.borrow(); + props.look_set.set_union(p.look_set()); + props.look_set_prefix.set_intersect(p.look_set_prefix()); + props.look_set_suffix.set_intersect(p.look_set_suffix()); + props.look_set_prefix_any.set_union(p.look_set_prefix_any()); + props.look_set_suffix_any.set_union(p.look_set_suffix_any()); + props.utf8 = props.utf8 && p.is_utf8(); + props.explicit_captures_len = props + .explicit_captures_len + .saturating_add(p.explicit_captures_len()); + if props.static_explicit_captures_len + != p.static_explicit_captures_len() + { + props.static_explicit_captures_len = None; + } + props.alternation_literal = + props.alternation_literal && p.is_literal(); + if !min_poisoned { + if let Some(xmin) = p.minimum_len() { + if props.minimum_len.map_or(true, |pmin| xmin < pmin) { + props.minimum_len = Some(xmin); + } + } else { + props.minimum_len = None; + min_poisoned = true; + } + } + if !max_poisoned { + if let Some(xmax) = p.maximum_len() { + if props.maximum_len.map_or(true, |pmax| xmax > pmax) { + props.maximum_len = Some(xmax); + } + } else { + props.maximum_len = None; + max_poisoned = true; + } + } + } + Properties(Box::new(props)) + } +} + +impl Properties { + /// Create a new set of HIR properties for an empty regex. + fn empty() -> Properties { + let inner = PropertiesI { + minimum_len: Some(0), + maximum_len: Some(0), + look_set: LookSet::empty(), + look_set_prefix: LookSet::empty(), + look_set_suffix: LookSet::empty(), + look_set_prefix_any: LookSet::empty(), + look_set_suffix_any: LookSet::empty(), + // It is debatable whether an empty regex always matches at valid + // UTF-8 boundaries. Strictly speaking, at a byte oriented view, + // it is clearly false. There are, for example, many empty strings + // between the bytes encoding a '☃'. + // + // However, when Unicode mode is enabled, the fundamental atom + // of matching is really a codepoint. And in that scenario, an + // empty regex is defined to only match at valid UTF-8 boundaries + // and to never split a codepoint. It just so happens that this + // enforcement is somewhat tricky to do for regexes that match + // the empty string inside regex engines themselves. It usually + // requires some layer above the regex engine to filter out such + // matches. + // + // In any case, 'true' is really the only coherent option. If it + // were false, for example, then 'a*' would also need to be false + // since it too can match the empty string. + utf8: true, + explicit_captures_len: 0, + static_explicit_captures_len: Some(0), + literal: false, + alternation_literal: false, + }; + Properties(Box::new(inner)) + } + + /// Create a new set of HIR properties for a literal regex. + fn literal(lit: &Literal) -> Properties { + let inner = PropertiesI { + minimum_len: Some(lit.0.len()), + maximum_len: Some(lit.0.len()), + look_set: LookSet::empty(), + look_set_prefix: LookSet::empty(), + look_set_suffix: LookSet::empty(), + look_set_prefix_any: LookSet::empty(), + look_set_suffix_any: LookSet::empty(), + utf8: core::str::from_utf8(&lit.0).is_ok(), + explicit_captures_len: 0, + static_explicit_captures_len: Some(0), + literal: true, + alternation_literal: true, + }; + Properties(Box::new(inner)) + } + + /// Create a new set of HIR properties for a character class. 
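The union loop above intersects the required prefix and suffix look sets across branches, so an anchor survives a union only when every branch carries it. A sketch using the public `Properties::union`, in the same style as the doc examples:

use regex_syntax::hir::{Look, Properties};
use regex_syntax::parse;

fn main() {
    let a = parse(r"^a").unwrap();
    let b = parse(r"^b").unwrap();
    let c = parse(r"c").unwrap();

    // Every branch is anchored, so the union stays anchored.
    let both = Properties::union([a.properties(), b.properties()]);
    assert!(both.look_set_prefix().contains(Look::Start));

    // One unanchored branch empties the required prefix set.
    let mixed = Properties::union([a.properties(), c.properties()]);
    assert!(!mixed.look_set_prefix().contains(Look::Start));
}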
+ fn class(class: &Class) -> Properties { + let inner = PropertiesI { + minimum_len: class.minimum_len(), + maximum_len: class.maximum_len(), + look_set: LookSet::empty(), + look_set_prefix: LookSet::empty(), + look_set_suffix: LookSet::empty(), + look_set_prefix_any: LookSet::empty(), + look_set_suffix_any: LookSet::empty(), + utf8: class.is_utf8(), + explicit_captures_len: 0, + static_explicit_captures_len: Some(0), + literal: false, + alternation_literal: false, + }; + Properties(Box::new(inner)) + } + + /// Create a new set of HIR properties for a look-around assertion. + fn look(look: Look) -> Properties { + let inner = PropertiesI { + minimum_len: Some(0), + maximum_len: Some(0), + look_set: LookSet::singleton(look), + look_set_prefix: LookSet::singleton(look), + look_set_suffix: LookSet::singleton(look), + look_set_prefix_any: LookSet::singleton(look), + look_set_suffix_any: LookSet::singleton(look), + // This requires a little explanation. Basically, we don't consider + // matching an empty string to be equivalent to matching invalid + // UTF-8, even though technically matching every empty string will + // split the UTF-8 encoding of a single codepoint when treating a + // UTF-8 encoded string as a sequence of bytes. Our defense here is + // that in such a case, a codepoint should logically be treated as + // the fundamental atom for matching, and thus the only valid match + // points are between codepoints and not bytes. + // + // More practically, this is true here because it's also true + // for 'Hir::empty()', otherwise something like 'a*' would be + // considered to match invalid UTF-8. That in turn makes this + // property borderline useless. + utf8: true, + explicit_captures_len: 0, + static_explicit_captures_len: Some(0), + literal: false, + alternation_literal: false, + }; + Properties(Box::new(inner)) + } + + /// Create a new set of HIR properties for a repetition. + fn repetition(rep: &Repetition) -> Properties { + let p = rep.sub.properties(); + let minimum_len = p.minimum_len().map(|child_min| { + let rep_min = usize::try_from(rep.min).unwrap_or(usize::MAX); + child_min.saturating_mul(rep_min) + }); + let maximum_len = rep.max.and_then(|rep_max| { + let rep_max = usize::try_from(rep_max).ok()?; + let child_max = p.maximum_len()?; + child_max.checked_mul(rep_max) + }); + + let mut inner = PropertiesI { + minimum_len, + maximum_len, + look_set: p.look_set(), + look_set_prefix: LookSet::empty(), + look_set_suffix: LookSet::empty(), + look_set_prefix_any: p.look_set_prefix_any(), + look_set_suffix_any: p.look_set_suffix_any(), + utf8: p.is_utf8(), + explicit_captures_len: p.explicit_captures_len(), + static_explicit_captures_len: p.static_explicit_captures_len(), + literal: false, + alternation_literal: false, + }; + // If the repetition operator can match the empty string, then its + // lookset prefix and suffixes themselves remain empty since they are + // no longer required to match. + if rep.min > 0 { + inner.look_set_prefix = p.look_set_prefix(); + inner.look_set_suffix = p.look_set_suffix(); + } + // If the static captures len of the sub-expression is not known or + // is greater than zero, then it automatically propagates to the + // repetition, regardless of the repetition. Otherwise, it might + // change, but only when the repetition can match 0 times. + if rep.min == 0 + && inner.static_explicit_captures_len.map_or(false, |len| len > 0) + { + // If we require a match 0 times, then our captures len is + // guaranteed to be zero. 
Otherwise, if we *can* match the empty
+ // string, then it's impossible to know how many captures will be
+ // in the resulting match.
+ if rep.max == Some(0) {
+ inner.static_explicit_captures_len = Some(0);
+ } else {
+ inner.static_explicit_captures_len = None;
+ }
+ }
+ Properties(Box::new(inner))
+ }
+
+ /// Create a new set of HIR properties for a capture.
+ fn capture(capture: &Capture) -> Properties {
+ let p = capture.sub.properties();
+ Properties(Box::new(PropertiesI {
+ explicit_captures_len: p.explicit_captures_len().saturating_add(1),
+ static_explicit_captures_len: p
+ .static_explicit_captures_len()
+ .map(|len| len.saturating_add(1)),
+ literal: false,
+ alternation_literal: false,
+ ..*p.0.clone()
+ }))
+ }
+
+ /// Create a new set of HIR properties for a concatenation.
+ fn concat(concat: &[Hir]) -> Properties {
+ // The base case is an empty concatenation, which matches the empty
+ // string. Note though that empty concatenations aren't possible,
+ // because the Hir::concat smart constructor rewrites those as
+ // Hir::empty.
+ let mut props = PropertiesI {
+ minimum_len: Some(0),
+ maximum_len: Some(0),
+ look_set: LookSet::empty(),
+ look_set_prefix: LookSet::empty(),
+ look_set_suffix: LookSet::empty(),
+ look_set_prefix_any: LookSet::empty(),
+ look_set_suffix_any: LookSet::empty(),
+ utf8: true,
+ explicit_captures_len: 0,
+ static_explicit_captures_len: Some(0),
+ literal: true,
+ alternation_literal: true,
+ };
+ // Handle properties that need to visit every child hir.
+ for x in concat.iter() {
+ let p = x.properties();
+ props.look_set.set_union(p.look_set());
+ props.utf8 = props.utf8 && p.is_utf8();
+ props.explicit_captures_len = props
+ .explicit_captures_len
+ .saturating_add(p.explicit_captures_len());
+ props.static_explicit_captures_len = p
+ .static_explicit_captures_len()
+ .and_then(|len1| {
+ Some((len1, props.static_explicit_captures_len?))
+ })
+ .and_then(|(len1, len2)| Some(len1.saturating_add(len2)));
+ props.literal = props.literal && p.is_literal();
+ props.alternation_literal =
+ props.alternation_literal && p.is_alternation_literal();
+ if let Some(minimum_len) = props.minimum_len {
+ match p.minimum_len() {
+ None => props.minimum_len = None,
+ Some(len) => {
+ // We use saturating arithmetic here because the
+ // minimum is just a lower bound. We can't go any
+ // higher than what our number types permit.
+ props.minimum_len =
+ Some(minimum_len.saturating_add(len));
+ }
+ }
+ }
+ if let Some(maximum_len) = props.maximum_len {
+ match p.maximum_len() {
+ None => props.maximum_len = None,
+ Some(len) => {
+ props.maximum_len = maximum_len.checked_add(len)
+ }
+ }
+ }
+ }
+ // Handle the prefix properties, which only require visiting
+ // child exprs until one matches more than the empty string.
+ let mut it = concat.iter();
+ while let Some(x) = it.next() {
+ props.look_set_prefix.set_union(x.properties().look_set_prefix());
+ props
+ .look_set_prefix_any
+ .set_union(x.properties().look_set_prefix_any());
+ if x.properties().maximum_len().map_or(true, |x| x > 0) {
+ break;
+ }
+ }
+ // Same thing for the suffix properties, but in reverse.
+ let mut it = concat.iter().rev();
+ while let Some(x) = it.next() {
+ props.look_set_suffix.set_union(x.properties().look_set_suffix());
+ props
+ .look_set_suffix_any
+ .set_union(x.properties().look_set_suffix_any());
+ if x.properties().maximum_len().map_or(true, |x| x > 0) {
+ break;
+ }
+ }
+ Properties(Box::new(props))
+ }
+
+ /// Create a new set of HIR properties for an alternation.
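+ ///
+ /// Illustrative sketch (added for exposition; not from the original
+ /// docs): the union keeps the smallest minimum and the largest maximum
+ /// across all branches, as observable via the public API:
+ ///
+ /// ```rust
+ /// use regex_syntax::parse;
+ ///
+ /// let hir = parse(r"ab|c").unwrap();
+ /// let props = hir.properties();
+ /// assert_eq!(Some(1), props.minimum_len()); // from the `c` branch
+ /// assert_eq!(Some(2), props.maximum_len()); // from the `ab` branch
+ /// ```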
+ fn alternation(alts: &[Hir]) -> Properties {
+ Properties::union(alts.iter().map(|hir| hir.properties()))
+ }
+}
+
+/// A set of look-around assertions.
+///
+/// This is useful for efficiently tracking look-around assertions. For
+/// example, an [`Hir`] provides properties that return `LookSet`s.
+#[derive(Clone, Copy, Default, Eq, PartialEq)]
+pub struct LookSet {
+ /// The underlying representation of this set is exposed to make it
+ /// possible to store it somewhere efficiently. The representation is that
+ /// of a bitset, where each assertion occupies bit `i` where `i =
+ /// Look::as_repr()`.
+ ///
+ /// Note that users of this internal representation must permit the full
+ /// range of `u32` values to be represented. For example, even if the
+ /// current implementation only makes use of the 18 least significant bits,
+ /// it may use more bits in a future semver compatible release.
+ pub bits: u32,
+}
+
+impl LookSet {
+ /// Create an empty set of look-around assertions.
+ #[inline]
+ pub fn empty() -> LookSet {
+ LookSet { bits: 0 }
+ }
+
+ /// Create a full set of look-around assertions.
+ ///
+ /// This set contains all possible look-around assertions.
+ #[inline]
+ pub fn full() -> LookSet {
+ LookSet { bits: !0 }
+ }
+
+ /// Create a look-around set containing the look-around assertion given.
+ ///
+ /// This is a convenience routine for creating an empty set and inserting
+ /// one look-around assertion.
+ #[inline]
+ pub fn singleton(look: Look) -> LookSet {
+ LookSet::empty().insert(look)
+ }
+
+ /// Returns the total number of look-around assertions in this set.
+ #[inline]
+ pub fn len(self) -> usize {
+ // OK because max value always fits in a u8, which in turn always
+ // fits in a usize, regardless of target.
+ usize::try_from(self.bits.count_ones()).unwrap()
+ }
+
+ /// Returns true if and only if this set is empty.
+ #[inline]
+ pub fn is_empty(self) -> bool {
+ self.len() == 0
+ }
+
+ /// Returns true if and only if the given look-around assertion is in this
+ /// set.
+ #[inline]
+ pub fn contains(self, look: Look) -> bool {
+ self.bits & look.as_repr() != 0
+ }
+
+ /// Returns true if and only if this set contains any anchor assertions.
+ /// This includes both "start/end of haystack" and "start/end of line."
+ #[inline]
+ pub fn contains_anchor(&self) -> bool {
+ self.contains_anchor_haystack() || self.contains_anchor_line()
+ }
+
+ /// Returns true if and only if this set contains any "start/end of
+ /// haystack" anchors. This doesn't include "start/end of line" anchors.
+ #[inline]
+ pub fn contains_anchor_haystack(&self) -> bool {
+ self.contains(Look::Start) || self.contains(Look::End)
+ }
+
+ /// Returns true if and only if this set contains any "start/end of line"
+ /// anchors. This doesn't include "start/end of haystack" anchors. This
+ /// includes both `\n` line anchors and CRLF (`\r\n`) aware line anchors.
+ #[inline]
+ pub fn contains_anchor_line(&self) -> bool {
+ self.contains(Look::StartLF)
+ || self.contains(Look::EndLF)
+ || self.contains(Look::StartCRLF)
+ || self.contains(Look::EndCRLF) }
- /// Negate this byte class.
- ///
- /// For all `b` where `b` is any byte, if `b` was in this set, then it
- /// will not be in this set after negation.
- pub fn negate(&mut self) {
- self.set.negate();
+ /// Returns true if and only if this set contains any "start/end of line"
+ /// anchors that only treat `\n` as line terminators. This does not include
+ /// haystack anchors or CRLF aware line anchors.
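+ ///
+ /// An illustrative example (added for exposition; not from the original
+ /// docs) exercising this predicate together with its siblings:
+ ///
+ /// ```rust
+ /// use regex_syntax::hir::{Look, LookSet};
+ ///
+ /// let set = LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode);
+ /// assert!(set.contains_anchor_lf());
+ /// assert!(set.contains_anchor_line());
+ /// assert!(!set.contains_anchor_haystack());
+ /// assert!(set.contains_word_unicode());
+ /// assert_eq!(2, set.len());
+ /// ```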
+ #[inline]
+ pub fn contains_anchor_lf(&self) -> bool {
+ self.contains(Look::StartLF) || self.contains(Look::EndLF) }
- /// Union this byte class with the given byte class, in place.
- pub fn union(&mut self, other: &ClassBytes) {
- self.set.union(&other.set);
+ /// Returns true if and only if this set contains any "start/end of line"
+ /// anchors that are CRLF-aware. This doesn't include "start/end of
+ /// haystack" or "start/end of line-feed" anchors.
+ #[inline]
+ pub fn contains_anchor_crlf(&self) -> bool {
+ self.contains(Look::StartCRLF) || self.contains(Look::EndCRLF) }
- /// Intersect this byte class with the given byte class, in place.
- pub fn intersect(&mut self, other: &ClassBytes) {
- self.set.intersect(&other.set);
+ /// Returns true if and only if this set contains any word boundary or
+ /// negated word boundary assertions. This includes both Unicode and ASCII
+ /// word boundaries.
+ #[inline]
+ pub fn contains_word(self) -> bool {
+ self.contains_word_unicode() || self.contains_word_ascii() }
- /// Subtract the given byte class from this byte class, in place.
- pub fn difference(&mut self, other: &ClassBytes) {
- self.set.difference(&other.set);
+ /// Returns true if and only if this set contains any Unicode word boundary
+ /// or negated Unicode word boundary assertions.
+ #[inline]
+ pub fn contains_word_unicode(self) -> bool {
+ self.contains(Look::WordUnicode)
+ || self.contains(Look::WordUnicodeNegate)
+ || self.contains(Look::WordStartUnicode)
+ || self.contains(Look::WordEndUnicode)
+ || self.contains(Look::WordStartHalfUnicode)
+ || self.contains(Look::WordEndHalfUnicode) }
- /// Compute the symmetric difference of the given byte classes, in place.
- ///
- /// This computes the symmetric difference of two byte classes. This
- /// removes all elements in this class that are also in the given class,
- /// but adds all elements from the given class that aren't in this
- /// class. That is, the class will contain all elements in either class,
- /// but will not contain any elements that are in both classes.
- pub fn symmetric_difference(&mut self, other: &ClassBytes) {
- self.set.symmetric_difference(&other.set);
+ /// Returns true if and only if this set contains any ASCII word boundary
+ /// or negated ASCII word boundary assertions.
+ #[inline]
+ pub fn contains_word_ascii(self) -> bool {
+ self.contains(Look::WordAscii)
+ || self.contains(Look::WordAsciiNegate)
+ || self.contains(Look::WordStartAscii)
+ || self.contains(Look::WordEndAscii)
+ || self.contains(Look::WordStartHalfAscii)
+ || self.contains(Look::WordEndHalfAscii) }
- /// Returns true if and only if this character class will either match
- /// nothing or only ASCII bytes. Stated differently, this returns false
- /// if and only if this class contains a non-ASCII byte.
- pub fn is_all_ascii(&self) -> bool {
- self.set.intervals().last().map_or(true, |r| r.end <= 0x7F)
+ /// Returns an iterator over all of the look-around assertions in this set.
+ #[inline]
+ pub fn iter(self) -> LookSetIter {
+ LookSetIter { set: self } }
-}
-
-/// An iterator over all ranges in a byte character class.
-///
-/// The lifetime `'a` refers to the lifetime of the underlying class.
-#[derive(Debug)]
-pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>);
-impl<'a> Iterator for ClassBytesIter<'a> {
- type Item = &'a ClassBytesRange;
+ /// Return a new set that is equivalent to the original, but with the given
+ /// assertion added to it.
If the assertion is already in the set, then the
+ /// returned set is equivalent to the original.
+ #[inline]
+ pub fn insert(self, look: Look) -> LookSet {
+ LookSet { bits: self.bits | look.as_repr() }
+ }
- fn next(&mut self) -> Option<&'a ClassBytesRange> {
- self.0.next()
+ /// Updates this set in place with the result of inserting the given
+ /// assertion into this set.
+ #[inline]
+ pub fn set_insert(&mut self, look: Look) {
+ *self = self.insert(look);
 }
-}
-/// A single range of characters represented by arbitrary bytes.
-///
-/// The range is closed. That is, the start and end of the range are included
-/// in the range.
-#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
-pub struct ClassBytesRange {
- start: u8,
- end: u8,
-}
+ /// Return a new set that is equivalent to the original, but with the given
+ /// assertion removed from it. If the assertion is not in the set, then the
+ /// returned set is equivalent to the original.
+ #[inline]
+ pub fn remove(self, look: Look) -> LookSet {
+ LookSet { bits: self.bits & !look.as_repr() }
+ }
-impl Interval for ClassBytesRange {
- type Bound = u8;
+ /// Updates this set in place with the result of removing the given
+ /// assertion from this set.
+ #[inline]
+ pub fn set_remove(&mut self, look: Look) {
+ *self = self.remove(look);
+ }
+ /// Returns a new set that is the result of subtracting the given set from
+ /// this set.
 #[inline]
- fn lower(&self) -> u8 {
- self.start
+ pub fn subtract(self, other: LookSet) -> LookSet {
+ LookSet { bits: self.bits & !other.bits }
 }
+
+ /// Updates this set in place with the result of subtracting the given set
+ /// from this set.
 #[inline]
- fn upper(&self) -> u8 {
- self.end
+ pub fn set_subtract(&mut self, other: LookSet) {
+ *self = self.subtract(other);
 }
+
+ /// Returns a new set that is the union of this and the one given.
 #[inline]
- fn set_lower(&mut self, bound: u8) {
- self.start = bound;
+ pub fn union(self, other: LookSet) -> LookSet {
+ LookSet { bits: self.bits | other.bits }
 }
+
+ /// Updates this set in place with the result of unioning it with the one
+ /// given.
 #[inline]
- fn set_upper(&mut self, bound: u8) {
- self.end = bound;
+ pub fn set_union(&mut self, other: LookSet) {
+ *self = self.union(other);
 }
- /// Apply simple case folding to this byte range. Only ASCII case mappings
- /// (for a-z) are applied.
- ///
- /// Additional ranges are appended to the given vector. Canonical ordering
- /// is *not* maintained in the given vector.
- fn case_fold_simple(
- &self,
- ranges: &mut Vec<ClassBytesRange>,
- ) -> Result<(), unicode::CaseFoldError> {
- if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) {
- let lower = cmp::max(self.start, b'a');
- let upper = cmp::min(self.end, b'z');
- ranges.push(ClassBytesRange::new(lower - 32, upper - 32));
- }
- if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) {
- let lower = cmp::max(self.start, b'A');
- let upper = cmp::min(self.end, b'Z');
- ranges.push(ClassBytesRange::new(lower + 32, upper + 32));
- }
- Ok(())
+ /// Returns a new set that is the intersection of this and the one given.
+ #[inline]
+ pub fn intersect(self, other: LookSet) -> LookSet {
+ LookSet { bits: self.bits & other.bits }
 }
-}
-impl ClassBytesRange {
- /// Create a new byte range for a character class.
- ///
- /// The returned range is always in a canonical form. That is, the range
- /// returned always satisfies the invariant that `start <= end`.
- pub fn new(start: u8, end: u8) -> ClassBytesRange { - ClassBytesRange::create(start, end) + /// Updates this set in place with the result of intersecting it with the + /// one given. + #[inline] + pub fn set_intersect(&mut self, other: LookSet) { + *self = self.intersect(other); } - /// Return the start of this range. + /// Return a `LookSet` from the slice given as a native endian 32-bit + /// integer. /// - /// The start of a range is always less than or equal to the end of the - /// range. - pub fn start(&self) -> u8 { - self.start + /// # Panics + /// + /// This panics if `slice.len() < 4`. + #[inline] + pub fn read_repr(slice: &[u8]) -> LookSet { + let bits = u32::from_ne_bytes(slice[..4].try_into().unwrap()); + LookSet { bits } } - /// Return the end of this range. + /// Write a `LookSet` as a native endian 32-bit integer to the beginning + /// of the slice given. /// - /// The end of a range is always greater than or equal to the start of the - /// range. - pub fn end(&self) -> u8 { - self.end + /// # Panics + /// + /// This panics if `slice.len() < 4`. + #[inline] + pub fn write_repr(self, slice: &mut [u8]) { + let raw = self.bits.to_ne_bytes(); + slice[0] = raw[0]; + slice[1] = raw[1]; + slice[2] = raw[2]; + slice[3] = raw[3]; } } -impl fmt::Debug for ClassBytesRange { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut debug = f.debug_struct("ClassBytesRange"); - if self.start <= 0x7F { - debug.field("start", &(self.start as char)); - } else { - debug.field("start", &self.start); +impl core::fmt::Debug for LookSet { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if self.is_empty() { + return write!(f, "∅"); } - if self.end <= 0x7F { - debug.field("end", &(self.end as char)); - } else { - debug.field("end", &self.end); + for look in self.iter() { + write!(f, "{}", look.as_char())?; } - debug.finish() + Ok(()) } } -/// The high-level intermediate representation for an anchor assertion. +/// An iterator over all look-around assertions in a [`LookSet`]. /// -/// A matching anchor assertion is always zero-length. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Anchor { - /// Match the beginning of a line or the beginning of text. Specifically, - /// this matches at the starting position of the input, or at the position - /// immediately following a `\n` character. - StartLine, - /// Match the end of a line or the end of text. Specifically, - /// this matches at the end position of the input, or at the position - /// immediately preceding a `\n` character. - EndLine, - /// Match the beginning of text. Specifically, this matches at the starting - /// position of the input. - StartText, - /// Match the end of text. Specifically, this matches at the ending - /// position of the input. - EndText, +/// This iterator is created by [`LookSet::iter`]. +#[derive(Clone, Debug)] +pub struct LookSetIter { + set: LookSet, } -/// The high-level intermediate representation for a word-boundary assertion. -/// -/// A matching word boundary assertion is always zero-length. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum WordBoundary { - /// Match a Unicode-aware word boundary. That is, this matches a position - /// where the left adjacent character and right adjacent character - /// correspond to a word and non-word or a non-word and word character. - Unicode, - /// Match a Unicode-aware negation of a word boundary. - UnicodeNegate, - /// Match an ASCII-only word boundary. 
That is, this matches a position
- /// where the left adjacent character and right adjacent character
- /// correspond to a word and non-word or a non-word and word character.
- Ascii,
- /// Match an ASCII-only negation of a word boundary.
- AsciiNegate,
-}
impl Iterator for LookSetIter {
 type Item = Look;
-impl WordBoundary {
- /// Returns true if and only if this word boundary assertion is negated.
- pub fn is_negated(&self) -> bool {
- match *self {
- WordBoundary::Unicode | WordBoundary::Ascii => false,
- WordBoundary::UnicodeNegate | WordBoundary::AsciiNegate => true,
+ #[inline]
+ fn next(&mut self) -> Option<Look> {
+ if self.set.is_empty() {
+ return None; }
+ // We'll never have more than u8::MAX distinct look-around assertions,
+ // so 'bit' will always fit into a u16.
+ let bit = u16::try_from(self.set.bits.trailing_zeros()).unwrap();
+ let look = Look::from_repr(1 << bit)?;
+ self.set = self.set.remove(look);
+ Some(look) } }
-/// The high-level intermediate representation for a group.
-///
-/// This represents one of three possible group types:
-///
-/// 1. A non-capturing group (e.g., `(?:expr)`).
-/// 2. A capturing group (e.g., `(expr)`).
-/// 3. A named capturing group (e.g., `(?P<name>expr)`).
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct Group {
- /// The kind of this group. If it is a capturing group, then the kind
- /// contains the capture group index (and the name, if it is a named
- /// group).
- pub kind: GroupKind,
- /// The expression inside the capturing group, which may be empty.
- pub hir: Box<Hir>,
-}
-/// The kind of group.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum GroupKind {
- /// A normal unnamed capturing group.
- ///
- /// The value is the capture index of the group.
- CaptureIndex(u32),
- /// A named capturing group.
- CaptureName {
- /// The name of the group.
- name: String,
- /// The capture index of the group.
- index: u32,
- },
- /// A non-capturing group.
- NonCapturing,
+/// Given a sequence of HIR values where each value corresponds to a Unicode
+/// class (or an all-ASCII byte class), return a single Unicode class
+/// corresponding to the union of the classes found.
+fn class_chars(hirs: &[Hir]) -> Option<Class> {
+ let mut cls = ClassUnicode::new(vec![]);
+ for hir in hirs.iter() {
+ match *hir.kind() {
+ HirKind::Class(Class::Unicode(ref cls2)) => {
+ cls.union(cls2); }
+ HirKind::Class(Class::Bytes(ref cls2)) => {
+ cls.union(&cls2.to_unicode_class()?); }
+ _ => return None,
+ }; }
+ Some(Class::Unicode(cls)) }
-/// The high-level intermediate representation of a repetition operator.
-///
-/// A repetition operator permits the repetition of an arbitrary
-/// sub-expression.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct Repetition {
- /// The kind of this repetition operator.
- pub kind: RepetitionKind,
- /// Whether this repetition operator is greedy or not. A greedy operator
- /// will match as much as it can. A non-greedy operator will match as
- /// little as it can.
- ///
- /// Typically, operators are greedy by default and are only non-greedy when
- /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
- /// not. However, this can be inverted via the `U` "ungreedy" flag.
- pub greedy: bool,
- /// The expression being repeated.
- pub hir: Box<Hir>,
+/// Given a sequence of HIR values where each value corresponds to a byte class
+/// (or an all-ASCII Unicode class), return a single byte class corresponding
+/// to the union of the classes found.
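+///
+/// Illustrative sketch of the union step (added for exposition; this helper
+/// is private, so the example uses the public `ClassBytes` API it delegates
+/// to):
+///
+/// ```rust
+/// use regex_syntax::hir::{ClassBytes, ClassBytesRange};
+///
+/// let mut cls = ClassBytes::new(vec![ClassBytesRange::new(b'a', b'c')]);
+/// cls.union(&ClassBytes::new(vec![ClassBytesRange::new(b'x', b'z')]));
+/// // The two disjoint ranges are both present after the union.
+/// assert_eq!(2, cls.ranges().len());
+/// ```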
+fn class_bytes(hirs: &[Hir]) -> Option<Class> {
+ let mut cls = ClassBytes::new(vec![]);
+ for hir in hirs.iter() {
+ match *hir.kind() {
+ HirKind::Class(Class::Unicode(ref cls2)) => {
+ cls.union(&cls2.to_byte_class()?); }
+ HirKind::Class(Class::Bytes(ref cls2)) => {
+ cls.union(cls2); }
+ _ => return None,
+ }; }
+ Some(Class::Bytes(cls)) }
-impl Repetition {
- /// Returns true if and only if this repetition operator makes it possible
- /// to match the empty string.
- ///
- /// Note that this is not defined inductively. For example, while `a*`
- /// will report `true`, `()+` will not, even though `()` matches the empty
- /// string and one or more occurrences of something that matches the empty
- /// string will always match the empty string. In order to get the
- /// inductive definition, see the corresponding method on
- /// [`Hir`](struct.Hir.html).
- pub fn is_match_empty(&self) -> bool {
- match self.kind {
- RepetitionKind::ZeroOrOne => true,
- RepetitionKind::ZeroOrMore => true,
- RepetitionKind::OneOrMore => false,
- RepetitionKind::Range(RepetitionRange::Exactly(m)) => m == 0,
- RepetitionKind::Range(RepetitionRange::AtLeast(m)) => m == 0,
- RepetitionKind::Range(RepetitionRange::Bounded(m, _)) => m == 0,
+/// Given a sequence of HIR values where each value corresponds to a literal
+/// that is a single `char`, return that sequence of `char`s. Otherwise return
+/// None. No deduplication is done.
+fn singleton_chars(hirs: &[Hir]) -> Option<Vec<char>> {
+ let mut singletons = vec![];
+ for hir in hirs.iter() {
+ let literal = match *hir.kind() {
+ HirKind::Literal(Literal(ref bytes)) => bytes,
+ _ => return None,
+ };
+ let ch = match crate::debug::utf8_decode(literal) {
+ None => return None,
+ Some(Err(_)) => return None,
+ Some(Ok(ch)) => ch,
+ };
+ if literal.len() != ch.len_utf8() {
+ return None; }
+ singletons.push(ch); }
+ Some(singletons) }
-/// The kind of a repetition operator.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum RepetitionKind {
- /// Matches a sub-expression zero or one times.
- ZeroOrOne,
- /// Matches a sub-expression zero or more times.
- ZeroOrMore,
- /// Matches a sub-expression one or more times.
- OneOrMore,
- /// Matches a sub-expression within a bounded range of times.
- Range(RepetitionRange),
-}
-/// The kind of a counted repetition operator.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum RepetitionRange {
- /// Matches a sub-expression exactly this many times.
- Exactly(u32),
- /// Matches a sub-expression at least this many times.
- AtLeast(u32),
- /// Matches a sub-expression at least `m` times and at most `n` times.
- Bounded(u32, u32),
-}
-/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack
-/// space but heap space proportional to the depth of the total `Hir`.
-impl Drop for Hir {
- fn drop(&mut self) {
- use std::mem;
-
- match *self.kind() {
- HirKind::Empty
- | HirKind::Literal(_)
- | HirKind::Class(_)
- | HirKind::Anchor(_)
- | HirKind::WordBoundary(_) => return,
- HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return,
- HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return,
- HirKind::Concat(ref x) if x.is_empty() => return,
- HirKind::Alternation(ref x) if x.is_empty() => return,
- _ => {}
- }
-
- let mut stack = vec![mem::replace(self, Hir::empty())];
- while let Some(mut expr) = stack.pop() {
- match expr.kind {
- HirKind::Empty
- | HirKind::Literal(_)
- | HirKind::Class(_)
- | HirKind::Anchor(_)
- | HirKind::WordBoundary(_) => {}
- HirKind::Group(ref mut x) => {
- stack.push(mem::replace(&mut x.hir, Hir::empty()));
- }
- HirKind::Repetition(ref mut x) => {
- stack.push(mem::replace(&mut x.hir, Hir::empty()));
- }
- HirKind::Concat(ref mut x) => {
- stack.extend(x.drain(..));
- }
- HirKind::Alternation(ref mut x) => {
- stack.extend(x.drain(..));
- }
- } } }
+/// Given a sequence of HIR values where each value corresponds to a literal
+/// that is a single byte, return that sequence of bytes. Otherwise return
+/// None. No deduplication is done.
+fn singleton_bytes(hirs: &[Hir]) -> Option<Vec<u8>> {
+ let mut singletons = vec![];
+ for hir in hirs.iter() {
+ let literal = match *hir.kind() {
+ HirKind::Literal(Literal(ref bytes)) => bytes,
+ _ => return None,
+ };
+ if literal.len() != 1 {
+ return None; }
+ singletons.push(literal[0]); }
+ Some(singletons) }
-/// A type that documents various attributes of an HIR expression.
-///
-/// These attributes are typically defined inductively on the HIR.
-#[derive(Clone, Debug, Eq, PartialEq)]
-struct HirInfo {
- /// Represent yes/no questions by a bitfield to conserve space, since
- /// this is included in every HIR expression.
- ///
- /// If more attributes need to be added, it is OK to increase the size of
- /// this as appropriate.
- bools: u16,
-}
-// A simple macro for defining bitfield accessors/mutators.
-macro_rules! define_bool {
- ($bit:expr, $is_fn_name:ident, $set_fn_name:ident) => {
- fn $is_fn_name(&self) -> bool {
- self.bools & (0b1 << $bit) > 0
+/// Looks for a common prefix in the list of alternation branches given. If one
+/// is found, then an equivalent but (hopefully) simplified Hir is returned.
+/// Otherwise, the original given list of branches is returned unmodified.
+///
+/// This is not quite as good as it could be. Right now, it requires that
+/// all branches are 'Concat' expressions. It also doesn't do well with
+/// literals. For example, given 'foofoo|foobar', it will not refactor it to
+/// 'foo(?:foo|bar)' because literals are flattened into their own special
+/// concatenation. (One wonders if perhaps 'Literal' should be a single atom
+/// instead of a string of bytes because of this. Otherwise, handling the
+/// current representation in this routine will be pretty gnarly. Sigh.)
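+///
+/// Illustrative sketch (added for exposition; not from the original docs):
+/// the effect is observable through `Hir::alternation`, which invokes this
+/// routine. Under the current smart constructors:
+///
+/// ```rust
+/// use regex_syntax::{hir::HirKind, parse};
+///
+/// // Both branches are concats starting with the same `\b` assertion, so
+/// // the prefix is lifted out and the result is a concat whose last
+/// // element is the alternation of the remaining suffixes.
+/// let hir = parse(r"\bfoo|\bbar").unwrap();
+/// assert!(matches!(*hir.kind(), HirKind::Concat(_)));
+/// ```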
+fn lift_common_prefix(hirs: Vec<Hir>) -> Result<Hir, Vec<Hir>> {
+ if hirs.len() <= 1 {
+ return Err(hirs);
+ }
+ let mut prefix = match hirs[0].kind() {
+ HirKind::Concat(ref xs) => &**xs,
+ _ => return Err(hirs),
+ };
+ if prefix.is_empty() {
+ return Err(hirs);
+ }
+ for h in hirs.iter().skip(1) {
+ let concat = match h.kind() {
+ HirKind::Concat(ref xs) => xs,
+ _ => return Err(hirs),
+ };
+ let common_len = prefix
+ .iter()
+ .zip(concat.iter())
+ .take_while(|(x, y)| x == y)
+ .count();
+ prefix = &prefix[..common_len];
+ if prefix.is_empty() {
+ return Err(hirs); }
-
- fn $set_fn_name(&mut self, yes: bool) {
- if yes {
- self.bools |= 1 << $bit;
- } else {
- self.bools &= !(1 << $bit);
- }
+ }
+ let len = prefix.len();
+ assert_ne!(0, len);
+ let mut prefix_concat = vec![];
+ let mut suffix_alts = vec![];
+ for h in hirs {
+ let mut concat = match h.into_kind() {
+ HirKind::Concat(xs) => xs,
+ // We required all sub-expressions to be
+ // concats above, so we're only here if we
+ // have a concat.
+ _ => unreachable!(),
+ };
+ suffix_alts.push(Hir::concat(concat.split_off(len)));
+ if prefix_concat.is_empty() {
+ prefix_concat = concat; }
- };
-}
-impl HirInfo {
- fn new() -> HirInfo {
- HirInfo { bools: 0 }
- }
-
- define_bool!(0, is_always_utf8, set_always_utf8);
- define_bool!(1, is_all_assertions, set_all_assertions);
- define_bool!(2, is_anchored_start, set_anchored_start);
- define_bool!(3, is_anchored_end, set_anchored_end);
- define_bool!(4, is_line_anchored_start, set_line_anchored_start);
- define_bool!(5, is_line_anchored_end, set_line_anchored_end);
- define_bool!(6, is_any_anchored_start, set_any_anchored_start);
- define_bool!(7, is_any_anchored_end, set_any_anchored_end);
- define_bool!(8, is_match_empty, set_match_empty);
- define_bool!(9, is_literal, set_literal);
- define_bool!(10, is_alternation_literal, set_alternation_literal);
+ }
+ let mut concat = prefix_concat;
+ concat.push(Hir::alternation(suffix_alts));
+ Ok(Hir::concat(concat))
}

#[cfg(test)]
@@ -2244,12 +3783,6 @@ mod tests {
 assert_eq!(expected, bsymdifference(&cls1, &cls2));
 }

- #[test]
- #[should_panic]
- fn hir_byte_literal_non_ascii() {
- Hir::literal(Literal::Byte(b'a'));
- }
-
 // We use a thread with an explicit stack size to test that our destructor
 // for Hir can handle arbitrarily sized expressions in constant stack
 // space.
In case we run on a platform without threads (WASM?), we limit @@ -2262,26 +3795,28 @@ mod tests { let run = || { let mut expr = Hir::empty(); for _ in 0..100 { - expr = Hir::group(Group { - kind: GroupKind::NonCapturing, - hir: Box::new(expr), + expr = Hir::capture(Capture { + index: 1, + name: None, + sub: Box::new(expr), }); expr = Hir::repetition(Repetition { - kind: RepetitionKind::ZeroOrOne, + min: 0, + max: Some(1), greedy: true, - hir: Box::new(expr), + sub: Box::new(expr), }); expr = Hir { kind: HirKind::Concat(vec![expr]), - info: HirInfo::new(), + props: Properties::empty(), }; expr = Hir { kind: HirKind::Alternation(vec![expr]), - info: HirInfo::new(), + props: Properties::empty(), }; } - assert!(!expr.kind.is_empty()); + assert!(!matches!(*expr.kind(), HirKind::Empty)); }; // We run our test on a thread with a small stack size so we can @@ -2296,4 +3831,31 @@ mod tests { .join() .unwrap(); } + + #[test] + fn look_set_iter() { + let set = LookSet::empty(); + assert_eq!(0, set.iter().count()); + + let set = LookSet::full(); + assert_eq!(18, set.iter().count()); + + let set = + LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode); + assert_eq!(2, set.iter().count()); + + let set = LookSet::empty().insert(Look::StartLF); + assert_eq!(1, set.iter().count()); + + let set = LookSet::empty().insert(Look::WordAsciiNegate); + assert_eq!(1, set.iter().count()); + } + + #[test] + fn look_set_debug() { + let res = format!("{:?}", LookSet::empty()); + assert_eq!("∅", res); + let res = format!("{:?}", LookSet::full()); + assert_eq!("Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶", res); + } } diff --git a/vendor/regex-syntax/src/hir/print.rs b/vendor/regex-syntax/src/hir/print.rs index b71f389..dfa6d40 100644 --- a/vendor/regex-syntax/src/hir/print.rs +++ b/vendor/regex-syntax/src/hir/print.rs @@ -2,11 +2,16 @@ This module provides a regular expression printer for `Hir`. */ -use std::fmt; +use core::fmt; -use crate::hir::visitor::{self, Visitor}; -use crate::hir::{self, Hir, HirKind}; -use crate::is_meta_character; +use crate::{ + hir::{ + self, + visitor::{self, Visitor}, + Hir, HirKind, + }, + is_meta_character, +}; /// A builder for constructing a printer. /// @@ -84,21 +89,61 @@ impl Visitor for Writer { fn visit_pre(&mut self, hir: &Hir) -> fmt::Result { match *hir.kind() { - HirKind::Empty - | HirKind::Repetition(_) - | HirKind::Concat(_) - | HirKind::Alternation(_) => {} - HirKind::Literal(hir::Literal::Unicode(c)) => { - self.write_literal_char(c)?; + HirKind::Empty => { + // Technically an empty sub-expression could be "printed" by + // just ignoring it, but in practice, you could have a + // repetition operator attached to an empty expression, and you + // really need something in the concrete syntax to make that + // work as you'd expect. + self.wtr.write_str(r"(?:)")?; } - HirKind::Literal(hir::Literal::Byte(b)) => { - self.write_literal_byte(b)?; + // Repetition operators are strictly suffix oriented. + HirKind::Repetition(_) => {} + HirKind::Literal(hir::Literal(ref bytes)) => { + // See the comment on the 'Concat' and 'Alternation' case below + // for why we put parens here. Literals are, conceptually, + // a special case of concatenation where each element is a + // character. The HIR flattens this into a Box<[u8]>, but we + // still need to treat it like a concatenation for correct + // printing. As a special case, we don't write parens if there + // is only one character. One character means there is no + // concat so we don't need parens. 
Adding parens would still be + // correct, but we drop them here because it tends to create + // rather noisy regexes even in simple cases. + let result = core::str::from_utf8(bytes); + let len = result.map_or(bytes.len(), |s| s.chars().count()); + if len > 1 { + self.wtr.write_str(r"(?:")?; + } + match result { + Ok(string) => { + for c in string.chars() { + self.write_literal_char(c)?; + } + } + Err(_) => { + for &b in bytes.iter() { + self.write_literal_byte(b)?; + } + } + } + if len > 1 { + self.wtr.write_str(r")")?; + } } HirKind::Class(hir::Class::Unicode(ref cls)) => { + if cls.ranges().is_empty() { + return self.wtr.write_str("[a&&b]"); + } self.wtr.write_str("[")?; for range in cls.iter() { if range.start() == range.end() { self.write_literal_char(range.start())?; + } else if u32::from(range.start()) + 1 + == u32::from(range.end()) + { + self.write_literal_char(range.start())?; + self.write_literal_char(range.end())?; } else { self.write_literal_char(range.start())?; self.wtr.write_str("-")?; @@ -108,10 +153,16 @@ impl Visitor for Writer { self.wtr.write_str("]")?; } HirKind::Class(hir::Class::Bytes(ref cls)) => { + if cls.ranges().is_empty() { + return self.wtr.write_str("[a&&b]"); + } self.wtr.write_str("(?-u:[")?; for range in cls.iter() { if range.start() == range.end() { self.write_literal_class_byte(range.start())?; + } else if range.start() + 1 == range.end() { + self.write_literal_class_byte(range.start())?; + self.write_literal_class_byte(range.end())?; } else { self.write_literal_class_byte(range.start())?; self.wtr.write_str("-")?; @@ -120,41 +171,84 @@ impl Visitor for Writer { } self.wtr.write_str("])")?; } - HirKind::Anchor(hir::Anchor::StartLine) => { - self.wtr.write_str("(?m:^)")?; - } - HirKind::Anchor(hir::Anchor::EndLine) => { - self.wtr.write_str("(?m:$)")?; - } - HirKind::Anchor(hir::Anchor::StartText) => { - self.wtr.write_str(r"\A")?; - } - HirKind::Anchor(hir::Anchor::EndText) => { - self.wtr.write_str(r"\z")?; - } - HirKind::WordBoundary(hir::WordBoundary::Unicode) => { - self.wtr.write_str(r"\b")?; - } - HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => { - self.wtr.write_str(r"\B")?; - } - HirKind::WordBoundary(hir::WordBoundary::Ascii) => { - self.wtr.write_str(r"(?-u:\b)")?; - } - HirKind::WordBoundary(hir::WordBoundary::AsciiNegate) => { - self.wtr.write_str(r"(?-u:\B)")?; - } - HirKind::Group(ref x) => match x.kind { - hir::GroupKind::CaptureIndex(_) => { - self.wtr.write_str("(")?; + HirKind::Look(ref look) => match *look { + hir::Look::Start => { + self.wtr.write_str(r"\A")?; + } + hir::Look::End => { + self.wtr.write_str(r"\z")?; + } + hir::Look::StartLF => { + self.wtr.write_str("(?m:^)")?; + } + hir::Look::EndLF => { + self.wtr.write_str("(?m:$)")?; + } + hir::Look::StartCRLF => { + self.wtr.write_str("(?mR:^)")?; + } + hir::Look::EndCRLF => { + self.wtr.write_str("(?mR:$)")?; + } + hir::Look::WordAscii => { + self.wtr.write_str(r"(?-u:\b)")?; + } + hir::Look::WordAsciiNegate => { + self.wtr.write_str(r"(?-u:\B)")?; + } + hir::Look::WordUnicode => { + self.wtr.write_str(r"\b")?; + } + hir::Look::WordUnicodeNegate => { + self.wtr.write_str(r"\B")?; } - hir::GroupKind::CaptureName { ref name, .. 
} => { - write!(self.wtr, "(?P<{}>", name)?; + hir::Look::WordStartAscii => { + self.wtr.write_str(r"(?-u:\b{start})")?; } - hir::GroupKind::NonCapturing => { - self.wtr.write_str("(?:")?; + hir::Look::WordEndAscii => { + self.wtr.write_str(r"(?-u:\b{end})")?; + } + hir::Look::WordStartUnicode => { + self.wtr.write_str(r"\b{start}")?; + } + hir::Look::WordEndUnicode => { + self.wtr.write_str(r"\b{end}")?; + } + hir::Look::WordStartHalfAscii => { + self.wtr.write_str(r"(?-u:\b{start-half})")?; + } + hir::Look::WordEndHalfAscii => { + self.wtr.write_str(r"(?-u:\b{end-half})")?; + } + hir::Look::WordStartHalfUnicode => { + self.wtr.write_str(r"\b{start-half}")?; + } + hir::Look::WordEndHalfUnicode => { + self.wtr.write_str(r"\b{end-half}")?; } }, + HirKind::Capture(hir::Capture { ref name, .. }) => { + self.wtr.write_str("(")?; + if let Some(ref name) = *name { + write!(self.wtr, "?P<{}>", name)?; + } + } + // Why do this? Wrapping concats and alts in non-capturing groups + // is not *always* necessary, but is sometimes necessary. For + // example, 'concat(a, alt(b, c))' should be written as 'a(?:b|c)' + // and not 'ab|c'. The former is clearly the intended meaning, but + // the latter is actually 'alt(concat(a, b), c)'. + // + // It would be possible to only group these things in cases where + // it's strictly necessary, but it requires knowing the parent + // expression. And since this technique is simpler and always + // correct, we take this route. More to the point, it is a non-goal + // of an HIR printer to show a nice easy-to-read regex. Indeed, + // its construction forbids it from doing so. Therefore, inserting + // extra groups where they aren't necessary is perfectly okay. + HirKind::Concat(_) | HirKind::Alternation(_) => { + self.wtr.write_str(r"(?:")?; + } } Ok(()) } @@ -165,39 +259,42 @@ impl Visitor for Writer { HirKind::Empty | HirKind::Literal(_) | HirKind::Class(_) - | HirKind::Anchor(_) - | HirKind::WordBoundary(_) - | HirKind::Concat(_) - | HirKind::Alternation(_) => {} + | HirKind::Look(_) => {} HirKind::Repetition(ref x) => { - match x.kind { - hir::RepetitionKind::ZeroOrOne => { + match (x.min, x.max) { + (0, Some(1)) => { self.wtr.write_str("?")?; } - hir::RepetitionKind::ZeroOrMore => { + (0, None) => { self.wtr.write_str("*")?; } - hir::RepetitionKind::OneOrMore => { + (1, None) => { self.wtr.write_str("+")?; } - hir::RepetitionKind::Range(ref x) => match *x { - hir::RepetitionRange::Exactly(m) => { - write!(self.wtr, "{{{}}}", m)?; - } - hir::RepetitionRange::AtLeast(m) => { - write!(self.wtr, "{{{},}}", m)?; - } - hir::RepetitionRange::Bounded(m, n) => { - write!(self.wtr, "{{{},{}}}", m, n)?; - } - }, + (1, Some(1)) => { + // 'a{1}' and 'a{1}?' are exactly equivalent to 'a'. + return Ok(()); + } + (m, None) => { + write!(self.wtr, "{{{},}}", m)?; + } + (m, Some(n)) if m == n => { + write!(self.wtr, "{{{}}}", m)?; + // a{m} and a{m}? are always exactly equivalent. 
+ return Ok(());
+ }
+ (m, Some(n)) => {
+ write!(self.wtr, "{{{},{}}}", m, n)?;
+ } }
 if !x.greedy {
 self.wtr.write_str("?")?;
 }
 }
- HirKind::Group(_) => {
- self.wtr.write_str(")")?;
+ HirKind::Capture(_)
+ | HirKind::Concat(_)
+ | HirKind::Alternation(_) => {
+ self.wtr.write_str(r")")?;
 }
 }
 Ok(())
 }
@@ -217,18 +314,16 @@ impl Writer {
 }

 fn write_literal_byte(&mut self, b: u8) -> fmt::Result {
- let c = b as char;
- if c <= 0x7F as char && !c.is_control() && !c.is_whitespace() {
- self.write_literal_char(c)
+ if b <= 0x7F && !b.is_ascii_control() && !b.is_ascii_whitespace() {
+ self.write_literal_char(char::try_from(b).unwrap())
 } else {
 write!(self.wtr, "(?-u:\\x{:02X})", b)
 }
 }

 fn write_literal_class_byte(&mut self, b: u8) -> fmt::Result {
- let c = b as char;
- if c <= 0x7F as char && !c.is_control() && !c.is_whitespace() {
- self.write_literal_char(c)
+ if b <= 0x7F && !b.is_ascii_control() && !b.is_ascii_whitespace() {
+ self.write_literal_char(char::try_from(b).unwrap())
 } else {
 write!(self.wtr, "\\x{:02X}", b)
 }
@@ -237,15 +332,21 @@ impl Writer {
 #[cfg(test)]
 mod tests {
- use super::Printer;
+ use alloc::{
+ boxed::Box,
+ string::{String, ToString},
+ };
+
 use crate::ParserBuilder;
+ use super::*;
+
 fn roundtrip(given: &str, expected: &str) {
 roundtrip_with(|b| b, given, expected);
 }

 fn roundtrip_bytes(given: &str, expected: &str) {
- roundtrip_with(|b| b.allow_invalid_utf8(true), given, expected);
+ roundtrip_with(|b| b.utf8(false), given, expected);
 }

 fn roundtrip_with<F>(mut f: F, given: &str, expected: &str)
 where
@@ -277,28 +378,35 @@ mod tests {
 #[test]
 fn print_class() {
- roundtrip(r"[a]", r"[a]");
+ roundtrip(r"[a]", r"a");
+ roundtrip(r"[ab]", r"[ab]");
 roundtrip(r"[a-z]", r"[a-z]");
 roundtrip(r"[a-z--b-c--x-y]", r"[ad-wz]");
- roundtrip(r"[^\x01-\u{10FFFF}]", "[\u{0}]");
- roundtrip(r"[-]", r"[\-]");
+ roundtrip(r"[^\x01-\u{10FFFF}]", "\u{0}");
+ roundtrip(r"[-]", r"\-");
 roundtrip(r"[☃-⛄]", r"[☃-⛄]");

- roundtrip(r"(?-u)[a]", r"(?-u:[a])");
+ roundtrip(r"(?-u)[a]", r"a");
+ roundtrip(r"(?-u)[ab]", r"(?-u:[ab])");
 roundtrip(r"(?-u)[a-z]", r"(?-u:[a-z])");
 roundtrip_bytes(r"(?-u)[a-\xFF]", r"(?-u:[a-\xFF])");

 // The following test that the printer escapes meta characters
 // in character classes.
- roundtrip(r"[\[]", r"[\[]");
+ roundtrip(r"[\[]", r"\[");
 roundtrip(r"[Z-_]", r"[Z-_]");
 roundtrip(r"[Z-_--Z]", r"[\[-_]");

 // The following test that the printer escapes meta characters
 // in byte oriented character classes.
- roundtrip_bytes(r"(?-u)[\[]", r"(?-u:[\[])");
+ roundtrip_bytes(r"(?-u)[\[]", r"\[");
 roundtrip_bytes(r"(?-u)[Z-_]", r"(?-u:[Z-_])");
 roundtrip_bytes(r"(?-u)[Z-_--Z]", r"(?-u:[\[-_])");
+
+ // This tests that an empty character class is correctly roundtripped.
+ #[cfg(feature = "unicode-gencat")] + roundtrip(r"\P{any}", r"[a&&b]"); + roundtrip_bytes(r"(?-u)[^\x00-\xFF]", r"[a&&b]"); } #[test] @@ -331,37 +439,170 @@ mod tests { roundtrip("a+?", "a+?"); roundtrip("(?U)a+", "a+?"); - roundtrip("a{1}", "a{1}"); - roundtrip("a{1,}", "a{1,}"); + roundtrip("a{1}", "a"); + roundtrip("a{2}", "a{2}"); + roundtrip("a{1,}", "a+"); roundtrip("a{1,5}", "a{1,5}"); - roundtrip("a{1}?", "a{1}?"); - roundtrip("a{1,}?", "a{1,}?"); + roundtrip("a{1}?", "a"); + roundtrip("a{2}?", "a{2}"); + roundtrip("a{1,}?", "a+?"); roundtrip("a{1,5}?", "a{1,5}?"); - roundtrip("(?U)a{1}", "a{1}?"); - roundtrip("(?U)a{1,}", "a{1,}?"); + roundtrip("(?U)a{1}", "a"); + roundtrip("(?U)a{2}", "a{2}"); + roundtrip("(?U)a{1,}", "a+?"); roundtrip("(?U)a{1,5}", "a{1,5}?"); + + // Test that various zero-length repetitions always translate to an + // empty regex. This is more a property of HIR's smart constructors + // than the printer though. + roundtrip("a{0}", "(?:)"); + roundtrip("(?:ab){0}", "(?:)"); + #[cfg(feature = "unicode-gencat")] + { + roundtrip(r"\p{any}{0}", "(?:)"); + roundtrip(r"\P{any}{0}", "(?:)"); + } } #[test] fn print_group() { - roundtrip("()", "()"); - roundtrip("(?P)", "(?P)"); + roundtrip("()", "((?:))"); + roundtrip("(?P)", "(?P(?:))"); roundtrip("(?:)", "(?:)"); roundtrip("(a)", "(a)"); roundtrip("(?Pa)", "(?Pa)"); - roundtrip("(?:a)", "(?:a)"); + roundtrip("(?:a)", "a"); roundtrip("((((a))))", "((((a))))"); } #[test] fn print_alternation() { - roundtrip("|", "|"); - roundtrip("||", "||"); + roundtrip("|", "(?:(?:)|(?:))"); + roundtrip("||", "(?:(?:)|(?:)|(?:))"); + + roundtrip("a|b", "[ab]"); + roundtrip("ab|cd", "(?:(?:ab)|(?:cd))"); + roundtrip("a|b|c", "[a-c]"); + roundtrip("ab|cd|ef", "(?:(?:ab)|(?:cd)|(?:ef))"); + roundtrip("foo|bar|quux", "(?:(?:foo)|(?:bar)|(?:quux))"); + } - roundtrip("a|b", "a|b"); - roundtrip("a|b|c", "a|b|c"); - roundtrip("foo|bar|quux", "foo|bar|quux"); + // This is a regression test that stresses a peculiarity of how the HIR + // is both constructed and printed. Namely, it is legal for a repetition + // to directly contain a concatenation. This particular construct isn't + // really possible to build from the concrete syntax directly, since you'd + // be forced to put the concatenation into (at least) a non-capturing + // group. Concurrently, the printer doesn't consider this case and just + // kind of naively prints the child expression and tacks on the repetition + // operator. + // + // As a result, if you attached '+' to a 'concat(a, b)', the printer gives + // you 'ab+', but clearly it really should be '(?:ab)+'. + // + // This bug isn't easy to surface because most ways of building an HIR + // come directly from the concrete syntax, and as mentioned above, it just + // isn't possible to build this kind of HIR from the concrete syntax. + // Nevertheless, this is definitely a bug. 
+ //
+ // See: https://github.com/rust-lang/regex/issues/731
+ #[test]
+ fn regression_repetition_concat() {
+ let expr = Hir::concat(alloc::vec![
+ Hir::literal("x".as_bytes()),
+ Hir::repetition(hir::Repetition {
+ min: 1,
+ max: None,
+ greedy: true,
+ sub: Box::new(Hir::literal("ab".as_bytes())),
+ }),
+ Hir::literal("y".as_bytes()),
+ ]);
+ assert_eq!(r"(?:x(?:ab)+y)", expr.to_string());
+
+ let expr = Hir::concat(alloc::vec![
+ Hir::look(hir::Look::Start),
+ Hir::repetition(hir::Repetition {
+ min: 1,
+ max: None,
+ greedy: true,
+ sub: Box::new(Hir::concat(alloc::vec![
+ Hir::look(hir::Look::Start),
+ Hir::look(hir::Look::End),
+ ])),
+ }),
+ Hir::look(hir::Look::End),
+ ]);
+ assert_eq!(r"(?:\A\A\z\z)", expr.to_string());
+ }
+
+ // Just like regression_repetition_concat, but with the repetition using
+ // an alternation as a child expression instead.
+ //
+ // See: https://github.com/rust-lang/regex/issues/731
+ #[test]
+ fn regression_repetition_alternation() {
+ let expr = Hir::concat(alloc::vec![
+ Hir::literal("ab".as_bytes()),
+ Hir::repetition(hir::Repetition {
+ min: 1,
+ max: None,
+ greedy: true,
+ sub: Box::new(Hir::alternation(alloc::vec![
+ Hir::literal("cd".as_bytes()),
+ Hir::literal("ef".as_bytes()),
+ ])),
+ }),
+ Hir::literal("gh".as_bytes()),
+ ]);
+ assert_eq!(r"(?:(?:ab)(?:(?:cd)|(?:ef))+(?:gh))", expr.to_string());
+
+ let expr = Hir::concat(alloc::vec![
+ Hir::look(hir::Look::Start),
+ Hir::repetition(hir::Repetition {
+ min: 1,
+ max: None,
+ greedy: true,
+ sub: Box::new(Hir::alternation(alloc::vec![
+ Hir::look(hir::Look::Start),
+ Hir::look(hir::Look::End),
+ ])),
+ }),
+ Hir::look(hir::Look::End),
+ ]);
+ assert_eq!(r"(?:\A(?:\A|\z)\z)", expr.to_string());
+ }
+
+ // This regression test is very similar in flavor to
+ // regression_repetition_concat in that the root of the issue lies in a
+ // peculiarity of how the HIR is represented and how the printer writes it
+ // out. Like the other regression, this one is also rooted in the fact that
+ // you can't produce the peculiar HIR from the concrete syntax. Namely, you
+ // just can't have a 'concat(a, alt(b, c))' because the 'alt' will normally
+ // be in (at least) a non-capturing group. Why? Because the '|' has very
+ // low precedence (lower than concatenation), and so something like 'ab|c'
+ // is actually 'alt(ab, c)'.
+ //
+ // See: https://github.com/rust-lang/regex/issues/516
+ #[test]
+ fn regression_alternation_concat() {
+ let expr = Hir::concat(alloc::vec![
+ Hir::literal("ab".as_bytes()),
+ Hir::alternation(alloc::vec![
+ Hir::literal("mn".as_bytes()),
+ Hir::literal("xy".as_bytes()),
+ ]),
+ ]);
+ assert_eq!(r"(?:(?:ab)(?:(?:mn)|(?:xy)))", expr.to_string());
+
+ let expr = Hir::concat(alloc::vec![
+ Hir::look(hir::Look::Start),
+ Hir::alternation(alloc::vec![
+ Hir::look(hir::Look::Start),
+ Hir::look(hir::Look::End),
+ ]),
+ ]);
+ assert_eq!(r"(?:\A(?:\A|\z))", expr.to_string()); }
}
diff --git a/vendor/regex-syntax/src/hir/translate.rs b/vendor/regex-syntax/src/hir/translate.rs
index 890e160..313a1e9 100644
--- a/vendor/regex-syntax/src/hir/translate.rs
+++ b/vendor/regex-syntax/src/hir/translate.rs
@@ -2,19 +2,24 @@
/*!
Defines a translator that converts an `Ast` to an `Hir`.
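+
+A minimal usage sketch (added for exposition; the types below are this
+module's public API):
+
+```rust
+use regex_syntax::{ast::parse::Parser, hir::translate::Translator};
+
+let pattern = r"(?i)santa";
+let ast = Parser::new().parse(pattern).unwrap();
+let hir = Translator::new().translate(pattern, &ast).unwrap();
+```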
*/
-use std::cell::{Cell, RefCell};
-use std::result;
-use crate::ast::{self, Ast, Span, Visitor};
-use crate::hir::{self, Error, ErrorKind, Hir};
-use crate::unicode::{self, ClassQuery};
+use core::cell::{Cell, RefCell};
-type Result<T> = result::Result<T, Error>;
+use alloc::{boxed::Box, string::ToString, vec, vec::Vec};
+use crate::{
+ ast::{self, Ast, Span, Visitor},
+ either::Either,
+ hir::{self, Error, ErrorKind, Hir, HirKind},
+ unicode::{self, ClassQuery},
+};
+
+type Result<T> = core::result::Result<T, Error>;
 /// A builder for constructing an AST->HIR translator.
 #[derive(Clone, Debug)]
 pub struct TranslatorBuilder {
- allow_invalid_utf8: bool,
+ utf8: bool,
+ line_terminator: u8,
 flags: Flags,
}
@@ -28,7 +33,8 @@ impl TranslatorBuilder {
 /// Create a new translator builder with a default configuration.
 pub fn new() -> TranslatorBuilder {
 TranslatorBuilder {
- allow_invalid_utf8: false,
+ utf8: true,
+ line_terminator: b'\n',
 flags: Flags::default(),
 }
 }
@@ -38,23 +44,53 @@ impl TranslatorBuilder {
 Translator {
 stack: RefCell::new(vec![]),
 flags: Cell::new(self.flags),
- allow_invalid_utf8: self.allow_invalid_utf8,
+ utf8: self.utf8,
+ line_terminator: self.line_terminator,
 }
 }
- /// When enabled, translation will permit the construction of a regular
+ /// When disabled, translation will permit the construction of a regular
 /// expression that may match invalid UTF-8.
 ///
- /// When disabled (the default), the translator is guaranteed to produce
- /// an expression that will only ever match valid UTF-8 (otherwise, the
- /// translator will return an error).
+ /// When enabled (the default), the translator is guaranteed to produce an
+ /// expression that, for non-empty matches, will only ever produce spans
+ /// that are entirely valid UTF-8 (otherwise, the translator will return an
+ /// error).
+ ///
+ /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even
+ /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete
+ /// syntax) will be allowed even though they can produce matches that split
+ /// a UTF-8 encoded codepoint. This only applies to zero-width or "empty"
+ /// matches, and it is expected that the regex engine itself must handle
+ /// these cases if necessary (perhaps by suppressing any zero-width matches
+ /// that split a codepoint).
+ pub fn utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
+ self.utf8 = yes;
+ self
+ }
+
+ /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
+ ///
+ /// Namely, instead of `.` (by default) matching everything except for `\n`,
+ /// this will cause `.` to match everything except for the byte given.
+ ///
+ /// If `.` is used in a context where Unicode mode is enabled and this byte
+ /// isn't ASCII, then an error will be returned. When Unicode mode is
+ /// disabled, then any byte is permitted, but will return an error if UTF-8
+ /// mode is enabled and it is a non-ASCII byte.
+ ///
+ /// In short, any ASCII value for a line terminator is always okay. But a
+ /// non-ASCII byte might result in an error depending on whether Unicode
+ /// mode or UTF-8 mode are enabled.
 ///
- /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
- /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
- /// the parser to return an error. Namely, a negated ASCII word boundary
- /// can result in matching positions that aren't valid UTF-8 boundaries.
- pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
- self.allow_invalid_utf8 = yes;
+ /// Note that if `R` mode is enabled then it always takes precedence and
+ /// the line terminator will be treated as `\r` and `\n` simultaneously.
+ ///
+ /// Note also that this *doesn't* impact the look-around assertions
+ /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
+ /// configuration in the regex engine itself.
+ pub fn line_terminator(&mut self, byte: u8) -> &mut TranslatorBuilder {
+ self.line_terminator = byte;
 self
 }
@@ -80,6 +116,12 @@ impl TranslatorBuilder {
 self
 }

+ /// Enable or disable the CRLF mode flag (`R`) by default.
+ pub fn crlf(&mut self, yes: bool) -> &mut TranslatorBuilder {
+ self.flags.crlf = if yes { Some(true) } else { None };
+ self
+ }
+
 /// Enable or disable the "swap greed" flag (`U`) by default.
 pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
 self.flags.swap_greed = if yes { Some(true) } else { None };
 self
 }
@@ -100,7 +142,7 @@ impl TranslatorBuilder {
 /// many abstract syntax trees.
 ///
 /// A `Translator` can be configured in more detail via a
-/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
+/// [`TranslatorBuilder`].
 #[derive(Clone, Debug)]
 pub struct Translator {
 /// Our call stack, but on the heap.
@@ -108,7 +150,9 @@ pub struct Translator {
 /// The current flag settings.
 flags: Cell<Flags>,
 /// Whether we're allowed to produce HIR that can match arbitrary bytes.
- allow_invalid_utf8: bool,
+ utf8: bool,
+ /// The line terminator to use for `.`.
+ line_terminator: u8,
}

impl Translator {
@@ -143,6 +187,12 @@ enum HirFrame {
 /// case in the Ast. They get popped after an inductive (i.e., recursive)
 /// step is complete.
 Expr(Hir),
+ /// A literal that is being constructed, character by character, from the
+ /// AST. We need this because the AST gives each individual character its
+ /// own node. So as we see characters, we peek at the top-most HirFrame.
+ /// If it's a literal, then we add to it. Otherwise, we push a new literal.
+ /// When it comes time to pop it, we convert it to an Hir via Hir::literal.
+ Literal(Vec<u8>),
 /// A Unicode character class. This frame is mutated as we descend into
 /// the Ast of a character class (which is itself its own mini recursive
 /// structure).
@@ -152,10 +202,17 @@ enum HirFrame {
 /// recursive structure).
 ///
 /// Byte character classes are created when Unicode mode (`u`) is disabled.
- /// If `allow_invalid_utf8` is disabled (the default), then a byte
- /// character is only permitted to match ASCII text.
+ /// If `utf8` is enabled (the default), then a byte character is only
+ /// permitted to match ASCII text.
 ClassBytes(hir::ClassBytes),
- /// This is pushed on to the stack upon first seeing any kind of group,
+ /// This is pushed whenever a repetition is observed. After visiting every
+ /// sub-expression in the repetition, the translator's stack is expected to
+ /// have this sentinel at the top.
+ ///
+ /// This sentinel only exists to stop other things (like flattening
+ /// literals) from reaching across repetition operators.
+ Repetition,
+ /// This is pushed on to the stack upon first seeing any kind of capture,
 /// indicated by parentheses (including non-capturing groups). It is popped
 /// upon leaving a group.
 Group {
@@ -181,6 +238,14 @@ enum HirFrame {
 /// every sub-expression in the alternation, the translator's stack is
 /// popped until it sees an Alternation frame.
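+ ///
+ /// As a rough illustration (added for exposition), translating `a|bc`
+ /// evolves the stack like this before the final round of popping:
+ ///
+ /// ```text
+ /// [Alternation]
+ /// [Alternation, AlternationBranch, Literal("a")]
+ /// [Alternation, AlternationBranch, Literal("a"), AlternationBranch, Literal("bc")]
+ /// ```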
Alternation, + /// This is pushed immediately before each sub-expression in an + /// alternation. This separates the branches of an alternation on the + /// stack and prevents literal flattening from reaching across alternation + /// branches. + /// + /// It is popped after each expression in a branch until an 'Alternation' + /// frame is observed when doing a post visit on an alternation. + AlternationBranch, } impl HirFrame { @@ -188,6 +253,7 @@ impl HirFrame { fn unwrap_expr(self) -> Hir { match self { HirFrame::Expr(expr) => expr, + HirFrame::Literal(lit) => Hir::literal(lit), _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self), } } @@ -218,6 +284,20 @@ impl HirFrame { } } + /// Assert that the current stack frame is a repetition sentinel. If it + /// isn't, then panic. + fn unwrap_repetition(self) { + match self { + HirFrame::Repetition => {} + _ => { + panic!( + "tried to unwrap repetition from HirFrame, got: {:?}", + self + ) + } + } + } + /// Assert that the current stack frame is a group indicator and return /// its corresponding flags (the flags that were active at the time the /// group was entered). @@ -229,6 +309,20 @@ impl HirFrame { } } } + + /// Assert that the current stack frame is an alternation pipe sentinel. If + /// it isn't, then panic. + fn unwrap_alternation_pipe(self) { + match self { + HirFrame::AlternationBranch => {} + _ => { + panic!( + "tried to unwrap alt pipe from HirFrame, got: {:?}", + self + ) + } + } + } } impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { @@ -243,7 +337,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { fn visit_pre(&mut self, ast: &Ast) -> Result<()> { match *ast { - Ast::Class(ast::Class::Bracketed(_)) => { + Ast::ClassBracketed(_) => { if self.flags().unicode() { let cls = hir::ClassUnicode::empty(); self.push(HirFrame::ClassUnicode(cls)); @@ -252,6 +346,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { self.push(HirFrame::ClassBytes(cls)); } } + Ast::Repetition(_) => self.push(HirFrame::Repetition), Ast::Group(ref x) => { let old_flags = x .flags() @@ -259,13 +354,14 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { .unwrap_or_else(|| self.flags()); self.push(HirFrame::Group { old_flags }); } - Ast::Concat(ref x) if x.asts.is_empty() => {} Ast::Concat(_) => { self.push(HirFrame::Concat); } - Ast::Alternation(ref x) if x.asts.is_empty() => {} - Ast::Alternation(_) => { + Ast::Alternation(ref x) => { self.push(HirFrame::Alternation); + if !x.asts.is_empty() { + self.push(HirFrame::AlternationBranch); + } } _ => {} } @@ -290,31 +386,35 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { // consistency sake. self.push(HirFrame::Expr(Hir::empty())); } - Ast::Literal(ref x) => { - self.push(HirFrame::Expr(self.hir_literal(x)?)); - } - Ast::Dot(span) => { - self.push(HirFrame::Expr(self.hir_dot(span)?)); + Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? { + Either::Right(byte) => self.push_byte(byte), + Either::Left(ch) => match self.case_fold_char(x.span, ch)? 
{ + None => self.push_char(ch), + Some(expr) => self.push(HirFrame::Expr(expr)), + }, + }, + Ast::Dot(ref span) => { + self.push(HirFrame::Expr(self.hir_dot(**span)?)); } Ast::Assertion(ref x) => { self.push(HirFrame::Expr(self.hir_assertion(x)?)); } - Ast::Class(ast::Class::Perl(ref x)) => { + Ast::ClassPerl(ref x) => { if self.flags().unicode() { let cls = self.hir_perl_unicode_class(x)?; let hcls = hir::Class::Unicode(cls); self.push(HirFrame::Expr(Hir::class(hcls))); } else { - let cls = self.hir_perl_byte_class(x); + let cls = self.hir_perl_byte_class(x)?; let hcls = hir::Class::Bytes(cls); self.push(HirFrame::Expr(Hir::class(hcls))); } } - Ast::Class(ast::Class::Unicode(ref x)) => { + Ast::ClassUnicode(ref x) => { let cls = hir::Class::Unicode(self.hir_unicode_class(x)?); self.push(HirFrame::Expr(Hir::class(cls))); } - Ast::Class(ast::Class::Bracketed(ref ast)) => { + Ast::ClassBracketed(ref ast) => { if self.flags().unicode() { let mut cls = self.pop().unwrap().unwrap_class_unicode(); self.unicode_fold_and_negate( @@ -322,12 +422,6 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { ast.negated, &mut cls, )?; - if cls.ranges().is_empty() { - return Err(self.error( - ast.span, - ErrorKind::EmptyClassNotAllowed, - )); - } let expr = Hir::class(hir::Class::Unicode(cls)); self.push(HirFrame::Expr(expr)); } else { @@ -337,31 +431,25 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { ast.negated, &mut cls, )?; - if cls.ranges().is_empty() { - return Err(self.error( - ast.span, - ErrorKind::EmptyClassNotAllowed, - )); - } - let expr = Hir::class(hir::Class::Bytes(cls)); self.push(HirFrame::Expr(expr)); } } Ast::Repetition(ref x) => { let expr = self.pop().unwrap().unwrap_expr(); + self.pop().unwrap().unwrap_repetition(); self.push(HirFrame::Expr(self.hir_repetition(x, expr))); } Ast::Group(ref x) => { let expr = self.pop().unwrap().unwrap_expr(); let old_flags = self.pop().unwrap().unwrap_group(); self.trans().flags.set(old_flags); - self.push(HirFrame::Expr(self.hir_group(x, expr))); + self.push(HirFrame::Expr(self.hir_capture(x, expr))); } Ast::Concat(_) => { let mut exprs = vec![]; - while let Some(HirFrame::Expr(expr)) = self.pop() { - if !expr.kind().is_empty() { + while let Some(expr) = self.pop_concat_expr() { + if !matches!(*expr.kind(), HirKind::Empty) { exprs.push(expr); } } @@ -370,7 +458,8 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { } Ast::Alternation(_) => { let mut exprs = vec![]; - while let Some(HirFrame::Expr(expr)) = self.pop() { + while let Some(expr) = self.pop_alt_expr() { + self.pop().unwrap().unwrap_alternation_pipe(); exprs.push(expr); } exprs.reverse(); @@ -380,6 +469,11 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { Ok(()) } + fn visit_alternation_in(&mut self) -> Result<()> { + self.push(HirFrame::AlternationBranch); + Ok(()) + } + fn visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, @@ -458,7 +552,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { cls.union(&xcls); self.push(HirFrame::ClassUnicode(cls)); } else { - let xcls = self.hir_perl_byte_class(x); + let xcls = self.hir_perl_byte_class(x)?; let mut cls = self.pop().unwrap().unwrap_class_bytes(); cls.union(&xcls); self.push(HirFrame::ClassBytes(cls)); @@ -602,11 +696,103 @@ impl<'t, 'p> TranslatorI<'t, 'p> { self.trans().stack.borrow_mut().push(frame); } + /// Push the given literal char on to the call stack. + /// + /// If the top-most element of the stack is a literal, then the char + /// is appended to the end of that literal. 
Otherwise, a new literal + /// containing just the given char is pushed to the top of the stack. + fn push_char(&self, ch: char) { + let mut buf = [0; 4]; + let bytes = ch.encode_utf8(&mut buf).as_bytes(); + let mut stack = self.trans().stack.borrow_mut(); + if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() { + literal.extend_from_slice(bytes); + } else { + stack.push(HirFrame::Literal(bytes.to_vec())); + } + } + + /// Push the given literal byte on to the call stack. + /// + /// If the top-most element of the stack is a literal, then the byte + /// is appended to the end of that literal. Otherwise, a new literal + /// containing just the given byte is pushed to the top of the stack. + fn push_byte(&self, byte: u8) { + let mut stack = self.trans().stack.borrow_mut(); + if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() { + literal.push(byte); + } else { + stack.push(HirFrame::Literal(vec![byte])); + } + } + /// Pop the top of the call stack. If the call stack is empty, return None. fn pop(&self) -> Option<HirFrame> { self.trans().stack.borrow_mut().pop() } + /// Pop an HIR expression from the top of the stack for a concatenation. + /// + /// This returns None if the stack is empty or when a concat frame is seen. + /// Otherwise, it panics if it could not find an HIR expression. + fn pop_concat_expr(&self) -> Option<Hir> { + let frame = self.pop()?; + match frame { + HirFrame::Concat => None, + HirFrame::Expr(expr) => Some(expr), + HirFrame::Literal(lit) => Some(Hir::literal(lit)), + HirFrame::ClassUnicode(_) => { + unreachable!("expected expr or concat, got Unicode class") + } + HirFrame::ClassBytes(_) => { + unreachable!("expected expr or concat, got byte class") + } + HirFrame::Repetition => { + unreachable!("expected expr or concat, got repetition") + } + HirFrame::Group { .. } => { + unreachable!("expected expr or concat, got group") + } + HirFrame::Alternation => { + unreachable!("expected expr or concat, got alt marker") + } + HirFrame::AlternationBranch => { + unreachable!("expected expr or concat, got alt branch marker") + } + } + } + + /// Pop an HIR expression from the top of the stack for an alternation. + /// + /// This returns None if the stack is empty or when an alternation frame is + /// seen. Otherwise, it panics if it could not find an HIR expression. + fn pop_alt_expr(&self) -> Option<Hir> { + let frame = self.pop()?; + match frame { + HirFrame::Alternation => None, + HirFrame::Expr(expr) => Some(expr), + HirFrame::Literal(lit) => Some(Hir::literal(lit)), + HirFrame::ClassUnicode(_) => { + unreachable!("expected expr or alt, got Unicode class") + } + HirFrame::ClassBytes(_) => { + unreachable!("expected expr or alt, got byte class") + } + HirFrame::Repetition => { + unreachable!("expected expr or alt, got repetition") + } + HirFrame::Group { .. } => { + unreachable!("expected expr or alt, got group") + } + HirFrame::Concat => { + unreachable!("expected expr or alt, got concat marker") + } + HirFrame::AlternationBranch => { + unreachable!("expected expr or alt, got alt branch marker") + } + } + } + /// Create a new error with the given span and error type. fn error(&self, span: Span, kind: ErrorKind) -> Error { Error { kind, pattern: self.pattern.to_string(), span } @@ -627,63 +813,48 @@ impl<'t, 'p> TranslatorI<'t, 'p> { old_flags } - fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> { - let ch = match self.literal_to_char(lit)?
{ - byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)), - hir::Literal::Unicode(ch) => ch, - }; - if self.flags().case_insensitive() { - self.hir_from_char_case_insensitive(lit.span, ch) - } else { - self.hir_from_char(lit.span, ch) - } - } - /// Convert an Ast literal to its scalar representation. /// /// When Unicode mode is enabled, then this always succeeds and returns a /// `char` (Unicode scalar value). /// - /// When Unicode mode is disabled, then a raw byte is returned. If that - /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns - /// an error. - fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> { + /// When Unicode mode is disabled, then a `char` will still be returned + /// whenever possible. A byte is returned only when invalid UTF-8 is + /// allowed and when the byte is not ASCII. Otherwise, a non-ASCII byte + /// will result in an error when invalid UTF-8 is not allowed. + fn ast_literal_to_scalar( + &self, + lit: &ast::Literal, + ) -> Result<Either<char, u8>> { if self.flags().unicode() { - return Ok(hir::Literal::Unicode(lit.c)); + return Ok(Either::Left(lit.c)); } let byte = match lit.byte() { - None => return Ok(hir::Literal::Unicode(lit.c)), + None => return Ok(Either::Left(lit.c)), Some(byte) => byte, }; if byte <= 0x7F { - return Ok(hir::Literal::Unicode(byte as char)); + return Ok(Either::Left(char::try_from(byte).unwrap())); } - if !self.trans().allow_invalid_utf8 { + if self.trans().utf8 { return Err(self.error(lit.span, ErrorKind::InvalidUtf8)); } - Ok(hir::Literal::Byte(byte)) + Ok(Either::Right(byte)) } - fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> { - if !self.flags().unicode() && c.len_utf8() > 1 { - return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); + fn case_fold_char(&self, span: Span, c: char) -> Result<Option<Hir>> { + if !self.flags().case_insensitive() { + return Ok(None); } - Ok(Hir::literal(hir::Literal::Unicode(c))) - } - - fn hir_from_char_case_insensitive( - &self, - span: Span, - c: char, - ) -> Result<Hir> { if self.flags().unicode() { // If case folding won't do anything, then don't bother trying. - let map = - unicode::contains_simple_case_mapping(c, c).map_err(|_| { + let map = unicode::SimpleCaseFolder::new() + .map(|f| f.overlaps(c, c)) + .map_err(|_| { self.error(span, ErrorKind::UnicodeCaseUnavailable) })?; if !map { - return self.hir_from_char(span, c); + return Ok(None); } let mut cls = hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new( @@ -692,117 +863,166 @@ impl<'t, 'p> TranslatorI<'t, 'p> { cls.try_case_fold_simple().map_err(|_| { self.error(span, ErrorKind::UnicodeCaseUnavailable) })?; - Ok(Hir::class(hir::Class::Unicode(cls))) + Ok(Some(Hir::class(hir::Class::Unicode(cls)))) } else { - if c.len_utf8() > 1 { - return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); + if !c.is_ascii() { + return Ok(None); } // If case folding won't do anything, then don't bother trying. match c { 'A'..='Z' | 'a'..='z' => {} - _ => return self.hir_from_char(span, c), + _ => return Ok(None), } let mut cls = hir::ClassBytes::new(vec![hir::ClassBytesRange::new( - c as u8, c as u8, + // OK because 'c.len_utf8() == 1' which in turn implies + // that 'c' is ASCII.
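// NOTE(editor): illustrative sketch, not part of the vendored patch. With
// Unicode case folding, `case_fold_char` expands a single `char` into the
// class of its simple case folds, e.g. `(?i)k` matches 'k', 'K' and U+212A
// (KELVIN SIGN); the ASCII branch here is the same idea restricted to
// [A-Za-z]:
//
//     let mut cls = hir::ClassUnicode::new(vec![
//         hir::ClassUnicodeRange::new('k', 'k'),
//     ]);
//     cls.try_case_fold_simple().unwrap();
//     // cls now contains 'K', 'k' and '\u{212A}'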
+ u8::try_from(c).unwrap(), + u8::try_from(c).unwrap(), )]); cls.case_fold_simple(); - Ok(Hir::class(hir::Class::Bytes(cls))) + Ok(Some(Hir::class(hir::Class::Bytes(cls)))) } } fn hir_dot(&self, span: Span) -> Result<Hir> { - let unicode = self.flags().unicode(); - if !unicode && !self.trans().allow_invalid_utf8 { + let (utf8, lineterm, flags) = + (self.trans().utf8, self.trans().line_terminator, self.flags()); + if utf8 && (!flags.unicode() || !lineterm.is_ascii()) { return Err(self.error(span, ErrorKind::InvalidUtf8)); } - Ok(if self.flags().dot_matches_new_line() { - Hir::any(!unicode) + let dot = if flags.dot_matches_new_line() { + if flags.unicode() { + hir::Dot::AnyChar + } else { + hir::Dot::AnyByte + } } else { - Hir::dot(!unicode) - }) + if flags.unicode() { + if flags.crlf() { + hir::Dot::AnyCharExceptCRLF + } else { + if !lineterm.is_ascii() { + return Err( + self.error(span, ErrorKind::InvalidLineTerminator) + ); + } + hir::Dot::AnyCharExcept(char::from(lineterm)) + } + } else { + if flags.crlf() { + hir::Dot::AnyByteExceptCRLF + } else { + hir::Dot::AnyByteExcept(lineterm) + } + } + }; + Ok(Hir::dot(dot)) } fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> { let unicode = self.flags().unicode(); let multi_line = self.flags().multi_line(); + let crlf = self.flags().crlf(); Ok(match asst.kind { - ast::AssertionKind::StartLine => Hir::anchor(if multi_line { - hir::Anchor::StartLine + ast::AssertionKind::StartLine => Hir::look(if multi_line { + if crlf { + hir::Look::StartCRLF + } else { + hir::Look::StartLF + } + } else { + hir::Look::Start + }), + ast::AssertionKind::EndLine => Hir::look(if multi_line { + if crlf { + hir::Look::EndCRLF + } else { + hir::Look::EndLF + } + } else { + hir::Look::End + }), + ast::AssertionKind::StartText => Hir::look(hir::Look::Start), + ast::AssertionKind::EndText => Hir::look(hir::Look::End), + ast::AssertionKind::WordBoundary => Hir::look(if unicode { + hir::Look::WordUnicode } else { - hir::Anchor::StartText + hir::Look::WordAscii }), - ast::AssertionKind::EndLine => Hir::anchor(if multi_line { - hir::Anchor::EndLine + ast::AssertionKind::NotWordBoundary => Hir::look(if unicode { + hir::Look::WordUnicodeNegate } else { - hir::Anchor::EndText + hir::Look::WordAsciiNegate }), - ast::AssertionKind::StartText => { - Hir::anchor(hir::Anchor::StartText) + ast::AssertionKind::WordBoundaryStart + | ast::AssertionKind::WordBoundaryStartAngle => { + Hir::look(if unicode { + hir::Look::WordStartUnicode + } else { + hir::Look::WordStartAscii + }) } - ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText), - ast::AssertionKind::WordBoundary => { - Hir::word_boundary(if unicode { - hir::WordBoundary::Unicode + ast::AssertionKind::WordBoundaryEnd + | ast::AssertionKind::WordBoundaryEndAngle => { + Hir::look(if unicode { + hir::Look::WordEndUnicode } else { - hir::WordBoundary::Ascii + hir::Look::WordEndAscii }) } - ast::AssertionKind::NotWordBoundary => { - Hir::word_boundary(if unicode { - hir::WordBoundary::UnicodeNegate + ast::AssertionKind::WordBoundaryStartHalf => { + Hir::look(if unicode { + hir::Look::WordStartHalfUnicode } else { - // It is possible for negated ASCII word boundaries to - // match at invalid UTF-8 boundaries, even when searching - // valid UTF-8.
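// NOTE(editor): summary sketch of the `hir_dot` mapping above (editorial,
// not in the patch). With s = dot_matches_new_line, u = unicode, R = crlf
// and lt = the configured line terminator (b'\n' unless overridden):
//
//     s        => Dot::AnyChar (u)           / Dot::AnyByte (!u)
//     !s, R    => Dot::AnyCharExceptCRLF (u) / Dot::AnyByteExceptCRLF (!u)
//     !s, !R   => Dot::AnyCharExcept(lt) (u) / Dot::AnyByteExcept(lt) (!u)
//
//     // e.g. what a plain `.` produces under the default terminator:
//     let _dot = Hir::dot(hir::Dot::AnyCharExcept('\n'));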
- if !self.trans().allow_invalid_utf8 { - return Err( - self.error(asst.span, ErrorKind::InvalidUtf8) - ); - } - hir::WordBoundary::AsciiNegate + hir::Look::WordStartHalfAscii }) } + ast::AssertionKind::WordBoundaryEndHalf => Hir::look(if unicode { + hir::Look::WordEndHalfUnicode + } else { + hir::Look::WordEndHalfAscii + }), }) } - fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir { - let kind = match group.kind { - ast::GroupKind::CaptureIndex(idx) => { - hir::GroupKind::CaptureIndex(idx) + fn hir_capture(&self, group: &ast::Group, expr: Hir) -> Hir { + let (index, name) = match group.kind { + ast::GroupKind::CaptureIndex(index) => (index, None), + ast::GroupKind::CaptureName { ref name, .. } => { + (name.index, Some(name.name.clone().into_boxed_str())) } - ast::GroupKind::CaptureName(ref capname) => { - hir::GroupKind::CaptureName { - name: capname.name.clone(), - index: capname.index, - } - } - ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing, + // The HIR doesn't need to use non-capturing groups, since the way + // in which the data type is defined handles this automatically. + ast::GroupKind::NonCapturing(_) => return expr, }; - Hir::group(hir::Group { kind, hir: Box::new(expr) }) + Hir::capture(hir::Capture { index, name, sub: Box::new(expr) }) } fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { - let kind = match rep.op.kind { - ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne, - ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore, - ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore, + let (min, max) = match rep.op.kind { + ast::RepetitionKind::ZeroOrOne => (0, Some(1)), + ast::RepetitionKind::ZeroOrMore => (0, None), + ast::RepetitionKind::OneOrMore => (1, None), ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => { - hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m)) + (m, Some(m)) } ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => { - hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m)) + (m, None) } ast::RepetitionKind::Range(ast::RepetitionRange::Bounded( m, n, - )) => { - hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n)) - } + )) => (m, Some(n)), }; let greedy = if self.flags().swap_greed() { !rep.greedy } else { rep.greedy }; - Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) }) + Hir::repetition(hir::Repetition { + min, + max, + greedy, + sub: Box::new(expr), + }) } fn hir_unicode_class( @@ -834,11 +1054,6 @@ impl<'t, 'p> TranslatorI<'t, 'p> { ast_class.negated, class, )?; - if class.ranges().is_empty() { - let err = self - .error(ast_class.span, ErrorKind::EmptyClassNotAllowed); - return Err(err); - } } result } @@ -848,9 +1063,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> { ast: &ast::ClassAscii, ) -> Result { let mut cls = hir::ClassUnicode::new( - ascii_class(&ast.kind) - .iter() - .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)), + ascii_class_as_chars(&ast.kind) + .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)), ); self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?; Ok(cls) @@ -862,8 +1076,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { ) -> Result { let mut cls = hir::ClassBytes::new( ascii_class(&ast.kind) - .iter() - .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)), + .map(|(s, e)| hir::ClassBytesRange::new(s, e)), ); self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?; Ok(cls) @@ -894,7 +1107,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { fn hir_perl_byte_class( &self, ast_class: 
&ast::ClassPerl, - ) -> hir::ClassBytes { + ) -> Result<hir::ClassBytes> { use crate::ast::ClassPerlKind::*; assert!(!self.flags().unicode()); @@ -908,7 +1121,13 @@ impl<'t, 'p> TranslatorI<'t, 'p> { if ast_class.negated { class.negate(); } - class + // Negating a Perl byte class is likely to cause it to match invalid + // UTF-8. That's only OK if the translator is configured to allow such + // things. + if self.trans().utf8 && !class.is_ascii() { + return Err(self.error(ast_class.span, ErrorKind::InvalidUtf8)); + } + Ok(class) } /// Converts the given Unicode specific error to an HIR translation error. @@ -918,7 +1137,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { fn convert_unicode_class_error( &self, span: &Span, - result: unicode::Result<hir::ClassUnicode>, + result: core::result::Result<hir::ClassUnicode, unicode::Error>, ) -> Result<hir::ClassUnicode> { result.map_err(|err| { let sp = span.clone(); @@ -943,7 +1162,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { class: &mut hir::ClassUnicode, ) -> Result<()> { // Note that we must apply case folding before negation! - // Consider `(?i)[^x]`. If we applied negation field, then + // Consider `(?i)[^x]`. If we applied negation first, then // the result would be the character class that matched any // Unicode scalar value. if self.flags().case_insensitive() { @@ -973,7 +1192,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { if negated { class.negate(); } - if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() { + if self.trans().utf8 && !class.is_ascii() { return Err(self.error(span.clone(), ErrorKind::InvalidUtf8)); } Ok(()) } @@ -982,11 +1201,11 @@ /// Return a scalar byte value suitable for use as a literal in a byte /// character class. fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> { - match self.literal_to_char(ast)? { - hir::Literal::Byte(byte) => Ok(byte), - hir::Literal::Unicode(ch) => { - if ch <= 0x7F as char { - Ok(ch as u8) + match self.ast_literal_to_scalar(ast)? { + Either::Right(byte) => Ok(byte), + Either::Left(ch) => { + if ch.is_ascii() { + Ok(u8::try_from(ch).unwrap()) } else { // We can't feasibly support Unicode in // byte oriented classes. Byte classes don't @@ -1010,6 +1229,7 @@ struct Flags { dot_matches_new_line: Option<bool>, swap_greed: Option<bool>, unicode: Option<bool>, + crlf: Option<bool>, // Note that `ignore_whitespace` is omitted here because it is handled // entirely in the parser.
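// NOTE(editor): sketch, not part of the vendored patch. Each field is a
// tri-state: `None` means "not set at this scope, defer outward", which is
// what the flag-merging logic below relies on. Assuming the method is named
// `merge` and `Flags` derives `Default`, as in upstream regex-syntax:
//
//     let outer = Flags { unicode: Some(false), ..Flags::default() };
//     let mut inner = Flags { crlf: Some(true), ..Flags::default() };
//     inner.merge(&outer);
//     // inner.unicode == Some(false); inner.crlf stays Some(true)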
} @@ -1038,6 +1258,9 @@ impl Flags { ast::FlagsItemKind::Flag(ast::Flag::Unicode) => { flags.unicode = Some(enable); } + ast::FlagsItemKind::Flag(ast::Flag::CRLF) => { + flags.crlf = Some(enable); + } ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {} } } @@ -1060,6 +1283,9 @@ impl Flags { if self.unicode.is_none() { self.unicode = previous.unicode; } + if self.crlf.is_none() { + self.crlf = previous.crlf; + } } fn case_insensitive(&self) -> bool { @@ -1081,52 +1307,63 @@ impl Flags { fn unicode(&self) -> bool { self.unicode.unwrap_or(true) } + + fn crlf(&self) -> bool { + self.crlf.unwrap_or(false) + } } fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes { let ranges: Vec<_> = ascii_class(kind) - .iter() - .cloned() - .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)) + .map(|(s, e)| hir::ClassBytesRange::new(s, e)) .collect(); hir::ClassBytes::new(ranges) } -fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] { +fn ascii_class(kind: &ast::ClassAsciiKind) -> impl Iterator<Item = (u8, u8)> { use crate::ast::ClassAsciiKind::*; - match *kind { - Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')], - Alpha => &[('A', 'Z'), ('a', 'z')], - Ascii => &[('\x00', '\x7F')], - Blank => &[('\t', '\t'), (' ', ' ')], - Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')], - Digit => &[('0', '9')], - Graph => &[('!', '~')], - Lower => &[('a', 'z')], - Print => &[(' ', '~')], - Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')], + + let slice: &'static [(u8, u8)] = match *kind { + Alnum => &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')], + Alpha => &[(b'A', b'Z'), (b'a', b'z')], + Ascii => &[(b'\x00', b'\x7F')], + Blank => &[(b'\t', b'\t'), (b' ', b' ')], + Cntrl => &[(b'\x00', b'\x1F'), (b'\x7F', b'\x7F')], + Digit => &[(b'0', b'9')], + Graph => &[(b'!', b'~')], + Lower => &[(b'a', b'z')], + Print => &[(b' ', b'~')], + Punct => &[(b'!', b'/'), (b':', b'@'), (b'[', b'`'), (b'{', b'~')], Space => &[ - ('\t', '\t'), - ('\n', '\n'), - ('\x0B', '\x0B'), - ('\x0C', '\x0C'), - ('\r', '\r'), - (' ', ' '), + (b'\t', b'\t'), + (b'\n', b'\n'), + (b'\x0B', b'\x0B'), + (b'\x0C', b'\x0C'), + (b'\r', b'\r'), + (b' ', b' '), ], - Upper => &[('A', 'Z')], - Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')], - Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')], - } + Upper => &[(b'A', b'Z')], + Word => &[(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')], + Xdigit => &[(b'0', b'9'), (b'A', b'F'), (b'a', b'f')], + }; + slice.iter().copied() +} + +fn ascii_class_as_chars( + kind: &ast::ClassAsciiKind, +) -> impl Iterator<Item = (char, char)> { + ascii_class(kind).map(|(s, e)| (char::from(s), char::from(e))) } #[cfg(test)] mod tests { - use crate::ast::parse::ParserBuilder; - use crate::ast::{self, Ast, Position, Span}; - use crate::hir::{self, Hir, HirKind}; - use crate::unicode::{self, ClassQuery}; + use crate::{ + ast::{self, parse::ParserBuilder, Ast, Position, Span}, + hir::{self, Hir, HirKind, Look, Properties}, + unicode::{self, ClassQuery}, + }; - use super::{ascii_class, TranslatorBuilder}; + use super::*; // We create these errors to compare with real hir::Errors in the tests.
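// NOTE(editor): sketch, not part of the vendored patch. The helper pair
// above now yields byte ranges directly, with a char adapter layered on
// top, so byte classes no longer round-trip through `char` and `as u8`
// casts:
//
//     let digits: Vec<(u8, u8)> =
//         ascii_class(&ast::ClassAsciiKind::Digit).collect();
//     assert_eq!(digits, vec![(b'0', b'9')]);
//     let chars: Vec<(char, char)> =
//         ascii_class_as_chars(&ast::ClassAsciiKind::Digit).collect();
//     assert_eq!(chars, vec![('0', '9')]);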
// We define equality between TestError and hir::Error to disregard the @@ -1155,7 +1392,7 @@ mod tests { fn t(pattern: &str) -> Hir { TranslatorBuilder::new() - .allow_invalid_utf8(false) + .utf8(true) .build() .translate(pattern, &parse(pattern)) .unwrap() @@ -1163,7 +1400,7 @@ mod tests { fn t_err(pattern: &str) -> hir::Error { TranslatorBuilder::new() - .allow_invalid_utf8(false) + .utf8(true) .build() .translate(pattern, &parse(pattern)) .unwrap_err() @@ -1171,95 +1408,73 @@ mod tests { fn t_bytes(pattern: &str) -> Hir { TranslatorBuilder::new() - .allow_invalid_utf8(true) + .utf8(false) .build() .translate(pattern, &parse(pattern)) .unwrap() } - fn hir_lit(s: &str) -> Hir { - match s.len() { - 0 => Hir::empty(), - _ => { - let lits = s - .chars() - .map(hir::Literal::Unicode) - .map(Hir::literal) - .collect(); - Hir::concat(lits) - } - } + fn props(pattern: &str) -> Properties { + t(pattern).properties().clone() } - fn hir_blit(s: &[u8]) -> Hir { - match s.len() { - 0 => Hir::empty(), - 1 => Hir::literal(hir::Literal::Byte(s[0])), - _ => { - let lits = s - .iter() - .cloned() - .map(hir::Literal::Byte) - .map(Hir::literal) - .collect(); - Hir::concat(lits) - } - } + fn props_bytes(pattern: &str) -> Properties { + t_bytes(pattern).properties().clone() } - fn hir_group(i: u32, expr: Hir) -> Hir { - Hir::group(hir::Group { - kind: hir::GroupKind::CaptureIndex(i), - hir: Box::new(expr), - }) + fn hir_lit(s: &str) -> Hir { + hir_blit(s.as_bytes()) } - fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir { - Hir::group(hir::Group { - kind: hir::GroupKind::CaptureName { - name: name.to_string(), - index: i, - }, - hir: Box::new(expr), - }) + fn hir_blit(s: &[u8]) -> Hir { + Hir::literal(s) + } + + fn hir_capture(index: u32, expr: Hir) -> Hir { + Hir::capture(hir::Capture { index, name: None, sub: Box::new(expr) }) } - fn hir_group_nocap(expr: Hir) -> Hir { - Hir::group(hir::Group { - kind: hir::GroupKind::NonCapturing, - hir: Box::new(expr), + fn hir_capture_name(index: u32, name: &str, expr: Hir) -> Hir { + Hir::capture(hir::Capture { + index, + name: Some(name.into()), + sub: Box::new(expr), }) } fn hir_quest(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrOne, + min: 0, + max: Some(1), greedy, - hir: Box::new(expr), + sub: Box::new(expr), }) } fn hir_star(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrMore, + min: 0, + max: None, greedy, - hir: Box::new(expr), + sub: Box::new(expr), }) } fn hir_plus(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::OneOrMore, + min: 1, + max: None, greedy, - hir: Box::new(expr), + sub: Box::new(expr), }) } - fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir { + fn hir_range(greedy: bool, min: u32, max: Option<u32>, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::Range(range), + min, + max, greedy, - hir: Box::new(expr), + sub: Box::new(expr), }) } @@ -1281,32 +1496,25 @@ mod tests { Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap())) } - fn hir_uclass(ranges: &[(char, char)]) -> Hir { - let ranges: Vec<hir::ClassUnicodeRange> = ranges - .iter() - .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)) - .collect(); - Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges))) + fn hir_ascii_uclass(kind: &ast::ClassAsciiKind) -> Hir { + Hir::class(hir::Class::Unicode(hir::ClassUnicode::new( + ascii_class_as_chars(kind) + .map(|(s, e)|
hir::ClassUnicodeRange::new(s, e)), + ))) } - fn hir_bclass(ranges: &[(u8, u8)]) -> Hir { - let ranges: Vec<hir::ClassBytesRange> = ranges - .iter() - .map(|&(s, e)| hir::ClassBytesRange::new(s, e)) - .collect(); - Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) + fn hir_ascii_bclass(kind: &ast::ClassAsciiKind) -> Hir { + Hir::class(hir::Class::Bytes(hir::ClassBytes::new( + ascii_class(kind).map(|(s, e)| hir::ClassBytesRange::new(s, e)), + ))) } - fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir { - let ranges: Vec<hir::ClassBytesRange> = ranges - .iter() - .map(|&(s, e)| { - assert!(s as u32 <= 0x7F); - assert!(e as u32 <= 0x7F); - hir::ClassBytesRange::new(s as u8, e as u8) - }) - .collect(); - Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) + fn hir_uclass(ranges: &[(char, char)]) -> Hir { + Hir::class(uclass(ranges)) + } + + fn hir_bclass(ranges: &[(u8, u8)]) -> Hir { + Hir::class(bclass(ranges)) } fn hir_case_fold(expr: Hir) -> Hir { @@ -1329,6 +1537,33 @@ mod tests { } } + fn uclass(ranges: &[(char, char)]) -> hir::Class { + let ranges: Vec<hir::ClassUnicodeRange> = ranges + .iter() + .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)) + .collect(); + hir::Class::Unicode(hir::ClassUnicode::new(ranges)) + } + + fn bclass(ranges: &[(u8, u8)]) -> hir::Class { + let ranges: Vec<hir::ClassBytesRange> = ranges + .iter() + .map(|&(s, e)| hir::ClassBytesRange::new(s, e)) + .collect(); + hir::Class::Bytes(hir::ClassBytes::new(ranges)) + } + + #[cfg(feature = "unicode-case")] + fn class_case_fold(mut cls: hir::Class) -> Hir { + cls.case_fold_simple(); + Hir::class(cls) + } + + fn class_negate(mut cls: hir::Class) -> Hir { + cls.negate(); + Hir::class(cls) + } + #[allow(dead_code)] fn hir_union(expr1: Hir, expr2: Hir) -> Hir { use crate::hir::Class::{Bytes, Unicode}; @@ -1363,47 +1598,43 @@ mod tests { } } - fn hir_anchor(anchor: hir::Anchor) -> Hir { - Hir::anchor(anchor) - } - - fn hir_word(wb: hir::WordBoundary) -> Hir { - Hir::word_boundary(wb) + fn hir_look(look: hir::Look) -> Hir { + Hir::look(look) } #[test] fn empty() { assert_eq!(t(""), Hir::empty()); assert_eq!(t("(?i)"), Hir::empty()); - assert_eq!(t("()"), hir_group(1, Hir::empty())); - assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty())); - assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty())); + assert_eq!(t("()"), hir_capture(1, Hir::empty())); + assert_eq!(t("(?:)"), Hir::empty()); + assert_eq!(t("(?P<wat>)"), hir_capture_name(1, "wat", Hir::empty())); assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()])); assert_eq!( t("()|()"), hir_alt(vec![ hir_capture(1, Hir::empty()), hir_capture(2, Hir::empty()), ]) ); assert_eq!( t("(|b)"), hir_capture(1, hir_alt(vec![Hir::empty(), hir_lit("b"),])) ); assert_eq!( t("(a|)"), hir_capture(1, hir_alt(vec![hir_lit("a"), Hir::empty(),])) ); assert_eq!( t("(a||c)"), hir_capture( 1, hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),]) ) ); assert_eq!( t("(||)"), hir_capture( 1, hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),]) ) @@ -1422,16 +1653,7 @@ mod tests { assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a")); assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF")); - assert_eq!( - t_err("(?-u)☃"), - TestError { - kind: hir::ErrorKind::UnicodeNotAllowed, - span: Span::new( - Position::new(5, 1, 6), - Position::new(8, 1, 7) - ), - } - ); + assert_eq!(t("(?-u)☃"), hir_lit("☃")); assert_eq!( t_err(r"(?-u)\xFF"), TestError { kind: hir::ErrorKind::InvalidUtf8, span: Span::new( @@
-1449,10 +1671,7 @@ mod tests { #[cfg(feature = "unicode-case")] assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),])); #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i:a)"), - hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],)) - ); + assert_eq!(t("(?i:a)"), hir_uclass(&[('A', 'A'), ('a', 'a')])); #[cfg(feature = "unicode-case")] assert_eq!( t("a(?i)a(?-i)a"), @@ -1512,30 +1731,39 @@ mod tests { ); assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF")); - assert_eq!( - t_err("(?i-u)β"), - TestError { - kind: hir::ErrorKind::UnicodeNotAllowed, - span: Span::new( - Position::new(6, 1, 7), - Position::new(8, 1, 8), - ), - } - ); + assert_eq!(t("(?i-u)β"), hir_lit("β"),); } #[test] fn dot() { assert_eq!( t("."), - hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),]) + hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')]) + ); + assert_eq!( + t("(?R)."), + hir_uclass(&[ + ('\0', '\t'), + ('\x0B', '\x0C'), + ('\x0E', '\u{10FFFF}'), + ]) ); - assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),])); + assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}')])); + assert_eq!(t("(?Rs)."), hir_uclass(&[('\0', '\u{10FFFF}')])); assert_eq!( t_bytes("(?-u)."), - hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),]) + hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')]) + ); + assert_eq!( + t_bytes("(?R-u)."), + hir_bclass(&[ + (b'\0', b'\t'), + (b'\x0B', b'\x0C'), + (b'\x0E', b'\xFF'), + ]) ); assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),])); + assert_eq!(t_bytes("(?Rs-u)."), hir_bclass(&[(b'\0', b'\xFF'),])); // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed. assert_eq!( @@ -1549,7 +1777,7 @@ mod tests { } ); assert_eq!( - t_err("(?s-u)."), + t_err("(?R-u)."), TestError { kind: hir::ErrorKind::InvalidUtf8, span: Span::new( @@ -1558,94 +1786,123 @@ mod tests { ), } ); - } - - #[test] - fn assertions() { - assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText)); - assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText)); - assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText)); - assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText)); - assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine)); - assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine)); - assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText)); - assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText)); - - assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode)); - assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate)); - assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii)); assert_eq!( - t_bytes(r"(?-u)\B"), - hir_word(hir::WordBoundary::AsciiNegate) + t_err("(?s-u)."), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(6, 1, 7), + Position::new(7, 1, 8) + ), + } ); - assert_eq!( - t_err(r"(?-u)\B"), + t_err("(?Rs-u)."), TestError { kind: hir::ErrorKind::InvalidUtf8, span: Span::new( - Position::new(5, 1, 6), - Position::new(7, 1, 8) + Position::new(7, 1, 8), + Position::new(8, 1, 9) ), } ); } + #[test] + fn assertions() { + assert_eq!(t("^"), hir_look(hir::Look::Start)); + assert_eq!(t("$"), hir_look(hir::Look::End)); + assert_eq!(t(r"\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"\z"), hir_look(hir::Look::End)); + assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF)); + assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF)); + assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End)); + + assert_eq!(t(r"\b"), hir_look(hir::Look::WordUnicode)); + assert_eq!(t(r"\B"), 
hir_look(hir::Look::WordUnicodeNegate)); + assert_eq!(t(r"(?-u)\b"), hir_look(hir::Look::WordAscii)); + assert_eq!(t(r"(?-u)\B"), hir_look(hir::Look::WordAsciiNegate)); + } + #[test] fn group() { - assert_eq!(t("(a)"), hir_group(1, hir_lit("a"))); + assert_eq!(t("(a)"), hir_capture(1, hir_lit("a"))); assert_eq!( t("(a)(b)"), hir_cat(vec![ - hir_group(1, hir_lit("a")), - hir_group(2, hir_lit("b")), + hir_capture(1, hir_lit("a")), + hir_capture(2, hir_lit("b")), ]) ); assert_eq!( t("(a)|(b)"), hir_alt(vec![ - hir_group(1, hir_lit("a")), - hir_group(2, hir_lit("b")), + hir_capture(1, hir_lit("a")), + hir_capture(2, hir_lit("b")), ]) ); - assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty())); - assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a"))); + assert_eq!(t("(?P<foo>)"), hir_capture_name(1, "foo", Hir::empty())); + assert_eq!(t("(?P<foo>a)"), hir_capture_name(1, "foo", hir_lit("a"))); assert_eq!( t("(?P<foo>a)(?P<bar>b)"), hir_cat(vec![ - hir_group_name(1, "foo", hir_lit("a")), - hir_group_name(2, "bar", hir_lit("b")), + hir_capture_name(1, "foo", hir_lit("a")), + hir_capture_name(2, "bar", hir_lit("b")), ]) ); - assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty())); - assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a"))); + assert_eq!(t("(?:)"), Hir::empty()); + assert_eq!(t("(?:a)"), hir_lit("a")); assert_eq!( t("(?:a)(b)"), - hir_cat(vec![ - hir_group_nocap(hir_lit("a")), - hir_group(1, hir_lit("b")), - ]) + hir_cat(vec![hir_lit("a"), hir_capture(1, hir_lit("b")),]) ); assert_eq!( t("(a)(?:b)(c)"), hir_cat(vec![ - hir_group(1, hir_lit("a")), - hir_group_nocap(hir_lit("b")), - hir_group(2, hir_lit("c")), + hir_capture(1, hir_lit("a")), + hir_lit("b"), + hir_capture(2, hir_lit("c")), ]) ); assert_eq!( t("(a)(?P<foo>b)(c)"), hir_cat(vec![ - hir_group(1, hir_lit("a")), - hir_group_name(2, "foo", hir_lit("b")), - hir_group(3, hir_lit("c")), + hir_capture(1, hir_lit("a")), + hir_capture_name(2, "foo", hir_lit("b")), + hir_capture(3, hir_lit("c")), ]) ); - assert_eq!(t("()"), hir_group(1, Hir::empty())); - assert_eq!(t("((?i))"), hir_group(1, Hir::empty())); - assert_eq!(t("((?x))"), hir_group(1, Hir::empty())); - assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty()))); + assert_eq!(t("()"), hir_capture(1, Hir::empty())); + assert_eq!(t("((?i))"), hir_capture(1, Hir::empty())); + assert_eq!(t("((?x))"), hir_capture(1, Hir::empty())); + assert_eq!( + t("(((?x)))"), + hir_capture(1, hir_capture(2, Hir::empty())) + ); + } + + #[test] + fn line_anchors() { + assert_eq!(t("^"), hir_look(hir::Look::Start)); + assert_eq!(t("$"), hir_look(hir::Look::End)); + assert_eq!(t(r"\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"\z"), hir_look(hir::Look::End)); + + assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End)); + assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF)); + assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF)); + + assert_eq!(t(r"(?R)\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"(?R)\z"), hir_look(hir::Look::End)); + assert_eq!(t("(?R)^"), hir_look(hir::Look::Start)); + assert_eq!(t("(?R)$"), hir_look(hir::Look::End)); + + assert_eq!(t(r"(?Rm)\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"(?Rm)\z"), hir_look(hir::Look::End)); + assert_eq!(t("(?Rm)^"), hir_look(hir::Look::StartCRLF)); + assert_eq!(t("(?Rm)$"), hir_look(hir::Look::EndCRLF)); } #[test] @@ -1653,46 +1910,44 @@ mod tests { #[cfg(feature = "unicode-case")] assert_eq!( t("(?i:a)a"), - hir_cat(vec![ - hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])), -
hir_lit("a"), - ]) + hir_cat( + vec![hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"),] + ) ); assert_eq!( t("(?i-u:a)β"), hir_cat(vec![ - hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), + hir_bclass(&[(b'A', b'A'), (b'a', b'a')]), hir_lit("β"), ]) ); assert_eq!( t("(?:(?i-u)a)b"), hir_cat(vec![ - hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), + hir_bclass(&[(b'A', b'A'), (b'a', b'a')]), hir_lit("b"), ]) ); assert_eq!( t("((?i-u)a)b"), hir_cat(vec![ - hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), + hir_capture(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), hir_lit("b"), ]) ); #[cfg(feature = "unicode-case")] assert_eq!( t("(?i)(?-i:a)a"), - hir_cat(vec![ - hir_group_nocap(hir_lit("a")), - hir_uclass(&[('A', 'A'), ('a', 'a')]), - ]) + hir_cat( + vec![hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]),] + ) ); #[cfg(feature = "unicode-case")] assert_eq!( t("(?im)a^"), hir_cat(vec![ hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_anchor(hir::Anchor::StartLine), + hir_look(hir::Look::StartLF), ]) ); #[cfg(feature = "unicode-case")] @@ -1700,9 +1955,9 @@ mod tests { t("(?im)a^(?i-m)a^"), hir_cat(vec![ hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_anchor(hir::Anchor::StartLine), + hir_look(hir::Look::StartLF), hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_anchor(hir::Anchor::StartText), + hir_look(hir::Look::Start), ]) ); assert_eq!( @@ -1718,10 +1973,10 @@ mod tests { assert_eq!( t("(?:a(?i)a)a"), hir_cat(vec![ - hir_group_nocap(hir_cat(vec![ + hir_cat(vec![ hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]), - ])), + ]), hir_lit("a"), ]) ); @@ -1729,10 +1984,10 @@ mod tests { assert_eq!( t("(?i)(?:a(?-i)a)a"), hir_cat(vec![ - hir_group_nocap(hir_cat(vec![ + hir_cat(vec![ hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"), - ])), + ]), hir_uclass(&[('A', 'A'), ('a', 'a')]), ]) ); @@ -1755,46 +2010,18 @@ mod tests { assert_eq!(t("a*?"), hir_star(false, hir_lit("a"))); assert_eq!(t("a+?"), hir_plus(false, hir_lit("a"))); - assert_eq!( - t("a{1}"), - hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),) - ); - assert_eq!( - t("a{1,}"), - hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),) - ); - assert_eq!( - t("a{1,2}"), - hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),) - ); - assert_eq!( - t("a{1}?"), - hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),) - ); - assert_eq!( - t("a{1,}?"), - hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),) - ); - assert_eq!( - t("a{1,2}?"), - hir_range( - false, - hir::RepetitionRange::Bounded(1, 2), - hir_lit("a"), - ) - ); + assert_eq!(t("a{1}"), hir_range(true, 1, Some(1), hir_lit("a"),)); + assert_eq!(t("a{1,}"), hir_range(true, 1, None, hir_lit("a"),)); + assert_eq!(t("a{1,2}"), hir_range(true, 1, Some(2), hir_lit("a"),)); + assert_eq!(t("a{1}?"), hir_range(false, 1, Some(1), hir_lit("a"),)); + assert_eq!(t("a{1,}?"), hir_range(false, 1, None, hir_lit("a"),)); + assert_eq!(t("a{1,2}?"), hir_range(false, 1, Some(2), hir_lit("a"),)); assert_eq!( t("ab?"), hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) ); - assert_eq!( - t("(ab)?"), - hir_quest( - true, - hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),])) - ) - ); + assert_eq!(t("(ab)?"), hir_quest(true, hir_capture(1, hir_lit("ab")))); assert_eq!( t("a|b?"), hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) @@ -1803,48 +2030,49 @@ mod tests { #[test] fn cat_alt() { + let a = || hir_look(hir::Look::Start); + let b = || hir_look(hir::Look::End); + let c = || 
hir_look(hir::Look::WordUnicode); + let d = || hir_look(hir::Look::WordUnicodeNegate); + + assert_eq!(t("(^$)"), hir_capture(1, hir_cat(vec![a(), b()]))); + assert_eq!(t("^|$"), hir_alt(vec![a(), b()])); + assert_eq!(t(r"^|$|\b"), hir_alt(vec![a(), b(), c()])); assert_eq!( - t("(ab)"), - hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),])) - ); - assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),])); - assert_eq!( - t("a|b|c"), - hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),]) - ); - assert_eq!( - t("ab|bc|cd"), - hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),]) - ); - assert_eq!( - t("(a|b)"), - hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),])) + t(r"^$|$\b|\b\B"), + hir_alt(vec![ + hir_cat(vec![a(), b()]), + hir_cat(vec![b(), c()]), + hir_cat(vec![c(), d()]), + ]) ); + assert_eq!(t("(^|$)"), hir_capture(1, hir_alt(vec![a(), b()]))); assert_eq!( - t("(a|b|c)"), - hir_group( - 1, - hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),]) - ) + t(r"(^|$|\b)"), + hir_capture(1, hir_alt(vec![a(), b(), c()])) ); assert_eq!( - t("(ab|bc|cd)"), - hir_group( + t(r"(^$|$\b|\b\B)"), + hir_capture( 1, - hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),]) + hir_alt(vec![ + hir_cat(vec![a(), b()]), + hir_cat(vec![b(), c()]), + hir_cat(vec![c(), d()]), + ]) ) ); assert_eq!( - t("(ab|(bc|(cd)))"), - hir_group( + t(r"(^$|($\b|(\b\B)))"), + hir_capture( 1, hir_alt(vec![ - hir_lit("ab"), - hir_group( + hir_cat(vec![a(), b()]), + hir_capture( 2, hir_alt(vec![ - hir_lit("bc"), - hir_group(3, hir_lit("cd")), + hir_cat(vec![b(), c()]), + hir_capture(3, hir_cat(vec![c(), d()])), ]) ), ]) @@ -1852,68 +2080,107 @@ mod tests { ); } + // Tests the HIR transformation of things like '[a-z]|[A-Z]' into + // '[A-Za-z]'. In other words, an alternation of just classes is always + // equivalent to a single class corresponding to the union of the branches + // in that class. (Unless some branches match invalid UTF-8 and others + // match non-ASCII Unicode.) + #[test] + fn cat_class_flattened() { + assert_eq!(t(r"[a-z]|[A-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')])); + // Combining all of the letter properties should give us the one giant + // letter property. + #[cfg(feature = "unicode-gencat")] + assert_eq!( + t(r"(?x) + \p{Lowercase_Letter} + |\p{Uppercase_Letter} + |\p{Titlecase_Letter} + |\p{Modifier_Letter} + |\p{Other_Letter} + "), + hir_uclass_query(ClassQuery::Binary("letter")) + ); + // Byte classes that can truly match invalid UTF-8 cannot be combined + // with Unicode classes. + assert_eq!( + t_bytes(r"[Δδ]|(?-u:[\x90-\xFF])|[Λλ]"), + hir_alt(vec![ + hir_uclass(&[('Δ', 'Δ'), ('δ', 'δ')]), + hir_bclass(&[(b'\x90', b'\xFF')]), + hir_uclass(&[('Λ', 'Λ'), ('λ', 'λ')]), + ]) + ); + // Byte classes on their own can be combined, even if some are ASCII + // and others are invalid UTF-8. 
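// NOTE(editor): sketch, not part of the vendored patch. The class
// flattening exercised below boils down to `ClassUnicode::union` on the
// branch classes:
//
//     let mut cls = hir::ClassUnicode::new(vec![
//         hir::ClassUnicodeRange::new('a', 'z'),
//     ]);
//     cls.union(&hir::ClassUnicode::new(vec![
//         hir::ClassUnicodeRange::new('A', 'Z'),
//     ]));
//     assert_eq!(cls.ranges().len(), 2); // [A-Z] and [a-z], non-adjacent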
+ assert_eq!( + t_bytes(r"[a-z]|(?-u:[\x90-\xFF])|[A-Z]"), + hir_bclass(&[(b'A', b'Z'), (b'a', b'z'), (b'\x90', b'\xFF')]), + ); + } + #[test] fn class_ascii() { assert_eq!( t("[[:alnum:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)) + hir_ascii_uclass(&ast::ClassAsciiKind::Alnum) ); assert_eq!( t("[[:alpha:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha)) + hir_ascii_uclass(&ast::ClassAsciiKind::Alpha) ); assert_eq!( t("[[:ascii:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii)) + hir_ascii_uclass(&ast::ClassAsciiKind::Ascii) ); assert_eq!( t("[[:blank:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank)) + hir_ascii_uclass(&ast::ClassAsciiKind::Blank) ); assert_eq!( t("[[:cntrl:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl)) + hir_ascii_uclass(&ast::ClassAsciiKind::Cntrl) ); assert_eq!( t("[[:digit:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit)) + hir_ascii_uclass(&ast::ClassAsciiKind::Digit) ); assert_eq!( t("[[:graph:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph)) + hir_ascii_uclass(&ast::ClassAsciiKind::Graph) ); assert_eq!( t("[[:lower:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)) + hir_ascii_uclass(&ast::ClassAsciiKind::Lower) ); assert_eq!( t("[[:print:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Print)) + hir_ascii_uclass(&ast::ClassAsciiKind::Print) ); assert_eq!( t("[[:punct:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct)) + hir_ascii_uclass(&ast::ClassAsciiKind::Punct) ); assert_eq!( t("[[:space:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Space)) + hir_ascii_uclass(&ast::ClassAsciiKind::Space) ); assert_eq!( t("[[:upper:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper)) + hir_ascii_uclass(&ast::ClassAsciiKind::Upper) ); assert_eq!( t("[[:word:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Word)) + hir_ascii_uclass(&ast::ClassAsciiKind::Word) ); assert_eq!( t("[[:xdigit:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit)) + hir_ascii_uclass(&ast::ClassAsciiKind::Xdigit) ); assert_eq!( t("[[:^lower:]]"), - hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))) + hir_negate(hir_ascii_uclass(&ast::ClassAsciiKind::Lower)) ); #[cfg(feature = "unicode-case")] assert_eq!( @@ -1928,13 +2195,11 @@ mod tests { assert_eq!( t("(?-u)[[:lower:]]"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower)) + hir_ascii_bclass(&ast::ClassAsciiKind::Lower) ); assert_eq!( t("(?i-u)[[:lower:]]"), - hir_case_fold(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Lower - ))) + hir_case_fold(hir_ascii_bclass(&ast::ClassAsciiKind::Lower)) ); assert_eq!( @@ -1965,14 +2230,14 @@ mod tests { assert_eq!( t("[[:alnum:][:^ascii:]]"), hir_union( - hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)), + hir_ascii_uclass(&ast::ClassAsciiKind::Alnum), hir_uclass(&[('\u{80}', '\u{10FFFF}')]), ), ); assert_eq!( t_bytes("(?-u)[[:alnum:][:^ascii:]]"), hir_union( - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)), + hir_ascii_bclass(&ast::ClassAsciiKind::Alnum), hir_bclass(&[(0x80, 0xFF)]), ), ); @@ -1980,7 +2245,7 @@ mod tests { #[test] #[cfg(feature = "unicode-perl")] - fn class_perl() { + fn class_perl_unicode() { // Unicode assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit"))); assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space"))); @@ -2020,69 +2285,124 @@ mod tests { ); #[cfg(feature = "unicode-case")] assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word())); + } + #[test] + fn class_perl_ascii() 
{ // ASCII only assert_eq!( t(r"(?-u)\d"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit)) + hir_ascii_bclass(&ast::ClassAsciiKind::Digit) ); assert_eq!( t(r"(?-u)\s"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space)) + hir_ascii_bclass(&ast::ClassAsciiKind::Space) ); assert_eq!( t(r"(?-u)\w"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word)) + hir_ascii_bclass(&ast::ClassAsciiKind::Word) ); assert_eq!( t(r"(?i-u)\d"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit)) + hir_ascii_bclass(&ast::ClassAsciiKind::Digit) ); assert_eq!( t(r"(?i-u)\s"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space)) + hir_ascii_bclass(&ast::ClassAsciiKind::Space) ); assert_eq!( t(r"(?i-u)\w"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word)) + hir_ascii_bclass(&ast::ClassAsciiKind::Word) ); // ASCII only, negated assert_eq!( - t(r"(?-u)\D"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Digit - ))) + t_bytes(r"(?-u)\D"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) ); assert_eq!( - t(r"(?-u)\S"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Space - ))) + t_bytes(r"(?-u)\S"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space)) ); assert_eq!( - t(r"(?-u)\W"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Word - ))) + t_bytes(r"(?-u)\W"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word)) ); assert_eq!( - t(r"(?i-u)\D"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Digit - ))) + t_bytes(r"(?i-u)\D"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) ); assert_eq!( - t(r"(?i-u)\S"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Space - ))) + t_bytes(r"(?i-u)\S"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space)) ); assert_eq!( - t(r"(?i-u)\W"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Word - ))) + t_bytes(r"(?i-u)\W"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word)) + ); + + // ASCII only, negated, with UTF-8 mode enabled. + // In this case, negating any Perl class results in an error because + // all such classes can match invalid UTF-8. 
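// NOTE(editor): sketch, not part of the vendored patch, of why the negated
// forms error out in UTF-8 mode immediately below: complementing an
// ASCII-only byte class pulls in the bytes 0x80-0xFF, and such bytes never
// occur on their own in valid UTF-8:
//
//     let mut cls = hir::ClassBytes::new(vec![
//         hir::ClassBytesRange::new(b'0', b'9'),
//     ]);
//     cls.negate();
//     assert!(!cls.is_ascii()); // now covers 0x80-0xFF => InvalidUtf8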
+ assert_eq!( + t_err(r"(?-u)\D"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(5, 1, 6), + Position::new(7, 1, 8), + ), + }, + ); + assert_eq!( + t_err(r"(?-u)\S"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(5, 1, 6), + Position::new(7, 1, 8), + ), + }, + ); + assert_eq!( + t_err(r"(?-u)\W"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(5, 1, 6), + Position::new(7, 1, 8), + ), + }, + ); + assert_eq!( + t_err(r"(?i-u)\D"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(6, 1, 7), + Position::new(8, 1, 9), + ), + }, + ); + assert_eq!( + t_err(r"(?i-u)\S"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(6, 1, 7), + Position::new(8, 1, 9), + ), + }, + ); + assert_eq!( + t_err(r"(?i-u)\W"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(6, 1, 7), + Position::new(8, 1, 9), + ), + }, ); } @@ -2360,16 +2680,7 @@ mod tests { #[test] #[cfg(feature = "unicode-gencat")] fn class_unicode_any_empty() { - assert_eq!( - t_err(r"\P{any}"), - TestError { - kind: hir::ErrorKind::EmptyClassNotAllowed, - span: Span::new( - Position::new(0, 1, 1), - Position::new(7, 1, 8) - ), - } - ); + assert_eq!(t(r"\P{any}"), hir_uclass(&[]),); } #[test] @@ -2389,8 +2700,9 @@ mod tests { #[test] fn class_bracketed() { - assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')])); - assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')]))); + assert_eq!(t("[a]"), hir_lit("a")); + assert_eq!(t("[ab]"), hir_uclass(&[('a', 'b')])); + assert_eq!(t("[^[a]]"), class_negate(uclass(&[('a', 'a')]))); assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')])); assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')])); assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')])); @@ -2453,11 +2765,11 @@ mod tests { ); assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),])); - assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')]))); - assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')]))); + assert_eq!(t("[^a]"), class_negate(uclass(&[('a', 'a')]))); + assert_eq!(t(r"[^\x00]"), class_negate(uclass(&[('\0', '\0')]))); assert_eq!( t_bytes("(?-u)[^a]"), - hir_negate(hir_bclass(&[(b'a', b'a')])) + class_negate(bclass(&[(b'a', b'a')])) ); #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))] assert_eq!( @@ -2521,27 +2833,9 @@ mod tests { } ); #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))] - assert_eq!( - t_err(r"[^\s\S]"), - TestError { - kind: hir::ErrorKind::EmptyClassNotAllowed, - span: Span::new( - Position::new(0, 1, 1), - Position::new(7, 1, 8) - ), - } - ); + assert_eq!(t(r"[^\s\S]"), hir_uclass(&[]),); #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))] - assert_eq!( - t_err(r"(?-u)[^\s\S]"), - TestError { - kind: hir::ErrorKind::EmptyClassNotAllowed, - span: Span::new( - Position::new(5, 1, 6), - Position::new(12, 1, 13) - ), - } - ); + assert_eq!(t_bytes(r"(?-u)[^\s\S]"), hir_bclass(&[]),); } #[test] @@ -2663,9 +2957,9 @@ mod tests { #[test] fn class_bracketed_nested() { - assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')]))); - assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')]))); - assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[]))); + assert_eq!(t(r"[a[^c]]"), class_negate(uclass(&[('c', 'c')]))); + assert_eq!(t(r"[a-b[^c]]"), class_negate(uclass(&[('c', 'c')]))); + assert_eq!(t(r"[a-c[^c]]"), 
class_negate(uclass(&[]))); assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')])); assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')])); @@ -2673,12 +2967,12 @@ mod tests { #[cfg(feature = "unicode-case")] assert_eq!( t(r"(?i)[a[^c]]"), - hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')]))) + hir_negate(class_case_fold(uclass(&[('c', 'c')]))) ); #[cfg(feature = "unicode-case")] assert_eq!( t(r"(?i)[a-b[^c]]"), - hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')]))) + hir_negate(class_case_fold(uclass(&[('c', 'c')]))) ); #[cfg(feature = "unicode-case")] @@ -2689,27 +2983,9 @@ mod tests { hir_uclass(&[('C', 'C'), ('c', 'c')]) ); - assert_eq!( - t_err(r"[^a-c[^c]]"), - TestError { - kind: hir::ErrorKind::EmptyClassNotAllowed, - span: Span::new( - Position::new(0, 1, 1), - Position::new(10, 1, 11) - ), - } - ); + assert_eq!(t(r"[^a-c[^c]]"), hir_uclass(&[]),); #[cfg(feature = "unicode-case")] - assert_eq!( - t_err(r"(?i)[^a-c[^c]]"), - TestError { - kind: hir::ErrorKind::EmptyClassNotAllowed, - span: Span::new( - Position::new(4, 1, 5), - Position::new(14, 1, 15) - ), - } - ); + assert_eq!(t(r"(?i)[^a-c[^c]]"), hir_uclass(&[]),); } #[test] @@ -2826,9 +3102,7 @@ mod tests { #[cfg(feature = "unicode-perl")] assert_eq!( t_bytes(r"(?-u)[^\w&&\d]"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Digit - ))) + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) ); assert_eq!( t_bytes(r"(?-u)[^[a-z&&a-c]]"), @@ -2836,19 +3110,15 @@ mod tests { ); assert_eq!( t_bytes(r"(?-u)[^[\w&&\d]]"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Digit - ))) + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) ); assert_eq!( t_bytes(r"(?-u)[^[^\w&&\d]]"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit)) + hir_ascii_bclass(&ast::ClassAsciiKind::Digit) ); assert_eq!( t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Word - ))) + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word)) ); } @@ -2924,284 +3194,531 @@ mod tests { , # comment 10 # comment } # comment"), - hir_range( - true, - hir::RepetitionRange::Bounded(5, 10), - hir_lit("a") - ) + hir_range(true, 5, Some(10), hir_lit("a")) ); assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a ")); } #[test] - fn analysis_is_always_utf8() { + fn analysis_is_utf8() { // Positive examples. - assert!(t_bytes(r"a").is_always_utf8()); - assert!(t_bytes(r"ab").is_always_utf8()); - assert!(t_bytes(r"(?-u)a").is_always_utf8()); - assert!(t_bytes(r"(?-u)ab").is_always_utf8()); - assert!(t_bytes(r"\xFF").is_always_utf8()); - assert!(t_bytes(r"\xFF\xFF").is_always_utf8()); - assert!(t_bytes(r"[^a]").is_always_utf8()); - assert!(t_bytes(r"[^a][^a]").is_always_utf8()); - assert!(t_bytes(r"\b").is_always_utf8()); - assert!(t_bytes(r"\B").is_always_utf8()); - assert!(t_bytes(r"(?-u)\b").is_always_utf8()); + assert!(props_bytes(r"a").is_utf8()); + assert!(props_bytes(r"ab").is_utf8()); + assert!(props_bytes(r"(?-u)a").is_utf8()); + assert!(props_bytes(r"(?-u)ab").is_utf8()); + assert!(props_bytes(r"\xFF").is_utf8()); + assert!(props_bytes(r"\xFF\xFF").is_utf8()); + assert!(props_bytes(r"[^a]").is_utf8()); + assert!(props_bytes(r"[^a][^a]").is_utf8()); + assert!(props_bytes(r"\b").is_utf8()); + assert!(props_bytes(r"\B").is_utf8()); + assert!(props_bytes(r"(?-u)\b").is_utf8()); + assert!(props_bytes(r"(?-u)\B").is_utf8()); // Negative examples. 
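// NOTE(editor): sketch, not part of the vendored patch. `is_utf8` now
// lives on `hir::Properties` and reports whether every match is guaranteed
// to fall on valid UTF-8, which is what flips for the negative examples
// below:
//
//     let hir = t_bytes(r"(?-u)\xFF");
//     assert!(!hir.properties().is_utf8());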
- assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8()); - assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8()); - assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8()); - assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8()); - assert!(!t_bytes(r"(?-u)\B").is_always_utf8()); + assert!(!props_bytes(r"(?-u)\xFF").is_utf8()); + assert!(!props_bytes(r"(?-u)\xFF\xFF").is_utf8()); + assert!(!props_bytes(r"(?-u)[^a]").is_utf8()); + assert!(!props_bytes(r"(?-u)[^a][^a]").is_utf8()); + } + + #[test] + fn analysis_captures_len() { + assert_eq!(0, props(r"a").explicit_captures_len()); + assert_eq!(0, props(r"(?:a)").explicit_captures_len()); + assert_eq!(0, props(r"(?i-u:a)").explicit_captures_len()); + assert_eq!(0, props(r"(?i-u)a").explicit_captures_len()); + assert_eq!(1, props(r"(a)").explicit_captures_len()); + assert_eq!(1, props(r"(?P<foo>a)").explicit_captures_len()); + assert_eq!(1, props(r"()").explicit_captures_len()); + assert_eq!(1, props(r"()a").explicit_captures_len()); + assert_eq!(1, props(r"(a)+").explicit_captures_len()); + assert_eq!(2, props(r"(a)(b)").explicit_captures_len()); + assert_eq!(2, props(r"(a)|(b)").explicit_captures_len()); + assert_eq!(2, props(r"((a))").explicit_captures_len()); + assert_eq!(1, props(r"([a&&b])").explicit_captures_len()); + } + + #[test] + fn analysis_static_captures_len() { + let len = |pattern| props(pattern).static_explicit_captures_len(); + assert_eq!(Some(0), len(r"")); + assert_eq!(Some(0), len(r"foo|bar")); + assert_eq!(None, len(r"(foo)|bar")); + assert_eq!(None, len(r"foo|(bar)")); + assert_eq!(Some(1), len(r"(foo|bar)")); + assert_eq!(Some(1), len(r"(a|b|c|d|e|f)")); + assert_eq!(Some(1), len(r"(a)|(b)|(c)|(d)|(e)|(f)")); + assert_eq!(Some(2), len(r"(a)(b)|(c)(d)|(e)(f)")); + assert_eq!(Some(6), len(r"(a)(b)(c)(d)(e)(f)")); + assert_eq!(Some(3), len(r"(a)(b)(extra)|(a)(b)()")); + assert_eq!(Some(3), len(r"(a)(b)((?:extra)?)")); + assert_eq!(None, len(r"(a)(b)(extra)?")); + assert_eq!(Some(1), len(r"(foo)|(bar)")); + assert_eq!(Some(2), len(r"(foo)(bar)")); + assert_eq!(Some(2), len(r"(foo)+(bar)")); + assert_eq!(None, len(r"(foo)*(bar)")); + assert_eq!(Some(0), len(r"(foo)?{0}")); + assert_eq!(None, len(r"(foo)?{1}")); + assert_eq!(Some(1), len(r"(foo){1}")); + assert_eq!(Some(1), len(r"(foo){1,}")); + assert_eq!(Some(1), len(r"(foo){1,}?")); + assert_eq!(None, len(r"(foo){1,}??")); + assert_eq!(None, len(r"(foo){0,}")); + assert_eq!(Some(1), len(r"(foo)(?:bar)")); + assert_eq!(Some(2), len(r"(foo(?:bar)+)(?:baz(boo))")); + assert_eq!(Some(2), len(r"(?P<foo>foo)(?:bar)(bal|loon)")); + assert_eq!( + Some(2), + len(r#"<(a)[^>]+href="([^"]+)"|<(img)[^>]+src="([^"]+)""#) + ); } #[test] fn analysis_is_all_assertions() { // Positive examples.
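// NOTE(editor): sketch, not part of the vendored patch. The old
// `is_all_assertions` predicate is gone; the rewritten test below
// approximates it with two properties: a non-empty look set plus a minimum
// match length of zero:
//
//     let p = props(r"^$");
//     assert!(!p.look_set().is_empty()); // contains Look::Start, Look::End
//     assert_eq!(p.minimum_len(), Some(0)); // can match the empty string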
- assert!(t(r"\b").is_all_assertions()); - assert!(t(r"\B").is_all_assertions()); - assert!(t(r"^").is_all_assertions()); - assert!(t(r"$").is_all_assertions()); - assert!(t(r"\A").is_all_assertions()); - assert!(t(r"\z").is_all_assertions()); - assert!(t(r"$^\z\A\b\B").is_all_assertions()); - assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions()); - assert!(t(r"^$|$^").is_all_assertions()); - assert!(t(r"((\b)+())*^").is_all_assertions()); + let p = props(r"\b"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"\B"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"^"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"$"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"\A"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"\z"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"$^\z\A\b\B"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"$|^|\z|\A|\b|\B"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"^$|$^"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"((\b)+())*^"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); // Negative examples. - assert!(!t(r"^a").is_all_assertions()); + let p = props(r"^a"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(1)); + } + + #[test] + fn analysis_look_set_prefix_any() { + let p = props(r"(?-u)(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))"); + assert!(p.look_set_prefix_any().contains(Look::WordAscii)); } #[test] fn analysis_is_anchored() { + let is_start = |p| props(p).look_set_prefix().contains(Look::Start); + let is_end = |p| props(p).look_set_suffix().contains(Look::End); + // Positive examples. 
- assert!(t(r"^").is_anchored_start()); - assert!(t(r"$").is_anchored_end()); - assert!(t(r"^").is_line_anchored_start()); - assert!(t(r"$").is_line_anchored_end()); - - assert!(t(r"^^").is_anchored_start()); - assert!(t(r"$$").is_anchored_end()); - assert!(t(r"^^").is_line_anchored_start()); - assert!(t(r"$$").is_line_anchored_end()); - - assert!(t(r"^$").is_anchored_start()); - assert!(t(r"^$").is_anchored_end()); - assert!(t(r"^$").is_line_anchored_start()); - assert!(t(r"^$").is_line_anchored_end()); - - assert!(t(r"^foo").is_anchored_start()); - assert!(t(r"foo$").is_anchored_end()); - assert!(t(r"^foo").is_line_anchored_start()); - assert!(t(r"foo$").is_line_anchored_end()); - - assert!(t(r"^foo|^bar").is_anchored_start()); - assert!(t(r"foo$|bar$").is_anchored_end()); - assert!(t(r"^foo|^bar").is_line_anchored_start()); - assert!(t(r"foo$|bar$").is_line_anchored_end()); - - assert!(t(r"^(foo|bar)").is_anchored_start()); - assert!(t(r"(foo|bar)$").is_anchored_end()); - assert!(t(r"^(foo|bar)").is_line_anchored_start()); - assert!(t(r"(foo|bar)$").is_line_anchored_end()); - - assert!(t(r"^+").is_anchored_start()); - assert!(t(r"$+").is_anchored_end()); - assert!(t(r"^+").is_line_anchored_start()); - assert!(t(r"$+").is_line_anchored_end()); - assert!(t(r"^++").is_anchored_start()); - assert!(t(r"$++").is_anchored_end()); - assert!(t(r"^++").is_line_anchored_start()); - assert!(t(r"$++").is_line_anchored_end()); - assert!(t(r"(^)+").is_anchored_start()); - assert!(t(r"($)+").is_anchored_end()); - assert!(t(r"(^)+").is_line_anchored_start()); - assert!(t(r"($)+").is_line_anchored_end()); - - assert!(t(r"$^").is_anchored_start()); - assert!(t(r"$^").is_anchored_start()); - assert!(t(r"$^").is_line_anchored_end()); - assert!(t(r"$^").is_line_anchored_end()); - assert!(t(r"$^|^$").is_anchored_start()); - assert!(t(r"$^|^$").is_anchored_end()); - assert!(t(r"$^|^$").is_line_anchored_start()); - assert!(t(r"$^|^$").is_line_anchored_end()); - - assert!(t(r"\b^").is_anchored_start()); - assert!(t(r"$\b").is_anchored_end()); - assert!(t(r"\b^").is_line_anchored_start()); - assert!(t(r"$\b").is_line_anchored_end()); - assert!(t(r"^(?m:^)").is_anchored_start()); - assert!(t(r"(?m:$)$").is_anchored_end()); - assert!(t(r"^(?m:^)").is_line_anchored_start()); - assert!(t(r"(?m:$)$").is_line_anchored_end()); - assert!(t(r"(?m:^)^").is_anchored_start()); - assert!(t(r"$(?m:$)").is_anchored_end()); - assert!(t(r"(?m:^)^").is_line_anchored_start()); - assert!(t(r"$(?m:$)").is_line_anchored_end()); + assert!(is_start(r"^")); + assert!(is_end(r"$")); - // Negative examples. 
- assert!(!t(r"(?m)^").is_anchored_start()); - assert!(!t(r"(?m)$").is_anchored_end()); - assert!(!t(r"(?m:^$)|$^").is_anchored_start()); - assert!(!t(r"(?m:^$)|$^").is_anchored_end()); - assert!(!t(r"$^|(?m:^$)").is_anchored_start()); - assert!(!t(r"$^|(?m:^$)").is_anchored_end()); - - assert!(!t(r"a^").is_anchored_start()); - assert!(!t(r"$a").is_anchored_start()); - assert!(!t(r"a^").is_line_anchored_start()); - assert!(!t(r"$a").is_line_anchored_start()); - - assert!(!t(r"a^").is_anchored_end()); - assert!(!t(r"$a").is_anchored_end()); - assert!(!t(r"a^").is_line_anchored_end()); - assert!(!t(r"$a").is_line_anchored_end()); - - assert!(!t(r"^foo|bar").is_anchored_start()); - assert!(!t(r"foo|bar$").is_anchored_end()); - assert!(!t(r"^foo|bar").is_line_anchored_start()); - assert!(!t(r"foo|bar$").is_line_anchored_end()); - - assert!(!t(r"^*").is_anchored_start()); - assert!(!t(r"$*").is_anchored_end()); - assert!(!t(r"^*").is_line_anchored_start()); - assert!(!t(r"$*").is_line_anchored_end()); - assert!(!t(r"^*+").is_anchored_start()); - assert!(!t(r"$*+").is_anchored_end()); - assert!(!t(r"^*+").is_line_anchored_start()); - assert!(!t(r"$*+").is_line_anchored_end()); - assert!(!t(r"^+*").is_anchored_start()); - assert!(!t(r"$+*").is_anchored_end()); - assert!(!t(r"^+*").is_line_anchored_start()); - assert!(!t(r"$+*").is_line_anchored_end()); - assert!(!t(r"(^)*").is_anchored_start()); - assert!(!t(r"($)*").is_anchored_end()); - assert!(!t(r"(^)*").is_line_anchored_start()); - assert!(!t(r"($)*").is_line_anchored_end()); - } + assert!(is_start(r"^^")); + assert!(props(r"$$").look_set_suffix().contains(Look::End)); - #[test] - fn analysis_is_line_anchored() { - assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start()); - assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end()); + assert!(is_start(r"^$")); + assert!(is_end(r"^$")); - assert!(t(r"(?m)^foo|^bar").is_line_anchored_start()); - assert!(t(r"(?m)foo$|bar$").is_line_anchored_end()); + assert!(is_start(r"^foo")); + assert!(is_end(r"foo$")); - assert!(t(r"(?m)^").is_line_anchored_start()); - assert!(t(r"(?m)$").is_line_anchored_end()); + assert!(is_start(r"^foo|^bar")); + assert!(is_end(r"foo$|bar$")); - assert!(t(r"(?m:^$)|$^").is_line_anchored_start()); - assert!(t(r"(?m:^$)|$^").is_line_anchored_end()); + assert!(is_start(r"^(foo|bar)")); + assert!(is_end(r"(foo|bar)$")); - assert!(t(r"$^|(?m:^$)").is_line_anchored_start()); - assert!(t(r"$^|(?m:^$)").is_line_anchored_end()); + assert!(is_start(r"^+")); + assert!(is_end(r"$+")); + assert!(is_start(r"^++")); + assert!(is_end(r"$++")); + assert!(is_start(r"(^)+")); + assert!(is_end(r"($)+")); + + assert!(is_start(r"$^")); + assert!(is_start(r"$^")); + assert!(is_start(r"$^|^$")); + assert!(is_end(r"$^|^$")); + + assert!(is_start(r"\b^")); + assert!(is_end(r"$\b")); + assert!(is_start(r"^(?m:^)")); + assert!(is_end(r"(?m:$)$")); + assert!(is_start(r"(?m:^)^")); + assert!(is_end(r"$(?m:$)")); + + // Negative examples. 
+ assert!(!is_start(r"(?m)^")); + assert!(!is_end(r"(?m)$")); + assert!(!is_start(r"(?m:^$)|$^")); + assert!(!is_end(r"(?m:^$)|$^")); + assert!(!is_start(r"$^|(?m:^$)")); + assert!(!is_end(r"$^|(?m:^$)")); + + assert!(!is_start(r"a^")); + assert!(!is_start(r"$a")); + + assert!(!is_end(r"a^")); + assert!(!is_end(r"$a")); + + assert!(!is_start(r"^foo|bar")); + assert!(!is_end(r"foo|bar$")); + + assert!(!is_start(r"^*")); + assert!(!is_end(r"$*")); + assert!(!is_start(r"^*+")); + assert!(!is_end(r"$*+")); + assert!(!is_start(r"^+*")); + assert!(!is_end(r"$+*")); + assert!(!is_start(r"(^)*")); + assert!(!is_end(r"($)*")); } #[test] fn analysis_is_any_anchored() { + let is_start = |p| props(p).look_set().contains(Look::Start); + let is_end = |p| props(p).look_set().contains(Look::End); + // Positive examples. - assert!(t(r"^").is_any_anchored_start()); - assert!(t(r"$").is_any_anchored_end()); - assert!(t(r"\A").is_any_anchored_start()); - assert!(t(r"\z").is_any_anchored_end()); + assert!(is_start(r"^")); + assert!(is_end(r"$")); + assert!(is_start(r"\A")); + assert!(is_end(r"\z")); // Negative examples. - assert!(!t(r"(?m)^").is_any_anchored_start()); - assert!(!t(r"(?m)$").is_any_anchored_end()); - assert!(!t(r"$").is_any_anchored_start()); - assert!(!t(r"^").is_any_anchored_end()); + assert!(!is_start(r"(?m)^")); + assert!(!is_end(r"(?m)$")); + assert!(!is_start(r"$")); + assert!(!is_end(r"^")); } #[test] - fn analysis_is_match_empty() { + fn analysis_can_empty() { // Positive examples. - assert!(t(r"").is_match_empty()); - assert!(t(r"()").is_match_empty()); - assert!(t(r"()*").is_match_empty()); - assert!(t(r"()+").is_match_empty()); - assert!(t(r"()?").is_match_empty()); - assert!(t(r"a*").is_match_empty()); - assert!(t(r"a?").is_match_empty()); - assert!(t(r"a{0}").is_match_empty()); - assert!(t(r"a{0,}").is_match_empty()); - assert!(t(r"a{0,1}").is_match_empty()); - assert!(t(r"a{0,10}").is_match_empty()); + let assert_empty = + |p| assert_eq!(Some(0), props_bytes(p).minimum_len()); + assert_empty(r""); + assert_empty(r"()"); + assert_empty(r"()*"); + assert_empty(r"()+"); + assert_empty(r"()?"); + assert_empty(r"a*"); + assert_empty(r"a?"); + assert_empty(r"a{0}"); + assert_empty(r"a{0,}"); + assert_empty(r"a{0,1}"); + assert_empty(r"a{0,10}"); #[cfg(feature = "unicode-gencat")] - assert!(t(r"\pL*").is_match_empty()); - assert!(t(r"a*|b").is_match_empty()); - assert!(t(r"b|a*").is_match_empty()); - assert!(t(r"a|").is_match_empty()); - assert!(t(r"|a").is_match_empty()); - assert!(t(r"a||b").is_match_empty()); - assert!(t(r"a*a?(abcd)*").is_match_empty()); - assert!(t(r"^").is_match_empty()); - assert!(t(r"$").is_match_empty()); - assert!(t(r"(?m)^").is_match_empty()); - assert!(t(r"(?m)$").is_match_empty()); - assert!(t(r"\A").is_match_empty()); - assert!(t(r"\z").is_match_empty()); - assert!(t(r"\B").is_match_empty()); - assert!(t_bytes(r"(?-u)\B").is_match_empty()); - assert!(t(r"\b").is_match_empty()); - assert!(t(r"(?-u)\b").is_match_empty()); + assert_empty(r"\pL*"); + assert_empty(r"a*|b"); + assert_empty(r"b|a*"); + assert_empty(r"a|"); + assert_empty(r"|a"); + assert_empty(r"a||b"); + assert_empty(r"a*a?(abcd)*"); + assert_empty(r"^"); + assert_empty(r"$"); + assert_empty(r"(?m)^"); + assert_empty(r"(?m)$"); + assert_empty(r"\A"); + assert_empty(r"\z"); + assert_empty(r"\B"); + assert_empty(r"(?-u)\B"); + assert_empty(r"\b"); + assert_empty(r"(?-u)\b"); // Negative examples. 
- assert!(!t(r"a+").is_match_empty()); - assert!(!t(r"a{1}").is_match_empty()); - assert!(!t(r"a{1,}").is_match_empty()); - assert!(!t(r"a{1,2}").is_match_empty()); - assert!(!t(r"a{1,10}").is_match_empty()); - assert!(!t(r"b|a").is_match_empty()); - assert!(!t(r"a*a+(abcd)*").is_match_empty()); + let assert_non_empty = + |p| assert_ne!(Some(0), props_bytes(p).minimum_len()); + assert_non_empty(r"a+"); + assert_non_empty(r"a{1}"); + assert_non_empty(r"a{1,}"); + assert_non_empty(r"a{1,2}"); + assert_non_empty(r"a{1,10}"); + assert_non_empty(r"b|a"); + assert_non_empty(r"a*a+(abcd)*"); + #[cfg(feature = "unicode-gencat")] + assert_non_empty(r"\P{any}"); + assert_non_empty(r"[a--a]"); + assert_non_empty(r"[a&&b]"); } #[test] fn analysis_is_literal() { // Positive examples. - assert!(t(r"a").is_literal()); - assert!(t(r"ab").is_literal()); - assert!(t(r"abc").is_literal()); - assert!(t(r"(?m)abc").is_literal()); + assert!(props(r"a").is_literal()); + assert!(props(r"ab").is_literal()); + assert!(props(r"abc").is_literal()); + assert!(props(r"(?m)abc").is_literal()); + assert!(props(r"(?:a)").is_literal()); + assert!(props(r"foo(?:a)").is_literal()); + assert!(props(r"(?:a)foo").is_literal()); + assert!(props(r"[a]").is_literal()); // Negative examples. - assert!(!t(r"").is_literal()); - assert!(!t(r"^").is_literal()); - assert!(!t(r"a|b").is_literal()); - assert!(!t(r"(a)").is_literal()); - assert!(!t(r"a+").is_literal()); - assert!(!t(r"foo(a)").is_literal()); - assert!(!t(r"(a)foo").is_literal()); - assert!(!t(r"[a]").is_literal()); + assert!(!props(r"").is_literal()); + assert!(!props(r"^").is_literal()); + assert!(!props(r"a|b").is_literal()); + assert!(!props(r"(a)").is_literal()); + assert!(!props(r"a+").is_literal()); + assert!(!props(r"foo(a)").is_literal()); + assert!(!props(r"(a)foo").is_literal()); + assert!(!props(r"[ab]").is_literal()); } #[test] fn analysis_is_alternation_literal() { // Positive examples. - assert!(t(r"a").is_alternation_literal()); - assert!(t(r"ab").is_alternation_literal()); - assert!(t(r"abc").is_alternation_literal()); - assert!(t(r"(?m)abc").is_alternation_literal()); - assert!(t(r"a|b").is_alternation_literal()); - assert!(t(r"a|b|c").is_alternation_literal()); - assert!(t(r"foo|bar").is_alternation_literal()); - assert!(t(r"foo|bar|baz").is_alternation_literal()); + assert!(props(r"a").is_alternation_literal()); + assert!(props(r"ab").is_alternation_literal()); + assert!(props(r"abc").is_alternation_literal()); + assert!(props(r"(?m)abc").is_alternation_literal()); + assert!(props(r"foo|bar").is_alternation_literal()); + assert!(props(r"foo|bar|baz").is_alternation_literal()); + assert!(props(r"[a]").is_alternation_literal()); + assert!(props(r"(?:ab)|cd").is_alternation_literal()); + assert!(props(r"ab|(?:cd)").is_alternation_literal()); // Negative examples. 
- assert!(!t(r"").is_alternation_literal()); - assert!(!t(r"^").is_alternation_literal()); - assert!(!t(r"(a)").is_alternation_literal()); - assert!(!t(r"a+").is_alternation_literal()); - assert!(!t(r"foo(a)").is_alternation_literal()); - assert!(!t(r"(a)foo").is_alternation_literal()); - assert!(!t(r"[a]").is_alternation_literal()); - assert!(!t(r"[a]|b").is_alternation_literal()); - assert!(!t(r"a|[b]").is_alternation_literal()); - assert!(!t(r"(a)|b").is_alternation_literal()); - assert!(!t(r"a|(b)").is_alternation_literal()); + assert!(!props(r"").is_alternation_literal()); + assert!(!props(r"^").is_alternation_literal()); + assert!(!props(r"(a)").is_alternation_literal()); + assert!(!props(r"a+").is_alternation_literal()); + assert!(!props(r"foo(a)").is_alternation_literal()); + assert!(!props(r"(a)foo").is_alternation_literal()); + assert!(!props(r"[ab]").is_alternation_literal()); + assert!(!props(r"[ab]|b").is_alternation_literal()); + assert!(!props(r"a|[ab]").is_alternation_literal()); + assert!(!props(r"(a)|b").is_alternation_literal()); + assert!(!props(r"a|(b)").is_alternation_literal()); + assert!(!props(r"a|b").is_alternation_literal()); + assert!(!props(r"a|b|c").is_alternation_literal()); + assert!(!props(r"[a]|b").is_alternation_literal()); + assert!(!props(r"a|[b]").is_alternation_literal()); + assert!(!props(r"(?:a)|b").is_alternation_literal()); + assert!(!props(r"a|(?:b)").is_alternation_literal()); + assert!(!props(r"(?:z|xx)@|xx").is_alternation_literal()); + } + + // This tests that the smart Hir::repetition constructors does some basic + // simplifications. + #[test] + fn smart_repetition() { + assert_eq!(t(r"a{0}"), Hir::empty()); + assert_eq!(t(r"a{1}"), hir_lit("a")); + assert_eq!(t(r"\B{32111}"), hir_look(hir::Look::WordUnicodeNegate)); + } + + // This tests that the smart Hir::concat constructor simplifies the given + // exprs in a way we expect. + #[test] + fn smart_concat() { + assert_eq!(t(""), Hir::empty()); + assert_eq!(t("(?:)"), Hir::empty()); + assert_eq!(t("abc"), hir_lit("abc")); + assert_eq!(t("(?:foo)(?:bar)"), hir_lit("foobar")); + assert_eq!(t("quux(?:foo)(?:bar)baz"), hir_lit("quuxfoobarbaz")); + assert_eq!( + t("foo(?:bar^baz)quux"), + hir_cat(vec![ + hir_lit("foobar"), + hir_look(hir::Look::Start), + hir_lit("bazquux"), + ]) + ); + assert_eq!( + t("foo(?:ba(?:r^b)az)quux"), + hir_cat(vec![ + hir_lit("foobar"), + hir_look(hir::Look::Start), + hir_lit("bazquux"), + ]) + ); + } + + // This tests that the smart Hir::alternation constructor simplifies the + // given exprs in a way we expect. + #[test] + fn smart_alternation() { + assert_eq!( + t("(?:foo)|(?:bar)"), + hir_alt(vec![hir_lit("foo"), hir_lit("bar")]) + ); + assert_eq!( + t("quux|(?:abc|def|xyz)|baz"), + hir_alt(vec![ + hir_lit("quux"), + hir_lit("abc"), + hir_lit("def"), + hir_lit("xyz"), + hir_lit("baz"), + ]) + ); + assert_eq!( + t("quux|(?:abc|(?:def|mno)|xyz)|baz"), + hir_alt(vec![ + hir_lit("quux"), + hir_lit("abc"), + hir_lit("def"), + hir_lit("mno"), + hir_lit("xyz"), + hir_lit("baz"), + ]) + ); + assert_eq!( + t("a|b|c|d|e|f|x|y|z"), + hir_uclass(&[('a', 'f'), ('x', 'z')]), + ); + // Tests that we lift common prefixes out of an alternation. 
+ assert_eq!( + t("[A-Z]foo|[A-Z]quux"), + hir_cat(vec![ + hir_uclass(&[('A', 'Z')]), + hir_alt(vec![hir_lit("foo"), hir_lit("quux")]), + ]), + ); + assert_eq!( + t("[A-Z][A-Z]|[A-Z]quux"), + hir_cat(vec![ + hir_uclass(&[('A', 'Z')]), + hir_alt(vec![hir_uclass(&[('A', 'Z')]), hir_lit("quux")]), + ]), + ); + assert_eq!( + t("[A-Z][A-Z]|[A-Z][A-Z]quux"), + hir_cat(vec![ + hir_uclass(&[('A', 'Z')]), + hir_uclass(&[('A', 'Z')]), + hir_alt(vec![Hir::empty(), hir_lit("quux")]), + ]), + ); + assert_eq!( + t("[A-Z]foo|[A-Z]foobar"), + hir_cat(vec![ + hir_uclass(&[('A', 'Z')]), + hir_alt(vec![hir_lit("foo"), hir_lit("foobar")]), + ]), + ); + } + + #[test] + fn regression_alt_empty_concat() { + use crate::ast::{self, Ast}; + + let span = Span::splat(Position::new(0, 0, 0)); + let ast = Ast::alternation(ast::Alternation { + span, + asts: vec![Ast::concat(ast::Concat { span, asts: vec![] })], + }); + + let mut t = Translator::new(); + assert_eq!(Ok(Hir::empty()), t.translate("", &ast)); + } + + #[test] + fn regression_empty_alt() { + use crate::ast::{self, Ast}; + + let span = Span::splat(Position::new(0, 0, 0)); + let ast = Ast::concat(ast::Concat { + span, + asts: vec![Ast::alternation(ast::Alternation { + span, + asts: vec![], + })], + }); + + let mut t = Translator::new(); + assert_eq!(Ok(Hir::fail()), t.translate("", &ast)); + } + + #[test] + fn regression_singleton_alt() { + use crate::{ + ast::{self, Ast}, + hir::Dot, + }; + + let span = Span::splat(Position::new(0, 0, 0)); + let ast = Ast::concat(ast::Concat { + span, + asts: vec![Ast::alternation(ast::Alternation { + span, + asts: vec![Ast::dot(span)], + })], + }); + + let mut t = Translator::new(); + assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), t.translate("", &ast)); + } + + // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63168 + #[test] + fn regression_fuzz_match() { + let pat = "[(\u{6} \0-\u{afdf5}] \0 "; + let ast = ParserBuilder::new() + .octal(false) + .ignore_whitespace(true) + .build() + .parse(pat) + .unwrap(); + let hir = TranslatorBuilder::new() + .utf8(true) + .case_insensitive(false) + .multi_line(false) + .dot_matches_new_line(false) + .swap_greed(true) + .unicode(true) + .build() + .translate(pat, &ast) + .unwrap(); + assert_eq!( + hir, + Hir::concat(vec![ + hir_uclass(&[('\0', '\u{afdf5}')]), + hir_lit("\0"), + ]) + ); + } + + // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63155 + #[cfg(feature = "unicode")] + #[test] + fn regression_fuzz_difference1() { + let pat = r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*"; + let _ = t(pat); // shouldn't panic + } + + // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63153 + #[test] + fn regression_fuzz_char_decrement1() { + let pat = "w[w[^w?\rw\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0*\0\0\u{1}\0]\0\0-*\0][^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0x\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\0\0*??\0\u{7f}{2}\u{10}??\0\0\0\0\0\0\0\0\0\u{3}\0\0\0}\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\u{1}\0]\0\u{1}\u{1}H-i]-]\0\0\0\0\u{1}\0]\0\0\0\u{1}\0]\0\0-*\0\0\0\0\u{1}9-\u{7f}]\0'|-\u{7f}]\0'|(?i-ux)[-\u{7f}]\0'\u{3}\0\0\0}\0-*\0] Result<(), Self::Err> { Ok(()) } + + /// This 
method is called between child nodes of a concatenation.
+    fn visit_concat_in(&mut self) -> Result<(), Self::Err> {
+        Ok(())
+    }
 }
 
 /// Executes an implementation of `Visitor` in constant stack space.
 ///
 /// This function will visit every node in the given `Hir` while calling
-/// appropriate methods provided by the
-/// [`Visitor`](trait.Visitor.html) trait.
+/// appropriate methods provided by the [`Visitor`] trait.
 ///
 /// The primary use case for this method is when one wants to perform case
 /// analysis over an `Hir` without using a stack size proportional to the depth
@@ -74,9 +80,9 @@ enum Frame<'a> {
     /// A stack frame allocated just before descending into a repetition
     /// operator's child node.
     Repetition(&'a hir::Repetition),
-    /// A stack frame allocated just before descending into a group's child
+    /// A stack frame allocated just before descending into a capture's child
     /// node.
-    Group(&'a hir::Group),
+    Capture(&'a hir::Capture),
     /// The stack frame used while visiting every child node of a concatenation
     /// of expressions.
     Concat {
@@ -130,8 +136,14 @@ impl<'a> HeapVisitor<'a> {
                 // If this is a concat/alternate, then we might have additional
                 // inductive steps to process.
                 if let Some(x) = self.pop(frame) {
-                    if let Frame::Alternation { .. } = x {
-                        visitor.visit_alternation_in()?;
+                    match x {
+                        Frame::Alternation { .. } => {
+                            visitor.visit_alternation_in()?;
+                        }
+                        Frame::Concat { .. } => {
+                            visitor.visit_concat_in()?;
+                        }
+                        _ => {}
                     }
                     hir = x.child();
                     self.stack.push((post_hir, x));
@@ -149,7 +161,7 @@ impl<'a> HeapVisitor<'a> {
     fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> {
         match *hir.kind() {
             HirKind::Repetition(ref x) => Some(Frame::Repetition(x)),
-            HirKind::Group(ref x) => Some(Frame::Group(x)),
+            HirKind::Capture(ref x) => Some(Frame::Capture(x)),
             HirKind::Concat(ref x) if x.is_empty() => None,
             HirKind::Concat(ref x) => {
                 Some(Frame::Concat { head: &x[0], tail: &x[1..] })
@@ -167,7 +179,7 @@ impl<'a> HeapVisitor<'a> {
     fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
         match induct {
             Frame::Repetition(_) => None,
-            Frame::Group(_) => None,
+            Frame::Capture(_) => None,
             Frame::Concat { tail, .. } => {
                 if tail.is_empty() {
                     None
@@ -194,8 +206,8 @@ impl<'a> Frame<'a> {
     /// child HIR node to visit.
     fn child(&self) -> &'a Hir {
         match *self {
-            Frame::Repetition(rep) => &rep.hir,
-            Frame::Group(group) => &group.hir,
+            Frame::Repetition(rep) => &rep.sub,
+            Frame::Capture(capture) => &capture.sub,
             Frame::Concat { head, .. } => head,
             Frame::Alternation { head, .. } => head,
         }
diff --git a/vendor/regex-syntax/src/lib.rs b/vendor/regex-syntax/src/lib.rs
index 1dfb38a..20f25db 100644
--- a/vendor/regex-syntax/src/lib.rs
+++ b/vendor/regex-syntax/src/lib.rs
@@ -3,14 +3,14 @@ This crate provides a robust regular expression parser.
 
 This crate defines two primary types:
 
-* [`Ast`](ast/enum.Ast.html) is the abstract syntax of a regular expression.
+* [`Ast`](ast::Ast) is the abstract syntax of a regular expression.
 An abstract syntax corresponds to a *structured representation* of the
 concrete syntax of a regular expression, where the concrete syntax is the
 pattern string itself (e.g., `foo(bar)+`). Given some abstract syntax, it
 can be converted back to the original concrete syntax (modulo some details,
 like whitespace). To a first approximation, the abstract syntax is complex
 and difficult to analyze.
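// Aside: a sketch of the new visit_concat_in() hook from the visitor.rs
// hunk above (assumed regex-syntax 0.7+ API). hir::visit runs in constant
// stack space, so even deeply nested patterns are safe to traverse.
fn visitor_sketch() {
    use regex_syntax::hir::{self, Hir, Visitor};

    // Counts the seams between children of concatenations.
    struct CountConcatSeams(usize);

    impl Visitor for CountConcatSeams {
        type Output = usize;
        type Err = ();

        fn finish(self) -> Result<usize, ()> {
            Ok(self.0)
        }

        fn visit_concat_in(&mut self) -> Result<(), ()> {
            self.0 += 1;
            Ok(())
        }
    }

    let hir: Hir = regex_syntax::parse("a(bc)d").unwrap();
    // "a", "(bc)" and "d" form a concatenation with two seams.
    assert_eq!(Ok(2), hir::visit(&hir, CountConcatSeams(0)));
}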
-* [`Hir`](hir/struct.Hir.html) is the high-level intermediate representation
+* [`Hir`](hir::Hir) is the high-level intermediate representation
 ("HIR" or "high-level IR" for short) of a regular expression. It corresponds to
 an intermediate state of a regular expression that sits between the abstract
 syntax and the low level compiled opcodes that are eventually responsible for
@@ -22,14 +22,15 @@ This crate defines two primary types:
 
 These two types come with conversion routines:
 
-* An [`ast::parse::Parser`](ast/parse/struct.Parser.html) converts concrete
-  syntax (a `&str`) to an [`Ast`](ast/enum.Ast.html).
-* A [`hir::translate::Translator`](hir/translate/struct.Translator.html)
-  converts an [`Ast`](ast/enum.Ast.html) to a [`Hir`](hir/struct.Hir.html).
+* An [`ast::parse::Parser`] converts concrete syntax (a `&str`) to an
+[`Ast`](ast::Ast).
+* A [`hir::translate::Translator`] converts an [`Ast`](ast::Ast) to a
+[`Hir`](hir::Hir).
 
 As a convenience, the above two conversion routines are combined into one via
-the top-level [`Parser`](struct.Parser.html) type. This `Parser` will first
-convert your pattern to an `Ast` and then convert the `Ast` to an `Hir`.
+the top-level [`Parser`] type. This `Parser` will first convert your pattern to
+an `Ast` and then convert the `Ast` to an `Hir`. It's also exposed as a
+top-level [`parse`] free function.
 
 # Example
 
@@ -37,14 +38,14 @@ convert your pattern to an `Ast` and then convert the `Ast` to an `Hir`.
 This example shows how to parse a pattern string into its HIR:
 
 ```
-use regex_syntax::Parser;
-use regex_syntax::hir::{self, Hir};
+use regex_syntax::{hir::Hir, parse};
 
-let hir = Parser::new().parse("a|b").unwrap();
+let hir = parse("a|b")?;
 assert_eq!(hir, Hir::alternation(vec![
-    Hir::literal(hir::Literal::Unicode('a')),
-    Hir::literal(hir::Literal::Unicode('b')),
+    Hir::literal("a".as_bytes()),
+    Hir::literal("b".as_bytes()),
 ]));
+# Ok::<(), Box<dyn std::error::Error>>(())
 ```
 
@@ -81,10 +82,9 @@ in a monospace font.
 
 # Literal extraction
 
-This crate provides limited support for
-[literal extraction from `Hir` values](hir/literal/struct.Literals.html).
-Be warned that literal extraction currently uses recursion, and therefore,
-stack size proportional to the size of the `Hir`.
+This crate provides limited support for [literal extraction from `Hir`
+values](hir::literal). Be warned that literal extraction uses recursion, and
+therefore, stack size proportional to the size of the `Hir`.
 
 The purpose of literal extraction is to speed up searches. That is, if you
 know a regular expression must match a prefix or suffix literal, then it is
@@ -116,6 +116,11 @@ match semantics of a regular expression.
 
 The following features are available:
 
+* **std** -
+  Enables support for the standard library. This feature is enabled by default.
+  When disabled, only `core` and `alloc` are used. Otherwise, enabling `std`
+  generally just enables `std::error::Error` trait impls for the various error
+  types.
 * **unicode** -
   Enables all Unicode features. This feature is enabled by default, and will
   always cover all Unicode features, even if more are added in the future.
@@ -133,7 +138,7 @@ The following features are available:
   [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches).
 * **unicode-gencat** -
   Provide the data for
-  [Uncode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
+  [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
This includes, but is not limited to, `Decimal_Number`, `Letter`, `Math_Symbol`, `Number` and `Punctuation`. * **unicode-perl** - @@ -152,21 +157,39 @@ The following features are available: [Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/). This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and `\p{sb=ATerm}`. +* **arbitrary** - + Enabling this feature introduces a public dependency on the + [`arbitrary`](https://crates.io/crates/arbitrary) + crate. Namely, it implements the `Arbitrary` trait from that crate for the + [`Ast`](crate::ast::Ast) type. This feature is disabled by default. */ -#![deny(missing_docs)] -#![warn(missing_debug_implementations)] +#![no_std] #![forbid(unsafe_code)] +#![deny(missing_docs, rustdoc::broken_intra_doc_links)] +#![warn(missing_debug_implementations)] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +#[cfg(any(test, feature = "std"))] +extern crate std; -pub use crate::error::{Error, Result}; -pub use crate::parser::{Parser, ParserBuilder}; -pub use crate::unicode::UnicodeWordError; +extern crate alloc; + +pub use crate::{ + error::Error, + parser::{parse, Parser, ParserBuilder}, + unicode::UnicodeWordError, +}; + +use alloc::string::String; pub mod ast; +mod debug; mod either; mod error; pub mod hir; mod parser; +mod rank; mod unicode; mod unicode_tables; pub mod utf8; @@ -197,13 +220,43 @@ pub fn escape_into(text: &str, buf: &mut String) { /// Returns true if the given character has significance in a regex. /// -/// These are the only characters that are allowed to be escaped, with one -/// exception: an ASCII space character may be escaped when extended mode (with -/// the `x` flag) is enabled. In particular, `is_meta_character(' ')` returns -/// `false`. +/// Generally speaking, these are the only characters which _must_ be escaped +/// in order to match their literal meaning. For example, to match a literal +/// `|`, one could write `\|`. Sometimes escaping isn't always necessary. For +/// example, `-` is treated as a meta character because of its significance +/// for writing ranges inside of character classes, but the regex `-` will +/// match a literal `-` because `-` has no special meaning outside of character +/// classes. +/// +/// In order to determine whether a character may be escaped at all, the +/// [`is_escapeable_character`] routine should be used. The difference between +/// `is_meta_character` and `is_escapeable_character` is that the latter will +/// return true for some characters that are _not_ meta characters. For +/// example, `%` and `\%` both match a literal `%` in all contexts. In other +/// words, `is_escapeable_character` includes "superfluous" escapes. /// /// Note that the set of characters for which this function returns `true` or -/// `false` is fixed and won't change in a semver compatible release. +/// `false` is fixed and won't change in a semver compatible release. (In this +/// case, "semver compatible release" actually refers to the `regex` crate +/// itself, since reducing or expanding the set of meta characters would be a +/// breaking change for not just `regex-syntax` but also `regex` itself.) 
+/// +/// # Example +/// +/// ``` +/// use regex_syntax::is_meta_character; +/// +/// assert!(is_meta_character('?')); +/// assert!(is_meta_character('-')); +/// assert!(is_meta_character('&')); +/// assert!(is_meta_character('#')); +/// +/// assert!(!is_meta_character('%')); +/// assert!(!is_meta_character('/')); +/// assert!(!is_meta_character('!')); +/// assert!(!is_meta_character('"')); +/// assert!(!is_meta_character('e')); +/// ``` pub fn is_meta_character(c: char) -> bool { match c { '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' @@ -212,6 +265,71 @@ pub fn is_meta_character(c: char) -> bool { } } +/// Returns true if the given character can be escaped in a regex. +/// +/// This returns true in all cases that `is_meta_character` returns true, but +/// also returns true in some cases where `is_meta_character` returns false. +/// For example, `%` is not a meta character, but it is escapeable. That is, +/// `%` and `\%` both match a literal `%` in all contexts. +/// +/// The purpose of this routine is to provide knowledge about what characters +/// may be escaped. Namely, most regex engines permit "superfluous" escapes +/// where characters without any special significance may be escaped even +/// though there is no actual _need_ to do so. +/// +/// This will return false for some characters. For example, `e` is not +/// escapeable. Therefore, `\e` will either result in a parse error (which is +/// true today), or it could backwards compatibly evolve into a new construct +/// with its own meaning. Indeed, that is the purpose of banning _some_ +/// superfluous escapes: it provides a way to evolve the syntax in a compatible +/// manner. +/// +/// # Example +/// +/// ``` +/// use regex_syntax::is_escapeable_character; +/// +/// assert!(is_escapeable_character('?')); +/// assert!(is_escapeable_character('-')); +/// assert!(is_escapeable_character('&')); +/// assert!(is_escapeable_character('#')); +/// assert!(is_escapeable_character('%')); +/// assert!(is_escapeable_character('/')); +/// assert!(is_escapeable_character('!')); +/// assert!(is_escapeable_character('"')); +/// +/// assert!(!is_escapeable_character('e')); +/// ``` +pub fn is_escapeable_character(c: char) -> bool { + // Certainly escapeable if it's a meta character. + if is_meta_character(c) { + return true; + } + // Any character that isn't ASCII is definitely not escapeable. There's + // no real need to allow things like \☃ right? + if !c.is_ascii() { + return false; + } + // Otherwise, we basically say that everything is escapeable unless it's a + // letter or digit. Things like \3 are either octal (when enabled) or an + // error, and we should keep it that way. Otherwise, letters are reserved + // for adding new syntax in a backwards compatible way. + match c { + '0'..='9' | 'A'..='Z' | 'a'..='z' => false, + // While not currently supported, we keep these as not escapeable to + // give us some flexibility with respect to supporting the \< and + // \> word boundary assertions in the future. By rejecting them as + // escapeable, \< and \> will result in a parse error. Thus, we can + // turn them into something else in the future without it being a + // backwards incompatible change. + // + // OK, now we support \< and \>, and we need to retain them as *not* + // escapeable here since the escape sequence is significant. + '<' | '>' => false, + _ => true, + } +} + /// Returns true if and only if the given character is a Unicode word /// character. 
/// @@ -224,10 +342,9 @@ pub fn is_meta_character(c: char) -> bool { /// /// # Panics /// -/// If the `unicode-perl` feature is not enabled, then this function panics. -/// For this reason, it is recommended that callers use -/// [`try_is_word_character`](fn.try_is_word_character.html) -/// instead. +/// If the `unicode-perl` feature is not enabled, then this function +/// panics. For this reason, it is recommended that callers use +/// [`try_is_word_character`] instead. pub fn is_word_character(c: char) -> bool { try_is_word_character(c).expect("unicode-perl feature must be enabled") } @@ -248,14 +365,14 @@ pub fn is_word_character(c: char) -> bool { /// returns an error. pub fn try_is_word_character( c: char, -) -> std::result::Result { +) -> core::result::Result { unicode::is_word_character(c) } /// Returns true if and only if the given character is an ASCII word character. /// /// An ASCII word character is defined by the following character class: -/// `[_0-9a-zA-Z]'. +/// `[_0-9a-zA-Z]`. pub fn is_word_byte(c: u8) -> bool { match c { b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' => true, @@ -265,6 +382,8 @@ pub fn is_word_byte(c: u8) -> bool { #[cfg(test)] mod tests { + use alloc::string::ToString; + use super::*; #[test] diff --git a/vendor/regex-syntax/src/parser.rs b/vendor/regex-syntax/src/parser.rs index ded95b2..f482b84 100644 --- a/vendor/regex-syntax/src/parser.rs +++ b/vendor/regex-syntax/src/parser.rs @@ -1,16 +1,26 @@ -use crate::ast; -use crate::hir; +use crate::{ast, hir, Error}; -use crate::Result; +/// A convenience routine for parsing a regex using default options. +/// +/// This is equivalent to `Parser::new().parse(pattern)`. +/// +/// If you need to set non-default options, then use a [`ParserBuilder`]. +/// +/// This routine returns an [`Hir`](hir::Hir) value. Namely, it automatically +/// parses the pattern as an [`Ast`](ast::Ast) and then invokes the translator +/// to convert the `Ast` into an `Hir`. If you need access to the `Ast`, then +/// you should use a [`ast::parse::Parser`]. +pub fn parse(pattern: &str) -> Result { + Parser::new().parse(pattern) +} /// A builder for a regular expression parser. /// /// This builder permits modifying configuration options for the parser. /// -/// This type combines the builder options for both the -/// [AST `ParserBuilder`](ast/parse/struct.ParserBuilder.html) -/// and the -/// [HIR `TranslatorBuilder`](hir/translate/struct.TranslatorBuilder.html). +/// This type combines the builder options for both the [AST +/// `ParserBuilder`](ast::parse::ParserBuilder) and the [HIR +/// `TranslatorBuilder`](hir::translate::TranslatorBuilder). #[derive(Clone, Debug, Default)] pub struct ParserBuilder { ast: ast::parse::ParserBuilder, @@ -78,19 +88,23 @@ impl ParserBuilder { self } - /// When enabled, the parser will permit the construction of a regular + /// When disabled, translation will permit the construction of a regular /// expression that may match invalid UTF-8. /// - /// When disabled (the default), the parser is guaranteed to produce - /// an expression that will only ever match valid UTF-8 (otherwise, the - /// parser will return an error). - /// - /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII - /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause - /// the parser to return an error. Namely, a negated ASCII word boundary - /// can result in matching positions that aren't valid UTF-8 boundaries. 
- pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut ParserBuilder { - self.hir.allow_invalid_utf8(yes); + /// When enabled (the default), the translator is guaranteed to produce an + /// expression that, for non-empty matches, will only ever produce spans + /// that are entirely valid UTF-8 (otherwise, the translator will return an + /// error). + /// + /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even + /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete + /// syntax) will be allowed even though they can produce matches that split + /// a UTF-8 encoded codepoint. This only applies to zero-width or "empty" + /// matches, and it is expected that the regex engine itself must handle + /// these cases if necessary (perhaps by suppressing any zero-width matches + /// that split a codepoint). + pub fn utf8(&mut self, yes: bool) -> &mut ParserBuilder { + self.hir.utf8(yes); self } @@ -134,6 +148,48 @@ impl ParserBuilder { self } + /// Enable or disable the CRLF mode flag by default. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `R` flag. + /// + /// When CRLF mode is enabled, the following happens: + /// + /// * Unless `dot_matches_new_line` is enabled, `.` will match any character + /// except for `\r` and `\n`. + /// * When `multi_line` mode is enabled, `^` and `$` will treat `\r\n`, + /// `\r` and `\n` as line terminators. And in particular, neither will + /// match between a `\r` and a `\n`. + pub fn crlf(&mut self, yes: bool) -> &mut ParserBuilder { + self.hir.crlf(yes); + self + } + + /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`. + /// + /// Namely, instead of `.` (by default) matching everything except for `\n`, + /// this will cause `.` to match everything except for the byte given. + /// + /// If `.` is used in a context where Unicode mode is enabled and this byte + /// isn't ASCII, then an error will be returned. When Unicode mode is + /// disabled, then any byte is permitted, but will return an error if UTF-8 + /// mode is enabled and it is a non-ASCII byte. + /// + /// In short, any ASCII value for a line terminator is always okay. But a + /// non-ASCII byte might result in an error depending on whether Unicode + /// mode or UTF-8 mode are enabled. + /// + /// Note that if `R` mode is enabled then it always takes precedence and + /// the line terminator will be treated as `\r` and `\n` simultaneously. + /// + /// Note also that this *doesn't* impact the look-around assertions + /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional + /// configuration in the regex engine itself. + pub fn line_terminator(&mut self, byte: u8) -> &mut ParserBuilder { + self.hir.line_terminator(byte); + self + } + /// Enable or disable the "swap greed" flag by default. /// /// By default this is disabled. It may alternatively be selectively @@ -148,9 +204,9 @@ impl ParserBuilder { /// By default this is **enabled**. It may alternatively be selectively /// disabled in the regular expression itself via the `u` flag. /// - /// Note that unless `allow_invalid_utf8` is enabled (it's disabled by - /// default), a regular expression will fail to parse if Unicode mode is - /// disabled and a sub-expression could possibly match invalid UTF-8. + /// Note that unless `utf8` is disabled (it's enabled by default), a + /// regular expression will fail to parse if Unicode mode is disabled and a + /// sub-expression could possibly match invalid UTF-8. 
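// Aside: a sketch wiring up the renamed and added builder knobs documented
// above (utf8, crlf, line_terminator), under the assumed regex-syntax 0.7+
// API. Only calls shown in this patch are used.
fn builder_sketch() {
    let mut parser = regex_syntax::ParserBuilder::new()
        // The old allow_invalid_utf8(true) is now utf8(false).
        .utf8(false)
        // Treat \r\n, \r and \n as line terminators for (?m:^) and (?m:$).
        .crlf(true)
        .build();
    let hir = parser.parse(r"(?m)^foo$").unwrap();
    assert!(!hir.properties().look_set().is_empty());
}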
     pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder {
         self.hir.unicode(yes);
         self
@@ -167,10 +223,9 @@
 /// convenience for never having to deal with it at all.
 ///
 /// If callers have more fine grained use cases that need an AST, then please
-/// see the [`ast::parse`](ast/parse/index.html) module.
+/// see the [`ast::parse`] module.
 ///
-/// A `Parser` can be configured in more detail via a
-/// [`ParserBuilder`](struct.ParserBuilder.html).
+/// A `Parser` can be configured in more detail via a [`ParserBuilder`].
 #[derive(Clone, Debug)]
 pub struct Parser {
     ast: ast::parse::Parser,
@@ -184,15 +239,14 @@ impl Parser {
     /// a high level intermediate representation of the given regular
     /// expression.
     ///
-    /// To set configuration options on the parser, use
-    /// [`ParserBuilder`](struct.ParserBuilder.html).
+    /// To set configuration options on the parser, use [`ParserBuilder`].
     pub fn new() -> Parser {
         ParserBuilder::new().build()
     }
 
     /// Parse the regular expression into a high level intermediate
     /// representation.
-    pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir> {
+    pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir, Error> {
         let ast = self.ast.parse(pattern)?;
         let hir = self.hir.translate(pattern, &ast)?;
         Ok(hir)
diff --git a/vendor/regex/src/freqs.rs b/vendor/regex-syntax/src/rank.rs
similarity index 96%
rename from vendor/regex/src/freqs.rs
rename to vendor/regex-syntax/src/rank.rs
index fcffa95..ccb25a2 100644
--- a/vendor/regex/src/freqs.rs
+++ b/vendor/regex-syntax/src/rank.rs
@@ -1,7 +1,4 @@
-// NOTE: The following code was generated by "scripts/frequencies.py", do not
-// edit directly
-
-pub const BYTE_FREQUENCIES: [u8; 256] = [
+pub(crate) const BYTE_FREQUENCIES: [u8; 256] = [
     55, // '\x00'
     52, // '\x01'
     51, // '\x02'
diff --git a/vendor/regex-syntax/src/unicode.rs b/vendor/regex-syntax/src/unicode.rs
index 8194d7f..393a4c0 100644
--- a/vendor/regex-syntax/src/unicode.rs
+++ b/vendor/regex-syntax/src/unicode.rs
@@ -1,12 +1,10 @@
-use std::error;
-use std::fmt;
-use std::result;
+use alloc::{
+    string::{String, ToString},
+    vec::Vec,
+};
 
 use crate::hir;
 
-/// A type alias for errors specific to Unicode handling of classes.
-pub type Result<T> = result::Result<T, Error>;
-
 /// An inclusive range of codepoints from a generated file (hence the static
 /// lifetime).
 type Range = &'static [(char, char)];
@@ -24,9 +22,6 @@ pub enum Error {
     PerlClassNotFound,
 }
 
-/// A type alias for errors specific to Unicode case folding.
-pub type FoldResult<T> = result::Result<T, CaseFoldError>;
-
 /// An error that occurs when Unicode-aware simple case folding fails.
/// /// This error can occur when the case mapping tables necessary for Unicode @@ -35,10 +30,11 @@ pub type FoldResult = result::Result; #[derive(Debug)] pub struct CaseFoldError(()); -impl error::Error for CaseFoldError {} +#[cfg(feature = "std")] +impl std::error::Error for CaseFoldError {} -impl fmt::Display for CaseFoldError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for CaseFoldError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Unicode-aware case folding is not available \ @@ -55,10 +51,11 @@ impl fmt::Display for CaseFoldError { #[derive(Debug)] pub struct UnicodeWordError(()); -impl error::Error for UnicodeWordError {} +#[cfg(feature = "std")] +impl std::error::Error for UnicodeWordError {} -impl fmt::Display for UnicodeWordError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for UnicodeWordError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Unicode-aware \\w class is not available \ @@ -67,74 +64,122 @@ impl fmt::Display for UnicodeWordError { } } -/// Return an iterator over the equivalence class of simple case mappings -/// for the given codepoint. The equivalence class does not include the -/// given codepoint. +/// A state oriented traverser of the simple case folding table. /// -/// If the equivalence class is empty, then this returns the next scalar -/// value that has a non-empty equivalence class, if it exists. If no such -/// scalar value exists, then `None` is returned. The point of this behavior -/// is to permit callers to avoid calling `simple_fold` more than they need -/// to, since there is some cost to fetching the equivalence class. +/// A case folder can be constructed via `SimpleCaseFolder::new()`, which will +/// return an error if the underlying case folding table is unavailable. /// -/// This returns an error if the Unicode case folding tables are not available. -pub fn simple_fold( - c: char, -) -> FoldResult, Option>> { - #[cfg(not(feature = "unicode-case"))] - fn imp( - _: char, - ) -> FoldResult, Option>> - { - use std::option::IntoIter; - Err::, _>, _>(CaseFoldError(())) - } +/// After construction, it is expected that callers will use +/// `SimpleCaseFolder::mapping` by calling it with codepoints in strictly +/// increasing order. For example, calling it on `b` and then on `a` is illegal +/// and will result in a panic. +/// +/// The main idea of this type is that it tries hard to make mapping lookups +/// fast by exploiting the structure of the underlying table, and the ordering +/// assumption enables this. +#[derive(Debug)] +pub struct SimpleCaseFolder { + /// The simple case fold table. It's a sorted association list, where the + /// keys are Unicode scalar values and the values are the corresponding + /// equivalence class (not including the key) of the "simple" case folded + /// Unicode scalar values. + table: &'static [(char, &'static [char])], + /// The last codepoint that was used for a lookup. + last: Option, + /// The index to the entry in `table` corresponding to the smallest key `k` + /// such that `k > k0`, where `k0` is the most recent key lookup. Note that + /// in particular, `k0` may not be in the table! 
+ next: usize, +} - #[cfg(feature = "unicode-case")] - fn imp( - c: char, - ) -> FoldResult, Option>> - { - use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; - - Ok(CASE_FOLDING_SIMPLE - .binary_search_by_key(&c, |&(c1, _)| c1) - .map(|i| CASE_FOLDING_SIMPLE[i].1.iter().copied()) - .map_err(|i| { - if i >= CASE_FOLDING_SIMPLE.len() { - None - } else { - Some(CASE_FOLDING_SIMPLE[i].0) - } - })) +impl SimpleCaseFolder { + /// Create a new simple case folder, returning an error if the underlying + /// case folding table is unavailable. + pub fn new() -> Result { + #[cfg(not(feature = "unicode-case"))] + { + Err(CaseFoldError(())) + } + #[cfg(feature = "unicode-case")] + { + Ok(SimpleCaseFolder { + table: crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE, + last: None, + next: 0, + }) + } } - imp(c) -} - -/// Returns true if and only if the given (inclusive) range contains at least -/// one Unicode scalar value that has a non-empty non-trivial simple case -/// mapping. -/// -/// This function panics if `end < start`. -/// -/// This returns an error if the Unicode case folding tables are not available. -pub fn contains_simple_case_mapping( - start: char, - end: char, -) -> FoldResult { - #[cfg(not(feature = "unicode-case"))] - fn imp(_: char, _: char) -> FoldResult { - Err(CaseFoldError(())) + /// Return the equivalence class of case folded codepoints for the given + /// codepoint. The equivalence class returned never includes the codepoint + /// given. If the given codepoint has no case folded codepoints (i.e., + /// no entry in the underlying case folding table), then this returns an + /// empty slice. + /// + /// # Panics + /// + /// This panics when called with a `c` that is less than or equal to the + /// previous call. In other words, callers need to use this method with + /// strictly increasing values of `c`. + pub fn mapping(&mut self, c: char) -> &'static [char] { + if let Some(last) = self.last { + assert!( + last < c, + "got codepoint U+{:X} which occurs before \ + last codepoint U+{:X}", + u32::from(c), + u32::from(last), + ); + } + self.last = Some(c); + if self.next >= self.table.len() { + return &[]; + } + let (k, v) = self.table[self.next]; + if k == c { + self.next += 1; + return v; + } + match self.get(c) { + Err(i) => { + self.next = i; + &[] + } + Ok(i) => { + // Since we require lookups to proceed + // in order, anything we find should be + // after whatever we thought might be + // next. Otherwise, the caller is either + // going out of order or we would have + // found our next key at 'self.next'. + assert!(i > self.next); + self.next = i + 1; + self.table[i].1 + } + } } - #[cfg(feature = "unicode-case")] - fn imp(start: char, end: char) -> FoldResult { - use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; - use std::cmp::Ordering; + /// Returns true if and only if the given range overlaps with any region + /// of the underlying case folding table. That is, when true, there exists + /// at least one codepoint in the inclusive range `[start, end]` that has + /// a non-trivial equivalence class of case folded codepoints. Conversely, + /// when this returns false, all codepoints in the range `[start, end]` + /// correspond to the trivial equivalence class of case folded codepoints, + /// i.e., itself. + /// + /// This is useful to call before iterating over the codepoints in the + /// range and looking up the mapping for each. 
If you know none of the + /// mappings will return anything, then you might be able to skip doing it + /// altogether. + /// + /// # Panics + /// + /// This panics when `end < start`. + pub fn overlaps(&self, start: char, end: char) -> bool { + use core::cmp::Ordering; assert!(start <= end); - Ok(CASE_FOLDING_SIMPLE + self.table .binary_search_by(|&(c, _)| { if start <= c && c <= end { Ordering::Equal @@ -144,10 +189,15 @@ pub fn contains_simple_case_mapping( Ordering::Less } }) - .is_ok()) + .is_ok() } - imp(start, end) + /// Returns the index at which `c` occurs in the simple case fold table. If + /// `c` does not occur, then this returns an `i` such that `table[i-1].0 < + /// c` and `table[i].0 > c`. + fn get(&self, c: char) -> Result { + self.table.binary_search_by_key(&c, |&(c1, _)| c1) + } } /// A query for finding a character class defined by Unicode. This supports @@ -185,7 +235,7 @@ pub enum ClassQuery<'a> { } impl<'a> ClassQuery<'a> { - fn canonicalize(&self) -> Result { + fn canonicalize(&self) -> Result { match *self { ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()), ClassQuery::Binary(name) => self.canonical_binary(name), @@ -234,7 +284,10 @@ impl<'a> ClassQuery<'a> { } } - fn canonical_binary(&self, name: &str) -> Result { + fn canonical_binary( + &self, + name: &str, + ) -> Result { let norm = symbolic_name_normalize(name); // This is a special case where 'cf' refers to the 'Format' general @@ -243,7 +296,17 @@ impl<'a> ClassQuery<'a> { // a general category. (Currently, we don't even support the // 'Case_Folding' property. But if we do in the future, users will be // required to spell it out.) - if norm != "cf" { + // + // Also 'sc' refers to the 'Currency_Symbol' general category, but is + // also the abbreviation for the 'Script' property. So we avoid calling + // 'canonical_prop' for it too, which would erroneously normalize it + // to 'Script'. + // + // Another case: 'lc' is an abbreviation for the 'Cased_Letter' + // general category, but is also an abbreviation for the 'Lowercase_Mapping' + // property. We don't currently support the latter, so as with 'cf' + // above, we treat 'lc' as 'Cased_Letter'. + if norm != "cf" && norm != "sc" && norm != "lc" { if let Some(canon) = canonical_prop(&norm)? { return Ok(CanonicalClassQuery::Binary(canon)); } @@ -285,7 +348,7 @@ enum CanonicalClassQuery { /// Looks up a Unicode class given a query. If one doesn't exist, then /// `None` is returned. -pub fn class(query: ClassQuery<'_>) -> Result { +pub fn class(query: ClassQuery<'_>) -> Result { use self::CanonicalClassQuery::*; match query.canonicalize()? { @@ -322,14 +385,14 @@ pub fn class(query: ClassQuery<'_>) -> Result { /// Returns a Unicode aware class for \w. /// /// This returns an error if the data is not available for \w. -pub fn perl_word() -> Result { +pub fn perl_word() -> Result { #[cfg(not(feature = "unicode-perl"))] - fn imp() -> Result { + fn imp() -> Result { Err(Error::PerlClassNotFound) } #[cfg(feature = "unicode-perl")] - fn imp() -> Result { + fn imp() -> Result { use crate::unicode_tables::perl_word::PERL_WORD; Ok(hir_class(PERL_WORD)) } @@ -340,20 +403,20 @@ pub fn perl_word() -> Result { /// Returns a Unicode aware class for \s. /// /// This returns an error if the data is not available for \s. 
-pub fn perl_space() -> Result { +pub fn perl_space() -> Result { #[cfg(not(any(feature = "unicode-perl", feature = "unicode-bool")))] - fn imp() -> Result { + fn imp() -> Result { Err(Error::PerlClassNotFound) } #[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))] - fn imp() -> Result { + fn imp() -> Result { use crate::unicode_tables::perl_space::WHITE_SPACE; Ok(hir_class(WHITE_SPACE)) } #[cfg(feature = "unicode-bool")] - fn imp() -> Result { + fn imp() -> Result { use crate::unicode_tables::property_bool::WHITE_SPACE; Ok(hir_class(WHITE_SPACE)) } @@ -364,20 +427,20 @@ pub fn perl_space() -> Result { /// Returns a Unicode aware class for \d. /// /// This returns an error if the data is not available for \d. -pub fn perl_digit() -> Result { +pub fn perl_digit() -> Result { #[cfg(not(any(feature = "unicode-perl", feature = "unicode-gencat")))] - fn imp() -> Result { + fn imp() -> Result { Err(Error::PerlClassNotFound) } #[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))] - fn imp() -> Result { + fn imp() -> Result { use crate::unicode_tables::perl_decimal::DECIMAL_NUMBER; Ok(hir_class(DECIMAL_NUMBER)) } #[cfg(feature = "unicode-gencat")] - fn imp() -> Result { + fn imp() -> Result { use crate::unicode_tables::general_category::DECIMAL_NUMBER; Ok(hir_class(DECIMAL_NUMBER)) } @@ -397,23 +460,23 @@ pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode { /// Returns true only if the given codepoint is in the `\w` character class. /// /// If the `unicode-perl` feature is not enabled, then this returns an error. -pub fn is_word_character(c: char) -> result::Result { +pub fn is_word_character(c: char) -> Result { #[cfg(not(feature = "unicode-perl"))] - fn imp(_: char) -> result::Result { + fn imp(_: char) -> Result { Err(UnicodeWordError(())) } #[cfg(feature = "unicode-perl")] - fn imp(c: char) -> result::Result { - use crate::is_word_byte; - use crate::unicode_tables::perl_word::PERL_WORD; - use std::cmp::Ordering; + fn imp(c: char) -> Result { + use crate::{is_word_byte, unicode_tables::perl_word::PERL_WORD}; - if c <= 0x7F as char && is_word_byte(c as u8) { + if u8::try_from(c).map_or(false, is_word_byte) { return Ok(true); } Ok(PERL_WORD .binary_search_by(|&(start, end)| { + use core::cmp::Ordering; + if start <= c && c <= end { Ordering::Equal } else if start > c { @@ -435,7 +498,9 @@ pub fn is_word_character(c: char) -> result::Result { /// value. type PropertyValues = &'static [(&'static str, &'static str)]; -fn canonical_gencat(normalized_value: &str) -> Result> { +fn canonical_gencat( + normalized_value: &str, +) -> Result, Error> { Ok(match normalized_value { "any" => Some("Any"), "assigned" => Some("Assigned"), @@ -447,7 +512,9 @@ fn canonical_gencat(normalized_value: &str) -> Result> { }) } -fn canonical_script(normalized_value: &str) -> Result> { +fn canonical_script( + normalized_value: &str, +) -> Result, Error> { let scripts = property_values("Script")?.unwrap(); Ok(canonical_value(scripts, normalized_value)) } @@ -460,7 +527,9 @@ fn canonical_script(normalized_value: &str) -> Result> { /// UAX44 LM3, which can be done using `symbolic_name_normalize`. /// /// If the property names data is not available, then an error is returned. 
-fn canonical_prop(normalized_name: &str) -> Result<Option<&'static str>> { +fn canonical_prop( + normalized_name: &str, +) -> Result<Option<&'static str>, Error> { #[cfg(not(any( feature = "unicode-age", feature = "unicode-bool", @@ -469,7 +538,7 @@ fn canonical_prop(normalized_name: &str) -> Result<Option<&'static str>> { feature = "unicode-script", feature = "unicode-segment", )))] - fn imp(_: &str) -> Result<Option<&'static str>> { + fn imp(_: &str) -> Result<Option<&'static str>, Error> { Err(Error::PropertyNotFound) } @@ -481,7 +550,7 @@ feature = "unicode-script", feature = "unicode-segment", ))] - fn imp(name: &str) -> Result<Option<&'static str>> { + fn imp(name: &str) -> Result<Option<&'static str>, Error> { use crate::unicode_tables::property_names::PROPERTY_NAMES; Ok(PROPERTY_NAMES @@ -517,7 +586,7 @@ fn canonical_value( /// If the property values data is not available, then an error is returned. fn property_values( canonical_property_name: &'static str, -) -> Result<Option<PropertyValues>> { +) -> Result<Option<PropertyValues>, Error> { #[cfg(not(any( feature = "unicode-age", feature = "unicode-bool", @@ -526,7 +595,7 @@ fn property_values( feature = "unicode-script", feature = "unicode-segment", )))] - fn imp(_: &'static str) -> Result<Option<PropertyValues>> { + fn imp(_: &'static str) -> Result<Option<PropertyValues>, Error> { Err(Error::PropertyValueNotFound) } @@ -538,7 +607,7 @@ feature = "unicode-script", feature = "unicode-segment", ))] - fn imp(name: &'static str) -> Result<Option<PropertyValues>> { + fn imp(name: &'static str) -> Result<Option<PropertyValues>, Error> { use crate::unicode_tables::property_values::PROPERTY_VALUES; Ok(PROPERTY_VALUES @@ -569,15 +638,15 @@ fn property_set( /// /// If the given age value isn't valid or if the data isn't available, then an /// error is returned instead. -fn ages(canonical_age: &str) -> Result<impl Iterator<Item = (char, char)>> { +fn ages(canonical_age: &str) -> Result<impl Iterator<Item = (char, char)>, Error> { #[cfg(not(feature = "unicode-age"))] - fn imp(_: &str) -> Result<impl Iterator<Item = (char, char)>> { - use std::option::IntoIter; + fn imp(_: &str) -> Result<impl Iterator<Item = (char, char)>, Error> { + use core::option::IntoIter; Err::<IntoIter<(char, char)>, _>(Error::PropertyNotFound) } #[cfg(feature = "unicode-age")] - fn imp(canonical_age: &str) -> Result<impl Iterator<Item = (char, char)>> { + fn imp(canonical_age: &str) -> Result<impl Iterator<Item = (char, char)>, Error> { use crate::unicode_tables::age; const AGES: &[(&str, Range)] = &[ @@ -625,14 +694,14 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = (char, char)>> { /// /// If the given general category could not be found, or if the general /// category data is not available, then an error is returned. -fn gencat(canonical_name: &'static str) -> Result<hir::ClassUnicode> { +fn gencat(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> { #[cfg(not(feature = "unicode-gencat"))] - fn imp(_: &'static str) -> Result<hir::ClassUnicode> { + fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-gencat")] - fn imp(name: &'static str) -> Result<hir::ClassUnicode> { + fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { use crate::unicode_tables::general_category::BY_NAME; match name { "ASCII" => Ok(hir_class(&[('\0', '\x7F')])), @@ -660,14 +729,14 @@ fn gencat(canonical_name: &'static str) -> Result<hir::ClassUnicode> { /// /// If the given script could not be found, or if the script data is not /// available, then an error is returned.
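Both `canonical_prop` and `property_values` above share one shape: a binary search over a sorted static table that maps normalized aliases to canonical forms. A self-contained sketch of that lookup; the three-entry table here is invented for illustration (the crate's real `PROPERTY_NAMES` table is generated from the UCD and far larger):

    // Must stay sorted by the alias (first component) for binary search.
    const PROPERTY_NAMES: &[(&str, &str)] = &[
        ("alpha", "Alphabetic"),
        ("alphabetic", "Alphabetic"),
        ("wspace", "White_Space"),
    ];

    fn canonical(normalized_name: &str) -> Option<&'static str> {
        PROPERTY_NAMES
            .binary_search_by_key(&normalized_name, |&(alias, _)| alias)
            .ok()
            .map(|i| PROPERTY_NAMES[i].1)
    }

    fn main() {
        assert_eq!(canonical("alpha"), Some("Alphabetic"));
        assert_eq!(canonical("snowman"), None);
    }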
-fn script(canonical_name: &'static str) -> Result<hir::ClassUnicode> { +fn script(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> { #[cfg(not(feature = "unicode-script"))] - fn imp(_: &'static str) -> Result<hir::ClassUnicode> { + fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-script")] - fn imp(name: &'static str) -> Result<hir::ClassUnicode> { + fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { use crate::unicode_tables::script::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -685,14 +754,14 @@ fn script(canonical_name: &'static str) -> Result<hir::ClassUnicode> { /// not available, then an error is returned. fn script_extension( canonical_name: &'static str, -) -> Result<hir::ClassUnicode> { +) -> Result<hir::ClassUnicode, Error> { #[cfg(not(feature = "unicode-script"))] - fn imp(_: &'static str) -> Result<hir::ClassUnicode> { + fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-script")] - fn imp(name: &'static str) -> Result<hir::ClassUnicode> { + fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { use crate::unicode_tables::script_extension::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -709,14 +778,16 @@ fn script_extension( /// /// If the given boolean property could not be found, or if the boolean /// property data is not available, then an error is returned. -fn bool_property(canonical_name: &'static str) -> Result<hir::ClassUnicode> { +fn bool_property( + canonical_name: &'static str, +) -> Result<hir::ClassUnicode, Error> { #[cfg(not(feature = "unicode-bool"))] - fn imp(_: &'static str) -> Result<hir::ClassUnicode> { + fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-bool")] - fn imp(name: &'static str) -> Result<hir::ClassUnicode> { + fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { use crate::unicode_tables::property_bool::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -737,14 +808,14 @@ fn bool_property(canonical_name: &'static str) -> Result<hir::ClassUnicode> { /// /// If the given property could not be found, or if the corresponding data is /// not available, then an error is returned. -fn gcb(canonical_name: &'static str) -> Result<hir::ClassUnicode> { +fn gcb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> { #[cfg(not(feature = "unicode-segment"))] - fn imp(_: &'static str) -> Result<hir::ClassUnicode> { + fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-segment")] - fn imp(name: &'static str) -> Result<hir::ClassUnicode> { + fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { use crate::unicode_tables::grapheme_cluster_break::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -761,14 +832,14 @@ fn gcb(canonical_name: &'static str) -> Result<hir::ClassUnicode> { /// /// If the given property could not be found, or if the corresponding data is /// not available, then an error is returned. -fn wb(canonical_name: &'static str) -> Result<hir::ClassUnicode> { +fn wb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> { #[cfg(not(feature = "unicode-segment"))] - fn imp(_: &'static str) -> Result<hir::ClassUnicode> { + fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-segment")] - fn imp(name: &'static str) -> Result<hir::ClassUnicode> { + fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { use crate::unicode_tables::word_break::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -785,14 +856,14 @@ fn wb(canonical_name: &'static str) -> Result<hir::ClassUnicode> { /// /// If the given property could not be found, or if the corresponding data is /// not available, then an error is returned.
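Every lookup in this stretch of the diff repeats the same cfg-gated `imp` pattern: one outer function with a stable signature, plus two feature-gated inner `imp` functions chosen at compile time, so disabling a Unicode table changes what callers get back but never the API they call. A condensed sketch of the pattern with illustrative names (not the crate's own):

    #[derive(Debug)]
    enum Error {
        PropertyNotFound,
    }

    fn lookup(name: &'static str) -> Result<&'static str, Error> {
        // Without the feature, the same signature exists but always errors.
        #[cfg(not(feature = "unicode-script"))]
        fn imp(_: &'static str) -> Result<&'static str, Error> {
            Err(Error::PropertyNotFound)
        }

        #[cfg(feature = "unicode-script")]
        fn imp(name: &'static str) -> Result<&'static str, Error> {
            Ok(name) // stand-in for the real table lookup
        }

        imp(name)
    }

    fn main() {
        println!("{:?}", lookup("Greek"));
    }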
-fn sb(canonical_name: &'static str) -> Result<hir::ClassUnicode> { +fn sb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> { #[cfg(not(feature = "unicode-segment"))] - fn imp(_: &'static str) -> Result<hir::ClassUnicode> { + fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-segment")] - fn imp(name: &'static str) -> Result<hir::ClassUnicode> { + fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { use crate::unicode_tables::sentence_break::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -873,72 +944,45 @@ fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] { #[cfg(test)] mod tests { - use super::{ - contains_simple_case_mapping, simple_fold, symbolic_name_normalize, - symbolic_name_normalize_bytes, - }; + use super::*; #[cfg(feature = "unicode-case")] fn simple_fold_ok(c: char) -> impl Iterator<Item = char> { - simple_fold(c).unwrap().unwrap() - } - - #[cfg(feature = "unicode-case")] - fn simple_fold_err(c: char) -> Option<char> { - match simple_fold(c).unwrap() { - Ok(_) => unreachable!("simple_fold returned Ok iterator"), - Err(next) => next, - } + SimpleCaseFolder::new().unwrap().mapping(c).iter().copied() } #[cfg(feature = "unicode-case")] fn contains_case_map(start: char, end: char) -> bool { - contains_simple_case_mapping(start, end).unwrap() + SimpleCaseFolder::new().unwrap().overlaps(start, end) } #[test] #[cfg(feature = "unicode-case")] fn simple_fold_k() { let xs: Vec<char> = simple_fold_ok('k').collect(); - assert_eq!(xs, vec!['K', 'K']); + assert_eq!(xs, alloc::vec!['K', 'K']); let xs: Vec<char> = simple_fold_ok('K').collect(); - assert_eq!(xs, vec!['k', 'K']); + assert_eq!(xs, alloc::vec!['k', 'K']); let xs: Vec<char> = simple_fold_ok('K').collect(); - assert_eq!(xs, vec!['K', 'k']); + assert_eq!(xs, alloc::vec!['K', 'k']); } #[test] #[cfg(feature = "unicode-case")] fn simple_fold_a() { let xs: Vec<char> = simple_fold_ok('a').collect(); - assert_eq!(xs, vec!['A']); + assert_eq!(xs, alloc::vec!['A']); let xs: Vec<char> = simple_fold_ok('A').collect(); - assert_eq!(xs, vec!['a']); - } - - #[test] - #[cfg(feature = "unicode-case")] - fn simple_fold_empty() { - assert_eq!(Some('A'), simple_fold_err('?')); - assert_eq!(Some('A'), simple_fold_err('@')); - assert_eq!(Some('a'), simple_fold_err('[')); - assert_eq!(Some('Ⰰ'), simple_fold_err('☃')); - } - - #[test] - #[cfg(feature = "unicode-case")] - fn simple_fold_max() { - assert_eq!(None, simple_fold_err('\u{10FFFE}')); - assert_eq!(None, simple_fold_err('\u{10FFFF}')); + assert_eq!(xs, alloc::vec!['a']); } #[test] #[cfg(not(feature = "unicode-case"))] fn simple_fold_disabled() { - assert!(simple_fold('a').is_err()); + assert!(SimpleCaseFolder::new().is_err()); } #[test] @@ -957,12 +1001,6 @@ mod tests { assert!(!contains_case_map('☃', '☃')); } - #[test] - #[cfg(not(feature = "unicode-case"))] - fn range_contains_disabled() { - assert!(contains_simple_case_mapping('a', 'a').is_err()); - } - #[test] #[cfg(feature = "unicode-gencat")] fn regression_466() { diff --git a/vendor/regex-syntax/src/utf8.rs b/vendor/regex-syntax/src/utf8.rs index b9c8655..e13b55a 100644 --- a/vendor/regex-syntax/src/utf8.rs +++ b/vendor/regex-syntax/src/utf8.rs @@ -3,7 +3,7 @@ Converts ranges of Unicode scalar values to equivalent ranges of UTF-8 bytes. This is sub-module is useful for constructing byte based automatons that need to embed UTF-8 decoding. The most common use of this module is in conjunction -with the [`hir::ClassUnicodeRange`](../hir/struct.ClassUnicodeRange.html) type. +with the [`hir::ClassUnicodeRange`](crate::hir::ClassUnicodeRange) type.
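The `utf8.rs` hunks that follow mostly swap `std` imports for `core`/`alloc` and replace `as` casts with checked conversions; the module's public entry point, the `Utf8Sequences` iterator, is unchanged. A usage sketch against the crate's documented API (assuming `regex-syntax` is available as a dependency):

    use regex_syntax::utf8::Utf8Sequences;

    fn main() {
        // Emit every UTF-8 byte-range sequence needed to match scalar
        // values in ['☃', '☄']; both endpoints encode to three bytes.
        for seq in Utf8Sequences::new('☃', '☄') {
            println!("{:?}", seq);
        }
    }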
See the documentation on the `Utf8Sequences` iterator for more details and an example. @@ -80,12 +80,9 @@ I also got the idea from which uses it for executing automata on their term index. */ -#![deny(missing_docs)] +use core::{char, fmt, iter::FusedIterator, slice}; -use std::char; -use std::fmt; -use std::iter::FusedIterator; -use std::slice; +use alloc::{vec, vec::Vec}; const MAX_UTF8_BYTES: usize = 4; @@ -306,7 +303,7 @@ impl Utf8Sequences { /// given. pub fn new(start: char, end: char) -> Self { let mut it = Utf8Sequences { range_stack: vec![] }; - it.push(start as u32, end as u32); + it.push(u32::from(start), u32::from(end)); it } @@ -317,7 +314,7 @@ #[doc(hidden)] pub fn reset(&mut self, start: char, end: char) { self.range_stack.clear(); - self.push(start as u32, end as u32); + self.push(u32::from(start), u32::from(end)); } fn push(&mut self, start: u32, end: u32) { @@ -416,7 +413,9 @@ impl ScalarRange { /// values in this range can be encoded as a single byte. fn as_ascii(&self) -> Option<Utf8Range> { if self.is_ascii() { - Some(Utf8Range::new(self.start as u8, self.end as u8)) + let start = u8::try_from(self.start).unwrap(); + let end = u8::try_from(self.end).unwrap(); + Some(Utf8Range::new(start, end)) } else { None } @@ -455,7 +454,9 @@ fn max_scalar_value(nbytes: usize) -> u32 { #[cfg(test)] mod tests { - use std::char; + use core::char; + + use alloc::{vec, vec::Vec}; use crate::utf8::{Utf8Range, Utf8Sequences}; @@ -472,7 +473,11 @@ mod tests { "Sequence ({:X}, {:X}) contains range {:?}, \ which matches surrogate code point {:X} \ with encoded bytes {:?}", - start as u32, end as u32, r, cp, buf, + u32::from(start), + u32::from(end), + r, + cp, + buf, ); } } @@ -579,9 +584,9 @@ assert!(0xD800 <= cp && cp < 0xE000); let mut dst = [0; 3]; - dst[0] = (cp >> 12 & 0x0F) as u8 | TAG_THREE_B; - dst[1] = (cp >> 6 & 0x3F) as u8 | TAG_CONT; - dst[2] = (cp & 0x3F) as u8 | TAG_CONT; + dst[0] = u8::try_from(cp >> 12 & 0x0F).unwrap() | TAG_THREE_B; + dst[1] = u8::try_from(cp >> 6 & 0x3F).unwrap() | TAG_CONT; + dst[2] = u8::try_from(cp & 0x3F).unwrap() | TAG_CONT; dst } } diff --git a/vendor/regex-syntax/test b/vendor/regex-syntax/test index 4b1b9fb..8626c3b 100755 --- a/vendor/regex-syntax/test +++ b/vendor/regex-syntax/test @@ -2,11 +2,16 @@ set -e +# cd to the directory containing this crate's Cargo.toml so that we don't need +# to pass --manifest-path to every `cargo` command. +cd "$(dirname "$0")" + # This is a convenience script for running a broad swath of the syntax tests. echo "===== DEFAULT FEATURES ===" cargo test features=( + std unicode unicode-age unicode-bool @@ -17,6 +22,9 @@ features=( unicode-segment ) for f in "${features[@]}"; do - echo "===== FEATURE: $f ===" - cargo test --no-default-features --features "$f" + echo "=== FEATURE: $f ===" + # We only run library tests because I couldn't figure out how to easily + # make doc tests run in 'no_std' mode. In particular, without the Error + # trait, using '?' in doc tests seems tricky.
+ cargo test --no-default-features --lib --features "$f" done diff --git a/vendor/regex/.cargo-checksum.json b/vendor/regex/.cargo-checksum.json index d6ea1df..5418949 100644 --- a/vendor/regex/.cargo-checksum.json +++ b/vendor/regex/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"CHANGELOG.md":"c66cc76a297a1068a3aa81d08326175be887b60fb6330c4b7922f4ad286bd144","Cargo.lock":"fa78ce7955999185e017ef812db215124b65ae8aef67b4ea9ca5bd18c50b0d35","Cargo.toml":"566dca60827f0cbafd1d920457d6d7fad9bd5b85630832207de8cc9c35cab274","HACKING.md":"17818f7a17723608f6bdbe6388ad0a913d4f96f76a16649aaf4e274b1fa0ea97","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","PERFORMANCE.md":"0d5ef3866386918dfdefb1aa9a28cfe33cb3c8ceeb79f3f8ba5b88253dd95991","README.md":"f69204a0f446047d8f4d1f3d84b75f235adb5c26477f3a37b671411bc954d14c","UNICODE.md":"a8a8399540eed000d19420135a527f400247a04572e44d124c786b870f518776","examples/regexdna-input.txt":"156a49710bb3e1ed4bc2bbb0af0f383b747b3d0281453cfff39c296124c598f8","examples/regexdna-output.txt":"35e85b19b70a893d752fd43e54e1e9da08bac43559191cea85b33387c24c4cc1","examples/shootout-regex-dna-bytes.rs":"fa2daedb4e0a05f64f33f4af62fbb0176db998e3676f8637ab684b725367a7b4","examples/shootout-regex-dna-cheat.rs":"1f871a6eaaf8372299fa3c762051112fa89a14235b03f734fc50ebd51ecaee72","examples/shootout-regex-dna-replace.rs":"32ffdf13ac6c4ce3fc32116a048e9cc682aa34cdb8e5beaf565a22addbdcd9ab","examples/shootout-regex-dna-single-cheat.rs":"809f75bf1e1917a53623eb6f1a3ce3b7d2ed98a6a1dbc0bd4853bec49a0c6f94","examples/shootout-regex-dna-single.rs":"1ab14f5703cd4be2e75a2e792e0ba1d322b9e4b14535d396805a4316d577f5bb","examples/shootout-regex-dna.rs":"20ea46ab63f91e3ac6a64e997eadd436a9cbc2f1bdade28e4512052f0e25bc34","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/backtrack.rs":"52987d80448f3d7f5d4e3545ddfc09f1f30de7602d9b5489961db4b215a377fd","src/compile.rs":"79a59be2d2db650b5a322e15e9bf1d3227944410bc780fc6089da8f4d2609b77","src/dfa.rs":"10273980d1f08aaff495e11efa240249a2b2c08a4db7c49c8d6759bc65a3b174","src/error.rs":"71c85db839514f26ee024a689061743ea94a34eb7a3291e6c2b69b45a9682d09","src/exec.rs":"4726e2c210c6adb91b25390cbc864ab71deabb17ca87a46e83acef26a194bfc2","src/expand.rs":"71220309a3bac797f55129f49e79c03e96efec894ea338c735b78695367e04ca","src/find_byte.rs":"b387247b77e3269f057c3399aefe5a815032c3af918c876f80eb4b282e4eb95e","src/freqs.rs":"255555f3d95b08a5bb3bc2f38d5a06cc100a39c0f0127fe4f50c33afa1cadc65","src/input.rs":"13f49c1bce2fadd04a45b421d374cd0f8b72bef83f7e8fda958962aaccbe799a","src/lib.rs":"982fadba415c4c5b93f4d7d4a73a23ec88e2d96daaa03b679d14490ea0f63197","src/literal/imp.rs":"b7f63a861c299bea4baaab17353a420ee339c2cf76d3858c95f39342bd4463e7","src/literal/mod.rs":"533f1d68af088e9485170145e27518368e541a0337fdb44f63249ebf97310300","src/pattern.rs":"993d8b6b4bcea5e02bee3c76e17c356a5a47f8fc53c5555edfd1ebb71c0878bf","src/pikevm.rs":"6c0eaa7e878c945ac4c3c545c98f5706ad04846fc432a5086c8ee78eb030dfa7","src/pool.rs":"942e991ae31ef349bd76efd78b2a712c01166dec965bf93742977ed0870d5a10","src/prog.rs":"bebb3e50745bbc05d6c8240d972ba55a1818c51b1161dc1c21f3fe13c11d4884","src/re_builder.rs":"943344bf6e2fc90902ee04b11b741c32418ac6814b21b7982cc0a3a817713f3e","src/re_bytes.rs":"63ee1db1637a3764addb10e27248129acffaf78bb0a69624add4d9d6f1e97040","src/re_set.rs":"7921ac4a919b7a5deffe82d099a9ccaf5487aebd890dfb7a661e602c6ad3f1a9","src/re_trait.rs":"d237121b6f6b606836c72305c
bcb3bbdbc54d1f6827d19a19cd0fbb4372e0145","src/re_unicode.rs":"4ca66d6e835df7c0f570c8cde52667ef90ba1687d5285f12fedef2e38ae925b4","src/sparse.rs":"0da3ddb7972109869248a764dbb10254555f4bb51c375e89fb3fab9cafa47320","src/testdata/LICENSE":"58cf078acc03da3e280a938c2bd9943f554fc9b6ced89ad93ba35ca436872899","src/testdata/README":"45f869e37f798905c773bfbe0ef19a5fb7e585cbf0b7c21b5b5a784e8cec3c14","src/testdata/basic.dat":"b5b33aa89d48a61cd67cb1fbfd8f70e62c83e30b86256f9f915a5190dd38ff06","src/testdata/nullsubexpr.dat":"496ac0278eec3b6d9170faace14554569032dd3d909618364d9326156de39ecf","src/testdata/repetition.dat":"1f7959063015b284b18a4a2c1c8b416d438a2d6c4b1a362da43406b865f50e69","src/utf8.rs":"f85a356ff5d5b19e417b73ce1dd84581b21d283f6dddd195547c30af9c60bd1a","test":"0d62fdca7da12fc19ea5306b5de1d83e68d9365a029c043d524334da138b0304","tests/api.rs":"7b2a0ef75e99b9776094967bd66e9cdeaa8e11359f5f0a12bd08ef0e8d0c11fc","tests/api_str.rs":"2ae38c04e7e8fac008b609a820d0b1561ba75f39b0edc0987d6d3d06132da77f","tests/bytes.rs":"edc50f526c5fee43df89d639ef18b237e4eb91e9d533bfc43f3cbab7417d38ba","tests/consistent.rs":"d69435154c09478076497216e43081a835ac65147181a4fbddad7bff469605b2","tests/crates_regex.rs":"91a59d470e0700b4bcb3ff735d06799f3107b8ef4875a2e9904607b164be0326","tests/crazy.rs":"c0d56380dff19bdd5d7a3eb731d0e2dc564e169a1b73c81e1879b1e87f5f5f77","tests/flags.rs":"05caace2c81a99d2168037f3a38035d4dffe9f85ef3ebd7ef18b1bc6612f1ea8","tests/fowler.rs":"d78cf914de40b1e125cc92b65ccb444d462586bd07b5e05de4e4a1b5de16aa76","tests/macros.rs":"6db70c16fc90df13e6b30d2b606f8b6dd4dc976697967f6ee001b15aab6d0b19","tests/macros_bytes.rs":"a049f528a93173a1bb176cd46932dce1880679f4a1752e099be920f0e4546fd0","tests/macros_str.rs":"e585b1461374c45a2eca44ca045bc3c1fe984b2b4212e432b0c695b420e708b7","tests/misc.rs":"395f52793fa022e4cdda78675b6a6fba1a3106b4b99c834c39f7801574054bd1","tests/multiline.rs":"1b1a3326ed976437c1357f01d81833ece7ea244f38826246eab55cacd5d0862a","tests/noparse.rs":"12b6be0eff3d80779d33c6459396c74c0f6ebf4ddc9f1d33c3e747ea9e3bf268","tests/regression.rs":"1c965fefb8c7a2b1dfdab3e3fdeebaf47846555c50c8005e5537f96a52a3e252","tests/regression_fuzz.rs":"a504ec563e0d23bd2039493b7b1767fe1f831d7d668f6f4b2ecd124fc7899bcd","tests/replace.rs":"66f97532e40697934e2a77605b9002dfd22c46b6033ccb755e7660d855229f41","tests/searcher.rs":"ce35e47b0a276a7e8c9060c6a0b225ffba163aebc61fbc15555a6897fa0e552c","tests/set.rs":"f1e2af6baeeaed3cc99ed347ff516fe7b2eb0027ef64b891502e1486598eaf8a","tests/shortest_match.rs":"a2c94390c0d61bc24796b4c1288c924e90c8c9c6156fdebb858175177a194a42","tests/suffix_reverse.rs":"b95f89397404871227d9efe6df23b9ded147f183db81597e608f693955c668b5","tests/test_backtrack.rs":"b70c5e5f1241efd76dd9f9dd4a4df8a7b38113bd407d1f5f56867f1176177a59","tests/test_backtrack_bytes.rs":"b8a111d4b4109c8bba7e2afb650572c495a14d357fb1f743c1076fb001f704b5","tests/test_backtrack_utf8bytes.rs":"c0c279785d18beac2b4e178e7bf6c14ed235d65f00ca467cfd9c333d79487649","tests/test_crates_regex.rs":"fd9525c2eef0e2f8cb7f787bc2b721bcd0b5d84f3bca49adfe48d657a99c721a","tests/test_default.rs":"b32c11a43da4379a3717dd7a5f152c811257c7d6595c9d3c51f2de102e320c87","tests/test_default_bytes.rs":"831d3e6bfb882feb15f700e30304bd34328f888fb4c15c7169371e25024ce9a7","tests/test_nfa.rs":"f119fc43a018249c39c813d57096b0654ff69f337345f2bbd9b0e61cc9137285","tests/test_nfa_bytes.rs":"89eae3bef6a1d0bcea6b5de5be35ad72f613f2ceb8b58fe82a6c6ef2ccdc07d0","tests/test_nfa_utf8bytes.rs":"7d830b4aa401887d7cf098b62fed4cd8017ef8b61f625c7c9a2159a6b4cfeb71","tests/unicode.rs":"1af9db7f09a6b011
3b8a64733e06c8415fef720b2fdef227ae398d94332287cd","tests/word_boundary.rs":"7081317ddcec1e82dd4a2090a571c6abf2ff4bbfa8cd10395e1eb3f386157fae","tests/word_boundary_ascii.rs":"cd0be5b5b485de0ba7994b42e2864585556c3d2d8bf5eab05b58931d9aaf4b87","tests/word_boundary_unicode.rs":"75dbcc35d3abc0f9795c2ea99e216dc227b0a5b58e9ca5eef767815ff0513921"},"package":"8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d"} \ No newline at end of file +{"files":{"CHANGELOG.md":"e77650ae39a200cd7cb7aea153e5fbe03e68179af0d192416a395f0cf7e573e7","Cargo.toml":"a8d510f250e770370e021f8f5a4af09504b308ceb72311f32558b17a80958e22","Cross.toml":"4a11d6c63ecc919016b59fa0fe23674eb05682fb91ffbe677a4a7077e9e684ff","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"984b116bf94decdcb0fcdeb14641b332c9fe40da63e5a256dd39eb91a2e13387","UNICODE.md":"845fca1982e82e190109a784952579fce54faea120b702b7efd61164a12f601f","bench/README.md":"0aee42206b0e5edcb400a11faa2c536f512bcc6086e5ffdda001b9bfe4d19808","record/README.md":"02e6f85f8a43f18540e4a52a75d1001494df7aceac3873e9a13e3ceba190206d","record/compile-test/2023-04-19_1.7.3.csv":"460059ba2f10456175ff92bd75d4a365b14a1843e2b46e7b285d58da59e6d3ca","record/compile-test/2023-04-20_master.csv":"6b94df278e4ed82a3fd0d4bfe92a4614714e00435e983c7649ee9f54925f906e","record/compile-test/2023-07-05.csv":"cf00b4981b8c12980113810dba40e2063a8400354ad4dab16f7c212ff0b5db74","record/compile-test/README.md":"ba2b606993edd8d705ad1677ec954862614e52b028407e1908bb5dfb07767f2d","record/old-bench-log/01-lazy-dfa/dynamic":"dec9f74b8835403c71edc0c2d93bbdde0f5a0e37d46585e416c80496d5b14497","record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa":"c0ce02bef9ada8cd55672f0a9c3c5fc64f71e08bfb2b45978082a140b4fc111f","record/old-bench-log/01-lazy-dfa/native":"9de61ff787e36f5c6f1eaec68b8bb0583e57b0aad23712afe8c0048988c761b8","record/old-bench-log/01-lazy-dfa/nfa":"38c0be44a00b2caef17101bc425410fec2958e4df6da25d2ba5b6664f8bccad9","record/old-bench-log/01-lazy-dfa/pcre":"3b38026c24e4ca487ff62de83cc093ccb46b918f4875663249ff84ce27636942","record/old-bench-log/02-set/dynamic":"8ef5c00f0ac42e5f008e4b6337669527b48fba38df94c50d3e683c6aac66a48c","record/old-bench-log/03-bytes/onig":"f32347a6e0f25f46ad1b0aa736c29eca47c25f90d32c8823ea0d14204859a35b","record/old-bench-log/03-bytes/pcre":"b90982575c0ad55617b2ce50c2e9853d090502bf07e1eb19edf9009d3c9f2987","record/old-bench-log/03-bytes/rust":"b1e70e5ae48a9c726d8cd8a98019c0efe5a1095563c61cf0ac75e24de32461b4","record/old-bench-log/03-bytes/rust-bytes":"fbf0e6cb8102c7ca8e59bd459bb0ae7f1feaf8103def70b8d4793c59e68e8736","record/old-bench-log/04/onig":"4e34e2ede0a806b8ee540e63e4babee38049e5a8ab3be99c4f5d8b02bbc653fd","record/old-bench-log/04/pcre1-jit":"736c4941e991ef94f76379cf2187d0ea2a41b052cf80c94d0dd0c9ea758a6491","record/old-bench-log/04/pcre2-jit":"00e7bbf7749904fca8dff9b441d15bbe670f37b427e385ddf740f7a49de3b1fb","record/old-bench-log/04/re2":"b8b8595f6b68da127b56dc7c61a9fd15548251fda1be9d2c50c2d48382e887b6","record/old-bench-log/04/rust":"c5a6b918e815294f0c4e3d37267c444d49692ff131c5a08f7462c24d0721fcec","record/old-bench-log/04/tcl":"c4d8d12b8cf48ff2017549e95e49dc95a90ea15483834cd70d2d7d7c237bbd32","record/old-bench-log/05/onig":"70a4da9aafaefa6493cd09d3a529dd5d2d9eacf390bb093681bc7be28a1f926c","record/old-bench-log/05/onig-vs-rust":"b942a79735b7330241437776c15b18f4db3eff01d3e6c35494f4a8732e74a23a","record/old-bench-log/05/pcre1":"b29b7efbe79b55ce0aaf24
bbbecc376a865fa219a68d96124e3d95951cdb47f4","record/old-bench-log/05/pcre1-vs-rust":"a458e5c62f0500898e08757753c10981551649656432ec096f0c82b414ef8d82","record/old-bench-log/05/pcre2":"faa93937c3490cfdff88c32dc04e57f2ae881923b87781e5fe876535fd690770","record/old-bench-log/05/pcre2-vs-rust":"bf9faa6a679dd98e9452e52c0941d2eb84dcf0b6632c15507f8334ed7bc309da","record/old-bench-log/05/re2":"692866b28e1bc368c7a59f519b8dfe1da50a135946ce153298a0ab228a5ee59d","record/old-bench-log/05/re2-vs-rust":"55e4cb14c397574751aebe38068c429a4580a5e309857b2715047944903dca58","record/old-bench-log/05/rust":"aac6acda9f63e51613712d0a33bb7fb46dfc7adc425f76b9b71195be8c8a42e7","record/old-bench-log/05/tcl":"f03e39eccd3252162cc6099bb0426014df669d299ba0ef79e89b8401886a5172","record/old-bench-log/05/tcl-vs-rust":"ae6ac4668573bf5488cc235c5da16ad9358d07b7644207d9bcea88ba6f5514a6","record/old-bench-log/06/dphobos-dmd":"473328306be335a0320c690d9c2dbefdf7f2f5a80e4ca69443c7ed2e81bb093f","record/old-bench-log/06/dphobos-dmd-ct":"60341c736382a6db21d9889369ea4617c521acbf30d4b3bf38bcd17f4f85b9b1","record/old-bench-log/06/dphobos-ldc":"ae60c2bed84afb89ae43615f26de4cc5d0042e179089b639507378518eed3252","record/old-bench-log/06/dphobos-ldc-ct":"a157ef450793b73de3a816fab1d93a6d11e90a817082bae5e3da02a66fcc833f","record/old-bench-log/06/pcre1":"ad10fd7db732e8670dd3d4eedb05f48f547b4782495aaadff8ec25a6ea1992a0","record/old-bench-log/06/pcre2":"f789a73bd41a0bc401bdebe7f10a03a8aa587de48643d88507d16181a8fa39d3","record/old-bench-log/06/re2":"203c273a110d71f5edf722630202a6142c39d6b7a9951686adf8b9c20c5db278","record/old-bench-log/06/rust":"6a642a16cd279c99ef5a580a25fb3a63ac6239cd73df9261c02912fa08145753","record/old-bench-log/07/boost":"255bc652c4d9e9d20aa9b22d8d86e952e7ec6c8b9fcde0c3d6e38c967e04d40e","record/old-bench-log/07/dphobos-dmd":"fb3ac60037050858611145ca3e71412164688dcdec52c022787d33304e022260","record/old-bench-log/07/dphobos-dmd-ct":"40a5088441f8ffe3dae0abaf31c105cedfbe3b56c06772f075947d504976d2ed","record/old-bench-log/07/oniguruma":"ae0cd60adb15845eb9ef706111d4ee0e6ad5a58f0276b787d68bd7d637f8f7c6","record/old-bench-log/07/pcre1":"a812d065ec248249f9bb3d6d970f15c18d342f6b443265ad4b07fa91b73575cc","record/old-bench-log/07/pcre2":"88230663eccd0b382cf5be81ce1ae6cfa3fa835a65a31c1eba4369d2e8de5d27","record/old-bench-log/07/re2":"e330ef21ce44351afc3c43821d862e9c625877606569f3af0ddbadcd7b21c602","record/old-bench-log/07/rust":"d8c6bd5c46f5df9d0ac222f7be7793527a8137d273c8826b3715c67e16209aac","record/old-bench-log/07/rust-bytes":"e21d02fa2ef1e5ed7204920b33ed24c9fb620e068ed47ed6879b72e76369a27e","record/old-bench-log/07/stdcpp":"9df02d89dc8232c700b8cf8bc6f1ece3ca7af84ab52e67a660039d6c9168aed4","record/old-bench-log/07/stdcpp-libcxx":"f90849a0b5dc11dc0280ad97886e92e1d91c080403ad7a4ecd638a26fe5e8c5e","record/old-bench-log/07/tcl":"7f6e347bb507f9c00ff664d3e627c0a9cf842b416eeb2af9f3b6cccd041c58e4","record/old-bench-log/08-new-memmem/rust-after-01":"646c7d50aea9c560a35eb60116f301f4fb8d4b03fd5814d8b24adffd070332e3","record/old-bench-log/08-new-memmem/rust-after-02":"14e7fb6c6faa85a8f90617528cef79ae382aeba07c2e5c253c68445902b060ba","record/old-bench-log/08-new-memmem/rust-before-01":"7e3b58de0f502c1a1bf6d27e0e85c654b1189716f7374cec4ed4dd365b13101f","record/old-bench-log/08-new-memmem/rust-before-02":"ab6d09529eeeca7ff0da945d59701dbbcfdec5e05581bb9bf154779d12a35e53","record/old-bench-log/09-new-baseline/pcre2":"28df8e2762f267d1ea628906a6e4bbc21f99e6a445bd322c86d0ca483b21b5b3","record/old-bench-log/09-new-baseline/re2":"421437193cc3f159c178479f98bde8d
be27883ec7757b1ddd8d745862f5899ff","record/old-bench-log/09-new-baseline/rust":"6f932a769171b6cdb717c9d01e44a70762ef660c4045b9d2bb3797a9bdf65405","record/old-bench-log/09-new-baseline/rust-bytes":"9c5acd5c1eeac9acfe76d03588041f9b6d65b4351085c3510888ceeb83e8a7b5","record/old-bench-log/10-last-frontier/rust-after-literal.log":"02baef9b3b49acbbff43e81f48ea5a9287e30ff4fc298a3f3b48991d8374aabf","record/old-bench-log/10-last-frontier/rust-before-literal.log":"e5a3bcc2b9e93cf3cb27bc9e6305b3bc03215751bbeef2a70fb25577d6b42874","record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log":"29834c7a5396ac61acedd07c0b7ca60716865ec3e70f35fbaa7826a2309a79d9","record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log":"4e7468a3e8629814bd4af91e2a8eb42d0899d352b5dff3058b801aa637046be2","record/old-bench-log/11-regex-1.7.3/rust":"d7cc18a62070ea7a999e1ba2458f26cf94595f1af276c2b3e96cee638eccf3f0","record/old-bench-log/11-regex-1.7.3/rust-bytes":"64c7458020139bd7a03d1cb0927b741e6972377b686626563acb86fbc66414ca","record/old-bench-log/12-regex-1.8.1/rust":"a538c42e77e20956e81fb5a4e2e1e7d3fdf60da019d7e3df52d93f57367a3fbd","record/old-bench-log/12-regex-1.8.1/rust-bytes":"fbb00fdf8f039ce312f5346a67dddaa5e129280a93a90d7aaf6b5a9a71d2f212","record/old-bench-log/13-regex-1.9.0/rust":"0ef62700ba3fc24887af74b7942490c90b4cd2814b8fda200f7376e43391bfce","record/old-bench-log/13-regex-1.9.0/rust-bytes":"676d501d4667f0a945c88ebb56839176dd3a5a6b45af7708b1e870bf26d12603","record/old-bench-log/README.md":"d359f536fb4b8c1af9af3465a027c3522f62c3871aad44645a955b650d7deec0","record/old-bench-log/old/01-before":"c2ea2750fca8ac1742003fe2106e9422d49e92967d3fe0267f24b7ec830b07e3","record/old-bench-log/old/02-new-syntax-crate":"27fd8b3d35cf08d434035ff7d9f2e9e3c94a167e45ba655567c73ae96830f1d8","record/old-bench-log/old/03-new-syntax-crate":"d942a2b95c3a2d8f85f3f17934f258bdc84baa33e91986e8a6810ca8d6e9cc50","record/old-bench-log/old/04-fixed-benchmark":"0da29ef39ac07ece411c151ab479a76944946aba992547b15d90ec2d5484e85c","record/old-bench-log/old/05-thread-caching":"e364d87131e43187d6757426839789d1b6b47b3f3af21280daa9193d5ab19f64","record/old-bench-log/old/06-major-dynamic":"3bc2b8fd2714ae9f19b2e4f4219654982522daf01b5d3055b4aec0458afeaf13","record/old-bench-log/old/06-major-macro":"d5617ed23e71d5298ed4d629eee257e401c352fd1c91a2048dfeb1677527d4e7","record/old-bench-log/old/07-prefix-improvements":"9277d1392c85a38db215a9b69e3b0cd4a9901f8f1c72c706ca262e5f099b8819","record/old-bench-log/old/08-case-fixes":"f97cd3675cf5c967e4ca8841f2368e9eadf538b542bfe3035d31492afc5934bf","record/old-bench-log/old/09-before-compiler-rewrite":"b928686819dbd9aeaa6639b01b63a48428653f2f676a4e15d61cddec421e0389","record/old-bench-log/old/10-compiler-rewrite":"697b295ee377a5cb287d403593bfb8c078270b4e19e8d61d0b95b06ee7c903ab","record/old-bench-log/old/11-compiler-rewrite":"3f0ba494a0d82e7419285a9686474dc7763d4da0dd3faaa3bec3f624bbede481","record/old-bench-log/old/12-executor":"962e182f9a1cfddb8c0cd2d8c4681febef1430082c9a38e5373c9117b853e65e","record/old-bench-log/old/12-executor-bytes":"e01a1e878b44c80724e9bf09bb11210eeb8f01518ac7f0e3e7f2ee241281e500","record/old-bench-log/old/13-cache-byte-range-suffixes":"1d67d58a39f9177a79c26f3c6c2a1caaf51f085ce137711ab9ba74071c14680c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/builders.rs":"6dbff8c7ff7febe031dbef3eafe1f02a15112ff1ffd889761a21c10b0dd84f03","src/bytes.rs":"cce2b7012f5896cf82fc3086bf8128dc9efe2b69bf6917d041c1a171eabacdc0","src/error.rs":"4ac8361e900627a87a2ac78
e5d475be17c455fe6850d1515bf5b874837c4ae25","src/find_byte.rs":"e17cd3b765467685946707840b92ea4e37d3c11081fbf316174a15858cd4bd99","src/lib.rs":"1be6ce04d4fce06a7e46bc006bbf536f5a7f82d97dc71e7f7489a6d2610f790b","src/pattern.rs":"5f37755a7c16902d861377645f57a20314961f86298d4b35ae6e1058ca4e9801","src/regex/bytes.rs":"0d5ae71d9963bc7492f8687508dcfdf05f9e5e01b2c10c350a281ec3ddefb062","src/regex/mod.rs":"c220b6dd7a5e1945f8e743d1dcd796c5f782c91b0c34eb9915c588174a517fe8","src/regex/string.rs":"e1f76e5f21597da6c76514b579a1fff8c061ecd134ac4ce16ab4cddc052b270d","src/regexset/bytes.rs":"6290dd81f47fb6cdbaa358be2340398a2a640320373d334e4c977bf30b5a9220","src/regexset/mod.rs":"c220b6dd7a5e1945f8e743d1dcd796c5f782c91b0c34eb9915c588174a517fe8","src/regexset/string.rs":"977bc167c48c7c99187599c5071ca197e61a56359d32a26b9dbc1b58a5ef1c4d","test":"c0122c20a2c9b7ba6e9a8aaeb2b7d9910315ef31063539949f28d9501ef3193c","testdata/README.md":"c0514501526560d7f6171eb6d982ad61b4527760cb38a4bfbe8e28036ff37b95","testdata/anchored.toml":"7a1b5cd81deed2099796a451bf764a3f9bd21f0d60c0fa46accd3a35666866f2","testdata/bytes.toml":"1d84179165fd25f3b94bd2bfbeb43fc8a162041f7bf98b717e0f85cef7fb652b","testdata/crazy.toml":"a146e2d2e23f1a57168979d9b1fc193c2ba38dca66294b61140d6d2a2958ec86","testdata/crlf.toml":"d19cf22756434d145dd20946c00af01c102a556a252070405c3c8294129d9ece","testdata/earliest.toml":"d561e643623ee1889b5b049fdcf3c7cb71b0c746d7eb822ddbd09d0acda2620b","testdata/empty.toml":"738dbe92fbd8971385a1cf3affb0e956e5b692c858b9b48439d718f10801c08e","testdata/expensive.toml":"5ce2f60209c99cdd2cdcb9d3069d1d5ca13d5e08a85e913efe57267b2f5f0e9d","testdata/flags.toml":"9a7e001808195c84f2a7d3e18bc0a82c7386e60f03a616e99af00c3f7f2c3fd4","testdata/fowler/basic.toml":"a82c7e233451cd7cfe0c3d817f3a1ab44478bb81ae62432efdd515fa8370275e","testdata/fowler/dat/README":"e53d6c37b5931cb26dc9ae4c40358eea63f7a469c4db6ca816c072a8ced6a61a","testdata/fowler/dat/basic.dat":"b1126dda59075c08f574987090273c9977790115f1e1941d0708c0b82b256905","testdata/fowler/dat/nullsubexpr.dat":"e5cd4145dffa8bc66f2d39079950b2bb7bae21a521514b83b557b92f4a871a9e","testdata/fowler/dat/repetition.dat":"2b8b2b191229a804fba49e6b888d8194bf488f7744057b550da9d95a2aa6617a","testdata/fowler/nullsubexpr.toml":"cd812e7e8fa0469253b34f0db93b5883c9d8b9740fc4f7825a38e7df880a4eed","testdata/fowler/repetition.toml":"8c09164f064b3db81309c53483863bdcec493781644de162416e9f485e772615","testdata/iter.toml":"6875460302974a5b3073a7304a865c45aba9653c54afea2c4d26e1ea248a81f7","testdata/leftmost-all.toml":"903bfbeff888b7664296f4d5aa367ce53d1dafe249ab0a3359223ae94d596396","testdata/line-terminator.toml":"02148068137b69d95587966917bdf0697bf7eb41ad6d47387f2eb30f67d04fd9","testdata/misc.toml":"32c9591655c6fb118dfefcb4de49a04820a63cb960533dfc2538cdaabf4f4047","testdata/multiline.toml":"eb07cf5427e6ddbcf61f4cc64c2d74ff41b5ef75ef857959651b20196f3cd157","testdata/no-unicode.toml":"d209da04506900fd5f69e48170cddaad0702355ac6176c3a75ab3ff96974457c","testdata/overlapping.toml":"5d96497a7233566d40b05ba22047e483fa8662e45515a9be86da45cf6c28703a","testdata/regex-lite.toml":"fecca7cc8c9cea2e1f84f846a89fd9b3ca7011c83698211a2eeda8924deb900c","testdata/regression.toml":"6006ef4fcfbfd7155ce5ce8b8427904f7261c5549396f20cb065c0294733686d","testdata/set.toml":"dfd265dc1aee80026e881616840df0236ae9abf12467d7ec0e141a52c236128c","testdata/substring.toml":"48122d9f3477ed81f95e3ad42c06e9bb25f849b66994601a75ceae0693b81866","testdata/unicode.toml":"7e4b013039b0cdd85fa73f32d15d096182fe901643d4e40c0910087a736cd46d","testdata/utf8.toml":"2eabce0
582bcacb2073e08bbe7ca413f096d14d06e917b107949691e24f84b20","testdata/word-boundary-special.toml":"7d0ea2f796478d1ca2a6954430cb1cfbd04031a182f8611cb50a7c73e443ce33","testdata/word-boundary.toml":"51bc1c498ab825420340a2dd3e6623de4054937ba6d5020ff8cd14b1c1e45271","tests/fuzz/mod.rs":"7b01a803e1c0b5a45c062d493723553f263c57e269eade1475eb789694635d5c","tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff":"be4f42497ac9358eb020bf17cd8fdb9743691824e01d744504613ea2bfb2f663","tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9":"19df9a1e1b5a3c0f31cc038b9f2991b161d8577b4a0c8b2fc391cdfecdb6dd85","tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04":"2fde1668e9e3e60943c28d97c01c90dd3d3882f48475f060ccaf961c228069e8","tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9":"c9e00f7a31453708560900aa51e358dd5551df494439860594be97bb1fb933ba","tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e":"4433011f7af46e855e843635cf24a49713bd5705f67176ed928f04d24eda1857","tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c":"95782a847fc64e9cccdf76e9540b0d16ce80db5d05157a88b958b763f9b8479b","tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085":"8ddff12288f6f20cc9d65db76bd8187834f64f844aad48a340d082555ad5cb56","tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0":"eea6919a75fde163634b890e2253a0918cf0ba092357fa617f368bbfa131ba30","tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a":"a806f73b900046977267acceb83b105bac7ee21ede2edc2927afe1e1f0149f00","tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6":"9540cf58241cde3bc0db8364e0ccff67ff1ff9721c85b0d2ca27354c0cbf2650","tests/lib.rs":"9bffc95568c09ac95b6a3e7ca64b6e858a0552d0c0b0fca2c447da3b9c0a45a2","tests/misc.rs":"5ac5858325451e1d70f308ca0bcead5a354d095a7473800c11065231c319c456","tests/regression.rs":"3490aac99fdbf3f0949ba1f338d5184a84b505ebd96d0b6d6145c610587aa60b","tests/regression_fuzz.rs":"57e0bcba0fdfa7797865e35ae547cd7fe1c6132b80a7bfdfb06eb053a568b00d","tests/replace.rs":"78ff9bf7f78783ad83a78041bb7ee0705c7efc85b4d12301581d0ce5b2a59325","tests/searcher.rs":"04152e5c86431deec0c196d2564a11bc4ec36f14c77e8c16a2f9d1cbc9fc574e","tests/suite_bytes.rs":"7697b04e5b181aa78b3654bd2dbe1c792d9626197573ed8c649f1da8b481817d","tests/suite_bytes_set.rs":"d970168fab57a9edc60ff26a2bb7d0cc714d4298e4ee9eadba9da44a6569f2bb","tests/suite_string.rs":"1be0cf8922171f8323f99e8ecedbbf1846d339620d0dc2fd490901cbbbd2622e","tests/suite_string_set.rs":"22743107206d913521f9adb728482aed3a9625ff7b15a83df057bbf1f7050e03"},"package":"380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"} \ No newline at end of file diff --git a/vendor/regex/CHANGELOG.md b/vendor/regex/CHANGELOG.md index 44274ac..420e08f 100644 --- a/vendor/regex/CHANGELOG.md +++ b/vendor/regex/CHANGELOG.md @@ -1,3 +1,479 @@ +1.10.2 (2023-10-16) +=================== +This is a new patch release that fixes a search regression where incorrect +matches could be reported. + +Bug fixes: + +* [BUG #1110](https://github.com/rust-lang/regex/issues/1110): +Revert broadening of reverse suffix literal optimization introduced in 1.10.1. + + +1.10.1 (2023-10-14) +=================== +This is a new patch release with a minor increase in the number of valid +patterns and a broadening of some literal optimizations. 
+ +New features: + +* [FEATURE 04f5d7be](https://github.com/rust-lang/regex/commit/04f5d7be4efc542864cc400f5d43fbea4eb9bab6): +Loosen ASCII-compatible rules such that regexes like `(?-u:☃)` are now allowed. + +Performance improvements: + +* [PERF 8a8d599f](https://github.com/rust-lang/regex/commit/8a8d599f9d2f2d78e9ad84e4084788c2d563afa5): +Broaden the reverse suffix optimization to apply in more cases. + + +1.10.0 (2023-10-09) +=================== +This is a new minor release of `regex` that adds support for start and end +word boundary assertions. That is, `\<` and `\>`. The minimum supported Rust +version has also been raised to 1.65, which was released about one year ago. + +The new word boundary assertions are: + +* `\<` or `\b{start}`: a Unicode start-of-word boundary (`\W|\A` on the left, +`\w` on the right). +* `\>` or `\b{end}`: a Unicode end-of-word boundary (`\w` on the left, `\W|\z` +on the right). +* `\b{start-half}`: half of a Unicode start-of-word boundary (`\W|\A` on the +left). +* `\b{end-half}`: half of a Unicode end-of-word boundary (`\W|\z` on the +right). + +The `\<` and `\>` assertions are GNU extensions to POSIX regexes. They have been added +to the `regex` crate because they enjoy somewhat broad support in other regex +engines as well (for example, vim). The `\b{start}` and `\b{end}` assertions +are aliases for `\<` and `\>`, respectively. + +The `\b{start-half}` and `\b{end-half}` assertions are not found in any +other regex engine (although regex engines with general look-around support +can certainly express them). They were added principally to support the +implementation of word matching in grep programs, where one generally wants to +be a bit more flexible in what is considered a word boundary. + +New features: + +* [FEATURE #469](https://github.com/rust-lang/regex/issues/469): +Add support for `\<` and `\>` word boundary assertions. +* [FEATURE(regex-automata) #1031](https://github.com/rust-lang/regex/pull/1031): +DFAs now have a `start_state` method that doesn't use an `Input`. + +Performance improvements: + +* [PERF #1051](https://github.com/rust-lang/regex/pull/1051): +Unicode character class operations have been optimized in `regex-syntax`. +* [PERF #1090](https://github.com/rust-lang/regex/issues/1090): +Make patterns containing lots of literal characters use less memory. + +Bug fixes: + +* [BUG #1046](https://github.com/rust-lang/regex/issues/1046): +Fix a bug that could result in incorrect match spans when using a Unicode word +boundary and searching non-ASCII strings. +* [BUG(regex-syntax) #1047](https://github.com/rust-lang/regex/issues/1047): +Fix panics that can occur in `Ast->Hir` translation (not reachable from `regex` +crate). +* [BUG(regex-syntax) #1088](https://github.com/rust-lang/regex/issues/1088): +Remove guarantees in the API that connect the `u` flag with a specific HIR +representation. + +`regex-automata` breaking change release: + +This release includes a `regex-automata 0.4.0` breaking change release, which +was necessary in order to support the new word boundary assertions. For +example, the `Look` enum has new variants and the `LookSet` type now uses `u32` +instead of `u16` to represent a bitset of look-around assertions. These are +overall very minor changes, and most users of `regex-automata` should be able +to move to `0.4` from `0.3` without any changes at all.
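The new assertions are easiest to see by example. A small hedged demonstration against the public API of `regex` 1.10 or newer:

    use regex::Regex;

    fn main() {
        // \b{start} and \b{end} (aka \< and \>) only match at word edges.
        let re = Regex::new(r"\b{start}cat\b{end}").unwrap();
        assert!(re.is_match("a cat sat"));
        assert!(!re.is_match("concatenate")); // 'cat' is mid-word here

        // The half variants constrain just one side.
        let half = Regex::new(r"\b{start-half}cat").unwrap();
        assert!(half.is_match("catalog")); // the end of 'cat' is unconstrained
    }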
+ +`regex-syntax` breaking change release: + +This release also includes a `regex-syntax 0.8.0` breaking change release, +which, like `regex-automata`, was necessary in order to support the new word +boundary assertions. This release also includes some changes to the `Ast` +type to reduce heap usage in some cases. If you are using the `Ast` type +directly, your code may require some minor modifications. Otherwise, users of +`regex-syntax 0.7` should be able to migrate to `0.8` without any code changes. + +`regex-lite` release: + +The `regex-lite 0.1.1` release contains support for the new word boundary +assertions. There are no breaking changes. + + +1.9.6 (2023-09-30) +================== +This is a patch release that fixes a panic that can occur when the default +regex size limit is increased to a large number. + +* [BUG aa4e4c71](https://github.com/rust-lang/regex/commit/aa4e4c7120b0090ce0624e3c42a2ed06dd8b918a): +Fix a bug where computing the maximum haystack length for the bounded +backtracker could result in underflow and thus provoke a panic later in a search +due to a broken invariant. + + +1.9.5 (2023-09-02) +================== +This is a patch release that hopefully mostly fixes a performance bug that +occurs when sharing a regex across multiple threads. + +Issue [#934](https://github.com/rust-lang/regex/issues/934) +explains this in more detail. It is [also noted in the crate +documentation](https://docs.rs/regex/latest/regex/#sharing-a-regex-across-threads-can-result-in-contention). +The bug can appear when sharing a regex across multiple threads simultaneously, +as might be the case when using a regex from a `OnceLock`, `lazy_static` or +similar primitive. Usually high contention only results when using many threads +to execute searches on small haystacks. + +One can avoid the contention problem entirely through one of two methods. +The first is to use lower level APIs from `regex-automata` that require passing +state explicitly, such as [`meta::Regex::search_with`](https://docs.rs/regex-automata/latest/regex_automata/meta/struct.Regex.html#method.search_with). +The second is to clone a regex and send it to other threads explicitly. This +will not use any additional memory compared to sharing the regex. The +only downside of this approach is that it may be less convenient, for example, +it won't work with things like `OnceLock` or `lazy_static` or `once_cell`. + +With that said, as of this release, the contention performance problems have +been greatly reduced. This was achieved by changing the free-list so that it +is sharded across threads, and by ensuring each sharded mutex occupies a +single cache line to mitigate false sharing. So while contention may still +impact performance in some cases, it should be a lot better now. + +Because of the changes to how the free-list works, please report any issues you +find with this release. That not only includes search time regressions but also +significant regressions in memory usage. Reporting improvements is welcome +as well! If possible, provide a reproduction. + +Bug fixes: + +* [BUG #934](https://github.com/rust-lang/regex/issues/934): +Fix a performance bug where high contention on a single regex led to massive +slow downs. + + +1.9.4 (2023-08-26) +================== +This is a patch release that fixes a bug where `RegexSet::is_match(..)` could +incorrectly return false (even when `RegexSet::matches(..).matched_any()` +returns true).
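Before the 1.9.4 fix list, a quick hedged sketch of the cloning workaround recommended in the 1.9.5 notes above; `Regex::clone` is cheap and clones share the compiled program, so per-thread handles avoid the contended shared state:

    use regex::Regex;
    use std::thread;

    fn main() {
        let re = Regex::new(r"\w+").unwrap();
        let handles: Vec<_> = (0..4)
            .map(|_| {
                let re = re.clone(); // one handle per thread
                thread::spawn(move || re.find_iter("one two three").count())
            })
            .collect();
        for handle in handles {
            assert_eq!(handle.join().unwrap(), 3);
        }
    }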
+ +Bug fixes: + +* [BUG #1070](https://github.com/rust-lang/regex/issues/1070): +Fix a bug where a prefilter was incorrectly configured for a `RegexSet`. + + +1.9.3 (2023-08-05) +================== +This is a patch release that fixes a bug where some searches could result in +incorrect match offsets being reported. It is difficult to characterize the +types of regexes susceptible to this bug. They generally involve patterns +that contain no prefix or suffix literals, but have an inner literal along with +a regex prefix that can conditionally match. + +Bug fixes: + +* [BUG #1060](https://github.com/rust-lang/regex/issues/1060): +Fix a bug with the reverse inner literal optimization reporting incorrect match +offsets. + + +1.9.2 (2023-08-05) +================== +This is a patch release that fixes another memory usage regression. This +particular regression occurred only when using a `RegexSet`. In some cases, +much more heap memory (by one or two orders of magnitude) was allocated than in +versions prior to 1.9.0. + +Bug fixes: + +* [BUG #1059](https://github.com/rust-lang/regex/issues/1059): +Fix a memory usage regression when using a `RegexSet`. + + +1.9.1 (2023-07-07) +================== +This is a patch release which fixes a memory usage regression. In the regex +1.9 release, one of the internal engines used a more aggressive allocation +strategy than what was done previously. This patch release reverts to the +prior on-demand strategy. + +Bug fixes: + +* [BUG #1027](https://github.com/rust-lang/regex/issues/1027): +Change the allocation strategy for the backtracker to be less aggressive. + + +1.9.0 (2023-07-05) +================== +This release marks the end of a [years long rewrite of the regex crate +internals](https://github.com/rust-lang/regex/issues/656). Since this is +such a big release, please report any issues or regressions you find. We would +also love to hear about improvements as well. + +In addition to many internal improvements that should hopefully result in +"my regex searches are faster," there have also been a few API additions: + +* A new `Captures::extract` method for quickly accessing the substrings +that match each capture group in a regex. +* A new inline flag, `R`, which enables CRLF mode. This makes `.` match any +Unicode scalar value except for `\r` and `\n`, and also makes `(?m:^)` and +`(?m:$)` match after and before both `\r` and `\n`, respectively, but never +between a `\r` and `\n`. +* `RegexBuilder::line_terminator` was added to further customize the line +terminator used by `(?m:^)` and `(?m:$)` to be any arbitrary byte. +* The `std` Cargo feature is now actually optional. That is, the `regex` crate +can be used without the standard library. +* Because `regex 1.9` may make binary size and compile times even worse, a +new experimental crate called `regex-lite` has been published. It prioritizes +binary size and compile times over functionality (like Unicode) and +performance. It shares no code with the `regex` crate. + +New features: + +* [FEATURE #244](https://github.com/rust-lang/regex/issues/244): +One can opt into CRLF mode via the `R` flag. +e.g., `(?mR:$)` matches just before `\r\n`. +* [FEATURE #259](https://github.com/rust-lang/regex/issues/259): +Multi-pattern searches with offsets can be done with `regex-automata 0.3`. +* [FEATURE #476](https://github.com/rust-lang/regex/issues/476): +`std` is now an optional feature. `regex` may be used with only `alloc`. 
+* [FEATURE #644](https://github.com/rust-lang/regex/issues/644): +`RegexBuilder::line_terminator` configures how `(?m:^)` and `(?m:$)` behave. +* [FEATURE #675](https://github.com/rust-lang/regex/issues/675): +Anchored search APIs are now available in `regex-automata 0.3`. +* [FEATURE #824](https://github.com/rust-lang/regex/issues/824): +Add new `Captures::extract` method for easier capture group access. +* [FEATURE #961](https://github.com/rust-lang/regex/issues/961): +Add `regex-lite` crate with smaller binary sizes and faster compile times. +* [FEATURE #1022](https://github.com/rust-lang/regex/pull/1022): +Add `TryFrom` implementations for the `Regex` type. + +Performance improvements: + +* [PERF #68](https://github.com/rust-lang/regex/issues/68): +Added a one-pass DFA engine for faster capture group matching. +* [PERF #510](https://github.com/rust-lang/regex/issues/510): +Inner literals are now used to accelerate searches, e.g., `\w+@\w+` will scan +for `@`. +* [PERF #787](https://github.com/rust-lang/regex/issues/787), +[PERF #891](https://github.com/rust-lang/regex/issues/891): +Makes literal optimizations apply to regexes of the form `\b(foo|bar|quux)\b`. + +(There are many more performance improvements as well, but not all of them have +specific issues devoted to them.) + +Bug fixes: + +* [BUG #429](https://github.com/rust-lang/regex/issues/429): +Fix matching bugs related to `\B` and inconsistencies across internal engines. +* [BUG #517](https://github.com/rust-lang/regex/issues/517): +Fix matching bug with capture groups. +* [BUG #579](https://github.com/rust-lang/regex/issues/579): +Fix matching bug with word boundaries. +* [BUG #779](https://github.com/rust-lang/regex/issues/779): +Fix bug where some regexes like `(re)+` were not equivalent to `(re)(re)*`. +* [BUG #850](https://github.com/rust-lang/regex/issues/850): +Fix matching bug inconsistency between NFA and DFA engines. +* [BUG #921](https://github.com/rust-lang/regex/issues/921): +Fix matching bug where literal extraction got confused by `$`. +* [BUG #976](https://github.com/rust-lang/regex/issues/976): +Add documentation to replacement routines about dealing with fallibility. +* [BUG #1002](https://github.com/rust-lang/regex/issues/1002): +Use corpus rejection in fuzz testing. + + +1.8.4 (2023-06-05) +================== +This is a patch release that fixes a bug where `(?-u:\B)` was allowed in +Unicode regexes, despite the fact that the current matching engines can report +match offsets between the code units of a single UTF-8 encoded codepoint. That +in turn means that match offsets that split a codepoint could be reported, +which in turn results in panicking when one uses them to slice a `&str`. + +This bug occurred in the transition to `regex 1.8` because the underlying +syntactical error that prevented this regex from compiling was intentionally +removed. That's because `(?-u:\B)` will be permitted in Unicode regexes in +`regex 1.9`, but the matching engines will guarantee to never report match +offsets that split a codepoint. When the underlying syntactical error was +removed, no code was added to ensure that `(?-u:\B)` didn't compile in the +`regex 1.8` transition release. This release, `regex 1.8.4`, adds that code +such that `Regex::new(r"(?-u:\B)")` returns to the `regex <1.8` behavior of +not compiling. (A `bytes::Regex` can still of course compile it.) 
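Stepping back to the 1.9.0 additions for a moment before the 1.8.4 fix list: a short hedged demonstration of `Captures::extract` and the CRLF-mode `R` flag, both public API as of `regex` 1.9:

    use regex::Regex;

    fn main() {
        // Captures::extract destructures the full match plus a fixed-size
        // array of capture groups in one call.
        let re = Regex::new(r"([0-9]{4})-([0-9]{2})").unwrap();
        let (full, [year, month]) = re.captures("2023-10").unwrap().extract();
        assert_eq!((full, year, month), ("2023-10", "2023", "10"));

        // With (?mR), ^ and $ treat \r\n as one line terminator and never
        // match between the \r and the \n.
        let re = Regex::new(r"(?mR)^\w+$").unwrap();
        let words: Vec<&str> =
            re.find_iter("a\r\nb\r\n").map(|m| m.as_str()).collect();
        assert_eq!(words, vec!["a", "b"]);
    }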
+ +Bug fixes: + +* [BUG #1006](https://github.com/rust-lang/regex/issues/1006): +Fix a bug where `(?-u:\B)` was allowed in Unicode regexes, and in turn could +lead to match offsets that split a codepoint in `&str`. + + +1.8.3 (2023-05-25) +================== +This is a patch release that fixes a bug where the regex would report a +match at every position even when it shouldn't. This could occur in a very +small subset of regexes, usually an alternation of simple literals that +have particular properties. (See the issue linked below for a more precise +description.) + +Bug fixes: + +* [BUG #999](https://github.com/rust-lang/regex/issues/999): +Fix a bug where a match at every position is erroneously reported. + + +1.8.2 (2023-05-22) +================== +This is a patch release that fixes a bug where regex compilation could panic +in debug mode for regexes with large counted repetitions. For example, +`a{2147483516}{2147483416}{5}` resulted in an integer overflow that wrapped +in release mode but panicked in debug mode. Despite the unintended wrapping +arithmetic in release mode, it didn't cause any other logical bugs since the +errant code was for new analysis that wasn't used yet. + +Bug fixes: + +* [BUG #995](https://github.com/rust-lang/regex/issues/995): +Fix a bug where regex compilation with large counted repetitions could panic. + + +1.8.1 (2023-04-21) +================== +This is a patch release that fixes a bug where a regex match could be reported +where none was found. Specifically, the bug occurs when a pattern contains some +literal prefixes that could be extracted _and_ an optional word boundary in the +prefix. + +Bug fixes: + +* [BUG #981](https://github.com/rust-lang/regex/issues/981): +Fix a bug where a word boundary could interact with prefix literal +optimizations and lead to a false positive match. + + +1.8.0 (2023-04-20) +================== +This is a sizeable release that will soon be followed by another sizeable +release. Combined, they will close over 40 existing issues and PRs. + +This first release, despite its size, essentially represents preparatory work +for the second release, which will be even bigger. Namely, this release: + +* Increases the MSRV to Rust 1.60.0, which was released about 1 year ago. +* Upgrades its dependency on `aho-corasick` to the recently released 1.0 +version. +* Upgrades its dependency on `regex-syntax` to the simultaneously released +`0.7` version. The changes to `regex-syntax` principally revolve around a +rewrite of its literal extraction code and a number of simplifications and +optimizations to its high-level intermediate representation (HIR). + +The second release, which will follow ~shortly after the release above, will +contain a soup-to-nuts rewrite of every regex engine. This will be done by +bringing [`regex-automata`](https://github.com/BurntSushi/regex-automata) into +this repository, and then changing the `regex` crate to be nothing but an API +shim layer on top of `regex-automata`'s API. + +These tandem releases are the culmination of about 3 +years of on-and-off work that [began in earnest in March +2020](https://github.com/rust-lang/regex/issues/656). + +Because of the scale of changes involved in these releases, I would love to +hear about your experience. Especially if you notice undocumented changes in +behavior or performance changes (positive *or* negative). + +Most changes in the first release are listed below.
For more details, please +see the commit log, which reflects a linear and decently documented history +of all changes. + +New features: + +* [FEATURE #501](https://github.com/rust-lang/regex/issues/501): +Permit many more characters to be escaped, even if they have no significance. +More specifically, any ASCII character except for `[0-9A-Za-z<>]` can now be +escaped. Also, a new routine, `is_escapeable_character`, has been added to +`regex-syntax` to query whether a character is escapeable or not. +* [FEATURE #547](https://github.com/rust-lang/regex/issues/547): +Add `Regex::captures_at`. This fills a hole in the API, but doesn't otherwise +introduce any new expressive power. +* [FEATURE #595](https://github.com/rust-lang/regex/issues/595): +Capture group names are now Unicode-aware. They can now begin with either a `_` +or any "alphabetic" codepoint. After the first codepoint, subsequent codepoints +can be any sequence of alpha-numeric codepoints, along with `_`, `.`, `[` and +`]`. Note that replacement syntax has not changed. +* [FEATURE #810](https://github.com/rust-lang/regex/issues/810): +Add `Match::is_empty` and `Match::len` APIs. +* [FEATURE #905](https://github.com/rust-lang/regex/issues/905): +Add an `impl Default for RegexSet`, with the default being the empty set. +* [FEATURE #908](https://github.com/rust-lang/regex/issues/908): +A new method, `Regex::static_captures_len`, has been added which returns the +number of capture groups in the pattern if and only if every possible match +always contains the same number of matching groups. +* [FEATURE #955](https://github.com/rust-lang/regex/issues/955): +Named captures can now be written as `(?<name>re)` in addition to +`(?P<name>re)`. +* FEATURE: `regex-syntax` now supports empty character classes. +* FEATURE: `regex-syntax` now has an optional `std` feature. (This will come +to `regex` in the second release.) +* FEATURE: The `Hir` type in `regex-syntax` has had a number of simplifications +made to it. +* FEATURE: `regex-syntax` has support for a new `R` flag for enabling CRLF +mode. This will be supported in `regex` proper in the second release. +* FEATURE: `regex-syntax` now has proper support for "regex that never +matches" via `Hir::fail()`. +* FEATURE: The `hir::literal` module of `regex-syntax` has been completely +re-worked. It now has more documentation, examples and advice. +* FEATURE: The `allow_invalid_utf8` option in `regex-syntax` has been renamed +to `utf8`, and the meaning of the boolean has been flipped. + +Performance improvements: + +* PERF: The upgrade to `aho-corasick 1.0` may improve performance in some +cases. It's difficult to characterize exactly which patterns this might impact, +but if there are a small number of longish (>= 4 bytes) prefix literals, then +it might be faster than before. + +Bug fixes: + +* [BUG #514](https://github.com/rust-lang/regex/issues/514): +Improve `Debug` impl for `Match` so that it doesn't show the entire haystack. +* BUGS [#516](https://github.com/rust-lang/regex/issues/516), +[#731](https://github.com/rust-lang/regex/issues/731): +Fix a number of issues with printing `Hir` values as regex patterns. +* [BUG #610](https://github.com/rust-lang/regex/issues/610): +Add explicit example of `foo|bar` in the regex syntax docs. +* [BUG #625](https://github.com/rust-lang/regex/issues/625): +Clarify that `SetMatches::len` does not (regrettably) refer to the number of +matches in the set.
+* [BUG #660](https://github.com/rust-lang/regex/issues/660):
+Clarify "verbose mode" in regex syntax documentation.
+* BUG [#738](https://github.com/rust-lang/regex/issues/738),
+[#950](https://github.com/rust-lang/regex/issues/950):
+Fix `CaptureLocations::get` so that it never panics.
+* [BUG #747](https://github.com/rust-lang/regex/issues/747):
+Clarify documentation for `Regex::shortest_match`.
+* [BUG #835](https://github.com/rust-lang/regex/issues/835):
+Fix `\p{Sc}` so that it is equivalent to `\p{Currency_Symbol}`.
+* [BUG #846](https://github.com/rust-lang/regex/issues/846):
+Add more clarifying documentation to the `CompiledTooBig` error variant.
+* [BUG #854](https://github.com/rust-lang/regex/issues/854):
+Clarify that `regex::Regex` searches as if the haystack is a sequence of
+Unicode scalar values.
+* [BUG #884](https://github.com/rust-lang/regex/issues/884):
+Replace `__Nonexhaustive` variants with `#[non_exhaustive]` attribute.
+* [BUG #893](https://github.com/rust-lang/regex/pull/893):
+Optimize case folding since it can get quite slow in some pathological cases.
+* [BUG #895](https://github.com/rust-lang/regex/issues/895):
+Reject `(?-u:\W)` in `regex::Regex` APIs.
+* [BUG #942](https://github.com/rust-lang/regex/issues/942):
+Add a missing `void` keyword to indicate "no parameters" in C API.
+* [BUG #965](https://github.com/rust-lang/regex/issues/965):
+Fix `\p{Lc}` so that it is equivalent to `\p{Cased_Letter}`.
+* [BUG #975](https://github.com/rust-lang/regex/issues/975):
+Clarify documentation for `\pX` syntax.
+
+
 1.7.3 (2023-03-24)
 ==================
 This is a small release that fixes a bug in `Regex::shortest_match_at` that
@@ -743,7 +1219,7 @@ Bug fixes:
 ==================
 This release includes a ground-up rewrite of the regex-syntax crate, which has
 been in development for over a year.
 
 New features:
 
 * Error messages for invalid regexes have been greatly improved. You get these
diff --git a/vendor/regex/Cargo.toml b/vendor/regex/Cargo.toml
index 37e44fb..f514d35 100644
--- a/vendor/regex/Cargo.toml
+++ b/vendor/regex/Cargo.toml
@@ -10,10 +10,14 @@
 # See Cargo.toml.orig for the original contents.
 [package]
-edition = "2018"
+edition = "2021"
+rust-version = "1.65"
 name = "regex"
-version = "1.7.3"
-authors = ["The Rust Project Developers"]
+version = "1.10.2"
+authors = [
+    "The Rust Project Developers",
+    "Andrew Gallant <jamslam@gmail.com>",
+]
 exclude = [
     "/scripts/*",
     "/.github/*",
@@ -30,82 +34,78 @@ categories = ["text-processing"]
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/rust-lang/regex"
 
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = [
+    "--cfg",
+    "docsrs",
+]
+
 [profile.bench]
-debug = true
+debug = 2
+
+[profile.dev]
+opt-level = 3
+debug = 2
 
 [profile.release]
-debug = true
+debug = 2
 
 [profile.test]
-debug = true
-
-[lib]
-doctest = false
-bench = false
-
-[[test]]
-name = "default"
-path = "tests/test_default.rs"
-
-[[test]]
-name = "default-bytes"
-path = "tests/test_default_bytes.rs"
-
-[[test]]
-name = "nfa"
-path = "tests/test_nfa.rs"
+opt-level = 3
+debug = 2
 
 [[test]]
-name = "nfa-utf8bytes"
-path = "tests/test_nfa_utf8bytes.rs"
-
-[[test]]
-name = "nfa-bytes"
-path = "tests/test_nfa_bytes.rs"
-
-[[test]]
-name = "backtrack"
-path = "tests/test_backtrack.rs"
-
-[[test]]
-name = "backtrack-utf8bytes"
-path = "tests/test_backtrack_utf8bytes.rs"
-
-[[test]]
-name = "backtrack-bytes"
-path = "tests/test_backtrack_bytes.rs"
-
-[[test]]
-name = "crates-regex"
-path = "tests/test_crates_regex.rs"
+name = "integration"
+path = "tests/lib.rs"
 
 [dependencies.aho-corasick]
-version = "0.7.18"
+version = "1.0.0"
 optional = true
 
 [dependencies.memchr]
-version = "2.4.0"
+version = "2.6.0"
 optional = true
 
+[dependencies.regex-automata]
+version = "0.4.3"
+features = [
+    "alloc",
+    "syntax",
+    "meta",
+    "nfa-pikevm",
+]
+default-features = false
+
 [dependencies.regex-syntax]
-version = "0.6.29"
+version = "0.8.2"
 default-features = false
 
-[dev-dependencies.lazy_static]
-version = "1"
+[dev-dependencies.anyhow]
+version = "1.0.69"
 
-[dev-dependencies.quickcheck]
-version = "1.0.3"
-default-features = false
+[dev-dependencies.doc-comment]
+version = "0.3"
 
-[dev-dependencies.rand]
-version = "0.8.3"
+[dev-dependencies.env_logger]
+version = "0.9.3"
 features = [
-    "getrandom",
-    "small_rng",
+    "atty",
+    "humantime",
+    "termcolor",
 ]
 default-features = false
 
+[dev-dependencies.once_cell]
+version = "1.17.1"
+
+[dev-dependencies.quickcheck]
+version = "1.0.3"
+default-features = false
+
+[dev-dependencies.regex-test]
+version = "0.1.0"
+
 [features]
 default = [
     "std",
@@ -113,21 +113,40 @@ default = [
     "unicode",
     "regex-syntax/default",
 ]
+logging = [
+    "aho-corasick?/logging",
+    "memchr?/logging",
+    "regex-automata/logging",
+]
 pattern = []
 perf = [
     "perf-cache",
     "perf-dfa",
+    "perf-onepass",
+    "perf-backtrack",
     "perf-inline",
     "perf-literal",
 ]
+perf-backtrack = ["regex-automata/nfa-backtrack"]
 perf-cache = []
-perf-dfa = []
-perf-inline = []
+perf-dfa = ["regex-automata/hybrid"]
+perf-dfa-full = [
+    "regex-automata/dfa-build",
+    "regex-automata/dfa-search",
+]
+perf-inline = ["regex-automata/perf-inline"]
 perf-literal = [
-    "aho-corasick",
-    "memchr",
+    "dep:aho-corasick",
+    "dep:memchr",
+    "regex-automata/perf-literal",
+]
+perf-onepass = ["regex-automata/dfa-onepass"]
+std = [
+    "aho-corasick?/std",
+    "memchr?/std",
+    "regex-automata/std",
+    "regex-syntax/std",
 ]
-std = []
 unicode = [
     "unicode-age",
     "unicode-bool",
@@ -136,14 +155,37 @@ unicode = [
     "unicode-perl",
     "unicode-script",
     "unicode-segment",
+    "regex-automata/unicode",
     "regex-syntax/unicode",
 ]
-unicode-age = ["regex-syntax/unicode-age"]
-unicode-bool = ["regex-syntax/unicode-bool"]
-unicode-case = ["regex-syntax/unicode-case"]
["regex-syntax/unicode-case"] -unicode-gencat = ["regex-syntax/unicode-gencat"] -unicode-perl = ["regex-syntax/unicode-perl"] -unicode-script = ["regex-syntax/unicode-script"] -unicode-segment = ["regex-syntax/unicode-segment"] +unicode-age = [ + "regex-automata/unicode-age", + "regex-syntax/unicode-age", +] +unicode-bool = [ + "regex-automata/unicode-bool", + "regex-syntax/unicode-bool", +] +unicode-case = [ + "regex-automata/unicode-case", + "regex-syntax/unicode-case", +] +unicode-gencat = [ + "regex-automata/unicode-gencat", + "regex-syntax/unicode-gencat", +] +unicode-perl = [ + "regex-automata/unicode-perl", + "regex-automata/unicode-word-boundary", + "regex-syntax/unicode-perl", +] +unicode-script = [ + "regex-automata/unicode-script", + "regex-syntax/unicode-script", +] +unicode-segment = [ + "regex-automata/unicode-segment", + "regex-syntax/unicode-segment", +] unstable = ["pattern"] use_std = ["std"] diff --git a/vendor/regex/Cross.toml b/vendor/regex/Cross.toml new file mode 100644 index 0000000..5415e7a --- /dev/null +++ b/vendor/regex/Cross.toml @@ -0,0 +1,7 @@ +[build.env] +passthrough = [ + "RUST_BACKTRACE", + "RUST_LOG", + "REGEX_TEST", + "REGEX_TEST_VERBOSE", +] diff --git a/vendor/regex/HACKING.md b/vendor/regex/HACKING.md deleted file mode 100644 index 34af5b5..0000000 --- a/vendor/regex/HACKING.md +++ /dev/null @@ -1,341 +0,0 @@ -Your friendly guide to hacking and navigating the regex library. - -This guide assumes familiarity with Rust and Cargo, and at least a perusal of -the user facing documentation for this crate. - -If you're looking for background on the implementation in this library, then -you can do no better than Russ Cox's article series on implementing regular -expressions using finite automata: https://swtch.com/~rsc/regexp/ - - -## Architecture overview - -As you probably already know, this library executes regular expressions using -finite automata. In particular, a design goal is to make searching linear -with respect to both the regular expression and the text being searched. -Meeting that design goal on its own is not so hard and can be done with an -implementation of the Pike VM (similar to Thompson's construction, but supports -capturing groups), as described in: https://swtch.com/~rsc/regexp/regexp2.html ---- This library contains such an implementation in src/pikevm.rs. - -Making it fast is harder. One of the key problems with the Pike VM is that it -can be in more than one state at any point in time, and must shuffle capture -positions between them. The Pike VM also spends a lot of time following the -same epsilon transitions over and over again. We can employ one trick to -speed up the Pike VM: extract one or more literal prefixes from the regular -expression and execute specialized code to quickly find matches of those -prefixes in the search text. The Pike VM can then be avoided for most the -search, and instead only executed when a prefix is found. The code to find -prefixes is in the regex-syntax crate (in this repository). The code to search -for literals is in src/literals.rs. When more than one literal prefix is found, -we fall back to an Aho-Corasick DFA using the aho-corasick crate. For one -literal, we use a variant of the Boyer-Moore algorithm. Both Aho-Corasick and -Boyer-Moore use `memchr` when appropriate. The Boyer-Moore variant in this -library also uses elementary frequency analysis to choose the right byte to run -`memchr` with. - -Of course, detecting prefix literals can only take us so far. 
Not all regular -expressions have literal prefixes. To remedy this, we try another approach -to executing the Pike VM: backtracking, whose implementation can be found in -src/backtrack.rs. One reason why backtracking can be faster is that it avoids -excessive shuffling of capture groups. Of course, backtracking is susceptible -to exponential runtimes, so we keep track of every state we've visited to make -sure we never visit it again. This guarantees linear time execution, but we -pay for it with the memory required to track visited states. Because of the -memory requirement, we only use this engine on small search strings *and* small -regular expressions. - -Lastly, the real workhorse of this library is the "lazy" DFA in src/dfa.rs. -It is distinct from the Pike VM in that the DFA is explicitly represented in -memory and is only ever in one state at a time. It is said to be "lazy" because -the DFA is computed as text is searched, where each byte in the search text -results in at most one new DFA state. It is made fast by caching states. DFAs -are susceptible to exponential state blow up (where the worst case is computing -a new state for every input byte, regardless of what's in the state cache). To -avoid using a lot of memory, the lazy DFA uses a bounded cache. Once the cache -is full, it is wiped and state computation starts over again. If the cache is -wiped too frequently, then the DFA gives up and searching falls back to one of -the aforementioned algorithms. - -All of the above matching engines expose precisely the same matching semantics. -This is indeed tested. (See the section below about testing.) - -The following sub-sections describe the rest of the library and how each of the -matching engines are actually used. - -### Parsing - -Regular expressions are parsed using the regex-syntax crate, which is -maintained in this repository. The regex-syntax crate defines an abstract -syntax and provides very detailed error messages when a parse error is -encountered. Parsing is done in a separate crate so that others may benefit -from its existence, and because it is relatively divorced from the rest of the -regex library. - -The regex-syntax crate also provides sophisticated support for extracting -prefix and suffix literals from regular expressions. - -### Compilation - -The compiler is in src/compile.rs. The input to the compiler is some abstract -syntax for a regular expression and the output is a sequence of opcodes that -matching engines use to execute a search. (One can think of matching engines as -mini virtual machines.) The sequence of opcodes is a particular encoding of a -non-deterministic finite automaton. In particular, the opcodes explicitly rely -on epsilon transitions. - -Consider a simple regular expression like `a|b`. Its compiled form looks like -this: - - 000 Save(0) - 001 Split(2, 3) - 002 'a' (goto: 4) - 003 'b' - 004 Save(1) - 005 Match - -The first column is the instruction pointer and the second column is the -instruction. Save instructions indicate that the current position in the input -should be stored in a captured location. Split instructions represent a binary -branch in the program (i.e., epsilon transitions). The instructions `'a'` and -`'b'` indicate that the literal bytes `'a'` or `'b'` should match. 
-
-In older versions of this library, the compilation looked like this:
-
-    000 Save(0)
-    001 Split(2, 3)
-    002 'a'
-    003 Jump(5)
-    004 'b'
-    005 Save(1)
-    006 Match
-
-In particular, empty instructions that merely served to move execution from
-one point in the program to another were removed. Instead, every instruction
-has a `goto` pointer embedded into it. This resulted in a small performance
-boost for the Pike VM, because it was one fewer epsilon transition that it
-had to follow.
-
-There exist more instructions and they are defined and documented in
-src/prog.rs.
-
-Compilation has several knobs and a few unfortunately complicated invariants.
-Namely, the output of compilation can be one of two types of programs: a
-program that executes on Unicode scalar values or a program that executes
-on raw bytes. In the former case, the matching engine is responsible for
-performing UTF-8 decoding and executing instructions using Unicode
-codepoints. In the latter case, the program handles UTF-8 decoding
-implicitly, so that the matching engine can execute on raw bytes. All
-matching engines can execute either Unicode or byte based programs except for
-the lazy DFA, which requires byte based programs. In general, both
-representations were kept because (1) the lazy DFA requires byte based
-programs so that states can be encoded in a memory efficient manner and (2)
-the Pike VM benefits greatly from inlining Unicode character classes into
-fewer instructions as it results in fewer epsilon transitions.
-
-N.B. UTF-8 decoding is built into the compiled program by making use of the
-utf8-ranges crate. The compiler in this library factors out common suffixes
-to reduce the size of huge character classes (e.g., `\pL`).
-
-A regrettable consequence of this split in instruction sets is that we
-generally need to compile two programs: one for NFA execution and one for the
-lazy DFA.
-
-In fact, it is worse than that: the lazy DFA is not capable of finding the
-starting location of a match in a single scan, and must instead execute a
-backwards search after finding the end location. To execute a backwards
-search, we must have compiled the regular expression *in reverse*.
-
-This means that every compilation of a regular expression generally results
-in three distinct programs. It would be possible to lazily compile the
-Unicode program, since it is never needed if (1) the regular expression uses
-no word boundary assertions and (2) the caller never asks for sub-capture
-locations.
-
-### Execution
-
-At the time of writing, there are four matching engines in this library:
-
-1. The Pike VM (supports captures).
-2. Bounded backtracking (supports captures).
-3. Literal substring or multi-substring search.
-4. Lazy DFA (no support for Unicode word boundary assertions).
-
-Only the first two matching engines are capable of executing every regular
-expression program. They also happen to be the slowest, which means we need
-some logic that (1) knows various facts about the regular expression and (2)
-knows what the caller wants. Using this information, we can determine which
-engine (or engines) to use.
-
-The logic for choosing which engine to execute is in src/exec.rs and is
-documented on the Exec type. Exec values contain regular expression Programs
-(defined in src/prog.rs), which contain all the necessary tidbits for
-actually executing a regular expression on search text. The sketch below
-illustrates the kind of decision being made.
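-
-As a rough illustration of that decision, here is a sketch with invented
-names (`Engine`, `Query`, `choose`, and the backtracking limit are all made
-up for this example; the real logic in src/exec.rs considers more facts and
-can combine engines):
-
-```rust
-#[derive(Debug, PartialEq)]
-enum Engine {
-    Literals,
-    LazyDfa,
-    Backtrack,
-    PikeVm,
-}
-
-struct Query {
-    literal_only: bool,          // fact about the regex
-    unicode_word_boundary: bool, // fact about the regex
-    program_size: usize,         // fact about the compiled program
-    haystack_len: usize,         // fact about the search
-    wants_captures: bool,        // what the caller asked for
-}
-
-// A made-up bound under which the backtracker's visited-state table stays
-// small enough to be worth using.
-const BACKTRACK_LIMIT: usize = 1 << 18;
-
-fn choose(q: &Query) -> Engine {
-    if q.literal_only {
-        // No regex engine needed at all; substring search suffices.
-        Engine::Literals
-    } else if q.unicode_word_boundary || q.wants_captures {
-        // The lazy DFA can't resolve capture locations or Unicode word
-        // boundaries, so an NFA simulation must run. Backtracking is only
-        // safe when the visited-state table stays small.
-        if q.program_size * q.haystack_len <= BACKTRACK_LIMIT {
-            Engine::Backtrack
-        } else {
-            Engine::PikeVm
-        }
-    } else {
-        Engine::LazyDfa
-    }
-}
-
-fn main() {
-    let q = Query {
-        literal_only: false,
-        unicode_word_boundary: false,
-        program_size: 50,
-        haystack_len: 1_000,
-        wants_captures: true,
-    };
-    assert_eq!(choose(&q), Engine::Backtrack);
-}
-```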
-
-For the most part, the execution logic is straightforward and follows the
-limitations of each engine described above pretty faithfully. The hairiest
-part of src/exec.rs by far is the execution of the lazy DFA, since it
-requires a forwards and backwards search, and then falls back to either the
-Pike VM or backtracking if the caller requested capture locations.
-
-The Exec type also contains mutable scratch space for each type of matching
-engine. This scratch space is used during search (for example, for the lazy
-DFA, it contains compiled states that are reused on subsequent searches).
-
-### Programs
-
-A regular expression program is essentially a sequence of opcodes produced by
-the compiler plus various facts about the regular expression (such as whether
-it is anchored, its capture names, etc.).
-
-### The regex! macro
-
-The `regex!` macro no longer exists. It was developed in a bygone era as a
-compiler plugin during the infancy of the regex crate. Back then, the only
-matching engine in the crate was the Pike VM. The `regex!` macro was, itself,
-also a Pike VM. The only advantages it offered over the dynamic Pike VM that
-was built at runtime were the following:
-
-  1. Syntax checking was done at compile time. Your Rust program wouldn't
-     compile if your regex didn't compile.
-  2. Reduction of overhead that was proportional to the size of the regex.
-     For the most part, this overhead consisted of heap allocation, which
-     was nearly eliminated in the compiler plugin.
-
-The main takeaway here is that the compiler plugin was a marginally faster
-version of a slow regex engine. As the regex crate evolved, it grew other
-regex engines (DFA, bounded backtracker) and sophisticated literal
-optimizations. The regex macro didn't keep pace, and it therefore became
-(dramatically) slower than the dynamic engines. The only reason left to use
-it was for the compile time guarantee that your regex is correct.
-Fortunately, Clippy (the Rust lint tool) has a lint that checks the validity
-of your regular expressions, which mostly replaces that use case.
-
-Additionally, the regex compiler plugin stopped receiving maintenance. Nobody
-complained. At that point, it seemed prudent to just remove it.
-
-Will a compiler plugin be brought back? The future is murky, but there is
-definitely an opportunity there to build something that is faster than the
-dynamic engines in some cases. But it will be challenging! As of now, there
-are no plans to work on this.
-
-
-## Testing
-
-A key aspect of any mature regex library is its test suite. A subset of the
-tests in this library come from Glenn Fowler's AT&T test suite (its online
-presence seems gone at the time of writing). The source of the test suite is
-located in src/testdata. The scripts/regex-match-tests.py takes the test
-suite in src/testdata and generates tests/matches.rs.
-
-There are also many other manually crafted tests and regression tests in
-tests/tests.rs. Some of these tests were taken from RE2.
-
-The biggest source of complexity in the tests is related to answering this
-question: how can we reuse the tests to check all of our matching engines?
-One approach would have been to encode every test into some kind of format
-(like the AT&T test suite) and code generate tests for each matching engine.
-The approach we use in this library is to create a Cargo.toml entry point for
-each matching engine we want to test. The entry points are:
-
-* `tests/test_default.rs` - tests `Regex::new`
-* `tests/test_default_bytes.rs` - tests `bytes::Regex::new`
-* `tests/test_nfa.rs` - tests `Regex::new`, forced to use the NFA
-  algorithm on every regex.
-* `tests/test_nfa_bytes.rs` - tests `Regex::new`, forced to use the NFA
-  algorithm on every regex and use *arbitrary* byte based programs.
-* `tests/test_nfa_utf8bytes.rs` - tests `Regex::new`, forced to use the NFA
-  algorithm on every regex and use *UTF-8* byte based programs.
-* `tests/test_backtrack.rs` - tests `Regex::new`, forced to use
-  backtracking on every regex.
-* `tests/test_backtrack_bytes.rs` - tests `Regex::new`, forced to use
-  backtracking on every regex and use *arbitrary* byte based programs.
-* `tests/test_backtrack_utf8bytes.rs` - tests `Regex::new`, forced to use
-  backtracking on every regex and use *UTF-8* byte based programs.
-* `tests/test_crates_regex.rs` - tests to make sure that all of the
-  backends behave in the same way against a number of quickcheck
-  generated random inputs. These tests need to be enabled through
-  the `RUST_REGEX_RANDOM_TEST` environment variable (see
-  below).
-
-The lazy DFA and pure literal engines are absent from this list because
-they cannot be used on every regular expression. Instead, we rely on
-`tests/test_dynamic.rs` to test the lazy DFA and literal engines when
-possible.
-
-Since the tests are repeated several times, and because `cargo test` runs all
-entry points, it can take a while to compile everything. To reduce compile
-times slightly, try using `cargo test --test default`, which will only use
-the `tests/test_default.rs` entry point.
-
-The random testing takes quite a while, so it is not enabled by default.
-In order to run the random testing you can set the
-`RUST_REGEX_RANDOM_TEST` environment variable to anything before
-invoking `cargo test`. Note that this variable is inspected at compile
-time, so if the tests don't seem to be running, you may need to run
-`cargo clean`.
-
-## Benchmarking
-
-The benchmarking in this crate is made up of many micro-benchmarks.
-Currently, there are two primary sets of benchmarks: the benchmarks that were
-adopted at this library's inception (in `bench/src/misc.rs`) and a newer set
-of benchmarks meant to test various optimizations. Specifically, the latter
-set contains some analysis and is in `bench/src/sherlock.rs`. Also, the
-latter set is executed entirely on the same lengthy input whereas the former
-benchmarks are executed on strings of varying length.
-
-There is also a smattering of benchmarks for parsing and compilation.
-
-Benchmarks are in a separate crate so that its dependencies can be managed
-separately from the main regex crate.
-
-Benchmarking follows a similarly wonky setup to the tests. There are multiple
-entry points:
-
-* `bench_rust.rs` - benchmarks `Regex::new`
-* `bench_rust_bytes.rs` - benchmarks `bytes::Regex::new`
-* `bench_pcre.rs` - benchmarks PCRE
-* `bench_onig.rs` - benchmarks Oniguruma
-
-The PCRE and Oniguruma benchmarks exist as a comparison point to a mature
-regular expression library. In general, this regex library compares favorably
-(there are even a few benchmarks that PCRE simply runs too slowly on or
-outright can't execute at all). I would love to add other regular expression
-library benchmarks (especially RE2).
-
-If you're hacking on one of the matching engines and just want to see
-benchmarks, then all you need to run is:
-
-    $ (cd bench && ./run rust)
-
-If you want to compare your results with older benchmarks, then try:
-
-    $ (cd bench && ./run rust | tee old)
-    $ ... make it faster
-    $ (cd bench && ./run rust | tee new)
-    $ cargo benchcmp old new --improvements
-
-The `cargo-benchcmp` utility is available here:
-https://github.com/BurntSushi/cargo-benchcmp
-
-The `./bench/run` utility can run benchmarks for PCRE and Oniguruma too. See
-`./bench/bench --help`.
-
-## Dev Docs
-
-When digging your teeth into the codebase for the first time, the
-crate documentation can be a great resource. By default `rustdoc`
-will strip out all documentation of private crate members in an
-effort to help consumers of the crate focus on the *interface*
-without having to concern themselves with the *implementation*.
-Normally this is a great thing, but if you want to start hacking
-on regex internals it is not what you want. Many of the private members
-of this crate are well documented with rustdoc style comments, and
-it would be a shame to miss out on the opportunity that it presents.
-You can generate the private docs with:
-
-```
-$ rustdoc --crate-name docs src/lib.rs -o target/doc -L target/debug/deps --no-defaults --passes collapse-docs --passes unindent-comments
-```
-
-Then just point your browser at `target/doc/regex/index.html`.
-
-See https://github.com/rust-lang/rust/issues/15347 for more info
-about generating developer docs for internal use.
diff --git a/vendor/regex/PERFORMANCE.md b/vendor/regex/PERFORMANCE.md
deleted file mode 100644
index 8cd0d9c..0000000
--- a/vendor/regex/PERFORMANCE.md
+++ /dev/null
@@ -1,277 +0,0 @@
-Your friendly guide to understanding the performance characteristics of this
-crate.
-
-This guide assumes some familiarity with the public API of this crate, which
-can be found here: https://docs.rs/regex
-
-## Theory vs. Practice
-
-One of the design goals of this crate is to provide worst case linear time
-behavior with respect to the text searched using finite state automata. This
-means that, *in theory*, the performance of this crate is much better than
-most regex implementations, which typically use backtracking and have worst
-case exponential time.
-
-For example, try opening a Python interpreter and typing this:
-
-    >>> import re
-    >>> re.search('(a*)*c', 'a' * 30).span()
-
-I'll wait.
-
-At some point, you'll figure out that it won't terminate any time soon. ^C it.
-
-The promise of this crate is that *this pathological behavior can't happen*.
-
-With that said, just because we have protected ourselves against worst case
-exponential behavior doesn't mean we are immune from large constant factors
-or places where the current regex engine isn't quite optimal. This guide will
-detail those cases and provide guidance on how to avoid them, among other
-bits of general advice.
-
-## Thou Shalt Not Compile Regular Expressions In A Loop
-
-**Advice**: Use `lazy_static` to amortize the cost of `Regex` compilation.
-
-Don't do it unless you really don't mind paying for it. Compiling a regular
-expression in this crate is quite expensive. It is conceivable that it may
-get faster some day, but I wouldn't hold out hope for, say, an order of
-magnitude improvement. In particular, compilation can take anywhere from a
-few dozen microseconds to a few dozen milliseconds. Yes, milliseconds.
-Unicode character
-classes, in particular, have the largest impact on compilation performance.
-At the time of writing, for example, `\pL{100}` takes around 44ms to compile.
-This is because `\pL` corresponds to every letter in Unicode and compilation
-must turn it into a proper automaton that decodes a subset of UTF-8 which
-corresponds to those letters. Compilation also spends some cycles shrinking
-the size of the automaton.
-
-This means that in order to realize efficient regex matching, one must
-*amortize the cost of compilation*. Trivially, if a call to `is_match` is
-inside a loop, then make sure your call to `Regex::new` is *outside* that
-loop.
-
-In many programming languages, regular expressions can be conveniently
-defined and compiled in a global scope, and code can reach out and use them
-as if they were global static variables. In Rust, there is really no concept
-of life-before-main, and therefore, one cannot utter this:
-
-    static MY_REGEX: Regex = Regex::new("...").unwrap();
-
-Unfortunately, this would seem to imply that one must pass `Regex` objects
-around to everywhere they are used, which can be especially painful depending
-on how your program is structured. Thankfully, the
-[`lazy_static`](https://crates.io/crates/lazy_static)
-crate provides an answer that works well:
-
-    use lazy_static::lazy_static;
-    use regex::Regex;
-
-    fn some_helper_function(text: &str) -> bool {
-        lazy_static! {
-            static ref MY_REGEX: Regex = Regex::new("...").unwrap();
-        }
-        MY_REGEX.is_match(text)
-    }
-
-In other words, the `lazy_static!` macro enables us to define a `Regex` *as
-if* it were a global static value. What is actually happening under the
-covers is that the code inside the macro (i.e., `Regex::new(...)`) is run on
-*first use* of `MY_REGEX` via a `Deref` impl. The implementation is
-admittedly magical, but it's self contained and everything works exactly as
-you expect. In particular, `MY_REGEX` can be used from multiple threads
-without wrapping it in an `Arc` or a `Mutex`. On that note...
-
-## Using a regex from multiple threads
-
-**Advice**: The performance impact from using a `Regex` from multiple threads
-is likely negligible. If necessary, clone the `Regex` so that each thread
-gets its own copy. Cloning a regex does not use any more memory than using a
-`Regex` from multiple threads simultaneously. *Its only cost is ergonomics.*
-
-It is supported and encouraged to define your regexes using `lazy_static!`
-as if they were global static values, and then use them to search text from
-multiple threads simultaneously, as the sketch below shows.
-
-One might imagine that this is possible because a `Regex` represents a
-*compiled* program, so that any allocation or mutation is already done, and
-is therefore read-only. Unfortunately, this is not true. Each type of search
-strategy in this crate requires some kind of mutable scratch space to use
-*during search*. For example, when executing a DFA, its states are computed
-lazily and reused on subsequent searches. Those states go into that mutable
-scratch space.
-
-The mutable scratch space is an implementation detail, and in general, its
-mutation should not be observable from users of this crate. Therefore, it
-uses interior mutability. This implies that `Regex` can either only be used
-from one thread, or it must do some sort of synchronization. Either choice
-is reasonable, but this crate chooses the latter, in particular because it
-is ergonomic and makes use with `lazy_static!` straightforward.
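-
-For example, this is a perfectly fine way to share one compiled `Regex`
-across several threads (a minimal sketch, assuming `lazy_static` and `regex`
-as dependencies; the pattern and thread count here are arbitrary):
-
-    use lazy_static::lazy_static;
-    use regex::Regex;
-    use std::thread;
-
-    lazy_static! {
-        // Compiled once, on first use, then shared by every thread.
-        static ref WORD: Regex = Regex::new(r"\w+").unwrap();
-    }
-
-    fn main() {
-        let handles: Vec<_> = (0..4)
-            .map(|i| {
-                thread::spawn(move || {
-                    // No locking needed by the caller: `Regex` synchronizes
-                    // access to its internal scratch space on its own.
-                    WORD.is_match(&format!("haystack number {}", i))
-                })
-            })
-            .collect();
-        for handle in handles {
-            assert!(handle.join().unwrap());
-        }
-    }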
-
-Synchronization implies *some* amount of overhead. When a `Regex` is used
-from a single thread, this overhead is negligible. When a `Regex` is used
-from multiple threads simultaneously, it is possible for the overhead of
-synchronization from contention to impact performance. The specific cases
-where contention may happen are when you are calling any of these methods
-repeatedly from multiple threads simultaneously:
-
-* shortest_match
-* is_match
-* find
-* captures
-
-In particular, every invocation of one of these methods must synchronize
-with other threads to retrieve its mutable scratch space before searching
-can start. If, however, you are using one of these methods:
-
-* find_iter
-* captures_iter
-
-Then you may not suffer from contention since the cost of synchronization is
-amortized on *construction of the iterator*. That is, the mutable scratch
-space is obtained when the iterator is created and retained throughout its
-lifetime.
-
-## Only ask for what you need
-
-**Advice**: Prefer in this order: `is_match`, `find`, `captures`.
-
-There are three primary search methods on a `Regex`:
-
-* is_match
-* find
-* captures
-
-In general, these are ordered from fastest to slowest.
-
-`is_match` is fastest because it doesn't actually need to find the start or
-the end of the leftmost-first match. It can quit immediately after it knows
-there is a match. For example, given the regex `a+` and the haystack
-`aaaaa`, the search will quit after examining the first byte.
-
-In contrast, `find` must return both the start and end location of the
-leftmost-first match. It can use the DFA matcher for this, but must run it
-forwards once to find the end of the match *and then run it backwards* to
-find the start of the match. The two scans and the cost of finding the real
-end of the leftmost-first match make this more expensive than `is_match`.
-
-`captures` is the most expensive of them all because it must do what `find`
-does, and then run either the bounded backtracker or the Pike VM to fill in
-the capture group locations. Both of these are simulations of an NFA, which
-must spend a lot of time shuffling states around. The DFA limits the
-performance hit somewhat by restricting the amount of text that must be
-searched via an NFA simulation.
-
-One other method not mentioned is `shortest_match`. This method has precisely
-the same performance characteristics as `is_match`, except it returns the
-end location at the point a match was discovered. For example, given the
-regex `a+` and the haystack `aaaaa`, `shortest_match` may return `1` as
-opposed to `5`, the latter of which is the correct end location of the
-leftmost-first match.
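-
-The difference is easy to observe directly (a small, runnable sketch against
-this crate's 1.x API):
-
-    use regex::Regex;
-
-    fn main() {
-        let re = Regex::new(r"a+").unwrap();
-        let hay = "aaaaa";
-
-        // Only answers *whether* there is a match.
-        assert!(re.is_match(hay));
-
-        // Reports an end position as soon as a match is known: here,
-        // after examining just one byte.
-        assert_eq!(re.shortest_match(hay), Some(1));
-
-        // Must determine the full extent of the leftmost-first match.
-        let m = re.find(hay).unwrap();
-        assert_eq!((m.start(), m.end()), (0, 5));
-    }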
-
-## Literals in your regex may make it faster
-
-**Advice**: Literals can reduce the work that the regex engine needs to do.
-Use them if you can, especially as prefixes.
-
-In particular, if your regex starts with a prefix literal, the prefix is
-quickly searched before entering the (much slower) regex engine. For
-example, given the regex `foo\w+`, the literal `foo` will be searched for
-using Boyer-Moore. If there's no match, then no regex engine is ever used.
-Only when there's a match is the regex engine invoked at the location of the
-match, which effectively permits the regex engine to skip large portions of
-a haystack. If a regex is comprised entirely of literals (possibly more than
-one), then it's possible that the regex engine can be avoided entirely even
-when there's a match.
-
-When one literal is found, Boyer-Moore is used. When multiple literals are
-found, an optimized version of Aho-Corasick is used.
-
-This optimization is extended quite a bit in this crate. Here are a few
-examples of regexes that get literal prefixes detected:
-
-* `(foo|bar)` detects `foo` and `bar`
-* `(a|b)c` detects `ac` and `bc`
-* `[ab]foo[yz]` detects `afooy`, `afooz`, `bfooy` and `bfooz`
-* `a?b` detects `a` and `b`
-* `a*b` detects `a` and `b`
-* `(ab){3,6}` detects `ababab`
-
-Literals in anchored regexes can also be used for detecting non-matches very
-quickly. For example, `^foo\w+` and `\w+foo$` may be able to detect a
-non-match just by examining the first (or last) three bytes of the haystack.
-
-## Unicode word boundaries may prevent the DFA from being used
-
-**Advice**: In most cases, `\b` should work well. If not, use `(?-u:\b)`
-instead of `\b` if you care about consistent performance more than
-correctness.
-
-It's a sad state of the current implementation. At the moment, the DFA will
-try to interpret Unicode word boundaries as if they were ASCII word
-boundaries. If the DFA comes across any non-ASCII byte, it will quit and
-fall back to an alternative matching engine that can handle Unicode word
-boundaries correctly. The alternate matching engine is generally quite a bit
-slower (perhaps by an order of magnitude). If necessary, this can be
-ameliorated in two ways.
-
-The first way is to add some number of literal prefixes to your regular
-expression. Even though the DFA may not be used, specialized routines will
-still kick in to find prefix literals quickly, which limits how much work
-the NFA simulation will need to do.
-
-The second way is to give up on Unicode and use an ASCII word boundary
-instead. One can use an ASCII word boundary by disabling Unicode support.
-That is, instead of using `\b`, use `(?-u:\b)`. Namely, given the regex
-`\b.+\b`, it can be transformed into a regex that uses the DFA with
-`(?-u:\b).+(?-u:\b)`. It is important to limit the scope of disabling the
-`u` flag, since it might lead to a syntax error if the regex could match
-arbitrary bytes. For example, if one wrote `(?-u)\b.+\b`, then a syntax
-error would be returned because `.` matches any *byte* when the Unicode flag
-is disabled.
-
-The second way isn't appreciably different from just using a Unicode word
-boundary in the first place, since the DFA will speculatively interpret it
-as an ASCII word boundary anyway. The key difference is that if an ASCII
-word boundary is used explicitly, then the DFA won't quit in the presence of
-non-ASCII UTF-8 bytes. This results in giving up correctness in exchange for
-more consistent performance.
-
-N.B. When using `bytes::Regex`, Unicode support is disabled by default, so
-one can simply write `\b` to get an ASCII word boundary.
-
-## Excessive counting can lead to exponential state blow up in the DFA
-
-**Advice**: Don't write regexes that cause DFA state blow up if you care
-about match performance.
-
-Wait, didn't I say that this crate guards against exponential worst cases?
-Well, it turns out that the process of converting an NFA to a DFA can lead
-to an exponential blow up in the number of states. This crate specifically
-guards against exponential blow up by doing two things:
-
-1. The DFA is computed lazily. That is, a state in the DFA only exists in
-   memory if it is visited. In particular, the lazy DFA guarantees that *at
-   most* one state is created for every byte of input. This, on its own,
-   guarantees linear time complexity.
-2. Of course, creating a new state for *every* byte of input means that search
-   will go incredibly slow because of very large constant factors. On top of
-   that, creating a state for every byte in a large haystack could result in
-   exorbitant memory usage. To ameliorate this, the DFA bounds the number of
-   states it can store. Once it reaches its limit, it flushes its cache. This
-   prevents reuse of states that it already computed. If the cache is flushed
-   too frequently, then the DFA will give up and execution will fall back to
-   one of the NFA simulations.
-
-In effect, this crate will detect exponential state blow up and fall back to
-a search routine with fixed memory requirements. This does, however, mean
-that searching will be much slower than one might expect. Regexes that rely
-on counting in particular are strong aggravators of this behavior. For
-example, matching `[01]*1[01]{20}$` against a random sequence of `0`s and
-`1`s.
-
-In the future, it may be possible to increase the bound that the DFA uses,
-which would allow the caller to choose how much memory they're willing to
-spend.
-
-## Resist the temptation to "optimize" regexes
-
-**Advice**: This ain't a backtracking engine.
-
-An entire book was written on how to optimize Perl-style regular expressions.
-Most of those techniques are not applicable to this library. For example,
-there is no problem with using non-greedy matching or having lots of
-alternations in your regex.
diff --git a/vendor/regex/README.md b/vendor/regex/README.md
index 861417d..f1e4c40 100644
--- a/vendor/regex/README.md
+++ b/vendor/regex/README.md
@@ -1,15 +1,17 @@
 regex
 =====
-A Rust library for parsing, compiling, and executing regular expressions. Its
-syntax is similar to Perl-style regular expressions, but lacks a few features
-like look around and backreferences. In exchange, all searches execute in
-linear time with respect to the size of the regular expression and search text.
-Much of the syntax and implementation is inspired
-by [RE2](https://github.com/google/re2).
+This crate provides routines for searching strings for matches of a [regular
+expression] (aka "regex"). The regex syntax supported by this crate is similar
+to other regex engines, but it lacks several features that are not known how to
+implement efficiently. This includes, but is not limited to, look-around and
+backreferences. In exchange, all regex searches in this crate have worst case
+`O(m * n)` time complexity, where `m` is proportional to the size of the regex
+and `n` is proportional to the size of the string being searched.
+
+[regular expression]: https://en.wikipedia.org/wiki/Regular_expression
 
 [![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions)
 [![Crates.io](https://img.shields.io/crates/v/regex.svg)](https://crates.io/crates/regex)
-[![Rust](https://img.shields.io/badge/rust-1.41.1%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
 
 ### Documentation
 
@@ -40,8 +42,8 @@ fn main() {
-    (?P<day>\d{2})   # the day
 ").unwrap();
-    let caps = re.captures("2010-03-14").unwrap();
+    let caps = re.captures("2010-03-14").unwrap();
 
     assert_eq!("2010", &caps["year"]);
     assert_eq!("03", &caps["month"]);
     assert_eq!("14", &caps["day"]);
@@ -54,32 +56,21 @@ easy to adapt the above example with an iterator:
 
 ```rust
 use regex::Regex;
 
-const TO_SEARCH: &'static str = "
-On 2010-03-14, foo happened. On 2014-10-14, bar happened.
-"; - fn main() { let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap(); + let hay = "On 2010-03-14, foo happened. On 2014-10-14, bar happened."; - for caps in re.captures_iter(TO_SEARCH) { - // Note that all of the unwraps are actually OK for this regex - // because the only way for the regex to match is if all of the - // capture groups match. This is not true in general though! - println!("year: {}, month: {}, day: {}", - caps.get(1).unwrap().as_str(), - caps.get(2).unwrap().as_str(), - caps.get(3).unwrap().as_str()); + let mut dates = vec![]; + for (_, [year, month, day]) in re.captures_iter(hay).map(|c| c.extract()) { + dates.push((year, month, day)); } + assert_eq!(dates, vec![ + ("2010", "03", "14"), + ("2014", "10", "14"), + ]); } ``` -This example outputs: - -```text -year: 2010, month: 03, day: 14 -year: 2014, month: 10, day: 14 -``` - ### Usage: Avoid compiling the same regex in a loop It is an anti-pattern to compile the same regular expression in a loop since @@ -90,19 +81,23 @@ allocations internally to the matching engines. In Rust, it can sometimes be a pain to pass regular expressions around if they're used from inside a helper function. Instead, we recommend using the -[`lazy_static`](https://crates.io/crates/lazy_static) crate to ensure that -regular expressions are compiled exactly once. - -For example: +[`once_cell`](https://crates.io/crates/once_cell) crate to ensure that +regular expressions are compiled exactly once. For example: -```rust,ignore -use regex::Regex; +```rust +use { + once_cell::sync::Lazy, + regex::Regex, +}; + +fn some_helper_function(haystack: &str) -> bool { + static RE: Lazy = Lazy::new(|| Regex::new(r"...").unwrap()); + RE.is_match(haystack) +} -fn some_helper_function(text: &str) -> bool { - lazy_static! { - static ref RE: Regex = Regex::new("...").unwrap(); - } - RE.is_match(text) +fn main() { + assert!(some_helper_function("abc")); + assert!(!some_helper_function("ac")); } ``` @@ -115,19 +110,21 @@ The main API of this crate (`regex::Regex`) requires the caller to pass a `&str` for searching. In Rust, an `&str` is required to be valid UTF-8, which means the main API can't be used for searching arbitrary bytes. -To match on arbitrary bytes, use the `regex::bytes::Regex` API. The API -is identical to the main API, except that it takes an `&[u8]` to search -on instead of an `&str`. By default, `.` will match any *byte* using -`regex::bytes::Regex`, while `.` will match any *UTF-8 encoded Unicode scalar -value* using the main API. +To match on arbitrary bytes, use the `regex::bytes::Regex` API. The API is +identical to the main API, except that it takes an `&[u8]` to search on instead +of an `&str`. The `&[u8]` APIs also permit disabling Unicode mode in the regex +even when the pattern would match invalid UTF-8. For example, `(?-u:.)` is +not allowed in `regex::Regex` but is allowed in `regex::bytes::Regex` since +`(?-u:.)` matches any byte except for `\n`. Conversely, `.` will match the +UTF-8 encoding of any Unicode scalar value except for `\n`. This example shows how to find all null-terminated strings in a slice of bytes: ```rust use regex::bytes::Regex; -let re = Regex::new(r"(?P[^\x00]+)\x00").unwrap(); -let text = b"foo\x00bar\x00baz\x00"; +let re = Regex::new(r"(?-u)(?[^\x00]+)\x00").unwrap(); +let text = b"foo\xFFbar\x00baz\x00"; // Extract all of the strings without the null terminator from each match. // The unwrap is OK here since a match requires the `cstr` capture to match. 
@@ -135,12 +132,12 @@
 let cstrs: Vec<&[u8]> =
     re.captures_iter(text)
     .map(|c| c.name("cstr").unwrap().as_bytes())
     .collect();
 
-assert_eq!(vec![&b"foo"[..], &b"bar"[..], &b"baz"[..]], cstrs);
+assert_eq!(vec![&b"foo\xFFbar"[..], &b"baz"[..]], cstrs);
 ```
 
-Notice here that the `[^\x00]+` will match any *byte* except for `NUL`. When
-using the main API, `[^\x00]+` would instead match any valid UTF-8 sequence
-except for `NUL`.
+Notice here that the `[^\x00]+` will match any *byte* except for `NUL`,
+including bytes like `\xFF` which are not valid UTF-8. When using the main
+API, `[^\x00]+` would instead match any valid UTF-8 sequence except for `NUL`.
 
 ### Usage: match multiple regular expressions simultaneously
 
@@ -170,11 +167,15 @@
 assert!(!matches.matched(5));
 assert!(matches.matched(6));
 ```
 
-### Usage: enable SIMD optimizations
 
-SIMD optimizations are enabled automatically on Rust stable 1.27 and newer.
-For nightly versions of Rust, this requires a recent version with the SIMD
-features stabilized.
+### Usage: regex internals as a library
+
+The [`regex-automata` directory](./regex-automata/) contains a crate that
+exposes all of the internal matching engines used by the `regex` crate. The
+idea is that the `regex` crate exposes a simple API for 99% of use cases, but
+`regex-automata` exposes oodles of customizable behaviors.
+
+[Documentation for `regex-automata`.](https://docs.rs/regex-automata)
 
 ### Usage: a regular expression parser
 
@@ -186,7 +187,7 @@
 This may be useful if you're implementing your own regex engine or otherwise
 need to do analysis on the syntax of a regular expression. It is otherwise not
 recommended for general use.
 
-[Documentation `regex-syntax`.](https://docs.rs/regex-syntax)
+[Documentation for `regex-syntax`.](https://docs.rs/regex-syntax)
 
 ### Crate features
 
@@ -205,29 +206,114 @@ all such features, use the following `Cargo.toml` dependency configuration:
 
 [dependencies.regex]
 version = "1.3"
 default-features = false
-# regex currently requires the standard library, you must re-enable it.
+# Unless you have a specific reason not to, it's good sense to enable standard
+# library support. It enables several optimizations and avoids spin locks. It
+# also shouldn't meaningfully impact compile times or binary size.
 features = ["std"]
 ```
 
-This will reduce the dependency tree of `regex` down to a single crate
-(`regex-syntax`).
+This will reduce the dependency tree of `regex` down to two crates:
+`regex-syntax` and `regex-automata`.
 
 The full set of features one can disable are
-[in the "Crate features" section of the documentation](https://docs.rs/regex/*/#crate-features).
+[in the "Crate features" section of the documentation](https://docs.rs/regex/1.*/#crate-features).
+
+
+### Performance
+
+One of the goals of this crate is for the regex engine to be "fast." While
+that is a somewhat nebulous goal, it is usually interpreted in one of two
+ways. First, it means that all searches take worst case `O(m * n)` time, where
+`m` is proportional to `len(regex)` and `n` is proportional to
+`len(haystack)`. Second, it means that even aside from the time complexity
+constraint, regex searches are "fast" in practice.
+
+While the first interpretation is pretty unambiguous, the second one remains
+nebulous. While nebulous, it guides this crate's architecture and the sorts of
+trade offs it makes. For example, here are some general architectural
+statements that follow as a result of the goal to be "fast":
+
+* When given the choice between faster regex searches and faster _Rust compile
+times_, this crate will generally choose faster regex searches.
+* When given the choice between faster regex searches and faster _regex compile
+times_, this crate will generally choose faster regex searches. That is, it is
+generally acceptable for `Regex::new` to get a little slower if it means that
+searches get faster. (This is a somewhat delicate balance to strike, because
+the speed of `Regex::new` needs to remain somewhat reasonable. But this is why
+one should avoid re-compiling the same regex over and over again.)
+* When given the choice between faster regex searches and simpler API
+design, this crate will generally choose faster regex searches. For example,
+if one didn't care about performance, we could likely get rid of both of
+the `Regex::is_match` and `Regex::find` APIs and instead just rely on
+`Regex::captures`.
+
+There are perhaps more ways that being "fast" influences things.
+
+While this repository used to provide its own benchmark suite, it has since
+been moved to [rebar](https://github.com/BurntSushi/rebar). The benchmarks are
+quite extensive, and there are many more than what is shown in rebar's README
+(which is just limited to a "curated" set meant to compare performance between
+regex engines). To run all of this crate's benchmarks, first start by cloning
+and installing `rebar`:
+
+```text
+$ git clone https://github.com/BurntSushi/rebar
+$ cd rebar
+$ cargo install --path ./
+```
 
-### Minimum Rust version policy
+Then build the benchmark harness for just this crate:
 
-This crate's minimum supported `rustc` version is `1.41.1`.
+```text
+$ rebar build -e '^rust/regex$'
+```
+
+Run all benchmarks for this crate as tests (each benchmark is executed once to
+ensure it works):
+
+```text
+$ rebar measure -e '^rust/regex$' -t
+```
+
+Record measurements for all benchmarks and save them to a CSV file:
+
+```text
+$ rebar measure -e '^rust/regex$' | tee results.csv
+```
+
+Explore benchmark timings:
+
+```text
+$ rebar cmp results.csv
+```
+
+See the `rebar` documentation for more details on how it works and how to
+compare results with other regex engines.
+
+
+### Hacking
+
+The `regex` crate is, for the most part, a pretty thin wrapper around the
+[`meta::Regex`](https://docs.rs/regex-automata/latest/regex_automata/meta/struct.Regex.html)
+from the
+[`regex-automata` crate](https://docs.rs/regex-automata/latest/regex_automata/).
+Therefore, if you're looking to work on the internals of this crate, you'll
+likely either want to look in `regex-syntax` (for parsing) or `regex-automata`
+(for construction of finite automata and the search routines).
+
+My [blog on regex internals](https://blog.burntsushi.net/regex-internals/)
+goes into more depth.
+
+
+### Minimum Rust version policy
 
-The current **tentative** policy is that the minimum Rust version required
-to use this crate can be increased in minor version updates. For example, if
-regex 1.0 requires Rust 1.20.0, then regex 1.0.z for all values of `z` will
-also require Rust 1.20.0 or newer. However, regex 1.y for `y > 0` may require a
-newer minimum version of Rust.
+This crate's minimum supported `rustc` version is `1.65.0`.
 
-In general, this crate will be conservative with respect to the minimum
-supported version of Rust.
+The policy is that the minimum Rust version required to use this crate can be +increased in minor version updates. For example, if regex 1.0 requires Rust +1.20.0, then regex 1.0.z for all values of `z` will also require Rust 1.20.0 or +newer. However, regex 1.y for `y > 0` may require a newer minimum version of +Rust. ### License diff --git a/vendor/regex/UNICODE.md b/vendor/regex/UNICODE.md index df7d21e..60db0aa 100644 --- a/vendor/regex/UNICODE.md +++ b/vendor/regex/UNICODE.md @@ -8,7 +8,8 @@ Full support for Level 1 ("Basic Unicode Support") is provided with two exceptions: 1. Line boundaries are not Unicode aware. Namely, only the `\n` - (`END OF LINE`) character is recognized as a line boundary. + (`END OF LINE`) character is recognized as a line boundary by default. + One can opt into `\r\n|\r|\n` being a line boundary via CRLF mode. 2. The compatibility properties specified by [RL1.2a](https://unicode.org/reports/tr18/#RL1.2a) are ASCII-only definitions. @@ -229,12 +230,10 @@ then all characters classes are case folded as well. [UTS#18 RL1.6](https://unicode.org/reports/tr18/#Line_Boundaries) The regex crate only provides support for recognizing the `\n` (`END OF LINE`) -character as a line boundary. This choice was made mostly for implementation -convenience, and to avoid performance cliffs that Unicode word boundaries are -subject to. - -Ideally, it would be nice to at least support `\r\n` as a line boundary as -well, and in theory, this could be done efficiently. +character as a line boundary by default. One can also opt into treating +`\r\n|\r|\n` as a line boundary via CRLF mode. This choice was made mostly for +implementation convenience, and to avoid performance cliffs that Unicode word +boundaries are subject to. ## RL1.7 Code Points diff --git a/vendor/regex/bench/README.md b/vendor/regex/bench/README.md new file mode 100644 index 0000000..3cc6a1a --- /dev/null +++ b/vendor/regex/bench/README.md @@ -0,0 +1,2 @@ +Benchmarks for this crate have been moved into the rebar project: +https://github.com/BurntSushi/rebar diff --git a/vendor/regex/examples/regexdna-input.txt b/vendor/regex/examples/regexdna-input.txt deleted file mode 100644 index fb23263..0000000 --- a/vendor/regex/examples/regexdna-input.txt +++ /dev/null @@ -1,1671 +0,0 @@ ->ONE Homo sapiens alu -GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA -TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT -AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG -GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG -CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT -GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA -GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA -TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG -AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA -GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT -AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC -AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG -GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC -CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG -AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT -TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA -TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT -GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG -TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT 
-CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG -CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG -TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA -CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG -AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG -GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC -TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA -TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA -GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT -GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC -ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT -TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC -CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG -CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG -GGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCC -CAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCT -GGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGC -GCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGA -GGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGA -GACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGA -GGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG -AAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT -CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCA -GTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAA -AAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGC -GGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCT -ACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGG -GAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATC -GCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGC -GGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGG -TCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAA -AAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAG -GAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACT -CCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCC -TGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAG -ACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGC -GTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGA -ACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGA -CAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCA -CTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCA -ACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCG -CCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGG -AGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTC -CGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCG -AGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACC -CCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAG -CTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAG -CCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGG -CCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATC -ACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAA -AAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGC -TGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCC -ACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGG -CTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGG -AGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATT -AGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAA -TCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGC 
-CTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAA -TCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAG -CCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGT -GGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCG -GGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAG -CGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG -GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATG -GTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGT -AATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTT -GCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCT -CAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCG -GGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTC -TCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACT -CGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAG -ATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGG -CGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTG -AGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATA -CAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGG -CAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGC -ACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCAC -GCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTC -GAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCG -GGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCT -TGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGG -CGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCA -GCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGG -CCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGC -GCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGG -CGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGA -CTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGG -CCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAA -ACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCC -CAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGT -GAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAA -AGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGG -ATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTAC -TAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGA -GGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGC -GCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGG -TGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTC -AGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAA -ATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGA -GAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC -AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTG -TAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGAC -CAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGT -GGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC -CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACA -GAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACT -TTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAAC -ATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCC -TGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAG -GTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCG -TCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAG -GCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCC -GTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCT -ACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCC 
-GAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCC -GGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCAC -CTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAA -ATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTG -AGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCAC -TGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCT -CACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAG -TTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAG -CCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATC -GCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCT -GGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATC -CCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCC -TGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGG -CGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG -AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCG -AGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGG -AGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGT -GAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAA -TCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGC -AGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCA -AAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGG -CGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTC -TACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCG -GGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGAT -CGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCG -CGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAG -GTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACA -AAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCA -GGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCAC -TCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGC -CTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA -GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGG -CGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTG -AACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCG -ACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGC -ACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCC -AACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGC -GCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCG -GAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACT -CCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCC -GAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAAC -CCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA -GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGA -GCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAG -GCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGAT -CACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTA -AAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGG -CTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGC -CACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTG -GCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAG -GAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAAT -TAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGA -ATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAG -CCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTA -ATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCA -GCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGG -TGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCC 
-GGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGA -GCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTT -GGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACAT -GGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTG -TAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGT -TGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTC -TCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGC -GGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGT -CTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTAC -TCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGA -GATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGG -GCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCT -GAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT -ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAG -GCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG -CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCA -CGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTT -CGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCC -GGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGC -TTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGG -GCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCC -AGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTG -GCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCG -CGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAG -GCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAG -ACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAG -GCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGA -AACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATC -CCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAG -TGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAA -AAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCG -GATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTA -CTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGG -AGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCG -CGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCG -GTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGT -CAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAA -AATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGG -AGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTC -CAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCT -GTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA -CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCG -TGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAA -CCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGAC -AGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCAC -TTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAA -CATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGC -CTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGA -GGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCC -GTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGA -GGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCC -CGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGC -TACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGC -CGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGC -CGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCA -CCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA -AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCT 
-GAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCA -CTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGC -TCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGA -GTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTA -GCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAAT -CGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCC -TGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAAT -CCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGC -CTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTG -GCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGG -GAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGC -GAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG -GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGG -TGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTA -ATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTG -CAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTC -AAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGG -GCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCT -CTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTC -GGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGA -TCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGC -GCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGA -GGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATAC -AAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGC -AGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCA -CTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACG -CCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCG -AGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGG -GCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTT -GAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGC -GACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAG -CACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGC -CAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCG -CGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGC -GGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGAC -TCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGC -CGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAA -CCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCC -AGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTG -AGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA -GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA -TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT -AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG -GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG -CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT -GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA -GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA -TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG -AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA -GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT -AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC -AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG -GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC -CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG -AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT -TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA -TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT 
-GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG -TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT -CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG -CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG -TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA -CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG -AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG -GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC -TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA -TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA -GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT -GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC -ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT -TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC -CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG -CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG -GGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCC -CAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCT -GGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGC -GCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGA -GGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGA -GACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGA -GGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG -AAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT -CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCA -GTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAA -AAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGC -GGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCT -ACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGG -GAGGCTGAGGCAGGAGAATC ->TWO IUB ambiguity codes -cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg -tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa -NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt -cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga -gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa -HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca -tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt -tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt -acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct -tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt -gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa -accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt -RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt -tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag -cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg -ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat -actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg -YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa -KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata -aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa -aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg -gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc -tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK -tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt -ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg -ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa -BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt 
-aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc -tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc -cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac -aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga -tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga -aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD -gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg -ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV -taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa -ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat -gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg -gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa -tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt -tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt -taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca -cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag -aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt -cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt -ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW -attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag -ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa -attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc -tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta -aagacYRcaggattHaYgtKtaatgcVcaataMYacccatatcacgWDBtgaatcBaata -cKcttRaRtgatgaBDacggtaattaaYtataStgVHDtDctgactcaaatKtacaatgc -gYatBtRaDatHaactgtttatatDttttaaaKVccYcaaccNcBcgHaaVcattHctcg -attaaatBtatgcaaaaatYMctSactHatacgaWacattacMBgHttcgaatVaaaaca -BatatVtctgaaaaWtctRacgBMaatSgRgtgtcgactatcRtattaScctaStagKga -DcWgtYtDDWKRgRtHatRtggtcgaHgggcgtattaMgtcagccaBggWVcWctVaaat -tcgNaatcKWagcNaHtgaaaSaaagctcYctttRVtaaaatNtataaccKtaRgtttaM -tgtKaBtRtNaggaSattHatatWactcagtgtactaKctatttgRYYatKatgtccgtR -tttttatttaatatVgKtttgtatgtNtataRatWYNgtRtHggtaaKaYtKSDcatcKg -taaYatcSRctaVtSMWtVtRWHatttagataDtVggacagVcgKWagBgatBtaaagNc -aRtagcataBggactaacacRctKgttaatcctHgDgttKHHagttgttaatgHBtatHc -DaagtVaBaRccctVgtgDtacRHSctaagagcggWYaBtSaKtHBtaaactYacgNKBa -VYgtaacttagtVttcttaatgtBtatMtMtttaattaatBWccatRtttcatagVgMMt -agctStKctaMactacDNYgKYHgaWcgaHgagattacVgtttgtRaSttaWaVgataat -gtgtYtaStattattMtNgWtgttKaccaatagNYttattcgtatHcWtctaaaNVYKKt -tWtggcDtcgaagtNcagatacgcattaagaccWctgcagcttggNSgaNcHggatgtVt -catNtRaaBNcHVagagaaBtaaSggDaatWaatRccaVgggStctDaacataKttKatt -tggacYtattcSatcttagcaatgaVBMcttDattctYaaRgatgcattttNgVHtKcYR -aatRKctgtaaacRatVSagctgtWacBtKVatctgttttKcgtctaaDcaagtatcSat -aWVgcKKataWaYttcccSaatgaaaacccWgcRctWatNcWtBRttYaattataaNgac -acaatagtttVNtataNaYtaatRaVWKtBatKagtaatataDaNaaaaataMtaagaaS -tccBcaatNgaataWtHaNactgtcDtRcYaaVaaaaaDgtttRatctatgHtgttKtga -aNSgatactttcgagWaaatctKaaDaRttgtggKKagcDgataaattgSaacWaVtaNM -acKtcaDaaatttctRaaVcagNacaScRBatatctRatcctaNatWgRtcDcSaWSgtt -RtKaRtMtKaatgttBHcYaaBtgatSgaSWaScMgatNtctcctatttctYtatMatMt -RRtSaattaMtagaaaaStcgVgRttSVaScagtgDtttatcatcatacRcatatDctta -tcatVRtttataaHtattcYtcaaaatactttgVctagtaaYttagatagtSYacKaaac -gaaKtaaatagataatSatatgaaatSgKtaatVtttatcctgKHaatHattagaaccgt -YaaHactRcggSBNgtgctaaBagBttgtRttaaattYtVRaaaattgtaatVatttctc -ttcatgBcVgtgKgaHaaatattYatagWacNctgaaMcgaattStagWaSgtaaKagtt -ttaagaDgatKcctgtaHtcatggKttVDatcaaggtYcgccagNgtgcVttttagagat -gctaccacggggtNttttaSHaNtatNcctcatSaaVgtactgBHtagcaYggYVKNgta -KBcRttgaWatgaatVtagtcgattYgatgtaatttacDacSctgctaaaStttaWMagD -aaatcaVYctccgggcgaVtaaWtStaKMgDtttcaaMtVgBaatccagNaaatcYRMBg 
-gttWtaaScKttMWtYataRaDBMaDataatHBcacDaaKDactaMgagttDattaHatH -taYatDtattDcRNStgaatattSDttggtattaaNSYacttcDMgYgBatWtaMagact -VWttctttgYMaYaacRgHWaattgRtaagcattctMKVStatactacHVtatgatcBtV -NataaBttYtSttacKgggWgYDtgaVtYgatDaacattYgatggtRDaVDttNactaSa -MtgNttaacaaSaBStcDctaccacagacgcaHatMataWKYtaYattMcaMtgSttDag -cHacgatcaHttYaKHggagttccgatYcaatgatRaVRcaagatcagtatggScctata -ttaNtagcgacgtgKaaWaactSgagtMYtcttccaKtStaacggMtaagNttattatcg -tctaRcactctctDtaacWYtgaYaSaagaWtNtatttRacatgNaatgttattgWDDcN -aHcctgaaHacSgaataaRaataMHttatMtgaSDSKatatHHaNtacagtccaYatWtc -actaactatKDacSaStcggataHgYatagKtaatKagStaNgtatactatggRHacttg -tattatgtDVagDVaRctacMYattDgtttYgtctatggtKaRSttRccRtaaccttaga -gRatagSaaMaacgcaNtatgaaatcaRaagataatagatactcHaaYKBctccaagaRa -BaStNagataggcgaatgaMtagaatgtcaKttaaatgtaWcaBttaatRcggtgNcaca -aKtttScRtWtgcatagtttWYaagBttDKgcctttatMggNttattBtctagVtacata -aaYttacacaaRttcYtWttgHcaYYtaMgBaBatctNgcDtNttacgacDcgataaSat -YaSttWtcctatKaatgcagHaVaacgctgcatDtgttaSataaaaYSNttatagtaNYt -aDaaaNtggggacttaBggcHgcgtNtaaMcctggtVtaKcgNacNtatVaSWctWtgaW -cggNaBagctctgaYataMgaagatBSttctatacttgtgtKtaattttRagtDtacata -tatatgatNHVgBMtKtaKaNttDHaagatactHaccHtcatttaaagttVaMcNgHata -tKtaNtgYMccttatcaaNagctggacStttcNtggcaVtattactHaSttatgNMVatt -MMDtMactattattgWMSgtHBttStStgatatRaDaagattttctatMtaaaaaggtac -taaVttaSacNaatactgMttgacHaHRttgMacaaaatagttaatatWKRgacDgaRta -tatttattatcYttaWtgtBRtWatgHaaattHataagtVaDtWaVaWtgStcgtMSgaS -RgMKtaaataVacataatgtaSaatttagtcgaaHtaKaatgcacatcggRaggSKctDc -agtcSttcccStYtccRtctctYtcaaKcgagtaMttttcRaYDttgttatctaatcata -NctctgctatcaMatactataggDaHaaSttMtaDtcNatataattctMcStaaBYtaNa -gatgtaatHagagSttgWHVcttatKaYgDctcttggtgttMcRaVgSgggtagacaata -aDtaattSaDaNaHaBctattgNtaccaaRgaVtKNtaaYggHtaKKgHcatctWtctDt -ttctttggSDtNtaStagttataaacaattgcaBaBWggHgcaaaBtYgctaatgaaatW -cDcttHtcMtWWattBHatcatcaaatctKMagtDNatttWaBtHaaaNgMttaaStagt -tctctaatDtcRVaYttgttMtRtgtcaSaaYVgSWDRtaatagctcagDgcWWaaaBaa -RaBctgVgggNgDWStNaNBKcBctaaKtttDcttBaaggBttgaccatgaaaNgttttt -tttatctatgttataccaaDRaaSagtaVtDtcaWatBtacattaWacttaSgtattggD -gKaaatScaattacgWcagKHaaccaYcRcaRttaDttRtttHgaHVggcttBaRgtccc -tDatKaVtKtcRgYtaKttacgtatBtStaagcaattaagaRgBagSaattccSWYttta -ttVaataNctgHgttaaNBgcVYgtRtcccagWNaaaacaDNaBcaaaaRVtcWMgBagM -tttattacgDacttBtactatcattggaaatVccggttRttcatagttVYcatYaSHaHc -ttaaagcNWaHataaaRWtctVtRYtagHtaaaYMataHYtNBctNtKaatattStgaMc -BtRgctaKtgcScSttDgYatcVtggaaKtaagatWccHccgKYctaNNctacaWctttt -gcRtgtVcgaKttcMRHgctaHtVaataaDtatgKDcttatBtDttggNtacttttMtga -acRattaaNagaactcaaaBBVtcDtcgaStaDctgaaaSgttMaDtcgttcaccaaaag -gWtcKcgSMtcDtatgtttStaaBtatagDcatYatWtaaaBacaKgcaDatgRggaaYc -taRtccagattDaWtttggacBaVcHtHtaacDacYgtaatataMagaatgHMatcttat -acgtatttttatattacHactgttataMgStYaattYaccaattgagtcaaattaYtgta -tcatgMcaDcgggtcttDtKgcatgWRtataatatRacacNRBttcHtBgcRttgtgcgt -catacMtttBctatctBaatcattMttMYgattaaVYatgDaatVagtattDacaacDMa -tcMtHcccataagatgBggaccattVWtRtSacatgctcaaggggYtttDtaaNgNtaaB -atggaatgtctRtaBgBtcNYatatNRtagaacMgagSaSDDSaDcctRagtVWSHtVSR -ggaacaBVaccgtttaStagaacaMtactccagtttVctaaRaaHttNcttagcaattta -ttaatRtaaaatctaacDaBttggSagagctacHtaaRWgattcaaBtctRtSHaNtgta -cattVcaHaNaagtataccacaWtaRtaaVKgMYaWgttaKggKMtKcgWatcaDatYtK -SttgtacgaccNctSaattcDcatcttcaaaDKttacHtggttHggRRaRcaWacaMtBW -VHSHgaaMcKattgtaRWttScNattBBatYtaNRgcggaagacHSaattRtttcYgacc -BRccMacccKgatgaacttcgDgHcaaaaaRtatatDtatYVtttttHgSHaSaatagct -NYtaHYaVYttattNtttgaaaYtaKttWtctaNtgagaaaNctNDctaaHgttagDcRt -tatagccBaacgcaRBtRctRtggtaMYYttWtgataatcgaataattattataVaaaaa -ttacNRVYcaaMacNatRttcKatMctgaagactaattataaYgcKcaSYaatMNctcaa 
-cgtgatttttBacNtgatDccaattattKWWcattttatatatgatBcDtaaaagttgaa -VtaHtaHHtBtataRBgtgDtaataMttRtDgDcttattNtggtctatctaaBcatctaR -atgNacWtaatgaagtcMNaacNgHttatactaWgcNtaStaRgttaaHacccgaYStac -aaaatWggaYaWgaattattcMaactcBKaaaRVNcaNRDcYcgaBctKaacaaaaaSgc -tccYBBHYaVagaatagaaaacagYtctVccaMtcgtttVatcaatttDRtgWctagtac -RttMctgtDctttcKtWttttataaatgVttgBKtgtKWDaWagMtaaagaaattDVtag -gttacatcatttatgtcgMHaVcttaBtVRtcgtaYgBRHatttHgaBcKaYWaatcNSc -tagtaaaaatttacaatcactSWacgtaatgKttWattagttttNaggtctcaagtcact -attcttctaagKggaataMgtttcataagataaaaatagattatDgcBVHWgaBKttDgc -atRHaagcaYcRaattattatgtMatatattgHDtcaDtcaaaHctStattaatHaccga -cNattgatatattttgtgtDtRatagSacaMtcRtcattcccgacacSattgttKaWatt -NHcaacttccgtttSRtgtctgDcgctcaaMagVtBctBMcMcWtgtaacgactctcttR -ggRKSttgYtYatDccagttDgaKccacgVatWcataVaaagaataMgtgataaKYaaat -cHDaacgataYctRtcYatcgcaMgtNttaBttttgatttaRtStgcaacaaaataccVg -aaDgtVgDcStctatatttattaaaaRKDatagaaagaKaaYYcaYSgKStctccSttac -agtcNactttDVttagaaagMHttRaNcSaRaMgBttattggtttaRMggatggcKDgWR -tNaataataWKKacttcKWaaagNaBttaBatMHtccattaacttccccYtcBcYRtaga -ttaagctaaYBDttaNtgaaaccHcaRMtKtaaHMcNBttaNaNcVcgVttWNtDaBatg -ataaVtcWKcttRggWatcattgaRagHgaattNtatttctctattaattaatgaDaaMa -tacgttgggcHaYVaaNaDDttHtcaaHtcVVDgBVagcMacgtgttaaBRNtatRtcag -taagaggtttaagacaVaaggttaWatctccgtVtaDtcDatttccVatgtacNtttccg -tHttatKgScBatgtVgHtYcWagcaKtaMYaaHgtaattaSaHcgcagtWNaatNccNN -YcacgVaagaRacttctcattcccRtgtgtaattagcSttaaStWaMtctNNcSMacatt -ataaactaDgtatWgtagtttaagaaaattgtagtNagtcaataaatttgatMMYactaa -tatcggBWDtVcYttcDHtVttatacYaRgaMaacaStaatcRttttVtagaDtcacWat -ttWtgaaaagaaagNRacDtttStVatBaDNtaactatatcBSMcccaSttccggaMatg -attaaWatKMaBaBatttgataNctgttKtVaagtcagScgaaaDggaWgtgttttKtWt -atttHaatgtagttcactaaKMagttSYBtKtaYgaactcagagRtatagtVtatcaaaW -YagcgNtaDagtacNSaaYDgatBgtcgataacYDtaaactacagWDcYKaagtttatta -gcatcgagttKcatDaattgattatDtcagRtWSKtcgNtMaaaaacaMttKcaWcaaSV -MaaaccagMVtaMaDtMaHaBgaacataBBVtaatVYaNSWcSgNtDNaaKacacBttta -tKtgtttcaaHaMctcagtaacgtcgYtactDcgcctaNgagagcYgatattttaaattt -ccattttacatttDaaRctattttWctttacgtDatYtttcagacgcaaVttagtaaKaa -aRtgVtccataBggacttatttgtttaWNtgttVWtaWNVDaattgtatttBaagcBtaa -BttaaVatcHcaVgacattccNggtcgacKttaaaRtagRtctWagaYggtgMtataatM -tgaaRttattttgWcttNtDRRgMDKacagaaaaggaaaRStcccagtYccVattaNaaK -StNWtgacaVtagaagcttSaaDtcacaacgDYacWDYtgtttKatcVtgcMaDaSKStV -cgtagaaWaKaagtttcHaHgMgMtctataagBtKaaaKKcactggagRRttaagaBaaN -atVVcgRcKSttDaactagtSttSattgttgaaRYatggttVttaataaHttccaagDtg -atNWtaagHtgcYtaactRgcaatgMgtgtRaatRaNaacHKtagactactggaatttcg -ccataacgMctRgatgttaccctaHgtgWaYcactcacYaattcttaBtgacttaaacct -gYgaWatgBttcttVttcgttWttMcNYgtaaaatctYgMgaaattacNgaHgaacDVVM -tttggtHtctaaRgtacagacgHtVtaBMNBgattagcttaRcttacaHcRctgttcaaD -BggttKaacatgKtttYataVaNattccgMcgcgtagtRaVVaattaKaatggttRgaMc -agtatcWBttNtHagctaatctagaaNaaacaYBctatcgcVctBtgcaaagDgttVtga -HtactSNYtaaNccatgtgDacgaVtDcgKaRtacDcttgctaagggcagMDagggtBWR -tttSgccttttttaacgtcHctaVtVDtagatcaNMaVtcVacatHctDWNaataRgcgt -aVHaggtaaaaSgtttMtattDgBtctgatSgtRagagYtctSaKWaataMgattRKtaa -catttYcgtaacacattRWtBtcggtaaatMtaaacBatttctKagtcDtttgcBtKYYB -aKttctVttgttaDtgattttcttccacttgSaaacggaaaNDaattcYNNaWcgaaYat -tttMgcBtcatRtgtaaagatgaWtgaccaYBHgaatagataVVtHtttVgYBtMctaMt -cctgaDcYttgtccaaaRNtacagcMctKaaaggatttacatgtttaaWSaYaKttBtag -DacactagctMtttNaKtctttcNcSattNacttggaacaatDagtattRtgSHaataat -gccVgacccgatactatccctgtRctttgagaSgatcatatcgDcagWaaHSgctYYWta -tHttggttctttatVattatcgactaagtgtagcatVgtgHMtttgtttcgttaKattcM -atttgtttWcaaStNatgtHcaaaDtaagBaKBtRgaBgDtSagtatMtaacYaatYtVc -KatgtgcaacVaaaatactKcRgtaYtgtNgBBNcKtcttaccttKgaRaYcaNKtactt 
-tgagSBtgtRagaNgcaaaNcacagtVtttHWatgttaNatBgtttaatNgVtctgaata -tcaRtattcttttttttRaaKcRStctcggDgKagattaMaaaKtcaHacttaataataK -taRgDtKVBttttcgtKaggHHcatgttagHggttNctcgtatKKagVagRaaaggaaBt -NatttVKcRttaHctaHtcaaatgtaggHccaBataNaNaggttgcWaatctgatYcaaa -HaatWtaVgaaBttagtaagaKKtaaaKtRHatMaDBtBctagcatWtatttgWttVaaa -ScMNattRactttgtYtttaaaagtaagtMtaMaSttMBtatgaBtttaKtgaatgagYg -tNNacMtcNRacMMHcttWtgtRtctttaacaacattattcYaMagBaacYttMatcttK -cRMtgMNccattaRttNatHaHNaSaaHMacacaVaatacaKaSttHatattMtVatWga -ttttttaYctttKttHgScWaacgHtttcaVaaMgaacagNatcgttaacaaaaagtaca -HBNaattgttKtcttVttaaBtctgctacgBgcWtttcaggacacatMgacatcccagcg -gMgaVKaBattgacttaatgacacacaaaaaatRKaaBctacgtRaDcgtagcVBaacDS -BHaaaaSacatatacagacRNatcttNaaVtaaaataHattagtaaaaSWccgtatWatg -gDttaactattgcccatcttHaSgYataBttBaactattBtcHtgatcaataSttaBtat -KSHYttWggtcYtttBttaataccRgVatStaHaKagaatNtagRMNgtcttYaaSaact -cagDSgagaaYtMttDtMRVgWKWtgMaKtKaDttttgactatacataatcNtatNaHat -tVagacgYgatatatttttgtStWaaatctWaMgagaRttRatacgStgattcttaagaD -taWccaaatRcagcagaaNKagtaaDggcgccBtYtagSBMtactaaataMataBSacRM -gDgattMMgtcHtcaYDtRaDaacggttDaggcMtttatgttaNctaattaVacgaaMMt -aatDccSgtattgaRtWWaccaccgagtactMcgVNgctDctaMScatagcgtcaactat -acRacgHRttgctatttaatgaattataYKttgtaagWgtYttgcHgMtaMattWaWVta -RgcttgYgttBHtYataSccStBtgtagMgtDtggcVaaSBaatagDttgBgtctttctc -attttaNagtHKtaMWcYactVcgcgtatMVtttRacVagDaatcttgctBBcRDgcaac -KttgatSKtYtagBMagaRtcgBattHcBWcaactgatttaatttWDccatttatcgagS -KaWttataHactaHMttaatHtggaHtHagaatgtKtaaRactgtttMatacgatcaagD -gatKaDctataMggtHDtggHacctttRtatcttYattttgacttgaaSaataaatYcgB -aaaaccgNatVBttMacHaKaataagtatKgtcaagactcttaHttcggaattgttDtct -aaccHttttWaaatgaaatataaaWattccYDtKtaaaacggtgaggWVtctattagtga -ctattaagtMgtttaagcatttgSgaaatatccHaaggMaaaattttcWtatKctagDtY -tMcctagagHcactttactatacaaacattaacttaHatcVMYattYgVgtMttaaRtga -aataaDatcaHgtHHatKcDYaatcttMtNcgatYatgSaMaNtcttKcWataScKggta -tcttacgcttWaaagNatgMgHtctttNtaacVtgttcMaaRatccggggactcMtttaY -MtcWRgNctgNccKatcttgYDcMgattNYaRagatHaaHgKctcataRDttacatBatc -cattgDWttatttaWgtcggagaaaaatacaatacSNtgggtttccttacSMaagBatta -caMaNcactMttatgaRBacYcYtcaaaWtagctSaacttWgDMHgaggatgBVgcHaDt -ggaactttggtcNatNgtaKaBcccaNtaagttBaacagtatacDYttcctNgWgcgSMc -acatStctHatgRcNcgtacacaatRttMggaNKKggataaaSaYcMVcMgtaMaHtgat -tYMatYcggtcttcctHtcDccgtgRatcattgcgccgatatMaaYaataaYSggatagc -gcBtNtaaaScaKgttBgagVagttaKagagtatVaactaSacWactSaKatWccaKaaa -atBKgaaKtDMattttgtaaatcRctMatcaaMagMttDgVatggMaaWgttcgaWatga -aatttgRtYtattaWHKcRgctacatKttctaccaaHttRatctaYattaaWatVNccat -NgagtcKttKataStRaatatattcctRWatDctVagttYDgSBaatYgttttgtVaatt -taatagcagMatRaacttBctattgtMagagattaaactaMatVtHtaaatctRgaaaaa -aaatttWacaacaYccYDSaattMatgaccKtaBKWBattgtcaagcHKaagttMMtaat -ttcKcMagNaaKagattggMagaggtaatttYacatcWaaDgatMgKHacMacgcVaaca -DtaDatatYggttBcgtatgWgaSatttgtagaHYRVacaRtctHaaRtatgaactaata -tctSSBgggaaHMWtcaagatKgagtDaSatagttgattVRatNtctMtcSaagaSHaat -aNataataRaaRgattctttaataaagWaRHcYgcatgtWRcttgaaggaMcaataBRaa -ccagStaaacNtttcaatataYtaatatgHaDgcStcWttaacctaRgtYaRtataKtgM -ttttatgactaaaatttacYatcccRWtttHRtattaaatgtttatatttgttYaatMca -RcSVaaDatcgtaYMcatgtagacatgaaattgRtcaaYaaYtRBatKacttataccaNa -aattVaBtctggacaagKaaYaaatatWtMtatcYaaVNtcgHaactBaagKcHgtctac -aatWtaDtSgtaHcataHtactgataNctRgttMtDcDttatHtcgtacatcccaggStt -aBgtcacacWtccNMcNatMVaVgtccDYStatMaccDatggYaRKaaagataRatttHK -tSaaatDgataaacttaHgttgVBtcttVttHgDacgaKatgtatatNYataactctSat -atatattgcHRRYttStggaactHgttttYtttaWtatMcttttctatctDtagVHYgMR -BgtHttcctaatYRttKtaagatggaVRataKDctaMtKBNtMtHNtWtttYcVtattMc -gRaacMcctNSctcatttaaagDcaHtYccSgatgcaatYaaaaDcttcgtaWtaattct 
-cgttttScttggtaatctttYgtctaactKataHacctMctcttacHtKataacacagcN -RatgKatttttSaaatRYcgDttaMRcgaaattactMtgcgtaagcgttatBtttttaat -taagtNacatHgttcRgacKcBBtVgatKttcgaBaatactDRgtRtgaNacWtcacYtt -aaKcgttctHaKttaNaMgWgWaggtctRgaKgWttSttBtDcNtgtttacaaatYcDRt -gVtgcctattcNtctaaaDMNttttNtggctgagaVctDaacVtWccaagtaacacaNct -gaScattccDHcVBatcgatgtMtaatBgHaatDctMYgagaatgYWKcctaatNaStHa -aaKccgHgcgtYaaYtattgtStgtgcaaRtattaKatattagaWVtcaMtBagttatta -gNaWHcVgcaattttDcMtgtaRHVYtHtctgtaaaaHVtMKacatcgNaatttMatatg -ttgttactagWYtaRacgataKagYNKcattataNaRtgaacKaYgcaaYYacaNccHat -MatDcNgtHttRaWttagaaDcaaaaaatagggtKDtStaDaRtaVtHWKNtgtattVct -SVgRgataDaRaWataBgaagaaKtaataaYgDcaStaNgtaDaaggtattHaRaWMYaY -aWtggttHYgagVtgtgcttttcaaDKcagVcgttagacNaaWtagtaataDttctggtt -VcatcataaagtgKaaaNaMtaBBaattaatWaattgctHaVKaSgDaaVKaHtatatat -HatcatSBagNgHtatcHYMHgttDgtaHtBttWatcgtttaRaattgStKgSKNWKatc -agDtctcagatttctRtYtBatBgHHtKaWtgYBgacVVWaKtacKcDttKMaKaVcggt -gttataagaataaHaatattagtataatMHgttYgaRttagtaRtcaaVatacggtcMcg -agtaaRttacWgactKRYataaaagSattYaWgagatYagKagatgSaagKgttaatMgg -tataatgttWYttatgagaaacctNVataatHcccKtDctcctaatactggctHggaSag -gRtKHaWaattcgSatMatttagaggcYtctaMcgctcataSatatgRagacNaaDagga -VBagaYttKtacNaKgtSYtagttggaWcatcWttaatctatgaVtcgtgtMtatcaYcg -tRccaaYgDctgcMgtgtWgacWtgataacacgcgctBtgttaKtYDtatDcatcagKaV -MctaatcttgVcaaRgcRMtDcgattaHttcaNatgaatMtactacVgtRgatggaWttt -actaaKatgagSaaKggtaNtactVaYtaaKRagaacccacaMtaaMtKtatBcttgtaa -WBtMctaataaVcDaaYtcRHBtcgttNtaaHatttBNgRStVDattBatVtaagttaYa -tVattaagaBcacggtSgtVtatttaRattgatgtaHDKgcaatattKtggcctatgaWD -KRYcggattgRctatNgatacaatMNttctgtcRBYRaaaHctNYattcHtaWcaattct -BtMKtVgYataatMgYtcagcttMDataVtggRtKtgaatgccNcRttcaMtRgattaac -attRcagcctHtWMtgtDRagaKaBtgDttYaaaaKatKgatctVaaYaacWcgcatagB -VtaNtRtYRaggBaaBtgKgttacataagagcatgtRattccacttaccatRaaatgWgD -aMHaYVgVtaSctatcgKaatatattaDgacccYagtgtaYNaaatKcagtBRgagtcca -tgKgaaaccBgaagBtgSttWtacgatWHaYatcgatttRaaNRgcaNaKVacaNtDgat -tgHVaatcDaagcgtatgcNttaDataatcSataaKcaataaHWataBtttatBtcaKtK -tatagttaDgSaYctacaRatNtaWctSaatatttYaKaKtaccWtatcRagacttaYtt -VcKgSDcgagaagatccHtaattctSttatggtKYgtMaHagVaBRatttctgtRgtcta -tgggtaHKgtHacHtSYacgtacacHatacKaaBaVaccaDtatcSaataaHaagagaat -ScagactataaRttagcaaVcaHataKgDacatWccccaagcaBgagWatctaYttgaaa -tctVNcYtttWagHcgcgcDcVaaatgttKcHtNtcaatagtgtNRaactttttcaatgg -WgBcgDtgVgtttctacMtaaataaaRggaaacWaHttaRtNtgctaaRRtVBctYtVta -tDcattDtgaccYatagatYRKatNYKttNgcctagtaWtgaactaMVaacctgaStttc -tgaKVtaaVaRKDttVtVctaDNtataaaDtccccaagtWtcgatcactDgYaBcatcct -MtVtacDaaBtYtMaKNatNtcaNacgDatYcatcgcaRatWBgaacWttKttagYtaat -tcggttgSWttttDWctttacYtatatWtcatDtMgtBttgRtVDggttaacYtacgtac -atgaattgaaWcttMStaDgtatattgaDtcRBcattSgaaVBRgagccaaKtttcDgcg -aSMtatgWattaKttWtgDBMaggBBttBaatWttRtgcNtHcgttttHtKtcWtagHSt -aacagttgatatBtaWSaWggtaataaMttaKacDaatactcBttcaatatHttcBaaSa -aatYggtaRtatNtHcaatcaHtagVtgtattataNggaMtcttHtNagctaaaggtaga -YctMattNaMVNtcKtactBKcaHHcBttaSagaKacataYgctaKaYgttYcgacWVtt -WtSagcaacatcccHaccKtcttaacgaKttcacKtNtacHtatatRtaaatacactaBt -ttgaHaRttggttWtatYagcatYDatcggagagcWBataagRtacctataRKgtBgatg -aDatataSttagBaHtaatNtaDWcWtgtaattacagKttcNtMagtattaNgtctcgtc -ctcttBaHaKcKccgtRcaaYagSattaagtKataDatatatagtcDtaacaWHcaKttD -gaaRcgtgYttgtcatatNtatttttatggccHtgDtYHtWgttatYaacaattcaWtat -NgctcaaaSttRgctaatcaaatNatcgtttaBtNNVtgttataagcaaagattBacgtD -atttNatttaaaDcBgtaSKgacgtagataatttcHMVNttgttBtDtgtaWKaaRMcKM -tHtaVtagataWctccNNaSWtVaHatctcMgggDgtNHtDaDttatatVWttgttattt -aacctttcacaaggaSaDcggttttttatatVtctgVtaacaStDVaKactaMtttaSNa -gtgaaattaNacttSKctattcctctaSagKcaVttaagNaVcttaVaaRNaHaaHttat 
-gtHttgtgatMccaggtaDcgaccgtWgtWMtttaHcRtattgScctatttKtaaccaag -tYagaHgtWcHaatgccKNRtttagtMYSgaDatctgtgaWDtccMNcgHgcaaacNDaa -aRaStDWtcaaaaHKtaNBctagBtgtattaactaattttVctagaatggcWSatMaccc -ttHttaSgSgtgMRcatRVKtatctgaaaccDNatYgaaVHNgatMgHRtacttaaaRta -tStRtDtatDttYatattHggaBcttHgcgattgaKcKtttcRataMtcgaVttWacatN -catacctRataDDatVaWNcggttgaHtgtMacVtttaBHtgagVttMaataattatgtt -cttagtttgtgcDtSatttgBtcaacHattaaBagVWcgcaSYttMgcttacYKtVtatc -aYaKctgBatgcgggcYcaaaaacgNtctagKBtattatctttKtaVttatagtaYtRag -NtaYataaVtgaatatcHgcaaRataHtacacatgtaNtgtcgYatWMatttgaactacR -ctaWtWtatacaatctBatatgYtaagtatgtgtatSttactVatcttYtaBcKgRaSgg -RaaaaatgcagtaaaWgtaRgcgataatcBaataccgtatttttccatcNHtatWYgatH -SaaaDHttgctgtccHtggggcctaataatttttctatattYWtcattBtgBRcVttaVM -RSgctaatMagtYtttaaaaatBRtcBttcaaVtaacagctccSaaSttKNtHtKYcagc -agaaaccccRtttttaaDcDtaStatccaagcgctHtatcttaDRYgatDHtWcaaaBcW -gKWHttHataagHacgMNKttMKHccaYcatMVaacgttaKgYcaVaaBtacgcaacttt -MctaaHaatgtBatgagaSatgtatgSRgHgWaVWgataaatatttccKagVgataattW -aHNcYggaaatgctHtKtaDtctaaagtMaatVDVactWtSaaWaaMtaHtaSKtcBRaN -cttStggtBttacNagcatagRgtKtgcgaacaacBcgKaatgataagatgaaaattgta -ctgcgggtccHHWHaaNacaBttNKtKtcaaBatatgctaHNgtKcDWgtttatNgVDHg -accaacWctKaaggHttgaRgYaatHcaBacaatgagcaaattactgtaVaaYaDtagat -tgagNKggtggtgKtWKaatacagDRtatRaMRtgattDggtcaaYRtatttNtagaDtc -acaaSDctDtataatcgtactaHttatacaatYaacaaHttHatHtgcgatRRttNgcat -SVtacWWgaaggagtatVMaVaaattScDDKNcaYBYaDatHgtctatBagcaacaagaa -tgagaaRcataaKNaRtBDatcaaacgcattttttaaBtcSgtacaRggatgtMNaattg -gatatWtgagtattaaaVctgcaYMtatgatttttYgaHtgtcttaagWBttHttgtctt -attDtcgtatWtataataSgctaHagcDVcNtaatcaagtaBDaWaDgtttagYctaNcc -DtaKtaHcttaataacccaRKtacaVaatNgcWRaMgaattatgaBaaagattVYaHMDc -aDHtcRcgYtcttaaaWaaaVKgatacRtttRRKYgaatacaWVacVcRtatMacaBtac -tggMataaattttHggNagSctacHgtBagcgtcgtgattNtttgatSaaggMttctttc -ttNtYNagBtaaacaaatttMgaccttacataattgYtcgacBtVMctgStgMDtagtaR -ctHtatgttcatatVRNWataDKatWcgaaaaagttaaaagcacgHNacgtaatctttMR -tgacttttDacctataaacgaaatatgattagaactccSYtaBctttaataacWgaaaYa -tagatgWttcatKtNgatttttcaagHtaYgaaRaDaagtaggagcttatVtagtctttc -attaaaatcgKtattaRttacagVaDatgcatVgattgggtctttHVtagKaaRBtaHta -aggccccaaaaKatggtttaMWgtBtaaacttcactttKHtcgatctccctaYaBacMgt -cttBaBaNgcgaaacaatctagtHccHtKttcRtRVttccVctttcatacYagMVtMcag -aMaaacaataBctgYtaatRaaagattaaccatVRatHtaRagcgcaBcgDttStttttc -VtttaDtKgcaaWaaaaatSccMcVatgtKgtaKgcgatatgtagtSaaaDttatacaaa -catYaRRcVRHctKtcgacKttaaVctaDaatgttMggRcWaacttttHaDaKaDaBctg -taggcgtttaHBccatccattcNHtDaYtaataMttacggctNVaacDattgatatttta -cVttSaattacaaRtataNDgacVtgaacataVRttttaDtcaaacataYDBtttaatBa -DtttYDaDaMccMttNBttatatgagaaMgaNtattHccNataattcaHagtgaaggDga -tgtatatatgYatgaStcataaBStWacgtcccataRMaaDattggttaaattcMKtctM -acaBSactcggaatDDgatDgcWctaacaccgggaVcacWKVacggtaNatatacctMta -tgatagtgcaKagggVaDtgtaacttggagtcKatatcgMcttRaMagcattaBRaStct -YSggaHYtacaactMBaagDcaBDRaaacMYacaHaattagcattaaaHgcgctaaggSc -cKtgaaKtNaBtatDDcKBSaVtgatVYaagVtctSgMctacgttaacWaaattctSgtD -actaaStaaattgcagBBRVctaatatacctNttMcRggctttMttagacRaHcaBaacV -KgaataHttttMgYgattcYaNRgttMgcVaaacaVVcDHaatttgKtMYgtatBtVVct -WgVtatHtacaaHttcacgatagcagtaaNattBatatatttcVgaDagcggttMaagtc -ScHagaaatgcYNggcgtttttMtStggtRatctacttaaatVVtBacttHNttttaRca -aatcacagHgagagtMgatcSWaNRacagDtatactaaDKaSRtgattctccatSaaRtt -aaYctacacNtaRtaactggatgaccYtacactttaattaattgattYgttcagDtNKtt -agDttaaaaaaaBtttaaNaYWKMBaaaacVcBMtatWtgBatatgaacVtattMtYatM -NYDKNcKgDttDaVtaaaatgggatttctgtaaatWtctcWgtVVagtcgRgacttcccc -taDcacagcRcagagtgtWSatgtacatgttaaSttgtaaHcgatgggMagtgaacttat -RtttaVcaccaWaMgtactaatSSaHtcMgaaYtatcgaaggYgggcgtgaNDtgttMNg 
-aNDMtaattcgVttttaacatgVatgtWVMatatcaKgaaattcaBcctccWcttgaaWH -tWgHtcgNWgaRgctcBgSgaattgcaaHtgattgtgNagtDttHHgBttaaWcaaWagc -aSaHHtaaaVctRaaMagtaDaatHtDMtcVaWMtagSagcttHSattaacaaagtRacM -tRtctgttagcMtcaBatVKtKtKacgagaSNatSactgtatatcBctgagVtYactgta -aattaaaggcYgDHgtaacatSRDatMMccHatKgttaacgactKtgKagtcttcaaHRV -tccttKgtSataatttacaactggatDNgaacttcaRtVaagDcaWatcBctctHYatHa -DaaatttagYatSatccaWtttagaaatVaacBatHcatcgtacaatatcgcNYRcaata -YaRaYtgattVttgaatgaVaactcRcaNStgtgtattMtgaggtNttBaDRcgaaaagc -tNgBcWaWgtSaDcVtgVaatMKBtttcgtttctaaHctaaagYactgMtatBDtcStga -ccgtSDattYaataHctgggaYYttcggttaWaatctggtRagWMaDagtaacBccacta -cgHWMKaatgatWatcctgHcaBaSctVtcMtgtDttacctaVgatYcWaDRaaaaRtag -atcgaMagtggaRaWctctgMgcWttaagKBRtaaDaaWtctgtaagYMttactaHtaat -cttcataacggcacBtSgcgttNHtgtHccatgttttaaagtatcgaKtMttVcataYBB -aKtaMVaVgtattNDSataHcagtWMtaggtaSaaKgttgBtVtttgttatcatKcgHac -acRtctHatNVagSBgatgHtgaRaSgttRcctaacaaattDNttgacctaaYtBgaaaa -tagttattactcttttgatgtNNtVtgtatMgtcttRttcatttgatgacacttcHSaaa -ccaWWDtWagtaRDDVNacVaRatgttBccttaatHtgtaaacStcVNtcacaSRttcYa -gacagaMMttttgMcNttBcgWBtactgVtaRttctccaaYHBtaaagaBattaYacgat -ttacatctgtaaMKaRYtttttactaaVatWgctBtttDVttctggcDaHaggDaagtcg -aWcaagtagtWttHtgKtVataStccaMcWcaagataagatcactctHatgtcYgaKcat -cagatactaagNSStHcctRRNtattgtccttagttagMVgtatagactaactctVcaat -MctgtttgtgttgccttatWgtaBVtttctggMcaaKgDWtcgtaaYStgSactatttHg -atctgKagtagBtVacRaagRtMctatgggcaaaKaaaatacttcHctaRtgtDcttDat -taggaaatttcYHaRaaBttaatggcacKtgctHVcaDcaaaVDaaaVcgMttgtNagcg -taDWgtcgttaatDgKgagcSatatcSHtagtagttggtgtHaWtaHKtatagctgtVga -ttaBVaatgaataagtaatVatSttaHctttKtttgtagttaccttaatcgtagtcctgB -cgactatttVcMacHaaaggaatgDatggKtaHtgStatattaaSagctWcctccRtata -BaDYcgttgcNaagaggatRaaaYtaWgNtSMcaatttactaacatttaaWttHtatBat -tgtcgacaatNgattgcNgtMaaaKaBDattHacttggtRtttaYaacgVactBtaBaKt -gBttatgVttgtVttcaatcWcNctDBaaBgaDHacBttattNtgtDtatttVSaaacag -gatgcRatSgtaSaNtgBatagttcHBgcBBaaattaHgtDattatDaKaatBaaYaaMa -ataaataKtttYtagtBgMatNcatgtttgaNagtgttgtgKaNaSagtttgaSMaYBca -aaacDStagttVacaaaaactaaWttBaagtctgtgcgtMgtaattctcctacctcaNtt -taaccaaaaVtBcacataacaccccBcWMtatVtggaatgaWtcaaWaaaaaaaaWtDta -atatRcctDWtcctaccMtVVatKttaWaaKaaatataaagScHBagaggBaSMtaWaVt -atattactSaaaKNaactatNatccttgaYctattcaaaVgatttYHcRagattttaSat -aggttattcVtaaagaKgtattattKtRttNcggcRgtgtgtWYtaacHgKatKgatYta -cYagDtWcHBDctctgRaYKaYagcactKcacSaRtBttttBHKcMtNtcBatttatttt -tgSatVgaaagaWtcDtagDatatgMacaacRgatatatgtttgtKtNRaatatNatgYc -aHtgHataacKtgagtagtaacYttaNccaaatHcacaacaVDtagtaYtccagcattNt -acKtBtactaaagaBatVtKaaHBctgStgtBgtatgaSNtgDataaccctgtagcaBgt -gatcttaDataStgaMaccaSBBgWagtacKcgattgaDgNNaaaacacagtSatBacKD -gcgtataBKcatacactaSaatYtYcDaactHttcatRtttaatcaattataRtttgtaa -gMcgNttcatcBtYBagtNWNMtSHcattcRctttttRWgaKacKttgggagBcgttcgc -MaWHtaatactgtctctatttataVgtttaBScttttaBMaNaatMacactYtBMggtHa -cMagtaRtctgcatttaHtcaaaatttgagKtgNtactBacaHtcgtatttctMaSRagc -agttaatgtNtaaattgagagWcKtaNttagVtacgatttgaatttcgRtgtWcVatcgt -taaDVctgtttBWgaccagaaagtcSgtVtatagaBccttttcctaaattgHtatcggRa -ttttcaaggcYSKaagWaWtRactaaaacccBatMtttBaatYtaagaactSttcgaaSc -aatagtattgaccaagtgttttctaacatgtttNVaatcaaagagaaaNattaaRtttta -VaaaccgcaggNMtatattVctcaagaggaacgBgtttaacaagttcKcYaatatactaa -ccBaaaSggttcNtattctagttRtBacgScVctcaatttaatYtaaaaaaatgSaatga -tagaMBRatgRcMcgttgaWHtcaVYgaatYtaatctttYttatRaWtctgBtDcgatNa -tcKaBaDgatgtaNatWKctccgatattaacattNaaacDatgBgttctgtDtaaaMggt -gaBaSHataacgccSctaBtttaRBtcNHcDatcDcctagagtcRtaBgWttDRVHagat -tYatgtatcWtaHtttYcattWtaaagtctNgtStggRNcgcggagSSaaagaaaatYcH -DtcgctttaatgYcKBVSgtattRaYBaDaaatBgtatgaHtaaRaRgcaSWNtagatHa 
-acttNctBtcaccatctMcatattccaSatttgcgaDagDgtatYtaaaVDtaagtttWV -aagtagYatRttaagDcNgacKBcScagHtattatcDaDactaaaaaYgHttBcgaDttg -gataaaKSRcBMaBcgaBSttcWtgNBatRaccgattcatttataacggHVtaattcaca -agagVttaaRaatVVRKcgWtVgacctgDgYaaHaWtctttcacMagggatVgactagMa -aataKaaNWagKatagNaaWtaaaatttgaattttatttgctaaVgaHatBatcaaBWcB -gttcMatcgBaaNgttcgSNaggSaRtttgHtRtattaNttcDcatSaVttttcgaaaaa -ttgHatctaRaggSaNatMDaaatDcacgattttagaHgHaWtYgattaatHNSttatMS -gggNtcKtYatRggtttgtMWVtttaYtagcagBagHaYagttatatggtBacYcattaR -SataBatMtttaaatctHcaaaSaaaagttNSaaWcWRccRtKaagtBWtcaaattSttM -tattggaaaccttaacgttBtWatttatatWcDaatagattcctScacctaagggRaaYt -aNaatgVtBcttaaBaacaMVaaattatStYgRcctgtactatcMcVKatttcgSgatRH -MaaaHtagtaaHtVgcaaataatatcgKKtgccaatBNgaaWcVttgagttaKatagttc -aggKDatDtattgaKaVcaKtaataDataataHSaHcattagttaatRVYcNaHtaRcaa -ggtNHcgtcaaccaBaaagYtHWaaaRcKgaYaaDttgcWYtataRgaatatgtYtgcKt -aNttWacatYHctRaDtYtattcBttttatcSataYaYgttWaRagcacHMgtttHtYtt -YaatcggtatStttcgtRSattaaDaKMaatatactaNBaWgctacacYtgaYVgtgHta -aaRaaRgHtagtWattataaaSDaaWtgMattatcgaaaagtaYRSaWtSgNtBgagcRY -aMDtactaacttaWgtatctagacaagNtattHggataatYttYatcataDcgHgttBtt -ctttVttgccgaaWtaaaacgKgtatctaaaaaNtccDtaDatBMaMggaatNKtatBaa -atVtccRaHtaSacataHattgtttKVYattcataVaattWtcgtgMttcttKtgtctaa -cVtatctatatBRataactcgKatStatattcatHHRttKtccaacgtgggtgRgtgaMt -attattggctatcgtgacMtRcBDtcttgtactaatRHttttaagatcgVMDStattatY -BtttDttgtBtNttgRcMtYtgBacHaWaBaatDKctaagtgaaactaatgRaaKgatcc -aagNaaaatattaggWNtaagtatacttttKcgtcggSYtcttgRctataYcttatataa -agtatattaatttataVaacacaDHatctatttttKYVatHRactttaBHccaWagtact -BtcacgaVgcgttRtttttttSVgtSagtBaaattctgaHgactcttgMcattttagVta -agaattHctHtcaDaaNtaacRggWatagttcgtSttgaDatcNgNagctagDgatcNtt -KgttgtaDtctttRaaYStRatDtgMggactSttaDtagSaVtBDttgtDgccatcacaM -attaaaMtNacaVcgSWcVaaDatcaHaatgaattaMtatccVtctBtaattgtWattat -BRcWcaatgNNtactWYtDaKttaaatcactcagtRaaRgatggtKgcgccaaHgaggat -StattYcaNMtcaBttacttatgagDaNtaMgaaWtgtttcttctaHtMNgttatctaWW -atMtBtaaatagDVatgtBYtatcggcttaagacMRtaHScgatatYgRDtcattatSDa -HggaaataNgaWSRRaaaBaatagBattaDctttgHWNttacaataaaaaaatacggttt -gHgVtaHtWMttNtBtctagtMcgKMgHgYtataHaNagWtcaacYattaataYRgtaWK -gaBctataaccgatttaHaNBRaRaMtccggtNgacMtctcatttgcaattcWgMactta -caaDaaNtactWatVtttagccttMaatcagVaagtctVaaDaBtattaattaYtNaYtg -gattaKtaKctYaMtattYgatattataatKtVgDcttatatNBtcgttgtStttttMag -aggttaHYSttcKgtcKtDNtataagttataagSgttatDtRttattgttttSNggRtca -aKMNatgaatattgtBWtaMacctgggYgaSgaagYataagattacgagaatBtggtRcV -HtgYggaDgaYaKagWagctatagacgaaHgtWaNgacttHRatVaWacKYtgRVNgVcS -gRWctacatcKSactctgWYtBggtataagcttNRttVtgRcaWaaatDMatYattaact -ttcgaagRatSctgccttgcRKaccHtttSNVagtagHagBagttagaccaRtataBcca -taatSHatRtcHagacBWatagcaMtacaRtgtgaaBatctKRtScttccaNaatcNgta -atatWtcaMgactctBtWtaaNactHaaaaRctcgcatggctMcaaNtcagaaaaacaca -gtggggWttRttagtaagaVctVMtcgaatcttcMaaaHcaHBttcgattatgtcaDagc -YRtBtYcgacMgtDcagcgaNgttaataatagcagKYYtcgtaBtYctMaRtaRtDagaa -aacacatgYaBttgattattcgaaNttBctSataaMataWRgaHtttccgtDgaYtatgg -tDgHKgMtatttVtMtVagttaRatMattRagataaccctKctMtSttgaHagtcStcta -tttccSagatgttccacgaggYNttHRacgattcDatatDcataaaatBBttatcgaHtN -HaaatatDNaggctgaNcaaggagttBttMgRagVatBcRtaWgatgBtSgaKtcgHttt -gaatcaaDaHttcSBgHcagtVaaSttDcagccgttNBtgttHagYtattctttRWaaVt -SttcatatKaaRaaaNacaVtVctMtSDtDtRHRcgtaatgctcttaaatSacacaatcg -HattcaWcttaaaatHaaatcNctWttaNMcMtaKctVtcctaagYgatgatcYaaaRac -tctaRDaYagtaacgtDgaggaaatctcaaacatcaScttcKttNtaccatNtaNataca -tttHaaDHgcaDatMWaaBttcRggctMaagctVYcacgatcaDttatYtaatcKatWat -caatVYtNagatttgattgaYttttYgacttVtcKaRagaaaHVgDtaMatKYagagttN -atWttaccNtYtcDWgSatgaRgtMatgKtcgacaagWtacttaagtcgKtgatccttNc 
-ttatagMatHVggtagcgHctatagccctYttggtaattKNaacgaaYatatVctaataM -aaaYtgVtcKaYtaataacagaatHcacVagatYWHttagaaSMaatWtYtgtaaagNaa -acaVgaWtcacNWgataNttcaSagctMDaRttgNactaccgataMaaatgtttattDtc -aagacgctDHYYatggttcaagccNctccttcMctttagacBtaaWtaWVHggaaaaNat -ttaDtDtgctaaHHtMtatNtMtagtcatttgcaaaRatacagRHtatDNtgtDgaatVg -tVNtcaaatYBMaaaagcaKgtgatgatMgWWMaHttttMgMagatDtataaattaacca -actMtacataaattgRataatacgBtKtaataattRgtatDagDtcRDacctatRcagag -cSHatNtcaScNtttggacNtaaggaccgtgKNttgttNcttgaaRgYgRtNtcagttBc -ttttcHtKtgcttYaaNgYagtaaatgaatggWaMattBHtatctatSgtcYtgcHtaat -tHgaaMtHcagaaSatggtatgccaHBtYtcNattWtgtNgctttaggtttgtWatNtgH -tgcDttactttttttgcNtactKtWRaVcttcatagtgSNKaNccgaataaBttataata -YtSagctttaaatSttggctaaKSaatRccgWHgagDttaaatcatgagMtcgagtVtaD -ggaBtatttgDacataaacgtagYRagBWtgDStKDgatgaagttcattatttaKWcata -aatWRgatataRgttRacaaNKttNtKagaaYaStaactScattattaacgatttaaatg -DtaattagatHgaYataaactatggggatVHtgccgtNgatNYcaStRtagaccacWcaM -tatRagHgVactYtWHtcttcatgatWgagaKggagtatgaWtDtVtNaNtcgYYgtaaa -ctttaDtBactagtaDctatagtaatatttatatataacgHaaaRagKattSagttYtSt ->THREE Homo sapiens frequency -agagagacgatgaaaattaatcgtcaatacgctggcgaacactgagggggacccaatgct -cttctcggtctaaaaaggaatgtgtcagaaattggtcagttcaaaagtagaccggatctt -tgcggagaacaattcacggaacgtagcgttgggaaatatcctttctaccacacatcggat -tttcgccctctcccattatttattgtgttctcacatagaattattgtttagacatccctc -gttgtatggagagttgcccgagcgtaaaggcataatccatataccgccgggtgagtgacc -tgaaattgtttttagttgggatttcgctatggattagcttacacgaagagattctaatgg -tactataggataattataatgctgcgtggcgcagtacaccgttacaaacgtcgttcgcat -atgtggctaacacggtgaaaatacctacatcgtatttgcaatttcggtcgtttcatagag -cgcattgaattactcaaaaattatatatgttgattatttgattagactgcgtggaaagaa -ggggtactcaagccatttgtaaaagctgcatctcgcttaagtttgagagcttacattagt -ctatttcagtcttctaggaaatgtctgtgtgagtggttgtcgtccataggtcactggcat -atgcgattcatgacatgctaaactaagaaagtagattactattaccggcatgcctaatgc -gattgcactgctatgaaggtgcggacgtcgcgcccatgtagccctgataataccaatact -tacatttggtcagcaattctgacattatacctagcacccataaatttactcagacttgag -gacaggctcttggagtcgatcttctgtttgtatgcatgtgatcatatagatgaataagcg -atgcgactagttagggcatagtatagatctgtgtatacagttcagctgaacgtccgcgag -tggaagtacagctgagatctatcctaaaatgcaaccatatcgttcacacatgatatgaac -ccagggggaaacattgagttcagttaaattggcagcgaatcccccaagaagaaggcggag -tgacgttgaacgggcttatggtttttcagtacttcctccgtataagttgagcgaaatgta -aacagaataatcgttgtgttaacaacattaaaatcgcggaatatgatgagaatacacagt -gtgagcatttcacttgtaaaatatctttggtagaacttactttgctttaaatatgttaaa -ccgatctaataatctacaaaacggtagattttgcctagcacattgcgtccttctctattc -agatagaggcaatactcagaaggttttatccaaagcactgtgttgactaacctaagtttt -agtctaataatcatgattgattataggtgccgtggactacatgactcgtccacaaataat -acttagcagatcagcaattggccaagcacccgacttttatttaatggttgtgcaatagtc -cagattcgtattcgggactctttcaaataatagtttcctggcatctaagtaagaaaagct -cataaggaagcgatattatgacacgctcttccgccgctgttttgaaacttgagtattgct -cgtccgaaattgagggtcacttcaaaatttactgagaagacgaagatcgactaaagttaa -aatgctagtccacagttggtcaagttgaattcatccacgagttatatagctattttaatt -tatagtcgagtgtacaaaaaacatccacaataagatttatcttagaataacaacccccgt -atcatcgaaatcctccgttatggcctgactcctcgagcttatagcatttgtgctggcgct -cttgccaggaacttgctcgcgaggtggtgacgagtgagatgatcagtttcattatgatga -tacgattttatcgcgactagttaatcatcatagcaagtaaaatttgaattatgtcattat -catgctccattaacaggttatttaattgatactgacgaaattttttcacaatgggttttc -tagaatttaatatcagtaattgaagccttcataggggtcctactagtatcctacacgacg -caggtccgcagtatcctggagggacgtgttactgattaaaagggtcaaaggaatgaaggc -tcacaatgttacctgcttcaccatagtgagccgatgagttttacattagtactaaatccc -aaatcatactttacgatgaggcttgctagcgctaaagagaatacatacaccaccacatag -aattgttagcgatgatatcaaatagactcctggaagtgtcagggggaaactgttcaatat 
-ttcgtccacaggactgaccaggcatggaaaagactgacgttggaaactataccatctcac -gcccgacgcttcactaattgatgatccaaaaaatatagcccggattcctgattagcaaag -ggttcacagagaaagatattatcgacgtatatcccaaaaaacagacgtaatgtgcatctt -cgaatcgggatgaatacttgtatcataaaaatgtgacctctagtatacaggttaatgtta -gtgatacacaatactcgtgggccatgggttctcaaataaaatgtaatattgcgtcgatca -ctcacccacgtatttggtctaattatgttttatttagtgacaatccaatagataaccggt -cctattaagggctatatttttagcgaccacgcgtttaaacaaaggattgtatgtagatgg -taccagtttaattgccagtgggcaatcctaagcaaaatgagattctatcctaaagtttgg -gcttgatataagatttcggatgtatgggttttataatcgttggagagctcaatcatgagc -taatacatggatttcgctacctcaccgagagaccttgcatgaagaattctaaccaaaagt -ttaataggccggattggattgagttaattaagaccttgttcagtcatagtaaaaaccctt -aaattttaccgattgacaaagtgagcagtcgcaataccctatgcgaaacgcctcgatagt -gactaggtatacaaggtttttgagttcctttgaaatagttaactaatttaaaattaatta -acgacatggaaatcacagaacctaatgctttgtaggagttatttatgctgtttactgcct -ctacaaccctaataaagcagtcctaagaatgaaacgcatcttttagttcagaaagtggta -tccagggtggtcaatttaataaattcaacatcgggtctcaggatattcggtcatataatt -tattaagggctcttcgagtcttactctgagtgaaattggaaacagtcatccttttcgttg -tgaggcatcttacaccgctatcgatatacaatgcattccaccgcggtgtcccgtacacaa -ggaaacttgttaccttggggatataagaaaactcacacgtctcattattaaactgagtac -aatttttgcacgagaaagtaatgcaatacaatatgatgaaagccagctaatgaaaaggga -tggaacgcacctcggatctgttgcactggattaaaatccgattatttttaaaaatattca -gtgctagagcatatcaggtctacttttttatctggtatgtaaagcccacggagcgatagt -gagatccttacgactcaacgaaaagttataacataactcccgttagccaaagcccaatcc -cgattactgccctaccctaacgtctgccatctaaatatcgaacttgttatgatcaatgtg -actacctcccaccctttccccttcatttgttccactggggataagctagcgttttcagaa -tcaatgcaataagaatagccaattgtctcacttcatcagagctcttggcaattccaggcg -ctacgtggttctggaatatattcatttttcaaatagtaatacgtttagtgttgctattgt -ctacacgtttggatattacgttatgtgagcggacatcaatagttgtctaactctttagta -agccagagatagcactcttagcgaatggataccatcttccataagtttagttaatagtcc -gaaacaactgcttcgagcatatttgaacctccttgtaggcaaatagcctcttcaaagcaa -tcttactaatagatagagtttgttttaagggactactagaaatgggacaatcttaatagt -atgacctaaactgacatttaaagatatatccaggtggcaagcataaagatcattgcgcca -cctccaccgtgggattacttatcagtcgatatcctatatgctaagtttgcgacggcagaa -tacaaactaagctgagttgatgctaaccttacctatgataccccattggaccggttaaca -gccctacttattccaaataaaagaacttttatgctgtagaagctattatagtgatgcctg -gtaacttcagtatattaaaatgacacacatacgccatatagagctcctggaactttgaat -aatgagcgaacttcgaagttgaagagcaagaaaccatatgtcacggttgcctaaagcccg -gtaaccagacatgtgctatcattgatcattatcgaggttttcataaccttgacccattat -cggctgtgcgcggacaagtacttaaatcactagtttcttcacctgcttatcggtaagaaa -taaggttggcaaagaatcgcataagacggacgtagagccgcagcgttgtgcgagtccagg -tgcatgcgcagcaataggattttaaattttgttccatttttaatttagccgtaaggatgt -ccgtaaatgattgaaaattggattcaatctttgggcctatgctactggaacctgatcgac -aaaatttcaaacatacgttaactccgaaagaccgtatttttgcggctagaatagtcagtc -gcttggagccatataccttaccacttaaacgacgtgctcctgtagttgaaatataaacag -aacacaaagactaccgatcatatcaactgaagatctttgtaactttgaggcgaagcaccc -tcttcgagacaactaagagtaaagtaccgggcgccgcaaggagtcgattgggaccctaaa -tcttgacgaattgctaagaggctcagagctaccactgtaatttctctagagcccataata -aatgaacgatacatccgtaggtagcacctaagggattataatggaagccaaatgcagtta -ataatattatatactggcgtacacgattcgacggatctctcacatagtgattcacgaccc -ccccctttgattgacacagcgtcagcattttgcaagaacgatcttctgcatagggtgcgc -caccgtaaggatgacgtcgaagctacaactgggtataatttaccatgcttccctgatgct -gagtgcaatacactaagaatgagtttttaccccatatcaccagtatttgttctgttattg -cgaagaaatggctatgctgagttggcgactaaagtcacccatcctttttattaggtaacc -ccctcccttaaactaactgatttgctggagctgccctgcatacatatactttatcattta -tggacgtccgtgacgcttattatccaccatagtcgatatgctacacggattcattaatgg -atcgtaggagtttaagttatatttactaagatcggtctcggctactatcccgccttaccc -ggcgctatttacggccatttttaatatattgacggtaattattcctatggtttcgaccgc 
-acgtccttggacaagaaagaatggcaaaaaaaatgtaaaagaaaaaaaatattgagtccc -taccatcatataaaaaatatgtgatgagtaacttgacgaaatgttagtggttattaaaga -ctatctattacaccttttgttttctgtcgtagtatattaaagtctagaagccttacagga -aaatcagggttatacagccgatactccgcagcatgaatcatcgaggaggtgtcctaccat -cgcgccttgtaatcttgtctgtgtatactgtatttagaccttttatacaaagtaaatatc -tcggctttatgtgattgggaggggcctactcaaacatgatgacttgacctaataatcact -gtgcgggcgtcttatgactagctattccttgaaatccaccaccaaatggttaatatgtaa -aaactttgacgatgaaacaaggtgaatgtgtagttactttgtgtaattagctgcgtcgag -cattgcttgtaaaaccgtcaatcgcacacgttacttccataaaatttctacgaatacacc -cttcttaaaaaaaacgtaggaattcacgagtttaacaaacgataactgtataaagtggaa -gtccgaagaaagcagatgcccgaactactcgaagatgtttcgttttcttaaccatagggg -cttcttaatggcccactacgcacattttgttcaagcccgagagggacatccccattacgg -gagtattactaaaactgttccgtaatacgttcagcaagggatgaaaaaggccactgctca -agttattgacgtgggagtattacatcggaagcctgaatcccacactatgatggtctgtac -aggcctagggactgcgtctagacggtattaccggcttctaatcatacgatcgtgagtctt -aacgggaagtaaggctcacacctaccccaaaccatttatctatgtaagtataaaattgtg -cgtaagtgttcaaagtggacaataaagacgtggcaaaaacccccgcacataagccgcttt -agatttcacaaataccaatgcggttaaaaacatccttgagtcgtacatacaccatactcg -cgttaaacggatataacagaagataataaatccggatgtggagtcggtgtaactatagaa -agccaagtgaaataatgcttaccagtcatttagctatacggctttcatttcatgtcaaga -gggtggagtttgacctgtacagttgatatatcaccgatacttagaactcacctaaagcta -aaattgctcgcagcgtgtaatccgcatattacaaacaatagatgggattcattatacata -agacacgatgatctgctttttcaggttgcgagatgttgcctatcgtcaatcgagtcctgc -cttacaccacttaaacaaaagtattgacagggaacctattttcgaggtattatatagtcc -agcttgaatatcaatttgacagttaacctagtgaaaatcagtaagaggaaatacgccaca -ttctccagtgaaattctacgggttatcgtctagtccaactatcaattataactcacgaga -tataagtaaattctcgtacttggcctgatttttattatactttggatccttagtaaacag -gaagggagaaaccttcaacgaaaaacactggattttgttttactctcaaagctcttatat -gacggaaataccctgtcaagtcttaactttattactagactaatgaaatgggcttggggt -ggccagaatcatagtacaatttagcggatacactattcggactttcctatcggctgtctg -gttggataagtatggggactaataggctagacatacctatacttaaactatacaggcgtc -atctatctctgcaactttggagttccctgatgttctcccgccctttgggttcacatcttc -tataccgacacccctaataacgattagtttgtgggttagagtaaattaatacggttaata -ttaatgtatcgttgaaaagctggtgtcgccaataaggtaaccggctaggcagagtatatg -tcacgaagtataactaccctaatgataagctgtaggaataaaattaatgctgtctctaag -cgaagagatatttccgactctgttttaatgacgaatctcattacttctgacttgcaaatg -ttcaatatggcacggtttcacggcacctttgtgacgcatataatgaacttagaagattat -aacgacggaactttatatgataatccgttacgattaaagaatctgttaaatatcataatg -gcattcagttctagaccgtgcatcatggtaaacttactttctctgcatggcgacatacat -ttcgctattcaaattcgcgtgtggttacacccactcgcacctttggaatattaagagaag -atgatcagaaaatccattcgctcaatttttctgacgtacgtctaatttatcctaggagac -aaatcgttttatgtctctcacatttttgaagaaaggttcgagagacaatactcaggtcct -gaactgctagaagatactcggtggagcgtggcaacaatgaaaaactcgtgacataaatga -atgatacttttccaagttcagttaagtgaatatgtttaacatacccggcttttcgatctt -aagctgacgctggacgtgcgagtaatgtcagtctcttacatacactagtgactccaagtt -tcgtcaaaaacgccccctcccttctcgagcccactcacgctatgtattgacgcgaacttg -ttcgggatcagacttttcaggagttcggtcgcgtgtccctatgtgctaatatataagtta -gatcgcattagatgctaatctgaatacttatagacgaccttcaacgagaacgggtaccac -cttgaggctagagttaggtgtgaaacgacaggtagggacatataaaatttgagtgcggct -ttagttaagggtttaattacctactcaaacatcacgctcgcgcccttcgtacgtaatcga -ccatctagaggctaaggggactgtactaggtagtgattaatgatatcctagacgcacgtg -ccttagatcttcagactctgatggtccgcgatcaccgtaattgtagtcctccaactcgat -cactttgttggcgtcaaagaaattacgatatctaaatacttataatacaataaccaagga -tgagaatgactcatcgcgttggagttatattgcttgaagttctatggaatgaaagcacgt -tatctgccgtcccaatatctccagtgagctaattcattggacggtccactttgatcaatc -cccgaggagatgttcggacactttagtctgtaacacttagcgttgagaccacgaacaatt -gattactcagtcttgaaggtgttttccaaagttcattttaaataagactacgataggcct 
-ttcctattgatataaactacccggctctgttgttcgtgtgagtcgtacttctctgtgttt -ttctgattatagcaagattcgattcttagtgtaaacagcgatttttatttgacccgtcaa -tgagaagcgcataggatctaagcaaaattatcaagttgtgccacaaggtaagatctttcc -agttattgcaggtaggatgtatcccacgttgatagtatgaggtctgacgtcaactgtcta -ggagagttgaccgcgtgcgggtacaccggatttgcatcgatgttgagaacgcagaactcc -cactgtcgtggcggcgttcctgatatttagcaagaggcgttgataaagccctcatcatct -agatctcgacctcatctgccctcttgctccatcattttctacacagactactttcctatc -tacgttagtataattgctttctatcttagtatcatttagagcttctccgtcaacaggttc -gtgctattaaagttagtacgaaagggacaacttgtagcaacgcatttaatcggttttcga -ctacttcgcacaaaatcagataaagaagtttgtcattctattagacattgaattgcgcaa -ttgacttgtaccacttatgatcgaacactgaatcaagactgtgattaactaaaatagaca -agccactatatcaactaataaaaacgcccctggtggtcgaacatagttgactacaggata -attaattggactggagccattacattctctacaatcgtatcacttcccaagtagacaact -ttgaccttgtagtttcatgtacaaaaaaatgctttcgcaggagcacattggtagttcaat -agtttcatgggaacctcttgagccgtcttctgtgggtgtgttcggatagtaggtactgat -aaagtcgtgtcgctttcgatgagagggaattcaccggaaaacaccttggttaacaggata -gtctatgtaaacttcgagacatgtttaagagttaccagcttaatccacggtgctctacta -gtatcatcagctgtcttgcctcgcctagaaatatgcattctatcgttatcctatcaacgg -ttgccgtactgagcagccttattgtggaagagtaatatataaatgtagtcttgtctttac -gaagcagacgtaagtaataatgacttggaataccaaaactaaacatagtggattatcata -ctcaagaactctccagataaataacagtttttacgatacgtcaccaatgagcttaaagat -taggatcctcaaaactgatacaaacgctaattcatttgttattggatccagtatcagtta -aactgaatggagtgaagattgtagaatgttgttctggcctcgcatggggtctaggtgata -tacaatttctcatacttacacggtagtggaaatctgattctagcttcgtagctgactata -ctcaaggaaccactgctcaaggtaggagactagttccgaccctacagtcaaagtggccga -agcttaaactatagactagttgttaaatgctgatttcaagatatcatctatatacagttt -ggacaattatgtgtgcgaaactaaaattcatgctattcagatggatttcacttatgcctt -agaaacagatattgcccgagctcaatcaacagttttagccggaaacaatcgaagcatagg -gacaatgtatcttttcctaaattgccatgtgcagatttctgagtgtcacgaagcgcataa -tagaatcttgtgttgcctcaactcgttgaaaagtttaaaacaatcgcagcagtctttttg -gggtctactgtgtgtttgcaaaataactgaaagaaacgcttgaacaactctgaagtagct -cgagtactcattaaagtgtaacacattagtgaatatcggccaatgaaccaaacgcttccc -ggtacgctatctctctcatcgggaggcgatgtgcaggttatctacgaaagcatcccttta -cgttgagagtgtcgatgcatgaacctcattgtaacaatagcccagcaaattctcatacgt -gcctcagggtccgggcgtactcctccatggaagggcgcgcatctagtgttataccaactc -gctttttaactactatgctgtagttctacaggcatagtggccagtattttctaacttctc -tggatagatgctctcactcctcatccatcacggcttcagtttacgtcttacttgcttgtt -cagcaacggatggaggcattaagtatcttcactgttccctaaaattgctgttcaatatca -aagtaaggacgatacagggaaagctcaagcacactcattgaatactgccccagttgcaac -ctcacttaatctgacaaaaataatgactactctaagtgttgcggaagcagtctcttccac -gagcttgtctgtatcacttcgtataggcatgtaactcgatagacacgaacaccgagtgag -aaactatattcttgcttccgtgtgtgtgacaccaggtaattgatgcggatataagctgga -gatcactcacgcccacacaaggcgctgctacctctttattccaatgtgtaagaatttgct -aacttcatttctagaccgcagctttgcggtcataatttcacggtacggacccttgggtta -gagacttgataacacacttcgcagtttccaccgcgcacatgttttagtggcttctaacat -agaatttttgttgtgacataaagagtgcgtgggagacttgcccgaccgttaagccataat -caattgaaagccccgtgagtcacatctaattggttgtactgcgcatttagctatccttta -gctgactcgaagagattcgattcctaatataggttaattagatggctgccgcgcgaagta -aaacgtgaaaaacgtagtgcgcagatctgcataactcgcgcttaattacttatgagtagt -tccaagttcgctacgttatgagagagattggaattaagcaaatatgttttatggtgattt -tgggatgagaaggactgctaagtacggctactaaacaaatttctaaaaccgccatctacc -ttatcttggagacatttaagttgtatatgtcactagtctagcttttgtctgtgggacgcg -ttctcggaatgagggaaatgcaagagccgattcatcaaatgcttatctaagaaagtagtg -gactattacaccaagcacgaatgccagggaactgctttcttgctcaggacctcgcgacaa -ggtaccccgcataagtcctagaattacatttggtcagcaatgctgacatttgaccgtgaa -aacataattttaatcagaaggcagctcacccgcttgctctagatcttatctttgtatgaa -tgtcagaatttactgcaatatccgttccgaatagtgagggcttagtatagttctctgtat 
-acaggtcacatcaaactccccctgtcctagtacagctctgagctttaattaattgcatac -atttccttcaatcatcagatgaaaacaccgcgaatcatgctcttctcgtatagggcaaga -gaagcaacaaacaactagcccgactcacgttcatccgccgtatccttgttcagttcttac -tccgtattaggtcagcgaaatctaatcagaataatcggtcgcgtatcaaaattaaaatcc -cgcttgaggttgacaattaaaacgctgagcagttatcggctattagatagtggggtgaaa -gtaattggctggaattatgttaaaacgtgatattaagctaaaatacgctacttgttgccg -acctaattcagtcattcgatattcagttagagccaagaataacaagcttgtataaattga -acggggtgcactaaacgatgtgttactctaatattcagcttggagtatacctgaaggcga -attcatgtatcggccaataataagacgttgaagatcacaatttggactagcaaaagaagg -tgatttatgcgtggggattgagtccactgtacgagtacggtctctggaaaattataggtt -cagggaatataaggaagtaaagataattaccaagagatttttggtatcgctatgacccag -aggtgttctaacgtctgttttgatccgcagaatttctgcctcaatgcatatttgacggac -ttgaactagagcctctaaagttaaatggcgacgcaactgttcctaaacttcaattattac -tactctttttttcctagggtattgtagaggccagtggacaaaataaatcaaatttaagat -gtttcggacattaacatcccccgtagcatagaaatcatcagttatccaatctctcatcga -gcttttacaatttctgctggcgctatggacagcatatgccgcgagacctccgcaagactc -acttgatcactgtaagtatcttcattagaggttagagcctatagttaagctgctgaccta -gtaaaattggtattttctaattttattgctcaagttaaaggttagtgaagggataatgac -gttatttttgaacaatgggttgtattcaattttatatcacgaatggaacccttcattccc -ggcataatactagacgacacgaacaagctccgatctatcagccaggcacgtgttaaggtt -taattccggcaaaccaatgaagcatcaaaaggtgacctgatgcaacttagggtcacgatg -agtttttcaggactacttattacctattaataagttaacatgagccttcataccccgtaa -gacaatacatactccaccaattagaattctgagccatcttatctttttgtatcatcgaag -ggtatggccgaataggttaattagttactcctaacgtctctacaggcatgcatttgacgc -accttcgaaaatagtcaatctctcgccacacgcgtctagtatgcagcatcaaaaatatag -tccacggtttccggattaccaaacgcggcaaagagaaacattgtatcgacggagataact -taatacagaaggaaggggcatcttcgaatacggatgaataattctatctgtttattctga -catcttgttttcaggttaatcttacgcattcaaatgacgcctgccccatgcgtgcgcaat -tattttctaatattgacgagagcaatctcactccttttgggtctatttatgttttattga -ggcacaagcctatacagaacaggtactattaaggccgtgagtgtgagactcaaaccgtgg -aaacaaaggatgggttgttcttggtacaagttttagtgcatgtgggcaatccttaccaaa -atcagatgctatccttaactttgggctgcatttaagatggcggttggaggcctgtgagaa -tcctgcgtgtcatctttaatgaccgaattcatccatgtagattcagatcacacactcatt -ccttgatgttgtctaaacaaaagttgttgtggacgcattggagggagttaagtaacaact -tgggatcgcatacttataaaaattatatgttaaactttcacaaacgctgaagtccaaagt -aactagcccaaacgcctcgagagtcactaggtattaatggtgtttgagttcctgtgaaat -agtgttcgaaggtaaaatttatgtaccaaatcgaaagaacacttaataaggcttgcttgc -acggaggtatgatgtttactgactctacaaccctaattttccagtacgtacattcattcc -aataggttagttctcaaagtgctatacaggctcctcaattgatgatatgcttcagccgct -ctatggatattagctcattttatttaggaagcccgcttagaggcttactatgagggaaat -gccaaaatgtcatacttttcggtgtgtcccatatgacaccgctttacatagaatttgaat -taaaacgcgctctcccgttcactaccatacttggtaccgtgcgcatattacatatagata -taggatcattttttaaagctgtactaggtttgatcgacaatcttatgctatactatatga -tgtaaccctcataatcaataccgatcgtacgatcctagcataggtggcaagcgattttat -gccgattattgtgttaaatagtctgtgagtgtgattatcagggctacgttggtagagggg -ttgtatagacctcgcacacattgtgacatacttaacaatatacgaaaactgatataataa -atccccttacccaaacaccaatcccgttgaatcaactaccataacgtctcccatataaat -tgcctacttgtttgcataaatctgaatacataacaccattgcaccttcttgtgttccaat -cccgttaagattgccttgtcagatgatatgcaagaacaatagcatttgctagcaattatt -aacagctcttcgaattgcctccacataacgcgggagggtatattttaatttggcaaatac -taagtactgttggcgtcatatgctattaacggttggatattaagttatgtcagccgtaag -caagagtgggcgaaatattttgttacccagtgagagcactcttagagtttggatacaata -ggccatatgttgacttaagaggacgtaactacgccgtacaccattgttcaaccgacttct -tggcaaatagaatcgtattagcaatcttaagaatagagacacgttcgtgttagggtatac -tacaaatccgaaaatcttaagaggatcacctaaactgaaatttatacatatttcaacgtg -gatagatttaacataattcagccacctccaacctgggagtaattttcagtagatttacta -gatgattagtggcccaacgcacttgactatataagatctggggatcctaacctgacctat 
-gagacaaaattggaaacgttaacagcccttatgtgtacaaagaaaagtaagttgttgctg -ttcaacagatgatagtcatgacgcgtaacttcactatagtaaattgaaacaaatacgcaa -tttagacagaatggtacggtcatgaatgacagtaattcgaagtgctagaccaacttaaaa -taggtaaacgtgcccgaaaccccccttaacagaaagctgctatcatggtgcagtatcgac -gtgttcagaaacttgtaacttttgagcaggtccgagcacatggaagtatatcacgtgttt -ctgaaccggcttatccctaagatatatccgtcgcaaactttcgatttagtcccacgtaga -gcccaagcgttgtgcgactccacgtgcatgcccagaaatacgagtttaaatttggttaca -tggttaattttgaccgaagcatcgcactttatgattgataattggattcaatatgtcgcc -ctatgcgaatgcaacatgatccacaatttggctataagacgtttaatccgtatcacactt -tgtttgcggctagtatagtaacgcccgtgcaccaagagtcagtaacaattataagtactc -cgcaggtacttcaaatataaaaactaatcaaacacgacccatatgatcatctgaagatat -ttggaactttctcgacaaccaccctcgtactcaatacttacactaatcgacaggcacacg -caacgtgtacagtcgcaccatattgagtcaagatttgcttagtggcgatgagcgtacacg -cttatttctctagtcacaattagttatctacgagacatcacgagggagcaaataagcgat -gttatggctacacataggcacgtatgaatatgatataagccagttaaacagtcgaaccat -cgagcaaattctcatgcaccaacccacacgttgaggcacaaagagtaagctgtttgaatg -taacttcttctgctgagcgggccccaacgtaaggatcaactagaagagaaaactcggtat -tagtttaaatgcgtcacggagcatgagtgcatttcactaagaatgtctgtgtaaccaata -taacatctatttgttatctgattgcctacttatggctttgcggtcgtggcgactaatgtc -tccaatccttttgaggtcggtaccaactccctttaaattacgctgtgcaggctcatgcac -tgcatacatatacggtagcaggtagggacctcacgcacccttattataatcaatagtagt -tatcagtcaacgaggcaggaatgctgaggtcgaggtgttggtatattttctatgtgccgt -ctaggcgactatcacgcattaccaggcgagatttaagccaattttgaatatagtcaacgt -aatttttactatgggttccaccgaaacgccttgcacaactaagaatcccataaaatatcg -atatcaaataaaagattgtgtcaataccttcatatatattttttcggttgactaacgtga -actaaggttaggggttttgtatgtctatataggaaacagtttcttttctgtcctacttta -gtaaagtcttcaagccttactccaaaatcacggtgattaagccgttactcagcagcatga -ttctgcctgctcgggtcctaaaatccagccttgtaagagtcgctgtgtattagctaggga -gacctttgttaaaaaggatatatcgcggcgggatgtgagtgcgtggcgcatactcaatct -tcagctcgtgtcattataatatctctcccccacgcttttcactagatatgccgtgtaagc -aaacaccttatgcttaatttcgaaaatattggtacttgaaaaaagctgtaggggtactta -atgtctggtaggagatcaggagagaattgagtgtaaaaccgtaaagccctcacctgactt -catgtaaatggcttagaagactccatgatttaataaatactacgaaggaaagactggatc -taaagataactctagtaaggccaactcccttcaatgctgttgccagttataatccaagag -ctgtccttttctgaaccatagcggcttctgaagcgaactagaagcaaagttggttctagc -cagacagccacataccctgtacgggtgtattactaaaactggtccggtattagttcacca -agggaggaattaggcaaaggatctaggtatgcaagtcggagtattacatccctaccctga -atccatcaataggttcctctgtactggccttcgcaatgagtattcaaggttgtacagccg -tataataataagatagtgactatgaacgggaagtaacccgctcaccttccccaaaacatt -gttatatctaagtattaaagtctgccgtagtgttaatactcgaaaataaacaactggcaa -attacaccgcacttaagccgcttttgatttatatttttccaatgcgcttttaaaaataat -tcagtcctacatactaattaagacccttaaacggagatatcacaagttaagttttaacca -tctcgactaggtggaactatagatacccaactcaatttatcattacctgtaatgttccta -gaaggattgcatttcatgtcaagacggtggagtttcacagcgaaacttcagtgtgaacag -attctgagaaatcacctaaacctattagtcagagcacccggttagaaccagttgtcaaaa -aatagagcggttgcatgagacagaagtaacgatgagatccgttgtaacgttgagacatct -ggcctatcgtcaatacagtcctcccttaaaaatatttttaaatactaggcaaacccaaca -taggttagtcctatgtgatacgccacatggtatatcattttgtaacgttacctagggata -atcaggaagtggaattacgcaaaagtagacagtgaaatgcttagggttatagtctagtcc -aaagataaaggataaagcacgtcagagaactatattagccgaatgggaatcattgttagg -agactgtggatcatgtctaaaaagcaacgcagaaacagtcatcgaaaaaatctcgttttt -gtttgaatctaaaagagctttgatgaccgatagtacctgtatactagttactgtattacg -tgtctaatgatttcggattggggtccccagaatcagacgtcattgtagacgattcaagtt -taccaatttaatttcccagctctccttggagaactatcgccaataattgcagtcactttc -cttttctgaaacgataaagccgtcagagttctctgcaacgttggacttacctgaggttct -aacccactttcggttctaatagtagttaacgacacaacgaataacctttactgtggggct -ttcacgatattttttcgcttattattaatggttacgtcataagctggtgtccaaattaag 
-gttaccggcttcgcagagtagttgtatccaagtataacttccctaatcataagatcgagg -tagaaaattaatgctgtctctaaccgaacagatatgtcccactatgtggtatggacgttg -ctaattacttctgaagggaaattggtcattatggatacgtgtctaccatcaggtcggacg -cagatatggttctgtcttcagttgatccaccgttctttataggataataactgacgatta -aagattatggtaaatagattaagccaattctcttcttgtcagtgaagcatccttaactga -cttgctctgcagcccctcatacatttagctattcaaagtaccggctcgtttcaaactctc -ccacctttggaagaggttgtcaacttgataagtatatcatttacagcattttttcggacg -tacctctaatgtttcattgcagaaaattagttttttctatcgcacattttgcaagtaacg -ttagagacacaattatctgcgaatgaactgctagatctgacgaccgggagcctcgcaaat -atcaaaaaagactgacatatatcaaggagtcgttgacaagtgctggtaagtcaattggtt -tatctgtcccggcgtttcgatcttaagctgaccatgcacggcagagtaatgtcactctcg -ttcttacaagtctgtctccaagggtcggcaaaaaagacccctccattctcgagcccactc -acgatatgtagggacgacaacttgtgcggcttatgaattgtctggactgcgggcgagggt -ccatatctccgaagttagaagggacatacctttagatgataagatcaattcttattgacg -aaattcatccacaacggggaacaacttcaccctagacttacgtctgaaaagacacctagc -gtcttataaaaggtcagtgccccgtttcgtaaggctggaattacctacgcaaacttaaac -ctcgcgcccttccttacgtatcgacaagatagaggctatcgcgaatgtactacggaggca -tgaatcatatactagaaccaagtgcctgtgatattaacaagatgatccgacgcgagcacc -gtaattctaggcataaaactccagcaatttgggggccgaaaacaaatgacgttagctaat -taattatatgacatgatcaaaggaggtcaatcacgcatcgagttcgacgtatattcattg -aacttcgtgcgtttgaaagaaacttttatgaaggcaaaattgatcctgtctcctatttca -tgcgtacctcctagttgataattccccgagcagtggttaggacacttttgtcggtatcaa -gttccggtctcaaaacgtaaaattctgtaatctgtatggatggtctgtgaattagttaat -ttttatgaagtcgtcgagacgcagttcctattgatttattctaaacggagatgtgcttcg -tgggactcggaagtagatctgtgtttatgattattgctactttagatgctgactgttaac -tccgtgttgtttttcaaccgtatatcacaaccgaattggatagaacctatagtttcaagt -tctgccacaaggtatcatatttacagttagtgctggttgcttctttcaaacgtggtgagt -ttgtgctatcacgtcaacggtagagctcagtggaccgagtgcgcgttcaaccctgttcca -gagagggtgtgatagcacatataccacgctcgtcgaggcgttcatgatagtttgcaagag -ccggtgttaaacacatattattattgttatccaactaatcggacctatgcataaagcatt -gtctaaacagaataattgcctatatacggtagttttagtgatttatatcttagtatcagt -tagagcttcgaactcttcaggttcctcatatttaacgttcttcgaaagcgaaaacttcta -caaacgaatgtaagcggttttccaagtagtacctataaatcacagaaagatctgtctcag -tatagttgaaatggtattcagctagtgacgtgtaccaattatcatagttcactcaagcaa -gacgctcattaacgaatatagacaagacactatatcatataataaaaaagaacatggtgc -tcgaacatagttgaattcaccatattgaaggggaatgctgacatgtaattcgctactaga -cgatcaattccctacttgtcaaagttgaactggtacgttcttggaattaaatatgattgc -gctggaccaaattgcgacttcttgagtttcagggcaaacgattgagccggaggatgtccg -tctcttacctttcttgcttatgataaacgacggtccctgtacatcactgggaattctcag -caaaaataattgggtaaatcgagactcgatgtattcggccacaaaggtgttagacgttaa -agattattcaacggggcgataataggatcataaccggtatgcaagcgcattgaaagagcc -atgagatccttatccgataaacgctgcacggtatgtgcagccttattgtcgatcacgaat -ttataaatgtagtctgggctgtaagttgaagacctaagttataatgaagtgcaataccaa -atcgattcatagtggattatcagactcaagatatctcctgataaattacagttgttaaga -tacggataaaatgagatttaagattagcagcctctaatctgtttcaatcccgttggaatg -tggtatgcgatcaaggttaagttaaaatcaagcctgtcttcagtcttgattcttgttctg -ccatcgcatgcggtctacgtgagttaatatgtagcttacgttctagcttgtgctaatctg -agtatagattcgtagaggaatattatcaagcttccacgcctcaacgtacgtgtattggtc -acacaagacactaaaagtggaagtagcgtaaactatagtctagttgttaaatgctcagtt -cttgttatattcgatatactcttggctaatttatgtctgagtatataaaattaatgatat -taacttgcatttcacggatcccttagaaaaagattttgaccgagcgcattataaacggtt -acaccgaatcaatagaagcatacccaatagctttctttgaatttattgcctgcgcaactt -ggctgactctctagatccgaataattctatatggtcgtgacgaaactagttcattactgt -ttaaaatgccaacatgtcttttgggccgataatggctctttgcaaaattactcaatgata -cgattgatcaaagcggtagttgctagtggtagcatgtaagtctatcaaatgtctgattat -ccgaaaatcttccaaaagagtccacgtaccatatctatctcatagcgacgcgaggggaac -cttatctaactatcattccatttaccgggtgactctcgatgcaggatccgattgggataa 
-attgcccagaaatggctcattcctgactaagggtaaggccgttctcagcaagggaacccc -gcgaatctaggcttataccatctagattgttaactacttgcctgtagttctacagccata -ctggacagttgtttctaaatgatcgggattcatgctagcactcctctgaatgcaccgcgt -aagtttaactattacgtccgtgggcagataaggatggaggctgtatgtatcttaactgtt -acctaatatggctggtaattatcaaagtaaggaccttaatgccatagcgctagcaatcgc -tttgtatactgaccatgtgccaacctctcttaatctgtaaaatataatgtcttagctaac -tgtggacgatcatgtctctgcctagagcttcgctgtatcaattcctatagccagcgtact -agtgacacaacaacaccgtgtgagaaaagatattagtccttacgtctgtctctctacagc -ttattgatgaggattgaacatggacatatagctccccctcaaaagcagatgctacctctt -tattccattctcgaacatttgccgaacttaatttcgacaaacctgaggtcacgtcttaat -ttatcggtaacgtcacgtccctttgagactggataaatatattaccaggggccaacgagc -aattgttggaggcgcttctataatacaaggtgtcttgtcaaagaaagacggcgtgcgtct -cgtgcaactcacttaaccaatattaatgtgaaacccccctctctcacatcttatgcggtg -tactgccctggtacatttcctgtacaggactccaacagtgtagattcctaagatagctgt -tggagttgcctcacgccagatcgaaaaactgaataaactagtgagctgagctgcagaaat -accgcttaattacttatgactagttcaaagggacctacgtgatgtcagacattgcaagga -agaaattaggtttgtgcgtcattttggctggactagcactccttacttcccctactattc -aaatgtcgtaaacagcatgagacaggatcgtgctgacatttaaggtctattgggaacgag -gctacctttggtcgcgcgctcgcgttctccgaatgaccgaaatgcatgagcacagtatgc -aattgcttatagatctaaggtctggtcgttgaaaccaagcacgtaggcctgggaaatcag -ttcttcctcagcaactacacaaaagcgtccaagcattagtacttgtagtaaatgtccgaa -cctatgcgctcatttgaaagtcaaaaaatatttttaagcagtaggcacctaacccgattc -ctctacttagtagctttctttgattctcagaattgactgcaatatcactgcacaattctg -tgccattactagacttctctgtattaacgtctcatcttactaacactcgcctaggacaca -tctgagagtgaagtatttcaatacatttactgaaatcttcagttctaaaatccccgaata -aggctcttatcggtttggccaacacaagaaaaaaacttcttgcaccactcaccttcatac -gcaggagcctggggaacttagtaataactatttcggcagacaaagcttataacaagttgc -cggcgcgtataatatttaaaagaccccttgagctgctcaattaaaacgctcacctggtat -aggctattagatagtgccgtcttagtaaggggcgggaattatcggataaactgatatttt -gataaaataaccgacttgttcacgacataagtcactaaggagattttatctttctccaaa -gtatatcttccttggataatttcaaagcgctgcaatttaagttctgttactagtttatgc -tgctgggaggtgaccggaaggcgtagtaatctagaggcaaattataagaagttcatcata -tcattttcgactacaaaaacaaggtgttgtatgccggcgcattgtgtaaactggacgagt -accctagatggaaaattatacgttaagccaagatttcgatgtaatgataattacctacac -atttttgctatccataggaacaagagctgttctataggctcgtggcatacgaacatttgc -tgccgctatgaatattggaagctcttcaactacagactctattcttaattgccgtcgaaa -atgggccgaatcggctattattaatactcggtttttccgaggggattgttgtcgacagtc -gtaattattattaatattgatgttggtgaggtcatttaaatacaaccttgcagacaatga -ataagggatccaatctctcatactccttttacaattgctcatgcccctatgcaaacctta -tgccgccacacctccgcaactctctcttctgaactgtaagtagcttcattactggtttga -gactatactgaagctgatgacattctaaaatggctattttcgaatgtgattcataatgtt -tatcgtttgggatggcagaatcacgttatttttgatatagcccgggtattctattgtata -gaacgtatgctacaagtcattccccgaagaagactagaagtaaacaacatgcgaccatcg -ttaagccacgcaaggctgtagctttatttcccgataacctatcttccataaatagcggac -agcaggatactgacgctcaacatcagtggttatggtctaatttttaacttttaataaggt -aacttcagcaggcatacacagtaactctttaatttataatcaaattagaagtctgacact -tcttatatttttctatcatccaacgcgatcgcccattagcttattgtgttactaataacg -tatctaaaccaatccttttcaagctactgcctatattgtcaatatatacaaacaacagga -tagtaggctgcttaaaaaatattgtcaaccgtgtacgctttacaatacccggaaatcaca -aactttgtagacaacgagtgaaatttatacactacgaagggccagcgtacaagacccatg -aattaggcgatatgtttattctgacatattggtttatccttaatctgtcgctgtaaaatg -aagccgcccccatccctgcgaattttttttcgaagattcacgactgaaatataaatacgt -ttggctatatttatgttggagggaggcaatagcctttactgttaaccgaagatttagcca -gtgagtgtgacactaaaacactggaataaatgcaggcgttcttctgggtaaaaggtttag -tcaatctcgcctataagttcatatagctctggatataattatctggcccatgcatttatc -atggcgcttggtgccctgtgtgaagccggcctctcatattgaaggtccgaagtattccat -gtacattaagatcactctctcattcatgcatcttggcttaacaaatctggttgtccaagc 
-tttccaggcacgtatggtacaaattcggatcgaatacttataaaaatgatatgttaaact -gtctaaaacgctcatctacaaagtaaagtgcactaaccaatagagtctcaagaccgtgta -atgctggtgcactgaatgtgtaatacggttagaagggattagttatgttacaaatccatt -gaaaacttaagaagcattgcgtgctcggagggtgcatcttttatcaagagactaacatta -ttttcaacgacgtacatgctttacaatagggtacttatcaaacgccgagaaacgcgccta -tagtgatgttatgattatgacccgatatccattggaccgaattttatgtaggttcccagc -gtactcgcgtaatatctcggtattgccataatgtaatacttgtcggtctctcccagatga -aaaagcgttacagagtatttcaatgaaaaacagcgcgcaacgtcaatacctttaggggta -acggccgctgatttcatatagatatacgataagttggtatagctctactaggtggcatcc -acaatcgttgcatttactatagctggttacaatcataatctataccgttccttacatact -accatagcgggatagcgtttttttgccgttgattgggtttaagaggatgtcagtctcatt -atatccgattcggtgggagagccgttgttttcaaatcgcacactttgtgacataatgtac -aagataacaaaactgatataagatataaactgtcaatatcaccttgacacttgaatcaaa -gtaaattaactcgcaaatataatttgactaattgggtgcagatttctcaattaataaaaa -aatggcaccggatgggcttacaagccccttatcattcacttgtatcatgatttccaagaa -caatagaatttgctagcaagtatgaacagagattcgaattgcatccacagtacgccggag -cgtttattttaatgtggatatgacgatgtactgttggcggcatttgctagtaaccggtcc -ttatttacgtagcgcacacgtaagcatgtctgggagaaatatggtggtacaatctcagag -aaagattacagtttggtttaaataggacttatcgggtcggaagtggaacttaataagcag -tacacaattgggcaacagacgtcttgcctattacaataggattacaatgcgttagatttc -agacacgttcgtgtttggctattcgtcaattccctaaatagttagacgatcaactattat -caaagtgattctttgttcatcctccattcatgtaacagatggcacactacgcataacgcc -gaggaattttaacgagatttaagagagcagttcgggcacaacccacttgactttataaca -gctcggcagcataaacggtaatatgtgacaaatttccaaacgttataagaacgtatgtgt -acttagaaaactaagtggttcatgttcaacagatgtgacgcagcaagcctaacttatcta -ttggttttgctataaaagaacaaagttacacagaatcctaagggcttgtttcacacttat -gcctagtgcttcaccatcttaaaatagcgaaaccggcacgaatcaaaccttaaaacaatg -cgcagatattggtgatggtgactccgggtatgataatggtaactgttgaccagcgcccac -ctcatcgaagtatagaaagtggttaggataaggatgagaccgaacttatttccggccata -actttagattttctacctagtacacaacatcagggcggacacgaaaccgccatcacatca -tataccaggtttaatttgcttaatgggggaagtgtcaacgaaccttcgaactttagcagg -catatggccattatatatggccccagagcagaatgctacagcagacaaaatttggattta -tgtagtttaatacctatcaaacttggtgtgaccatacttgtctaacgacagtgcacaaag -tgtaagttacaattattactactcagcagcttctgcaatgataaaatcttatcatacacg -tcacatatgataatatctacttagggggaacgggctccacaacctacatagtactcaata -cttacactattcgacaggcacaccaaacctgtacagtcccaaaagattgagtcaactttg -cagtactgcagatcacagtaatagcttagttagcgagtcaaaattagttttctacgagac -tgcacgaccgtgcaaatttccgatgtgttggctacaaatagcaacgtatgaatttgtttg -aagccacgtaaactgtacaaccttagagataagtctcaggctactaaaaacacgttgtgg -cactaacaggatcatggttgattcttacttattcggctgaccggcccaataagtaacctt -caactagaacagaataatcgggagtagtttaattcagtcaaggtgcaggtctcattgtaa -ctaacaagctctgtgtaaccaagttaaaatcgttttcttagcggattccctacttatgga -tttgagctcgtccacaatattcgatacaagaagtttgtggtccgtaacaacgaaatttta -attacgctgtgcagcctcatccaaggaattaatagaaggttgatggtaggctccgaacgc -tccatgattataatcaagtggactgtgcagtaaacgaggaaggtatcctgacgtcgtggt -gttcgtttttgttatttgtgccctatacgagtagataaaccatgaacagcacagtgtgaa -cccatggttgattttaggctaccttatttttaatttccgttacacagaaacgaattccac -aactaacatgccattaatttttcgatatcttataaaagatggtcgaaattcattcattta -ttttttttcggttctcgaaagtcaactaagctgtcgcgttttgtttctctttagaggtaa -aagtggctttgatctcctacgtttggatactagtcaaccattactccatttgatccgtga -gtatcacctgtctaacatccagcattatgactcctcggcgaagaaaagacacacttctta -gagtcgatgtgtattagctagggacacagttgtttaatacgatagtgagcccagggaggg -cagtgcgtcccccagtagatttattcagctagtgtaagtataagatatctcacccacgag -gttcaagtgatatgcagtcttagaataatacttatcctgaatttcgatattatgggtact -tcaataatccgctagcgctactttatgtctcgttggacagcaggacacatggcagtctta -aacactaaagacatcacctgaatgaatgtaatgggattacaagaatcaatgaggtattat -atacgacgtaggaaactctggatatatacagtaatctagttacgccatcgcacttcattc 
-ctctggaaacttagaagacatcagctgtacgtggaggaaccagacccccgtatgtagcca -aatagaaccaaagttgcttatacaaacacacccaatgacaatggaccgctggagttcgta -aactcggaacgtagtactgcacaaacccagcatttagcaataggagctacgtatgcaact -cccacgtggtaataccttcaagctatcaatatataggtgcctagctaatcgcattcgcaa -gcagtattcaagcttgtaaaccagtataataattacagaggctctatgaaacccaacttt -ccagctaaaagtcccaattaaatggttatttcgtacttttaaagtcgcccgttctgttat -tacgcgaattgattctactccaaaattaaacacaaattatcaaccgtttcatttatattt -gtcaatgcagctgtttaaaataaggctctactaaattataattaagacacttattaccag -atttctctagttaagtttgaaccagctcgactaccgcgaaagatacattcccttctctat -ttttcagttcatctatgggtcagagaagcattgaatttattctattcaccctcgtcgttc -acagcgaatcgtcagtgtgatcagtgtatgagaaatatcctaaaccgtttagtcagacca -cacgcttagaacaagtggtctaaaaagactgccctggaaggagtaagaagtatacagctg -atccggtgtatccttcagtcatctgccctatactaattacacgacgcaaggaaaaatagg -tttattttctaggcaaacccttcataggtgactccgatgtgttacgaatcatgcttgaga -atgtgctatcgttaccgacggataataacgatctccaatgaaccaaatgtagaatgtcta -ttgattacccttttactattcgacttagagataggagatagaacctcagtgtactttttt -agccgaatgggaatctttgggaggtgaatggccataaggtcgtaaatccaaccctcttaa -agtcttccatattatatcgttgttcgtggaatcgataacagatttgttgacccatagtaa -atgtatactagtttatgttgtaagtgtagattgttttccgattgccgtccaaactttatg -tcgtaattgtagaccagtaaagttgaccaaggtaagtgcccagcgatcctgcgagatcga -tcgccaatttttccagtcactgtaagtgtaggtttagataaagccgtatgagttatatca -taagggcctcggaaagcagcttcgaaccaaagttcccttataatagtagtttaactataa -aagtatatactggtctgtcgccctttcacgatttgttttaccggtttatgaagcgttacg -tcattagagcggctccaatttaaggttaacggcttccatgtgtagttgtatacaaggata -acttaaagtatctgttcagcgagctagttaagttatcctcgatagaacacaactcagagg -tcccaagatcgggtttgcaacttgctaatttattctcaaggcaaattgggaattatcgat -acctgtataccataaggtcgctcgatgtgatgcttatgtcttctggtgatcctaccttag -ttagtgctgattaacggaacattaatgtttatcgttttgagatttagccaattctctgat -tctaactcaagatgccttatctgacgtgctatgcagcccctaagtattttacattgtaat -aggacacgctcctttaaaactcgccaaaaggtcgttgtggttctctactggttaactata -taatttacagctttgttgagctagttcctctttggtttaagtcctcaatattagttggtt -cgagcgataagttggctagttaccttagtcactatattagatccgaatgttatgcttcat -ctgaagaccgccaccctccaaaatttcttttaagactcacttattgcaaggtgtaggtga -attcggctcgtttctcaagtggtgtatctgtacacgagtttccatattttcatcaacagc -caccgcacacttatgtcactctaggtattaaaagtcgctctacaaggggacgcaattaag -aaacagacatgctagtcaaaaataaacatagcgaggcaccactaattcggccgcttatca -atgggatgctctgcgcgagacgcgccagagctcagtagttagttcggacatacatttact -tcagatgatcaattagttttctacaaatgcttactctaccccgaaaaaagtcaccagact -cttacgtctctttagtatccttccgtcttatataaggtcagtcccccgtttcggtaccct -ggaatttactaagaataatgaaacagcccccaaggacgtacgtttacaaatgatagacca -gatcgcctagcttattccgacgcatgttgcatagaattgaaccaacggaatgtgagagta -actagatgagccgaccacagcacccgtttgcgtcgcagaatacgcctgatagttcggcca -cgaaatcatatgtcctttgagtattaagtatttgtaatgatcaatcgagctcaagcaagc -ttacacttcctcggatattcagggaacttagtgcctttgaaagatacgttgatcaacgaa -aaattgataatggctcatatggaatgcctacctcatagtgctgaattaacacagcactgc -ggacctaacttttcgaggtttcaagttcacgtctcaaaacctaataggctggaatatgta -gggatcctcggtgaatttgtgattgggtttgttgtagtactgaccaagtgaatattcttt -ttttctaaaagcagatctgctgccgggcactacgaaggagatctctgtgtatcattattg -cttcttgacatgatgactcttaaatcactgtgggtgtgcaaaacgatagcacaacccaat -tcgatagtacatattgttgatacttcgcactaaaccgttcatatttaaaggttgtgctcc -ttccttcgttaaatactggtgacttggtcctatctactattagctagacctctggggaac -cacgcccccgtaaaacctgtgcaagagagggggtcatacatcttagacatcgcgcctcca -ccagggaagcattgggtgattgaccaggtgtgtaacaaatatgattattcttatactaat -attagcaaagatgcataatgatttgtattaaatgtataattgaattgataagggtctttt -agtcagtgatagagtagtataaggtagacattagaactcttaaccggacgcagatttttc -ggtcttagtaagccaattagtcgacaaaacaaggtaagagcggttactagtagtacctat -aatgcactgaatcttcggtcgaagtatagttctaatgctatgcagattgtgacggcgaca 
-aatgttcagacttatatcatgaaacaagctcttgtaagtattgacaaatgaaaagattga -atatttttaaatacaaaatgcgcctacttattaggggaattaaccagattgaaggccaat -cctcacatgtaatgagataatagacgataaatgaaattcttgtaatagttgaactgctac -gtgatgggtattatatatgattgagatcctccaattgccgacgtcttgtcttgatgccca -aaagattgtcaacgaggagctccctcgcgtacctgtcgtccgtatcataaacgacgcgac -atgtacagcactccgaagtataagcaataataatgcgggtaatccagactagatcttttc -ggactcaatgcggtttcacggtaaacatgattaataccggagagtagtcgagcttatcag -cgatgcaagcgaattcattgtgccaggagatacgttgcagataaaaccggcaacgtatgt -caacaagttttggcgatctcgttgtttgtattcgacgaggcgcgggaacttcaagaacta -tcgtatattcaagtccattaccttttagtttcagactggtggagctgactaaagttatat -catcattttgtacactggtttagttaacgataatttcagatttaacatgaccagacgata -atcgctgtatatccagttggaatgtggtttgccagaaaggttaacttataatcaagcctc -tcttcagtcttgattcgtcgtatcccatccattgcgctatacctcagtgtatttggagct -gtagttataccgtgtgctaagatcagtagacatgacgagagcaatattatctaccttaca -agcatcaacggacgtctagtcggaacaaaagactctaaaactcgaacttcaggttaatat -actatagttctgtattcagcagttattcttatattcgatattatcttgcctattggatgt -ctgactttagtatattaatcatagtatctgccatgtaaaggtgccagtactaaatctgtt -tcacagtgcgaattataaacggttacaaccattaaagacaacaagaccctatagctttat -ttgaattttgtcaatgcgcaacttggagctcgcgatacatcccaattagtctatagggtc -gggacgattctacggcatttctggttataatgacaacatggattgtggcccgagaatcgc -tctttcattaattaagcaatcattacagtcttataagcgctacttccgagtggtagcagg -taactcgatataaggtcgcatgagccgaatagcttaaaaaacaggccaccgaacattgat -agagaataccgaccacagcgcaacctttgattactttcattaaattgtacggctcactcg -acatcaagcttaagattgcgataatgtgaactcaaatggatcagtactgaagaaccgtaa -cccacttcgcagaaagcgtacccagagaagatacgctgttacaatatacagggtgaaatt -attgcctgttcttcgtaaccatttcgccaaacttggttagaaatgatagccattcatgat -agaaataagctgaatgataccagtatctttaactatgtagtcagggggaagataacgatg -gtccatgtatgtttctgatatgtgacagtattggccgcgtaatttgctaacgaagctact -taatgcctttgagcttcatatagatttctttaatcaaaatcggcaaaaagatagtatgag -ctataatatatgctagtagagaactctggaccatcatctatatgaatactgattcgagcg -tgcaattactttagcctgcgtactactgactctacaaaacactctgagataagtttgtag -tcagtaagtcgctctctataaaccttttggatgaccattgtacagccacttatagatccc -aataaatagcacaggagacagagtttttcaatgctcgatcatttgccgatagtattttcg -tctaacctcagggcacctattatttgatacctaacctaacggccctttcacaatggagaa -atatatgacatcgggacaaacacaaatggtgggtggccaggagatatgacatggtggcgt -ctctaagaaacacggactccctctaggcaaactcacgtaaccaattttaatgtcaaacaa -aacgctcgaaaagattttgccgtgtaatgacctggtacattgactggtcaggaatacatc -actgtagttgccgtagtgtcctgttggtgttccatcaagacacatcgtataacgcaattt -acgacggacatcagatcaagttatacagattatttaagtatcacgtgtgcattgggacat -aagggatctcacacatgccttggaacatttttgctttgtgccgctttttcgctgcactac -caatccttacttaccagtatattcaaaggtcgttaacagaatgagaaaggttagggctct -aagttatcgtcgattgggatagacgagacatttgcgagcgccctccacggatacgaatct -cccatatcaatgtgaactggatgctatgcagtttagttcttacgtctcctagtggtaaaa -atcaaagtagcactcgcatagcagttattcagaacctaatacacaaaaccgtcaaacatt -ttctaattctaggtatgggccgatcataggagctaaggtgaaactcataaatgttttgtt -agatctagcatcctaaaaagatgcatatactgagtagctggcgtgcattctctcaattgt -atcctttttaactgaactagtcggtcccatttcgtgactgagatctattaaccgataaga -ttaataacactcgcattcgtatcagctcagagtgaagtttttcaataatttgactgatat -attaacttctaaaataaccctttaagcctcggatccgtttcccaatcacatcaaaaattc -ttattccaactatctacggattaacaacgtgcatggggatcgtagtaagaacttgttccg -atcactttgagtatatcaagttgacggcccggttattattgaatagaaacattcacctgc -taaattaaataccgcacatcggatacccgatttcagagggccgtcttactaagggcaggc -tttgttcggtttaactgagatgttcattattttacagtatgcttcaactaatatgtaacg -aaggacagtggatctgtctccatagtagatcttcagtcgtgaatttcataccgctcctat -ttaagttcgcgttcgagttgttgatcatggcacgtgaaagcaacccctagtattctagac -gaaaattttttctagttcatctgataatttgccaattcaaaaacaaccgctggtttcccg -gcgcattctctaaaatggaagtcgaacctagagccattatttgtcggtaacccatgagtt 
-ccttcttttcagaagttaatacactgtggtcctatacagaggaaaaacagcggttatata -cgatcgtggcataacaacattggatcaagatagcaatttggctacctattctaattctca -ctagattcggtattccactacaatatcggcagattaggattggatgaataatcggtgttt -aagtccggttgcgtctccaatctcctaatttttattaatattgatcttggtgacctattg -taaataaaaacttcaagactttgaataacggtgaaaagatagaagactcatttgaaaatg -gatcatccacagatccaaacattagcaagacactaatccccaactagctattctgatcgc -gatcgtgctgcagtactcctgtcacaatagtctgttcatgatctaattctttttgggctt -tgttcgatggtgattcagaatctttatccggtcgcttccctgtagctactttgtggggat -attgcccggggattatagggttgagatcgtttcctaaaagtatttaaaccaagtagactt -caactaaactacatcagaacatcgtgaagacaccatacgcggtacctttatttaccgata -acatttcttcaagaaataccggtaagcagcataatgaccctaaacagctcggggtatcgt -cgtagttttaaattttatttaggttactgctcaaggaataaaaactaactatttaattta -taataatattacaaggctcacactgattagatttgtctataagacttcgcgatcccccat -taccggattgtcttaagaataaactagataaaccatgcattttctagataaggcctttag -tctaattagatacaaaaaacacgatagttgcatccttaatttattgtgtcaaacctggaa -ccttttaattacccgcaaatcactttatgtcgagactacctctgaaatttattatctacc -taccgcatgaggacttgaaccatcttgtaggagttatgtttattagctaagattcgttta -tcctgtagcggtccatgtatattcaacaagcaaaaagcactcagaattgtttttagttga -gtcaagactgatatataaataagtttccctagttttttcgtggtgggacgatattgaatt -gaatcttaaccgaagagtttcccactctgtcgcacaataatacacgccaatatttccagc -cctgcttatgccttaatcggttactcaatctcccattgaagttcattttgatctgcatag -aagtttcgggcccagccttttttctgccaccttcctccaagctctgtagacgcactctaa -gattgatgctcacatgtattaattctacattaacataaatatataagtcatgcatcttcg -agtaaaatatctggttctccaacatgtcctggcacgtatcgttataatgcccatacatgt -agtattaaaatgattgggttaactggatattaagatcatcgaaattgtaaagtcaaatta -acaatactgtctcaagaccgtgtattcctcgtgctcggaagggctattacgcttacttcc -gttttggtatcttaatatgactttcaaaaattaagttgcagtgagtcctacctgcgtgca -tcggttagcaagagtataaaagttgtttaaacgaactacttgctttacaataccggtcgt -atatatcgccgtgaatccagaagattgtcttctttggattatcaaccgagatcctgtgga -ccgatgttttgggaccttcacagaggactccaggtagagctcgcttttgcattaatctaa -gaattgtacctctctaaaagatctaaaacagtgaatgtgtatttcatggaaaaacacaga -gaaacgtaaattactttaggccgaaaggcacatgagttattatacatatacgagatggtg -gtatacatcgaattcggggcatacactatagttgcattgtatttagctgctttaaataat -atgatattaccttccttacataagacattaccggcataccctggttttcaacttgtgggg -ctttttgacgatcgcactctcatttgatccgagtagggcggtgacccctgcttttcaaat -acaaaaatttcgctatgaaggtaatagattacttttcgctgttatgatagaaacggtaaa -tttaaaattgaaacttctagaaaagtaaagtaacgagaaatgattttgtgaataatgcgg -tcatgattgcgcaagtaagaaaaaaaggcaaaaggatgcgcggaatagaaacttatcagt -cacgggtatcttgatttcattcttcttgtcaattgccgacataggatgaaatcagattcc -aatgcaatacacagtaacccccacccttgattgtaatgtcgatttgaagttgtacgcgtc -gacgaagtggatagtatacgggccttttgtacggtgcgatcaactatgaatctcggcgag -ttagatggtcgtacaatctcacacatagaggtcacttgcctgtaatgacgaattttcggc -taggtactcgaactttattagaagtaaaaatgtgggcaaaagaaggattccattttacaa -gacgattacaatgagttacatgtctctcaacgtagtctttccctagtagtctttgaacta -tttaggtactccagaaaattttagcaaagggtttctgtgtgaatccgccattcatgttta -tgatggaacaataagaataacgccctcgtatgttatcgacagtgaagtcagcagttcggc -caaaaacatattcaatttagtacagatccccagaagttaagctaagtgctctaaaatggc -ctaaacggttatcaaagtaggtctaattactatactaacgggtgcatcgtaataactgct -gtcgatgcaacactatatgatagtgtcgttttgctatatatgtacaatgtgacaaagaag -ccttagcgattcttgcaaacttaggacttcggattctcaatcttaaatgtccgaaaacgc -aaagattcaaaaatttaatctatgagcagatatgcctgatggtgactacgcgtatgttaa -ggctaaatgttgacaaccgcacacataatcgaactattgatagtcgggagcataaccagg -tgaacgtactttgttcacgacatttattgacatgttctaaatacgtctcaaaatcacggc -gcactagaaaacgcaatcaaatcattgtcctggtttaagggccgtaatgccggtagtgtc -aaacttcatgagaactttagctggcttttggccagtatttagggaccaagagcactagcc -ttaagctgaatattttgccatttatctactgttataactttaaaacttggtggcaccaga -cttgtcgatacacacgcatcaatctgtaacgtaaaaggtttactaagaacaagcgtagga 
-attgagtttatattatatttaaactaaaagatgatattagcttctgagggcgatagggct -ccaaatcataaagaggaatatattattacacgattagaaacccacaacatacctcgaatc -gcccaaaagtttgacgaaacttggcagtactccacatctcagtaatacagttgggagagt -ctcaaatgttgttttattactcaatgaaccaccctcataatttcactgctgttccattaa -atttgcaaacgatcatttgctttgaagaaacgtaaaatcgacaaaattacagataagtag -atgcataataaaaaaaactgctcgctataacacgatcatcgtgcattcttacttaggagc -atcacccgcacaataacgtaccttaaactacaacactattagaccgagtactgtaattca -cgaaagctcaagctcgcattgtaaagaacttgctctctcgtaaaatgtgataatagtttg -cggagaggattcaattattttccattgcacctactccactagattcgataaaagaaggtg -gtcctcccttaaaaagaaatgttaagtaacatcggaaccataagcaaagcatgtaagtga -accgtcatccttccctaagaaacataaaggtttttaataatgtcgactgtgaactataac -tgcatcctttcctgacctactccggttccttgttgttatttctgaacgagaccagtagat -aaacaatgtaaaccacagtgggtaccaatggtgcatgtgacgctaccgttgttttaagtg -cccgtacaaacataagaagtcataatcttacttgaaattaattttgccttttattttttt -tcaggctcgaaattaatgatttgttttttttgaccttctagttacgctaatatgcggtcg -cctgtggtttctattgagtcctataacgggatgggatctaatacgtttggttactagtaa -acaaggtataaatttgataccggagtatcaactgtataacatcaagctttatgactcata -cgcgaagtaatgacacaaggctttcaggagatcgcgagtacagagccactaaggggtgta -ttacgatagtgacaccaccgagcgcactcactccccaagtagatttatgatcctacgcta -agtattagatatataaccaaagaggttctagtcagtgcaactcttagaataataattagc -cggttttgcctttttaggcctaatgcaatattcagctagcccttatgtatctcgcgttcc -acagcaccactcatggcacgcgtttaaactaatcaaatataatctatgaatgttatgcca -gtacttgaataaatcaggttttttataagtccttgcatactctcgttatatactgttaga -gtcttaccccatagaaattctttcatctgcaaacttagaagaattctcagctacggggag -cataaagtccccaggatgttgacaaatacaacaaatgtggcttatacaaacactccatat -gaaaatcgaaccctcgtggtagttttagccgaaccttgtacggataaatccctccatttt -ccaatagcagatacctatcctactacctcgtggtattaaattaaagcttgaaatatagag -ctgcatagcttatccaattcccaagcacgagtctaccgtcgtaaccacgatttgatttac -agacgctagagcaaacccatctttaaacatataagtaaaaattaaagggtgagtgcgtac -gtgtttactagcaacttcgcttattaagacaattgtttataagccataattaaaaacata -tgttcaacaggttcattgatatttgtaattgcacaggtttttaataaggatctacgtaag -tataatgaacaaactttttaccagagttatattctgtactttgaaaatgctcctctaccg -ccttagagactttcaattagattttttgcagttaatctatgcgtaagtgaaccatgcaag -ggatgcgattcaaccgcctcgtgctaaccctatcgtctgtctcataactgtaggtctaat -ataattttcagttttcgaacacataaccctttgaaaatctgctatttaatgtctcacctg -catgcactatcttctatactgctcagaacggctatacgtcactatgctccaagtgacgat -ttaaacgaagcaaggaataataggtttattttagtgcaaaacaattaagtgcggactacg -tgctctttacaataagccttgtgattgggctataggttaagtcccatattaacgatctcc -aatgtacaaaatcgacaatcgctttgcattacccggttactagtcgaattacagatagct -gttagatactcactctaattttggacaacaatcccaatcttggggtcgtctatcgcctga -agctcgtaaatccttccatcttaaacgattacatattatagacttgttcggggtagagat -atcacagttgtgcaaacattgtaaatcgatactagtttatgttggtagtctagttgcttt -taccattccccgaaaaacttgatctactatttcgacaacagtaaacttgaactaggtaag -tgaaaacagagaatgcctcatagtgccactatttgtccactatatgtaagtgtagcttta -cataatccactatgactgagatcattacggcctaggaaagcagcgtagaaaaaaagggcc -cggatattacgactgtaactataaaactagttactggtagcgcgccatgtatagatttgt -tttaccggttgtggttgcgttaacgaatttcagccgcgaaaattgatccgttaaccagtc -catctcgacttctataaaacgataaagtaaagttgatgttcagcctccttcttatggttg -catcgagagtacactactcagtgggaaatagatcggggttcctacttcagattgtattat -ctaggcaattgccgattgtgccatacctggataaaataagctacctacatgtgatgctta -tctattatcgtcatactaccttagggtgtcctgttgaacgctacattaatctttagccgt -ttgagatgttccaatggataggagtctaacgcatgatgaagtttaggaaggcagagcatc -ccactaagtatgtgacagtgtatttcgaaacgagacgttataaatagaaaaaaggtcctt -ctggttctattctgctgaactattgaatggaaagattggttgacctacgtactatttgct -tgaagtcatcaatttgacggggtgagagacatatggtgcatactttacggactctatatt -ttagatcagaagcttagcagtcttctctacaccccctcacgacataattgcttttaagaa -tctatgtttgattcctctacgggaattcggatccgttcgcatgtgcggtttatctaaacc 
-aggggacatatgttcagctaaagcatacgaacactttgctaactagacgtatgtatagta -gctataaatcccgacgatatttacaaaaagaaatgagactcaaatatatacatagcgacc -ctacacttattcgcaccctgatctaggcgatcctagcacccacacccgaaagtgagcact -agtgtcttccgtattaaatttactgcagttgagattttagttgtctactaaggattactc -taacccgtaataaggatcaagactcggtactagctttactatcattccctatgtgttttc -ctaactcacaagggtacgtaccagcctatgtaattacaataatgataaagacacaaagga -agtaactttacaaatgagtctccagttacactagcttagtccctcccatcttgctttgaa -gtctaaatacgcaatctctgaggatatacagcagaagaacactcataacgttggagtcca -agaattagactcatagggcccccaacatttaatatgtactgtgagtttgaaggtgttcta -ttgttaattcctgctcttgatacatgacacgtactccgtgtttaaggcttcggactgact -ttctttcataagttgagcaacgaaaatttcagaatcgataagttggattcactaactaat -acggctgattgaaaactccactccggacctatatggtcgacctttatacgtaaccgatat -aaaacttataggctggtatatcgagccttcctagcgcaatttcggatggggtttcttcta -ctactcaacaacggaatagtctttgtttagtaaaccagagctcaggacgcccaatacgta -ggagagcgctgtggagcatgtgtcattatggactggagcactcttaaatcactctgcgtg -tgctaaacgatagatcataacatgtcctgagtaaattttcttgatacgtcgcaatatacc -gttattagttaaacgttctcatccgtcatgcgtgaaatacggctgtcgtgctcagatata -ctattagcgactcatctcgcctaacacgcacacgtataaactcggaatgactgccgctct -tacatattagaaatacagactacaccacggaagcattgggtcattctcaaccgctgtata -aaagatgattagtcttataataagattaccaaagaggcagaatcatgggtagtaaatcta -ttattcaagtgattaccgtcgtgtaggcagggagtgaggacgagatggtactcaggacaa -atattaaccggacgaagtggtttacgtcgtactttcactattagtagtaaatacaaggta -acaccggggaatagtactaaatataatgatatctatcttcgggagaacgagtcgtctatt -gctttgaacattctcaaggcgtaaaatgtgctgacttatagcatgatacaaccgattgtt -acttttgtctattcaaaagattgaatagttttttatacaaaagccgcatacttatgacgg -ctagtatacagtttcatcccctagcatcaatgctatggacagtattgaacttataggaaa -ttcttctaatagggcaaatccgtcgtgatgcctattttttttcagtcacatcctcaaatg -gcactagtattgtcgggatcccattaacaggctcaaccacgagctcacgcgaggacatgt -agtccgtatctttaacgaagcgacagcgacagaactcccatggataaccaattataaggc -ccgtaatcctctagacatcgtttaccaataaatccgctttctccgtaatcatgttgaata -ccccagagtagtccagatgataaccgatgaaacacaagtctttctcaatgcacttacggt -gaacttattaccgccaacgtagctcatcaaggttgcgacatctagttgtgtgtttgcgac -gagcccagcgaacttcatcaactttcgtatattcaacgccttgtaattttactttaagac -gcctggtgatgtagattcttagataatcagtttgttatcggctgtactttaccataattt -cacaggtttcaggtcaagaagattatagctgtatatacagttccatgctcggtgcacaga -aacgtgatcggataataatcaatcgcttatgtcgtctttaggcgtatccaatacatgccc -cgataccgcagtgtatttcgacatgtaggtataccgtcgcatttgagctcgagtcaggac -gtcagctagattagattccttaatagaatataccgacctctagtccgaactaaactatag -ataacgccaacttcaggttaattgtctagtcgtctgtttgcagatgggattcttagatga -gtgagtatcggccatattggttcgagcactttagtttttgatgcataggatatgcaatgt -atagctgaaagtactttatctgtttcaaactcacattgattaaaccggtaaacctttaaa -gactacaagaaaatattcagtgagggcaattttgtcaatcacaatcttccagctagagat -acttcacaatttgtcttgaggctacgcaacattagacggattttcgcgttttattgaaat -aatcgaggggcccaagagtatccatagttcattttgtaagatttctttacaggcttatta -cagcttcttcagactcctacatgcttacgagttatatgctagcatgtgaacaatagatta -atatacaggaaaacgtacattgagagagatgaccctacacagcgcaaccgttgagtactt -tcattaaagggtaacgctctcgagacagcatccttaagatggccttattgtcaaatcatt -tgcagaagtacgcaagatccctaaccaacgtagaagaatccctacaaacacatgagacgc -ggtgaaaatagacagggtgttagtattcaatcttcggagtatcaatttcgccaatcttgg -tgagaaagcataccctttcttcagagaaagaagatcaatcataacactatctttaacgag -gtacgcacgcgcatcattacctgcctccatggatctttaggatagcggaaagtattggca -gcgtattgtgatttcgttcctactttatcaatttcacattcatatacatgtcttttatca -aaatcgccaataagataggatgagctatattagatgctagtagagttcgcgccaacatca -tcgataggaatactcaggacagcgtgataggacttttcaatccctaatactctctataat -tataactctctcttaagtttggaggcagtaacgcgctctatataatcagtttgctgcacc -attcttcagcctctgatacatacaaataaattccacagcagtaagagggtttaattgaga -catcttgggaacttaggattttactctaacatcaccgaaacgattattggataccgtacc 
-taaacgaactttctcaaggcagtaatataggacatccgcaataacacaaatgctgcctcc -ccaggagttatgtcttcctggaggctatatcttacacccactcactataggcaaactaaa -gtttaaatgttgattgtctaaaaaaaagatagataagagttggccggcgtagcacatgcg -aaagtgaatcgtaagctataattctctggacttgaagttctgtcctgttcctctgcaaga -aacaaacttcctttaaagctatttacgacgcacatctcagcaagttataaacatgttgga -agtttctagtcggaattcccaaagaacggatctatctaatgcattcctacatttttcctg -tctgccgatggtgccatcctattcaaagaatttcttaaaagtagattaaatgggactttt -aacaatgagtaaccttacgcctctaagggttcctcgagtgccatacaccagtcaggtccg -agccacatacacggagaacattctaacatagcattctcaactcgatcatttgcaggttac -ttctttcctatcctagtgctaaaaatcatacttgcaatcccatagcacggattaagaacc -taagaaacaattcagtaaaacatgttcgaattcttggtatgggaacatcattgcagctat -ggtctaacgcattaatgtttgggtacatcttccatcatataaacaggaagagtctgacga -cagggagtgcttgcgatcatgtctatcattgtgaaatcaaattgtagctcacatgtcgtc -tatgagagcgtgtatccgataagatttagaaaaatagaagtcgtataagatctcactgaa -cttttgaatgaatgtgaagcatatatgatctgctttaataaaactttatccataggatac -gtttccaaatcaattcaataattattagtcaaaatagataaggatgaacaacctgaaggc -cgatcggacgtagaaagtggtcccatcactttgagttgatattgttgaaccacacgttat -tatggttttcaaacagtctcaggatattgtatatacagataatccgataccagttgtctg -acgcccctcttacgtaccccaccctttgtgacgtttaaagcagttgttcagtattttaaa -ctaggcggcaactaatttggaaagaagcacagtggatatgtctaaattcttgttattcag -gcctgaatttaatacaccgcatagttaacttcgcggtagagttgttcatcatgcctcctc -taagctaccacttctatgatacaccaatagttgttctacggaatctgataattggccaag -tcataaacttccgctgcgttcaacccccttgctcgaatatccaactcgaaaagacagcct -tttggtgtccggaacaaatcagttacttcttttctgatgttaattctctgtggtcagata -cagaccaaaaactccgcggatttaccatcctccaagaacaaatttgcatcaacatagcat -tttggctacatattctaagtctcaatagtttaggttttcaactacattatcccaacatta -ggattggaggaataatagctgggtaagtccccttgcgtctacaatcgactattttttatg -aatatgcttctgccgcacctatggttattaaaaaagtcatgactttgaagaaccctgaaa -agatagatgaatcaggtgtaatggcagcagccaaagagcatataattagcaacactctaa -gaacattatagatatgatgatagcgatcgtcatgatgttatccggtcacaatagtagctt -catcagctaattcgttttgccagtggtgacttgcgctggaagaatcgttatacggtccct -tccctcttgatacggtgggggcttattcaaccgcgtggattgggttgtcatacttgcatt -aaacgatgtaaaccatctagtagtcaactatactaaatcacaaaatagtgatcaatacat -acccgcttcatggttttaaccatttaattgattaaagatattccgctaagaaccattatc -tacctaaactgatcgccgtatcctagtagtttgaaatttgatgtaccgtaatgatcaacg -aagtaaaacgttatattgtatgtagaataataggtcttggagctaaatgatgtgattggt -agtgaagacttacccttacaactttaccggtttctcggaagaatatactagagaatcaat -gcatgggctacataagcactttagtctaatgagataaaaaatacacgagtcttccatcat -gaattttttgtcgaaaaactcgaacctggtaatttaaaccatatatctttatgtcgtcaa -taactctcatatgttttatataacttcccaatcacgacttgtaactgcttgttcgactga -gctgtttgagctatgaggccgggatccggttgagctacatctatttgctacaagaaaaat -gaaagcacatttgttgggagttctggctacactcatagagaaataagtggcccgagtggg -tgcggcctgcctccatattcaagtgtatcttaaaccaagtggttccaacgctcgcgctaa -agaattaaagcctttatttcctccacggagtagcccgtaatccggttcgaaagagaccat -tgaagttaattttcatatccagtgaagtttaggcacaagcatgtgttctgccacatgcct -caaagcgctcttcaaccaagatatgattcatcctaacttcgatgaatgcgtctgtaacat -aaatatagaaggaatgattcggcgagttaattttcgccttctccaacatggcatccctac -gttcgttataaggaccatacatgtaggttttaaaggtttgcggttaatcgatatttacat -catagaaattctatagtcaaatttacaagactctagatactcactcgttgcagccggcta -ggaagcgctttgtaccttacttcccttttcgttgcgtaatatgaatttcatatagtaagt -tcaaggcactcatacctccgtgaagagggtagatagactattaaagttgtttaatagtac -gtattgatggaaatgacccgtaggagatttaccactcaatccacaagattcgctgctgtg -cattatcaaaacagtgcatgtcgaaacatgggttgggtccttcaaacacgaatccaggta -gagatacctttgcaattttt diff --git a/vendor/regex/examples/regexdna-output.txt b/vendor/regex/examples/regexdna-output.txt deleted file mode 100644 index d36baa5..0000000 --- a/vendor/regex/examples/regexdna-output.txt +++ /dev/null @@ -1,13 +0,0 @@ -agggtaaa|tttaccct 0 
-[cgt]gggtaaa|tttaccc[acg] 3 -a[act]ggtaaa|tttacc[agt]t 9 -ag[act]gtaaa|tttac[agt]ct 8 -agg[act]taaa|ttta[agt]cct 10 -aggg[acg]aaa|ttt[cgt]ccct 3 -agggt[cgt]aa|tt[acg]accct 4 -agggta[cgt]a|t[acg]taccct 3 -agggtaa[cgt]|[acg]ttaccct 5 - -101745 -100000 -133640 diff --git a/vendor/regex/examples/shootout-regex-dna-bytes.rs b/vendor/regex/examples/shootout-regex-dna-bytes.rs deleted file mode 100644 index 773fd9b..0000000 --- a/vendor/regex/examples/shootout-regex-dna-bytes.rs +++ /dev/null @@ -1,68 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -use std::io::{self, Read}; -use std::sync::Arc; -use std::thread; - -macro_rules! regex { - ($re:expr) => { - ::regex::bytes::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = Vec::with_capacity(51 * (1 << 20)); - io::stdin().read_to_end(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, &b""[..]).into_owned(); - let clen = seq.len(); - let seq_arc = Arc::new(seq.clone()); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - let mut counts = vec![]; - for variant in variants { - let seq = seq_arc.clone(); - let restr = variant.to_string(); - let future = thread::spawn(move || variant.find_iter(&seq).count()); - counts.push((restr, future)); - } - - let substs = vec![ - (regex!("B"), &b"(c|g|t)"[..]), - (regex!("D"), &b"(a|g|t)"[..]), - (regex!("H"), &b"(a|c|t)"[..]), - (regex!("K"), &b"(g|t)"[..]), - (regex!("M"), &b"(a|c)"[..]), - (regex!("N"), &b"(a|c|g|t)"[..]), - (regex!("R"), &b"(a|g)"[..]), - (regex!("S"), &b"(c|g)"[..]), - (regex!("V"), &b"(a|c|g)"[..]), - (regex!("W"), &b"(a|t)"[..]), - (regex!("Y"), &b"(c|t)"[..]), - ]; - let mut seq = seq; - for (re, replacement) in substs { - seq = re.replace_all(&seq, replacement).into_owned(); - } - - for (variant, count) in counts { - println!("{} {}", variant, count.join().unwrap()); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} diff --git a/vendor/regex/examples/shootout-regex-dna-cheat.rs b/vendor/regex/examples/shootout-regex-dna-cheat.rs deleted file mode 100644 index 1bde7ab..0000000 --- a/vendor/regex/examples/shootout-regex-dna-cheat.rs +++ /dev/null @@ -1,90 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -// This technically solves the problem posed in the `regex-dna` benchmark, but -// it cheats by combining all of the replacements into a single regex and -// replacing them with a single linear scan. i.e., it re-implements -// `replace_all`. As a result, this is around 25% faster. ---AG - -use std::io::{self, Read}; -use std::sync::Arc; -use std::thread; - -macro_rules! 
regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - let seq_arc = Arc::new(seq.clone()); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - let mut counts = vec![]; - for variant in variants { - let seq = seq_arc.clone(); - let restr = variant.to_string(); - let future = thread::spawn(move || variant.find_iter(&seq).count()); - counts.push((restr, future)); - } - - let substs = vec![ - (b'B', "(c|g|t)"), - (b'D', "(a|g|t)"), - (b'H', "(a|c|t)"), - (b'K', "(g|t)"), - (b'M', "(a|c)"), - (b'N', "(a|c|g|t)"), - (b'R', "(a|g)"), - (b'S', "(c|g)"), - (b'V', "(a|c|g)"), - (b'W', "(a|t)"), - (b'Y', "(c|t)"), - ]; // combined into one regex in `replace_all` - let seq = replace_all(&seq, substs); - - for (variant, count) in counts { - println!("{} {}", variant, count.join().unwrap()); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} - -fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String { - let mut replacements = vec![""; 256]; - let mut alternates = vec![]; - for (re, replacement) in substs { - replacements[re as usize] = replacement; - alternates.push((re as char).to_string()); - } - - let re = regex!(&alternates.join("|")); - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for m in re.find_iter(text) { - new.push_str(&text[last_match..m.start()]); - new.push_str(replacements[text.as_bytes()[m.start()] as usize]); - last_match = m.end(); - } - new.push_str(&text[last_match..]); - new -} diff --git a/vendor/regex/examples/shootout-regex-dna-replace.rs b/vendor/regex/examples/shootout-regex-dna-replace.rs deleted file mode 100644 index 20694e0..0000000 --- a/vendor/regex/examples/shootout-regex-dna-replace.rs +++ /dev/null @@ -1,17 +0,0 @@ -use std::io::{self, Read}; - -macro_rules! regex { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re).build().unwrap().into_regex() - }}; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - println!("original: {}, replaced: {}", ilen, seq.len()); -} diff --git a/vendor/regex/examples/shootout-regex-dna-single-cheat.rs b/vendor/regex/examples/shootout-regex-dna-single-cheat.rs deleted file mode 100644 index 70a979c..0000000 --- a/vendor/regex/examples/shootout-regex-dna-single-cheat.rs +++ /dev/null @@ -1,75 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -use std::io::{self, Read}; - -macro_rules! 
regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - for re in variants { - println!("{} {}", re.to_string(), re.find_iter(&seq).count()); - } - - let substs = vec![ - (b'B', "(c|g|t)"), - (b'D', "(a|g|t)"), - (b'H', "(a|c|t)"), - (b'K', "(g|t)"), - (b'M', "(a|c)"), - (b'N', "(a|c|g|t)"), - (b'R', "(a|g)"), - (b'S', "(c|g)"), - (b'V', "(a|c|g)"), - (b'W', "(a|t)"), - (b'Y', "(c|t)"), - ]; // combined into one regex in `replace_all` - let seq = replace_all(&seq, substs); - - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} - -fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String { - let mut replacements = vec![""; 256]; - let mut alternates = vec![]; - for (re, replacement) in substs { - replacements[re as usize] = replacement; - alternates.push((re as char).to_string()); - } - - let re = regex!(&alternates.join("|")); - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for m in re.find_iter(text) { - new.push_str(&text[last_match..m.start()]); - new.push_str(replacements[text.as_bytes()[m.start()] as usize]); - last_match = m.end(); - } - new.push_str(&text[last_match..]); - new -} diff --git a/vendor/regex/examples/shootout-regex-dna-single.rs b/vendor/regex/examples/shootout-regex-dna-single.rs deleted file mode 100644 index b474059..0000000 --- a/vendor/regex/examples/shootout-regex-dna-single.rs +++ /dev/null @@ -1,57 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -use std::io::{self, Read}; - -macro_rules! 
regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - for re in variants { - println!("{} {}", re.to_string(), re.find_iter(&seq).count()); - } - - let substs = vec![ - (regex!("B"), "(c|g|t)"), - (regex!("D"), "(a|g|t)"), - (regex!("H"), "(a|c|t)"), - (regex!("K"), "(g|t)"), - (regex!("M"), "(a|c)"), - (regex!("N"), "(a|c|g|t)"), - (regex!("R"), "(a|g)"), - (regex!("S"), "(c|g)"), - (regex!("V"), "(a|c|g)"), - (regex!("W"), "(a|t)"), - (regex!("Y"), "(c|t)"), - ]; - let mut seq = seq; - for (re, replacement) in substs { - seq = re.replace_all(&seq, replacement).into_owned(); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} diff --git a/vendor/regex/examples/shootout-regex-dna.rs b/vendor/regex/examples/shootout-regex-dna.rs deleted file mode 100644 index b96518e..0000000 --- a/vendor/regex/examples/shootout-regex-dna.rs +++ /dev/null @@ -1,68 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -use std::io::{self, Read}; -use std::sync::Arc; -use std::thread; - -macro_rules! 
regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(51 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - let seq_arc = Arc::new(seq.clone()); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - let mut counts = vec![]; - for variant in variants { - let seq = seq_arc.clone(); - let restr = variant.to_string(); - let future = thread::spawn(move || variant.find_iter(&seq).count()); - counts.push((restr, future)); - } - - let substs = vec![ - (regex!("B"), "(c|g|t)"), - (regex!("D"), "(a|g|t)"), - (regex!("H"), "(a|c|t)"), - (regex!("K"), "(g|t)"), - (regex!("M"), "(a|c)"), - (regex!("N"), "(a|c|g|t)"), - (regex!("R"), "(a|g)"), - (regex!("S"), "(c|g)"), - (regex!("V"), "(a|c|g)"), - (regex!("W"), "(a|t)"), - (regex!("Y"), "(c|t)"), - ]; - let mut seq = seq; - for (re, replacement) in substs { - seq = re.replace_all(&seq, replacement).into_owned(); - } - - for (variant, count) in counts { - println!("{} {}", variant, count.join().unwrap()); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} diff --git a/vendor/regex/record/README.md b/vendor/regex/record/README.md new file mode 100644 index 0000000..432b06a --- /dev/null +++ b/vendor/regex/record/README.md @@ -0,0 +1,4 @@ +This directory contains various recordings of results. These are committed to +the repository so that they can be compared over time. (At the time of writing, +there is no tooling for facilitating this comparison. It has to be done +manually.) 
diff --git a/vendor/regex/record/compile-test/2023-04-19_1.7.3.csv b/vendor/regex/record/compile-test/2023-04-19_1.7.3.csv new file mode 100644 index 0000000..af62da1 --- /dev/null +++ b/vendor/regex/record/compile-test/2023-04-19_1.7.3.csv @@ -0,0 +1,11 @@ +name,crate,revision,profile,duration,size,relative-size +regex__dev__std_perf_unicode,regex,9582040009,dev,1.824209152s,3434992,3113064 +regex__dev__std,regex,9582040009,dev,1.206314935s,1362392,1040464 +regex__dev__std_perf,regex,9582040009,dev,1.543583435s,2726384,2404456 +regex__dev__std_unicode,regex,9582040009,dev,1.490095643s,2066904,1744976 +regex__dev__std_unicode-case_unicode-perl,regex,9582040009,dev,1.292011694s,1812952,1491024 +regex__release__std_perf_unicode,regex,9582040009,release,2.398133563s,1616216,1294368 +regex__release__std,regex,9582040009,release,1.413680252s,694592,372744 +regex__release__std_perf,regex,9582040009,release,2.341496191s,1124696,802848 +regex__release__std_unicode,regex,9582040009,release,1.671407822s,1190208,868360 +regex__release__std_unicode-case_unicode-perl,regex,9582040009,release,1.441712198s,932160,610312 diff --git a/vendor/regex/record/compile-test/2023-04-20_master.csv b/vendor/regex/record/compile-test/2023-04-20_master.csv new file mode 100644 index 0000000..4c3e916 --- /dev/null +++ b/vendor/regex/record/compile-test/2023-04-20_master.csv @@ -0,0 +1,11 @@ +name,crate,revision,profile,duration,size,relative-size +regex__dev__std_perf_unicode,regex,f1f99af2bc,dev,1.834267609s,3799536,3477608 +regex__dev__std,regex,f1f99af2bc,dev,1.263958602s,1427928,1106000 +regex__dev__std_perf,regex,f1f99af2bc,dev,1.631302845s,3234288,2912360 +regex__dev__std_unicode,regex,f1f99af2bc,dev,1.550536696s,1997272,1675344 +regex__dev__std_unicode-case_unicode-perl,regex,f1f99af2bc,dev,1.341622852s,1739224,1417296 +regex__release__std_perf_unicode,regex,f1f99af2bc,release,2.475080323s,1755480,1433632 +regex__release__std,regex,f1f99af2bc,release,1.45990031s,731456,409608 +regex__release__std_perf,regex,f1f99af2bc,release,2.421787211s,1259864,938016 +regex__release__std_unicode,regex,f1f99af2bc,release,1.693972619s,1227072,905224 +regex__release__std_unicode-case_unicode-perl,regex,f1f99af2bc,release,1.528003306s,969024,647176 diff --git a/vendor/regex/record/compile-test/2023-07-05.csv b/vendor/regex/record/compile-test/2023-07-05.csv new file mode 100644 index 0000000..6ec81f5 --- /dev/null +++ b/vendor/regex/record/compile-test/2023-07-05.csv @@ -0,0 +1,37 @@ +name,crate,revision,profile,duration,size,relative-size +regex__dev__std_perf_unicode,regex,53786ce797,dev,2.414172223s,4143600,3764328 +regex__dev__std_perf_unicode_perf-dfa-full,regex,53786ce797,dev,2.900927164s,4815368,4436096 +regex__dev__std,regex,53786ce797,dev,1.662626059s,2062808,1683536 +regex__dev__std_perf,regex,53786ce797,dev,2.136755026s,3574256,3194984 +regex__dev__std_unicode,regex,53786ce797,dev,1.943953132s,2623960,2244688 +regex__dev__std_unicode-case_unicode-perl,regex,53786ce797,dev,1.753222606s,2374104,1994832 +regex-lite__dev__std_string,regex,53786ce797,dev,498.158769ms,727504,348232 +regex-automata__dev__std_syntax_perf_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,dev,2.900832296s,4872712,4493440 +regex-automata__dev__std_syntax_nfa-pikevm,regex-automata,53786ce797,dev,1.413429089s,1501648,1122376 +regex-automata__dev__std_syntax_nfa-backtrack,regex-automata,53786ce797,dev,1.412429191s,1505744,1126472 +regex-automata__dev__std_syntax_hybrid,regex-automata,53786ce797,dev,1.678331978s,1632720,1253448 
+regex-automata__dev__std_syntax_dfa-onepass,regex-automata,53786ce797,dev,1.594526299s,1526224,1146952 +regex-automata__dev__std_syntax_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,dev,2.992024402s,3500504,3121232 +regex-automata__dev__std_syntax_perf_unicode_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,dev,2.378489598s,4119024,3739752 +regex-automata__dev__std_syntax_perf_meta_nfa_dfa_hybrid,regex-automata,53786ce797,dev,2.695475914s,4299272,3920000 +regex-automata__dev__std_syntax_perf_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,dev,2.120929251s,3549680,3170408 +regex-automata__dev__std_unicode_meta,regex-automata,53786ce797,dev,1.89728585s,2492888,2113616 +regex-automata__dev__std_meta,regex-automata,53786ce797,dev,1.604628942s,1927640,1548368 +regex__release__std_perf_unicode,regex,53786ce797,release,3.333636908s,2025816,1650720 +regex__release__std_perf_unicode_perf-dfa-full,regex,53786ce797,release,3.805434309s,2210160,1835064 +regex__release__std,regex,53786ce797,release,1.789749444s,932160,557064 +regex__release__std_perf,regex,53786ce797,release,2.734249431s,1505624,1130528 +regex__release__std_unicode,regex,53786ce797,release,2.04945845s,1431872,1056776 +regex__release__std_unicode-case_unicode-perl,regex,53786ce797,release,1.893829903s,1173824,798728 +regex-lite__release__std_string,regex,53786ce797,release,648.517079ms,473400,98304 +regex-automata__release__std_syntax_perf_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,release,3.893237683s,2242928,1867832 +regex-automata__release__std_syntax_nfa-pikevm,regex-automata,53786ce797,release,1.556952008s,780600,405504 +regex-automata__release__std_syntax_nfa-backtrack,regex-automata,53786ce797,release,1.576471926s,768312,393216 +regex-automata__release__std_syntax_hybrid,regex-automata,53786ce797,release,1.819539266s,813368,438272 +regex-automata__release__std_syntax_dfa-onepass,regex-automata,53786ce797,release,1.672511482s,776504,401408 +regex-automata__release__std_syntax_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,release,3.227157436s,1767744,1392648 +regex-automata__release__std_syntax_perf_unicode_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,release,3.340235296s,2005336,1630240 +regex-automata__release__std_syntax_perf_meta_nfa_dfa_hybrid,regex-automata,53786ce797,release,3.640335773s,1718640,1343544 +regex-automata__release__std_syntax_perf_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,release,2.876306297s,1489240,1114144 +regex-automata__release__std_unicode_meta,regex-automata,53786ce797,release,1.945654415s,1362240,987144 +regex-automata__release__std_meta,regex-automata,53786ce797,release,1.740500411s,862528,487432 diff --git a/vendor/regex/record/compile-test/README.md b/vendor/regex/record/compile-test/README.md new file mode 100644 index 0000000..7291d5d --- /dev/null +++ b/vendor/regex/record/compile-test/README.md @@ -0,0 +1,27 @@ +This directory contains the results of compilation tests. Specifically, +the results are from testing both the from scratch compilation time and +relative binary size increases of various features for both the `regex` and +`regex-automata` crates. + +Here's an example of how to run these tests for just the `regex` crate. You'll +need the `regex-cli` command installed, which can be found in the `regex-cli` +directory in the root of this repository. + +This must be run in the root of a checkout of this repository. 
+ +``` +$ mkdir /tmp/regex-compile-test +$ regex-cli compile-test ./ /tmp/regex-compile-test | tee record/compile-test/2023-04-19_1.7.3.csv +``` + +You can then look at the results using a tool like [`xsv`][xsv]: + +``` +$ xsv table record/compile-test/2023-04-19_1.7.3.csv +``` + +Note that the relative binary size is computed by building a "baseline" hello +world program, and then subtracting that from the size of a binary that uses +the regex crate. + +[xsv]: https://github.com/BurntSushi/xsv diff --git a/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic b/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic new file mode 100644 index 0000000..9ef2173 --- /dev/null +++ b/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic @@ -0,0 +1,73 @@ + Running target/release/dynamic-e87a67d7ea67f0eb + +running 67 tests +test bench::anchored_literal_long_match ... bench: 75 ns/iter (+/- 3) = 5200 MB/s +test bench::anchored_literal_long_non_match ... bench: 61 ns/iter (+/- 2) = 6393 MB/s +test bench::anchored_literal_short_match ... bench: 75 ns/iter (+/- 3) = 346 MB/s +test bench::anchored_literal_short_non_match ... bench: 61 ns/iter (+/- 1) = 426 MB/s +test bench::easy0_1K ... bench: 196 ns/iter (+/- 8) = 5224 MB/s +test bench::easy0_1MB ... bench: 255,138 ns/iter (+/- 4,820) = 4109 MB/s +test bench::easy0_32 ... bench: 71 ns/iter (+/- 2) = 450 MB/s +test bench::easy0_32K ... bench: 5,392 ns/iter (+/- 108) = 6077 MB/s +test bench::easy1_1K ... bench: 241 ns/iter (+/- 37) = 4248 MB/s +test bench::easy1_1MB ... bench: 334,872 ns/iter (+/- 3,433) = 3131 MB/s +test bench::easy1_32 ... bench: 65 ns/iter (+/- 2) = 492 MB/s +test bench::easy1_32K ... bench: 6,139 ns/iter (+/- 703) = 5337 MB/s +test bench::hard_1K ... bench: 4,654 ns/iter (+/- 63) = 220 MB/s +test bench::hard_1MB ... bench: 4,719,487 ns/iter (+/- 71,818) = 222 MB/s +test bench::hard_32 ... bench: 199 ns/iter (+/- 8) = 160 MB/s +test bench::hard_32K ... bench: 147,389 ns/iter (+/- 4,391) = 222 MB/s +test bench::literal ... bench: 20 ns/iter (+/- 4) = 2550 MB/s +test bench::match_class ... bench: 85 ns/iter (+/- 4) = 952 MB/s +test bench::match_class_in_range ... bench: 32 ns/iter (+/- 3) = 2531 MB/s +test bench::match_class_unicode ... bench: 783 ns/iter (+/- 13) = 205 MB/s +test bench::medium_1K ... bench: 1,334 ns/iter (+/- 154) = 767 MB/s +test bench::medium_1MB ... bench: 2,044,757 ns/iter (+/- 72,936) = 512 MB/s +test bench::medium_32 ... bench: 99 ns/iter (+/- 18) = 323 MB/s +test bench::medium_32K ... bench: 59,603 ns/iter (+/- 13,750) = 549 MB/s +test bench::no_exponential ... bench: 553 ns/iter (+/- 150) = 180 MB/s +test bench::not_literal ... bench: 293 ns/iter (+/- 59) = 174 MB/s +test bench::one_pass_long_prefix ... bench: 177 ns/iter (+/- 35) = 146 MB/s +test bench::one_pass_long_prefix_not ... bench: 175 ns/iter (+/- 47) = 148 MB/s +test bench::one_pass_short ... bench: 134 ns/iter (+/- 34) = 126 MB/s +test bench::one_pass_short_not ... bench: 136 ns/iter (+/- 39) = 125 MB/s +test bench::replace_all ... bench: 153 ns/iter (+/- 17) +test bench_dynamic_compile::compile_huge ... bench: 165,209 ns/iter (+/- 4,396) +test bench_dynamic_compile::compile_huge_bytes ... bench: 18,795,770 ns/iter (+/- 2,674,909) +test bench_dynamic_compile::compile_simple ... bench: 6,883 ns/iter (+/- 391) +test bench_dynamic_compile::compile_simple_bytes ... bench: 7,281 ns/iter (+/- 751) +test bench_dynamic_compile::compile_small ... bench: 9,091 ns/iter (+/- 1,125) +test bench_dynamic_compile::compile_small_bytes ... 
bench: 182,815 ns/iter (+/- 3,814) +test bench_dynamic_parse::parse_huge ... bench: 1,233 ns/iter (+/- 123) +test bench_dynamic_parse::parse_simple ... bench: 2,015 ns/iter (+/- 108) +test bench_dynamic_parse::parse_small ... bench: 2,500 ns/iter (+/- 76) +test bench_sherlock::before_holmes ... bench: 2,741,811 ns/iter (+/- 58,389) = 216 MB/s +test bench_sherlock::everything_greedy ... bench: 7,807,696 ns/iter (+/- 328,585) = 76 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 5,424,922 ns/iter (+/- 78,937) = 109 MB/s +test bench_sherlock::holmes_cochar_watson ... bench: 266,557 ns/iter (+/- 3,832) = 2231 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 1,327,967 ns/iter (+/- 12,773) = 448 MB/s +test bench_sherlock::line_boundary_sherlock_holmes ... bench: 2,690,485 ns/iter (+/- 17,393) = 221 MB/s +test bench_sherlock::name_alt1 ... bench: 77,206 ns/iter (+/- 951) = 7705 MB/s +test bench_sherlock::name_alt2 ... bench: 303,775 ns/iter (+/- 5,030) = 1958 MB/s +test bench_sherlock::name_alt3 ... bench: 1,385,153 ns/iter (+/- 15,871) = 429 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 1,473,833 ns/iter (+/- 9,825) = 403 MB/s +test bench_sherlock::name_alt4 ... bench: 300,912 ns/iter (+/- 3,896) = 1977 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 1,421,519 ns/iter (+/- 16,246) = 418 MB/s +test bench_sherlock::name_holmes ... bench: 52,027 ns/iter (+/- 785) = 11435 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 1,241,204 ns/iter (+/- 16,862) = 479 MB/s +test bench_sherlock::name_sherlock ... bench: 34,378 ns/iter (+/- 677) = 17305 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 34,463 ns/iter (+/- 580) = 17262 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 1,281,540 ns/iter (+/- 11,054) = 464 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 1,281,293 ns/iter (+/- 13,129) = 464 MB/s +test bench_sherlock::name_whitespace ... bench: 60,463 ns/iter (+/- 815) = 9839 MB/s +test bench_sherlock::no_match_common ... bench: 568,357 ns/iter (+/- 11,237) = 1046 MB/s +test bench_sherlock::no_match_uncommon ... bench: 23,656 ns/iter (+/- 340) = 25149 MB/s +test bench_sherlock::quotes ... bench: 977,907 ns/iter (+/- 13,926) = 608 MB/s +test bench_sherlock::the_lower ... bench: 794,285 ns/iter (+/- 8,513) = 749 MB/s +test bench_sherlock::the_nocase ... bench: 1,837,240 ns/iter (+/- 22,738) = 323 MB/s +test bench_sherlock::the_upper ... bench: 54,083 ns/iter (+/- 1,153) = 11000 MB/s +test bench_sherlock::the_whitespace ... bench: 1,986,579 ns/iter (+/- 9,292) = 299 MB/s +test bench_sherlock::word_ending_n ... bench: 55,205,101 ns/iter (+/- 93,542) = 10 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 67 measured + diff --git a/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa b/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa new file mode 100644 index 0000000..50d3a13 --- /dev/null +++ b/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa @@ -0,0 +1,85 @@ + Compiling regex v0.1.48 (file:///home/andrew/data/projects/rust/regex) +src/dfa.rs:73:1: 94:2 warning: function is never used: `can_exec`, #[warn(dead_code)] on by default +src/dfa.rs:73 pub fn can_exec(insts: &Insts) -> bool { +src/dfa.rs:74 use inst::EmptyLook::*; +src/dfa.rs:75 // If for some reason we manage to allocate a regex program with more +src/dfa.rs:76 // than 2^32-1 instructions, then we can't execute the DFA because we +src/dfa.rs:77 // use 32 bit pointers. 
+src/dfa.rs:78 if insts.len() > ::std::u32::MAX as usize { + ... +src/exec.rs:12:11: 12:15 warning: unused import, #[warn(unused_imports)] on by default +src/exec.rs:12 use dfa::{self, Dfa, DfaResult}; + ^~~~ + Running target/release/dynamic-e87a67d7ea67f0eb + +running 67 tests +test bench::anchored_literal_long_match ... bench: 169 ns/iter (+/- 1) = 2307 MB/s +test bench::anchored_literal_long_non_match ... bench: 85 ns/iter (+/- 0) = 4588 MB/s +test bench::anchored_literal_short_match ... bench: 158 ns/iter (+/- 3) = 164 MB/s +test bench::anchored_literal_short_non_match ... bench: 84 ns/iter (+/- 2) = 309 MB/s +test bench::easy0_1K ... bench: 318 ns/iter (+/- 2) = 3220 MB/s +test bench::easy0_1MB ... bench: 257,205 ns/iter (+/- 2,448) = 4076 MB/s +test bench::easy0_32 ... bench: 82 ns/iter (+/- 1) = 390 MB/s +test bench::easy0_32K ... bench: 8,666 ns/iter (+/- 104) = 3781 MB/s +test bench::easy1_1K ... bench: 293 ns/iter (+/- 2) = 3494 MB/s +test bench::easy1_1MB ... bench: 329,774 ns/iter (+/- 6,296) = 3179 MB/s +test bench::easy1_32 ... bench: 77 ns/iter (+/- 0) = 415 MB/s +test bench::easy1_32K ... bench: 8,856 ns/iter (+/- 93) = 3700 MB/s +test bench::hard_1K ... bench: 31,888 ns/iter (+/- 83) = 32 MB/s +test bench::hard_1MB ... bench: 58,435,108 ns/iter (+/- 64,537) = 17 MB/s +test bench::hard_32 ... bench: 1,048 ns/iter (+/- 12) = 30 MB/s +test bench::hard_32K ... bench: 1,033,930 ns/iter (+/- 4,224) = 31 MB/s +test bench::literal ... bench: 20 ns/iter (+/- 0) = 2550 MB/s +test bench::match_class ... bench: 84 ns/iter (+/- 0) = 964 MB/s +test bench::match_class_in_range ... bench: 33 ns/iter (+/- 0) = 2454 MB/s +test bench::match_class_unicode ... bench: 2,218 ns/iter (+/- 8) = 72 MB/s +test bench::medium_1K ... bench: 1,368 ns/iter (+/- 9) = 748 MB/s +test bench::medium_1MB ... bench: 2,034,481 ns/iter (+/- 3,608) = 515 MB/s +test bench::medium_32 ... bench: 141 ns/iter (+/- 0) = 226 MB/s +test bench::medium_32K ... bench: 59,949 ns/iter (+/- 421) = 546 MB/s +test bench::no_exponential ... bench: 336,653 ns/iter (+/- 1,757) +test bench::not_literal ... bench: 1,247 ns/iter (+/- 5) = 40 MB/s +test bench::one_pass_long_prefix ... bench: 264 ns/iter (+/- 2) = 98 MB/s +test bench::one_pass_long_prefix_not ... bench: 267 ns/iter (+/- 1) = 97 MB/s +test bench::one_pass_short ... bench: 768 ns/iter (+/- 5) = 22 MB/s +test bench::one_pass_short_not ... bench: 797 ns/iter (+/- 20) = 21 MB/s +test bench::replace_all ... bench: 149 ns/iter (+/- 0) +test bench_dynamic_compile::compile_huge ... bench: 161,349 ns/iter (+/- 1,462) +test bench_dynamic_compile::compile_huge_bytes ... bench: 18,050,519 ns/iter (+/- 105,846) +test bench_dynamic_compile::compile_simple ... bench: 6,664 ns/iter (+/- 390) +test bench_dynamic_compile::compile_simple_bytes ... bench: 7,035 ns/iter (+/- 370) +test bench_dynamic_compile::compile_small ... bench: 8,914 ns/iter (+/- 347) +test bench_dynamic_compile::compile_small_bytes ... bench: 186,970 ns/iter (+/- 2,134) +test bench_dynamic_parse::parse_huge ... bench: 1,238 ns/iter (+/- 11) +test bench_dynamic_parse::parse_simple ... bench: 2,005 ns/iter (+/- 19) +test bench_dynamic_parse::parse_small ... bench: 2,494 ns/iter (+/- 11) +test bench_sherlock::before_holmes ... bench: 42,005,594 ns/iter (+/- 57,752) = 14 MB/s +test bench_sherlock::everything_greedy ... bench: 38,431,063 ns/iter (+/- 28,840) = 15 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 32,003,966 ns/iter (+/- 50,270) = 18 MB/s +test bench_sherlock::holmes_cochar_watson ... 
bench: 1,457,068 ns/iter (+/- 3,202) = 408 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 136,035,549 ns/iter (+/- 75,381) = 4 MB/s +test bench_sherlock::line_boundary_sherlock_holmes ... bench: 33,024,291 ns/iter (+/- 67,902) = 18 MB/s +test bench_sherlock::name_alt1 ... bench: 157,989 ns/iter (+/- 917) = 3765 MB/s +test bench_sherlock::name_alt2 ... bench: 545,254 ns/iter (+/- 1,908) = 1091 MB/s +test bench_sherlock::name_alt3 ... bench: 2,245,964 ns/iter (+/- 2,478) = 264 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 4,792,290 ns/iter (+/- 31,760) = 124 MB/s +test bench_sherlock::name_alt4 ... bench: 584,204 ns/iter (+/- 2,084) = 1018 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 2,318,020 ns/iter (+/- 8,493) = 256 MB/s +test bench_sherlock::name_holmes ... bench: 51,880 ns/iter (+/- 299) = 11467 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 1,414,500 ns/iter (+/- 2,497) = 420 MB/s +test bench_sherlock::name_sherlock ... bench: 34,294 ns/iter (+/- 349) = 17348 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 34,531 ns/iter (+/- 199) = 17228 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 1,692,651 ns/iter (+/- 8,846) = 351 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 1,657,413 ns/iter (+/- 5,534) = 358 MB/s +test bench_sherlock::name_whitespace ... bench: 131,372 ns/iter (+/- 605) = 4528 MB/s +test bench_sherlock::no_match_common ... bench: 567,065 ns/iter (+/- 2,763) = 1049 MB/s +test bench_sherlock::no_match_uncommon ... bench: 23,782 ns/iter (+/- 85) = 25016 MB/s +test bench_sherlock::quotes ... bench: 11,251,366 ns/iter (+/- 24,960) = 52 MB/s +test bench_sherlock::the_lower ... bench: 789,781 ns/iter (+/- 2,072) = 753 MB/s +test bench_sherlock::the_nocase ... bench: 1,807,509 ns/iter (+/- 4,685) = 329 MB/s +test bench_sherlock::the_upper ... bench: 53,542 ns/iter (+/- 198) = 11111 MB/s +test bench_sherlock::the_whitespace ... bench: 5,410,444 ns/iter (+/- 14,766) = 109 MB/s +test bench_sherlock::word_ending_n ... bench: 56,017,874 ns/iter (+/- 60,047) = 10 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 67 measured + diff --git a/vendor/regex/record/old-bench-log/01-lazy-dfa/native b/vendor/regex/record/old-bench-log/01-lazy-dfa/native new file mode 100644 index 0000000..61fc08d --- /dev/null +++ b/vendor/regex/record/old-bench-log/01-lazy-dfa/native @@ -0,0 +1,65 @@ + Compiling regex_macros v0.1.28 (file:///home/andrew/data/projects/rust/regex/regex_macros) + Running regex_macros/target/release/native-f2ffefeeda527264 + +running 58 tests +test bench::anchored_literal_long_match ... bench: 189 ns/iter (+/- 16) = 2063 MB/s +test bench::anchored_literal_long_non_match ... bench: 47 ns/iter (+/- 1) = 8297 MB/s +test bench::anchored_literal_short_match ... bench: 177 ns/iter (+/- 5) = 146 MB/s +test bench::anchored_literal_short_non_match ... bench: 46 ns/iter (+/- 1) = 565 MB/s +test bench::easy0_1K ... bench: 26,578 ns/iter (+/- 1,140) = 38 MB/s +test bench::easy0_1MB ... bench: 27,229,730 ns/iter (+/- 261,126) = 38 MB/s +test bench::easy0_32 ... bench: 867 ns/iter (+/- 45) = 36 MB/s +test bench::easy0_32K ... bench: 847,113 ns/iter (+/- 276,910) = 38 MB/s +test bench::easy1_1K ... bench: 23,525 ns/iter (+/- 278) = 43 MB/s +test bench::easy1_1MB ... bench: 24,075,047 ns/iter (+/- 40,396) = 43 MB/s +test bench::easy1_32 ... bench: 767 ns/iter (+/- 14) = 41 MB/s +test bench::easy1_32K ... bench: 752,730 ns/iter (+/- 9,284) = 43 MB/s +test bench::hard_1K ... 
bench: 44,053 ns/iter (+/- 513) = 23 MB/s +test bench::hard_1MB ... bench: 44,982,170 ns/iter (+/- 76,683) = 23 MB/s +test bench::hard_32 ... bench: 1,418 ns/iter (+/- 26) = 22 MB/s +test bench::hard_32K ... bench: 1,407,013 ns/iter (+/- 13,426) = 23 MB/s +test bench::literal ... bench: 1,202 ns/iter (+/- 16) = 42 MB/s +test bench::match_class ... bench: 2,057 ns/iter (+/- 29) = 39 MB/s +test bench::match_class_in_range ... bench: 2,060 ns/iter (+/- 34) = 39 MB/s +test bench::match_class_unicode ... bench: 12,945 ns/iter (+/- 156) = 12 MB/s +test bench::medium_1K ... bench: 27,874 ns/iter (+/- 315) = 36 MB/s +test bench::medium_1MB ... bench: 28,614,500 ns/iter (+/- 544,256) = 36 MB/s +test bench::medium_32 ... bench: 896 ns/iter (+/- 85) = 35 MB/s +test bench::medium_32K ... bench: 892,349 ns/iter (+/- 35,511) = 36 MB/s +test bench::no_exponential ... bench: 319,270 ns/iter (+/- 19,837) +test bench::not_literal ... bench: 1,477 ns/iter (+/- 104) = 34 MB/s +test bench::one_pass_long_prefix ... bench: 653 ns/iter (+/- 10) = 39 MB/s +test bench::one_pass_long_prefix_not ... bench: 651 ns/iter (+/- 6) = 39 MB/s +test bench::one_pass_short ... bench: 1,016 ns/iter (+/- 24) = 16 MB/s +test bench::one_pass_short_not ... bench: 1,588 ns/iter (+/- 28) = 10 MB/s +test bench::replace_all ... bench: 1,078 ns/iter (+/- 55) +test bench_sherlock::before_holmes ... bench: 54,264,124 ns/iter (+/- 564,692) = 10 MB/s +test bench_sherlock::everything_greedy ... bench: 22,724,158 ns/iter (+/- 44,361) = 26 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 22,168,804 ns/iter (+/- 66,296) = 26 MB/s +test bench_sherlock::holmes_cochar_watson ... bench: 24,791,824 ns/iter (+/- 37,522) = 23 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 885,999,793 ns/iter (+/- 39,704,278) +test bench_sherlock::line_boundary_sherlock_holmes ... bench: 25,113,805 ns/iter (+/- 672,050) = 23 MB/s +test bench_sherlock::name_alt1 ... bench: 23,382,716 ns/iter (+/- 3,696,517) = 25 MB/s +test bench_sherlock::name_alt2 ... bench: 23,585,220 ns/iter (+/- 3,724,922) = 25 MB/s +test bench_sherlock::name_alt3 ... bench: 80,283,635 ns/iter (+/- 3,165,029) = 7 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 77,357,394 ns/iter (+/- 268,133) = 7 MB/s +test bench_sherlock::name_alt4 ... bench: 22,736,520 ns/iter (+/- 43,231) = 26 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 26,921,524 ns/iter (+/- 140,162) = 22 MB/s +test bench_sherlock::name_holmes ... bench: 15,145,735 ns/iter (+/- 65,980) = 39 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 16,285,042 ns/iter (+/- 71,956) = 36 MB/s +test bench_sherlock::name_sherlock ... bench: 16,189,653 ns/iter (+/- 99,929) = 36 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 14,975,742 ns/iter (+/- 118,052) = 39 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 16,904,928 ns/iter (+/- 201,104) = 35 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 16,335,907 ns/iter (+/- 118,725) = 36 MB/s +test bench_sherlock::name_whitespace ... bench: 14,837,905 ns/iter (+/- 52,201) = 40 MB/s +test bench_sherlock::no_match_common ... bench: 16,036,625 ns/iter (+/- 108,268) = 37 MB/s +test bench_sherlock::no_match_uncommon ... bench: 15,278,356 ns/iter (+/- 81,123) = 38 MB/s +test bench_sherlock::quotes ... bench: 21,580,801 ns/iter (+/- 198,772) = 27 MB/s +test bench_sherlock::the_lower ... bench: 16,059,120 ns/iter (+/- 160,640) = 37 MB/s +test bench_sherlock::the_nocase ... 
bench: 17,376,836 ns/iter (+/- 103,371) = 34 MB/s +test bench_sherlock::the_upper ... bench: 15,259,087 ns/iter (+/- 93,807) = 38 MB/s +test bench_sherlock::the_whitespace ... bench: 18,835,951 ns/iter (+/- 160,674) = 31 MB/s +test bench_sherlock::word_ending_n ... bench: 59,832,390 ns/iter (+/- 4,478,911) = 9 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 58 measured + diff --git a/vendor/regex/record/old-bench-log/01-lazy-dfa/nfa b/vendor/regex/record/old-bench-log/01-lazy-dfa/nfa new file mode 100644 index 0000000..994137b --- /dev/null +++ b/vendor/regex/record/old-bench-log/01-lazy-dfa/nfa @@ -0,0 +1,74 @@ + Compiling regex v0.1.48 (file:///home/andrew/data/projects/rust/regex) + Running target/release/dynamic_nfa-1e40ce11bcb7c666 + +running 67 tests +test bench::anchored_literal_long_match ... bench: 306 ns/iter (+/- 6) = 1274 MB/s +test bench::anchored_literal_long_non_match ... bench: 95 ns/iter (+/- 1) = 4105 MB/s +test bench::anchored_literal_short_match ... bench: 315 ns/iter (+/- 2) = 82 MB/s +test bench::anchored_literal_short_non_match ... bench: 96 ns/iter (+/- 2) = 270 MB/s +test bench::easy0_1K ... bench: 206 ns/iter (+/- 1) = 4970 MB/s +test bench::easy0_1MB ... bench: 255,834 ns/iter (+/- 1,273) = 4098 MB/s +test bench::easy0_32 ... bench: 72 ns/iter (+/- 2) = 444 MB/s +test bench::easy0_32K ... bench: 5,315 ns/iter (+/- 25) = 6165 MB/s +test bench::easy1_1K ... bench: 274 ns/iter (+/- 0) = 3737 MB/s +test bench::easy1_1MB ... bench: 337,047 ns/iter (+/- 1,972) = 3111 MB/s +test bench::easy1_32 ... bench: 76 ns/iter (+/- 2) = 421 MB/s +test bench::easy1_32K ... bench: 6,111 ns/iter (+/- 39) = 5362 MB/s +test bench::hard_1K ... bench: 59,596 ns/iter (+/- 264) = 17 MB/s +test bench::hard_1MB ... bench: 58,947,188 ns/iter (+/- 205,874) = 17 MB/s +test bench::hard_32 ... bench: 1,978 ns/iter (+/- 22) = 16 MB/s +test bench::hard_32K ... bench: 1,846,347 ns/iter (+/- 14,253) = 17 MB/s +test bench::literal ... bench: 172 ns/iter (+/- 1) = 296 MB/s +test bench::match_class ... bench: 240 ns/iter (+/- 1) = 337 MB/s +test bench::match_class_in_range ... bench: 190 ns/iter (+/- 2) = 426 MB/s +test bench::match_class_unicode ... bench: 4,145 ns/iter (+/- 24) = 38 MB/s +test bench::medium_1K ... bench: 1,195 ns/iter (+/- 8) = 856 MB/s +test bench::medium_1MB ... bench: 2,028,649 ns/iter (+/- 11,235) = 516 MB/s +test bench::medium_32 ... bench: 84 ns/iter (+/- 0) = 380 MB/s +test bench::medium_32K ... bench: 56,134 ns/iter (+/- 369) = 583 MB/s +test bench::no_exponential ... bench: 536 ns/iter (+/- 4) = 186 MB/s +test bench::not_literal ... bench: 2,428 ns/iter (+/- 31) = 21 MB/s +test bench::one_pass_long_prefix ... bench: 756 ns/iter (+/- 2) = 34 MB/s +test bench::one_pass_long_prefix_not ... bench: 756 ns/iter (+/- 12) = 34 MB/s +test bench::one_pass_short ... bench: 1,813 ns/iter (+/- 5) = 9 MB/s +test bench::one_pass_short_not ... bench: 2,588 ns/iter (+/- 8) = 6 MB/s +test bench::replace_all ... bench: 905 ns/iter (+/- 7) +test bench_dynamic_compile::compile_huge ... bench: 161,517 ns/iter (+/- 1,287) +test bench_dynamic_compile::compile_huge_bytes ... bench: 18,395,715 ns/iter (+/- 98,986) +test bench_dynamic_compile::compile_simple ... bench: 6,623 ns/iter (+/- 296) +test bench_dynamic_compile::compile_simple_bytes ... bench: 7,047 ns/iter (+/- 232) +test bench_dynamic_compile::compile_small ... bench: 8,948 ns/iter (+/- 526) +test bench_dynamic_compile::compile_small_bytes ... bench: 186,796 ns/iter (+/- 817) +test bench_dynamic_parse::parse_huge ... 
bench: 1,238 ns/iter (+/- 6) +test bench_dynamic_parse::parse_simple ... bench: 1,977 ns/iter (+/- 12) +test bench_dynamic_parse::parse_small ... bench: 2,502 ns/iter (+/- 18) +test bench_sherlock::before_holmes ... bench: 45,045,123 ns/iter (+/- 261,188) = 13 MB/s +test bench_sherlock::everything_greedy ... bench: 38,685,654 ns/iter (+/- 107,136) = 15 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 36,407,787 ns/iter (+/- 160,253) = 16 MB/s +test bench_sherlock::holmes_cochar_watson ... bench: 1,417,371 ns/iter (+/- 6,533) = 419 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 139,298,695 ns/iter (+/- 154,012) = 4 MB/s +test bench_sherlock::line_boundary_sherlock_holmes ... bench: 32,734,005 ns/iter (+/- 98,729) = 18 MB/s +test bench_sherlock::name_alt1 ... bench: 153,016 ns/iter (+/- 739) = 3888 MB/s +test bench_sherlock::name_alt2 ... bench: 534,038 ns/iter (+/- 1,909) = 1114 MB/s +test bench_sherlock::name_alt3 ... bench: 2,220,778 ns/iter (+/- 6,374) = 267 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 4,744,134 ns/iter (+/- 11,703) = 125 MB/s +test bench_sherlock::name_alt4 ... bench: 569,971 ns/iter (+/- 2,256) = 1043 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 2,324,966 ns/iter (+/- 3,082) = 255 MB/s +test bench_sherlock::name_holmes ... bench: 268,146 ns/iter (+/- 1,238) = 2218 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 1,409,583 ns/iter (+/- 2,808) = 422 MB/s +test bench_sherlock::name_sherlock ... bench: 95,280 ns/iter (+/- 316) = 6244 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 116,097 ns/iter (+/- 461) = 5124 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 1,691,210 ns/iter (+/- 3,712) = 351 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 1,651,722 ns/iter (+/- 7,070) = 360 MB/s +test bench_sherlock::name_whitespace ... bench: 130,960 ns/iter (+/- 923) = 4542 MB/s +test bench_sherlock::no_match_common ... bench: 568,008 ns/iter (+/- 1,723) = 1047 MB/s +test bench_sherlock::no_match_uncommon ... bench: 23,669 ns/iter (+/- 84) = 25135 MB/s +test bench_sherlock::quotes ... bench: 11,055,260 ns/iter (+/- 24,883) = 53 MB/s +test bench_sherlock::the_lower ... bench: 2,934,498 ns/iter (+/- 4,553) = 202 MB/s +test bench_sherlock::the_nocase ... bench: 4,268,193 ns/iter (+/- 8,164) = 139 MB/s +test bench_sherlock::the_upper ... bench: 272,832 ns/iter (+/- 1,436) = 2180 MB/s +test bench_sherlock::the_whitespace ... bench: 5,409,934 ns/iter (+/- 7,678) = 109 MB/s +test bench_sherlock::word_ending_n ... bench: 55,252,656 ns/iter (+/- 68,442) = 10 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 67 measured + diff --git a/vendor/regex/record/old-bench-log/01-lazy-dfa/pcre b/vendor/regex/record/old-bench-log/01-lazy-dfa/pcre new file mode 100644 index 0000000..22a66e6 --- /dev/null +++ b/vendor/regex/record/old-bench-log/01-lazy-dfa/pcre @@ -0,0 +1,60 @@ + Compiling regex v0.1.48 (file:///home/andrew/data/projects/rust/regex) + Running target/release/pcre-781840b9a3e9c199 + +running 53 tests +test anchored_literal_long_match ... bench: 90 ns/iter (+/- 7) = 4333 MB/s +test anchored_literal_long_non_match ... bench: 60 ns/iter (+/- 2) = 6500 MB/s +test anchored_literal_short_match ... bench: 87 ns/iter (+/- 6) = 298 MB/s +test anchored_literal_short_non_match ... bench: 58 ns/iter (+/- 4) = 448 MB/s +test easy0_1K ... bench: 258 ns/iter (+/- 14) = 3968 MB/s +test easy0_1MB ... bench: 226,139 ns/iter (+/- 1,637) = 4636 MB/s +test easy0_32 ... 
bench: 60 ns/iter (+/- 7) = 533 MB/s +test easy0_32K ... bench: 7,028 ns/iter (+/- 120) = 4662 MB/s +test easy1_1K ... bench: 794 ns/iter (+/- 20) = 1289 MB/s +test easy1_1MB ... bench: 751,438 ns/iter (+/- 11,372) = 1395 MB/s +test easy1_32 ... bench: 71 ns/iter (+/- 3) = 450 MB/s +test easy1_32K ... bench: 23,042 ns/iter (+/- 1,453) = 1422 MB/s +test hard_1K ... bench: 30,841 ns/iter (+/- 1,287) = 33 MB/s +test hard_1MB ... bench: 35,239,100 ns/iter (+/- 632,179) = 29 MB/s +test hard_32 ... bench: 86 ns/iter (+/- 11) = 372 MB/s +test hard_32K ... bench: 993,011 ns/iter (+/- 63,648) = 32 MB/s +test literal ... bench: 130 ns/iter (+/- 11) = 392 MB/s +test match_class ... bench: 183 ns/iter (+/- 33) = 442 MB/s +test match_class_in_range ... bench: 175 ns/iter (+/- 18) = 462 MB/s +test match_class_unicode ... bench: 513 ns/iter (+/- 8) = 313 MB/s +test medium_1K ... bench: 278 ns/iter (+/- 6) = 3683 MB/s +test medium_1MB ... bench: 240,699 ns/iter (+/- 17,344) = 4356 MB/s +test medium_32 ... bench: 61 ns/iter (+/- 13) = 524 MB/s +test medium_32K ... bench: 7,369 ns/iter (+/- 105) = 4446 MB/s +test not_literal ... bench: 274 ns/iter (+/- 17) = 186 MB/s +test one_pass_long_prefix ... bench: 87 ns/iter (+/- 19) = 298 MB/s +test one_pass_long_prefix_not ... bench: 86 ns/iter (+/- 13) = 302 MB/s +test one_pass_short ... bench: 117 ns/iter (+/- 44) = 145 MB/s +test one_pass_short_not ... bench: 122 ns/iter (+/- 6) = 139 MB/s +test sherlock::before_holmes ... bench: 14,450,308 ns/iter (+/- 617,786) = 41 MB/s +test sherlock::holmes_cochar_watson ... bench: 546,919 ns/iter (+/- 4,880) = 1087 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 194,524 ns/iter (+/- 6,230) = 3058 MB/s +test sherlock::name_alt1 ... bench: 457,899 ns/iter (+/- 7,781) = 1299 MB/s +test sherlock::name_alt2 ... bench: 496,659 ns/iter (+/- 6,529) = 1197 MB/s +test sherlock::name_alt3 ... bench: 983,620 ns/iter (+/- 45,359) = 604 MB/s +test sherlock::name_alt3_nocase ... bench: 3,500,367 ns/iter (+/- 79,807) = 169 MB/s +test sherlock::name_alt4 ... bench: 972,128 ns/iter (+/- 22,195) = 611 MB/s +test sherlock::name_alt4_nocase ... bench: 1,877,017 ns/iter (+/- 39,079) = 316 MB/s +test sherlock::name_holmes ... bench: 398,258 ns/iter (+/- 4,338) = 1493 MB/s +test sherlock::name_holmes_nocase ... bench: 492,292 ns/iter (+/- 4,667) = 1208 MB/s +test sherlock::name_sherlock ... bench: 268,891 ns/iter (+/- 18,063) = 2212 MB/s +test sherlock::name_sherlock_holmes ... bench: 197,067 ns/iter (+/- 8,027) = 3018 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,112,501 ns/iter (+/- 44,457) = 534 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,332,423 ns/iter (+/- 39,227) = 446 MB/s +test sherlock::name_whitespace ... bench: 267,257 ns/iter (+/- 964) = 2226 MB/s +test sherlock::no_match_common ... bench: 595,211 ns/iter (+/- 3,739) = 999 MB/s +test sherlock::no_match_uncommon ... bench: 584,057 ns/iter (+/- 6,825) = 1018 MB/s +test sherlock::quotes ... bench: 1,208,235 ns/iter (+/- 37,629) = 492 MB/s +test sherlock::the_lower ... bench: 1,210,851 ns/iter (+/- 35,900) = 491 MB/s +test sherlock::the_nocase ... bench: 1,286,611 ns/iter (+/- 35,689) = 462 MB/s +test sherlock::the_upper ... bench: 776,113 ns/iter (+/- 6,236) = 766 MB/s +test sherlock::the_whitespace ... bench: 1,368,468 ns/iter (+/- 135,282) = 434 MB/s +test sherlock::word_ending_n ... bench: 12,018,618 ns/iter (+/- 266,497) = 49 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 53 measured + diff --git a/vendor/regex/record/old-bench-log/02-set/dynamic b/vendor/regex/record/old-bench-log/02-set/dynamic new file mode 100644 index 0000000..69c9f71 --- /dev/null +++ b/vendor/regex/record/old-bench-log/02-set/dynamic @@ -0,0 +1,78 @@ + Compiling regex v0.1.52 (file:///home/andrew/data/projects/rust/regex) + Running target/release/dynamic-a76738dddf3bdc6b + +running 71 tests +test misc::anchored_literal_long_match ... bench: 74 ns/iter (+/- 8) = 5270 MB/s +test misc::anchored_literal_long_non_match ... bench: 58 ns/iter (+/- 0) = 6724 MB/s +test misc::anchored_literal_short_match ... bench: 73 ns/iter (+/- 0) = 356 MB/s +test misc::anchored_literal_short_non_match ... bench: 58 ns/iter (+/- 0) = 448 MB/s +test misc::easy0_1K ... bench: 214 ns/iter (+/- 2) = 4785 MB/s +test misc::easy0_1MB ... bench: 247,056 ns/iter (+/- 1,777) = 4244 MB/s +test misc::easy0_32 ... bench: 64 ns/iter (+/- 0) = 500 MB/s +test misc::easy0_32K ... bench: 5,281 ns/iter (+/- 29) = 6204 MB/s +test misc::easy1_1K ... bench: 278 ns/iter (+/- 5) = 3683 MB/s +test misc::easy1_1MB ... bench: 320,041 ns/iter (+/- 4,243) = 3276 MB/s +test misc::easy1_32 ... bench: 65 ns/iter (+/- 0) = 492 MB/s +test misc::easy1_32K ... bench: 5,885 ns/iter (+/- 83) = 5568 MB/s +test misc::hard_1K ... bench: 4,685 ns/iter (+/- 20) = 218 MB/s +test misc::hard_1MB ... bench: 4,745,020 ns/iter (+/- 19,440) = 220 MB/s +test misc::hard_32 ... bench: 197 ns/iter (+/- 1) = 162 MB/s +test misc::hard_32K ... bench: 147,409 ns/iter (+/- 656) = 222 MB/s +test misc::literal ... bench: 20 ns/iter (+/- 1) = 2550 MB/s +test misc::match_class ... bench: 86 ns/iter (+/- 3) = 941 MB/s +test misc::match_class_in_range ... bench: 32 ns/iter (+/- 2) = 2531 MB/s +test misc::match_class_unicode ... bench: 801 ns/iter (+/- 36) = 200 MB/s +test misc::medium_1K ... bench: 1,213 ns/iter (+/- 237) = 844 MB/s +test misc::medium_1MB ... bench: 1,991,418 ns/iter (+/- 239,612) = 526 MB/s +test misc::medium_32 ... bench: 100 ns/iter (+/- 8) = 320 MB/s +test misc::medium_32K ... bench: 57,080 ns/iter (+/- 709) = 574 MB/s +test misc::no_exponential ... bench: 522 ns/iter (+/- 17) = 191 MB/s +test misc::not_literal ... bench: 290 ns/iter (+/- 6) = 175 MB/s +test misc::one_pass_long_prefix ... bench: 176 ns/iter (+/- 15) = 147 MB/s +test misc::one_pass_long_prefix_not ... bench: 183 ns/iter (+/- 28) = 142 MB/s +test misc::one_pass_short ... bench: 136 ns/iter (+/- 8) = 125 MB/s +test misc::one_pass_short_not ... bench: 135 ns/iter (+/- 14) = 125 MB/s +test misc::replace_all ... bench: 149 ns/iter (+/- 34) +test rust_compile::compile_huge ... bench: 158,759 ns/iter (+/- 4,546) +test rust_compile::compile_huge_bytes ... bench: 17,538,290 ns/iter (+/- 1,735,383) +test rust_compile::compile_simple ... bench: 5,935 ns/iter (+/- 429) +test rust_compile::compile_simple_bytes ... bench: 6,682 ns/iter (+/- 293) +test rust_compile::compile_small ... bench: 7,664 ns/iter (+/- 473) +test rust_compile::compile_small_bytes ... bench: 175,272 ns/iter (+/- 4,492) +test rust_parse::parse_huge ... bench: 1,199 ns/iter (+/- 38) +test rust_parse::parse_simple ... bench: 1,849 ns/iter (+/- 28) +test rust_parse::parse_small ... bench: 2,470 ns/iter (+/- 35) +test sherlock::before_holmes ... bench: 2,750,028 ns/iter (+/- 21,847) = 216 MB/s +test sherlock::everything_greedy ... bench: 7,896,337 ns/iter (+/- 68,883) = 75 MB/s +test sherlock::everything_greedy_nl ... 
bench: 5,498,247 ns/iter (+/- 65,952) = 108 MB/s +test sherlock::holmes_cochar_watson ... bench: 260,499 ns/iter (+/- 4,984) = 2283 MB/s +test sherlock::holmes_coword_watson ... bench: 1,331,443 ns/iter (+/- 34,716) = 446 MB/s +test sherlock::letters ... bench: 60,985,848 ns/iter (+/- 592,838) = 9 MB/s +test sherlock::letters_lower ... bench: 59,041,695 ns/iter (+/- 186,034) = 10 MB/s +test sherlock::letters_upper ... bench: 4,714,214 ns/iter (+/- 35,672) = 126 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,730,524 ns/iter (+/- 69,565) = 217 MB/s +test sherlock::name_alt1 ... bench: 41,866 ns/iter (+/- 682) = 14210 MB/s +test sherlock::name_alt2 ... bench: 194,322 ns/iter (+/- 6,628) = 3061 MB/s +test sherlock::name_alt3 ... bench: 1,252,965 ns/iter (+/- 18,828) = 474 MB/s +test sherlock::name_alt3_nocase ... bench: 1,476,169 ns/iter (+/- 14,557) = 403 MB/s +test sherlock::name_alt4 ... bench: 298,639 ns/iter (+/- 3,905) = 1992 MB/s +test sherlock::name_alt4_nocase ... bench: 1,426,191 ns/iter (+/- 23,584) = 417 MB/s +test sherlock::name_holmes ... bench: 49,719 ns/iter (+/- 811) = 11965 MB/s +test sherlock::name_holmes_nocase ... bench: 1,191,400 ns/iter (+/- 19,175) = 499 MB/s +test sherlock::name_sherlock ... bench: 34,091 ns/iter (+/- 877) = 17451 MB/s +test sherlock::name_sherlock_holmes ... bench: 33,785 ns/iter (+/- 1,207) = 17609 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,235,442 ns/iter (+/- 18,023) = 481 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,236,252 ns/iter (+/- 26,934) = 481 MB/s +test sherlock::name_whitespace ... bench: 60,200 ns/iter (+/- 1,873) = 9882 MB/s +test sherlock::no_match_common ... bench: 559,886 ns/iter (+/- 20,306) = 1062 MB/s +test sherlock::no_match_uncommon ... bench: 23,631 ns/iter (+/- 497) = 25175 MB/s +test sherlock::quotes ... bench: 967,379 ns/iter (+/- 12,856) = 614 MB/s +test sherlock::the_lower ... bench: 766,950 ns/iter (+/- 21,944) = 775 MB/s +test sherlock::the_nocase ... bench: 1,706,539 ns/iter (+/- 26,003) = 348 MB/s +test sherlock::the_upper ... bench: 52,529 ns/iter (+/- 1,208) = 11325 MB/s +test sherlock::the_whitespace ... bench: 2,012,952 ns/iter (+/- 26,968) = 295 MB/s +test sherlock::word_ending_n ... bench: 55,578,841 ns/iter (+/- 537,463) = 10 MB/s +test sherlock::words ... bench: 19,103,327 ns/iter (+/- 102,828) = 31 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 71 measured + diff --git a/vendor/regex/record/old-bench-log/03-bytes/onig b/vendor/regex/record/old-bench-log/03-bytes/onig new file mode 100644 index 0000000..aaf666b --- /dev/null +++ b/vendor/regex/record/old-bench-log/03-bytes/onig @@ -0,0 +1,68 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/benches) + Running benches/target/release/onig-e3bc363aa56fb408 + +running 61 tests +test misc::anchored_literal_long_match ... bench: 70 ns/iter (+/- 1) = 5571 MB/s +test misc::anchored_literal_long_non_match ... bench: 424 ns/iter (+/- 4) = 919 MB/s +test misc::anchored_literal_short_match ... bench: 70 ns/iter (+/- 1) = 371 MB/s +test misc::anchored_literal_short_non_match ... bench: 38 ns/iter (+/- 0) = 684 MB/s +test misc::easy0_1K ... bench: 176 ns/iter (+/- 2) = 5818 MB/s +test misc::easy0_1MB ... bench: 163,547 ns/iter (+/- 1,451) = 6411 MB/s +test misc::easy0_32 ... bench: 20 ns/iter (+/- 1) = 1600 MB/s +test misc::easy0_32K ... bench: 5,056 ns/iter (+/- 64) = 6481 MB/s +test misc::easy1_1K ... bench: 4,103 ns/iter (+/- 11) = 249 MB/s +test misc::easy1_1MB ... 
bench: 4,198,406 ns/iter (+/- 62,171) = 249 MB/s +test misc::easy1_32 ... bench: 139 ns/iter (+/- 1) = 230 MB/s +test misc::easy1_32K ... bench: 131,083 ns/iter (+/- 1,310) = 249 MB/s +test misc::hard_1K ... bench: 163 ns/iter (+/- 3) = 6282 MB/s +test misc::hard_1MB ... bench: 163,910 ns/iter (+/- 2,368) = 6397 MB/s +test misc::hard_32 ... bench: 20 ns/iter (+/- 1) = 1600 MB/s +test misc::hard_32K ... bench: 5,002 ns/iter (+/- 306) = 6550 MB/s +test misc::literal ... bench: 226 ns/iter (+/- 0) = 225 MB/s +test misc::match_class ... bench: 337 ns/iter (+/- 2) = 240 MB/s +test misc::match_class_in_range ... bench: 337 ns/iter (+/- 1) = 240 MB/s +test misc::match_class_unicode ... bench: 2,004 ns/iter (+/- 26) = 80 MB/s +test misc::medium_1K ... bench: 191 ns/iter (+/- 2) = 5361 MB/s +test misc::medium_1MB ... bench: 164,027 ns/iter (+/- 2,494) = 6392 MB/s +test misc::medium_32 ... bench: 22 ns/iter (+/- 1) = 1454 MB/s +test misc::medium_32K ... bench: 4,962 ns/iter (+/- 60) = 6603 MB/s +test misc::not_literal ... bench: 359 ns/iter (+/- 5) = 142 MB/s +test misc::one_pass_long_prefix ... bench: 94 ns/iter (+/- 3) = 276 MB/s +test misc::one_pass_long_prefix_not ... bench: 101 ns/iter (+/- 1) = 257 MB/s +test misc::one_pass_short ... bench: 332 ns/iter (+/- 6) = 51 MB/s +test misc::one_pass_short_not ... bench: 318 ns/iter (+/- 4) = 53 MB/s +test sherlock::before_holmes ... bench: 70,859,542 ns/iter (+/- 594,306) = 8 MB/s +test sherlock::everything_greedy ... bench: 5,129,894 ns/iter (+/- 33,792) = 115 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,388,047 ns/iter (+/- 19,666) = 249 MB/s +test sherlock::ing_suffix ... bench: 28,413,935 ns/iter (+/- 800,513) = 20 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,636,327 ns/iter (+/- 66,410) = 225 MB/s +test sherlock::letters ... bench: 26,471,724 ns/iter (+/- 872,994) = 22 MB/s +test sherlock::letters_lower ... bench: 26,124,489 ns/iter (+/- 556,750) = 22 MB/s +test sherlock::letters_upper ... bench: 11,268,144 ns/iter (+/- 338,510) = 52 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 195,797 ns/iter (+/- 1,621) = 3038 MB/s +test sherlock::name_alt1 ... bench: 2,100,763 ns/iter (+/- 16,823) = 283 MB/s +test sherlock::name_alt2 ... bench: 2,212,816 ns/iter (+/- 17,997) = 268 MB/s +test sherlock::name_alt3 ... bench: 3,031,567 ns/iter (+/- 35,631) = 196 MB/s +test sherlock::name_alt3_nocase ... bench: 39,737,911 ns/iter (+/- 166,863) = 14 MB/s +test sherlock::name_alt4 ... bench: 2,230,681 ns/iter (+/- 18,856) = 266 MB/s +test sherlock::name_alt4_nocase ... bench: 8,294,698 ns/iter (+/- 36,887) = 71 MB/s +test sherlock::name_holmes ... bench: 402,600 ns/iter (+/- 6,232) = 1477 MB/s +test sherlock::name_holmes_nocase ... bench: 4,074,155 ns/iter (+/- 23,317) = 146 MB/s +test sherlock::name_sherlock ... bench: 270,225 ns/iter (+/- 2,815) = 2201 MB/s +test sherlock::name_sherlock_holmes ... bench: 196,502 ns/iter (+/- 2,168) = 3027 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 4,397,347 ns/iter (+/- 28,567) = 135 MB/s +test sherlock::name_sherlock_nocase ... bench: 4,400,574 ns/iter (+/- 25,127) = 135 MB/s +test sherlock::name_whitespace ... bench: 274,462 ns/iter (+/- 3,180) = 2167 MB/s +test sherlock::no_match_common ... bench: 596,601 ns/iter (+/- 9,285) = 997 MB/s +test sherlock::no_match_uncommon ... bench: 586,258 ns/iter (+/- 7,702) = 1014 MB/s +test sherlock::quotes ... bench: 4,069,570 ns/iter (+/- 20,372) = 146 MB/s +test sherlock::repeated_class_negation ... 
bench: 44,936,445 ns/iter (+/- 103,467) = 13 MB/s +test sherlock::the_lower ... bench: 1,300,513 ns/iter (+/- 12,884) = 457 MB/s +test sherlock::the_nocase ... bench: 5,141,237 ns/iter (+/- 25,487) = 115 MB/s +test sherlock::the_upper ... bench: 821,454 ns/iter (+/- 13,420) = 724 MB/s +test sherlock::the_whitespace ... bench: 2,009,530 ns/iter (+/- 14,082) = 296 MB/s +test sherlock::word_ending_n ... bench: 27,847,316 ns/iter (+/- 47,618) = 21 MB/s +test sherlock::words ... bench: 21,105,627 ns/iter (+/- 33,436) = 28 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 61 measured + diff --git a/vendor/regex/record/old-bench-log/03-bytes/pcre b/vendor/regex/record/old-bench-log/03-bytes/pcre new file mode 100644 index 0000000..236613a --- /dev/null +++ b/vendor/regex/record/old-bench-log/03-bytes/pcre @@ -0,0 +1,66 @@ + Running benches/target/release/pcre-855c18fb35cdf072 + +running 60 tests +test misc::anchored_literal_long_match ... bench: 88 ns/iter (+/- 12) = 4431 MB/s +test misc::anchored_literal_long_non_match ... bench: 58 ns/iter (+/- 1) = 6724 MB/s +test misc::anchored_literal_short_match ... bench: 88 ns/iter (+/- 1) = 295 MB/s +test misc::anchored_literal_short_non_match ... bench: 60 ns/iter (+/- 3) = 433 MB/s +test misc::easy0_1K ... bench: 266 ns/iter (+/- 1) = 3849 MB/s +test misc::easy0_1MB ... bench: 227,366 ns/iter (+/- 794) = 4611 MB/s +test misc::easy0_32 ... bench: 62 ns/iter (+/- 2) = 516 MB/s +test misc::easy0_32K ... bench: 7,061 ns/iter (+/- 109) = 4640 MB/s +test misc::easy1_1K ... bench: 805 ns/iter (+/- 10) = 1272 MB/s +test misc::easy1_1MB ... bench: 751,948 ns/iter (+/- 6,995) = 1394 MB/s +test misc::easy1_32 ... bench: 71 ns/iter (+/- 1) = 450 MB/s +test misc::easy1_32K ... bench: 23,635 ns/iter (+/- 213) = 1386 MB/s +test misc::hard_1K ... bench: 31,008 ns/iter (+/- 299) = 33 MB/s +test misc::hard_1MB ... bench: 35,078,241 ns/iter (+/- 94,197) = 29 MB/s +test misc::hard_32 ... bench: 313 ns/iter (+/- 1) = 102 MB/s +test misc::hard_32K ... bench: 995,958 ns/iter (+/- 10,945) = 32 MB/s +test misc::literal ... bench: 130 ns/iter (+/- 1) = 392 MB/s +test misc::match_class ... bench: 176 ns/iter (+/- 2) = 460 MB/s +test misc::match_class_in_range ... bench: 178 ns/iter (+/- 1) = 455 MB/s +test misc::match_class_unicode ... bench: 511 ns/iter (+/- 6) = 315 MB/s +test misc::medium_1K ... bench: 275 ns/iter (+/- 4) = 3723 MB/s +test misc::medium_1MB ... bench: 239,603 ns/iter (+/- 1,808) = 4376 MB/s +test misc::medium_32 ... bench: 62 ns/iter (+/- 1) = 516 MB/s +test misc::medium_32K ... bench: 7,385 ns/iter (+/- 43) = 4437 MB/s +test misc::not_literal ... bench: 274 ns/iter (+/- 3) = 186 MB/s +test misc::one_pass_long_prefix ... bench: 87 ns/iter (+/- 1) = 298 MB/s +test misc::one_pass_long_prefix_not ... bench: 88 ns/iter (+/- 0) = 295 MB/s +test misc::one_pass_short ... bench: 115 ns/iter (+/- 0) = 147 MB/s +test misc::one_pass_short_not ... bench: 118 ns/iter (+/- 0) = 144 MB/s +test sherlock::before_holmes ... bench: 14,338,348 ns/iter (+/- 23,734) = 41 MB/s +test sherlock::holmes_cochar_watson ... bench: 547,196 ns/iter (+/- 4,100) = 1087 MB/s +test sherlock::ing_suffix ... bench: 6,012,620 ns/iter (+/- 51,777) = 98 MB/s +test sherlock::ing_suffix_limited_space ... bench: 6,374,577 ns/iter (+/- 46,486) = 93 MB/s +test sherlock::letters ... bench: 28,575,184 ns/iter (+/- 65,051) = 20 MB/s +test sherlock::letters_lower ... bench: 25,819,606 ns/iter (+/- 180,823) = 23 MB/s +test sherlock::letters_upper ... 
bench: 3,227,381 ns/iter (+/- 11,443) = 184 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 193,512 ns/iter (+/- 1,316) = 3074 MB/s +test sherlock::name_alt1 ... bench: 454,510 ns/iter (+/- 2,721) = 1308 MB/s +test sherlock::name_alt2 ... bench: 499,453 ns/iter (+/- 4,692) = 1191 MB/s +test sherlock::name_alt3 ... bench: 1,085,732 ns/iter (+/- 6,841) = 547 MB/s +test sherlock::name_alt3_nocase ... bench: 3,194,995 ns/iter (+/- 12,655) = 186 MB/s +test sherlock::name_alt4 ... bench: 944,353 ns/iter (+/- 12,661) = 629 MB/s +test sherlock::name_alt4_nocase ... bench: 1,646,368 ns/iter (+/- 12,376) = 361 MB/s +test sherlock::name_holmes ... bench: 395,019 ns/iter (+/- 3,929) = 1506 MB/s +test sherlock::name_holmes_nocase ... bench: 493,327 ns/iter (+/- 7,213) = 1205 MB/s +test sherlock::name_sherlock ... bench: 266,400 ns/iter (+/- 1,591) = 2233 MB/s +test sherlock::name_sherlock_holmes ... bench: 196,357 ns/iter (+/- 1,770) = 3029 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,259,747 ns/iter (+/- 4,939) = 472 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,128,970 ns/iter (+/- 6,730) = 526 MB/s +test sherlock::name_whitespace ... bench: 267,323 ns/iter (+/- 1,296) = 2225 MB/s +test sherlock::no_match_common ... bench: 595,372 ns/iter (+/- 5,690) = 999 MB/s +test sherlock::no_match_uncommon ... bench: 585,406 ns/iter (+/- 5,719) = 1016 MB/s +test sherlock::quotes ... bench: 1,223,528 ns/iter (+/- 6,579) = 486 MB/s +test sherlock::repeated_class_negation ... bench: 6,440,584 ns/iter (+/- 20,444) = 92 MB/s +test sherlock::the_lower ... bench: 1,220,999 ns/iter (+/- 7,595) = 487 MB/s +test sherlock::the_nocase ... bench: 1,263,078 ns/iter (+/- 15,321) = 471 MB/s +test sherlock::the_upper ... bench: 781,141 ns/iter (+/- 15,408) = 761 MB/s +test sherlock::the_whitespace ... bench: 1,383,414 ns/iter (+/- 548,289) = 430 MB/s +test sherlock::word_ending_n ... bench: 12,709,045 ns/iter (+/- 51,420) = 46 MB/s +test sherlock::words ... bench: 10,798,918 ns/iter (+/- 40,027) = 55 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 60 measured + diff --git a/vendor/regex/record/old-bench-log/03-bytes/rust b/vendor/regex/record/old-bench-log/03-bytes/rust new file mode 100644 index 0000000..6dec097 --- /dev/null +++ b/vendor/regex/record/old-bench-log/03-bytes/rust @@ -0,0 +1,83 @@ + Compiling regex-syntax v0.2.5 (file:///home/andrew/data/projects/rust/regex/benches) + Compiling regex v0.1.55 (file:///home/andrew/data/projects/rust/regex/benches) + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/benches) + Running benches/target/release/rust-50db306d093e5666 + +running 74 tests +test misc::anchored_literal_long_match ... bench: 75 ns/iter (+/- 5) = 5200 MB/s +test misc::anchored_literal_long_non_match ... bench: 56 ns/iter (+/- 0) = 6964 MB/s +test misc::anchored_literal_short_match ... bench: 79 ns/iter (+/- 0) = 329 MB/s +test misc::anchored_literal_short_non_match ... bench: 56 ns/iter (+/- 1) = 464 MB/s +test misc::easy0_1K ... bench: 138 ns/iter (+/- 0) = 7420 MB/s +test misc::easy0_1MB ... bench: 247,159 ns/iter (+/- 724) = 4242 MB/s +test misc::easy0_32 ... bench: 71 ns/iter (+/- 0) = 450 MB/s +test misc::easy0_32K ... bench: 5,474 ns/iter (+/- 34) = 5986 MB/s +test misc::easy1_1K ... bench: 273 ns/iter (+/- 1) = 3750 MB/s +test misc::easy1_1MB ... bench: 317,946 ns/iter (+/- 2,512) = 3297 MB/s +test misc::easy1_32 ... bench: 67 ns/iter (+/- 0) = 477 MB/s +test misc::easy1_32K ... 
bench: 5,882 ns/iter (+/- 32) = 5570 MB/s +test misc::hard_1K ... bench: 4,713 ns/iter (+/- 13) = 217 MB/s +test misc::hard_1MB ... bench: 4,732,901 ns/iter (+/- 6,948) = 221 MB/s +test misc::hard_32 ... bench: 201 ns/iter (+/- 0) = 159 MB/s +test misc::hard_32K ... bench: 147,994 ns/iter (+/- 900) = 221 MB/s +test misc::literal ... bench: 19 ns/iter (+/- 0) = 2684 MB/s +test misc::match_class ... bench: 85 ns/iter (+/- 0) = 952 MB/s +test misc::match_class_in_range ... bench: 30 ns/iter (+/- 1) = 2700 MB/s +test misc::match_class_unicode ... bench: 806 ns/iter (+/- 2) = 199 MB/s +test misc::medium_1K ... bench: 1,384 ns/iter (+/- 10) = 739 MB/s +test misc::medium_1MB ... bench: 1,974,381 ns/iter (+/- 7,383) = 531 MB/s +test misc::medium_32 ... bench: 130 ns/iter (+/- 0) = 246 MB/s +test misc::medium_32K ... bench: 52,783 ns/iter (+/- 465) = 620 MB/s +test misc::no_exponential ... bench: 536 ns/iter (+/- 13) = 186 MB/s +test misc::not_literal ... bench: 293 ns/iter (+/- 1) = 174 MB/s +test misc::one_pass_long_prefix ... bench: 179 ns/iter (+/- 1) = 145 MB/s +test misc::one_pass_long_prefix_not ... bench: 180 ns/iter (+/- 2) = 144 MB/s +test misc::one_pass_short ... bench: 139 ns/iter (+/- 1) = 122 MB/s +test misc::one_pass_short_not ... bench: 142 ns/iter (+/- 1) = 119 MB/s +test misc::replace_all ... bench: 171 ns/iter (+/- 1) +test rust_compile::compile_huge ... bench: 126,158 ns/iter (+/- 1,790) +test rust_compile::compile_huge_bytes ... bench: 18,088,719 ns/iter (+/- 518,980) +test rust_compile::compile_simple ... bench: 6,141 ns/iter (+/- 394) +test rust_compile::compile_simple_bytes ... bench: 6,669 ns/iter (+/- 306) +test rust_compile::compile_small ... bench: 7,431 ns/iter (+/- 275) +test rust_compile::compile_small_bytes ... bench: 191,002 ns/iter (+/- 1,297) +test rust_parse::parse_huge ... bench: 1,204 ns/iter (+/- 9) +test rust_parse::parse_simple ... bench: 1,905 ns/iter (+/- 16) +test rust_parse::parse_small ... bench: 2,454 ns/iter (+/- 24) +test sherlock::before_holmes ... bench: 2,748,082 ns/iter (+/- 11,406) = 216 MB/s +test sherlock::everything_greedy ... bench: 7,833,414 ns/iter (+/- 42,538) = 75 MB/s +test sherlock::everything_greedy_nl ... bench: 5,426,141 ns/iter (+/- 31,378) = 109 MB/s +test sherlock::holmes_cochar_watson ... bench: 262,322 ns/iter (+/- 5,243) = 2267 MB/s +test sherlock::holmes_coword_watson ... bench: 1,324,677 ns/iter (+/- 21,666) = 449 MB/s +test sherlock::ing_suffix ... bench: 3,179,928 ns/iter (+/- 40,246) = 187 MB/s +test sherlock::ing_suffix_limited_space ... bench: 3,525,004 ns/iter (+/- 37,262) = 168 MB/s +test sherlock::letters ... bench: 60,268,445 ns/iter (+/- 1,958,610) = 9 MB/s +test sherlock::letters_lower ... bench: 57,743,679 ns/iter (+/- 84,675) = 10 MB/s +test sherlock::letters_upper ... bench: 4,549,709 ns/iter (+/- 9,312) = 130 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,690,794 ns/iter (+/- 2,796) = 221 MB/s +test sherlock::name_alt1 ... bench: 42,476 ns/iter (+/- 346) = 14006 MB/s +test sherlock::name_alt2 ... bench: 199,058 ns/iter (+/- 1,498) = 2988 MB/s +test sherlock::name_alt3 ... bench: 1,248,439 ns/iter (+/- 3,051) = 476 MB/s +test sherlock::name_alt3_nocase ... bench: 1,463,628 ns/iter (+/- 2,799) = 406 MB/s +test sherlock::name_alt4 ... bench: 296,390 ns/iter (+/- 798) = 2007 MB/s +test sherlock::name_alt4_nocase ... bench: 1,415,770 ns/iter (+/- 3,400) = 420 MB/s +test sherlock::name_holmes ... bench: 49,713 ns/iter (+/- 317) = 11967 MB/s +test sherlock::name_holmes_nocase ... 
bench: 1,181,147 ns/iter (+/- 2,842) = 503 MB/s +test sherlock::name_sherlock ... bench: 34,263 ns/iter (+/- 136) = 17363 MB/s +test sherlock::name_sherlock_holmes ... bench: 34,179 ns/iter (+/- 188) = 17406 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,236,384 ns/iter (+/- 5,012) = 481 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,232,613 ns/iter (+/- 5,009) = 482 MB/s +test sherlock::name_whitespace ... bench: 60,024 ns/iter (+/- 187) = 9911 MB/s +test sherlock::no_match_common ... bench: 558,607 ns/iter (+/- 2,595) = 1065 MB/s +test sherlock::no_match_uncommon ... bench: 24,049 ns/iter (+/- 54) = 24738 MB/s +test sherlock::quotes ... bench: 966,792 ns/iter (+/- 2,982) = 615 MB/s +test sherlock::repeated_class_negation ... bench: 84,186,484 ns/iter (+/- 66,800) = 7 MB/s +test sherlock::the_lower ... bench: 773,759 ns/iter (+/- 2,759) = 768 MB/s +test sherlock::the_nocase ... bench: 1,705,648 ns/iter (+/- 4,604) = 348 MB/s +test sherlock::the_upper ... bench: 52,729 ns/iter (+/- 209) = 11282 MB/s +test sherlock::the_whitespace ... bench: 1,981,215 ns/iter (+/- 8,080) = 300 MB/s +test sherlock::word_ending_n ... bench: 53,482,650 ns/iter (+/- 73,844) = 11 MB/s +test sherlock::words ... bench: 18,961,987 ns/iter (+/- 27,794) = 31 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 74 measured + diff --git a/vendor/regex/record/old-bench-log/03-bytes/rust-bytes b/vendor/regex/record/old-bench-log/03-bytes/rust-bytes new file mode 100644 index 0000000..735d259 --- /dev/null +++ b/vendor/regex/record/old-bench-log/03-bytes/rust-bytes @@ -0,0 +1,66 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/benches) + Running benches/target/release/rust_bytes-9f3b188bc741e04b + +running 59 tests +test misc::anchored_literal_long_match ... bench: 75 ns/iter (+/- 6) = 5200 MB/s +test misc::anchored_literal_long_non_match ... bench: 55 ns/iter (+/- 0) = 7090 MB/s +test misc::anchored_literal_short_match ... bench: 75 ns/iter (+/- 0) = 346 MB/s +test misc::anchored_literal_short_non_match ... bench: 55 ns/iter (+/- 0) = 472 MB/s +test misc::easy0_1K ... bench: 245 ns/iter (+/- 0) = 4179 MB/s +test misc::easy0_1MB ... bench: 251,614 ns/iter (+/- 1,143) = 4167 MB/s +test misc::easy0_32 ... bench: 62 ns/iter (+/- 1) = 516 MB/s +test misc::easy0_32K ... bench: 5,281 ns/iter (+/- 66) = 6204 MB/s +test misc::easy1_1K ... bench: 266 ns/iter (+/- 1) = 3849 MB/s +test misc::easy1_1MB ... bench: 325,060 ns/iter (+/- 2,011) = 3225 MB/s +test misc::easy1_32 ... bench: 73 ns/iter (+/- 0) = 438 MB/s +test misc::easy1_32K ... bench: 5,609 ns/iter (+/- 41) = 5842 MB/s +test misc::hard_1K ... bench: 4,678 ns/iter (+/- 38) = 218 MB/s +test misc::hard_1MB ... bench: 4,736,631 ns/iter (+/- 26,227) = 221 MB/s +test misc::hard_32 ... bench: 199 ns/iter (+/- 0) = 160 MB/s +test misc::hard_32K ... bench: 148,282 ns/iter (+/- 1,353) = 220 MB/s +test misc::literal ... bench: 18 ns/iter (+/- 0) = 2833 MB/s +test misc::match_class ... bench: 83 ns/iter (+/- 0) = 975 MB/s +test misc::match_class_in_range ... bench: 30 ns/iter (+/- 0) = 2700 MB/s +test misc::medium_1K ... bench: 1,147 ns/iter (+/- 10) = 892 MB/s +test misc::medium_1MB ... bench: 1,953,230 ns/iter (+/- 10,530) = 536 MB/s +test misc::medium_32 ... bench: 99 ns/iter (+/- 0) = 323 MB/s +test misc::medium_32K ... bench: 54,705 ns/iter (+/- 349) = 598 MB/s +test misc::no_exponential ... bench: 534 ns/iter (+/- 4) = 187 MB/s +test misc::not_literal ... 
bench: 292 ns/iter (+/- 3) = 174 MB/s
+test misc::one_pass_long_prefix ... bench: 179 ns/iter (+/- 1) = 145 MB/s
+test misc::one_pass_long_prefix_not ... bench: 180 ns/iter (+/- 2) = 144 MB/s
+test misc::one_pass_short ... bench: 139 ns/iter (+/- 0) = 122 MB/s
+test misc::one_pass_short_not ... bench: 139 ns/iter (+/- 0) = 122 MB/s
+test sherlock::before_holmes ... bench: 2,778,686 ns/iter (+/- 8,735) = 214 MB/s
+test sherlock::everything_greedy ... bench: 7,884,691 ns/iter (+/- 37,268) = 75 MB/s
+test sherlock::everything_greedy_nl ... bench: 5,406,627 ns/iter (+/- 24,707) = 110 MB/s
+test sherlock::holmes_cochar_watson ... bench: 262,175 ns/iter (+/- 1,995) = 2269 MB/s
+test sherlock::holmes_coword_watson ... bench: 1,299,904 ns/iter (+/- 5,090) = 457 MB/s
+test sherlock::ing_suffix ... bench: 3,202,899 ns/iter (+/- 20,810) = 185 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 3,367,381 ns/iter (+/- 14,143) = 176 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 2,725,593 ns/iter (+/- 10,736) = 218 MB/s
+test sherlock::name_alt1 ... bench: 42,161 ns/iter (+/- 355) = 14110 MB/s
+test sherlock::name_alt2 ... bench: 195,390 ns/iter (+/- 1,112) = 3044 MB/s
+test sherlock::name_alt3 ... bench: 1,248,432 ns/iter (+/- 3,244) = 476 MB/s
+test sherlock::name_alt3_nocase ... bench: 3,371,906 ns/iter (+/- 42,421) = 176 MB/s
+test sherlock::name_alt4 ... bench: 296,423 ns/iter (+/- 1,812) = 2007 MB/s
+test sherlock::name_alt4_nocase ... bench: 1,753,178 ns/iter (+/- 23,269) = 339 MB/s
+test sherlock::name_holmes ... bench: 49,554 ns/iter (+/- 261) = 12005 MB/s
+test sherlock::name_holmes_nocase ... bench: 1,347,682 ns/iter (+/- 5,678) = 441 MB/s
+test sherlock::name_sherlock ... bench: 33,937 ns/iter (+/- 208) = 17530 MB/s
+test sherlock::name_sherlock_holmes ... bench: 33,870 ns/iter (+/- 225) = 17565 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 1,212,233 ns/iter (+/- 5,452) = 490 MB/s
+test sherlock::name_sherlock_nocase ... bench: 1,190,590 ns/iter (+/- 3,248) = 499 MB/s
+test sherlock::name_whitespace ... bench: 59,434 ns/iter (+/- 253) = 10009 MB/s
+test sherlock::no_match_common ... bench: 565,962 ns/iter (+/- 4,601) = 1051 MB/s
+test sherlock::no_match_uncommon ... bench: 23,729 ns/iter (+/- 218) = 25071 MB/s
+test sherlock::quotes ... bench: 966,904 ns/iter (+/- 7,115) = 615 MB/s
+test sherlock::repeated_class_negation ... bench: 121,271,073 ns/iter (+/- 242,789) = 4 MB/s
+test sherlock::the_lower ... bench: 778,850 ns/iter (+/- 6,781) = 763 MB/s
+test sherlock::the_nocase ... bench: 2,876,190 ns/iter (+/- 8,611) = 206 MB/s
+test sherlock::the_upper ... bench: 52,617 ns/iter (+/- 315) = 11306 MB/s
+test sherlock::the_whitespace ... bench: 1,982,270 ns/iter (+/- 11,079) = 300 MB/s
+test sherlock::word_ending_n ... bench: 76,442,330 ns/iter (+/- 236,690) = 7 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 59 measured
+
diff --git a/vendor/regex/record/old-bench-log/04/onig b/vendor/regex/record/old-bench-log/04/onig
new file mode 100644
index 0000000..81b4098
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/04/onig
@@ -0,0 +1,78 @@
+ Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench)
+ Running target/release/bench-0d58c0af2e68ae0d
+
+running 71 tests
+test misc::anchored_literal_long_match ... bench: 66 ns/iter (+/- 1) = 5909 MB/s
+test misc::anchored_literal_long_non_match ... bench: 414 ns/iter (+/- 2) = 942 MB/s
+test misc::anchored_literal_short_match ... bench: 66 ns/iter (+/- 1) = 393 MB/s
+test misc::anchored_literal_short_non_match ... bench: 36 ns/iter (+/- 0) = 722 MB/s
+test misc::easy0_1K ... bench: 217 ns/iter (+/- 2) = 4843 MB/s
+test misc::easy0_1MB ... bench: 130,657 ns/iter (+/- 365) = 8025 MB/s
+test misc::easy0_32 ... bench: 84 ns/iter (+/- 1) = 702 MB/s
+test misc::easy0_32K ... bench: 4,092 ns/iter (+/- 25) = 8014 MB/s
+test misc::easy1_1K ... bench: 3,682 ns/iter (+/- 25) = 283 MB/s
+test misc::easy1_1MB ... bench: 3,613,381 ns/iter (+/- 5,960) = 290 MB/s
+test misc::easy1_32 ... bench: 237 ns/iter (+/- 2) = 219 MB/s
+test misc::easy1_32K ... bench: 113,040 ns/iter (+/- 303) = 290 MB/s
+test misc::hard_1K ... bench: 184,299 ns/iter (+/- 2,508) = 5 MB/s
+test misc::hard_1MB ... bench: 198,378,531 ns/iter (+/- 150,404) = 5 MB/s
+test misc::hard_32 ... bench: 5,765 ns/iter (+/- 26) = 10 MB/s
+test misc::hard_32K ... bench: 6,177,362 ns/iter (+/- 21,959) = 5 MB/s
+test misc::literal ... bench: 219 ns/iter (+/- 1) = 232 MB/s
+test misc::long_needle1 ... bench: 6,978,321 ns/iter (+/- 120,792) = 14 MB/s
+test misc::long_needle2 ... bench: 6,981,122 ns/iter (+/- 120,371) = 14 MB/s
+test misc::match_class ... bench: 329 ns/iter (+/- 5) = 246 MB/s
+test misc::match_class_in_range ... bench: 332 ns/iter (+/- 1) = 243 MB/s
+test misc::match_class_unicode ... bench: 1,980 ns/iter (+/- 23) = 81 MB/s
+test misc::medium_1K ... bench: 232 ns/iter (+/- 0) = 4534 MB/s
+test misc::medium_1MB ... bench: 130,702 ns/iter (+/- 997) = 8022 MB/s
+test misc::medium_32 ... bench: 95 ns/iter (+/- 1) = 631 MB/s
+test misc::medium_32K ... bench: 4,103 ns/iter (+/- 13) = 7993 MB/s
+test misc::not_literal ... bench: 353 ns/iter (+/- 2) = 144 MB/s
+test misc::one_pass_long_prefix ... bench: 89 ns/iter (+/- 1) = 292 MB/s
+test misc::one_pass_long_prefix_not ... bench: 97 ns/iter (+/- 0) = 268 MB/s
+test misc::one_pass_short ... bench: 329 ns/iter (+/- 4) = 51 MB/s
+test misc::one_pass_short_not ... bench: 324 ns/iter (+/- 4) = 52 MB/s
+test misc::reallyhard2_1K ... bench: 563,552 ns/iter (+/- 2,559) = 1 MB/s
+test misc::reallyhard_1K ... bench: 184,200 ns/iter (+/- 553) = 5 MB/s
+test misc::reallyhard_1MB ... bench: 198,336,145 ns/iter (+/- 149,796) = 5 MB/s
+test misc::reallyhard_32 ... bench: 5,766 ns/iter (+/- 16) = 10 MB/s
+test misc::reallyhard_32K ... bench: 6,174,904 ns/iter (+/- 5,491) = 5 MB/s
+test sherlock::before_holmes ... bench: 70,476,093 ns/iter (+/- 271,168) = 8 MB/s
+test sherlock::everything_greedy ... bench: 5,175,140 ns/iter (+/- 19,413) = 114 MB/s
+test sherlock::holmes_cochar_watson ... bench: 2,379,427 ns/iter (+/- 5,816) = 250 MB/s
+test sherlock::ing_suffix ... bench: 28,275,131 ns/iter (+/- 49,569) = 21 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 2,648,838 ns/iter (+/- 9,247) = 224 MB/s
+test sherlock::letters ... bench: 25,940,039 ns/iter (+/- 57,724) = 22 MB/s
+test sherlock::letters_lower ... bench: 25,680,050 ns/iter (+/- 48,209) = 23 MB/s
+test sherlock::letters_upper ... bench: 11,122,063 ns/iter (+/- 28,302) = 53 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 195,903 ns/iter (+/- 1,162) = 3036 MB/s
+test sherlock::name_alt1 ... bench: 2,100,175 ns/iter (+/- 4,251) = 283 MB/s
+test sherlock::name_alt2 ... bench: 2,210,122 ns/iter (+/- 7,514) = 269 MB/s
+test sherlock::name_alt3 ... bench: 3,025,653 ns/iter (+/- 9,375) = 196 MB/s
+test sherlock::name_alt3_nocase ... bench: 39,475,102 ns/iter (+/- 51,488) = 15 MB/s
+test sherlock::name_alt4 ... bench: 2,225,952 ns/iter (+/- 7,340) = 267 MB/s
+test sherlock::name_alt4_nocase ... bench: 8,227,413 ns/iter (+/- 18,088) = 72 MB/s
+test sherlock::name_alt5 ... bench: 2,300,803 ns/iter (+/- 6,325) = 258 MB/s
+test sherlock::name_alt5_nocase ... bench: 11,488,783 ns/iter (+/- 28,880) = 51 MB/s
+test sherlock::name_holmes ... bench: 400,760 ns/iter (+/- 907) = 1484 MB/s
+test sherlock::name_holmes_nocase ... bench: 4,044,850 ns/iter (+/- 11,665) = 147 MB/s
+test sherlock::name_sherlock ... bench: 269,021 ns/iter (+/- 791) = 2211 MB/s
+test sherlock::name_sherlock_holmes ... bench: 196,161 ns/iter (+/- 899) = 3032 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 4,363,621 ns/iter (+/- 5,339) = 136 MB/s
+test sherlock::name_sherlock_nocase ... bench: 4,389,375 ns/iter (+/- 11,077) = 135 MB/s
+test sherlock::name_whitespace ... bench: 273,691 ns/iter (+/- 957) = 2173 MB/s
+test sherlock::no_match_common ... bench: 588,744 ns/iter (+/- 1,732) = 1010 MB/s
+test sherlock::no_match_really_common ... bench: 673,335 ns/iter (+/- 1,407) = 883 MB/s
+test sherlock::no_match_uncommon ... bench: 578,009 ns/iter (+/- 5,111) = 1029 MB/s
+test sherlock::quotes ... bench: 4,066,005 ns/iter (+/- 10,116) = 146 MB/s
+test sherlock::repeated_class_negation ... bench: 43,374,733 ns/iter (+/- 48,409) = 13 MB/s
+test sherlock::the_lower ... bench: 1,275,300 ns/iter (+/- 5,351) = 466 MB/s
+test sherlock::the_nocase ... bench: 5,100,832 ns/iter (+/- 11,024) = 116 MB/s
+test sherlock::the_upper ... bench: 816,606 ns/iter (+/- 3,370) = 728 MB/s
+test sherlock::the_whitespace ... bench: 2,079,544 ns/iter (+/- 4,585) = 286 MB/s
+test sherlock::word_ending_n ... bench: 27,699,175 ns/iter (+/- 58,998) = 21 MB/s
+test sherlock::words ... bench: 19,460,356 ns/iter (+/- 29,406) = 30 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 71 measured
+
diff --git a/vendor/regex/record/old-bench-log/04/pcre1-jit b/vendor/regex/record/old-bench-log/04/pcre1-jit
new file mode 100644
index 0000000..2118d1f
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/04/pcre1-jit
@@ -0,0 +1,77 @@
+ Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench)
+ Running target/release/bench-0d58c0af2e68ae0d
+
+running 70 tests
+test misc::anchored_literal_long_match ... bench: 32 ns/iter (+/- 0) = 12187 MB/s
+test misc::anchored_literal_long_non_match ... bench: 27 ns/iter (+/- 1) = 14444 MB/s
+test misc::anchored_literal_short_match ... bench: 31 ns/iter (+/- 0) = 838 MB/s
+test misc::anchored_literal_short_non_match ... bench: 27 ns/iter (+/- 2) = 962 MB/s
+test misc::easy0_1K ... bench: 247 ns/iter (+/- 1) = 4255 MB/s
+test misc::easy0_1MB ... bench: 193,485 ns/iter (+/- 906) = 5419 MB/s
+test misc::easy0_32 ... bench: 55 ns/iter (+/- 1) = 1072 MB/s
+test misc::easy0_32K ... bench: 6,057 ns/iter (+/- 19) = 5414 MB/s
+test misc::easy1_1K ... bench: 604 ns/iter (+/- 3) = 1728 MB/s
+test misc::easy1_1MB ... bench: 553,893 ns/iter (+/- 1,299) = 1893 MB/s
+test misc::easy1_32 ... bench: 81 ns/iter (+/- 1) = 641 MB/s
+test misc::easy1_32K ... bench: 17,335 ns/iter (+/- 33) = 1891 MB/s
+test misc::hard_1K ... bench: 56,956 ns/iter (+/- 148) = 18 MB/s
+test misc::hard_1MB ... bench: 63,576,485 ns/iter (+/- 93,278) = 16 MB/s
+test misc::hard_32 ... bench: 1,744 ns/iter (+/- 10) = 33 MB/s
+test misc::hard_32K ... bench: 1,931,799 ns/iter (+/- 7,752) = 16 MB/s
+test misc::literal ... bench: 73 ns/iter (+/- 1) = 698 MB/s
+test misc::long_needle1 ... bench: 532,256 ns/iter (+/- 4,633) = 187 MB/s
+test misc::long_needle2 ... bench: 532,131 ns/iter (+/- 3,771) = 187 MB/s
+test misc::match_class ... bench: 120 ns/iter (+/- 0) = 675 MB/s
+test misc::match_class_in_range ... bench: 119 ns/iter (+/- 0) = 680 MB/s
+test misc::match_class_unicode ... bench: 456 ns/iter (+/- 2) = 353 MB/s
+test misc::medium_1K ... bench: 260 ns/iter (+/- 1) = 4046 MB/s
+test misc::medium_1MB ... bench: 206,175 ns/iter (+/- 983) = 5085 MB/s
+test misc::medium_32 ... bench: 58 ns/iter (+/- 0) = 1034 MB/s
+test misc::medium_32K ... bench: 6,443 ns/iter (+/- 26) = 5090 MB/s
+test misc::not_literal ... bench: 216 ns/iter (+/- 0) = 236 MB/s
+test misc::one_pass_long_prefix ... bench: 31 ns/iter (+/- 0) = 838 MB/s
+test misc::one_pass_long_prefix_not ... bench: 31 ns/iter (+/- 0) = 838 MB/s
+test misc::one_pass_short ... bench: 59 ns/iter (+/- 0) = 288 MB/s
+test misc::one_pass_short_not ... bench: 63 ns/iter (+/- 2) = 269 MB/s
+test misc::reallyhard2_1K ... bench: 96,070 ns/iter (+/- 238) = 10 MB/s
+test misc::reallyhard_1K ... bench: 60,783 ns/iter (+/- 170) = 17 MB/s
+test misc::reallyhard_1MB ... bench: 60,899,076 ns/iter (+/- 483,661) = 17 MB/s
+test misc::reallyhard_32 ... bench: 1,822 ns/iter (+/- 58) = 32 MB/s
+test misc::reallyhard_32K ... bench: 1,809,770 ns/iter (+/- 45,348) = 18 MB/s
+test sherlock::before_holmes ... bench: 14,513,309 ns/iter (+/- 146,332) = 40 MB/s
+test sherlock::holmes_cochar_watson ... bench: 543,738 ns/iter (+/- 4,549) = 1094 MB/s
+test sherlock::ing_suffix ... bench: 5,561,653 ns/iter (+/- 44,720) = 106 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 6,116,112 ns/iter (+/- 91,799) = 97 MB/s
+test sherlock::letters ... bench: 15,633,185 ns/iter (+/- 313,036) = 38 MB/s
+test sherlock::letters_lower ... bench: 15,228,423 ns/iter (+/- 290,879) = 39 MB/s
+test sherlock::letters_upper ... bench: 3,279,472 ns/iter (+/- 48,073) = 181 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 192,935 ns/iter (+/- 1,405) = 3083 MB/s
+test sherlock::name_alt1 ... bench: 452,708 ns/iter (+/- 4,728) = 1314 MB/s
+test sherlock::name_alt2 ... bench: 477,092 ns/iter (+/- 6,192) = 1246 MB/s
+test sherlock::name_alt3 ... bench: 959,514 ns/iter (+/- 25,214) = 620 MB/s
+test sherlock::name_alt3_nocase ... bench: 3,478,546 ns/iter (+/- 52,300) = 171 MB/s
+test sherlock::name_alt4 ... bench: 947,187 ns/iter (+/- 9,985) = 628 MB/s
+test sherlock::name_alt4_nocase ... bench: 1,852,289 ns/iter (+/- 30,616) = 321 MB/s
+test sherlock::name_alt5 ... bench: 655,616 ns/iter (+/- 9,327) = 907 MB/s
+test sherlock::name_alt5_nocase ... bench: 1,957,627 ns/iter (+/- 47,271) = 303 MB/s
+test sherlock::name_holmes ... bench: 383,813 ns/iter (+/- 1,185) = 1550 MB/s
+test sherlock::name_holmes_nocase ... bench: 478,335 ns/iter (+/- 4,851) = 1243 MB/s
+test sherlock::name_sherlock ... bench: 263,611 ns/iter (+/- 875) = 2256 MB/s
+test sherlock::name_sherlock_holmes ... bench: 193,687 ns/iter (+/- 1,070) = 3071 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 1,258,447 ns/iter (+/- 32,369) = 472 MB/s
+test sherlock::name_sherlock_nocase ... bench: 1,330,069 ns/iter (+/- 36,657) = 447 MB/s
+test sherlock::name_whitespace ... bench: 264,340 ns/iter (+/- 2,723) = 2250 MB/s
+test sherlock::no_match_common ... bench: 589,309 ns/iter (+/- 5,038) = 1009 MB/s
+test sherlock::no_match_really_common ... bench: 683,909 ns/iter (+/- 4,987) = 869 MB/s
+test sherlock::no_match_uncommon ... bench: 578,309 ns/iter (+/- 2,831) = 1028 MB/s
+test sherlock::quotes ... bench: 1,184,492 ns/iter (+/- 27,247) = 502 MB/s
+test sherlock::repeated_class_negation ... bench: 7,208,342 ns/iter (+/- 17,978) = 82 MB/s
+test sherlock::the_lower ... bench: 1,001,754 ns/iter (+/- 6,215) = 593 MB/s
+test sherlock::the_nocase ... bench: 1,043,260 ns/iter (+/- 10,217) = 570 MB/s
+test sherlock::the_upper ... bench: 753,058 ns/iter (+/- 1,640) = 790 MB/s
+test sherlock::the_whitespace ... bench: 1,195,227 ns/iter (+/- 9,524) = 497 MB/s
+test sherlock::word_ending_n ... bench: 11,767,448 ns/iter (+/- 15,460) = 50 MB/s
+test sherlock::words ... bench: 7,551,361 ns/iter (+/- 25,566) = 78 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 70 measured
+
diff --git a/vendor/regex/record/old-bench-log/04/pcre2-jit b/vendor/regex/record/old-bench-log/04/pcre2-jit
new file mode 100644
index 0000000..9a110b5
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/04/pcre2-jit
@@ -0,0 +1,77 @@
+ Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench)
+ Running target/release/bench-0d58c0af2e68ae0d
+
+running 70 tests
+test misc::anchored_literal_long_match ... bench: 22 ns/iter (+/- 0) = 17727 MB/s
+test misc::anchored_literal_long_non_match ... bench: 14 ns/iter (+/- 0) = 27857 MB/s
+test misc::anchored_literal_short_match ... bench: 21 ns/iter (+/- 0) = 1238 MB/s
+test misc::anchored_literal_short_non_match ... bench: 14 ns/iter (+/- 1) = 1857 MB/s
+test misc::easy0_1K ... bench: 235 ns/iter (+/- 2) = 4472 MB/s
+test misc::easy0_1MB ... bench: 193,652 ns/iter (+/- 524) = 5414 MB/s
+test misc::easy0_32 ... bench: 43 ns/iter (+/- 0) = 1372 MB/s
+test misc::easy0_32K ... bench: 6,024 ns/iter (+/- 12) = 5444 MB/s
+test misc::easy1_1K ... bench: 235 ns/iter (+/- 4) = 4442 MB/s
+test misc::easy1_1MB ... bench: 193,685 ns/iter (+/- 617) = 5413 MB/s
+test misc::easy1_32 ... bench: 45 ns/iter (+/- 0) = 1155 MB/s
+test misc::easy1_32K ... bench: 6,018 ns/iter (+/- 9) = 5448 MB/s
+test misc::hard_1K ... bench: 1,880 ns/iter (+/- 7) = 559 MB/s
+test misc::hard_1MB ... bench: 1,283,101 ns/iter (+/- 4,420) = 817 MB/s
+test misc::hard_32 ... bench: 119 ns/iter (+/- 2) = 495 MB/s
+test misc::hard_32K ... bench: 39,919 ns/iter (+/- 95) = 821 MB/s
+test misc::literal ... bench: 18 ns/iter (+/- 1) = 2833 MB/s
+test misc::long_needle1 ... bench: 513,050 ns/iter (+/- 2,267) = 194 MB/s
+test misc::long_needle2 ... bench: 518,009 ns/iter (+/- 3,066) = 193 MB/s
+test misc::match_class ... bench: 106 ns/iter (+/- 1) = 764 MB/s
+test misc::match_class_in_range ... bench: 24 ns/iter (+/- 1) = 3375 MB/s
+test misc::match_class_unicode ... bench: 370 ns/iter (+/- 2) = 435 MB/s
+test misc::medium_1K ... bench: 237 ns/iter (+/- 0) = 4438 MB/s
+test misc::medium_1MB ... bench: 193,478 ns/iter (+/- 540) = 5419 MB/s
+test misc::medium_32 ... bench: 46 ns/iter (+/- 0) = 1304 MB/s
+test misc::medium_32K ... bench: 6,024 ns/iter (+/- 15) = 5444 MB/s
+test misc::not_literal ... bench: 274 ns/iter (+/- 1) = 186 MB/s
+test misc::one_pass_long_prefix ... bench: 19 ns/iter (+/- 1) = 1368 MB/s
+test misc::one_pass_long_prefix_not ... bench: 19 ns/iter (+/- 0) = 1368 MB/s
+test misc::one_pass_short ... bench: 47 ns/iter (+/- 0) = 361 MB/s
+test misc::one_pass_short_not ... bench: 50 ns/iter (+/- 2) = 340 MB/s
+test misc::reallyhard2_1K ... bench: 4,959 ns/iter (+/- 34) = 209 MB/s
+test misc::reallyhard_1K ... bench: 2,145 ns/iter (+/- 17) = 489 MB/s
+test misc::reallyhard_1MB ... bench: 1,292,683 ns/iter (+/- 3,342) = 811 MB/s
+test misc::reallyhard_32 ... bench: 124 ns/iter (+/- 4) = 475 MB/s
+test misc::reallyhard_32K ... bench: 47,263 ns/iter (+/- 173) = 693 MB/s
+test sherlock::before_holmes ... bench: 4,706,445 ns/iter (+/- 23,483) = 126 MB/s
+test sherlock::holmes_cochar_watson ... bench: 488,613 ns/iter (+/- 2,921) = 1217 MB/s
+test sherlock::ing_suffix ... bench: 1,886,092 ns/iter (+/- 9,951) = 315 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 5,091,401 ns/iter (+/- 21,315) = 116 MB/s
+test sherlock::letters ... bench: 10,082,811 ns/iter (+/- 41,989) = 59 MB/s
+test sherlock::letters_lower ... bench: 9,640,481 ns/iter (+/- 46,499) = 61 MB/s
+test sherlock::letters_upper ... bench: 1,772,105 ns/iter (+/- 8,833) = 335 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 192,075 ns/iter (+/- 1,043) = 3097 MB/s
+test sherlock::name_alt1 ... bench: 447,382 ns/iter (+/- 2,142) = 1329 MB/s
+test sherlock::name_alt2 ... bench: 447,421 ns/iter (+/- 2,077) = 1329 MB/s
+test sherlock::name_alt3 ... bench: 963,775 ns/iter (+/- 1,684) = 617 MB/s
+test sherlock::name_alt3_nocase ... bench: 3,152,920 ns/iter (+/- 5,757) = 188 MB/s
+test sherlock::name_alt4 ... bench: 80,204 ns/iter (+/- 379) = 7417 MB/s
+test sherlock::name_alt4_nocase ... bench: 1,665,405 ns/iter (+/- 7,134) = 357 MB/s
+test sherlock::name_alt5 ... bench: 649,701 ns/iter (+/- 1,722) = 915 MB/s
+test sherlock::name_alt5_nocase ... bench: 1,773,323 ns/iter (+/- 9,648) = 335 MB/s
+test sherlock::name_holmes ... bench: 377,003 ns/iter (+/- 3,390) = 1578 MB/s
+test sherlock::name_holmes_nocase ... bench: 472,947 ns/iter (+/- 1,011) = 1257 MB/s
+test sherlock::name_sherlock ... bench: 262,237 ns/iter (+/- 1,268) = 2268 MB/s
+test sherlock::name_sherlock_holmes ... bench: 192,306 ns/iter (+/- 520) = 3093 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 1,318,573 ns/iter (+/- 1,462) = 451 MB/s
+test sherlock::name_sherlock_nocase ... bench: 1,110,629 ns/iter (+/- 13,163) = 535 MB/s
+test sherlock::name_whitespace ... bench: 262,889 ns/iter (+/- 637) = 2263 MB/s
+test sherlock::no_match_common ... bench: 388,869 ns/iter (+/- 1,512) = 1529 MB/s
+test sherlock::no_match_really_common ... bench: 422,058 ns/iter (+/- 1,788) = 1409 MB/s
+test sherlock::no_match_uncommon ... bench: 30,594 ns/iter (+/- 166) = 19446 MB/s
+test sherlock::quotes ... bench: 569,628 ns/iter (+/- 2,052) = 1044 MB/s
+test sherlock::repeated_class_negation ... bench: 6,410,128 ns/iter (+/- 19,866) = 92 MB/s
+test sherlock::the_lower ... bench: 648,366 ns/iter (+/- 5,142) = 917 MB/s
+test sherlock::the_nocase ... bench: 694,035 ns/iter (+/- 4,844) = 857 MB/s
+test sherlock::the_upper ... bench: 54,007 ns/iter (+/- 486) = 11015 MB/s
+test sherlock::the_whitespace ... bench: 850,430 ns/iter (+/- 9,641) = 699 MB/s
+test sherlock::word_ending_n ... bench: 5,768,961 ns/iter (+/- 20,924) = 103 MB/s
+test sherlock::words ... bench: 5,866,550 ns/iter (+/- 34,451) = 101 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 70 measured
+
diff --git a/vendor/regex/record/old-bench-log/04/re2 b/vendor/regex/record/old-bench-log/04/re2
new file mode 100644
index 0000000..31a6e6d
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/04/re2
@@ -0,0 +1,79 @@
+ Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench)
+ Running target/release/bench-0d58c0af2e68ae0d
+
+running 72 tests
+test misc::anchored_literal_long_match ... bench: 119 ns/iter (+/- 2) = 3277 MB/s
+test misc::anchored_literal_long_non_match ... bench: 45 ns/iter (+/- 0) = 8666 MB/s
+test misc::anchored_literal_short_match ... bench: 120 ns/iter (+/- 1) = 216 MB/s
+test misc::anchored_literal_short_non_match ... bench: 45 ns/iter (+/- 0) = 577 MB/s
+test misc::easy0_1K ... bench: 187 ns/iter (+/- 0) = 5620 MB/s
+test misc::easy0_1MB ... bench: 39,573 ns/iter (+/- 600) = 26497 MB/s
+test misc::easy0_32 ... bench: 165 ns/iter (+/- 1) = 357 MB/s
+test misc::easy0_32K ... bench: 971 ns/iter (+/- 20) = 33774 MB/s
+test misc::easy1_1K ... bench: 175 ns/iter (+/- 1) = 5965 MB/s
+test misc::easy1_1MB ... bench: 39,451 ns/iter (+/- 183) = 26579 MB/s
+test misc::easy1_32 ... bench: 153 ns/iter (+/- 1) = 339 MB/s
+test misc::easy1_32K ... bench: 942 ns/iter (+/- 24) = 34806 MB/s
+test misc::hard_1K ... bench: 2,362 ns/iter (+/- 11) = 444 MB/s
+test misc::hard_1MB ... bench: 2,386,627 ns/iter (+/- 12,925) = 439 MB/s
+test misc::hard_32 ... bench: 228 ns/iter (+/- 1) = 258 MB/s
+test misc::hard_32K ... bench: 74,482 ns/iter (+/- 190) = 440 MB/s
+test misc::literal ... bench: 120 ns/iter (+/- 0) = 425 MB/s
+test misc::long_needle1 ... bench: 184,777 ns/iter (+/- 1,644) = 541 MB/s
+test misc::long_needle2 ... bench: 184,685 ns/iter (+/- 289) = 541 MB/s
+test misc::match_class ... bench: 267 ns/iter (+/- 1) = 303 MB/s
+test misc::match_class_in_range ... bench: 267 ns/iter (+/- 1) = 303 MB/s
+test misc::match_class_unicode ... bench: 491 ns/iter (+/- 3) = 327 MB/s
+test misc::medium_1K ... bench: 2,065 ns/iter (+/- 4) = 509 MB/s
+test misc::medium_1MB ... bench: 1,938,951 ns/iter (+/- 11,278) = 540 MB/s
+test misc::medium_32 ... bench: 302 ns/iter (+/- 149) = 198 MB/s
+test misc::medium_32K ... bench: 60,766 ns/iter (+/- 1,018) = 539 MB/s
+test misc::not_literal ... bench: 203 ns/iter (+/- 2) = 251 MB/s
+test misc::one_pass_long_prefix ... bench: 119 ns/iter (+/- 1) = 218 MB/s
+test misc::one_pass_long_prefix_not ... bench: 161 ns/iter (+/- 0) = 161 MB/s
+test misc::one_pass_short ... bench: 143 ns/iter (+/- 0) = 118 MB/s
+test misc::one_pass_short_not ... bench: 145 ns/iter (+/- 1) = 117 MB/s
+test misc::reallyhard2_1K ... bench: 2,030 ns/iter (+/- 22) = 512 MB/s
+test misc::reallyhard_1K ... bench: 2,362 ns/iter (+/- 18) = 444 MB/s
+test misc::reallyhard_1MB ... bench: 2,386,760 ns/iter (+/- 22,075) = 439 MB/s
+test misc::reallyhard_32 ... bench: 230 ns/iter (+/- 2) = 256 MB/s
+test misc::reallyhard_32K ... bench: 74,506 ns/iter (+/- 740) = 440 MB/s
+test sherlock::before_holmes ... bench: 1,446,270 ns/iter (+/- 5,771) = 411 MB/s
+test sherlock::everything_greedy ... bench: 9,111,570 ns/iter (+/- 54,091) = 65 MB/s
+test sherlock::everything_greedy_nl ... bench: 2,489,649 ns/iter (+/- 23,310) = 238 MB/s
+test sherlock::holmes_cochar_watson ... bench: 1,176,642 ns/iter (+/- 2,181) = 505 MB/s
+test sherlock::holmes_coword_watson ... bench: 1,389,000 ns/iter (+/- 258,245) = 428 MB/s
+test sherlock::ing_suffix ... bench: 3,050,918 ns/iter (+/- 16,854) = 195 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 1,954,264 ns/iter (+/- 4,656) = 304 MB/s
+test sherlock::letters ... bench: 111,162,180 ns/iter (+/- 108,719) = 5 MB/s
+test sherlock::letters_lower ... bench: 106,751,460 ns/iter (+/- 414,985) = 5 MB/s
+test sherlock::letters_upper ... bench: 4,705,474 ns/iter (+/- 10,913) = 126 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 2,539,425 ns/iter (+/- 5,440) = 234 MB/s
+test sherlock::name_alt1 ... bench: 77,719 ns/iter (+/- 275) = 7654 MB/s
+test sherlock::name_alt2 ... bench: 1,319,600 ns/iter (+/- 2,771) = 450 MB/s
+test sherlock::name_alt3 ... bench: 1,433,629 ns/iter (+/- 2,943) = 414 MB/s
+test sherlock::name_alt3_nocase ... bench: 2,748,137 ns/iter (+/- 4,343) = 216 MB/s
+test sherlock::name_alt4 ... bench: 1,354,024 ns/iter (+/- 2,312) = 439 MB/s
+test sherlock::name_alt4_nocase ... bench: 2,018,381 ns/iter (+/- 2,442) = 294 MB/s
+test sherlock::name_alt5 ... bench: 1,348,150 ns/iter (+/- 3,870) = 441 MB/s
+test sherlock::name_alt5_nocase ... bench: 2,114,276 ns/iter (+/- 3,365) = 281 MB/s
+test sherlock::name_holmes ... bench: 168,436 ns/iter (+/- 1,503) = 3532 MB/s
+test sherlock::name_holmes_nocase ... bench: 1,645,658 ns/iter (+/- 3,816) = 361 MB/s
+test sherlock::name_sherlock ... bench: 59,010 ns/iter (+/- 380) = 10081 MB/s
+test sherlock::name_sherlock_holmes ... bench: 60,467 ns/iter (+/- 179) = 9838 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 1,539,137 ns/iter (+/- 5,506) = 386 MB/s
+test sherlock::name_sherlock_nocase ... bench: 1,535,058 ns/iter (+/- 3,352) = 387 MB/s
+test sherlock::name_whitespace ... bench: 62,700 ns/iter (+/- 440) = 9488 MB/s
+test sherlock::no_match_common ... bench: 439,560 ns/iter (+/- 1,545) = 1353 MB/s
+test sherlock::no_match_really_common ... bench: 439,333 ns/iter (+/- 1,020) = 1354 MB/s
+test sherlock::no_match_uncommon ... bench: 23,882 ns/iter (+/- 134) = 24911 MB/s
+test sherlock::quotes ... bench: 1,396,564 ns/iter (+/- 2,785) = 425 MB/s
+test sherlock::the_lower ... bench: 2,478,251 ns/iter (+/- 5,859) = 240 MB/s
+test sherlock::the_nocase ... bench: 3,708,713 ns/iter (+/- 6,919) = 160 MB/s
+test sherlock::the_upper ... bench: 232,490 ns/iter (+/- 4,478) = 2558 MB/s
+test sherlock::the_whitespace ... bench: 2,286,399 ns/iter (+/- 5,006) = 260 MB/s
+test sherlock::word_ending_n ... bench: 3,295,919 ns/iter (+/- 27,810) = 180 MB/s
+test sherlock::words ... bench: 30,375,810 ns/iter (+/- 37,415) = 19 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 72 measured
+
diff --git a/vendor/regex/record/old-bench-log/04/rust b/vendor/regex/record/old-bench-log/04/rust
new file mode 100644
index 0000000..01e6f44
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/04/rust
@@ -0,0 +1,81 @@
+ Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench)
+ Running target/release/bench-0d58c0af2e68ae0d
+
+running 74 tests
+test misc::anchored_literal_long_match ... bench: 24 ns/iter (+/- 1) = 16250 MB/s
+test misc::anchored_literal_long_non_match ... bench: 21 ns/iter (+/- 0) = 18571 MB/s
+test misc::anchored_literal_short_match ... bench: 22 ns/iter (+/- 1) = 1181 MB/s
+test misc::anchored_literal_short_non_match ... bench: 21 ns/iter (+/- 0) = 1238 MB/s
+test misc::easy0_1K ... bench: 18 ns/iter (+/- 6) = 58388 MB/s
+test misc::easy0_1MB ... bench: 21 ns/iter (+/- 4) = 49933476 MB/s
+test misc::easy0_32 ... bench: 17 ns/iter (+/- 0) = 3470 MB/s
+test misc::easy0_32K ... bench: 18 ns/iter (+/- 9) = 1821944 MB/s
+test misc::easy1_1K ... bench: 52 ns/iter (+/- 0) = 20076 MB/s
+test misc::easy1_1MB ... bench: 55 ns/iter (+/- 0) = 19065381 MB/s
+test misc::easy1_32 ... bench: 50 ns/iter (+/- 0) = 1040 MB/s
+test misc::easy1_32K ... bench: 50 ns/iter (+/- 0) = 655760 MB/s
+test misc::hard_1K ... bench: 66 ns/iter (+/- 0) = 15924 MB/s
+test misc::hard_1MB ... bench: 70 ns/iter (+/- 1) = 14980042 MB/s
+test misc::hard_32 ... bench: 62 ns/iter (+/- 1) = 951 MB/s
+test misc::hard_32K ... bench: 62 ns/iter (+/- 1) = 528951 MB/s
+test misc::literal ... bench: 17 ns/iter (+/- 0) = 3000 MB/s
+test misc::long_needle1 ... bench: 2,359 ns/iter (+/- 37) = 42391 MB/s
+test misc::long_needle2 ... bench: 634,783 ns/iter (+/- 4,313) = 157 MB/s
+test misc::match_class ... bench: 82 ns/iter (+/- 1) = 987 MB/s
+test misc::match_class_in_range ... bench: 30 ns/iter (+/- 15) = 2700 MB/s
+test misc::match_class_unicode ... bench: 317 ns/iter (+/- 2) = 507 MB/s
+test misc::medium_1K ... bench: 18 ns/iter (+/- 0) = 58444 MB/s
+test misc::medium_1MB ... bench: 22 ns/iter (+/- 0) = 47663818 MB/s
+test misc::medium_32 ... bench: 18 ns/iter (+/- 0) = 3333 MB/s
+test misc::medium_32K ... bench: 18 ns/iter (+/- 0) = 1822000 MB/s
+test misc::not_literal ... bench: 115 ns/iter (+/- 0) = 443 MB/s
+test misc::one_pass_long_prefix ... bench: 69 ns/iter (+/- 1) = 376 MB/s
+test misc::one_pass_long_prefix_not ... bench: 68 ns/iter (+/- 0) = 382 MB/s
+test misc::one_pass_short ... bench: 50 ns/iter (+/- 0) = 340 MB/s
+test misc::one_pass_short_not ... bench: 52 ns/iter (+/- 0) = 326 MB/s
+test misc::reallyhard2_1K ... bench: 1,939 ns/iter (+/- 12) = 536 MB/s
+test misc::reallyhard_1K ... bench: 1,964 ns/iter (+/- 7) = 535 MB/s
+test misc::reallyhard_1MB ... bench: 1,934,967 ns/iter (+/- 4,626) = 541 MB/s
+test misc::reallyhard_32 ... bench: 130 ns/iter (+/- 0) = 453 MB/s
+test misc::reallyhard_32K ... bench: 60,581 ns/iter (+/- 176) = 541 MB/s
+test misc::replace_all ... bench: 142 ns/iter (+/- 1)
+test sherlock::before_holmes ... bench: 1,127,747 ns/iter (+/- 2,052) = 527 MB/s
+test sherlock::everything_greedy ... bench: 2,598,664 ns/iter (+/- 6,137) = 228 MB/s
+test sherlock::everything_greedy_nl ... bench: 1,202,183 ns/iter (+/- 1,965) = 494 MB/s
+test sherlock::holmes_cochar_watson ... bench: 220,378 ns/iter (+/- 1,229) = 2699 MB/s
+test sherlock::holmes_coword_watson ... bench: 631,731 ns/iter (+/- 2,071) = 941 MB/s
+test sherlock::ing_suffix ... bench: 1,344,980 ns/iter (+/- 1,799) = 442 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 1,286,425 ns/iter (+/- 2,965) = 462 MB/s
+test sherlock::letters ... bench: 24,356,951 ns/iter (+/- 47,224) = 24 MB/s
+test sherlock::letters_lower ... bench: 23,816,732 ns/iter (+/- 44,203) = 24 MB/s
+test sherlock::letters_upper ... bench: 2,051,873 ns/iter (+/- 8,712) = 289 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 1,102,534 ns/iter (+/- 6,071) = 539 MB/s
+test sherlock::name_alt1 ... bench: 36,474 ns/iter (+/- 308) = 16311 MB/s
+test sherlock::name_alt2 ... bench: 185,668 ns/iter (+/- 1,023) = 3204 MB/s
+test sherlock::name_alt3 ... bench: 1,152,554 ns/iter (+/- 1,991) = 516 MB/s
+test sherlock::name_alt3_nocase ... bench: 1,254,885 ns/iter (+/- 5,387) = 474 MB/s
+test sherlock::name_alt4 ... bench: 228,721 ns/iter (+/- 854) = 2601 MB/s
+test sherlock::name_alt4_nocase ... bench: 1,223,457 ns/iter (+/- 2,307) = 486 MB/s
+test sherlock::name_alt5 ... bench: 317,372 ns/iter (+/- 951) = 1874 MB/s
+test sherlock::name_alt5_nocase ... bench: 1,224,434 ns/iter (+/- 3,886) = 485 MB/s
+test sherlock::name_holmes ... bench: 42,905 ns/iter (+/- 217) = 13866 MB/s
+test sherlock::name_holmes_nocase ... bench: 1,080,290 ns/iter (+/- 5,686) = 550 MB/s
+test sherlock::name_sherlock ... bench: 70,041 ns/iter (+/- 444) = 8494 MB/s
+test sherlock::name_sherlock_holmes ... bench: 36,092 ns/iter (+/- 189) = 16483 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 1,156,696 ns/iter (+/- 3,922) = 514 MB/s
+test sherlock::name_sherlock_nocase ... bench: 1,156,088 ns/iter (+/- 2,453) = 514 MB/s
+test sherlock::name_whitespace ... bench: 79,560 ns/iter (+/- 426) = 7477 MB/s
+test sherlock::no_match_common ... bench: 25,940 ns/iter (+/- 119) = 22934 MB/s
+test sherlock::no_match_really_common ... bench: 364,911 ns/iter (+/- 1,302) = 1630 MB/s
+test sherlock::no_match_uncommon ... bench: 25,851 ns/iter (+/- 112) = 23013 MB/s
+test sherlock::quotes ... bench: 561,575 ns/iter (+/- 2,083) = 1059 MB/s
+test sherlock::repeated_class_negation ... bench: 88,961,089 ns/iter (+/- 132,661) = 6 MB/s
+test sherlock::the_lower ... bench: 609,891 ns/iter (+/- 1,451) = 975 MB/s
+test sherlock::the_nocase ... bench: 1,622,541 ns/iter (+/- 6,851) = 366 MB/s
+test sherlock::the_upper ... bench: 48,810 ns/iter (+/- 245) = 12188 MB/s
+test sherlock::the_whitespace ... bench: 1,192,755 ns/iter (+/- 4,168) = 498 MB/s
+test sherlock::word_ending_n ... bench: 1,991,440 ns/iter (+/- 7,313) = 298 MB/s
+test sherlock::words ... bench: 9,688,357 ns/iter (+/- 17,267) = 61 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 74 measured
+
diff --git a/vendor/regex/record/old-bench-log/04/tcl b/vendor/regex/record/old-bench-log/04/tcl
new file mode 100644
index 0000000..934bf6e
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/04/tcl
@@ -0,0 +1,72 @@
+ Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench)
+ Running target/release/bench-0d58c0af2e68ae0d
+
+running 65 tests
+test misc::anchored_literal_long_match ... bench: 925 ns/iter (+/- 16) = 421 MB/s
+test misc::anchored_literal_long_non_match ... bench: 144 ns/iter (+/- 2) = 2708 MB/s
+test misc::anchored_literal_short_match ... bench: 920 ns/iter (+/- 11) = 28 MB/s
+test misc::anchored_literal_short_non_match ... bench: 144 ns/iter (+/- 1) = 180 MB/s
+test misc::easy0_1K ... bench: 14,228 ns/iter (+/- 204) = 73 MB/s
+test misc::easy0_1MB ... bench: 3,728,677 ns/iter (+/- 4,564) = 281 MB/s
+test misc::easy0_32 ... bench: 10,023 ns/iter (+/- 156) = 5 MB/s
+test misc::easy0_32K ... bench: 125,851 ns/iter (+/- 287) = 260 MB/s
+test misc::easy1_1K ... bench: 8,797 ns/iter (+/- 90) = 118 MB/s
+test misc::easy1_1MB ... bench: 3,722,675 ns/iter (+/- 4,912) = 281 MB/s
+test misc::easy1_32 ... bench: 5,189 ns/iter (+/- 77) = 10 MB/s
+test misc::easy1_32K ... bench: 121,106 ns/iter (+/- 694) = 270 MB/s
+test misc::hard_1K ... bench: 17,111 ns/iter (+/- 251) = 61 MB/s
+test misc::hard_1MB ... bench: 3,743,313 ns/iter (+/- 7,634) = 280 MB/s
+test misc::hard_32 ... bench: 13,489 ns/iter (+/- 220) = 4 MB/s
+test misc::hard_32K ... bench: 129,358 ns/iter (+/- 257) = 253 MB/s
+test misc::literal ... bench: 629 ns/iter (+/- 5) = 81 MB/s
+test misc::long_needle1 ... bench: 21,495,182 ns/iter (+/- 41,993) = 4 MB/s
+test misc::long_needle2 ... bench: 21,501,034 ns/iter (+/- 34,033) = 4 MB/s
+test misc::match_class ... bench: 732 ns/iter (+/- 3) = 110 MB/s
+test misc::match_class_in_range ... bench: 736 ns/iter (+/- 6) = 110 MB/s
+test misc::medium_1K ... bench: 14,433 ns/iter (+/- 49) = 72 MB/s
+test misc::medium_1MB ... bench: 3,729,861 ns/iter (+/- 4,198) = 281 MB/s
+test misc::medium_32 ... bench: 10,756 ns/iter (+/- 75) = 5 MB/s
+test misc::medium_32K ... bench: 126,593 ns/iter (+/- 169) = 259 MB/s
+test misc::not_literal ... bench: 2,350 ns/iter (+/- 13) = 21 MB/s
+test misc::one_pass_long_prefix ... bench: 9,183 ns/iter (+/- 198) = 2 MB/s
+test misc::one_pass_long_prefix_not ... bench: 8,470 ns/iter (+/- 110) = 3 MB/s
+test misc::one_pass_short ... bench: 956 ns/iter (+/- 4) = 17 MB/s
+test misc::one_pass_short_not ... bench: 1,042 ns/iter (+/- 13) = 16 MB/s
+test misc::reallyhard2_1K ... bench: 129,563 ns/iter (+/- 336) = 8 MB/s
+test misc::reallyhard_1K ... bench: 16,656 ns/iter (+/- 152) = 63 MB/s
+test misc::reallyhard_1MB ... bench: 3,744,123 ns/iter (+/- 4,556) = 280 MB/s
+test misc::reallyhard_32 ... bench: 12,910 ns/iter (+/- 112) = 4 MB/s
+test misc::reallyhard_32K ... bench: 129,293 ns/iter (+/- 301) = 253 MB/s
+test sherlock::before_holmes ... bench: 3,593,560 ns/iter (+/- 8,574) = 165 MB/s
+test sherlock::holmes_cochar_watson ... bench: 2,906,271 ns/iter (+/- 5,153) = 204 MB/s
+test sherlock::ing_suffix ... bench: 7,016,213 ns/iter (+/- 30,321) = 84 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 24,592,817 ns/iter (+/- 78,720) = 24 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 2,457,984 ns/iter (+/- 3,932) = 242 MB/s
+test sherlock::name_alt1 ... bench: 2,569,156 ns/iter (+/- 5,789) = 231 MB/s
+test sherlock::name_alt2 ... bench: 3,686,183 ns/iter (+/- 13,550) = 161 MB/s
+test sherlock::name_alt3 ... bench: 6,715,311 ns/iter (+/- 15,208) = 88 MB/s
+test sherlock::name_alt3_nocase ... bench: 9,702,060 ns/iter (+/- 32,628) = 61 MB/s
+test sherlock::name_alt4 ... bench: 3,834,029 ns/iter (+/- 3,955) = 155 MB/s
+test sherlock::name_alt4_nocase ... bench: 4,762,730 ns/iter (+/- 751,201) = 124 MB/s
+test sherlock::name_alt5 ... bench: 4,582,303 ns/iter (+/- 8,073) = 129 MB/s
+test sherlock::name_alt5_nocase ... bench: 5,583,652 ns/iter (+/- 14,573) = 106 MB/s
+test sherlock::name_holmes ... bench: 2,968,764 ns/iter (+/- 6,198) = 200 MB/s
+test sherlock::name_holmes_nocase ... bench: 3,066,080 ns/iter (+/- 8,986) = 194 MB/s
+test sherlock::name_sherlock ... bench: 2,372,708 ns/iter (+/- 3,272) = 250 MB/s
+test sherlock::name_sherlock_holmes ... bench: 2,607,914 ns/iter (+/- 3,361) = 228 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 2,641,260 ns/iter (+/- 9,409) = 225 MB/s
+test sherlock::name_sherlock_nocase ... bench: 2,420,591 ns/iter (+/- 11,053) = 245 MB/s
+test sherlock::name_whitespace ... bench: 2,592,553 ns/iter (+/- 3,476) = 229 MB/s
+test sherlock::no_match_common ... bench: 2,114,367 ns/iter (+/- 1,665) = 281 MB/s
+test sherlock::no_match_really_common ... bench: 2,114,835 ns/iter (+/- 2,491) = 281 MB/s
+test sherlock::no_match_uncommon ... bench: 2,105,274 ns/iter (+/- 1,657) = 282 MB/s
+test sherlock::quotes ... bench: 10,978,890 ns/iter (+/- 30,645) = 54 MB/s
+test sherlock::repeated_class_negation ... bench: 69,836,043 ns/iter (+/- 117,415) = 8 MB/s
+test sherlock::the_lower ... bench: 9,343,518 ns/iter (+/- 29,387) = 63 MB/s
+test sherlock::the_nocase ... bench: 9,690,676 ns/iter (+/- 42,585) = 61 MB/s
+test sherlock::the_upper ... bench: 2,780,398 ns/iter (+/- 6,949) = 213 MB/s
+test sherlock::the_whitespace ... bench: 11,562,612 ns/iter (+/- 78,789) = 51 MB/s
+test sherlock::words ... bench: 64,139,234 ns/iter (+/- 491,422) = 9 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 65 measured
+
diff --git a/vendor/regex/record/old-bench-log/05/onig b/vendor/regex/record/old-bench-log/05/onig
new file mode 100644
index 0000000..373b149
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/05/onig
@@ -0,0 +1,99 @@
+
+running 94 tests
+test misc::anchored_literal_long_match ... bench: 158 ns/iter (+/- 4) = 2468 MB/s
+test misc::anchored_literal_long_non_match ... bench: 495 ns/iter (+/- 7) = 787 MB/s
+test misc::anchored_literal_short_match ... bench: 160 ns/iter (+/- 3) = 162 MB/s
+test misc::anchored_literal_short_non_match ... bench: 44 ns/iter (+/- 4) = 590 MB/s
+test misc::easy0_1K ... bench: 315 ns/iter (+/- 15) = 3336 MB/s
+test misc::easy0_1MB ... bench: 136,864 ns/iter (+/- 5,984) = 7661 MB/s
+test misc::easy0_32 ... bench: 163 ns/iter (+/- 11) = 361 MB/s
+test misc::easy0_32K ... bench: 4,562 ns/iter (+/- 255) = 7188 MB/s
+test misc::easy1_1K ... bench: 3,947 ns/iter (+/- 199) = 264 MB/s
+test misc::easy1_1MB ... bench: 3,920,564 ns/iter (+/- 122,902) = 267 MB/s
+test misc::easy1_32 ... bench: 321 ns/iter (+/- 20) = 161 MB/s
+test misc::easy1_32K ... bench: 121,449 ns/iter (+/- 4,899) = 269 MB/s
+test misc::hard_1K ... bench: 125,960 ns/iter (+/- 7,255) = 8 MB/s
+test misc::hard_1MB ... bench: 134,129,947 ns/iter (+/- 4,797,942) = 7 MB/s
+test misc::hard_32 ... bench: 4,044 ns/iter (+/- 227) = 14 MB/s
+test misc::hard_32K ... bench: 4,183,228 ns/iter (+/- 127,808) = 7 MB/s
+test misc::literal ... bench: 331 ns/iter (+/- 21) = 154 MB/s
+test misc::long_needle1 ... bench: 5,715,563 ns/iter (+/- 250,535) = 17 MB/s
+test misc::long_needle2 ... bench: 5,779,968 ns/iter (+/- 195,784) = 17 MB/s
+test misc::match_class ... bench: 431 ns/iter (+/- 5) = 187 MB/s
+test misc::match_class_in_range ... bench: 427 ns/iter (+/- 27) = 189 MB/s
+test misc::match_class_unicode ... bench: 1,946 ns/iter (+/- 88) = 82 MB/s
+test misc::medium_1K ... bench: 325 ns/iter (+/- 23) = 3236 MB/s
+test misc::medium_1MB ... bench: 138,022 ns/iter (+/- 5,142) = 7597 MB/s
+test misc::medium_32 ... bench: 182 ns/iter (+/- 7) = 329 MB/s
+test misc::medium_32K ... bench: 4,511 ns/iter (+/- 190) = 7270 MB/s
+test misc::not_literal ... bench: 436 ns/iter (+/- 25) = 116 MB/s
+test misc::one_pass_long_prefix ... bench: 168 ns/iter (+/- 6) = 154 MB/s
+test misc::one_pass_long_prefix_not ... bench: 176 ns/iter (+/- 7) = 147 MB/s
+test misc::one_pass_short ... bench: 325 ns/iter (+/- 16) = 52 MB/s
+test misc::one_pass_short_not ... bench: 322 ns/iter (+/- 21) = 52 MB/s
+test misc::reallyhard2_1K ... bench: 289,956 ns/iter (+/- 16,350) = 3 MB/s
+test misc::reallyhard_1K ... bench: 126,089 ns/iter (+/- 5,350) = 8 MB/s
+test misc::reallyhard_1MB ... bench: 133,197,312 ns/iter (+/- 3,057,491) = 7 MB/s
+test misc::reallyhard_32 ... bench: 4,060 ns/iter (+/- 11) = 14 MB/s
+test misc::reallyhard_32K ... bench: 4,215,469 ns/iter (+/- 200,526) = 7 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 27,622 ns/iter (+/- 778) = 289 MB/s
+test regexdna::find_new_lines ... bench: 30,503,604 ns/iter (+/- 1,120,697) = 166 MB/s
+test regexdna::subst1 ... bench: 23,276,552 ns/iter (+/- 1,019,308) = 218 MB/s
+test regexdna::subst10 ... bench: 23,199,415 ns/iter (+/- 790,938) = 219 MB/s
+test regexdna::subst11 ... bench: 23,138,469 ns/iter (+/- 884,700) = 219 MB/s
+test regexdna::subst2 ... bench: 23,076,376 ns/iter (+/- 644,391) = 220 MB/s
+test regexdna::subst3 ... bench: 23,115,770 ns/iter (+/- 737,666) = 219 MB/s
+test regexdna::subst4 ... bench: 23,093,288 ns/iter (+/- 1,003,519) = 220 MB/s
+test regexdna::subst5 ... bench: 23,618,534 ns/iter (+/- 773,260) = 215 MB/s
+test regexdna::subst6 ... bench: 23,301,581 ns/iter (+/- 679,681) = 218 MB/s
+test regexdna::subst7 ... bench: 23,371,339 ns/iter (+/- 714,433) = 217 MB/s
+test regexdna::subst8 ... bench: 23,187,513 ns/iter (+/- 863,031) = 219 MB/s
+test regexdna::subst9 ... bench: 23,143,027 ns/iter (+/- 890,422) = 219 MB/s
+test regexdna::variant1 ... bench: 104,906,982 ns/iter (+/- 3,391,942) = 48 MB/s
+test regexdna::variant2 ... bench: 118,326,728 ns/iter (+/- 3,378,748) = 42 MB/s
+test regexdna::variant3 ... bench: 109,348,596 ns/iter (+/- 3,647,056) = 46 MB/s
+test regexdna::variant4 ... bench: 104,574,675 ns/iter (+/- 3,236,753) = 48 MB/s
+test regexdna::variant5 ... bench: 102,968,132 ns/iter (+/- 2,792,754) = 49 MB/s
+test regexdna::variant6 ... bench: 103,783,112 ns/iter (+/- 2,851,581) = 48 MB/s
+test regexdna::variant7 ... bench: 103,939,805 ns/iter (+/- 3,118,277) = 48 MB/s
+test regexdna::variant8 ... bench: 109,722,594 ns/iter (+/- 3,739,958) = 46 MB/s
+test regexdna::variant9 ... bench: 128,702,724 ns/iter (+/- 3,739,103) = 39 MB/s
+test sherlock::before_after_holmes ... bench: 39,219,739 ns/iter (+/- 1,622,425) = 15 MB/s
+test sherlock::before_holmes ... bench: 37,454,934 ns/iter (+/- 1,055,140) = 15 MB/s
+test sherlock::everything_greedy ... bench: 7,341,629 ns/iter (+/- 241,072) = 81 MB/s
+test sherlock::holmes_cochar_watson ... bench: 2,298,534 ns/iter (+/- 94,224) = 258 MB/s
+test sherlock::ing_suffix ... bench: 18,533,670 ns/iter (+/- 505,855) = 32 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 2,419,034 ns/iter (+/- 124,616) = 245 MB/s
+test sherlock::letters ... bench: 61,910,045 ns/iter (+/- 2,122,755) = 9 MB/s
+test sherlock::letters_lower ... bench: 60,831,022 ns/iter (+/- 2,559,720) = 9 MB/s
+test sherlock::letters_upper ... bench: 10,747,265 ns/iter (+/- 761,147) = 55 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 215,376 ns/iter (+/- 13,622) = 2762 MB/s
+test sherlock::name_alt1 ... bench: 2,282,320 ns/iter (+/- 154,104) = 260 MB/s
+test sherlock::name_alt2 ... bench: 2,206,087 ns/iter (+/- 158,376) = 269 MB/s
+test sherlock::name_alt3 ... bench: 2,771,932 ns/iter (+/- 181,216) = 214 MB/s
+test sherlock::name_alt3_nocase ... bench: 19,198,056 ns/iter (+/- 816,668) = 30 MB/s
+test sherlock::name_alt4 ... bench: 2,254,798 ns/iter (+/- 135,379) = 263 MB/s
+test sherlock::name_alt4_nocase ... bench: 5,734,254 ns/iter (+/- 411,596) = 103 MB/s
+test sherlock::name_alt5 ... bench: 2,276,779 ns/iter (+/- 172,557) = 261 MB/s
+test sherlock::name_alt5_nocase ... bench: 7,314,318 ns/iter (+/- 377,963) = 81 MB/s
+test sherlock::name_holmes ... bench: 477,888 ns/iter (+/- 37,472) = 1244 MB/s
+test sherlock::name_holmes_nocase ... bench: 3,487,005 ns/iter (+/- 278,896) = 170 MB/s
+test sherlock::name_sherlock ... bench: 295,313 ns/iter (+/- 16,739) = 2014 MB/s
+test sherlock::name_sherlock_holmes ... bench: 216,522 ns/iter (+/- 15,594) = 2747 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 3,480,703 ns/iter (+/- 272,332) = 170 MB/s
+test sherlock::name_sherlock_nocase ... bench: 3,511,444 ns/iter (+/- 283,013) = 169 MB/s
+test sherlock::name_whitespace ... bench: 304,043 ns/iter (+/- 19,186) = 1956 MB/s
+test sherlock::no_match_common ... bench: 632,615 ns/iter (+/- 44,676) = 940 MB/s
+test sherlock::no_match_really_common ... bench: 727,565 ns/iter (+/- 54,169) = 817 MB/s
+test sherlock::no_match_uncommon ... bench: 624,061 ns/iter (+/- 37,791) = 953 MB/s
+test sherlock::quotes ... bench: 3,776,688 ns/iter (+/- 186,393) = 157 MB/s
+test sherlock::repeated_class_negation ... bench: 34,354,179 ns/iter (+/- 1,534,267) = 17 MB/s
+test sherlock::the_lower ... bench: 1,965,787 ns/iter (+/- 137,099) = 302 MB/s
+test sherlock::the_nocase ... bench: 4,853,843 ns/iter (+/- 259,890) = 122 MB/s
+test sherlock::the_upper ... bench: 949,071 ns/iter (+/- 66,016) = 626 MB/s
+test sherlock::the_whitespace ... bench: 2,173,683 ns/iter (+/- 142,384) = 273 MB/s
+test sherlock::word_ending_n ... bench: 19,711,057 ns/iter (+/- 942,152) = 30 MB/s
+test sherlock::words ... bench: 21,979,387 ns/iter (+/- 1,250,588) = 27 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 94 measured
+
diff --git a/vendor/regex/record/old-bench-log/05/onig-vs-rust b/vendor/regex/record/old-bench-log/05/onig-vs-rust
new file mode 100644
index 0000000..4625e8f
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/05/onig-vs-rust
@@ -0,0 +1,95 @@
+ name onig ns/iter rust ns/iter diff ns/iter diff %
+ misc::anchored_literal_long_match 158 (2468 MB/s) 24 (16250 MB/s) -134 -84.81%
+ misc::anchored_literal_long_non_match 495 (787 MB/s) 27 (14444 MB/s) -468 -94.55%
+ misc::anchored_literal_short_match 160 (162 MB/s) 22 (1181 MB/s) -138 -86.25%
+ misc::anchored_literal_short_non_match 44 (590 MB/s) 24 (1083 MB/s) -20 -45.45%
+ misc::easy0_1K 315 (3336 MB/s) 16 (65687 MB/s) -299 -94.92%
+ misc::easy0_1MB 136,864 (7661 MB/s) 20 (52430150 MB/s) -136,844 -99.99%
+ misc::easy0_32 163 (361 MB/s) 16 (3687 MB/s) -147 -90.18%
+ misc::easy0_32K 4,562 (7188 MB/s) 16 (2049687 MB/s) -4,546 -99.65%
+ misc::easy1_1K 3,947 (264 MB/s) 48 (21750 MB/s) -3,899 -98.78%
+ misc::easy1_1MB 3,920,564 (267 MB/s) 48 (21845750 MB/s) -3,920,516 -100.00%
+ misc::easy1_32 321 (161 MB/s) 46 (1130 MB/s) -275 -85.67%
+ misc::easy1_32K 121,449 (269 MB/s) 47 (697617 MB/s) -121,402 -99.96%
+ misc::hard_1K 125,960 (8 MB/s) 58 (18120 MB/s) -125,902 -99.95%
+ misc::hard_1MB 134,129,947 (7 MB/s) 61 (17190213 MB/s) -134,129,886 -100.00%
+ misc::hard_32 4,044 (14 MB/s) 58 (1017 MB/s) -3,986 -98.57%
+ misc::hard_32K 4,183,228 (7 MB/s) 56 (585625 MB/s) -4,183,172 -100.00%
+ misc::literal 331 (154 MB/s) 16 (3187 MB/s) -315 -95.17%
+ misc::long_needle1 5,715,563 (17 MB/s) 2,226 (44924 MB/s) -5,713,337 -99.96%
+ misc::long_needle2 5,779,968 (17 MB/s) 576,997 (173 MB/s) -5,202,971 -90.02%
+ misc::match_class 431 (187 MB/s) 65 (1246 MB/s) -366 -84.92%
+ misc::match_class_in_range 427 (189 MB/s) 27 (3000 MB/s) -400 -93.68%
+ misc::match_class_unicode 1,946 (82 MB/s) 283 (568 MB/s) -1,663 -85.46%
+ misc::medium_1K 325 (3236 MB/s) 16 (65750 MB/s) -309 -95.08%
+ misc::medium_1MB 138,022 (7597 MB/s) 21 (49933523 MB/s) -138,001 -99.98%
+ misc::medium_32 182 (329 MB/s) 17 (3529 MB/s) -165 -90.66%
+ misc::medium_32K 4,511 (7270 MB/s) 17 (1929176 MB/s) -4,494 -99.62%
+ misc::not_literal 436 (116 MB/s) 105 (485 MB/s) -331 -75.92%
+ misc::one_pass_long_prefix 168 (154 MB/s) 68 (382 MB/s) -100 -59.52%
+ misc::one_pass_long_prefix_not 176 (147 MB/s) 58 (448 MB/s) -118 -67.05%
+ misc::one_pass_short 325 (52 MB/s) 45 (377 MB/s) -280 -86.15%
+ misc::one_pass_short_not 322 (52 MB/s) 50 (340 MB/s) -272 -84.47%
+ misc::reallyhard2_1K 289,956 (3 MB/s) 83 (12530 MB/s) -289,873 -99.97%
+ misc::reallyhard_1K 126,089 (8 MB/s) 1,822 (576 MB/s) -124,267 -98.55%
+ misc::reallyhard_1MB 133,197,312 (7 MB/s) 1,768,327 (592 MB/s) -131,428,985 -98.67%
+ misc::reallyhard_32 4,060 (14 MB/s) 121 (487 MB/s) -3,939 -97.02%
+ misc::reallyhard_32K 4,215,469 (7 MB/s) 56,375 (581 MB/s) -4,159,094 -98.66%
+ misc::reverse_suffix_no_quadratic 27,622 (289 MB/s) 5,803 (1378 MB/s) -21,819 -78.99%
+ regexdna::find_new_lines 30,503,604 (166 MB/s) 14,818,233 (343 MB/s) -15,685,371 -51.42%
+ regexdna::subst1 23,276,552 (218 MB/s) 896,790 (5668 MB/s) -22,379,762 -96.15%
+ regexdna::subst10 23,199,415 (219 MB/s) 957,325 (5310 MB/s) -22,242,090 -95.87%
+ regexdna::subst11 23,138,469 (219 MB/s) 917,248 (5542 MB/s) -22,221,221 -96.04%
+ regexdna::subst2 23,076,376 (220 MB/s) 892,129 (5698 MB/s) -22,184,247 -96.13%
+ regexdna::subst3 23,115,770 (219 MB/s) 929,250 (5470 MB/s) -22,186,520 -95.98%
+ regexdna::subst4 23,093,288 (220 MB/s) 872,581 (5825 MB/s) -22,220,707 -96.22%
+ regexdna::subst5 23,618,534 (215 MB/s) 875,804 (5804 MB/s) -22,742,730 -96.29%
+ regexdna::subst6 23,301,581 (218 MB/s) 884,639 (5746 MB/s) -22,416,942 -96.20%
+ regexdna::subst7 23,371,339 (217 MB/s) 872,791 (5824 MB/s) -22,498,548 -96.27%
+ regexdna::subst8 23,187,513 (219 MB/s) 873,833 (5817 MB/s) -22,313,680 -96.23%
+ regexdna::subst9 23,143,027 (219 MB/s) 886,744 (5732 MB/s) -22,256,283 -96.17%
+ regexdna::variant1 104,906,982 (48 MB/s) 3,699,267 (1374 MB/s) -101,207,715 -96.47%
+ regexdna::variant2 118,326,728 (42 MB/s) 6,760,952 (751 MB/s) -111,565,776 -94.29%
+ regexdna::variant3 109,348,596 (46 MB/s) 8,030,646 (633 MB/s) -101,317,950 -92.66%
+ regexdna::variant4 104,574,675 (48 MB/s) 8,077,290 (629 MB/s) -96,497,385 -92.28%
+ regexdna::variant5 102,968,132 (49 MB/s) 6,787,242 (748 MB/s) -96,180,890 -93.41%
+ regexdna::variant6 103,783,112 (48 MB/s) 6,577,777 (772 MB/s) -97,205,335 -93.66%
+ regexdna::variant7 103,939,805 (48 MB/s) 6,705,580 (758 MB/s) -97,234,225 -93.55%
+ regexdna::variant8 109,722,594 (46 MB/s) 6,818,785 (745 MB/s) -102,903,809 -93.79%
+ regexdna::variant9 128,702,724 (39 MB/s) 6,821,453 (745 MB/s) -121,881,271 -94.70%
+ sherlock::before_after_holmes 39,219,739 (15 MB/s) 1,029,866 (577 MB/s) -38,189,873 -97.37%
+ sherlock::before_holmes 37,454,934 (15 MB/s) 76,633 (7763 MB/s) -37,378,301 -99.80%
+ sherlock::everything_greedy 7,341,629 (81 MB/s) 2,375,079 (250 MB/s) -4,966,550 -67.65%
+ sherlock::holmes_cochar_watson 2,298,534 (258 MB/s) 144,725 (4110 MB/s) -2,153,809 -93.70%
+ sherlock::ing_suffix 18,533,670 (32 MB/s) 436,202 (1363 MB/s) -18,097,468 -97.65%
+ sherlock::ing_suffix_limited_space 2,419,034 (245 MB/s) 1,182,943 (502 MB/s) -1,236,091 -51.10%
+ sherlock::letters 61,910,045 (9 MB/s) 24,390,452 (24 MB/s) -37,519,593 -60.60%
+ sherlock::letters_lower 60,831,022 (9 MB/s) 23,784,108 (25 MB/s) -37,046,914 -60.90%
+ sherlock::letters_upper 10,747,265 (55 MB/s) 1,993,838 (298 MB/s) -8,753,427 -81.45%
+ sherlock::line_boundary_sherlock_holmes 215,376 (2762 MB/s) 999,414 (595 MB/s) 784,038 364.03%
+ sherlock::name_alt1 2,282,320 (260 MB/s) 34,298 (17345 MB/s) -2,248,022 -98.50%
+ sherlock::name_alt2 2,206,087 (269 MB/s) 124,226 (4789 MB/s) -2,081,861 -94.37%
+ sherlock::name_alt3 2,771,932 (214 MB/s) 137,742 (4319 MB/s) -2,634,190 -95.03%
+ sherlock::name_alt3_nocase 19,198,056 (30 MB/s) 1,293,763 (459 MB/s) -17,904,293 -93.26%
+ sherlock::name_alt4 2,254,798 (263 MB/s) 164,900 (3607 MB/s) -2,089,898 -92.69%
+ sherlock::name_alt4_nocase 5,734,254 (103 MB/s) 235,023 (2531 MB/s) -5,499,231 -95.90%
+ sherlock::name_alt5 2,276,779 (261 MB/s) 127,928 (4650 MB/s) -2,148,851 -94.38%
+ sherlock::name_alt5_nocase 7,314,318 (81 MB/s) 659,591 (901 MB/s) -6,654,727 -90.98%
+ sherlock::name_holmes 477,888 (1244 MB/s) 40,902 (14545 MB/s) -436,986 -91.44%
+ sherlock::name_holmes_nocase 3,487,005 (170 MB/s) 198,658 (2994 MB/s) -3,288,347 -94.30%
+ sherlock::name_sherlock 295,313 (2014 MB/s) 68,924 (8631 MB/s) -226,389 -76.66%
+ sherlock::name_sherlock_holmes 216,522 (2747 MB/s) 31,640 (18803 MB/s) -184,882 -85.39%
+ sherlock::name_sherlock_holmes_nocase 3,480,703 (170 MB/s) 173,522 (3428 MB/s) -3,307,181 -95.01%
+ sherlock::name_sherlock_nocase 3,511,444 (169 MB/s) 170,888 (3481 MB/s) -3,340,556 -95.13%
+ sherlock::name_whitespace 304,043 (1956 MB/s) 84,314 (7056 MB/s) -219,729 -72.27%
+ sherlock::no_match_common 632,615 (940 MB/s) 20,727 (28703 MB/s) -611,888 -96.72%
+ sherlock::no_match_really_common 727,565 (817 MB/s) 381,476 (1559 MB/s) -346,089 -47.57%
+ sherlock::no_match_uncommon 624,061 (953 MB/s) 20,786 (28621 MB/s) -603,275 -96.67%
+ sherlock::quotes 3,776,688 (157 MB/s) 531,487 (1119 MB/s) -3,245,201 -85.93%
+ sherlock::repeated_class_negation 34,354,179 (17 MB/s) 85,881,944 (6 MB/s) 51,527,765 149.99%
+ sherlock::the_lower 1,965,787 (302 MB/s) 654,110 (909 MB/s) -1,311,677 -66.73%
+ sherlock::the_nocase 4,853,843 (122 MB/s) 474,456 (1253 MB/s) -4,379,387 -90.23%
+ sherlock::the_upper 949,071 (626 MB/s) 43,746 (13599 MB/s) -905,325 -95.39%
+ sherlock::the_whitespace 2,173,683 (273 MB/s) 1,181,974 (503 MB/s) -991,709 -45.62%
+ sherlock::word_ending_n 19,711,057 (30 MB/s) 1,925,578 (308 MB/s) -17,785,479 -90.23%
+ sherlock::words 21,979,387 (27 MB/s) 9,697,201 (61 MB/s) -12,282,186 -55.88%
diff --git a/vendor/regex/record/old-bench-log/05/pcre1 b/vendor/regex/record/old-bench-log/05/pcre1
new file mode 100644
index 0000000..51af361
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/05/pcre1
@@ -0,0 +1,98 @@
+
+running 93 tests
+test misc::anchored_literal_long_match ... bench: 30 ns/iter (+/- 0) = 13000 MB/s
+test misc::anchored_literal_long_non_match ... bench: 24 ns/iter (+/- 1) = 16250 MB/s
+test misc::anchored_literal_short_match ... bench: 29 ns/iter (+/- 1) = 896 MB/s
+test misc::anchored_literal_short_non_match ... bench: 24 ns/iter (+/- 1) = 1083 MB/s
+test misc::easy0_1K ... bench: 260 ns/iter (+/- 15) = 4042 MB/s
+test misc::easy0_1MB ... bench: 202,849 ns/iter (+/- 7,973) = 5169 MB/s
+test misc::easy0_32 ... bench: 47 ns/iter (+/- 3) = 1255 MB/s
+test misc::easy0_32K ... bench: 6,378 ns/iter (+/- 236) = 5141 MB/s
+test misc::easy1_1K ... bench: 248 ns/iter (+/- 15) = 4209 MB/s
+test misc::easy1_1MB ... bench: 203,105 ns/iter (+/- 7,590) = 5162 MB/s
+test misc::easy1_32 ... bench: 51 ns/iter (+/- 1) = 1019 MB/s
+test misc::easy1_32K ... bench: 6,508 ns/iter (+/- 160) = 5038 MB/s
+test misc::hard_1K ... bench: 1,324 ns/iter (+/- 46) = 793 MB/s
+test misc::hard_1MB ... bench: 1,134,691 ns/iter (+/- 41,296) = 924 MB/s
+test misc::hard_32 ... bench: 113 ns/iter (+/- 13) = 522 MB/s
+test misc::hard_32K ... bench: 42,269 ns/iter (+/- 2,298) = 775 MB/s
+test misc::literal ... bench: 28 ns/iter (+/- 0) = 1821 MB/s
+test misc::long_needle1 ... bench: 547,122 ns/iter (+/- 34,029) = 182 MB/s
+test misc::long_needle2 ... bench: 546,018 ns/iter (+/- 24,721) = 183 MB/s
+test misc::match_class ... bench: 97 ns/iter (+/- 5) = 835 MB/s
+test misc::match_class_in_range ... bench: 30 ns/iter (+/- 1) = 2700 MB/s
+test misc::match_class_unicode ... bench: 343 ns/iter (+/- 2) = 469 MB/s
+test misc::medium_1K ... bench: 253 ns/iter (+/- 15) = 4158 MB/s
+test misc::medium_1MB ... bench: 202,025 ns/iter (+/- 11,252) = 5190 MB/s
+test misc::medium_32 ... bench: 51 ns/iter (+/- 2) = 1176 MB/s
+test misc::medium_32K ... bench: 6,406 ns/iter (+/- 318) = 5119 MB/s
+test misc::not_literal ... bench: 169 ns/iter (+/- 6) = 301 MB/s
+test misc::one_pass_long_prefix ... bench: 28 ns/iter (+/- 1) = 928 MB/s
+test misc::one_pass_long_prefix_not ... bench: 28 ns/iter (+/- 0) = 928 MB/s
+test misc::one_pass_short ... bench: 54 ns/iter (+/- 0) = 314 MB/s
+test misc::one_pass_short_not ... bench: 55 ns/iter (+/- 3) = 309 MB/s
+test misc::reallyhard2_1K ... bench: 4,664 ns/iter (+/- 123) = 222 MB/s
+test misc::reallyhard_1K ... bench: 1,595 ns/iter (+/- 34) = 658 MB/s
+test misc::reallyhard_1MB ... bench: 1,377,542 ns/iter (+/- 2,203) = 761 MB/s
+test misc::reallyhard_32 ... bench: 106 ns/iter (+/- 2) = 556 MB/s
+test misc::reallyhard_32K ... bench: 43,256 ns/iter (+/- 1,230) = 758 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 4,607 ns/iter (+/- 68) = 1736 MB/s
+test regexdna::find_new_lines ... bench: 2,840,298 ns/iter (+/- 128,040) = 1789 MB/s
+test regexdna::subst1 ... bench: 1,284,283 ns/iter (+/- 39,986) = 3958 MB/s
+test regexdna::subst10 ... bench: 1,269,531 ns/iter (+/- 63,116) = 4004 MB/s
+test regexdna::subst11 ... bench: 1,286,171 ns/iter (+/- 49,256) = 3952 MB/s
+test regexdna::subst2 ... bench: 1,303,022 ns/iter (+/- 1,553) = 3901 MB/s
+test regexdna::subst3 ... bench: 1,295,961 ns/iter (+/- 57,880) = 3922 MB/s
+test regexdna::subst4 ... bench: 1,313,706 ns/iter (+/- 2,115) = 3869 MB/s
+test regexdna::subst5 ... bench: 1,286,339 ns/iter (+/- 2,093) = 3951 MB/s
+test regexdna::subst6 ... bench: 1,385,644 ns/iter (+/- 3,387) = 3668 MB/s
+test regexdna::subst7 ... bench: 1,286,743 ns/iter (+/- 2,339) = 3950 MB/s
+test regexdna::subst8 ... bench: 1,306,406 ns/iter (+/- 1,686) = 3891 MB/s
+test regexdna::subst9 ... bench: 1,280,365 ns/iter (+/- 52,649) = 3970 MB/s
+test regexdna::variant1 ... bench: 15,271,875 ns/iter (+/- 510,399) = 332 MB/s
+test regexdna::variant2 ... bench: 16,704,090 ns/iter (+/- 446,145) = 304 MB/s
+test regexdna::variant3 ... bench: 20,745,546 ns/iter (+/- 500,573) = 245 MB/s
+test regexdna::variant4 ... bench: 19,285,154 ns/iter (+/- 543,793) = 263 MB/s
+test regexdna::variant5 ... bench: 17,234,130 ns/iter (+/- 291,232) = 294 MB/s
+test regexdna::variant6 ... bench: 17,462,350 ns/iter (+/- 510,036) = 291 MB/s
+test regexdna::variant7 ... bench: 19,671,680 ns/iter (+/- 562,610) = 258 MB/s
+test regexdna::variant8 ... bench: 24,515,319 ns/iter (+/- 725,298) = 207 MB/s
+test regexdna::variant9 ... bench: 22,623,755 ns/iter (+/- 637,538) = 224 MB/s
+test sherlock::before_after_holmes ... bench: 4,510,830 ns/iter (+/- 170,864) = 131 MB/s
+test sherlock::before_holmes ... bench: 4,706,836 ns/iter (+/- 186,202) = 126 MB/s
+test sherlock::holmes_cochar_watson ... bench: 523,122 ns/iter (+/- 988) = 1137 MB/s
+test sherlock::ing_suffix ... bench: 2,030,438 ns/iter (+/- 9,228) = 293 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 4,996,956 ns/iter (+/- 197,705) = 119 MB/s
+test sherlock::letters ... bench: 13,529,105 ns/iter (+/- 496,645) = 43 MB/s
+test sherlock::letters_lower ... bench: 13,681,607 ns/iter (+/- 448,932) = 43 MB/s
+test sherlock::letters_upper ... bench: 1,904,757 ns/iter (+/- 94,484) = 312 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 207,695 ns/iter (+/- 8,892) = 2864 MB/s
+test sherlock::name_alt1 ... bench: 486,857 ns/iter (+/- 21,004) = 1221 MB/s
+test sherlock::name_alt2 ... bench: 483,926 ns/iter (+/- 26,860) = 1229 MB/s
+test sherlock::name_alt3 ... bench: 978,827 ns/iter (+/- 43,851) = 607 MB/s
+test sherlock::name_alt3_nocase ... bench: 2,986,143 ns/iter (+/- 78,155) = 199 MB/s
+test sherlock::name_alt4 ... bench: 78,104 ns/iter (+/- 4,056) = 7617 MB/s
+test sherlock::name_alt4_nocase ... bench: 1,638,351 ns/iter (+/- 62,542) = 363 MB/s
+test sherlock::name_alt5 ... bench: 685,723 ns/iter (+/- 26,092) = 867 MB/s
+test sherlock::name_alt5_nocase ... bench: 1,817,760 ns/iter (+/- 80,781) = 327 MB/s
+test sherlock::name_holmes ... bench: 411,102 ns/iter (+/- 1,887) = 1447 MB/s
+test sherlock::name_holmes_nocase ... bench: 516,003 ns/iter (+/- 2,295) = 1152 MB/s
+test sherlock::name_sherlock ... bench: 284,300 ns/iter (+/- 1,117) = 2092 MB/s
+test sherlock::name_sherlock_holmes ... bench: 209,139 ns/iter (+/- 380) = 2844 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 1,118,324 ns/iter (+/- 1,654) = 531 MB/s
+test sherlock::name_sherlock_nocase ... bench: 1,126,992 ns/iter (+/- 1,180) = 527 MB/s
+test sherlock::name_whitespace ... bench: 284,672 ns/iter (+/- 510) = 2089 MB/s
+test sherlock::no_match_common ... bench: 439,955 ns/iter (+/- 939) = 1352 MB/s
+test sherlock::no_match_really_common ... bench: 439,266 ns/iter (+/- 3,751) = 1354 MB/s
+test sherlock::no_match_uncommon ... bench: 28,872 ns/iter (+/- 31) = 20605 MB/s
+test sherlock::quotes ... bench: 522,877 ns/iter (+/- 32,723) = 1137 MB/s
+test sherlock::repeated_class_negation ... bench: 5,997,745 ns/iter (+/- 209,544) = 99 MB/s
+test sherlock::the_lower ... bench: 747,234 ns/iter (+/- 43,110) = 796 MB/s
+test sherlock::the_nocase ... bench: 802,320 ns/iter (+/- 27,715) = 741 MB/s
+test sherlock::the_upper ... bench: 58,163 ns/iter (+/- 2,202) = 10228 MB/s
+test sherlock::the_whitespace ... bench: 920,781 ns/iter (+/- 30,609) = 646 MB/s
+test sherlock::word_ending_n ... bench: 5,703,864 ns/iter (+/- 191,007) = 104 MB/s
+test sherlock::words ... bench: 6,786,318 ns/iter (+/- 168,049) = 87 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured
+
diff --git a/vendor/regex/record/old-bench-log/05/pcre1-vs-rust b/vendor/regex/record/old-bench-log/05/pcre1-vs-rust
new file mode 100644
index 0000000..1d8c0d6
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/05/pcre1-vs-rust
@@ -0,0 +1,94 @@
+ name pcre1 ns/iter rust ns/iter diff ns/iter diff %
+ misc::anchored_literal_long_match 30 (13000 MB/s) 24 (16250 MB/s) -6 -20.00%
+ misc::anchored_literal_long_non_match 24 (16250 MB/s) 27 (14444 MB/s) 3 12.50%
+ misc::anchored_literal_short_match 29 (896 MB/s) 22 (1181 MB/s) -7 -24.14%
+ misc::anchored_literal_short_non_match 24 (1083 MB/s) 24 (1083 MB/s) 0 0.00%
+ misc::easy0_1K 260 (4042 MB/s) 16 (65687 MB/s) -244 -93.85%
+ misc::easy0_1MB 202,849 (5169 MB/s) 20 (52430150 MB/s) -202,829 -99.99%
+ misc::easy0_32 47 (1255 MB/s) 16 (3687 MB/s) -31 -65.96%
+ misc::easy0_32K 6,378 (5141 MB/s) 16 (2049687 MB/s) -6,362 -99.75%
+ misc::easy1_1K 248 (4209 MB/s) 48 (21750 MB/s) -200 -80.65%
+ misc::easy1_1MB 203,105 (5162 MB/s) 48 (21845750 MB/s) -203,057 -99.98%
+ misc::easy1_32 51 (1019 MB/s) 46 (1130 MB/s) -5 -9.80%
+ misc::easy1_32K 6,508 (5038 MB/s) 47 (697617 MB/s) -6,461 -99.28%
+ misc::hard_1K 1,324 (793 MB/s) 58 (18120 MB/s) -1,266 -95.62%
+ misc::hard_1MB 1,134,691 (924 MB/s) 61 (17190213 MB/s) -1,134,630 -99.99%
+ misc::hard_32 113 (522 MB/s) 58 (1017 MB/s) -55 -48.67%
+ misc::hard_32K 42,269 (775 MB/s) 56 (585625 MB/s) -42,213 -99.87%
+ misc::literal 28 (1821 MB/s) 16 (3187 MB/s) -12 -42.86%
+ misc::long_needle1 547,122 (182 MB/s) 2,226 (44924 MB/s) -544,896 -99.59%
+ misc::long_needle2 546,018 (183 MB/s) 576,997 (173 MB/s) 30,979 5.67%
+ misc::match_class 97 (835 MB/s) 65 (1246 MB/s) -32 -32.99%
+ misc::match_class_in_range 30 (2700 MB/s) 27 (3000 MB/s) -3 -10.00%
+ misc::match_class_unicode 343 (469 MB/s) 283 (568 MB/s) -60 -17.49%
+ misc::medium_1K 253 (4158 MB/s) 16 (65750 MB/s) -237 -93.68%
+ misc::medium_1MB 202,025 (5190 MB/s) 21 (49933523 MB/s) -202,004 -99.99%
+ misc::medium_32 51 (1176 MB/s) 17 (3529 MB/s) -34 -66.67%
+ misc::medium_32K 6,406 (5119 MB/s) 17 (1929176 MB/s) -6,389 -99.73%
+ misc::not_literal 169 (301 MB/s) 105 (485 MB/s) -64 -37.87%
+ misc::one_pass_long_prefix 28 (928 MB/s) 68 (382 MB/s) 40 142.86%
+ misc::one_pass_long_prefix_not 28 (928 MB/s) 58 (448 MB/s) 30 107.14%
+ misc::one_pass_short 54 (314 MB/s) 45 (377 MB/s) -9 -16.67%
+ misc::one_pass_short_not 55 (309 MB/s) 50 (340 MB/s) -5 -9.09%
+ misc::reallyhard2_1K 4,664 (222 MB/s) 83 (12530 MB/s) -4,581 -98.22%
+ misc::reallyhard_1K 1,595 (658 MB/s) 1,822 (576 MB/s) 227 14.23%
+ misc::reallyhard_1MB 1,377,542 (761 MB/s) 1,768,327 (592 MB/s) 390,785 28.37%
+ misc::reallyhard_32 106 (556 MB/s) 121 (487 MB/s) 15 14.15%
+ misc::reallyhard_32K 43,256 (758 MB/s) 56,375 (581 MB/s) 13,119 30.33%
+ misc::reverse_suffix_no_quadratic 4,607 (1736 MB/s) 5,803 (1378 MB/s) 1,196 25.96%
+ regexdna::find_new_lines 2,840,298 (1789 MB/s) 14,818,233 (343 MB/s) 11,977,935 421.71%
+ regexdna::subst1 1,284,283 (3958 MB/s) 896,790 (5668 MB/s) -387,493 -30.17%
+ regexdna::subst10 1,269,531 (4004 MB/s) 957,325 (5310 MB/s) -312,206 -24.59%
+ regexdna::subst11 1,286,171 (3952 MB/s) 917,248 (5542 MB/s) -368,923 -28.68%
+ regexdna::subst2 1,303,022 (3901 MB/s) 892,129 (5698 MB/s) -410,893 -31.53%
+ regexdna::subst3 1,295,961 (3922 MB/s) 929,250 (5470 MB/s) -366,711 -28.30%
+ regexdna::subst4 1,313,706 (3869 MB/s) 872,581 (5825 MB/s) -441,125 -33.58%
+ regexdna::subst5 1,286,339 (3951 MB/s) 875,804 (5804 MB/s) -410,535 -31.91%
+ regexdna::subst6 1,385,644 (3668 MB/s) 884,639 (5746 MB/s) -501,005 -36.16%
+ regexdna::subst7 1,286,743 (3950 MB/s) 872,791 (5824 MB/s) -413,952 -32.17%
+ regexdna::subst8 1,306,406 (3891 MB/s) 873,833 (5817 MB/s) -432,573 -33.11%
+ regexdna::subst9 1,280,365 (3970 MB/s) 886,744 (5732 MB/s) -393,621 -30.74%
+ regexdna::variant1 15,271,875 (332 MB/s) 3,699,267 (1374 MB/s) -11,572,608 -75.78%
+ regexdna::variant2 16,704,090 (304 MB/s) 6,760,952 (751 MB/s) -9,943,138 -59.53%
+ regexdna::variant3 20,745,546 (245 MB/s) 8,030,646 (633 MB/s) -12,714,900 -61.29%
+ regexdna::variant4 19,285,154 (263 MB/s) 8,077,290 (629 MB/s) -11,207,864 -58.12%
+ regexdna::variant5 17,234,130 (294 MB/s) 6,787,242 (748 MB/s) -10,446,888 -60.62%
+ regexdna::variant6 17,462,350 (291 MB/s) 6,577,777 (772 MB/s) -10,884,573 -62.33%
+ regexdna::variant7 19,671,680 (258 MB/s) 6,705,580 (758 MB/s) -12,966,100 -65.91%
+ regexdna::variant8 24,515,319 (207 MB/s) 6,818,785 (745 MB/s) -17,696,534 -72.19%
+ regexdna::variant9 22,623,755 (224 MB/s) 6,821,453 (745 MB/s) -15,802,302 -69.85%
+ sherlock::before_after_holmes 4,510,830 (131 MB/s) 1,029,866 (577 MB/s) -3,480,964 -77.17%
+ sherlock::before_holmes 4,706,836 (126 MB/s) 76,633 (7763 MB/s) -4,630,203 -98.37%
+ sherlock::holmes_cochar_watson 523,122 (1137 MB/s) 144,725 (4110 MB/s) -378,397 -72.33%
+ sherlock::ing_suffix 2,030,438 (293 MB/s) 436,202 (1363 MB/s) -1,594,236 -78.52%
+ sherlock::ing_suffix_limited_space 4,996,956 (119 MB/s) 1,182,943 (502 MB/s) -3,814,013 -76.33%
+ sherlock::letters 13,529,105 (43 MB/s) 24,390,452 (24 MB/s) 10,861,347 80.28%
+ sherlock::letters_lower 13,681,607 (43 MB/s) 23,784,108 (25 MB/s) 10,102,501 73.84%
+ sherlock::letters_upper 1,904,757 (312 MB/s) 1,993,838 (298 MB/s) 89,081 4.68%
+ sherlock::line_boundary_sherlock_holmes 207,695 (2864 MB/s) 999,414 (595 MB/s) 791,719 381.19%
+ sherlock::name_alt1 486,857 (1221 MB/s) 34,298 (17345 MB/s) -452,559 -92.96%
+ sherlock::name_alt2 483,926 (1229 MB/s) 124,226 (4789 MB/s) -359,700 -74.33%
+ sherlock::name_alt3 978,827 (607 MB/s) 137,742 (4319 MB/s) -841,085 -85.93%
+ sherlock::name_alt3_nocase 2,986,143 (199 MB/s) 1,293,763 (459 MB/s) -1,692,380 -56.67%
+ sherlock::name_alt4 78,104 (7617 MB/s) 164,900 (3607 MB/s) 86,796 111.13%
+ sherlock::name_alt4_nocase 1,638,351 (363 MB/s) 235,023 (2531 MB/s) -1,403,328 -85.65%
+ sherlock::name_alt5 685,723 (867 MB/s) 127,928 (4650 MB/s) -557,795 -81.34%
+ sherlock::name_alt5_nocase 1,817,760 (327 MB/s) 659,591 (901 MB/s) -1,158,169 -63.71%
+ sherlock::name_holmes 411,102 (1447 MB/s) 40,902 (14545 MB/s) -370,200 -90.05%
+ sherlock::name_holmes_nocase 516,003 (1152 MB/s) 198,658 (2994 MB/s) -317,345 -61.50%
+ sherlock::name_sherlock 284,300 (2092 MB/s) 68,924 (8631 MB/s) -215,376 -75.76%
+ sherlock::name_sherlock_holmes 209,139 (2844 MB/s) 31,640 (18803 MB/s) -177,499 -84.87%
+ sherlock::name_sherlock_holmes_nocase 1,118,324 (531 MB/s) 173,522 (3428 MB/s) -944,802 -84.48%
+ sherlock::name_sherlock_nocase 1,126,992 (527 MB/s) 170,888 (3481 MB/s) -956,104 -84.84%
+ sherlock::name_whitespace 284,672 (2089 MB/s) 84,314 (7056 MB/s) -200,358 -70.38%
+ sherlock::no_match_common 439,955 (1352 MB/s) 20,727 (28703 MB/s) -419,228 -95.29%
+ sherlock::no_match_really_common 439,266 (1354 MB/s) 381,476 (1559 MB/s) -57,790 -13.16%
+ sherlock::no_match_uncommon 28,872 (20605 MB/s) 20,786 (28621 MB/s) -8,086 -28.01%
+ sherlock::quotes 522,877 (1137 MB/s) 531,487 (1119 MB/s) 8,610 1.65%
+ sherlock::repeated_class_negation 5,997,745 (99 MB/s) 85,881,944 (6 MB/s) 79,884,199 1331.90%
+ sherlock::the_lower 747,234 (796 MB/s) 654,110 (909 MB/s) -93,124 -12.46%
+ sherlock::the_nocase 802,320 (741 MB/s) 474,456 (1253 MB/s) -327,864 -40.86%
+ sherlock::the_upper 58,163 (10228 MB/s) 43,746 (13599 MB/s) -14,417 -24.79%
+ sherlock::the_whitespace 920,781 (646 MB/s) 1,181,974 (503 MB/s) 261,193 28.37%
+ sherlock::word_ending_n 5,703,864 (104 MB/s) 1,925,578 (308 MB/s) -3,778,286 -66.24%
+ sherlock::words 6,786,318 (87 MB/s) 9,697,201 (61 MB/s) 2,910,883 42.89%
diff --git a/vendor/regex/record/old-bench-log/05/pcre2 b/vendor/regex/record/old-bench-log/05/pcre2
new file mode 100644
index 0000000..76b3242
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/05/pcre2
@@ -0,0 +1,98 @@
+
+running 93 tests
+test misc::anchored_literal_long_match ... bench: 20 ns/iter (+/- 0) = 19500 MB/s
+test misc::anchored_literal_long_non_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s
+test misc::anchored_literal_short_match ... bench: 19 ns/iter (+/- 1) = 1368 MB/s
+test misc::anchored_literal_short_non_match ... bench: 13 ns/iter (+/- 0) = 2000 MB/s
+test misc::easy0_1K ... bench: 241 ns/iter (+/- 9) = 4360 MB/s
+test misc::easy0_1MB ... bench: 207,103 ns/iter (+/- 8,557) = 5063 MB/s
+test misc::easy0_32 ... bench: 39 ns/iter (+/- 0) = 1512 MB/s
+test misc::easy0_32K ... bench: 6,522 ns/iter (+/- 20) = 5028 MB/s
+test misc::easy1_1K ... bench: 247 ns/iter (+/- 3) = 4226 MB/s
+test misc::easy1_1MB ... bench: 206,893 ns/iter (+/- 9,489) = 5068 MB/s
+test misc::easy1_32 ... bench: 41 ns/iter (+/- 0) = 1268 MB/s
+test misc::easy1_32K ... bench: 6,516 ns/iter (+/- 301) = 5031 MB/s
+test misc::hard_1K ... bench: 1,566 ns/iter (+/- 79) = 671 MB/s
+test misc::hard_1MB ... bench: 1,119,234 ns/iter (+/- 38,605) = 936 MB/s
+test misc::hard_32 ... bench: 95 ns/iter (+/- 4) = 621 MB/s
+test misc::hard_32K ... bench: 34,411 ns/iter (+/- 1,542) = 953 MB/s
+test misc::literal ... bench: 18 ns/iter (+/- 0) = 2833 MB/s
+test misc::long_needle1 ... bench: 550,340 ns/iter (+/- 30,668) = 181 MB/s
+test misc::long_needle2 ... bench: 553,056 ns/iter (+/- 25,618) = 180 MB/s
+test misc::match_class ... bench: 82 ns/iter (+/- 1) = 987 MB/s
+test misc::match_class_in_range ... bench: 20 ns/iter (+/- 1) = 4050 MB/s
+test misc::match_class_unicode ... bench: 351 ns/iter (+/- 14) = 458 MB/s
+test misc::medium_1K ... bench: 242 ns/iter (+/- 13) = 4347 MB/s
+test misc::medium_1MB ... bench: 207,290 ns/iter (+/- 1,458) = 5058 MB/s
+test misc::medium_32 ... bench: 41 ns/iter (+/- 0) = 1463 MB/s
+test misc::medium_32K ... bench: 6,529 ns/iter (+/- 293) = 5023 MB/s
+test misc::not_literal ... bench: 161 ns/iter (+/- 7) = 316 MB/s
+test misc::one_pass_long_prefix ... bench: 17 ns/iter (+/- 1) = 1529 MB/s
+test misc::one_pass_long_prefix_not ... bench: 18 ns/iter (+/- 1) = 1444 MB/s
+test misc::one_pass_short ... bench: 45 ns/iter (+/- 2) = 377 MB/s
+test misc::one_pass_short_not ... bench: 49 ns/iter (+/- 2) = 346 MB/s
+test misc::reallyhard2_1K ... bench: 4,487 ns/iter (+/- 190) = 231 MB/s
+test misc::reallyhard_1K ... bench: 1,260 ns/iter (+/- 46) = 834 MB/s
+test misc::reallyhard_1MB ... bench: 1,361,796 ns/iter (+/- 46,490) = 770 MB/s
+test misc::reallyhard_32 ... bench: 93 ns/iter (+/- 8) = 634 MB/s
+test misc::reallyhard_32K ... bench: 42,503 ns/iter (+/- 1,721) = 771 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 4,559 ns/iter (+/- 171) = 1754 MB/s
+test regexdna::find_new_lines ... bench: 1,887,325 ns/iter (+/- 74,026) = 2693 MB/s
+test regexdna::subst1 ... bench: 963,089 ns/iter (+/- 3,478) = 5278 MB/s
+test regexdna::subst10 ... bench: 968,221 ns/iter (+/- 5,406) = 5250 MB/s
+test regexdna::subst11 ... bench: 961,661 ns/iter (+/- 45,597) = 5286 MB/s
+test regexdna::subst2 ... bench: 956,430 ns/iter (+/- 32,654) = 5314 MB/s
+test regexdna::subst3 ... bench: 961,204 ns/iter (+/- 48,799) = 5288 MB/s
+test regexdna::subst4 ... bench: 961,897 ns/iter (+/- 50,762) = 5284 MB/s
+test regexdna::subst5 ... bench: 953,412 ns/iter (+/- 69,554) = 5331 MB/s
+test regexdna::subst6 ... bench: 962,362 ns/iter (+/- 42,136) = 5282 MB/s
+test regexdna::subst7 ... bench: 961,694 ns/iter (+/- 100,348) = 5285 MB/s
+test regexdna::subst8 ... bench: 963,230 ns/iter (+/- 10,882) = 5277 MB/s
+test regexdna::subst9 ... bench: 960,246 ns/iter (+/- 27,407) = 5293 MB/s
+test regexdna::variant1 ... bench: 15,553,281 ns/iter (+/- 566,810) = 326 MB/s
+test regexdna::variant2 ... bench: 16,563,452 ns/iter (+/- 546,097) = 306 MB/s
+test regexdna::variant3 ... bench: 20,405,916 ns/iter (+/- 809,236) = 249 MB/s
+test regexdna::variant4 ... bench: 19,489,291 ns/iter (+/- 710,721) = 260 MB/s
+test regexdna::variant5 ... bench: 17,406,769 ns/iter (+/- 656,024) = 292 MB/s
+test regexdna::variant6 ... bench: 17,412,027 ns/iter (+/- 730,347) = 291 MB/s
+test regexdna::variant7 ... bench: 19,509,193 ns/iter (+/- 783,850) = 260 MB/s
+test regexdna::variant8 ... bench: 24,295,734 ns/iter (+/- 816,832) = 209 MB/s
+test regexdna::variant9 ... bench: 22,541,558 ns/iter (+/- 783,104) = 225 MB/s
+test sherlock::before_after_holmes ... bench: 4,583,804 ns/iter (+/- 124,057) = 129 MB/s
+test sherlock::before_holmes ... bench: 4,640,546 ns/iter (+/- 241,311) = 128 MB/s
+test sherlock::holmes_cochar_watson ... bench: 509,088 ns/iter (+/- 25,069) = 1168 MB/s
+test sherlock::ing_suffix ... bench: 1,865,631 ns/iter (+/- 68,625) = 318 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 4,922,883 ns/iter (+/- 232,825) = 120 MB/s
+test sherlock::letters ...
bench: 9,848,144 ns/iter (+/- 206,915) = 60 MB/s +test sherlock::letters_lower ... bench: 9,723,642 ns/iter (+/- 370,000) = 61 MB/s +test sherlock::letters_upper ... bench: 1,762,773 ns/iter (+/- 86,671) = 337 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 206,367 ns/iter (+/- 8,874) = 2882 MB/s +test sherlock::name_alt1 ... bench: 485,953 ns/iter (+/- 15,036) = 1224 MB/s +test sherlock::name_alt2 ... bench: 483,813 ns/iter (+/- 17,822) = 1229 MB/s +test sherlock::name_alt3 ... bench: 903,013 ns/iter (+/- 38,445) = 658 MB/s +test sherlock::name_alt3_nocase ... bench: 2,993,633 ns/iter (+/- 131,218) = 198 MB/s +test sherlock::name_alt4 ... bench: 78,831 ns/iter (+/- 2,012) = 7546 MB/s +test sherlock::name_alt4_nocase ... bench: 1,647,202 ns/iter (+/- 5,838) = 361 MB/s +test sherlock::name_alt5 ... bench: 678,798 ns/iter (+/- 1,146) = 876 MB/s +test sherlock::name_alt5_nocase ... bench: 1,792,461 ns/iter (+/- 3,532) = 331 MB/s +test sherlock::name_holmes ... bench: 406,138 ns/iter (+/- 1,157) = 1464 MB/s +test sherlock::name_holmes_nocase ... bench: 517,884 ns/iter (+/- 8,548) = 1148 MB/s +test sherlock::name_sherlock ... bench: 282,357 ns/iter (+/- 13,583) = 2107 MB/s +test sherlock::name_sherlock_holmes ... bench: 207,894 ns/iter (+/- 1,847) = 2861 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,122,583 ns/iter (+/- 52,189) = 529 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,092,252 ns/iter (+/- 43,130) = 544 MB/s +test sherlock::name_whitespace ... bench: 280,360 ns/iter (+/- 12,136) = 2122 MB/s +test sherlock::no_match_common ... bench: 436,303 ns/iter (+/- 19,423) = 1363 MB/s +test sherlock::no_match_really_common ... bench: 417,686 ns/iter (+/- 15,258) = 1424 MB/s +test sherlock::no_match_uncommon ... bench: 28,504 ns/iter (+/- 1,032) = 20871 MB/s +test sherlock::quotes ... bench: 541,513 ns/iter (+/- 21,121) = 1098 MB/s +test sherlock::repeated_class_negation ... bench: 5,489,721 ns/iter (+/- 185,165) = 108 MB/s +test sherlock::the_lower ... bench: 680,710 ns/iter (+/- 29,403) = 873 MB/s +test sherlock::the_nocase ... bench: 737,040 ns/iter (+/- 4,391) = 807 MB/s +test sherlock::the_upper ... bench: 50,026 ns/iter (+/- 205) = 11892 MB/s +test sherlock::the_whitespace ... bench: 885,922 ns/iter (+/- 9,145) = 671 MB/s +test sherlock::word_ending_n ... bench: 5,424,773 ns/iter (+/- 154,353) = 109 MB/s +test sherlock::words ... bench: 5,753,231 ns/iter (+/- 177,890) = 103 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 93 measured + diff --git a/vendor/regex/record/old-bench-log/05/pcre2-vs-rust b/vendor/regex/record/old-bench-log/05/pcre2-vs-rust new file mode 100644 index 0000000..3d89e19 --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/pcre2-vs-rust @@ -0,0 +1,94 @@ + name pcre2 ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 20 (19500 MB/s) 24 (16250 MB/s) 4 20.00% + misc::anchored_literal_long_non_match 15 (26000 MB/s) 27 (14444 MB/s) 12 80.00% + misc::anchored_literal_short_match 19 (1368 MB/s) 22 (1181 MB/s) 3 15.79% + misc::anchored_literal_short_non_match 13 (2000 MB/s) 24 (1083 MB/s) 11 84.62% + misc::easy0_1K 241 (4360 MB/s) 16 (65687 MB/s) -225 -93.36% + misc::easy0_1MB 207,103 (5063 MB/s) 20 (52430150 MB/s) -207,083 -99.99% + misc::easy0_32 39 (1512 MB/s) 16 (3687 MB/s) -23 -58.97% + misc::easy0_32K 6,522 (5028 MB/s) 16 (2049687 MB/s) -6,506 -99.75% + misc::easy1_1K 247 (4226 MB/s) 48 (21750 MB/s) -199 -80.57% + misc::easy1_1MB 206,893 (5068 MB/s) 48 (21845750 MB/s) -206,845 -99.98% + misc::easy1_32 41 (1268 MB/s) 46 (1130 MB/s) 5 12.20% + misc::easy1_32K 6,516 (5031 MB/s) 47 (697617 MB/s) -6,469 -99.28% + misc::hard_1K 1,566 (671 MB/s) 58 (18120 MB/s) -1,508 -96.30% + misc::hard_1MB 1,119,234 (936 MB/s) 61 (17190213 MB/s) -1,119,173 -99.99% + misc::hard_32 95 (621 MB/s) 58 (1017 MB/s) -37 -38.95% + misc::hard_32K 34,411 (953 MB/s) 56 (585625 MB/s) -34,355 -99.84% + misc::literal 18 (2833 MB/s) 16 (3187 MB/s) -2 -11.11% + misc::long_needle1 550,340 (181 MB/s) 2,226 (44924 MB/s) -548,114 -99.60% + misc::long_needle2 553,056 (180 MB/s) 576,997 (173 MB/s) 23,941 4.33% + misc::match_class 82 (987 MB/s) 65 (1246 MB/s) -17 -20.73% + misc::match_class_in_range 20 (4050 MB/s) 27 (3000 MB/s) 7 35.00% + misc::match_class_unicode 351 (458 MB/s) 283 (568 MB/s) -68 -19.37% + misc::medium_1K 242 (4347 MB/s) 16 (65750 MB/s) -226 -93.39% + misc::medium_1MB 207,290 (5058 MB/s) 21 (49933523 MB/s) -207,269 -99.99% + misc::medium_32 41 (1463 MB/s) 17 (3529 MB/s) -24 -58.54% + misc::medium_32K 6,529 (5023 MB/s) 17 (1929176 MB/s) -6,512 -99.74% + misc::not_literal 161 (316 MB/s) 105 (485 MB/s) -56 -34.78% + misc::one_pass_long_prefix 17 (1529 MB/s) 68 (382 MB/s) 51 300.00% + misc::one_pass_long_prefix_not 18 (1444 MB/s) 58 (448 MB/s) 40 222.22% + misc::one_pass_short 45 (377 MB/s) 45 (377 MB/s) 0 0.00% + misc::one_pass_short_not 49 (346 MB/s) 50 (340 MB/s) 1 2.04% + misc::reallyhard2_1K 4,487 (231 MB/s) 83 (12530 MB/s) -4,404 -98.15% + misc::reallyhard_1K 1,260 (834 MB/s) 1,822 (576 MB/s) 562 44.60% + misc::reallyhard_1MB 1,361,796 (770 MB/s) 1,768,327 (592 MB/s) 406,531 29.85% + misc::reallyhard_32 93 (634 MB/s) 121 (487 MB/s) 28 30.11% + misc::reallyhard_32K 42,503 (771 MB/s) 56,375 (581 MB/s) 13,872 32.64% + misc::reverse_suffix_no_quadratic 4,559 (1754 MB/s) 5,803 (1378 MB/s) 1,244 27.29% + regexdna::find_new_lines 1,887,325 (2693 MB/s) 14,818,233 (343 MB/s) 12,930,908 685.14% + regexdna::subst1 963,089 (5278 MB/s) 896,790 (5668 MB/s) -66,299 -6.88% + regexdna::subst10 968,221 (5250 MB/s) 957,325 (5310 MB/s) -10,896 -1.13% + regexdna::subst11 961,661 (5286 MB/s) 917,248 (5542 MB/s) -44,413 -4.62% + regexdna::subst2 956,430 (5314 MB/s) 892,129 (5698 MB/s) -64,301 -6.72% + regexdna::subst3 961,204 (5288 MB/s) 929,250 (5470 MB/s) -31,954 -3.32% + regexdna::subst4 961,897 (5284 MB/s) 872,581 (5825 MB/s) -89,316 -9.29% + regexdna::subst5 953,412 (5331 MB/s) 875,804 (5804 MB/s) -77,608 -8.14% + regexdna::subst6 962,362 (5282 MB/s) 884,639 (5746 
MB/s) -77,723 -8.08% + regexdna::subst7 961,694 (5285 MB/s) 872,791 (5824 MB/s) -88,903 -9.24% + regexdna::subst8 963,230 (5277 MB/s) 873,833 (5817 MB/s) -89,397 -9.28% + regexdna::subst9 960,246 (5293 MB/s) 886,744 (5732 MB/s) -73,502 -7.65% + regexdna::variant1 15,553,281 (326 MB/s) 3,699,267 (1374 MB/s) -11,854,014 -76.22% + regexdna::variant2 16,563,452 (306 MB/s) 6,760,952 (751 MB/s) -9,802,500 -59.18% + regexdna::variant3 20,405,916 (249 MB/s) 8,030,646 (633 MB/s) -12,375,270 -60.65% + regexdna::variant4 19,489,291 (260 MB/s) 8,077,290 (629 MB/s) -11,412,001 -58.56% + regexdna::variant5 17,406,769 (292 MB/s) 6,787,242 (748 MB/s) -10,619,527 -61.01% + regexdna::variant6 17,412,027 (291 MB/s) 6,577,777 (772 MB/s) -10,834,250 -62.22% + regexdna::variant7 19,509,193 (260 MB/s) 6,705,580 (758 MB/s) -12,803,613 -65.63% + regexdna::variant8 24,295,734 (209 MB/s) 6,818,785 (745 MB/s) -17,476,949 -71.93% + regexdna::variant9 22,541,558 (225 MB/s) 6,821,453 (745 MB/s) -15,720,105 -69.74% + sherlock::before_after_holmes 4,583,804 (129 MB/s) 1,029,866 (577 MB/s) -3,553,938 -77.53% + sherlock::before_holmes 4,640,546 (128 MB/s) 76,633 (7763 MB/s) -4,563,913 -98.35% + sherlock::holmes_cochar_watson 509,088 (1168 MB/s) 144,725 (4110 MB/s) -364,363 -71.57% + sherlock::ing_suffix 1,865,631 (318 MB/s) 436,202 (1363 MB/s) -1,429,429 -76.62% + sherlock::ing_suffix_limited_space 4,922,883 (120 MB/s) 1,182,943 (502 MB/s) -3,739,940 -75.97% + sherlock::letters 9,848,144 (60 MB/s) 24,390,452 (24 MB/s) 14,542,308 147.67% + sherlock::letters_lower 9,723,642 (61 MB/s) 23,784,108 (25 MB/s) 14,060,466 144.60% + sherlock::letters_upper 1,762,773 (337 MB/s) 1,993,838 (298 MB/s) 231,065 13.11% + sherlock::line_boundary_sherlock_holmes 206,367 (2882 MB/s) 999,414 (595 MB/s) 793,047 384.29% + sherlock::name_alt1 485,953 (1224 MB/s) 34,298 (17345 MB/s) -451,655 -92.94% + sherlock::name_alt2 483,813 (1229 MB/s) 124,226 (4789 MB/s) -359,587 -74.32% + sherlock::name_alt3 903,013 (658 MB/s) 137,742 (4319 MB/s) -765,271 -84.75% + sherlock::name_alt3_nocase 2,993,633 (198 MB/s) 1,293,763 (459 MB/s) -1,699,870 -56.78% + sherlock::name_alt4 78,831 (7546 MB/s) 164,900 (3607 MB/s) 86,069 109.18% + sherlock::name_alt4_nocase 1,647,202 (361 MB/s) 235,023 (2531 MB/s) -1,412,179 -85.73% + sherlock::name_alt5 678,798 (876 MB/s) 127,928 (4650 MB/s) -550,870 -81.15% + sherlock::name_alt5_nocase 1,792,461 (331 MB/s) 659,591 (901 MB/s) -1,132,870 -63.20% + sherlock::name_holmes 406,138 (1464 MB/s) 40,902 (14545 MB/s) -365,236 -89.93% + sherlock::name_holmes_nocase 517,884 (1148 MB/s) 198,658 (2994 MB/s) -319,226 -61.64% + sherlock::name_sherlock 282,357 (2107 MB/s) 68,924 (8631 MB/s) -213,433 -75.59% + sherlock::name_sherlock_holmes 207,894 (2861 MB/s) 31,640 (18803 MB/s) -176,254 -84.78% + sherlock::name_sherlock_holmes_nocase 1,122,583 (529 MB/s) 173,522 (3428 MB/s) -949,061 -84.54% + sherlock::name_sherlock_nocase 1,092,252 (544 MB/s) 170,888 (3481 MB/s) -921,364 -84.35% + sherlock::name_whitespace 280,360 (2122 MB/s) 84,314 (7056 MB/s) -196,046 -69.93% + sherlock::no_match_common 436,303 (1363 MB/s) 20,727 (28703 MB/s) -415,576 -95.25% + sherlock::no_match_really_common 417,686 (1424 MB/s) 381,476 (1559 MB/s) -36,210 -8.67% + sherlock::no_match_uncommon 28,504 (20871 MB/s) 20,786 (28621 MB/s) -7,718 -27.08% + sherlock::quotes 541,513 (1098 MB/s) 531,487 (1119 MB/s) -10,026 -1.85% + sherlock::repeated_class_negation 5,489,721 (108 MB/s) 85,881,944 (6 MB/s) 80,392,223 1464.41% + sherlock::the_lower 680,710 (873 MB/s) 654,110 (909 
MB/s) -26,600 -3.91% + sherlock::the_nocase 737,040 (807 MB/s) 474,456 (1253 MB/s) -262,584 -35.63% + sherlock::the_upper 50,026 (11892 MB/s) 43,746 (13599 MB/s) -6,280 -12.55% + sherlock::the_whitespace 885,922 (671 MB/s) 1,181,974 (503 MB/s) 296,052 33.42% + sherlock::word_ending_n 5,424,773 (109 MB/s) 1,925,578 (308 MB/s) -3,499,195 -64.50% + sherlock::words 5,753,231 (103 MB/s) 9,697,201 (61 MB/s) 3,943,970 68.55% diff --git a/vendor/regex/record/old-bench-log/05/re2 b/vendor/regex/record/old-bench-log/05/re2 new file mode 100644 index 0000000..d1f0bea --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/re2 @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 102 ns/iter (+/- 3) = 3823 MB/s +test misc::anchored_literal_long_non_match ... bench: 20 ns/iter (+/- 0) = 19500 MB/s +test misc::anchored_literal_short_match ... bench: 95 ns/iter (+/- 8) = 273 MB/s +test misc::anchored_literal_short_non_match ... bench: 17 ns/iter (+/- 0) = 1529 MB/s +test misc::easy0_1K ... bench: 149 ns/iter (+/- 10) = 7053 MB/s +test misc::easy0_1MB ... bench: 29,234 ns/iter (+/- 886) = 35869 MB/s +test misc::easy0_32 ... bench: 126 ns/iter (+/- 4) = 468 MB/s +test misc::easy0_32K ... bench: 1,266 ns/iter (+/- 42) = 25904 MB/s +test misc::easy1_1K ... bench: 130 ns/iter (+/- 4) = 8030 MB/s +test misc::easy1_1MB ... bench: 29,218 ns/iter (+/- 791) = 35888 MB/s +test misc::easy1_32 ... bench: 112 ns/iter (+/- 7) = 464 MB/s +test misc::easy1_32K ... bench: 1,251 ns/iter (+/- 45) = 26209 MB/s +test misc::hard_1K ... bench: 2,357 ns/iter (+/- 33) = 445 MB/s +test misc::hard_1MB ... bench: 2,149,909 ns/iter (+/- 151,258) = 487 MB/s +test misc::hard_32 ... bench: 195 ns/iter (+/- 16) = 302 MB/s +test misc::hard_32K ... bench: 105,137 ns/iter (+/- 6,252) = 311 MB/s +test misc::literal ... bench: 89 ns/iter (+/- 3) = 573 MB/s +test misc::long_needle1 ... bench: 170,090 ns/iter (+/- 5,891) = 587 MB/s +test misc::long_needle2 ... bench: 174,341 ns/iter (+/- 7,949) = 573 MB/s +test misc::match_class ... bench: 220 ns/iter (+/- 16) = 368 MB/s +test misc::match_class_in_range ... bench: 215 ns/iter (+/- 16) = 376 MB/s +test misc::match_class_unicode ... bench: 382 ns/iter (+/- 27) = 421 MB/s +test misc::medium_1K ... bench: 1,939 ns/iter (+/- 153) = 542 MB/s +test misc::medium_1MB ... bench: 1,775,335 ns/iter (+/- 91,241) = 590 MB/s +test misc::medium_32 ... bench: 190 ns/iter (+/- 12) = 315 MB/s +test misc::medium_32K ... bench: 83,245 ns/iter (+/- 5,385) = 393 MB/s +test misc::no_exponential ... bench: 269 ns/iter (+/- 22) = 371 MB/s +test misc::not_literal ... bench: 167 ns/iter (+/- 13) = 305 MB/s +test misc::one_pass_long_prefix ... bench: 84 ns/iter (+/- 7) = 309 MB/s +test misc::one_pass_long_prefix_not ... bench: 137 ns/iter (+/- 12) = 189 MB/s +test misc::one_pass_short ... bench: 108 ns/iter (+/- 3) = 157 MB/s +test misc::one_pass_short_not ... bench: 105 ns/iter (+/- 6) = 161 MB/s +test misc::reallyhard2_1K ... bench: 1,811 ns/iter (+/- 44) = 574 MB/s +test misc::reallyhard_1K ... bench: 2,324 ns/iter (+/- 223) = 452 MB/s +test misc::reallyhard_1MB ... bench: 2,033,298 ns/iter (+/- 148,939) = 515 MB/s +test misc::reallyhard_32 ... bench: 185 ns/iter (+/- 8) = 318 MB/s +test misc::reallyhard_32K ... bench: 83,263 ns/iter (+/- 4,231) = 393 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 13,501 ns/iter (+/- 1,380) = 592 MB/s +test regexdna::find_new_lines ... bench: 31,464,067 ns/iter (+/- 2,248,457) = 161 MB/s +test regexdna::subst1 ... 
bench: 5,257,629 ns/iter (+/- 142,910) = 966 MB/s +test regexdna::subst10 ... bench: 5,189,384 ns/iter (+/- 130,525) = 979 MB/s +test regexdna::subst11 ... bench: 5,261,936 ns/iter (+/- 309,355) = 966 MB/s +test regexdna::subst2 ... bench: 5,268,281 ns/iter (+/- 348,592) = 964 MB/s +test regexdna::subst3 ... bench: 5,245,664 ns/iter (+/- 403,198) = 969 MB/s +test regexdna::subst4 ... bench: 5,264,833 ns/iter (+/- 312,063) = 965 MB/s +test regexdna::subst5 ... bench: 5,181,850 ns/iter (+/- 117,306) = 981 MB/s +test regexdna::subst6 ... bench: 5,200,226 ns/iter (+/- 124,723) = 977 MB/s +test regexdna::subst7 ... bench: 5,233,678 ns/iter (+/- 367,749) = 971 MB/s +test regexdna::subst8 ... bench: 5,242,400 ns/iter (+/- 317,859) = 969 MB/s +test regexdna::subst9 ... bench: 5,325,464 ns/iter (+/- 395,485) = 954 MB/s +test regexdna::variant1 ... bench: 24,377,246 ns/iter (+/- 733,355) = 208 MB/s +test regexdna::variant2 ... bench: 26,405,686 ns/iter (+/- 771,755) = 192 MB/s +test regexdna::variant3 ... bench: 25,130,419 ns/iter (+/- 1,245,527) = 202 MB/s +test regexdna::variant4 ... bench: 32,527,780 ns/iter (+/- 5,073,721) = 156 MB/s +test regexdna::variant5 ... bench: 31,081,800 ns/iter (+/- 1,256,796) = 163 MB/s +test regexdna::variant6 ... bench: 28,744,478 ns/iter (+/- 1,243,565) = 176 MB/s +test regexdna::variant7 ... bench: 26,693,756 ns/iter (+/- 886,566) = 190 MB/s +test regexdna::variant8 ... bench: 21,478,184 ns/iter (+/- 1,374,415) = 236 MB/s +test regexdna::variant9 ... bench: 18,639,814 ns/iter (+/- 519,136) = 272 MB/s +test sherlock::before_after_holmes ... bench: 1,552,265 ns/iter (+/- 105,467) = 383 MB/s +test sherlock::before_holmes ... bench: 1,360,446 ns/iter (+/- 111,123) = 437 MB/s +test sherlock::everything_greedy ... bench: 6,356,610 ns/iter (+/- 343,163) = 93 MB/s +test sherlock::everything_greedy_nl ... bench: 2,380,946 ns/iter (+/- 36,936) = 249 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,144,439 ns/iter (+/- 25,948) = 519 MB/s +test sherlock::holmes_coword_watson ... bench: 1,503,311 ns/iter (+/- 99,075) = 395 MB/s +test sherlock::ing_suffix ... bench: 3,003,144 ns/iter (+/- 239,408) = 198 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,721,656 ns/iter (+/- 129,795) = 345 MB/s +test sherlock::letters ... bench: 73,833,131 ns/iter (+/- 2,542,107) = 8 MB/s +test sherlock::letters_lower ... bench: 72,250,289 ns/iter (+/- 1,280,826) = 8 MB/s +test sherlock::letters_upper ... bench: 3,397,481 ns/iter (+/- 160,294) = 175 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 3,694,486 ns/iter (+/- 403,679) = 161 MB/s +test sherlock::name_alt1 ... bench: 70,121 ns/iter (+/- 3,926) = 8484 MB/s +test sherlock::name_alt2 ... bench: 1,120,245 ns/iter (+/- 36,040) = 531 MB/s +test sherlock::name_alt3 ... bench: 1,247,630 ns/iter (+/- 127,226) = 476 MB/s +test sherlock::name_alt3_nocase ... bench: 2,894,586 ns/iter (+/- 201,023) = 205 MB/s +test sherlock::name_alt4 ... bench: 1,142,872 ns/iter (+/- 82,896) = 520 MB/s +test sherlock::name_alt4_nocase ... bench: 1,785,266 ns/iter (+/- 166,100) = 333 MB/s +test sherlock::name_alt5 ... bench: 1,167,553 ns/iter (+/- 91,672) = 509 MB/s +test sherlock::name_alt5_nocase ... bench: 2,023,732 ns/iter (+/- 74,558) = 293 MB/s +test sherlock::name_holmes ... bench: 126,480 ns/iter (+/- 6,959) = 4703 MB/s +test sherlock::name_holmes_nocase ... bench: 1,420,548 ns/iter (+/- 75,407) = 418 MB/s +test sherlock::name_sherlock ... bench: 57,090 ns/iter (+/- 1,392) = 10420 MB/s +test sherlock::name_sherlock_holmes ... 
bench: 57,965 ns/iter (+/- 2,996) = 10263 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,837,721 ns/iter (+/- 66,965) = 323 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,672,451 ns/iter (+/- 204,590) = 355 MB/s +test sherlock::name_whitespace ... bench: 60,342 ns/iter (+/- 3,290) = 9859 MB/s +test sherlock::no_match_common ... bench: 434,496 ns/iter (+/- 35,617) = 1369 MB/s +test sherlock::no_match_really_common ... bench: 431,778 ns/iter (+/- 11,799) = 1377 MB/s +test sherlock::no_match_uncommon ... bench: 19,313 ns/iter (+/- 1,167) = 30804 MB/s +test sherlock::quotes ... bench: 1,301,485 ns/iter (+/- 92,772) = 457 MB/s +test sherlock::the_lower ... bench: 1,846,403 ns/iter (+/- 39,799) = 322 MB/s +test sherlock::the_nocase ... bench: 2,956,115 ns/iter (+/- 136,011) = 201 MB/s +test sherlock::the_upper ... bench: 165,976 ns/iter (+/- 5,838) = 3584 MB/s +test sherlock::the_whitespace ... bench: 1,816,669 ns/iter (+/- 117,437) = 327 MB/s +test sherlock::word_ending_n ... bench: 2,601,847 ns/iter (+/- 166,024) = 228 MB/s +test sherlock::words ... bench: 21,137,049 ns/iter (+/- 750,253) = 28 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 96 measured + diff --git a/vendor/regex/record/old-bench-log/05/re2-vs-rust b/vendor/regex/record/old-bench-log/05/re2-vs-rust new file mode 100644 index 0000000..180e431 --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/re2-vs-rust @@ -0,0 +1,97 @@ + name re2 ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 102 (3823 MB/s) 24 (16250 MB/s) -78 -76.47% + misc::anchored_literal_long_non_match 20 (19500 MB/s) 27 (14444 MB/s) 7 35.00% + misc::anchored_literal_short_match 95 (273 MB/s) 22 (1181 MB/s) -73 -76.84% + misc::anchored_literal_short_non_match 17 (1529 MB/s) 24 (1083 MB/s) 7 41.18% + misc::easy0_1K 149 (7053 MB/s) 16 (65687 MB/s) -133 -89.26% + misc::easy0_1MB 29,234 (35869 MB/s) 20 (52430150 MB/s) -29,214 -99.93% + misc::easy0_32 126 (468 MB/s) 16 (3687 MB/s) -110 -87.30% + misc::easy0_32K 1,266 (25904 MB/s) 16 (2049687 MB/s) -1,250 -98.74% + misc::easy1_1K 130 (8030 MB/s) 48 (21750 MB/s) -82 -63.08% + misc::easy1_1MB 29,218 (35888 MB/s) 48 (21845750 MB/s) -29,170 -99.84% + misc::easy1_32 112 (464 MB/s) 46 (1130 MB/s) -66 -58.93% + misc::easy1_32K 1,251 (26209 MB/s) 47 (697617 MB/s) -1,204 -96.24% + misc::hard_1K 2,357 (445 MB/s) 58 (18120 MB/s) -2,299 -97.54% + misc::hard_1MB 2,149,909 (487 MB/s) 61 (17190213 MB/s) -2,149,848 -100.00% + misc::hard_32 195 (302 MB/s) 58 (1017 MB/s) -137 -70.26% + misc::hard_32K 105,137 (311 MB/s) 56 (585625 MB/s) -105,081 -99.95% + misc::literal 89 (573 MB/s) 16 (3187 MB/s) -73 -82.02% + misc::long_needle1 170,090 (587 MB/s) 2,226 (44924 MB/s) -167,864 -98.69% + misc::long_needle2 174,341 (573 MB/s) 576,997 (173 MB/s) 402,656 230.96% + misc::match_class 220 (368 MB/s) 65 (1246 MB/s) -155 -70.45% + misc::match_class_in_range 215 (376 MB/s) 27 (3000 MB/s) -188 -87.44% + misc::match_class_unicode 382 (421 MB/s) 283 (568 MB/s) -99 -25.92% + misc::medium_1K 1,939 (542 MB/s) 16 (65750 MB/s) -1,923 -99.17% + misc::medium_1MB 1,775,335 (590 MB/s) 21 (49933523 MB/s) -1,775,314 -100.00% + misc::medium_32 190 (315 MB/s) 17 (3529 MB/s) -173 -91.05% + misc::medium_32K 83,245 (393 MB/s) 17 (1929176 MB/s) -83,228 -99.98% + misc::no_exponential 269 (371 MB/s) 394 (253 MB/s) 125 46.47% + misc::not_literal 167 (305 MB/s) 105 (485 MB/s) -62 -37.13% + misc::one_pass_long_prefix 84 (309 MB/s) 68 (382 MB/s) -16 -19.05% + misc::one_pass_long_prefix_not 137 (189 MB/s) 58 (448 
MB/s) -79 -57.66% + misc::one_pass_short 108 (157 MB/s) 45 (377 MB/s) -63 -58.33% + misc::one_pass_short_not 105 (161 MB/s) 50 (340 MB/s) -55 -52.38% + misc::reallyhard2_1K 1,811 (574 MB/s) 83 (12530 MB/s) -1,728 -95.42% + misc::reallyhard_1K 2,324 (452 MB/s) 1,822 (576 MB/s) -502 -21.60% + misc::reallyhard_1MB 2,033,298 (515 MB/s) 1,768,327 (592 MB/s) -264,971 -13.03% + misc::reallyhard_32 185 (318 MB/s) 121 (487 MB/s) -64 -34.59% + misc::reallyhard_32K 83,263 (393 MB/s) 56,375 (581 MB/s) -26,888 -32.29% + misc::reverse_suffix_no_quadratic 13,501 (592 MB/s) 5,803 (1378 MB/s) -7,698 -57.02% + regexdna::find_new_lines 31,464,067 (161 MB/s) 14,818,233 (343 MB/s) -16,645,834 -52.90% + regexdna::subst1 5,257,629 (966 MB/s) 896,790 (5668 MB/s) -4,360,839 -82.94% + regexdna::subst10 5,189,384 (979 MB/s) 957,325 (5310 MB/s) -4,232,059 -81.55% + regexdna::subst11 5,261,936 (966 MB/s) 917,248 (5542 MB/s) -4,344,688 -82.57% + regexdna::subst2 5,268,281 (964 MB/s) 892,129 (5698 MB/s) -4,376,152 -83.07% + regexdna::subst3 5,245,664 (969 MB/s) 929,250 (5470 MB/s) -4,316,414 -82.29% + regexdna::subst4 5,264,833 (965 MB/s) 872,581 (5825 MB/s) -4,392,252 -83.43% + regexdna::subst5 5,181,850 (981 MB/s) 875,804 (5804 MB/s) -4,306,046 -83.10% + regexdna::subst6 5,200,226 (977 MB/s) 884,639 (5746 MB/s) -4,315,587 -82.99% + regexdna::subst7 5,233,678 (971 MB/s) 872,791 (5824 MB/s) -4,360,887 -83.32% + regexdna::subst8 5,242,400 (969 MB/s) 873,833 (5817 MB/s) -4,368,567 -83.33% + regexdna::subst9 5,325,464 (954 MB/s) 886,744 (5732 MB/s) -4,438,720 -83.35% + regexdna::variant1 24,377,246 (208 MB/s) 3,699,267 (1374 MB/s) -20,677,979 -84.82% + regexdna::variant2 26,405,686 (192 MB/s) 6,760,952 (751 MB/s) -19,644,734 -74.40% + regexdna::variant3 25,130,419 (202 MB/s) 8,030,646 (633 MB/s) -17,099,773 -68.04% + regexdna::variant4 32,527,780 (156 MB/s) 8,077,290 (629 MB/s) -24,450,490 -75.17% + regexdna::variant5 31,081,800 (163 MB/s) 6,787,242 (748 MB/s) -24,294,558 -78.16% + regexdna::variant6 28,744,478 (176 MB/s) 6,577,777 (772 MB/s) -22,166,701 -77.12% + regexdna::variant7 26,693,756 (190 MB/s) 6,705,580 (758 MB/s) -19,988,176 -74.88% + regexdna::variant8 21,478,184 (236 MB/s) 6,818,785 (745 MB/s) -14,659,399 -68.25% + regexdna::variant9 18,639,814 (272 MB/s) 6,821,453 (745 MB/s) -11,818,361 -63.40% + sherlock::before_after_holmes 1,552,265 (383 MB/s) 1,029,866 (577 MB/s) -522,399 -33.65% + sherlock::before_holmes 1,360,446 (437 MB/s) 76,633 (7763 MB/s) -1,283,813 -94.37% + sherlock::everything_greedy 6,356,610 (93 MB/s) 2,375,079 (250 MB/s) -3,981,531 -62.64% + sherlock::everything_greedy_nl 2,380,946 (249 MB/s) 916,250 (649 MB/s) -1,464,696 -61.52% + sherlock::holmes_cochar_watson 1,144,439 (519 MB/s) 144,725 (4110 MB/s) -999,714 -87.35% + sherlock::holmes_coword_watson 1,503,311 (395 MB/s) 565,247 (1052 MB/s) -938,064 -62.40% + sherlock::ing_suffix 3,003,144 (198 MB/s) 436,202 (1363 MB/s) -2,566,942 -85.48% + sherlock::ing_suffix_limited_space 1,721,656 (345 MB/s) 1,182,943 (502 MB/s) -538,713 -31.29% + sherlock::letters 73,833,131 (8 MB/s) 24,390,452 (24 MB/s) -49,442,679 -66.97% + sherlock::letters_lower 72,250,289 (8 MB/s) 23,784,108 (25 MB/s) -48,466,181 -67.08% + sherlock::letters_upper 3,397,481 (175 MB/s) 1,993,838 (298 MB/s) -1,403,643 -41.31% + sherlock::line_boundary_sherlock_holmes 3,694,486 (161 MB/s) 999,414 (595 MB/s) -2,695,072 -72.95% + sherlock::name_alt1 70,121 (8484 MB/s) 34,298 (17345 MB/s) -35,823 -51.09% + sherlock::name_alt2 1,120,245 (531 MB/s) 124,226 (4789 MB/s) -996,019 -88.91% + 
sherlock::name_alt3 1,247,630 (476 MB/s) 137,742 (4319 MB/s) -1,109,888 -88.96% + sherlock::name_alt3_nocase 2,894,586 (205 MB/s) 1,293,763 (459 MB/s) -1,600,823 -55.30% + sherlock::name_alt4 1,142,872 (520 MB/s) 164,900 (3607 MB/s) -977,972 -85.57% + sherlock::name_alt4_nocase 1,785,266 (333 MB/s) 235,023 (2531 MB/s) -1,550,243 -86.84% + sherlock::name_alt5 1,167,553 (509 MB/s) 127,928 (4650 MB/s) -1,039,625 -89.04% + sherlock::name_alt5_nocase 2,023,732 (293 MB/s) 659,591 (901 MB/s) -1,364,141 -67.41% + sherlock::name_holmes 126,480 (4703 MB/s) 40,902 (14545 MB/s) -85,578 -67.66% + sherlock::name_holmes_nocase 1,420,548 (418 MB/s) 198,658 (2994 MB/s) -1,221,890 -86.02% + sherlock::name_sherlock 57,090 (10420 MB/s) 68,924 (8631 MB/s) 11,834 20.73% + sherlock::name_sherlock_holmes 57,965 (10263 MB/s) 31,640 (18803 MB/s) -26,325 -45.42% + sherlock::name_sherlock_holmes_nocase 1,837,721 (323 MB/s) 173,522 (3428 MB/s) -1,664,199 -90.56% + sherlock::name_sherlock_nocase 1,672,451 (355 MB/s) 170,888 (3481 MB/s) -1,501,563 -89.78% + sherlock::name_whitespace 60,342 (9859 MB/s) 84,314 (7056 MB/s) 23,972 39.73% + sherlock::no_match_common 434,496 (1369 MB/s) 20,727 (28703 MB/s) -413,769 -95.23% + sherlock::no_match_really_common 431,778 (1377 MB/s) 381,476 (1559 MB/s) -50,302 -11.65% + sherlock::no_match_uncommon 19,313 (30804 MB/s) 20,786 (28621 MB/s) 1,473 7.63% + sherlock::quotes 1,301,485 (457 MB/s) 531,487 (1119 MB/s) -769,998 -59.16% + sherlock::the_lower 1,846,403 (322 MB/s) 654,110 (909 MB/s) -1,192,293 -64.57% + sherlock::the_nocase 2,956,115 (201 MB/s) 474,456 (1253 MB/s) -2,481,659 -83.95% + sherlock::the_upper 165,976 (3584 MB/s) 43,746 (13599 MB/s) -122,230 -73.64% + sherlock::the_whitespace 1,816,669 (327 MB/s) 1,181,974 (503 MB/s) -634,695 -34.94% + sherlock::word_ending_n 2,601,847 (228 MB/s) 1,925,578 (308 MB/s) -676,269 -25.99% + sherlock::words 21,137,049 (28 MB/s) 9,697,201 (61 MB/s) -11,439,848 -54.12% diff --git a/vendor/regex/record/old-bench-log/05/rust b/vendor/regex/record/old-bench-log/05/rust new file mode 100644 index 0000000..22848cc --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/rust @@ -0,0 +1,103 @@ + +running 98 tests +test misc::anchored_literal_long_match ... bench: 24 ns/iter (+/- 0) = 16250 MB/s +test misc::anchored_literal_long_non_match ... bench: 27 ns/iter (+/- 0) = 14444 MB/s +test misc::anchored_literal_short_match ... bench: 22 ns/iter (+/- 0) = 1181 MB/s +test misc::anchored_literal_short_non_match ... bench: 24 ns/iter (+/- 1) = 1083 MB/s +test misc::easy0_1K ... bench: 16 ns/iter (+/- 0) = 65687 MB/s +test misc::easy0_1MB ... bench: 20 ns/iter (+/- 0) = 52430150 MB/s +test misc::easy0_32 ... bench: 16 ns/iter (+/- 0) = 3687 MB/s +test misc::easy0_32K ... bench: 16 ns/iter (+/- 0) = 2049687 MB/s +test misc::easy1_1K ... bench: 48 ns/iter (+/- 2) = 21750 MB/s +test misc::easy1_1MB ... bench: 48 ns/iter (+/- 2) = 21845750 MB/s +test misc::easy1_32 ... bench: 46 ns/iter (+/- 0) = 1130 MB/s +test misc::easy1_32K ... bench: 47 ns/iter (+/- 0) = 697617 MB/s +test misc::hard_1K ... bench: 58 ns/iter (+/- 0) = 18120 MB/s +test misc::hard_1MB ... bench: 61 ns/iter (+/- 0) = 17190213 MB/s +test misc::hard_32 ... bench: 58 ns/iter (+/- 0) = 1017 MB/s +test misc::hard_32K ... bench: 56 ns/iter (+/- 2) = 585625 MB/s +test misc::literal ... bench: 16 ns/iter (+/- 0) = 3187 MB/s +test misc::long_needle1 ... bench: 2,226 ns/iter (+/- 139) = 44924 MB/s +test misc::long_needle2 ... bench: 576,997 ns/iter (+/- 21,660) = 173 MB/s +test misc::match_class ... 
bench: 65 ns/iter (+/- 3) = 1246 MB/s +test misc::match_class_in_range ... bench: 27 ns/iter (+/- 0) = 3000 MB/s +test misc::match_class_unicode ... bench: 283 ns/iter (+/- 15) = 568 MB/s +test misc::medium_1K ... bench: 16 ns/iter (+/- 0) = 65750 MB/s +test misc::medium_1MB ... bench: 21 ns/iter (+/- 1) = 49933523 MB/s +test misc::medium_32 ... bench: 17 ns/iter (+/- 0) = 3529 MB/s +test misc::medium_32K ... bench: 17 ns/iter (+/- 0) = 1929176 MB/s +test misc::no_exponential ... bench: 394 ns/iter (+/- 0) = 253 MB/s +test misc::not_literal ... bench: 105 ns/iter (+/- 0) = 485 MB/s +test misc::one_pass_long_prefix ... bench: 68 ns/iter (+/- 0) = 382 MB/s +test misc::one_pass_long_prefix_not ... bench: 58 ns/iter (+/- 3) = 448 MB/s +test misc::one_pass_short ... bench: 45 ns/iter (+/- 2) = 377 MB/s +test misc::one_pass_short_not ... bench: 50 ns/iter (+/- 16) = 340 MB/s +test misc::reallyhard2_1K ... bench: 83 ns/iter (+/- 4) = 12530 MB/s +test misc::reallyhard_1K ... bench: 1,822 ns/iter (+/- 72) = 576 MB/s +test misc::reallyhard_1MB ... bench: 1,768,327 ns/iter (+/- 67,421) = 592 MB/s +test misc::reallyhard_32 ... bench: 121 ns/iter (+/- 4) = 487 MB/s +test misc::reallyhard_32K ... bench: 56,375 ns/iter (+/- 1,404) = 581 MB/s +test misc::replace_all ... bench: 142 ns/iter (+/- 0) +test misc::reverse_suffix_no_quadratic ... bench: 5,803 ns/iter (+/- 6) = 1378 MB/s +test regexdna::find_new_lines ... bench: 14,818,233 ns/iter (+/- 430,454) = 343 MB/s +test regexdna::subst1 ... bench: 896,790 ns/iter (+/- 2,273) = 5668 MB/s +test regexdna::subst10 ... bench: 957,325 ns/iter (+/- 7,490) = 5310 MB/s +test regexdna::subst11 ... bench: 917,248 ns/iter (+/- 12,886) = 5542 MB/s +test regexdna::subst2 ... bench: 892,129 ns/iter (+/- 36,230) = 5698 MB/s +test regexdna::subst3 ... bench: 929,250 ns/iter (+/- 38,312) = 5470 MB/s +test regexdna::subst4 ... bench: 872,581 ns/iter (+/- 27,431) = 5825 MB/s +test regexdna::subst5 ... bench: 875,804 ns/iter (+/- 30,611) = 5804 MB/s +test regexdna::subst6 ... bench: 884,639 ns/iter (+/- 44,927) = 5746 MB/s +test regexdna::subst7 ... bench: 872,791 ns/iter (+/- 31,810) = 5824 MB/s +test regexdna::subst8 ... bench: 873,833 ns/iter (+/- 37,335) = 5817 MB/s +test regexdna::subst9 ... bench: 886,744 ns/iter (+/- 42,880) = 5732 MB/s +test regexdna::variant1 ... bench: 3,699,267 ns/iter (+/- 134,945) = 1374 MB/s +test regexdna::variant2 ... bench: 6,760,952 ns/iter (+/- 228,082) = 751 MB/s +test regexdna::variant3 ... bench: 8,030,646 ns/iter (+/- 271,204) = 633 MB/s +test regexdna::variant4 ... bench: 8,077,290 ns/iter (+/- 266,264) = 629 MB/s +test regexdna::variant5 ... bench: 6,787,242 ns/iter (+/- 226,071) = 748 MB/s +test regexdna::variant6 ... bench: 6,577,777 ns/iter (+/- 226,332) = 772 MB/s +test regexdna::variant7 ... bench: 6,705,580 ns/iter (+/- 232,953) = 758 MB/s +test regexdna::variant8 ... bench: 6,818,785 ns/iter (+/- 241,075) = 745 MB/s +test regexdna::variant9 ... bench: 6,821,453 ns/iter (+/- 257,044) = 745 MB/s +test sherlock::before_after_holmes ... bench: 1,029,866 ns/iter (+/- 42,662) = 577 MB/s +test sherlock::before_holmes ... bench: 76,633 ns/iter (+/- 1,135) = 7763 MB/s +test sherlock::everything_greedy ... bench: 2,375,079 ns/iter (+/- 102,532) = 250 MB/s +test sherlock::everything_greedy_nl ... bench: 916,250 ns/iter (+/- 37,950) = 649 MB/s +test sherlock::holmes_cochar_watson ... bench: 144,725 ns/iter (+/- 8,793) = 4110 MB/s +test sherlock::holmes_coword_watson ... 
bench: 565,247 ns/iter (+/- 24,056) = 1052 MB/s +test sherlock::ing_suffix ... bench: 436,202 ns/iter (+/- 19,863) = 1363 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,182,943 ns/iter (+/- 38,658) = 502 MB/s +test sherlock::letters ... bench: 24,390,452 ns/iter (+/- 869,008) = 24 MB/s +test sherlock::letters_lower ... bench: 23,784,108 ns/iter (+/- 796,195) = 25 MB/s +test sherlock::letters_upper ... bench: 1,993,838 ns/iter (+/- 77,697) = 298 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 999,414 ns/iter (+/- 31,202) = 595 MB/s +test sherlock::name_alt1 ... bench: 34,298 ns/iter (+/- 1,091) = 17345 MB/s +test sherlock::name_alt2 ... bench: 124,226 ns/iter (+/- 5,579) = 4789 MB/s +test sherlock::name_alt3 ... bench: 137,742 ns/iter (+/- 6,496) = 4319 MB/s +test sherlock::name_alt3_nocase ... bench: 1,293,763 ns/iter (+/- 51,097) = 459 MB/s +test sherlock::name_alt4 ... bench: 164,900 ns/iter (+/- 10,023) = 3607 MB/s +test sherlock::name_alt4_nocase ... bench: 235,023 ns/iter (+/- 14,465) = 2531 MB/s +test sherlock::name_alt5 ... bench: 127,928 ns/iter (+/- 6,882) = 4650 MB/s +test sherlock::name_alt5_nocase ... bench: 659,591 ns/iter (+/- 20,587) = 901 MB/s +test sherlock::name_holmes ... bench: 40,902 ns/iter (+/- 402) = 14545 MB/s +test sherlock::name_holmes_nocase ... bench: 198,658 ns/iter (+/- 3,782) = 2994 MB/s +test sherlock::name_sherlock ... bench: 68,924 ns/iter (+/- 1,456) = 8631 MB/s +test sherlock::name_sherlock_holmes ... bench: 31,640 ns/iter (+/- 383) = 18803 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 173,522 ns/iter (+/- 7,812) = 3428 MB/s +test sherlock::name_sherlock_nocase ... bench: 170,888 ns/iter (+/- 612) = 3481 MB/s +test sherlock::name_whitespace ... bench: 84,314 ns/iter (+/- 508) = 7056 MB/s +test sherlock::no_match_common ... bench: 20,727 ns/iter (+/- 565) = 28703 MB/s +test sherlock::no_match_really_common ... bench: 381,476 ns/iter (+/- 2,338) = 1559 MB/s +test sherlock::no_match_uncommon ... bench: 20,786 ns/iter (+/- 717) = 28621 MB/s +test sherlock::quotes ... bench: 531,487 ns/iter (+/- 5,517) = 1119 MB/s +test sherlock::repeated_class_negation ... bench: 85,881,944 ns/iter (+/- 4,906,514) = 6 MB/s +test sherlock::the_lower ... bench: 654,110 ns/iter (+/- 34,542) = 909 MB/s +test sherlock::the_nocase ... bench: 474,456 ns/iter (+/- 16,549) = 1253 MB/s +test sherlock::the_upper ... bench: 43,746 ns/iter (+/- 579) = 13599 MB/s +test sherlock::the_whitespace ... bench: 1,181,974 ns/iter (+/- 3,005) = 503 MB/s +test sherlock::word_ending_n ... bench: 1,925,578 ns/iter (+/- 3,811) = 308 MB/s +test sherlock::words ... bench: 9,697,201 ns/iter (+/- 156,772) = 61 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 98 measured + diff --git a/vendor/regex/record/old-bench-log/05/tcl b/vendor/regex/record/old-bench-log/05/tcl new file mode 100644 index 0000000..3e1778b --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/tcl @@ -0,0 +1,94 @@ + +running 89 tests +test misc::anchored_literal_long_match ... bench: 662 ns/iter (+/- 12) = 589 MB/s +test misc::anchored_literal_long_non_match ... bench: 133 ns/iter (+/- 1) = 2932 MB/s +test misc::anchored_literal_short_match ... bench: 616 ns/iter (+/- 18) = 42 MB/s +test misc::anchored_literal_short_non_match ... bench: 122 ns/iter (+/- 1) = 213 MB/s +test misc::easy0_1K ... bench: 11,816 ns/iter (+/- 92) = 88 MB/s +test misc::easy0_1MB ... bench: 3,409,439 ns/iter (+/- 94,972) = 307 MB/s +test misc::easy0_32 ... 
bench: 8,785 ns/iter (+/- 183) = 6 MB/s +test misc::easy0_32K ... bench: 115,371 ns/iter (+/- 2,279) = 284 MB/s +test misc::easy1_1K ... bench: 7,038 ns/iter (+/- 145) = 148 MB/s +test misc::easy1_1MB ... bench: 3,396,028 ns/iter (+/- 100,173) = 308 MB/s +test misc::easy1_32 ... bench: 3,687 ns/iter (+/- 44) = 14 MB/s +test misc::easy1_32K ... bench: 109,689 ns/iter (+/- 3,757) = 298 MB/s +test misc::hard_1K ... bench: 14,836 ns/iter (+/- 518) = 70 MB/s +test misc::hard_1MB ... bench: 3,376,015 ns/iter (+/- 95,045) = 310 MB/s +test misc::hard_32 ... bench: 11,278 ns/iter (+/- 389) = 5 MB/s +test misc::hard_32K ... bench: 115,400 ns/iter (+/- 4,738) = 284 MB/s +test misc::literal ... bench: 511 ns/iter (+/- 11) = 99 MB/s +test misc::long_needle1 ... bench: 18,076,901 ns/iter (+/- 523,761) = 5 MB/s +test misc::long_needle2 ... bench: 18,497,725 ns/iter (+/- 465,516) = 5 MB/s +test misc::match_class ... bench: 620 ns/iter (+/- 23) = 130 MB/s +test misc::match_class_in_range ... bench: 605 ns/iter (+/- 26) = 133 MB/s +test misc::medium_1K ... bench: 12,355 ns/iter (+/- 390) = 85 MB/s +test misc::medium_1MB ... bench: 3,410,978 ns/iter (+/- 112,021) = 307 MB/s +test misc::medium_32 ... bench: 9,086 ns/iter (+/- 287) = 6 MB/s +test misc::medium_32K ... bench: 116,944 ns/iter (+/- 5,654) = 280 MB/s +test misc::no_exponential ... bench: 2,379,518 ns/iter (+/- 92,628) +test misc::not_literal ... bench: 1,979 ns/iter (+/- 116) = 25 MB/s +test misc::one_pass_long_prefix ... bench: 6,932 ns/iter (+/- 464) = 3 MB/s +test misc::one_pass_long_prefix_not ... bench: 6,242 ns/iter (+/- 384) = 4 MB/s +test misc::one_pass_short ... bench: 630 ns/iter (+/- 42) = 26 MB/s +test misc::one_pass_short_not ... bench: 718 ns/iter (+/- 64) = 23 MB/s +test misc::reallyhard2_1K ... bench: 108,421 ns/iter (+/- 6,489) = 9 MB/s +test misc::reallyhard_1K ... bench: 14,330 ns/iter (+/- 814) = 73 MB/s +test misc::reallyhard_1MB ... bench: 3,287,965 ns/iter (+/- 203,546) = 318 MB/s +test misc::reallyhard_32 ... bench: 11,193 ns/iter (+/- 683) = 5 MB/s +test misc::reallyhard_32K ... bench: 112,731 ns/iter (+/- 5,966) = 290 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 26,907 ns/iter (+/- 2,396) = 297 MB/s +test regexdna::find_new_lines ... bench: 48,223,361 ns/iter (+/- 2,855,654) = 105 MB/s +test regexdna::subst1 ... bench: 27,177,359 ns/iter (+/- 1,359,987) = 187 MB/s +test regexdna::subst10 ... bench: 26,722,144 ns/iter (+/- 1,090,216) = 190 MB/s +test regexdna::subst11 ... bench: 27,382,875 ns/iter (+/- 1,656,754) = 185 MB/s +test regexdna::subst2 ... bench: 26,957,766 ns/iter (+/- 1,433,630) = 188 MB/s +test regexdna::subst3 ... bench: 27,195,925 ns/iter (+/- 1,828,460) = 186 MB/s +test regexdna::subst4 ... bench: 26,342,249 ns/iter (+/- 1,949,172) = 192 MB/s +test regexdna::subst5 ... bench: 26,543,675 ns/iter (+/- 2,143,336) = 191 MB/s +test regexdna::subst6 ... bench: 26,185,452 ns/iter (+/- 2,199,220) = 194 MB/s +test regexdna::subst7 ... bench: 26,338,573 ns/iter (+/- 2,124,778) = 193 MB/s +test regexdna::subst8 ... bench: 26,468,652 ns/iter (+/- 1,923,567) = 192 MB/s +test regexdna::subst9 ... bench: 26,487,784 ns/iter (+/- 1,250,319) = 191 MB/s +test regexdna::variant1 ... bench: 16,325,983 ns/iter (+/- 491,000) = 311 MB/s +test regexdna::variant2 ... bench: 16,845,952 ns/iter (+/- 470,062) = 301 MB/s +test regexdna::variant3 ... bench: 19,258,030 ns/iter (+/- 525,045) = 263 MB/s +test regexdna::variant4 ... bench: 18,018,713 ns/iter (+/- 1,235,670) = 282 MB/s +test regexdna::variant5 ... 
bench: 19,583,528 ns/iter (+/- 1,756,762) = 259 MB/s +test regexdna::variant6 ... bench: 17,630,308 ns/iter (+/- 973,191) = 288 MB/s +test regexdna::variant7 ... bench: 17,121,666 ns/iter (+/- 1,274,478) = 296 MB/s +test regexdna::variant8 ... bench: 17,154,863 ns/iter (+/- 425,504) = 296 MB/s +test regexdna::variant9 ... bench: 17,930,482 ns/iter (+/- 587,712) = 283 MB/s +test sherlock::before_after_holmes ... bench: 2,600,503 ns/iter (+/- 383,440) = 228 MB/s +test sherlock::before_holmes ... bench: 3,145,648 ns/iter (+/- 37,316) = 189 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,668,355 ns/iter (+/- 193,724) = 222 MB/s +test sherlock::ing_suffix ... bench: 5,638,296 ns/iter (+/- 69,345) = 105 MB/s +test sherlock::ing_suffix_limited_space ... bench: 22,466,946 ns/iter (+/- 659,956) = 26 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,251,996 ns/iter (+/- 66,639) = 264 MB/s +test sherlock::name_alt1 ... bench: 2,276,056 ns/iter (+/- 64,088) = 261 MB/s +test sherlock::name_alt2 ... bench: 3,196,348 ns/iter (+/- 202,979) = 186 MB/s +test sherlock::name_alt3 ... bench: 5,260,374 ns/iter (+/- 426,028) = 113 MB/s +test sherlock::name_alt3_nocase ... bench: 8,529,394 ns/iter (+/- 558,731) = 69 MB/s +test sherlock::name_alt4 ... bench: 2,787,972 ns/iter (+/- 153,839) = 213 MB/s +test sherlock::name_alt4_nocase ... bench: 3,370,452 ns/iter (+/- 140,385) = 176 MB/s +test sherlock::name_alt5 ... bench: 3,795,793 ns/iter (+/- 182,240) = 156 MB/s +test sherlock::name_alt5_nocase ... bench: 4,691,422 ns/iter (+/- 161,515) = 126 MB/s +test sherlock::name_holmes ... bench: 2,513,139 ns/iter (+/- 72,157) = 236 MB/s +test sherlock::name_holmes_nocase ... bench: 2,636,441 ns/iter (+/- 78,402) = 225 MB/s +test sherlock::name_sherlock ... bench: 2,015,753 ns/iter (+/- 104,000) = 295 MB/s +test sherlock::name_sherlock_holmes ... bench: 2,180,684 ns/iter (+/- 162,201) = 272 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 2,306,664 ns/iter (+/- 165,960) = 257 MB/s +test sherlock::name_sherlock_nocase ... bench: 2,065,630 ns/iter (+/- 155,223) = 288 MB/s +test sherlock::name_whitespace ... bench: 2,266,188 ns/iter (+/- 173,380) = 262 MB/s +test sherlock::no_match_common ... bench: 1,881,887 ns/iter (+/- 123,883) = 316 MB/s +test sherlock::no_match_really_common ... bench: 1,804,352 ns/iter (+/- 33,396) = 329 MB/s +test sherlock::no_match_uncommon ... bench: 1,809,300 ns/iter (+/- 123,888) = 328 MB/s +test sherlock::quotes ... bench: 9,682,507 ns/iter (+/- 1,200,909) = 61 MB/s +test sherlock::repeated_class_negation ... bench: 68,600,251 ns/iter (+/- 2,043,582) = 8 MB/s +test sherlock::the_lower ... bench: 6,849,558 ns/iter (+/- 517,709) = 86 MB/s +test sherlock::the_nocase ... bench: 7,354,742 ns/iter (+/- 390,834) = 80 MB/s +test sherlock::the_upper ... bench: 2,442,364 ns/iter (+/- 174,452) = 243 MB/s +test sherlock::the_whitespace ... bench: 9,210,338 ns/iter (+/- 651,675) = 64 MB/s +test sherlock::words ... bench: 47,863,652 ns/iter (+/- 3,536,998) = 12 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 89 measured + diff --git a/vendor/regex/record/old-bench-log/05/tcl-vs-rust b/vendor/regex/record/old-bench-log/05/tcl-vs-rust new file mode 100644 index 0000000..0faefe9 --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/tcl-vs-rust @@ -0,0 +1,90 @@ + name tcl ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 662 (589 MB/s) 24 (16250 MB/s) -638 -96.37% + misc::anchored_literal_long_non_match 133 (2932 MB/s) 27 (14444 MB/s) -106 -79.70% + misc::anchored_literal_short_match 616 (42 MB/s) 22 (1181 MB/s) -594 -96.43% + misc::anchored_literal_short_non_match 122 (213 MB/s) 24 (1083 MB/s) -98 -80.33% + misc::easy0_1K 11,816 (88 MB/s) 16 (65687 MB/s) -11,800 -99.86% + misc::easy0_1MB 3,409,439 (307 MB/s) 20 (52430150 MB/s) -3,409,419 -100.00% + misc::easy0_32 8,785 (6 MB/s) 16 (3687 MB/s) -8,769 -99.82% + misc::easy0_32K 115,371 (284 MB/s) 16 (2049687 MB/s) -115,355 -99.99% + misc::easy1_1K 7,038 (148 MB/s) 48 (21750 MB/s) -6,990 -99.32% + misc::easy1_1MB 3,396,028 (308 MB/s) 48 (21845750 MB/s) -3,395,980 -100.00% + misc::easy1_32 3,687 (14 MB/s) 46 (1130 MB/s) -3,641 -98.75% + misc::easy1_32K 109,689 (298 MB/s) 47 (697617 MB/s) -109,642 -99.96% + misc::hard_1K 14,836 (70 MB/s) 58 (18120 MB/s) -14,778 -99.61% + misc::hard_1MB 3,376,015 (310 MB/s) 61 (17190213 MB/s) -3,375,954 -100.00% + misc::hard_32 11,278 (5 MB/s) 58 (1017 MB/s) -11,220 -99.49% + misc::hard_32K 115,400 (284 MB/s) 56 (585625 MB/s) -115,344 -99.95% + misc::literal 511 (99 MB/s) 16 (3187 MB/s) -495 -96.87% + misc::long_needle1 18,076,901 (5 MB/s) 2,226 (44924 MB/s) -18,074,675 -99.99% + misc::long_needle2 18,497,725 (5 MB/s) 576,997 (173 MB/s) -17,920,728 -96.88% + misc::match_class 620 (130 MB/s) 65 (1246 MB/s) -555 -89.52% + misc::match_class_in_range 605 (133 MB/s) 27 (3000 MB/s) -578 -95.54% + misc::medium_1K 12,355 (85 MB/s) 16 (65750 MB/s) -12,339 -99.87% + misc::medium_1MB 3,410,978 (307 MB/s) 21 (49933523 MB/s) -3,410,957 -100.00% + misc::medium_32 9,086 (6 MB/s) 17 (3529 MB/s) -9,069 -99.81% + misc::medium_32K 116,944 (280 MB/s) 17 (1929176 MB/s) -116,927 -99.99% + misc::no_exponential 2,379,518 394 (253 MB/s) -2,379,124 -99.98% + misc::not_literal 1,979 (25 MB/s) 105 (485 MB/s) -1,874 -94.69% + misc::one_pass_long_prefix 6,932 (3 MB/s) 68 (382 MB/s) -6,864 -99.02% + misc::one_pass_long_prefix_not 6,242 (4 MB/s) 58 (448 MB/s) -6,184 -99.07% + misc::one_pass_short 630 (26 MB/s) 45 (377 MB/s) -585 -92.86% + misc::one_pass_short_not 718 (23 MB/s) 50 (340 MB/s) -668 -93.04% + misc::reallyhard2_1K 108,421 (9 MB/s) 83 (12530 MB/s) -108,338 -99.92% + misc::reallyhard_1K 14,330 (73 MB/s) 1,822 (576 MB/s) -12,508 -87.29% + misc::reallyhard_1MB 3,287,965 (318 MB/s) 1,768,327 (592 MB/s) -1,519,638 -46.22% + misc::reallyhard_32 11,193 (5 MB/s) 121 (487 MB/s) -11,072 -98.92% + misc::reallyhard_32K 112,731 (290 MB/s) 56,375 (581 MB/s) -56,356 -49.99% + misc::reverse_suffix_no_quadratic 26,907 (297 MB/s) 5,803 (1378 MB/s) -21,104 -78.43% + regexdna::find_new_lines 48,223,361 (105 MB/s) 14,818,233 (343 MB/s) -33,405,128 -69.27% + regexdna::subst1 27,177,359 (187 MB/s) 896,790 (5668 MB/s) -26,280,569 -96.70% + regexdna::subst10 26,722,144 (190 MB/s) 957,325 (5310 MB/s) -25,764,819 -96.42% + regexdna::subst11 27,382,875 (185 MB/s) 917,248 (5542 MB/s) -26,465,627 -96.65% + regexdna::subst2 26,957,766 (188 MB/s) 892,129 (5698 MB/s) -26,065,637 -96.69% + regexdna::subst3 27,195,925 (186 MB/s) 929,250 (5470 MB/s) -26,266,675 -96.58% + regexdna::subst4 26,342,249 (192 MB/s) 
872,581 (5825 MB/s) -25,469,668 -96.69% + regexdna::subst5 26,543,675 (191 MB/s) 875,804 (5804 MB/s) -25,667,871 -96.70% + regexdna::subst6 26,185,452 (194 MB/s) 884,639 (5746 MB/s) -25,300,813 -96.62% + regexdna::subst7 26,338,573 (193 MB/s) 872,791 (5824 MB/s) -25,465,782 -96.69% + regexdna::subst8 26,468,652 (192 MB/s) 873,833 (5817 MB/s) -25,594,819 -96.70% + regexdna::subst9 26,487,784 (191 MB/s) 886,744 (5732 MB/s) -25,601,040 -96.65% + regexdna::variant1 16,325,983 (311 MB/s) 3,699,267 (1374 MB/s) -12,626,716 -77.34% + regexdna::variant2 16,845,952 (301 MB/s) 6,760,952 (751 MB/s) -10,085,000 -59.87% + regexdna::variant3 19,258,030 (263 MB/s) 8,030,646 (633 MB/s) -11,227,384 -58.30% + regexdna::variant4 18,018,713 (282 MB/s) 8,077,290 (629 MB/s) -9,941,423 -55.17% + regexdna::variant5 19,583,528 (259 MB/s) 6,787,242 (748 MB/s) -12,796,286 -65.34% + regexdna::variant6 17,630,308 (288 MB/s) 6,577,777 (772 MB/s) -11,052,531 -62.69% + regexdna::variant7 17,121,666 (296 MB/s) 6,705,580 (758 MB/s) -10,416,086 -60.84% + regexdna::variant8 17,154,863 (296 MB/s) 6,818,785 (745 MB/s) -10,336,078 -60.25% + regexdna::variant9 17,930,482 (283 MB/s) 6,821,453 (745 MB/s) -11,109,029 -61.96% + sherlock::before_after_holmes 2,600,503 (228 MB/s) 1,029,866 (577 MB/s) -1,570,637 -60.40% + sherlock::before_holmes 3,145,648 (189 MB/s) 76,633 (7763 MB/s) -3,069,015 -97.56% + sherlock::holmes_cochar_watson 2,668,355 (222 MB/s) 144,725 (4110 MB/s) -2,523,630 -94.58% + sherlock::ing_suffix 5,638,296 (105 MB/s) 436,202 (1363 MB/s) -5,202,094 -92.26% + sherlock::ing_suffix_limited_space 22,466,946 (26 MB/s) 1,182,943 (502 MB/s) -21,284,003 -94.73% + sherlock::line_boundary_sherlock_holmes 2,251,996 (264 MB/s) 999,414 (595 MB/s) -1,252,582 -55.62% + sherlock::name_alt1 2,276,056 (261 MB/s) 34,298 (17345 MB/s) -2,241,758 -98.49% + sherlock::name_alt2 3,196,348 (186 MB/s) 124,226 (4789 MB/s) -3,072,122 -96.11% + sherlock::name_alt3 5,260,374 (113 MB/s) 137,742 (4319 MB/s) -5,122,632 -97.38% + sherlock::name_alt3_nocase 8,529,394 (69 MB/s) 1,293,763 (459 MB/s) -7,235,631 -84.83% + sherlock::name_alt4 2,787,972 (213 MB/s) 164,900 (3607 MB/s) -2,623,072 -94.09% + sherlock::name_alt4_nocase 3,370,452 (176 MB/s) 235,023 (2531 MB/s) -3,135,429 -93.03% + sherlock::name_alt5 3,795,793 (156 MB/s) 127,928 (4650 MB/s) -3,667,865 -96.63% + sherlock::name_alt5_nocase 4,691,422 (126 MB/s) 659,591 (901 MB/s) -4,031,831 -85.94% + sherlock::name_holmes 2,513,139 (236 MB/s) 40,902 (14545 MB/s) -2,472,237 -98.37% + sherlock::name_holmes_nocase 2,636,441 (225 MB/s) 198,658 (2994 MB/s) -2,437,783 -92.46% + sherlock::name_sherlock 2,015,753 (295 MB/s) 68,924 (8631 MB/s) -1,946,829 -96.58% + sherlock::name_sherlock_holmes 2,180,684 (272 MB/s) 31,640 (18803 MB/s) -2,149,044 -98.55% + sherlock::name_sherlock_holmes_nocase 2,306,664 (257 MB/s) 173,522 (3428 MB/s) -2,133,142 -92.48% + sherlock::name_sherlock_nocase 2,065,630 (288 MB/s) 170,888 (3481 MB/s) -1,894,742 -91.73% + sherlock::name_whitespace 2,266,188 (262 MB/s) 84,314 (7056 MB/s) -2,181,874 -96.28% + sherlock::no_match_common 1,881,887 (316 MB/s) 20,727 (28703 MB/s) -1,861,160 -98.90% + sherlock::no_match_really_common 1,804,352 (329 MB/s) 381,476 (1559 MB/s) -1,422,876 -78.86% + sherlock::no_match_uncommon 1,809,300 (328 MB/s) 20,786 (28621 MB/s) -1,788,514 -98.85% + sherlock::quotes 9,682,507 (61 MB/s) 531,487 (1119 MB/s) -9,151,020 -94.51% + sherlock::repeated_class_negation 68,600,251 (8 MB/s) 85,881,944 (6 MB/s) 17,281,693 25.19% + sherlock::the_lower 6,849,558 (86 MB/s) 
654,110 (909 MB/s) -6,195,448 -90.45% + sherlock::the_nocase 7,354,742 (80 MB/s) 474,456 (1253 MB/s) -6,880,286 -93.55% + sherlock::the_upper 2,442,364 (243 MB/s) 43,746 (13599 MB/s) -2,398,618 -98.21% + sherlock::the_whitespace 9,210,338 (64 MB/s) 1,181,974 (503 MB/s) -8,028,364 -87.17% + sherlock::words 47,863,652 (12 MB/s) 9,697,201 (61 MB/s) -38,166,451 -79.74% diff --git a/vendor/regex/record/old-bench-log/06/dphobos-dmd b/vendor/regex/record/old-bench-log/06/dphobos-dmd new file mode 100644 index 0000000..bffdd29 --- /dev/null +++ b/vendor/regex/record/old-bench-log/06/dphobos-dmd @@ -0,0 +1,98 @@ +running 95 tests +test misc::anchored_literal_long_match ... bench: 356 ns/iter (+/- 4) = 1095 MB/s +test misc::anchored_literal_long_non_match ... bench: 280 ns/iter (+/- 18) = 1392 MB/s +test misc::anchored_literal_short_match ... bench: 351 ns/iter (+/- 16) = 74 MB/s +test misc::anchored_literal_short_non_match ... bench: 274 ns/iter (+/- 17) = 94 MB/s +test misc::easy0_1K ... bench: 810 ns/iter (+/- 38) = 1297 MB/s +test misc::easy0_1MB ... bench: 25,296 ns/iter (+/- 3,592) = 41453 MB/s +test misc::easy0_32 ... bench: 745 ns/iter (+/- 60) = 79 MB/s +test misc::easy0_32K ... bench: 1,111 ns/iter (+/- 82) = 29518 MB/s +test misc::easy1_1K ... bench: 730 ns/iter (+/- 20) = 1430 MB/s +test misc::easy1_1MB ... bench: 25,442 ns/iter (+/- 2,076) = 41215 MB/s +test misc::easy1_32 ... bench: 730 ns/iter (+/- 79) = 71 MB/s +test misc::easy1_32K ... bench: 1,104 ns/iter (+/- 93) = 29699 MB/s +test misc::hard_1K ... bench: 18,238 ns/iter (+/- 1,173) = 57 MB/s +test misc::hard_1MB ... bench: 19,302,344 ns/iter (+/- 2,039,538) = 54 MB/s +test misc::hard_32 ... bench: 2,508 ns/iter (+/- 119) = 23 MB/s +test misc::hard_32K ... bench: 666,948 ns/iter (+/- 58,067) = 49 MB/s +test misc::literal ... bench: 196 ns/iter (+/- 17) = 260 MB/s +test misc::long_needle1 ... bench: 82,532 ns/iter (+/- 4,618) = 1211 MB/s +test misc::long_needle2 ... bench: 84,079 ns/iter (+/- 5,930) = 1189 MB/s +test misc::match_class ... bench: 300 ns/iter (+/- 41) = 270 MB/s +test misc::match_class_in_range ... bench: 258 ns/iter (+/- 16) = 313 MB/s +test misc::match_class_unicode ... bench: 1,563 ns/iter (+/- 171) = 103 MB/s +test misc::medium_1K ... bench: 1,541 ns/iter (+/- 127) = 682 MB/s +test misc::medium_1MB ... bench: 617,650 ns/iter (+/- 59,618) = 1697 MB/s +test misc::medium_32 ... bench: 985 ns/iter (+/- 62) = 60 MB/s +test misc::medium_32K ... bench: 19,948 ns/iter (+/- 1,388) = 1644 MB/s +test misc::no_exponential ... bench: 430,777 ns/iter (+/- 52,435) +test misc::not_literal ... bench: 1,202 ns/iter (+/- 60) = 42 MB/s +test misc::one_pass_long_prefix ... bench: 630 ns/iter (+/- 45) = 41 MB/s +test misc::one_pass_long_prefix_not ... bench: 617 ns/iter (+/- 60) = 42 MB/s +test misc::one_pass_short ... bench: 1,102 ns/iter (+/- 38) = 15 MB/s +test misc::one_pass_short_not ... bench: 1,481 ns/iter (+/- 44) = 11 MB/s +test misc::reallyhard2_1K ... bench: 40,749 ns/iter (+/- 2,027) = 25 MB/s +test misc::reallyhard_1K ... bench: 18,987 ns/iter (+/- 1,419) = 55 MB/s +test misc::reallyhard_1MB ... bench: 19,923,786 ns/iter (+/- 1,499,750) = 52 MB/s +test misc::reallyhard_32 ... bench: 2,369 ns/iter (+/- 115) = 24 MB/s +test misc::reallyhard_32K ... bench: 627,664 ns/iter (+/- 30,507) = 52 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,933 ns/iter (+/- 700) = 1621 MB/s +test regexdna::find_new_lines ... bench: 28,886,666 ns/iter (+/- 1,969,669) = 175 MB/s +test regexdna::subst1 ... 
+test regexdna::subst10 ... bench: 6,923,833 ns/iter (+/- 677,840) = 734 MB/s
+test regexdna::subst11 ... bench: 6,917,738 ns/iter (+/- 306,829) = 734 MB/s
+test regexdna::subst2 ... bench: 6,914,593 ns/iter (+/- 625,342) = 735 MB/s
+test regexdna::subst3 ... bench: 6,582,793 ns/iter (+/- 297,052) = 772 MB/s
+test regexdna::subst4 ... bench: 6,528,804 ns/iter (+/- 463,331) = 778 MB/s
+test regexdna::subst5 ... bench: 6,886,457 ns/iter (+/- 1,015,943) = 738 MB/s
+test regexdna::subst6 ... bench: 6,789,493 ns/iter (+/- 573,137) = 748 MB/s
+test regexdna::subst7 ... bench: 6,533,609 ns/iter (+/- 372,293) = 778 MB/s
+test regexdna::subst8 ... bench: 6,536,845 ns/iter (+/- 290,249) = 777 MB/s
+test regexdna::subst9 ... bench: 6,509,834 ns/iter (+/- 402,426) = 780 MB/s
+test regexdna::variant1 ... bench: 5,746,639 ns/iter (+/- 205,103) = 884 MB/s
+test regexdna::variant2 ... bench: 7,661,372 ns/iter (+/- 145,811) = 663 MB/s
+test regexdna::variant3 ... bench: 12,801,668 ns/iter (+/- 337,572) = 397 MB/s
+test regexdna::variant4 ... bench: 11,109,679 ns/iter (+/- 357,680) = 457 MB/s
+test regexdna::variant5 ... bench: 11,238,093 ns/iter (+/- 1,571,929) = 452 MB/s
+test regexdna::variant6 ... bench: 8,453,224 ns/iter (+/- 185,044) = 601 MB/s
+test regexdna::variant7 ... bench: 8,784,446 ns/iter (+/- 153,626) = 578 MB/s
+test regexdna::variant8 ... bench: 11,151,797 ns/iter (+/- 366,593) = 455 MB/s
+test regexdna::variant9 ... bench: 22,206,248 ns/iter (+/- 1,143,965) = 228 MB/s
+test sherlock::before_after_holmes ... bench: 23,458,512 ns/iter (+/- 1,982,069) = 25 MB/s
+test sherlock::before_holmes ... bench: 23,040,796 ns/iter (+/- 688,881) = 25 MB/s
+test sherlock::holmes_cochar_watson ... bench: 1,035,156 ns/iter (+/- 113,881) = 574 MB/s
+test sherlock::holmes_coword_watson ... bench: 118,126,447 ns/iter (+/- 8,394,250) = 5 MB/s
+test sherlock::ing_suffix ... bench: 16,122,434 ns/iter (+/- 236,636) = 36 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 22,239,435 ns/iter (+/- 364,604) = 26 MB/s
+test sherlock::letters ... bench: 92,002,273 ns/iter (+/- 2,056,908) = 6 MB/s
+test sherlock::letters_lower ... bench: 90,778,580 ns/iter (+/- 4,179,255) = 6 MB/s
+test sherlock::letters_upper ... bench: 3,392,415 ns/iter (+/- 143,338) = 175 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 428,636 ns/iter (+/- 14,993) = 1387 MB/s
+test sherlock::name_alt1 ... bench: 432,574 ns/iter (+/- 13,731) = 1375 MB/s
+test sherlock::name_alt2 ... bench: 644,165 ns/iter (+/- 15,049) = 923 MB/s
+test sherlock::name_alt3 ... bench: 1,176,979 ns/iter (+/- 105,694) = 505 MB/s
+test sherlock::name_alt3_nocase ... bench: 2,054,990 ns/iter (+/- 91,909) = 289 MB/s
+test sherlock::name_alt4 ... bench: 712,039 ns/iter (+/- 36,911) = 835 MB/s
+test sherlock::name_alt4_nocase ... bench: 993,415 ns/iter (+/- 27,355) = 598 MB/s
+test sherlock::name_alt5 ... bench: 757,045 ns/iter (+/- 29,126) = 785 MB/s
+test sherlock::name_alt5_nocase ... bench: 953,821 ns/iter (+/- 37,252) = 623 MB/s
+test sherlock::name_holmes ... bench: 186,801 ns/iter (+/- 6,676) = 3184 MB/s
+test sherlock::name_holmes_nocase ... bench: 539,857 ns/iter (+/- 40,614) = 1102 MB/s
+test sherlock::name_sherlock ... bench: 56,113 ns/iter (+/- 4,566) = 10602 MB/s
+test sherlock::name_sherlock_holmes ... bench: 67,558 ns/iter (+/- 6,746) = 8806 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 429,123 ns/iter (+/- 51,647) = 1386 MB/s
+test sherlock::name_sherlock_nocase ... bench: 396,070 ns/iter (+/- 33,934) = 1502 MB/s
+test sherlock::name_whitespace ... bench: 84,630 ns/iter (+/- 6,285) = 7029 MB/s
+test sherlock::no_match_common ... bench: 292,844 ns/iter (+/- 24,013) = 2031 MB/s
+test sherlock::no_match_really_common ... bench: 290,986 ns/iter (+/- 10,163) = 2044 MB/s
+test sherlock::no_match_uncommon ... bench: 14,041 ns/iter (+/- 599) = 42371 MB/s
+test sherlock::quotes ... bench: 6,489,945 ns/iter (+/- 132,983) = 91 MB/s
+test sherlock::repeated_class_negation ... bench: 49,479,000 ns/iter (+/- 965,144) = 12 MB/s
+test sherlock::the_lower ... bench: 2,268,881 ns/iter (+/- 134,889) = 262 MB/s
+test sherlock::the_nocase ... bench: 2,906,824 ns/iter (+/- 72,615) = 204 MB/s
+test sherlock::the_upper ... bench: 211,138 ns/iter (+/- 9,935) = 2817 MB/s
+test sherlock::the_whitespace ... bench: 3,488,249 ns/iter (+/- 254,294) = 170 MB/s
+test sherlock::word_ending_n ... bench: 30,917,395 ns/iter (+/- 2,298,620) = 19 MB/s
+test sherlock::words ... bench: 39,830,572 ns/iter (+/- 2,662,348) = 14 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 95 measured; 0 filtered out
diff --git a/vendor/regex/record/old-bench-log/06/dphobos-dmd-ct b/vendor/regex/record/old-bench-log/06/dphobos-dmd-ct
new file mode 100644
index 0000000..426fa6c
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/06/dphobos-dmd-ct
@@ -0,0 +1,99 @@
+
+running 94 tests
+test misc::anchored_literal_long_match ... bench: 336 ns/iter (+/- 145) = 1160 MB/s
+test misc::anchored_literal_long_non_match ... bench: 246 ns/iter (+/- 27) = 1585 MB/s
+test misc::anchored_literal_short_match ... bench: 313 ns/iter (+/- 32) = 83 MB/s
+test misc::anchored_literal_short_non_match ... bench: 248 ns/iter (+/- 31) = 104 MB/s
+test misc::easy0_1K ... bench: 792 ns/iter (+/- 109) = 1327 MB/s
+test misc::easy0_1MB ... bench: 24,706 ns/iter (+/- 812) = 42443 MB/s
+test misc::easy0_32 ... bench: 793 ns/iter (+/- 77) = 74 MB/s
+test misc::easy0_32K ... bench: 1,179 ns/iter (+/- 55) = 27815 MB/s
+test misc::easy1_1K ... bench: 720 ns/iter (+/- 85) = 1450 MB/s
+test misc::easy1_1MB ... bench: 24,647 ns/iter (+/- 761) = 42544 MB/s
+test misc::easy1_32 ... bench: 717 ns/iter (+/- 28) = 72 MB/s
+test misc::easy1_32K ... bench: 1,140 ns/iter (+/- 116) = 28761 MB/s
+test misc::hard_1K ... bench: 19,153 ns/iter (+/- 2,063) = 54 MB/s
+test misc::hard_1MB ... bench: 19,966,822 ns/iter (+/- 1,979,640) = 52 MB/s
+test misc::hard_32 ... bench: 2,617 ns/iter (+/- 354) = 22 MB/s
+test misc::hard_32K ... bench: 621,150 ns/iter (+/- 24,244) = 52 MB/s
+test misc::literal ... bench: 194 ns/iter (+/- 28) = 262 MB/s
+test misc::long_needle1 ... bench: 83,293 ns/iter (+/- 3,287) = 1200 MB/s
+test misc::long_needle2 ... bench: 83,214 ns/iter (+/- 3,344) = 1201 MB/s
+test misc::match_class ... bench: 301 ns/iter (+/- 38) = 269 MB/s
+test misc::match_class_in_range ... bench: 258 ns/iter (+/- 27) = 313 MB/s
+test misc::match_class_unicode ... bench: 1,565 ns/iter (+/- 187) = 102 MB/s
+test misc::medium_1K ... bench: 1,572 ns/iter (+/- 230) = 669 MB/s
+test misc::medium_1MB ... bench: 609,944 ns/iter (+/- 23,088) = 1719 MB/s
+test misc::medium_32 ... bench: 980 ns/iter (+/- 112) = 61 MB/s
+test misc::medium_32K ... bench: 20,058 ns/iter (+/- 884) = 1635 MB/s
+test misc::not_literal ... bench: 1,218 ns/iter (+/- 67) = 41 MB/s
+test misc::one_pass_long_prefix ... bench: 588 ns/iter (+/- 93) = 44 MB/s
+test misc::one_pass_long_prefix_not ... bench: 595 ns/iter (+/- 77) = 43 MB/s
+test misc::one_pass_short ... bench: 1,114 ns/iter (+/- 52) = 15 MB/s
+test misc::one_pass_short_not ... bench: 1,481 ns/iter (+/- 183) = 11 MB/s
+test misc::reallyhard2_1K ... bench: 40,858 ns/iter (+/- 1,860) = 25 MB/s
+test misc::reallyhard_1K ... bench: 18,678 ns/iter (+/- 835) = 56 MB/s
+test misc::reallyhard_1MB ... bench: 19,824,750 ns/iter (+/- 354,159) = 52 MB/s
+test misc::reallyhard_32 ... bench: 2,340 ns/iter (+/- 68) = 25 MB/s
+test misc::reallyhard_32K ... bench: 621,351 ns/iter (+/- 21,369) = 52 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 4,919 ns/iter (+/- 224) = 1626 MB/s
+test regexdna::find_new_lines ... bench: 27,265,128 ns/iter (+/- 1,416,486) = 186 MB/s
+test regexdna::subst1 ... bench: 6,414,636 ns/iter (+/- 696,943) = 792 MB/s
+test regexdna::subst10 ... bench: 6,426,829 ns/iter (+/- 206,773) = 790 MB/s
+test regexdna::subst11 ... bench: 6,435,800 ns/iter (+/- 439,175) = 789 MB/s
+test regexdna::subst2 ... bench: 6,428,455 ns/iter (+/- 214,961) = 790 MB/s
+test regexdna::subst3 ... bench: 6,428,692 ns/iter (+/- 681,910) = 790 MB/s
+test regexdna::subst4 ... bench: 6,425,781 ns/iter (+/- 129,718) = 791 MB/s
+test regexdna::subst5 ... bench: 6,414,376 ns/iter (+/- 151,827) = 792 MB/s
+test regexdna::subst6 ... bench: 6,455,032 ns/iter (+/- 423,915) = 787 MB/s
+test regexdna::subst7 ... bench: 6,668,649 ns/iter (+/- 686,734) = 762 MB/s
+test regexdna::subst8 ... bench: 6,393,791 ns/iter (+/- 172,533) = 795 MB/s
+test regexdna::subst9 ... bench: 6,426,100 ns/iter (+/- 175,951) = 791 MB/s
+test regexdna::variant1 ... bench: 5,612,507 ns/iter (+/- 128,406) = 905 MB/s
+test regexdna::variant2 ... bench: 7,572,661 ns/iter (+/- 159,047) = 671 MB/s
+test regexdna::variant3 ... bench: 12,287,183 ns/iter (+/- 378,305) = 413 MB/s
+test regexdna::variant4 ... bench: 11,223,976 ns/iter (+/- 1,191,250) = 452 MB/s
+test regexdna::variant5 ... bench: 11,016,081 ns/iter (+/- 714,537) = 461 MB/s
+test regexdna::variant6 ... bench: 8,198,798 ns/iter (+/- 471,338) = 620 MB/s
+test regexdna::variant7 ... bench: 8,895,886 ns/iter (+/- 885,690) = 571 MB/s
+test regexdna::variant8 ... bench: 11,000,942 ns/iter (+/- 886,538) = 462 MB/s
+test regexdna::variant9 ... bench: 20,761,109 ns/iter (+/- 629,876) = 244 MB/s
+test sherlock::before_after_holmes ... bench: 24,417,513 ns/iter (+/- 2,359,425) = 24 MB/s
+test sherlock::before_holmes ... bench: 24,435,196 ns/iter (+/- 2,164,187) = 24 MB/s
+test sherlock::holmes_cochar_watson ... bench: 1,025,780 ns/iter (+/- 121,876) = 579 MB/s
+test sherlock::holmes_coword_watson ... bench: 122,988,753 ns/iter (+/- 7,606,302) = 4 MB/s
+test sherlock::ing_suffix ... bench: 16,322,427 ns/iter (+/- 321,746) = 36 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 21,993,282 ns/iter (+/- 434,365) = 27 MB/s
+test sherlock::letters ... bench: 88,877,258 ns/iter (+/- 504,024) = 6 MB/s
+test sherlock::letters_lower ... bench: 87,709,419 ns/iter (+/- 659,859) = 6 MB/s
+test sherlock::letters_upper ... bench: 3,299,811 ns/iter (+/- 78,850) = 180 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 431,548 ns/iter (+/- 54,716) = 1378 MB/s
+test sherlock::name_alt1 ... bench: 470,052 ns/iter (+/- 49,985) = 1265 MB/s
+test sherlock::name_alt2 ... bench: 705,694 ns/iter (+/- 45,724) = 843 MB/s
+test sherlock::name_alt3 ... bench: 1,148,456 ns/iter (+/- 51,018) = 518 MB/s
+test sherlock::name_alt3_nocase ... bench: 2,026,355 ns/iter (+/- 220,043) = 293 MB/s
+test sherlock::name_alt4 ... bench: 699,625 ns/iter (+/- 40,361) = 850 MB/s
+test sherlock::name_alt4_nocase ... bench: 979,151 ns/iter (+/- 41,460) = 607 MB/s
+test sherlock::name_alt5 ... bench: 751,646 ns/iter (+/- 31,601) = 791 MB/s
+test sherlock::name_alt5_nocase ... bench: 950,701 ns/iter (+/- 102,078) = 625 MB/s
+test sherlock::name_holmes ... bench: 184,935 ns/iter (+/- 6,633) = 3216 MB/s
+test sherlock::name_holmes_nocase ... bench: 532,703 ns/iter (+/- 33,919) = 1116 MB/s
+test sherlock::name_sherlock ... bench: 55,468 ns/iter (+/- 1,776) = 10725 MB/s
+test sherlock::name_sherlock_holmes ... bench: 67,327 ns/iter (+/- 5,464) = 8836 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 417,724 ns/iter (+/- 45,520) = 1424 MB/s
+test sherlock::name_sherlock_nocase ... bench: 392,285 ns/iter (+/- 14,778) = 1516 MB/s
+test sherlock::name_whitespace ... bench: 77,112 ns/iter (+/- 2,785) = 7715 MB/s
+test sherlock::no_match_common ... bench: 291,222 ns/iter (+/- 10,477) = 2042 MB/s
+test sherlock::no_match_really_common ... bench: 291,393 ns/iter (+/- 10,834) = 2041 MB/s
+test sherlock::no_match_uncommon ... bench: 14,016 ns/iter (+/- 376) = 42446 MB/s
+test sherlock::quotes ... bench: 6,557,639 ns/iter (+/- 158,929) = 90 MB/s
+test sherlock::repeated_class_negation ... bench: 49,697,910 ns/iter (+/- 773,749) = 11 MB/s
+test sherlock::the_lower ... bench: 2,236,055 ns/iter (+/- 72,024) = 266 MB/s
+test sherlock::the_nocase ... bench: 2,892,430 ns/iter (+/- 89,222) = 205 MB/s
+test sherlock::the_upper ... bench: 207,035 ns/iter (+/- 8,624) = 2873 MB/s
+test sherlock::the_whitespace ... bench: 3,435,267 ns/iter (+/- 416,560) = 173 MB/s
+test sherlock::word_ending_n ... bench: 31,751,871 ns/iter (+/- 374,472) = 18 MB/s
+test sherlock::words ... bench: 38,793,659 ns/iter (+/- 3,022,370) = 15 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 94 measured; 0 filtered out
+
diff --git a/vendor/regex/record/old-bench-log/06/dphobos-ldc b/vendor/regex/record/old-bench-log/06/dphobos-ldc
new file mode 100644
index 0000000..29f5595
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/06/dphobos-ldc
@@ -0,0 +1,100 @@
+
+running 95 tests
+test misc::anchored_literal_long_match ... bench: 203 ns/iter (+/- 13) = 1921 MB/s
+test misc::anchored_literal_long_non_match ... bench: 126 ns/iter (+/- 5) = 3095 MB/s
+test misc::anchored_literal_short_match ... bench: 204 ns/iter (+/- 4) = 127 MB/s
+test misc::anchored_literal_short_non_match ... bench: 127 ns/iter (+/- 8) = 204 MB/s
+test misc::easy0_1K ... bench: 571 ns/iter (+/- 44) = 1840 MB/s
+test misc::easy0_1MB ... bench: 25,321 ns/iter (+/- 421) = 41412 MB/s
+test misc::easy0_32 ... bench: 553 ns/iter (+/- 9) = 106 MB/s
+test misc::easy0_32K ... bench: 971 ns/iter (+/- 29) = 33774 MB/s
+test misc::easy1_1K ... bench: 508 ns/iter (+/- 22) = 2055 MB/s
+test misc::easy1_1MB ... bench: 24,181 ns/iter (+/- 704) = 43364 MB/s
+test misc::easy1_32 ... bench: 494 ns/iter (+/- 14) = 105 MB/s
+test misc::easy1_32K ... bench: 892 ns/iter (+/- 82) = 36757 MB/s
+test misc::hard_1K ... bench: 15,335 ns/iter (+/- 1,224) = 68 MB/s
+test misc::hard_1MB ... bench: 16,105,838 ns/iter (+/- 319,567) = 65 MB/s
+test misc::hard_32 ... bench: 1,798 ns/iter (+/- 79) = 32 MB/s
+test misc::hard_32K ... bench: 504,123 ns/iter (+/- 44,829) = 65 MB/s
+test misc::literal ... bench: 74 ns/iter (+/- 9) = 689 MB/s
+test misc::long_needle1 ... bench: 56,853 ns/iter (+/- 3,662) = 1758 MB/s
+test misc::long_needle2 ... bench: 57,038 ns/iter (+/- 2,532) = 1753 MB/s
+test misc::match_class ... bench: 140 ns/iter (+/- 15) = 578 MB/s
+test misc::match_class_in_range ... bench: 126 ns/iter (+/- 17) = 642 MB/s
+test misc::match_class_unicode ... bench: 1,407 ns/iter (+/- 122) = 114 MB/s
+test misc::medium_1K ... bench: 1,199 ns/iter (+/- 80) = 877 MB/s
+test misc::medium_1MB ... bench: 558,323 ns/iter (+/- 20,908) = 1878 MB/s
+test misc::medium_32 ... bench: 661 ns/iter (+/- 30) = 90 MB/s
+test misc::medium_32K ... bench: 18,148 ns/iter (+/- 1,038) = 1807 MB/s
+test misc::no_exponential ... bench: 334,786 ns/iter (+/- 18,234)
+test misc::not_literal ... bench: 1,347 ns/iter (+/- 49) = 37 MB/s
+test misc::one_pass_long_prefix ... bench: 499 ns/iter (+/- 59) = 52 MB/s
+test misc::one_pass_long_prefix_not ... bench: 522 ns/iter (+/- 64) = 49 MB/s
+test misc::one_pass_short ... bench: 804 ns/iter (+/- 37) = 21 MB/s
+test misc::one_pass_short_not ... bench: 1,260 ns/iter (+/- 130) = 13 MB/s
+test misc::reallyhard2_1K ... bench: 37,726 ns/iter (+/- 1,284) = 27 MB/s
+test misc::reallyhard_1K ... bench: 15,246 ns/iter (+/- 901) = 68 MB/s
+test misc::reallyhard_1MB ... bench: 16,187,692 ns/iter (+/- 1,552,760) = 64 MB/s
+test misc::reallyhard_32 ... bench: 1,882 ns/iter (+/- 237) = 31 MB/s
+test misc::reallyhard_32K ... bench: 541,567 ns/iter (+/- 64,929) = 60 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 4,576 ns/iter (+/- 185) = 1748 MB/s
+test regexdna::find_new_lines ... bench: 14,744,849 ns/iter (+/- 1,141,621) = 344 MB/s
+test regexdna::subst1 ... bench: 2,801,370 ns/iter (+/- 105,875) = 1814 MB/s
+test regexdna::subst10 ... bench: 3,015,410 ns/iter (+/- 446,982) = 1685 MB/s
+test regexdna::subst11 ... bench: 2,923,557 ns/iter (+/- 193,230) = 1738 MB/s
+test regexdna::subst2 ... bench: 2,948,002 ns/iter (+/- 306,203) = 1724 MB/s
+test regexdna::subst3 ... bench: 2,899,076 ns/iter (+/- 174,958) = 1753 MB/s
+test regexdna::subst4 ... bench: 2,908,685 ns/iter (+/- 221,436) = 1747 MB/s
+test regexdna::subst5 ... bench: 3,780,044 ns/iter (+/- 150,740) = 1344 MB/s
+test regexdna::subst6 ... bench: 2,920,193 ns/iter (+/- 142,191) = 1740 MB/s
+test regexdna::subst7 ... bench: 2,918,785 ns/iter (+/- 175,109) = 1741 MB/s
+test regexdna::subst8 ... bench: 2,932,075 ns/iter (+/- 152,745) = 1733 MB/s
+test regexdna::subst9 ... bench: 2,914,694 ns/iter (+/- 176,327) = 1744 MB/s
+test regexdna::variant1 ... bench: 5,172,617 ns/iter (+/- 269,855) = 982 MB/s
+test regexdna::variant2 ... bench: 6,770,702 ns/iter (+/- 474,076) = 750 MB/s
+test regexdna::variant3 ... bench: 11,124,754 ns/iter (+/- 649,591) = 456 MB/s
+test regexdna::variant4 ... bench: 9,751,982 ns/iter (+/- 460,679) = 521 MB/s
+test regexdna::variant5 ... bench: 9,791,229 ns/iter (+/- 461,486) = 519 MB/s
+test regexdna::variant6 ... bench: 7,417,031 ns/iter (+/- 275,225) = 685 MB/s
+test regexdna::variant7 ... bench: 7,873,097 ns/iter (+/- 451,115) = 645 MB/s
+test regexdna::variant8 ... bench: 9,707,683 ns/iter (+/- 418,865) = 523 MB/s
+test regexdna::variant9 ... bench: 18,696,520 ns/iter (+/- 742,018) = 271 MB/s
+test sherlock::before_after_holmes ... bench: 22,314,084 ns/iter (+/- 888,249) = 26 MB/s
+test sherlock::before_holmes ... bench: 22,501,540 ns/iter (+/- 892,027) = 26 MB/s
+test sherlock::holmes_cochar_watson ... bench: 929,372 ns/iter (+/- 46,859) = 640 MB/s
+test sherlock::holmes_coword_watson ... bench: 125,548,613 ns/iter (+/- 3,297,687) = 4 MB/s
+test sherlock::ing_suffix ... bench: 18,023,803 ns/iter (+/- 1,079,960) = 33 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 21,809,497 ns/iter (+/- 1,259,989) = 27 MB/s
+test sherlock::letters ... bench: 39,512,315 ns/iter (+/- 3,309,084) = 15 MB/s
+test sherlock::letters_lower ... bench: 37,160,354 ns/iter (+/- 3,084,525) = 16 MB/s
+test sherlock::letters_upper ... bench: 1,721,867 ns/iter (+/- 66,812) = 345 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 378,307 ns/iter (+/- 30,914) = 1572 MB/s
+test sherlock::name_alt1 ... bench: 381,242 ns/iter (+/- 41,954) = 1560 MB/s
+test sherlock::name_alt2 ... bench: 503,558 ns/iter (+/- 46,044) = 1181 MB/s
+test sherlock::name_alt3 ... bench: 912,340 ns/iter (+/- 79,787) = 652 MB/s
+test sherlock::name_alt3_nocase ... bench: 1,515,048 ns/iter (+/- 74,623) = 392 MB/s
+test sherlock::name_alt4 ... bench: 580,652 ns/iter (+/- 60,407) = 1024 MB/s
+test sherlock::name_alt4_nocase ... bench: 826,866 ns/iter (+/- 58,485) = 719 MB/s
+test sherlock::name_alt5 ... bench: 651,281 ns/iter (+/- 64,134) = 913 MB/s
+test sherlock::name_alt5_nocase ... bench: 808,974 ns/iter (+/- 49,119) = 735 MB/s
+test sherlock::name_holmes ... bench: 120,010 ns/iter (+/- 9,458) = 4957 MB/s
+test sherlock::name_holmes_nocase ... bench: 441,316 ns/iter (+/- 56,990) = 1348 MB/s
+test sherlock::name_sherlock ... bench: 39,935 ns/iter (+/- 4,078) = 14897 MB/s
+test sherlock::name_sherlock_holmes ... bench: 49,126 ns/iter (+/- 3,082) = 12110 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 366,865 ns/iter (+/- 18,520) = 1621 MB/s
+test sherlock::name_sherlock_nocase ... bench: 349,337 ns/iter (+/- 18,365) = 1703 MB/s
+test sherlock::name_whitespace ... bench: 57,076 ns/iter (+/- 6,314) = 10423 MB/s
+test sherlock::no_match_common ... bench: 291,022 ns/iter (+/- 30,143) = 2044 MB/s
+test sherlock::no_match_really_common ... bench: 286,214 ns/iter (+/- 15,722) = 2078 MB/s
+test sherlock::no_match_uncommon ... bench: 13,963 ns/iter (+/- 759) = 42607 MB/s
+test sherlock::quotes ... bench: 5,580,378 ns/iter (+/- 295,941) = 106 MB/s
+test sherlock::repeated_class_negation ... bench: 52,797,981 ns/iter (+/- 2,731,805) = 11 MB/s
+test sherlock::the_lower ... bench: 1,295,105 ns/iter (+/- 62,365) = 459 MB/s
+test sherlock::the_nocase ... bench: 1,620,713 ns/iter (+/- 73,503) = 367 MB/s
+test sherlock::the_upper ... bench: 112,911 ns/iter (+/- 5,843) = 5269 MB/s
+test sherlock::the_whitespace ... bench: 2,441,986 ns/iter (+/- 133,012) = 243 MB/s
+test sherlock::word_ending_n ... bench: 26,478,327 ns/iter (+/- 1,361,757) = 22 MB/s
+test sherlock::words ... bench: 23,948,872 ns/iter (+/- 2,323,993) = 24 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 95 measured; 0 filtered out
+
diff --git a/vendor/regex/record/old-bench-log/06/dphobos-ldc-ct b/vendor/regex/record/old-bench-log/06/dphobos-ldc-ct
new file mode 100644
index 0000000..6aaa5de
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/06/dphobos-ldc-ct
@@ -0,0 +1,99 @@
+
+running 94 tests
+test misc::anchored_literal_long_match ... bench: 189 ns/iter (+/- 23) = 2063 MB/s
+test misc::anchored_literal_long_non_match ... bench: 128 ns/iter (+/- 14) = 3046 MB/s
+test misc::anchored_literal_short_match ... bench: 191 ns/iter (+/- 20) = 136 MB/s
+test misc::anchored_literal_short_non_match ... bench: 120 ns/iter (+/- 13) = 216 MB/s
+test misc::easy0_1K ... bench: 536 ns/iter (+/- 49) = 1960 MB/s
+test misc::easy0_1MB ... bench: 24,516 ns/iter (+/- 2,181) = 42772 MB/s
+test misc::easy0_32 ... bench: 551 ns/iter (+/- 36) = 107 MB/s
+test misc::easy0_32K ... bench: 961 ns/iter (+/- 105) = 34125 MB/s
+test misc::easy1_1K ... bench: 518 ns/iter (+/- 59) = 2015 MB/s
+test misc::easy1_1MB ... bench: 25,352 ns/iter (+/- 2,847) = 41361 MB/s
+test misc::easy1_32 ... bench: 501 ns/iter (+/- 42) = 103 MB/s
+test misc::easy1_32K ... bench: 919 ns/iter (+/- 69) = 35677 MB/s
+test misc::hard_1K ... bench: 16,146 ns/iter (+/- 1,124) = 65 MB/s
+test misc::hard_1MB ... bench: 16,482,695 ns/iter (+/- 805,077) = 63 MB/s
+test misc::hard_32 ... bench: 1,807 ns/iter (+/- 173) = 32 MB/s
+test misc::hard_32K ... bench: 516,772 ns/iter (+/- 33,884) = 63 MB/s
+test misc::literal ... bench: 77 ns/iter (+/- 9) = 662 MB/s
+test misc::long_needle1 ... bench: 56,900 ns/iter (+/- 3,087) = 1757 MB/s
+test misc::long_needle2 ... bench: 57,364 ns/iter (+/- 4,166) = 1743 MB/s
+test misc::match_class ... bench: 156 ns/iter (+/- 21) = 519 MB/s
+test misc::match_class_in_range ... bench: 121 ns/iter (+/- 12) = 669 MB/s
+test misc::match_class_unicode ... bench: 1,515 ns/iter (+/- 207) = 106 MB/s
+test misc::medium_1K ... bench: 1,186 ns/iter (+/- 120) = 887 MB/s
+test misc::medium_1MB ... bench: 559,677 ns/iter (+/- 59,284) = 1873 MB/s
+test misc::medium_32 ... bench: 657 ns/iter (+/- 86) = 91 MB/s
+test misc::medium_32K ... bench: 18,142 ns/iter (+/- 915) = 1807 MB/s
+test misc::not_literal ... bench: 1,319 ns/iter (+/- 128) = 38 MB/s
+test misc::one_pass_long_prefix ... bench: 509 ns/iter (+/- 56) = 51 MB/s
+test misc::one_pass_long_prefix_not ... bench: 517 ns/iter (+/- 38) = 50 MB/s
+test misc::one_pass_short ... bench: 783 ns/iter (+/- 83) = 21 MB/s
+test misc::one_pass_short_not ... bench: 1,239 ns/iter (+/- 98) = 13 MB/s
+test misc::reallyhard2_1K ... bench: 40,580 ns/iter (+/- 3,041) = 25 MB/s
+test misc::reallyhard_1K ... bench: 15,162 ns/iter (+/- 652) = 69 MB/s
+test misc::reallyhard_1MB ... bench: 16,065,920 ns/iter (+/- 886,245) = 65 MB/s
+test misc::reallyhard_32 ... bench: 1,829 ns/iter (+/- 90) = 32 MB/s
+test misc::reallyhard_32K ... bench: 520,572 ns/iter (+/- 88,290) = 62 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 4,423 ns/iter (+/- 493) = 1808 MB/s
+test regexdna::find_new_lines ... bench: 14,658,357 ns/iter (+/- 1,784,941) = 346 MB/s
+test regexdna::subst1 ... bench: 2,984,959 ns/iter (+/- 422,186) = 1703 MB/s
+test regexdna::subst10 ... bench: 2,836,747 ns/iter (+/- 274,300) = 1791 MB/s
+test regexdna::subst11 ... bench: 2,809,880 ns/iter (+/- 309,516) = 1809 MB/s
+test regexdna::subst2 ... bench: 2,868,765 ns/iter (+/- 435,511) = 1771 MB/s
+test regexdna::subst3 ... bench: 2,837,000 ns/iter (+/- 319,135) = 1791 MB/s
+test regexdna::subst4 ... bench: 2,856,540 ns/iter (+/- 320,458) = 1779 MB/s
+test regexdna::subst5 ... bench: 2,820,953 ns/iter (+/- 340,996) = 1802 MB/s
+test regexdna::subst6 ... bench: 3,588,607 ns/iter (+/- 462,158) = 1416 MB/s
+test regexdna::subst7 ... bench: 2,896,235 ns/iter (+/- 165,525) = 1755 MB/s
+test regexdna::subst8 ... bench: 2,982,961 ns/iter (+/- 315,768) = 1704 MB/s
+test regexdna::subst9 ... bench: 3,024,311 ns/iter (+/- 300,274) = 1680 MB/s
+test regexdna::variant1 ... bench: 5,234,342 ns/iter (+/- 269,577) = 971 MB/s
+test regexdna::variant2 ... bench: 6,463,683 ns/iter (+/- 532,663) = 786 MB/s
+test regexdna::variant3 ... bench: 10,720,523 ns/iter (+/- 414,684) = 474 MB/s
+test regexdna::variant4 ... bench: 9,882,647 ns/iter (+/- 297,904) = 514 MB/s
+test regexdna::variant5 ... bench: 9,664,151 ns/iter (+/- 659,587) = 526 MB/s
+test regexdna::variant6 ... bench: 7,174,368 ns/iter (+/- 322,025) = 708 MB/s
+test regexdna::variant7 ... bench: 7,605,668 ns/iter (+/- 411,605) = 668 MB/s
+test regexdna::variant8 ... bench: 9,580,481 ns/iter (+/- 373,332) = 530 MB/s
+test regexdna::variant9 ... bench: 18,270,186 ns/iter (+/- 986,510) = 278 MB/s
+test sherlock::before_after_holmes ... bench: 21,982,853 ns/iter (+/- 1,032,853) = 27 MB/s
+test sherlock::before_holmes ... bench: 21,947,949 ns/iter (+/- 848,014) = 27 MB/s
+test sherlock::holmes_cochar_watson ... bench: 909,691 ns/iter (+/- 48,847) = 653 MB/s
+test sherlock::holmes_coword_watson ... bench: 124,771,191 ns/iter (+/- 8,084,768) = 4 MB/s
+test sherlock::ing_suffix ... bench: 17,864,129 ns/iter (+/- 1,343,114) = 33 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 21,009,249 ns/iter (+/- 452,676) = 28 MB/s
+test sherlock::letters ... bench: 37,888,421 ns/iter (+/- 2,482,541) = 15 MB/s
+test sherlock::letters_lower ... bench: 37,029,883 ns/iter (+/- 481,280) = 16 MB/s
+test sherlock::letters_upper ... bench: 1,627,107 ns/iter (+/- 51,063) = 365 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 370,850 ns/iter (+/- 15,384) = 1604 MB/s
+test sherlock::name_alt1 ... bench: 371,780 ns/iter (+/- 28,486) = 1600 MB/s
+test sherlock::name_alt2 ... bench: 506,859 ns/iter (+/- 17,553) = 1173 MB/s
+test sherlock::name_alt3 ... bench: 915,729 ns/iter (+/- 99,429) = 649 MB/s
+test sherlock::name_alt3_nocase ... bench: 1,512,050 ns/iter (+/- 186,130) = 393 MB/s
+test sherlock::name_alt4 ... bench: 578,710 ns/iter (+/- 18,089) = 1028 MB/s
+test sherlock::name_alt4_nocase ... bench: 752,912 ns/iter (+/- 51,342) = 790 MB/s
+test sherlock::name_alt5 ... bench: 595,803 ns/iter (+/- 15,053) = 998 MB/s
+test sherlock::name_alt5_nocase ... bench: 730,149 ns/iter (+/- 40,662) = 814 MB/s
+test sherlock::name_holmes ... bench: 115,596 ns/iter (+/- 4,597) = 5146 MB/s
+test sherlock::name_holmes_nocase ... bench: 429,765 ns/iter (+/- 16,685) = 1384 MB/s
+test sherlock::name_sherlock ... bench: 38,985 ns/iter (+/- 2,195) = 15260 MB/s
+test sherlock::name_sherlock_holmes ... bench: 49,610 ns/iter (+/- 2,005) = 11992 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 374,335 ns/iter (+/- 37,062) = 1589 MB/s
+test sherlock::name_sherlock_nocase ... bench: 355,559 ns/iter (+/- 40,873) = 1673 MB/s
+test sherlock::name_whitespace ... bench: 57,616 ns/iter (+/- 5,124) = 10325 MB/s
+test sherlock::no_match_common ... bench: 284,228 ns/iter (+/- 29,087) = 2093 MB/s
+test sherlock::no_match_really_common ... bench: 287,263 ns/iter (+/- 22,755) = 2071 MB/s
+test sherlock::no_match_uncommon ... bench: 14,030 ns/iter (+/- 526) = 42404 MB/s
+test sherlock::quotes ... bench: 5,563,019 ns/iter (+/- 537,611) = 106 MB/s
+test sherlock::repeated_class_negation ... bench: 54,831,275 ns/iter (+/- 5,982,214) = 10 MB/s
+test sherlock::the_lower ... bench: 1,298,205 ns/iter (+/- 73,265) = 458 MB/s
+test sherlock::the_nocase ... bench: 1,572,579 ns/iter (+/- 63,536) = 378 MB/s
+test sherlock::the_upper ... bench: 112,795 ns/iter (+/- 4,179) = 5274 MB/s
+test sherlock::the_whitespace ... bench: 2,630,026 ns/iter (+/- 227,760) = 226 MB/s
+test sherlock::word_ending_n ... bench: 26,975,356 ns/iter (+/- 2,531,982) = 22 MB/s
+test sherlock::words ... bench: 23,116,326 ns/iter (+/- 458,721) = 25 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 94 measured; 0 filtered out
+
diff --git a/vendor/regex/record/old-bench-log/06/pcre1 b/vendor/regex/record/old-bench-log/06/pcre1
new file mode 100644
index 0000000..f8a9100
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/06/pcre1
@@ -0,0 +1,98 @@
+
+running 93 tests
+test misc::anchored_literal_long_match ... bench: 32 ns/iter (+/- 38) = 12187 MB/s
+test misc::anchored_literal_long_non_match ... bench: 23 ns/iter (+/- 1) = 16956 MB/s
+test misc::anchored_literal_short_match ... bench: 30 ns/iter (+/- 1) = 866 MB/s
+test misc::anchored_literal_short_non_match ... bench: 23 ns/iter (+/- 0) = 1130 MB/s
+test misc::easy0_1K ... bench: 261 ns/iter (+/- 21) = 4026 MB/s
+test misc::easy0_1MB ... bench: 202,218 ns/iter (+/- 16,050) = 5185 MB/s
+test misc::easy0_32 ... bench: 49 ns/iter (+/- 3) = 1204 MB/s
+test misc::easy0_32K ... bench: 6,305 ns/iter (+/- 448) = 5201 MB/s
+test misc::easy1_1K ... bench: 245 ns/iter (+/- 5) = 4261 MB/s
+test misc::easy1_1MB ... bench: 198,215 ns/iter (+/- 10,461) = 5290 MB/s
+test misc::easy1_32 ... bench: 49 ns/iter (+/- 1) = 1061 MB/s
+test misc::easy1_32K ... bench: 6,309 ns/iter (+/- 358) = 5197 MB/s
+test misc::hard_1K ... bench: 1,306 ns/iter (+/- 50) = 804 MB/s
+test misc::hard_1MB ... bench: 1,219,034 ns/iter (+/- 92,693) = 860 MB/s
+test misc::hard_32 ... bench: 95 ns/iter (+/- 7) = 621 MB/s
+test misc::hard_32K ... bench: 37,713 ns/iter (+/- 948) = 869 MB/s
+test misc::literal ... bench: 29 ns/iter (+/- 1) = 1758 MB/s
+test misc::long_needle1 ... bench: 548,012 ns/iter (+/- 26,029) = 182 MB/s
+test misc::long_needle2 ... bench: 538,536 ns/iter (+/- 54,612) = 185 MB/s
+test misc::match_class ... bench: 94 ns/iter (+/- 3) = 861 MB/s
+test misc::match_class_in_range ... bench: 29 ns/iter (+/- 1) = 2793 MB/s
+test misc::match_class_unicode ... bench: 370 ns/iter (+/- 19) = 435 MB/s
+test misc::medium_1K ... bench: 256 ns/iter (+/- 13) = 4109 MB/s
+test misc::medium_1MB ... bench: 207,655 ns/iter (+/- 9,168) = 5049 MB/s
+test misc::medium_32 ... bench: 51 ns/iter (+/- 5) = 1176 MB/s
+test misc::medium_32K ... bench: 6,144 ns/iter (+/- 327) = 5337 MB/s
+test misc::not_literal ... bench: 166 ns/iter (+/- 14) = 307 MB/s
+test misc::one_pass_long_prefix ... bench: 27 ns/iter (+/- 2) = 962 MB/s
+test misc::one_pass_long_prefix_not ... bench: 29 ns/iter (+/- 1) = 896 MB/s
+test misc::one_pass_short ... bench: 55 ns/iter (+/- 2) = 309 MB/s
+test misc::one_pass_short_not ... bench: 55 ns/iter (+/- 3) = 309 MB/s
+test misc::reallyhard2_1K ... bench: 4,404 ns/iter (+/- 346) = 236 MB/s
+test misc::reallyhard_1K ... bench: 1,365 ns/iter (+/- 52) = 769 MB/s
+test misc::reallyhard_1MB ... bench: 1,118,777 ns/iter (+/- 72,209) = 937 MB/s
+test misc::reallyhard_32 ... bench: 112 ns/iter (+/- 4) = 526 MB/s
+test misc::reallyhard_32K ... bench: 41,164 ns/iter (+/- 2,351) = 796 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 4,413 ns/iter (+/- 304) = 1812 MB/s
+test regexdna::find_new_lines ... bench: 2,802,109 ns/iter (+/- 129,768) = 1814 MB/s
+test regexdna::subst1 ... bench: 1,263,401 ns/iter (+/- 54,374) = 4023 MB/s
+test regexdna::subst10 ... bench: 1,254,544 ns/iter (+/- 101,656) = 4051 MB/s
+test regexdna::subst11 ... bench: 1,408,321 ns/iter (+/- 121,522) = 3609 MB/s
+test regexdna::subst2 ... bench: 1,364,704 ns/iter (+/- 106,508) = 3724 MB/s
+test regexdna::subst3 ... bench: 1,258,687 ns/iter (+/- 84,504) = 4038 MB/s
+test regexdna::subst4 ... bench: 1,301,822 ns/iter (+/- 62,866) = 3904 MB/s
+test regexdna::subst5 ... bench: 1,338,338 ns/iter (+/- 313,996) = 3798 MB/s
+test regexdna::subst6 ... bench: 1,349,310 ns/iter (+/- 117,181) = 3767 MB/s
+test regexdna::subst7 ... bench: 1,390,090 ns/iter (+/- 210,430) = 3656 MB/s
+test regexdna::subst8 ... bench: 1,293,481 ns/iter (+/- 38,532) = 3930 MB/s
+test regexdna::subst9 ... bench: 1,245,652 ns/iter (+/- 58,026) = 4080 MB/s
+test regexdna::variant1 ... bench: 15,239,324 ns/iter (+/- 414,621) = 333 MB/s
+test regexdna::variant2 ... bench: 16,489,922 ns/iter (+/- 825,229) = 308 MB/s
+test regexdna::variant3 ... bench: 19,945,871 ns/iter (+/- 665,046) = 254 MB/s
+test regexdna::variant4 ... bench: 18,604,011 ns/iter (+/- 712,670) = 273 MB/s
+test regexdna::variant5 ... bench: 17,084,919 ns/iter (+/- 1,379,879) = 297 MB/s
+test regexdna::variant6 ... bench: 16,918,130 ns/iter (+/- 975,620) = 300 MB/s
+test regexdna::variant7 ... bench: 19,114,194 ns/iter (+/- 857,330) = 265 MB/s
+test regexdna::variant8 ... bench: 23,831,138 ns/iter (+/- 878,576) = 213 MB/s
+test regexdna::variant9 ... bench: 21,835,777 ns/iter (+/- 1,339,143) = 232 MB/s
+test sherlock::before_after_holmes ... bench: 4,401,834 ns/iter (+/- 218,696) = 135 MB/s
+test sherlock::before_holmes ... bench: 4,436,717 ns/iter (+/- 109,324) = 134 MB/s
+test sherlock::holmes_cochar_watson ... bench: 497,667 ns/iter (+/- 19,212) = 1195 MB/s
+test sherlock::ing_suffix ... bench: 1,852,390 ns/iter (+/- 77,888) = 321 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 4,775,078 ns/iter (+/- 152,556) = 124 MB/s
+test sherlock::letters ... bench: 13,888,750 ns/iter (+/- 668,831) = 42 MB/s
+test sherlock::letters_lower ... bench: 13,452,405 ns/iter (+/- 453,184) = 44 MB/s
+test sherlock::letters_upper ... bench: 1,870,502 ns/iter (+/- 57,825) = 318 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 198,649 ns/iter (+/- 9,080) = 2994 MB/s
+test sherlock::name_alt1 ... bench: 464,513 ns/iter (+/- 29,935) = 1280 MB/s
+test sherlock::name_alt2 ... bench: 470,746 ns/iter (+/- 12,931) = 1263 MB/s
+test sherlock::name_alt3 ... bench: 874,352 ns/iter (+/- 38,618) = 680 MB/s
+test sherlock::name_alt3_nocase ... bench: 2,821,106 ns/iter (+/- 113,055) = 210 MB/s
+test sherlock::name_alt4 ... bench: 78,753 ns/iter (+/- 3,111) = 7554 MB/s
+test sherlock::name_alt4_nocase ... bench: 1,596,406 ns/iter (+/- 62,919) = 372 MB/s
+test sherlock::name_alt5 ... bench: 655,870 ns/iter (+/- 32,597) = 907 MB/s
+test sherlock::name_alt5_nocase ... bench: 1,732,595 ns/iter (+/- 75,827) = 343 MB/s
+test sherlock::name_holmes ... bench: 400,037 ns/iter (+/- 16,935) = 1487 MB/s
+test sherlock::name_holmes_nocase ... bench: 501,467 ns/iter (+/- 20,805) = 1186 MB/s
+test sherlock::name_sherlock ... bench: 267,873 ns/iter (+/- 10,199) = 2220 MB/s
+test sherlock::name_sherlock_holmes ... bench: 202,107 ns/iter (+/- 10,314) = 2943 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 1,070,780 ns/iter (+/- 43,144) = 555 MB/s
+test sherlock::name_sherlock_nocase ... bench: 1,074,139 ns/iter (+/- 48,270) = 553 MB/s
+test sherlock::name_whitespace ... bench: 271,978 ns/iter (+/- 10,137) = 2187 MB/s
+test sherlock::no_match_common ... bench: 411,484 ns/iter (+/- 13,213) = 1445 MB/s
+test sherlock::no_match_really_common ... bench: 403,709 ns/iter (+/- 12,415) = 1473 MB/s
+test sherlock::no_match_uncommon ... bench: 27,730 ns/iter (+/- 928) = 21454 MB/s
+test sherlock::quotes ... bench: 515,141 ns/iter (+/- 17,799) = 1154 MB/s
+test sherlock::repeated_class_negation ... bench: 5,842,243 ns/iter (+/- 282,478) = 101 MB/s
+test sherlock::the_lower ... bench: 725,059 ns/iter (+/- 36,233) = 820 MB/s
+test sherlock::the_nocase ... bench: 812,888 ns/iter (+/- 34,200) = 731 MB/s
+test sherlock::the_upper ... bench: 56,746 ns/iter (+/- 2,186) = 10484 MB/s
+test sherlock::the_whitespace ... bench: 920,705 ns/iter (+/- 37,325) = 646 MB/s
+test sherlock::word_ending_n ... bench: 5,625,614 ns/iter (+/- 199,408) = 105 MB/s
+test sherlock::words ... bench: 7,122,561 ns/iter (+/- 161,013) = 83 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out
+
diff --git a/vendor/regex/record/old-bench-log/06/pcre2 b/vendor/regex/record/old-bench-log/06/pcre2
new file mode 100644
index 0000000..5185301
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/06/pcre2
@@ -0,0 +1,98 @@
+
+running 93 tests
+test misc::anchored_literal_long_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s
+test misc::anchored_literal_long_non_match ... bench: 13 ns/iter (+/- 1) = 30000 MB/s
+test misc::anchored_literal_short_match ... bench: 16 ns/iter (+/- 1) = 1625 MB/s
+test misc::anchored_literal_short_non_match ... bench: 13 ns/iter (+/- 1) = 2000 MB/s
+test misc::easy0_1K ... bench: 104 ns/iter (+/- 5) = 10105 MB/s
+test misc::easy0_1MB ... bench: 64,102 ns/iter (+/- 4,103) = 16358 MB/s
+test misc::easy0_32 ... bench: 32 ns/iter (+/- 4) = 1843 MB/s
+test misc::easy0_32K ... bench: 2,042 ns/iter (+/- 152) = 16060 MB/s
+test misc::easy1_1K ... bench: 102 ns/iter (+/- 11) = 10235 MB/s
+test misc::easy1_1MB ... bench: 63,117 ns/iter (+/- 4,547) = 16613 MB/s
+test misc::easy1_32 ... bench: 33 ns/iter (+/- 4) = 1575 MB/s
+test misc::easy1_32K ... bench: 2,019 ns/iter (+/- 181) = 16239 MB/s
+test misc::hard_1K ... bench: 1,236 ns/iter (+/- 82) = 850 MB/s
+test misc::hard_1MB ... bench: 1,041,354 ns/iter (+/- 39,123) = 1006 MB/s
+test misc::hard_32 ... bench: 86 ns/iter (+/- 8) = 686 MB/s
+test misc::hard_32K ... bench: 33,054 ns/iter (+/- 1,813) = 992 MB/s
+test misc::literal ... bench: 20 ns/iter (+/- 2) = 2550 MB/s
+test misc::long_needle1 ... bench: 501,732 ns/iter (+/- 52,173) = 199 MB/s
+test misc::long_needle2 ... bench: 515,127 ns/iter (+/- 48,790) = 194 MB/s
+test misc::match_class ... bench: 55 ns/iter (+/- 7) = 1472 MB/s
+test misc::match_class_in_range ... bench: 19 ns/iter (+/- 2) = 4263 MB/s
+test misc::match_class_unicode ... bench: 342 ns/iter (+/- 60) = 470 MB/s
+test misc::medium_1K ... bench: 106 ns/iter (+/- 4) = 9924 MB/s
+test misc::medium_1MB ... bench: 63,011 ns/iter (+/- 4,942) = 16641 MB/s
+test misc::medium_32 ... bench: 32 ns/iter (+/- 3) = 1875 MB/s
+test misc::medium_32K ... bench: 2,068 ns/iter (+/- 189) = 15858 MB/s
+test misc::not_literal ... bench: 147 ns/iter (+/- 13) = 346 MB/s
+test misc::one_pass_long_prefix ... bench: 15 ns/iter (+/- 1) = 1733 MB/s
+test misc::one_pass_long_prefix_not ... bench: 15 ns/iter (+/- 1) = 1733 MB/s
+test misc::one_pass_short ... bench: 42 ns/iter (+/- 3) = 404 MB/s
+test misc::one_pass_short_not ... bench: 43 ns/iter (+/- 5) = 395 MB/s
+test misc::reallyhard2_1K ... bench: 4,356 ns/iter (+/- 499) = 238 MB/s
+test misc::reallyhard_1K ... bench: 1,196 ns/iter (+/- 113) = 878 MB/s
+test misc::reallyhard_1MB ... bench: 1,070,155 ns/iter (+/- 90,895) = 979 MB/s
+test misc::reallyhard_32 ... bench: 93 ns/iter (+/- 12) = 634 MB/s
+test misc::reallyhard_32K ... bench: 33,521 ns/iter (+/- 2,663) = 978 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 3,065 ns/iter (+/- 393) = 2610 MB/s
+test regexdna::find_new_lines ... bench: 1,891,736 ns/iter (+/- 232,990) = 2687 MB/s
+test regexdna::subst1 ... bench: 920,853 ns/iter (+/- 75,276) = 5520 MB/s
+test regexdna::subst10 ... bench: 892,533 ns/iter (+/- 77,177) = 5695 MB/s
+test regexdna::subst11 ... bench: 869,335 ns/iter (+/- 75,754) = 5847 MB/s
+test regexdna::subst2 ... bench: 901,876 ns/iter (+/- 75,287) = 5636 MB/s
+test regexdna::subst3 ... bench: 870,185 ns/iter (+/- 53,535) = 5841 MB/s
+test regexdna::subst4 ... bench: 859,924 ns/iter (+/- 63,888) = 5911 MB/s
+test regexdna::subst5 ... bench: 886,748 ns/iter (+/- 87,929) = 5732 MB/s
+test regexdna::subst6 ... bench: 870,428 ns/iter (+/- 47,015) = 5840 MB/s
+test regexdna::subst7 ... bench: 865,513 ns/iter (+/- 41,507) = 5873 MB/s
+test regexdna::subst8 ... bench: 870,030 ns/iter (+/- 110,449) = 5842 MB/s
+test regexdna::subst9 ... bench: 875,649 ns/iter (+/- 32,905) = 5805 MB/s
+test regexdna::variant1 ... bench: 9,234,989 ns/iter (+/- 127,076) = 550 MB/s
+test regexdna::variant2 ... bench: 11,759,628 ns/iter (+/- 575,788) = 432 MB/s
+test regexdna::variant3 ... bench: 11,229,965 ns/iter (+/- 522,759) = 452 MB/s
+test regexdna::variant4 ... bench: 10,040,716 ns/iter (+/- 309,357) = 506 MB/s
+test regexdna::variant5 ... bench: 10,052,052 ns/iter (+/- 522,338) = 505 MB/s
+test regexdna::variant6 ... bench: 10,719,366 ns/iter (+/- 577,988) = 474 MB/s
+test regexdna::variant7 ... bench: 11,076,094 ns/iter (+/- 1,291,237) = 458 MB/s
+test regexdna::variant8 ... bench: 11,855,290 ns/iter (+/- 667,429) = 428 MB/s
+test regexdna::variant9 ... bench: 12,531,240 ns/iter (+/- 606,198) = 405 MB/s
+test sherlock::before_after_holmes ... bench: 4,169,656 ns/iter (+/- 222,900) = 142 MB/s
+test sherlock::before_holmes ... bench: 4,144,394 ns/iter (+/- 170,133) = 143 MB/s
+test sherlock::holmes_cochar_watson ... bench: 74,437 ns/iter (+/- 4,266) = 7992 MB/s
+test sherlock::ing_suffix ... bench: 1,731,507 ns/iter (+/- 162,892) = 343 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 4,418,194 ns/iter (+/- 240,220) = 134 MB/s
+test sherlock::letters ... bench: 8,847,041 ns/iter (+/- 392,402) = 67 MB/s
+test sherlock::letters_lower ... bench: 8,547,432 ns/iter (+/- 304,256) = 69 MB/s
+test sherlock::letters_upper ... bench: 1,584,248 ns/iter (+/- 51,331) = 375 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 38,057 ns/iter (+/- 1,666) = 15632 MB/s
+test sherlock::name_alt1 ... bench: 50,415 ns/iter (+/- 3,173) = 11800 MB/s
+test sherlock::name_alt2 ... bench: 66,062 ns/iter (+/- 2,807) = 9005 MB/s
+test sherlock::name_alt3 ... bench: 720,097 ns/iter (+/- 32,351) = 826 MB/s
+test sherlock::name_alt3_nocase ... bench: 2,591,049 ns/iter (+/- 86,537) = 229 MB/s
+test sherlock::name_alt4 ... bench: 65,860 ns/iter (+/- 2,780) = 9033 MB/s
+test sherlock::name_alt4_nocase ... bench: 1,204,839 ns/iter (+/- 41,087) = 493 MB/s
+test sherlock::name_alt5 ... bench: 615,483 ns/iter (+/- 24,177) = 966 MB/s
+test sherlock::name_alt5_nocase ... bench: 1,467,461 ns/iter (+/- 71,032) = 405 MB/s
+test sherlock::name_holmes ... bench: 48,997 ns/iter (+/- 2,471) = 12142 MB/s
+test sherlock::name_holmes_nocase ... bench: 88,549 ns/iter (+/- 4,814) = 6718 MB/s
+test sherlock::name_sherlock ... bench: 38,309 ns/iter (+/- 1,354) = 15529 MB/s
+test sherlock::name_sherlock_holmes ... bench: 39,062 ns/iter (+/- 4,253) = 15230 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 713,355 ns/iter (+/- 77,990) = 833 MB/s
+test sherlock::name_sherlock_nocase ... bench: 719,747 ns/iter (+/- 85,736) = 826 MB/s
+test sherlock::name_whitespace ... bench: 39,161 ns/iter (+/- 3,678) = 15191 MB/s
+test sherlock::no_match_common ... bench: 35,574 ns/iter (+/- 3,433) = 16723 MB/s
+test sherlock::no_match_really_common ... bench: 56,847 ns/iter (+/- 7,068) = 10465 MB/s
+test sherlock::no_match_uncommon ... bench: 36,185 ns/iter (+/- 4,938) = 16441 MB/s
+test sherlock::quotes ... bench: 454,135 ns/iter (+/- 18,816) = 1310 MB/s
+test sherlock::repeated_class_negation ... bench: 5,724,068 ns/iter (+/- 342,211) = 103 MB/s
+test sherlock::the_lower ... bench: 256,190 ns/iter (+/- 25,452) = 2322 MB/s
+test sherlock::the_nocase ... bench: 284,080 ns/iter (+/- 17,165) = 2094 MB/s
+test sherlock::the_upper ... bench: 56,120 ns/iter (+/- 2,826) = 10601 MB/s
+test sherlock::the_whitespace ... bench: 456,734 ns/iter (+/- 23,405) = 1302 MB/s
+test sherlock::word_ending_n ... bench: 5,079,288 ns/iter (+/- 214,895) = 117 MB/s
+test sherlock::words ... bench: 5,200,092 ns/iter (+/- 250,085) = 114 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out
+
diff --git a/vendor/regex/record/old-bench-log/06/re2 b/vendor/regex/record/old-bench-log/06/re2
new file mode 100644
index 0000000..3e1585a
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/06/re2
@@ -0,0 +1,101 @@
+
+running 96 tests
+test misc::anchored_literal_long_match ... bench: 73 ns/iter (+/- 8) = 5342 MB/s
+test misc::anchored_literal_long_non_match ... bench: 16 ns/iter (+/- 1) = 24375 MB/s
+test misc::anchored_literal_short_match ... bench: 73 ns/iter (+/- 9) = 356 MB/s
+test misc::anchored_literal_short_non_match ... bench: 16 ns/iter (+/- 1) = 1625 MB/s
+test misc::easy0_1K ... bench: 119 ns/iter (+/- 11) = 8831 MB/s
+test misc::easy0_1MB ... bench: 25,312 ns/iter (+/- 875) = 41427 MB/s
+test misc::easy0_32 ... bench: 112 ns/iter (+/- 5) = 526 MB/s
+test misc::easy0_32K ... bench: 534 ns/iter (+/- 43) = 61413 MB/s
+test misc::easy1_1K ... bench: 109 ns/iter (+/- 9) = 9577 MB/s
+test misc::easy1_1MB ... bench: 23,892 ns/iter (+/- 715) = 43889 MB/s
+test misc::easy1_32 ... bench: 102 ns/iter (+/- 8) = 509 MB/s
+test misc::easy1_32K ... bench: 519 ns/iter (+/- 54) = 63175 MB/s
+test misc::hard_1K ... bench: 1,859 ns/iter (+/- 202) = 565 MB/s
+test misc::hard_1MB ... bench: 1,871,446 ns/iter (+/- 99,961) = 560 MB/s
+test misc::hard_32 ... bench: 162 ns/iter (+/- 20) = 364 MB/s
+test misc::hard_32K ... bench: 57,459 ns/iter (+/- 4,672) = 570 MB/s
+test misc::literal ... bench: 70 ns/iter (+/- 8) = 728 MB/s
+test misc::long_needle1 ... bench: 130,995 ns/iter (+/- 4,935) = 763 MB/s
+test misc::long_needle2 ... bench: 129,668 ns/iter (+/- 8,852) = 771 MB/s
+test misc::match_class ... bench: 195 ns/iter (+/- 16) = 415 MB/s
+test misc::match_class_in_range ... bench: 194 ns/iter (+/- 22) = 417 MB/s
+test misc::match_class_unicode ... bench: 630 ns/iter (+/- 61) = 255 MB/s
+test misc::medium_1K ... bench: 1,699 ns/iter (+/- 147) = 619 MB/s
+test misc::medium_1MB ... bench: 1,633,131 ns/iter (+/- 65,889) = 642 MB/s
+test misc::medium_32 ... bench: 169 ns/iter (+/- 18) = 355 MB/s
+test misc::medium_32K ... bench: 51,313 ns/iter (+/- 1,855) = 639 MB/s
+test misc::no_exponential ... bench: 216 ns/iter (+/- 13) = 462 MB/s
+test misc::not_literal ... bench: 140 ns/iter (+/- 6) = 364 MB/s
+test misc::one_pass_long_prefix ... bench: 71 ns/iter (+/- 2) = 366 MB/s
+test misc::one_pass_long_prefix_not ... bench: 109 ns/iter (+/- 9) = 238 MB/s
+test misc::one_pass_short ... bench: 99 ns/iter (+/- 7) = 171 MB/s
+test misc::one_pass_short_not ... bench: 96 ns/iter (+/- 5) = 177 MB/s
+test misc::reallyhard2_1K ... bench: 1,405 ns/iter (+/- 134) = 740 MB/s
+test misc::reallyhard_1K ... bench: 1,875 ns/iter (+/- 168) = 560 MB/s
+test misc::reallyhard_1MB ... bench: 1,853,207 ns/iter (+/- 103,218) = 565 MB/s
+test misc::reallyhard_32 ... bench: 157 ns/iter (+/- 11) = 375 MB/s
+test misc::reallyhard_32K ... bench: 57,880 ns/iter (+/- 5,319) = 566 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 12,686 ns/iter (+/- 536) = 630 MB/s
+test regexdna::find_new_lines ... bench: 28,761,913 ns/iter (+/- 1,447,326) = 176 MB/s
+test regexdna::subst1 ... bench: 4,629,782 ns/iter (+/- 142,214) = 1097 MB/s
+test regexdna::subst10 ... bench: 4,692,819 ns/iter (+/- 156,805) = 1083 MB/s
+test regexdna::subst11 ... bench: 4,652,438 ns/iter (+/- 206,457) = 1092 MB/s
+test regexdna::subst2 ... bench: 4,682,943 ns/iter (+/- 176,335) = 1085 MB/s
+test regexdna::subst3 ... bench: 4,646,162 ns/iter (+/- 241,873) = 1094 MB/s
+test regexdna::subst4 ... bench: 4,653,380 ns/iter (+/- 188,899) = 1092 MB/s
+test regexdna::subst5 ... bench: 4,770,480 ns/iter (+/- 238,930) = 1065 MB/s
+test regexdna::subst6 ... bench: 4,671,427 ns/iter (+/- 286,241) = 1088 MB/s
+test regexdna::subst7 ... bench: 4,658,214 ns/iter (+/- 210,723) = 1091 MB/s
+test regexdna::subst8 ... bench: 4,909,600 ns/iter (+/- 417,894) = 1035 MB/s
+test regexdna::subst9 ... bench: 4,910,285 ns/iter (+/- 587,024) = 1035 MB/s
+test regexdna::variant1 ... bench: 20,895,772 ns/iter (+/- 2,313,771) = 243 MB/s
+test regexdna::variant2 ... bench: 20,465,984 ns/iter (+/- 1,913,613) = 248 MB/s
+test regexdna::variant3 ... bench: 19,469,527 ns/iter (+/- 1,367,226) = 261 MB/s
+test regexdna::variant4 ... bench: 21,662,238 ns/iter (+/- 1,489,235) = 234 MB/s
+test regexdna::variant5 ... bench: 21,808,098 ns/iter (+/- 2,294,522) = 233 MB/s
+test regexdna::variant6 ... bench: 21,208,952 ns/iter (+/- 986,848) = 239 MB/s
+test regexdna::variant7 ... bench: 20,289,473 ns/iter (+/- 595,084) = 250 MB/s
+test regexdna::variant8 ... bench: 17,765,356 ns/iter (+/- 503,529) = 286 MB/s
+test regexdna::variant9 ... bench: 13,222,010 ns/iter (+/- 509,278) = 384 MB/s
+test sherlock::before_after_holmes ... bench: 1,313,676 ns/iter (+/- 52,992) = 452 MB/s
+test sherlock::before_holmes ... bench: 1,337,432 ns/iter (+/- 37,054) = 444 MB/s
+test sherlock::everything_greedy ... bench: 6,080,272 ns/iter (+/- 110,011) = 97 MB/s
+test sherlock::everything_greedy_nl ... bench: 2,395,932 ns/iter (+/- 123,521) = 248 MB/s
+test sherlock::holmes_cochar_watson ... bench: 1,052,245 ns/iter (+/- 33,929) = 565 MB/s
+test sherlock::holmes_coword_watson ... bench: 1,063,007 ns/iter (+/- 34,462) = 559 MB/s
+test sherlock::ing_suffix ... bench: 2,703,395 ns/iter (+/- 63,263) = 220 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 1,608,756 ns/iter (+/- 42,100) = 369 MB/s
+test sherlock::letters ... bench: 68,220,129 ns/iter (+/- 3,602,216) = 8 MB/s
+test sherlock::letters_lower ... bench: 67,390,101 ns/iter (+/- 6,032,867) = 8 MB/s
+test sherlock::letters_upper ... bench: 3,708,482 ns/iter (+/- 235,128) = 160 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 2,816,517 ns/iter (+/- 99,081) = 211 MB/s
+test sherlock::name_alt1 ... bench: 53,193 ns/iter (+/- 1,575) = 11184 MB/s
+test sherlock::name_alt2 ... bench: 1,133,704 ns/iter (+/- 36,634) = 524 MB/s
+test sherlock::name_alt3 ... bench: 1,227,785 ns/iter (+/- 31,742) = 484 MB/s
+test sherlock::name_alt3_nocase ... bench: 2,451,285 ns/iter (+/- 103,766) = 242 MB/s
+test sherlock::name_alt4 ... bench: 1,168,955 ns/iter (+/- 87,785) = 508 MB/s
+test sherlock::name_alt4_nocase ... bench: 1,699,899 ns/iter (+/- 91,762) = 349 MB/s
+test sherlock::name_alt5 ... bench: 1,167,232 ns/iter (+/- 51,695) = 509 MB/s
+test sherlock::name_alt5_nocase ... bench: 1,805,463 ns/iter (+/- 74,631) = 329 MB/s
+test sherlock::name_holmes ... bench: 108,195 ns/iter (+/- 3,815) = 5498 MB/s
+test sherlock::name_holmes_nocase ... bench: 1,360,092 ns/iter (+/- 60,416) = 437 MB/s
+test sherlock::name_sherlock ... bench: 40,376 ns/iter (+/- 5,104) = 14734 MB/s
+test sherlock::name_sherlock_holmes ... bench: 41,361 ns/iter (+/- 2,553) = 14383 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 1,317,594 ns/iter (+/- 168,248) = 451 MB/s
+test sherlock::name_sherlock_nocase ... bench: 1,284,026 ns/iter (+/- 92,188) = 463 MB/s
+test sherlock::name_whitespace ... bench: 44,973 ns/iter (+/- 5,888) = 13228 MB/s
+test sherlock::no_match_common ... bench: 341,867 ns/iter (+/- 32,045) = 1740 MB/s
+test sherlock::no_match_really_common ... bench: 331,760 ns/iter (+/- 43,608) = 1793 MB/s
+test sherlock::no_match_uncommon ... bench: 14,285 ns/iter (+/- 760) = 41647 MB/s
+test sherlock::quotes ... bench: 1,342,144 ns/iter (+/- 96,471) = 443 MB/s
+test sherlock::the_lower ... bench: 1,722,919 ns/iter (+/- 83,873) = 345 MB/s
+test sherlock::the_nocase ... bench: 2,866,258 ns/iter (+/- 117,349) = 207 MB/s
+test sherlock::the_upper ... bench: 151,020 ns/iter (+/- 13,454) = 3939 MB/s
+test sherlock::the_whitespace ... bench: 1,597,329 ns/iter (+/- 149,689) = 372 MB/s
+test sherlock::word_ending_n ... bench: 2,193,027 ns/iter (+/- 136,408) = 271 MB/s
+test sherlock::words ... bench: 20,721,148 ns/iter (+/- 1,968,912) = 28 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out
+
diff --git a/vendor/regex/record/old-bench-log/06/rust b/vendor/regex/record/old-bench-log/06/rust
new file mode 100644
index 0000000..53ab222
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/06/rust
@@ -0,0 +1,113 @@
+
+running 108 tests
+test misc::anchored_literal_long_match ... bench: 22 ns/iter (+/- 2) = 17727 MB/s
+test misc::anchored_literal_long_non_match ... bench: 27 ns/iter (+/- 2) = 14444 MB/s
+test misc::anchored_literal_short_match ... bench: 22 ns/iter (+/- 1) = 1181 MB/s
+test misc::anchored_literal_short_non_match ... bench: 26 ns/iter (+/- 2) = 1000 MB/s
+test misc::easy0_1K ... bench: 16 ns/iter (+/- 1) = 65687 MB/s
+test misc::easy0_1MB ... bench: 19 ns/iter (+/- 2) = 55189631 MB/s
+test misc::easy0_32 ... bench: 15 ns/iter (+/- 1) = 3933 MB/s
+test misc::easy0_32K ... bench: 16 ns/iter (+/- 0) = 2049687 MB/s
+test misc::easy1_1K ... bench: 43 ns/iter (+/- 2) = 24279 MB/s
+test misc::easy1_1MB ... bench: 45 ns/iter (+/- 4) = 23302133 MB/s
+test misc::easy1_32 ... bench: 43 ns/iter (+/- 5) = 1209 MB/s
+test misc::easy1_32K ... bench: 43 ns/iter (+/- 2) = 762511 MB/s
+test misc::hard_1K ... bench: 53 ns/iter (+/- 6) = 19830 MB/s
+test misc::hard_1MB ... bench: 57 ns/iter (+/- 1) = 18396543 MB/s
+test misc::hard_32 ... bench: 53 ns/iter (+/- 4) = 1113 MB/s
+test misc::hard_32K ... bench: 53 ns/iter (+/- 6) = 618773 MB/s
+test misc::literal ... bench: 13 ns/iter (+/- 1) = 3923 MB/s
+test misc::long_needle1 ... bench: 1,203 ns/iter (+/- 55) = 83126 MB/s
+test misc::long_needle2 ... bench: 149,418 ns/iter (+/- 13,825) = 669 MB/s
+test misc::match_class ... bench: 62 ns/iter (+/- 6) = 1306 MB/s
+test misc::match_class_in_range ... bench: 23 ns/iter (+/- 2) = 3521 MB/s
+test misc::match_class_unicode ... bench: 268 ns/iter (+/- 30) = 600 MB/s
+test misc::medium_1K ... bench: 16 ns/iter (+/- 0) = 65750 MB/s
+test misc::medium_1MB ... bench: 20 ns/iter (+/- 15) = 52430200 MB/s
+test misc::medium_32 ... bench: 16 ns/iter (+/- 2) = 3750 MB/s
+test misc::medium_32K ... bench: 16 ns/iter (+/- 1) = 2049750 MB/s
+test misc::no_exponential ... bench: 353 ns/iter (+/- 26) = 283 MB/s
+test misc::not_literal ... bench: 97 ns/iter (+/- 9) = 525 MB/s
+test misc::one_pass_long_prefix ... bench: 58 ns/iter (+/- 5) = 448 MB/s
+test misc::one_pass_long_prefix_not ... bench: 60 ns/iter (+/- 6) = 433 MB/s
+test misc::one_pass_short ... bench: 43 ns/iter (+/- 4) = 395 MB/s
+test misc::one_pass_short_not ... bench: 46 ns/iter (+/- 2) = 369 MB/s
+test misc::reallyhard2_1K ... bench: 62 ns/iter (+/- 5) = 16774 MB/s
+test misc::reallyhard_1K ... bench: 1,650 ns/iter (+/- 176) = 636 MB/s
+test misc::reallyhard_1MB ... bench: 1,635,447 ns/iter (+/- 97,611) = 641 MB/s
+test misc::reallyhard_32 ... bench: 109 ns/iter (+/- 9) = 541 MB/s
+test misc::reallyhard_32K ... bench: 50,991 ns/iter (+/- 4,031) = 643 MB/s
+test misc::replace_all ... bench: 155 ns/iter (+/- 8)
+test misc::reverse_suffix_no_quadratic ... bench: 4,254 ns/iter (+/- 489) = 1880 MB/s
+test misc::short_haystack_1000000x ... bench: 91,124 ns/iter (+/- 4,584) = 87792 MB/s
+test misc::short_haystack_100000x ... bench: 10,681 ns/iter (+/- 420) = 74900 MB/s
+test misc::short_haystack_10000x ... bench: 3,240 ns/iter (+/- 395) = 24694 MB/s
+test misc::short_haystack_1000x ... bench: 403 ns/iter (+/- 48) = 19878 MB/s
+test misc::short_haystack_100x ... bench: 303 ns/iter (+/- 27) = 2676 MB/s
+test misc::short_haystack_10x ... bench: 272 ns/iter (+/- 27) = 334 MB/s
+test misc::short_haystack_1x ... bench: 264 ns/iter (+/- 32) = 71 MB/s
+test misc::short_haystack_2x ... bench: 269 ns/iter (+/- 25) = 100 MB/s
+test misc::short_haystack_3x ... bench: 264 ns/iter (+/- 26) = 132 MB/s
+test misc::short_haystack_4x ... bench: 271 ns/iter (+/- 28) = 158 MB/s
+test regexdna::find_new_lines ... bench: 13,700,405 ns/iter (+/- 647,840) = 371 MB/s
+test regexdna::subst1 ... bench: 806,342 ns/iter (+/- 48,014) = 6304 MB/s
+test regexdna::subst10 ... bench: 794,403 ns/iter (+/- 40,393) = 6399 MB/s
+test regexdna::subst11 ... bench: 801,963 ns/iter (+/- 46,164) = 6338 MB/s
+test regexdna::subst2 ... bench: 779,768 ns/iter (+/- 81,505) = 6519 MB/s
+test regexdna::subst3 ... bench: 777,024 ns/iter (+/- 52,795) = 6542 MB/s
+test regexdna::subst4 ... bench: 769,862 ns/iter (+/- 48,980) = 6603 MB/s
+test regexdna::subst5 ... bench: 779,754 ns/iter (+/- 39,784) = 6519 MB/s
+test regexdna::subst6 ... bench: 769,400 ns/iter (+/- 69,980) = 6606 MB/s
+test regexdna::subst7 ... bench: 771,457 ns/iter (+/- 40,490) = 6589 MB/s
+test regexdna::subst8 ... bench: 808,468 ns/iter (+/- 53,093) = 6287 MB/s
+test regexdna::subst9 ... bench: 771,869 ns/iter (+/- 50,966) = 6585 MB/s
+test regexdna::variant1 ... bench: 3,093,422 ns/iter (+/- 222,818) = 1643 MB/s
+test regexdna::variant2 ... bench: 6,520,178 ns/iter (+/- 400,704) = 779 MB/s
+test regexdna::variant3 ... bench: 7,297,818 ns/iter (+/- 319,866) = 696 MB/s
+test regexdna::variant4 ... bench: 7,356,045 ns/iter (+/- 530,375) = 691 MB/s
+test regexdna::variant5 ... bench: 5,977,343 ns/iter (+/- 296,375) = 850 MB/s
+test regexdna::variant6 ... bench: 6,045,776 ns/iter (+/- 270,954) = 840 MB/s
+test regexdna::variant7 ... bench: 5,447,060 ns/iter (+/- 223,542) = 933 MB/s
+test regexdna::variant8 ... bench: 5,615,676 ns/iter (+/- 419,756) = 905 MB/s
+test regexdna::variant9 ... bench: 5,457,949 ns/iter (+/- 439,821) = 931 MB/s
+test sherlock::before_after_holmes ... bench: 957,660 ns/iter (+/- 96,491) = 621 MB/s
+test sherlock::before_holmes ... bench: 65,680 ns/iter (+/- 3,085) = 9058 MB/s
+test sherlock::everything_greedy ... bench: 2,151,577 ns/iter (+/- 70,114) = 276 MB/s
+test sherlock::everything_greedy_nl ... bench: 836,942 ns/iter (+/- 81,010) = 710 MB/s
+test sherlock::holmes_cochar_watson ... bench: 137,441 ns/iter (+/- 14,157) = 4328 MB/s
+test sherlock::holmes_coword_watson ... bench: 514,100 ns/iter (+/- 48,210) = 1157 MB/s
+test sherlock::ing_suffix ... bench: 409,126 ns/iter (+/- 23,370) = 1454 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 1,117,457 ns/iter (+/- 53,545) = 532 MB/s
+test sherlock::letters ... bench: 23,152,671 ns/iter (+/- 1,002,203) = 25 MB/s
+test sherlock::letters_lower ... bench: 22,521,833 ns/iter (+/- 1,178,375) = 26 MB/s
+test sherlock::letters_upper ... bench: 1,841,871 ns/iter (+/- 108,471) = 323 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 946,126 ns/iter (+/- 53,405) = 628 MB/s
+test sherlock::name_alt1 ... bench: 25,830 ns/iter (+/- 1,054) = 23032 MB/s
+test sherlock::name_alt2 ... bench: 116,879 ns/iter (+/- 6,000) = 5090 MB/s
+test sherlock::name_alt3 ... bench: 125,746 ns/iter (+/- 7,121) = 4731 MB/s
+test sherlock::name_alt3_nocase ... bench: 1,203,114 ns/iter (+/- 72,037) = 494 MB/s
+test sherlock::name_alt4 ... bench: 156,208 ns/iter (+/- 5,188) = 3808 MB/s
+test sherlock::name_alt4_nocase ... bench: 222,618 ns/iter (+/- 30,017) = 2672 MB/s
+test sherlock::name_alt5 ... bench: 133,440 ns/iter (+/- 14,831) = 4458 MB/s
+test sherlock::name_alt5_nocase ... bench: 558,482 ns/iter (+/- 22,435) = 1065 MB/s
+test sherlock::name_holmes ... bench: 30,800 ns/iter (+/- 2,933) = 19316 MB/s
+test sherlock::name_holmes_nocase ... bench: 190,736 ns/iter (+/- 24,310) = 3119 MB/s
+test sherlock::name_sherlock ... bench: 56,238 ns/iter (+/- 3,310) = 10578 MB/s
+test sherlock::name_sherlock_holmes ... bench: 24,129 ns/iter (+/- 2,662) = 24656 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 174,649 ns/iter (+/- 13,487) = 3406 MB/s
+test sherlock::name_sherlock_nocase ... bench: 157,674 ns/iter (+/- 7,888) = 3773 MB/s
+test sherlock::name_whitespace ... bench: 74,637 ns/iter (+/- 6,523) = 7971 MB/s
+test sherlock::no_match_common ... bench: 15,140 ns/iter (+/- 969) = 39295 MB/s
+test sherlock::no_match_really_common ... bench: 305,112 ns/iter (+/- 31,314) = 1949 MB/s
+test sherlock::no_match_uncommon ... bench: 15,539 ns/iter (+/- 1,269) = 38286 MB/s
+test sherlock::quotes ... bench: 482,180 ns/iter (+/- 33,736) = 1233 MB/s
+test sherlock::repeated_class_negation ... bench: 78,428,426 ns/iter (+/- 6,705,217) = 7 MB/s
+test sherlock::the_lower ... bench: 576,511 ns/iter (+/- 21,735) = 1031 MB/s
+test sherlock::the_nocase ... bench: 413,565 ns/iter (+/- 42,941) = 1438 MB/s
+test sherlock::the_upper ... bench: 34,491 ns/iter (+/- 1,901) = 17248 MB/s
+test sherlock::the_whitespace ... bench: 1,061,365 ns/iter (+/- 66,639) = 560 MB/s
+test sherlock::word_ending_n ... bench: 1,763,795 ns/iter (+/- 83,031) = 337 MB/s
+test sherlock::words ... bench: 9,281,896 ns/iter (+/- 934,308) = 64 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 108 measured; 0 filtered out
+
diff --git a/vendor/regex/record/old-bench-log/07/boost b/vendor/regex/record/old-bench-log/07/boost
new file mode 100644
index 0000000..5a13a10
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/07/boost
@@ -0,0 +1,97 @@
+
+running 92 tests
+test misc::anchored_literal_long_match ... bench: 174 ns/iter (+/- 0) = 2241 MB/s
bench: 174 ns/iter (+/- 0) = 2241 MB/s +test misc::anchored_literal_long_non_match ... bench: 329 ns/iter (+/- 3) = 1185 MB/s +test misc::anchored_literal_short_match ... bench: 168 ns/iter (+/- 0) = 154 MB/s +test misc::anchored_literal_short_non_match ... bench: 121 ns/iter (+/- 0) = 214 MB/s +test misc::easy0_1K ... bench: 660 ns/iter (+/- 3) = 1592 MB/s +test misc::easy0_1MB ... bench: 514,707 ns/iter (+/- 2,689) = 2037 MB/s +test misc::easy0_32 ... bench: 170 ns/iter (+/- 2) = 347 MB/s +test misc::easy0_32K ... bench: 16,208 ns/iter (+/- 99) = 2023 MB/s +test misc::easy1_1K ... bench: 756 ns/iter (+/- 1) = 1380 MB/s +test misc::easy1_1MB ... bench: 514,816 ns/iter (+/- 2,832) = 2036 MB/s +test misc::easy1_32 ... bench: 271 ns/iter (+/- 3) = 191 MB/s +test misc::easy1_32K ... bench: 16,316 ns/iter (+/- 93) = 2009 MB/s +test misc::hard_1K ... bench: 63,089 ns/iter (+/- 594) = 16 MB/s +test misc::hard_1MB ... bench: 66,537,328 ns/iter (+/- 866,695) = 15 MB/s +test misc::hard_32 ... bench: 2,125 ns/iter (+/- 8) = 27 MB/s +test misc::hard_32K ... bench: 2,075,568 ns/iter (+/- 6,634) = 15 MB/s +test misc::literal ... bench: 143 ns/iter (+/- 1) = 356 MB/s +test misc::long_needle1 ... bench: 6,557,839 ns/iter (+/- 27,779) = 15 MB/s +test misc::long_needle2 ... bench: 6,557,332 ns/iter (+/- 101,494) = 15 MB/s +test misc::match_class ... bench: 157 ns/iter (+/- 0) = 515 MB/s +test misc::match_class_in_range ... bench: 157 ns/iter (+/- 4) = 515 MB/s +test misc::medium_1K ... bench: 665 ns/iter (+/- 2) = 1581 MB/s +test misc::medium_1MB ... bench: 514,869 ns/iter (+/- 5,832) = 2036 MB/s +test misc::medium_32 ... bench: 167 ns/iter (+/- 1) = 359 MB/s +test misc::medium_32K ... bench: 16,253 ns/iter (+/- 74) = 2017 MB/s +test misc::no_exponential ... bench: 1,717 ns/iter (+/- 13) = 58 MB/s +test misc::not_literal ... bench: 1,084 ns/iter (+/- 16) = 47 MB/s +test misc::one_pass_long_prefix ... bench: 169 ns/iter (+/- 2) = 153 MB/s +test misc::one_pass_long_prefix_not ... bench: 169 ns/iter (+/- 6) = 153 MB/s +test misc::one_pass_short ... bench: 1,105 ns/iter (+/- 2) = 15 MB/s +test misc::one_pass_short_not ... bench: 1,076 ns/iter (+/- 10) = 15 MB/s +test misc::reallyhard2_1K ... bench: 4,935 ns/iter (+/- 39) = 210 MB/s +test misc::reallyhard_1K ... bench: 63,076 ns/iter (+/- 226) = 16 MB/s +test misc::reallyhard_1MB ... bench: 68,534,102 ns/iter (+/- 125,043) = 15 MB/s +test misc::reallyhard_32 ... bench: 2,134 ns/iter (+/- 8) = 27 MB/s +test misc::reallyhard_32K ... bench: 2,074,582 ns/iter (+/- 5,943) = 15 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,001 ns/iter (+/- 5) = 1999 MB/s +test regexdna::find_new_lines ... bench: 12,942,765 ns/iter (+/- 21,828) = 392 MB/s +test regexdna::subst1 ... bench: 6,241,036 ns/iter (+/- 13,806) = 814 MB/s +test regexdna::subst10 ... bench: 6,247,896 ns/iter (+/- 28,406) = 813 MB/s +test regexdna::subst11 ... bench: 6,240,960 ns/iter (+/- 20,660) = 814 MB/s +test regexdna::subst2 ... bench: 6,245,156 ns/iter (+/- 17,639) = 813 MB/s +test regexdna::subst3 ... bench: 6,276,881 ns/iter (+/- 14,851) = 809 MB/s +test regexdna::subst4 ... bench: 6,249,549 ns/iter (+/- 30,600) = 813 MB/s +test regexdna::subst5 ... bench: 6,251,942 ns/iter (+/- 33,889) = 813 MB/s +test regexdna::subst6 ... bench: 6,244,011 ns/iter (+/- 11,642) = 814 MB/s +test regexdna::subst7 ... bench: 6,283,445 ns/iter (+/- 11,693) = 809 MB/s +test regexdna::subst8 ... bench: 6,247,310 ns/iter (+/- 11,590) = 813 MB/s +test regexdna::subst9 ... 
bench: 6,249,184 ns/iter (+/- 8,159) = 813 MB/s +test regexdna::variant1 ... bench: 73,947,890 ns/iter (+/- 930,039) = 68 MB/s +test regexdna::variant2 ... bench: 108,486,922 ns/iter (+/- 181,287) = 46 MB/s +test regexdna::variant3 ... bench: 93,241,161 ns/iter (+/- 143,224) = 54 MB/s +test regexdna::variant4 ... bench: 75,615,061 ns/iter (+/- 107,918) = 67 MB/s +test regexdna::variant5 ... bench: 74,484,623 ns/iter (+/- 121,807) = 68 MB/s +test regexdna::variant6 ... bench: 74,594,078 ns/iter (+/- 121,252) = 68 MB/s +test regexdna::variant7 ... bench: 77,064,066 ns/iter (+/- 123,262) = 65 MB/s +test regexdna::variant8 ... bench: 87,267,656 ns/iter (+/- 128,639) = 58 MB/s +test regexdna::variant9 ... bench: 98,197,000 ns/iter (+/- 149,379) = 51 MB/s +test sherlock::before_after_holmes ... bench: 7,100,744 ns/iter (+/- 29,939) = 83 MB/s +test sherlock::before_holmes ... bench: 7,120,564 ns/iter (+/- 32,659) = 83 MB/s +test sherlock::everything_greedy ... bench: 3,777,458 ns/iter (+/- 8,802) = 157 MB/s +test sherlock::everything_greedy_nl ... bench: 282 ns/iter (+/- 2) = 2109691 MB/s +test sherlock::holmes_cochar_watson ... bench: 389,335 ns/iter (+/- 1,472) = 1528 MB/s +test sherlock::ing_suffix ... bench: 6,256,416 ns/iter (+/- 8,735) = 95 MB/s +test sherlock::ing_suffix_limited_space ... bench: 7,572,167 ns/iter (+/- 15,521) = 78 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 343,402 ns/iter (+/- 3,693) = 1732 MB/s +test sherlock::name_alt1 ... bench: 347,605 ns/iter (+/- 916) = 1711 MB/s +test sherlock::name_alt2 ... bench: 420,500 ns/iter (+/- 2,846) = 1414 MB/s +test sherlock::name_alt3 ... bench: 762,684 ns/iter (+/- 3,834) = 780 MB/s +test sherlock::name_alt3_nocase ... bench: 9,980,804 ns/iter (+/- 49,424) = 59 MB/s +test sherlock::name_alt4 ... bench: 431,744 ns/iter (+/- 682) = 1377 MB/s +test sherlock::name_alt4_nocase ... bench: 3,464,135 ns/iter (+/- 11,476) = 171 MB/s +test sherlock::name_alt5 ... bench: 472,923 ns/iter (+/- 846) = 1257 MB/s +test sherlock::name_alt5_nocase ... bench: 4,338,924 ns/iter (+/- 31,118) = 137 MB/s +test sherlock::name_holmes ... bench: 378,950 ns/iter (+/- 865) = 1569 MB/s +test sherlock::name_holmes_nocase ... bench: 1,952,035 ns/iter (+/- 8,233) = 304 MB/s +test sherlock::name_sherlock ... bench: 324,845 ns/iter (+/- 8,376) = 1831 MB/s +test sherlock::name_sherlock_holmes ... bench: 324,647 ns/iter (+/- 2,901) = 1832 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,870,400 ns/iter (+/- 10,609) = 318 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,814,172 ns/iter (+/- 16,455) = 327 MB/s +test sherlock::name_whitespace ... bench: 326,252 ns/iter (+/- 1,557) = 1823 MB/s +test sherlock::no_match_common ... bench: 1,154,445 ns/iter (+/- 8,544) = 515 MB/s +test sherlock::no_match_really_common ... bench: 1,154,314 ns/iter (+/- 5,467) = 515 MB/s +test sherlock::no_match_uncommon ... bench: 295,301 ns/iter (+/- 906) = 2014 MB/s +test sherlock::quotes ... bench: 863,138 ns/iter (+/- 3,072) = 689 MB/s +test sherlock::repeated_class_negation ... bench: 13,594,294 ns/iter (+/- 40,354) = 43 MB/s +test sherlock::the_lower ... bench: 2,171,731 ns/iter (+/- 7,148) = 273 MB/s +test sherlock::the_nocase ... bench: 3,556,278 ns/iter (+/- 7,269) = 167 MB/s +test sherlock::the_upper ... bench: 404,851 ns/iter (+/- 865) = 1469 MB/s +test sherlock::the_whitespace ... bench: 2,139,597 ns/iter (+/- 7,427) = 278 MB/s +test sherlock::word_ending_n ... bench: 7,824,965 ns/iter (+/- 30,691) = 76 MB/s +test sherlock::words ... 
bench: 18,386,285 ns/iter (+/- 34,161) = 32 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 92 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/dphobos-dmd b/vendor/regex/record/old-bench-log/07/dphobos-dmd new file mode 100644 index 0000000..835a096 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/dphobos-dmd @@ -0,0 +1,100 @@ + +running 95 tests +test misc::anchored_literal_long_match ... bench: 365 ns/iter (+/- 2) = 1068 MB/s +test misc::anchored_literal_long_non_match ... bench: 300 ns/iter (+/- 0) = 1300 MB/s +test misc::anchored_literal_short_match ... bench: 364 ns/iter (+/- 2) = 71 MB/s +test misc::anchored_literal_short_non_match ... bench: 306 ns/iter (+/- 9) = 84 MB/s +test misc::easy0_1K ... bench: 768 ns/iter (+/- 5) = 1368 MB/s +test misc::easy0_1MB ... bench: 17,062 ns/iter (+/- 252) = 61458 MB/s +test misc::easy0_32 ... bench: 759 ns/iter (+/- 7) = 77 MB/s +test misc::easy0_32K ... bench: 1,095 ns/iter (+/- 20) = 29949 MB/s +test misc::easy1_1K ... bench: 723 ns/iter (+/- 1) = 1443 MB/s +test misc::easy1_1MB ... bench: 17,021 ns/iter (+/- 229) = 61606 MB/s +test misc::easy1_32 ... bench: 714 ns/iter (+/- 1) = 72 MB/s +test misc::easy1_32K ... bench: 1,052 ns/iter (+/- 12) = 31167 MB/s +test misc::hard_1K ... bench: 17,044 ns/iter (+/- 176) = 61 MB/s +test misc::hard_1MB ... bench: 17,965,420 ns/iter (+/- 72,226) = 58 MB/s +test misc::hard_32 ... bench: 2,171 ns/iter (+/- 2) = 27 MB/s +test misc::hard_32K ... bench: 561,207 ns/iter (+/- 5,654) = 58 MB/s +test misc::literal ... bench: 240 ns/iter (+/- 0) = 212 MB/s +test misc::long_needle1 ... bench: 76,640 ns/iter (+/- 1,043) = 1304 MB/s +test misc::long_needle2 ... bench: 76,747 ns/iter (+/- 3,299) = 1302 MB/s +test misc::match_class ... bench: 344 ns/iter (+/- 1) = 235 MB/s +test misc::match_class_in_range ... bench: 306 ns/iter (+/- 9) = 264 MB/s +test misc::match_class_unicode ... bench: 1,435 ns/iter (+/- 9) = 112 MB/s +test misc::medium_1K ... bench: 1,480 ns/iter (+/- 16) = 710 MB/s +test misc::medium_1MB ... bench: 564,378 ns/iter (+/- 18,695) = 1857 MB/s +test misc::medium_32 ... bench: 941 ns/iter (+/- 32) = 63 MB/s +test misc::medium_32K ... bench: 18,465 ns/iter (+/- 116) = 1776 MB/s +test misc::no_exponential ... bench: 367,476 ns/iter (+/- 15,176) +test misc::not_literal ... bench: 1,165 ns/iter (+/- 9) = 43 MB/s +test misc::one_pass_long_prefix ... bench: 596 ns/iter (+/- 2) = 43 MB/s +test misc::one_pass_long_prefix_not ... bench: 602 ns/iter (+/- 6) = 43 MB/s +test misc::one_pass_short ... bench: 1,068 ns/iter (+/- 3) = 15 MB/s +test misc::one_pass_short_not ... bench: 1,434 ns/iter (+/- 11) = 11 MB/s +test misc::reallyhard2_1K ... bench: 36,539 ns/iter (+/- 281) = 28 MB/s +test misc::reallyhard_1K ... bench: 17,086 ns/iter (+/- 94) = 61 MB/s +test misc::reallyhard_1MB ... bench: 17,973,007 ns/iter (+/- 64,010) = 58 MB/s +test misc::reallyhard_32 ... bench: 2,200 ns/iter (+/- 16) = 26 MB/s +test misc::reallyhard_32K ... bench: 561,371 ns/iter (+/- 8,688) = 58 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,606 ns/iter (+/- 16) = 1736 MB/s +test regexdna::find_new_lines ... bench: 31,579,756 ns/iter (+/- 121,047) = 160 MB/s +test regexdna::subst1 ... bench: 7,930,333 ns/iter (+/- 27,118) = 641 MB/s +test regexdna::subst10 ... bench: 7,942,534 ns/iter (+/- 36,470) = 640 MB/s +test regexdna::subst11 ... bench: 7,934,646 ns/iter (+/- 42,013) = 640 MB/s +test regexdna::subst2 ... bench: 7,947,802 ns/iter (+/- 53,427) = 639 MB/s +test regexdna::subst3 ... 
bench: 7,941,691 ns/iter (+/- 122,303) = 640 MB/s +test regexdna::subst4 ... bench: 7,928,228 ns/iter (+/- 30,493) = 641 MB/s +test regexdna::subst5 ... bench: 7,936,901 ns/iter (+/- 37,894) = 640 MB/s +test regexdna::subst6 ... bench: 7,936,211 ns/iter (+/- 46,269) = 640 MB/s +test regexdna::subst7 ... bench: 7,946,477 ns/iter (+/- 62,660) = 639 MB/s +test regexdna::subst8 ... bench: 7,930,830 ns/iter (+/- 31,234) = 640 MB/s +test regexdna::subst9 ... bench: 7,937,951 ns/iter (+/- 36,425) = 640 MB/s +test regexdna::variant1 ... bench: 5,104,224 ns/iter (+/- 26,612) = 995 MB/s +test regexdna::variant2 ... bench: 6,847,162 ns/iter (+/- 31,233) = 742 MB/s +test regexdna::variant3 ... bench: 11,153,739 ns/iter (+/- 114,193) = 455 MB/s +test regexdna::variant4 ... bench: 9,665,797 ns/iter (+/- 47,148) = 525 MB/s +test regexdna::variant5 ... bench: 9,645,193 ns/iter (+/- 35,250) = 527 MB/s +test regexdna::variant6 ... bench: 7,280,069 ns/iter (+/- 21,171) = 698 MB/s +test regexdna::variant7 ... bench: 7,841,177 ns/iter (+/- 20,797) = 648 MB/s +test regexdna::variant8 ... bench: 9,783,978 ns/iter (+/- 35,231) = 519 MB/s +test regexdna::variant9 ... bench: 19,157,329 ns/iter (+/- 445,911) = 265 MB/s +test sherlock::before_after_holmes ... bench: 20,995,307 ns/iter (+/- 258,419) = 28 MB/s +test sherlock::before_holmes ... bench: 20,899,416 ns/iter (+/- 122,256) = 28 MB/s +test sherlock::holmes_cochar_watson ... bench: 904,439 ns/iter (+/- 6,934) = 657 MB/s +test sherlock::holmes_coword_watson ... bench: 103,706,930 ns/iter (+/- 176,711) = 5 MB/s +test sherlock::ing_suffix ... bench: 14,927,612 ns/iter (+/- 90,346) = 39 MB/s +test sherlock::ing_suffix_limited_space ... bench: 19,743,662 ns/iter (+/- 78,506) = 30 MB/s +test sherlock::letters ... bench: 112,708,213 ns/iter (+/- 251,690) = 5 MB/s +test sherlock::letters_lower ... bench: 111,058,829 ns/iter (+/- 192,793) = 5 MB/s +test sherlock::letters_upper ... bench: 4,072,062 ns/iter (+/- 20,273) = 146 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 399,796 ns/iter (+/- 4,222) = 1488 MB/s +test sherlock::name_alt1 ... bench: 411,415 ns/iter (+/- 2,257) = 1446 MB/s +test sherlock::name_alt2 ... bench: 626,671 ns/iter (+/- 5,745) = 949 MB/s +test sherlock::name_alt3 ... bench: 1,086,570 ns/iter (+/- 6,105) = 547 MB/s +test sherlock::name_alt3_nocase ... bench: 1,827,028 ns/iter (+/- 12,788) = 325 MB/s +test sherlock::name_alt4 ... bench: 687,454 ns/iter (+/- 11,421) = 865 MB/s +test sherlock::name_alt4_nocase ... bench: 943,925 ns/iter (+/- 4,108) = 630 MB/s +test sherlock::name_alt5 ... bench: 734,969 ns/iter (+/- 7,215) = 809 MB/s +test sherlock::name_alt5_nocase ... bench: 895,903 ns/iter (+/- 5,647) = 664 MB/s +test sherlock::name_holmes ... bench: 199,880 ns/iter (+/- 1,654) = 2976 MB/s +test sherlock::name_holmes_nocase ... bench: 529,590 ns/iter (+/- 1,288) = 1123 MB/s +test sherlock::name_sherlock ... bench: 57,720 ns/iter (+/- 555) = 10307 MB/s +test sherlock::name_sherlock_holmes ... bench: 67,596 ns/iter (+/- 580) = 8801 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 393,903 ns/iter (+/- 2,700) = 1510 MB/s +test sherlock::name_sherlock_nocase ... bench: 373,411 ns/iter (+/- 3,264) = 1593 MB/s +test sherlock::name_whitespace ... bench: 79,175 ns/iter (+/- 1,288) = 7514 MB/s +test sherlock::no_match_common ... bench: 276,503 ns/iter (+/- 2,155) = 2151 MB/s +test sherlock::no_match_really_common ... bench: 276,535 ns/iter (+/- 416) = 2151 MB/s +test sherlock::no_match_uncommon ... 
bench: 10,535 ns/iter (+/- 105) = 56472 MB/s +test sherlock::quotes ... bench: 5,746,202 ns/iter (+/- 33,993) = 103 MB/s +test sherlock::repeated_class_negation ... bench: 46,124,528 ns/iter (+/- 125,861) = 12 MB/s +test sherlock::the_lower ... bench: 2,527,960 ns/iter (+/- 12,351) = 235 MB/s +test sherlock::the_nocase ... bench: 3,210,112 ns/iter (+/- 10,799) = 185 MB/s +test sherlock::the_upper ... bench: 240,272 ns/iter (+/- 3,902) = 2476 MB/s +test sherlock::the_whitespace ... bench: 3,511,711 ns/iter (+/- 17,181) = 169 MB/s +test sherlock::word_ending_n ... bench: 29,535,089 ns/iter (+/- 95,201) = 20 MB/s +test sherlock::words ... bench: 43,341,782 ns/iter (+/- 110,038) = 13 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 95 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/dphobos-dmd-ct b/vendor/regex/record/old-bench-log/07/dphobos-dmd-ct new file mode 100644 index 0000000..9dd6d02 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/dphobos-dmd-ct @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 354 ns/iter (+/- 25) = 1101 MB/s +test misc::anchored_literal_long_non_match ... bench: 314 ns/iter (+/- 3) = 1242 MB/s +test misc::anchored_literal_short_match ... bench: 331 ns/iter (+/- 0) = 78 MB/s +test misc::anchored_literal_short_non_match ... bench: 314 ns/iter (+/- 4) = 82 MB/s +test misc::easy0_1K ... bench: 430 ns/iter (+/- 2) = 2444 MB/s +test misc::easy0_1MB ... bench: 16,692 ns/iter (+/- 222) = 62820 MB/s +test misc::easy0_32 ... bench: 420 ns/iter (+/- 4) = 140 MB/s +test misc::easy0_32K ... bench: 755 ns/iter (+/- 5) = 43437 MB/s +test misc::easy1_1K ... bench: 407 ns/iter (+/- 10) = 2565 MB/s +test misc::easy1_1MB ... bench: 16,670 ns/iter (+/- 205) = 62903 MB/s +test misc::easy1_32 ... bench: 389 ns/iter (+/- 0) = 133 MB/s +test misc::easy1_32K ... bench: 732 ns/iter (+/- 6) = 44792 MB/s +test misc::hard_1K ... bench: 35,518 ns/iter (+/- 346) = 29 MB/s +test misc::hard_1MB ... bench: 31,657,473 ns/iter (+/- 512,618) = 33 MB/s +test misc::hard_32 ... bench: 1,057 ns/iter (+/- 7) = 55 MB/s +test misc::hard_32K ... bench: 950,905 ns/iter (+/- 13,239) = 34 MB/s +test misc::literal ... bench: 320 ns/iter (+/- 3) = 159 MB/s +test misc::long_needle1 ... bench: 73,954 ns/iter (+/- 331) = 1352 MB/s +test misc::long_needle2 ... bench: 73,915 ns/iter (+/- 199) = 1352 MB/s +test misc::match_class ... bench: 374 ns/iter (+/- 3) = 216 MB/s +test misc::match_class_in_range ... bench: 372 ns/iter (+/- 0) = 217 MB/s +test misc::match_class_unicode ... bench: 1,631 ns/iter (+/- 8) = 98 MB/s +test misc::medium_1K ... bench: 965 ns/iter (+/- 10) = 1090 MB/s +test misc::medium_1MB ... bench: 563,242 ns/iter (+/- 6,767) = 1861 MB/s +test misc::medium_32 ... bench: 434 ns/iter (+/- 3) = 138 MB/s +test misc::medium_32K ... bench: 17,976 ns/iter (+/- 137) = 1824 MB/s +test misc::not_literal ... bench: 1,063 ns/iter (+/- 2) = 47 MB/s +test misc::one_pass_long_prefix ... bench: 405 ns/iter (+/- 4) = 64 MB/s +test misc::one_pass_long_prefix_not ... bench: 410 ns/iter (+/- 2) = 63 MB/s +test misc::one_pass_short ... bench: 539 ns/iter (+/- 12) = 31 MB/s +test misc::one_pass_short_not ... bench: 534 ns/iter (+/- 1) = 31 MB/s +test misc::reallyhard2_1K ... bench: 75,108 ns/iter (+/- 699) = 13 MB/s +test misc::reallyhard_1K ... bench: 34,681 ns/iter (+/- 268) = 30 MB/s +test misc::reallyhard_1MB ... bench: 30,579,065 ns/iter (+/- 389,443) = 34 MB/s +test misc::reallyhard_32 ... 
bench: 1,025 ns/iter (+/- 22) = 57 MB/s +test misc::reallyhard_32K ... bench: 920,515 ns/iter (+/- 26,281) = 35 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,607 ns/iter (+/- 36) = 1736 MB/s +test regexdna::find_new_lines ... bench: 34,122,227 ns/iter (+/- 842,345) = 148 MB/s +test regexdna::subst1 ... bench: 9,932,271 ns/iter (+/- 86,915) = 511 MB/s +test regexdna::subst10 ... bench: 9,977,738 ns/iter (+/- 51,656) = 509 MB/s +test regexdna::subst11 ... bench: 9,945,085 ns/iter (+/- 53,175) = 511 MB/s +test regexdna::subst2 ... bench: 9,928,284 ns/iter (+/- 32,335) = 512 MB/s +test regexdna::subst3 ... bench: 9,968,901 ns/iter (+/- 41,254) = 509 MB/s +test regexdna::subst4 ... bench: 9,912,463 ns/iter (+/- 28,171) = 512 MB/s +test regexdna::subst5 ... bench: 9,948,128 ns/iter (+/- 22,949) = 510 MB/s +test regexdna::subst6 ... bench: 9,916,200 ns/iter (+/- 28,947) = 512 MB/s +test regexdna::subst7 ... bench: 9,996,277 ns/iter (+/- 37,585) = 508 MB/s +test regexdna::subst8 ... bench: 9,974,849 ns/iter (+/- 41,503) = 509 MB/s +test regexdna::subst9 ... bench: 9,961,948 ns/iter (+/- 28,254) = 510 MB/s +test regexdna::variant1 ... bench: 3,504,049 ns/iter (+/- 15,090) = 1450 MB/s +test regexdna::variant2 ... bench: 3,800,264 ns/iter (+/- 12,123) = 1337 MB/s +test regexdna::variant3 ... bench: 4,932,027 ns/iter (+/- 15,553) = 1030 MB/s +test regexdna::variant4 ... bench: 4,709,109 ns/iter (+/- 15,213) = 1079 MB/s +test regexdna::variant5 ... bench: 4,918,928 ns/iter (+/- 19,196) = 1033 MB/s +test regexdna::variant6 ... bench: 4,244,250 ns/iter (+/- 24,367) = 1197 MB/s +test regexdna::variant7 ... bench: 4,245,530 ns/iter (+/- 16,178) = 1197 MB/s +test regexdna::variant8 ... bench: 4,205,036 ns/iter (+/- 10,733) = 1208 MB/s +test regexdna::variant9 ... bench: 3,864,771 ns/iter (+/- 11,864) = 1315 MB/s +test sherlock::before_after_holmes ... bench: 22,490,817 ns/iter (+/- 571,510) = 26 MB/s +test sherlock::before_holmes ... bench: 22,603,264 ns/iter (+/- 74,703) = 26 MB/s +test sherlock::holmes_cochar_watson ... bench: 519,710 ns/iter (+/- 5,878) = 1144 MB/s +test sherlock::ing_suffix ... bench: 9,237,783 ns/iter (+/- 30,188) = 64 MB/s +test sherlock::ing_suffix_limited_space ... bench: 12,074,301 ns/iter (+/- 30,860) = 49 MB/s +test sherlock::letters ... bench: 137,678,575 ns/iter (+/- 131,761) = 4 MB/s +test sherlock::letters_lower ... bench: 135,414,657 ns/iter (+/- 134,307) = 4 MB/s +test sherlock::letters_upper ... bench: 5,004,996 ns/iter (+/- 23,224) = 118 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 341,556 ns/iter (+/- 1,500) = 1741 MB/s +test sherlock::name_alt1 ... bench: 378,291 ns/iter (+/- 1,545) = 1572 MB/s +test sherlock::name_alt2 ... bench: 528,403 ns/iter (+/- 2,273) = 1125 MB/s +test sherlock::name_alt3 ... bench: 685,634 ns/iter (+/- 17,205) = 867 MB/s +test sherlock::name_alt3_nocase ... bench: 825,069 ns/iter (+/- 10,490) = 721 MB/s +test sherlock::name_alt4 ... bench: 555,717 ns/iter (+/- 3,223) = 1070 MB/s +test sherlock::name_alt4_nocase ... bench: 649,913 ns/iter (+/- 4,543) = 915 MB/s +test sherlock::name_alt5 ... bench: 570,036 ns/iter (+/- 543) = 1043 MB/s +test sherlock::name_alt5_nocase ... bench: 580,445 ns/iter (+/- 2,100) = 1024 MB/s +test sherlock::name_holmes ... bench: 185,140 ns/iter (+/- 2,100) = 3213 MB/s +test sherlock::name_holmes_nocase ... bench: 479,902 ns/iter (+/- 5,898) = 1239 MB/s +test sherlock::name_sherlock ... bench: 51,053 ns/iter (+/- 491) = 11653 MB/s +test sherlock::name_sherlock_holmes ... 
bench: 50,722 ns/iter (+/- 296) = 11729 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 355,142 ns/iter (+/- 1,424) = 1675 MB/s +test sherlock::name_sherlock_nocase ... bench: 354,932 ns/iter (+/- 1,554) = 1676 MB/s +test sherlock::name_whitespace ... bench: 56,972 ns/iter (+/- 271) = 10442 MB/s +test sherlock::no_match_common ... bench: 274,260 ns/iter (+/- 3,092) = 2169 MB/s +test sherlock::no_match_really_common ... bench: 273,984 ns/iter (+/- 2,202) = 2171 MB/s +test sherlock::no_match_uncommon ... bench: 10,444 ns/iter (+/- 68) = 56964 MB/s +test sherlock::quotes ... bench: 2,755,414 ns/iter (+/- 11,488) = 215 MB/s +test sherlock::repeated_class_negation ... bench: 21,585,138 ns/iter (+/- 50,347) = 27 MB/s +test sherlock::the_lower ... bench: 2,835,360 ns/iter (+/- 10,083) = 209 MB/s +test sherlock::the_nocase ... bench: 3,060,088 ns/iter (+/- 10,321) = 194 MB/s +test sherlock::the_upper ... bench: 272,416 ns/iter (+/- 3,308) = 2183 MB/s +test sherlock::the_whitespace ... bench: 2,991,214 ns/iter (+/- 27,223) = 198 MB/s +test sherlock::word_ending_n ... bench: 30,726,303 ns/iter (+/- 83,743) = 19 MB/s +test sherlock::words ... bench: 42,256,710 ns/iter (+/- 88,302) = 14 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/oniguruma b/vendor/regex/record/old-bench-log/07/oniguruma new file mode 100644 index 0000000..b9e8e29 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/oniguruma @@ -0,0 +1,99 @@ + +running 94 tests +test misc::anchored_literal_long_match ... bench: 129 ns/iter (+/- 3) = 3023 MB/s +test misc::anchored_literal_long_non_match ... bench: 402 ns/iter (+/- 1) = 970 MB/s +test misc::anchored_literal_short_match ... bench: 130 ns/iter (+/- 1) = 200 MB/s +test misc::anchored_literal_short_non_match ... bench: 49 ns/iter (+/- 0) = 530 MB/s +test misc::easy0_1K ... bench: 281 ns/iter (+/- 3) = 3740 MB/s +test misc::easy0_1MB ... bench: 119,847 ns/iter (+/- 713) = 8749 MB/s +test misc::easy0_32 ... bench: 154 ns/iter (+/- 4) = 383 MB/s +test misc::easy0_32K ... bench: 3,985 ns/iter (+/- 24) = 8229 MB/s +test misc::easy1_1K ... bench: 3,472 ns/iter (+/- 11) = 300 MB/s +test misc::easy1_1MB ... bench: 3,385,764 ns/iter (+/- 6,630) = 309 MB/s +test misc::easy1_32 ... bench: 283 ns/iter (+/- 6) = 183 MB/s +test misc::easy1_32K ... bench: 105,977 ns/iter (+/- 319) = 309 MB/s +test misc::hard_1K ... bench: 106,973 ns/iter (+/- 1,091) = 9 MB/s +test misc::hard_1MB ... bench: 114,602,847 ns/iter (+/- 336,051) = 9 MB/s +test misc::hard_32 ... bench: 3,569 ns/iter (+/- 3) = 16 MB/s +test misc::hard_32K ... bench: 3,570,108 ns/iter (+/- 17,057) = 9 MB/s +test misc::literal ... bench: 287 ns/iter (+/- 1) = 177 MB/s +test misc::long_needle1 ... bench: 5,430,190 ns/iter (+/- 271,737) = 18 MB/s +test misc::long_needle2 ... bench: 5,651,748 ns/iter (+/- 260,960) = 17 MB/s +test misc::match_class ... bench: 369 ns/iter (+/- 0) = 219 MB/s +test misc::match_class_in_range ... bench: 370 ns/iter (+/- 8) = 218 MB/s +test misc::match_class_unicode ... bench: 1,600 ns/iter (+/- 24) = 100 MB/s +test misc::medium_1K ... bench: 295 ns/iter (+/- 4) = 3566 MB/s +test misc::medium_1MB ... bench: 119,845 ns/iter (+/- 707) = 8749 MB/s +test misc::medium_32 ... bench: 166 ns/iter (+/- 0) = 361 MB/s +test misc::medium_32K ... bench: 3,995 ns/iter (+/- 30) = 8209 MB/s +test misc::not_literal ... bench: 365 ns/iter (+/- 1) = 139 MB/s +test misc::one_pass_long_prefix ... 
bench: 155 ns/iter (+/- 0) = 167 MB/s +test misc::one_pass_long_prefix_not ... bench: 162 ns/iter (+/- 0) = 160 MB/s +test misc::one_pass_short ... bench: 279 ns/iter (+/- 0) = 60 MB/s +test misc::one_pass_short_not ... bench: 269 ns/iter (+/- 3) = 63 MB/s +test misc::reallyhard2_1K ... bench: 227,630 ns/iter (+/- 963) = 4 MB/s +test misc::reallyhard_1K ... bench: 106,964 ns/iter (+/- 1,199) = 9 MB/s +test misc::reallyhard_1MB ... bench: 114,622,989 ns/iter (+/- 206,430) = 9 MB/s +test misc::reallyhard_32 ... bench: 3,477 ns/iter (+/- 13) = 16 MB/s +test misc::reallyhard_32K ... bench: 3,580,927 ns/iter (+/- 15,784) = 9 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 23,518 ns/iter (+/- 105) = 340 MB/s +test regexdna::find_new_lines ... bench: 33,300,039 ns/iter (+/- 827,837) = 152 MB/s +test regexdna::subst1 ... bench: 22,829,688 ns/iter (+/- 81,653) = 222 MB/s +test regexdna::subst10 ... bench: 22,843,694 ns/iter (+/- 94,299) = 222 MB/s +test regexdna::subst11 ... bench: 22,827,872 ns/iter (+/- 84,129) = 222 MB/s +test regexdna::subst2 ... bench: 22,841,925 ns/iter (+/- 84,394) = 222 MB/s +test regexdna::subst3 ... bench: 22,885,409 ns/iter (+/- 114,277) = 222 MB/s +test regexdna::subst4 ... bench: 22,837,475 ns/iter (+/- 58,938) = 222 MB/s +test regexdna::subst5 ... bench: 22,835,207 ns/iter (+/- 39,862) = 222 MB/s +test regexdna::subst6 ... bench: 22,833,199 ns/iter (+/- 77,142) = 222 MB/s +test regexdna::subst7 ... bench: 22,851,757 ns/iter (+/- 322,186) = 222 MB/s +test regexdna::subst8 ... bench: 22,842,892 ns/iter (+/- 86,166) = 222 MB/s +test regexdna::subst9 ... bench: 22,840,862 ns/iter (+/- 105,926) = 222 MB/s +test regexdna::variant1 ... bench: 91,691,325 ns/iter (+/- 194,247) = 55 MB/s +test regexdna::variant2 ... bench: 105,586,659 ns/iter (+/- 320,354) = 48 MB/s +test regexdna::variant3 ... bench: 94,437,485 ns/iter (+/- 277,744) = 53 MB/s +test regexdna::variant4 ... bench: 90,399,600 ns/iter (+/- 184,588) = 56 MB/s +test regexdna::variant5 ... bench: 90,332,232 ns/iter (+/- 174,254) = 56 MB/s +test regexdna::variant6 ... bench: 90,519,504 ns/iter (+/- 227,643) = 56 MB/s +test regexdna::variant7 ... bench: 90,881,562 ns/iter (+/- 221,861) = 55 MB/s +test regexdna::variant8 ... bench: 96,962,980 ns/iter (+/- 180,002) = 52 MB/s +test regexdna::variant9 ... bench: 109,558,711 ns/iter (+/- 166,337) = 46 MB/s +test sherlock::before_after_holmes ... bench: 31,530,493 ns/iter (+/- 112,639) = 18 MB/s +test sherlock::before_holmes ... bench: 30,420,729 ns/iter (+/- 114,072) = 19 MB/s +test sherlock::everything_greedy ... bench: 6,656,677 ns/iter (+/- 167,110) = 89 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,992,839 ns/iter (+/- 8,037) = 298 MB/s +test sherlock::ing_suffix ... bench: 15,878,331 ns/iter (+/- 150,901) = 37 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,085,471 ns/iter (+/- 8,438) = 285 MB/s +test sherlock::letters ... bench: 89,091,241 ns/iter (+/- 182,225) = 6 MB/s +test sherlock::letters_lower ... bench: 55,634,237 ns/iter (+/- 115,097) = 10 MB/s +test sherlock::letters_upper ... bench: 10,126,641 ns/iter (+/- 36,015) = 58 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 191,963 ns/iter (+/- 687) = 3099 MB/s +test sherlock::name_alt1 ... bench: 1,983,070 ns/iter (+/- 5,863) = 300 MB/s +test sherlock::name_alt2 ... bench: 1,972,746 ns/iter (+/- 14,082) = 301 MB/s +test sherlock::name_alt3 ... bench: 2,424,033 ns/iter (+/- 13,209) = 245 MB/s +test sherlock::name_alt3_nocase ... 
bench: 16,876,942 ns/iter (+/- 77,218) = 35 MB/s +test sherlock::name_alt4 ... bench: 1,986,579 ns/iter (+/- 9,195) = 299 MB/s +test sherlock::name_alt4_nocase ... bench: 4,992,277 ns/iter (+/- 10,882) = 119 MB/s +test sherlock::name_alt5 ... bench: 2,033,937 ns/iter (+/- 13,627) = 292 MB/s +test sherlock::name_alt5_nocase ... bench: 6,292,627 ns/iter (+/- 14,666) = 94 MB/s +test sherlock::name_holmes ... bench: 450,290 ns/iter (+/- 1,882) = 1321 MB/s +test sherlock::name_holmes_nocase ... bench: 3,032,489 ns/iter (+/- 8,728) = 196 MB/s +test sherlock::name_sherlock ... bench: 265,379 ns/iter (+/- 865) = 2241 MB/s +test sherlock::name_sherlock_holmes ... bench: 201,375 ns/iter (+/- 2,146) = 2954 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 3,010,059 ns/iter (+/- 7,093) = 197 MB/s +test sherlock::name_sherlock_nocase ... bench: 3,016,713 ns/iter (+/- 11,280) = 197 MB/s +test sherlock::name_whitespace ... bench: 266,706 ns/iter (+/- 908) = 2230 MB/s +test sherlock::no_match_common ... bench: 544,428 ns/iter (+/- 7,562) = 1092 MB/s +test sherlock::no_match_really_common ... bench: 626,986 ns/iter (+/- 2,959) = 948 MB/s +test sherlock::no_match_uncommon ... bench: 534,517 ns/iter (+/- 4,342) = 1113 MB/s +test sherlock::quotes ... bench: 3,210,614 ns/iter (+/- 15,699) = 185 MB/s +test sherlock::repeated_class_negation ... bench: 31,147,103 ns/iter (+/- 117,471) = 19 MB/s +test sherlock::the_lower ... bench: 2,275,468 ns/iter (+/- 19,220) = 261 MB/s +test sherlock::the_nocase ... bench: 4,999,086 ns/iter (+/- 20,184) = 119 MB/s +test sherlock::the_upper ... bench: 893,288 ns/iter (+/- 11,368) = 666 MB/s +test sherlock::the_whitespace ... bench: 2,364,893 ns/iter (+/- 21,124) = 251 MB/s +test sherlock::word_ending_n ... bench: 18,221,921 ns/iter (+/- 62,927) = 32 MB/s +test sherlock::words ... bench: 27,552,543 ns/iter (+/- 89,437) = 21 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 94 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/pcre1 b/vendor/regex/record/old-bench-log/07/pcre1 new file mode 100644 index 0000000..a28d3cb --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/pcre1 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 23 ns/iter (+/- 0) = 16956 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 0) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 23 ns/iter (+/- 0) = 1130 MB/s +test misc::anchored_literal_short_non_match ... bench: 19 ns/iter (+/- 0) = 1368 MB/s +test misc::easy0_1K ... bench: 223 ns/iter (+/- 2) = 4713 MB/s +test misc::easy0_1MB ... bench: 178,098 ns/iter (+/- 3,124) = 5887 MB/s +test misc::easy0_32 ... bench: 39 ns/iter (+/- 0) = 1512 MB/s +test misc::easy0_32K ... bench: 5,600 ns/iter (+/- 27) = 5856 MB/s +test misc::easy1_1K ... bench: 210 ns/iter (+/- 7) = 4971 MB/s +test misc::easy1_1MB ... bench: 178,177 ns/iter (+/- 1,024) = 5885 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 0) = 1300 MB/s +test misc::easy1_32K ... bench: 5,592 ns/iter (+/- 52) = 5863 MB/s +test misc::hard_1K ... bench: 1,223 ns/iter (+/- 14) = 859 MB/s +test misc::hard_1MB ... bench: 983,169 ns/iter (+/- 13,398) = 1066 MB/s +test misc::hard_32 ... bench: 99 ns/iter (+/- 0) = 595 MB/s +test misc::hard_32K ... bench: 31,422 ns/iter (+/- 326) = 1043 MB/s +test misc::literal ... bench: 23 ns/iter (+/- 0) = 2217 MB/s +test misc::long_needle1 ... bench: 464,932 ns/iter (+/- 1,869) = 215 MB/s +test misc::long_needle2 ... 
bench: 462,587 ns/iter (+/- 6,375) = 216 MB/s +test misc::match_class ... bench: 73 ns/iter (+/- 0) = 1109 MB/s +test misc::match_class_in_range ... bench: 25 ns/iter (+/- 0) = 3240 MB/s +test misc::match_class_unicode ... bench: 263 ns/iter (+/- 2) = 612 MB/s +test misc::medium_1K ... bench: 213 ns/iter (+/- 3) = 4938 MB/s +test misc::medium_1MB ... bench: 178,077 ns/iter (+/- 1,844) = 5888 MB/s +test misc::medium_32 ... bench: 48 ns/iter (+/- 0) = 1250 MB/s +test misc::medium_32K ... bench: 5,598 ns/iter (+/- 38) = 5858 MB/s +test misc::not_literal ... bench: 131 ns/iter (+/- 0) = 389 MB/s +test misc::one_pass_long_prefix ... bench: 22 ns/iter (+/- 0) = 1181 MB/s +test misc::one_pass_long_prefix_not ... bench: 22 ns/iter (+/- 0) = 1181 MB/s +test misc::one_pass_short ... bench: 44 ns/iter (+/- 0) = 386 MB/s +test misc::one_pass_short_not ... bench: 44 ns/iter (+/- 0) = 386 MB/s +test misc::reallyhard2_1K ... bench: 3,503 ns/iter (+/- 29) = 296 MB/s +test misc::reallyhard_1K ... bench: 1,276 ns/iter (+/- 14) = 823 MB/s +test misc::reallyhard_1MB ... bench: 1,003,152 ns/iter (+/- 10,884) = 1045 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 7) = 578 MB/s +test misc::reallyhard_32K ... bench: 31,035 ns/iter (+/- 221) = 1056 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 3,924 ns/iter (+/- 23) = 2038 MB/s +test regexdna::find_new_lines ... bench: 2,398,578 ns/iter (+/- 28,663) = 2119 MB/s +test regexdna::subst1 ... bench: 1,073,632 ns/iter (+/- 7,567) = 4734 MB/s +test regexdna::subst10 ... bench: 1,068,696 ns/iter (+/- 14,896) = 4756 MB/s +test regexdna::subst11 ... bench: 1,071,991 ns/iter (+/- 21,623) = 4742 MB/s +test regexdna::subst2 ... bench: 1,064,244 ns/iter (+/- 22,701) = 4776 MB/s +test regexdna::subst3 ... bench: 1,081,402 ns/iter (+/- 25,919) = 4700 MB/s +test regexdna::subst4 ... bench: 1,078,319 ns/iter (+/- 8,278) = 4714 MB/s +test regexdna::subst5 ... bench: 1,067,600 ns/iter (+/- 6,079) = 4761 MB/s +test regexdna::subst6 ... bench: 1,073,509 ns/iter (+/- 8,068) = 4735 MB/s +test regexdna::subst7 ... bench: 1,075,462 ns/iter (+/- 9,145) = 4726 MB/s +test regexdna::subst8 ... bench: 1,073,592 ns/iter (+/- 10,284) = 4734 MB/s +test regexdna::subst9 ... bench: 1,074,960 ns/iter (+/- 11,802) = 4728 MB/s +test regexdna::variant1 ... bench: 14,120,901 ns/iter (+/- 85,462) = 359 MB/s +test regexdna::variant2 ... bench: 15,606,152 ns/iter (+/- 128,452) = 325 MB/s +test regexdna::variant3 ... bench: 18,892,502 ns/iter (+/- 82,790) = 269 MB/s +test regexdna::variant4 ... bench: 17,988,621 ns/iter (+/- 50,462) = 282 MB/s +test regexdna::variant5 ... bench: 15,854,890 ns/iter (+/- 54,745) = 320 MB/s +test regexdna::variant6 ... bench: 16,126,069 ns/iter (+/- 76,013) = 315 MB/s +test regexdna::variant7 ... bench: 17,997,470 ns/iter (+/- 94,472) = 282 MB/s +test regexdna::variant8 ... bench: 23,004,949 ns/iter (+/- 81,626) = 220 MB/s +test regexdna::variant9 ... bench: 20,272,633 ns/iter (+/- 99,674) = 250 MB/s +test sherlock::before_after_holmes ... bench: 3,660,138 ns/iter (+/- 41,095) = 162 MB/s +test sherlock::before_holmes ... bench: 3,632,955 ns/iter (+/- 25,761) = 163 MB/s +test sherlock::holmes_cochar_watson ... bench: 458,639 ns/iter (+/- 9,185) = 1297 MB/s +test sherlock::ing_suffix ... bench: 1,746,052 ns/iter (+/- 31,762) = 340 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,067,141 ns/iter (+/- 12,943) = 146 MB/s +test sherlock::letters ... bench: 11,360,188 ns/iter (+/- 22,264) = 52 MB/s +test sherlock::letters_lower ... 
bench: 11,137,940 ns/iter (+/- 35,225) = 53 MB/s +test sherlock::letters_upper ... bench: 1,505,435 ns/iter (+/- 10,318) = 395 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 181,084 ns/iter (+/- 3,121) = 3285 MB/s +test sherlock::name_alt1 ... bench: 427,474 ns/iter (+/- 1,601) = 1391 MB/s +test sherlock::name_alt2 ... bench: 434,858 ns/iter (+/- 6,444) = 1368 MB/s +test sherlock::name_alt3 ... bench: 747,274 ns/iter (+/- 7,303) = 796 MB/s +test sherlock::name_alt3_nocase ... bench: 2,574,102 ns/iter (+/- 44,203) = 231 MB/s +test sherlock::name_alt4 ... bench: 66,428 ns/iter (+/- 336) = 8956 MB/s +test sherlock::name_alt4_nocase ... bench: 1,333,932 ns/iter (+/- 6,683) = 445 MB/s +test sherlock::name_alt5 ... bench: 598,062 ns/iter (+/- 4,936) = 994 MB/s +test sherlock::name_alt5_nocase ... bench: 1,496,292 ns/iter (+/- 6,595) = 397 MB/s +test sherlock::name_holmes ... bench: 359,203 ns/iter (+/- 6,202) = 1656 MB/s +test sherlock::name_holmes_nocase ... bench: 454,624 ns/iter (+/- 2,658) = 1308 MB/s +test sherlock::name_sherlock ... bench: 243,450 ns/iter (+/- 2,435) = 2443 MB/s +test sherlock::name_sherlock_holmes ... bench: 182,407 ns/iter (+/- 878) = 3261 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 940,244 ns/iter (+/- 6,064) = 632 MB/s +test sherlock::name_sherlock_nocase ... bench: 904,285 ns/iter (+/- 9,405) = 657 MB/s +test sherlock::name_whitespace ... bench: 244,114 ns/iter (+/- 1,875) = 2437 MB/s +test sherlock::no_match_common ... bench: 358,735 ns/iter (+/- 4,090) = 1658 MB/s +test sherlock::no_match_really_common ... bench: 348,964 ns/iter (+/- 6,060) = 1704 MB/s +test sherlock::no_match_uncommon ... bench: 21,256 ns/iter (+/- 144) = 27988 MB/s +test sherlock::quotes ... bench: 422,149 ns/iter (+/- 1,540) = 1409 MB/s +test sherlock::repeated_class_negation ... bench: 5,232,683 ns/iter (+/- 21,609) = 113 MB/s +test sherlock::the_lower ... bench: 651,539 ns/iter (+/- 1,763) = 913 MB/s +test sherlock::the_nocase ... bench: 693,506 ns/iter (+/- 13,143) = 857 MB/s +test sherlock::the_upper ... bench: 46,904 ns/iter (+/- 657) = 12684 MB/s +test sherlock::the_whitespace ... bench: 788,070 ns/iter (+/- 17,403) = 754 MB/s +test sherlock::word_ending_n ... bench: 4,545,774 ns/iter (+/- 26,965) = 130 MB/s +test sherlock::words ... bench: 5,493,039 ns/iter (+/- 16,767) = 108 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/pcre2 b/vendor/regex/record/old-bench-log/07/pcre2 new file mode 100644 index 0000000..c2bbd39 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/pcre2 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 12 ns/iter (+/- 0) = 32500 MB/s +test misc::anchored_literal_short_match ... bench: 14 ns/iter (+/- 0) = 1857 MB/s +test misc::anchored_literal_short_non_match ... bench: 11 ns/iter (+/- 0) = 2363 MB/s +test misc::easy0_1K ... bench: 81 ns/iter (+/- 4) = 12975 MB/s +test misc::easy0_1MB ... bench: 60,199 ns/iter (+/- 658) = 17418 MB/s +test misc::easy0_32 ... bench: 28 ns/iter (+/- 0) = 2107 MB/s +test misc::easy0_32K ... bench: 1,878 ns/iter (+/- 25) = 17462 MB/s +test misc::easy1_1K ... bench: 81 ns/iter (+/- 0) = 12888 MB/s +test misc::easy1_1MB ... bench: 59,222 ns/iter (+/- 598) = 17706 MB/s +test misc::easy1_32 ... bench: 28 ns/iter (+/- 0) = 1857 MB/s +test misc::easy1_32K ... 
bench: 1,819 ns/iter (+/- 6) = 18025 MB/s +test misc::hard_1K ... bench: 1,147 ns/iter (+/- 13) = 916 MB/s +test misc::hard_1MB ... bench: 990,924 ns/iter (+/- 6,065) = 1058 MB/s +test misc::hard_32 ... bench: 82 ns/iter (+/- 3) = 719 MB/s +test misc::hard_32K ... bench: 32,218 ns/iter (+/- 471) = 1017 MB/s +test misc::literal ... bench: 15 ns/iter (+/- 0) = 3400 MB/s +test misc::long_needle1 ... bench: 464,061 ns/iter (+/- 2,241) = 215 MB/s +test misc::long_needle2 ... bench: 465,191 ns/iter (+/- 823) = 214 MB/s +test misc::match_class ... bench: 46 ns/iter (+/- 1) = 1760 MB/s +test misc::match_class_in_range ... bench: 16 ns/iter (+/- 0) = 5062 MB/s +test misc::match_class_unicode ... bench: 246 ns/iter (+/- 0) = 654 MB/s +test misc::medium_1K ... bench: 102 ns/iter (+/- 9) = 10313 MB/s +test misc::medium_1MB ... bench: 60,042 ns/iter (+/- 585) = 17464 MB/s +test misc::medium_32 ... bench: 29 ns/iter (+/- 1) = 2068 MB/s +test misc::medium_32K ... bench: 1,901 ns/iter (+/- 23) = 17251 MB/s +test misc::not_literal ... bench: 122 ns/iter (+/- 2) = 418 MB/s +test misc::one_pass_long_prefix ... bench: 13 ns/iter (+/- 0) = 2000 MB/s +test misc::one_pass_long_prefix_not ... bench: 13 ns/iter (+/- 0) = 2000 MB/s +test misc::one_pass_short ... bench: 36 ns/iter (+/- 0) = 472 MB/s +test misc::one_pass_short_not ... bench: 36 ns/iter (+/- 0) = 472 MB/s +test misc::reallyhard2_1K ... bench: 3,517 ns/iter (+/- 39) = 295 MB/s +test misc::reallyhard_1K ... bench: 1,123 ns/iter (+/- 12) = 935 MB/s +test misc::reallyhard_1MB ... bench: 992,521 ns/iter (+/- 13,407) = 1056 MB/s +test misc::reallyhard_32 ... bench: 71 ns/iter (+/- 0) = 830 MB/s +test misc::reallyhard_32K ... bench: 30,626 ns/iter (+/- 206) = 1070 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 2,824 ns/iter (+/- 21) = 2832 MB/s +test regexdna::find_new_lines ... bench: 1,500,377 ns/iter (+/- 8,152) = 3388 MB/s +test regexdna::subst1 ... bench: 815,769 ns/iter (+/- 14,286) = 6231 MB/s +test regexdna::subst10 ... bench: 820,459 ns/iter (+/- 57,098) = 6195 MB/s +test regexdna::subst11 ... bench: 810,986 ns/iter (+/- 4,270) = 6268 MB/s +test regexdna::subst2 ... bench: 815,568 ns/iter (+/- 35,148) = 6232 MB/s +test regexdna::subst3 ... bench: 812,590 ns/iter (+/- 6,782) = 6255 MB/s +test regexdna::subst4 ... bench: 831,679 ns/iter (+/- 12,372) = 6112 MB/s +test regexdna::subst5 ... bench: 823,207 ns/iter (+/- 12,977) = 6175 MB/s +test regexdna::subst6 ... bench: 815,506 ns/iter (+/- 11,610) = 6233 MB/s +test regexdna::subst7 ... bench: 818,104 ns/iter (+/- 4,807) = 6213 MB/s +test regexdna::subst8 ... bench: 815,265 ns/iter (+/- 21,504) = 6235 MB/s +test regexdna::subst9 ... bench: 809,236 ns/iter (+/- 7,003) = 6281 MB/s +test regexdna::variant1 ... bench: 8,375,573 ns/iter (+/- 80,345) = 606 MB/s +test regexdna::variant2 ... bench: 11,207,698 ns/iter (+/- 45,582) = 453 MB/s +test regexdna::variant3 ... bench: 10,505,744 ns/iter (+/- 69,756) = 483 MB/s +test regexdna::variant4 ... bench: 9,276,177 ns/iter (+/- 50,904) = 548 MB/s +test regexdna::variant5 ... bench: 9,333,446 ns/iter (+/- 41,108) = 544 MB/s +test regexdna::variant6 ... bench: 9,865,395 ns/iter (+/- 26,010) = 515 MB/s +test regexdna::variant7 ... bench: 10,033,179 ns/iter (+/- 28,272) = 506 MB/s +test regexdna::variant8 ... bench: 10,752,604 ns/iter (+/- 37,714) = 472 MB/s +test regexdna::variant9 ... bench: 11,397,272 ns/iter (+/- 41,200) = 446 MB/s +test sherlock::before_after_holmes ... bench: 3,627,616 ns/iter (+/- 18,202) = 164 MB/s +test sherlock::before_holmes ... 
bench: 3,614,713 ns/iter (+/- 18,191) = 164 MB/s +test sherlock::holmes_cochar_watson ... bench: 68,419 ns/iter (+/- 918) = 8695 MB/s +test sherlock::ing_suffix ... bench: 1,766,571 ns/iter (+/- 16,612) = 336 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,018,396 ns/iter (+/- 11,822) = 148 MB/s +test sherlock::letters ... bench: 8,058,390 ns/iter (+/- 39,083) = 73 MB/s +test sherlock::letters_lower ... bench: 8,014,051 ns/iter (+/- 33,500) = 74 MB/s +test sherlock::letters_upper ... bench: 1,452,421 ns/iter (+/- 157,023) = 409 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 36,248 ns/iter (+/- 252) = 16412 MB/s +test sherlock::name_alt1 ... bench: 45,538 ns/iter (+/- 235) = 13064 MB/s +test sherlock::name_alt2 ... bench: 62,202 ns/iter (+/- 892) = 9564 MB/s +test sherlock::name_alt3 ... bench: 623,900 ns/iter (+/- 3,139) = 953 MB/s +test sherlock::name_alt3_nocase ... bench: 2,518,464 ns/iter (+/- 31,943) = 236 MB/s +test sherlock::name_alt4 ... bench: 62,015 ns/iter (+/- 712) = 9593 MB/s +test sherlock::name_alt4_nocase ... bench: 1,162,489 ns/iter (+/- 14,622) = 511 MB/s +test sherlock::name_alt5 ... bench: 589,686 ns/iter (+/- 6,775) = 1008 MB/s +test sherlock::name_alt5_nocase ... bench: 1,359,066 ns/iter (+/- 7,487) = 437 MB/s +test sherlock::name_holmes ... bench: 45,993 ns/iter (+/- 812) = 12935 MB/s +test sherlock::name_holmes_nocase ... bench: 82,326 ns/iter (+/- 758) = 7226 MB/s +test sherlock::name_sherlock ... bench: 36,848 ns/iter (+/- 50) = 16145 MB/s +test sherlock::name_sherlock_holmes ... bench: 36,778 ns/iter (+/- 621) = 16176 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 636,825 ns/iter (+/- 2,957) = 934 MB/s +test sherlock::name_sherlock_nocase ... bench: 635,313 ns/iter (+/- 10,776) = 936 MB/s +test sherlock::name_whitespace ... bench: 37,360 ns/iter (+/- 132) = 15924 MB/s +test sherlock::no_match_common ... bench: 34,545 ns/iter (+/- 239) = 17221 MB/s +test sherlock::no_match_really_common ... bench: 49,019 ns/iter (+/- 590) = 12136 MB/s +test sherlock::no_match_uncommon ... bench: 34,410 ns/iter (+/- 182) = 17289 MB/s +test sherlock::quotes ... bench: 414,599 ns/iter (+/- 3,528) = 1434 MB/s +test sherlock::repeated_class_negation ... bench: 5,106,885 ns/iter (+/- 23,660) = 116 MB/s +test sherlock::the_lower ... bench: 234,135 ns/iter (+/- 3,821) = 2540 MB/s +test sherlock::the_nocase ... bench: 261,765 ns/iter (+/- 2,272) = 2272 MB/s +test sherlock::the_upper ... bench: 50,816 ns/iter (+/- 583) = 11707 MB/s +test sherlock::the_whitespace ... bench: 408,355 ns/iter (+/- 5,463) = 1456 MB/s +test sherlock::word_ending_n ... bench: 4,367,721 ns/iter (+/- 55,474) = 136 MB/s +test sherlock::words ... bench: 4,640,171 ns/iter (+/- 20,462) = 128 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/re2 b/vendor/regex/record/old-bench-log/07/re2 new file mode 100644 index 0000000..6888bea --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/re2 @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 69 ns/iter (+/- 0) = 5652 MB/s +test misc::anchored_literal_long_non_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_short_match ... bench: 69 ns/iter (+/- 0) = 376 MB/s +test misc::anchored_literal_short_non_match ... bench: 15 ns/iter (+/- 0) = 1733 MB/s +test misc::easy0_1K ... bench: 106 ns/iter (+/- 0) = 9915 MB/s +test misc::easy0_1MB ... 
bench: 15,311 ns/iter (+/- 113) = 68486 MB/s +test misc::easy0_32 ... bench: 100 ns/iter (+/- 3) = 590 MB/s +test misc::easy0_32K ... bench: 426 ns/iter (+/- 1) = 76983 MB/s +test misc::easy1_1K ... bench: 98 ns/iter (+/- 0) = 10653 MB/s +test misc::easy1_1MB ... bench: 15,299 ns/iter (+/- 136) = 68540 MB/s +test misc::easy1_32 ... bench: 91 ns/iter (+/- 0) = 571 MB/s +test misc::easy1_32K ... bench: 413 ns/iter (+/- 5) = 79389 MB/s +test misc::hard_1K ... bench: 1,815 ns/iter (+/- 43) = 579 MB/s +test misc::hard_1MB ... bench: 1,842,293 ns/iter (+/- 10,227) = 569 MB/s +test misc::hard_32 ... bench: 146 ns/iter (+/- 4) = 404 MB/s +test misc::hard_32K ... bench: 57,638 ns/iter (+/- 481) = 568 MB/s +test misc::literal ... bench: 64 ns/iter (+/- 1) = 796 MB/s +test misc::long_needle1 ... bench: 122,154 ns/iter (+/- 840) = 818 MB/s +test misc::long_needle2 ... bench: 122,105 ns/iter (+/- 578) = 818 MB/s +test misc::match_class ... bench: 178 ns/iter (+/- 0) = 455 MB/s +test misc::match_class_in_range ... bench: 178 ns/iter (+/- 2) = 455 MB/s +test misc::match_class_unicode ... bench: 293 ns/iter (+/- 2) = 549 MB/s +test misc::medium_1K ... bench: 1,610 ns/iter (+/- 26) = 653 MB/s +test misc::medium_1MB ... bench: 1,537,932 ns/iter (+/- 4,134) = 681 MB/s +test misc::medium_32 ... bench: 158 ns/iter (+/- 1) = 379 MB/s +test misc::medium_32K ... bench: 48,172 ns/iter (+/- 390) = 680 MB/s +test misc::no_exponential ... bench: 216 ns/iter (+/- 1) = 462 MB/s +test misc::not_literal ... bench: 127 ns/iter (+/- 1) = 401 MB/s +test misc::one_pass_long_prefix ... bench: 64 ns/iter (+/- 0) = 406 MB/s +test misc::one_pass_long_prefix_not ... bench: 100 ns/iter (+/- 1) = 260 MB/s +test misc::one_pass_short ... bench: 88 ns/iter (+/- 0) = 193 MB/s +test misc::one_pass_short_not ... bench: 86 ns/iter (+/- 0) = 197 MB/s +test misc::reallyhard2_1K ... bench: 1,332 ns/iter (+/- 41) = 780 MB/s +test misc::reallyhard_1K ... bench: 1,815 ns/iter (+/- 16) = 579 MB/s +test misc::reallyhard_1MB ... bench: 1,842,206 ns/iter (+/- 9,086) = 569 MB/s +test misc::reallyhard_32 ... bench: 149 ns/iter (+/- 1) = 395 MB/s +test misc::reallyhard_32K ... bench: 57,591 ns/iter (+/- 101) = 569 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 11,753 ns/iter (+/- 130) = 680 MB/s +test regexdna::find_new_lines ... bench: 24,330,235 ns/iter (+/- 374,274) = 208 MB/s +test regexdna::subst1 ... bench: 4,079,631 ns/iter (+/- 51,348) = 1246 MB/s +test regexdna::subst10 ... bench: 4,080,803 ns/iter (+/- 30,966) = 1245 MB/s +test regexdna::subst11 ... bench: 4,154,389 ns/iter (+/- 34,825) = 1223 MB/s +test regexdna::subst2 ... bench: 4,076,784 ns/iter (+/- 102,863) = 1246 MB/s +test regexdna::subst3 ... bench: 4,074,850 ns/iter (+/- 52,106) = 1247 MB/s +test regexdna::subst4 ... bench: 4,078,456 ns/iter (+/- 12,343) = 1246 MB/s +test regexdna::subst5 ... bench: 4,075,812 ns/iter (+/- 24,524) = 1247 MB/s +test regexdna::subst6 ... bench: 4,097,009 ns/iter (+/- 13,240) = 1240 MB/s +test regexdna::subst7 ... bench: 4,069,096 ns/iter (+/- 29,794) = 1249 MB/s +test regexdna::subst8 ... bench: 4,078,838 ns/iter (+/- 20,713) = 1246 MB/s +test regexdna::subst9 ... bench: 4,116,905 ns/iter (+/- 14,130) = 1234 MB/s +test regexdna::variant1 ... bench: 21,411,252 ns/iter (+/- 568,076) = 237 MB/s +test regexdna::variant2 ... bench: 21,082,571 ns/iter (+/- 92,912) = 241 MB/s +test regexdna::variant3 ... bench: 20,302,954 ns/iter (+/- 118,421) = 250 MB/s +test regexdna::variant4 ... 
bench: 21,290,669 ns/iter (+/- 102,527) = 238 MB/s +test regexdna::variant5 ... bench: 21,451,671 ns/iter (+/- 99,524) = 236 MB/s +test regexdna::variant6 ... bench: 21,057,017 ns/iter (+/- 530,904) = 241 MB/s +test regexdna::variant7 ... bench: 20,394,037 ns/iter (+/- 128,973) = 249 MB/s +test regexdna::variant8 ... bench: 17,839,069 ns/iter (+/- 122,671) = 284 MB/s +test regexdna::variant9 ... bench: 12,720,049 ns/iter (+/- 76,816) = 399 MB/s +test sherlock::before_after_holmes ... bench: 1,044,129 ns/iter (+/- 4,967) = 569 MB/s +test sherlock::before_holmes ... bench: 1,067,879 ns/iter (+/- 11,345) = 557 MB/s +test sherlock::everything_greedy ... bench: 5,193,222 ns/iter (+/- 10,990) = 114 MB/s +test sherlock::everything_greedy_nl ... bench: 2,038,599 ns/iter (+/- 18,946) = 291 MB/s +test sherlock::holmes_cochar_watson ... bench: 909,342 ns/iter (+/- 5,295) = 654 MB/s +test sherlock::holmes_coword_watson ... bench: 939,154 ns/iter (+/- 6,087) = 633 MB/s +test sherlock::ing_suffix ... bench: 2,729,081 ns/iter (+/- 15,969) = 217 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,373,143 ns/iter (+/- 17,068) = 433 MB/s +test sherlock::letters ... bench: 56,266,035 ns/iter (+/- 165,398) = 10 MB/s +test sherlock::letters_lower ... bench: 54,590,671 ns/iter (+/- 138,842) = 10 MB/s +test sherlock::letters_upper ... bench: 2,702,242 ns/iter (+/- 9,889) = 220 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,430,065 ns/iter (+/- 27,756) = 244 MB/s +test sherlock::name_alt1 ... bench: 45,514 ns/iter (+/- 403) = 13071 MB/s +test sherlock::name_alt2 ... bench: 975,861 ns/iter (+/- 11,553) = 609 MB/s +test sherlock::name_alt3 ... bench: 1,070,967 ns/iter (+/- 11,065) = 555 MB/s +test sherlock::name_alt3_nocase ... bench: 2,574,585 ns/iter (+/- 39,816) = 231 MB/s +test sherlock::name_alt4 ... bench: 978,776 ns/iter (+/- 25,503) = 607 MB/s +test sherlock::name_alt4_nocase ... bench: 1,643,230 ns/iter (+/- 27,685) = 362 MB/s +test sherlock::name_alt5 ... bench: 998,349 ns/iter (+/- 6,658) = 595 MB/s +test sherlock::name_alt5_nocase ... bench: 1,781,006 ns/iter (+/- 22,507) = 334 MB/s +test sherlock::name_holmes ... bench: 92,561 ns/iter (+/- 1,358) = 6427 MB/s +test sherlock::name_holmes_nocase ... bench: 1,281,827 ns/iter (+/- 7,651) = 464 MB/s +test sherlock::name_sherlock ... bench: 31,994 ns/iter (+/- 326) = 18595 MB/s +test sherlock::name_sherlock_holmes ... bench: 34,272 ns/iter (+/- 445) = 17359 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,218,006 ns/iter (+/- 19,301) = 488 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,214,744 ns/iter (+/- 11,473) = 489 MB/s +test sherlock::name_whitespace ... bench: 35,455 ns/iter (+/- 412) = 16779 MB/s +test sherlock::no_match_common ... bench: 299,771 ns/iter (+/- 7,799) = 1984 MB/s +test sherlock::no_match_really_common ... bench: 299,595 ns/iter (+/- 926) = 1985 MB/s +test sherlock::no_match_uncommon ... bench: 9,803 ns/iter (+/- 139) = 60688 MB/s +test sherlock::quotes ... bench: 1,033,423 ns/iter (+/- 9,177) = 575 MB/s +test sherlock::the_lower ... bench: 1,454,358 ns/iter (+/- 75,304) = 409 MB/s +test sherlock::the_nocase ... bench: 2,486,681 ns/iter (+/- 9,026) = 239 MB/s +test sherlock::the_upper ... bench: 123,989 ns/iter (+/- 1,097) = 4798 MB/s +test sherlock::the_whitespace ... bench: 1,454,732 ns/iter (+/- 118,006) = 408 MB/s +test sherlock::word_ending_n ... bench: 1,922,008 ns/iter (+/- 15,040) = 309 MB/s +test sherlock::words ... bench: 16,054,888 ns/iter (+/- 90,684) = 37 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/rust b/vendor/regex/record/old-bench-log/07/rust new file mode 100644 index 0000000..f5e73a2 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/rust @@ -0,0 +1,113 @@ + +running 108 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s +test misc::anchored_literal_short_match ... bench: 14 ns/iter (+/- 0) = 1857 MB/s +test misc::anchored_literal_short_non_match ... bench: 16 ns/iter (+/- 0) = 1625 MB/s +test misc::easy0_1K ... bench: 11 ns/iter (+/- 0) = 95545 MB/s +test misc::easy0_1MB ... bench: 15 ns/iter (+/- 0) = 69906866 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 0) = 5363 MB/s +test misc::easy0_32K ... bench: 11 ns/iter (+/- 0) = 2981363 MB/s +test misc::easy1_1K ... bench: 36 ns/iter (+/- 0) = 29000 MB/s +test misc::easy1_1MB ... bench: 38 ns/iter (+/- 0) = 27594631 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 0) = 1333 MB/s +test misc::easy1_32K ... bench: 36 ns/iter (+/- 0) = 910777 MB/s +test misc::hard_1K ... bench: 48 ns/iter (+/- 0) = 21895 MB/s +test misc::hard_1MB ... bench: 51 ns/iter (+/- 0) = 20560843 MB/s +test misc::hard_32 ... bench: 48 ns/iter (+/- 1) = 1229 MB/s +test misc::hard_32K ... bench: 48 ns/iter (+/- 0) = 683229 MB/s +test misc::literal ... bench: 10 ns/iter (+/- 0) = 5100 MB/s +test misc::long_needle1 ... bench: 956 ns/iter (+/- 14) = 104603 MB/s +test misc::long_needle2 ... bench: 538,237 ns/iter (+/- 8,739) = 185 MB/s +test misc::match_class ... bench: 57 ns/iter (+/- 0) = 1421 MB/s +test misc::match_class_in_range ... bench: 22 ns/iter (+/- 0) = 3681 MB/s +test misc::match_class_unicode ... bench: 245 ns/iter (+/- 4) = 657 MB/s +test misc::medium_1K ... bench: 13 ns/iter (+/- 0) = 80923 MB/s +test misc::medium_1MB ... bench: 15 ns/iter (+/- 0) = 69906933 MB/s +test misc::medium_32 ... bench: 12 ns/iter (+/- 0) = 5000 MB/s +test misc::medium_32K ... bench: 12 ns/iter (+/- 0) = 2733000 MB/s +test misc::no_exponential ... bench: 318 ns/iter (+/- 0) = 314 MB/s +test misc::not_literal ... bench: 85 ns/iter (+/- 0) = 600 MB/s +test misc::one_pass_long_prefix ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_long_prefix_not ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_short ... bench: 34 ns/iter (+/- 0) = 500 MB/s +test misc::one_pass_short_not ... bench: 37 ns/iter (+/- 0) = 459 MB/s +test misc::reallyhard2_1K ... bench: 51 ns/iter (+/- 0) = 20392 MB/s +test misc::reallyhard_1K ... bench: 1,547 ns/iter (+/- 19) = 679 MB/s +test misc::reallyhard_1MB ... bench: 1,533,883 ns/iter (+/- 9,553) = 683 MB/s +test misc::reallyhard_32 ... bench: 96 ns/iter (+/- 0) = 614 MB/s +test misc::reallyhard_32K ... bench: 47,989 ns/iter (+/- 198) = 683 MB/s +test misc::replace_all ... bench: 136 ns/iter (+/- 0) +test misc::reverse_suffix_no_quadratic ... bench: 4,016 ns/iter (+/- 21) = 1992 MB/s +test misc::short_haystack_1000000x ... bench: 64,033 ns/iter (+/- 470) = 124935 MB/s +test misc::short_haystack_100000x ... bench: 6,472 ns/iter (+/- 44) = 123611 MB/s +test misc::short_haystack_10000x ... bench: 1,906 ns/iter (+/- 49) = 41978 MB/s +test misc::short_haystack_1000x ... bench: 362 ns/iter (+/- 1) = 22129 MB/s +test misc::short_haystack_100x ... bench: 259 ns/iter (+/- 2) = 3131 MB/s +test misc::short_haystack_10x ... bench: 228 ns/iter (+/- 0) = 399 MB/s +test misc::short_haystack_1x ... 
bench: 223 ns/iter (+/- 2) = 85 MB/s +test misc::short_haystack_2x ... bench: 224 ns/iter (+/- 2) = 120 MB/s +test misc::short_haystack_3x ... bench: 221 ns/iter (+/- 2) = 158 MB/s +test misc::short_haystack_4x ... bench: 223 ns/iter (+/- 2) = 192 MB/s +test regexdna::find_new_lines ... bench: 11,885,905 ns/iter (+/- 23,501) = 427 MB/s +test regexdna::subst1 ... bench: 712,544 ns/iter (+/- 16,100) = 7134 MB/s +test regexdna::subst10 ... bench: 709,739 ns/iter (+/- 8,467) = 7162 MB/s +test regexdna::subst11 ... bench: 714,261 ns/iter (+/- 8,495) = 7117 MB/s +test regexdna::subst2 ... bench: 711,197 ns/iter (+/- 14,736) = 7147 MB/s +test regexdna::subst3 ... bench: 718,083 ns/iter (+/- 5,050) = 7079 MB/s +test regexdna::subst4 ... bench: 725,196 ns/iter (+/- 20,044) = 7009 MB/s +test regexdna::subst5 ... bench: 709,301 ns/iter (+/- 10,961) = 7166 MB/s +test regexdna::subst6 ... bench: 715,658 ns/iter (+/- 16,431) = 7103 MB/s +test regexdna::subst7 ... bench: 707,472 ns/iter (+/- 5,764) = 7185 MB/s +test regexdna::subst8 ... bench: 707,300 ns/iter (+/- 19,545) = 7187 MB/s +test regexdna::subst9 ... bench: 709,950 ns/iter (+/- 11,319) = 7160 MB/s +test regexdna::variant1 ... bench: 2,498,980 ns/iter (+/- 67,933) = 2034 MB/s +test regexdna::variant2 ... bench: 5,544,923 ns/iter (+/- 31,911) = 916 MB/s +test regexdna::variant3 ... bench: 6,441,568 ns/iter (+/- 20,197) = 789 MB/s +test regexdna::variant4 ... bench: 6,421,276 ns/iter (+/- 161,499) = 791 MB/s +test regexdna::variant5 ... bench: 5,093,567 ns/iter (+/- 18,696) = 998 MB/s +test regexdna::variant6 ... bench: 5,094,859 ns/iter (+/- 22,894) = 997 MB/s +test regexdna::variant7 ... bench: 4,540,111 ns/iter (+/- 11,863) = 1119 MB/s +test regexdna::variant8 ... bench: 4,636,741 ns/iter (+/- 23,448) = 1096 MB/s +test regexdna::variant9 ... bench: 4,557,500 ns/iter (+/- 16,168) = 1115 MB/s +test sherlock::before_after_holmes ... bench: 880,959 ns/iter (+/- 3,004) = 675 MB/s +test sherlock::before_holmes ... bench: 54,416 ns/iter (+/- 1,099) = 10933 MB/s +test sherlock::everything_greedy ... bench: 1,736,180 ns/iter (+/- 9,410) = 342 MB/s +test sherlock::everything_greedy_nl ... bench: 783,848 ns/iter (+/- 19,640) = 758 MB/s +test sherlock::holmes_cochar_watson ... bench: 90,085 ns/iter (+/- 499) = 6604 MB/s +test sherlock::holmes_coword_watson ... bench: 459,431 ns/iter (+/- 830) = 1294 MB/s +test sherlock::ing_suffix ... bench: 348,103 ns/iter (+/- 9,052) = 1709 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,017,387 ns/iter (+/- 3,906) = 584 MB/s +test sherlock::letters ... bench: 18,265,074 ns/iter (+/- 463,241) = 32 MB/s +test sherlock::letters_lower ... bench: 17,846,209 ns/iter (+/- 431,089) = 33 MB/s +test sherlock::letters_upper ... bench: 1,594,743 ns/iter (+/- 3,151) = 373 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 873,308 ns/iter (+/- 1,379) = 681 MB/s +test sherlock::name_alt1 ... bench: 21,144 ns/iter (+/- 315) = 28137 MB/s +test sherlock::name_alt2 ... bench: 71,354 ns/iter (+/- 1,432) = 8337 MB/s +test sherlock::name_alt3 ... bench: 79,167 ns/iter (+/- 294) = 7514 MB/s +test sherlock::name_alt3_nocase ... bench: 1,111,300 ns/iter (+/- 4,434) = 535 MB/s +test sherlock::name_alt4 ... bench: 100,864 ns/iter (+/- 570) = 5898 MB/s +test sherlock::name_alt4_nocase ... bench: 157,266 ns/iter (+/- 4,048) = 3782 MB/s +test sherlock::name_alt5 ... bench: 74,375 ns/iter (+/- 576) = 7999 MB/s +test sherlock::name_alt5_nocase ... bench: 467,879 ns/iter (+/- 2,115) = 1271 MB/s +test sherlock::name_holmes ... 
bench: 26,856 ns/iter (+/- 345) = 22152 MB/s +test sherlock::name_holmes_nocase ... bench: 124,140 ns/iter (+/- 1,111) = 4792 MB/s +test sherlock::name_sherlock ... bench: 52,330 ns/iter (+/- 316) = 11368 MB/s +test sherlock::name_sherlock_holmes ... bench: 19,646 ns/iter (+/- 355) = 30282 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 107,973 ns/iter (+/- 508) = 5510 MB/s +test sherlock::name_sherlock_nocase ... bench: 105,141 ns/iter (+/- 426) = 5658 MB/s +test sherlock::name_whitespace ... bench: 61,149 ns/iter (+/- 350) = 9729 MB/s +test sherlock::no_match_common ... bench: 11,735 ns/iter (+/- 185) = 50697 MB/s +test sherlock::no_match_really_common ... bench: 274,089 ns/iter (+/- 617) = 2170 MB/s +test sherlock::no_match_uncommon ... bench: 11,581 ns/iter (+/- 298) = 51371 MB/s +test sherlock::quotes ... bench: 447,749 ns/iter (+/- 1,173) = 1328 MB/s +test sherlock::repeated_class_negation ... bench: 69,119,491 ns/iter (+/- 117,739) = 8 MB/s +test sherlock::the_lower ... bench: 492,559 ns/iter (+/- 1,674) = 1207 MB/s +test sherlock::the_nocase ... bench: 341,445 ns/iter (+/- 6,455) = 1742 MB/s +test sherlock::the_upper ... bench: 30,555 ns/iter (+/- 168) = 19470 MB/s +test sherlock::the_whitespace ... bench: 950,630 ns/iter (+/- 25,179) = 625 MB/s +test sherlock::word_ending_n ... bench: 1,551,930 ns/iter (+/- 17,792) = 383 MB/s +test sherlock::words ... bench: 7,229,870 ns/iter (+/- 25,046) = 82 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 108 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/rust-bytes b/vendor/regex/record/old-bench-log/07/rust-bytes new file mode 100644 index 0000000..310d775 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/rust-bytes @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s +test misc::anchored_literal_long_non_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s +test misc::anchored_literal_short_match ... bench: 14 ns/iter (+/- 0) = 1857 MB/s +test misc::anchored_literal_short_non_match ... bench: 16 ns/iter (+/- 0) = 1625 MB/s +test misc::easy0_1K ... bench: 11 ns/iter (+/- 0) = 95545 MB/s +test misc::easy0_1MB ... bench: 14 ns/iter (+/- 0) = 74900214 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 0) = 5363 MB/s +test misc::easy0_32K ... bench: 11 ns/iter (+/- 0) = 2981363 MB/s +test misc::easy1_1K ... bench: 36 ns/iter (+/- 0) = 29000 MB/s +test misc::easy1_1MB ... bench: 38 ns/iter (+/- 0) = 27594631 MB/s +test misc::easy1_32 ... bench: 36 ns/iter (+/- 0) = 1444 MB/s +test misc::easy1_32K ... bench: 36 ns/iter (+/- 0) = 910777 MB/s +test misc::hard_1K ... bench: 46 ns/iter (+/- 0) = 22847 MB/s +test misc::hard_1MB ... bench: 49 ns/iter (+/- 0) = 21400061 MB/s +test misc::hard_32 ... bench: 46 ns/iter (+/- 0) = 1282 MB/s +test misc::hard_32K ... bench: 46 ns/iter (+/- 0) = 712934 MB/s +test misc::literal ... bench: 10 ns/iter (+/- 0) = 5100 MB/s +test misc::long_needle1 ... bench: 1,119 ns/iter (+/- 22) = 89366 MB/s +test misc::long_needle2 ... bench: 535,168 ns/iter (+/- 2,976) = 186 MB/s +test misc::match_class ... bench: 67 ns/iter (+/- 0) = 1208 MB/s +test misc::match_class_in_range ... bench: 21 ns/iter (+/- 0) = 3857 MB/s +test misc::medium_1K ... bench: 12 ns/iter (+/- 0) = 87666 MB/s +test misc::medium_1MB ... bench: 16 ns/iter (+/- 0) = 65537750 MB/s +test misc::medium_32 ... bench: 12 ns/iter (+/- 0) = 5000 MB/s +test misc::medium_32K ... bench: 12 ns/iter (+/- 0) = 2733000 MB/s +test misc::no_exponential ... 
bench: 320 ns/iter (+/- 3) = 312 MB/s +test misc::not_literal ... bench: 86 ns/iter (+/- 0) = 593 MB/s +test misc::one_pass_long_prefix ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_long_prefix_not ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_short ... bench: 34 ns/iter (+/- 0) = 500 MB/s +test misc::one_pass_short_not ... bench: 37 ns/iter (+/- 0) = 459 MB/s +test misc::reallyhard2_1K ... bench: 50 ns/iter (+/- 0) = 20800 MB/s +test misc::reallyhard_1K ... bench: 1,548 ns/iter (+/- 0) = 678 MB/s +test misc::reallyhard_1MB ... bench: 1,534,068 ns/iter (+/- 14,813) = 683 MB/s +test misc::reallyhard_32 ... bench: 98 ns/iter (+/- 1) = 602 MB/s +test misc::reallyhard_32K ... bench: 48,003 ns/iter (+/- 128) = 683 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,015 ns/iter (+/- 11) = 1992 MB/s +test regexdna::find_new_lines ... bench: 11,859,603 ns/iter (+/- 22,707) = 428 MB/s +test regexdna::subst1 ... bench: 717,255 ns/iter (+/- 3,261) = 7087 MB/s +test regexdna::subst10 ... bench: 719,600 ns/iter (+/- 4,712) = 7064 MB/s +test regexdna::subst11 ... bench: 708,612 ns/iter (+/- 6,314) = 7173 MB/s +test regexdna::subst2 ... bench: 715,174 ns/iter (+/- 5,097) = 7107 MB/s +test regexdna::subst3 ... bench: 711,261 ns/iter (+/- 12,051) = 7147 MB/s +test regexdna::subst4 ... bench: 761,920 ns/iter (+/- 4,924) = 6671 MB/s +test regexdna::subst5 ... bench: 740,755 ns/iter (+/- 12,762) = 6862 MB/s +test regexdna::subst6 ... bench: 713,936 ns/iter (+/- 7,103) = 7120 MB/s +test regexdna::subst7 ... bench: 710,142 ns/iter (+/- 5,377) = 7158 MB/s +test regexdna::subst8 ... bench: 712,154 ns/iter (+/- 4,485) = 7138 MB/s +test regexdna::subst9 ... bench: 713,214 ns/iter (+/- 6,830) = 7127 MB/s +test regexdna::variant1 ... bench: 2,448,709 ns/iter (+/- 10,799) = 2075 MB/s +test regexdna::variant2 ... bench: 5,541,606 ns/iter (+/- 26,197) = 917 MB/s +test regexdna::variant3 ... bench: 6,563,736 ns/iter (+/- 163,805) = 774 MB/s +test regexdna::variant4 ... bench: 6,428,096 ns/iter (+/- 38,372) = 790 MB/s +test regexdna::variant5 ... bench: 5,110,667 ns/iter (+/- 141,363) = 994 MB/s +test regexdna::variant6 ... bench: 5,086,936 ns/iter (+/- 25,675) = 999 MB/s +test regexdna::variant7 ... bench: 4,607,360 ns/iter (+/- 31,834) = 1103 MB/s +test regexdna::variant8 ... bench: 4,636,550 ns/iter (+/- 11,143) = 1096 MB/s +test regexdna::variant9 ... bench: 4,534,765 ns/iter (+/- 18,435) = 1120 MB/s +test sherlock::before_after_holmes ... bench: 880,980 ns/iter (+/- 1,386) = 675 MB/s +test sherlock::before_holmes ... bench: 56,626 ns/iter (+/- 612) = 10506 MB/s +test sherlock::everything_greedy ... bench: 1,715,022 ns/iter (+/- 7,374) = 346 MB/s +test sherlock::everything_greedy_nl ... bench: 778,398 ns/iter (+/- 6,195) = 764 MB/s +test sherlock::holmes_cochar_watson ... bench: 91,093 ns/iter (+/- 266) = 6531 MB/s +test sherlock::holmes_coword_watson ... bench: 457,793 ns/iter (+/- 3,094) = 1299 MB/s +test sherlock::ing_suffix ... bench: 348,696 ns/iter (+/- 2,174) = 1706 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,017,664 ns/iter (+/- 8,581) = 584 MB/s +test sherlock::letters ... bench: 19,098,779 ns/iter (+/- 36,233) = 31 MB/s +test sherlock::letters_lower ... bench: 17,748,386 ns/iter (+/- 37,835) = 33 MB/s +test sherlock::letters_upper ... bench: 1,592,729 ns/iter (+/- 2,977) = 373 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 873,365 ns/iter (+/- 1,399) = 681 MB/s +test sherlock::name_alt1 ... 
bench: 21,965 ns/iter (+/- 336) = 27085 MB/s +test sherlock::name_alt2 ... bench: 73,887 ns/iter (+/- 107) = 8051 MB/s +test sherlock::name_alt3 ... bench: 79,186 ns/iter (+/- 274) = 7513 MB/s +test sherlock::name_alt3_nocase ... bench: 1,111,949 ns/iter (+/- 3,589) = 535 MB/s +test sherlock::name_alt4 ... bench: 102,493 ns/iter (+/- 959) = 5804 MB/s +test sherlock::name_alt4_nocase ... bench: 158,438 ns/iter (+/- 946) = 3754 MB/s +test sherlock::name_alt5 ... bench: 74,362 ns/iter (+/- 139) = 8000 MB/s +test sherlock::name_alt5_nocase ... bench: 469,720 ns/iter (+/- 5,941) = 1266 MB/s +test sherlock::name_holmes ... bench: 28,919 ns/iter (+/- 372) = 20572 MB/s +test sherlock::name_holmes_nocase ... bench: 123,251 ns/iter (+/- 786) = 4827 MB/s +test sherlock::name_sherlock ... bench: 53,032 ns/iter (+/- 487) = 11218 MB/s +test sherlock::name_sherlock_holmes ... bench: 20,566 ns/iter (+/- 280) = 28927 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 108,166 ns/iter (+/- 303) = 5500 MB/s +test sherlock::name_sherlock_nocase ... bench: 105,034 ns/iter (+/- 797) = 5664 MB/s +test sherlock::name_whitespace ... bench: 60,968 ns/iter (+/- 490) = 9758 MB/s +test sherlock::no_match_common ... bench: 12,191 ns/iter (+/- 128) = 48801 MB/s +test sherlock::no_match_really_common ... bench: 274,528 ns/iter (+/- 1,101) = 2167 MB/s +test sherlock::no_match_uncommon ... bench: 12,197 ns/iter (+/- 191) = 48776 MB/s +test sherlock::quotes ... bench: 446,264 ns/iter (+/- 5,936) = 1333 MB/s +test sherlock::repeated_class_negation ... bench: 69,728,764 ns/iter (+/- 155,104) = 8 MB/s +test sherlock::the_lower ... bench: 493,734 ns/iter (+/- 5,997) = 1204 MB/s +test sherlock::the_nocase ... bench: 339,088 ns/iter (+/- 3,760) = 1754 MB/s +test sherlock::the_upper ... bench: 30,957 ns/iter (+/- 313) = 19218 MB/s +test sherlock::the_whitespace ... bench: 921,059 ns/iter (+/- 8,102) = 645 MB/s +test sherlock::word_ending_n ... bench: 1,530,899 ns/iter (+/- 18,006) = 388 MB/s +test sherlock::words ... bench: 6,959,355 ns/iter (+/- 31,671) = 85 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/stdcpp b/vendor/regex/record/old-bench-log/07/stdcpp new file mode 100644 index 0000000..57c25ae --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/stdcpp @@ -0,0 +1,87 @@ + +running 82 tests +test misc::anchored_literal_long_match ... bench: 142 ns/iter (+/- 0) = 2746 MB/s +test misc::anchored_literal_long_non_match ... bench: 5,504 ns/iter (+/- 20) = 70 MB/s +test misc::anchored_literal_short_match ... bench: 143 ns/iter (+/- 0) = 181 MB/s +test misc::anchored_literal_short_non_match ... bench: 471 ns/iter (+/- 1) = 55 MB/s +test misc::easy0_1K ... bench: 14,534 ns/iter (+/- 87) = 72 MB/s +test misc::easy0_1MB ... bench: 14,554,912 ns/iter (+/- 33,264) = 72 MB/s +test misc::easy0_32 ... bench: 730 ns/iter (+/- 1) = 80 MB/s +test misc::easy0_32K ... bench: 454,911 ns/iter (+/- 526) = 72 MB/s +test misc::easy1_1K ... bench: 14,486 ns/iter (+/- 45) = 72 MB/s +test misc::easy1_1MB ... bench: 14,555,850 ns/iter (+/- 108,290) = 72 MB/s +test misc::easy1_32 ... bench: 692 ns/iter (+/- 1) = 75 MB/s +test misc::easy1_32K ... bench: 456,269 ns/iter (+/- 2,856) = 71 MB/s +test misc::hard_1K ... bench: 299,581 ns/iter (+/- 7,493) = 3 MB/s +test misc::hard_1MB ... bench: 314,289,240 ns/iter (+/- 128,869) = 3 MB/s +test misc::hard_32 ... bench: 9,202 ns/iter (+/- 17) = 6 MB/s +test misc::hard_32K ... 
bench: 9,777,807 ns/iter (+/- 19,451) = 3 MB/s +test misc::literal ... bench: 804 ns/iter (+/- 2) = 63 MB/s +test misc::long_needle1 ... bench: 15,712,941 ns/iter (+/- 23,893) = 6 MB/s +test misc::long_needle2 ... bench: 15,955,109 ns/iter (+/- 26,652) = 6 MB/s +test misc::match_class ... bench: 1,250 ns/iter (+/- 4) = 64 MB/s +test misc::match_class_in_range ... bench: 1,250 ns/iter (+/- 4) = 64 MB/s +test misc::medium_1K ... bench: 14,913 ns/iter (+/- 108) = 70 MB/s +test misc::medium_1MB ... bench: 14,929,542 ns/iter (+/- 38,890) = 70 MB/s +test misc::medium_32 ... bench: 736 ns/iter (+/- 0) = 81 MB/s +test misc::medium_32K ... bench: 466,504 ns/iter (+/- 1,488) = 70 MB/s +test misc::not_literal ... bench: 1,015 ns/iter (+/- 8) = 50 MB/s +test misc::one_pass_long_prefix ... bench: 262 ns/iter (+/- 0) = 99 MB/s +test misc::one_pass_long_prefix_not ... bench: 263 ns/iter (+/- 3) = 98 MB/s +test misc::one_pass_short ... bench: 502 ns/iter (+/- 2) = 33 MB/s +test misc::one_pass_short_not ... bench: 498 ns/iter (+/- 0) = 34 MB/s +test misc::reallyhard2_1K ... bench: 304,485 ns/iter (+/- 762) = 3 MB/s +test misc::reallyhard_1K ... bench: 292,315 ns/iter (+/- 1,985) = 3 MB/s +test misc::reallyhard_1MB ... bench: 313,208,610 ns/iter (+/- 163,013) = 3 MB/s +test misc::reallyhard_32 ... bench: 9,232 ns/iter (+/- 21) = 6 MB/s +test misc::reallyhard_32K ... bench: 9,952,463 ns/iter (+/- 22,317) = 3 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 114,029 ns/iter (+/- 734) = 70 MB/s +test regexdna::find_new_lines ... bench: 121,481,845 ns/iter (+/- 289,966) = 41 MB/s +test regexdna::subst1 ... bench: 73,580,323 ns/iter (+/- 82,998) = 69 MB/s +test regexdna::subst10 ... bench: 73,588,543 ns/iter (+/- 95,250) = 69 MB/s +test regexdna::subst11 ... bench: 73,592,436 ns/iter (+/- 86,358) = 69 MB/s +test regexdna::subst2 ... bench: 73,581,323 ns/iter (+/- 88,210) = 69 MB/s +test regexdna::subst3 ... bench: 73,577,422 ns/iter (+/- 48,215) = 69 MB/s +test regexdna::subst4 ... bench: 73,586,896 ns/iter (+/- 82,117) = 69 MB/s +test regexdna::subst5 ... bench: 73,652,696 ns/iter (+/- 95,155) = 69 MB/s +test regexdna::subst6 ... bench: 74,633,620 ns/iter (+/- 74,754) = 68 MB/s +test regexdna::subst7 ... bench: 73,586,338 ns/iter (+/- 82,645) = 69 MB/s +test regexdna::subst8 ... bench: 75,009,572 ns/iter (+/- 116,800) = 67 MB/s +test regexdna::subst9 ... bench: 73,581,469 ns/iter (+/- 146,286) = 69 MB/s +test regexdna::variant1 ... bench: 140,768,740 ns/iter (+/- 113,580) = 36 MB/s +test regexdna::variant2 ... bench: 153,330,005 ns/iter (+/- 11,581,095) = 33 MB/s +test regexdna::variant3 ... bench: 145,484,512 ns/iter (+/- 150,566) = 34 MB/s +test regexdna::variant4 ... bench: 141,659,767 ns/iter (+/- 123,940) = 35 MB/s +test regexdna::variant5 ... bench: 145,309,207 ns/iter (+/- 129,675) = 34 MB/s +test regexdna::variant6 ... bench: 141,145,017 ns/iter (+/- 164,414) = 36 MB/s +test regexdna::variant7 ... bench: 141,897,206 ns/iter (+/- 212,981) = 35 MB/s +test regexdna::variant8 ... bench: 150,467,139 ns/iter (+/- 120,619) = 33 MB/s +test regexdna::variant9 ... bench: 151,635,430 ns/iter (+/- 128,912) = 33 MB/s +test sherlock::before_after_holmes ... bench: 36,941,681 ns/iter (+/- 36,199) = 16 MB/s +test sherlock::before_holmes ... bench: 36,920,860 ns/iter (+/- 38,258) = 16 MB/s +test sherlock::everything_greedy ... bench: 9,047,684 ns/iter (+/- 18,290) = 65 MB/s +test sherlock::holmes_cochar_watson ... bench: 12,634,723 ns/iter (+/- 36,086) = 47 MB/s +test sherlock::ing_suffix ... 
bench: 30,232,323 ns/iter (+/- 49,084) = 19 MB/s +test sherlock::ing_suffix_limited_space ... bench: 18,837,733 ns/iter (+/- 39,569) = 31 MB/s +test sherlock::name_alt1 ... bench: 12,462,918 ns/iter (+/- 17,158) = 47 MB/s +test sherlock::name_alt2 ... bench: 12,490,419 ns/iter (+/- 26,214) = 47 MB/s +test sherlock::name_alt3 ... bench: 33,156,941 ns/iter (+/- 47,236) = 17 MB/s +test sherlock::name_alt4 ... bench: 12,583,828 ns/iter (+/- 26,121) = 47 MB/s +test sherlock::name_alt5 ... bench: 16,615,345 ns/iter (+/- 22,930) = 35 MB/s +test sherlock::name_holmes ... bench: 8,307,917 ns/iter (+/- 17,452) = 71 MB/s +test sherlock::name_sherlock ... bench: 8,273,395 ns/iter (+/- 25,717) = 71 MB/s +test sherlock::name_sherlock_holmes ... bench: 8,270,000 ns/iter (+/- 19,702) = 71 MB/s +test sherlock::name_whitespace ... bench: 8,453,784 ns/iter (+/- 19,604) = 70 MB/s +test sherlock::no_match_common ... bench: 8,679,069 ns/iter (+/- 27,721) = 68 MB/s +test sherlock::no_match_really_common ... bench: 8,679,099 ns/iter (+/- 17,665) = 68 MB/s +test sherlock::no_match_uncommon ... bench: 8,260,259 ns/iter (+/- 147,913) = 72 MB/s +test sherlock::quotes ... bench: 10,257,367 ns/iter (+/- 25,054) = 58 MB/s +test sherlock::repeated_class_negation ... bench: 25,374,678 ns/iter (+/- 23,494) = 23 MB/s +test sherlock::the_lower ... bench: 9,424,206 ns/iter (+/- 23,231) = 63 MB/s +test sherlock::the_upper ... bench: 8,350,015 ns/iter (+/- 23,176) = 71 MB/s +test sherlock::the_whitespace ... bench: 9,285,991 ns/iter (+/- 16,835) = 64 MB/s +test sherlock::word_ending_n ... bench: 69,609,427 ns/iter (+/- 52,974) = 8 MB/s +test sherlock::words ... bench: 20,107,601 ns/iter (+/- 36,086) = 29 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 82 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/stdcpp-libcxx b/vendor/regex/record/old-bench-log/07/stdcpp-libcxx new file mode 100644 index 0000000..ff21e67 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/stdcpp-libcxx @@ -0,0 +1,87 @@ + +running 82 tests +test misc::anchored_literal_long_match ... bench: 162 ns/iter (+/- 0) = 2407 MB/s +test misc::anchored_literal_long_non_match ... bench: 21,901 ns/iter (+/- 140) = 17 MB/s +test misc::anchored_literal_short_match ... bench: 162 ns/iter (+/- 0) = 160 MB/s +test misc::anchored_literal_short_non_match ... bench: 1,501 ns/iter (+/- 1) = 17 MB/s +test misc::easy0_1K ... bench: 39,405 ns/iter (+/- 250) = 26 MB/s +test misc::easy0_1MB ... bench: 39,706,678 ns/iter (+/- 103,211) = 26 MB/s +test misc::easy0_32 ... bench: 1,415 ns/iter (+/- 3) = 41 MB/s +test misc::easy0_32K ... bench: 1,241,085 ns/iter (+/- 5,625) = 26 MB/s +test misc::easy1_1K ... bench: 39,421 ns/iter (+/- 275) = 26 MB/s +test misc::easy1_1MB ... bench: 39,725,158 ns/iter (+/- 64,488) = 26 MB/s +test misc::easy1_32 ... bench: 1,421 ns/iter (+/- 8) = 36 MB/s +test misc::easy1_32K ... bench: 1,240,953 ns/iter (+/- 5,794) = 26 MB/s +test misc::hard_1K ... bench: 1,263,948 ns/iter (+/- 31,771) +test misc::hard_1MB ... bench: 1,331,000,673 ns/iter (+/- 7,401,131) +test misc::hard_32 ... bench: 37,752 ns/iter (+/- 109) = 1 MB/s +test misc::hard_32K ... bench: 41,044,286 ns/iter (+/- 57,765) +test misc::literal ... bench: 1,980 ns/iter (+/- 7) = 25 MB/s +test misc::long_needle1 ... bench: 12,425,121 ns/iter (+/- 36,611) = 8 MB/s +test misc::long_needle2 ... bench: 12,568,992 ns/iter (+/- 28,513) = 7 MB/s +test misc::match_class ... bench: 3,918 ns/iter (+/- 67) = 20 MB/s +test misc::match_class_in_range ... 
bench: 3,534 ns/iter (+/- 11) = 22 MB/s +test misc::medium_1K ... bench: 44,910 ns/iter (+/- 167) = 23 MB/s +test misc::medium_1MB ... bench: 45,558,328 ns/iter (+/- 77,166) = 23 MB/s +test misc::medium_32 ... bench: 1,599 ns/iter (+/- 12) = 37 MB/s +test misc::medium_32K ... bench: 1,423,945 ns/iter (+/- 9,468) = 23 MB/s +test misc::not_literal ... bench: 2,051 ns/iter (+/- 16) = 24 MB/s +test misc::one_pass_long_prefix ... bench: 222 ns/iter (+/- 0) = 117 MB/s +test misc::one_pass_long_prefix_not ... bench: 223 ns/iter (+/- 0) = 116 MB/s +test misc::one_pass_short ... bench: 2,002 ns/iter (+/- 37) = 8 MB/s +test misc::one_pass_short_not ... bench: 1,990 ns/iter (+/- 6) = 8 MB/s +test misc::reallyhard2_1K ... bench: 1,335,845 ns/iter (+/- 6,233) +test misc::reallyhard_1K ... bench: 1,208,846 ns/iter (+/- 6,070) +test misc::reallyhard_1MB ... bench: 1,291,183,401 ns/iter (+/- 4,281,775) +test misc::reallyhard_32 ... bench: 36,521 ns/iter (+/- 157) = 1 MB/s +test misc::reallyhard_32K ... bench: 40,131,467 ns/iter (+/- 66,846) +test misc::reverse_suffix_no_quadratic ... bench: 506,352 ns/iter (+/- 632) = 15 MB/s +test regexdna::find_new_lines ... bench: 510,954,670 ns/iter (+/- 1,946,366) = 9 MB/s +test regexdna::subst1 ... bench: 198,786,137 ns/iter (+/- 240,963) = 25 MB/s +test regexdna::subst10 ... bench: 198,733,597 ns/iter (+/- 770,484) = 25 MB/s +test regexdna::subst11 ... bench: 198,734,922 ns/iter (+/- 198,116) = 25 MB/s +test regexdna::subst2 ... bench: 198,735,715 ns/iter (+/- 235,337) = 25 MB/s +test regexdna::subst3 ... bench: 198,736,727 ns/iter (+/- 157,633) = 25 MB/s +test regexdna::subst4 ... bench: 198,811,880 ns/iter (+/- 1,502,214) = 25 MB/s +test regexdna::subst5 ... bench: 198,697,281 ns/iter (+/- 211,978) = 25 MB/s +test regexdna::subst6 ... bench: 198,714,239 ns/iter (+/- 1,187,050) = 25 MB/s +test regexdna::subst7 ... bench: 199,021,730 ns/iter (+/- 1,555,969) = 25 MB/s +test regexdna::subst8 ... bench: 199,033,133 ns/iter (+/- 213,859) = 25 MB/s +test regexdna::subst9 ... bench: 199,466,527 ns/iter (+/- 1,394,750) = 25 MB/s +test regexdna::variant1 ... bench: 403,588,578 ns/iter (+/- 493,905) = 12 MB/s +test regexdna::variant2 ... bench: 440,582,945 ns/iter (+/- 305,836) = 11 MB/s +test regexdna::variant3 ... bench: 417,460,804 ns/iter (+/- 1,858,105) = 12 MB/s +test regexdna::variant4 ... bench: 407,209,088 ns/iter (+/- 1,374,513) = 12 MB/s +test regexdna::variant5 ... bench: 408,665,895 ns/iter (+/- 338,946) = 12 MB/s +test regexdna::variant6 ... bench: 408,640,565 ns/iter (+/- 1,895,287) = 12 MB/s +test regexdna::variant7 ... bench: 406,340,097 ns/iter (+/- 2,309,358) = 12 MB/s +test regexdna::variant8 ... bench: 413,195,331 ns/iter (+/- 2,178,194) = 12 MB/s +test regexdna::variant9 ... bench: 438,844,927 ns/iter (+/- 2,589,599) = 11 MB/s +test sherlock::before_after_holmes ... bench: 165,435,560 ns/iter (+/- 165,901) = 3 MB/s +test sherlock::before_holmes ... bench: 164,466,984 ns/iter (+/- 178,082) = 3 MB/s +test sherlock::everything_greedy ... bench: 34,680,745 ns/iter (+/- 862,671) = 17 MB/s +test sherlock::holmes_cochar_watson ... bench: 59,712,596 ns/iter (+/- 85,049) = 9 MB/s +test sherlock::ing_suffix ... bench: 135,611,524 ns/iter (+/- 383,869) = 4 MB/s +test sherlock::ing_suffix_limited_space ... bench: 73,398,446 ns/iter (+/- 112,893) = 8 MB/s +test sherlock::name_alt1 ... bench: 42,274,906 ns/iter (+/- 60,836) = 14 MB/s +test sherlock::name_alt2 ... bench: 42,159,449 ns/iter (+/- 56,642) = 14 MB/s +test sherlock::name_alt3 ... 
bench: 121,926,811 ns/iter (+/- 624,877) = 4 MB/s +test sherlock::name_alt4 ... bench: 58,912,788 ns/iter (+/- 101,576) = 10 MB/s +test sherlock::name_alt5 ... bench: 63,891,303 ns/iter (+/- 79,754) = 9 MB/s +test sherlock::name_holmes ... bench: 22,995,759 ns/iter (+/- 45,074) = 25 MB/s +test sherlock::name_sherlock ... bench: 23,024,135 ns/iter (+/- 86,982) = 25 MB/s +test sherlock::name_sherlock_holmes ... bench: 23,026,357 ns/iter (+/- 42,271) = 25 MB/s +test sherlock::name_whitespace ... bench: 32,485,572 ns/iter (+/- 77,736) = 18 MB/s +test sherlock::no_match_common ... bench: 23,544,207 ns/iter (+/- 590,037) = 25 MB/s +test sherlock::no_match_really_common ... bench: 23,543,480 ns/iter (+/- 51,838) = 25 MB/s +test sherlock::no_match_uncommon ... bench: 23,024,692 ns/iter (+/- 78,358) = 25 MB/s +test sherlock::quotes ... bench: 42,376,602 ns/iter (+/- 49,060) = 14 MB/s +test sherlock::repeated_class_negation ... bench: 92,701,274 ns/iter (+/- 208,063) = 6 MB/s +test sherlock::the_lower ... bench: 23,553,163 ns/iter (+/- 61,446) = 25 MB/s +test sherlock::the_upper ... bench: 23,281,951 ns/iter (+/- 35,811) = 25 MB/s +test sherlock::the_whitespace ... bench: 33,011,779 ns/iter (+/- 65,085) = 18 MB/s +test sherlock::word_ending_n ... bench: 64,965,762 ns/iter (+/- 106,103) = 9 MB/s +test sherlock::words ... bench: 47,466,153 ns/iter (+/- 773,222) = 12 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 82 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/tcl b/vendor/regex/record/old-bench-log/07/tcl new file mode 100644 index 0000000..0586935 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/tcl @@ -0,0 +1,94 @@ + +running 89 tests +test misc::anchored_literal_long_match ... bench: 452 ns/iter (+/- 6) = 862 MB/s +test misc::anchored_literal_long_non_match ... bench: 92 ns/iter (+/- 2) = 4239 MB/s +test misc::anchored_literal_short_match ... bench: 454 ns/iter (+/- 6) = 57 MB/s +test misc::anchored_literal_short_non_match ... bench: 92 ns/iter (+/- 1) = 282 MB/s +test misc::easy0_1K ... bench: 9,231 ns/iter (+/- 59) = 113 MB/s +test misc::easy0_1MB ... bench: 2,828,050 ns/iter (+/- 9,104) = 370 MB/s +test misc::easy0_32 ... bench: 6,527 ns/iter (+/- 78) = 9 MB/s +test misc::easy0_32K ... bench: 94,825 ns/iter (+/- 410) = 345 MB/s +test misc::easy1_1K ... bench: 5,420 ns/iter (+/- 54) = 192 MB/s +test misc::easy1_1MB ... bench: 2,823,597 ns/iter (+/- 8,534) = 371 MB/s +test misc::easy1_32 ... bench: 2,727 ns/iter (+/- 80) = 19 MB/s +test misc::easy1_32K ... bench: 93,382 ns/iter (+/- 108) = 351 MB/s +test misc::hard_1K ... bench: 12,046 ns/iter (+/- 88) = 87 MB/s +test misc::hard_1MB ... bench: 2,831,445 ns/iter (+/- 9,713) = 370 MB/s +test misc::hard_32 ... bench: 9,257 ns/iter (+/- 63) = 6 MB/s +test misc::hard_32K ... bench: 97,613 ns/iter (+/- 533) = 335 MB/s +test misc::literal ... bench: 398 ns/iter (+/- 14) = 128 MB/s +test misc::long_needle1 ... bench: 18,459,088 ns/iter (+/- 162,391) = 5 MB/s +test misc::long_needle2 ... bench: 18,390,595 ns/iter (+/- 96,143) = 5 MB/s +test misc::match_class ... bench: 480 ns/iter (+/- 1) = 168 MB/s +test misc::match_class_in_range ... bench: 477 ns/iter (+/- 10) = 169 MB/s +test misc::medium_1K ... bench: 9,573 ns/iter (+/- 94) = 109 MB/s +test misc::medium_1MB ... bench: 2,828,512 ns/iter (+/- 28,270) = 370 MB/s +test misc::medium_32 ... bench: 6,874 ns/iter (+/- 68) = 8 MB/s +test misc::medium_32K ... bench: 95,040 ns/iter (+/- 517) = 345 MB/s +test misc::no_exponential ... 
bench: 1,976,788 ns/iter (+/- 20,661) +test misc::not_literal ... bench: 1,548 ns/iter (+/- 15) = 32 MB/s +test misc::one_pass_long_prefix ... bench: 5,063 ns/iter (+/- 76) = 5 MB/s +test misc::one_pass_long_prefix_not ... bench: 4,933 ns/iter (+/- 62) = 5 MB/s +test misc::one_pass_short ... bench: 486 ns/iter (+/- 4) = 34 MB/s +test misc::one_pass_short_not ... bench: 579 ns/iter (+/- 3) = 29 MB/s +test misc::reallyhard2_1K ... bench: 88,153 ns/iter (+/- 2,317) = 11 MB/s +test misc::reallyhard_1K ... bench: 12,157 ns/iter (+/- 51) = 86 MB/s +test misc::reallyhard_1MB ... bench: 2,866,126 ns/iter (+/- 71,338) = 365 MB/s +test misc::reallyhard_32 ... bench: 9,321 ns/iter (+/- 138) = 6 MB/s +test misc::reallyhard_32K ... bench: 97,799 ns/iter (+/- 1,087) = 335 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 22,679 ns/iter (+/- 293) = 352 MB/s +test regexdna::find_new_lines ... bench: 38,700,951 ns/iter (+/- 105,197) = 131 MB/s +test regexdna::subst1 ... bench: 22,123,470 ns/iter (+/- 96,738) = 229 MB/s +test regexdna::subst10 ... bench: 22,125,412 ns/iter (+/- 65,856) = 229 MB/s +test regexdna::subst11 ... bench: 22,178,791 ns/iter (+/- 75,853) = 229 MB/s +test regexdna::subst2 ... bench: 22,348,278 ns/iter (+/- 228,790) = 227 MB/s +test regexdna::subst3 ... bench: 22,187,493 ns/iter (+/- 69,149) = 229 MB/s +test regexdna::subst4 ... bench: 22,134,373 ns/iter (+/- 71,979) = 229 MB/s +test regexdna::subst5 ... bench: 22,183,169 ns/iter (+/- 66,220) = 229 MB/s +test regexdna::subst6 ... bench: 22,263,432 ns/iter (+/- 91,605) = 228 MB/s +test regexdna::subst7 ... bench: 22,256,481 ns/iter (+/- 62,794) = 228 MB/s +test regexdna::subst8 ... bench: 22,134,314 ns/iter (+/- 75,199) = 229 MB/s +test regexdna::subst9 ... bench: 22,144,129 ns/iter (+/- 76,744) = 229 MB/s +test regexdna::variant1 ... bench: 13,846,793 ns/iter (+/- 33,520) = 367 MB/s +test regexdna::variant2 ... bench: 14,248,239 ns/iter (+/- 62,252) = 356 MB/s +test regexdna::variant3 ... bench: 15,702,520 ns/iter (+/- 339,738) = 323 MB/s +test regexdna::variant4 ... bench: 15,143,136 ns/iter (+/- 52,300) = 335 MB/s +test regexdna::variant5 ... bench: 16,324,698 ns/iter (+/- 50,942) = 311 MB/s +test regexdna::variant6 ... bench: 14,508,593 ns/iter (+/- 46,251) = 350 MB/s +test regexdna::variant7 ... bench: 14,443,485 ns/iter (+/- 80,444) = 351 MB/s +test regexdna::variant8 ... bench: 14,430,571 ns/iter (+/- 63,143) = 352 MB/s +test regexdna::variant9 ... bench: 14,883,129 ns/iter (+/- 76,837) = 341 MB/s +test sherlock::before_after_holmes ... bench: 2,227,807 ns/iter (+/- 9,119) = 267 MB/s +test sherlock::before_holmes ... bench: 2,700,579 ns/iter (+/- 24,875) = 220 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,211,847 ns/iter (+/- 15,027) = 268 MB/s +test sherlock::ing_suffix ... bench: 4,398,150 ns/iter (+/- 27,219) = 135 MB/s +test sherlock::ing_suffix_limited_space ... bench: 17,992,130 ns/iter (+/- 457,978) = 33 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 1,845,704 ns/iter (+/- 9,382) = 322 MB/s +test sherlock::name_alt1 ... bench: 1,890,373 ns/iter (+/- 9,971) = 314 MB/s +test sherlock::name_alt2 ... bench: 2,626,524 ns/iter (+/- 18,261) = 226 MB/s +test sherlock::name_alt3 ... bench: 4,468,643 ns/iter (+/- 11,946) = 133 MB/s +test sherlock::name_alt3_nocase ... bench: 7,226,342 ns/iter (+/- 57,220) = 82 MB/s +test sherlock::name_alt4 ... bench: 2,395,105 ns/iter (+/- 31,101) = 248 MB/s +test sherlock::name_alt4_nocase ... 
bench: 2,895,153 ns/iter (+/- 12,446) = 205 MB/s +test sherlock::name_alt5 ... bench: 3,253,560 ns/iter (+/- 33,725) = 182 MB/s +test sherlock::name_alt5_nocase ... bench: 4,008,656 ns/iter (+/- 39,415) = 148 MB/s +test sherlock::name_holmes ... bench: 2,076,117 ns/iter (+/- 6,376) = 286 MB/s +test sherlock::name_holmes_nocase ... bench: 2,157,634 ns/iter (+/- 6,494) = 275 MB/s +test sherlock::name_sherlock ... bench: 1,757,317 ns/iter (+/- 5,935) = 338 MB/s +test sherlock::name_sherlock_holmes ... bench: 1,897,004 ns/iter (+/- 12,012) = 313 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,939,722 ns/iter (+/- 6,273) = 306 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,801,334 ns/iter (+/- 3,179) = 330 MB/s +test sherlock::name_whitespace ... bench: 1,910,996 ns/iter (+/- 6,429) = 311 MB/s +test sherlock::no_match_common ... bench: 1,601,431 ns/iter (+/- 7,131) = 371 MB/s +test sherlock::no_match_really_common ... bench: 1,601,153 ns/iter (+/- 4,375) = 371 MB/s +test sherlock::no_match_uncommon ... bench: 1,600,840 ns/iter (+/- 8,348) = 371 MB/s +test sherlock::quotes ... bench: 7,620,650 ns/iter (+/- 48,467) = 78 MB/s +test sherlock::repeated_class_negation ... bench: 55,564,521 ns/iter (+/- 210,324) = 10 MB/s +test sherlock::the_lower ... bench: 5,628,558 ns/iter (+/- 19,934) = 105 MB/s +test sherlock::the_nocase ... bench: 6,063,195 ns/iter (+/- 28,534) = 98 MB/s +test sherlock::the_upper ... bench: 1,992,703 ns/iter (+/- 6,736) = 298 MB/s +test sherlock::the_whitespace ... bench: 7,159,423 ns/iter (+/- 38,306) = 83 MB/s +test sherlock::words ... bench: 38,358,421 ns/iter (+/- 99,230) = 15 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 89 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-01 b/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-01 new file mode 100644 index 0000000..521e935 --- /dev/null +++ b/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-01 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 1) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 2) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 1) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 19 ns/iter (+/- 0) = 1368 MB/s +test misc::easy0_1K ... bench: 15 ns/iter (+/- 2) = 70066 MB/s +test misc::easy0_1MB ... bench: 22 ns/iter (+/- 0) = 47663772 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 2) = 4214 MB/s +test misc::easy0_32K ... bench: 15 ns/iter (+/- 1) = 2186333 MB/s +test misc::easy1_1K ... bench: 40 ns/iter (+/- 2) = 26100 MB/s +test misc::easy1_1MB ... bench: 44 ns/iter (+/- 5) = 23831727 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 5) = 1333 MB/s +test misc::easy1_32K ... bench: 41 ns/iter (+/- 3) = 799707 MB/s +test misc::hard_1K ... bench: 50 ns/iter (+/- 7) = 21020 MB/s +test misc::hard_1MB ... bench: 55 ns/iter (+/- 6) = 19065509 MB/s +test misc::hard_32 ... bench: 50 ns/iter (+/- 7) = 1180 MB/s +test misc::hard_32K ... bench: 50 ns/iter (+/- 2) = 655900 MB/s +test misc::is_match_set ... bench: 60 ns/iter (+/- 2) = 416 MB/s +test misc::literal ... bench: 12 ns/iter (+/- 1) = 4250 MB/s +test misc::long_needle1 ... bench: 3,252 ns/iter (+/- 168) = 30750 MB/s +test misc::long_needle2 ... bench: 355,576 ns/iter (+/- 34,074) = 281 MB/s +test misc::match_class ... bench: 67 ns/iter (+/- 2) = 1208 MB/s +test misc::match_class_in_range ... 
bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::match_class_unicode ... bench: 256 ns/iter (+/- 36) = 628 MB/s +test misc::matches_set ... bench: 458 ns/iter (+/- 65) = 54 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 1) = 70133 MB/s +test misc::medium_1MB ... bench: 23 ns/iter (+/- 2) = 45591478 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s +test misc::medium_32K ... bench: 15 ns/iter (+/- 0) = 2186400 MB/s +test misc::no_exponential ... bench: 406 ns/iter (+/- 32) = 246 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 12) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 53 ns/iter (+/- 1) = 490 MB/s +test misc::one_pass_long_prefix_not ... bench: 51 ns/iter (+/- 7) = 509 MB/s +test misc::one_pass_short ... bench: 37 ns/iter (+/- 1) = 459 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 5) = 435 MB/s +test misc::reallyhard2_1K ... bench: 75 ns/iter (+/- 2) = 13866 MB/s +test misc::reallyhard_1K ... bench: 1,591 ns/iter (+/- 227) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,576,602 ns/iter (+/- 204,573) = 665 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 7) = 578 MB/s +test misc::reallyhard_32K ... bench: 49,327 ns/iter (+/- 4,812) = 664 MB/s +test misc::replace_all ... bench: 132 ns/iter (+/- 13) +test misc::reverse_suffix_no_quadratic ... bench: 4,190 ns/iter (+/- 581) = 1909 MB/s +test misc::short_haystack_1000000x ... bench: 132,982 ns/iter (+/- 18,045) = 60158 MB/s +test misc::short_haystack_100000x ... bench: 14,720 ns/iter (+/- 946) = 54348 MB/s +test misc::short_haystack_10000x ... bench: 5,993 ns/iter (+/- 381) = 13350 MB/s +test misc::short_haystack_1000x ... bench: 476 ns/iter (+/- 58) = 16829 MB/s +test misc::short_haystack_100x ... bench: 227 ns/iter (+/- 22) = 3572 MB/s +test misc::short_haystack_10x ... bench: 211 ns/iter (+/- 13) = 431 MB/s +test misc::short_haystack_1x ... bench: 204 ns/iter (+/- 29) = 93 MB/s +test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 7) = 131 MB/s +test misc::short_haystack_3x ... bench: 212 ns/iter (+/- 16) = 165 MB/s +test misc::short_haystack_4x ... bench: 207 ns/iter (+/- 29) = 207 MB/s +test regexdna::find_new_lines ... bench: 12,053,740 ns/iter (+/- 393,644) = 421 MB/s +test regexdna::subst1 ... bench: 786,112 ns/iter (+/- 91,136) = 6466 MB/s +test regexdna::subst10 ... bench: 831,353 ns/iter (+/- 67,293) = 6114 MB/s +test regexdna::subst11 ... bench: 784,021 ns/iter (+/- 28,112) = 6483 MB/s +test regexdna::subst2 ... bench: 785,838 ns/iter (+/- 108,510) = 6468 MB/s +test regexdna::subst3 ... bench: 791,789 ns/iter (+/- 37,364) = 6420 MB/s +test regexdna::subst4 ... bench: 784,224 ns/iter (+/- 23,802) = 6482 MB/s +test regexdna::subst5 ... bench: 788,368 ns/iter (+/- 75,171) = 6448 MB/s +test regexdna::subst6 ... bench: 784,730 ns/iter (+/- 48,594) = 6477 MB/s +test regexdna::subst7 ... bench: 788,067 ns/iter (+/- 88,333) = 6450 MB/s +test regexdna::subst8 ... bench: 810,784 ns/iter (+/- 111,836) = 6269 MB/s +test regexdna::subst9 ... bench: 788,854 ns/iter (+/- 66,496) = 6444 MB/s +test regexdna::variant1 ... bench: 2,238,677 ns/iter (+/- 144,752) = 2270 MB/s +test regexdna::variant2 ... bench: 3,258,761 ns/iter (+/- 205,012) = 1559 MB/s +test regexdna::variant3 ... bench: 3,818,146 ns/iter (+/- 254,877) = 1331 MB/s +test regexdna::variant4 ... bench: 3,837,323 ns/iter (+/- 349,373) = 1324 MB/s +test regexdna::variant5 ... bench: 2,698,901 ns/iter (+/- 111,145) = 1883 MB/s +test regexdna::variant6 ... 
bench: 2,687,854 ns/iter (+/- 184,039) = 1891 MB/s +test regexdna::variant7 ... bench: 3,291,211 ns/iter (+/- 220,992) = 1544 MB/s +test regexdna::variant8 ... bench: 3,359,262 ns/iter (+/- 185,610) = 1513 MB/s +test regexdna::variant9 ... bench: 3,293,953 ns/iter (+/- 245,454) = 1543 MB/s +test rust_compile::compile_huge ... bench: 95,142 ns/iter (+/- 10,195) +test rust_compile::compile_huge_bytes ... bench: 5,650,680 ns/iter (+/- 252,936) +test rust_compile::compile_huge_full ... bench: 10,867,986 ns/iter (+/- 275,259) +test rust_compile::compile_simple ... bench: 3,751 ns/iter (+/- 310) +test rust_compile::compile_simple_bytes ... bench: 3,664 ns/iter (+/- 172) +test rust_compile::compile_simple_full ... bench: 22,078 ns/iter (+/- 3,259) +test rust_compile::compile_small ... bench: 8,499 ns/iter (+/- 942) +test rust_compile::compile_small_bytes ... bench: 151,196 ns/iter (+/- 16,322) +test rust_compile::compile_small_full ... bench: 309,597 ns/iter (+/- 32,622) +test sherlock::before_after_holmes ... bench: 917,591 ns/iter (+/- 55,643) = 648 MB/s +test sherlock::before_holmes ... bench: 62,726 ns/iter (+/- 8,861) = 9484 MB/s +test sherlock::everything_greedy ... bench: 2,036,050 ns/iter (+/- 152,461) = 292 MB/s +test sherlock::everything_greedy_nl ... bench: 796,690 ns/iter (+/- 71,089) = 746 MB/s +test sherlock::holmes_cochar_watson ... bench: 106,258 ns/iter (+/- 8,294) = 5598 MB/s +test sherlock::holmes_coword_watson ... bench: 481,086 ns/iter (+/- 60,212) = 1236 MB/s +test sherlock::ing_suffix ... bench: 322,033 ns/iter (+/- 8,912) = 1847 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,067,523 ns/iter (+/- 89,630) = 557 MB/s +test sherlock::letters ... bench: 22,745,932 ns/iter (+/- 428,787) = 26 MB/s +test sherlock::letters_lower ... bench: 22,228,365 ns/iter (+/- 495,287) = 26 MB/s +test sherlock::letters_upper ... bench: 1,775,941 ns/iter (+/- 158,985) = 334 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,327 ns/iter (+/- 49,085) = 663 MB/s +test sherlock::name_alt1 ... bench: 32,008 ns/iter (+/- 4,011) = 18587 MB/s +test sherlock::name_alt2 ... bench: 86,850 ns/iter (+/- 5,463) = 6850 MB/s +test sherlock::name_alt3 ... bench: 98,359 ns/iter (+/- 14,052) = 6048 MB/s +test sherlock::name_alt3_nocase ... bench: 381,147 ns/iter (+/- 16,996) = 1560 MB/s +test sherlock::name_alt4 ... bench: 121,025 ns/iter (+/- 16,654) = 4915 MB/s +test sherlock::name_alt4_nocase ... bench: 188,972 ns/iter (+/- 26,145) = 3148 MB/s +test sherlock::name_alt5 ... bench: 91,832 ns/iter (+/- 6,188) = 6478 MB/s +test sherlock::name_alt5_nocase ... bench: 351,422 ns/iter (+/- 49,084) = 1692 MB/s +test sherlock::name_holmes ... bench: 33,405 ns/iter (+/- 3,113) = 17809 MB/s +test sherlock::name_holmes_nocase ... bench: 134,899 ns/iter (+/- 10,883) = 4410 MB/s +test sherlock::name_sherlock ... bench: 22,455 ns/iter (+/- 2,027) = 26494 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,283 ns/iter (+/- 2,281) = 26698 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,492 ns/iter (+/- 6,496) = 6102 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,627 ns/iter (+/- 8,442) = 6221 MB/s +test sherlock::name_whitespace ... bench: 30,702 ns/iter (+/- 4,194) = 19377 MB/s +test sherlock::no_match_common ... bench: 19,616 ns/iter (+/- 2,677) = 30328 MB/s +test sherlock::no_match_really_common ... bench: 25,601 ns/iter (+/- 2,506) = 23238 MB/s +test sherlock::no_match_uncommon ... bench: 19,641 ns/iter (+/- 2,175) = 30290 MB/s +test sherlock::quotes ... 
bench: 369,048 ns/iter (+/- 25,898) = 1612 MB/s +test sherlock::repeated_class_negation ... bench: 75,780,396 ns/iter (+/- 1,032,817) = 7 MB/s +test sherlock::the_lower ... bench: 327,762 ns/iter (+/- 48,769) = 1815 MB/s +test sherlock::the_nocase ... bench: 532,075 ns/iter (+/- 40,117) = 1118 MB/s +test sherlock::the_upper ... bench: 45,197 ns/iter (+/- 1,621) = 13163 MB/s +test sherlock::the_whitespace ... bench: 819,239 ns/iter (+/- 81,388) = 726 MB/s +test sherlock::word_ending_n ... bench: 1,716,625 ns/iter (+/- 120,247) = 346 MB/s +test sherlock::words ... bench: 8,690,764 ns/iter (+/- 322,915) = 68 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 114.31s + diff --git a/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-02 b/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-02 new file mode 100644 index 0000000..60d0578 --- /dev/null +++ b/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-02 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 2) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 1) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 1) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 19 ns/iter (+/- 2) = 1368 MB/s +test misc::easy0_1K ... bench: 15 ns/iter (+/- 1) = 70066 MB/s +test misc::easy0_1MB ... bench: 22 ns/iter (+/- 1) = 47663772 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 1) = 4214 MB/s +test misc::easy0_32K ... bench: 15 ns/iter (+/- 1) = 2186333 MB/s +test misc::easy1_1K ... bench: 39 ns/iter (+/- 4) = 26769 MB/s +test misc::easy1_1MB ... bench: 43 ns/iter (+/- 3) = 24385953 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 4) = 1333 MB/s +test misc::easy1_32K ... bench: 39 ns/iter (+/- 3) = 840717 MB/s +test misc::hard_1K ... bench: 50 ns/iter (+/- 5) = 21020 MB/s +test misc::hard_1MB ... bench: 55 ns/iter (+/- 7) = 19065509 MB/s +test misc::hard_32 ... bench: 50 ns/iter (+/- 5) = 1180 MB/s +test misc::hard_32K ... bench: 50 ns/iter (+/- 6) = 655900 MB/s +test misc::is_match_set ... bench: 60 ns/iter (+/- 4) = 416 MB/s +test misc::literal ... bench: 12 ns/iter (+/- 0) = 4250 MB/s +test misc::long_needle1 ... bench: 3,251 ns/iter (+/- 333) = 30760 MB/s +test misc::long_needle2 ... bench: 355,576 ns/iter (+/- 24,612) = 281 MB/s +test misc::match_class ... bench: 66 ns/iter (+/- 1) = 1227 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 1) = 5785 MB/s +test misc::match_class_unicode ... bench: 254 ns/iter (+/- 25) = 633 MB/s +test misc::matches_set ... bench: 456 ns/iter (+/- 17) = 54 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s +test misc::medium_1MB ... bench: 23 ns/iter (+/- 2) = 45591478 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 2) = 4000 MB/s +test misc::medium_32K ... bench: 15 ns/iter (+/- 2) = 2186400 MB/s +test misc::no_exponential ... bench: 403 ns/iter (+/- 55) = 248 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 12) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 51 ns/iter (+/- 7) = 509 MB/s +test misc::one_pass_long_prefix_not ... bench: 51 ns/iter (+/- 5) = 509 MB/s +test misc::one_pass_short ... bench: 38 ns/iter (+/- 5) = 447 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 1) = 435 MB/s +test misc::reallyhard2_1K ... bench: 75 ns/iter (+/- 2) = 13866 MB/s +test misc::reallyhard_1K ... 
bench: 1,592 ns/iter (+/- 148) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,576,299 ns/iter (+/- 142,145) = 665 MB/s +test misc::reallyhard_32 ... bench: 103 ns/iter (+/- 8) = 572 MB/s +test misc::reallyhard_32K ... bench: 49,326 ns/iter (+/- 3,202) = 664 MB/s +test misc::replace_all ... bench: 132 ns/iter (+/- 16) +test misc::reverse_suffix_no_quadratic ... bench: 4,168 ns/iter (+/- 227) = 1919 MB/s +test misc::short_haystack_1000000x ... bench: 132,733 ns/iter (+/- 18,141) = 60271 MB/s +test misc::short_haystack_100000x ... bench: 14,468 ns/iter (+/- 1,777) = 55295 MB/s +test misc::short_haystack_10000x ... bench: 6,316 ns/iter (+/- 360) = 12667 MB/s +test misc::short_haystack_1000x ... bench: 474 ns/iter (+/- 69) = 16900 MB/s +test misc::short_haystack_100x ... bench: 229 ns/iter (+/- 32) = 3541 MB/s +test misc::short_haystack_10x ... bench: 212 ns/iter (+/- 18) = 429 MB/s +test misc::short_haystack_1x ... bench: 205 ns/iter (+/- 28) = 92 MB/s +test misc::short_haystack_2x ... bench: 207 ns/iter (+/- 20) = 130 MB/s +test misc::short_haystack_3x ... bench: 213 ns/iter (+/- 7) = 164 MB/s +test misc::short_haystack_4x ... bench: 208 ns/iter (+/- 9) = 206 MB/s +test regexdna::find_new_lines ... bench: 12,050,847 ns/iter (+/- 346,484) = 421 MB/s +test regexdna::subst1 ... bench: 817,689 ns/iter (+/- 104,629) = 6216 MB/s +test regexdna::subst10 ... bench: 788,728 ns/iter (+/- 66,497) = 6445 MB/s +test regexdna::subst11 ... bench: 787,188 ns/iter (+/- 49,158) = 6457 MB/s +test regexdna::subst2 ... bench: 787,143 ns/iter (+/- 108,541) = 6458 MB/s +test regexdna::subst3 ... bench: 792,452 ns/iter (+/- 32,963) = 6414 MB/s +test regexdna::subst4 ... bench: 820,043 ns/iter (+/- 71,037) = 6198 MB/s +test regexdna::subst5 ... bench: 790,043 ns/iter (+/- 39,234) = 6434 MB/s +test regexdna::subst6 ... bench: 785,007 ns/iter (+/- 18,701) = 6475 MB/s +test regexdna::subst7 ... bench: 789,393 ns/iter (+/- 51,525) = 6439 MB/s +test regexdna::subst8 ... bench: 784,190 ns/iter (+/- 90,675) = 6482 MB/s +test regexdna::subst9 ... bench: 789,021 ns/iter (+/- 88,256) = 6442 MB/s +test regexdna::variant1 ... bench: 2,237,592 ns/iter (+/- 146,174) = 2271 MB/s +test regexdna::variant2 ... bench: 3,255,382 ns/iter (+/- 179,473) = 1561 MB/s +test regexdna::variant3 ... bench: 3,812,799 ns/iter (+/- 210,786) = 1333 MB/s +test regexdna::variant4 ... bench: 3,853,476 ns/iter (+/- 263,442) = 1319 MB/s +test regexdna::variant5 ... bench: 2,696,756 ns/iter (+/- 161,353) = 1885 MB/s +test regexdna::variant6 ... bench: 2,683,221 ns/iter (+/- 149,650) = 1894 MB/s +test regexdna::variant7 ... bench: 3,289,426 ns/iter (+/- 209,217) = 1545 MB/s +test regexdna::variant8 ... bench: 3,362,858 ns/iter (+/- 274,273) = 1511 MB/s +test regexdna::variant9 ... bench: 3,287,253 ns/iter (+/- 188,894) = 1546 MB/s +test rust_compile::compile_huge ... bench: 94,912 ns/iter (+/- 12,311) +test rust_compile::compile_huge_bytes ... bench: 5,534,281 ns/iter (+/- 192,069) +test rust_compile::compile_huge_full ... bench: 10,969,970 ns/iter (+/- 312,230) +test rust_compile::compile_simple ... bench: 3,523 ns/iter (+/- 525) +test rust_compile::compile_simple_bytes ... bench: 3,564 ns/iter (+/- 355) +test rust_compile::compile_simple_full ... bench: 19,887 ns/iter (+/- 1,885) +test rust_compile::compile_small ... bench: 8,294 ns/iter (+/- 1,123) +test rust_compile::compile_small_bytes ... bench: 153,070 ns/iter (+/- 20,825) +test rust_compile::compile_small_full ... bench: 313,318 ns/iter (+/- 28,271) +test sherlock::before_after_holmes ... 
bench: 907,585 ns/iter (+/- 86,027) = 655 MB/s +test sherlock::before_holmes ... bench: 62,765 ns/iter (+/- 6,413) = 9478 MB/s +test sherlock::everything_greedy ... bench: 2,033,519 ns/iter (+/- 97,963) = 292 MB/s +test sherlock::everything_greedy_nl ... bench: 796,514 ns/iter (+/- 48,247) = 746 MB/s +test sherlock::holmes_cochar_watson ... bench: 107,788 ns/iter (+/- 15,545) = 5519 MB/s +test sherlock::holmes_coword_watson ... bench: 482,686 ns/iter (+/- 49,033) = 1232 MB/s +test sherlock::ing_suffix ... bench: 322,901 ns/iter (+/- 46,329) = 1842 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,067,799 ns/iter (+/- 57,022) = 557 MB/s +test sherlock::letters ... bench: 22,823,246 ns/iter (+/- 472,094) = 26 MB/s +test sherlock::letters_lower ... bench: 22,137,278 ns/iter (+/- 443,188) = 26 MB/s +test sherlock::letters_upper ... bench: 1,773,598 ns/iter (+/- 96,994) = 335 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,623 ns/iter (+/- 48,509) = 662 MB/s +test sherlock::name_alt1 ... bench: 31,882 ns/iter (+/- 3,354) = 18660 MB/s +test sherlock::name_alt2 ... bench: 86,500 ns/iter (+/- 7,997) = 6877 MB/s +test sherlock::name_alt3 ... bench: 98,159 ns/iter (+/- 6,106) = 6060 MB/s +test sherlock::name_alt3_nocase ... bench: 383,858 ns/iter (+/- 19,224) = 1549 MB/s +test sherlock::name_alt4 ... bench: 122,489 ns/iter (+/- 17,271) = 4857 MB/s +test sherlock::name_alt4_nocase ... bench: 192,081 ns/iter (+/- 10,999) = 3097 MB/s +test sherlock::name_alt5 ... bench: 91,396 ns/iter (+/- 6,399) = 6509 MB/s +test sherlock::name_alt5_nocase ... bench: 354,804 ns/iter (+/- 26,158) = 1676 MB/s +test sherlock::name_holmes ... bench: 33,569 ns/iter (+/- 4,647) = 17722 MB/s +test sherlock::name_holmes_nocase ... bench: 136,387 ns/iter (+/- 14,005) = 4362 MB/s +test sherlock::name_sherlock ... bench: 22,468 ns/iter (+/- 1,144) = 26479 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,279 ns/iter (+/- 1,563) = 26703 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 98,003 ns/iter (+/- 10,978) = 6070 MB/s +test sherlock::name_sherlock_nocase ... bench: 96,130 ns/iter (+/- 4,373) = 6188 MB/s +test sherlock::name_whitespace ... bench: 30,532 ns/iter (+/- 3,125) = 19485 MB/s +test sherlock::no_match_common ... bench: 19,644 ns/iter (+/- 2,118) = 30285 MB/s +test sherlock::no_match_really_common ... bench: 25,374 ns/iter (+/- 1,538) = 23446 MB/s +test sherlock::no_match_uncommon ... bench: 19,602 ns/iter (+/- 427) = 30350 MB/s +test sherlock::quotes ... bench: 369,657 ns/iter (+/- 52,406) = 1609 MB/s +test sherlock::repeated_class_negation ... bench: 76,922,839 ns/iter (+/- 1,261,770) = 7 MB/s +test sherlock::the_lower ... bench: 326,221 ns/iter (+/- 35,683) = 1823 MB/s +test sherlock::the_nocase ... bench: 525,254 ns/iter (+/- 26,000) = 1132 MB/s +test sherlock::the_upper ... bench: 44,702 ns/iter (+/- 5,012) = 13308 MB/s +test sherlock::the_whitespace ... bench: 814,494 ns/iter (+/- 66,715) = 730 MB/s +test sherlock::word_ending_n ... bench: 1,705,139 ns/iter (+/- 97,420) = 348 MB/s +test sherlock::words ... bench: 8,632,437 ns/iter (+/- 278,177) = 68 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 106.01s + diff --git a/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-01 b/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-01 new file mode 100644 index 0000000..1316e6d --- /dev/null +++ b/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-01 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 1) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 17 ns/iter (+/- 1) = 22941 MB/s +test misc::anchored_literal_short_match ... bench: 16 ns/iter (+/- 2) = 1625 MB/s +test misc::anchored_literal_short_non_match ... bench: 17 ns/iter (+/- 2) = 1529 MB/s +test misc::easy0_1K ... bench: 12 ns/iter (+/- 1) = 87583 MB/s +test misc::easy0_1MB ... bench: 15 ns/iter (+/- 0) = 69906866 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 0) = 5363 MB/s +test misc::easy0_32K ... bench: 12 ns/iter (+/- 2) = 2732916 MB/s +test misc::easy1_1K ... bench: 39 ns/iter (+/- 5) = 26769 MB/s +test misc::easy1_1MB ... bench: 40 ns/iter (+/- 6) = 26214900 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 3) = 1333 MB/s +test misc::easy1_32K ... bench: 39 ns/iter (+/- 5) = 840717 MB/s +test misc::hard_1K ... bench: 49 ns/iter (+/- 1) = 21448 MB/s +test misc::hard_1MB ... bench: 52 ns/iter (+/- 2) = 20165442 MB/s +test misc::hard_32 ... bench: 49 ns/iter (+/- 2) = 1204 MB/s +test misc::hard_32K ... bench: 49 ns/iter (+/- 3) = 669285 MB/s +test misc::is_match_set ... bench: 59 ns/iter (+/- 2) = 423 MB/s +test misc::literal ... bench: 11 ns/iter (+/- 1) = 4636 MB/s +test misc::long_needle1 ... bench: 1,161 ns/iter (+/- 54) = 86133 MB/s +test misc::long_needle2 ... bench: 680,687 ns/iter (+/- 63,713) = 146 MB/s +test misc::match_class ... bench: 69 ns/iter (+/- 4) = 1173 MB/s +test misc::match_class_in_range ... bench: 13 ns/iter (+/- 0) = 6230 MB/s +test misc::match_class_unicode ... bench: 253 ns/iter (+/- 9) = 636 MB/s +test misc::matches_set ... bench: 453 ns/iter (+/- 65) = 55 MB/s +test misc::medium_1K ... bench: 13 ns/iter (+/- 0) = 80923 MB/s +test misc::medium_1MB ... bench: 17 ns/iter (+/- 2) = 61682588 MB/s +test misc::medium_32 ... bench: 13 ns/iter (+/- 0) = 4615 MB/s +test misc::medium_32K ... bench: 13 ns/iter (+/- 0) = 2522769 MB/s +test misc::no_exponential ... bench: 330 ns/iter (+/- 47) = 303 MB/s +test misc::not_literal ... bench: 88 ns/iter (+/- 9) = 579 MB/s +test misc::one_pass_long_prefix ... bench: 50 ns/iter (+/- 7) = 520 MB/s +test misc::one_pass_long_prefix_not ... bench: 50 ns/iter (+/- 2) = 520 MB/s +test misc::one_pass_short ... bench: 36 ns/iter (+/- 4) = 472 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 2) = 435 MB/s +test misc::reallyhard2_1K ... bench: 55 ns/iter (+/- 7) = 18909 MB/s +test misc::reallyhard_1K ... bench: 1,590 ns/iter (+/- 225) = 661 MB/s +test misc::reallyhard_1MB ... bench: 1,580,163 ns/iter (+/- 224,935) = 663 MB/s +test misc::reallyhard_32 ... bench: 100 ns/iter (+/- 6) = 590 MB/s +test misc::reallyhard_32K ... bench: 49,318 ns/iter (+/- 6,046) = 664 MB/s +test misc::replace_all ... bench: 127 ns/iter (+/- 14) +test misc::reverse_suffix_no_quadratic ... bench: 4,240 ns/iter (+/- 117) = 1886 MB/s +test misc::short_haystack_1000000x ... bench: 89,004 ns/iter (+/- 2,927) = 89883 MB/s +test misc::short_haystack_100000x ... bench: 10,349 ns/iter (+/- 334) = 77303 MB/s +test misc::short_haystack_10000x ... 
bench: 5,835 ns/iter (+/- 700) = 13712 MB/s
+test misc::short_haystack_1000x ... bench: 563 ns/iter (+/- 33) = 14229 MB/s
+test misc::short_haystack_100x ... bench: 260 ns/iter (+/- 21) = 3119 MB/s
+test misc::short_haystack_10x ... bench: 221 ns/iter (+/- 31) = 411 MB/s
+test misc::short_haystack_1x ... bench: 211 ns/iter (+/- 30) = 90 MB/s
+test misc::short_haystack_2x ... bench: 213 ns/iter (+/- 19) = 126 MB/s
+test misc::short_haystack_3x ... bench: 212 ns/iter (+/- 7) = 165 MB/s
+test misc::short_haystack_4x ... bench: 221 ns/iter (+/- 26) = 194 MB/s
+test regexdna::find_new_lines ... bench: 12,035,248 ns/iter (+/- 362,122) = 422 MB/s
+test regexdna::subst1 ... bench: 787,853 ns/iter (+/- 29,667) = 6452 MB/s
+test regexdna::subst10 ... bench: 750,718 ns/iter (+/- 103,118) = 6771 MB/s
+test regexdna::subst11 ... bench: 749,377 ns/iter (+/- 103,312) = 6783 MB/s
+test regexdna::subst2 ... bench: 748,785 ns/iter (+/- 83,175) = 6788 MB/s
+test regexdna::subst3 ... bench: 755,004 ns/iter (+/- 75,589) = 6732 MB/s
+test regexdna::subst4 ... bench: 747,617 ns/iter (+/- 70,600) = 6799 MB/s
+test regexdna::subst5 ... bench: 752,458 ns/iter (+/- 86,154) = 6755 MB/s
+test regexdna::subst6 ... bench: 749,801 ns/iter (+/- 102,642) = 6779 MB/s
+test regexdna::subst7 ... bench: 760,975 ns/iter (+/- 105,159) = 6680 MB/s
+test regexdna::subst8 ... bench: 749,002 ns/iter (+/- 82,082) = 6786 MB/s
+test regexdna::subst9 ... bench: 751,248 ns/iter (+/- 100,152) = 6766 MB/s
+test regexdna::variant1 ... bench: 2,211,035 ns/iter (+/- 150,147) = 2299 MB/s
+test regexdna::variant2 ... bench: 3,210,193 ns/iter (+/- 161,942) = 1583 MB/s
+test regexdna::variant3 ... bench: 3,793,641 ns/iter (+/- 203,795) = 1339 MB/s
+test regexdna::variant4 ... bench: 3,799,721 ns/iter (+/- 140,933) = 1337 MB/s
+test regexdna::variant5 ... bench: 2,652,750 ns/iter (+/- 185,489) = 1916 MB/s
+test regexdna::variant6 ... bench: 2,633,257 ns/iter (+/- 211,323) = 1930 MB/s
+test regexdna::variant7 ... bench: 3,268,111 ns/iter (+/- 176,273) = 1555 MB/s
+test regexdna::variant8 ... bench: 3,331,333 ns/iter (+/- 264,431) = 1525 MB/s
+test regexdna::variant9 ... bench: 3,268,398 ns/iter (+/- 298,223) = 1555 MB/s
+test rust_compile::compile_huge ... bench: 94,562 ns/iter (+/- 2,194)
+test rust_compile::compile_huge_bytes ... bench: 5,611,428 ns/iter (+/- 202,365)
+test rust_compile::compile_huge_full ... bench: 10,933,505 ns/iter (+/- 325,078)
+test rust_compile::compile_simple ... bench: 3,496 ns/iter (+/- 156)
+test rust_compile::compile_simple_bytes ... bench: 3,572 ns/iter (+/- 389)
+test rust_compile::compile_simple_full ... bench: 20,283 ns/iter (+/- 1,894)
+test rust_compile::compile_small ... bench: 8,475 ns/iter (+/- 1,008)
+test rust_compile::compile_small_bytes ... bench: 157,446 ns/iter (+/- 11,319)
+test rust_compile::compile_small_full ... bench: 316,041 ns/iter (+/- 23,620)
+test sherlock::before_after_holmes ... bench: 906,578 ns/iter (+/- 129,507) = 656 MB/s
+test sherlock::before_holmes ... bench: 64,715 ns/iter (+/- 9,107) = 9193 MB/s
+test sherlock::everything_greedy ... bench: 2,065,017 ns/iter (+/- 156,855) = 288 MB/s
+test sherlock::everything_greedy_nl ... bench: 810,672 ns/iter (+/- 100,547) = 733 MB/s
+test sherlock::holmes_cochar_watson ... bench: 106,124 ns/iter (+/- 10,948) = 5606 MB/s
+test sherlock::holmes_coword_watson ... bench: 488,503 ns/iter (+/- 63,243) = 1217 MB/s
+test sherlock::ing_suffix ... bench: 384,936 ns/iter (+/- 25,316) = 1545 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 1,060,294 ns/iter (+/- 152,263) = 561 MB/s
+test sherlock::letters ... bench: 22,127,059 ns/iter (+/- 413,502) = 26 MB/s
+test sherlock::letters_lower ... bench: 21,535,012 ns/iter (+/- 463,835) = 27 MB/s
+test sherlock::letters_upper ... bench: 1,758,480 ns/iter (+/- 130,352) = 338 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 897,329 ns/iter (+/- 96,625) = 663 MB/s
+test sherlock::name_alt1 ... bench: 31,585 ns/iter (+/- 2,796) = 18835 MB/s
+test sherlock::name_alt2 ... bench: 86,223 ns/iter (+/- 9,553) = 6899 MB/s
+test sherlock::name_alt3 ... bench: 97,177 ns/iter (+/- 11,479) = 6122 MB/s
+test sherlock::name_alt3_nocase ... bench: 381,511 ns/iter (+/- 55,025) = 1559 MB/s
+test sherlock::name_alt4 ... bench: 121,672 ns/iter (+/- 9,253) = 4889 MB/s
+test sherlock::name_alt4_nocase ... bench: 187,887 ns/iter (+/- 26,932) = 3166 MB/s
+test sherlock::name_alt5 ... bench: 90,732 ns/iter (+/- 7,251) = 6557 MB/s
+test sherlock::name_alt5_nocase ... bench: 352,388 ns/iter (+/- 50,408) = 1688 MB/s
+test sherlock::name_holmes ... bench: 33,836 ns/iter (+/- 3,388) = 17582 MB/s
+test sherlock::name_holmes_nocase ... bench: 133,068 ns/iter (+/- 7,602) = 4470 MB/s
+test sherlock::name_sherlock ... bench: 62,719 ns/iter (+/- 8,927) = 9485 MB/s
+test sherlock::name_sherlock_holmes ... bench: 24,688 ns/iter (+/- 2,482) = 24098 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 97,793 ns/iter (+/- 12,078) = 6083 MB/s
+test sherlock::name_sherlock_nocase ... bench: 95,772 ns/iter (+/- 13,713) = 6211 MB/s
+test sherlock::name_whitespace ... bench: 70,942 ns/iter (+/- 5,565) = 8386 MB/s
+test sherlock::no_match_common ... bench: 14,645 ns/iter (+/- 1,430) = 40623 MB/s
+test sherlock::no_match_really_common ... bench: 239,346 ns/iter (+/- 17,292) = 2485 MB/s
+test sherlock::no_match_uncommon ... bench: 14,637 ns/iter (+/- 1,360) = 40645 MB/s
+test sherlock::quotes ... bench: 367,945 ns/iter (+/- 35,370) = 1616 MB/s
+test sherlock::repeated_class_negation ... bench: 74,367,046 ns/iter (+/- 1,114,875) = 7 MB/s
+test sherlock::the_lower ... bench: 463,888 ns/iter (+/- 67,551) = 1282 MB/s
+test sherlock::the_nocase ... bench: 520,822 ns/iter (+/- 76,131) = 1142 MB/s
+test sherlock::the_upper ... bench: 37,354 ns/iter (+/- 4,110) = 15926 MB/s
+test sherlock::the_whitespace ... bench: 922,312 ns/iter (+/- 95,082) = 645 MB/s
+test sherlock::word_ending_n ... bench: 1,679,343 ns/iter (+/- 165,580) = 354 MB/s
+test sherlock::words ... bench: 8,280,082 ns/iter (+/- 290,280) = 71 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 113.49s
+
diff --git a/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-02 b/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-02
new file mode 100644
index 0000000..5d75102
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-02
@@ -0,0 +1,124 @@
+
+running 119 tests
+test misc::anchored_literal_long_match ... bench: 17 ns/iter (+/- 0) = 22941 MB/s
+test misc::anchored_literal_long_non_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s
+test misc::anchored_literal_short_match ... bench: 16 ns/iter (+/- 2) = 1625 MB/s
+test misc::anchored_literal_short_non_match ... bench: 17 ns/iter (+/- 2) = 1529 MB/s
+test misc::easy0_1K ... bench: 12 ns/iter (+/- 0) = 87583 MB/s
+test misc::easy0_1MB ... bench: 14 ns/iter (+/- 1) = 74900214 MB/s
+test misc::easy0_32 ... bench: 11 ns/iter (+/- 1) = 5363 MB/s
+test misc::easy0_32K ... bench: 12 ns/iter (+/- 1) = 2732916 MB/s
+test misc::easy1_1K ... bench: 38 ns/iter (+/- 5) = 27473 MB/s
+test misc::easy1_1MB ... bench: 40 ns/iter (+/- 5) = 26214900 MB/s
+test misc::easy1_32 ... bench: 38 ns/iter (+/- 1) = 1368 MB/s
+test misc::easy1_32K ... bench: 38 ns/iter (+/- 1) = 862842 MB/s
+test misc::hard_1K ... bench: 49 ns/iter (+/- 4) = 21448 MB/s
+test misc::hard_1MB ... bench: 52 ns/iter (+/- 7) = 20165442 MB/s
+test misc::hard_32 ... bench: 49 ns/iter (+/- 1) = 1204 MB/s
+test misc::hard_32K ... bench: 49 ns/iter (+/- 6) = 669285 MB/s
+test misc::is_match_set ... bench: 59 ns/iter (+/- 2) = 423 MB/s
+test misc::literal ... bench: 11 ns/iter (+/- 0) = 4636 MB/s
+test misc::long_needle1 ... bench: 1,179 ns/iter (+/- 92) = 84818 MB/s
+test misc::long_needle2 ... bench: 680,418 ns/iter (+/- 27,142) = 146 MB/s
+test misc::match_class ... bench: 68 ns/iter (+/- 6) = 1191 MB/s
+test misc::match_class_in_range ... bench: 13 ns/iter (+/- 1) = 6230 MB/s
+test misc::match_class_unicode ... bench: 253 ns/iter (+/- 33) = 636 MB/s
+test misc::matches_set ... bench: 453 ns/iter (+/- 65) = 55 MB/s
+test misc::medium_1K ... bench: 13 ns/iter (+/- 1) = 80923 MB/s
+test misc::medium_1MB ... bench: 17 ns/iter (+/- 2) = 61682588 MB/s
+test misc::medium_32 ... bench: 13 ns/iter (+/- 0) = 4615 MB/s
+test misc::medium_32K ... bench: 13 ns/iter (+/- 0) = 2522769 MB/s
+test misc::no_exponential ... bench: 330 ns/iter (+/- 47) = 303 MB/s
+test misc::not_literal ... bench: 88 ns/iter (+/- 12) = 579 MB/s
+test misc::one_pass_long_prefix ... bench: 50 ns/iter (+/- 6) = 520 MB/s
+test misc::one_pass_long_prefix_not ... bench: 50 ns/iter (+/- 7) = 520 MB/s
+test misc::one_pass_short ... bench: 36 ns/iter (+/- 2) = 472 MB/s
+test misc::one_pass_short_not ... bench: 38 ns/iter (+/- 5) = 447 MB/s
+test misc::reallyhard2_1K ... bench: 55 ns/iter (+/- 7) = 18909 MB/s
+test misc::reallyhard_1K ... bench: 1,590 ns/iter (+/- 64) = 661 MB/s
+test misc::reallyhard_1MB ... bench: 1,581,975 ns/iter (+/- 126,709) = 662 MB/s
+test misc::reallyhard_32 ... bench: 100 ns/iter (+/- 4) = 590 MB/s
+test misc::reallyhard_32K ... bench: 49,323 ns/iter (+/- 7,063) = 664 MB/s
+test misc::replace_all ... bench: 127 ns/iter (+/- 5)
+test misc::reverse_suffix_no_quadratic ... bench: 4,171 ns/iter (+/- 624) = 1918 MB/s
+test misc::short_haystack_1000000x ... bench: 88,960 ns/iter (+/- 7,710) = 89928 MB/s
+test misc::short_haystack_100000x ... bench: 10,193 ns/iter (+/- 952) = 78486 MB/s
+test misc::short_haystack_10000x ... bench: 5,798 ns/iter (+/- 636) = 13799 MB/s
+test misc::short_haystack_1000x ... bench: 418 ns/iter (+/- 60) = 19165 MB/s
+test misc::short_haystack_100x ... bench: 258 ns/iter (+/- 21) = 3143 MB/s
+test misc::short_haystack_10x ... bench: 216 ns/iter (+/- 21) = 421 MB/s
+test misc::short_haystack_1x ... bench: 205 ns/iter (+/- 29) = 92 MB/s
+test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 22) = 131 MB/s
+test misc::short_haystack_3x ... bench: 205 ns/iter (+/- 29) = 170 MB/s
+test misc::short_haystack_4x ... bench: 214 ns/iter (+/- 6) = 200 MB/s
+test regexdna::find_new_lines ... bench: 12,039,715 ns/iter (+/- 410,515) = 422 MB/s
+test regexdna::subst1 ... bench: 750,454 ns/iter (+/- 65,358) = 6773 MB/s
+test regexdna::subst10 ... bench: 748,321 ns/iter (+/- 93,416) = 6793 MB/s
+test regexdna::subst11 ... bench: 747,906 ns/iter (+/- 92,141) = 6796 MB/s
+test regexdna::subst2 ... bench: 755,082 ns/iter (+/- 88,044) = 6732 MB/s
+test regexdna::subst3 ... bench: 753,496 ns/iter (+/- 70,987) = 6746 MB/s
+test regexdna::subst4 ... bench: 747,103 ns/iter (+/- 102,992) = 6804 MB/s
+test regexdna::subst5 ... bench: 750,805 ns/iter (+/- 72,572) = 6770 MB/s
+test regexdna::subst6 ... bench: 748,419 ns/iter (+/- 47,272) = 6792 MB/s
+test regexdna::subst7 ... bench: 752,556 ns/iter (+/- 95,329) = 6754 MB/s
+test regexdna::subst8 ... bench: 756,009 ns/iter (+/- 78,049) = 6724 MB/s
+test regexdna::subst9 ... bench: 749,278 ns/iter (+/- 70,259) = 6784 MB/s
+test regexdna::variant1 ... bench: 2,215,182 ns/iter (+/- 114,543) = 2294 MB/s
+test regexdna::variant2 ... bench: 3,207,983 ns/iter (+/- 184,419) = 1584 MB/s
+test regexdna::variant3 ... bench: 3,791,716 ns/iter (+/- 192,185) = 1340 MB/s
+test regexdna::variant4 ... bench: 3,809,934 ns/iter (+/- 222,872) = 1334 MB/s
+test regexdna::variant5 ... bench: 2,651,345 ns/iter (+/- 183,673) = 1917 MB/s
+test regexdna::variant6 ... bench: 2,635,566 ns/iter (+/- 170,288) = 1928 MB/s
+test regexdna::variant7 ... bench: 3,265,519 ns/iter (+/- 234,923) = 1556 MB/s
+test regexdna::variant8 ... bench: 3,340,830 ns/iter (+/- 183,129) = 1521 MB/s
+test regexdna::variant9 ... bench: 3,267,141 ns/iter (+/- 185,543) = 1555 MB/s
+test rust_compile::compile_huge ... bench: 94,368 ns/iter (+/- 13,293)
+test rust_compile::compile_huge_bytes ... bench: 5,616,594 ns/iter (+/- 243,462)
+test rust_compile::compile_huge_full ... bench: 10,862,100 ns/iter (+/- 260,207)
+test rust_compile::compile_simple ... bench: 3,463 ns/iter (+/- 350)
+test rust_compile::compile_simple_bytes ... bench: 3,542 ns/iter (+/- 504)
+test rust_compile::compile_simple_full ... bench: 20,562 ns/iter (+/- 3,117)
+test rust_compile::compile_small ... bench: 8,325 ns/iter (+/- 641)
+test rust_compile::compile_small_bytes ... bench: 153,450 ns/iter (+/- 11,174)
+test rust_compile::compile_small_full ... bench: 315,871 ns/iter (+/- 33,828)
+test sherlock::before_after_holmes ... bench: 906,423 ns/iter (+/- 34,801) = 656 MB/s
+test sherlock::before_holmes ... bench: 64,457 ns/iter (+/- 8,343) = 9229 MB/s
+test sherlock::everything_greedy ... bench: 2,058,675 ns/iter (+/- 208,885) = 288 MB/s
+test sherlock::everything_greedy_nl ... bench: 810,638 ns/iter (+/- 39,955) = 733 MB/s
+test sherlock::holmes_cochar_watson ... bench: 106,048 ns/iter (+/- 8,158) = 5610 MB/s
+test sherlock::holmes_coword_watson ... bench: 482,243 ns/iter (+/- 30,955) = 1233 MB/s
+test sherlock::ing_suffix ... bench: 385,767 ns/iter (+/- 24,902) = 1542 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 1,060,762 ns/iter (+/- 94,273) = 560 MB/s
+test sherlock::letters ... bench: 22,127,007 ns/iter (+/- 467,539) = 26 MB/s
+test sherlock::letters_lower ... bench: 21,719,871 ns/iter (+/- 459,587) = 27 MB/s
+test sherlock::letters_upper ... bench: 1,753,028 ns/iter (+/- 172,914) = 339 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 897,091 ns/iter (+/- 109,954) = 663 MB/s
+test sherlock::name_alt1 ... bench: 31,636 ns/iter (+/- 2,323) = 18805 MB/s
+test sherlock::name_alt2 ... bench: 85,898 ns/iter (+/- 10,486) = 6926 MB/s
+test sherlock::name_alt3 ... bench: 97,104 ns/iter (+/- 8,851) = 6126 MB/s
+test sherlock::name_alt3_nocase ... bench: 381,487 ns/iter (+/- 14,829) = 1559 MB/s
+test sherlock::name_alt4 ... bench: 121,301 ns/iter (+/- 17,178) = 4904 MB/s
+test sherlock::name_alt4_nocase ... bench: 187,262 ns/iter (+/- 17,478) = 3177 MB/s
+test sherlock::name_alt5 ... bench: 90,773 ns/iter (+/- 2,791) = 6554 MB/s
+test sherlock::name_alt5_nocase ... bench: 351,900 ns/iter (+/- 40,408) = 1690 MB/s
+test sherlock::name_holmes ... bench: 34,767 ns/iter (+/- 3,334) = 17112 MB/s
+test sherlock::name_holmes_nocase ... bench: 132,953 ns/iter (+/- 15,747) = 4474 MB/s
+test sherlock::name_sherlock ... bench: 66,566 ns/iter (+/- 6,822) = 8937 MB/s
+test sherlock::name_sherlock_holmes ... bench: 24,481 ns/iter (+/- 2,330) = 24301 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 97,531 ns/iter (+/- 12,331) = 6099 MB/s
+test sherlock::name_sherlock_nocase ... bench: 95,808 ns/iter (+/- 13,250) = 6209 MB/s
+test sherlock::name_whitespace ... bench: 71,342 ns/iter (+/- 9,877) = 8339 MB/s
+test sherlock::no_match_common ... bench: 14,704 ns/iter (+/- 1,241) = 40460 MB/s
+test sherlock::no_match_really_common ... bench: 238,731 ns/iter (+/- 31,179) = 2492 MB/s
+test sherlock::no_match_uncommon ... bench: 14,620 ns/iter (+/- 1,250) = 40693 MB/s
+test sherlock::quotes ... bench: 367,740 ns/iter (+/- 10,107) = 1617 MB/s
+test sherlock::repeated_class_negation ... bench: 76,315,217 ns/iter (+/- 940,903) = 7 MB/s
+test sherlock::the_lower ... bench: 464,322 ns/iter (+/- 14,654) = 1281 MB/s
+test sherlock::the_nocase ... bench: 519,069 ns/iter (+/- 59,161) = 1146 MB/s
+test sherlock::the_upper ... bench: 37,575 ns/iter (+/- 2,455) = 15833 MB/s
+test sherlock::the_whitespace ... bench: 939,412 ns/iter (+/- 60,941) = 633 MB/s
+test sherlock::word_ending_n ... bench: 1,681,192 ns/iter (+/- 156,265) = 353 MB/s
+test sherlock::words ... bench: 8,213,141 ns/iter (+/- 322,533) = 72 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 94.52s
+
diff --git a/vendor/regex/record/old-bench-log/09-new-baseline/pcre2 b/vendor/regex/record/old-bench-log/09-new-baseline/pcre2
new file mode 100644
index 0000000..595365d
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/09-new-baseline/pcre2
@@ -0,0 +1,98 @@
+
+running 93 tests
+test misc::anchored_literal_long_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s
+test misc::anchored_literal_long_non_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s
+test misc::anchored_literal_short_match ... bench: 8 ns/iter (+/- 0) = 3250 MB/s
+test misc::anchored_literal_short_non_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s
+test misc::easy0_1K ... bench: 32 ns/iter (+/- 1) = 32843 MB/s
+test misc::easy0_1MB ... bench: 22,160 ns/iter (+/- 3,887) = 47319 MB/s
+test misc::easy0_32 ... bench: 10 ns/iter (+/- 0) = 5900 MB/s
+test misc::easy0_32K ... bench: 651 ns/iter (+/- 2) = 50376 MB/s
+test misc::easy1_1K ... bench: 36 ns/iter (+/- 1) = 29000 MB/s
+test misc::easy1_1MB ... bench: 22,982 ns/iter (+/- 2,839) = 45626 MB/s
+test misc::easy1_32 ... bench: 12 ns/iter (+/- 0) = 4333 MB/s
+test misc::easy1_32K ... bench: 654 ns/iter (+/- 2) = 50134 MB/s
+test misc::hard_1K ... bench: 469 ns/iter (+/- 9) = 2240 MB/s
+test misc::hard_1MB ... bench: 733,962 ns/iter (+/- 28,297) = 1428 MB/s
+test misc::hard_32 ... bench: 34 ns/iter (+/- 4) = 1735 MB/s
+test misc::hard_32K ... bench: 19,567 ns/iter (+/- 363) = 1676 MB/s
+test misc::literal ... bench: 8 ns/iter (+/- 0) = 6375 MB/s
+test misc::long_needle1 ... bench: 257,858 ns/iter (+/- 646) = 387 MB/s
+test misc::long_needle2 ... bench: 259,045 ns/iter (+/- 2,220) = 386 MB/s
+test misc::match_class ... bench: 34 ns/iter (+/- 1) = 2382 MB/s
+test misc::match_class_in_range ... bench: 9 ns/iter (+/- 0) = 9000 MB/s
+test misc::match_class_unicode ... bench: 125 ns/iter (+/- 3) = 1288 MB/s
+test misc::medium_1K ... bench: 35 ns/iter (+/- 3) = 30057 MB/s
+test misc::medium_1MB ... bench: 21,126 ns/iter (+/- 4,036) = 49635 MB/s
+test misc::medium_32 ... bench: 10 ns/iter (+/- 0) = 6000 MB/s
+test misc::medium_32K ... bench: 714 ns/iter (+/- 122) = 45932 MB/s
+test misc::not_literal ... bench: 62 ns/iter (+/- 2) = 822 MB/s
+test misc::one_pass_long_prefix ... bench: 8 ns/iter (+/- 0) = 3250 MB/s
+test misc::one_pass_long_prefix_not ... bench: 8 ns/iter (+/- 0) = 3250 MB/s
+test misc::one_pass_short ... bench: 19 ns/iter (+/- 1) = 894 MB/s
+test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 1) = 894 MB/s
+test misc::reallyhard2_1K ... bench: 1,704 ns/iter (+/- 17) = 610 MB/s
+test misc::reallyhard_1K ... bench: 495 ns/iter (+/- 9) = 2123 MB/s
+test misc::reallyhard_1MB ... bench: 682,371 ns/iter (+/- 31,284) = 1536 MB/s
+test misc::reallyhard_32 ... bench: 34 ns/iter (+/- 2) = 1735 MB/s
+test misc::reallyhard_32K ... bench: 17,994 ns/iter (+/- 540) = 1822 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 1,507 ns/iter (+/- 10) = 5308 MB/s
+test regexdna::find_new_lines ... bench: 849,983 ns/iter (+/- 25,898) = 5980 MB/s
+test regexdna::subst1 ... bench: 520,602 ns/iter (+/- 12,170) = 9764 MB/s
+test regexdna::subst10 ... bench: 517,151 ns/iter (+/- 19,060) = 9829 MB/s
+test regexdna::subst11 ... bench: 519,209 ns/iter (+/- 12,477) = 9790 MB/s
+test regexdna::subst2 ... bench: 513,418 ns/iter (+/- 19,803) = 9901 MB/s
+test regexdna::subst3 ... bench: 514,166 ns/iter (+/- 13,019) = 9886 MB/s
+test regexdna::subst4 ... bench: 517,808 ns/iter (+/- 30,655) = 9817 MB/s
+test regexdna::subst5 ... bench: 516,922 ns/iter (+/- 17,204) = 9834 MB/s
+test regexdna::subst6 ... bench: 509,430 ns/iter (+/- 20,608) = 9978 MB/s
+test regexdna::subst7 ... bench: 519,437 ns/iter (+/- 10,537) = 9786 MB/s
+test regexdna::subst8 ... bench: 520,282 ns/iter (+/- 25,742) = 9770 MB/s
+test regexdna::subst9 ... bench: 512,819 ns/iter (+/- 11,443) = 9912 MB/s
+test regexdna::variant1 ... bench: 5,302,526 ns/iter (+/- 158,370) = 958 MB/s
+test regexdna::variant2 ... bench: 7,421,107 ns/iter (+/- 105,716) = 684 MB/s
+test regexdna::variant3 ... bench: 7,310,968 ns/iter (+/- 103,989) = 695 MB/s
+test regexdna::variant4 ... bench: 6,152,891 ns/iter (+/- 144,194) = 826 MB/s
+test regexdna::variant5 ... bench: 5,717,515 ns/iter (+/- 42,902) = 889 MB/s
+test regexdna::variant6 ... bench: 5,840,938 ns/iter (+/- 47,730) = 870 MB/s
+test regexdna::variant7 ... bench: 6,624,859 ns/iter (+/- 37,376) = 767 MB/s
+test regexdna::variant8 ... bench: 7,308,342 ns/iter (+/- 58,395) = 695 MB/s
+test regexdna::variant9 ... bench: 7,372,260 ns/iter (+/- 76,966) = 689 MB/s
+test sherlock::before_after_holmes ... bench: 2,817,108 ns/iter (+/- 18,002) = 211 MB/s
+test sherlock::before_holmes ... bench: 2,841,515 ns/iter (+/- 14,677) = 209 MB/s
+test sherlock::holmes_cochar_watson ... bench: 33,066 ns/iter (+/- 1,766) = 17992 MB/s
+test sherlock::ing_suffix ... bench: 1,299,382 ns/iter (+/- 19,674) = 457 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 2,799,189 ns/iter (+/- 33,841) = 212 MB/s
+test sherlock::letters ... bench: 4,923,399 ns/iter (+/- 111,904) = 120 MB/s
+test sherlock::letters_lower ... bench: 5,057,224 ns/iter (+/- 102,860) = 117 MB/s
+test sherlock::letters_upper ... bench: 874,306 ns/iter (+/- 10,587) = 680 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 15,876 ns/iter (+/- 58) = 37473 MB/s
+test sherlock::name_alt1 ... bench: 19,349 ns/iter (+/- 201) = 30747 MB/s
+test sherlock::name_alt2 ... bench: 29,916 ns/iter (+/- 581) = 19886 MB/s
+test sherlock::name_alt3 ... bench: 461,887 ns/iter (+/- 5,337) = 1288 MB/s
+test sherlock::name_alt3_nocase ... bench: 1,813,574 ns/iter (+/- 27,519) = 328 MB/s
+test sherlock::name_alt4 ... bench: 30,155 ns/iter (+/- 1,407) = 19729 MB/s
+test sherlock::name_alt4_nocase ... bench: 822,605 ns/iter (+/- 56,624) = 723 MB/s
+test sherlock::name_alt5 ... bench: 426,318 ns/iter (+/- 12,233) = 1395 MB/s
+test sherlock::name_alt5_nocase ... bench: 1,012,097 ns/iter (+/- 27,806) = 587 MB/s
+test sherlock::name_holmes ... bench: 19,833 ns/iter (+/- 499) = 29997 MB/s
+test sherlock::name_holmes_nocase ... bench: 40,266 ns/iter (+/- 2,089) = 14775 MB/s
+test sherlock::name_sherlock ... bench: 14,589 ns/iter (+/- 115) = 40779 MB/s
+test sherlock::name_sherlock_holmes ... bench: 14,213 ns/iter (+/- 81) = 41858 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 602,296 ns/iter (+/- 98,066) = 987 MB/s
+test sherlock::name_sherlock_nocase ... bench: 479,745 ns/iter (+/- 18,070) = 1240 MB/s
+test sherlock::name_whitespace ... bench: 14,584 ns/iter (+/- 44) = 40793 MB/s
+test sherlock::no_match_common ... bench: 13,499 ns/iter (+/- 1,090) = 44072 MB/s
+test sherlock::no_match_really_common ... bench: 12,507 ns/iter (+/- 1,238) = 47568 MB/s
+test sherlock::no_match_uncommon ... bench: 11,534 ns/iter (+/- 9) = 51580 MB/s
+test sherlock::quotes ... bench: 251,867 ns/iter (+/- 11,818) = 2362 MB/s
+test sherlock::repeated_class_negation ... bench: 2,969,330 ns/iter (+/- 287,150) = 200 MB/s
+test sherlock::the_lower ... bench: 206,513 ns/iter (+/- 3,294) = 2880 MB/s
+test sherlock::the_nocase ... bench: 237,655 ns/iter (+/- 6,616) = 2503 MB/s
+test sherlock::the_upper ... bench: 23,922 ns/iter (+/- 510) = 24869 MB/s
+test sherlock::the_whitespace ... bench: 326,257 ns/iter (+/- 10,038) = 1823 MB/s
+test sherlock::word_ending_n ... bench: 3,264,085 ns/iter (+/- 57,242) = 182 MB/s
+test sherlock::words ... bench: 3,161,731 ns/iter (+/- 45,794) = 188 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out; finished in 184.16s
+
diff --git a/vendor/regex/record/old-bench-log/09-new-baseline/re2 b/vendor/regex/record/old-bench-log/09-new-baseline/re2
new file mode 100644
index 0000000..9bae2a1
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/09-new-baseline/re2
@@ -0,0 +1,101 @@
+
+running 96 tests
+test misc::anchored_literal_long_match ... bench: 26 ns/iter (+/- 0) = 15000 MB/s
+test misc::anchored_literal_long_non_match ... bench: 4 ns/iter (+/- 0) = 97500 MB/s
+test misc::anchored_literal_short_match ... bench: 26 ns/iter (+/- 0) = 1000 MB/s
+test misc::anchored_literal_short_non_match ... bench: 4 ns/iter (+/- 0) = 6500 MB/s
+test misc::easy0_1K ... bench: 50 ns/iter (+/- 0) = 21020 MB/s
+test misc::easy0_1MB ... bench: 51 ns/iter (+/- 0) = 20560843 MB/s
+test misc::easy0_32 ... bench: 50 ns/iter (+/- 0) = 1180 MB/s
+test misc::easy0_32K ... bench: 50 ns/iter (+/- 0) = 655900 MB/s
+test misc::easy1_1K ... bench: 43 ns/iter (+/- 1) = 24279 MB/s
+test misc::easy1_1MB ... bench: 43 ns/iter (+/- 0) = 24385953 MB/s
+test misc::easy1_32 ... bench: 43 ns/iter (+/- 1) = 1209 MB/s
+test misc::easy1_32K ... bench: 43 ns/iter (+/- 0) = 762511 MB/s
+test misc::hard_1K ... bench: 50 ns/iter (+/- 0) = 21020 MB/s
+test misc::hard_1MB ... bench: 50 ns/iter (+/- 0) = 20972060 MB/s
+test misc::hard_32 ... bench: 50 ns/iter (+/- 0) = 1180 MB/s
+test misc::hard_32K ... bench: 50 ns/iter (+/- 0) = 655900 MB/s
+test misc::literal ... bench: 25 ns/iter (+/- 0) = 2040 MB/s
+test misc::long_needle1 ... bench: 356,319 ns/iter (+/- 680) = 280 MB/s
+test misc::long_needle2 ... bench: 356,384 ns/iter (+/- 3,126) = 280 MB/s
+test misc::match_class ... bench: 94 ns/iter (+/- 0) = 861 MB/s
+test misc::match_class_in_range ... bench: 94 ns/iter (+/- 0) = 861 MB/s
+test misc::match_class_unicode ... bench: 168 ns/iter (+/- 1) = 958 MB/s
+test misc::medium_1K ... bench: 51 ns/iter (+/- 0) = 20627 MB/s
+test misc::medium_1MB ... bench: 51 ns/iter (+/- 0) = 20560862 MB/s
+test misc::medium_32 ... bench: 51 ns/iter (+/- 0) = 1176 MB/s
+test misc::medium_32K ... bench: 51 ns/iter (+/- 1) = 643058 MB/s
+test misc::no_exponential ... bench: 112 ns/iter (+/- 0) = 892 MB/s
+test misc::not_literal ... bench: 66 ns/iter (+/- 0) = 772 MB/s
+test misc::one_pass_long_prefix ... bench: 25 ns/iter (+/- 0) = 1040 MB/s
+test misc::one_pass_long_prefix_not ... bench: 44 ns/iter (+/- 0) = 590 MB/s
+test misc::one_pass_short ... bench: 43 ns/iter (+/- 0) = 395 MB/s
+test misc::one_pass_short_not ... bench: 41 ns/iter (+/- 0) = 414 MB/s
+test misc::reallyhard2_1K ... bench: 978 ns/iter (+/- 7) = 1063 MB/s
+test misc::reallyhard_1K ... bench: 987 ns/iter (+/- 11) = 1064 MB/s
+test misc::reallyhard_1MB ... bench: 957,501 ns/iter (+/- 8,247) = 1095 MB/s
+test misc::reallyhard_32 ... bench: 73 ns/iter (+/- 0) = 808 MB/s
+test misc::reallyhard_32K ... bench: 30,057 ns/iter (+/- 315) = 1091 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 7,368 ns/iter (+/- 105) = 1085 MB/s
+test regexdna::find_new_lines ... bench: 15,567,882 ns/iter (+/- 48,213) = 326 MB/s
+test regexdna::subst1 ... bench: 2,011,288 ns/iter (+/- 23,092) = 2527 MB/s
+test regexdna::subst10 ... bench: 2,013,337 ns/iter (+/- 33,388) = 2524 MB/s
+test regexdna::subst11 ... bench: 2,005,968 ns/iter (+/- 25,799) = 2534 MB/s
+test regexdna::subst2 ... bench: 2,022,572 ns/iter (+/- 23,311) = 2513 MB/s
+test regexdna::subst3 ... bench: 2,018,386 ns/iter (+/- 32,071) = 2518 MB/s
+test regexdna::subst4 ... bench: 2,013,345 ns/iter (+/- 32,599) = 2524 MB/s
+test regexdna::subst5 ... bench: 2,015,871 ns/iter (+/- 25,081) = 2521 MB/s
+test regexdna::subst6 ... bench: 2,008,492 ns/iter (+/- 24,502) = 2530 MB/s
+test regexdna::subst7 ... bench: 2,018,804 ns/iter (+/- 38,700) = 2518 MB/s
+test regexdna::subst8 ... bench: 2,010,856 ns/iter (+/- 23,695) = 2527 MB/s
+test regexdna::subst9 ... bench: 2,023,767 ns/iter (+/- 17,040) = 2511 MB/s
+test regexdna::variant1 ... bench: 4,688,839 ns/iter (+/- 19,258) = 1084 MB/s
+test regexdna::variant2 ... bench: 4,693,463 ns/iter (+/- 31,741) = 1083 MB/s
+test regexdna::variant3 ... bench: 4,674,020 ns/iter (+/- 15,755) = 1087 MB/s
+test regexdna::variant4 ... bench: 4,666,017 ns/iter (+/- 16,318) = 1089 MB/s
+test regexdna::variant5 ... bench: 4,682,965 ns/iter (+/- 17,552) = 1085 MB/s
+test regexdna::variant6 ... bench: 4,661,825 ns/iter (+/- 21,667) = 1090 MB/s
+test regexdna::variant7 ... bench: 4,697,959 ns/iter (+/- 24,282) = 1082 MB/s
+test regexdna::variant8 ... bench: 4,700,703 ns/iter (+/- 21,377) = 1081 MB/s
+test regexdna::variant9 ... bench: 4,665,298 ns/iter (+/- 19,086) = 1089 MB/s
+test sherlock::before_after_holmes ... bench: 560,350 ns/iter (+/- 3,852) = 1061 MB/s
+test sherlock::before_holmes ... bench: 574,423 ns/iter (+/- 4,638) = 1035 MB/s
+test sherlock::everything_greedy ... bench: 2,688,852 ns/iter (+/- 16,320) = 221 MB/s
+test sherlock::everything_greedy_nl ... bench: 1,206,136 ns/iter (+/- 6,173) = 493 MB/s
+test sherlock::holmes_cochar_watson ... bench: 547,910 ns/iter (+/- 7,147) = 1085 MB/s
+test sherlock::holmes_coword_watson ... bench: 610,803 ns/iter (+/- 1,029) = 974 MB/s
+test sherlock::ing_suffix ... bench: 777,478 ns/iter (+/- 3,028) = 765 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 725,653 ns/iter (+/- 4,746) = 819 MB/s
+test sherlock::letters ... bench: 25,265,004 ns/iter (+/- 120,234) = 23 MB/s
+test sherlock::letters_lower ... bench: 24,615,621 ns/iter (+/- 134,875) = 24 MB/s
+test sherlock::letters_upper ... bench: 1,485,920 ns/iter (+/- 21,446) = 400 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 547,229 ns/iter (+/- 3,726) = 1087 MB/s
+test sherlock::name_alt1 ... bench: 18,148 ns/iter (+/- 131) = 32782 MB/s
+test sherlock::name_alt2 ... bench: 586,335 ns/iter (+/- 3,679) = 1014 MB/s
+test sherlock::name_alt3 ... bench: 601,096 ns/iter (+/- 3,781) = 989 MB/s
+test sherlock::name_alt3_nocase ... bench: 602,319 ns/iter (+/- 7,872) = 987 MB/s
+test sherlock::name_alt4 ... bench: 586,762 ns/iter (+/- 3,465) = 1013 MB/s
+test sherlock::name_alt4_nocase ... bench: 595,539 ns/iter (+/- 3,240) = 998 MB/s
+test sherlock::name_alt5 ... bench: 592,474 ns/iter (+/- 6,361) = 1004 MB/s
+test sherlock::name_alt5_nocase ... bench: 593,214 ns/iter (+/- 4,667) = 1002 MB/s
+test sherlock::name_holmes ... bench: 40,236 ns/iter (+/- 514) = 14786 MB/s
+test sherlock::name_holmes_nocase ... bench: 215,216 ns/iter (+/- 4,822) = 2764 MB/s
+test sherlock::name_sherlock ... bench: 14,064 ns/iter (+/- 159) = 42301 MB/s
+test sherlock::name_sherlock_holmes ... bench: 15,727 ns/iter (+/- 166) = 37828 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 552,042 ns/iter (+/- 6,395) = 1077 MB/s
+test sherlock::name_sherlock_nocase ... bench: 552,475 ns/iter (+/- 5,365) = 1076 MB/s
+test sherlock::name_whitespace ... bench: 16,210 ns/iter (+/- 194) = 36701 MB/s
+test sherlock::no_match_common ... bench: 147,489 ns/iter (+/- 602) = 4033 MB/s
+test sherlock::no_match_really_common ... bench: 157,205 ns/iter (+/- 350) = 3784 MB/s
+test sherlock::no_match_uncommon ... bench: 4,849 ns/iter (+/- 5) = 122691 MB/s
+test sherlock::quotes ... bench: 619,880 ns/iter (+/- 5,189) = 959 MB/s
+test sherlock::the_lower ... bench: 685,396 ns/iter (+/- 12,559) = 868 MB/s
+test sherlock::the_nocase ... bench: 771,051 ns/iter (+/- 18,470) = 771 MB/s
+test sherlock::the_upper ... bench: 59,139 ns/iter (+/- 1,604) = 10059 MB/s
+test sherlock::the_whitespace ... bench: 736,147 ns/iter (+/- 7,668) = 808 MB/s
+test sherlock::word_ending_n ... bench: 1,200,401 ns/iter (+/- 11,206) = 495 MB/s
+test sherlock::words ... bench: 8,024,768 ns/iter (+/- 93,051) = 74 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out; finished in 86.80s
+
diff --git a/vendor/regex/record/old-bench-log/09-new-baseline/rust b/vendor/regex/record/old-bench-log/09-new-baseline/rust
new file mode 100644
index 0000000..30924d4
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/09-new-baseline/rust
@@ -0,0 +1,124 @@
+
+running 119 tests
+test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s
+test misc::anchored_literal_long_non_match ... bench: 10 ns/iter (+/- 0) = 39000 MB/s
+test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s
+test misc::anchored_literal_short_non_match ... bench: 10 ns/iter (+/- 0) = 2600 MB/s
+test misc::easy0_1K ... bench: 8 ns/iter (+/- 0) = 131375 MB/s
+test misc::easy0_1MB ... bench: 12 ns/iter (+/- 0) = 87383583 MB/s
+test misc::easy0_32 ... bench: 8 ns/iter (+/- 0) = 7375 MB/s
+test misc::easy0_32K ... bench: 8 ns/iter (+/- 0) = 4099375 MB/s
+test misc::easy1_1K ... bench: 25 ns/iter (+/- 0) = 41760 MB/s
+test misc::easy1_1MB ... bench: 26 ns/iter (+/- 0) = 40330615 MB/s
+test misc::easy1_32 ... bench: 25 ns/iter (+/- 0) = 2080 MB/s
+test misc::easy1_32K ... bench: 26 ns/iter (+/- 0) = 1261076 MB/s
+test misc::hard_1K ... bench: 33 ns/iter (+/- 0) = 31848 MB/s
+test misc::hard_1MB ... bench: 33 ns/iter (+/- 0) = 31775848 MB/s
+test misc::hard_32 ... bench: 34 ns/iter (+/- 0) = 1735 MB/s
+test misc::hard_32K ... bench: 33 ns/iter (+/- 0) = 993787 MB/s
+test misc::is_match_set ... bench: 35 ns/iter (+/- 0) = 714 MB/s
+test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s
+test misc::long_needle1 ... bench: 1,517 ns/iter (+/- 25) = 65920 MB/s
+test misc::long_needle2 ... bench: 186,131 ns/iter (+/- 1,191) = 537 MB/s
+test misc::match_class ... bench: 37 ns/iter (+/- 0) = 2189 MB/s
+test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s
+test misc::match_class_unicode ... bench: 160 ns/iter (+/- 1) = 1006 MB/s
+test misc::matches_set ... bench: 200 ns/iter (+/- 4) = 125 MB/s
+test misc::medium_1K ... bench: 8 ns/iter (+/- 0) = 131500 MB/s
+test misc::medium_1MB ... bench: 12 ns/iter (+/- 0) = 87383666 MB/s
+test misc::medium_32 ... bench: 8 ns/iter (+/- 0) = 7500 MB/s
+test misc::medium_32K ... bench: 8 ns/iter (+/- 0) = 4099500 MB/s
+test misc::no_exponential ... bench: 262 ns/iter (+/- 6) = 381 MB/s
+test misc::not_literal ... bench: 43 ns/iter (+/- 1) = 1186 MB/s
+test misc::one_pass_long_prefix ... bench: 23 ns/iter (+/- 1) = 1130 MB/s
+test misc::one_pass_long_prefix_not ... bench: 23 ns/iter (+/- 1) = 1130 MB/s
+test misc::one_pass_short ... bench: 16 ns/iter (+/- 0) = 1062 MB/s
+test misc::one_pass_short_not ... bench: 18 ns/iter (+/- 0) = 944 MB/s
+test misc::reallyhard2_1K ... bench: 36 ns/iter (+/- 1) = 28888 MB/s
+test misc::reallyhard_1K ... bench: 1,155 ns/iter (+/- 11) = 909 MB/s
+test misc::reallyhard_1MB ... bench: 1,152,983 ns/iter (+/- 6,607) = 909 MB/s
+test misc::reallyhard_32 ... bench: 52 ns/iter (+/- 2) = 1134 MB/s
+test misc::reallyhard_32K ... bench: 36,194 ns/iter (+/- 327) = 906 MB/s
+test misc::replace_all ... bench: 81 ns/iter (+/- 5)
+test misc::reverse_suffix_no_quadratic ... bench: 2,269 ns/iter (+/- 3) = 3525 MB/s
+test misc::short_haystack_1000000x ... bench: 63,956 ns/iter (+/- 209) = 125086 MB/s
+test misc::short_haystack_100000x ... bench: 5,877 ns/iter (+/- 66) = 136125 MB/s
+test misc::short_haystack_10000x ... bench: 2,414 ns/iter (+/- 10) = 33144 MB/s
+test misc::short_haystack_1000x ... bench: 195 ns/iter (+/- 11) = 41082 MB/s
+test misc::short_haystack_100x ... bench: 96 ns/iter (+/- 7) = 8447 MB/s
+test misc::short_haystack_10x ... bench: 85 ns/iter (+/- 8) = 1070 MB/s
+test misc::short_haystack_1x ... bench: 85 ns/iter (+/- 6) = 223 MB/s
+test misc::short_haystack_2x ... bench: 86 ns/iter (+/- 12) = 313 MB/s
+test misc::short_haystack_3x ... bench: 85 ns/iter (+/- 22) = 411 MB/s
+test misc::short_haystack_4x ... bench: 85 ns/iter (+/- 12) = 505 MB/s
+test regexdna::find_new_lines ... bench: 6,977,678 ns/iter (+/- 90,937) = 728 MB/s
+test regexdna::subst1 ... bench: 423,846 ns/iter (+/- 41,460) = 11993 MB/s
+test regexdna::subst10 ... bench: 424,043 ns/iter (+/- 55,743) = 11987 MB/s
+test regexdna::subst11 ... bench: 418,549 ns/iter (+/- 12,106) = 12145 MB/s
+test regexdna::subst2 ... bench: 430,056 ns/iter (+/- 8,862) = 11820 MB/s
+test regexdna::subst3 ... bench: 429,634 ns/iter (+/- 26,807) = 11831 MB/s
+test regexdna::subst4 ... bench: 419,313 ns/iter (+/- 42,070) = 12123 MB/s
+test regexdna::subst5 ... bench: 425,299 ns/iter (+/- 43,161) = 11952 MB/s
+test regexdna::subst6 ... bench: 420,177 ns/iter (+/- 49,394) = 12098 MB/s
+test regexdna::subst7 ... bench: 425,118 ns/iter (+/- 46,952) = 11957 MB/s
+test regexdna::subst8 ... bench: 420,840 ns/iter (+/- 11,623) = 12079 MB/s
+test regexdna::subst9 ... bench: 420,752 ns/iter (+/- 10,186) = 12081 MB/s
+test regexdna::variant1 ... bench: 1,445,103 ns/iter (+/- 29,436) = 3517 MB/s
+test regexdna::variant2 ... bench: 2,234,423 ns/iter (+/- 24,502) = 2275 MB/s
+test regexdna::variant3 ... bench: 2,730,972 ns/iter (+/- 26,961) = 1861 MB/s
+test regexdna::variant4 ... bench: 2,708,975 ns/iter (+/- 36,517) = 1876 MB/s
+test regexdna::variant5 ... bench: 1,663,458 ns/iter (+/- 39,508) = 3055 MB/s
+test regexdna::variant6 ... bench: 1,673,873 ns/iter (+/- 14,846) = 3036 MB/s
+test regexdna::variant7 ... bench: 2,322,347 ns/iter (+/- 33,731) = 2188 MB/s
+test regexdna::variant8 ... bench: 2,350,779 ns/iter (+/- 54,976) = 2162 MB/s
+test regexdna::variant9 ... bench: 2,326,741 ns/iter (+/- 20,836) = 2184 MB/s
+test rust_compile::compile_huge ... bench: 47,700 ns/iter (+/- 230)
+test rust_compile::compile_huge_bytes ... bench: 2,987,898 ns/iter (+/- 32,819)
+test rust_compile::compile_huge_full ... bench: 5,705,551 ns/iter (+/- 63,483)
+test rust_compile::compile_simple ... bench: 1,963 ns/iter (+/- 44)
+test rust_compile::compile_simple_bytes ... bench: 1,970 ns/iter (+/- 32)
+test rust_compile::compile_simple_full ... bench: 9,677 ns/iter (+/- 69)
+test rust_compile::compile_small ... bench: 4,501 ns/iter (+/- 70)
+test rust_compile::compile_small_bytes ... bench: 75,372 ns/iter (+/- 2,007)
+test rust_compile::compile_small_full ... bench: 151,733 ns/iter (+/- 2,378)
+test sherlock::before_after_holmes ... bench: 655,827 ns/iter (+/- 1,426) = 907 MB/s
+test sherlock::before_holmes ... bench: 24,653 ns/iter (+/- 224) = 24132 MB/s
+test sherlock::everything_greedy ... bench: 1,026,254 ns/iter (+/- 27,926) = 579 MB/s
+test sherlock::everything_greedy_nl ... bench: 469,676 ns/iter (+/- 62,296) = 1266 MB/s
+test sherlock::holmes_cochar_watson ... bench: 47,578 ns/iter (+/- 1,730) = 12504 MB/s
+test sherlock::holmes_coword_watson ... bench: 321,318 ns/iter (+/- 3,235) = 1851 MB/s
+test sherlock::ing_suffix ... bench: 150,908 ns/iter (+/- 3,952) = 3942 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 726,848 ns/iter (+/- 5,314) = 818 MB/s
+test sherlock::letters ... bench: 9,719,997 ns/iter (+/- 67,717) = 61 MB/s
+test sherlock::letters_lower ... bench: 9,559,105 ns/iter (+/- 79,257) = 62 MB/s
+test sherlock::letters_upper ... bench: 1,066,791 ns/iter (+/- 13,193) = 557 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 653,228 ns/iter (+/- 881) = 910 MB/s
+test sherlock::name_alt1 ... bench: 10,663 ns/iter (+/- 76) = 55794 MB/s
+test sherlock::name_alt2 ... bench: 33,831 ns/iter (+/- 967) = 17585 MB/s
+test sherlock::name_alt3 ... bench: 38,061 ns/iter (+/- 1,123) = 15631 MB/s
+test sherlock::name_alt3_nocase ... bench: 218,691 ns/iter (+/- 2,345) = 2720 MB/s
+test sherlock::name_alt4 ... bench: 52,408 ns/iter (+/- 1,315) = 11351 MB/s
+test sherlock::name_alt4_nocase ... bench: 84,212 ns/iter (+/- 2,708) = 7064 MB/s
+test sherlock::name_alt5 ... bench: 35,272 ns/iter (+/- 1,784) = 16867 MB/s
+test sherlock::name_alt5_nocase ... bench: 193,585 ns/iter (+/- 5,057) = 3073 MB/s
+test sherlock::name_holmes ... bench: 15,018 ns/iter (+/- 440) = 39614 MB/s
+test sherlock::name_holmes_nocase ... bench: 60,207 ns/iter (+/- 1,046) = 9881 MB/s
+test sherlock::name_sherlock ... bench: 10,344 ns/iter (+/- 52) = 57514 MB/s
+test sherlock::name_sherlock_holmes ... bench: 10,374 ns/iter (+/- 98) = 57348 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 42,037 ns/iter (+/- 1,363) = 14152 MB/s
+test sherlock::name_sherlock_nocase ... bench: 41,192 ns/iter (+/- 881) = 14442 MB/s
+test sherlock::name_whitespace ... bench: 13,893 ns/iter (+/- 55) = 42822 MB/s
+test sherlock::no_match_common ... bench: 8,700 ns/iter (+/- 10) = 68383 MB/s
+test sherlock::no_match_really_common ... bench: 10,368 ns/iter (+/- 123) = 57381 MB/s
+test sherlock::no_match_uncommon ... bench: 8,695 ns/iter (+/- 7) = 68422 MB/s
+test sherlock::quotes ... bench: 222,526 ns/iter (+/- 5,362) = 2673 MB/s
+test sherlock::repeated_class_negation ... bench: 35,869,193 ns/iter (+/- 551,212) = 16 MB/s
+test sherlock::the_lower ... bench: 187,208 ns/iter (+/- 4,374) = 3177 MB/s
+test sherlock::the_nocase ... bench: 280,625 ns/iter (+/- 10,142) = 2120 MB/s
+test sherlock::the_upper ... bench: 19,742 ns/iter (+/- 692) = 30135 MB/s
+test sherlock::the_whitespace ... bench: 396,099 ns/iter (+/- 10,400) = 1501 MB/s
+test sherlock::word_ending_n ... bench: 1,055,639 ns/iter (+/- 6,627) = 563 MB/s
+test sherlock::words ... bench: 4,280,471 ns/iter (+/- 53,841) = 138 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 141.25s
+
diff --git a/vendor/regex/record/old-bench-log/09-new-baseline/rust-bytes b/vendor/regex/record/old-bench-log/09-new-baseline/rust-bytes
new file mode 100644
index 0000000..ff08ed1
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/09-new-baseline/rust-bytes
@@ -0,0 +1,112 @@
+
+running 107 tests
+test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s
+test misc::anchored_literal_long_non_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s
+test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s
+test misc::anchored_literal_short_non_match ... bench: 9 ns/iter (+/- 0) = 2888 MB/s
+test misc::easy0_1K ... bench: 7 ns/iter (+/- 0) = 150142 MB/s
+test misc::easy0_1MB ... bench: 11 ns/iter (+/- 1) = 95327545 MB/s
+test misc::easy0_32 ... bench: 7 ns/iter (+/- 0) = 8428 MB/s
+test misc::easy0_32K ... bench: 7 ns/iter (+/- 0) = 4685000 MB/s
+test misc::easy1_1K ... bench: 17 ns/iter (+/- 0) = 61411 MB/s
+test misc::easy1_1MB ... bench: 20 ns/iter (+/- 0) = 52429800 MB/s
+test misc::easy1_32 ... bench: 17 ns/iter (+/- 0) = 3058 MB/s
+test misc::easy1_32K ... bench: 17 ns/iter (+/- 1) = 1928705 MB/s
+test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s
+test misc::hard_1MB ... bench: 27 ns/iter (+/- 0) = 38837148 MB/s
+test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s
+test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s
+test misc::is_match_set ... bench: 35 ns/iter (+/- 0) = 714 MB/s
+test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s
+test misc::long_needle1 ... bench: 1,325 ns/iter (+/- 18) = 75472 MB/s
+test misc::long_needle2 ... bench: 186,021 ns/iter (+/- 1,157) = 537 MB/s
+test misc::match_class ... bench: 38 ns/iter (+/- 3) = 2131 MB/s
+test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s
+test misc::matches_set ... bench: 172 ns/iter (+/- 4) = 145 MB/s
+test misc::medium_1K ... bench: 7 ns/iter (+/- 0) = 150285 MB/s
+test misc::medium_1MB ... bench: 12 ns/iter (+/- 0) = 87383666 MB/s
+test misc::medium_32 ... bench: 8 ns/iter (+/- 0) = 7500 MB/s
+test misc::medium_32K ... bench: 7 ns/iter (+/- 0) = 4685142 MB/s
+test misc::no_exponential ... bench: 272 ns/iter (+/- 10) = 367 MB/s
+test misc::not_literal ... bench: 42 ns/iter (+/- 1) = 1214 MB/s
+test misc::one_pass_long_prefix ... bench: 23 ns/iter (+/- 1) = 1130 MB/s
+test misc::one_pass_long_prefix_not ... bench: 22 ns/iter (+/- 0) = 1181 MB/s
+test misc::one_pass_short ... bench: 15 ns/iter (+/- 0) = 1133 MB/s
+test misc::one_pass_short_not ... bench: 18 ns/iter (+/- 0) = 944 MB/s
+test misc::reallyhard2_1K ... bench: 36 ns/iter (+/- 0) = 28888 MB/s
+test misc::reallyhard_1K ... bench: 1,152 ns/iter (+/- 14) = 912 MB/s
+test misc::reallyhard_1MB ... bench: 1,155,496 ns/iter (+/- 7,722) = 907 MB/s
+test misc::reallyhard_32 ... bench: 51 ns/iter (+/- 1) = 1156 MB/s
+test misc::reallyhard_32K ... bench: 36,202 ns/iter (+/- 167) = 905 MB/s
+test misc::reverse_suffix_no_quadratic ... bench: 2,269 ns/iter (+/- 2) = 3525 MB/s
+test regexdna::find_new_lines ... bench: 6,982,142 ns/iter (+/- 70,086) = 728 MB/s
+test regexdna::subst1 ... bench: 425,753 ns/iter (+/- 15,075) = 11939 MB/s
+test regexdna::subst10 ... bench: 431,401 ns/iter (+/- 19,346) = 11783 MB/s
+test regexdna::subst11 ... bench: 427,131 ns/iter (+/- 38,166) = 11901 MB/s
+test regexdna::subst2 ... bench: 423,284 ns/iter (+/- 9,016) = 12009 MB/s
+test regexdna::subst3 ... bench: 425,850 ns/iter (+/- 7,324) = 11937 MB/s
+test regexdna::subst4 ... bench: 426,013 ns/iter (+/- 6,922) = 11932 MB/s
+test regexdna::subst5 ... bench: 426,029 ns/iter (+/- 8,697) = 11932 MB/s
+test regexdna::subst6 ... bench: 427,781 ns/iter (+/- 8,166) = 11883 MB/s
+test regexdna::subst7 ... bench: 426,589 ns/iter (+/- 13,274) = 11916 MB/s
+test regexdna::subst8 ... bench: 424,152 ns/iter (+/- 14,879) = 11984 MB/s
+test regexdna::subst9 ... bench: 428,066 ns/iter (+/- 8,773) = 11875 MB/s
+test regexdna::variant1 ... bench: 1,446,630 ns/iter (+/- 53,195) = 3513 MB/s
+test regexdna::variant2 ... bench: 2,241,934 ns/iter (+/- 42,563) = 2267 MB/s
+test regexdna::variant3 ... bench: 2,741,736 ns/iter (+/- 28,424) = 1854 MB/s
+test regexdna::variant4 ... bench: 2,725,768 ns/iter (+/- 37,801) = 1864 MB/s
+test regexdna::variant5 ... bench: 1,686,366 ns/iter (+/- 25,054) = 3014 MB/s
+test regexdna::variant6 ... bench: 1,689,225 ns/iter (+/- 24,479) = 3009 MB/s
+test regexdna::variant7 ... bench: 2,343,567 ns/iter (+/- 34,646) = 2169 MB/s
+test regexdna::variant8 ... bench: 2,363,133 ns/iter (+/- 69,696) = 2151 MB/s
+test regexdna::variant9 ... bench: 2,337,512 ns/iter (+/- 32,958) = 2174 MB/s
+test rust_compile::compile_huge ... bench: 53,055 ns/iter (+/- 88)
+test rust_compile::compile_huge_bytes ... bench: 2,979,724 ns/iter (+/- 43,904)
+test rust_compile::compile_huge_full ... bench: 5,825,193 ns/iter (+/- 61,322)
+test rust_compile::compile_simple ... bench: 1,927 ns/iter (+/- 39)
+test rust_compile::compile_simple_bytes ... bench: 1,924 ns/iter (+/- 29)
+test rust_compile::compile_simple_full ... bench: 9,830 ns/iter (+/- 108)
+test rust_compile::compile_small ... bench: 4,569 ns/iter (+/- 70)
+test rust_compile::compile_small_bytes ... bench: 74,875 ns/iter (+/- 1,337)
+test rust_compile::compile_small_full ... bench: 151,485 ns/iter (+/- 3,063)
+test sherlock::before_after_holmes ... bench: 655,632 ns/iter (+/- 801) = 907 MB/s
+test sherlock::before_holmes ... bench: 24,576 ns/iter (+/- 307) = 24207 MB/s
+test sherlock::everything_greedy ... bench: 1,026,410 ns/iter (+/- 57,265) = 579 MB/s
+test sherlock::everything_greedy_nl ... bench: 424,490 ns/iter (+/- 7,188) = 1401 MB/s
+test sherlock::holmes_cochar_watson ... bench: 46,935 ns/iter (+/- 1,007) = 12675 MB/s
+test sherlock::holmes_coword_watson ... bench: 322,497 ns/iter (+/- 3,680) = 1844 MB/s
+test sherlock::ing_suffix ... bench: 149,923 ns/iter (+/- 2,936) = 3968 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 732,021 ns/iter (+/- 10,242) = 812 MB/s
+test sherlock::letters ... bench: 9,716,641 ns/iter (+/- 56,270) = 61 MB/s
+test sherlock::letters_lower ... bench: 9,541,922 ns/iter (+/- 63,715) = 62 MB/s
+test sherlock::letters_upper ... bench: 1,070,240 ns/iter (+/- 10,505) = 555 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 652,312 ns/iter (+/- 546) = 912 MB/s
+test sherlock::name_alt1 ... bench: 10,832 ns/iter (+/- 499) = 54923 MB/s
+test sherlock::name_alt2 ... bench: 33,528 ns/iter (+/- 484) = 17744 MB/s
+test sherlock::name_alt3 ... bench: 37,352 ns/iter (+/- 1,173) = 15927 MB/s
+test sherlock::name_alt3_nocase ... bench: 217,570 ns/iter (+/- 3,401) = 2734 MB/s
+test sherlock::name_alt4 ... bench: 52,711 ns/iter (+/- 1,257) = 11286 MB/s
+test sherlock::name_alt4_nocase ... bench: 81,635 ns/iter (+/- 1,740) = 7287 MB/s
+test sherlock::name_alt5 ... bench: 34,935 ns/iter (+/- 1,190) = 17029 MB/s
+test sherlock::name_alt5_nocase ... bench: 194,600 ns/iter (+/- 3,742) = 3057 MB/s
+test sherlock::name_holmes ... bench: 14,670 ns/iter (+/- 153) = 40554 MB/s
+test sherlock::name_holmes_nocase ... bench: 59,906 ns/iter (+/- 898) = 9931 MB/s
+test sherlock::name_sherlock ... bench: 10,470 ns/iter (+/- 74) = 56822 MB/s
+test sherlock::name_sherlock_holmes ... bench: 10,291 ns/iter (+/- 29) = 57810 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 41,316 ns/iter (+/- 1,350) = 14399 MB/s
+test sherlock::name_sherlock_nocase ... bench: 40,748 ns/iter (+/- 724) = 14600 MB/s
+test sherlock::name_whitespace ... bench: 14,682 ns/iter (+/- 52) = 40521 MB/s
+test sherlock::no_match_common ... bench: 8,822 ns/iter (+/- 310) = 67437 MB/s
+test sherlock::no_match_really_common ... bench: 8,990 ns/iter (+/- 129) = 66177 MB/s
+test sherlock::no_match_uncommon ... bench: 8,649 ns/iter (+/- 192) = 68786 MB/s
+test sherlock::quotes ... bench: 218,225 ns/iter (+/- 4,267) = 2726 MB/s
+test sherlock::repeated_class_negation ... bench: 35,771,807 ns/iter (+/- 640,817) = 16 MB/s
+test sherlock::the_lower ... bench: 190,205 ns/iter (+/- 9,051) = 3127 MB/s
+test sherlock::the_nocase ... bench: 280,386 ns/iter (+/- 5,346) = 2121 MB/s
+test sherlock::the_upper ... bench: 19,325 ns/iter (+/- 695) = 30785 MB/s
+test sherlock::the_whitespace ... bench: 409,665 ns/iter (+/- 7,657) = 1452 MB/s
+test sherlock::word_ending_n ... bench: 1,066,052 ns/iter (+/- 7,072) = 558 MB/s
+test sherlock::words ... bench: 4,330,659 ns/iter (+/- 53,403) = 137 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 131.99s
+
diff --git a/vendor/regex/record/old-bench-log/10-last-frontier/rust-after-literal.log b/vendor/regex/record/old-bench-log/10-last-frontier/rust-after-literal.log
new file mode 100644
index 0000000..c45b55c
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/10-last-frontier/rust-after-literal.log
@@ -0,0 +1,124 @@
+
+running 119 tests
+test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s
+test misc::anchored_literal_long_non_match ... bench: 20 ns/iter (+/- 0) = 19500 MB/s
+test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s
+test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s
+test misc::easy0_1K ... bench: 51 ns/iter (+/- 2) = 20607 MB/s
+test misc::easy0_1MB ... bench: 56 ns/iter (+/- 1) = 18725053 MB/s
+test misc::easy0_32 ... bench: 51 ns/iter (+/- 0) = 1156 MB/s
+test misc::easy0_32K ... bench: 53 ns/iter (+/- 1) = 618773 MB/s
+test misc::easy1_1K ... bench: 41 ns/iter (+/- 0) = 25463 MB/s
+test misc::easy1_1MB ... bench: 44 ns/iter (+/- 1) = 23831727 MB/s
+test misc::easy1_32 ... bench: 40 ns/iter (+/- 1) = 1300 MB/s
+test misc::easy1_32K ... bench: 40 ns/iter (+/- 1) = 819700 MB/s
+test misc::hard_1K ... bench: 51 ns/iter (+/- 2) = 20607 MB/s
+test misc::hard_1MB ... bench: 56 ns/iter (+/- 1) = 18725053 MB/s
+test misc::hard_32 ... bench: 51 ns/iter (+/- 2) = 1156 MB/s
+test misc::hard_32K ... bench: 51 ns/iter (+/- 1) = 643039 MB/s
+test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s
+test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s
+test misc::long_needle1 ... bench: 3,242 ns/iter (+/- 79) = 30845 MB/s
+test misc::long_needle2 ... bench: 350,572 ns/iter (+/- 6,860) = 285 MB/s
+test misc::match_class ... bench: 62 ns/iter (+/- 6) = 1306 MB/s
+test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s
+test misc::match_class_unicode ... bench: 259 ns/iter (+/- 15) = 621 MB/s
+test misc::matches_set ... bench: 462 ns/iter (+/- 9) = 54 MB/s
+test misc::medium_1K ... bench: 53 ns/iter (+/- 0) = 19849 MB/s
+test misc::medium_1MB ... bench: 58 ns/iter (+/- 1) = 18079379 MB/s
+test misc::medium_32 ... bench: 53 ns/iter (+/- 1) = 1132 MB/s
+test misc::medium_32K ... bench: 53 ns/iter (+/- 1) = 618792 MB/s
+test misc::no_exponential ... bench: 423 ns/iter (+/- 13) = 236 MB/s
+test misc::not_literal ... bench: 89 ns/iter (+/- 0) = 573 MB/s
+test misc::one_pass_long_prefix ... bench: 52 ns/iter (+/- 0) = 500 MB/s
+test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s
+test misc::one_pass_short ... bench: 38 ns/iter (+/- 1) = 447 MB/s
+test misc::one_pass_short_not ... bench: 41 ns/iter (+/- 1) = 414 MB/s
+test misc::reallyhard2_1K ... bench: 81 ns/iter (+/- 1) = 12839 MB/s
+test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s
+test misc::reallyhard_1MB ... bench: 1,575,822 ns/iter (+/- 39,203) = 665 MB/s
+test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 0) = 578 MB/s
+test misc::reallyhard_32K ... bench: 49,328 ns/iter (+/- 2,598) = 664 MB/s
+test misc::replace_all ... bench: 132 ns/iter (+/- 3)
+test misc::reverse_suffix_no_quadratic ... bench: 4,171 ns/iter (+/- 134) = 1918 MB/s
+test misc::short_haystack_1000000x ... bench: 132,251 ns/iter (+/- 729) = 60491 MB/s
+test misc::short_haystack_100000x ... bench: 13,184 ns/iter (+/- 408) = 60680 MB/s
+test misc::short_haystack_10000x ... bench: 6,036 ns/iter (+/- 167) = 13255 MB/s
+test misc::short_haystack_1000x ... bench: 602 ns/iter (+/- 14) = 13307 MB/s
+test misc::short_haystack_100x ... bench: 230 ns/iter (+/- 7) = 3526 MB/s
+test misc::short_haystack_10x ... bench: 218 ns/iter (+/- 3) = 417 MB/s
+test misc::short_haystack_1x ... bench: 210 ns/iter (+/- 8) = 90 MB/s
+test misc::short_haystack_2x ... bench: 225 ns/iter (+/- 6) = 120 MB/s
+test misc::short_haystack_3x ... bench: 211 ns/iter (+/- 8) = 165 MB/s
+test misc::short_haystack_4x ... bench: 212 ns/iter (+/- 6) = 202 MB/s
+test regexdna::find_new_lines ... bench: 12,245,066 ns/iter (+/- 117,141) = 415 MB/s
+test regexdna::subst1 ... bench: 786,357 ns/iter (+/- 14,200) = 6464 MB/s
+test regexdna::subst10 ... bench: 788,550 ns/iter (+/- 26,456) = 6446 MB/s
+test regexdna::subst11 ... bench: 782,161 ns/iter (+/- 15,583) = 6499 MB/s
+test regexdna::subst2 ... bench: 784,902 ns/iter (+/- 23,379) = 6476 MB/s
+test regexdna::subst3 ... bench: 786,640 ns/iter (+/- 27,063) = 6462 MB/s
+test regexdna::subst4 ... bench: 785,591 ns/iter (+/- 20,498) = 6470 MB/s
+test regexdna::subst5 ... bench: 787,447 ns/iter (+/- 20,892) = 6455 MB/s
+test regexdna::subst6 ... bench: 784,994 ns/iter (+/- 19,687) = 6475 MB/s
+test regexdna::subst7 ... bench: 801,921 ns/iter (+/- 15,391) = 6339 MB/s
+test regexdna::subst8 ... bench: 785,541 ns/iter (+/- 11,908) = 6471 MB/s
+test regexdna::subst9 ... bench: 785,848 ns/iter (+/- 28,020) = 6468 MB/s
+test regexdna::variant1 ... bench: 2,195,058 ns/iter (+/- 44,066) = 2315 MB/s
+test regexdna::variant2 ... bench: 3,219,968 ns/iter (+/- 59,372) = 1578 MB/s
+test regexdna::variant3 ... bench: 3,776,467 ns/iter (+/- 54,326) = 1346 MB/s
+test regexdna::variant4 ... bench: 3,803,674 ns/iter (+/- 95,281) = 1336 MB/s
+test regexdna::variant5 ... bench: 2,661,333 ns/iter (+/- 46,408) = 1910 MB/s
+test regexdna::variant6 ... bench: 2,645,716 ns/iter (+/- 38,659) = 1921 MB/s
+test regexdna::variant7 ... bench: 3,228,352 ns/iter (+/- 69,155) = 1574 MB/s
+test regexdna::variant8 ... bench: 3,305,563 ns/iter (+/- 59,321) = 1537 MB/s
+test regexdna::variant9 ... bench: 3,225,039 ns/iter (+/- 49,720) = 1576 MB/s
+test rust_compile::compile_huge ... bench: 100,381 ns/iter (+/- 2,052)
+test rust_compile::compile_huge_bytes ... bench: 5,899,989 ns/iter (+/- 114,363)
+test rust_compile::compile_huge_full ... bench: 11,650,995 ns/iter (+/- 172,285)
+test rust_compile::compile_simple ... bench: 4,082 ns/iter (+/- 88)
+test rust_compile::compile_simple_bytes ... bench: 4,153 ns/iter (+/- 120)
+test rust_compile::compile_simple_full ... bench: 20,414 ns/iter (+/- 1,860)
+test rust_compile::compile_small ... bench: 9,114 ns/iter (+/- 216)
+test rust_compile::compile_small_bytes ... bench: 183,049 ns/iter (+/- 9,917)
+test rust_compile::compile_small_full ... bench: 361,291 ns/iter (+/- 11,045)
+test sherlock::before_after_holmes ... bench: 907,103 ns/iter (+/- 12,165) = 655 MB/s
+test sherlock::before_holmes ... bench: 62,501 ns/iter (+/- 1,880) = 9518 MB/s
+test sherlock::everything_greedy ... bench: 2,062,116 ns/iter (+/- 41,900) = 288 MB/s
+test sherlock::everything_greedy_nl ... bench: 894,529 ns/iter (+/- 38,723) = 665 MB/s
+test sherlock::holmes_cochar_watson ... bench: 103,305 ns/iter (+/- 3,798) = 5758 MB/s
+test sherlock::holmes_coword_watson ... bench: 479,423 ns/iter (+/- 13,924) = 1240 MB/s
+test sherlock::ing_suffix ... bench: 318,300 ns/iter (+/- 6,846) = 1869 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 1,066,300 ns/iter (+/- 19,375) = 557 MB/s
+test sherlock::letters ... bench: 21,777,358 ns/iter (+/- 230,478) = 27 MB/s
+test sherlock::letters_lower ... bench: 21,152,019 ns/iter (+/- 203,617) = 28 MB/s
+test sherlock::letters_upper ... bench: 1,777,626 ns/iter (+/- 26,243) = 334 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 897,509 ns/iter (+/- 24,983) = 662 MB/s
+test sherlock::name_alt1 ... bench: 32,255 ns/iter (+/- 681) = 18444 MB/s
+test sherlock::name_alt2 ... bench: 86,369 ns/iter (+/- 2,494) = 6888 MB/s
+test sherlock::name_alt3 ... bench: 97,618 ns/iter (+/- 564) = 6094 MB/s
+test sherlock::name_alt3_nocase ... bench: 944,848 ns/iter (+/- 31,039) = 629 MB/s
+test sherlock::name_alt4 ... bench: 122,029 ns/iter (+/- 2,716) = 4875 MB/s
+test sherlock::name_alt4_nocase ... bench: 225,544 ns/iter (+/- 5,783) = 2637 MB/s
+test sherlock::name_alt5 ... bench: 91,897 ns/iter (+/- 3,796) = 6473 MB/s
+test sherlock::name_alt5_nocase ... bench: 936,420 ns/iter (+/- 15,092) = 635 MB/s
+test sherlock::name_holmes ... bench: 33,448 ns/iter (+/- 959) = 17786 MB/s
+test sherlock::name_holmes_nocase ... bench: 115,864 ns/iter (+/- 1,645) = 5134 MB/s
+test sherlock::name_sherlock ... bench: 22,474 ns/iter (+/- 674) = 26472 MB/s
+test sherlock::name_sherlock_holmes ... bench: 22,184 ns/iter (+/- 497) = 26818 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 99,629 ns/iter (+/- 2,398) = 5971 MB/s
+test sherlock::name_sherlock_nocase ... bench: 99,523 ns/iter (+/- 2,674) = 5977 MB/s
+test sherlock::name_whitespace ... bench: 30,815 ns/iter (+/- 107) = 19306 MB/s
+test sherlock::no_match_common ... bench: 19,661 ns/iter (+/- 656) = 30259 MB/s
+test sherlock::no_match_really_common ... bench: 27,544 ns/iter (+/- 527) = 21599 MB/s
+test sherlock::no_match_uncommon ... bench: 19,553 ns/iter (+/- 31) = 30426 MB/s
+test sherlock::quotes ... bench: 369,144 ns/iter (+/- 45,316) = 1611 MB/s
+test sherlock::repeated_class_negation ... bench: 68,838,857 ns/iter (+/- 330,544) = 8 MB/s
+test sherlock::the_lower ... bench: 321,692 ns/iter (+/- 5,418) = 1849 MB/s
+test sherlock::the_nocase ... bench: 507,936 ns/iter (+/- 3,080) = 1171 MB/s
+test sherlock::the_upper ... bench: 43,705 ns/iter (+/- 788) = 13612 MB/s
+test sherlock::the_whitespace ... bench: 819,179 ns/iter (+/- 20,071) = 726 MB/s
+test sherlock::word_ending_n ... bench: 1,700,300 ns/iter (+/- 36,623) = 349 MB/s
+test sherlock::words ... bench: 8,249,767 ns/iter (+/- 75,015) = 72 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 111.55s
+
diff --git a/vendor/regex/record/old-bench-log/10-last-frontier/rust-before-literal.log b/vendor/regex/record/old-bench-log/10-last-frontier/rust-before-literal.log
new file mode 100644
index 0000000..98b3496
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/10-last-frontier/rust-before-literal.log
@@ -0,0 +1,124 @@
+
+running 119 tests
+test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s
+test misc::anchored_literal_long_non_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s
+test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s
+test misc::anchored_literal_short_non_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s
+test misc::easy0_1K ... bench: 15 ns/iter (+/- 0) = 70066 MB/s
+test misc::easy0_1MB ... bench: 21 ns/iter (+/- 0) = 49933476 MB/s
+test misc::easy0_32 ... bench: 15 ns/iter (+/- 0) = 3933 MB/s
+test misc::easy0_32K ... bench: 14 ns/iter (+/- 0) = 2342500 MB/s
+test misc::easy1_1K ... bench: 40 ns/iter (+/- 1) = 26100 MB/s
+test misc::easy1_1MB ... bench: 45 ns/iter (+/- 1) = 23302133 MB/s
+test misc::easy1_32 ... bench: 40 ns/iter (+/- 5) = 1300 MB/s
+test misc::easy1_32K ... bench: 40 ns/iter (+/- 1) = 819700 MB/s
+test misc::hard_1K ... bench: 51 ns/iter (+/- 1) = 20607 MB/s
+test misc::hard_1MB ... bench: 56 ns/iter (+/- 0) = 18725053 MB/s
+test misc::hard_32 ... bench: 51 ns/iter (+/- 3) = 1156 MB/s
+test misc::hard_32K ... bench: 51 ns/iter (+/- 1) = 643039 MB/s
+test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s
+test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s
+test misc::long_needle1 ... bench: 3,259 ns/iter (+/- 86) = 30684 MB/s
+test misc::long_needle2 ... bench: 350,722 ns/iter (+/- 6,984) = 285 MB/s
+test misc::match_class ... bench: 60 ns/iter (+/- 1) = 1350 MB/s
+test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s
+test misc::match_class_unicode ... bench: 255 ns/iter (+/- 0) = 631 MB/s
+test misc::matches_set ... bench: 481 ns/iter (+/- 11) = 51 MB/s
+test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s
+test misc::medium_1MB ... bench: 22 ns/iter (+/- 0) = 47663818 MB/s
+test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s
+test misc::medium_32K ... bench: 15 ns/iter (+/- 0) = 2186400 MB/s
+test misc::no_exponential ... bench: 442 ns/iter (+/- 13) = 226 MB/s
+test misc::not_literal ... bench: 89 ns/iter (+/- 1) = 573 MB/s
+test misc::one_pass_long_prefix ... bench: 54 ns/iter (+/- 1) = 481 MB/s
+test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s
+test misc::one_pass_short ... bench: 39 ns/iter (+/- 0) = 435 MB/s
+test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 0) = 404 MB/s
+test misc::reallyhard2_1K ... bench: 83 ns/iter (+/- 6) = 12530 MB/s
+test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 4) = 660 MB/s
+test misc::reallyhard_1MB ... bench: 1,575,691 ns/iter (+/- 29,668) = 665 MB/s
+test misc::reallyhard_32 ... bench: 101 ns/iter (+/- 5) = 584 MB/s
+test misc::reallyhard_32K ... bench: 49,325 ns/iter (+/- 1,734) = 664 MB/s
+test misc::replace_all ... bench: 134 ns/iter (+/- 2)
+test misc::reverse_suffix_no_quadratic ... bench: 4,189 ns/iter (+/- 274) = 1909 MB/s
+test misc::short_haystack_1000000x ... bench: 132,182 ns/iter (+/- 4,966) = 60522 MB/s
+test misc::short_haystack_100000x ... bench: 13,344 ns/iter (+/- 275) = 59952 MB/s
+test misc::short_haystack_10000x ... bench: 6,119 ns/iter (+/- 285) = 13075 MB/s
+test misc::short_haystack_1000x ... bench: 617 ns/iter (+/- 15) = 12983 MB/s
+test misc::short_haystack_100x ... bench: 230 ns/iter (+/- 7) = 3526 MB/s
+test misc::short_haystack_10x ... bench: 207 ns/iter (+/- 8) = 439 MB/s
+test misc::short_haystack_1x ... bench: 213 ns/iter (+/- 7) = 89 MB/s
+test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 6) = 131 MB/s
+test misc::short_haystack_3x ... bench: 207 ns/iter (+/- 10) = 169 MB/s
+test misc::short_haystack_4x ... bench: 208 ns/iter (+/- 7) = 206 MB/s
+test regexdna::find_new_lines ... bench: 12,275,804 ns/iter (+/- 145,331) = 414 MB/s
+test regexdna::subst1 ... bench: 793,517 ns/iter (+/- 44,203) = 6406 MB/s
+test regexdna::subst10 ... bench: 794,922 ns/iter (+/- 23,459) = 6394 MB/s
+test regexdna::subst11 ... bench: 790,525 ns/iter (+/- 23,010) = 6430 MB/s
+test regexdna::subst2 ... bench: 790,637 ns/iter (+/- 17,962) = 6429 MB/s
+test regexdna::subst3 ... bench: 793,559 ns/iter (+/- 17,575) = 6405 MB/s
+test regexdna::subst4 ... bench: 792,738 ns/iter (+/- 15,237) = 6412 MB/s
+test regexdna::subst5 ... bench: 795,060 ns/iter (+/- 26,172) = 6393 MB/s
+test regexdna::subst6 ... bench: 792,357 ns/iter (+/- 15,067) = 6415 MB/s
+test regexdna::subst7 ... bench: 797,006 ns/iter (+/- 27,928) = 6378 MB/s
+test regexdna::subst8 ... bench: 790,603 ns/iter (+/- 22,754) = 6429 MB/s
+test regexdna::subst9 ... bench: 793,055 ns/iter (+/- 13,202) = 6409 MB/s
+test regexdna::variant1 ... bench: 2,204,304 ns/iter (+/- 50,669) = 2306 MB/s
+test regexdna::variant2 ... bench: 3,224,798 ns/iter (+/- 45,705) = 1576 MB/s
+test regexdna::variant3 ... bench: 3,802,774 ns/iter (+/- 86,530) = 1336 MB/s
+test regexdna::variant4 ... bench: 3,805,916 ns/iter (+/- 69,737) = 1335 MB/s
+test regexdna::variant5 ... bench: 2,662,373 ns/iter (+/- 61,259) = 1909 MB/s
+test regexdna::variant6 ... bench: 2,654,072 ns/iter (+/- 51,095) = 1915 MB/s
+test regexdna::variant7 ... bench: 3,232,369 ns/iter (+/- 67,147) = 1572 MB/s
+test regexdna::variant8 ... bench: 3,311,225 ns/iter (+/- 66,086) = 1535 MB/s
+test regexdna::variant9 ... bench: 3,241,601 ns/iter (+/- 68,394) = 1568 MB/s
+test rust_compile::compile_huge ... bench: 100,955 ns/iter (+/- 2,466)
+test rust_compile::compile_huge_bytes ... bench: 5,936,732 ns/iter (+/- 126,993)
+test rust_compile::compile_huge_full ... bench: 11,880,838 ns/iter (+/- 211,387)
+test rust_compile::compile_simple ... bench: 4,575 ns/iter (+/- 139)
+test rust_compile::compile_simple_bytes ... bench: 4,653 ns/iter (+/- 122)
+test rust_compile::compile_simple_full ... bench: 20,656 ns/iter (+/- 535)
+test rust_compile::compile_small ... bench: 9,613 ns/iter (+/- 992)
+test rust_compile::compile_small_bytes ... bench: 188,349 ns/iter (+/- 4,733)
+test rust_compile::compile_small_full ... bench: 341,554 ns/iter (+/- 9,774)
+test sherlock::before_after_holmes ... bench: 907,419 ns/iter (+/- 11,645) = 655 MB/s
+test sherlock::before_holmes ... bench: 62,036 ns/iter (+/- 1,854) = 9590 MB/s
+test sherlock::everything_greedy ... bench: 2,072,694 ns/iter (+/- 45,192) = 287 MB/s
+test sherlock::everything_greedy_nl ... bench: 884,483 ns/iter (+/- 25,710) = 672 MB/s
+test sherlock::holmes_cochar_watson ... bench: 103,873 ns/iter (+/- 1,310) = 5727 MB/s
+test sherlock::holmes_coword_watson ... bench: 481,491 ns/iter (+/- 11,516) = 1235 MB/s
+test sherlock::ing_suffix ... bench: 323,119 ns/iter (+/- 7,438) = 1841 MB/s
+test sherlock::ing_suffix_limited_space ... bench: 1,067,293 ns/iter (+/- 18,661) = 557 MB/s
+test sherlock::letters ... bench: 21,732,526 ns/iter (+/- 253,563) = 27 MB/s
+test sherlock::letters_lower ... bench: 21,187,465 ns/iter (+/- 191,023) = 28 MB/s
+test sherlock::letters_upper ... bench: 1,766,003 ns/iter (+/- 17,494) = 336 MB/s
+test sherlock::line_boundary_sherlock_holmes ... bench: 897,387 ns/iter (+/- 26,674) = 662 MB/s
+test sherlock::name_alt1 ... bench: 34,183 ns/iter (+/- 885) = 17404 MB/s
+test sherlock::name_alt2 ... bench: 87,151 ns/iter (+/- 2,139) = 6826 MB/s
+test sherlock::name_alt3 ... bench: 99,293 ns/iter (+/- 1,938) = 5991 MB/s
+test sherlock::name_alt3_nocase ... bench: 379,228 ns/iter (+/- 22,539) = 1568 MB/s
+test sherlock::name_alt4 ... bench: 123,040 ns/iter (+/- 2,676) = 4835 MB/s
+test sherlock::name_alt4_nocase ... bench: 186,045 ns/iter (+/- 403) = 3197 MB/s
+test sherlock::name_alt5 ... bench: 91,679 ns/iter (+/- 2,543) = 6489 MB/s
+test sherlock::name_alt5_nocase ... bench: 343,668 ns/iter (+/- 6,807) = 1731 MB/s
+test sherlock::name_holmes ... bench: 33,802 ns/iter (+/- 936) = 17600 MB/s
+test sherlock::name_holmes_nocase ... bench: 136,208 ns/iter (+/- 4,317) = 4367 MB/s
+test sherlock::name_sherlock ... bench: 22,534 ns/iter (+/- 462) = 26401 MB/s
+test sherlock::name_sherlock_holmes ... bench: 22,514 ns/iter (+/- 697) = 26425 MB/s
+test sherlock::name_sherlock_holmes_nocase ... bench: 97,796 ns/iter (+/- 2,037) = 6083 MB/s
+test sherlock::name_sherlock_nocase ... bench: 95,809 ns/iter (+/- 1,538) = 6209 MB/s
+test sherlock::name_whitespace ... bench: 30,959 ns/iter (+/- 968) = 19216 MB/s
+test sherlock::no_match_common ... bench: 19,568 ns/iter (+/- 616) = 30403 MB/s
+test sherlock::no_match_really_common ... bench: 26,273 ns/iter (+/- 1,143) = 22644 MB/s
+test sherlock::no_match_uncommon ... bench: 19,643 ns/iter (+/- 496) = 30287 MB/s
+test sherlock::quotes ... bench: 371,876 ns/iter (+/- 2,494) = 1599 MB/s
+test sherlock::repeated_class_negation ... bench: 76,963,104 ns/iter (+/- 277,311) = 7 MB/s
+test sherlock::the_lower ... bench: 331,250 ns/iter (+/- 8,588) = 1796 MB/s
+test sherlock::the_nocase ... bench: 516,528 ns/iter (+/- 40,826) = 1151 MB/s
+test sherlock::the_upper ... bench: 44,206 ns/iter (+/- 1,277) = 13458 MB/s
+test sherlock::the_whitespace ... bench: 822,577 ns/iter (+/- 23,649) = 723 MB/s
+test sherlock::word_ending_n ... bench: 1,685,110 ns/iter (+/- 34,615) = 353 MB/s
+test sherlock::words ... bench: 8,333,499 ns/iter (+/- 152,757) = 71 MB/s
+
+test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 124.94s
+
diff --git a/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log b/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log
new file mode 100644
index 0000000..470e09b
--- /dev/null
+++ b/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log
@@ -0,0 +1,112 @@
+
+running 107 tests
+test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s
+test misc::anchored_literal_long_non_match ... bench: 20 ns/iter (+/- 1) = 19500 MB/s
+test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s
+test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s
+test misc::easy0_1K ... bench: 54 ns/iter (+/- 2) = 19462 MB/s
+test misc::easy0_1MB ... bench: 56 ns/iter (+/- 1) = 18725053 MB/s
+test misc::easy0_32 ... bench: 51 ns/iter (+/- 1) = 1156 MB/s
+test misc::easy0_32K ... bench: 51 ns/iter (+/- 2) = 643039 MB/s
+test misc::easy1_1K ... bench: 41 ns/iter (+/- 1) = 25463 MB/s
+test misc::easy1_1MB ... bench: 44 ns/iter (+/- 1) = 23831727 MB/s
+test misc::easy1_32 ... bench: 40 ns/iter (+/- 2) = 1300 MB/s
+test misc::easy1_32K ... bench: 40 ns/iter (+/- 1) = 819700 MB/s
+test misc::hard_1K ... bench: 52 ns/iter (+/- 1) = 20211 MB/s
+test misc::hard_1MB ... bench: 57 ns/iter (+/- 0) = 18396543 MB/s
+test misc::hard_32 ... bench: 51 ns/iter (+/- 0) = 1156 MB/s
+test misc::hard_32K ... bench: 51 ns/iter (+/- 3) = 643039 MB/s
+test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s
+test misc::literal ... bench: 14 ns/iter (+/- 0) = 3642 MB/s
+test misc::long_needle1 ... bench: 3,249 ns/iter (+/- 87) = 30779 MB/s
+test misc::long_needle2 ... bench: 350,559 ns/iter (+/- 7,154) = 285 MB/s
+test misc::match_class ... bench: 61 ns/iter (+/- 4) = 1327 MB/s
+test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s
+test misc::matches_set ...
bench: 401 ns/iter (+/- 17) = 62 MB/s +test misc::medium_1K ... bench: 53 ns/iter (+/- 0) = 19849 MB/s +test misc::medium_1MB ... bench: 58 ns/iter (+/- 0) = 18079379 MB/s +test misc::medium_32 ... bench: 53 ns/iter (+/- 0) = 1132 MB/s +test misc::medium_32K ... bench: 53 ns/iter (+/- 2) = 618792 MB/s +test misc::no_exponential ... bench: 421 ns/iter (+/- 8) = 237 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 0) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 53 ns/iter (+/- 1) = 490 MB/s +test misc::one_pass_long_prefix_not ... bench: 53 ns/iter (+/- 0) = 490 MB/s +test misc::one_pass_short ... bench: 38 ns/iter (+/- 0) = 447 MB/s +test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 3) = 404 MB/s +test misc::reallyhard2_1K ... bench: 77 ns/iter (+/- 1) = 13506 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,759 ns/iter (+/- 49,997) = 665 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 2) = 578 MB/s +test misc::reallyhard_32K ... bench: 49,326 ns/iter (+/- 1,055) = 664 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,161 ns/iter (+/- 94) = 1922 MB/s +test regexdna::find_new_lines ... bench: 12,344,799 ns/iter (+/- 188,054) = 411 MB/s +test regexdna::subst1 ... bench: 780,449 ns/iter (+/- 14,474) = 6513 MB/s +test regexdna::subst10 ... bench: 795,203 ns/iter (+/- 40,742) = 6392 MB/s +test regexdna::subst11 ... bench: 816,444 ns/iter (+/- 23,334) = 6226 MB/s +test regexdna::subst2 ... bench: 777,546 ns/iter (+/- 19,625) = 6537 MB/s +test regexdna::subst3 ... bench: 783,295 ns/iter (+/- 8,266) = 6489 MB/s +test regexdna::subst4 ... bench: 775,154 ns/iter (+/- 21,350) = 6557 MB/s +test regexdna::subst5 ... bench: 781,414 ns/iter (+/- 21,057) = 6505 MB/s +test regexdna::subst6 ... bench: 783,595 ns/iter (+/- 23,835) = 6487 MB/s +test regexdna::subst7 ... bench: 821,620 ns/iter (+/- 46,131) = 6187 MB/s +test regexdna::subst8 ... bench: 818,402 ns/iter (+/- 21,350) = 6211 MB/s +test regexdna::subst9 ... bench: 779,115 ns/iter (+/- 21,335) = 6524 MB/s +test regexdna::variant1 ... bench: 2,189,308 ns/iter (+/- 32,528) = 2321 MB/s +test regexdna::variant2 ... bench: 3,217,478 ns/iter (+/- 36,011) = 1579 MB/s +test regexdna::variant3 ... bench: 3,771,330 ns/iter (+/- 74,944) = 1347 MB/s +test regexdna::variant4 ... bench: 3,787,593 ns/iter (+/- 37,825) = 1342 MB/s +test regexdna::variant5 ... bench: 2,669,799 ns/iter (+/- 69,777) = 1904 MB/s +test regexdna::variant6 ... bench: 2,651,559 ns/iter (+/- 33,895) = 1917 MB/s +test regexdna::variant7 ... bench: 3,222,991 ns/iter (+/- 41,014) = 1577 MB/s +test regexdna::variant8 ... bench: 3,298,048 ns/iter (+/- 41,331) = 1541 MB/s +test regexdna::variant9 ... bench: 3,218,486 ns/iter (+/- 50,318) = 1579 MB/s +test rust_compile::compile_huge ... bench: 100,031 ns/iter (+/- 3,464) +test rust_compile::compile_huge_bytes ... bench: 5,885,102 ns/iter (+/- 130,016) +test rust_compile::compile_huge_full ... bench: 11,641,251 ns/iter (+/- 147,700) +test rust_compile::compile_simple ... bench: 4,263 ns/iter (+/- 116) +test rust_compile::compile_simple_bytes ... bench: 4,236 ns/iter (+/- 91) +test rust_compile::compile_simple_full ... bench: 22,349 ns/iter (+/- 2,085) +test rust_compile::compile_small ... bench: 9,537 ns/iter (+/- 298) +test rust_compile::compile_small_bytes ... bench: 178,561 ns/iter (+/- 3,796) +test rust_compile::compile_small_full ... bench: 363,343 ns/iter (+/- 9,481) +test sherlock::before_after_holmes ... 
bench: 907,022 ns/iter (+/- 19,133) = 655 MB/s +test sherlock::before_holmes ... bench: 63,729 ns/iter (+/- 1,830) = 9335 MB/s +test sherlock::everything_greedy ... bench: 2,181,593 ns/iter (+/- 46,002) = 272 MB/s +test sherlock::everything_greedy_nl ... bench: 884,811 ns/iter (+/- 26,211) = 672 MB/s +test sherlock::holmes_cochar_watson ... bench: 105,610 ns/iter (+/- 3,120) = 5633 MB/s +test sherlock::holmes_coword_watson ... bench: 480,986 ns/iter (+/- 13,228) = 1236 MB/s +test sherlock::ing_suffix ... bench: 322,921 ns/iter (+/- 3,555) = 1842 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,065,372 ns/iter (+/- 21,242) = 558 MB/s +test sherlock::letters ... bench: 22,109,015 ns/iter (+/- 146,243) = 26 MB/s +test sherlock::letters_lower ... bench: 21,686,153 ns/iter (+/- 206,041) = 27 MB/s +test sherlock::letters_upper ... bench: 1,778,225 ns/iter (+/- 25,935) = 334 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,355 ns/iter (+/- 26,781) = 662 MB/s +test sherlock::name_alt1 ... bench: 31,927 ns/iter (+/- 633) = 18634 MB/s +test sherlock::name_alt2 ... bench: 87,040 ns/iter (+/- 1,859) = 6835 MB/s +test sherlock::name_alt3 ... bench: 97,715 ns/iter (+/- 2,109) = 6088 MB/s +test sherlock::name_alt3_nocase ... bench: 944,955 ns/iter (+/- 26,503) = 629 MB/s +test sherlock::name_alt4 ... bench: 120,935 ns/iter (+/- 2,399) = 4919 MB/s +test sherlock::name_alt4_nocase ... bench: 228,597 ns/iter (+/- 7,137) = 2602 MB/s +test sherlock::name_alt5 ... bench: 91,174 ns/iter (+/- 1,096) = 6525 MB/s +test sherlock::name_alt5_nocase ... bench: 937,189 ns/iter (+/- 23,839) = 634 MB/s +test sherlock::name_holmes ... bench: 34,020 ns/iter (+/- 752) = 17487 MB/s +test sherlock::name_holmes_nocase ... bench: 117,194 ns/iter (+/- 3,444) = 5076 MB/s +test sherlock::name_sherlock ... bench: 22,557 ns/iter (+/- 388) = 26374 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,428 ns/iter (+/- 683) = 26526 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 99,637 ns/iter (+/- 636) = 5971 MB/s +test sherlock::name_sherlock_nocase ... bench: 97,895 ns/iter (+/- 1,875) = 6077 MB/s +test sherlock::name_whitespace ... bench: 30,772 ns/iter (+/- 1,591) = 19333 MB/s +test sherlock::no_match_common ... bench: 19,665 ns/iter (+/- 296) = 30253 MB/s +test sherlock::no_match_really_common ... bench: 27,403 ns/iter (+/- 2,507) = 21710 MB/s +test sherlock::no_match_uncommon ... bench: 19,601 ns/iter (+/- 293) = 30352 MB/s +test sherlock::quotes ... bench: 370,323 ns/iter (+/- 1,345) = 1606 MB/s +test sherlock::repeated_class_negation ... bench: 68,414,794 ns/iter (+/- 342,428) = 8 MB/s +test sherlock::the_lower ... bench: 327,767 ns/iter (+/- 5,493) = 1815 MB/s +test sherlock::the_nocase ... bench: 507,818 ns/iter (+/- 1,796) = 1171 MB/s +test sherlock::the_upper ... bench: 45,045 ns/iter (+/- 1,400) = 13207 MB/s +test sherlock::the_whitespace ... bench: 822,080 ns/iter (+/- 16,581) = 723 MB/s +test sherlock::word_ending_n ... bench: 1,690,084 ns/iter (+/- 40,361) = 352 MB/s +test sherlock::words ... bench: 8,573,617 ns/iter (+/- 143,313) = 69 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 110.03s + diff --git a/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log b/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log new file mode 100644 index 0000000..7016e3c --- /dev/null +++ b/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 1) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s +test misc::easy0_1K ... bench: 14 ns/iter (+/- 0) = 75071 MB/s +test misc::easy0_1MB ... bench: 21 ns/iter (+/- 0) = 49933476 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 0) = 4214 MB/s +test misc::easy0_32K ... bench: 14 ns/iter (+/- 0) = 2342500 MB/s +test misc::easy1_1K ... bench: 41 ns/iter (+/- 0) = 25463 MB/s +test misc::easy1_1MB ... bench: 48 ns/iter (+/- 0) = 21845750 MB/s +test misc::easy1_32 ... bench: 41 ns/iter (+/- 0) = 1268 MB/s +test misc::easy1_32K ... bench: 41 ns/iter (+/- 1) = 799707 MB/s +test misc::hard_1K ... bench: 51 ns/iter (+/- 1) = 20607 MB/s +test misc::hard_1MB ... bench: 56 ns/iter (+/- 2) = 18725053 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 6) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 1) = 643039 MB/s +test misc::is_match_set ... bench: 62 ns/iter (+/- 2) = 403 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s +test misc::long_needle1 ... bench: 2,825 ns/iter (+/- 57) = 35398 MB/s +test misc::long_needle2 ... bench: 350,755 ns/iter (+/- 11,905) = 285 MB/s +test misc::match_class ... bench: 64 ns/iter (+/- 1) = 1265 MB/s +test misc::match_class_in_range ... bench: 13 ns/iter (+/- 0) = 6230 MB/s +test misc::matches_set ... bench: 422 ns/iter (+/- 12) = 59 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s +test misc::medium_1MB ... bench: 21 ns/iter (+/- 0) = 49933523 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s +test misc::medium_32K ... bench: 14 ns/iter (+/- 0) = 2342571 MB/s +test misc::no_exponential ... bench: 443 ns/iter (+/- 12) = 225 MB/s +test misc::not_literal ... bench: 89 ns/iter (+/- 1) = 573 MB/s +test misc::one_pass_long_prefix ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_short ... bench: 40 ns/iter (+/- 1) = 425 MB/s +test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 0) = 404 MB/s +test misc::reallyhard2_1K ... bench: 80 ns/iter (+/- 0) = 13000 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,789 ns/iter (+/- 34,236) = 665 MB/s +test misc::reallyhard_32 ... bench: 101 ns/iter (+/- 2) = 584 MB/s +test misc::reallyhard_32K ... bench: 49,321 ns/iter (+/- 2,718) = 664 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,158 ns/iter (+/- 93) = 1924 MB/s +test regexdna::find_new_lines ... bench: 12,391,732 ns/iter (+/- 180,913) = 410 MB/s +test regexdna::subst1 ... bench: 781,690 ns/iter (+/- 29,637) = 6503 MB/s +test regexdna::subst10 ... bench: 778,306 ns/iter (+/- 22,706) = 6531 MB/s +test regexdna::subst11 ... bench: 777,716 ns/iter (+/- 24,635) = 6536 MB/s +test regexdna::subst2 ... 
bench: 791,786 ns/iter (+/- 15,778) = 6420 MB/s +test regexdna::subst3 ... bench: 783,470 ns/iter (+/- 25,543) = 6488 MB/s +test regexdna::subst4 ... bench: 814,902 ns/iter (+/- 14,146) = 6238 MB/s +test regexdna::subst5 ... bench: 781,464 ns/iter (+/- 19,532) = 6504 MB/s +test regexdna::subst6 ... bench: 780,116 ns/iter (+/- 16,558) = 6516 MB/s +test regexdna::subst7 ... bench: 795,982 ns/iter (+/- 11,254) = 6386 MB/s +test regexdna::subst8 ... bench: 781,746 ns/iter (+/- 24,996) = 6502 MB/s +test regexdna::subst9 ... bench: 783,793 ns/iter (+/- 14,943) = 6485 MB/s +test regexdna::variant1 ... bench: 2,188,940 ns/iter (+/- 42,308) = 2322 MB/s +test regexdna::variant2 ... bench: 3,218,011 ns/iter (+/- 50,700) = 1579 MB/s +test regexdna::variant3 ... bench: 3,778,907 ns/iter (+/- 90,543) = 1345 MB/s +test regexdna::variant4 ... bench: 3,803,852 ns/iter (+/- 68,319) = 1336 MB/s +test regexdna::variant5 ... bench: 2,660,949 ns/iter (+/- 55,488) = 1910 MB/s +test regexdna::variant6 ... bench: 2,647,131 ns/iter (+/- 26,846) = 1920 MB/s +test regexdna::variant7 ... bench: 3,235,032 ns/iter (+/- 37,599) = 1571 MB/s +test regexdna::variant8 ... bench: 3,305,124 ns/iter (+/- 67,109) = 1538 MB/s +test regexdna::variant9 ... bench: 3,231,033 ns/iter (+/- 55,626) = 1573 MB/s +test rust_compile::compile_huge ... bench: 99,387 ns/iter (+/- 2,366) +test rust_compile::compile_huge_bytes ... bench: 5,865,693 ns/iter (+/- 62,255) +test rust_compile::compile_huge_full ... bench: 11,752,845 ns/iter (+/- 195,440) +test rust_compile::compile_simple ... bench: 4,117 ns/iter (+/- 141) +test rust_compile::compile_simple_bytes ... bench: 4,162 ns/iter (+/- 67) +test rust_compile::compile_simple_full ... bench: 19,955 ns/iter (+/- 622) +test rust_compile::compile_small ... bench: 9,140 ns/iter (+/- 112) +test rust_compile::compile_small_bytes ... bench: 165,990 ns/iter (+/- 5,876) +test rust_compile::compile_small_full ... bench: 342,897 ns/iter (+/- 13,730) +test sherlock::before_after_holmes ... bench: 906,789 ns/iter (+/- 13,931) = 656 MB/s +test sherlock::before_holmes ... bench: 62,319 ns/iter (+/- 790) = 9546 MB/s +test sherlock::everything_greedy ... bench: 2,175,424 ns/iter (+/- 47,720) = 273 MB/s +test sherlock::everything_greedy_nl ... bench: 884,406 ns/iter (+/- 22,679) = 672 MB/s +test sherlock::holmes_cochar_watson ... bench: 105,261 ns/iter (+/- 3,536) = 5651 MB/s +test sherlock::holmes_coword_watson ... bench: 479,524 ns/iter (+/- 7,749) = 1240 MB/s +test sherlock::ing_suffix ... bench: 321,401 ns/iter (+/- 9,123) = 1851 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,069,722 ns/iter (+/- 16,366) = 556 MB/s +test sherlock::letters ... bench: 21,959,896 ns/iter (+/- 204,695) = 27 MB/s +test sherlock::letters_lower ... bench: 21,462,457 ns/iter (+/- 207,449) = 27 MB/s +test sherlock::letters_upper ... bench: 1,768,026 ns/iter (+/- 41,459) = 336 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,197 ns/iter (+/- 14,349) = 663 MB/s +test sherlock::name_alt1 ... bench: 34,037 ns/iter (+/- 719) = 17479 MB/s +test sherlock::name_alt2 ... bench: 86,788 ns/iter (+/- 1,203) = 6855 MB/s +test sherlock::name_alt3 ... bench: 98,225 ns/iter (+/- 1,589) = 6056 MB/s +test sherlock::name_alt3_nocase ... bench: 377,597 ns/iter (+/- 14,840) = 1575 MB/s +test sherlock::name_alt4 ... bench: 122,440 ns/iter (+/- 8,123) = 4858 MB/s +test sherlock::name_alt4_nocase ... bench: 187,282 ns/iter (+/- 5,176) = 3176 MB/s +test sherlock::name_alt5 ... 
bench: 91,429 ns/iter (+/- 1,944) = 6507 MB/s +test sherlock::name_alt5_nocase ... bench: 348,111 ns/iter (+/- 12,721) = 1709 MB/s +test sherlock::name_holmes ... bench: 33,547 ns/iter (+/- 1,119) = 17734 MB/s +test sherlock::name_holmes_nocase ... bench: 132,342 ns/iter (+/- 3,974) = 4495 MB/s +test sherlock::name_sherlock ... bench: 22,562 ns/iter (+/- 364) = 26368 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,313 ns/iter (+/- 579) = 26663 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,556 ns/iter (+/- 2,092) = 6098 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,917 ns/iter (+/- 4,054) = 6202 MB/s +test sherlock::name_whitespace ... bench: 30,997 ns/iter (+/- 1,039) = 19193 MB/s +test sherlock::no_match_common ... bench: 19,690 ns/iter (+/- 378) = 30214 MB/s +test sherlock::no_match_really_common ... bench: 27,629 ns/iter (+/- 465) = 21532 MB/s +test sherlock::no_match_uncommon ... bench: 19,681 ns/iter (+/- 291) = 30228 MB/s +test sherlock::quotes ... bench: 368,290 ns/iter (+/- 1,508) = 1615 MB/s +test sherlock::repeated_class_negation ... bench: 73,004,024 ns/iter (+/- 1,040,743) = 8 MB/s +test sherlock::the_lower ... bench: 320,929 ns/iter (+/- 12,287) = 1853 MB/s +test sherlock::the_nocase ... bench: 514,946 ns/iter (+/- 11,241) = 1155 MB/s +test sherlock::the_upper ... bench: 43,816 ns/iter (+/- 1,719) = 13577 MB/s +test sherlock::the_whitespace ... bench: 825,245 ns/iter (+/- 20,797) = 720 MB/s +test sherlock::word_ending_n ... bench: 1,676,908 ns/iter (+/- 40,650) = 354 MB/s +test sherlock::words ... bench: 8,449,099 ns/iter (+/- 123,842) = 70 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 128.47s + diff --git a/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust b/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust new file mode 100644 index 0000000..aed99af --- /dev/null +++ b/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_long_non_match ... bench: 10 ns/iter (+/- 0) = 39000 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 10 ns/iter (+/- 0) = 2600 MB/s +test misc::easy0_1K ... bench: 7 ns/iter (+/- 0) = 150142 MB/s +test misc::easy0_1MB ... bench: 11 ns/iter (+/- 1) = 95327545 MB/s +test misc::easy0_32 ... bench: 7 ns/iter (+/- 0) = 8428 MB/s +test misc::easy0_32K ... bench: 7 ns/iter (+/- 0) = 4685000 MB/s +test misc::easy1_1K ... bench: 17 ns/iter (+/- 1) = 61411 MB/s +test misc::easy1_1MB ... bench: 20 ns/iter (+/- 0) = 52429800 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 1) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 8 ns/iter (+/- 1) = 6375 MB/s +test misc::long_needle1 ... bench: 1,785 ns/iter (+/- 1) = 56022 MB/s +test misc::long_needle2 ... bench: 193,595 ns/iter (+/- 1,486) = 516 MB/s +test misc::match_class ... bench: 37 ns/iter (+/- 1) = 2189 MB/s +test misc::match_class_in_range ... 
bench: 8 ns/iter (+/- 0) = 10125 MB/s +test misc::match_class_unicode ... bench: 181 ns/iter (+/- 1) = 889 MB/s +test misc::matches_set ... bench: 216 ns/iter (+/- 9) = 115 MB/s +test misc::medium_1K ... bench: 7 ns/iter (+/- 0) = 150285 MB/s +test misc::medium_1MB ... bench: 12 ns/iter (+/- 1) = 87383666 MB/s +test misc::medium_32 ... bench: 7 ns/iter (+/- 0) = 8571 MB/s +test misc::medium_32K ... bench: 7 ns/iter (+/- 0) = 4685142 MB/s +test misc::no_exponential ... bench: 283 ns/iter (+/- 7) = 353 MB/s +test misc::not_literal ... bench: 53 ns/iter (+/- 1) = 962 MB/s +test misc::one_pass_long_prefix ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_long_prefix_not ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 0) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 41 ns/iter (+/- 0) = 25365 MB/s +test misc::reallyhard_1K ... bench: 1,208 ns/iter (+/- 2) = 870 MB/s +test misc::reallyhard_1MB ... bench: 1,213,959 ns/iter (+/- 7,198) = 863 MB/s +test misc::reallyhard_32 ... bench: 62 ns/iter (+/- 0) = 951 MB/s +test misc::reallyhard_32K ... bench: 38,231 ns/iter (+/- 354) = 857 MB/s +test misc::replace_all ... bench: 86 ns/iter (+/- 3) +test misc::reverse_suffix_no_quadratic ... bench: 2,351 ns/iter (+/- 31) = 3402 MB/s +test misc::short_haystack_1000000x ... bench: 91,018 ns/iter (+/- 203) = 87894 MB/s +test misc::short_haystack_100000x ... bench: 9,277 ns/iter (+/- 40) = 86235 MB/s +test misc::short_haystack_10000x ... bench: 2,863 ns/iter (+/- 4) = 27946 MB/s +test misc::short_haystack_1000x ... bench: 201 ns/iter (+/- 3) = 39855 MB/s +test misc::short_haystack_100x ... bench: 100 ns/iter (+/- 2) = 8110 MB/s +test misc::short_haystack_10x ... bench: 88 ns/iter (+/- 0) = 1034 MB/s +test misc::short_haystack_1x ... bench: 86 ns/iter (+/- 1) = 220 MB/s +test misc::short_haystack_2x ... bench: 87 ns/iter (+/- 0) = 310 MB/s +test misc::short_haystack_3x ... bench: 88 ns/iter (+/- 1) = 397 MB/s +test misc::short_haystack_4x ... bench: 88 ns/iter (+/- 1) = 488 MB/s +test regexdna::find_new_lines ... bench: 7,348,651 ns/iter (+/- 40,559) = 691 MB/s +test regexdna::subst1 ... bench: 493,624 ns/iter (+/- 10,315) = 10298 MB/s +test regexdna::subst10 ... bench: 489,573 ns/iter (+/- 18,151) = 10383 MB/s +test regexdna::subst11 ... bench: 492,501 ns/iter (+/- 11,650) = 10321 MB/s +test regexdna::subst2 ... bench: 492,283 ns/iter (+/- 12,363) = 10326 MB/s +test regexdna::subst3 ... bench: 496,795 ns/iter (+/- 20,704) = 10232 MB/s +test regexdna::subst4 ... bench: 489,245 ns/iter (+/- 10,289) = 10390 MB/s +test regexdna::subst5 ... bench: 499,701 ns/iter (+/- 11,359) = 10172 MB/s +test regexdna::subst6 ... bench: 490,460 ns/iter (+/- 8,758) = 10364 MB/s +test regexdna::subst7 ... bench: 496,398 ns/iter (+/- 18,774) = 10240 MB/s +test regexdna::subst8 ... bench: 497,077 ns/iter (+/- 24,767) = 10226 MB/s +test regexdna::subst9 ... bench: 496,763 ns/iter (+/- 12,477) = 10233 MB/s +test regexdna::variant1 ... bench: 1,454,747 ns/iter (+/- 48,995) = 3494 MB/s +test regexdna::variant2 ... bench: 2,311,001 ns/iter (+/- 63,347) = 2199 MB/s +test regexdna::variant3 ... bench: 2,832,483 ns/iter (+/- 33,976) = 1794 MB/s +test regexdna::variant4 ... bench: 2,796,710 ns/iter (+/- 56,279) = 1817 MB/s +test regexdna::variant5 ... bench: 1,708,634 ns/iter (+/- 25,749) = 2975 MB/s +test regexdna::variant6 ... bench: 1,706,259 ns/iter (+/- 22,151) = 2979 MB/s +test regexdna::variant7 ... 
bench: 2,400,436 ns/iter (+/- 24,655) = 2117 MB/s +test regexdna::variant8 ... bench: 2,413,765 ns/iter (+/- 50,326) = 2106 MB/s +test regexdna::variant9 ... bench: 2,402,528 ns/iter (+/- 26,150) = 2115 MB/s +test rust_compile::compile_huge ... bench: 51,936 ns/iter (+/- 834) +test rust_compile::compile_huge_bytes ... bench: 3,294,633 ns/iter (+/- 40,585) +test rust_compile::compile_huge_full ... bench: 6,323,294 ns/iter (+/- 66,684) +test rust_compile::compile_simple ... bench: 1,992 ns/iter (+/- 25) +test rust_compile::compile_simple_bytes ... bench: 2,004 ns/iter (+/- 20) +test rust_compile::compile_simple_full ... bench: 9,697 ns/iter (+/- 68) +test rust_compile::compile_small ... bench: 4,261 ns/iter (+/- 72) +test rust_compile::compile_small_bytes ... bench: 83,908 ns/iter (+/- 1,405) +test rust_compile::compile_small_full ... bench: 166,152 ns/iter (+/- 3,508) +test sherlock::before_after_holmes ... bench: 699,767 ns/iter (+/- 6,201) = 850 MB/s +test sherlock::before_holmes ... bench: 29,284 ns/iter (+/- 573) = 20315 MB/s +test sherlock::everything_greedy ... bench: 1,070,812 ns/iter (+/- 18,795) = 555 MB/s +test sherlock::everything_greedy_nl ... bench: 445,517 ns/iter (+/- 7,760) = 1335 MB/s +test sherlock::holmes_cochar_watson ... bench: 43,459 ns/iter (+/- 901) = 13689 MB/s +test sherlock::holmes_coword_watson ... bench: 335,772 ns/iter (+/- 6,348) = 1771 MB/s +test sherlock::ing_suffix ... bench: 153,546 ns/iter (+/- 3,075) = 3874 MB/s +test sherlock::ing_suffix_limited_space ... bench: 777,388 ns/iter (+/- 8,447) = 765 MB/s +test sherlock::letters ... bench: 10,123,374 ns/iter (+/- 90,059) = 58 MB/s +test sherlock::letters_lower ... bench: 9,957,916 ns/iter (+/- 63,766) = 59 MB/s +test sherlock::letters_upper ... bench: 1,123,119 ns/iter (+/- 17,972) = 529 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 694,714 ns/iter (+/- 7,006) = 856 MB/s +test sherlock::name_alt1 ... bench: 13,427 ns/iter (+/- 331) = 44308 MB/s +test sherlock::name_alt2 ... bench: 33,171 ns/iter (+/- 1,029) = 17935 MB/s +test sherlock::name_alt3 ... bench: 36,816 ns/iter (+/- 1,138) = 16159 MB/s +test sherlock::name_alt3_nocase ... bench: 221,185 ns/iter (+/- 3,268) = 2689 MB/s +test sherlock::name_alt4 ... bench: 49,883 ns/iter (+/- 1,150) = 11926 MB/s +test sherlock::name_alt4_nocase ... bench: 74,967 ns/iter (+/- 1,807) = 7935 MB/s +test sherlock::name_alt5 ... bench: 34,675 ns/iter (+/- 1,335) = 17157 MB/s +test sherlock::name_alt5_nocase ... bench: 192,109 ns/iter (+/- 6,194) = 3096 MB/s +test sherlock::name_holmes ... bench: 18,355 ns/iter (+/- 389) = 32412 MB/s +test sherlock::name_holmes_nocase ... bench: 58,179 ns/iter (+/- 917) = 10225 MB/s +test sherlock::name_sherlock ... bench: 14,307 ns/iter (+/- 74) = 41583 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,332 ns/iter (+/- 144) = 41510 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 41,336 ns/iter (+/- 736) = 14392 MB/s +test sherlock::name_sherlock_nocase ... bench: 40,029 ns/iter (+/- 1,393) = 14862 MB/s +test sherlock::name_whitespace ... bench: 17,807 ns/iter (+/- 105) = 33410 MB/s +test sherlock::no_match_common ... bench: 13,625 ns/iter (+/- 15) = 43664 MB/s +test sherlock::no_match_really_common ... bench: 13,818 ns/iter (+/- 282) = 43054 MB/s +test sherlock::no_match_uncommon ... bench: 13,628 ns/iter (+/- 27) = 43655 MB/s +test sherlock::quotes ... bench: 232,910 ns/iter (+/- 1,883) = 2554 MB/s +test sherlock::repeated_class_negation ... 
bench: 36,892,964 ns/iter (+/- 629,538) = 16 MB/s +test sherlock::the_lower ... bench: 203,077 ns/iter (+/- 2,574) = 2929 MB/s +test sherlock::the_nocase ... bench: 290,781 ns/iter (+/- 6,597) = 2045 MB/s +test sherlock::the_upper ... bench: 22,731 ns/iter (+/- 439) = 26172 MB/s +test sherlock::the_whitespace ... bench: 423,983 ns/iter (+/- 10,849) = 1403 MB/s +test sherlock::word_ending_n ... bench: 1,109,013 ns/iter (+/- 12,645) = 536 MB/s +test sherlock::words ... bench: 4,529,451 ns/iter (+/- 44,285) = 131 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 164.08s + diff --git a/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust-bytes b/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust-bytes new file mode 100644 index 0000000..e9f750e --- /dev/null +++ b/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust-bytes @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_long_non_match ... bench: 10 ns/iter (+/- 0) = 39000 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 10 ns/iter (+/- 0) = 2600 MB/s +test misc::easy0_1K ... bench: 7 ns/iter (+/- 0) = 150142 MB/s +test misc::easy0_1MB ... bench: 11 ns/iter (+/- 0) = 95327545 MB/s +test misc::easy0_32 ... bench: 7 ns/iter (+/- 0) = 8428 MB/s +test misc::easy0_32K ... bench: 7 ns/iter (+/- 0) = 4685000 MB/s +test misc::easy1_1K ... bench: 17 ns/iter (+/- 0) = 61411 MB/s +test misc::easy1_1MB ... bench: 20 ns/iter (+/- 0) = 52429800 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 0) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... bench: 2,186 ns/iter (+/- 19) = 45746 MB/s +test misc::long_needle2 ... bench: 210,378 ns/iter (+/- 61,574) = 475 MB/s +test misc::match_class ... bench: 39 ns/iter (+/- 1) = 2076 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::matches_set ... bench: 176 ns/iter (+/- 12) = 142 MB/s +test misc::medium_1K ... bench: 8 ns/iter (+/- 0) = 131500 MB/s +test misc::medium_1MB ... bench: 12 ns/iter (+/- 0) = 87383666 MB/s +test misc::medium_32 ... bench: 8 ns/iter (+/- 0) = 7500 MB/s +test misc::medium_32K ... bench: 8 ns/iter (+/- 0) = 4099500 MB/s +test misc::no_exponential ... bench: 274 ns/iter (+/- 7) = 364 MB/s +test misc::not_literal ... bench: 53 ns/iter (+/- 0) = 962 MB/s +test misc::one_pass_long_prefix ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_long_prefix_not ... bench: 24 ns/iter (+/- 1) = 1083 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 1) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 38 ns/iter (+/- 5) = 27368 MB/s +test misc::reallyhard_1K ... bench: 1,220 ns/iter (+/- 15) = 861 MB/s +test misc::reallyhard_1MB ... bench: 1,215,297 ns/iter (+/- 5,229) = 862 MB/s +test misc::reallyhard_32 ... bench: 63 ns/iter (+/- 1) = 936 MB/s +test misc::reallyhard_32K ... 
bench: 38,164 ns/iter (+/- 232) = 859 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 2,353 ns/iter (+/- 8) = 3399 MB/s +test regexdna::find_new_lines ... bench: 7,346,276 ns/iter (+/- 46,149) = 691 MB/s +test regexdna::subst1 ... bench: 486,203 ns/iter (+/- 21,159) = 10455 MB/s +test regexdna::subst10 ... bench: 494,356 ns/iter (+/- 6,423) = 10282 MB/s +test regexdna::subst11 ... bench: 481,930 ns/iter (+/- 19,639) = 10548 MB/s +test regexdna::subst2 ... bench: 486,672 ns/iter (+/- 22,184) = 10445 MB/s +test regexdna::subst3 ... bench: 487,152 ns/iter (+/- 19,776) = 10434 MB/s +test regexdna::subst4 ... bench: 486,534 ns/iter (+/- 23,897) = 10448 MB/s +test regexdna::subst5 ... bench: 481,412 ns/iter (+/- 26,310) = 10559 MB/s +test regexdna::subst6 ... bench: 479,498 ns/iter (+/- 20,310) = 10601 MB/s +test regexdna::subst7 ... bench: 481,960 ns/iter (+/- 18,492) = 10547 MB/s +test regexdna::subst8 ... bench: 482,282 ns/iter (+/- 22,522) = 10540 MB/s +test regexdna::subst9 ... bench: 489,224 ns/iter (+/- 25,264) = 10390 MB/s +test regexdna::variant1 ... bench: 1,470,068 ns/iter (+/- 65,563) = 3457 MB/s +test regexdna::variant2 ... bench: 2,298,112 ns/iter (+/- 27,688) = 2211 MB/s +test regexdna::variant3 ... bench: 2,818,539 ns/iter (+/- 31,432) = 1803 MB/s +test regexdna::variant4 ... bench: 2,786,226 ns/iter (+/- 30,699) = 1824 MB/s +test regexdna::variant5 ... bench: 1,716,429 ns/iter (+/- 20,264) = 2961 MB/s +test regexdna::variant6 ... bench: 1,719,420 ns/iter (+/- 23,944) = 2956 MB/s +test regexdna::variant7 ... bench: 2,391,022 ns/iter (+/- 23,192) = 2126 MB/s +test regexdna::variant8 ... bench: 2,418,744 ns/iter (+/- 44,152) = 2101 MB/s +test regexdna::variant9 ... bench: 2,400,918 ns/iter (+/- 24,041) = 2117 MB/s +test rust_compile::compile_huge ... bench: 57,745 ns/iter (+/- 816) +test rust_compile::compile_huge_bytes ... bench: 3,346,952 ns/iter (+/- 39,488) +test rust_compile::compile_huge_full ... bench: 6,344,293 ns/iter (+/- 53,114) +test rust_compile::compile_simple ... bench: 2,040 ns/iter (+/- 32) +test rust_compile::compile_simple_bytes ... bench: 2,010 ns/iter (+/- 34) +test rust_compile::compile_simple_full ... bench: 9,632 ns/iter (+/- 464) +test rust_compile::compile_small ... bench: 4,445 ns/iter (+/- 77) +test rust_compile::compile_small_bytes ... bench: 83,791 ns/iter (+/- 1,929) +test rust_compile::compile_small_full ... bench: 164,948 ns/iter (+/- 2,595) +test sherlock::before_after_holmes ... bench: 699,996 ns/iter (+/- 6,647) = 849 MB/s +test sherlock::before_holmes ... bench: 28,208 ns/iter (+/- 233) = 21090 MB/s +test sherlock::everything_greedy ... bench: 1,033,048 ns/iter (+/- 9,790) = 575 MB/s +test sherlock::everything_greedy_nl ... bench: 424,081 ns/iter (+/- 22,574) = 1402 MB/s +test sherlock::holmes_cochar_watson ... bench: 43,131 ns/iter (+/- 827) = 13793 MB/s +test sherlock::holmes_coword_watson ... bench: 336,678 ns/iter (+/- 6,985) = 1767 MB/s +test sherlock::ing_suffix ... bench: 153,589 ns/iter (+/- 3,193) = 3873 MB/s +test sherlock::ing_suffix_limited_space ... bench: 776,911 ns/iter (+/- 8,815) = 765 MB/s +test sherlock::letters ... bench: 10,056,702 ns/iter (+/- 49,688) = 59 MB/s +test sherlock::letters_lower ... bench: 9,900,568 ns/iter (+/- 76,118) = 60 MB/s +test sherlock::letters_upper ... bench: 1,120,456 ns/iter (+/- 13,538) = 530 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 693,727 ns/iter (+/- 6,840) = 857 MB/s +test sherlock::name_alt1 ... 
bench: 11,101 ns/iter (+/- 65) = 53592 MB/s +test sherlock::name_alt2 ... bench: 34,003 ns/iter (+/- 966) = 17496 MB/s +test sherlock::name_alt3 ... bench: 37,975 ns/iter (+/- 1,313) = 15666 MB/s +test sherlock::name_alt3_nocase ... bench: 214,299 ns/iter (+/- 3,026) = 2776 MB/s +test sherlock::name_alt4 ... bench: 50,551 ns/iter (+/- 1,377) = 11768 MB/s +test sherlock::name_alt4_nocase ... bench: 74,713 ns/iter (+/- 1,359) = 7962 MB/s +test sherlock::name_alt5 ... bench: 35,426 ns/iter (+/- 625) = 16793 MB/s +test sherlock::name_alt5_nocase ... bench: 190,521 ns/iter (+/- 4,903) = 3122 MB/s +test sherlock::name_holmes ... bench: 18,070 ns/iter (+/- 763) = 32923 MB/s +test sherlock::name_holmes_nocase ... bench: 58,454 ns/iter (+/- 1,228) = 10177 MB/s +test sherlock::name_sherlock ... bench: 14,380 ns/iter (+/- 227) = 41372 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,491 ns/iter (+/- 116) = 41055 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 40,722 ns/iter (+/- 231) = 14609 MB/s +test sherlock::name_sherlock_nocase ... bench: 39,937 ns/iter (+/- 623) = 14896 MB/s +test sherlock::name_whitespace ... bench: 17,979 ns/iter (+/- 140) = 33090 MB/s +test sherlock::no_match_common ... bench: 13,650 ns/iter (+/- 112) = 43584 MB/s +test sherlock::no_match_really_common ... bench: 13,623 ns/iter (+/- 295) = 43671 MB/s +test sherlock::no_match_uncommon ... bench: 13,641 ns/iter (+/- 55) = 43613 MB/s +test sherlock::quotes ... bench: 232,451 ns/iter (+/- 6,555) = 2559 MB/s +test sherlock::repeated_class_negation ... bench: 36,984,199 ns/iter (+/- 623,153) = 16 MB/s +test sherlock::the_lower ... bench: 189,502 ns/iter (+/- 4,870) = 3139 MB/s +test sherlock::the_nocase ... bench: 294,945 ns/iter (+/- 9,381) = 2017 MB/s +test sherlock::the_upper ... bench: 21,591 ns/iter (+/- 680) = 27554 MB/s +test sherlock::the_whitespace ... bench: 424,862 ns/iter (+/- 7,197) = 1400 MB/s +test sherlock::word_ending_n ... bench: 1,126,768 ns/iter (+/- 13,900) = 527 MB/s +test sherlock::words ... bench: 4,517,167 ns/iter (+/- 55,809) = 131 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 150.58s + diff --git a/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust b/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust new file mode 100644 index 0000000..282893e --- /dev/null +++ b/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_long_non_match ... bench: 9 ns/iter (+/- 0) = 43333 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 9 ns/iter (+/- 0) = 2888 MB/s +test misc::easy0_1K ... bench: 24 ns/iter (+/- 1) = 43791 MB/s +test misc::easy0_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::easy0_32 ... bench: 25 ns/iter (+/- 0) = 2360 MB/s +test misc::easy0_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::easy1_1K ... bench: 18 ns/iter (+/- 1) = 58000 MB/s +test misc::easy1_1MB ... bench: 21 ns/iter (+/- 0) = 49933142 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 0) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 29 ns/iter (+/- 0) = 36158724 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... 
bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... bench: 1,802 ns/iter (+/- 6) = 55494 MB/s +test misc::long_needle2 ... bench: 207,353 ns/iter (+/- 165) = 482 MB/s +test misc::match_class ... bench: 41 ns/iter (+/- 2) = 1975 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::match_class_unicode ... bench: 168 ns/iter (+/- 3) = 958 MB/s +test misc::matches_set ... bench: 210 ns/iter (+/- 5) = 119 MB/s +test misc::medium_1K ... bench: 25 ns/iter (+/- 0) = 42080 MB/s +test misc::medium_1MB ... bench: 29 ns/iter (+/- 0) = 36158758 MB/s +test misc::medium_32 ... bench: 25 ns/iter (+/- 0) = 2400 MB/s +test misc::medium_32K ... bench: 25 ns/iter (+/- 0) = 1311840 MB/s +test misc::no_exponential ... bench: 268 ns/iter (+/- 7) = 373 MB/s +test misc::not_literal ... bench: 44 ns/iter (+/- 4) = 1159 MB/s +test misc::one_pass_long_prefix ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_long_prefix_not ... bench: 23 ns/iter (+/- 2) = 1130 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 0) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 38 ns/iter (+/- 1) = 27368 MB/s +test misc::reallyhard_1K ... bench: 1,215 ns/iter (+/- 12) = 865 MB/s +test misc::reallyhard_1MB ... bench: 1,215,907 ns/iter (+/- 6,442) = 862 MB/s +test misc::reallyhard_32 ... bench: 53 ns/iter (+/- 2) = 1113 MB/s +test misc::reallyhard_32K ... bench: 38,162 ns/iter (+/- 464) = 859 MB/s +test misc::replace_all ... bench: 86 ns/iter (+/- 5) +test misc::reverse_suffix_no_quadratic ... bench: 2,355 ns/iter (+/- 470) = 3397 MB/s +test misc::short_haystack_1000000x ... bench: 91,039 ns/iter (+/- 157) = 87874 MB/s +test misc::short_haystack_100000x ... bench: 7,595 ns/iter (+/- 33) = 105333 MB/s +test misc::short_haystack_10000x ... bench: 2,865 ns/iter (+/- 9) = 27927 MB/s +test misc::short_haystack_1000x ... bench: 211 ns/iter (+/- 2) = 37966 MB/s +test misc::short_haystack_100x ... bench: 98 ns/iter (+/- 3) = 8275 MB/s +test misc::short_haystack_10x ... bench: 92 ns/iter (+/- 4) = 989 MB/s +test misc::short_haystack_1x ... bench: 90 ns/iter (+/- 2) = 211 MB/s +test misc::short_haystack_2x ... bench: 88 ns/iter (+/- 3) = 306 MB/s +test misc::short_haystack_3x ... bench: 91 ns/iter (+/- 3) = 384 MB/s +test misc::short_haystack_4x ... bench: 90 ns/iter (+/- 3) = 477 MB/s +test regexdna::find_new_lines ... bench: 7,323,399 ns/iter (+/- 24,661) = 694 MB/s +test regexdna::subst1 ... bench: 473,671 ns/iter (+/- 16,963) = 10731 MB/s +test regexdna::subst10 ... bench: 463,672 ns/iter (+/- 13,433) = 10963 MB/s +test regexdna::subst11 ... bench: 470,891 ns/iter (+/- 28,305) = 10795 MB/s +test regexdna::subst2 ... bench: 469,218 ns/iter (+/- 26,181) = 10833 MB/s +test regexdna::subst3 ... bench: 467,417 ns/iter (+/- 30,700) = 10875 MB/s +test regexdna::subst4 ... bench: 469,373 ns/iter (+/- 17,254) = 10830 MB/s +test regexdna::subst5 ... bench: 467,035 ns/iter (+/- 30,365) = 10884 MB/s +test regexdna::subst6 ... bench: 466,540 ns/iter (+/- 18,283) = 10895 MB/s +test regexdna::subst7 ... bench: 470,291 ns/iter (+/- 23,930) = 10809 MB/s +test regexdna::subst8 ... bench: 466,425 ns/iter (+/- 27,080) = 10898 MB/s +test regexdna::subst9 ... bench: 468,192 ns/iter (+/- 17,296) = 10857 MB/s +test regexdna::variant1 ... 
bench: 653,471 ns/iter (+/- 8,898) = 7779 MB/s +test regexdna::variant2 ... bench: 902,852 ns/iter (+/- 12,549) = 5630 MB/s +test regexdna::variant3 ... bench: 1,158,000 ns/iter (+/- 14,075) = 4389 MB/s +test regexdna::variant4 ... bench: 1,149,520 ns/iter (+/- 13,482) = 4422 MB/s +test regexdna::variant5 ... bench: 1,132,121 ns/iter (+/- 7,624) = 4490 MB/s +test regexdna::variant6 ... bench: 1,069,227 ns/iter (+/- 13,436) = 4754 MB/s +test regexdna::variant7 ... bench: 1,150,436 ns/iter (+/- 28,302) = 4418 MB/s +test regexdna::variant8 ... bench: 1,148,923 ns/iter (+/- 49,063) = 4424 MB/s +test regexdna::variant9 ... bench: 1,190,858 ns/iter (+/- 15,044) = 4268 MB/s +test rust_compile::compile_huge ... bench: 52,168 ns/iter (+/- 827) +test rust_compile::compile_huge_bytes ... bench: 3,330,456 ns/iter (+/- 57,242) +test rust_compile::compile_huge_full ... bench: 6,378,126 ns/iter (+/- 85,019) +test rust_compile::compile_simple ... bench: 2,291 ns/iter (+/- 39) +test rust_compile::compile_simple_bytes ... bench: 2,355 ns/iter (+/- 37) +test rust_compile::compile_simple_full ... bench: 14,581 ns/iter (+/- 103) +test rust_compile::compile_small ... bench: 10,443 ns/iter (+/- 114) +test rust_compile::compile_small_bytes ... bench: 11,269 ns/iter (+/- 150) +test rust_compile::compile_small_full ... bench: 14,746 ns/iter (+/- 212) +test sherlock::before_after_holmes ... bench: 699,736 ns/iter (+/- 6,402) = 850 MB/s +test sherlock::before_holmes ... bench: 28,001 ns/iter (+/- 198) = 21246 MB/s +test sherlock::everything_greedy ... bench: 1,029,174 ns/iter (+/- 33,321) = 578 MB/s +test sherlock::everything_greedy_nl ... bench: 460,103 ns/iter (+/- 23,290) = 1293 MB/s +test sherlock::holmes_cochar_watson ... bench: 57,666 ns/iter (+/- 907) = 10316 MB/s +test sherlock::holmes_coword_watson ... bench: 345,016 ns/iter (+/- 4,672) = 1724 MB/s +test sherlock::ing_suffix ... bench: 150,499 ns/iter (+/- 4,855) = 3953 MB/s +test sherlock::ing_suffix_limited_space ... bench: 777,723 ns/iter (+/- 8,076) = 764 MB/s +test sherlock::letters ... bench: 10,022,203 ns/iter (+/- 77,897) = 59 MB/s +test sherlock::letters_lower ... bench: 9,861,816 ns/iter (+/- 76,172) = 60 MB/s +test sherlock::letters_upper ... bench: 1,134,201 ns/iter (+/- 11,926) = 524 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 693,533 ns/iter (+/- 6,686) = 857 MB/s +test sherlock::name_alt1 ... bench: 11,974 ns/iter (+/- 292) = 49685 MB/s +test sherlock::name_alt2 ... bench: 44,708 ns/iter (+/- 573) = 13307 MB/s +test sherlock::name_alt3 ... bench: 49,873 ns/iter (+/- 785) = 11928 MB/s +test sherlock::name_alt3_nocase ... bench: 190,194 ns/iter (+/- 2,944) = 3128 MB/s +test sherlock::name_alt4 ... bench: 52,028 ns/iter (+/- 1,102) = 11434 MB/s +test sherlock::name_alt4_nocase ... bench: 119,891 ns/iter (+/- 921) = 4962 MB/s +test sherlock::name_alt5 ... bench: 47,139 ns/iter (+/- 1,617) = 12620 MB/s +test sherlock::name_alt5_nocase ... bench: 200,159 ns/iter (+/- 3,992) = 2972 MB/s +test sherlock::name_holmes ... bench: 17,902 ns/iter (+/- 577) = 33232 MB/s +test sherlock::name_holmes_nocase ... bench: 58,219 ns/iter (+/- 1,215) = 10218 MB/s +test sherlock::name_sherlock ... bench: 14,314 ns/iter (+/- 45) = 41563 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,399 ns/iter (+/- 45) = 41317 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 41,418 ns/iter (+/- 591) = 14364 MB/s +test sherlock::name_sherlock_nocase ... bench: 39,877 ns/iter (+/- 545) = 14919 MB/s +test sherlock::name_whitespace ... 
bench: 17,883 ns/iter (+/- 151) = 33268 MB/s +test sherlock::no_match_common ... bench: 13,696 ns/iter (+/- 123) = 43438 MB/s +test sherlock::no_match_really_common ... bench: 10,157 ns/iter (+/- 222) = 58573 MB/s +test sherlock::no_match_uncommon ... bench: 13,663 ns/iter (+/- 53) = 43543 MB/s +test sherlock::quotes ... bench: 234,890 ns/iter (+/- 4,574) = 2532 MB/s +test sherlock::repeated_class_negation ... bench: 36,406,680 ns/iter (+/- 397,378) = 16 MB/s +test sherlock::the_lower ... bench: 192,028 ns/iter (+/- 5,315) = 3098 MB/s +test sherlock::the_nocase ... bench: 311,087 ns/iter (+/- 6,723) = 1912 MB/s +test sherlock::the_upper ... bench: 21,710 ns/iter (+/- 1,269) = 27403 MB/s +test sherlock::the_whitespace ... bench: 425,246 ns/iter (+/- 7,741) = 1399 MB/s +test sherlock::word_ending_n ... bench: 1,116,412 ns/iter (+/- 11,753) = 532 MB/s +test sherlock::words ... bench: 4,452,805 ns/iter (+/- 84,309) = 133 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 142.33s + diff --git a/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust-bytes b/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust-bytes new file mode 100644 index 0000000..f5380a7 --- /dev/null +++ b/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust-bytes @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_long_non_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 8 ns/iter (+/- 0) = 3250 MB/s +test misc::easy0_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::easy0_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::easy0_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::easy0_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::easy1_1K ... bench: 18 ns/iter (+/- 0) = 58000 MB/s +test misc::easy1_1MB ... bench: 21 ns/iter (+/- 0) = 49933142 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 2) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... bench: 1,801 ns/iter (+/- 2) = 55525 MB/s +test misc::long_needle2 ... bench: 212,892 ns/iter (+/- 206) = 469 MB/s +test misc::match_class ... bench: 40 ns/iter (+/- 0) = 2025 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::matches_set ... bench: 174 ns/iter (+/- 2) = 143 MB/s +test misc::medium_1K ... bench: 25 ns/iter (+/- 0) = 42080 MB/s +test misc::medium_1MB ... bench: 29 ns/iter (+/- 0) = 36158758 MB/s +test misc::medium_32 ... bench: 25 ns/iter (+/- 0) = 2400 MB/s +test misc::medium_32K ... bench: 25 ns/iter (+/- 0) = 1311840 MB/s +test misc::no_exponential ... bench: 270 ns/iter (+/- 8) = 370 MB/s +test misc::not_literal ... bench: 44 ns/iter (+/- 1) = 1159 MB/s +test misc::one_pass_long_prefix ... bench: 23 ns/iter (+/- 0) = 1130 MB/s +test misc::one_pass_long_prefix_not ... bench: 23 ns/iter (+/- 0) = 1130 MB/s +test misc::one_pass_short ... 
bench: 16 ns/iter (+/- 1) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 38 ns/iter (+/- 2) = 27368 MB/s +test misc::reallyhard_1K ... bench: 1,215 ns/iter (+/- 15) = 865 MB/s +test misc::reallyhard_1MB ... bench: 1,217,631 ns/iter (+/- 11,216) = 861 MB/s +test misc::reallyhard_32 ... bench: 53 ns/iter (+/- 4) = 1113 MB/s +test misc::reallyhard_32K ... bench: 38,251 ns/iter (+/- 364) = 857 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 2,353 ns/iter (+/- 4) = 3399 MB/s +test regexdna::find_new_lines ... bench: 7,322,463 ns/iter (+/- 37,966) = 694 MB/s +test regexdna::subst1 ... bench: 466,849 ns/iter (+/- 12,252) = 10888 MB/s +test regexdna::subst10 ... bench: 465,011 ns/iter (+/- 19,693) = 10931 MB/s +test regexdna::subst11 ... bench: 457,806 ns/iter (+/- 13,453) = 11103 MB/s +test regexdna::subst2 ... bench: 456,878 ns/iter (+/- 32,828) = 11126 MB/s +test regexdna::subst3 ... bench: 465,531 ns/iter (+/- 21,786) = 10919 MB/s +test regexdna::subst4 ... bench: 454,553 ns/iter (+/- 12,698) = 11183 MB/s +test regexdna::subst5 ... bench: 456,977 ns/iter (+/- 13,155) = 11123 MB/s +test regexdna::subst6 ... bench: 466,105 ns/iter (+/- 15,667) = 10906 MB/s +test regexdna::subst7 ... bench: 462,655 ns/iter (+/- 18,871) = 10987 MB/s +test regexdna::subst8 ... bench: 456,642 ns/iter (+/- 19,218) = 11132 MB/s +test regexdna::subst9 ... bench: 456,307 ns/iter (+/- 15,369) = 11140 MB/s +test regexdna::variant1 ... bench: 655,033 ns/iter (+/- 7,901) = 7760 MB/s +test regexdna::variant2 ... bench: 902,675 ns/iter (+/- 15,165) = 5631 MB/s +test regexdna::variant3 ... bench: 1,159,521 ns/iter (+/- 14,489) = 4384 MB/s +test regexdna::variant4 ... bench: 1,147,781 ns/iter (+/- 16,536) = 4428 MB/s +test regexdna::variant5 ... bench: 1,133,068 ns/iter (+/- 13,938) = 4486 MB/s +test regexdna::variant6 ... bench: 1,061,174 ns/iter (+/- 14,478) = 4790 MB/s +test regexdna::variant7 ... bench: 1,151,637 ns/iter (+/- 35,753) = 4414 MB/s +test regexdna::variant8 ... bench: 1,137,068 ns/iter (+/- 37,678) = 4470 MB/s +test regexdna::variant9 ... bench: 1,185,082 ns/iter (+/- 14,355) = 4289 MB/s +test rust_compile::compile_huge ... bench: 66,894 ns/iter (+/- 2,425) +test rust_compile::compile_huge_bytes ... bench: 3,331,663 ns/iter (+/- 47,261) +test rust_compile::compile_huge_full ... bench: 6,446,254 ns/iter (+/- 65,334) +test rust_compile::compile_simple ... bench: 2,351 ns/iter (+/- 71) +test rust_compile::compile_simple_bytes ... bench: 2,350 ns/iter (+/- 49) +test rust_compile::compile_simple_full ... bench: 14,460 ns/iter (+/- 144) +test rust_compile::compile_small ... bench: 10,350 ns/iter (+/- 120) +test rust_compile::compile_small_bytes ... bench: 10,993 ns/iter (+/- 89) +test rust_compile::compile_small_full ... bench: 14,201 ns/iter (+/- 139) +test sherlock::before_after_holmes ... bench: 698,092 ns/iter (+/- 6,907) = 852 MB/s +test sherlock::before_holmes ... bench: 29,127 ns/iter (+/- 1,001) = 20425 MB/s +test sherlock::everything_greedy ... bench: 1,026,902 ns/iter (+/- 86,299) = 579 MB/s +test sherlock::everything_greedy_nl ... bench: 433,157 ns/iter (+/- 10,129) = 1373 MB/s +test sherlock::holmes_cochar_watson ... bench: 57,103 ns/iter (+/- 509) = 10418 MB/s +test sherlock::holmes_coword_watson ... bench: 344,973 ns/iter (+/- 3,288) = 1724 MB/s +test sherlock::ing_suffix ... bench: 158,337 ns/iter (+/- 2,492) = 3757 MB/s +test sherlock::ing_suffix_limited_space ... 
bench: 776,703 ns/iter (+/- 8,000) = 765 MB/s +test sherlock::letters ... bench: 10,179,909 ns/iter (+/- 55,188) = 58 MB/s +test sherlock::letters_lower ... bench: 10,007,465 ns/iter (+/- 75,168) = 59 MB/s +test sherlock::letters_upper ... bench: 1,116,201 ns/iter (+/- 11,571) = 532 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 693,124 ns/iter (+/- 6,540) = 858 MB/s +test sherlock::name_alt1 ... bench: 12,079 ns/iter (+/- 192) = 49253 MB/s +test sherlock::name_alt2 ... bench: 44,336 ns/iter (+/- 1,424) = 13418 MB/s +test sherlock::name_alt3 ... bench: 49,569 ns/iter (+/- 721) = 12002 MB/s +test sherlock::name_alt3_nocase ... bench: 189,812 ns/iter (+/- 2,952) = 3134 MB/s +test sherlock::name_alt4 ... bench: 52,132 ns/iter (+/- 1,182) = 11412 MB/s +test sherlock::name_alt4_nocase ... bench: 120,591 ns/iter (+/- 2,521) = 4933 MB/s +test sherlock::name_alt5 ... bench: 46,956 ns/iter (+/- 545) = 12670 MB/s +test sherlock::name_alt5_nocase ... bench: 199,252 ns/iter (+/- 2,212) = 2985 MB/s +test sherlock::name_holmes ... bench: 17,983 ns/iter (+/- 591) = 33083 MB/s +test sherlock::name_holmes_nocase ... bench: 58,139 ns/iter (+/- 919) = 10232 MB/s +test sherlock::name_sherlock ... bench: 14,283 ns/iter (+/- 113) = 41653 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,587 ns/iter (+/- 82) = 40785 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 40,947 ns/iter (+/- 385) = 14529 MB/s +test sherlock::name_sherlock_nocase ... bench: 39,607 ns/iter (+/- 567) = 15020 MB/s +test sherlock::name_whitespace ... bench: 18,803 ns/iter (+/- 232) = 31640 MB/s +test sherlock::no_match_common ... bench: 13,704 ns/iter (+/- 73) = 43413 MB/s +test sherlock::no_match_really_common ... bench: 14,166 ns/iter (+/- 191) = 41997 MB/s +test sherlock::no_match_uncommon ... bench: 13,702 ns/iter (+/- 36) = 43419 MB/s +test sherlock::quotes ... bench: 232,609 ns/iter (+/- 3,217) = 2557 MB/s +test sherlock::repeated_class_negation ... bench: 36,167,769 ns/iter (+/- 592,579) = 16 MB/s +test sherlock::the_lower ... bench: 188,281 ns/iter (+/- 2,966) = 3159 MB/s +test sherlock::the_nocase ... bench: 312,853 ns/iter (+/- 23,145) = 1901 MB/s +test sherlock::the_upper ... bench: 20,987 ns/iter (+/- 909) = 28347 MB/s +test sherlock::the_whitespace ... bench: 427,154 ns/iter (+/- 6,396) = 1392 MB/s +test sherlock::word_ending_n ... bench: 1,112,964 ns/iter (+/- 15,393) = 534 MB/s +test sherlock::words ... bench: 4,513,468 ns/iter (+/- 35,410) = 131 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 143.96s + diff --git a/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust b/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust new file mode 100644 index 0000000..b46bdf9 --- /dev/null +++ b/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust @@ -0,0 +1,115 @@ + +running 110 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 12 ns/iter (+/- 1) = 32500 MB/s +test misc::anchored_literal_short_match ... bench: 15 ns/iter (+/- 0) = 1733 MB/s +test misc::anchored_literal_short_non_match ... bench: 12 ns/iter (+/- 1) = 2166 MB/s +test misc::easy0_1K ... bench: 42 ns/iter (+/- 0) = 25023 MB/s +test misc::easy0_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::easy0_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::easy0_32K ... bench: 43 ns/iter (+/- 0) = 762674 MB/s +test misc::easy1_1K ... 
bench: 35 ns/iter (+/- 0) = 29828 MB/s +test misc::easy1_1MB ... bench: 35 ns/iter (+/- 0) = 29959885 MB/s +test misc::easy1_32 ... bench: 35 ns/iter (+/- 0) = 1485 MB/s +test misc::easy1_32K ... bench: 35 ns/iter (+/- 0) = 936800 MB/s +test misc::hard_1K ... bench: 43 ns/iter (+/- 0) = 24441 MB/s +test misc::hard_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::hard_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::hard_32K ... bench: 42 ns/iter (+/- 0) = 780833 MB/s +test misc::is_match_set ... bench: 46 ns/iter (+/- 1) = 543 MB/s +test misc::literal ... bench: 9 ns/iter (+/- 0) = 5666 MB/s +test misc::long_needle1 ... bench: 1,801 ns/iter (+/- 24) = 55525 MB/s +test misc::long_needle2 ... bench: 194,124 ns/iter (+/- 289) = 515 MB/s +test misc::match_class ... bench: 22 ns/iter (+/- 1) = 3681 MB/s +test misc::match_class_in_range ... bench: 10 ns/iter (+/- 0) = 8100 MB/s +test misc::match_class_unicode ... bench: 196 ns/iter (+/- 0) = 821 MB/s +test misc::matches_set ... bench: 55 ns/iter (+/- 3) = 454 MB/s +test misc::medium_1K ... bench: 43 ns/iter (+/- 0) = 24465 MB/s +test misc::medium_1MB ... bench: 43 ns/iter (+/- 0) = 24386139 MB/s +test misc::medium_32 ... bench: 43 ns/iter (+/- 0) = 1395 MB/s +test misc::medium_32K ... bench: 43 ns/iter (+/- 0) = 762697 MB/s +test misc::no_exponential ... bench: 167 ns/iter (+/- 0) = 598 MB/s +test misc::not_literal ... bench: 26 ns/iter (+/- 1) = 1961 MB/s +test misc::one_pass_long_prefix ... bench: 40 ns/iter (+/- 0) = 650 MB/s +test misc::one_pass_long_prefix_not ... bench: 40 ns/iter (+/- 0) = 650 MB/s +test misc::one_pass_short ... bench: 30 ns/iter (+/- 0) = 566 MB/s +test misc::one_pass_short_not ... bench: 31 ns/iter (+/- 0) = 548 MB/s +test misc::reallyhard2_1K ... bench: 67 ns/iter (+/- 1) = 15522 MB/s +test misc::reallyhard_1K ... bench: 78 ns/iter (+/- 1) = 13474 MB/s +test misc::reallyhard_1MB ... bench: 19,310 ns/iter (+/- 80) = 54303 MB/s +test misc::reallyhard_32 ... bench: 62 ns/iter (+/- 2) = 951 MB/s +test misc::reallyhard_32K ... bench: 543 ns/iter (+/- 4) = 60395 MB/s +test misc::replace_all ... bench: 151 ns/iter (+/- 13) +test misc::reverse_suffix_no_quadratic ... bench: 9,302 ns/iter (+/- 25) = 860 MB/s +test misc::short_haystack_1000000x ... bench: 90,868 ns/iter (+/- 354) = 88039 MB/s +test misc::short_haystack_100000x ... bench: 7,215 ns/iter (+/- 18) = 110881 MB/s +test misc::short_haystack_10000x ... bench: 605 ns/iter (+/- 2) = 132249 MB/s +test misc::short_haystack_1000x ... bench: 148 ns/iter (+/- 2) = 54128 MB/s +test misc::short_haystack_100x ... bench: 83 ns/iter (+/- 3) = 9771 MB/s +test misc::short_haystack_10x ... bench: 89 ns/iter (+/- 1) = 1022 MB/s +test misc::short_haystack_1x ... bench: 79 ns/iter (+/- 1) = 240 MB/s +test misc::short_haystack_2x ... bench: 79 ns/iter (+/- 1) = 341 MB/s +test misc::short_haystack_3x ... bench: 80 ns/iter (+/- 2) = 437 MB/s +test misc::short_haystack_4x ... bench: 79 ns/iter (+/- 1) = 544 MB/s +test regexdna::find_new_lines ... bench: 1,748,215 ns/iter (+/- 25,793) = 2907 MB/s +test regexdna::subst1 ... bench: 486,169 ns/iter (+/- 11,425) = 10456 MB/s +test regexdna::subst10 ... bench: 479,019 ns/iter (+/- 7,468) = 10612 MB/s +test regexdna::subst11 ... bench: 481,118 ns/iter (+/- 10,305) = 10565 MB/s +test regexdna::subst2 ... bench: 484,508 ns/iter (+/- 11,753) = 10491 MB/s +test regexdna::subst3 ... bench: 481,861 ns/iter (+/- 7,991) = 10549 MB/s +test regexdna::subst4 ... bench: 477,043 ns/iter (+/- 12,101) = 10656 MB/s +test regexdna::subst5 ... 
bench: 483,954 ns/iter (+/- 7,728) = 10503 MB/s +test regexdna::subst6 ... bench: 479,564 ns/iter (+/- 13,514) = 10600 MB/s +test regexdna::subst7 ... bench: 481,345 ns/iter (+/- 11,205) = 10560 MB/s +test regexdna::subst8 ... bench: 479,772 ns/iter (+/- 13,266) = 10595 MB/s +test regexdna::subst9 ... bench: 480,299 ns/iter (+/- 9,997) = 10583 MB/s +test regexdna::variant1 ... bench: 693,230 ns/iter (+/- 21,808) = 7332 MB/s +test regexdna::variant2 ... bench: 936,552 ns/iter (+/- 9,916) = 5427 MB/s +test regexdna::variant3 ... bench: 1,192,921 ns/iter (+/- 11,038) = 4261 MB/s +test regexdna::variant4 ... bench: 1,170,341 ns/iter (+/- 27,745) = 4343 MB/s +test regexdna::variant5 ... bench: 1,166,877 ns/iter (+/- 8,369) = 4356 MB/s +test regexdna::variant6 ... bench: 1,085,919 ns/iter (+/- 9,594) = 4681 MB/s +test regexdna::variant7 ... bench: 1,248,718 ns/iter (+/- 13,480) = 4070 MB/s +test regexdna::variant8 ... bench: 1,216,643 ns/iter (+/- 15,505) = 4178 MB/s +test regexdna::variant9 ... bench: 1,219,951 ns/iter (+/- 14,109) = 4166 MB/s +test sherlock::before_after_holmes ... bench: 27,363 ns/iter (+/- 604) = 21742 MB/s +test sherlock::before_holmes ... bench: 31,147 ns/iter (+/- 876) = 19100 MB/s +test sherlock::everything_greedy ... bench: 1,326,354 ns/iter (+/- 22,628) = 448 MB/s +test sherlock::everything_greedy_nl ... bench: 801,343 ns/iter (+/- 895) = 742 MB/s +test sherlock::holmes_cochar_watson ... bench: 56,328 ns/iter (+/- 1,009) = 10561 MB/s +test sherlock::holmes_coword_watson ... bench: 301,186 ns/iter (+/- 3,615) = 1975 MB/s +test sherlock::ing_suffix ... bench: 176,428 ns/iter (+/- 2,182) = 3372 MB/s +test sherlock::ing_suffix_limited_space ... bench: 173,948 ns/iter (+/- 5,073) = 3420 MB/s +test sherlock::letters ... bench: 7,226,608 ns/iter (+/- 261,849) = 82 MB/s +test sherlock::letters_lower ... bench: 7,024,589 ns/iter (+/- 145,281) = 84 MB/s +test sherlock::letters_upper ... bench: 1,004,841 ns/iter (+/- 6,857) = 592 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 15,978 ns/iter (+/- 90) = 37234 MB/s +test sherlock::name_alt1 ... bench: 11,151 ns/iter (+/- 289) = 53352 MB/s +test sherlock::name_alt2 ... bench: 45,441 ns/iter (+/- 960) = 13092 MB/s +test sherlock::name_alt3 ... bench: 51,934 ns/iter (+/- 806) = 11455 MB/s +test sherlock::name_alt3_nocase ... bench: 171,844 ns/iter (+/- 4,176) = 3462 MB/s +test sherlock::name_alt4 ... bench: 46,611 ns/iter (+/- 1,072) = 12763 MB/s +test sherlock::name_alt4_nocase ... bench: 74,956 ns/iter (+/- 2,098) = 7937 MB/s +test sherlock::name_alt5 ... bench: 47,595 ns/iter (+/- 595) = 12499 MB/s +test sherlock::name_alt5_nocase ... bench: 100,636 ns/iter (+/- 814) = 5911 MB/s +test sherlock::name_holmes ... bench: 19,293 ns/iter (+/- 687) = 30836 MB/s +test sherlock::name_holmes_nocase ... bench: 52,310 ns/iter (+/- 1,024) = 11373 MB/s +test sherlock::name_sherlock ... bench: 16,080 ns/iter (+/- 327) = 36998 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,605 ns/iter (+/- 120) = 40734 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 38,662 ns/iter (+/- 360) = 15388 MB/s +test sherlock::name_sherlock_nocase ... bench: 37,650 ns/iter (+/- 316) = 15801 MB/s +test sherlock::name_whitespace ... bench: 16,234 ns/iter (+/- 125) = 36647 MB/s +test sherlock::no_match_common ... bench: 13,709 ns/iter (+/- 72) = 43397 MB/s +test sherlock::no_match_really_common ... bench: 9,870 ns/iter (+/- 133) = 60276 MB/s +test sherlock::no_match_uncommon ... 
bench: 13,735 ns/iter (+/- 57) = 43315 MB/s +test sherlock::quotes ... bench: 189,377 ns/iter (+/- 2,105) = 3141 MB/s +test sherlock::repeated_class_negation ... bench: 29,934 ns/iter (+/- 1,249) = 19874 MB/s +test sherlock::the_lower ... bench: 213,236 ns/iter (+/- 3,823) = 2790 MB/s +test sherlock::the_nocase ... bench: 322,922 ns/iter (+/- 5,946) = 1842 MB/s +test sherlock::the_upper ... bench: 23,494 ns/iter (+/- 718) = 25322 MB/s +test sherlock::the_whitespace ... bench: 392,113 ns/iter (+/- 6,046) = 1517 MB/s +test sherlock::word_ending_n ... bench: 673,618 ns/iter (+/- 12,865) = 883 MB/s +test sherlock::words ... bench: 3,632,096 ns/iter (+/- 56,944) = 163 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 110 measured; 0 filtered out; finished in 117.87s + diff --git a/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust-bytes b/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust-bytes new file mode 100644 index 0000000..8ac6c04 --- /dev/null +++ b/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust-bytes @@ -0,0 +1,103 @@ + +running 98 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 12 ns/iter (+/- 0) = 32500 MB/s +test misc::anchored_literal_short_match ... bench: 15 ns/iter (+/- 0) = 1733 MB/s +test misc::anchored_literal_short_non_match ... bench: 12 ns/iter (+/- 0) = 2166 MB/s +test misc::easy0_1K ... bench: 42 ns/iter (+/- 0) = 25023 MB/s +test misc::easy0_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::easy0_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::easy0_32K ... bench: 42 ns/iter (+/- 0) = 780833 MB/s +test misc::easy1_1K ... bench: 34 ns/iter (+/- 1) = 30705 MB/s +test misc::easy1_1MB ... bench: 34 ns/iter (+/- 0) = 30841058 MB/s +test misc::easy1_32 ... bench: 34 ns/iter (+/- 0) = 1529 MB/s +test misc::easy1_32K ... bench: 34 ns/iter (+/- 0) = 964352 MB/s +test misc::hard_1K ... bench: 42 ns/iter (+/- 0) = 25023 MB/s +test misc::hard_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::hard_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::hard_32K ... bench: 42 ns/iter (+/- 0) = 780833 MB/s +test misc::is_match_set ... bench: 47 ns/iter (+/- 1) = 531 MB/s +test misc::literal ... bench: 10 ns/iter (+/- 0) = 5100 MB/s +test misc::long_needle1 ... bench: 1,808 ns/iter (+/- 7) = 55310 MB/s +test misc::long_needle2 ... bench: 213,106 ns/iter (+/- 416) = 469 MB/s +test misc::match_class ... bench: 23 ns/iter (+/- 1) = 3521 MB/s +test misc::match_class_in_range ... bench: 11 ns/iter (+/- 0) = 7363 MB/s +test misc::matches_set ... bench: 56 ns/iter (+/- 3) = 446 MB/s +test misc::medium_1K ... bench: 43 ns/iter (+/- 0) = 24465 MB/s +test misc::medium_1MB ... bench: 43 ns/iter (+/- 0) = 24386139 MB/s +test misc::medium_32 ... bench: 43 ns/iter (+/- 0) = 1395 MB/s +test misc::medium_32K ... bench: 43 ns/iter (+/- 0) = 762697 MB/s +test misc::no_exponential ... bench: 162 ns/iter (+/- 4) = 617 MB/s +test misc::not_literal ... bench: 27 ns/iter (+/- 1) = 1888 MB/s +test misc::one_pass_long_prefix ... bench: 41 ns/iter (+/- 0) = 634 MB/s +test misc::one_pass_long_prefix_not ... bench: 41 ns/iter (+/- 0) = 634 MB/s +test misc::one_pass_short ... bench: 30 ns/iter (+/- 0) = 566 MB/s +test misc::one_pass_short_not ... bench: 31 ns/iter (+/- 0) = 548 MB/s +test misc::reallyhard2_1K ... bench: 70 ns/iter (+/- 1) = 14857 MB/s +test misc::reallyhard_1K ... bench: 78 ns/iter (+/- 3) = 13474 MB/s +test misc::reallyhard_1MB ... 
bench: 19,850 ns/iter (+/- 345) = 52826 MB/s +test misc::reallyhard_32 ... bench: 61 ns/iter (+/- 2) = 967 MB/s +test misc::reallyhard_32K ... bench: 546 ns/iter (+/- 8) = 60064 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 9,304 ns/iter (+/- 29) = 859 MB/s +test regexdna::find_new_lines ... bench: 1,733,767 ns/iter (+/- 66,699) = 2932 MB/s +test regexdna::subst1 ... bench: 486,442 ns/iter (+/- 11,929) = 10450 MB/s +test regexdna::subst10 ... bench: 486,073 ns/iter (+/- 12,157) = 10458 MB/s +test regexdna::subst11 ... bench: 483,485 ns/iter (+/- 11,703) = 10514 MB/s +test regexdna::subst2 ... bench: 487,298 ns/iter (+/- 9,184) = 10431 MB/s +test regexdna::subst3 ... bench: 491,219 ns/iter (+/- 9,614) = 10348 MB/s +test regexdna::subst4 ... bench: 482,668 ns/iter (+/- 9,576) = 10531 MB/s +test regexdna::subst5 ... bench: 489,673 ns/iter (+/- 8,331) = 10381 MB/s +test regexdna::subst6 ... bench: 484,707 ns/iter (+/- 5,276) = 10487 MB/s +test regexdna::subst7 ... bench: 485,109 ns/iter (+/- 9,360) = 10478 MB/s +test regexdna::subst8 ... bench: 485,790 ns/iter (+/- 9,298) = 10464 MB/s +test regexdna::subst9 ... bench: 483,255 ns/iter (+/- 12,434) = 10519 MB/s +test regexdna::variant1 ... bench: 654,757 ns/iter (+/- 8,719) = 7763 MB/s +test regexdna::variant2 ... bench: 905,052 ns/iter (+/- 9,599) = 5616 MB/s +test regexdna::variant3 ... bench: 1,161,187 ns/iter (+/- 13,798) = 4377 MB/s +test regexdna::variant4 ... bench: 1,144,656 ns/iter (+/- 15,198) = 4440 MB/s +test regexdna::variant5 ... bench: 1,136,222 ns/iter (+/- 9,112) = 4473 MB/s +test regexdna::variant6 ... bench: 1,062,124 ns/iter (+/- 12,336) = 4786 MB/s +test regexdna::variant7 ... bench: 1,144,371 ns/iter (+/- 44,700) = 4442 MB/s +test regexdna::variant8 ... bench: 1,143,064 ns/iter (+/- 53,456) = 4447 MB/s +test regexdna::variant9 ... bench: 1,187,063 ns/iter (+/- 14,341) = 4282 MB/s +test sherlock::before_after_holmes ... bench: 27,804 ns/iter (+/- 598) = 21397 MB/s +test sherlock::before_holmes ... bench: 31,197 ns/iter (+/- 933) = 19070 MB/s +test sherlock::everything_greedy ... bench: 1,272,335 ns/iter (+/- 12,466) = 467 MB/s +test sherlock::everything_greedy_nl ... bench: 801,469 ns/iter (+/- 955) = 742 MB/s +test sherlock::holmes_cochar_watson ... bench: 56,790 ns/iter (+/- 1,606) = 10476 MB/s +test sherlock::holmes_coword_watson ... bench: 300,554 ns/iter (+/- 3,460) = 1979 MB/s +test sherlock::ing_suffix ... bench: 179,355 ns/iter (+/- 5,486) = 3317 MB/s +test sherlock::ing_suffix_limited_space ... bench: 175,703 ns/iter (+/- 2,380) = 3386 MB/s +test sherlock::letters ... bench: 7,197,094 ns/iter (+/- 181,502) = 82 MB/s +test sherlock::letters_lower ... bench: 7,100,979 ns/iter (+/- 155,898) = 83 MB/s +test sherlock::letters_upper ... bench: 1,018,217 ns/iter (+/- 21,695) = 584 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 15,931 ns/iter (+/- 140) = 37344 MB/s +test sherlock::name_alt1 ... bench: 10,932 ns/iter (+/- 96) = 54421 MB/s +test sherlock::name_alt2 ... bench: 45,580 ns/iter (+/- 829) = 13052 MB/s +test sherlock::name_alt3 ... bench: 51,942 ns/iter (+/- 1,418) = 11453 MB/s +test sherlock::name_alt3_nocase ... bench: 171,749 ns/iter (+/- 1,451) = 3463 MB/s +test sherlock::name_alt4 ... bench: 45,705 ns/iter (+/- 1,536) = 13016 MB/s +test sherlock::name_alt4_nocase ... bench: 73,782 ns/iter (+/- 1,679) = 8063 MB/s +test sherlock::name_alt5 ... bench: 48,045 ns/iter (+/- 1,261) = 12382 MB/s +test sherlock::name_alt5_nocase ... 
bench: 100,307 ns/iter (+/- 553) = 5931 MB/s +test sherlock::name_holmes ... bench: 18,916 ns/iter (+/- 662) = 31451 MB/s +test sherlock::name_holmes_nocase ... bench: 52,714 ns/iter (+/- 774) = 11286 MB/s +test sherlock::name_sherlock ... bench: 14,575 ns/iter (+/- 163) = 40818 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,625 ns/iter (+/- 166) = 40679 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 39,024 ns/iter (+/- 361) = 15245 MB/s +test sherlock::name_sherlock_nocase ... bench: 38,025 ns/iter (+/- 418) = 15645 MB/s +test sherlock::name_whitespace ... bench: 16,247 ns/iter (+/- 88) = 36618 MB/s +test sherlock::no_match_common ... bench: 13,724 ns/iter (+/- 28) = 43349 MB/s +test sherlock::no_match_really_common ... bench: 13,798 ns/iter (+/- 93) = 43117 MB/s +test sherlock::no_match_uncommon ... bench: 13,671 ns/iter (+/- 80) = 43517 MB/s +test sherlock::quotes ... bench: 189,359 ns/iter (+/- 2,334) = 3141 MB/s +test sherlock::repeated_class_negation ... bench: 29,083 ns/iter (+/- 708) = 20456 MB/s +test sherlock::the_lower ... bench: 204,122 ns/iter (+/- 4,256) = 2914 MB/s +test sherlock::the_nocase ... bench: 319,388 ns/iter (+/- 6,790) = 1862 MB/s +test sherlock::the_upper ... bench: 22,706 ns/iter (+/- 961) = 26201 MB/s +test sherlock::the_whitespace ... bench: 386,276 ns/iter (+/- 4,950) = 1540 MB/s +test sherlock::word_ending_n ... bench: 690,010 ns/iter (+/- 8,516) = 862 MB/s +test sherlock::words ... bench: 3,659,990 ns/iter (+/- 104,505) = 162 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 98 measured; 0 filtered out; finished in 105.65s + diff --git a/vendor/regex/record/old-bench-log/README.md b/vendor/regex/record/old-bench-log/README.md new file mode 100644 index 0000000..aab290e --- /dev/null +++ b/vendor/regex/record/old-bench-log/README.md @@ -0,0 +1,11 @@ +These represent an old log of benchmarks from regex 1.7.3 and older. New +and much more comprehensive benchmarks are now maintained as part of the +[rebar] project. + +We keep these old benchmark recordings for posterity, but they may be removed +in the future. + +Measurements can be compared using the [`cargo-benchcmp`][cargo-benchcmp] tool. + +[rebar]: https://github.com/BurntSushi/rebar +[cargo-benchcmp]: https://github.com/BurntSushi/cargo-benchcmp diff --git a/vendor/regex/record/old-bench-log/old/01-before b/vendor/regex/record/old-bench-log/old/01-before new file mode 100644 index 0000000..74890a3 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/01-before @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 520 ns/iter (+/- 1) +test bench::anchored_literal_long_non_match ... bench: 236 ns/iter (+/- 2) +test bench::anchored_literal_short_match ... bench: 519 ns/iter (+/- 2) +test bench::anchored_literal_short_non_match ... bench: 238 ns/iter (+/- 2) +test bench::easy0_1K ... bench: 7742 ns/iter (+/- 97) = 132 MB/s +test bench::easy0_32 ... bench: 4989 ns/iter (+/- 20) = 6 MB/s +test bench::easy0_32K ... bench: 96347 ns/iter (+/- 997) = 340 MB/s +test bench::easy1_1K ... bench: 9805 ns/iter (+/- 1846) = 104 MB/s +test bench::easy1_32 ... bench: 4930 ns/iter (+/- 202) = 6 MB/s +test bench::easy1_32K ... bench: 163332 ns/iter (+/- 9207) = 200 MB/s +test bench::hard_1K ... bench: 97455 ns/iter (+/- 1089) = 10 MB/s +test bench::hard_32 ... bench: 8256 ns/iter (+/- 148) = 3 MB/s +test bench::hard_32K ... bench: 2948095 ns/iter (+/- 11988) = 11 MB/s +test bench::literal ... bench: 371 ns/iter (+/- 5) +test bench::match_class ... 
bench: 2168 ns/iter (+/- 12) +test bench::match_class_in_range ... bench: 2379 ns/iter (+/- 13) +test bench::medium_1K ... bench: 37073 ns/iter (+/- 1100) = 27 MB/s +test bench::medium_32 ... bench: 6183 ns/iter (+/- 218) = 5 MB/s +test bench::medium_32K ... bench: 1032000 ns/iter (+/- 8278) = 31 MB/s +test bench::no_exponential ... bench: 727975 ns/iter (+/- 2970) +test bench::not_literal ... bench: 4670 ns/iter (+/- 29) +test bench::one_pass_long_prefix ... bench: 1562 ns/iter (+/- 24) +test bench::one_pass_long_prefix_not ... bench: 1539 ns/iter (+/- 40) +test bench::one_pass_short_a ... bench: 2688 ns/iter (+/- 21) +test bench::one_pass_short_a_not ... bench: 4197 ns/iter (+/- 36) +test bench::one_pass_short_b ... bench: 2198 ns/iter (+/- 22) +test bench::one_pass_short_b_not ... bench: 3761 ns/iter (+/- 41) +test bench::replace_all ... bench: 2874 ns/iter (+/- 25) diff --git a/vendor/regex/record/old-bench-log/old/02-new-syntax-crate b/vendor/regex/record/old-bench-log/old/02-new-syntax-crate new file mode 100644 index 0000000..267808f --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/02-new-syntax-crate @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 545 ns/iter (+/- 12) +test bench::anchored_literal_long_non_match ... bench: 251 ns/iter (+/- 11) +test bench::anchored_literal_short_match ... bench: 521 ns/iter (+/- 31) +test bench::anchored_literal_short_non_match ... bench: 231 ns/iter (+/- 0) +test bench::easy0_1K ... bench: 7465 ns/iter (+/- 102) = 137 MB/s +test bench::easy0_32 ... bench: 4995 ns/iter (+/- 27) = 6 MB/s +test bench::easy0_32K ... bench: 86985 ns/iter (+/- 755) = 376 MB/s +test bench::easy1_1K ... bench: 9493 ns/iter (+/- 1727) = 107 MB/s +test bench::easy1_32 ... bench: 4955 ns/iter (+/- 324) = 6 MB/s +test bench::easy1_32K ... bench: 155288 ns/iter (+/- 13016) = 210 MB/s +test bench::hard_1K ... bench: 95925 ns/iter (+/- 1674) = 10 MB/s +test bench::hard_32 ... bench: 8264 ns/iter (+/- 151) = 3 MB/s +test bench::hard_32K ... bench: 2886440 ns/iter (+/- 25807) = 11 MB/s +test bench::literal ... bench: 365 ns/iter (+/- 12) +test bench::match_class ... bench: 2313 ns/iter (+/- 8) +test bench::match_class_in_range ... bench: 2596 ns/iter (+/- 8) +test bench::medium_1K ... bench: 38136 ns/iter (+/- 941) = 26 MB/s +test bench::medium_32 ... bench: 6178 ns/iter (+/- 147) = 5 MB/s +test bench::medium_32K ... bench: 1065698 ns/iter (+/- 6815) = 30 MB/s +test bench::no_exponential ... bench: 682461 ns/iter (+/- 2860) +test bench::not_literal ... bench: 4525 ns/iter (+/- 67) +test bench::one_pass_long_prefix ... bench: 1459 ns/iter (+/- 13) +test bench::one_pass_long_prefix_not ... bench: 1463 ns/iter (+/- 8) +test bench::one_pass_short_a ... bench: 2615 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 4066 ns/iter (+/- 48) +test bench::one_pass_short_b ... bench: 2064 ns/iter (+/- 10) +test bench::one_pass_short_b_not ... bench: 3502 ns/iter (+/- 24) +test bench::replace_all ... bench: 2949 ns/iter (+/- 15) diff --git a/vendor/regex/record/old-bench-log/old/03-new-syntax-crate b/vendor/regex/record/old-bench-log/old/03-new-syntax-crate new file mode 100644 index 0000000..a50005d --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/03-new-syntax-crate @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 373 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 204 ns/iter (+/- 3) +test bench::anchored_literal_short_match ... 
bench: 376 ns/iter (+/- 5) +test bench::anchored_literal_short_non_match ... bench: 206 ns/iter (+/- 3) +test bench::easy0_1K ... bench: 9136 ns/iter (+/- 177) = 112 MB/s +test bench::easy0_32 ... bench: 6641 ns/iter (+/- 86) = 4 MB/s +test bench::easy0_32K ... bench: 88826 ns/iter (+/- 1366) = 368 MB/s +test bench::easy1_1K ... bench: 10937 ns/iter (+/- 737) = 93 MB/s +test bench::easy1_32 ... bench: 7366 ns/iter (+/- 219) = 4 MB/s +test bench::easy1_32K ... bench: 122324 ns/iter (+/- 4628) = 267 MB/s +test bench::hard_1K ... bench: 59998 ns/iter (+/- 965) = 17 MB/s +test bench::hard_32 ... bench: 9058 ns/iter (+/- 123) = 3 MB/s +test bench::hard_32K ... bench: 1694326 ns/iter (+/- 27226) = 19 MB/s +test bench::literal ... bench: 336 ns/iter (+/- 6) +test bench::match_class ... bench: 2109 ns/iter (+/- 27) +test bench::match_class_in_range ... bench: 2274 ns/iter (+/- 32) +test bench::medium_1K ... bench: 38317 ns/iter (+/- 1075) = 26 MB/s +test bench::medium_32 ... bench: 7969 ns/iter (+/- 115) = 4 MB/s +test bench::medium_32K ... bench: 1028260 ns/iter (+/- 12905) = 31 MB/s +test bench::no_exponential ... bench: 257719 ns/iter (+/- 4939) +test bench::not_literal ... bench: 1699 ns/iter (+/- 31) +test bench::one_pass_long_prefix ... bench: 750 ns/iter (+/- 9) +test bench::one_pass_long_prefix_not ... bench: 747 ns/iter (+/- 12) +test bench::one_pass_short_a ... bench: 1844 ns/iter (+/- 22) +test bench::one_pass_short_a_not ... bench: 2395 ns/iter (+/- 21) +test bench::one_pass_short_b ... bench: 1270 ns/iter (+/- 26) +test bench::one_pass_short_b_not ... bench: 1869 ns/iter (+/- 25) +test bench::replace_all ... bench: 3124 ns/iter (+/- 53) diff --git a/vendor/regex/record/old-bench-log/old/04-fixed-benchmark b/vendor/regex/record/old-bench-log/old/04-fixed-benchmark new file mode 100644 index 0000000..1956e98 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/04-fixed-benchmark @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 373 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 202 ns/iter (+/- 12) +test bench::anchored_literal_short_match ... bench: 380 ns/iter (+/- 135) +test bench::anchored_literal_short_non_match ... bench: 211 ns/iter (+/- 1) +test bench::easy0_1K ... bench: 2,723 ns/iter (+/- 101) = 376 MB/s +test bench::easy0_32 ... bench: 255 ns/iter (+/- 2) = 125 MB/s +test bench::easy0_32K ... bench: 81,845 ns/iter (+/- 598) = 400 MB/s +test bench::easy1_1K ... bench: 3,872 ns/iter (+/- 783) = 264 MB/s +test bench::easy1_32 ... bench: 287 ns/iter (+/- 143) = 111 MB/s +test bench::easy1_32K ... bench: 115,340 ns/iter (+/- 4,717) = 284 MB/s +test bench::hard_1K ... bench: 52,484 ns/iter (+/- 472) = 19 MB/s +test bench::hard_32 ... bench: 1,923 ns/iter (+/- 49) = 16 MB/s +test bench::hard_32K ... bench: 1,710,214 ns/iter (+/- 9,733) = 19 MB/s +test bench::literal ... bench: 337 ns/iter (+/- 13) +test bench::match_class ... bench: 2,141 ns/iter (+/- 7) +test bench::match_class_in_range ... bench: 2,301 ns/iter (+/- 7) +test bench::medium_1K ... bench: 31,696 ns/iter (+/- 961) = 32 MB/s +test bench::medium_32 ... bench: 1,155 ns/iter (+/- 71) = 27 MB/s +test bench::medium_32K ... bench: 1,016,101 ns/iter (+/- 12,090) = 32 MB/s +test bench::no_exponential ... bench: 262,801 ns/iter (+/- 1,332) +test bench::not_literal ... bench: 1,729 ns/iter (+/- 3) +test bench::one_pass_long_prefix ... bench: 779 ns/iter (+/- 4) +test bench::one_pass_long_prefix_not ... bench: 779 ns/iter (+/- 6) +test bench::one_pass_short_a ... 
bench: 1,943 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 2,545 ns/iter (+/- 9) +test bench::one_pass_short_b ... bench: 1,364 ns/iter (+/- 4) +test bench::one_pass_short_b_not ... bench: 2,029 ns/iter (+/- 22) +test bench::replace_all ... bench: 3,185 ns/iter (+/- 12) diff --git a/vendor/regex/record/old-bench-log/old/05-thread-caching b/vendor/regex/record/old-bench-log/old/05-thread-caching new file mode 100644 index 0000000..238f978 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/05-thread-caching @@ -0,0 +1,29 @@ +test bench::anchored_literal_long_match ... bench: 287 ns/iter (+/- 11) +test bench::anchored_literal_long_non_match ... bench: 111 ns/iter (+/- 0) +test bench::anchored_literal_short_match ... bench: 286 ns/iter (+/- 4) +test bench::anchored_literal_short_non_match ... bench: 114 ns/iter (+/- 0) +test bench::easy0_1K ... bench: 2562 ns/iter (+/- 94) = 399 MB/s +test bench::easy0_32 ... bench: 95 ns/iter (+/- 1) = 336 MB/s +test bench::easy0_32K ... bench: 81755 ns/iter (+/- 576) = 400 MB/s +test bench::easy1_1K ... bench: 3586 ns/iter (+/- 917) = 285 MB/s +test bench::easy1_32 ... bench: 155 ns/iter (+/- 132) = 206 MB/s +test bench::easy1_32K ... bench: 113980 ns/iter (+/- 9331) = 287 MB/s +test bench::hard_1K ... bench: 54573 ns/iter (+/- 565) = 18 MB/s +test bench::hard_32 ... bench: 1806 ns/iter (+/- 44) = 17 MB/s +test bench::hard_32K ... bench: 1754465 ns/iter (+/- 7867) = 18 MB/s +test bench::literal ... bench: 299 ns/iter (+/- 1) +test bench::match_class ... bench: 2399 ns/iter (+/- 23) +test bench::match_class_in_range ... bench: 2142 ns/iter (+/- 8) +test bench::match_class_unicode ... bench: 2804 ns/iter (+/- 9) +test bench::medium_1K ... bench: 29536 ns/iter (+/- 537) = 34 MB/s +test bench::medium_32 ... bench: 962 ns/iter (+/- 59) = 33 MB/s +test bench::medium_32K ... bench: 946483 ns/iter (+/- 7106) = 34 MB/s +test bench::no_exponential ... bench: 274301 ns/iter (+/- 552) +test bench::not_literal ... bench: 2039 ns/iter (+/- 13) +test bench::one_pass_long_prefix ... bench: 573 ns/iter (+/- 3) +test bench::one_pass_long_prefix_not ... bench: 577 ns/iter (+/- 4) +test bench::one_pass_short_a ... bench: 1951 ns/iter (+/- 29) +test bench::one_pass_short_a_not ... bench: 2464 ns/iter (+/- 10) +test bench::one_pass_short_b ... bench: 1301 ns/iter (+/- 6) +test bench::one_pass_short_b_not ... bench: 1785 ns/iter (+/- 6) +test bench::replace_all ... bench: 2168 ns/iter (+/- 152) diff --git a/vendor/regex/record/old-bench-log/old/06-major-dynamic b/vendor/regex/record/old-bench-log/old/06-major-dynamic new file mode 100644 index 0000000..123efdd --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/06-major-dynamic @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 206 ns/iter (+/- 7) +test bench::anchored_literal_long_non_match ... bench: 97 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 193 ns/iter (+/- 1) +test bench::anchored_literal_short_non_match ... bench: 86 ns/iter (+/- 0) +test bench::easy0_1K ... bench: 356 ns/iter (+/- 136) = 2876 MB/s +test bench::easy0_1MB ... bench: 352,434 ns/iter (+/- 7,874) = 2974 MB/s +test bench::easy0_32 ... bench: 72 ns/iter (+/- 21) = 444 MB/s +test bench::easy0_32K ... bench: 11,053 ns/iter (+/- 1,388) = 2964 MB/s +test bench::easy1_1K ... bench: 331 ns/iter (+/- 162) = 3093 MB/s +test bench::easy1_1MB ... bench: 353,723 ns/iter (+/- 6,836) = 2964 MB/s +test bench::easy1_32 ... bench: 73 ns/iter (+/- 20) = 438 MB/s +test bench::easy1_32K ... 
bench: 10,297 ns/iter (+/- 1,137) = 3182 MB/s +test bench::hard_1K ... bench: 34,951 ns/iter (+/- 171) = 29 MB/s +test bench::hard_1MB ... bench: 63,323,613 ns/iter (+/- 279,582) = 15 MB/s +test bench::hard_32 ... bench: 1,131 ns/iter (+/- 13) = 28 MB/s +test bench::hard_32K ... bench: 1,099,921 ns/iter (+/- 1,338) = 29 MB/s +test bench::literal ... bench: 16 ns/iter (+/- 0) +test bench::match_class ... bench: 188 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 188 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 1,940 ns/iter (+/- 10) +test bench::medium_1K ... bench: 5,262 ns/iter (+/- 256) = 194 MB/s +test bench::medium_1MB ... bench: 5,295,539 ns/iter (+/- 9,808) = 197 MB/s +test bench::medium_32 ... bench: 217 ns/iter (+/- 19) = 147 MB/s +test bench::medium_32K ... bench: 169,169 ns/iter (+/- 1,606) = 193 MB/s +test bench::no_exponential ... bench: 293,739 ns/iter (+/- 1,632) +test bench::not_literal ... bench: 1,371 ns/iter (+/- 136) +test bench::one_pass_long_prefix ... bench: 337 ns/iter (+/- 6) +test bench::one_pass_long_prefix_not ... bench: 341 ns/iter (+/- 6) +test bench::one_pass_short_a ... bench: 1,399 ns/iter (+/- 16) +test bench::one_pass_short_a_not ... bench: 1,229 ns/iter (+/- 13) +test bench::one_pass_short_b ... bench: 844 ns/iter (+/- 24) +test bench::one_pass_short_b_not ... bench: 849 ns/iter (+/- 45) +test bench::replace_all ... bench: 579 ns/iter (+/- 3) diff --git a/vendor/regex/record/old-bench-log/old/06-major-macro b/vendor/regex/record/old-bench-log/old/06-major-macro new file mode 100644 index 0000000..199561d --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/06-major-macro @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 225 ns/iter (+/- 22) +test bench::anchored_literal_long_non_match ... bench: 62 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 225 ns/iter (+/- 1) +test bench::anchored_literal_short_non_match ... bench: 60 ns/iter (+/- 1) +test bench::easy0_1K ... bench: 29,984 ns/iter (+/- 190) = 34 MB/s +test bench::easy0_1MB ... bench: 30,641,690 ns/iter (+/- 110,535) = 33 MB/s +test bench::easy0_32 ... bench: 981 ns/iter (+/- 12) = 32 MB/s +test bench::easy0_32K ... bench: 957,358 ns/iter (+/- 2,633) = 34 MB/s +test bench::easy1_1K ... bench: 29,636 ns/iter (+/- 150) = 34 MB/s +test bench::easy1_1MB ... bench: 30,295,321 ns/iter (+/- 98,181) = 34 MB/s +test bench::easy1_32 ... bench: 971 ns/iter (+/- 30) = 32 MB/s +test bench::easy1_32K ... bench: 947,307 ns/iter (+/- 4,258) = 34 MB/s +test bench::hard_1K ... bench: 54,856 ns/iter (+/- 209) = 18 MB/s +test bench::hard_1MB ... bench: 56,126,571 ns/iter (+/- 224,163) = 17 MB/s +test bench::hard_32 ... bench: 1,776 ns/iter (+/- 23) = 18 MB/s +test bench::hard_32K ... bench: 1,753,833 ns/iter (+/- 54,427) = 18 MB/s +test bench::literal ... bench: 1,516 ns/iter (+/- 6) +test bench::match_class ... bench: 2,429 ns/iter (+/- 11) +test bench::match_class_in_range ... bench: 2,398 ns/iter (+/- 4) +test bench::match_class_unicode ... bench: 12,915 ns/iter (+/- 29) +test bench::medium_1K ... bench: 31,914 ns/iter (+/- 276) = 32 MB/s +test bench::medium_1MB ... bench: 32,617,173 ns/iter (+/- 68,114) = 31 MB/s +test bench::medium_32 ... bench: 1,046 ns/iter (+/- 42) = 30 MB/s +test bench::medium_32K ... bench: 1,019,516 ns/iter (+/- 3,788) = 32 MB/s +test bench::no_exponential ... bench: 303,239 ns/iter (+/- 518) +test bench::not_literal ... bench: 1,756 ns/iter (+/- 115) +test bench::one_pass_long_prefix ... 
bench: 834 ns/iter (+/- 7) +test bench::one_pass_long_prefix_not ... bench: 858 ns/iter (+/- 15) +test bench::one_pass_short_a ... bench: 1,597 ns/iter (+/- 9) +test bench::one_pass_short_a_not ... bench: 1,950 ns/iter (+/- 21) +test bench::one_pass_short_b ... bench: 1,077 ns/iter (+/- 5) +test bench::one_pass_short_b_not ... bench: 1,596 ns/iter (+/- 9) +test bench::replace_all ... bench: 1,288 ns/iter (+/- 13) diff --git a/vendor/regex/record/old-bench-log/old/07-prefix-improvements b/vendor/regex/record/old-bench-log/old/07-prefix-improvements new file mode 100644 index 0000000..55477fd --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/07-prefix-improvements @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 197 ns/iter (+/- 9) +test bench::anchored_literal_long_non_match ... bench: 95 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 193 ns/iter (+/- 2) +test bench::anchored_literal_short_non_match ... bench: 85 ns/iter (+/- 2) +test bench::easy0_1K ... bench: 304 ns/iter (+/- 119) = 3368 MB/s +test bench::easy0_1MB ... bench: 281,912 ns/iter (+/- 5,274) = 3719 MB/s +test bench::easy0_32 ... bench: 74 ns/iter (+/- 16) = 432 MB/s +test bench::easy0_32K ... bench: 8,909 ns/iter (+/- 667) = 3678 MB/s +test bench::easy1_1K ... bench: 300 ns/iter (+/- 111) = 3413 MB/s +test bench::easy1_1MB ... bench: 282,250 ns/iter (+/- 5,556) = 3714 MB/s +test bench::easy1_32 ... bench: 98 ns/iter (+/- 17) = 326 MB/s +test bench::easy1_32K ... bench: 8,105 ns/iter (+/- 593) = 4042 MB/s +test bench::hard_1K ... bench: 34,562 ns/iter (+/- 211) = 29 MB/s +test bench::hard_1MB ... bench: 64,510,947 ns/iter (+/- 308,627) = 15 MB/s +test bench::hard_32 ... bench: 1,139 ns/iter (+/- 26) = 28 MB/s +test bench::hard_32K ... bench: 1,102,562 ns/iter (+/- 1,850) = 29 MB/s +test bench::literal ... bench: 15 ns/iter (+/- 0) +test bench::match_class ... bench: 105 ns/iter (+/- 1) +test bench::match_class_in_range ... bench: 105 ns/iter (+/- 1) +test bench::match_class_unicode ... bench: 2,270 ns/iter (+/- 185) +test bench::medium_1K ... bench: 2,262 ns/iter (+/- 73) = 452 MB/s +test bench::medium_1MB ... bench: 2,185,098 ns/iter (+/- 3,007) = 479 MB/s +test bench::medium_32 ... bench: 139 ns/iter (+/- 1) = 230 MB/s +test bench::medium_32K ... bench: 72,320 ns/iter (+/- 193) = 453 MB/s +test bench::no_exponential ... bench: 300,699 ns/iter (+/- 494) +test bench::not_literal ... bench: 1,462 ns/iter (+/- 89) +test bench::one_pass_long_prefix ... bench: 283 ns/iter (+/- 1) +test bench::one_pass_long_prefix_not ... bench: 287 ns/iter (+/- 0) +test bench::one_pass_short_a ... bench: 1,131 ns/iter (+/- 11) +test bench::one_pass_short_a_not ... bench: 1,259 ns/iter (+/- 12) +test bench::one_pass_short_b ... bench: 883 ns/iter (+/- 15) +test bench::one_pass_short_b_not ... bench: 799 ns/iter (+/- 28) +test bench::replace_all ... bench: 170 ns/iter (+/- 1) diff --git a/vendor/regex/record/old-bench-log/old/08-case-fixes b/vendor/regex/record/old-bench-log/old/08-case-fixes new file mode 100644 index 0000000..7609f6c --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/08-case-fixes @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 192 ns/iter (+/- 11) +test bench::anchored_literal_long_non_match ... bench: 92 ns/iter (+/- 4) +test bench::anchored_literal_short_match ... bench: 182 ns/iter (+/- 6) +test bench::anchored_literal_short_non_match ... bench: 82 ns/iter (+/- 1) +test bench::easy0_1K ... 
bench: 277 ns/iter (+/- 79) = 3696 MB/s +test bench::easy0_1MB ... bench: 230,829 ns/iter (+/- 5,712) = 4542 MB/s +test bench::easy0_32 ... bench: 70 ns/iter (+/- 4) = 457 MB/s +test bench::easy0_32K ... bench: 8,444 ns/iter (+/- 492) = 3880 MB/s +test bench::easy1_1K ... bench: 272 ns/iter (+/- 98) = 3764 MB/s +test bench::easy1_1MB ... bench: 273,867 ns/iter (+/- 6,351) = 3828 MB/s +test bench::easy1_32 ... bench: 72 ns/iter (+/- 15) = 444 MB/s +test bench::easy1_32K ... bench: 8,109 ns/iter (+/- 540) = 4040 MB/s +test bench::hard_1K ... bench: 31,043 ns/iter (+/- 1,237) = 32 MB/s +test bench::hard_1MB ... bench: 60,077,413 ns/iter (+/- 129,611) = 16 MB/s +test bench::hard_32 ... bench: 1,036 ns/iter (+/- 20) = 30 MB/s +test bench::hard_32K ... bench: 996,238 ns/iter (+/- 3,181) = 32 MB/s +test bench::literal ... bench: 15 ns/iter (+/- 0) +test bench::match_class ... bench: 75 ns/iter (+/- 7) +test bench::match_class_in_range ... bench: 77 ns/iter (+/- 7) +test bench::match_class_unicode ... bench: 2,057 ns/iter (+/- 102) +test bench::medium_1K ... bench: 2,252 ns/iter (+/- 63) = 454 MB/s +test bench::medium_1MB ... bench: 2,186,091 ns/iter (+/- 7,496) = 479 MB/s +test bench::medium_32 ... bench: 132 ns/iter (+/- 2) = 242 MB/s +test bench::medium_32K ... bench: 72,394 ns/iter (+/- 342) = 452 MB/s +test bench::no_exponential ... bench: 286,662 ns/iter (+/- 1,150) +test bench::not_literal ... bench: 1,130 ns/iter (+/- 10) +test bench::one_pass_long_prefix ... bench: 271 ns/iter (+/- 0) +test bench::one_pass_long_prefix_not ... bench: 276 ns/iter (+/- 3) +test bench::one_pass_short_a ... bench: 1,147 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 901 ns/iter (+/- 8) +test bench::one_pass_short_b ... bench: 887 ns/iter (+/- 7) +test bench::one_pass_short_b_not ... bench: 777 ns/iter (+/- 6) +test bench::replace_all ... bench: 154 ns/iter (+/- 0) diff --git a/vendor/regex/record/old-bench-log/old/09-before-compiler-rewrite b/vendor/regex/record/old-bench-log/old/09-before-compiler-rewrite new file mode 100644 index 0000000..fe67d09 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/09-before-compiler-rewrite @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 156 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 85 ns/iter (+/- 7) +test bench::anchored_literal_short_match ... bench: 145 ns/iter (+/- 3) +test bench::anchored_literal_short_non_match ... bench: 76 ns/iter (+/- 2) +test bench::easy0_1K ... bench: 269 ns/iter (+/- 63) = 3806 MB/s +test bench::easy0_1MB ... bench: 232,461 ns/iter (+/- 13,022) = 4509 MB/s +test bench::easy0_32 ... bench: 63 ns/iter (+/- 6) = 507 MB/s +test bench::easy0_32K ... bench: 8,358 ns/iter (+/- 430) = 3920 MB/s +test bench::easy1_1K ... bench: 274 ns/iter (+/- 101) = 3737 MB/s +test bench::easy1_1MB ... bench: 278,949 ns/iter (+/- 11,324) = 3758 MB/s +test bench::easy1_32 ... bench: 63 ns/iter (+/- 15) = 507 MB/s +test bench::easy1_32K ... bench: 7,731 ns/iter (+/- 488) = 4238 MB/s +test bench::hard_1K ... bench: 44,685 ns/iter (+/- 661) = 22 MB/s +test bench::hard_1MB ... bench: 60,108,237 ns/iter (+/- 814,810) = 16 MB/s +test bench::hard_32 ... bench: 1,412 ns/iter (+/- 38) = 22 MB/s +test bench::hard_32K ... bench: 1,363,335 ns/iter (+/- 21,316) = 24 MB/s +test bench::literal ... bench: 14 ns/iter (+/- 0) +test bench::match_class ... bench: 81 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 81 ns/iter (+/- 2) +test bench::match_class_unicode ... 
bench: 2,978 ns/iter (+/- 64) +test bench::medium_1K ... bench: 2,239 ns/iter (+/- 68) = 457 MB/s +test bench::medium_1MB ... bench: 2,215,729 ns/iter (+/- 20,897) = 472 MB/s +test bench::medium_32 ... bench: 124 ns/iter (+/- 2) = 258 MB/s +test bench::medium_32K ... bench: 72,486 ns/iter (+/- 1,027) = 452 MB/s +test bench::no_exponential ... bench: 282,992 ns/iter (+/- 8,102) +test bench::not_literal ... bench: 1,526 ns/iter (+/- 32) +test bench::one_pass_long_prefix ... bench: 307 ns/iter (+/- 7) +test bench::one_pass_long_prefix_not ... bench: 311 ns/iter (+/- 8) +test bench::one_pass_short_a ... bench: 623 ns/iter (+/- 12) +test bench::one_pass_short_a_not ... bench: 920 ns/iter (+/- 19) +test bench::one_pass_short_b ... bench: 554 ns/iter (+/- 13) +test bench::one_pass_short_b_not ... bench: 740 ns/iter (+/- 12) +test bench::replace_all ... bench: 155 ns/iter (+/- 5) diff --git a/vendor/regex/record/old-bench-log/old/10-compiler-rewrite b/vendor/regex/record/old-bench-log/old/10-compiler-rewrite new file mode 100644 index 0000000..e25a602 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/10-compiler-rewrite @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 145 ns/iter (+/- 1) +test bench::anchored_literal_long_non_match ... bench: 92 ns/iter (+/- 2) +test bench::anchored_literal_short_match ... bench: 129 ns/iter (+/- 3) +test bench::anchored_literal_short_non_match ... bench: 72 ns/iter (+/- 1) +test bench::easy0_1K ... bench: 268 ns/iter (+/- 88) = 3820 MB/s +test bench::easy0_1MB ... bench: 234,067 ns/iter (+/- 4,663) = 4479 MB/s +test bench::easy0_32 ... bench: 64 ns/iter (+/- 4) = 500 MB/s +test bench::easy0_32K ... bench: 8,298 ns/iter (+/- 521) = 3948 MB/s +test bench::easy1_1K ... bench: 275 ns/iter (+/- 95) = 3723 MB/s +test bench::easy1_1MB ... bench: 280,466 ns/iter (+/- 5,938) = 3738 MB/s +test bench::easy1_32 ... bench: 64 ns/iter (+/- 16) = 500 MB/s +test bench::easy1_32K ... bench: 7,693 ns/iter (+/- 595) = 4259 MB/s +test bench::hard_1K ... bench: 27,844 ns/iter (+/- 1,012) = 36 MB/s +test bench::hard_1MB ... bench: 52,323,489 ns/iter (+/- 1,251,665) = 19 MB/s +test bench::hard_32 ... bench: 970 ns/iter (+/- 92) = 32 MB/s +test bench::hard_32K ... bench: 896,945 ns/iter (+/- 29,977) = 36 MB/s +test bench::literal ... bench: 13 ns/iter (+/- 1) +test bench::match_class ... bench: 80 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,150 ns/iter (+/- 18) +test bench::medium_1K ... bench: 2,241 ns/iter (+/- 55) = 456 MB/s +test bench::medium_1MB ... bench: 2,186,354 ns/iter (+/- 9,134) = 479 MB/s +test bench::medium_32 ... bench: 125 ns/iter (+/- 1) = 256 MB/s +test bench::medium_32K ... bench: 72,156 ns/iter (+/- 145) = 454 MB/s +test bench::no_exponential ... bench: 305,034 ns/iter (+/- 1,134) +test bench::not_literal ... bench: 1,169 ns/iter (+/- 105) +test bench::one_pass_long_prefix ... bench: 257 ns/iter (+/- 4) +test bench::one_pass_long_prefix_not ... bench: 276 ns/iter (+/- 4) +test bench::one_pass_short_a ... bench: 680 ns/iter (+/- 3) +test bench::one_pass_short_a_not ... bench: 804 ns/iter (+/- 48) +test bench::one_pass_short_b ... bench: 337 ns/iter (+/- 3) +test bench::one_pass_short_b_not ... bench: 339 ns/iter (+/- 5) +test bench::replace_all ... 
bench: 150 ns/iter (+/- 1) diff --git a/vendor/regex/record/old-bench-log/old/11-compiler-rewrite b/vendor/regex/record/old-bench-log/old/11-compiler-rewrite new file mode 100644 index 0000000..3296d43 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/11-compiler-rewrite @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 171 ns/iter (+/- 20) +test bench::anchored_literal_long_non_match ... bench: 90 ns/iter (+/- 8) +test bench::anchored_literal_short_match ... bench: 180 ns/iter (+/- 33) +test bench::anchored_literal_short_non_match ... bench: 78 ns/iter (+/- 9) +test bench::easy0_1K ... bench: 272 ns/iter (+/- 82) = 3764 MB/s +test bench::easy0_1MB ... bench: 233,014 ns/iter (+/- 22,144) = 4500 MB/s +test bench::easy0_32 ... bench: 62 ns/iter (+/- 6) = 516 MB/s +test bench::easy0_32K ... bench: 8,490 ns/iter (+/- 905) = 3859 MB/s +test bench::easy1_1K ... bench: 273 ns/iter (+/- 100) = 3750 MB/s +test bench::easy1_1MB ... bench: 279,901 ns/iter (+/- 5,598) = 3746 MB/s +test bench::easy1_32 ... bench: 62 ns/iter (+/- 6) = 516 MB/s +test bench::easy1_32K ... bench: 7,713 ns/iter (+/- 566) = 4248 MB/s +test bench::hard_1K ... bench: 38,641 ns/iter (+/- 605) = 26 MB/s +test bench::hard_1MB ... bench: 56,579,116 ns/iter (+/- 1,193,231) = 18 MB/s +test bench::hard_32 ... bench: 1,252 ns/iter (+/- 24) = 25 MB/s +test bench::hard_32K ... bench: 1,247,639 ns/iter (+/- 12,774) = 26 MB/s +test bench::literal ... bench: 13 ns/iter (+/- 1) +test bench::match_class ... bench: 80 ns/iter (+/- 1) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,459 ns/iter (+/- 77) +test bench::medium_1K ... bench: 2,244 ns/iter (+/- 63) = 456 MB/s +test bench::medium_1MB ... bench: 2,192,052 ns/iter (+/- 21,460) = 478 MB/s +test bench::medium_32 ... bench: 122 ns/iter (+/- 3) = 262 MB/s +test bench::medium_32K ... bench: 73,167 ns/iter (+/- 15,655) = 447 MB/s +test bench::no_exponential ... bench: 289,292 ns/iter (+/- 1,488) +test bench::not_literal ... bench: 1,480 ns/iter (+/- 18) +test bench::one_pass_long_prefix ... bench: 324 ns/iter (+/- 15) +test bench::one_pass_long_prefix_not ... bench: 337 ns/iter (+/- 5) +test bench::one_pass_short_a ... bench: 1,161 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 798 ns/iter (+/- 6) +test bench::one_pass_short_b ... bench: 456 ns/iter (+/- 6) +test bench::one_pass_short_b_not ... bench: 452 ns/iter (+/- 33) +test bench::replace_all ... bench: 148 ns/iter (+/- 0) diff --git a/vendor/regex/record/old-bench-log/old/12-executor b/vendor/regex/record/old-bench-log/old/12-executor new file mode 100644 index 0000000..8ec8561 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/12-executor @@ -0,0 +1,35 @@ +test bench::anchored_literal_long_match ... bench: 179 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 90 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 164 ns/iter (+/- 16) +test bench::anchored_literal_short_non_match ... bench: 79 ns/iter (+/- 1) +test bench::compile_simple ... bench: 3,708 ns/iter (+/- 225) +test bench::compile_unicode ... bench: 5,871 ns/iter (+/- 264) +test bench::easy0_1K ... bench: 263 ns/iter (+/- 92) = 3893 MB/s +test bench::easy0_1MB ... bench: 217,835 ns/iter (+/- 4,074) = 4813 MB/s +test bench::easy0_32 ... bench: 67 ns/iter (+/- 1) = 477 MB/s +test bench::easy0_32K ... bench: 8,204 ns/iter (+/- 426) = 3994 MB/s +test bench::easy1_1K ... 
bench: 276 ns/iter (+/- 100) = 3710 MB/s +test bench::easy1_1MB ... bench: 284,086 ns/iter (+/- 6,516) = 3691 MB/s +test bench::easy1_32 ... bench: 70 ns/iter (+/- 15) = 457 MB/s +test bench::easy1_32K ... bench: 7,844 ns/iter (+/- 556) = 4177 MB/s +test bench::hard_1K ... bench: 30,062 ns/iter (+/- 1,684) = 34 MB/s +test bench::hard_1MB ... bench: 50,839,701 ns/iter (+/- 104,343) = 20 MB/s +test bench::hard_32 ... bench: 1,009 ns/iter (+/- 48) = 31 MB/s +test bench::hard_32K ... bench: 965,341 ns/iter (+/- 45,075) = 33 MB/s +test bench::literal ... bench: 12 ns/iter (+/- 0) +test bench::match_class ... bench: 80 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 1) +test bench::match_class_unicode ... bench: 2,150 ns/iter (+/- 22) +test bench::medium_1K ... bench: 2,262 ns/iter (+/- 66) = 452 MB/s +test bench::medium_1MB ... bench: 2,193,428 ns/iter (+/- 6,147) = 478 MB/s +test bench::medium_32 ... bench: 129 ns/iter (+/- 1) = 248 MB/s +test bench::medium_32K ... bench: 72,629 ns/iter (+/- 348) = 451 MB/s +test bench::no_exponential ... bench: 289,043 ns/iter (+/- 2,478) +test bench::not_literal ... bench: 1,195 ns/iter (+/- 10) +test bench::one_pass_long_prefix ... bench: 265 ns/iter (+/- 3) +test bench::one_pass_long_prefix_not ... bench: 270 ns/iter (+/- 4) +test bench::one_pass_short_a ... bench: 730 ns/iter (+/- 4) +test bench::one_pass_short_a_not ... bench: 712 ns/iter (+/- 4) +test bench::one_pass_short_b ... bench: 445 ns/iter (+/- 49) +test bench::one_pass_short_b_not ... bench: 406 ns/iter (+/- 72) +test bench::replace_all ... bench: 136 ns/iter (+/- 2) diff --git a/vendor/regex/record/old-bench-log/old/12-executor-bytes b/vendor/regex/record/old-bench-log/old/12-executor-bytes new file mode 100644 index 0000000..c036920 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/12-executor-bytes @@ -0,0 +1,35 @@ +test bench::anchored_literal_long_match ... bench: 190 ns/iter (+/- 12) +test bench::anchored_literal_long_non_match ... bench: 85 ns/iter (+/- 2) +test bench::anchored_literal_short_match ... bench: 147 ns/iter (+/- 9) +test bench::anchored_literal_short_non_match ... bench: 74 ns/iter (+/- 5) +test bench::compile_simple ... bench: 4,218 ns/iter (+/- 201) +test bench::compile_unicode ... bench: 402,353 ns/iter (+/- 2,642) +test bench::easy0_1K ... bench: 253 ns/iter (+/- 79) = 4047 MB/s +test bench::easy0_1MB ... bench: 215,308 ns/iter (+/- 3,474) = 4870 MB/s +test bench::easy0_32 ... bench: 64 ns/iter (+/- 4) = 500 MB/s +test bench::easy0_32K ... bench: 8,134 ns/iter (+/- 435) = 4028 MB/s +test bench::easy1_1K ... bench: 277 ns/iter (+/- 105) = 3696 MB/s +test bench::easy1_1MB ... bench: 283,435 ns/iter (+/- 5,975) = 3699 MB/s +test bench::easy1_32 ... bench: 64 ns/iter (+/- 14) = 500 MB/s +test bench::easy1_32K ... bench: 7,832 ns/iter (+/- 575) = 4183 MB/s +test bench::hard_1K ... bench: 35,380 ns/iter (+/- 772) = 28 MB/s +test bench::hard_1MB ... bench: 46,639,535 ns/iter (+/- 456,010) = 22 MB/s +test bench::hard_32 ... bench: 1,110 ns/iter (+/- 53) = 28 MB/s +test bench::hard_32K ... bench: 1,146,751 ns/iter (+/- 17,290) = 28 MB/s +test bench::literal ... bench: 12 ns/iter (+/- 0) +test bench::match_class ... bench: 80 ns/iter (+/- 1) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,487,088 ns/iter (+/- 103,259) +test bench::medium_1K ... bench: 2,253 ns/iter (+/- 52) = 454 MB/s +test bench::medium_1MB ... 
bench: 2,193,344 ns/iter (+/- 7,582) = 478 MB/s +test bench::medium_32 ... bench: 119 ns/iter (+/- 5) = 268 MB/s +test bench::medium_32K ... bench: 72,569 ns/iter (+/- 283) = 451 MB/s +test bench::no_exponential ... bench: 292,840 ns/iter (+/- 2,823) +test bench::not_literal ... bench: 6,417 ns/iter (+/- 26) +test bench::one_pass_long_prefix ... bench: 304 ns/iter (+/- 0) +test bench::one_pass_long_prefix_not ... bench: 943 ns/iter (+/- 44) +test bench::one_pass_short_a ... bench: 688 ns/iter (+/- 11) +test bench::one_pass_short_a_not ... bench: 687 ns/iter (+/- 7) +test bench::one_pass_short_b ... bench: 589 ns/iter (+/- 6) +test bench::one_pass_short_b_not ... bench: 357 ns/iter (+/- 11) +test bench::replace_all ... bench: 131 ns/iter (+/- 1) diff --git a/vendor/regex/record/old-bench-log/old/13-cache-byte-range-suffixes b/vendor/regex/record/old-bench-log/old/13-cache-byte-range-suffixes new file mode 100644 index 0000000..5a2ec09 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/13-cache-byte-range-suffixes @@ -0,0 +1,35 @@ +test bench::anchored_literal_long_match ... bench: 174 ns/iter (+/- 65) +test bench::anchored_literal_long_non_match ... bench: 94 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 142 ns/iter (+/- 1) +test bench::anchored_literal_short_non_match ... bench: 82 ns/iter (+/- 0) +test bench::compile_simple ... bench: 4,878 ns/iter (+/- 207) +test bench::compile_unicode ... bench: 679,701 ns/iter (+/- 10,264) +test bench::easy0_1K ... bench: 257 ns/iter (+/- 83) = 3984 MB/s +test bench::easy0_1MB ... bench: 217,698 ns/iter (+/- 3,307) = 4816 MB/s +test bench::easy0_32 ... bench: 61 ns/iter (+/- 3) = 524 MB/s +test bench::easy0_32K ... bench: 8,144 ns/iter (+/- 449) = 4023 MB/s +test bench::easy1_1K ... bench: 276 ns/iter (+/- 106) = 3710 MB/s +test bench::easy1_1MB ... bench: 285,518 ns/iter (+/- 4,933) = 3672 MB/s +test bench::easy1_32 ... bench: 61 ns/iter (+/- 12) = 524 MB/s +test bench::easy1_32K ... bench: 7,896 ns/iter (+/- 508) = 4149 MB/s +test bench::hard_1K ... bench: 35,361 ns/iter (+/- 684) = 28 MB/s +test bench::hard_1MB ... bench: 48,691,236 ns/iter (+/- 2,316,446) = 21 MB/s +test bench::hard_32 ... bench: 1,087 ns/iter (+/- 33) = 29 MB/s +test bench::hard_32K ... bench: 1,147,627 ns/iter (+/- 4,982) = 28 MB/s +test bench::literal ... bench: 12 ns/iter (+/- 0) +test bench::match_class ... bench: 80 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,431,592 ns/iter (+/- 89,268) +test bench::medium_1K ... bench: 2,245 ns/iter (+/- 93) = 456 MB/s +test bench::medium_1MB ... bench: 2,192,828 ns/iter (+/- 4,343) = 478 MB/s +test bench::medium_32 ... bench: 120 ns/iter (+/- 2) = 266 MB/s +test bench::medium_32K ... bench: 72,996 ns/iter (+/- 627) = 448 MB/s +test bench::no_exponential ... bench: 290,775 ns/iter (+/- 1,176) +test bench::not_literal ... bench: 5,282 ns/iter (+/- 199) +test bench::one_pass_long_prefix ... bench: 294 ns/iter (+/- 3) +test bench::one_pass_long_prefix_not ... bench: 315 ns/iter (+/- 7) +test bench::one_pass_short_a ... bench: 708 ns/iter (+/- 21) +test bench::one_pass_short_a_not ... bench: 861 ns/iter (+/- 9) +test bench::one_pass_short_b ... bench: 607 ns/iter (+/- 2) +test bench::one_pass_short_b_not ... bench: 344 ns/iter (+/- 11) +test bench::replace_all ... 
bench: 135 ns/iter (+/- 1)
diff --git a/vendor/regex/src/backtrack.rs b/vendor/regex/src/backtrack.rs
deleted file mode 100644
index 4d83856..0000000
--- a/vendor/regex/src/backtrack.rs
+++ /dev/null
@@ -1,282 +0,0 @@
-// This is the backtracking matching engine. It has the same exact capability
-// as the full NFA simulation, except it is artificially restricted to small
-// regexes on small inputs because of its memory requirements.
-//
-// In particular, this is a *bounded* backtracking engine. It retains worst
-// case linear time by keeping track of the states that it has visited (using a
-// bitmap). Namely, once a state is visited, it is never visited again. Since a
-// state is keyed by `(instruction index, input index)`, we have that its time
-// complexity is `O(mn)` (i.e., linear in the size of the search text).
-//
-// The backtracking engine can beat out the NFA simulation on small
-// regexes/inputs because it doesn't have to keep track of multiple copies of
-// the capture groups. In benchmarks, the backtracking engine is roughly twice
-// as fast as the full NFA simulation. Note though that its performance doesn't
-// scale, even if you're willing to live with the memory requirements. Namely,
-// the bitset has to be zeroed on each execution, which becomes quite expensive
-// on large bitsets.
-
-use crate::exec::ProgramCache;
-use crate::input::{Input, InputAt};
-use crate::prog::{InstPtr, Program};
-use crate::re_trait::Slot;
-
-type Bits = u32;
-
-const BIT_SIZE: usize = 32;
-const MAX_SIZE_BYTES: usize = 256 * (1 << 10); // 256 KB
-
-/// Returns true iff the given regex and input should be executed by this
-/// engine with reasonable memory usage.
-pub fn should_exec(num_insts: usize, text_len: usize) -> bool {
-    // Total memory usage in bytes is determined by:
-    //
-    //   ((len(insts) * (len(input) + 1) + bits - 1) / bits) * (size_of(u32))
-    //
-    // The actual limit picked is pretty much a heuristic.
-    // See: https://github.com/rust-lang/regex/issues/215
-    let size = ((num_insts * (text_len + 1) + BIT_SIZE - 1) / BIT_SIZE) * 4;
-    size <= MAX_SIZE_BYTES
-}
-
-/// A backtracking matching engine.
-#[derive(Debug)]
-pub struct Bounded<'a, 'm, 'r, 's, I> {
-    prog: &'r Program,
-    input: I,
-    matches: &'m mut [bool],
-    slots: &'s mut [Slot],
-    m: &'a mut Cache,
-}
-
-/// Shared cached state between multiple invocations of a backtracking engine
-/// in the same thread.
-#[derive(Clone, Debug)]
-pub struct Cache {
-    jobs: Vec<Job>,
-    visited: Vec<Bits>,
-}
-
-impl Cache {
-    /// Create new empty cache for the backtracking engine.
-    pub fn new(_prog: &Program) -> Self {
-        Cache { jobs: vec![], visited: vec![] }
-    }
-}
-
-/// A job is an explicit unit of stack space in the backtracking engine.
-///
-/// The "normal" representation is a single state transition, which corresponds
-/// to an NFA state and a character in the input. However, the backtracking
-/// engine must keep track of old capture group values. We use the explicit
-/// stack to do it.
-#[derive(Clone, Copy, Debug)]
-enum Job {
-    Inst { ip: InstPtr, at: InputAt },
-    SaveRestore { slot: usize, old_pos: Option<usize> },
-}
-
-impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
-    /// Execute the backtracking matching engine.
-    ///
-    /// If there's a match, `exec` returns `true` and populates the given
-    /// captures accordingly.
-    pub fn exec(
-        prog: &'r Program,
-        cache: &ProgramCache,
-        matches: &'m mut [bool],
-        slots: &'s mut [Slot],
-        input: I,
-        start: usize,
-        end: usize,
-    ) -> bool {
-        let mut cache = cache.borrow_mut();
-        let cache = &mut cache.backtrack;
-        let start = input.at(start);
-        let mut b = Bounded { prog, input, matches, slots, m: cache };
-        b.exec_(start, end)
-    }
-
-    /// Clears the cache such that the backtracking engine can be executed
-    /// on some input of fixed length.
-    fn clear(&mut self) {
-        // Reset the job memory so that we start fresh.
-        self.m.jobs.clear();
-
-        // Now we need to clear the bit state set.
-        // We do this by figuring out how much space we need to keep track
-        // of the states we've visited.
-        // Then we reset all existing allocated space to 0.
-        // Finally, we request more space if we need it.
-        //
-        // This is all a little circuitous, but doing this using unchecked
-        // operations doesn't seem to have a measurable impact on performance.
-        // (Probably because backtracking is limited to such small
-        // inputs/regexes in the first place.)
-        let visited_len =
-            (self.prog.len() * (self.input.len() + 1) + BIT_SIZE - 1)
-                / BIT_SIZE;
-        self.m.visited.truncate(visited_len);
-        for v in &mut self.m.visited {
-            *v = 0;
-        }
-        if visited_len > self.m.visited.len() {
-            let len = self.m.visited.len();
-            self.m.visited.reserve_exact(visited_len - len);
-            for _ in 0..(visited_len - len) {
-                self.m.visited.push(0);
-            }
-        }
-    }
-
-    /// Start backtracking at the given position in the input, but also look
-    /// for literal prefixes.
-    fn exec_(&mut self, mut at: InputAt, end: usize) -> bool {
-        self.clear();
-        // If this is an anchored regex at the beginning of the input, then
-        // we're either already done or we only need to try backtracking once.
-        if self.prog.is_anchored_start {
-            return if !at.is_start() { false } else { self.backtrack(at) };
-        }
-        let mut matched = false;
-        loop {
-            if !self.prog.prefixes.is_empty() {
-                at = match self.input.prefix_at(&self.prog.prefixes, at) {
-                    None => break,
-                    Some(at) => at,
-                };
-            }
-            matched = self.backtrack(at) || matched;
-            if matched && self.prog.matches.len() == 1 {
-                return true;
-            }
-            if at.pos() >= end {
-                break;
-            }
-            at = self.input.at(at.next_pos());
-        }
-        matched
-    }
-
-    /// The main backtracking loop starting at the given input position.
-    fn backtrack(&mut self, start: InputAt) -> bool {
-        // N.B. We use an explicit stack to avoid recursion.
-        // To avoid excessive pushing and popping, most transitions are handled
-        // in the `step` helper function, which only pushes to the stack when
-        // there's a capture or a branch.
-        let mut matched = false;
-        self.m.jobs.push(Job::Inst { ip: 0, at: start });
-        while let Some(job) = self.m.jobs.pop() {
-            match job {
-                Job::Inst { ip, at } => {
-                    if self.step(ip, at) {
-                        // Only quit if we're matching one regex.
-                        // If we're matching a regex set, then mush on and
-                        // try to find other matches (if we want them).
-                        if self.prog.matches.len() == 1 {
-                            return true;
-                        }
-                        matched = true;
-                    }
-                }
-                Job::SaveRestore { slot, old_pos } => {
-                    if slot < self.slots.len() {
-                        self.slots[slot] = old_pos;
-                    }
-                }
-            }
-        }
-        matched
-    }
-
-    fn step(&mut self, mut ip: InstPtr, mut at: InputAt) -> bool {
-        use crate::prog::Inst::*;
-        loop {
-            // This loop is an optimization to avoid constantly pushing/popping
-            // from the stack. Namely, if we're pushing a job only to run it
-            // next, avoid the push and just mutate `ip` (and possibly `at`)
-            // in place.
-            if self.has_visited(ip, at) {
-                return false;
-            }
-            match self.prog[ip] {
-                Match(slot) => {
-                    if slot < self.matches.len() {
-                        self.matches[slot] = true;
-                    }
-                    return true;
-                }
-                Save(ref inst) => {
-                    if let Some(&old_pos) = self.slots.get(inst.slot) {
-                        // If this path doesn't work out, then we save the old
-                        // capture index (if one exists) in an alternate
-                        // job. If the next path fails, then the alternate
-                        // job is popped and the old capture index is restored.
-                        self.m.jobs.push(Job::SaveRestore {
-                            slot: inst.slot,
-                            old_pos,
-                        });
-                        self.slots[inst.slot] = Some(at.pos());
-                    }
-                    ip = inst.goto;
-                }
-                Split(ref inst) => {
-                    self.m.jobs.push(Job::Inst { ip: inst.goto2, at });
-                    ip = inst.goto1;
-                }
-                EmptyLook(ref inst) => {
-                    if self.input.is_empty_match(at, inst) {
-                        ip = inst.goto;
-                    } else {
-                        return false;
-                    }
-                }
-                Char(ref inst) => {
-                    if inst.c == at.char() {
-                        ip = inst.goto;
-                        at = self.input.at(at.next_pos());
-                    } else {
-                        return false;
-                    }
-                }
-                Ranges(ref inst) => {
-                    if inst.matches(at.char()) {
-                        ip = inst.goto;
-                        at = self.input.at(at.next_pos());
-                    } else {
-                        return false;
-                    }
-                }
-                Bytes(ref inst) => {
-                    if let Some(b) = at.byte() {
-                        if inst.matches(b) {
-                            ip = inst.goto;
-                            at = self.input.at(at.next_pos());
-                            continue;
-                        }
-                    }
-                    return false;
-                }
-            }
-        }
-    }
-
-    fn has_visited(&mut self, ip: InstPtr, at: InputAt) -> bool {
-        let k = ip * (self.input.len() + 1) + at.pos();
-        let k1 = k / BIT_SIZE;
-        let k2 = usize_to_u32(1 << (k & (BIT_SIZE - 1)));
-        if self.m.visited[k1] & k2 == 0 {
-            self.m.visited[k1] |= k2;
-            false
-        } else {
-            true
-        }
-    }
-}
-
-fn usize_to_u32(n: usize) -> u32 {
-    if (n as u64) > (::std::u32::MAX as u64) {
-        panic!("BUG: {} is too big to fit into u32", n)
-    }
-    n as u32
-}
diff --git a/vendor/regex/src/builders.rs b/vendor/regex/src/builders.rs
new file mode 100644
index 0000000..c111a96
--- /dev/null
+++ b/vendor/regex/src/builders.rs
@@ -0,0 +1,2539 @@
+#![allow(warnings)]
+
+// This module defines an internal builder that encapsulates all interaction
+// with meta::Regex construction, and then 4 public API builders that wrap
+// around it. The docs are essentially repeated on each of the 4 public
+// builders, with tweaks to the examples as needed.
+//
+// The reason why there are so many builders is partially because of a misstep
+// in the initial API design: the builder constructor takes in the pattern
+// strings instead of using the `build` method to accept the pattern strings.
+// This means `new` has a different signature for each builder. It probably
+// would have been nicer to use one builder with `fn new()`, and then add
+// `build(pat)` and `build_many(pats)` constructors.
+//
+// The other reason is that I think the `bytes` module should probably
+// have its own builder type. That way, it is completely isolated from the
+// top-level API.
+//
+// If I could do it again, I'd probably have a `regex::Builder` and a
+// `regex::bytes::Builder`. Each would have `build` and `build_set` (or
+// `build_many`) methods for constructing a single pattern `Regex` and a
+// multi-pattern `RegexSet`, respectively.
+
+use alloc::{
+    string::{String, ToString},
+    sync::Arc,
+    vec,
+    vec::Vec,
+};
+
+use regex_automata::{
+    meta, nfa::thompson::WhichCaptures, util::syntax, MatchKind,
+};
+
+use crate::error::Error;
+
+/// A builder for constructing a `Regex`, `bytes::Regex`, `RegexSet` or a
+/// `bytes::RegexSet`.
+///
+/// This is essentially the implementation of the four different builder types
+/// in the public API: `RegexBuilder`, `bytes::RegexBuilder`, `RegexSetBuilder`
+/// and `bytes::RegexSetBuilder`.
+#[derive(Clone, Debug)]
+struct Builder {
+    pats: Vec<String>,
+    metac: meta::Config,
+    syntaxc: syntax::Config,
+}
+
+impl Default for Builder {
+    fn default() -> Builder {
+        let metac = meta::Config::new()
+            .nfa_size_limit(Some(10 * (1 << 20)))
+            .hybrid_cache_capacity(2 * (1 << 20));
+        Builder { pats: vec![], metac, syntaxc: syntax::Config::default() }
+    }
+}
+
+impl Builder {
+    fn new<I, S>(patterns: I) -> Builder
+    where
+        S: AsRef<str>,
+        I: IntoIterator<Item = S>,
+    {
+        let mut b = Builder::default();
+        b.pats.extend(patterns.into_iter().map(|p| p.as_ref().to_string()));
+        b
+    }
+
+    fn build_one_string(&self) -> Result<crate::Regex, Error> {
+        assert_eq!(1, self.pats.len());
+        let metac = self
+            .metac
+            .clone()
+            .match_kind(MatchKind::LeftmostFirst)
+            .utf8_empty(true);
+        let syntaxc = self.syntaxc.clone().utf8(true);
+        let pattern = Arc::from(self.pats[0].as_str());
+        meta::Builder::new()
+            .configure(metac)
+            .syntax(syntaxc)
+            .build(&pattern)
+            .map(|meta| crate::Regex { meta, pattern })
+            .map_err(Error::from_meta_build_error)
+    }
+
+    fn build_one_bytes(&self) -> Result<crate::bytes::Regex, Error> {
+        assert_eq!(1, self.pats.len());
+        let metac = self
+            .metac
+            .clone()
+            .match_kind(MatchKind::LeftmostFirst)
+            .utf8_empty(false);
+        let syntaxc = self.syntaxc.clone().utf8(false);
+        let pattern = Arc::from(self.pats[0].as_str());
+        meta::Builder::new()
+            .configure(metac)
+            .syntax(syntaxc)
+            .build(&pattern)
+            .map(|meta| crate::bytes::Regex { meta, pattern })
+            .map_err(Error::from_meta_build_error)
+    }
+
+    fn build_many_string(&self) -> Result<crate::RegexSet, Error> {
+        let metac = self
+            .metac
+            .clone()
+            .match_kind(MatchKind::All)
+            .utf8_empty(true)
+            .which_captures(WhichCaptures::None);
+        let syntaxc = self.syntaxc.clone().utf8(true);
+        let patterns = Arc::from(self.pats.as_slice());
+        meta::Builder::new()
+            .configure(metac)
+            .syntax(syntaxc)
+            .build_many(&patterns)
+            .map(|meta| crate::RegexSet { meta, patterns })
+            .map_err(Error::from_meta_build_error)
+    }
+
+    fn build_many_bytes(&self) -> Result<crate::bytes::RegexSet, Error> {
+        let metac = self
+            .metac
+            .clone()
+            .match_kind(MatchKind::All)
+            .utf8_empty(false)
+            .which_captures(WhichCaptures::None);
+        let syntaxc = self.syntaxc.clone().utf8(false);
+        let patterns = Arc::from(self.pats.as_slice());
+        meta::Builder::new()
+            .configure(metac)
+            .syntax(syntaxc)
+            .build_many(&patterns)
+            .map(|meta| crate::bytes::RegexSet { meta, patterns })
+            .map_err(Error::from_meta_build_error)
+    }
+
+    fn case_insensitive(&mut self, yes: bool) -> &mut Builder {
+        self.syntaxc = self.syntaxc.case_insensitive(yes);
+        self
+    }
+
+    fn multi_line(&mut self, yes: bool) -> &mut Builder {
+        self.syntaxc = self.syntaxc.multi_line(yes);
+        self
+    }
+
+    fn dot_matches_new_line(&mut self, yes: bool) -> &mut Builder {
+        self.syntaxc = self.syntaxc.dot_matches_new_line(yes);
+        self
+    }
+
+    fn crlf(&mut self, yes: bool) -> &mut Builder {
+        self.syntaxc = self.syntaxc.crlf(yes);
+        self
+    }
+
+    fn line_terminator(&mut self, byte: u8) -> &mut Builder {
+        self.metac = self.metac.clone().line_terminator(byte);
+        self.syntaxc = self.syntaxc.line_terminator(byte);
+        self
+    }
+
+    fn swap_greed(&mut self, yes: bool) -> &mut Builder {
+        self.syntaxc = self.syntaxc.swap_greed(yes);
+        self
+    }
+
+    fn ignore_whitespace(&mut self, yes: bool) -> &mut Builder {
+        self.syntaxc = self.syntaxc.ignore_whitespace(yes);
+        self
+    }
+
+    fn unicode(&mut self, yes: bool) -> &mut Builder {
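+        // Note: this toggles the default for the `u` flag at parse time.
+        // Inline flags in a pattern, e.g. `(?-u:...)`, can still override
+        // it for sub-expressions.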
+        self.syntaxc = self.syntaxc.unicode(yes);
+        self
+    }
+
+    fn octal(&mut self, yes: bool) -> &mut Builder {
+        self.syntaxc = self.syntaxc.octal(yes);
+        self
+    }
+
+    fn size_limit(&mut self, limit: usize) -> &mut Builder {
+        self.metac = self.metac.clone().nfa_size_limit(Some(limit));
+        self
+    }
+
+    fn dfa_size_limit(&mut self, limit: usize) -> &mut Builder {
+        self.metac = self.metac.clone().hybrid_cache_capacity(limit);
+        self
+    }
+
+    fn nest_limit(&mut self, limit: u32) -> &mut Builder {
+        self.syntaxc = self.syntaxc.nest_limit(limit);
+        self
+    }
+}
+
+pub(crate) mod string {
+    use crate::{error::Error, Regex, RegexSet};
+
+    use super::Builder;
+
+    /// A configurable builder for a [`Regex`].
+    ///
+    /// This builder can be used to programmatically set flags such as `i`
+    /// (case insensitive) and `x` (for verbose mode). This builder can also be
+    /// used to configure things like the line terminator and a size limit on
+    /// the compiled regular expression.
+    #[derive(Clone, Debug)]
+    pub struct RegexBuilder {
+        builder: Builder,
+    }
+
+    impl RegexBuilder {
+        /// Create a new builder with a default configuration for the given
+        /// pattern.
+        ///
+        /// If the pattern is invalid or exceeds the configured size limits,
+        /// then an error will be returned when [`RegexBuilder::build`] is
+        /// called.
+        pub fn new(pattern: &str) -> RegexBuilder {
+            RegexBuilder { builder: Builder::new([pattern]) }
+        }
+
+        /// Compiles the pattern given to `RegexBuilder::new` with the
+        /// configuration set on this builder.
+        ///
+        /// If the pattern isn't a valid regex or if a configured size limit
+        /// was exceeded, then an error is returned.
+        pub fn build(&self) -> Result<Regex, Error> {
+            self.builder.build_one_string()
+        }
+
+        /// This configures Unicode mode for the entire pattern.
+        ///
+        /// Enabling Unicode mode does a number of things:
+        ///
+        /// * Most fundamentally, it causes the fundamental atom of matching
+        /// to be a single codepoint. When Unicode mode is disabled, it's a
+        /// single byte. For example, when Unicode mode is enabled, `.` will
+        /// match `💩` once, whereas it will match 4 times when Unicode mode
+        /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.)
+        /// * Case insensitive matching uses Unicode simple case folding rules.
+        /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are
+        /// available.
+        /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and
+        /// `\d`.
+        /// * The word boundary assertions, `\b` and `\B`, use the Unicode
+        /// definition of a word character.
+        ///
+        /// Note that if Unicode mode is disabled, then the regex will fail to
+        /// compile if it could match invalid UTF-8. For example, when Unicode
+        /// mode is disabled, since `.` matches any byte (except for `\n`),
+        /// it can match invalid UTF-8, and thus building a regex from it
+        /// will fail. Another example is `\w` and `\W`. Since `\w` can
+        /// only match ASCII bytes when Unicode mode is disabled, it's allowed.
+        /// But `\W` can match more than ASCII bytes, including invalid UTF-8,
+        /// and so it is not allowed. This restriction can be lifted only by
+        /// using a [`bytes::Regex`](crate::bytes::Regex).
+        ///
+        /// For more details on the Unicode support in this crate, see the
+        /// [Unicode section](crate#unicode) in this crate's top-level
+        /// documentation.
+        ///
+        /// The default for this is `true`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexBuilder;
+        ///
+        /// let re = RegexBuilder::new(r"\w")
+        ///     .unicode(false)
+        ///     .build()
+        ///     .unwrap();
+        /// // Normally Greek letters would be included in \w, but since
+        /// // Unicode mode is disabled, it only matches ASCII letters.
+        /// assert!(!re.is_match("δ"));
+        ///
+        /// let re = RegexBuilder::new(r"s")
+        ///     .case_insensitive(true)
+        ///     .unicode(false)
+        ///     .build()
+        ///     .unwrap();
+        /// // Normally 'ſ' is included when searching for 's' case
+        /// // insensitively due to Unicode's simple case folding rules. But
+        /// // when Unicode mode is disabled, only ASCII case insensitive rules
+        /// // are used.
+        /// assert!(!re.is_match("ſ"));
+        /// ```
+        pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {
+            self.builder.unicode(yes);
+            self
+        }
+
+        /// This configures whether to enable case insensitive matching for the
+        /// entire pattern.
+        ///
+        /// This setting can also be configured using the inline flag `i`
+        /// in the pattern. For example, `(?i:foo)` matches `foo` case
+        /// insensitively while `(?-i:foo)` matches `foo` case sensitively.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexBuilder;
+        ///
+        /// let re = RegexBuilder::new(r"foo(?-i:bar)quux")
+        ///     .case_insensitive(true)
+        ///     .build()
+        ///     .unwrap();
+        /// assert!(re.is_match("FoObarQuUx"));
+        /// // Even though case insensitive matching is enabled in the builder,
+        /// // it can be locally disabled within the pattern. In this case,
+        /// // `bar` is matched case sensitively.
+        /// assert!(!re.is_match("fooBARquux"));
+        /// ```
+        pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder {
+            self.builder.case_insensitive(yes);
+            self
+        }
+
+        /// This configures multi-line mode for the entire pattern.
+        ///
+        /// Enabling multi-line mode changes the behavior of the `^` and `$`
+        /// anchor assertions. Instead of only matching at the beginning and
+        /// end of a haystack, respectively, multi-line mode causes them to
+        /// match at the beginning and end of a line *in addition* to the
+        /// beginning and end of a haystack. More precisely, `^` will match at
+        /// the position immediately following a `\n` and `$` will match at the
+        /// position immediately preceding a `\n`.
+        ///
+        /// The behavior of this option can be impacted by other settings too:
+        ///
+        /// * The [`RegexBuilder::line_terminator`] option changes `\n` above
+        /// to any ASCII byte.
+        /// * The [`RegexBuilder::crlf`] option changes the line terminator to
+        /// be either `\r` or `\n`, but never at the position between a `\r`
+        /// and `\n`.
+        ///
+        /// This setting can also be configured using the inline flag `m` in
+        /// the pattern.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexBuilder;
+        ///
+        /// let re = RegexBuilder::new(r"^foo$")
+        ///     .multi_line(true)
+        ///     .build()
+        ///     .unwrap();
+        /// assert_eq!(Some(1..4), re.find("\nfoo\n").map(|m| m.range()));
+        /// ```
+        pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder {
+            self.builder.multi_line(yes);
+            self
+        }
+
+        /// This configures dot-matches-new-line mode for the entire pattern.
+        ///
+        /// Perhaps surprisingly, the default behavior for `.` is not to match
+        /// any character, but rather, to match any character except for the
+        /// line terminator (which is `\n` by default). When this mode is
+        /// enabled, the behavior changes such that `.` truly matches any
+        /// character.
+ /// + /// This setting can also be configured using the inline flag `s` in + /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent + /// regexes. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"foo.bar") + /// .dot_matches_new_line(true) + /// .build() + /// .unwrap(); + /// let hay = "foo\nbar"; + /// assert_eq!(Some("foo\nbar"), re.find(hay).map(|m| m.as_str())); + /// ``` + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexBuilder { + self.builder.dot_matches_new_line(yes); + self + } + + /// This configures CRLF mode for the entire pattern. + /// + /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for + /// short) and `\n` ("line feed" or LF for short) are treated as line + /// terminators. This results in the following: + /// + /// * Unless dot-matches-new-line mode is enabled, `.` will now match + /// any character except for `\n` and `\r`. + /// * When multi-line mode is enabled, `^` will match immediately + /// following a `\n` or a `\r`. Similarly, `$` will match immediately + /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match + /// between `\r` and `\n`. + /// + /// This setting can also be configured using the inline flag `R` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = "\r\nfoo\r\n"; + /// // If CRLF mode weren't enabled here, then '$' wouldn't match + /// // immediately after 'foo', and thus no match would be found. + /// assert_eq!(Some("foo"), re.find(hay).map(|m| m.as_str())); + /// ``` + /// + /// This example demonstrates that `^` will never match at a position + /// between `\r` and `\n`. (`$` will similarly not match between a `\r` + /// and a `\n`.) + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = "\r\n\r\n"; + /// let ranges: Vec<_> = re.find_iter(hay).map(|m| m.range()).collect(); + /// assert_eq!(ranges, vec![0..0, 2..2, 4..4]); + /// ``` + pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.crlf(yes); + self + } + + /// Configures the line terminator to be used by the regex. + /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. + /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. This can + /// be a useful heuristic when searching binary data. 
+        ///
+        /// ```
+        /// use regex::RegexBuilder;
+        ///
+        /// let re = RegexBuilder::new(r"^foo$")
+        ///     .multi_line(true)
+        ///     .line_terminator(b'\x00')
+        ///     .build()
+        ///     .unwrap();
+        /// let hay = "\x00foo\x00";
+        /// assert_eq!(Some(1..4), re.find(hay).map(|m| m.range()));
+        /// ```
+        ///
+        /// This example shows that the behavior of `.` is impacted by this
+        /// setting as well:
+        ///
+        /// ```
+        /// use regex::RegexBuilder;
+        ///
+        /// let re = RegexBuilder::new(r".")
+        ///     .line_terminator(b'\x00')
+        ///     .build()
+        ///     .unwrap();
+        /// assert!(re.is_match("\n"));
+        /// assert!(!re.is_match("\x00"));
+        /// ```
+        ///
+        /// This shows that building a regex will fail if the byte given
+        /// is not ASCII and the pattern could result in matching invalid
+        /// UTF-8. This is because any singular non-ASCII byte is not valid
+        /// UTF-8, and it is not permitted for a [`Regex`] to match invalid
+        /// UTF-8. (It is permissible to use a non-ASCII byte when building a
+        /// [`bytes::Regex`](crate::bytes::Regex).)
+        ///
+        /// ```
+        /// use regex::RegexBuilder;
+        ///
+        /// assert!(RegexBuilder::new(r".").line_terminator(0x80).build().is_err());
+        /// // Note that using a non-ASCII byte isn't enough on its own to
+        /// // cause regex compilation to fail. You actually have to make use
+        /// // of it in the regex in a way that leads to matching invalid
+        /// // UTF-8. If you don't, then regex compilation will succeed!
+        /// assert!(RegexBuilder::new(r"a").line_terminator(0x80).build().is_ok());
+        /// ```
+        pub fn line_terminator(&mut self, byte: u8) -> &mut RegexBuilder {
+            self.builder.line_terminator(byte);
+            self
+        }
+
+        /// This configures swap-greed mode for the entire pattern.
+        ///
+        /// When swap-greed mode is enabled, patterns like `a+` will become
+        /// non-greedy and patterns like `a+?` will become greedy. In other
+        /// words, the meanings of `a+` and `a+?` are switched.
+        ///
+        /// This setting can also be configured using the inline flag `U` in
+        /// the pattern.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexBuilder;
+        ///
+        /// let re = RegexBuilder::new(r"a+")
+        ///     .swap_greed(true)
+        ///     .build()
+        ///     .unwrap();
+        /// assert_eq!(Some("a"), re.find("aaa").map(|m| m.as_str()));
+        /// ```
+        pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder {
+            self.builder.swap_greed(yes);
+            self
+        }
+
+        /// This configures verbose mode for the entire pattern.
+        ///
+        /// When enabled, whitespace will be treated as insignificant in the
+        /// pattern and `#` can be used to start a comment until the next
+        /// newline.
+        ///
+        /// Normally, in most places in a pattern, whitespace is treated
+        /// literally. For example ` +` will match one or more ASCII whitespace
+        /// characters.
+        ///
+        /// When verbose mode is enabled, `\#` can be used to match a literal
+        /// `#` and `\ ` can be used to match a literal ASCII whitespace
+        /// character.
+        ///
+        /// Verbose mode is useful for permitting regexes to be formatted and
+        /// broken up more nicely. This may make them more easily readable.
+        ///
+        /// This setting can also be configured using the inline flag `x` in
+        /// the pattern.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexBuilder;
+        ///
+        /// let pat = r"
+        ///     \b
+        ///     (?<first>\p{Uppercase}\w*)  # always start with uppercase letter
+        ///     [\s--\n]+                   # whitespace should separate names
+        ///     (?:                         # middle name can be an initial!
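+        ///                                 # (the alternation below is either
+        ///                                 # an initial like `J.` or a full
+        ///                                 # middle name)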
+        ///         (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*))
+        ///         [\s--\n]+
+        ///     )?
+        ///     (?<last>\p{Uppercase}\w*)
+        ///     \b
+        /// ";
+        /// let re = RegexBuilder::new(pat)
+        ///     .ignore_whitespace(true)
+        ///     .build()
+        ///     .unwrap();
+        ///
+        /// let caps = re.captures("Harry Potter").unwrap();
+        /// assert_eq!("Harry", &caps["first"]);
+        /// assert_eq!("Potter", &caps["last"]);
+        ///
+        /// let caps = re.captures("Harry J. Potter").unwrap();
+        /// assert_eq!("Harry", &caps["first"]);
+        /// // Since a middle name/initial isn't required for an overall match,
+        /// // we can't assume that 'initial' or 'middle' will be populated!
+        /// assert_eq!(Some("J"), caps.name("initial").map(|m| m.as_str()));
+        /// assert_eq!(None, caps.name("middle").map(|m| m.as_str()));
+        /// assert_eq!("Potter", &caps["last"]);
+        ///
+        /// let caps = re.captures("Harry James Potter").unwrap();
+        /// assert_eq!("Harry", &caps["first"]);
+        /// // Since a middle name/initial isn't required for an overall match,
+        /// // we can't assume that 'initial' or 'middle' will be populated!
+        /// assert_eq!(None, caps.name("initial").map(|m| m.as_str()));
+        /// assert_eq!(Some("James"), caps.name("middle").map(|m| m.as_str()));
+        /// assert_eq!("Potter", &caps["last"]);
+        /// ```
+        pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder {
+            self.builder.ignore_whitespace(yes);
+            self
+        }
+
+        /// This configures octal mode for the entire pattern.
+        ///
+        /// Octal syntax is a little-known way of uttering Unicode codepoints
+        /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all
+        /// equivalent patterns, where the last example shows octal syntax.
+        ///
+        /// While supporting octal syntax isn't in and of itself a problem,
+        /// it does make good error messages harder. That is, in PCRE-based
+        /// regex engines, syntax like `\1` invokes a backreference, which is
+        /// explicitly unsupported in this library. However, many users expect
+        /// backreferences to be supported. Therefore, when octal support
+        /// is disabled, the error message will explicitly mention that
+        /// backreferences aren't supported.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexBuilder;
+        ///
+        /// // Normally this pattern would not compile, with an error message
+        /// // about backreferences not being supported. But with octal mode
+        /// // enabled, octal escape sequences work.
+        /// let re = RegexBuilder::new(r"\141")
+        ///     .octal(true)
+        ///     .build()
+        ///     .unwrap();
+        /// assert!(re.is_match("a"));
+        /// ```
+        pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder {
+            self.builder.octal(yes);
+            self
+        }
+
+        /// Sets the approximate size limit, in bytes, of the compiled regex.
+        ///
+        /// This roughly corresponds to the amount of heap memory, in
+        /// bytes, occupied by a single regex. If the regex would otherwise
+        /// approximately exceed this limit, then compiling that regex will
+        /// fail.
+        ///
+        /// The main utility of a method like this is to avoid compiling
+        /// regexes that use an unexpected amount of resources, such as
+        /// time and memory. Even if the memory usage of a large regex is
+        /// acceptable, its search time may not be. Namely, worst case time
+        /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and
+        /// `n ~ len(haystack)`. That is, search time depends, in part, on the
+        /// size of the compiled regex. This means that putting a limit on the
+        /// size of the regex limits how much a regex can impact search time.
+        ///
+        /// For more information about regex size limits, see the section on
+        /// [untrusted inputs](crate#untrusted-input) in the top-level crate
+        /// documentation.
+        ///
+        /// The default for this is some reasonable number that permits most
+        /// patterns to compile successfully.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041
+        /// use regex::RegexBuilder;
+        ///
+        /// // It may surprise you how big some seemingly small patterns can
+        /// // be! Since \w is Unicode aware, this generates a regex that can
+        /// // match approximately 140,000 distinct codepoints.
+        /// assert!(RegexBuilder::new(r"\w").size_limit(45_000).build().is_err());
+        /// ```
+        pub fn size_limit(&mut self, bytes: usize) -> &mut RegexBuilder {
+            self.builder.size_limit(bytes);
+            self
+        }
+
+        /// Set the approximate capacity, in bytes, of the cache of transitions
+        /// used by the lazy DFA.
+        ///
+        /// While the lazy DFA isn't always used, it tends to be the most
+        /// commonly used regex engine in default configurations. It tends to
+        /// adopt the performance profile of a fully built DFA, but without the
+        /// downside of taking worst case exponential time to build.
+        ///
+        /// The downside is that it needs to keep a cache of transitions and
+        /// states that are built while running a search, and this cache
+        /// can fill up. When it fills up, the cache will reset itself. Any
+        /// previously generated states and transitions will then need to be
+        /// re-generated. If this happens too many times, then this library
+        /// will bail out of using the lazy DFA and switch to a different regex
+        /// engine.
+        ///
+        /// If your regex provokes this particular downside of the lazy DFA,
+        /// then it may be beneficial to increase its cache capacity. This will
+        /// potentially reduce the frequency of cache resetting (ideally to
+        /// `0`). While it won't fix all potential performance problems with
+        /// the lazy DFA, increasing the cache capacity does fix some.
+        ///
+        /// There is no easy way to determine, a priori, whether increasing
+        /// this cache capacity will help. In general, the larger your regex,
+        /// the more cache it's likely to use. But that isn't an ironclad rule.
+        /// For example, a regex like `[01]*1[01]{N}` would normally produce a
+        /// fully built DFA that is exponential in size with respect to `N`.
+        /// The lazy DFA will prevent exponential space blow-up, but its cache
+        /// is likely to fill up, even when it's large and even for smallish
+        /// values of `N`.
+        ///
+        /// If you aren't sure whether this helps or not, it is sensible to
+        /// set this to some arbitrarily large number in testing, such as
+        /// `usize::MAX`. Namely, this represents the amount of capacity that
+        /// *may* be used. It's probably not a good idea to use `usize::MAX` in
+        /// production though, since it implies there are no controls on heap
+        /// memory used by this library during a search. In effect, set it to
+        /// whatever you're willing to allocate for a single regex search.
+        pub fn dfa_size_limit(&mut self, bytes: usize) -> &mut RegexBuilder {
+            self.builder.dfa_size_limit(bytes);
+            self
+        }
+
+        /// Set the nesting limit for this parser.
+        ///
+        /// The nesting limit controls how deep the abstract syntax tree is
+        /// allowed to be. If the AST exceeds the given limit (e.g., with too
+        /// many nested groups), then an error is returned by the parser.
+        ///
+        /// The purpose of this limit is to act as a heuristic to prevent stack
+        /// overflow for consumers that do structural induction on an AST using
+        /// explicit recursion. While this crate never does this (instead using
+        /// constant stack space and moving the call stack to the heap), other
+        /// crates may.
+        ///
+        /// This limit is not checked until the entire AST is parsed.
+        /// Therefore, if callers want to put a limit on the amount of heap
+        /// space used, then they should impose a limit on the length, in
+        /// bytes, of the concrete pattern string. In particular, this is
+        /// viable since this parser implementation will limit itself to heap
+        /// space proportional to the length of the pattern string. See also
+        /// the [untrusted inputs](crate#untrusted-input) section in the
+        /// top-level crate documentation for more information about this.
+        ///
+        /// Note that a nest limit of `0` will return a nest limit error for
+        /// most patterns but not all. For example, a nest limit of `0` permits
+        /// `a` but not `ab`, since `ab` requires an explicit concatenation,
+        /// which results in a nest depth of `1`. In general, a nest limit is
+        /// not something that manifests in an obvious way in the concrete
+        /// syntax, therefore, it should not be used in a granular way.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexBuilder;
+        ///
+        /// assert!(RegexBuilder::new(r"a").nest_limit(0).build().is_ok());
+        /// assert!(RegexBuilder::new(r"ab").nest_limit(0).build().is_err());
+        /// ```
+        pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {
+            self.builder.nest_limit(limit);
+            self
+        }
+    }
+
+    /// A configurable builder for a [`RegexSet`].
+    ///
+    /// This builder can be used to programmatically set flags such as
+    /// `i` (case insensitive) and `x` (for verbose mode). This builder
+    /// can also be used to configure things like the line terminator
+    /// and a size limit on the compiled regular expression.
+    #[derive(Clone, Debug)]
+    pub struct RegexSetBuilder {
+        builder: Builder,
+    }
+
+    impl RegexSetBuilder {
+        /// Create a new builder with a default configuration for the given
+        /// patterns.
+        ///
+        /// If the patterns are invalid or exceed the configured size limits,
+        /// then an error will be returned when [`RegexSetBuilder::build`] is
+        /// called.
+        pub fn new<I, S>(patterns: I) -> RegexSetBuilder
+        where
+            I: IntoIterator<Item = S>,
+            S: AsRef<str>,
+        {
+            RegexSetBuilder { builder: Builder::new(patterns) }
+        }
+
+        /// Compiles the patterns given to `RegexSetBuilder::new` with the
+        /// configuration set on this builder.
+        ///
+        /// If the patterns aren't valid regexes or if a configured size limit
+        /// was exceeded, then an error is returned.
+        pub fn build(&self) -> Result<RegexSet, Error> {
+            self.builder.build_many_string()
+        }
+
+        /// This configures Unicode mode for all of the patterns.
+        ///
+        /// Enabling Unicode mode does a number of things:
+        ///
+        /// * Most fundamentally, it causes the fundamental atom of matching
+        /// to be a single codepoint. When Unicode mode is disabled, it's a
+        /// single byte. For example, when Unicode mode is enabled, `.` will
+        /// match `💩` once, whereas it will match 4 times when Unicode mode
+        /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.)
+        /// * Case insensitive matching uses Unicode simple case folding rules.
+        /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are
+        /// available.
+        /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and
+        /// `\d`.
+        /// * The word boundary assertions, `\b` and `\B`, use the Unicode
+        /// definition of a word character.
+        ///
+        /// Note that if Unicode mode is disabled, then the regex will fail to
+        /// compile if it could match invalid UTF-8. For example, when Unicode
+        /// mode is disabled, since `.` matches any byte (except for `\n`),
+        /// it can match invalid UTF-8, and thus building a regex from it
+        /// will fail. Another example is `\w` and `\W`. Since `\w` can
+        /// only match ASCII bytes when Unicode mode is disabled, it's allowed.
+        /// But `\W` can match more than ASCII bytes, including invalid UTF-8,
+        /// and so it is not allowed. This restriction can be lifted only by
+        /// using a [`bytes::RegexSet`](crate::bytes::RegexSet).
+        ///
+        /// For more details on the Unicode support in this crate, see the
+        /// [Unicode section](crate#unicode) in this crate's top-level
+        /// documentation.
+        ///
+        /// The default for this is `true`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexSetBuilder;
+        ///
+        /// let re = RegexSetBuilder::new([r"\w"])
+        ///     .unicode(false)
+        ///     .build()
+        ///     .unwrap();
+        /// // Normally Greek letters would be included in \w, but since
+        /// // Unicode mode is disabled, it only matches ASCII letters.
+        /// assert!(!re.is_match("δ"));
+        ///
+        /// let re = RegexSetBuilder::new([r"s"])
+        ///     .case_insensitive(true)
+        ///     .unicode(false)
+        ///     .build()
+        ///     .unwrap();
+        /// // Normally 'ſ' is included when searching for 's' case
+        /// // insensitively due to Unicode's simple case folding rules. But
+        /// // when Unicode mode is disabled, only ASCII case insensitive rules
+        /// // are used.
+        /// assert!(!re.is_match("ſ"));
+        /// ```
+        pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder {
+            self.builder.unicode(yes);
+            self
+        }
+
+        /// This configures whether to enable case insensitive matching for all
+        /// of the patterns.
+        ///
+        /// This setting can also be configured using the inline flag `i`
+        /// in the pattern. For example, `(?i:foo)` matches `foo` case
+        /// insensitively while `(?-i:foo)` matches `foo` case sensitively.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexSetBuilder;
+        ///
+        /// let re = RegexSetBuilder::new([r"foo(?-i:bar)quux"])
+        ///     .case_insensitive(true)
+        ///     .build()
+        ///     .unwrap();
+        /// assert!(re.is_match("FoObarQuUx"));
+        /// // Even though case insensitive matching is enabled in the builder,
+        /// // it can be locally disabled within the pattern. In this case,
+        /// // `bar` is matched case sensitively.
+        /// assert!(!re.is_match("fooBARquux"));
+        /// ```
+        pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexSetBuilder {
+            self.builder.case_insensitive(yes);
+            self
+        }
+
+        /// This configures multi-line mode for all of the patterns.
+        ///
+        /// Enabling multi-line mode changes the behavior of the `^` and `$`
+        /// anchor assertions. Instead of only matching at the beginning and
+        /// end of a haystack, respectively, multi-line mode causes them to
+        /// match at the beginning and end of a line *in addition* to the
+        /// beginning and end of a haystack. More precisely, `^` will match at
+        /// the position immediately following a `\n` and `$` will match at the
+        /// position immediately preceding a `\n`.
+        ///
+        /// The behavior of this option can be impacted by other settings too:
+        ///
+        /// * The [`RegexSetBuilder::line_terminator`] option changes `\n`
+        /// above to any ASCII byte.
+        /// * The [`RegexSetBuilder::crlf`] option changes the line terminator
+        /// to be either `\r` or `\n`, but never at the position between a `\r`
+        /// and `\n`.
+        ///
+        /// This setting can also be configured using the inline flag `m` in
+        /// the pattern.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexSetBuilder;
+        ///
+        /// let re = RegexSetBuilder::new([r"^foo$"])
+        ///     .multi_line(true)
+        ///     .build()
+        ///     .unwrap();
+        /// assert!(re.is_match("\nfoo\n"));
+        /// ```
+        pub fn multi_line(&mut self, yes: bool) -> &mut RegexSetBuilder {
+            self.builder.multi_line(yes);
+            self
+        }
+
+        /// This configures dot-matches-new-line mode for all of the patterns.
+        ///
+        /// Perhaps surprisingly, the default behavior for `.` is not to match
+        /// any character, but rather, to match any character except for the
+        /// line terminator (which is `\n` by default). When this mode is
+        /// enabled, the behavior changes such that `.` truly matches any
+        /// character.
+        ///
+        /// This setting can also be configured using the inline flag `s` in
+        /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent
+        /// regexes.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexSetBuilder;
+        ///
+        /// let re = RegexSetBuilder::new([r"foo.bar"])
+        ///     .dot_matches_new_line(true)
+        ///     .build()
+        ///     .unwrap();
+        /// let hay = "foo\nbar";
+        /// assert!(re.is_match(hay));
+        /// ```
+        pub fn dot_matches_new_line(
+            &mut self,
+            yes: bool,
+        ) -> &mut RegexSetBuilder {
+            self.builder.dot_matches_new_line(yes);
+            self
+        }
+
+        /// This configures CRLF mode for all of the patterns.
+        ///
+        /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for
+        /// short) and `\n` ("line feed" or LF for short) are treated as line
+        /// terminators. This results in the following:
+        ///
+        /// * Unless dot-matches-new-line mode is enabled, `.` will now match
+        /// any character except for `\n` and `\r`.
+        /// * When multi-line mode is enabled, `^` will match immediately
+        /// following a `\n` or a `\r`. Similarly, `$` will match immediately
+        /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match
+        /// between `\r` and `\n`.
+        ///
+        /// This setting can also be configured using the inline flag `R` in
+        /// the pattern.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexSetBuilder;
+        ///
+        /// let re = RegexSetBuilder::new([r"^foo$"])
+        ///     .multi_line(true)
+        ///     .crlf(true)
+        ///     .build()
+        ///     .unwrap();
+        /// let hay = "\r\nfoo\r\n";
+        /// // If CRLF mode weren't enabled here, then '$' wouldn't match
+        /// // immediately after 'foo', and thus no match would be found.
+        /// assert!(re.is_match(hay));
+        /// ```
+        ///
+        /// This example demonstrates that `^` will never match at a position
+        /// between `\r` and `\n`. (`$` will similarly not match between a `\r`
+        /// and a `\n`.)
+        ///
+        /// ```
+        /// use regex::RegexSetBuilder;
+        ///
+        /// let re = RegexSetBuilder::new([r"^\n"])
+        ///     .multi_line(true)
+        ///     .crlf(true)
+        ///     .build()
+        ///     .unwrap();
+        /// assert!(!re.is_match("\r\n"));
+        /// ```
+        pub fn crlf(&mut self, yes: bool) -> &mut RegexSetBuilder {
+            self.builder.crlf(yes);
+            self
+        }
+
+        /// Configures the line terminator to be used by the regex.
+ /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. + /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. This can + /// be a useful heuristic when searching binary data. + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// let hay = "\x00foo\x00"; + /// assert!(re.is_match(hay)); + /// ``` + /// + /// This example shows that the behavior of `.` is impacted by this + /// setting as well: + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"."]) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// assert!(re.is_match("\n")); + /// assert!(!re.is_match("\x00")); + /// ``` + /// + /// This shows that building a regex will fail if the byte given + /// is not ASCII and the pattern could result in matching invalid + /// UTF-8. This is because any singular non-ASCII byte is not valid + /// UTF-8, and it is not permitted for a [`RegexSet`] to match invalid + /// UTF-8. (It is permissible to use a non-ASCII byte when building a + /// [`bytes::RegexSet`](crate::bytes::RegexSet).) + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// assert!( + /// RegexSetBuilder::new([r"."]) + /// .line_terminator(0x80) + /// .build() + /// .is_err() + /// ); + /// // Note that using a non-ASCII byte isn't enough on its own to + /// // cause regex compilation to fail. You actually have to make use + /// // of it in the regex in a way that leads to matching invalid + /// // UTF-8. If you don't, then regex compilation will succeed! + /// assert!( + /// RegexSetBuilder::new([r"a"]) + /// .line_terminator(0x80) + /// .build() + /// .is_ok() + /// ); + /// ``` + pub fn line_terminator(&mut self, byte: u8) -> &mut RegexSetBuilder { + self.builder.line_terminator(byte); + self + } + + /// This configures swap-greed mode for all of the patterns. + /// + /// When swap-greed mode is enabled, patterns like `a+` will become + /// non-greedy and patterns like `a+?` will become greedy. In other + /// words, the meanings of `a+` and `a+?` are switched. + /// + /// This setting can also be configured using the inline flag `U` in + /// the pattern. + /// + /// Note that this is generally not useful for a `RegexSet` since a + /// `RegexSet` can only report whether a pattern matches or not. Since + /// greediness never impacts whether a match is found or not (only the + /// offsets of the match), it follows that whether parts of a pattern + /// are greedy or not doesn't matter for a `RegexSet`. + /// + /// The default for this is `false`. + pub fn swap_greed(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.swap_greed(yes); + self + } + + /// This configures verbose mode for all of the patterns. 
+        ///
+        /// When enabled, whitespace will be treated as insignificant in the
+        /// pattern and `#` can be used to start a comment until the next
+        /// newline.
+        ///
+        /// Normally, in most places in a pattern, whitespace is treated
+        /// literally. For example ` +` will match one or more ASCII whitespace
+        /// characters.
+        ///
+        /// When verbose mode is enabled, `\#` can be used to match a literal
+        /// `#` and `\ ` can be used to match a literal ASCII whitespace
+        /// character.
+        ///
+        /// Verbose mode is useful for permitting regexes to be formatted and
+        /// broken up more nicely. This may make them more easily readable.
+        ///
+        /// This setting can also be configured using the inline flag `x` in
+        /// the pattern.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexSetBuilder;
+        ///
+        /// let pat = r"
+        ///     \b
+        ///     (?<first>\p{Uppercase}\w*)  # always start with uppercase letter
+        ///     [\s--\n]+                   # whitespace should separate names
+        ///     (?:                         # middle name can be an initial!
+        ///         (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*))
+        ///         [\s--\n]+
+        ///     )?
+        ///     (?<last>\p{Uppercase}\w*)
+        ///     \b
+        /// ";
+        /// let re = RegexSetBuilder::new([pat])
+        ///     .ignore_whitespace(true)
+        ///     .build()
+        ///     .unwrap();
+        /// assert!(re.is_match("Harry Potter"));
+        /// assert!(re.is_match("Harry J. Potter"));
+        /// assert!(re.is_match("Harry James Potter"));
+        /// assert!(!re.is_match("harry J. Potter"));
+        /// ```
+        pub fn ignore_whitespace(
+            &mut self,
+            yes: bool,
+        ) -> &mut RegexSetBuilder {
+            self.builder.ignore_whitespace(yes);
+            self
+        }
+
+        /// This configures octal mode for all of the patterns.
+        ///
+        /// Octal syntax is a little-known way of uttering Unicode codepoints
+        /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all
+        /// equivalent patterns, where the last example shows octal syntax.
+        ///
+        /// While supporting octal syntax isn't in and of itself a problem,
+        /// it does make good error messages harder. That is, in PCRE-based
+        /// regex engines, syntax like `\1` invokes a backreference, which is
+        /// explicitly unsupported in this library. However, many users expect
+        /// backreferences to be supported. Therefore, when octal support
+        /// is disabled, the error message will explicitly mention that
+        /// backreferences aren't supported.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexSetBuilder;
+        ///
+        /// // Normally this pattern would not compile, with an error message
+        /// // about backreferences not being supported. But with octal mode
+        /// // enabled, octal escape sequences work.
+        /// let re = RegexSetBuilder::new([r"\141"])
+        ///     .octal(true)
+        ///     .build()
+        ///     .unwrap();
+        /// assert!(re.is_match("a"));
+        /// ```
+        pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder {
+            self.builder.octal(yes);
+            self
+        }
+
+        /// Sets the approximate size limit, in bytes, of the compiled regex.
+        ///
+        /// This roughly corresponds to the amount of heap memory, in
+        /// bytes, occupied by a single regex. If the regex would otherwise
+        /// approximately exceed this limit, then compiling that regex will
+        /// fail.
+        ///
+        /// The main utility of a method like this is to avoid compiling
+        /// regexes that use an unexpected amount of resources, such as
+        /// time and memory. Even if the memory usage of a large regex is
+        /// acceptable, its search time may not be.
+        /// Namely, worst case time complexity for search is `O(m * n)`,
+        /// where `m ~ len(pattern)` and `n ~ len(haystack)`. That is, search
+        /// time depends, in part, on the size of the compiled regex. This
+        /// means that putting a limit on the size of the regex limits how
+        /// much a regex can impact search time.
+        ///
+        /// For more information about regex size limits, see the section on
+        /// [untrusted inputs](crate#untrusted-input) in the top-level crate
+        /// documentation.
+        ///
+        /// The default for this is some reasonable number that permits most
+        /// patterns to compile successfully.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041
+        /// use regex::RegexSetBuilder;
+        ///
+        /// // It may surprise you how big some seemingly small patterns can
+        /// // be! Since \w is Unicode aware, this generates a regex that can
+        /// // match approximately 140,000 distinct codepoints.
+        /// assert!(
+        ///     RegexSetBuilder::new([r"\w"])
+        ///         .size_limit(45_000)
+        ///         .build()
+        ///         .is_err()
+        /// );
+        /// ```
+        pub fn size_limit(&mut self, bytes: usize) -> &mut RegexSetBuilder {
+            self.builder.size_limit(bytes);
+            self
+        }
+
+        /// Set the approximate capacity, in bytes, of the cache of transitions
+        /// used by the lazy DFA.
+        ///
+        /// While the lazy DFA isn't always used, it tends to be the most
+        /// commonly used regex engine in default configurations. It tends to
+        /// adopt the performance profile of a fully built DFA, but without the
+        /// downside of taking worst case exponential time to build.
+        ///
+        /// The downside is that it needs to keep a cache of transitions and
+        /// states that are built while running a search, and this cache
+        /// can fill up. When it fills up, the cache will reset itself. Any
+        /// previously generated states and transitions will then need to be
+        /// re-generated. If this happens too many times, then this library
+        /// will bail out of using the lazy DFA and switch to a different regex
+        /// engine.
+        ///
+        /// If your regex provokes this particular downside of the lazy DFA,
+        /// then it may be beneficial to increase its cache capacity. This will
+        /// potentially reduce the frequency of cache resetting (ideally to
+        /// `0`). While it won't fix all potential performance problems with
+        /// the lazy DFA, increasing the cache capacity does fix some.
+        ///
+        /// There is no easy way to determine, a priori, whether increasing
+        /// this cache capacity will help. In general, the larger your regex,
+        /// the more cache it's likely to use. But that isn't an ironclad rule.
+        /// For example, a regex like `[01]*1[01]{N}` would normally produce a
+        /// fully built DFA that is exponential in size with respect to `N`.
+        /// The lazy DFA will prevent exponential space blow-up, but its cache
+        /// is likely to fill up, even when it's large and even for smallish
+        /// values of `N`.
+        ///
+        /// If you aren't sure whether this helps or not, it is sensible to
+        /// set this to some arbitrarily large number in testing, such as
+        /// `usize::MAX`. Namely, this represents the amount of capacity that
+        /// *may* be used. It's probably not a good idea to use `usize::MAX` in
+        /// production though, since it implies there are no controls on heap
+        /// memory used by this library during a search. In effect, set it to
+        /// whatever you're willing to allocate for a single regex search.
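+        ///
+        /// # Example
+        ///
+        /// A minimal sketch of raising the capacity. The `10 * (1 << 20)`
+        /// (10 MiB) below is an arbitrary illustrative value, not a
+        /// recommendation; the right capacity depends on your workload.
+        ///
+        /// ```
+        /// use regex::RegexSetBuilder;
+        ///
+        /// let re = RegexSetBuilder::new([r"foo", r"bar"])
+        ///     .dfa_size_limit(10 * (1 << 20))
+        ///     .build()
+        ///     .unwrap();
+        /// assert!(re.is_match("foobar"));
+        /// ```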
+        pub fn dfa_size_limit(
+            &mut self,
+            bytes: usize,
+        ) -> &mut RegexSetBuilder {
+            self.builder.dfa_size_limit(bytes);
+            self
+        }
+
+        /// Set the nesting limit for this parser.
+        ///
+        /// The nesting limit controls how deep the abstract syntax tree is
+        /// allowed to be. If the AST exceeds the given limit (e.g., with too
+        /// many nested groups), then an error is returned by the parser.
+        ///
+        /// The purpose of this limit is to act as a heuristic to prevent stack
+        /// overflow for consumers that do structural induction on an AST using
+        /// explicit recursion. While this crate never does this (instead using
+        /// constant stack space and moving the call stack to the heap), other
+        /// crates may.
+        ///
+        /// This limit is not checked until the entire AST is parsed.
+        /// Therefore, if callers want to put a limit on the amount of heap
+        /// space used, then they should impose a limit on the length, in
+        /// bytes, of the concrete pattern string. In particular, this is
+        /// viable since this parser implementation will limit itself to heap
+        /// space proportional to the length of the pattern string. See also
+        /// the [untrusted inputs](crate#untrusted-input) section in the
+        /// top-level crate documentation for more information about this.
+        ///
+        /// Note that a nest limit of `0` will return a nest limit error for
+        /// most patterns but not all. For example, a nest limit of `0` permits
+        /// `a` but not `ab`, since `ab` requires an explicit concatenation,
+        /// which results in a nest depth of `1`. In general, a nest limit is
+        /// not something that manifests in an obvious way in the concrete
+        /// syntax, therefore, it should not be used in a granular way.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::RegexSetBuilder;
+        ///
+        /// assert!(RegexSetBuilder::new([r"a"]).nest_limit(0).build().is_ok());
+        /// assert!(RegexSetBuilder::new([r"ab"]).nest_limit(0).build().is_err());
+        /// ```
+        pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder {
+            self.builder.nest_limit(limit);
+            self
+        }
+    }
+}
+
+pub(crate) mod bytes {
+    use crate::{
+        bytes::{Regex, RegexSet},
+        error::Error,
+    };
+
+    use super::Builder;
+
+    /// A configurable builder for a [`Regex`].
+    ///
+    /// This builder can be used to programmatically set flags such as `i`
+    /// (case insensitive) and `x` (for verbose mode). This builder can also be
+    /// used to configure things like the line terminator and a size limit on
+    /// the compiled regular expression.
+    #[derive(Clone, Debug)]
+    pub struct RegexBuilder {
+        builder: Builder,
+    }
+
+    impl RegexBuilder {
+        /// Create a new builder with a default configuration for the given
+        /// pattern.
+        ///
+        /// If the pattern is invalid or exceeds the configured size limits,
+        /// then an error will be returned when [`RegexBuilder::build`] is
+        /// called.
+        pub fn new(pattern: &str) -> RegexBuilder {
+            RegexBuilder { builder: Builder::new([pattern]) }
+        }
+
+        /// Compiles the pattern given to `RegexBuilder::new` with the
+        /// configuration set on this builder.
+        ///
+        /// If the pattern isn't a valid regex or if a configured size limit
+        /// was exceeded, then an error is returned.
+        pub fn build(&self) -> Result<Regex, Error> {
+            self.builder.build_one_bytes()
+        }
+
+        /// This configures Unicode mode for the entire pattern.
+        ///
+        /// Enabling Unicode mode does a number of things:
+        ///
+        /// * Most fundamentally, it causes the fundamental atom of matching
+        /// to be a single codepoint. When Unicode mode is disabled, it's a
+        /// single byte.
+        /// For example, when Unicode mode is enabled, `.` will match `💩`
+        /// once, whereas it will match 4 times when Unicode mode is disabled.
+        /// (Since the UTF-8 encoding of `💩` is 4 bytes long.)
+        /// * Case insensitive matching uses Unicode simple case folding rules.
+        /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are
+        /// available.
+        /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and
+        /// `\d`.
+        /// * The word boundary assertions, `\b` and `\B`, use the Unicode
+        /// definition of a word character.
+        ///
+        /// Note that unlike the top-level `Regex` for searching `&str`, it
+        /// is permitted to disable Unicode mode even if the resulting pattern
+        /// could match invalid UTF-8. For example, `(?-u:.)` is not a valid
+        /// pattern for a top-level `Regex`, but is valid for a `bytes::Regex`.
+        ///
+        /// For more details on the Unicode support in this crate, see the
+        /// [Unicode section](crate#unicode) in this crate's top-level
+        /// documentation.
+        ///
+        /// The default for this is `true`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::bytes::RegexBuilder;
+        ///
+        /// let re = RegexBuilder::new(r"\w")
+        ///     .unicode(false)
+        ///     .build()
+        ///     .unwrap();
+        /// // Normally Greek letters would be included in \w, but since
+        /// // Unicode mode is disabled, it only matches ASCII letters.
+        /// assert!(!re.is_match("δ".as_bytes()));
+        ///
+        /// let re = RegexBuilder::new(r"s")
+        ///     .case_insensitive(true)
+        ///     .unicode(false)
+        ///     .build()
+        ///     .unwrap();
+        /// // Normally 'ſ' is included when searching for 's' case
+        /// // insensitively due to Unicode's simple case folding rules. But
+        /// // when Unicode mode is disabled, only ASCII case insensitive rules
+        /// // are used.
+        /// assert!(!re.is_match("ſ".as_bytes()));
+        /// ```
+        ///
+        /// Since this builder is for constructing a [`bytes::Regex`](Regex),
+        /// one can disable Unicode mode even if it would match invalid UTF-8:
+        ///
+        /// ```
+        /// use regex::bytes::RegexBuilder;
+        ///
+        /// let re = RegexBuilder::new(r".")
+        ///     .unicode(false)
+        ///     .build()
+        ///     .unwrap();
+        /// // With Unicode mode disabled, `.` matches any byte except for
+        /// // `\n`, even bytes that are not valid UTF-8.
+        /// assert!(re.is_match(b"\xFF"));
+        /// ```
+        pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {
+            self.builder.unicode(yes);
+            self
+        }
+
+        /// This configures whether to enable case insensitive matching for the
+        /// entire pattern.
+        ///
+        /// This setting can also be configured using the inline flag `i`
+        /// in the pattern. For example, `(?i:foo)` matches `foo` case
+        /// insensitively while `(?-i:foo)` matches `foo` case sensitively.
+        ///
+        /// The default for this is `false`.
+        ///
+        /// # Example
+        ///
+        /// ```
+        /// use regex::bytes::RegexBuilder;
+        ///
+        /// let re = RegexBuilder::new(r"foo(?-i:bar)quux")
+        ///     .case_insensitive(true)
+        ///     .build()
+        ///     .unwrap();
+        /// assert!(re.is_match(b"FoObarQuUx"));
+        /// // Even though case insensitive matching is enabled in the builder,
+        /// // it can be locally disabled within the pattern. In this case,
+        /// // `bar` is matched case sensitively.
+        /// assert!(!re.is_match(b"fooBARquux"));
+        /// ```
+        pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder {
+            self.builder.case_insensitive(yes);
+            self
+        }
+
+        /// This configures multi-line mode for the entire pattern.
+        ///
+        /// Enabling multi-line mode changes the behavior of the `^` and `$`
+        /// anchor assertions.
Instead of only matching at the beginning and + /// end of a haystack, respectively, multi-line mode causes them to + /// match at the beginning and end of a line *in addition* to the + /// beginning and end of a haystack. More precisely, `^` will match at + /// the position immediately following a `\n` and `$` will match at the + /// position immediately preceding a `\n`. + /// + /// The behavior of this option can be impacted by other settings too: + /// + /// * The [`RegexBuilder::line_terminator`] option changes `\n` above + /// to any ASCII byte. + /// * The [`RegexBuilder::crlf`] option changes the line terminator to + /// be either `\r` or `\n`, but never at the position between a `\r` + /// and `\n`. + /// + /// This setting can also be configured using the inline flag `m` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .build() + /// .unwrap(); + /// assert_eq!(Some(1..4), re.find(b"\nfoo\n").map(|m| m.range())); + /// ``` + pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.multi_line(yes); + self + } + + /// This configures dot-matches-new-line mode for the entire pattern. + /// + /// Perhaps surprisingly, the default behavior for `.` is not to match + /// any character, but rather, to match any character except for the + /// line terminator (which is `\n` by default). When this mode is + /// enabled, the behavior changes such that `.` truly matches any + /// character. + /// + /// This setting can also be configured using the inline flag `s` in + /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent + /// regexes. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"foo.bar") + /// .dot_matches_new_line(true) + /// .build() + /// .unwrap(); + /// let hay = b"foo\nbar"; + /// assert_eq!(Some(&b"foo\nbar"[..]), re.find(hay).map(|m| m.as_bytes())); + /// ``` + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexBuilder { + self.builder.dot_matches_new_line(yes); + self + } + + /// This configures CRLF mode for the entire pattern. + /// + /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for + /// short) and `\n` ("line feed" or LF for short) are treated as line + /// terminators. This results in the following: + /// + /// * Unless dot-matches-new-line mode is enabled, `.` will now match + /// any character except for `\n` and `\r`. + /// * When multi-line mode is enabled, `^` will match immediately + /// following a `\n` or a `\r`. Similarly, `$` will match immediately + /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match + /// between `\r` and `\n`. + /// + /// This setting can also be configured using the inline flag `R` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = b"\r\nfoo\r\n"; + /// // If CRLF mode weren't enabled here, then '$' wouldn't match + /// // immediately after 'foo', and thus no match would be found. 
+ /// assert_eq!(Some(&b"foo"[..]), re.find(hay).map(|m| m.as_bytes())); + /// ``` + /// + /// This example demonstrates that `^` will never match at a position + /// between `\r` and `\n`. (`$` will similarly not match between a `\r` + /// and a `\n`.) + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = b"\r\n\r\n"; + /// let ranges: Vec<_> = re.find_iter(hay).map(|m| m.range()).collect(); + /// assert_eq!(ranges, vec![0..0, 2..2, 4..4]); + /// ``` + pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.crlf(yes); + self + } + + /// Configures the line terminator to be used by the regex. + /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. + /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. This can + /// be a useful heuristic when searching binary data. + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// let hay = b"\x00foo\x00"; + /// assert_eq!(Some(1..4), re.find(hay).map(|m| m.range())); + /// ``` + /// + /// This example shows that the behavior of `.` is impacted by this + /// setting as well: + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r".") + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"\n")); + /// assert!(!re.is_match(b"\x00")); + /// ``` + /// + /// This shows that building a regex will work even when the byte + /// given is not ASCII. This is unlike the top-level `Regex` API where + /// matching invalid UTF-8 is not allowed. + /// + /// Note though that you must disable Unicode mode. This is required + /// because Unicode mode requires matching one codepoint at a time, + /// and there is no way to match a non-ASCII byte as if it were a + /// codepoint. + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// assert!( + /// RegexBuilder::new(r".") + /// .unicode(false) + /// .line_terminator(0x80) + /// .build() + /// .is_ok(), + /// ); + /// ``` + pub fn line_terminator(&mut self, byte: u8) -> &mut RegexBuilder { + self.builder.line_terminator(byte); + self + } + + /// This configures swap-greed mode for the entire pattern. + /// + /// When swap-greed mode is enabled, patterns like `a+` will become + /// non-greedy and patterns like `a+?` will become greedy. In other + /// words, the meanings of `a+` and `a+?` are switched. + /// + /// This setting can also be configured using the inline flag `U` in + /// the pattern. + /// + /// The default for this is `false`. 
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex::bytes::RegexBuilder;
+ ///
+ /// let re = RegexBuilder::new(r"a+")
+ /// .swap_greed(true)
+ /// .build()
+ /// .unwrap();
+ /// assert_eq!(Some(&b"a"[..]), re.find(b"aaa").map(|m| m.as_bytes()));
+ /// ```
+ pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder {
+ self.builder.swap_greed(yes);
+ self
+ }
+
+ /// This configures verbose mode for the entire pattern.
+ ///
+ /// When enabled, whitespace will be treated as insignificant in the
+ /// pattern and `#` can be used to start a comment until the next new
+ /// line.
+ ///
+ /// Normally, in most places in a pattern, whitespace is treated
+ /// literally. For example ` +` will match one or more ASCII whitespace
+ /// characters.
+ ///
+ /// When verbose mode is enabled, `\#` can be used to match a literal
+ /// `#` and `\ ` can be used to match a literal ASCII whitespace
+ /// character.
+ ///
+ /// Verbose mode is useful for permitting regexes to be formatted and
+ /// broken up more nicely. This may make them more easily readable.
+ ///
+ /// This setting can also be configured using the inline flag `x` in
+ /// the pattern.
+ ///
+ /// The default for this is `false`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex::bytes::RegexBuilder;
+ ///
+ /// let pat = r"
+ /// \b
+ /// (?<first>\p{Uppercase}\w*) # always start with uppercase letter
+ /// [\s--\n]+ # whitespace should separate names
+ /// (?: # middle name can be an initial!
+ /// (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*))
+ /// [\s--\n]+
+ /// )?
+ /// (?<last>\p{Uppercase}\w*)
+ /// \b
+ /// ";
+ /// let re = RegexBuilder::new(pat)
+ /// .ignore_whitespace(true)
+ /// .build()
+ /// .unwrap();
+ ///
+ /// let caps = re.captures(b"Harry Potter").unwrap();
+ /// assert_eq!(&b"Harry"[..], &caps["first"]);
+ /// assert_eq!(&b"Potter"[..], &caps["last"]);
+ ///
+ /// let caps = re.captures(b"Harry J. Potter").unwrap();
+ /// assert_eq!(&b"Harry"[..], &caps["first"]);
+ /// // Since a middle name/initial isn't required for an overall match,
+ /// // we can't assume that 'initial' or 'middle' will be populated!
+ /// assert_eq!(
+ /// Some(&b"J"[..]),
+ /// caps.name("initial").map(|m| m.as_bytes()),
+ /// );
+ /// assert_eq!(None, caps.name("middle").map(|m| m.as_bytes()));
+ /// assert_eq!(&b"Potter"[..], &caps["last"]);
+ ///
+ /// let caps = re.captures(b"Harry James Potter").unwrap();
+ /// assert_eq!(&b"Harry"[..], &caps["first"]);
+ /// // Since a middle name/initial isn't required for an overall match,
+ /// // we can't assume that 'initial' or 'middle' will be populated!
+ /// assert_eq!(None, caps.name("initial").map(|m| m.as_bytes()));
+ /// assert_eq!(
+ /// Some(&b"James"[..]),
+ /// caps.name("middle").map(|m| m.as_bytes()),
+ /// );
+ /// assert_eq!(&b"Potter"[..], &caps["last"]);
+ /// ```
+ pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder {
+ self.builder.ignore_whitespace(yes);
+ self
+ }
+
+ /// This configures octal mode for the entire pattern.
+ ///
+ /// Octal syntax is a little-known way of uttering Unicode codepoints
+ /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all
+ /// equivalent patterns, where the last example shows octal syntax.
+ ///
+ /// While supporting octal syntax isn't in and of itself a problem,
+ /// it does make good error messages harder. That is, in PCRE based
+ /// regex engines, syntax like `\1` invokes a backreference, which is
+ /// explicitly unsupported in this library. However, many users expect
+ /// backreferences to be supported.
Therefore, when octal support
+ /// is disabled, the error message will explicitly mention that
+ /// backreferences aren't supported.
+ ///
+ /// The default for this is `false`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex::bytes::RegexBuilder;
+ ///
+ /// // Normally this pattern would not compile, with an error message
+ /// // about backreferences not being supported. But with octal mode
+ /// // enabled, octal escape sequences work.
+ /// let re = RegexBuilder::new(r"\141")
+ /// .octal(true)
+ /// .build()
+ /// .unwrap();
+ /// assert!(re.is_match(b"a"));
+ /// ```
+ pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder {
+ self.builder.octal(yes);
+ self
+ }
+
+ /// Sets the approximate size limit, in bytes, of the compiled regex.
+ ///
+ /// This roughly corresponds to the amount of heap memory, in
+ /// bytes, occupied by a single regex. If the regex would otherwise
+ /// approximately exceed this limit, then compiling that regex will
+ /// fail.
+ ///
+ /// The main utility of a method like this is to avoid compiling
+ /// regexes that use an unexpected amount of resources, such as
+ /// time and memory. Even if the memory usage of a large regex is
+ /// acceptable, its search time may not be. Namely, worst case time
+ /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and
+ /// `n ~ len(haystack)`. That is, search time depends, in part, on the
+ /// size of the compiled regex. This means that putting a limit on the
+ /// size of the regex limits how much a regex can impact search time.
+ ///
+ /// For more information about regex size limits, see the section on
+ /// [untrusted inputs](crate#untrusted-input) in the top-level crate
+ /// documentation.
+ ///
+ /// The default for this is some reasonable number that permits most
+ /// patterns to compile successfully.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041
+ /// use regex::bytes::RegexBuilder;
+ ///
+ /// // It may surprise you how big some seemingly small patterns can
+ /// // be! Since \w is Unicode aware, this generates a regex that can
+ /// // match approximately 140,000 distinct codepoints.
+ /// assert!(RegexBuilder::new(r"\w").size_limit(45_000).build().is_err());
+ /// ```
+ pub fn size_limit(&mut self, bytes: usize) -> &mut RegexBuilder {
+ self.builder.size_limit(bytes);
+ self
+ }
+
+ /// Set the approximate capacity, in bytes, of the cache of transitions
+ /// used by the lazy DFA.
+ ///
+ /// While the lazy DFA isn't always used, it tends to be the most
+ /// commonly used regex engine in default configurations. It tends to
+ /// adopt the performance profile of a fully built DFA, but without the
+ /// downside of taking worst case exponential time to build.
+ ///
+ /// The downside is that it needs to keep a cache of transitions and
+ /// states that are built while running a search, and this cache
+ /// can fill up. When it fills up, the cache will reset itself. Any
+ /// previously generated states and transitions will then need to be
+ /// re-generated. If this happens too many times, then this library
+ /// will bail out of using the lazy DFA and switch to a different regex
+ /// engine.
+ ///
+ /// If your regex provokes this particular downside of the lazy DFA,
+ /// then it may be beneficial to increase its cache capacity. This will
+ /// potentially reduce the frequency of cache resetting (ideally to
+ /// `0`).
While it won't fix all potential performance problems with
+ /// the lazy DFA, increasing the cache capacity does fix some.
+ ///
+ /// There is no easy way to determine, a priori, whether increasing
+ /// this cache capacity will help. In general, the larger your regex,
+ /// the more cache it's likely to use. But that isn't an ironclad rule.
+ /// For example, a regex like `[01]*1[01]{N}` would normally produce a
+ /// fully built DFA that is exponential in size with respect to `N`.
+ /// The lazy DFA will prevent exponential space blow-up, but its cache
+ /// is likely to fill up, even when it's large and even for smallish
+ /// values of `N`.
+ ///
+ /// If you aren't sure whether this helps or not, it is sensible to
+ /// set this to some arbitrarily large number in testing, such as
+ /// `usize::MAX`. Namely, this represents the amount of capacity that
+ /// *may* be used. It's probably not a good idea to use `usize::MAX` in
+ /// production though, since it implies there are no controls on heap
+ /// memory used by this library during a search. In effect, set it to
+ /// whatever you're willing to allocate for a single regex search.
+ pub fn dfa_size_limit(&mut self, bytes: usize) -> &mut RegexBuilder {
+ self.builder.dfa_size_limit(bytes);
+ self
+ }
+
+ /// Set the nesting limit for this parser.
+ ///
+ /// The nesting limit controls how deep the abstract syntax tree is
+ /// allowed to be. If the AST exceeds the given limit (e.g., with too
+ /// many nested groups), then an error is returned by the parser.
+ ///
+ /// The purpose of this limit is to act as a heuristic to prevent stack
+ /// overflow for consumers that do structural induction on an AST using
+ /// explicit recursion. While this crate never does this (instead using
+ /// constant stack space and moving the call stack to the heap), other
+ /// crates may.
+ ///
+ /// This limit is not checked until the entire AST is parsed.
+ /// Therefore, if callers want to put a limit on the amount of heap
+ /// space used, then they should impose a limit on the length, in
+ /// bytes, of the concrete pattern string. In particular, this is
+ /// viable since this parser implementation will limit itself to heap
+ /// space proportional to the length of the pattern string. See also
+ /// the [untrusted inputs](crate#untrusted-input) section in the
+ /// top-level crate documentation for more information about this.
+ ///
+ /// Note that a nest limit of `0` will return a nest limit error for
+ /// most patterns but not all. For example, a nest limit of `0` permits
+ /// `a` but not `ab`, since `ab` requires an explicit concatenation,
+ /// which results in a nest depth of `1`. In general, a nest limit is
+ /// not something that manifests in an obvious way in the concrete
+ /// syntax, therefore, it should not be used in a granular way.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex::bytes::RegexBuilder;
+ ///
+ /// assert!(RegexBuilder::new(r"a").nest_limit(0).build().is_ok());
+ /// assert!(RegexBuilder::new(r"ab").nest_limit(0).build().is_err());
+ /// ```
+ pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {
+ self.builder.nest_limit(limit);
+ self
+ }
+ }
+
+ /// A configurable builder for a [`RegexSet`].
+ ///
+ /// This builder can be used to programmatically set flags such as `i`
+ /// (case insensitive) and `x` (for verbose mode). This builder can also be
+ /// used to configure things like the line terminator and a size limit on
+ /// the compiled regular expression.
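+ ///
+ /// # Example
+ ///
+ /// A minimal sketch of typical usage (the specific patterns and
+ /// haystack here are illustrative additions, not part of the upstream
+ /// docs):
+ ///
+ /// ```
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// let set = RegexSetBuilder::new([r"foo", r"bar"])
+ /// .case_insensitive(true)
+ /// .build()
+ /// .unwrap();
+ /// // Indices of the patterns that matched somewhere in the haystack.
+ /// let matched: Vec<usize> = set.matches(b"BARFOO").into_iter().collect();
+ /// assert_eq!(matched, vec![0, 1]);
+ /// ```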
+ #[derive(Clone, Debug)]
+ pub struct RegexSetBuilder {
+ builder: Builder,
+ }
+
+ impl RegexSetBuilder {
+ /// Create a new builder with a default configuration for the given
+ /// patterns.
+ ///
+ /// If the patterns are invalid or exceed the configured size limits,
+ /// then an error will be returned when [`RegexSetBuilder::build`] is
+ /// called.
+ pub fn new<I, S>(patterns: I) -> RegexSetBuilder
+ where
+ I: IntoIterator<Item = S>,
+ S: AsRef<str>,
+ {
+ RegexSetBuilder { builder: Builder::new(patterns) }
+ }
+
+ /// Compiles the patterns given to `RegexSetBuilder::new` with the
+ /// configuration set on this builder.
+ ///
+ /// If the patterns aren't valid regexes or if a configured size limit
+ /// was exceeded, then an error is returned.
+ pub fn build(&self) -> Result<RegexSet, Error> {
+ self.builder.build_many_bytes()
+ }
+
+ /// This configures Unicode mode for all of the patterns.
+ ///
+ /// Enabling Unicode mode does a number of things:
+ ///
+ /// * Most fundamentally, it causes the fundamental atom of matching
+ /// to be a single codepoint. When Unicode mode is disabled, it's a
+ /// single byte. For example, when Unicode mode is enabled, `.` will
+ /// match `💩` once, whereas it will match 4 times when Unicode mode
+ /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.)
+ /// * Case insensitive matching uses Unicode simple case folding rules.
+ /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are
+ /// available.
+ /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and
+ /// `\d`.
+ /// * The word boundary assertions, `\b` and `\B`, use the Unicode
+ /// definition of a word character.
+ ///
+ /// Note that unlike the top-level `RegexSet` for searching `&str`,
+ /// it is permitted to disable Unicode mode even if the resulting
+ /// pattern could match invalid UTF-8. For example, `(?-u:.)` is not
+ /// a valid pattern for a top-level `RegexSet`, but is valid for a
+ /// `bytes::RegexSet`.
+ ///
+ /// For more details on the Unicode support in this crate, see the
+ /// [Unicode section](crate#unicode) in this crate's top-level
+ /// documentation.
+ ///
+ /// The default for this is `true`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// let re = RegexSetBuilder::new([r"\w"])
+ /// .unicode(false)
+ /// .build()
+ /// .unwrap();
+ /// // Normally Greek letters would be included in \w, but since
+ /// // Unicode mode is disabled, it only matches ASCII letters.
+ /// assert!(!re.is_match("δ".as_bytes()));
+ ///
+ /// let re = RegexSetBuilder::new([r"s"])
+ /// .case_insensitive(true)
+ /// .unicode(false)
+ /// .build()
+ /// .unwrap();
+ /// // Normally 'ſ' is included when searching for 's' case
+ /// // insensitively due to Unicode's simple case folding rules. But
+ /// // when Unicode mode is disabled, only ASCII case insensitive rules
+ /// // are used.
+ /// assert!(!re.is_match("ſ".as_bytes()));
+ /// ```
+ ///
+ /// Since this builder is for constructing a
+ /// [`bytes::RegexSet`](RegexSet), one can disable Unicode mode even if
+ /// it would match invalid UTF-8:
+ ///
+ /// ```
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// let re = RegexSetBuilder::new([r"."])
+ /// .unicode(false)
+ /// .build()
+ /// .unwrap();
+ /// // A `.` in Unicode mode can never match the invalid UTF-8 byte
+ /// // `\xFF`, but with Unicode mode disabled, it can.
+ /// assert!(re.is_match(b"\xFF"));
+ /// ```
+ pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder {
+ self.builder.unicode(yes);
+ self
+ }
+
+ /// This configures whether to enable case insensitive matching for all
+ /// of the patterns.
+ ///
+ /// This setting can also be configured using the inline flag `i`
+ /// in the pattern. For example, `(?i:foo)` matches `foo` case
+ /// insensitively while `(?-i:foo)` matches `foo` case sensitively.
+ ///
+ /// The default for this is `false`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// let re = RegexSetBuilder::new([r"foo(?-i:bar)quux"])
+ /// .case_insensitive(true)
+ /// .build()
+ /// .unwrap();
+ /// assert!(re.is_match(b"FoObarQuUx"));
+ /// // Even though case insensitive matching is enabled in the builder,
+ /// // it can be locally disabled within the pattern. In this case,
+ /// // `bar` is matched case sensitively.
+ /// assert!(!re.is_match(b"fooBARquux"));
+ /// ```
+ pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexSetBuilder {
+ self.builder.case_insensitive(yes);
+ self
+ }
+
+ /// This configures multi-line mode for all of the patterns.
+ ///
+ /// Enabling multi-line mode changes the behavior of the `^` and `$`
+ /// anchor assertions. Instead of only matching at the beginning and
+ /// end of a haystack, respectively, multi-line mode causes them to
+ /// match at the beginning and end of a line *in addition* to the
+ /// beginning and end of a haystack. More precisely, `^` will match at
+ /// the position immediately following a `\n` and `$` will match at the
+ /// position immediately preceding a `\n`.
+ ///
+ /// The behavior of this option can be impacted by other settings too:
+ ///
+ /// * The [`RegexSetBuilder::line_terminator`] option changes `\n`
+ /// above to any ASCII byte.
+ /// * The [`RegexSetBuilder::crlf`] option changes the line terminator
+ /// to be either `\r` or `\n`, but never at the position between a `\r`
+ /// and `\n`.
+ ///
+ /// This setting can also be configured using the inline flag `m` in
+ /// the pattern.
+ ///
+ /// The default for this is `false`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// let re = RegexSetBuilder::new([r"^foo$"])
+ /// .multi_line(true)
+ /// .build()
+ /// .unwrap();
+ /// assert!(re.is_match(b"\nfoo\n"));
+ /// ```
+ pub fn multi_line(&mut self, yes: bool) -> &mut RegexSetBuilder {
+ self.builder.multi_line(yes);
+ self
+ }
+
+ /// This configures dot-matches-new-line mode for all of the patterns.
+ ///
+ /// Perhaps surprisingly, the default behavior for `.` is not to match
+ /// any character, but rather, to match any character except for the
+ /// line terminator (which is `\n` by default). When this mode is
+ /// enabled, the behavior changes such that `.` truly matches any
+ /// character.
+ ///
+ /// This setting can also be configured using the inline flag `s` in
+ /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent
+ /// regexes.
+ ///
+ /// The default for this is `false`.
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"foo.bar"]) + /// .dot_matches_new_line(true) + /// .build() + /// .unwrap(); + /// let hay = b"foo\nbar"; + /// assert!(re.is_match(hay)); + /// ``` + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexSetBuilder { + self.builder.dot_matches_new_line(yes); + self + } + + /// This configures CRLF mode for all of the patterns. + /// + /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for + /// short) and `\n` ("line feed" or LF for short) are treated as line + /// terminators. This results in the following: + /// + /// * Unless dot-matches-new-line mode is enabled, `.` will now match + /// any character except for `\n` and `\r`. + /// * When multi-line mode is enabled, `^` will match immediately + /// following a `\n` or a `\r`. Similarly, `$` will match immediately + /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match + /// between `\r` and `\n`. + /// + /// This setting can also be configured using the inline flag `R` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = b"\r\nfoo\r\n"; + /// // If CRLF mode weren't enabled here, then '$' wouldn't match + /// // immediately after 'foo', and thus no match would be found. + /// assert!(re.is_match(hay)); + /// ``` + /// + /// This example demonstrates that `^` will never match at a position + /// between `\r` and `\n`. (`$` will similarly not match between a `\r` + /// and a `\n`.) + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^\n"]) + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// assert!(!re.is_match(b"\r\n")); + /// ``` + pub fn crlf(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.crlf(yes); + self + } + + /// Configures the line terminator to be used by the regex. + /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. + /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. This can + /// be a useful heuristic when searching binary data. 
+ ///
+ /// ```
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// let re = RegexSetBuilder::new([r"^foo$"])
+ /// .multi_line(true)
+ /// .line_terminator(b'\x00')
+ /// .build()
+ /// .unwrap();
+ /// let hay = b"\x00foo\x00";
+ /// assert!(re.is_match(hay));
+ /// ```
+ ///
+ /// This example shows that the behavior of `.` is impacted by this
+ /// setting as well:
+ ///
+ /// ```
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// let re = RegexSetBuilder::new([r"."])
+ /// .line_terminator(b'\x00')
+ /// .build()
+ /// .unwrap();
+ /// assert!(re.is_match(b"\n"));
+ /// assert!(!re.is_match(b"\x00"));
+ /// ```
+ ///
+ /// This shows that building a regex will work even when the byte given
+ /// is not ASCII. This is unlike the top-level `RegexSet` API where
+ /// matching invalid UTF-8 is not allowed.
+ ///
+ /// Note though that you must disable Unicode mode. This is required
+ /// because Unicode mode requires matching one codepoint at a time,
+ /// and there is no way to match a non-ASCII byte as if it were a
+ /// codepoint.
+ ///
+ /// ```
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// assert!(
+ /// RegexSetBuilder::new([r"."])
+ /// .unicode(false)
+ /// .line_terminator(0x80)
+ /// .build()
+ /// .is_ok(),
+ /// );
+ /// ```
+ pub fn line_terminator(&mut self, byte: u8) -> &mut RegexSetBuilder {
+ self.builder.line_terminator(byte);
+ self
+ }
+
+ /// This configures swap-greed mode for all of the patterns.
+ ///
+ /// When swap-greed mode is enabled, patterns like `a+` will become
+ /// non-greedy and patterns like `a+?` will become greedy. In other
+ /// words, the meanings of `a+` and `a+?` are switched.
+ ///
+ /// This setting can also be configured using the inline flag `U` in
+ /// the pattern.
+ ///
+ /// Note that this is generally not useful for a `RegexSet` since a
+ /// `RegexSet` can only report whether a pattern matches or not. Since
+ /// greediness never impacts whether a match is found or not (only the
+ /// offsets of the match), it follows that whether parts of a pattern
+ /// are greedy or not doesn't matter for a `RegexSet`.
+ ///
+ /// The default for this is `false`.
+ pub fn swap_greed(&mut self, yes: bool) -> &mut RegexSetBuilder {
+ self.builder.swap_greed(yes);
+ self
+ }
+
+ /// This configures verbose mode for all of the patterns.
+ ///
+ /// When enabled, whitespace will be treated as insignificant in the
+ /// pattern and `#` can be used to start a comment until the next new
+ /// line.
+ ///
+ /// Normally, in most places in a pattern, whitespace is treated
+ /// literally. For example ` +` will match one or more ASCII whitespace
+ /// characters.
+ ///
+ /// When verbose mode is enabled, `\#` can be used to match a literal
+ /// `#` and `\ ` can be used to match a literal ASCII whitespace
+ /// character.
+ ///
+ /// Verbose mode is useful for permitting regexes to be formatted and
+ /// broken up more nicely. This may make them more easily readable.
+ ///
+ /// This setting can also be configured using the inline flag `x` in
+ /// the pattern.
+ ///
+ /// The default for this is `false`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// let pat = r"
+ /// \b
+ /// (?<first>\p{Uppercase}\w*) # always start with uppercase letter
+ /// [\s--\n]+ # whitespace should separate names
+ /// (?: # middle name can be an initial!
+ /// (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*))
+ /// [\s--\n]+
+ /// )?
+ /// (?<last>\p{Uppercase}\w*)
+ /// \b
+ /// ";
+ /// let re = RegexSetBuilder::new([pat])
+ /// .ignore_whitespace(true)
+ /// .build()
+ /// .unwrap();
+ /// assert!(re.is_match(b"Harry Potter"));
+ /// assert!(re.is_match(b"Harry J. Potter"));
+ /// assert!(re.is_match(b"Harry James Potter"));
+ /// assert!(!re.is_match(b"harry J. Potter"));
+ /// ```
+ pub fn ignore_whitespace(
+ &mut self,
+ yes: bool,
+ ) -> &mut RegexSetBuilder {
+ self.builder.ignore_whitespace(yes);
+ self
+ }
+
+ /// This configures octal mode for all of the patterns.
+ ///
+ /// Octal syntax is a little-known way of uttering Unicode codepoints
+ /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all
+ /// equivalent patterns, where the last example shows octal syntax.
+ ///
+ /// While supporting octal syntax isn't in and of itself a problem,
+ /// it does make good error messages harder. That is, in PCRE based
+ /// regex engines, syntax like `\1` invokes a backreference, which is
+ /// explicitly unsupported in this library. However, many users expect
+ /// backreferences to be supported. Therefore, when octal support
+ /// is disabled, the error message will explicitly mention that
+ /// backreferences aren't supported.
+ ///
+ /// The default for this is `false`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// // Normally this pattern would not compile, with an error message
+ /// // about backreferences not being supported. But with octal mode
+ /// // enabled, octal escape sequences work.
+ /// let re = RegexSetBuilder::new([r"\141"])
+ /// .octal(true)
+ /// .build()
+ /// .unwrap();
+ /// assert!(re.is_match(b"a"));
+ /// ```
+ pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder {
+ self.builder.octal(yes);
+ self
+ }
+
+ /// Sets the approximate size limit, in bytes, of the compiled regex.
+ ///
+ /// This roughly corresponds to the amount of heap memory, in
+ /// bytes, occupied by a single regex. If the regex would otherwise
+ /// approximately exceed this limit, then compiling that regex will
+ /// fail.
+ ///
+ /// The main utility of a method like this is to avoid compiling
+ /// regexes that use an unexpected amount of resources, such as
+ /// time and memory. Even if the memory usage of a large regex is
+ /// acceptable, its search time may not be. Namely, worst case time
+ /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and
+ /// `n ~ len(haystack)`. That is, search time depends, in part, on the
+ /// size of the compiled regex. This means that putting a limit on the
+ /// size of the regex limits how much a regex can impact search time.
+ ///
+ /// For more information about regex size limits, see the section on
+ /// [untrusted inputs](crate#untrusted-input) in the top-level crate
+ /// documentation.
+ ///
+ /// The default for this is some reasonable number that permits most
+ /// patterns to compile successfully.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// // It may surprise you how big some seemingly small patterns can
+ /// // be! Since \w is Unicode aware, this generates a regex that can
+ /// // match approximately 140,000 distinct codepoints.
+ /// assert!(
+ /// RegexSetBuilder::new([r"\w"])
+ /// .size_limit(45_000)
+ /// .build()
+ /// .is_err()
+ /// );
+ /// ```
+ pub fn size_limit(&mut self, bytes: usize) -> &mut RegexSetBuilder {
+ self.builder.size_limit(bytes);
+ self
+ }
+
+ /// Set the approximate capacity, in bytes, of the cache of transitions
+ /// used by the lazy DFA.
+ ///
+ /// While the lazy DFA isn't always used, it tends to be the most
+ /// commonly used regex engine in default configurations. It tends to
+ /// adopt the performance profile of a fully built DFA, but without the
+ /// downside of taking worst case exponential time to build.
+ ///
+ /// The downside is that it needs to keep a cache of transitions and
+ /// states that are built while running a search, and this cache
+ /// can fill up. When it fills up, the cache will reset itself. Any
+ /// previously generated states and transitions will then need to be
+ /// re-generated. If this happens too many times, then this library
+ /// will bail out of using the lazy DFA and switch to a different regex
+ /// engine.
+ ///
+ /// If your regex provokes this particular downside of the lazy DFA,
+ /// then it may be beneficial to increase its cache capacity. This will
+ /// potentially reduce the frequency of cache resetting (ideally to
+ /// `0`). While it won't fix all potential performance problems with
+ /// the lazy DFA, increasing the cache capacity does fix some.
+ ///
+ /// There is no easy way to determine, a priori, whether increasing
+ /// this cache capacity will help. In general, the larger your regex,
+ /// the more cache it's likely to use. But that isn't an ironclad rule.
+ /// For example, a regex like `[01]*1[01]{N}` would normally produce a
+ /// fully built DFA that is exponential in size with respect to `N`.
+ /// The lazy DFA will prevent exponential space blow-up, but its cache
+ /// is likely to fill up, even when it's large and even for smallish
+ /// values of `N`.
+ ///
+ /// If you aren't sure whether this helps or not, it is sensible to
+ /// set this to some arbitrarily large number in testing, such as
+ /// `usize::MAX`. Namely, this represents the amount of capacity that
+ /// *may* be used. It's probably not a good idea to use `usize::MAX` in
+ /// production though, since it implies there are no controls on heap
+ /// memory used by this library during a search. In effect, set it to
+ /// whatever you're willing to allocate for a single regex search.
+ pub fn dfa_size_limit(
+ &mut self,
+ bytes: usize,
+ ) -> &mut RegexSetBuilder {
+ self.builder.dfa_size_limit(bytes);
+ self
+ }
+
+ /// Set the nesting limit for this parser.
+ ///
+ /// The nesting limit controls how deep the abstract syntax tree is
+ /// allowed to be. If the AST exceeds the given limit (e.g., with too
+ /// many nested groups), then an error is returned by the parser.
+ ///
+ /// The purpose of this limit is to act as a heuristic to prevent stack
+ /// overflow for consumers that do structural induction on an AST using
+ /// explicit recursion. While this crate never does this (instead using
+ /// constant stack space and moving the call stack to the heap), other
+ /// crates may.
+ ///
+ /// This limit is not checked until the entire AST is parsed.
+ /// Therefore, if callers want to put a limit on the amount of heap
+ /// space used, then they should impose a limit on the length, in
+ /// bytes, of the concrete pattern string.
In particular, this is
+ /// viable since this parser implementation will limit itself to heap
+ /// space proportional to the length of the pattern string. See also
+ /// the [untrusted inputs](crate#untrusted-input) section in the
+ /// top-level crate documentation for more information about this.
+ ///
+ /// Note that a nest limit of `0` will return a nest limit error for
+ /// most patterns but not all. For example, a nest limit of `0` permits
+ /// `a` but not `ab`, since `ab` requires an explicit concatenation,
+ /// which results in a nest depth of `1`. In general, a nest limit is
+ /// not something that manifests in an obvious way in the concrete
+ /// syntax, therefore, it should not be used in a granular way.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use regex::bytes::RegexSetBuilder;
+ ///
+ /// assert!(RegexSetBuilder::new([r"a"]).nest_limit(0).build().is_ok());
+ /// assert!(RegexSetBuilder::new([r"ab"]).nest_limit(0).build().is_err());
+ /// ```
+ pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder {
+ self.builder.nest_limit(limit);
+ self
+ }
+ }
+}
diff --git a/vendor/regex/src/bytes.rs b/vendor/regex/src/bytes.rs
new file mode 100644
index 0000000..383ac4a
--- /dev/null
+++ b/vendor/regex/src/bytes.rs
@@ -0,0 +1,91 @@
+/*!
+Search for regex matches in `&[u8]` haystacks.
+
+This module provides a nearly identical API via [`Regex`] to the one found in
+the top-level of this crate. There are two important differences:
+
+1. Matching is done on `&[u8]` instead of `&str`. Additionally, `Vec<u8>`
+is used where `String` would have been used in the top-level API.
+2. Unicode support can be disabled even when disabling it would result in
+matching invalid UTF-8 bytes.
+
+# Example: match null terminated string
+
+This shows how to find all null-terminated strings in a slice of bytes. This
+works even if a C string contains invalid UTF-8.
+
+```rust
+use regex::bytes::Regex;
+
+let re = Regex::new(r"(?-u)(?<cstr>[^\x00]+)\x00").unwrap();
+let hay = b"foo\x00qu\xFFux\x00baz\x00";
+
+// Extract all of the strings without the NUL terminator from each match.
+// The unwrap is OK here since a match requires the `cstr` capture to match.
+let cstrs: Vec<&[u8]> =
+ re.captures_iter(hay)
+ .map(|c| c.name("cstr").unwrap().as_bytes())
+ .collect();
+assert_eq!(cstrs, vec![&b"foo"[..], &b"qu\xFFux"[..], &b"baz"[..]]);
+```
+
+# Example: selectively enable Unicode support
+
+This shows how to match an arbitrary byte pattern followed by a UTF-8 encoded
+string (e.g., to extract a title from a Matroska file):
+
+```rust
+use regex::bytes::Regex;
+
+let re = Regex::new(
+ r"(?-u)\x7b\xa9(?:[\x80-\xfe]|[\x40-\xff].)(?u:(.*))"
+).unwrap();
+let hay = b"\x12\xd0\x3b\x5f\x7b\xa9\x85\xe2\x98\x83\x80\x98\x54\x76\x68\x65";
+
+// Notice that despite the `.*` at the end, it will only match valid UTF-8
+// because Unicode mode was enabled with the `u` flag. Without the `u` flag,
+// the `.*` would match the rest of the bytes regardless of whether they were
+// valid UTF-8.
+let (_, [title]) = re.captures(hay).unwrap().extract();
+assert_eq!(title, b"\xE2\x98\x83");
+// We can UTF-8 decode the title now. And the unwrap here
+// is correct because the existence of a match guarantees
+// that `title` is valid UTF-8.
+let title = std::str::from_utf8(title).unwrap();
+assert_eq!(title, "☃");
+```
+
+In general, if the Unicode flag is enabled in a capture group and that capture
+is part of the overall match, then the capture is *guaranteed* to be valid
+UTF-8.
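+
+For example (a minimal sketch added for illustration; not part of the
+original module docs): the capture group below is matched with Unicode mode
+enabled, so its contents can always be decoded as UTF-8 even though the
+surrounding pattern matches arbitrary bytes.
+
+```rust
+use regex::bytes::Regex;
+
+// `(?s-u:.)` matches any single byte; `(?u:(.))` captures one codepoint.
+let re = Regex::new(r"(?s-u:.)(?u:(.))").unwrap();
+let caps = re.captures(b"\xFFa").unwrap();
+// The group matched in Unicode mode, so decoding it as UTF-8 cannot fail.
+assert_eq!(std::str::from_utf8(&caps[1]).unwrap(), "a");
+```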
+
+# Syntax
+
+The supported syntax is pretty much the same as the syntax for Unicode
+regular expressions with a few changes that make sense for matching arbitrary
+bytes:
+
+1. The `u` flag can be disabled even when disabling it might cause the regex to
+match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in
+"ASCII compatible" mode.
+2. In ASCII compatible mode, Unicode character classes are not allowed. Literal
+Unicode scalar values outside of character classes are allowed.
+3. In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`)
+revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps
+to `[[:digit:]]` and `\s` maps to `[[:space:]]`.
+4. In ASCII compatible mode, word boundaries use the ASCII compatible `\w` to
+determine whether a byte is a word byte or not.
+5. Hexadecimal notation can be used to specify arbitrary bytes instead of
+Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the
+literal byte `\xFF`, while in Unicode mode, `\xFF` is the Unicode codepoint
+`U+00FF` that matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal
+notation when enabled.
+6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the
+`s` flag is additionally enabled, `.` matches any byte.
+
+# Performance
+
+In general, one should expect performance on `&[u8]` to be roughly similar to
+performance on `&str`.
+*/
+pub use crate::{builders::bytes::*, regex::bytes::*, regexset::bytes::*};
diff --git a/vendor/regex/src/compile.rs b/vendor/regex/src/compile.rs
deleted file mode 100644
index 90ca250..0000000
--- a/vendor/regex/src/compile.rs
+++ /dev/null
@@ -1,1264 +0,0 @@
-use std::collections::HashMap;
-use std::fmt;
-use std::iter;
-use std::result;
-use std::sync::Arc;
-
-use regex_syntax::hir::{self, Hir};
-use regex_syntax::is_word_byte;
-use regex_syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
-
-use crate::prog::{
- EmptyLook, Inst, InstBytes, InstChar, InstEmptyLook, InstPtr, InstRanges,
- InstSave, InstSplit, Program,
-};
-
-use crate::Error;
-
-type Result<T> = result::Result<T, Error>;
-type ResultOrEmpty = result::Result<Option<Patch>, Error>;
-
-#[derive(Debug)]
-struct Patch {
- hole: Hole,
- entry: InstPtr,
-}
-
-/// A compiler translates a regular expression AST to a sequence of
-/// instructions. The sequence of instructions represents an NFA.
-// `Compiler` is only public via the `internal` module, so avoid deriving
-// `Debug`.
-#[allow(missing_debug_implementations)]
-pub struct Compiler {
- insts: Vec<MaybeInst>,
- compiled: Program,
- capture_name_idx: HashMap<String, usize>,
- num_exprs: usize,
- size_limit: usize,
- suffix_cache: SuffixCache,
- utf8_seqs: Option<Utf8Sequences>,
- byte_classes: ByteClassSet,
- // This keeps track of extra bytes allocated while compiling the regex
- // program. Currently, this corresponds to two things. First is the heap
- // memory allocated by Unicode character classes ('InstRanges'). Second is
- // a "fake" amount of memory used by empty sub-expressions, so that enough
- // empty sub-expressions will ultimately trigger the compiler to bail
- // because of a size limit restriction. (That empty sub-expressions don't
- // add to heap memory usage is more-or-less an implementation detail.) In
- // the second case, if we don't bail, then an excessively large repetition
- // on an empty sub-expression can result in the compiler using a very large
- // amount of CPU time.
- extra_inst_bytes: usize,
-}
-
-impl Compiler {
- /// Create a new regular expression compiler.
- ///
- /// Various options can be set before calling `compile` on an expression.
- pub fn new() -> Self {
- Compiler {
- insts: vec![],
- compiled: Program::new(),
- capture_name_idx: HashMap::new(),
- num_exprs: 0,
- size_limit: 10 * (1 << 20),
- suffix_cache: SuffixCache::new(1000),
- utf8_seqs: Some(Utf8Sequences::new('\x00', '\x00')),
- byte_classes: ByteClassSet::new(),
- extra_inst_bytes: 0,
- }
- }
-
- /// The size of the resulting program is limited by size_limit. If
- /// the program approximately exceeds the given size (in bytes), then
- /// compilation will stop and return an error.
- pub fn size_limit(mut self, size_limit: usize) -> Self {
- self.size_limit = size_limit;
- self
- }
-
- /// If bytes is true, then the program is compiled as a byte based
- /// automaton, which incorporates UTF-8 decoding into the machine. If it's
- /// false, then the automaton is Unicode scalar value based, e.g., an
- /// engine utilizing such an automaton is responsible for UTF-8 decoding.
- ///
- /// The specific invariant is that when returning a byte based machine,
- /// then neither the `Char` nor `Ranges` instructions are produced.
- /// Conversely, when producing a Unicode scalar value machine, the `Bytes`
- /// instruction is never produced.
- ///
- /// Note that `dfa(true)` implies `bytes(true)`.
- pub fn bytes(mut self, yes: bool) -> Self {
- self.compiled.is_bytes = yes;
- self
- }
-
- /// When disabled, the program compiled may match arbitrary bytes.
- ///
- /// When enabled (the default), all compiled programs exclusively match
- /// valid UTF-8 bytes.
- pub fn only_utf8(mut self, yes: bool) -> Self {
- self.compiled.only_utf8 = yes;
- self
- }
-
- /// When set, the machine returned is suitable for use in the DFA matching
- /// engine.
- ///
- /// In particular, this ensures that if the regex is not anchored in the
- /// beginning, then a preceding `.*?` is included in the program. (The NFA
- /// based engines handle the preceding `.*?` explicitly, which is difficult
- /// or impossible in the DFA engine.)
- pub fn dfa(mut self, yes: bool) -> Self {
- self.compiled.is_dfa = yes;
- self
- }
-
- /// When set, the machine returned is suitable for matching text in
- /// reverse. In particular, all concatenations are flipped.
- pub fn reverse(mut self, yes: bool) -> Self {
- self.compiled.is_reverse = yes;
- self
- }
-
- /// Compile a regular expression given its AST.
- ///
- /// The compiler is guaranteed to succeed unless the program exceeds the
- /// specified size limit. If the size limit is exceeded, then compilation
- /// stops and returns an error.
- pub fn compile(mut self, exprs: &[Hir]) -> result::Result<Program, Error> {
- debug_assert!(!exprs.is_empty());
- self.num_exprs = exprs.len();
- if exprs.len() == 1 {
- self.compile_one(&exprs[0])
- } else {
- self.compile_many(exprs)
- }
- }
-
- fn compile_one(mut self, expr: &Hir) -> result::Result<Program, Error> {
- // If we're compiling a forward DFA and we aren't anchored, then
- // add a `.*?` before the first capture group.
- // Other matching engines handle this by baking the logic into the
- // matching engine itself.
- let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
- self.compiled.is_anchored_start = expr.is_anchored_start();
- self.compiled.is_anchored_end = expr.is_anchored_end();
- if self.compiled.needs_dotstar() {
- dotstar_patch = self.c_dotstar()?;
- self.compiled.start = dotstar_patch.entry;
- }
- self.compiled.captures = vec![None];
- let patch =
- self.c_capture(0, expr)?.unwrap_or_else(|| self.next_inst());
- if self.compiled.needs_dotstar() {
- self.fill(dotstar_patch.hole, patch.entry);
- } else {
- self.compiled.start = patch.entry;
- }
- self.fill_to_next(patch.hole);
- self.compiled.matches = vec![self.insts.len()];
- self.push_compiled(Inst::Match(0));
- self.compile_finish()
- }
-
- fn compile_many(
- mut self,
- exprs: &[Hir],
- ) -> result::Result<Program, Error> {
- debug_assert!(exprs.len() > 1);
-
- self.compiled.is_anchored_start =
- exprs.iter().all(|e| e.is_anchored_start());
- self.compiled.is_anchored_end =
- exprs.iter().all(|e| e.is_anchored_end());
- let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
- if self.compiled.needs_dotstar() {
- dotstar_patch = self.c_dotstar()?;
- self.compiled.start = dotstar_patch.entry;
- } else {
- self.compiled.start = 0; // first instruction is always split
- }
- self.fill_to_next(dotstar_patch.hole);
-
- let mut prev_hole = Hole::None;
- for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() {
- self.fill_to_next(prev_hole);
- let split = self.push_split_hole();
- let Patch { hole, entry } =
- self.c_capture(0, expr)?.unwrap_or_else(|| self.next_inst());
- self.fill_to_next(hole);
- self.compiled.matches.push(self.insts.len());
- self.push_compiled(Inst::Match(i));
- prev_hole = self.fill_split(split, Some(entry), None);
- }
- let i = exprs.len() - 1;
- let Patch { hole, entry } =
- self.c_capture(0, &exprs[i])?.unwrap_or_else(|| self.next_inst());
- self.fill(prev_hole, entry);
- self.fill_to_next(hole);
- self.compiled.matches.push(self.insts.len());
- self.push_compiled(Inst::Match(i));
- self.compile_finish()
- }
-
- fn compile_finish(mut self) -> result::Result<Program, Error> {
- self.compiled.insts =
- self.insts.into_iter().map(|inst| inst.unwrap()).collect();
- self.compiled.byte_classes = self.byte_classes.byte_classes();
- self.compiled.capture_name_idx = Arc::new(self.capture_name_idx);
- Ok(self.compiled)
- }
-
- /// Compile expr into self.insts, returning a patch on success,
- /// or an error if we run out of memory.
- ///
- /// All of the c_* methods of the compiler share the contract outlined
- /// here.
- ///
- /// The main thing that a c_* method does is mutate `self.insts`
- /// to add a list of mostly compiled instructions required to execute
- /// the given expression. `self.insts` contains MaybeInsts rather than
- /// Insts because there is some backpatching required.
- ///
- /// The `Patch` value returned by each c_* method provides metadata
- /// about the compiled instructions emitted to `self.insts`. The
- /// `entry` member of the patch refers to the first instruction
- /// (the entry point), while the `hole` member contains zero or
- /// more offsets to partial instructions that need to be backpatched.
- /// The c_* routine can't know where its list of instructions are going to
- /// jump to after execution, so it is up to the caller to patch
- /// these jumps to point to the right place. So compiling some
- /// expression, e, we would end up with a situation that looked like:
- ///
- /// ```text
- /// self.insts = [ ..., i1, i2, ..., iexit1, ..., iexitn, ...]
- /// ^ ^ ^
- /// | \ /
- /// entry \ /
- /// hole
- /// ```
- ///
- /// To compile two expressions, e1 and e2, concatenated together we
- /// would do:
- ///
- /// ```ignore
- /// let patch1 = self.c(e1);
- /// let patch2 = self.c(e2);
- /// ```
- ///
- /// which leaves us with a situation that looks like
- ///
- /// ```text
- /// self.insts = [ ..., i1, ..., iexit1, ..., i2, ..., iexit2 ]
- /// ^ ^ ^ ^
- /// | | | |
- /// entry1 hole1 entry2 hole2
- /// ```
- ///
- /// Then to merge the two patches together into one we would backpatch
- /// hole1 with entry2 and return a new patch that enters at entry1
- /// and has hole2 for a hole. In fact, if you look at the c_concat
- /// method you will see that it does exactly this, though it handles
- /// a list of expressions rather than just the two that we use for
- /// an example.
- ///
- /// Ok(None) is returned when an expression is compiled to no
- /// instruction, and so no patch.entry value makes sense.
- fn c(&mut self, expr: &Hir) -> ResultOrEmpty {
- use crate::prog;
- use regex_syntax::hir::HirKind::*;
-
- self.check_size()?;
- match *expr.kind() {
- Empty => self.c_empty(),
- Literal(hir::Literal::Unicode(c)) => self.c_char(c),
- Literal(hir::Literal::Byte(b)) => {
- assert!(self.compiled.uses_bytes());
- self.c_byte(b)
- }
- Class(hir::Class::Unicode(ref cls)) => self.c_class(cls.ranges()),
- Class(hir::Class::Bytes(ref cls)) => {
- if self.compiled.uses_bytes() {
- self.c_class_bytes(cls.ranges())
- } else {
- assert!(cls.is_all_ascii());
- let mut char_ranges = vec![];
- for r in cls.iter() {
- let (s, e) = (r.start() as char, r.end() as char);
- char_ranges.push(hir::ClassUnicodeRange::new(s, e));
- }
- self.c_class(&char_ranges)
- }
- }
- Anchor(hir::Anchor::StartLine) if self.compiled.is_reverse => {
- self.byte_classes.set_range(b'\n', b'\n');
- self.c_empty_look(prog::EmptyLook::EndLine)
- }
- Anchor(hir::Anchor::StartLine) => {
- self.byte_classes.set_range(b'\n', b'\n');
- self.c_empty_look(prog::EmptyLook::StartLine)
- }
- Anchor(hir::Anchor::EndLine) if self.compiled.is_reverse => {
- self.byte_classes.set_range(b'\n', b'\n');
- self.c_empty_look(prog::EmptyLook::StartLine)
- }
- Anchor(hir::Anchor::EndLine) => {
- self.byte_classes.set_range(b'\n', b'\n');
- self.c_empty_look(prog::EmptyLook::EndLine)
- }
- Anchor(hir::Anchor::StartText) if self.compiled.is_reverse => {
- self.c_empty_look(prog::EmptyLook::EndText)
- }
- Anchor(hir::Anchor::StartText) => {
- self.c_empty_look(prog::EmptyLook::StartText)
- }
- Anchor(hir::Anchor::EndText) if self.compiled.is_reverse => {
- self.c_empty_look(prog::EmptyLook::StartText)
- }
- Anchor(hir::Anchor::EndText) => {
- self.c_empty_look(prog::EmptyLook::EndText)
- }
- WordBoundary(hir::WordBoundary::Unicode) => {
- if !cfg!(feature = "unicode-perl") {
- return Err(Error::Syntax(
- "Unicode word boundaries are unavailable when \
- the unicode-perl feature is disabled"
- .to_string(),
- ));
- }
- self.compiled.has_unicode_word_boundary = true;
- self.byte_classes.set_word_boundary();
- // We also make sure that all ASCII bytes are in a different
- // class from non-ASCII bytes. Otherwise, it's possible for
- // ASCII bytes to get lumped into the same class as non-ASCII
- // bytes. This in turn may cause the lazy DFA to falsely start
- // when it sees an ASCII byte that maps to a byte class with
- // non-ASCII bytes. This ensures that never happens.
- self.byte_classes.set_range(0, 0x7F);
- self.c_empty_look(prog::EmptyLook::WordBoundary)
- }
- WordBoundary(hir::WordBoundary::UnicodeNegate) => {
- if !cfg!(feature = "unicode-perl") {
- return Err(Error::Syntax(
- "Unicode word boundaries are unavailable when \
- the unicode-perl feature is disabled"
- .to_string(),
- ));
- }
- self.compiled.has_unicode_word_boundary = true;
- self.byte_classes.set_word_boundary();
- // See comments above for why we set the ASCII range here.
- self.byte_classes.set_range(0, 0x7F);
- self.c_empty_look(prog::EmptyLook::NotWordBoundary)
- }
- WordBoundary(hir::WordBoundary::Ascii) => {
- self.byte_classes.set_word_boundary();
- self.c_empty_look(prog::EmptyLook::WordBoundaryAscii)
- }
- WordBoundary(hir::WordBoundary::AsciiNegate) => {
- self.byte_classes.set_word_boundary();
- self.c_empty_look(prog::EmptyLook::NotWordBoundaryAscii)
- }
- Group(ref g) => match g.kind {
- hir::GroupKind::NonCapturing => self.c(&g.hir),
- hir::GroupKind::CaptureIndex(index) => {
- if index as usize >= self.compiled.captures.len() {
- self.compiled.captures.push(None);
- }
- self.c_capture(2 * index as usize, &g.hir)
- }
- hir::GroupKind::CaptureName { index, ref name } => {
- if index as usize >= self.compiled.captures.len() {
- let n = name.to_string();
- self.compiled.captures.push(Some(n.clone()));
- self.capture_name_idx.insert(n, index as usize);
- }
- self.c_capture(2 * index as usize, &g.hir)
- }
- },
- Concat(ref es) => {
- if self.compiled.is_reverse {
- self.c_concat(es.iter().rev())
- } else {
- self.c_concat(es)
- }
- }
- Alternation(ref es) => self.c_alternate(&**es),
- Repetition(ref rep) => self.c_repeat(rep),
- }
- }
-
- fn c_empty(&mut self) -> ResultOrEmpty {
- // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
- // See: CVE-2022-24713
- //
- // Since 'empty' sub-expressions don't increase the size of
- // the actual compiled object, we "fake" an increase in its
- // size so that our 'check_size_limit' routine will eventually
- // stop compilation if there are too many empty sub-expressions
- // (e.g., via a large repetition).
- self.extra_inst_bytes += std::mem::size_of::<Inst>();
- Ok(None)
- }
-
- fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty {
- if self.num_exprs > 1 || self.compiled.is_dfa {
- // Don't ever compile Save instructions for regex sets because
- // they are never used. They are also never used in DFA programs
- // because DFAs can't handle captures.
- self.c(expr)
- } else {
- let entry = self.insts.len();
- let hole = self.push_hole(InstHole::Save { slot: first_slot });
- let patch = self.c(expr)?.unwrap_or_else(|| self.next_inst());
- self.fill(hole, patch.entry);
- self.fill_to_next(patch.hole);
- let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 });
- Ok(Some(Patch { hole, entry }))
- }
- }
-
- fn c_dotstar(&mut self) -> Result<Patch> {
- Ok(if !self.compiled.only_utf8() {
- self.c(&Hir::repetition(hir::Repetition {
- kind: hir::RepetitionKind::ZeroOrMore,
- greedy: false,
- hir: Box::new(Hir::any(true)),
- }))?
- .unwrap()
- } else {
- self.c(&Hir::repetition(hir::Repetition {
- kind: hir::RepetitionKind::ZeroOrMore,
- greedy: false,
- hir: Box::new(Hir::any(false)),
- }))?
- .unwrap()
- })
- }
-
- fn c_char(&mut self, c: char) -> ResultOrEmpty {
- if self.compiled.uses_bytes() {
- if c.is_ascii() {
- let b = c as u8;
- let hole =
- self.push_hole(InstHole::Bytes { start: b, end: b });
- self.byte_classes.set_range(b, b);
- Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
- } else {
- self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
- }
- } else {
- let hole = self.push_hole(InstHole::Char { c });
- Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
- }
- }
-
- fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty {
- use std::mem::size_of;
-
- assert!(!ranges.is_empty());
- if self.compiled.uses_bytes() {
- Ok(Some(CompileClass { c: self, ranges }.compile()?))
- } else {
- let ranges: Vec<(char, char)> =
- ranges.iter().map(|r| (r.start(), r.end())).collect();
- let hole = if ranges.len() == 1 && ranges[0].0 == ranges[0].1 {
- self.push_hole(InstHole::Char { c: ranges[0].0 })
- } else {
- self.extra_inst_bytes +=
- ranges.len() * (size_of::<char>() * 2);
- self.push_hole(InstHole::Ranges { ranges })
- };
- Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
- }
- }
-
- fn c_byte(&mut self, b: u8) -> ResultOrEmpty {
- self.c_class_bytes(&[hir::ClassBytesRange::new(b, b)])
- }
-
- fn c_class_bytes(
- &mut self,
- ranges: &[hir::ClassBytesRange],
- ) -> ResultOrEmpty {
- debug_assert!(!ranges.is_empty());
-
- let first_split_entry = self.insts.len();
- let mut holes = vec![];
- let mut prev_hole = Hole::None;
- for r in &ranges[0..ranges.len() - 1] {
- self.fill_to_next(prev_hole);
- let split = self.push_split_hole();
- let next = self.insts.len();
- self.byte_classes.set_range(r.start(), r.end());
- holes.push(self.push_hole(InstHole::Bytes {
- start: r.start(),
- end: r.end(),
- }));
- prev_hole = self.fill_split(split, Some(next), None);
- }
- let next = self.insts.len();
- let r = &ranges[ranges.len() - 1];
- self.byte_classes.set_range(r.start(), r.end());
- holes.push(
- self.push_hole(InstHole::Bytes { start: r.start(), end: r.end() }),
- );
- self.fill(prev_hole, next);
- Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry }))
- }
-
- fn c_empty_look(&mut self, look: EmptyLook) -> ResultOrEmpty {
- let hole = self.push_hole(InstHole::EmptyLook { look });
- Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
- }
-
- fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty
- where
- I: IntoIterator<Item = &'a Hir>,
- {
- let mut exprs = exprs.into_iter();
- let Patch { mut hole, entry } = loop {
- match exprs.next() {
- None => return self.c_empty(),
- Some(e) => {
- if let Some(p) = self.c(e)? {
- break p;
- }
- }
- }
- };
- for e in exprs {
- if let Some(p) = self.c(e)? {
- self.fill(hole, p.entry);
- hole = p.hole;
- }
- }
- Ok(Some(Patch { hole, entry }))
- }
-
- fn c_alternate(&mut self, exprs: &[Hir]) -> ResultOrEmpty {
- debug_assert!(
- exprs.len() >= 2,
- "alternates must have at least 2 exprs"
- );
-
- // Initial entry point is always the first split.
- let first_split_entry = self.insts.len();
-
- // Save up all of the holes from each alternate. They will all get
- // patched to point to the same location.
- let mut holes = vec![];
-
- // true indicates that the hole is a split where we want to fill
- // the second branch.
- let mut prev_hole = (Hole::None, false); - for e in &exprs[0..exprs.len() - 1] { - if prev_hole.1 { - let next = self.insts.len(); - self.fill_split(prev_hole.0, None, Some(next)); - } else { - self.fill_to_next(prev_hole.0); - } - let split = self.push_split_hole(); - if let Some(Patch { hole, entry }) = self.c(e)? { - holes.push(hole); - prev_hole = (self.fill_split(split, Some(entry), None), false); - } else { - let (split1, split2) = split.dup_one(); - holes.push(split1); - prev_hole = (split2, true); - } - } - if let Some(Patch { hole, entry }) = self.c(&exprs[exprs.len() - 1])? { - holes.push(hole); - if prev_hole.1 { - self.fill_split(prev_hole.0, None, Some(entry)); - } else { - self.fill(prev_hole.0, entry); - } - } else { - // We ignore prev_hole.1. When it's true, it means we have two - // empty branches both pushing prev_hole.0 into holes, so both - // branches will go to the same place anyway. - holes.push(prev_hole.0); - } - Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry })) - } - - fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty { - use regex_syntax::hir::RepetitionKind::*; - match rep.kind { - ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy), - ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy), - OneOrMore => self.c_repeat_one_or_more(&rep.hir, rep.greedy), - Range(hir::RepetitionRange::Exactly(min_max)) => { - self.c_repeat_range(&rep.hir, rep.greedy, min_max, min_max) - } - Range(hir::RepetitionRange::AtLeast(min)) => { - self.c_repeat_range_min_or_more(&rep.hir, rep.greedy, min) - } - Range(hir::RepetitionRange::Bounded(min, max)) => { - self.c_repeat_range(&rep.hir, rep.greedy, min, max) - } - } - } - - fn c_repeat_zero_or_one( - &mut self, - expr: &Hir, - greedy: bool, - ) -> ResultOrEmpty { - let split_entry = self.insts.len(); - let split = self.push_split_hole(); - let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? { - Some(p) => p, - None => return self.pop_split_hole(), - }; - let split_hole = if greedy { - self.fill_split(split, Some(entry_rep), None) - } else { - self.fill_split(split, None, Some(entry_rep)) - }; - let holes = vec![hole_rep, split_hole]; - Ok(Some(Patch { hole: Hole::Many(holes), entry: split_entry })) - } - - fn c_repeat_zero_or_more( - &mut self, - expr: &Hir, - greedy: bool, - ) -> ResultOrEmpty { - let split_entry = self.insts.len(); - let split = self.push_split_hole(); - let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? { - Some(p) => p, - None => return self.pop_split_hole(), - }; - - self.fill(hole_rep, split_entry); - let split_hole = if greedy { - self.fill_split(split, Some(entry_rep), None) - } else { - self.fill_split(split, None, Some(entry_rep)) - }; - Ok(Some(Patch { hole: split_hole, entry: split_entry })) - } - - fn c_repeat_one_or_more( - &mut self, - expr: &Hir, - greedy: bool, - ) -> ResultOrEmpty { - let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? 
{
- Some(p) => p,
- None => return Ok(None),
- };
- self.fill_to_next(hole_rep);
- let split = self.push_split_hole();
-
- let split_hole = if greedy {
- self.fill_split(split, Some(entry_rep), None)
- } else {
- self.fill_split(split, None, Some(entry_rep))
- };
- Ok(Some(Patch { hole: split_hole, entry: entry_rep }))
- }
-
- fn c_repeat_range_min_or_more(
- &mut self,
- expr: &Hir,
- greedy: bool,
- min: u32,
- ) -> ResultOrEmpty {
- let min = u32_to_usize(min);
- // Using next_inst() is ok, because we can't return it (concat would
- // have to return Some(_) while c_repeat_range_min_or_more returns
- // None).
- let patch_concat = self
- .c_concat(iter::repeat(expr).take(min))?
- .unwrap_or_else(|| self.next_inst());
- if let Some(patch_rep) = self.c_repeat_zero_or_more(expr, greedy)? {
- self.fill(patch_concat.hole, patch_rep.entry);
- Ok(Some(Patch { hole: patch_rep.hole, entry: patch_concat.entry }))
- } else {
- Ok(None)
- }
- }
-
- fn c_repeat_range(
- &mut self,
- expr: &Hir,
- greedy: bool,
- min: u32,
- max: u32,
- ) -> ResultOrEmpty {
- let (min, max) = (u32_to_usize(min), u32_to_usize(max));
- debug_assert!(min <= max);
- let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
- if min == max {
- return Ok(patch_concat);
- }
- // Same reasoning as in c_repeat_range_min_or_more (we know that min <
- // max at this point).
- let patch_concat = patch_concat.unwrap_or_else(|| self.next_inst());
- let initial_entry = patch_concat.entry;
- // It is much simpler to compile, e.g., `a{2,5}` as:
- //
- // aaa?a?a?
- //
- // But you end up with a sequence of instructions like this:
- //
- // 0: 'a'
- // 1: 'a',
- // 2: split(3, 4)
- // 3: 'a'
- // 4: split(5, 6)
- // 5: 'a'
- // 6: split(7, 8)
- // 7: 'a'
- // 8: MATCH
- //
- // This is *incredibly* inefficient because the splits end
- // up forming a chain, which has to be resolved every time a
- // transition is followed.
- let mut holes = vec![];
- let mut prev_hole = patch_concat.hole;
- for _ in min..max {
- self.fill_to_next(prev_hole);
- let split = self.push_split_hole();
- let Patch { hole, entry } = match self.c(expr)? {
- Some(p) => p,
- None => return self.pop_split_hole(),
- };
- prev_hole = hole;
- if greedy {
- holes.push(self.fill_split(split, Some(entry), None));
- } else {
- holes.push(self.fill_split(split, None, Some(entry)));
- }
- }
- holes.push(prev_hole);
- Ok(Some(Patch { hole: Hole::Many(holes), entry: initial_entry }))
- }
-
- /// Can be used as a default value for the c_* functions when the call to
- /// the c_* function is followed by inserting at least one instruction
- /// that is always executed after the ones written by the c_* function.
-    /// Can be used as a default value for the c_* functions when the call to
-    /// c_function is followed by inserting at least one instruction that is
-    /// always executed after the ones written by the c* function.
-    fn next_inst(&self) -> Patch {
-        Patch { hole: Hole::None, entry: self.insts.len() }
-    }
-
-    fn fill(&mut self, hole: Hole, goto: InstPtr) {
-        match hole {
-            Hole::None => {}
-            Hole::One(pc) => {
-                self.insts[pc].fill(goto);
-            }
-            Hole::Many(holes) => {
-                for hole in holes {
-                    self.fill(hole, goto);
-                }
-            }
-        }
-    }
-
-    fn fill_to_next(&mut self, hole: Hole) {
-        let next = self.insts.len();
-        self.fill(hole, next);
-    }
-
-    fn fill_split(
-        &mut self,
-        hole: Hole,
-        goto1: Option<InstPtr>,
-        goto2: Option<InstPtr>,
-    ) -> Hole {
-        match hole {
-            Hole::None => Hole::None,
-            Hole::One(pc) => match (goto1, goto2) {
-                (Some(goto1), Some(goto2)) => {
-                    self.insts[pc].fill_split(goto1, goto2);
-                    Hole::None
-                }
-                (Some(goto1), None) => {
-                    self.insts[pc].half_fill_split_goto1(goto1);
-                    Hole::One(pc)
-                }
-                (None, Some(goto2)) => {
-                    self.insts[pc].half_fill_split_goto2(goto2);
-                    Hole::One(pc)
-                }
-                (None, None) => unreachable!(
-                    "at least one of the split \
-                     holes must be filled"
-                ),
-            },
-            Hole::Many(holes) => {
-                let mut new_holes = vec![];
-                for hole in holes {
-                    new_holes.push(self.fill_split(hole, goto1, goto2));
-                }
-                if new_holes.is_empty() {
-                    Hole::None
-                } else if new_holes.len() == 1 {
-                    new_holes.pop().unwrap()
-                } else {
-                    Hole::Many(new_holes)
-                }
-            }
-        }
-    }
-
-    fn push_compiled(&mut self, inst: Inst) {
-        self.insts.push(MaybeInst::Compiled(inst));
-    }
-
-    fn push_hole(&mut self, inst: InstHole) -> Hole {
-        let hole = self.insts.len();
-        self.insts.push(MaybeInst::Uncompiled(inst));
-        Hole::One(hole)
-    }
-
-    fn push_split_hole(&mut self) -> Hole {
-        let hole = self.insts.len();
-        self.insts.push(MaybeInst::Split);
-        Hole::One(hole)
-    }
-
-    fn pop_split_hole(&mut self) -> ResultOrEmpty {
-        self.insts.pop();
-        Ok(None)
-    }
-
-    fn check_size(&self) -> result::Result<(), Error> {
-        use std::mem::size_of;
-
-        let size =
-            self.extra_inst_bytes + (self.insts.len() * size_of::<Inst>());
-        if size > self.size_limit {
-            Err(Error::CompiledTooBig(self.size_limit))
-        } else {
-            Ok(())
-        }
-    }
-}
-
-#[derive(Debug)]
-enum Hole {
-    None,
-    One(InstPtr),
-    Many(Vec<Hole>),
-}
-
-impl Hole {
-    fn dup_one(self) -> (Self, Self) {
-        match self {
-            Hole::One(pc) => (Hole::One(pc), Hole::One(pc)),
-            Hole::None | Hole::Many(_) => {
-                unreachable!("must be called on single hole")
-            }
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-enum MaybeInst {
-    Compiled(Inst),
-    Uncompiled(InstHole),
-    Split,
-    Split1(InstPtr),
-    Split2(InstPtr),
-}
-
-impl MaybeInst {
-    fn fill(&mut self, goto: InstPtr) {
-        let maybeinst = match *self {
-            MaybeInst::Split => MaybeInst::Split1(goto),
-            MaybeInst::Uncompiled(ref inst) => {
-                MaybeInst::Compiled(inst.fill(goto))
-            }
-            MaybeInst::Split1(goto1) => {
-                MaybeInst::Compiled(Inst::Split(InstSplit {
-                    goto1,
-                    goto2: goto,
-                }))
-            }
-            MaybeInst::Split2(goto2) => {
-                MaybeInst::Compiled(Inst::Split(InstSplit {
-                    goto1: goto,
-                    goto2,
-                }))
-            }
-            _ => unreachable!(
-                "not all instructions were compiled!
\ - found uncompiled instruction: {:?}", - self - ), - }; - *self = maybeinst; - } - - fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) { - let filled = match *self { - MaybeInst::Split => Inst::Split(InstSplit { goto1, goto2 }), - _ => unreachable!( - "must be called on Split instruction, \ - instead it was called on: {:?}", - self - ), - }; - *self = MaybeInst::Compiled(filled); - } - - fn half_fill_split_goto1(&mut self, goto1: InstPtr) { - let half_filled = match *self { - MaybeInst::Split => goto1, - _ => unreachable!( - "must be called on Split instruction, \ - instead it was called on: {:?}", - self - ), - }; - *self = MaybeInst::Split1(half_filled); - } - - fn half_fill_split_goto2(&mut self, goto2: InstPtr) { - let half_filled = match *self { - MaybeInst::Split => goto2, - _ => unreachable!( - "must be called on Split instruction, \ - instead it was called on: {:?}", - self - ), - }; - *self = MaybeInst::Split2(half_filled); - } - - fn unwrap(self) -> Inst { - match self { - MaybeInst::Compiled(inst) => inst, - _ => unreachable!( - "must be called on a compiled instruction, \ - instead it was called on: {:?}", - self - ), - } - } -} - -#[derive(Clone, Debug)] -enum InstHole { - Save { slot: usize }, - EmptyLook { look: EmptyLook }, - Char { c: char }, - Ranges { ranges: Vec<(char, char)> }, - Bytes { start: u8, end: u8 }, -} - -impl InstHole { - fn fill(&self, goto: InstPtr) -> Inst { - match *self { - InstHole::Save { slot } => Inst::Save(InstSave { goto, slot }), - InstHole::EmptyLook { look } => { - Inst::EmptyLook(InstEmptyLook { goto, look }) - } - InstHole::Char { c } => Inst::Char(InstChar { goto, c }), - InstHole::Ranges { ref ranges } => Inst::Ranges(InstRanges { - goto, - ranges: ranges.clone().into_boxed_slice(), - }), - InstHole::Bytes { start, end } => { - Inst::Bytes(InstBytes { goto, start, end }) - } - } - } -} - -struct CompileClass<'a, 'b> { - c: &'a mut Compiler, - ranges: &'b [hir::ClassUnicodeRange], -} - -impl<'a, 'b> CompileClass<'a, 'b> { - fn compile(mut self) -> Result { - let mut holes = vec![]; - let mut initial_entry = None; - let mut last_split = Hole::None; - let mut utf8_seqs = self.c.utf8_seqs.take().unwrap(); - self.c.suffix_cache.clear(); - - for (i, range) in self.ranges.iter().enumerate() { - let is_last_range = i + 1 == self.ranges.len(); - utf8_seqs.reset(range.start(), range.end()); - let mut it = (&mut utf8_seqs).peekable(); - loop { - let utf8_seq = match it.next() { - None => break, - Some(utf8_seq) => utf8_seq, - }; - if is_last_range && it.peek().is_none() { - let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?; - holes.push(hole); - self.c.fill(last_split, entry); - last_split = Hole::None; - if initial_entry.is_none() { - initial_entry = Some(entry); - } - } else { - if initial_entry.is_none() { - initial_entry = Some(self.c.insts.len()); - } - self.c.fill_to_next(last_split); - last_split = self.c.push_split_hole(); - let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?; - holes.push(hole); - last_split = - self.c.fill_split(last_split, Some(entry), None); - } - } - } - self.c.utf8_seqs = Some(utf8_seqs); - Ok(Patch { hole: Hole::Many(holes), entry: initial_entry.unwrap() }) - } - - fn c_utf8_seq(&mut self, seq: &Utf8Sequence) -> Result { - if self.c.compiled.is_reverse { - self.c_utf8_seq_(seq) - } else { - self.c_utf8_seq_(seq.into_iter().rev()) - } - } - - fn c_utf8_seq_<'r, I>(&mut self, seq: I) -> Result - where - I: IntoIterator, - { - // The initial instruction for each UTF-8 sequence should be the same. 
- let mut from_inst = ::std::usize::MAX; - let mut last_hole = Hole::None; - for byte_range in seq { - let key = SuffixCacheKey { - from_inst, - start: byte_range.start, - end: byte_range.end, - }; - { - let pc = self.c.insts.len(); - if let Some(cached_pc) = self.c.suffix_cache.get(key, pc) { - from_inst = cached_pc; - continue; - } - } - self.c.byte_classes.set_range(byte_range.start, byte_range.end); - if from_inst == ::std::usize::MAX { - last_hole = self.c.push_hole(InstHole::Bytes { - start: byte_range.start, - end: byte_range.end, - }); - } else { - self.c.push_compiled(Inst::Bytes(InstBytes { - goto: from_inst, - start: byte_range.start, - end: byte_range.end, - })); - } - from_inst = self.c.insts.len().checked_sub(1).unwrap(); - debug_assert!(from_inst < ::std::usize::MAX); - } - debug_assert!(from_inst < ::std::usize::MAX); - Ok(Patch { hole: last_hole, entry: from_inst }) - } -} - -/// `SuffixCache` is a simple bounded hash map for caching suffix entries in -/// UTF-8 automata. For example, consider the Unicode range \u{0}-\u{FFFF}. -/// The set of byte ranges looks like this: -/// -/// [0-7F] -/// [C2-DF][80-BF] -/// [E0][A0-BF][80-BF] -/// [E1-EC][80-BF][80-BF] -/// [ED][80-9F][80-BF] -/// [EE-EF][80-BF][80-BF] -/// -/// Each line above translates to one alternate in the compiled regex program. -/// However, all but one of the alternates end in the same suffix, which is -/// a waste of an instruction. The suffix cache facilitates reusing them across -/// alternates. -/// -/// Note that a HashMap could be trivially used for this, but we don't need its -/// overhead. Some small bounded space (LRU style) is more than enough. -/// -/// This uses similar idea to [`SparseSet`](../sparse/struct.SparseSet.html), -/// except it uses hashes as original indices and then compares full keys for -/// validation against `dense` array. 
-#[derive(Debug)]
-struct SuffixCache {
-    sparse: Box<[usize]>,
-    dense: Vec<SuffixCacheEntry>,
-}
-
-#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
-struct SuffixCacheEntry {
-    key: SuffixCacheKey,
-    pc: InstPtr,
-}
-
-#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
-struct SuffixCacheKey {
-    from_inst: InstPtr,
-    start: u8,
-    end: u8,
-}
-
-impl SuffixCache {
-    fn new(size: usize) -> Self {
-        SuffixCache {
-            sparse: vec![0usize; size].into(),
-            dense: Vec::with_capacity(size),
-        }
-    }
-
-    fn get(&mut self, key: SuffixCacheKey, pc: InstPtr) -> Option<InstPtr> {
-        let hash = self.hash(&key);
-        let pos = &mut self.sparse[hash];
-        if let Some(entry) = self.dense.get(*pos) {
-            if entry.key == key {
-                return Some(entry.pc);
-            }
-        }
-        *pos = self.dense.len();
-        self.dense.push(SuffixCacheEntry { key, pc });
-        None
-    }
-
-    fn clear(&mut self) {
-        self.dense.clear();
-    }
-
-    fn hash(&self, suffix: &SuffixCacheKey) -> usize {
-        // Basic FNV-1a hash as described:
-        // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
-        const FNV_PRIME: u64 = 1_099_511_628_211;
-        let mut h = 14_695_981_039_346_656_037;
-        h = (h ^ (suffix.from_inst as u64)).wrapping_mul(FNV_PRIME);
-        h = (h ^ (suffix.start as u64)).wrapping_mul(FNV_PRIME);
-        h = (h ^ (suffix.end as u64)).wrapping_mul(FNV_PRIME);
-        (h as usize) % self.sparse.len()
-    }
-}
-
-struct ByteClassSet([bool; 256]);
-
-impl ByteClassSet {
-    fn new() -> Self {
-        ByteClassSet([false; 256])
-    }
-
-    fn set_range(&mut self, start: u8, end: u8) {
-        debug_assert!(start <= end);
-        if start > 0 {
-            self.0[start as usize - 1] = true;
-        }
-        self.0[end as usize] = true;
-    }
-
-    fn set_word_boundary(&mut self) {
-        // We need to mark all ranges of bytes whose pairs result in
-        // evaluating \b differently.
-        let iswb = is_word_byte;
-        let mut b1: u16 = 0;
-        let mut b2: u16;
-        while b1 <= 255 {
-            b2 = b1 + 1;
-            while b2 <= 255 && iswb(b1 as u8) == iswb(b2 as u8) {
-                b2 += 1;
-            }
-            self.set_range(b1 as u8, (b2 - 1) as u8);
-            b1 = b2;
-        }
-    }
-
-    fn byte_classes(&self) -> Vec<u8> {
-        // N.B. If you're debugging the DFA, it's useful to simply return
-        // `(0..256).collect()`, which effectively removes the byte classes
-        // and makes the transitions easier to read.
-        // (0usize..256).map(|x| x as u8).collect()
-        let mut byte_classes = vec![0; 256];
-        let mut class = 0u8;
-        let mut i = 0;
-        loop {
-            byte_classes[i] = class as u8;
-            if i >= 255 {
-                break;
-            }
-            if self.0[i] {
-                class = class.checked_add(1).unwrap();
-            }
-            i += 1;
-        }
-        byte_classes
-    }
-}
-
-impl fmt::Debug for ByteClassSet {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_tuple("ByteClassSet").field(&&self.0[..]).finish()
-    }
-}
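(Editor's aside: a short worked example of the sharing the suffix cache buys, using toy types and a plain HashMap in place of the bounded FNV-indexed table above — all names here are hypothetical. Compiling the alternates `[E1-EC][80-BF][80-BF]` and `[EE-EF][80-BF][80-BF]` suffix-first means the second alternate's `[80-BF][80-BF]` tail hits the cache, so only its leading byte range emits a new instruction:

    use std::collections::HashMap;

    /// Toy instruction: a byte range plus a goto, as in `InstBytes`.
    #[derive(Debug)]
    struct ToyInst {
        start: u8,
        end: u8,
        goto: usize,
    }

    /// Emits `seq` (suffix-first) into `insts`, reusing cached suffixes.
    /// `cache` maps (goto, start, end) -> instruction index; a miss records
    /// the new index, mirroring `SuffixCache::get` above.
    fn emit(
        insts: &mut Vec<ToyInst>,
        cache: &mut HashMap<(usize, u8, u8), usize>,
        seq: &[(u8, u8)],
    ) -> usize {
        let mut goto = usize::MAX; // sentinel: "no successor yet"
        for &(start, end) in seq.iter().rev() {
            let key = (goto, start, end);
            if let Some(&pc) = cache.get(&key) {
                goto = pc;
                continue;
            }
            insts.push(ToyInst { start, end, goto });
            let pc = insts.len() - 1;
            cache.insert(key, pc);
            goto = pc;
        }
        goto
    }

    fn main() {
        let (mut insts, mut cache) = (Vec::new(), HashMap::new());
        emit(&mut insts, &mut cache, &[(0xE1, 0xEC), (0x80, 0xBF), (0x80, 0xBF)]);
        emit(&mut insts, &mut cache, &[(0xEE, 0xEF), (0x80, 0xBF), (0x80, 0xBF)]);
        // 4 instructions instead of 6: the two-byte suffix was shared.
        assert_eq!(insts.len(), 4);
    }

The real cache trades the HashMap for fixed, LRU-style space, which is all this access pattern needs.)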
-fn u32_to_usize(n: u32) -> usize {
-    // In case usize is less than 32 bits, we need to guard against overflow.
-    // On most platforms this compiles to nothing.
-    // TODO Use `std::convert::TryFrom` once it's stable.
-    if (n as u64) > (::std::usize::MAX as u64) {
-        panic!("BUG: {} is too big to be pointer sized", n)
-    }
-    n as usize
-}
-
-#[cfg(test)]
-mod tests {
-    use super::ByteClassSet;
-
-    #[test]
-    fn byte_classes() {
-        let mut set = ByteClassSet::new();
-        set.set_range(b'a', b'z');
-        let classes = set.byte_classes();
-        assert_eq!(classes[0], 0);
-        assert_eq!(classes[1], 0);
-        assert_eq!(classes[2], 0);
-        assert_eq!(classes[b'a' as usize - 1], 0);
-        assert_eq!(classes[b'a' as usize], 1);
-        assert_eq!(classes[b'm' as usize], 1);
-        assert_eq!(classes[b'z' as usize], 1);
-        assert_eq!(classes[b'z' as usize + 1], 2);
-        assert_eq!(classes[254], 2);
-        assert_eq!(classes[255], 2);
-
-        let mut set = ByteClassSet::new();
-        set.set_range(0, 2);
-        set.set_range(4, 6);
-        let classes = set.byte_classes();
-        assert_eq!(classes[0], 0);
-        assert_eq!(classes[1], 0);
-        assert_eq!(classes[2], 0);
-        assert_eq!(classes[3], 1);
-        assert_eq!(classes[4], 2);
-        assert_eq!(classes[5], 2);
-        assert_eq!(classes[6], 2);
-        assert_eq!(classes[7], 3);
-        assert_eq!(classes[255], 3);
-    }
-
-    #[test]
-    fn full_byte_classes() {
-        let mut set = ByteClassSet::new();
-        for i in 0..256u16 {
-            set.set_range(i as u8, i as u8);
-        }
-        assert_eq!(set.byte_classes().len(), 256);
-    }
-}
diff --git a/vendor/regex/src/dfa.rs b/vendor/regex/src/dfa.rs
deleted file mode 100644
index dc99521..0000000
--- a/vendor/regex/src/dfa.rs
+++ /dev/null
@@ -1,1945 +0,0 @@
-/*!
-The DFA matching engine.
-
-A DFA provides faster matching because the engine is in exactly one state at
-any point in time. In the NFA, there may be multiple active states, and
-considerable CPU cycles are spent shuffling them around. In finite automata
-speak, the DFA follows epsilon transitions in the regex far less than the NFA.
-
-A DFA is a classic trade-off between time and space. The NFA is slower, but
-its memory requirements are typically small and predictable. The DFA is
-faster, but given the right regex and the right input, the number of states
-in the DFA can grow exponentially. To mitigate this space problem, we do two
-things:
-
-1. We implement an *online* DFA. That is, the DFA is constructed from the NFA
-   during a search. When a new state is computed, it is stored in a cache so
-   that it may be reused. An important consequence of this implementation
-   is that states that are never reached for a particular input are never
-   computed. (This is impossible in an "offline" DFA which needs to compute
-   all possible states up front.)
-2. If the cache gets too big, we wipe it and continue matching.
-
-In pathological cases, a new state can be created for every byte of input.
-(e.g., the regex `(a|b)*a(a|b){20}` on a long sequence of a's and b's.)
-In this case, performance regresses to slightly slower than the full NFA
-simulation, in large part because the cache becomes useless. If the cache
-is wiped too frequently, the DFA quits and control falls back to one of the
-NFA simulations.
-
-Because of the "lazy" nature of this DFA, the inner matching loop is
-considerably more complex than one might expect out of a DFA. A number of
-tricks are employed to make it fast. Tread carefully.
-
-N.B. While this implementation is heavily commented, Russ Cox's series of
-articles on regexes is strongly recommended: https://swtch.com/~rsc/regexp/
-(As is the DFA implementation in RE2, which heavily influenced this
-implementation.)
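(Editor's aside: a minimal illustration of the two mitigations above, with toy types rather than this module's — names hypothetical. The transition function is computed the first time a (state, byte) pair is seen and memoized thereafter, and the whole cache can be wiped at any point without affecting correctness:

    use std::collections::HashMap;

    struct Lazy<F: FnMut(u32, u8) -> u32> {
        compute: F,                     // derives a transition from the NFA
        cache: HashMap<(u32, u8), u32>, // memoized transitions
    }

    impl<F: FnMut(u32, u8) -> u32> Lazy<F> {
        /// Returns the cached transition, computing and storing it on a miss.
        fn next(&mut self, state: u32, byte: u8) -> u32 {
            let compute = &mut self.compute;
            *self.cache.entry((state, byte)).or_insert_with(|| compute(state, byte))
        }

        /// Mitigation 2: if the cache gets too big, wipe it and keep going.
        fn flush(&mut self) {
            self.cache.clear();
        }
    }

States never reached for a given input are never inserted into `cache`, which is the whole point of being "online".)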
-*/ - -use std::collections::HashMap; -use std::fmt; -use std::iter::repeat; -use std::mem; -use std::sync::Arc; - -use crate::exec::ProgramCache; -use crate::prog::{Inst, Program}; -use crate::sparse::SparseSet; - -/// Return true if and only if the given program can be executed by a DFA. -/// -/// Generally, a DFA is always possible. A pathological case where it is not -/// possible is if the number of NFA states exceeds `u32::MAX`, in which case, -/// this function will return false. -/// -/// This function will also return false if the given program has any Unicode -/// instructions (Char or Ranges) since the DFA operates on bytes only. -pub fn can_exec(insts: &Program) -> bool { - use crate::prog::Inst::*; - // If for some reason we manage to allocate a regex program with more - // than i32::MAX instructions, then we can't execute the DFA because we - // use 32 bit instruction pointer deltas for memory savings. - // If i32::MAX is the largest positive delta, - // then -i32::MAX == i32::MIN + 1 is the largest negative delta, - // and we are OK to use 32 bits. - if insts.dfa_size_limit == 0 || insts.len() > ::std::i32::MAX as usize { - return false; - } - for inst in insts { - match *inst { - Char(_) | Ranges(_) => return false, - EmptyLook(_) | Match(_) | Save(_) | Split(_) | Bytes(_) => {} - } - } - true -} - -/// A reusable cache of DFA states. -/// -/// This cache is reused between multiple invocations of the same regex -/// program. (It is not shared simultaneously between threads. If there is -/// contention, then new caches are created.) -#[derive(Debug)] -pub struct Cache { - /// Group persistent DFA related cache state together. The sparse sets - /// listed below are used as scratch space while computing uncached states. - inner: CacheInner, - /// qcur and qnext are ordered sets with constant time - /// addition/membership/clearing-whole-set and linear time iteration. They - /// are used to manage the sets of NFA states in DFA states when computing - /// cached DFA states. In particular, the order of the NFA states matters - /// for leftmost-first style matching. Namely, when computing a cached - /// state, the set of NFA states stops growing as soon as the first Match - /// instruction is observed. - qcur: SparseSet, - qnext: SparseSet, -} - -/// `CacheInner` is logically just a part of Cache, but groups together fields -/// that aren't passed as function parameters throughout search. (This split -/// is mostly an artifact of the borrow checker. It is happily paid.) -#[derive(Debug)] -struct CacheInner { - /// A cache of pre-compiled DFA states, keyed by the set of NFA states - /// and the set of empty-width flags set at the byte in the input when the - /// state was observed. - /// - /// A StatePtr is effectively a `*State`, but to avoid various inconvenient - /// things, we just pass indexes around manually. The performance impact of - /// this is probably an instruction or two in the inner loop. However, on - /// 64 bit, each StatePtr is half the size of a *State. - compiled: StateMap, - /// The transition table. - /// - /// The transition table is laid out in row-major order, where states are - /// rows and the transitions for each state are columns. At a high level, - /// given state `s` and byte `b`, the next state can be found at index - /// `s * 256 + b`. - /// - /// This is, of course, a lie. A StatePtr is actually a pointer to the - /// *start* of a row in this table. 
When indexing in the DFA's inner loop, - /// this removes the need to multiply the StatePtr by the stride. Yes, it - /// matters. This reduces the number of states we can store, but: the - /// stride is rarely 256 since we define transitions in terms of - /// *equivalence classes* of bytes. Each class corresponds to a set of - /// bytes that never discriminate a distinct path through the DFA from each - /// other. - trans: Transitions, - /// A set of cached start states, which are limited to the number of - /// permutations of flags set just before the initial byte of input. (The - /// index into this vec is a `EmptyFlags`.) - /// - /// N.B. A start state can be "dead" (i.e., no possible match), so we - /// represent it with a StatePtr. - start_states: Vec, - /// Stack scratch space used to follow epsilon transitions in the NFA. - /// (This permits us to avoid recursion.) - /// - /// The maximum stack size is the number of NFA states. - stack: Vec, - /// The total number of times this cache has been flushed by the DFA - /// because of space constraints. - flush_count: u64, - /// The total heap size of the DFA's cache. We use this to determine when - /// we should flush the cache. - size: usize, - /// Scratch space used when building instruction pointer lists for new - /// states. This helps amortize allocation. - insts_scratch_space: Vec, -} - -/// The transition table. -/// -/// It is laid out in row-major order, with states as rows and byte class -/// transitions as columns. -/// -/// The transition table is responsible for producing valid `StatePtrs`. A -/// `StatePtr` points to the start of a particular row in this table. When -/// indexing to find the next state this allows us to avoid a multiplication -/// when computing an index into the table. -#[derive(Clone)] -struct Transitions { - /// The table. - table: Vec, - /// The stride. - num_byte_classes: usize, -} - -/// Fsm encapsulates the actual execution of the DFA. -#[derive(Debug)] -pub struct Fsm<'a> { - /// prog contains the NFA instruction opcodes. DFA execution uses either - /// the `dfa` instructions or the `dfa_reverse` instructions from - /// `exec::ExecReadOnly`. (It never uses `ExecReadOnly.nfa`, which may have - /// Unicode opcodes that cannot be executed by the DFA.) - prog: &'a Program, - /// The start state. We record it here because the pointer may change - /// when the cache is wiped. - start: StatePtr, - /// The current position in the input. - at: usize, - /// Should we quit after seeing the first match? e.g., When the caller - /// uses `is_match` or `shortest_match`. - quit_after_match: bool, - /// The last state that matched. - /// - /// When no match has occurred, this is set to STATE_UNKNOWN. - /// - /// This is only useful when matching regex sets. The last match state - /// is useful because it contains all of the match instructions seen, - /// thereby allowing us to enumerate which regexes in the set matched. - last_match_si: StatePtr, - /// The input position of the last cache flush. We use this to determine - /// if we're thrashing in the cache too often. If so, the DFA quits so - /// that we can fall back to the NFA algorithm. - last_cache_flush: usize, - /// All cached DFA information that is persisted between searches. - cache: &'a mut CacheInner, -} - -/// The result of running the DFA. -/// -/// Generally, the result is either a match or not a match, but sometimes the -/// DFA runs too slowly because the cache size is too small. 
In that case, it
-/// gives up with the intent of falling back to the NFA algorithm.
-///
-/// The DFA can also give up if it runs out of room to create new states, or if
-/// it sees non-ASCII bytes in the presence of a Unicode word boundary.
-#[derive(Clone, Debug)]
-pub enum Result<T> {
-    Match(T),
-    NoMatch(usize),
-    Quit,
-}
-
-impl<T> Result<T> {
-    /// Returns true if this result corresponds to a match.
-    pub fn is_match(&self) -> bool {
-        match *self {
-            Result::Match(_) => true,
-            Result::NoMatch(_) | Result::Quit => false,
-        }
-    }
-
-    /// Maps the given function onto T and returns the result.
-    ///
-    /// If this isn't a match, then this is a no-op.
-    #[cfg(feature = "perf-literal")]
-    pub fn map<U, F: FnMut(T) -> U>(self, mut f: F) -> Result<U> {
-        match self {
-            Result::Match(t) => Result::Match(f(t)),
-            Result::NoMatch(x) => Result::NoMatch(x),
-            Result::Quit => Result::Quit,
-        }
-    }
-
-    /// Sets the non-match position.
-    ///
-    /// If this isn't a non-match, then this is a no-op.
-    fn set_non_match(self, at: usize) -> Result<T> {
-        match self {
-            Result::NoMatch(_) => Result::NoMatch(at),
-            r => r,
-        }
-    }
-}
-
-/// `State` is a DFA state. It contains an ordered set of NFA states (not
-/// necessarily complete) and a smattering of flags.
-///
-/// The flags are packed into the first byte of data.
-///
-/// States don't carry their transitions. Instead, transitions are stored in
-/// a single row-major table.
-///
-/// Delta encoding is used to store the instruction pointers.
-/// The first instruction pointer is stored directly starting
-/// at data[1], and each following pointer is stored as an offset
-/// to the previous one. If a delta is in the range -127..127,
-/// it is packed into a single byte; otherwise the byte 128 (-128 as an i8)
-/// is coded as a flag, followed by 4 bytes encoding the delta.
-#[derive(Clone, Eq, Hash, PartialEq)]
-struct State {
-    data: Arc<[u8]>,
-}
-
-/// `InstPtr` is a 32 bit pointer into a sequence of opcodes (i.e., it indexes
-/// an NFA state).
-///
-/// Throughout this library, this is usually set to `usize`, but we force a
-/// `u32` here for the DFA to save on space.
-type InstPtr = u32;
-
-/// Adds ip to data using delta encoding with respect to prev.
-///
-/// After completion, `data` will contain `ip` and `prev` will be set to `ip`.
-fn push_inst_ptr(data: &mut Vec<u8>, prev: &mut InstPtr, ip: InstPtr) {
-    let delta = (ip as i32) - (*prev as i32);
-    write_vari32(data, delta);
-    *prev = ip;
-}
-
-struct InstPtrs<'a> {
-    base: usize,
-    data: &'a [u8],
-}
-
-impl<'a> Iterator for InstPtrs<'a> {
-    type Item = usize;
-
-    fn next(&mut self) -> Option<usize> {
-        if self.data.is_empty() {
-            return None;
-        }
-        let (delta, nread) = read_vari32(self.data);
-        let base = self.base as i32 + delta;
-        debug_assert!(base >= 0);
-        debug_assert!(nread > 0);
-        self.data = &self.data[nread..];
-        self.base = base as usize;
-        Some(self.base)
-    }
-}
-
-impl State {
-    fn flags(&self) -> StateFlags {
-        StateFlags(self.data[0])
-    }
-
-    fn inst_ptrs(&self) -> InstPtrs<'_> {
-        InstPtrs { base: 0, data: &self.data[1..] }
-    }
-}
-
-/// `StatePtr` is a 32 bit pointer to the start of a row in the transition
-/// table.
-///
-/// It has many special values. There are two types of special values:
-/// sentinels and flags.
-///
-/// Sentinels correspond to special states that carry some kind of
-/// significance. There are three such states: unknown, dead and quit states.
-///
-/// Unknown states are states that haven't been computed yet.
They indicate -/// that a transition should be filled in that points to either an existing -/// cached state or a new state altogether. In general, an unknown state means -/// "follow the NFA's epsilon transitions." -/// -/// Dead states are states that can never lead to a match, no matter what -/// subsequent input is observed. This means that the DFA should quit -/// immediately and return the longest match it has found thus far. -/// -/// Quit states are states that imply the DFA is not capable of matching the -/// regex correctly. Currently, this is only used when a Unicode word boundary -/// exists in the regex *and* a non-ASCII byte is observed. -/// -/// The other type of state pointer is a state pointer with special flag bits. -/// There are two flags: a start flag and a match flag. The lower bits of both -/// kinds always contain a "valid" `StatePtr` (indicated by the `STATE_MAX` -/// mask). -/// -/// The start flag means that the state is a start state, and therefore may be -/// subject to special prefix scanning optimizations. -/// -/// The match flag means that the state is a match state, and therefore the -/// current position in the input (while searching) should be recorded. -/// -/// The above exists mostly in the service of making the inner loop fast. -/// In particular, the inner *inner* loop looks something like this: -/// -/// ```ignore -/// while state <= STATE_MAX and i < len(text): -/// state = state.next[i] -/// ``` -/// -/// This is nice because it lets us execute a lazy DFA as if it were an -/// entirely offline DFA (i.e., with very few instructions). The loop will -/// quit only when we need to examine a case that needs special attention. -type StatePtr = u32; - -/// An unknown state means that the state has not been computed yet, and that -/// the only way to progress is to compute it. -const STATE_UNKNOWN: StatePtr = 1 << 31; - -/// A dead state means that the state has been computed and it is known that -/// once it is entered, no future match can ever occur. -const STATE_DEAD: StatePtr = STATE_UNKNOWN + 1; - -/// A quit state means that the DFA came across some input that it doesn't -/// know how to process correctly. The DFA should quit and another matching -/// engine should be run in its place. -const STATE_QUIT: StatePtr = STATE_DEAD + 1; - -/// A start state is a state that the DFA can start in. -/// -/// Note that start states have their lower bits set to a state pointer. -const STATE_START: StatePtr = 1 << 30; - -/// A match state means that the regex has successfully matched. -/// -/// Note that match states have their lower bits set to a state pointer. -const STATE_MATCH: StatePtr = 1 << 29; - -/// The maximum state pointer. This is useful to mask out the "valid" state -/// pointer from a state with the "start" or "match" bits set. -/// -/// It doesn't make sense to use this with unknown, dead or quit state -/// pointers, since those pointers are sentinels and never have their lower -/// bits set to anything meaningful. -const STATE_MAX: StatePtr = STATE_MATCH - 1; - -/// Byte is a u8 in spirit, but a u16 in practice so that we can represent the -/// special EOF sentinel value. -#[derive(Copy, Clone, Debug)] -struct Byte(u16); - -/// A set of flags for zero-width assertions. -#[derive(Clone, Copy, Eq, Debug, Default, Hash, PartialEq)] -struct EmptyFlags { - start: bool, - end: bool, - start_line: bool, - end_line: bool, - word_boundary: bool, - not_word_boundary: bool, -} - -/// A set of flags describing various configurations of a DFA state. 
This is -/// represented by a `u8` so that it is compact. -#[derive(Clone, Copy, Eq, Default, Hash, PartialEq)] -struct StateFlags(u8); - -impl Cache { - /// Create new empty cache for the DFA engine. - pub fn new(prog: &Program) -> Self { - // We add 1 to account for the special EOF byte. - let num_byte_classes = (prog.byte_classes[255] as usize + 1) + 1; - let starts = vec![STATE_UNKNOWN; 256]; - let mut cache = Cache { - inner: CacheInner { - compiled: StateMap::new(num_byte_classes), - trans: Transitions::new(num_byte_classes), - start_states: starts, - stack: vec![], - flush_count: 0, - size: 0, - insts_scratch_space: vec![], - }, - qcur: SparseSet::new(prog.insts.len()), - qnext: SparseSet::new(prog.insts.len()), - }; - cache.inner.reset_size(); - cache - } -} - -impl CacheInner { - /// Resets the cache size to account for fixed costs, such as the program - /// and stack sizes. - fn reset_size(&mut self) { - self.size = (self.start_states.len() * mem::size_of::()) - + (self.stack.len() * mem::size_of::()); - } -} - -impl<'a> Fsm<'a> { - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn forward( - prog: &'a Program, - cache: &ProgramCache, - quit_after_match: bool, - text: &[u8], - at: usize, - ) -> Result { - let mut cache = cache.borrow_mut(); - let cache = &mut cache.dfa; - let mut dfa = Fsm { - prog, - start: 0, // filled in below - at, - quit_after_match, - last_match_si: STATE_UNKNOWN, - last_cache_flush: at, - cache: &mut cache.inner, - }; - let (empty_flags, state_flags) = dfa.start_flags(text, at); - dfa.start = - match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { - None => return Result::Quit, - Some(STATE_DEAD) => return Result::NoMatch(at), - Some(si) => si, - }; - debug_assert!(dfa.start != STATE_UNKNOWN); - dfa.exec_at(&mut cache.qcur, &mut cache.qnext, text) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn reverse( - prog: &'a Program, - cache: &ProgramCache, - quit_after_match: bool, - text: &[u8], - at: usize, - ) -> Result { - let mut cache = cache.borrow_mut(); - let cache = &mut cache.dfa_reverse; - let mut dfa = Fsm { - prog, - start: 0, // filled in below - at, - quit_after_match, - last_match_si: STATE_UNKNOWN, - last_cache_flush: at, - cache: &mut cache.inner, - }; - let (empty_flags, state_flags) = dfa.start_flags_reverse(text, at); - dfa.start = - match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { - None => return Result::Quit, - Some(STATE_DEAD) => return Result::NoMatch(at), - Some(si) => si, - }; - debug_assert!(dfa.start != STATE_UNKNOWN); - dfa.exec_at_reverse(&mut cache.qcur, &mut cache.qnext, text) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn forward_many( - prog: &'a Program, - cache: &ProgramCache, - matches: &mut [bool], - text: &[u8], - at: usize, - ) -> Result { - debug_assert!(matches.len() == prog.matches.len()); - let mut cache = cache.borrow_mut(); - let cache = &mut cache.dfa; - let mut dfa = Fsm { - prog, - start: 0, // filled in below - at, - quit_after_match: false, - last_match_si: STATE_UNKNOWN, - last_cache_flush: at, - cache: &mut cache.inner, - }; - let (empty_flags, state_flags) = dfa.start_flags(text, at); - dfa.start = - match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { - None => return Result::Quit, - Some(STATE_DEAD) => return Result::NoMatch(at), - Some(si) => si, - }; - debug_assert!(dfa.start != STATE_UNKNOWN); - let result = dfa.exec_at(&mut cache.qcur, &mut cache.qnext, text); - if result.is_match() { - if matches.len() 
== 1 { - matches[0] = true; - } else { - debug_assert!(dfa.last_match_si != STATE_UNKNOWN); - debug_assert!(dfa.last_match_si != STATE_DEAD); - for ip in dfa.state(dfa.last_match_si).inst_ptrs() { - if let Inst::Match(slot) = dfa.prog[ip] { - matches[slot] = true; - } - } - } - } - result - } - - /// Executes the DFA on a forward NFA. - /// - /// {qcur,qnext} are scratch ordered sets which may be non-empty. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn exec_at( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - text: &[u8], - ) -> Result { - // For the most part, the DFA is basically: - // - // last_match = null - // while current_byte != EOF: - // si = current_state.next[current_byte] - // if si is match - // last_match = si - // return last_match - // - // However, we need to deal with a few things: - // - // 1. This is an *online* DFA, so the current state's next list - // may not point to anywhere yet, so we must go out and compute - // them. (They are then cached into the current state's next list - // to avoid re-computation.) - // 2. If we come across a state that is known to be dead (i.e., never - // leads to a match), then we can quit early. - // 3. If the caller just wants to know if a match occurs, then we - // can quit as soon as we know we have a match. (Full leftmost - // first semantics require continuing on.) - // 4. If we're in the start state, then we can use a pre-computed set - // of prefix literals to skip quickly along the input. - // 5. After the input is exhausted, we run the DFA on one symbol - // that stands for EOF. This is useful for handling empty width - // assertions. - // 6. We can't actually do state.next[byte]. Instead, we have to do - // state.next[byte_classes[byte]], which permits us to keep the - // 'next' list very small. - // - // Since there's a bunch of extra stuff we need to consider, we do some - // pretty hairy tricks to get the inner loop to run as fast as - // possible. - debug_assert!(!self.prog.is_reverse); - - // The last match is the currently known ending match position. It is - // reported as an index to the most recent byte that resulted in a - // transition to a match state and is always stored in capture slot `1` - // when searching forwards. Its maximum value is `text.len()`. - let mut result = Result::NoMatch(self.at); - let (mut prev_si, mut next_si) = (self.start, self.start); - let mut at = self.at; - while at < text.len() { - // This is the real inner loop. We take advantage of special bits - // set in the state pointer to determine whether a state is in the - // "common" case or not. Specifically, the common case is a - // non-match non-start non-dead state that has already been - // computed. So long as we remain in the common case, this inner - // loop will chew through the input. - // - // We also unroll the loop 4 times to amortize the cost of checking - // whether we've consumed the entire input. We are also careful - // to make sure that `prev_si` always represents the previous state - // and `next_si` always represents the next state after the loop - // exits, even if it isn't always true inside the loop. - while next_si <= STATE_MAX && at < text.len() { - // Argument for safety is in the definition of next_si. 
- prev_si = unsafe { self.next_si(next_si, text, at) }; - at += 1; - if prev_si > STATE_MAX || at + 2 >= text.len() { - mem::swap(&mut prev_si, &mut next_si); - break; - } - next_si = unsafe { self.next_si(prev_si, text, at) }; - at += 1; - if next_si > STATE_MAX { - break; - } - prev_si = unsafe { self.next_si(next_si, text, at) }; - at += 1; - if prev_si > STATE_MAX { - mem::swap(&mut prev_si, &mut next_si); - break; - } - next_si = unsafe { self.next_si(prev_si, text, at) }; - at += 1; - } - if next_si & STATE_MATCH > 0 { - // A match state is outside of the common case because it needs - // special case analysis. In particular, we need to record the - // last position as having matched and possibly quit the DFA if - // we don't need to keep matching. - next_si &= !STATE_MATCH; - result = Result::Match(at - 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - prev_si = next_si; - - // This permits short-circuiting when matching a regex set. - // In particular, if this DFA state contains only match states, - // then it's impossible to extend the set of matches since - // match states are final. Therefore, we can quit. - if self.prog.matches.len() > 1 { - let state = self.state(next_si); - let just_matches = - state.inst_ptrs().all(|ip| self.prog[ip].is_match()); - if just_matches { - return result; - } - } - - // Another inner loop! If the DFA stays in this particular - // match state, then we can rip through all of the input - // very quickly, and only recording the match location once - // we've left this particular state. - let cur = at; - while (next_si & !STATE_MATCH) == prev_si - && at + 2 < text.len() - { - // Argument for safety is in the definition of next_si. - next_si = unsafe { - self.next_si(next_si & !STATE_MATCH, text, at) - }; - at += 1; - } - if at > cur { - result = Result::Match(at - 2); - } - } else if next_si & STATE_START > 0 { - // A start state isn't in the common case because we may - // want to do quick prefix scanning. If the program doesn't - // have a detected prefix, then start states are actually - // considered common and this case is never reached. - debug_assert!(self.has_prefix()); - next_si &= !STATE_START; - prev_si = next_si; - at = match self.prefix_at(text, at) { - None => return Result::NoMatch(text.len()), - Some(i) => i, - }; - } else if next_si >= STATE_UNKNOWN { - if next_si == STATE_QUIT { - return Result::Quit; - } - // Finally, this corresponds to the case where the transition - // entered a state that can never lead to a match or a state - // that hasn't been computed yet. The latter being the "slow" - // path. - let byte = Byte::byte(text[at - 1]); - // We no longer care about the special bits in the state - // pointer. - prev_si &= STATE_MAX; - // Record where we are. This is used to track progress for - // determining whether we should quit if we've flushed the - // cache too much. - self.at = at; - next_si = match self.next_state(qcur, qnext, prev_si, byte) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(at), - Some(si) => si, - }; - debug_assert!(next_si != STATE_UNKNOWN); - if next_si & STATE_MATCH > 0 { - next_si &= !STATE_MATCH; - result = Result::Match(at - 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - } - prev_si = next_si; - } else { - prev_si = next_si; - } - } - - // Run the DFA once more on the special EOF sentinel value. 
- // We don't care about the special bits in the state pointer any more, - // so get rid of them. - prev_si &= STATE_MAX; - prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(text.len()), - Some(si) => si & !STATE_START, - }; - debug_assert!(prev_si != STATE_UNKNOWN); - if prev_si & STATE_MATCH > 0 { - prev_si &= !STATE_MATCH; - self.last_match_si = prev_si; - result = Result::Match(text.len()); - } - result - } - - /// Executes the DFA on a reverse NFA. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn exec_at_reverse( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - text: &[u8], - ) -> Result { - // The comments in `exec_at` above mostly apply here too. The main - // difference is that we move backwards over the input and we look for - // the longest possible match instead of the leftmost-first match. - // - // N.B. The code duplication here is regrettable. Efforts to improve - // it without sacrificing performance are welcome. ---AG - debug_assert!(self.prog.is_reverse); - let mut result = Result::NoMatch(self.at); - let (mut prev_si, mut next_si) = (self.start, self.start); - let mut at = self.at; - while at > 0 { - while next_si <= STATE_MAX && at > 0 { - // Argument for safety is in the definition of next_si. - at -= 1; - prev_si = unsafe { self.next_si(next_si, text, at) }; - if prev_si > STATE_MAX || at <= 4 { - mem::swap(&mut prev_si, &mut next_si); - break; - } - at -= 1; - next_si = unsafe { self.next_si(prev_si, text, at) }; - if next_si > STATE_MAX { - break; - } - at -= 1; - prev_si = unsafe { self.next_si(next_si, text, at) }; - if prev_si > STATE_MAX { - mem::swap(&mut prev_si, &mut next_si); - break; - } - at -= 1; - next_si = unsafe { self.next_si(prev_si, text, at) }; - } - if next_si & STATE_MATCH > 0 { - next_si &= !STATE_MATCH; - result = Result::Match(at + 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - prev_si = next_si; - let cur = at; - while (next_si & !STATE_MATCH) == prev_si && at >= 2 { - // Argument for safety is in the definition of next_si. - at -= 1; - next_si = unsafe { - self.next_si(next_si & !STATE_MATCH, text, at) - }; - } - if at < cur { - result = Result::Match(at + 2); - } - } else if next_si >= STATE_UNKNOWN { - if next_si == STATE_QUIT { - return Result::Quit; - } - let byte = Byte::byte(text[at]); - prev_si &= STATE_MAX; - self.at = at; - next_si = match self.next_state(qcur, qnext, prev_si, byte) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(at), - Some(si) => si, - }; - debug_assert!(next_si != STATE_UNKNOWN); - if next_si & STATE_MATCH > 0 { - next_si &= !STATE_MATCH; - result = Result::Match(at + 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - } - prev_si = next_si; - } else { - prev_si = next_si; - } - } - - // Run the DFA once more on the special EOF sentinel value. - prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(0), - Some(si) => si, - }; - debug_assert!(prev_si != STATE_UNKNOWN); - if prev_si & STATE_MATCH > 0 { - prev_si &= !STATE_MATCH; - self.last_match_si = prev_si; - result = Result::Match(0); - } - result - } - - /// next_si transitions to the next state, where the transition input - /// corresponds to text[i]. - /// - /// This elides bounds checks, and is therefore not safe. 
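(Editor's aside: the shape of the lookup this enables, as a standalone sketch with toy types — names hypothetical. Because a StatePtr is a premultiplied row offset, the hot path is one unchecked byte load, one unchecked class load, and one unchecked table load:

    struct ToyTrans {
        classes: [u8; 256], // byte -> equivalence class
        table: Vec<u32>,    // row-major; a "state ptr" is row * stride
    }

    impl ToyTrans {
        /// Safety: the caller must guarantee `i < text.len()` and that `si`
        /// is a row offset previously handed out by this table.
        unsafe fn next(&self, si: usize, text: &[u8], i: usize) -> u32 {
            let b = *text.get_unchecked(i);
            let cls = *self.classes.get_unchecked(b as usize) as usize;
            *self.table.get_unchecked(si + cls)
        }
    }

The 256-entry `classes` array is what makes the second load safe for any `u8`, exactly as argued in the safety comment below.)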
- #[cfg_attr(feature = "perf-inline", inline(always))] - unsafe fn next_si(&self, si: StatePtr, text: &[u8], i: usize) -> StatePtr { - // What is the argument for safety here? - // We have three unchecked accesses that could possibly violate safety: - // - // 1. The given byte of input (`text[i]`). - // 2. The class of the byte of input (`classes[text[i]]`). - // 3. The transition for the class (`trans[si + cls]`). - // - // (1) is only safe when calling next_si is guarded by - // `i < text.len()`. - // - // (2) is the easiest case to guarantee since `text[i]` is always a - // `u8` and `self.prog.byte_classes` always has length `u8::MAX`. - // (See `ByteClassSet.byte_classes` in `compile.rs`.) - // - // (3) is only safe if (1)+(2) are safe. Namely, the transitions - // of every state are defined to have length equal to the number of - // byte classes in the program. Therefore, a valid class leads to a - // valid transition. (All possible transitions are valid lookups, even - // if it points to a state that hasn't been computed yet.) (3) also - // relies on `si` being correct, but StatePtrs should only ever be - // retrieved from the transition table, which ensures they are correct. - debug_assert!(i < text.len()); - let b = *text.get_unchecked(i); - debug_assert!((b as usize) < self.prog.byte_classes.len()); - let cls = *self.prog.byte_classes.get_unchecked(b as usize); - self.cache.trans.next_unchecked(si, cls as usize) - } - - /// Computes the next state given the current state and the current input - /// byte (which may be EOF). - /// - /// If STATE_DEAD is returned, then there is no valid state transition. - /// This implies that no permutation of future input can lead to a match - /// state. - /// - /// STATE_UNKNOWN can never be returned. - fn exec_byte( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - mut si: StatePtr, - b: Byte, - ) -> Option { - use crate::prog::Inst::*; - - // Initialize a queue with the current DFA state's NFA states. - qcur.clear(); - for ip in self.state(si).inst_ptrs() { - qcur.insert(ip); - } - - // Before inspecting the current byte, we may need to also inspect - // whether the position immediately preceding the current byte - // satisfies the empty assertions found in the current state. - // - // We only need to do this step if there are any empty assertions in - // the current state. - let is_word_last = self.state(si).flags().is_word(); - let is_word = b.is_ascii_word(); - if self.state(si).flags().has_empty() { - // Compute the flags immediately preceding the current byte. - // This means we only care about the "end" or "end line" flags. - // (The "start" flags are computed immediately following the - // current byte and are handled below.) - let mut flags = EmptyFlags::default(); - if b.is_eof() { - flags.end = true; - flags.end_line = true; - } else if b.as_byte().map_or(false, |b| b == b'\n') { - flags.end_line = true; - } - if is_word_last == is_word { - flags.not_word_boundary = true; - } else { - flags.word_boundary = true; - } - // Now follow epsilon transitions from every NFA state, but make - // sure we only follow transitions that satisfy our flags. - qnext.clear(); - for &ip in &*qcur { - self.follow_epsilons(usize_to_u32(ip), qnext, flags); - } - mem::swap(qcur, qnext); - } - - // Now we set flags for immediately after the current byte. Since start - // states are processed separately, and are the only states that can - // have the StartText flag set, we therefore only need to worry about - // the StartLine flag here. 
- // - // We do also keep track of whether this DFA state contains a NFA state - // that is a matching state. This is precisely how we delay the DFA - // matching by one byte in order to process the special EOF sentinel - // byte. Namely, if this DFA state containing a matching NFA state, - // then it is the *next* DFA state that is marked as a match. - let mut empty_flags = EmptyFlags::default(); - let mut state_flags = StateFlags::default(); - empty_flags.start_line = b.as_byte().map_or(false, |b| b == b'\n'); - if b.is_ascii_word() { - state_flags.set_word(); - } - // Now follow all epsilon transitions again, but only after consuming - // the current byte. - qnext.clear(); - for &ip in &*qcur { - match self.prog[ip as usize] { - // These states never happen in a byte-based program. - Char(_) | Ranges(_) => unreachable!(), - // These states are handled when following epsilon transitions. - Save(_) | Split(_) | EmptyLook(_) => {} - Match(_) => { - state_flags.set_match(); - if !self.continue_past_first_match() { - break; - } else if self.prog.matches.len() > 1 - && !qnext.contains(ip as usize) - { - // If we are continuing on to find other matches, - // then keep a record of the match states we've seen. - qnext.insert(ip); - } - } - Bytes(ref inst) => { - if b.as_byte().map_or(false, |b| inst.matches(b)) { - self.follow_epsilons( - inst.goto as InstPtr, - qnext, - empty_flags, - ); - } - } - } - } - - let cache = if b.is_eof() && self.prog.matches.len() > 1 { - // If we're processing the last byte of the input and we're - // matching a regex set, then make the next state contain the - // previous states transitions. We do this so that the main - // matching loop can extract all of the match instructions. - mem::swap(qcur, qnext); - // And don't cache this state because it's totally bunk. - false - } else { - true - }; - - // We've now built up the set of NFA states that ought to comprise the - // next DFA state, so try to find it in the cache, and if it doesn't - // exist, cache it. - // - // N.B. We pass `&mut si` here because the cache may clear itself if - // it has gotten too full. When that happens, the location of the - // current state may change. - let mut next = - match self.cached_state(qnext, state_flags, Some(&mut si)) { - None => return None, - Some(next) => next, - }; - if (self.start & !STATE_START) == next { - // Start states can never be match states since all matches are - // delayed by one byte. - debug_assert!(!self.state(next).flags().is_match()); - next = self.start_ptr(next); - } - if next <= STATE_MAX && self.state(next).flags().is_match() { - next |= STATE_MATCH; - } - debug_assert!(next != STATE_UNKNOWN); - // And now store our state in the current state's next list. - if cache { - let cls = self.byte_class(b); - self.cache.trans.set_next(si, cls, next); - } - Some(next) - } - - /// Follows the epsilon transitions starting at (and including) `ip`. The - /// resulting states are inserted into the ordered set `q`. - /// - /// Conditional epsilon transitions (i.e., empty width assertions) are only - /// followed if they are satisfied by the given flags, which should - /// represent the flags set at the current location in the input. - /// - /// If the current location corresponds to the empty string, then only the - /// end line and/or end text flags may be set. If the current location - /// corresponds to a real byte in the input, then only the start line - /// and/or start text flags may be set. 
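(Editor's aside: stripped of the flag checks, the traversal below is a standard iterative epsilon-closure. A self-contained sketch with a toy instruction set — names hypothetical:

    use std::collections::HashSet;

    enum Eps {
        Goto(usize),         // one epsilon edge (like Save)
        Split(usize, usize), // two epsilon edges (like Split)
        Consume,             // a byte/match instruction: stop here
    }

    /// Pushes every state reachable from `start` by epsilon edges onto `q`,
    /// chasing single-successor states without touching the stack.
    fn closure(prog: &[Eps], start: usize, q: &mut Vec<usize>) {
        let mut seen: HashSet<usize> = q.iter().copied().collect();
        let mut stack = vec![start];
        while let Some(mut ip) = stack.pop() {
            loop {
                if !seen.insert(ip) {
                    break; // already visited
                }
                q.push(ip);
                match prog[ip] {
                    Eps::Goto(next) => ip = next,
                    Eps::Split(a, b) => {
                        stack.push(b);
                        ip = a;
                    }
                    Eps::Consume => break,
                }
            }
        }
    }

The real code folds the `seen` set into the ordered sparse set `q` itself and additionally gates each `EmptyLook` edge on the flags described in this comment.)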
- /// - /// As an exception to the above, when finding the initial state, any of - /// the above flags may be set: - /// - /// If matching starts at the beginning of the input, then start text and - /// start line should be set. If the input is empty, then end text and end - /// line should also be set. - /// - /// If matching starts after the beginning of the input, then only start - /// line should be set if the preceding byte is `\n`. End line should never - /// be set in this case. (Even if the following byte is a `\n`, it will - /// be handled in a subsequent DFA state.) - fn follow_epsilons( - &mut self, - ip: InstPtr, - q: &mut SparseSet, - flags: EmptyFlags, - ) { - use crate::prog::EmptyLook::*; - use crate::prog::Inst::*; - - // We need to traverse the NFA to follow epsilon transitions, so avoid - // recursion with an explicit stack. - self.cache.stack.push(ip); - while let Some(mut ip) = self.cache.stack.pop() { - // Try to munch through as many states as possible without - // pushes/pops to the stack. - loop { - // Don't visit states we've already added. - if q.contains(ip as usize) { - break; - } - q.insert(ip as usize); - match self.prog[ip as usize] { - Char(_) | Ranges(_) => unreachable!(), - Match(_) | Bytes(_) => { - break; - } - EmptyLook(ref inst) => { - // Only follow empty assertion states if our flags - // satisfy the assertion. - match inst.look { - StartLine if flags.start_line => { - ip = inst.goto as InstPtr; - } - EndLine if flags.end_line => { - ip = inst.goto as InstPtr; - } - StartText if flags.start => { - ip = inst.goto as InstPtr; - } - EndText if flags.end => { - ip = inst.goto as InstPtr; - } - WordBoundaryAscii if flags.word_boundary => { - ip = inst.goto as InstPtr; - } - NotWordBoundaryAscii - if flags.not_word_boundary => - { - ip = inst.goto as InstPtr; - } - WordBoundary if flags.word_boundary => { - ip = inst.goto as InstPtr; - } - NotWordBoundary if flags.not_word_boundary => { - ip = inst.goto as InstPtr; - } - StartLine | EndLine | StartText | EndText - | WordBoundaryAscii | NotWordBoundaryAscii - | WordBoundary | NotWordBoundary => { - break; - } - } - } - Save(ref inst) => { - ip = inst.goto as InstPtr; - } - Split(ref inst) => { - self.cache.stack.push(inst.goto2 as InstPtr); - ip = inst.goto1 as InstPtr; - } - } - } - } - } - - /// Find a previously computed state matching the given set of instructions - /// and is_match bool. - /// - /// The given set of instructions should represent a single state in the - /// NFA along with all states reachable without consuming any input. - /// - /// The is_match bool should be true if and only if the preceding DFA state - /// contains an NFA matching state. The cached state produced here will - /// then signify a match. (This enables us to delay a match by one byte, - /// in order to account for the EOF sentinel byte.) - /// - /// If the cache is full, then it is wiped before caching a new state. - /// - /// The current state should be specified if it exists, since it will need - /// to be preserved if the cache clears itself. (Start states are - /// always saved, so they should not be passed here.) It takes a mutable - /// pointer to the index because if the cache is cleared, the state's - /// location may change. - fn cached_state( - &mut self, - q: &SparseSet, - mut state_flags: StateFlags, - current_state: Option<&mut StatePtr>, - ) -> Option { - // If we couldn't come up with a non-empty key to represent this state, - // then it is dead and can never lead to a match. 
- // - // Note that inst_flags represent the set of empty width assertions - // in q. We use this as an optimization in exec_byte to determine when - // we should follow epsilon transitions at the empty string preceding - // the current byte. - let key = match self.cached_state_key(q, &mut state_flags) { - None => return Some(STATE_DEAD), - Some(v) => v, - }; - // In the cache? Cool. Done. - if let Some(si) = self.cache.compiled.get_ptr(&key) { - return Some(si); - } - // If the cache has gotten too big, wipe it. - if self.approximate_size() > self.prog.dfa_size_limit - && !self.clear_cache_and_save(current_state) - { - // Ooops. DFA is giving up. - return None; - } - // Allocate room for our state and add it. - self.add_state(key) - } - - /// Produces a key suitable for describing a state in the DFA cache. - /// - /// The key invariant here is that equivalent keys are produced for any two - /// sets of ordered NFA states (and toggling of whether the previous NFA - /// states contain a match state) that do not discriminate a match for any - /// input. - /// - /// Specifically, q should be an ordered set of NFA states and is_match - /// should be true if and only if the previous NFA states contained a match - /// state. - fn cached_state_key( - &mut self, - q: &SparseSet, - state_flags: &mut StateFlags, - ) -> Option { - use crate::prog::Inst::*; - - // We need to build up enough information to recognize pre-built states - // in the DFA. Generally speaking, this includes every instruction - // except for those which are purely epsilon transitions, e.g., the - // Save and Split instructions. - // - // Empty width assertions are also epsilon transitions, but since they - // are conditional, we need to make them part of a state's key in the - // cache. - - let mut insts = - mem::replace(&mut self.cache.insts_scratch_space, vec![]); - insts.clear(); - // Reserve 1 byte for flags. - insts.push(0); - - let mut prev = 0; - for &ip in q { - let ip = usize_to_u32(ip); - match self.prog[ip as usize] { - Char(_) | Ranges(_) => unreachable!(), - Save(_) | Split(_) => {} - Bytes(_) => push_inst_ptr(&mut insts, &mut prev, ip), - EmptyLook(_) => { - state_flags.set_empty(); - push_inst_ptr(&mut insts, &mut prev, ip) - } - Match(_) => { - push_inst_ptr(&mut insts, &mut prev, ip); - if !self.continue_past_first_match() { - break; - } - } - } - } - // If we couldn't transition to any other instructions and we didn't - // see a match when expanding NFA states previously, then this is a - // dead state and no amount of additional input can transition out - // of this state. - let opt_state = if insts.len() == 1 && !state_flags.is_match() { - None - } else { - let StateFlags(f) = *state_flags; - insts[0] = f; - Some(State { data: Arc::from(&*insts) }) - }; - self.cache.insts_scratch_space = insts; - opt_state - } - - /// Clears the cache, but saves and restores current_state if it is not - /// none. - /// - /// The current state must be provided here in case its location in the - /// cache changes. - /// - /// This returns false if the cache is not cleared and the DFA should - /// give up. - fn clear_cache_and_save( - &mut self, - current_state: Option<&mut StatePtr>, - ) -> bool { - if self.cache.compiled.is_empty() { - // Nothing to clear... 
- return true; - } - match current_state { - None => self.clear_cache(), - Some(si) => { - let cur = self.state(*si).clone(); - if !self.clear_cache() { - return false; - } - // The unwrap is OK because we just cleared the cache and - // therefore know that the next state pointer won't exceed - // STATE_MAX. - *si = self.restore_state(cur).unwrap(); - true - } - } - } - - /// Wipes the state cache, but saves and restores the current start state. - /// - /// This returns false if the cache is not cleared and the DFA should - /// give up. - fn clear_cache(&mut self) -> bool { - // Bail out of the DFA if we're moving too "slowly." - // A heuristic from RE2: assume the DFA is too slow if it is processing - // 10 or fewer bytes per state. - // Additionally, we permit the cache to be flushed a few times before - // caling it quits. - let nstates = self.cache.compiled.len(); - if self.cache.flush_count >= 3 - && self.at >= self.last_cache_flush - && (self.at - self.last_cache_flush) <= 10 * nstates - { - return false; - } - // Update statistics tracking cache flushes. - self.last_cache_flush = self.at; - self.cache.flush_count += 1; - - // OK, actually flush the cache. - let start = self.state(self.start & !STATE_START).clone(); - let last_match = if self.last_match_si <= STATE_MAX { - Some(self.state(self.last_match_si).clone()) - } else { - None - }; - self.cache.reset_size(); - self.cache.trans.clear(); - self.cache.compiled.clear(); - for s in &mut self.cache.start_states { - *s = STATE_UNKNOWN; - } - // The unwraps are OK because we just cleared the cache and therefore - // know that the next state pointer won't exceed STATE_MAX. - let start_ptr = self.restore_state(start).unwrap(); - self.start = self.start_ptr(start_ptr); - if let Some(last_match) = last_match { - self.last_match_si = self.restore_state(last_match).unwrap(); - } - true - } - - /// Restores the given state back into the cache, and returns a pointer - /// to it. - fn restore_state(&mut self, state: State) -> Option { - // If we've already stored this state, just return a pointer to it. - // None will be the wiser. - if let Some(si) = self.cache.compiled.get_ptr(&state) { - return Some(si); - } - self.add_state(state) - } - - /// Returns the next state given the current state si and current byte - /// b. {qcur,qnext} are used as scratch space for storing ordered NFA - /// states. - /// - /// This tries to fetch the next state from the cache, but if that fails, - /// it computes the next state, caches it and returns a pointer to it. - /// - /// The pointer can be to a real state, or it can be STATE_DEAD. - /// STATE_UNKNOWN cannot be returned. - /// - /// None is returned if a new state could not be allocated (i.e., the DFA - /// ran out of space and thinks it's running too slowly). - fn next_state( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - si: StatePtr, - b: Byte, - ) -> Option { - if si == STATE_DEAD { - return Some(STATE_DEAD); - } - match self.cache.trans.next(si, self.byte_class(b)) { - STATE_UNKNOWN => self.exec_byte(qcur, qnext, si, b), - STATE_QUIT => None, - nsi => Some(nsi), - } - } - - /// Computes and returns the start state, where searching begins at - /// position `at` in `text`. If the state has already been computed, - /// then it is pulled from the cache. If the state hasn't been cached, - /// then it is computed, cached and a pointer to it is returned. - /// - /// This may return STATE_DEAD but never STATE_UNKNOWN. 
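(Editor's aside: the bail-out arithmetic in `clear_cache` above, isolated as a tiny free function — the function itself is hypothetical; the real method reads these values off of `self`:

    /// RE2's heuristic: after a few flushes, if the DFA has processed at
    /// most 10 bytes per cached state since the last flush, it is thrashing;
    /// give up and let an NFA engine take over.
    fn should_give_up(
        flush_count: u64,
        at: usize,
        last_cache_flush: usize,
        nstates: usize,
    ) -> bool {
        flush_count >= 3
            && at >= last_cache_flush
            && (at - last_cache_flush) <= 10 * nstates
    }

Returning true here is what ultimately surfaces as `Result::Quit` to the caller.)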
- #[cfg_attr(feature = "perf-inline", inline(always))] - fn start_state( - &mut self, - q: &mut SparseSet, - empty_flags: EmptyFlags, - state_flags: StateFlags, - ) -> Option { - // Compute an index into our cache of start states based on the set - // of empty/state flags set at the current position in the input. We - // don't use every flag since not all flags matter. For example, since - // matches are delayed by one byte, start states can never be match - // states. - let flagi = { - (((empty_flags.start as u8) << 0) - | ((empty_flags.end as u8) << 1) - | ((empty_flags.start_line as u8) << 2) - | ((empty_flags.end_line as u8) << 3) - | ((empty_flags.word_boundary as u8) << 4) - | ((empty_flags.not_word_boundary as u8) << 5) - | ((state_flags.is_word() as u8) << 6)) as usize - }; - match self.cache.start_states[flagi] { - STATE_UNKNOWN => {} - si => return Some(si), - } - q.clear(); - let start = usize_to_u32(self.prog.start); - self.follow_epsilons(start, q, empty_flags); - // Start states can never be match states because we delay every match - // by one byte. Given an empty string and an empty match, the match - // won't actually occur until the DFA processes the special EOF - // sentinel byte. - let sp = match self.cached_state(q, state_flags, None) { - None => return None, - Some(sp) => self.start_ptr(sp), - }; - self.cache.start_states[flagi] = sp; - Some(sp) - } - - /// Computes the set of starting flags for the given position in text. - /// - /// This should only be used when executing the DFA forwards over the - /// input. - fn start_flags(&self, text: &[u8], at: usize) -> (EmptyFlags, StateFlags) { - let mut empty_flags = EmptyFlags::default(); - let mut state_flags = StateFlags::default(); - empty_flags.start = at == 0; - empty_flags.end = text.is_empty(); - empty_flags.start_line = at == 0 || text[at - 1] == b'\n'; - empty_flags.end_line = text.is_empty(); - - let is_word_last = at > 0 && Byte::byte(text[at - 1]).is_ascii_word(); - let is_word = at < text.len() && Byte::byte(text[at]).is_ascii_word(); - if is_word_last { - state_flags.set_word(); - } - if is_word == is_word_last { - empty_flags.not_word_boundary = true; - } else { - empty_flags.word_boundary = true; - } - (empty_flags, state_flags) - } - - /// Computes the set of starting flags for the given position in text. - /// - /// This should only be used when executing the DFA in reverse over the - /// input. - fn start_flags_reverse( - &self, - text: &[u8], - at: usize, - ) -> (EmptyFlags, StateFlags) { - let mut empty_flags = EmptyFlags::default(); - let mut state_flags = StateFlags::default(); - empty_flags.start = at == text.len(); - empty_flags.end = text.is_empty(); - empty_flags.start_line = at == text.len() || text[at] == b'\n'; - empty_flags.end_line = text.is_empty(); - - let is_word_last = - at < text.len() && Byte::byte(text[at]).is_ascii_word(); - let is_word = at > 0 && Byte::byte(text[at - 1]).is_ascii_word(); - if is_word_last { - state_flags.set_word(); - } - if is_word == is_word_last { - empty_flags.not_word_boundary = true; - } else { - empty_flags.word_boundary = true; - } - (empty_flags, state_flags) - } - - /// Returns a reference to a State given a pointer to it. - fn state(&self, si: StatePtr) -> &State { - self.cache.compiled.get_state(si).unwrap() - } - - /// Adds the given state to the DFA. - /// - /// This allocates room for transitions out of this state in - /// self.cache.trans. The transitions can be set with the returned - /// StatePtr. 
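The flag-index computation in start_state above packs seven booleans into a small integer; this freestanding sketch shows the packing and what it implies about the start-state cache size. The example flag combination in main is invented.

fn flag_index(
    start: bool,
    end: bool,
    start_line: bool,
    end_line: bool,
    word_boundary: bool,
    not_word_boundary: bool,
    is_word: bool,
) -> usize {
    (((start as u8) << 0)
        | ((end as u8) << 1)
        | ((start_line as u8) << 2)
        | ((end_line as u8) << 3)
        | ((word_boundary as u8) << 4)
        | ((not_word_boundary as u8) << 5)
        | ((is_word as u8) << 6)) as usize
}

fn main() {
    // Start of a non-empty haystack, at a line start and a word boundary:
    // bits 0, 2 and 4 set => 1 + 4 + 16 = 21.
    assert_eq!(flag_index(true, false, true, false, true, false, false), 21);
    // Seven flag bits means the start-state cache needs 2^7 = 128 slots.
    assert_eq!(1usize << 7, 128);
}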
- /// - /// If None is returned, then the state limit was reached and the DFA - /// should quit. - fn add_state(&mut self, state: State) -> Option { - // This will fail if the next state pointer exceeds STATE_PTR. In - // practice, the cache limit will prevent us from ever getting here, - // but maybe callers will set the cache size to something ridiculous... - let si = match self.cache.trans.add() { - None => return None, - Some(si) => si, - }; - // If the program has a Unicode word boundary, then set any transitions - // for non-ASCII bytes to STATE_QUIT. If the DFA stumbles over such a - // transition, then it will quit and an alternative matching engine - // will take over. - if self.prog.has_unicode_word_boundary { - for b in 128..256 { - let cls = self.byte_class(Byte::byte(b as u8)); - self.cache.trans.set_next(si, cls, STATE_QUIT); - } - } - // Finally, put our actual state on to our heap of states and index it - // so we can find it later. - self.cache.size += self.cache.trans.state_heap_size() - + state.data.len() - + (2 * mem::size_of::()) - + mem::size_of::(); - self.cache.compiled.insert(state, si); - // Transition table and set of states and map should all be in sync. - debug_assert!( - self.cache.compiled.len() == self.cache.trans.num_states() - ); - Some(si) - } - - /// Quickly finds the next occurrence of any literal prefixes in the regex. - /// If there are no literal prefixes, then the current position is - /// returned. If there are literal prefixes and one could not be found, - /// then None is returned. - /// - /// This should only be called when the DFA is in a start state. - fn prefix_at(&self, text: &[u8], at: usize) -> Option { - self.prog.prefixes.find(&text[at..]).map(|(s, _)| at + s) - } - - /// Returns the number of byte classes required to discriminate transitions - /// in each state. - /// - /// invariant: num_byte_classes() == len(State.next) - fn num_byte_classes(&self) -> usize { - // We add 1 to account for the special EOF byte. - (self.prog.byte_classes[255] as usize + 1) + 1 - } - - /// Given an input byte or the special EOF sentinel, return its - /// corresponding byte class. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn byte_class(&self, b: Byte) -> usize { - match b.as_byte() { - None => self.num_byte_classes() - 1, - Some(b) => self.u8_class(b), - } - } - - /// Like byte_class, but explicitly for u8s. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn u8_class(&self, b: u8) -> usize { - self.prog.byte_classes[b as usize] as usize - } - - /// Returns true if the DFA should continue searching past the first match. - /// - /// Leftmost first semantics in the DFA are preserved by not following NFA - /// transitions after the first match is seen. - /// - /// On occasion, we want to avoid leftmost first semantics to find either - /// the longest match (for reverse search) or all possible matches (for - /// regex sets). - fn continue_past_first_match(&self) -> bool { - self.prog.is_reverse || self.prog.matches.len() > 1 - } - - /// Returns true if there is a prefix we can quickly search for. - fn has_prefix(&self) -> bool { - !self.prog.is_reverse - && !self.prog.prefixes.is_empty() - && !self.prog.is_anchored_start - } - - /// Sets the STATE_START bit in the given state pointer if and only if - /// we have a prefix to scan for. - /// - /// If there's no prefix, then it's a waste to treat the start state - /// specially. 
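A simplified model of the byte-class mapping that num_byte_classes and byte_class above rely on: bytes the regex cannot distinguish share a class, classes are assumed to be assigned in increasing byte order (so the entry for byte 255 holds the largest class), and one extra class is reserved for the EOF sentinel. The classes below are invented for a pattern like `[a-z]+`.

fn main() {
    // Bytes below 'a' are class 0, 'a'..='z' are class 1, above 'z' class 2.
    let mut byte_classes = [0u8; 256];
    for b in 0..256usize {
        byte_classes[b] = if b < b'a' as usize {
            0
        } else if b <= b'z' as usize {
            1
        } else {
            2
        };
    }
    // The entry for byte 255 holds the largest class; add one more for EOF.
    let num_byte_classes = (byte_classes[255] as usize + 1) + 1;
    assert_eq!(num_byte_classes, 4);
    // The EOF sentinel maps to the last class; real bytes use the table.
    let eof_class = num_byte_classes - 1;
    assert_eq!(eof_class, 3);
    assert_eq!(byte_classes[b'm' as usize], 1);
}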
- fn start_ptr(&self, si: StatePtr) -> StatePtr { - if self.has_prefix() { - si | STATE_START - } else { - si - } - } - - /// Approximate size returns the approximate heap space currently used by - /// the DFA. It is used to determine whether the DFA's state cache needs to - /// be wiped. Namely, it is possible that for certain regexes on certain - /// inputs, a new state could be created for every byte of input. (This is - /// bad for memory use, so we bound it with a cache.) - fn approximate_size(&self) -> usize { - self.cache.size + self.prog.approximate_size() - } -} - -/// An abstraction for representing a map of states. The map supports two -/// different ways of state lookup. One is fast constant time access via a -/// state pointer. The other is a hashmap lookup based on the DFA's -/// constituent NFA states. -/// -/// A DFA state internally uses an Arc such that we only need to store the -/// set of NFA states on the heap once, even though we support looking up -/// states by two different means. A more natural way to express this might -/// use raw pointers, but an Arc is safe and effectively achieves the same -/// thing. -#[derive(Debug)] -struct StateMap { - /// The keys are not actually static but rely on always pointing to a - /// buffer in `states` which will never be moved except when clearing - /// the map or on drop, in which case the keys of this map will be - /// removed before - map: HashMap, - /// Our set of states. Note that `StatePtr / num_byte_classes` indexes - /// this Vec rather than just a `StatePtr`. - states: Vec, - /// The number of byte classes in the DFA. Used to index `states`. - num_byte_classes: usize, -} - -impl StateMap { - fn new(num_byte_classes: usize) -> StateMap { - StateMap { map: HashMap::new(), states: vec![], num_byte_classes } - } - - fn len(&self) -> usize { - self.states.len() - } - - fn is_empty(&self) -> bool { - self.states.is_empty() - } - - fn get_ptr(&self, state: &State) -> Option { - self.map.get(state).cloned() - } - - fn get_state(&self, si: StatePtr) -> Option<&State> { - self.states.get(si as usize / self.num_byte_classes) - } - - fn insert(&mut self, state: State, si: StatePtr) { - self.map.insert(state.clone(), si); - self.states.push(state); - } - - fn clear(&mut self) { - self.map.clear(); - self.states.clear(); - } -} - -impl Transitions { - /// Create a new transition table. - /// - /// The number of byte classes corresponds to the stride. Every state will - /// have `num_byte_classes` slots for transitions. - fn new(num_byte_classes: usize) -> Transitions { - Transitions { table: vec![], num_byte_classes } - } - - /// Returns the total number of states currently in this table. - fn num_states(&self) -> usize { - self.table.len() / self.num_byte_classes - } - - /// Allocates room for one additional state and returns a pointer to it. - /// - /// If there's no more room, None is returned. - fn add(&mut self) -> Option { - let si = self.table.len(); - if si > STATE_MAX as usize { - return None; - } - self.table.extend(repeat(STATE_UNKNOWN).take(self.num_byte_classes)); - Some(usize_to_u32(si)) - } - - /// Clears the table of all states. - fn clear(&mut self) { - self.table.clear(); - } - - /// Sets the transition from (si, cls) to next. - fn set_next(&mut self, si: StatePtr, cls: usize, next: StatePtr) { - self.table[si as usize + cls] = next; - } - - /// Returns the transition corresponding to (si, cls). 
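A toy version of the dual-lookup StateMap described above: the same Arc-backed state is stored once but is reachable both by hash (to deduplicate newly computed states) and by pointer (for fast access during a search). The key bytes in main are invented.

use std::collections::HashMap;
use std::sync::Arc;

#[derive(Clone, Hash, PartialEq, Eq)]
struct State {
    data: Arc<[u8]>,
}

struct StateMap {
    map: HashMap<State, u32>,
    states: Vec<State>,
    num_byte_classes: usize,
}

impl StateMap {
    fn get_ptr(&self, state: &State) -> Option<u32> {
        self.map.get(state).cloned()
    }
    fn get_state(&self, si: u32) -> Option<&State> {
        // StatePtrs are pre-multiplied by the stride, hence the division.
        self.states.get(si as usize / self.num_byte_classes)
    }
    fn insert(&mut self, state: State, si: u32) {
        self.map.insert(state.clone(), si); // the clone only bumps the Arc
        self.states.push(state);
    }
}

fn main() {
    let mut m =
        StateMap { map: HashMap::new(), states: vec![], num_byte_classes: 4 };
    let s = State { data: Arc::from(&[0u8, 10, 4][..]) };
    m.insert(s.clone(), 0);
    assert_eq!(m.get_ptr(&s), Some(0));
    assert!(m.get_state(0).is_some());
}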
- fn next(&self, si: StatePtr, cls: usize) -> StatePtr { - self.table[si as usize + cls] - } - - /// The heap size, in bytes, of a single state in the transition table. - fn state_heap_size(&self) -> usize { - self.num_byte_classes * mem::size_of::() - } - - /// Like `next`, but uses unchecked access and is therefore not safe. - unsafe fn next_unchecked(&self, si: StatePtr, cls: usize) -> StatePtr { - debug_assert!((si as usize) < self.table.len()); - debug_assert!(cls < self.num_byte_classes); - *self.table.get_unchecked(si as usize + cls) - } -} - -impl StateFlags { - fn is_match(&self) -> bool { - self.0 & 0b0000_0001 > 0 - } - - fn set_match(&mut self) { - self.0 |= 0b0000_0001; - } - - fn is_word(&self) -> bool { - self.0 & 0b0000_0010 > 0 - } - - fn set_word(&mut self) { - self.0 |= 0b0000_0010; - } - - fn has_empty(&self) -> bool { - self.0 & 0b0000_0100 > 0 - } - - fn set_empty(&mut self) { - self.0 |= 0b0000_0100; - } -} - -impl Byte { - fn byte(b: u8) -> Self { - Byte(b as u16) - } - fn eof() -> Self { - Byte(256) - } - fn is_eof(&self) -> bool { - self.0 == 256 - } - - fn is_ascii_word(&self) -> bool { - let b = match self.as_byte() { - None => return false, - Some(b) => b, - }; - match b { - b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_' => true, - _ => false, - } - } - - fn as_byte(&self) -> Option { - if self.is_eof() { - None - } else { - Some(self.0 as u8) - } - } -} - -impl fmt::Debug for State { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let ips: Vec = self.inst_ptrs().collect(); - f.debug_struct("State") - .field("flags", &self.flags()) - .field("insts", &ips) - .finish() - } -} - -impl fmt::Debug for Transitions { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut fmtd = f.debug_map(); - for si in 0..self.num_states() { - let s = si * self.num_byte_classes; - let e = s + self.num_byte_classes; - fmtd.entry(&si.to_string(), &TransitionsRow(&self.table[s..e])); - } - fmtd.finish() - } -} - -struct TransitionsRow<'a>(&'a [StatePtr]); - -impl<'a> fmt::Debug for TransitionsRow<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut fmtd = f.debug_map(); - for (b, si) in self.0.iter().enumerate() { - match *si { - STATE_UNKNOWN => {} - STATE_DEAD => { - fmtd.entry(&vb(b as usize), &"DEAD"); - } - si => { - fmtd.entry(&vb(b as usize), &si.to_string()); - } - } - } - fmtd.finish() - } -} - -impl fmt::Debug for StateFlags { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("StateFlags") - .field("is_match", &self.is_match()) - .field("is_word", &self.is_word()) - .field("has_empty", &self.has_empty()) - .finish() - } -} - -/// Helper function for formatting a byte as a nice-to-read escaped string. 
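The dense transition table above can be reduced to a short runnable sketch. A StatePtr is a pre-multiplied offset into one flat Vec, so a transition lookup is a single add and index: table[si + cls]. The STATE_UNKNOWN value here is a toy placeholder, not the crate's actual sentinel.

const STATE_UNKNOWN: u32 = u32::MAX; // toy sentinel for "not yet computed"

struct Transitions {
    table: Vec<u32>,
    num_byte_classes: usize,
}

impl Transitions {
    // Allocate one state's worth of slots; the returned pointer is the
    // state's byte offset in `table`, already scaled by the stride.
    fn add(&mut self) -> u32 {
        let si = self.table.len() as u32;
        self.table
            .extend(std::iter::repeat(STATE_UNKNOWN).take(self.num_byte_classes));
        si
    }
    fn set_next(&mut self, si: u32, cls: usize, next: u32) {
        self.table[si as usize + cls] = next;
    }
    fn next(&self, si: u32, cls: usize) -> u32 {
        self.table[si as usize + cls]
    }
}

fn main() {
    let mut t = Transitions { table: vec![], num_byte_classes: 4 };
    let s0 = t.add(); // StatePtr 0
    let s1 = t.add(); // StatePtr 4, not 1: pointers are pre-multiplied
    assert_eq!(s1, 4);
    t.set_next(s0, 2, s1);
    assert_eq!(t.next(s0, 2), s1);
    assert_eq!(t.next(s1, 0), STATE_UNKNOWN);
}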
-fn vb(b: usize) -> String { - use std::ascii::escape_default; - - if b > ::std::u8::MAX as usize { - "EOF".to_owned() - } else { - let escaped = escape_default(b as u8).collect::>(); - String::from_utf8_lossy(&escaped).into_owned() - } -} - -fn usize_to_u32(n: usize) -> u32 { - if (n as u64) > (::std::u32::MAX as u64) { - panic!("BUG: {} is too big to fit into u32", n) - } - n as u32 -} - -#[allow(dead_code)] // useful for debugging -fn show_state_ptr(si: StatePtr) -> String { - let mut s = format!("{:?}", si & STATE_MAX); - if si == STATE_UNKNOWN { - s = format!("{} (unknown)", s); - } - if si == STATE_DEAD { - s = format!("{} (dead)", s); - } - if si == STATE_QUIT { - s = format!("{} (quit)", s); - } - if si & STATE_START > 0 { - s = format!("{} (start)", s); - } - if si & STATE_MATCH > 0 { - s = format!("{} (match)", s); - } - s -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn write_vari32(data: &mut Vec, n: i32) { - let mut un = (n as u32) << 1; - if n < 0 { - un = !un; - } - write_varu32(data, un) -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn read_vari32(data: &[u8]) -> (i32, usize) { - let (un, i) = read_varu32(data); - let mut n = (un >> 1) as i32; - if un & 1 != 0 { - n = !n; - } - (n, i) -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn write_varu32(data: &mut Vec, mut n: u32) { - while n >= 0b1000_0000 { - data.push((n as u8) | 0b1000_0000); - n >>= 7; - } - data.push(n as u8); -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn read_varu32(data: &[u8]) -> (u32, usize) { - let mut n: u32 = 0; - let mut shift: u32 = 0; - for (i, &b) in data.iter().enumerate() { - if b < 0b1000_0000 { - return (n | ((b as u32) << shift), i + 1); - } - n |= ((b as u32) & 0b0111_1111) << shift; - shift += 7; - } - (0, 0) -} - -#[cfg(test)] -mod tests { - - use super::{ - push_inst_ptr, read_vari32, read_varu32, write_vari32, write_varu32, - State, StateFlags, - }; - use quickcheck::{quickcheck, Gen, QuickCheck}; - use std::sync::Arc; - - #[test] - fn prop_state_encode_decode() { - fn p(mut ips: Vec, flags: u8) -> bool { - // It looks like our encoding scheme can't handle instruction - // pointers at or above 2**31. We should fix that, but it seems - // unlikely to occur in real code due to the amount of memory - // required for such a state machine. So for now, we just clamp - // our test data. 
- for ip in &mut ips { - if *ip >= 1 << 31 { - *ip = (1 << 31) - 1; - } - } - let mut data = vec![flags]; - let mut prev = 0; - for &ip in ips.iter() { - push_inst_ptr(&mut data, &mut prev, ip); - } - let state = State { data: Arc::from(&data[..]) }; - - let expected: Vec = - ips.into_iter().map(|ip| ip as usize).collect(); - let got: Vec = state.inst_ptrs().collect(); - expected == got && state.flags() == StateFlags(flags) - } - QuickCheck::new() - .gen(Gen::new(10_000)) - .quickcheck(p as fn(Vec, u8) -> bool); - } - - #[test] - fn prop_read_write_u32() { - fn p(n: u32) -> bool { - let mut buf = vec![]; - write_varu32(&mut buf, n); - let (got, nread) = read_varu32(&buf); - nread == buf.len() && got == n - } - quickcheck(p as fn(u32) -> bool); - } - - #[test] - fn prop_read_write_i32() { - fn p(n: i32) -> bool { - let mut buf = vec![]; - write_vari32(&mut buf, n); - let (got, nread) = read_vari32(&buf); - nread == buf.len() && got == n - } - quickcheck(p as fn(i32) -> bool); - } -} diff --git a/vendor/regex/src/error.rs b/vendor/regex/src/error.rs index 3e0ec75..6026b38 100644 --- a/vendor/regex/src/error.rs +++ b/vendor/regex/src/error.rs @@ -1,37 +1,72 @@ -use std::fmt; -use std::iter::repeat; +use alloc::string::{String, ToString}; + +use regex_automata::meta; /// An error that occurred during parsing or compiling a regular expression. +#[non_exhaustive] #[derive(Clone, PartialEq)] pub enum Error { /// A syntax error. Syntax(String), - /// The compiled program exceeded the set size limit. - /// The argument is the size limit imposed. - CompiledTooBig(usize), - /// Hints that destructuring should not be exhaustive. + /// The compiled program exceeded the set size + /// limit. The argument is the size limit imposed by + /// [`RegexBuilder::size_limit`](crate::RegexBuilder::size_limit). Even + /// when not configured explicitly, it defaults to a reasonable limit. + /// + /// If you're getting this error, it occurred because your regex has been + /// compiled to an intermediate state that is too big. It is important to + /// note that exceeding this limit does _not_ mean the regex is too big to + /// _work_, but rather, the regex is big enough that it may wind up being + /// surprisingly slow when used in a search. In other words, this error is + /// meant to be a practical heuristic for avoiding a performance footgun, + /// and especially so for the case where the regex pattern is coming from + /// an untrusted source. /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, + /// There are generally two ways to move forward if you hit this error. + /// The first is to find some way to use a smaller regex. The second is to + /// increase the size limit via `RegexBuilder::size_limit`. However, if + /// your regex pattern is not from a trusted source, then neither of these + /// approaches may be appropriate. Instead, you'll have to determine just + /// how big of a regex you want to allow. + CompiledTooBig(usize), } -impl ::std::error::Error for Error { +impl Error { + pub(crate) fn from_meta_build_error(err: meta::BuildError) -> Error { + if let Some(size_limit) = err.size_limit() { + Error::CompiledTooBig(size_limit) + } else if let Some(ref err) = err.syntax_error() { + Error::Syntax(err.to_string()) + } else { + // This is a little suspect. 
Technically there are more ways for + // a meta regex to fail to build other than "exceeded size limit" + // and "syntax error." For example, if there are too many states + // or even too many patterns. But in practice this is probably + // good enough. The worst thing that happens is that Error::Syntax + // represents an error that isn't technically a syntax error, but + // the actual message will still be shown. So... it's not too bad. + // + // We really should have made the Error type in the regex crate + // completely opaque. Rookie mistake. + Error::Syntax(err.to_string()) + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Error { // TODO: Remove this method entirely on the next breaking semver release. #[allow(deprecated)] fn description(&self) -> &str { match *self { Error::Syntax(ref err) => err, Error::CompiledTooBig(_) => "compiled program too big", - Error::__Nonexhaustive => unreachable!(), } } } -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match *self { Error::Syntax(ref err) => err.fmt(f), Error::CompiledTooBig(limit) => write!( @@ -39,7 +74,6 @@ impl fmt::Display for Error { "Compiled regex exceeds size limit of {} bytes.", limit ), - Error::__Nonexhaustive => unreachable!(), } } } @@ -48,11 +82,11 @@ impl fmt::Display for Error { // errors when people use `Regex::new(...).unwrap()`. It's a little weird, // but the `Syntax` variant is already storing a `String` anyway, so we might // as well format it nicely. -impl fmt::Debug for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match *self { Error::Syntax(ref err) => { - let hr: String = repeat('~').take(79).collect(); + let hr: String = core::iter::repeat('~').take(79).collect(); writeln!(f, "Syntax(")?; writeln!(f, "{}", hr)?; writeln!(f, "{}", err)?; @@ -63,9 +97,6 @@ impl fmt::Debug for Error { Error::CompiledTooBig(limit) => { f.debug_tuple("CompiledTooBig").field(&limit).finish() } - Error::__Nonexhaustive => { - f.debug_tuple("__Nonexhaustive").finish() - } } } } diff --git a/vendor/regex/src/exec.rs b/vendor/regex/src/exec.rs deleted file mode 100644 index b9abcdc..0000000 --- a/vendor/regex/src/exec.rs +++ /dev/null @@ -1,1655 +0,0 @@ -use std::cell::RefCell; -use std::collections::HashMap; -use std::panic::AssertUnwindSafe; -use std::sync::Arc; - -#[cfg(feature = "perf-literal")] -use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind}; -use regex_syntax::hir::literal::Literals; -use regex_syntax::hir::Hir; -use regex_syntax::ParserBuilder; - -use crate::backtrack; -use crate::compile::Compiler; -#[cfg(feature = "perf-dfa")] -use crate::dfa; -use crate::error::Error; -use crate::input::{ByteInput, CharInput}; -use crate::literal::LiteralSearcher; -use crate::pikevm; -use crate::pool::{Pool, PoolGuard}; -use crate::prog::Program; -use crate::re_builder::RegexOptions; -use crate::re_bytes; -use crate::re_set; -use crate::re_trait::{Locations, RegularExpression, Slot}; -use crate::re_unicode; -use crate::utf8::next_utf8; - -/// `Exec` manages the execution of a regular expression. -/// -/// In particular, this manages the various compiled forms of a single regular -/// expression and the choice of which matching engine to use to execute a -/// regular expression. 
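Returning to the CompiledTooBig documentation above: a hedged usage sketch against the public API, since RegexBuilder::size_limit is the knob the new docs point at. The pattern and the absurdly small limit are invented, and this assumes the `regex` crate as a dependency; with such a tiny limit the build error should be CompiledTooBig, but the wildcard arm covers the non-exhaustive enum either way.

use regex::RegexBuilder;

fn main() {
    // An absurdly small limit forces the size-limit error even for a tiny
    // pattern (`\w` expands to a large Unicode class when compiled).
    let err = RegexBuilder::new(r"\w+").size_limit(10).build().unwrap_err();
    match err {
        regex::Error::CompiledTooBig(limit) => {
            println!("regex exceeded the {} byte compile budget", limit)
        }
        other => println!("some other build error: {}", other),
    }
}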
-#[derive(Debug)] -pub struct Exec { - /// All read only state. - ro: Arc, - /// A pool of reusable values for the various matching engines. - /// - /// Note that boxing this value is not strictly necessary, but it is an - /// easy way to ensure that T does not bloat the stack sized used by a pool - /// in the case where T is big. And this turns out to be the case at the - /// time of writing for regex's use of this pool. At the time of writing, - /// the size of a Regex on the stack is 856 bytes. Boxing this value - /// reduces that size to 16 bytes. - pool: Box>, -} - -/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This -/// means it is no longer Sync, but we can now avoid the overhead of -/// synchronization to fetch the cache. -#[derive(Debug)] -pub struct ExecNoSync<'c> { - /// All read only state. - ro: &'c Arc, - /// Caches for the various matching engines. - cache: PoolGuard<'c, ProgramCache>, -} - -/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8]. -#[derive(Debug)] -pub struct ExecNoSyncStr<'c>(ExecNoSync<'c>); - -/// `ExecReadOnly` comprises all read only state for a regex. Namely, all such -/// state is determined at compile time and never changes during search. -#[derive(Debug)] -struct ExecReadOnly { - /// The original regular expressions given by the caller to compile. - res: Vec, - /// A compiled program that is used in the NFA simulation and backtracking. - /// It can be byte-based or Unicode codepoint based. - /// - /// N.B. It is not possibly to make this byte-based from the public API. - /// It is only used for testing byte based programs in the NFA simulations. - nfa: Program, - /// A compiled byte based program for DFA execution. This is only used - /// if a DFA can be executed. (Currently, only word boundary assertions are - /// not supported.) Note that this program contains an embedded `.*?` - /// preceding the first capture group, unless the regex is anchored at the - /// beginning. - dfa: Program, - /// The same as above, except the program is reversed (and there is no - /// preceding `.*?`). This is used by the DFA to find the starting location - /// of matches. - dfa_reverse: Program, - /// A set of suffix literals extracted from the regex. - /// - /// Prefix literals are stored on the `Program`, since they are used inside - /// the matching engines. - suffixes: LiteralSearcher, - /// An Aho-Corasick automaton with leftmost-first match semantics. - /// - /// This is only set when the entire regex is a simple unanchored - /// alternation of literals. We could probably use it more circumstances, - /// but this is already hacky enough in this architecture. - /// - /// N.B. We use u32 as a state ID representation under the assumption that - /// if we were to exhaust the ID space, we probably would have long - /// surpassed the compilation size limit. - #[cfg(feature = "perf-literal")] - ac: Option>, - /// match_type encodes as much upfront knowledge about how we're going to - /// execute a search as possible. - match_type: MatchType, -} - -/// Facilitates the construction of an executor by exposing various knobs -/// to control how a regex is executed and what kinds of resources it's -/// permitted to use. -// `ExecBuilder` is only public via the `internal` module, so avoid deriving -// `Debug`. 
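The boxed-pool comment above (856 bytes shrinking to 16 on the stack) is easy to demonstrate in isolation. The sizes below are invented stand-ins; the point is only that boxing a large cache type keeps the outer struct pointer-sized.

struct BigCache {
    scratch: [u64; 100], // stand-in for the per-engine caches
}

struct Inline {
    cache: BigCache,
}

struct Boxed {
    cache: Box<BigCache>,
}

fn main() {
    // Embedding the cache inline bloats every value of the outer type...
    assert!(std::mem::size_of::<Inline>() >= 800);
    // ...while boxing it costs one pointer on the stack.
    assert_eq!(std::mem::size_of::<Boxed>(), std::mem::size_of::<usize>());
    let _ = Boxed { cache: Box::new(BigCache { scratch: [0; 100] }) };
}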
-#[allow(missing_debug_implementations)] -pub struct ExecBuilder { - options: RegexOptions, - match_type: Option, - bytes: bool, - only_utf8: bool, -} - -/// Parsed represents a set of parsed regular expressions and their detected -/// literals. -struct Parsed { - exprs: Vec, - prefixes: Literals, - suffixes: Literals, - bytes: bool, -} - -impl ExecBuilder { - /// Create a regex execution builder. - /// - /// This uses default settings for everything except the regex itself, - /// which must be provided. Further knobs can be set by calling methods, - /// and then finally, `build` to actually create the executor. - pub fn new(re: &str) -> Self { - Self::new_many(&[re]) - } - - /// Like new, but compiles the union of the given regular expressions. - /// - /// Note that when compiling 2 or more regular expressions, capture groups - /// are completely unsupported. (This means both `find` and `captures` - /// won't work.) - pub fn new_many(res: I) -> Self - where - S: AsRef, - I: IntoIterator, - { - let mut opts = RegexOptions::default(); - opts.pats = res.into_iter().map(|s| s.as_ref().to_owned()).collect(); - Self::new_options(opts) - } - - /// Create a regex execution builder. - pub fn new_options(opts: RegexOptions) -> Self { - ExecBuilder { - options: opts, - match_type: None, - bytes: false, - only_utf8: true, - } - } - - /// Set the matching engine to be automatically determined. - /// - /// This is the default state and will apply whatever optimizations are - /// possible, such as running a DFA. - /// - /// This overrides whatever was previously set via the `nfa` or - /// `bounded_backtracking` methods. - pub fn automatic(mut self) -> Self { - self.match_type = None; - self - } - - /// Sets the matching engine to use the NFA algorithm no matter what - /// optimizations are possible. - /// - /// This overrides whatever was previously set via the `automatic` or - /// `bounded_backtracking` methods. - pub fn nfa(mut self) -> Self { - self.match_type = Some(MatchType::Nfa(MatchNfaType::PikeVM)); - self - } - - /// Sets the matching engine to use a bounded backtracking engine no - /// matter what optimizations are possible. - /// - /// One must use this with care, since the bounded backtracking engine - /// uses memory proportion to `len(regex) * len(text)`. - /// - /// This overrides whatever was previously set via the `automatic` or - /// `nfa` methods. - pub fn bounded_backtracking(mut self) -> Self { - self.match_type = Some(MatchType::Nfa(MatchNfaType::Backtrack)); - self - } - - /// Compiles byte based programs for use with the NFA matching engines. - /// - /// By default, the NFA engines match on Unicode scalar values. They can - /// be made to use byte based programs instead. In general, the byte based - /// programs are slower because of a less efficient encoding of character - /// classes. - /// - /// Note that this does not impact DFA matching engines, which always - /// execute on bytes. - pub fn bytes(mut self, yes: bool) -> Self { - self.bytes = yes; - self - } - - /// When disabled, the program compiled may match arbitrary bytes. - /// - /// When enabled (the default), all compiled programs exclusively match - /// valid UTF-8 bytes. - pub fn only_utf8(mut self, yes: bool) -> Self { - self.only_utf8 = yes; - self - } - - /// Set the Unicode flag. - pub fn unicode(mut self, yes: bool) -> Self { - self.options.unicode = yes; - self - } - - /// Parse the current set of patterns into their AST and extract literals. 
- fn parse(&self) -> Result { - let mut exprs = Vec::with_capacity(self.options.pats.len()); - let mut prefixes = Some(Literals::empty()); - let mut suffixes = Some(Literals::empty()); - let mut bytes = false; - let is_set = self.options.pats.len() > 1; - // If we're compiling a regex set and that set has any anchored - // expressions, then disable all literal optimizations. - for pat in &self.options.pats { - let mut parser = ParserBuilder::new() - .octal(self.options.octal) - .case_insensitive(self.options.case_insensitive) - .multi_line(self.options.multi_line) - .dot_matches_new_line(self.options.dot_matches_new_line) - .swap_greed(self.options.swap_greed) - .ignore_whitespace(self.options.ignore_whitespace) - .unicode(self.options.unicode) - .allow_invalid_utf8(!self.only_utf8) - .nest_limit(self.options.nest_limit) - .build(); - let expr = - parser.parse(pat).map_err(|e| Error::Syntax(e.to_string()))?; - bytes = bytes || !expr.is_always_utf8(); - - if cfg!(feature = "perf-literal") { - if !expr.is_anchored_start() && expr.is_any_anchored_start() { - // Partial anchors unfortunately make it hard to use - // prefixes, so disable them. - prefixes = None; - } else if is_set && expr.is_anchored_start() { - // Regex sets with anchors do not go well with literal - // optimizations. - prefixes = None; - } - prefixes = prefixes.and_then(|mut prefixes| { - if !prefixes.union_prefixes(&expr) { - None - } else { - Some(prefixes) - } - }); - - if !expr.is_anchored_end() && expr.is_any_anchored_end() { - // Partial anchors unfortunately make it hard to use - // suffixes, so disable them. - suffixes = None; - } else if is_set && expr.is_anchored_end() { - // Regex sets with anchors do not go well with literal - // optimizations. - suffixes = None; - } - suffixes = suffixes.and_then(|mut suffixes| { - if !suffixes.union_suffixes(&expr) { - None - } else { - Some(suffixes) - } - }); - } - exprs.push(expr); - } - Ok(Parsed { - exprs, - prefixes: prefixes.unwrap_or_else(Literals::empty), - suffixes: suffixes.unwrap_or_else(Literals::empty), - bytes, - }) - } - - /// Build an executor that can run a regular expression. - pub fn build(self) -> Result { - // Special case when we have no patterns to compile. - // This can happen when compiling a regex set. 
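The prefix-literal gating rules in parse above boil down to a small predicate; here it is isolated as a sketch. The function name and boolean parameters are invented summaries of the expr.is_anchored_start / is_any_anchored_start checks.

fn prefixes_allowed(
    is_set: bool,
    anchored_start: bool,
    any_anchored_start: bool,
) -> bool {
    if !anchored_start && any_anchored_start {
        false // partial anchors make prefix literals unusable
    } else if is_set && anchored_start {
        false // anchored members of a regex set mix badly with literal scans
    } else {
        true
    }
}

fn main() {
    assert!(prefixes_allowed(false, true, true)); // fully anchored single regex
    assert!(!prefixes_allowed(false, false, true)); // e.g. `a|^b`: partial anchor
    assert!(!prefixes_allowed(true, true, true)); // anchored member of a set
}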
- if self.options.pats.is_empty() { - let ro = Arc::new(ExecReadOnly { - res: vec![], - nfa: Program::new(), - dfa: Program::new(), - dfa_reverse: Program::new(), - suffixes: LiteralSearcher::empty(), - #[cfg(feature = "perf-literal")] - ac: None, - match_type: MatchType::Nothing, - }); - let pool = ExecReadOnly::new_pool(&ro); - return Ok(Exec { ro, pool }); - } - let parsed = self.parse()?; - let mut nfa = Compiler::new() - .size_limit(self.options.size_limit) - .bytes(self.bytes || parsed.bytes) - .only_utf8(self.only_utf8) - .compile(&parsed.exprs)?; - let mut dfa = Compiler::new() - .size_limit(self.options.size_limit) - .dfa(true) - .only_utf8(self.only_utf8) - .compile(&parsed.exprs)?; - let mut dfa_reverse = Compiler::new() - .size_limit(self.options.size_limit) - .dfa(true) - .only_utf8(self.only_utf8) - .reverse(true) - .compile(&parsed.exprs)?; - - #[cfg(feature = "perf-literal")] - let ac = self.build_aho_corasick(&parsed); - nfa.prefixes = LiteralSearcher::prefixes(parsed.prefixes); - dfa.prefixes = nfa.prefixes.clone(); - dfa.dfa_size_limit = self.options.dfa_size_limit; - dfa_reverse.dfa_size_limit = self.options.dfa_size_limit; - - let mut ro = ExecReadOnly { - res: self.options.pats, - nfa, - dfa, - dfa_reverse, - suffixes: LiteralSearcher::suffixes(parsed.suffixes), - #[cfg(feature = "perf-literal")] - ac, - match_type: MatchType::Nothing, - }; - ro.match_type = ro.choose_match_type(self.match_type); - - let ro = Arc::new(ro); - let pool = ExecReadOnly::new_pool(&ro); - Ok(Exec { ro, pool }) - } - - #[cfg(feature = "perf-literal")] - fn build_aho_corasick(&self, parsed: &Parsed) -> Option> { - if parsed.exprs.len() != 1 { - return None; - } - let lits = match alternation_literals(&parsed.exprs[0]) { - None => return None, - Some(lits) => lits, - }; - // If we have a small number of literals, then let Teddy handle - // things (see literal/mod.rs). - if lits.len() <= 32 { - return None; - } - Some( - AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostFirst) - .auto_configure(&lits) - .build_with_size::(&lits) - // This should never happen because we'd long exceed the - // compilation limit for regexes first. - .expect("AC automaton too big"), - ) - } -} - -impl<'c> RegularExpression for ExecNoSyncStr<'c> { - type Text = str; - - fn slots_len(&self) -> usize { - self.0.slots_len() - } - - fn next_after_empty(&self, text: &str, i: usize) -> usize { - next_utf8(text.as_bytes(), i) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_match_at(&self, text: &str, start: usize) -> Option { - self.0.shortest_match_at(text.as_bytes(), start) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn is_match_at(&self, text: &str, start: usize) -> bool { - self.0.is_match_at(text.as_bytes(), start) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> { - self.0.find_at(text.as_bytes(), start) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn captures_read_at( - &self, - locs: &mut Locations, - text: &str, - start: usize, - ) -> Option<(usize, usize)> { - self.0.captures_read_at(locs, text.as_bytes(), start) - } -} - -impl<'c> RegularExpression for ExecNoSync<'c> { - type Text = [u8]; - - /// Returns the number of capture slots in the regular expression. (There - /// are two slots for every capture group, corresponding to possibly empty - /// start and end locations of the capture.) 
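The Aho-Corasick gate in build_aho_corasick above, expressed as a freestanding decision function: AC is only worth building for a single pattern that decomposes into an alternation of literals, and only when there are more than 32 of them (smaller sets are left to the Teddy prefilter). The function name and the literals in main are invented.

fn wants_aho_corasick(
    num_patterns: usize,
    alternation_lits: Option<&[Vec<u8>]>,
) -> bool {
    if num_patterns != 1 {
        return false;
    }
    match alternation_lits {
        None => false,        // not a simple alternation of literals
        Some(lits) => lits.len() > 32, // small sets go to Teddy instead
    }
}

fn main() {
    let few: Vec<Vec<u8>> = vec![b"foo".to_vec(), b"bar".to_vec()];
    assert!(!wants_aho_corasick(1, Some(&few)));
    let many: Vec<Vec<u8>> =
        (0..40).map(|i| format!("lit{}", i).into_bytes()).collect();
    assert!(wants_aho_corasick(1, Some(&many)));
    assert!(!wants_aho_corasick(2, Some(&many)));
}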
- fn slots_len(&self) -> usize { - self.ro.nfa.captures.len() * 2 - } - - fn next_after_empty(&self, _text: &[u8], i: usize) -> usize { - i + 1 - } - - /// Returns the end of a match location, possibly occurring before the - /// end location of the correct leftmost-first match. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_match_at(&self, text: &[u8], start: usize) -> Option { - if !self.is_anchor_end_match(text) { - return None; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => { - self.find_literals(ty, text, start).map(|(_, e)| e) - } - #[cfg(feature = "perf-dfa")] - MatchType::Dfa | MatchType::DfaMany => { - match self.shortest_dfa(text, start) { - dfa::Result::Match(end) => Some(end), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.shortest_nfa(text, start), - } - } - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - true, - &text[start..], - text.len() - start, - ) { - dfa::Result::Match(_) => Some(text.len()), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.shortest_nfa(text, start), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.shortest_dfa_reverse_suffix(text, start) { - dfa::Result::Match(e) => Some(e), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.shortest_nfa(text, start), - } - } - MatchType::Nfa(ty) => self.shortest_nfa_type(ty, text, start), - MatchType::Nothing => None, - } - } - - /// Returns true if and only if the regex matches text. - /// - /// For single regular expressions, this is equivalent to calling - /// shortest_match(...).is_some(). - #[cfg_attr(feature = "perf-inline", inline(always))] - fn is_match_at(&self, text: &[u8], start: usize) -> bool { - if !self.is_anchor_end_match(text) { - return false; - } - // We need to do this dance because shortest_match relies on the NFA - // filling in captures[1], but a RegexSet has no captures. In other - // words, a RegexSet can't (currently) use shortest_match. ---AG - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => { - self.find_literals(ty, text, start).is_some() - } - #[cfg(feature = "perf-dfa")] - MatchType::Dfa | MatchType::DfaMany => { - match self.shortest_dfa(text, start) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.match_nfa(text, start), - } - } - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - true, - &text[start..], - text.len() - start, - ) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.match_nfa(text, start), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.shortest_dfa_reverse_suffix(text, start) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.match_nfa(text, start), - } - } - MatchType::Nfa(ty) => self.match_nfa_type(ty, text, start), - MatchType::Nothing => false, - } - } - - /// Finds the start and end location of the leftmost-first match, starting - /// at the given location. 
- #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_at(&self, text: &[u8], start: usize) -> Option<(usize, usize)> { - if !self.is_anchor_end_match(text) { - return None; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => self.find_literals(ty, text, start), - #[cfg(feature = "perf-dfa")] - MatchType::Dfa => match self.find_dfa_forward(text, start) { - dfa::Result::Match((s, e)) => Some((s, e)), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.find_nfa(MatchNfaType::Auto, text, start) - } - }, - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match self.find_dfa_anchored_reverse(text, start) { - dfa::Result::Match((s, e)) => Some((s, e)), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.find_nfa(MatchNfaType::Auto, text, start) - } - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.find_dfa_reverse_suffix(text, start) { - dfa::Result::Match((s, e)) => Some((s, e)), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.find_nfa(MatchNfaType::Auto, text, start) - } - } - } - MatchType::Nfa(ty) => self.find_nfa(ty, text, start), - MatchType::Nothing => None, - #[cfg(feature = "perf-dfa")] - MatchType::DfaMany => { - unreachable!("BUG: RegexSet cannot be used with find") - } - } - } - - /// Finds the start and end location of the leftmost-first match and also - /// fills in all matching capture groups. - /// - /// The number of capture slots given should be equal to the total number - /// of capture slots in the compiled program. - /// - /// Note that the first two slots always correspond to the start and end - /// locations of the overall match. - fn captures_read_at( - &self, - locs: &mut Locations, - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - let slots = locs.as_slots(); - for slot in slots.iter_mut() { - *slot = None; - } - // If the caller unnecessarily uses this, then we try to save them - // from themselves. 
- match slots.len() { - 0 => return self.find_at(text, start), - 2 => { - return self.find_at(text, start).map(|(s, e)| { - slots[0] = Some(s); - slots[1] = Some(e); - (s, e) - }); - } - _ => {} // fallthrough - } - if !self.is_anchor_end_match(text) { - return None; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => { - self.find_literals(ty, text, start).and_then(|(s, e)| { - self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ) - }) - } - #[cfg(feature = "perf-dfa")] - MatchType::Dfa => { - if self.ro.nfa.is_anchored_start { - self.captures_nfa(slots, text, start) - } else { - match self.find_dfa_forward(text, start) { - dfa::Result::Match((s, e)) => self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.captures_nfa(slots, text, start) - } - } - } - } - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match self.find_dfa_anchored_reverse(text, start) { - dfa::Result::Match((s, e)) => self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.captures_nfa(slots, text, start), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.find_dfa_reverse_suffix(text, start) { - dfa::Result::Match((s, e)) => self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.captures_nfa(slots, text, start), - } - } - MatchType::Nfa(ty) => { - self.captures_nfa_type(ty, slots, text, start, text.len()) - } - MatchType::Nothing => None, - #[cfg(feature = "perf-dfa")] - MatchType::DfaMany => { - unreachable!("BUG: RegexSet cannot be used with captures") - } - } - } -} - -impl<'c> ExecNoSync<'c> { - /// Finds the leftmost-first match using only literal search. - #[cfg(feature = "perf-literal")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_literals( - &self, - ty: MatchLiteralType, - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - use self::MatchLiteralType::*; - match ty { - Unanchored => { - let lits = &self.ro.nfa.prefixes; - lits.find(&text[start..]).map(|(s, e)| (start + s, start + e)) - } - AnchoredStart => { - let lits = &self.ro.nfa.prefixes; - if start == 0 || !self.ro.nfa.is_anchored_start { - lits.find_start(&text[start..]) - .map(|(s, e)| (start + s, start + e)) - } else { - None - } - } - AnchoredEnd => { - let lits = &self.ro.suffixes; - lits.find_end(&text[start..]) - .map(|(s, e)| (start + s, start + e)) - } - AhoCorasick => self - .ro - .ac - .as_ref() - .unwrap() - .find(&text[start..]) - .map(|m| (start + m.start(), start + m.end())), - } - } - - /// Finds the leftmost-first match (start and end) using only the DFA. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. - #[cfg(feature = "perf-dfa")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_dfa_forward( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result<(usize, usize)> { - use crate::dfa::Result::*; - let end = match dfa::Fsm::forward( - &self.ro.dfa, - self.cache.value(), - false, - text, - start, - ) { - NoMatch(i) => return NoMatch(i), - Quit => return Quit, - Match(end) if start == end => return Match((start, start)), - Match(end) => end, - }; - // Now run the DFA in reverse to find the start of the match. 
- match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - false, - &text[start..], - end - start, - ) { - Match(s) => Match((start + s, end)), - NoMatch(i) => NoMatch(i), - Quit => Quit, - } - } - - /// Finds the leftmost-first match (start and end) using only the DFA, - /// but assumes the regex is anchored at the end and therefore starts at - /// the end of the regex and matches in reverse. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. - #[cfg(feature = "perf-dfa")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_dfa_anchored_reverse( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result<(usize, usize)> { - use crate::dfa::Result::*; - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - false, - &text[start..], - text.len() - start, - ) { - Match(s) => Match((start + s, text.len())), - NoMatch(i) => NoMatch(i), - Quit => Quit, - } - } - - /// Finds the end of the shortest match using only the DFA. - #[cfg(feature = "perf-dfa")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result { - dfa::Fsm::forward(&self.ro.dfa, self.cache.value(), true, text, start) - } - - /// Finds the end of the shortest match using only the DFA by scanning for - /// suffix literals. - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_dfa_reverse_suffix( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result { - match self.exec_dfa_reverse_suffix(text, start) { - None => self.shortest_dfa(text, start), - Some(r) => r.map(|(_, end)| end), - } - } - - /// Finds the end of the shortest match using only the DFA by scanning for - /// suffix literals. It also reports the start of the match. - /// - /// Note that if None is returned, then the optimization gave up to avoid - /// worst case quadratic behavior. A forward scanning DFA should be tried - /// next. - /// - /// If a match is returned and the full leftmost-first match is desired, - /// then a forward scan starting from the beginning of the match must be - /// done. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn exec_dfa_reverse_suffix( - &self, - text: &[u8], - original_start: usize, - ) -> Option> { - use crate::dfa::Result::*; - - let lcs = self.ro.suffixes.lcs(); - debug_assert!(lcs.len() >= 1); - let mut start = original_start; - let mut end = start; - let mut last_literal = start; - while end <= text.len() { - last_literal += match lcs.find(&text[last_literal..]) { - None => return Some(NoMatch(text.len())), - Some(i) => i, - }; - end = last_literal + lcs.len(); - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - false, - &text[start..end], - end - start, - ) { - Match(0) | NoMatch(0) => return None, - Match(i) => return Some(Match((start + i, end))), - NoMatch(i) => { - start += i; - last_literal += 1; - continue; - } - Quit => return Some(Quit), - }; - } - Some(NoMatch(text.len())) - } - - /// Finds the leftmost-first match (start and end) using only the DFA - /// by scanning for suffix literals. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. 
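A loose skeleton of the suffix-literal scanning loop in exec_dfa_reverse_suffix above, with a plain substring search standing in for the LCS searcher and the reverse-DFA probe elided entirely. It shows only the scanning shape: hop from one suffix hit to the next, treating each hit's end as a candidate match end. Names and inputs are invented.

fn find_suffix_hits(haystack: &[u8], lcs: &[u8]) -> Vec<usize> {
    let mut hits = vec![];
    let mut at = 0;
    while at + lcs.len() <= haystack.len() {
        match haystack[at..].windows(lcs.len()).position(|w| w == lcs) {
            None => break,
            Some(i) => {
                hits.push(at + i + lcs.len()); // candidate match *end*
                at = at + i + 1; // resume just past this hit's start
            }
        }
    }
    hits
}

fn main() {
    // Candidate end positions for the suffix "ss" in "mississippi"; the real
    // code would run the reverse DFA ending at each of these offsets.
    assert_eq!(find_suffix_hits(b"mississippi", b"ss"), vec![4, 7]);
}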
- #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_dfa_reverse_suffix( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result<(usize, usize)> { - use crate::dfa::Result::*; - - let match_start = match self.exec_dfa_reverse_suffix(text, start) { - None => return self.find_dfa_forward(text, start), - Some(Match((start, _))) => start, - Some(r) => return r, - }; - // At this point, we've found a match. The only way to quit now - // without a match is if the DFA gives up (seems unlikely). - // - // Now run the DFA forwards to find the proper end of the match. - // (The suffix literal match can only indicate the earliest - // possible end location, which may appear before the end of the - // leftmost-first match.) - match dfa::Fsm::forward( - &self.ro.dfa, - self.cache.value(), - false, - text, - match_start, - ) { - NoMatch(_) => panic!("BUG: reverse match implies forward match"), - Quit => Quit, - Match(e) => Match((match_start, e)), - } - } - - /// Executes the NFA engine to return whether there is a match or not. - /// - /// Ideally, we could use shortest_nfa(...).is_some() and get the same - /// performance characteristics, but regex sets don't have captures, which - /// shortest_nfa depends on. - #[cfg(feature = "perf-dfa")] - fn match_nfa(&self, text: &[u8], start: usize) -> bool { - self.match_nfa_type(MatchNfaType::Auto, text, start) - } - - /// Like match_nfa, but allows specification of the type of NFA engine. - fn match_nfa_type( - &self, - ty: MatchNfaType, - text: &[u8], - start: usize, - ) -> bool { - self.exec_nfa( - ty, - &mut [false], - &mut [], - true, - false, - text, - start, - text.len(), - ) - } - - /// Finds the shortest match using an NFA. - #[cfg(feature = "perf-dfa")] - fn shortest_nfa(&self, text: &[u8], start: usize) -> Option { - self.shortest_nfa_type(MatchNfaType::Auto, text, start) - } - - /// Like shortest_nfa, but allows specification of the type of NFA engine. - fn shortest_nfa_type( - &self, - ty: MatchNfaType, - text: &[u8], - start: usize, - ) -> Option { - let mut slots = [None, None]; - if self.exec_nfa( - ty, - &mut [false], - &mut slots, - true, - true, - text, - start, - text.len(), - ) { - slots[1] - } else { - None - } - } - - /// Like find, but executes an NFA engine. - fn find_nfa( - &self, - ty: MatchNfaType, - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - let mut slots = [None, None]; - if self.exec_nfa( - ty, - &mut [false], - &mut slots, - false, - false, - text, - start, - text.len(), - ) { - match (slots[0], slots[1]) { - (Some(s), Some(e)) => Some((s, e)), - _ => None, - } - } else { - None - } - } - - /// Like find_nfa, but fills in captures. - /// - /// `slots` should have length equal to `2 * nfa.captures.len()`. - #[cfg(feature = "perf-dfa")] - fn captures_nfa( - &self, - slots: &mut [Slot], - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - start, - text.len(), - ) - } - - /// Like captures_nfa, but allows specification of type of NFA engine. 
- fn captures_nfa_type( - &self, - ty: MatchNfaType, - slots: &mut [Slot], - text: &[u8], - start: usize, - end: usize, - ) -> Option<(usize, usize)> { - if self.exec_nfa( - ty, - &mut [false], - slots, - false, - false, - text, - start, - end, - ) { - match (slots[0], slots[1]) { - (Some(s), Some(e)) => Some((s, e)), - _ => None, - } - } else { - None - } - } - - fn exec_nfa( - &self, - mut ty: MatchNfaType, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - quit_after_match_with_pos: bool, - text: &[u8], - start: usize, - end: usize, - ) -> bool { - use self::MatchNfaType::*; - if let Auto = ty { - if backtrack::should_exec(self.ro.nfa.len(), text.len()) { - ty = Backtrack; - } else { - ty = PikeVM; - } - } - // The backtracker can't return the shortest match position as it is - // implemented today. So if someone calls `shortest_match` and we need - // to run an NFA, then use the PikeVM. - if quit_after_match_with_pos || ty == PikeVM { - self.exec_pikevm( - matches, - slots, - quit_after_match, - text, - start, - end, - ) - } else { - self.exec_backtrack(matches, slots, text, start, end) - } - } - - /// Always run the NFA algorithm. - fn exec_pikevm( - &self, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - text: &[u8], - start: usize, - end: usize, - ) -> bool { - if self.ro.nfa.uses_bytes() { - pikevm::Fsm::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - quit_after_match, - ByteInput::new(text, self.ro.nfa.only_utf8), - start, - end, - ) - } else { - pikevm::Fsm::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - quit_after_match, - CharInput::new(text), - start, - end, - ) - } - } - - /// Always runs the NFA using bounded backtracking. - fn exec_backtrack( - &self, - matches: &mut [bool], - slots: &mut [Slot], - text: &[u8], - start: usize, - end: usize, - ) -> bool { - if self.ro.nfa.uses_bytes() { - backtrack::Bounded::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - ByteInput::new(text, self.ro.nfa.only_utf8), - start, - end, - ) - } else { - backtrack::Bounded::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - CharInput::new(text), - start, - end, - ) - } - } - - /// Finds which regular expressions match the given text. - /// - /// `matches` should have length equal to the number of regexes being - /// searched. - /// - /// This is only useful when one wants to know which regexes in a set - /// match some text. 
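The Auto branch of exec_nfa above defers to backtrack::should_exec, which bounds the backtracker's visited-set bitmap: one bit per (instruction, haystack position) pair, capped at a fixed budget. The sketch below reconstructs that shape; the 256 KB cap is an assumption about the deleted backtrack module, not a confirmed constant.

const MAX_SIZE_BYTES: usize = 256 * (1 << 10); // assumed budget

fn should_backtrack(num_insts: usize, text_len: usize) -> bool {
    // Bytes needed to mark every (inst, position) pair as visited.
    let size = (num_insts * (text_len + 1) + 7) / 8;
    size <= MAX_SIZE_BYTES
}

fn main() {
    assert!(should_backtrack(100, 1_000)); // small: backtracking is cheap
    assert!(!should_backtrack(10_000, 1 << 20)); // big: use the PikeVM
}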
- pub fn many_matches_at( - &self, - matches: &mut [bool], - text: &[u8], - start: usize, - ) -> bool { - use self::MatchType::*; - if !self.is_anchor_end_match(text) { - return false; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - Literal(ty) => { - debug_assert_eq!(matches.len(), 1); - matches[0] = self.find_literals(ty, text, start).is_some(); - matches[0] - } - #[cfg(feature = "perf-dfa")] - Dfa | DfaAnchoredReverse | DfaMany => { - match dfa::Fsm::forward_many( - &self.ro.dfa, - self.cache.value(), - matches, - text, - start, - ) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.exec_nfa( - MatchNfaType::Auto, - matches, - &mut [], - false, - false, - text, - start, - text.len(), - ), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - DfaSuffix => { - match dfa::Fsm::forward_many( - &self.ro.dfa, - self.cache.value(), - matches, - text, - start, - ) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.exec_nfa( - MatchNfaType::Auto, - matches, - &mut [], - false, - false, - text, - start, - text.len(), - ), - } - } - Nfa(ty) => self.exec_nfa( - ty, - matches, - &mut [], - false, - false, - text, - start, - text.len(), - ), - Nothing => false, - } - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn is_anchor_end_match(&self, text: &[u8]) -> bool { - #[cfg(not(feature = "perf-literal"))] - fn imp(_: &ExecReadOnly, _: &[u8]) -> bool { - true - } - - #[cfg(feature = "perf-literal")] - fn imp(ro: &ExecReadOnly, text: &[u8]) -> bool { - // Only do this check if the haystack is big (>1MB). - if text.len() > (1 << 20) && ro.nfa.is_anchored_end { - let lcs = ro.suffixes.lcs(); - if lcs.len() >= 1 && !lcs.is_suffix(text) { - return false; - } - } - true - } - - imp(&self.ro, text) - } - - pub fn capture_name_idx(&self) -> &Arc> { - &self.ro.nfa.capture_name_idx - } -} - -impl<'c> ExecNoSyncStr<'c> { - pub fn capture_name_idx(&self) -> &Arc> { - self.0.capture_name_idx() - } -} - -impl Exec { - /// Get a searcher that isn't Sync. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn searcher(&self) -> ExecNoSync<'_> { - ExecNoSync { - ro: &self.ro, // a clone is too expensive here! (and not needed) - cache: self.pool.get(), - } - } - - /// Get a searcher that isn't Sync and can match on &str. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn searcher_str(&self) -> ExecNoSyncStr<'_> { - ExecNoSyncStr(self.searcher()) - } - - /// Build a Regex from this executor. - pub fn into_regex(self) -> re_unicode::Regex { - re_unicode::Regex::from(self) - } - - /// Build a RegexSet from this executor. - pub fn into_regex_set(self) -> re_set::unicode::RegexSet { - re_set::unicode::RegexSet::from(self) - } - - /// Build a Regex from this executor that can match arbitrary bytes. - pub fn into_byte_regex(self) -> re_bytes::Regex { - re_bytes::Regex::from(self) - } - - /// Build a RegexSet from this executor that can match arbitrary bytes. - pub fn into_byte_regex_set(self) -> re_set::bytes::RegexSet { - re_set::bytes::RegexSet::from(self) - } - - /// The original regular expressions given by the caller that were - /// compiled. - pub fn regex_strings(&self) -> &[String] { - &self.ro.res - } - - /// Return a slice of capture names. - /// - /// Any capture that isn't named is None. 
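The is_anchor_end_match fast path above is a one-line rejection test once extracted: for an end-anchored regex and a large haystack (over 1 MB), a match is impossible unless the longest common suffix of the literals actually ends the haystack. The function name and the inputs in main are invented.

fn quick_reject(anchored_end: bool, lcs: &[u8], haystack: &[u8]) -> bool {
    haystack.len() > (1 << 20)
        && anchored_end
        && !lcs.is_empty()
        && !haystack.ends_with(lcs)
}

fn main() {
    let big = vec![b'a'; (1 << 20) + 1];
    assert!(quick_reject(true, b"xyz", &big)); // cannot possibly match
    assert!(!quick_reject(true, b"aaa", &big)); // suffix present: must search
    assert!(!quick_reject(false, b"xyz", &big)); // not end-anchored: no shortcut
}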
- pub fn capture_names(&self) -> &[Option] { - &self.ro.nfa.captures - } - - /// Return a reference to named groups mapping (from group name to - /// group position). - pub fn capture_name_idx(&self) -> &Arc> { - &self.ro.nfa.capture_name_idx - } -} - -impl Clone for Exec { - fn clone(&self) -> Exec { - let pool = ExecReadOnly::new_pool(&self.ro); - Exec { ro: self.ro.clone(), pool } - } -} - -impl ExecReadOnly { - fn choose_match_type(&self, hint: Option) -> MatchType { - if let Some(MatchType::Nfa(_)) = hint { - return hint.unwrap(); - } - // If the NFA is empty, then we'll never match anything. - if self.nfa.insts.is_empty() { - return MatchType::Nothing; - } - if let Some(literalty) = self.choose_literal_match_type() { - return literalty; - } - if let Some(dfaty) = self.choose_dfa_match_type() { - return dfaty; - } - // We're so totally hosed. - MatchType::Nfa(MatchNfaType::Auto) - } - - /// If a plain literal scan can be used, then a corresponding literal - /// search type is returned. - fn choose_literal_match_type(&self) -> Option { - #[cfg(not(feature = "perf-literal"))] - fn imp(_: &ExecReadOnly) -> Option { - None - } - - #[cfg(feature = "perf-literal")] - fn imp(ro: &ExecReadOnly) -> Option { - // If our set of prefixes is complete, then we can use it to find - // a match in lieu of a regex engine. This doesn't quite work well - // in the presence of multiple regexes, so only do it when there's - // one. - // - // TODO(burntsushi): Also, don't try to match literals if the regex - // is partially anchored. We could technically do it, but we'd need - // to create two sets of literals: all of them and then the subset - // that aren't anchored. We would then only search for all of them - // when at the beginning of the input and use the subset in all - // other cases. - if ro.res.len() != 1 { - return None; - } - if ro.ac.is_some() { - return Some(MatchType::Literal( - MatchLiteralType::AhoCorasick, - )); - } - if ro.nfa.prefixes.complete() { - return if ro.nfa.is_anchored_start { - Some(MatchType::Literal(MatchLiteralType::AnchoredStart)) - } else { - Some(MatchType::Literal(MatchLiteralType::Unanchored)) - }; - } - if ro.suffixes.complete() { - return if ro.nfa.is_anchored_end { - Some(MatchType::Literal(MatchLiteralType::AnchoredEnd)) - } else { - // This case shouldn't happen. When the regex isn't - // anchored, then complete prefixes should imply complete - // suffixes. - Some(MatchType::Literal(MatchLiteralType::Unanchored)) - }; - } - None - } - - imp(self) - } - - /// If a DFA scan can be used, then choose the appropriate DFA strategy. - fn choose_dfa_match_type(&self) -> Option { - #[cfg(not(feature = "perf-dfa"))] - fn imp(_: &ExecReadOnly) -> Option { - None - } - - #[cfg(feature = "perf-dfa")] - fn imp(ro: &ExecReadOnly) -> Option { - if !dfa::can_exec(&ro.dfa) { - return None; - } - // Regex sets require a slightly specialized path. - if ro.res.len() >= 2 { - return Some(MatchType::DfaMany); - } - // If the regex is anchored at the end but not the start, then - // just match in reverse from the end of the haystack. - if !ro.nfa.is_anchored_start && ro.nfa.is_anchored_end { - return Some(MatchType::DfaAnchoredReverse); - } - #[cfg(feature = "perf-literal")] - { - // If there's a longish suffix literal, then it might be faster - // to look for that first. - if ro.should_suffix_scan() { - return Some(MatchType::DfaSuffix); - } - } - // Fall back to your garden variety forward searching lazy DFA. 
-            Some(MatchType::Dfa)
-        }
-
-        imp(self)
-    }
-
-    /// Returns true if the program is amenable to suffix scanning.
-    ///
-    /// When this is true, as a heuristic, we assume it is OK to quickly scan
-    /// for suffix literals and then do a *reverse* DFA match from any matches
-    /// produced by the literal scan. (And then followed by a forward DFA
-    /// search, since the previously found suffix literal may not actually be
-    /// the end of a match.)
-    ///
-    /// This is a bit of a specialized optimization, but can result in pretty
-    /// big performance wins if 1) there are no prefix literals and 2) the
-    /// suffix literals are pretty rare in the text. (1) is obviously easy to
-    /// account for but (2) is harder. As a proxy, we assume that longer
-    /// strings are generally rarer, so we only enable this optimization when
-    /// we have a meaty suffix.
-    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
-    fn should_suffix_scan(&self) -> bool {
-        if self.suffixes.is_empty() {
-            return false;
-        }
-        let lcs_len = self.suffixes.lcs().char_len();
-        lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len()
-    }
-
-    fn new_pool(ro: &Arc<ExecReadOnly>) -> Box<Pool<ProgramCache>> {
-        let ro = ro.clone();
-        Box::new(Pool::new(Box::new(move || {
-            AssertUnwindSafe(RefCell::new(ProgramCacheInner::new(&ro)))
-        })))
-    }
-}
-
-#[derive(Clone, Copy, Debug)]
-enum MatchType {
-    /// A single or multiple literal search. This is only used when the regex
-    /// can be decomposed into a literal search.
-    #[cfg(feature = "perf-literal")]
-    Literal(MatchLiteralType),
-    /// A normal DFA search.
-    #[cfg(feature = "perf-dfa")]
-    Dfa,
-    /// A reverse DFA search starting from the end of a haystack.
-    #[cfg(feature = "perf-dfa")]
-    DfaAnchoredReverse,
-    /// A reverse DFA search with suffix literal scanning.
-    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
-    DfaSuffix,
-    /// Use the DFA on two or more regular expressions.
-    #[cfg(feature = "perf-dfa")]
-    DfaMany,
-    /// An NFA variant.
-    Nfa(MatchNfaType),
-    /// No match is ever possible, so don't ever try to search.
-    Nothing,
}
-
-#[derive(Clone, Copy, Debug)]
-#[cfg(feature = "perf-literal")]
-enum MatchLiteralType {
-    /// Match literals anywhere in text.
-    Unanchored,
-    /// Match literals only at the start of text.
-    AnchoredStart,
-    /// Match literals only at the end of text.
-    AnchoredEnd,
-    /// Use an Aho-Corasick automaton. This requires `ac` to be Some on
-    /// ExecReadOnly.
-    AhoCorasick,
-}
-
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-enum MatchNfaType {
-    /// Choose between Backtrack and PikeVM.
-    Auto,
-    /// NFA bounded backtracking.
-    ///
-    /// (This is only set by tests, since it never makes sense to always want
-    /// backtracking.)
-    Backtrack,
-    /// The Pike VM.
-    ///
-    /// (This is only set by tests, since it never makes sense to always want
-    /// the Pike VM.)
-    PikeVM,
-}
-
-/// `ProgramCache` maintains reusable allocations for each matching engine
-/// available to a particular program.
-///
-/// We declare this as unwind safe since it's a cache that's only used for
-/// performance purposes. If a panic occurs, it is (or should be) always safe
-/// to continue using the same regex object.
-pub type ProgramCache = AssertUnwindSafe<RefCell<ProgramCacheInner>>;
-
-#[derive(Debug)]
-pub struct ProgramCacheInner {
-    pub pikevm: pikevm::Cache,
-    pub backtrack: backtrack::Cache,
-    #[cfg(feature = "perf-dfa")]
-    pub dfa: dfa::Cache,
-    #[cfg(feature = "perf-dfa")]
-    pub dfa_reverse: dfa::Cache,
-}
-
-impl ProgramCacheInner {
-    fn new(ro: &ExecReadOnly) -> Self {
-        ProgramCacheInner {
-            pikevm: pikevm::Cache::new(&ro.nfa),
-            backtrack: backtrack::Cache::new(&ro.nfa),
-            #[cfg(feature = "perf-dfa")]
-            dfa: dfa::Cache::new(&ro.dfa),
-            #[cfg(feature = "perf-dfa")]
-            dfa_reverse: dfa::Cache::new(&ro.dfa_reverse),
-        }
-    }
-}
-
-/// Alternation literals checks if the given HIR is a simple alternation of
-/// literals, and if so, returns them. Otherwise, this returns None.
-#[cfg(feature = "perf-literal")]
-fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
-    use regex_syntax::hir::{HirKind, Literal};
-
-    // This is pretty hacky, but basically, if `is_alternation_literal` is
-    // true, then we can make several assumptions about the structure of our
-    // HIR. This is what justifies the `unreachable!` statements below.
-    //
-    // This code should be refactored once we overhaul this crate's
-    // optimization pipeline, because this is a terribly inflexible way to go
-    // about things.
-
-    if !expr.is_alternation_literal() {
-        return None;
-    }
-    let alts = match *expr.kind() {
-        HirKind::Alternation(ref alts) => alts,
-        _ => return None, // one literal isn't worth it
-    };
-
-    let extendlit = |lit: &Literal, dst: &mut Vec<u8>| match *lit {
-        Literal::Unicode(c) => {
-            let mut buf = [0; 4];
-            dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
-        }
-        Literal::Byte(b) => {
-            dst.push(b);
-        }
-    };
-
-    let mut lits = vec![];
-    for alt in alts {
-        let mut lit = vec![];
-        match *alt.kind() {
-            HirKind::Literal(ref x) => extendlit(x, &mut lit),
-            HirKind::Concat(ref exprs) => {
-                for e in exprs {
-                    match *e.kind() {
-                        HirKind::Literal(ref x) => extendlit(x, &mut lit),
-                        _ => unreachable!("expected literal, got {:?}", e),
-                    }
-                }
-            }
-            _ => unreachable!("expected literal or concat, got {:?}", alt),
-        }
-        lits.push(lit);
-    }
-    Some(lits)
-}
-
-#[cfg(test)]
-mod test {
-    #[test]
-    fn uppercut_s_backtracking_bytes_default_bytes_mismatch() {
-        use crate::internal::ExecBuilder;
-
-        let backtrack_bytes_re = ExecBuilder::new("^S")
-            .bounded_backtracking()
-            .only_utf8(false)
-            .build()
-            .map(|exec| exec.into_byte_regex())
-            .map_err(|err| format!("{}", err))
-            .unwrap();
-
-        let default_bytes_re = ExecBuilder::new("^S")
-            .only_utf8(false)
-            .build()
-            .map(|exec| exec.into_byte_regex())
-            .map_err(|err| format!("{}", err))
-            .unwrap();
-
-        let input = vec![83, 83];
-
-        let s1 = backtrack_bytes_re.split(&input);
-        let s2 = default_bytes_re.split(&input);
-        for (chunk1, chunk2) in s1.zip(s2) {
-            assert_eq!(chunk1, chunk2);
-        }
-    }
-
-    #[test]
-    fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() {
-        use crate::internal::ExecBuilder;
-
-        let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)")
-            .bounded_backtracking()
-            .bytes(true)
-            .build()
-            .map(|exec| exec.into_regex())
-            .map_err(|err| format!("{}", err))
-            .unwrap();
-
-        let default_bytes_re = ExecBuilder::new(r"^(?u:\*)")
-            .bytes(true)
-            .build()
-            .map(|exec| exec.into_regex())
-            .map_err(|err| format!("{}", err))
-            .unwrap();
-
-        let input = "**";
-
-        let s1 = backtrack_bytes_re.split(input);
-        let s2 = default_bytes_re.split(input);
-        for (chunk1, chunk2) in s1.zip(s2) {
-            assert_eq!(chunk1, chunk2);
-        }
-    }
-}
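(Editorial note: the `alternation_literals` helper above only fires for patterns that are plain alternations of literals. The sketch below illustrates that predicate. It is an assumption-laden addition, written against the regex-syntax 0.6 API that this now-deleted code used; `Parser` and `Hir::is_alternation_literal` are the only items assumed, and the latter is exactly the method the deleted code itself calls.)

```rust
use regex_syntax::Parser;

fn main() {
    // "foo|bar|baz" is an alternation of plain literals, so the deleted
    // `alternation_literals` helper would have returned Some(...) for it.
    let hir = Parser::new().parse("foo|bar|baz").unwrap();
    assert!(hir.is_alternation_literal());

    // "foo|ba?r" is not: the `a?` makes one branch a non-literal, so the
    // literal-alternation fast path would not have been used.
    let hir = Parser::new().parse("foo|ba?r").unwrap();
    assert!(!hir.is_alternation_literal());
}
```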
diff --git a/vendor/regex/src/expand.rs b/vendor/regex/src/expand.rs
deleted file mode 100644
index 67b5149..0000000
--- a/vendor/regex/src/expand.rs
+++ /dev/null
@@ -1,239 +0,0 @@
-use std::str;
-
-use crate::find_byte::find_byte;
-
-use crate::re_bytes;
-use crate::re_unicode;
-
-pub fn expand_str(
-    caps: &re_unicode::Captures<'_>,
-    mut replacement: &str,
-    dst: &mut String,
-) {
-    while !replacement.is_empty() {
-        match find_byte(b'$', replacement.as_bytes()) {
-            None => break,
-            Some(i) => {
-                dst.push_str(&replacement[..i]);
-                replacement = &replacement[i..];
-            }
-        }
-        if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') {
-            dst.push_str("$");
-            replacement = &replacement[2..];
-            continue;
-        }
-        debug_assert!(!replacement.is_empty());
-        let cap_ref = match find_cap_ref(replacement.as_bytes()) {
-            Some(cap_ref) => cap_ref,
-            None => {
-                dst.push_str("$");
-                replacement = &replacement[1..];
-                continue;
-            }
-        };
-        replacement = &replacement[cap_ref.end..];
-        match cap_ref.cap {
-            Ref::Number(i) => {
-                dst.push_str(caps.get(i).map(|m| m.as_str()).unwrap_or(""));
-            }
-            Ref::Named(name) => {
-                dst.push_str(
-                    caps.name(name).map(|m| m.as_str()).unwrap_or(""),
-                );
-            }
-        }
-    }
-    dst.push_str(replacement);
-}
-
-pub fn expand_bytes(
-    caps: &re_bytes::Captures<'_>,
-    mut replacement: &[u8],
-    dst: &mut Vec<u8>,
-) {
-    while !replacement.is_empty() {
-        match find_byte(b'$', replacement) {
-            None => break,
-            Some(i) => {
-                dst.extend(&replacement[..i]);
-                replacement = &replacement[i..];
-            }
-        }
-        if replacement.get(1).map_or(false, |&b| b == b'$') {
-            dst.push(b'$');
-            replacement = &replacement[2..];
-            continue;
-        }
-        debug_assert!(!replacement.is_empty());
-        let cap_ref = match find_cap_ref(replacement) {
-            Some(cap_ref) => cap_ref,
-            None => {
-                dst.push(b'$');
-                replacement = &replacement[1..];
-                continue;
-            }
-        };
-        replacement = &replacement[cap_ref.end..];
-        match cap_ref.cap {
-            Ref::Number(i) => {
-                dst.extend(caps.get(i).map(|m| m.as_bytes()).unwrap_or(b""));
-            }
-            Ref::Named(name) => {
-                dst.extend(
-                    caps.name(name).map(|m| m.as_bytes()).unwrap_or(b""),
-                );
-            }
-        }
-    }
-    dst.extend(replacement);
-}
-
-/// `CaptureRef` represents a reference to a capture group inside some text.
-/// The reference is either a capture group name or a number.
-///
-/// It is also tagged with the position in the text following the
-/// capture reference.
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-struct CaptureRef<'a> {
-    cap: Ref<'a>,
-    end: usize,
-}
-
-/// A reference to a capture group in some text.
-///
-/// e.g., `$2`, `$foo`, `${foo}`.
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-enum Ref<'a> {
-    Named(&'a str),
-    Number(usize),
-}
-
-impl<'a> From<&'a str> for Ref<'a> {
-    fn from(x: &'a str) -> Ref<'a> {
-        Ref::Named(x)
-    }
-}
-
-impl From<usize> for Ref<'static> {
-    fn from(x: usize) -> Ref<'static> {
-        Ref::Number(x)
-    }
-}
-
-/// Parses a possible reference to a capture group name in the given text,
-/// starting at the beginning of `replacement`.
-///
-/// If no such valid reference could be found, None is returned.
-fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
-    let mut i = 0;
-    let rep: &[u8] = replacement;
-    if rep.len() <= 1 || rep[0] != b'$' {
-        return None;
-    }
-    i += 1;
-    if rep[i] == b'{' {
-        return find_cap_ref_braced(rep, i + 1);
-    }
-    let mut cap_end = i;
-    while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) {
-        cap_end += 1;
-    }
-    if cap_end == i {
-        return None;
-    }
-    // We just verified that the range 0..cap_end is valid ASCII, so it must
-    // therefore be valid UTF-8.
-    // If we really cared, we could avoid this UTF-8 check via an unchecked
-    // conversion or by parsing the number straight from &[u8].
-    let cap =
-        str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name");
-    Some(CaptureRef {
-        cap: match cap.parse::<u32>() {
-            Ok(i) => Ref::Number(i as usize),
-            Err(_) => Ref::Named(cap),
-        },
-        end: cap_end,
-    })
-}
-
-fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> {
-    let start = i;
-    while rep.get(i).map_or(false, |&b| b != b'}') {
-        i += 1;
-    }
-    if !rep.get(i).map_or(false, |&b| b == b'}') {
-        return None;
-    }
-    // When looking at braced names, we don't put any restrictions on the name,
-    // so it's possible it could be invalid UTF-8. But a capture group name
-    // can never be invalid UTF-8, so if we have invalid UTF-8, then we can
-    // safely return None.
-    let cap = match str::from_utf8(&rep[start..i]) {
-        Err(_) => return None,
-        Ok(cap) => cap,
-    };
-    Some(CaptureRef {
-        cap: match cap.parse::<u32>() {
-            Ok(i) => Ref::Number(i as usize),
-            Err(_) => Ref::Named(cap),
-        },
-        end: i + 1,
-    })
-}
-
-/// Returns true if and only if the given byte is allowed in a capture name.
-fn is_valid_cap_letter(b: u8) -> bool {
-    match b {
-        b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true,
-        _ => false,
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::{find_cap_ref, CaptureRef};
-
-    macro_rules! find {
-        ($name:ident, $text:expr) => {
-            #[test]
-            fn $name() {
-                assert_eq!(None, find_cap_ref($text.as_bytes()));
-            }
-        };
-        ($name:ident, $text:expr, $capref:expr) => {
-            #[test]
-            fn $name() {
-                assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
-            }
-        };
-    }
-
-    macro_rules! c {
-        ($name_or_number:expr, $pos:expr) => {
-            CaptureRef { cap: $name_or_number.into(), end: $pos }
-        };
-    }
-
-    find!(find_cap_ref1, "$foo", c!("foo", 4));
-    find!(find_cap_ref2, "${foo}", c!("foo", 6));
-    find!(find_cap_ref3, "$0", c!(0, 2));
-    find!(find_cap_ref4, "$5", c!(5, 2));
-    find!(find_cap_ref5, "$10", c!(10, 3));
-    // See https://github.com/rust-lang/regex/pull/585
-    // for more on characters following numbers
-    find!(find_cap_ref6, "$42a", c!("42a", 4));
-    find!(find_cap_ref7, "${42}a", c!(42, 5));
-    find!(find_cap_ref8, "${42");
-    find!(find_cap_ref9, "${42 ");
-    find!(find_cap_ref10, " $0 ");
-    find!(find_cap_ref11, "$");
-    find!(find_cap_ref12, " ");
-    find!(find_cap_ref13, "");
-    find!(find_cap_ref14, "$1-$2", c!(1, 2));
-    find!(find_cap_ref15, "$1_$2", c!("1_", 3));
-    find!(find_cap_ref16, "$x-$y", c!("x", 2));
-    find!(find_cap_ref17, "$x_$y", c!("x_", 3));
-    find!(find_cap_ref18, "${#}", c!("#", 4));
-    find!(find_cap_ref19, "${Z[}", c!("Z[", 5));
-}
diff --git a/vendor/regex/src/find_byte.rs b/vendor/regex/src/find_byte.rs
index e95f72a..9c6915d 100644
--- a/vendor/regex/src/find_byte.rs
+++ b/vendor/regex/src/find_byte.rs
@@ -2,7 +2,7 @@
 ///
 /// If the perf-literal feature is enabled, then this uses the super optimized
 /// memchr crate. Otherwise, it uses the naive byte-at-a-time implementation.
-pub fn find_byte(needle: u8, haystack: &[u8]) -> Option<usize> {
+pub(crate) fn find_byte(needle: u8, haystack: &[u8]) -> Option<usize> {
     #[cfg(not(feature = "perf-literal"))]
     fn imp(needle: u8, haystack: &[u8]) -> Option<usize> {
         haystack.iter().position(|&b| b == needle)
@@ -10,8 +10,7 @@ pub fn find_byte(needle: u8, haystack: &[u8]) -> Option<usize> {
 
     #[cfg(feature = "perf-literal")]
     fn imp(needle: u8, haystack: &[u8]) -> Option<usize> {
-        use memchr::memchr;
-        memchr(needle, haystack)
+        memchr::memchr(needle, haystack)
     }
 
     imp(needle, haystack)
diff --git a/vendor/regex/src/input.rs b/vendor/regex/src/input.rs
deleted file mode 100644
index df6c3e0..0000000
--- a/vendor/regex/src/input.rs
+++ /dev/null
@@ -1,432 +0,0 @@
-use std::char;
-use std::cmp::Ordering;
-use std::fmt;
-use std::ops;
-use std::u32;
-
-use crate::literal::LiteralSearcher;
-use crate::prog::InstEmptyLook;
-use crate::utf8::{decode_last_utf8, decode_utf8};
-
-/// Represents a location in the input.
-#[derive(Clone, Copy, Debug)]
-pub struct InputAt {
-    pos: usize,
-    c: Char,
-    byte: Option<u8>,
-    len: usize,
-}
-
-impl InputAt {
-    /// Returns true iff this position is at the beginning of the input.
-    pub fn is_start(&self) -> bool {
-        self.pos == 0
-    }
-
-    /// Returns true iff this position is past the end of the input.
-    pub fn is_end(&self) -> bool {
-        self.c.is_none() && self.byte.is_none()
-    }
-
-    /// Returns the character at this position.
-    ///
-    /// If this position is just before or after the input, then an absent
-    /// character is returned.
-    pub fn char(&self) -> Char {
-        self.c
-    }
-
-    /// Returns the byte at this position.
-    pub fn byte(&self) -> Option<u8> {
-        self.byte
-    }
-
-    /// Returns the UTF-8 width of the character at this position.
-    pub fn len(&self) -> usize {
-        self.len
-    }
-
-    /// Returns whether the UTF-8 width of the character at this position
-    /// is zero.
-    pub fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-
-    /// Returns the byte offset of this position.
-    pub fn pos(&self) -> usize {
-        self.pos
-    }
-
-    /// Returns the byte offset of the next position in the input.
-    pub fn next_pos(&self) -> usize {
-        self.pos + self.len
-    }
-}
-
-/// An abstraction over input used in the matching engines.
-pub trait Input: fmt::Debug {
-    /// Return an encoding of the position at byte offset `i`.
-    fn at(&self, i: usize) -> InputAt;
-
-    /// Return the Unicode character occurring next to `at`.
-    ///
-    /// If no such character could be decoded, then `Char` is absent.
-    fn next_char(&self, at: InputAt) -> Char;
-
-    /// Return the Unicode character occurring previous to `at`.
-    ///
-    /// If no such character could be decoded, then `Char` is absent.
-    fn previous_char(&self, at: InputAt) -> Char;
-
-    /// Return true if the given empty width instruction matches at the
-    /// input position given.
-    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool;
-
-    /// Scan the input for a matching prefix.
-    fn prefix_at(
-        &self,
-        prefixes: &LiteralSearcher,
-        at: InputAt,
-    ) -> Option<InputAt>;
-
-    /// The number of bytes in the input.
-    fn len(&self) -> usize;
-
-    /// Whether the input is empty.
-    fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    /// Return the given input as a sequence of bytes.
-    fn as_bytes(&self) -> &[u8];
-}
-
-impl<'a, T: Input> Input for &'a T {
-    fn at(&self, i: usize) -> InputAt {
-        (**self).at(i)
-    }
-
-    fn next_char(&self, at: InputAt) -> Char {
-        (**self).next_char(at)
-    }
-
-    fn previous_char(&self, at: InputAt) -> Char {
-        (**self).previous_char(at)
-    }
-
-    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
-        (**self).is_empty_match(at, empty)
-    }
-
-    fn prefix_at(
-        &self,
-        prefixes: &LiteralSearcher,
-        at: InputAt,
-    ) -> Option<InputAt> {
-        (**self).prefix_at(prefixes, at)
-    }
-
-    fn len(&self) -> usize {
-        (**self).len()
-    }
-
-    fn as_bytes(&self) -> &[u8] {
-        (**self).as_bytes()
-    }
-}
-
-/// An input reader over characters.
-#[derive(Clone, Copy, Debug)]
-pub struct CharInput<'t>(&'t [u8]);
-
-impl<'t> CharInput<'t> {
-    /// Return a new character input reader for the given string.
-    pub fn new(s: &'t [u8]) -> CharInput<'t> {
-        CharInput(s)
-    }
-}
-
-impl<'t> ops::Deref for CharInput<'t> {
-    type Target = [u8];
-
-    fn deref(&self) -> &[u8] {
-        self.0
-    }
-}
-
-impl<'t> Input for CharInput<'t> {
-    fn at(&self, i: usize) -> InputAt {
-        if i >= self.len() {
-            InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 }
-        } else {
-            let c = decode_utf8(&self[i..]).map(|(c, _)| c).into();
-            InputAt { pos: i, c, byte: None, len: c.len_utf8() }
-        }
-    }
-
-    fn next_char(&self, at: InputAt) -> Char {
-        at.char()
-    }
-
-    fn previous_char(&self, at: InputAt) -> Char {
-        decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into()
-    }
-
-    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
-        use crate::prog::EmptyLook::*;
-        match empty.look {
-            StartLine => {
-                let c = self.previous_char(at);
-                at.pos() == 0 || c == '\n'
-            }
-            EndLine => {
-                let c = self.next_char(at);
-                at.pos() == self.len() || c == '\n'
-            }
-            StartText => at.pos() == 0,
-            EndText => at.pos() == self.len(),
-            WordBoundary => {
-                let (c1, c2) = (self.previous_char(at), self.next_char(at));
-                c1.is_word_char() != c2.is_word_char()
-            }
-            NotWordBoundary => {
-                let (c1, c2) = (self.previous_char(at), self.next_char(at));
-                c1.is_word_char() == c2.is_word_char()
-            }
-            WordBoundaryAscii => {
-                let (c1, c2) = (self.previous_char(at), self.next_char(at));
-                c1.is_word_byte() != c2.is_word_byte()
-            }
-            NotWordBoundaryAscii => {
-                let (c1, c2) = (self.previous_char(at), self.next_char(at));
-                c1.is_word_byte() == c2.is_word_byte()
-            }
-        }
-    }
-
-    fn prefix_at(
-        &self,
-        prefixes: &LiteralSearcher,
-        at: InputAt,
-    ) -> Option<InputAt> {
-        prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s))
-    }
-
-    fn len(&self) -> usize {
-        self.0.len()
-    }
-
-    fn as_bytes(&self) -> &[u8] {
-        self.0
-    }
-}
-
-/// An input reader over bytes.
-#[derive(Clone, Copy, Debug)]
-pub struct ByteInput<'t> {
-    text: &'t [u8],
-    only_utf8: bool,
-}
-
-impl<'t> ByteInput<'t> {
-    /// Return a new byte-based input reader for the given string.
-    pub fn new(text: &'t [u8], only_utf8: bool) -> ByteInput<'t> {
-        ByteInput { text, only_utf8 }
-    }
-}
-
-impl<'t> ops::Deref for ByteInput<'t> {
-    type Target = [u8];
-
-    fn deref(&self) -> &[u8] {
-        self.text
-    }
-}
-
-impl<'t> Input for ByteInput<'t> {
-    fn at(&self, i: usize) -> InputAt {
-        if i >= self.len() {
-            InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 }
-        } else {
-            InputAt {
-                pos: i,
-                c: None.into(),
-                byte: self.get(i).cloned(),
-                len: 1,
-            }
-        }
-    }
-
-    fn next_char(&self, at: InputAt) -> Char {
-        decode_utf8(&self[at.pos()..]).map(|(c, _)| c).into()
-    }
-
-    fn previous_char(&self, at: InputAt) -> Char {
-        decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into()
-    }
-
-    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
-        use crate::prog::EmptyLook::*;
-        match empty.look {
-            StartLine => {
-                let c = self.previous_char(at);
-                at.pos() == 0 || c == '\n'
-            }
-            EndLine => {
-                let c = self.next_char(at);
-                at.pos() == self.len() || c == '\n'
-            }
-            StartText => at.pos() == 0,
-            EndText => at.pos() == self.len(),
-            WordBoundary => {
-                let (c1, c2) = (self.previous_char(at), self.next_char(at));
-                c1.is_word_char() != c2.is_word_char()
-            }
-            NotWordBoundary => {
-                let (c1, c2) = (self.previous_char(at), self.next_char(at));
-                c1.is_word_char() == c2.is_word_char()
-            }
-            WordBoundaryAscii => {
-                let (c1, c2) = (self.previous_char(at), self.next_char(at));
-                if self.only_utf8 {
-                    // If we must match UTF-8, then we can't match word
-                    // boundaries at invalid UTF-8.
-                    if c1.is_none() && !at.is_start() {
-                        return false;
-                    }
-                    if c2.is_none() && !at.is_end() {
-                        return false;
-                    }
-                }
-                c1.is_word_byte() != c2.is_word_byte()
-            }
-            NotWordBoundaryAscii => {
-                let (c1, c2) = (self.previous_char(at), self.next_char(at));
-                if self.only_utf8 {
-                    // If we must match UTF-8, then we can't match word
-                    // boundaries at invalid UTF-8.
-                    if c1.is_none() && !at.is_start() {
-                        return false;
-                    }
-                    if c2.is_none() && !at.is_end() {
-                        return false;
-                    }
-                }
-                c1.is_word_byte() == c2.is_word_byte()
-            }
-        }
-    }
-
-    fn prefix_at(
-        &self,
-        prefixes: &LiteralSearcher,
-        at: InputAt,
-    ) -> Option<InputAt> {
-        prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s))
-    }
-
-    fn len(&self) -> usize {
-        self.text.len()
-    }
-
-    fn as_bytes(&self) -> &[u8] {
-        self.text
-    }
-}
-
-/// An inline representation of `Option<char>`.
-///
-/// This eliminates the need to do case analysis on `Option<char>` to
-/// determine ordinality with other characters.
-///
-/// (The `Option<char>` is not related to encoding. Instead, it is used in the
-/// matching engines to represent the beginning and ending boundaries of the
-/// search text.)
-#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
-pub struct Char(u32);
-
-impl fmt::Debug for Char {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match char::from_u32(self.0) {
-            None => write!(f, "Empty"),
-            Some(c) => write!(f, "{:?}", c),
-        }
-    }
-}
-
-impl Char {
-    /// Returns true iff the character is absent.
-    #[inline]
-    pub fn is_none(self) -> bool {
-        self.0 == u32::MAX
-    }
-
-    /// Returns the length of the character's UTF-8 encoding.
-    ///
-    /// If the character is absent, then `1` is returned.
-    #[inline]
-    pub fn len_utf8(self) -> usize {
-        char::from_u32(self.0).map_or(1, |c| c.len_utf8())
-    }
-
-    /// Returns true iff the character is a word character.
-    ///
-    /// If the character is absent, then false is returned.
-    pub fn is_word_char(self) -> bool {
-        // is_word_character can panic if the Unicode data for \w isn't
-        // available. However, our compiler ensures that if a Unicode word
-        // boundary is used, then the data must also be available. If it isn't,
-        // then the compiler returns an error.
-        char::from_u32(self.0).map_or(false, regex_syntax::is_word_character)
-    }
-
-    /// Returns true iff the byte is a word byte.
-    ///
-    /// If the byte is absent, then false is returned.
-    pub fn is_word_byte(self) -> bool {
-        match char::from_u32(self.0) {
-            Some(c) if c <= '\u{7F}' => regex_syntax::is_word_byte(c as u8),
-            None | Some(_) => false,
-        }
-    }
-}
-
-impl From<char> for Char {
-    fn from(c: char) -> Char {
-        Char(c as u32)
-    }
-}
-
-impl From<Option<char>> for Char {
-    fn from(c: Option<char>) -> Char {
-        c.map_or(Char(u32::MAX), |c| c.into())
-    }
-}
-
-impl PartialEq<char> for Char {
-    #[inline]
-    fn eq(&self, other: &char) -> bool {
-        self.0 == *other as u32
-    }
-}
-
-impl PartialEq<Char> for char {
-    #[inline]
-    fn eq(&self, other: &Char) -> bool {
-        *self as u32 == other.0
-    }
-}
-
-impl PartialOrd<char> for Char {
-    #[inline]
-    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
-        self.0.partial_cmp(&(*other as u32))
-    }
-}
-
-impl PartialOrd<Char> for char {
-    #[inline]
-    fn partial_cmp(&self, other: &Char) -> Option<Ordering> {
-        (*self as u32).partial_cmp(&other.0)
-    }
-}
diff --git a/vendor/regex/src/lib.rs b/vendor/regex/src/lib.rs
index 6b95739..6dbd3c2 100644
--- a/vendor/regex/src/lib.rs
+++ b/vendor/regex/src/lib.rs
@@ -1,146 +1,371 @@
 /*!
-This crate provides a library for parsing, compiling, and executing regular
-expressions. Its syntax is similar to Perl-style regular expressions, but lacks
-a few features like look around and backreferences. In exchange, all searches
-execute in linear time with respect to the size of the regular expression and
-search text.
+This crate provides routines for searching strings for matches of a [regular
+expression] (aka "regex"). The regex syntax supported by this crate is similar
+to other regex engines, but it lacks several features that are not known how to
+implement efficiently. This includes, but is not limited to, look-around and
+backreferences. In exchange, all regex searches in this crate have worst case
+`O(m * n)` time complexity, where `m` is proportional to the size of the regex
+and `n` is proportional to the size of the string being searched.
 
-This crate's documentation provides some simple examples, describes
-[Unicode support](#unicode) and exhaustively lists the
-[supported syntax](#syntax).
+[regular expression]: https://en.wikipedia.org/wiki/Regular_expression
 
-For more specific details on the API for regular expressions, please see the
-documentation for the [`Regex`](struct.Regex.html) type.
+If you just want API documentation, then skip to the [`Regex`] type. Otherwise,
+here's a quick example showing one way of parsing the output of a grep-like
+program:
+
+```rust
+use regex::Regex;
+
+let re = Regex::new(r"(?m)^([^:]+):([0-9]+):(.+)$").unwrap();
+let hay = "\
+path/to/foo:54:Blue Harvest
+path/to/bar:90:Something, Something, Something, Dark Side
+path/to/baz:3:It's a Trap!
+"; + +let mut results = vec![]; +for (_, [path, lineno, line]) in re.captures_iter(hay).map(|c| c.extract()) { + results.push((path, lineno.parse::()?, line)); +} +assert_eq!(results, vec![ + ("path/to/foo", 54, "Blue Harvest"), + ("path/to/bar", 90, "Something, Something, Something, Dark Side"), + ("path/to/baz", 3, "It's a Trap!"), +]); +# Ok::<(), Box>(()) +``` + +# Overview + +The primary type in this crate is a [`Regex`]. Its most important methods are +as follows: + +* [`Regex::new`] compiles a regex using the default configuration. A +[`RegexBuilder`] permits setting a non-default configuration. (For example, +case insensitive matching, verbose mode and others.) +* [`Regex::is_match`] reports whether a match exists in a particular haystack. +* [`Regex::find`] reports the byte offsets of a match in a haystack, if one +exists. [`Regex::find_iter`] returns an iterator over all such matches. +* [`Regex::captures`] returns a [`Captures`], which reports both the byte +offsets of a match in a haystack and the byte offsets of each matching capture +group from the regex in the haystack. +[`Regex::captures_iter`] returns an iterator over all such matches. + +There is also a [`RegexSet`], which permits searching for multiple regex +patterns simultaneously in a single search. However, it currently only reports +which patterns match and *not* the byte offsets of a match. + +Otherwise, this top-level crate documentation is organized as follows: + +* [Usage](#usage) shows how to add the `regex` crate to your Rust project. +* [Examples](#examples) provides a limited selection of regex search examples. +* [Performance](#performance) provides a brief summary of how to optimize regex +searching speed. +* [Unicode](#unicode) discusses support for non-ASCII patterns. +* [Syntax](#syntax) enumerates the specific regex syntax supported by this +crate. +* [Untrusted input](#untrusted-input) discusses how this crate deals with regex +patterns or haystacks that are untrusted. +* [Crate features](#crate-features) documents the Cargo features that can be +enabled or disabled for this crate. +* [Other crates](#other-crates) links to other crates in the `regex` family. # Usage -This crate is [on crates.io](https://crates.io/crates/regex) and can be +The `regex` crate is [on crates.io](https://crates.io/crates/regex) and can be used by adding `regex` to your dependencies in your project's `Cargo.toml`. +Or more simply, just run `cargo add regex`. + +Here is a complete example that creates a new Rust project, adds a dependency +on `regex`, creates the source code for a regex search and then runs the +program. -```toml -[dependencies] -regex = "1" +First, create the project in a new directory: + +```text +$ mkdir regex-example +$ cd regex-example +$ cargo init ``` -# Example: find a date +Second, add a dependency on `regex`: + +```text +$ cargo add regex +``` + +Third, edit `src/main.rs`. 
Delete what's there and replace it with this:
+
+```
+use regex::Regex;
+
+fn main() {
+    let re = Regex::new(r"Hello (?<name>\w+)!").unwrap();
+    let Some(caps) = re.captures("Hello Murphy!") else {
+        println!("no match!");
+        return;
+    };
+    println!("The name is: {}", &caps["name"]);
+}
+```
+
+Fourth, run it with `cargo run`:
+
+```text
+$ cargo run
+   Compiling memchr v2.5.0
+   Compiling regex-syntax v0.7.1
+   Compiling aho-corasick v1.0.1
+   Compiling regex v1.8.1
+   Compiling regex-example v0.1.0 (/tmp/regex-example)
+    Finished dev [unoptimized + debuginfo] target(s) in 4.22s
+     Running `target/debug/regex-example`
+The name is: Murphy
+```
 
-General use of regular expressions in this package involves compiling an
-expression and then using it to search, split or replace text. For example,
-to confirm that some text resembles a date:
+The first time you run the program will show more output like above. But
+subsequent runs shouldn't have to re-compile the dependencies.
+
+# Examples
+
+This section provides a few examples, in tutorial style, showing how to
+search a haystack with a regex. There are more examples throughout the API
+documentation.
+
+Before starting though, it's worth defining a few terms:
+
+* A **regex** is a Rust value whose type is `Regex`. We use `re` as a
+variable name for a regex.
+* A **pattern** is the string that is used to build a regex. We use `pat` as
+a variable name for a pattern.
+* A **haystack** is the string that is searched by a regex. We use `hay` as a
+variable name for a haystack.
+
+Sometimes the words "regex" and "pattern" are used interchangeably.
+
+General use of regular expressions in this crate proceeds by compiling a
+**pattern** into a **regex**, and then using that regex to search, split or
+replace parts of a **haystack**.
+
+### Example: find a middle initial
+
+We'll start off with a very simple example: a regex that looks for a specific
+name but uses a wildcard to match a middle initial. Our pattern serves as
+something like a template that will match a particular name with *any* middle
+initial.
 
 ```rust
 use regex::Regex;
-let re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
-assert!(re.is_match("2014-01-01"));
+
+// We use 'unwrap()' here because it would be a bug in our program if the
+// pattern failed to compile to a regex. Panicking in the presence of a bug
+// is okay.
+let re = Regex::new(r"Homer (.)\. Simpson").unwrap();
+let hay = "Homer J. Simpson";
+let Some(caps) = re.captures(hay) else { return };
+assert_eq!("J", &caps[1]);
 ```
 
-Notice the use of the `^` and `$` anchors. In this crate, every expression
-is executed with an implicit `.*?` at the beginning and end, which allows
-it to match anywhere in the text. Anchors can be used to ensure that the
-full text matches an expression.
+There are a few things worth noticing here in our first example:
 
-This example also demonstrates the utility of
-[raw strings](https://doc.rust-lang.org/stable/reference/tokens.html#raw-string-literals)
-in Rust, which
-are just like regular strings except they are prefixed with an `r` and do
-not process any escape sequences. For example, `"\\d"` is the same
-expression as `r"\d"`.
+* The `.` is a special pattern meta character that means "match any single
+character except for new lines." (More precisely, in this crate, it means
+"match any UTF-8 encoding of any Unicode scalar value other than `\n`.")
+* We can match an actual `.` literally by escaping it, i.e., `\.`.
+* We use Rust's [raw strings] to avoid needing to deal with escape sequences in
+both the regex pattern syntax and in Rust's string literal syntax. If we didn't
+use raw strings here, we would have had to use `\\.` to match a literal `.`
+character. That is, `r"\."` and `"\\."` are equivalent patterns.
+* We put our wildcard `.` instruction in parentheses. These parentheses have a
+special meaning that says, "make whatever part of the haystack matches within
+these parentheses available as a capturing group." After finding a match, we
+access this capture group with `&caps[1]`.
 
-# Example: Avoid compiling the same regex in a loop
+[raw strings]: https://doc.rust-lang.org/stable/reference/tokens.html#raw-string-literals
 
-It is an anti-pattern to compile the same regular expression in a loop
-since compilation is typically expensive. (It takes anywhere from a few
-microseconds to a few **milliseconds** depending on the size of the
-regex.) Not only is compilation itself expensive, but this also prevents
-optimizations that reuse allocations internally to the matching engines.
+Otherwise, we execute a search using `re.captures(hay)` and return from our
+function if no match occurred. We then reference the middle initial by asking
+for the part of the haystack that matched the capture group indexed at `1`.
+(The capture group at index 0 is implicit and always corresponds to the entire
+match. In this case, that's `Homer J. Simpson`.)
 
-In Rust, it can sometimes be a pain to pass regular expressions around if
-they're used from inside a helper function. Instead, we recommend using the
-[`lazy_static`](https://crates.io/crates/lazy_static) crate to ensure that
-regular expressions are compiled exactly once.
+### Example: named capture groups
 
-For example:
+Continuing from our middle initial example above, we can tweak the pattern
+slightly to give a name to the group that matches the middle initial:
 
 ```rust
-use lazy_static::lazy_static;
 use regex::Regex;
 
-fn some_helper_function(text: &str) -> bool {
-    lazy_static! {
-        static ref RE: Regex = Regex::new("...").unwrap();
-    }
-    RE.is_match(text)
-}
+// Note that (?P<middle>.) is a different way to spell the same thing.
+let re = Regex::new(r"Homer (?<middle>.)\. Simpson").unwrap();
+let hay = "Homer J. Simpson";
+let Some(caps) = re.captures(hay) else { return };
+assert_eq!("J", &caps["middle"]);
+```
+
+Giving a name to a group can be useful when there are multiple groups in
+a pattern. It makes the code referring to those groups a bit easier to
+understand.
+
+### Example: validating a particular date format
+
+This example shows how to confirm whether a haystack, in its entirety, matches
+a particular date format:
 
-fn main() {}
+```rust
For example: -This crate provides convenient iterators for matching an expression -repeatedly against a search string to find successive non-overlapping -matches. For example, to find all dates in a string and be able to access -them by their component pieces: +```rust +use regex::Regex; + +let re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap(); +assert!(re.is_match("𝟚𝟘𝟙𝟘-𝟘𝟛-𝟙𝟜")); +``` + +To only match an ASCII decimal digit, all of the following are equivalent: + +* `[0-9]` +* `(?-u:\d)` +* `[[:digit:]]` +* `[\d&&\p{ascii}]` + +### Example: finding dates in a haystack + +In the previous example, we showed how one might validate that a haystack, +in its entirety, corresponded to a particular date format. But what if we wanted +to extract all things that look like dates in a specific format from a haystack? +To do this, we can use an iterator API to find all matches (notice that we've +removed the anchors and switched to looking for ASCII-only digits): ```rust -# use regex::Regex; -# fn main() { -let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap(); -let text = "2012-03-14, 2013-01-01 and 2014-07-05"; -for cap in re.captures_iter(text) { - println!("Month: {} Day: {} Year: {}", &cap[2], &cap[3], &cap[1]); -} -// Output: -// Month: 03 Day: 14 Year: 2012 -// Month: 01 Day: 01 Year: 2013 -// Month: 07 Day: 05 Year: 2014 -# } +use regex::Regex; + +let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap(); +let hay = "What do 1865-04-14, 1881-07-02, 1901-09-06 and 1963-11-22 have in common?"; +// 'm' is a 'Match', and 'as_str()' returns the matching part of the haystack. +let dates: Vec<&str> = re.find_iter(hay).map(|m| m.as_str()).collect(); +assert_eq!(dates, vec![ + "1865-04-14", + "1881-07-02", + "1901-09-06", + "1963-11-22", +]); +``` + +We can also iterate over [`Captures`] values instead of [`Match`] values, and +that in turn permits accessing each component of the date via capturing groups: + +```rust +use regex::Regex; + +let re = Regex::new(r"(?[0-9]{4})-(?[0-9]{2})-(?[0-9]{2})").unwrap(); +let hay = "What do 1865-04-14, 1881-07-02, 1901-09-06 and 1963-11-22 have in common?"; +// 'm' is a 'Match', and 'as_str()' returns the matching part of the haystack. +let dates: Vec<(&str, &str, &str)> = re.captures_iter(hay).map(|caps| { + // The unwraps are okay because every capture group must match if the whole + // regex matches, and in this context, we know we have a match. + // + // Note that we use `caps.name("y").unwrap().as_str()` instead of + // `&caps["y"]` because the lifetime of the former is the same as the + // lifetime of `hay` above, but the lifetime of the latter is tied to the + // lifetime of `caps` due to how the `Index` trait is defined. + let year = caps.name("y").unwrap().as_str(); + let month = caps.name("m").unwrap().as_str(); + let day = caps.name("d").unwrap().as_str(); + (year, month, day) +}).collect(); +assert_eq!(dates, vec![ + ("1865", "04", "14"), + ("1881", "07", "02"), + ("1901", "09", "06"), + ("1963", "11", "22"), +]); ``` -Notice that the year is in the capture group indexed at `1`. This is -because the *entire match* is stored in the capture group at index `0`. 
+### Example: simpler capture group extraction
+
+One can use [`Captures::extract`] to make the code from the previous example a
+bit simpler in this case:
+
+```rust
+use regex::Regex;
+
+let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap();
+let hay = "What do 1865-04-14, 1881-07-02, 1901-09-06 and 1963-11-22 have in common?";
+let dates: Vec<(&str, &str, &str)> = re.captures_iter(hay).map(|caps| {
+    let (_, [year, month, day]) = caps.extract();
+    (year, month, day)
+}).collect();
+assert_eq!(dates, vec![
+    ("1865", "04", "14"),
+    ("1881", "07", "02"),
+    ("1901", "09", "06"),
+    ("1963", "11", "22"),
+]);
+```
+
+`Captures::extract` works by ensuring that the number of matching groups match
+the number of groups requested via the `[year, month, day]` syntax. If they do,
+then the substrings for each corresponding capture group are automatically
+returned in an appropriately sized array. Rust's syntax for pattern matching
+arrays does the rest.
+
+### Example: replacement with named capture groups
 
 Building on the previous example, perhaps we'd like to rearrange the date
-formats. This can be done with text replacement. But to make the code
-clearer, we can *name* our capture groups and use those names as variables
-in our replacement text:
+formats. This can be done by finding each match and replacing it with
+something different. The [`Regex::replace_all`] routine provides a convenient
+way to do this, including by supporting references to named groups in the
+replacement string:
 
 ```rust
-# use regex::Regex;
-# fn main() {
-let re = Regex::new(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})").unwrap();
-let before = "2012-03-14, 2013-01-01 and 2014-07-05";
+use regex::Regex;
+
+let re = Regex::new(r"(?<y>\d{4})-(?<m>\d{2})-(?<d>\d{2})").unwrap();
+let before = "1973-01-05, 1975-08-25 and 1980-10-18";
 let after = re.replace_all(before, "$m/$d/$y");
-assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
-# }
+assert_eq!(after, "01/05/1973, 08/25/1975 and 10/18/1980");
 ```
 
-The `replace` methods are actually polymorphic in the replacement, which
+The replace methods are actually polymorphic in the replacement, which
 provides more flexibility than is seen here. (See the documentation for
-`Regex::replace` for more details.)
+[`Regex::replace`] for more details.)
+
+### Example: verbose mode
 
-Note that if your regex gets complicated, you can use the `x` flag to
-enable insignificant whitespace mode, which also lets you write comments:
+When your regex gets complicated, you might consider using something other
+than regex. But if you stick with regex, you can use the `x` flag to enable
+insignificant whitespace mode or "verbose mode." In this mode, whitespace
+is treated as insignificant and one may write comments. This may make your
+patterns easier to comprehend.
 ```rust
-# use regex::Regex;
-# fn main() {
+use regex::Regex;
+
 let re = Regex::new(r"(?x)
-  (?P<y>\d{4}) # the year
+  (?P<y>\d{4}) # the year, including all Unicode digits
   -
-  (?P<m>\d{2}) # the month
+  (?P<m>\d{2}) # the month, including all Unicode digits
   -
-  (?P<d>\d{2}) # the day
+  (?P<d>\d{2}) # the day, including all Unicode digits
 ").unwrap();
-let before = "2012-03-14, 2013-01-01 and 2014-07-05";
+
+let before = "1973-01-05, 1975-08-25 and 1980-10-18";
 let after = re.replace_all(before, "$m/$d/$y");
-assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
-# }
+assert_eq!(after, "01/05/1973, 08/25/1975 and 10/18/1980");
 ```
 
 If you wish to match against whitespace in this mode, you can still use `\s`,
@@ -148,10 +373,10 @@ If you wish to match against whitespace in this mode, you can still use `\s`,
 directly with `\ `, use its hex character code `\x20` or temporarily disable
 the `x` flag, e.g., `(?-x: )`.
 
-# Example: match multiple regular expressions simultaneously
+### Example: match multiple regular expressions simultaneously
 
-This demonstrates how to use a `RegexSet` to match multiple (possibly
-overlapping) regular expressions in a single scan of the search text:
+This demonstrates how to use a [`RegexSet`] to match multiple (possibly
+overlapping) regexes in a single scan of a haystack:
 
 ```rust
 use regex::RegexSet;
@@ -166,7 +391,8 @@ let set = RegexSet::new(&[
     r"foobar",
 ]).unwrap();
 
-// Iterate over and collect all of the matches.
+// Iterate over and collect all of the matches. Each match corresponds to the
+// ID of the matching pattern.
 let matches: Vec<_> = set.matches("foobar").into_iter().collect();
 assert_eq!(matches, vec![0, 2, 3, 4, 6]);
 
@@ -176,94 +402,225 @@ assert!(!matches.matched(5));
 assert!(matches.matched(6));
 ```
 
-# Pay for what you use
+# Performance
+
+This section briefly discusses a few concerns regarding the speed and resource
+usage of regexes.
+
+### Only ask for what you need
 
-With respect to searching text with a regular expression, there are three
-questions that can be asked:
+When running a search with a regex, there are generally three different types
+of information one can ask for:
 
-1. Does the text match this expression?
-2. If so, where does it match?
-3. Where did the capturing groups match?
+1. Does a regex match in a haystack?
+2. Where does a regex match in a haystack?
+3. Where do each of the capturing groups match in a haystack?
 
 Generally speaking, this crate could provide a function to answer only #3,
 which would subsume #1 and #2 automatically. However, it can be significantly
 more expensive to compute the location of capturing group matches, so it's
 best not to do it if you don't need to.
 
-Therefore, only use what you need. For example, don't use `find` if you
-only need to test if an expression matches a string. (Use `is_match`
-instead.)
+Therefore, only ask for what you need. For example, don't use [`Regex::find`]
+if you only need to test if a regex matches a haystack. Use [`Regex::is_match`]
+instead.
+
+### Unicode can impact memory usage and search speed
+
+This crate has first class support for Unicode and it is **enabled by default**.
+In many cases, the extra memory required to support it will be negligible and
+it typically won't impact search speed. But it can in some cases.
+
+With respect to memory usage, the impact of Unicode principally manifests
+through the use of Unicode character classes. Unicode character classes
+tend to be quite large. For example, `\w` by default matches around 140,000
+distinct codepoints. This requires additional memory, and tends to slow down
+regex compilation. While a `\w` here and there is unlikely to be noticed,
+writing `\w{100}` will for example result in quite a large regex by default.
+Indeed, `\w` is considerably larger than its ASCII-only version, so if your
+requirements are satisfied by ASCII, it's probably a good idea to stick to
+ASCII classes. The ASCII-only version of `\w` can be spelled in a number of
+ways. All of the following are equivalent:
+
+* `[0-9A-Za-z_]`
+* `(?-u:\w)`
+* `[[:word:]]`
+* `[\w&&\p{ascii}]`
+
+With respect to search speed, Unicode tends to be handled pretty well, even when
+using large Unicode character classes. However, some of the faster internal
+regex engines cannot handle a Unicode aware word boundary assertion. So if you
+don't need Unicode-aware word boundary assertions, you might consider using
+`(?-u:\b)` instead of `\b`, where the former uses an ASCII-only definition of
+a word character.
+
+### Literals might accelerate searches
+
+This crate tends to be quite good at recognizing literals in a regex pattern
+and using them to accelerate a search. If it is at all possible to include
+some kind of literal in your pattern, then it might make search substantially
+faster. For example, in the regex `\w+@\w+`, the engine will look for
+occurrences of `@` and then try a reverse match for `\w+` to find the start
+position.
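+As a concrete illustration (an editorial sketch, not part of the original
+documentation), the pattern below mixes `\w+` with the literals `@` and
+`.com`, which the engine can, at least in principle, scan for quickly before
+running the full regex:
+
+```rust
+use regex::Regex;
+
+let re = Regex::new(r"\w+@\w+\.com").unwrap();
+let m = re.find("contact us at help@example.com today").unwrap();
+assert_eq!("help@example.com", m.as_str());
+```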
This requires additional memory, and tends to slow down +regex compilation. While a `\w` here and there is unlikely to be noticed, +writing `\w{100}` will for example result in quite a large regex by default. +Indeed, `\w` is considerably larger than its ASCII-only version, so if your +requirements are satisfied by ASCII, it's probably a good idea to stick to +ASCII classes. The ASCII-only version of `\w` can be spelled in a number of +ways. All of the following are equivalent: + +* `[0-9A-Za-z_]` +* `(?-u:\w)` +* `[[:word:]]` +* `[\w&&\p{ascii}]` + +With respect to search speed, Unicode tends to be handled pretty well, even when +using large Unicode character classes. However, some of the faster internal +regex engines cannot handle a Unicode aware word boundary assertion. So if you +don't need Unicode-aware word boundary assertions, you might consider using +`(?-u:\b)` instead of `\b`, where the former uses an ASCII-only definition of +a word character. + +### Literals might accelerate searches + +This crate tends to be quite good at recognizing literals in a regex pattern +and using them to accelerate a search. If it is at all possible to include +some kind of literal in your pattern, then it might make search substantially +faster. For example, in the regex `\w+@\w+`, the engine will look for +occurrences of `@` and then try a reverse match for `\w+` to find the start +position. + +### Avoid re-compiling regexes, especially in a loop + +It is an anti-pattern to compile the same pattern in a loop since regex +compilation is typically expensive. (It takes anywhere from a few microseconds +to a few **milliseconds** depending on the size of the pattern.) Not only is +compilation itself expensive, but this also prevents optimizations that reuse +allocations internally to the regex engine. + +In Rust, it can sometimes be a pain to pass regexes around if they're used from +inside a helper function. Instead, we recommend using crates like [`once_cell`] +and [`lazy_static`] to ensure that patterns are compiled exactly once. + +[`once_cell`]: https://crates.io/crates/once_cell +[`lazy_static`]: https://crates.io/crates/lazy_static + +This example shows how to use `once_cell`: -# Unicode +```rust +use { + once_cell::sync::Lazy, + regex::Regex, +}; + +fn some_helper_function(haystack: &str) -> bool { + static RE: Lazy = Lazy::new(|| Regex::new(r"...").unwrap()); + RE.is_match(haystack) +} + +fn main() { + assert!(some_helper_function("abc")); + assert!(!some_helper_function("ac")); +} +``` -This implementation executes regular expressions **only** on valid UTF-8 -while exposing match locations as byte indices into the search string. (To -relax this restriction, use the [`bytes`](bytes/index.html) sub-module.) +Specifically, in this example, the regex will be compiled when it is used for +the first time. On subsequent uses, it will reuse the previously built `Regex`. +Notice how one can define the `Regex` locally to a specific function. + +### Sharing a regex across threads can result in contention + +While a single `Regex` can be freely used from multiple threads simultaneously, +there is a small synchronization cost that must be paid. Generally speaking, +one shouldn't expect to observe this unless the principal task in each thread +is searching with the regex *and* most searches are on short haystacks. In this +case, internal contention on shared resources can spike and increase latency, +which in turn may slow down each individual search. 
 
-Only simple case folding is supported. Namely, when matching
-case-insensitively, the characters are first mapped using the "simple" case
-folding rules defined by Unicode.
+# Unicode
 
-Regular expressions themselves are **only** interpreted as a sequence of
-Unicode scalar values. This means you can use Unicode characters directly
-in your expression:
+This section discusses what kind of Unicode support this regex library has.
+Before showing some examples, we'll summarize the relevant points:
+
+* This crate almost fully implements "Basic Unicode Support" (Level 1) as
+specified by the [Unicode Technical Standard #18][UTS18]. The full details
+of what is supported are documented in [UNICODE.md] in the root of the regex
+crate repository. There is virtually no support for "Extended Unicode Support"
+(Level 2) from UTS#18.
+* The top-level [`Regex`] runs searches *as if* iterating over each of the
+codepoints in the haystack. That is, the fundamental atom of matching is a
+single codepoint.
+* [`bytes::Regex`], in contrast, permits disabling Unicode mode for part or all
+of your pattern in all cases. When Unicode mode is disabled, then a search is
+run *as if* iterating over each byte in the haystack. That is, the fundamental
+atom of matching is a single byte. (A top-level `Regex` also permits disabling
+Unicode and thus matching *as if* it were one byte at a time, but only when
+doing so wouldn't permit matching invalid UTF-8.)
+* When Unicode mode is enabled (the default), `.` will match an entire Unicode
+scalar value, even when it is encoded using multiple bytes. When Unicode mode
+is disabled (e.g., `(?-u:.)`), then `.` will match a single byte in all cases.
+* The character classes `\w`, `\d` and `\s` are all Unicode-aware by default.
+Use `(?-u:\w)`, `(?-u:\d)` and `(?-u:\s)` to get their ASCII-only definitions.
+* Similarly, `\b` and `\B` use a Unicode definition of a "word" character.
+To get ASCII-only word boundaries, use `(?-u:\b)` and `(?-u:\B)`. This also
+applies to the special word boundary assertions. (That is, `\b{start}`,
+`\b{end}`, `\b{start-half}`, `\b{end-half}`.)
+* `^` and `$` are **not** Unicode-aware in multi-line mode. Namely, they only
+recognize `\n` (assuming CRLF mode is not enabled) and not any of the other
+forms of line terminators defined by Unicode.
+* Case insensitive searching is Unicode-aware and uses simple case folding.
+* Unicode general categories, scripts and many boolean properties are available
+by default via the `\p{property name}` syntax.
+* In all cases, matches are reported using byte offsets. Or more precisely,
+UTF-8 code unit offsets. This permits constant time indexing and slicing of the
+haystack.
+
+[UTS18]: https://unicode.org/reports/tr18/
+[UNICODE.md]: https://github.com/rust-lang/regex/blob/master/UNICODE.md
+
+Patterns themselves are **only** interpreted as a sequence of Unicode scalar
+values.
+This means you can use Unicode characters directly in your pattern:
 
 ```rust
-# use regex::Regex;
-# fn main() {
+use regex::Regex;
+
 let re = Regex::new(r"(?i)Δ+").unwrap();
-let mat = re.find("ΔδΔ").unwrap();
-assert_eq!((mat.start(), mat.end()), (0, 6));
-# }
+let m = re.find("ΔδΔ").unwrap();
+assert_eq!((0, 6), (m.start(), m.end()));
+// alternatively:
+assert_eq!(0..6, m.range());
 ```
 
-Most features of the regular expressions in this crate are Unicode aware. Here
-are some examples:
-
-* `.` will match any valid UTF-8 encoded Unicode scalar value except for `\n`.
-  (To also match `\n`, enable the `s` flag, e.g., `(?s:.)`.)
-* `\w`, `\d` and `\s` are Unicode aware. For example, `\s` will match all forms
-  of whitespace categorized by Unicode.
-* `\b` matches a Unicode word boundary.
-* Negated character classes like `[^a]` match all Unicode scalar values except
-  for `a`.
-* `^` and `$` are **not** Unicode aware in multi-line mode. Namely, they only
-  recognize `\n` and not any of the other forms of line terminators defined
-  by Unicode.
-
-Unicode general categories, scripts, script extensions, ages and a smattering
-of boolean properties are available as character classes. For example, you can
-match a sequence of numerals, Greek or Cherokee letters:
+As noted above, Unicode general categories, scripts, script extensions, ages
+and a smattering of boolean properties are available as character classes. For
+example, you can match a sequence of numerals, Greek or Cherokee letters:
 
 ```rust
-# use regex::Regex;
-# fn main() {
+use regex::Regex;
+
 let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap();
-let mat = re.find("abcΔᎠβⅠᏴγδⅡxyz").unwrap();
-assert_eq!((mat.start(), mat.end()), (3, 23));
-# }
+let m = re.find("abcΔᎠβⅠᏴγδⅡxyz").unwrap();
+assert_eq!(3..23, m.range());
 ```
 
-For a more detailed breakdown of Unicode support with respect to
-[UTS#18](https://unicode.org/reports/tr18/),
-please see the
-[UNICODE](https://github.com/rust-lang/regex/blob/master/UNICODE.md)
-document in the root of the regex repository.
+While not specific to Unicode, this library also supports character class set
+operations. Namely, one can nest character classes arbitrarily and perform set
+operations on them. Those set operations are union (the default), intersection,
+difference and symmetric difference. These set operations tend to be most
+useful with Unicode character classes. For example, to match any codepoint
+that is both in the `Greek` script and in the `Letter` general category:
 
-# Opt out of Unicode support
+```rust
+use regex::Regex;
 
-The `bytes` sub-module provides a `Regex` type that can be used to match
-on `&[u8]`. By default, text is interpreted as UTF-8 just like it is with
-the main `Regex` type. However, this behavior can be disabled by turning
-off the `u` flag, even if doing so could result in matching invalid UTF-8.
-For example, when the `u` flag is disabled, `.` will match any byte instead
-of any Unicode scalar value.
+let re = Regex::new(r"[\p{Greek}&&\pL]+").unwrap();
+let subs: Vec<&str> = re.find_iter("ΔδΔ𐅌ΔδΔ").map(|m| m.as_str()).collect();
+assert_eq!(subs, vec!["ΔδΔ", "ΔδΔ"]);
+
+// If we just matched on Greek, then all codepoints would match!
+let re = Regex::new(r"\p{Greek}+").unwrap();
+let subs: Vec<&str> = re.find_iter("ΔδΔ𐅌ΔδΔ").map(|m| m.as_str()).collect();
+assert_eq!(subs, vec!["ΔδΔ𐅌ΔδΔ"]);
+```
+
+### Opt out of Unicode support
+
+The [`bytes::Regex`] type can be used to search `&[u8]` haystacks.
+By default, haystacks are conventionally treated as UTF-8, just as they are
+with the main `Regex` type. However, this behavior can be disabled by turning
+off the `u` flag, even if doing so could result in matching invalid UTF-8. For
+example, when the `u` flag is disabled, `.` will match any byte instead of any
+Unicode scalar value.
 
 Disabling the `u` flag is also possible with the standard `&str`-based `Regex`
 type, but it is only allowed where the UTF-8 invariant is maintained. For
 example, `(?-u:\w)` is an ASCII-only `\w` character class and is legal in an
-`&str`-based `Regex`, but `(?-u:\xFF)` will attempt to match the raw byte
-`\xFF`, which is invalid UTF-8 and therefore is illegal in `&str`-based
+`&str`-based `Regex`, but `(?-u:\W)` will attempt to match *any byte* that
+isn't in `(?-u:\w)`, which in turn includes bytes that are invalid UTF-8.
+Similarly, `(?-u:\xFF)` will attempt to match the raw byte `\xFF` (instead of
+`U+00FF`), which is invalid UTF-8 and therefore is illegal in `&str`-based
 regexes.
 
 Finally, since Unicode support requires bundling large Unicode data
@@ -279,15 +636,16 @@ The syntax supported in this crate is documented below.
 
 Note that the regular expression parser and abstract syntax are exposed in
 a separate crate, [`regex-syntax`](https://docs.rs/regex-syntax).
 
-## Matching one character
+### Matching one character